diff --git a/.github/workflows/open-pr-copy-from-oss.yml b/.github/workflows/open-pr-copy-from-oss.yml
new file mode 100644
index 00000000000..05af6ea449a
--- /dev/null
+++ b/.github/workflows/open-pr-copy-from-oss.yml
@@ -0,0 +1,28 @@
+name: Open A PR to Copy Code From OSS
+
+on:
+ workflow_dispatch:
+ # schedule:
+ # - cron: '0 10 * * *'
+
+permissions:
+ contents: write
+
+jobs:
+ copy:
+ runs-on: ubuntu-latest
+ steps:
+ - name: Checkout repository
+ uses: actions/checkout@v4
+ with:
+ ref: 'main'
+
+ - name: Install GitHub CLI (if not present)
+ run: |
+ bash scripts/code_sync/install_github_cli.sh
+
+ - name: Copy from OSS code
+ env:
+ GH_TOKEN: ${{ secrets.PAT_FOR_CODE_SYNC_FROM_LIANMIN }}
+ run: |
+ python3 scripts/code_sync/copy_from_oss.py
diff --git a/.github/workflows/open-pr-copy-to-oss.yml b/.github/workflows/open-pr-copy-to-oss.yml
new file mode 100644
index 00000000000..b3bb6aae4fa
--- /dev/null
+++ b/.github/workflows/open-pr-copy-to-oss.yml
@@ -0,0 +1,31 @@
+name: Open A PR to Copy Diff To OSS
+
+on:
+ workflow_dispatch:
+ inputs:
+ commit_sha:
+ description: 'The commit SHA to copy. Defaults to LAST to copy the latest commit.'
+ required: false
+ default: 'LAST'
+
+permissions:
+ contents: write
+
+jobs:
+ copy:
+ runs-on: ubuntu-latest
+ steps:
+ - name: Checkout repository
+ uses: actions/checkout@v4
+ with:
+ fetch-depth: 0
+
+ - name: Install GitHub CLI (if not present)
+ run: |
+ bash scripts/code_sync/install_github_cli.sh
+
+ - name: Copy to OSS code
+ env:
+ GH_TOKEN: ${{ secrets.PAT_FOR_CODE_SYNC_FROM_LIANMIN }}
+ run: |
+ python3 scripts/code_sync/copy_to_oss.py --commit ${{ github.event.inputs.commit_sha }}
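+
+# A hypothetical manual trigger from a terminal, assuming the GitHub CLI is
+# authenticated with access to this repository:
+#   gh workflow run open-pr-copy-to-oss.yml -f commit_sha=LAST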
diff --git a/.github/workflows/pr-benchmark-rust.yml b/.github/workflows/pr-benchmark-rust.yml
index e34454c1923..e039cba23a2 100644
--- a/.github/workflows/pr-benchmark-rust.yml
+++ b/.github/workflows/pr-benchmark-rust.yml
@@ -9,6 +9,7 @@ on:
branches: [ main ]
paths:
- "sgl-router/**"
+ types: [opened, synchronize, reopened, labeled]
workflow_dispatch:
concurrency:
@@ -19,9 +20,63 @@ permissions:
pull-requests: write
issues: write
jobs:
- benchmark-router:
+ # Quick check job that always runs on PRs
+ benchmark-compile-check:
+ name: Benchmark Compilation Check
if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
runs-on: ubuntu-latest
+ steps:
+ - name: Checkout code
+ uses: actions/checkout@v4
+
+ - name: Install dependencies
+ run: |
+ bash scripts/ci/ci_install_rust.sh
+
+ - name: Setup sccache
+ uses: mozilla-actions/sccache-action@v0.0.3
+ continue-on-error: true
+
+ - name: Rust cache
+ uses: Swatinem/rust-cache@v2
+ with:
+ workspaces: sgl-router
+ # Share cache across all benchmark jobs
+ shared-key: "rust-cache"
+ # Save cache even on failure
+ save-if: true
+
+ - name: Check benchmarks compile
+ run: |
+ source "$HOME/.cargo/env"
+ cd sgl-router/
+ # Try to use sccache, but disable if it fails
+ if command -v sccache &> /dev/null; then
+ echo "Testing sccache availability..."
+ # Try to start sccache and check if it works
+ export RUSTC_WRAPPER=sccache
+ export SCCACHE_GHA_ENABLED="true"
+ if sccache --start-server 2>/dev/null && sccache --show-stats 2>/dev/null; then
+ echo "sccache is working, using it for compilation"
+ else
+ echo "sccache failed to start, falling back to regular cargo"
+ unset RUSTC_WRAPPER
+ unset SCCACHE_GHA_ENABLED
+ fi
+ else
+ echo "sccache not available, using regular cargo"
+ fi
+ cargo check --benches
+
+  # Full benchmark jobs that only run with the 'benchmark' label, on pushes to main, or via manual dispatch
+ benchmark-request-processing:
+ name: Request Processing Benchmark
+ if: |
+ github.repository == 'sgl-project/sglang' &&
+ (github.event_name == 'push' ||
+ github.event_name == 'workflow_dispatch' ||
+ contains(github.event.pull_request.labels.*.name, 'benchmark'))
+ runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v4
@@ -33,77 +88,219 @@ jobs:
run: |
bash scripts/ci/ci_install_rust.sh
- - name: Cache Rust dependencies
- uses: actions/cache@v4
+ - name: Setup sccache
+ uses: mozilla-actions/sccache-action@v0.0.3
+ continue-on-error: true
+
+ - name: Rust cache
+ uses: Swatinem/rust-cache@v2
with:
- path: |
- ~/.cargo/bin/
- ~/.cargo/registry/index/
- ~/.cargo/registry/cache/
- ~/.cargo/git/db/
- sgl-router/target/
- key: ${{ runner.os }}-cargo-${{ hashFiles('sgl-router/Cargo.lock') }}
- restore-keys: |
- ${{ runner.os }}-cargo-
-
- - name: Build router in release mode
+ workspaces: sgl-router
+ # Share cache across all benchmark jobs
+ shared-key: "rust-cache"
+ # Save cache even on failure
+ save-if: true
+
+ - name: Run request processing benchmark
+ timeout-minutes: 30
run: |
source "$HOME/.cargo/env"
cd sgl-router/
- cargo build --release
+ # Try to use sccache, but disable if it fails
+ if command -v sccache &> /dev/null; then
+ echo "Testing sccache availability..."
+ # Try to start sccache and check if it works
+ export RUSTC_WRAPPER=sccache
+ export SCCACHE_GHA_ENABLED="true"
+ if sccache --start-server 2>/dev/null && sccache --show-stats 2>/dev/null; then
+ echo "sccache is working, using it for compilation"
+ else
+ echo "sccache failed to start, falling back to regular cargo"
+ unset RUSTC_WRAPPER
+ unset SCCACHE_GHA_ENABLED
+ fi
+ else
+ echo "sccache not available, using regular cargo"
+ fi
+ # Run only the summary benchmark for quick validation in PRs
+ cargo bench --bench request_processing -- benchmark_summary --exact
+
+ - name: Upload benchmark results
+ if: always()
+ uses: actions/upload-artifact@v4
+ with:
+ name: request-processing-results-${{ github.sha }}
+ path: |
+ sgl-router/target/criterion/benchmark_summary/
+ retention-days: 30
- - name: Run quick benchmarks
- timeout-minutes: 15
+ benchmark-tokenizer:
+ name: Tokenizer Benchmark
+ if: |
+ github.repository == 'sgl-project/sglang' &&
+ (github.event_name == 'push' ||
+ github.event_name == 'workflow_dispatch' ||
+ contains(github.event.pull_request.labels.*.name, 'benchmark'))
+ runs-on: ubuntu-latest
+ steps:
+ - name: Checkout code
+ uses: actions/checkout@v4
+ with:
+ fetch-depth: 100
+
+ - name: Install dependencies
+ run: |
+ bash scripts/ci/ci_install_rust.sh
+
+ - name: Setup sccache
+ uses: mozilla-actions/sccache-action@v0.0.3
+ continue-on-error: true
+
+ - name: Rust cache
+ uses: Swatinem/rust-cache@v2
+ with:
+ workspaces: sgl-router
+ # Share cache across all benchmark jobs
+ shared-key: "rust-cache"
+ # Save cache even on failure
+ save-if: true
+
+ - name: Run tokenizer benchmark
+ timeout-minutes: 30
run: |
source "$HOME/.cargo/env"
cd sgl-router/
- # Run quick benchmarks for PR validation using Python script
- python3 scripts/run_benchmarks.py --quick --validate-thresholds --save-results
+ # Try to use sccache, but disable if it fails
+ if command -v sccache &> /dev/null; then
+ echo "Testing sccache availability..."
+ # Try to start sccache and check if it works
+ export RUSTC_WRAPPER=sccache
+ export SCCACHE_GHA_ENABLED="true"
+ if sccache --start-server 2>/dev/null && sccache --show-stats 2>/dev/null; then
+ echo "sccache is working, using it for compilation"
+ else
+ echo "sccache failed to start, falling back to regular cargo"
+ unset RUSTC_WRAPPER
+ unset SCCACHE_GHA_ENABLED
+ fi
+ else
+ echo "sccache not available, using regular cargo"
+ fi
+ cargo bench --bench tokenizer_benchmark
- name: Upload benchmark results
if: always()
uses: actions/upload-artifact@v4
with:
- name: benchmark-results-${{ github.sha }}
+ name: tokenizer-results-${{ github.sha }}
path: |
- sgl-router/target/criterion/
+ sgl-router/target/criterion/tokenizer*/
retention-days: 30
- benchmark-integration-test:
- if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
+ benchmark-tool-parser:
+ name: Tool Parser Benchmark
+ if: |
+ github.repository == 'sgl-project/sglang' &&
+ (github.event_name == 'push' ||
+ github.event_name == 'workflow_dispatch' ||
+ contains(github.event.pull_request.labels.*.name, 'benchmark'))
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v4
+ with:
+ fetch-depth: 100
- name: Install dependencies
run: |
bash scripts/ci/ci_install_rust.sh
- - name: Cache Rust dependencies
- uses: actions/cache@v4
+ - name: Setup sccache
+ uses: mozilla-actions/sccache-action@v0.0.3
+ continue-on-error: true
+
+ - name: Rust cache
+ uses: Swatinem/rust-cache@v2
with:
- path: |
- ~/.cargo/bin/
- ~/.cargo/registry/index/
- ~/.cargo/registry/cache/
- ~/.cargo/git/db/
- sgl-router/target/
- key: ${{ runner.os }}-cargo-${{ hashFiles('sgl-router/Cargo.lock') }}
- restore-keys: |
- ${{ runner.os }}-cargo-
-
- - name: Run benchmark integration tests
- timeout-minutes: 10
+ workspaces: sgl-router
+ # Share cache across all benchmark jobs
+ shared-key: "rust-cache"
+ # Save cache even on failure
+ save-if: true
+
+ - name: Run tool parser benchmark
+ timeout-minutes: 30
run: |
source "$HOME/.cargo/env"
cd sgl-router/
- # Run integration tests to ensure benchmark code compiles and works
- cargo test --test benchmark_integration
+ # Try to use sccache, but disable if it fails
+ if command -v sccache &> /dev/null; then
+ echo "Testing sccache availability..."
+ # Try to start sccache and check if it works
+ export RUSTC_WRAPPER=sccache
+ export SCCACHE_GHA_ENABLED="true"
+ if sccache --start-server 2>/dev/null && sccache --show-stats 2>/dev/null; then
+ echo "sccache is working, using it for compilation"
+ else
+ echo "sccache failed to start, falling back to regular cargo"
+ unset RUSTC_WRAPPER
+ unset SCCACHE_GHA_ENABLED
+ fi
+ else
+ echo "sccache not available, using regular cargo"
+ fi
+ cargo bench --bench tool_parser_benchmark
+
+ - name: Upload benchmark results
+ if: always()
+ uses: actions/upload-artifact@v4
+ with:
+ name: tool-parser-results-${{ github.sha }}
+ path: |
+ sgl-router/target/criterion/tool_parser*/
+ retention-days: 30
+
+ benchmark-summary:
+ name: Benchmark Summary
+ needs: [benchmark-request-processing, benchmark-tokenizer, benchmark-tool-parser]
+ if: always() && (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request')
+ runs-on: ubuntu-latest
+ steps:
+ - name: Download all benchmark results
+ uses: actions/download-artifact@v4
+ with:
+ pattern: '*-results-${{ github.sha }}'
+ path: benchmark-results
- - name: Verify benchmark compilation
+ - name: Generate summary
run: |
- source "$HOME/.cargo/env"
- cd sgl-router/
- # Ensure all benchmarks compile without running them
- cargo check --benches
+ echo "## Benchmark Results Summary" > summary.md
+ echo "" >> summary.md
+ echo "### Request Processing" >> summary.md
+ if [ -d "benchmark-results/request-processing-results-${{ github.sha }}" ]; then
+ echo "✅ Completed" >> summary.md
+ else
+ echo "❌ Failed or skipped" >> summary.md
+ fi
+ echo "" >> summary.md
+ echo "### Tokenizer" >> summary.md
+ if [ -d "benchmark-results/tokenizer-results-${{ github.sha }}" ]; then
+ echo "✅ Completed" >> summary.md
+ else
+ echo "❌ Failed or skipped" >> summary.md
+ fi
+ echo "" >> summary.md
+ echo "### Tool Parser" >> summary.md
+ if [ -d "benchmark-results/tool-parser-results-${{ github.sha }}" ]; then
+ echo "✅ Completed" >> summary.md
+ else
+ echo "❌ Failed or skipped" >> summary.md
+ fi
+ cat summary.md
+
+ - name: Upload summary
+ uses: actions/upload-artifact@v4
+ with:
+ name: benchmark-summary-${{ github.sha }}
+ path: summary.md
+ retention-days: 30
diff --git a/.github/workflows/pr-test-amd.yml b/.github/workflows/pr-test-amd.yml
index 02f79f7cb93..2c7e2c6527f 100644
--- a/.github/workflows/pr-test-amd.yml
+++ b/.github/workflows/pr-test-amd.yml
@@ -5,7 +5,7 @@ on:
branches: [ main ]
paths:
- "python/**"
- - "scripts/**"
+ - "scripts/ci/**"
- "test/**"
- "sgl-kernel/**"
- ".github/workflows/pr-test-amd.yml"
@@ -13,7 +13,7 @@ on:
branches: [ main ]
paths:
- "python/**"
- - "scripts/**"
+ - "scripts/ci/**"
- "test/**"
- "sgl-kernel/**"
- ".github/workflows/pr-test-amd.yml"
@@ -28,6 +28,7 @@ jobs:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
github.event.pull_request.draft == false
strategy:
+ fail-fast: false
matrix:
runner: [linux-mi300-gpu-1, linux-mi325-gpu-1]
runs-on: ${{matrix.runner}}
@@ -54,8 +55,9 @@ jobs:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
github.event.pull_request.draft == false
strategy:
+ fail-fast: false
matrix:
- runner: [linux-mi300-gpu-2, linux-mi325-gpu-2]
+ runner: [linux-mi300-gpu-2, linux-mi325-gpu-2, linux-mi35x-gpu-2]
runs-on: ${{matrix.runner}}
steps:
- name: Checkout code
@@ -70,7 +72,7 @@ jobs:
run: bash scripts/ci/amd_ci_install_dependency.sh
- name: Evaluate accuracy (TP=2)
- timeout-minutes: 30
+ timeout-minutes: 60
run: |
bash scripts/ci/amd_ci_exec.sh -e SGLANG_USE_AITER=0 python3 test_moe_eval_accuracy_large.py
@@ -78,6 +80,7 @@ jobs:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
github.event.pull_request.draft == false
strategy:
+ fail-fast: false
matrix:
runner: [linux-mi300-gpu-1, linux-mi325-gpu-1]
runs-on: ${{matrix.runner}}
@@ -102,6 +105,7 @@ jobs:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
github.event.pull_request.draft == false
strategy:
+ fail-fast: false
matrix:
runner: [linux-mi300-gpu-1, linux-mi325-gpu-1]
runs-on: ${{matrix.runner}}
@@ -142,6 +146,7 @@ jobs:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
github.event.pull_request.draft == false
strategy:
+ fail-fast: false
matrix:
runner: [linux-mi300-gpu-1, linux-mi325-gpu-1]
runs-on: ${{matrix.runner}}
@@ -176,6 +181,7 @@ jobs:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
github.event.pull_request.draft == false
strategy:
+ fail-fast: false
matrix:
runner: [linux-mi300-gpu-2, linux-mi325-gpu-2]
runs-on: ${{matrix.runner}}
@@ -223,7 +229,7 @@ jobs:
fail-fast: false
matrix:
runner: [linux-mi300-gpu-1, linux-mi325-gpu-1]
- part: [0, 1, 2, 3, 4, 5, 6]
+ part: [0, 1, 2, 3, 4, 5, 6, 7]
runs-on: ${{matrix.runner}}
steps:
- name: Checkout code
@@ -240,14 +246,15 @@ jobs:
- name: Run test
timeout-minutes: 50
run: |
- bash scripts/ci/amd_ci_exec.sh python3 run_suite.py --suite per-commit-amd --auto-partition-id ${{ matrix.part }} --auto-partition-size 7
+ bash scripts/ci/amd_ci_exec.sh python3 run_suite.py --suite per-commit-amd --auto-partition-id ${{ matrix.part }} --auto-partition-size 8
- unit-test-backend-2-gpu-amd:
+ unit-test-backend-1-gpu-amd-mi35x:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
github.event.pull_request.draft == false
strategy:
+ fail-fast: false
matrix:
- runner: [linux-mi300-gpu-2, linux-mi325-gpu-2]
+ runner: [linux-mi35x-gpu-1]
runs-on: ${{matrix.runner}}
steps:
- name: Checkout code
@@ -262,16 +269,17 @@ jobs:
run: bash scripts/ci/amd_ci_install_dependency.sh
- name: Run test
- timeout-minutes: 40
+ timeout-minutes: 50
run: |
- bash scripts/ci/amd_ci_exec.sh python3 run_suite.py --suite per-commit-2-gpu-amd
+ bash scripts/ci/amd_ci_exec.sh python3 run_suite.py --suite per-commit-amd-mi35x
- unit-test-backend-8-gpu-amd:
+ unit-test-backend-2-gpu-amd:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
github.event.pull_request.draft == false
strategy:
+ fail-fast: false
matrix:
- runner: [linux-mi300-gpu-8]
+ runner: [linux-mi300-gpu-2, linux-mi325-gpu-2]
runs-on: ${{matrix.runner}}
steps:
- name: Checkout code
@@ -286,14 +294,15 @@ jobs:
run: bash scripts/ci/amd_ci_install_dependency.sh
- name: Run test
- timeout-minutes: 60
+ timeout-minutes: 40
run: |
- bash scripts/ci/amd_ci_exec.sh python3 run_suite.py --suite per-commit-8-gpu-amd --timeout-per-file 3600
+ bash scripts/ci/amd_ci_exec.sh python3 run_suite.py --suite per-commit-2-gpu-amd
- unit-test-backend-8-gpu-CAR-amd:
+ unit-test-backend-8-gpu-amd:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
github.event.pull_request.draft == false
strategy:
+ fail-fast: false
matrix:
runner: [linux-mi300-gpu-8]
runs-on: ${{matrix.runner}}
@@ -309,10 +318,10 @@ jobs:
- name: Install dependencies
run: bash scripts/ci/amd_ci_install_dependency.sh
- - name: Run CustomAllReduce test
- timeout-minutes: 20
+ - name: Run test
+ timeout-minutes: 60
run: |
- bash scripts/ci/amd_ci_exec.sh -e CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python3 -m unittest test_custom_allreduce.TestCustomAllReduce
+ bash scripts/ci/amd_ci_exec.sh python3 run_suite.py --suite per-commit-8-gpu-amd --timeout-per-file 3600
unit-test-sgl-kernel-amd:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
@@ -336,20 +345,22 @@ jobs:
bash scripts/ci/amd_ci_install_dependency.sh
- name: Run test
- timeout-minutes: 10
+ timeout-minutes: 14
run: |
docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_moe_align.py
docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_moe_topk_softmax.py
docker exec -w /sglang-checkout/sgl-kernel/tests/speculative ci_sglang python3 -m pytest test_eagle_utils.py
docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_apply_token_bitmask_inplace.py
+ docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_activation.py
+ docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_kvcacheio.py
pr-test-amd-finish:
if: always()
needs: [
accuracy-test-1-gpu-amd, mla-test-1-gpu-amd, bench-test-2-gpu-amd,
accuracy-test-2-gpu-amd, performance-test-1-gpu-part-1-amd, performance-test-1-gpu-part-2-amd,
- unit-test-backend-1-gpu-amd, unit-test-backend-2-gpu-amd, unit-test-backend-8-gpu-amd,
- unit-test-sgl-kernel-amd
+ unit-test-backend-1-gpu-amd, unit-test-backend-1-gpu-amd-mi35x, unit-test-backend-2-gpu-amd,
+ unit-test-backend-8-gpu-amd, unit-test-sgl-kernel-amd
]
runs-on: ubuntu-latest
steps:
diff --git a/.github/workflows/pr-test-h20.yml b/.github/workflows/pr-test-h20.yml
index e283ea42f50..58e3352895a 100644
--- a/.github/workflows/pr-test-h20.yml
+++ b/.github/workflows/pr-test-h20.yml
@@ -37,6 +37,7 @@ jobs:
- "python/sglang/srt/models/deepseek*"
- "python/sglang/srt/layers/moe/**"
- ".github/workflows/pr-test-h20.yml"
+ - "python/pyproject.toml"
per-commit-8-gpu-h20:
needs: [check-changes]
@@ -59,7 +60,7 @@ jobs:
cd test/srt
python3 run_suite.py --suite per-commit-8-gpu-h20
- pr-test-finish:
+ pr-test-h20-finish:
needs: [
check-changes,
per-commit-8-gpu-h20,
diff --git a/.github/workflows/pr-test-npu.yml b/.github/workflows/pr-test-npu.yml
index 45c115dbe30..c0fe381e38d 100644
--- a/.github/workflows/pr-test-npu.yml
+++ b/.github/workflows/pr-test-npu.yml
@@ -5,14 +5,14 @@ on:
branches: [ main ]
paths:
- "python/**"
- - "scripts/**"
+ - "scripts/ci/**"
- "test/**"
- ".github/workflows/pr-test-npu.yml"
pull_request:
branches: [ main ]
paths:
- "python/**"
- - "scripts/**"
+ - "scripts/ci/**"
- "test/**"
- ".github/workflows/pr-test-npu.yml"
workflow_dispatch:
@@ -47,7 +47,7 @@ jobs:
curl -o /tmp/test.jsonl -L https://gh-proxy.test.osinfra.cn/https://raw.githubusercontent.com/openai/grade-school-math/master/grade_school_math/data/test.jsonl
- name: Run test
- timeout-minutes: 30
+ timeout-minutes: 60
env:
SGLANG_USE_MODELSCOPE: true
SGLANG_IS_IN_CI: true
@@ -82,7 +82,7 @@ jobs:
curl -o /tmp/test.jsonl -L https://gh-proxy.test.osinfra.cn/https://raw.githubusercontent.com/openai/grade-school-math/master/grade_school_math/data/test.jsonl
- name: Run test
- timeout-minutes: 30
+ timeout-minutes: 90
env:
SGLANG_USE_MODELSCOPE: true
SGLANG_IS_IN_CI: true
@@ -117,7 +117,7 @@ jobs:
curl -o /tmp/test.jsonl -L https://gh-proxy.test.osinfra.cn/https://raw.githubusercontent.com/openai/grade-school-math/master/grade_school_math/data/test.jsonl
- name: Run test
- timeout-minutes: 30
+ timeout-minutes: 120
env:
SGLANG_USE_MODELSCOPE: true
SGLANG_IS_IN_CI: true
@@ -127,12 +127,48 @@ jobs:
cd test/srt
python3 run_suite.py --suite per-commit-4-ascend-npu --timeout-per-file 3600
+ per-commit-16-ascend-a3:
+ if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
+ github.event.pull_request.draft == false
+ runs-on: linux-aarch64-a3-16
+ container:
+ image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-a3-ubuntu22.04-py3.11
+ steps:
+ - name: Checkout code
+ uses: actions/checkout@v4
+
+ - name: Install dependencies
+ run: |
+ # speed up by using infra cache services
+ CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local"
+ sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list
+ pip config set global.index-url http://${CACHING_URL}/pypi/simple
+ pip config set global.trusted-host ${CACHING_URL}
+
+ bash scripts/ci/npu_ci_install_dependency.sh
+ # copy required file from our daily cache
+ cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp
+          # download test.jsonl through the proxy
+ curl -o /tmp/test.jsonl -L https://gh-proxy.test.osinfra.cn/https://raw.githubusercontent.com/openai/grade-school-math/master/grade_school_math/data/test.jsonl
+
+ - name: Run test
+ timeout-minutes: 90
+ env:
+ SGLANG_USE_MODELSCOPE: true
+ SGLANG_IS_IN_CI: true
+ HF_ENDPOINT: https://hf-mirror.com
+ TORCH_EXTENSIONS_DIR: /tmp/torch_extensions
+ run: |
+ cd test/srt
+ python3 run_suite.py --suite per-commit-16-ascend-a3 --timeout-per-file 5400
+
pr-test-npu-finish:
if: always()
needs:
- per-commit-1-ascend-npu
- per-commit-2-ascend-npu
- per-commit-4-ascend-npu
+ - per-commit-16-ascend-a3
runs-on: ubuntu-latest
steps:
- name: Check all dependent job statuses
diff --git a/.github/workflows/pr-test-pd-router.yml b/.github/workflows/pr-test-pd-router.yml
index bb5b1e76cef..2a1bde1b4e8 100644
--- a/.github/workflows/pr-test-pd-router.yml
+++ b/.github/workflows/pr-test-pd-router.yml
@@ -77,6 +77,29 @@ jobs:
exit 1
fi
+ echo "=== GPU Process Check ==="
+ # Fail fast if any GPU compute processes are active
+ if command -v nvidia-smi >/dev/null 2>&1; then
+ # Try to query compute apps first (preferred and concise)
+ gpu_procs=$(nvidia-smi --query-compute-apps=pid,process_name,gpu_uuid --format=csv,noheader 2>/dev/null | sed '/^$/d' || true)
+
+ # Fallback to detailed PIDS report if the query returns nothing but there might still be processes
+ if [ -z "$gpu_procs" ]; then
+ gpu_procs=$(nvidia-smi -q -d PIDS 2>/dev/null | awk '/Processes/{flag=1;next}/^$/{flag=0}flag' | sed '/^\s*Processes:/d' | sed '/^\s*$/d' || true)
+ fi
+
+ if [ -n "$gpu_procs" ]; then
+ echo "Error: Found active GPU processes using the device(s):"
+ echo "$gpu_procs"
+ exit 1
+ else
+ echo "No active GPU compute processes detected."
+ fi
+ else
+        echo "Error: nvidia-smi not found; cannot verify GPU processes."
+ exit 1
+ fi
+
echo "=== RDMA Validation ==="
if ! command -v ibv_devices >/dev/null 2>&1; then
echo "Error: InfiniBand tools not found"
@@ -118,8 +141,8 @@ jobs:
python3 -m pip --no-cache-dir install torch==2.8.0 torchvision torchaudio --index-url https://download.pytorch.org/whl/test/cu126
python3 -m pip --no-cache-dir install -e "python[all]" --break-system-packages
python3 -m pip --no-cache-dir install mooncake-transfer-engine==0.3.5
- python3 -m pip --no-cache-dir install --user --force-reinstall genai-bench==0.0.1
- python3 -m pip --no-cache-dir install sgl-kernel==0.3.5
+ python3 -m pip --no-cache-dir install --user --force-reinstall genai-bench==0.0.2
+ python3 -m pip --no-cache-dir install sgl-kernel==0.3.9.post2
- name: Build and install sgl-router
run: |
@@ -165,15 +188,25 @@ jobs:
POLICIES=("random" "round_robin" "cache_aware" "power_of_two")
BASE_URL="http://127.0.0.9:8000"
+ # Free commonly used ports for router and metrics
+ echo "Freeing ports 29000 (metrics) and 8000 (API), if in use..."
+ fuser -k -n tcp 29000 2>/dev/null || true
+ fuser -k -n tcp 8000 2>/dev/null || true
+ sleep 1
+
for policy in "${POLICIES[@]}"; do
echo ""
echo "=================================================="
echo "Testing policy: $policy"
echo "=================================================="
+ # Free ports before starting router
+ fuser -k -n tcp 29000 2>/dev/null || true
+ fuser -k -n tcp 8000 2>/dev/null || true
+
# Start router with the current policy
echo "Starting router with policy: $policy..."
- python3 -m sglang_router.launch_router \
+ RUST_BACKTRACE=1 python3 -m sglang_router.launch_router \
--pd-disaggregation \
--policy "$policy" \
--prefill http://127.0.0.1:30001 9001 \
@@ -305,10 +338,10 @@ jobs:
# Set mean thresholds (allowing for reasonable variance)
# These can be adjusted based on your performance requirements
- ttft_threshold=2.0 # Max 2.0 seconds for mean TTFT
- e2e_latency_threshold=24.0 # Max 8.0 seconds for mean E2E latency
- input_throughput_threshold=10000 # Min 9000 tokens/s for mean input throughput
- output_throughput_threshold=90 # Min 100 tokens/s for mean output throughput
+ ttft_threshold=4.7 # Max 4.7 seconds for mean TTFT
+ e2e_latency_threshold=35.0 # Max 35.0 seconds for mean E2E latency
+ input_throughput_threshold=12000 # Min 12000 tokens/s for mean input throughput
+ output_throughput_threshold=68 # Min 68 tokens/s for mean output throughput
# Validate mean thresholds
diff --git a/.github/workflows/pr-test-rust.yml b/.github/workflows/pr-test-rust.yml
index d704488d9b1..60c7ebdf2bd 100644
--- a/.github/workflows/pr-test-rust.yml
+++ b/.github/workflows/pr-test-rust.yml
@@ -27,13 +27,24 @@ jobs:
run: |
bash scripts/ci/ci_install_rust.sh
+ - name: Rust cache
+ uses: Swatinem/rust-cache@v2
+ with:
+ workspaces: sgl-router
+
+ - name: Run lint
+ run: |
+ source "$HOME/.cargo/env"
+ cd sgl-router/
+ cargo clippy --all-targets --all-features -- -D warnings
+
- name: Run fmt
run: |
source "$HOME/.cargo/env"
cd sgl-router/
cargo fmt -- --check
- - name: Run test
+ - name: Run Rust tests
timeout-minutes: 20
run: |
source "$HOME/.cargo/env"
@@ -47,17 +58,17 @@ jobs:
cargo check --benches
- name: Quick benchmark sanity check
- timeout-minutes: 10
+ timeout-minutes: 15
run: |
source "$HOME/.cargo/env"
cd sgl-router/
# Run quick benchmarks to ensure they work using Python script
python3 scripts/run_benchmarks.py --quick
- e2e-python:
+ pytest-rust:
if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
runs-on: BM.A10.4
- timeout-minutes: 30
+ timeout-minutes: 25
steps:
- name: Checkout code
uses: actions/checkout@v4
@@ -77,15 +88,103 @@ jobs:
pip install setuptools-rust wheel build
python3 -m build
pip install --force-reinstall dist/*.whl
- - name: Run e2e test
+
+
+ - name: Run Python unit tests
+ run: |
+ cd sgl-router
+ source "$HOME/.cargo/env"
+ pip install pytest pytest-cov pytest-xdist
+ pytest -q py_test/unit --cov=sglang_router --cov-report=term-missing --cov-fail-under=80
+
+ - name: Run Python integration tests
+ run: |
+ cd sgl-router
+ source "$HOME/.cargo/env"
+ # Integration tests use FastAPI/uvicorn for mock workers
+ pip install fastapi uvicorn orjson
+ pytest -q -m integration
+
+ - name: Run Python E2E tests
run: |
bash scripts/killall_sglang.sh "nuk_gpus"
- cd sgl-router/py_test
- python3 run_suite.py
+ cd sgl-router
+ python3 -m pip --no-cache-dir install --upgrade --ignore-installed blinker
+ python3 -m pip --no-cache-dir install --upgrade --break-system-packages genai-bench==0.0.2
+ pytest -m e2e -s -vv -o log_cli=true --log-cli-level=INFO
+
+ - name: Upload benchmark results
+ if: success()
+ uses: actions/upload-artifact@v4
+ with:
+ name: genai-bench-results-all-policies
+ path: sgl-router/benchmark_**/
finish:
- needs: [unit-test-rust, e2e-python]
+ needs: [unit-test-rust, pytest-rust]
runs-on: ubuntu-latest
steps:
- name: Finish
run: echo "This is an empty step to ensure that all jobs are completed."
+
+ summarize-benchmarks:
+ needs: pytest-rust
+ runs-on: ubuntu-latest
+ if: success()
+
+ steps:
+ - name: Install jq
+ run: sudo apt-get update && sudo apt-get install -y jq bc
+
+ - name: Download benchmark results
+ uses: actions/download-artifact@v4
+ with:
+ name: genai-bench-results-all-policies
+
+ - name: List downloaded contents
+ run: |
+ echo "Contents after download:"
+ ls -la
+ find . -name "benchmark_*" -type d
+ echo "JSON files found:"
+ find . -name "*.json" | head -10
+
+ - name: Create benchmark summary
+ run: |
+ echo "=== DEBUG: Creating benchmark summary ==="
+ echo "Available benchmark directories:"
+ find . -name "benchmark_*" -type d || true
+ echo "=========================================="
+
+ echo "## Router E2E Genai-Bench Results Summary" >> $GITHUB_STEP_SUMMARY
+ echo "" >> $GITHUB_STEP_SUMMARY
+ echo "Results captured from E2E tests for two scenarios: regular router (2 workers, dp=2) and PD router (2 prefill + 2 decode)." >> $GITHUB_STEP_SUMMARY
+ echo "" >> $GITHUB_STEP_SUMMARY
+ echo "| Scenario | Status | TTFT (s) | E2E Latency (s) | Input Throughput (tok/s) | Output Throughput (tok/s) |" >> $GITHUB_STEP_SUMMARY
+ echo "|----------|--------|----------|-----------------|--------------------------|---------------------------|" >> $GITHUB_STEP_SUMMARY
+
+ scenarios=$'Regular (dp=2, round_robin)|benchmark_round_robin_regular\nPD (2 prefill + 2 decode, round_robin)|benchmark_round_robin_pd'
+
+ echo "$scenarios" | sed 's/^\s*//' | while IFS='|' read -r label pattern; do
+ [ -z "$label" ] && continue
+ # Find the result folder (handle different extraction layouts)
+ result_folder=$(find . -maxdepth 3 \( -name "$pattern" -o -path "*${pattern}*" \) -type d | head -1)
+
+ if [ -n "$result_folder" ] && [ -d "$result_folder" ]; then
+ json_file=$(find "$result_folder" -name "*.json" -not -name "experiment_metadata.json" | head -1)
+
+ if [ -n "$json_file" ] && [ -f "$json_file" ]; then
+ ttft_mean=$(jq -r '.aggregated_metrics.stats.ttft.mean' "$json_file")
+ e2e_latency_mean=$(jq -r '.aggregated_metrics.stats.e2e_latency.mean' "$json_file")
+ input_throughput_mean=$(jq -r '.aggregated_metrics.stats.input_throughput.mean' "$json_file")
+ output_throughput_mean=$(jq -r '.aggregated_metrics.stats.output_throughput.mean' "$json_file")
+
+ ttft_display=$(printf "%.2f" "$ttft_mean" 2>/dev/null || echo "$ttft_mean")
+ e2e_display=$(printf "%.2f" "$e2e_latency_mean" 2>/dev/null || echo "$e2e_latency_mean")
+ input_display=$(printf "%.0f" "$input_throughput_mean" 2>/dev/null || echo "$input_throughput_mean")
+ output_display=$(printf "%.0f" "$output_throughput_mean" 2>/dev/null || echo "$output_throughput_mean")
+
+ echo "| ${label} | ✅ Success | $ttft_display | $e2e_display | $input_display | $output_display |" >> $GITHUB_STEP_SUMMARY
+ fi
+ fi
+ done
diff --git a/.github/workflows/pr-test-sgl-kernel.yml b/.github/workflows/pr-test-sgl-kernel.yml
index 624d9ed32b9..832188cddb2 100644
--- a/.github/workflows/pr-test-sgl-kernel.yml
+++ b/.github/workflows/pr-test-sgl-kernel.yml
@@ -38,6 +38,8 @@ jobs:
include:
- python-version: "3.10"
cuda-version: "12.4"
+ - python-version: "3.10"
+ cuda-version: "12.8"
- python-version: "3.10"
cuda-version: "12.9"
name: Build Wheel (CUDA ${{ matrix.cuda-version }})
@@ -56,7 +58,7 @@ jobs:
python-version: ${{ matrix.python-version }}
- name: Build wheel for Python ${{ matrix.python-version }} and CUDA ${{ matrix.cuda-version }}
- if: github.event_name != 'push' || (matrix.cuda-version != '11.8' && matrix.cuda-version != '12.9')
+ if: github.event_name != 'push' || (matrix.cuda-version != '12.4' && matrix.cuda-version != '12.8')
run: |
cd sgl-kernel
chmod +x ./build.sh
@@ -80,7 +82,7 @@ jobs:
with:
path: sgl-kernel/dist/
merge-multiple: true
- pattern: wheel-python3.10-cuda12.4
+ pattern: wheel-python3.10-cuda12.9
- name: Install
run: |
@@ -112,7 +114,7 @@ jobs:
with:
path: sgl-kernel/dist/
merge-multiple: true
- pattern: wheel-python3.10-cuda12.4
+ pattern: wheel-python3.10-cuda12.9
- name: Install
run: |
diff --git a/.github/workflows/pr-test-xeon.yml b/.github/workflows/pr-test-xeon.yml
index fc1a77689e6..fcc70f28608 100644
--- a/.github/workflows/pr-test-xeon.yml
+++ b/.github/workflows/pr-test-xeon.yml
@@ -5,7 +5,7 @@ on:
branches: [ main ]
paths:
- "python/**"
- - "scripts/**"
+ - "scripts/ci/**"
- "test/**"
- "sgl-kernel/**"
- ".github/workflows/pr-test-xeon.yml"
@@ -13,7 +13,7 @@ on:
branches: [ main ]
paths:
- "python/**"
- - "scripts/**"
+ - "scripts/ci/**"
- "test/**"
- "sgl-kernel/**"
- ".github/workflows/pr-test-xeon.yml"
@@ -28,6 +28,8 @@ jobs:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
github.event.pull_request.draft == false
runs-on: xeon-gnr
+ env:
+ HF_HOME: /home/sdp/.cache/huggingface
strategy:
matrix:
build_type: ['all']
@@ -46,6 +48,7 @@ jobs:
run: |
docker run -dt \
-v ${{ github.workspace }}:/sglang-checkout/ --ipc=host \
+ -v ${HF_HOME}:/root/.cache/huggingface \
--name ci_sglang_xeon \
sglang_xeon
@@ -67,13 +70,13 @@ jobs:
- name: Run unit tests
if: steps.check_amx.outcome == 'success'
- timeout-minutes: 20
+ timeout-minutes: 36
run: |
docker exec -w /sglang-checkout/ ci_sglang_xeon \
bash -c "cd ./test/srt && python3 run_suite.py --suite per-commit-cpu"
- name: Change permission
- timeout-minutes: 20
+ timeout-minutes: 2
run: |
docker exec -u root ci_sglang_xeon bash -c "
rm -rf /tmp/ci-home &&
diff --git a/.github/workflows/pr-test.yml b/.github/workflows/pr-test.yml
index 7f76b02bfd7..bd1053902c4 100644
--- a/.github/workflows/pr-test.yml
+++ b/.github/workflows/pr-test.yml
@@ -36,7 +36,7 @@ jobs:
filters: |
src:
- "python/**"
- - "scripts/**"
+ - "scripts/ci/**"
- "test/**"
- ".github/workflows/pr-test.yml"
@@ -90,6 +90,10 @@ jobs:
github.event.pull_request.draft == false &&
needs.check-changes.outputs.src == 'true'
runs-on: 2-gpu-runner
+ strategy:
+ fail-fast: false
+ matrix:
+ part: [0, 1]
steps:
- name: Checkout code
uses: actions/checkout@v4
@@ -102,7 +106,7 @@ jobs:
timeout-minutes: 30
run: |
cd test/srt
- python3 run_suite.py --suite per-commit-2-gpu
+ python3 run_suite.py --suite per-commit-2-gpu --auto-partition-id ${{ matrix.part }} --auto-partition-size 2
unit-test-backend-4-gpu:
needs: [check-changes, unit-test-backend-2-gpu]
@@ -417,7 +421,7 @@ jobs:
unit-test-deepep-4-gpu, unit-test-deepep-8-gpu,
unit-test-backend-8-gpu-b200,
]
- if: needs.check-changes.outputs.src == 'true'
+ if: always()
runs-on: ubuntu-latest
steps:
- name: Check all dependent job statuses
diff --git a/.github/workflows/release-docker-amd-nightly.yml b/.github/workflows/release-docker-amd-nightly.yml
index aa97c2edda3..c61e200dff1 100644
--- a/.github/workflows/release-docker-amd-nightly.yml
+++ b/.github/workflows/release-docker-amd-nightly.yml
@@ -19,7 +19,7 @@ jobs:
environment: 'prod'
strategy:
matrix:
- gpu_arch: ['gfx942', 'gfx950']
+ gpu_arch: ['gfx942', 'gfx942-rocm700', 'gfx950']
build_type: ['all', 'srt']
steps:
- name: Checkout repository
@@ -41,6 +41,8 @@ jobs:
if [ "${{ matrix.gpu_arch }}" = "gfx942" ]; then
rocm_tag="rocm630-mi30x"
+ elif [ "${{ matrix.gpu_arch }}" = "gfx942-rocm700" ]; then
+ rocm_tag="rocm700-mi30x"
elif [ "${{ matrix.gpu_arch }}" = "gfx950" ]; then
rocm_tag="rocm700-mi35x"
else
diff --git a/.github/workflows/release-docker-amd.yml b/.github/workflows/release-docker-amd.yml
index 07582243fb8..98c11e2fae7 100644
--- a/.github/workflows/release-docker-amd.yml
+++ b/.github/workflows/release-docker-amd.yml
@@ -14,7 +14,7 @@ jobs:
environment: 'prod'
strategy:
matrix:
- gpu_arch: ['gfx942', 'gfx950']
+ gpu_arch: ['gfx942', 'gfx942-rocm700', 'gfx950']
build_type: ['all', 'srt']
steps:
- name: Checkout repository
@@ -32,6 +32,8 @@ jobs:
if [ "${{ matrix.gpu_arch }}" = "gfx942" ]; then
rocm_tag="rocm630-mi30x"
+ elif [ "${{ matrix.gpu_arch }}" = "gfx942-rocm700" ]; then
+ rocm_tag="rocm700-mi30x"
elif [ "${{ matrix.gpu_arch }}" = "gfx950" ]; then
rocm_tag="rocm700-mi35x"
else
diff --git a/.github/workflows/release-docker-npu-nightly.yml b/.github/workflows/release-docker-npu-nightly.yml
index 7850c073571..527a0cdc2d3 100644
--- a/.github/workflows/release-docker-npu-nightly.yml
+++ b/.github/workflows/release-docker-npu-nightly.yml
@@ -5,6 +5,7 @@ on:
- main
paths:
- ".github/workflows/release-docker-npu-nightly.yml"
+ - "docker/Dockerfile.npu"
workflow_dispatch:
schedule:
- cron: "0 0 * * *"
@@ -72,5 +73,6 @@ jobs:
push: ${{ github.repository == 'sgl-project/sglang' && github.event_name != 'pull_request' }}
provenance: false
build-args: |
+ SGLANG_KERNEL_NPU_TAG=20250901
CANN_VERSION=${{ matrix.cann_version }}
DEVICE_TYPE=${{ matrix.device_type }}
diff --git a/.github/workflows/release-docker-npu.yml b/.github/workflows/release-docker-npu.yml
index ad74b96dff4..f9d52eb4b4d 100644
--- a/.github/workflows/release-docker-npu.yml
+++ b/.github/workflows/release-docker-npu.yml
@@ -9,6 +9,7 @@ on:
- main
paths:
- ".github/workflows/release-docker-npu.yml"
+ - "docker/Dockerfile.npu"
jobs:
build:
@@ -54,8 +55,6 @@ jobs:
run: |
version=$(cat python/sglang/version.py | cut -d'"' -f2)
echo "TAG=lmsysorg/sglang:v$version-cann${{ matrix.cann_version }}-${{ matrix.device_type }}" >> $GITHUB_OUTPUT
- kernel_tag=$(curl -s https://api.github.com/repos/sgl-project/sgl-kernel-npu/tags | jq -r '.[0].name')
- echo "KERNEL_NPU_TAG=${kernel_tag}" >> $GITHUB_OUTPUT
- name: Build and push Docker image
id: build-and-push
@@ -70,6 +69,6 @@ jobs:
push: ${{ github.repository == 'sgl-project/sglang' && github.event_name != 'pull_request' }}
provenance: false
build-args: |
- SGLANG_KERNEL_NPU_TAG=${{ steps.get_version.outputs.KERNEL_NPU_TAG }}
+ SGLANG_KERNEL_NPU_TAG=20250901
CANN_VERSION=${{ matrix.cann_version }}
DEVICE_TYPE=${{ matrix.device_type }}
diff --git a/.github/workflows/release-docker-xeon.yml b/.github/workflows/release-docker-xeon.yml
index 118a1392b6e..bd2a3910f8c 100644
--- a/.github/workflows/release-docker-xeon.yml
+++ b/.github/workflows/release-docker-xeon.yml
@@ -1,4 +1,4 @@
-name: Release Docker Images
+name: Release Docker Xeon Images
on:
push:
branches:
diff --git a/.github/workflows/release-docker.yml b/.github/workflows/release-docker.yml
index 66d2aa3d824..60a8df621f8 100644
--- a/.github/workflows/release-docker.yml
+++ b/.github/workflows/release-docker.yml
@@ -14,13 +14,15 @@ jobs:
environment: 'prod'
strategy:
matrix:
- cuda_version: ['12.6.1', '12.8.1']
+ cuda_version: ['12.6.1', '12.8.1', '12.9.1']
build_type: ['all', 'blackwell']
exclude:
- cuda_version: '12.6.1'
build_type: 'blackwell'
- cuda_version: '12.8.1'
build_type: 'all'
+ - cuda_version: '12.9.1'
+ build_type: 'all'
steps:
- name: Delete huge unnecessary tools folder
run: rm -rf /opt/hostedtoolcache
@@ -61,6 +63,8 @@ jobs:
cuda_tag="cu126"
elif [ "${{ matrix.cuda_version }}" = "12.8.1" ]; then
cuda_tag="cu128"
+ elif [ "${{ matrix.cuda_version }}" = "12.9.1" ]; then
+ cuda_tag="cu129"
else
echo "Unsupported CUDA version"
exit 1
@@ -86,3 +90,8 @@ jobs:
docker tag lmsysorg/sglang:${tag}${tag_suffix} lmsysorg/sglang:latest${tag_suffix}
docker push lmsysorg/sglang:latest${tag_suffix}
fi
+
+ if [ "${{ matrix.cuda_version }}" = "12.9.1" ]; then
+ docker tag lmsysorg/sglang:${tag}${tag_suffix} lmsysorg/sglang:v${version}
+ docker push lmsysorg/sglang:v${version}
+ fi
diff --git a/.github/workflows/release-pypi-router.yml b/.github/workflows/release-pypi-router.yml
index 948b3f58402..a2128be8357 100644
--- a/.github/workflows/release-pypi-router.yml
+++ b/.github/workflows/release-pypi-router.yml
@@ -47,7 +47,14 @@ jobs:
env:
CIBW_BUILD: "cp38-manylinux_x86_64 cp39-manylinux_x86_64 cp310-manylinux_x86_64 cp311-manylinux_x86_64 cp312-manylinux_x86_64"
CIBW_BEFORE_ALL: |
- yum update && yum install -y openssl-devel && curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
+ yum update -y && yum install -y openssl-devel wget unzip && \
+        # Install a recent protoc (v32.0) with proto3 support
+ cd /tmp && \
+ wget https://github.com/protocolbuffers/protobuf/releases/download/v32.0/protoc-32.0-linux-x86_64.zip && \
+ unzip protoc-32.0-linux-x86_64.zip -d /usr/local && \
+ rm protoc-32.0-linux-x86_64.zip && \
+ # Install Rust
+ curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
CIBW_ENVIRONMENT: "PATH=$HOME/.cargo/bin:$PATH"
- name: List built packages
diff --git a/.github/workflows/release-whl-kernel.yml b/.github/workflows/release-whl-kernel.yml
index c9c44b520c6..b12c9128869 100644
--- a/.github/workflows/release-whl-kernel.yml
+++ b/.github/workflows/release-whl-kernel.yml
@@ -17,13 +17,13 @@ concurrency:
cancel-in-progress: true
jobs:
- build-cu124:
+ build-cu129:
if: github.repository == 'sgl-project/sglang'
runs-on: sgl-kernel-release-node
strategy:
matrix:
python-version: ["3.10"]
- cuda-version: ["12.4"]
+ cuda-version: ["12.9"]
steps:
- uses: actions/checkout@v4
with:
@@ -46,14 +46,14 @@ jobs:
pip install twine
python3 -m twine upload dist/* -u __token__ -p ${{ secrets.PYPI_TOKEN }}
- build-cu129:
+ build-cu124:
if: github.repository == 'sgl-project/sglang'
- needs: build-cu124
+ needs: build-cu129
runs-on: sgl-kernel-release-node
strategy:
matrix:
python-version: ["3.10"]
- cuda-version: ["12.9"]
+ cuda-version: ["12.4"]
steps:
- uses: actions/checkout@v4
with:
@@ -76,8 +76,8 @@ jobs:
name: wheel-python${{ matrix.python-version }}-cuda${{ matrix.cuda-version }}
path: sgl-kernel/dist/*
- release-cu129:
- needs: build-cu129
+ release-cu124:
+ needs: build-cu124
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
@@ -114,7 +114,7 @@ jobs:
WHL_TOKEN: ${{ secrets.WHL_TOKEN }}
- name: Update wheel index
- run: python3 scripts/update_kernel_whl_index.py --cuda 129
+ run: python3 scripts/update_kernel_whl_index.py --cuda 124
- name: Push wheel index
run: |
diff --git a/.github/workflows/vllm-dependency-test.yml b/.github/workflows/vllm-dependency-test.yml
index f4ca4c81613..442d76d500c 100644
--- a/.github/workflows/vllm-dependency-test.yml
+++ b/.github/workflows/vllm-dependency-test.yml
@@ -5,13 +5,13 @@ on:
branches: [ main ]
paths:
- "python/**"
- - "scripts/**"
+ - "scripts/ci/**"
- "test/**"
pull_request:
branches: [ main ]
paths:
- "python/**"
- - "scripts/**"
+ - "scripts/ci/**"
- "test/**"
concurrency:
@@ -32,7 +32,7 @@ jobs:
bash scripts/ci/ci_install_dependency.sh
pip install "bitsandbytes>=0.44.0"
- pip install "sgl-kernel==0.3.5"
+ pip install "sgl-kernel==0.3.7"
- name: Run vLLM dependency tests
timeout-minutes: 60
diff --git a/.gitignore b/.gitignore
index 3ca76da7111..9725fabd9f8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -48,6 +48,9 @@ coverage.xml
*.cover
*.py,cover
.hypothesis/
+
+# Tokenizer cache for tests
+.tokenizer_cache/
.pytest_cache/
cover/
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 346d8adf045..a295f2eb4e1 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -38,7 +38,7 @@ repos:
hooks:
- id: codespell
additional_dependencies: ['tomli']
- args: ['--toml', 'python/pyproject.toml', '-L', 'cann']
+ args: ['--toml', 'python/pyproject.toml', '-L', 'cann,thi,makro,wil,rouge']
exclude: |
(?x)^(
test/srt/test_reasoning_parser\.py|
diff --git a/README.md b/README.md
index d4707509934..451a6d424ef 100644
--- a/README.md
+++ b/README.md
@@ -20,6 +20,7 @@
| [**Slides**](https://github.com/sgl-project/sgl-learning-materials?tab=readme-ov-file#slides) |
## News
+- [2025/08] 🔔 SGLang x AMD SF Meetup on 8/22: Hands-on GPU workshop, tech talks by AMD/xAI/SGLang, and networking ([Roadmap](https://github.com/sgl-project/sgl-learning-materials/blob/main/slides/amd_meetup_sglang_roadmap.pdf), [Large-scale EP](https://github.com/sgl-project/sgl-learning-materials/blob/main/slides/amd_meetup_sglang_ep.pdf), [Highlights](https://github.com/sgl-project/sgl-learning-materials/blob/main/slides/amd_meetup_highlights.pdf), [AITER/MoRI](https://github.com/sgl-project/sgl-learning-materials/blob/main/slides/amd_meetup_aiter_mori.pdf), [Wave](https://github.com/sgl-project/sgl-learning-materials/blob/main/slides/amd_meetup_wave.pdf)).
- [2025/08] 🔥 SGLang provides day-0 support for OpenAI gpt-oss model ([instructions](https://github.com/sgl-project/sglang/issues/8833))
- [2025/06] 🔥 SGLang, the high-performance serving infrastructure powering trillions of tokens daily, has been awarded the third batch of the Open Source AI Grant by a16z ([a16z blog](https://a16z.com/advancing-open-source-ai-through-benchmarks-and-bold-experimentation/)).
- [2025/06] 🔥 Deploying DeepSeek on GB200 NVL72 with PD and Large Scale EP (Part I): 2.7x Higher Decoding Throughput ([blog](https://lmsys.org/blog/2025-06-16-gb200-part-1/)).
diff --git a/benchmark/boolq/README.md b/benchmark/boolq/README.md
new file mode 100644
index 00000000000..3704742eec6
--- /dev/null
+++ b/benchmark/boolq/README.md
@@ -0,0 +1,19 @@
+## Download data
+```
+git clone https://hf-mirror.com/datasets/google/boolq
+```
+
+## Convert parquet to json
+```
+bash parquet_to_json.sh
+```
+
+## Run benchmark
+
+### Benchmark sglang
+```
+python -m sglang.launch_server --model-path ramblingpolymath/Qwen3-32B-W8A8 --port 30000
+```
+
+```
+python3 bench_sglang.py
+```
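+
+A minimal usage sketch with the options defined in `bench_sglang.py` (the `--parallel` flag comes from the shared sglang benchmark arguments the script imports; values and paths here are illustrative):
+```
+python3 bench_sglang.py \
+  --train-data-path ./boolq/data/train-00000-of-00001.json \
+  --test-data-path ./boolq/data/validation-00000-of-00001.json \
+  --num-shots 5 \
+  --num-questions 200 \
+  --parallel 16
+```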
diff --git a/benchmark/boolq/bench_sglang.py b/benchmark/boolq/bench_sglang.py
new file mode 100644
index 00000000000..b3ce3c9962a
--- /dev/null
+++ b/benchmark/boolq/bench_sglang.py
@@ -0,0 +1,124 @@
+import argparse
+import json
+import time
+
+import numpy as np
+
+from sglang.api import set_default_backend
+from sglang.test.test_utils import (
+ add_common_sglang_args_and_parse,
+ select_sglang_backend,
+)
+from sglang.utils import read_jsonl
+
+
+def get_example(lines, i, answer):
+ prompt = "Question: " + lines[i]["question"] + lines[i]["passage"] + "\nAnswer:"
+ if answer:
+ prompt += str(lines[i]["answer"])
+ return prompt
+
+
+def few_shot_examples(lines, k):
+ prompts = ""
+ for i in range(k):
+ prompts += get_example(lines, i, True) + "\n\n"
+ return prompts
+
+
+def main(args):
+ # Select backend
+ set_default_backend(select_sglang_backend(args))
+
+ # Read data
+ train_data_path = args.train_data_path
+ test_data_path = args.test_data_path
+ lines_train = list(read_jsonl(train_data_path))
+ lines_test = list(read_jsonl(test_data_path))
+
+ # Construct prompts
+ num_questions = args.num_questions
+ num_shots = args.num_shots
+ few_shots = few_shot_examples(lines_train, num_shots)
+
+ questions = []
+ answer = []
+ for i in range(len(lines_test[:num_questions])):
+ questions.append(get_example(lines_test, i, False))
+ answer.append(str(lines_test[i]["answer"]))
+ arguments = [{"question": q} for q in questions]
+
+ #####################################
+ ######### SGL Program Begin #########
+ #####################################
+
+ import sglang as sgl
+
+ @sgl.function
+ def few_shot_boolq(s, question):
+ s += few_shots + question
+ s += sgl.gen("answer", max_tokens=5, stop=["\n"])
+
+ #####################################
+ ########## SGL Program End ##########
+ #####################################
+
+ # Run requests
+ tic = time.perf_counter()
+ states = few_shot_boolq.run_batch(
+ arguments,
+ temperature=0,
+ num_threads=args.parallel,
+ progress_bar=True,
+ )
+ latency = time.perf_counter() - tic
+
+ preds = []
+ for i in range(len(states)):
+ preds.append(states[i]["answer"])
+
+ # Compute accuracy
+ acc = np.mean(np.array(preds) == np.array(answer))
+
+ # Compute speed
+ num_output_tokens = sum(
+ s.get_meta_info("answer")["completion_tokens"] for s in states
+ )
+ output_throughput = num_output_tokens / latency
+
+ # Print results
+ print(f"Accuracy: {acc:.3f}")
+ print(f"Latency: {latency:.3f} s")
+ print(f"Output throughput: {output_throughput:.3f} token/s")
+
+ # Results
+ with open(args.result_file, "a") as fout:
+ value = {
+ "task": "boolq",
+ "backend": args.backend,
+ "num_gpus": 1,
+ "latency": round(latency, 3),
+ "accuracy": round(acc, 3),
+ "num_requests": args.num_questions,
+ "other": {
+ "num_questions": args.num_questions,
+ "parallel": args.parallel,
+ },
+ }
+ fout.write(json.dumps(value) + "\n")
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser()
+ parser.add_argument("--num-shots", type=int, default=5)
+ parser.add_argument(
+ "--train-data-path", type=str, default="./boolq/data/train-00000-of-00001.json"
+ )
+ parser.add_argument(
+ "--test-data-path",
+ type=str,
+ default="./boolq/data/validation-00000-of-00001.json",
+ )
+ parser.add_argument("--num-questions", type=int, default=200)
+ args = add_common_sglang_args_and_parse(parser)
+ main(args)
diff --git a/benchmark/boolq/convert_parquet_to_json.py b/benchmark/boolq/convert_parquet_to_json.py
new file mode 100644
index 00000000000..e3e69cb31b2
--- /dev/null
+++ b/benchmark/boolq/convert_parquet_to_json.py
@@ -0,0 +1,28 @@
+import sys
+
+import pyarrow.parquet as pq
+
+
+def convert_parquet_to_json(input_file, output_file):
+ # read parquet file
+ table = pq.read_table(input_file)
+
+    # convert the parquet table to a pandas dataframe
+ df = table.to_pandas()
+
+    # convert the dataframe to JSON Lines
+ json_data = df.to_json(orient="records", lines=True)
+
+ # write json to file
+ with open(output_file, "w") as f:
+ f.write(json_data)
+
+
+if __name__ == "__main__":
+ if len(sys.argv) != 3:
+        print("Usage: python convert_parquet_to_json.py <input_file> <output_file>")
+        sys.exit(1)
+
+ input_file = sys.argv[1]
+ output_file = sys.argv[2]
+
+ convert_parquet_to_json(input_file, output_file)
diff --git a/benchmark/boolq/parquet_to_json.sh b/benchmark/boolq/parquet_to_json.sh
new file mode 100755
index 00000000000..9aaf087ff54
--- /dev/null
+++ b/benchmark/boolq/parquet_to_json.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+
+# define the input and output directories
+input_dir="./boolq/data"
+output_dir="./boolq/data"
+
+# define the files to convert
+files=(
+ "train-00000-of-00001.parquet"
+ "validation-00000-of-00001.parquet"
+)
+
+# for each file above, use the python script to convert the format
+for file in "${files[@]}"; do
+ input_file="${input_dir}/${file}"
+ output_file="${output_dir}/${file%.parquet}.json"
+
+ echo "Converting ${input_file} to ${output_file} ..."
+ python3 convert_parquet_to_json.py "${input_file}" "${output_file}"
+
+ if [ $? -eq 0 ]; then
+ echo "Conversion successful: ${output_file}"
+ else
+ echo "Conversion failed: ${input_file}"
+ fi
+done
diff --git a/benchmark/ceval/README.md b/benchmark/ceval/README.md
new file mode 100644
index 00000000000..b822e43c3b3
--- /dev/null
+++ b/benchmark/ceval/README.md
@@ -0,0 +1,15 @@
+## Download data
+```
+git lfs clone https://huggingface.co/datasets/ceval/ceval-exam
+```
+
+## Run benchmark
+
+### Benchmark sglang
+```
+python -m sglang.launch_server --model-path ramblingpolymath/Qwen3-32B-W8A8 --port 30000
+```
+
+```
+python3 bench_sglang.py
+```
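+
+A minimal usage sketch with the options defined in `bench_sglang.py` (the `--parallel` flag comes from the shared sglang benchmark arguments the script imports; values are illustrative):
+```
+python3 bench_sglang.py \
+  --data-path ceval-exam \
+  --num-questions 100 \
+  --parallel 16
+```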
diff --git a/benchmark/ceval/bench_sglang.py b/benchmark/ceval/bench_sglang.py
new file mode 100644
index 00000000000..32ed0baf2e1
--- /dev/null
+++ b/benchmark/ceval/bench_sglang.py
@@ -0,0 +1,138 @@
+import argparse
+import json
+import os
+import random
+import re
+import time
+
+import numpy as np
+from datasets import load_dataset
+
+from sglang.api import set_default_backend
+from sglang.test.test_utils import (
+ add_common_sglang_args_and_parse,
+ select_sglang_backend,
+)
+
+choices = ["A", "B", "C", "D"]
+
+
+def get_one_example(line, include_answer):
+ res = line["question"]
+ res += f"\nA. {line['A']}"
+ res += f"\nB. {line['B']}"
+ res += f"\nC. {line['C']}"
+ res += f"\nD. {line['D']}"
+
+ if include_answer:
+ res += f"\nAnswer: {line['answer']} \n\n"
+ return res
+
+
+def get_few_shot_examples(lines):
+ res = ""
+ for line in lines:
+ res += get_one_example(line, True) + "\n\n"
+ return res
+
+
+def get_answer_value(response):
+ pattern = r"(Answer:|answer:|答案是|答案是:|正确答案是:|答案:|Assistant:)\s*([A-D])(?![\w])"
+ match = re.search(pattern, response)
+
+ if match:
+ return match.group(2)
+
+ return random.choice(choices)
+
+
+def main(args):
+    # Read data and construct prompts
+ arguments = []
+ labels = []
+ examples = "examples:\n"
+ data_path = args.data_path
+ for subject in os.listdir(data_path):
+ subject_path = os.path.join(data_path, subject)
+ if os.path.isdir(subject_path) and subject != ".git":
+ dataset = load_dataset(data_path, name=subject)
+ dev_lines_temp = dataset["dev"]
+ val_lines_temp = dataset["val"]
+            few_shot_examples = get_few_shot_examples(dev_lines_temp)
+ examples += f"{few_shot_examples}"
+ for val_line in val_lines_temp:
+ arguments.append(
+ {
+ "examples": few_shot_examples,
+ "question": get_one_example(val_line, False),
+ }
+ )
+ labels.append(val_line["answer"])
+
+ #####################################
+ ######### SGL Program Begin #########
+ #####################################
+
+ import sglang as sgl
+
+ @sgl.function
+ def few_shot_ceval(s, examples, question):
+ s += examples + question + sgl.gen("Answer")
+
+ #####################################
+ ########## SGL Program End ##########
+ #####################################
+
+ num_questions = args.num_questions if args.num_questions else len(arguments)
+
+ # Select backend
+ set_default_backend(select_sglang_backend(args))
+
+ # Run requests
+ tic = time.perf_counter()
+ states = few_shot_ceval.run_batch(
+ arguments[:num_questions],
+ temperature=0,
+ num_threads=args.parallel,
+ progress_bar=True,
+ )
+ latency = time.perf_counter() - tic
+
+ preds = [get_answer_value(states[i]["Answer"]) for i in range(num_questions)]
+
+ # Compute accuracy
+ acc = np.mean(np.array(preds) == np.array(labels[:num_questions]))
+
+ # Compute speed
+ num_output_tokens = sum(
+ s.get_meta_info("Answer")["completion_tokens"] for s in states
+ )
+ output_throughput = num_output_tokens / latency
+
+ # Print results
+ print(f"Accuracy: {acc:.3f}")
+ print(f"Latency: {latency:.3f} s")
+ print(f"Output throughput: {output_throughput:.3f} token/s")
+
+ # Write results
+ with open(args.result_file, "a") as fout:
+ value = {
+ "task": "ceval",
+ "backend": args.backend,
+ "num_gpus": 1,
+ "latency": round(latency, 3),
+ "accuracy": round(acc, 3),
+ "num_requests": args.num_questions,
+ "other": {
+ "parallel": args.parallel,
+ },
+ }
+ fout.write(json.dumps(value) + "\n")
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser()
+ parser.add_argument("--data-path", type=str, default="ceval-exam")
+ parser.add_argument("--num-questions", type=int, default=None)
+ args = add_common_sglang_args_and_parse(parser)
+ main(args)
diff --git a/benchmark/deepseek_v3/README.md b/benchmark/deepseek_v3/README.md
index 44d691cdbf5..53efc23f5b1 100644
--- a/benchmark/deepseek_v3/README.md
+++ b/benchmark/deepseek_v3/README.md
@@ -1,10 +1,10 @@
-# DeepSeek V3 Support
+# DeepSeek V3.1/V3/R1 Support
The SGLang and DeepSeek teams collaborated to get DeepSeek V3 FP8 running on NVIDIA and AMD GPUs **from day one**. SGLang also supports [MLA optimization](https://lmsys.org/blog/2024-09-04-sglang-v0-3/#deepseek-multi-head-latent-attention-mla-throughput-optimizations) and [DP attention](https://lmsys.org/blog/2024-12-04-sglang-v0-4/#data-parallelism-attention-for-deepseek-models), making SGLang one of the best open-source LLM engines for running DeepSeek models. SGLang is the inference engine recommended by the official [DeepSeek team](https://github.com/deepseek-ai/DeepSeek-V3/tree/main?tab=readme-ov-file#62-inference-with-sglang-recommended).
Special thanks to Meituan's Search & Recommend Platform Team and Baseten's Model Performance Team for implementing the model, and DataCrunch for providing GPU resources.
-For optimizations made on the DeepSeek series models regarding SGLang, please refer to [DeepSeek Model Optimizations in SGLang](https://docs.sglang.ai/references/deepseek.html).
+For optimizations made on the DeepSeek series models regarding SGLang, please refer to [DeepSeek Model Optimizations in SGLang](https://docs.sglang.ai/basic_usage/deepseek.html).
## Installation & Launch
@@ -33,7 +33,7 @@ Add [performance optimization options](#performance-optimization-options) as nee
```bash
# Installation
-pip install "sglang[all]>=0.5.0rc2"
+pip install "sglang[all]>=0.5.2"
# Launch
python3 -m sglang.launch_server --model deepseek-ai/DeepSeek-V3 --tp 8 --trust-remote-code
@@ -50,7 +50,9 @@ Add [performance optimization options](#performance-optimization-options) as nee
- [Data Parallelism Attention](https://lmsys.org/blog/2024-12-04-sglang-v0-4/#data-parallelism-attention-for-deepseek-models): For high QPS scenarios, add the `--enable-dp-attention` argument to boost throughput.
- [Torch.compile Optimization](https://lmsys.org/blog/2024-09-04-sglang-v0-3/#torchcompile-latency-optimizations): Add `--enable-torch-compile` argument to enable it. This will take some time while server starts. The maximum batch size for torch.compile optimization can be controlled with `--torch-compile-max-bs`. It's recommended to set it between `1` and `8`. (e.g., `--torch-compile-max-bs 8`)
-### Example: Sending requests with OpenAI API
+### Usage: Chat with DeepSeek
+
+#### DeepSeek V3/R1
```python3
import openai
@@ -70,6 +72,82 @@ response = client.chat.completions.create(
print(response)
```
+#### DeepSeek V3.1
+In addition to the basic usage shown in the DeepSeek V3/R1 example, DeepSeek V3.1 supports a per-request thinking/non-thinking toggle. Set the `"thinking"` field in `extra_body={"chat_template_kwargs": {"thinking": True}}` to enable or disable thinking mode.
+
+##### Non-Thinking
+```python3
+import openai
+client = openai.Client(
+ base_url="http://127.0.0.1:30000/v1", api_key="EMPTY")
+
+# Chat completion
+response = client.chat.completions.create(
+ model="default",
+ messages=[
+ {"role": "system", "content": "You are a helpful AI assistant"},
+ {"role": "user", "content": "Answer the following with the second letter of the correct answer only: What is the capital of France?"},
+ ],
+ temperature=0,
+ max_tokens=1024,
+ extra_body = {"chat_template_kwargs": {"thinking": False}}
+)
+print(response.choices[0].message.content)
+```
+Answer:
+```
+h
+```
+* The expected response is 'A' (the second letter of 'Paris'), so the non-thinking answer above is incorrect.
+##### Thinking
+```python3
+import openai
+client = openai.Client(
+ base_url="http://127.0.0.1:30000/v1", api_key="EMPTY")
+
+# Chat completion
+response = client.chat.completions.create(
+ model="default",
+ messages=[
+ {"role": "system", "content": "You are a helpful AI assistant"},
+ {"role": "user", "content": "Answer the following with the second letter of the correct answer only: What is the capital of France?"},
+ ],
+ temperature=0,
+ max_tokens=1024,
+ extra_body = {"chat_template_kwargs": {"thinking": True}}
+)
+print(response)
+```
+Answer:
+```
+First, the question is: "What is the capital of France?" I know that the capital of France is Paris.
+
+The user says: "Answer the following with the second letter of the correct answer only." So, I need to provide only the second letter of the correct answer.
+
+The correct answer is "Paris". Now, I need to find the second letter of "Paris".
+
+Let's spell it out: P-A-R-I-S.
+
+- First letter: P
+
+- Second letter: A
+
+- Third letter: R
+
+- Fourth letter: I
+
+- Fifth letter: S
+
+So, the second letter is "A".
+
+I should only output the second letter, which is "A". No additional text or explanation, just the letter.
+
+The user emphasized "the second letter of the correct answer only", so my response should be just "A".
+
+Finally, I need to make sure that this is the correct answer. Yes, Paris is indeed the capital of France.A
+```
+* The response contains the thinking trace, and the model was able to derive the correct answer from it.
+
### Example: Serving with two H20\*8 nodes
For example, there are two H20 nodes, each with 8 GPUs. The first node's IP is `10.0.0.1`, and the second node's IP is `10.0.0.2`. Please **use the first node's IP** for both commands.
diff --git a/benchmark/gpt_oss/README.md b/benchmark/gpt_oss/README.md
new file mode 100644
index 00000000000..4d1b00e9134
--- /dev/null
+++ b/benchmark/gpt_oss/README.md
@@ -0,0 +1,163 @@
+# How to Reproduce GPT-OSS Results with SGLang
+
+### Install the latest SGLang
+
+```bash
+git clone https://github.com/sgl-project/sglang.git
+cd sglang
+git checkout v0.5.1.post3
+
+pip install --upgrade pip
+pip install -e "python[all]"
+```
+
+### Reproduce the benchmark throughput result (Batch Size 1)
+
+Launch Command
+
+```bash
+# MXFP4 120B on H100
+python3 -m sglang.launch_server --model openai/gpt-oss-120b --tp 8 --attention-backend triton
+
+# BF16 120B on H100
+python3 -m sglang.launch_server --model lmsys/gpt-oss-120b-bf16 --tp 8 --attention-backend triton
+
+# MXFP4 120B on B200
+python3 -m sglang.launch_server --model openai/gpt-oss-120b --tp 4
+
+# BF16 120B on B200
+python3 -m sglang.launch_server --model lmsys/gpt-oss-120b-bf16 --tp 4
+```
+
+Benchmark Command
+
+```bash
+
+# MXFP4 120B on H100
+python3 -m sglang.bench_one_batch_server --model openai/gpt-oss-120b --base-url http://localhost:30000 --batch-size 1 --input-len 1024 --output-len 512 --show-report
+```
+
+### Reproduce the benchmark throughput result (Batch Size 32)
+
+Launch Command
+
+```bash
+# MXFP4 120B on H100
+python3 -m sglang.launch_server --model openai/gpt-oss-120b --tp 8
+
+# BF16 120B on H100
+python3 -m sglang.launch_server --model lmsys/gpt-oss-120b-bf16 --tp 8
+
+# MXFP4 120B on B200
+python3 -m sglang.launch_server --model openai/gpt-oss-120b --tp 4
+
+# BF16 120B on B200
+python3 -m sglang.launch_server --model lmsys/gpt-oss-120b-bf16 --tp 4
+```
+
+Benchmark Command
+
+```bash
+python3 -m sglang.bench_one_batch_server --model openai/gpt-oss-120b --base-url http://localhost:30000 --batch-size 32 --input-len 1024 8192 --output-len 512 --show-report
+```
+
+### Reproduce the evaluation result
+
+Install gpt-oss
+
+```bash
+git clone https://github.com/openai/gpt-oss.git
+cd gpt-oss
+pip install -e .
+```
+
+Evaluation Command
+
+```bash
+DATASET=gpqa
+BASE_URL=YOUR_BASE_URL
+OPENAI_API_KEY=dummy python -m gpt_oss.evals \
+ --base-url ${BASE_URL}/v1 \
+ --model dummy \
+ --reasoning-effort low,medium,high \
+ --eval $DATASET \
+ --n-threads 1000
+```
+
+### Reproduce the benchmark result of acceptance length
+> Note: On B200, if topk is 1, set `--attention-backend trtllm_mha`.
+```bash
+git clone https://github.com/sgl-project/SpecForge.git
+cd SpecForge/benchmarks
+config_list=(
+ "1,0,0,0"
+ "1,3,1,4"
+ "1,5,4,8"
+)
+python3 bench_model_speedup.py \
+ --model-path openai/gpt-oss-120b \
+ --speculative-draft-model-path lmsys/EAGLE3-gpt-oss-120b-bf16 \
+ --port 20001 \
+ --trust-remote-code \
+ --mem-fraction-static 0.8 \
+ --tp-size 4 \
+ --attention-backend fa3 \
+ --config-list "${config_list[@]}" \
+ --benchmark-list mtbench:80 gsm8k:200 humaneval:200 math500:200 \
+ --output lmsys_gpt-oss-120b_Eagle3_result.jsonl
+
+python3 bench_model_speedup.py \
+ --model-path openai/gpt-oss-120b \
+ --speculative-draft-model-path nvidia/gpt-oss-120b-Eagle3 \
+ --port 20001 \
+ --trust-remote-code \
+ --mem-fraction-static 0.8 \
+ --tp-size 4 \
+ --attention-backend fa3 \
+ --config-list "${config_list[@]}" \
+ --benchmark-list mtbench:80 gsm8k:200 humaneval:200 math500:200 \
+ --output nv_gpt-oss-120b_Eagle3_result.jsonl
+```
+
+### Reproduce the result of speculative decoding speedup
+
+Launch Command
+
+```bash
+# On Hopper:
+# - Tree decoding (topk > 1) and chain decoding (topk = 1) are supported on both FA3 and Triton backends.
+python3 -m sglang.launch_server --model openai/gpt-oss-120b --speculative-algorithm EAGLE3 --speculative-draft-model-path lmsys/EAGLE3-gpt-oss-120b-bf16 --speculative-num-steps 3 --speculative-eagle-topk 1 --speculative-num-draft-tokens 4 --tp 4
+python3 -m sglang.launch_server --model openai/gpt-oss-120b --speculative-algorithm EAGLE3 --speculative-draft-model-path lmsys/EAGLE3-gpt-oss-120b-bf16 --speculative-num-steps 5 --speculative-eagle-topk 4 --speculative-num-draft-tokens 8 --tp 4
+
+# On Blackwell:
+# - Chain decoding (topk = 1) is supported on TRTLLM-MHA backend. Tree decoding (topk > 1) is in progress, stay tuned!
+# - Both tree decoding (topk > 1) and chain decoding (topk = 1) are supported on the Triton backend.
+python3 -m sglang.launch_server --model openai/gpt-oss-120b --speculative-algo EAGLE3 --speculative-draft-model-path lmsys/EAGLE3-gpt-oss-120b-bf16 --speculative-num-steps 3 --speculative-eagle-topk 1 --speculative-num-draft-tokens 4 --tp 4
+python3 -m sglang.launch_server --model openai/gpt-oss-120b --speculative-algo EAGLE3 --speculative-draft-model-path lmsys/EAGLE3-gpt-oss-120b-bf16 --speculative-num-steps 5 --speculative-eagle-topk 4 --speculative-num-draft-tokens 8 --attention-backend triton --tp 4
+```
+
+Benchmark Command
+
+```bash
+config_list=(
+ "1,0,0,0"
+ "1,3,1,4"
+ "1,5,4,8"
+)
+python3 bench_model_speedup.py \
+ --model-path openai/gpt-oss-120b \
+ --speculative-draft-model-path lmsys/EAGLE3-gpt-oss-120b-bf16 \
+ --port 20001 \
+ --trust-remote-code \
+ --mem-fraction-static 0.8 \
+ --tp-size 4 \
+ --attention-backend fa3 \
+ --config-list "${config_list[@]}" \
+ --benchmark-list gsm8k:200 humaneval:200 math500:200 \
+ --output lmsys_gpt-oss-120b_Eagle3_result.jsonl
+```
+
+We achieve the best speedups with the following settings:
+
+- **1.39x** speedup with the `--speculative-num-steps 3 --speculative-eagle-topk 1 --speculative-num-draft-tokens 4` setting.
+- **1.52x** speedup with the `--speculative-num-steps 5 --speculative-eagle-topk 4 --speculative-num-draft-tokens 8` setting.
diff --git a/benchmark/hf3fs/bench.sh b/benchmark/hf3fs/bench.sh
index bb1bbcd3228..049116b892d 100644
--- a/benchmark/hf3fs/bench.sh
+++ b/benchmark/hf3fs/bench.sh
@@ -1,6 +1,16 @@
+export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib/python3.12/dist-packages:/usr/local/lib/python3.12/dist-packages/torch/lib
+python3 benchmark/hf3fs/bench_client.py
+
+export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib/python3.12/dist-packages:/usr/local/lib/python3.12/dist-packages/torch/lib
SGLANG_HICACHE_HF3FS_CONFIG_PATH=/sgl-workspace/sglang/benchmark/hf3fs/hf3fs.json \
python3 benchmark/hf3fs/bench_storage.py
+export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib/python3.12/dist-packages:/usr/local/lib/python3.12/dist-packages/torch/lib
+export SGLANG_HICACHE_HF3FS_CONFIG_PATH=/sgl-workspace/sglang/benchmark/hf3fs/hf3fs.json
+echo '{"file_path_prefix": "/data/hf3fs-test-0", "file_size": 1099511627776, "numjobs": 16, "entries": 8}' > \
+${SGLANG_HICACHE_HF3FS_CONFIG_PATH}
+python3 benchmark/hf3fs/bench_zerocopy.py
+
####################################################################################################
rm -rf nohup.out && \
diff --git a/benchmark/hf3fs/bench_storage.py b/benchmark/hf3fs/bench_storage.py
index 4e96c8ec937..f0ce171bf67 100644
--- a/benchmark/hf3fs/bench_storage.py
+++ b/benchmark/hf3fs/bench_storage.py
@@ -8,6 +8,9 @@
import torch
from tqdm import tqdm
+from sglang.srt.mem_cache.storage.hf3fs.mini_3fs_metadata_server import (
+ Hf3fsLocalMetadataClient,
+)
from sglang.srt.mem_cache.storage.hf3fs.storage_hf3fs import HiCacheHF3FS
@@ -54,9 +57,7 @@ def test():
)
except Exception as e:
raise RuntimeError(f"Failed to dump config to {config_path}: {str(e)}")
-
- rank = 0
- hicache_hf3fs = HiCacheHF3FS.from_env_config(rank, bytes_per_page, dtype)
+ hicache_hf3fs = HiCacheHF3FS.from_env_config(bytes_per_page, dtype)
numel = 2 * tokens_per_page * layer_num * head_num * head_dim
assert numel * dtype.itemsize == bytes_per_page
@@ -67,12 +68,15 @@ def test():
k = f"key_{i}"
v = torch.randn((numel,)).to(dtype=dtype)
ok = hicache_hf3fs.set(k, v)
- assert ok, f"Failed to insert {k}"
+ if i < (file_size // bytes_per_page):
+ assert ok, f"Failed to insert {k}"
+ else:
+ assert not ok
tensors[k] = v
- assert hicache_hf3fs.get("key_0") is None
- assert hicache_hf3fs.get("key_1") is None
+ assert hicache_hf3fs.get("key_8") is None
+ assert hicache_hf3fs.get("key_9") is None
- start = num_pages - hicache_hf3fs.num_pages
+ start = 0
for i in range(start, start + hicache_hf3fs.num_pages):
k = f"key_{i}"
assert hicache_hf3fs.exists(k)
@@ -83,13 +87,16 @@ def test():
assert not hicache_hf3fs.exists("not_exists")
- hicache_hf3fs.delete("key_9")
+ hicache_hf3fs.delete("key_7")
v2 = torch.randn((numel,)).to(dtype=dtype)
assert hicache_hf3fs.set("key_new", v2)
assert torch.allclose(hicache_hf3fs.get("key_new"), v2, atol=1e-3)
hicache_hf3fs.clear()
- assert len(hicache_hf3fs.free_pages) == hicache_hf3fs.num_pages
+ assert (
+ len(hicache_hf3fs.metadata_client.rank_metadata.free_pages)
+ == hicache_hf3fs.metadata_client.rank_metadata.num_pages
+ )
# batch
num_pages = 10
@@ -134,12 +141,14 @@ def bench():
entries = 8
dtype = store_dtype
hicache_hf3fs = HiCacheHF3FS(
+ rank=0,
file_path=file_path,
file_size=file_size,
numjobs=numjobs,
bytes_per_page=bytes_per_page,
entries=entries,
dtype=dtype,
+ metadata_client=Hf3fsLocalMetadataClient(),
)
numel = 2 * tokens_per_page * layer_num * head_num * head_dim
@@ -167,7 +176,10 @@ def bench():
r_bw = []
r_size = num_page * bytes_per_page / (1 << 30)
for i in tqdm(range(warmup + iteration), desc="Benchmarking read (GB/s)"):
- keys = random.sample(list(hicache_hf3fs.key_to_index.keys()), num_page)
+ keys = random.sample(
+ list(hicache_hf3fs.metadata_client.rank_metadata.key_to_index.keys()),
+ num_page,
+ )
tik = time.perf_counter()
results = hicache_hf3fs.batch_get(keys)
tok = time.perf_counter()
@@ -195,12 +207,14 @@ def allclose():
entries = 8
dtype = store_dtype
hicache_hf3fs = HiCacheHF3FS(
+ rank=0,
file_path=file_path,
file_size=file_size,
numjobs=numjobs,
bytes_per_page=bytes_per_page,
entries=entries,
dtype=dtype,
+ metadata_client=Hf3fsLocalMetadataClient(),
)
numel = 2 * tokens_per_page * layer_num * head_num * head_dim
@@ -218,7 +232,10 @@ def allclose():
read_keys, read_results = [], []
for i in tqdm(range(iteration), desc="Benchmarking read (GB/s)"):
- keys = random.sample(list(hicache_hf3fs.key_to_index.keys()), num_page)
+ keys = random.sample(
+ list(hicache_hf3fs.metadata_client.rank_metadata.key_to_index.keys()),
+ num_page,
+ )
results = hicache_hf3fs.batch_get(keys)
read_keys.extend(keys)
read_results.extend(results)
diff --git a/benchmark/hf3fs/bench_zerocopy.py b/benchmark/hf3fs/bench_zerocopy.py
new file mode 100644
index 00000000000..bfa7bff0e60
--- /dev/null
+++ b/benchmark/hf3fs/bench_zerocopy.py
@@ -0,0 +1,140 @@
+import threading
+import time
+
+import torch
+from tqdm import tqdm
+
+from sglang.srt.distributed import (
+ get_world_group,
+ init_distributed_environment,
+ initialize_model_parallel,
+)
+from sglang.srt.managers.cache_controller import (
+ HiCacheController,
+ PrefetchOperation,
+ StorageOperation,
+)
+from sglang.srt.mem_cache.allocator import TokenToKVPoolAllocator
+from sglang.srt.mem_cache.memory_pool import MHATokenToKVPool
+from sglang.srt.mem_cache.memory_pool_host import MHATokenToKVPoolHost
+
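+# Benchmark the HiCacheController page I/O paths with the hf3fs storage backend
+# on a single rank: back up pages from the host KV pool to storage, then
+# prefetch (transfer) them back, using either the zero-copy path
+# ("page_first" host layout) or the generic path ("layer_first" host layout).
+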
+init_distributed_environment(
+ world_size=1,
+ rank=0,
+ distributed_init_method="tcp://127.0.0.1:23456",
+ local_rank=0,
+ backend="gloo",
+)
+
+initialize_model_parallel(
+ tensor_model_parallel_size=1,
+ pipeline_model_parallel_size=1,
+)
+
+group = get_world_group().cpu_group
+
+max_total_num_tokens = 524288
+page_size = 64
+kv_cache_dtype = torch.bfloat16
+layer_num = 64
+head_num, head_dim = 8, 128
+device = "cuda"
+hicache_ratio = 2
+hicache_size = 0
+hicache_mem_layout = "page_first"
+# hicache_mem_layout = "layer_first"
+hicache_write_policy = "write_through"
+hicache_io_backend = "kernel"
+hicache_storage_backend = "hf3fs"
+prefetch_threshold = 256
+
+op_size = 1024
+op_num = 16
+
+token_to_kv_pool = MHATokenToKVPool(
+ max_total_num_tokens,
+ page_size=page_size,
+ dtype=kv_cache_dtype,
+ head_num=head_num,
+ head_dim=head_dim,
+ layer_num=layer_num,
+ device=device,
+ enable_memory_saver=True,
+)
+
+token_to_kv_pool_allocator = TokenToKVPoolAllocator(
+ max_total_num_tokens,
+ dtype=kv_cache_dtype,
+ device=device,
+ kvcache=token_to_kv_pool,
+ need_sort=False,
+)
+
+kv_cache = token_to_kv_pool_allocator.get_kvcache()
+token_to_kv_pool_host = MHATokenToKVPoolHost(
+ kv_cache,
+ hicache_ratio,
+ hicache_size,
+ page_size,
+ hicache_mem_layout,
+)
+
+load_cache_event = threading.Event()
+cache_controller = HiCacheController(
+ token_to_kv_pool_allocator,
+ token_to_kv_pool_host,
+ page_size,
+ group,
+ load_cache_event=load_cache_event,
+ write_policy=hicache_write_policy,
+ io_backend=hicache_io_backend,
+ storage_backend=hicache_storage_backend,
+ prefetch_threshold=prefetch_threshold,
+)
+
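+# Build op_num backup operations; each covers op_size token indices, with one hash per page.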
+operations = [
+ StorageOperation(
+ torch.tensor(list(range(i, i + op_size))),
+ list(range(i, i + op_size)),
+ hash_value=[f"{j}" for j in range(i, i + op_size, page_size)],
+ )
+ for i in tqdm(range(0, op_num * op_size, op_size))
+]
+
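+# Time the backup path (host pool -> storage) for all operations.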
+tik = time.monotonic()
+if hicache_mem_layout == "page_first":
+ for operation in operations:
+ cache_controller.zerocopy_page_backup(operation, batch_size=128)
+elif hicache_mem_layout == "layer_first":
+ for operation in operations:
+ cache_controller.generic_page_backup(operation, batch_size=128)
+tok = time.monotonic()
+print(f"{tok-tik:.6f} s")
+
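+# Build matching prefetch operations that reference the same per-page hashes.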
+operations = [
+ PrefetchOperation(
+ f"{i}",
+ torch.tensor(list(range(i, i + op_size))),
+ list(range(i, i + op_size)),
+ f"{i}",
+ )
+ for i in tqdm(range(0, op_num * op_size, op_size))
+]
+
+for operation in operations:
+ operation.hash_value = [
+ f"{j}"
+ for j in range(
+ int(operation.last_hash), int(operation.last_hash) + op_size, page_size
+ )
+ ]
+
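+# Time the prefetch path (storage -> host pool) for all operations.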
+tik = time.monotonic()
+if hicache_mem_layout == "page_first":
+ for operation in operations:
+ cache_controller.zerocopy_page_transfer(operation, batch_size=128)
+elif hicache_mem_layout == "layer_first":
+ for operation in operations:
+ cache_controller.generic_page_transfer(operation, batch_size=128)
+tok = time.monotonic()
+print(f"{tok-tik:.6f} s")
diff --git a/benchmark/hicache/bench_long_context.py b/benchmark/hicache/bench_long_context.py
index dc153b8a931..eed0ae5dc2d 100644
--- a/benchmark/hicache/bench_long_context.py
+++ b/benchmark/hicache/bench_long_context.py
@@ -31,9 +31,10 @@ def __init__(self, args):
self.completed_requests = 0
self.dataset = json.load(open(args.dataset_path))
+ num_requests = min(args.num_clients, len(self.dataset["queries"]))
init_requests = []
- for i in range(min(args.num_clients, len(self.dataset["queries"]))):
+ for i in range(num_requests):
context_id = self.dataset["queries"][i]["context"]
init_requests.append(
(
@@ -52,13 +53,14 @@ def __init__(self, args):
self.ready_queue = ReadyQueue(init_requests=init_requests)
self.response_queue = queue.Queue()
- self.pbar = tqdm(total=args.num_clients * args.num_rounds)
+ self.pbar = tqdm(total=num_requests)
self.performance_metrics = {
"ttft": [],
"latency": [],
"itl": [],
"prompt_len": [],
"cached_tokens": [],
+ "generated_len": [],
}
self.max_parallel = args.max_parallel
@@ -75,6 +77,9 @@ def response_handler(self):
self.performance_metrics["ttft"].append(response.ttft)
self.performance_metrics["itl"].extend(response.itl)
self.performance_metrics["latency"].append(response.latency)
+ self.performance_metrics["prompt_len"].append(response.prompt_len)
+ self.performance_metrics["cached_tokens"].append(response.cached_tokens)
+ self.performance_metrics["generated_len"].append(response.generated_len)
self.completed_requests += 1
except queue.Empty:
@@ -85,7 +90,7 @@ def response_handler(self):
if __name__ == "__main__":
args = parse_args()
args.num_rounds = 1
- args.max_parallel = 128
+ args.max_parallel = 24
flush_cache_url = f"http://{args.host}:{args.port}/flush_cache"
for request_rate in [24, 16, 12, 8, 4, 2, 1]:
diff --git a/benchmark/hicache/bench_mix.py b/benchmark/hicache/bench_mix.py
new file mode 100644
index 00000000000..cfd25bc4003
--- /dev/null
+++ b/benchmark/hicache/bench_mix.py
@@ -0,0 +1,567 @@
+import argparse
+import asyncio
+import json
+import logging
+import os
+import queue
+import random
+import threading
+import time
+from dataclasses import dataclass
+from functools import wraps
+
+import aiohttp
+
+from sglang.bench_serving import (
+ RequestFuncOutput,
+ get_tokenizer,
+ remove_prefix,
+ sample_random_requests,
+)
+
+# Set up logger
+logger = logging.getLogger(__name__)
+
+# Set up JSONL file for debug logging
+debug_log_file = None
+# Create a lock for thread-safe debug log writing
+debug_log_lock = threading.Lock()
+
+
+def write_debug_log(data):
+    """Write debug information to a JSONL file."""
+    global debug_log_file
+
+ if debug_log_file is None:
+ return
+
+ # Acquire lock for thread-safe writing
+ with debug_log_lock:
+ # Write as JSONL (JSON Line format)
+ debug_log_file.write(json.dumps(data) + "\n")
+ debug_log_file.flush()
+
+
+def parse_args():
+ parser = argparse.ArgumentParser(
+ description="Script to benchmark concurrent requests to a server."
+ )
+ parser.add_argument(
+ "--model-path",
+ type=str,
+ default="/data/models/Qwen3-0.6B",
+ help="model path compatible with Hugging Face Transformers",
+ )
+ parser.add_argument(
+ "--dataset-path",
+ type=str,
+ default="/data/models/ShareGPT_V3_unfiltered_cleaned_split/ShareGPT_V3_unfiltered_cleaned_split.json",
+ help="local dataset to sample tokens from",
+ )
+ parser.add_argument(
+ "--host",
+ type=str,
+ default="localhost",
+ help="Server hostname or IP (default: localhost)",
+ )
+ parser.add_argument(
+ "--port",
+ type=int,
+ default=30000,
+ help="Server port (default: 30000)",
+ )
+ parser.add_argument(
+ "--duration",
+ type=int,
+ default=600,
+ help="Duration to run the benchmark in seconds (default: 300 seconds)",
+ )
+ parser.add_argument(
+ "--log-level",
+ type=str,
+ default="info",
+ choices=["debug", "info"],
+ help="Set the logging level (default: info)",
+ )
+ parser.add_argument(
+ "--debug-log-file",
+ type=str,
+ default="debug.log.jsonl",
+ help="File to write debug logs in JSONL format",
+ )
+ return parser.parse_args()
+
+
+def load_config():
+ config_path = os.getenv("CONFIG_PATH")
+ if not config_path:
+ raise ValueError("Environment variable 'CONFIG_PATH' is not set.")
+
+ with open(config_path, "r") as f:
+ config = json.load(f)
+
+ required_keys = [
+ "num_rounds",
+ "num_clients",
+ "round_ratios",
+ "mean_new_tokens_per_round",
+ "mean_return_tokens_per_round",
+ "mean_inter_round_interval",
+ ]
+
+ for key in required_keys:
+ if key not in config:
+ raise KeyError(f"Missing required configuration key: {key}")
+
+ num_rounds = config["num_rounds"]
+ assert len(config["round_ratios"]) == num_rounds
+ assert len(config["mean_new_tokens_per_round"]) == num_rounds
+ assert len(config["mean_return_tokens_per_round"]) == num_rounds
+ assert len(config["mean_inter_round_interval"]) == num_rounds
+
+ print(config)
+
+ return config
+
+
+@dataclass
+class UserData:
+ user_id: int
+ current_round: int
+ total_rounds: int
+ prompt: str
+ return_tokens: int
+ start: int
+
+
+def synchronized():
+ def _decorator(func):
+ @wraps(func)
+ def wrapper(self, *args, **kwargs):
+ with self.lock:
+ return func(self, *args, **kwargs)
+
+ return wrapper
+
+ return _decorator
+
+
+class UserGenerator:
+ def __init__(self, config, model_path, dataset_path):
+ self.tokenizer_path = model_path
+ self.tokenizer = get_tokenizer(self.tokenizer_path)
+ self.dataset_path = dataset_path
+
+ self.user_id = 0
+ self.lock = threading.Lock()
+
+ self.num_rounds = config["num_rounds"]
+
+ self.cumulative_ratios = [
+ sum(config["round_ratios"][: i + 1])
+ for i in range(len(config["round_ratios"]))
+ ]
+ self.mean_new_tokens_per_round = config["mean_new_tokens_per_round"]
+ self.mean_return_tokens_per_round = config["mean_return_tokens_per_round"]
+ self.mean_inter_round_interval = config["mean_inter_round_interval"]
+
+ self.sigma = 100
+ self.range_ratio = 0.8
+ assert self.range_ratio <= 1
+
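+        # Pre-sample candidate prompts for each round; input/output lengths are scaled
+        # so that, given range_ratio, their mean roughly matches the configured
+        # per-round means.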
+ self.candidate_inputs = [
+ [
+ r
+ for r in sample_random_requests(
+ input_len=(
+ self.mean_new_tokens_per_round[i] * (2 - self.range_ratio)
+ ),
+ output_len=(
+ self.mean_return_tokens_per_round[i] * (2 - self.range_ratio)
+ ),
+ num_prompts=config["num_clients"],
+ range_ratio=self.range_ratio / (2 - self.range_ratio),
+ tokenizer=self.tokenizer,
+ dataset_path=self.dataset_path,
+ random_sample=False,
+ )
+ ]
+ for i in range(self.num_rounds)
+ ]
+
+ self.multiturn_queue = []
+
+ self.user_stats = [0 for _ in range(self.num_rounds)]
+ self.input_stats = [[0, 0] for _ in range(self.num_rounds)]
+ self.output_stats = [[0, 0] for _ in range(self.num_rounds)]
+
+ def gen(self):
+ user_id = self.user_id
+ self.user_id += 1
+
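+        # Sample this user's total number of rounds according to round_ratios,
+        # using the cumulative distribution built in __init__.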
+ rand_ratio = random.randint(0, self.cumulative_ratios[-1])
+ i = len(self.cumulative_ratios)
+ for idx, cumulative_ratio in enumerate(self.cumulative_ratios):
+ if rand_ratio >= cumulative_ratio:
+ continue
+ else:
+ i = idx + 1
+ break
+ total_rounds = i
+ current_round = 0
+
+ candidate_input = random.sample(self.candidate_inputs[current_round], 1)[0]
+ self.input_stats[0][0] += candidate_input.prompt_len
+ self.input_stats[0][1] += 1
+ prompt = f"{user_id} " + candidate_input.prompt
+ return_tokens = int(
+ random.gauss(self.mean_return_tokens_per_round[current_round], self.sigma)
+ )
+ if return_tokens <= 0:
+ return_tokens = self.mean_return_tokens_per_round[current_round]
+ start = 0
+
+ user_data = UserData(
+ user_id, current_round, total_rounds, prompt, return_tokens, start
+ )
+
+ self.user_stats[total_rounds - 1] += 1
+
+ return user_data
+
+ @synchronized()
+ def push(self, user_data, generated_text, len_itl):
+ self.output_stats[user_data.current_round][0] += len_itl + 1
+ self.output_stats[user_data.current_round][1] += 1
+ user_data.current_round += 1
+ if user_data.current_round >= user_data.total_rounds:
+ return
+
+ candidate_input = random.sample(
+ self.candidate_inputs[user_data.current_round], 1
+ )[0]
+ self.input_stats[user_data.current_round][0] += candidate_input.prompt_len
+ self.input_stats[user_data.current_round][1] += 1
+ user_data.prompt += generated_text + candidate_input.prompt
+ user_data.return_tokens = int(
+ random.gauss(
+ self.mean_return_tokens_per_round[user_data.current_round], self.sigma
+ )
+ )
+ if user_data.return_tokens <= 0:
+ user_data.return_tokens = self.mean_return_tokens_per_round[
+ user_data.current_round
+ ]
+ interval = random.gauss(
+ self.mean_inter_round_interval[user_data.current_round], self.sigma
+ )
+ if interval <= 0:
+ interval = self.mean_inter_round_interval[user_data.current_round]
+ user_data.start = time.perf_counter() + interval
+
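+        # Re-queue the user for its next round, keeping the queue sorted by start time.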
+ if len(self.multiturn_queue) == 0:
+ self.multiturn_queue.append(user_data)
+ else:
+ i = len(self.multiturn_queue)
+ for idx, d in enumerate(self.multiturn_queue):
+ if user_data.start < d.start:
+ i = idx
+ break
+            self.multiturn_queue.insert(i, user_data)
+
+ @synchronized()
+ def pop(self):
+ if (
+ len(self.multiturn_queue)
+ and time.perf_counter() > self.multiturn_queue[0].start
+ ):
+ return self.multiturn_queue.pop(0)
+ return self.gen()
+
+
+def gen_payload(prompt, output_len):
+ payload = {
+ "text": prompt,
+ "sampling_params": {
+ "temperature": 0.0,
+ "max_new_tokens": output_len,
+ "ignore_eos": True,
+ },
+ "stream": True,
+ "stream_options": {"include_usage": True},
+ "lora_path": "",
+ "return_logprob": False,
+ "logprob_start_len": -1,
+ }
+ return payload
+
+
+AIOHTTP_TIMEOUT = aiohttp.ClientTimeout(total=20 * 60 * 60)
+
+
+async def async_request_sglang_generate(
+ user_data,
+ url,
+ atomic_counter,
+):
+ """
+ Sends a streaming request to the server. Gathers text token-by-token.
+ """
+ async with aiohttp.ClientSession(timeout=AIOHTTP_TIMEOUT) as session:
+ headers = {}
+ generated_text = ""
+ ttft = 0.0
+ st = time.perf_counter()
+ most_recent_timestamp = st
+ output = RequestFuncOutput()
+ payload = gen_payload(user_data.prompt, user_data.return_tokens)
+ write_debug_log({"timestamp": st, "user_data": user_data.__dict__})
+
+ try:
+ async with session.post(url=url, json=payload, headers=headers) as response:
+ if response.status == 200:
+ prompt_tokens = 0
+ cached_tokens = 0
+ async for chunk_bytes in response.content:
+ chunk_bytes = chunk_bytes.strip()
+ if not chunk_bytes:
+ continue
+
+ chunk = remove_prefix(chunk_bytes.decode("utf-8"), "data: ")
+ latency = time.perf_counter() - st
+ if chunk == "[DONE]":
+ pass
+ else:
+ data = json.loads(chunk)
+
+ if data.get("text"):
+ timestamp = time.perf_counter()
+ # First token
+ if ttft == 0.0:
+ ttft = time.perf_counter() - st
+ output.ttft = ttft
+ prompt_tokens = (data.get("meta_info") or {}).get(
+ "prompt_tokens", 0
+ )
+ cached_tokens = (data.get("meta_info") or {}).get(
+ "cached_tokens", 0
+ )
+
+ # Decoding phase
+ else:
+ output.itl.append(timestamp - most_recent_timestamp)
+
+ most_recent_timestamp = timestamp
+ generated_text = data["text"]
+
+ output.generated_text = generated_text
+ output.success = True
+ output.latency = latency
+ output.prompt_len = prompt_tokens
+ output.cached_tokens = cached_tokens
+ else:
+ output.error = response.reason or ""
+ output.success = False
+ except Exception as e:
+ output.success = False
+ output.error = str(e)
+ print(f"Request failed: {e}")
+
+ atomic_counter.increment(1)
+ return output
+
+
+class AtomicCounter:
+ def __init__(self, initial_value=0):
+ self._value = initial_value
+ self.lock = threading.Lock()
+
+ @synchronized()
+ def increment(self, amount=1):
+ self._value += amount
+
+ @synchronized()
+ def get(self):
+ return self._value
+
+
+class WorkloadGenerator:
+ def __init__(self, args):
+ config = load_config()
+ user_generator = UserGenerator(
+ config,
+ args.model_path,
+ args.dataset_path,
+ )
+
+ self.url = f"http://{args.host}:{args.port}/generate"
+
+ self.tokenizer = user_generator.tokenizer
+ self.start_time = None
+ self.finished_time = None
+ self.duration = args.duration
+ self.done = False
+
+ self.sent_requests = 0
+ self.completed_requests = 0
+
+ self.user_generator = user_generator
+ self.response_queue = queue.Queue()
+ self.performance_metrics = {
+ "ttft": [],
+ "latency": [],
+ "prompt_len": [],
+ "cached_tokens": [],
+ }
+ self.max_parallel = config["num_clients"]
+
+ self.atomic_counter = AtomicCounter()
+
+ async def handle_request(self, user_data):
+ try:
+ response = await async_request_sglang_generate(
+ user_data, self.url, self.atomic_counter
+ )
+ self.response_queue.put((user_data, response))
+ except Exception as e:
+ print(f"Request failed: {e}")
+ self.completed_requests += 1
+
+ def request_sender(self):
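+        # Keep at most max_parallel requests in flight until the configured duration elapses.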
+ async def request_loop():
+ while True:
+ if self.sent_requests - self.completed_requests < self.max_parallel:
+ new_request = self.user_generator.pop()
+ if new_request:
+ asyncio.create_task(self.handle_request(new_request))
+ self.sent_requests += 1
+ else:
+ await asyncio.sleep(0.05)
+ continue
+
+ if time.perf_counter() - self.start_time > self.duration:
+ self.done = True
+ break
+
+ loop = asyncio.new_event_loop()
+ asyncio.set_event_loop(loop)
+ loop.run_until_complete(request_loop())
+ loop.close()
+
+ def response_handler(self):
+ while True:
+ try:
+ user_data, response = self.response_queue.get(timeout=10)
+ logger.info(
+ f"{((time.perf_counter()-self.start_time)/self.duration*100):.2f}%"
+ )
+ if not response.success:
+ raise ValueError(f"Request failed with error: {response.error}")
+
+ self.user_generator.push(
+ user_data, response.generated_text, len(response.itl)
+ )
+ self.performance_metrics["ttft"].append(response.ttft)
+ self.performance_metrics["latency"].append(response.latency)
+ self.performance_metrics["prompt_len"].append(response.prompt_len)
+ self.performance_metrics["cached_tokens"].append(response.cached_tokens)
+ self.completed_requests += 1
+ self.finished_time = time.perf_counter()
+
+ except queue.Empty:
+ if self.done:
+ break
+ except ValueError as e:
+ print(f"Error processing response for client {user_data}: {e}")
+ continue
+
+ def run(self):
+ request_thread = threading.Thread(target=self.request_sender, daemon=True)
+ response_thread = threading.Thread(target=self.response_handler, daemon=True)
+
+ self.start_time = time.perf_counter()
+ request_thread.start()
+ response_thread.start()
+
+ request_thread.join()
+ response_thread.join()
+
+ performance_data = {
+ "summary": {
+ "total_requests": len(self.performance_metrics["ttft"]),
+ "average_ttft": sum(self.performance_metrics["ttft"])
+ / len(self.performance_metrics["ttft"]),
+ "p90_ttft": sorted(self.performance_metrics["ttft"])[
+ int(0.9 * len(self.performance_metrics["ttft"]))
+ ],
+ "median_ttft": sorted(self.performance_metrics["ttft"])[
+ len(self.performance_metrics["ttft"]) // 2
+ ],
+ "average_latency": sum(self.performance_metrics["latency"])
+ / len(self.performance_metrics["latency"]),
+ "p90_latency": sorted(self.performance_metrics["latency"])[
+ int(0.9 * len(self.performance_metrics["latency"]))
+ ],
+ "median_latency": sorted(self.performance_metrics["latency"])[
+ len(self.performance_metrics["latency"]) // 2
+ ],
+ "throughput": self.atomic_counter.get()
+ / (self.finished_time - self.start_time),
+ "cache_hit_rate": (
+ 0
+ if sum(self.performance_metrics["prompt_len"]) == 0
+ else sum(self.performance_metrics["cached_tokens"])
+ / sum(self.performance_metrics["prompt_len"])
+ ),
+ },
+ }
+ print("All requests completed")
+ print("Performance metrics summary:")
+ print(f" Total requests: {performance_data['summary']['total_requests']}")
+ print(f" Average TTFT: {performance_data['summary']['average_ttft']:.2f}")
+ print(f" P90 TTFT: {performance_data['summary']['p90_ttft']:.2f}")
+ print(f" Median TTFT: {performance_data['summary']['median_ttft']:.2f}")
+ print(
+ f" Average latency: {performance_data['summary']['average_latency']:.2f}"
+ )
+ print(f" P90 latency: {performance_data['summary']['p90_latency']:.2f}")
+ print(f" Median latency: {performance_data['summary']['median_latency']:.2f}")
+ print(
+ f" Throughput: {performance_data['summary']['throughput']:.2f} requests per second"
+ )
+ print(f" Cache Hit Rate: {performance_data['summary']['cache_hit_rate']:.6f}")
+
+ user_stats = self.user_generator.user_stats
+ input_stats = self.user_generator.input_stats
+ output_stats = self.user_generator.output_stats
+ print(f"round_ratios: {user_stats}")
+ print(
+ f"mean_new_tokens_per_round: {[int(a/b) if b > 0 else 0 for a, b in input_stats]}"
+ )
+ print(
+ f"mean_return_tokens_per_round: {[int(a/b) if b > 0 else 0 for a, b in output_stats]}"
+ )
+ return performance_data
+
+
+def main():
+ global debug_log_file
+
+ args = parse_args()
+ if args.log_level == "debug":
+ logging.basicConfig(level=logging.DEBUG)
+ logger.info("use log_level debug")
+ # Initialize debug log file
+ debug_log_file = open(args.debug_log_file, "w")
+ else:
+ logging.basicConfig(level=logging.INFO)
+ logger.info("use log_level info")
+ performance_data = WorkloadGenerator(args).run()
+
+ # Close debug log file if it was opened
+ if debug_log_file:
+ debug_log_file.close()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/benchmark/hicache/bench_mix.sh b/benchmark/hicache/bench_mix.sh
new file mode 100755
index 00000000000..5ff6dca94cd
--- /dev/null
+++ b/benchmark/hicache/bench_mix.sh
@@ -0,0 +1,42 @@
+#!/bin/bash
+
+export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib/python3.12/dist-packages:/usr/local/lib/python3.12/dist-packages/torch/lib
+rm -rf nohup.out && \
+nohup python3 -m sglang.launch_server \
+ --attention-backend triton \
+ --model-path /code/models/Qwen3-32B/ \
+ --log-level info \
+ --tp 4 --mem-frac 0.25 \
+ --host 0.0.0.0 --port 33301 \
+ --enable-metrics --enable-cache-report \
+ --page-size 64 \
+ --enable-hierarchical-cache \
+ --hicache-ratio 2.5 --hicache-size 0 \
+ --hicache-io-backend kernel \
+ --hicache-mem-layout layer_first \
+ --hicache-write-policy write_through \
+ &
+
+##################################################
+
+export CONFIG_PATH=/tmp/bench_mix_config.json
+
+# num_clients: Maximum number of concurrent client requests to be simulated
+# round_ratios: Distribution of requests across rounds. Given sum(round_ratios) total requests,
+# round_ratios[i] denotes the number of requests that will execute for (i+1) rounds
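+#               Example: with round_ratios = [50, 25, ...], 50 requests execute a single round and 25 execute two rounds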
+echo '{
+ "num_rounds": 10,
+ "num_clients": 60,
+ "round_ratios": [50, 25, 15, 15, 10, 10, 9, 8, 7, 6],
+ "mean_new_tokens_per_round": [1000, 400, 350, 300, 280, 260, 240, 220, 210, 200],
+ "mean_return_tokens_per_round": [100, 100, 100, 100, 100, 100, 100, 100, 100, 100],
+ "mean_inter_round_interval": [30, 30, 30, 30, 30, 30, 30, 30, 30, 30]
+}' > ${CONFIG_PATH}
+
+rm -rf bench_mix.out && \
+nohup python3 /sgl-workspace/sglang/benchmark/hicache/bench_mix.py \
+ --model-path /code/models/Qwen3-32B/ \
+ --dataset-path /code/models/ShareGPT_V3_unfiltered_cleaned_split.json \
+ --port 33301 \
+ --duration 600 \
+> bench_mix.out &
diff --git a/benchmark/hicache/bench_multiturn.py b/benchmark/hicache/bench_multiturn.py
index 35e638d33d1..a3e8b0d7404 100644
--- a/benchmark/hicache/bench_multiturn.py
+++ b/benchmark/hicache/bench_multiturn.py
@@ -130,6 +130,12 @@ def parse_args():
help="Tag of a certain run in the log file",
)
parser.add_argument("--seed", type=int, default=1, help="The random seed.")
+ parser.add_argument(
+ "--lora-path",
+ type=str,
+ default="",
+ help="String of LoRA path. Currently we only support benchmarking on a single LoRA adaptor.",
+ )
return parser.parse_args()
@@ -191,6 +197,7 @@ async def async_request_sglang_generate(
output.latency = latency
output.prompt_len = prompt_tokens
output.cached_tokens = cached_tokens
+ output.generated_len = len(output.itl) + 1
else:
output.error = response.reason or ""
output.success = False
@@ -204,7 +211,7 @@ async def async_request_sglang_generate(
return output
-def gen_payload(prompt, output_len):
+def gen_payload(prompt, output_len, lora_path=""):
payload = {
"text": prompt,
"sampling_params": {
@@ -214,7 +221,7 @@ def gen_payload(prompt, output_len):
},
"stream": True,
"stream_options": {"include_usage": True},
- "lora_path": "",
+ "lora_path": lora_path,
"return_logprob": False,
"logprob_start_len": -1,
}
@@ -302,7 +309,12 @@ def __init__(self, args):
)
init_requests = [
- (i, gen_payload(self.candidate_inputs[i], args.output_length))
+ (
+ i,
+ gen_payload(
+ self.candidate_inputs[i], args.output_length, args.lora_path
+ ),
+ )
for i in range(args.num_clients)
]
self.client_records = {
@@ -321,6 +333,7 @@ def __init__(self, args):
"latency": [],
"prompt_len": [],
"cached_tokens": [],
+ "generated_len": [],
}
self.num_rounds = args.num_rounds
self.max_parallel = args.max_parallel
@@ -383,6 +396,7 @@ def response_handler(self):
self.performance_metrics["latency"].append(response.latency)
self.performance_metrics["prompt_len"].append(response.prompt_len)
self.performance_metrics["cached_tokens"].append(response.cached_tokens)
+ self.performance_metrics["generated_len"].append(response.generated_len)
self.completed_requests += 1
if self.client_records[client_id]["round"] < self.num_rounds:
@@ -396,6 +410,7 @@ def response_handler(self):
gen_payload(
self.client_records[client_id]["history"],
self.output_length,
+ args.lora_path,
),
)
)
@@ -418,6 +433,7 @@ def run(self):
response_thread.join()
self.pbar.close()
+ duration = self.finished_time - self.start_time
performance_data = {
"summary": {
"total_requests": len(self.performance_metrics["ttft"]),
@@ -438,7 +454,13 @@ def run(self):
"median_latency": sorted(self.performance_metrics["latency"])[
len(self.performance_metrics["latency"]) // 2
],
- "throughput": self.pbar.total / (self.finished_time - self.start_time),
+ "input_token_throughput": sum(self.performance_metrics["prompt_len"])
+ / duration,
+ "output_token_throughput": sum(
+ self.performance_metrics["generated_len"]
+ )
+ / duration,
+ "throughput": self.pbar.total / duration,
"cache_hit_rate": (
0
if sum(self.performance_metrics["prompt_len"]) == 0
@@ -461,7 +483,13 @@ def run(self):
print(f" P90 latency: {performance_data['summary']['p90_latency']:.2f}")
print(f" Median latency: {performance_data['summary']['median_latency']:.2f}")
print(
- f" Throughput: {performance_data['summary']['throughput']:.2f} requests per second"
+ f" Input token throughput: {performance_data['summary']['input_token_throughput']:.2f} tokens per second"
+ )
+ print(
+ f" Output token throughput: {performance_data['summary']['output_token_throughput']:.2f} tokens per second"
+ )
+ print(
+ f" Request Throughput: {performance_data['summary']['throughput']:.2f} requests per second"
)
print(f" Cache Hit Rate: {performance_data['summary']['cache_hit_rate']:.6f}")
return performance_data
diff --git a/benchmark/hicache/data_processing.py b/benchmark/hicache/data_processing.py
index 0152406a8e1..8f72a0d95e9 100644
--- a/benchmark/hicache/data_processing.py
+++ b/benchmark/hicache/data_processing.py
@@ -439,8 +439,8 @@ def get_gen_prefix_cache_path(args, tokenizer):
# Create a unique cache filename based on the generation parameters
cache_key = (
- f"gen_prefix_{args.gen_num_groups}_{args.gen_prompts_per_group}_"
- f"{args.gen_system_prompt_len}_{args.gen_question_len}_{args.gen_output_len}_"
+ f"gsp_prefix_{args.gsp_num_groups}_{args.gsp_prompts_per_group}_"
+ f"{args.gsp_system_prompt_len}_{args.gsp_question_len}_{args.gsp_output_len}_"
f"{tokenizer.__class__.__name__}.pkl"
)
return cache_dir / cache_key
@@ -577,11 +577,11 @@ def get_dataset(args, tokenizer):
)
elif args.dataset_name == "generated-shared-prefix":
input_requests = sample_generated_shared_prefix_requests(
- num_groups=args.gen_num_groups,
- prompts_per_group=args.gen_prompts_per_group,
- system_prompt_len=args.gen_system_prompt_len,
- question_len=args.gen_question_len,
- output_len=args.gen_output_len,
+ num_groups=args.gsp_num_groups,
+ prompts_per_group=args.gsp_prompts_per_group,
+ system_prompt_len=args.gsp_system_prompt_len,
+ question_len=args.gsp_question_len,
+ output_len=args.gsp_output_len,
args=args,
tokenizer=tokenizer,
)
diff --git a/benchmark/fbgemm/README.md b/benchmark/kernels/fbgemm/README.md
similarity index 100%
rename from benchmark/fbgemm/README.md
rename to benchmark/kernels/fbgemm/README.md
diff --git a/benchmark/fbgemm/benchmark_fbgemm_grouped_gemm.py b/benchmark/kernels/fbgemm/benchmark_fbgemm_grouped_gemm.py
similarity index 100%
rename from benchmark/fbgemm/benchmark_fbgemm_grouped_gemm.py
rename to benchmark/kernels/fbgemm/benchmark_fbgemm_grouped_gemm.py
diff --git a/benchmark/kernels/flashinfer_allreduce_fusion/README.md b/benchmark/kernels/flashinfer_allreduce_fusion/README.md
new file mode 100644
index 00000000000..e651604c765
--- /dev/null
+++ b/benchmark/kernels/flashinfer_allreduce_fusion/README.md
@@ -0,0 +1,102 @@
+# FlashInfer Fused AllReduce + RMSNorm Benchmark
+
+This benchmark script is modified from the [original implementation](https://github.com/vllm-project/vllm/blob/237e1fb887c7f5a579420fa0295097f24b006594/benchmarks/kernels/benchmark_fused_collective.py) by the vLLM community. It compares the performance of the FlashInfer fused operator in SGLang (trtllm_allreduce_fusion: AllReduce + Residual Add + RMSNorm + optional quantization) against the conventional implementation (standard `tensor_model_parallel_all_reduce` followed by separate RMSNorm/quantization). Specifically, the script times two implementation paths: 1) standard AllReduce and RMSNorm executed separately; 2) FlashInfer's fused operator combining AllReduce, Residual Add, RMSNorm, and optional quantization.
+
+This benchmark helps us tune the IPC workspace size of the `flashinfer_allreduce_residual_rmsnorm` operator in SGLang and prepare for adopting the FP8/FP4 quantized fused operators.
+
+Script path: `benchmark/kernels/flashinfer_allreduce_fusion/benchmark_fused_collective.py`
+
+## Feature Overview
+
+- Compare average execution time (ms) and calculate speedup ratios for the following paths:
+ - standard_allreduce_rmsnorm (Standard AllReduce + RMSNorm)
+ - flashinfer_fused_allreduce_rmsnorm (Fused AllReduce + RMSNorm), including oneshot and twoshot modes
+ - Optionally compare FP8/FP4 quantized fused paths with standard paths
+- Use CUDA Graph capture and batch replay to reduce measurement noise (see the timing sketch below)
+- Automatically select the faster "standard baseline" (native/compiled version) as the denominator for speedup calculation
+- Optionally export results in Markdown format
+
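+The per-operation timing loop roughly follows the pattern below. This is a simplified, illustrative sketch (the helper name `time_op_with_cuda_graph` and the constants are ours, and the real script additionally wraps the distributed setup and SGLang's `graph_capture()` around the capture), not the exact benchmark code:
+
+```python
+import torch
+
+
+def time_op_with_cuda_graph(op, trials: int = 20, ops_per_graph: int = 10) -> float:
+    """Return the average time per call of `op` in milliseconds."""
+    # Eager warmup before capture
+    for _ in range(5):
+        op()
+    torch.cuda.synchronize()
+
+    # Capture several invocations into one CUDA graph to amortize launch overhead
+    graph = torch.cuda.CUDAGraph()
+    with torch.cuda.graph(graph):
+        for _ in range(ops_per_graph):
+            op()
+
+    start = torch.cuda.Event(enable_timing=True)
+    end = torch.cuda.Event(enable_timing=True)
+    start.record()
+    for _ in range(trials):
+        graph.replay()
+    end.record()
+    torch.cuda.synchronize()
+    return start.elapsed_time(end) / (trials * ops_per_graph)
+
+
+# The reported speedup is baseline_ms / candidate_ms, where the baseline is the
+# faster of the standard (native vs. compiled) implementations.
+```
+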
+## Runtime Environment and Prerequisites
+
+- At least 2 GPUs; the benchmark is launched as multiple processes with `torchrun` (NCCL backend)
+- SGLang installed/compiled together with sgl-kernel and its custom operators
+
+## Quick Start (Command Examples)
+
+The following examples use world_size=2. You can modify `--nproc_per_node` and parameters according to your machine:
+
+- Regular paths only (no quantization):
+```
+torchrun --nproc_per_node=2 \
+benchmark/kernels/flashinfer_allreduce_fusion/benchmark_fused_collective.py \
+--no-quant --hidden-dim 1024 --seq-lens 512 1024 2048 4096 --trials 100
+```
+
+- FP8 quantization paths only:
+```
+torchrun --nproc_per_node=2 \
+benchmark/kernels/flashinfer_allreduce_fusion/benchmark_fused_collective.py \
+--quant-fp8 --hidden-dim 1024 --seq-lens 512 1024 2048 4096 --trials 100
+```
+
+- FP4 quantization paths only:
+```
+torchrun --nproc_per_node=2 \
+benchmark/kernels/flashinfer_allreduce_fusion/benchmark_fused_collective.py \
+--quant-fp4 --hidden-dim 1024 --seq-lens 512 1024 2048 4096 --trials 100
+```
+
+- Larger hidden dimensions:
+```
+torchrun --nproc_per_node=2 \
+benchmark/kernels/flashinfer_allreduce_fusion/benchmark_fused_collective.py \
+--no-quant --hidden-dim 4096 --seq-lens 512 1024 2048 4096 --trials 100
+```
+
+## Parameter Description
+- `--seq-lens`: List of sequence lengths to test (default: 128 512 1024 2048)
+- `--hidden-dim`: Hidden dimension (default: 8192)
+- `--dtypes`: Data type list, `float16|bfloat16|float32` (default: bfloat16)
+- `--no-residual`: Only test "no residual" scenarios (default tests both "with/without residual")
+- Mutually exclusive quantization options:
+ - `--no-quant`: No quantization testing
+ - `--quant-fp8`: Only FP8 quantization testing
+ - `--quant-fp4`: Only FP4 quantization testing
+ - `--quant-all`: Test all (default)
+- FlashInfer related:
+  - `--disable-oneshot`: Disable oneshot mode (by default oneshot is enabled and twoshot is tested as well)
+- Runtime configuration:
+ - `--warmup`: Warmup count before graph capture and before graph replay (default 5)
+ - `--trials`: Benchmark iteration count (default 20; internally each `graph.replay()` will batch replay multiple times)
+  - `--output-file`: Save results as a Markdown file (only rank 0 writes the file)
+
+## Output Example
+
+Each configuration group prints a table showing average execution time and relative speedup ratios (baseline is the faster standard implementation). For example:
+```
+================================================================================
+Results: seq_len=1024, hidden_dim=1024
+dtype=torch.bfloat16, residual=yes, quant_mode=none
+================================================================================
+Operation Time (ms) Speedup
+--------------------------------------------------------------------------------
+standard_allreduce_rmsnorm 0.024 0.98x
+standard_allreduce_rmsnorm_native_compiled 0.023 baseline
+flashinfer_fused_allreduce_rmsnorm_oneshot 0.011 2.19x
+flashinfer_fused_allreduce_rmsnorm_twoshot 0.041 0.57x
+```
+
+If `--output-file` is specified, all configurations will be summarized in Markdown tables in that file.
+
+## Important Notes and Recommendations
+
+- Distributed: The script uses `torchrun` environment variables to initialize the distributed environment and binds tensors/communication groups to the device of the current rank.
+- World size: `WORLD_SIZE > 1` is required to benchmark the communication operators; otherwise, the script exits with an error.
+- FlashInfer:
+  - If FlashInfer is not installed or the required interfaces are missing, the script only runs the standard paths and logs a warning.
+  - The fused operator supports two trigger modes, "oneshot" and "twoshot"; oneshot is enabled by default and twoshot is tested as well.
+- FP8/FP4:
+  - FP8 uses SGLang's FP8 utilities and dtype; the underlying platform selects `e4m3`/`e4m3fnuz`, etc.
+  - FP4 uses sgl-kernel's `scaled_fp4_quant`, which requires platform support.
+- CUDA Graph:
+  - Uses SGLang's `graph_capture()` to put communication into a capture-ready state, then captures the kernels with `torch.cuda.graph` to reduce measurement jitter.
diff --git a/benchmark/kernels/flashinfer_allreduce_fusion/benchmark_fused_collective.py b/benchmark/kernels/flashinfer_allreduce_fusion/benchmark_fused_collective.py
new file mode 100644
index 00000000000..4aebf62b90e
--- /dev/null
+++ b/benchmark/kernels/flashinfer_allreduce_fusion/benchmark_fused_collective.py
@@ -0,0 +1,1304 @@
+# Modified from https://github.com/vllm-project/vllm/blob/237e1fb887c7f5a579420fa0295097f24b006594/benchmarks/kernels/benchmark_fused_collective.py
+
+"""
+Benchmark for FlashInfer fused collective operations vs standard operations.
+
+This benchmark compares:
+1. FlashInfer's trtllm_allreduce_fusion (fused allreduce + rmsnorm + optional quant)
+2. Standard tensor_model_parallel_all_reduce + separate rmsnorm/quant operations
+
+Usage with torchrun:
+ torchrun --nproc_per_node=2 benchmark/kernels/flashinfer_allreduce_fusion/benchmark_fused_collective.py --no-quant --hidden-dim 1024 --seq-len 512 1024 2048 4096 --trials 100
+ torchrun --nproc_per_node=2 benchmark/kernels/flashinfer_allreduce_fusion/benchmark_fused_collective.py --quant-fp8 --hidden-dim 1024 --seq-len 512 1024 2048 4096 --trials 100
+ torchrun --nproc_per_node=2 benchmark/kernels/flashinfer_allreduce_fusion/benchmark_fused_collective.py --quant-fp4 --hidden-dim 1024 --seq-len 512 1024 2048 4096 --trials 100
+
+ torchrun --nproc_per_node=2 benchmark/kernels/flashinfer_allreduce_fusion/benchmark_fused_collective.py --no-quant --hidden-dim 4096 --seq-len 512 1024 2048 4096 --trials 100
+ torchrun --nproc_per_node=2 benchmark/kernels/flashinfer_allreduce_fusion/benchmark_fused_collective.py --quant-fp8 --hidden-dim 4096 --seq-len 512 1024 2048 4096 --trials 100
+ torchrun --nproc_per_node=2 benchmark/kernels/flashinfer_allreduce_fusion/benchmark_fused_collective.py --quant-fp4 --hidden-dim 4096 --seq-len 512 1024 2048 4096 --trials 100
+"""
+
+import argparse
+import contextlib
+import itertools
+import logging
+import os
+import time
+from typing import Optional
+
+import torch # type: ignore
+import torch.distributed as dist # type: ignore
+
+from sglang.srt.distributed import get_tp_group, tensor_model_parallel_all_reduce
+from sglang.srt.distributed.parallel_state import (
+ cleanup_dist_env_and_memory,
+ graph_capture,
+ init_distributed_environment,
+ initialize_model_parallel,
+)
+from sglang.srt.layers.layernorm import RMSNorm # noqa
+from sglang.srt.layers.quantization.fp8_kernel import fp8_dtype as SGLANG_FP8_DTYPE
+from sglang.srt.layers.quantization.fp8_kernel import static_quant_fp8
+
+try:
+ from sgl_kernel import fused_add_rmsnorm as SGL_FUSED_ADD_RMS_NORM
+ from sgl_kernel import rmsnorm as SGL_RMS_NORM
+ from sgl_kernel import scaled_fp4_quant as SGL_SCALED_FP4_QUANT
+except Exception: # pragma: no cover - fallback on non-supported platforms
+ SGL_FUSED_ADD_RMS_NORM = None
+ SGL_RMS_NORM = None
+ SGL_SCALED_FP4_QUANT = None
+
+FP8_DTYPE = SGLANG_FP8_DTYPE
+
+logger = logging.getLogger(__name__)
+
+# Try to import FlashInfer
+try:
+ import flashinfer.comm as flashinfer_comm # type: ignore
+
+ if not hasattr(flashinfer_comm, "trtllm_allreduce_fusion"):
+ flashinfer_comm = None
+ logger.warning(
+ "FlashInfer comm module found but missing trtllm_allreduce_fusion"
+ )
+except ImportError:
+ flashinfer_comm = None
+ logger.warning("FlashInfer not found, only benchmarking standard operations")
+
+# Constants
+MiB = 1024 * 1024
+
+# FlashInfer max sizes per world size
+# Enable 64MB for 2, 4, 8 world sizes to verify large input sizes
+# use --disable-oneshot to disable oneshot mode for very large input sizes
+_FI_MAX_SIZES = {
+ 2: 64 * MiB, # 64MB
+ 4: 64 * MiB, # 64MB
+ 8: 64 * MiB, # 64MB
+}
+
+# Global workspace tensor for FlashInfer
+_FI_WORKSPACE_TENSOR = None
+
+
+def setup_flashinfer_workspace(
+ world_size: int,
+ rank: int,
+ hidden_dim: int,
+ max_token_num: int,
+ use_fp32_lamport: bool = False,
+):
+ """Setup FlashInfer workspace for fused allreduce operations."""
+ global _FI_WORKSPACE_TENSOR
+
+ if flashinfer_comm is None:
+ return None, None
+
+ if world_size not in _FI_MAX_SIZES:
+ logger.warning("FlashInfer not supported for world size %s", world_size)
+ return None, None
+
+ try:
+ # Create IPC workspace
+ ipc_handles, workspace_tensor = (
+ flashinfer_comm.trtllm_create_ipc_workspace_for_all_reduce_fusion(
+ tp_rank=rank,
+ tp_size=world_size,
+ max_token_num=max_token_num,
+ hidden_dim=hidden_dim,
+ group=get_tp_group().device_group,
+ use_fp32_lamport=use_fp32_lamport,
+ )
+ )
+
+ _FI_WORKSPACE_TENSOR = workspace_tensor
+ return ipc_handles, workspace_tensor
+ except Exception as e:
+ logger.error("Failed to setup FlashInfer workspace: %s", e)
+ return None, None
+
+
+def cleanup_flashinfer_workspace(ipc_handles):
+ """Cleanup FlashInfer workspace."""
+ if flashinfer_comm is None or ipc_handles is None:
+ return
+
+ try:
+ group = get_tp_group().device_group
+ flashinfer_comm.trtllm_destroy_ipc_workspace_for_all_reduce(ipc_handles, group)
+ except Exception as e:
+ logger.error("Failed to cleanup FlashInfer workspace: %s", e)
+
+
+class FlashInferFusedAllReduceParams:
+ """Parameters for FlashInfer fused allreduce operations."""
+
+ def __init__(
+ self,
+ rank: int,
+ world_size: int,
+ use_fp32_lamport: bool = False,
+ max_token_num: int = 1024,
+ ):
+ self.rank = rank
+ self.world_size = world_size
+ self.use_fp32_lamport = use_fp32_lamport
+ self.trigger_completion_at_end = True
+ self.launch_with_pdl = True
+ self.fp32_acc = True
+ self.max_token_num = max_token_num
+
+ def get_trtllm_fused_allreduce_kwargs(self):
+ return {
+ "world_rank": self.rank,
+ "world_size": self.world_size,
+ "launch_with_pdl": self.launch_with_pdl,
+ "trigger_completion_at_end": self.trigger_completion_at_end,
+ "fp32_acc": self.fp32_acc,
+ }
+
+
+def flashinfer_fused_allreduce_rmsnorm(
+ input_tensor: torch.Tensor,
+ residual: Optional[torch.Tensor],
+ rms_gamma: torch.Tensor,
+ rms_eps: float,
+ allreduce_params: "FlashInferFusedAllReduceParams",
+ use_oneshot: bool,
+ norm_out: Optional[torch.Tensor] = None,
+):
+ """FlashInfer fused allreduce + rmsnorm operation."""
+ if flashinfer_comm is None or _FI_WORKSPACE_TENSOR is None:
+ raise RuntimeError("FlashInfer not available or workspace not initialized")
+
+ if norm_out is None:
+ norm_out = input_tensor
+ residual_out = residual
+ else:
+ residual_out = input_tensor
+
+ flashinfer_comm.trtllm_allreduce_fusion(
+ allreduce_in=input_tensor,
+ token_num=input_tensor.shape[0],
+ residual_in=residual,
+ residual_out=residual_out,
+ norm_out=norm_out,
+ rms_gamma=rms_gamma,
+ rms_eps=rms_eps,
+ hidden_dim=input_tensor.shape[-1],
+ workspace_ptrs=_FI_WORKSPACE_TENSOR,
+ pattern_code=flashinfer_comm.AllReduceFusionPattern.kARResidualRMSNorm,
+ allreduce_out=None,
+ quant_out=None,
+ scale_out=None,
+ layout_code=None,
+ scale_factor=None,
+ use_oneshot=use_oneshot,
+ **allreduce_params.get_trtllm_fused_allreduce_kwargs(),
+ )
+
+
+def flashinfer_fused_allreduce_rmsnorm_fp8_quant(
+ input_tensor: torch.Tensor,
+ residual: Optional[torch.Tensor],
+ rms_gamma: torch.Tensor,
+ rms_eps: float,
+ scale_factor: torch.Tensor,
+ allreduce_params: FlashInferFusedAllReduceParams,
+ use_oneshot: bool = True,
+ norm_out: Optional[torch.Tensor] = None,
+ quant_out: Optional[torch.Tensor] = None,
+):
+ """FlashInfer fused allreduce + rmsnorm + FP8 quantization."""
+ if flashinfer_comm is None or _FI_WORKSPACE_TENSOR is None:
+ raise RuntimeError("FlashInfer not available or workspace not initialized")
+
+ if norm_out is None:
+ norm_out = input_tensor
+ residual_out = residual
+ else:
+ residual_out = input_tensor
+
+ flashinfer_comm.trtllm_allreduce_fusion(
+ allreduce_in=input_tensor,
+ token_num=input_tensor.shape[0],
+ residual_in=residual,
+ residual_out=residual_out,
+ norm_out=norm_out,
+ rms_gamma=rms_gamma,
+ rms_eps=rms_eps,
+ hidden_dim=input_tensor.shape[-1],
+ workspace_ptrs=_FI_WORKSPACE_TENSOR,
+ pattern_code=flashinfer_comm.AllReduceFusionPattern.kARResidualRMSNormFP8Quant,
+ allreduce_out=None,
+ quant_out=quant_out,
+ scale_out=None,
+ layout_code=None,
+ scale_factor=scale_factor,
+ use_oneshot=use_oneshot,
+ **allreduce_params.get_trtllm_fused_allreduce_kwargs(),
+ )
+
+
+def flashinfer_fused_allreduce_rmsnorm_fp4_quant(
+ input_tensor: torch.Tensor,
+ residual: Optional[torch.Tensor],
+ rms_gamma: torch.Tensor,
+ rms_eps: float,
+ input_global_scale: torch.Tensor,
+ allreduce_params: FlashInferFusedAllReduceParams,
+ quant_out: torch.Tensor,
+ use_oneshot: bool,
+ output_scale: torch.Tensor,
+ norm_out: Optional[torch.Tensor] = None,
+):
+ """FlashInfer fused allreduce + rmsnorm + FP4 quantization."""
+ if flashinfer_comm is None or _FI_WORKSPACE_TENSOR is None:
+ raise RuntimeError("FlashInfer not available or workspace not initialized")
+
+ if norm_out is None:
+ norm_out = input_tensor
+ residual_out = residual
+ else:
+ residual_out = input_tensor
+
+ flashinfer_comm.trtllm_allreduce_fusion(
+ allreduce_in=input_tensor,
+ token_num=input_tensor.shape[0],
+ residual_in=residual,
+ residual_out=residual_out,
+ norm_out=norm_out,
+ rms_gamma=rms_gamma,
+ rms_eps=rms_eps,
+ hidden_dim=input_tensor.shape[-1],
+ workspace_ptrs=_FI_WORKSPACE_TENSOR,
+ pattern_code=flashinfer_comm.AllReduceFusionPattern.kARResidualRMSNormFP4Quant,
+ allreduce_out=None,
+ quant_out=quant_out,
+ scale_out=output_scale,
+ layout_code=None,
+ scale_factor=input_global_scale,
+ use_oneshot=use_oneshot,
+ **allreduce_params.get_trtllm_fused_allreduce_kwargs(),
+ )
+
+
+def standard_allreduce_rmsnorm(
+ input_tensor: torch.Tensor,
+ residual: Optional[torch.Tensor],
+ rms_gamma: torch.Tensor,
+ rms_eps: float,
+ norm_out: Optional[torch.Tensor] = None,
+):
+ """Standard allreduce + rmsnorm operations."""
+ # All-reduce first
+ allreduce_out = tensor_model_parallel_all_reduce(input_tensor)
+ # Then RMS norm
+ if residual is not None:
+ # Fused add + RMS norm (in-place on allreduce_out)
+ if SGL_FUSED_ADD_RMS_NORM is not None:
+ SGL_FUSED_ADD_RMS_NORM(allreduce_out, residual, rms_gamma, rms_eps)
+ else:
+ rms = RMSNorm(allreduce_out.shape[-1], eps=rms_eps)
+ rms.weight.data = rms_gamma
+ rms.forward_native(allreduce_out, residual)
+ else:
+ # Just RMS norm
+ if SGL_RMS_NORM is not None:
+ _ = SGL_RMS_NORM(allreduce_out, rms_gamma, rms_eps)
+ else:
+ rms = RMSNorm(allreduce_out.shape[-1], eps=rms_eps)
+ rms.weight.data = rms_gamma
+ _ = rms.forward_native(allreduce_out)
+
+
+def standard_allreduce_rmsnorm_fp8_quant(
+ input_tensor: torch.Tensor,
+ residual: Optional[torch.Tensor],
+ rms_gamma: torch.Tensor,
+ rms_eps: float,
+ scale_factor: torch.Tensor,
+ norm_out: Optional[torch.Tensor] = None,
+ quant_out: Optional[torch.Tensor] = None,
+):
+ """Standard allreduce + rmsnorm + FP8 quantization."""
+ # All-reduce first
+ allreduce_out = tensor_model_parallel_all_reduce(input_tensor)
+
+ # Then RMS norm + static FP8 quantization
+ if residual is not None:
+ if SGL_FUSED_ADD_RMS_NORM is not None:
+ SGL_FUSED_ADD_RMS_NORM(allreduce_out, residual, rms_gamma, rms_eps)
+ quant_out, _ = static_quant_fp8(
+ allreduce_out, scale_factor, repeat_scale=False
+ )
+ else:
+ rms = RMSNorm(allreduce_out.shape[-1], eps=rms_eps)
+ rms.weight.data = rms_gamma
+ normed, _ = rms.forward_native(allreduce_out, residual)
+ quant_out, _ = static_quant_fp8(normed, scale_factor, repeat_scale=False)
+ return quant_out, residual
+ else:
+ if SGL_RMS_NORM is not None:
+ normed = SGL_RMS_NORM(allreduce_out, rms_gamma, rms_eps)
+ else:
+ rms = RMSNorm(allreduce_out.shape[-1], eps=rms_eps)
+ rms.weight.data = rms_gamma
+ normed = rms.forward_native(allreduce_out)
+ quant_out, _ = static_quant_fp8(normed, scale_factor, repeat_scale=False)
+ return quant_out
+
+
+def standard_allreduce_rmsnorm_fp4_quant(
+ input_tensor: torch.Tensor,
+ residual: Optional[torch.Tensor],
+ rms_gamma: torch.Tensor,
+ rms_eps: float,
+ input_global_scale: torch.Tensor,
+ quant_out: torch.Tensor,
+ output_scale: torch.Tensor,
+ norm_out: Optional[torch.Tensor] = None,
+):
+ """Standard allreduce + rmsnorm + FP4 quantization."""
+
+ # All-reduce first
+ allreduce_out = tensor_model_parallel_all_reduce(input_tensor)
+
+ # Then RMS norm
+ if residual is not None:
+ if SGL_FUSED_ADD_RMS_NORM is not None:
+ SGL_FUSED_ADD_RMS_NORM(allreduce_out, residual, rms_gamma, rms_eps)
+ quant_input = allreduce_out
+ else:
+ rms = RMSNorm(allreduce_out.shape[-1], eps=rms_eps)
+ rms.weight.data = rms_gamma
+ quant_input, _ = rms.forward_native(allreduce_out, residual)
+ residual_out = residual
+ else:
+ if SGL_RMS_NORM is not None:
+ quant_input = SGL_RMS_NORM(allreduce_out, rms_gamma, rms_eps)
+ else:
+ rms = RMSNorm(allreduce_out.shape[-1], eps=rms_eps)
+ rms.weight.data = rms_gamma
+ quant_input = rms.forward_native(allreduce_out)
+ residual_out = allreduce_out
+
+ # Finally FP4 quantization
+ if SGL_SCALED_FP4_QUANT is None:
+ raise RuntimeError("scaled_fp4_quant is not available on this platform")
+ quant_res, output_scale_res = SGL_SCALED_FP4_QUANT(quant_input, input_global_scale)
+ if residual is not None:
+ return quant_res, residual_out, output_scale_res
+ else:
+ return quant_res, quant_input
+
+
+def standard_allreduce_rmsnorm_native(
+ input_tensor: torch.Tensor,
+ residual: Optional[torch.Tensor],
+ rmsnorm_layer: RMSNorm,
+ norm_out: Optional[torch.Tensor] = None,
+):
+ """Standard allreduce + rmsnorm operations using native RMSNorm forward."""
+ # All-reduce first
+ allreduce_out = tensor_model_parallel_all_reduce(input_tensor)
+ # Apply native RMSNorm
+ if residual is not None:
+ result = rmsnorm_layer.forward_native(allreduce_out, residual)
+ return result # Returns (norm_out, residual_out)
+ else:
+ result = rmsnorm_layer.forward_native(allreduce_out)
+ return result # Returns norm_out
+
+
+def standard_allreduce_rmsnorm_fp8_quant_native(
+ input_tensor: torch.Tensor,
+ residual: Optional[torch.Tensor],
+ rmsnorm_layer: RMSNorm,
+ scale_factor: torch.Tensor,
+ norm_out: Optional[torch.Tensor] = None,
+ quant_out: Optional[torch.Tensor] = None,
+):
+ """Standard allreduce + rmsnorm + FP8 quantization using native implementations."""
+ # All-reduce first
+ allreduce_out = tensor_model_parallel_all_reduce(input_tensor)
+
+ # Apply native RMSNorm
+ if residual is not None:
+ norm_out, residual_out = rmsnorm_layer.forward_native(allreduce_out, residual)
+ else:
+ norm_out = rmsnorm_layer.forward_native(allreduce_out)
+ residual_out = allreduce_out
+
+ # Apply native FP8 quantization
+ quant_out, _ = static_quant_fp8(norm_out, scale_factor, repeat_scale=False)
+
+ if residual is not None:
+ return quant_out, residual_out
+ else:
+ return quant_out
+
+
+def standard_allreduce_rmsnorm_fp4_quant_native(
+ input_tensor: torch.Tensor,
+ residual: Optional[torch.Tensor],
+ rmsnorm_layer: RMSNorm,
+ input_global_scale: torch.Tensor,
+ quant_out: torch.Tensor,
+ output_scale: torch.Tensor,
+ norm_out: Optional[torch.Tensor] = None,
+):
+ """Standard allreduce + rmsnorm + FP4 quantization using native RMSNorm."""
+ # All-reduce first
+ allreduce_out = tensor_model_parallel_all_reduce(input_tensor)
+
+ # Apply native RMSNorm
+ if residual is not None:
+ norm_out, residual_out = rmsnorm_layer.forward_native(allreduce_out, residual)
+ quant_input = norm_out
+ else:
+ norm_out = rmsnorm_layer.forward_native(allreduce_out)
+ quant_input = norm_out
+ residual_out = allreduce_out
+
+ # Apply FP4 quantization (still using fused CUDA op as there's no native FP4)
+ if SGL_SCALED_FP4_QUANT is None:
+ raise RuntimeError("scaled_fp4_quant is not available on this platform")
+ quant_res, output_scale_res = SGL_SCALED_FP4_QUANT(quant_input, input_global_scale)
+
+ if residual is not None:
+ return quant_res, residual_out, output_scale_res
+ else:
+ return quant_res, norm_out
+
+
+# Compiled versions of native functions
+@torch.compile
+def standard_allreduce_rmsnorm_native_compiled(
+ input_tensor: torch.Tensor,
+ residual: Optional[torch.Tensor],
+ rmsnorm_layer: RMSNorm,
+ norm_out: Optional[torch.Tensor] = None,
+):
+ """Compiled version of standard allreduce + rmsnorm."""
+ return standard_allreduce_rmsnorm_native(
+ input_tensor, residual, rmsnorm_layer, norm_out
+ )
+
+
+@torch.compile
+def standard_allreduce_rmsnorm_fp8_quant_native_compiled(
+ input_tensor: torch.Tensor,
+ residual: Optional[torch.Tensor],
+ rmsnorm_layer: RMSNorm,
+ scale_factor: torch.Tensor,
+ norm_out: Optional[torch.Tensor] = None,
+ quant_out: Optional[torch.Tensor] = None,
+):
+ """Compiled version of standard allreduce + rmsnorm + FP8 quantization."""
+ return standard_allreduce_rmsnorm_fp8_quant_native(
+ input_tensor,
+ residual,
+ rmsnorm_layer,
+ scale_factor,
+ norm_out,
+ quant_out,
+ )
+
+
+@torch.compile
+def standard_allreduce_rmsnorm_fp4_quant_native_compiled(
+ input_tensor: torch.Tensor,
+ residual: Optional[torch.Tensor],
+ rmsnorm_layer: RMSNorm,
+ input_global_scale: torch.Tensor,
+ quant_out: torch.Tensor,
+ output_scale: torch.Tensor,
+ norm_out: Optional[torch.Tensor] = None,
+):
+ """Compiled version of standard allreduce + rmsnorm + FP4 quantization."""
+ return standard_allreduce_rmsnorm_fp4_quant_native(
+ input_tensor,
+ residual,
+ rmsnorm_layer,
+ input_global_scale,
+ quant_out,
+ output_scale,
+ norm_out,
+ )
+
+
+def create_test_tensors(
+ seq_len: int, hidden_dim: int, dtype: torch.dtype, use_residual: bool = True
+):
+ """Create test tensors for benchmarking."""
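+    # Allocations land on the default CUDA device selected in main() via
+    # torch.set_default_device, so no explicit device argument is needed here.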
+ input_tensor = torch.randn(seq_len, hidden_dim, dtype=dtype)
+ residual = (
+ torch.randn_like(input_tensor)
+ if use_residual
+ else torch.zeros_like(input_tensor)
+ )
+ rms_gamma = torch.ones(hidden_dim, dtype=dtype)
+ norm_out = None if use_residual else torch.empty_like(input_tensor)
+
+ # Quantization scales
+ scale_fp8 = torch.tensor(1.0, dtype=torch.float32)
+ scale_fp4 = torch.tensor(1.0, dtype=torch.float32)
+ quant_out_fp8 = torch.empty_like(input_tensor, dtype=FP8_DTYPE)
+ # Pre-allocate FP4 output tensors (to avoid allocation overhead in benchmarks)
+ fp4_quant_out = torch.empty((seq_len, hidden_dim // 2), dtype=torch.uint8)
+ fp4_output_scale = torch.empty((128, 4), dtype=torch.int32)
+
+ return (
+ input_tensor,
+ norm_out,
+ residual,
+ rms_gamma,
+ scale_fp8,
+ quant_out_fp8,
+ scale_fp4,
+ fp4_quant_out,
+ fp4_output_scale,
+ )
+
+
+def benchmark_operation(
+ operation_func, *args, warmup: int = 5, trials: int = 20, **kwargs
+):
+ """Benchmark a single operation using CUDA graphs."""
+ # Warmup before graph capture
+ for _ in range(warmup):
+ operation_func(*args, **kwargs)
+ torch.cuda.synchronize()
+
+ # Create CUDA graph
+ graph = torch.cuda.CUDAGraph()
+ num_op_per_cudagraph = 10
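+    # Capture several iterations per graph so replay launch overhead is amortized
+    # and the per-op average reflects kernel time rather than replay dispatch cost.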
+
+ # Use sglang's graph_capture to make tensor_model_parallel_all_reduce graph-safe
+ with graph_capture() as graph_capture_context:
+ with torch.cuda.graph(graph, stream=graph_capture_context.stream):
+ for _ in range(num_op_per_cudagraph):
+ operation_func(*args, **kwargs)
+
+ # Graph warmup
+ torch.cuda.synchronize()
+ for _ in range(warmup):
+ graph.replay()
+
+    # Benchmark with CUDA graph; each replay executes num_op_per_cudagraph operations
+    num_replays = max(1, trials // num_op_per_cudagraph)
+    torch.cuda.synchronize()
+    start_time = time.perf_counter()
+
+    for _ in range(num_replays):
+        graph.replay()
+
+    torch.cuda.synchronize()
+    end_time = time.perf_counter()
+
+    total_ops = num_replays * num_op_per_cudagraph
+    avg_time_ms = ((end_time - start_time) / total_ops) * 1000
+    return avg_time_ms
+
+
+def run_benchmarks(
+ seq_len: int,
+ hidden_dim: int,
+ dtype: torch.dtype,
+ use_residual: bool,
+ allreduce_params: Optional[FlashInferFusedAllReduceParams],
+ quant_mode: str = "all",
+ disable_oneshot: bool = False,
+):
+ """Run all benchmarks for given configuration.
+
+ Args:
+ quant_mode: "none", "fp8_only", "fp4_only", or "all"
+ """
+ (
+ input_tensor,
+ norm_out,
+ residual,
+ rms_gamma,
+ scale_fp8,
+ quant_out_fp8,
+ scale_fp4,
+ fp4_quant_out,
+ fp4_output_scale,
+ ) = create_test_tensors(seq_len, hidden_dim, dtype, use_residual)
+
+ rms_eps = 1e-6
+ results = {}
+
+ # Create RMSNorm once for native benchmarks
+ rmsnorm_layer = RMSNorm(hidden_dim, eps=rms_eps)
+ rmsnorm_layer.weight.data = rms_gamma
+
+ if quant_mode in ["all", "none"]:
+ # Standard AllReduce + RMSNorm
+ try:
+ time_ms = benchmark_operation(
+ standard_allreduce_rmsnorm,
+ input_tensor,
+ norm_out=norm_out,
+ residual=residual,
+ rms_gamma=rms_gamma,
+ rms_eps=rms_eps,
+ )
+ results["standard_allreduce_rmsnorm"] = time_ms
+ except Exception as e:
+ logger.error("Standard AllReduce+RMSNorm failed: %s", e)
+ results["standard_allreduce_rmsnorm"] = float("inf")
+
+ # Standard AllReduce + RMSNorm Native Compiled
+ try:
+ time_ms = benchmark_operation(
+ standard_allreduce_rmsnorm_native_compiled,
+ input_tensor,
+ residual=residual,
+ rmsnorm_layer=rmsnorm_layer,
+ norm_out=norm_out,
+ )
+ results["standard_allreduce_rmsnorm_native_compiled"] = time_ms
+ except Exception as e:
+ logger.error("Standard AllReduce+RMSNorm Native Compiled failed: %s", e)
+ results["standard_allreduce_rmsnorm_native_compiled"] = float("inf")
+
+ # FlashInfer Fused AllReduce + RMSNorm Oneshot
+ if flashinfer_comm is not None and allreduce_params is not None:
+ try:
+ if not disable_oneshot:
+ time_ms = benchmark_operation(
+ flashinfer_fused_allreduce_rmsnorm,
+ input_tensor,
+ residual=residual,
+ norm_out=norm_out,
+ rms_gamma=rms_gamma,
+ rms_eps=rms_eps,
+ allreduce_params=allreduce_params,
+ use_oneshot=True,
+ )
+ results["flashinfer_fused_allreduce_rmsnorm_oneshot"] = time_ms
+ except Exception as e:
+ logger.error("FlashInfer Fused AllReduce+RMSNorm Oneshot failed: %s", e)
+ results["flashinfer_fused_allreduce_rmsnorm_oneshot"] = float("inf")
+
+ # FlashInfer Fused AllReduce + RMSNorm Two-shot
+ try:
+ time_ms = benchmark_operation(
+ flashinfer_fused_allreduce_rmsnorm,
+ input_tensor,
+ residual=residual,
+ norm_out=norm_out,
+ rms_gamma=rms_gamma,
+ rms_eps=rms_eps,
+ allreduce_params=allreduce_params,
+ use_oneshot=False,
+ )
+ results["flashinfer_fused_allreduce_rmsnorm_twoshot"] = time_ms
+ except Exception as e:
+ logger.error(
+ "FlashInfer Fused AllReduce+RMSNorm Two-shot failed: %s", e
+ )
+ results["flashinfer_fused_allreduce_rmsnorm_twoshot"] = float("inf")
+
+ if quant_mode in ["all", "fp8_only"]:
+ # Standard AllReduce + RMSNorm + FP8 Quant
+ try:
+ time_ms = benchmark_operation(
+ standard_allreduce_rmsnorm_fp8_quant,
+ input_tensor,
+ norm_out=norm_out,
+ residual=residual,
+ rms_gamma=rms_gamma,
+ rms_eps=rms_eps,
+ scale_factor=scale_fp8,
+ quant_out=quant_out_fp8,
+ )
+ results["standard_allreduce_rmsnorm_fp8_quant"] = time_ms
+ except Exception as e:
+ logger.error("Standard AllReduce+RMSNorm+FP8 failed: %s", e)
+ results["standard_allreduce_rmsnorm_fp8_quant"] = float("inf")
+
+ # Standard AllReduce + RMSNorm + FP8 Quant Native Compiled
+ try:
+ time_ms = benchmark_operation(
+ standard_allreduce_rmsnorm_fp8_quant_native_compiled,
+ input_tensor,
+ residual=residual,
+ rmsnorm_layer=rmsnorm_layer,
+ # quant_fp8_layer removed in sglang version; static_quant_fp8 is used within the function
+ scale_factor=scale_fp8,
+ norm_out=norm_out,
+ quant_out=quant_out_fp8,
+ )
+ results["standard_allreduce_rmsnorm_fp8_quant_native_compiled"] = time_ms
+ except Exception as e:
+ logger.error("Standard AllReduce+RMSNorm+FP8 Native Compiled failed: %s", e)
+ results["standard_allreduce_rmsnorm_fp8_quant_native_compiled"] = float(
+ "inf"
+ )
+
+ # FlashInfer Fused AllReduce + RMSNorm + FP8 Quant Oneshot
+ if flashinfer_comm is not None and allreduce_params is not None:
+ try:
+ if not disable_oneshot:
+ time_ms = benchmark_operation(
+ flashinfer_fused_allreduce_rmsnorm_fp8_quant,
+ input_tensor,
+ norm_out=norm_out,
+ residual=residual,
+ rms_gamma=rms_gamma,
+ rms_eps=rms_eps,
+ scale_factor=scale_fp8,
+ quant_out=quant_out_fp8,
+ allreduce_params=allreduce_params,
+ use_oneshot=True,
+ )
+ results["flashinfer_fused_allreduce_rmsnorm_fp8_quant_oneshot"] = (
+ time_ms
+ )
+ except Exception as e:
+ logger.error(
+ "FlashInfer Fused AllReduce+RMSNorm+FP8 Oneshot failed: %s",
+ e,
+ )
+ results["flashinfer_fused_allreduce_rmsnorm_fp8_quant_oneshot"] = float(
+ "inf"
+ )
+ # FlashInfer Fused AllReduce + RMSNorm + FP8 Quant Two-shot
+ try:
+ time_ms = benchmark_operation(
+ flashinfer_fused_allreduce_rmsnorm_fp8_quant,
+ input_tensor,
+ norm_out=norm_out,
+ residual=residual,
+ rms_gamma=rms_gamma,
+ rms_eps=rms_eps,
+ scale_factor=scale_fp8,
+ quant_out=quant_out_fp8,
+ allreduce_params=allreduce_params,
+ use_oneshot=False,
+ )
+ results["flashinfer_fused_allreduce_rmsnorm_fp8_quant_twoshot"] = (
+ time_ms
+ )
+ except Exception as e:
+ logger.error(
+ "FlashInfer Fused AllReduce+RMSNorm+FP8 Two-shot failed: %s",
+ e,
+ )
+ results["flashinfer_fused_allreduce_rmsnorm_fp8_quant_twoshot"] = float(
+ "inf"
+ )
+
+ if quant_mode in ["all", "fp4_only"]:
+ # Standard AllReduce + RMSNorm + FP4 Quant
+ try:
+ time_ms = benchmark_operation(
+ standard_allreduce_rmsnorm_fp4_quant,
+ input_tensor,
+ norm_out=norm_out,
+ residual=residual,
+ rms_gamma=rms_gamma,
+ rms_eps=rms_eps,
+ input_global_scale=scale_fp4,
+ quant_out=fp4_quant_out,
+ output_scale=fp4_output_scale,
+ )
+ results["standard_allreduce_rmsnorm_fp4_quant"] = time_ms
+ except Exception as e:
+ logger.error("Standard AllReduce+RMSNorm+FP4 failed: %s", e)
+ results["standard_allreduce_rmsnorm_fp4_quant"] = float("inf")
+
+ # Standard AllReduce + RMSNorm + FP4 Quant Native Compiled
+ try:
+ time_ms = benchmark_operation(
+ standard_allreduce_rmsnorm_fp4_quant_native_compiled,
+ input_tensor,
+ residual=residual,
+ rmsnorm_layer=rmsnorm_layer,
+ input_global_scale=scale_fp4,
+ quant_out=fp4_quant_out,
+ output_scale=fp4_output_scale,
+ norm_out=norm_out,
+ )
+ results["standard_allreduce_rmsnorm_fp4_quant_native_compiled"] = time_ms
+ except Exception as e:
+ logger.error("Standard AllReduce+RMSNorm+FP4 Native Compiled failed: %s", e)
+ results["standard_allreduce_rmsnorm_fp4_quant_native_compiled"] = float(
+ "inf"
+ )
+
+ # FlashInfer Fused AllReduce + RMSNorm + FP4 Quant Oneshot
+ if flashinfer_comm is not None and allreduce_params is not None:
+ try:
+ if not disable_oneshot:
+ time_ms = benchmark_operation(
+ flashinfer_fused_allreduce_rmsnorm_fp4_quant,
+ input_tensor,
+ residual=residual,
+ norm_out=norm_out,
+ rms_gamma=rms_gamma,
+ rms_eps=rms_eps,
+ input_global_scale=scale_fp4,
+ allreduce_params=allreduce_params,
+ quant_out=fp4_quant_out,
+ output_scale=fp4_output_scale,
+ use_oneshot=True,
+ )
+ results["flashinfer_fused_allreduce_rmsnorm_fp4_quant_oneshot"] = (
+ time_ms
+ )
+ except Exception as e:
+ logger.error(
+ "FlashInfer Fused AllReduce+RMSNorm+FP4 Oneshot failed: %s",
+ e,
+ )
+ results["flashinfer_fused_allreduce_rmsnorm_fp4_quant_oneshot"] = float(
+ "inf"
+ )
+
+ # FlashInfer Fused AllReduce + RMSNorm + FP4 Quant Two-shot
+ if flashinfer_comm is not None and allreduce_params is not None:
+ try:
+ time_ms = benchmark_operation(
+ flashinfer_fused_allreduce_rmsnorm_fp4_quant,
+ input_tensor,
+ residual=residual,
+ norm_out=norm_out,
+ rms_gamma=rms_gamma,
+ rms_eps=rms_eps,
+ input_global_scale=scale_fp4,
+ allreduce_params=allreduce_params,
+ quant_out=fp4_quant_out,
+ output_scale=fp4_output_scale,
+ use_oneshot=False,
+ )
+ results["flashinfer_fused_allreduce_rmsnorm_fp4_quant_twoshot"] = (
+ time_ms
+ )
+ except Exception as e:
+ logger.error(
+ "FlashInfer Fused AllReduce+RMSNorm+FP4 Two-shot failed: %s",
+ e,
+ )
+ results["flashinfer_fused_allreduce_rmsnorm_fp4_quant_twoshot"] = float(
+ "inf"
+ )
+
+ return results
+
+
+def prepare_results_with_speedups(results_dict):
+ """Prepare results with speedup calculations based on dynamic baseline selection."""
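+    # Speedups are reported against the fastest "standard" implementation of the
+    # same quantization family (plain vs. native-compiled); values above 1.0x mean
+    # the row is faster than that baseline, and the fastest baseline row itself
+    # is labeled "baseline".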
+ prepared_results = []
+
+ # Determine the fastest baseline for each operation type
+ def get_fastest_baseline(op_name, results_dict):
+ """Get the fastest baseline between standard and native_compiled versions."""
+ if "fp8_quant" in op_name:
+ candidates = [
+ "standard_allreduce_rmsnorm_fp8_quant",
+ "standard_allreduce_rmsnorm_fp8_quant_native_compiled",
+ ]
+ elif "fp4_quant" in op_name:
+ candidates = [
+ "standard_allreduce_rmsnorm_fp4_quant",
+ "standard_allreduce_rmsnorm_fp4_quant_native_compiled",
+ ]
+ else:
+ candidates = [
+ "standard_allreduce_rmsnorm",
+ "standard_allreduce_rmsnorm_native_compiled",
+ ]
+
+ # Find the fastest among available candidates
+ fastest_time = float("inf")
+ fastest_baseline = None
+
+ for candidate in candidates:
+ if (
+ candidate in results_dict
+ and results_dict[candidate] != float("inf")
+ and results_dict[candidate] < fastest_time
+ ):
+ fastest_time = results_dict[candidate]
+ fastest_baseline = candidate
+
+ return fastest_baseline
+
+ # Create dynamic baseline mapping
+ dynamic_baseline_mapping = {}
+ for op_name in results_dict:
+        if op_name.startswith("flashinfer_") or (
+            op_name.startswith("standard_")
+            and not op_name.endswith("_native_compiled")
+        ):
+ dynamic_baseline_mapping[op_name] = get_fastest_baseline(
+ op_name, results_dict
+ )
+
+ for op_name, time_ms in results_dict.items():
+ if time_ms == float("inf"):
+ speedup_str = "FAILED"
+ time_str = "FAILED"
+ else:
+ time_str = f"{time_ms:.3f}"
+ # Find the appropriate baseline for this operation
+ baseline_op = dynamic_baseline_mapping.get(op_name)
+ if baseline_op and baseline_op in results_dict:
+ baseline_time = results_dict[baseline_op]
+ if baseline_time != float("inf") and baseline_time > 0:
+ speedup = baseline_time / time_ms
+ speedup_str = f"{speedup:.2f}x"
+ else:
+ speedup_str = "N/A"
+ else:
+ # For baseline operations, determine if this is the fastest baseline
+ if op_name.endswith("_native_compiled") or (
+ op_name.startswith("standard_")
+ and not op_name.endswith("_native_compiled")
+ ):
+ fastest_baseline = get_fastest_baseline(op_name, results_dict)
+ if fastest_baseline == op_name:
+ speedup_str = "baseline"
+ else:
+ if fastest_baseline and fastest_baseline in results_dict:
+ baseline_time = results_dict[fastest_baseline]
+ if baseline_time != float("inf") and baseline_time > 0:
+ speedup = baseline_time / time_ms
+ speedup_str = f"{speedup:.2f}x"
+ else:
+ speedup_str = "N/A"
+ else:
+ speedup_str = "N/A"
+ else:
+ speedup_str = "N/A"
+
+ prepared_results.append(
+ {
+ "operation": op_name,
+ "time_ms": time_ms,
+ "time_str": time_str,
+ "speedup_str": speedup_str,
+ }
+ )
+
+ return prepared_results
+
+
+def print_results(results_dict, seq_len, hidden_dim, dtype, use_residual, quant_mode):
+ """Print benchmark results in a formatted table."""
+ print(f"\n{'=' * 80}")
+ print(f"Results: seq_len={seq_len}, hidden_dim={hidden_dim}")
+ print(
+ f"dtype={dtype}, residual={'yes' if use_residual else 'no'}, "
+ f"quant_mode={quant_mode}"
+ )
+ print(f"{'=' * 80}")
+ print(f"{'Operation':<50} {'Time (ms)':<12} {'Speedup':<10}")
+ print(f"{'-' * 80}")
+
+ # Prepare results with speedup calculations
+ prepared_results = prepare_results_with_speedups(results_dict)
+
+ for result in prepared_results:
+        # time_str already holds "FAILED" or the formatted millisecond value
+        time_display = result["time_str"]
+
+ print(
+ f"{result['operation']:<50} {time_display:<12} {result['speedup_str']:<10}"
+ )
+
+
+def format_results_markdown(
+ all_results: list[dict], world_size: int, args: argparse.Namespace
+) -> str:
+ """Format all benchmark results as markdown."""
+ markdown = f"""# FlashInfer Fused Collective Operations Benchmark Results
+
+**World Size:** {world_size}
+**Hidden Dimension:** {args.hidden_dim}
+**Warmup Iterations:** {args.warmup}
+**Benchmark Trials:** {args.trials}
+**Quantization Mode:** {all_results[0]["quant_mode"] if all_results else "N/A"}
+
+---
+
+"""
+
+ for result in all_results:
+ seq_len = result["seq_len"]
+ dtype = result["dtype"]
+ use_residual = result["use_residual"]
+ results_dict = result["results"]
+
+ residual_str = "with residual" if use_residual else "no residual"
+
+ markdown += f"""
+## Configuration: seq_len={seq_len}, dtype={dtype}, {residual_str}
+
+| Operation | Time (ms) | Speedup |
+|-----------|-----------|---------|
+"""
+
+ # Prepare results with speedup calculations
+ prepared_results = prepare_results_with_speedups(results_dict)
+
+        for prepared in prepared_results:
+            # Format operation name for better readability; avoid shadowing the
+            # outer loop variable `result`
+            formatted_op_name = prepared["operation"].replace("_", " ").title()
+            markdown += (
+                f"| {formatted_op_name} | {prepared['time_str']} "
+                f"| {prepared['speedup_str']} |\n"
+            )
+
+ markdown += "\n"
+
+ return markdown
+
+
+def save_results_to_file(
+ all_results: list[dict], world_size: int, args: argparse.Namespace, rank: int
+):
+ """Save benchmark results to markdown file (only on rank 0)."""
+ if rank != 0:
+ return
+
+ if not all_results:
+ logger.warning("No results to save")
+ return
+
+ output_path = args.output_file
+
+ try:
+ markdown_content = format_results_markdown(all_results, world_size, args)
+
+ with open(output_path, "w") as f:
+ f.write(markdown_content)
+
+ except Exception as e:
+ logger.error("Failed to save results to file: %s", e)
+
+
+def main():
+ parser = argparse.ArgumentParser(
+ description="Benchmark fused collective operations"
+ )
+ parser.add_argument(
+ "--seq-lens",
+ type=int,
+ nargs="+",
+ default=[128, 512, 1024, 2048],
+ help="Sequence lengths to test",
+ )
+ parser.add_argument(
+ "--hidden-dim", type=int, default=8192, help="Hidden dimension size"
+ )
+ parser.add_argument(
+ "--dtypes",
+ type=str,
+ nargs="+",
+ default=["bfloat16"],
+ choices=["float16", "bfloat16", "float32"],
+ help="Data types to test",
+ )
+ parser.add_argument(
+ "--no-residual",
+ action="store_true",
+ help="Skip residual connection tests",
+ )
+
+ # Quantization mode options (mutually exclusive with --no-quant)
+ quant_group = parser.add_mutually_exclusive_group()
+ quant_group.add_argument(
+ "--no-quant", action="store_true", help="Skip all quantization tests"
+ )
+ quant_group.add_argument(
+ "--quant-fp8", action="store_true", help="Only run FP8 quantization tests"
+ )
+ quant_group.add_argument(
+ "--quant-fp4", action="store_true", help="Only run FP4 quantization tests"
+ )
+ quant_group.add_argument(
+ "--quant-all",
+ action="store_true",
+ help="Run all quantization tests (default)",
+ )
+
+ parser.add_argument(
+ "--disable-oneshot",
+ action="store_true",
+ help="Disable oneshot mode for FlashInfer operations",
+ )
+ parser.add_argument(
+ "--warmup", type=int, default=5, help="Number of warmup iterations"
+ )
+ parser.add_argument(
+ "--trials", type=int, default=20, help="Number of benchmark trials"
+ )
+    parser.add_argument(
+        "--output-file",
+        type=str,
+        help="Output file path for markdown results; "
+        "if omitted, results are only printed to stdout",
+    )
+
+ args = parser.parse_args()
+
+ # Check if running with torchrun (required for collective operations)
+ if "RANK" not in os.environ or "WORLD_SIZE" not in os.environ:
+ raise RuntimeError(
+ "Must run with torchrun for distributed benchmarking. "
+ "Example: torchrun --nproc_per_node=2 benchmark_fused_collective.py"
+ )
+
+ # Initialize distributed environment
+ rank = int(os.environ["RANK"])
+ world_size = int(os.environ["WORLD_SIZE"])
+
+ device = torch.device(f"cuda:{rank}")
+ torch.cuda.set_device(device)
+ torch.set_default_device(device)
+
+ init_distributed_environment(
+ world_size=world_size,
+ rank=rank,
+ local_rank=rank,
+ backend="nccl",
+ )
+ initialize_model_parallel(tensor_model_parallel_size=world_size)
+
+ # Validate world size (must be > 1 for collective operations)
+ if world_size <= 1:
+ raise ValueError(
+ "World size must be > 1 for collective operations benchmarking. "
+ f"Current world size: {world_size}. Use torchrun with --nproc_per_node > 1."
+ )
+
+ # Determine quantization mode
+ if args.no_quant:
+ quant_mode = "none"
+ elif args.quant_fp8:
+ quant_mode = "fp8_only"
+ elif args.quant_fp4:
+ quant_mode = "fp4_only"
+ else: # args.quant_all or default
+ quant_mode = "all"
+
+ if rank == 0:
+ logger.info("Running benchmark with world_size=%s, rank=%s", world_size, rank)
+ logger.info("Quantization mode: %s", quant_mode)
+ if flashinfer_comm is not None:
+ oneshot_status = "enabled" if not args.disable_oneshot else "disabled"
+ logger.info(
+ "FlashInfer available - will benchmark fused operations (oneshot: %s)",
+ oneshot_status,
+ )
+ else:
+ logger.info(
+ "FlashInfer not available - only benchmarking standard operations"
+ )
+
+ # Convert dtype strings to torch dtypes
+ dtype_map = {
+ "float16": torch.float16,
+ "bfloat16": torch.bfloat16,
+ "float32": torch.float32,
+ }
+ dtypes = [dtype_map[dt] for dt in args.dtypes]
+
+ # Test configurations
+    residual_options = [False] if args.no_residual else [True, False]
+
+ configs = list(itertools.product(args.seq_lens, dtypes, residual_options))
+
+ # Setup FlashInfer workspace if available
+ ipc_handles = None
+ allreduce_params = None
+
+ if flashinfer_comm is not None:
+        # Derive the max token count from the per-world-size workspace size limit
+        fi_max_size = _FI_MAX_SIZES.get(world_size)
+        if fi_max_size is None:
+            raise ValueError(
+                f"_FI_MAX_SIZES has no entry for world_size={world_size}; "
+                "cannot size the FlashInfer workspace"
+            )
+        max_num_token = fi_max_size // (args.hidden_dim * world_size * 2)
+
+ ipc_handles, workspace_tensor = setup_flashinfer_workspace(
+ world_size, rank, args.hidden_dim, max_num_token
+ )
+
+ if workspace_tensor is not None:
+ allreduce_params = FlashInferFusedAllReduceParams(
+ rank=rank,
+ world_size=world_size,
+ max_token_num=max_num_token,
+ )
+
+ # Collect all results for markdown export
+ all_results = []
+
+ try:
+ # Run benchmarks
+ for seq_len, dtype, use_residual in configs:
+ if rank == 0:
+ logger.info(
+ "\nTesting: seq_len=%s, hidden_dim=%s, dtype=%s, residual=%s",
+ seq_len,
+ args.hidden_dim,
+ dtype,
+ use_residual,
+ )
+
+ results = run_benchmarks(
+ seq_len,
+ args.hidden_dim,
+ dtype,
+ use_residual,
+ allreduce_params,
+ quant_mode=quant_mode,
+ disable_oneshot=args.disable_oneshot,
+ )
+
+ # Store results for markdown export
+ if rank == 0:
+ all_results.append(
+ {
+ "seq_len": seq_len,
+ "hidden_dim": args.hidden_dim,
+ "dtype": str(dtype).replace("torch.", ""),
+ "use_residual": use_residual,
+ "quant_mode": quant_mode,
+ "results": results,
+ }
+ )
+
+ print_results(
+ results,
+ seq_len,
+ args.hidden_dim,
+ dtype,
+ use_residual,
+ quant_mode,
+ )
+
+ # Save results to markdown file
+ if args.output_file and rank == 0:
+ save_results_to_file(all_results, world_size, args, rank)
+
+ finally:
+ # Cleanup
+ if ipc_handles is not None:
+ cleanup_flashinfer_workspace(ipc_handles)
+
+ with contextlib.suppress(Exception):
+ dist.barrier()
+ cleanup_dist_env_and_memory(shutdown_ray=False)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/benchmark/kernels/fused_moe_triton/benchmark_sglang_fused_moe_triton.py b/benchmark/kernels/fused_moe_triton/benchmark_sglang_fused_moe_triton.py
index dd8504fd90c..7621628c18f 100644
--- a/benchmark/kernels/fused_moe_triton/benchmark_sglang_fused_moe_triton.py
+++ b/benchmark/kernels/fused_moe_triton/benchmark_sglang_fused_moe_triton.py
@@ -17,6 +17,8 @@
from sglang.srt.layers.moe.fused_moe_triton.triton_kernels_moe import (
triton_kernel_moe_forward,
)
+from sglang.srt.layers.moe.moe_runner import MoeRunnerConfig
+from sglang.srt.layers.moe.topk import TopK, TopKConfig, select_experts
def get_model_config(model_name: str, tp_size: int):
@@ -80,13 +82,26 @@ def fused_moe_triton_api(
input_gating,
topk,
):
+ topk_op = TopK(
+ top_k=topk,
+ renormalize=False,
+ use_grouped_topk=False,
+ )
+ topk_op.use_triton_kernels = True
+ triton_topk_output = topk_op.forward_cuda(
+ hidden_states=x,
+ router_logits=input_gating,
+ )
+
+ moe_runner_config = MoeRunnerConfig(
+ inplace=False,
+ )
return triton_kernel_moe_forward(
x,
w1,
w2,
- input_gating,
- topk,
- renormalize=False,
+ triton_topk_output,
+ moe_runner_config,
)
@@ -103,14 +118,16 @@ def fused_moe_sglang_api(
a2_scale=None,
block_shape=None,
):
+ topk_output = select_experts(
+ hidden_states=x,
+ router_logits=input_gating,
+ topk_config=TopKConfig(top_k=topk, renormalize=False),
+ )
return fused_moe_sglang(
x,
w1,
w2,
- input_gating,
- topk,
- renormalize=False,
- inplace=True,
+ topk_output,
use_fp8_w8a8=use_fp8_w8a8,
w1_scale=w1_scale,
w2_scale=w2_scale,
diff --git a/benchmark/kernels/fused_moe_triton/benchmark_sum_scale.py b/benchmark/kernels/fused_moe_triton/benchmark_sum_scale.py
index 13ff617448e..979d2bbd111 100644
--- a/benchmark/kernels/fused_moe_triton/benchmark_sum_scale.py
+++ b/benchmark/kernels/fused_moe_triton/benchmark_sum_scale.py
@@ -4,7 +4,6 @@
from triton.testing import do_bench
-# _moe_sum_reduce_kernel kernel modified from https://github.com/ModelTC/lightllm/blob/main/lightllm/common/fused_moe/moe_sum_reduce.py
@triton.jit
def _moe_sum_reduce_kernel(
input_ptr,
@@ -29,31 +28,35 @@ def _moe_sum_reduce_kernel(
token_block_id = tl.program_id(0)
dim_block_id = tl.program_id(1)
- token_start = token_block_id * BLOCK_M
- token_end = min((token_block_id + 1) * BLOCK_M, token_num)
+ offs_token = token_block_id * BLOCK_M + tl.arange(0, BLOCK_M)
+ offs_dim = dim_block_id * BLOCK_DIM + tl.arange(0, BLOCK_DIM)
- dim_start = dim_block_id * BLOCK_DIM
- dim_end = min((dim_block_id + 1) * BLOCK_DIM, hidden_dim)
+ mask_token = offs_token < token_num
+ mask_dim = offs_dim < hidden_dim
- offs_dim = dim_start + tl.arange(0, BLOCK_DIM)
+ base_ptrs = input_ptr + offs_token[:, None] * input_stride_0 + offs_dim[None, :]
- for token_index in range(token_start, token_end):
- accumulator = tl.zeros((BLOCK_DIM,), dtype=tl.float32)
- input_t_ptr = input_ptr + token_index * input_stride_0 + offs_dim
- for i in tl.range(0, topk_num, num_stages=NUM_STAGE):
- tmp = tl.load(
- input_t_ptr + i * input_stride_1, mask=offs_dim < dim_end, other=0.0
- )
- accumulator += tmp
- accumulator = accumulator * routed_scaling_factor
- store_t_ptr = output_ptr + token_index * output_stride_0 + offs_dim
- tl.store(
- store_t_ptr,
- accumulator.to(input_ptr.dtype.element_ty),
- mask=offs_dim < dim_end,
+ accumulator = tl.zeros((BLOCK_M, BLOCK_DIM), dtype=tl.float32)
+
+ for i in tl.range(0, topk_num, num_stages=NUM_STAGE):
+ tile = tl.load(
+ base_ptrs + i * input_stride_1,
+ mask=mask_token[:, None] & mask_dim[None, :],
+ other=0.0,
)
+ accumulator += tile.to(tl.float32)
+ accumulator *= routed_scaling_factor
+
+ # -------- Write back --------
+ store_ptrs = output_ptr + offs_token[:, None] * output_stride_0 + offs_dim[None, :]
+ tl.store(
+ store_ptrs,
+ accumulator.to(input_ptr.dtype.element_ty),
+ mask=mask_token[:, None] & mask_dim[None, :],
+ )
+# _moe_sum_reduce_kernel is modified from https://github.com/ModelTC/lightllm/blob/main/lightllm/common/fused_moe/moe_sum_reduce.py
def moe_sum_reduce(
input: torch.Tensor, output: torch.Tensor, routed_scaling_factor: float
):
@@ -66,7 +69,7 @@ def moe_sum_reduce(
BLOCK_M = 1
BLOCK_DIM = 2048
NUM_STAGE = 1
- num_warps = 8
+ num_warps = 16
grid = (
triton.cdiv(token_num, BLOCK_M),
diff --git a/benchmark/kernels/fused_moe_triton/benchmark_torch_compile_fused_moe.py b/benchmark/kernels/fused_moe_triton/benchmark_torch_compile_fused_moe.py
index 1fcea7cd49d..2b4faa24b1d 100644
--- a/benchmark/kernels/fused_moe_triton/benchmark_torch_compile_fused_moe.py
+++ b/benchmark/kernels/fused_moe_triton/benchmark_torch_compile_fused_moe.py
@@ -9,7 +9,7 @@
from sglang.srt.layers.moe.fused_moe_triton.fused_moe import (
fused_moe as fused_moe_triton,
)
-from sglang.srt.model_executor.graph_runner import set_torch_compile_config
+from sglang.srt.model_executor.cuda_graph_runner import set_torch_compile_config
def get_model_config(model_name: str, tp_size: int):
diff --git a/benchmark/kernels/fused_moe_triton/tuning_fused_moe_triton.py b/benchmark/kernels/fused_moe_triton/tuning_fused_moe_triton.py
index 09caf9e9e75..7b52f02a3ab 100644
--- a/benchmark/kernels/fused_moe_triton/tuning_fused_moe_triton.py
+++ b/benchmark/kernels/fused_moe_triton/tuning_fused_moe_triton.py
@@ -2,6 +2,7 @@
import argparse
import json
import time
+from contextlib import nullcontext
from datetime import datetime
from typing import Any, Dict, List, Tuple, TypedDict
@@ -245,6 +246,9 @@ def __init__(self, seed: int) -> None:
torch.set_default_device("cuda")
torch.cuda.manual_seed_all(0)
self.seed = seed
+ # Get the device ID to allocate tensors and kernels
+ # on the respective GPU.
+ self.device_id = int(ray.get_gpu_ids()[0])
def benchmark(
self,
@@ -283,19 +287,20 @@ def benchmark(
)
else:
config = op_config[min(op_config.keys(), key=lambda x: abs(x - num_tokens))]
- kernel_time = benchmark_config(
- config,
- num_tokens,
- num_experts,
- shard_intermediate_size,
- hidden_size,
- topk,
- dtype,
- use_fp8_w8a8,
- use_int8_w8a8,
- use_int8_w8a16,
- block_shape,
- )
+ with torch.cuda.device(self.device_id) if is_hip() else nullcontext():
+ kernel_time = benchmark_config(
+ config,
+ num_tokens,
+ num_experts,
+ shard_intermediate_size,
+ hidden_size,
+ topk,
+ dtype,
+ use_fp8_w8a8,
+ use_int8_w8a8,
+ use_int8_w8a16,
+ block_shape,
+ )
return config, kernel_time
def tune(
@@ -314,29 +319,30 @@ def tune(
) -> Dict[str, int]:
best_config = None
best_time = float("inf")
- for config in tqdm(search_space):
- try:
- kernel_time = benchmark_config(
- config,
- num_tokens,
- num_experts,
- shard_intermediate_size,
- hidden_size,
- topk,
- dtype,
- use_fp8_w8a8,
- use_int8_w8a8,
- use_int8_w8a16,
- block_shape,
- num_iters=10,
- )
- except triton.runtime.autotuner.OutOfResources:
- # Some configurations may be invalid and fail to compile.
- continue
-
- if kernel_time < best_time:
- best_time = kernel_time
- best_config = config
+ with torch.cuda.device(self.device_id) if is_hip() else nullcontext():
+ for config in tqdm(search_space):
+ try:
+ kernel_time = benchmark_config(
+ config,
+ num_tokens,
+ num_experts,
+ shard_intermediate_size,
+ hidden_size,
+ topk,
+ dtype,
+ use_fp8_w8a8,
+ use_int8_w8a8,
+ use_int8_w8a16,
+ block_shape,
+ num_iters=10,
+ )
+ except (triton.runtime.autotuner.OutOfResources, RuntimeError):
+ # Some configurations may be invalid and fail to compile.
+ continue
+
+ if kernel_time < best_time:
+ best_time = kernel_time
+ best_config = config
now = datetime.now()
print(f"{now.ctime()}] Completed tuning for batch_size={num_tokens}")
assert best_config is not None
diff --git a/benchmark/kernels/quantization/bench_fp4_quant.py b/benchmark/kernels/quantization/bench_fp4_quant.py
new file mode 100644
index 00000000000..318e820adda
--- /dev/null
+++ b/benchmark/kernels/quantization/bench_fp4_quant.py
@@ -0,0 +1,133 @@
+import argparse
+import itertools
+
+import torch
+import triton
+from sgl_kernel import scaled_fp4_grouped_quant, silu_and_mul_scaled_fp4_grouped_quant
+from sgl_kernel.elementwise import silu_and_mul
+
+from sglang.srt.layers.moe.ep_moe.kernels import silu_and_mul_masked_post_quant_fwd
+from sglang.srt.layers.quantization import deep_gemm_wrapper
+
+
+def _test_accuracy_once(E, M, K, input_dtype, device):
+ x = torch.randn(E, M, K, device=device, dtype=input_dtype)
+ glb_scales = torch.ones((E,), dtype=torch.float32, device=device)
+ masks = torch.full((E,), M, dtype=torch.int32, device=device)
+ out, blk_scales = silu_and_mul_scaled_fp4_grouped_quant(x, glb_scales, masks)
+ out1, blk_scales1 = scaled_fp4_grouped_quant(
+ silu_and_mul(x),
+ glb_scales,
+ masks,
+ )
+
+ torch.testing.assert_close(out, out1)
+ torch.testing.assert_close(blk_scales, blk_scales1)
+ print(f"E: {E}, M: {M}, K: {K}, type: {input_dtype} OK")
+
+
+NUM_RANKS = 48
+M_PER_RANKs = [128, 256, 512, 1024]
+Ms = [M_PER_RANK * NUM_RANKS for M_PER_RANK in M_PER_RANKs]
+Ks = [2048, 4096, 7168]
+
+
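+# Providers compared below:
+#   triton_fp8       - fused silu_and_mul + masked FP8 post-quant (Triton kernel)
+#   cuda_unfused_fp4 - silu_and_mul followed by a separate FP4 grouped quant
+#   cuda_fused_fp4   - single fused silu_and_mul + FP4 grouped quant kernel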
+@triton.testing.perf_report(
+ triton.testing.Benchmark(
+ x_names=["M", "K"],
+ x_vals=list(itertools.product(Ms, Ks)),
+ x_log=False,
+ line_arg="provider",
+ line_vals=["triton_fp8", "cuda_unfused_fp4", "cuda_fused_fp4"],
+ line_names=["triton_fp8", "cuda_unfused_fp4", "cuda_fused_fp4"],
+ styles=[("blue", "-"), ("orange", "-"), ("green", "-")],
+ ylabel="ms",
+ plot_name="fp4 quant",
+ args={},
+ )
+)
+def benchmark(M, K, provider):
+ E = 6
+ device = "cuda"
+ x = torch.randn(E, M, K, device=device, dtype=torch.bfloat16)
+ glb_scales = torch.ones((E,), dtype=torch.float32, device=device)
+ masks = torch.randint(1, 4096, (E,), dtype=torch.int32, device=device)
+ fp8_out = torch.empty(
+ (
+ x.shape[0],
+ x.shape[1],
+ x.shape[2] // 2,
+ ),
+ device=x.device,
+ dtype=torch.float8_e4m3fn,
+ )
+ scale_block_size = 128
+ fp8_scales = torch.empty(
+ (
+ x.shape[0],
+ x.shape[1],
+ x.shape[2] // 2 // scale_block_size,
+ ),
+ device=x.device,
+ dtype=torch.float32,
+ )
+
+ quantiles = [0.5, 0.2, 0.8]
+ if provider == "triton_fp8":
+ ms, min_ms, max_ms = triton.testing.do_bench_cudagraph(
+ lambda: silu_and_mul_masked_post_quant_fwd(
+ x,
+ fp8_out,
+ fp8_scales,
+ scale_block_size,
+ masks,
+ scale_ue8m0=deep_gemm_wrapper.DEEPGEMM_SCALE_UE8M0,
+ ),
+ quantiles=quantiles,
+ )
+ if provider == "cuda_unfused_fp4":
+ ms, min_ms, max_ms = triton.testing.do_bench_cudagraph(
+ lambda: scaled_fp4_grouped_quant(
+ silu_and_mul(x),
+ glb_scales,
+ masks,
+ ),
+ quantiles=quantiles,
+ )
+ if provider == "cuda_fused_fp4":
+ ms, min_ms, max_ms = triton.testing.do_bench_cudagraph(
+ lambda: silu_and_mul_scaled_fp4_grouped_quant(
+ x,
+ glb_scales,
+ masks,
+ ),
+ quantiles=quantiles,
+ )
+
+ return ms, min_ms, max_ms
+
+
+def test_accuracy():
+    E = 6
+    input_dtype = torch.bfloat16
+    # Reuse the module-level NUM_RANKS, M_PER_RANKs, and Ks instead of redefining them
+    for m_per_rank in M_PER_RANKs:
+        for k in Ks:
+            _test_accuracy_once(E, NUM_RANKS * m_per_rank, k, input_dtype, "cuda")
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ "--save_path",
+ type=str,
+ default="./bench_fp4_quant_res",
+ help="Path to save fp4 quant benchmark results",
+ )
+ args = parser.parse_args()
+
+ test_accuracy()
+
+ benchmark.run(print_data=True, show_plots=True, save_path=args.save_path)
diff --git a/benchmark/mmmu/bench_hf.py b/benchmark/mmmu/bench_hf.py
index 0295bc5dc52..949b63b802a 100644
--- a/benchmark/mmmu/bench_hf.py
+++ b/benchmark/mmmu/bench_hf.py
@@ -141,9 +141,13 @@ def eval_mmmu(args):
print(f"response: {response}")
process_result(response, sample, answer_dict, out_samples)
- args.output_path = f"{args.model_path}_val_hf.json"
+ args.output_path = f"{args.model_path}_answer_hf.json"
save_json(args.output_path, out_samples)
- eval_result(model_answer_path=args.output_path, answer_dict=answer_dict)
+ eval_result(
+ model_answer_path=args.output_path,
+ answer_dict=answer_dict,
+ eval_output_path=f"{args.model_path}_val_hf.json",
+ )
if __name__ == "__main__":
diff --git a/benchmark/mmmu/bench_sglang.py b/benchmark/mmmu/bench_sglang.py
index 372bfeed886..d8834ea5f87 100644
--- a/benchmark/mmmu/bench_sglang.py
+++ b/benchmark/mmmu/bench_sglang.py
@@ -187,9 +187,13 @@ async def eval_mmmu(args) -> None:
print("Profiler stopped")
print(f"Benchmark time: {time.perf_counter() - start}")
- args.output_path = f"./val_sglang.json"
+ args.output_path = "./answer_sglang.json"
save_json(args.output_path, out_samples)
- eval_result(model_answer_path=args.output_path, answer_dict=answer_dict)
+ eval_result(
+ model_answer_path=args.output_path,
+ answer_dict=answer_dict,
+ eval_output_path="./val_sglang.json",
+ )
def parse_args():
diff --git a/benchmark/mmmu/eval_utils.py b/benchmark/mmmu/eval_utils.py
index 83f6dd7fb1a..ca0e87c6a71 100644
--- a/benchmark/mmmu/eval_utils.py
+++ b/benchmark/mmmu/eval_utils.py
@@ -544,7 +544,9 @@ def process_result(response, sample, answer_dict, out_samples):
}
-def eval_result(model_answer_path, answer_dict):
+def eval_result(model_answer_path, answer_dict, eval_output_path=None):
+ if eval_output_path is None:
+ eval_output_path = model_answer_path
print("Evaluating...")
output_dict = json.load(open(model_answer_path))
# answer_dict = json.load(open(answer_path))
@@ -639,7 +641,7 @@ def eval_result(model_answer_path, answer_dict):
"acc": overall_acc,
}
pprint.pprint(printable_results)
- out = model_answer_path
+ out = eval_output_path
with open(out, "w", encoding="utf-8") as outfile:
json.dump(printable_results, outfile)
print(f"eval out saved to {out}")
diff --git a/benchmark/mtbench/README.md b/benchmark/mtbench/README.md
index e6babf96e56..fc37caee90c 100644
--- a/benchmark/mtbench/README.md
+++ b/benchmark/mtbench/README.md
@@ -18,7 +18,7 @@ python3 bench_sglang.py --num-questions 80
### Benchmark sglang EAGLE
```
python3 -m sglang.launch_server --model meta-llama/Meta-Llama-3-8B-Instruct --speculative-algo EAGLE \
- --speculative-draft lmsys/sglang-EAGLE-LLaMA3-Instruct-8B --speculative-num-steps 5 \
+ --speculative-draft-model-path lmsys/sglang-EAGLE-LLaMA3-Instruct-8B --speculative-num-steps 5 \
--speculative-eagle-topk 8 --speculative-num-draft-tokens 64 --dtype float16 --port 30000
```
diff --git a/benchmark/prefill_only/bench_embeddings.py b/benchmark/prefill_only/bench_embeddings.py
new file mode 100644
index 00000000000..ca66c85a3b1
--- /dev/null
+++ b/benchmark/prefill_only/bench_embeddings.py
@@ -0,0 +1,148 @@
+"""
+SGLang Embeddings Benchmark Script
+
+This script benchmarks SGLang's /v1/embeddings API performance using HTTP requests.
+
+Features:
+- HTTP-only implementation
+- Uses /v1/embeddings API endpoint directly
+- Configurable RPS, duration, and batch sizes
+- Progress tracking and detailed metrics
+- Poisson and constant request distributions
+
+Usage:
+- Update configuration variables at the top of the file
+- Ensure SGLang server is running on the configured HTTP_URL
+- Run: python bench_embeddings.py
+"""
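+# One possible server launch for this benchmark (an assumption -- exact flags may
+# differ across SGLang versions; consult the SGLang docs for your release):
+#   python3 -m sglang.launch_server --model-path Qwen/Qwen3-Embedding-0.6B \
+#       --is-embedding --port 30000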
+
+import asyncio
+import logging
+
+from transformers import AutoTokenizer
+from util import (
+ BenchmarkConfig,
+ generate_text_with_token_count,
+ run_benchmark_main,
+ run_generic_benchmark,
+)
+
+# Configure logging
+logging.basicConfig(
+ level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+)
+logger = logging.getLogger(__name__)
+
+###############################################################################
+# CONFIG
+###############################################################################
+# Create benchmark configuration
+config = BenchmarkConfig()
+config.rps_values = [500]
+config.duration_secs_values = [60]
+config.num_unique_requests = 100
+config.distribution = "POISSON"
+config.profile = False
+config.freeze_gc = True # Enable GC freeze functionality
+# Profiler output directory - by default uses present working directory (pwd)
+# Uncomment and customize the line below to override the default location:
+# config.profiler_dir = "/sglang-oss-trace"
+
+# HTTP Configuration
+HTTP_URL = "http://localhost:30000/v1/embeddings"
+
+# Embeddings API Config
+EMBEDDINGS_MODEL_PATH = "Qwen/Qwen3-Embedding-0.6B"
+BATCH_SIZE = [1] # Number of items per request (batch size)
+
+# Configurable input token length
+EMBEDDINGS_INPUT_TOKENS = 500 # Default token length
+
+# Load tokenizer once for embeddings text generation
+print("Loading tokenizer for embeddings input generation...")
+embeddings_tokenizer = AutoTokenizer.from_pretrained(EMBEDDINGS_MODEL_PATH)
+
+# Generate input text with the specified token length using pre-loaded tokenizer
+EMBEDDINGS_INPUT_TEXT = generate_text_with_token_count(
+ EMBEDDINGS_MODEL_PATH,
+ EMBEDDINGS_INPUT_TOKENS,
+ config.special_replicated_token,
+ tokenizer=embeddings_tokenizer,
+)
+
+
+###############################################################################
+# REQUEST GENERATION (in parallel)
+###############################################################################
+def build_embeddings_request(index: int, item_count: int) -> tuple:
+ """Build a single embeddings request."""
+ try:
+ # For embeddings, input can be a string or list of strings
+ if item_count == 1:
+ input_data = EMBEDDINGS_INPUT_TEXT
+ else:
+ input_data = [EMBEDDINGS_INPUT_TEXT for _ in range(item_count)]
+ req = {
+ "input": input_data,
+ "model": EMBEDDINGS_MODEL_PATH,
+ }
+ return (index, req)
+ except Exception as e:
+ logger.error(f"Error building request {index}: {e}")
+ return (index, None)
+
+
+def validate_embeddings_response(response_data: dict) -> bool:
+ """Validate embeddings API response."""
+ return "data" in response_data
+
+
+def build_warmup_embeddings_request() -> dict:
+ """Build a warmup request for the embeddings API."""
+ return {
+ "input": EMBEDDINGS_INPUT_TEXT,
+ "model": EMBEDDINGS_MODEL_PATH,
+ }
+
+
+###############################################################################
+# MAIN
+###############################################################################
+async def run_benchmark(rps, duration_secs, item_count):
+ """Run a single embeddings benchmark with the given RPS value."""
+ return await run_generic_benchmark(
+ rps=rps,
+ duration_secs=duration_secs,
+ item_count=item_count,
+ config=config,
+ http_url=HTTP_URL,
+ build_request_func=build_embeddings_request,
+ response_validator=validate_embeddings_response,
+ api_name="EMBEDDINGS",
+ request_description="embeddings requests",
+ )
+
+
+async def main():
+ additional_info = {
+ "Input text length": f"{EMBEDDINGS_INPUT_TOKENS} tokens",
+ "Input text preview": (
+ EMBEDDINGS_INPUT_TEXT[:100] + "..."
+ if len(EMBEDDINGS_INPUT_TEXT) > 100
+ else EMBEDDINGS_INPUT_TEXT
+ ),
+ }
+
+ await run_benchmark_main(
+ config,
+ run_benchmark,
+ "EMBEDDINGS",
+ HTTP_URL,
+ BATCH_SIZE,
+ additional_info,
+ build_warmup_embeddings_request,
+ )
+
+
+if __name__ == "__main__":
+ asyncio.run(main())
diff --git a/benchmark/prefill_only/bench_score.py b/benchmark/prefill_only/bench_score.py
new file mode 100644
index 00000000000..117335eae0e
--- /dev/null
+++ b/benchmark/prefill_only/bench_score.py
@@ -0,0 +1,192 @@
+"""
+SGLang Scoring Benchmark Script
+
+This script benchmarks SGLang's scoring API performance using HTTP requests.
+
+Current Features:
+- HTTP-only implementation (open source compatible)
+- Uses /v1/score API endpoint directly
+- Single item scoring with batching support
+- Configurable RPS, duration, and batch sizes
+- Progress tracking and detailed metrics
+- Poisson and constant request distributions
+
+Usage:
+- Update configuration variables at the top of the file
+- Ensure SGLang server is running on the configured HTTP_URL
+- Run: python bench_score.py
+- Each request will contain ITEM_COUNT_VALUES items for batch scoring
+
+"""
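+# Each /v1/score request built below has the shape
+# {"query": str, "items": [str, ...], "label_token_ids": [int, ...], "model": str};
+# see build_score_request for details.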
+
+import asyncio
+
+from transformers import AutoTokenizer
+from util import (
+ BenchmarkConfig,
+ generate_text_with_token_count,
+ run_benchmark_main,
+ run_generic_benchmark,
+)
+
+###############################################################################
+# CONFIG
+###############################################################################
+# Create benchmark configuration
+config = BenchmarkConfig()
+config.rps_values = [160]
+config.duration_secs_values = [60]
+config.num_unique_requests = 100
+config.distribution = "POISSON"
+config.profile = False
+config.freeze_gc = True # Enable GC freeze functionality
+# Profiler output directory - by default uses present working directory (pwd)
+# Uncomment and customize the line below to override the default location:
+# config.profiler_dir = "/sglang-oss-trace"
+
+# HTTP Configuration
+HTTP_URL = "http://localhost:30000/v1/score" # Use score API directly
+
+# Score API Config
+# ITEM_COUNT_VALUES determines number of items per score request (batch size)
+SCORE_QUERY_TOKENS = 120
+SCORE_ITEM_TOKENS = 180
+SCORE_MODEL_PATH = "Qwen/Qwen3-0.6B"
+SCORE_LABEL_TOKEN_IDS = [9454, 2753] # Yes/No token IDs
+ITEM_COUNT_VALUES = [10] # Number of items per request
+
+# Special token to replicate for precise token counting
+SPECIAL_REPLICATED_TOKEN = "<|im_start|>"
+
+
+###############################################################################
+# REQUEST GENERATION (in parallel)
+###############################################################################
+def create_score_request_builder():
+ """Create a score request builder function with shared tokenizer."""
+ # Load tokenizer once here to verify special token and get precise counts
+ print("Loading tokenizer...")
+ tokenizer = AutoTokenizer.from_pretrained(SCORE_MODEL_PATH)
+
+ # Verify that our special token produces exactly 1 token
+ special_token_count = len(
+ tokenizer.encode(config.special_replicated_token, add_special_tokens=False)
+ )
+ print(
+ f"Special token '{config.special_replicated_token}' produces "
+ f"{special_token_count} token(s)"
+ )
+
+ def generate_text_with_token_count_local(num_toks):
+ """Generate text with precise token count using replicated token."""
+ return generate_text_with_token_count(
+ SCORE_MODEL_PATH,
+ num_toks,
+ config.special_replicated_token,
+ tokenizer=tokenizer,
+ )
+
+ def build_score_request(index: int, item_count: int) -> tuple:
+ """Build a single score request."""
+ try:
+ # Generate query and items for score API
+ query = generate_text_with_token_count_local(SCORE_QUERY_TOKENS)
+ items = [
+ generate_text_with_token_count_local(SCORE_ITEM_TOKENS)
+ for _ in range(item_count)
+ ]
+
+ # Return as dict for score API format
+ score_data = {
+ "query": query,
+ "items": items,
+ "label_token_ids": SCORE_LABEL_TOKEN_IDS,
+ "model": SCORE_MODEL_PATH,
+ }
+ return (index, score_data)
+
+ except Exception as e:
+ print(f"Error building request {index}: {e}")
+ return (index, None)
+
+ return build_score_request
+
+
+def validate_score_response(response_data: dict) -> bool:
+ """Validate score API response."""
+ return "scores" in response_data or "logprobs" in response_data
+
+
+def build_warmup_score_request() -> dict:
+ """Build a warmup request for the score API."""
+ # Load tokenizer once for warmup generation
+ tokenizer = AutoTokenizer.from_pretrained(SCORE_MODEL_PATH)
+
+ warmup_query = generate_text_with_token_count(
+ SCORE_MODEL_PATH,
+ SCORE_QUERY_TOKENS,
+ config.special_replicated_token,
+ tokenizer=tokenizer,
+ )
+ warmup_items = [
+ generate_text_with_token_count(
+ SCORE_MODEL_PATH,
+ SCORE_ITEM_TOKENS,
+ config.special_replicated_token,
+ tokenizer=tokenizer,
+ )
+ for _ in range(3)
+ ]
+
+ return {
+ "query": warmup_query,
+ "items": warmup_items,
+ "label_token_ids": SCORE_LABEL_TOKEN_IDS,
+ "model": SCORE_MODEL_PATH,
+ # Add missing parameters for consistency with the original warmup
+ "apply_softmax": True,
+ "item_first": False,
+ }
+
+
+###############################################################################
+# MAIN
+###############################################################################
+async def run_benchmark(rps, duration_secs, item_count):
+ """Run a single benchmark with the given RPS value."""
+ # Create the request builder function with shared tokenizer
+ build_request_func = create_score_request_builder()
+
+ return await run_generic_benchmark(
+ rps=rps,
+ duration_secs=duration_secs,
+ item_count=item_count,
+ config=config,
+ http_url=HTTP_URL,
+ build_request_func=build_request_func,
+ response_validator=validate_score_response,
+ api_name="SINGLE_ITEM_SCORING",
+ request_description="score requests",
+ )
+
+
+async def main():
+ """Main function that runs benchmarks for all RPS values."""
+ additional_info = {
+ "Query tokens per request": SCORE_QUERY_TOKENS,
+ "Item tokens per item": SCORE_ITEM_TOKENS,
+ }
+
+ await run_benchmark_main(
+ config,
+ run_benchmark,
+ "SINGLE_ITEM_SCORING",
+ HTTP_URL,
+ ITEM_COUNT_VALUES,
+ additional_info,
+ build_warmup_score_request,
+ )
+
+
+if __name__ == "__main__":
+ asyncio.run(main())
diff --git a/benchmark/prefill_only/util.py b/benchmark/prefill_only/util.py
new file mode 100644
index 00000000000..0dbc390278d
--- /dev/null
+++ b/benchmark/prefill_only/util.py
@@ -0,0 +1,813 @@
+"""
+Common utilities for SGLang benchmark scripts.
+
+This module contains shared code for benchmarking different SGLang APIs
+including scoring, embeddings, and other endpoints.
+"""
+
+import asyncio
+import concurrent.futures
+import json
+import os
+import random
+from statistics import mean
+from typing import Any, Callable, Dict, List, Optional, Tuple
+
+import aiohttp
+import numpy as np
+from tqdm import tqdm
+from transformers import AutoTokenizer
+
+
+class BenchmarkConfig:
+ """Configuration for benchmark parameters."""
+
+ def __init__(self):
+ # Common benchmark settings
+ self.server_type = "HTTP"
+ self.rps_values = [70]
+ self.duration_secs_values = [60]
+ self.num_unique_requests = 100
+ self.distribution = "POISSON" # Options: "CONSTANT", "POISSON"
+ self.profile = False
+
+ # Garbage Collection Control
+ self.freeze_gc = True # Enable/disable garbage collection freezing
+
+ # Profiler configuration
+ self.profiler_dir = (
+ os.getcwd()
+ ) # Default profiler output directory (current working directory)
+
+ # Special token for text generation
+ self.special_replicated_token = "<|im_start|>"
+
+
+def generate_text_with_token_count(
+ model_path: str,
+ num_tokens: int,
+ special_token: str = "<|im_start|>",
+ tokenizer: Optional[Any] = None,
+) -> str:
+ """
+ Generate text with precise token count using a replicated token.
+
+ Args:
+ model_path: Path to the model for tokenizer
+ num_tokens: Target number of tokens
+ special_token: Token to replicate
+ tokenizer: Optional pre-loaded tokenizer to avoid repeated loading
+
+ Returns:
+ Generated text with approximately the target token count
+ """
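+    # e.g. generate_text_with_token_count("Qwen/Qwen3-0.6B", 120) returns a string
+    # that the corresponding tokenizer encodes to roughly 120 tokens.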
+ if tokenizer is None:
+ tokenizer = AutoTokenizer.from_pretrained(model_path)
+
+ # Verify token count
+ special_token_count = len(tokenizer.encode(special_token, add_special_tokens=False))
+
+ if special_token_count == 1:
+ # Simple case: token maps to exactly 1 token
+ return special_token * num_tokens
+ else:
+ print(f"Special token '{special_token}' produces {special_token_count} tokens")
+ # Handle case where special token produces multiple tokens
+ repetitions = (num_tokens + special_token_count - 1) // special_token_count
+ text = special_token * repetitions
+
+ # Verify we got the expected token count
+ actual_tokens = len(tokenizer.encode(text, add_special_tokens=False))
+ if actual_tokens < num_tokens:
+ print(f"Warning: Generated {actual_tokens} tokens, expected {num_tokens}")
+
+ return text
+
+
+def setup_profiler(config: BenchmarkConfig, benchmark_name: str) -> None:
+ """
+ Set up profiler environment if profiling is enabled.
+
+ Args:
+ config: Benchmark configuration
+ benchmark_name: Name of the benchmark (used in directory path)
+ """
+ if config.profile:
+ # Create benchmark-specific subdirectory
+ profiler_path = os.path.join(
+ config.profiler_dir, benchmark_name.lower().replace("_", "-")
+ )
+ os.environ["SGLANG_TORCH_PROFILER_DIR"] = profiler_path
+ print(f"Profiler enabled. Output directory: {profiler_path}")
+ else:
+ print("Profiler disabled")
+
+
+def prepare_all_requests_parallel(
+ num_requests: int,
+ item_count: int,
+ build_request_func: Callable[[int, int], Tuple[int, Any]],
+ config: BenchmarkConfig,
+ description: str = "requests",
+) -> List[Any]:
+ """
+ Generic function to generate unique requests in parallel, then reuse them.
+
+ Args:
+ num_requests: Total number of requests needed
+ item_count: Number of items per request (batch size)
+ build_request_func: Function that takes (index, item_count) and returns (index, request_data)
+ config: Benchmark configuration
+ description: Description for progress bars
+
+ Returns:
+ List of request data objects
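+
+    Example (sketch; the request dict shape below is illustrative, not required
+    by this helper):
+        def build_request(index, item_count):
+            return (index, {"id": index, "items": ["<|im_start|>"] * item_count})
+
+        requests = prepare_all_requests_parallel(
+            1000, 10, build_request, BenchmarkConfig(), "score requests"
+        )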
+ """
+
+ def build_request_wrapper(index):
+ """Wrapper to call the provided build_request_func."""
+ try:
+ return build_request_func(index, item_count)
+ except Exception as e:
+ print(f"Error building request {index}: {e}")
+ return (index, None)
+
+ # Generate only the unique requests
+ unique_requests = [None] * config.num_unique_requests
+ max_workers = min(8, os.cpu_count() or 1) # Limit to 8 threads max
+
+ with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
+ futures = []
+ for i in tqdm(
+ range(config.num_unique_requests),
+ desc=f"Submitting {description} generation tasks",
+ ):
+ future = executor.submit(build_request_wrapper, i)
+ futures.append(future)
+
+ # Collect results as they complete
+ for f in tqdm(
+ concurrent.futures.as_completed(futures),
+ desc=f"Building unique {description}",
+ total=config.num_unique_requests,
+ ):
+ try:
+ index, req_data = f.result()
+ if req_data is not None:
+ unique_requests[index] = req_data
+ else:
+ print(f"Failed to build request {index}")
+ except Exception as e:
+ print(f"Error processing request result: {e}")
+
+ # Check if we have any valid requests
+ valid_requests = [req for req in unique_requests if req is not None]
+ if not valid_requests:
+ raise RuntimeError("Failed to generate any valid requests")
+
+ print(
+ f"Successfully generated {len(valid_requests)} out of "
+ f"{config.num_unique_requests} unique {description}"
+ )
+
+ # Create the full request list by cycling through unique requests
+ print(
+ f"Reusing {len(valid_requests)} unique {description} to create "
+ f"{num_requests} total requests..."
+ )
+ all_requests = []
+ for i in tqdm(range(num_requests), desc=f"Reusing {description}"):
+ unique_index = i % len(valid_requests)
+ all_requests.append(valid_requests[unique_index])
+
+ print(f"All {description} prepared.\n")
+ return all_requests
+
+
+async def sleep_with_distribution(distribution: str, rps: float) -> None:
+ """
+ Sleep according to the specified distribution pattern.
+
+ Args:
+ distribution: "CONSTANT" or "POISSON"
+ rps: Requests per second rate
+ """
+ if distribution == "CONSTANT":
+ interval = 1 / rps
+ await asyncio.sleep(interval)
+ elif distribution == "POISSON":
+ # For Poisson process, inter-arrival times follow exponential distribution
+ interval = random.expovariate(rps)
+ await asyncio.sleep(interval)
+ else:
+ raise ValueError(
+ f"Unknown distribution: {distribution}. Use 'CONSTANT' or 'POISSON'."
+ )
+
+
+def build_http_request_json(request_data: Any) -> str:
+ """
+ Generic function to build HTTP request JSON.
+
+ Args:
+ request_data: The data to serialize to JSON
+
+ Returns:
+ JSON string representation of the request data
+ """
+ return json.dumps(request_data)
+
+
+async def make_http_call(
+ session: aiohttp.ClientSession,
+ request_data: Any,
+ request_id: int,
+ results_queue: asyncio.Queue,
+ http_url: str,
+ response_validator: Callable[[Dict[str, Any]], bool],
+ api_name: str = "API",
+) -> None:
+ """
+ Generic HTTP call function for API requests.
+
+ Args:
+ session: aiohttp client session
+ request_data: Data to send in the request
+ request_id: Unique identifier for this request
+ results_queue: Queue to put results
+ http_url: URL to send the request to
+ response_validator: Function to validate the response JSON
+ api_name: Name of the API for error messages
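+
+    Example validator (sketch; the "scores"/"logprobs" fields are assumptions
+    about one particular scoring API, not something this helper requires):
+        def validate_score_response(data: Dict[str, Any]) -> bool:
+            return "scores" in data or "logprobs" in data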
+ """
+ try:
+ start_time = asyncio.get_event_loop().time()
+
+ request_json = build_http_request_json(request_data)
+ headers = {"Content-Type": "application/json"}
+
+ async with session.post(http_url, data=request_json, headers=headers) as resp:
+ resp_text = await resp.text()
+
+ if resp.status != 200:
+ print(
+ f"[HTTP] {api_name} Request {request_id} failed with status "
+ f"{resp.status}: {resp_text}"
+ )
+ completion_time = asyncio.get_event_loop().time()
+ await results_queue.put((request_id, 0, False, completion_time))
+ return
+
+ # Parse and validate response
+ try:
+ response_data = json.loads(resp_text)
+ success = response_validator(response_data)
+ if not success:
+ print(
+ f"[HTTP] {api_name} Request {request_id} failed response validation"
+ )
+ except json.JSONDecodeError:
+ print(
+ f"[HTTP] {api_name} Request {request_id} failed to parse JSON response"
+ )
+ success = False
+
+ completion_time = asyncio.get_event_loop().time()
+ elapsed_time = (completion_time - start_time) * 1000
+ await results_queue.put((request_id, elapsed_time, success, completion_time))
+
+ except Exception as e:
+ print(f"[HTTP] {api_name} Error for request {request_id}: {e}")
+ completion_time = asyncio.get_event_loop().time()
+ await results_queue.put((request_id, 0, False, completion_time))
+
+
+async def send_profile_request(
+ profile_text: str, http_url: str, session: Optional[aiohttp.ClientSession] = None
+) -> None:
+ """
+ Send a profile request (START_PROFILE or STOP_PROFILE) and wait for completion.
+
+ Args:
+ profile_text: "START_PROFILE" or "STOP_PROFILE"
+ http_url: Base HTTP URL (will derive profile endpoints from this)
+ session: Optional aiohttp session to use
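+
+    For example, an http_url of "http://localhost:30000/v1/score" is mapped to
+    "http://localhost:30000/start_profile" or "http://localhost:30000/stop_profile".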
+ """
+ try:
+ if session:
+ print(f"Sending {profile_text} request via HTTP...")
+
+ # Determine the correct endpoint
+ if "/v1/" in http_url:
+ base_url = http_url.rsplit("/v1/", 1)[0] # Remove /v1/xxx
+ else:
+ base_url = http_url.rsplit("/", 1)[0] # Remove last path component
+
+ if profile_text == "START_PROFILE":
+ endpoint_url = f"{base_url}/start_profile"
+ elif profile_text == "STOP_PROFILE":
+ endpoint_url = f"{base_url}/stop_profile"
+ else:
+ print(f"Unknown profile request: {profile_text}")
+ return
+
+ headers = {"Content-Type": "application/json"}
+
+ async with session.post(endpoint_url, headers=headers) as resp:
+ resp_text = await resp.text()
+ if resp.status == 200:
+ print(f"{profile_text} request completed")
+ else:
+ print(
+ f"{profile_text} request failed with status "
+ f"{resp.status}: {resp_text}"
+ )
+ else:
+ print(f"Cannot send {profile_text} request - missing session")
+
+ except Exception as e:
+ print(f"Error sending {profile_text} request: {e}")
+
+
+async def call_freeze_gc_http(session: aiohttp.ClientSession, http_url: str) -> None:
+ """
+ Call the /freeze_gc HTTP endpoint.
+
+ Args:
+ session: aiohttp client session
+ http_url: Base HTTP URL to derive the freeze_gc endpoint from
+ """
+ try:
+ # Derive freeze_gc endpoint from the API URL
+ if "/v1/" in http_url:
+ freeze_gc_url = http_url.rsplit("/v1/", 1)[0] + "/freeze_gc"
+ else:
+ freeze_gc_url = http_url.rsplit("/", 1)[0] + "/freeze_gc"
+
+ print(f"Calling freeze_gc endpoint: {freeze_gc_url}")
+
+ async with session.post(freeze_gc_url) as resp:
+ if resp.status == 200:
+ print("freeze_gc called successfully")
+ else:
+ resp_text = await resp.text()
+ print(f"freeze_gc failed with status {resp.status}: {resp_text}")
+
+ except Exception as e:
+ print(f"Failed to call freeze_gc: {e}")
+
+
+async def send_warmup_requests(
+ session: aiohttp.ClientSession,
+ http_url: str,
+ build_warmup_request_func: Callable[[], Any],
+ num_warmup: int = 3,
+) -> None:
+ """
+ Send warmup requests to HTTP server.
+
+ Args:
+ session: aiohttp client session
+ http_url: URL to send warmup requests to
+ build_warmup_request_func: Function that returns a warmup request object
+ num_warmup: Number of warmup requests to send
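+
+    Example (sketch; the warmup payload shape is illustrative):
+        await send_warmup_requests(
+            session, http_url, lambda: {"query": "warmup", "items": ["x"]}
+        )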
+ """
+ print(f"Sending {num_warmup} HTTP warmup requests...")
+
+ for i in range(num_warmup):
+ try:
+ warmup_data = build_warmup_request_func()
+ request_json = build_http_request_json(warmup_data)
+ headers = {"Content-Type": "application/json"}
+
+ async with session.post(
+ http_url, data=request_json, headers=headers
+ ) as resp:
+ if resp.status == 200:
+ print(f"Warmup request {i+1}/{num_warmup} completed successfully")
+ else:
+ print(
+ f"Warmup request {i+1}/{num_warmup} failed with status {resp.status}"
+ )
+
+ except Exception as e:
+ print(f"Warmup request {i+1}/{num_warmup} failed with error: {e}")
+
+ print("HTTP warmup requests completed")
+
+
+async def perform_global_warmup_and_freeze(
+ config: BenchmarkConfig,
+ http_url: str,
+ build_warmup_request_func: Callable[[], Any],
+) -> None:
+ """
+ Perform warmup and optionally GC freeze operations once before all benchmark runs.
+
+ Args:
+ config: Benchmark configuration
+ http_url: URL for API requests
+ build_warmup_request_func: Function that returns a warmup request object
+ """
+ print("=" * 80)
+ print(f"PERFORMING GLOBAL WARMUP{' AND GC FREEZE' if config.freeze_gc else ''}")
+ print("=" * 80)
+
+ print(f"Performing HTTP warmup{' and GC freeze' if config.freeze_gc else ''}...")
+ async with aiohttp.ClientSession() as session:
+ await send_warmup_requests(session, http_url, build_warmup_request_func)
+ if config.freeze_gc:
+ await call_freeze_gc_http(session, http_url)
+ print(
+ f"HTTP warmup{' and GC freeze' if config.freeze_gc else ''} completed successfully."
+ )
+
+ print(
+ f"Global warmup{' and GC freeze' if config.freeze_gc else ''} operations completed."
+ )
+ print("=" * 80)
+
+
+async def process_results(
+ results_queue: asyncio.Queue,
+ num_requests: int,
+ send_duration: float,
+ total_duration: float,
+ rps: int,
+ duration_secs: int,
+ item_count: int,
+ test_start_time: float,
+ config: BenchmarkConfig,
+ http_mode: str = "UNKNOWN",
+) -> List[Dict[str, Any]]:
+ """
+ Process benchmark results and group them by minute intervals.
+
+ Args:
+        results_queue: Queue of (request_id, elapsed_ms, success, completion_time) tuples
+ num_requests: Total number of requests sent
+ send_duration: Time taken to send all requests
+ total_duration: Total time for all requests to complete
+ rps: Target requests per second
+ duration_secs: Test duration in seconds
+ item_count: Number of items per request
+ test_start_time: Start time of the test
+ config: Benchmark configuration
+ http_mode: Description of the HTTP mode/API being tested
+
+ Returns:
+ List of dictionaries containing minute-by-minute results
+ """
+ all_results = []
+
+ # Collect all results
+ for _ in range(num_requests):
+ result = await results_queue.get()
+ request_id, elapsed_time, success, completion_time = result
+ all_results.append(
+ {
+ "request_id": request_id,
+ "elapsed_time": elapsed_time,
+ "success": success,
+ "completion_time": completion_time,
+ }
+ )
+
+ # Group results by minute intervals
+ minute_results = []
+ num_minutes = int(duration_secs // 60) + (1 if duration_secs % 60 > 0 else 0)
+
+ for minute in range(num_minutes):
+ minute_start = test_start_time + (minute * 60)
+ minute_end = test_start_time + ((minute + 1) * 60)
+
+ # Filter results that completed in this minute
+ minute_data = [
+ r for r in all_results if minute_start <= r["completion_time"] < minute_end
+ ]
+
+ response_times = [r["elapsed_time"] for r in minute_data if r["success"]]
+ successful_requests = len([r for r in minute_data if r["success"]])
+ failed_requests = len([r for r in minute_data if not r["success"]])
+
+ avg_response_time = mean(response_times) if response_times else 0
+
+ # Calculate percentiles using numpy
+ if response_times:
+ p50 = np.percentile(response_times, 50)
+ p90 = np.percentile(response_times, 90)
+ p99 = np.percentile(response_times, 99)
+ else:
+ p50 = p90 = p99 = 0
+
+ minute_result = {
+ "test_duration_secs": duration_secs,
+ "minute_interval": minute + 1,
+ "target_rps": rps,
+ "item_count": item_count,
+ "server_type": config.server_type,
+ "distribution": config.distribution,
+ "unique_requests": config.num_unique_requests,
+ "total_requests": len(minute_data),
+ "successful_requests": successful_requests,
+ "failed_requests": failed_requests,
+ "send_duration_secs": send_duration,
+ "total_duration_secs": total_duration,
+ "avg_response_time_ms": avg_response_time,
+ "p50_response_time_ms": p50,
+ "p90_response_time_ms": p90,
+ "p99_response_time_ms": p99,
+ }
+
+ minute_results.append(minute_result)
+
+ print(
+ f"\nMinute {minute + 1} Summary for RPS {rps}, "
+ f"Duration {duration_secs}s, Item Count {item_count}:"
+ )
+ print(f" Requests completed in minute: {len(minute_data)}")
+ print(f" Successful requests: {successful_requests}")
+ print(f" Failed requests: {failed_requests}")
+ print(f" Average response time: {avg_response_time:.2f} ms")
+ print(f" P50 response time: {p50:.2f} ms")
+ print(f" P90 response time: {p90:.2f} ms")
+ print(f" P99 response time: {p99:.2f} ms")
+
+ # Print overall summary
+ all_response_times = [r["elapsed_time"] for r in all_results if r["success"]]
+ total_successful = len([r for r in all_results if r["success"]])
+ total_failed = len([r for r in all_results if not r["success"]])
+
+ overall_avg = mean(all_response_times) if all_response_times else 0
+ if all_response_times:
+ overall_p50 = np.percentile(all_response_times, 50)
+ overall_p90 = np.percentile(all_response_times, 90)
+ overall_p99 = np.percentile(all_response_times, 99)
+ else:
+ overall_p50 = overall_p90 = overall_p99 = 0
+
+ print(
+ f"\nOverall Summary for RPS {rps}, Duration {duration_secs}s, "
+ f"Item Count {item_count}:"
+ )
+ print(f" Test duration: {duration_secs} seconds")
+ print(f" Server type: {config.server_type}")
+ print(f" HTTP mode: {http_mode}")
+ print(f" Target RPS: {rps}")
+ print(f" Item count: {item_count}")
+ print(f" Distribution: {config.distribution}")
+ print(f" Unique requests generated: {config.num_unique_requests}")
+ print(f" Total requests sent: {num_requests}")
+ print(f" Successful requests: {total_successful}")
+ print(f" Failed requests: {total_failed}")
+ print(f" Time to send all requests: {send_duration:.2f} seconds")
+ print(f" Time for all requests to complete: {total_duration:.2f} seconds")
+ print(f" Average response time: {overall_avg:.2f} ms")
+ print(f" P50 response time: {overall_p50:.2f} ms")
+ print(f" P90 response time: {overall_p90:.2f} ms")
+ print(f" P99 response time: {overall_p99:.2f} ms\n")
+
+ return minute_results
+
+
+def print_csv_results(all_results: List[Dict[str, Any]]) -> None:
+ """
+ Print benchmark results in CSV format.
+
+ Args:
+ all_results: List of result dictionaries from process_results
+ """
+ print("\n" + "=" * 80)
+ print("FINAL CSV RESULTS:")
+ print("=" * 80)
+
+ # CSV Header
+ headers = [
+ "test_duration_secs",
+ "minute_interval",
+ "target_rps",
+ "item_count",
+ "server_type",
+ "distribution",
+ "unique_requests",
+ "total_requests",
+ "successful_requests",
+ "failed_requests",
+ "send_duration_secs",
+ "total_duration_secs",
+ "avg_response_time_ms",
+ "p50_response_time_ms",
+ "p90_response_time_ms",
+ "p99_response_time_ms",
+ ]
+ print(",".join(headers))
+
+ # CSV Data
+ for result in all_results:
+ row = [
+ result["test_duration_secs"],
+ result["minute_interval"],
+ result["target_rps"],
+ result["item_count"],
+ result["server_type"],
+ result["distribution"],
+ result["unique_requests"],
+ result["total_requests"],
+ result["successful_requests"],
+ result["failed_requests"],
+ f"{result['send_duration_secs']:.2f}",
+ f"{result['total_duration_secs']:.2f}",
+ f"{result['avg_response_time_ms']:.2f}",
+ f"{result['p50_response_time_ms']:.2f}",
+ f"{result['p90_response_time_ms']:.2f}",
+ f"{result['p99_response_time_ms']:.2f}",
+ ]
+ print(",".join(map(str, row)))
+
+
+async def run_benchmark_main(
+ config: BenchmarkConfig,
+ run_single_benchmark_func,
+ benchmark_name: str,
+ http_url: str,
+ item_count_values: List[int],
+ additional_info: Optional[Dict[str, Any]] = None,
+ build_warmup_request_func: Optional[Callable[[], Any]] = None,
+) -> None:
+ """
+ Main benchmark orchestration function.
+
+ Args:
+ config: Benchmark configuration
+ run_single_benchmark_func: Async function to run a single benchmark
+ benchmark_name: Name of the benchmark (e.g., "SCORING", "EMBEDDINGS")
+ http_url: URL of the API endpoint
+ item_count_values: List of item counts to test
+ additional_info: Additional information to print in the header
+ build_warmup_request_func: Optional function to build warmup requests
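+
+    Example wiring (sketch; my_config, my_url, build_request and validate are
+    assumed to be defined by the caller):
+        async def run_single(rps, duration_secs, item_count):
+            return await run_generic_benchmark(
+                rps, duration_secs, item_count, my_config, my_url,
+                build_request, validate, "SCORING",
+            )
+
+        asyncio.run(run_benchmark_main(my_config, run_single, "SCORING", my_url, [10]))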
+ """
+ total_combinations = (
+ len(config.duration_secs_values)
+ * len(config.rps_values)
+ * len(item_count_values)
+ )
+
+ print(
+ f"Running benchmarks for {len(config.duration_secs_values)} duration "
+ f"values, {len(config.rps_values)} RPS values, and "
+ f"{len(item_count_values)} item count values = "
+ f"{total_combinations} total combinations"
+ )
+ print(f"Server Type: {config.server_type}")
+ print(f"HTTP Mode: {benchmark_name}")
+ print(f"API URL: {http_url}")
+
+ if additional_info:
+ for key, value in additional_info.items():
+ print(f"{key}: {value}")
+
+ print(f"Items per request (batch size): {item_count_values}")
+ print(f"Profiling Enabled: {config.profile}")
+ print(f"Duration values: {config.duration_secs_values}")
+ print(f"RPS values: {config.rps_values}")
+ print(f"Item count values: {item_count_values}")
+ print("=" * 80)
+
+ # Set up profiler environment
+ setup_profiler(config, benchmark_name)
+
+ # Perform global warmup and GC freeze operations if warmup function is provided
+ if build_warmup_request_func is not None:
+ await perform_global_warmup_and_freeze(
+ config, http_url, build_warmup_request_func
+ )
+
+ all_results = []
+
+ for duration_secs in config.duration_secs_values:
+ for rps in config.rps_values:
+ for item_count in item_count_values:
+ result = await run_single_benchmark_func(rps, duration_secs, item_count)
+ all_results.extend(result) # Extend with minute results
+
+ print_csv_results(all_results)
+
+
+async def run_generic_benchmark(
+ rps: int,
+ duration_secs: int,
+ item_count: int,
+ config: BenchmarkConfig,
+ http_url: str,
+ build_request_func: Callable[[int, int], Tuple[int, Any]],
+ response_validator: Callable[[Dict[str, Any]], bool],
+ api_name: str,
+ request_description: str = "requests",
+) -> List[Dict[str, Any]]:
+ """
+ Generic benchmark runner that can be used for different APIs.
+
+ Args:
+ rps: Requests per second
+ duration_secs: Duration of the test in seconds
+ item_count: Number of items per request (batch size)
+ config: Benchmark configuration
+ http_url: URL of the API endpoint
+ build_request_func: Function to build individual requests
+ response_validator: Function to validate API responses
+ api_name: Name of the API for logging
+ request_description: Description for progress bars
+
+ Returns:
+ List of dictionaries containing minute-by-minute results
+ """
+ num_requests = int(rps * duration_secs)
+ print(
+ f"Starting benchmark with RPS={rps}, Duration={duration_secs}s, "
+ f"Item Count={item_count}, num_requests={num_requests}"
+ )
+ print(f"Server Type: {config.server_type}")
+ print(f"HTTP Mode: {api_name}")
+ print(f"Profiling Enabled: {config.profile}")
+
+ # Build requests in parallel (unmeasured)
+ all_requests = prepare_all_requests_parallel(
+ num_requests, item_count, build_request_func, config, request_description
+ )
+
+ results_queue = asyncio.Queue()
+ tasks = []
+
+ # Track timing for sending requests
+ send_start_time = asyncio.get_event_loop().time()
+
+ # HTTP implementation
+ async with aiohttp.ClientSession(
+ timeout=aiohttp.ClientTimeout(total=300)
+ ) as session:
+
+ # Send START_PROFILE if profiling is enabled
+ if config.profile:
+ await send_profile_request("START_PROFILE", http_url, session=session)
+
+ # Add progress bar for sending requests
+ with tqdm(
+ total=len(all_requests),
+ desc=f"Sending HTTP {request_description} at {rps} RPS",
+ unit="req",
+ ) as pbar:
+ for i, request_data in enumerate(all_requests):
+ request_id = i + 1
+ tasks.append(
+ asyncio.create_task(
+ make_http_call(
+ session,
+ request_data,
+ request_id,
+ results_queue,
+ http_url,
+ response_validator,
+ api_name,
+ )
+ )
+ )
+
+ # Update progress bar
+ pbar.update(1)
+
+ # Throttle based on distribution
+ if i < len(all_requests) - 1:
+ await sleep_with_distribution(config.distribution, rps)
+
+ send_end_time = asyncio.get_event_loop().time()
+ send_duration = send_end_time - send_start_time
+
+ # Wait for all requests to complete with progress tracking
+ print(f"Waiting for {len(tasks)} HTTP {request_description} to complete...")
+ with tqdm(
+ total=len(tasks), desc=f"Completing HTTP {request_description}", unit="req"
+ ) as completion_pbar:
+ completed_tasks = []
+ for task in asyncio.as_completed(tasks):
+ await task
+ completed_tasks.append(task)
+ completion_pbar.update(1)
+
+ # Send STOP_PROFILE if profiling is enabled
+ if config.profile:
+ await send_profile_request("STOP_PROFILE", http_url, session=session)
+
+ completion_end_time = asyncio.get_event_loop().time()
+ total_duration = completion_end_time - send_start_time
+
+ return await process_results(
+ results_queue,
+ num_requests,
+ send_duration,
+ total_duration,
+ rps,
+ duration_secs,
+ item_count,
+ send_start_time,
+ config,
+ api_name,
+ )
diff --git a/benchmark/score/bench_score.py b/benchmark/score/bench_score.py
deleted file mode 100644
index 60bcea24c51..00000000000
--- a/benchmark/score/bench_score.py
+++ /dev/null
@@ -1,603 +0,0 @@
-"""
-SGLang Scoring Benchmark Script
-
-This script benchmarks SGLang's scoring API performance using HTTP requests.
-
-Current Features:
-- HTTP-only implementation (open source compatible)
-- Uses /v1/score API endpoint directly
-- Single item scoring with batching support
-- Configurable RPS, duration, and batch sizes
-- Progress tracking and detailed metrics
-- Poisson and constant request distributions
-
-Usage:
-- Update configuration variables at the top of the file
-- Ensure SGLang server is running on the configured HTTP_URL
-- Run: python bench_score.py
-- Each request will contain ITEM_COUNT_VALUES items for batch scoring
-
-"""
-
-import asyncio
-import concurrent.futures # For parallel prompt generation
-import json
-import os
-import random
-from statistics import mean
-
-import aiohttp
-import numpy as np
-from tqdm import tqdm
-from transformers import AutoTokenizer
-
-###############################################################################
-# CONFIG
-###############################################################################
-# Server Configuration
-SERVER_TYPE = "HTTP" # Fixed to HTTP for open source
-
-# HTTP Configuration
-HTTP_URL = "http://localhost:30000/v1/score" # Use score API directly
-
-# Score API Config
-# ITEM_COUNT_VALUES determines number of items per score request (batch size)
-SCORE_QUERY_TOKENS = 120
-SCORE_ITEM_TOKENS = 180
-SCORE_MODEL_PATH = "Qwen/Qwen3-0.6B"
-SCORE_LABEL_TOKEN_IDS = [9454, 2753] # Yes/No token IDs
-
-# Array of RPS values to test
-RPS_VALUES = [70]
-# Array of duration values to test
-DURATION_SECS_VALUES = [60] # Duration values in seconds
-# Array of item count values to test
-ITEM_COUNT_VALUES = [10] # Number of items per request
-# Number of unique requests to generate (will be reused)
-NUM_UNIQUE_REQUESTS = 100
-DISTRIBUTION = "POISSON" # Options: "CONSTANT", "POISSON"
-
-# Profiling Configuration
-PROFILE = False # Enable profiling with START_PROFILE/STOP_PROFILE prompts
-# Directory for profiler output
-SGLANG_TORCH_PROFILER_DIR = "/shared/user/sglang-oss-trace/remove-decode"
-if PROFILE:
- os.environ["SGLANG_TORCH_PROFILER_DIR"] = SGLANG_TORCH_PROFILER_DIR
-
-# Special token to replicate for precise token counting
-SPECIAL_REPLICATED_TOKEN = "<|im_start|>"
-
-
-###############################################################################
-# REQUEST GENERATION (in parallel)
-###############################################################################
-def prepare_all_requests_parallel(num_requests, item_count):
- """
- Generates unique requests in parallel, then reuses them to create the
- full request list. Returns a list of str prompts for HTTP.
- """
- # Load tokenizer once here to verify special token and get precise counts
- print("Loading tokenizer...")
- tokenizer = AutoTokenizer.from_pretrained(SCORE_MODEL_PATH)
-
- # Verify that our special token produces exactly 1 token
- special_token_count = len(
- tokenizer.encode(SPECIAL_REPLICATED_TOKEN, add_special_tokens=False)
- )
- print(
- f"Special token '{SPECIAL_REPLICATED_TOKEN}' produces "
- f"{special_token_count} token(s)"
- )
-
- def generate_text_with_token_count(num_toks):
- """Generate text with precise token count using replicated token."""
- if special_token_count == 1:
- # Simple case: token maps to exactly 1 token
- return SPECIAL_REPLICATED_TOKEN * num_toks
- else:
- print(
- f"Special token '{SPECIAL_REPLICATED_TOKEN}' produces more than 1 token!!!"
- )
- # Handle case where special token produces multiple tokens
- # Repeat the token enough times to get at least num_toks tokens
- repetitions = (num_toks + special_token_count - 1) // special_token_count
- text = SPECIAL_REPLICATED_TOKEN * repetitions
-
- # Verify we got the expected token count (approximately)
- actual_tokens = len(tokenizer.encode(text, add_special_tokens=False))
- if actual_tokens < num_toks:
- print(
- f"Warning: Generated {actual_tokens} tokens, "
- f"expected {num_toks}"
- )
-
- return text
-
- def build_request(index):
- """Build a single request using the shared tokenizer."""
- try:
- # Generate query and items for score API
- query = generate_text_with_token_count(SCORE_QUERY_TOKENS)
- items = [
- generate_text_with_token_count(SCORE_ITEM_TOKENS)
- for _ in range(item_count)
- ]
-
- # Return as dict for score API format
- score_data = {
- "query": query,
- "items": items,
- "label_token_ids": SCORE_LABEL_TOKEN_IDS,
- "model": SCORE_MODEL_PATH,
- }
- return (index, score_data)
-
- except Exception as e:
- print(f"Error building request {index}: {e}")
- return (index, None)
-
- # Generate only the unique requests
- unique_requests = [None] * NUM_UNIQUE_REQUESTS
-
- # Use ThreadPoolExecutor instead of ProcessPoolExecutor to avoid
- # tokenizer loading issues across processes
- max_workers = min(8, os.cpu_count() or 1) # Limit to 8 threads max
-
- with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
- futures = []
- for i in tqdm(
- range(NUM_UNIQUE_REQUESTS), desc="Submitting prompt generation tasks"
- ):
- future = executor.submit(build_request, i)
- futures.append(future)
-
- # Collect results as they complete
- for f in tqdm(
- concurrent.futures.as_completed(futures),
- desc="Building unique requests",
- total=NUM_UNIQUE_REQUESTS,
- ):
- try:
- index, req_data = f.result()
- if req_data is not None:
- unique_requests[index] = req_data
- else:
- print(f"Failed to build request {index}")
- except Exception as e:
- print(f"Error processing request result: {e}")
-
- # Check if we have any valid requests
- valid_requests = [req for req in unique_requests if req is not None]
- if not valid_requests:
- raise RuntimeError("Failed to generate any valid requests")
-
- print(
- f"Successfully generated {len(valid_requests)} out of "
- f"{NUM_UNIQUE_REQUESTS} unique requests"
- )
-
- # Create the full request list by cycling through unique requests
- print(
- f"Reusing {len(valid_requests)} unique requests to create "
- f"{num_requests} total requests..."
- )
- all_requests = []
- for i in tqdm(range(num_requests), desc="Reusing requests"):
- unique_index = i % len(valid_requests)
- all_requests.append(valid_requests[unique_index])
-
- print("All prompts/requests prepared.\n")
- return all_requests
-
-
-###############################################################################
-# PROFILING HELPERS
-###############################################################################
-async def send_profile_request(profile_text, item_count, session=None):
- """Send a profile request and wait for completion."""
- try:
- if session:
- print(f"Sending {profile_text} request via HTTP...")
-
- # Determine the correct endpoint
- base_url = HTTP_URL.rsplit("/", 2)[0] # Remove /v1/score
- if profile_text == "START_PROFILE":
- endpoint_url = f"{base_url}/start_profile"
- elif profile_text == "STOP_PROFILE":
- endpoint_url = f"{base_url}/stop_profile"
- else:
- print(f"Unknown profile request: {profile_text}")
- return
-
- headers = {"Content-Type": "application/json"}
-
- async with session.post(endpoint_url, headers=headers) as resp:
- resp_text = await resp.text()
- if resp.status == 200:
- print(f"{profile_text} request completed")
- else:
- print(
- f"{profile_text} request failed with status "
- f"{resp.status}: {resp_text}"
- )
- else:
- print(f"Cannot send {profile_text} request - missing session")
-
- except Exception as e:
- print(f"Error sending {profile_text} request: {e}")
-
-
-###############################################################################
-# HTTP CALLS
-###############################################################################
-def build_http_request_json(score_data):
- """Build HTTP request JSON for /v1/score endpoint.
-
- Score API format:
- {
- "query": "Generated query text with SCORE_QUERY_TOKENS tokens",
- "items": ["item1", "item2", ...], # Items to score with SCORE_ITEM_TOKENS each
- "label_token_ids": [token_id1, token_id2], # Target token IDs
- "model": "/path/to/model"
- }
-
- Args:
- score_data: A dict containing query, items, label_token_ids, and model
- """
- # score_data is already in the correct format from build_request
- return json.dumps(score_data)
-
-
-async def make_http_call(session, score_data, request_id, results_queue):
- """HTTP call to /v1/score endpoint."""
- try:
- start_time = asyncio.get_event_loop().time()
-
- request_json = build_http_request_json(score_data)
- headers = {"Content-Type": "application/json"}
-
- async with session.post(HTTP_URL, data=request_json, headers=headers) as resp:
- resp_text = await resp.text()
-
- if resp.status != 200:
- print(
- f"[HTTP] Request {request_id} failed with status "
- f"{resp.status}: {resp_text}"
- )
- completion_time = asyncio.get_event_loop().time()
- await results_queue.put((request_id, 0, False, completion_time))
- return
-
- # Parse score API response
- try:
- response_data = json.loads(resp_text)
- # Score API returns scores for each item
- # For now, just verify we got a valid response
- if "scores" in response_data or "logprobs" in response_data:
- success = True
- else:
- print(
- f"[HTTP] Request {request_id} missing expected fields in response"
- )
- success = False
- except json.JSONDecodeError:
- print(f"[HTTP] Request {request_id} failed to parse JSON response")
- success = False
-
- completion_time = asyncio.get_event_loop().time()
- elapsed_time = (completion_time - start_time) * 1000
- await results_queue.put((request_id, elapsed_time, success, completion_time))
-
- except Exception as e:
- print(f"[HTTP] Error for request {request_id}: {e}")
- completion_time = asyncio.get_event_loop().time()
- await results_queue.put((request_id, 0, False, completion_time))
-
-
-###############################################################################
-# RESULTS
-###############################################################################
-async def process_results(
- results_queue,
- num_requests,
- send_duration,
- total_duration,
- rps,
- duration_secs,
- item_count,
- test_start_time,
-):
- """Processes results and groups them by minute intervals.
- Returns a list of dictionaries, one for each minute."""
- all_results = []
-
- # Collect all results
- for _ in range(num_requests):
- result = await results_queue.get()
- request_id, elapsed_time, success, completion_time = result
- all_results.append(
- {
- "request_id": request_id,
- "elapsed_time": elapsed_time,
- "success": success,
- "completion_time": completion_time,
- }
- )
-
- # Group results by minute intervals
- minute_results = []
- num_minutes = int(duration_secs // 60) + (1 if duration_secs % 60 > 0 else 0)
-
- for minute in range(num_minutes):
- minute_start = test_start_time + (minute * 60)
- minute_end = test_start_time + ((minute + 1) * 60)
-
- # Filter results that completed in this minute
- minute_data = [
- r for r in all_results if minute_start <= r["completion_time"] < minute_end
- ]
-
- response_times = [r["elapsed_time"] for r in minute_data if r["success"]]
- successful_requests = len([r for r in minute_data if r["success"]])
- failed_requests = len([r for r in minute_data if not r["success"]])
-
- avg_response_time = mean(response_times) if response_times else 0
-
- # Calculate percentiles using numpy
- if response_times:
- p50 = np.percentile(response_times, 50)
- p90 = np.percentile(response_times, 90)
- p99 = np.percentile(response_times, 99)
- else:
- p50 = p90 = p99 = 0
-
- minute_result = {
- "test_duration_secs": duration_secs,
- "minute_interval": minute + 1,
- "target_rps": rps,
- "item_count": item_count,
- "server_type": SERVER_TYPE,
- "distribution": DISTRIBUTION,
- "unique_requests": NUM_UNIQUE_REQUESTS,
- "total_requests": len(minute_data),
- "successful_requests": successful_requests,
- "failed_requests": failed_requests,
- "send_duration_secs": send_duration,
- "total_duration_secs": total_duration,
- "avg_response_time_ms": avg_response_time,
- "p50_response_time_ms": p50,
- "p90_response_time_ms": p90,
- "p99_response_time_ms": p99,
- }
-
- minute_results.append(minute_result)
-
- print(
- f"\nMinute {minute + 1} Summary for RPS {rps}, "
- f"Duration {duration_secs}s, Item Count {item_count}:"
- )
- print(f" Requests completed in minute: {len(minute_data)}")
- print(f" Successful requests: {successful_requests}")
- print(f" Failed requests: {failed_requests}")
- print(f" Average response time: {avg_response_time:.2f} ms")
- print(f" P50 response time: {p50:.2f} ms")
- print(f" P90 response time: {p90:.2f} ms")
- print(f" P99 response time: {p99:.2f} ms")
-
- # Also print overall summary
- all_response_times = [r["elapsed_time"] for r in all_results if r["success"]]
- total_successful = len([r for r in all_results if r["success"]])
- total_failed = len([r for r in all_results if not r["success"]])
-
- overall_avg = mean(all_response_times) if all_response_times else 0
- if all_response_times:
- overall_p50 = np.percentile(all_response_times, 50)
- overall_p90 = np.percentile(all_response_times, 90)
- overall_p99 = np.percentile(all_response_times, 99)
- else:
- overall_p50 = overall_p90 = overall_p99 = 0
-
- print(
- f"\nOverall Summary for RPS {rps}, Duration {duration_secs}s, "
- f"Item Count {item_count}:"
- )
- print(f" Test duration: {duration_secs} seconds")
- print(f" Server type: {SERVER_TYPE}")
- print(f" HTTP mode: SINGLE_ITEM_SCORING")
- print(f" Target RPS: {rps}")
- print(f" Item count: {item_count}")
- print(f" Distribution: {DISTRIBUTION}")
- print(f" Unique requests generated: {NUM_UNIQUE_REQUESTS}")
- print(f" Total requests sent: {num_requests}")
- print(f" Successful requests: {total_successful}")
- print(f" Failed requests: {total_failed}")
- print(f" Time to send all requests: {send_duration:.2f} seconds")
- print(f" Time for all requests to complete: {total_duration:.2f} seconds")
- print(f" Average response time: {overall_avg:.2f} ms")
- print(f" P50 response time: {overall_p50:.2f} ms")
- print(f" P90 response time: {overall_p90:.2f} ms")
- print(f" P99 response time: {overall_p99:.2f} ms\n")
-
- return minute_results
-
-
-###############################################################################
-# MAIN
-###############################################################################
-async def run_benchmark(rps, duration_secs, item_count):
- """Run a single benchmark with the given RPS value."""
- num_requests = int(rps * duration_secs)
- print(
- f"Starting benchmark with RPS={rps}, Duration={duration_secs}s, "
- f"Item Count={item_count}, num_requests={num_requests}"
- )
- print(f"Server Type: {SERVER_TYPE}")
- print(f"HTTP Mode: SINGLE_ITEM_SCORING")
- print(f"Profiling Enabled: {PROFILE}")
-
- # Build requests in parallel (unmeasured)
- all_requests = prepare_all_requests_parallel(num_requests, item_count)
-
- results_queue = asyncio.Queue()
- tasks = []
-
- # Track timing for sending requests
- send_start_time = asyncio.get_event_loop().time()
-
- # HTTP implementation (open source only supports HTTP with /v1/score API)
- async with aiohttp.ClientSession(
- timeout=aiohttp.ClientTimeout(total=300)
- ) as session:
-
- # Send START_PROFILE if profiling is enabled
- if PROFILE:
- await send_profile_request("START_PROFILE", item_count, session=session)
-
- # Add progress bar for sending requests
- with tqdm(
- total=len(all_requests),
- desc=f"Sending HTTP score requests at {rps} RPS",
- unit="req",
- ) as pbar:
- for i, score_data in enumerate(all_requests):
- request_id = i + 1
- tasks.append(
- asyncio.create_task(
- make_http_call(session, score_data, request_id, results_queue)
- )
- )
-
- # Update progress bar
- pbar.update(1)
-
- # Throttle based on distribution
- if i < len(all_requests) - 1:
- if DISTRIBUTION == "CONSTANT":
- interval = 1 / rps
- await asyncio.sleep(interval)
- elif DISTRIBUTION == "POISSON":
- # For Poisson process, inter-arrival times follow
- # exponential distribution
- interval = random.expovariate(rps)
- await asyncio.sleep(interval)
- else:
- raise ValueError(
- f"Unknown distribution: {DISTRIBUTION}. "
- f"Use 'CONSTANT' or 'POISSON'."
- )
-
- send_end_time = asyncio.get_event_loop().time()
- send_duration = send_end_time - send_start_time
-
- # Wait for all requests to complete with progress tracking
- print(f"Waiting for {len(tasks)} HTTP score requests to complete...")
- with tqdm(
- total=len(tasks), desc="Completing HTTP score requests", unit="req"
- ) as completion_pbar:
- completed_tasks = []
- for task in asyncio.as_completed(tasks):
- await task
- completed_tasks.append(task)
- completion_pbar.update(1)
-
- # Send STOP_PROFILE if profiling is enabled
- if PROFILE:
- await send_profile_request("STOP_PROFILE", item_count, session=session)
-
- completion_end_time = asyncio.get_event_loop().time()
- total_duration = completion_end_time - send_start_time
-
- return await process_results(
- results_queue,
- num_requests,
- send_duration,
- total_duration,
- rps,
- duration_secs,
- item_count,
- send_start_time,
- )
-
-
-async def main():
- """Main function that runs benchmarks for all RPS values."""
- total_combinations = (
- len(DURATION_SECS_VALUES) * len(RPS_VALUES) * len(ITEM_COUNT_VALUES)
- )
- print(
- f"Running benchmarks for {len(DURATION_SECS_VALUES)} duration "
- f"values, {len(RPS_VALUES)} RPS values, and "
- f"{len(ITEM_COUNT_VALUES)} item count values = "
- f"{total_combinations} total combinations"
- )
- print(f"Server Type: {SERVER_TYPE}")
- print(f"HTTP Mode: SINGLE_ITEM_SCORING")
- print(f"Score API URL: {HTTP_URL}")
- print(f"Query tokens per request: {SCORE_QUERY_TOKENS}")
- print(f"Item tokens per item: {SCORE_ITEM_TOKENS}")
- print(f"Items per request (batch size): {ITEM_COUNT_VALUES}")
- print(f"Profiling Enabled: {PROFILE}")
- print(f"Duration values: {DURATION_SECS_VALUES}")
- print(f"RPS values: {RPS_VALUES}")
- print(f"Item count values: {ITEM_COUNT_VALUES}")
- print("=" * 80)
-
- all_results = []
-
- for duration_secs in DURATION_SECS_VALUES:
- for rps in RPS_VALUES:
- for item_count in ITEM_COUNT_VALUES:
- result = await run_benchmark(rps, duration_secs, item_count)
- all_results.extend(result) # Extend with minute results
-
- # Print CSV header and results
- print("\n" + "=" * 80)
- print("FINAL CSV RESULTS:")
- print("=" * 80)
-
- # CSV Header
- headers = [
- "test_duration_secs",
- "minute_interval",
- "target_rps",
- "item_count",
- "server_type",
- "distribution",
- "unique_requests",
- "total_requests",
- "successful_requests",
- "failed_requests",
- "send_duration_secs",
- "total_duration_secs",
- "avg_response_time_ms",
- "p50_response_time_ms",
- "p90_response_time_ms",
- "p99_response_time_ms",
- ]
- print(",".join(headers))
-
- # CSV Data
- for result in all_results:
- row = [
- result["test_duration_secs"],
- result["minute_interval"],
- result["target_rps"],
- result["item_count"],
- result["server_type"],
- result["distribution"],
- result["unique_requests"],
- result["total_requests"],
- result["successful_requests"],
- result["failed_requests"],
- f"{result['send_duration_secs']:.2f}",
- f"{result['total_duration_secs']:.2f}",
- f"{result['avg_response_time_ms']:.2f}",
- f"{result['p50_response_time_ms']:.2f}",
- f"{result['p90_response_time_ms']:.2f}",
- f"{result['p99_response_time_ms']:.2f}",
- ]
- print(",".join(map(str, row)))
-
-
-if __name__ == "__main__":
- asyncio.run(main())
diff --git a/docker/Dockerfile b/docker/Dockerfile
index fd6b181bfae..3b9a420b31f 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -1,7 +1,8 @@
-ARG CUDA_VERSION=12.6.1
-FROM nvidia/cuda:${CUDA_VERSION}-cudnn-devel-ubuntu22.04
+ARG CUDA_VERSION=12.9.1
+FROM nvidia/cuda:${CUDA_VERSION}-cudnn-devel-ubuntu22.04 AS base
ARG BUILD_TYPE=all
+ARG BRANCH_TYPE=remote
ARG DEEPEP_COMMIT=b92d0d4860ce6866cd6d31bfbae937f9a7a3772b
ARG CMAKE_BUILD_PARALLEL_LEVEL=2
ENV DEBIAN_FRONTEND=noninteractive \
@@ -35,7 +36,7 @@ RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \
ibverbs-providers infiniband-diags perftest \
libgoogle-glog-dev libgtest-dev libjsoncpp-dev libunwind-dev \
libboost-all-dev libssl-dev \
- libgrpc-dev libgrpc++-dev libprotobuf-dev protobuf-compiler-grpc \
+ libgrpc-dev libgrpc++-dev libprotobuf-dev protobuf-compiler protobuf-compiler-grpc \
pybind11-dev \
libhiredis-dev libcurl4-openssl-dev \
libczmq4 libczmq-dev \
@@ -58,10 +59,21 @@ RUN mkdir -p /tmp/gdrcopy && cd /tmp \
# Fix DeepEP IBGDA symlink
RUN ln -sf /usr/lib/x86_64-linux-gnu/libmlx5.so.1 /usr/lib/x86_64-linux-gnu/libmlx5.so
-# Clone and install SGLang
+FROM scratch AS local_src
+COPY . /src
+
+FROM base AS build-image
+# Install SGLang
WORKDIR /sgl-workspace
+ARG BRANCH_TYPE
+COPY --from=local_src /src /tmp/local_src
+RUN if [ "$BRANCH_TYPE" = "local" ]; then \
+ cp -r /tmp/local_src /sgl-workspace/sglang; \
+ else \
+ git clone --depth=1 https://github.com/sgl-project/sglang.git /sgl-workspace/sglang; \
+ fi \
+ && rm -rf /tmp/local_src
RUN python3 -m pip install --no-cache-dir --upgrade pip setuptools wheel html5lib six \
- && git clone --depth=1 https://github.com/sgl-project/sglang.git \
&& cd sglang \
&& case "$CUDA_VERSION" in \
12.6.1) CUINDEX=126 ;; \
@@ -72,17 +84,15 @@ RUN python3 -m pip install --no-cache-dir --upgrade pip setuptools wheel html5li
&& python3 -m pip install --no-cache-dir -e "python[${BUILD_TYPE}]" --extra-index-url https://download.pytorch.org/whl/cu${CUINDEX} \
&& python3 -m pip install --no-cache-dir nvidia-nccl-cu12==2.27.6 --force-reinstall --no-deps \
&& python3 -m flashinfer --download-cubin \
- && if [ "$CUDA_VERSION" = "12.8.1" ]; then \
- python3 -m pip install --no-cache-dir https://github.com/sgl-project/whl/releases/download/v0.3.5/sgl_kernel-0.3.5+cu128-cp310-abi3-manylinux2014_x86_64.whl --force-reinstall --no-deps ; \
- fi \
- && if [ "$CUDA_VERSION" = "12.9.1" ]; then \
- python3 -m pip install --no-cache-dir https://github.com/sgl-project/whl/releases/download/v0.3.5/sgl_kernel-0.3.5+cu129-cp310-abi3-manylinux2014_x86_64.whl --force-reinstall --no-deps ; \
+ && if [ "$CUDA_VERSION" = "12.6.1" ]; then \
+ python3 -m pip install --no-cache-dir https://github.com/sgl-project/whl/releases/download/v0.3.9.post2/sgl_kernel-0.3.9.post2+cu124-cp310-abi3-manylinux2014_x86_64.whl --force-reinstall --no-deps ; \
fi
# Download source files
RUN wget https://developer.download.nvidia.com/compute/redist/nvshmem/3.3.9/source/nvshmem_src_cuda12-all-all-3.3.9.tar.gz && \
git clone https://github.com/deepseek-ai/DeepEP.git && \
- cd DeepEP && git checkout ${DEEPEP_COMMIT} && cd .. && \
+ cd DeepEP && git checkout ${DEEPEP_COMMIT} && sed -i 's/#define NUM_CPU_TIMEOUT_SECS 100/#define NUM_CPU_TIMEOUT_SECS 1000/' csrc/kernels/configs.cuh && \
+ cd .. && \
tar -xf nvshmem_src_cuda12-all-all-3.3.9.tar.gz && \
mv nvshmem_src nvshmem && \
rm -f /sgl-workspace/nvshmem_src_cuda12-all-all-3.3.9.tar.gz
@@ -127,7 +137,8 @@ RUN python3 -m pip install --no-cache-dir \
uv \
wheel \
scikit-build-core \
- nixl
+ nixl \
+ py-spy
# Install development tools and utilities
RUN apt-get update && apt-get install -y \
@@ -204,6 +215,19 @@ RUN wget https://github.com/Kitware/CMake/releases/download/v3.31.1/cmake-3.31.1
&& cp -r cmake-3.31.1-linux-x86_64/share/* /usr/local/share/ \
&& rm -rf cmake-3.31.1-linux-x86_64 cmake-3.31.1-linux-x86_64.tar.gz
+# Install Rust toolchain for sgl-router
+ENV PATH="/root/.cargo/bin:${PATH}"
+RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y \
+ && rustc --version && cargo --version
+
+# Build and install sgl-router
+RUN python3 -m pip install --no-cache-dir setuptools-rust \
+ && cd /sgl-workspace/sglang/sgl-router \
+ && cargo build --release \
+ && python3 -m pip install --no-cache-dir . \
+ && rm -rf /root/.cache
+
+
# Add yank script
COPY --chown=root:root <<-"EOF" /usr/local/bin/yank
#!/bin/bash
diff --git a/docker/Dockerfile.gb200 b/docker/Dockerfile.gb200
index d0e2848cf6d..164326e2323 100644
--- a/docker/Dockerfile.gb200
+++ b/docker/Dockerfile.gb200
@@ -4,6 +4,7 @@ FROM nvidia/cuda:${CUDA_VERSION}-cudnn-devel-ubuntu22.04
ARG BUILD_TYPE=blackwell
ARG DEEPEP_COMMIT=1b14ad661c7640137fcfe93cccb2694ede1220b0
ARG CMAKE_BUILD_PARALLEL_LEVEL=2
+ARG SGL_KERNEL_VERSION=0.3.9.post2
ENV DEBIAN_FRONTEND=noninteractive \
CUDA_HOME=/usr/local/cuda \
GDRCOPY_HOME=/usr/src/gdrdrv-2.4.4/ \
@@ -61,11 +62,12 @@ RUN python3 -m pip install --no-cache-dir --upgrade pip setuptools wheel html5li
12.9.1) CUINDEX=129 ;; \
*) echo "Unsupported CUDA version: $CUDA_VERSION" && exit 1 ;; \
esac \
- && python3 -m pip install --no-cache-dir -e "python[${BUILD_TYPE}]" --extra-index-url https://download.pytorch.org/whl/cu${CUINDEX} \
&& if [ "$CUDA_VERSION" = "12.9.1" ]; then \
python3 -m pip install --no-cache-dir nvidia-nccl-cu12==2.27.6 --force-reinstall --no-deps ; \
- python3 -m pip install --no-cache-dir https://github.com/sgl-project/whl/releases/download/v0.3.4/sgl_kernel-0.3.4+cu129-cp310-abi3-manylinux2014_$(uname -m).whl --force-reinstall --no-deps ; \
- fi
+ python3 -m pip install --no-cache-dir https://github.com/sgl-project/whl/releases/download/v${SGL_KERNEL_VERSION}/sgl_kernel-${SGL_KERNEL_VERSION}+cu129-cp310-abi3-manylinux2014_$(uname -m).whl --force-reinstall --no-deps ; \
+ fi \
+ && python3 -m pip install --no-cache-dir -e "python[${BUILD_TYPE}]" --extra-index-url https://download.pytorch.org/whl/cu${CUINDEX} \
+ && python3 -m flashinfer --download-cubin
# Download source files
RUN wget https://developer.download.nvidia.com/compute/redist/nvshmem/3.3.9/source/nvshmem_src_cuda12-all-all-3.3.9.tar.gz && \
@@ -85,7 +87,7 @@ RUN cd /sgl-workspace/nvshmem && \
NVSHMEM_PMIX_SUPPORT=0 \
NVSHMEM_TIMEOUT_DEVICE_POLLING=0 \
NVSHMEM_USE_GDRCOPY=1 \
- cmake -S . -B build/ -DCMAKE_INSTALL_PREFIX=${NVSHMEM_DIR} -DCMAKE_CUDA_ARCHITECTURES="100;120" && \
+ cmake -S . -B build/ -DCMAKE_INSTALL_PREFIX=${NVSHMEM_DIR} -DCMAKE_CUDA_ARCHITECTURES="90;100;120" && \
cmake --build build --target install -j${CMAKE_BUILD_PARALLEL_LEVEL}
# Install DeepEP
@@ -105,11 +107,6 @@ RUN python3 -m pip install --no-cache-dir \
wheel \
scikit-build-core
-# These will be automatically installed by future versions of flashinfer after 0.2.9rc2
-RUN python3 -m pip install --no-cache-dir \
- nvidia-cudnn-cu12 \
- nvidia-cudnn-frontend
-
# Install nixl kv transfer backend
RUN python3 -m pip install --no-cache-dir \
nixl
diff --git a/docker/Dockerfile.npu b/docker/Dockerfile.npu
index 8ab690ec28c..3f9b0ae425d 100644
--- a/docker/Dockerfile.npu
+++ b/docker/Dockerfile.npu
@@ -39,7 +39,11 @@ RUN apt-get update -y && apt upgrade -y && apt-get install -y \
clang \
locales \
ccache \
+ openssl \
+ libssl-dev \
+ pkg-config \
ca-certificates \
+ protobuf-compiler \
&& rm -rf /var/cache/apt/* \
&& rm -rf /var/lib/apt/lists/* \
&& update-ca-certificates \
@@ -48,11 +52,18 @@ RUN apt-get update -y && apt upgrade -y && apt-get install -y \
ENV LANG=en_US.UTF-8
ENV LANGUAGE=en_US:en
ENV LC_ALL=en_US.UTF-8
+ENV PATH="/root/.cargo/bin:${PATH}"
# Install dependencies
# TODO: install from pypi released memfabric
RUN pip install $MEMFABRIC_URL --no-cache-dir
+RUN pip install setuptools-rust wheel build --no-cache-dir
+
+# install rustup from rustup.rs
+RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y \
+ && rustc --version && cargo --version && protoc --version
+
# Install vLLM
RUN git clone --depth 1 https://github.com/vllm-project/vllm.git --branch $VLLM_TAG && \
(cd vllm && VLLM_TARGET_DEVICE="empty" pip install -v . --no-cache-dir) && rm -rf vllm
@@ -65,7 +76,9 @@ RUN pip install torch==$PYTORCH_VERSION torchvision==$TORCHVISION_VERSION --inde
# Install SGLang
RUN git clone https://github.com/sgl-project/sglang --branch $SGLANG_TAG && \
- (cd sglang/python && pip install -v .[srt_npu] --no-cache-dir) && rm -rf sglang
+ (cd sglang/python && pip install -v .[srt_npu] --no-cache-dir) && \
+ (cd sglang/sgl-router && python -m build && pip install --force-reinstall dist/*.whl) && \
+ rm -rf sglang
# Install Deep-ep
RUN git clone --branch $SGLANG_KERNEL_NPU_TAG https://github.com/sgl-project/sgl-kernel-npu.git \
diff --git a/docker/Dockerfile.rocm b/docker/Dockerfile.rocm
index 2111fb35bcf..2c3c9c0bedb 100644
--- a/docker/Dockerfile.rocm
+++ b/docker/Dockerfile.rocm
@@ -1,33 +1,48 @@
# Usage (to build SGLang ROCm docker image):
-# docker build --build-arg SGL_BRANCH=v0.4.9.post1 --build-arg GPU_ARCH=gfx942 -t v0.4.9.post1-rocm630-mi30x -f Dockerfile.rocm .
-# docker build --build-arg SGL_BRANCH=v0.4.9.post1 --build-arg GPU_ARCH=gfx950 -t v0.4.9.post1-rocm700-mi35x -f Dockerfile.rocm .
+# docker build --build-arg SGL_BRANCH=v0.5.2 --build-arg GPU_ARCH=gfx942 -t v0.5.2-rocm630-mi30x -f Dockerfile.rocm .
+# docker build --build-arg SGL_BRANCH=v0.5.2 --build-arg GPU_ARCH=gfx942-rocm700 -t v0.5.2-rocm700-mi30x -f Dockerfile.rocm .
+# docker build --build-arg SGL_BRANCH=v0.5.2 --build-arg GPU_ARCH=gfx950 -t v0.5.2-rocm700-mi35x -f Dockerfile.rocm .
+
# Default base images
-ARG BASE_IMAGE_950="rocm/sgl-dev:rocm7.0_preview_ubuntu_22.04_vllm_0.9.2_mi35X_prealpha"
ARG BASE_IMAGE_942="rocm/sgl-dev:vllm20250114"
+ARG BASE_IMAGE_942_ROCM700="rocm/sgl-dev:rocm7-vllm-20250904"
+ARG BASE_IMAGE_950="rocm/sgl-dev:rocm7-vllm-20250904"
# This is necessary for scope purpose
ARG GPU_ARCH=gfx950
# ===============================
-# Base image 942 and args
+# Base image 942 with rocm630 and args
FROM $BASE_IMAGE_942 AS gfx942
ENV BUILD_VLLM="0"
ENV BUILD_TRITON="1"
ENV BUILD_LLVM="0"
ENV BUILD_AITER_ALL="1"
+ENV BUILD_MOONCAKE="1"
ENV AITER_COMMIT="v0.1.4"
ENV NO_DEPS_FLAG=""
+# ===============================
+# Base image 942 with rocm700 and args
+FROM $BASE_IMAGE_942_ROCM700 AS gfx942-rocm700
+ENV BUILD_VLLM="0"
+ENV BUILD_TRITON="0"
+ENV BUILD_LLVM="0"
+ENV BUILD_AITER_ALL="1"
+ENV BUILD_MOONCAKE="1"
+ENV AITER_COMMIT="v0.1.5"
+ENV NO_DEPS_FLAG=""
+
# ===============================
# Base image 950 and args
FROM $BASE_IMAGE_950 AS gfx950
ENV BUILD_VLLM="0"
ENV BUILD_TRITON="0"
+ENV BUILD_LLVM="0"
ENV BUILD_AITER_ALL="1"
-ENV BUILD_LLVM="1"
-ENV AITER_COMMIT="v0.1.4"
-ENV HIP_CLANG_PATH="/sgl-workspace/llvm-project/build/bin/"
+ENV BUILD_MOONCAKE="1"
+ENV AITER_COMMIT="v0.1.5"
ENV NO_DEPS_FLAG="--no-deps"
# ===============================
@@ -36,7 +51,7 @@ FROM ${GPU_ARCH}
# This is necessary for scope purpose, again
ARG GPU_ARCH=gfx950
-ENV GPU_ARCH_LIST=${GPU_ARCH:-${PYTORCH_ROCM_ARCH}}
+ENV GPU_ARCH_LIST=${GPU_ARCH%-*}
ARG SGL_REPO="https://github.com/sgl-project/sglang.git"
ARG SGL_DEFAULT="main"
@@ -51,6 +66,9 @@ ARG LLVM_REPO="https://github.com/jrbyrnes/llvm-project.git"
ARG LLVM_BRANCH="MainOpSelV2"
ARG LLVM_COMMIT="6520ace8227ffe2728148d5f3b9872a870b0a560"
+ARG MOONCAKE_REPO="https://github.com/kvcache-ai/Mooncake.git"
+ARG MOONCAKE_COMMIT="dcdf1c784b40aa6975a8ed89fe26321b028e40e8"
+
USER root
# Install some basic utilities
@@ -62,6 +80,7 @@ WORKDIR /sgl-workspace
# -----------------------
# llvm
RUN if [ "$BUILD_LLVM" = "1" ]; then \
+    export HIP_CLANG_PATH="/sgl-workspace/llvm-project/build/bin/" && \
git clone --single-branch ${LLVM_REPO} -b ${LLVM_BRANCH} \
&& cd llvm-project \
&& git checkout ${LLVM_COMMIT} \
@@ -113,6 +132,30 @@ RUN if [ "$BUILD_VLLM" = "1" ]; then \
&& python setup.py develop; \
fi
+# -----------------------
+# Build Mooncake
+ENV PATH=$PATH:/usr/local/go/bin
+
+RUN if [ "$BUILD_MOONCAKE" = "1" ]; then \
+ apt update && apt install -y zip unzip wget && \
+ apt install -y gcc make libtool autoconf librdmacm-dev rdmacm-utils infiniband-diags ibverbs-utils perftest ethtool libibverbs-dev rdma-core && \
+ apt install -y openssh-server openmpi-bin openmpi-common libopenmpi-dev && \
+ git clone ${MOONCAKE_REPO} && \
+ cd Mooncake && \
+ git checkout ${MOONCAKE_COMMIT} && \
+ git submodule update --init --recursive && \
+ bash dependencies.sh -y && \
+ rm -rf /usr/local/go && \
+ wget https://go.dev/dl/go1.22.2.linux-amd64.tar.gz && \
+ tar -C /usr/local -xzf go1.22.2.linux-amd64.tar.gz && \
+ rm go1.22.2.linux-amd64.tar.gz && \
+ mkdir -p build && \
+ cd build && \
+ cmake .. -DUSE_ETCD=ON && \
+ make -j "$(nproc)" && make install; \
+ fi
+
+
# -----------------------
# Build SGLang
ARG BUILD_TYPE=all
@@ -120,7 +163,7 @@ ARG BUILD_TYPE=all
RUN pip install IPython \
&& pip install orjson \
&& pip install python-multipart \
- && pip install torchao \
+ && pip install torchao==0.9.0 \
&& pip install pybind11
RUN pip uninstall -y sgl_kernel sglang
diff --git a/docker/Dockerfile.router b/docker/Dockerfile.router
index 07633e50230..ded98bb8aeb 100644
--- a/docker/Dockerfile.router
+++ b/docker/Dockerfile.router
@@ -39,13 +39,13 @@ ENV PATH="/root/.cargo/bin:${PATH}"
# install dependencies
RUN apt update -y \
- && apt install -y git build-essential libssl-dev pkg-config \
+ && apt install -y git build-essential libssl-dev pkg-config protobuf-compiler \
&& rm -rf /var/lib/apt/lists/* \
&& apt clean
# install rustup from rustup.rs
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y \
- && rustc --version && cargo --version
+ && rustc --version && cargo --version && protoc --version
# pull the github repository
RUN cd /opt \
diff --git a/docker/Dockerfile.xeon b/docker/Dockerfile.xeon
index 087e12ccaef..fdc439b3096 100644
--- a/docker/Dockerfile.xeon
+++ b/docker/Dockerfile.xeon
@@ -31,8 +31,7 @@ ENV PIP_ROOT_USER_ACTION=ignore
ENV CONDA_PREFIX=/sgl-workspace/miniforge3
RUN pip config set global.index-url https://download.pytorch.org/whl/cpu && \
- pip config set global.extra-index-url https://pypi.org/simple && \
- pip install intel-openmp
+ pip config set global.extra-index-url https://pypi.org/simple
RUN git clone https://github.com/sgl-project/sglang.git && \
cd sglang && \
@@ -41,7 +40,7 @@ RUN git clone https://github.com/sgl-project/sglang.git && \
pip install torch==${VER_TORCH} torchvision==${VER_TORCHVISION} triton==${VER_TRITON} --force-reinstall && \
cd sgl-kernel && \
cp pyproject_cpu.toml pyproject.toml && \
- pip install -v .
+ pip install .
ENV SGLANG_USE_CPU_ENGINE=1
ENV LD_PRELOAD=/sgl-workspace/miniforge3/lib/libiomp5.so:/sgl-workspace/miniforge3/lib/libtcmalloc.so:/sgl-workspace/miniforge3/lib/libtbbmalloc.so.2
diff --git a/docs/advanced_features/lora.ipynb b/docs/advanced_features/lora.ipynb
index 708508134c9..1925baffcdb 100644
--- a/docs/advanced_features/lora.ipynb
+++ b/docs/advanced_features/lora.ipynb
@@ -29,7 +29,7 @@
"\n",
"* `enable_lora`: Enable LoRA support for the model. This argument is automatically set to True if `--lora-paths` is provided for backward compatibility.\n",
"\n",
- "* `lora_paths`: A mapping from each adaptor's name to its path, in the form of `{name}={path} {name}={path}`.\n",
+ "* `lora_paths`: The list of LoRA adapters to load. Each adapter must be specified in one of the following formats: | = | JSON with schema {\"lora_name\":str,\"lora_path\":str,\"pinned\":bool}.\n",
"\n",
"* `max_loras_per_batch`: Maximum number of adaptors used by each batch. This argument can affect the amount of GPU memory reserved for multi-LoRA serving, so it should be set to a smaller value when memory is scarce. Defaults to be 8.\n",
"\n",
@@ -80,6 +80,7 @@
" --enable-lora \\\n",
" --lora-paths lora0=algoprog/fact-generation-llama-3.1-8b-instruct-lora \\\n",
" --max-loras-per-batch 1 --lora-backend triton \\\n",
+ " --log-level warning \\\n",
"\"\"\"\n",
")\n",
"\n",
@@ -139,6 +140,7 @@
" --lora-paths lora0=algoprog/fact-generation-llama-3.1-8b-instruct-lora \\\n",
" lora1=Nutanix/Meta-Llama-3.1-8B-Instruct_lora_4_alpha_16 \\\n",
" --max-loras-per-batch 2 --lora-backend triton \\\n",
+ " --log-level warning \\\n",
"\"\"\"\n",
")\n",
"\n",
@@ -215,6 +217,7 @@
" --max-loras-per-batch 2 --lora-backend triton \\\n",
" --max-lora-rank 256\n",
" --lora-target-modules all\n",
+ " --log-level warning\n",
" \"\"\"\n",
")\n",
"\n",
@@ -372,6 +375,15 @@
"print(f\"Output from lora1 (updated): \\n{response.json()[1]['text']}\\n\")"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "terminate_process(server_process)"
+ ]
+ },
{
"cell_type": "markdown",
"metadata": {},
@@ -387,7 +399,41 @@
"\n",
"This can improve performance in scenarios where the same adapter is frequently used across requests, by avoiding repeated memory transfers and reinitialization overhead. However, since GPU pool slots are limited, pinning adapters reduces the flexibility of the system to dynamically load other adapters on demand. If too many adapters are pinned, it may lead to degraded performance, or in the most extreme case (`Number of pinned adapters == max-loras-per-batch`), halt all unpinned requests. Therefore, currently SGLang limits maximal number of pinned adapters to `max-loras-per-batch - 1` to prevent unexpected starvations. \n",
"\n",
- "In the example below, we unload `lora1` and reload it as a `pinned` adapter:"
+ "In the example below, we start a server with `lora1` loaded as pinned, `lora2` and `lora3` loaded as regular (unpinned) adapters. Please note that, we intentionally specify `lora2` and `lora3` in two different formats to demonstrate that both are supported."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "server_process, port = launch_server_cmd(\n",
+ " \"\"\"\n",
+ " python3 -m sglang.launch_server --model-path meta-llama/Meta-Llama-3.1-8B-Instruct \\\n",
+ " --enable-lora \\\n",
+ " --cuda-graph-max-bs 8 \\\n",
+ " --max-loras-per-batch 3 --lora-backend triton \\\n",
+ " --max-lora-rank 256 \\\n",
+ " --lora-target-modules all \\\n",
+ " --lora-paths \\\n",
+ " {\"lora_name\":\"lora0\",\"lora_path\":\"Nutanix/Meta-Llama-3.1-8B-Instruct_lora_4_alpha_16\",\"pinned\":true} \\\n",
+ " {\"lora_name\":\"lora1\",\"lora_path\":\"algoprog/fact-generation-llama-3.1-8b-instruct-lora\"} \\\n",
+ " lora2=philschmid/code-llama-3-1-8b-text-to-sql-lora\n",
+ " --log-level warning\n",
+ " \"\"\"\n",
+ ")\n",
+ "\n",
+ "\n",
+ "url = f\"http://127.0.0.1:{port}\"\n",
+ "wait_for_server(url)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "You can also specify adapter as pinned during dynamic adapter loading. In the example below, we reload `lora2` as pinned adapter:"
]
},
{
@@ -407,7 +453,7 @@
" url + \"/load_lora_adapter\",\n",
" json={\n",
" \"lora_name\": \"lora1\",\n",
- " \"lora_path\": lora1,\n",
+ " \"lora_path\": \"algoprog/fact-generation-llama-3.1-8b-instruct-lora\",\n",
" \"pinned\": True, # Pin the adapter to GPU\n",
" },\n",
")"
@@ -417,7 +463,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "Verify that the result is identical as before:"
+ "Verify that the results are expected:"
]
},
{
@@ -431,17 +477,19 @@
" \"text\": [\n",
" \"List 3 countries and their capitals.\",\n",
" \"List 3 countries and their capitals.\",\n",
+ " \"List 3 countries and their capitals.\",\n",
" ],\n",
" \"sampling_params\": {\"max_new_tokens\": 32, \"temperature\": 0},\n",
" # The first input uses lora0, and the second input uses lora1\n",
- " \"lora_path\": [\"lora0\", \"lora1\"],\n",
+ " \"lora_path\": [\"lora0\", \"lora1\", \"lora2\"],\n",
"}\n",
"response = requests.post(\n",
" url + \"/generate\",\n",
" json=json_data,\n",
")\n",
- "print(f\"Output from lora0: \\n{response.json()[0]['text']}\\n\")\n",
- "print(f\"Output from lora1 (pinned): \\n{response.json()[1]['text']}\\n\")"
+ "print(f\"Output from lora0 (pinned): \\n{response.json()[0]['text']}\\n\")\n",
+ "print(f\"Output from lora1 (pinned): \\n{response.json()[1]['text']}\\n\")\n",
+ "print(f\"Output from lora2 (not pinned): \\n{response.json()[2]['text']}\\n\")"
]
},
{
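The lora.ipynb changes above revolve around pinned adapters and the `/load_lora_adapter` endpoint. Below is a minimal, self-contained sketch of that flow, assuming an SGLang server launched with `--enable-lora` is already listening; the URL, adapter name, and path are taken from the notebook and are illustrative only.

```python
import requests

url = "http://127.0.0.1:30000"  # adjust to the port returned by launch_server_cmd

# Reload lora1 as a pinned adapter, mirroring the notebook cell above.
resp = requests.post(
    url + "/load_lora_adapter",
    json={
        "lora_name": "lora1",
        "lora_path": "algoprog/fact-generation-llama-3.1-8b-instruct-lora",
        "pinned": True,  # keep the adapter resident in the GPU pool
    },
)
resp.raise_for_status()

# Route one prompt through the pinned adapter and one through the base model.
gen = requests.post(
    url + "/generate",
    json={
        "text": ["List 3 countries and their capitals."] * 2,
        "sampling_params": {"max_new_tokens": 32, "temperature": 0},
        "lora_path": ["lora1", None],  # None falls back to the base model
    },
)
print(gen.json()[0]["text"])
print(gen.json()[1]["text"])
```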
diff --git a/docs/advanced_features/pd_disaggregation.md b/docs/advanced_features/pd_disaggregation.md
index f7cc0adafe2..85a5db07e84 100644
--- a/docs/advanced_features/pd_disaggregation.md
+++ b/docs/advanced_features/pd_disaggregation.md
@@ -36,7 +36,7 @@ uv pip install mooncake-transfer-engine
```bash
$ python -m sglang.launch_server --model-path meta-llama/Llama-3.1-8B-Instruct --disaggregation-mode prefill --disaggregation-ib-device mlx5_roce0
$ python -m sglang.launch_server --model-path meta-llama/Llama-3.1-8B-Instruct --disaggregation-mode decode --port 30001 --base-gpu-id 1 --disaggregation-ib-device mlx5_roce0
-$ python -m sglang.srt.disaggregation.mini_lb --prefill http://127.0.0.1:30000 --decode http://127.0.0.1:30001 --host 0.0.0.0 --port 8000
+$ python -m sglang_router.launch_router --pd-disaggregation --prefill http://127.0.0.1:30000 --decode http://127.0.0.1:30001 --host 0.0.0.0 --port 8000
```
### DeepSeek Multi-Node
@@ -100,7 +100,7 @@ pip install . --config-settings=setup-args="-Ducx_path=/path/to/ucx"
```bash
$ python -m sglang.launch_server --model-path meta-llama/Llama-3.1-8B-Instruct --disaggregation-mode prefill --disaggregation-transfer-backend nixl
$ python -m sglang.launch_server --model-path meta-llama/Llama-3.1-8B-Instruct --disaggregation-mode decode --port 30001 --base-gpu-id 1 --disaggregation-transfer-backend nixl
-$ python -m sglang.srt.disaggregation.mini_lb --prefill http://127.0.0.1:30000 --decode http://127.0.0.1:30001 --host 0.0.0.0 --port 8000
+$ python -m sglang_router.launch_router --pd-disaggregation --prefill http://127.0.0.1:30000 --decode http://127.0.0.1:30001 --host 0.0.0.0 --port 8000
```
### DeepSeek Multi-Node
@@ -137,7 +137,7 @@ export ENABLE_ASCEND_TRANSFER_WITH_MOONCAKE=true
```bash
$ python -m sglang.launch_server --model-path meta-llama/Llama-3.1-8B-Instruct --disaggregation-mode prefill --disaggregation-transfer-backend ascend
$ python -m sglang.launch_server --model-path meta-llama/Llama-3.1-8B-Instruct --disaggregation-mode decode --port 30001 --base-gpu-id 1 --disaggregation-transfer-backend ascend
-$ python -m sglang.srt.disaggregation.mini_lb --prefill http://127.0.0.1:30000 --decode http://127.0.0.1:30001 --host 0.0.0.0 --port 8000
+$ python -m sglang_router.launch_router --pd-disaggregation --prefill http://127.0.0.1:30000 --decode http://127.0.0.1:30001 --host 0.0.0.0 --port 8000
```
### DeepSeek Multi-Node
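The three command blocks above all switch the load balancer from `sglang.srt.disaggregation.mini_lb` to `sglang_router.launch_router --pd-disaggregation`. A quick sanity check (illustrative, not part of the doc) once the prefill server, decode server, and router are up:

```python
import requests

router_url = "http://127.0.0.1:8000"  # matches --port 8000 in the router commands above

resp = requests.post(
    router_url + "/generate",
    json={
        "text": "The capital of France is",
        "sampling_params": {"max_new_tokens": 16, "temperature": 0},
    },
    timeout=60,
)
resp.raise_for_status()
# The prefill/decode pair served this request through the router.
print(resp.json()["text"])
```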
diff --git a/docs/advanced_features/router.md b/docs/advanced_features/router.md
index 555a0bc4b6c..4aba99f3712 100644
--- a/docs/advanced_features/router.md
+++ b/docs/advanced_features/router.md
@@ -278,7 +278,7 @@ The most sophisticated policy that combines cache optimization with load balanci
3. **Cache Management**:
- Maintains approximate radix trees per worker
- - Periodically evicts LRU entries based on `--eviction-interval` and `--max-tree-size`
+ - Periodically evicts LRU entries based on `--eviction-interval-secs` and `--max-tree-size`
### Data Parallelism Aware Routing
@@ -296,7 +296,7 @@ This mode coordinates with SGLang's DP controller for optimized request distribu
### Core Settings
| Parameter | Type | Default | Description |
-|-----------------------------|------|-------------|-----------------------------------------------------------------|
+| --------------------------- | ---- | ----------- | --------------------------------------------------------------- |
| `--host` | str | 127.0.0.1 | Router server host address |
| `--port` | int | 30000 | Router server port |
| `--worker-urls` | list | [] | Worker URLs for separate launch mode |
@@ -307,18 +307,18 @@ This mode coordinates with SGLang's DP controller for optimized request distribu
### Cache-Aware Routing Parameters
-| Parameter | Type | Default | Description |
-|---------------------------|-------|----------|--------------------------------------------------------|
-| `--cache-threshold` | float | 0.5 | Minimum prefix match ratio for cache routing (0.0-1.0) |
-| `--balance-abs-threshold` | int | 32 | Absolute load difference threshold |
-| `--balance-rel-threshold` | float | 1.0001 | Relative load ratio threshold |
-| `--eviction-interval` | int | 60 | Seconds between cache eviction cycles |
-| `--max-tree-size` | int | 16777216 | Maximum nodes in routing tree |
+| Parameter | Type | Default | Description |
+| -------------------------- | ----- | -------- | ------------------------------------------------------ |
+| `--cache-threshold` | float | 0.5 | Minimum prefix match ratio for cache routing (0.0-1.0) |
+| `--balance-abs-threshold` | int | 32 | Absolute load difference threshold |
+| `--balance-rel-threshold` | float | 1.0001 | Relative load ratio threshold |
+| `--eviction-interval-secs` | int | 60 | Seconds between cache eviction cycles |
+| `--max-tree-size` | int | 16777216 | Maximum nodes in routing tree |
### Fault Tolerance Parameters
| Parameter | Type | Default | Description |
-|------------------------------|-------|---------|---------------------------------------|
+| ---------------------------- | ----- | ------- | ------------------------------------- |
| `--retry-max-retries` | int | 3 | Maximum retry attempts per request |
| `--retry-initial-backoff-ms` | int | 100 | Initial retry backoff in milliseconds |
| `--retry-max-backoff-ms` | int | 10000 | Maximum retry backoff in milliseconds |
@@ -334,7 +334,7 @@ This mode coordinates with SGLang's DP controller for optimized request distribu
### Prefill-Decode Disaggregation Parameters
| Parameter | Type | Default | Description |
-|-----------------------------------|------|---------|-------------------------------------------------------|
+| --------------------------------- | ---- | ------- | ----------------------------------------------------- |
| `--pd-disaggregation` | flag | False | Enable PD disaggregated mode |
| `--prefill` | list | [] | Prefill server URLs with optional bootstrap ports |
| `--decode` | list | [] | Decode server URLs |
@@ -346,7 +346,7 @@ This mode coordinates with SGLang's DP controller for optimized request distribu
### Kubernetes Integration
| Parameter | Type | Default | Description |
-|---------------------------------|------|--------------------------|------------------------------------------------------|
+| ------------------------------- | ---- | ------------------------ | ---------------------------------------------------- |
| `--service-discovery` | flag | False | Enable Kubernetes service discovery |
| `--selector` | list | [] | Label selector for workers (key1=value1 key2=value2) |
| `--prefill-selector` | list | [] | Label selector for prefill servers in PD mode |
@@ -358,7 +358,7 @@ This mode coordinates with SGLang's DP controller for optimized request distribu
### Observability
| Parameter | Type | Default | Description |
-|------------------------|------|-----------|-------------------------------------------------------|
+| ---------------------- | ---- | --------- | ----------------------------------------------------- |
| `--prometheus-port` | int | 29000 | Prometheus metrics port |
| `--prometheus-host` | str | 127.0.0.1 | Prometheus metrics host |
| `--log-dir` | str | None | Directory for log files |
@@ -368,7 +368,7 @@ This mode coordinates with SGLang's DP controller for optimized request distribu
### CORS Configuration
| Parameter | Type | Default | Description |
-|--------------------------|------|---------|----------------------|
+| ------------------------ | ---- | ------- | -------------------- |
| `--cors-allowed-origins` | list | [] | Allowed CORS origins |
## Advanced Features
@@ -429,7 +429,7 @@ python -m sglang_router.launch_router \
2. **High latency**: Check if cache-aware routing is causing imbalance. Try adjusting `--balance-abs-threshold` and `--balance-rel-threshold`.
-3. **Memory growth**: Reduce `--max-tree-size` or decrease `--eviction-interval` for more aggressive cache cleanup.
+3. **Memory growth**: Reduce `--max-tree-size` or decrease `--eviction-interval-secs` for more aggressive cache cleanup.
4. **Circuit breaker triggering frequently**: Increase `--cb-failure-threshold` or extend `--cb-window-duration-secs`.
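The router.md changes rename `--eviction-interval` to `--eviction-interval-secs`. A small sketch of launching the router with the renamed flag, using only parameters that appear in the tables above; the worker URLs and values are placeholders:

```python
import subprocess

# Assumes two SGLang workers are already serving on ports 30001 and 30002.
cmd = [
    "python3", "-m", "sglang_router.launch_router",
    "--host", "0.0.0.0",
    "--port", "30000",
    "--worker-urls", "http://127.0.0.1:30001", "http://127.0.0.1:30002",
    # Per the troubleshooting note on memory growth: smaller tree, more frequent eviction.
    "--eviction-interval-secs", "30",
    "--max-tree-size", str(2**23),
]
router = subprocess.Popen(cmd)
router.wait()
```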
diff --git a/docs/advanced_features/separate_reasoning.ipynb b/docs/advanced_features/separate_reasoning.ipynb
index 83124cf4974..0c20c5a08bd 100644
--- a/docs/advanced_features/separate_reasoning.ipynb
+++ b/docs/advanced_features/separate_reasoning.ipynb
@@ -13,10 +13,11 @@
"| Model | Reasoning tags | Parser | Notes |\n",
"|---------|-----------------------------|------------------|-------|\n",
"| [DeepSeek‑R1 series](https://huggingface.co/collections/deepseek-ai/deepseek-r1-678e1e131c0169c0bc89728d) | `` … `` | `deepseek-r1` | Supports all variants (R1, R1-0528, R1-Distill) |\n",
+ "| [DeepSeek‑V3.1](https://huggingface.co/deepseek-ai/DeepSeek-V3.1) | `` … `` | `deepseek-v3` | Supports `thinking` parameter |\n",
"| [Standard Qwen3 models](https://huggingface.co/collections/Qwen/qwen3-67dd247413f0e2e4f653967f) | `` … `` | `qwen3` | Supports `enable_thinking` parameter |\n",
"| [Qwen3-Thinking models](https://huggingface.co/Qwen/Qwen3-235B-A22B-Thinking-2507) | `` … `` | `qwen3` or `qwen3-thinking` | Always generates thinking content |\n",
"| [Kimi models](https://huggingface.co/moonshotai/models) | `◁think▷` … `◁/think▷` | `kimi` | Uses special thinking delimiters |\n",
- "\n",
+ "| [GPT OSS](https://huggingface.co/openai/gpt-oss-120b) | `<\\|channel\\|>analysis<\\|message\\|>` … `<\\|end\\|>` | `gpt-oss` | N/A |\n",
"### Model-Specific Behaviors\n",
"\n",
"**DeepSeek-R1 Family:**\n",
@@ -24,12 +25,18 @@
"- DeepSeek-R1-0528: Generates both `` start and `` end tags\n",
"- Both are handled by the same `deepseek-r1` parser\n",
"\n",
+ "**DeepSeek-V3 Family:**\n",
+ "- DeepSeek-V3.1: Hybrid model supporting both thinking and non-thinking modes, use the `deepseek-v3` parser and `thinking` parameter (NOTE: not `enable_thinking`)\n",
+ "\n",
"**Qwen3 Family:**\n",
"- Standard Qwen3 (e.g., Qwen3-2507): Use `qwen3` parser, supports `enable_thinking` in chat templates\n",
"- Qwen3-Thinking (e.g., Qwen3-235B-A22B-Thinking-2507): Use `qwen3` or `qwen3-thinking` parser, always thinks\n",
"\n",
"**Kimi:**\n",
- "- Kimi: Uses special `◁think▷` and `◁/think▷` tags"
+ "- Kimi: Uses special `◁think▷` and `◁/think▷` tags\n",
+ "\n",
+ "**GPT OSS:**\n",
+ "- GPT OSS: Uses special `<|channel|>analysis<|message|>` and `<|end|>` tags"
]
},
{
@@ -60,7 +67,7 @@
"from sglang.utils import wait_for_server, print_highlight, terminate_process\n",
"\n",
"server_process, port = launch_server_cmd(\n",
- " \"python3 -m sglang.launch_server --model-path deepseek-ai/DeepSeek-R1-Distill-Qwen-7B --host 0.0.0.0 --reasoning-parser deepseek-r1\"\n",
+ " \"python3 -m sglang.launch_server --model-path deepseek-ai/DeepSeek-R1-Distill-Qwen-7B --host 0.0.0.0 --reasoning-parser deepseek-r1 --log-level warning\"\n",
")\n",
"\n",
"wait_for_server(f\"http://localhost:{port}\")"
@@ -196,7 +203,7 @@
" if chunk.choices[0].delta.content:\n",
" content += chunk.choices[0].delta.content\n",
" if chunk.choices[0].delta.reasoning_content:\n",
- " reasoning_content = chunk.choices[0].delta.reasoning_content\n",
+ " reasoning_content += chunk.choices[0].delta.reasoning_content\n",
"\n",
"print_highlight(\"==== Reasoning ====\")\n",
"print_highlight(reasoning_content)\n",
@@ -306,7 +313,7 @@
"outputs": [],
"source": [
"import sglang as sgl\n",
- "from sglang.srt.reasoning_parser import ReasoningParser\n",
+ "from sglang.srt.parser.reasoning_parser import ReasoningParser\n",
"from sglang.utils import print_highlight\n",
"\n",
"llm = sgl.Engine(model_path=\"deepseek-ai/DeepSeek-R1-Distill-Qwen-7B\")\n",
@@ -354,92 +361,6 @@
"\n",
"For future reasoning models, you can implement the reasoning parser as a subclass of `BaseReasoningFormatDetector` in `python/sglang/srt/reasoning_parser.py` and specify the reasoning parser for new reasoning model schemas accordingly."
]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "```python\n",
- "class DeepSeekR1Detector(BaseReasoningFormatDetector):\n",
- " \"\"\"\n",
- " Detector for DeepSeek-R1 family models.\n",
- " \n",
- " Supported models:\n",
- " - DeepSeek-R1: Always generates thinking content without start tag\n",
- " - DeepSeek-R1-0528: Generates thinking content with start tag\n",
- " \n",
- " This detector handles both patterns automatically.\n",
- " \"\"\"\n",
- "\n",
- " def __init__(self, stream_reasoning: bool = True):\n",
- " super().__init__(\"\", \"\", force_reasoning=True, stream_reasoning=stream_reasoning)\n",
- "\n",
- "\n",
- "class Qwen3Detector(BaseReasoningFormatDetector):\n",
- " \"\"\"\n",
- " Detector for standard Qwen3 models that support enable_thinking parameter.\n",
- " \n",
- " These models can switch between thinking and non-thinking modes:\n",
- " - enable_thinking=True: Generates ... tags\n",
- " - enable_thinking=False: No thinking content generated\n",
- " \"\"\"\n",
- "\n",
- " def __init__(self, stream_reasoning: bool = True):\n",
- " super().__init__(\"\", \"\", force_reasoning=False, stream_reasoning=stream_reasoning)\n",
- "\n",
- "\n",
- "class Qwen3ThinkingDetector(BaseReasoningFormatDetector):\n",
- " \"\"\"\n",
- " Detector for Qwen3-Thinking models (e.g., Qwen3-235B-A22B-Thinking-2507).\n",
- " \n",
- " These models always generate thinking content without start tag.\n",
- " They do not support the enable_thinking parameter.\n",
- " \"\"\"\n",
- "\n",
- " def __init__(self, stream_reasoning: bool = True):\n",
- " super().__init__(\"\", \"\", force_reasoning=True, stream_reasoning=stream_reasoning)\n",
- "\n",
- "\n",
- "class ReasoningParser:\n",
- " \"\"\"\n",
- " Parser that handles both streaming and non-streaming scenarios.\n",
- " \n",
- " Usage:\n",
- " # For standard Qwen3 models with enable_thinking support\n",
- " parser = ReasoningParser(\"qwen3\")\n",
- " \n",
- " # For Qwen3-Thinking models that always think\n",
- " parser = ReasoningParser(\"qwen3-thinking\")\n",
- " \"\"\"\n",
- "\n",
- " DetectorMap: Dict[str, Type[BaseReasoningFormatDetector]] = {\n",
- " \"deepseek-r1\": DeepSeekR1Detector,\n",
- " \"qwen3\": Qwen3Detector,\n",
- " \"qwen3-thinking\": Qwen3ThinkingDetector,\n",
- " \"kimi\": KimiDetector,\n",
- " }\n",
- "\n",
- " def __init__(self, model_type: str = None, stream_reasoning: bool = True):\n",
- " if not model_type:\n",
- " raise ValueError(\"Model type must be specified\")\n",
- "\n",
- " detector_class = self.DetectorMap.get(model_type.lower())\n",
- " if not detector_class:\n",
- " raise ValueError(f\"Unsupported model type: {model_type}\")\n",
- "\n",
- " self.detector = detector_class(stream_reasoning=stream_reasoning)\n",
- "\n",
- " def parse_non_stream(self, full_text: str) -> Tuple[str, str]:\n",
- " \"\"\"Returns (reasoning_text, normal_text)\"\"\"\n",
- " ret = self.detector.detect_and_parse(full_text)\n",
- " return ret.reasoning_text, ret.normal_text\n",
- "\n",
- " def parse_stream_chunk(self, chunk_text: str) -> Tuple[str, str]:\n",
- " \"\"\"Returns (reasoning_text, normal_text) for the current chunk\"\"\"\n",
- " ret = self.detector.parse_streaming_increment(chunk_text)\n",
- " return ret.reasoning_text, ret.normal_text\n",
- "```"
- ]
}
],
"metadata": {
diff --git a/docs/advanced_features/server_arguments.md b/docs/advanced_features/server_arguments.md
index c63b8a604b7..873fa8b0520 100644
--- a/docs/advanced_features/server_arguments.md
+++ b/docs/advanced_features/server_arguments.md
@@ -121,21 +121,23 @@ Please consult the documentation below and [server_args.py](https://github.com/s
## Logging
-| Arguments | Description | Defaults |
-|-----------|-------------|----------|
-| `--log-level` | The logging level of all loggers. | info |
-| `--log-level-http` | The logging level of HTTP server. If not set, reuse --log-level by default. | None |
-| `--log-requests` | Log metadata, inputs, outputs of all requests. The verbosity is decided by --log-requests-level. | False |
-| `--log-requests-level` | 0: Log metadata (no sampling parameters). 1: Log metadata and sampling parameters. 2: Log metadata, sampling parameters and partial input/output. 3: Log every input/output. | 0 |
-| `--show-time-cost` | Show time cost of custom marks. | False |
-| `--enable-metrics` | Enable log prometheus metrics. | False |
-| `--bucket-time-to-first-token` | The buckets of time to first token, specified as a list of floats. | None |
-| `--bucket-inter-token-latency` | The buckets of inter-token latency, specified as a list of floats. | None |
-| `--bucket-e2e-request-latency` | The buckets of end-to-end request latency, specified as a list of floats. | None |
-| `--collect-tokens-histogram` | Collect prompt/generation tokens histogram. | False |
-| `--kv-events-config` | Config in json format for NVIDIA dynamo KV event publishing. Publishing will be enabled if this flag is used. | None |
-| `--decode-log-interval` | The log interval of decode batch. | 40 |
-| `--enable-request-time-stats-logging` | Enable per request time stats logging. | False |
+| Arguments | Description | Defaults |
+|---------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------|
+| `--log-level` | The logging level of all loggers. | info |
+| `--log-level-http` | The logging level of HTTP server. If not set, reuse --log-level by default. | None |
+| `--log-requests` | Log metadata, inputs, outputs of all requests. The verbosity is decided by --log-requests-level. | False |
+| `--log-requests-level` | 0: Log metadata (no sampling parameters). 1: Log metadata and sampling parameters. 2: Log metadata, sampling parameters and partial input/output. 3: Log every input/output. | 0 |
+| `--show-time-cost` | Show time cost of custom marks. | False |
+| `--enable-metrics` | Enable log prometheus metrics. | False |
+| `--bucket-time-to-first-token` | The buckets of time to first token, specified as a list of floats. | None |
+| `--bucket-inter-token-latency` | The buckets of inter-token latency, specified as a list of floats. | None |
+| `--bucket-e2e-request-latency` | The buckets of end-to-end request latency, specified as a list of floats. | None |
+| `--collect-tokens-histogram` | Collect prompt/generation tokens histogram. | False |
+| `--kv-events-config` | Config in json format for NVIDIA dynamo KV event publishing. Publishing will be enabled if this flag is used. | None |
+| `--decode-log-interval` | The log interval of decode batch. | 40 |
+| `--enable-request-time-stats-logging` | Enable per request time stats logging. | False |
+| `--prompt-tokens-buckets`             | The bucket rule for prompt tokens. Supports 3 rule types: 'default' uses predefined buckets; 'tse <middle> <base> <count>' generates two-sided exponentially distributed buckets (e.g., 'tse 1000 2 8' generates buckets [984.0, 992.0, 996.0, 998.0, 1000.0, 1002.0, 1004.0, 1008.0, 1016.0]); 'customer ...' uses custom bucket values (e.g., 'customer 10 50 100 500'). | None |
+| `--generation-tokens-buckets`         | The bucket rule for generation tokens. Supports 3 rule types: 'default' uses predefined buckets; 'tse <middle> <base> <count>' generates two-sided exponentially distributed buckets (e.g., 'tse 1000 2 8' generates buckets [984.0, 992.0, 996.0, 998.0, 1000.0, 1002.0, 1004.0, 1008.0, 1016.0]); 'customer ...' uses custom bucket values (e.g., 'customer 10 50 100 500'). | None |
## API related
@@ -179,7 +181,7 @@ Please consult the documentation below and [server_args.py](https://github.com/s
| `--enable-lora` | Enable LoRA support for the model. This argument is automatically set to True if `--lora-paths` is provided for backward compatibility. | False |
| `--max-lora-rank` | The maximum LoRA rank that should be supported. If not specified, it will be automatically inferred from the adapters provided in `--lora-paths`. This argument is needed when you expect to dynamically load adapters of larger LoRA rank after server startup. | None |
| `--lora-target-modules` | The union set of all target modules where LoRA should be applied (e.g., `q_proj`, `k_proj`, `gate_proj`). If not specified, it will be automatically inferred from the adapters provided in `--lora-paths`. This argument is needed when you expect to dynamically load adapters of different target modules after server startup. You can also set it to `all` to enable LoRA for all supported modules. However, enabling LoRA on additional modules introduces a minor performance overhead. If your application is performance-sensitive, we recommend only specifying the modules for which you plan to load adapters. | None |
-| `--lora-paths` | The list of LoRA adapters. You can provide a list of either path in str or renamed path in the format {name}={path}. | None |
+| `--lora-paths` | The list of LoRA adapters to load. Each adapter must be specified in one of the following formats: `{path}` \| `{name}={path}` \| JSON with schema `{"lora_name":str,"lora_path":str,"pinned":bool}`. | None |
| `--max-loras-per-batch` | Maximum number of adapters for a running batch, include base-only request. | 8 |
| `--max-loaded-loras` | If specified, it limits the maximum number of LoRA adapters loaded in CPU memory at a time. The value must be greater than or equal to `--max-loras-per-batch`. | None |
| `--lora-backend` | Choose the kernel backend for multi-LoRA serving. | triton |
@@ -207,6 +209,7 @@ Please consult the documentation below and [server_args.py](https://github.com/s
| `--speculative-accept-threshold-single` | Accept a draft token if its probability in the target model is greater than this threshold. | 1.0 |
| `--speculative-accept-threshold-acc` | The accept probability of a draft token is raised from its target probability p to min(1, p / threshold_acc). | 1.0 |
| `--speculative-token-map` | The path of the draft model's small vocab table. | None |
+| `--speculative-attention-mode` | Attention backend for speculative decoding operations (both target verify and draft extend). Can be one of 'prefill' (default) or 'decode'. | prefill |
## Expert parallelism
@@ -236,7 +239,7 @@ Please consult the documentation below and [server_args.py](https://github.com/s
| `--enable-hierarchical-cache` | Enable hierarchical cache. | False |
| `--hicache-ratio` | The ratio of the size of host KV cache memory pool to the size of device pool. | 2.0 |
| `--hicache-size` | The size of the hierarchical cache. | 0 |
-| `--hicache-write-policy` | The write policy for hierarchical cache. | write_through_selective |
+| `--hicache-write-policy` | The write policy for hierarchical cache. | write_through |
| `--hicache-io-backend` | The IO backend for hierarchical cache. | |
| `--hicache-storage-backend` | The storage backend for hierarchical cache. | None |
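The new `--prompt-tokens-buckets` / `--generation-tokens-buckets` rows describe a 'tse' rule whose buckets spread exponentially on both sides of a middle value. A small illustration of that documented example (not the SGLang implementation; the function name and signature are made up for this sketch):

```python
def tse_buckets(middle: float, base: float, count: int) -> list[float]:
    """Two-sided exponential buckets: middle +/- base**i for i = 1..count//2."""
    offsets = [base**i for i in range(1, count // 2 + 1)]
    return sorted([middle - o for o in offsets] + [middle] + [middle + o for o in offsets])

# 'tse 1000 2 8' from the table above:
print(tse_buckets(1000.0, 2, 8))
# [984.0, 992.0, 996.0, 998.0, 1000.0, 1002.0, 1004.0, 1008.0, 1016.0]
```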
diff --git a/docs/advanced_features/speculative_decoding.ipynb b/docs/advanced_features/speculative_decoding.ipynb
index 6f6a064ec4b..aa62b897a8b 100644
--- a/docs/advanced_features/speculative_decoding.ipynb
+++ b/docs/advanced_features/speculative_decoding.ipynb
@@ -45,7 +45,7 @@
"source": [
"### EAGLE-2 decoding\n",
"\n",
- "You can enable EAGLE-2 decoding by setting `--speculative_algorithm EAGLE` and choosing an appropriate model."
+ "You can enable EAGLE-2 decoding by setting `--speculative-algorithm EAGLE` and choosing an appropriate model."
]
},
{
@@ -70,7 +70,7 @@
" \"\"\"\n",
"python3 -m sglang.launch_server --model meta-llama/Llama-2-7b-chat-hf --speculative-algorithm EAGLE \\\n",
" --speculative-draft-model-path lmsys/sglang-EAGLE-llama2-chat-7B --speculative-num-steps 3 \\\n",
- " --speculative-eagle-topk 4 --speculative-num-draft-tokens 16 --cuda-graph-max-bs 8\n",
+ " --speculative-eagle-topk 4 --speculative-num-draft-tokens 16 --cuda-graph-max-bs 8 --log-level warning\n",
"\"\"\"\n",
")\n",
"\n",
@@ -126,7 +126,7 @@
"python3 -m sglang.launch_server --model meta-llama/Llama-2-7b-chat-hf --speculative-algorithm EAGLE \\\n",
" --speculative-draft-model-path lmsys/sglang-EAGLE-llama2-chat-7B --speculative-num-steps 5 \\\n",
" --speculative-eagle-topk 8 --speculative-num-draft-tokens 64 --mem-fraction 0.6 \\\n",
- " --enable-torch-compile --torch-compile-max-bs 2\n",
+ " --enable-torch-compile --torch-compile-max-bs 2 --log-level warning\n",
"\"\"\"\n",
")\n",
"\n",
@@ -186,7 +186,7 @@
"python3 -m sglang.launch_server --model meta-llama/Meta-Llama-3-8B-Instruct --speculative-algorithm EAGLE \\\n",
" --speculative-draft-model-path lmsys/sglang-EAGLE-LLaMA3-Instruct-8B --speculative-num-steps 5 \\\n",
" --speculative-eagle-topk 8 --speculative-num-draft-tokens 64 --speculative-token-map thunlp/LLaMA3-Instruct-8B-FR-Spec/freq_32768.pt \\\n",
- " --mem-fraction 0.7 --cuda-graph-max-bs 2 --dtype float16 \n",
+ " --mem-fraction 0.7 --cuda-graph-max-bs 2 --dtype float16 --log-level warning\n",
"\"\"\"\n",
")\n",
"\n",
@@ -228,7 +228,7 @@
"source": [
"### EAGLE-3 Decoding\n",
"\n",
- "You can enable EAGLE-3 decoding by setting `--speculative_algorithm EAGLE3` and choosing an appropriate model."
+ "You can enable EAGLE-3 decoding by setting `--speculative-algorithm EAGLE3` and choosing an appropriate model."
]
},
{
@@ -242,7 +242,7 @@
"python3 -m sglang.launch_server --model meta-llama/Llama-3.1-8B-Instruct --speculative-algorithm EAGLE3 \\\n",
" --speculative-draft-model-path jamesliu1/sglang-EAGLE3-Llama-3.1-Instruct-8B --speculative-num-steps 5 \\\n",
" --speculative-eagle-topk 8 --speculative-num-draft-tokens 32 --mem-fraction 0.6 \\\n",
- " --cuda-graph-max-bs 2 --dtype float16\n",
+ " --cuda-graph-max-bs 2 --dtype float16 --log-level warning\n",
"\"\"\"\n",
")\n",
"\n",
@@ -284,7 +284,7 @@
"source": [
"## Multi Token Prediction\n",
"\n",
- "We support [MTP(Multi-Token Prediction)](https://arxiv.org/pdf/2404.19737) in SGLang by using speculative decoding. We use Xiaomi/MiMo-7B-RL model as example here (deepseek mtp usage refer to [deepseek doc](../references/deepseek.md#multi-token-prediction))"
+ "We support [MTP(Multi-Token Prediction)](https://arxiv.org/pdf/2404.19737) in SGLang by using speculative decoding. We use Xiaomi/MiMo-7B-RL model as example here (deepseek mtp usage refer to [deepseek doc](../basic_usage/deepseek.md#multi-token-prediction))"
]
},
{
@@ -297,7 +297,7 @@
" \"\"\"\n",
" python3 -m sglang.launch_server --model-path XiaomiMiMo/MiMo-7B-RL --host 0.0.0.0 --trust-remote-code \\\n",
" --speculative-algorithm EAGLE --speculative-num-steps 1 --speculative-eagle-topk 1 --speculative-num-draft-tokens 2 \\\n",
- " --mem-fraction 0.5\n",
+ " --mem-fraction 0.5 --log-level warning\n",
"\"\"\"\n",
")\n",
"\n",
diff --git a/docs/advanced_features/structured_outputs.ipynb b/docs/advanced_features/structured_outputs.ipynb
index cd7e42e9d0a..1382f1e0e28 100644
--- a/docs/advanced_features/structured_outputs.ipynb
+++ b/docs/advanced_features/structured_outputs.ipynb
@@ -51,7 +51,7 @@
"\n",
"\n",
"server_process, port = launch_server_cmd(\n",
- " \"python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3.1-8B-Instruct --host 0.0.0.0\"\n",
+ " \"python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3.1-8B-Instruct --host 0.0.0.0 --log-level warning\"\n",
")\n",
"\n",
"wait_for_server(f\"http://localhost:{port}\")\n",
diff --git a/docs/advanced_features/structured_outputs_for_reasoning_models.ipynb b/docs/advanced_features/structured_outputs_for_reasoning_models.ipynb
index 1adb715bebc..c8f51a98af3 100644
--- a/docs/advanced_features/structured_outputs_for_reasoning_models.ipynb
+++ b/docs/advanced_features/structured_outputs_for_reasoning_models.ipynb
@@ -47,7 +47,7 @@
"\n",
"\n",
"server_process, port = launch_server_cmd(\n",
- " \"python -m sglang.launch_server --model-path deepseek-ai/DeepSeek-R1-Distill-Qwen-7B --host 0.0.0.0 --reasoning-parser deepseek-r1\"\n",
+ " \"python -m sglang.launch_server --model-path deepseek-ai/DeepSeek-R1-Distill-Qwen-7B --host 0.0.0.0 --reasoning-parser deepseek-r1 --log-level warning\"\n",
")\n",
"\n",
"wait_for_server(f\"http://localhost:{port}\")\n",
diff --git a/docs/advanced_features/function_calling.ipynb b/docs/advanced_features/tool_parser.ipynb
similarity index 89%
rename from docs/advanced_features/function_calling.ipynb
rename to docs/advanced_features/tool_parser.ipynb
index 235528b36c7..fd88b6799ec 100644
--- a/docs/advanced_features/function_calling.ipynb
+++ b/docs/advanced_features/tool_parser.ipynb
@@ -4,11 +4,29 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "# Tool and Function Calling\n",
+ "# Tool Parser\n",
"\n",
"This guide demonstrates how to use SGLang’s [Function calling](https://platform.openai.com/docs/guides/function-calling) functionality."
]
},
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Currently supported parsers:\n",
+ "\n",
+ "| Parser | Supported Models | Notes |\n",
+ "|---|---|---|\n",
+ "| `llama3` | Llama 3.1 / 3.2 / 3.3 (e.g. `meta-llama/Llama-3.1-8B-Instruct`, `meta-llama/Llama-3.2-1B-Instruct`, `meta-llama/Llama-3.3-70B-Instruct`) | |\n",
+ "| `llama4` | Llama 4 (e.g. `meta-llama/Llama-4-Scout-17B-16E-Instruct`) | |\n",
+ "| `mistral` | Mistral (e.g. `mistralai/Mistral-7B-Instruct-v0.3`, `mistralai/Mistral-Nemo-Instruct-2407`, `mistralai/Mistral-7B-v0.3`) | |\n",
+ "| `qwen25` | Qwen 2.5 (e.g. `Qwen/Qwen2.5-1.5B-Instruct`, `Qwen/Qwen2.5-7B-Instruct`) and QwQ (i.e. `Qwen/QwQ-32B`) | For QwQ, reasoning parser can be enabled together with tool call parser. See [reasoning parser](https://docs.sglang.ai/backend/separate_reasoning.html). |\n",
+ "| `deepseekv3` | DeepSeek-v3 (e.g., `deepseek-ai/DeepSeek-V3-0324`) | |\n",
+ "| `gpt-oss` | GPT-OSS (e.g., `openai/gpt-oss-120b`, `openai/gpt-oss-20b`, `lmsys/gpt-oss-120b-bf16`, `lmsys/gpt-oss-20b-bf16`) | The gpt-oss tool parser filters out analysis channel events and only preserves normal text. This can cause the content to be empty when explanations are in the analysis channel. To work around this, complete the tool round by returning tool results as `role=\"tool\"` messages, which enables the model to generate the final content. |\n",
+ "| `kimi_k2` | `moonshotai/Kimi-K2-Instruct` | |\n",
+ "| `pythonic` | Llama-3.2 / Llama-3.3 / Llama-4 | Model outputs function calls as Python code. Requires `--tool-call-parser pythonic` and is recommended to use with a specific chat template. |\n"
+ ]
+ },
{
"cell_type": "markdown",
"metadata": {},
@@ -35,7 +53,7 @@
"from openai import OpenAI\n",
"\n",
"server_process, port = launch_server_cmd(\n",
- " \"python3 -m sglang.launch_server --model-path Qwen/Qwen2.5-7B-Instruct --tool-call-parser qwen25 --host 0.0.0.0\" # qwen25\n",
+ " \"python3 -m sglang.launch_server --model-path Qwen/Qwen2.5-7B-Instruct --tool-call-parser qwen25 --host 0.0.0.0 --log-level warning\" # qwen25\n",
")\n",
"wait_for_server(f\"http://localhost:{port}\")"
]
@@ -44,14 +62,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "Note that `--tool-call-parser` defines the parser used to interpret responses. Currently supported parsers include:\n",
- "\n",
- "- llama3: Llama 3.1 / 3.2 / 3.3 (e.g. meta-llama/Llama-3.1-8B-Instruct, meta-llama/Llama-3.2-1B-Instruct, meta-llama/Llama-3.3-70B-Instruct).\n",
- "- llama4: Llama 4 (e.g. meta-llama/Llama-4-Scout-17B-16E-Instruct).\n",
- "- mistral: Mistral (e.g. mistralai/Mistral-7B-Instruct-v0.3, mistralai/Mistral-Nemo-Instruct-2407, mistralai/\n",
- "Mistral-Nemo-Instruct-2407, mistralai/Mistral-7B-v0.3).\n",
- "- qwen25: Qwen 2.5 (e.g. Qwen/Qwen2.5-1.5B-Instruct, Qwen/Qwen2.5-7B-Instruct) and QwQ (i.e. Qwen/QwQ-32B). Especially, for QwQ, we can enable the reasoning parser together with tool call parser, details about reasoning parser can be found in [reasoning parser](https://docs.sglang.ai/backend/separate_reasoning.html).\n",
- "- deepseekv3: DeepSeek-v3 (e.g., deepseek-ai/DeepSeek-V3-0324).\n"
+ "Note that `--tool-call-parser` defines the parser used to interpret responses."
]
},
{
@@ -167,11 +178,11 @@
" tools=tools,\n",
")\n",
"print_highlight(\"Non-stream response:\")\n",
- "print(response_non_stream)\n",
+ "print_highlight(response_non_stream)\n",
"print_highlight(\"==== content ====\")\n",
- "print(response_non_stream.choices[0].message.content)\n",
+ "print_highlight(response_non_stream.choices[0].message.content)\n",
"print_highlight(\"==== tool_calls ====\")\n",
- "print(response_non_stream.choices[0].message.tool_calls)"
+ "print_highlight(response_non_stream.choices[0].message.tool_calls)"
]
},
{
@@ -232,11 +243,11 @@
" if chunk.choices[0].delta.tool_calls:\n",
" tool_calls.append(chunk.choices[0].delta.tool_calls[0])\n",
"print_highlight(\"==== Text ====\")\n",
- "print(texts)\n",
+ "print_highlight(texts)\n",
"\n",
"print_highlight(\"==== Tool Call ====\")\n",
"for tool_call in tool_calls:\n",
- " print(tool_call)"
+ " print_highlight(tool_call)"
]
},
{
@@ -348,146 +359,10 @@
" tools=tools,\n",
")\n",
"print_highlight(\"Non-stream response:\")\n",
- "print(final_response)\n",
+ "print_highlight(final_response)\n",
"\n",
"print_highlight(\"==== Text ====\")\n",
- "print(final_response.choices[0].message.content)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Tool Choice Mode\n",
- "\n",
- "SGLang supports OpenAI's `tool_choice` parameter to control when and which tools the model should call. This feature is implemented using EBNF (Extended Backus-Naur Form) grammar to ensure reliable tool calling behavior.\n",
- "\n",
- "### Supported Tool Choice Options\n",
- "\n",
- "- **`tool_choice=\"required\"`**: Forces the model to call at least one tool\n",
- "- **`tool_choice={\"type\": \"function\", \"function\": {\"name\": \"specific_function\"}}`**: Forces the model to call a specific function\n",
- "\n",
- "### Backend Compatibility\n",
- "\n",
- "Tool choice is fully supported with the **Xgrammar backend**, which is the default grammar backend (`--grammar-backend xgrammar`). However, it may not be fully supported with other backends such as `outlines`.\n",
- "\n",
- "### Example: Required Tool Choice"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "from openai import OpenAI\n",
- "from sglang.utils import wait_for_server, print_highlight, terminate_process\n",
- "from sglang.test.doc_patch import launch_server_cmd\n",
- "\n",
- "# Start a new server session for tool choice examples\n",
- "server_process_tool_choice, port_tool_choice = launch_server_cmd(\n",
- " \"python3 -m sglang.launch_server --model-path Qwen/Qwen2.5-7B-Instruct --tool-call-parser qwen25 --host 0.0.0.0\"\n",
- ")\n",
- "wait_for_server(f\"http://localhost:{port_tool_choice}\")\n",
- "\n",
- "# Initialize client for tool choice examples\n",
- "client_tool_choice = OpenAI(\n",
- " api_key=\"None\", base_url=f\"http://0.0.0.0:{port_tool_choice}/v1\"\n",
- ")\n",
- "model_name_tool_choice = client_tool_choice.models.list().data[0].id\n",
- "\n",
- "# Example with tool_choice=\"required\" - forces the model to call a tool\n",
- "messages_required = [\n",
- " {\"role\": \"user\", \"content\": \"Hello, what is the capital of France?\"}\n",
- "]\n",
- "\n",
- "# Define tools\n",
- "tools = [\n",
- " {\n",
- " \"type\": \"function\",\n",
- " \"function\": {\n",
- " \"name\": \"get_current_weather\",\n",
- " \"description\": \"Get the current weather in a given location\",\n",
- " \"parameters\": {\n",
- " \"type\": \"object\",\n",
- " \"properties\": {\n",
- " \"city\": {\n",
- " \"type\": \"string\",\n",
- " \"description\": \"The city to find the weather for, e.g. 'San Francisco'\",\n",
- " },\n",
- " \"unit\": {\n",
- " \"type\": \"string\",\n",
- " \"description\": \"The unit to fetch the temperature in\",\n",
- " \"enum\": [\"celsius\", \"fahrenheit\"],\n",
- " },\n",
- " },\n",
- " \"required\": [\"city\", \"unit\"],\n",
- " },\n",
- " },\n",
- " }\n",
- "]\n",
- "\n",
- "response_required = client_tool_choice.chat.completions.create(\n",
- " model=model_name_tool_choice,\n",
- " messages=messages_required,\n",
- " temperature=0,\n",
- " max_tokens=1024,\n",
- " tools=tools,\n",
- " tool_choice=\"required\", # Force the model to call a tool\n",
- ")\n",
- "\n",
- "print_highlight(\"Response with tool_choice='required':\")\n",
- "print(\"Content:\", response_required.choices[0].message.content)\n",
- "print(\"Tool calls:\", response_required.choices[0].message.tool_calls)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Example: Specific Function Choice\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Example with specific function choice - forces the model to call a specific function\n",
- "messages_specific = [\n",
- " {\"role\": \"user\", \"content\": \"What are the most attactive places in France?\"}\n",
- "]\n",
- "\n",
- "response_specific = client_tool_choice.chat.completions.create(\n",
- " model=model_name_tool_choice,\n",
- " messages=messages_specific,\n",
- " temperature=0,\n",
- " max_tokens=1024,\n",
- " tools=tools,\n",
- " tool_choice={\n",
- " \"type\": \"function\",\n",
- " \"function\": {\"name\": \"get_current_weather\"},\n",
- " }, # Force the model to call the specific get_current_weather function\n",
- ")\n",
- "\n",
- "print_highlight(\"Response with specific function choice:\")\n",
- "print(\"Content:\", response_specific.choices[0].message.content)\n",
- "print(\"Tool calls:\", response_specific.choices[0].message.tool_calls)\n",
- "\n",
- "if response_specific.choices[0].message.tool_calls:\n",
- " tool_call = response_specific.choices[0].message.tool_calls[0]\n",
- " print(f\"Called function: {tool_call.function.name}\")\n",
- " print(f\"Arguments: {tool_call.function.arguments}\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "terminate_process(server_process_tool_choice)"
+ "print_highlight(final_response.choices[0].message.content)"
]
},
{
@@ -530,7 +405,7 @@
"}\n",
"gen_response = requests.post(gen_url, json=gen_data).json()[\"text\"]\n",
"print_highlight(\"==== Response ====\")\n",
- "print(gen_response)\n",
+ "print_highlight(gen_response)\n",
"\n",
"# parse the response\n",
"parse_url = f\"http://localhost:{port}/parse_function_call\"\n",
@@ -583,6 +458,9 @@
" messages, tokenize=True, add_generation_prompt=True, tools=tools\n",
")\n",
"\n",
+ "# Note that for gpt-oss tool parser, adding \"no_stop_trim\": True\n",
+ "# to make sure the tool call token is not trimmed.\n",
+ "\n",
"sampling_params = {\n",
" \"max_new_tokens\": 1024,\n",
" \"temperature\": 0,\n",
@@ -594,8 +472,8 @@
"result = llm.generate(input_ids=input_ids, sampling_params=sampling_params)\n",
"generated_text = result[\"text\"] # Assume there is only one prompt\n",
"\n",
- "print(\"=== Offline Engine Output Text ===\")\n",
- "print(generated_text)\n",
+ "print_highlight(\"=== Offline Engine Output Text ===\")\n",
+ "print_highlight(generated_text)\n",
"\n",
"\n",
"# 2) Parse using FunctionCallParser\n",
@@ -616,13 +494,13 @@
"parser = FunctionCallParser(tools=tools, tool_call_parser=\"qwen25\")\n",
"normal_text, calls = parser.parse_non_stream(generated_text)\n",
"\n",
- "print(\"=== Parsing Result ===\")\n",
+ "print_highlight(\"=== Parsing Result ===\")\n",
"print(\"Normal text portion:\", normal_text)\n",
- "print(\"Function call portion:\")\n",
+ "print_highlight(\"Function call portion:\")\n",
"for call in calls:\n",
" # call: ToolCallItem\n",
- " print(f\" - tool name: {call.name}\")\n",
- " print(f\" parameters: {call.parameters}\")\n",
+ " print_highlight(f\" - tool name: {call.name}\")\n",
+ " print_highlight(f\" parameters: {call.parameters}\")\n",
"\n",
"# 3) If needed, perform additional logic on the parsed functions, such as automatically calling the corresponding function to obtain a return value, etc."
]
@@ -636,6 +514,142 @@
"llm.shutdown()"
]
},
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Tool Choice Mode\n",
+ "\n",
+ "SGLang supports OpenAI's `tool_choice` parameter to control when and which tools the model should call. This feature is implemented using EBNF (Extended Backus-Naur Form) grammar to ensure reliable tool calling behavior.\n",
+ "\n",
+ "### Supported Tool Choice Options\n",
+ "\n",
+ "- **`tool_choice=\"required\"`**: Forces the model to call at least one tool\n",
+ "- **`tool_choice={\"type\": \"function\", \"function\": {\"name\": \"specific_function\"}}`**: Forces the model to call a specific function\n",
+ "\n",
+ "### Backend Compatibility\n",
+ "\n",
+ "Tool choice is fully supported with the **Xgrammar backend**, which is the default grammar backend (`--grammar-backend xgrammar`). However, it may not be fully supported with other backends such as `outlines`.\n",
+ "\n",
+ "### Example: Required Tool Choice"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from openai import OpenAI\n",
+ "from sglang.utils import wait_for_server, print_highlight, terminate_process\n",
+ "from sglang.test.doc_patch import launch_server_cmd\n",
+ "\n",
+ "# Start a new server session for tool choice examples\n",
+ "server_process_tool_choice, port_tool_choice = launch_server_cmd(\n",
+ " \"python3 -m sglang.launch_server --model-path Qwen/Qwen2.5-7B-Instruct --tool-call-parser qwen25 --host 0.0.0.0 --log-level warning\"\n",
+ ")\n",
+ "wait_for_server(f\"http://localhost:{port_tool_choice}\")\n",
+ "\n",
+ "# Initialize client for tool choice examples\n",
+ "client_tool_choice = OpenAI(\n",
+ " api_key=\"None\", base_url=f\"http://0.0.0.0:{port_tool_choice}/v1\"\n",
+ ")\n",
+ "model_name_tool_choice = client_tool_choice.models.list().data[0].id\n",
+ "\n",
+ "# Example with tool_choice=\"required\" - forces the model to call a tool\n",
+ "messages_required = [\n",
+ " {\"role\": \"user\", \"content\": \"Hello, what is the capital of France?\"}\n",
+ "]\n",
+ "\n",
+ "# Define tools\n",
+ "tools = [\n",
+ " {\n",
+ " \"type\": \"function\",\n",
+ " \"function\": {\n",
+ " \"name\": \"get_current_weather\",\n",
+ " \"description\": \"Get the current weather in a given location\",\n",
+ " \"parameters\": {\n",
+ " \"type\": \"object\",\n",
+ " \"properties\": {\n",
+ " \"city\": {\n",
+ " \"type\": \"string\",\n",
+ " \"description\": \"The city to find the weather for, e.g. 'San Francisco'\",\n",
+ " },\n",
+ " \"unit\": {\n",
+ " \"type\": \"string\",\n",
+ " \"description\": \"The unit to fetch the temperature in\",\n",
+ " \"enum\": [\"celsius\", \"fahrenheit\"],\n",
+ " },\n",
+ " },\n",
+ " \"required\": [\"city\", \"unit\"],\n",
+ " },\n",
+ " },\n",
+ " }\n",
+ "]\n",
+ "\n",
+ "response_required = client_tool_choice.chat.completions.create(\n",
+ " model=model_name_tool_choice,\n",
+ " messages=messages_required,\n",
+ " temperature=0,\n",
+ " max_tokens=1024,\n",
+ " tools=tools,\n",
+ " tool_choice=\"required\", # Force the model to call a tool\n",
+ ")\n",
+ "\n",
+ "print_highlight(\"Response with tool_choice='required':\")\n",
+ "print(\"Content:\", response_required.choices[0].message.content)\n",
+ "print(\"Tool calls:\", response_required.choices[0].message.tool_calls)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Example: Specific Function Choice\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Example with specific function choice - forces the model to call a specific function\n",
+ "messages_specific = [\n",
+ " {\"role\": \"user\", \"content\": \"What are the most attactive places in France?\"}\n",
+ "]\n",
+ "\n",
+ "response_specific = client_tool_choice.chat.completions.create(\n",
+ " model=model_name_tool_choice,\n",
+ " messages=messages_specific,\n",
+ " temperature=0,\n",
+ " max_tokens=1024,\n",
+ " tools=tools,\n",
+ " tool_choice={\n",
+ " \"type\": \"function\",\n",
+ " \"function\": {\"name\": \"get_current_weather\"},\n",
+ " }, # Force the model to call the specific get_current_weather function\n",
+ ")\n",
+ "\n",
+ "print_highlight(\"Response with specific function choice:\")\n",
+ "print(\"Content:\", response_specific.choices[0].message.content)\n",
+ "print(\"Tool calls:\", response_specific.choices[0].message.tool_calls)\n",
+ "\n",
+ "if response_specific.choices[0].message.tool_calls:\n",
+ " tool_call = response_specific.choices[0].message.tool_calls[0]\n",
+ " print_highlight(f\"Called function: {tool_call.function.name}\")\n",
+ " print_highlight(f\"Arguments: {tool_call.function.arguments}\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "terminate_process(server_process_tool_choice)"
+ ]
+ },
{
"cell_type": "markdown",
"metadata": {},
@@ -657,6 +671,8 @@
"\n",
"For more information, refer to Meta’s documentation on [Zero shot function calling](https://github.com/meta-llama/llama-models/blob/main/models/llama4/prompt_format.md#zero-shot-function-calling---system-message).\n",
"\n",
+ "Note that this feature is still under development on Blackwell.\n",
+ "\n",
"### How to enable\n",
"- Launch the server with `--tool-call-parser pythonic`\n",
"- You may also specify --chat-template with the improved template for the model (e.g., `--chat-template=examples/chat_template/tool_chat_template_llama4_pythonic.jinja`).\n",
@@ -675,7 +691,7 @@
"import openai\n",
"\n",
"server_process, port = launch_server_cmd(\n",
- " \" python3 -m sglang.launch_server --model-path meta-llama/Llama-3.2-1B-Instruct --tool-call-parser pythonic --tp 1\" # llama-3.2-1b-instruct\n",
+ " \" python3 -m sglang.launch_server --model-path meta-llama/Llama-3.2-1B-Instruct --tool-call-parser pythonic --tp 1 --log-level warning\" # llama-3.2-1b-instruct\n",
")\n",
"wait_for_server(f\"http://localhost:{port}\")\n",
"\n",
@@ -755,7 +771,7 @@
" tools=tools,\n",
")\n",
"print_highlight(\"Non-stream response:\")\n",
- "print(response_non_stream)\n",
+ "print_highlight(response_non_stream)\n",
"\n",
"response_stream = client.chat.completions.create(\n",
" model=model_name,\n",
@@ -778,11 +794,11 @@
"\n",
"print_highlight(\"Streaming Response:\")\n",
"print_highlight(\"==== Text ====\")\n",
- "print(texts)\n",
+ "print_highlight(texts)\n",
"\n",
"print_highlight(\"==== Tool Call ====\")\n",
"for tool_call in tool_calls:\n",
- " print(tool_call)\n",
+ " print_highlight(tool_call)\n",
"\n",
"terminate_process(server_process)"
]
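The parser table above notes that the gpt-oss tool parser drops analysis-channel text, and that the workaround is to complete the tool round with a role="tool" message. A hedged sketch of that round trip with the OpenAI client (assumes a server launched with `--tool-call-parser gpt-oss`; the port, tool schema, and tool result are illustrative):

```python
from openai import OpenAI

port = 30000  # illustrative
client = OpenAI(api_key="None", base_url=f"http://127.0.0.1:{port}/v1")
model = client.models.list().data[0].id

tools = [{
    "type": "function",
    "function": {
        "name": "get_current_weather",
        "description": "Get the current weather in a given city",
        "parameters": {
            "type": "object",
            "properties": {
                "city": {"type": "string"},
                "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
            },
            "required": ["city", "unit"],
        },
    },
}]

messages = [{"role": "user", "content": "What is the weather in Paris in celsius?"}]
first = client.chat.completions.create(model=model, messages=messages, tools=tools)
tool_call = first.choices[0].message.tool_calls[0]

# Echo the assistant turn, then answer the tool call with a role="tool" message so the
# model can produce its final content on the next request.
messages.append({
    "role": "assistant",
    "content": first.choices[0].message.content,
    "tool_calls": [tool_call.model_dump()],
})
messages.append({
    "role": "tool",
    "tool_call_id": tool_call.id,
    "content": '{"temperature": "21", "unit": "celsius"}',
})

final = client.chat.completions.create(model=model, messages=messages, tools=tools)
print(final.choices[0].message.content)
```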
diff --git a/docs/advanced_features/vlm_query.ipynb b/docs/advanced_features/vlm_query.ipynb
index 08fc0c4b366..d9a8ae75d2e 100644
--- a/docs/advanced_features/vlm_query.ipynb
+++ b/docs/advanced_features/vlm_query.ipynb
@@ -36,32 +36,7 @@
"execution_count": null,
"id": "3",
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "<|im_start|>system\n",
- "You are a helpful assistant.<|im_end|>\n",
- "<|im_start|>user\n",
- "What's shown here: <|vision_start|><|image_pad|><|vision_end|>?<|im_end|>\n",
- "<|im_start|>assistant\n",
- "\n"
- ]
- },
- {
- "data": {
- "image/jpeg": "/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLDBkSEw8UHRofHh0aHBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDL/2wBDAQkJCQwLDBgNDRgyIRwhMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjL/wAARCAF8AjoDASIAAhEBAxEB/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/8QAHwEAAwEBAQEBAQEBAQAAAAAAAAECAwQFBgcICQoL/8QAtREAAgECBAQDBAcFBAQAAQJ3AAECAxEEBSExBhJBUQdhcRMiMoEIFEKRobHBCSMzUvAVYnLRChYkNOEl8RcYGRomJygpKjU2Nzg5OkNERUZHSElKU1RVVldYWVpjZGVmZ2hpanN0dXZ3eHl6goOEhYaHiImKkpOUlZaXmJmaoqOkpaanqKmqsrO0tba3uLm6wsPExcbHyMnK0tPU1dbX2Nna4uPk5ebn6Onq8vP09fb3+Pn6/9oADAMBAAIRAxEAPwDyDRuNQLHnCmur4POccdMVymijN8/H8NdUM7c9+lSNDkwpAHUU7Py4xk5poOeaeAOooGchrCs2qTDPAx/KqHlNj/GtnUULalMcZ5FReQOoHFYTnZm8Kd1cyxGynnj8KcIcirssOGzihEPpxilzh7LUqrD1AFO8sjg8VbRDycHikeMZzS5xuFkZE6gynPpQsSuRlsVJd/LORx0FRpksBW6bsczVmWLWDDO3opxW5oq7bJzz98/yFZkK7YXI/umtbRxnS29fNP8AIVSEbGn6ounTRTHnaM1l3Wo3WuX8zeaY7fPIJ61R1FijKDwp4yelTaSvlpjgjrmlbW4/UqRzvHHK4iUIGOAg5GD+VOt7+EvuB+Y+tWH024SzKx/NnqAaxYbeWO5USRuvXqKaIubfmozbumV4708RkLkEEEckVj42OdjFfXB4qb7SyHh1f6jB/wAKHJpm9OTS0LoGXXI4zUN+eV+tJHexORuyG9xS3GLhVZGB/Hincmo7s1fDij5zjOZFFbsgJkYjj5jWJ4cG1iCRzICMGttyA59cmlclDZsCCTj+E/yrnrvixjx3x/KugmH+iy8n7h/lWBdrmxi46YpoUiSIf8SzHoppmmDFu/1qaMH+y+n8BqLSz+5k/wB6mSQ2qD7RMf8AZP8AOqmnpu1KIf8ATTmrtlzNKcfw1X0tN2qRZP8AETUsEdmMLaxAen9abMP9ElXPVTUihWto8ggbev40yZSlq5wPu0It7HJwXt3aTSxxklFHNaFrrkD2rRshBboRVOBAYLuU4+Ykc1E8KnRQxUEjpxyOaZFjoY5o5NORI5EdicEA4I/CtRPk0/bzzdR/+gmuCsYJ3hkk84hV6A1paVr9zcTQ2c3KGUSZ75xikwSOqnYGU1kaq37xB6o39K1HYFzz371kaoMzLjtEaRT2M1OYWxx8wFKwP2UA/wATE/lxSD5YSfVv6VI/+qjXvg/zp7akI6zRDs0mEd+f51o2uAxQFlQjIO7O3ntVDRbeSS3tokyPlJDYztINaPlSW7AyKimRSSg4HBrWnWppqDep9dl940kr7l7eu3e/LHoxH8/SuT0P994zhI/57E5/Ouh85DCSWKnacE9TVDQdFu7PxNbXMwjMTlipVwex7VrWeyOfOZXpxGa6c6kx9Zz/AOgios7UJ/2TRq/z34I/57Of/HRSN/qnwf4c5rm6nziMiKMzzHjqa6Kzh8qCQ+ik1m6fb4Y8VuEbLGZvRG/lSZn1MLRh+5JHpWzqExhs4HABO6sjRxi3/KtXUcNFaRk43E8+lCNeg3SLn7WZywPyYHt3rN8Su63q+X5mQn8A4rV0zEbXATBAIGRVa+uIv7SuEmdV2oCMnrQviBbFrRVaPR4t+dxJ4asK/QvqE+IXOX4OeK6KxYSafER0NYMt7DuuFKuZPNIX5PehbgdLFhLFB0IUcfhWWl38oHkHBIG7PFakxKWhPohP5CuatLyV/stuEIYuNxLD1oWojor077KRegKkZ+vFc3Y6OsN9bz72/dtxW/qoKaZcHPO3j86xNPvWn1OCBmi+UZ+U5zxRHYbN27keG3eWGWSF3wrmNyuR7+tZOn2Pn6tbPjdcM21c1oauGOnkK2CSP51m+H7/AD4gtnklDiNl4C44zRF3QmrHQazBdaG0kcg8udcZANZVvDanUBsSOK5ILFAMBs+nv7dK2PG2sPP5k3y/JLtXA52n/wDV+tYGg6xcXV2UmiSaILn99GM/gQKaWgr6mhqDBbQnPBIqvH5SX8KJg5XeRnmk8UXMR09ykLfLKvyseq1k+Hpkn1fYsXRDzR0H1N3VZAtk5f5VyBzVOxK3t9CYWBji5kf+FcjofetjUoUltD5uBGDlifT2rLtJ0lvI4YE8uFclEC4/EnuaIvQOpvrOkbDy081wPvyDj8F/qah1G7unu/K+0SbPl+UNgfpUXmosgRidw7bTUdyGku3uId4LMp5Q9hj1pJjtoM1eALp7yHqOhFcq2lx3Ukf2olvm6ZrqpLkyadLb3bLJOQ2xlGEDdV3DrgCq+mac0FqpdvMaTlsoML9KadkSONpDZ2Dw28YjXvisY6bbZPy/+O1ryxu96YpJ3ERTIiwBg59fSs2RJxK+2/lxuOPkX/CiyGee6MQL1/8Adrqsjb37c1ymjAm8fnjbXVc54GRUjQ5Qd+egpx56HimLyByc1JwTz+FMZgXuBfzHBPPaod5CYCmrt0n+lSkDnNROg2kY7da4ZS1Z3wi+VFX5mHTpQkJC8sKmjjBZvSpxGB8uMkVPMUoXK3lYHDE/hUbx/Ly1XduecGoZE3E5pqQpwVjAvQBdYGegpIk+bNSXw/07A9BToV55rtjsjgnuy0oIt5P92tjQUB0pu370/wAhWQ3Fu/0ra0Aj+zcYP32NCJRZlsEuItsnNRi0EDFQOAK1YgNvPX0qO5TOTjtTG1oV0GLfp1BqK2QNMAVyMd6n2stuMN271DZ7hLkrng8ipZkR3WnW0gOY8E9xWXNo2P8AVS59nrenZSSOnHQ1CE3AkjI9M0OVtzopuyObFhPFOuUyB3HNVfJb7cBnjPY4rrVRVmTnPtipLPThd6mMp0OacZ3IqFTRYpba+Mb5JJX8ARmttic9cjNMljVPEkygcKyj8lpzHnPTjpTJi7oZcHFnLzn5W/lWHPteyRVbLLjPtWxqJxpdy3/TM1y8e+GwSYOxbbnB5FNMJGtGD/Z+CD
[... base64-encoded image payloads elided: a long embedded image data string, followed by a diff line removing an `"image/png"` cell output and its base64 content ...]
RPvjJJX05RefY3LXHRMiXB7563yTZzKPexDSRXsIU4AR5gIkpSUlJc8d1UmaYQg1MeG5YdVjbXYxP3mx4/qaHT2Vn//iP2ng46P9rfna9en8emGobYrmYuvRE3cwYZEy6N6Z9NtYu113mnN7c0prbS4uRzmOQ5YUQRlfffOVHGXdT4Rxkrcnk4llD1ZsNhuvoafv+X3XxVQuUY1Q2jBk5bqA/QyFxWfEsg0gLbJlZ8RmNnPqenS+pDHtuyjEfBUB2BqfBmXtilMQqBqnfHgarclaZHQ/S2BAq62RIjhrT1oUuiTgG7PX11uvYTu7yD0OOi6SCExJm1Td55W44qHdOktT8SBfpdcZSHc1nYWMApFKT9nowb5k7WI0clgC8600m0YbLY7kRcZgo+6ddlJAvWz7uThnm/Ehl0fYPuN2jNmVCZfLcxdfzPDK2AuvDFNkYVsNam/mVxo1HYVRCpU9BLUXwzDNa1fiwYENR5Mvv/r89OwYeQxtHdailOYuNtAfU3sagoHzUgXRCjlqlvsts7eeydcDmLoRNKNDl4swS2Hd69c70z3DJjyxmiXENv3f+Z3fQb+bEh1O1XzAu3SS1H388Ye0J+Y/fnyEbGfe2K2oLMl+o1MFXRldxw0zBHp9GO4VMCewXfuVw2uqXy1lW2DGS4CynmR6MFNT8ERj6qlAktHPsuNBwnI3UsZe0YzgANf8Htmo3NkabVrfKamLAo/6jT6LlMUCAQWVXXQhsdYWaZ5IGJRiEaG9x9vDOkGyXlqmIn5Z0i5XED5wUA5vAUhjyBaJrAWZ2DMNXOvGxAUk+SPiybocogUaI5sCVU9Sq7CSmKhd4i4WGvjVKJxcw+T15toqfNhLwMn+2sipeIrHLjVrz8YMks80Kmc/r9lL67sZZWRDldNZ8qrj8XJBvVygRYCGqpeAFTqUyGiSunRVsq7i5XyL8KqXMH/gAD+HZs8hkEfydu3vqCHkIeR3EooKxnIPwfglbzY2nsHfjGqOebYDE2tmgsLKu4F+5shiVYUDgF4U5nNCutHCo7IwQd2BwU7l4kmnablraz+bAyeTVknUonA4eWgECbUumoV2gkrCZFeckCv/ULQuS6f1XL0GNCxecbKoxVzdiAwXVGl6LSo/92nVRot1Und2M9qa7NkLNz48y216B083Zhf3106D2q6+6X5Yd6yc2lrDBm9vEgOGx8gl40JTSXdrThHPvlqcf31yhiFyqbIsRycI4zAHJU0VCovAVM9Af5gcuHSt2oHn6SJ7cl3MXuBtf+H+1qNxdlCTwa8uyDr1zS8hhz3JvgjwxGfcFitHR+cBqAL33zY8YCEIEPvWIVvHbLMN/v70NEeA3x2funZVrIJrCpxUYXXVoNoEqakJmZk1641/29kib/8g8RIbgJoaYWwePX4qKgOmqnohafuZjUsXvZBnCR8l2qX+qvu2pUUgSu4uAkCusg9rYvu4Tb9ZB3389LkOrZYOcqZhK+w8WyoePzo0dmGf2BXGibmqUzNXjx4/B9kM2SqTbyWTcN7Vhj1ZyN2Tg8qUsB2Vz58/tfzE8n325Vfhg+07FrZvb5gcGWGIWURstj4OLQalvKpkLfcFQ3E1zyAV3p5SEuJAGMpns7l5xW9evanbnPf/3t/7e2b2Hj06VMDoW5NnGxv7+7t2V9gSdXKSM23YhQCU4KrYboCr+bNVO5FZO3SA5q/ipOY4gYRXCH8/KzgPwEPI4AnYCgNbVWufphlMdZVxyC1/OXuVoUZXdrCmAxsMpJr5SvvUf0oWhkuZqkJEqVR1e5uu7qo7TJRq1g6P7J0vLkTbhVhYjEf4q4+c1cDITJyYojFNKOajS5YYIVDHlCbqey4g5Yrw0AdEGhJDLeknaFpYfHNl0ZUiS2eEhlQNSgrUH2poysnOVELh2rkkpMHWBwmVMi3E9uM6yi4WmFzUHFbLlMIVRA2U1RN2N7tZXNy4pBz/0iC1j93drXcnZ+zdndvXFYRyY81M99imVSY7RFcDRkCwp5+UnVTcIABC+Tv35FuuApOi+ePJ36+qvAM7SeOHYYmksmv/8OQZMDSe7zwheeg6VkjnOKTlaScqOIM1JfJUARUiiWRqf1mtSgOJqC4y4IYfMEvLP+TFB1JbUllhWjRmeCKQH9s1zlqCzP1M9Ow4SxtxwVOrXAMqIQlsySkOB1u9loAM5koxuqLC2MEBjqRNxwv7hDbutnbX8je9XR/Pbjeun3y4Y//X+HBEr83vz2/X9tYne/a4z06uAI1szEinx0VF7nu38eryk5/9+IMXk5vnk29GV7/4/Ms3x9lZkLKs5kvlmxxL1/NHWxX/m/72KyYOpEgkbbVqJYR7CAxJM8TzYfIBpgM9EVl995jMEMzwm+kszQsY2u4/8ACG1lMsHTfeeac7d3J6Qunf7u8hWNTAOn7DC66/eGBfuspym7jLVaGSF+QceJkSDSE24HmdTnaEmGUTe3T42Chh3RZ6dbcsUWpZVFPiCVhbFoKZ/HJJybPhmcwsO3bAZHp1dm66T/9WGXf23e+fawzB21NHihDsVbmg4tHCHYYmhDpY0gqUb8KlzYTA9vplbjo3HNWjalRybwcVuZXQU1o8U1POinqiBM7zN8dGdfgGcnabbV/sqckbaIUgxrYI97wEoBZ0d8pmQ66blt3bXNWFkT/GGE7BqSCs6eX6zEoY61SNZdkANUzGAFoOfiyF2UjOpnk4nRVBv7Sff/GVS0gKe3L4DU7ih64h8EUg/8MogZxABfbkX8USIJOzeEK36uKvZTMKra1b56Cf5ivG/EGGVXEm/wSYk7+VjiRjZf1G36zwwwwkCZFRxwXUUwKVW8nT66yJw6KAP6hz4zIZDFnGWZJpB/S9sYkReR3rDUIFUAE15qoCZsNdFdMk2BIbkKFoQcaJqYk9ffbknJ0zeq9VtVpOmoQVgNmlIxS1NHmQLEr4nIThUSWecleFBIhTu5Shv1i7m8zMkDwAFkfZ67NLc8TZIyW7slWju2vzjtdZvsVGpj0DRsW0eq5kN65IYZ2F4mF2Ud5m2w8VBq2JIFebkHlDC/WGjfraYou1kUtCJguGFJE6B/7SHMuhuUx7Fbq4hMIwgL+e719LWobXMHrlYFp5U5vtBD70FLI8QK783Y2g+0PEQE9SZegeLYaZCqPupdIr5s9YGxIto6rYE6/if+BSfiPjchpJI2m9DKcWbsuWVzhFCcFYSTCHUxkwdXHspmmsciz5zZu8+mYaWQJL3isndlW0pXhVQsFIXK7uqJQQW0Sbm2V/I4ohRau5Xtu82ZvqBI73HutsOyswm+z7Ss79+s714p0u6/FkbdunOOYmgDd2jLENxCn/C5txbEUYOyF8+8tff7o+Orgb7bzbuD4+fec0tEIpciRGdycFKtOCg+XwAQcy5VG1vyxgDkVt+hhF6srpCADVK0ImMPVQSeHq5p3ySdhuwDO8Yg8aZM0jll+DctDKtEGrtjT2HJSOkrg2rVu1o1BGKm5NpPpPTt/tTLe0D9Wkbv2HSjlMWNKAu3sHm1tzd0FqVgYO5k7190OzLSitXkLzUkT
VuCKjIa6o7ta9O9mgSpg4dhV+rQxt9tq6YIKpMNYxFtSozQ8rQupdFyMNtOSh8KTgddlbC54bsezztrvPrrC3r1/lOiRnfiIJ6+6WpRpsLnZnlmO5+/s582TVIVqDas0oLdtQFc2+Pl9dQQiATEbZQBoXPghwDhpFiKE4o3uz6j/anu72WoQDVaYQx6U3tqe5bJd+CfvYQpvZDcrp1zqyjTnOexiTkRO2zjIGD9lX93qKugKYeX6ZoXDfEUXzA3AW27pUtkHeXrsmMPZgfeKCDOQd7u+ZuzUZiGk0pxxZL3QbB2OZsmAixhHE1Ep7hGC7pyM45MXyC25kFs+hHaKxGvbC0hWAJmmhzqtk5ZRECbVmnyjpqEzWLrJ1Cft64OZDUxkRZbtpgK0DGuwAxmsIU4u5k4HQG5YZI1+pslqJjF2JtSllpfMV8wVOP+eOfY5PIwGJcfR2emfUu00w1Z9xxURGaGVCiU+uMUdHzeGaelR0aWw/Sg2GUOUVq/dEzBd2JWU4b7QUDrlQNJ9pSN9GDdFWkLhydzWjPV4b28SpBWE9O2ET+fZkLJ1lrGjNaJ+4DDM5u4WyJSnDXnkZESNrml19472Fi4r3VQHOayiTM98SYm0uFzMXUMY+UVfmTM3uZNm31X46o2iJNOaDEv6nU5aL2lVR1iynObClUb17d6Kp4xXkTOx0J7NESDJFwShenF1+8OKFVVgVQdQdsKhVPzwZOyHJaGawGyVq20fGGcjzR8moxNRjjQXJCVRmyEQQVlWNh5aI6VyFDe2uFNKK65JGrMYHxQcpRBJ8NQgw8zHaXs+FOkq6sTVfzB1vNHR116luIhQKm8G3dRj7qbbHxjUaP2Uq98gnNt85qyOj6AsU1zmKkU4iXuKInSlUhkGnZ4xQNQr58/hTvyUxqWnYvFM/OhSQRDLW1yh34mi+B9uLt3i49vjJExP9u7nZnVkREKTUb+orWiObp2NYM0Wrcn1zaHF46ITKeSAt895mXsWcHSaAPD87poZuc64u565aE7l14eL66uhgx7rT7v62qwe2fcZr6jjnqcWMrc3x1+/OjPZ/+OMn48X0+vxqfbG9s2Y1+85qm/NMteVp496Nk5vTX3z161/84l9fHm28G83eXl7+8f/6HxjoqzefcvOJIitbZkH0iujElIXC95MRe5pyrZPnSD7gq9uZk6RpwVkodTQ5O6WzKTFfR9PHyiQzEdcGdBtp9vC0nMJCxaupeUbn1OjECEjXyseT3NRB+wPzrTfV6aI8O+soD1UgqRZICl1EsXd0mJbrIMf62pPnjybjrcNHB8agUfiYX80l4qaL5szveGyP9dH+wbmvdJ3n6AL8SCc8JIcfJfyEyn6BGk+kpdhXgzZatSmMtnErUg0vyJ4TJpHX+9t3J2+3p7kuRA+IkGfqfs2tQpsajvZiqlZVnl6+3T2MRt7Zm+K21rdzP6ZIDg+mj/aneo8XJ2/kvmtvOrF3g9/ZiY6pNmDUdXZ6Ks+DvR1A93cvSIgmbC2SHd2ebB2tHbp9/w//8O8YOr9+/dY+PGx0lzv2O7KlchWHkTHdooA6Z3ji2MNf//KXSv3kiYNQGShjLmwGc9CiWAXZvUJaj548PiqLm0GIRhthuHdSD7u0XJpGqSfug5pOFe2zzz7vZqjdEaSzsxOG6rPPPv3t3/kJ0Weo1J+C0K00tgNayut+Fhp6b2f/Zu56uXwLkMdlQMu1K9l03Xi2R5IEljLiCaE9DjCCKUJVfHtEdZJ+HWIDUJJHaklU9YXBZvIvCaL5iFANf6L0amov1iLh33EUDIAmsqP4OyQJS6ryWn1nsP6lJ8veAUsh4qgcspr8RKcIaWCB8Wapi0Zr3Vukdl61FSQiTtLrmYShOjfBh05nGGozB/Awq2IzjlEBAHC5ZRozXS+W7/VoDxpTzHppQQnKvqvvhFRBQnKuHJwLqfDYS2Mg1g7jrLFQ/dcXU5OMjk25dPp6dGUFzyuTnMuXqFQ2SU9LD2Nt3bg6TKfPlZ32H+eo+XadqdQfZF2yhTnHvWJ+aEwXxRn4Sg6PUx1KHU2Ln+VSwAcDmtReqQAlV97UQDmwYVk5ni4IDyWLeZNFDqF3kRsMhZzAhuwGEKmtjdFe0Q4/I5q/rNsU40HXdpjiUzgIQ4GlCwGX1+QeA4HAbBTJkCQVXXUorvrCwVu9gX71FCJrybmgKjz1ljqOOJcTBSfKFTTqVedv2YfTLDPZwuGcv8YQROWSsFQzwsL5cpgH6wokurvsVHr6DO723l6bLuqDgyGTkduuQzXpdX128faMzblfPH+6Nd+8PL89g3Z3bced/5Mt52Wma1dbuGD23PxOGmCZdjMSN74NkRv9Fy7aO708twnsam2ugy85K+UZWY9QpWXyoqXUbkrNMRUZZ9bnSS0Ibt3fXC6ufMSPpQx0MVyJ4EGsygnb1CT2L9tgWM0luNxQfCzFFQwUS7fuzK58bJCfUqPMVKbuucYFzNHXiHF9T8RYAMVytzpLmGObGAkdjOwxWrLXKU2O2F7aym7S7OLqeHp6fXmlz/dosYsxoa4cipSi/ZZe0KNLI0SNiVKodCvtkQCBrkwNxYqz0yG7vlpCm4fCyS1SmZIIthVEfVgtcmH6MdtJWVq6STvmp0OCAM58woxwG2LpXqcCMrKzOlm7HxkeUySxEbcL4+TtXJ9oUJEOAXEL/vmtzs3FlVvMr//kT/7kX/7Lf9nH0snMUChMSFsYUQnZCKp0mXlKY8laeKovvI18GrKoayFJm4WYaDCvkYBuYlpETQKT8KpKl/tRQNVMKMmaN/KERaY+gTR1wcE0Cx+lpqSkmnLY2HLF7mTdZDq7K7afmClfjpQszRWGe+9nRS3rqf0oQJxYT/4iMn6uk/CArID3j0QV2kaCdkaLqJrTLFRLAdVKgQXV8n/Y0NhKbhq4BLzVDvDgRXCi1E2A8S0E6LIVbtVMq4JJ9z9BXLqEQJERhcN18PLpHWBCYy2WTlyISn5LNxQPHwR5cpKuEiz5JkrF4LhKwvQIOmdcVUS3tpM42NiUcvBgLG+jBd4eT4FJrcHLJ9M4C4PwXNtBjFzruXN5fXF1+vYdu8IYBt6Eh3Xy2p9YSBKWVlEmOVHZeagxx6h45eSuiTGKlFHcTTYaIF4jMuAS0GDg0QBdPCsnylKGNzTCQ0kJaZiUL8QrcaDhia0vKVIiUTgjXCqvnUs/A1QNphnSr9CKpXwaf0cFb7HIKzB4YOMEeg1ty5amv9Ro8LMqugAQgLHqyBNwGmetjcPGweNZObwvshC5cDxNEiQ6lUIUrjPHPQi5tMT0E5Y1Kwms/kBCSwdlCcUlhNkalglGes0TofBgPh1W5GesrDoyFmMJHU+Iwz2haLi+mJ2ckrS725dbzwws3FZqY/rVYm062tm0TGVSmWL29eD5HTMiX1nkS56ooFeiB2p3uML7/pW146XSSenaAagcU5s8A0OwlB8MpoWYEmAwAoFJhQP6FmIbp3y5xtUJU6QOLFZX2mQllkvsfe6SUEF2ESVW190SQdkc+GFuYDjkTqEpA4sYtZAhnDYVAW
u0PEFdcgAYc818GGQYVzFXjqzF5qxcsi8nALCnLBxcUyIdEAi9Ig9K/vCy6O/U0okiFctUk3x9BlTxJyAoAiMJbDoNNCJzaGCNF6xONqNmZqr0W2b5ttgIZhe8TD2VGvHRiYu77UmkBTZP4VofABZIn0agvQy/87s/+9t/+2+7JBfC5jZKRJkPNA/CI1BUzUdkHJnVQgsHflBYI+BqQenmJ6GVFXyo/c9YLaHsOIEhoyhJwnIFn8kYtpulv11k+5hlTbd7J18yWJyrzRoLLaB6UaY/o500L4RxIOUCkosC5QRx7e9XcCgA2OFNUNMkS65SLB+d0EuHD7GZQglkiqTXggIjYOKbDq86FyvnAIBSQToG/pk/iKYut6yG1WuLXY6EkJHMSRTTPZtm48ewOmsWUEaORLBO6RIkiWaSGfVM/qV0c0uercRDXgZ6ccxp7Gr5v1XIpjKUxjW3Ousk7xbuKsZW0NoNZ8qhHCkqfCkhdajgnQFxlpBLvoVfxbRHOrkIF1IZ5lFF0Z+yR8UMds1xbRsQTRY71y+ePNVbs8HWzL5LgDptNfZ8CLvHs405Il1GAkzku0yRJkEjmKc2zwi9sZqs0aBTRlk08Z08jK0GPJQ6eXkZ1E0VShLSAgOPLDwlD55MTC4LO+CRNWAtUJQkVD9Lz6/svgX+EL6TeLZbkrRi4CDWwoeo9gdJpWkqHuLkl3sXB4Z2UlWZ8hDSSPopBJGdRJTSaeTUaeMEw6NEwMCsKF1WtFeVH8jikuQ0ZnZmW8qt8Ri9Ji38agQYAEk81Wl5SQlaMzzNKNNI2EmabJnbczGLK9+cX/A9ByuS6WRf5OgvdXz91r43RyDGo8u9jZxJsN+WUwxj1ZELXceu83xgmMmtKkCAGukCoqfJaG4gTN16pgOJPN1yLFrLGT7fH1Hqdg0MsGvzPd9Ar8oll4GZYUu5wh8lwIO9PDhsWCWv7A4hTuu5r2FWm8dId1hdh7hFCSfttHbMWy0IQMJBElrjov78eDZJ/BLiifJ2OOCQWNUOVu5KBBhbPPWyIOyQzC5r/ass4IHE6MdQkyeTPJYA7YHpglDvWRiLkmaYLPTgmPkmel++7eCSBbbIC4MBI4Z7WAQAHUJKgGm2bdEFqxKEyUe4FSNbG377d35q26etIqHnLruEfKoKBmCaMjz8Sm/Csv0Z8GWhL+1UP7Pil20BACKFq7KmR+nsGEBtGMitWkrHwgNSjjJDj1mhLhpAXTRJ8Da5VNccLc1xsaKaQsl5OMmFvx9dNSmC2nV+7YeFE8IlhwdKtnF1FAyY6iEQjEClW0lg1gAEJjWodHkB6mt3NXg3do84Zv5NiyoXtFBUh3y7bv3o7MJEVV7ThoaWga0uvwyKvNhIyxkUg16J2xzMX1AbMqw+CysXItGRf0Vf0V8S4bXmZEJ9/EtX5HzrIYt+B4EqTysAYVPUSVwaYWlxfTzAXXyXqaArxFe+Ca8BctHjLS44VlswOryBO5csw4R4BQ4Y3KlQK2iuBxxvu+FfTV9f2lAUp4fl4gWr6TnGWjebAbaMYHVBe9Tj0RPNV35SLZpkHBW2t713Xi1ZLhFxFRrlXJo3nIsjxaE1PehSK1Uxxf7ueiJZs4lYg2n601nJtMOydPIS1YUNr0odQEO4uVb3YjtK13kACCnVle7k4Xhl4TnglJBLHZdDQGiot6W/6AcjRyDSenYUPx72axWrBwOC32MTjgbwAiEBP6BtbMjuEM8BP72av6VYRQikTe/ex6bnWVfT7K06UD0aJxWfqSfnO53YW7vbf3ogL9VRF6tmRJjbVrdH88tz6R4/OZouzjZGs5Pzkx1st7NiZ5usmexbEqCk2Vmkum1eiAxqF5Zxtb/FyBJQWjdGWAWsKaElK5Srnb1DXVIF5wGs+DzdfPDF62RnV6ajU9gzcYVCYFHEd1nP6CSS82BI80TxLeVK+zC8AWBgkD3VKSQKbq0dPHPlSY6EiwUsLScw9BQlkbra7k+lWEHrcJDtwPA0WhjkDiI4a5mDh4hKUi71Sw8DiwVS5NKCjpzLAlWQmErrsjRamGFArVlyz5zqi5DY/B0kEZr++E+N/wSiXKWAEa6f1irDKycqOs7CGOMhcVqrXXk5EtcAGIkMYClCFYogi+InQn3cSlP6F//iX/w3/81/+9Of/jQcu7lxE6DRmzGDRkzVAeZQIqEqA8+C5PxNfaUPRTUrOSJtMupSYEJ6Osk0tNFvEvqifSEkY6EO7emTj3IgB8Epgo4V9Zetj94ITJXX/cw16+OZ8rqI3SLZIuJdBlgfOnpfXpKj8725QspDB2Vcdb1lL4q/PMuyhaiV6wKD4RE2PMl/zD3rFDuSLUY+NxVv1EQuIDBgB1yzO75oF/tsQCKNsFAflzDwWOxFIEe65QQIUUZOeSmOA02HIX0r+3xy/iM7unXNjKxiD1MnrKIJWrWvOcRakpbKfjmeSh7JtdthfP8Zp3Uk13JA/HqizcA2FHYLTHNOsFgyKUsaVGV5BcCfVIWmwFJUTiCnyB3Yrx0uREI86QWjrBshQ0nsYqAdYvSjZ1W2HTUm96mhs/Nzy572/GxWt9RyNFrUPUF3lwEtRGjtLoJWKpRbIDk8PMhMgru/oijSJhEDbUpSyr1pa0IHwgDwp4xlngEDk1wgTzhTLltUyq4IaRixQtK2y5FvMxhWtjvrMKpcMBck4HbNmQ4c/A3cz+ZtUy6vhlmmLaoUrfiZKOEDgLwQ2+VFp9eO6ucQ3qk6YXNGFAeZ5GK7jBVS+Fd092/HNntdq4ULiLkfZXVaEmbMzk14WG6voTP9rvXaDpI7NWRi6YNtMq234x6H2ZVu3syXnmyy2M5A5y6r+wTS1Iv4tasbsw41SMqireagLWSl48qAev0q7aAcptlSxMDIAp0g0cPfLG1Pl05Iq+/mgygiZAeEm+36bJAePTIs4HOqHwDXwElbrgObRZ7licIT2ZAtAF4xyjPMyeVh9KiYHrwWaRaha2QjLxogWzxGPoly66PqwkEQR8llV407GyiMa328Q0l1pa0Bb+ykypLvKuvSMQlZYiiRboKl4vCh2KMRLQ1Gx3oyMF1G/tRdpTUsCc0oXXZxTL/v2F6haFjHVDBXVfbaDnhDDIzMsn8nCUorAjNdDOd4mvNPMRXW6bqjHI2hSY+1IPskaQCDZMX0SWUNyrdIjLRsrDQUi7mqyqUka4OIMiZf/YPcHGL2qnaXIBVm4WSDH3OQQUmjh18IJPwm53lAetLuoniCDTOLnyBBeHLA8E0U1yNCoo29Xql1OzsUFMO1fYG0kCQ8ngLhjPg+dLLnRLRHFEReeTokGwWKlOHZMGIDWcAN75Vr5O0XnsFLflLCVuaJqte0cjQpamUa059SV5sJJzuVJ/ukmWSIJoklnOzFzAJpEIfx2BdG1UQAI8sfEaw+pWDQy95TkVuUBXWTtfL1G/AEfM91XoIr9bfKiLOKpn4gxCnF7a0JwnWKGEsVJlXMaLX1zqEreEDIAz5gJVWdXefF7oaFWn3YhOdqESNdTXaVLWuCzdxvJy2Hn/sHu7d3492bO9tR4
5Im0+CialukO/mxOdJm4UqD2T3cff7s2XFWwuL0omRAvZFSf0lVtRJSq5oSg+dGd5VdJYoJFUbazHGlLPmYwEr0u1JLtUFVNMUiamPwSC6EGiFOrcd7Jys/PASXa3MIUki5eJoqGL7lqvFEdlcO2shHzRbDKa+UcVWilnCwEDb+Icpr4xjy5eEEtsezqfEknhJ2+CqwFWvjyBy4gkRUor8yn+NWbwpOkqbKzrLxONc4cQgUfjN/Z88wSPtkeKBFrRZ9uL23dbd+eTK3LX37bnZkJ2l9DOB+e3N0tb5xca3fu3O9tem7uvrM2RVLYiI0sJLRtgA3a+n/Zh6p5smNhpgZIpGBRjnFadLb00UTw3xWSTOKMpq3Nffjn/wg+xqq06bD5BQ802XjcvNBwuBZ2SreRtVPr8DaDy3V3GLTPLlliAEgMp/MXSYkaanSqlH0sPGEx9fi9FPZEJUwnWRJcpmvZHHLuqTKSV1EK33JwKQw0tSieBGcUkdlVBcQMfifWrPro479YpF6a4KLT6l3CYU71IZ+kPyApSqE7zUG+KUKL13snC8eZk3IDuQ6CSOh4q6owrcMUm2Otz8uUdmnd6vJWxCyZU59RYp6KOCewN0JbAJNA/75f/y52ebp7n6+q3IT3Y7IsCTHQMfXuTBEIw15hrJdUk+zQMDUAA5E56QD7S8AYaHA0lE8aqBThc/d4qoSvaobpGZRuE7ih3XVLkRZq0Nhs4uOhq00tCmgu/PL81evX9UJ4lxArJokNFgEU4MbuZUrMt4/RMd4rFQnjzjvYBO1EqxOoNBCOnbAxmygrEqrIqmttay41PCWhqNlkVvqIw3ImKdQtRmK6KR41WDqw1g29mEVZQ2gmhwe1Kdr0jp6TNRUZfeCTbdVIzn1heC07LK3IMxHFVx+UhLKmhsYLVB0xaUcDfrwuQpcxuannJKGP3VVIAITVsn8ClcWT4LoJjPVWznme5W6gfzfxp83qULWylx1TXtVx2Kz3aDseQwz/q67MSgbAbtPYTuQvCAgvpl+6x7GffaS2bCjGjwNX9Z0t02XmCIoCaTXJNzZebp7sG8dBUOM10k8J/fYiXA0oEtqvaxkN7VcNFdUyo0XsVY1k75Tl+chvsF4CDHTCC1IKkM4B1slT7Nsp8FraQIlaSdJu4b0REIDPAhZEobWUFi0IZtrgpNXdZkVSqCEcHZUh8jL63cQCnnoxMI9hBT65WtnNJRogOFJVOo25opfRrLG4bSrmu5wu6ld/o6Gt46mf21ZBnywdzge20eeunAmJuoym+22d0eXLrU9+/p08+J8b+Nm78XRfLE1u3Wgcu9+Npod3xlVkYPRxfqd2Rr7KEgJ9ZyJurqfHgGLuYkIMzX212QhXLlXagUNXa4q7LLqu3TNHEyK6hxl3ZEzkei2gv3DPYaKPiJjYp27UMXb9WWp5gDONNpGtWRLuBLXmHFG2g6RnBG6vGJTHfnd6YnhSKvtRCiuFD3XXTXLdppIzHJAj+hwLBWaRaYW3WAV2BnFg/tVkA5sCkLbe5GuOmtqVhSqFBJe46EEgUcSkK53xKcINVFG1Emy7ELGMjlmhk7luleyGvRI6BA/tPqT23ZQRGMkSx4qw4yapFD5UFdCQqXDXOsuZWvkJVGOCiRCRiTkzedfsQE6fC5A+tHBEbAkdEuvSc1qbgYBAnE1vVI6K+f5sskeBhuGPXGu6InZ5qEujICQ5LVz4dGJhU2OQnrhJp70SfQ4F4Z0FzOn4HxBZnJcp7YN7yxc0WNM6bPHT5CEXPDwX7t4aWtTdSOYfZKhKXFyFRmrxpKfsKSYQkdILKQIrZ02RZkEAJrXHevZrsOb1hRg2elbagHvyFjC6F/HYtkQkrrPad1o9nQSU3XRZQg27bCcd67RplFEFDIkjEqtLctE1zRjpq4YS8JhHk1d/I72xmT/t93Ol8+rJsRgNr3/7BaKi3kLmv7vR1iIYtlW4YLQFyUSxdvhyQKXWyhFYOvLZ8+F6CA3T5UrwGWGcZIwCclYrqwppqOV2cz6QZXWU4xw2AASHcnVsRB13Zapc/cEgD+eBSxAwQQSscwKu/VLQHjCepdhgEd7Hl3fZ4KyemqSWzqx2reRDwq+PL28Ygzcm2LbsigfBHX/sYu8CDo5xnP0zC6vZNQIlUjuXXwhYU1m/P1SAzl4J9D+VB+Lo6dE5eoxH1k3rzW78NR+VYtywgMSvNYuX5A65ijEQ+GkmbKrViQmbYafygMpFjHY4tmvQsw4th+YV8+W4ZAtMVdDTag6uXBh9g3TEfzN9jbJCOhYkHBqDlW5oZanOW/SFarOQmUPkBJy2KpQPIDhhMTkhwx5hNQePF2KEM/wiHp3/BrBmarNZcQMVjab7O0emBosgjfrYzKZu3MmEIu0Xk6/DvHIO9we729tPH3yyFWmG9cXB65EeZ35IKdk1xY7G7O1i7cXF99c3G5c7G9M3dfuhoetnZEb162rGOgTF7clEiEtj5Tafod0Jd1ay+KZLGTrFUmy1tv1VF6tSVlEeSqpeiTDGKJ5muOgaEwx7rui7uDAZkQ7ZfR51Cm2YI4xAI+GhDnv3r6GLenKycUvnJgjU5CectTKsEhGdJxqP3C618W9R0fyRQMAU93SuqxFCLT8iD/32ZQou32yhGOSp32nS8CiULlbRFTWknvmQGFdlgESDabYu2jUCgBXQqLf6Ic/u/Zo85wSyUgLQu1Y69GxE+7bqS6e0IJccHdRc6B654EfRVmnDk0+9zxQwkfzfMlpZmqD0vNnKVITAeaQk1JHW+qClmZDFelmss0EUv7ynU5MGt/4TKjxlVI3izQmw3V0QoJdGvLJ8Rk6P/3VZ5/8yHVIv/tnf/pvsCFTylYad/cdLHGNk8F6vl23TqFlHCOtM0DNNHwY1Z0Nihw1VseZA8Na1tS0RUUKAAZFIxpb42StHdlAo9JuznNj4et3x5obhLgEjIWGtesCJI+P4EhFnTF8VKHColnFESHCwCmgAb9U7ycDhUqTZOVgCa0r55VQepNf/A9igUuFR2I7eQPUM3fRZtKdiTDLTpd6r8GjQioxnBl28UlqhGPnCvOUA1KRmEwXpgsY1gjRjcokG9Ot3ZcVRFNaTFx+JVkSlikfY9hg0BClwupSLQYifCENsCRdyErbXsFBAldgHhQzEb/JSVvJl3ED5gQGwZJvjUpNhJjay+/pn65nb2QgBI1iqAKv4BvhgFZIg1W2KbKENHpVG0GLEul1uOpLZhhW+toosyaVHPywmOk4p053Tf1Y0N++z/mqnJL3ednJNnrkYvYofOKtDqRcErgSj9CwoiT+cgOR3hhCht0XaiXkyB8TSUgYYQorOrHsimZMjl3xojM+IJERNxSwPbJuh6lktYGHJA9f+VEyuAEGL6Gioxu/J8LS71kxuTPqZ8M0EiEDNp7G32gbeAgRKyHWVKFToQ8TDuIAJktVK4kNzsoCryURi1Fi/VZJwwr6K6nqgm1LSwAoI4vSZ6N7I6jJaN2ZoZ3b8ehs69qIeWYHwN4tc3y2WJzdLGz+m+qJq9uRbi2dYx5KjzJZkcEy
P7E2OZaWPgHkHA83FLOZH5pXDaXoX700XJWXDqIlqBgqNj2A0v7R2tXplkqaFLka4ID/O6+yC2Q5ZOAnrzDSwnjQ2uauheAPMLHg2VfavzhWtqTagrUfsSG1uA1V5Z+vqrJ5k+3YJ+bKVA/9BDkyhMAsCVnhsALx1n4Snl3J9pKEfqIrBLNLJy35IC/1ItbJcf7cTpNbM5bfdQyCtMI0Uk4GyBbb2XVgPUOAOUcqwYU4/EyaSxrUPsZiHpKwlya25SPXUpXqblJLlEoTRmfGoncxNDqYpfJUnPoCXyoXZ9PGa5oHdic5s55fWsWTq25JjSQUO2vmUZsdxRPaqirxU6BzVF0jKBQrIyw9MDFqQYItzvYKq6o6Q/TPvHulUjVC7MrEW30CUBVLC0CROX5dC5DL4Xa4VnVfxZM8dDSWfgLtxgZA2ULjA4CGT9rCI+NlrFFPvgWY2wKNjlhOH0VMdRWS0hulL5Rfx8fGGUCRLbPJAhiqGKY0DxlEz2Sqoqo8VRySMs+Idxo2agVwUsZXw1Jiod+fN6oZaLo2QCttYEUEccEnfTyK6Zkypi0rR5AE9IGTqPRSqlkwoPhWGtYromNrtQEAwRcHG4xxhdAz03crV80s9JDIriT+gi7ZXVkLe4+FmxsI3RF7uSV3nKkuckjtVPmujSvUri7BJEmN2cPUmOs0V8rOdnjLTNXL3GSt/BEUIkmudDKXmCtrOL2mFNU7ib847lVgjGKxPf5Mvw1dnzQl/7mcglxbe2QFeKV9lFRXS1TP+EVeKxc4O+tWmkN4l6tkQ/ySmGZvv1buxZHELx36ICwXT3Os48Avca6wDfLfWQMDMzwfpuLHiSXiYkLHFsIoOBg6rfrxx1+5+2XGKM2eZ4M/jPXj0Wl19w28UKLg8JAmjLKJxtjC9KpFSipU+NXs8ppYr413TmaT8c7Rzt7G5q7Zu8zp3U7OVN/t+v3ldHt9d+qKg5GbPu590GkjGwcpqGpB+F0NLFd6dLvRz1tJHUOOhqH4/FyKkIJXpYtbOcR3kCKwrE4b01N0qiTW4cTqHosqHHXhdTUcrxA0Ns/2eB8CG15C4soMGJzwwxYdW0bIiIVSZxKYf8DA6Djg8NpapQaEqIiQV+XikSdIPXfDoB7F4og7VDSNo6M9WcuCS9YRw2QECYPIow0DyIJEDfqFMGAIAd/E81j169rnNwbitwMKDUm+MlSQBI92Yg0lU0ub2p1cKAvdOQnFhh2lgJoGmQKOucqdKmGm8IJZPryqzl5SgdlrShBmZrhp2IcSp3PpFmDogYpYda+kiREVq1bdFQaSTKIcnuhdJJWOhTAhq3YRMlYNpJE8JIn/ydPHuwd76VNkH5hPk2/uWUWbbp+enJl8gTorZ0Wn4Qr2+iZF7lLB79wQt2P/pYnm8LlyXJqrFXdCCj/Xnii5cvXqsXSo5DrqPXC992vHMipR3ObiMrW2bKbmw7DBKmRGAaBrFEj5GVehWP2zh81umVU2uZtI7fEnpBLxZeAU1xo4FdyuRxE5RlGlUCAC0fVOMFKN5ZbQ3/4Rs0oVIFzo129D5Q0xmNgk9Ss/BpQ0sEuRcr2FRigcDIqjFKpQBG0Q+pUoR3y5FsqIUjVmCWFqDJlMrc/9pdAuNsW66uwYTOMmlhCqmMTaRF5Zs9PJT/J8/yuEokSd8oefwSysTm9QB5q9p4+QJG0GwmleOnn6BDIj2SnFyklYeutb4gtAXhwo2MzFyEVRsCUz9FZiai1K71hGgLthd69WeaUq+Mwrcl6hepgpfztRK+97kr4TMrzy4CqiElLzLWF0OfM6nZeYVVhkHv7Ogl94k5HkEBRhYpH2MJyfq3RhbHhbyqJTQUIAPNu+CqzYpTwktsoOectDcJXM60wIvM8R5FASBVeN3PiYtt10gcb69tH0fnY7mdnIk7n07fV7M5Z2iprByaaJ6eTA4ubCLf4kUHmznLlm1T15hymsbhZdSjDCbQRwYshVhVecvFcC6R0xw3OAgU+Fzm/y5UPjKtOdxdtIGjLEGotIqI4r5xRH2vYLT9SqWhGAgZ0FD+tifwO5JTaxEctUEenKYj1XS6+vA9uemPqrPmZ6CdmiKbko/NUswWOhxkWDm9/TnSaZ2s22vnSmyGLYAHfWLp8YKFQcSSCqoMCgVl3ALqzQLrtlaRf1bXhPeOC3KIGMRlVDlwBof0JQApfIKldfZC4GjuAH0MQwL2iRHTJysUdxxisAYGAg5JECxzuwQzxN/XPmBs1hMIqmMcBr8ab8kF2pzDkhuntR90qUymL5xEbScj2p0ZgiaEM4Dicn1jPk1esy03Sal11VIUoHzDW1xlVGbtYdUne+j6HfkevWgCxrUBddIHFzjQYMjbxpk0U7gcvq8V6ZJvt2XkGD6DTlb5BvPRsG2HdcJ0+Pi+ykP6l82X3CE/Pk84xhh0nL0EyEWhOqiGKGQVUSaFb+8EQuJaIyoXZLxOPli2SGzAeCvlQHcsGQWpEkp2mUmZZ01DD0N7UStm8ZtKqDBsjTX3gQojrJ8MRpfoRhrmI1HwQiR38D/Hs2trJ7gFx848EHQsC1/DXCIaHX1GdVv7qMJ70UaTMw89T8g6gwp7Lt+AtLcFPd2WIT3upcpi/N9SinCPPmGhcBpMduV/0YztqV44bUm0+m46lWKLPkV9204EflKnkQLtVbMWgpxPxLreqoDEFmsTRyafW4aTFracfHViCW3+RWUoZKdxiM0tF0hTWnNb1ii9cq1/sGKVxWMDQDi7x4O2FT65WH67pryH4Ck7KBGyyarITfK25zEgIQ2Ek6sJ/BWQ4AN+DnB+DZAIUyPPeK4A4seI+45FinoEQNuZMiKEvFZXQlHYrk1miUSawKaOslHGe0bks0N5s2wrgR8e7q9JrSzZ6/0YYLQrP6eXG7vTFfHLrEb3zFVGxs+iQaC0d4bP1yd+PaeH3mEMFirjajd+pjOrRV1wVtrlxd5KEUA2c6fHhKzd9ll0o/3ahEJbZe5hFIk4EBCZtXrrF12gFV4ylxC6/QY+Lu4BCBLlhCUgYrOSdvLrdaB5NQ13/cgaQZddCVLttSaq0D8uQSxZp2mvEeLli5TP2JzGoil+a0nDqKAKBBIPz4DD+GZLxYW8OpXZAdxQTmqiI3OJUTSO+CR6BMNS9+DnwDRBem/WpcYLNKl4mR6k7JyMkED32LkLXkUin09CpjCRJr0jYflMralUCugYskXwDIwEug7CD3FKt9aYD6KTby3C1OXDeeVlkT9T6ZBS25AwiYXzIXKs7nZ00AJNbSdkrWAaRQ6T+pC7kAUZbIpp40fgoX6A+F6EWGARu2qwC6WIFK1ckoRfdkDUC6WQWwLoZVXYQ1/SBMhCpUFVDOYGoysEubnFeuCRXN8XsC72doK1EbAjt5Awx+mSVtTcuWrdKKqv+YrlW2OcduCUn/NEo2I2L2pWxRI2la4ocoPb5MT7dGyQCrgkUmIz81amlYswLJGo+CMPNtXqIfsqC1VDFNbSFedZYrDQyNpJ8dpsQN2U9puRJXmUeA1LQoVHltagf4FXA2rXYgzIYt7ExyWhZ
u6fdaVRX6tUn+BoCkM9LdkyLJ0teKlOR/UqluUbExsUDedWRj3ZSZ8UrWJfxR4dlPtdC9skeIYdDHNt7P0rcRj3vGBhXAhBoFF/L01xAALUYIKWwRbi4crfJ3uCiv4OHhBPJzAS0nI6+Esj2UWt/W3CGeknDSAmt/VOdOCWtNr8miMfNwsDaYp9f2C0zyHsCW6IYnZbZrYsNbmvSQkKfJk+pheAf2UzjH79l5VUD84UyFN6SAxgyAeMC5zK4ytQGHLnOWtvEMkNI2Hs8AxM55xNp56sn1dELYWhth0u2zwjgxb7KZ/sZ4OrqxSZBMpL4uZxe0h1Y22d+zu+l67fby3g5MO71caUsqsJfIutEL+tQOTemUHpYh3oaC2iGSfKESO5Q0eVfhhFT4e2FoGIFF4bIDp+5InP6HYoriBrB+be55drhnM6QLzt81Eu2/4aKQ0fXcBo65o+6SN2Sm2DtTaw72BZD9uDwpDQ10wNk5esIJiWWJ3nCPKNkZwaA8KatowNpDX2dMU2NcxQHZMKqJJ9goHE26xKBqMOZKNaBZXQsfooa0iIPcq/GxcS4AFSgXF9M0TEsCw+eYc7tCk1QtHnA3hu7D8lc2Jfb4UciNqHwNwm0gyehubf4syG1rgkULy8FeI0rHnkybGnH6CvDV5Zt3JwRjc+tY0vC8JNZuTJrB/pjwJGY9rtlYIdHKSyLrRyBH8vEhu5EpYd8GzZ2I5iGV+r2VbYZTIckLVU4bVQ81rKg+K8wlcqx+DMpyiNoUDE8RTUGTtiSlJM38Y1PznsRifb/CMKTlR0HqP57VnzpeyUTFAOjZz/QJUaRkmR5ccT8jpGSowW/hUDbtpFXjQP5hkupORUVGk8h/QigBFQJVJv8Ym2AJBUqFIrHA6lF13HJWEE2SZ2D8T2BgwLfrtO3v8ECtWo7wrgAhEbsYyqhzz3aZF13aregmgUu8D346sMEGbC06Mb0o98PIGGiz9MiLiN5RWsqhjOlDxlJjhHzJIfZlw5uTg9Gc8kRA9pVOb2e5aQl5GqFdSdF41ktr6yr4llQA7RmoFcWFjHZ80XHLdZqEpzeSXhK0UqENqbjR4Dq2/EZdPR8oVj/XxLodRAI5JZVQch7YPOPPZHDYKG3yK0smtl1zbPAnoxJg8hHiV/zvyvYE2cTAwz8k78AOwfkucsd6tpMkOB+4VcwyqF8fwghpVHqTkrdgdUjnZdSEY50eDQM80oCpTRmarCo9SQWrQrWvA5EvA6yz7zqzW86NTiauC/T1apM39xsMDja6bufp8yf7jw9OZmeXJzTbrd3toTBFSG8A/ogJnWltzHxjGkp4rvgowe1ieHOu04VwMGKb4IdPEAP9HV70pxYUnBMIxnN4Dbrf5Do5MGrLyMCEWk2C1aeE51fMlaimQWrU5sh/dQ7wjxS5NdHEJ0k3EdeoUpASm07lCQGuEnu8d2cgSiL/VbSHpIKkZ32FDmQuvlv1P8BzSpzGVkUY0sprspvPalBBUMV6rRoUGLDNO1GweUG/LoLtr1zadakRDZh/mgto4hoPK+Kgl3Heybm7u0zsneROyJKMNBy8vcksBYujneGbVeHrm1OUjCe7Ghr8MNigiXK1QS9md8OKkzysuE0QyCMa0GgEqG0ZqMIq0LLeO7AIKwVUSLT5JhWw3HtlGiRp9Y1GF9WzW/5wwziVDJa8aBHmeLK/gfbyNQGlHpAXXyNUKIc55irsWzVpfqFeOR7i3AniLzpRFX57F5L/dGSkXutZlgfGkkuv1dzEx2Z5oXMy8C37TKnZhJEhifdgxJTc12KIosmkBUqc4oBP1lCkGssRzOSlJWcI2vN9wSAj8YqCp0oVkvPT6Cm5ygb9IurZ2DpldG4KxBRUS47020ZK5ls3iWqPRDzJH4taqXnh+MPMdH3DwHA8i25lr4uN+nLpzyEJF0p8pSLtSwKK7TBIyBULZUMIkx15sx8s9mmUjdTOlGYjMu0jvcxM62UYnmXb4OdK75QnlIMIRWF9uguQusFaSbQSEmbGf9+eQMcMq90tq77QpH6LA6Gjyix3rgNhMPlYi5LNS+xL2b0Y2HHKlArIZn27BqJAXQntWlE6iFBCIlNgkBNuCQXimECqQaAQe7d4klE4U6mKvcv8SknxIzYrgy20AFBBUnt1hjUo3iYMQOQi405MdhP5UhkUQCdvbGhrt8qohKfU7hCSolfWIENApA32gmzuxZ/XTtIFxHcbgakn3G7gjk0vbSliyER+KghmrAOo+PCYG3d7qcAs1VRRhLvleDHNiVHhqZBssCrshCuqLIMbasjnPzBEPVi9KlZkK26Wp3LZd0YD2pxWaDIt35WIUZSBSa4QE0oKfw3dS12YiFzSnR/JAcl+WVR0sKXlxKpTPSV4usjV0uxAkyKFklM8NdsDOLEZGMXFKDveZPbO1rKcae4WSZMSpDp1lCaQeldGN6y7Xl2Mj3GYAafg+vYXOBUFLXiankG1EEmI58I8lB3SNwv3DGK4aE5Ufor5nl4xKcpGW45GRV1cMOSiKByPLlAeOc59xd2nEu7uHj97qtTX1oFn+cKhERpULlnneOpcjaImV4rNpTTHJ2+ZlrOTU4gP9qdqUhVsZi0yWkVnD9qz88u3xyc+suHOx7biloJ38+URJ3SyK0fTuV2f63hilHGbqtXO8IRRevrE9hNKJ3UK89RHdlCTQqbeIjFrme3cm2ZvpH6+VS785zcZYyIE7iqzOotaQxLXjFIlqaT0I3Ej3OkobRlDbOhXIvvgM17PyinBsie+aKl+hvNx6pvool/PyU36+DYZT1WTS9s9nTrA/3FJdKQX3jC8FA0KdGTcKicNFBwiWu5o0AhwpJA3m960nFSeeq7GoxCKDgMuSKh8cMCm7+cjgSN9/NuZubPSljIFvGDZ06YqBwlLrLXk5KvY2gbWoCCvacvmPpeTABQRBNm4IgqEYiGr9AW7rVBO40AbWtBcINGcsmJC0qcQBa+GFCOF19VLlUHKuyxC6ic6KAmhAK225WLz1W0mYZWlDiwhW6KkCg21BtM1nVqolXxR6ekVDD+YKPKyWGk7lGsC0tSjFCLM0SROg2pd1w7lMFS2WI6cJctXkcDpFRIh8q82MqFwvXADN0tfpUuhANBfMG9vmq1WS6TB5w3qw4GRzU3takZdr69NdiZbO6lw3Rs0YLUnCpGn2HAolIoleQqlTrtqPFEcABx1M1jdj2kpWpOn9R49eqIZGD9ZqY3WgIYkOlFQH22TwsVl9gXVRiQ9vuhTJlM2rmjVRvZ23UZjKRhr1l99+Uq4uQhLXFbXsuSAb3aa5W6HI13F6qHemseI3K6Zq7kn68CUx6gMZmRaOsjkZ3pUdiLodUeiteTlXFDaZAxwBKr+QCZElZXIMLOaGz87ilr333hiD81IiZBT2FCFP5BoTfyydrusroAQGk4IGL8wYBom6kFinfBu1XaTWTZUierOSDf0kwItzlemHEe7H/nkgmkcF/aYmIn6m7vObG2yMZ5uTnc3ttUkPOy+j1T4nsYGwalujA8tuhLJGvvkYG930xHUfDXnfn7jsM
LY5sN7fFOA67WJ5RIS4yPuI8Co8s1OX33EJIKEh0jO7BDJQXtsoKrMAIYsah6Rej1RbRMDSIRDpovbo+rCuwPs4OBIZwjxOODgUjqY2s/9vY8Aksuo+9ubGopnuQ6f8JpuUcfYZeXTJxZPTo4n0z11op+TTZE4sxj5hKEPobn8TgumNzx2dg9ev/M5wHOfvDKsZABc+VwngfKJ4VR6WtzddMvq7LEjFnr7jya7qoUtydGK+ezwaFelaAyMNCLZAM3NQhhRp+IMmDQ91Wm8B+wHH32oNikrNONP2WMfKZ4Z8bjMhXxDObu6ePXqa5L//NmRT7emQ1KTIcpPKvQiXn3zFXYxE1999QUKCItdc1fzi5vF3D6Zze10LiMwKiYd4q1pEYxcoz2KbffAqTK82S12pS60PmLpcilqlrp//FgzuFTttT87xm9rMn7y9Ojq/ORuduHKmzMb5Y1Bs1KiddxNpjqyxjAq0S4+VXXHwOi+OKtpj7G6p6B8r4Qq9NUxbFdhxoNRArHodILf6AoUW9LzVPtph5s+c+WTWxduzsE04r23s+dDVjKi5QiG/OkQGNJ7ul7sTnclx6JNd0yxYT6cNsu4M1PknFwDWhoqzCklFSJCOCUel1Dcy+aQqPBY5RQqUVk4aZ2dsKXiBq5FIiTKmSmKgaOO1U5UHlmPiYxMJn1hL+1YGjDGCVhhi40pSpJT2JoetdhYovSi/ZdleYrS3KSQZYuANwBygwEjc7Wiq2WYUiRUpzvDDQqnAPQOgmvplBqt9YIQUICaohWAhCu3SoS2MIcTk5+Vw0khWBfdzeEDdjhnFt1b1AwGrF4DU2R5tr9QxpxA6ZmD07HEZGTd2Ko6p/om0cVKFzVbVeO0Ini5ZYVQfTrKJ3MLIet3jDocpMcIK37SV9pVdqEpffn1nmbmh6SL0CKRYlWlWIQKCXGxirAwPNozhDFX2dPhxKbPAcauaJAUG7HjSJ3+FGcQRZs4tomH4IUYWnEAlN1SBUE3VQjg8Gif3ItCaVvrCMSdM7LOEqXXhYi9SXUPq98gPTKVMewqSU6rLtluoZedUsPJDTUoMOqvXAemsEPxWyIG6JbfCEYaUTlSX/LE0KMvl/QkjajGKUf0IM2rG95kzdP4Qi9C41KWFuE0May+rZNzSVjQpKnGtSz0uqvj1sySXRzGGudQlldTv5IbNrFk+vyu1ncppw/sumDQplzXGtibwVZSD0xD8rPELkE+T6P7pefS/byMWnADAFzNFv0jgE2wYi99uEqshIY+tFWRq9RRF9UjpAnos7SiuAiqZ4Mvn0nVotetW8J0jZt7OKKiJxsTnbvkThuutjVJZtx9SkkrqS+NWB91Peks33WcTA/JJPxKzKZRAdXLzN4H+sjWA9DVwQ+7NZBWedEpxWZywqDTs4jwiUi1iSGXZ5dYEfudQWh169N7jW5CofEEef75z3+u0mzf9zQ5qZVVeV2dFBnUFQLMuEtn5tsWJwmj1fMRn8yTu+mY3KqWFLQE2PKkD0mqQbP6Er15845sESpKILZd/WRwiPDcbzRx0+D94ujg4Ne//tztybAcrD9mes81p5sZshkaV0We2ZuzM5be8ExKbMSn1I2uUJoOPkeivYbhdKpN9BlkpsjCu9ZaKrS+wFT9ixXFX90P1KegWlVJtFKzmGqarq31GlomOdLdUZ6+Csio1mvUEDGJ3s6XgbGsdkUnY7VTU6ipgJjx5cy1ZBUbyposlAjhllFVLP7BDbEEokhO+5MGBrnr92f8UKVqoS7JjjeupHxAxVOjjQSoj7SZzrh4kcDKPWHVaHg6ZAlZ0Hko8PLH9Bdb1voimSXTcsmj3PdfBQtcRn/bP1TM+7TvUa4KVWmR1LUy4Gm0Att5bSJXAcsqb3qaAADqIaLSQDFXBtpWTTPaJlX1EaPYwUJltBmHPUgIh8J5iiCDBggYBhM/DjdotD0kFdIZeQIYsuZr+iO5pT4gI7zN1IZFWsE46eKS3EOK2OfFycz19R5zZZqD6pTKlUJSlRQBz9objcC8qUmxmrQdX04U+iP/UVdV6WYkskPkfg9CmwLcxcGDWu2ZR0JuoLw9cum0qlkI7J4KlZ+qEc9qtxDEtQX1lFCUkOaAV8ih4oTgnT8hHZ6My3lttwpY1qZA2IbIzhoMVBwChGiOMjBWFg6y8/JsFJ7Cy3gvqfpWRsbtmWp1+dvd7dadD/6pVIogNzX4AHwuBWWnouGlshnMFaYypRHcxkAm8DBzaveus8igX1Y4UFRl4zzd5dYD/Q/aVixgUWoN5UNBmubmqkBRxZ4ANJ2KhxJgVa7W7gWlFjIdMySNfihuSRo8nFpotieoFuGRl+m3VW2S66bc+Pvd2TkY6p7TMRIOQ1crzAjy2rxVFpbswr7q9W/enWzMzJddnhquSRK2q4VVcwfJPjE/voslylYISPCzssgYQmDUizWj2pHhFa84VgRHHWO0CR/XsK7Yi6VsJ4NFaJORAkJlKGb5lj9VU/eTeRZzzIekJ0HotF0tyzzVYmaArKayFJd+PjPM8pnmNa9RkzCKPN6ObWFLzUba4mB97PLqHFWZ1ptsb/hMvDgTP3UlYLEwhrzKshR+1CqR0HI5XKywTTAOdMG9cvxDCAwd6NkYqg5ZR9op8iAc8NC4SptFcSVVShUa0I/ngM0qadykEip9UVMaWVMFMdgqbO3sk/7brgmX67eDl9ImtsszPHlKIJMOR+pNQLo5+jAqjVSk5ceT8CVA/8ZQxYKq2YpOpoAVK4FJkBhSEpMcRoQogZ6yK67FLyTZr5wAUw1GVw3ZwEUC4PctZ0DF8z/jQkO5ARtgfk4wfxpJOfnjeKNqAFR6Fc6k01hDlBAAXpdg5WkkQqpc+iQkU/LOAhMkoVvTdxTcyDyVEp+pwM7FT4RCSykHm5tXNBWDlbSuEsHqBy2ZM2QKUkEkgmeJvVHUk2ypFiSlQ5FiltbLukZy0+pMKUwyqRO9LDuzJdautE/GDE8MyWgDG5hYhzJMrEXStlmJBbzPWeb1jdxNQCXpp9IddI3JJdPrMbj5eEEOM2KO1tikeeUUoTi2JDf0P3ANIIAnLKrOuyaKKmQPLaqjxPJwsA44OuQ7rwI7JLGZhHSsO5oa2goIIoR19Qnnmn56LC0jMv4thzzvkvMgTNpgrlxoPc2YETMxOt2amD7Rrq1SmUFJQ8b8davuroizYpNTI8ppiEvt4TB1pDRZkVG9hMUANLy/n19dWydAVegVcrf2/Plzvf0QmfmA8Ets7HYmfCIWrRIy1o+oRBwJTJONTsCVSERkJD1QJYqqXfKkyxLMKydEdvaRGTvweOWk8qTWJz54vZ8xdAXmKY+zyzPjbzcWbp4cX5sZvpmfvTtWng8+fAmD4QiKNJIY5FH2FhkbvTs9A6jf7zp2WwHQT7RWJOQX5RxJ0EaYlIjEKFszzBZyPMZAbinTdJQQFdA6y/Xo6RNRsKHB59/FGMeQ/9gHvE6fp6pi1f9WsyhsBV12KEXOcgPgWrgSqJFZrzJO1mnTXlS6GTkjRN0QU6NmRBXPCCNcH7nfdoep0R3xy
VYXS/KxDzJVx5b3TM9eaUBMWIzf+Gp+iVTCZc4TTzSrK1smAfeWq+pnjA4yTUJSENZsb87wd6WkCsq1lPI298ArijblR5tVRnzzNBmoLUOiJRRwNZmkskHjEgD4Qg5fWk1j4zELqf0R1xK1cNLoCRMyaA+J1VTglQB0MqjZj0ZRGEHFwduvMPBwhSw/YiOi2kxhC56FSov+gjRtMUij4ZIKXGY5WqvmqUUlpOgisELSIowqTD6on9rpV3sMAAdeiw95ISqNUEjpab8r+7HSOEFWCAs+AO1C2AM3vAZjRQ3PKmMK2AyJZ6XXIFgO+goVSLHF5iXyrnIqhNCV+Qlc+8MUJauBrCdJ8VevS750VJUSqwyrU9TklzpSqnqLLoKi9Wz6c6ZiYC6bpPH06eBJbqnxQYHsKsqQujmGEsQ+9PdrirDiABrzoubSD8zEW3LLdG2W7m0JoRozAZOV6Mw+a2C+ImfgRW6ZK45cZinIjVC+e+Fu7KwRpAT9F2Ywt1vWzJlAVoTQW0TJkSB9RB/g9ST9EszmDorlu3wUyn2+oLPswXmN6Cp1Ycwzx1zSdPnbdb1I0q9dcMWKbqoqE/LQpciDeOhVRx7jsMEfJPGnq4AfeS0agls4w8Sjf8LfVjnNkc3ukTdOQVKuMomcJC8Wg4UwGVhOYLtEBU9OaNhHMdmbbsz01l3XdMkUsAyxWOnM4Bx1bA1Aw7hze1s0p7nTdGAtDoxv88VGc4a3V2axzL6eXWyiMHsFmaxc22M1qz69rpHFwMhStfnSROrngRvIU4JSnGnd+SsXDbByytuocCOtoBvssgo6UU5WqIHmW/Td4vL+ak6Z4idzC5MoxOAmMZcGGsIglXUjs2GRSawp11XZIcm6PiPpeky3Q0wn+5Od7RwRuJ1NtnLfOeDgbozqNwtLO+6Vf/z0KR3qI5lMUToBZcaYt4JPPSmlgqejo0W5ITD3BWc8GiGq7hcAjVB8iMe67DBIKmhNNnrCSYw9lMurNp9Wn7adhZO6GjOyAq3isn/al1Ki11oeXohwBiyz/q5btOJ9vZzSSJXVABxynDRLD1sMwv3d6eWloSKGIA1YRpz6kvN01ywthb1lrpQxbj2kIoZDNvY2hz1TtArk4Tq8AHOxqjGTEHWnGsGiIYtYk3SA8MKrWC4SUtsUzZsYcArBruoHZxVpczfHN7MzsICTJY9nh3RmTcRACowh59uuY4O9nMgOqWfUX792opilEoaMDWi57+FbApeQ8w8ubUAWRS1UUZI0buXVWXum76J2sac0t7SGychKVm2Pa4eKj2jAJi1COn+QJADDh+z4CkBqv3Gi2jM8hcDDVfKiuHrBalZNC5QpZoJvCh8iEcKF1SvN2Ug63LNTdZKISjnqQ0zlWOoxBGwa2kRDavOeFEpKpTxJqtwm4srX300W0ZjqlgHdK028lrllIQ4k17n3a9PDPzjpBeZVBrq/6dURochxpY6N4YG21A3/UrMoLBCVsT2xrJWTHGkPPnGyv6s23NxMHGme9EOzhCgyDbiUEv9y4cQr/Kgto5Ub2UStX+RG2jJ+N3U4JFSv6FmWyKvAonAZ4kdgF5yn/RpqENZ8xUOeNHCDNZJgi0J+j7PDwTSLecSWzcov9xBJh1QBQ6q8OMxaVs9KqEShMjyv/WwwdBIh/DBaYGQ8fHPhZueRVXZXsJqUurmd6SBkH4Cd7eonZzBzJNxI1+qOQ8F2FsIEd2ZZs4Zk2f/avrqL+43ZTdEfuaQxo6wB0S+oVKfqo/ZbpK6JN42CCR5pA0qRppZiVl83MF1qHg6AjISSlwyrayosxckc4Hs2Nmdioig393OsJqjVCwJ0d3YmudwPu+zAw94WdYbKUWIXTZ0Zqee4iKNMRmC5LYKj73xvUF6QoEMtM1f0gB0I8JgZ2HTu1n0YdcWtVGlmRTyayRi/KCQRTRkxV8FTXx/WA+OPeSknC4URJYv4U4+Z8eMUG5P1CwWyAorJIxwYs9cIo71K/MTyZGNmmBy9p2vnQ54X1s0us0cKYbZb6Hjs7dgYkn3Cfb+dpeGLq3OVBYNlIEbC2pUtFOZGFeEP/+iPfv/3f7Y73Tk/fhfMdViz66VozGzk2CmI+hxJSCIoNdmA1JK49/JZIal2Hs/2LIFWda3uIBSIN80TNOhvKm8nQaTiczzp4/mvA7U6TlD9V+nCQKjC/07gmVwzAPIVJG9sXWm70raxK5UGDDUJe3CsXKAfSJvgfg2YrXzRadXVCUGrNC0KS8hKLwfgq3iFaW/IWDkhXmOgq5CgB7BBvARCpzEEQY9OCpGiAVaLqZWlxhfyAHmKGNe5rd7y2oEVuQRof0M2wHdChqj2iMWNYl5000OnMGKFDBjaMzwbGNVhvg6O4vPWlIJCJSg2Q02nL5HGUbVeE64RBbMKUIGKBPuptUNW2+Wb8JhE8pe1K84mMHqm3FChq7oZyPmuR+UGhkKEvLvJmcmnL0qUUzKxIYAGIbp9f3Y2cimxgQaBXl+fH+7fXudy0qvLfKf10UFOKzvWSsoVzaAE/yyyRFHYC8DsVVaelqZdML93vYcECWFoghU8aXsOTXMv8YaKeig6Q7OSyMUzXKqEFZ8hpkD+oZqE8Cd5tZF0A4lTFTyQaoG/alBUCVqSiuJwptiiQaZNJpzhMbyJLMtafcY6BnMlScpV2iSXJGiqousVtY0nYNH1Oiiuq7i9uD5/tLWTDTgTOWnpl1am8yFim3PtV7enR2m1RkUdXZvEi0BReTX3mK0V1q9u7Y/bugzPsSUjAF04nzr+8qvPz09PqZhH+3u4mkkqO2+X5Svi6tFkF8GJS6svmEhp1FBcxDjLPNloA6b5nwK3IxDVFLwVuAmAdZ+ZMJze2Ny2GZMiMxk4rhVaGXGpEBxEM31WTlrTiNiW6ekyigOJcIrFPiH277hjPp8sZZXvbFAZPTl69OTx4fjwKDX1vVZJV+K8hDig+gib3LSYhOcsEcpJIFOVrQjAADNO9i6igbHUOZO7tOYApDXjkLhMa8+1UJwOWvfjZ0dGbnKBNjsRShWXtGcRUVpkc05oKbUsTDYe3O1Z5O1bVq1WFZWmILbv7Wnf3rhk4250O9wiFIeZeoNnxyfHr17l21oGbbex4qoIZ5SIORnfpS8rQLi6i4mtSfWSlwSG78VDxeQR0uXtQK/dmgRKqOAG4tmGWSaZ7bdpE45ioDsSq0VUrSNAEmlxd2yjic8m+MBmIS850V76Y74raKEcOPwaKJAA0R2ep5opByBUF939FNKugiMTFAUVGM/KSUpPVQsmEdUUVwCo6OT9jJSvDJJUHQhNiWcRQW+lUO/JCIKaEdYfTBT0JCvsXSJO7hpMpkxFaxlpHFzqo9rPQwL+//o77cOnJPC3E64vxkEu3KBYCApThJUTVYpuWTrBQywPJ+EKdvmL7gyQ6wo0/FGDwWHXDWNkKdBFnFKkf6DBRGyUOAUJo+IanQASSVjT5Oo4VNPcpHa+SfVt1wQ9jB38MMMAuRRy9iQ2HL2XOY1ykINBU/EEH3gzBpVCbZiHgc0EIZIODx4BHo8vNBWTE2fn
p3V//JptWXXoyN1u+YAQ191ACaFqcxUCGrUub5GR3T0V6FnZhTbwnk1n6Fi9NiQMTbNXrv2e4c/39fQSbaC+A68CJVEXQxTMLa+QhUHlErgiuzH0a5PRfhi8thOSzCrECvRo7N7AzZvRzHKEwyKb20zS3eX8dJJTjndznwu+m9ln7TKjTBVZAbl1ucDCTKpCMmXXUYfm0dILhhkr2oM9svv6q2/+8q/+3JFUnxd59ignB7KNtE6DLgnDxaSLoSYBKazSFCcIZsLLwZlbtXM3RxqkXDwFinwPE56ATxKMoYUYYk8KeuEjM5czylTC2y0r0FGjhQHCTIcK18QMJhgh+8cpKopfPvsHeyCzB7ZGPFnr00OotTH5ll7O10NcelNimUqJuSoaUAcGZhiUuf1y0cyM2BgrV8Vq3rErdZMIUo+Pj8/6mzv3+ZhIfzv7cH//kx9+fLC7xwgxtbJQeLbs4vwSTC1FhRsI0CHzrFWe6xyx08OTa/ip1aunlNNs5ywngsJci2cL+nTT7Sa5EEFTo9uMqnGRAgSp+aEfKz9+8QJncNLsP35ZpNIpNGJjHcONutYdQjWSVYeqAw1KzwS1XR3dJmALwnLgOa/NT89+VUylAOLVHn1DNnpAiHAwkYTVqKsv5A2Wkn+cb2who8C8ipK250pDkA6qatYDQh9nUlsJQCf7aPYSPehSpszDiFLNytAwTX2Hd8G6MPwY7BpODs8oCUyobksQgolajfXKCpSKubW3Gg0+cpv+RspaTVsXMs1GZ0brSueRFGYciE2+Jhh9JzZl4so05cOMZpbr3LuOT8gI30YuabQ8Kr0jcsXQdVtC9cvsj0ksCmj8qrBuMbKSYwlLknNBVdmFM8VfepOM4r5wfICkJ2q94qSiNZjX9qQpVPcE62odObtZ0xJWycWaVlb/esbykFEoEFu8MBeIChM4s+tsfDLcgDkLCfYR6aTHwGfSIQohdJAqgxhEadKmz80Mb+h8QelTNznLFfuxpQOqt9tmNplmfSc3pHWJdPd4ws9yyiNfTmCEOseY4u8tTcpEHhtSOdOSlxW9hs/WiD0zjy9pzmA5/JzxkHD+SF3kxVHKDSf5OavE8tEWbK84Pj6FiceWX/oiG+Xzhe7Z/v7Ljz766NNPP42SuskXlcKKVaeh6UQPdeMIFPyRnrqLGgxqo/LWcq2G1Q5giimQJoIEB9Rsm0NTYeCRBN6hEIENQ4gR3KmUQmC1hZJex6rSZzeDsZzub/yAs26hmAY0NTsEZ4swJWCiqKlt8khFaDBE6t5XsV2hvHIh0nFYwzesvb+e7B/ZZjEbzfen093tyd3Zzdn18e7G1uGzqd0Tt+s4NjLetNdP2eYUeu413Ux9bU2Mq9SizZxmz/f2DxQEq9AgdwV89+bt+emJwI8+eGnbBSL1lEuphxIbXlxEQBhwTPG3J2MW1Cuu+sags3LoRLP6xTfiAy3P3e31m29e7+9mgSSDlMqOPx2sLFlFuGBGmBoR+uvPvjQioW3Duo37jz/8AzjViLKrUR7fROFkimZiEJWAxJVZEmgN78KF9qxgWlGUBlZnHTFGNbsgUAVPAtfWKXrOxJTsiNYvf/lLzUiIxMru+3E//vGPf/e3f0eh6Rxzrtm8l7XgLd9N/uzLr0D2DOrlxRkW/eiTT372e79bBZ8yV7Jm3qy4ogpXjXROz8/QzBgjEw0KHklWYc6zSpb5SyefxibxsEpLsh47u47xNkmyPt66mM9s9jPNqJdgD9vVrcuFp0hypfnzZy9vF1870kTNOPPkPgCKwmL186OjX/zlXzx7+nh+kx2PTMPVeTzmWi+uYlQIiVcWsIVBJaoizNHAVE2YUO1IEcL8eq2GmVYsYaguSwOVV8NBkufQmIkchseKdUuL/o15TBWHY+YN0Q+hKWuozLjm4Ne12oowlI43RVAOdk5oezw7S+9DINKFd/YdK5uO9YywrF4blUBqyGiTp1PpKuWCynRGdGgwNkfLKgtNOLaLouUGkvilRZT/OBJhjjhHuBJatMk08OXHT6aN3NDTTIVZTQOO0vchG6+xGBNwXfJKBMdS/xYh0i5d4/fCM/i9dirP9qzAl78CH8I3zBDYiBCcMq5QpcBVzAbu5A3T2QkRtXLMVlSuqIx5qyehgGLVRDiDhzEGZr1TI9nkjDd4VYThgNgwoWsq1RNX8hB+clChh0d4BeRRNKYu+FMFba0lzNUkILOw0qmSPPHLjjMTSQIGPDzJb+UKTSIb4UOwlf/u0aPDqjSTGSNHDjUeHXwIAJhn6JuclEjR2jW2RgiMp3NbFSpFLjlKlznaLXdY65AyZ/kySztI4OxchHiVXIsihHbN8bQBy1n/kqWop3JQ0XFQ+RHAFpNGaVGxImDZVB4iBwxPcqmq7By9ggm+kvDU7KosldXyYcdSOLzh6431zTnnqUa2t91PDu1lXkzXt7MZzvE5naK9LJtkjtD8gr0wY81o3U0DC9tC1XmW3BmATRuro0pK0aNZkZWuipARDH/6KNWaBErSUcRAoFPAWUXf2c6ckktUDd0YY7vyqveWAkYD3jPKelhdLoFDefmbLQO8HAWqdOtS2RZixH1xYSDRbE/eaUqZrKH0AOe1HAxkxH+5xNdj2oLP+GoUfWJkY5JsPMlo42oxd+lBSppzhEmYVEVwoyV1ysJcgXGqyVM4R8BlWOAoWYq6EBJZgekt0rtmRFBS+AKDHLRQzfB4wtP+FgMAEkEQfmSSkBVy8aIeUdahnRQDdniAJUf2gDx5/FhDmMqCXnRz8c1VFETucBg/2ju8/+B+e2vHaa1PXnzoSx5PfL3XGE0Pdz6bZLf7vb5CzSdHXAfWIbVYpX4jmd2+stL1wIHvN57mVT8hES6w2JOaUjy1JrC28uofx1Y1c1RRJ+/X5rsQSCI51X5LOrOPJpQ0iQ2RfsUq796J1OlLJQVvgxUd0UlKBcVSIL4ndkV9SXPtWE19+EpTZgtSkUqEGFhKqGjeKFZ5ND2eyUxc6XWeVKqePJheFCmZ6qwFUorRublhRa85V/NTE0leu7Bc7gSZXqrLsnSS5Rzk5VACyertN/wCGFxHN4UCvXouSV3xrcMbpsqQRF6XSJBV2am8Tih8IEBIh3dUP4dMOwrHZAVwmVEfxct9AdF3YVdTkkYazDgjxPSDMZoD7k75aaDZB5itvRWvBnO5D8ActeGw2rPxN7Z8Xq1saqHiLUBlHy2ymqR82ZqpTUWC/UcUQxUKEJNN1FhQcyYJSq75TeTAoXAS1PddLOKGYy6+7miXsDP8XLbFy1JPUGtlsVqUiSIHD+JDRrlG68l1eCSvnGIyV5ez7KCjELvIEjakz+BCq5PrCRxmGWu3vr9KkeRwb1mRtAX7nawO5e7OfMutzTM8XqFKnaCHpFdNCVF6fwCLnkxYQg6fV4BFWqKC6YF7wKqADDH6CTidObP7uY72vTPXI/sEs2MNhTvMT3bWKjTLqZPtpFV16lyxu3nrQ+OZr9eNRr9pja17d4WsH+xu50NEIdszKrLmVCBUxZb07e3hmsNChukE4ycTNbnFx5xKWbj
eyWMWEVuSJpIvRTqO5V8WU3FSdNmVrBZg2566yN+QqGaCMKo99qvxhL0lhhL70x/nGCHOxT3IJ3aEpDETCOVZZpPqCA0w6FpZt2OuYFQigS0AzWFPr1x7wMMD4RC7AlA5YQgAIejHN/44+9FrxCwWNz2ceBNriN147I0x1dGzExHy1cAdJSpOr94Mj563Xvid7TP363vTPTrNa5vwQ/dS0GpuFDE/Iuu7+721KfkaE1J9jNFkbXK0tucbsK6Zmr/67As9E+3IVU9Gt/YuOkCyvZOzH8jD0rbB/DL3yskaSasSBQbjUqzi/5K3JScCu0Se/HB2LEiKRSM1gccxsbs7O6ZMp9PHOVYVyZRdhE0TINbuLPRnD7AQZGibMOXjAiTPhH/jxUEe0TwtSbC035MTy0HxHc8Q2yR2bEOKor5kwAOz3bG2xxrf5rSV2byMQNP3hjutN//Tnci+41iaRGEW5wlhkAivoCDTBgGvWCM+YDXsCELUm2aLL13m1uL0DpcLs6jVKov4yiG5QCaLLqffLmdCyoFvB1u7Idxr8+o7AMIRPQRWFnnrhIktpz7UEwycAOE4yY9jDSOhJPwCPaXL/zgqkidZZONy7ZJYvle0VCEgVFBpeJf6NWnAxaM7H6UR16+BD7b3ra4zEv6dyi309WCVOstMoxZhQqo7kld1qU+jGRX9sIlalv8BZ4Rz4IuAQlv+fqUuSXxN6uZeAJMh2o9uWg64PPKtxyMWRREAK0UkvshoXqlcmEVxYMLh8jekEOVyDAyGahXJunu7DS9fr3B6lbabKFslpB34wtePFF82/mBrkoTQOnLhSe5VWfzJqehsP2AIw+QyDNA1wBJvQbYqF/4gx9QvJZmtyZlDvl5s2XGQDeisl69MOE1kGJUlHGezqghmYwnN2IpQPr5pUmluXslVSzLWWtShp3seMESnkE6Uijxil7sk6EdENmFCuqeP/naIf/fuZLI72TvYme7lJF/nmLpLS88uA/NRXJc7vWFNVE9pyYxGvHxKCz9tJK0q0KvIOpDhVRl1u21aKgiT3DHLs2skGZQtRHgaR7mmEBgyqhUwTykaQXIkiUdwf+skVVDYHiYUmzaTHSihFsx2vuWbAxXy0lrgV7Gdiz4CRgWyGquxCzDKGkgDJFPOLN+aBbBdhwQ8c1Nd0UM5SW4SFQbwWi2jxeBR1t6U8WBvPxcU1bqDxee786vry5vd3fvJ7p5pwVJiNSizXnB8cXt6tZhdO5KG+beX1788n1Eu072d0+PXujG2M6BfjCGE1YC29MZd1ctaCjyBVnSUIDn0lGTyIKBDMKo9beoAPITk14cwHIdcwQFLqDYVRAFTVSXOaqWrL4VMFtkO6qlmeFCoS1iZEqGqGalZKmlA0/uDDGVcFRxdfdUgQZRq6KdcOw5lPEOR2l9ZBi0bPbkfTWkrTSXfn1e/9pKlB4QCOGg1yzUILAksUZZLFamJIWBpqqVhQ2BlK8owg1iWTk7rlW/BAYzpzaCNyyJU5tY5K/LrdYQlkFKUpuP/X+qUVBKlVk88xbrQJFygPm9YEd0VsWvkCQ+leRPIhTaty9VJ9EkmRBGcMubXCUF9Yd02/WOX5qRUGhy2aVGZfatefHheMpp2G3KYh3AwoWmvDHNlkrkISx0+DGFJ3adTUyfZF5Q5BxpOjsk0S9MoIlKSh/LM28YMINxrXIV3LIgKp4BQr2ZT3BI4DSaXvrGhlkeUBS05gmUeV0V7M3Gf8kCWjIaBV/xFfJWlXtKG3adA5SFM8ZAGkc8OrjFURleWshDV8EVfHl47RFlg4ecJ78t5BaNQngI0Bk9tko4WRQ9qWuo0Gqoaqiw7CZYJsdLNmRqSytZn26u4YGttrwWt+9B8tjAKlbDY37WjcKliQYhBEkmBpIgEtfxrwtDWxAOAo/kspAPb0091RPNoSdk04V47daA/5jrBvanl9NOrM13xbA2Y7GZofb9+cXE2WrfkaY5+06DiSnO0eJk1D19IM4GRFXzCoHL0gYhrTBi7YWdeKWjEUFTYxyYicqAT3z7/9We6y3tmoB4fPnm2oKPt5cQ0SfATr/hrQimV3sQ/9HRJVzGJx9jtEauQu48jk9WaMD+NvrTnUraKPyQ5Ml2dP7VBJsuYpWFiqTGkthByZVNSZ2vPxxa9tnYUk9QaZ+Y6ploMLpZHTuXrCaeBtVsteFQ3ArLqXJMQvKX0M7WQW9Fq1/vRUWytOzJoatdfyfPp06chQ+MtRaEs0gvR63pSl+szVxCSLvShQVTLBYzpl6+tZ6u6DkZWsNyMDPpmumFQ5cMw99PR2uF484kV1XtLPRgThRhhn22uWdJaGBstplt3m3vTq9HNwbYDAFvn2S2+dXC4j3P1oRnJKmWJpVKg0zPFj07NK7RNfHhYwtkAYNqJbee1PYqpKblPisBoQ/bfv3z50nXvs1Euhh+SFWYSq5ucsYrtVKdnx/zZmHAbuwUYHtnhfOwT7P18SIeQDvwONR1OAngaHoAsIxPlYBPu6Y3SRnRigyt/KEp7qM8SttExEMQZPf9YLM2ICaOjo+bSTpJFFR+tyTpatViom5McpAj9UNazfAXfNMCCMPt3gCl5AkNoKORSISvnDclBVAg9ZeO94zuwYxPVdVmdBazsutSkh+Sho1DJBQYAjSeTAFG7eQUQGlbZhaDiG451bKMN/SvdClhg2BVDtMwig6TcBklIE4LBopZMAU/gnenX1NOcS7mWeuXPkKvcgF9yAU0ej8pTx2KbnmBfFUosUGyFeKmSWawwL05z8Ae4Sh1hgLNaaXoq+vwVnkcwlpNq5f1uyEMwcShH0shH3et74dFfhacxJGrVuhpRx8odQLc/IR0IQCDBYKtgA5PVrDqBz2NJQBSYzkKRqZKbuoORPlIiV/KIhUo4RcyB1MwyLTWbmXVBjPmjJuPhsyox0id3jqd5zi8XzwZOXPkb0lP4EMgDNI0silhDthfAMGmeXekb69SCfvK280SbE+2KWnW7weX11cLWa/YIuaYFzQDqnCifDge2mRVLp6N2+K2oQphMUdXNp0kVKISTe1NFp3zxxReQHFwefP36690vvzJP++L5y2cvnu/tZe9Gz5E4vqq65Fppscym6ZSXG/gDIaeWK4c8+KPKs1Mqd+tZe1nGQmM8mJ6u0V7oxHCQSs3AlZTl1FQ6GfZ66y1Vy0K8BSOotqY7+3uPBnPFWKe6V7IJW1OCABg+/vhjRDJXqv52fsvawSAn2/IBiurJXhiYaVJg6YG7m0bqWDuoVB1UXUyBTeokzcIxBHWV3MV7FnABVukVLFvVa/6QDdb+Nxejve3x3qYuwPru2ubhaHowt2EshxWySpDOu8t7Jvv2gIzXz7b3ZvgxHZ+vLT58/sH1yKbycze2A7q4ZLmQFD7y4GqGP7oD1T9QClqlBcDTABLZXEPyCEx41R2yOwTdHYJRZv9OT8+tPnz44Yd//Md/bEMTIfnyi8/oTN1NWYGVC4T8xcHb169f/+rXv0ozqg2ZzY3MylQzXOqUzkCy5pSn7uHgbw90cUmR8vSTp520SBcIVTvhmQ
8i635js8jMba6srO5tdN1StQJZSkaqNIXPfwiXiCrWa0LKXLVEJdkSvn/TepNfWR3ATQ8PLe3ZimwJWqhBVnZJ859zq/Llt2GaKn4heJpKLR2a3ZrILkPOM4A1pCfgKK+VlRSCJJXvidSmUCpC03kJ5PHKieUXS7qWmFNWshKVJWeB6Qq1xUVb+seShMj0vtmr2n+s+geEoKEtF+sCUoZse2cEoew9Q2QNuTw5sRziV46OSgMRKBYD6i/jFa/UH4IVhGikHahxgEKz9Vg3MHGp76SpAgaRv/Rk/FHF1JOJbFNxKWbTWnMA2Oaj2sqCqdA2W2QDpMhYUlfE5iEKMPZ1SAh64KRq8egCqlODAy3KXA38mpDtYRzdtz/tewszT3h1l00fABTW+BGS9KFvU3a4O1PdE5LPL6Rp63WLKvcqBDWrHkwT1fCQS0KTDCV66OHn5Gm+zcK6nWXOhLvl1frT9u7Y+I+kZLuZXYIutXC70s3cHTwbU6tNWHpjOsiuaegx3o3kqo43g2323lRWlqlqraJZZ3Ukeyqro1Ak8qO5mK++MiWTHDfX5t9QnOuTk1yJ9OUXXzmN+8mPfkxbUfSMeoYKKnLoxZaWbLYMBe8KAqYWWH2v1L2KyIigew/XlypCfUFG3kOw4XZN24JkqifGSiTNeKSulEz7csFzhDOumH13cWUHnFvFY6f1X819EndzFzZVIjCcDXzEhpOKcfLEBEVemE0s5rAdtuZVVQdSjLL0F15czxjAqnQE8AuHsrFBNRRTOYw6wchR2/AEnNjSGJqfNpRxbrWsTKi6cclHFNa3pnfrO4uNnft1R9Luzi5ydvh+w0pqjSNtnPVNmSgmX3Ow5nPlcv2JjYzr7qcK8WsG1tcnF+eP9g9APXRy5ywVMSpW3hASXtAG0dwZZ/C3JHSR+YUruFdObKqmXvX5nD949vzlP/yH//CnP/0pAfin//Sf+hDX2Wm22oIsdbVkBSSSE0LtXXLzcdWvjhkixuBRVcaniA0pK09n3+k9OTH+JwP1LRf1ZclPRExE9E96oeWENVhS6Xha/jXl6ONqLiONEo3NUSCUQUhQMk4owgNtskhybCnllYk+2svkof/ZWiN1do6yMqlXvaXamOSV3HamZCdjSlKSyTXYmn0yXKqq5Js8Ah/iCxRRco09jqcCpeVRuG/rkY6smCTl3gPUYF9VCdQMPRs42RUDcQmwYP9ThnK9u0zDM+bQ5g2HkRHOlKYqyWPYu5UXxkoFPNQKKOaFqWFB/ROYmJgy1l2WpklClb6+zzeE/TVmTZKVJBbOJhIq7OZCUn5DiSfKiYsiVRFSTf7nL3aKAxNfpciDEEskA7UmXCpOPgUFEo0sR+cwJPquB7zlZzPX6GcDPIUEmx1duqJajnFzNhCFWRKLyijhgWuSPJW5kmdQnrZPovxlq7DjDH3SM/OVFskUE6l0pSIYYPHb12TtRDg8lJDFNB78oYLrPKOC5MNHDkgcHuY06IvnT1fX+zo9k0nRuiSOcCl+KinZ257mPVQv+eNH3cDMdQlSsJVriRUuQHRBaDnK5CsPJgB91WRvdHtuvcoNt1PfV5mUyvbNe3d4X86pPEvy+7ub1zbB2d02X/g+cy11bxjhmHdRdMgVqukJE2rprvOVqXJhiGc1w6VI4BJqLXilOsLhCGqWPwzkrm8uz2YuDncpONv/05/+3vOndrE9Nn9qlkfXmzBtu4cXT/SMU2BMrJpJ+4O1hqo2bjo2/uiRj2R01WrHs4sMszJDrg9OFWeuP90aNYU2lEwm23h0t8FKq+Tws12YXx0hxYF2fnJmpBSyWYv1kZuex+MjY2bvtaMYUVDqZ2UUSFnwk4Du54QN6SOqw6rCFCPCGeUWyc+xH1HISA2V3PZTHZfmC0WWGEGy8WXJqMSws6pX+WMd5IhSZLRMwk9gInWM5LxurLmYjW+M4G82bhwFMRjIKfpYrFjArISY5WT3aeeZ9a/D8Xx2eTk/N+VrxsXezVk1rrREIh/B53LkwGw4ahPoPgJykm4Kc6WG0gRCRQlD+0EmWTgTXqdE1dZ4hDx99uKf/JN/ot5N3evw/f2///f/9H/81zBIxUWzt1roVKM126me1I4q49euU0xR3ZkezGRgrLaOzMJkiMkNH0/RLcqAt1WJBlXd7VRAOezPsoTQ1Ea1HBmiF3FgqjmlGuuVZnCnPjvtzjF8LHl2iJwkGWSwv2yLtpIOb3SXIW7WeqMtBaSLQxSUJ3a11F5UlfYktPINmBabQb+/yIY1ZSxjR9MpyLlR1BZRpd2cnEC50YV9a9nHBTiHvaPEjfSiLNKNjaOpUysKkYQq0l+mW5BDGsoJF6sFW/Qg5mYrJCBvMlUybQlaKgwrIsXacN1jjTwhIgHoOep9+NaUCW7DBHpZAQAwYBQiLNlUxFpE6ETE3MguF4Xl6EmmuS3kQq7jG1XjV1CNmsOnmlvP7NDmujtbzEvRrL7NZ/Lb8RrqPpozs6Q5Jow88KjWcTN5fH52xk8G9E/3D49St3f3KEy5wv7l2TIcVBYJ1Q+GU4LNE8ehLHPoIukeab3aARAh1pkIje9sWUMjPtUaU3+uOLia+ciBYy7kKiY2JocKojW0FU7HN1vf1AkNm4uX0JzDRZBu5yYxX1Zds9expnwxNrLE1MbapldkgoxIqUyrUmln7gR1geHE9QU7jy735vNLLEkxUszMWlviudmyJSVkUplpM6IYqrkGff/0hSMrViduj9+8ffP1V9YRPvzg5YsXz588e/zJj374s5/97pPnzxwle/3qa2My8/WzKzeHzhxdSl3bvUlbMR1jGmc8u12cnp/74EUKbgtEMVPp0hdRnwRXwRejOTVuCFV7jjFH8nStwiIbba/3JtOr06uT+fnnxxsfbu49fvnxbc5UTWwONNy53R9fX8xc4eoDV/cnl0ePD29sqtiY7m9PR9fr1hA0iAv2VwENIDamX7w7Gz96QhRdyETUEab4mOacn5v7wzh0qTStLCJh/il6f7q9a5LHLoBv3rwe7277vpXjNcp7deESw3X9gbOT83dvTvemE+b8t37yox//+EfvTo6VxYnPy+vFXsqCLUbf0DG9vXsuig/yd6/fshb5Em1tGDl6dEDMxGk+GAcWQ7Lktpbbb4mEJpJ709XljTnDdVThA9GgULMHxd0T463Tswu1ub+7h5VkiYIipXtHuwpknU1FRNDZJ/3qtXVbSBT2+uqa0Y2ZvB8dX/iSWVoltDQYxRHJXG2YIqkon+g9qEGiMMtxQKpZO7L25StoWEpjuDufCpq7/dY4IlfIG1fd+z7WxHnt8EMRs2f17irdse21DSPTs9t5DqnZqmefzOXV1fHFi8XOk8nho4UFXiNM18PcYo1+nfulrnRKCPX6+uF4/XRzcfho/MvL1ww5LeDIg0Wiq3dvXftovw0W5MbIi4voExPCVsfKPX38GA2mDrWFjL81otyJxYzlpKknZYY/FIVWJgVpEaRKTIgopu3H7gn5P/+j/3bPt+qOjuwWsVa4ayvO3i5g42BThUSC+mzJn1+6tPL8/OwYB059q
6zuEXXSDmNxzFDvk08+SUcgbI3See+gowW88wyhDWPrSIeQMfouAKVU0iNFfrR8kqye0b9EIaPZDGhjrmkRfRUftyM6xlvAoy3oTFeYmHo1p1rzkDDIEV2Vb3AWjfCrzfgrPE27jFaB1gxv2cx8UUOHACSNR1YIrtWbDOT0GvpkUJVMsaXEbN2I2ksUPmRXcvXSvZBY6i94vu2kKrSYGT0OgEMczvYr+mOsOGq22CI8w52q13TUiLoNuG509q2coMkYCEOtEvAT7kqclRWvjV9RvfJn/s3abA3M7WvFM/MaDzvg0tIUJCyfS0FhGUgZRZvHHgwEL9nY+ENn+ar8WFsupX/vwLx/SU/iBq6ySWLiwj+5d7GNLIrDFaE6VDexzvCuyhoRVzqFysxDTEb61HgotlCtaroybXIQz9HoZCfIm+LKJfPfaAlfl1RKglf+wQwy3Z/SBabC3Fv49NERBUcH6cppN5cXmXrSuWRkhKBBRkJIsCdByrEk35PK1B+h0Ojv3YLz0YvnDo3+8Z/8XTvi9g8o0003+7hpZuPJ6GZ/8ebdue9QTjfHGqdlLR2Ra1siDOpCULpW/sgbCdJXQFuxLsKW3KmHlKMFnlLGHCYkLSqsSEdmtOOc0A69uXk5m79xB2uWcRZrt+Z8MvU3t43CkWpaRJ/+0pHbjYu77HWcOvjgs4S6ght3zvLkW2jac5HCDPgWJvaenJyqiC57sqt2jcJ2w2vFLOsx/TbOprMtp9MIMANBOLPqfDs/dVH6mzdvPvv807/4i78g/767aPD64YuPGGPljUwyzKkg3I7YUADqeJVdFEtxo5szmS4xQ3b6EqVn0jrUjqvyHKW6vcoeE3iK7qgZzaGWYrJrPS69h8x+uv1B8sgMkNBgHjGbWm1AzLkxvUl9n4N99iLnoFnhmqKMLZGdmiTpJFty9zBHjFN1xljVVXJWqtalEN91Ckq+1R8xIWlmLjcXBpGe3DxkIKyqPUbQsGq6MzHLF6ORr8dfRQbvp9aolGx/svtk49GTrUeTk4VvBvvAjvN11syoUpKVTqVrFbT39JjvdbCIKxluHqSVUMsKmQU5/M9n5PQITJBmULUc8aS/klqIYkM/elRVGoVX9VK/UKeKFF/gKjwrnbv7e7ojuneHj598/fU3WEc0mPw/+IM/+Oyzz2Sn6UGlXBq85GqS/Khjl5KcnxzP69S5PkeGi64STkO26lmu8xv8/Roqvu0AyFUYsuTktekTskxbP17FLp8B/w0OAPHMM8O7DG2CqvXPCvkSZ6XmbyxYydNRxBktA3ZyO/iDDp5ITnR0dJy+YdZmNlw/3WRC+TCLIe1Dz5AvbMIHeB4hsKV5ttmuZNFCPVJZrUh1mmWrUdzCoxQgiebZmVsvIfguHmjh8VTx6rVz97ycZ08a/nqITbi9XxlvLWWoKAwNaheNxmSZsirXpCazmrEMnnLg/Aa6nvzQpmbK3vdrE1Dg33qkIMpbCh0GTnTqtlyHdKAAHlmLLRLS1xHStDVJzb1OBR5AOyk6UTNEEigGPCF1JYo8nfXwhEFgMFOJuguBrOa0u0tZHJTFMrVQmC9cIXpxucvWFnoJpQNldiQbalxwmk6QaRcjyPH4ow8+/ODZs9//2e+6WSO6RRfVYdJ5Nm3bXejDmA4ChX4T8Tt2xRqpzN3nZ83ANQS2h+cCSCLJVIRmIkprhEXMV9S0xpGJDTSoligOPC6+ZUCDmCBG39X87mymNedrujv7B+ub745fffr6ze7BvgFJxmf5mFb2A8jNcOdsfin59V1uMs2NDlub84wm1wyRRtfuwB0fHB3Sx84HkSqEKL+kcpdds7Q9nu1BcAuz2K5WtUNiBWLC0PWmpxBL0Z+e+YTFuW6+uYOqi0hao+pn5wKV2JYj/gxQTNCsiJGFbXARsvBPTzj9CbEaTVSe+6WoXjcMVa8uUOUaM8JihHwKYDzRuqLb9bZqVjOZZqeK6nCnTBp4tQPkoTFEyoJ6JRBesdEzoxOcKeaIhdlAHx4cAGkABRh+WTejPCU0+yytKGkB091FkoAMI8Cgh2mVG96jxPyAWYF8edO2XrQt1gyFbR22OGxOfKoLO3Wflux8bDvTp9ZOwhGnx9WCmzhGN9+8Ob2cmJpmFErjphfOZf8k2thgk2rnZxc+mpbOe2Q13RqO+U+hFHDZpsPMLg6+NktlJVAxpUq2tamkefVv/+3/9N/9P/8fruT40Y9+pIPoxJXdNz/44Sfugko3vA7UKjLBmLkwdJ5vhaua6pbBo5VnBwdPzdDX8lNn2RRUZikIVEN4Ry2fResAjMR2kgzFgIRLSNFdfKFRBNSWIMX2ormav6JzMq+gJ5D2x+KrWfXXqLzAwy9lIlbK9Fse2QJpiSyxDLfKSTU4AUWU5knFd58RmtIOq6dXYAl94DC9Rl+pDLFem6rGBlBg58ITmFKrQ748dI9wyRoYedIKIZGwaW8Kz9M1zcOJJcHg05EvDmif2NJog6fsTdcRTSbrXFC7KiOBkSq7AU0oZphp7jCy0RiANYNQMrjOxVPK4P+2ewgGBlCHBGpVNbwCE6l6V+aqQ5JkhRatyGhK3geWvDUGJUUqhwnhXrEXfDRUfSGC5uLHaDCFKY8G/n52jdMzMGoitEevRcIwTS6uZutvF9UGYlGQa71qh4a1jV5TUjI1Jq06kzf5VRl7h4eP9w5ePn1iIEUzvv76lX1mB0f55paq1Bv2FWV4THb5Qp6Jd7TpO0y2p2Zbrs+ijzJCg3k1PKiVlnUTtTRJgMvhQJkrJpDK0LExchBm/ekKjJkzF6wfbWy9ePbyJ4+e/dbjF/ujrc/GO199+bkBk2VjGo7cZyuBz/eN8xXmxweH0m9jrfkrXZrx5q4FgJ3x/PpscbE+3Z/++Ld/5+TsdLy/69oiyK0zyUgSrqlSyR3iGZ6UJLe29YpqkBiLD7kaqVqNfI1W3717A0BUdNPsamd3YnTVGDwbm2c7YAJLb1teMpU9VhZME6TbCSHLgqCIX+1fUHGQamRspUmtFZrlr1QAkrbaCL8q4lfXYroITSp6a3ZRg60RiFANdz0nHAJsMIWGGr5nrHxz21vSdDvk1GMjzY6/dwPOr97rky5MyVIKCyFXxdloE4h+yEVhneIxV2I3fRam7kL0LeM3b99ZorS4tn27fnNxO3t9dXB/c/V498nkaHownq1ntjAzojtTk+QuF6dfLOpc5MzV/IuTL29fTFcKlYyZy8JI94DNySICTJlG7N3y6v662giqCmidVkEoqSpCXeno5bN1ekJUumfTr754zk8vPv315//X/9v/3YKC4ZRJ0T/6oz9ktP6P//V/Da1ZJcVniDGwpcXYUaWghJNZy5sQfjWlyrjVLNNK1zRRovErpJUT2M4bTvN3rfMkpBzsAtOsV3alwRrD95/AMtYvp0rVTKRJaBZ34iQZnu1phJ6NLZ6W1Fpm89r4JAOQ1xVk0MWl84pO4bJZRQZSXJUsiPk7gwT6qxd+7iHCUFqV30mauWCWJqW5UWQ0KaKKRemQSNipPFWVKYimqqtEbalsT2BiecgroeEH
5ugUVHDy1/YTFimc0xwHnLq54vylaMx/tAfNlv3N0g4uRX3olJRYOHtTDIEexmAuBxD+dgJ4AHi2nLRfQdjejsLTFLJclHIxUhxIrrHBM/Cww4FTCly0Ubn0cdPJCqM6SoeP5TKSLjzpLQ6FkHzwN5F5hlvRkpLAVcxKFeA2PtfciFk4wylnvOxCzlcnbm4yvW5OTI76E5dXB67DdhWp1Ti7BNh+K09Td+1t59W9cOyYPqMdeaabLs9NwJ9fXZyBN/lMBVzZJFZT4SFD1Wfx1zGpnNPPpKI5/u18OlYxo58yaaX61LUCUtmpBOXK4LCnshzsNXK4dp8hCsYXJyeHewdQnM1mf/Xpp+Ob+4t3JwtWSidle1w1TufTyDRqbg1AE7Ggi2WWYUS1Ax9ucXfIZG9/8uzgkT149uawDXe3b07e4JLcoz2rusPP6n5lqmrlmpkN1mFdm51EWj0M5ur4+C1IjMV2M676V5QRf4u9cPWUZ1WiVB1F+FW35R09FlGMRA1DVXqWHhpYRlBRf2D41bW3UJ5RcihHtqw5r7Lw2nrQq9waALyhUoxNBmwhI2myoOk9k/OxLhnIRTXLolCl59bIUaupqm03MUubL31kuJC27FWS7m3KnK0kGBGScpZs7OUBIAQFTDg/l/GRmwWtdVmZU/WjhU+BnJ/Pjl+d3Z9ev9zaP7i6/9Dh4BoDZ+naktTO+q2BcQbOqMjFFZSIC1tvN27PL88mVictoSmT7kskHiesqF3SLWYX3Aa5eGy8ZsZZXjmbUVSk3fGnIZRd7wpSZK7qKsVvJypk5xnUGIVjk53pV9+8gvLJs6emB/93//B///yD5+B//s1f1nX1d/oW+dJNvrDl28vTX/7Nz53NstxbOnJ5kTGytXqVG3PVFKR6ynWuXr/vROn6fydcICewnyhGaMPAx98OxW57iX0hcyw7YdJYNJuSjMZBWGpmMKkGnIPn+/lKaqUjjV57xr7qkCAFu0moEM9OhSpaW6OFramKHqthRgM0Aa0akmNXRRUqScsVTKqEg6Q9noEvx1MsfP8Q3Cyg4xIKcUF2WpWEzKIiuMULVzeKD8pToyLS/CA7F1+/5tE1jgEz12ecUVbTaEzaVW7Re3idDJ1erCUraAEoPxo42strch2cQDVS5ioQ5WDg2g+wRWQZV8zpoieqXBMzYP42ZHIU0hnyAOYaZqCiUXW+/RSC2BJZbGg1sewWDEgGyAG5qKa8o2RdpY3hHGpJ7lldLqr4Sz8aedMvd3t7h2bXZeeqs5ubR5KYo9Bs3WOdbsCc0lzLThKE1f4gHvgZBp8bz4ejfGbILZd3I28m44ycLiyjX/pw4n13fkNO9o5pq7FYfFlqKNXWDHn4VAQqg8ZUnxSoV0nUOIP6J3/vvxqdXznec/nGjpOLcUT8fnqw6yt725bcs8EnvRefH7T2cnM1N1+YTWsQRWcxr7Qi9Xl9Mp8dPnuy9fzxu9OTx3cf6aQbY0GILWjD/KFeiq+saKhHpCcHIBoNNoOelXloe0NQuVZ5Zf6jyAy9Dg72DEH4ua4jBetcuuz8opQaYsk3xzvU38lp1hc54QQoLcFU9IpZyGAajUAXTqOWE8Jl5bxyEdZpuxe4OQaWVgYbanl8OSMgwaial4FywZDzi1PAvribWSOjkKwIT28yf5UpU2mRZ6XLcou8vHIu+gPpVVmKT1A3XRF+hAlHT1s1hlkDE5KiIYLxvL/zGS7q0vhLOOek/9Qm9rWFz4NMNH+7LjZu56MzFC/suNy+Pnix54RCCnuJpti5fPQvc8UjtyOauRDFkGV2iyyuLcgfMqyPEHXdLwtZ2ZtUM4oIVl5P+fKE6avJ/HCnXMGkgGjmr/aSdtR+IqqbIi8hzgj/o3/0j7xK56PPiEv3s8bQ+uO2VAg3LraBUCrTkkZ77CrDl0aWvafZuEnjVedNEwyr8kfm/LUNaJpCmv/F54j4wPIKqVLUZGuVrMLyCBjrxNNoKyL2aqnZlvmCCPp6A4J3EoaTOFIKJqyvNrPEWXg8QtvKLxctPrLbYmFqbNP10nHK0oDJdzlmh7LksRAk+gEBK5S/4bdL2lnzqxutCBOFeG08SG0wz0axDK/XRBVwx4pqKPSUP0hwEdnFy5RPLilFnZIDdn55mbQ98Nq9RIAlE7JydLBXtd9WLVlD5Zme8mphLDj9L/kr7dpvxaCG1h0tJyEXalduGd8RD57kpP9CeQGHzzWMk1GkCPoUbumaPQ8QhE6QiORRkFWGyyIIj7OXD6vZaiORcsFrjqZzTOsmZFmqWOWzlNLEVzkUC1UwBVB7LP54NY7LLhxhymFoSdfcumPS0N++jZy+9n2g3en23cGeZjzd2Xa79ttX37w5+ZpZmBxuHu7u7PlemM12W47j6iha3FISM3ZbJ2/fvbs8nV1c3m7cmY15eybg5MJF2lP3Pjx68eIlm2dKhFLz5XpKVq8ibIndC50I9kR+OsOjLMmYLfEqMpVeAsMa7Tw5emML3tWlSf6Dl4/dnnR1fnm9vnazvWFrtrsYQtHI3Mv9xWL09ux87c3Fvc3ud6OsHbguwWWvZ/fX4/W9l89dbLXz9MnG/s7B4R6C2RJ7ID//m1QeSlCFurCz+JktCeVrhld4jSGyqvjetoklljrvihllbhNNfXiLDLYNg5ZrJEQhHCingHByLRIABLdF0UsQmBWr2pVAE+tfwBx1v7ZB8WHh5emJwcrUh4K7gRXOxoYMqhAY7YpVcri8OiejnZGFfbsYbMtRTsIoUKaWTuCvGkFGToZ0dorQlGs2clBwIXrNGABAdmXSMtmFVYW/GRhqQArHGeA4AHmw2bxTX56Eiglw25NbSAiGGjfqenRh9zpx3bZDaze6fXZnXWnz/MZKoM64A0PTxdb9Hltjy4SbMEy73I4cUrg+v3X9u16v4WbOU6HF5ksST7iyBphr5jPQM4NIlDnNYVkjNfREOUWFdJauK0LRuAfVFevbSXjAK2DDTHZ3zudXLz748B//439skEw5/Os//VOxe7uu4zC7ndqH2NZT3Tn7RfTAnj97YRPeN998bSVYB9GCXQqnRkxoh3PfczIuapqAtJ+mEiBz03Q0DD/iuMqVGa8m9wBhwv2vnyxZxVGiVadUjt6dKiIipIBzMNsnLUpSk1fxQxZC1F+T5Pk+KgWuG4xKVrJ8qFg9+ZWblWmb5aBEqmDJto4lHxvJUC6vv9FJ1LXSkF7beUVSdOg4nyPBgYeo+KukeeClVxDwM8P0cTEs3T1u210q1Uj5BwKkqsiUmt8ThgZQDu3HIn489bl3m2GFvPoqV/1bbvFEFMKM53BC6uw0wQoqfzUZmKEVjCtxK++ylHIJr77dyB++ItLrkBa84niiM8lWtdMAA5hUnYtqaJgubAcqoEYiHJKH4V6FQ45yfG5uU3/gs/LzQCwbfzpi+Vs6gUVQHqFwWXySFioBmTMRznWmng8owbMVovq1ICJjN8QubLk9O93JsaG1g53J0b5t4b6UsWcpTJsm3DvjLR/SuNAzzGlLE15ucD/LeOX8JF308aWZBvc9GIedvjumJnJ
m6mDfDCPReHx4ROFfb/UifEbSfZMrJqtlm8ZoOUQilc71Lfb/y//rv7u7mn/w6Oknz17uH+3a6jbKFrYt1+HZIj87Pd3aySmGm/PJtY/HXxxfnM3sPnORoIkh8mFO8jJfxrzbejtauz59cnf120e/R57hh/zk+JgHDQ9ZHW5yzdriMAaGtqzG3U/sBSjXqVQfS0wpewpWivQOVsJMVhvYU6CcZMd1DtCCZ13IsOkym9vYGMNXSyvCY66oHeTpY40yt2azwMbGmZ0Qui3zi3OLJY8OdyEZqG0/hHiOe6T4bnSOdpv+ESgWGS4yd9cH4SpFvTm5ymhN18iPZof5LtgyViGK4DFKb2ZZUoejJ2NGxdS1ENxVkAyZfdZ4mo3seJTiV3fEOE5485kZF4sqdKYrVVZZCACi7vSFCVCFujg7t0/BTggflbi78LXojLZnk9Hd9q57bdcdErFMPd88+2amqzLasBnkzoWpZhdtr3kzOzm7ON5htoyrDNZMuFAMaKtP0WcV01ycJczprsueDepskVHAmChFQLCOoYaZDsqqg1hcpc8QGZ220gB4IgQ29N/P86EfVWzA5OScEP2JTz/9Gx3G3b2pi6ky+VkZPXn02KUhgLHlz//dvzUB8GRxb5Q/f/OOeaMD0tFcmFx3xm55m2RwMeyRubINCBXVTggnpKQ0ItXhoamcWH5F8kwJSy4FOi2juVK9QxIJI5n2YeZwMXNvtBq8urKpq5pTBsyFQZWRwgtHmJAO9Ao5f5PBADR52IsAjmqj5wSCBEY7V0aUbMwAySAoISp5x0WMckJTgrBbeDecJA/3l8UkPc0fMDB47bEt6QcJT5PE0xR6cl46xBN+c8pNWDJqllYxkN0hXfeiKjiPDimaQx4nI/D2YCuLhgfganF3cny2ufWanInFAWwhBAdHjyYbPouU/gcw21wj17bOO/kITjYyQmBpcDBpIdV+5NKv7elnU0V4RHEQ0qKS8IvCZIpY1kG6no5c02kvb/oQpkOrvA2Pk4oAUHaBrw6yJ/qFc9IWWmWhA80FRLUhA2p1J4pgaLq9NQtO5UjAumvxLpAFIDe9ElFd5aiPnEWVaeZeq9hgVETOmmiP6ccwdLUigxVanbvLHQ6z3ZkqmTm6ls717pNHb1598c1XX9p49pMffmKfxccvX+zbhHd4YDOcZaEIVqYPnMF0oM6i//jm6sJ47TrbBLO2XBy/Y7bcir07nuhVvv7qawQePHmEe3obl2fnhkOWxlK6TR9jPdQXcnuNaptMH2nqNA4u0Z7qTKHYbitV17b2PTmwP2dGdrfWL5hILBotNh4dXI0WBh2bTw/PX7157Vpbs13TycXt4mx9sb8LyaavCaLZ8NCC2/mnn/7wd3/bLJe9kOmw18Yt3CvKw3xMIMNhZlkBTE5ITX3x24IKhgB4ahTmuNQmhQWelIIT3tXqF5gQZekQftUSebrNFUdyVI8UNI6+ffvuq6+/NhkIkrmafvwRZbWbD0KRROYqNQszo3h2dmGvPPpHtzdOj+ESJkbSsqQVJztHVvf3X9kcSg9ubE3x01jYhUrdlqsCkZXjXHYXwpQFy7u7J0+ekXfJDcAzmN7JhhqZIh8PUK34iiwvRjD8KVV2cXoGLT9I7cny6zBAAUlfK0j2a1QLUnatUxZYofh29kKIUarexcVGw+emZ88viKUDYn/91a+eba2/3LvbevJs9xnGZ+3q5vhm9/bg8htfJnSsxXlnU7P3B0/2r3715dns9LcObE8l2DbezUzlGsebhfMRLJMfaodjFMf5kF5mTs15qiAF0b3Y0aUuPaYMeBh/NeTveLpBpR5r9k8Bjc5/8dkXSvGDH/zAq+uXdDgmW2N9tjSu+ppXCju7Uk2yU8b/4g//CGNfffW17Z3nZ5dff/Pq/mDt+cGh2swJWVl2rigonq506Oo1bC529xP5DS+8kyjRANMhnkol0IXQ0WHlSsOURWD3aQeixrpkt32WfMuAx9y7ezeWqxqGJwdbY+in13ZeRTRMBDETXSxNMTSZU1F5Dyml3LU+U6Gxcks7Vz2ZKmYjlGIZtMrif+a3Mlg+IAwxVWoNYohaElxNOshFvY98kLZCh9J1BAmOrEQfZUdcCUFsrV20jWqgbUBEXCBR2f1kFX32zdK5GqhOSjoqHCQqpWdaByTtaRo8+3WJuX5IsMCUoZpT/b4XmCavhHnJioHIxgmg0TbOLhp/h4ek4huYBuhXfnLM79lJqgRLvSm2kQ+Yh1RdTLIAM8aDFELIVAUY8KjBLkpmnA5bXIcPCNsjkBOb3F0Gv7WRrX4u7pxuP3504HDwoTnYZOPynjGlr/cdrO5iYH1n+z4+dXK+9/rsrSQ68VbFdQscgjHuNQaZn104O79j/eTWTnIxo6+/+uJyd+/Hn/zw+Yun7vwzxf/2bb7ypXNak1c0jJLQzjoEVGSKUMuz99eWx3w42Dd2s/hIDRiPpkQ3aqPaor6ew8MXFte200x0U5miO+sUltpHDuAujk/e2Ac4vd9jY6grGt+ltDY9Ui52NLeB78pqbkM+eMK+4vDwbKbh+VBrHdXVVwl7U/6yi/YQGz8nL2l1oKl7aNTB+ibjOppfZzMOPA2T/uXI7OYdsP3TC8YxVxRuT5yVrvO5tcWugMF3nZLEGINorSDhNBnSAS3j6qsyFUZOsA4VOirhz7vj1yVLRGhhIovyNTZneyjnqI00GfFGUC4IweH0mMFgb65KKeFvdoVTJWyBrnCviOlYnmZaBL5HWrlWQb+BSWU4xOZAfEaqu0ZHN3ebs+2DzdH2jc9y6lpkji9X7eYQC7G0QGJmZ2Z3++3MoOFat8RUlONW2r9TWhkf5AT9quORngeaFQXdmIB4PVmMCW0ZRIUxEblqO4ApvCpDHgFeOa9gFEQpnj9/+vjuiZhf/OIXJ+/eqiYlsZPC+FYgHefpzGg7ufvIgoHX7/ze77/4+MOnz1/+T3/2p4zW2B0tthV5yly2qRyNPyO77FkIReUPASE2pPrDWUQ0NYJ5wvtid5PbrxK1PLEQOUVn/qQ3HECvW7bEGLyFO5kMTkKB4UzRkyxLHPCjYby+p6e0myTWSDzV5bJ9FBcGnDwhnvXKgDkuoJpzI6/fAXnjH16lTEhR9BBhJV0+hLcPWo2gX1OKZlGJQ0bUwJYKfwkvlSQ9rByQB0mJb+NsJJ6rXOQgBYEcDEMKxE6B13jVnR4oJKYf9d26sJ2WnwOmARlUrBCGMK6zUI+p1ip1k9dkLOu6Vi5Bch1rnB6FUbJPR8s3cl3dheAc5o4riZBO1ZQMmTbyjvVs1/lq9pJalS2Lldk4aQGEB5nZjZJo3jxMFYCITfN5yW0/SRd57zGBpN1UA15RChI00ha9y4ejS3qc6s67DimL8NEHP/A18Q+ePZ2Yk7nR/mOElJ2OcPof92wHNk+4dr93+9HL+89v3h4fWyJgnLMetnAnzvr52+Oj3f2jyfRSKr3XG4dofVjv7nB/18UNbn84W79AhuufKFOfPDRGacL0tZORQVUUR6oYsxXEjvMcw7Gs4c6qmhIQKInaiYXb9A
/5rBJ04hIk9fnA8wn0wpG1sbGKhotruNzVabSPT0nXduH5lIkRW+ePXmtS4oSlxLZ3Q3V1pmGsRHp4tTwq1cXtpw9Zd/9qdn3XPZChiBo+H2V0WkvrrtrJ13zymvz559yg8IqdauHHWD5jMf0zuvPtek9lqLhF+HvVanKN3Raz+Tyk3wSxmBOOEQ7OMIFKlo59FRe/WdNn96RIFa8Gr2yc8/Mh1Xx+FgwbevXLOCFZuB8X1K1iRbYTZuBGMFuvmnANFTvbKek+X6esMZvq0tQAyslQNOgnnKCKcXdy2Ef8tjRYfxwrPorC4YVkyDwXcay7Db29ro4Cden5xsHz5mJohLwvJWgs0OB22M4WzaEfL+avTtD570zp6NBi9//9//7dbe8vFnPx+Nu7pg4uBwVOfzz16ynUGjJCcO9CAQbmMFE8NJQHa6LkcKWnDg0V96Ok21Y3TC4nZ9Y7VfnD//F//dz3HYrCR8Jj/46v53v938jd/8HhuLzZ4PHjy8XfgN278ODnff+8q/d3zyBlmOuWsGroUZFKSxNRQebnGTARFL8/LFyfbulsOqcAEEXt1HshBOiKrfP+06H/70+KJ3JozpowdPt20vX21kdJBNY0WptngpEo6wAkf7u1vXzenw8vjk3LFemAlLLjSKQFa4UremwLCaP0gZ19sfObBiSJUKU7MCSXB5/lnUWS6ZsEyEI/uY1OpD17Lag1xlBxF3dCWfkzMGIzcf/ezj3/qVX33n6Xsf/fQjqw7l++GPvv/gK08ev/Pw9OL1Wnel0VnuXSRAMOS4vLCJ9RuJ+kDosWuD2UMV9HPL1sBNnND4WJLhF9cEYXZuaL8/s6ja7U17qOzC7/fZevg9In/MjTezCTOq8J42iK+9On4VbXa2KIArpcYUu7WDBkV5ruUVujQYjBkfrKUuBCkvKlnIpkskzhMIAKaS4dNPPwUE0ALChgxAF95yKQIhGE3PFdtsr5Oc6VwTsVB8DR3L3gae7hWPGn+gfy2aE5WAcCr2Zt9ssAVHkWWXNchD4bKvTgj1cq6cPnLEZ7GaXXKPvLE5Xb0U1FXZo6nRLhSBaWtrm1uMNnjihq6IC4UEhUEUnCZ0SjsRIb3Q5frTcRHy48kV5Vs9slaupgOHQVLqb8NxG601B3ET6CZDwIntakxumwPerigdwyBvkKIUjKKSUdwEKiUBwaAIuyGz8hXtKs2hsMCi+5rqW/clQ4XXAqw+LOBVX5UCcqn5XXNTkJ0JMK6yAWOIA0KhdzHnvF1wLhAWMqUk+EtOv2SrNEbefJUv4nhTC/dTNskNSHSNkpzP5jon6tB/n4QsyRZEivcsuytQbZPRbC1cZA2UlKZKOuMTMpui3HhS+1Jefnnx0Nv7VF+UhkSM87O+qhK95whGfQhStcoyzkQW7Tw1iLWNp8aVYD3gLuOw3mnTjFAjaQZN2vrSqnPiHWYqnvHTd9/B0of3WWk4EkJUlBzNGHJAQVhW5BoQJzMtv3n9ykZX2hIA1Guszlrt44GQl6vtTQdDnFmx9Hb2owK0zMiqMKljp7R2Wh3h20Wfg8H/5qOf1u7X/upC/Vl7N/9R+ptXSwtjsv+a8CpXJCqBBq5Xl/B6GLiDg13Wnd2Hh3yf9t4cOt/ENqwhx3Yxp6PqkRRg0AoatLiy8pYdrAyJwXJIMuQIu3DgxDtCEbRtG602gRKCMJLcVGw+MqpWRSktsnDuMxuZ0jJbcckDJepxhJ4aEY83b14dv3wl5K6FOsTuNTafffESuTJNnOMXMIKjYQPTfTX59nc+ONpvf/bxX/36r3/t3Xd2zo8/6o5ewcqN1RZnS5I6Fqt7NrA5hqmRiGx++C7DDq/fnH/x7PTVSxRi/N3vfvuzT784O3PSeeSZb3/r6bvvHvQHZz/60d/8w3/4D4dORhosCAv+6NGD/aNDzGxnp9W9OHn3nUdmEPL5xtffZ5kXQEAcY7FzbZYcTY4XbhyFtXs9EwPu5uHRe68uv8Dx9CcjzPPJm/M2Y2ETBgRYlLKLm9sbYO358UvnWOBpLHt7zMRZJ8FnNzt7gzFevBXyNgbM1tbC+mT3aMei+toHj0ezm//+j/+tkY2GKJTNgsusSfwrzJeRJ1yTq3SNNQKAse5kGeKIy4owGfIgENaVex8aZ9NT9ReeKCR5MRbWaVYrfWFUUj45efHyYGsHLvjo40+e7B+0Nzbpt1sHHeENXx+/2Hm8z0ngzcUrckOCtg9HN7Z324XMUJqTKnjjg0lOm8E5aXuUYVbiDXHJRK2vtQY3Q+4nKGiLCbfTHFGQGoorlhURGVvxhLqZOpGp3doE3gf7hKoE9xNJ3bA5pjFhJq6v333/PcVCOKQW3hbA7+jBgcXCVQQ0Bqhofcc8XaVgJ+TAVxYa/gnK0ms6AuPAi0FmrBgNIXmFV0KaDSExTo76gmZcsTAqjCZ1wU6GJfapfJYBZIti52WHZaMjD1gCXBdwBirGmOZsFB42or2LsC8uMIlFaBO8oE1U5NDWkj0weH2cRkdTTZoytRDDHfeToqJioiNOwaYe0uO5djbClGPR9AKhwjat25gYnSFbcA6rNNF8yZBDtBTIqdsWYDbFrXbnViQYB8aN+jihkSMChlc7s7Wd1d0Ev7DBKrbwSETkK2ufC37CsGf3Nxsbu1eBIXUHBguE1RvP71FzWf/lZ70EHSTVP2gNbI8SeZBv7zRF9HPgm+wYsHYXsedKWDYmVwKBHyAT9jaumEBAVeAV9KeQrNViV9MGbav3tVLX2sJyrY0MqUh7ioBYBn3+pNLH2hHlqEKbArpJYUMswzirlZ7UbJkwz1M6A3Ne5NYz661oLWLACbsfiaq0GTtv9CI6SqHDVHzLK3BrzeY+OeNUYmnocg5E8WH51kCsdnhkLNxe9C9y5g1nf+wVvcrk+rT55npyxbj1937jt2jh5QcZpxfnMKXRs+RL26igDdfK0eGBgHIefuNrX//Or34XAHnLHfFvfvwzq9a55o52GIynDoFbBpsI56KTQDfFDmitt48ePj58cATCf/Kzn/lKSo9LMpj1p8HNOJS3USknhRC0IAQKHzr6TP8yHYFIDKw3zY2t3mD408+/eHly1utekEhyeF0Y8dL6DEathamMOmjJWbqdzsajo0PMJlyChDcbVjLngm6EgOwQtRYTaCPwcn1DNMShWS34TV8GvDIixr6s4gIJepAmm8MyKWzBzt5ovv/Vy+GY7xxHCp5ms/EAjZeNrZCblDMXLs/Pd9vLJ6eDb/zH79zcdp+9+Jv/7f/mH49GL4+PP7NYYa5Wq6iUr50138bxTfqTZ5+9ePDwoHAXwrxG1jQ4WCx++H/6Jz88fLDz9//+b/Os+f73f/jm+MV40v/kkwvc13/73/3zBw8O/w//+/8Qi2OmuFy88/TB+eln/QHbmNOwbB6/eXP8zOJ/9fpzQHTpZArM//XSztYT7qLdc/Gu2l978p0Xz0+5CPR7k42dtWefvvz2t7/K8c+BrHaroE+s8ItM/oSnywn10HaH3Lgw6vZeCWExHJ/xcRTFcVH41zF8cj6Y7D98t7n5k6PHHx4
+/ODxLt5+JiYTjbWFIw5wRKAgwng6hrqIbJk91n1YqtFu8rA43NmDUJ3nRwwy/NGzxCKZtRn0kL3GFlDCW1hNhJJsqcnMsjZH05NsJcHsW1th+I6ODl9/+qyz1jh4+AAFvl69efTkycfPPxYb8ek33nnVO0Z0OrtbsfVi2fosKxsESpoj2D/Bx6MBUA0tFAaPlDNtZWfsSog01lbEURN2u9wQfMtB7GLIs2LRIcRssmrk7RMQ4oocZr0T+ntW6ekFQUoUEsTFVt1XX7xod9oEu4DQ5RDiAsCq/vZ3vyX+unOtGGzQOaKJf9wy47V4uwAYyFidjfWD/V2kCxaKMhvJX1u96J0T3r/64H3tZavb2z9UOWU4O61+aJU41kbQOuNMBuDRv3hH0OEHKdmKErd74+xoNqtSB0WFCMmUHA896DWdJxnEVt44zyExPrg6ZJNluIhsmJo53BJNEgdye2cDeAC8mxuA0yFntzpblhM61tlpk+Ey0Zx3b6+cdm2iSZP2rkSGu1lw1tmj3cedaUs8GNuZ0dwcJ0zNK6gHVUD2So9Mgi3FGwvOJt1kbFu9SliWCV4aE4SVkgnppCMjWtGyg8O0jyNDEWjSB9rJbEs0OIUFLghKbysYIUluiigSpin4qnIueNgEPqHmAYnZVI/tlZehIewzbUj8lbO7MBHZ4O66aVnAKzFreEyprBAv3zpEFC42DZYQBJ0qsvYhnKB4dC4PpWCgADmn3OgccBFRjEbxjRzIgkTmiMocOR7/Fr8QwLiWLa3YBM4UxK5uBapI71KQpYgBsLuh6KHC15j98OYOixr7looH4w+GHF1UWoKZblgi7tPWeGVy4WVFSxPzlWm0d11Yo2jx55QZANHcywHiVav1RnAqYB12ZkVU5YQ39qG9FxtOtOIos7rw/OWLN//8n4EendYm/hROfRWWgA47MbYb4uwJeHbbojpmW+Y0XdyNUEbln59325uOuF2iPXZ+wbe+9S3za/XilXDLbUc4fnh0aEF2Ng+OjsAzRwkcox0YwMJIOLRJpZQHyrQQQAV5x4xoqnkHURaHPrNdGS3gji09PT8/PethjB5SWaw0h5eL5y9Pmrw7Vnjdbw4vzls6XlaLkhUIJlk4xHk9Ozv/wz/8A34QglHx3eCW5Zxc+3ii5UHIFzhIfpe3SG846Wzu2LaGhxTe7YsXz4M5GuJ10Izl5AIsmMGHgxw0ZS7DYcQT3pZJLP/idDjiUoaf79iZP3K4FI/a9tFu5/hNt8klGEzgWa/iJfE7v/3o6EnruPfxH/77vza+PNXEp48eW7rnt6M3xyeWGV8ypvz+9Pqiz/X2ZrzRgZrts6Xog9C7/S694F5zt92J9PxX3//L1mbz137ra/wyqDm/8e1HWSgwefQwEyqTx4/b+vzZp391cODwJNY6mkZuoqja+mjSswvBLh+9woWIPcSzo3t+QRU5OBs+OVwU03R4PdrZf3R+8VmrsTo8nextt8WOInEYjcHx6dG7D95/8uT1+TnHx939o739B69fvfrzP//z7gWkGbiCz3b3DlabG48ebqF8zkC3qftmMHr07ne+9+HTP/rjH1xPBqIsWN4QILlfaKUw84AptqCFzlZrfbPhsIp1W9KtJ/EXsS+6kS01wvzb2nSVHVr8LCY2/+heApY3gaqRyrLD41UMQz6MOROZtWKYaGwwOXr4wD75j5598vWvfsCx/mLca201v7n13eZee4x/F3VvScDype2NA3L62vbmYDA8eXPaXGvu7R4IaNa/HDS5V3CnRnKXnMp3RbJcvN23zTY+NaKkb27wrCOX4KSZiAaiBJow6/wGCt7mOvnJJ5+Zd5TQZuEWn6jOLj8q291ZErEPAhCN+rNXl2/4SaXNvLoTffxWgJhR1Ak9K8UGOIT77AJXsXCbLUwtlIaRaKfTurGVZDpykN7x6Zu1VvvF8evR5aTVFDb34mbSZ3ZutZe6a7fx9hOQFhESFJaDHWvU5qZWWSzQtPUljAxRbmzL1pglaSDaE3zJ9nS08xSHw6rEmyZbOBYX3pyecBhlu4J92CUFN9EtlPz87MSMcaEYTSHIMRnp2fMeUs1iutJ4uHYa1GSs1qyThk0yYyE47aIxsntH2xqz0QwxhiNY/uIQLI4iZESq3LQbMk5USy3mRC4gth6u7O7vnb254eWx3+wcHe5vzrDQnIGwjAIYNhfXxGHukZSNEUdYRyUDqqBKaNRKlgpDE25XCgWCfQuTev/87qYo36DnrLXAWRFVTEI+kXDo5Yq99FdZYW8lf9GqiFShCzVFURNEX1RwYbDjOJj8WuNaM9XmuQbjhw4UtrxkSJ5QhZykFRFYKrVijXDi3gZ3WRlBWboakqSBhTbN+ztvSPlTe222/KKSTG049pCUfKY0wKjTMGPtbB20wkdr0nywVGENp5pwoXEYxeRouUQ9kgampLu+lXo1TXboXlWGwz8L2ANMmfcIKhCYDq9wjtyCDUA87px8KtaDLxwimG6J+hMNa/xIi8xHHJHMPRIOpnf2dh14Ycr5qdf9y8fHJwQLYaSh6ydPnm5uQ1I3+FM7SksZ1mAUa6WB87lQoF6oUy9EoNMJNlFmQyEDxE2RrCTtsdBBLfrv6ChGcnwX+oS8ic4Lj2PwRAvjPGb0MuAJC4QpsEt1jT9k1KeOwhsM8QeOfKDc5+slKBxny/fe/+Av/uLPHL9Ms3Dz6s3m1s7zFzFoW/nCfiut7JWh+7BMCkNvBM0NbgakZnpohWCn6L4kIlhanHA3s8ZyC+/PVT5uJiHSGVCBPt7/4GFnZ21nAzIzl7ER9s562h9KXwKLrpS45vog1sCwm/g9S42rfm8k6NzKavPDD79q+y9o2tk+JAVORl3hzzZaqqHNt80clx9nVyhU7EY3hiEjuno54Phuh0lCkQA/yzPLuwwVzkf7LTmND29MxcNKeHx6tiYwyYQse2z78M20f3Ey5BzngEQMt9nk1n7aPe5fjto7+0+eHO3uHX7++Rcff/RzCtt3nxzxkuBz3NnaaQos2tpkWxGmln9Gwxl6s5vX3ePtxs0GWeXSIUtC/4HMhkEGl3YawHRAEnOPA0X4TSY1GzkAJTZcWcl4BcwNiYO1yLBSUWDfrUaSGQu788FIaMYloJzN4l5aAWaQGpjej4VEHHLe9I8Ojy6v9j754vOvfePDkT2ww8X3PvzKxs6GjYO9VxPgciPYWKcZbJSxYaor2gtzsyie+ohEJ3KRMRuMe7z81xbWHWVzsLtPOcbiRtDadI4ALa5YCpyauhNCKQSJG3PKgWCEF6fnYGZzY2N/+wBmsR0NxTl+/WrWE8XD7u0Vx1QNu04+29raFYeCbSrWKRuzfFWsOwjMOqbKIAitedwd4Hk5xVmtg+4JYsumRfA6bG+c9NhNhX5ldZvgAG6bC9gz6jvYE/EosShEpmQEXehenCc6umhlUScaUiQ2RxTwZzmenUI6Tl5eWXBUGN0aZEAc3yUbAV2YmBe8qE/CBsMY2sklFzAVzJoNCXi/dRL1Db7Ltt+Z2DdT3kfTqW+t1ij/h5f7+3vC+q23wuhY1i
yClNXcJi0pDErpdWQ/TA03n0n/gsWKA8t0MLoaTaACe087zezBH16eL10KlWs3FmUxCxnbMcf9SVZolKDhYrJGYAdqioIMo6eqSd2S15InVaniSXlb3uV1wTIKi2p6nvySp/yIzi9fwBSwff7GWzYSCkgkkUMF90klPgzFSpI5utcgaiu4llarnmeo2BPE18ylVSmZrOtKmirP0wo3yVyVdVHLlGXiEVRbkKP3oZ8lqVY7tMK/4HoLLO0WcBFsURCPuQApUHcga20jZ2RHIypWUgarDNe8Pdy122wkfA/KRpN48cYPStL5Oiy+k1mZNRV0Wu3iaYDkdcjbPCgk5QmRLCJRXOJKMpxAXy9S9bIFg8nBvqR55HEmmV07/Bk/OVXPZjs7uwd7B69evHr04BF7Ka3FNh6p2To5di7t0TuPnzw4OGTzj0yWQ5BI/UkxJ385remqh65GIT0tgzRvDDEr+wKi7dRDvBgfeoTnYG+/d3GmSQCAUwnTAo/HvYP9a4qpzJczaeM6xYBHEncQH7WY8Lx6CtGalNPzUw2w95nqhkEobnujaWuDtUn4DOHgbpTGjdA7OFw3ZdZlimdRnwBEGllgMkJ4kCm2xUULE84F5Q73IHIMA7iRc4zeJscpdlRFTp3z9+ThwaPHe/aAdTY5bsXgBjwStsrOlfU1JoSYEZBltq74gt52h9O9B6NtvvViZzl4eu32wcNDO7PH/JBtJrP1pbVmHFTrO2gFg6NVRiz2eg2bp2LW5s6HsQzPJmwg1RTUGdREB0Tlgvpa8PyUR2QWsTNuFl+/+vzp069BT/3epqh4veGY885ojzPgmmNtbxYmDHKjm7E4e+JcOG8JX3Hy5pjEyo/jax+8v7W/J3icUeqf9dSLGUbbX79+9ckXf3H85qw7W37n67/1zuPd/sRBrwIQYSwsKM4XElF8heoGa1/22BYAp621dZbyz2JhQIq5KCREf2kkePVai7xChCpEdHkQWbSgyIKKrj3RN2KmCGMbfYcNRlMxKTiIHB7sPX3v6Q8vfkDTsPf4ccPpttu7re2N2+Y614g3n/wM3eOARJy1IOiyBBszWgtLzoJw8ouNsVCLFWjC8Y48FWcnp6+QAe3e3tvMqcdNUSLD6KDu8K+DMQSwHVKfXl3tNNqPH7wjp3PaeJNSwAKw6+09YuKbVy9BE9AhquiFXW63s4U++mefYed6j1Ncb9JbymnXNhLprq14yuE3gLYIm3XNJX15ce/h0db2NmnEiQXjwTAHfOYEBgFe15z51CPqsW3SVTpRo+5MW2sIJnAhdNY4sSQcT5klBkPMbuIXbjsInGM5Jm4j3xKaYzotdJzSvoV7NrgJw5gQmvQX19Z7I/GFMp/kYb4dfLwMY1wFGk4Jv+HiTkUE1my6QDfcE6svbELP7gUUcIPLydnUUawn5+cXWiJehj6il8XpD6FZ5WG1SAmbE+GmlhoXJwCMV9t3EoL4cPRclzbJrk6dD6nd9uWPb66H2V9PjXMpzg4uZhOCzz4ZKyscpWokN9J84dzhUz/Lwzx3E/0fiKiY7D5rufF8jhmstVJmzeVab+RyA/kCC2VZkPIZSpnLTRaoagK+GKSSStW1AXP8nm9LOfPK8aZ4/UhcSb71ifIkoCYPfbiHkeugwCTyamz4KkNAikJepdoT81i4dfsLl3I0nCZYdcOhdSWUZAr0cYgJic3Xd6j8vtLaKs99lYcFB/mZZpWGZdWWYXGdN/7uT5pcaF4tP80tm5+89yYDUqrTQklb0k8PyyKByOQAYbB/7AWi0RQjbVnv2ViOB7NbAhtFroryrbhK0gr+yz/6I5o39EAeD/VO4QoR6fauXald4109MTgqRcLzNpMQHOSJkn1LxlJ0tz8gNtjWA8L4Vsj8jW99E4/8g7/+q/Oz04cPEYGjkeMTbUTCQ0LuZfeGcDvf+973qOBtrrQJDNLC7aJ6GvOVr3zVmV1ffPH8Rz/+ybe/+6s4cl/x3Ov1h9ZqtJeSXYqUKTxSGgn0YsA0OBSqzHjIVdiTcvwh6OXcQyThEiLx4WByNqsWQmio5YD1HDVWxocP9jhE0MNEY8XayqPWudj8koWsFVJOeL5LcQKHBcYiQlAHvj45523GZL+1ka3+usN2RIAQLNywU37jrMSTZZAPbBCLawoCAZlgFXhQqCJLHV408eQQKSNuOILUKccGf7FyYULyzJIDjWcjBnjmKPRgbTob4nXFDqaDW1qGH6973eHO5iqNFrbMPDXWWlt7B9tbe6S9f/PH/9JA/fqv/ebB4Z6Gnbx8A9d8/uwFWWoyvW5t7r733td29h5+9d13IJTJzSom/HBvY/jsjcJtrBNm3246jB+xIDxAHFOFsNfZRRtF1Wid4GiiVuA/bS4hzWwjyQAGKcKkJOBIWBlz5oM8NC9WKHJWKEomb3nJSYwaL56PaIGfffHpN4mr3/jqzz/+5EFrzelujCDEN17mBwcP/+bjj+FFyjcTKSY42kOpbhwJ/VEqkn4uxrdrjHabtijstTom3KQkclIcvvS6vzJZWHdARaTwpVfPX379m998752vLn8l3j8i9pKvzdXHH3/87LNXPF1tMPj61z589LuPzk+On718sdhc42zP/iT2omCCWTM5EHP60ZufMa+ya4IF5/PcXNK8RXIX2p8AQbeOxyNhsTBD+a8EKJnckI2oNXBBxHmHbjnuazgWNxmW4iybA9msQjSeeXtru83Ymc2+AUjRrCO3WpGAiXX21fjYtw4PTmBIGP5mYdBE8zAKcWfUHXsK+Aqz9OEj60KAmKMTMVUOTBGn0CnGK5tZRkHTWe3uvba+mhttWCSHoC4Rr6M2RVq63R7GkU4EPYMxCEzQoMxosJMi7NuGcoF+olyUM3LE2907eLB6sLhwMVm+XL4cXi30ZoucNK9W7SUgckVRtJyDzSYcS2A6WlagRq2kdOtZayTNkvysN29f7zL8wqv6ietdCRjH+eeK1T3Jks4fZet2OPRQLDgwQxwyVQpGLoKwfQQbhu29T7UNfuadVP7WRnoYuQo7FhVhGl+zKSEYyv+lXwbOktWEPCwkBxSD9jTVamHnD+bNFgQ89M5Wh4ZaCLSN5jqOPjJmqslUwdeWd+YjtraCtWvj7q7JWLYWehskVEbSfWhLqEtZrqWRXt0/14L6oeYFdEhXd33Mj0L77/qVcEecbH2u8YCPCQGHE9SYTbYrAF1wmE9//vGrV6+tNJnPzi+I2PaWh1Yl2sqyE3eGFwMRXwhV/YvuX//lX1nCCtFCowORudE2Xa4p96Vf6QMRswrNkQBJIkks3HpoaHF4L5+/oBnnN0W6Qk5MxJHdKLs73/7ud0TX1ub/D2P/HWzZlh6GfTede3K4OfXt3P365fcmYDIwAwxBAAQhUKAEkEWVxCqrpCqVJNKiZFfpD7nKLpVKf9kqu2xZLNq0ZYsyDZMiIYI0SEAIk9N780K/zunmeO7JN/v3rd3dMwLFKu/pOe/cfdZee4VvfTncvX/voEe9wEk1pE+dID/6Vzl7Z3dL6NLBQRM/ofrwpcvL+lTRUQzb9u4Otm1x6
QKDrUkzl99/8IA3mh2xHRG+YNiYxaR7iU1PkJX+Gwg1+BEgbb00GhqR0Q4Np0bifKjiKY1pKDsUFYxMKL3T405pYqjRGL1yeUbBXomxGFgj4nNAlDn3SEQGyV7Bn7jPXAxD0FQUMYc7+71ysz/O3DExR+NHG6rReH5YnIyDHYvKMBAmGidACLPgTQjfR0jJAYRAjAUbORovyRVAZ+DviJQJCeuUXjiSMaIFyq16fZffYo9igaYN+eAwMTY2Xa5Mt/Z7+fFJ0MxcX9zZkxO/UIrMLtEpBUyhhk//mc9+gSneWq2vbcocgQZL6aDSi2Bvxj8hGCqqcAFbWFy+sHhpcDbWHa4OFeiKP1jbbYWte3SIUYdQRFigMrGDpkTYdI98QqLKVVT34n8cacujGj0ilGhUKOqDPMlWgqiyjbMsOyQ8O4Uhp9MU8o0ceWAM0J0VSrkaNaTMPaX8hx98cHb3+LOf/szy8LKA3kIFcQolVLx/RATVJK+RMSXf2dXGKrJrlQpjUcEwZ/EFFAyHKMqqA1lLaCGvO6Hm+IwzJ/GX+BHBdwVJxAJ+7ClZqpKvMgYjMZKCPvxnf0xGAWA8nmrqmFx7Da+xv9NenJ/9ha/9EgXgR3c+kQ+Fx2BYcMeGuelv7UoNuK26W24oP1Uje4q2KqJYg45dHUzV86BoiptFvbG7+XTzwYPO1jb/Cz6Z7YMO79JzoU7VqliIIfNgguS/3u1JqEss5B6MgXGyXIUiLvaI4tzxIVZh18R3AP5ioTw6uu8LlGVyoJVlgxIGk0S6MholxEj0TET4DKOlqWaf0wkotAIuhnEb4HDpwBe6CD866eAzNvBUUHPBAUeGOTY7OSC7XpPB0pfnpAR2DGE2dpW+tVtnIC0W6B2Ij5wVSYrWhOPEzORivnB2vt0eOmgP85nvRl7b4UESHxg4Kcz0Z2TAt5CXcwNgh3QVMPeCShlRDOqfu7Kbmqbf46+XD2ZtjTU4WrDrRL54PBBuuoCge+Yfau10ZW00Tegw/so6jEVJeu94x/MryKFufCYq8KdfrZWfYnvS+vrTF+fEF33GIkb53GAeQsiyJPo3fq8JMYv8FCQFujcDOIRyg4idL4xXBOqf1MVGJc+a6NOVxpD69O3FS33RscsXdMNLA2Ompfbp8mDWIPs0JBcI8NLw1nyxqtmvWfuEyBJjk2ib9un9Z0dYwlChxBicokadNTZojB0NjYEQ2v7AcQGm4Anw7R9HccTLly8zYuGAdMIpzul65cY1WFpII/W4+ERKQnNzP+3i88F6RcwlXV7hen4nyS7Zn/CdWVhcW4k5SbJKLDWtPbe9/d3dpaWFpcV5BvPN9fWDRw8b05j6Q/yXBobk4LHEf/zJbcaD+YXiwkKJ+ALlk3qw5fR+m9t7Auy/+KWvSD+hgiFF/MFui7yIC7LIZMfw8KC25R+fMjWrB2fPgxVJg8wODVSPGgEgEF9t1MeG69IJaHl2wGF6zBgQ23q56HwU87mbV6ffeedapRIVdvq9tuzeSqIM2n1gIKHZcZ81hQEubJTB7eEZ8lT+XNCGN/cGJ2Ot6mS/1igiccqxQu79ATrNVCOAH0CGRh76BmkWzelz0eMkAAhLWn4k+FBnwp3gqEPfi45Fcjc4zvtkHVGIUFwozXCjWhgaKQgjkkBjoKrwGIF18tByEv7aO2xa+UK/ejJan67NzS3NzC5SQ+7t74uwpjwAQgTSQqQqiMB2r2OWiwLDh+1Ob/XDj+7w8794+frCpVeGKkMUOAnNncLXpcrkiNrG45F2iMhE9LT98JzjRYskS2RjrtFVPoxWORwJHTjKGNIUcYwUCZ55YBE0VdZW3YMUAeeGs0CAGE0IvGgowxy1bRU2ovt4dU1ex1Ilv7q5Nvvs4YWLy3JVHB51PEvVKZCSfDk1Mbt993a72wZ+cjyy6NRDtXtC/6muhJwTbHtgM8/pib+lNLCF8Xphwgs9zqAaNjclqqwCmbV7KAb/4b1Hf/wH36TEEB1kWD/3s7/wta99TVaat998HYvxR3/0+3/zv/w//e4/+Ec/++UvfvnLX6SPW1ie5zmFXOkDZIjkbXU6+pdobUcFsH3RXR22HHfI9jK+tDaauMV33njjdPni6soTJ52yu3t09t7d+1zlwrGcKk8irEKdOpSoDlgMVnJkIbeHgBYOGDmbmKzg1fhzUNFS51Lx4TatsCPsIIh4YYtFpkMeEteQUpVaWgfToWu1ezyRHFzHWSfQRmAbfrfJaOREY9m1DPe0UBVJXBuZOEItMTbGl5iHBydlmcU8G+QeAPDHOgxjlTA1lwHoysONWhVjR52L+FgB6gElXgwD5rMypSlDk6qDyZHb6zk1IFtA8rKLo2oNxF1SF8g85fAGSCC8hgV2XXrJPn1xWVyXL3Ez/ep7QkrO2/OGvmRtUls7a+ODqHjIo3rOiAQKFD2mPl9+QTSCCmVXegFg1g/30qyNdXL57vjqyhX4McNBz9/3kizGgDTw+fyR1Fir9FwSlZNgpzeELwlV+gxbdzwCjEKACr1fW9EjaUHZQs5OmyP7hmT1DUkzvWmf4Whf/Jl680tcL0bEHzQf4w+M/Xz6fvUdWjS+eCo9mLXXjynG82lxdK5lIvlhxohu0go/X77oNwoTZFDFP8fYwqe2EtYyaq74/1gugitTRFc2KJkyqQcpizSeqIffx8jy84XipEs5oM9rV6+iWPxuHz58EOO2Vqh52vSX87Lw2US055nmM9tcrDNGghIt/Ak6KuV1Q5A5Df8o78I0PFmhF3xCs4fJnF2c10Disnfffddkb398hzmNWvvwpOVEYfRQCzXo1zc21jdWoSsbIen1frP1gx/9iNk7srPxtNN5u3318rXwAE7DsCDwuaGS1WSpwNs9F9YTLxTDhr0yDsYOBpSe0Nwkp2JRj8UGuBo+m5mqTFQnLi/VPvupq6/fnB0+awmPEX7P/CplkSxGJ4MhCE+dMGZtvOHIOW0bfQXtTRjtFCKUA2Kntdk/V+JoUs0sx00VM34mvDzSYjr/sb8cvO0cXG3HuLpin+LMhM6MBS6KprOQcsFzyxIFhxGzCzcVyiJPke8ITKHv0u3IMEdpj4hvFfTWqM8wi+z1d3N5rmjBq3b7JyVOaHkZEY/WtzY29/dK1YKMkaE5UD4DGR4MaIRv3npdLcGHj562ZLUIT9qTrUerK2tbkw836kuvlqcuPH2yevv2/VxlYnR8M1I9CF1OU2Azp31ixpisVCYnpidm6qs7T1VA5P5OEa/2ComW6z1TX9hI0LTTw/HTwfjZ4bjyKcQvCCwdh4RuIGWBR+HSgaxLdN48wE095n7An5pH3KOVJxOzE5eWJweDvbEcR0rTH5FKUt68095Za7utJwleuX7sFceWeZFgOMdzdYJj/pRASQYUdEu6ojwjkCMiQn8G3UNZy4+Hcusbuw8fP3vy7GmEQ0HNueIF0Y5XL0t49dlPfW5+ZlFNeSW0Hq483lrbXZhbfnj3wbe++X1pKZaVQ6tQdxVgCae11qjys79264oEnDON2ZtXbpkLB77dnf2trW12xbvvf39vrVs8
ye0utbnUXZq57CBLYSWEXpTYvrSBLY47lL199FweGwnJbQeBxIIndpOUiLIch1pYjq1ReVYIvbF+sEycRGTcVehLXEM1yywBNwAWYyOPBmo75bfVU4mEMA1GjVyyeW2OuuHlj8bU6N7GZFLvxYNANF1ebZB+PemHtxEnGOOxoRAL8YcemGU0CGvKAyBOy5e4zjm5FA645vcOUVBnmQGOflsm5dPeye5Ivo+33O+VsJq97mjncGyk6HzQixPN0eRDtivO7YR3NlFq2/PjIFfZlU5UvMLM40WJxrgZC5EwV/zmS/zx4kaiF1kDn56L/0eroFguOCJrmv2YgDKQviuwcvQYGDlrHEIQdTh6nS5tsi9wp04sWnruOcHQoT9D1RA0LlponN3MHtQuRpBIhTeZI9qUmtrurOMgv/6FfTfRLb82m3s+Kd+1FxsHP4qI891B01X2Cl/sU7w9XVlfvmbvBQ/umEt2vWxmPBl5Nko/xcjS2DTQ/0+vTFLSRjZ+PwU3bjDp1b5H4/R2CFqHMePwkOfzxjVOpDqv4mTIigQ51MqSDNIAh8LQd/SPjzn0NDs1PTnVyEqrYbU4XKAlZCw2DPPKLrPJRvjTc4w7aVb+637WwCHh4sBt0EhAPnsADw/dkrO0cTgAd0QLna1jwaMWUSSwOEJlEUhr5VcjNy9HSwizdBI4OwPk9s6tb2Vjs9npy+/wo/c/WFi60BMQ8nTFPNG2G9duOpUERwG/koAADz1wV4OJ7G/Yh4LAhooDuIbiGdrA7eAVhZXsHzxbXZHMw1VMLhsUbXIFTtZLl5ZnXr3O/SS/t7bGR0GuNWhRApyh8Vp/+HBMHtVm9xA1Nv1hZTjOewM2i9GBLEWnpWY7Urp1z7f6J+cTnPPkqqDUsj02LXE8dOXB8BHKQjdjDcOcEFcMFfknT+HJSPgUswQQerFgLxiNRABL68ZabhP5XFIWhEWCRISK8bQcHe112Coqc1Mz7Za8diPVykSvt0tFTGDYb7YfP1lBPegJxiuVS9euzy3OeYQ6sTE1Q26T2X1rb19gwLUbr1y4RAG7v/JsY3isTXvk/H78ySeL1xTGXXlwb3+ssN+TSPA4nDNUFxCugu4SfshpYRRhVKxXChNsVkScfFXMjsRvUpYztREaRiQu5Wt9NHx8MHLc9j0KxIYOJAhzKJy4Op5JLITNDrcUySzlJb5wYSZKKnf3SNV0XywgvVYzbF1lXjZE8Iji2V7dlBOhOlaBmrvtpqTOJ62T4oULk6W64jUzFT5prWdPNra3N1po51GE55N9x4sVphH1NMYpf0cr7d6xkPwbl2+urD+dX1zY3Tl4+viJGlELC8vk/n3OgXSgg6NPPv6ouXdw48qNO8K8b3+0ub7ZqJQ3nskIcxx1vNXoabbWnqwa//TMrEnJIFypCo2tLc8vL80u437evvHa8HGfOWtEVcXOXrBnmJre4fe+973JpQtKfuyvbPVQa65PtGHDI6u7q/ARR3FcE2V+eM/C3bSWhz0G4GqlotCVHOwScMCk4QqUF251Juy9OE6ZRN/JdyO5Y8RSSV9zSg1zMlI8ZCMizw+dTqezhlVS8a7LydLhpJGlcz6SKgjsYyfCfB5QnBAyXJEOfhS44jmJTKdiImX+gY4zPEOlCrC1B9GhEiAM0h+gmjnhNIf9Jhs3+B2u5autLmesQb511BhgO7qjJ0c8CMNEKvbEYaAoOh+hylX/yj9KBZnBk+YqSSFQAFRlZLAnKsqkG+coDS1MS88lFegvoemERv3ovsvgjDVQHGoTvm3Okm4CV7hoqSB8D0ZkGnklF8EZcIeHtfFECiSkOIlQUPgwWxo/+e58GoovGbnyLquWNfA9dIminYAvZJQub4enNHAzodbQ3oaXbKINaYmbxiMTiV/N0qefDN7T3ujTixjFITLKUq/wRZvsV517xMa44xUx0+TEEa9LvbkTuo80cv3oOTBi6oGrtPtxM3nYQ9N+9SeUmsYcwwAr+qFhTz3H4kRmDmubSBeJPxp4+dgImYkWLJAXrXduHIwE81iusu7GGCTF6RB1OhpYpHxxiqbcFASkV5S8KBUjXJyeUDxsYwLLI+OCX+PVKfGuFZMkRIOQeHo9i6BP3JwRWlOfWsZG+D8IESjD/0jyHrUFk37P4YwxC8MPLcEwtfVJN0AQY0W5wTyDA7VrHJy2d3ebLUIMqBuRafMzX/gsqKFBDOadp698dBOTkWT6oHXx8iWHv9OKhFU2ywkBbKigrUSMzRSLajENWO4nRAnFEg+A1ljDKJ/N14rxXUVz6QZ2WiaOHqTFEXhwyitCJP3+7sZCY3amrsh6Y+hkH+NfLpVVPKbW3Ns52Nvca+33HZpes8UROF+sH7T6e82T2cWrzc12Y2qOBeDZgw0eDecj5aerHd1LHXMUHquHp8Vznn1YGOIiukVWMztsRkqOwL6MYYpgwZHxsjUXoIZ5EaZS4BfPg+pEtN8Q6o5FiiN2Er48JiRVP3H2lN/5YKTI2FKeMBPzKpcrS0vLO9ufeHu31601hDiQOE+a/d7NV1+bv7Rcm50Qpw++mSYkV3Aien0OYgPyk6hhCcsLRQGbxQlBWNWGTIK9zfba5tNKLfdzP3tF/VDWCn7ZkGDK6cC1guVJNkqaKv8cov1GcYJpMBzWRvvHvY3xSm15bqZea3gRaOl2OrWx18QqWAKcDfIzPTcJTlbXN2wfxoMTJFwZXNjZYG6msb21wy3cwNZW1m26kINnj+5Tei8uLBfLk0TMwUh/tlY/POhUR3lCD67MLk5OVURbX7+2HLZIceR765trT370w+9uboqID8vv9PTEp99588cf35FEuFyZGsvT+udmZpZ58DMObW6tPH50n+54fnb+s5/5mYvLFx4/enRhSTa/6UcPHrBmcZ8VWr44v0Bd+8EHH9y4svyrf+6X//7f//vLEsbHoT6lbt0/aK49XZ+ancEvcoUXPED0dHjwJ9evXsfB1Ipjexsrc/OXG+XcxtOHl6Zr0CQzWGmyyLLaVnJB/PnI2M5Bi6EuEheN5boyHA+f0osyV2J3Aq1RubcFKWJ66GdllLavQ5vrWxBptVThVsPqibooEk7hycrK0YHxShwAxtdZUF7GcZC8Q5nO6KrPEzIwPzAT98IJCyJEFZ0yxwpKo9u0U+GkUa0jh+gZtY3Nau6ru92AQMzPiQfJIS9GWVrY77i5t0+TAXsIJA2hdu9AlyK98CpkvQLF0AlA7amZIsXV9sFuVcqYdHAlGoOjuyd8VKS+PKIIFW4cePZ/8nJygBeU5NcMMaUBhUQf4kkgruD6A2OlK3Mu8DU1C/zu+xkd+wspRE/ppyAJaEd0rEHgv4wmYoEtWqhVsitDi9n36CoRP29Ob3veyCjCnJWM69Ey+RkFZmTXKUbhOFZES4xcedyBQujg2ngqvRQ9NMqX+DcNHtoIVtfhifaJekGRBmxBXBqnn+Ij+k+z82b3Y/TBPGcIPRtmMCYvB/z8ViZOJS0Wfzoj9Lh+AEoiPynlR6L27uswkZ8gov7UAxWWs8wNDvXCBdGX8RRA/hEY4gjGGxk
4AKDiaqs1Tggpo83IQaeNnzd++F0nRgv787/InAOhdOSTq5tQYoaKlwPO3vjTf2Z3rM+LKzgSsl1aRKsUEpXLdLzIypPifTd4Rj3g6zunBWfNU1AwlRSpLryqhoe+/OUvI7fmy8WN+ZANwGbKUsXpi3kImiGqExdYyh1Xw3ZtrW9g35BwKgvGWwuIe0C2jSbklTQMJxOIGyPtLMZNsHClXDuf5VnmBHRlhCJk8AkeOe01N1pv/9oXP/XWDeeoKOjxbLLL/2NtQwXh5k6r3ZRDSsIA9ivTHFpbX0EqGtPLdx+tKX11be4mq1D/RDRvvnhawdAedCNnT7FGsyLGhU3FKkAaWKBQbmPGGa4iTAgPk4FvOkrWx8qAKexsOF8EXwSkNIl52DOsaSgEksuf6ZpwvlJD4Ti1Hx52hocKXFuV8BjNF8eGFPikLovqkF7Cdji3uNCYnqTHyVOK2RG8sH2U+np4tHd0QpEgD5hcYJ32oMJR+/jszp17raOznf7xr/z6X/yt3/yXuUaKe8tzgShw8u4R4LHg+DG8qR2ipbTBhpvtOPrlYIb9MXgHQmKUhHbht9m1MLXQWVnakpGh7d1NKJVmFvDHWDULZwtJOmTsHSfl+Nt8YOeAZwko4ODjIVW+yqWJQ47jncN3Xu3uNYVlt2DJekNgQ+/4sHN6NpDLqtfdPdhf29tfn11ovPm23GRXpqYngEGnN1jdWld1feHC9XJt7mRIKGtxY2tfmOLszMTZluj+MXGA1Yqs5qApsH3ksITOw6B4iuWin8BFc6X5+te/Xq+Wfutf+Vc5FQla59akWOUnH6+avi0pFNUrGW/ubD++98AcBZV88ME9trhqKV8v5qZrhdr4cLVAuV5ZUM35wjKGa2bp2eOtnQdrm7udHrsXPY0BW+W8kKuSoCXcrPj6w8Ou4h18HwPpB2oQMSk+PcIW+0WqiXy5ddg5O2QKkg1CgJcohVCYu0JUSBGcxNqoOLLbc5ScRFpQsBfbkgxdyVs9smqhXlz+AhqhvrHh+anIJ+DQ+VQRuD5R81IyleoFCNthB/fDbsdFkyuJVFYlmIdFy8YxffG4qDWGRlmXz4db/bbogbHh/PD42YDEfnKI4Riv5aTQQFr1iS+DhcVtjBTGhI+FWZz+3A//k1dCMUGNHKHMH8JkgpagM5BTSD7BLGgQXxIGTD9Ge3czLI31jwbukH7g8egqGgc2icQTUHk6n9lDznE0ej6caJUap/7i+5+6sm5iVMYVA3l+JdoRxMPfnoU0M8/AID8hToX+MJGrGIs+9cBli353dJwHJSwSSA+AykZBPxwwmtSWWe/ZGDyiW+OPmaXLHZeXIiE+4/lEhNz0xbNZM1/87SYi5NK5nQNDqLqbfvVsduk8vqSWsebRW5aAEtsenp04W6Udu4zsm1ueNFFImuS3dyCFChDBHdfoWmZnpz3oRUR9NvYoI8h9tswXbHxqZq7RqDFfq/2j8DFDMbz4cpwvh/3TE//pX7PvBpNUWs81lm5aE4NnP4rPhKRsrO2PqA83SPjImOj+lE4+cqsPjYjBXLpwYXd/jzpRTXPct6nBEIuSPZTrdI1I0e7uHpgnHNKHXFxeTuq+wNxWURRX8poJIPD2ZLsKGPPPLQTBPafef8ydtUyGAnvAN4WTlBxKtWL47/1b/85v3bo629vf2uj0eE5UC8OhHl3b2F7bErp7SHFOHjk87+wdV8qs3EO7Eu+dbu+0Ty7c+NTU0uW7W3dOcrXeWb5/XgIdSj7bkEoJ5SX+Uzsh16TeSIwJTAAJvJtYrySMo0tp9wEUMIDEnQxUTFxS+FqbiBJZqjYIYNVR1CJg0yrAYGgEmYxakgqwe3oQRq/wVKRgKvHzoKjzCgofsM3DZSTPPbolMLe5vWWFLQXxHJ9LGh7Pl69eqSFRve5xt3e0t3uwvrWKzBnw9euLr7964eq1Jd0kkzi1+NGErFiyjTut6eBRaQYewV84KcxmVEug06T4WRN07Y147BS6q3JJtVCJZmYV+UuHLs3P2kLtTZCzgw0MISA3vrS4KOtBfngIcp9eWBSFLgiIIq6SK/XakSYXuZV0XcmK+anq9ESp3SoGiadnPOB1w7N/WHBxiGEXfkHh5KrcYyOSuTTpIw0Hdfvcz32RbylFL+fZ3ebh6Hj95pGiTX3p9YhjihzIn1jIV5F0PgKDnjx+VBdYlzWHiFNkvjC6sKQQzYTlvXRVCexDkby00Zub69/51jdv376N6EYI4rSgx1lzevrk2YNHj7sSPEkwWW1wPnrnjdf2O6Pd/e3J8tiP73x8bXlRXBz96uz8zNIrN2eerDaPTj93fra9v+l4osah9VVxVhrIsDSM5scq8AaME6wLjwkcRFuOEjlmZQqTl/B8NPKeM9IOcKyTjcnwV0hhJ4SuEIb5nBwr5sJGIPyjjyuiqEddHC5iGMTDm0niXIQNDDh9zm4o6E4iSaCd0tLbMZrQGx4SZolMygG30CJlTx8bSl3ppEeSJiwWxe6A6hhHMX7KdBn+hKAySBhdW2/7tHXM7zRfywv7ihMiLMbY6RAP+aLygKFPjqQkVCb/gkt3Bp39mKHXwKecXglk+svQb0JGAXkwgzG9uOLPdNMXT734/gIXpmbWznEIZYm1RyMcUCjtGGIN8cullZsu3zM0FJ8JHbnjfryFiTwUcmG1DvpBvxD3A+9bzRh/cn1MtCPe6j5+AYZNYlh6PLywgkhQpOAXnLToAEUhhMoZQ+eOg4rRhV+D3YoegtAE3+u7K/r9qct84wqSFJdfsmYxmPQNBdJP1oM+3dWMRtSnNnHBVAl5Zc/6dM8dFRcVpuOtvra+bv3NAoGheKVstu2kFNIVOR1VqNTq0zMzYJcqDzsswYG+W3w3GJOcZYxL1JWUeSyqvso64TyqTAOg+QeqOoGVz97o8+XMDODleIISpInHMELxGwusW2sbEAJmbW5SgQZxjk2KFfbNXxIByObpJJt7EJ5Ll9iHcdYffPARq5L7cJt3UVfaTBz71s6eSOepSmSk1QNej4HZYUDgfXeQkDI8r+9WVfZeKo94JTwNeSbOIGQTwh1f/7zqBh3JMoTyMHdNN+qYqbEzVcSH/51/99+cm6IUPNh8+nTs7OCos0N/KjxHDjqolKNS+GQzsOXGlhZnxfHUJyqHQ+2N3WZxcqk0Nflkc6uLCy9UCcvHIxVMTu9ESo+x9uHIRFn1er7B8AdfRgbjGA5fQX56BpnYGmuT1tBqJq210cJCzOl2HjTGukXOHglplFpwE6sEJCESkJITm8VpQabDZMpisQz1ABIkyfCIoCnkYWL8wpWFhYtz+VpRKp/awuzBg4diA1jTMOrHh5tOioTxc9MLjFVbm/tIz907Dx8+XllautjZ23r95s3pipoUB0OCg0eLPB3tNYY3lEuGLSgxljlBQZTyjehhniDjyBZT0iixWKIl5CwKP5ufHaXsNs1IJQxjjIx0d5uEDVQXVJbCMSAy8VD+KPJskNwTdHn/k4/4hZLSJupTvQOHdBRfTyVIPhSMYPVATq1eBaHSOS4ulZVMiRTReHQSH+
8bRqdY2RPaJGyaZK6zly9ysStGUfXa9OLoRR2qxjLEonHCxw4eoBcfSHDLl+2c86GYDYVCTiL5RSWnZCobkYzhlI2B6MfOSd/CQqozUwtDx5cu/7kf/ui7j5/cX1nZkz0T9TITXgqEVwned7a6pzlJRtR5malMSE8uwWF+9IRrwv733n+/JiZ9PBWeqdc/uPeIr/+f+eVfevedN/b2ac136eIkLIyA3IO9kGz44ASPexqJnk9pLAJH1auN5sbuML/384Ek8+qatLabYEzyb1gbqnWmqA3FHRZrxcMjkzrkCeQ0gTGHEcYI1CSnRbmG8WXN4mYc9Ec8dX1ScKdTti8DWUi6ddtxerIbBmPy2/GpODC8Ji5KJknbaGVkMqNX4KCRoFqI8VFhNHJxcZdCK60nxWHQVnCQH+uGuQoaHCkOF5yKMIQx2HDUCbdOKhoKiShT9C8kVwknR3yNZXDATANYGIdDGwgWEMFS6XTFF4QkbiTEEIctsJT2OklnL86oJ8ArjOV7yAfuBRnKcHhQIb0FG5yueGl6dfan7774DDT54oo7IY1lHHUQqiBYYQTQkCdYUBQDMPJAC4FeodLQ5tGDGUwIet7opgETcRgEI3zklBdtIrKBQ9zlIaplyC4xr8CkL69YkOB0no8q+2JrvdQ6pZFE5+4bqp0zBl+c7Og6GYqQWaugDZzoTnZpZi7ZW4JAG3n6A75+4603uckJS0Ra5C5hvZJgiS6FmZfrkeFPUOOgE+cjU7NzoiKONtdFAlZr5YnGpCS2loLOJBlpS0R8Rn34Ax9M5059LL2TmaRJxefLK5vCi9GlySap1J0YV1rXkFdphWIlY6lNM/tFmxi/z6joGv4qIHBmbh4BAtw3bt4yU4kNN7bWWQWWl5f8+XR1BT3jliX5wsqTxzzFhyX8HVYk4gDj5alLdCaJp9CxF2Uk3x3fs3fFnnpTjM8I/ScunJ9jhLbRowiTgUQZvo4HnfOz5m/963/py59/M8+/af/ZeeP0/LC0+vBgZ+WJ5NO876SCOFV/8UwsBNww3tmX/G2rMtWYu7A41OoXpxbuPXm0P8gNRmuczGTZGUjURX2VkxprmFfG0Xkqi0AxJng0vHNjjf2LGFtD9ncAInE+YMzOGzFVIS1x5BWFCoLg26NYOIooVNwqxmLHo/JtRqVgOkAkTP6D/qDDHlStCaSr0Yl1els5aQoqlfkLS4V6fqjAwfG8t79PcUdVE7S43d3b2d/d3pPbjvBUqU2RrqQ9EvQjq8vBbltp6evLl2q1yaHuefvp5onya/1gC3h2HYt8JQtwCYoDHYdXJLRxSXcZqkyhPFGxM0gSKdJsZZ+D7BCY2JXYKSkGSYF51UxO6snynfymlbR8+OgR2Pv4k4+BrmWZmp1avLCEo5ctqZA7ayxMNmW6bwJsiVIHYH9hYW6WP97SrPg00QqyHWWJQnlBk0V7x+f7otf7PSpSmjQxUjt7ETUvqx6v+qER3E8+UheOSx9erAS2wxaE8MgphEicL1Qp7NvdASHPuX7z8CZCxcOu02lyb5GCHZ5T1mN9d617qApaW89f/soXtrbX33gtivjwehXkrsy37Ea4Nt6IQyUVfo8m6vlOa+e4n1ucm6KNzZXzfdJ8c1tf3c21fGOCHPfeJ/ekL7p+5TIuHGVTTBnWQK62d/YYnCSpYtsMg9ap1JciwYscR9p7B4Xz0WlJ1RrTaLbYNAH1RqnQAwAUuTFoy+KpLqWTHwmwHBn5ISE+MpYDi7u1QyyI1PK2cviAkyblXPhDmddUedJzVOhJqDqvlBvOqU3TkkSPXB3xmpVZMfLQEL8o+1W3pgrHQNkWmp9zQV4lpsJcUZaX7nkfISGnyY3MoYJ/8/jxcGmswFszmDH5e3sDMgdngd7Z6UGvq7Iohfq/kFy9OPmBEQIcEw4yQZAUjGtgqUBXPvwUaCtDDQmnR4NEKjKUl/2oXdbITV8AvecD7RDjAtGlJxMez97lM2GlrPOM3ugjKFDWIL0avUkjibE8v9KIsgTDTnr4knihl4ZkFc4cMIQVNoMwJnjY6QFzzgPEBxf45JAJaZgzDBccYBI04Q+Xt8ff6U42ZP2nlXg+wmxBEqJ8OR73ohcz1Tgj0KZGviEVwURUEBzB9Jx1GAgtW6v0lJtZ/3qgZeZ4DXSsHvba4+rh8jWQQ69Wn5LmuViotFSX5QxULNCnQTxTlQaxrFStTNYbggup1h89vK8k48zUJFFGHnQiEd7NpHW4d9CUYDUjldk2ZW+PCaQrRmYGz9VYz3cfwg0XzVjj2EcNtTHa0HCGBJ8eSY8Hl6LGREq/pCXo9x1GkxHxK1+5IlGmYSiqS2KyGKbw+ptvT01Nw6rIufPZbjUVJLl0+TJR97DX45idSH/YhzGb1tObdGskCSZoxrMrwMa0HEVWIfydfIm2nNEPQuPodGFeloDwJ6kvTZzxSWjyVar0ts95YoXvOHvJ2ag4MOEm/eMBGxUfRb00BwM1D2lc7j99PHXhVdRQmIhFPDofo1StFWvUMcrEIARRBpWaDloKeA/bFWYsVicWyulyOpOQnQYLGxp8iIvHA6ckduOEbSAgGVWz1PhZrA8SEFJMOIMhwHCKhe1Ect0+hhWqqBdLNYR3YrJQqNiDUeWtpeUXAyFspladmKzVN3tozTllYL0ivPx8r9k/aG7BM/s7nEVn8blSK//lv/xb15dv7nzy9OPvfbjzeKO/02ltHXCqJ9Z3RS5EfougRrbcXKRRsMn0zJEwxn1+FICbDpgVM5ff3t1nenTKgCvAq9Zr5WIo/TqD94PBGhpavLBAH//BRz9+8PjBzVvXGxO1a5cu33jlOrd70N3qtt9//0ffvnf72aNVIc8XL168cOlivT6LJNQbNUIJy5f4L3slNQSx5vCk74y2eke7reHd/TBrLS/NAza5dR1tu9AWkB6lkI/5e2Nge51WKI5yMjYBb5SUw3tJGUscK+VkPVei3KQQs1zSYJnl9Gn5/HSWw4qIYJwSC7mcTKGBGRu6du3S3/gbf20QfuHQRdAAA6DofnD/UaPxyf2nTykyS6NH60/vO/iDzqXy2MhkhXt/375LNi0CWvyRIO2P7z949vBxa3vLe4k7jakpTobMUguzF4ksly+IKhbXtbNV31ATB+EEHBSeAiguTi3IrkKUqlbKs/Xpdqc/WarilwXqEvIpQ1W9Cc6GNxGTUX+QoT7jpKpgxGJvwwf4k7CkqA2WY5csnoqh+B4ciBAp1Ycj5lj6Lqd43FNoGJUgocrB16GibgL78JLd9Q0oV2pkunSR5DJWcP8E1cLFwPtJMYorzy6W5qcWuHcmXj5SLA4dRiWwdv9Ims7zCvIW8anw9r+QXOkxCENCPT4SFogPN1+SKxzrc4QQaALqoOVIZCM+0peEvBKmig+NA7QTaQmUF19T0ySKOcehTUpvyrrNfvVGixXPRg/RSeDF7E7YpQMfesidGFo0i6eh8mif3CUwsjHs9C7NDEo3/kyXg6YdNUvoXzQJyU8PoXNH0OU+id68xae3xB8R3fhc+vFo9uqXnwbrXUnoer4C6
ZHoMGvszXrTQ9Ynv8eYQkbP0hx9zy73dYsyae0OGLKFfWLRUXh+E9QJ4jN+hpawgrhvzveH+KcOeylN8X6rhRmnPZdpwQRSFoDIyoy7hLhZrfSD9ba4O/t7geDxx3b8hfeNYXvpTz4zEpVGlt3MBukpAhM0igMAT/HPXiSHFM2yZXm5g6gX0uLyrJ/gqdNO21wucFien6MTQY/5DT5+8oiKHJ5668131tY2JcGZnKg7/Hfv3PaUpUNfcYDZMNxx6VA/2aoGMwIUjT1BQlK4ZduUWA1AYTlCMeJwj77+6nU58USDdA+2OXXnzjr8jPk/NZR0Du6MORBT6HjLeoizPF++dKtWn9042Htyb+tAcN5+i8apOFGRTGewuiuHreTxgaZHSwCufzy02+yflkInUYkollDJ+G5hAuSNGSNo3CHBE6HYIUKvkhIIux2rH5Qs1C6ofhRvE+WGF8baQBkcWP2PMfo4soJKb4oRIKzzSG6jHSjW3ML04nLldKyFBBIO9Q32+II6OhALxo24yZDIp1vGcNn0d7YPENUpdS7Enh6dXr184/qlGx98+4M/+f0/+uT792cL5fJZ4aR1VC4qQYIonZHIkFahUrSdhjF6zgFd/uGVUAFGNcDwgGRFYxvLF4qfevsiQ4idCs+gdO3tdLoqvfcHm9u7jFtbl1pO9trW2tzchV/9s7/27s98WrrUbqf57e9946PbH7T7B3CPgPNf+dWv8fUQfUFxjTgQR73IKeF9EBpW2WGPpfloJQXK2er65je/c/fBw7VLyxfeeP2VajF3+dISutjc3RDBSrOKeRpmTQ1rIiGAxux0ty0Gtjx2Ilc7J+pDqaXyBWkZykAaR5AOPrW3lbTPp5iQ5UtLsBa70dVXrgmWCq+fPNx6ykK1tram2vd4YR6aZa1sfa69tr4q7hCxr9anN7aaDx884bWwu9Pc7LfyIycXFmfq5dL62entD36cqzb41bCX1srTaMf21sazZyucTkqSsjSmrCn8xgf91RuvvfHq648fs4s9cHy623s8EYf56A4OrAXRpVARKc5VSbVKCD2V5BWYTaTqnqokQpuUHRkIgYJRchuGKMwu2uNwWV4aHTyE9QGu9EbUg/AnsYmCNLFTgS9Dy5cvUnjaVfvrKQDtuz75H7P/uw2RsfzQdFIADofKgOEy0EVfMtERJUYUn6n6iZ2Lb3gEclNi0xmenCvtrBoKR2InJM7Ac6Tzz/3Hi43MlQ6WzFryW+ovECvU5rSbDxTp/xn7bDL6MgINyCsJW6AxgWfTuYuW/On9lfCI0fPqd/MnBM+SxEtDb+QJLKnhJXwd1mUBmmolhVCXen6OSQNN0rvpMSOo4TUVLw08T2cStjGcJYiOUWUj1X82Bay3u/FHQsq2zZdY7mQGi5vJTwBqiGBOijPRJFIVJ8oHXcRjqb15ZV88G7oP03InPgIRuRnYCDdhEmk99etO5lthU8lKTvV5+E9ARcFm+tWzsHN0kfgDzCm2nDpWCPD69hZv9XK1jmOhSySGs00RvfsMo6GB8ayE7YxHHgZYpYya4232JVwfGoLovRRPSmRxEeepmymvfSdfYYvAZrw3TSr7ks3C8F/efM5zJEhwEwKOHQCC6QrrfrocazPGgXKqEioaB8OWQljG6mzUGwoECoS0irV67aOPPgI8JCqaBwl5eS2L49nZ3Hbf4o/ffIV5nROJB3F2DI0RwpPSGBqetcKnon2GH85jwTVlex4jDeLJy5uxVrIqJaSwvJHyQ4p9ozm5eWPh8sXpbne7WhnrHxxLY9E7aCJUjXqdhp1Rhv8gQlOtMBLzzkaWJn/43t1H62sCMur1ma2N3Vduvjq5fOnZtkQXPMrIPcmDxvtHuKQXmx3ZKDBP5mBT8jnspb8p55P5VGOrF4uXgMeT1ioSCaNMPHoZyyJLE0cG9aocCwxpLDGSFufW1kpvLLWaTOniMct5HK2kB4eHe1zThznZLF6WClaGccHv1Wm8cpTwwLvfu/uIArBGZVSf3BOjdKwgk7iZQQMnrmhWufH40erS4vJv/Nqv3/vw/u/+d/+9LAzs443q7NhxbryuUE19Z4+CrX2o3DogQ2vlYxURHKTjfG7uRqVRBVe0bd5ANHEOIITN7S1CIdDd3NgHbHCI7fMFspNkwTSP2kPi7d64OV2qjnWanf/+7/3djb0VhHosP/Lq61fnk2x05dplahMBH2ZPudrvNyGMjDF1h+I/rQoNULAr29t73/nWe3/n7/4P1JMzsxf29np/8MPvvnrr6m/+xq/h3zF/AZ5DIjS4htsaKqSofDRRLfO/IAr4FcCGg5DNEadFoeXYH2MXeYj2IgclH5/IrlflnCpG25+Sm3Dfy0vaWq7D1EuLM84nJ64owQkjLM1evDBVLkH6cqQQSsc6n38XtEsIsbW52trdVnmB2gP/afu5eSzNz8hj7yYduDwdldDARCL8zdVnSpOi/TNzsxQtUpepZvDVn/0azun3/8HvlMcrY5xiVDdWkrVak8BeFhOvKIxPC9eSETSSLFFVB7pTCNipeZ4h2uwcKwz6xNQ0XQVHdi4ehgdEUS+YRCJtkRWEY+wTSzl0RErm/UqZr6AdJUToyBDAyOKJ5IPqCPmamZoN/fDxOfuZnP/qlXN4g5VP6lY/jNbjkZKDnCb1dVRqplsW8YDV8zTgjtIjJA29SVlsyaSCZDlJdrNzwRMGbYfCaXNYISInX0DMAIVYhzl2dgkOhktPKhgKJkJmaWBkiwycNTp66eJFIBioGgqxuxiXM2fYikd0NSdQqETiD4BiP4C5UmW836LcUShAQVuwjTA55ObYw+iIsuOZkCY2NxGxZD3DW+J00IYgVVGOgBrWpMNd0qgsGYbaqYYXrB9TDd813B82DC2D7KTiDNSJljA+4XcTyfSQm+ldEsO46ZRFYZmov8NwRY6ke47KzE4E2sU5k28MG1VyFaGLyNnLQNOBPU/kFZIOIcwquuE+bxzOqq1xPqBdIO1Zab7gWRy7MArGT9KPxfQIDGsW3uBTh2Rza+ALTPjNb3y7c9hHq9iZA1mdDd29e3+6eXDp4hXd0nWEqzdWkHqwVptfmAKJlB6eFMKIFAG7ZjHfyRVpADDpb731hpQE8ke4T76Rzo0GoEhTZ+mH+cFXQZ4xxEoExY0ldYFJhNd4XInyW3AhnGhTxI2FBZieDduNO4sFCI9L5U7UFrNZGjm9t2/fwUSNjW6Q6tgSPve5zxm2EjjeaFnWVzcUDZHo4nxwNlqyR2F39V45Rq0PAGNzoWywdHz8qpUa3ZRzAozpzKUutaexoTK+JYcRbwQLiC9Fh5e39joco464S5XUPujcvDbxc199Z2X9dr2cP3Q6YPzhOkPQSadblBX16KzWaPQlVOIpMaym7ZO95t7m9idPVnZHy41SaXq/Tyk4322elutY+kFp6AwqHRocRJxBOIBEQgwu1Scd/gDH4ZlbV/mlaIRYMbHUkc+2QHgPLaWTRUcYmkf6qZjRaBRbUi44yirKWcdfNSxZkGAEmJ0UyEuRafb0+OCsRRVzMuju7m0gkMcnVrw+
P9fo9ZrHJ92dvRF64ObeirDo07Pe9MKMZHYffHjbgK5cGZ+oFprd49XNJuGcbNHa74rPax1E9NXbb7zd2m9994+/e7x+Mju65HCJ4zw8H+0Pj+3u9frHI91RiLFCVhMcXKaMloZyZpZkDL9kLAVg2NlTM3dLVJcTijLRAYAoYFMYFvl3ShVen2ko6cE8Kecy5CvzSbmW29h9cu/OR5//ubff/dzlydlqvpIXu0BeQ5sV0xnu9ePkxy6fc8gPmUEcnMzoBSHGVvK414eCpDA+67VGf+e/++OFC7cWL92YWbq1121//8f3br32OmrEC6AqqZCIWq4rYT0ecyKIDtX6FE3BoN0uFetQrYoujjoETOXiYOMYI3I8d3LQ3RZPJcB59Ez+6BVcEOsNaEQPgKjpB6KL5GSBjeOiTggPtdHpmZJDwxFkEjILbRCAnwhx+WRpf2/PkSH1CBzcaR6oj7PTpELcRa17hxAs/bC06JYNWhwTAsB3bvXxfQTG4ZWy7+ZN5cBe+5d+5TcGj1eefnJ3bW19el6uVHURj7c2198efwftsfLUTxwzOJ4QYyQQAmzQOQzp3Hk7/Lu1s62qqmMFjfNOIn877ywNCquSc3iizc0uyQsKMQisZAifmy42apWuXLb7TSKRkuQ01kFjGIlPKTjbpUapIIaPj6x4T3o+KgbBDbh6UEykDd4R1aLXhm7HChM1ZrbQviT7rLVynlgYrWcxLLi8M9IVqCcx1L5k35NOK3x/oUo0FrjvHxxAKIPtUGsSMlISbCg7MuVAlFMRcFpHBCB9neokJC04KpRvcaEh7ocmMZCvpPfxosB8YTImXEcsKihMNAObBkWmUF9UIrhRWk2dhrQSTyW/OHTE98hFllhOz5K4vTa0LSnGCCIg/QbtyYSdkLrQrcDFLycbXaQrHQC3qUcM3EtDKPQPSQjfDG+Ektm6XwhAHkrdpDEkechbPQGiTdZPydsx6J+pRL9JesveFX+H7jQ4EZydv7LLr0Gr8NqeQTuDUsSYsRUax7M4AHuiT5MK9vKo0+pystjb3idjwRgIvBfV+vUjxnAG+6NjgeO0yN2DJtIBOES9xEIiHhTzRU4353ucsZtt5RHwAyiKIVFP2ot4W3qp78ZhijGAn7rcNF6UyySNNNbWNiVKFj+FEGGNws8TZxqhsFl/wzIGdR/ef2SzWYxp4Q3YeiXxbuzS8sWN8fzT9ZCuBGZKfbu7tVudbKQBRHCVMH4+clSFvLQMD+xZjUjFZgzQGScXuxAieuTV4r0WkZNslMl25bAhIHyIB62Nk+P9xWuLFy9Ocg9UGBZLt7W1K3WEZHMwCg/bze0VGeT2ZItVVX6g4uD509XtTiTsG8PQwbBbu53V3cO9QxHI/fbReGV6GgsWdDIqa4S207jOojqUHZMnSCQrO/NwkbJIsUVQSlQKUTggxB6jYYGMVfsjigWIjnRbYluiQC5wsneUS5GInRuoRL/0XnQpwP3kdHKqppQ4HDG7MG3WKiTXGouV6vQPfnT30dN7N4qvTsyq6CIh5ODRk3UYSGql5j5VMcT3tFw6gAHZsBRJRRXHy/WmvMGD0+uvvF5tzL33/p1vfffe9FBFhCYJjSMSr67J2bnZ+Qtztdrl6zeovdRL5MJTm5xk/5EdDyTuPNtgH3325Gmg72RSsyGhdx0aIloB6vrkhO/ATFqTV99849or1xTIblQbDx8//uDD9zkHFKrnr7x+YXZuYVgBFnFanf5JJ6EzyiSgcnwISK1JHHI4L8V7OJhnXQzhkEo1yg0SLlW0f/qU7bYwcjR+5cZbl67c+Cf/6O8XK9Nf+tlfzM9eGuE7UK4e9mlN2akx0EPStVen8zTpdgdrgObbgrAXZoCvmNxhPzw4zpXfatpPOQuRYOp5u3B2XuxLTii0LHywD+V2ViLxJDwoA+I944pTDGroc8vVUJNieNPg6WxSFoRhGNSZpHgAu5dPzt48vAUY2D5bvYPdJklhe2XlKR8T7DInfrgI2VdB/mBbwrLdR0ePH3xy7/q1h69fu/Gp5cv5YoUHPbYVaEECjhh6zscuoh4QW6xTxPvx1Ocra4wmQt0ghw7ZnzkK3gro3d3dx3OViY0x1gidohiB3yy1JeIoCCtiuEEmLIQIkM5HR7ClGDDel0BYAWvcPdVCaLFRbtMPDRLBK4IbWABlFAuPI0jJ8dKClJxWoIuopQTQ8S7LLkcbHIx9x7AFuTI4E3O9/OJ7dt87AkRS3gdcA94B6NoDJElzoC3LotlCWlYfdsxFuTDJncN+o5kzmD6fizv6cXmLK34LbBd/Sj4AKqJlDCEu9+OvNKRkyg34DthEsYJmxa84nviUAMedIIuDwPTcelKJ9fCigip57YM4qEvtuPC6Cmweirj0iphzEI64/+LNQsPTOkB2/htECzMQ3SK18S98dr3eGOJP97M+HB6QaeRU88gQFJTmkSaSMDg66Y4BG629ccV0sBixh5pZ8Bc2uZRPJMoYxr5EG0M0QmrIyMTGFkyMw2xaHPYIaeQZ3Y4OdzbbkqpSuXgFHQWWzt6QV3kWuLJFIwTqE2PlUkPo23/yDX0r0RuJsI8k49lLrwHIsbxGqLG3g0WP+55daa1erNaLHUyLEDDj0syfsUjpevkUvoSui7WBPzZ1fHAkxBHmd/4mh/3d7a3lpUWz3NreELaothAUTiLEIU3UGzgF5gHrMD21GIojSctrkftDlDAGmIOZN9oP7/JaCxMsTtggE1VHTFV37e7NTEzmhgr37+5VyvjcJVq03/rNvzDONyIyZUT0/sHudntnC1tYG8sdNrutnX3hLPtNbwkSJFaSV/TZ0Hh1qpwrTrZ2EC+UHG5TpSZ4nKTew6Uhl4njCFSHAwj+FL7gqdHNDVXtAZMvYspTgmEshW0ElGWyIP4x5Fhakcj9TxGQkgVKXnN6WI2yelTyCCZEQGlDr4Ab2GlxmW9euLh087VX6VdHpV3PVY5aR5euLD9dWQ+722GPa2iuuLi+9aSvOsOJ+q5To0Okr0H3QBGjAsVXDIPFjPAkmcLw+OF5/slWa23/sDA9dTRcteY3l5bm5hemZxaEEhXY7aUqLVfpXaim//Dj99TbXdtc6xJtofC2Irq2Jfy+yIE+gQRgFRmbw2OXyxPz86/cunHj5k3GJ/Tm9r1PfvzRRwd7zWerq/tNaXlDebbZ2Z69Ons61JYYBIpCnZDX8NmI4qgoOpVNnMeAushaLw0jRaCMXybCzh9FmPgcPXi6W5tYnL/8ypXLt8ZzpU/uPJIqZHe3t/f7f6yQFlcP1gucmZQxfM1DBKLPiNHuoPnoCx+CQr7G507OFqdA9GuJj+cQ4Z5jIRxgji0KMPkQz052ASpgzzAk+BzOkxuCbbVJcKGfngMH4O/vB38CNkDGcFDLhAzGSEi+BBxE2vjxUeUZ5XQYsIBMDY9dA8CiKZWMabUPV1d2tjabW5u7Y+dHG09sqMSvQ/1258mDj5Hr2SjaOD4xNweNMW/Lt8vLc7e5mxcI397rdA6IMlQz9G8qkltD44QPSYdcVylmsbiIGFwja6JhO1B
OWVEd0KBjuI6Tne0IjpydnjVapFoDPzmkkrrFNFPudsaI7NQ7sESiQqhVwxIWOffdAmdDYoSOUUdqNZvnQYJWuMQL2IjCMxpFOTmYiuAJhwsEYAeBtZL27AUJsftmkL0p+9RRNEqpVMO2D9/IjsXmGkk7ulTtfqJ3ZPwgW2WI2NZmiDsWIyhZiBquwJiBR55/13/2rlAQJegIkSKgJNBc1kybQJQhXT0fWKbgezm2QHjHoQDNGoSzhOzXkUiUlIP/8DOVpPmG7Q9n8NPkMOvEg7Crl3pReleGiEP+MZwYEQktyWvxmailgWvpEVfWycvPRECfK9DM3dDTTzF+Pfoer3uOVeOm1dMsNLMkRq6IQRWicz/JGJe1dw9+i08ci/+EOwkI1FKtWemmc5zXL11cTkduTKKHo0HXCOy6zdIPpHgi/DJtRJgvhOklGZEf8/pTCiIhFGWG6ynpjlot6ATl894YfAwmrX827Bf75aZLxy+/WIdEirwwKjtnv4IN9/WTYMZ/g7tIs49MJw4xq6MVrQoDpg0fG/vi5z/P4ME6zX99+uYrcTxGRuZmpr/8pS/A7BABDEFrw6nP0TFOS2dfgzZA20lA9Troz/uSJY2lOZY//D9iZblND5/wlugdDJ1KK9UvFsf/2r/3b128MDNyrMge9cVIsdGQ0F7yqNbGFm3MUTOKgJOC6hPCP8faSL94IHXtFP4bGesdHhNRek6UrQhmbAzoJYqVkcxgQUBVaCJpgA+TLt6Dp0dNmd3PR+jMIuMDizx7cMzEQgfHGQvv0Fo2iyd/KA3XWLFz2uUtud3aTWyN/rEpwfd4jurQak7OK0015axv722fnu1I0392zhe6tLkz9uTZo+299YnpyuSUYsIze7tNNjBVk0ZP8xYo3urlwV/y7yjJhlijLpxeHB+vDxWm3vjMtauvfbk2PYNSQBztTudDou4dWYmbvMAePn5Ky6L+CmQM2FxAIo7uwIjCa9VleBDZnMpaU1M3b16/deuWdHyekZmTy8/3/viPP5SCr3OQOaYGfgfk7JkKAPZPvvH9D89Ub+G9wZs3+RmGSj8YfxgPq+pgB4ABNqTKxb5IxcfvHG4U2CH6fGNj62iodO366xOTCw/ufnzv/srFpel/8k//ZGPl8fLy3P7OJo4Nuws+q3DXWCjq5WIQ1cTG0ZcohvgxXkQFOWGRABQWmJ6Z4Mn0bOUBE6ms8Oenh/SgzDlSO1+5ckWGaGJl+O6q5Tg9bUiojq5cMgdCm3gsmSWcbAuFUAVDByWL+o1AurG93Q2fsSOEFDVE8kUkmYNPY2KGWhENahTGZmszUPflucVms7O2vvUrX//Zg86vU5Lfvffoxx/efvQwMjs+dpaxFFGzo4iUEp6tRn2qpoQ1iYCvPw5MLiaZTfIhOak1GslFsSykLumHmeZCKUE7D1zPJPzkti55f06TcqVotZO5ceAhFDrSiSUMZmp+8pmEBBtzYgDQll89eGL2di0KmUCmYRIIC4624d5uxijWKe7cRiBPkQckvM9iPNZQU9NHcT766LYG/yPpyvZnl05cYM8DBgFZaAr+sEhzVy8aMQgT0paTzT+uEs7U2Uv/bHMYOhGtDO3am9ieJLMnnBusimk4jt7lyGQNonFgaWy5rxluz0YRDdwxmPghiSY+46Zf6I9PY2DBdlHEBVqIzjUI6YMQBQOEcIM2vMQjegpyGPq0IIQQbOAV6CWaPX9njMs7wvqlxwCh8M2FbQNLp89A0aFo1HnwAsHGpxnFpIIrCcLJ4KE/wZh65toZk4u7DLfBO/uMtwUlilX31acFjx6SUjD96QmDtGb+H3pPRIydlNEfunMCaAWYglXnmlYg/PjwYbkU8pZEc9yKkkshmTKUTL575PgEnnX2jFDGSFWoJ6t1QAJcKNlki9sLeh/jz64Y3ovL/Rdff/JfM3L5Cd71Ck+FYitd7r9slz3rdrBnltFKUS0lFYRoKoXYbSIR6sfvvW/vmNOAXHN3b7tUfvzsaYTOcEzGXHU6vBn5DPkVRuC5C41ksAQ+qNJIH1hvqg5ILxiARG4tqq1DBthhxXl32zt8ueWK+Z/91X9zcY75+IwuG15VaYE2g2v1xPVrnUZ9d3VjZGpInkDptzuIve0cGZ2YmR/KV7vn+d3OSQSr0K2U6KaoqYfxw1RRljzYaYJ8/AMWoesAGDgJKl/ZbUS5tDv7FqvA6iylblAsW65tALTlYc+VB4jsAvKsnl4CI5THcqdjzb2mjWNF0dZLYsnB2DA3+s7nv3hr8fJFnlq7dEbb+0PD+bnZi/m8eva5ZysbA0hoaLC3v1mu0d+XIBVFQoSWMgjQDRId7ASjl+LpJ6PFwdn4ASPe0cl2q3lyzJjVe7jxh7stWYT2ZFagnQzQTpMqjRbSEgdosnVb/gCAY/xEXTV3lgGIe3Z6RqKvq1cu+bTdz/b2bm+scDR48uTJex/8GH5Phq6IWy8Pl7EsIPwwlDSj0HH/40fDwgTCBGkJI71HcItqQJV4D9ldl8VO51Gj4EnVyygNC5MKz7chDknc1JcvXq/UZ7n2/ejHdzY2mpcuXdndFx41/ODRDj5pay8EIGogHgBvv/Gp6elZyufFs2NHgfO33lm1GLqZlviLb+50pueXN3c2/+APf3TQ3Nzf34FlBQ7Dksb1+uubAo/u3FmTcpahEbliGQHmhipbBEMyrwR3pG4XeEKq40lhM+yxZSF+xYk4o+XMZnWuOAfLGTjnQ9hp78OwKAMKJ0cfZcQRHxeevbIXjR2NlIY/+9aNX/zal2DclVUBmd3f/70/nJ6p2VI0phNWlsOd1pZkYrligajD2e3osHfaVvM6UrxSicll0TvvEXEwf4Ua9z56LDTcCQ0SAllAFbzWoaC0xta/vL6+9mj/kTM4Nx0FL6V9CityMnWDAMMeUiTu7CzyHvNsEkSsQuuxOTHg5fAigSycamiQ5lOwmNIqeHX/kzHs+Oig3axPMGPXQ12qOE7APG/PSOjC+yHwacI58ZFAILCV74miOOrxBSDiiZAETMREtXT//j3r/Morr8Is0IgFz+x4Hvc9OqKsSqok0ADXJvQVEAbMAvFjClNEvB8d0xhAiA5JhPAfzFk8EneyC+rxRTNPUbe6AnlrE3SDyg/DEoypcSfyExJRLBaRy5cQ3dKJClY+yFdg68gHk0G8XuPSm45itulyEtzT33MKAp7CTSNQizEn4hK4+3nrF+vmJ1ur/+eI28/YxcCUGLcz4XDRPj3lz6yNLaSE8D3YbGkJYLyMdAkNSTpYLS2pPiyc737EfHI/IsT7jubErKnU1JPuSV3dN2NwLTW6x3lz6d/jVi8jIvBbIpMhgNISqDeFQ7IoQTyyugMqD+TDiSbGnhSnaY9iXWKZ0nJkX3ymm88FzRhb2iA8jJX3XhvtDkj0nQxuikHUAxyCL7CJ2BYQiM/+zre+zRZl3fwKr8nc4fRiVLGNaID6wjqnPXMMaA65Ah9hZ+XELObZmXVrpl5k9TSLU28kMTJLn+1R/OI7jg19r9XGJxtFKS2++Ll3Br3N5u6aCuMwttClTn
P/uNcmlUtNA06wp5cuX79w4Wah3Dg6z+0c9Jr9wU7raKt5ZKw7vUFbnpjh0W5oVgesPzR7AWwhIcfl/VbE36T8ELzcDvW92Cz1pYRT9qbGK86WkTsWVjqIOJkBRmFsCMdOioFz8zwbO40EtpOl8ZG8s2aLQQi2LHBKrPn5uHpU05O1qQnTjCC8xtbW5sExH8XhrrQJ/d6E4mFE7Pc/fE9qV/F3so8On7eiCDC2Z0jYH4ZP8XEZT8r12alHjzbvf+u2nE1SecD4eObjHOHTeRkt1ieqM0Wnl/YtZksQYhqM+IqeCDYINZzLp2evXLmBDMLLOGJ7TJ/50aOHP/zkNmz+VIHp0XP1Nczx/sbq5tZmIeJJC9j2U2k54lDwpBjg7ujDulk9vsRBhicbXSWXpTN6LTwKhACI0LBgix1z11GHF2VpcELvLXSvQ4idmZlfunxzfunK1s7+97//Aboljy0P/2J5Zn93E9mTzEnkobzot+9u3ng9f+PW59bWVt1HSIQgg9vQR4U8NHz37t0f/OD7l66+ygfzbLi8s3eyve2wnq+stRzN6ZlCtdo7O6ufnm6trZ2USqebWxtvvjnNzG1gqm3s7DTPh3bjIJ8f1ytKLJ9F9VeBkDQb5SLndVrSmakJjeE3n46IXPuqZmHUeKf7dTBObXAkObfsSf02oyW0XBhIRcn4XVZosZbP165dmR++lv+lf/lXtlfXHj54zJHk6bNVmlVOnAuL07LDgH+GL//AEp87FWbk9EVKHNhaJc+39qApa/8RD1iRMPK/QEomEClqHFXRMlHEhqklbBZ2kNByRBZT4qfaUCjcSJ5fyfU21CHAX1GC3DhdttLXgOaYUIEc9nGG4igkIGY+smuKXtRFgMCFmm/u7EYFgl5IeGwuXNaAnC2IhDukq3RgMtQTixtnPuFhIpCfoLnALIla+LSFQsbanQMk0nnmVrC9vekJiGN+Zt6stNGJB32CLTsUX+JIJlKSkF4gk/AbRGN8jVs+gsP0zRszUhSi0/OTn25Ey6x/baLP6DVIVvanFyUFQYCvn0C+/FjBxieCFQ/HFYKUMTtrXAhBefBtYXBCD7xdjwYS//wZX5CvIIKM2sKH462YdyPF23rMTF1pVtEUsqB4jB/T5ac0ssD72QUZxVADKwVZxUzhMUwtJ2DF+XNL8GBBDo0g+dR4jo3B4y1cesBvGWP4TvZgIsgiKC4OXeO0PkE2tIQjECSozKsAJK+8MfSLGBwmTd7ewaw46/zFZTyDStjJiY0eDNIcHVKrxvLqzaet91SaprsxoewynhdfDTwWzKdnjZm9lJbAdzJfzDTFXzMaAutwmHS6AvWQOCNmz7hsiQx+rHP1yQZ+KIl6KlBGtOnm1jpbtzehfmAd3FNMsOFHvYazY2wdFTSLs0F6kUGGbpAlMOAoLT7KEYMGZcGWsy7VSqWhQe7ihbnJ2kKlKIUOvg2fF3Xg5GEqlwp7raYaksetLugQF6JMLc3oSK6jqG6LIjBfinq64+XiZG1CeaOSRIMjR1z/zscmp2Y2draDFyMBp39BrSMffPCRoM2+sw8qdlsq10YiNZ8iyJB50gSGwSLZLcFw2B0joSIoAUHBnFno4JZoTQlnGJXcgLMrgfgUu5wvV0u//Kl3VAGLmfDxGQpnXfvf2eeTxR10SppbAUIyqHf2j3a3njTqU+KLiIv4SetKocguE5sh1fpgaH+3+3R9d2Wnk4xxhfN8WTSx0ks6NwqH7Uix5RQ1xRIBzEwVvuXyfvXSRQo/28cnaWNnl0wQQIj4HEZCBMZRyIt8zK99cnpiTXlNwmkxN3/jcgqoOGDjonW1HNIZwJ5hh3Z6q5OmgzcPTe/QKAwttb5lVBg5Vjg7XsHGaRHAKPJqbLzC3yI0jXstfmuvvnFxcnZpemb+T/7km2pZvXLztXJtSkZ5e9o7GUPPaItOhyvmttfc+Wd/8N3hkckbr9wcHI31D88HTHZc5igqlN4sSeM08t0fffSLf+4v2HHxC6O5yeFRGt44nbaSB0qrVSI/1eqiyrZkGjD1UukKtGsREvqkTY1FkblT6tjW/rYanw59tTiGNPLZRbAX5+YwminZrJQvDa69Q+cF6dM++PCHV64uFmcn+VZDDcc8H/menJxL8Y7rQzwgDvnfR0abhWKjXJvYXf+kXC1/6rPXmDkvXZn56s9/jli8vrn79NlKWXYwOYX39/CUrK2ONrqj5jKskxfwK881b/GzI4ZvGeBbB6LUzZ5ClJM5pUYwvpwVugDlLHwFYY+jgbQavVqpgWKhXkPkt9DNHtlxfJX5+jMOJkLHTSiQF0+m0GSINrN1VMpQwmR9kkk97OUQ7shIvV4NQIp3ceiDQ+QRLjtN9PixmHqEa1yx5T91+c1N5weiyfCXAamzd/XiBRHlEB84Z2nA7SpHBGNqmXWV9eG7y/fnuMN3qCu7+5Of0jfMBl70xauzJv7y9vR4dPJyeN6SjUqzTLpCQTMKF+QxKFwQPMg2xhNOCEEWfY+u0v8tRAwqzfqn3+VBawAhxAkxnJDyEFknJPh0krjV1L9BvSCjQWytb7oZ94OKDId3QPY6t+yrV7BA2AAvd7mRBhDlqey3RQ5+OeQnbjMnmA7cnMeBO2CBYtMjTnIQE3QF1qb3xD7QD8BcGtgUYCFco3IcYRNAkEDtpeAM0eDRbqc4DRqnlvLhIsE5mvBy6cq1qzC+MvPcsW5/9DGdGEQp4WwxHJViXkZlCtlcfKIGBpOt2MvPmGP8izW3EUGoYq5BgE1EM1/chJrNdMDJxyWrXTKFeha+C+82xgMnKZejJ8dDvf76677zcnT+v/P979EBmot5mTsKmM/PVoT+12polc7tQIwhnQp/RvariO0zpLQvwYXDfiE2kyNxrpVyfmt19zd/4zda+zI4SA9b3eluJe8sBfByag4Wc+OrD5/trAnL7E/EGaQUGWsfDR0O5WqzC73z8cN8fazcoOcYPW6GXCVXwKncs+qAxDmybobhCkAK2BedS+vGoCwD21E9P8xFmMWi39luSkjKc5eTYBie6TLtOibmtFyRWAEYsHiPVeps1qOKIbW6fYqg0mhR/Fef9yBSMXwudypT0GihLBdJWfm+0aHt7R31tlgB5DhWqGR8tDR8lqfSnF+8uDB96cHjJwf70vSKguHWW0USRKicDecwbydnxbWtre/98AGeeHJ6Ep5Ryx3REmYrN5mZRQRjZIHCB50Wx4SE5hYuzoMfojAXTnhW2RfWLIUzmGLCKz8QK1gMBgXPRLcjY6FMFlzP+CKNlfKTVeWKjiVHUmsyNO3YnTzh6WzQD8t/KV88H84jew5uoOSwBYQyXJAiizx1SWhcw3Zlux3QAAAJnSlfeu3W4yfrjET2YnH5IlrFuhZO/OdnM/PzPEOJ2aLp6B+4s8sZLisSQ93VVxZv3/6k1f7df/ev3ZyoBcMXPrUqsAzkCBkeHxo96MrDj0rKcJQr1SbHi9vHZzuUU0aNF2l15N4s9A7Hjk/LZ0MVFFGq2OOzwoDrBwE8xjI8OGT1Yb86nZ6aO20Sr7cAxsGoWO+Ts
dGeNIqrU3StMnxSTqi5KjqgFusmYVHlbH5pKuHCEEigodOzPomdAwQbBdUwv64eB8VhYmX7fHu9VC+1H7VV72ajKamNUijwFK1XKzevXaWU5mIomfXaxtbjp09W1tYGESl4JoWmnOvwpZeXJ+qkxtbBLlGDUsDaSufrgDhqXknVR9nHB4iEDSE47gAe2pFxRpwWOKfdNae+SlUSdrEES56ZKhBBaI5i5AzHukGuYliF7iR6xpEQ7u0KGFZI7XxkenomeN3kOg5VRwS8Wlz93n5zF2p8Tq68FUdsQDbe4jpvQdiC1wsclOlq/AlHyO129dpFtUezR+AmZaEBZfCyz+nT83Mbw0pckN6Aru/QLUdc2BYug5ICfydMxwoQv6InoWSIK/7+yRUdGp4byIYrw5uwAUzlu4EZuS+ZJkbjuEeOMlMrHkmtY6UJD5Ri3kskNST9SJftWTysz8Bx0XPIT74hENkXDElozMgu3T7eUq/Il+noAdpViYBQj6l3eNwMlBqL56cgokinPh10ZMzwrBLMK9ulkhXoTSgN3QkPb5GzkeJHmBT7TzYjfdJ2xFN2IW0JXjM2SD0l5aDGB0N8PHmOnUe9hKdrq8PDGzpv9Tp8GEj0WDY8uX/qcdg1HVCd81/m/icmhCDy448/ksmmMaVE30ylUecWctDt0NNqCQNka2/wVia9HLFMS++WC2lOl2/+a6FxxoAPay8VqRW3Mlbe7CAiIletrIbNMAc2kD3eiDV3xYGUumZ7W9gQWCcXcmg2SLoYkIArlzLOU3FI5LEO0DhGgOEyb0HJQIIUbWAOGSIxUwt7IZZDgB1WkdYkmBUYRaE1SgYcOh/SbpuPx7Url69fvqDWE70577yJRm1Y4j2XJCCiQXORTW5cns2zbVid9arVPxwtqRElQnbm2997/7wyPVrsKTbRmJznN9FToyE3/ujpM8hXUQsbThHVuDAJn0Cvie+xpGRWUxCUA4odKbUjhqQxknP6nLx7rOAWKIN3wn9AQWaTIpqF6j8/LlFCu99xX9lHqEPEf1cd2PrU/MXLc/OLxZnp5upjBHFrrzXoHDjWmAJlYXXV3m8ddZQ4niiO11s7g1phdnDwDJaWQ7QxO/XJnfvTi5f4vnF5X3+2trm39v7HTzp9nOwI3p34RRXjfzzk5krTFspGODdAWz2tKzida9eghb2DPfan23fvBK4hPeOgVaetEvUgctg4FI35Ujijbjd36dg6hx0+lAExjLCsfkHmub2Q4uPgwXT4Kbqqqck5uxwBjnQlYdGSLDe0XkAAJqVh42BGh8bgFF79h8LXeGwzyOdlOVK4Gprunx4qVw3AZJOS4u73fu+fiOsCkyJ7hPCRLQUL7OwejEXyuvGDTl9c3fzSJeXY/19/9+/95b/0m3gHhFHEEuUq+438SVMz047M1s5mvdGwS6xVNJMn4acthlLaMvW61i9c/BnBUpgSP8JvO7v7b7zx2je++SdSGloBNX1U5+HKMTl7+XS41D8qSFRBECgVsTKC5fsWRwDTyuMDyXILldrTtQ6IkppmavK81dlb/M1fZ1g9Hpwyt4pucES5JvJ+lBMkyusImCjQuYVHUmu16ZR02ypSPiE412sS/YUKV+orruJXFxduXb3mVOyJ197e2u8cfPfj79x5cA8NlL6Pco//Yb5YnZmfFddx6FyPjUhaa+WhSjhO8PrOli1z3MM2waHGqd9pRQ5G9khHWLqsYE+Zco/kMGSGiGYwIc/jiCBUBWGMPBi6bjpZSGZnfXt1dWVpfsmuIVegqLm7C8ZYDpKXPX59pMr/i3G9XLGMSVWa+NPAQv9j9tmbAs/TICZ2nqsAoZWa0rkxSuI2RCN+EPpwMjM/A524AKRLf76bqm5fojrfsysjMM9/CGVY0AdUwku9DtJ250Xb+DXQVghMqWV6RZCrwFIuoK0Le+hdOGnYNgQsNxN/4Onnl4dT+5ipDhEYn9mVhhoDdmWtnRkkmBxMVMMAByoM7jkT3yx60CVnAAUMU2nqEHrXRXpF9B+H0pImCpRcj86oY5E+z8Jbdtdv6YkzptdiX7LnSDGZKC8kG5xEbHnSVXq1IXgQuZJbC0K3HKFalgUnmPzD+fnFyEc+LolnOEDPzs/rSqa4Rqn481//BTNVco0a9Cs/97Mcjt/70Y+u3byhwcbWJv2b2MCpmUnow3Ri18KO9pN1fv79+dr46/llaoZjmr4ANJIIZ9bQn6RCjkkhSWaImoHd1oG16vX5HrYU4/EWW0k7ZACm5Dvi5DuikZ+dRXDc4olsiVAvJy1Ghdok0YnyxOs0TvgLx5spq6FWfAhjTGwET6uAFB4JgRjpPyD/YFD8YvmuXFlWToJv3Nlhkz1AziI+TSLZseHCHske+ZH8/Pyl0ZE6jUXrMH9GsSR86nTkaEhmmMmtjhrqim5VRL0KJlMzifu4I7CzvyPnN/BI0BhknpI5af/DOwZYHJLEzsYZjs0PZwr7BycTeP0cBwlas0Mgu7o8smDPsRPx2u1Kry3APBDreLG8s99Tqb3bOv7RP/mjz37hy5N7klDQCc5IZhHBpJKjhLAqdQUxo0O/KcFAf5j4fkJTNNm4sL67OT3d2D3olidmZ5YuPlndW3sqbqf/8NnWhUtXVdQlu5CIyB9WHiMs+cWFhUVSMfp/6crVazduyOu4s7cHbFY+Wu+i/+02Mzj6FHxpgOf5/nbL2GMv6HYEbxfD9CEhA14qYncIpTTcyC8dLMxAk8GUowmwC/udkoTBUIrz4b2O87DBAfDP+d5giFPPgQ06nYBz44nZFQof/vgjGoKJiSkAYDvQhh9+77t46+//4D1ETQZCA8B2e1yH2Bia6z4Er1aYAoj58qDVCiPBCAfr02BPcyNxGE86mAYpX0k7fjxod5aXLnKzGxp6zNuu3e5TfaMbzDGHSLwyhpXi8C6+LnSYfMc7/S52kI3LEbO7tGoErU6fspCz3mynr7hiB30mZOfLZeC/2Tyaml7c2Nve2ns4u8BUA2UXN3Zb46Xt+4+fcmuUvT0BA73P2QFPUYeP8TIvK//R2s4zFUty+bI0oOY+MiZePmfMA1wwbyx5lVrITCirkZO64lQSR1avDc4Hi1dm9zv7inTdv//gycMnJFm+DxvPnvLQYyKoVyJVG34ZPKHTNhiEJywUSCA7++74AkIgGVpA608Uo+CU7T+MAXEQApmE6IWQ5sdZrFlNZGDBre7v7MJg6tokBjSsToq42f+UbO4Itxfxod4YHYZfYuBEL3h56T3AIzBRYHbjgBm18cxzwWdomJsQ1ndsdJf3Mx1FOLM6zSlwxOMezJ4l3/gzpLSAuERYUlfZGyFi/fseV8gkIc+4QLIbXpd+iA/NXnZr/tEImrQE2cIlbJ41jtcFVbMrQbE8Fy1T4+iEoQPdgTm8IXR3Wcc8UuIxz2adRM/pSiKEnEznuF/4JfvVsid5yRplDWOcWUc+PRpSbJzG4CkiGgeqgrnBVYovtmfpRYHoQ79yPlqQaTr8JM+7JAdsUowmBpChY989Hp4kaVSCYtQrs1b2MfpJ5BPqk/hSNlgW
KTcZaDkpiA3UZyjFywUqI2yXUfjz1muvrq9tEt6lcldKQ5YV+f5xncHZIwMR9xPvSoOMeWWziy8v9sqdP3XZLIyvV3CgMkElRjUwBQsgjwGyxCAROIvnb1KBQi7kHhk1vQWIj/VGscAp1WHx+is3CLtOOABNNX6OjNP6WCkLgru2FQYThbBpa4C9fQwKFVQ/EXA5jcLhyneUCdKPDTFyqTFOBh4rjJxcv3pJnBXJcyAD4BgvIXqzQ68XLr21uSkzabVYn6zNLCxcbrYOZ07KZ8V+63iEtu8kV2nMXnzaeUo/JJVMt3MoL1+1Ninc1Vn6zne+ZT/YCiMLU5AsQwjZnuMLXp6ESBM7PNQQKhbbFiaqiL+CEYL0RgVkCdNJCFz10H5Cq0jUyOvPMAZmu5Q+Q0NPnm0sXbyxutniJnXv8e79Z7/75jtvv/Pum+zkPPL6p63W1po8D1wjt7ZbpbGqZC+Y+gT/5dmZuRvX6itbf3QmDJo1c2xka6enq2cbO6KsSOyPn27Ta0WaqFzu6tXlz3zmMwtz8+g39+O5uQV+Xzt7zbsP7j/+0fdonkVchdxq100zNFLkn8BHAHNGYdyUvYJqCxMjQYd0UhTdA+iZ7Y7vj62K8xWmKtKmzYuzEBqLsHD4HrsJJVD5Bf8R9kd3wI+vzhECb6vpDN3hedjqBAXa3tj0T/VeXgnIjAyTfPy+991vr61v3nrj7atXLls9uEy3Gg9OGS3PpTMkdgfoJHsHOOwzdjx5Qi+SDjigK4hzA2gIqZzjvNh297qv3ZpaXLj4/siHEn/gFulrpAc2ZOHc7c4ulbaMQAAV1Kn6sd/cnp0N4kGwUCgDV4MbaPf7DmxtcopTL2Sp/AfrHEbzoLcj70dFTZvxyt7u/nAxUqZh6kin+wfH9x9tiAco5kRzs54J3Qtrrt/hKNr+Dz96+I9/70/oIflAEic+/uj2pUuXbly7ySzE4lspVenYQRk9h7ru1kde3MzJRabIK69emz2aubRw8d3X32ruNO/cv/feez++d//hRK06GeWM9+EN5IoWmqK/uX/AqqYrx47Ego9xsqyjocJKbFmAme2KJB3qqfAhCpcxS8kzlvmLzsM/MAPRQTtU734i9BqS3yhwQuZgS7MceWFnoW/AloY/s3y7hMiflq7s3MsLNIXiMl3ZTXsQT56fyKCDqbG78sypPmAJCFrYlkiLkC7IK/jZhMRfdpihO8M0TzezXwFKNAgMHXxVQKQrQDmoUXZFg3gi49cCmLNfU6v40Ff8HGAXuhTYK4SQIFaB137SSYJLiM4Kuu1Bw/RGY9A8dRCNsx7jM10Awkzx4GzLgcK9Ax70vEMcjl5oD/6cIwuFvM06xFfSaThUCKX+I4kJVM4m4aik12UvSj0lGhagYAxG5UDLHuBfeNB6qoChtndcOfWA7CY7VppOEKqMh8gGbAuo9SAUN4sFusYKR6bVlXWy78z0XFVy8WSl1Cd++e4nd1aerYXqS4m6A9FyNQwXJOtFCJXjSUOqW2+wAD4NLHBEUOnnexovTWvuZjYdbIPO/Wmt8JsBgHGF3w11gjaAOP5O0e3+1FIn0STxZfp3xSE361KRSoDPIo7bmnL+xhSHZOlYhINoPEYN6MGsh+hKj0HVw/yDgFGrxUbS6Edj1GJMdAPKQQjhUFAtSh830W7t9STz6eyNC1nGax8J8IxYUUFSZ0Py2skauP6d795tsxEMFY+Gyyfj1SOZq3oierjeDVS+HK85+QNqwwQGzhu9ZfBDwSd5PZwa4zIOpEnQI5CT7S3sf5TfmDfrapkwNdgkBvvAzaZEnDhTFijsYWGDk18jn69MTvJRPpDq7WxsenHxZLhy5cZVaYY+frC/u9f69vfufuv7dyYmq6/cuPjOa9ejXuT6s72NzQgdtUTkNrltR85IWgsXzmfmrszNrzQ7+42Fxc3dgx+/99HRcGF9q90/6RZqkxcuLUuFwHIwMdG4vHxR5V2+J6Rh5SpU9Pjggw+frDzrS/HESq/2CmU+teY4O1DkfQAlih7xRTZqIhevB5OPTY3jFjp/C8INDkoyzfB1ckcsXYr4AbGgyZYi3CEr0NdxfTkKzhKeI3lBf3Yw8ktoZ1+PD3Hu+HTdCv3EMf/gRz/kR+oIhPPY+ZnjQG/hO97o4qUrkvjVpItNfkMJYgPP0ohAZkDOySJbgx1Ai+cAqKHezZ0po4udZFs0TVCtwlm7ddg6kJazsHzhKi3c5mDHeeJI44ojrnhG62Bxft5ZYjJIdKhP2XDr5nUwEApg2Hps3ALi3uSDBdJBMMrVA5n+yWVHXf4aBAWR3QsXFifnC0ISkN5u52xsEpEubm62JRoLgxHA5uaWG99WlHWoU64zpOXQufsPCKYHZ8PirU83tgcPH3/w9/7eBzyslhZKVy9dxiBakIkJ2blmJqYatVrVEKV+Z/kgNsCAMBU/mNduLly/fO1Tb35a/pFvfPPbD589ubiw9GR9rVitAGHbFW7rvUGhQGNBwD1vNbtWDKGyvBYzwthxXilKJ7aO1MBlhvWMr0Z4Xz3Xi2AWnVigBXogTJtFQ+h8B/YfxRYMBDLzuyFXGZiV1CVhq1FrOE/PhQYtvdIVByxhk+df4tgFU2M0xqFBFKFJ4rZdl+gwHC47/aCN4fEV+pbUQeCj6C5dvrt0Gx2nFm5nN0NaSm0CM2oTW0vRnzV9MaQwxEar1Evc9EV7KivngMu+P6GDON/ppdFV/AkFhFQXAmVo8YKwZZdh+OcO8SbwSnoKtsneCu/Gs+muYxaiWAwtLt16uwYMTqlAXOQHszhWxra5vFHgrj/d1BstjmMQlN+Q0qZqa1chKb9i9PATAhyCgrojY2ZS42vsNfwugkZ4HZgyzRB/HPQwREPMYUkKpUOshgN/+fJVe8EXZ2ysrWceogm38yQMn3Uzl9vi3ta2rEtepJ/dnZ1QnB1DCmcinDjL2mAtg8K82P+MJJi18aTZ/+TDS70l+9tMISzfMzYq5pvoHGSTgY3GrgCeaBKmC3wPgGKxt7AWTf/e5UHGBodKanY6TFsIJ7IBsG/HBgTGf7mB8WZLjTKhZbG55BeaI05OCYQiisMSpZ0PnG3HSI2a5EanpuriUc5UrR05lm5TFEq1XMeB4pThWKnVK8XB/nZ7bzdSlve4BRbyZ2Ol49HC4fA43fvQePnK9VuY6kZ9wtnBvW6sb6oh2Go1Q5qK4DbAZjUiFidBTfggGV5AYmi0QnXMJz9oSHA6cC/mMXPiGUHPyF78iY1XLIBcQxG+yPJXnxqvjuVLDeSzWJwYktb6eOSrX2/8nf/2t5v7/Z1Wc2T14MM7KzDa4lR5evba9lZ7LD+s2LGkpAxrFEFbB3uPn21UJi9fv/Xut97/9u2PNxh57q3sXbtx/eq1N2sI+AxvjMsLcpUvLEAuqtfeuXPnwx9/8Gz12V6b/lGClREu541cWfIP564yVrBHajCGTRB6cBZDVw18YnNxRSXZKcpWN6Rhm0WhhALZxQyo8CK4Dntl2dH7YO7Q8aMztJCrPbkHfNmO0A5CWLYTP+vERKh/cIIJRuO
tvDZ/+N77GiNgN2+9Bsn2Dg4Em2tD3cdkuTQxJQErJobcGh6O4ZaNndFpHHccCm2lyynG0nF6PFKvktWlWHb0qPAifOV07PGjJ3/0B9+SAaTT7Pc7x3Ozi9PT8x9/8DGGcqAMR/AjwW9IiZRfFnpbVQA+GKyzke2N3Qtzy0OnY+xsVkYWCYYbnuOmDvOgRpIOkTZ2tnaNXAiHpHqdlV6r08F6DucK5Dqugfwa2q2hza3O+vrB1M1F9JrlGeg4y+1OzzHPi/gdF58QkmO7c07nN16ea3W2TkQxnQx9dFdS7I8DItHfEXhgSA2tGaHgFxYuXrrAZnz63vGrr96ampzcXW1u12p8Z64uXRm9On7rxqsPnjz+4+98SwIuDh6hOSgWmwJOkupVFLNl5xYIjOVccuSzw+6YoxRg2nEOD9kwdhYhOgQCGCDVfsS9gZDT465l0Iy+cWpyyuPmoj1Isde8H4N9jDt0yDym84QBOC+QiyvO/wsE5DuoAh2+BK5OV6CcdIXFNYKx0CdvpXgOYcIdj/g9+x6cptMYWCloiUuDdEEhQWNcz++9+Emb5y/QTbw0/tIsuoWYk3LM9xhXGqdfPWEPADauVttYoCDXeg+89vxxreOHQMWgOxllYjzZpU02O6uckStPx8uypW/r2pAAAQAASURBVHASh9grsJFgPNmuYuSWlNI19LBeh/uzIKXiGf6FhgogJv7RmeWfFqx9DNkgIV/5cJP4mL3Rrzphi2dDCYLGr+u8wNTWl/U9TPDnTIPOajgyxTCcLx5vkVDKSDAd+JFAbv5PTzIyIteDSERRGrIyIwBXL19lHHYat3c2L9Dk1CIHxCcjo2qegpvm9r4IQDZ8Xs4VJrO2ZBbUBIUuXVCsdww4m+fLT/eAQlrzbOXiM63w8y/+Y3a6iZYJfnyaKQDI5utLprazflljQBzuJOlBbkKsNJA5jo9nkmdkS0uOYYH3g+WIXQn3cr2JPoJfkHMzhW6M1rD1xuFWs8BshhDsiPs0RwHb1g+XMDp0XOdrfHaEPYyaOqxXvXCqTeQYLoz5Mb8XSkPjXY+Q9rBC4/I9iLvifECbiMTXKwWBo7gUDKm6t8aAb8OYd7pNcGgFPQkC/T+YIpsXqiEDsjDq0oYDF9gZHi7QPQXqhVZ5+tlZmk4bH+ZrBxuNGzs5HN1vd/d7wwtDjbml5Up1ZrggYhrvkyfMV2qLY7mJphLSuZne8cCifvfHj3ZXtn/28xcvTE5L782ofyg7+9FpWSKESp129aDbn5he4ir87W99Y7xU/cy7X3z17bfnlpdnFpc6tLW5vNymf/iHf8xbQSExqlFwJd+gMi67B9vSKhEL2ajUkQrzm7qFXDGCGQO/wUo4ChbBoQBIsY48ASIQPkiKiy3KOlsSvEUo1+j+NYqtkQPJnoZDv0wEKDvFNnsVoAaDxE6bCc0i9wJK7YYDETIQ2W50zOeTBw8dNFv/pa/8nC88tuGB8Kyh8ZPla2gEDyfnS6vVBn4Go2VCQIEWvdGOWHDbx+yKx19aWJiYmoLDHDV7Vy3xGRna2d37+MOPP/jxh2YRVthul83/8vKFP2GLGh8R2Us4gVQNWPkpKTBo3nbP99S6RJlb+x01w7y6oFxWcGXnhZJiLXJABxjb9DGVCGo1X+Af0gYqdfnKtfX1VUXTFhYWZQPUmxQk7c7h/u7gx+/fee3asiwnAuE67bZswJ32oNvqng3Jc1hu1IsH7SBQFLyzixdGSyfHzU169XC3HMnt7PVSPaUhXui7nc7j1c7oR48bDXWwawrlPHuoMuoyN3zuuBvPNq0w7MFFa7Ix8xd//V+dnf/m45XVZ+ura5ub/aO+xbWPVsNo2RmReVo9mylxhJvOFOwUBy4JGObuuwYEKZl1KSGsNlSCQRxjmOSOX21MMFs1GsGaJQmaHzUr5+HIsQSegTxyQ1IJwuj05Lp6Tq7icL24fHfBz/GyRDMCFxSLzplt1o1foULESMxaQkcBcKGLT/KcbgJeA1sB4z9NrtwPgcrrEk4MqpiRoIzIJPOduerWfZf2YeNJ5AqEAdz4I61F9iI4NhkLEu8aE/Zykmmk0gquWxepfRoPB76M+vorLr/Ep3OD8KUxhyrHkELMM42YiClnfcSNJHT5AtB9xJsSw4gGeAaugmfMnCwbj0fP8QqQ67vjpSunHGtJT26E8DdN/Pg5NjoMR4w/RC/ji9ecnQsgjOmrvR2qUiPCFpzLpKKmXpAraVIJc8nn4vxIuZ3e7OQ83N2V6a7bh1Sw7XLStJpt3OLmetsAJLfd3dwCXh9/+GHE2Sb/Odiat3FD/dBCcRPxgHtiHWL62axffqI8bv7zl0lBTu57EM2GRDwCWnBhbtqybJHdtBPaMFdwrEd0tYHKLQjDOADjW+XQvv/++xxtsTlQmWeVoE08Q2jCrQkDkvTe3X4XigfvMCAdt37x9Uwu+uSKFFpTQII5Qd+DnwFpgR9TcsLzqek66Ihg8VM5lprylR628MJj/FM6XV0f8b0To1pv5Hf2olLn4KxwpEyGdGpUTxjHQXuv1QuNU75JH8xzy4zEkVy5fGF/P+0yEH2+RlYyQAjdJIHE7KN4R09dYomuufcfhptiOBaADuxJYL2AHcxVaNvUAkzqYwphEQrl06FqCyc9WuJGQ3F1wCOxKxH7wkTr7FmzV56YKebPLixOXL3Yerb2oL1/sDg9MS4lroRIysjbykIRQVIYUKmTxYWbs5N35y9eeOdnPiv+uN8/lb/n6QYrTxT4ADMnh0EMSPghu44Vn6zfEawJlZweMwFEfgL6TGKs1BVEFntEbMYwyq5TQGyiklbfsbfTwVKIuSYwya6RywvLQt6gaRgf9EVyOglOnidGQz3GEW6xSccVpALNLujCkoB858WWAX7kird+2LRGc2yu/LDdBDw8k7VUNlf1QkClwSF6fnRYo/kNitV2Fqwzkmk7uJ9h8ghGVhOZ5aHDqeTihQuvuG7cFBbWlaZr+JDD4dBIWR3ee/fvqyZFfJqdCR2y8C4mt4uXFiYaxZ0tLQMAdAjP8TLwokqpLmN6QnvYspGdrTCX8Dk32cAeomED58CXWJUzCyybzKyMTUenuISjvHRQ3HYa0GNR3NmkwL1jvnZD/N4POrc/vLf1hXeuXpiMGs6nQyrV1cpHwhboYZcWLi7NX9rceszqOVorE6yJbcWJacdEpk2+Fjm1Y0Zo18nJYM7JcpqGOhu9tfXeVGVo5cEztOf6lauIll2jz7x569b1V2/S4VyTDOLma5/+zOf+zm//XTN4trGq0DCEzJqAwCMz8AbjOqYNBToRiIssQKcvqEksDj4iaeboShjquDV6t4ERupJvQ3C09N/jIIaimBKYEZcoVpWzIweCgttVkcTrcJE/Ta5evsMXl16CBeDDJokwD8V6PUQQyDfSY+Tw/gKuiYS4FiyVdNFQVbwmtHnPMRRQDnYmXbCPZ6nrAvvEqQy06Iq7iWI5znErarrIO5ehy7Cvxs9JPehHD1EaZBJAPOtKGaUsR6ADV6heQsILVKGwEKzFZRbdCpRlXfBr+D08bO
IKMmE/nctqyuAJD5Mak9m0rY2ebJ+WQapumsYa1YwSZRgOy7L95rzL7jl+wYygNN03lC2RISjdqgkVcaEW1tPDtq7UqPkKnDYJK4L0+3d3wibqXdkkdz1k9OTbs9b6EYzcqFG+wedICCv4v4qVbNJ8znOeW5hAFaRXgaPxuj2UG2UWxxRVjt8DQHfT9RroUlQks2IBxk9NfCYUeePX3w4Ufvki583rbD5jewvT3ZoD+k+qfOQQeCTzZTINXr0gLsdXRJ+yyUHhNMNik4wm7+kZCC/yuC6k3Xk0h93F+fXlmbWhAUmRpYPJ2supr0U026x4fcxSF37CoeVuydl8UTS+5RaWgXOqF4gHEcHxDAp/6S1gtw3VWWDXmQGxh5mE4WlEYma8CqUBlOj/r7a5eFLc/vf/Sz/R3VZR9849d+8wtf+NLxSZ9pAm04SNJYRuGK1KbiyveH5S+EUcxnUjHu37iTkEb07fL+QnXy8j0k6myh5jA4WI121MINj6gk4HZuoojCT22QWFf2T511xgaiJmeYSNz0wqLIGYt91NeI+GwjCZMkFJkP8z01bbA5GwwFWzDTkTM9zuzqZn3jZZH/7cBE0BYMxuH1yFGwwYesnUPqAsnK/ZWl+cmJoamZKdE+mzljDiScF1m/YHJoPWECCRzzQ6cwgbNyfbSlILvVPU15CgIsG/HMxI8atySa4r85HGI0q6wcg3Us5uxaYoFcYNpn5zbh9HDOYqF/i7X+/CliIauErPDiKExFE4DS4wKjVIHM92IFrCqOZ3Q8BVypvY65KipLl2S3eX163kVJigfeElmf4JKXh62k/CnoiDv79LLw+CBN7BJTLX9FtaxOui1aRjnyyKi9+H+BDXsHYIOAZQGCguGzbL3qZPVZ3WoQ/vR8X/wLlPLP0klrTgjn4yNshAfPL3ZWNbHy1GJehpLYUTblRGUZsBGSRgAn9uC7F+U7RTy88kJcuUyaETurUh7wbMobhYXOWTyB1prPNHnqouwJtPmi8zp9IJY+NyBlgUVhvrHoaPOGn3mJvae7wYVELjPNNfmXClTOQ0wkCU9luqDtV6vviH0SQVRUs4ix6DJRP8WBAKESyqBSQS9AK648X8r3wlaK8bG7veN6r8nnxwds57MzDCqrKXBZ5JfoBewjtUfoUkrrCNzd2e/PszFHxzrH0oelfKEbmlTeYopGiAdZGC7EUQws1qG3CnaM25Yt2lSYQSr48nr10JBvZtYnzMFVGbi+EFd8DuDKY+mBFptGYZueWFTDwwfq4+Wwng2YXYvMAb72+sTQ7ExvTBcFhoZkzOSCNEbjtJRBPUuulpkakcfQ3mEHzKCFgDK0iSabNK8CabM1OgMycOwp3nDJjyp9hienYWB+ABjA9y1IoqnS0FkqrgQ3E1ZJjwDniSvEpvkEtQoObe3v3rv7PvbsLRZXx8+5+RVp/w28WQXD2YmKSNkQzdiW6bpExniUWisLHQO20HBFYkQyxaLXbs00G7gDEUVWkVJeh9lKYo4ILUjuITQwnx4oZ4gbkF/SIeEAxdGJpZy+8/O3Djv7dKz52YX62Nn2xjO5rytLS2Gfhyeq4RlQErE6p4fNmaXpuSU1PG2WW4rqsIXS4ISqYsE5t86Hphma55JEB5PD3UsLY/Wx/b/5u//Gl3/ldbDhsCr1mTI2CK1k3u7ttIhdMj4tz6Sg6X1iBYZGNDYxawzYT6ymUtlJcdOeJysE/CQDrT5ashU1k0EnBZK4UowU3abKQrIVJodsrYYB4gmd9uaf/cl/tfn8/tbG/U9/5gurK5fFpAHtdNCDVCU+gcOmXCNoW6BdDcZySYEpCnPUlFBffg01y/O0RtwuECLsPIZGEuTEqtwiqkfFZVnB9BgedgxPt4goMUVp9gTyNSy1YqdYAognm82IE+TGMjLdMI8UkcZDGOnEcXCu901yfjw55AEi53r90TEOCzM7BhkeL2l69CQPwaboUIqC6S3ox/vuPrhXmxrT+hOi4ujV4Y2yyfp7e0ydmo6QvcPNjWcCk4pqnbBZRCSbjHnsF/8PmNLojhoHX3lpeETGTBATtwx2L8L/1cmnvtgE8JlSJ3M26KDeTsuY029oTJAhGA4nXWFsWAXJ68PAvMEbQy/FNJFyHeoDPbht2PH+MaRDE0WlDlVaMu93WV7HZMY54w0l7cNB5GbwX2u9g+RyxnfjpucAuWWxI1HUHvcGpL6E6Xp1lijKRYZRZo2ZBRXQm8/q+Pi23JzJ4HDIy+oVRapMDTRwXD9FjBUjJvSHh+wf7JSHlMlF6YmYKtRkcnH359DsUg25jT/K7pOKq6L/o6FgFowKKQgQk+/OOSnfRXLB1Eh2+qFGjwxrFcy2K3Z9HF1EF2wLs8Th4F9kVeQKOZ+pGGHmXz6hPuU+ggiYoy05KliDUYRZNLSywO4ovxpQLqiuzX/dXeImZggs1tvJ4HdAFFhUB0zyIr+W60NjlKlyTaAa4BWoOuPAlMU9vc/iB3WKgWYU3lQ8ev5GHDLRj1odbShP5pvKaMY4YMTLQQKGeAhtS25Ot40skzJM51EZ2z3pU0QB+vy4TnSbkZcL8aFtwPfJPWMkvkaBJPrVe/UOO0PDs/NzXLhIwiLb2olS1sbITk5p/dnE/tjmp6P8VmiMC5XLRcso+pGiEQkBiobps5Kwa/BVqCQWVLzadJboNJp1ROup4F5xkOjURh1AuJpDkqmXWpf8LdDTOcwedMZZAbOsS7Qosy6aRNS16lcAD0CBNNhBXJPqZ+vPHj+49xG+YxTCS/qRay+wdOmyDaxqk0PiFlLf7ZXcVuO134oXMY5eOvKYrolaANOfKaTOELN0WFBFLHvbe4qOddMpVl0ASDJ5r+QUEIbhldiD7EaeoCtg+xecC+2B8/b2xqNHD2RJzs8mP1CGtNomW3UwvPYONGNNOg/SiJOcD7UxLevWniRcZpEVYtRxA9gCkBLhuWrWVFzrHyhM2OV5UmL+ja++/mtffXFlLsiZnjB0mDBYZAPApQZLlmNpEgh6UDeYejbU6nTBkLw3S76EZB/FgayFsVRNnjA5KTpQS5jONcoiCO+QUDHTgUIfvHJ9HqBn8aEu0RP8eWP7rf6De+9ubT1aWVb0DBJL6Bq1ybTH+qRsxE+Snp+pbQjvQDHmiE8rQA7PS+574W4smnwjS+kF5g7aRp5PAiWCxnxx/OHpKStCGWtaJmsmDYFIKdo+Kx5byi2WA7csVibf+WF0fGcMKN7aZANU/JPh6UbfMTJzjf83HkmsCcPg9oyaw00TFj+k5wA6CzZTzXjEr1+9tLWzQ1NjIMnVssno0emQXrSeYxBEbUiSuspampigDInaYub8OCrhYroFn7OvtbVgyae3Wb//bG6OkgS8PKl6WU9DDTA7PmPoq1nHN5pNUUTB6kQuoLOE9tboaLW1hwVPFhNxYaasRg0y4pA40+Elr+j1RJczJHIiVBTGVaAWngiH0RlsSYOSJBLGJ88pWqBEgsjfGSo100ITYYtoO1RZHod1WNnz424/2ioZXz6xet+z+unfQpBZv3BmRlvEnpUI1wyph39
Gl/E0I8niXdgIGVgObylHatmMrDrjM7eOjPZ2MYIYP/765Z/ckjO0vayHXylPngCveO1jUaFejl9wpD96LAllnaIXhvtHnkVlxkAb1KT4KegzUtj9wG8bJ1RYLUtrKPwsreYilmAYH5TstiRH5L1gHQFtDv4gYTIkQAAFij0B5BtuVpT0xG+YXMUVJbMB4RVvVZU1e8ET3e5in45AwPdCIQjmE/0oE0/kIBd4vVlkLdNqMuvlPMBf3O6a8h30KgCm0IpojTgJGtFfirjFylBsqNFDyQ2acYvkOOgtTNfmpmpJGDw6zaZOIPULIzuKEruUaj/Wo8ofcZOZcZfLWz5e8r69RU61m/DRsV47O387QInzgVdBRkBWpCy6tQu7CqOSdWkD5ijR/BJYtrYhfb5siWEKgEbPb19b0/RVD2/+LkZwAJOOgNHUQIAKbVjh1FgSDuExMTjjjymqOfjE60tWFZKRM2YPqFRHqvwCOVQMnpAoJU850KCVdFfQ0mU+43WkUfb14TZ7TbOi+vMma9n78ME9jhI4pIWe8egp0Tsa6Q+Gp5r9cfvUCHjK9F241HOtRMNeF6UYnk6Jc/PzEnPRFPGsukzSh8SzSjlglSBv6Cb9taA072AUO2LKYNLRt5hW1gVKi75mpUP9ifEgcRsPvPvuO8w0iYsSLuyMbh91/WHxSxmFsgTsatRr9VhnK4sr47XpzmGKN3ViPyF+aWdBargdTSCRX54I/VjsnZE0od7U+On80uRf+8ZnZ+v91u79cUXl2Cu8CoOnlqjZg06Z5FF3+KiDHSA4TNL4KT019rxc6ulmsUVpkq5ldNamQjRJ30TAGOik3iKuV7WdMWjYmE17uyQW+sF+Oz2YK2oqdRODZSWPiVhxrl67ujAzizqOi1mlVbyoJIJjRnJbxzuFGtGPJ5geLi5mnOfgZJgdjkWrN1hymhCMP5m2ypDyE/qhSyTNeGRYl02Stb2x/nRkZJWDNOxVCnHh0clYzUNyFJU09hZtgkpCvVH5RvGB0giQTyPX+CO81mjCQBx+LpcE68Ao5ob/yFkIIJOoTNH2ZLIrvIzgbErlPyJdLi9T4oa/+Lkvz803tzefB5HDGzJ07yFg4Blc8zRbst26dfP0fH90jPSYG5+YeXV6WsYp1zSZxJ1gG+KrV9bgEY1C30It+wiwBw/vmT76vXL1qhiVrArXC7M8uP9Q18tr127QrgCh184soCdGSm5FsA1JXLzsyRJ9deNkVwdDCWDjw3siTbGLsKWiFuJzOeOHsqXSEO8l6vNMM/nWt74F4fVf5DdEpWGy5ohbxvtZiNJ14Aq2HlAdeQVKcgaGlZM+gc9ncLxcHOKhZ5d3ZBmwUMhQ/rkMYwoUw55SsxLrB4UYgVUr0CWRrT72B12MiGWUgSUjVrWdc4RixDMKodmr9hgeOipGOsrVnjsUhp3ghrXhvqYx3BlhZTzLWC004Tw86VhuAFUmqfpEZoHS6rnG+MTwiQb7kpOFQUBTUSneF4YSlynZYZYUSfDklyNyMjBsy4TgVYRbuL93lez/SHJiNeRR4ACqVoll4n7qV8hBDrThy+3QqEg2MbbgpigGfkrp84QCoDzB80NGDqhM7oOzOioPicgsAM/p6shFRWNwS8BPGvHsZNPos1pWlY1jBKBsFgJsOi/gQZS6c0GW/d7JdmdyrmPjqIgBaxx/qDpFcImxFTdo9FP8KO0LQCipVbQH3ChijKKB2UVY0GT55eRljEr9AfMA6Eyy7/lkTPVjfdbknTO41FzXho8WZ2v1kem2xMnVud3xs450Mw/BRcZH5SxcWpr/ymdfv6zmaHkle87yxbDSSnbWUwlR6XFuEumNW1SBtGjQjoXRnDXDcbh+MUVV4LQMhT7Fxj0f4U84Gh+dnpmut3qRBdaTvou7BIChhDh1gRSJQhgWCIAIFPicmhNQ6/ED6cmx9fy5fUOiiwHvVH1mYe6l12+xne7ffzwytnXzzu0x5V1n5x988B4hbeYKpVNXdHZ2sL/74OFDG4Mko8+GHpPp5rCzh57zinqBmTQNYulCXJWmmfRTS36x0gX/E5zFDfk5MV8BjRJhsvWivUzsebww1zzqHhx1W+S8dAZxEbuLcbrMzqR3C+1Xl/1h1TAWsMYNdTQ0TipDqArl0Dc+H6sDKBpjI/2TtuZQo+dHv/rFz0zXDkdPDieHupAqv2fRkQqxpn4cYqUkEQ5DS8+CIcZMYh3Rm8KKIgjheDkZBn98GAsszGtkVMp/iCtdYlzKtIve4xcxtKm6Hd0kTfLzz9GJmtMzCmO5G2nygEhoYYbYiQIylBGvV7rVyTjkKoit5U/P5If28CQg+5U2kT/z9l86OAO5zSsHJnyIri4bimLFYCYhjnvbb/742++/+/3l5SWpmz/4/l/aIQgAwIumG2KADohKJqqUbyx7TBBhjx6gOShlLjKquDEx1RhUCC20HD5SxViit/kT+5XxBIajdv+QdB41Ex3xBDItQCPN8s87y7NjbUrhRL3b2mqMT3/rr/3G6tLC1vq7xhzlGSFBbLNLvkl2z8C/Fpfm33j91dOzR+OTHLWEqSydxbOzpbW1VRLFitEJIF6oqhzS5tS13r55BQ5j0rILb9z43Msv3+IbcOY3vv4lWpMLkRizV1GBmZPB+mhsb6/LHhTl7nVsEd47lnJifx98JFEA0+JHAnWuvzQkj4gpQgCL4Xyxh7jtpu2bo11Ac2YOufMpX758lWth7PI4VZUeYsVTvqeRTPSeBH4Cd+yJ5kSViwKVpbCxAj8B+4PHldrKKPGywL4ipOix6Aa7DtjDwz/ms0GJ4G/YQZiCP8PbCmvIOf/PL8M2HMKnDcBvORPulZsoQRlW9YCYumbuwuNmfS7SETsaOwY6nB7s5GjroiRwI/GP3kd6kWojEikGw/MzTc4VPecak8NcX3rSzdV5A/t1tROjeh0k2AZfo8gN2WFM12chd3VqcIBN5jurPGooNlVE6YUqZFxhGgE6xMzknQkIKFB4yVAtLPWMhCCVJMnbRu14uH86Pzq1e9IRHpVhHXEVhh7f5+l5tQkADhqULbI5HACQZV55NAbgs/qpgjzFtEAzm9P4wpSmjOA1aqKB2moBgkQ/Fwd/HUM2A+FLsAwno2fjzfr41MrS+XRjrLHA5PY0G82O17nss/IQffRwMD86fWV4+vaA72hCqsJ4zc5y9fB4+gVg5RWVPZaeDsINZTPblApAHz6QqDUEc21IVFy9lIorMsunFoJ72ztHNxfv3r2n/7cyLHaFDWcvX7myurJiI9r5ZnNhPp0vICC0hlFCC7fv3NJEVZs+QgUisZMocbb3JRJsMtY7bLHUSlXzsIQAsSr3aUeYPa4GqVyxWPawxUF7/f25xalet1NvTOOa4AM3WVGMnMmJ6dR1nI+oIJe1SZzQ58GcEXDW33v24f3miLW2s+/0iG3oFxcs7Hxj4TPTy7tbu7vPNoIv/gn5NKaQtIol8SpllfaehAyPnj4DOY2ABZ8EZC5dvw5Q4s/NyWnI7kUOYK1NcR4wf7PRn1UswMx//MNW6GiqA1TaEh
P4LFvnzZ+9NXLUW52ZGj87fvLo/p3b16ioDx/dc8dEY5rOBH3RkwAbwYA4g7D0pQkJQZpy6asFNPIuEV1UL32ZNAU+GdjLozU3dfz1r93+1jfeYFyf9w8iq+S5FrZQmoNIWunDp2I5xF8T334EFapPw4FoOo7oEVAw7thCI0aA0cBsPCTnE0/FVJhEOJLLEk+PGeJ5LOrTI1kg7h3f6ft0DytqvH9Az5TG7N0aUMom5THqGT4lxXNgHwZCUxtLQp0OPracH9/ZPVR1JovNLYpkAZxtD0UIhnhconflppgIY0OzMhhS4sn5PXRrbfrv/tu/NTc/LQmme9j9+hf/x1rwSZZiqxFpCQ2ewEG2oLYMHdlPSuLmF5bE6p5vaK7fRpj9w074LT6Q2Ho8PaZNcUh5HJAWp9FYksAm2UWBGJIBQQpKRFqOMILhbILl/vw+OrS0MH/96ppGmvvbe+quVWxSdcgPiZS833pEaAytC+PoxGnncJsmtLoy/dH99+3fKdi09Vyy7vlH70fpjFPt+JhBg4hkVstToxFCM01bJD+RRjB2qj63urSsOkETZbnUNojBWFjhh/2EYFxweNSxqWa3t3/KkBgeef4kwUi+EqnF9ZmIhOyHjPuMDE83FouP8Xx+QVZqnwfk0uX5WzfXlLvPzC42prUGmLh8bc1OpLX6zHvvfaTqbOxbv/oyLpjit3rJOAhzi7bEP4ZFh0GgDiDBNEGYbBzjqWf4QsUisSL1C+t0X3gg+GHcXGnwLfZBTBAMLTyNNPUEWOF0FKy4HQgkwsh84Jen+4bKy+F70CiKQrRd0M9tF4ItPzm8LL06vTLaWdghE1o8hjyBnVlJwjs5OmyucOrwS0MfFy6Oyj0+fMR40PnT/ovMAUOCpIkD8rlr20C6orjYI3WMtaRl0pXYsvGHkzBFEYphG9B4dhkpBpdRZ85R3YyE6UTp5smSV0uwuv34SP6orhFGNn750pVwwcNOBHCh6AIvHfvMSBgxwQkKd7A6FuqIbGzEXYpHcd3yKywx1KSvjcuac42CZ/hKeQeTylzL9REbeYrnAx3nJ/XQWRSSlF8mApCNDU2fHk8MYUPp2ZROh1aJiemO5L3TVcez+8mZ+l/wicvF/IIVMVyseZbW3/6o1ZuScFRDhTiT1CDJuJ+4c6driZTITmhAKGFRc6DmSf18+IP3n46d7c9NjzSnZqdm5ufml9CYbi2tziHcnJrWX0PcRqwGXRnb8FxtycMEVOK7wh6FPNQTpvGp/O9eu7/d6x/o+NfrHgHtEfZ1Nqp0d7Rz3jnSkmP4MHnY4EkaYxsjcsqwOkY9TpAIetpI03IqH3o0TUCjnVn8udk5znt7xj/84OdHB7u18VP2ioCcnWVHx9nz2u9Oa77Xx7BYfcMnG1vrqJ33kXOfl9+RBAnvVvHd6WhOIWxweCxOMJ29P/hObZCd/e01yKkbAsEfUy/ZQZDgQhswsE+WkpC2pr2D7vW15d7uhlzFmamJ08OWLcqvrC0TR0IsBi/lIanhDGuqXvQLdBF3hSpvymUV4CVRIFyKF6KT842T72oO032jPiJZ5vhzn/9UY2K0q58vFLClZzDVM6hO+LDghsAWiitIbHrxNwdnI6GqK0OpYQKh1cIT/E2pC29IAiDaj0pDn8uZYGq8hfRAgSfPLbeEHQjsYh9iZsgwftlc4bT7JfslfhotOwjrjtHUixfykHaZzAL4PJgUdGnv669KtkEC6qFkNXsvD59OjPLiysZSmk0TSJgpm4Zj62qE6USHa0vTa8uTv/4bv3Z02kNMgzMlUtsSOYkrmIxFpKFp9i5KXQegWCjsRpPx3uEtXIgkiHZldXk36MaJ/CG/tCTVcCrpx4WnjZwruGbAU445Pw/09Yqdm58KIMsnkw8aeAv6ol+MnFFa1BEfoHyylipN1iJA7Mg4MYqHj57eUurYnGb0X7268Gujn62lc8qMnSBF2bKnR31CWJhponEyzdqGpdik0bqXb5nLvH/cxXlkvGqbIvulvb+HrnnmYaamUO29rjgoBwhoHigx2H22f7DBrX350g1yNg78sNXR0Ga7zbSdnVlRbo4T8isuLTePTw86vXW7TaoFx7dsx9htPdtcf/rw0d0bt1+7duMOtU1t4divfvUKUATFVOlFxSFRLDDy5g2IRUVnRiaRhvhQ8twijQpO+TsOdwdQuhJmxjEWoQCPSKyExMbPeX5Y/WHgLs6V5fqCUZFVEVdhmTmgXDKmivwLKpcrLZNfRadgZfWTh6CwDETEWD+38nf+EkQto4fOKZSORiJtBDcXrMlcMvQiurCiSKVQUnaSjqswcazoYgZmoC6WcMXkPjmej70TS4qdJP4JEaGXBCJbNbJKkQd5Fp7ulclfF/eXPA2CWHG0TLKL1lTrsNlGG415mbDNq3dmzsdmRkbrZ/6NTyZWOK6H6STLlVAJBEYEqFCu9Q3TNOSI2sJnyEtDczLnszohyo8P8znjNQIeA8PRDEsACp04XB/pUo54LtXYU+XciVVEmU7M17oNKwM8kwwp14vWaNcK006z471WkCzfyNnwIDJYtEMyvi5TsTWLQ+dCFYmU0xG/sCqLUDQaQxHzJqq7gtN0FfJe9oXMZGYBmvvUq5duXVsgC6dnVxZXr83NXZpuLtZ1ELPPiL4LpcGr2XHWGLrRRhlMVirJGNTKskcnluTLzyNmv2ePVxM44hzsqTuJULN9VPuovysoOz7zdOu4d7J12AkMHQWcBUPQC3P4bFhlCWwjqCqIucDSsKgnR4+BKAWr+0/rw30xNR2PFpYWZJ0TVAqU5DVnDe1VrqpsevLq9Su89p7DeeIhwGeccrqajUs6UwgMOHQtwmmU3XiXLX+FxTFrVhddxY0mZX64RsZZkgA9J+IqIV4Rp2HRL3VXdprf3d5stw64r6QWMuEXFuY31p9BWjvESaznAxVeEh9ki+hhnH4nkJQykdoMlmE9j03sSZqPpjAaWQFy6gAE0/CHf+9v/R1KryrlodGpoRFxiCPmGTrDFqPHAhbqg8qfqFxZlNB7JZzKGjkRVL34jy8oJ6m27gm6h/GE/CJ54qGh64UxhCXl6aEBssp/C5PKs1EYDYkUjn/I38E0NFhYR/LVIxSjhjLl4gCHgWie6tTtQtyUsXoRUjkiPZK/MDU22VC2hcwPXRNmzNvC9CG07JTJ0yxnnV2ezKTuniQiW2Uq0B/YXtyWXaULQCykUbvapzGrM9qOmIT/2ObCROz3QqNDwVICkU9kktETcjbtG0nzAWvtjNXwq2BTDFCV7hHD0QgcfjRj8HBl4OB2zUyHRvZ2EbEg46E8+/TnFOwkxMBDO2ot4SdVTw4Lcyjr3t9sK1z89GsvvPLa6zduvjQ01ECCHqP8jsbHEaFEQbGKHXBEeRlo0F7sEG8Ep2xOFppJiYNPZXhbm5uWaHVFb4st3FODD5z/5LRxcrai+Bl6rD/bCNth/JL/RKkUfF6Kw7Pdnf7G8322I2d4n7Q/OlD4t7w4029xSulV2RmxjRDL/qjz+OGHuzt7X/7K1zm1WVQ7Jo/mLXywItgQn
EIP8CQmlLJd8okWCqOiPyeNMkgHdQJa8IuMgCgwFbJEaGErVsFlZyMKeCgL+bVC1fy3HCMp+3J3rIiIOlfn9XSY4BgJF/HIEs5JN9g5L2icI8ZdQdlQxuh5LzInfJfREbd4RpDQ9FTRrwgq04gWk6fJRZaeG3vLUocWjBc6+0qPKJjghfhAhIRY9kASx+RcuTcvcYQ/FuuKpp0jalSa8hW08Tu9XI1Coi85MHdSirA7H5taEKWfGgxP2356aKQpSU2jWPZVY3Y+PilkH/OveigUDPgiT8M9Oc3iEHCNTy/y/+qoAFmGF/eR+9GzV2Y5SFuVQ2zsPo95bgskCxmXu7Sz1NcL50flJyBCYIcREPcy8WQTlcyUOIsp3KOcfkM35q+GmMwWdYU/0dMzisjpmPY5AC3yMyaXLmGYR047SY20ojgCRXVGOOtcszko2zo6bNu+Ev27TGxDR4LaxHxjdm26ccm2lSOjTTWRjIqPRw0fLL5Hpegp2UJUC4pUGUUmN57mu5pe2TlmYgqpTkeGonOtD3Gqs+H+0Q3G7f7hSed47MGzzvPtH+yIlUULDlpXR9CkHAQkh49fG/WkPBCBJmIsh+2t5uigu/3oyuIEU5EHcWFqaHL45Mq12yeDRnu3vdPbm5mfUcV2eKajVXupuTzJD1ebQh0SpEQI4I80P+5QKIeOl+aaa8tLaUthq6HR8fWNHVRNDSepowLErxtebJAXvBsMosQArHUDm2GscWJqrn+w//TxfZWXZzptjJwJmB/2vW6XUcXyG7EP2NFgarqma8noGOvtMJUa7JEgGqVe+2EBeR1L0s6FJmFjv5Gh9tjIoY5D0mFefOH6xPQC5ZAOzAVUV5N9hEG7mxMbPaEgUHNjRa+BpTU1eJ8W6AKm/lO++vXiTL75fwi7ICdOQsGC+fSp5B0VPuDHUCgOHgiEU8Vqw6mT+RtdNtYgRMRwoh4W9wxScQtadpXoHDmlsyU9EkkfdofAZfiY5Cc03GLbxfj9NM9saBtH32Zyls4aIugu0rUoO+dQU8QLRoZsCLa/vw5o1kVbopPT7bNBO72KtBKN0ZSSamPRbIu9ZYktHBUrWzhA9IFID2WIFoDRxQtq6iZa6mfMOHlhjoAi1lWMewQN23My2kUBE0WNVLQ3aQQ5W7givrh4phLcxYo5AsRrJsJ6iucNmVPcbHGvKeTQhBGc9A62Pnrnzf2NdTCEWZQ/qxn4nZ5NTk532sQW7ADYsEo6kxF6d43jxQ7S7S1Ptj+nOdjYg1XHltDDM3w/eZ3SDuW7Qgz6EQ9NSr8BAUexNQnmIOKM1z5+9ODn7zw8ODhZWlyxe+rI2MnSiphqtjQSGdGHK5BJGxoy8dgOpffuLusUY5m2onvHx5Jpe0HBmwKccBlaOV2RBRLGYwaFbIKDwYWgWvGMpS9R4f0gm0sq+QecHMlpRFEdATMAB+aRfTmfhK7glecXGwvqkl95eDlbLvYSNZ4lBlMQF//KUSExYilrmVeZQtKQLJyMOAatRwT7i6TxMteNlqqlGE9YeIZhBJGTKE4/FcqlmUl9pAVL5FDLYuL4ZuHG6CPDtKSmCEoIUiliJb3ynBgyKYmkXhSiwWdQb8UNwUa7ztNz4aXJwVD5JwI0UBQ19OH9e0ncpcmxzUpUODCKH1BKTL4EVBlgUQzFDqemCmQisA3W/80aDHj/WFDSnqNnuM1v5ahfamZcRdLnRAXt9JjIvsEuStKRx4cLWDftdLpAw54mCaKX+iMTwndiNBshpYk+iPLxRM5SGeSWx4MLZwmWlAhE9KkcZSG9Pn4b6ck8qEcdbNuOUDLNxkZ7gxE9e2SlZb+4yYl56Wz1GiZyaOeWKHDwDfljqt7BB2CmXlVMuTTsyZgqMvaZWEfAJbWEPhPiJTUFYAxIhnDGMjk7Y9jA0T4ab85f+pM/+wkLrFEYI74AayNeeZH1ZKpaWfdsX3AqUAGkeL0nsAt7rf3G3Pju7qOVmWHN+U3h1mpj/7hbHzlemWsO5prrG7vbOqQf7x7VIt93nuw0atNUB2Qs+Cxqh9SxH/VVHs5d6bPEpLJwalQn6k29JyQBswxlUhkWzA7mJsUFnCB57CrLHyWbEdzrC8YeKWI9P916/tTOIEoA5qZnONDWt55qsAkTNPIZn5iOjo+2rHERgVnWFG3A2EhCqYkggWkl8Ghl4zjpH5319HW2/6tMkoWVG0PjR3vb26eT54psFuesu+GlyYraOUot3TTuVQpgoTxIV/RM74vGGFzOu9EOQgkpmZRLcICInMggvwS1rRH6KvgIQcMngqUx0eEUni6/A4akCDASipICWyqLI8pT4e95FtzwvFEqI8hg+9YqUvm8O6Z44ex0d+dgYpyzjuZEmT8B0WHb2Wiyt7aSvCEJQCP1uFX4sxUkHPbtlHJkz7XtZzOza8f9XZkXgzO50/JatQIYiBpmYh4f2VNSqs6EhI9GSSAbd3Gw4AXG4IyrItNgNT6OncT/bLz0IbgbPA93IobhMzxmMFHH3RH2hRFVUPLJUYsYgStTtYCBKCBpUxImxHHObok4jJvcm3RjHrR2dyQr6cXkJZpjPX+4tbf5pFB5omKwAswpyuJ5uzvtmSkpGKgo9p+f9GVijChhF1ptdbZIvxaxMDLMrZqsv4POpdU17w8zDzcJDfqMvcIXig7SqSe6Pz4rwUvaUHZFSCr86URt+vatl5szArFiicft/XWxS1E9UPDCw/NQzeFh680f/2BheWVMalZwAZDiSIvmHsjEPIdiIEkn4BPIDjT+9s7smhZlhz0D6+BELCD3OONGeJPbXXrBrmA+PIYo4WkFovkAfiw2eOo6QtHIMMZA3nxlizqRqF3GBdQwjzNQfWB4bb476WL3REKGbrOaOZ+7Aqf8Cp8zDCM02hh95b06X7FsYgb6k5fSTfFZqaaemsmL4HekHTmQ7TaQhHZUNIRQW8GIiI9qogKUpnzh/PSYMgb0klfR8VwIFDQ7s4SsQI+D8yMoZ4IBWgGNSpMRj/v1X/sKeyVmDpW2wN+UCgS4niBEDgDjiOaJBTeAdAbjzqfXGL7XsFHjOQw28/yxzvyVT6sSyOfijDDLWsGKdcWKMsEycqwalYX/nU02GzGYvCk0YPhhEz7PaWfepZ5/SPWi6ns+JZMs60oLCID8Kxe7kzxL8kg5DUpoL8Jbodvhyd7G4Njmft3zoe44oXVu43O+x/OFZiPR0yZPOCrLu+ObxpGG02ckMMwnDD1F2ZQ4k89KZFKBc/lShsDHEunEsebTT+5G/IRyhKvWQfV6k/u+ubA4JRjkt4J+BdVBK6gYqVHynbhVPZzIITcomCASRcxiyevo7A2fdKdrp43mxDe/8vqDzfa/+O6fLK6+cuvG529cI83Ot3qt/nEfJY92a32ZlXmITAT24xivS6tzoIFQ/NtsAblt003cLN5iyqM9q/p99pyXosZkDzYFvcf1rwrGlsMowSM8wDZsms+ODQm7f+/7P9hYf4ITyq2YHBt68uRRu926fPmK3RT399sLyw3cQrsrUbAiyEEpXA4moZiCURNcXxxaCTmB
Z9yEmgP1qYm7nZY+He+8/+CbX3tj4YXa+fGTpw83Fhfk8Lou8iN5CeUZua/Qk58CLDReDCyfFeU6nyNI6S2OOMsK/8izYFDFJnyH3eUiLMcKIU2LG9OJ6QJmWXbfoQge5Amwu7gQwi5Cov7ENbOcRTiG+FnG6knU6KklW2jWb6zeHh+bOR+alEnKHgDXEq0aPenEmxqUsiTZ+QWZMjQOrywsbB/s1ieEedRdoa2OfuKnJx1GFXFlOJlgSMJssJMhvmbqkrAzicvFwm4CTXqelQ1S6qYG6aPkZyHwoeJINbVoXZQAHx4DAmn5GSwMvMincjKAYwlnmsXuRLiuDPWZv24j/PXcKr1cDJKcKJSgtdWr6qt2tzYrpzQX3OrCtC3nt/a2PSMtoHllUH3GyAO3peZDwZg+xcAAOJ2DXaOiqt64tSI4OtIotVy9rliyerd+u9XUUdT9oBB04lQmJ7Bum6nb/0qyJn4fZwj/NjPAqNoHXYPVVn1r82BhqUORonaMDKlDb56d9HSkQr2jE2ONqaYdOHmndfWVLs/XEvBl7YnVEGs0Zo9GYGW20KA4HqJ54kpplBRUSuTZt5BvQatQ2sdHJIGhhFeFw+UL7b76E+t00rMiDzzA/XlYEMssI0XspRQxmZMZjdsq/LuQMV5jrS2v1XOLAj4OQI/Ku9zh/6QXvE79YwZAm8lJmORdcF/SgS/uzMWe4xV5vgt0rQvC+TTI1AWiPIPjCLPLtXuw8CBOEdH+xOugUOREXuNEYfeonAlcDRGCJbJSzud+2BtLNWWV8uzHuYcg5GDocDJBAsOx52GmUQEVIzKa/BlJE0I1/HxKrcvbyp8xsxNe5o20ZLAF0iUllrtbhpfrzP6kZ6oZXig3i1RxijiSij8vXKD6gvwtF89clNTQXfn0OznhD6iMOXLBR4MJHXgXgNohizhPzY23wPhKPJuZi41Ny8TkTemjpFBRz6U0eeZHkoc2LPin4Vw3xEk5VzQnp+jk+fmZtgjPRXAHpzLiVJW2jNRrwleDmYEnqg6ultdHc3LaXxkZY2ISRmYP65S9QprxsbMkNjIfMyt01Jg9GV3QgOawvTfhZw8th2sq4OCtRbrQ2fPeFP2oES/JR8wh7954vkndprPLp7uyPPm516+sXeusbz/64Zt/8fzhB6986iurl28vX7n2vLP3njzE03FdJ0WYgUBObU1CJeZx3th4/nxO0xVJkLx1Ew0rYDvdhDlI8vbBEYmD1ZUGFtqPxaQuBBbUqmaqwRdMOzly3eCwZaeVn/7oL9lDS7R+oRWVXN0DLhOClc6DpPkA7ZxFJl5weqAKiQFUOfSElCVNxeZaIFtFYni58djYql459O77H77zsx92D/76//J/8Xfe+/m96dl52w7rM5c8B6wba4FeMtnCanPGQ8sCBU8r2szy+Cu0Yj2DyBd/BptDXiiF5upr4RQW2/loW/kz1+fT0pRnAJRn+JVEwabCZfH9KHXp/1J0qPAYX4LMujvSfcRawpJJH0750fP9zScjIzPsIiEiBA4F8rLByVJzyn9ijsWaz3bGsmQZW7LddCpIZlDkh/rAg1F9a7mefYSUCpkDlnHGEyH+EV2EliJbUFVu4gGeB0q8EqimkFowWSjLpCiByXj0mTCw30PeOcA+DsvgbkHL2IeBXbIyA2FUD6AWuGAFZ8Lw0KSNibxXOVo4W4LuR7duXuMSlvITmA7ZD7o1eV6HI7t7W2YXbZxTCKZS1XgF8z6jVrZ9YhIA7d70puAztf1190hekjBU9AQZKDJOqX2jNic/Svs0e+CkqZCHeILN4uinPYJZoiMAGYn+ydwc3Z5yxunLl6+3958Tqwd7h7VasxBG+kXZc4q7OdmewxPZr5G+1e5oEGNJVLEFX3FlgWTuhugGgXKUvixBkA8iXhzADA0z5xyAQTcN3jFwKp4Y8PoHx1jXNB1/JXbLQnZTxIBXpWqPpJUWCrGKMRLWb4WCgsRhmEv559G5vmKyhusJ3ukni5TX5594xWRkCghBBhzHyayIwyXlz1xW+H5+Cspn8YNk/iyqTcYcTpuZxNYzmDwr8QyXEgYpmDDUOM0cebCPUIWVzU1G6bOCEX9OyM/YvcWPhb/7Le8FPeOMkhg3sfsIxDQkxdnjH8rTyrMJQgse4BRCBhh0UP5VzwsozTIxqrwoTCB7wgZpkbNaNG43vu8sLZB7EclhDQtTyDplHhFs7krUjZRDM4aE7QwO6VVFIOTBFGemjEqUUraHk/rbCAtt22MiGwlLTz3XwB22DI+z/mL4p2PnodCsBowkp76LSap0rbDX4KTXOhDy4eKwCWgy2hWU6jPG6BCJIchV2o9N18a1ZeM0SFrt9JTHUnJDpoBHqlXOelNzGClEw9kLtaJNfLdBn1XaCT5mKhV5/DxSSQkkc0c7Qs3yTkaW+iezm3YVGky5Bo6ZVLWCWK+Xgn22b6KQa1Esp2DorNq586QrK3d8c2tXph25o4KrIYg92lpdOPu7/86v9nvrH374ZH/zrU57Y2bt9p2XXnvp5qtP7u7ZVz4JgNubB529VqcXrBwZeeHlV2aaszE702NJpruME/H8xsLIYrLUxid5S4SvgFpLQOKqd5i0scjcgmnFpiF9pOeBzMn3v/sd+qtCLXpQt2U7sT2Vy5jz9s72zPxSsznX3u8oPKCrSZwJN00GAUpEKTAD8o0cj8inkGwcmmWiwTdGG/6lpYvMU5kaNif7oz/543d+9p1/6298afT8+dXLoHY6LjQdFGRjsWqosVoe8TpEyzbOi6OI2FxQMNVVJlL9Wc7kStfHEx69E0YROWJF4RrVAVuTDQhPSRHUDw+CnQiHi4fVG1LzJ3aRjJo4JyL5xBk8OBGq5M7IWVT4yPWnnEXwc2xlccl2BSNnfDkQSoqTBvBpoC4dsDghCGq+U+y5p83wyVnn0toct1qpKxcyOPVYxhGahY8AaF2QYhh9YIpuDT7MRa4lV6kBwCuDLzAA3gTbrEIgUZiEMcdDfIHTYQJhluZEV8RISLJc6OfwZ/fhBdx64bkFJuGuRRM9HYx9qKb33nOuH9nzmuFiCSzK5dUrxyfDdEOueykP9mFVDgye+9022inynZ7tqeaBedgwmg/YXpEn5LULkqHJ/ew6bRz6g97R2dSxNRL20ILGXiHAzhfFbIijC6nHCZiUYLE6yBFeqvtJnJ9Jb+DTAW1xNJoaGZYe1DqBrV2+RcTasQDmYxIC7nIz6WuC6sJaYju+u3tscelKAQueB3Je5C9IEYbo0+GJQeDKOklpUHhd0CXwc0CdaLNSrrEOK2VY5QwNHD1Id01RlN9zku2Soqg4h3NxxpIWP0mgTblYJJw95KJolMOrsvRlJcJk/XkhgSLeyr88ECzLODOejDiHt2Uw1R8XnyF2bl1EALsjf/KECivyvbKTzKu4ngsb8wi8oJAfceia8sAgZQxncMgAHW73Y/X7KTeUb4BJZvghUM1nvqCpj2eUG4NuggTVr1G6nIumSG2hZ5laDBeyDSdjy3DTuKFkD2kzw77RzQwCxDNGnMiVQKkxt1A80ia
RTdRfA33n0hFA1MMbUQ50rOgGl6kS7RJ1j42V0UrALvm4CRozpz01Wl32OmFkGZi/cQJZ3+oEJrI71VDdXqanwzVOzmGZ3JK/ibij9pBy7EFn5JxtJ9ZFLPViDQyfHZQw7qG6K152AwpczTtdqHXGm7bJel3KcSfZpBw4xt8Hv8QAo0ly05EOBS+pVphFJqLAq3DerH5iltmpTEmC66M2qSoaaOGEHQc+R/sSL6aGan37IHA6orAZjWXC/jQlmrD32ubW5sraZWCUII5XZkHSGx7tCNUEvJOTM1PNpc7u8/X1reVGf3WpeTbYORm05+cX/u6/9+V//J/+y+29B+sbT7A27WzWrr8xJRN0ek6N88LyIgvKhikyjgFT/WOmk0yWKIrw1QpQ/jETCaIlzsQT2DQAHmNTbzbSZRxLjYuyMHy9jPRVcv07b9u4wZ54XaH5dvqWywrBNeRV25egQfpicaSflxb8KqwTqwP3krKB4QKVNBAYO6oIu9cbHdYPa7s20VfBPKXkTK9Yjp3RoY31vc7O0L/4l3/4177xsqbwwl2ngy5dgIcGDE05tXTqH0JaoZzqgDGxCdi3WaGY7NDOVwvkgNGOojgimpwv8lJXGilX9NpCouAeAVQ5711O70F+ND8rD3QQIogQfC9kmXt8Q8eiy7bd8sKoMFpw1dlRWt4xBLfX7yl7VXhUMjigLTM7qrghyamT7ZKuIePn41MTej+M1ObtZ3K6k2JbxnC3vy+F3V5p9Ad2sz44hpIcNEll0DONm3Vuo36wvUI4SZOmVcTFLesnSb8mhG5RFObqNaSIZSVUgSmMtTAOurlZSDC0+vS0AiaMFBOCNsqrsTLGgucSwy4kFuIrgpyn57vSfHWsqDdmOdC6O3vDtRnKKLWSRuAtaeg1IoDU13Raws/SpTWN/6Hr0kIzvRwHNiA+39zcbEzNsgVsPtA9llEz8ujpExTYH4wvraz2jsOiMYSDnlnj2Qubu/sapKlvWZmfNZ9+hxo63t3vN2bnCL9OW0ML0saWaeKbh3q28K8btpJ1ddAbG1u7ey2s7PU3Xhwfa9QkUDeaAq52TOYElyrcOSQiJjjG9dRR9GK2DtAMJMv3/Bn2WURFdQZCABkFOFiX89YIiOhXhIc9KClpaTjrtjQxpXsSy+kgE0x1oErCyWv1xJeqZ+iRW7HPSK9CrpGBHv6xKCJsvOhjcRU1I4f3RjkMizOGfKbuPTiehc6npxj5kY0MItFyFKpwaaEggpIrqSCFa4MSWWs/MkF8CUnnKKyKcpNodOwhwPTSPARp5DJWFLGHxfup+lfdFSxzJgQWieWhcNMnZn1xhIqqr65I4l8ILSOI9hz1JOJK/SvgmGxED+mI2cb0o9JlbxzdX6jVCWilQ4z0eqSR57u4yBYsogyTYXjUFg2m94Upq6XyrDRETmgiLy2ilG0SbQLJ2h9d/4DYkbyLSclNHoQyLMMKkLSCZrQZ1dTxUE9ztUMdJI5qh9JGzsZsxsEXl+ui8h1eESzJ5imd49PW8YleP/3YD+cnHRupeX8C+hK7kw3JSjTo5jS32GgjnvWsXhQC9B/1CevFbIqGlMJJJV5RGuyWaPDgVcBWVtpXU0Re0UTdDjsQOWfgZIRB0XapAIpfOdKku5HqKa8bG48gPZGZLqUTDGw5SCrYb69mU3YgYrF4nbxk+iyF8LRzhCcwXzVDOzraXF6aHTnvjI+1+PCak0P/7t/+4n/9hz85keB7/2dzV8/f2+lde/Hz0jr0yznL9hq26ZhUVGZaHphR0dvMKH6NZFH4r03K61MSM4Ak/QWMxJoblWoVQsU1abzLDqCgcGxydZ30tjafaWJL3a9NTWRDj6NOOgmFQq0vlEUmhahhBxziLUDCxY0W6EUDi/cK1xC50hYrFIi36yOcsivStYPOIRpTc74xZOvHpcWmEtUYtLSCqMxVawZ5NGhUOnj8g4gO0H75CLJ9fPziWxYvf/3yr/kzFAfpkBzERBXhORFsBo7iCBUnQ+c+SUcKKyK54EsQpcw8nCIPOuNzzou9giDJnrhHuiWPra2u6Ydlr1r5bvJrcmQRbKyszsyjaAlAmY4KnNdnw8ezCw27Yh602jxjp4Om1Bs2VbO+TCuXycMOQ8RoWXSJ1sbJKyOEn6Nko0hI4XAQFQIewsyz2StWHaKnEyLS1Y1dLhzOmUEWY6Cw5ASfzMZJuXwX7I65YjXxXM5b2n/UVWibGSIlHBVbmlta4eRMV1iZjKOnshbOhurqHaAZDJCtHHbJ1Sw99fhkdm51Z7vlNdeuvkCW3H2wodvP/NKV5xsfNudnj88mnj7a0gZldrZ5XpvZ2NnpnPZH6icStfgePMbiYRrn4/Wjs30b+dx66dYrL97RSCVl08cDOes2eNvZ2914vrvf7lGjyTb+kJHBmIbWKyuXpM1v7X5kTd741KcVgQ4NM81HvvHN37hz506nZ1O2zof37n/vz7+/d9AyQ10AAO5CDhScxgEL9siOYCQWJlAkBCwP14QrfI8x9Cj0VN4ISPDX2RfXTJp4OktKTmMw1eo6GlFKYQCXTKl0TZckSygaGHM8DZh8iaVI5IRsCk/EiNFPeXMlroJzwbZqYFFAMCnMx/+zciQ5mo3tFV2lfKJB9e7J0CvE6A8D81kRDCrwImy6GBM5WX23/0Hw2mMriVV9UglcABcr9l9GEmFz1ul0XRyY+KMcvoCh2u8yQgIiLraPvwMW2jOzX1xAX4e2AAEAAElEQVSccbiINPSgfBjkmJoMWRgyMpRJYKdFZzQNF8bC1sqnPqZFt/bMQkykCCS2EjZEzubWGYVpeXMaPvkjZ3oH+6ElqMoCT6uPeHodkDu2JlC4IYp2tHxv1dtS8gq9MhWUarAhUNJJ6CKgbnhKSuREwUVZVKftln0ENIod6TL8RydPElcbUX042xj58J0nM9B6TLCnS8yh3DCB87OeepZMB6lxZ8SkiK5cyptkD+s/faqnBklJ00e3Me0bWWkLbqygZdL4ZMZSFrGsKWzJ2qE5Xq2Eyc0wpE7WYEax8PkiAg/2PJnT6BNP2qIfM0mzMV2/zWOoDRRhMMztRpB7Uh4G+oULgJm8B7y5MSHNyVpx3QSGqrtn52SmUIw6Em5l6s7NTP/mN17r7P4kTvvdp2svrI5Fi92kPtunWm6xuiVIgS5kE0dcRT+DvFzvbJx4KdJtjXrnfBIzY/bxOZgvVii7HQzpKu3Wriot+XriDu//9K3W3mZKTegsOHH0EspfbgkyF/gEcIx4VBZUi+4VwzripczQAlgYaq8MoKMjb8GrdXixJ2W9ngCgHlJTtaHl+alrlxdXFyeurk0vL02fHLfoESEs4snyhMiCR2Dl+UAXCBaKzkujw5hv0XL9YWi+R0tzb8RKSL6M2Le4DPydFc/jQrmCBdFMrUgZe9w14foe5M28Eq7IRGNvGUKZdHm+K0iVwCS6dc7TVPLSodE3f/Kds/O6iCQGelHOJrdpZES2dPE4l0qOyZoeOMwtJCBkxUQ+fzK0tS5QuDc3W5cQ97i1vmCbbbabSly9B8yb9Il+Ds
y8ZFEoE+gdOdTNKJ48ncBsQAwWeKgGm6mt9gXsAzXeTEPN0vmvqdJQ4/sUA8sW41AjIKU+BxbREoovlOzyL7CKnTDQ7ErNwkl7X3h4vDk1rwpF/Peju49XL9W9TI7d4lTj8qXlL3z+s6SShG6Ut7N98LOfvw2WX/zS12698Kn3P3jw7OnO0qUXbC1wcDjUlQl6yis901y6XV+8TrTUZpfS2pEjlt2sW1ANYh2O9ETrxuZXFmYXZ+AvDbCv3HBwurh0iY42Nj47tW/HgWTCCopajcXlhb5UDlpRY/L4uLe3v23zR8gzOj/7lz96W3cO3gg19V/71V9bWFz+L/7Jf/noyRMOPuXpCSnnKFK9+mrpC3EH34JFoXwc0Pii9avgFh9j76pr7B0R13gbP878WH28oQ3fbLNm81EKWBopxM5NBEHpawlQBbu5FagM0YbiwvWC8JqgqDGwfOOvK39Gt8rJYGJBb58V0rGXrJDH+rRfjttz+Aya5CtBlv/QCiupc4GqHoWTM4ejD8VGj2KfE+GCECDaVxzU2utnC26YhCqsiYujcYfMPTsOcvgVtg8uSoOd8UyfHs+qDOY4EhbKAWrls5pFNdQ8JI/ClWIxIPr8w1XxlBHh8DN+No2YlKkGSlY30TbQH+6PjWltIgsclHhZmUEqOA7ly55xP2A/xpGhVeoFIA9R+SUmmT92KOMNvEzXqzkxcqHf3OODqzBbikTvSi8FimaG2E/xBNlivKarY6cng5IXiAXos9QX7a/Zpmd973C3d7LTPmonEjuyODP+0s3loTnsn3Emq+JYHJ9c7p0kO66A2DP6UTapPuJdJ2fSjYQN6rVhu0TWa1o8AQcSZZtHnof5+J83g3OGA7pgD7ZBJWgBMZBrEVRJ4YtZHFXcaZfBJ4Mvd8SX2ddjGqlgCAKiJfXmiFGX8mtJ4qOjPRITlLkLmGY8AhlHeIKRSApJeY0cZa1Qel3YyU+IRZ0PH/EzyQYni27fvPXv/Fu/+X/5f/w+v9GT+z+5PTe7p05laGx6Zn5yeo6GYJWsXFHj4v3GgEIlTKvYDBkqn6STpgTFvL1UeI8KZUH6to3HO3ti5sLdkzUtG7pPnzwQA7dNn92P7MyssJ2D0VC9xiqHEsKePdBn1Dp+U9SYk0We+S/dM7pi3mWyjIP4f4DULn0v3rx8dsgpdKZfIzvy+uV5+yth2lMTQ+qvo2oGhVM8hFLxdGMOySTAHVUpf/o07tBfWElWqxBRoFqOsF2jjGjDDHJldWPW+5Oj3JLLArwgc8aX1UU7GDv1wuW51zLlvsitilpRdXl7uLrzUrGFL7l9zr/wxZcHZyRN6qUrEvNy7oednbaaoaCIImHaEd+5MtrB4LXXXnn/nXf/6T/5g3vv31WF/fKLd+LzPuzcuLrqP1Zzds42amklDIjeMjQ0XWg2/OH4ZDKe8OhokFSYFsfO5gNwP2FpBbjI0ISK+y8oUeICka1BbySXNHurl+nhm5AEz8KWEhkKr3SEh4QDGsba/JyWEDtc6c3ZhYXF2sLRYryH8XaO8g3qGTs8vMKRq6uYLRCmV67PHJ9/6vU3VD7bCHR2bvm1N+TRvFtvXudimLDJrb6hXB610fT6Gj3X/EI7NN4I0Mo47SwzdtxtbeAFNrvAOFpdoVM7wbX3tg4M9q2338VR+DlYcl1daPiPLR8rsF7f3NmtN5uf+eyrpSweAAbN5rRhtLsPPL/+9An16+btG8y4udnGhx8d2P4TYl5kQGWxs+g4Q6oBwAJ8whwitLE8kfvwwf6Bncp4uvlMxo76I2oVBjTu0ana2NLE2NzE+fzUyMxk2A6Lz7PQAVs5akukC4ypXiNQ5M2J5FgDHrNweGAvLq+O9fsYV61BlqPc9DH65kr3Qkfoby3dXgRDtEVoG9mAXJVhRn/EW4uY8Wl5fUisxPIiS6LSEFdh8C7TrgcyhPXl3SHvkHNAAlcvQv2wA1KFIr3JMxDNx9+rEfm7CIOQSiioGnnhox/PInTm+bhReXoKEnFQl2TzANoD/ehYGWdNxEWDOgHS9JQlSyCp9APaxXBrYrTHQw6+46O2KNKyxhSwfOqzURlC9c+yAb7O+rbRAZ8h26wKKfvRCB1AgW1GJJcotP2gTukT0ehxLPH2xMVK9nxAYe2KAzWsAcPAisRna2Onk5PHrf2N435pfKHPvx1cNbelZSikbT8/nqjrP6fLpz17CUuCGN21WntATDfTFrbRsIez1oQ6pGXvxGgPhIEwsVQuPEJY/FgXCcDG2rIQQB/wFykUazVfg1lZFMCLGBPPC1rgGFEFUkERfmaVrIcU+AQ9yKqBrm4hrXpjipONwVOoPQwUVIALM9UqAuPhEgwr5xsrTSWwp7FhrSkHjYmhPptjlP8jPW9gQm10wqPmGjNyTCTJ//v/k7/+e3/w7Qdb9x/cnR6uL9amZjpqmA67DLCxUXaz+rmml1ZZ16BeivKhTTQX/0E5oEQ5pTmAYIT08DkmsLP13BJoX6saTGLA4/sPs2WhrVn6bHrXKJ7m7JNoav0YHACXiF0QIpicBB/PKX/gntC3eCOKv9lpBp3pqNSaSg1f3T6sX/js66/e/EptlJtUIsaRpVQZg2NoExWdhnD3ypAchssQzwLkZEVD2G1B8vDR4FAOuF5WMDKpnIgi4MjYy6OKyM61gUDGnCPDDjxC96GpzKL8EHYeyqv+8mP1BTlX5xGa13iyBbpITcJ3BW6Ku25cWJphAjeCXwRD/n/l1jx0dwvmXlm3wTJ2er0BM2anZl+98ylJQlhIq91anl199uHOowf3pVmLdXpAiR8lOVcYVta1JJm6jV+V1Npm1Caik0PNmcHC0qRkT1ks3DbEDW4X4F/MMgp6ABkIJdukTLqCVVlhk3Qy/sRwSbgZIJkdqOQ43dvZ4HQv3VzOtgZbFDINjrnmhs/t5WkXxwNuvbXLq+TX/u6eLpsffPDR/NzyvmzCdmdotP7zn9/tHo689MrnbfYIHcYmZg5a+gafTjMzZ0kb1S9iPsN8+9zB9djxEpF6x4M9xTmiTdvbLdpWe7fVbXUPdlp0rfWdjbRgGZskotOQnYoavfmss4+ORucX5o+ORnd31QVSFM/anX2Zhy+99NKl1aXpGW0Rx/bbB/fvP5mYsJPWYMKGRbwWUCPyKSSbaA46JyIQvOAgnpC2mMfeREngs6GaThCGMgdHJ6cb41PZfWikOcFonl2UdgvHJXPYjXZIJJBbRrnOSRsHLGsfFQyBRG4VnmJJPpEoERs4SZppw/ssD9iHcItp4j8ZY/keRI/cyeHKs6qjfFYX4rmhiCsqKqM7irlbq4NMyq9CPT49IW+JIMmvzBCDyr1R9vLWfIfDwuwYFus+f4BTofHIKX/DsByxfC6++QszSQZBOcpZgPRH3pBJhalWkiwCD5wxUdtfD+kqVloc2T5tpHs43Dsc6x1qXXrWU0jMNZ+ohihjbWbqfKHWnx4TR3FOclpa93HRpVXMmfpAPNmYrV8SYzIhHgbiPJ2i2GOG7H+kM0PJMpuE3eH4MXKXP/wPr5bqRO7h6p6KgAs3u
ND0UQV6gA8+XUwATU0MXnlp7aB3ttYd2u6ebbYG+4KwQ0y60ZnaCfxAsyotjFBuqt1HpkeHr966DsoRGwEEhcJ/onhu7TyLuJanL/xkurI2tMPRPLWDyUaNc6XB+lZ4eZphZXDBitjgcQkFwnGPBoPjMPRbJE0JGrhI7oGwFL2t0zub0eEBUk5O1bnIU+cKkHzxo6MCwHGDlwMJBG+LJu+Blk8rgeHDVmOox+k7PTEqwRyK6o92PH4wPVCYMJ5+S4fr09PXX3hp9X/6H/ytf/j7f/rP//hfNpduXbv58uLyNRWlGOlkY2xyQl/0OtZNNGY1AoYyRXwnybon8M1E6aLeK1+aJ/Bgf88n9GpMM3TmmKL3N58+enh3Y+NZe3fTdg+GLSBMXeAIIEBNpqBtVqsKh4SksoKA5Sv05o4MhSK92PiizCpjs1q+MyuHveXa2vJhR4C9M6pV5FDP7hkicCzLmGIT9qhM9AqUI04MP36RLA+ZG9p0lNUFS19ZThbEdPwZPa8cAXOGFdyrfnWjq3KvU9X/qws+OZUv5lSeme/VM5iSF6IqP2eaLihjCJEFScAZA1A4mqzPUbG6Aygs4TySDFyMH7JQrA6diNfUeHyjJRa6Hznb2aU+fOb1z33jS7/emJwVz5K6vTg7R1t/5+c/+973/uLtn/9cekJfa2XcJY/s5cnDbb4kj2ZUwyCeptden7l9Z+XOS2uzC9B8ylLxLeAl0XnCUWgZeXtEUGg19xa+lC8FZpFn0WpcCmyR8xfgKgbGoFmf6El3lZ6PklzE80xH3N8+XDaOUxkVSrzbB/uQXfc/QsVGNj/48D0h0Vp9+vGzuyeEwsTS0yebV66+tteiO1Np66Z03uGYaTBL2IWIbSDmN0AsAMuDDthcjhO9rmrC9kn/eH9rr7Ov2eGh9R6fnNbw47AbxRhyBvV8E4c6PD88sjX5852d/vZmt67Pab1hKsvLy8r69GN54aXb8/NX9L9Ql2Hb6K9/9XMuSfPBalWqlOeq8g6iy0Ur3jJjovKWHjnHo0eKW0cn05FqHB+aqw3NaGZfH51tTMzYZ07P/MM9lZUcWBy1acKR3IvJ1MUie2HkbKHHNZgcwCATdS8JtXhfTFtKYdR+VFNoNwgXEVQJobCjsmzlZDGT/BDBc3gMvyL2gqDlCbmTXlnWKr8UrPVbXsj1J/MVz3DwDBQE8DU34OnliJ4eDM39dD37pfo1bw+5BWnik4qLI5cFcy6OQhsS0/PGgkBhOH6Lh8cLiuitGOiFOIzsCBQMLcWItAH1Pb3eoNOVLTqyfXC2d3C810r5BIYtT0lrn9X5iUFDD7iTqTpfEM+jEhJGVsQVZhMBHN6dfNOwP5KUb74pEobNYYOSX0ZjUsaLmaQC8wpM3GN4BRrJSdSGBJ+R8UESM+EAiQDPKhR9EwTCYdPP1KzU/O3tPDyvzdnsY+7S8s3avG6Z8qgmR89mJ86mRvvjTJn+3kF7t9MlLJJReHS0n2imQA3NTMlgIWZvv/z6zWQ4Ssg4pEHJreLkl1F4mm10w28yGdAHxggjp8rIs0qZBLq2vqEbc8o6ZdFdHC7m0y9n+mZIU8lmCpIc0gmHtZc0Zzt52ku3ncpcvsfeoK32Xh6ErkiVFyjWAo6eRnSpNTzp756c7g2dtlkbukVbPGEtyRqtoz1iYmqkwfdQG+seDWrn+lMg1dO9pw+ZIx2oIB4wMT5j064JzUVVIsMiD83wcXykX75QMmWiEAOhn6Pob8CgldbxsQ47c4vzNmeWSLGzuXH3g/c3dWR7+tCqyr/wECsbOc2GpqUUORUEZJwWpSO6XXhf2B4o5aWgBLfdMpJNnIUGTDBpghJ5hk5XVtYEIIZ6e6e1ToLjNpke7pNVxuZVyG4Qwy+11mFaoJ/K0LB8z/ZWOFP0m1CxFYnYLyd9L6PKAHJU9FLOBUtRmVHmI3cVvlw8iv7wZ8Zflr5cX73oYxrMw8pVWfHghWfj25wB4T4G6UxQuaLLyFhXpOFEcY4il2jpgrsutqt2JGuhedFxSS01u19oIPL43sP/5D/+RzV2dfbFkxw0Zo+b/d3N7k7X9g9jdL1QFM+NpoMRUWEKaQ+QAAg8NsF3ftbSmovv4MVXVmcXeJI1s8bwokBjHkjKDZmpS+PWT9OQ+COLsxCK8uf7taBMUc5NInwpmI/5ocypJmdFGnVQxsg9hpZ9C54+enzz1jR65oORBPjk8SMObSJqb3dzZ2uz0ZxmjW2t76kqhAKt/d0r18bef/+jjZ2T5dVbS6s3FZ9JP7LJKE6yu7PJBLedJqVKbdboyOHKogKKsfbBeeugf/9sc6ZRa23vnnRPxnSxro1P1Waimhb/sDoJ+o3+abJKFucXDlqcLotTkyNLc0NagE/JAUyn6OG9/Q2R+KP+wd0P7VJ8cHbSEtU66u+Mj/aTdgqkye6LOq6ZiBA2V+Oo6JTXEPy2PB4c20BHKr21HT+tz6T0xm4bZ83hocbwiTZSQiJlD6D0M5G7SffUDV4Zm67jR93BrlSspIAlCWNIiU7aU4yfCcjHPKepRSFGWxgI7Bo6sg9PBnQhq1CghbQYQbVy3rqEuivrJ5gA4Qoal7MwxKXWMxzWNSGGiqFFKmipQnmMVwQDi8ETgy86II3L/q9Z9BzV+6ovtNTcgd6QQNFkcO7okLETDSnXhoAK/YROQwWMD1ANCSNfv8Xd7Fre/Ty+OiIyfQsBMliCsuglNQFCAeEferz1+oqU2u1YqvQyKFU7m15M/zJChTcuhSXw3CcEDkJD4KhcXmvaF/Zc1N6weddUxJxJuJTC5D5ThkJ+ivTOs8ogzTAdMRJMjUyzAmjJQ1jJ5hNiwbyzHO7kW2jMLTeXbzQWrkzMrtWn5sPVGbG9vfPjVtKC9k7bmqJLp85efLZvOIYJSEqKKNkeNcQ7hocPurtBAZsWkZPxlUZv59I67ukp4GtZx7BFfCSWX4aVQft/WQJrmyP9kyxIgXOEtxurCVpi2AAR5XQqP6FZR6PlRNdcfaQuel4I6Zz5JbjQPexcXrh8cJAQJsQIOun1KyiUyIcmNkO2PGid1l77zGuLy8ujzaWDo93t9oGFphhoGy95mG/B9vNvvPb6154Pfvr2483NZ7XJRmN6bnnl8mSdp7GVsJn9XDhKMjcrIlDK+SdPP1aOxs3Ype5qZSFccHZpbcmyS67gUmXkEVfrjx+193br01pIy8BEeUlptOpAZ7nhE0wEouhchUB88TcPJ4Kx2J7jSkyUEpOVVQSjgoDPSDTuRJR0+NLS8kmvPVuTtsvPK35zfDqsyltZIl4R3cXh8RaNTKVwyY6RejE1qrAhsjYvtUQs/bJ4ZAZscX2wP6hYCCUlVuEmoUAP9SUX5DI/w1LPyf2/fFTs2QRQmMAq70AYWCHmGJTBycy7PArtUYSNMpgb30jyplwe2o9Whv84AxBhEzCI5p/MZ0a9nFL4EVYEfeJC4Vx98ujxBz//cG/mYL621NmhmxMIbPYOV59llCDblKKkJuN8RD28lWJVJPpVMFrEJhwglt7Q9vr5o8b2/IIk3FmBS/VG
TsogKlYdBRH79co4Q7i7+F3krFvc+HdLg8A8IswtdG4mLjZEUDRXc7CljqFON4kGOwV3O/KgRuc1GxO1mm7a6VGbnhE1gIpGNFV5770PpBc8fb4pQDVKaeudT89hLJPt3snDJ3ZgG1nfP+/95J6mky++9IoCwK3tg0cPnszM6KJ7aherXufAbliNqWsLC5ckLQ1OYP7Jcfe0taV19fni3OzYxOzQ0AwVJKRITIXDhPFag60d2WEzjfqCoJDKNUU7+zv7z548GB7pXL48t7fTa7XW77x4s9/d3Xj26PLlFYqFFN0xK5Ylx7uIomyxG+c/SMg3PpI8Ja7Mdh6w9SYF1XkyeqcSQabkrbH+BsPClg1wybZzh8dqJURgj7sH/c72yeGBEtGxmpbEneHxY9RnE4LhqaHzKSkzHC4nh/LY1dsBTsSBWBKMB37Kai+YW45g9sdHRbdQN78EuS84P09JsCC4Xn7yxT9YmTLYi5Ox6csBSwv6hRAoNXF+0qagLHavZ1p5Th4fXCjPgTVylvOcUMLFUWhK2+yQIdXSfxC675YimOQjBBRJWXhGUDF/uwyXD1f1fDpTeYsTiZayNEgDnBifOKVej48sNGWS1aYnD/d08zkZSF3Jrhr216gfc+Nywdrqm7qdUBQFXIqK8QYCHm0wJmrYoHTWO+1hFGwZAOTQiZ8+/CsiDi1hc/RljGQczRqivtCkQ9hYxHoEKRliGmENwyzASOdwA8+PzkvU8Hdr2z7YWj/Y2z8f+sAyxsQVT9G5WdSS7XGsoETaoIRRIFf/d2gxjhlQXh4pHeaG6OwYJUMNVEyF3wE4JdOTm1wlVjqCqBzGTavyNWUyDuwpYwpCBJ4WlgSU7apmORiDFbssuSFhncejjZnF3V69dTjYPOytXX2hvrLWG5vcPFDzcjydWOvYzMK0gHNzdtpqUkyF+8js6XqdMmqYE4PT5szC3btvDY649Pc7cgKltg+GFqZfWJhvyLiAEPwDFD5olRJDu7g03/jpT/63i7MzfPnf/e6/2O90v/DFb87PLB71pEvMYsZt24XwUVAg9QXWl263Oz+70O8eymJXaKKR0oTm7o2aTvYS3G1OZi6Dfv/J3fvtnT37UiRncOioIFUCjokSJ9if6uaPsT5IAWKwI2tnjDJGhmypxCHEiMAG61qT9UW8JISPi1P1ZX9K2ertHkyvzh/3W2MTwsAp/ww4z8fYDYag/3OM41QixEROroh/Q2qx4ZLQVyxg+mjW11ttixqvVCSXdKCoW0lGEJzs18dnGA+Wl3c7oYJgHXKKP8agrWplbBn9Rc1nQd3CoaFEkjtonwXVI5WiioXM/FQ8OqKPUU2yGWPRWoIzoQ432hwHb4NaIZNgf7A+Rco9qr0G6rIslaszD3T551qjjwjA3L5+bfJ4YnVyvj55afJI9HYglhnd8uy0PdbT6KJz1Nf1Xym4rQkjzLQTlkHKX4AdRLDyWpxzp20+a68/aa2szovZxHl4eqgmnG1wemgpU90fzjScZV1eWWHKMK/t4cbVLGrB7vcwQCvqQmmyjSiLWYqhCQ/Z9E02hHw/3jb60MbmI7IZedo2Lkn8SdQf++DD7G7a6QxaBy3PnF+au6wVy+TsQfesPr8wUp+tzYzcuLL2bL3FRpO73+51p+vnz58/7Z8ezdbmr12+jO2g0od3393ca125uiQEdXKkufQI3/rRKSfKUWd9d3LyqHu4MT7R3G21x6cmtb4gXgIGm3SP1i8tj3b2n3bFxo44h+CjFE1hpMHu5ob6Sl6Wtw6eydizOQ9exaYXuB3rHXNxwiPlLjVp4ZHQFBIhIfW9/IaK36VTc6eSUvrijzTOZCRkj9hSdBGhkZC4rk76GQxTA7qt3sFme/u5fXfOjnsjI4eTs5LyB5NTNq4cHpseHp06H9GXfGIg4Mnq4kvAD4w+SA1/SMuYJuFNEMsn9CVsgsQyLqN9QanqdNC6HBfiCnIXhCu/Bn+LzChXREXyf08MuUTOwVNnIusl2fh/EZS5My/MT9WRuwkT90Y9sDp5TtHbInaCIlhz4bl5pKfkBUZIoWM4AGKGH/YeMlAckec5XX3CUlNGlO5jKKWIMHIgokvDawX3I2T88NT01ITcWM0yBE2np85mG8MTNZuIp0wEA/Uut+DhDCDfDS8A8+FlGWiYtp+ZFBgTesYVqio350OogUQZjrkludZYeBhdXz0tg6vsGMMuMIngj7pw8ebhrpSWrmhXy/koUZlwmAKGpS8CIyzhGYVc2TxEr00FDhLVGfMKH6PVSjSxbBl2HAZkj3vznnjGoKFbTTLmAHUh6kWZY1aBUATYwoOcpTVQRfM/Kpx9A825FMZkC0FCk9LBoaEoSL9yu9IdSKaoTw2NTbb7/aetZ21b6U422AvawOzs70doh90GulFqsDqDQgNRsc0iw9EtzvYkydgaq5vx8WFt0LcrR1192V6boTYmB5fPrNvbXZldubK8evfx06HsoTx492c/1lz1K1/5NwjBmVlO0SnoY2rcjxJrvYGpQuXDnZJWK4JU8gPlQBMzug4alU2GWlqtbm0cd+mwvH/mCuo5QAUKFnYdyQKIHx8Fb0E2NAbJMCyizSQhA9+tnXoneUWyUaKlQewucbN4qkjk2XlX7i+xnWozW5Mc8gXNzC9wjVFmqiKeZBumfUhoeXJsWmajfOX4V9QWJb+m0K0waUz2iAXCBlOWQ6sDP32kYAvw0xbi6XIUFA6mVTgXhCiT9CnW4wKBdciWpnSwP54hOwAcp4xvuGyzUuRisPiCJKEUzImM8w63h9JgRZDc7b5EocvXtPoNkBk6aQDBh6FgMtESgZZ0uHBdQ9BOzkBLRZaEXBAQ/uDbO+3xA2TXK1WTFM6BHHkCSvUD1KQQo2hOSQJRfhzgUiYP9jU7Jl34XRJ1ZT9hG8VnE/WKKZJO+bGjQkFwDHAN3mXoy8nQRtDckpecw8IjOT7Hag1IWoosZIoqFm6wgXmKGc3giaX3O9zOo+w1wm9v1+7do4tLCoVfSAHWXu/ps13N4FbGV3Ry/6/+6T+7cev1F196fW9nXy4JSGud3uoiEPp17FWtqAyZdFzf2OJalDmMKuNIG7VV2xhugnIoyuK5z7d29BDvHB/yPUykAGOy1T5sTvXGGi6wtyJcRPAq0A7PR2P1WgzbUmet1VRMDEksZuRLjlXFswx1x4f1EuBbRTssqqMzVpWoo64dgHM2oTJUT1GZvhZ4WstayZjp1cJa7mNEai51k9rf2OKuGjCtWrutnWwTJ5alPnpifmisOWSv6gk5zLSOBvWAxGKhJYgVn0cUtPjwoRAK4ywyVnjjCJLmKOyprE1h/TkF9BefwbyLKy/QuxIqhVbhQbkyJFEdZMgn9zrjIdX56mQe/fHDP/5eHh4uHE7giHhwV+RBVPp4oaLgYwS5FbzwXBqs3/KmnMXNMeeI5Fzst4+PyILU4IbNYILUUhnJszPpE7Es/DeBNKbHBTryZVL//XpNh4bdwYlNzbNlZ19zuez2Bl9FFDK6X4w5VpRXU7sUtzGGSI5SkerTOoouSuII74L
mkQYuRcxuFykrUiB8L/MJx/CtqLFeYPThggWq0PZ8yI5vLDZANSe2PiBlOUdHVAjiTx4a00ayB48vLwslnbOPwSaYAN0jdGKP5cGaSJGftHYkN6ilA2j0tAGrLEMAqDKc6BXlQNFhqeF+udsVZbwmkKpaKqxMpGjdjEe+X16ybFtytn/YVei4fTh2PtfX9Lw/OJK4fDbWWF5aw5w8yLDRkyBBHmph7Z6OagQMPCoGQ1KBsa3sDXR8Nlmfa0wvUk4lsGu/tL3XySYdNmafmTntHssDG683ewft5uTMyODZuUa+2nuPDL394588vPvst7/1tyzD+Lj2e4uc5rby5Yis80hP6Z9aMrMVFmi1a0eL5PphTBo/DTgSRfWebTxbX3/Kra+HYVSCLFMGaMwVlDAxC1HOX5wKzBCPs5OZJvKv2Sg6hY/6PUaJsWhETsmC4psCsAFFZO9gQva9v9LFyq4mNfafrSV6mxt9W74GD4JDWXfQUS9Id+4cQgKplUnxZ29RqryOVBulqGYF8xaLk44P7tM4DOkHX7Ktk6BGxlzonavKxC7mg+Ff0Li1tejxkQQNPdE/Kw65kgkJD9CaZSIcnAzS0KeLDhSfE+WCdRP8DNJETFy8DmIWn1JuIRLgPuuLqg+EkQlcA5gy91RrdrKxMrM4tKUmv5uHKEDMxqrp38jhFsOR2C6rwflYpGXYG4cV140lCfaWjE1+qO31HWbTBIlbvB0qklJ8F7Gad+p3po0fnYOsipdXDWaOMA5nTMGDXPnLh/H48+S4bVy1mqxdxSnxCoprng5quqKAHGJnpXH8HfdPW7u93U1dS4afPVt/tr7lfa2OBai/+PoXZ2fmn68//9rXvjZWm/voo48kFup/sb+/1+4d1CZTwkhqsqW9cWlpqZnG9llqbzfyqOoWySAEiM8lvtePB7pjZ8uIIWl41hWBTU5JSJsgptTu2UyoxRbVNAqaHF25spjVDK6AXwr7JLvWTpWD9q/fvDm2cZCafy6M0PuhWLedxogrLV3ULx/pUDosaHw6UTYNSgW42FLgGw5FseOVjKc3ehb2ZFsU+sVh+6jbPu11pXRIxzoZF3ixp/b4aaw4+oRda6PriP9E/YCIXg2SVg24o4HB/ZCVyVefQcKPv8PlXz5cEA9iQbtfnC9kAabB5kLDfvL9l59WXVwuyFdfPvle/Vld4DP8OQ8pAg97/6ULWQdFIQsBxjrM26K+5T+5yxsxYoMPn8ayq3sNKkiHOVMPOa5SQ6FPF+Ilv0PkQo5jk9Nrl6/bBSAbNyVyjud6EqAcbq9roNCxv1xJQit4XGgVEhemHriV5eYSzFfwJEoYKvJI2cARUrbzIX+KqptdOtAx8wSrCY4Zg/EXZlfmAPDVY/1GUntgZTICp+mQvocH7cgEPCfLVVhGgXz6grEv/A+axGFrsm6hRWaq0f2VHhfflmx6yWyJ1uT9lAEKvWe5L2wMZIKZgJVlCpYEtDComL0FvJhTCIMh57y1KNeFKRcEogoxGc76x+eiYPvt41ZveH1v/6izsTB6eaiJ6sam5uZE4MBjd78FADrQikTYMI6gEkSOFm4FPQsZDo8d6Md5yLAwyeFJXfzPx7Xg3N46oJY+ePisddCxO6q0BQ4h+2QtrFxOf7XOIe+k8C7qTATi7Lj9/Ok7P/3Rr3xp8mxSXvjpxPTUvG40nKHnJ9p9CySke2Isq2FlLn392wbZJQ+7nZwc77f3t7bXdbLjAyx6NDQLQKojs87hVBYuI69IKw+LUohhWAbtAGNPZGsLwsjCxlbNLdYVjEs0qt3tb2+Pzl6dnZmc0574wf1NsS27/01NzTG7DvZF5flvhEAI+hgimJTxjE8e64RFZY5HwVolmkYuKXoRphH7S/MBeKGnEQcOO5K0Sf5A8T0bAcQI4xsmHi6c8JGj5EzsXZipnZfCG0ynYBtjBwuJCxGVhI1CkPiYUWEwEroq0MivphbdquAGdCSoCnoEPtClgMprEHQ2BYRrHEy5Cmhi3o8wCOz8oo/+0tIcu7nX3iXA0/T0KCoF6mdIcD1FN0wP26LRlphZcUuG8g0t5CIDc2xc5hT9ZmfndHtrf22qWVBdPMW+lyjQlaFYBBtwShP1l+XH5kJwYWKVMHDG4LPEIJDplVUfltgnzq3PrF3H58V3Jus2r5ko4o+vKzJPvVcEis7UA955zcb4bgBUYoE91uzXO22o9pcih2688OmP7j+7dDI0Ndlg/V+/clkJFB7AfrIhtsEdjg436ks1vujTltcbpxwpo2ZXAWmY3Nlpt7+nvbKfKC5S7uR6dLoKVA47e22tUdgxmoawzrV0tjfm2DhogFQQ3vPwlcJnEuLudo5nmotj/+D3vgepkgPmHfYFj0/vxIZcMlTYTHJTdQi1mxK1l4CUUj0/OxskhOJRh+KqhzmsiRMZUbIOWZtyhpRYxrbl6srG0FJdTsZPipMpGdbsulinMeSlIEVRCsiDlYW8PDVUk4cba/k0dCjOVXFx3pdyZLUKm/74xMUFuddhafPUYG2REDFtggRZ4186ggUXRP6Ls5+cSTQl4ZHytEj9GBjVr9i+YfnVMAvOhCPH9Del4FaFT3m5scA4J8wILiLkYLg/iSvuz0I7FCiqa4JFJ4Ps32lb2DjiU1LIkR7miX0KaI/8f4n77x/bsiw/8At/45q44d3zL7PSluuqam/YtOMkQhQlcAbSL/MH6TdBxEDQDyPpFwECBsRAIIccDTlUs5vtqrrLpak0z8cLH9ffuGH1+e7zMqt7hhDAESCdjLzv3HPP2WfvtZdfa68NIa4KK+M3DyvNdET66aKe5cXljZURiOwhPMNCnMCiAqsTTVG8fsACd9GYgj1iYBgJFAijMJcBXUABgFpDhM7VMcnFQi+56EURP0rtUUM0xo8DFAloeZ3wO4JA6BFXZiCTneYBpr64xFZP4D2J47E4KfIhSOgU3gmTYjNBV20ZCn+SL3mdL5kF3UrfijasP8GKzAdwI+nAPGK78CNtxWbVFHRn+Y+vZnrjm9P+pczz8Xx3YcuqNaWg19dWVvFc2h31mWFEH8QK7LvhyuyS8CAUJ9CTT68TGSZCiTbIidFQMUtI+tnL/U8/+UIqo5jT2WEHiDFpoYvnC6+PRvS3cwni2K+NS7qjMb/4wmbryecf372zs7tzX5WsR6vfuJh05dZYR1lfJrmWOQ7Ps3Nzth5nZ4qPMFH49xH6ydnxi1fPVbi3dIHLqKBzGBYolOl5g5zYfkHHwtHND0oAuxRODKJEoSnTZ6ShZFODzcQuCe6iG7+Ox5PTjrJd04/v7y7W1waj4aeffKn4qLKO9oZrtTh8UosjFcvZgGpV0ZznLxfro1pdyMbs2aojyrWVR24gkax4KmruFP/2UnuJ+q1svAXI2tH7qNfqiYRH4wvxl8UW4YY37Yl2FTJLiqyS1kU7KeMxOclIMOuy2oIZ+UxxrUKC5golhcMn4TF6cmRbuS9tR+OJpw6WeZG++RVpRDsMKaV0DzrVI/JW91KmCyLyCth+R+UkiTCyn0r4JLmA2CtY6kCkTQrpOiOa8Q
Rcw18843m18c2JfFvms793fOfeKgYD9KF0xSuiOyTRSTs649aQRhwg6VZFts5jp0u+KKkuRFHAFE4A1SWT9lkfgjs25KE6Z6X/7cS8JuxpgQRAYu+RONMJf9fkHdNTS8KdbC5VmqBUFixdt1eWX77cM9z3339/YBlVr3dnZ8t6CVV7DDEkG4aVkg6pcSRzNZv26ckFQJOIRq+HPCALtbaVGLWyG5+qVvhQ5lfkM8kRo0siRUhYtZZyZDl0yTHRfJiYOTfZl7bFum41VzfWd+f+uz94aeZkXICmRaQckHYP9ClVc+5ySoCpMX/enL2y4q1kukqlEpk30zlMiVfHmeVPYDw7EUlSwqxoFQGy6ZYKS6adz9O5TAvupjwZJsPmjsfBZ1QlKkxhSDAwrD+tVUdOyqu8orJOvv7pDeaFvr4SP9ooqJhnDTjOG9qdLmokfk78DxQKTwziFg74Vftu+yvv/boPiQmlU0FlF9OHYHJBKY/EnCAX8oaYiujcsL0cZGI+eWHpm2dzXvhmfHFRIJAZ9TbbyQcvS+UcjMlW7apqTE33YoPA87BlOleYs+bBMC8VHi/91w4cjibGXZ4+pBvlE98uwNCX5OZ6dwy9dNzUJJQDOL5luyf0yIApcl3v2Xlh8kbi8LDWCsTj7kNmYdxxToNVQKpJPCbeKqkTUQCjHJQXazAqm5GFMbpXX006rXtepfQMLAarLDX+QNks0vjtVoBdZp/xOJNKYWGafjTe5CcagVemS0UkBdsy2ZiSHlaiC4QwaFTjLkRonPzvemDgUHI8uZVgeTq8Oji5OBvMXnNHz9fVDbHW0f3eH5K/vsZe8Va6JC0eu+K6UIkK5HW2AgWr1ubiwGwjKLKtO+h+/Iuf/8WPfmbjzYX51vaDeyuNFXpeX0KkorEL/PWnC8LrZplIMKcylKy2zTqU670Xn9PclKlQB0b9zuV6UzjMFvZ6rjZM4S6ysRbpwiTC+Whojqz4f/ny5dnZCa8PToFfxbFbSCbw+eowEb7ilS6EY5Z7YvcnX6aEOqK7uqU8k1Ie4RcJ4MSKTIwkXiWFe60xHxv+wv0Hu5aOQcVffPYzmxFRYKwJNam4ij+0rg86Q0Sub0ytrEypzQsB9IAC5C9SYSYlIk0jIHMHSWZhzRPQUzfnDfWpmI2Ef6PBNvWpM832croUg+Orw0DmWMkDSfaFhxckjCDIYAlO9+elppuPLvGe0qtaHRD4JktQOfYEYeKKwQYtg1DpXvkvgf5zmRVgrqWAQttW+kwGI9vrggcsICoxMH8ozxFffFYSOUMhwZH87wi+xiWCY2NDvK5BVTwPrLAHcPbq48M+fxVHvRBt+HIImbWdMkuYVFRQ2wupZKzMbbgQDhZQhNqRTFLcQ+wVZy+iixRToISQVZCWNRPiQdoaVMIVnHTW9lTjoZQEIi/Akp46Uu5hYdGgsi0V4ERLNHYFi7NBuYK5e3t7HLcrK+2Dg33DiplLDl/Ci5i5MmAlsgC+zuSIIEsPAwGkCnyEWcmep8t11ZKhsNqLeupK3hANjENbax6n+YCtYeohoAFoxIe4wCxLNNyD1FxqbVg2HACTVTV1EM1rNoHOFda8K41pGRLN1mzdznmNBXgl7HXFrREpGD8S/C6VKTgBLDjWTbRAokc5Ms9RXkpejpo/rOVr1p5NSqM+m3ARfbaenyVUhSVqMf+T1YZaYWiGnAMOQrJcr1hkuZib/YVYIzFDqy5UJ7kBMoYnoqX8ZCYM2i2s/IIMeYObvz6KphJcz7PlKOdYvVa1m9arBp2Wk/KuPI+BatrE6M+bTOLq5V7roZBEnoavCRSE/XKPRmYR9VPjy2FYOjkEHgI8kTOCBzKaAj0UEGGYT41h7oJg2RABpE0qN4TGuDugfzSdyimq2QLPEE+Gb+xQn9CIvATMvMJ+I+rizqeGrAOhyU+gW1xJMbe+IHaEO725dD0cAQonzkH/SpFiGqb5RSlpOq64RCKC5kZEYkXnSMVlLWiLrPE8iSCB3AY2PPGJpkKA+Pr0CL0ZEXZl1AK21HD+lBT6QECQxSqpEIAHAFx7wBEujOR8xtLJD2/mPTOR3EckY01nkiM9kcrLV1O9kVIWV6e9y8Me55wsPovqG9a6EzkoXx/oDRgLEmVzptR41IVUjwvChJKhN5yyyA3Ltwsw46sPj5uNqQ+/9XBzuz1WUupiYa29e2ft/tXo8uUXz8+7w/lm6wd/Y/2f/+EfPP+Xe/NKS+GCUQVu7Llnx6zB4PD0dPHpsxev9p7+xu/+7je//d17uxvP9o8mil+k2KZ0msy1fRzabTsa00PxqRvBM7/aotzA+XKp2XG4lSOgKBqDT7ptARfIVNArSAQcmc0gcCKamSuqkki3/H7ICZxSYvII9R6D7t5Omuvt16d7k9v+o8f33/vmO+98+IB/tL28eHT4yvCFx+X6y6qNysKYmLm6f2+tvVwXqHDdUgywHY4p3xOB7BLLiEIDVbr9EbTmT7MhlKPTVWckqwMhL041ZhKEi6mSRdqVLAP4pzL63O369hrsW5yXdyR8ptp9XlYApeJGloEBArUPPFAF1jq+6AdFkulqD780W9FOGCHsLhn/AUVBojgAOQzisbVGye9UH6xOon4Kh8RMn54bX0lVV07GIlRTmawAJADfogCS85yi9FDUlAWTsRQQBq4Kl8KuCl3RVOYX5b1fnB0NLs9ZfYs20xH2QideEImLKJzjpdNXQtZgSPxq2RB03gGGKAq0QhOZy19+4gZGQ6+EL+Qk/7MpdhiZzraXFJ9NHQYsAJuwLMK0yYYwgXG4WJp5Owf5VbuoNwmPK6FcTUM/c2TB1p27Oywn3VLuy85XZBgRyJmva47SiaonZahlsHC1qXB2u7WwZGdcXobiN7ZUwxpp5XipL8YWa5LKG9yIXkjOUzAzLpPlK5Wldufu2+qGMANVjcmA/QV3w6MUBL1FyuoJ1zkwaBvo9lbsSeHVLD6r1Ht0QaO3rjAsKngWj/WlRS1zNOirSwavvW/wqhlZG6U4j+xZdhV/IZej+a3UbvjC+DKhkNLwYhTHk6PfmdsM2En6nd7jGmHT+ertpc/JDXnza3WPL+HI7vFs1OpyW3m8/MR5qtWvHw97C/VqNs1UT/kMg00LrpGozqAxVC4Xyq0REY5o/56NV73cns4WdQ3pg0rIvrzNN27QsmLQL56C3RBTULRIJb0wbTJbsMUYMUadVN/UmtbXOCDo0lzqXo18ARzfQtnSqoxQyBDHyuvfWFjVKHTWGMyn8emLzgZzEUTaLPIbYcRuAfDI1IjYmN5RsPIKt5X2AgqzW8RnBFVkA6inRASJEPdCya4uwMig8mAKmceLBV5BP8Mzr15/PS0rnxpjiCl4PL8Y/hIGaqbsDic6E+84xmYvcGlp3F1ygWJJlSNMpTrCdCKWM778iP7yDz33zToMvdIsoMnbyODnxFRPhhed8S2X4PnVdP9SyUTlNVt89Kc9oeDpFJiZnuG1r9WapakMhEFAwTXg8I/IVplAlySWWJ/q2zeCiBenzdbEkipJz5L9rkcL487pzWj23u695
v2G3O3u/M3v/Y3f+NlnHz3fP5I6rq5cykvLL56/7Z7tb2+uvP/+w3/1L/7bk+7h3t7zv/n3/pPVlaXT/pjcwIWViVHA0HppOwJdqtCYpMFzGck4F6XV7FWUAgiGDxY6nAkoR4bgB6AxHeWncoJmswoGfuFS9DzPOPcEeLkbLkc95hbJzi2yhCU6hqC5X+YOaEvra6stGgVsbeI+lHNJhXZZYKKLe8f3gHdNiPjrW8s2z1kpa+trbENzX28sWxijVBz9YDQ5Pzw6gDYKBF+O+5x08ImiUHSe8Eqc8fTUMGGdtrgbcEV88QL2nX0+oKiwH1g1BmLctJLwedp2fQqTpWFwbCrS0Fxqq4GkBL6UxTrrVTUkywXUIbfoT1aDtKWEJWER/I7nh8MnyS9TUwqeSruTil3mGRWi9dQrYZ2fqy8s/fnitmeJqQJ0Kpo3LdePKR+hFOlfoKpdOhB0QWdhLnlT3OTR5rJsn0HDH9g9U9DuStnKuDRMVeYwNOcUBOPyu7WcttXr9cny9FSDElq47ZXfL3wPAyknnszDIbJZgas5WwcEdhI659UfF/ux0UysTHhioxCARWAkssqTSerOO/MZiWgjrbIk6+ysYxEg2StWt/fylWSGtZXWH/7Bvxn1BxAJjyB4fuV738l7eTTQBYyEOUxOUhUfYbzGkL056ZxYLGxX0tXVZfPi3EQPej35/jNLy7LIJAlnvHGD6fGspWIIjxwATMwuertNXKzQaqy/3j8gS+lo+Jc75U/P1GdumzaGueYYvG1MGyLnHxF9o+RMXabK9XljfsUEJAITOQjHIxDCkAhlARayN8FJkTOVXDBVTp0s5SOweHhoEJdwQGiNR+jCUtdSHoiyYpkMQY/zaSuTEtZcmF4UCINxyZFuFoJ0XjiU66gxD/jp68/MaxhmeSpSo/yW33OABBBjRKSARozCu6orAX1EXcbnUvk1IjyPBavzryPvy6/lMUp46YS3IHof8NxQMGsx7TRjEgrSFPlE1HAoWx8LXeKRi5Ca5kZjLnNiJTwVM1jOLJUTk4L7M1ZW8xywbCKxSvsEFLerbicHNkKq+O6pKDFBy0HYmGy3m6ezs36J2mM5EfaIH86SqeHlHjV8pGZbEvZeJBTvcdRSSK8lFOFSiUzYI7SOMNzJ6R59uPTeHTRLffAgmPjVGzziPEtenVHzQ1DBCUrv1fk10MwuiouLrxtU0CAtXV+vLLXPRxP1YRS2MA49TDhFKZvzieBxGEEYSmSTDhsC2R5w6F44MPd7QQOLfyXYIRxoA2wz1vezEpQYuO6OFGGyj3v/8rZ2I2Z3W6Ofr+/uuAsm8LcYM/joGPCPbV7Q73Pt23FVOhxH3KKsawu0R0qhdXCy0UjtNUPimGK99fiLLrnh59aPji7/7A8+uerO347mt5a3lRS8WJ46EV0endoBcaxYhMSoxuJYefXaIvvj0y8++o//o//kG9989/Mnz/7iL//0yxfP/qP/xX/aXtky0IG9ay9v6w25orXzwdCKLFtaPPvyiS3MldHhv5OiAxzhzsXDpvNB6HKAfxAgR+aiOvwS1A/a+sm0RQen24NriSgk6akoW3mkqC/E2YzpU0dLjvb+0QlMTvaPbaOazXfevX87NRiNDi8vOrWauT6/nPR1QNidxqVNRf1tryZj4uYcP5oVclN6ivtfruvJ6cGzZ1++98G7gibNtXqvf7LYWrzz8P7xwZEEAdmEJyfHirOaavgVnGXhxncTypT3TqqpBoWCOGJgWcGvlC446511el1gIl0lNb94ftobvSKpaEiixalcPcpS06YKZiWXgnJPXs7XucIw7sWVtU2uYA6E1bZC3Ys4Zbu1PDdblwS31N55vXdo66XVjTtK+/bkutFi5xYbtebgdhyai06ZDQppcHpCnshpKyxSIepomQEKCouFNNOYq+kkbRQdf/7Zk2997z6VXdA/8ePr666afdx0qdF1QXmq1+Wa41pzAj90WMSyurZ+enoqj05b4UOU5dSSz4wzya3VC/+xID9V+KL3hhnQAhVxazTt52H6llaaveFreXBq19av7QBl8ffsYCygoIjXdOestzUet9or9i7gDFZZCuCNr9M5tZLmcO9Vs95ijsMFgofuo4xNv9cj3c1TYjsxiIroDiu98IrhxXmzZTtmAY7wAfbZksUQjTpMkbWkd+ED5pp1PUyZbWzQsCD2oh26bqZPTwa/9bt/D2n+yZ/+eC4OVsw9kgLMo45i4fSoSKIYTdgcJ2kckjpjKs7sK2pmYrXesCmIaIzcCzFeTBbLRQ98zPhX9H57N5B1US18km5RdLlu03BU+niUcoUdGt1BexwyCRG7YtYrqWM+UBf+WEwEbDE0CZUzUYU+35BjWqgO76sMLF+dO77+KdD0XVPlugZypbQUD5XruVSwoEIFeqevsb39WkwTJA1ioX2gKn5I0CvsIYY3ZpS9YFRntCFNwt3Fo89WkkuTRSEp6BLvpeEH6Fw2NDbpsNbanIOgjS0mcyNs9jJFklIAF9yjoXlZVr9KxpTyS0ZEnSmQZSBlEMileDzyjyOSAzErzsStbCShdaJKU3h9wKHYCSXLr5RsozOjBapgG7lIuGo/KGCcKbdw0e3Z+EDDgaQX+tXPGqXGxRtoxsqMVL/qrN8LPuHmuEwQqbiaYAtdMksqLiyCLMSmX97/9OAlzQAzpAZKjwwLFnlOMCnrpiLF49rPA2Gp8alWCJtcx6LmxNGDLVAYy+yQbU6S/xYwzc7Z7X5yOytXVSm3rOc1NbPy7hZ5PxeV2uCMwOGkrC8kawA3cRi7c+KgbCU+Eny+GnLAEp7yzdhuXIOjYMFsZ25WWldssq2t9V/53uNXXwz+8o8/6RwfbT24c//hQ8Wv1dxLCQZGCtcZYqElQPG56bP9V//dv/xvfvBrv2lGD47Oup0jRZUOD463d+4vr2zJHzg7GjXUkbmcWDpi2GPCqt9TcHupCaPmL4hiLQVv4/HRga9n581E+Kf86oYcQREEF+LC+3IBoeTchNFtijFg5An8RKFCzZRt2VcQb2oy3e9fd+oXHEhqK66t36wqvlZbt9hybmG8uCjpK9NkRW2C2tErojxlclhrShAQKLHhCBJ773RpFcNRh+yRdyOJgU9TrMWVG9Yv+d/rnE96+K+Dyl+sLHY4BBGH4AWlxtmNRZ3kWpwbMDG7Vg4W6u+pI8tAsoL/wh5St7Wp+cYgm60JqEuIVrnOIlfrp/oWgfBbel2nd2KXAAij7uXB6Qs4wFaUKZPVPKk5GicE5jk31376vHd6NNV9NFaPzhSOr25Oj0/IPdmmren5FptNElSCTDGUkMkkqnjSfsJKA+fYDobCawAq4R/xOkydD63bsakpgR+mEOwOcXkpYlXZBmMQpkK20A0/yGMVWMxSpJE2mHYxRWIDynuSj2PjKAcYYTBBMy3i126yzrmhai0Grszu6uH+AcWMNh3uosdF2y4di7MlzuQbEaloNFpmEUKexXptY219eWnJRuDwBUnTcSlOxPxpKMbkI1ISEi2GiRsnlaj0Wa0YpCkWu4AEFK9V0cpo
3JinqMvZcFw/RZOigAZrs+dRILO5uaP4xfMX+6/3zywmZurc1qX92SNK4E/OOf8gLstwjRNKvEJWgFybxL6S4kULIF00JtskjC0MLIrs1AKE5LnRRZQAzSz1sWjMdny4XnbEvrCUuNTEotriHOiGParJlDFIgAELMUpwNbwiqdLnokD4h5BzilEVgszVcKfyez5zlK/Vafnxr9wDL8rxFUt987U8pJncmXfmbW9aSwfy1qx7IJ0q28rgvr7TiWkUmoOCWVGIyy3WGM+qkpjdGKvs3BTzgP8KFpRIXaIREvFKRn8iPw6WxMgczS6cTy+MZgRO5sZynxZq1iL0pflEUdAD9gQFiyydodAOQKKk71EXdBKn9alr/g3odMxhTNh7VBH4XRg7BYBhI2qSSTM8RQJ0n70X0Z8XeSqnIdEMUwtYsROqjhwaGV6BkbuDjVXng2mXyaXBM8KCK0hmjjSAB1DWI/39FLRIy7xwZ9Y5hQf5n1GEGLiBdWlnYyWGbpKurLdM2RnKl7SLN0ssipoN6bw6ck/Yxj4lxYgE7ag46bAD6isjEGGAM9N0hccV4rTPCc/iYDI1lvpKBXav1Pbsm8ypZIPwqFbo0+jQORBmj5wS+YLkRszgHXZOVxRD657KheF+EjRizqlo4Hf7xl1PDbIs6mqEbr/9Kw9/59fv//7v/PbU5Vx9qXF8fXo06j18tHXy8ye4IP9OqWESdkDl3L5/TwxAUcD3P3h7baP70cef//BP/2jn7mM+fvqCjYJ2dh+cnJwkY1wptCtJ7AcygZP9QYC4A2PNDIaFZWpMmcFnHjLvYAIq5TPXTa8bfYUPgU+U1EKtrpR/43dwY9QQKBCfoASvS2O1yR0Q3cydnVI6BGtsWDz1f/o//u/+4f/yt//23/xuq7E+uXgtcV/V3Vab369oitXMawxyZOEEVpwtvogZYL+8Gi6vyIBX5x67u2k1GJom3vZjhoHNX6CkpoLwb+bUpIpvCNFlWrN3dHighfLyyaSqsImjETEm5Jqzimnk01Ot6bllewPcSHCeVR5G9a9saMTPTOYxa2yvDuYLDat0OS0vFVywqYB9yeTZX/bsf9HB8Xiu2dYc7frV617/F//F/81muza1GvUm3e6gc3xMkL3z4PFpZx52DMtuMhadWCYIeBwWUb0L9AP3AD9RbYA1PifsP8waclrLRA1VXSnP4DJRycJmlJxJZujV7WgUszozGrljTkMx2EjhHpGDZc7jESFLmnXKcj1Cyx4wysEWHL4tO+Im5WPa2jlbaw4oI/cfPRBEHih22aUXZpbMRCjXO7ynom9qKvZe9nRFEeiA8WR2OEZ4RrN5lbXt5jbbnEL+6NXxe0RExpSsmIrZAeGEt3Et3EduxoV8d/tOBdFKQpT4w7ViExlZnKnTeGklvAgwBdrf/sYWTvDFk5cHh2eyH9B6ctVrIsBCKYnfxvfErrTvb2q9E7IlHUxGGP64iNtGVCWBnb1MyETke3Ui675FoBe+GkZl9OlCiY27Hncz6Qn69DXKdRZ+JOxjKDLmA3kNZGqTLAeCoax8DbF5vBzFWipsq7peXf3lTzlDk29YZ3k6j1fHX7n5zWm5IYStnz7d9tfvQfx5cblahle4QOi+rIwzAfSF7L2XUEPWoMTLQlm1N/wcUDWuprkaUAXRn6JUrM10D9rlRcbIe4CzCFHxUAXe+OWcLeP4BiaKnuLXkpXjWIttcT0mrsby0PHKsB7oJU5e5FQJY+lnruUvY3FCqkXK517TYIhx+VVqtc3do90QcoFxjgwz2gLQ5azgTtVGdMzReJIbiqwyNUHOoGdJ99BKiMnvlbwMswxbzv/UEaGQCJpwm+mZ5fa6RvTF3RrJDuHFlukedzWYsL2Q+2VqtahBRQfA38kjtXAIkgg5AyhC1FpDkMzIgiqZuNIfmQg2lyI5Yy1gbaPJdd+KyMl0d3TRn8zDvpTtmFVG+fr07FBWLnd7MmF9L+oqskeEnU4n/lybDegmFqKGxbA7GDaODl8qhnh7q9R0NgjjvgZ+uhjZZx/eqWn7Cx9PRjdTranG8mIBfnfhetCcvnjwcP1HP/+5lmgx6FFYvXtyzKJ5+Oi+QfW73Z3dJftXid1JpX362UdPfvHx7t1Hv/f7f6dzPLW6tJo9banuo5Pu8Z6UevG922uRf4p1AiSAGfj/EoGhTCb7qzn1b+amQCk2b04wY6wiYQ3ape7M0rQSRQyPqW6O1JJaycCiAiRz6+a6f4OhT3MeyKFfVqfqdu70tDff7U3P9u1qaScwO59xOWk/fTFPQTppaRig3W/nxuc3zWZjMOgTGDu7u0kCXJyzM5KwQbfbBcjoClV+ZqMODbAVk6spXuqo8dZxz94qiwS92Mtx/+Aj07xqUaukoPNKJ0WGz0yKgTWMtxaoLs411zBqXG00vBmKGtq/Fs3O240lNZDEdBBY7UZduHkRpaZRbUq+79hJYLVdHw06NMTawsreXqdV/6eDjmWpFzSJ4/Mx8futd9/7B/+z/2gwtLMgvHipOqb0U8xZ8RFbyFvCqkWTJKssAPd/ItPmDsML5aVWivrH6koknCKvAcvn08Do4G8oMZ6YWSEuxlPot7ruJGog+kJ7fC8hr8gqmeINi2852cRhKNBGD+zmkvQRJpZ3dzU7JgJZBXN1ddgfmab52XCtSXL03YqfVfQL1hpP+/CVMZEexV/Pf2Z6zAu274/CK1k8NIh4iv7qPpMeFRl6SliOc0TjyaCEdAp/MFs5LuxvdxNlc86i+RB//EQCw5lgggam8R9KfYh0Uyt7vnb/wWN1Eg4OT+2oSrAbFzYY0V6ULdiMSYURmPaccMlGOkWw6A9RGSINS8SCI+0zFfynuK13yl5jD4YiMkVageeEkeAomoneFwBg6XY14ly0Uo2cIiwx0nDV8D38JJwPyUVOpBlTF/lUNVq+Vj/ksxJvv/xezoIbb0i0mMJpINdy81+h6uqpN83iyH4N4MpR/ZbPZIhXFKxvoUAOCZ/0aFgCmsSVT6FzY0DedskgT/giuG1vRnPnNzOjC7H92ydPnvE2WVgDObTKv6wF5nGq/8lzYsRbxBd1A/eI0FAyUm10+zmoDYYAJiJaVuLcKGelFC9GF45AMBXcCHs2EaX/AV3mr0xnhpy+G2N+jNkdQR63p6ARnDfL0C7DpF5hLhEzb0AaRC8w90BmBXPTRLnZNw/kG3u6DMfXIE/4Sw4/xiJ0EgxyHcJoLGXaXj7bA8Y0hQjyGaQyO1zuBMRyo1VbJibgrf+RK2tVUZkUoXMEIR1FWZFspgUYhWPTzaE9wCKMFMcoCr755LBQLsdi9+E5rZyNRV9SqiCqBqsoLik76+F480r4hKgiClVvCr8ZSyrg1YXjCS8qWG7TJysbB6dLq4uohkc8akwisvTBrH/MmKgRGOatHdKfWS6p817UHfUVTHz78Xq7qeAZW8hyneiV62ubHMatemNnZ0uSm37oy93tjZcv9rO2aandPXnxR/+vf/Y3/tbftVmJ3G4ctnvdu7nqzlwP5DXgCshEnkq8gRQBgM9slMM0Zh7Me2b
uq88AOcyFvfoG5c1Z6DsHyRNEyEyUVSiI0jmT2Zo/EDJ3+J+1VLd9fRvwEAkJ0XsglPr5o6vL3mQ0WVhM9Y+BZdjmO0H1LPgVwWU6IHjVVeUuiHKBLS7WbrYMCoCznNO6P3RZHP4mEffjapVaUjQsPrJohpm0zI7s6qFh8NIkMTnCzGEmplWFmFL0cMYuTVCnPjO3NLa2bmgZxzV4UrEp29O3tpxabtu/dkFa2iqXlC0Hr8bD+dklHEKOohhlxwYY4+k+5WZ41Tk65IdcX55++eLAflCdg5vz3cGdlZ3Xl5dLC1PLizP9oz2dv7e29qDZGkqhOTzsn3WvrhsYxIvDvQ79tBjp1JBYVXi3meJRStKr3qO0KVX0AIT/pUJmmOGk4Hlch9P1WRZ54YxJ2sIXDVhT4fA+Q452pp5eUECluVBTqj/7t8sECgWwOEO50o3N9xXPypJdoBjpa2uNg/2XFyIFXKuL9lpUvmSSltF94bphHRE0uoobRmsBb+xNPD13QSx9xEjCKSIz3Ekr1qPMSzkylsjTcAz6JxwjhcgQdgpOTpYydsViK5EGJxE+hTRrNhYoQwZOalsCb+HLbL0pAXSVFfiLz58cHXfmiAqkls3zrACHE/GhBAXiJub6D6oFSASaMArmT+zhMMWPA8cE5OE+UmGVJ1e/1KdIR5lSPqB7WRAn7pSFY0Sjl9l3QF10P4k71oqrkYiz8Ivf04SImaMY+AhBjd2ATYp/35y/YabFxiqgCa39u450wRFB9ebZcpc2K60zfCg8NEzWoN3sPFdIDELtqyuFlWLFoadwucJHAx8qQDHKFclHl+r1Rg2xUocWP7qaG44n3cFg/3j85OXJl8+PD08Gqh0jKAsPuMJB2YoA3rX6/JQNxdda9Q0qdLu+3JhtZlU2rkcLw+fNN3E/XW/QGCgeNJEr2nk6Y+LT1yJCClqYkaKZlR8C/LArR0nPM0kh9Qw2XjtH3LX5uaS7gaGxZdLNpDq4AUj4FXoApbiE8SqxhfIiTZecjoLHN4XXlBdr080QNc96X5Z6FL5F0zPrdLeEoqekzFVkRivBzgh7rs94UwPdMLg43Y0wtJOOSnlADz3CJCkZuoASQ7H8mkm5KEkqWRoD9P4rdwQlg5QcKlm+hTugPL4HDp8JPYnMiittnu4s1iuEi7KE4UmpmdnFoktKuKDCGwgHlS080OlEkdnTkyOexnsPtl9fv7LoyDzyFLGsZqCAGgHxX+iEvDUWTzaBZg97VNQbpO/trjy6v3V4umfBN1LW9uB8MB7dPrizs/f8BdVNV9c3tp8e7otR1Vezs9/F+HT/Vf//8l9++vu//7d+93d/FyiWmtMrS3O9Yx5F9QTiBIQiwBrHBJiXwySWsVf4HDj8D45IavE8T/CoRarQfYRrEvcyZNoHTsS9g365QUwD36kFl8KIVpuii7nzi17f2uT5+w92pKGfnh5P3XQZTtN1bIuWToDFm+Qt2TCIxE7OgfWcOhsWpyppXw67JcdDCwZoAKoQkTlcY7PyDE0xmDPgMEx5fOGGDg2bWt7hcAKlG9kBScsKNy/uA7fAOuvlqOMXN1LmMBdJY40pBU4lHA5vXu0d9066s7IT5xZXl9ob7TZvmTzMO/d2F5sSiGZsvSuAPLmamR8zi1cbayv1uevGwvVoSDm3Au9279Uh3Qq1orGZMLOxndtOXj75rz/+ia6qk7YkZ4EjKnWocEr8dNZgmojQbBjgAijjjvFTd6RcplgG2zzagkw3Hswlju2vZvDNCV+9AhiBPNXBIexnTWKmHKHxR0QTDJXh9dYYsauQESiCtKmnlpItmcUQMAVFNZC4SslI+SJLlzcHJ52h4tTUiVl1XM86JD5PKfIxQ1AoLyyHzhBNWkEdVlDEDzc3L4k9OofcCHNCZaNiM9ugv4lS3DJHhFU5yQdFA11Lw3PRPd6FSUTGCTOWIIqI40KT7h7PnZ6abjYlvUqTq2sbNPPPv3zS644i6rCtuIfwI08zLUvzjClhYdErpbNK1Xe9Nk949yV/FX5Jwk2y9I7w4g7BQabtdhHYFGQPUykOZZxCOe4LJRGKqWTy8C41COcmnF56Fr4RMZjU2WKslfdTO0JjZsNReGz+zeFVb6CQiSssCWfLbW+O8kgl24q00u/8gnEWks5prCh/1c/l2aKLltti3mHpGsDAIy+T3hO25VqMzYqfllpoBXLBH91CZhJ1IMRgMvf69NZC1GcvT1687h2eTnWHU0MhE+t+hvFlV4e+T4+mZvvJLTk8PGnXp9aX53c2lu5stDdXFtsNSoaVRyPuJkZYMdssSooPGUPmEqqAoSkSGTwqcx3MYGZgVEYadlPui1PA+3K9kk95uTEaS3Uz8zm90lBR+lhFUcyxnAilUukP08FZWCikIH7hZtjoAR+312p1wUWkwq8g54rJqFSeKzh3eCLJnJQHCAKSHp5Wp5JISOpEJeTwSu+TQX5yCpVBMgZX+uqZ4liILlnspxhPbqHWpR3WYfQ+/0VOJg3SZxCnyNo0qcA5d1apPcWgkbWqIKc9u/lmo6pcXRwfH56dHmffyNtFAXivprRpH1joIr6CW9RIC1UuxgrkP/nFgb0J2hut030sIS4EQhP9Wq2W8LCsk3k75ViIyg4eyqzEoKiwtxctrGZmqvH+Ow9/8vN92owABwFXW1qhUwrKcEh++umn29u733z/w8HO9keffrS7uytf3g53yEuK/B/+wT8bDg5/67d+azg8ajZnJVmQgSF3hRXwq6w9Mb2ZFp/hThH2sbqrKz511Fgwt8wHIvc9TK9MS34IfsAAP6SBGG6FBlyCKVkHySCYEPweTbG88bn8i5X2UqYre4JGgeBNUJ5ekUeBE5NHlwG7iC7zr53oT86VQJWTncT0fm+k63bhm7nVmrrW1xLcsb/kWF4Mpc5T/2J/5EgpdwpPzrj8km8t6Q2DL4uQoIM+0rIurudj8dnPaM42mGZPsufFZOonH/309euD/mmfOixYttJa2l5dV2JfKby3v9HfvX8Pazo9UzxLmjotcmb7nd17jzZWWwv1+Znmyo6MEMVrT0/PxgN+yAXJDzfjUXt+bgnGjXoWnG2trCijeNXt3sxIuBYhS4q59QZK6Ddo5GgrakFEmDNahvVlvtFEXZWODjOw/vZaPKiODNJkpAgWraGaULOZCTV3xZkfTx1A5QsV1dI0BEdU2Z8eU46SmX0X6B2EFT9bsHk6S/SsDraSXV64ikNnndHUzLFygti1bUIjA0SHwlmjieIHwQJNZc7yYt4LuYgvnj/Hs7ca6/Z45BLM5sgQHlLEQ5j92NyWiF8oNJZ+ebWnHeEV6UjU1+KHSOIfIpAUAsFZuQ3bQ8q5J4dkw1zKRgkccIbZR48e4y6WJtrdGwAgit8InDjD2JgJrqoPDQWITNk0CU6IpEmyQX5GI+cGJhXWjVRIeB2LlpuKuWVo0d5jhxJX5WCh+xIeE3ibkGjZsA+zgmjFwA2ENOMzsfnww7wibKiiwzcOPePNr9UBBsbj8406GfosOnnFfAuEAqho+Gkm5zmiOHhfacTXgA
z7g7947CWg0JzbTlKLoj4djuLEJA1QGmERoXRwttQCNJ2jOQtYdrbC2tT2iVNML2CdYdZraZCm5zt/+Sup35UIwGzO3ikPhvjhyI8oPeh5OFimNdnPRRWXOfFG6oAAgJdtwb4CdRDVKZcTKp+yrXipAaosEjCTGWdUc9P+WhY641pgldksg3ehqcqlhpa079Ympm7jERPZHfQJDSEfcAnXVsOTMO57eNDjqYE/HjMd4oMTbeeefWs89+0haf2dPeRp5WeGWJY5+Hj/zg5dd//POfu37+pvecPfHkJ8662Xrm4y+9+Mb3vvc9t1b2hU4/8sTx46d+7mf+rJ+j7D943VOwZz9+9uQpP0m5+uorb1y46MSg+THHU+l0oZgzwz5594DvOSak6qSOitRmmiL36Jky0o/DAJk+N38d1nLtfeuGka8uLmyvzb1gq6OyU+ucDKbBnAVRzU5A2tHjWRH0xTxTYUKXWhrapBJMS7lD5rE4MBsXyWLQPbwmLq9rOrDP0iJuH//EJ/7S/+EvezvHv/71f/MHX/+68cKv/d6hrsLXbvxP//Pf+5Ef+RHHVZyn97Nf4o+eftjt1POffsES9fiNR8Xcu6xIcU/T2Jx05+Ki1TaHU/3Wrdkg3f/W2TOe9/iGEqkL7106f+G9OR5y+dU3Xp/naznjnanE+cWcyncufd8Tpx+5cPnia6++Y8LRA8+8/d6NG9+3monYhXPnrTrPPP2k1cjKe/L4B3/zb/5fHEC5+O55S7hFnBkX07qwJ2XtthOWPCbwwUbLj2NoXgDgBz9zybE/3wu+pmUPXPXGftcgudmyR5VvnYqw2ri9zqc0vU9LSxw55tWOn3z+hWc/+dyhB47aQjTwDhw64nt4b585e+4NP569lusBi/qBHF7XH+wl6hwmERXkCRCx6ck5qKVpjFkfl9K+agrcWikVbU2P2FHQgYBZtgMZRRY/SlcpFAqNC6mOsaYClxraGifTDp3CdTk7Ez5unKgD82l/mUBXfrudmYG2cTbxtzLRtjzhAIrrGB3AciU+1PBhnM2YxUBKY/meyKGz7101Rah1/umucywwS1K6btaPuY9x/WA6t3Lli2G52dJp80kNpxW8I9pG88HL3l/UxcUd2sy8tja009qsq4msSmNr4pNdyqiaIapIj+gJcZTWPUUpN07VN6xaKSEJXRqhuD754MYyt9d0PNLdCx077t/ziHNszfJ4kwe5YctEwUAGL8gGpyOtucIYP3JdhTqF9M/KFj4Nma3QmRRSFXtcjvTMdCC1gYOn7RTv1/Oe2b4QGeCDDTp6rupNXnMVY2rnZGq46j64JAaybE/N70xjBsPELXEZnEgnpspqe+JVpscUQdSfdAgAt/ut8mrh1zw+RO1trVLPg1G0tht0v//PHZXv5BrAc+UumK1dVdVEA7fMlYFFnQ9bKj69EALQqao/smUgC/imaJWW2BQ/pOJNq2HxlGFXrSLZamMFeHTh2LpRbe6mRLiktjkd+H7hhRceOf3Qt771zdyy+NjIgydu3rrsqt0e+2/+5lc8237x+697UvXxjz/re5wXzl+2C/K1Pzjv2e7hQ3/khaS//VtfPnnKS+r2eUfeiZN+6njCe8qffuYTDxw66lrbKemXXzXhvzILpN0SmycBHW3cS73qpwjwuY5NB2+Fkqr9vJzmggbwhV+cuaAznfnPK/ynddTR/Ykic5ZqPnTyOPpMBzc8l8+UdPXqsZOnDHc7D4yC6d/BM2VMxPRtaqdbxaj+7rbn7Lvv/KVf/JLlx5cdnDn2/ol/+ev/+sKFy6++9obnLx40+dTkJz71yZOnHv72V77iN9EXr1w+9d0TTz/5pO1kDjx22muWPrj1v/7ayYf8NOqxEw/neohvNpRFz/u8fUDr6IPHTh3346mHDj/uot4/W1EOvHv/QjZa5qS9157mbYoOmTrS4jbMjOUGy8rtjkKdbvpA4uUrr7/1unMiXD//3rs2rFTN91xEw8Wq/bx33jnjJ1ynH3vConbtxs1X3jjjS/Y3rl3cf/3yg67IM3Hb8JtemlR3t/64wMjdkv29nAa0oDoxc22fn6v64qAXKbhqsSs4L3UTPOHPAQa3z+9/4Ms1vgL99FPPfMx5JLXwFq4Hjp7+/g9ePn/OxqgPOB50nPKds+e8E8Q1wy2PS2/ecIdqnvHtoTde9wIkX9/MY6TpAbk/P+QHtHMFacbvcqUIIp4dIDE/Cwy6/qAnSFtaPWn0AfHXJdIR57VklFgnMKOjiJjlyp62LNzH1TBH5KqP7BzNFLbeWTNjs1dbFNPPB0Oeh1I4CtOASHc14kOWBht78+M7TzH9Qss5oqPZLXj5pR/83u/+7qGzl3LPsQf4vvV/UzbbXbjMoyisWMBYyh6W+VN+NFgmMsyAbJaNUGdpKXUWKiTlnfo3m3phG8hd3AbdxG+Ty7OkwiyVLUwXGtOd2zc+j+08eBuIuo17KD7Le9MesRXfVX9/uB8GJX4rxUcPCLSK0WzWmvnToI3huZ/MXKFSPLeqieDoVz1Xumbzze2FpkVvq2tLjdGq6yHz6Jr7+cdmU9xCtvkXO24/Q8ke7niey4NELJNX1mt2U8gPdx92JcxuKYwz2ZW1IjGfIDGi4nZ70HjLMamqp/bjilKI3qOv95JHP567KB3a4SJPudO7THSCYLaVjoTt3vx8SnzUrpf/lIPbfmY12gSEc0Kae/P0mVzfwXknyv1tVlsnrs5qzb3eW9CmMq6xiQB6wpk96cTFME1LzVae2HuWzRkTyAxFNc5sO2LpnMm6SvHeuNmOYNo4hL975r3vfvc7tjswy7pIbGQcmvDhbQNVKz35xMfMZfMhEN8Fdrf4vleZPf7E83RywGz/ne+8wyOffn/2kz/6zFO3fBT44nk/6LzsdyYH3/Ih8DSQCdorKTKD+E6RV0z7hrk3tj/mi1v5UpR+4um6rS+3FD6HZLrgjy0W/psUGHJpjAJc0vNQfFwHvvTiSzaKPNgmrcO41Pe1rueee/bo8SMP+c6k73gePHjBo5oELfcrWta84Ii6RnXRfOuSj5X4bVl+U2ztsTPlcmliroM7UJCQi/P8y1Sl6TUNqkAJ/Ds+JXLp8ptvv/PGW2fF3DT5tW99T13Ov/ue2wiHIV57/fWf+OIXnFL5yle+4s09Tz75uMdLeZm3UxK+M3/ED3KzKe8a+IIXHZy/YlypKeCDphYrJyWTWKXmxb55fW06a06yqUK/J6ffHrmRZ/X7HzzxyMMnnnz0pDHiicb0pfwm2U+iRWC6azwf51NNIWULW3Ya5/2qDqFcufLO85/yjsFE+5amuHrRRxqlpHypXTNd1qqZsvNWRrflXnNPvS9UazgP/BMx27iWuCMHXH7oKyc8pVaLfBggr230glafdBFJsX7P7aN1yVp35tKhV8/84Ae5cOGV55emIZCx5xLqVr4SdimL/dXLF95zp+muzv34zO15+nH02Em/fXbHKbYuHVTN2uniUpDUTur1XbZAzr37nmbidp5euMM46AVXp5lw6sNAzMGWD7xIL6dVIS52fCg0TgicoZGv3m/
msddfe1O4NscoZseCRV3Dd7nMevAM2umipkWnTNzKiIt20804kBjNkQq/JkQxFphwl+aTN37jQdrXBTP9HfTLhWt+UXf42Y8fOeEe9JHXXn3D4adDN70O616Qle5OSF5XyP2H6VUm08dA8C2v2k3ZpIgzfWYhKWGWz/BGzT1hNh3vLrnbn7t5PpySud40Oqc8TaaOqWXkz7xGsO0hmoYmPENyZkMVUNpKQWZPaO49x0/iDUGaYaY8OrGNmkw6SjP/zqSuCC5FHP5cu0Zn1qf8m3UniVm9jgWfwBGJZtbcbcSdQKYSYIFy/6ivkOuqPkU4zQvHty8lqg8Z8HPm1eCpb7J6rXkBgqebfhCKeauH1dupRS6LWsQ6Ftkxu2lLnHFnQoofjxiWoqilsuBuHPMusdmqimRrOsoRZc2Md6vij/GPASgVZGlxiOkArqZzQ5OnqtYnEwkiHEBoFgGjCEzt/Fjk6MMPn6aGlOXJj+W8vuj6VT89UX0t63saGN0HnD710E13ohY/qqQ56evX0tdtBnq+47tAnj+f92TQ416XwvxhS3iFnUs0jMM2h10Uh+KnviYFCG3tNqqmRqQwI/pB2KuvviwOuixZPfbylYu2ZqvZ6yRUx5rnB8jvvHuGBttfhv3TTz9J5PjJUxwD5kdBUlktlco7n8iT2Y7QZ01WrinoyTCeJmi7pL5Hj/oY2Le/9/3WQsqWUvMxfq8lffzJJ370R3/UI43vfO97gsxEBkUucvQcdaBTl9ccmX+I09n4ywIj8LpXpqf/XJ7BkgtEkCVuLryanTT3CkyLpIM/WcDmEt41mE593JtA5nl6AuDG4vDBI/Nk3Q/FauXatYcg4pyG3HfTO4vdP8dQZur4a4F25ZPDBXO62s1NlixLxzlXJZfOnbVL5wPEvnIyz5VyApm+A1bTBM1vp9y3zoWdVUaw33zpRZt+It/W7ETDXNcSq523upJvlT1jnAvRjCMvG/HVNPeCwpZfqfk9Gg89+jJs7VDnnfZ5AYQj+ZWlmR61xgAXn9KLkMWvVLaI0vSBaejSiStF1GEsUQCiV0/H8XYg9/HpFXiAayNdk4C2lCWrCFBoQJltEImrqd+EWFYdsjCg9GBHMxyyoJYDbvftBEBIaedbTsleu+ri1pNLB+h14LhL9UeA4U8S3/zZiG/nkzq80cddtpupoWVtIXtMLz176H+KWZ6k+Xu6ZpqkFCngMMAglRG4Xf8RVQqlLQFpA1dQ2mzTsnWcKNJUxdHplwI9aZ52ZyRbkg1m/3LPJGVnhQMyeHphbqDSo+kMotP4Nzf+yzpDBoDUpkrxTotNFRnVqqAIRRZ0iKJAaG1d4BDeohdIyYJGwHVlfNgCB+oVQTy8RYEDLCt6ZadhK5eVr84j0iBFYdGoxqNoidQfyhEBTmpbSoRFDIuy9JcZA4XmJiPHaJFiBnVVqWg4quTxL9NkKTcv90dXZnZj6fU3Xp2fgrJuaTHg3TPmmbl4OnaqTSwbYn/q1EOPP/4EWbtNWau8P8Lr1i/7NGPeTMoHo5Q5N/Hupby7b+PnB8b2SRTNwROzIsd0VC5Zn+BEVESK6JrUKycQtNXUzp30By6iTx0/59IVAxGLhDq6ND9z5vxDp1Jr3yCnWZX5YPcpz9veOYft9KOPMfqgD514mYdbmO1FjOdHDR0PSUllGyUhspkpVfcGHN1nDVkxe/rAsdJvfvObSxX9mDkPKKFKjXJpP1Bi6YqaVVKjkDpAvkQN3b7BIxQOkH1n/7niNE+cDBV3lunnu0C5LMf0BEijTdxZjWNH7btku0ypBW6OnGcvIdsnecOTH6t5/uen1peOPHDx2InLRw+/58f+9hutWgLuHotpF6pi7gUbjp4DXaInqnIDm+PWOdSjXtxQKcB5bBA+8AcSJWl6n7NUu/xqSucBiA0FnkaGnxYDTWCtcoN14Z13KCl/ajSXRK179Vec6TT3zi+0+AB2eTjJYtbmS74FGkCxV4gnMI3Ih0iJ9sw/cPrZbQClrhGkiHywRDn77ppJH7MxYInqZ64MDVpyvNh9/7xCsEFg3cCxhBk1HlwZMvmFXa3vTXemkt0iY8Xo3qXswclxDqBLNdUehv/82bZr7erl49XmHRMNt1SI63DZUFKF7SxZDRvi1Au+gGzFyWqbZlfagbcaklp9jmz0z1uQZXUvDIj1gayZPs0+gNhSHLCMT63AN0qmN2jd9jzWQcfhjLdDdv8h+gEGiNL6Q4QDQBYOmJLlDAqL9bB42cqwPGz1MUTL1EVRS2Vbl5YiNruL7MGXCLrKglK0F8quflnAmfpMc+M5ocpMWoamBOt2FSo15EDVkoULfmuN2cRtRBn/hsqSFTpxQ6TEkyqjywIjMS8byThp8LCDRbOGV2MTzNdx50LbRgrZxx970rNAWzSul/PeI9ugN953M+QRxZmzb1ljrl67PFtA7xuTKqdqhqjoWagY7VyGqB1Vp8Fk12wAZ02RFM4fs0CKTqZBafiJn/gJR6q8FuPCBTV9w1vb/YqZQq7ygedqwei3vv0dzK7TwTwK2ewPP/RwPvfXzqOIoXAcPixQ9jMtmSJZUE3aPCsSTB/6MxNZruCf+exnTU9KSRGPlRlWvAX53dBAvS0ukgSlGNABegW1V3mkGKQKyy9Q0NJRCvQYWFrY/t+EikKRsQC4qhAftzJ6mg5vbNmIdzoiR8Cz1eFBRVrQRWQ8ydFNg25f2sI2YemU8yc3Si4OcsOEkMtcD7zFXmCdz7ePxiLZKPFMY+faqK0WVfMr2uWwbIk2sJytgbeZpHh4J+Bug9lqSC1U9OufUkVN+QkRusYcZ3sR5UARukaBU8tEOenHVnPWJ6uFm2bZ1iuTj7DPP/yYifu/CJ36CXqVt8/YgYXwTYexULGof7pI+va3vy101nbaompe78SQJbTOqAjO6brHuKSLxkOWPhJMb0j/KMTdO2EYQoIs/E6W/5I5DoM6VrzhTuCW0zMSmpNiWymkgCiscKpWC0GAfqCFNEy7CwqeKiGiGaRF4KBdQVqe6s8oHDdKrJWQjK18+uH2qtNuYU5B7MxSSnn8Fp0/OmuzxOuzLM1VXg8VFeE2HkWxNXdXspSgcBJP2eDAJVodXmlNqFd7LbrK4qzCli7mheBfzkCIlB9SkSW42MiWiFKirHpVFkV2AWb+mD+sRrYgnnvuOVOtAeNkUzmFjpNdqzqw6/bSMPV26X3El1ePHzstGBhocJ/G+Zdf+QE9eoR7sBntdFw3IT5y+qTuI+p54rA/5w9tUnmQZGJ3LfzcJz8t0uaHuX7N+nf27LvUmiOMTw67IDXXd1aiFsIKc7zCplH8iolyCGYUp0JsO/ppjuPsNmFQTBOOjUD2fXBR+uKLL+L0rF8K/PpSCVcdEVR9X6ty5Nm7oii0uKqydZRRPQp01RFhiAmIFIX84RiES0Guecx5zETjAIsHV/kN1oG8cwtz4ywCZW5gEVEABJ3FRZEFLW1KxMxfQSmitO3MvQjONiYEVNaVXZYJR7jiKiv8zJ2xYYpn5mHOoxv+MZ2T8PktzQxLgfYNrfHBoz22XF
hqccnWtJh5uOotq33Eyhmt4ImBI4Iem/hNm18OZfqmWQRcltr0V0eU1WT0A1GlE1thqz8jTlSnU2WCIphnWyb5w6l+t461hQsU2d7EQyjhp5jo1YKPAkGprWV9orcZ5o28HtglSgtO377qwkGweBwG0W6LiON2wkzR3D/pJ2ypF7WgFbQza3PCVReKXt2LmDNvv20K41WvlGlWTbGue9sY+DToFW5wXO2IGwte63B77SH/xwKNE427GNvrZoeQSiYZuIvpvwyBM3sM1z0eipQiiHgVKWdFVi12NbRepVSKyNKjK+h5HeEQzIUO6baKjqsrAMRdzQsnorGpRQHVqcNRba0COiITNQRB0VFkF50Il+iZB0xym2woA0TaOaSLqIQ52WVaKYpUUfFV2dJphhQUkS2oWo1W1ZJlF6VAaovebgJSNOBvEbsQlIWUTj8ivEpqdFFqbpeIIuBCZAD84i/+oiYweIzMzEjbcSKGpnjxVCqSTAAmWhGG7Nj6MrwrXs4INv1uXD1FdxFi/9Cgstfu4Blx45ohd2AXLlzSaHSO26mU2+82QnbC5v0FNJtV9Brd46mnnjHdmzW46lbJYqPdGZJVBZrjxixUbcFsxmcXIDMgNosMn00Kvt7kohuPa+TPf/7z6m4+IN7br5/6mZ/miRXUd8d5KBTXfdgwkJDSBlgRGS6RolPtPL9ptKllrkU4sTGNv36aZdxduZQWEzdzeWv3hQviSQoz/a0CZqB/bZH0MXi8mBOkRXZTgoXFNhrSPUq3XMGbTd9Xx77dZqM2wRo0T/X4U5c45WpKdfI7injIj/zWhD/e6EmJmy03u0FyLZrtOOuaXpOfAeRtTG7QHNxz55H9/Hm+LRRuxeanpJmI0+7beT71rd0VYaVaHLGNiAEoBXk9/WwjCzJ/VBMzRDb+zJv09BDiiuYBW6R4675Ef3YHrO+1YyNWnBUizBGXUqV7ZLvAc7kBeHqaKZE58ZqQNcL6WZBWZsRpAPonrwCdcM3NBNMff+YZftLMHz94MOIyprYKc4k0FUlNRptSSljgJOAbwIWYLoSJuruBwN1ElK2jdxVWTfpD6jiQP/dRv3ju0vOfh5CQ97LsjnlwNxotly6PEtGBUloklBCglzSmcAwNMfYya/5pms3vw2U1A5HeUxklOQnSBYaWuUVrqiu0w+lzkLywZx6xtEPoGbKsFMHcLArxWHcibjuXlSgLinOAt/ACV1OT8V+KuGrK2y3XdmqYYVOGpmWofm4g0o9Yc3CU6qyJXRx/DE83VYuJzOaGbElVT63grPgqlUWsYBuFTiIYQBcAlM9+9rMu99y7OIZrWLq+pklVXDzY6LP8zC2lSz8hIn57cqFc73cnk7/ZlE3VaB6vct+GwQUgUG/6bXd8/et/aPKy/CBaPFwFWw5nFeGqf6RTG27mzzbaWlYB5jZ9PVcjI5nDq0bYZOfOgQ+inQlOKU7Wvff92Wef83PnUXzw+ec/44e83lur9JVXXvOoE1Ln+cbWW2+fTaeYd3vzjaq03gc+iZKLoVGSRDeYOBzwVAZdKWKu931E6vABT+yefPQxfj58+hG3Vp0E3bBaMnXd6mG0wARVXhu4lK+qoRsg2BQhFuAoHCi9SqYoClSnjmliMKXTGeb3ZNjwhEGt8vjH/dT+fLKvv1OZbh8t2V93+DzVsbizRW2iag/wlk1ICtLyNuhMctk6dGbU23H8WNTNmWKjXtD8wlQ3zguW/IY1ZwgJcckI1bssCnUmngy0SCujjIHNeGyWuFYGCZcXPuU9prmrdjtskoCbFtzdOOPjKkrAve5L/F1bOOTCIiX6m14HGFK0nFEkq2mYtpIBiwoNuW2yB+uycoZk+rphNb2FCOeliibp3VHmHF2IM4gauv2cY/xEtI1hrPHHw0ZR0m/xsJVYJugBatObrE/z9WFSjdIm/rNWqcLtS9048cNAOk3HWbhZWkJjfuU2yGJI6X9xmNfUiwVnGtxdhHcNHKStotQgqNetiLRIiRgAVUDHIqUHCDqAt1RaZjyiX4CXbrqsQikRgm02uMsTLaTtzXdtY0XeZKNIR0RZl0i0sVsNUtAqQHLOfrtcLX/4wFB56pu0pZBRsEkWG/2sA2zEWYzy7WLQulQWESxOOCWASKEWl0gRRUzARzqhY0VICZZfirh8U/1S9ujc2JiRU81EICZlk/iv/uqvuuQ3Jv/Nv/k3OFHoZ5Et86PdueJSWYJqIWUIMwSzSaFTBjK2uTijPm7nV6n5BIlTc26Pcozw3XM55G1GcGlJCZ/N3TZGzCBmcwpJNYUUeAIoJMUcV+HU8gdiZkGsM/hRTJ0ojbZsSj/ILgpBRoWUrLozZ98PxWFpBzRUHDM2arlkKqGJJxxKmopGJxOeyRXBDxre4cmShoKniDmIleef/aQNwO9877uINLvXdLWlvirFkzYx5aIB8PATUoXSIaehd9sXEWVTNE+nZBdzxSmnLY96bjNHqyHHw8piAFWFmOKB0nnoNsoq4zBEFuz95lY/wPAhj1j35ZFUwQ0phTOmNEtey+6FFlapeW+PdS48GeV56QYTYmKZlgez/t2eYVDYlWITYfWFoKgIb1GWb23EcTL7rkQEUOPj4RhBKTPoSr2AUhdQRInG1YsQdRsMKOy56sRJBB2DzWGlFo9sVAryilXvgdLDt/cf4xBZf9s99G2G4ChwOmW1tXY3NXGYiX//7/+9OyoHNswClmtELc4uZ1pBKUG1bsWzTM46KhoQzMJT5hiF4ZbeAdN576DclanQkt32tL18usde0p8o37oRhUhrd1lflER8wGhbPBUJeS4P5+9mplg8YiGUuayaEUgkWQ2wfcYDR5Sij4VNUg1lhrfxxBrIAgiiPkF8aVhKNCJcY/fmSXtbnCxRsiUW1ykpocqLACiBSHUdTpSOkpGx3eKrUaUeJ8MhrIDyE+GY7DiYepVf2q6MjlPa3omBVzpZp0viAEOlqkcWGxF9FKX9lXg11Ac9kodlw7npoNvVlIaqQsePU0pbRRQBstVZtUQWKAL4O4ow0GOmJuWkHFV/7a/9NTtUp08//D/+j//Tv/23/xZuq83cahJ3AWiqFWTi9arapLE6gaLBSzp8gpZFHcWZ9jE991j7LYcnfKtK7f1+ystf/ELL76weeuik44eeNEzPctz5ogNjP3jpFYIaS4g0LoDIAjV1mjrpbK1g40DszlFAXaIBkbXSuEc8+9aZ8DyQm55xWEM7w5aJ49VXXyP3C7/w8xYMp7Bc9zz/qc989zsvnn70oXfeOf/v//1XvGPCa0gZ8vJ4P4h+6Qcvmxp0/ii/dOWRRx922tgPY155+TVNz4pJxwW4UHBPlHjCCqOKEEVbyiu18Bsax+iU2pOU5Rs6teNh4omZXVlId9sowZ+6jEIpE81WSlqGzbHYbWS4oWRHfNOFULD432a4Xs9o8gnmZhnQk8OyYzHTol/7+PjWvHI2M8FUiizTjsy4fZ1DiL7rkaMV6mVJuuW+y1qF1xMsY8q912yUQHOXNSMsk1JW2Zk3fO5uureA4C+gGFyt6dQoURIBcObts
9rFDgv3vOuew/BGG/7kE08/evpxwdQ6frGg81xzY5t+/dB/89/8Nx2wHT69dulDSCacgqGcXY9eEwVxmOmlPsh6RpfhvZ060OkBdRii03aO4mr9N4L0Sc6wa1uSReClF0T0YDrbCmThqiCtNgj9UoAHg24DwaPn6GWyEGM5MwKZsq50aVmUPzGydeOHVbD83iNQl5Qub4s0oPAlWPrd6VK4OFEWLjoVSd/aDh5IbmC38WGDQPVoocquFAIoWaoaW5wAsQwQMD04M5G5QMeV6mGduXrzrtWVSpWCukS5LkKPIo1aUCrr+kgWghPDCGU8uBNuz0CkrT7g4QNm2VanUsWrpHirA6eQfiklEHRAfKqy6T9L+d1qW7Q0y8LpoRlCLQSRV9WcEA1UVYm1WIpKVVAWKFr86K2d1G3Er/zKr3iCgt9qJEp/9+/+XczeX2cGNwwcUjILGwyf+tSnDDOyhjF/FFFLJ0qVx0yexsdDFmpQNfAo8HpAP+Wh3yGbucy94cb4nbffffDYEeXLt2FOli0umTT7kgUUhljwlkIR5ioHAESLQzq5NPIYVA3x2MnjHjq0vmS5Hdxbkz2j8uaj+SWZmrrE9tOWRx85/VM/9Wf+8Gu/7/LdYRPiVnF6zCnUer715S9/2TaQvnrsxFEVocRxCacH6aG8Id2YmM4jDkARgKiLBx/YwjNzMXrr24Ye9s3gqjaU1TnLWZHiixID2yaeJoiSUuhRIltPdJrhTdKwK1OEodBSOMEtLX+H3wafKTKHLkhQ7GfxSf3YNYC6Qfx0l7iNUj8+yQJGVT7yioczpN1/4HadbiGLJrPGFMUlu7Y+bQTqQ/TcGYRoGaKGaCfBWWJ9rquC7MdQRz55RN8+9cjD+rl7GkSriJM1li7dBphMiNOjuZWKNrXVBpFdl/X1R1ErrzuiAObSoAOy+l6nKc5ocZqtIrYlGFVqHLnya+exIUk/hYXo2sa5+P3SVl8pQdb1fAijxkLmi3tD2udumCdvd5Onge9FVtt7kj8ykcd7ZBZlIWVQQ5QSm95TsGxNMdzmvNPjXhfsaiCyspVqN0JUpDk1UttJE2MIPpO6HtCGN3o7JWmAJ5562rlhXc0eka5WJRioahchgkgPYIjIblEVovj5ujTbtPnVlmcQeTeEIeW6sM2/+pxsoRTKZSmv6SJMR9uOXTyydUlRnUFB83+ZpQv0YzyNQ2VXdvEsZPRESSO2awuOvsxVBHF50viggJoQWxd3UgsSBzpru5fyG6C///f/viArRTcX43n3HS9iv3zq5EOPPfp4KuPllw9kWybXyYl3al2jGcfe+e0boAmVRrldBKeWTs6YIzxS9oZc4/bhRx9yd9WeS8lSBcml4wCt+Td9g7gDGjUn5YaeYKJpb6EZgq4P4BThZjnpcmiYkQ85pYZitnFz87Wvff3nfu7nvECDMzx0HvIb3/iGLSsREA1TDJ1WMiY+/omP/eDlJ986c1YlCzwxHc3mZ5rAs72EJ+cXunRpFD3Hyq1QNGwm5QdkTNdzCG94pbJqIZtaT2kRVmRNRvhRpAtpVgpoKB1OxAvCpFOyEbFcNCv1qLYWRyRqfapIWhHpUlVkqdoweB7lfere+5C2V8Fc9+gHIJ9Qpj9Yms4bwL1aJq9gcq8l4nN/pdTv7iYq2Re0LqVlLVrOK+YUBj90HrXNiKa5IEpRPRGQWkXQmbAYANkpLB26WUWoeuj0464PNKUbPadhnn/+03/1r/6VRx865YMAtuCsIvq8TqL/a+UuV7SBreX8VZ3oF44B0Uttt2tV7E3P1NM0Iletf3VPEdwqIqWfkKsiu8q6DaNKUaoT3uxKIYpCvQtKjw/TyrylRpobxoceSjdqwR2C2w5xB/FPlPnomjZ9ca81imbaWvUJw/bCZC/zD5FvrROVcbFqtxfLt0NpPLG70dfKyOLetuigaeOFRON0uN7uanDM7p+0NzDXaGPXI26cNYCjrppAP0BXylD1dJIdidwzIe7WSbZAFlRKmoE02uAQ2f4Spcz8KoOiOBk3I4sI37UiW50VbCdGqQip9PoZSPa0yiktVMQ0JFt+lBpCYUt2j62yIVZkWRcWVvCjlLhSw6/mlggTlFv1XWl2HjdyjKXf/M3fNE3bofoH/+AfYHZjYTkxrowu5+JcrZsUPve5z1WzsNO8wgKpt42A6cnbBISWHvTxK6j/WXRRadvtq1/9ihsmmzM8v3ThsqMx7T/0E8AJIrrtP7G7pStqW4RoftMI+tpc2pJoD4HwEM5V80IdI8XVBhACTpw45tn5H/zBH7gSsq2np6mm1E2kUFh3fDzPGubeS4hEzEuNcn/pxeTvvEMtE+LjXjqfAJ635jDBPY4V2vqlcElLLU9aCz7ovYgQIqsUMwYAUZRTd4PfL8WDE1ASMb/lHSg/eptjk3WDtIXELrP7VlBmYERivdmlrfodP88dUxaaDOo4uKl1Gj1Ll2VMaf4lIO6k8xMA77D1tate4GQdA9twacV4Tl12CY2LPgwbntu9S3y4VCIeoL16gyKM6AIuxVa3nbo5+uijerjG9THQPpjUA/9P/8df/u53nfL5tgnHiqVdSKlI9Tc2NLSyshPTTBY4WkoEormJdP5p2iauJxh6U2Wh0lvsTxhTbq04jKHtzgQ2UhPG+FDPF9LsnrSl9bZ4lcw2/un7313tUfOfK/vhlfmP9ELNd/Vv8MZR2QAT6IX0M916u1ogKhVKoFVAWNvvtbdGhyO6xN0+e9Ba+g0wa2hgSxTEdW7bnsEDBx/U3voine0WdQNDSgda61qHl6HIxs87B7yi8WsG9nwQkjZEDpfOEJwe2VRjOyOgmHZlS68JLtSiFAUoBSUWKX2lipiQrml0VafTHCnMGIrU4qKUKK1jS+0uwqvlJz2VRTSATeXsmnCF3UCyVv3jf/yP3Vch8sq1AmalzviaFNxLYbPtbvavEnYBh6Wl1Plk58Raxzijyy42v3/S7P/uy7/77e/80dVLV3xLzdpw8qE85pkdxLwOugopjaZsGU08vaKv+QRrR+e8HJFmkJrOE/yK8Eczoe+uGZGeSW0mzT6jzuLnWbolHFw4/57brC984YsvvfQD72d1q/byy684q/jUU0/2+PLE5NH33sutp1MRvHXVrp3j9zT3+JKkwa8zq40QgfpKHbrDRiFc96YQTgkERVEVSvcsV9XZFBsEVKRSXa7gQNGkwatQfiQmsF2uioZlFYXFyhLrI3g7za0UUm+o/MkBQHxELTbYRsoYz7/5ZkNOss+mobVoSeGbJs2KF7Nz2cHXIIwCQRMKSL2FQ2Qh2lTE9Eazv9RJGndjnHX8ki5AKKvwPGa2TmlTHVifN6ELMgZSmttdl21eCGL7tiJWBBOkR031XfgwCjazwxAx9NqoM9JIbCYBgmYwtnrBZJUyxOh3fcZtteruYqsTdz8qzPzZsLBFWlc3YVq8BeQjPrvqALuXB/crmerfS+Cj0jZdc9P7NuFote6yjdyq7hpBXNniUm3WXqAHuRJYPBuy/HbfFqVt3MZLAxOdS10BxVhgwoRoqrITJcpwPUa2
Q7eTYH2Dm3a8ZpS4boFBDyBuwHfMw2u0CLzA0OpAigooi44iK2Wg2yOQDd8UyRoVtC1PKouHJ4hKpQBdSpsUf9Vi27W1NNe9eoKhPBVUVAr9ZasSRNmFV1WJfBDkSqFTWyXlUUQzwKA6u0ZF0i0COk7H1m36ffWrX9XdXYHa/tIiLkKNZCLwd86+6xGOyFNlvBn5xt6yBaGkHsaH3OlkoqrypjN9+DXuI3jdM7/99tk3Lr566OCRc+9cOHL0wZkz4/KqNTyC03noB7IL1DrcA0RSw0wl2nL66gzgoW0v3reN4lm/kAiLU9YETR09XOYWyszl9X1etsqW2ynTzYs/ePXxxx8xxbjT8s4KfdWXOJS6ovIdZC+8YUIWZMYe4FH9qauySkthFCLLedETf46g6Nj0ILa0zK1a01G1ISy1EKSVQgg2hexZrtABfoaKDaEjfehZK+7Qxs/yS6NwYCG6eekos5mXvyjbMCT2eUtr1jX/0gq3IUuapqp+RcYO2bwbEa4O/hz2xYa5h25YdN2GqNFDNPa7XEkF1nhRIwioIfxc8nM/3fjRRx/Tgj/xk19U5PUlfPvbf/tv/87v/BYGUk3pNNHLVpyh4koBtRQKn1LTiiYDvDIQaggPEVOZMaLIRZ5Ukc7TXYpOVlEyGiiHF1DEYuwkqQNNlz+7xODbERcNxoJaHM61oysqPtz37ure6jZ1vsPwh5vfRmmvV/fLz+C/X+Gm2+0pFohdb4tLG6BVVKTM8MJSpVEFBWgbPGKFQamHiRD0ZqVaq6sLNnguok/mRTUjfdBJM6WAbHukZlZKIQoRAKGHISJHjh5xQkeTo+iHQ3ftnBN68AVEAFmdeIj63KbblU6lq/spykozFuTyrjMMq69Hy3ZsYwOYeAIox1ZkOVn+FuFc8cGJCDg1RhNt2vDH6k7XDNNorir6K9ss5l0EJwoexPEuk2aJtbKylGBQ1KapUVlEA8nVH4p16A//8A87Vn/pl35J6OzB4ne8QqthszF49Mgxy5jFrFY0WdUuo5DlOZ1tWQhAV5pJbZ+3GORE4p//hS+99OIrh4+ccHnsuw7m/Hwibt5/sfSMYF4PT1otoyGXPZs4tP9H+8ShiMpC1C0SkZvfwRSvD1sn65LAmFpIXbyo9ldU01zm+J+VW2V/9md/9qWXX7WuYPB8SzQsUcLCD7Vz0N9OqV0ds2GehOUFD5tTRY2DlDNNiVCSGngKNdf1zPXKDF0kpcRFZrVplZBo1SYItxXuKi9etlR/6uiOrwgKnVsl6eHBt/1+rMRJm+Hl39UG59vw3E5G2u2Tu5hN3+4dWG6v4q2f/fork+UqRaKcJ1QuqgTBP5N+eHsQ1EH4WOy9aSPG4PhLkLeA/4BSuBlAi0hlRQzesPOKq3Cpi6XlOUGNJbD2n4+fOqnD+w2cr3v99m//9ve//z0buaYdbaERidiygxCZOt6uMgrTiHTpABoO0AknzhlGWXFV1ykODlz96CGAlTq569WuCXXRSZNu++du6W0/drAVkNCmjpy3dhq5xux9n13dfcQg8ptBlZbYA3f5kcbBcxd9j9zebIfrXup2hl0K1Z/mhGG6naDUtxLLtkwXWVmemdfFwtKUbqhDjD1tpq1y0Wh+Nw/Sv3//8ZO5MbLeaEVQXGpbT1NpV1emQCtWP2SmJ+PB/l5wcZgOZ+ZN/C18bqpGd+54XDXoLe0rZFE2XXMT6njWxk5ld04GjpsppRxdt4PzAcimc4O5W6IQrFKcQBWKMD2MGfAdLVE6UBPRMyDbCFAO4k1aYcs9f1EUYW9A6CwnWYAlkjNc4aVARlPaECDWJbKyiqQUlhkSph36uBYitaqpXhXXy3/rt35L1kh2ekpqsjYL2LtwwyE1BoxJqZ1AI5B+w9IIR6RKKHY1j8345m07i26lbn2lTBvSpq1f/dX/8z/7Z//M7yKd+n/wyOzDZMbEGR0aRxWlIlmdQ6zKpFmSBlrKolyLJxC5BJYNC9ZtNMiE37aR22jz5vwQRxV8lgRZxVXwJ3/yJ61bsr/wC7/wL/7Xf3n+vEOMOSqGqDOffChfSRdHC9ULn/709158kf/XHLXwDeVp67ZBrLS9Ou6muUtMs/rp2Y3rNqtcJMkCPjChLTY+b6s2f3PZrhsRV3q/tHWv+Biamm5FaOAOOlvSDOjE+PZeQnIzBpvK4cK+0kUxE0SNr8hOJ50ApwpdruYuSKmZwSRusbKq5QlXKrhdTthRbtcOGFTxCoFDs9DxJE1rrZslXKqxAJrO41pK36MNpQxMw1s1FFkTVlRPf8Cp7lYZX2L7+h/84c/82Z/zaig/yBCG06cfm9rZhqU2Txl8RB4vZMUZQrnUAJGOKkM2UDZWmjW5WSe0IAqjxpQ3XjrG73Gd7laF8W07ujvVcBu//6kWBhZQytxUjVDuBqqEn/8H8y6YfHnFyLUf4BGd1AfgN81/p+T2smBMpkispwGiq0gomZimUJua4UdH02kshRzOVi++LadqANWLZ3V6O2POCBwlk4zQjExseuKYRiQeQTGwNowNZvJvul5iP4AtJsAoqt5jR497CuCi1jNw2yM28eGeM/vYRoaZKD3oa9SHLSsujKnJh3VmeRc4iHULkWbZ1kjzABdGml8DTwNl6WJ6qukKyATol5Le2ZV3KMwqaTDnogyPe2wp8HUdP6mqcq62p9Zn/kNaRT+354NORsQ9gQ7EqOyRB3PW2Rv3mfYuVaYhud9yxzb/UBKHGdCsQLmVjYqJktHg2u1BL8RznnA8Z7FsGECz0u5RQBhl2njAzzKeUZ9+iSJFgSzArFKIrSM6HrhU31VKvIZkIQU4eoeWODNEpMHhgBFueoWY/RE1ikc1flEkMv/u3/07W38OU9j3o4oezYff9YGK26HFQ8QhC0fjlMKlTKB3xapLvealvyYw6E9mD8w8x9PqME0z/GMfe/q/++/+r87d/e7v/u6L3/uOh1gAt71efuq43ihhwBOkShH9OkOKJsguo72kgDNqndDFklBqFE2QyWtu5vzVJZyJzh6hdCZFRG9ineuSA3YFsqVpljh46PDFS1d+4zd/+3/4m3+Lkw6e/PW//quf/vTz3fFzr+LI+te//nWBMh2oY75rfPjBz73wwmtHj7340g/4o/96buBSnj53jd7poxXsMNB+4+atI4ez422pw/PBwQ+s2V7/qy5t37adsMsGnykaEl9zUbj5GTjKArLwJV7OBkfapoGAiqSa03MgM0pEI0OvpTMpZvrL2jFdsjhfZhrVV/V9Rrjn8tVWvJ8GR3Yza/CEL/MOwZh0ziKrDwNGVd5hMUadU5ib3rjtWbH+nNqGf/YeunRpSJVyjkO7+6uJWdX6BpRUiBjVK5rFMz5oeV8b8SnjfQcPb17ySbF+StwE5gvUJ08c876Sf/G//C9PP/74v/pX/+rJJ546eeLUyy/7oYLfRR23SmksM4RL5Dx6uKUd0/FQpBwwfvIzz/mhJ4t6NX7pM089pWqmGssVxCNhP9UyUny6bCKZeHambwMZskWkqXcAj6exaQtIZh4lCdBccu08ZNFWeFgp6IQ
8d1bIC9ytlDaojVzAr809wWi/I+E6oAW0oJReDXUCXQIbrtsOp2TTJ1UKvVooRN0qXEzVLJvSHVh0k3cUjODGH5zTTTdKyrqTak6cHfbSgnD4abW+glH9df1OOloFjkdqSIM1Ax7an0lZEdmqJ55GvZ4eo8hHU/1TBAeGSjmpaoeQ1XIz65EzEVvtdNwEixSKlCreFiiRJTvMaUUUaYRnilTEB6meTQTzTsw2KDq1MufPnYPoK9gAPbk0nOlbKbK0dhUBhaVEKnKc37QehEWma7RFTWlGxEBECmRxAjie0mWjdNQWkSKqV/WM6Ea8Xql7eaJrTEtRlHah0o6sazJKHMmzAehOwrOrEjlMXZ88j4L3FVmQEBWhGxvVVnN1AKV2EVHqG/3VQDMG0KLW3SDHjEjQ/v4Xv/hFN23vvHvWL2CE0M2W0e4lA/qVGLtvOXosizQgUh9aO2vVCteuS3Xs7nQPz/gbLt1DD+QMBlYsJ3ywILGi+j//8z8vi0cplvPnLx44kBuvVkoMeSmkz33y2VdefePSxctmPoJKdYgi6jubCnmbjoXKfo33A/oJts1zajvlYeeA+AA+qBeAF1l1idrpEqVLZVmpP1N4e1qgHJ3bTTHXn1K2OtNFK7j0jLZUwdQJ9xqT4ji3PGmLaNsMHVawKY1uje8PwABUR5oFK6oyVEEZimtTvDwVVFaxZdnb76fi6SejI0MY3lhVFsVEATAIndqhUBhbU+VWCr1Sf/7P/3mt6VV/GuvLX/5tu3PPffJTv/d7/+HBw0cYv/DeRXvdmPsDg4sXL1OivVxS9FyfIr8ppt9iaW/clgOKTgIsbmZFPnTP3DUfi64J68asWKmT/1Vw8/+2XUKLt2kjDrsNADORbL4Vrw9Y+NkCw5sJ0NAAbg+kqqN3AesoHKWciUiU3QUxM02+9W/TlRnZ4c31Xe6JQXwbGOe2GW7O3dXEesswjWfmLXEMKYoh/zTtDh0x9LnbyO3buMRnszgPIVKl0jUMShHoMqDrOiosDc+hTE9KGwjKZTWMZbx0RdjqKsrBfbenjxKJYAA4AeLqTEHmyo5jgEWUdt+ylV49cOI6AVVwajsUY3S7d4yCh/MocDOsFEPdVlTmXR/gSgHT1JrNYw7fwCrlldIUbfkhQ4nglv12W1ahImwt7SgSXVIoLcIGL48UKC3EpwEMKLWCAAG0Ya7syKUWFVdatgpKAaKqWXtsanv04uSuTXxPZTx59jDGBoJHNWJL0D2W8c8iKcOyQRZSRXYYRlkSWSk2URXt1q5EWcAZRstTD2UhpbAFIWX6oFzv8hTzqaefnF8f5wQ5zWfPvl02CwlZOjVoLVYzc4j01JYsqIkitbubbll2aDMSydU9FpmwhLvh++//+79hhTb1+FGwCUsouDrT4+V33nmP87w1O5i23GwJrB9pPfnkB2cPnUWpG57buhewY3D8xPELPm77/n5DS0OY4MTZ/PLSD15il1crkvVMlRsoFVTUtDWVyoJGXimgofzNwgFVq5/IKmrc0NVlcdZiGRq3ykqLYJi4xUSRmmt2lSLSD4qUsxQpTyxXdbXEMkjrTwXheKTlmTfob9q3Skovrj/oou0klYIDtjCg8LB0VrSd1tTb1d0K5OpNf9bQXj5jQjMEpE5DfOr551w5WaUoMSNpCJdTjt4YIALup1MWObdNho9bKGp1jNwizy+6UIDW78svXKXP2qQ2u5D6zk3JEKf6qzjRmPXBUt3wSpWqRVt8apQprnd41ike6lH6If/RN/P2xPCPv7uq9mW+wZKNHwP9I4qTG9daMCn+3D4PM1UFJSiLWHZFKPg1DIqsFA5B5HSlpmNvlhN09SmP0LduZVNVnKQKijoYfKm1YWo4yowHQw3F4Vk5qIV4I3L5iQOldUwfQodja2eCo3S5ohZnsnNti2E5gF4R2hDh2MpJQ1VRjq4XytJAmyJZdFI1JEXHVs5qKHON1pCJkhKv1CaOczmGSEN5EIFswOXFACX4EaRwgIxfrPiAaJozunQvPC0qT8WjaqAiUCKtXdmicWdtgy/BIk0bgXKqI22lQzx0MasaWn4Uadx+4QtfgBhanlTxUH0RRcx4M41Wg9ELqR7ETtkNKbV01k8MBOsSCgYUDA2ILFxpAZGgFE/FhQWuFJv4nzz5jKXLYYdvfeub7rF4yCVf+BXJpRbz4Ol7C1p3Sj4cJvz82XK1AWe54k/BLGZ39G/9rb9pUWHdhKXuECnPzRQuos+du+T5uedYWhbd54zdI377O991Af6Vr3zV1bqvz3PJD6VZ6uaVT9pym4gNWBTxvzbvmBcQUQKsq84KjqwiItICniKII7HJImJGBGVoRaqzxKbo/OEYtkWpIaYVwYsUlwKUKiy+GOiZ8k378lwRgKBLAR4QLE/FYrqwOJXwJAwjXhPNim39kSW1RiVZXU5pO55qLs0VxICygBIz2Ne+9jWXRyZ3pRqxOi1OThgZDrqcvd9OMo0h5aRYkVIl6zpPFlgkHMOhh8LcUb38so46i19uQ82PUgHwobblwx3Ine7dUbSToQSkpaaD8G3m56xPxqNJ2zYgImfA7gAhQs19765qguodW23jLhVpvEaw62o5eVK6okK8cw82oaZs9CWTzjg/RVl6IBHPXll6UvulBuCoCmjaYdgczIvWAY2kCJu6aQOVVESDWQyFIGgnliryZYC23IQpp6f0GGRp2eAMLVw94R1IkFYKQ/sZVSisk6IQmxNfitrn6jypmJ1R10ox2jrWk2qA0yCtOFl0Wbbw1yvi6CigRbKtPsoqghSX6nz0dLeiUhnTU5FqoLmgFKfNwJHeWAlloDwEW01OouCE0FNVu5xVglK1mOElFm8QKCQrxQYUrXQIt6+msbW0SkysbgUMJzdYrhM7VzLhYJsWFyiOOVIB10NcP6IYk5jZ0knw+9ytUqcJUGgGTNTJBpwhPigl2yI8rVF9q6slYsCGue6tFN2qQCE3fvqnf/ZLX/rFM2fe+o3f+I1f/1f/Kr+nacg90NjUbnOHTTMNbIE6tizeD6FgtyhT6TwRVEfKL1+68nu/93u9GX355R9YtPjzrW99ywThMtxgEU8vWX355Teste5W3YEJ4F/5K3/lzDtnHRh5/vnnyKJcvXLt2HG/UJ7fgL///unHTn/qU58217z51utzgOVdP4Ov2+14dZ4DZOE8lILWFyJozaKInhSgtFGkYBHLL1t622Xxl7j4K7Us1oq0ILyLAU/jDEGXgtZipaVLgcACyFwSyKrc5soSf4u4AUc3FQx7mCBOooBRs0nKzyJOPNzTZJ0KFIV7uxIUWbJKv/Od77hWMO+5tbITboF54vH8KkObuoF+5PTD2k5Xd/HBss5LdtzwyvaLb7/9plsugEKzaPDBSHFYVI/1rj/6QcNLMEHZ7P4N+sMl4tD1v+1CKB8+nzONUlt/s0TpQQG1dmflpZeHHzgszZK2na6Fheztja891lWgHIuOAphblC2Sb70omn6QdBhXcruPVmELyEKaQhS19zhXA1e3DjNFEFXS+9GnPll1MUuBSjaUxTFjI+WqFk
WWqmWLlM97oixzu0VKFQEK61sie/j24EHUfi3Fhp8e2aUNxakKaTkpB7IAgogZDimuo6C0iJKlZ49OIi3FKQ5SDDyBAKWcoXDxQNBRpOboWJ95oTyoy41ar13MsvmGz1QfsXWsKn2rzO3cVFErvHVjWVxurKotBuLwci73IICqCja7m9ZQqjDLdpVIbVUZnIiGK8QeoGFpnsXvpur73/8+W+5pFJmIbZKIm2vGNm7NucPQQyxXzdJJVspzYMxTXq+ktEnrWHGcjZtU0eppeBSB4d9vHrGJq5dSyAeLAdz9n65p3+fA4c0FEE/MVmTZ2Q0mD7TdMl0H9qQrpKVvsypjBsxMxDdXUapvyXE/+vu//x+0psXeCQulJjsXtmQFxxnCN998B249QxdSwaHw2U9+4tHHTv+7L3/FzyJUwYWZ1fnYI8cEXPx9kcTlvInZl5jfu3ixbsRn/2t0/3RIAVEwPXOTbvmarSG2QEsgiIUhhy5bOm8LpcAh6rX4UXCiSDes2yuPpaT8y9yysnVt0+KyuoR0KpRuDNrKpj4rQSktxSkrShrU6CsCBxrXs8nKYgZ0NssTCIrGggM4CgZeNS2CzjTcSoNh1OZiCK7n+6mfDu/nhnrUl7/8ZV1Okf7/+7//VbWmnCpXJ1pWoyvVyjykyhKle7j+i1p71DOl1BO2MiSsaiarzYUlNXfCttUWVevB55zkLHXbWdc6BHgltVwVjA7ZPFmYtmojSjUcHyBVm5P7y8AuovK8BCWKEYALqVQ3KN1QGuRgzjWme6XTtEi0iYisbM1jKKXhrvKq5RZPLA7urpjgogogKhVidRMsROsWIAimtnkDEOWKRvx2xUSBuCIa1GWGjLo4dJXeUA35TZJnUwfjoTZziqr/1ENN8NDgRAcN8NYLJcSpSCmUpyHnSiqcvo2zncgah6aNAxwPHECapYdCFKpKYWJ83oROFs8I3V4ay1knMei1KLK7Oknl/NYsV7WrFNR/JvxIBF7AMJAocTNByCH/+IY+ePcq1TdupK7zSdb6v5GeP7USuwM836WUHwWdnhYVqS2UaoNgbvVRGiIItvZbj0yMMY9YuhlobcBskxBD9+s1DcQ1IykbcUopwYaoGzRoNSca9aHewgEppXvaom5IaSsPK9iaQgA6/fH8oBUrnxgWbOuW+cty9cILn7NauLmZB87GTs6DeOe3UzsjfbsdZflQN1q0J20nWsTl84bygXc0u0DOL5+459zHn/25n/OUwpTkXLvtQau+yctYE0BTmFHjCAYn3aTaDLTqv/XWG1QRtyX4zMey6jsA6eystyz++I//uAr+3u/97uuvv0nwwDEvYzzn1Bf+9Llp/biu9XXsdgb+bVs8lfKvfWP6f3zeli56KIs4/aHRGE21E1vaSNoWGTUtTz9J8XbiS4tsQQNt2mi7BJLBEDcGqgIK0dypy3bwtg8k7/DbjNwWcUOnEuqmXavaf6Q5UGjIqPI028aMP4THLkNyuwpnMMaB28yDtRI4tYjW1I4mSXdRGae+Nvn+zT/61jetPW6zPAB69dWX3XupmsdajOv/eoJ2JNsFFcUYkaVQTJz1pNRCIXpMqwuAaHffxObunc6Mb0Nbfg6Cnrhh1k6se68pheZz4NQfvPO5OyqUtoX5FqJI2joSF55qRtwsPHd6kByOMlWADKj53VKDWjZ3se6yZlofuUxn6O1GDHMXnXgbr6sOBoDOjVlgH1QlAxuzLDpmNdES2CBGhZpwA721FW502loR+pVWp1S2sYbUB3ujOMtTEapwdnWks0bxIwI8dbhsKBio6tzUgPIWP7pSY74MiiAFdG5IKUGpEj4AsuVUhKcUbvCnnitt6MogbX0R6WGXSGtXtfTXmfrmXaqyvRTEqTo6Aw1uOyKb85XRL60e1z6hb/sHnXCw5uJml2nqVh1XkdL6XPEqZKK+bRybaGAAGBAhNDStKik9QCkeda8SFKoMRSuTbStvatBDDE7dg5/SirufgGCTEpc1qq0W6m4upg1xwVJeT0qvHqlSFhUxzUNWMMhWs5Zq1RRhWES4Pbn2ZAwcw0BQyoE5K5h7O+eglZoEonBzihpLehpVgtJM0ruh5elThWEe9FAejeU0R50XH68Q/OW//Jc5j2JBMkO5DJeKjPnLHRWvvvSlL5n1dGMbTRbUc+/lyZ8ljchf+kt/yTamzqP0L/7Fv2j3zwbjm2+e8fsOftKjoR548Cj9QMhUYPNP46p1s7v1UrWd5WFbhfk7wyQiYAVBO26ziYwuhGH6lQ4a5i6KjZjiERdGILaFZlUnI2GmRVEqgxSR3IiWcZOSZbGgBYlLk53lyl9ZoOJdoujBkzjMUKr+HNj2OlxQD7k3/5TyoSbgBIcl6hHDP6DIXymguTdJOpU5UJeWdWKCNcMBG6KbKjt75hW/7tDEXLp06Yap0kJlfepMZamzSjnbyaUsFLOJ0rVKdRxzF7ha5Iw7Rj/M4H392abtfJsuiLn0QWySOSydUGdlejBn/PI3b04IDpFqB9VvWxzJ97pu31ElBFugNovExHkzQ4lO7UGKN3aIzc7vzqx7iRo9ZGmB0mPQ+vUcYzyoEikiwTaGtHQiJeJErBLziEnH68mn7eIxZkWsQEw6dYBsKbKgGlA0AGZ1RkGnVr8phWYaEOlkueZkIeVR2jhoY/oLSvUJ+6cQQcBDvN6i40EBTFfzBvc25/bjbTfFiacAV0oJKE62RejlhAiaimhXdeEhoBwnJ8usg0H4gw2CoTpbKXog0rTdjCNjqxowT+e8QrlSP0FD3wVSsnxQ2voSgTPUsYdYK+g1VIdlCSqC4IQ0Sito5a8IXKipVUEMBA0bLqkFbaD+FxENFZSSxWAy1Rnw++UQuo017Ws0MkcbEWqBIhvClJtGyQKPuKxtimyI1dxyhgM0S2mgx0huPFG4yhl60DOwr3rla1ZNbSEm8XUuSLFhphBAWimIa7lqiE+bn5eF8zOf+exv/dbvCELcfsCbTW7ke+v5OFPutDA05RaEBuku0oyCPSe1toz7zAmeM9U3P7o4dvwoz90tqThtbqEU2cqz/Iinings/1M/9VOe57kC8GYEDLxFF162zIb2TskKXW9MBd8GrC89unvBSVsqu38/K/GqsBDV2S4krG+LU8+pQgQ3Iq0sllK2dB1ro9Iw2VEQouFGfpaZje5KYePVXD1kEGQsbC1jyDvmc+qBgpjfIjrA6nuKdvHY2sKqhG6WGg1sC/PXlxwnO08U55ICC1MHH8hyWP7aZQKiG0hrTil/sZUuvHBFU4ngOiceIq4k9DSgaUyADn/6DCc2EymKp1PG1PnzV2z06d32JtWOzitXL+lv586n7n7ujNIoZJzXlt0d3iIzhGdcSdZDH99fzi9SN0EbV1M9S0wrJcUORBzdvQcf/Nq140Vq79mAtcqqhctyLvEf3jhMxQUkXqGITbXBIJv5DtMy02IacSCqwKgo2S1O9uLIto6lwjG2pR88kgWjglL+4cEAOgXUMy6Wwkr14zx65LgqoCs1VMqDAVAVFTN9VGH16C5KUTA3xUOhFANPBEvRN
EQS9W+NMOAnW0P6rixAB+hSWfqrhwP6h2ZWRCGkOmtrRLM89NxUZaNoQCkpGkBNSJttN8VVHmyAxZY2VVr6kkJHbATqFdO84oyi3QraDFxAfEE5dzUrIrs7/dU6TiA+0oqTYrRZPiDWE3Q4egWjbZZMdC7B1QIiVQQpQ5m1fptAk+GEY9N2BqEGavVlXUKqJqILRj1eliemSHNuRuC5c5YozrBIbf2REtefa45dPLWCQjkcz5p36GdIESn0VrPaqrnOwBFp4wCH6cGMAgfwgt/kNCtVSlseD87Ht7xd4p//839Gj9+I6qZtxwj/aQATbcogW7AU+fGy45HuogRExa1PwLk+l+SeP/29v/f33GZZrdVRHKQ/eOUld2BwW4gmFwFXU1fxv/u7v+eHnBbFy5eca09XNE/I+mT8xtp0hq3lP9W/NKejprMFFtLsnWlapI3SdDGPhtVeEO0l7ZUKHY2behVfnBtty0qdGW2r1VfhLlL9KFUl29LbmrfcKGDxb8l3/HUTp3UsS5pGz3cXpf/4wVwPysuqiAsqPFTZM3fAR410PhTpxok8T9yG8Q7198/MZYObXGFVlZHO2k8nQwnO7ckqw2eOo+eH5J2HdTl3JQY7t4Gs7iTIvJK6qDFh1zZvKdzQt5v/GZktjrEd6BgmIwpShklKHW7YjSNcK5ObQTv7nvO+ohmUubl5+KHTNCdAs6J0dqijJVKrCM/We8tbDKGDhWDGs2DR5zmKmlqoN9N3fWaQDlO6f5rePxQ63M256EoTTXUp1G38cizvtJhZZp711HisX30/dzZAKSv1mQk42XqFu0V1r3qaLllZpSW2Xs1W4a4IVRhKl4Jy4gGKmpXWq/KQKkMt1kRUTW9s07YIgawq4J+HdHhjDiB6tue5FamIiJt+Of/I9ikonTHt19DTRtqdOCLAo6iqtGazUTNQ9zqt40FDKWK84VdUHgp1P0UWLf3Ttrv+bUEq3bpinwqdxXpOnKCsyZQUYunwpZB+pfQgduSoLB+miyVG8MYEJ/Hpz5tol14T2DAs07ShqGk1NAsfF6Zji54OJlh6dV7MIsiue/Jaa7998e5dy4BRi06EnmWLkgL9W/Qj/KVnnLIZnDmFfsrdD/3Tf/pPeyUunu6u3FFRqvTv/J2/Y45zF8UNFh0RUEsX7++9d9WjKU1jjafKL0+PHX1PT+jPse21zMNaY0383YBOwGV2fa4fH8H3Yb1TagU5lvbA3ZRdBp4sZ8opEDsXFngb4RVn0VjE2/i2L0X3Ugg3fTXbdKtZGMO5hSpnd0u44+9yAMJim2DLcW8RVxOu0ii0Erjg0CFt9tr9s0Ore5PlSMeUcalZjRHKvRXAP9+SvEcYt/Y+/K95mY9dqsqZvm256mWKCiaX+42cUjtw4OTJ3Et1uTJJ87NLlGVprQUZGnPx3WE1zqfWiFIBgXSM51PrlIKWSQv4VE+nN24h25ozkUdKShW1LbULCjZZoenKmbh7+9YH3D0ZPB1AqStZY5OjHHNCxiwj7Iy7LXBRFh/W6kqKTJ2RdqJhFLSoHnZZpY6HUpxtmJZySaOu2mEw2SHG5NyNVZu0sqSWIIQ2sSYCMPABhUJg9JJqEBt3WZrVFU9l+QlkQTnRa6guKR2RWRq2Y6AUIi2th7JkQQUp2ZqLfg6gF2qxnChdd0Z0G8ypV5cZC42iGmI3MCfT6Ky2hUTVGKppIgB+9VreuFPAA6FDWrxurFL8y9zSrBSRlFLEZivOSVt5nqkIuAAaja7rnV4TfLMt5kIbAq4zmIu1LJyeegKnjSp0LYhehvpW04gLGnCydRUdpUqk6NJKlYc2+ktsqrQg2yIImBeVxQ5ckwFbcGo0tY+54dpEANtWzZ/kL1UbsezDZRVEsTR6HCV0bkPdTgmjOy0HVWwiCb7rcXOay77x2dPWfMXR3oRlzHtbnK3wpNzE52yL18/7RfZ/+A//gbguyVUizLFyX1+XP3s4fuhqNiBqEYS2Co7a25Xdo1x2j36sd4rsibOKtC7VtHBthnK3IeLRsP2nr2xcmICk6E74EArlehSL7VdxNJWNfGxsVRWxJhkUWg2/qwpXHhYqXRHRlKXU2RntazjMWb+cIaTNLEkEtBv7quS8l+NOFz80x9z4snFHVFBofj8/r80NAx90bAMtR9IPWV/4krsreXN+fBjAY7QO0RsuEtjK6nVFpFE719Ccr71DqqqgJusnJlkTNARTuyDtbLOXeZ5VH+kcwGO0VsMo3TzwoApx6N3TjDLimGfVTdXGqLsibjnYZhPfhaeZLhvK9afI2M3sDHhfvxUxAedYKU3b0rvVQQEonCHlRdMEq3PcS6whLuQhgB781Zb84LIAvQwNFEp9QCyCIS9hHliUVYofHkXb63FZjslGcAtTvplSy6CkNRIBpaqMDjeDo8vqENoLWz0vG06Uze3VdqQR5CsprafUN3tqWgrqSQNVi1JEFKXVT8HwbhwWyWalFcEAKX/MTZXJArjhVCvN4pySJKqgqA3KPUUGnnZB9MiEoJ80Sj070WktY6ZdjhmQOEVAre3R8wc/Q0stHINSLilqab2SOrqHogjeSx94A9gqVFBpgVpIRSrFB/pLbFqe8ivCTAniIPZf0i7pKfsP/vRP/7SnRFYR64cLxzbiUoKtcDdlW3Lfv7xqLXhHXDwF1iaqxeaFFz5jjcyh5yNHfu3Xfu21196mRTtY5b3vpswohktErrjN8gqxG15D12OEeH0kBf7cc8+bkl568eX0pQ9848rp/yN+Jqya6pYZHExD/An8j+x/SuBS1e/xjfPxfxprN90loi/xKrk7bee5m74Ei1RtFRZvp0LRc0Y8na16IMWlYm6ud4+uTV3AOTjjYs6qoA9reuD57uOPP/r66xiPmyi8ptImktli6qs3Gr+3x0j1/zDp9OSs3fOPsijhj8WHUR0G8EHXck8FMYRXigG/FUSKzVDNF3lsgHlou72OhxgXGZfzTl6jx1QnJhQqyg7JGmyoDNNFb+W3IUvzAIWccanleC5mMC5muIqdwLhJwlap8dz5i+3lxrRxbeEZtxI7bjFKc+eFVkkp5cMTQ9hEHFNxPPSMVLxFhFekiFIjv2vS1lb0K+XnRPz24a6yUVLf6AFLj9lQRUhhcPlZNln64Rmom43QnP1D9Ps2FEZpWCDbaRSl/tSHZukBpHYdKL6cly1bU/QiZWtAiqOXWTrI3jV+6YRwAH9TDtBT8fKgAHgB267bLRUcRKC0zIziFw0pHFFpA1WessERKwgXUiBrQSJLUEfH6Zf22ATWUuQHJe60DEvbg7ovToK1gqFKiJAFEJR2J5XiJ8puL6qsIsAuPZUthQYM6ACFnl2FisqGXjZZDOUp0rSccPo3nL6YPsu2LuFg8S//8i//o3/0j6zKjhIb5fRg29VTx3Z1fiS8bboUWqi+8IWf6MkUM5qp5MSJbLSKrW0UXdimw9TdAFTHXAOJsYYiQpVGMaJFXtbU8fGPPevl3B6Jif9eryaee4k/dL61bih2hTaU7QzOOZFFvG+UuLGYKRr5Pcy72T2qbhdt2zfO
jMUiUb70496sMSnchY3bQ7qtc7K7RQjLgbLdL4p+EuHmSfNZrgApuHtfqS7kYsLlCMTvJSD/4l/8C4OohhDh+UyK+6HMzen2HwHmh0z5sNfOhiJX9Q39mfWC4QmMaCuooVfAo2vNPU+miA7SIsWNXStdK85PXlUtSomH9D+sMrsjs3o7j+/WRK+gzt0VkYT1QJ4wF9dMg2SbzzBXWrU8zlbnFlxZpn3zsx6dLHv6U6qVrKfu2/YfPZJnY3w1s0jtTbnN1OMswpGidHuflNK8gyEPVxK8PKDKGEsNfbT7/etE6GQZo9jm7633vVf9lkuN3rPnl/9O3PqTNy33wIvZIg/j+JZJY9N7GG0NqKJfrTs/chVgizMD2OLNEFcKIaK8pdG7FaG5gotZETYeKoJLFdUBWXj1SHkCELs2QArlb6kTStG/3a5R1AGgcTHztdrgTIBbc9CpTHVDER786eVz6VBO9GHbbARts3EeWzkxVEnXCZVKf5gQtb+1Icqmo4uqw3suEYwxdxuMsuVMmn0n86mfCrHiAYxFS+2cxyVuPCC6ksBs2jV06/zyhyfYxJ9d2priQcejKL4O3lTRquCU5GoJRUoWpUrIlg1R7WQBniKIBQNZ9LUkh1HWrGF9stXmjW34/8Jf+Av//J//cwwXL1zo5Q7O6vkTpwyRrT/1WUprb0bFyhpjQvFLavd2brD0I77zk8jUOlnjx36NM886yoGDXgtyw33VsaPXco27/9D5cxdOnnzomWc+bgFzMaHtVMoPGRuo5Xl9aLqIC/nwmt6hajcmu3h1bSO/NG+QRSfin+y2YbZyabLlXhsaBbLocL2k/NGww59pI/PZaE4RTZs+UP5WcIc2s9VOHhueO2o6lLbanYwbF/pHI9pj4JvniAZLrnguX7bxAPfbbY179eplRXYm/uE//IdEcILpBhy+Q9VHyCQqG+AzhV1+HjiUM35dooo4/WewgzG7EYNzu2OBlgZHFl0M+OamCz1B3A7DlqZrza6YB2KKCdh+wWpicrIwW3Zu3ebWxxJMm/+jyJ1GMpnSM12yjWhsuE6EoDDDnFlg4AO3hHnXflYEm32zCOTRs/9ZFbND1jzapL6z2ZqMhk13McuoMHrHedt16rYzJ9KU0eRBtlaY5eTwoSO+bOBgjE8WuOHz9np+7XvAvJtqZ/pI1DjMSTqFiedb5zczODrQAK2gVDMkitODpPgRAYTCKilFOoybaU4pQMQjLV6GaqtULZaNY1W7DCmtCalSbKoAZLt21v+yKa3+DLPq3RnhSydZP1XPpwBMP9kufeDmB7llAYoogdQfZbWLAm9RSmdVn/ptKqiluFeKtBUkC/ADRBqklBRv1kxq/92vkQxCNbJKWbTwO1dNicc8NFsYjECIuwQ3B1oHJ812RRDtdPWeDIUPNQ1RVJ9RapFa+GSTKq0Il4RUQxOJr1P9YdssV1WFgpMUQCGro+rD+mH68zQ0otBOS7EVTnS9MUeB5wrD+8tdDqsFi7/yK3/113/9/2dLnFTkCwnvHXPftuCP/5tnkBnXXWJdencE7fO70LfPnjGyL1+98vSTTz32xOPqeOLUkauXr9qDcZ3pNpVNXwvR9k6DHt4ndJlfxPnCe++prAs8ldUQ6XyHLsGfe+5Zh9Dsav7xbv2pcEyjcC/K+PqhUUoEsJVHuqQa55aOV23TFf+FKNzFh/d2IjLTTTZdpbjiNjekTbAEan9l9yDES1nIyu6hqIjIGylS65PmAG6qLl+59tTTT5jOta+P1GsvW4Xuhmd8L6/00feduth+3eOH7mN+mvWAdWWzlWIoGSscUNkTx08ZNUaf6Rpil7h4ozrrS1aQ8hPh7Ypqh+TMxJl21pxAbaNnEDWehw76qYfnBG5rDvt9ljuA/NzXcnXlfc9y3NOYxPLIiuCsok4Y3/AKwNmppGpz7Wzv0fvgOYrS6cOJCoEWLD9AoH9cuZX7J9+Ryt6l4FMYnfDpOam7//1+gBLOeURXd/nqpK9lRpa8+UaVEKlI1G/mtfbo6VS5POTKretXLlteb16zgia4SfPT49wImic9Xsx+6ezavX89SynLdN64lfczzWbn5qszSl00qBc6B7S6SsHja64R/I2gWfTW9fiTn7uZv51dnmgY9FpFEPKQfV7+RBlxSviPfvP9D7ykReNpYHqEDk2WxTYnIhPFhQUbHqbxKGr8aZvq+xuobxAMfD5sBcp8muVNnPk8Fwi+gTo/YNQKdo5zi5tfYFy/dZ0JFQH8BEzQA1AYReE5H2QVsaW6KHCl0rEVx4g0OG0pI6rEUZbmo4oIPWrn3sjC07eqN8ge73/lK1+xJim1blmNIIAUE1S5wVJr4gClPjQ+7C5bSmVJQfAIAimlEZslWfeULRDHAJYtgqA+Q8iyVcEQc2Oe1jR8cmOfPklzrCwenz+iXPba9asrOMJGlXde+32mV0y4QXz8iUd/+Zd/yZv3/vW//tes6wj6SPZbUpfMyWaU+D9vMpp9AB2bTetZHL4nmPny2hJfazywb/Nm0ozTfd/81re+8OM//tLLL3/hJ77ou0JHjx2/cPlSKmCF0w9U6IN9vjk4Tzf2+5xZaqZvT8fQUl7CeunqJbdlV29efv/KzZOHTvoi9mNPnZbNmwZ9m2k6RtzbTtYZ4bKqUdIgGwY8829VgVgkZ1JIu2wLQlQ09InItoTm6Y0UbkkbmepBrK1Sq7LaIjjUpONwwj0mzC3hHKImzA3yvSAtv+FKK4+apH4Xvdh372VEpzx7XGo29Z3oFcHpCbWOh6a7tf90HtMcsr4JYokyYfgl982b51x1+m2HO2Krilpcvnr92VMPGUTff8mbSm5YSnK04vot70afK/u8gi8fSFGF8XzrcH5r1SD4nRh0OmE9yl2H8TQ3SBkObqiMGgPZB4chJi740I8+OO8D1P/idqbfQ75mYpgbjyprqt5MoPNcmXZySo0hFmmgB2IsYM4gnGkzJwM71HGsSMFPnsy0FV8HILLGSx6PZfhFCLEjWRFf+YFCtvMCYmVRrCYeb9Ukeka6VSLP+bNlUYrVDd4po4JSlOK877xTYm0pZ4szvbcrgxTP+BBej40q0hpUIQfMuaVLOSCm9UQKMPC2fnfyQtQYUvwAQgrAsWGGR3C+jwdPgLY7TujjZ/jrHoSJyrYueCATmchSWCstrba6tPgpIQWWtorIFhQRxA9apJq7WQwWFsy5MchMlWWmshBAqkjxKpHyBFDl+7ElEgfwZotgoA0dM4osPQ1F2VAUaVxBtkSZ73RZ8XSf5NSZByR/7s/9OV0UheDM41kpZSkBNKsRSrW1iE702kKBFG8VmqKDiivlQKtTKzRTKC0zNgAnsgwlO3TEiWoyW4ZOVUktaZaILGYz3dCpa6D7IB+jgMjFS+9deOOCB3J9PoQzVmjPNJI3jwBhQlwfUyWlLAV1ItidMFJ3kpJzNWDJueCJ7LvvOoejS7sgO3jtam6yKRO5qHZrpS2DZ+JNQYBjEKlaeKO8VdZsYMdJLcXPzbDAv/H6mcRN04yH8S6XgIe9Pi96/uNhhl5bocruF4A
fxtTSo3bFU8eBRLsR/lBFi2ch1bOypHdxI+Se+pYn5V/Wd+mKVhaSS4f5rb1GZMKFqGv94w+funL12sHjXqfyvgn36vWbfhuuNGNgtsfgumNUJe3VfLK3obXe8TMi2+ygmfGm8242ALtiIRZsDBpNpIyLB9yQeBHGoTw2QmQFXZBlObRLwYzuKtf/HXq04WEam60XstGPREULiiy82SqtlnZCeA2vSqIAgphBccohlICdCierqLLoNbd1Mb2ElNKVQvDjBOhFSKmL//MyjdGf/DBUEGfpi8iECwJZiLkPv6hhk8VZi53+aGipYaxbYNAntEp9WwohAGdt1VxxIkWkoIYwtFVQanH/fPmXOVl6AIUtai1kR0H46RRbWUg9XEVLHKXaCKYhbmQfDFScIASg4IQs/QuZ8kQDM2K1tb0U1WJlK74EEeHNLs5mpaqGKA4UApT6IKVcWKQYjMAOQjdVPvXr4tEMOz/Lzy82KNFw3aNfLUUVWC5RvnAKm23EsJVS4sglQVz0IgwtnjbKCh3lc1+Vq9SR1RaWzDqQRgdbbWKYiMlOmrWK21TRqZqumYxDU7+l2qMs2QjPWvuBjY+BuscZiNGFRhsQUJQsDB8Fbl7Ppxr7zIMDguxqQFad7qFGVGfAs8kBRsMz/VCLjEvxBM3Oj9VLA128kFfY2SXIvGxcaIttE9xD/0cnqTKhph9d+g6JPUoSUj1hOg8Dm8r+cbbaSXZdqtpFv8PkTmaP9T3ZWpd25JJb/iy8y5WO5HmVTqUdL53N1tSaqTQHcc968XT+rH08wL71bZ0T1a13aXS9a0o30cY/o+CDBx20O+wHIdn0M5aZcH05aX4QVYq3tNTc2NlczaNwppQipTDasckcoARgUEpb/JzhoI6QTBP+lCrFWo3CvSJeCqXDPGN0OGuVOMBcq2zIMkyKSQgpYJZZ+lkpj7TEXUqLOqQXG/14qlZaur91csWibmDgGxxSYKXuEawe2V28DmBDrJLiWqLbUxiAWhCkvPpxIjKxpB48lOvlZqWAHlC2FslCpEppGK54RWeJpZd5cUIANiKg/hOpOKRBwKOonCiW8uqUkl36MRBcXskW+oEVeNWWR9qmLI9s9ZRtDy67xMsvXTwCyHkUpgFVrYvJzsrk9sJSZO6De/2Pc03qgm2342qR1rR6aMYgxYmyrNc0/aBFLZWiNMVs1MEL9QexDbEUolQEBQ6ygzyVgreFa87MXIRCfR6/GM+/hNqwQOkqpZUsUc6PmHSu37iq1roZ2abMqSMRFKpIwXm1KOhMo2zOeBC4E0gZhHfSeBoCE3zgnieFfn3l3ISb2tucarRHLnVMtZbChOvmDQ/e1MLxlnqlXYAni24T3Rzb0IzOaeKJw20LPzwWo3fCBDykIo3DnSw/VG7p2eW+ba52p91vE3dZt3jivHVmj8492UrorFvRO/6u6txB3Wb2+CALRNVo0k+MDm5AsOtOjz722FYub8/yTFGLe2DCRPun0prTRYLMlF8itW6xk254lGd8gXY/b1Bym7S7XLHOtPFLBJusboBCkFdUjfRm5pGtHldmpZOqIBFg6nJHYVTCMasjJVJS2DIkgEz8Gs9Q8eHGB2T1bIjQQCyl2PADgvDyM4CBIArt3WqTtebXajkxl39dSDKEUlX1pEZLRCk/0/TAC9XJO3T+VnwZEq8ypwJThUrxAT+LkF0RnhPZJRIvg+jPjuotKQpt2BCrn1pZodi4NRWBKy2D0oIeA6nO8svivHZzE7SqQiTICp5qaJyFtNkiqdV40rRqm1aPVBZwFQ+pZV0RVShlkDaLgtPvkJYGdFKFxr+UCpbuPgOC3iK40npVBtqAUkQpHEPTClaEn+6l+jY/DCbBdlnDT1YQiC/ZpbCGZCGAfsxSsGu9DCVKG0MMVYgCKZRYl9BbVIQUcHKPnPsq/OhjdGxvahcnS8/r1Wa/kMFSxFB1Ll58T0qV5cp9FYSIIghFhn3r23GBrucYLLLoOLGVuYak9wWO7lRNT+Wax2xdq4TUdfeXvvQlVwbeEqLKnNyo2hWMFNiq2o5HJAcBL9joHAZXGBCwnilasXzIPbvAMzrUbfT86SQMVRHkttt/Krq3milbVj5E8R7rS6SNtSu4KZoALvriL4W2pbBFTRdddjEIrM6Awpbucf3alYTalZSh/WCebBlBngdbsYI/kOeyOli6meuR7fsDRnN67G3N0w22VkKvfiMUon+yQrMJEyB2v4oDGQt5KhYPSVXQ+QRSgHWmW01ZpXUGBT+dNFBog7Pb3p7v6PY8x6zUA2ucebolU2F5ZYTprepm0TFQh3MWqc1Ki4i5wLYsToJwgkDWYEs1trM/hAcoFFUtqcIoNysZ25vwoeNZsGUMkZ6mXBiDVAbQhdLgWctD6ZjrUn8HVluYRYTD1YZzeQJpVa1kla1ynOXpcguntoJlkyJKEUuRXVIooKqa2gysQmlFpGA3Ww1tEapaEUhhZVuRirNCKo29nT1lW6Ot3OYvzkUhq/nj4pZIqvql1Y+5pU1z5HLqWyXYIjzRZg6C3upgg+tFiBUphQi6HSSd1SplMgUt4v9CqlMW0XSPmVpEVlplSsosC19WIC2CFJfiqasqBSdID8DZyzLjELG+VWF1lqcaUBS5xuVSVM9+2vKBZs9wjF79S2/RId1COXw/JyyCM4SBCWAYApWyOzeq4nNrKjJkpaXUaF2qrfL/kClxdkUYv7Psbq38jk2lDEraEDFEVYdeQxpqaEBEpOFBOZBf17kh5oxmNe44rNJmNDpVqqfblVbzKPhoycaZEdpVgn6Htx9Na7iJ7yqvgqV26ds1uoi7yFKykJYuwT2IsC0Kzl38foI0F5ZdWYJiXrpRA7lx/S2R9yRS43r1EWYt0k+aQfDj0VlXz0HJJgE+Sf/T/ukCSvIHaERWdEsapG3lUmRnrXIVlQVJVtO7HFKKDhBpcMa82lgflek8fMAAKeAsM7quiE1HMmQA2VGW82KjbZTi0/OSnzuPCpCkhQClEFnQQw0ocW4A7q8RpRQzulT1mjWzKK2GaqtnGCreUg6gA+LwFqlX4zcUlWRx002ZGHYWM6pJ1XOGMMsCDHygCgVAZBG3snmzg4goEhFFRMqGAdQxrwKP8OisLBMqi7KyBBuEiET9Rla2sJhlcYJonFmVRxHYipSIIRUYtS1Cb7ZVkKKXQidP4NG4nakRS9Ely4C/aqWlSOtJU1k8BZQFKPD2PxFjAicYa+8f9H7TrfOIxaUtreywxxakAa+fKDwH2NyFd0JX1OHRZpXtAMAM70rmckQpbRVvkVRYpABnnakDKJBSpMOSJLh6zNkHT55z3gSX6jpwmjeEpTpRNlt8stbmxlzHcXyvGlCMnS7ToUzV27VSBQfX5w7J8yFglVIF7jleYZZ5+OHHbcqrjupbrvhPFcAjUDSroOobLGRbJCUeQ/6fKnDsbjA87i4VA3X1rTqhZsLYhLgfEnAnx+5Wov6IE4RETbgbxmjOIaZ0DA/DrMAdfapw8dZFOKQ/QrCYMXS3J/ew9cORqqqmK7EoP5yC21y7SlCrZ4
822Vb5ttiHYhW/m2XZ0ueWzkWEAFJNFyLbnrzrVdtcke6ha7mOcX+M4eyZdzocrmnVy1f0GXu8Xlgs/mtceJLhHw9wtvfPbVbQXH/4lweOaeWqmlMyfUVFXuxkEvD7Jr0F0GlA4IQAxyv8YopUcKerby9XmYEXVLNUJ+G/2i0NeFDcZaE0RCxiA60+hpzYVqzahoG8SrKkOJ7N/gONHSHVi0fpZsBMfFf1KshkjeHHpmIoi0in+rRIilMR65BW1VpOihWl1dwsnkq1iBKlQMh2KVzthEhhQ7asoOB06FNKvFGoXbWW7QTBHBE8EL6Nic3UDCeIiFMo6wMNKHUSQ8URASUr5Ri1AA8QFqmsM8T0TEmiUf5hSSIrxUDVynIVvyK2IrCd9Jcb1Vbr2hV/RXYZ6CRIw1JSE7UFB6M7tQCLkxu0SREx5GdtEy54kabaCwO8+ovjETduKC0Dc+NddhLgmgy9UiIGV2pKJZIhMhu2ONHb2bgBZBGZAPjh+OGKqGIURYoiW4psgdH6I8VGHIO0g5yGLeNmZdX0KJYrRiGYCZoE9BrZ2QCMe9i4rQpdriwMXatQ6OT8F7/4ReuEF0q5v6HEpONJkvmFXYKyXdioon0sZoRS63B61lQ1G9/IptofBVSZKlZ4YqXhAH+WAgo7IZbSOm5Kt02s4e3xCGsWrYNOG990I0WQZvtEcXJCrb3aoLKCu1HyH/en/iyvIPSt7EfVPZW9PU6XeNU2tsvEKt1FylNK3Sj/Ls8u3uG0OJcJXQ5b6RD0wurYq6hsUo2ot9jrM33pJ/qY1jz9wGl9QxO4DHImEAM2zVKpaNs2BP2I7NKsj7HNqi7hcQDQcHCzMeUup9y3STMAD2aNgJC1V0chPdUf2RnOzVKLp1akLW1WkWxlIWwRQUThS0sRWeFe+6pXeMTbaqwufqgkSZVcfY4BgKEgLvVGDdFprCXaqWIJoggbvEXYWCUuC+dTZRFlQd0irqh2ycouqNpdDTiZUBNC1CqiE8BdqNYHtTDmWZGVKqVHOlKbo3pMtF5Flt1yMvHgsewrMmee5ao+wYTY8XPxYCPeOvpVE2aArZRdnbs1glOIDUM1tIL1h9EyUMXncsJbazyUA0UoUkWrIhSCJc7VdtkVB/yYieBBlMKJcEYHVApHV2VsxAEHAItlrmxMbM8XkZUlG+IsEvWhmunBIK1peDVQCEcEpKRVAkHngFkeTz1RVG00qxF+WTohui4RWQ0kiwjw4wSlCFQromq6HGZKaO69moCDCirSeWqUuCxVmMdEhmitU6uUHa7qaT6orail3Og+hCGtCn1+QOHTz3zc+7OtUsY/TlYcTHAm0I9j/D5aijI6ktRzVgDn+ZZY+e3MjPbWThHkbiAe4p2l6RTz9hnLIVXq6+rb90HsQJ47917NoXuvkkDbA2KUfvSlpw0kviWzcOiBjGLmVDPZg/nun1rQo76qqVSRjh5/dqCeU7RDC4oO6j/knqWLDol7OxN98U3asjtVlLYEW1iFe9j3ZO9Us8nhqeyuBkTVX/y7DJqwahELu2yLggcIAigi3Uqkk2oa3VjqQuHXfu3XvvCFL3jue+3GdQPBlGVh8aOUL//Wb7uHwpZRtv+Adtmcf+lAm2bVoHPZsd8sSS1nDj/g24mZ61TBub8o3O4Exi6BvAU765k1K+5lwUozGVNeGgnnlQFiqauHeNoZpJGe6UV1ZDkmiwFCOVmCczcRrw13g2goRty81cL6VCYjmQycMGBeSil1zC8EQ/W2blJsGNArIksETgR0XqgqbEstdxBlCzhJtZKlVCE6tkJ9a4qiCOdKS2mWKmx1A89yFc5m/VfZKscJMcAqpVSWqxBOIgprNVRhHRCu6qGzDGqK//qVrG04KVEdzIVqaBGKLB5p3cBMD4C0InpMcWy1xQoTdNYNpfQgSinhP0rt0oOOKHVScRFx1gEUOMDDZ/zUqrJ0854rnm2vAKjFZmrDj7hSCEFv3pHeDS0lC1oKqed8KKBggxfBVqmFoHct4RgfDBsMcJXlbSuIB1CiiHurjy2XyiBbHmzcqFEpbSiQRqmqZIUFrtaCD5EF4nnhQq5XAEGg71ez0304UegBcN1ZkedD3l/++c9/3ilHqwKfldJvBueq1B1VFyoL3nK1znOsiJRFtVvZNgRVRBbxh0E0ICn1skZqfAcifOzKDRZZXul/k2bM0LzrwL2V5wRGRHJ7+X62/ffvy5VuW4c4EBMc2ubeGv70qPdweBz4SBZI3JP/o8Y5YZz+vLyq5t1GrKHSF/+yXotSRVJQhWXQkQgiWoTcl3s3ik8g+mku/S4RrDy+n2mYAP1Wcx87cjQapnbaYmkzKCkkNZ06fexwruWyAcjE+hUwitFHlcUtI2F7P8QHc5Ck/tQ3SlCkdTg846qKAN2vFhEXJxyRV9yAd4xUHIU5vUg2O2D+yKzBRiOcu9VLRXVhQzEn149WGHORxVwKEXTQaY4sXIpeF6fw9kgjXh+Il4GXldJe889kB0kI8IAqVJO6R2qZgCye4uWRClfdaFo6nsXQCaWqNNL1WzlgUyVElHKMckUVgXMegyFaKWwADvC0AUpsik6quGc/i3kkNglDK1CYqwrF9C1QtYUOmFC64i8LluzubFYl1UxQdhRsLk3gPHGxVAalDCHyH0UQ6jDldbi1brZF0maltVWdnAFKiUAUgSVSBOcqUlpBRW1fClH4Uz38EQHEdhJq4YiACBxU7TKEfxUprQk6PcGhE6IZEQfSwSBSzagdBr/pqVMDYl0RJx3AmEXUw70rFFGpldT/bQu+0fw3/sbfUAWtJoAUYrbCUajUvZRDxpYrF8jWLduGrU49r5OrFuPYJkFc2VXTcq70PnSVyj/RMM1J/RbbQyaDnZNkXZna3rXB17ZY2u5GOJA9yYEEyuO+Gx9c2XdlVsMMMdB2wak7lvLDp/fxfzONfLie3dB9OOd/ulJR1hETpZ1BEXMzyvxtT16uimHxVryCUnSw9EB07UkTfcyC7AbdXfJTTz5toTp56pTv454587aPbbqHVup7uWzNPdQMOjIzOuavJ7aZowyc6c96wQM+Ua+vAriNQEXE4RiA16PU83jVS8R5WZFLH0teKphLq/SlWsEMqQi3gaKmEHRQho4aOFBEfxlkyyO7GfbkkYwWZVIuQpZAszGVu6U8GzAm9XVZUkrXMENZ2mujXRYxwlOKH1j2EUvHQ2c5i5dOAxfhELKrDpibVYQIIMWLSOu8IpxcbZaejKyBckLrZx/149cwqkOkCuGAuCUcohSP6mvO6qkz3AZUHT44BzqnpiOakJaTIAYAqXUpqRoqUYpZygR6+fWSEjEzF0tzhVIcm1IUzAJbo6UgPnBgc3COzmqrWqnSNqIiOJGpqb+bQbJKqzkF41udZB3svpKVzgWKyoZSu80uDWwpkiqFtL4QgmMnPhBBUX31wqDPKDK3GpYcJosBVMNuSgq9tnbTMtOGSKFURKkq85JqabNNOcO6PkAWM2IfL1mlfJqBhx4bjFS28uxYeEeRL/Y+9VReHq+r2HEkYk0CWd68yeLi5
e9973sugS0bDYWGFXBPmqsfT03LAtldaBHKLk85PzwVmLZhe7hdSpXyFM2CSpU+Jxr5psNs6VC1HWH31Vof8qaE7eGLVkdztCnbjveVv0/B/erF3H0kQt4NCA272Q+R2i36k9nd1XA3vgnReN7OhgdRiBaz7MJb2nRP5yyRk43tbOjkddsufZxLc+XhwZVLkO9+9zs6HlmDhYjGxbPHBHqGTF4Enm0bvbSpT7pDSnR3BcEpBbsOVxwF88yKm0UlOue9mY0kmcWJMqVpl2pbFDwLryBOgFNNOzngySzQYukCYxKr4nDMflEl1Z+8dE8XXMzoS0nFRQGlVjv9EVeEKCVYbR08ao6IuUAbhirEtjSX2Aq3GfBXZxHpYkanZ4mzXBOLooKANnRuUA5QMAjOgQey/KDLlq10zGDZXdHHXKgSKVlAlkuy0gqWzVsz6mqLimOAYCBYPfTXrkkTXWmXVaVAaduxWaUcAzi9PUyWQjgNlUWpwzW0Ugg/yxzZbVeDd5JVVHNFyimtOQjNGJounnqFBx3QVk44ZOFlk+LkXvmLN6uOshhk+UmP+FCCQsluu1QzZoBNWoYixDEARdtQba5aFr8iXWtZrAn98913z+KRtVBhqBLBAfb6Hnvsk6579Hnba4Y/ol2aK1d9Iu8cVdrOqnbhwkWI+yr7hFYv2ijhPA9Vyo2aLJyJegsHfF4UdFnEck753uQ+RbmOnqsUN4tzkv7WLTOdcx+/8Ru/ZeFkgqAuGXWaaTsJ7NU+pvmt4fWplOZ1uvHHV6/SHlvnhVG3o4rhu5X8KVIaxj0K7xOEPVz/SbKCsKt3eZJoAC3O4+mcLRKnIW9GBFzpSiG7MKKbBN0ocALzyuWrfnV+9OWj83u+uUiaO6Sb72eG18GicGbT6TdoZgHT2+bOqcuVhe2I27F53CV1T7V6WvuGE4C6kAJZppXKoXSmMnGWn60xIAzpDsVjcHuJCSeOTcWLV9uDR3I0D50UtUoNlhs3c0WFIXnc1SjfqNVYHSqxeinqVgZilxb8LcIMAYoKzTK2PEOBE6GH37I1V/76VwaVV1SoOH5I/ZTKqkwrX7aark5pi3AC5orwmWKloEaxFYxVrtauxmALmGLm7WkJS6WowlP/aUDEBmkK4RVb+KU010rZFk8dpiRqt02JkwieAh/qj2zVUoK/bDiXoTqmtymtzuopJwqkuJQ2XmFQCzglDNFg5m3d3QkQAXgAnurHWf3NtqhqS4mNAVl/MZdHuimYChZXWs0MoXCJdZwAHUXaLDrfzPuILUUx5XMbQhxn4ywLVszRQVVBFDEKUABVZLnKIKR0WUjp+Isg6gbNEqTf8576pohvPgXgo1w/8zM/46bK/FBvR4+fyV67fOUi/nPvvoeZQo8ZvvnNP7J14+0PSnVIqjDTDxHkmzP7cxg/c9KFxO8fGlr3u9gTEAptWGaBvZq37volqRtB85TrcUXckK7Q3aVhQ8AA4lsjauXKvGllin6RVVpbERh8I/nD/YnmjwjLbXIb36ay91RT9+4u+hPYvVtJHbgnfUPcjhFuAEZ1jhTtBqoRmOFTtqbVEJHpmfobOuKNOdTjdsrSNT+F2lxzw3Xg0b0Z+5qYSPub5UonBPpeESNLJwdVSxAnkNVR3YbhBC2tHimpstU9lELtKuJtVdEMkV3jDmfZFJnH1IjDHYkVodz8DM8rbnHXEiQxGL36cVXIFq/2buXjrFU8GGovkg3xEOFMGpN4ABOrkuiyDSJXQOtT8aWkajETF8rlXucRxOpZ/DEzUNP4wZa2mQFVscQlBaGQn+gqBZfWJR5mPpr5XYpTlkKlzS4liJyRtZnTmlatlFptIIUDbOUkgmi5gKA3VUQc3gaD4wFlkDWt4EGpA22UlVVEFqDAlfILvvRAtALZtmmZd1NStbVLhKtC22tPqVAQKShiqyBQDDFHFp0n9QGOeSnhBn48kDLULiInpeiYgVIUslQBOM7y1ASeIlUIL4JHEZw/9bNtTSHKuJZLEEU1R3nsjUh7Hf5O7ojqhMEvXdxJOQqoyHWf9/A6/Udh9bg2xXn+/GVXu3/4td/3OSk3UjgfzEcPch/so2tWXObUsjWCcV+RxY/ngIamkMKQk2C+J76IECK72YUzl5Kx1Us0H5hwrsw+EicbWDFOeKfv3UfNVj9HclPV/jZLF0EyY56fYrLH+eXJhyP38//DpZQS3I3PH8v/n5vBiGhMeTnt+OHxKc/wbpLVIo1tO7NpwS+m2v1yuvtQZtet1OYhYtp03+1dGSMXODJoaENok1rtnAwkaGiIDKR0qazOg06PrCIUiCyi51xKgepsSueDGShjN7Llb8tKS6keWQiK6pQuLUIbD+vPZjOwVqkjBiA6LiJYxAqrUoOCgQHMeErEuQuKiHjMIIVjXn6TRen0h8gbSmRp5hbOOkdbTWBGkRLEI1UU52bGkQU1UX4WIcVxymIoAicFx1/KUsgNlDZz3ZC9eiPLlSwpnACiUU3H1cAKirQ6TQHmOAFhsTyyNC8HSIGKkDp4+I7pvi5JNVvtUoWZSCs10klqEWcd4DYGWcwEW2VEH8OJFV8gU+N8bUmJ74jgOgBPfWz9vP/BjfdvZb48mGPrS3N9brZX4thl6xIr8AK7gFdSFCnZ2Lhrtor56Q9t4opjozMqprEwQGhDZ1H0umzwjVoiKPyJ9m1ToldtGdB3HaOtvPqRWzUPoSinhFE/jnJvQ0p0cM0Pqlh3FekuU4fUdXU5ddlnFnANanHaf+CDFz7zuU88+zF9W3PbTGtD1weGhMEd1e8POICnFXyI5+iRbC9zW6k9Ft5OH9tcNySY02prMzCx2IHVDVrlppSIzw7XH4vqG5tHSi5h9TEuSX2f5dFHHxFkCkdzglwT99OIc3j0JLE1Zo3NXNdaudIN2oKoOoOsf/+JNwNjYjrecph7PIkz9wKl9yKn192T/lGJ99OjAXaL6iTlWj/pjle7bLtFFdE6+s90APci+cpzPvi378DFSxfcgehyRHQq9MtXL2Fuy0J0JKBIcwMfiOpAoBbxwcObQzcOWXAAccO2sbU5jpvuOsAH+zEUku7wrNsKdYQM9BnsrANFHEjRzkIDb7ap7XHaqMWpO9UH2ap1d3UElSVb6xQpAFj5o86KZLHq1qnMTOj1FaW6pNgaICZHNtMhOsBW/1AAvMB7ekpZ9VREjxR9lU5NN7WlUFGiPOGbaUKUVW/zrvQJSoYcHVrfpgfOuXT2iP7BCxcuERXZ8aErqCXdM6psIl25cok4fpW4fPmiuoTNMSqz281Un3XrHjFfjqZiDY95zJyK57nYHIswBcg2XCLgOQGfx7cajQOy4uYxAYuyKk4/keL6mSyjQFFnZ6WkpnNEHJBtf3K1jnMqnt5AiayUiUxRTMwXA/Qe65OfmvqMhLM+1gGsLoPydD2XxQZrzvTXkzrDf4acLGCrHZ3acM7AhitlpQ3dLAd6maIIGyJKcXWpEvzTfDkjTjO8znO7nJtaHDjkk1RXbubnAVWFWX00AKO6XWvtfJN/2Sc/tH+WhJyAtx6xojoYR1btnbzP
SQdKVELHv3blkjgdyCfTDI/3/fxX3dD1oQvved/uVUPbSfSnn7Th93HfyHDsex5Cqce1i+fP0X/i6BG+nb902SaM2yng4QFcxBxuF6RxPN2DKt1yxk0PenA3A0RNp+7uefy3iS0ngaLCijnOFrW0WTyLudFuFt5slSDOAuxXM1km1QHo/1/72h9+/OMfszy7ttZXvfjdjeO0G2fiXmAasSg9Ah08taPAKTibgEZd5orUaphrup4k9PeCW/Mz/3uV5JryHsAj1J3IhIe5me4br92o5RPv94KGOSVVtXV489zuXiL3prV1d+Jftt2w7xHcUyQLRBFbY7XLPx1jM79vGcJJRHtJ9Vtjy3sszAOGku9Tp0Hy2D1H+DSfi1TXY9PhMyfkV8D52J+J0FqVXzQa5gSr01zqH8Fjx07UNBMQcn7MRcnVa7l812m5SpSUKzaD1xrJTOaSnN0gEp9xuqoxNI1MX3iXpQ2Q1WccMkRxN0etQcptnL6JlTGbgROgxPTse4J44IcYIy8DRvvmCbPKK65qKY9lhYAKWTgovbKtwJjI6qUUzgOBKP9u5VHKWQ1lRsHDhGxc73DZ2qr+0jGUB9FIbxRqDoMiakuEcw8RQMaZWVdm94k4iwBb/dmT0qM5qxCOH+CRamP0BfR3TiylausDXGnrpVSWhsVGqg5wD1HK0HJDKVlZqpTKYpAtsUoQ+bP0EweKqsShNPpRIrINSymI/MgC3FD3tuyWz7jl9CoN+hBgWmV1x5prBODo9LC7nKzdstWf+oCiaJXyBKUpBE91opQnrk6IZJkAEJSKFDdC2K2tFlWcgJWS/3RawrvM68waWZwq4vbITcyNm9duzb0EQaNASgRDe2zHvydMzz33nI0yCp2esAHomqZV5pW+QaTD9Q/+4A++9rWveVcsx8Zc9gxxHsoPJ29fRmh/d55kG0+et46CWetqpHTVCwNoVtq6r2wpu9lFaYdZWchGj2uT0ceyxu1JRaZ/8id/8p/8k3/ivlNbd0G9fVB9j4E92fltibWq5HooXVwbn3coq2iQ25y79F0Nu/Tby+cu9b8cfn8/b7faHd7ttOYd9PtnNNzdVjY9WcHmGIVRnB5nsWlP1ot0Wt1Alm78soi6N9BFNbTU5a5/9o/owYDYnlP9BGtdEe3Sa9fzNnBEbGM8zUehgYahdClgCwM2qgwKODYaKgWpn4YPHvz00HDV488BRBqqhEiJefG7QQVoXBx1rqwU1VKCMW8uL12WH40FXYjSWi1SQWzElZazxLsp1VDNSgHOJcuxZQgdyAJImWu9uLR66Kxa4kXUi9aWcn40JawriHS2tMpJFUFscMoJbwOgg5jZgeUG/SWjQOoYZNkd2c1cvFsXVtjFL+UntnaR+owI6Gm9RIksnpqr9eUVVbUoBUpbkTLXqLQKMVir2hlowMkuo7qUfoIN4GlakXg/OqsWXgZKaACYcUoVyUrh9Xkxy+Ipw+Kvt1og76qrnjb7jdRdvTxdgdgDAfNqIgpSOxpMxNyWtSpUoW88MaHUgLRWYWBUNp+jn3f6UUJnhvJs5GLwROpnf/Zn7QGqvuoAiGXMRp8NQEfA8aC4kYL3Y5IUehMgr3QPCq1n/YALnDOr1thk+VNKPeQGE22v8tOzooG/2V1EqSyAKAXwIqnaDiyGaQrMaQvm3EsBN4K+JIIHMpwKxTYO7+i4A12G7qCOw8uf8lB4P+aR3Xi+Rw9Nd1H+90z40BBtKr4byT3t275EickAN0QLlocUMJCB0S3Vx0CXK5t+aewBPV8pWdooqeBSBUEkiE1RGXAChhDpQJfKYi6wYpjUJXSAR7/CVltlQ6RHukts0VjYXL9mudJfjS59l4papZRGKTNGkVSRFBthWuBUF2obHbEGdhHiZCtSh1pKdomgVG3F2SXS0l2eKqm2pZPDxGmTCmVdqkI430pZ9ApShVnKFg3SmsaGgTjBmp7Nk0iXYaVlW6YhLaoU/qUhwvd6gSEiJdmOm8DWVUpWjXiIQbtgaFsQKVutoGNYIFuoJ7G65S/eUjjx1azE8cuW4uufLAJu6Gotle1tSpsGfxVyxoew8dBcoxBFcAi6KkAaCrKg/uOpCB44oiwofRfn6iKWk5/4uyowRGf5m049cjWH2SaH3o9S59WCYHzbnye66NzzOzk+0Pb44497LmVxAuPpQbdWRof7D5yUO9T3rW9966233nCCzm97ewadrFKapWxZHekvjsiKLB5Ag7rIAvrbgnB0rhLZDUI5WyPpCIUTNBookIqXfjdezpYuQcRxpyozMXHS7qWF2QfGvPl2YiUwPkiRkbtHydJ2P+Seju1xdVf2fvr10V222/hE7Hb2v1bsfvUy2P4ELq8ALmR1+2VIU+pCBrei9rd2eF2LRQuViyepcT0dYHPvZYxvusJ0VNoAPZSUDtEZ1iRpvOjnGEArQhueDrRaVFQiQTiFVKVLzZTLT0CkiBQDfhp46FkaZprjx3bWhWBDPGTDhEaKcJNB7dJFUhZfJaW0sOFyEjMEBUDixaFsKuIH2CoFAYrwoGOu68OVrZJRELewFZdyoEpIKSogLjqeUZxEKeY11GuIZqC0UniqdiEtlcUPIKqMH73K62Fxyxk6HkQpnVIUk1qRUjCXv4hUKVhZ+ouXLi2lyilpabPF27HUTqm03mIgCEeESzWcIHdmJKiUckUt1TwlNqpKidQxDHUDcYHra5y6Ncqa0ztrVwqdwoVTUlmUBfUfnapd5nqCrUhVlWExL56WCsIq2tXGN3oUtQVbF7hHU1ILvZGjLiD3ZwNRuP99v4JyfSYItvh858LPqL0B0oOpT3/60xBqdQYAN3HL4rQ4feMb3/DCUNrOn3+3tyN4lmNFWAT4AYqUV373tirLC0RZHjbVdogaF3S4QspWJfRgrjYiC/AUV7RLXNnFUGSlYgjcX9JfE3xWQVXzmqhXXnn10sXL+x9Ib6eW6aVnWflhkEotZ4js4rsa+LCb/d89fr943i8+KyAEd3l0rRahtyidLZDx7o/AalndyeRgMteUKZt5Hl6QxUytoipvl4AroraqMMt2xbJFXOXlgQP6jRQ8Fj8WrQVMk+0MhoFa2jpId9VWJxGjibghcOnCRfyUE2EXZfFQeMjY2zVMF258iHApHEJjcUpb1HpK6wQ2ggBzQRHAzHDxpnW9RVIieBaD0sKi0MYEqEiVEwGrqKWyiBXvRCA7Tm1MwMWlbCslK7iKCOInqI5Ky4Beu1UFL91cBiHSrOo3ArThrIgU1IEGUBZ/RYpfuXwZA1l2S6GBWkQi6LTJtjmrSooT4KRKiq0UzGAsxLc4M+EtvYLVzxwGJjCjwynRMz44tIn2ElGkL+qItSVbEVEi4ll7OZfReiKLMw5s/YEDpSgrhVSwNa1sS9GX5gq2lLeKNFkZWEGXVq16KcIvqzqQFhWxXGFwL9UHUU7EfeHzP65qOBezLDDkLNJs+Y3UV7/61d/5nd9xYWcNO3fu3drFD6mrbNGPmSC6vmFYKXKzZTOwDuARMURSKouIH1JKs2SxwWnDBsoj7fjHXH4pTgxD2CQ
oxapkt2jRTTtweylM0KkD6FreAuWW0XL1G7/xmz090Y5B/9K5R9se07ulS6TIhyjZldqD67d7KM1+iN178v+XIq4g7HHgT+b/rrZqaFSlhRVqHUf/AexqRKlO5dq6FMy6nF5qFOiN+pWLYTztZpRACqTQy4/Z9avOjHj5St4d2t5OFeaySY0OD3dRdCcb5noXho997GPGgq0I3awDEyc6DdwwxDJGbtxwgNYk03WOBnY5jw0zPznfCm7uk+RxY0LFZLWkor7ibp9WxF3aMXc41VE4CtutJyl0FCkRSClw0CIUaqWlVE/Z6iKdLWq2nJHfKuRScaGGtEq1xQ3ZsG5hVz/N9bPM9XN5TqLM6NuirAqIiqjlT50R2XKiE4cX4DWLDSCikKrC6iyDItmySSsIUVrBBhadXfUFtU6wGpqlRFeAl960WbJYUSisaQi1YOlcpYrwHzl8xIW2CReDbiDVEwAcJ4b6xgE40PZ1Br1FZWvVpEpRAKPNlh+lInVsWJJUyUrtqGEAKMwtKTu4iCM1xbPcipwLQcOjDs9JPHK67QMXLp73ZbETJx7yeqQf+7Ef8Vp0g1A1HziQPU+dX60pUl8pnJgZ3NEJa9WLL74owiKg0bWPVp00yNQlcXA8RSmPzBfW/SNHMkzi7bxaqc7Xf/TGBLIq1Xq1aLEpLWcR6dQ3Rsvf7Ep3+ctQzqZlg9cuBL9OpV3UDtgMFKhTD59UBUS4MLYjLRML2dW5iEWWG83W0B6elf0QPYvnv2bkfv6vxvohnb8f/67+3cCiFxBLl9VztN1cimRZmpnDHY/nVfnyp76tq+uZ1h5ZzARJ4FQEUHahnpdCueyuRVlFKLqQ4UPJOBK7RoplojNkeaqkPNWjgxl9OphHv3BKqhAPQKFNikF3Vco9DJs7R1ZRFYOpQy7ta2P5hKcydYswvYhEMAPZFqF04JWnwwOOGSweSClSPGUzSNDh+Gu6IlUlbam0OIRXraGoEZTVKvyJxu16UFxKG2ZW4HiYaC3QW3fZZQWilZkoYCiguzxHrNoitEFopmr5hgIH6GTLgwEoApqWEohSxPrDB5ReQLR2ZZDSMNKpRZujagmyQpUUgxQFaAlSNQEBNBDRV+jHtpToGYp8chqxFx/tKxhUVudb7lFSH6qNFSIrFIrQSyxSSnmqRLpEIACntPQhhCJL7YqbKlNbnsazRssAJ6heBEWV2xcuvOeWyKWlu6gf/bHPudADxipnldJmUF2/ksexaue4BOXoLgytUp5UAbtkFGIQClbKUEP1ZDlMA4WgztCDQTy9URk/wAkwtHYYUjp0nOjDlYT/YAWBQnjFF7HIbrYiUpylF6mgNEqnm0kxCCC7lLMo+93vflfqwIj40IF++IHNNWu17UmXlT30+2Vr9O7S++nR6nczh7Kt3b1L/6uh3q9e9/P/fvFphZTuUbiHfzHYUXPl4Wkqfk0MdC24rW+pXmdodHQQ0QM1d+nYyikLFBXRPUwU+r8eTtxeIJdkyy+th4waLB6CtueTRZfaQlekO/FEEUDX8drr0KnCtpZPZ+5lMQDIcqn88Z7f7bVchAMapQQgjEkNbBQybuuqheEyKAX1o0rZYB4buvlCtqWsVAk6uzVHiVI8ssRlVQCCgpkIZqpA6RBF2JTCSQE8slJF0kXE1t1OwWpFsNFTWeKQ4lVCtlVQRBaulKwUQ71iAsJnFPTyV3AxMKF1lc7kmBiqCIXYECmHgGiYz5/XXFsU0RSJGXGuhnJamkL8KKw3jJiLy4Iy4KG/JqrzxMmTiuoAthKltcVheLPYwANHNpsAcNpUs5z8aZdCbzUVEfRkVG8GmHEC1jGUGbG+oaA3AlKArqZSeoBAaSlBo1OtUfgsDmoJJ4uZNoBBxXFSwi6cUXQm8M+vgK/ZZLAYWWX9TOpHfvSzxqpzE2N2HytUUcI0ilWKVI+qe0Bl1nYvhcfY00UbYcp7AcToiROniOtEUyPOpE2Bgyl0IuqbcJoFSv/twj8iufCsQlk+cwCoGvHyy4qbUkCbiksxoEPIwnmrVFFLWzQubNbFhhp/gQgoQyJwMMF0bnnIeWrrh2IGtYP4aqcVKDciuUcPEyRwwmmgsHoWpdndFD+2Kq9I3dijQREeqZ81VqSUEmNoSmuxxC0l8939YMO5dZW4qx1+l7+eLFnZ8O+UKtpEajH9cYhufV+W0by3CvfjX3RS42icASK/zZZSc+I5vWnz1Cac00xulcXTlCrBqR1N3TMcNmfKsGl0acXLptHbkZhCkRpZZPVeHRInc/oz4tFjxzFXHKX802cyGLGRAuh4OIAClueUo+PHQ207GGYMUuD3JXo4HoMX8wyi/BoVA0jf5Sh5SusoanGsEO6iwKW1ASkQrE8MY8NcUAphW2nnaxQ4ChNVIlvXIZRXFcpWR8TLUFvYilSVVCnYtuYmOtiqQRGvKCmgbyGtiKfaEFc7lbL4m1VKVWWXIPG2jVJEIniaNlCIta49ihDBUJ0onWtIeZ0EIj0AT1NKzJWyOpZSiAaC4Ael4ORV1dImqsvJstUcP6uTFAqoBtnFXyINioZl03vqDx9YR68bsqVTi+jHx1KyiKVTwp+KVAp92arzwxtis5jtd+tC+qgiJnRZRdRaiCsuaPW8UkLESvW0/+jTguarp94q65ODbqQweEx1+tGHuWFGZsW9MjbZJYL+67/+69YnN1Vf//rXOeBWzDVWByofSLHCeuNTi/WBngK2Ph7ATLPSsknzC+xtzOs/Iop2wQbIFtpPNhp3BrBSRClmspA2E6TZUpRWtlaqbUnVqGyhqkw74qzTCYJFSxFPJ85zyby9LtyVZYIsKEIEPoKbPrMokAX1v+kusbKLUqRsrcWihHNM71GyK7u8usPcdtDhxFCeJSW78CIisIfS7N2cG7Yd/XcIbjXvFbwf/x3CezN7lWyvy41BrDsx4fxmTtCOFir9Wc/s0DAccFIltqA9xKBrD1ekh5eIh0g1t/XxG1woflZCDwbQ68v6Sg9nqgqCH71WpOWvt8XZqtv0o5QZpSI12qxSxMpuliuZuqsAa7MdUSpAl0GoiMccIo8HUjYMKgCvYG03rT1pq1QivBqaJVWonsWDjWBlpegcAEsKXt+UwstDVT0pRQp2LVYhNkgBXuV1Ej968QpKy1kiZiKdleCrqIh0bG78JFKXVl+p6VqJeL7mFeC2tCakwgtQumIpbWXbV5ZaDI3bzOy4NpfhiIrYqjiviFR/PSy+iPU5UtvfbSjCSQM34NU/TsWrFqF7lc+qL+sYcLY/VENNw5uV0gnqQOMwheljxKuto0i2fQ0DnVqSCVyjRLjozv+KnCH3Hj8Pe53xs974XG82+m5kzTMXG1ekOcauCdrdJoozFK+++tofff2PvBxdET3GtlGKX9BQ6pWUqwQ5BudAq49fuKQApZeEHG4TtF6NQytbtqpC6YAqGyLNmPE0AjSjtLSmq2SxlQGRq9hANVdKtgyyeACkVuZvzNUf4l6IiIFdIi6u0EciXzOqFMHKrirIYh7ypscqKpCFVL90IXuI6MQjMo5sBxxt+TfMTU
[... base64-encoded PNG image data truncated ...]",
- "text/plain": [
- ""
- ]
- },
- "execution_count": null,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"# Lets create a prompt.\n",
"\n",
@@ -69,7 +44,7 @@
"import requests\n",
"from PIL import Image\n",
"\n",
- "from sglang.srt.conversation import chat_templates\n",
+ "from sglang.srt.parser.conversation import chat_templates\n",
"\n",
"image = Image.open(\n",
" BytesIO(\n",
@@ -101,22 +76,7 @@
"execution_count": null,
"id": "5",
"metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "You have video processor config saved in `preprocessor.json` file which is deprecated. Video processor configs should be saved in their own `video_preprocessor.json` file. You can rename the file or load and save the processor back which renames it automatically. Loading from `preprocessor.json` will be removed in v5.0.\n",
- "You have video processor config saved in `preprocessor.json` file which is deprecated. Video processor configs should be saved in their own `video_preprocessor.json` file. You can rename the file or load and save the processor back which renames it automatically. Loading from `preprocessor.json` will be removed in v5.0.\n",
- "Loading safetensors checkpoint shards: 0% Completed | 0/2 [00:00, ?it/s]\n",
- "Loading safetensors checkpoint shards: 50% Completed | 1/2 [00:03<00:03, 3.13s/it]\n",
- "Loading safetensors checkpoint shards: 100% Completed | 2/2 [00:06<00:00, 3.27s/it]\n",
- "Loading safetensors checkpoint shards: 100% Completed | 2/2 [00:06<00:00, 3.25s/it]\n",
- "\n",
- "Capturing batches (bs=1 avail_mem=21.63 GB): 100%|██████████| 35/35 [00:10<00:00, 3.19it/s] \n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"from sglang import Engine\n",
"\n",
@@ -130,15 +90,7 @@
"execution_count": null,
"id": "6",
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "In the picture, a person in a yellow shirt is hanging laundry on a clothesline attached to the back of a yellow taxi in an urban setting. There are city streets, buildings, and traffic lights visible in the background. The scene appears to be incongruous and amusing, as it shows an unusual and somewhat chaotic activity happening in a busy city environment.\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"out = llm.generate(prompt=conv.get_prompt(), image_data=[image])\n",
"print(out[\"text\"])"
@@ -157,22 +109,7 @@
"execution_count": null,
"id": "8",
"metadata": {},
- "outputs": [
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "7c94dead4660409c9acfac1f3461d7d9",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "Loading checkpoint shards: 0%| | 0/2 [00:00, ?it/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
+ "outputs": [],
"source": [
"# Compute the image embeddings using Huggingface.\n",
"\n",
@@ -190,15 +127,7 @@
"execution_count": null,
"id": "9",
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "The image shows a scene with two yellow taxis in an urban setting. The taxi on the left has a red light on top, indicating that it may be waiting or preparing to drive. The other taxi, which is facing left, has its hatch open with some clothing or fabric hanging out. The background features high-rise buildings and city streets, suggesting this is taking place in a downtown area of a city. The presence of multiple flags on flagpoles indicates that there might be some celebration or event within the vicinity.\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"processed_prompt = processor(\n",
" images=[image], text=conv.get_prompt(), return_tensors=\"pt\"\n",
@@ -245,32 +174,7 @@
"execution_count": null,
"id": "12",
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "<|header_start|>user<|header_end|>\n",
- "\n",
- "What's shown here: <|image|>?<|eot|><|header_start|>assistant<|header_end|>\n",
- "\n",
- "\n",
- "Image size: (570, 380)\n"
- ]
- },
- {
- "data": {
-       "image/jpeg": "[... base64-encoded JPEG image data omitted ...]",
NEGSUAkMGzg/l+RrCcEg5U0Qya+LZfKClG/hYjr+FNi8SrFchA4QkZJPSsKSzuCw3vHnGMl+cfjTzbrLcTLLcxIVUCPDg7iBwuPf1qOWJThG2h3FhrbXTyRkDckbNn6Csv8A4SqX/n2T/vuptIsbZLiZ47h2/wBEKTFnDBWP3se2arnRLXJ/4mQ/75FaR2M7HN+HCRbn3Y/0rot4IwDWB4ejzZAnsx/pWhPdrGpAIGO9aGqLj3McIBbPHYGs2712II0QjLZ/Ksq5unnY/Mdv1qlsQcl/1p2E5WNKPWBbDKWVuSB1K7qefGWqqu2Fo4x/sRKKytsCgd6QTQI33AfwqPZxe6H7WXRmmfGGvuf+QpcD/cfb/Kmf8JJrLctqV3j/AK7NVeC+t1IzCn4rWtbXlm65Ajz6bal04LohqpJ9SifEGqEf8f14fcztSL4h1YH5dQvB9JmroYCkuCkaY7cDmnyh40YCJCSOOBUckL7Bzy7mJD4n1tDxqt8B/wBdmroNH8WeIZruOFNXuGJPRzu/nXO273iMSbZGGfSu38D6c17fG4ktwm04GBU1aNNRbaRVOc3JK5654djurm3VrqUufXGK6iOytwAdgJx3rG08m3twBgADHSrH9oyjowwO2K86EsPT3Wp1zjOWzNmOGMEgIox7VHdRyiE+TL5beuKp2eol5WR8DjOalutUtbe3d5pdoXn1r1MPKnNXicdVTjuYF/ofiC+GIfFlxaf9c7dCf1qgfAOpyH/SfHGuOT12Mqj+VasPirSppikVxyPUYFaserWLIWN1AAOuZFrq5F0MVJHKj4ahjlvF3iU+uLwD/wBlq3YeAU0+5E48Q63ckDGy5ug6H8NtdB/a+n9Pt9t/39X/ABobWNPUc39qPrMv+NLluHMhEtbdAEdFbHGSKDpFg+WCSKf9iZl/kaz73xJo0XXUbYk9NsgJ/SnWGuW07fI4Knoc1jOcYOz6mkIuaujRGlxopCz3AB7GQsP1rI1TwlZ30bb4Imc/xAYP510Ecyuu7PFY+o+MNE0t2S4vVMi9Uj+Y/pWijGWxDk4vVnj3iz4Z3w1CO8s5sLH0jcZ/I1iw/C3xTrxXyrTyYSf9bM4VcfTrXpet/FKzEMsdvpcs8ZG0tI+3I/CuJ0fxrr97J5UOt7JlbiOeVt20dlwMHHTvVclhKabN/wAKfB3VtGhuVvLqwlExXOGbgLnHb3NampfDa9azuWe8sI8oVVn34Vew6VzMnxE8beH74Lqtza3FjJkpL5QYrzwCRj6Gta1+JzXztPqeiNdwhhs+zvtCj12nr+dNJBypvUp6L8KJpbyfUdkUjk/u2cFYx9PX8q6C3+G+svKZLzULPv8ALGGOKuW/xl8LtKIp1vrWU/wy2uP61s2nxI8KXhxFrNsGHaUmP/0ICjndrGim9kYN14ZudB/0gXKFH+QiMYPr/Squq6hJYpB9mClQPMc552ggYHr1rTvPEdpqi75LqCSJHBBjbIVDwWP8q5+fUo7ZIkYqzRMFyDyVLfKc+nIrOnSjOor9TeVRxp6lPUL6x1CR5I/OhGcOAoyzf4e1ZkdtbJl/tYKk9JB8w98DNdPFFYuHPkxbm5JA5z9ahl0+xuDzJIB6K/FetHDxSSZ5sqzbuc2k6ecYHR2AztfO0DI6/MRUlx4chvrZ4UmnjibG4hRzWubDSLIK7RRDHSST5v51Pba/p1rIk63mnnn5RI4x+WRTlRilohKbvuUfDfwUa4u/O1ybbZL/AKuKM/PKP9r0r1MWukaBYRW1tbRQRRjCLGu0Vz1r45jmGfOt5Fz1Bp8vjqzByzWRK8ZeXH5VxOjO+iNuZW3J7m/tJnaR4FuXU4Uva7s+gBx/OoI59WumeMad9mjXhN7BVb6Bf61lz/Ee2hbYi2DE9BHcFmb8FB/WqN54u1+/zDpUFjAzL987mZR7jgVUaMuwnNdWdNb2epLcp9uuLP7EyMsse0hnznpzjGK+b/FnhqfQteu7BMXEKtvikj5Gw8gH3A6+9eg3PgvxNqtwbnU/FLbickIrYX8K6fRvCr2UIR9SNzIB991rVYf+Yj2i2R4HYzzWlwF2+Xz/AHeRXSal5eo28Oo2yIzSDypiRkKyjjjvxkZPpXtUPhqWWHe8CyEMQY3hyCPZiM1X1Pwzo9tZmG70yNIbgjzFVdu/HrjFQ6SSaubU6kmuWx89BBaTMiFXRvlEhU4Vu/1/lVmPDMxjXzpT9+V/uL9PX9BXr954A8I39gWXURp6HLMhuFXdj+8rHdgY7frXL2/w8sr1jFp/iVLy3XLHZbu2QP7xVecfWsHC3UPU4iNgm50cMRw9w4zn/dHf0p5kELY2vufnYW+d8dSzfwrXQX3g/ULK+mjke3JjIWF937rH1OKhsfB11LORNeWgZsEF5NxkP8I+Uk7fpUdQMqJV3EZBMgwj4wWx3UdlH869G0bwmdH0R9YvoBLfSoFs4DhlV34Qn1POfauWvfDdrZBD/wAJNpc0rZ8xIywJx/DnGAo9K9C8DanHrFjaWb3C3B035y4BAfOQhwecDJ/SnFAzsNK0+PStKtbGPBFvGFJ/vN/EfzyauKcKPpUZOFzk5PWnA8fSrQkSBuaN3NR5wOKTdQBNnj6Vx2roDYeLYv7wkfH+9EP8K60NxXJ6mXaXxMijJMCkDPXMTf4UdAR81xu0cqupwVIINa1+QNUuHXhWcv8A99c/1rIPDYrVuWLSRFufMhUg/T5f/Zay6FLc73wJNDNZSW7xKzRtuHrg1vXdvE1pKI4sTDphetcD4SvzaakpA+V+G9q7mXVIN4MMhbPX5TTWqJrNaNEVn5d1amJ1BVsg1ueDiyeHpNOkbLWczQgnup+Zf5msLR7ae5efyo9yKcZ6da39C32+qX1u6bS6JKB6kHaTXBj6d6TfY0wral6mZry4WGTH96I/0rgbdXs7gyySbrW3ZreRdoyFbox9eK9O12yN1bTxRlQwKyqSPSvONThezvdRgnK4ltfM+Q5VivGQfyowNZSpKPVFVoWm33OMvYkgvJVjcOgbKH27Vf3C4tQMkFl3g/7Q603U7AQWFlMn8akN79wf1qGxkIhPJzGwYD1B6iu0zuQ39pLbMDIu0soYfSuu8D63JKv9iSsWVn3QDJ/FBjp6j3qne6NeXMEDWuZ7cRb1/wBnuR+dcvDJJbXSTQsUeNgyMDyCORQ43VgTPWL1J476YW2n7oOi+ZcAP+Kkdaqb5Or6RI/HJCxsa3NOkj8V2KaruQSyKBMAOd44zU7aARzv4rNU00NxW5hWVvBFb6lJFbTQO0ZydvDe6gdTWH2/4+r/AP78N/hXdR6a8cMqhuZMc+lR/wBlT/8APZv+/taqCJseZadcC003aTzuJxVO5vmkYnnGelQu24bNwVfejyIGHzXGPopNRzIq0itJOznAyo9qdDEz5bnpU/2e1ByJnJ/3KkQog+WZgP8Acoc0LlbK3ksAMqaYYzu5rQ83I+8SP92k2RvyzYz3xU8y7hyMpIilwpBI+ta+n26EZ2HPao44rZSGM2PfYa0be5tosBXdv+AUNpjV0aFrBKhUAYH
0pbiAsrBpSCfSn280c5BLyAf7tbFpYWbEF2diee9NQQuZ9jn7HR2c5WSU+9ev+D9JWw06Prubk5rnrGwgeaNI1dhuHQcCvQLNfLiUBSFHAzXPi5KMTfDRblcuTSBIgueT1qoZeev50lxJvckdKrM+BnNfIYis5Tdj2IQSWpY89gx2tg4rlfEd1i3lV5eCPWtx5SAee1cN4slb7PLg5+U162VV3pE5cXTTi2YButO2ZEy59CarveWDHaWYr7OQP51w8lzKHZR/e44pPOZzk5zX0qkeMonb+ZpW3LOg+spH9akil0oAEPG3/Ayf61wIKB+VY/TFW45IFHEUoPqXp3Bo7f7Vpkfzo0Skeg5rrfDmvQ3CBVfPrXjLzsT8rkD/AHq1vDOqva6kEL/LJxz61x4ynz021ujpwsuWdujPZ9VIaE5ZsVyRl01pCGjAfuec10EVwLqwwWBYD1riNZZbW8zyFauXL8RL4JdDXGUI/EjYQ6dIGQJnHQZJ/rWXf6Bp7OJYppoHzkFKylu3D7o5WXPQ4rRsVvLiZT9ukx6BB0/GvWvc8tKSd0aFrLFJEbLUJkuc/KDIuNw9xUd7q1xoFtEtuitaxt079eh9qNY0eEWvmvLcPIPu/d/oK5Br+1nWSC4W53ngDlsn6UmludEZXVmjuYNSn1OZrmWez2CIhI42XjrwB+Prmuf0yFSjRzoDgDgj61n2XhXVXbzEs7r7P1yUrZtNKa0d2Kyhm/vVDnHZM0UJb2LAjQps2BVIxheBinSxGOzeYM5KgKBuJwAwI6n2p2wYBHLA1mS28YmnmAJYcjk/jWuHa50xVE3FnRQXjvC8YbGMN+FXDfsYOuGHWsu0XCsDwxUAmrMagMU7fwn39K97lR5rJtVudukzSOOkZJH4VwNpd+H4Y1Y22pAhR8ygD8c10PjXU/suhpaRn99dnb/wHv8A4UunCzktIsgCVVG5HTBPGM+/1rKesrLoaRVldnPy6joLvkjVG9AX4oTU9FBBTSJ5j6yzZz+AFdh9n0ojbIkKPnuMqfx9KbNHplku8wIM/dJXAP40vZyXUOZdjBtdd1BGA0zRIYM9HEXzfma00m8U3CF7y9FnETli5C/y5NNn1mZ8x2EOP+mhGaqJp91qUm67maQZ6FqpJoXqXV1poT5dtcT30ucAoSqL+XJqhe+ONX0fVPKhuJVuV4byyuFPpgg1tzJaaHpklyRxEmQPU9hXl8srzTyTzt+8di0jeme1Y4maUbF0Y3Z21z8VfFfkEQ61cx/N1by2/wDZaybnx94n1Ft15fvc7TkBjx+ArnliaZtzHag4GBjaPQD+tSTgLA6IMcZrhhSbTZ2pqLXc0I9c1KQSTz3Tk/wFQoP8q19H8R6mYZVh1C4iaRdsixysAy+4rH0rSJ9c1G10+AgFz85PRR6128fwnuIZTJaeIEViMfPbf/Xrjr4ujT92TsynTnJtpDfCmp2ek6213qUqLA0RVmkGeau+IPFuhRW01zpV7DNqkzFIigIEOeC3TrjhfTOapTfCfXLiPYfEdoynqGjYfyFUJPg54gH+r1Gwk/4E3+Fc/wBdovaSGqclujkHlQ/MQrKwCt2yB2+lbHhDxO2ia3HdFzsZsSKe61pN8I/Fe1lX7A3uLnH86rf8Km8XxNuFnbvnpsuUqoYinvzIfs32PeobiOeGOaJw8TqGVh0INP3/ADda8v8AC1h8QPDbLbzaLNeWBPMYkVmX1KHPH0r1BYLh0VjbyKSASCvK+1dMK8JdTF05IUOSOaTdkUeVMBzFJ/3zTQr9Nj/98mtVKL2ZNmPDHFcxeN/xNPEAJHNvF/6LeumwwOSrAe46VyerXNra6lrYmuYopJLVCEdgpICMOAfrQ5JLcdmfOc4xM4xwDVyRi1ran0DL+Rz/AFqrcIzSkjn6VOhJskU9VkOPoR/9asnNByss6bctb3kcnUA/MB3FdnDJGbg7UCs65UAttH+6PX1rg4yVyS2D2Iq1/aV4CpEzbkG1T7UKasQ6bbPRbWeNYoonlkSGV1LOG5DAep+uK27GYW3iCxgLy4kidB5i4YKwyM568rxXlEN9dyKIzK2zsK6bw7ealqHiTTEmZ55o5VXH+znk/lmufESUqbVzekmrJnpl0PNZMsV3ZjJHbcOPyIryTxgz22oRRlmOIvLy3Oc5B/z716xdnEMhB+7835c1598Q7CWW6t5IbZmDjO/sa8vAVOWpbudFaN43MfX7RLfQnhWQSiCRdrj+IYH+OK5KzcRzHdwjBhXSm11CTw7LaPZzmdmBXAGCAfXPtWV/wj+phYcWchL9srx9ea9xtHHZmvoNjdapamWytnleFgrFJ9p55AK45GB1rC161ktdWmiltzbucHy2Odv41qaNB4h0qdhb291GP4go4yPWs67stTuLiS4vYbgSSMSzSKR/OndWBJ3Oo+GOqSR6wdN3ZjuOxPGRz/jWBq19fXN/c6g93Jl5W2nzSpAzwB+FZMsMlrIVLYYdw1I1xLNCsRwET0qdFqirM29I8cazpV0j+ebiEdYbnMgP4nkfga6H/hYz45sEz/10riIND1K6jLW1hdTr1LJCzD862R4K8S4H/Epn/IUc6DlPYYvCGkL0062/74zVuLwvpQ4/s+3B/wCuYrqA6H+EflUc13bxL90E+1fExr1pPRtnvOEexiL4b08LxZW//fsVIPD1jjH2K3/74FTPqN074gjiAzj5607O6gAzdzKG9ADXZHDYmcea7SJcY20VzIHh2yPH2GD/AL4FOHhixY82EH/fArqbe90tvuspPvVz7dYxjhV/BapUJL4p2MZTeyicZ/wh+msOdOt/+/Ypw8D6Y3XTrf8ACOuwOs2ajkio216zX+I/gK0UYLeoReT+ycwngbTx0sEH0Q1Zj8F2S9LIflW0fE9ovZz+FQnxXbqOI3P1NaqrSj9tkOM39kig8NRwY8uILj0NaMWk4A3hSB7ms1vF8f8ADB+bVA3iyT+GFR9TQ8XQ+1K41SqW0VjcbRLZ+5B9jUDeHYTnErD61jnxXc84SMD8aibxRfHoUH/AaxlXwb3iWqdddTVk8Low4nI+q1m3Xw/sr0EXBMo9NxX+VQN4mvz/AMtQP+Aiom8S33/PfH0Ap0sXhqUrwiDo1pKzZX/4VD4f3EnRrdye5vJBT4/hL4eTpodn+N1LVa58YXdup3zSH6YrCPxOU3PkG7lV84AJr0qWP9om4o55YVx3Z2Efwv8ADif8wLTvxkc1Ovw18OZBOi6V/wACiLfzrn4PE890vy3MvPvSS6vet0uZP++zXNPNVGVmjRYFtXudQnw98OoMDR9H/G1qRPA+hRMGXTNHUjoRZDP864b+0b8q26ec8/8APSoRfX2/mWQj3kpPNU1sNYFp7npS6FYQrgGyT/chUVA+jWO7Iu4Bj/plHx+lcOt6235mOe/NQz3j7D5blWI4NYLMVzaRt5mrwja1dzv10iyUZOpRKPUJGP6Uj2GnouTqoH+6sY/kK8nceL5pQ1jH50JPJ+bp+lX2TU1VFvR5TsOh9fwrrqYxxgpJ3uc6wyu0+h2N6mlPlG1LIP
B3OmT+lUrHwj4NS4a8uZY3mY5JNwTj8q5NdLuJMkyx899pb/CrAgltk2sy4HooH8ia4446UZc17+RSpRelrHqNvdeG7eMRxXEeB6uxP6055fDk+d7QNnrkV5Yk5Vsg9fepxeAck4/GrlmMn9k1WDXc7u50XwrdZ3CEE/3TiuU8WeEtCsvD97f2NxiWMAhN/X5gMfrVIX6j+P8ACqOs3f2nSZYVPLsoJ/4EK3weNlOtGNrXZnWw3LTbbM2FcL2U4Xn8BVmNkfzEGRhsAew71BL8rM+CoDcAnpx/hRbkqSx4JJyP6V94tInzz3MS801/EXi62t4cMY4jhCcDIBY/4VprEHVokg82SElXgddsisOox3/CpfCUT/8ACdXN3sZo47U9F7kj/wCvXQ+KdMt9QAv7OXyr+PnkEeaPTjvXzzzN0sU6ctU+p6c8MnTTWjscTJe21rIcWDpJjkSMenoM1FFdF5N4hnjj7KDgc+xBrX03V4daRlEPzocMrjlfrWuloIyQ3SvehKM1dPQ8yXuuzRiWw+0EbIpC7dWZcAe1a0ECwLuYAY9KugpFgsmB6gelUL+6W3tJbuf93BGP4urmrbSV2LfY5Pxpqasseno43582U/3fSuEaYM4AyEBO0Hr759z1qTUb17y/nuGJJkbdzxVPdzxXkVZ80rvY7KceWNkXkmfaB8ufWpHnaNeUU/rms5HZTxnNaWl6a2o38EDNjzHAz6DvVuoowb8gUW5HoPw2vdOSC7/fRLqDHLI52kp2C+tekRy9BnPoa890bwZp2n3SXLyyXEqHKlhtx+tdpFKMdev4V8PmdWFSpzQ+Z69BSUbSNmOXJ61ZST3rKilA61aSTkV5Zq0aKSc9amV89ccVnpJk1Oj+9axk0Q0X43AwcYqwk5AAzWakh9alD8da66ddpGbjcuy3DeTIFIBKnGfWsZI7kMmWBUctgt/8TV0tvQruI3dxUCWaq2fOc/VE/wDia9fB4+MU1Ih0YvVlvzAhzkj6gD9TUUMVvPdXcktvDL86rukjDdF9x71KsD7QFmA9Pkx/JhXEzeLDZarqdudS06EJdsqpOrlxwoySMjseK1r1XWVqe5SgnokdNPpunOTu0+zOexgX/Csq4srONAgsbQRg5C+SuP5VyWp/FOXTbpojZW15H/DLbzkAj8VrNPxZspiPN0yaP/dcH/CvJlhMW3dfmaezUV7yOwktLBG406yGe4t1/wAKh8q05xZ2g+kC/wCFV7HWLXVrNbq1c7e4PUGnFyATk1g51Yvlbd0HLHohxS3Q8W8A+kS/4U+G4aEkx/uyRjKqBVZZkZiFOSKQygc8ce1Uqk3u2HKl0Hf2pZM5hNxGXJ27QadbLaXMEC3trHcpH95JQSpI4JyOa426dINWlL/KI5BIpGOo+b05ru/Dctg32s3gDrHkqpJX3zkGuupT9lFST3I0ejQ5rbw9DGzr4c08YBOCCf51Wkm0Hy9//COWDKDg4Q/4+1XLu+0e4doYreQCUhVIkzs7fiOeh/SqEej6VLC0a6g7oFwVDAYweuOv410YdV6qfK7mUuSO6Eul0lE82PQdPLKMggNyo7HmopLXSbmIb9EsNo74OatRw6RhbdLt3crhRuyW4+lFnrum2Wjx232WKWcsQS6ZNZV4V6ejb1KpyhLRIyv7J0FkcHRbP2ITFYmnRWz6ldW7afaqIXBASAEle3OeK6C6vEmmLraJGx67OlZBtkTUPtQhfLLg527Rg8HJPX3rTCqcrqbYp2WxuRX9xpo8+1cx+X2B4rXXxrbFRu0wZxz81Rx3lrc+H3iuYLVpVXy1ZECuCehDKefx61z6aRfbF/0iLp/eb/CumLVNWuZS1exRb4vwSLtj0yQN6mSok+IM10x2WIGT3evMrWIL3/Wtqy2ocl8cetL6rRpu8Vse7gaftI3mzv18WXgXCQxj35rB1TxHqczD52TH92qCSAjJuD+L1WlKbmPm5B9zWjqNq3Q9alhacdVY6rRNcuZFUNM+e+TXYW1/JJGMyE/jXlel3Qjm25J59K7fTbreo47V4WNpNSujjxFOKkdF9oYj72ab5/bcapedx0FRmfHGa81RZz2L/nEd/wBab53Gc/hVLzz600zt2NUohYveeBzuoM/+1WeZ26bqaZznGc/jRyMLGj9o4oE2QOoqlEXlbANaUdvDCu+5lAUepxRy30B2sRtJnGCahld+wNV77x34Y0jKMzTOP4YxmskfGHQydn9kT7f7xIrspZfXmrqOhzyxFOLs2aUjI7YnUsh7A4NRiy0FXEh0qSRx/E85H8hSw+MdC1lP3KbG7huKrTyLjfbOpX0JxWsY1aL5WrFKUKiuXXnhXiGAQr6Bif50w3J9ayjO/VnB+lN8/wB/xFYyhd3NI7FufVXgHEanHrTLfV2m5IAz6VQfy2GHBx6ZxSII0AwqrjvmtFCPLawrO9zb+1Aj71KZGZd24Ae5rIW4ORg89sVKzzyLxDI3uFY0Kit2Juxf+0n7od+OwJx/OhJ1Eine2enIrPWG7c8QuPr8v86eLS6BBZAMerCnyX0JlaxtLcqyfK4JHomagnnkKEljg9yAKoZuQu3dCB6mcY/QVG/zHMl5bp68Mx/M1McPZ3OSMWpXHeeN3UUG5AH3h+NV2Ngv371j/uKMfzqJrjTkGEkkY/lW6opnWppFtrvHO49O1Yza+za5Dax85zx+GaLu8VIGZcfjXLaTfKniu1uHI2iXbz05GK9PLMOvaqT6HFja1oOK6npLpshhR3JdiXct1JHb/PpTbN94ZzwrMTj8TUd7cByzqASqEgdsmq7zmy05HXkqhJyOueB+pFfatpQuz52C96xc+Ht07arr1ysZmkSNNqDgklm/wroP7U8VxN5hso2VmwEL4YD6D+dc38M4mt9S1yJ/vL5YJHT+Ku6u7DT7xke8tt8idHV2U/Tg18VVxdOGJlGa07n0EaeivseGa1dajp/iW9uQGs7vzi5Ufw7ufxFdroXia31KOCKZwl1InyKxwHPcA/561gfEjTVsdfW4iQrBcxgjH94cEfyrAso4HtTbOXa7kyyR4+6wHUc8EjHHfivbw2K9xSjszzK1NOTVj2KJVeIkbhyMZ7HpiuF+Il+6QW9hET83zvz+VN8P+NSjrb6ofkbGJh/D/vVh+M5Hk8RTMpDRBQqkdOld88RFw31OWFKSkcuUctx1oWLceTj8Kn3HuMH1ozzxzXHozoFgRQwwuTnvWvp0/wBivYJxgFTms2FTntUkrlHUDPFFRL2bQ6fxJ9j0yHULq4jjewtluc8MplCsp/Hgir0Wo6omC2i3RHqkikfzrhdC1iS0ukZTjNes6Vq7yadbm82zTGMeZJtA3H8K+WxtKFJcyimerCbkVoby7Cgtp84yM4DqxH4A1cj1J1GXsb5ewPk5BPp1q9HdWbjm3VfpSb7WS8hQF0RVZ+PXp/KvJi4yvdGjuiFdZt0GZI7qMYyS9u39M1IniHS+puwM+sTj/wBlqzNsSMmOdiCcY5B5xmnLIx+8xP45/nUXiugWbGR65pp/5iFuP96QL/OrMeq2D42X1q2em
J1P9aaFRh8yRnPqik/ypr2lh5TyT2loUUZJeJf1OK1pU41JKMd2RLRXZfjuEflJEb/dYGpw7Z5B/KuLN/4SkuPLextl5I3mDAP0IxUiv4Q8pJFWNAwBGyR1I9eA3Fel/ZtRWtqZ8943sdxG5BGQcHH1xXg2tXCG4vbrd873kwJB7BjivVJdI09LFrmKW9Eaqz5ivpBgAZHGa8N84T2lm07TGEyMZNjZdhuzwT3Oa7MLTlBtS0sdGFk+dOOpWu5XmYgu5AHQmsxwhH3RketbU9rYMc2012B6TBcj8QeayzaOWJU5A6ZroUknuevWpynG7idV4Dvtsl1aE8FBIPwOD/OuwNxuYZOPU15t4VkMPiOAY4cMh/75NdqzlsgttyuM15uNpr2l11PIkuWTRpST7rk9QwJFRG4B9enrVWCTfcFm5+VifyNWdKtWuZMv90VxNWWorlSfRIr+czOJeRjAPFR3SzW6XMSFkIC7fyx/Su0VY4dqisrVIEe4YOjBpk+UFSDlSOfyJrWnOc1rqkQ3FM5nSdRvFvLaAy/uhICVVQM/U9a6WWzhUlhcyohzuJYHPrye3t0Arl0iMOpoh6rIB+Rra1O9jt7fzJmwucZr38BZRbRw4j4i/CbQxxpDMJPJIIKuDg9ulchczAvIMfddsfnWho0yMsxjKlVVQcYxnBzWA825piDnLt/OjGa2CirNkLmQ3LMEdlDdQflAwPetCKV3iiBbYQuMjmqSM7MMIGzz93OOanjsru5OVGxPU1yqT6Frds2LC5IuVAJOPSvR4tDtvJTzpYxLtG8f7XevO/DdqkWsb5j+7t1M0h9lGf54rEufEd5PdTS7m+dy33vU5pxw/tNWNyKFp4YwMvfRgemw10OleD4tRuPs8epwo2M5ZDXoa+CNEP8AywkP/bVqs2/gzSI+UglHusprk/tSlPaJ2RhWgtJHNr8JrJV3XXii3j/3I8/zIrI1rwf4U0WFj/b9zeTjA8qGJQfrkmvQz4T8PAfvw/8AwK5x/Wqz+HfBEX37VZf+Bs1bLGU0rtDVSunpJnkNvaaes++NZ+em+ZQf0U10tkIVA25/Fs12oTwfZNm30a33DoTCT/OiTxNYW64t7KOMDpsgUf1rixNeNTRI3hOq9ZO5zyJK/KxSN9FJp62F7Kfks5j/AMANXbjxm/ITI/4Hisq48XXDdHx/20NecqTeyNlN9i4NJ1R+lnIB6sQP60jaJqI+99nj/wB+ZRWFL4gupD99fwBNVZNWuGODNj/gIrVUJPoNzZ0R0eQEmS/s0x6OW/kKb9gtI/ml1WMgddkbH+Yrl21M/wAVw/8A32BVe41BBG37zd/wKtY4eT0ZLm0tzpL/AMQaXpUTbbiV2HpEP6mvN9d8Y3upO0UUrrCfwqjrN20rFQ3B96ydoUdK9nB4GnBc0ldnlYjFSbsiMh2OTnJ5yetJtI704tzxQCcdq9JM4W31JIJ5LeUSIxBB7V2Oka68qqGJyeDXEkZrS0tyGHUc1zYmlGcdUdOHqSjJK+h6XFJY43TSvzzgHFK2oaTGeEkY+rsTn8sVzSMHRck9KURp0CZ+teN7CK3Z6ntJPodB/bdgn3LSPI9ST/M1F/wkaL9y2iB9Qig/yrIWHPSI/wDfNTJaXDH5YT+VVyQQc0mXz4nusfKCM1C2v379+KammXbjO0D6046YEOZbqJB3y4p2iJ8xC+rX0i8u3PvUDXV445kIH+9VrytPjHz3ob/cGaPP0pOiTy/himkuiE79WU907D5pSaQRux++5PqKtNqNop/d2Wcf32pp1iQf6uGJP+A1ajLoibxW7Iks3fjbI1WotKuWIKQMM+tVn1i8YY87b/ugVTm1WQH97dSY/wB41SpzewnUgi7q2nXMVoS7KOM4J5rjbSMy3KAfe3cVPqGoic7UJYUzTZPKu4jnHIr0sJSlHc87E1FJ6HpN0syWuTgIVC/d69OlS3NlfTi0t9Othc3zOHWPsAoLYOfzpgWOeSGLzULFhuQZJGP8ius8OzJZ39zfzA/6PbHauOrE8fyr2cdXdLDOS3SODDx5qqXmY/gvSdX03UdUm1ayktmuQjruGM8tnH511hkB6MCR2FZFrcPLfPNKcu6nNVtVhs7Mzut1cG4ZgxVk4OeetfBqnPG1XLY92pPkWpn+OHsZbKL7Qod7d/MX3OD8v4nH5V5W07xXCz7v328Sbvfv+FdPfG4168aG33eRCCXk2kjdg/zxiuPKks+c7s819NhMP7Kmo3PNqz5pXNfVLeKG6hv4F/0e6HmgY+6f4l/CrVtbi4ZrWZTvGFOep4yjfivH1X3pdKUapok2nHHmR/PET2P+cir6p9utLLUYDiZFFvcJ0KkdD9QRkfSlXbSsVSs9Tn9Q0aayJZfnjPp2rKKc9ua9NlRLuwR8AOzCNgOzZ54/UfWua1/R1gmhNuhMkrFQijrgdhWeFxl37OS1KrUbLmRzsalecmkk4kxkEkZ61u6TpM1zb3NyUfbACBjjLAdPwqa5sJJY7hGUlY2jQELyCV6/ixFehVd1ZHNB2dzEtXKsPfjNbz+MNU011hjhikjVeCQc/wA6wWgktbho5BhlOM+tbttp9re6Lc3c8Su8CNgnPHBI/WuCVKEnaaujqU5JXRqWPj+7lj3m0hP0JH+Nba+LpIre1vZLMfvt4xv/ALrYz0rn9C0RG0u3Yx53Lnmuln0mNrawh2AiOIkj0LMTXj1o0ItpI64KTSu9zT0fxGNYulhW1ePaN5JORXSo2e9c9pNklmzlVCkgDitpHHrXjVXFy91WRsttS8jYxTbiGG7g8mdPMjznBqFHqYMMZqYTlGXNF2YnG61M640GGOJvsGlWdwWALJM+HyP7rMCMe2fwqtD4biuo8XelfYWZTmQSrkHrwoPQ9Olbqvg54/Knh8gAnIr14ZrJU+VrXuSlZW6GVdwnRPCepxLcNJELWRowwxsO09K8OTm1to498mV3MoXkHv0/CvavGc3leDtVbOCYdufqcV5z4X0Y3WmreLez2rh/LUxnHau3AVJVoOUnd3NMPKNKTb0OZUsFIdWznuCKGuEiDbuuK7/xF4TgjhgeTV9QuSzD78KkqCcZz37Vwmq6R/Zt2kTSCUMpIOMV1+yd9Uej/aEXG0XdlbR5Cuu2TDvMufxOK711cM2QMZI5NcLb4iv7FlGD56f+hCu1mkAkkxnO8jrXLjIq6Z5M5uTbY+JmVWQH5m+XI9DXV6bb+Vbxxrjcx6Vy+nJ5t0hPIUV1Et6NN0qa/Y4bmOL64rz4Ufa1FFbdSJz5Y3ZHrPiWHQlNvaKJLn+OQ1xsnjHUJpvNnbenoVrldc1x/tD4O6Zz36KKybTVbsT73ndx/EpPBr6CGHhGHKkee5ybuekJi6ubO+RspM5GO4I55rRurWK6aJpc4jbdjs3sR3FZugGJ9OQR9PM3gentWq21eTgY9TWlGmoJpCnNu1xH2pGwRVHBwBwM1hWNmpDtNGGG7itl5UCsAwyFzVS3Qi2jzwcc1niNWkXSb1JEjijUbY1Wrq6fdCxa8+zv9nX+PGB+FUgrBuSFHsP60+91
Ga8Ty5J5JNqhVBPyqAMDj6Vz80YotRdyoZ/s2hapck4M2Ige+37zfpxXlkreZM8m4/MxP513niOZo9Bhs48lp27d8nNch9lA/wCXY/nXTRj7pnN6n0aL6MdLeQ/8BpkmpSbcJZyH32iszzjjhj+dQTTtg/Mfzr4mF1sfRcqJbrUdQcHbAyj3dRWLcT6g5JZkUHqTMB/KnTz9SSKyLi4GcCuylFthokSSvcfxXNuPrIxqo7Met5Fj2iYn9arPIxPA/WoiXbqVA9zXZCFkZORaPl/xXUzf7kSj+Zpn+jA9bhvrIF/kKh2IesyCnoLVSN85P0WqtbYVxSLdusG7/fkY0oEKjItoR9Vz/OnifT06rI9L/adlGMC0z9WpXk9kP3erGhx2jjH+5GB/Ss7U5JmUjDnjritE+INmdltEOO9YGqeILqUMuVUHsBW9CnJy1RhVqRUXqc5OS07ZHI61FgsSOmOtOdy7s5OSepqS1RHmVXbC565r2o6RPHlrK6IkgDDdtcn2UmkkiKH5kYfUV6DpljpMNqomny5GcBt2KxfEMVnuAhDqQM5k4J/ACmncTRyuPlrW0cQZzKxHPAArN8vP19Kt2hVMZZRj3rOouaLRpSdpJnax3elRRqBBJI2OpOBSnWbdOIrCP6u2a5r+0LaNeZgfoDULazCpO1Gb9K89YVvc7/rCS3OnbXbk/cSGP/dWoX1a9cc3DD2HFcu2tOfuQr9TUD6vdOPvqvsBWscGiHik+p0z3Eshy8rt9WNQtKiHLOq/U1y0l7NIMPM59s8VF5n4mtI4RdTJ4pnUNqNqnPmg/TmoH1mAD5Q7VzhkJHSjc2O1arDx6mTryZtvrhJ/dwgfU1D/AGlfTnEUbHP91CazFmdTkHFSLfXCj5ZWH0NWqUUQ6kn1L7was65eO456DGM/hRJpF4luZZnRBjIBPJqj9vueP3r/APfRoe9nkXa0jEe5q1FIm7e7IFJzzV2AF2BB71SB5yat6dPHFfQGc4h3rvOOgzzVxtdEvY9U0qGKzliCwybwhZnLfMx9fYV1Uc2/SLhiDkgjJPOBiuR0zWdM1O9e6hmSNmyoiLHOB39Oa2o7530udYo42yjFT5gxntkdeuKWb81Siow1DCJKfM9LD47xbZ0k6gAgge4/xxXM6vqN3rGoLYWzEzONrt/cFR6pqUlrAsUfz3bjonO33xWBbOYD5kOqT20rDD4O3dz3yOa83CYOVKLaerOmrWUpa7Ho+mWa6Rp62lrNDw4diUYFuOT161514qsI7LWJWiaNkuGaXEZ4QknIq2mra0i/Jq0cq9t8Sn+XNUdSe/ubFJLwQBEfbGUVgxz1/D0NddB1YS97VEVHBxsjL07UW03VYJDxGflf6GuySKC11Z7mR4xZ3S8oR92QkAkHtxz+VcFdBvtHb5exFdVodwmraQ9jOcyJ0J/StKq5r3M4Ox0NqWS/RS4KFsP/AL2CFb8v6VJfoItWsbuRJWjhDkeWm47iMc+2M1y2i3rreT2F0373cSM/5/Grc+qz2VyYXumVuoy3BH415s6MozU4nVCcZRcWbv8Aa1tK7pdxmO0bayr5LxvuwxYkjgg4Ax780+O/0QSs1u8R5WZg8hUljwMgjkjJyM8YrLj12ZhxcI/1qRtVMgxJBDID1ytaLF1E9Ykewj0ZR8V2Flb6fHdQXKSsku1yJFbhhkYx24P51hWGuBLO9skRilxFtz78f0zWl4ja1utPVI7WOCTzB86ccfSsXTUsobhN6zM4OQ+cBSOmMf1rphP2kb2syJJwdkeuabaLFY28ePuxKD+C81opGQ5zgkfpWJpviOzuIozufJ+XJXqa0YNUtWXd5mXPUYr5jEU5xk79z0Kck0rFxWCu2PWrKOzDgE45OFJrHFyrsW7E8Vq6feCCCR2n8oBlG4rkd6zw2G9tPleg6k3GNyyjkcHj6ipllGMk1JBeSOpQX9nI7Abcrj6n+WKtb5jLIpFm6gZXJAKnGOR/vV6UsmfRnOsUuxVV8/8A66ercVcjUMyiSygCH+MNn9Peqtst09yUuLFAnmYzHn7vvzWcsnqJXTKWJTMbxXYXmseHptO0+IyXFw6oBnGBnOSfwrlLLwb4+0mz+zW9tb+Sr7wgljb5u5GRXrehmC3urmSWRV2tsXJ/Ota41SEqQkifXNPD1vq8HFtblubvoro8Qvz8R2C+dpksu0cERrIBzn1rlNVsPE17cCW70i6VlGP9TtFe+X2qQopPmp07NXD6rqvmu3z8D3pwzSTdki91orHl9l4d1aS/gmltzGqSK3zMOxzXQSNmZyDwWP8AOtJp2knQ9ADwKoyxkszAd8irqVpVWr6EbGjpKkglercCofiBqS2axaehyttH82O5rW8OBY5PNb7sCmVvw6V5l4vv2u9QkySWkcvz6dq7cBSteb3ZzYid9Dl5XeWV3YksxyadbHEvXtTtm5cAHpRbLm4VSM9civROQ9E8JlpdM8sHGDW99nRCiSSOxduM9/pWN4MMcMDlyFUZxmuimubOWZHVHlaP7qoMLVppbhqyDULb7JpsshT+Hb781mw3LugVcDCjAAz2q/qV+ZrR0m2xRH738TfXFVy9tb5SJd6rgB3+UN6HFclanKpLR2RtTnGMbvciFvczHcz7gffd+gpZIYooy0033fTkjn0qP7c87bFJYdPkGFH41TnnD3sVooJBYM5PoOT+FEMNFb6sHVb2KOruz69a26DcIImk2jg5PAqElyf+PSf/AL91Y0GFNZ17V9QaRzDawlvKRgryKMgYY8KOOTzUiXAkRX2P8wB710xXKrGMmejvFCo5nX/vqs65ntogQblP++q8wm8QPg/vnb6tVF9fcnGPxJr5unlc+57jxkUehXmoWwyFkU1izXyk8GuYj1gyHG0mrH2rK5Z0X6tXXDBuGhlLFxZqtdn1qE3Ofesk3yA/61MfWozfxbeZM/QVsqDXQydddzY+0nNN+1DruFYT36Ho7Gqz3nPG7860WGbIeIsdMLhX6EU0yjHJrmkvXX1/OpDqT4xin9WYliE9zXnuABgGsmd2dzgU1JXlyWP60kq7VJHJrop0+Xc56lXm0RGhyDmpoHCSBj/Kq8Z7VJsLHitjG+p01nqflRjbtT3HWsy/uRPKeS7n1NZmZEwAcY9qlijdjvLdOhpJDuNxtGfzqs78nvzV25dNq8Yf+Ks5uTTEhd9IWJpMe1GOKBhkk0lLj/Ip6xOxwFJ/CgBgoxV2HS7yc4jgc/UVp23hLUpxnYFHfmkI5/tS4PpXa2vgKQkGaT6jFbdn4HsYwC6BjnvTSA8xWN3PyqxP0q3b6Te3B/dwMa9ctfD1hAuEtkyD1K1ox2dvGAAgX6CnYDym18GanPjcoQe9bVr8O2IBuLg4/wBkV6GqKvyhc/WnlFByRRYDkbXwBpaY8xHc+hNasHhXRYPu2EJI/vDNbPG7O0/hShRnAU4PrTsBXjsraCNkjhjROhCpiuTuJo9Ks2P3m6Iueprf1XUUQGJDkc7sHr7Vy6QTS3yXd4hwy5hQjgL64pSegGzoGj39u/8AaSywtezD7jttZc5
4BYY5FJ/wmumXLtFfQ2czqxRhcWwGDnB+YfStOxuopIUUPF5yqDh2AYNtIJB6joPz9K4GW1tY72+jWWxuy0zEJNIY3jwxOAenPfnniuOhVm21IppdDprmXwleQ7k0q2WV2CI9tcMuGJ4JHp1rG1hV8lhhdikKoC4C4xjDGsm80+cAPY6fIq7vm2SCRc4HQjnHWsi5nu3byrp5wqn/AFbZ4rpUr6gnZWY263SyeVEvzE+tbehaJqVpdpcogx0Zc84ql4cj+062iNyoDHH4V38cYRQB6U1ruSjG1Kzme8ivLWP95g7vXPYiodYsjeWCTNbnzowNyEc1u4CyDawwTwRzhhzSzgsFkJPPDE+nb8qi1izzlrdVPzWsikf7Df0pqmNDxcyIR0G4j+denxSedEvmojEcHKg81HJa2kv37OH8FxVJJrUm7TPN5WZ12tePKoOcFu9atjpfnyxb3GGyAobnj2rpLjw/pdwvFmiN6qTUOlwNsVAzBN+MBBsY8g/McYx3OaGkti4u71Gpp0thZF4kYjzdoJxjlTj3Bzjn0qja67eRf6zTw2P7ktbct/bFnsCoZlDF5Ecsq4HT0I96S18FPewrNbeKtKYMMgPGykd8cisZUITXvIv2ji9GQReK0QfvtOvU75VQwrQt/Gunwg/vLmEt2aA1Mvw88QHJgv8AR7gZ4/0jGaU/D/xeo+TTbecZ/wCWc61l9QpJ3Wj8h/WZbPUs23jPSZGDDUbUEdN8e3+YrUg8SafLv23enuZPvfvFGf1rnJfBfidRibwvOc+m1qzpfCN8HPn+FL8Edf8ARqp4WW6k0L2seqPQre9t2hEUcUTJuDjy5DkEdOcn1q4l8EYMRcKARx5xCnHt7/rXkz+HYrdsSaZqNuQeSInGPQ8fypiwrAwKapqVsO+Gfj8+v9Kzlh6yWkxqpC+x7NHOJFZwMb3LY9M1BPMTk5rN01vsel29vc3BedE+dpH3HP1pZ7uLacSJj618zWozU2nr5ndGUXFFO/m4Nc9O5Lda0L+8iGcyoP8AgVYkt7Bn/WA+wrrw1CVtglJJEycNn3pOozSWyT3jhbe2lk3HAJGB+dW3064jXdNdWcA6ZMm4g/QV6MMPN9DGVSK6lhXFl4bmcH57pti4/ur3/M/pXkV/Mb7VJXB43YH0FegeKdWij0+OG1YbI4tinpk159psYe43OCVBAwPc4r1acOWKRwzleTYspjXCBSAO5qOCPF2pPdc103iHTVFpuX70WBk1zdoczr7nmrJO40Z1gswXi8zPTmrst+543hQOyDFUYkCW6KSQoAzzSiREOETd79B+dMLizeZcKsWNhZhyTycck/8A1qmLRq213aVz1G3j8h0/GsuTUobe6ke5mUIq4UDqT3rKu/FTDKWEPlgfxN1pAdYZ0ij3TAIv8INcjrmpyC5cwgx+Yu0f7vf8/wClZ0WoXss29mMj+9TXlhNcWkmos6gRkLs9qALfhmMyG4TZuWRQrA+9dl/ZFl/DayY7fNXJeFS4a62dtvy16P8A2vpNv+4eZN8fyN846jiqsK54bzRS/lSYNBQoJHQml3E9SabilwakAJJpM0uD6UYPpQAUlLz6Uc4pgFGcUdfSjBNAEyXDou1Qo+opHmeQcn8qjAYjgZpwjcjIUnNAArEHirkLqTgttPrUEdpcSY2Qu2fRTV2Hw/q0+dllLx6jFAi3HbW0qhpLhR7E02draBNqOGPtVmDwXrMjKHWKHP8AefOPyrXtfh8uV+1Xhb2jH9aAOIldpXOBnPpUkGm3Vwf3cLH37V6jZ+FNOtQdsIbB6nmtOKxhiXCoF+gxTsFzzC38JalPz5e0d+K1rbwK7YMspHsK9BjhRBkR/mKkCbT0GMcUWEcla+CrKMAuCTWtb+HbGBuIF+tbQXHUjPv2oHB7cdTQMqpZQoPlQDjsKsiNAvAH+NO4Bx19xT+i7go5PUigBqp/D1p4XjOM+3pQME896Bu3EE9R6UwDbj0oX1wMA880Z55wSKUHnOOO+KABQScgDNPweAc89c1HyDx0xRxtOO3c0APGN2NxOKdcWk39kTXpPlQL8odv4j6CnWd0lhdR3ckQlWM58ojJk9gKzPEniDVdReKS5ht7aJWPlWxbeIx7D196AMfTLRb69JdcxpyRXQXVlDeKFljzt6EVBpcDx2YklyZJvnYn9P0q9uye3txRYDHfw+MHybhvpIM1kanosNtavdXtrbSQRjLuB05+ma6/jqVH51wnjnXNRsmbTvs8YtLiP/XHJLUml0QFCKz0C8kzbXLQOTkCOUrj8DUk/hyZ/wDU6rK3tKN365rhGwW9akimuY8bJZFHoGIqLIDu9J0K8s9RjmmktmjXOWUfNyOlXfE0gi0G58mX5/l5HpuFcCms6lDyLhiB2PNTS+Irq4tXt5grIy7TxTAi02+lsruG53tjd8wz2rq/EE90LWK9sbpwqqDKinjB6cVwwmAUrt4xx7V2nhKQavatpsqqzQKxwRy0Z+8D6460rDRee9updAiv7GZY5GBbHXODgisGDxpqQ/1kMEg/3SKktJJND1i60i5dhtciMnsTyD9GBFZmqWj2F2bmAbYmJxj+A+lC0Bm9D434HnWGP916xP7UmeeRLd5FhdyUjz6nOKynnMu3eckDFCShZUbOCDnIqrCOssbK/aGVJo/IR8FyfvsByFHoO/vXY6YoitYx7VkmRXXdjqOPfNX4JdkSr6DFAM3I3UDnH6VZjuGXG12GPRsVhLcn1qVbk460AdLDq95F/q7qZf8Adc1dj8TaqmNuoznH95s/zrkVuTng1It2cdaaEdkvi/VRjN0GH+1Gv+FK3i68df3sVrJ/vwLXHC7OfvYpPtPoadkM66TxpcgYfT9Of/ehrEvvEyXCkNo2mDjkiIg/zrFknz1NUp5OMZrN04voilJjrq9iZiwsLRfop/xqg2oyox8tIo/TZGB+tRzyjJ5FUy+TQopbIXM2WHvrt+WuJM9gGNQlsnd/F1yetRM+WA/u8nFUby+WOMhDk9KoRna5c+dJsB4Xt71b8K26MrzyLlVlHH61iXT5fHoK9A+H+k2mr6YLS4cbvO8zYeAygc89fX9KaQMo391FLPJZu21mG7npXK6TFjUmjb/lmTmu5lvNPutY1GOwhK2SSD7MJB8wXAHNed3wKXlyBwPNbj8aQHS3eu2duNu8SMOgWsO68Q3dwSI8RL2x1rNjhd2wFOPXpWjaaS8rcKTjv2oAzQsszkklie5rTs9IkmbO0+/pVh5bCwwCyzv/AHYz8o+p71TudYnuFKHCRdBGnAoAvyXOnaaNigXU69lOEB9z3rKvNVur8gTP8gPEY4WqR68ZpKBm1oWoyadcvJFjLL3p8lrdzyvKzjLkseB35rJt1eSVVTJb2r0GLT0EKf6LL90UxHn/ANnb0pPIPXFdl/YC5Gc5PpSjw+GYHBA9qQHFeUe1AhYjI5rvV8MIcYBx71JH4XhUEEryetDQHnwgfGcHFOW2kZsBWJPtXpUfh20RvmRcelWl0ezQBliBIPAFFmB5lHplzIcCNhk45FXIfDd7Lg+WQC
eM16dHZwhgvkj+dS+QoOQmAPWnYDz2DwbcNktwOxrTi8FQpgyygk9q7JYk9CW6A1IilV+715zjmiwHNQeErJF+aMsc5rTh0OxiQbLVOfUZrTUbj83HP1FOCgEZGc+tOwWIIrKKNcCNV9lGKlCAFcqPqeak7cA5PT/69GQAx+bAHFMAK/NwFJHQgU7apwSxOTzTc4AOM5pyrjO4Y+lIA2qAD39D3pw24bIA+ueKbgE+uPWntljuyCAKAADcFIycdOeKGLBgcdTTRgkgcZHJ9KXrtGc/jQA7YC2Bnn1OTTsggc5x1pgAYnFLuO3GPmNIGOByTgfnTs7VJ3fMOg7UwYOTnGRS4BXGTnPPNOwC8Eg4wTS8ZJIOe1N5AOQPrSF1UAkkAmgBxz6YyOtBJxyaYX+bPPTAxzUf2iIXS2u8ec3IXNAE54AJB9gKd5nlrs2BpmwQPQe57D9T2xTOUcrGQ0g4Z8ZCH0Hqf5d6FQKvGSDySe59c/1oAI0CMS3zOQFyeBgc4A9B/nNYkw/tHWfKGTGhwfoOv68Vu9jnAyOajjt4o5XkWMB26kUAP+XGT7HApwA3ZJwRwMUhYZwB2xjHWgtnC47c0wH4UN9Bzmobm2try3MFzFHPG3VXGRTuAuBwOpIoyAc84+lAHKap4A0663S2LyWrhf8AVjlWP9K8wfdHIySKVdTgg9jXvDPhcjPpmvM/G2gm3vn1K1Qm3lOZAB9xvX8aloDkGfIAxgVHTjwcU00gE6966j4fakNM8daPO+PLa4WGXJx8j/K36GuWqaCVredJkPzIwZT7g5pDPTviXoWdftnRPvRNC8mcYaJin8ttYOj29xODb38LNalcPcg5QL67umR6d66HXtTa48Kwa1NBHeTi5Zj52SqmQBtxA68kjFee3uvahfyK1xcsQv3Yx8qKPZRwKQx2paWbZvNtz5luxwGH8P1pPs8enqWuwHnIysHZfQt/h1q7pGsur+XKx3DoSfvcY/P3qjqlg1tIZkZpIX/iPJH1pq4h48QXgbJIJqwniq8UcqpFYOOaMUxHTR+MJl+9HmrCeMlx80Rrke1HSgDt4/GVsT8yMDVmPxhYkDczD8K8+4zSmgD0mPxVp7/8tcfWp18QWLgEXC/nXl3fmnD2NAHqB1i1YYE6fnVabVLfH+uX86863N/ePT1pu5j/ABN+dAHbz6pbjJ81cfWqE2vW6EhSW+grluc/4078KVgNiTXMghc1W+0tNJuJwq9qoZxzUkb4OKYEsjFmYnmuw0GffYWUaIFeNHBw20tu6kn6HH0rjCf511ujwLLptqTuBCliQe2cbSO+RTA1rG1jS4muJZGDvckEHhQgHUn69MdhXMaa1g+tXb3yLJCWYqGJ554rp7fSLifTpbh3wyRM5DnjaBk151OcgMc5Yk0NgdJf6jo8UmYLZCV6RxEhPxzWJeatc3a7S4ji/wCeacfnWfS0hhzRgmnKpY4AP0rTsdHkuGG4Ng+1JCKENu8zYVSefSug0/w+ZSDKOPStuw0WOJVAUbh7Vv29qir06dapIClpukQ2qgrEFPqRWp5Q9qsRQkrxwf0qXyz/AHh/3zVWAopEinGVz1wKfs2j5UGT07U5S+SwwuRjjFKACMlyMdMUhiKPlwWx7Cl2AkDGf04pdvzcDA6e9B+9kseKQBgbjtXP4c08A4wflPbmmBwT/FjvxipCxK/KfYn0oAUAgEDgfSlTaQBgH2qPkcEZ9wKeAThS4X607gKpG7HTnHPQU7O3056GmZCNnPI9DShjnsdx70gHklVzg4PQe9ABOFJ4zyDSIMsAcDnOadgMTgMcHPPcUwDAyctnB4yOgp2crz+GRTflzuAPqPc0Z+YEDp680CHjgbc8daXeSuATj0HNM5znd16AHrR905yc+goGPBGAT26YpcjBPofSmg89NppwIJJA4Hp0NAg53YxgEcUuF4GT70zJ64CjPTrSjCgknJH5UAPUAcg49/WkYgDPf0pAMnAAAP4U45CdR1/GgYmSDjJGaXHzcgnH40ZI4BGO/vRnqB1HvmmINvBH40owMdDxTcENg5/OlUDOQe3FIA+XlcDJ7YNYcvhkS6s9295IIy+/YvBHtmtvcN2ST6cUbgcYbB9xQAJgLheAvQDtT2cnk59BmmgqAOu09eP5UnCrycge/WgBxPPTp7UK5xg9zTNw28fzqPfjIOQx6e9AFjOFYA9PzqPzCvQ59KrPcDJLNyOKqS3hQfdPoMc/pRcDQedUJy3btVaW9RFJLgfWqi2mr3iq1tYzFGbb5hXA/OpG8LTQr52qX0aQqMuI2xj8SKlsDPvvEEFuuNxZ+wFY91e69fws1tbPDA38bjbke5Nas2veGNKVvsVuJZlP39hZm+jN/jXH61r93qkzfMUhz8qD096V7gUbnRnhyz3MG/0D1mOio2N2T3xUjIc5phQ56dvSgCLA96TvUmKTbQB3ujE6j8O9XtGOWhiWdf8AgDY/k36VwBFd38OpRNez6cxOLmKSDHu6lR+uK4meIxTOhzlWK4NCAiGQfpzW5p2oLOhtrjBDevesPvTgWU5GRg5yKGBc1CwNpLlG3RHoaojpUnnsVYEkluuTTKYCdqTNOpOKBiYGKKXtSdRQAnel6UcUUAAJ9aM+9HFJQA8NnrS4z3pnalBI70ASbAe9OCEHimLIR1FL5px0NAh7Hke1dv4SiM+h3b5H7kgE56Z6Vwikk5rc0HVZNNkcoCQ4w6eopoDttf1AWHhafaf3twogH4/e/QfrXl85G5VH8Irode1aXWJoyU8uCIEIinP41gi3eaQkLnJoYLQrgZOMHNWrWylnbCoa1LDSCzAup9a6Sz09IwCE6deKSAybLRVjALqfxrpbOwCdFxjoc1YgsyQq449AK0Ei2gA9RVJCI4ICh+716kVbjTkDGMdSKVIwV2hsZ5qdAoGO/TNMY5F2qAFLdySeBTth/wCeb/madsCjAbPoOxqTZ/tH8v8A69FwMgkZJHGegPX86OAM569D701iS3IDH2PSncJgZDDrwf0qRjtxDAkYx170mcD0z6Gk78Y29wKUEld2cHOMAUAOBAHX68UoIx2xmmgDAwM/hxSg8kE9+g70AOGQBjPuKUN1ySMim5AJznrQMYyOvcGgCRsHAwfb3pAATyB1I/SkyoBwOR3FLkYHA570wHDBwcNjHQd6XC4Jwy896avTgn2xRk4IJpAO2jHbg/Sn55xyPcjFMzu6ZzinZzyx6+tMBQCOMDn1peAcc++aZxuzjPGMjmjIUn5eMd6AJM5bIXIxjB60uRxnOP5Go1ztJBOCOgpdzYwWOAc4xQIeHIYHPHqO1BGF7n2zTCRk4+7nkU7cNxwg9s0AP3rjJAAHamh16AgYOPakyWOT175oypHPOe1AhxPUDjnt/nmkBOQAMfTpSfxfKeR3FG47gdx9ABTGOJXOTyT70vG0MSfeos7SSzYYdRjrTfO3D5s8HFICQnGDnp0oYkAc1XEodsnIUdDSlyzbepbpjqf8aAJGkAHysNw9qaZAOCCCT3NX7Dw3rGosDDaMiH/lpL8orprD4fRou/Urw
sf7kPT8z/hSuBw7TEnaAOTgAHGT/Wr1h4d1rUSGhs3RP+ekny8e2etdLca74b8NSvb2unCW4i4VxtbcT/tc1j6j8ULyRdtnZJCezu28/wBKlsC/H4BjSNm1DUtrDnbHwAPqaz9S1Lw14XiSOxiivbz+KTduIPqx6fgK4vUNZ1HUebu9mkHJ2FjtGfbpWZsBO0/LxgZHBoGbt58QdbnkfyvJjU9P3eSPzrjr64vLx2knuJpGY95Dj8qulVGSTlvSoHXIOWHI6AYosBivASPx5qu8WBwQM8dOlazqSDwcn2qq8eBzlvWgRmlOcHH1pjLir5h6n1pn2cMc8k9BigChtGaaVGOBVx4GXgjp1qN4zxgAUAaXhK5NlrkUo/hIP1pPGNolr4t1OOIjyjMzpjHAb5gPyNZq7423IxB9RTCjHkknPc8k0dQK2z3pcVNsJBP9KTZ3xxTAh20bRUuzvSbOKAIce1H5VLs4FIU46UrgR0VJs9qQpg0wGUd6dt9AaNpz0NIBtFLtPpS7TTAZijmn7fajac4oGJmjil2N6UbG7CgQoNPjkKOCM/nUYRycBTVmDT7y4IEcRNAEiSGc7FGWbgk9q6DT9LJUMUII65p+k+HXgw8xBY9vSukisyuCADx0FVYCtb2hA4T860raAmTmTb9TT47c7skDB/SrCQgDAIx6Y4oAdHCAxAbIGMnrVv7NtRZFkjYN1XdyKijgPXJA74OM1L5CBiRv5PTNMQ5k8p2AZW/3TkU4IgUHf1PKgHNCwruIUnHepvnVQwzj0AoAYrIWwVIOM5PNOyP7w/KnYB4BG4nOKNsn9xqBmKr56ADHQkcig4xwvJPXFNADYx1HXnFKy9CcgdiKkYo54PHoRTlOBuAJJGBxSopztUbj2GetJnY23acDr81MBQ5243cY6dqerhTnAAAIAHembgxzjHtnpSZORzn1oAk3kqV5+Y85FO3AEfKAOwBqNMbgT0BGTijK7jgZGaQD1J4GQQRj6UoGDznk9BTOvP8A9bNOTDHJH9eaaAdzgHG3B6E80u4Bsnr7DrSDBY4J47gUbhtDZ+cH06UCHAsTgZOT17CgnIxnkdaaGOOoOTkjPenK3POOKAJI5GX5gSBjHPOKbncc9/akJbPUDP5Um7uCBgdu3tQA4tkEDOB1OOtPByOD19qYrAjlV6dOgpd53AnAwOAKAFwBzkliOlPDgBAy5xk4z1qPcC2SRzycilwynIjwPTrQAmVCZIB549qXccjnPrxwaYueABzjnjoKVj8nHOenfNAD93QkKoJwDTd4yRjP481ch0bVLnasVnPzzu2HFbul+EoVuMaxM0XQqkZouByobOQcsc9COv5Vf03Qb/Vzm3jVYxkGSRsDNdPqN7pOju8Wmx28TKud4XzHJ+p7Vzc2upvZ4rcEkY3d6lsDpbX4f28SCTUtR/CL5R+ZrR+2eFfDa7I2gEqrnKL5jn6nmvNptUu54hG9xIyL/wAsyTtFU9w2nduZyMDBwBRqPodnq3xFuZJMadAsaD+KYbjXJ6h4k1a/Di6vJWB6oDhfyFU5BwQRtx79ahfDMSOenOOtAis7s5GwAEdBio9jNknI9cetTbnVWyevrwQKCrKvAAOOKBldlA6kDK/lUUh9s8Y44qy6llOQAepPpUBTeBgAnOevWgRVYNtJ9eRx0qsyH169vSr7xkA4J68jPeoWQtuAyf5igCgY+cDcQeophgOcYwR0A71o+WXXO0j696YsDcsFBWgDNMBbryRSCBRxzz7fyFXzCD8wTapOAP6UhhC9AcjpzQBnmLZnC5zwOKi+zlscY7VplARgjvzTNmckAgdvpQBlG0IJ7eneontcZwDWzscrtCHIwSRTDESc4LY646UAY5tmU8kZNM+zMw4BIx6VtbG3bgq4PAyM4pRalvXPpQBh/ZmU4K4z6ikEBbgA8V0K2ylgdvGO/rQbQAkBST6UAc99lZcghhj2pPs7YPHSui+xjcSQRgdAAKBYoVBKjJ6igDnfIYn7v6U37OwPQjPTiul+wIpyMgelN+wrgsFyBQBzXl46qRjqaURZ+YDNb509cEj5sHBFR/YucBT0oAxRbhgNmSccg0n2dw23acituOyw24Kdy9+1DWY3HC45zwaAMQQEjpjnFSJaMy5PFb8NiAVcpu54q1HpoVjkc0AYCaaWOEJI9atQ6S235x830rpI9Pwdw71djt1DbjjPfnOaLAYdvoi8MVzitm2sdi/ItXViQAZUAgc471YVNgOCPm6EHmqQFeGEKp2pzngkVaRAxLg/iakjTBAznjr6VMqFgeOp544oAjSJRjAGAfyqfZlsH9O1Iq8kkEED05qRFDHAXC4yQe9MBAy7MDIHoTUoCtFtx0PX1pfLKDIChf4hnOPwowCTkkHtgDigQrBVYKOB6j/PNKQST8uRjkZxSYAYDPU9RT8IBuwPTnpQBGBjAIUZ6nuKkyP77f8AfVJsDMAOpByBTBjH3TQMxA24EYyvTFKpG3BIXB4BpirvBIOB370bgDt/GkBI+CANmMfhQu0MMFTxk8d6axOcsM56cZpdwKhcAE9CT1oAepyGBYAAZGe/tShhjBBGfboaYMZC8cnrnigEYK+vYdaAJAVXpg465FKCd2SDj1Hem4IODxgdTS55A+bOeT2NADmO5j8oK9ie1KHyuMc5yM8UzIB4Y9c/SnZXbjGB355J9aAFBIO33zxSgkZBIFIqjpg+56nFH8QwOR60APB428D3xQSAMkk8enakXcACPlPc5waRiNxIGCf1oAeSCBgEjpknrS5JUgqMgjkU0Db8wyc9aXgMDtKgjgjvQABwSQAemacqqV4HzAZJzV+w8P6pfsptbV3Vv4zwPzNdPpvw6bhtRuguR92H/E0XGcQjZIXjP1qdbaeWbbDFI3ONxG3+deqQaToGhxmXyrdGjGTJKQziud1zxfpTkCziMkw6u8fH4UXFYx9M0Sy8vzNTmnVi2PIgUE89OSevt1roLe80TRot9tYRo4BJM3zSYHck1xsuv3kzH5sBuM4wRWVIzO2ZGMn1qQO9ufiJIRIkEAfjh87cfzrkLvWL27cl5XUEngHiqKnJLbSwBycjqfWmhvlIwABz+H1oACxCDGMjHOelAHUkFlJPIP8AOk3YBBOB7jvSeYpJYqCWHXtQAR4JILKeMkZ6Go5CrL8uPypz43bc5GOnemhN5+UfUZ/rQBGzMFyQo9zUT/N8oyeep71NsO4gFVzye4pm9RyxAbnGelAERDcqchxjB7imkFuCoYnvnGKeGy45Jx364p23KFjyFODz0oBFdgu4qRkY4OcYpjRvEcFhux8pHP0zVgAk5PQdh0qPYckA4zyMf54oAr7MgDeGPVtp4JppiwuDhgx5I7VbKnYBuGPXHSmbB0C4J74x9aAKiou7CKTgjjb1pXjXexOM9yOAKtbFUjjGOmewphXKlzjnrxQBV8ttpGVbuMDmomj2g5Zjg54FXtm8AAgEHrmmvHtDHHT3/lQBntGVGTlgR0HWk8o5GeR0+ntWgyNtCDA4zwOfzqNkO3bhsjp6CgCl5GGKgkZHPvT0
gJbnIyB+FT+W/QAnIPbk1KARDyz7yeMNhcUAVVt8HpgU/wAgr854Pb3qf52Y/Kx4xk09EcLg52n/AGetAFf7PgAENz/OpBAC23I49Ooq2PIOQ0bbuxB4pilkk3RsVKnhqAIWtSAS6kelIIR93apPXJPFWGLO255Mt70wK5bcOc+npQBCYkClcZJ9OgoMCbSchgo7HipvKYqGJYp+VCIw4zknp8vNAEPkBcEDDDvTY4sNkAZA5461bCsSQRg56Cn7SFGE2jpweaAKbW3mOW5wDz2FSrYbkMihSO+DVrYTzkHPQY5qVYwy7Svzeg60AVEte+DVmKADKArn3PFWUjJG0o4z3AyCfepBGQNpxkcdBVWArrAduQAAPU1IkR2lhgKD0PUVYREGeQ3HQinqvzDAB+ooAjEQ4AU8jgmpoxsG4DJz6VIqLs3DgEYY5zQq4AG0g+maABRk5JbHcAVIgG4A7vXAOM0gUk/dOOuPWpEjTq6BiRxk9PegBq4yAGOGOTUoDKxLH8AKaqsh2BhgZNPRQ5Cljk9jzmmAq7cZPCnnA6ijk8lCVHTBwRS/KCcJgHg89KQ4BAHHvjmgBH4YqOQOQfSjkMGAyMdDxn8KUjnPVu2Rj9KCpBB4CkcHrmgB6kJIpAU4OcHOKcY5cn5R+dQAurDIHAwMDvS719F/WgDAXAC5GeOOetKSGH3RntUQJx1oP3j+FIZLkAcgnHpTtyg4A5PWmbjk09fumgQIwDAgggHpQPly2DnPOKD8qEDpT0Qep6UDDIC4OTx1Hen4bZuOMDpk1H3x2xTk5faemaYDmZQRnJPfA4xR8pAbdweuBzRGSs20E4IwaQjG4dl6UgJEYZGRnngj0pdqbSQxJJxgHI/OmnHlx4A+YHNRliSPpQKxKWKsV24YHnHU05Q8hO0FvQZ6Vp6NawTXaCaISAnoc/0rbt7O2aeVPJQKJdgAHbBoCxzcem3Lqjyr5cbHBJYEj8M1q2umW4t1knyNndDnd+ff6Umq3k1rJNBGw8scAFR61gyTPJISzE57dqVwOnvNbW3UQwTMuNpQ9wMe3SqMvivVigSO8ljUc5HWsF2K528YB6U+IAk5oGLLNLPN5srs0rckk5JpkY3FmYZ2jgHnFNxuAyT0NA4ifHqKEgHsAFI3g44B6ZNMBJbHTHSkYbELDg5piuzyJk54oQEjyFVZeQW4PPSk4VcjscfjTGOHX60rEnnuf8aYrDnOWByD6c8UzKngtwe+KRzl+g7UqoobbjgmkwsJv2ZHzNgdx+tBTkPkkEjnFSxMQ4XsyYNQ5xGvseKQyPaM5UHAPUGkKooLEtzz0p5+7+NJH84+bnmgCM4ZSQep9Of/AK1BQhSoII9BzUjcNnvTVc7X4HSgCNd6ru5I646cUw9MhmUE84wTUjuytjORjoaSZ/LuWVFUAY6CgQwgbQSASeOB0pzIAvzF8479R/8AWoU58zgfL0pf4CQBkjrQNDU+bjClicgjr+dJktvQ4xjBJHNPx8rA8j3pWUKy49aBEIhReg2k9eOtATBJVcn3FSSL8o5PJ/rT3UBz14HFAEHlu2W+XJPPHIpFjyOmSeuBVoKNxpQAFXgd6AIREWXaQACvUdT9adHAHcDOCT1qwhwhwB1FAUHPJ60wK5gCnyyQdpzn/A0/yVZtuc1KY1L9+lSCNQueaQkVPs5DEjbx61F5JUEsmDnPTnHtVxWJXJ5pE+aTnnr1oKKWwltwQjI5yO3tUnlcDn5gOmOtXJSUwVODio9oAX6UxEAiOckE47UGFSpbBBHPI5qcKAkZ7svNPRQQPoaQFfyUK5J59qciEHG3J/OpQMSIo4DDmliJDn360wGxpvIyMhT1z1qVY13FlBGOp7ipd22VcKuCu7GOhoZiQpzzg0wBE2Op3EjqT3qRE3E5ThRk46e1N2LwMU2FiUjzz9aAJlQcZ4b04xTtiMg5PU9ac3Ckjgg9qd91QR1NAESBcAlT/KpMI5wWAGDzjNJkuvJI5HT61NKoUjHoaAGKFXABOfX0qXP7tQ547Y5pjzvIo3HPy01f9aw7FhmmBLtYjdnb6ZHU56Cns2G3kgnPQH+dRs7KYwGOKcy5Mgycc/ypAOxkFmb5SMjAx+dKHwCN3HTAOaib5V9enWpGUefjsOgpgP2EIScqSOO+ab91CpGc8gg84oHQ++M01RuQ5J4JoAfvQNyD/sgNyDRtj/uvUZY5I7Um4+goA//Z",
- "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjoAAAF8CAIAAABJw4Z7AAEAAElEQVR4AZT9a4+sS3Yf+GXdsjLrvu/7XPp0N5tkNylRw6FFUdbAhq2xAb+yMBhAX0TfRB/EhuGx/UKAPZaAgYQRNZQgUaSazWafPtd99qXulVlVWeXff63MZ9c5pzXGxK79ZDwRK1asWLFirbg/a89fPL69vb27u/Pc3Nz84IMPPnz5Ac/GxsY6dz8Stba25nV0f79YLK5vbjpW4P39PZCtrS0h19fXA6Rw2DzBLEZrV1dXP//5X15fzyW/u79eu7sf+b9YjEajdf+5Bdz3/vPe1XO0vsZ/v74GPz93c3OTH0nX1lAV5MDX7hf3dyDhQe/GWoGKK8ICqQRrhWotSe7X1hejjdv70Wx+M5/PbxfoX7sbbd4tFtvjMTxwBuwuxKfA64ov1Whzcy2e+9H+/v7Ozg5i5vPrwGxs/N7v/d7W5jYOJK+7u5vZfGtz/fDw0Nv92t2jxwcffIypL27vFq9evf7yq1dXlxKunZ1dfP7lq1evvjo/PkHg3t7eZDKBHM2Xl1c8s9kMD6/noWQ8Hu/u7m5vT9fWNy9nNxsbW3diLq7W7+6mW+Nt6W8Xa9eLtYVaRPbaZLwjyf3t/dX11f3a4nZtAe1ibXR9v7he3N5tbWxsb413pl5vVeji9uLqUh3JaHN9bQOPE7x4+vTpJ598olzY9+jRI+SdnJzc3dyqbhWNV1sbm5Is7m4UvGsNN9C8trn12eefv3nz7vHTJ5gPXuDbt2/nN35v725vlO7Zs2fb29vYCJWSHh8fX15ewinw6OgIAFRo4I5PT8SO0HR7O51OX758SUYkef369fn5uWCESYiSJ0+efPTRR+uje7x6/uwZzMqm4POrmVLMr29QMiLUmypWOdcvZlef/vrXQb6xDrN8FZNHQrKB4Ob8wcGBGkfJ2dkZIk/Pz5D36PBI4P1ogW8XFxfo2ZtOhcjr+fPnf/eP/+if/bN/Bgl6ttfHiql1pLC372V+o9hIeq/ms1//+tc7e3vzm2v4L+dz1CqQGtzbmSg15EqBGPRfXsyfPnm+vTX5+utv8FKrOFhbezze+ZOf/fFHO0/355P92drhbGP3enT+2ZeH4+mL5880GA1P4uu1tb29g6P9o5vL+eX51exyProZjYjt7sb13ugX16/++S//9D9dv341Ors/mC427//+/+qP/6v/7f/m/PLsl7/6mz//83//4Ycffv75r9+9e7e7M8XhIpIMbCGMOKiF9bXNs4vzk8vTi5Rilta04aFxIn9td3eHd68SPjo8UBaSpnT/4//wP6j03WmYfLizD/NkMwK2vT3e3plGjDfWT05O3x6/29iUyeSO6tAu73Dy/mhvl3rQWiMb421VezW/+etffnp1Nb9b39jcmKxvjM7P3u1sjz/54Q/Icymwxc1dNIk2iO1fffkKto2NMQw3t3cbG2tPnzz50Y8+IQYp4HrUDhWhKr/55ht0Juv19e3pBIXqdHS3+PDZ0zXEbKT5X9/cgx+tb/2rf/WvtHRlRD8wMgbb3/pbv/eH/+Xf2Rxvnp4ef/nl1yT8v/zDv/vTn/708uL6s88++w//4T9i29u3rz/Xdo7fvXjx4vd+76cvnz8d3VxNtrcuFexu8fzZBwdHh3c03/39f//f/3OpFjfXsqBwfsh9/IOb2+sS4Cg23N7cmpLPV9+8Qe3d2ihlGd2Nx5ulOReH+wePnxyNFmnRt3d0yd1obVMb//Kr10T60dO0oPHGplK/ff0Gf37w8Yeb441n7MX9glaF//YmTXq8uaXuPvjgI0qPXt3YIr2jk7Oz12/f3N7Mz07erqWG0mZnVzez2fXVDEtu19c34MdYrVhbw975fDbB9Y27/YMpjUn5YAKuHr87JXV/9de/0AY3peGUXx5qUXQ/lYFnxL60/SB1RAMMfVRg/O0AtOtXsQ0gkAcCSlROCka18VOHRI1dCnwBry1/wgJCLMtElcWSKxDMaUAQ8SR6lCdDUrHro1DOuiW2qImnABFNuQoDnmclXk+NSdNo1X+K1o5Ml0eigJJ4lFZZAK8TskaLaWIrKwUMl/Isq7+xHq5SzB5qhl8Syo7Uvvnm9fHJueZ0cXF1cT5T1J/89m9fnJ8V/hBMrHFeUaSC36somfLjoZJU+I2MmFZgrOTm2oZiMVbrG+tb2KBksZt3C6bk7k40V9IFR1UiSSmEef8NLvlKAjkwDheQEe6WE9ulXuhlrCBDTyEHFjYNwMu6WiaREBg3eAacHf79ZwNAydNoU4crtKuQUCStV+orz3QW3ufFryAAMATb2Cy4+jX65a4EuwRHvwH/mwxJ+D0lD/MLv5YpUAh5xnUOTk6swHZDYMLfS1bIBtDIG/5hqg5HeZeun+AhaT+t2Bg6uw4cbYwoiPWtDWpd9+jufoMtTivaGp9cnF9/eUPdTA8PxuPp/PLq8ptvtta37q8X0FL9453ttc31642b2ShdSQQgUK+PNmpiBH6HYK/tGuDhU/jw2rSppXiq09n8r5pZClKXRRIebkjbeJSRRzgpxHBKeWNzzMPYdFTsS3rYqU3h6gWxF4z/bHZ+fnGrca/NdGwvzo8XO1NqUSo92mRU8kOByrFritYZiEcnIkUJuU+DjjYCFqtQPYzQWqIIcnMrLZ2odZGqrnR99QKq4CVvzUPIeAjb7HqG87prtMG/+3f/7t//+39PG8yurpUSgKzZm90yQhqduru9u4Yt3NtYZ890CBZ36d3+g3/wD/7Nv/k356cnjETKPJuxMU+ePoYk9BkQ0ACVIy6l7CGfoGjL6+k2lQJEFb6noOUyDighlx0cug76vgze6ekpYridnSn/waNDfXw2RlodshfPnsOsFFTcp7/+XP9SMS9nM6Rubqx99MFz9S/TGPu79ZubGAI49d/kFYbMZmmD1VXVV/nkBy9397Z1PfUdoV1oZOW8prqbUBQLbGHyTHmXFZYydwLRKXZVZziyantdE57ckJYHcIDubmPzmCrNW4PLYEhebR/ST4hIV5vGK10AqQQFF4Usi8WyreIfMshb6EzWNRIquVE1mC/eQCBQdHpVAy9DQ3LyD8K8i4+YAoIDHqzcWNuQokKSGuVqL4hjEGKuGFuUSFOmJLYkyCrEs13hDNleEYEAou5VgVN5lzP9WQJqTPDNN29PT87n85vx1nTvYFd5r28zYN2e0CsZKMyO5/qnqiqYo1nzDE9SjtF4ewu7jLmQQTUj+yZSeY23irK+mbbDrK3huMwVJXKZIqf44cL9xiajuH4XY4PFiCxPeAYsxg39cCOppRYB/CGmihxULd+FbmscyQ4t1UTjKZILPKi4hk/eK43WflGdqqAC5vU7TlQVdGmE+IE1zkQlch0DBPKgE09aRDEWqs6oMQtRHcXSMCOvBpf6yBtaUZqN9kaV4DxUNIInf+spTaXq8RI8wdidZvyqecOgSRNdwOiRI49AyDvT7xcnQEVYRwF7CNzEC2xsorj471W6Pmn4jJhOq34XanpjcbuhG7i4vl+b34/m92v
j0fqjF8/0Wgnh9fr9Nq6MN2+u7mc381evX+9tTXYmu1sbYz2cG6q4DLzyoVhGCGjZwkxktGuCB/8y9wc/opSpqS1/ar4FwXC2OQkcTs4rJ7SLBnJwnZHwAbcofsw3gwCzCQMhRlc6wDyopYOFF1gIwBx1NF/cLW5n9A4GTMfpXkDSqKoZtWAmOXo0vCTMXMtIjQvpWqBoZKNcTYzkojwRX+GlKyrrxtxgSAVGPIDxCGRKeTi07ey1gOUVlXQCA4DgzY0lPE092d1hKiJga2usV5QisdUJ2dg4P7s4PbsgqI8ePTGvg4xf/fKvT2azH/zgB4+ePnnz7q3xaXRPdbUVqjkjIYTh+Rp1NL++jV1swb67SelAcqV9w/kuPhhawZMOvLmdI3Jvnb5aO377FvFPnzGOT6fbE0aUpTRgYpBMFzFX4/EEZzWl/b0dRUY5ToQha8bNo/OLTOSkHZV8AGNSDL4lf/zk4IOXT3Z2Js0xjK9OCdJGu7v7ChtutsOadnAV8Us7IRBAP0WF+pU+Ei6kXSPhHzwdnoTELNowanGjOt+4Fi3KzKRrEssTSaRDlgOZ4KCKYM8ASC4EpRoqbBUgRWDg1A9AFCBao2OhCYlRxsxgAEXdlZ2T+Fb/s4ATIGWbwbKXSR5qaXg9cZp/XcqwogoFJVHm1CVaVRgC+CN3NRUQeiTb2mQGJbrVaTeIuxuZ97u9/VyH4vjtydu3xrXv5jWhtzOd7u/tsYUG4CoVZiqy58H41U3Kg+KQmb483GSWgq4yYYmCYh3qdDhv5JQg7FzcLHSzDbf0yCjSTJdSapCADtcKbeo0fnq2nv2aOqguVjKuWhZegVU/xYeG7PCOCsIC68DkVC7hVWvA2gku2OB8D1MC1lEd288GaDKKHN44vB3AYiIquacoVYEPPB0IwyrnKtcqU4GQiKUxsZpKo2KEeIrCeRjSzDY2NCqozAdqbNoVAI0TQNMGBkDDy7rzbfFY0lPV18DfeQ4caGKGV2D89RoT1a8wo11229sTNX57U7UfwWdxFvqDbJVBokH37do6/UNZ75mbPDoigNTM5f2tiZvNyfbhdHdtFvtkYGLujlZeN69mPvaaCrvRozT/IetwuMY0SlGUhPZmab+2fwhEctMpFpGS8zQMypdtpPiTxjKYq546BreqwWRTTvLBdb1oPou7TJPOb+dwpsu2vkbBmdpSRvWiIqIt1tdVk97L5u1iPlvMr+82t7aFgJS21fLNbfryW1vbqp4xQN5ksotsLFD5YNADT+CrIevVCAEgBDA/2rxymXlYOXRyYj1p26OjCIkkQmppYNkBQup8fkX6ZH14gK7MYRr86cI2vLT7R4cIkItSb41uM33HRednoClf5sEwhR/mDz76GBLFZ/bYJEikZceR0UZLUiFtACiHmVng+Xy8nT4Ep55AqnodW6WUaRRg1UiE/+4ecvLPrCLm/p5Ere0dHppZ3dvZvTi7+MVnvxCumMaE+ByMpWCCUweDEstcRcQDDPYhHkIiIIcmzKjVfLR8Ufjk6eHB/rTqaixtuy6RGejbxc23es1NZcpZ6ljWlHHXkCcpBiCuwVKwlZNk5Y1+55cTD8jS5Jl7i0nBcI/UsaiAlTarJnGXthleUZclBMkmdbR8xpKlzZR6BVStOgonOrs61yk/+wIoeMB7DZSf5Bw8dL0oXc5k0vCAeaosYGq2ICZVh2LL/431TCNULA6E46txRiGPZHBBXbWOJ6pIqInmmwW9sdi4MKOdBmaK9quvvjILPL+aq9rDg0dH+4+2Jltffv2V+oYcrfKC3yweV4i1mWD2ipka4doG9Bk9qAaaBUWRxK3R1r2mtTAjpKC6ADDod2fsRZ1phFGAzBf7zXqmZ6AfVax5X4Nt/hPKlbzGbJmVV4k4xvimrXuqNfYgUJ7QhtZvO0EtAyiPSBanGqRSVeJl9QUPV1AlBuXvEE+plrFqZMXnRthJVjnkDUDYaPqt6avkjcGTZg4MCaGGSsIAM0KaoqrRZWWHtH8zAYtxq7OMbk2gmWu/PDvXFBUKuy1xaVeaHAnPstXpmRao1tJKi51SWeGDRx11EbrsTYmQdt06+IXzN8zw2jBQaRVINstrrMy1+FF5otKCZGkpbmsjvTE1bhyjnRosrt2/ene8Nbq3onMZS3x7P55sTbb3dg/GN2vXZyTuimY+2Nu2lHJ1zVqZkJkbphOvm3XjSEs/yw7ZUJUhZuXa/zAkpauC4AY6u7AAulIwFuUY3k44xw94KGkHrnIIWwYkPHBeW1yKaC80E5MJ8LVIyBaAwRrFA0nnuDlaW2xaiVDXWVvt3KPA3zP8Gp7w4f7edBRKrm9iqw72d7FBVRN8AlVlyohNVYOPcvFfA6/5q73dnaPdneiHNE9tJc2Wh4SYukRtZy0LhPV4/fLilNSNtyZ6nibxTo9PWAXSdXkxazC8UudylBzZizKuTbbAlD09JE17/e3xMRhritybN2/kG/FIXmIzkjNtKJCnqSK01EYjsSbFY0ZBV0e+YHVflI1fXplSur3Wdbb6q+DAULW3Z4F1Z29/Z/9wH7N+8YtfnJCxLTpsm9x0rIJjKR2tGXVxWGUrfMUApMG9rFlFBoCwsl6b5C5tqKya/I2lFS2UxATJZDyZqpebZZcwaFaaouGUUwjoilkWQ1TaQ7kOVw1Dwg4H0x7Plh5qDQeNgiicNKvSmDSnpN09Uf3sDHjmR/KQqfHFnmXYlRSeGVVQsxnPRu3oRAMWkqk7ijnLupkaCsvTdGO0opgjYDGblFVUudVlCWMByuDF1EXLlF6tXBIXOjOnBSnP0nVL89KF6rmIZgLKQ3aJLDkmqbImVWSLTGzF/N2bBnz3Vg9lTlafPX6me7I9nujSGElLaAaA3gTM6cgYb3lVS6YpPGUqI4xnifSBbRiwRqUJE+n5+t1GTNft7nQyLqUZDZAOdAqPPYqsfCkiBvhZiYv39r8P4QvPl+akM0Vba+GqHRDBwzVTBH7fLeWxZTMplm6VML+dSkQHeuXp1xX4t34brJ8d0RVRFaymlpUCoIUPzJAFz7dwxe6WWFTb0D8FwFbhvJajdsRqn6ob57u1m3tpfSej6V46sKK0N1PVre8AyxqesKu2BcEmrdeamRZTrmKbmmWNFJ0Ba6lfvXZJPeXlKd9GgDz+20XGfBy5SpelLLHaBlMNx84XnTU643bNIMM+AK3Yeur9xs314vp8rh822d3DqZOzU8Oy+/G92afbs8wMwxmxTxNY39xOXk1b5fatR7M0NMQtS9fAXhpU8mYjsnmah82rjsKlLhcUA/ZCuKzT9ntWRaRuSTQFen9zzQJKrjI1L9hkYYJdizPIMJ8x0xW/NoicTydU4XvzKRfAEIJUy1BBsrFRpkKJtzN3AiYZ0WOWZ0vdg9Guy4VgKISTgelkOxWH28a4KQvcsWSiKAEhUEEoSRdECCZkgibdUFlkmYQ2F9v5NjCTBMnR0cHHH3/87PHR2cm7r169RrBymrE0roKQerETwej+L/7iLx4fHf72b/+27Uiffvrp3s60axBOYBxSEQmeLemyyzuGrQ
SVkIBUVptmQlNKHIczdNT57Nz8nsLb4iS7o8NDqlW32xo87pkSZ9LuaaHaMSGvq5p7iJYerakRtspGDFLf0p31iZUzEESVHJvn2aFSTs9M74oNpeH4Sw6MxbJopRozPd2s9JSYK+rFBpepM44HH5VeXoROiNgOl0Un8eQCvUIoKnWQARO1T86yCym9UKmZf12IjAAiOgCahiSOIuYyFMiuvxSUONijYiYXZAZnPYSCuW0p+wM7C4YW9k9u+C93wnZnHDLKPpaMqEhqTBohzLihc/SszAhc4lEmodIJ14OpHFP2wQWoykUd8Q+u8UAlOwxFvK2A9nrJeWaW5ubm7NQOiyvd84PdPXtyJtnQJ7/RoyePz2dXBHCeWYK5jMbTyeT25vTinH9rNqdDO3f7DzUDmIm7tPKa3c7vFuub8ljcbOtspssVFpicZK5UlgmeZi54rDCuDF+7yKtqT/HbqRX1VK7LBZuY0LmarG+/wBaPju3Uw7MRLvFUaIfwhrxlVQ/g/ws8ch+gm7aHcljIl4pGeEeB77I0PYRQeyQLCi1EFFPEVqkgTNaedYFbkeleaFGchg0J5NFBFg+0SJsYF3fWfomyQFxNTdVaY0l45Kezk7AIWRLTgV2KFdOXOkVUddyX5ctrKMd/FEuhKYXmziXNsN8j9tWuFzTC/a1utxWoaD9zMGMd063NLTpCKs1Hh/Xq5mp+cvbo4NA+U3M1FxcnlzdXm7tb97vja7IUY2B3mBXQqDOCp9TyGji5JO4//zMUENEo5Eo7Z/0Pqg7x7JriaeZ0Yau8XVfLZ+cjFhgy9Lj5Neoo7iz3Bp8YHBxQNUJPaTtHFapacUBa1gIeHhkIEa5TCDgTFPouW9u7On324Y1jYGJsaqa0CcPCSpWCKAnt30hAcg3TOZIlUXRW6CvX+cqIx/zi9Ww+v2duNdkxJl+cX12cnXfNAoeq4a3WWJH6o//iDz779G9u7/7D119/bcQhCwB0xYus5dy/OzmxuMAyK4uOVwzM6QnRkEtRtWwO/HBy9n7RQoi3ONsWS1nE0g2yVgxRxNvTJMWXX355+u5YcViqly+fg2fDvvz6c+VQoc+ePVEZ56enCrt/YNC1i+OyHm/rGJFG+pa9MGKP3u7S0ehNvxxhaxgh4cziukdGWtLtXEGz8ZjfoJpfjdUejVq7kpiTazvs4KCDqDurXgNQ7ab9MhDSeQuR0POhS1o23DTFrZmJBYOiC0jN4ksZLIMkybJyJRWBDAXx6yVNCCD0pr70PTQ9wVmEyaAqYzDpMj6rZ1oXhFZsashlNIvpRUxN6JnuT1nZtdr7ZRAS6yjTHn2VqYxkJ2f2KwMS2iyLj2n8uJ+pGCSXvu4SdRmFdEaeHBNR5EsSPhgzkYxqF2lpBlnn56cGxLLSUrMOaZZpbi9W5iZ4CATkBI7sYvvmZiaXOU1rsci2GbFy2R6n+49MdK3rszCIemcKU/NcNicbdxEjXRK5YFhUpY5YbDW+hPzwrSoatTgXzufxbYfxYUPqlJMMLrmXbAlYhse3coFb+TuhZyN5EJyEybfcw/DB32j6OQR+H09HNZhYDnleQ2nl24GeAxIeAA/CvcVJogOI1fimgPxqQQXguU66VOqx8ciiXz1f27x+ekbTTaZRYXoeRloapKiqvvRbYZYwJKWxvXeivIBEUJ4rgtvjCSChDzwN36VDRuNqGE+WZTyyd9jM2t3aPFvbN+8MvjfOz8/sCJ8wWkWDbaoWJiCnvkni6fm5sfieHYP322fX56/ffhMtdjNbrOlrp+VKpW8JfymL95z8Tu4DGTwdJW2SFwbUtsMWxYIxUasnT6canjyVOg/VwcE5iBZ4Tr8Tzq3N7B3gVIXd0sBuSp41EFp7MtkxCzS639SWd3fGB7s7KghyWjhNSv81NGRAZnBQJG2llW2YzEgDFGvfCWdaBngBZMwEniMkjLAJZGIjMBRWoUI8JVcGPnTe3REJ+UIoX05gWmrtlkqnMkqDgtOnNJhaME6ZzFlJKY+89vcPTT7P5s8fffbFF199ZZMCPNGD1RsohBsMwlevvjHW+fGPf/xbP/nR5fmZeMk5YMhAZD/B2PFlRx97qcciVg1sb5ohzBgbV3Vo4L+4OLPd3N6In//85+cnpzZ0/P7v/0xZzk7PZvPL8QRdOxCenpyQPeLUBUzy6nDrBimIuVmdO3PRGGjtTXLULs1V1a0QfY9wb/2+h64AmpmlcnXIlrNK8kKqXPJsCM1S8UTIFcUoEtdFBlAFSw3zcGCaF/wDU4A1ZGOXd2ffqGiBs1MzjyqEob/Tr4gZywxh8TSVWLkVOjYy5ci6ie4+K20qI20o0x5rm8bO3jOaNgK3wBy6ls3JL0wCUGiqWwc656b0fnsWLBYOhvUNu6QyoE5taQ3sSownKWCm0sPIpFnyLzYpY7CWRHahhIhqjvEroKGrWlFbAJqteijVizE5fkcadacscBbfYpYA80soM50jIfDDGWu06gfAIwvdU1moEVGSoOry6pw8mZrXC9QKS+hDK7KzzGJPcm0fsczFxmORJQvloqFy4smkPBZoVvdrGauFu8pyq2njp11UcjQfZt3aTIumpSlB7WQXlpoOCSey4zAT/QjWd0cPNobmVf2FyNqnACDYVvNpXQqFYqRxSdlB4gM/SFi8YqAQfiHgG+zm5Jjn5nrmCaG8RMHjlYfQS4g/QqTqWCEd65UfGI95If5eFdCrBSy7JFksbGzSpaCGTAX6E6L98NgLYw5EWp1KFfrN27f0DTDEW9oQeHV53nTK7m/93s9AoiTjs6vZ1cUlMJzHmfCo21sdMFBa8Fd9vur+Hp5quvdb4y3+EEn5LmkzirizzpFdeyPLA3vwiKr2B+edFaGb8+udZ5Ox026LtU3LOxdzh39+9IPf+nJ2eTO7evf2kki+ePbycLp/8vaEkE726ZpMmlxk+epmvLs9mU637mZmBWze2lo3zLpYH+XUkYxkh0XK7slPpGm8x4+OaGrEKC+PshgoAMA2xEsVYaiFJUgwwROkUQsYfoUVyS/QU3KBnpwogdZ9O0QsdT9fZMEGZhvGhJPwx48fmwxUEebMVZolOQBeM3g4PpXE2vD6eNvURvrs1rAidWmePM1AmFWLgTVWV2AIYzTIlCVleYGfaAjprGB4pssEoq1rP5ojO7EWjx49xjPqFh5u4Jih+cHhI0AyEqhaiW44EYNlI6M2mHG+brwiYymEkk+22deRQpFS9OC2HI2fbHN/+fKD12/fXlz+CgFmr7W9adlgxsyIyhoqCtH8V3/1Vx+/fKkU7Fow2yF/dfXFF1+g4fjsVBHUOJuSLTXlqkJUicULjcg2nSg9YM44UJDGpX/793/m/BOSGLmd6fZTG+VLNysYSjSqy8tzW0xRG4Wxue74lCK//PCDE/vdD7KTxNADkefn6YLv7x3hCXHa2Z04fBYm02Dr97b1WyrDJSItI70QiyNeEa9cCLOG8uLwEfjUnwy+77AP6eFsOWCpkGp4/RTy0IF6+Lr0ZwrdtupsTGOozASaDeNjt3Jas+qMwrQoQw+m948NWU2Sa
XX8WRv+UBgbIoFJLvBJHQ2QfoY5N8MH68tGRBlEmB0LuEG3dHQbW5cy9BBDTykipJuTvRTegsLMkFctOBjz8BOdmFRUJ4sa/bl0iSwXgBXripICrq60USHOctqPHoSU9KNxVBrkZroMQjotitNECw85ksRTdUaCV67yQUsatl4LCSD6ULOH7Mr8yphZGdceHx5Q9TDoDlitMDW1YXV9feP26hpbJGThJ1ub4Y5ejI3sGZplDz0xomRT15pgGXt5yVxIuypIWuxACQDOa4PxR0OtUuGhJF3MwBXl718LUtqOGnC25/vPButnI+mkaAuHHlQHmCH5AN8hADVCjpCRNZ70/8qqFcdiP9LkakeAJyZrKh3Sr9qVquF0qwELpIi3q9sITMjf/M3fgOHBTH0BTtalZ7MAhlqvkUzc6KosEks2l2RXaZYlaPofhqTr9S2Xohu6rRP/6ztTyXaJ7I439+42Ny5vz09effDi6OLcNPP1zMHyq/PFxva2Mo+3Qv/WppOzLBM1S2TIwOX1VXov9dqZOONkokqKb+X54KX5j84hTAjJxAGljD7/tusSPYRXuqGAD8MboRCxWOe1MWWupVaMAlzbAtWgWPlymgwep2mkL5gUOvQ6YabkU90rOnna3zhtL6B2dR1hoOMLsEFSLr4CS75UNqmggjtHGXFSNZG397k0QBvqWHLSFlRyfiIhtvONrsif0qWZ1MlMJ6tirowLzdzS+Aw2DY4wCYkZMHXEbLx5e/zm3cn1bXqroiDfmUw//viTI3tmbA6czc/O371782bqrEtxD05iTCyR3SaQ7iStOvCQk2FcqUmaTSd3UW4G5/nzZ4Z6sRPrm3L85OOP+b/64kul2NjL9Qh2kMHgEFc4UwtD0dvVlQkHTNiuhzZlljAc298zjXl4+Miu53dvTyDQ28C3z379BRV8Nbv46KMPfvCDj4z7JMdSRwjpP0yAxKuys1uy4udJoxrcsqLqB8Yq81KtwMUJxNcB/qEHRgBC+tn8wo7MwGWtoOYDmZmSG/tQWeOupDS73hMgjW5Xsi0gbyyd4RKIMmCGQKaE7VCEjRwFDKMyTFL9ND+BMBqPYfOX+VvUtLFh4hJ6Tx0znW19itRgbpqRDV8TH0xyqkjloimSVxEFgGsx7UAAQLB1iaeGGlkhYv/MXCAvfwoZPIbYsa6J8ceWdynS/LAphwwMgBC/tWndiydCLWn9gZGTzRc6vrr/dwDc22B6ozoBjuiTbAM1gkhXolHHx4BpZ2tHoWFKBShViK5lkKCjxOHMebUegLJpshhK2v4Ub+X4260C8osh0VArVxyqfJpByza/SlgLyyvY979B8p93K0z57TrD8GTUuQt6YAPkRENxdqHw6/OLRSK/mgwvvleVorTeNjyNSnLoI/C1N5ofgMYme03RMr3XxU7tkMZHPev5DPNJ31//1S+sHrMKOomIVOViuSHT5pXXDnlY6IBVWZjUtk9Cls2EJx22lLTEwZO7n25N7ArdiC6/Gjm9Y5R1c/rq6zeTH3y8uL/KkcJ1u9TPF7c72xvGb+OzucFTWQCIFhZPdbXt/7q+dR8LhXd/dbPlEFfmxAgSQBzwHEjl4VAlvP2eoaNCom7IHvhaWMJDr6mJh3ZgVVMPkcDwHTeg7RYKgyYjUDe+IDMqUjURA61s6Zamy0hb7RiD57KXu9W2lJWodEbMG32Kz2qtTYJRtPplRJjdKjfGLMsO2FBMwsyjuCxm7iTyOc0t/Gi3ZkRCWiZ1miThO7v7/MrIbnlFbbuFc2NWm1UMU3WT4SmnPgzIqGPTca66sckF8zFTKXRehfzyV7+yVgqA1USJ8P/4538JoYUiy0Yfvnj549/6YW6CePQjK3VM4NvjMyxxzNdTAbmr65htetB8BCkVotQufYAE5eiMwbvpoZXh6fTFBx8JkZ0VrHdvXrMxzB5sJCzL/7Y9k1UioLI3Iwkde3h0wIYjD1oA+Glfhp7c3/pbf7sHTPZrHBw9Mgq8cOL54kL1QaGlxASOx2pkfzdtx0ZHFgsSZls4Sv7yP/3HcAZS7qGshPEriaTShldC8X1gCQU2TKfqZyMM5oxemAgdjzIZUcFMTkYVGXnoEGmGGVbpQ6XnGwaQ/yiV7AIseZFSS82ZImjp6BClhu30q31B8SZhbGE5UN6cg625QLiZuAAHTwhTjl4WkwukjOKmyLJVaIxCCwbE0zUZmpSdGFA3E/q5zHCVa8qbvGVOOUbfkbkeepPdZF2TOeK150ylYV5JuYoBQBqE8XglW+2XhMdrO3cmmblzG4GlYTss6rwNHEbue+qecKtsgy2H2E0DGtVRYxgZLVjtHiojs4xfs8czrYsoSChrpHI6DGC8ck1AgyV9KaYusqh2MCS8QvkDVsaigcEISWA5nk41eB6CNXADfAcsKFZuABPQyT2X3EmDXFpcgegRTtI8r2a51yDiqAdFNO4zV8kNxRyqQCCGSAI/j9culyenWdI+2iE/zGnzDgRZxrC3Vz/atO/c7pjM8GjzTVWlC2d6iRZb+QV6tkMqx5+irVpTA1RhPTo+4F38lEMVZq3XuCgj7MXcSOpmNJ7acLqYX3721V8A3ts9cvx0sTi/ud1Zu01Fo0oXEBNUOH2j1KbYt+63qDOje/M6mRoUlo7emt3V+JNcVy6ElnJAXvkiLVy/glJkxRvqQ7gQT66SplJ4QEqFnhXi5W9FhT8DTrCqwKvRL1ld38p4JRMR5UILhVJTWHp7uuSc5WHFMs/CDnNdxTBwD7MTrmUjD1qpRMkIAZJ4iuXE8qtNdc0FbU2BqmUZp7BltWFRLlYaEtRRu8xPZ92TJd13KQpy2hmonlOpuehG1XF9PcumCXqqlrGbDIpAoMO/JvQuLmd6smUmsxZlmhqM5UbbO+l9TtTzp08+/uilnYTr6xckczJNBwsx4CkcubNkuG5OEm1IlZci0KgkFhMur05tS0bVs2fQv/zlL3/51RdfhJ9VcPCAS9OWAHtUB0I4Jxfc41Gpdo6wqTL1/OLzz8zv/fCHP2Ju3U/2k5/85N//+X/8sz/7M5w0mvzZz35XvkjSeHTgbdlgLphG5LFzYJBhfRENevAgl+ZKSRT+O9VZzI0gcvIuqpatBbDXfobEcl6HkA7sZ4dHg9c2h+rsp/dIzjI8SguNKUsXH1Z/fg0FNEfqpVAIUqkF0jbPII/KV8tLB6rIRFIS0Agqgtcj2bSUBnMsnRUxMQIBrwQYHYnmYieRExdJhXaFfNneiqLGyBtXwFLFY34IEl2nLPE4Pn6TaTET3BobHtKdWpopTZkoiV9mxryomgCmVagYtU7IlgWremnMAIiLilVRip/8Ouv0Ke7VsVRWrzjbw+yuubq2bnG92KZ8olXtzqDVWPBqHVllIQ0wygjmRt6ZphWF+bS21Qvax/q25dPoR7VSsQDiA6+drugIG+BpdB2ryFwAmrmVwUN/5+hZIL/hIfl3YLwK7LxWmjAGPsUx95IMe6sLmY/VaQe1JPU/uTQSAKYvjHbd8GNXgqfm6DX7V+zEtUKgY2dmX7ejQkwvMXQQykjFPX10
RPvjJJX05RefY3LXHRMiXB7563yTZzKPexDSRXsIU4AR5gIkpSUlJc8d1UmaYQg1MeG5YdVjbXYxP3mx4/qaHT2Vn//iP2ng46P9rfna9en8emGobYrmYuvRE3cwYZEy6N6Z9NtYu113mnN7c0prbS4uRzmOQ5YUQRlfffOVHGXdT4Rxkrcnk4llD1ZsNhuvoafv+X3XxVQuUY1Q2jBk5bqA/QyFxWfEsg0gLbJlZ8RmNnPqenS+pDHtuyjEfBUB2BqfBmXtilMQqBqnfHgarclaZHQ/S2BAq62RIjhrT1oUuiTgG7PX11uvYTu7yD0OOi6SCExJm1Td55W44qHdOktT8SBfpdcZSHc1nYWMApFKT9nowb5k7WI0clgC8600m0YbLY7kRcZgo+6ddlJAvWz7uThnm/Ehl0fYPuN2jNmVCZfLcxdfzPDK2AuvDFNkYVsNam/mVxo1HYVRCpU9BLUXwzDNa1fiwYENR5Mvv/r89OwYeQxtHdailOYuNtAfU3sagoHzUgXRCjlqlvsts7eeydcDmLoRNKNDl4swS2Hd69c70z3DJjyxmiXENv3f+Z3fQb+bEh1O1XzAu3SS1H388Ye0J+Y/fnyEbGfe2K2oLMl+o1MFXRldxw0zBHp9GO4VMCewXfuVw2uqXy1lW2DGS4CynmR6MFNT8ERj6qlAktHPsuNBwnI3UsZe0YzgANf8Htmo3NkabVrfKamLAo/6jT6LlMUCAQWVXXQhsdYWaZ5IGJRiEaG9x9vDOkGyXlqmIn5Z0i5XED5wUA5vAUhjyBaJrAWZ2DMNXOvGxAUk+SPiybocogUaI5sCVU9Sq7CSmKhd4i4WGvjVKJxcw+T15toqfNhLwMn+2sipeIrHLjVrz8YMks80Kmc/r9lL67sZZWRDldNZ8qrj8XJBvVygRYCGqpeAFTqUyGiSunRVsq7i5XyL8KqXMH/gAD+HZs8hkEfydu3vqCHkIeR3EooKxnIPwfglbzY2nsHfjGqOebYDE2tmgsLKu4F+5shiVYUDgF4U5nNCutHCo7IwQd2BwU7l4kmnablraz+bAyeTVknUonA4eWgECbUumoV2gkrCZFeckCv/ULQuS6f1XL0GNCxecbKoxVzdiAwXVGl6LSo/92nVRot1Und2M9qa7NkLNz48y216B083Zhf3106D2q6+6X5Yd6yc2lrDBm9vEgOGx8gl40JTSXdrThHPvlqcf31yhiFyqbIsRycI4zAHJU0VCovAVM9Af5gcuHSt2oHn6SJ7cl3MXuBtf+H+1qNxdlCTwa8uyDr1zS8hhz3JvgjwxGfcFitHR+cBqAL33zY8YCEIEPvWIVvHbLMN/v70NEeA3x2funZVrIJrCpxUYXXVoNoEqakJmZk1641/29kib/8g8RIbgJoaYWwePX4qKgOmqnohafuZjUsXvZBnCR8l2qX+qvu2pUUgSu4uAkCusg9rYvu4Tb9ZB3389LkOrZYOcqZhK+w8WyoePzo0dmGf2BXGibmqUzNXjx4/B9kM2SqTbyWTcN7Vhj1ZyN2Tg8qUsB2Vz58/tfzE8n325Vfhg+07FrZvb5gcGWGIWURstj4OLQalvKpkLfcFQ3E1zyAV3p5SEuJAGMpns7l5xW9evanbnPf/3t/7e2b2Hj06VMDoW5NnGxv7+7t2V9gSdXKSM23YhQCU4KrYboCr+bNVO5FZO3SA5q/ipOY4gYRXCH8/KzgPwEPI4AnYCgNbVWufphlMdZVxyC1/OXuVoUZXdrCmAxsMpJr5SvvUf0oWhkuZqkJEqVR1e5uu7qo7TJRq1g6P7J0vLkTbhVhYjEf4q4+c1cDITJyYojFNKOajS5YYIVDHlCbqey4g5Yrw0AdEGhJDLeknaFpYfHNl0ZUiS2eEhlQNSgrUH2poysnOVELh2rkkpMHWBwmVMi3E9uM6yi4WmFzUHFbLlMIVRA2U1RN2N7tZXNy4pBz/0iC1j93drXcnZ+zdndvXFYRyY81M99imVSY7RFcDRkCwp5+UnVTcIABC+Tv35FuuApOi+ePJ36+qvAM7SeOHYYmksmv/8OQZMDSe7zwheeg6VkjnOKTlaScqOIM1JfJUARUiiWRqf1mtSgOJqC4y4IYfMEvLP+TFB1JbUllhWjRmeCKQH9s1zlqCzP1M9Ow4SxtxwVOrXAMqIQlsySkOB1u9loAM5koxuqLC2MEBjqRNxwv7hDbutnbX8je9XR/Pbjeun3y4Y//X+HBEr83vz2/X9tYne/a4z06uAI1szEinx0VF7nu38eryk5/9+IMXk5vnk29GV7/4/Ms3x9lZkLKs5kvlmxxL1/NHWxX/m/72KyYOpEgkbbVqJYR7CAxJM8TzYfIBpgM9EVl995jMEMzwm+kszQsY2u4/8ACG1lMsHTfeeac7d3J6Qunf7u8hWNTAOn7DC66/eGBfuspym7jLVaGSF+QceJkSDSE24HmdTnaEmGUTe3T42Chh3RZ6dbcsUWpZVFPiCVhbFoKZ/HJJybPhmcwsO3bAZHp1dm66T/9WGXf23e+fawzB21NHihDsVbmg4tHCHYYmhDpY0gqUb8KlzYTA9vplbjo3HNWjalRybwcVuZXQU1o8U1POinqiBM7zN8dGdfgGcnabbV/sqckbaIUgxrYI97wEoBZ0d8pmQ66blt3bXNWFkT/GGE7BqSCs6eX6zEoY61SNZdkANUzGAFoOfiyF2UjOpnk4nRVBv7Sff/GVS0gKe3L4DU7ih64h8EUg/8MogZxABfbkX8USIJOzeEK36uKvZTMKra1b56Cf5ivG/EGGVXEm/wSYk7+VjiRjZf1G36zwwwwkCZFRxwXUUwKVW8nT66yJw6KAP6hz4zIZDFnGWZJpB/S9sYkReR3rDUIFUAE15qoCZsNdFdMk2BIbkKFoQcaJqYk9ffbknJ0zeq9VtVpOmoQVgNmlIxS1NHmQLEr4nIThUSWecleFBIhTu5Shv1i7m8zMkDwAFkfZ67NLc8TZIyW7slWju2vzjtdZvsVGpj0DRsW0eq5kN65IYZ2F4mF2Ud5m2w8VBq2JIFebkHlDC/WGjfraYou1kUtCJguGFJE6B/7SHMuhuUx7Fbq4hMIwgL+e719LWobXMHrlYFp5U5vtBD70FLI8QK783Y2g+0PEQE9SZegeLYaZCqPupdIr5s9YGxIto6rYE6/if+BSfiPjchpJI2m9DKcWbsuWVzhFCcFYSTCHUxkwdXHspmmsciz5zZu8+mYaWQJL3isndlW0pXhVQsFIXK7uqJQQW0Sbm2V/I4ohRau5Xtu82ZvqBI73HutsOyswm+z7Ss79+s714p0u6/FkbdunOOYmgDd2jLENxCn/C5txbEUYOyF8+8tff7o+Orgb7bzbuD4+fec0tEIpciRGdycFKtOCg+XwAQcy5VG1vyxgDkVt+hhF6srpCADVK0ImMPVQSeHq5p3ySdhuwDO8Yg8aZM0jll+DctDKtEGrtjT2HJSOkrg2rVu1o1BGKm5NpPpPTt/tTLe0D9Wkbv2HSjlMWNKAu3sHm1tzd0FqVgYO5k7190OzLSitXkLzUkT
VuCKjIa6o7ta9O9mgSpg4dhV+rQxt9tq6YIKpMNYxFtSozQ8rQupdFyMNtOSh8KTgddlbC54bsezztrvPrrC3r1/lOiRnfiIJ6+6WpRpsLnZnlmO5+/s582TVIVqDas0oLdtQFc2+Pl9dQQiATEbZQBoXPghwDhpFiKE4o3uz6j/anu72WoQDVaYQx6U3tqe5bJd+CfvYQpvZDcrp1zqyjTnOexiTkRO2zjIGD9lX93qKugKYeX6ZoXDfEUXzA3AW27pUtkHeXrsmMPZgfeKCDOQd7u+ZuzUZiGk0pxxZL3QbB2OZsmAixhHE1Ep7hGC7pyM45MXyC25kFs+hHaKxGvbC0hWAJmmhzqtk5ZRECbVmnyjpqEzWLrJ1Cft64OZDUxkRZbtpgK0DGuwAxmsIU4u5k4HQG5YZI1+pslqJjF2JtSllpfMV8wVOP+eOfY5PIwGJcfR2emfUu00w1Z9xxURGaGVCiU+uMUdHzeGaelR0aWw/Sg2GUOUVq/dEzBd2JWU4b7QUDrlQNJ9pSN9GDdFWkLhydzWjPV4b28SpBWE9O2ET+fZkLJ1lrGjNaJ+4DDM5u4WyJSnDXnkZESNrml19472Fi4r3VQHOayiTM98SYm0uFzMXUMY+UVfmTM3uZNm31X46o2iJNOaDEv6nU5aL2lVR1iynObClUb17d6Kp4xXkTOx0J7NESDJFwShenF1+8OKFVVgVQdQdsKhVPzwZOyHJaGawGyVq20fGGcjzR8moxNRjjQXJCVRmyEQQVlWNh5aI6VyFDe2uFNKK65JGrMYHxQcpRBJ8NQgw8zHaXs+FOkq6sTVfzB1vNHR116luIhQKm8G3dRj7qbbHxjUaP2Uq98gnNt85qyOj6AsU1zmKkU4iXuKInSlUhkGnZ4xQNQr58/hTvyUxqWnYvFM/OhSQRDLW1yh34mi+B9uLt3i49vjJExP9u7nZnVkREKTUb+orWiObp2NYM0Wrcn1zaHF46ITKeSAt895mXsWcHSaAPD87poZuc64u565aE7l14eL66uhgx7rT7v62qwe2fcZr6jjnqcWMrc3x1+/OjPZ/+OMn48X0+vxqfbG9s2Y1+85qm/NMteVp496Nk5vTX3z161/84l9fHm28G83eXl7+8f/6HxjoqzefcvOJIitbZkH0iujElIXC95MRe5pyrZPnSD7gq9uZk6RpwVkodTQ5O6WzKTFfR9PHyiQzEdcGdBtp9vC0nMJCxaupeUbn1OjECEjXyseT3NRB+wPzrTfV6aI8O+soD1UgqRZICl1EsXd0mJbrIMf62pPnjybjrcNHB8agUfiYX80l4qaL5szveGyP9dH+wbmvdJ3n6AL8SCc8JIcfJfyEyn6BGk+kpdhXgzZatSmMtnErUg0vyJ4TJpHX+9t3J2+3p7kuRA+IkGfqfs2tQpsajvZiqlZVnl6+3T2MRt7Zm+K21rdzP6ZIDg+mj/aneo8XJ2/kvmtvOrF3g9/ZiY6pNmDUdXZ6Ks+DvR1A93cvSIgmbC2SHd2ebB2tHbp9/w//8O8YOr9+/dY+PGx0lzv2O7KlchWHkTHdooA6Z3ji2MNf//KXSv3kiYNQGShjLmwGc9CiWAXZvUJaj548PiqLm0GIRhthuHdSD7u0XJpGqSfug5pOFe2zzz7vZqjdEaSzsxOG6rPPPv3t3/kJ0Weo1J+C0K00tgNayut+Fhp6b2f/Zu56uXwLkMdlQMu1K9l03Xi2R5IEljLiCaE9DjCCKUJVfHtEdZJ+HWIDUJJHaklU9YXBZvIvCaL5iFANf6L0amov1iLh33EUDIAmsqP4OyQJS6ryWn1nsP6lJ8veAUsh4qgcspr8RKcIaWCB8Wapi0Zr3Vukdl61FSQiTtLrmYShOjfBh05nGGozB/Awq2IzjlEBAHC5ZRozXS+W7/VoDxpTzHppQQnKvqvvhFRBQnKuHJwLqfDYS2Mg1g7jrLFQ/dcXU5OMjk25dPp6dGUFzyuTnMuXqFQ2SU9LD2Nt3bg6TKfPlZ32H+eo+XadqdQfZF2yhTnHvWJ+aEwXxRn4Sg6PUx1KHU2Ln+VSwAcDmtReqQAlV97UQDmwYVk5ni4IDyWLeZNFDqF3kRsMhZzAhuwGEKmtjdFe0Q4/I5q/rNsU40HXdpjiUzgIQ4GlCwGX1+QeA4HAbBTJkCQVXXUorvrCwVu9gX71FCJrybmgKjz1ljqOOJcTBSfKFTTqVedv2YfTLDPZwuGcv8YQROWSsFQzwsL5cpgH6wokurvsVHr6DO723l6bLuqDgyGTkduuQzXpdX128faMzblfPH+6Nd+8PL89g3Z3bced/5Mt52Wma1dbuGD23PxOGmCZdjMSN74NkRv9Fy7aO708twnsam2ugy85K+UZWY9QpWXyoqXUbkrNMRUZZ9bnSS0Ibt3fXC6ufMSPpQx0MVyJ4EGsygnb1CT2L9tgWM0luNxQfCzFFQwUS7fuzK58bJCfUqPMVKbuucYFzNHXiHF9T8RYAMVytzpLmGObGAkdjOwxWrLXKU2O2F7aym7S7OLqeHp6fXmlz/dosYsxoa4cipSi/ZZe0KNLI0SNiVKodCvtkQCBrkwNxYqz0yG7vlpCm4fCyS1SmZIIthVEfVgtcmH6MdtJWVq6STvmp0OCAM58woxwG2LpXqcCMrKzOlm7HxkeUySxEbcL4+TtXJ9oUJEOAXEL/vmtzs3FlVvMr//kT/7kX/7Lf9nH0snMUChMSFsYUQnZCKp0mXlKY8laeKovvI18GrKoayFJm4WYaDCvkYBuYlpETQKT8KpKl/tRQNVMKMmaN/KERaY+gTR1wcE0Cx+lpqSkmnLY2HLF7mTdZDq7K7afmClfjpQszRWGe+9nRS3rqf0oQJxYT/4iMn6uk/CArID3j0QV2kaCdkaLqJrTLFRLAdVKgQXV8n/Y0NhKbhq4BLzVDvDgRXCi1E2A8S0E6LIVbtVMq4JJ9z9BXLqEQJERhcN18PLpHWBCYy2WTlyISn5LNxQPHwR5cpKuEiz5JkrF4LhKwvQIOmdcVUS3tpM42NiUcvBgLG+jBd4eT4FJrcHLJ9M4C4PwXNtBjFzruXN5fXF1+vYdu8IYBt6Eh3Xy2p9YSBKWVlEmOVHZeagxx6h45eSuiTGKlFHcTTYaIF4jMuAS0GDg0QBdPCsnylKGNzTCQ0kJaZiUL8QrcaDhia0vKVIiUTgjXCqvnUs/A1QNphnSr9CKpXwaf0cFb7HIKzB4YOMEeg1ty5amv9Ro8LMqugAQgLHqyBNwGmetjcPGweNZObwvshC5cDxNEiQ6lUIUrjPHPQi5tMT0E5Y1Kwms/kBCSwdlCcUlhNkalglGes0TofBgPh1W5GesrDoyFmMJHU+Iwz2haLi+mJ2ckrS725dbzwws3FZqY/rVYm062tm0TGVSmWL29eD5HTMiX1nkS56ooFeiB2p3uML7/pW146XSSenaAagcU5s8A0OwlB8MpoWYEmAwAoFJhQP6FmIbp3y5xtUJU6QOLFZX2mQllkvsfe6SUEF2ESVW190SQdkc+GFuYDjkTqEpA4sYtZAhnDYVAW
u0PEFdcgAYc818GGQYVzFXjqzF5qxcsi8nALCnLBxcUyIdEAi9Ig9K/vCy6O/U0okiFctUk3x9BlTxJyAoAiMJbDoNNCJzaGCNF6xONqNmZqr0W2b5ttgIZhe8TD2VGvHRiYu77UmkBTZP4VofABZIn0agvQy/87s/+9t/+2+7JBfC5jZKRJkPNA/CI1BUzUdkHJnVQgsHflBYI+BqQenmJ6GVFXyo/c9YLaHsOIEhoyhJwnIFn8kYtpulv11k+5hlTbd7J18yWJyrzRoLLaB6UaY/o500L4RxIOUCkosC5QRx7e9XcCgA2OFNUNMkS65SLB+d0EuHD7GZQglkiqTXggIjYOKbDq86FyvnAIBSQToG/pk/iKYut6yG1WuLXY6EkJHMSRTTPZtm48ewOmsWUEaORLBO6RIkiWaSGfVM/qV0c0uercRDXgZ6ccxp7Gr5v1XIpjKUxjW3Ousk7xbuKsZW0NoNZ8qhHCkqfCkhdajgnQFxlpBLvoVfxbRHOrkIF1IZ5lFF0Z+yR8UMds1xbRsQTRY71y+ePNVbs8HWzL5LgDptNfZ8CLvHs405Il1GAkzku0yRJkEjmKc2zwi9sZqs0aBTRlk08Z08jK0GPJQ6eXkZ1E0VShLSAgOPLDwlD55MTC4LO+CRNWAtUJQkVD9Lz6/svgX+EL6TeLZbkrRi4CDWwoeo9gdJpWkqHuLkl3sXB4Z2UlWZ8hDSSPopBJGdRJTSaeTUaeMEw6NEwMCsKF1WtFeVH8jikuQ0ZnZmW8qt8Ri9Ji38agQYAEk81Wl5SQlaMzzNKNNI2EmabJnbczGLK9+cX/A9ByuS6WRf5OgvdXz91r43RyDGo8u9jZxJsN+WUwxj1ZELXceu83xgmMmtKkCAGukCoqfJaG4gTN16pgOJPN1yLFrLGT7fH1Hqdg0MsGvzPd9Ar8oll4GZYUu5wh8lwIO9PDhsWCWv7A4hTuu5r2FWm8dId1hdh7hFCSfttHbMWy0IQMJBElrjov78eDZJ/BLiifJ2OOCQWNUOVu5KBBhbPPWyIOyQzC5r/ass4IHE6MdQkyeTPJYA7YHpglDvWRiLkmaYLPTgmPkmel++7eCSBbbIC4MBI4Z7WAQAHUJKgGm2bdEFqxKEyUe4FSNbG377d35q26etIqHnLruEfKoKBmCaMjz8Sm/Csv0Z8GWhL+1UP7Pil20BACKFq7KmR+nsGEBtGMitWkrHwgNSjjJDj1mhLhpAXTRJ8Da5VNccLc1xsaKaQsl5OMmFvx9dNSmC2nV+7YeFE8IlhwdKtnF1FAyY6iEQjEClW0lg1gAEJjWodHkB6mt3NXg3do84Zv5NiyoXtFBUh3y7bv3o7MJEVV7ThoaWga0uvwyKvNhIyxkUg16J2xzMX1AbMqw+CysXItGRf0Vf0V8S4bXmZEJ9/EtX5HzrIYt+B4EqTysAYVPUSVwaYWlxfTzAXXyXqaArxFe+Ca8BctHjLS44VlswOryBO5csw4R4BQ4Y3KlQK2iuBxxvu+FfTV9f2lAUp4fl4gWr6TnGWjebAbaMYHVBe9Tj0RPNV35SLZpkHBW2t713Xi1ZLhFxFRrlXJo3nIsjxaE1PehSK1Uxxf7ueiJZs4lYg2n601nJtMOydPIS1YUNr0odQEO4uVb3YjtK13kACCnVle7k4Xhl4TnglJBLHZdDQGiot6W/6AcjRyDSenYUPx72axWrBwOC32MTjgbwAiEBP6BtbMjuEM8BP72av6VYRQikTe/ex6bnWVfT7K06UD0aJxWfqSfnO53YW7vbf3ogL9VRF6tmRJjbVrdH88tz6R4/OZouzjZGs5Pzkx1st7NiZ5usmexbEqCk2Vmkum1eiAxqF5Zxtb/FyBJQWjdGWAWsKaElK5Srnb1DXVIF5wGs+DzdfPDF62RnV6ajU9gzcYVCYFHEd1nP6CSS82BI80TxLeVK+zC8AWBgkD3VKSQKbq0dPHPlSY6EiwUsLScw9BQlkbra7k+lWEHrcJDtwPA0WhjkDiI4a5mDh4hKUi71Sw8DiwVS5NKCjpzLAlWQmErrsjRamGFArVlyz5zqi5DY/B0kEZr++E+N/wSiXKWAEa6f1irDKycqOs7CGOMhcVqrXXk5EtcAGIkMYClCFYogi+InQn3cSlP6F//iX/w3/81/+9Of/jQcu7lxE6DRmzGDRkzVAeZQIqEqA8+C5PxNfaUPRTUrOSJtMupSYEJ6Osk0tNFvEvqifSEkY6EO7emTj3IgB8Epgo4V9Zetj94ITJXX/cw16+OZ8rqI3SLZIuJdBlgfOnpfXpKj8725QspDB2Vcdb1lL4q/PMuyhaiV6wKD4RE2PMl/zD3rFDuSLUY+NxVv1EQuIDBgB1yzO75oF/tsQCKNsFAflzDwWOxFIEe65QQIUUZOeSmOA02HIX0r+3xy/iM7unXNjKxiD1MnrKIJWrWvOcRakpbKfjmeSh7JtdthfP8Zp3Uk13JA/HqizcA2FHYLTHNOsFgyKUsaVGV5BcCfVIWmwFJUTiCnyB3Yrx0uREI86QWjrBshQ0nsYqAdYvSjZ1W2HTUm96mhs/Nzy572/GxWt9RyNFrUPUF3lwEtRGjtLoJWKpRbIDk8PMhMgru/oijSJhEDbUpSyr1pa0IHwgDwp4xlngEDk1wgTzhTLltUyq4IaRixQtK2y5FvMxhWtjvrMKpcMBck4HbNmQ4c/A3cz+ZtUy6vhlmmLaoUrfiZKOEDgLwQ2+VFp9eO6ucQ3qk6YXNGFAeZ5GK7jBVS+Fd092/HNntdq4ULiLkfZXVaEmbMzk14WG6voTP9rvXaDpI7NWRi6YNtMq234x6H2ZVu3syXnmyy2M5A5y6r+wTS1Iv4tasbsw41SMqireagLWSl48qAev0q7aAcptlSxMDIAp0g0cPfLG1Pl05Iq+/mgygiZAeEm+36bJAePTIs4HOqHwDXwElbrgObRZ7licIT2ZAtAF4xyjPMyeVh9KiYHrwWaRaha2QjLxogWzxGPoly66PqwkEQR8llV407GyiMa328Q0l1pa0Bb+ykypLvKuvSMQlZYiiRboKl4vCh2KMRLQ1Gx3oyMF1G/tRdpTUsCc0oXXZxTL/v2F6haFjHVDBXVfbaDnhDDIzMsn8nCUorAjNdDOd4mvNPMRXW6bqjHI2hSY+1IPskaQCDZMX0SWUNyrdIjLRsrDQUi7mqyqUka4OIMiZf/YPcHGL2qnaXIBVm4WSDH3OQQUmjh18IJPwm53lAetLuoniCDTOLnyBBeHLA8E0U1yNCoo29Xql1OzsUFMO1fYG0kCQ8ngLhjPg+dLLnRLRHFEReeTokGwWKlOHZMGIDWcAN75Vr5O0XnsFLflLCVuaJqte0cjQpamUa059SV5sJJzuVJ/ukmWSIJoklnOzFzAJpEIfx2BdG1UQAI8sfEaw+pWDQy95TkVuUBXWTtfL1G/AEfM91XoIr9bfKiLOKpn4gxCnF7a0JwnWKGEsVJlXMaLX1zqEreEDIAz5gJVWdXefF7oaFWn3YhOdqESNdTXaVLWuCzdxvJy2Hn/sHu7d3492bO9tR4
5Im0+CialukO/mxOdJm4UqD2T3cff7s2XFWwuL0omRAvZFSf0lVtRJSq5oSg+dGd5VdJYoJFUbazHGlLPmYwEr0u1JLtUFVNMUiamPwSC6EGiFOrcd7Jys/PASXa3MIUki5eJoqGL7lqvFEdlcO2shHzRbDKa+UcVWilnCwEDb+Icpr4xjy5eEEtsezqfEknhJ2+CqwFWvjyBy4gkRUor8yn+NWbwpOkqbKzrLxONc4cQgUfjN/Z88wSPtkeKBFrRZ9uL23dbd+eTK3LX37bnZkJ2l9DOB+e3N0tb5xca3fu3O9tem7uvrM2RVLYiI0sJLRtgA3a+n/Zh6p5smNhpgZIpGBRjnFadLb00UTw3xWSTOKMpq3Nffjn/wg+xqq06bD5BQ802XjcvNBwuBZ2SreRtVPr8DaDy3V3GLTPLlliAEgMp/MXSYkaanSqlH0sPGEx9fi9FPZEJUwnWRJcpmvZHHLuqTKSV1EK33JwKQw0tSieBGcUkdlVBcQMfifWrPro479YpF6a4KLT6l3CYU71IZ+kPyApSqE7zUG+KUKL13snC8eZk3IDuQ6CSOh4q6owrcMUm2Otz8uUdmnd6vJWxCyZU59RYp6KOCewN0JbAJNA/75f/y52ebp7n6+q3IT3Y7IsCTHQMfXuTBEIw15hrJdUk+zQMDUAA5E56QD7S8AYaHA0lE8aqBThc/d4qoSvaobpGZRuE7ih3XVLkRZq0Nhs4uOhq00tCmgu/PL81evX9UJ4lxArJokNFgEU4MbuZUrMt4/RMd4rFQnjzjvYBO1EqxOoNBCOnbAxmygrEqrIqmttay41PCWhqNlkVvqIw3ImKdQtRmK6KR41WDqw1g29mEVZQ2gmhwe1Kdr0jp6TNRUZfeCTbdVIzn1heC07LK3IMxHFVx+UhLKmhsYLVB0xaUcDfrwuQpcxuannJKGP3VVIAITVsn8ClcWT4LoJjPVWznme5W6gfzfxp83qULWylx1TXtVx2Kz3aDseQwz/q67MSgbAbtPYTuQvCAgvpl+6x7GffaS2bCjGjwNX9Z0t02XmCIoCaTXJNzZebp7sG8dBUOM10k8J/fYiXA0oEtqvaxkN7VcNFdUyo0XsVY1k75Tl+chvsF4CDHTCC1IKkM4B1slT7Nsp8FraQIlaSdJu4b0REIDPAhZEobWUFi0IZtrgpNXdZkVSqCEcHZUh8jL63cQCnnoxMI9hBT65WtnNJRogOFJVOo25opfRrLG4bSrmu5wu6ld/o6Gt46mf21ZBnywdzge20eeunAmJuoym+22d0eXLrU9+/p08+J8b+Nm78XRfLE1u3Wgcu9+Npod3xlVkYPRxfqd2Rr7KEgJ9ZyJurqfHgGLuYkIMzX212QhXLlXagUNXa4q7LLqu3TNHEyK6hxl3ZEzkei2gv3DPYaKPiJjYp27UMXb9WWp5gDONNpGtWRLuBLXmHFG2g6RnBG6vGJTHfnd6YnhSKvtRCiuFD3XXTXLdppIzHJAj+hwLBWaRaYW3WAV2BnFg/tVkA5sCkLbe5GuOmtqVhSqFBJe46EEgUcSkK53xKcINVFG1Emy7ELGMjlmhk7luleyGvRI6BA/tPqT23ZQRGMkSx4qw4yapFD5UFdCQqXDXOsuZWvkJVGOCiRCRiTkzedfsQE6fC5A+tHBEbAkdEuvSc1qbgYBAnE1vVI6K+f5sskeBhuGPXGu6InZ5qEujICQ5LVz4dGJhU2OQnrhJp70SfQ4F4Z0FzOn4HxBZnJcp7YN7yxc0WNM6bPHT5CEXPDwX7t4aWtTdSOYfZKhKXFyFRmrxpKfsKSYQkdILKQIrZ02RZkEAJrXHevZrsOb1hRg2elbagHvyFjC6F/HYtkQkrrPad1o9nQSU3XRZQg27bCcd67RplFEFDIkjEqtLctE1zRjpq4YS8JhHk1d/I72xmT/t93Ol8+rJsRgNr3/7BaKi3kLmv7vR1iIYtlW4YLQFyUSxdvhyQKXWyhFYOvLZ8+F6CA3T5UrwGWGcZIwCclYrqwppqOV2cz6QZXWU4xw2AASHcnVsRB13Zapc/cEgD+eBSxAwQQSscwKu/VLQHjCepdhgEd7Hl3fZ4KyemqSWzqx2reRDwq+PL28Ygzcm2LbsigfBHX/sYu8CDo5xnP0zC6vZNQIlUjuXXwhYU1m/P1SAzl4J9D+VB+Lo6dE5eoxH1k3rzW78NR+VYtywgMSvNYuX5A65ijEQ+GkmbKrViQmbYafygMpFjHY4tmvQsw4th+YV8+W4ZAtMVdDTag6uXBh9g3TEfzN9jbJCOhYkHBqDlW5oZanOW/SFarOQmUPkBJy2KpQPIDhhMTkhwx5hNQePF2KEM/wiHp3/BrBmarNZcQMVjab7O0emBosgjfrYzKZu3MmEIu0Xk6/DvHIO9we729tPH3yyFWmG9cXB65EeZ35IKdk1xY7G7O1i7cXF99c3G5c7G9M3dfuhoetnZEb162rGOgTF7clEiEtj5Tafod0Jd1ay+KZLGTrFUmy1tv1VF6tSVlEeSqpeiTDGKJ5muOgaEwx7rui7uDAZkQ7ZfR51Cm2YI4xAI+GhDnv3r6GLenKycUvnJgjU5CectTKsEhGdJxqP3C618W9R0fyRQMAU93SuqxFCLT8iD/32ZQou32yhGOSp32nS8CiULlbRFTWknvmQGFdlgESDabYu2jUCgBXQqLf6Ic/u/Zo85wSyUgLQu1Y69GxE+7bqS6e0IJccHdRc6B654EfRVmnDk0+9zxQwkfzfMlpZmqD0vNnKVITAeaQk1JHW+qClmZDFelmss0EUv7ynU5MGt/4TKjxlVI3izQmw3V0QoJdGvLJ8Rk6P/3VZ5/8yHVIv/tnf/pvsCFTylYad/cdLHGNk8F6vl23TqFlHCOtM0DNNHwY1Z0Nihw1VseZA8Na1tS0RUUKAAZFIxpb42StHdlAo9JuznNj4et3x5obhLgEjIWGtesCJI+P4EhFnTF8VKHColnFESHCwCmgAb9U7ycDhUqTZOVgCa0r55VQepNf/A9igUuFR2I7eQPUM3fRZtKdiTDLTpd6r8GjQioxnBl28UlqhGPnCvOUA1KRmEwXpgsY1gjRjcokG9Ot3ZcVRFNaTFx+JVkSlikfY9hg0BClwupSLQYifCENsCRdyErbXsFBAldgHhQzEb/JSVvJl3ED5gQGwZJvjUpNhJjay+/pn65nb2QgBI1iqAKv4BvhgFZIg1W2KbKENHpVG0GLEul1uOpLZhhW+toosyaVHPywmOk4p053Tf1Y0N++z/mqnJL3ednJNnrkYvYofOKtDqRcErgSj9CwoiT+cgOR3hhCht0XaiXkyB8TSUgYYQorOrHsimZMjl3xojM+IJERNxSwPbJuh6lktYGHJA9f+VEyuAEGL6Gioxu/J8LS71kxuTPqZ8M0EiEDNp7G32gbeAgRKyHWVKFToQ8TDuIAJktVK4kNzsoCryURi1Fi/VZJwwr6K6nqgm1LSwAoI4vSZ6N7I6jJaN2ZoZ3b8ehs69qIeWYHwN4tc3y2WJzdLGz+m+qJq9uRbi2dYx5KjzJZkcEy
P7E2OZaWPgHkHA83FLOZH5pXDaXoX700XJWXDqIlqBgqNj2A0v7R2tXplkqaFLka4ID/O6+yC2Q5ZOAnrzDSwnjQ2uauheAPMLHg2VfavzhWtqTagrUfsSG1uA1V5Z+vqrJ5k+3YJ+bKVA/9BDkyhMAsCVnhsALx1n4Snl3J9pKEfqIrBLNLJy35IC/1ItbJcf7cTpNbM5bfdQyCtMI0Uk4GyBbb2XVgPUOAOUcqwYU4/EyaSxrUPsZiHpKwlya25SPXUpXqblJLlEoTRmfGoncxNDqYpfJUnPoCXyoXZ9PGa5oHdic5s55fWsWTq25JjSQUO2vmUZsdxRPaqirxU6BzVF0jKBQrIyw9MDFqQYItzvYKq6o6Q/TPvHulUjVC7MrEW30CUBVLC0CROX5dC5DL4Xa4VnVfxZM8dDSWfgLtxgZA2ULjA4CGT9rCI+NlrFFPvgWY2wKNjlhOH0VMdRWS0hulL5Rfx8fGGUCRLbPJAhiqGKY0DxlEz2Sqoqo8VRySMs+Idxo2agVwUsZXw1Jiod+fN6oZaLo2QCttYEUEccEnfTyK6Zkypi0rR5AE9IGTqPRSqlkwoPhWGtYromNrtQEAwRcHG4xxhdAz03crV80s9JDIriT+gi7ZXVkLe4+FmxsI3RF7uSV3nKkuckjtVPmujSvUri7BJEmN2cPUmOs0V8rOdnjLTNXL3GSt/BEUIkmudDKXmCtrOL2mFNU7ib847lVgjGKxPf5Mvw1dnzQl/7mcglxbe2QFeKV9lFRXS1TP+EVeKxc4O+tWmkN4l6tkQ/ySmGZvv1buxZHELx36ICwXT3Os48Avca6wDfLfWQMDMzwfpuLHiSXiYkLHFsIoOBg6rfrxx1+5+2XGKM2eZ4M/jPXj0Wl19w28UKLg8JAmjLKJxtjC9KpFSipU+NXs8ppYr413TmaT8c7Rzt7G5q7Zu8zp3U7OVN/t+v3ldHt9d+qKg5GbPu590GkjGwcpqGpB+F0NLFd6dLvRz1tJHUOOhqH4/FyKkIJXpYtbOcR3kCKwrE4b01N0qiTW4cTqHosqHHXhdTUcrxA0Ns/2eB8CG15C4soMGJzwwxYdW0bIiIVSZxKYf8DA6Djg8NpapQaEqIiQV+XikSdIPXfDoB7F4og7VDSNo6M9WcuCS9YRw2QECYPIow0DyIJEDfqFMGAIAd/E81j169rnNwbitwMKDUm+MlSQBI92Yg0lU0ub2p1cKAvdOQnFhh2lgJoGmQKOucqdKmGm8IJZPryqzl5SgdlrShBmZrhp2IcSp3PpFmDogYpYda+kiREVq1bdFQaSTKIcnuhdJJWOhTAhq3YRMlYNpJE8JIn/ydPHuwd76VNkH5hPk2/uWUWbbp+enJl8gTorZ0Wn4Qr2+iZF7lLB79wQt2P/pYnm8LlyXJqrFXdCCj/Xnii5cvXqsXSo5DrqPXC992vHMipR3ObiMrW2bKbmw7DBKmRGAaBrFEj5GVehWP2zh81umVU2uZtI7fEnpBLxZeAU1xo4FdyuRxE5RlGlUCAC0fVOMFKN5ZbQ3/4Rs0oVIFzo129D5Q0xmNgk9Ss/BpQ0sEuRcr2FRigcDIqjFKpQBG0Q+pUoR3y5FsqIUjVmCWFqDJlMrc/9pdAuNsW66uwYTOMmlhCqmMTaRF5Zs9PJT/J8/yuEokSd8oefwSysTm9QB5q9p4+QJG0GwmleOnn6BDIj2SnFyklYeutb4gtAXhwo2MzFyEVRsCUz9FZiai1K71hGgLthd69WeaUq+Mwrcl6hepgpfztRK+97kr4TMrzy4CqiElLzLWF0OfM6nZeYVVhkHv7Ogl94k5HkEBRhYpH2MJyfq3RhbHhbyqJTQUIAPNu+CqzYpTwktsoOectDcJXM60wIvM8R5FASBVeN3PiYtt10gcb69tH0fnY7mdnIk7n07fV7M5Z2iprByaaJ6eTA4ubCLf4kUHmznLlm1T15hymsbhZdSjDCbQRwYshVhVecvFcC6R0xw3OAgU+Fzm/y5UPjKtOdxdtIGjLEGotIqI4r5xRH2vYLT9SqWhGAgZ0FD+tifwO5JTaxEctUEenKYj1XS6+vA9uemPqrPmZ6CdmiKbko/NUswWOhxkWDm9/TnSaZ2s22vnSmyGLYAHfWLp8YKFQcSSCqoMCgVl3ALqzQLrtlaRf1bXhPeOC3KIGMRlVDlwBof0JQApfIKldfZC4GjuAH0MQwL2iRHTJysUdxxisAYGAg5JECxzuwQzxN/XPmBs1hMIqmMcBr8ab8kF2pzDkhuntR90qUymL5xEbScj2p0ZgiaEM4Dicn1jPk1esy03Sal11VIUoHzDW1xlVGbtYdUne+j6HfkevWgCxrUBddIHFzjQYMjbxpk0U7gcvq8V6ZJvt2XkGD6DTlb5BvPRsG2HdcJ0+Pi+ykP6l82X3CE/Pk84xhh0nL0EyEWhOqiGKGQVUSaFb+8EQuJaIyoXZLxOPli2SGzAeCvlQHcsGQWpEkp2mUmZZ01DD0N7UStm8ZtKqDBsjTX3gQojrJ8MRpfoRhrmI1HwQiR38D/Hs2trJ7gFx848EHQsC1/DXCIaHX1GdVv7qMJ70UaTMw89T8g6gwp7Lt+AtLcFPd2WIT3upcpi/N9SinCPPmGhcBpMduV/0YztqV44bUm0+m46lWKLPkV9204EflKnkQLtVbMWgpxPxLreqoDEFmsTRyafW4aTFracfHViCW3+RWUoZKdxiM0tF0hTWnNb1ii9cq1/sGKVxWMDQDi7x4O2FT65WH67pryH4Ck7KBGyyarITfK25zEgIQ2Ek6sJ/BWQ4AN+DnB+DZAIUyPPeK4A4seI+45FinoEQNuZMiKEvFZXQlHYrk1miUSawKaOslHGe0bks0N5s2wrgR8e7q9JrSzZ6/0YYLQrP6eXG7vTFfHLrEb3zFVGxs+iQaC0d4bP1yd+PaeH3mEMFirjajd+pjOrRV1wVtrlxd5KEUA2c6fHhKzd9ll0o/3ahEJbZe5hFIk4EBCZtXrrF12gFV4ylxC6/QY+Lu4BCBLlhCUgYrOSdvLrdaB5NQ13/cgaQZddCVLttSaq0D8uQSxZp2mvEeLli5TP2JzGoil+a0nDqKAKBBIPz4DD+GZLxYW8OpXZAdxQTmqiI3OJUTSO+CR6BMNS9+DnwDRBem/WpcYLNKl4mR6k7JyMkED32LkLXkUin09CpjCRJr0jYflMralUCugYskXwDIwEug7CD3FKt9aYD6KTby3C1OXDeeVlkT9T6ZBS25AwiYXzIXKs7nZ00AJNbSdkrWAaRQ6T+pC7kAUZbIpp40fgoX6A+F6EWGARu2qwC6WIFK1ckoRfdkDUC6WQWwLoZVXYQ1/SBMhCpUFVDOYGoysEubnFeuCRXN8XsC72doK1EbAjt5Awx+mSVtTcuWrdKKqv+YrlW2OcduCUn/NEo2I2L2pWxRI2la4ocoPb5MT7dGyQCrgkUmIz81amlYswLJGo+CMPNtXqIfsqC1VDFNbSFedZYrDQyNpJ8dpsQN2U9puRJXmUeA1LQoVHltagf4FXA2rXYgzIYt7ExyWhZ
u6fdaVRX6tUn+BoCkM9LdkyLJ0teKlOR/UqluUbExsUDedWRj3ZSZ8UrWJfxR4dlPtdC9skeIYdDHNt7P0rcRj3vGBhXAhBoFF/L01xAALUYIKWwRbi4crfJ3uCiv4OHhBPJzAS0nI6+Esj2UWt/W3CGeknDSAmt/VOdOCWtNr8miMfNwsDaYp9f2C0zyHsCW6IYnZbZrYsNbmvSQkKfJk+pheAf2UzjH79l5VUD84UyFN6SAxgyAeMC5zK4ytQGHLnOWtvEMkNI2Hs8AxM55xNp56sn1dELYWhth0u2zwjgxb7KZ/sZ4OrqxSZBMpL4uZxe0h1Y22d+zu+l67fby3g5MO71caUsqsJfIutEL+tQOTemUHpYh3oaC2iGSfKESO5Q0eVfhhFT4e2FoGIFF4bIDp+5InP6HYoriBrB+be55drhnM6QLzt81Eu2/4aKQ0fXcBo65o+6SN2Sm2DtTaw72BZD9uDwpDQ10wNk5esIJiWWJ3nCPKNkZwaA8KatowNpDX2dMU2NcxQHZMKqJJ9goHE26xKBqMOZKNaBZXQsfooa0iIPcq/GxcS4AFSgXF9M0TEsCw+eYc7tCk1QtHnA3hu7D8lc2Jfb4UciNqHwNwm0gyehubf4syG1rgkULy8FeI0rHnkybGnH6CvDV5Zt3JwRjc+tY0vC8JNZuTJrB/pjwJGY9rtlYIdHKSyLrRyBH8vEhu5EpYd8GzZ2I5iGV+r2VbYZTIckLVU4bVQ81rKg+K8wlcqx+DMpyiNoUDE8RTUGTtiSlJM38Y1PznsRifb/CMKTlR0HqP57VnzpeyUTFAOjZz/QJUaRkmR5ccT8jpGSowW/hUDbtpFXjQP5hkupORUVGk8h/QigBFQJVJv8Ym2AJBUqFIrHA6lF13HJWEE2SZ2D8T2BgwLfrtO3v8ECtWo7wrgAhEbsYyqhzz3aZF13aregmgUu8D346sMEGbC06Mb0o98PIGGiz9MiLiN5RWsqhjOlDxlJjhHzJIfZlw5uTg9Gc8kRA9pVOb2e5aQl5GqFdSdF41ktr6yr4llQA7RmoFcWFjHZ80XHLdZqEpzeSXhK0UqENqbjR4Dq2/EZdPR8oVj/XxLodRAI5JZVQch7YPOPPZHDYKG3yK0smtl1zbPAnoxJg8hHiV/zvyvYE2cTAwz8k78AOwfkucsd6tpMkOB+4VcwyqF8fwghpVHqTkrdgdUjnZdSEY50eDQM80oCpTRmarCo9SQWrQrWvA5EvA6yz7zqzW86NTiauC/T1apM39xsMDja6bufp8yf7jw9OZmeXJzTbrd3toTBFSG8A/ogJnWltzHxjGkp4rvgowe1ieHOu04VwMGKb4IdPEAP9HV70pxYUnBMIxnN4Dbrf5Do5MGrLyMCEWk2C1aeE51fMlaimQWrU5sh/dQ7wjxS5NdHEJ0k3EdeoUpASm07lCQGuEnu8d2cgSiL/VbSHpIKkZ32FDmQuvlv1P8BzSpzGVkUY0sprspvPalBBUMV6rRoUGLDNO1GweUG/LoLtr1zadakRDZh/mgto4hoPK+Kgl3Heybm7u0zsneROyJKMNBy8vcksBYujneGbVeHrm1OUjCe7Ghr8MNigiXK1QS9md8OKkzysuE0QyCMa0GgEqG0ZqMIq0LLeO7AIKwVUSLT5JhWw3HtlGiRp9Y1GF9WzW/5wwziVDJa8aBHmeLK/gfbyNQGlHpAXXyNUKIc55irsWzVpfqFeOR7i3AniLzpRFX57F5L/dGSkXutZlgfGkkuv1dzEx2Z5oXMy8C37TKnZhJEhifdgxJTc12KIosmkBUqc4oBP1lCkGssRzOSlJWcI2vN9wSAj8YqCp0oVkvPT6Cm5ygb9IurZ2DpldG4KxBRUS47020ZK5ls3iWqPRDzJH4taqXnh+MPMdH3DwHA8i25lr4uN+nLpzyEJF0p8pSLtSwKK7TBIyBULZUMIkx15sx8s9mmUjdTOlGYjMu0jvcxM62UYnmXb4OdK75QnlIMIRWF9uguQusFaSbQSEmbGf9+eQMcMq90tq77QpH6LA6Gjyix3rgNhMPlYi5LNS+xL2b0Y2HHKlArIZn27BqJAXQntWlE6iFBCIlNgkBNuCQXimECqQaAQe7d4klE4U6mKvcv8SknxIzYrgy20AFBBUnt1hjUo3iYMQOQi405MdhP5UhkUQCdvbGhrt8qohKfU7hCSolfWIENApA32gmzuxZ/XTtIFxHcbgakn3G7gjk0vbSliyER+KghmrAOo+PCYG3d7qcAs1VRRhLvleDHNiVHhqZBssCrshCuqLIMbasjnPzBEPVi9KlZkK26Wp3LZd0YD2pxWaDIt35WIUZSBSa4QE0oKfw3dS12YiFzSnR/JAcl+WVR0sKXlxKpTPSV4usjV0uxAkyKFklM8NdsDOLEZGMXFKDveZPbO1rKcae4WSZMSpDp1lCaQeldGN6y7Xl2Mj3GYAafg+vYXOBUFLXiankG1EEmI58I8lB3SNwv3DGK4aE5Ufor5nl4xKcpGW45GRV1cMOSiKByPLlAeOc59xd2nEu7uHj97qtTX1oFn+cKhERpULlnneOpcjaImV4rNpTTHJ2+ZlrOTU4gP9qdqUhVsZi0yWkVnD9qz88u3xyc+suHOx7biloJ38+URJ3SyK0fTuV2f63hilHGbqtXO8IRRevrE9hNKJ3UK89RHdlCTQqbeIjFrme3cm2ZvpH6+VS785zcZYyIE7iqzOotaQxLXjFIlqaT0I3Ej3OkobRlDbOhXIvvgM17PyinBsie+aKl+hvNx6pvool/PyU36+DYZT1WTS9s9nTrA/3FJdKQX3jC8FA0KdGTcKicNFBwiWu5o0AhwpJA3m960nFSeeq7GoxCKDgMuSKh8cMCm7+cjgSN9/NuZubPSljIFvGDZ06YqBwlLrLXk5KvY2gbWoCCvacvmPpeTABQRBNm4IgqEYiGr9AW7rVBO40AbWtBcINGcsmJC0qcQBa+GFCOF19VLlUHKuyxC6ic6KAmhAK225WLz1W0mYZWlDiwhW6KkCg21BtM1nVqolXxR6ekVDD+YKPKyWGk7lGsC0tSjFCLM0SROg2pd1w7lMFS2WI6cJctXkcDpFRIh8q82MqFwvXADN0tfpUuhANBfMG9vmq1WS6TB5w3qw4GRzU3takZdr69NdiZbO6lw3Rs0YLUnCpGn2HAolIoleQqlTrtqPFEcABx1M1jdj2kpWpOn9R49eqIZGD9ZqY3WgIYkOlFQH22TwsVl9gXVRiQ9vuhTJlM2rmjVRvZ23UZjKRhr1l99+Uq4uQhLXFbXsuSAb3aa5W6HI13F6qHemseI3K6Zq7kn68CUx6gMZmRaOsjkZ3pUdiLodUeiteTlXFDaZAxwBKr+QCZElZXIMLOaGz87ilr333hiD81IiZBT2FCFP5BoTfyydrusroAQGk4IGL8wYBom6kFinfBu1XaTWTZUierOSDf0kwItzlemHEe7H/nkgmkcF/aYmIn6m7vObG2yMZ5uTnc3ttUkPOy+j1T4nsYGwalujA8tuhLJGvvkYG930xHUfDXnfn7jsM
LY5sN7fFOA67WJ5RIS4yPuI8Co8s1OX33EJIKEh0jO7BDJQXtsoKrMAIYsah6Rej1RbRMDSIRDpovbo+rCuwPs4OBIZwjxOODgUjqY2s/9vY8Aksuo+9ubGopnuQ6f8JpuUcfYZeXTJxZPTo4n0z11op+TTZE4sxj5hKEPobn8TgumNzx2dg9ev/M5wHOfvDKsZABc+VwngfKJ4VR6WtzddMvq7LEjFnr7jya7qoUtydGK+ezwaFelaAyMNCLZAM3NQhhRp+IMmDQ91Wm8B+wHH32oNikrNONP2WMfKZ4Z8bjMhXxDObu6ePXqa5L//NmRT7emQ1KTIcpPKvQiXn3zFXYxE1999QUKCItdc1fzi5vF3D6Zze10LiMwKiYd4q1pEYxcoz2KbffAqTK82S12pS60PmLpcilqlrp//FgzuFTttT87xm9rMn7y9Ojq/ORuduHKmzMb5Y1Bs1KiddxNpjqyxjAq0S4+VXXHwOi+OKtpj7G6p6B8r4Qq9NUxbFdhxoNRArHodILf6AoUW9LzVPtph5s+c+WTWxduzsE04r23s+dDVjKi5QiG/OkQGNJ7ul7sTnclx6JNd0yxYT6cNsu4M1PknFwDWhoqzCklFSJCOCUel1Dcy+aQqPBY5RQqUVk4aZ2dsKXiBq5FIiTKmSmKgaOO1U5UHlmPiYxMJn1hL+1YGjDGCVhhi40pSpJT2JoetdhYovSi/ZdleYrS3KSQZYuANwBygwEjc7Wiq2WYUiRUpzvDDQqnAPQOgmvplBqt9YIQUICaohWAhCu3SoS2MIcTk5+Vw0khWBfdzeEDdjhnFt1b1AwGrF4DU2R5tr9QxpxA6ZmD07HEZGTd2Ko6p/om0cVKFzVbVeO0Ini5ZYVQfTrKJ3MLIet3jDocpMcIK37SV9pVdqEpffn1nmbmh6SL0CKRYlWlWIQKCXGxirAwPNozhDFX2dPhxKbPAcauaJAUG7HjSJ3+FGcQRZs4tomH4IUYWnEAlN1SBUE3VQjg8Gif3ItCaVvrCMSdM7LOEqXXhYi9SXUPq98gPTKVMewqSU6rLtluoZedUsPJDTUoMOqvXAemsEPxWyIG6JbfCEYaUTlSX/LE0KMvl/QkjajGKUf0IM2rG95kzdP4Qi9C41KWFuE0May+rZNzSVjQpKnGtSz0uqvj1sySXRzGGudQlldTv5IbNrFk+vyu1ncppw/sumDQplzXGtibwVZSD0xD8rPELkE+T6P7pefS/byMWnADAFzNFv0jgE2wYi99uEqshIY+tFWRq9RRF9UjpAnos7SiuAiqZ4Mvn0nVotetW8J0jZt7OKKiJxsTnbvkThuutjVJZtx9SkkrqS+NWB91Peks33WcTA/JJPxKzKZRAdXLzN4H+sjWA9DVwQ+7NZBWedEpxWZywqDTs4jwiUi1iSGXZ5dYEfudQWh169N7jW5CofEEef75z3+u0mzf9zQ5qZVVeV2dFBnUFQLMuEtn5tsWJwmj1fMRn8yTu+mY3KqWFLQE2PKkD0mqQbP6Er15845sESpKILZd/WRwiPDcbzRx0+D94ujg4Ne//tztybAcrD9mes81p5sZshkaV0We2ZuzM5be8ExKbMSn1I2uUJoOPkeivYbhdKpN9BlkpsjCu9ZaKrS+wFT9ixXFX90P1KegWlVJtFKzmGqarq31GlomOdLdUZ6+Csio1mvUEDGJ3s6XgbGsdkUnY7VTU6ipgJjx5cy1ZBUbyposlAjhllFVLP7BDbEEokhO+5MGBrnr92f8UKVqoS7JjjeupHxAxVOjjQSoj7SZzrh4kcDKPWHVaHg6ZAlZ0Hko8PLH9Bdb1voimSXTcsmj3PdfBQtcRn/bP1TM+7TvUa4KVWmR1LUy4Gm0Att5bSJXAcsqb3qaAADqIaLSQDFXBtpWTTPaJlX1EaPYwUJltBmHPUgIh8J5iiCDBggYBhM/DjdotD0kFdIZeQIYsuZr+iO5pT4gI7zN1IZFWsE46eKS3EOK2OfFycz19R5zZZqD6pTKlUJSlRQBz9objcC8qUmxmrQdX04U+iP/UVdV6WYkskPkfg9CmwLcxcGDWu2ZR0JuoLw9cum0qlkI7J4KlZ+qEc9qtxDEtQX1lFCUkOaAV8ih4oTgnT8hHZ6My3lttwpY1qZA2IbIzhoMVBwChGiOMjBWFg6y8/JsFJ7Cy3gvqfpWRsbtmWp1+dvd7dadD/6pVIogNzX4AHwuBWWnouGlshnMFaYypRHcxkAm8DBzaveus8igX1Y4UFRl4zzd5dYD/Q/aVixgUWoN5UNBmubmqkBRxZ4ANJ2KhxJgVa7W7gWlFjIdMySNfihuSRo8nFpotieoFuGRl+m3VW2S66bc+Pvd2TkY6p7TMRIOQ1crzAjy2rxVFpbswr7q9W/enWzMzJddnhquSRK2q4VVcwfJPjE/voslylYISPCzssgYQmDUizWj2pHhFa84VgRHHWO0CR/XsK7Yi6VsJ4NFaJORAkJlKGb5lj9VU/eTeRZzzIekJ0HotF0tyzzVYmaArKayFJd+PjPM8pnmNa9RkzCKPN6ObWFLzUba4mB97PLqHFWZ1ptsb/hMvDgTP3UlYLEwhrzKshR+1CqR0HI5XKywTTAOdMG9cvxDCAwd6NkYqg5ZR9op8iAc8NC4SptFcSVVShUa0I/ngM0qadykEip9UVMaWVMFMdgqbO3sk/7brgmX67eDl9ImtsszPHlKIJMOR+pNQLo5+jAqjVSk5ceT8CVA/8ZQxYKq2YpOpoAVK4FJkBhSEpMcRoQogZ6yK67FLyTZr5wAUw1GVw3ZwEUC4PctZ0DF8z/jQkO5ARtgfk4wfxpJOfnjeKNqAFR6Fc6k01hDlBAAXpdg5WkkQqpc+iQkU/LOAhMkoVvTdxTcyDyVEp+pwM7FT4RCSykHm5tXNBWDlbSuEsHqBy2ZM2QKUkEkgmeJvVHUk2ypFiSlQ5FiltbLukZy0+pMKUwyqRO9LDuzJdautE/GDE8MyWgDG5hYhzJMrEXStlmJBbzPWeb1jdxNQCXpp9IddI3JJdPrMbj5eEEOM2KO1tikeeUUoTi2JDf0P3ANIIAnLKrOuyaKKmQPLaqjxPJwsA44OuQ7rwI7JLGZhHSsO5oa2goIIoR19Qnnmn56LC0jMv4thzzvkvMgTNpgrlxoPc2YETMxOt2amD7Rrq1SmUFJQ8b8davuroizYpNTI8ppiEvt4TB1pDRZkVG9hMUANLy/n19dWydAVegVcrf2/Plzvf0QmfmA8Ets7HYmfCIWrRIy1o+oRBwJTJONTsCVSERkJD1QJYqqXfKkyxLMKydEdvaRGTvweOWk8qTWJz54vZ8xdAXmKY+zyzPjbzcWbp4cX5sZvpmfvTtWng8+fAmD4QiKNJIY5FH2FhkbvTs9A6jf7zp2WwHQT7RWJOQX5RxJ0EaYlIjEKFszzBZyPMZAbinTdJQQFdA6y/Xo6RNRsKHB59/FGMeQ/9gHvE6fp6pi1f9WsyhsBV12KEXOcgPgWrgSqJFZrzJO1mnTXlS6GTkjRN0QU6NmRBXPCCNcH7nfdoep0R3xy
VYXS/KxDzJVx5b3TM9eaUBMWIzf+Gp+iVTCZc4TTzSrK1smAfeWq+pnjA4yTUJSENZsb87wd6WkCsq1lPI298ArijblR5tVRnzzNBmoLUOiJRRwNZmkskHjEgD4Qg5fWk1j4zELqf0R1xK1cNLoCRMyaA+J1VTglQB0MqjZj0ZRGEHFwduvMPBwhSw/YiOi2kxhC56FSov+gjRtMUij4ZIKXGY5WqvmqUUlpOgisELSIowqTD6on9rpV3sMAAdeiw95ISqNUEjpab8r+7HSOEFWCAs+AO1C2AM3vAZjRQ3PKmMK2AyJZ6XXIFgO+goVSLHF5iXyrnIqhNCV+Qlc+8MUJauBrCdJ8VevS750VJUSqwyrU9TklzpSqnqLLoKi9Wz6c6ZiYC6bpPH06eBJbqnxQYHsKsqQujmGEsQ+9PdrirDiABrzoubSD8zEW3LLdG2W7m0JoRozAZOV6Mw+a2C+ImfgRW6ZK45cZinIjVC+e+Fu7KwRpAT9F2Ywt1vWzJlAVoTQW0TJkSB9RB/g9ST9EszmDorlu3wUyn2+oLPswXmN6Cp1Ycwzx1zSdPnbdb1I0q9dcMWKbqoqE/LQpciDeOhVRx7jsMEfJPGnq4AfeS0agls4w8Sjf8LfVjnNkc3ukTdOQVKuMomcJC8Wg4UwGVhOYLtEBU9OaNhHMdmbbsz01l3XdMkUsAyxWOnM4Bx1bA1Aw7hze1s0p7nTdGAtDoxv88VGc4a3V2axzL6eXWyiMHsFmaxc22M1qz69rpHFwMhStfnSROrngRvIU4JSnGnd+SsXDbByytuocCOtoBvssgo6UU5WqIHmW/Td4vL+ak6Z4idzC5MoxOAmMZcGGsIglXUjs2GRSawp11XZIcm6PiPpeky3Q0wn+5Od7RwRuJ1NtnLfOeDgbozqNwtLO+6Vf/z0KR3qI5lMUToBZcaYt4JPPSmlgqejo0W5ITD3BWc8GiGq7hcAjVB8iMe67DBIKmhNNnrCSYw9lMurNp9Wn7adhZO6GjOyAq3isn/al1Ki11oeXohwBiyz/q5btOJ9vZzSSJXVABxynDRLD1sMwv3d6eWloSKGIA1YRpz6kvN01ywthb1lrpQxbj2kIoZDNvY2hz1TtArk4Tq8AHOxqjGTEHWnGsGiIYtYk3SA8MKrWC4SUtsUzZsYcArBruoHZxVpczfHN7MzsICTJY9nh3RmTcRACowh59uuY4O9nMgOqWfUX792opilEoaMDWi57+FbApeQ8w8ubUAWRS1UUZI0buXVWXum76J2sac0t7SGychKVm2Pa4eKj2jAJi1COn+QJADDh+z4CkBqv3Gi2jM8hcDDVfKiuHrBalZNC5QpZoJvCh8iEcKF1SvN2Ug63LNTdZKISjnqQ0zlWOoxBGwa2kRDavOeFEpKpTxJqtwm4srX300W0ZjqlgHdK028lrllIQ4k17n3a9PDPzjpBeZVBrq/6dURochxpY6N4YG21A3/UrMoLBCVsT2xrJWTHGkPPnGyv6s23NxMHGme9EOzhCgyDbiUEv9y4cQr/Kgto5Ub2UStX+RG2jJ+N3U4JFSv6FmWyKvAonAZ4kdgF5yn/RpqENZ8xUOeNHCDNZJgi0J+j7PDwTSLecSWzcov9xBJh1QBQ6q8OMxaVs9KqEShMjyv/WwwdBIh/DBaYGQ8fHPhZueRVXZXsJqUurmd6SBkH4Cd7eonZzBzJNxI1+qOQ8F2FsIEd2ZZs4Zk2f/avrqL+43ZTdEfuaQxo6wB0S+oVKfqo/ZbpK6JN42CCR5pA0qRppZiVl83MF1qHg6AjISSlwyrayosxckc4Hs2Nmdioig393OsJqjVCwJ0d3YmudwPu+zAw94WdYbKUWIXTZ0Zqee4iKNMRmC5LYKj73xvUF6QoEMtM1f0gB0I8JgZ2HTu1n0YdcWtVGlmRTyayRi/KCQRTRkxV8FTXx/WA+OPeSknC4URJYv4U4+Z8eMUG5P1CwWyAorJIxwYs9cIo71K/MTyZGNmmBy9p2vnQ54X1s0us0cKYbZb6Hjs7dgYkn3Cfb+dpeGLq3OVBYNlIEbC2pUtFOZGFeEP/+iPfv/3f7Y73Tk/fhfMdViz66VozGzk2CmI+hxJSCIoNdmA1JK49/JZIal2Hs/2LIFWda3uIBSIN80TNOhvKm8nQaTiczzp4/mvA7U6TlD9V+nCQKjC/07gmVwzAPIVJG9sXWm70raxK5UGDDUJe3CsXKAfSJvgfg2YrXzRadXVCUGrNC0KS8hKLwfgq3iFaW/IWDkhXmOgq5CgB7BBvARCpzEEQY9OCpGiAVaLqZWlxhfyAHmKGNe5rd7y2oEVuQRof0M2wHdChqj2iMWNYl5000OnMGKFDBjaMzwbGNVhvg6O4vPWlIJCJSg2Q02nL5HGUbVeE64RBbMKUIGKBPuptUNW2+Wb8JhE8pe1K84mMHqm3FChq7oZyPmuR+UGhkKEvLvJmcmnL0qUUzKxIYAGIbp9f3Y2cimxgQaBXl+fH+7fXudy0qvLfKf10UFOKzvWSsoVzaAE/yyyRFHYC8DsVVaelqZdML93vYcECWFoghU8aXsOTXMv8YaKeig6Q7OSyMUzXKqEFZ8hpkD+oZqE8Cd5tZF0A4lTFTyQaoG/alBUCVqSiuJwptiiQaZNJpzhMbyJLMtafcY6BnMlScpV2iSXJGiqousVtY0nYNH1Oiiuq7i9uD5/tLWTDTgTOWnpl1am8yFim3PtV7enR2m1RkUdXZvEi0BReTX3mK0V1q9u7Y/bugzPsSUjAF04nzr+8qvPz09PqZhH+3u4mkkqO2+X5Svi6tFkF8GJS6svmEhp1FBcxDjLPNloA6b5nwK3IxDVFLwVuAmAdZ+ZMJze2Ny2GZMiMxk4rhVaGXGpEBxEM31WTlrTiNiW6ekyigOJcIrFPiH277hjPp8sZZXvbFAZPTl69OTx4fjwKDX1vVZJV+K8hDig+gib3LSYhOcsEcpJIFOVrQjAADNO9i6igbHUOZO7tOYApDXjkLhMa8+1UJwOWvfjZ0dGbnKBNjsRShWXtGcRUVpkc05oKbUsTDYe3O1Z5O1bVq1WFZWmILbv7Wnf3rhk4250O9wiFIeZeoNnxyfHr17l21oGbbex4qoIZ5SIORnfpS8rQLi6i4mtSfWSlwSG78VDxeQR0uXtQK/dmgRKqOAG4tmGWSaZ7bdpE45ioDsSq0VUrSNAEmlxd2yjic8m+MBmIS850V76Y74raKEcOPwaKJAA0R2ep5opByBUF939FNKugiMTFAUVGM/KSUpPVQsmEdUUVwCo6OT9jJSvDJJUHQhNiWcRQW+lUO/JCIKaEdYfTBT0JCvsXSJO7hpMpkxFaxlpHFzqo9rPQwL+//o77cOnJPC3E64vxkEu3KBYCApThJUTVYpuWTrBQywPJ+EKdvmL7gyQ6wo0/FGDwWHXDWNkKdBFnFKkf6DBRGyUOAUJo+IanQASSVjT5Oo4VNPcpHa+SfVt1wQ9jB38MMMAuRRy9iQ2HL2XOY1ykINBU/EEH3gzBpVCbZiHgc0EIZIODx4BHo8vNBWTE2fn
p3V//JptWXXoyN1u+YAQ191ACaFqcxUCGrUub5GR3T0V6FnZhTbwnk1n6Fi9NiQMTbNXrv2e4c/39fQSbaC+A68CJVEXQxTMLa+QhUHlErgiuzH0a5PRfhi8thOSzCrECvRo7N7AzZvRzHKEwyKb20zS3eX8dJJTjndznwu+m9ln7TKjTBVZAbl1ucDCTKpCMmXXUYfm0dILhhkr2oM9svv6q2/+8q/+3JFUnxd59ignB7KNtE6DLgnDxaSLoSYBKazSFCcIZsLLwZlbtXM3RxqkXDwFinwPE56ATxKMoYUYYk8KeuEjM5czylTC2y0r0FGjhQHCTIcK18QMJhgh+8cpKopfPvsHeyCzB7ZGPFnr00OotTH5ll7O10NcelNimUqJuSoaUAcGZhiUuf1y0cyM2BgrV8Vq3rErdZMIUo+Pj8/6mzv3+ZhIfzv7cH//kx9+fLC7xwgxtbJQeLbs4vwSTC1FhRsI0CHzrFWe6xyx08OTa/ip1aunlNNs5ywngsJci2cL+nTT7Sa5EEFTo9uMqnGRAgSp+aEfKz9+8QJncNLsP35ZpNIpNGJjHcONutYdQjWSVYeqAw1KzwS1XR3dJmALwnLgOa/NT89+VUylAOLVHn1DNnpAiHAwkYTVqKsv5A2Wkn+cb2who8C8ipK250pDkA6qatYDQh9nUlsJQCf7aPYSPehSpszDiFLNytAwTX2Hd8G6MPwY7BpODs8oCUyobksQgolajfXKCpSKubW3Gg0+cpv+RspaTVsXMs1GZ0brSueRFGYciE2+Jhh9JzZl4so05cOMZpbr3LuOT8gI30YuabQ8Kr0jcsXQdVtC9cvsj0ksCmj8qrBuMbKSYwlLknNBVdmFM8VfepOM4r5wfICkJ2q94qSiNZjX9qQpVPcE62odObtZ0xJWycWaVlb/esbykFEoEFu8MBeIChM4s+tsfDLcgDkLCfYR6aTHwGfSIQohdJAqgxhEadKmz80Mb+h8QelTNznLFfuxpQOqt9tmNplmfSc3pHWJdPd4ws9yyiNfTmCEOseY4u8tTcpEHhtSOdOSlxW9hs/WiD0zjy9pzmA5/JzxkHD+SF3kxVHKDSf5OavE8tEWbK84Pj6FiceWX/oiG+Xzhe7Z/v7Ljz766NNPP42SuskXlcKKVaeh6UQPdeMIFPyRnrqLGgxqo/LWcq2G1Q5giimQJoIEB9Rsm0NTYeCRBN6hEIENQ4gR3KmUQmC1hZJex6rSZzeDsZzub/yAs26hmAY0NTsEZ4swJWCiqKlt8khFaDBE6t5XsV2hvHIh0nFYwzesvb+e7B/ZZjEbzfen093tyd3Zzdn18e7G1uGzqd0Tt+s4NjLetNdP2eYUeu413Ux9bU2Mq9SizZxmz/f2DxQEq9AgdwV89+bt+emJwI8+eGnbBSL1lEuphxIbXlxEQBhwTPG3J2MW1Cuu+sags3LoRLP6xTfiAy3P3e31m29e7+9mgSSDlMqOPx2sLFlFuGBGmBoR+uvPvjQioW3Duo37jz/8AzjViLKrUR7fROFkimZiEJWAxJVZEmgN78KF9qxgWlGUBlZnHTFGNbsgUAVPAtfWKXrOxJTsiNYvf/lLzUiIxMru+3E//vGPf/e3f0eh6Rxzrtm8l7XgLd9N/uzLr0D2DOrlxRkW/eiTT372e79bBZ8yV7Jm3qy4ogpXjXROz8/QzBgjEw0KHklWYc6zSpb5SyefxibxsEpLsh47u47xNkmyPt66mM9s9jPNqJdgD9vVrcuFp0hypfnzZy9vF1870kTNOPPkPgCKwmL186OjX/zlXzx7+nh+kx2PTMPVeTzmWi+uYlQIiVcWsIVBJaoizNHAVE2YUO1IEcL8eq2GmVYsYaguSwOVV8NBkufQmIkchseKdUuL/o15TBWHY+YN0Q+hKWuozLjm4Ne12oowlI43RVAOdk5oezw7S+9DINKFd/YdK5uO9YywrF4blUBqyGiTp1PpKuWCynRGdGgwNkfLKgtNOLaLouUGkvilRZT/OBJhjjhHuBJatMk08OXHT6aN3NDTTIVZTQOO0vchG6+xGBNwXfJKBMdS/xYh0i5d4/fCM/i9dirP9qzAl78CH8I3zBDYiBCcMq5QpcBVzAbu5A3T2QkRtXLMVlSuqIx5qyehgGLVRDiDhzEGZr1TI9nkjDd4VYThgNgwoWsq1RNX8hB+clChh0d4BeRRNKYu+FMFba0lzNUkILOw0qmSPPHLjjMTSQIGPDzJb+UKTSIb4UOwlf/u0aPDqjSTGSNHDjUeHXwIAJhn6JuclEjR2jW2RgiMp3NbFSpFLjlKlznaLXdY65AyZ/kySztI4OxchHiVXIsihHbN8bQBy1n/kqWop3JQ0XFQ+RHAFpNGaVGxImDZVB4iBwxPcqmq7By9ggm+kvDU7KosldXyYcdSOLzh6431zTnnqUa2t91PDu1lXkzXt7MZzvE5naK9LJtkjtD8gr0wY81o3U0DC9tC1XmW3BmATRuro0pK0aNZkZWuipARDH/6KNWaBErSUcRAoFPAWUXf2c6ckktUDd0YY7vyqveWAkYD3jPKelhdLoFDefmbLQO8HAWqdOtS2RZixH1xYSDRbE/eaUqZrKH0AOe1HAxkxH+5xNdj2oLP+GoUfWJkY5JsPMlo42oxd+lBSppzhEmYVEVwoyV1ysJcgXGqyVM4R8BlWOAoWYq6EBJZgekt0rtmRFBS+AKDHLRQzfB4wtP+FgMAEkEQfmSSkBVy8aIeUdahnRQDdniAJUf2gDx5/FhDmMqCXnRz8c1VFETucBg/2ju8/+B+e2vHaa1PXnzoSx5PfL3XGE0Pdz6bZLf7vb5CzSdHXAfWIbVYpX4jmd2+stL1wIHvN57mVT8hES6w2JOaUjy1JrC28uofx1Y1c1RRJ+/X5rsQSCI51X5LOrOPJpQ0iQ2RfsUq796J1OlLJQVvgxUd0UlKBcVSIL4ndkV9SXPtWE19+EpTZgtSkUqEGFhKqGjeKFZ5ND2eyUxc6XWeVKqePJheFCmZ6qwFUorRublhRa85V/NTE0leu7Bc7gSZXqrLsnSS5Rzk5VACyertN/wCGFxHN4UCvXouSV3xrcMbpsqQRF6XSJBV2am8Tih8IEBIh3dUP4dMOwrHZAVwmVEfxct9AdF3YVdTkkYazDgjxPSDMZoD7k75aaDZB5itvRWvBnO5D8ActeGw2rPxN7Z8Xq1saqHiLUBlHy2ymqR82ZqpTUWC/UcUQxUKEJNN1FhQcyYJSq75TeTAoXAS1PddLOKGYy6+7miXsDP8XLbFy1JPUGtlsVqUiSIHD+JDRrlG68l1eCSvnGIyV5ez7KCjELvIEjakz+BCq5PrCRxmGWu3vr9KkeRwb1mRtAX7nawO5e7OfMutzTM8XqFKnaCHpFdNCVF6fwCLnkxYQg6fV4BFWqKC6YF7wKqADDH6CTidObP7uY72vTPXI/sEs2MNhTvMT3bWKjTLqZPtpFV16lyxu3nrQ+OZr9eNRr9pja17d4WsH+xu50NEIdszKrLmVCBUxZb07e3hmsNChukE4ycTNbnFx5xKWbj
eyWMWEVuSJpIvRTqO5V8WU3FSdNmVrBZg2566yN+QqGaCMKo99qvxhL0lhhL70x/nGCHOxT3IJ3aEpDETCOVZZpPqCA0w6FpZt2OuYFQigS0AzWFPr1x7wMMD4RC7AlA5YQgAIejHN/44+9FrxCwWNz2ceBNriN147I0x1dGzExHy1cAdJSpOr94Mj563Xvid7TP363vTPTrNa5vwQ/dS0GpuFDE/Iuu7+721KfkaE1J9jNFkbXK0tucbsK6Zmr/67As9E+3IVU9Gt/YuOkCyvZOzH8jD0rbB/DL3yskaSasSBQbjUqzi/5K3JScCu0Se/HB2LEiKRSM1gccxsbs7O6ZMp9PHOVYVyZRdhE0TINbuLPRnD7AQZGibMOXjAiTPhH/jxUEe0TwtSbC035MTy0HxHc8Q2yR2bEOKor5kwAOz3bG2xxrf5rSV2byMQNP3hjutN//Tnci+41iaRGEW5wlhkAivoCDTBgGvWCM+YDXsCELUm2aLL13m1uL0DpcLs6jVKov4yiG5QCaLLqffLmdCyoFvB1u7Idxr8+o7AMIRPQRWFnnrhIktpz7UEwycAOE4yY9jDSOhJPwCPaXL/zgqkidZZONy7ZJYvle0VCEgVFBpeJf6NWnAxaM7H6UR16+BD7b3ra4zEv6dyi309WCVOstMoxZhQqo7kld1qU+jGRX9sIlalv8BZ4Rz4IuAQlv+fqUuSXxN6uZeAJMh2o9uWg64PPKtxyMWRREAK0UkvshoXqlcmEVxYMLh8jekEOVyDAyGahXJunu7DS9fr3B6lbabKFslpB34wtePFF82/mBrkoTQOnLhSe5VWfzJqehsP2AIw+QyDNA1wBJvQbYqF/4gx9QvJZmtyZlDvl5s2XGQDeisl69MOE1kGJUlHGezqghmYwnN2IpQPr5pUmluXslVSzLWWtShp3seMESnkE6Uijxil7sk6EdENmFCuqeP/naIf/fuZLI72TvYme7lJF/nmLpLS88uA/NRXJc7vWFNVE9pyYxGvHxKCz9tJK0q0KvIOpDhVRl1u21aKgiT3DHLs2skGZQtRHgaR7mmEBgyqhUwTykaQXIkiUdwf+skVVDYHiYUmzaTHSihFsx2vuWbAxXy0lrgV7Gdiz4CRgWyGquxCzDKGkgDJFPOLN+aBbBdhwQ8c1Nd0UM5SW4SFQbwWi2jxeBR1t6U8WBvPxcU1bqDxee786vry5vd3fvJ7p5pwVJiNSizXnB8cXt6tZhdO5KG+beX1788n1Eu072d0+PXujG2M6BfjCGE1YC29MZd1ctaCjyBVnSUIDn0lGTyIKBDMKo9beoAPITk14cwHIdcwQFLqDYVRAFTVSXOaqWrL4VMFtkO6qlmeFCoS1iZEqGqGalZKmlA0/uDDGVcFRxdfdUgQZRq6KdcOw5lPEOR2l9ZBi0bPbkfTWkrTSXfn1e/9pKlB4QCOGg1yzUILAksUZZLFamJIWBpqqVhQ2BlK8owg1iWTk7rlW/BAYzpzaCNyyJU5tY5K/LrdYQlkFKUpuP/X+qUVBKlVk88xbrQJFygPm9YEd0VsWvkCQ+leRPIhTaty9VJ9EkmRBGcMubXCUF9Yd02/WOX5qRUGhy2aVGZfatefHheMpp2G3KYh3AwoWmvDHNlkrkISx0+DGFJ3adTUyfZF5Q5BxpOjsk0S9MoIlKSh/LM28YMINxrXIV3LIgKp4BQr2ZT3BI4DSaXvrGhlkeUBS05gmUeV0V7M3Gf8kCWjIaBV/xFfJWlXtKG3adA5SFM8ZAGkc8OrjFURleWshDV8EVfHl47RFlg4ecJ78t5BaNQngI0Bk9tko4WRQ9qWuo0Gqoaqiw7CZYJsdLNmRqSytZn26u4YGttrwWt+9B8tjAKlbDY37WjcKliQYhBEkmBpIgEtfxrwtDWxAOAo/kspAPb0091RPNoSdk04V47daA/5jrBvanl9NOrM13xbA2Y7GZofb9+cXE2WrfkaY5+06DiSnO0eJk1D19IM4GRFXzCoHL0gYhrTBi7YWdeKWjEUFTYxyYicqAT3z7/9We6y3tmoB4fPnm2oKPt5cQ0SfATr/hrQimV3sQ/9HRJVzGJx9jtEauQu48jk9WaMD+NvrTnUraKPyQ5Ml2dP7VBJsuYpWFiqTGkthByZVNSZ2vPxxa9tnYUk9QaZ+Y6ploMLpZHTuXrCaeBtVsteFQ3ArLqXJMQvKX0M7WQW9Fq1/vRUWytOzJoatdfyfPp06chQ+MtRaEs0gvR63pSl+szVxCSLvShQVTLBYzpl6+tZ6u6DkZWsNyMDPpmumFQ5cMw99PR2uF484kV1XtLPRgThRhhn22uWdJaGBstplt3m3vTq9HNwbYDAFvn2S2+dXC4j3P1oRnJKmWJpVKg0zPFj07NK7RNfHhYwtkAYNqJbee1PYqpKblPisBoQ/bfv3z50nXvs1Euhh+SFWYSq5ucsYrtVKdnx/zZmHAbuwUYHtnhfOwT7P18SIeQDvwONR1OAngaHoAsIxPlYBPu6Y3SRnRigyt/KEp7qM8SttExEMQZPf9YLM2ICaOjo+bSTpJFFR+tyTpatViom5McpAj9UNazfAXfNMCCMPt3gCl5AkNoKORSISvnDclBVAg9ZeO94zuwYxPVdVmdBazsutSkh+Sho1DJBQYAjSeTAFG7eQUQGlbZhaDiG451bKMN/SvdClhg2BVDtMwig6TcBklIE4LBopZMAU/gnenX1NOcS7mWeuXPkKvcgF9yAU0ej8pTx2KbnmBfFUosUGyFeKmSWawwL05z8Ae4Sh1hgLNaaXoq+vwVnkcwlpNq5f1uyEMwcShH0shH3et74dFfhacxJGrVuhpRx8odQLc/IR0IQCDBYKtgA5PVrDqBz2NJQBSYzkKRqZKbuoORPlIiV/KIhUo4RcyB1MwyLTWbmXVBjPmjJuPhsyox0id3jqd5zi8XzwZOXPkb0lP4EMgDNI0silhDthfAMGmeXekb69SCfvK280SbE+2KWnW7weX11cLWa/YIuaYFzQDqnCifDge2mRVLp6N2+K2oQphMUdXNp0kVKISTe1NFp3zxxReQHFwefP36690vvzJP++L5y2cvnu/tZe9Gz5E4vqq65Fppscym6ZSXG/gDIaeWK4c8+KPKs1Mqd+tZe1nGQmM8mJ6u0V7oxHCQSs3AlZTl1FQ6GfZ66y1Vy0K8BSOotqY7+3uPBnPFWKe6V7IJW1OCABg+/vhjRDJXqv52fsvawSAn2/IBiurJXhiYaVJg6YG7m0bqWDuoVB1UXUyBTeokzcIxBHWV3MV7FnABVukVLFvVa/6QDdb+Nxejve3x3qYuwPru2ubhaHowt2EshxWySpDOu8t7Jvv2gIzXz7b3ZvgxHZ+vLT58/sH1yKbycze2A7q4ZLmQFD7y4GqGP7oD1T9QClqlBcDTABLZXEPyCEx41R2yOwTdHYJRZv9OT8+tPnz44Yd//Md/bEMTIfnyi8/oTN1NWYGVC4T8xcHb169f/+rXv0ozqg2ZzY3MylQzXOqUzkCy5pSn7uHgbw90cUmR8vSTp520SBcIVTvhmQ
8i635js8jMba6srO5tdN1StQJZSkaqNIXPfwiXiCrWa0LKXLVEJdkSvn/TepNfWR3ATQ8PLe3ZimwJWqhBVnZJ859zq/Llt2GaKn4heJpKLR2a3ZrILkPOM4A1pCfgKK+VlRSCJJXvidSmUCpC03kJ5PHKieUXS7qWmFNWshKVJWeB6Qq1xUVb+seShMj0vtmr2n+s+geEoKEtF+sCUoZse2cEoew9Q2QNuTw5sRziV46OSgMRKBYD6i/jFa/UH4IVhGikHahxgEKz9Vg3MHGp76SpAgaRv/Rk/FHF1JOJbFNxKWbTWnMA2Oaj2sqCqdA2W2QDpMhYUlfE5iEKMPZ1SAh64KRq8egCqlODAy3KXA38mpDtYRzdtz/tewszT3h1l00fABTW+BGS9KFvU3a4O1PdE5LPL6Rp63WLKvcqBDWrHkwT1fCQS0KTDCV66OHn5Gm+zcK6nWXOhLvl1frT9u7Y+I+kZLuZXYIutXC70s3cHTwbU6tNWHpjOsiuaegx3o3kqo43g2323lRWlqlqraJZZ3Ukeyqro1Ak8qO5mK++MiWTHDfX5t9QnOuTk1yJ9OUXXzmN+8mPfkxbUfSMeoYKKnLoxZaWbLYMBe8KAqYWWH2v1L2KyIigew/XlypCfUFG3kOw4XZN24JkqifGSiTNeKSulEz7csFzhDOumH13cWUHnFvFY6f1X819EndzFzZVIjCcDXzEhpOKcfLEBEVemE0s5rAdtuZVVQdSjLL0F15czxjAqnQE8AuHsrFBNRRTOYw6wchR2/AEnNjSGJqfNpRxbrWsTKi6cclHFNa3pnfrO4uNnft1R9Luzi5ydvh+w0pqjSNtnPVNmSgmX3Ow5nPlcv2JjYzr7qcK8WsG1tcnF+eP9g9APXRy5ywVMSpW3hASXtAG0dwZZ/C3JHSR+YUruFdObKqmXvX5nD949vzlP/yH//CnP/0pAfin//Sf+hDX2Wm22oIsdbVkBSSSE0LtXXLzcdWvjhkixuBRVcaniA0pK09n3+k9OTH+JwP1LRf1ZclPRExE9E96oeWENVhS6Xha/jXl6ONqLiONEo3NUSCUQUhQMk4owgNtskhybCnllYk+2svkof/ZWiN1do6yMqlXvaXamOSV3HamZCdjSlKSyTXYmn0yXKqq5Js8Ah/iCxRRco09jqcCpeVRuG/rkY6smCTl3gPUYF9VCdQMPRs42RUDcQmwYP9ThnK9u0zDM+bQ5g2HkRHOlKYqyWPYu5UXxkoFPNQKKOaFqWFB/ROYmJgy1l2WpklClb6+zzeE/TVmTZKVJBbOJhIq7OZCUn5DiSfKiYsiVRFSTf7nL3aKAxNfpciDEEskA7UmXCpOPgUFEo0sR+cwJPquB7zlZzPX6GcDPIUEmx1duqJajnFzNhCFWRKLyijhgWuSPJW5kmdQnrZPovxlq7DjDH3SM/OVFskUE6l0pSIYYPHb12TtRDg8lJDFNB78oYLrPKOC5MNHDkgcHuY06IvnT1fX+zo9k0nRuiSOcCl+KinZ257mPVQv+eNH3cDMdQlSsJVriRUuQHRBaDnK5CsPJgB91WRvdHtuvcoNt1PfV5mUyvbNe3d4X86pPEvy+7ub1zbB2d02X/g+cy11bxjhmHdRdMgVqukJE2rprvOVqXJhiGc1w6VI4BJqLXilOsLhCGqWPwzkrm8uz2YuDncpONv/05/+3vOndrE9Nn9qlkfXmzBtu4cXT/SMU2BMrJpJ+4O1hqo2bjo2/uiRj2R01WrHs4sMszJDrg9OFWeuP90aNYU2lEwm23h0t8FKq+Tws12YXx0hxYF2fnJmpBSyWYv1kZuex+MjY2bvtaMYUVDqZ2UUSFnwk4Du54QN6SOqw6rCFCPCGeUWyc+xH1HISA2V3PZTHZfmC0WWGEGy8WXJqMSws6pX+WMd5IhSZLRMwk9gInWM5LxurLmYjW+M4G82bhwFMRjIKfpYrFjArISY5WT3aeeZ9a/D8Xx2eTk/N+VrxsXezVk1rrREIh/B53LkwGw4ahPoPgJykm4Kc6WG0gRCRQlD+0EmWTgTXqdE1dZ4hDx99uKf/JN/ot5N3evw/f2///f/9H/81zBIxUWzt1roVKM126me1I4q49euU0xR3ZkezGRgrLaOzMJkiMkNH0/RLcqAt1WJBlXd7VRAOezPsoTQ1Ea1HBmiF3FgqjmlGuuVZnCnPjvtzjF8LHl2iJwkGWSwv2yLtpIOb3SXIW7WeqMtBaSLQxSUJ3a11F5UlfYktPINmBabQb+/yIY1ZSxjR9MpyLlR1BZRpd2cnEC50YV9a9nHBTiHvaPEjfSiLNKNjaOpUysKkYQq0l+mW5BDGsoJF6sFW/Qg5mYrJCBvMlUybQlaKgwrIsXacN1jjTwhIgHoOep9+NaUCW7DBHpZAQAwYBQiLNlUxFpE6ETE3MguF4Xl6EmmuS3kQq7jG1XjV1CNmsOnmlvP7NDmujtbzEvRrL7NZ/Lb8RrqPpozs6Q5Jow88KjWcTN5fH52xk8G9E/3D49St3f3KEy5wv7l2TIcVBYJ1Q+GU4LNE8ehLHPoIukeab3aARAh1pkIje9sWUMjPtUaU3+uOLia+ciBYy7kKiY2JocKojW0FU7HN1vf1AkNm4uX0JzDRZBu5yYxX1Zds9expnwxNrLE1MbapldkgoxIqUyrUmln7gR1geHE9QU7jy735vNLLEkxUszMWlviudmyJSVkUplpM6IYqrkGff/0hSMrViduj9+8ffP1V9YRPvzg5YsXz588e/zJj374s5/97pPnzxwle/3qa2My8/WzKzeHzhxdSl3bvUlbMR1jGmc8u12cnp/74EUKbgtEMVPp0hdRnwRXwRejOTVuCFV7jjFH8nStwiIbba/3JtOr06uT+fnnxxsfbu49fvnxbc5UTWwONNy53R9fX8xc4eoDV/cnl0ePD29sqtiY7m9PR9fr1hA0iAv2VwENIDamX7w7Gz96QhRdyETUEab4mOacn5v7wzh0qTStLCJh/il6f7q9a5LHLoBv3rwe7277vpXjNcp7deESw3X9gbOT83dvTvemE+b8t37yox//+EfvTo6VxYnPy+vFXsqCLUbf0DG9vXsuig/yd6/fshb5Em1tGDl6dEDMxGk+GAcWQ7Lktpbbb4mEJpJ709XljTnDdVThA9GgULMHxd0T463Tswu1ub+7h5VkiYIipXtHuwpknU1FRNDZJ/3qtXVbSBT2+uqa0Y2ZvB8dX/iSWVoltDQYxRHJXG2YIqkon+g9qEGiMMtxQKpZO7L25StoWEpjuDufCpq7/dY4IlfIG1fd+z7WxHnt8EMRs2f17irdse21DSPTs9t5DqnZqmefzOXV1fHFi8XOk8nho4UFXiNM18PcYo1+nfulrnRKCPX6+uF4/XRzcfho/MvL1ww5LeDIg0Wiq3dvXftovw0W5MbIi4voExPCVsfKPX38GA2mDrWFjL81otyJxYzlpKknZYY/FIVWJgVpEaRKTIgopu3H7gn5P/+j/3bPt+qOjuwWsVa4ayvO3i5g42BThUSC+mzJn1+6tPL8/OwYB059q
6zuEXXSDmNxzFDvk08+SUcgbI3See+gowW88wyhDWPrSIeQMfouAKVU0iNFfrR8kqye0b9EIaPZDGhjrmkRfRUftyM6xlvAoy3oTFeYmHo1p1rzkDDIEV2Vb3AWjfCrzfgrPE27jFaB1gxv2cx8UUOHACSNR1YIrtWbDOT0GvpkUJVMsaXEbN2I2ksUPmRXcvXSvZBY6i94vu2kKrSYGT0OgEMczvYr+mOsOGq22CI8w52q13TUiLoNuG509q2coMkYCEOtEvAT7kqclRWvjV9RvfJn/s3abA3M7WvFM/MaDzvg0tIUJCyfS0FhGUgZRZvHHgwEL9nY+ENn+ar8WFsupX/vwLx/SU/iBq6ySWLiwj+5d7GNLIrDFaE6VDexzvCuyhoRVzqFysxDTEb61HgotlCtaroybXIQz9HoZCfIm+LKJfPfaAlfl1RKglf+wQwy3Z/SBabC3Fv49NERBUcH6cppN5cXmXrSuWRkhKBBRkJIsCdByrEk35PK1B+h0Ojv3YLz0YvnDo3+8Z/8XTvi9g8o0003+7hpZuPJ6GZ/8ebdue9QTjfHGqdlLR2Ra1siDOpCULpW/sgbCdJXQFuxLsKW3KmHlKMFnlLGHCYkLSqsSEdmtOOc0A69uXk5m79xB2uWcRZrt+Z8MvU3t43CkWpaRJ/+0pHbjYu77HWcOvjgs4S6ght3zvLkW2jac5HCDPgWJvaenJyqiC57sqt2jcJ2w2vFLOsx/TbOprMtp9MIMANBOLPqfDs/dVH6mzdvPvv807/4i78g/767aPD64YuPGGPljUwyzKkg3I7YUADqeJVdFEtxo5szmS4xQ3b6EqVn0jrUjqvyHKW6vcoeE3iK7qgZzaGWYrJrPS69h8x+uv1B8sgMkNBgHjGbWm1AzLkxvUl9n4N99iLnoFnhmqKMLZGdmiTpJFty9zBHjFN1xljVVXJWqtalEN91Ckq+1R8xIWlmLjcXBpGe3DxkIKyqPUbQsGq6MzHLF6ORr8dfRQbvp9aolGx/svtk49GTrUeTk4VvBvvAjvN11syoUpKVTqVrFbT39JjvdbCIKxluHqSVUMsKmQU5/M9n5PQITJBmULUc8aS/klqIYkM/elRVGoVX9VK/UKeKFF/gKjwrnbv7e7ojuneHj598/fU3WEc0mPw/+IM/+Oyzz2Sn6UGlXBq85GqS/Khjl5KcnxzP69S5PkeGi64STkO26lmu8xv8/Roqvu0AyFUYsuTktekTskxbP17FLp8B/w0OAPHMM8O7DG2CqvXPCvkSZ6XmbyxYydNRxBktA3ZyO/iDDp5ITnR0dJy+YdZmNlw/3WRC+TCLIe1Dz5AvbMIHeB4hsKV5ttmuZNFCPVJZrUh1mmWrUdzCoxQgiebZmVsvIfguHmjh8VTx6rVz97ycZ08a/nqITbi9XxlvLWWoKAwNaheNxmSZsirXpCazmrEMnnLg/Aa6nvzQpmbK3vdrE1Dg33qkIMpbCh0GTnTqtlyHdKAAHlmLLRLS1xHStDVJzb1OBR5AOyk6UTNEEigGPCF1JYo8nfXwhEFgMFOJuguBrOa0u0tZHJTFMrVQmC9cIXpxucvWFnoJpQNldiQbalxwmk6QaRcjyPH4ow8+/ODZs9//2e+6WSO6RRfVYdJ5Nm3bXejDmA4ChX4T8Tt2xRqpzN3nZ83ANQS2h+cCSCLJVIRmIkprhEXMV9S0xpGJDTSoligOPC6+ZUCDmCBG39X87mymNedrujv7B+ub745fffr6ze7BvgFJxmf5mFb2A8jNcOdsfin59V1uMs2NDlub84wm1wyRRtfuwB0fHB3Sx84HkSqEKL+kcpdds7Q9nu1BcAuz2K5WtUNiBWLC0PWmpxBL0Z+e+YTFuW6+uYOqi0hao+pn5wKV2JYj/gxQTNCsiJGFbXARsvBPTzj9CbEaTVSe+6WoXjcMVa8uUOUaM8JihHwKYDzRuqLb9bZqVjOZZqeK6nCnTBp4tQPkoTFEyoJ6JRBesdEzoxOcKeaIhdlAHx4cAGkABRh+WTejPCU0+yytKGkB091FkoAMI8Cgh2mVG96jxPyAWYF8edO2XrQt1gyFbR22OGxOfKoLO3Wflux8bDvTp9ZOwhGnx9WCmzhGN9+8Ob2cmJpmFErjphfOZf8k2thgk2rnZxc+mpbOe2Q13RqO+U+hFHDZpsPMLg6+NktlJVAxpUq2tamkefVv/+3/9N/9P/8fruT40Y9+pIPoxJXdNz/44Sfugko3vA7UKjLBmLkwdJ5vhaua6pbBo5VnBwdPzdDX8lNn2RRUZikIVEN4Ry2fResAjMR2kgzFgIRLSNFdfKFRBNSWIMX2ormav6JzMq+gJ5D2x+KrWfXXqLzAwy9lIlbK9Fse2QJpiSyxDLfKSTU4AUWU5knFd58RmtIOq6dXYAl94DC9Rl+pDLFem6rGBlBg58ITmFKrQ748dI9wyRoYedIKIZGwaW8Kz9M1zcOJJcHg05EvDmif2NJog6fsTdcRTSbrXFC7KiOBkSq7AU0oZphp7jCy0RiANYNQMrjOxVPK4P+2ewgGBlCHBGpVNbwCE6l6V+aqQ5JkhRatyGhK3geWvDUGJUUqhwnhXrEXfDRUfSGC5uLHaDCFKY8G/n52jdMzMGoitEevRcIwTS6uZutvF9UGYlGQa71qh4a1jV5TUjI1Jq06kzf5VRl7h4eP9w5ePn1iIEUzvv76lX1mB0f55paq1Bv2FWV4THb5Qp6Jd7TpO0y2p2Zbrs+ijzJCg3k1PKiVlnUTtTRJgMvhQJkrJpDK0LExchBm/ekKjJkzF6wfbWy9ePbyJ4+e/dbjF/ujrc/GO199+bkBk2VjGo7cZyuBz/eN8xXmxweH0m9jrfkrXZrx5q4FgJ3x/PpscbE+3Z/++Ld/5+TsdLy/69oiyK0zyUgSrqlSyR3iGZ6UJLe29YpqkBiLD7kaqVqNfI1W3717A0BUdNPsamd3YnTVGDwbm2c7YAJLb1teMpU9VhZME6TbCSHLgqCIX+1fUHGQamRspUmtFZrlr1QAkrbaCL8q4lfXYroITSp6a3ZRg60RiFANdz0nHAJsMIWGGr5nrHxz21vSdDvk1GMjzY6/dwPOr97rky5MyVIKCyFXxdloE4h+yEVhneIxV2I3fRam7kL0LeM3b99ZorS4tn27fnNxO3t9dXB/c/V498nkaHownq1ntjAzojtTk+QuF6dfLOpc5MzV/IuTL29fTFcKlYyZy8JI94DNySICTJlG7N3y6v662giqCmidVkEoqSpCXeno5bN1ekJUumfTr754zk8vPv315//X/9v/3YKC4ZRJ0T/6oz9ktP6P//V/Da1ZJcVniDGwpcXYUaWghJNZy5sQfjWlyrjVLNNK1zRRovErpJUT2M4bTvN3rfMkpBzsAtOsV3alwRrD95/AMtYvp0rVTKRJaBZ34iQZnu1phJ6NLZ6W1Fpm89r4JAOQ1xVk0MWl84pO4bJZRQZSXJUsiPk7gwT6qxd+7iHCUFqV30mauWCWJqW5UWQ0KaKKRemQSNipPFWVKYimqqtEbalsT2BiecgroeEH
5ugUVHDy1/YTFimc0xwHnLq54vylaMx/tAfNlv3N0g4uRX3olJRYOHtTDIEexmAuBxD+dgJ4AHi2nLRfQdjejsLTFLJclHIxUhxIrrHBM/Cww4FTCly0Ubn0cdPJCqM6SoeP5TKSLjzpLQ6FkHzwN5F5hlvRkpLAVcxKFeA2PtfciFk4wylnvOxCzlcnbm4yvW5OTI76E5dXB67DdhWp1Ti7BNh+K09Td+1t59W9cOyYPqMdeaabLs9NwJ9fXZyBN/lMBVzZJFZT4SFD1Wfx1zGpnNPPpKI5/u18OlYxo58yaaX61LUCUtmpBOXK4LCnshzsNXK4dp8hCsYXJyeHewdQnM1mf/Xpp+Ob+4t3JwtWSidle1w1TufTyDRqbg1AE7Ggi2WWYUS1Ax9ucXfIZG9/8uzgkT149uawDXe3b07e4JLcoz2rusPP6n5lqmrlmpkN1mFdm51EWj0M5ur4+C1IjMV2M676V5QRf4u9cPWUZ1WiVB1F+FW35R09FlGMRA1DVXqWHhpYRlBRf2D41bW3UJ5RcihHtqw5r7Lw2nrQq9waALyhUoxNBmwhI2myoOk9k/OxLhnIRTXLolCl59bIUaupqm03MUubL31kuJC27FWS7m3KnK0kGBGScpZs7OUBIAQFTDg/l/GRmwWtdVmZU/WjhU+BnJ/Pjl+d3Z9ev9zaP7i6/9Dh4BoDZ+naktTO+q2BcQbOqMjFFZSIC1tvN27PL88mVictoSmT7kskHiesqF3SLWYX3Aa5eGy8ZsZZXjmbUVSk3fGnIZRd7wpSZK7qKsVvJypk5xnUGIVjk53pV9+8gvLJs6emB/93//B///yD5+B//s1f1nX1d/oW+dJNvrDl28vTX/7Nz53NstxbOnJ5kTGytXqVG3PVFKR6ynWuXr/vROn6fydcICewnyhGaMPAx98OxW57iX0hcyw7YdJYNJuSjMZBWGpmMKkGnIPn+/lKaqUjjV57xr7qkCAFu0moEM9OhSpaW6OFramKHqthRgM0Aa0akmNXRRUqScsVTKqEg6Q9noEvx1MsfP8Q3Cyg4xIKcUF2WpWEzKIiuMULVzeKD8pToyLS/CA7F1+/5tE1jgEz12ecUVbTaEzaVW7Re3idDJ1erCUraAEoPxo42strch2cQDVS5ioQ5WDg2g+wRWQZV8zpoieqXBMzYP42ZHIU0hnyAOYaZqCiUXW+/RSC2BJZbGg1sewWDEgGyAG5qKa8o2RdpY3hHGpJ7lldLqr4Sz8aedMvd3t7h2bXZeeqs5ubR5KYo9Bs3WOdbsCc0lzLThKE1f4gHvgZBp8bz4ejfGbILZd3I28m44ycLiyjX/pw4n13fkNO9o5pq7FYfFlqKNXWDHn4VAQqg8ZUnxSoV0nUOIP6J3/vvxqdXznec/nGjpOLcUT8fnqw6yt725bcs8EnvRefH7T2cnM1N1+YTWsQRWcxr7Qi9Xl9Mp8dPnuy9fzxu9OTx3cf6aQbY0GILWjD/KFeiq+saKhHpCcHIBoNNoOelXloe0NQuVZ5Zf6jyAy9Dg72DEH4ua4jBetcuuz8opQaYsk3xzvU38lp1hc54QQoLcFU9IpZyGAajUAXTqOWE8Jl5bxyEdZpuxe4OQaWVgYbanl8OSMgwaial4FywZDzi1PAvribWSOjkKwIT28yf5UpU2mRZ6XLcou8vHIu+gPpVVmKT1A3XRF+hAlHT1s1hlkDE5KiIYLxvL/zGS7q0vhLOOek/9Qm9rWFz4NMNH+7LjZu56MzFC/suNy+Pnix54RCCnuJpti5fPQvc8UjtyOauRDFkGV2iyyuLcgfMqyPEHXdLwtZ2ZtUM4oIVl5P+fKE6avJ/HCnXMGkgGjmr/aSdtR+IqqbIi8hzgj/o3/0j7xK56PPiEv3s8bQ+uO2VAg3LraBUCrTkkZ77CrDl0aWvafZuEnjVedNEwyr8kfm/LUNaJpCmv/F54j4wPIKqVLUZGuVrMLyCBjrxNNoKyL2aqnZlvmCCPp6A4J3EoaTOFIKJqyvNrPEWXg8QtvKLxctPrLbYmFqbNP10nHK0oDJdzlmh7LksRAk+gEBK5S/4bdL2lnzqxutCBOFeG08SG0wz0axDK/XRBVwx4pqKPSUP0hwEdnFy5RPLilFnZIDdn55mbQ98Nq9RIAlE7JydLBXtd9WLVlD5Zme8mphLDj9L/kr7dpvxaCG1h0tJyEXalduGd8RD57kpP9CeQGHzzWMk1GkCPoUbumaPQ8QhE6QiORRkFWGyyIIj7OXD6vZaiORcsFrjqZzTOsmZFmqWOWzlNLEVzkUC1UwBVB7LP54NY7LLhxhymFoSdfcumPS0N++jZy+9n2g3en23cGeZjzd2Xa79ttX37w5+ZpZmBxuHu7u7PlemM12W47j6iha3FISM3ZbJ2/fvbs8nV1c3m7cmY15eybg5MJF2lP3Pjx68eIlm2dKhFLz5XpKVq8ibIndC50I9kR+OsOjLMmYLfEqMpVeAsMa7Tw5emML3tWlSf6Dl4/dnnR1fnm9vnazvWFrtrsYQtHI3Mv9xWL09ux87c3Fvc3ud6OsHbguwWWvZ/fX4/W9l89dbLXz9MnG/s7B4R6C2RJ7ID//m1QeSlCFurCz+JktCeVrhld4jSGyqvjetoklljrvihllbhNNfXiLDLYNg5ZrJEQhHCingHByLRIABLdF0UsQmBWr2pVAE+tfwBx1v7ZB8WHh5emJwcrUh4K7gRXOxoYMqhAY7YpVcri8OiejnZGFfbsYbMtRTsIoUKaWTuCvGkFGToZ0dorQlGs2clBwIXrNGABAdmXSMtmFVYW/GRhqQArHGeA4AHmw2bxTX56Eiglw25NbSAiGGjfqenRh9zpx3bZDaze6fXZnXWnz/MZKoM64A0PTxdb9Hltjy4SbMEy73I4cUrg+v3X9u16v4WbOU6HF5ksST7iyBphr5jPQM4NIlDnNYVkjNfREOUWFdJauK0LRuAfVFevbSXjAK2DDTHZ3zudXLz748B//439skEw5/Os//VOxe7uu4zC7ndqH2NZT3Tn7RfTAnj97YRPeN998bSVYB9GCXQqnRkxoh3PfczIuapqAtJ+mEiBz03Q0DD/iuMqVGa8m9wBhwv2vnyxZxVGiVadUjt6dKiIipIBzMNsnLUpSk1fxQxZC1F+T5Pk+KgWuG4xKVrJ8qFg9+ZWblWmb5aBEqmDJto4lHxvJUC6vv9FJ1LXSkF7beUVSdOg4nyPBgYeo+KukeeClVxDwM8P0cTEs3T1u210q1Uj5BwKkqsiUmt8ThgZQDu3HIn489bl3m2GFvPoqV/1bbvFEFMKM53BC6uw0wQoqfzUZmKEVjCtxK++ylHIJr77dyB++ItLrkBa84niiM8lWtdMAA5hUnYtqaJgubAcqoEYiHJKH4V6FQ45yfG5uU3/gs/LzQCwbfzpi+Vs6gUVQHqFwWXySFioBmTMRznWmng8owbMVovq1ICJjN8QubLk9O93JsaG1g53J0b5t4b6UsWcpTJsm3DvjLR/SuNAzzGlLE15ucD/LeOX8JF308aWZBvc9GIedvjumJnJ
m6mDfDCPReHx4ROFfb/UifEbSfZMrJqtlm8ZoOUQilc71Lfb/y//rv7u7mn/w6Oknz17uH+3a6jbKFrYt1+HZIj87Pd3aySmGm/PJtY/HXxxfnM3sPnORoIkh8mFO8jJfxrzbejtauz59cnf120e/R57hh/zk+JgHDQ9ZHW5yzdriMAaGtqzG3U/sBSjXqVQfS0wpewpWivQOVsJMVhvYU6CcZMd1DtCCZ13IsOkym9vYGMNXSyvCY66oHeTpY40yt2azwMbGmZ0Qui3zi3OLJY8OdyEZqG0/hHiOe6T4bnSOdpv+ESgWGS4yd9cH4SpFvTm5ymhN18iPZof5LtgyViGK4DFKb2ZZUoejJ2NGxdS1ENxVkAyZfdZ4mo3seJTiV3fEOE5485kZF4sqdKYrVVZZCACi7vSFCVCFujg7t0/BTggflbi78LXojLZnk9Hd9q57bdcdErFMPd88+2amqzLasBnkzoWpZhdtr3kzOzm7ON5htoyrDNZMuFAMaKtP0WcV01ycJczprsueDepskVHAmChFQLCOoYaZDsqqg1hcpc8QGZ220gB4IgQ29N/P86EfVWzA5OScEP2JTz/9Gx3G3b2pi6ky+VkZPXn02KUhgLHlz//dvzUB8GRxb5Q/f/OOeaMD0tFcmFx3xm55m2RwMeyRubINCBXVTggnpKQ0ItXhoamcWH5F8kwJSy4FOi2juVK9QxIJI5n2YeZwMXNvtBq8urKpq5pTBsyFQZWRwgtHmJAO9Ao5f5PBADR52IsAjmqj5wSCBEY7V0aUbMwAySAoISp5x0WMckJTgrBbeDecJA/3l8UkPc0fMDB47bEt6QcJT5PE0xR6cl46xBN+c8pNWDJqllYxkN0hXfeiKjiPDimaQx4nI/D2YCuLhgfganF3cny2ufWanInFAWwhBAdHjyYbPouU/gcw21wj17bOO/kITjYyQmBpcDBpIdV+5NKv7elnU0V4RHEQ0qKS8IvCZIpY1kG6no5c02kvb/oQpkOrvA2Pk4oAUHaBrw6yJ/qFc9IWWmWhA80FRLUhA2p1J4pgaLq9NQtO5UjAumvxLpAFIDe9ElFd5aiPnEWVaeZeq9hgVETOmmiP6ccwdLUigxVanbvLHQ6z3ZkqmTm6ls717pNHb1598c1XX9p49pMffmKfxccvX+zbhHd4YDOcZaEIVqYPnMF0oM6i//jm6sJ47TrbBLO2XBy/Y7bcir07nuhVvv7qawQePHmEe3obl2fnhkOWxlK6TR9jPdQXcnuNaptMH2nqNA4u0Z7qTKHYbitV17b2PTmwP2dGdrfWL5hILBotNh4dXI0WBh2bTw/PX7157Vpbs13TycXt4mx9sb8LyaavCaLZ8NCC2/mnn/7wd3/bLJe9kOmw18Yt3CvKw3xMIMNhZlkBTE5ITX3x24IKhgB4ahTmuNQmhQWelIIT3tXqF5gQZekQftUSebrNFUdyVI8UNI6+ffvuq6+/NhkIkrmafvwRZbWbD0KRROYqNQszo3h2dmGvPPpHtzdOj+ESJkbSsqQVJztHVvf3X9kcSg9ubE3x01jYhUrdlqsCkZXjXHYXwpQFy7u7J0+ekXfJDcAzmN7JhhqZIh8PUK34iiwvRjD8KVV2cXoGLT9I7cny6zBAAUlfK0j2a1QLUnatUxZYofh29kKIUarexcVGw+emZ88viKUDYn/91a+eba2/3LvbevJs9xnGZ+3q5vhm9/bg8htfJnSsxXlnU7P3B0/2r3715dns9LcObE8l2DbezUzlGsebhfMRLJMfaodjFMf5kF5mTs15qiAF0b3Y0aUuPaYMeBh/NeTveLpBpR5r9k8Bjc5/8dkXSvGDH/zAq+uXdDgmW2N9tjSu+ppXCju7Uk2yU8b/4g//CGNfffW17Z3nZ5dff/Pq/mDt+cGh2swJWVl2rigonq506Oo1bC529xP5DS+8kyjRANMhnkol0IXQ0WHlSsOURWD3aQeixrpkt32WfMuAx9y7ezeWqxqGJwdbY+in13ZeRTRMBDETXSxNMTSZU1F5Dyml3LU+U6Gxcks7Vz2ZKmYjlGIZtMrif+a3Mlg+IAwxVWoNYohaElxNOshFvY98kLZCh9J1BAmOrEQfZUdcCUFsrV20jWqgbUBEXCBR2f1kFX32zdK5GqhOSjoqHCQqpWdaByTtaRo8+3WJuX5IsMCUoZpT/b4XmCavhHnJioHIxgmg0TbOLhp/h4ek4huYBuhXfnLM79lJqgRLvSm2kQ+Yh1RdTLIAM8aDFELIVAUY8KjBLkpmnA5bXIcPCNsjkBOb3F0Gv7WRrX4u7pxuP3504HDwoTnYZOPynjGlr/cdrO5iYH1n+z4+dXK+9/rsrSQ68VbFdQscgjHuNQaZn104O79j/eTWTnIxo6+/+uJyd+/Hn/zw+Yun7vwzxf/2bb7ypXNak1c0jJLQzjoEVGSKUMuz99eWx3w42Dd2s/hIDRiPpkQ3aqPaor6ew8MXFte200x0U5miO+sUltpHDuAujk/e2Ac4vd9jY6grGt+ltDY9Ui52NLeB78pqbkM+eMK+4vDwbKbh+VBrHdXVVwl7U/6yi/YQGz8nL2l1oKl7aNTB+ibjOppfZzMOPA2T/uXI7OYdsP3TC8YxVxRuT5yVrvO5tcWugMF3nZLEGINorSDhNBnSAS3j6qsyFUZOsA4VOirhz7vj1yVLRGhhIovyNTZneyjnqI00GfFGUC4IweH0mMFgb65KKeFvdoVTJWyBrnCviOlYnmZaBL5HWrlWQb+BSWU4xOZAfEaqu0ZHN3ebs+2DzdH2jc9y6lpkji9X7eYQC7G0QGJmZ2Z3++3MoOFat8RUlONW2r9TWhkf5AT9quORngeaFQXdmIB4PVmMCW0ZRIUxEblqO4ApvCpDHgFeOa9gFEQpnj9/+vjuiZhf/OIXJ+/eqiYlsZPC+FYgHefpzGg7ufvIgoHX7/ze77/4+MOnz1/+T3/2p4zW2B0tthV5yly2qRyNPyO77FkIReUPASE2pPrDWUQ0NYJ5wvtid5PbrxK1PLEQOUVn/qQ3HECvW7bEGLyFO5kMTkKB4UzRkyxLHPCjYby+p6e0myTWSDzV5bJ9FBcGnDwhnvXKgDkuoJpzI6/fAXnjH16lTEhR9BBhJV0+hLcPWo2gX1OKZlGJQ0bUwJYKfwkvlSQ9rByQB0mJb+NsJJ6rXOQgBYEcDEMKxE6B13jVnR4oJKYf9d26sJ2WnwOmARlUrBCGMK6zUI+p1ip1k9dkLOu6Vi5Bch1rnB6FUbJPR8s3cl3dheAc5o4riZBO1ZQMmTbyjvVs1/lq9pJalS2Lldk4aQGEB5nZjZJo3jxMFYCITfN5yW0/SRd57zGBpN1UA15RChI00ha9y4ejS3qc6s67DimL8NEHP/A18Q+ePZ2Yk7nR/mOElJ2OcPof92wHNk+4dr93+9HL+89v3h4fWyJgnLMetnAnzvr52+Oj3f2jyfRSKr3XG4dofVjv7nB/18UNbn84W79AhuufKFOfPDRGacL0tZORQVUUR6oYsxXEjvMcw7Gs4c6qmhIQKInaiYXb9A
HHtUsF0QrMS6V37S53Xxc0AaIDmqUyz5iyGs3Pc1Y6R1M707SslYWoVpv2JApDPDmpODAcTwfyd0VX8PtHxYZdXKULY4dUPF5FMR4W5o0FdNhSTy4WUHk1Yu6Ey7wrd+KBYKMrQwTXVVAzjvfuTjMjx0nYTjEpEmrxm9fvdOgtTcrt+sbO240f/fgTrI4EyKREGnsp8OxYMFpxuUhN+Bcb6nvTtVVghfgB/jqOo+BpSqVKxUWEqozCusieyy6pj0LXuStGVNYSNisabL2NS+E0aWmZ8nZhp7mb923juLRHxg0lei8qn9o388qq5Xg21vQZ07v1i6sctrtxnaNvwVTnz0RCxN3XajxKr+oPIIa9ntSVE5owFmEp1nUNpggFuSznqt4BoFN1COdf2v8VWmlfPntuT23NhZ/q9bio/ipbD+926gMIWTB2ZABT9LFsYr++/X//f/6/bq/4gz/4Oy9fvvjJ7/wOu/bq1Tdav/kCN2IsR1fNIPkN7iFZJcpL2ahGEj+AZuiQpJF861n9eXBdGMmIZ2GuziEdwAalx+kX4aYETdQv8cHTWRT8MveO6yyEx/JVFwm/4nQ/6LIYowzgaCvIMJLEaY7ggSdu2UjEx8EWhA9y6BBPYYkaYAq+H8FWrslrnAIehuObcEE8PZpp4AGmkQ94vIZ+NK+OEAqRlugI5OGEtOtUDeypuofw9kBVIpjbifCd2rJkyWbDHr25cgMeHoSljWVa5n2pAYpKoLKs/CCb1I7tcgHzWpoNbCT4AVODfJVnELbrEMBdNIFCOl1HdS6eDx0wWsxTKm6Ab5wPs+X/FhEN8eDZYxT1M6BKCTJvE22E892eEdOJfF/e55x+8sknjx8dGmyZBsP6dd/Ouzo3V2ESyZfor03aUz13pujunx8ZGB1/NRm/PbaDjXjTEYxDLkZ/6uPIJvQvLp88fWqBwl00Tz/4wATRZ5/9+u3r13LOKC0nGa7Ozk4eP35Kj7HVKqFYYc7NiNMS1XLR1AhAFaeCdIxIUZpAy3xYivj0Y8Bk9kz2GoJRXjaf2DCWb2xbMHMHG33lRnHzTrUJuy0WDuSbS+Wgwm+8UssPuJgsOCCewpvp/EjtwCHEayV83xIbPgBLHi8RC+GCF85VLbJhejDQlpiIraxrmteU6du3x1qPj8Tb+cLW7EyyIqCKGzE8csdYgZYSTTK724jhv5q5XSLiyqhkm0HplXSIfJB8Xdkz6+NZSpVGiTlRL0WDDmK60UUp+tOsqsQl/EWzHBmDdmIx09Orp7lKSHZ2sXyvFt423HUkqkWutZRGDDl59ySNNr3rHU1GNxbM12Zz56rou/t5jkvRbmu3bqAy2LS8yiRkSUD36Gw+OzOBbD1VJ1gx0qPWOJXTCkiqkcQojk0N6LGbHL34JS85FrX4EBK8oQ3BRc9SIUiY7mmprIR3DykKPUyGxM5mk9Ckmq0ySfDFl5/ZXmSXoFgVZDCqeiTnSB3mOaVuhvQ//fznX795TVR92u7xk3TFzr5+pSeynArpBMOzCfKEVCAPh7+eZFUIv1J11PdTDSHA2nUIf3tCK1OS0uUgjI6e7l46NjmDvxLQAq2cl7kLkLDz5YctPOQKq/BlZvWzbE5Ze4oruADyd9qloFfsCpvf3+A6uYhOC9UA1GibS2LRoJbENjFmPL8P2UlQ/jCKvwhZPsAMxRmiGj5trzIqUJK3NDzZjZCyBm3mUbLJwm66XKYgJDRXbQazkq9Y1cgboAlrOZODwHYd3v5UXEnCEEXY5bp8lVGVPS05d+os1QSK/aUbEVUZWEiaafxKCp5kV4mWtdPhoiCExuaBrDS0JsilnzJtNaGwIbBpiO83OOCtAjQ7/YYlqztfCWHO+gfhM0uaGbbQW5s/0jeHL+o/BLhYgMUfHT46fPb0cfrvsyvzUMLNvt0773K3BdSJojZX1o1ty6IvD8abR9Ptr7TXuxt3EmiUpp/c+31zeeF79AcW4Z0PpYDsEzg9Ro+NaudnNpZbHKWZswVAu37z5huzkYxKcckqUbYwmNAn2UrFFGFE1t+QHp6RCp2UHN1J4VdOvMXtzBjRgrUxOP0YJ0HSSSQV93Zn2Gdv68eWa/dsbZnu2HwYDqxErjEVDatKD3/CpY7CDR6vyG4wzw4RyPF3SD8bvgG6YtrfapE0GvFk0ANaeWpE0qaChYoIUUjawIoAep9A1WWG5l+Xp03lX39pnpKgAdpGki5yKVb86JBN0xMpDz0Pq9NoJgktcfk4y41BmCfCrq/3dUfS6ViYH440po4KD79cwFRGkY0VtYHCG1FdcKjIfCfkt8ATnMVtoO1AqhEG0SE4CbmwSwkMEK+utyxiXq0vjqlMg0Szyxt3F+uXJyZVDI6MwW0Nmirsu6ub46ub0f7e3Xh7bjATutNxKkG5ywJlVa4cEeOcrhJkQ281GexPprlePCpDWXiA8bcDJoTz2k243rIg7TUlsi3GucPrG4Zwf2/HyqKF280PP7h0zyeHJ5kCoZ3kzRjPP/7oBy6Csq3z6zdvffydKd/e2TWdaf/Fzu5+aG3WeLYw9XMIHMJ5AlwTLGgawhvDkHZILjxUBOu32kxEMKeBqDMMClC6gWVQdAkyqV7sa9bIpWu0PVB5be7EX9LKk7ySy/uy4LIkPpshqreKho+rroEoLlFV/DyrXFG333YN8O2wvCVlpUUPN/iJf8W8f+DrkByRS7cKDFxF969Yb55em8nY1YGNZIgaAHg47AJZbFhm1+FoI28xWuLKWKi5ZmADDLS1p1sd8ZNROxgESpLXDFpT9qatAYZXkEJE6ig1fOPkVx32H4EcOYlZwh3/CpXYAFSpeRphheVTTcLbL5xrnO0RJVOuUXkK6SdP+xs+L6mmvEnbCD1zT4TJlbRJBojFigOgpVEi5dLwBlTOAj853Lc9Y+b8jRUoOws2xtczGxP28gU99iD3Wc9y4tT4Tzfser43Hj8+PDh8t2sXoL49VWEKz8ZpX8r7P/3D/8Pv/+5P//W/+7NfvfrS1Z+/+ur1J7/10snKuaPFccmaiyDXFgZd45qbyiVA1nWsLdnjl+M1G+v2s4AkBhqFW9hspkez4qoSJcJySQyj7uaLbaotJ63pYZdi3FIc6tf2cmDW1ORqRJl73JRhOccuaQgID1d1VAqt2naEIvXerpstmW0+S8gjCnLPcL8cbH4923XUg/gAdSoVYZAnpa12qinntV21EVFvhBKRkehQxYfNOwBbLewdSIkybjQ7rV0SpIBxygg/eFVF9xMxph9kxm23c5WoitSdfXPwbY3pDwtRjINSpHUjrMnuIsAQTVbNBBhVtvJHbMK75l6Vtye3rQ4plL0kSL24dPfxlqEhnM5EAPYayaSTsi3PVX+5TdbeGfzSYaoVUMbBQouL1jfOXtnn5XsOqnTz/nLNn72hmpp1IR+PvL2fnfv02d1od+9wc3pwkQ3KRAi5+jaZcso5yxpL4Qk8BMnTKBxzlELR0ilyrW11c9HGw3UBiSU6MbwgY/ipKqVAaIN5KotBlb6AtMquP3EzdwV2bpXTafLHg6MwZLfk3f3r0
7cb+ZTvKMfy7ZHZNvjPaXc56YAFC9fC0XnwcwILUTLm76cQgyGxDSmPhmwAgUKSecldJY+m49JrLQUNQDu+ub/RVZZ4QxeZdETHZ9XSs7dGdNbQgm/HDw//kGPIoH8zInufqcCEFyWSwJOn//X0GhS/qeV0ws53aFX92s8G6Cf8hTL4hxD+oeBiVSSXwJWKBznA83CNBL0DksbQ9AMQrtSdHT8PV2iqXIWk03ZeuAleMDCJ4ecRpaNs2tr9CWa4NATaL9X4IPmAvNiDqe+1TIdA1ZSAVDavS8gS0MKUTAWyBa2zitRQhwAhmV0JN/Ix9QboTAU2jCe/qIdOYjibmzINhqV08ZvvSn2uMgrNjW35rGbDzxVMMqq3PGFrpx2YDdMI0ijiz35Cygpuf+4tzaoQIkEs3Jc/trTErOVeho07q1DZweNGm6uLdWddaqOjfr2WFiuiBTpcvLHhq0JPDx+dXM6PXXTrIOn8xnUFL589/d0f/2RvZ6KDu/OL3WtnIXf1mKMjdHIpspC3vCZNdwFRDktu6qq6G07rZrAyH7i4981aXE+S2oFpk0VtDDRrYSUzRQasU6gIPlRBJ5uB8maK/A6i3LJkQmbEAsg5Tcxkkq1j1/Nzl7jNriRrLoUD3bJSyWZFljMHGFsBCeTUHXge8FynlbBdh1RdLDW+EFFC2tNgXmlzSOhQww77A1lvWy0gZ0quHh1hrx0HDRwmlXDS/tSDbr2C0PEmnWAoAmDP6KQb0JCRWLKjT+Ccme2CFpCcX7o+swUg7FEqfwQnwuELA4s+pp2Gn5DYDToscFwXAfdi7GtDbDiJCavYyiuCKnm3I0Xg1LLMetJV6dirFv4wKubKHeekjwxmpyfVn60KNoLng9DTvcu7ndHe/O3d9H7LrcrW+u6v7ycuPd40ybkxT0KTHNPJwZMPpxvXvrx9+MgcoR2mOo768UyFLSplYcJnQquHooMiRHaIVCglZVrQHKatytKFTenqsFqUSekoz8FcVWwW58Ic/y2n+2zphcXTXEPt0rL0n20WobvJTNkssw/qyDDXMUTjenxPIwgh89Cj4nMev8SlKSh6wvowq0n0Uk5IB+JBg3UFCGwBFcgPVji/Kg88wdHuK0iLyWAo8y119byV57I26TWQi0wGZvJdt0PW44ULo10qrPx2AaejZ7dPVuXkrqEQwszKQOCioM4uVCKhafM0tLS8rBOiexSiRMIfsxkYS9KBX2m3TJ/w15PHrENKYA4hwcQpNrJLlwBBaFipuw4fYlPqla3iCXQm7DLhLVVP3angIO8WTUcms9RpgMuDYTxQDYH8D16Lt6X9gYBLsvCjkWQLXFQvFmUNI1+StaXXgRGy4GMB+jTA9aogrFzez1gmPDiWzitfV7RaVhv8CtUu1bnclBPakraAeQB0CD/Psk9Eq6+t37j1QC9JOzBZZeq3pv5T13Qoi0CGM+fn3s48rR9pskOm2dqXtUgO4qV78IJpoaSdaDwOJeFQsWgV4y3VWy6l0wVrUFlmQ3xUm06lNgNEqU2YkIis+5Wj16IgNUP2wVVGlro3ttxjkeMZ9T0F/URRPoJoH6ejV/q5dvLZOri/vU352t3pwoKvv/js4u1P/vkv/uavP/2bn/6dP/j4ww++Pj/+/Zc/pS+I7Feff2n0lpmhNNJcTIK1/z/O/uvJtizPD/vSnzzn5Elvrq2qrq5qO93TYzAOA80AFElIgviikChF6FEvMn+TIvSkCJEiKCpCIEUwSAxIYDB+Gm2mu6vLXp/2eJNOn+9aJ7Nu9wxCCq26tXOfvdde9rd+fv0W6kxQYEbXo7hgN9pbnR3KPbtksICaaii0PDeWSpah7mt/WeiKCRuBebfml5FTGIo46T+xZm3Rgzd1ESedjmenFjCB9MrJDIX21L6rQsnGxNwYYT+lt0BmLgcbU82on7i5T5kUqUxfVlD5NZ9IXF2U57CkVQN+6F8R1+I0JOrdaHK7hD9INFi15xOTIgI5o2OaUSa6MEaJEoLHSTC8sXU3b2FdIvFqjpcpvk1RpCesELc/gSLam7ZpZxft9HZMlWZc2NHjumJQjPXc0ycAEKw277jl4F5n0p8svzCX1K24hrntJ5gE05OEP8w/5UKIcmoxacNE0b+WD3O8SMS74tJZ8is8DFPguGhBtzrbG61Oa2UZuVofXTV6063mDhlr6WrNSWeXzmUWWMkOB7s7bsYj3iErl8vt5aOH+83Og58PTxNgzOwLzQUxqszkcq6xwjKGRel6M1kQxT/7uYX4CpVyhJvJwsNlzghksSQkpf11TbmLaqyOQfBmoLD0GDm0hKiUFSeX7aDmDuU1/vZ4Ffgs4wElh4M1FUHTno8G/cB9GDIE5NpGM8NCVlbwfHMS0lpGKipaX9bh06B8UFqW8Q6XmW0oSgwOx6ytRKaToH/3qTyLpKgj9cx5IKAlgBvGFSXP0hXCiovPomBrsPUy1WamPlUABSrihQlez2yixjdTaC6wGPJ26/CPIg7TqwZZy2OTo7uAbUV/sS4HmEhb2klVstZogvjS2OjhefXTWFPd5ggoLk91raRxAcB8qbhKYOpklK7lqSHLk3qnFTn7CsmCTmHbi17XRFoCQNPoIZ9Vq6U1hi6pIFdNspnUGMqjr7UiDTZi8+U7h/v8BLIQ0IR7qoYhOrCUgy2Kaki4BJ8wZpS+l291WPOykZbmMy7vIFJFQKtFbSXOTdgoYGRogJ0aS6tyQA6LkSmbASPdcw/UMJs4x6qMsEq1UIOlMtaGOuYc4wFjmTI7zp03oGuGqoKNGxk8QR71UWN8ozGQKM5eL1qtJvJV0GnIWAoxM4qbmZxV/Fc7WyyY3JyD5ZQLhDfCmY/jzTuX01ZYWyAeWJpGS9vESxVvTM0OWtTm0UhfClnlAwfL61txVbKBX2bx6SfOuFrPgpFZgxOcpgFOfHRZJrdIf2XfntGe3PCt2KCDCrTfMhfvofROuVtfXQ8ZKycNTkXVb7acUK6x8Tjm/Ye0i76Eu1q47o8GQHhd+KXb6xbbld27uMvr6cvPP6ZSWFu8mgwuxv2uYJBcH5z+EzfaA4ipRR85cKhJSRcXveHNePvhI6EtEMvx4PKms9xubC1v3ppVMdQNe1BKHDSiuRXGQBAQshO8A4ioLpy8ZVRsJY8keTnBCyIHPgm6KAmyNQJBESjmwk2nSZYjvwZ3kzClHMGOzjmCl3daK8pGEwOfomzCo4IGVD/gkaFMcmN4g8dBKMR7ecWqxKGDkOe3q/3WNrmvbG2R+XyoiuQHMStratS2/Ir3dhgeuiDzJcSR1ZNt0AAgsiMPPO7NGNkbhydd9Adapb7x5dQUZ4Vad8tr9JpoMu6Cy6aQfjaw0rPptrXd2mx2dprrG8v98dl663KtDbeg8UsrUy1eIGA6Yn4yu3L28ObmTsKsjycAlOMkmwosoK/FZWNhNJzaa7ezjZ3CmIeJDz68uszisiJmE0FEgGo0sisr/f7EVtl33nmSthaKq+OSflIF48+xGNlIdSX2koly1poj8dY++MqH0R+KZ39z83R/e+1sdP1mSDm6vfuo
vdXsntpud3s2GB8+3Fjf3rpaWB4vDCYLg/0Hzf7gdKNx01xbbKzcft49A/Ag1Cw6XxieYeycoiLN1f75ICBS5L/pOLGeE8BjYdkIcfYxj7y2zClLlK6B8wTVl7loys02ZS1sZRsZFhPXoWHhqqKMpUTDYoTYV/BAa0wTkhJMAyMFWG7XG62Xr09UYAnptZXjim2CcBG8o6MDmDRwIPnU1XgprhRk4c1vPKxP7p97JWV0CzTVn7oRkCyoWWd8XHigHKli5YInZcdkDu2F31M66SJYG822QFSC9XYlPt6XnJaB/tK+imNdvQ1ZQbHcg2FWRuQqgoT/g88gN1+AiSAOGx7DYcFX2Pj0wPjLAZVrgSfWnJbrAuG+FJ2OKzZ/fiHV+vMoa1JXa6mlrdazpA7lq9KzZCsllx6Ugoo1CK2NJFEelPansymu9qtQYk+UpQI3CqlFuanJ8khliYaCb6j3aZtO6bFvFI6CIKlpZ07zM9jhGUwB1ryIZGmhlxH5cp03+L4NbmpKtrukyjws7UEb3aaZGlg6WvNra/RggpAmPmxbk3ylVdhNGdwYDYqmjIn/AwppRik3TfWa1lIPKmLyCa6flGw65ZdXfg+LfBw8qHw/3UgpkyY5xHgOhF5JBg34yeCmQkB5WkW0MvXFET9Gq5RdWIYy0KWBzKvz1kUvk3osSIN+y7Y0ZmsHYTjJfEXpQlkkt8Cwzpm3WBGkcoRaON5oGaoahOEKmfVP+JHpsIfrgMVwaeiCGSVZoVnh7G2tiiklG12xRKIc7+3tGlv6KrHrtBM/ARFIrc3G7EpUipDetD8pc25EAAA4L+Q6W0kKXcc8j9ZbTgCkeSG7iMsRXYI8tr7QuwQecEuK4botKur5WciUNs8LzRhlGMw97YXJryyZ0SvDXj6v7zMjdYb9dpccMJEfiiipsIn5lSaXZML9grrCHhVUCGGYZdhNKyq6E9cXINXs0VkX2AATaLpsHdHxy/ZzoSTkCRNX5k0hEpsMaRkKtbGbT2CkYRq2tYWdvU0BVB32NBgvtJrUX9mXvjS54qKGfpsw4Of4zcP9I5N4dnKKA+BHh0vgQZBFV8YEiW212tybzD/pJA3D12XrOoQdhOUBDQhUVYG5DkvmSCpKC+UYojS7FKjBPikkPKeExD2CgZE7esGgxAErsrHo4ETKtLXlxaYThs3KcDQ47/euV6eXLbRROK5Ra7Pz7tHji88+LdEOQAqYszlLVUI6Bbzh0GZ7fXQ5RmfJfjQG2u3AZhAFo2qA6TBSd2swLaztJEn6vFjusgfCc7AKMKD00J9wM9mA7N5oZAKItRmuGzoGGxbT0ayeiHxm3kB56wRKhCCBl64X4QpU0BwR6zTG/EYZWAeu3txfVebeVUqb3rpJD0vKFGRW8lYHwt2V5GX9Ah4HgF5JxiLUo2A62VNsgVwlwFNlkfidVXdfl8JSAagv5SvWvYf1JvdFFQm7FVTHaJ5BiaDjWrxdVWhAdKV+qH2lbbWZf/f1/2sGn5X2ZHxUpOQiws5dez2pJdRe6HhhEtGyVOfnL9UqszVUHxqR+s8f5Rgjz+elKUdNBipahHJz91XNI78kd2osE5chvps7MCddLcb5u+LdFFXGs5ZWv/KotrC+leE+1RbWq2I9d+/GnEJn7u+fkAnwgJpQ0YqicLVQbVGo5MAOa71WWj8x49o2dEJrdrXPx8e38ugRLjPZaqPvupx6y9vYw0vyEy/rK+2vT2rz8pM6pTTP8/rQtd7kCdAKgGTRVOTulSIgOk9MlwbXf8Y92A/jf3vtwCoxi0QYtW2nlLU8u+iyFERhZ7GPL52FSKS1NCF+57r27YocxxijLEIGYZKc4dRmTzZ396j18RdQGzVMo90S6I13BZaDSgBrr3nw0camE7kvtzubaJjTsiajKZlgaU2Mhq2rnLU7yUn2iCfMjQ2L6oASKkfb+QMdmPxEjupdMJc0J8wbSNWaxtB0yQ5RbG1sOfp4s223WBu/AFFZ0jCXgTVKrvfzmw4qNYxQBeqAgSSDbDXn3c185P30lTygpI58Qd8lV1lB7rw14q61fOSqfgI82OfJKHEUsfpJS07ELUtJjSCkFi7Do0ePcAxQITBA5iHZnPtc1LlkRCVDflouPwvQ0WHHiEWJunTV6oixr/0Ug5fXwyvo2wG/0USoJ3sO+NMB5uX2pph+y4NJL2YN5AK5MmdlyVxGr1Sd9+iA1ZzzggNZBfDqyGgqlBvUm24aiEA3iMJKlH8gPZgZ8Blr/0IrYtxXSbaBg86UxyqnWRpNYcGjfdE+By4S0cHosmI3d+nf+FtM6VmmV5PupNtcbHR2HzROW5YTPSTgouU1c9TKijMttK7b+5tr9i+sLfcHozevjkWndJ5Wg3hNYl5ChLK/ODi2rMosEzWFEczCV1CdtUxcwW9G2BSYOIyG8GPeuvewDr6RMSBSyV9QRzDWvBB5wuSCPY4wOdQgR2mDUp8LVBY+JVUWwpCP7pIn96/u7z25f+h75Er2fFyhJwrAkgLGqT5qtGQpecoNiIazI+x4VTBvsgbQS57Mb+rV3PuSU09JeVfSl0/uave45vEKFEjJA9ZKwahVwKPWUMYijDDyiniWZKzmRb/1567SX8Bx9++9va+rchm1DR7WPLUXpRmpWP5Scxat+3k5JKTyqv6UWWW+l4kMOM9T/mTa6kClIACSp7WbIMMzV5/70HMV5XVJ8mTMy0OgUKrOeIJRVyktvGuPe3lqC+tDTySf16u5cW91uJYMach9TveqU73MoK2OgCuNlo0XKvG8KiqBOKKlzbXZvnJjvWFR3QfQywyqxY3yC7/xZTs9l5QWq2apyE8tryvBfa3aDGcoSpsVInlVr26kAOLdVNQn9aGhD+AY6fCOmbf6PFCUQwWdznAt3o5TTIrbZaSiwXB6y0+wINnpyFaX4dLUVtzYCJijbBnBdIvJpiNCL1+tCEAL2Tmdna1ojYd0pe7Dq6nQr8AfQ28eREoPES3IISO2th4nK7vBuPONEip3Y3PdSfOkIE2tE6TjJRHVUAgwE+oFXJRot+hw1I00Jd5gsXJN24hcUL9xg9l1U8g7Zu6s0JhshBFIIIn7VIuWrVbnav3kbe7SgDqb9b7M4BwOvTI7Pgzo3w34/Se+klkxyKQqdNATb6mA4H33WuU5ciU8Ff+Pyu544pXMqpcN8HCXABTYeaVVcsUiUKoOnbqyI65E0kLCbS+Di7G0/DFo1bPxQCSkNSYIOClmgkC5uWssUU3fLovNOV0W7WthbFgWHLV2s3y5cNVAOqjfY/8iEoYBzzdBPGV9RbXgUX6SqgLCZYgie+CRuA5of5FwI1iXHf3yy2cUs2siMlgEEdcI5g4PmbKTpgyxlKmhxUhxDCUETHs3msxagiNq3JpQzGuL4XBW+4vj4UX/ZHDW6C6t29/nmLYcKmwSgNUVRQuNLJyR3zdXYiy3tgBU6+JcKLeLgbDHdjWs88ujg4RFneaZSSmqQXNZlkYw+FxRpFmmI8oFSWdlmVz3hoPb4UJ3NMgaw7SJrTGbBgVF05Ykb0DCzBrVopiR56O
f/9y4GEqiFSuaYTJNav/1X//1YIyKnlLNHOpSUIa4JGX97ZvkeCvJU7N5lsnBg6YpaU0Gp9AP1hi4VIZamqt8fiePq5eFmmA+ddlg+wdLFKNj3pcP9VmhbpMQJVefp2614CrLSCg23c9Dkqrlmpx5WL6qX9+34W/f1DLnrbp7rbB6W2/qtZSXS331Sz89vP/KfbKVGU1D75IMhdx7NR/tQgjC9qD1hvK+8FIU1AB3xb6M+CoGoQDKcWiNxht3FfJTmASGy1XA6CEAhInyhTVQxsHD+6bWm/pEFZL7iv3vq543/q7Nb/+VX4V6ZPV5nkkoDJdOgez7nB5DE5AIBt89UTgZRQS3Egq6USMw1WYTWJmNSq40OPkNR9b6l81Wcp6U1rr6XE43Zslz9/VtSiuz4Mu/3Wt5PAxeUlSVq7TeZIZtmj/Rq/RIIwpAegUZFdaSFBVX7z4POs0w1RwD8aD8fenuPXR6x3jCQMcds7XSCkurPsELotBYF3Xpcm087XZb3IcaMOq6XZxpNWMspFqwtpEBvelIVkqGszDeIpXccPCFvsDBxtb67sHmOrTLUYIBIS23pLJ2YGRbmCMCRd7OP9NOhYlArtuLU9Q7TIUagyiaHWHcddIsBKUtLNvyxZSY7TyJ9jpnezO+ZVFn3O6S4c5s3kG7Br+dzJRZyPiVDKWEOVzUe8XU/HVO60+9leYll5NTQD3pPGxU+QlxFr8iyt+QK1VY7aMhy038+9P7m5A6zyVPqgygCs4jYr8AmHkjCiBZHPh4TJazM/lncIaM5ZbJgP9Ma3W31VnfXMEcZPk17L9dJtSuLlIkLtOe8RBM/ylai48f6hp1Y6HzzXabmY1aQQtjaRMki53PPJikrNIg7dLronJiZ7Jbqqx8NMzCMI/mXjZ9JWnTHOgLnUuaxWqKzR0oWexAEbOW+sPBTmszHh4rV6OrbkcwD24C/ORvB5NbA3M+fTG9aW/wCwmwxgf72kGQyNAqhmS1gZuc3o4at02UcaXBDXJ5veWUUJYtTeGNSgq7iv4KBBWBpE6ZgS3iXwEwbBXIwV0VECEasXU9f/7cKSAWYTTaDb5IwxMe9TfitpRAXxVmgE5JZtYrU+NMr2mf96uBzPQE7Jdy3rpt1A8fP65a4HykEfNPyx8/FfH2k5qnPrwDs3yiGuntnKWofF47ZpFHQwd0zVTBbnqKaMdn1mSVhMQRDazNWsvb11rI/ZN5/oJudEWvClZKdXdLC2CaahRMp0rVIVfYt6DtEtrSIskol/8Lwr3FmLNIezQfivuep79lYHJTh9ifkjwBQ/d9L70Olao3NU8aX773t4xTuAmv6tVNeV8q8FBfqKQ1tEyenuZt4ZDl9zw/71pm8ODBmmrhxjmVhvbnzppHEqyVe3IVri2p0O8yYz6UfKR8yU3lQFNOSfcZ5t0seYLYy1dZevc8xR0khPNIsjTns6klhfZEQKwNhk2kWiMsKcSAxtL5cMYTvxkKl19Sgg6b4vyX2UuqUKUodfgJxNPHu5+l6vl43j9UVL13LR1NT2vOWqif9ynFFuVDJWOe62+tzJjSxxv5maXJ9WWCZec+Fl+9GVkoyPCWLAXGgTNjgh0k650tTOLKZEqrgpXF4zpacrJ22R0PN0OvOPitCtMehxV6Kf209ZCEoQcoDb471EeLQPS8U0rHJHB5aayTIRLKWe90qg5XXUHa7BsPXenBgqUiotn0uqBAmFCq4IEjng2DHfATcK7CleMKTRjYOrbMnh76pBbrxrCnGZ6U0Sy4NAuh1ng/kikHk1KK9er+ebmf/3QfolJmo3igKXWe5FdvgCcqXkJBdqHF/aPA1X1j3Cgk9CwnYyWZP/dKseug1FWYGIAT8pkdAjJv7e5xRTEDKPNwdGFT10a7M57y1RisNFfWxF5fct6F+Fhr4gYtNxfPzy76M0EjONlykmgheeJLcJ1Yp5q7iQsz0hROgh/ZCgE1sgj/BVVqRmlw5MXcZpIzH1YC+kOwipKnJGrH2DXCX8cVxVTi9LTfCJk+85kppi6/uhVxfXm2IOZrU4uGV6PJcHY9oUvEL90sj1YE9+fI37taaDopZnm8fHU2PGf1FPEZrYIIGdeuLyd+LizPNuxTF0DX6C7aVm1Njldaiyvi6y5do1sEG2XF+zotubYRvg57nYK0Jyu04N5i37HC86piiUJ+coBWORvFhyN8W9kaKE/pVx0eIxJEUVlVKAsjiDQXVnaV66dPDO/r4+MPuDrKaLSSu/wziP6BIGBSAUq53hr1XAtk1Jt69bYCZelAWamyh/AEdqk9dSVd0jFzFKhLSMhkJiNghOT7EoHU8n2Rf6X8NKosuhCYmnxdCpijcmABkSGIGaVoTspiTJ9CHnxUYAQNCOD4Vzs1L6u8dT/vY3kq//3bf9eN/HIZRAArEafNnIdSKi3tc50nVd+hDxnm93d0UePzr6TauHQhw5JUuz0vBw0yqBnb5K91hWKl30CweFLcNV4zslvVHtXYzxUz73ctqpQ977UnfsqvQPeAozbStd7Uukqd+a48DIC692luypr0rWk1DibWTdFNBihl89A0abo82po8xaKgLvViu5Araj073u189eTo6Mg1lWlYLTzTmORz/3lr8Gvhxj8wYNjBwF0ra+b4rUX6ScNqgX66kdxIstXGuKl1pfxItgEpRGsB/BrcKHNyclhUIo7HpE2hQIpBecbXMA5qYvdhpNEcFmN+4O1WM1JTG+qk/cwMsUEss3nFZOWoef5qL4JP1WmDagNyvEUsxlMsucgXTlpfWsDv07loKccOvdDAuidsyT7luG6uLfNLrqdrWeEkrHgq5tCKIiqmm4bEMtBxHQ2jYL0ZqPhxFWDImuE3a7qzeyR7aCeXM4i+6jPhULonu158rBFGW4rF5g6APcwoFXgozzJDdTDVVVNaX5Jv7z/0qty7VBCag5/n7GF1Imoe5UuyKTb+dVCIjct3M6hM1CdeKVF25gt0DR8vyYP6Ksr4lEah+YYyQpVXACbt0TWejGXPEK9Lc7K/s21wREjHP3FrXbkR62itJWT7wuXwcng2eCNE0ept43r9tr2y0W51TO41kYkTH2uLosMR4YoXiz6bz20kLtuZrq+bWaB1slWfXkNrsZOQlfEGWRvhOeY2+5Il6E8mWWN1000b6Eo4KCMOwVkVa8Dmcpnv82Xv9PJm9fJqEr5x9bbTWVnfXEiI/ZvF1RmAjf1zZcJdjwEqXs2J34JHmYxm4153vLgxGbIJWTrhevBArVWWvIveJaq70ly2A6rVbFIOkLYTOTGOJwAozoTWsC9MUGWgU6gU3sp6X/HZ7u52Z2uDNpzbqkmxIDptZP5KfHf3BuR+TKJsCTLRYy5KObcM4Y+LM3dWTAoovLkRDTI0TH2ppaQ6UiVb/X6Olerb+9Lrz7c/qR8GFjIZ/vd5QX83l2hrhj5wZ2d55Nu0i0bI4sU4Q7687mEUmMgXsdUllXIqdUtL/Ky1u96n0gAlRdmlBDUEVcYgmCQbIAkYRw1cbN4eJShAUu1xzVkyB9Z+Kcn+S0/e/qlJyqmrsdIWT7
ye0utbnUXZq57CBLYSWEXpTYvrSBLY47lL199FweGwnJbQeBxIIndpOUiLIch1pYjq1ReVYIvbF+sEycRGTcVehLXEM1yywBNwAWYyOPBmo75bfVU4mEMA1GjVyyeW2OuuHlj8bU6N7GZFLvxYNANF1ebZB+PemHtxEnGOOxoRAL8YcemGU0CGvKAyBOy5e4zjm5FA645vcOUVBnmQGOflsm5dPeye5Ivo+33O+VsJq97mjncGyk6HzQixPN0eRDtivO7YR3NlFq2/PjIFfZlU5UvMLM40WJxrgZC5EwV/zmS/zx4kaiF1kDn56L/0eroFguOCJrmv2YgDKQviuwcvQYGDlrHEIQdTh6nS5tsi9wp04sWnruOcHQoT9D1RA0LlponN3MHtQuRpBIhTeZI9qUmtrurOMgv/6FfTfRLb82m3s+Kd+1FxsHP4qI891B01X2Cl/sU7w9XVlfvmbvBQ/umEt2vWxmPBl5Nko/xcjS2DTQ/0+vTFLSRjZ+PwU3bjDp1b5H4/R2CFqHMePwkOfzxjVOpDqv4mTIigQ51MqSDNIAh8LQd/SPjzn0NDs1PTnVyEqrYbU4XKAlZCw2DPPKLrPJRvjTc4w7aVb+637WwCHh4sBt0EhAPnsADw/dkrO0cTgAd0QLna1jwaMWUSSwOEJlEUhr5VcjNy9HSwizdBI4OwPk9s6tb2Vjs9npy+/wo/c/WFi60BMQ8nTFPNG2G9duOpUERwG/koAADz1wV4OJ7G/Yh4LAhooDuIbiGdrA7eAVhZXsHzxbXZHMw1VMLhsUbXIFTtZLl5ZnXr3O/SS/t7bGR0GuNWhRApyh8Vp/+HBMHtVm9xA1Nv1hZTjOewM2i9GBLEWnpWY7Urp1z7f6J+cTnPPkqqDUsj02LXE8dOXB8BHKQjdjDcOcEFcMFfknT+HJSPgUswQQerFgLxiNRABL68ZabhP5XFIWhEWCRISK8bQcHe112Coqc1Mz7Za8diPVykSvt0tFTGDYb7YfP1lBPegJxiuVS9euzy3OeYQ6sTE1Q26T2X1rb19gwLUbr1y4RAG7v/JsY3isTXvk/H78ySeL1xTGXXlwb3+ssN+TSPA4nDNUFxCugu4SfshpYRRhVKxXChNsVkScfFXMjsRvUpYztREaRiQu5Wt9NHx8MHLc9j0KxIYOJAhzKJy4Op5JLITNDrcUySzlJb5wYSZKKnf3SNV0XywgvVYzbF1lXjZE8Iji2V7dlBOhOlaBmrvtpqTOJ62T4oULk6W64jUzFT5prWdPNra3N1po51GE55N9x4sVphH1NMYpf0cr7d6xkPwbl2+urD+dX1zY3Tl4+viJGlELC8vk/n3OgXSgg6NPPv6ouXdw48qNO8K8b3+0ub7ZqJQ3nskIcxx1vNXoabbWnqwa//TMrEnJIFypCo2tLc8vL80u437evvHa8HGfOWtEVcXOXrBnmJre4fe+973JpQtKfuyvbPVQa65PtGHDI6u7q/ARR3FcE2V+eM/C3bSWhz0G4GqlotCVHOwScMCk4QqUF251Juy9OE6ZRN/JdyO5Y8RSSV9zSg1zMlI8ZCMizw+dTqezhlVS8a7LydLhpJGlcz6SKgjsYyfCfB5QnBAyXJEOfhS44jmJTKdiImX+gY4zPEOlCrC1B9GhEiAM0h+gmjnhNIf9Jhs3+B2u5autLmesQb511BhgO7qjJ0c8CMNEKvbEYaAoOh+hylX/yj9KBZnBk+YqSSFQAFRlZLAnKsqkG+coDS1MS88lFegvoemERv3ovsvgjDVQHGoTvm3Okm4CV7hoqSB8D0ZkGnklF8EZcIeHtfFECiSkOIlQUPgwWxo/+e58GoovGbnyLquWNfA9dIminYAvZJQub4enNHAzodbQ3oaXbKINaYmbxiMTiV/N0qefDN7T3ujTixjFITLKUq/wRZvsV517xMa44xUx0+TEEa9LvbkTuo80cv3oOTBi6oGrtPtxM3nYQ9N+9SeUmsYcwwAr+qFhTz3H4kRmDmubSBeJPxp4+dgImYkWLJAXrXduHIwE81iusu7GGCTF6RB1OhpYpHxxiqbcFASkV5S8KBUjXJyeUDxsYwLLI+OCX+PVKfGuFZMkRIOQeHo9i6BP3JwRWlOfWsZG+D8IESjD/0jyHrUFk37P4YwxC8MPLcEwtfVJN0AQY0W5wTyDA7VrHJy2d3ebLUIMqBuRafMzX/gsqKFBDOadp698dBOTkWT6oHXx8iWHv9OKhFU2ywkBbKigrUSMzRSLajENWO4nRAnFEg+A1ljDKJ/N14rxXUVz6QZ2WiaOHqTFEXhwyitCJP3+7sZCY3amrsh6Y+hkH+NfLpVVPKbW3Ns52Nvca+33HZpes8UROF+sH7T6e82T2cWrzc12Y2qOBeDZgw0eDecj5aerHd1LHXMUHquHp8Vznn1YGOIiukVWMztsRkqOwL6MYYpgwZHxsjUXoIZ5EaZS4BfPg+pEtN8Q6o5FiiN2Er48JiRVP3H2lN/5YKTI2FKeMBPzKpcrS0vLO9ufeHu31601hDiQOE+a/d7NV1+bv7Rcm50Qpw++mSYkV3Aien0OYgPyk6hhCcsLRQGbxQlBWNWGTIK9zfba5tNKLfdzP3tF/VDWCn7ZkGDK6cC1guVJNkqaKv8cov1GcYJpMBzWRvvHvY3xSm15bqZea3gRaOl2OrWx18QqWAKcDfIzPTcJTlbXN2wfxoMTJFwZXNjZYG6msb21wy3cwNZW1m26kINnj+5Tei8uLBfLk0TMwUh/tlY/POhUR3lCD67MLk5OVURbX7+2HLZIceR765trT370w+9uboqID8vv9PTEp99588cf35FEuFyZGsvT+udmZpZ58DMObW6tPH50n+54fnb+s5/5mYvLFx4/enRhSTa/6UcPHrBmcZ8VWr44v0Bd+8EHH9y4svyrf+6X//7f//vLEsbHoT6lbt0/aK49XZ+ancEvcoUXPED0dHjwJ9evXsfB1Ipjexsrc/OXG+XcxtOHl6Zr0CQzWGmyyLLaVnJB/PnI2M5Bi6EuEheN5boyHA+f0osyV2J3Aq1RubcFKWJ66GdllLavQ5vrWxBptVThVsPqibooEk7hycrK0YHxShwAxtdZUF7GcZC8Q5nO6KrPEzIwPzAT98IJCyJEFZ0yxwpKo9u0U+GkUa0jh+gZtY3Nau6ru92AQMzPiQfJIS9GWVrY77i5t0+TAXsIJA2hdu9AlyK98CpkvQLF0AlA7amZIsXV9sFuVcqYdHAlGoOjuyd8VKS+PKIIFW4cePZ/8nJygBeU5NcMMaUBhUQf4kkgruD6A2OlK3Mu8DU1C/zu+xkd+wspRE/ppyAJaEd0rEHgv4wmYoEtWqhVsitDi9n36CoRP29Ob3veyCjCnJWM69Ey+RkFZmTXKUbhOFZES4xcedyBQujg2ngqvRQ9NMqX+DcNHtoIVtfhifaJekGRBmxBXBqnn+Ij+k+z82b3Y/TBPGcIPRtmMCYvB/z8ViZOJS0Wfzoj9Lh+AEoiPynlR6L27uswkZ8gov7UAxWWs8wNDvXCBdGX8RRA/hEY4gjGGxk
4AKDiaqs1Tggpo83IQaeNnzd++F0nRgv787/InAOhdOSTq5tQYoaKlwPO3vjTf2Z3rM+LKzgSsl1aRKsUEpXLdLzIypPifTd4Rj3g6zunBWfNU1AwlRSpLryqhoe+/OUvI7fmy8WN+ZANwGbKUsXpi3kImiGqExdYyh1Xw3ZtrW9g35BwKgvGWwuIe0C2jSbklTQMJxOIGyPtLMZNsHClXDuf5VnmBHRlhCJk8AkeOe01N1pv/9oXP/XWDeeoKOjxbLLL/2NtQwXh5k6r3ZRDSsIA9ivTHFpbX0EqGtPLdx+tKX11be4mq1D/RDRvvnhawdAedCNnT7FGsyLGhU3FKkAaWKBQbmPGGa4iTAgPk4FvOkrWx8qAKexsOF8EXwSkNIl52DOsaSgEksuf6ZpwvlJD4Ti1Hx52hocKXFuV8BjNF8eGFPikLovqkF7Cdji3uNCYnqTHyVOK2RG8sH2U+np4tHd0QpEgD5hcYJ32oMJR+/jszp17raOznf7xr/z6X/yt3/yXuUaKe8tzgShw8u4R4LHg+DG8qR2ipbTBhpvtOPrlYIb9MXgHQmKUhHbht9m1MLXQWVnakpGh7d1NKJVmFvDHWDULZwtJOmTsHSfl+Nt8YOeAZwko4ODjIVW+yqWJQ47jncN3Xu3uNYVlt2DJekNgQ+/4sHN6NpDLqtfdPdhf29tfn11ovPm23GRXpqYngEGnN1jdWld1feHC9XJt7mRIKGtxY2tfmOLszMTZluj+MXGA1Yqs5qApsH3ksITOw6B4iuWin8BFc6X5+te/Xq+Wfutf+Vc5FQla59akWOUnH6+avi0pFNUrGW/ubD++98AcBZV88ME9trhqKV8v5qZrhdr4cLVAuV5ZUM35wjKGa2bp2eOtnQdrm7udHrsXPY0BW+W8kKuSoCXcrPj6w8Ou4h18HwPpB2oQMSk+PcIW+0WqiXy5ddg5O2QKkg1CgJcohVCYu0JUSBGcxNqoOLLbc5ScRFpQsBfbkgxdyVs9smqhXlz+AhqhvrHh+anIJ+DQ+VQRuD5R81IyleoFCNthB/fDbsdFkyuJVFYlmIdFy8YxffG4qDWGRlmXz4db/bbogbHh/PD42YDEfnKI4Riv5aTQQFr1iS+DhcVtjBTGhI+FWZz+3A//k1dCMUGNHKHMH8JkgpagM5BTSD7BLGgQXxIGTD9Ge3czLI31jwbukH7g8egqGgc2icQTUHk6n9lDznE0ej6caJUap/7i+5+6sm5iVMYVA3l+JdoRxMPfnoU0M8/AID8hToX+MJGrGIs+9cBli353dJwHJSwSSA+AykZBPxwwmtSWWe/ZGDyiW+OPmaXLHZeXIiE+4/lEhNz0xbNZM1/87SYi5NK5nQNDqLqbfvVsduk8vqSWsebRW5aAEtsenp04W6Udu4zsm1ueNFFImuS3dyCFChDBHdfoWmZnpz3oRUR9NvYoI8h9tswXbHxqZq7RqDFfq/2j8DFDMbz4cpwvh/3TE//pX7PvBpNUWs81lm5aE4NnP4rPhKRsrO2PqA83SPjImOj+lE4+cqsPjYjBXLpwYXd/jzpRTXPct6nBEIuSPZTrdI1I0e7uHpgnHNKHXFxeTuq+wNxWURRX8poJIPD2ZLsKGPPPLQTBPafef8ydtUyGAnvAN4WTlBxKtWL47/1b/85v3bo629vf2uj0eE5UC8OhHl3b2F7bErp7SHFOHjk87+wdV8qs3EO7Eu+dbu+0Ty7c+NTU0uW7W3dOcrXeWb5/XgIdSj7bkEoJ5SX+Uzsh16TeSIwJTAAJvJtYrySMo0tp9wEUMIDEnQxUTFxS+FqbiBJZqjYIYNVR1CJg0yrAYGgEmYxakgqwe3oQRq/wVKRgKvHzoKjzCgofsM3DZSTPPbolMLe5vWWFLQXxHJ9LGh7Pl69eqSFRve5xt3e0t3uwvrWKzBnw9euLr7964eq1Jd0kkzi1+NGErFiyjTut6eBRaQYewV84KcxmVEug06T4WRN07Y147BS6q3JJtVCJZmYV+UuHLs3P2kLtTZCzgw0MISA3vrS4KOtBfngIcp9eWBSFLgiIIq6SK/XakSYXuZV0XcmK+anq9ESp3SoGiadnPOB1w7N/WHBxiGEXfkHh5KrcYyOSuTTpIw0Hdfvcz32RbylFL+fZ3ebh6Hj95pGiTX3p9YhjihzIn1jIV5F0PgKDnjx+VBdYlzWHiFNkvjC6sKQQzYTlvXRVCexDkby00Zub69/51jdv376N6EYI4rSgx1lzevrk2YNHj7sSPEkwWW1wPnrnjdf2O6Pd/e3J8tiP73x8bXlRXBz96uz8zNIrN2eerDaPTj93fra9v+l4osah9VVxVhrIsDSM5scq8AaME6wLjwkcRFuOEjlmZQqTl/B8NPKeM9IOcKyTjcnwV0hhJ4SuEIb5nBwr5sJGIPyjjyuiqEddHC5iGMTDm0niXIQNDDh9zm4o6E4iSaCd0tLbMZrQGx4SZolMygG30CJlTx8bSl3ppEeSJiwWxe6A6hhHMX7KdBn+hKAySBhdW2/7tHXM7zRfywv7ihMiLMbY6RAP+aLygKFPjqQkVCb/gkt3Bp39mKHXwKecXglk+svQb0JGAXkwgzG9uOLPdNMXT734/gIXpmbWznEIZYm1RyMcUCjtGGIN8cullZsu3zM0FJ8JHbnjfryFiTwUcmG1DvpBvxD3A+9bzRh/cn1MtCPe6j5+AYZNYlh6PLywgkhQpOAXnLToAEUhhMoZQ+eOg4rRhV+D3YoegtAE3+u7K/r9qct84wqSFJdfsmYxmPQNBdJP1oM+3dWMRtSnNnHBVAl5Zc/6dM8dFRcVpuOtvra+bv3NAoGheKVstu2kFNIVOR1VqNTq0zMzYJcqDzsswYG+W3w3GJOcZYxL1JWUeSyqvso64TyqTAOg+QeqOoGVz97o8+XMDODleIISpInHMELxGwusW2sbEAJmbW5SgQZxjk2KFfbNXxIByObpJJt7EJ5Ll9iHcdYffPARq5L7cJt3UVfaTBz71s6eSOepSmSk1QNej4HZYUDgfXeQkDI8r+9WVfZeKo94JTwNeSbOIGQTwh1f/7zqBh3JMoTyMHdNN+qYqbEzVcSH/51/99+cm6IUPNh8+nTs7OCos0N/KjxHDjqolKNS+GQzsOXGlhZnxfHUJyqHQ+2N3WZxcqk0Nflkc6uLCy9UCcvHIxVMTu9ESo+x9uHIRFn1er7B8AdfRgbjGA5fQX56BpnYGmuT1tBqJq210cJCzOl2HjTGukXOHglplFpwE6sEJCESkJITm8VpQabDZMpisQz1ABIkyfCIoCnkYWL8wpWFhYtz+VpRKp/awuzBg4diA1jTMOrHh5tOioTxc9MLjFVbm/tIz907Dx8+XllautjZ23r95s3pipoUB0OCg0eLPB3tNYY3lEuGLSgxljlBQZTyjehhniDjyBZT0iixWKIl5CwKP5ufHaXsNs1IJQxjjIx0d5uEDVQXVJbCMSAy8VD+KPJskNwTdHn/k4/4hZLSJupTvQOHdBRfTyVIPhSMYPVATq1eBaHSOS4ulZVMiRTReHQSH+
8bRqdY2RPaJGyaZK6zly9ysStGUfXa9OLoRR2qxjLEonHCxw4eoBcfSHDLl+2c86GYDYVCTiL5RSWnZCobkYzhlI2B6MfOSd/CQqozUwtDx5cu/7kf/ui7j5/cX1nZkz0T9TITXgqEVwned7a6pzlJRtR5malMSE8uwWF+9IRrwv733n+/JiZ9PBWeqdc/uPeIr/+f+eVfevedN/b2ac136eIkLIyA3IO9kGz44ASPexqJnk9pLAJH1auN5sbuML/384Ek8+qatLabYEzyb1gbqnWmqA3FHRZrxcMjkzrkCeQ0gTGHEcYI1CSnRbmG8WXN4mYc9Ec8dX1ScKdTti8DWUi6ddtxerIbBmPy2/GpODC8Ji5KJknbaGVkMqNX4KCRoFqI8VFhNHJxcZdCK60nxWHQVnCQH+uGuQoaHCkOF5yKMIQx2HDUCbdOKhoKiShT9C8kVwknR3yNZXDATANYGIdDGwgWEMFS6XTFF4QkbiTEEIctsJT2OklnL86oJ8ArjOV7yAfuBRnKcHhQIb0FG5yueGl6dfan7774DDT54oo7IY1lHHUQqiBYYQTQkCdYUBQDMPJAC4FeodLQ5tGDGUwIet7opgETcRgEI3zklBdtIrKBQ9zlIaplyC4xr8CkL69YkOB0no8q+2JrvdQ6pZFE5+4bqp0zBl+c7Og6GYqQWaugDZzoTnZpZi7ZW4JAG3n6A75+4603uckJS0Ra5C5hvZJgiS6FmZfrkeFPUOOgE+cjU7NzoiKONtdFAlZr5YnGpCS2loLOJBlpS0R8Rn34Ax9M5059LL2TmaRJxefLK5vCi9GlySap1J0YV1rXkFdphWIlY6lNM/tFmxi/z6joGv4qIHBmbh4BAtw3bt4yU4kNN7bWWQWWl5f8+XR1BT3jliX5wsqTxzzFhyX8HVYk4gDj5alLdCaJp9CxF2Uk3x3fs3fFnnpTjM8I/ScunJ9jhLbRowiTgUQZvo4HnfOz5m/963/py59/M8+/af/ZeeP0/LC0+vBgZ+WJ5NO876SCOFV/8UwsBNww3tmX/G2rMtWYu7A41OoXpxbuPXm0P8gNRmuczGTZGUjURX2VkxprmFfG0Xkqi0AxJng0vHNjjf2LGFtD9ncAInE+YMzOGzFVIS1x5BWFCoLg26NYOIooVNwqxmLHo/JtRqVgOkAkTP6D/qDDHlStCaSr0Yl1els5aQoqlfkLS4V6fqjAwfG8t79PcUdVE7S43d3b2d/d3pPbjvBUqU2RrqQ9EvQjq8vBbltp6evLl2q1yaHuefvp5onya/1gC3h2HYt8JQtwCYoDHYdXJLRxSXcZqkyhPFGxM0gSKdJsZZ+D7BCY2JXYKSkGSYF51UxO6snynfymlbR8+OgR2Pv4k4+BrmWZmp1avLCEo5ctqZA7ayxMNmW6bwJsiVIHYH9hYW6WP97SrPg00QqyHWWJQnlBk0V7x+f7otf7PSpSmjQxUjt7ETUvqx6v+qER3E8+UheOSx9erAS2wxaE8MgphEicL1Qp7NvdASHPuX7z8CZCxcOu02lyb5GCHZ5T1mN9d617qApaW89f/soXtrbX33gtivjwehXkrsy37Ea4Nt6IQyUVfo8m6vlOa+e4n1ucm6KNzZXzfdJ8c1tf3c21fGOCHPfeJ/ekL7p+5TIuHGVTTBnWQK62d/YYnCSpYtsMg9ap1JciwYscR9p7B4Xz0WlJ1RrTaLbYNAH1RqnQAwAUuTFoy+KpLqWTHwmwHBn5ISE+MpYDi7u1QyyI1PK2cviAkyblXPhDmddUedJzVOhJqDqvlBvOqU3TkkSPXB3xmpVZMfLQEL8o+1W3pgrHQNkWmp9zQV4lpsJcUZaX7nkfISGnyY3MoYJ/8/jxcGmswFszmDH5e3sDMgdngd7Z6UGvq7Iohfq/kFy9OPmBEQIcEw4yQZAUjGtgqUBXPvwUaCtDDQmnR4NEKjKUl/2oXdbITV8AvecD7RDjAtGlJxMez97lM2GlrPOM3ugjKFDWIL0avUkjibE8v9KIsgTDTnr4knihl4ZkFc4cMIQVNoMwJnjY6QFzzgPEBxf45JAJaZgzDBccYBI04Q+Xt8ff6U42ZP2nlXg+wmxBEqJ8OR73ohcz1Tgj0KZGviEVwURUEBzB9Jx1GAgtW6v0lJtZ/3qgZeZ4DXSsHvba4+rh8jWQQ69Wn5LmuViotFSX5QxULNCnQTxTlQaxrFStTNYbggup1h89vK8k48zUJFFGHnQiEd7NpHW4d9CUYDUjldk2ZW+PCaQrRmYGz9VYz3cfwg0XzVjj2EcNtTHa0HCGBJ8eSY8Hl6LGREq/pCXo9x1GkxHxK1+5IlGmYSiqS2KyGKbw+ptvT01Nw6rIufPZbjUVJLl0+TJR97DX45idSH/YhzGb1tObdGskCSZoxrMrwMa0HEVWIfydfIm2nNEPQuPodGFeloDwJ6kvTZzxSWjyVar0ts95YoXvOHvJ2ag4MOEm/eMBGxUfRb00BwM1D2lc7j99PHXhVdRQmIhFPDofo1StFWvUMcrEIARRBpWaDloKeA/bFWYsVicWyulyOpOQnQYLGxp8iIvHA6ckduOEbSAgGVWz1PhZrA8SEFJMOIMhwHCKhe1Ect0+hhWqqBdLNYR3YrJQqNiDUeWtpeUXAyFspladmKzVN3tozTllYL0ivPx8r9k/aG7BM/s7nEVn8blSK//lv/xb15dv7nzy9OPvfbjzeKO/02ltHXCqJ9Z3RS5EfougRrbcXKRRsMn0zJEwxn1+FICbDpgVM5ff3t1nenTKgCvAq9Zr5WIo/TqD94PBGhpavLBAH//BRz9+8PjBzVvXGxO1a5cu33jlOrd70N3qtt9//0ffvnf72aNVIc8XL168cOlivT6LJNQbNUIJy5f4L3slNQSx5vCk74y2eke7reHd/TBrLS/NAza5dR1tu9AWkB6lkI/5e2Nge51WKI5yMjYBb5SUw3tJGUscK+VkPVei3KQQs1zSYJnl9Gn5/HSWw4qIYJwSC7mcTKGBGRu6du3S3/gbf20QfuHQRdAAA6DofnD/UaPxyf2nTykyS6NH60/vO/iDzqXy2MhkhXt/375LNi0CWvyRIO2P7z949vBxa3vLe4k7jakpTobMUguzF4ksly+IKhbXtbNV31ATB+EEHBSeAiguTi3IrkKUqlbKs/Xpdqc/WarilwXqEvIpQ1W9Cc6GNxGTUX+QoT7jpKpgxGJvwwf4k7CkqA2WY5csnoqh+B4ciBAp1Ycj5lj6Lqd43FNoGJUgocrB16GibgL78JLd9Q0oV2pkunSR5DJWcP8E1cLFwPtJMYorzy6W5qcWuHcmXj5SLA4dRiWwdv9Ims7zCvIW8anw9r+QXOkxCENCPT4SFogPN1+SKxzrc4QQaALqoOVIZCM+0peEvBKmig+NA7QTaQmUF19T0ySKOcehTUpvyrrNfvVGixXPRg/RSeDF7E7YpQMfesidGFo0i6eh8mif3CUwsjHs9C7NDEo3/kyXg6YdNUvoXzQJyU8PoXNH0OU+id68xae3xB8R3fhc+vFo9uqXnwbrXUnoer4C6
ZHoMGvszXrTQ9Ynv8eYQkbP0hx9zy73dYsyae0OGLKFfWLRUXh+E9QJ4jN+hpawgrhvzveH+KcOeylN8X6rhRmnPZdpwQRSFoDIyoy7hLhZrfSD9ba4O/t7geDxx3b8hfeNYXvpTz4zEpVGlt3MBukpAhM0igMAT/HPXiSHFM2yZXm5g6gX0uLyrJ/gqdNO21wucFien6MTQY/5DT5+8oiKHJ5668131tY2JcGZnKg7/Hfv3PaUpUNfcYDZMNxx6VA/2aoGMwIUjT1BQlK4ZduUWA1AYTlCMeJwj77+6nU58USDdA+2OXXnzjr8jPk/NZR0Du6MORBT6HjLeoizPF++dKtWn9042Htyb+tAcN5+i8apOFGRTGewuiuHreTxgaZHSwCufzy02+yflkInUYkollDJ+G5hAuSNGSNo3CHBE6HYIUKvkhIIux2rH5Qs1C6ofhRvE+WGF8baQBkcWP2PMfo4soJKb4oRIKzzSG6jHSjW3ML04nLldKyFBBIO9Q32+II6OhALxo24yZDIp1vGcNn0d7YPENUpdS7Enh6dXr184/qlGx98+4M/+f0/+uT792cL5fJZ4aR1VC4qQYIonZHIkFahUrSdhjF6zgFd/uGVUAFGNcDwgGRFYxvLF4qfevsiQ4idCs+gdO3tdLoqvfcHm9u7jFtbl1pO9trW2tzchV/9s7/27s98WrrUbqf57e9946PbH7T7B3CPgPNf+dWv8fUQfUFxjTgQR73IKeF9EBpW2WGPpfloJQXK2er65je/c/fBw7VLyxfeeP2VajF3+dISutjc3RDBSrOKeRpmTQ1rIiGAxux0ty0Gtjx2Ilc7J+pDqaXyBWkZykAaR5AOPrW3lbTPp5iQ5UtLsBa70dVXrgmWCq+fPNx6ykK1tram2vd4YR6aZa1sfa69tr4q7hCxr9anN7aaDx884bWwu9Pc7LfyIycXFmfq5dL62entD36cqzb41bCX1srTaMf21sazZyucTkqSsjSmrCn8xgf91RuvvfHq648fs4s9cHy623s8EYf56A4OrAXRpVARKc5VSbVKCD2V5BWYTaTqnqokQpuUHRkIgYJRchuGKMwu2uNwWV4aHTyE9QGu9EbUg/AnsYmCNLFTgS9Dy5cvUnjaVfvrKQDtuz75H7P/uw2RsfzQdFIADofKgOEy0EVfMtERJUYUn6n6iZ2Lb3gEclNi0xmenCvtrBoKR2InJM7Ac6Tzz/3Hi43MlQ6WzFryW+ovECvU5rSbDxTp/xn7bDL6MgINyCsJW6AxgWfTuYuW/On9lfCI0fPqd/MnBM+SxEtDb+QJLKnhJXwd1mUBmmolhVCXen6OSQNN0rvpMSOo4TUVLw08T2cStjGcJYiOUWUj1X82Bay3u/FHQsq2zZdY7mQGi5vJTwBqiGBOijPRJFIVJ8oHXcRjqb15ZV88G7oP03InPgIRuRnYCDdhEmk99etO5lthU8lKTvV5+E9ARcFm+tWzsHN0kfgDzCm2nDpWCPD69hZv9XK1jmOhSySGs00RvfsMo6GB8ayE7YxHHgZYpYya4232JVwfGoLovRRPSmRxEeepmymvfSdfYYvAZrw3TSr7ks3C8F/efM5zJEhwEwKOHQCC6QrrfrocazPGgXKqEioaB8OWQljG6mzUGwoECoS0irV67aOPPgI8JCqaBwl5eS2L49nZ3Hbf4o/ffIV5nROJB3F2DI0RwpPSGBqetcKnon2GH85jwTVlex4jDeLJy5uxVrIqJaSwvJHyQ4p9ozm5eWPh8sXpbne7WhnrHxxLY9E7aCJUjXqdhp1Rhv8gQlOtMBLzzkaWJn/43t1H62sCMur1ma2N3Vduvjq5fOnZtkQXPMrIPcmDxvtHuKQXmx3ZKDBP5mBT8jnspb8p55P5VGOrF4uXgMeT1ioSCaNMPHoZyyJLE0cG9aocCwxpLDGSFufW1kpvLLWaTOniMct5HK2kB4eHe1zThznZLF6WClaGccHv1Wm8cpTwwLvfu/uIArBGZVSf3BOjdKwgk7iZQQMnrmhWufH40erS4vJv/Nqv3/vw/u/+d/+9LAzs443q7NhxbryuUE19Z4+CrX2o3DogQ2vlYxURHKTjfG7uRqVRBVe0bd5ANHEOIITN7S1CIdDd3NgHbHCI7fMFspNkwTSP2kPi7d64OV2qjnWanf/+7/3djb0VhHosP/Lq61fnk2x05dplahMBH2ZPudrvNyGMjDF1h+I/rQoNULAr29t73/nWe3/n7/4P1JMzsxf29np/8MPvvnrr6m/+xq/h3zF/AZ5DIjS4htsaKqSofDRRLfO/IAr4FcCGg5DNEadFoeXYH2MXeYj2IgclH5/IrlflnCpG25+Sm3Dfy0vaWq7D1EuLM84nJ64owQkjLM1evDBVLkH6cqQQSsc6n38XtEsIsbW52trdVnmB2gP/afu5eSzNz8hj7yYduDwdldDARCL8zdVnSpOi/TNzsxQtUpepZvDVn/0azun3/8HvlMcrY5xiVDdWkrVak8BeFhOvKIxPC9eSETSSLFFVB7pTCNipeZ4h2uwcKwz6xNQ0XQVHdi4ehgdEUS+YRCJtkRWEY+wTSzl0RErm/UqZr6AdJUToyBDAyOKJ5IPqCPmamZoN/fDxOfuZnP/qlXN4g5VP6lY/jNbjkZKDnCb1dVRqplsW8YDV8zTgjtIjJA29SVlsyaSCZDlJdrNzwRMGbYfCaXNYISInX0DMAIVYhzl2dgkOhktPKhgKJkJmaWBkiwycNTp66eJFIBioGgqxuxiXM2fYikd0NSdQqETiD4BiP4C5UmW836LcUShAQVuwjTA55ObYw+iIsuOZkCY2NxGxZD3DW+J00IYgVVGOgBrWpMNd0qgsGYbaqYYXrB9TDd813B82DC2D7KTiDNSJljA+4XcTyfSQm+ldEsO46ZRFYZmov8NwRY6ke47KzE4E2sU5k28MG1VyFaGLyNnLQNOBPU/kFZIOIcwquuE+bxzOqq1xPqBdIO1Zab7gWRy7MArGT9KPxfQIDGsW3uBTh2Rza+ALTPjNb3y7c9hHq9iZA1mdDd29e3+6eXDp4hXd0nWEqzdWkHqwVptfmAKJlB6eFMKIFAG7ZjHfyRVpADDpb731hpQE8ke4T76Rzo0GoEhTZ+mH+cFXQZ4xxEoExY0ldYFJhNd4XInyW3AhnGhTxI2FBZieDduNO4sFCI9L5U7UFrNZGjm9t2/fwUSNjW6Q6tgSPve5zxm2EjjeaFnWVzcUDZHo4nxwNlqyR2F39V45Rq0PAGNzoWywdHz8qpUa3ZRzAozpzKUutaexoTK+JYcRbwQLiC9Fh5e39joco464S5XUPujcvDbxc199Z2X9dr2cP3Q6YPzhOkPQSadblBX16KzWaPQlVOIpMaym7ZO95t7m9idPVnZHy41SaXq/Tyk4322elutY+kFp6AwqHRocRJxBOIBEQgwu1Scd/gDH4ZlbV/mlaIRYMbHUkc+2QHgPLaWTRUcYmkf6qZjRaBRbUi44yirKWcdfNSxZkGAEmJ0UyEuRafb0+OCsRRVzMuju7m0gkMcnVrw+
P9fo9ZrHJ92dvRF64ObeirDo07Pe9MKMZHYffHjbgK5cGZ+oFprd49XNJuGcbNHa74rPax1E9NXbb7zd2m9994+/e7x+Mju65HCJ4zw8H+0Pj+3u9frHI91RiLFCVhMcXKaMloZyZpZkDL9kLAVg2NlTM3dLVJcTijLRAYAoYFMYFvl3ShVen2ko6cE8Kecy5CvzSbmW29h9cu/OR5//ubff/dzlydlqvpIXu0BeQ5sV0xnu9ePkxy6fc8gPmUEcnMzoBSHGVvK414eCpDA+67VGf+e/++OFC7cWL92YWbq1121//8f3br32OmrEC6AqqZCIWq4rYT0ecyKIDtX6FE3BoN0uFetQrYoujjoETOXiYOMYI3I8d3LQ3RZPJcB59Ez+6BVcEOsNaEQPgKjpB6KL5GSBjeOiTggPtdHpmZJDwxFkEjILbRCAnwhx+WRpf2/PkSH1CBzcaR6oj7PTpELcRa17hxAs/bC06JYNWhwTAsB3bvXxfQTG4ZWy7+ZN5cBe+5d+5TcGj1eefnJ3bW19el6uVHURj7c2198efwftsfLUTxwzOJ4QYyQQAmzQOQzp3Hk7/Lu1s62qqmMFjfNOIn877ywNCquSc3iizc0uyQsKMQisZAifmy42apWuXLb7TSKRkuQ01kFjGIlPKTjbpUapIIaPj6x4T3o+KgbBDbh6UEykDd4R1aLXhm7HChM1ZrbQviT7rLVynlgYrWcxLLi8M9IVqCcx1L5k35NOK3x/oUo0FrjvHxxAKIPtUGsSMlISbCg7MuVAlFMRcFpHBCB9neokJC04KpRvcaEh7ocmMZCvpPfxosB8YTImXEcsKihMNAObBkWmUF9UIrhRWk2dhrQSTyW/OHTE98hFllhOz5K4vTa0LSnGCCIg/QbtyYSdkLrQrcDFLycbXaQrHQC3qUcM3EtDKPQPSQjfDG+Ektm6XwhAHkrdpDEkechbPQGiTdZPydsx6J+pRL9JesveFX+H7jQ4EZydv7LLr0Gr8NqeQTuDUsSYsRUax7M4AHuiT5MK9vKo0+pystjb3idjwRgIvBfV+vUjxnAG+6NjgeO0yN2DJtIBOES9xEIiHhTzRU4353ucsZtt5RHwAyiKIVFP2ot4W3qp78ZhijGAn7rcNF6UyySNNNbWNiVKFj+FEGGNws8TZxqhsFl/wzIGdR/ef2SzWYxp4Q3YeiXxbuzS8sWN8fzT9ZCuBGZKfbu7tVudbKQBRHCVMH4+clSFvLQMD+xZjUjFZgzQGScXuxAieuTV4r0WkZNslMl25bAhIHyIB62Nk+P9xWuLFy9Ocg9UGBZLt7W1K3WEZHMwCg/bze0VGeT2ZItVVX6g4uD509XtTiTsG8PQwbBbu53V3cO9QxHI/fbReGV6GgsWdDIqa4S207jOojqUHZMnSCQrO/NwkbJIsUVQSlQKUTggxB6jYYGMVfsjigWIjnRbYluiQC5wsneUS5GInRuoRL/0XnQpwP3kdHKqppQ4HDG7MG3WKiTXGouV6vQPfnT30dN7N4qvTsyq6CIh5ODRk3UYSGql5j5VMcT3tFw6gAHZsBRJRRXHy/WmvMGD0+uvvF5tzL33/p1vfffe9FBFhCYJjSMSr67J2bnZ+Qtztdrl6zeovdRL5MJTm5xk/5EdDyTuPNtgH3325Gmg72RSsyGhdx0aIloB6vrkhO/ATFqTV99849or1xTIblQbDx8//uDD9zkHFKrnr7x+YXZuYVgBFnFanf5JJ6EzyiSgcnwISK1JHHI4L8V7OJhnXQzhkEo1yg0SLlW0f/qU7bYwcjR+5cZbl67c+Cf/6O8XK9Nf+tlfzM9eGuE7UK4e9mlN2akx0EPStVen8zTpdgdrgObbgrAXZoCvmNxhPzw4zpXfatpPOQuRYOp5u3B2XuxLTii0LHywD+V2ViLxJDwoA+I944pTDGroc8vVUJNieNPg6WxSFoRhGNSZpHgAu5dPzt48vAUY2D5bvYPdJklhe2XlKR8T7DInfrgI2VdB/mBbwrLdR0ePH3xy7/q1h69fu/Gp5cv5YoUHPbYVaEECjhh6zscuoh4QW6xTxPvx1Ocra4wmQt0ghw7ZnzkK3gro3d3dx3OViY0x1gidohiB3yy1JeIoCCtiuEEmLIQIkM5HR7ClGDDel0BYAWvcPdVCaLFRbtMPDRLBK4IbWABlFAuPI0jJ8dKClJxWoIuopQTQ8S7LLkcbHIx9x7AFuTI4E3O9/OJ7dt87AkRS3gdcA94B6NoDJElzoC3LotlCWlYfdsxFuTDJncN+o5kzmD6fizv6cXmLK34LbBd/Sj4AKqJlDCEu9+OvNKRkyg34DthEsYJmxa84nviUAMedIIuDwPTcelKJ9fCigip57YM4qEvtuPC6Cmweirj0iphzEI64/+LNQsPTOkB2/htECzMQ3SK18S98dr3eGOJP97M+HB6QaeRU88gQFJTmkSaSMDg66Y4BG629ccV0sBixh5pZ8Bc2uZRPJMoYxr5EG0M0QmrIyMTGFkyMw2xaHPYIaeQZ3Y4OdzbbkqpSuXgFHQWWzt6QV3kWuLJFIwTqE2PlUkPo23/yDX0r0RuJsI8k49lLrwHIsbxGqLG3g0WP+55daa1erNaLHUyLEDDj0syfsUjpevkUvoSui7WBPzZ1fHAkxBHmd/4mh/3d7a3lpUWz3NreELaothAUTiLEIU3UGzgF5gHrMD21GIojSctrkftDlDAGmIOZN9oP7/JaCxMsTtggE1VHTFV37e7NTEzmhgr37+5VyvjcJVq03/rNvzDONyIyZUT0/sHudntnC1tYG8sdNrutnX3hLPtNbwkSJFaSV/TZ0Hh1qpwrTrZ2EC+UHG5TpSZ4nKTew6Uhl4njCFSHAwj+FL7gqdHNDVXtAZMvYspTgmEshW0ElGWyIP4x5Fhakcj9TxGQkgVKXnN6WI2yelTyCCZEQGlDr4Ab2GlxmW9euLh087VX6VdHpV3PVY5aR5euLD9dWQ+722GPa2iuuLi+9aSvOsOJ+q5To0Okr0H3QBGjAsVXDIPFjPAkmcLw+OF5/slWa23/sDA9dTRcteY3l5bm5hemZxaEEhXY7aUqLVfpXaim//Dj99TbXdtc6xJtofC2Irq2Jfy+yIE+gQRgFRmbw2OXyxPz86/cunHj5k3GJ/Tm9r1PfvzRRwd7zWerq/tNaXlDebbZ2Z69Ons61JYYBIpCnZDX8NmI4qgoOpVNnMeAushaLw0jRaCMXybCzh9FmPgcPXi6W5tYnL/8ypXLt8ZzpU/uPJIqZHe3t/f7f6yQFlcP1gucmZQxfM1DBKLPiNHuoPnoCx+CQr7G507OFqdA9GuJj+cQ4Z5jIRxgji0KMPkQz052ASpgzzAk+BzOkxuCbbVJcKGfngMH4O/vB38CNkDGcFDLhAzGSEi+BBxE2vjxUeUZ5XQYsIBMDY9dA8CiKZWMabUPV1d2tjabW5u7Y+dHG09sqMSvQ/1258mDj5Hr2SjaOD4xNweNMW/Lt8vLc7e5mxcI397rdA6IMlQz9G8qkltD44QPSYdcVylmsbiIGFwja6JhO1B
OWVEd0KBjuI6Tne0IjpydnjVapFoDPzmkkrrFNFPudsaI7NQ7sESiQqhVwxIWOffdAmdDYoSOUUdqNZvnQYJWuMQL2IjCMxpFOTmYiuAJhwsEYAeBtZL27AUJsftmkL0p+9RRNEqpVMO2D9/IjsXmGkk7ulTtfqJ3ZPwgW2WI2NZmiDsWIyhZiBquwJiBR55/13/2rlAQJegIkSKgJNBc1kybQJQhXT0fWKbgezm2QHjHoQDNGoSzhOzXkUiUlIP/8DOVpPmG7Q9n8NPkMOvEg7Crl3pReleGiEP+MZwYEQktyWvxmailgWvpEVfWycvPRECfK9DM3dDTTzF+Pfoer3uOVeOm1dMsNLMkRq6IQRWicz/JGJe1dw9+i08ci/+EOwkI1FKtWemmc5zXL11cTkduTKKHo0HXCOy6zdIPpHgi/DJtRJgvhOklGZEf8/pTCiIhFGWG6ynpjlot6ATl894YfAwmrX827Bf75aZLxy+/WIdEirwwKjtnv4IN9/WTYMZ/g7tIs49MJw4xq6MVrQoDpg0fG/vi5z/P4ME6zX99+uYrcTxGRuZmpr/8pS/A7BABDEFrw6nP0TFOS2dfgzZA20lA9Troz/uSJY2lOZY//D9iZblND5/wlugdDJ1KK9UvFsf/2r/3b128MDNyrMge9cVIsdGQ0F7yqNbGFm3MUTOKgJOC6hPCP8faSL94IHXtFP4bGesdHhNRek6UrQhmbAzoJYqVkcxgQUBVaCJpgA+TLt6Dp0dNmd3PR+jMIuMDizx7cMzEQgfHGQvv0Fo2iyd/KA3XWLFz2uUtud3aTWyN/rEpwfd4jurQak7OK0015axv722fnu1I0392zhe6tLkz9uTZo+299YnpyuSUYsIze7tNNjBVk0ZP8xYo3urlwV/y7yjJhlijLpxeHB+vDxWm3vjMtauvfbk2PYNSQBztTudDou4dWYmbvMAePn5Ky6L+CmQM2FxAIo7uwIjCa9VleBDZnMpaU1M3b16/deuWdHyekZmTy8/3/viPP5SCr3OQOaYGfgfk7JkKAPZPvvH9D89Ub+G9wZs3+RmGSj8YfxgPq+pgB4ABNqTKxb5IxcfvHG4U2CH6fGNj62iodO366xOTCw/ufnzv/srFpel/8k//ZGPl8fLy3P7OJo4Nuws+q3DXWCjq5WIQ1cTG0ZcohvgxXkQFOWGRABQWmJ6Z4Mn0bOUBE6ms8Oenh/SgzDlSO1+5ckWGaGJl+O6q5Tg9bUiojq5cMgdCm3gsmSWcbAuFUAVDByWL+o1AurG93Q2fsSOEFDVE8kUkmYNPY2KGWhENahTGZmszUPflucVms7O2vvUrX//Zg86vU5Lfvffoxx/efvQwMjs+dpaxFFGzo4iUEp6tRn2qpoQ1iYCvPw5MLiaZTfIhOak1GslFsSykLumHmeZCKUE7D1zPJPzkti55f06TcqVotZO5ceAhFDrSiSUMZmp+8pmEBBtzYgDQll89eGL2di0KmUCmYRIIC4624d5uxijWKe7cRiBPkQckvM9iPNZQU9NHcT766LYG/yPpyvZnl05cYM8DBgFZaAr+sEhzVy8aMQgT0paTzT+uEs7U2Uv/bHMYOhGtDO3am9ieJLMnnBusimk4jt7lyGQNonFgaWy5rxluz0YRDdwxmPghiSY+46Zf6I9PY2DBdlHEBVqIzjUI6YMQBQOEcIM2vMQjegpyGPq0IIQQbOAV6CWaPX9njMs7wvqlxwCh8M2FbQNLp89A0aFo1HnwAsHGpxnFpIIrCcLJ4KE/wZh65toZk4u7DLfBO/uMtwUlilX31acFjx6SUjD96QmDtGb+H3pPRIydlNEfunMCaAWYglXnmlYg/PjwYbkU8pZEc9yKkkshmTKUTL575PgEnnX2jFDGSFWoJ6t1QAJcKNlki9sLeh/jz64Y3ovL/Rdff/JfM3L5Cd71Ck+FYitd7r9slz3rdrBnltFKUS0lFYRoKoXYbSIR6sfvvW/vmNOAXHN3b7tUfvzsaYTOcEzGXHU6vBn5DPkVRuC5C41ksAQ+qNJIH1hvqg5ILxiARG4tqq1DBthhxXl32zt8ueWK+Z/91X9zcY75+IwuG15VaYE2g2v1xPVrnUZ9d3VjZGpInkDptzuIve0cGZ2YmR/KV7vn+d3OSQSr0K2U6KaoqYfxw1RRljzYaYJ8/AMWoesAGDgJKl/ZbUS5tDv7FqvA6iylblAsW65tALTlYc+VB4jsAvKsnl4CI5THcqdjzb2mjWNF0dZLYsnB2DA3+s7nv3hr8fJFnlq7dEbb+0PD+bnZi/m8eva5ZysbA0hoaLC3v1mu0d+XIBVFQoSWMgjQDRId7ASjl+LpJ6PFwdn4ASPe0cl2q3lyzJjVe7jxh7stWYT2ZFagnQzQTpMqjRbSEgdosnVb/gCAY/xEXTV3lgGIe3Z6RqKvq1cu+bTdz/b2bm+scDR48uTJex/8GH5Phq6IWy8Pl7EsIPwwlDSj0HH/40fDwgTCBGkJI71HcItqQJV4D9ldl8VO51Gj4EnVyygNC5MKz7chDknc1JcvXq/UZ7n2/ejHdzY2mpcuXdndFx41/ODRDj5pay8EIGogHgBvv/Gp6elZyufFs2NHgfO33lm1GLqZlviLb+50pueXN3c2/+APf3TQ3Nzf34FlBQ7Dksb1+uubAo/u3FmTcpahEbliGQHmhipbBEMyrwR3pG4XeEKq40lhM+yxZSF+xYk4o+XMZnWuOAfLGTjnQ9hp78OwKAMKJ0cfZcQRHxeevbIXjR2NlIY/+9aNX/zal2DclVUBmd3f/70/nJ6p2VI0phNWlsOd1pZkYrligajD2e3osHfaVvM6UrxSicll0TvvEXEwf4Ua9z56LDTcCQ0SAllAFbzWoaC0xta/vL6+9mj/kTM4Nx0FL6V9CityMnWDAMMeUiTu7CzyHvNsEkSsQuuxOTHg5fAigSycamiQ5lOwmNIqeHX/kzHs+Oig3axPMGPXQ12qOE7APG/PSOjC+yHwacI58ZFAILCV74miOOrxBSDiiZAETMREtXT//j3r/Morr8Is0IgFz+x4Hvc9OqKsSqok0ADXJvQVEAbMAvFjClNEvB8d0xhAiA5JhPAfzFk8EneyC+rxRTNPUbe6AnlrE3SDyg/DEoypcSfyExJRLBaRy5cQ3dKJClY+yFdg68gHk0G8XuPSm45itulyEtzT33MKAp7CTSNQizEn4hK4+3nrF+vmJ1ur/+eI28/YxcCUGLcz4XDRPj3lz6yNLaSE8D3YbGkJYLyMdAkNSTpYLS2pPiyc737EfHI/IsT7jubErKnU1JPuSV3dN2NwLTW6x3lz6d/jVi8jIvBbIpMhgNISqDeFQ7IoQTyyugMqD+TDiSbGnhSnaY9iXWKZ0nJkX3ymm88FzRhb2iA8jJX3XhvtDkj0nQxuikHUAxyCL7CJ2BYQiM/+zre+zRZl3fwKr8nc4fRiVLGNaID6wjqnPXMMaA65Ah9hZ+XELObZmXVrpl5k9TSLU28kMTJLn+1R/OI7jg19r9XGJxtFKS2++Ll3Br3N5u6aCuMwttClTn
P/uNcmlUtNA06wp5cuX79w4Wah3Dg6z+0c9Jr9wU7raKt5ZKw7vUFbnpjh0W5oVgesPzR7AWwhIcfl/VbE36T8ELzcDvW92Cz1pYRT9qbGK86WkTsWVjqIOJkBRmFsCMdOioFz8zwbO40EtpOl8ZG8s2aLQQi2LHBKrPn5uHpU05O1qQnTjCC8xtbW5sExH8XhrrQJ/d6E4mFE7Pc/fE9qV/F3so8On7eiCDC2Z0jYH4ZP8XEZT8r12alHjzbvf+u2nE1SecD4eObjHOHTeRkt1ieqM0Wnl/YtZksQYhqM+IqeCDYINZzLp2evXLmBDMLLOGJ7TJ/50aOHP/zkNmz+VIHp0XP1Nczx/sbq5tZmIeJJC9j2U2k54lDwpBjg7ujDulk9vsRBhicbXSWXpTN6LTwKhACI0LBgix1z11GHF2VpcELvLXSvQ4idmZlfunxzfunK1s7+97//Aboljy0P/2J5Zn93E9mTzEnkobzot+9u3ng9f+PW59bWVt1HSIQgg9vQR4U8NHz37t0f/OD7l66+ygfzbLi8s3eyve2wnq+stRzN6ZlCtdo7O6ufnm6trZ2USqebWxtvvjnNzG1gqm3s7DTPh3bjIJ8f1ytKLJ9F9VeBkDQb5SLndVrSmakJjeE3n46IXPuqZmHUeKf7dTBObXAkObfsSf02oyW0XBhIRcn4XVZosZbP165dmR++lv+lf/lXtlfXHj54zJHk6bNVmlVOnAuL07LDgH+GL//AEp87FWbk9EVKHNhaJc+39qApa/8RD1iRMPK/QEomEClqHFXRMlHEhqklbBZ2kNByRBZT4qfaUCjcSJ5fyfU21CHAX1GC3DhdttLXgOaYUIEc9nGG4igkIGY+smuKXtRFgMCFmm/u7EYFgl5IeGwuXNaAnC2IhDukq3RgMtQTixtnPuFhIpCfoLnALIla+LSFQsbanQMk0nnmVrC9vekJiGN+Zt6stNGJB32CLTsUX+JIJlKSkF4gk/AbRGN8jVs+gsP0zRszUhSi0/OTn25Ey6x/baLP6DVIVvanFyUFQYCvn0C+/FjBxieCFQ/HFYKUMTtrXAhBefBtYXBCD7xdjwYS//wZX5CvIIKM2sKH462YdyPF23rMTF1pVtEUsqB4jB/T5ac0ssD72QUZxVADKwVZxUzhMUwtJ2DF+XNL8GBBDo0g+dR4jo3B4y1cesBvGWP4TvZgIsgiKC4OXeO0PkE2tIQjECSozKsAJK+8MfSLGBwmTd7ewaw46/zFZTyDStjJiY0eDNIcHVKrxvLqzaet91SaprsxoewynhdfDTwWzKdnjZm9lJbAdzJfzDTFXzMaAutwmHS6AvWQOCNmz7hsiQx+rHP1yQZ+KIl6KlBGtOnm1jpbtzehfmAd3FNMsOFHvYazY2wdFTSLs0F6kUGGbpAlMOAoLT7KEYMGZcGWsy7VSqWhQe7ihbnJ2kKlKIUOvg2fF3Xg5GEqlwp7raYaksetLugQF6JMLc3oSK6jqG6LIjBfinq64+XiZG1CeaOSRIMjR1z/zscmp2Y2draDFyMBp39BrSMffPCRoM2+sw8qdlsq10YiNZ8iyJB50gSGwSLZLcFw2B0joSIoAUHBnFno4JZoTQlnGJXcgLMrgfgUu5wvV0u//Kl3VAGLmfDxGQpnXfvf2eeTxR10SppbAUIyqHf2j3a3njTqU+KLiIv4SetKocguE5sh1fpgaH+3+3R9d2Wnk4xxhfN8WTSx0ks6NwqH7Uix5RQ1xRIBzEwVvuXyfvXSRQo/28cnaWNnl0wQQIj4HEZCBMZRyIt8zK99cnpiTXlNwmkxN3/jcgqoOGDjonW1HNIZwJ5hh3Z6q5OmgzcPTe/QKAwttb5lVBg5Vjg7XsHGaRHAKPJqbLzC3yI0jXstfmuvvnFxcnZpemb+T/7km2pZvXLztXJtSkZ5e9o7GUPPaItOhyvmttfc+Wd/8N3hkckbr9wcHI31D88HTHZc5igqlN4sSeM08t0fffSLf+4v2HHxC6O5yeFRGt44nbaSB0qrVSI/1eqiyrZkGjD1UukKtGsREvqkTY1FkblT6tjW/rYanw59tTiGNPLZRbAX5+YwminZrJQvDa69Q+cF6dM++PCHV64uFmcn+VZDDcc8H/menJxL8Y7rQzwgDvnfR0abhWKjXJvYXf+kXC1/6rPXmDkvXZn56s9/jli8vrn79NlKWXYwOYX39/CUrK2ONrqj5jKskxfwK881b/GzI4ZvGeBbB6LUzZ5ClJM5pUYwvpwVugDlLHwFYY+jgbQavVqpgWKhXkPkt9DNHtlxfJX5+jMOJkLHTSiQF0+m0GSINrN1VMpQwmR9kkk97OUQ7shIvV4NQIp3ceiDQ+QRLjtN9PixmHqEa1yx5T91+c1N5weiyfCXAamzd/XiBRHlEB84Z2nA7SpHBGNqmXWV9eG7y/fnuMN3qCu7+5Of0jfMBl70xauzJv7y9vR4dPJyeN6SjUqzTLpCQTMKF+QxKFwQPMg2xhNOCEEWfY+u0v8tRAwqzfqn3+VBawAhxAkxnJDyEFknJPh0krjV1L9BvSCjQWytb7oZ94OKDId3QPY6t+yrV7BA2AAvd7mRBhDlqey3RQ5+OeQnbjMnmA7cnMeBO2CBYtMjTnIQE3QF1qb3xD7QD8BcGtgUYCFco3IcYRNAkEDtpeAM0eDRbqc4DRqnlvLhIsE5mvBy6cq1qzC+MvPcsW5/9DGdGEQp4WwxHJViXkZlCtlcfKIGBpOt2MvPmGP8izW3EUGoYq5BgE1EM1/chJrNdMDJxyWrXTKFeha+C+82xgMnKZejJ8dDvf76677zcnT+v/P979EBmot5mTsKmM/PVoT+12polc7tQIwhnQp/RvariO0zpLQvwYXDfiE2kyNxrpVyfmt19zd/4zda+zI4SA9b3eluJe8sBfByag4Wc+OrD5/trAnL7E/EGaQUGWsfDR0O5WqzC73z8cN8fazcoOcYPW6GXCVXwKncs+qAxDmybobhCkAK2BedS+vGoCwD21E9P8xFmMWi39luSkjKc5eTYBie6TLtOibmtFyRWAEYsHiPVeps1qOKIbW6fYqg0mhR/Fef9yBSMXwudypT0GihLBdJWfm+0aHt7R31tlgB5DhWqGR8tDR8lqfSnF+8uDB96cHjJwf70vSKguHWW0USRKicDecwbydnxbWtre/98AGeeHJ6Ep5Ryx3REmYrN5mZRQRjZIHCB50Wx4SE5hYuzoMfojAXTnhW2RfWLIUzmGLCKz8QK1gMBgXPRLcjY6FMFlzP+CKNlfKTVeWKjiVHUmsyNO3YnTzh6WzQD8t/KV88H84jew5uoOSwBYQyXJAiizx1SWhcw3Zlux3QAAAJnSlfeu3W4yfrjET2YnH5IlrFuhZO/OdnM/PzPEOJ2aLp6B+4s8sZLisSQ93VVxZv3/6k1f7df/ev3ZyoBcMXPrUqsAzkCBkeHxo96MrDj0rKcJQr1SbHi9vHZzuUU0aNF2l15N4s9A7Hjk/LZ0MVFFGq2OOzwoDrBwE8xjI8OGT1Yb86nZ6aO20Sr7cAxsGoWO+Ts
dGeNIqrU3StMnxSTqi5KjqgFusmYVHlbH5pKuHCEEigodOzPomdAwQbBdUwv64eB8VhYmX7fHu9VC+1H7VV72ajKamNUijwFK1XKzevXaWU5mIomfXaxtbjp09W1tYGESl4JoWmnOvwpZeXJ+qkxtbBLlGDUsDaSufrgDhqXknVR9nHB4iEDSE47gAe2pFxRpwWOKfdNae+SlUSdrEES56ZKhBBaI5i5AzHukGuYliF7iR6xpEQ7u0KGFZI7XxkenomeN3kOg5VRwS8Wlz93n5zF2p8Tq68FUdsQDbe4jpvQdiC1wsclOlq/AlHyO129dpFtUezR+AmZaEBZfCyz+nT83Mbw0pckN6Aru/QLUdc2BYug5ICfydMxwoQv6InoWSIK/7+yRUdGp4byIYrw5uwAUzlu4EZuS+ZJkbjuEeOMlMrHkmtY6UJD5Ri3kskNST9SJftWTysz8Bx0XPIT74hENkXDElozMgu3T7eUq/Il+noAdpViYBQj6l3eNwMlBqL56cgokinPh10ZMzwrBLMK9ulkhXoTSgN3QkPb5GzkeJHmBT7TzYjfdJ2xFN2IW0JXjM2SD0l5aDGB0N8PHmOnUe9hKdrq8PDGzpv9Tp8GEj0WDY8uX/qcdg1HVCd81/m/icmhCDy448/ksmmMaVE30ylUecWctDt0NNqCQNka2/wVia9HLFMS++WC2lOl2/+a6FxxoAPay8VqRW3Mlbe7CAiIletrIbNMAc2kD3eiDV3xYGUumZ7W9gQWCcXcmg2SLoYkIArlzLOU3FI5LEO0DhGgOEyb0HJQIIUbWAOGSIxUwt7IZZDgB1WkdYkmBUYRaE1SgYcOh/SbpuPx7Url69fvqDWE70577yJRm1Y4j2XJCCiQXORTW5cns2zbVid9arVPxwtqRElQnbm2997/7wyPVrsKTbRmJznN9FToyE3/ujpM8hXUQsbThHVuDAJn0Cvie+xpGRWUxCUA4odKbUjhqQxknP6nLx7rOAWKIN3wn9AQWaTIpqF6j8/LlFCu99xX9lHqEPEf1cd2PrU/MXLc/OLxZnp5upjBHFrrzXoHDjWmAJlYXXV3m8ddZQ4niiO11s7g1phdnDwDJaWQ7QxO/XJnfvTi5f4vnF5X3+2trm39v7HTzp9nOwI3p34RRXjfzzk5krTFspGODdAWz2tKzida9eghb2DPfan23fvBK4hPeOgVaetEvUgctg4FI35Ujijbjd36dg6hx0+lAExjLCsfkHmub2Q4uPgwXT4Kbqqqck5uxwBjnQlYdGSLDe0XkAAJqVh42BGh8bgFF79h8LXeGwzyOdlOVK4Gprunx4qVw3AZJOS4u73fu+fiOsCkyJ7hPCRLQUL7OwejEXyuvGDTl9c3fzSJeXY/19/9+/95b/0m3gHhFHEEuUq+438SVMz047M1s5mvdGwS6xVNJMn4acthlLaMvW61i9c/BnBUpgSP8JvO7v7b7zx2je++SdSGloBNX1U5+HKMTl7+XS41D8qSFRBECgVsTKC5fsWRwDTyuMDyXILldrTtQ6IkppmavK81dlb/M1fZ1g9Hpwyt4pucES5JvJ+lBMkyusImCjQuYVHUmu16ZR02ypSPiE412sS/YUKV+orruJXFxduXb3mVOyJ197e2u8cfPfj79x5cA8NlL6Pco//Yb5YnZmfFddx6FyPjUhaa+WhSjhO8PrOli1z3MM2waHGqd9pRQ5G9khHWLqsYE+Zco/kMGSGiGYwIc/jiCBUBWGMPBi6bjpZSGZnfXt1dWVpfsmuIVegqLm7C8ZYDpKXPX59pMr/i3G9XLGMSVWa+NPAQv9j9tmbAs/TICZ2nqsAoZWa0rkxSuI2RCN+EPpwMjM/A524AKRLf76bqm5fojrfsysjMM9/CGVY0AdUwku9DtJ250Xb+DXQVghMqWV6RZCrwFIuoK0Le+hdOGnYNgQsNxN/4Onnl4dT+5ipDhEYn9mVhhoDdmWtnRkkmBxMVMMAByoM7jkT3yx60CVnAAUMU2nqEHrXRXpF9B+H0pImCpRcj86oY5E+z8Jbdtdv6YkzptdiX7LnSDGZKC8kG5xEbHnSVXq1IXgQuZJbC0K3HKFalgUnmPzD+fnFyEc+LolnOEDPzs/rSqa4Rqn481//BTNVco0a9Cs/97Mcjt/70Y+u3byhwcbWJv2b2MCpmUnow3Ri18KO9pN1fv79+dr46/llaoZjmr4ANJIIZ9bQn6RCjkkhSWaImoHd1oG16vX5HrYU4/EWW0k7ZACm5Dvi5DuikZ+dRXDc4olsiVAvJy1Ghdok0YnyxOs0TvgLx5spq6FWfAhjTGwET6uAFB4JgRjpPyD/YFD8YvmuXFlWToJv3Nlhkz1AziI+TSLZseHCHske+ZH8/Pyl0ZE6jUXrMH9GsSR86nTkaEhmmMmtjhrqim5VRL0KJlMzifu4I7CzvyPnN/BI0BhknpI5af/DOwZYHJLEzsYZjs0PZwr7BycTeP0cBwlas0Mgu7o8smDPsRPx2u1Kry3APBDreLG8s99Tqb3bOv7RP/mjz37hy5N7klDQCc5IZhHBpJKjhLAqdQUxo0O/KcFAf5j4fkJTNNm4sL67OT3d2D3olidmZ5YuPlndW3sqbqf/8NnWhUtXVdQlu5CIyB9WHiMs+cWFhUVSMfp/6crVazduyOu4s7cHbFY+Wu+i/+02Mzj6FHxpgOf5/nbL2GMv6HYEbxfD9CEhA14qYncIpTTcyC8dLMxAk8GUowmwC/udkoTBUIrz4b2O87DBAfDP+d5giFPPgQ06nYBz44nZFQof/vgjGoKJiSkAYDvQhh9+77t46+//4D1ETQZCA8B2e1yH2Bia6z4Er1aYAoj58qDVCiPBCAfr02BPcyNxGE86mAYpX0k7fjxod5aXLnKzGxp6zNuu3e5TfaMbzDGHSLwyhpXi8C6+LnSYfMc7/S52kI3LEbO7tGoErU6fspCz3mynr7hiB30mZOfLZeC/2Tyaml7c2Nve2ns4u8BUA2UXN3Zb46Xt+4+fcmuUvT0BA73P2QFPUYeP8TIvK//R2s4zFUty+bI0oOY+MiZePmfMA1wwbyx5lVrITCirkZO64lQSR1avDc4Hi1dm9zv7inTdv//gycMnJFm+DxvPnvLQYyKoVyJVG34ZPKHTNhiEJywUSCA7++74AkIgGVpA608Uo+CU7T+MAXEQApmE6IWQ5sdZrFlNZGDBre7v7MJg6tokBjSsToq42f+UbO4Itxfxod4YHYZfYuBEL3h56T3AIzBRYHbjgBm18cxzwWdomJsQ1ndsdJf3Mx1FOLM6zSlwxOMezJ4l3/gzpLSAuERYUlfZGyFi/fseV8gkIc+4QLIbXpd+iA/NXnZr/tEImrQE2cIlbJ41jtcFVbMrQbE8Fy1T4+iEoQPdgTm8IXR3Wcc8UuIxz2adRM/pSiKEnEznuF/4JfvVsid5yRplDWOcWUc+PRpSbJzG4CkiGgeqgrnBVYovtmfpRYHoQ79yPlqQaTr8JM+7JAdsUowmBpChY989Hp4kaVSCYtQrs1b2MfpJ5BPqk/hSNlgW
KTcZaDkpiA3UZyjFywUqI2yXUfjz1muvrq9tEt6lcldKQ5YV+f5xncHZIwMR9xPvSoOMeWWziy8v9sqdP3XZLIyvV3CgMkElRjUwBQsgjwGyxCAROIvnb1KBQi7kHhk1vQWIj/VGscAp1WHx+is3CLtOOABNNX6OjNP6WCkLgru2FQYThbBpa4C9fQwKFVQ/EXA5jcLhyneUCdKPDTFyqTFOBh4rjJxcv3pJnBXJcyAD4BgvIXqzQ68XLr21uSkzabVYn6zNLCxcbrYOZ07KZ8V+63iEtu8kV2nMXnzaeUo/JJVMt3MoL1+1Ninc1Vn6zne+ZT/YCiMLU5AsQwjZnuMLXp6ESBM7PNQQKhbbFiaqiL+CEYL0RgVkCdNJCFz10H5Cq0jUyOvPMAZmu5Q+Q0NPnm0sXbyxutniJnXv8e79Z7/75jtvv/Pum+zkPPL6p63W1po8D1wjt7ZbpbGqZC+Y+gT/5dmZuRvX6itbf3QmDJo1c2xka6enq2cbO6KsSOyPn27Ta0WaqFzu6tXlz3zmMwtz8+g39+O5uQV+Xzt7zbsP7j/+0fdonkVchdxq100zNFLkn8BHAHNGYdyUvYJqCxMjQYd0UhTdA+iZ7Y7vj62K8xWmKtKmzYuzEBqLsHD4HrsJJVD5Bf8R9kd3wI+vzhECb6vpDN3hedjqBAXa3tj0T/VeXgnIjAyTfPy+991vr61v3nrj7atXLls9uEy3Gg9OGS3PpTMkdgfoJHsHOOwzdjx5Qi+SDjigK4hzA2gIqZzjvNh297qv3ZpaXLj4/siHEn/gFulrpAc2ZOHc7c4ulbaMQAAV1Kn6sd/cnp0N4kGwUCgDV4MbaPf7DmxtcopTL2Sp/AfrHEbzoLcj70dFTZvxyt7u/nAxUqZh6kin+wfH9x9tiAco5kRzs54J3Qtrrt/hKNr+Dz96+I9/70/oIflAEic+/uj2pUuXbly7ySzE4lspVenYQRk9h7ru1kde3MzJRabIK69emz2aubRw8d3X32ruNO/cv/feez++d//hRK06GeWM9+EN5IoWmqK/uX/AqqYrx47Ego9xsqyjocJKbFmAme2KJB3qqfAhCpcxS8kzlvmLzsM/MAPRQTtU734i9BqS3yhwQuZgS7MceWFnoW/AloY/s3y7hMiflq7s3MsLNIXiMl3ZTXsQT56fyKCDqbG78sypPmAJCFrYlkiLkC7IK/jZhMRfdpihO8M0TzezXwFKNAgMHXxVQKQrQDmoUXZFg3gi49cCmLNfU6v40Ff8HGAXuhTYK4SQIFaB137SSYJLiM4Kuu1Bw/RGY9A8dRCNsx7jM10Awkzx4GzLgcK9Ax70vEMcjl5oD/6cIwuFvM06xFfSaThUCKX+I4kJVM4m4aik12UvSj0lGhagYAxG5UDLHuBfeNB6qoChtndcOfWA7CY7VppOEKqMh8gGbAuo9SAUN4sFusYKR6bVlXWy78z0XFVy8WSl1Cd++e4nd1aerYXqS4m6A9FyNQwXJOtFCJXjSUOqW2+wAD4NLHBEUOnnexovTWvuZjYdbIPO/Wmt8JsBgHGF3w11gjaAOP5O0e3+1FIn0STxZfp3xSE361KRSoDPIo7bmnL+xhSHZOlYhINoPEYN6MGsh+hKj0HVw/yDgFGrxUbS6Edj1GJMdAPKQQjhUFAtSh830W7t9STz6eyNC1nGax8J8IxYUUFSZ0Py2skauP6d795tsxEMFY+Gyyfj1SOZq3oierjeDVS+HK85+QNqwwQGzhu9ZfBDwSd5PZwa4zIOpEnQI5CT7S3sf5TfmDfrapkwNdgkBvvAzaZEnDhTFijsYWGDk18jn69MTvJRPpDq7WxsenHxZLhy5cZVaYY+frC/u9f69vfufuv7dyYmq6/cuPjOa9ejXuT6s72NzQgdtUTkNrltR85IWgsXzmfmrszNrzQ7+42Fxc3dgx+/99HRcGF9q90/6RZqkxcuLUuFwHIwMdG4vHxR5V2+J6Rh5SpU9Pjggw+frDzrS/HESq/2CmU+teY4O1DkfQAlih7xRTZqIhevB5OPTY3jFjp/C8INDkoyzfB1ckcsXYr4AbGgyZYi3CEr0NdxfTkKzhKeI3lBf3Yw8ktoZ1+PD3Hu+HTdCv3EMf/gRz/kR+oIhPPY+ZnjQG/hO97o4qUrkvjVpItNfkMJYgPP0ohAZkDOySJbgx1Ai+cAqKHezZ0po4udZFs0TVCtwlm7ddg6kJazsHzhKi3c5mDHeeJI44ojrnhG62Bxft5ZYjJIdKhP2XDr5nUwEApg2Hps3ALi3uSDBdJBMMrVA5n+yWVHXf4aBAWR3QsXFifnC0ISkN5u52xsEpEubm62JRoLgxHA5uaWG99WlHWoU64zpOXQufsPCKYHZ8PirU83tgcPH3/w9/7eBzyslhZKVy9dxiBakIkJ2blmJqYatVrVEKV+Z/kgNsCAMBU/mNduLly/fO1Tb35a/pFvfPPbD589ubiw9GR9rVitAGHbFW7rvUGhQGNBwD1vNbtWDKGyvBYzwthxXilKJ7aO1MBlhvWMr0Z4Xz3Xi2AWnVigBXogTJtFQ+h8B/YfxRYMBDLzuyFXGZiV1CVhq1FrOE/PhQYtvdIVByxhk+df4tgFU2M0xqFBFKFJ4rZdl+gwHC47/aCN4fEV+pbUQeCj6C5dvrt0Gx2nFm5nN0NaSm0CM2oTW0vRnzV9MaQwxEar1Evc9EV7KivngMu+P6GDON/ppdFV/AkFhFQXAmVo8YKwZZdh+OcO8SbwSnoKtsneCu/Gs+muYxaiWAwtLt16uwYMTqlAXOQHszhWxra5vFHgrj/d1BstjmMQlN+Q0qZqa1chKb9i9PATAhyCgrojY2ZS42vsNfwugkZ4HZgyzRB/HPQwREPMYUkKpUOshgN/+fJVe8EXZ2ysrWceogm38yQMn3Uzl9vi3ta2rEtepJ/dnZ1QnB1DCmcinDjL2mAtg8K82P+MJJi18aTZ/+TDS70l+9tMISzfMzYq5pvoHGSTgY3GrgCeaBKmC3wPgGKxt7AWTf/e5UHGBodKanY6TFsIJ7IBsG/HBgTGf7mB8WZLjTKhZbG55BeaI05OCYQiisMSpZ0PnG3HSI2a5EanpuriUc5UrR05lm5TFEq1XMeB4pThWKnVK8XB/nZ7bzdSlve4BRbyZ2Ol49HC4fA43fvQePnK9VuY6kZ9wtnBvW6sb6oh2Go1Q5qK4DbAZjUiFidBTfggGV5AYmi0QnXMJz9oSHA6cC/mMXPiGUHPyF78iY1XLIBcQxG+yPJXnxqvjuVLDeSzWJwYktb6eOSrX2/8nf/2t5v7/Z1Wc2T14MM7KzDa4lR5evba9lZ7LD+s2LGkpAxrFEFbB3uPn21UJi9fv/Xut97/9u2PNxh57q3sXbtx/eq1N2sI+AxvjMsLcpUvLEAuqtfeuXPnwx9/8Gz12V6b/lGClREu541cWfIP564yVrBHajCGTRB6cBZDVw18YnNxRSXZKcpWN6Rhm0WhhALZxQyo8CK4Dntl2dH7YO7Q8aMztJCrPbkHfNmO0A5CWLYTP+vERKh/cIIJRuO
tvDZ/+N77GiNgN2+9Bsn2Dg4Em2tD3cdkuTQxJQErJobcGh6O4ZaNndFpHHccCm2lyynG0nF6PFKvktWlWHb0qPAifOV07PGjJ3/0B9+SAaTT7Pc7x3Ozi9PT8x9/8DGGcqAMR/AjwW9IiZRfFnpbVQA+GKyzke2N3Qtzy0OnY+xsVkYWCYYbnuOmDvOgRpIOkTZ2tnaNXAiHpHqdlV6r08F6DucK5Dqugfwa2q2hza3O+vrB1M1F9JrlGeg4y+1OzzHPi/gdF58QkmO7c07nN16ea3W2TkQxnQx9dFdS7I8DItHfEXhgSA2tGaHgFxYuXrrAZnz63vGrr96ampzcXW1u12p8Z64uXRm9On7rxqsPnjz+4+98SwIuDh6hOSgWmwJOkupVFLNl5xYIjOVccuSzw+6YoxRg2nEOD9kwdhYhOgQCGCDVfsS9gZDT465l0Iy+cWpyyuPmoj1Isde8H4N9jDt0yDym84QBOC+QiyvO/wsE5DuoAh2+BK5OV6CcdIXFNYKx0CdvpXgOYcIdj/g9+x6cptMYWCloiUuDdEEhQWNcz++9+Emb5y/QTbw0/tIsuoWYk3LM9xhXGqdfPWEPADauVttYoCDXeg+89vxxreOHQMWgOxllYjzZpU02O6uckStPx8uypW/r2pAAAQAASURBVHASh9grsJFgPNmuYuSWlNI19LBeh/uzIKXiGf6FhgogJv7RmeWfFqx9DNkgIV/5cJP4mL3Rrzphi2dDCYLGr+u8wNTWl/U9TPDnTIPOajgyxTCcLx5vkVDKSDAd+JFAbv5PTzIyIteDSERRGrIyIwBXL19lHHYat3c2L9Dk1CIHxCcjo2qegpvm9r4IQDZ8Xs4VJrO2ZBbUBIUuXVCsdww4m+fLT/eAQlrzbOXiM63w8y/+Y3a6iZYJfnyaKQDI5utLprazflljQBzuJOlBbkKsNJA5jo9nkmdkS0uOYYH3g+WIXQn3cr2JPoJfkHMzhW6M1rD1xuFWs8BshhDsiPs0RwHb1g+XMDp0XOdrfHaEPYyaOqxXvXCqTeQYLoz5Mb8XSkPjXY+Q9rBC4/I9iLvifECbiMTXKwWBo7gUDKm6t8aAb8OYd7pNcGgFPQkC/T+YIpsXqiEDsjDq0oYDF9gZHi7QPQXqhVZ5+tlZmk4bH+ZrBxuNGzs5HN1vd/d7wwtDjbml5Up1ZrggYhrvkyfMV2qLY7mJphLSuZne8cCifvfHj3ZXtn/28xcvTE5L782ofyg7+9FpWSKESp129aDbn5he4ir87W99Y7xU/cy7X3z17bfnlpdnFpc6tLW5vNymf/iHf8xbQSExqlFwJd+gMi67B9vSKhEL2ajUkQrzm7qFXDGCGQO/wUo4ChbBoQBIsY48ASIQPkiKiy3KOlsSvEUo1+j+NYqtkQPJnoZDv0wEKDvFNnsVoAaDxE6bCc0i9wJK7YYDETIQ2W50zOeTBw8dNFv/pa/8nC88tuGB8Kyh8ZPla2gEDyfnS6vVBn4Go2VCQIEWvdGOWHDbx+yKx19aWJiYmoLDHDV7Vy3xGRna2d37+MOPP/jxh2YRVthul83/8vKFP2GLGh8R2Us4gVQNWPkpKTBo3nbP99S6RJlb+x01w7y6oFxWcGXnhZJiLXJABxjb9DGVCGo1X+Af0gYqdfnKtfX1VUXTFhYWZQPUmxQk7c7h/u7gx+/fee3asiwnAuE67bZswJ32oNvqng3Jc1hu1IsH7SBQFLyzixdGSyfHzU169XC3HMnt7PVSPaUhXui7nc7j1c7oR48bDXWwawrlPHuoMuoyN3zuuBvPNq0w7MFFa7Ix8xd//V+dnf/m45XVZ+ura5ub/aO+xbWPVsNo2RmReVo9mylxhJvOFOwUBy4JGObuuwYEKZl1KSGsNlSCQRxjmOSOX21MMFs1GsGaJQmaHzUr5+HIsQSegTxyQ1IJwuj05Lp6Tq7icL24fHfBz/GyRDMCFxSLzplt1o1foULESMxaQkcBcKGLT/KcbgJeA1sB4z9NrtwPgcrrEk4MqpiRoIzIJPOduerWfZf2YeNJ5AqEAdz4I61F9iI4NhkLEu8aE/Zykmmk0gquWxepfRoPB76M+vorLr/Ep3OD8KUxhyrHkELMM42YiClnfcSNJHT5AtB9xJsSw4gGeAaugmfMnCwbj0fP8QqQ67vjpSunHGtJT26E8DdN/Pg5NjoMR4w/RC/ji9ecnQsgjOmrvR2qUiPCFpzLpKKmXpAraVIJc8nn4vxIuZ3e7OQ83N2V6a7bh1Sw7XLStJpt3OLmetsAJLfd3dwCXh9/+GHE2Sb/Odiat3FD/dBCcRPxgHtiHWL62axffqI8bv7zl0lBTu57EM2GRDwCWnBhbtqybJHdtBPaMFdwrEd0tYHKLQjDOADjW+XQvv/++xxtsTlQmWeVoE08Q2jCrQkDkvTe3X4XigfvMCAdt37x9Uwu+uSKFFpTQII5Qd+DnwFpgR9TcsLzqek66Ihg8VM5lprylR628MJj/FM6XV0f8b0To1pv5Hf2olLn4KxwpEyGdGpUTxjHQXuv1QuNU75JH8xzy4zEkVy5fGF/P+0yEH2+RlYyQAjdJIHE7KN4R09dYomuufcfhptiOBaADuxJYL2AHcxVaNvUAkzqYwphEQrl06FqCyc9WuJGQ3F1wCOxKxH7wkTr7FmzV56YKebPLixOXL3Yerb2oL1/sDg9MS4lroRIysjbykIRQVIYUKmTxYWbs5N35y9eeOdnPiv+uN8/lb/n6QYrTxT4ADMnh0EMSPghu44Vn6zfEawJlZweMwFEfgL6TGKs1BVEFntEbMYwyq5TQGyiklbfsbfTwVKIuSYwya6RywvLQt6gaRgf9EVyOglOnidGQz3GEW6xSccVpALNLujCkoB858WWAX7kird+2LRGc2yu/LDdBDw8k7VUNlf1QkClwSF6fnRYo/kNitV2Fqwzkmk7uJ9h8ghGVhOZ5aHDqeTihQuvuG7cFBbWlaZr+JDD4dBIWR3ee/fvqyZFfJqdCR2y8C4mt4uXFiYaxZ0tLQMAdAjP8TLwokqpLmN6QnvYspGdrTCX8Dk32cAeomED58CXWJUzCyybzKyMTUenuISjvHRQ3HYa0GNR3NmkwL1jvnZD/N4POrc/vLf1hXeuXpiMGs6nQyrV1cpHwhboYZcWLi7NX9rceszqOVorE6yJbcWJacdEpk2+Fjm1Y0Zo18nJYM7JcpqGOhu9tfXeVGVo5cEztOf6lauIll2jz7x569b1V2/S4VyTDOLma5/+zOf+zm//XTN4trGq0DCEzJqAwCMz8AbjOqYNBToRiIssQKcvqEksDj4iaeboShjquDV6t4ERupJvQ3C09N/jIIaimBKYEZcoVpWzIweCgttVkcTrcJE/Ta5evsMXl16CBeDDJokwD8V6PUQQyDfSY+Tw/gKuiYS4FiyVdNFQVbwmtHnPMRRQDnYmXbCPZ6nrAvvEqQy06Iq7iWI5znErarrIO5ehy7Cvxs9JPehHD1EaZBJAPOtKGaUsR6ADV6heQsILVKGwEKzFZRbdCpRlXfBr+D08bO
DW7L1ekRBa8DvxqhfkMPXtFXGi/HN2SHEjmNCjM3ZOfHL+/Ji2C804HVh3r5RWQiDb8XglZCraasNJWS2C94w5CNiUaUIAlslJiJ3Z2MO9nmAXGUZOaIEGh3gEVrfIVdJtM+eRqFTeCRFDkgrHiCqgLN2x4AV5bqPgi/ERkznYbKxsMGhubW1z8wP65g/9VeuVe5+8j7DyDpdi2cmBaxR6kxtHyG230zLE5u7O8sUl0Ydy4tFohQQavAQsatkQ8lhm2+BFP31lC+hO0GkzTBrjMNNI/KYIb3FccIj9xwcHa2EjQkK1hErn9cpKSxEihfhLrgBsKhVrtc2B9bhKTuc9DGygGzBNinGQ7EIQfi9xftn4wsbbk6PSfgE24JepdAi9w6f0d+Q8v0RNHb7SZoLm84uWc104R62M+1DV3JtDNpV35lDqrlDn0AINq52RAt5UJxk9G0MqcoMjZd+HuyenEkV0BufdkxM+ZmJjK+WKI7q5uW8LnItwHghRO6Rt254YlJQXMaAMy4R9UW08x0e8WhhDLUMD5hkAm1xvQgILZ6M8NpRTtf1Xz2F0vDw/t1SZnC9VoPoJFerDcHE6znDMjoKZxQ5fv14+XtvdY4Ocnf3Upz/T33/26O7HS1PzTGMb2weq7lanxI6dr+/vtbvNsfze4vW3X3/zM882Wisba//s9/94t9c6YTkdG6pONMisDizTkOPLHa51PNiUV2h/ZXJGEELMkdBr2Owfgff5rNKkmXC4cSpmGYyOIr6OXjgvK3JdrgZgZIXP2ViOOgw50IhXcA0gN9txTJ5qg+Euo3BvodwelfZJkhC0CrQwXQAWy8KbQcR5+PIBwTAXHZ7fvvsxgG9MTS7NL8wtLqCFzjvSCBLAIUiLI50YX4vvjXia5GnWAVc05+4YpqQP3sIl4Utf+pI6W8aC3YFJPCLOyBdnQe6JV25cl6vDMP7r//r/hpoK8oQKFAsABqvPHhlhkL3ExWL8dBtsfSSQDLxhYNC6lyYCk7g94MuiEqErCJizLwN1SYImhJvLj2PCDapUK6EZqK+xcf5Bz86OxoRWw/QPn2xcvrjATMYYZIT0vCK5HA0ltg2p1/nkiJ+hpTg+ZWQKDkwcPaABUkK8odURfINTFPgxRKBQ+xA6R3KNCU0+uff0zv1VaR+oGxaWv335ypWlyxcuXbv7uS98Xvz1X/wLv/mP/7//xEzL1SLzZLPTPM+dlavBotAxML45iafhJ5kMWLDhuSNkY6OaDA9Ob0PJnDiOcTBtqBEiJWGUWm4OHzAeV+VbGRlnvocuHQ85BcSEySpiH0eGxh1lrgG+h2ETZzc8iH2FCGybRY/enOt8aNhtAssHJbcFypAUIQD0hlASMw7y6A41KPkA8onzCsBh0hOlR2WxBn0hZ1Bc6S5Obzichworvvt/yBbBnBuN/kNx5+zGIcd7Mjc4PoGt/R/TFzgo2UqAZoJIcXaIQ6TwxYxJQJUeDCTh7AT4en1uPFytRvj8FwU3wDJR+o7SJg5DXIGf+RrkQpoE9EZh8N6efnRohhyzw65YzWYN/ZeOY9CeKOWrND8ngwtTIjNOW8Kmun2TFiqjRoeM2Y53KHeiwLODNx50lmVFvwLohk7bzQMp4MQ6LM80FiqVicJoLRZulF2lTObF+fd6nmgdHW+3emvN9nqrs9Ppi/HqQBKtc1WF7DpnpMCDw6Pts+5HH3wckaE8lMjbytIgEIjD+dDm+kpYi0aHtkIjTHcsik/B+YK8JFAuvIPpgOzpBkknImRRQZ8AGpBJgMnu7BXkmH6EQD1fEyBhc8zEJqW40fBys+CwkIzaFjnCPrptd6IZRUDIQVYNr8YPIdA5H198caRRDzHinIYF7JU5hszNMVbbcJBWGs9XLyyR1SoVcVrPkymUynma80GPL1/SxwpXStKbVJaIhUzgdpnGDHGL/PsK7wWhFYV6GGGIrY2vffFnyiXfBdAqsTYxdppD6UfqFEeHQmqZt3rdkc0t7sp77cFJeXrh0JOF/LFjoGwkGnIyVjk95wxbLBn5WKlashSqtyxfuFCdmj5+ulKuzUg1p6pCJO45UbCB/SoUldx1KBnpAdk+iqWRAjQl/DfCaMTWRG3o4z59UTk3XgN9PVojWqLG9MLipWpttlydZlERIjlyHt1Gh3QpVjZ0myfCNO3ig63Nmxdm8kNjq9vUUEqeT6LVvASqtYmj49wjlWa3D9a2u+PFzqtr69eHqzdvvvbJ4wd7rYPV5sZ5bnhidnJw2N3dWrWnuFK2k25PQFYlN8G/mhySky8fdhAhRDkAtChATdwBoRpKLuzwSRwWxJFqC7cSCDrgEyTKAN6CUtiTSGIBECmjSu20DtVA0L1Wk/BEQhLEgD0/zVOa8fYX2NByeJHCUB7KNObMM8UPF4ZPc9ubmxuragHXL124xC/TmVdGI8ych4ehKmRrZEj23WSS0gJaaffajpcOHXCQ74wY7fXr1995550333wTgQHtyAtghuBEHcldDDEiq/zX52drGCN83qfeeeeb3/xG+KMOn2Mj5ucuvndKQziMo9I1gqB//v06dALUgYeDyKKQbV/xHlmXqpFdutc9yOer0o9EAF5ePHgw1nhK+gYH9/7DB5x3yLgWMJyhDjrvfOrTEFW1NHr39ni3tfoH3/7RV7/2lcmFQr+5cXbY5TDQqBb2dlbPR4pvvnH19/7wWzRf0xONu49WDrgBj490QprNMGswT0ofwrdGGoVn2KoLI8WC6MzhrRZGZJiIic5NjtdFvNx+tH1/ZXf8e+8LXPvhe7e/+KXP/9pf+LUvfOoLr1y9+X//b/72tRtXR4+Gnj5+0ihUmKPsIwyJlktEYStJpZEMPeXzDCyK1xFprxDA6EijVFfOwE1QZM17EW5q63Z5BQ9qEsKVjWtsqHDU2UUBQktOOW9kQ0ejxdFmv2kxQ+2Y8HKQCpf5YWOyC3jhGzK0DfdlrwFVUXgwYXPLqk1wwVFtM4FjYoL1o0/A4dJDsDqIgP88f1X6M9oAErwJUhOXu/AMJjqIVUC2Nz5viRKZdwa1njBudNKFtng1ohmiQDoMUARQ8/8AVxry0P5lWpbUf1IqUmsh7+Q+08wG6Yt1iUEkIcIWPx8siD8ZcApU2zk/1aAHhtkh03qjcNrdO+qPYObVY2l3jpEzAtPpyMm5TGPDx2PhEBXJ3cQmYxC5Gew3W8iGFeNb7jdkEB3BNI7ID4IlPDsKbe7ZQBXySN82fHzQ3a9BTfnhsanSeEF4iHw5ZyfdY4gjivUx0jnFZhbciDnLSViAAsaL+dq4Mmv42jCr2N60h0Mi3C2oBcn2xBSsm8FYJUcRkNlWW2zWltGHptYQ5xg304b40xWrnH7KvgCJEGSD5GNOok93oAt39OwKliqYgGB04nl7EybGYEPcgY3m52dtkHwHGetdlaKXl9XgkOX5Av97mTELo8JRB91wdaW3XFU2t1jkY01nRZ2OWYmNSz3H7sXpCCelcQeQMjwWFunimjlQo4lCvlxUZaXZHpyF8W/Al/1QdUrROdvrzX4Hm1I9Oc2rWJ+vTZ4wj
xdY98q4l1Mk61xFdJ568tFEhkGnFAdEHU/ch3BBuYwJmIQwumA02FBdwXUbVIA3Yg3EeMAc9E4UnhXUWZ2cae+F4SS8YofH2j2a/a5Us61DJQaGX3/nVZByPlySGp6XOyiibyOVkEusDr0u7BP84ulZo1p589ZrD+7d3994kh8+fP21t9nS6eUt+177cHNn5/7K+sZOf78jF+LoB/cfzE0sF4rFz3/+i3/3d34bdXm2vSqIWkAqyz9ZgUmTo9YhDefQcWWiBBONDEsHGIKIVQ20nspkGPalK5dtt5NCqUIKsQP+lF8D/QAD/oSQOT1TFvkO0ozHszoBli5/Whn3Jd3RgERgs4AiYZpQYnltnR6AVFrIM9ZZwfUSGMncMdmYXFiYB7G2Xoe6EjwH42djCIBITIzXAXhRT+6biD/BuUxJKpFy6Pm5n/0aUw1aZS7mZSR41swFA0uHrTJITDltjS9WwOscqtD7jUY+UoEBXABEIxke0dJcHAGE0HHgmIhcma17/PikNfKwtGLkUMJrLjcFX4ZZwssAayA88s6xaJNKrfqjH79vSAagW6ay7//gBzdfuc6hga3x+LT+8MmDte394Sk+ydQqYjaga5H43tyfnWnMzZRGW0P9zoEC08xv1DmiktvyccQScdECQlpiGkNLRfFMBhCFhdwGBA/nt/cRjGK41NSnUBs2mF7/8OmzDQHFv/u7/+T99z74l379z1u9n/viV3/nH/1DVPlk7vjhnQd2+Mn9x6JlbLoyY5j0fCkKiFpLy24RiBxSabO08b6lnFYtLrxJ8R+Gz5fU5OHM8+F2twV9WhDBitjo8NUntvgVdzYayVRBmv5eWFnicMVl78MGkFCMXSfNBTAlXGNAFjG7fHdl+I6aPYaXoftE8FJP0ZULskvkKLrJuvIncGYyTl0l/Bi67pheOt1hUKUrD0cjLw/1HoEEV4n8GEGI2QhQWGGjuQ0IhJpIlV2yF8aFmunbTWfCZqEpfgmnyXDA9DMKquv4F02Dx9FVeiA8lAIp49JjYDhxfmSl3FEVy4zwQL6heMiV63R9qqJHFQnZjYNiSDXB1C+6fKxqzBgXmj/IlBYebsYyIvfhLH16jM8r5cZ6rRbZZaIwPDHUqY2fFCSDlQGpt99hD7QthhTpHyU3k7QiBeJIPXk2yIPSIj0VjaSjBVZjDSk/ONk1qhOiqeiyuiddv0DS5+N09AWxVEHGCBTqe1KOhaLPFNR/6pspvSLpCmGi7IcILXw6PyQnX4Ke2jMHyxeN8WaJjscSuzQKU5upxXkNTOQCBt7uV1PwhO/Z5Y1pu2PUrlC/DQ2RyD/7uc/RXNNdOCoiVCYbNcLZ2sqqP2GuMwrRsVEea6AIT4TlBMr2bHpiklmOs74NNIFY5hxNmhFZEN5cwDeskdhYKlGsFcTlFUZltfSA6+81D6yjMoze7iw1xw6bkqTi7aVwLLJcz/bNPSdEiWU5J4doVLQV2Vjg273DgOO4V6lpitVWXi6l8cN+F1pK5DLA0iKEUAU4Qw+K40sOHwB4NB9VkronDOMo2Fhert2h1jEtEF/B0AfI8jBerV9/5Q0JBsmjT+58AjNPTMzh9LmkW3xWyYhQ63SCM7CRcRqGd/d2EPuLywvnZ515CYLPj5rNfav98Jn0ijtP1/dS0ZJxHPWDR/e/8ObnSrkilP36K699973v1UZLR6SgbvvSlYvWTDqfYqVE0JBIqVopUQKAU+nzzIjNCadmX7wLXWG3iGVOlzgWW+Or+fKWjINDKk+2AOM0WnhcYz0gDO4jG4EZU5HGvb0dq4oP8CxAhUmkIda/B9EJZMNT6IrOPSvpm3w8Vy9f8XbwDOosimauWq6hB09pGU72od3xMy6Qp26k8PeTMZg4Un316lUYxDgNTLfZgI0ZGvB42MsSIfTdZZE1MEhv0QlK4lc9gM+W/HuIRkJ6mnGX8AqjFdmYjGzYCYU9I2xImW0uL8DY65yaUOcnlOcpEqrvVtXn8uKSZ7V1/mmmRJUhV44qxyhuI3t75z/+8OPJz7+mzjGlkWSzgu/p93gPTk8tX760dPRwZ3OPt5a58y8vSgIf8fSidVnTiYVAMXAxsAynBOufTdzZAuykYYk6s5FI+mfMTnhy/D5uTI7//u///ieffPLX/+f//uc/98X79+8Lyka62MmxeoieHcb6skmyJ+tTjD+c5H1EAoaOfquXPx+XnMLxktFxMACcuJyShE7wDM9nflOck8fHe0CGfp6jDJ0tkLDwzg4GMAYjdj0TRLJxWzhj9emy6DbGfaJoTDJNzC7aUWjAHd98ZO2zX8GHp7I7Pl/+6nuGImOlgigkjjNEpXguKI7F9JneQk2o62QoiXOYtYnntQt0wGYdVrB4ixtOayjuwlYQDDsSFyoAX2NsZhH6QWxC6GtRU0pGu5vNJU0qWv3kygafvfIFKdUxcuX5k/ywYu1ejwaEzp0/Dd2iwoXnMhtLa18UIz5ZqTawYJPFGi0QXXwxV2JTsZfM57iDre1dOl+RIlG3QSB3ITc4GKuMnk6wHKh4SjM5gv89yp2y9gePYehMYfAYfyjSI71suASZV2mcapNXEodm5M+IQlwZG97a3uRbbOMdqUq1jBJYNGM/kG5ha9s8cYqOcoyHcB3McnilRTSTtOL9w2azNZDNj4o1wAOfEzxDbGe6dJVdP1kv30IIs9jxYaf06XVOtYPnO/hxqkGjO46obuJPu5sLFwmrTW0owf/rr75K5w9/wQUy+njWyCUpBaBq7WCK6/kGXOPxeOpcDL8se0mxHPkRTgN1yoiIPRQPFLwMHwuIKlxQMObS+UX6TgljjENR856TbN9o4s67CsHxNqJwPzpt1GdyoxPs92ubHb4vyPIR1U6lJuJNpSsEDnuI0thHRDlcKFX5ECkzgkQFDMnwJijE+oR6NMCXgIxEBq/kOMkUHYw0z4VwKB2NHNvdln6mpB7MGWaR03mNPtZgD8/L57mrt94olhtYoe//8IfIYr40U6Rulheu24fiLSwlc6A8llR67tHTyXqFG/0br91YvnpZ2vZWcxu/3OoesV8+frxy0DnrHLLeDol84YqsIHj3cMCnnxXuZ97+1ON796h81/bWhWMi2yPE/pwKjeIxSKQn3VZbxuJOn9VEdu1Eg0l8EjLU5P+OdPiOmF2OK9LthH+NvebZle0+KsU/G7Hxe9xPl8dd4AR5Y36D36E5f+rfFoMB/ZD23QEh+vE42AAMGgAPXSEGUr8jbNqjJdlTWvruDdZfGzf1kyiNqoYdgGfACMznP/958WTgVPseT9R0ZU95MLV3oMJSHP5bCen51Ep7zTTQObOBm5HArNF4+vSxw44vgWv8I1SJGmR8NXhPQzkexOPyLxDjGJ7E5aJVHSalRzBy4kod4BT16AR5BXsVKvjJvbtWwEkGMdtChpVhXFvtd7frlaHvfu+HX/2ChACj9ji8Eolvw8Ot1l6pOn3t0vLDJzv42JOCemxWaBC57jlSHnI9zYlyCdSYwjf5OECeqLc2KmvRFpiaaerKyH2iJVY+fGSHkQrTglR20KS/8R/8
R//xf/If//lf/fXHj/6Llcdr8KPqWfPzk+XwPaYZkNBK7RDxvVRuCGpMX32dQXtw0N8baTSYdY66R62mZJsnSpJZVjgmgyInKTc6oLbz6gDsMb6aFcyl3VdcGCxJTmbFnktXxmqPDTSwfGKK/el7nLNESOLT/10ZitLo5XbClilztDueSr/85CPdyUSnn/zqZvDtmPZ4LxoRFCo9CRcFl5pGEa1MONbP2/Wc/mnBtzk1j4egwLTOqZFOQ54Pwhgkh9wOksjLSAY9Y2xHyHxkDMGKADKMuWiz9wUlzAA0Rh6OFZ5IdJHu/LTfC6uhMRE9GRvDZzqHZmAAZHbjG3smMqAunyTt/riCnISqtCQkiKCsUXsM5I0Pj59L8aLekNPX3dlc7+w1SzS0wlNGTxVOyg+dlOX7lqKQGkvw1ulRXcnpyfJkoXIwlFtVpWevdUY+j9AQVpURHtGMYyRokgQeanJCytMFg797/z5Yl38a3odZ/uj3/wBqsxiQDlCQ/wKisRCVGivj8NT0rGxH3IcYxsAwWzq6kxY6hNMAauOPlUFBLUiSsX5qb22hd6EFuBhEBVJw5DKs6j6oGD0ewylDQ5bdHdvtPFtInD73QIHM7mtgPCA4gFs+yVTqjbrm5s2b6g7UphuKDEF8FgOc7Q32jaQxMbV1uF1mK5mYzGPTzB+CCFUndxj7St1NyBqSrpdLFOWooryN2mI5CngfdVrtcEHr0wnuba1vPb7/RCocuzpenDobKR6dj5NoD1tk0nB1YcIdzknJIebD3MHqKcdfFB668dntdOF/kgjmlzorFgbrGmgWo04xGToAkJiEAOdmXMO9/cHW6rOH94++9Pm3d0+60sLLvyUgh5pPooS5+eXZ+Ut7B71P7t9d39ifm10aL5QRJrJjqRTFOBTVxJCE8jtM1pFXrXU4uLA05wzIW7p8cXGn3bt9+2P157a3drf3z6ihikyjVMi5Eo9Hk1nZ2BibGwO6U9WJL77z2R/8+AfdsRK1zO72TmmmLpjioNti17TtOxt7SwuLdBr2EY2xTU4KKAK+NstlOP6078AsfhU1JvP6YccywIM2xW66PKuBRXFTA1QK0ve4n2B8ygZnJDvwIMflcT3rVodwt8aeffToEcRORJ5s1I3HwQf5JCeAwXvbK9hdzseZCJGPwGle5XEtadj0//a773CpEFWmK0xe4O5kcDUGsKc9NJGgPAQspyWUNsA9XfrxOilj3GFo5BNA5TU5PdWYmNAb4HcE/eSwOIpEXwRVn86+B0GNBfTForEnIFcGg0lyeanj5WT5D07LclGBFIYK1OCoOIGVh5mAJy4qUAcFjqgTXnjvf7S2srHz2tU5+hOW3ThTo6NiYfu91q1Xrn3w8cO95h5tOUdjSIarPP9BvOfBvmKSjkQR1xZnJLRKgZrQJFZ8WJDfvwVncnaEjd2niHFpi+0L7sE4JYVgaNrf7/3n/+l//r/4X/6Hv/Wv/KXf+Qf/8PHjh2++9narfbAws7S4pLaq6pCP8NmAE9ZDOhyGSqFid9Ux3t/YZ7HD7RVHim0JWA8GAqXpZayYt0hkkROHNix7M9LYdXJKpS50ZKHKhRp4kPrAQgXD6z/W3afv5pDd0c4XHWV/mmFGczSITUxPZY2fs9jpjk6yfrKnNPCnz5CrAhwS/gtM6Io32qigWwmthw8XWHEKo2n802UwH+GaHDTI3mpsGLjFGHR0xkEuFJqUewZLKafIXKI7pgS5eCJ0fuGsEYrQoOTZ2NMA0iDSxKMjAwoaFR17v8ahOMI/hBKK40y8hWDP6DXwn9N9fiq9keHdw6NdSYP5BczMNKZnK8XKdGWCRdxZPAuvqSQpJd0nXa0IaGaL0yEV+AjoXNIPmcQH0khGwgM+ebCsSkbsoPFojgzg/yXuXQ3osVeosH8rwH7n6bNmq4+dzeULApDQGL5IjDbCSZcvXq7XJ1rtIxiBlMBpKEpcDIfuxTrb0Jn5BTW27z98aB0mVfrpdqdnZ2qV6sMH9wCJm/3jU/QMk2LutgORczPb5WxlYtFtTHbF+oMrGxI6WQgIZnFBSRk4WWqPO6tu+vRQdt/BIMQhbGwwH330kSERmWkynVikSIaC5t6eQ6sBcuV+s90kaXkWkUASvd5oIQ7Bh95Nh0pCgrMCuJHww1PBFszcnKxr3B6U+Ds5LIyezU3X+TxIHYLxK4RhiyGRq+DIazff+PDje7sHR8JEm72RzunQbvd8n3rs9FxeuEJVjm14eRfJL/BjU4pK8lm2qbjGD05aW1sbu1vbFIxQpyUKEAMqSesOeBG7KFCSGG0b2+seb+/2n63tH/U7c/M7WNJGtXTc423cZEWbnRpvnI9uIjiD0/c+uMuVeVSFkNokCMCrwni7u2s7m1vEEYYBK2pVR4t0ZW2n0VIrFvzeBx+tbqw+frKGGnA4zlVGCtUGv47TdpQMAd9Q+e17d4Upv33r1cLw6LWli88ePAie7WBrV0oVUb3lcS5xdp6XKYQmsQXMZbVtrm21i2AGACAqhFob5HvMOrIqh+uEi2bVI4ANAUNFbHpGumwfPimJUznUC6ECBZrBcT59108GKoUC5UHkQfYWnu0WmlxlAHBZ0BupjaXkSao5bZCrQiotbSSQWpZUxVC1MQz8kCyCX/3qV99+++0MAjWzbnqj1PQJ5LzX2F58+j1wTna56fIWj5iLcWLyp2ca/jQ2U6B/BtbJ/ZSgbJ+HjSdAFyIOIzc/x2PaZDouQMt9TgBJaXqKIVlXGTn3BTrMxqZbHqfWkNZBgjSEitOrAcikxdSgSMegvdEdDH1y59G1i7MSr2FTSxXGOZFJ/cPewfzc5SsXFz78ZD1fqhdPxyWotfLlSoMwSgtrHYNEJbwaqng6iMSrZ4TKUM2j0xHVixuP1QBX0Av5vSsYOVL2CaYWQD1+7+7G/+5/+3/4D/+Dv/GLv/hL3/rWt5g8D8NwqKjOJKUIZTWPpwrczhzAvY69Sl24Q9mlexhEeNjOWW1IO7IdDI3xF9ve2/W6yemRwqiCldC3h1SvtYdUuAVU0xLZKXOxNT8hV55x2SeffgAl9imZv2I/42ZS41FXOxtBSdIVzySeO6L1U6P4TJffs133e9Y6Oo9eEJI4PBokAuHPaIKboUXlQar7MFoFIYt/wdukgXrai2OA6YvHQ+QONJjeFBbEoDTYXreMJiOqRuuKxlFDBccHQILlSf/wqZSE8bMhG1HWEtJJ41M9QE1spcmUpIvjJO8fcqLRYUcZFifqRACV6nSnhZGQXI+lJx3bl3kzIn3HOAhKkWWtjQ6Zaw66NlFyhRFxKCPDk8OnHPzUPJiQJOywP3LSy530R0/UKThUdkDVRSqLPg89iYyjhIban6O5qgJ5o+ObTRUESGnWLnwaiM95KTgnpyYWwyVsrKggidys9JE4Qr5kYidjTZLgEjkjpufWNzftLEbV2QM62cmPrRkdI3SAaRpWf8BB1tOa+seKaItiV62jNydBE01DrnCaVLTIlcNmeUGZK1Y8rfnLm76AqIBUOroQokYdS1FWIvMBpbcDR6nGtufmxPl
78907dzZWVta2N7vHfakCzNN7ueMzFPkilzSuEhRgK09y3OrQMIOLKHHur/lI6CLWRS48pr+DYv5YsXNLyEFivHCsjpGDS1W0MDl7cf7Cj77/UaM+1z1q35O9qCvgvnaWb6gIG47n5RqsHGDDw8DupuhDNn+uDqoWWQMxPZaAMpAQYJ3TlANAk+HqBOk1U6g+js+IYM/e3lZzS5TmIHw3PvxkdWl+4nSR/MdRtc5fvSFGday0utVUAiNfmhgdK87OLovdhI/YMWX75q0DB9oXUCu/rjHhJ/DyK1tbZIXG1PSjP/qjBw+fqiutTXB9zGFRqu2MNIRpzQtLK49t7W8XR8dvXbl6rqzR0PhrV272bvcquRJc0mFcHfRl3JEKw/SFzeKpMETsmi575KhZBMSGA6G2iIop27isCI7v7kv4ogGgCkQso6vpDsLB3a7pIbI8qQBQqSB7iBAZAjPjT7TNr2DDrLDziIEGkJQH8V4Ij56RSK/AzbF5oAeGtL27hznNiA24DQk78UWEnamZucuXLwsk+qVf/EWQpjej9Qp9moLvQfMDRQRcZZ9+zS53Mkg3GHeMwZeExyNJ7vDwsnESCg1bM1yIA+7t2uhnZ39P/4jk7sZWNmXYKTs6nuq2O2O1Kt2LiDYTAbQe0b8rTBkwntznYuhOTjkTkXezgdEFc/vEPIPIfElavyd/9s98WcIw3jY44ijFeXastuJhr7m4MM1TK0JLjgdTE/WdFpVn9/U33ja01ccrPJgiyxosRz0VMoN3QmkyhyX+HK+fmEs00vYRyyz76uoa+Q1PKvc2xw2bVa+Nf/TBw//y//hf/tW/+ld/9ktfffT44cyVKTlqAQFTMfcl/wSTjlAlpbT6faoL0VTusYn0BwoFWCtcBV7TupIL23zQAq8Ms9xGqaVKETeRQQIW15AycmUw1jNkUq3TA7Fttif7UyPP+DMapDYos4ezPbCNWTOf2rj//M/nnQVay+4HOKTvPuNuonlZ4/RX3NMmXuOKcIQIxEvAhLMIsgUcGCORQ+efxoccCwACBuIKD+l4zjDCLhekyp8vx+YRq5btulECdQTREwCCiYYfhhprGmdsR5hrEhWl9aMMgPIso8HoUVM+EJFLZCSEFUn7GSgaYyMT50NzRP5CsTQ5Lfw1Qqi8JhLKqOSTaDyVEIbOmayF1MIuIH4AWxSpBQ8Z0wCsgjhe6NyFq0+sRIidNvJASN9gPFzre7nT7hCT1djJaH55ecmu4EctDrkOFQNbTrtDgqMMmzyWqlzONBKSxAiu4vCqR3k20QEFdXCP9gySBRXOPBD0iCVyhvnGZ6vxcgGzxfT5p64MMHxmLaFQWCO7dOVL9JMuXzybffriEUTFSoo4wUahb2Ji3PdIp3PWzAdzqlYCt4t70pR2W1MLc/JsamDZGZ/QQiLsxQvLdpbuxDH1NpwyNWxEZ52coVVFxIHXs00cdA+7O9MLJUUTFPOiuKXD6yrPs7a+u7U7MzUn61K/J/Wn4PrS2VqfmNaJ4ODhXKXOxEsFByMkhBiWBpcTwWreizjX8LvFX6tG2ahNWPBM/kOQQaJ5BzRaAX8mpGLJxfZS47T2VBCksKo+erb7bH1zeWPm9VuS419TQ45bNvbi7qPbYrvlf/qFL3/5wsIFNjN1UmCuBw8f0cdJu0Dj5O1cfKmUIB66FIIONzC5U197893tVl/C7Y5IFwzHMauk/aWXAZZOxikUAkdJqWvHxZjNVqrXl6/+ybe/UcmX28PDO/1d4FmfmqDUo8iZm5njrNzmuZpYBCjVsTRTs7YXU4U8MoDkkCfMFcEGhyZOhZDhcYKUBplazyPG7D46ZA3Z6kMqKhQw/ng+C2v3feo8g5yMVgFpnXjQfVCB3QbdcD1Y0on2JG9cV/aIpU4EMizojsMbb7zxxS9+kSlIhKmXagPUDS/QWormAXsJPKM3Xbn8CTwTs5XdcFCeozttjNazsfLpcgcy8NXrAusk0dDJNXLvQrBlBgl9YzD2gaBcetO5XJ3V6clcsXQqYMPGJOSZfuJNMCKQ1mgtlw165623PrrzCUw0MTlJfyEUL1+s7G4MffTJg5299uIEggAAPCrVmz0Vxt1dmpuulsdawhK7B3NLS1LsW0BGAeEIB/vNgzjpIqaNVs5lXCiqOY6Joy0yNcffp/2yEtl3i4wzkQ2Vn4/UZeKrCFv4npnpxne+8/7U1O/89X/v3888rCZG+Xmt0WVIw2FASCGAw+AKHueMSVHj5B7sUSoIcE7asmO5DYLXZjWjObDsHWYvkfD53OLFJU/rJK1ZsC++IAqGoX3yzE+6ZotlR+10tn9gxX7gnS2xJTcBaN5j1td+CNPRPlvrRCDC40W/cIcXuB9Ln/RCPrFEiFRMI6UqsIUkHaOE62w3rJZxOvRa2ojP0AuXN3IlzsO7dKXn0PXhmvD5LgqaZC3E/dPCmC2ggdf59MhtIMpPk0g0GQZ4IZ9hOAE6OAuby+hZ5/p0MsTblwOFKMSYEa30ACBG8pQgb6xermCOUY9Q3h8Lxj/sk6f8zvKdHykxnNpw0mtDnlRF1jrNnqqpSpsjraNj3eASSvAysOMXZRlpaZmj6MxCw60aWr/Hl73oj1qpcHYoomT8nOIOVnEO6T3Oj7j0mcWQqpxDTQhyOD/CoFmWbaE7PV2JTcRujUu8xGLJ8H8wNXNLTdu7d+/LijA1RQUUQvB2c9swLRxuTsDjl774+QePHtarFOjDMl4szs8uLy12ui3cLakFSMV85SfvsXNWeXyXajWShAWxqqSZ2NnYxGDM/AcEuIngxDE2IwFtKQDTF+fNmgN96nWQA9045zbRUlMRxaE9IISi2VTWxZHjwC/gBLxBCqHSoQvN5bDh7/7Mp4v1qgX0oE7ggoAxlftA5uCQQCPF1DlmzA5HALxgpJPDdnd+YeGwsw8w9rdXuwdrn3/3C+PcO7r7fSVLjln+1YE9Yz94eOfeQXPQ7uYOekOX3/jU6ciWAnl9AVERmZqbnV5otw+8rlquYg+frq6xuk0YTKUITPkUBgjLfZzsEIadnR3wjDElUrhjoew+qucnDqKPnjzd35NiLi9iWIwXzmhLOtKzXfWrmr2Tn//FP7+zvc1Njucw56jf+sv/BnniYP/g8e07W7LxrG1aqAvLl3R7//5d+TcMrFKv5SullsphyTVmanpegfrvvv/Rk7UdkMktjg5KpFT4b0rNl8f+n3NKzk9Wekf9b33nm7/x9V+mG+Ci/YXPfPGbd380V68dCjs7aaklgfg7wI/bj3F18aIkRthBEQ/kFbtMrXewEl5wNoLARB9rBzWzia39lm0ycZcpcHDQw97eXkaxHjx44DtsTvTxaWXkHAr40TnhOu0+JOMOsNEPMBCzZe6ggg7PGx1KHgyKirlAuAZLyxdIYN12VzPInT8FMxXg8SubJiwGeMiFKJUvsr4HxwuKUoGehBRD9e8OTQKSzqtN89BWYY8TMwbX4ZJ8rTbqNFf0mKurqxIlTMpzRvGSDJVe5O283Lc3NvSMQsglzfsAYiLKkQsddFdOsQ/Jt2g/jsvh6R
Z9yEmgP1qYm7nZY+He+8/+CbX3tj4YXa+fGTpw83Fhfk8Lou8iN5CeUZua/Qk58CLDReDCyfFeU6nyNI6S2OOMsK/8izYFDFJnyH3eUiLMcKIU2LG9OJ6QJmWXbfoQge5Amwu7gQwi5Cov7ENbOcRTiG+FnG6knU6KklW2jWb6zeHh+bOR+alEnKHgDXEq0aPenEmxqUsiTZ+QWZMjQOrywsbB/s1ieEedRdoa2OfuKnJx1GFXFlOJlgSMJssJMhvmbqkrAzicvFwm4CTXqelQ1S6qYG6aPkZyHwoeJINbVoXZQAHx4DAmn5GSwMvMincjKAYwlnmsXuRLiuDPWZv24j/PXcKr1cDJKcKJSgtdWr6qt2tzYrpzQX3OrCtC3nt/a2PSMtoHllUH3GyAO3peZDwZg+xcAAOJ2DXaOiqt64tSI4OtIotVy9rliyerd+u9XUUdT9oBB04lQmJ7Bum6nb/0qyJn4fZwj/NjPAqNoHXYPVVn1r82BhqUORonaMDKlDb56d9HSkQr2jE2ONqaYdOHmndfWVLs/XEvBl7YnVEGs0Zo9GYGW20KA4HqJ54kpplBRUSuTZt5BvQatQ2sdHJIGhhFeFw+UL7b76E+t00rMiDzzA/XlYEMssI0XspRQxmZMZjdsq/LuQMV5jrS2v1XOLAj4OQI/Ku9zh/6QXvE79YwZAm8lJmORdcF/SgS/uzMWe4xV5vgt0rQvC+TTI1AWiPIPjCLPLtXuw8CBOEdH+xOugUOREXuNEYfeonAlcDRGCJbJSzud+2BtLNWWV8uzHuYcg5GDocDJBAsOx52GmUQEVIzKa/BlJE0I1/HxKrcvbyp8xsxNe5o20ZLAF0iUllrtbhpfrzP6kZ6oZXig3i1RxijiSij8vXKD6gvwtF89clNTQXfn0OznhD6iMOXLBR4MJHXgXgNohizhPzY23wPhKPJuZi41Ny8TkTemjpFBRz6U0eeZHkoc2LPin4Vw3xEk5VzQnp+jk+fmZtgjPRXAHpzLiVJW2jNRrwleDmYEnqg6ultdHc3LaXxkZY2ISRmYP65S9QprxsbMkNjIfMyt01Jg9GV3QgOawvTfhZw8th2sq4OCtRbrQ2fPeFP2oES/JR8wh7954vkndprPLp7uyPPm516+sXeusbz/64Zt/8fzhB6986iurl28vX7n2vLP3njzE03FdJ0WYgUBObU1CJeZx3th4/nxO0xVJkLx1Ew0rYDvdhDlI8vbBEYmD1ZUGFtqPxaQuBBbUqmaqwRdMOzly3eCwZaeVn/7oL9lDS7R+oRWVXN0DLhOClc6DpPkA7ZxFJl5weqAKiQFUOfSElCVNxeZaIFtFYni58djYql459O77H77zsx92D/76//J/8Xfe+/m96dl52w7rM5c8B6wba4FeMtnCanPGQ8sCBU8r2szy+Cu0Yj2DyBd/BptDXiiF5upr4RQW2/loW/kz1+fT0pRnAJRn+JVEwabCZfH9KHXp/1J0qPAYX4LMujvSfcRawpJJH0750fP9zScjIzPsIiEiBA4F8rLByVJzyn9ijsWaz3bGsmQZW7LddCpIZlDkh/rAg1F9a7mefYSUCpkDlnHGEyH+EV2EliJbUFVu4gGeB0q8EqimkFowWSjLpCiByXj0mTCw30PeOcA+DsvgbkHL2IeBXbIyA2FUD6AWuGAFZ8Lw0KSNibxXOVo4W4LuR7duXuMSlvITmA7ZD7o1eV6HI7t7W2YXbZxTCKZS1XgF8z6jVrZ9YhIA7d70puAztf1190hekjBU9AQZKDJOqX2jNic/Svs0e+CkqZCHeILN4uinPYJZoiMAGYn+ydwc3Z5yxunLl6+3958Tqwd7h7VasxBG+kXZc4q7OdmewxPZr5G+1e5oEGNJVLEFX3FlgWTuhugGgXKUvixBkA8iXhzADA0z5xyAQTcN3jFwKp4Y8PoHx1jXNB1/JXbLQnZTxIBXpWqPpJUWCrGKMRLWb4WCgsRhmEv559G5vmKyhusJ3ukni5TX5594xWRkCghBBhzHyayIwyXlz1xW+H5+Cspn8YNk/iyqTcYcTpuZxNYzmDwr8QyXEgYpmDDUOM0cebCPUIWVzU1G6bOCEX9OyM/YvcWPhb/7Le8FPeOMkhg3sfsIxDQkxdnjH8rTyrMJQgse4BRCBhh0UP5VzwsozTIxqrwoTCB7wgZpkbNaNG43vu8sLZB7EclhDQtTyDplHhFs7krUjZRDM4aE7QwO6VVFIOTBFGemjEqUUraHk/rbCAtt22MiGwlLTz3XwB22DI+z/mL4p2PnodCsBowkp76LSap0rbDX4KTXOhDy4eKwCWgy2hWU6jPG6BCJIchV2o9N18a1ZeM0SFrt9JTHUnJDpoBHqlXOelNzGClEw9kLtaJNfLdBn1XaCT5mKhV5/DxSSQkkc0c7Qs3yTkaW+iezm3YVGky5Bo6ZVLWCWK+Xgn22b6KQa1Esp2DorNq586QrK3d8c2tXph25o4KrIYg92lpdOPu7/86v9nvrH374ZH/zrU57Y2bt9p2XXnvp5qtP7u7ZVz4JgNubB529VqcXrBwZeeHlV2aaszE702NJpruME/H8xsLIYrLUxid5S4SvgFpLQOKqd5i0scjcgmnFpiF9pOeBzMn3v/sd+qtCLXpQt2U7sT2Vy5jz9s72zPxSsznX3u8oPKCrSZwJN00GAUpEKTAD8o0cj8inkGwcmmWiwTdGG/6lpYvMU5kaNif7oz/543d+9p1/6298afT8+dXLoHY6LjQdFGRjsWqosVoe8TpEyzbOi6OI2FxQMNVVJlL9Wc7kStfHEx69E0YROWJF4RrVAVuTDQhPSRHUDw+CnQiHi4fVG1LzJ3aRjJo4JyL5xBk8OBGq5M7IWVT4yPWnnEXwc2xlccl2BSNnfDkQSoqTBvBpoC4dsDghCGq+U+y5p83wyVnn0toct1qpKxcyOPVYxhGahY8AaF2QYhh9YIpuDT7MRa4lV6kBwCuDLzAA3gTbrEIgUZiEMcdDfIHTYQJhluZEV8RISLJc6OfwZ/fhBdx64bkFJuGuRRM9HYx9qKb33nOuH9nzmuFiCSzK5dUrxyfDdEOueykP9mFVDgye+9022inynZ7tqeaBedgwmg/YXpEn5LULkqHJ/ew6bRz6g97R2dSxNRL20ILGXiHAzhfFbIijC6nHCZiUYLE6yBFeqvtJnJ9Jb+DTAW1xNJoaGZYe1DqBrV2+RcTasQDmYxIC7nIz6WuC6sJaYju+u3tscelKAQueB3Je5C9IEYbo0+GJQeDKOklpUHhd0CXwc0CdaLNSrrEOK2VY5QwNHD1Id01RlN9zku2Soqg4h3NxxpIWP0mgTblYJJw95KJolMOrsvRlJcJk/XkhgSLeyr88ECzLODOejDiHt2Uw1R8XnyF2bl1EALsjf/KECivyvbKTzKu4ngsb8wi8oJAfceia8sAgZQxncMgAHW73Y/X7KTeUb4BJZvghUM1nvqCpj2eUG4NuggTVr1G6nIumSG2hZ5laDBeyDSdjy3DTuKFkD2kzw77RzQwCxDNGnMiVQKkxt1A80ia
RTdRfA33n0hFA1MMbUQ50rOgGl6kS7RJ1j42V0UrALvm4CRozpz01Wl32OmFkGZi/cQJZ3+oEJrI71VDdXqanwzVOzmGZ3JK/ibij9pBy7EFn5JxtJ9ZFLPViDQyfHZQw7qG6K152AwpczTtdqHXGm7bJel3KcSfZpBw4xt8Hv8QAo0ly05EOBS+pVphFJqLAq3DerH5iltmpTEmC66M2qSoaaOGEHQc+R/sSL6aGan37IHA6orAZjWXC/jQlmrD32ubW5sraZWCUII5XZkHSGx7tCNUEvJOTM1PNpc7u8/X1reVGf3WpeTbYORm05+cX/u6/9+V//J/+y+29B+sbT7A27WzWrr8xJRN0ek6N88LyIgvKhikyjgFT/WOmk0yWKIrw1QpQ/jETCaIlzsQT2DQAHmNTbzbSZRxLjYuyMHy9jPRVcv07b9u4wZ54XaH5dvqWywrBNeRV25egQfpicaSflxb8KqwTqwP3krKB4QKVNBAYO6oIu9cbHdYPa7s20VfBPKXkTK9Yjp3RoY31vc7O0L/4l3/4177xsqbwwl2ngy5dgIcGDE05tXTqH0JaoZzqgDGxCdi3WaGY7NDOVwvkgNGOojgimpwv8lJXGilX9NpCouAeAVQ5711O70F+ND8rD3QQIogQfC9kmXt8Q8eiy7bd8sKoMFpw1dlRWt4xBLfX7yl7VXhUMjigLTM7qrghyamT7ZKuIePn41MTej+M1ObtZ3K6k2JbxnC3vy+F3V5p9Ad2sz44hpIcNEll0DONm3Vuo36wvUI4SZOmVcTFLesnSb8mhG5RFObqNaSIZSVUgSmMtTAOurlZSDC0+vS0AiaMFBOCNsqrsTLGgucSwy4kFuIrgpyn57vSfHWsqDdmOdC6O3vDtRnKKLWSRuAtaeg1IoDU13Raws/SpTWN/6Hr0kIzvRwHNiA+39zcbEzNsgVsPtA9llEz8ujpExTYH4wvraz2jsOiMYSDnlnj2Qubu/sapKlvWZmfNZ9+hxo63t3vN2bnCL9OW0ML0saWaeKbh3q28K8btpJ1ddAbG1u7ey2s7PU3Xhwfa9QkUDeaAq52TOYElyrcOSQiJjjG9dRR9GK2DtAMJMv3/Bn2WURFdQZCABkFOFiX89YIiOhXhIc9KClpaTjrtjQxpXsSy+kgE0x1oErCyWv1xJeqZ+iRW7HPSK9CrpGBHv6xKCJsvOhjcRU1I4f3RjkMizOGfKbuPTiehc6npxj5kY0MItFyFKpwaaEggpIrqSCFa4MSWWs/MkF8CUnnKKyKcpNodOwhwPTSPARp5DJWFLGHxfup+lfdFSxzJgQWieWhcNMnZn1xhIqqr65I4l8ILSOI9hz1JOJK/SvgmGxED+mI2cb0o9JlbxzdX6jVCWilQ4z0eqSR57u4yBYsogyTYXjUFg2m94Upq6XyrDRETmgiLy2ilG0SbQLJ2h9d/4DYkbyLSclNHoQyLMMKkLSCZrQZ1dTxUE9ztUMdJI5qh9JGzsZsxsEXl+ui8h1eESzJ5imd49PW8YleP/3YD+cnHRupeX8C+hK7kw3JSjTo5jS32GgjnvWsXhQC9B/1CevFbIqGlMJJJV5RGuyWaPDgVcBWVtpXU0Re0UTdDjsQOWfgZIRB0XapAIpfOdKku5HqKa8bG48gPZGZLqUTDGw5SCrYb69mU3YgYrF4nbxk+iyF8LRzhCcwXzVDOzraXF6aHTnvjI+1+PCak0P/7t/+4n/9hz85keB7/2dzV8/f2+lde/Hz0jr0yznL9hq26ZhUVGZaHphR0dvMKH6NZFH4r03K61MSM4Ak/QWMxJoblWoVQsU1abzLDqCgcGxydZ30tjafaWJL3a9NTWRDj6NOOgmFQq0vlEUmhahhBxziLUDCxY0W6EUDi/cK1xC50hYrFIi36yOcsivStYPOIRpTc74xZOvHpcWmEtUYtLSCqMxVawZ5NGhUOnj8g4gO0H75CLJ9fPziWxYvf/3yr/kzFAfpkBzERBXhORFsBo7iCBUnQ+c+SUcKKyK54EsQpcw8nCIPOuNzzou9giDJnrhHuiWPra2u6Ydlr1r5bvJrcmQRbKyszsyjaAlAmY4KnNdnw8ezCw27Yh602jxjp4Om1Bs2VbO+TCuXycMOQ8RoWXSJ1sbJKyOEn6Nko0hI4XAQFQIewsyz2StWHaKnEyLS1Y1dLhzOmUEWY6Cw5ASfzMZJuXwX7I65YjXxXM5b2n/UVWibGSIlHBVbmlta4eRMV1iZjKOnshbOhurqHaAZDJCtHHbJ1Sw99fhkdm51Z7vlNdeuvkCW3H2wodvP/NKV5xsfNudnj88mnj7a0gZldrZ5XpvZ2NnpnPZH6icStfgePMbiYRrn4/Wjs30b+dx66dYrL97RSCVl08cDOes2eNvZ2914vrvf7lGjyTb+kJHBmIbWKyuXpM1v7X5kTd741KcVgQ4NM81HvvHN37hz506nZ1O2zof37n/vz7+/d9AyQ10AAO5CDhScxgEL9siOYCQWJlAkBCwP14QrfI8x9Cj0VN4ISPDX2RfXTJp4OktKTmMw1eo6GlFKYQCXTKl0TZckSygaGHM8DZh8iaVI5IRsCk/EiNFPeXMlroJzwbZqYFFAMCnMx/+zciQ5mo3tFV2lfKJB9e7J0CvE6A8D81kRDCrwImy6GBM5WX23/0Hw2mMriVV9UglcABcr9l9GEmFz1ul0XRyY+KMcvoCh2u8yQgIiLraPvwMW2jOzX1xAX4e2AAEAAElEQVSccbiINPSgfBjkmJoMWRgyMpRJYKdFZzQNF8bC1sqnPqZFt/bMQkykCCS2EjZEzubWGYVpeXMaPvkjZ3oH+6ElqMoCT6uPeHodkDu2JlC4IYp2tHxv1dtS8gq9MhWUarAhUNJJ6CKgbnhKSuREwUVZVKftln0ENIod6TL8RydPElcbUX042xj58J0nM9B6TLCnS8yh3DCB87OeepZMB6lxZ8SkiK5cyptkD+s/faqnBklJ00e3Me0bWWkLbqygZdL4ZMZSFrGsKWzJ2qE5Xq2Eyc0wpE7WYEax8PkiAg/2PJnT6BNP2qIfM0mzMV2/zWOoDRRhMMztRpB7Uh4G+oULgJm8B7y5MSHNyVpx3QSGqrtn52SmUIw6Em5l6s7NTP/mN17r7P4kTvvdp2svrI5Fi92kPtunWm6xuiVIgS5kE0dcRT+DvFzvbJx4KdJtjXrnfBIzY/bxOZgvVii7HQzpKu3Wriot+XriDu//9K3W3mZKTegsOHH0EspfbgkyF/gEcIx4VBZUi+4VwzripczQAlgYaq8MoKMjb8GrdXixJ2W9ngCgHlJTtaHl+alrlxdXFyeurk0vL02fHLfoESEs4snyhMiCR2Dl+UAXCBaKzkujw5hv0XL9YWi+R0tzb8RKSL6M2Le4DPydFc/jQrmCBdFMrUgZe9w14foe5M28Eq7IRGNvGUKZdHm+K0iVwCS6dc7TVPLSodE3f/Kds/O6iCQGelHOJrdpZES2dPE4l0qOyZoeOMwtJCBkxUQ+fzK0tS5QuDc3W5cQ97i1vmCbbbabSly9B8yb9Il+Ds
y8ZFEoE+gdOdTNKJ48ncBsQAwWeKgGm6mt9gXsAzXeTEPN0vmvqdJQ4/sUA8sW41AjIKU+BxbREoovlOzyL7CKnTDQ7ErNwkl7X3h4vDk1rwpF/Peju49XL9W9TI7d4lTj8qXlL3z+s6SShG6Ut7N98LOfvw2WX/zS12698Kn3P3jw7OnO0qUXbC1wcDjUlQl6yis901y6XV+8TrTUZpfS2pEjlt2sW1ANYh2O9ETrxuZXFmYXZ+AvDbCv3HBwurh0iY42Nj47tW/HgWTCCopajcXlhb5UDlpRY/L4uLe3v23zR8gzOj/7lz96W3cO3gg19V/71V9bWFz+L/7Jf/noyRMOPuXpCSnnKFK9+mrpC3EH34JFoXwc0Pii9avgFh9j76pr7B0R13gbP878WH28oQ3fbLNm81EKWBopxM5NBEHpawlQBbu5FagM0YbiwvWC8JqgqDGwfOOvK39Gt8rJYGJBb58V0rGXrJDH+rRfjttz+Aya5CtBlv/QCiupc4GqHoWTM4ejD8VGj2KfE+GCECDaVxzU2utnC26YhCqsiYujcYfMPTsOcvgVtg8uSoOd8UyfHs+qDOY4EhbKAWrls5pFNdQ8JI/ClWIxIPr8w1XxlBHh8DN+No2YlKkGSlY30TbQH+6PjWltIgsclHhZmUEqOA7ly55xP2A/xpGhVeoFIA9R+SUmmT92KOMNvEzXqzkxcqHf3OODqzBbikTvSi8FimaG2E/xBNlivKarY6cng5IXiAXos9QX7a/Zpmd973C3d7LTPmonEjuyODP+0s3loTnsn3Emq+JYHJ9c7p0kO66A2DP6UTapPuJdJ2fSjYQN6rVhu0TWa1o8AQcSZZtHnof5+J83g3OGA7pgD7ZBJWgBMZBrEVRJ4YtZHFXcaZfBJ4Mvd8SX2ddjGqlgCAKiJfXmiFGX8mtJ4qOjPRITlLkLmGY8AhlHeIKRSApJeY0cZa1Qel3YyU+IRZ0PH/EzyQYni27fvPXv/Fu/+X/5f/w+v9GT+z+5PTe7p05laGx6Zn5yeo6GYJWsXFHj4v3GgEIlTKvYDBkqn6STpgTFvL1UeI8KZUH6to3HO3ti5sLdkzUtG7pPnzwQA7dNn92P7MyssJ2D0VC9xiqHEsKePdBn1Dp+U9SYk0We+S/dM7pi3mWyjIP4f4DULn0v3rx8dsgpdKZfIzvy+uV5+yth2lMTQ+qvo2oGhVM8hFLxdGMOySTAHVUpf/o07tBfWElWqxBRoFqOsF2jjGjDDHJldWPW+5Oj3JLLArwgc8aX1UU7GDv1wuW51zLlvsitilpRdXl7uLrzUrGFL7l9zr/wxZcHZyRN6qUrEvNy7oednbaaoaCIImHaEd+5MtrB4LXXXnn/nXf/6T/5g3vv31WF/fKLd+LzPuzcuLrqP1Zzds42amklDIjeMjQ0XWg2/OH4ZDKe8OhokFSYFsfO5gNwP2FpBbjI0ISK+y8oUeICka1BbySXNHurl+nhm5AEz8KWEhkKr3SEh4QDGsba/JyWEDtc6c3ZhYXF2sLRYryH8XaO8g3qGTs8vMKRq6uYLRCmV67PHJ9/6vU3VD7bCHR2bvm1N+TRvFtvXudimLDJrb6hXB610fT6Gj3X/EI7NN4I0Mo47SwzdtxtbeAFNrvAOFpdoVM7wbX3tg4M9q2338VR+DlYcl1daPiPLR8rsF7f3NmtN5uf+eyrpSweAAbN5rRhtLsPPL/+9An16+btG8y4udnGhx8d2P4TYl5kQGWxs+g4Q6oBwAJ8whwitLE8kfvwwf6Bncp4uvlMxo76I2oVBjTu0ana2NLE2NzE+fzUyMxk2A6Lz7PQAVs5akukC4ypXiNQ5M2J5FgDHrNweGAvLq+O9fsYV61BlqPc9DH65kr3Qkfoby3dXgRDtEVoG9mAXJVhRn/EW4uY8Wl5fUisxPIiS6LSEFdh8C7TrgcyhPXl3SHvkHNAAlcvQv2wA1KFIr3JMxDNx9+rEfm7CIOQSiioGnnhox/PInTm+bhReXoKEnFQl2TzANoD/ehYGWdNxEWDOgHS9JQlSyCp9APaxXBrYrTHQw6+46O2KNKyxhSwfOqzURlC9c+yAb7O+rbRAZ8h26wKKfvRCB1AgW1GJJcotP2gTukT0ehxLPH2xMVK9nxAYe2KAzWsAcPAisRna2Onk5PHrf2N435pfKHPvx1cNbelZSikbT8/nqjrP6fLpz17CUuCGN21WntATDfTFrbRsIez1oQ6pGXvxGgPhIEwsVQuPEJY/FgXCcDG2rIQQB/wFykUazVfg1lZFMCLGBPPC1rgGFEFUkERfmaVrIcU+AQ9yKqBrm4hrXpjipONwVOoPQwUVIALM9UqAuPhEgwr5xsrTSWwp7FhrSkHjYmhPptjlP8jPW9gQm10wqPmGjNyTCTJ//v/k7/+e3/w7Qdb9x/cnR6uL9amZjpqmA67DLCxUXaz+rmml1ZZ16BeivKhTTQX/0E5oEQ5pTmAYIT08DkmsLP13BJoX6saTGLA4/sPs2WhrVn6bHrXKJ7m7JNoav0YHACXiF0QIpicBB/PKX/gntC3eCOKv9lpBp3pqNSaSg1f3T6sX/js66/e/EptlJtUIsaRpVQZg2NoExWdhnD3ypAchssQzwLkZEVD2G1B8vDR4FAOuF5WMDKpnIgi4MjYy6OKyM61gUDGnCPDDjxC96GpzKL8EHYeyqv+8mP1BTlX5xGa13iyBbpITcJ3BW6Ku25cWJphAjeCXwRD/n/l1jx0dwvmXlm3wTJ2er0BM2anZl+98ylJQlhIq91anl199uHOowf3pVmLdXpAiR8lOVcYVta1JJm6jV+V1Npm1Caik0PNmcHC0qRkT1ks3DbEDW4X4F/MMgp6ABkIJdukTLqCVVlhk3Qy/sRwSbgZIJkdqOQ43dvZ4HQv3VzOtgZbFDINjrnmhs/t5WkXxwNuvbXLq+TX/u6eLpsffPDR/NzyvmzCdmdotP7zn9/tHo689MrnbfYIHcYmZg5a+gafTjMzZ0kb1S9iPsN8+9zB9djxEpF6x4M9xTmiTdvbLdpWe7fVbXUPdlp0rfWdjbRgGZskotOQnYoavfmss4+ORucX5o+ORnd31QVSFM/anX2Zhy+99NKl1aXpGW0Rx/bbB/fvP5mYsJPWYMKGRbwWUCPyKSSbaA46JyIQvOAgnpC2mMfeREngs6GaThCGMgdHJ6cb41PZfWikOcFonl2UdgvHJXPYjXZIJJBbRrnOSRsHLGsfFQyBRG4VnmJJPpEoERs4SZppw/ssD9iHcItp4j8ZY/keRI/cyeHKs6qjfFYX4rmhiCsqKqM7irlbq4NMyq9CPT49IW+JIMmvzBCDyr1R9vLWfIfDwuwYFus+f4BTofHIKX/DsByxfC6++QszSQZBOcpZgPRH3pBJhalWkiwCD5wxUdtfD+kqVloc2T5tpHs43Dsc6x1qXXrWU0jMNZ+ohihjbWbqfKHWnx4TR3FOclpa93HRpVXMmfpAPNmYrV8SYzIhHgbiPJ2i2GOG7H+kM0PJMpuE3eH4MXKXP/wPr5bqRO7h6p6KgAs3u
ND0UQV6gA8+XUwATU0MXnlp7aB3ttYd2u6ebbYG+4KwQ0y60ZnaCfxAsyotjFBuqt1HpkeHr966DsoRGwEEhcJ/onhu7TyLuJanL/xkurI2tMPRPLWDyUaNc6XB+lZ4eZphZXDBitjgcQkFwnGPBoPjMPRbJE0JGrhI7oGwFL2t0zub0eEBUk5O1bnIU+cKkHzxo6MCwHGDlwMJBG+LJu+Blk8rgeHDVmOox+k7PTEqwRyK6o92PH4wPVCYMJ5+S4fr09PXX3hp9X/6H/ytf/j7f/rP//hfNpduXbv58uLyNRWlGOlkY2xyQl/0OtZNNGY1AoYyRXwnybon8M1E6aLeK1+aJ/Bgf88n9GpMM3TmmKL3N58+enh3Y+NZe3fTdg+GLSBMXeAIIEBNpqBtVqsKh4SksoKA5Sv05o4MhSK92PiizCpjs1q+MyuHveXa2vJhR4C9M6pV5FDP7hkicCzLmGIT9qhM9AqUI04MP36RLA+ZG9p0lNUFS19ZThbEdPwZPa8cAXOGFdyrfnWjq3KvU9X/qws+OZUv5lSeme/VM5iSF6IqP2eaLihjCJEFScAZA1A4mqzPUbG6Aygs4TySDFyMH7JQrA6diNfUeHyjJRa6Hznb2aU+fOb1z33jS7/emJwVz5K6vTg7R1t/5+c/+973/uLtn/9cekJfa2XcJY/s5cnDbb4kj2ZUwyCeptden7l9Z+XOS2uzC9B8ylLxLeAl0XnCUWgZeXtEUGg19xa+lC8FZpFn0WpcCmyR8xfgKgbGoFmf6El3lZ6PklzE80xH3N8+XDaOUxkVSrzbB/uQXfc/QsVGNj/48D0h0Vp9+vGzuyeEwsTS0yebV66+tteiO1Np66Z03uGYaTBL2IWIbSDmN0AsAMuDDthcjhO9rmrC9kn/eH9rr7Ov2eGh9R6fnNbw47AbxRhyBvV8E4c6PD88sjX5852d/vZmt67Pab1hKsvLy8r69GN54aXb8/NX9L9Ql2Hb6K9/9XMuSfPBalWqlOeq8g6iy0Ur3jJjovKWHjnHo0eKW0cn05FqHB+aqw3NaGZfH51tTMzYZ07P/MM9lZUcWBy1acKR3IvJ1MUie2HkbKHHNZgcwCATdS8JtXhfTFtKYdR+VFNoNwgXEVQJobCjsmzlZDGT/BDBc3gMvyL2gqDlCbmTXlnWKr8UrPVbXsj1J/MVz3DwDBQE8DU34OnliJ4eDM39dD37pfo1bw+5BWnik4qLI5cFcy6OQhsS0/PGgkBhOH6Lh8cLiuitGOiFOIzsCBQMLcWItAH1Pb3eoNOVLTqyfXC2d3C810r5BIYtT0lrn9X5iUFDD7iTqTpfEM+jEhJGVsQVZhMBHN6dfNOwP5KUb74pEobNYYOSX0ZjUsaLmaQC8wpM3GN4BRrJSdSGBJ+R8UESM+EAiQDPKhR9EwTCYdPP1KzU/O3tPDyvzdnsY+7S8s3avG6Z8qgmR89mJ86mRvvjTJn+3kF7t9MlLJJReHS0n2imQA3NTMlgIWZvv/z6zWQ4Ssg4pEHJreLkl1F4mm10w28yGdAHxggjp8rIs0qZBLq2vqEbc8o6ZdFdHC7m0y9n+mZIU8lmCpIc0gmHtZc0Zzt52ku3ncpcvsfeoK32Xh6ErkiVFyjWAo6eRnSpNTzp756c7g2dtlkbukVbPGEtyRqtoz1iYmqkwfdQG+seDWrn+lMg1dO9pw+ZIx2oIB4wMT5j064JzUVVIsMiD83wcXykX75QMmWiEAOhn6Pob8CgldbxsQ47c4vzNmeWSLGzuXH3g/c3dWR7+tCqyr/wECsbOc2GpqUUORUEZJwWpSO6XXhf2B4o5aWgBLfdMpJNnIUGTDBpghJ5hk5XVtYEIIZ6e6e1ToLjNpke7pNVxuZVyG4Qwy+11mFaoJ/K0LB8z/ZWOFP0m1CxFYnYLyd9L6PKAHJU9FLOBUtRmVHmI3cVvlw8iv7wZ8Zflr5cX73oYxrMw8pVWfHghWfj25wB4T4G6UxQuaLLyFhXpOFEcY4il2jpgrsutqt2JGuhedFxSS01u19oIPL43sP/5D/+RzV2dfbFkxw0Zo+b/d3N7k7X9g9jdL1QFM+NpoMRUWEKaQ+QAAg8NsF3ftbSmovv4MVXVmcXeJI1s8bwokBjHkjKDZmpS+PWT9OQ+COLsxCK8uf7taBMUc5NInwpmI/5ocypJmdFGnVQxsg9hpZ9C54+enzz1jR65oORBPjk8SMObSJqb3dzZ2uz0ZxmjW2t76kqhAKt/d0r18bef/+jjZ2T5dVbS6s3FZ9JP7LJKE6yu7PJBLedJqVKbdboyOHKogKKsfbBeeugf/9sc6ZRa23vnnRPxnSxro1P1Waimhb/sDoJ+o3+abJKFucXDlqcLotTkyNLc0NagE/JAUyn6OG9/Q2R+KP+wd0P7VJ8cHbSEtU66u+Mj/aTdgqkye6LOq6ZiBA2V+Oo6JTXEPy2PB4c20BHKr21HT+tz6T0xm4bZ83hocbwiTZSQiJlD6D0M5G7SffUDV4Zm67jR93BrlSspIAlCWNIiU7aU4yfCcjHPKepRSFGWxgI7Bo6sg9PBnQhq1CghbQYQbVy3rqEuivrJ5gA4Qoal7MwxKXWMxzWNSGGiqFFKmipQnmMVwQDi8ETgy86II3L/q9Z9BzV+6ovtNTcgd6QQNFkcO7okLETDSnXhoAK/YROQwWMD1ANCSNfv8Xd7Fre/Ty+OiIyfQsBMliCsuglNQFCAeEferz1+oqU2u1YqvQyKFU7m15M/zJChTcuhSXw3CcEDkJD4KhcXmvaF/Zc1N6weddUxJxJuJTC5D5ThkJ+ivTOs8ogzTAdMRJMjUyzAmjJQ1jJ5hNiwbyzHO7kW2jMLTeXbzQWrkzMrtWn5sPVGbG9vfPjVtKC9k7bmqJLp85efLZvOIYJSEqKKNkeNcQ7hocPurtBAZsWkZPxlUZv59I67ukp4GtZx7BFfCSWX4aVQft/WQJrmyP9kyxIgXOEtxurCVpi2AAR5XQqP6FZR6PlRNdcfaQuel4I6Zz5JbjQPexcXrh8cJAQJsQIOun1KyiUyIcmNkO2PGid1l77zGuLy8ujzaWDo93t9oGFphhoGy95mG/B9vNvvPb6154Pfvr2483NZ7XJRmN6bnnl8mSdp7GVsJn9XDhKMjcrIlDK+SdPP1aOxs3Ype5qZSFccHZpbcmyS67gUmXkEVfrjx+193br01pIy8BEeUlptOpAZ7nhE0wEouhchUB88TcPJ4Kx2J7jSkyUEpOVVQSjgoDPSDTuRJR0+NLS8kmvPVuTtsvPK35zfDqsyltZIl4R3cXh8RaNTKVwyY6RejE1qrAhsjYvtUQs/bJ4ZAZscX2wP6hYCCUlVuEmoUAP9SUX5DI/w1LPyf2/fFTs2QRQmMAq70AYWCHmGJTBycy7PArtUYSNMpgb30jyplwe2o9Whv84AxBhEzCI5p/MZ0a9nFL4EVYEfeJC4Vx98ujxBz//cG/mYL621NmhmxMIbPYOV59llCDblKKkJuN8RD28lWJVJPpVMFrEJhwglt7Q9vr5o8b2/IIk3FmBS/VG
TsogKlYdBRH79co4Q7i7+F3krFvc+HdLg8A8IswtdG4mLjZEUDRXc7CljqFON4kGOwV3O/KgRuc1GxO1mm7a6VGbnhE1gIpGNFV5770PpBc8fb4pQDVKaeudT89hLJPt3snDJ3ZgG1nfP+/95J6mky++9IoCwK3tg0cPnszM6KJ7aherXufAbliNqWsLC5ckLQ1OYP7Jcfe0taV19fni3OzYxOzQ0AwVJKRITIXDhPFag60d2WEzjfqCoJDKNUU7+zv7z548GB7pXL48t7fTa7XW77x4s9/d3Xj26PLlFYqFFN0xK5Ylx7uIomyxG+c/SMg3PpI8Ja7Mdh6w9SYF1XkyeqcSQabkrbH+BsPClg1wybZzh8dqJURgj7sH/c72yeGBEtGxmpbEneHxY9RnE4LhqaHzKSkzHC4nh/LY1dsBTsSBWBKMB37Kai+YW45g9sdHRbdQN78EuS84P09JsCC4Xn7yxT9YmTLYi5Ox6csBSwv6hRAoNXF+0qagLHavZ1p5Th4fXCjPgTVylvOcUMLFUWhK2+yQIdXSfxC675YimOQjBBRJWXhGUDF/uwyXD1f1fDpTeYsTiZayNEgDnBifOKVej48sNGWS1aYnD/d08zkZSF3Jrhr216gfc+Nywdrqm7qdUBQFXIqK8QYCHm0wJmrYoHTWO+1hFGwZAOTQiZ8+/CsiDi1hc/RljGQczRqivtCkQ9hYxHoEKRliGmENwyzASOdwA8+PzkvU8Hdr2z7YWj/Y2z8f+sAyxsQVT9G5WdSS7XGsoETaoIRRIFf/d2gxjhlQXh4pHeaG6OwYJUMNVEyF3wE4JdOTm1wlVjqCqBzGTavyNWUyDuwpYwpCBJ4WlgSU7apmORiDFbssuSFhncejjZnF3V69dTjYPOytXX2hvrLWG5vcPFDzcjydWOvYzMK0gHNzdtpqUkyF+8js6XqdMmqYE4PT5szC3btvDY649Pc7cgKltg+GFqZfWJhvyLiAEPwDFD5olRJDu7g03/jpT/63i7MzfPnf/e6/2O90v/DFb87PLB71pEvMYsZt24XwUVAg9QXWl263Oz+70O8eymJXaKKR0oTm7o2aTvYS3G1OZi6Dfv/J3fvtnT37UiRncOioIFUCjokSJ9if6uaPsT5IAWKwI2tnjDJGhmypxCHEiMAG61qT9UW8JISPi1P1ZX9K2ertHkyvzh/3W2MTwsAp/ww4z8fYDYag/3OM41QixEROroh/Q2qx4ZLQVyxg+mjW11ttixqvVCSXdKCoW0lGEJzs18dnGA+Wl3c7oYJgHXKKP8agrWplbBn9Rc1nQd3CoaFEkjtonwXVI5WiioXM/FQ8OqKPUU2yGWPRWoIzoQ432hwHb4NaIZNgf7A+Rco9qr0G6rIslaszD3T551qjjwjA3L5+bfJ4YnVyvj55afJI9HYglhnd8uy0PdbT6KJz1Nf1Xym4rQkjzLQTlkHKX4AdRLDyWpxzp20+a68/aa2szovZxHl4eqgmnG1wemgpU90fzjScZV1eWWHKMK/t4cbVLGrB7vcwQCvqQmmyjSiLWYqhCQ/Z9E02hHw/3jb60MbmI7IZedo2Lkn8SdQf++DD7G7a6QxaBy3PnF+au6wVy+TsQfesPr8wUp+tzYzcuLL2bL3FRpO73+51p+vnz58/7Z8ezdbmr12+jO2g0od3393ca125uiQEdXKkufQI3/rRKSfKUWd9d3LyqHu4MT7R3G21x6cmtb4gXgIGm3SP1i8tj3b2n3bFxo44h+CjFE1hpMHu5ob6Sl6Wtw6eydizOQ9exaYXuB3rHXNxwiPlLjVp4ZHQFBIhIfW9/IaK36VTc6eSUvrijzTOZCRkj9hSdBGhkZC4rk76GQxTA7qt3sFme/u5fXfOjnsjI4eTs5LyB5NTNq4cHpseHp06H9GXfGIg4Mnq4kvAD4w+SA1/SMuYJuFNEMsn9CVsgsQyLqN9QanqdNC6HBfiCnIXhCu/Bn+LzChXREXyf08MuUTOwVNnIusl2fh/EZS5My/MT9WRuwkT90Y9sDp5TtHbInaCIlhz4bl5pKfkBUZIoWM4AGKGH/YeMlAckec5XX3CUlNGlO5jKKWIMHIgokvDawX3I2T88NT01ITcWM0yBE2np85mG8MTNZuIp0wEA/Uut+DhDCDfDS8A8+FlGWiYtp+ZFBgTesYVqio350OogUQZjrkludZYeBhdXz0tg6vsGMMuMIngj7pw8ebhrpSWrmhXy/koUZlwmAKGpS8CIyzhGYVc2TxEr00FDhLVGfMKH6PVSjSxbBl2HAZkj3vznnjGoKFbTTLmAHUh6kWZY1aBUATYwoOcpTVQRfM/Kpx9A825FMZkC0FCk9LBoaEoSL9yu9IdSKaoTw2NTbb7/aetZ21b6U422AvawOzs70doh90GulFqsDqDQgNRsc0iw9EtzvYkydgaq5vx8WFt0LcrR1192V6boTYmB5fPrNvbXZldubK8evfx06HsoTx492c/1lz1K1/5NwjBmVlO0SnoY2rcjxJrvYGpQuXDnZJWK4JU8gPlQBMzug4alU2GWlqtbm0cd+mwvH/mCuo5QAUKFnYdyQKIHx8Fb0E2NAbJMCyizSQhA9+tnXoneUWyUaKlQewucbN4qkjk2XlX7i+xnWozW5Mc8gXNzC9wjVFmqiKeZBumfUhoeXJsWmajfOX4V9QWJb+m0K0waUz2iAXCBlOWQ6sDP32kYAvw0xbi6XIUFA6mVTgXhCiT9CnW4wKBdciWpnSwP54hOwAcp4xvuGyzUuRisPiCJKEUzImM8w63h9JgRZDc7b5EocvXtPoNkBk6aQDBh6FgMtESgZZ0uHBdQ9BOzkBLRZaEXBAQ/uDbO+3xA2TXK1WTFM6BHHkCSvUD1KQQo2hOSQJRfhzgUiYP9jU7Jl34XRJ1ZT9hG8VnE/WKKZJO+bGjQkFwDHAN3mXoy8nQRtDckpecw8IjOT7Hag1IWoosZIoqFm6wgXmKGc3giaX3O9zOo+w1wm9v1+7do4tLCoVfSAHWXu/ps13N4FbGV3Ry/6/+6T+7cev1F196fW9nXy4JSGud3uoiEPp17FWtqAyZdFzf2OJalDmMKuNIG7VV2xhugnIoyuK5z7d29BDvHB/yPUykAGOy1T5sTvXGGi6wtyJcRPAq0A7PR2P1WgzbUmet1VRMDEksZuRLjlXFswx1x4f1EuBbRTssqqMzVpWoo64dgHM2oTJUT1GZvhZ4WstayZjp1cJa7mNEai51k9rf2OKuGjCtWrutnWwTJ5alPnpifmisOWSv6gk5zLSOBvWAxGKhJYgVn0cUtPjwoRAK4ywyVnjjCJLmKOyprE1h/TkF9BefwbyLKy/QuxIqhVbhQbkyJFEdZMgn9zrjIdX56mQe/fHDP/5eHh4uHE7giHhwV+RBVPp4oaLgYwS5FbzwXBqs3/KmnMXNMeeI5Fzst4+PyILU4IbNYILUUhnJszPpE7Es/DeBNKbHBTryZVL//XpNh4bdwYlNzbNlZ19zuez2Bl9FFDK6X4w5VpRXU7sUtzGGSI5SkerTOoouSuII74L
mkQYuRcxuFykrUiB8L/MJx/CtqLFeYPThggWq0PZ8yI5vLDZANSe2PiBlOUdHVAjiTx4a00ayB48vLwslnbOPwSaYAN0jdGKP5cGaSJGftHYkN6ilA2j0tAGrLEMAqDKc6BXlQNFhqeF+udsVZbwmkKpaKqxMpGjdjEe+X16ybFtytn/YVei4fTh2PtfX9Lw/OJK4fDbWWF5aw5w8yLDRkyBBHmph7Z6OagQMPCoGQ1KBsa3sDXR8Nlmfa0wvUk4lsGu/tL3XySYdNmafmTntHssDG683ewft5uTMyODZuUa+2nuPDL394588vPvst7/1tyzD+Lj2e4uc5rby5Yis80hP6Z9aMrMVFmi1a0eL5PphTBo/DTgSRfWebTxbX3/Kra+HYVSCLFMGaMwVlDAxC1HOX5wKzBCPs5OZJvKv2Sg6hY/6PUaJsWhETsmC4psCsAFFZO9gQva9v9LFyq4mNfafrSV6mxt9W74GD4JDWXfQUS9Id+4cQgKplUnxZ29RqryOVBulqGYF8xaLk44P7tM4DOkHX7Ktk6BGxlzonavKxC7mg+Ff0Li1tejxkQQNPdE/Kw65kgkJD9CaZSIcnAzS0KeLDhSfE+WCdRP8DNJETFy8DmIWn1JuIRLgPuuLqg+EkQlcA5gy91RrdrKxMrM4tKUmv5uHKEDMxqrp38jhFsOR2C6rwflYpGXYG4cV140lCfaWjE1+qO31HWbTBIlbvB0qklJ8F7Gad+p3po0fnYOsipdXDWaOMA5nTMGDXPnLh/H48+S4bVy1mqxdxSnxCoprng5quqKAHGJnpXH8HfdPW7u93U1dS4afPVt/tr7lfa2OBai/+PoXZ2fmn68//9rXvjZWm/voo48kFup/sb+/1+4d1CZTwkhqsqW9cWlpqZnG9llqbzfyqOoWySAEiM8lvtePB7pjZ8uIIWl41hWBTU5JSJsgptTu2UyoxRbVNAqaHF25spjVDK6AXwr7JLvWTpWD9q/fvDm2cZCafy6M0PuhWLedxogrLV3ULx/pUDosaHw6UTYNSgW42FLgGw5FseOVjKc3ehb2ZFsU+sVh+6jbPu11pXRIxzoZF3ixp/b4aaw4+oRda6PriP9E/YCIXg2SVg24o4HB/ZCVyVefQcKPv8PlXz5cEA9iQbtfnC9kAabB5kLDfvL9l59WXVwuyFdfPvle/Vld4DP8OQ8pAg97/6ULWQdFIQsBxjrM26K+5T+5yxsxYoMPn8ayq3sNKkiHOVMPOa5SQ6FPF+Ilv0PkQo5jk9Nrl6/bBSAbNyVyjud6EqAcbq9roNCxv1xJQit4XGgVEhemHriV5eYSzFfwJEoYKvJI2cARUrbzIX+KqptdOtAx8wSrCY4Zg/EXZlfmAPDVY/1GUntgZTICp+mQvocH7cgEPCfLVVhGgXz6grEv/A+axGFrsm6hRWaq0f2VHhfflmx6yWyJ1uT9lAEKvWe5L2wMZIKZgJVlCpYEtDComL0FvJhTCIMh57y1KNeFKRcEogoxGc76x+eiYPvt41ZveH1v/6izsTB6eaiJ6sam5uZE4MBjd78FADrQikTYMI6gEkSOFm4FPQsZDo8d6Md5yLAwyeFJXfzPx7Xg3N46oJY+ePisddCxO6q0BQ4h+2QtrFxOf7XOIe+k8C7qTATi7Lj9/Ok7P/3Rr3xp8mxSXvjpxPTUvG40nKHnJ9p9CySke2Isq2FlLn392wbZJQ+7nZwc77f3t7bXdbLjAyx6NDQLQKojs87hVBYuI69IKw+LUohhWAbtAGNPZGsLwsjCxlbNLdYVjEs0qt3tb2+Pzl6dnZmc0574wf1NsS27/01NzTG7DvZF5flvhEAI+hgimJTxjE8e64RFZY5HwVolmkYuKXoRphH7S/MBeKGnEQcOO5K0Sf5A8T0bAcQI4xsmHi6c8JGj5EzsXZipnZfCG0ynYBtjBwuJCxGVhI1CkPiYUWEwEroq0MivphbdquAGdCSoCnoEPtClgMprEHQ2BYRrHEy5Cmhi3o8wCOz8oo/+0tIcu7nX3iXA0/T0KCoF6mdIcD1FN0wP26LRlphZcUuG8g0t5CIDc2xc5hT9ZmfndHtrf22qWVBdPMW+lyjQlaFYBBtwShP1l+XH5kJwYWKVMHDG4LPEIJDplVUfltgnzq3PrF3H58V3Jus2r5ko4o+vKzJPvVcEis7UA955zcb4bgBUYoE91uzXO22o9pcih2688OmP7j+7dDI0Ndlg/V+/clkJFB7AfrIhtsEdjg436ks1vujTltcbpxwpo2ZXAWmY3Nlpt7+nvbKfKC5S7uR6dLoKVA47e22tUdgxmoawzrV0tjfm2DhogFQQ3vPwlcJnEuLudo5nmotj/+D3vgepkgPmHfYFj0/vxIZcMlTYTHJTdQi1mxK1l4CUUj0/OxskhOJRh+KqhzmsiRMZUbIOWZtyhpRYxrbl6srG0FJdTsZPipMpGdbsulinMeSlIEVRCsiDlYW8PDVUk4cba/k0dCjOVXFx3pdyZLUKm/74xMUFuddhafPUYG2REDFtggRZ4186ggUXRP6Ls5+cSTQl4ZHytEj9GBjVr9i+YfnVMAvOhCPH9Del4FaFT3m5scA4J8wILiLkYLg/iSvuz0I7FCiqa4JFJ4Ps32lb2DjiU1LIkR7miX0KaI/8f4n77x/bsiw/8At/45q44d3zL7PSluuqam/YtOMkQhQlcAbSL/MH6TdBxEDQDyPpFwECBsRAIIccDTlUs5vtqrrLpak0z8cLH9ffuGH1+e7zMqt7hhDAESCdjLzv3HPP2WfvtZdfa68NIa4KK+M3DyvNdET66aKe5cXljZURiOwhPMNCnMCiAqsTTVG8fsACd9GYgj1iYBgJFAijMJcBXUABgFpDhM7VMcnFQi+56EURP0rtUUM0xo8DFAloeZ3wO4JA6BFXZiCTneYBpr64xFZP4D2J47E4KfIhSOgU3gmTYjNBV20ZCn+SL3mdL5kF3UrfijasP8GKzAdwI+nAPGK78CNtxWbVFHRn+Y+vZnrjm9P+pczz8Xx3YcuqNaWg19dWVvFc2h31mWFEH8QK7LvhyuyS8CAUJ9CTT68TGSZCiTbIidFQMUtI+tnL/U8/+UIqo5jT2WEHiDFpoYvnC6+PRvS3cwni2K+NS7qjMb/4wmbryecf372zs7tzX5WsR6vfuJh05dZYR1lfJrmWOQ7Ps3Nzth5nZ4qPMFH49xH6ydnxi1fPVbi3dIHLqKBzGBYolOl5g5zYfkHHwtHND0oAuxRODKJEoSnTZ6ShZFODzcQuCe6iG7+Ox5PTjrJd04/v7y7W1waj4aeffKn4qLKO9oZrtTh8UosjFcvZgGpV0ZznLxfro1pdyMbs2aojyrWVR24gkax4KmruFP/2UnuJ+q1svAXI2tH7qNfqiYRH4wvxl8UW4YY37Yl2FTJLiqyS1kU7KeMxOclIMOuy2oIZ+UxxrUKC5golhcMn4TF6cmRbuS9tR+OJpw6WeZG++RVpRDsMKaV0DzrVI/JW91KmCyLyCth+R+UkiTCyn0r4JLmA2CtY6kCkTQrpOiOa8Q
Rcw18843m18c2JfFvms793fOfeKgYD9KF0xSuiOyTRSTs649aQRhwg6VZFts5jp0u+KKkuRFHAFE4A1SWT9lkfgjs25KE6Z6X/7cS8JuxpgQRAYu+RONMJf9fkHdNTS8KdbC5VmqBUFixdt1eWX77cM9z3339/YBlVr3dnZ8t6CVV7DDEkG4aVkg6pcSRzNZv26ckFQJOIRq+HPCALtbaVGLWyG5+qVvhQ5lfkM8kRo0siRUhYtZZyZDl0yTHRfJiYOTfZl7bFum41VzfWd+f+uz94aeZkXICmRaQckHYP9ClVc+5ySoCpMX/enL2y4q1kukqlEpk30zlMiVfHmeVPYDw7EUlSwqxoFQGy6ZYKS6adz9O5TAvupjwZJsPmjsfBZ1QlKkxhSDAwrD+tVUdOyqu8orJOvv7pDeaFvr4SP9ooqJhnDTjOG9qdLmokfk78DxQKTwziFg74Vftu+yvv/boPiQmlU0FlF9OHYHJBKY/EnCAX8oaYiujcsL0cZGI+eWHpm2dzXvhmfHFRIJAZ9TbbyQcvS+UcjMlW7apqTE33YoPA87BlOleYs+bBMC8VHi/91w4cjibGXZ4+pBvlE98uwNCX5OZ6dwy9dNzUJJQDOL5luyf0yIApcl3v2Xlh8kbi8LDWCsTj7kNmYdxxToNVQKpJPCbeKqkTUQCjHJQXazAqm5GFMbpXX006rXtepfQMLAarLDX+QNks0vjtVoBdZp/xOJNKYWGafjTe5CcagVemS0UkBdsy2ZiSHlaiC4QwaFTjLkRonPzvemDgUHI8uZVgeTq8Oji5OBvMXnNHz9fVDbHW0f3eH5K/vsZe8Va6JC0eu+K6UIkK5HW2AgWr1ubiwGwjKLKtO+h+/Iuf/8WPfmbjzYX51vaDeyuNFXpeX0KkorEL/PWnC8LrZplIMKcylKy2zTqU670Xn9PclKlQB0b9zuV6UzjMFvZ6rjZM4S6ysRbpwiTC+Whojqz4f/ny5dnZCa8PToFfxbFbSCbw+eowEb7ilS6EY5Z7YvcnX6aEOqK7uqU8k1Ie4RcJ4MSKTIwkXiWFe60xHxv+wv0Hu5aOQcVffPYzmxFRYKwJNam4ij+0rg86Q0Sub0ytrEypzQsB9IAC5C9SYSYlIk0jIHMHSWZhzRPQUzfnDfWpmI2Ef6PBNvWpM832croUg+Orw0DmWMkDSfaFhxckjCDIYAlO9+elppuPLvGe0qtaHRD4JktQOfYEYeKKwQYtg1DpXvkvgf5zmRVgrqWAQttW+kwGI9vrggcsICoxMH8ozxFffFYSOUMhwZH87wi+xiWCY2NDvK5BVTwPrLAHcPbq48M+fxVHvRBt+HIImbWdMkuYVFRQ2wupZKzMbbgQDhZQhNqRTFLcQ+wVZy+iixRToISQVZCWNRPiQdoaVMIVnHTW9lTjoZQEIi/Akp46Uu5hYdGgsi0V4ERLNHYFi7NBuYK5e3t7HLcrK+2Dg33DiplLDl/Ci5i5MmAlsgC+zuSIIEsPAwGkCnyEWcmep8t11ZKhsNqLeupK3hANjENbax6n+YCtYeohoAFoxIe4wCxLNNyD1FxqbVg2HACTVTV1EM1rNoHOFda8K41pGRLN1mzdznmNBXgl7HXFrREpGD8S/C6VKTgBLDjWTbRAokc5Ms9RXkpejpo/rOVr1p5NSqM+m3ARfbaenyVUhSVqMf+T1YZaYWiGnAMOQrJcr1hkuZib/YVYIzFDqy5UJ7kBMoYnoqX8ZCYM2i2s/IIMeYObvz6KphJcz7PlKOdYvVa1m9arBp2Wk/KuPI+BatrE6M+bTOLq5V7roZBEnoavCRSE/XKPRmYR9VPjy2FYOjkEHgI8kTOCBzKaAj0UEGGYT41h7oJg2RABpE0qN4TGuDugfzSdyimq2QLPEE+Gb+xQn9CIvATMvMJ+I+rizqeGrAOhyU+gW1xJMbe+IHaEO725dD0cAQonzkH/SpFiGqb5RSlpOq64RCKC5kZEYkXnSMVlLWiLrPE8iSCB3AY2PPGJpkKA+Pr0CL0ZEXZl1AK21HD+lBT6QECQxSqpEIAHAFx7wBEujOR8xtLJD2/mPTOR3EckY01nkiM9kcrLV1O9kVIWV6e9y8Me55wsPovqG9a6EzkoXx/oDRgLEmVzptR41IVUjwvChJKhN5yyyA3Ltwsw46sPj5uNqQ+/9XBzuz1WUupiYa29e2ft/tXo8uUXz8+7w/lm6wd/Y/2f/+EfPP+Xe/NKS+GCUQVu7Llnx6zB4PD0dPHpsxev9p7+xu/+7je//d17uxvP9o8mil+k2KZ0msy1fRzabTsa00PxqRvBM7/aotzA+XKp2XG4lSOgKBqDT7ptARfIVNArSAQcmc0gcCKamSuqkki3/H7ICZxSYvII9R6D7t5Omuvt16d7k9v+o8f33/vmO+98+IB/tL28eHT4yvCFx+X6y6qNysKYmLm6f2+tvVwXqHDdUgywHY4p3xOB7BLLiEIDVbr9EbTmT7MhlKPTVWckqwMhL041ZhKEi6mSRdqVLAP4pzL63O369hrsW5yXdyR8ptp9XlYApeJGloEBArUPPFAF1jq+6AdFkulqD780W9FOGCHsLhn/AUVBojgAOQzisbVGye9UH6xOon4Kh8RMn54bX0lVV07GIlRTmawAJADfogCS85yi9FDUlAWTsRQQBq4Kl8KuCl3RVOYX5b1fnB0NLs9ZfYs20xH2QideEImLKJzjpdNXQtZgSPxq2RB03gGGKAq0QhOZy19+4gZGQ6+EL+Qk/7MpdhiZzraXFJ9NHQYsAJuwLMK0yYYwgXG4WJp5Owf5VbuoNwmPK6FcTUM/c2TB1p27Oywn3VLuy85XZBgRyJmva47SiaonZahlsHC1qXB2u7WwZGdcXobiN7ZUwxpp5XipL8YWa5LKG9yIXkjOUzAzLpPlK5Wldufu2+qGMANVjcmA/QV3w6MUBL1FyuoJ1zkwaBvo9lbsSeHVLD6r1Ht0QaO3rjAsKngWj/WlRS1zNOirSwavvW/wqhlZG6U4j+xZdhV/IZej+a3UbvjC+DKhkNLwYhTHk6PfmdsM2En6nd7jGmHT+ertpc/JDXnza3WPL+HI7vFs1OpyW3m8/MR5qtWvHw97C/VqNs1UT/kMg00LrpGozqAxVC4Xyq0REY5o/56NV73cns4WdQ3pg0rIvrzNN27QsmLQL56C3RBTULRIJb0wbTJbsMUYMUadVN/UmtbXOCDo0lzqXo18ARzfQtnSqoxQyBDHyuvfWFjVKHTWGMyn8emLzgZzEUTaLPIbYcRuAfDI1IjYmN5RsPIKt5X2AgqzW8RnBFVkA6inRASJEPdCya4uwMig8mAKmceLBV5BP8Mzr15/PS0rnxpjiCl4PL8Y/hIGaqbsDic6E+84xmYvcGlp3F1ygWJJlSNMpTrCdCKWM778iP7yDz33zToMvdIsoMnbyODnxFRPhhed8S2X4PnVdP9SyUTlNVt89Kc9oeDpFJiZnuG1r9WapakMhEFAwTXg8I/IVplAlySWWJ/q2zeCiBenzdbEkipJz5L9rkcL487pzWj23u695
v2G3O3u/M3v/Y3f+NlnHz3fP5I6rq5cykvLL56/7Z7tb2+uvP/+w3/1L/7bk+7h3t7zv/n3/pPVlaXT/pjcwIWViVHA0HppOwJdqtCYpMFzGck4F6XV7FWUAgiGDxY6nAkoR4bgB6AxHeWncoJmswoGfuFS9DzPOPcEeLkbLkc95hbJzi2yhCU6hqC5X+YOaEvra6stGgVsbeI+lHNJhXZZYKKLe8f3gHdNiPjrW8s2z1kpa+trbENzX28sWxijVBz9YDQ5Pzw6gDYKBF+O+5x08ImiUHSe8Eqc8fTUMGGdtrgbcEV88QL2nX0+oKiwH1g1BmLctJLwedp2fQqTpWFwbCrS0Fxqq4GkBL6UxTrrVTUkywXUIbfoT1aDtKWEJWER/I7nh8MnyS9TUwqeSruTil3mGRWi9dQrYZ2fqy8s/fnitmeJqQJ0Kpo3LdePKR+hFOlfoKpdOhB0QWdhLnlT3OTR5rJsn0HDH9g9U9DuStnKuDRMVeYwNOcUBOPyu7WcttXr9cny9FSDElq47ZXfL3wPAyknnszDIbJZgas5WwcEdhI659UfF/ux0UysTHhioxCARWAkssqTSerOO/MZiWgjrbIk6+ysYxEg2StWt/fylWSGtZXWH/7Bvxn1BxAJjyB4fuV738l7eTTQBYyEOUxOUhUfYbzGkL056ZxYLGxX0tXVZfPi3EQPej35/jNLy7LIJAlnvHGD6fGspWIIjxwATMwuertNXKzQaqy/3j8gS+lo+Jc75U/P1GdumzaGueYYvG1MGyLnHxF9o+RMXabK9XljfsUEJAITOQjHIxDCkAhlARayN8FJkTOVXDBVTp0s5SOweHhoEJdwQGiNR+jCUtdSHoiyYpkMQY/zaSuTEtZcmF4UCINxyZFuFoJ0XjiU66gxD/jp68/MaxhmeSpSo/yW33OABBBjRKSARozCu6orAX1EXcbnUvk1IjyPBavzryPvy6/lMUp46YS3IHof8NxQMGsx7TRjEgrSFPlE1HAoWx8LXeKRi5Ca5kZjLnNiJTwVM1jOLJUTk4L7M1ZW8xywbCKxSvsEFLerbicHNkKq+O6pKDFBy0HYmGy3m6ezs36J2mM5EfaIH86SqeHlHjV8pGZbEvZeJBTvcdRSSK8lFOFSiUzYI7SOMNzJ6R59uPTeHTRLffAgmPjVGzziPEtenVHzQ1DBCUrv1fk10MwuiouLrxtU0CAtXV+vLLXPRxP1YRS2MA49TDhFKZvzieBxGEEYSmSTDhsC2R5w6F44MPd7QQOLfyXYIRxoA2wz1vezEpQYuO6OFGGyj3v/8rZ2I2Z3W6Ofr+/uuAsm8LcYM/joGPCPbV7Q73Pt23FVOhxH3KKsawu0R0qhdXCy0UjtNUPimGK99fiLLrnh59aPji7/7A8+uerO347mt5a3lRS8WJ46EV0endoBcaxYhMSoxuJYefXaIvvj0y8++o//o//kG9989/Mnz/7iL//0yxfP/qP/xX/aXtky0IG9ay9v6w25orXzwdCKLFtaPPvyiS3MldHhv5OiAxzhzsXDpvNB6HKAfxAgR+aiOvwS1A/a+sm0RQen24NriSgk6akoW3mkqC/E2YzpU0dLjvb+0QlMTvaPbaOazXfevX87NRiNDi8vOrWauT6/nPR1QNidxqVNRf1tryZj4uYcP5oVclN6ivtfruvJ6cGzZ1++98G7gibNtXqvf7LYWrzz8P7xwZEEAdmEJyfHirOaavgVnGXhxncTypT3TqqpBoWCOGJgWcGvlC446511el1gIl0lNb94ftobvSKpaEiixalcPcpS06YKZiWXgnJPXs7XucIw7sWVtU2uYA6E1bZC3Ys4Zbu1PDdblwS31N55vXdo66XVjTtK+/bkutFi5xYbtebgdhyai06ZDQppcHpCnshpKyxSIepomQEKCouFNNOYq+kkbRQdf/7Zk2997z6VXdA/8ePr666afdx0qdF1QXmq1+Wa41pzAj90WMSyurZ+enoqj05b4UOU5dSSz4wzya3VC/+xID9V+KL3hhnQAhVxazTt52H6llaaveFreXBq19av7QBl8ffsYCygoIjXdOestzUet9or9i7gDFZZCuCNr9M5tZLmcO9Vs95ijsMFgofuo4xNv9cj3c1TYjsxiIroDiu98IrhxXmzZTtmAY7wAfbZksUQjTpMkbWkd+ED5pp1PUyZbWzQsCD2oh26bqZPTwa/9bt/D2n+yZ/+eC4OVsw9kgLMo45i4fSoSKIYTdgcJ2kckjpjKs7sK2pmYrXesCmIaIzcCzFeTBbLRQ98zPhX9H57N5B1US18km5RdLlu03BU+niUcoUdGt1BexwyCRG7YtYrqWM+UBf+WEwEbDE0CZUzUYU+35BjWqgO76sMLF+dO77+KdD0XVPlugZypbQUD5XruVSwoEIFeqevsb39WkwTJA1ioX2gKn5I0CvsIYY3ZpS9YFRntCFNwt3Fo89WkkuTRSEp6BLvpeEH6Fw2NDbpsNbanIOgjS0mcyNs9jJFklIAF9yjoXlZVr9KxpTyS0ZEnSmQZSBlEMileDzyjyOSAzErzsStbCShdaJKU3h9wKHYCSXLr5RsozOjBapgG7lIuGo/KGCcKbdw0e3Z+EDDgaQX+tXPGqXGxRtoxsqMVL/qrN8LPuHmuEwQqbiaYAtdMksqLiyCLMSmX97/9OAlzQAzpAZKjwwLFnlOMCnrpiLF49rPA2Gp8alWCJtcx6LmxNGDLVAYy+yQbU6S/xYwzc7Z7X5yOytXVSm3rOc1NbPy7hZ5PxeV2uCMwOGkrC8kawA3cRi7c+KgbCU+Eny+GnLAEp7yzdhuXIOjYMFsZ25WWldssq2t9V/53uNXXwz+8o8/6RwfbT24c//hQ8Wv1dxLCQZGCtcZYqElQPG56bP9V//dv/xvfvBrv2lGD47Oup0jRZUOD463d+4vr2zJHzg7GjXUkbmcWDpi2GPCqt9TcHupCaPmL4hiLQVv4/HRga9n581E+Kf86oYcQREEF+LC+3IBoeTchNFtijFg5An8RKFCzZRt2VcQb2oy3e9fd+oXHEhqK66t36wqvlZbt9hybmG8uCjpK9NkRW2C2tErojxlclhrShAQKLHhCBJ773RpFcNRh+yRdyOJgU9TrMWVG9Yv+d/rnE96+K+Dyl+sLHY4BBGH4AWlxtmNRZ3kWpwbMDG7Vg4W6u+pI8tAsoL/wh5St7Wp+cYgm60JqEuIVrnOIlfrp/oWgfBbel2nd2KXAAij7uXB6Qs4wFaUKZPVPKk5GicE5jk31376vHd6NNV9NFaPzhSOr25Oj0/IPdmmren5FptNElSCTDGUkMkkqnjSfsJKA+fYDobCawAq4R/xOkydD63bsakpgR+mEOwOcXkpYlXZBmMQpkK20A0/yGMVWMxSpJE2mHYxRWIDynuSj2PjKAcYYTBBMy3i126yzrmhai0Grszu6uH+AcWMNh3uosdF2y4di7MlzuQbEaloNFpmEUKexXptY219eWnJRuDwBUnTcSlOxPxpKMbkI1ISEi2GiRsnlaj0Wa0YpCkWu4AEFK9V0cpo
3JinqMvZcFw/RZOigAZrs+dRILO5uaP4xfMX+6/3zywmZurc1qX92SNK4E/OOf8gLstwjRNKvEJWgFybxL6S4kULIF00JtskjC0MLIrs1AKE5LnRRZQAzSz1sWjMdny4XnbEvrCUuNTEotriHOiGParJlDFIgAELMUpwNbwiqdLnokD4h5BzilEVgszVcKfyez5zlK/Vafnxr9wDL8rxFUt987U8pJncmXfmbW9aSwfy1qx7IJ0q28rgvr7TiWkUmoOCWVGIyy3WGM+qkpjdGKvs3BTzgP8KFpRIXaIREvFKRn8iPw6WxMgczS6cTy+MZgRO5sZynxZq1iL0pflEUdAD9gQFiyydodAOQKKk71EXdBKn9alr/g3odMxhTNh7VBH4XRg7BYBhI2qSSTM8RQJ0n70X0Z8XeSqnIdEMUwtYsROqjhwaGV6BkbuDjVXng2mXyaXBM8KCK0hmjjSAB1DWI/39FLRIy7xwZ9Y5hQf5n1GEGLiBdWlnYyWGbpKurLdM2RnKl7SLN0ssipoN6bw6ck/Yxj4lxYgE7ag46bAD6isjEGGAM9N0hccV4rTPCc/iYDI1lvpKBXav1Pbsm8ypZIPwqFbo0+jQORBmj5wS+YLkRszgHXZOVxRD657KheF+EjRizqlo4Hf7xl1PDbIs6mqEbr/9Kw9/59fv//7v/PbU5Vx9qXF8fXo06j18tHXy8ye4IP9OqWESdkDl3L5/TwxAUcD3P3h7baP70cef//BP/2jn7mM+fvqCjYJ2dh+cnJwkY1wptCtJ7AcygZP9QYC4A2PNDIaFZWpMmcFnHjLvYAIq5TPXTa8bfYUPgU+U1EKtrpR/43dwY9QQKBCfoASvS2O1yR0Q3cydnVI6BGtsWDz1f/o//u/+4f/yt//23/xuq7E+uXgtcV/V3Vab369oitXMawxyZOEEVpwtvogZYL+8Gi6vyIBX5x67u2k1GJom3vZjhoHNX6CkpoLwb+bUpIpvCNFlWrN3dHighfLyyaSqsImjETEm5Jqzimnk01Ot6bllewPcSHCeVR5G9a9saMTPTOYxa2yvDuYLDat0OS0vFVywqYB9yeTZX/bsf9HB8Xiu2dYc7frV617/F//F/81muza1GvUm3e6gc3xMkL3z4PFpZx52DMtuMhadWCYIeBwWUb0L9AP3AD9RbYA1PifsP8waclrLRA1VXSnP4DJRycJmlJxJZujV7WgUszozGrljTkMx2EjhHpGDZc7jESFLmnXKcj1Cyx4wysEWHL4tO+Im5WPa2jlbaw4oI/cfPRBEHih22aUXZpbMRCjXO7ynom9qKvZe9nRFEeiA8WR2OEZ4RrN5lbXt5jbbnEL+6NXxe0RExpSsmIrZAeGEt3Et3EduxoV8d/tOBdFKQpT4w7ViExlZnKnTeGklvAgwBdrf/sYWTvDFk5cHh2eyH9B6ctVrIsBCKYnfxvfErrTvb2q9E7IlHUxGGP64iNtGVCWBnb1MyETke3Ui675FoBe+GkZl9OlCiY27Hncz6Qn69DXKdRZ+JOxjKDLmA3kNZGqTLAeCoax8DbF5vBzFWipsq7peXf3lTzlDk29YZ3k6j1fHX7n5zWm5IYStnz7d9tfvQfx5cblahle4QOi+rIwzAfSF7L2XUEPWoMTLQlm1N/wcUDWuprkaUAXRn6JUrM10D9rlRcbIe4CzCFHxUAXe+OWcLeP4BiaKnuLXkpXjWIttcT0mrsby0PHKsB7oJU5e5FQJY+lnruUvY3FCqkXK517TYIhx+VVqtc3do90QcoFxjgwz2gLQ5azgTtVGdMzReJIbiqwyNUHOoGdJ99BKiMnvlbwMswxbzv/UEaGQCJpwm+mZ5fa6RvTF3RrJDuHFlukedzWYsL2Q+2VqtahBRQfA38kjtXAIkgg5AyhC1FpDkMzIgiqZuNIfmQg2lyI5Yy1gbaPJdd+KyMl0d3TRn8zDvpTtmFVG+fr07FBWLnd7MmF9L+oqskeEnU4n/lybDegmFqKGxbA7GDaODl8qhnh7q9R0NgjjvgZ+uhjZZx/eqWn7Cx9PRjdTranG8mIBfnfhetCcvnjwcP1HP/+5lmgx6FFYvXtyzKJ5+Oi+QfW73Z3dJftXid1JpX362UdPfvHx7t1Hv/f7f6dzPLW6tJo9banuo5Pu8Z6UevG922uRf4p1AiSAGfj/EoGhTCb7qzn1b+amQCk2b04wY6wiYQ3ape7M0rQSRQyPqW6O1JJaycCiAiRz6+a6f4OhT3MeyKFfVqfqdu70tDff7U3P9u1qaScwO59xOWk/fTFPQTppaRig3W/nxuc3zWZjMOgTGDu7u0kCXJyzM5KwQbfbBcjoClV+ZqMODbAVk6spXuqo8dZxz94qiwS92Mtx/+Aj07xqUaukoPNKJ0WGz0yKgTWMtxaoLs411zBqXG00vBmKGtq/Fs3O240lNZDEdBBY7UZduHkRpaZRbUq+79hJYLVdHw06NMTawsreXqdV/6eDjmWpFzSJ4/Mx8futd9/7B/+z/2gwtLMgvHipOqb0U8xZ8RFbyFvCqkWTJKssAPd/ItPmDsML5aVWivrH6koknCKvAcvn08Do4G8oMZ6YWSEuxlPot7ruJGog+kJ7fC8hr8gqmeINi2852cRhKNBGD+zmkvQRJpZ3dzU7JgJZBXN1ddgfmab52XCtSXL03YqfVfQL1hpP+/CVMZEexV/Pf2Z6zAu274/CK1k8NIh4iv7qPpMeFRl6SliOc0TjyaCEdAp/MFs5LuxvdxNlc86i+RB//EQCw5lgggam8R9KfYh0Uyt7vnb/wWN1Eg4OT+2oSrAbFzYY0V6ULdiMSYURmPaccMlGOkWw6A9RGSINS8SCI+0zFfynuK13yl5jD4YiMkVageeEkeAomoneFwBg6XY14ly0Uo2cIiwx0nDV8D38JJwPyUVOpBlTF/lUNVq+Vj/ksxJvv/xezoIbb0i0mMJpINdy81+h6uqpN83iyH4N4MpR/ZbPZIhXFKxvoUAOCZ/0aFgCmsSVT6FzY0DedskgT/giuG1vRnPnNzOjC7H92ydPnvE2WVgDObTKv6wF5nGq/8lzYsRbxBd1A/eI0FAyUm10+zmoDYYAJiJaVuLcKGelFC9GF45AMBXcCHs2EaX/AV3mr0xnhpy+G2N+jNkdQR63p6ARnDfL0C7DpF5hLhEzb0AaRC8w90BmBXPTRLnZNw/kG3u6DMfXIE/4Sw4/xiJ0EgxyHcJoLGXaXj7bA8Y0hQjyGaQyO1zuBMRyo1VbJibgrf+RK2tVUZkUoXMEIR1FWZFspgUYhWPTzaE9wCKMFMcoCr755LBQLsdi9+E5rZyNRV9SqiCqBqsoLik76+F480r4hKgiClVvCr8ZSyrg1YXjCS8qWG7TJysbB6dLq4uohkc8akwisvTBrH/MmKgRGOatHdKfWS6p817UHfUVTHz78Xq7qeAZW8hyneiV62ubHMatemNnZ0uSm37oy93tjZcv9rO2aandPXnxR/+vf/Y3/tbftVmJ3G4ctnvdu7nqzlwP5DXgCshEnkq8gRQBgM9slMM0Zh7Me2b
uq88AOcyFvfoG5c1Z6DsHyRNEyEyUVSiI0jmT2Zo/EDJ3+J+1VLd9fRvwEAkJ0XsglPr5o6vL3mQ0WVhM9Y+BZdjmO0H1LPgVwWU6IHjVVeUuiHKBLS7WbrYMCoCznNO6P3RZHP4mEffjapVaUjQsPrJohpm0zI7s6qFh8NIkMTnCzGEmplWFmFL0cMYuTVCnPjO3NLa2bmgZxzV4UrEp29O3tpxabtu/dkFa2iqXlC0Hr8bD+dklHEKOohhlxwYY4+k+5WZ41Tk65IdcX55++eLAflCdg5vz3cGdlZ3Xl5dLC1PLizP9oz2dv7e29qDZGkqhOTzsn3WvrhsYxIvDvQ79tBjp1JBYVXi3meJRStKr3qO0KVX0AIT/pUJmmOGk4Hlch9P1WRZ54YxJ2sIXDVhT4fA+Q452pp5eUECluVBTqj/7t8sECgWwOEO50o3N9xXPypJdoBjpa2uNg/2XFyIFXKuL9lpUvmSSltF94bphHRE0uoobRmsBb+xNPD13QSx9xEjCKSIz3Ekr1qPMSzkylsjTcAz6JxwjhcgQdgpOTpYydsViK5EGJxE+hTRrNhYoQwZOalsCb+HLbL0pAXSVFfiLz58cHXfmiAqkls3zrACHE/GhBAXiJub6D6oFSASaMArmT+zhMMWPA8cE5OE+UmGVJ1e/1KdIR5lSPqB7WRAn7pSFY0Sjl9l3QF10P4k71oqrkYiz8Ivf04SImaMY+AhBjd2ATYp/35y/YabFxiqgCa39u450wRFB9ebZcpc2K60zfCg8NEzWoN3sPFdIDELtqyuFlWLFoadwucJHAx8qQDHKFclHl+r1Rg2xUocWP7qaG44n3cFg/3j85OXJl8+PD08Gqh0jKAsPuMJB2YoA3rX6/JQNxdda9Q0qdLu+3JhtZlU2rkcLw+fNN3E/XW/QGCgeNJEr2nk6Y+LT1yJCClqYkaKZlR8C/LArR0nPM0kh9Qw2XjtH3LX5uaS7gaGxZdLNpDq4AUj4FXoApbiE8SqxhfIiTZecjoLHN4XXlBdr080QNc96X5Z6FL5F0zPrdLeEoqekzFVkRivBzgh7rs94UwPdMLg43Y0wtJOOSnlADz3CJCkZuoASQ7H8mkm5KEkqWRoD9P4rdwQlg5QcKlm+hTugPL4HDp8JPYnMiittnu4s1iuEi7KE4UmpmdnFoktKuKDCGwgHlS080OlEkdnTkyOexnsPtl9fv7LoyDzyFLGsZqCAGgHxX+iEvDUWTzaBZg97VNQbpO/trjy6v3V4umfBN1LW9uB8MB7dPrizs/f8BdVNV9c3tp8e7otR1Vezs9/F+HT/Vf//8l9++vu//7d+93d/FyiWmtMrS3O9Yx5F9QTiBIQiwBrHBJiXwySWsVf4HDj8D45IavE8T/CoRarQfYRrEvcyZNoHTsS9g365QUwD36kFl8KIVpuii7nzi17f2uT5+w92pKGfnh5P3XQZTtN1bIuWToDFm+Qt2TCIxE7OgfWcOhsWpyppXw67JcdDCwZoAKoQkTlcY7PyDE0xmDPgMEx5fOGGDg2bWt7hcAKlG9kBScsKNy/uA7fAOuvlqOMXN1LmMBdJY40pBU4lHA5vXu0d9066s7IT5xZXl9ob7TZvmTzMO/d2F5sSiGZsvSuAPLmamR8zi1cbayv1uevGwvVoSDm3Au9279Uh3Qq1orGZMLOxndtOXj75rz/+ia6qk7YkZ4EjKnWocEr8dNZgmojQbBjgAijjjvFTd6RcplgG2zzagkw3Hswlju2vZvDNCV+9AhiBPNXBIexnTWKmHKHxR0QTDJXh9dYYsauQESiCtKmnlpItmcUQMAVFNZC4SslI+SJLlzcHJ52h4tTUiVl1XM86JD5PKfIxQ1AoLyyHzhBNWkEdVlDEDzc3L4k9OofcCHNCZaNiM9ugv4lS3DJHhFU5yQdFA11Lw3PRPd6FSUTGCTOWIIqI40KT7h7PnZ6abjYlvUqTq2sbNPPPv3zS644i6rCtuIfwI08zLUvzjClhYdErpbNK1Xe9Nk949yV/FX5Jwk2y9I7w4g7BQabtdhHYFGQPUykOZZxCOe4LJRGKqWTy8C41COcmnF56Fr4RMZjU2WKslfdTO0JjZsNReGz+zeFVb6CQiSssCWfLbW+O8kgl24q00u/8gnEWks5prCh/1c/l2aKLltti3mHpGsDAIy+T3hO25VqMzYqfllpoBXLBH91CZhJ1IMRgMvf69NZC1GcvT1687h2eTnWHU0MhE+t+hvFlV4e+T4+mZvvJLTk8PGnXp9aX53c2lu5stDdXFtsNSoaVRyPuJkZYMdssSooPGUPmEqqAoSkSGTwqcx3MYGZgVEYadlPui1PA+3K9kk95uTEaS3Uz8zm90lBR+lhFUcyxnAilUukP08FZWCikIH7hZtjoAR+312p1wUWkwq8g54rJqFSeKzh3eCLJnJQHCAKSHp5Wp5JISOpEJeTwSu+TQX5yCpVBMgZX+uqZ4liILlnspxhPbqHWpR3WYfQ+/0VOJg3SZxCnyNo0qcA5d1apPcWgkbWqIKc9u/lmo6pcXRwfH56dHmffyNtFAXivprRpH1joIr6CW9RIC1UuxgrkP/nFgb0J2hut030sIS4EQhP9Wq2W8LCsk3k75ViIyg4eyqzEoKiwtxctrGZmqvH+Ow9/8vN92owABwFXW1qhUwrKcEh++umn29u733z/w8HO9keffrS7uytf3g53yEuK/B/+wT8bDg5/67d+azg8ajZnJVmQgSF3hRXwq6w9Mb2ZFp/hThH2sbqrKz511Fgwt8wHIvc9TK9MS34IfsAAP6SBGG6FBlyCKVkHySCYEPweTbG88bn8i5X2UqYre4JGgeBNUJ5ekUeBE5NHlwG7iC7zr53oT86VQJWTncT0fm+k63bhm7nVmrrW1xLcsb/kWF4Mpc5T/2J/5EgpdwpPzrj8km8t6Q2DL4uQoIM+0rIurudj8dnPaM42mGZPsufFZOonH/309euD/mmfOixYttJa2l5dV2JfKby3v9HfvX8Pazo9UzxLmjotcmb7nd17jzZWWwv1+Znmyo6MEMVrT0/PxgN+yAXJDzfjUXt+bgnGjXoWnG2trCijeNXt3sxIuBYhS4q59QZK6Ddo5GgrakFEmDNahvVlvtFEXZWODjOw/vZaPKiODNJkpAgWraGaULOZCTV3xZkfTx1A5QsV1dI0BEdU2Z8eU46SmX0X6B2EFT9bsHk6S/SsDraSXV64ikNnndHUzLFygti1bUIjA0SHwlmjieIHwQJNZc7yYt4LuYgvnj/Hs7ca6/Z45BLM5sgQHlLEQ5j92NyWiF8oNJZ+ebWnHeEV6UjU1+KHSOIfIpAUAsFZuQ3bQ8q5J4dkw1zKRgkccIbZR48e4y6WJtrdGwAgit8InDjD2JgJrqoPDQWITNk0CU6IpEmyQX5GI+cGJhXWjVRIeB2LlpuKuWVo0d5jhxJX5WCh+xIeE3ibkGjZsA+zgmjFwA2ENOMzsfnww7wibKiiwzcOPePNr9UBBsbj8406GfosOnnFfAuEAqho+Gkm5zmiOHhfacTXgA
Vj9GTYZ2H3ep0bKRu4n25xvKGTTJ974wak9kgVmheXmrKjl7L505bEWBpxorRv//bnX/aOO1OHx5Oj06n+0CJ5McUsg5+eWWSlRm46+ENp42VHPps5jybuvOwhqf7lYKMlt9YGa81FhZGm6jVLSoPtFCaMEypwehU4R7kCqgyk2ByJZQYmsQL1kzwKiqSOpNhv/AKexRQKN/ZYcgIzJZ6IAWnk4czQmyYUlIprIDwnzALezM2tL6moP80YSgp63ONEbaBKFhVgRtjljR6z5h9o6CO4iI4XEQeRIqFuplR2qUy5MD+iNjwvOhc9C4nC+GqOcy6fX3pvQGduY/AVhMpAtGOYeVdx4VZknOkRts4yueCGviNQUwP5x1lkAXQk0CIXAhkqwtA7PT4+2l/dXFESmqmEm2gnstCyx2zIYm+QWH3W6XkvnqCK5toSQWH2gvAyBsECrDgSTTIj0UYIWATUmFbFyZhupDgHycUI+DC+9cHDf/mvf4zX89zr271798Q6//zP/nh7Z1M7qP3Fs2e//ms/ePb8CUsPHCw0tXCIDxCh/uhP//Xd7Xa2aLkYrLQssGza6Wo0HsbEL7NVZlA/36BE8cMWNIbxAUZQPIhhukx1JrzgNASBTHkos055jGSFbKE8t8fTgY4BPqAODQQUDAK6COdb9H6BwNs5ulqhjHhiYtwYu8ZiHhdHXTAiJRsE2mkPHAXZXpqLlU5+OWk3acZ4K0EVtT2GcIRwELC8V18LfpqfsPGk2Jp0JiymrVewD8tmxnFUZYoVoJtr27VFa19++epnP31ydNbBRW2Uad+j4dnwbG//8rhDteofHX9+frH/8sCDPdUkLbiaqw3tWsHcb9Rt03c10d+bxjwPlTwpK8e5Q+okpP6u1Gu3tiOcXD7c3rIK2iTJsCiVPa/sFhtYQaPrG0Uw+a+DiiipUhYtCeYLi4ssnAVvJOiRddG2TIB54zMLFSVgSGAZX0HzDL5kk6dBDkMRgsRZEoKAPGjZw5GBTFjgwk6nFuh9k7jzFiWbzM60dx8+nM2uT9P9ztntbMvubfbqFQ5HgbxxUzMyKULIJh+qmBLn3ODKAsMDrIGTw67Z1IHL3kWyzmdmNxd2onEm/0KuiaxmGqAAWZI/MdjC68OOCoNJlySdqbfVbq9Qh8gpyRXiKEv1ln0ZCSQYF6q6uSDErmeyRDdJ0VO3zaVlNTl/8dnncIwEEjtKRSl5JE4hA+SIrzF1nJIAKFzCC2jlB++G/CgBKqBBiMwyLjOBMnjNpoSY8gEM8WI4Rqis2PgSbqcbjaVgrcQKWiYzcGLKsYQau824YixhWLqmX1ZfspDnrr02Bk6MmqrIU6EeQrvETgoUkVqYV14qhEomO3BkMMlneDBQ+zWIkjk0/yHd3GZGuE4Cl7wberi3hA29C3/nB0wzach/SQtRwCRxk4RSaElcGvGPz9PQuxNM8MYeuZYrDEYzB8e3L16OXp1cfvJ0YkdlqgAyj1DyAiJ63iYgGga4dEPvdBO0UTElgEtXAOj8VD2e/snR6M7m8uaafUOIK4VfL7mFGjV1ur0cmJIma4WbAWLMOFWwSifjN+ArxOe4/kVn9JNaiz3dDs/PC8kUbGQLRyWjrCo0XuUb6UZZ4sAwqmWn+q2tHeDSqMYjk4KsOdxf5BpSDB/BsMKubq4s9I2pElEUOBM9OG/K2GYeTH5scgKGnyEH+NIWzEyx5KLHlIse5OExKVEqM7jMHTqRb6EAeIBVaMnl8pYwTXLQbXmHwaTSTxhuPASBLDAn8s71R6dUk9lWH8oqIUaVNHBUq22TtTk3/eLzX9SM+N7WLPf/dRTP1kJzosRCv7++usSRPddqfP764P766vMnLxQC+a3f/vXt2uTT3mWDxqoatK2Fex28EVb1LG8sh4wDgAAkMZvG/BwvyvR1Dz3sbty9tzN/fNrpn9+urK29+8HbP/vok4VWk4hbWV2HsoNhh4x+9/0PP/7448vJKOXKOAyHF2srK/z4P/nTP3z88FHn6OVau7Xaato27/TqjFPYZuM3VvKpzDunU7XETW3iTVlh+UZJsZqYEU9SZecRHEEqBI0yG9cGS6AIOPsjMfhTxwhY/VV8M54qXJRkiHKdumuohShYUEFnZqbTm6yutIfjqe7gsqRwm+BxnFGpaR2yQsApmz0no1CgAbHP1OfqkvvsEX0xlJRvh6hxp3tpJ1mZljqmwyKG8GjY6wuw6zrvc6RjIduqqxVGKUAQRUV6CEUp616wCWgmXrGIiCzxbjZWTk7PfvKzn3z5xcFwzOpdXF9ePXl9sL9/stVeXW0sWp172Z9Z557qnME/iKA8d2N5jTtqeNL9yR/+0L591kJZUgzP+4Pu4nzLAi6yK1bUoGdLH9X9ksY81TjvcSXPI0jUAJJoPUpfKL7AlrLI+tDvwFKUE9ipjJAFEcdnSEeGr8qhb+9KOmlMLrvhFnhd+GtYk3JfkWRZLmKQnhqvLG+VzbNS95MsadQay1aJkcXJiZngAyo221dekJQvtL60sbrxVnPpDuZKwEk9vDwfXc3fvPfN37DH99ng0h5TuMpcrT01Ueu2L6QuYHQ5ykrHFHmZnR/ZBXt29uWr52zfO3fu0iDfuvcARlmVtbTcpoDCW7wBZ8Tk2821mrU2NJJaNjoRGQsznrEWQjLaQrMd2WH5dvQ50y2gOC9fisd0alTqnQh7yG08p3BY2yOB8Orm7W+8LeimCtRx96zRXvrzv/gRhTkIC5qcFLCqoC/BddO9ulR7nm83koJXNmzF/lS29Q6wpWbA9Ev2FnGDOyCLq0mrvjSz0EzKi4jY1DRYj4io6ES3F/Cd9SsBEa9XXGd8YTGbSiY08lrWa4dmksxB5wibo5fDxSArHoiwMKaEEPGq8Kg3ByRwRrBXqW4FQzzocTAMPsTQi1IYNhoGnF/0QrEOpBxGGdmUAUS19tXr8k7Iz3uuPynFEuWVoRBvOZFgcuepLXLyZ9SNG06mO+OpzuDm+HTyfK//7OXw9cnt0XAKf8TUPWlWYrGwrhKwKgiIsLw+ciTCJy90PSupi2SzOGdAYRXfmF5fETq9qS/eRqMLk85ibVMhaAEoNK+4XRPn1BJIaElphpznAM9if+g+sOgJjzNNAy7TxWAJ1r6yskYmmPB0Js1Hjybi+70hieJKGgk6RDZUToUKku4KrL76nLnpFWEU+wyMiSWuHvGhNJJmPK6Txa4rT8F+TWvKUdSS8Da3GVK5hqIzivyaQ8ZUpBsrPr2NjELgHE+yDLKQOXfkiACOcR5ZPvYdRokkAz1COJ9cDZVcCGeGMRAEL6ZJGDGL9HJJiodlU5xacZDGP0GF5EJn3E3fNrqDvnWXAgO9vfOHuxv3ttdvTl7UxRE9zbkuc6uu7EVPIkyMnLDNiPBo0/HBcghCNSzCtguNk/7B7/zW93/6yT9vLs19452H/VHv408/WloWTGnr87DfPTk5sn72nXfeefXqVfeMoy/Zb1Em9KNee/HqxcM7O7bEUidHWu1K26a/9Z59JSkWNMlYx+eJiNC+PAN7CSm2ElymLJvJcA5dL
vqBOyBetCcXKMJxGKe8HEMyqJgdwkyIy6YB0DMvwQZBAzw6xdbMwsnJcEl52rpcVlQd7S7Vu/AmHfQgp51IpfoSZiY2howwd+GOtWF6a4vb1OXzSkl8sMTkQm1SwGfkG65ZqLd4IwqxBLkzgmSXlrHE2mD8GUaehh9COzaSF2sS7JxZXrq9e7c5Pp8768+cnh2oyvg//9v/qx98+N3D53sHT19tLG9S0L/88mlnNDjqdrdrq6zuk87rpUaNd2PvyceN5ulC7YH6s6124+jFKaLw0mQyzM6p8uplyRSzMAkK4VnJzySigpZREEI81GiZHVGeklmGxBJWiJu1QlrcEOYGGeE0dQNYhvoecjGYUEaF2JizzKiIGj5FXmzVBblXrAmz0A13ZtLrUV4S6gDr2YVThVumm9hqe2Xn7sNvT82tS4e8ul3s9KTlS4c8uzjvmduFxdUZCxFvLq08o3zPL9SNTNkRRGE/L+jAPEYP7GXxJSnv//pf/+vvf/f7v/FrvxluZS6xctOHRoJUSWvU56Jx3vKiWvQyn+Q+QlRkl+1Z46yMMukr9pUxJyUNaVvfZFL5UDORNzfCeC6CtuQzMGy02hjW85d7TAxL9Tnl50YZayoEwhq2UoOSq8QLnFmYGc1N27sAdySyPBYfNcyopY6ZtziXQ4om/Yh12t78qmF1ln/OBZcbi3UrxQ0MWAWScTSzM1+fUzbD8q7Z+SubYrLTFjm7amp93NT4jQ1o3mz0+CMNKJyorGSkXUBHmpc3EjAO/5bPcksBaTC5XI+PvDqCyF8LKWIsGA09zEkeNy8QDOBwheJoUlommFNWCaS6pyqR0GB2ytYSuRr+Y81BiNr+Khbx2HbzrH+1d3Iun+K4c3XWdUX8fMpWheRvRFMlRUj++K7CodNn/+efIhnDroOagEpLyWWxjjhOB0OJVtfNVut2+Wq2YV1dkt+mGLE4F+nlTphJ9pCgkCYnMtD69lMXs2wFj786oqgGDtTkCKRKzGA24PBy7wAT8yAo6wcBw1bG3+XckNtQp+oqWHmKKy+TXg5Pu1Kd68mbNj1SDsKMoZV0huLFyNAibYHQXIQI2WE4T/VXvK0gE71E1hSKM0sGBBgFLFpM7L3cbqDEUfzIpsWIqGqBWIFqeQq83UmSa8wLU9q6BMK811jSidyb1KEQNv08rSeCz/4Zc9Fll3GhuuvQjDngtRCHG07O19dX1Fzt9Y/aV91nTz+5OtmzMIctAqCwBBIBTdWT4jgz2ZVdTWcB9vj2a2rX3t7aLuqb33p7Z+dP6quP1V777//Nn9lEvNGqYz/8q9OtxsnR8fOXz/7OB99ihZ2Pz4K+N8hHBGSm2WjPTR8/+fLph+++//Of/nDz8V1BslZtffpExe3bwcXUgB8yS2FjOMPnrI80AZCbNppRR26F3yeCGOgGE78CcVwS7o6vSWdDebnPacALP8GtPBSEzhzL9AhDnlvkxN4/HM7xLc3JTJ5qiPbMTq22l4DEysqosjHIkyOmqo0ViUw6OZC+jEeKMjTUEpKfatW8jGgvy9pOHbH/24wSt3YDQepBw4oV64mO4Y0wJ7LUmCJIIXcuoedsFylNQKqLRMzFxoOHy3fv2Eisfj272mptjWkU/WuVf9Z2doQq+6cjrazevf/BnZ0Rp1FrfjgZ/Ms/+FeTq4mir1fDk1dPXy23z1aW3llb2VhuzFm8jcCXFUhZrB/a6Bl2sZwksyrO1DuiI1AAmQ7h72W8cB9d4B56VFA09QEKPOPToEkWCssexCA0GI7Bl8M90i2gRwWhcXPBm3UhM5OMsrKIAeYr/8kkOXocWPYttNYt6l8gE+fU9UydkdVe3tnafbS0cnd8UTs+6B0cj047F3t7h6MhrahjX0bblrWawnjy3lmSoK68rODVnFxzSp+8yih33OCcliJttzd37tzZkcC6taVLdDufVpdwrmCiZVUDmgl/ZTpdjm1kbDnAclY9LSzy2SngwhcOOAZUmHNiR1AvVijWQ6fktqEd2sst5bysMxsI5i215F0nzo1s7QyJxanL5am523rTvAc/ST9DV3WBuCIoFI5kT0QeBoysK+tRxudDJUBgOjYXgyO7bXHfR1x95+59w08063amLY+qhCKQLquQlSVYjfb47ZqycxZl9Vya5rnZyfwMyXQ+c8NCShoSPkdshJLCTjJlupi5zIU39JV/3hy5bHZLbC8THIjk15Ac4gKXwqTycOgmGugVUR/GhX2XaxC+yBPRS7FSQycDqA6LNr0UNHbggKYHg6NmWLIjIMu+7IyuDk+vTro3r44vD05yIpNCAW4rZjNPms+rQ2/pD/aIyAwincphKOXffEQQmq6ieJTbw6KTPjYYtu2QKT0mLrLCeRdmG6qf0a8Rh/hR6liXMCtuNzf3rZUVzRgjDLPTARqwZF/jXMYFjAECPvO1dImKnPcWwV264wb6uFWrgBru7shA3thSnNNuDiRdCQsLm4A0udlpBlWmjTIIssCdr/7cmWHG2MtnYu6++gImwfEEvwKWuCXijk9d7Vx2RCfiMyHe3gTkMhFf/RqkLW/UnwhjyjUuFkHmfel/UtutXIM95YAksUQZPI54Sr0/L5IYvWhRMM/h5a3t6SKzx2M8AM0YGS95s87JJQo26HRe/emff97ixpp0rEaMh4H+wjkcX2zdBmTxUJnk8BocGuvJKkvmneR8a77Z6PJFd+/d3X3w/qdfvuLiePzWu8dnp1Qyq0wtGW4utdhVx8f73/jGo+PDl4N+93x00ag3VauThXDv3oNPP/7oN773fUvNe/2zna1lQbntzbWa7cCHlmFeDlQHwnvkMDGbQBoBkZV6GZjrTTVHKLfgobxfWhrNvBzSdUHQ3aBqdtEcOQC6FCgjyjzmM25hUbVk7k3PnpwOlDzqdE+Go/jtGo2phqzWOKvtlT4l0Lm80li2JlWRPsvn1LGbz+6tjx+uXt40lbGfOb2sX4AuJRWbj2zkqrk4R8sspHgUdS3KaelsgFqmsdjhb6go3UoPCxOI8WHiedEwdzrCmE+M38mC4eHltII/jL39w5OP9g7nb5aWFCJurz3/8uXunW2pQRa9XY767bXF3/mtD/vjTn9w8tb89uDyaG660z39skkSz7T6nZPV5tTutnSo5bOXe+A0pj+Khs1OWckle8wrs9AHEOF3QkhRsdiWicrjM8aXemA3Y8t+U61vPr5BwkDMc0rkTIkM44i95VlDSsxAm4lXI1MMEF80VBm2CnaIuqrKiFNx9KCa7GjtQVOcObsxoNV6887C4nZvMP3s1eHB0Wj/aPD8+aHqGGg/Gz5M+LzGJO14o60M4JLwuM3dUdlcHdxU17Ttr75JiJw1d/U6wic2fv/3/wYXHMbC54BqGByFfUgopGRcy/1HbvR53N5go0rIbWVfWkUVrpBUxtBoDuSRDr9hHdFHCBtr52AaMzU6MdmiJHICW/Xm2aklecp41i8uT8m8uc0Hj/m4SDZkqaZKUxl/TDqN8TlGexdw4xRbW1/Z3NxUMpKHG8dfWVmx/cjq6jqfkqgHKBNfvMDClWhAvvbh
51/+4he/6Hc7n/78L5U0VOZsLJHmYihIZfvh2anJ7tZyff4qURk2liDNHPdOvDE4NULTdYMurCmcKz6ViC18iVwuoy4jR5OyH+FDxUCxSdOKLmEHwMGBTH850GAMFyE2eiiPhqSuMPss7I17jfGuvkep9ZeCjHGYlc8Ux8y2b9Dl4lpQfbZvQrt2Vrj6xasvD7vjl/vXrw5uutLTddJqX1tR4RWUp0gOncKlQ4q6ZESFbRqICSy81nfYXLpMtWHJJ8YXnYU1MzUZWmCo9DO0Vv2jBBPk9SzOrKhqPHdt/ngoOZdxpJRQY9Q/3/M+6G24Gi48J07yrtWLlfz23io9wXgMRyqVLNRy+Opa6R6+FWSqgOYz9xY8qzqv9YARlHEQjEL3IyIsqqjcsC5pynMEBjCXg0cgU5IJ1TAdoMTzC1TyAw9eDhLXZ5Fg4fk5Dw5EoqbR8jW358URc2SRWQ6HDUSRv0EWroAfc9YxI0IBlr1nqyu99lxpJjMC2Fg1AzVyxsI3CgqS5YC4UZVACGRR4U8iUiMbWxsKiZwev7w4P550n591Xu2otnBJoqeHg/Pu1e2y7gZIEbpEYOZWx/1bAnX0hex0NTVTkzp83Jls7ezU6o3PPv+cfCWinrzo4ghWApW1XuO1tbUvvvz0937v906O3v7o5z8nHMyPHBUbDlFvoeRf/OjPP/zgvR//6A8f31+Tl68KkjhBs3Xt31NbTI+nbT9jgoz/OmvPixavX6VPwVD+ZB/VdCDxIqsKuDFIfpQ4UFhptLrYqTnJrnoVsppzvTGzVrUTPgv1Jch6fjE46Vp6ZUtAMyioOrXULpXUO7cNhtcMesd9Q7/y0vu929/6TQy19tnnX6iGXatZZDOzUpdPQklUi4Cqfru+Xl9uLQVZJCKV+Y6+GLlUoUryzcx02HuZcJ2ssAUuYANxcMUfa1GP7Svx9/rUBdu4ySNw+OrTJx8fX5/Xr8e1Yfdqc23n5Piw9Wxhhg/o+uzBWxvf+fZbJ91rWsP5ZWemuXY2OBZTnpsaqVknAiZ4sdpeXVtZOz/r3YzUtZQqJZeGPbOIPbA21MvDcTDlsJksQClWT+QX0AoATg2Ul2UrCQ2AuCoh+k1Dmyurg+WjCTqWrFioGdaeskuhYkg6p4QDA+gmS32VuWfMh2+luLBFV0orFVEnhYWkn1ra2H2vtXKn2598+Wzv5X5v76BzdETBVkBrGBqPB82UzPRFDPd7UiLv7G7wJ9bbqyZ0PDlMuXaeWH8pbnDNlWfCIaHYlW3bfJYQVzJzwgTiSbB6wwbX52IN8T6xdqRjgID4NbGNN/HjR0XVDOowaYVSsQA+QPnOic6E4WBc7HsBLQojijg9G9zfvsdieLn32kxbDcVYovbP8UfGoigrX0gppIdtkFWSZ9h9lsN4Bf/Svft37t25K/x47/0PgjzBYVjjJI5ui73nlOxkSA36gkmvXrz8w3/zb/7kT/7k9OjQ7LDWZJ4u1mab9iCps7w4FPGXiTFPLajPwvdHjTJHxYVhuXGInhkd9oYpmHvTFsdusBQP8f8bSYb3nMqqjL8J+0lqQMw+45+T77uKGF0hfiKWk60Z78jyxgqpl6ScBXtnEs02U87rag1eFzKGSmx2Uv7QgSBfHh7JUO92efwGx6fnByfnrw4Hx53b/miKlLJ6QZ76xbS6fPNkC3qmMHFPEoF4Yviq3oRRhjNkLGU8mbbqyL8xBQAzwUAzBi40HEXWjNYTszyL0dKiCqOJW8kdXuHmLLyemxAWfJuR1vzTySSVchTMSLtaZV2hASLMQIrgccJQo2dRL/gw0qUijSInI2YqmYVR63NEw5vD5a/77LxIj/APN10lqBvzp1zNI/DdD9hcxvZmpoJFTl13oxmsfspn4fsuQLS8DAlUwrDc4asOYli+OfMHRCwGQoxohgQSLHQW9Kq5zuRzvtGm5xZ5WAY2bLOU/2RkRf+pvPCANHfnPfHzs1AXRKbMFgixUYn/2szCZb8j7Y0mbQXO5tbq2VHvaP+JXIq33lqb6p3LorEyIwLeWo7L3u1UEz8WpmXvppfeEWsTW8c1+T1cbtu8e3LFW3txeDJeXdn+9MlzxHn3zv0Xz1+1mm3ENRwNzkfndEnv5fr4wQ++//DRg71XL7E6lSQRrZq7FhHf2d367NNffPubDzc2V169evHu2/fGo64dTJZn2TGNZmN8dDy+6UzECYkd1gkdiENKbyAOEJmLcEf8PVOX2XGEtkJWV0jEjLFsIuTZSHG9ZrmmmzGScBrWcJFjFgyzffhu1f2Zra1MzR7zTsktoiVwWksqrisdG2t6hjIqks9pQX/GH3kgavX11lLj6vpzXqPB4Ipe8LI7VrgR2FgMm5tSse5w80ASyKh3BZHTz0QjqYDOgiaVq9k39+iNKc3mg74oU2xJLswGA5mh+s5E2X/1s7XlB//oH/2Hk0Hjpz989rMfPz87Zg+MXxw8uTkYPn57a2qm1+mM5+e2m4vj88mpcrKeqiVMPzMZnIz7M+oan53cni2fLqVa0u2o7Hl6XqoHkir0NWGshEehZcWewAoCgHZKnjAZ40wfKOAv6EU1EoYiD9J7OCy1kFeZcUXAsTOt5xUZjVsUe0zvlcGklV4itGxnwgfgsebCAjYq3iSEhLHJS5EBODPTri3KJnncGd68PuwfHI6fvTjZP+gMLWufr6sVg/DZadWWBTqsZu7sCPvau7Ozvr59v9VsnpxdnvROZODpDN6vOjE3OQ4WDLy4wEW14JyIgjSZBiuQMBe6fk3B6IaSjCtrq0yaxO1S2hSxErxJAjentENEl5mM6Uk+iYDRd2TSp2ANysWqMDDlrFgL5nRzg+9xToFdrPtU6UwLpSi1r14+Qacy6GlMhFYiUbg6/66cAtqSbJ6pm3U6z0pt7XoVHp+9fuUxh37G8+o1N7J6LvssqMHg4PXh2fGJBWUvX7w4t2XronrHOg+9NYsyOFIk8lprds2oKmVzMejrrDkPntkExTxjfAx7rhZZ9bT3yCKgkVxSMCFypxI/Rmh27z9ecYNzMlmiiZ8dsUOjaptxe/n4Mcp0ROzM7WjAjc03mXU2lltYUaC6bPpvFbsDYkVp4MO5ssNNTJAUmrzujS56A3sHsNxnTnpTZ6Ppk+7tkMYko49ihf6NUDUywbfJuaGaG7w/OBvxGhs/GqbDPCCuchiU2fNkobpIBKovoeWrVVe6wUG+MMlGzrKSQI9cZQme9SY2nBNAzA0AQzZnRVwUE5/RdgIt78mfn9B0Yg00HbwqMlvOVyi/11PxXbgCl9GEPiXJUL+M12OxlvxTGgFJ/1W/asRBJvh0czhFBET+d6W4/tKI84iR6ogBVNqLf5dQ5PYHBI/nznS/8BsZhTkpA/KbGzJ/8dl5d75WV4A5s5m38TRwJEUzd545Ti6JIgZTdvwWypULZcXhZGbSmMws0ilnO+WFWfssB5QvBeHxs3upBsgtFWnOhfqzcwnfYDyfjpOj/avzjgzZh49W//N/8Fvzk6PXX3xm9ZFyP4fHT9fW66yEaI3Z4pLCFXU49kup8BImMsV
7s8itcnJ2ftrl373tjU4//+zZ/Nxi3Feztzy6WQfNEZ0oBbYy2zkb/OQnP33vG+88ePDw+PAojEvxOoWrZ68fPrhz9PrLj3/2sw8/ePjpxz+emXmwaBeMK+R9FVlrZztK+NzsWfeC4werCS2RvtF6QBxwzYJ59ekoMwiyhX+QAdEDQDrIE31BvkRcprGSCTN3ZT4SssFik2TtxmRUzdeXag056ZQkuF+fv+GIPlKcRZ701E121qA304E0bci1xfHa+i5jQfqqOu8kmSlq1Gcb9UWjgGzFqSg9VeZLP/uueZ7R4b2lf3AhgsnkRXa5qF3dxv7gR3yYwTgjjVdDEDJs1CKk5tLmdGvmfHz4lx+ftev3f+V3v/urv/eb+3vd509ftRoLraX5x483b2+7p91n46vjk97Tq5vu6kZ9cN6hbtp9BKtoLCwZd2OeUX3Zne5yWWB0xITmySHFDRTdcwNhGu204CpkFMmhs4BL9kHjBrSJ1O3VkHXDeCoObsZHckXEQibUyuQ0ctMhfVY+8PKXwCtmOd9fItApXUhvzfiRhU3oFWbmaTOlmL9w8IXazrXVZut+bzDz0Scvnr3YO+0NOx1rsEjBudOOkF5R6mPqgBFlD0lwJMrtHOD9d+5Nb2yu1prLt9M9lrLcCymESlmEk0aLMVIFiOZUblNvLoZT8AaL0E9RSTsiNi5H85f9641WWwQr/ARhl5sy92arBJ0J5ZLgHfyTJIhj2zjs0aNHK8vL2t979owPHL4595MUp0EpfaYFRdFYV2Z7brUuGVRjdqOIA4ssERbGQ8YFlcOCbrOs7PBQ60oY1PZffBY7ZXqOnSdZw1qTgYIlPcOom0Vwev36tUUUhmP6mJKjPkc/5qU5uH65MHdrqaOvKUOQ1KRUYZTzhyMLtIlcb7RbZoGAtebO4aTiR15tkPCA78a1SiBHAhXNJUwn3iS5RnGeGICYJAF+bcdqO8M5bHLHjM7gKDqxgoySneZBXkNSXdpxkVbwIePD9PmQpOUzH0eXt4Pz2d7oxr5Ip4ObswFxlT9L9S5NOISwIrRgHgbBfjQWPdEoREZB0CVQjBj45eFC9SWGzBtQGx+lN/NAyOHsuAbcijMwznBdslpzZmhghC4CiZGU8LiBFEEVQmE1Fggkbhcq15jVslCU88IKFT4u98ZhQXy/6VSMr9yGgZG4MCLRoxzIUS+1Xh1lW5o359GSImYiBbPsFjrmF9/CrnPQb6mVEYQucYXgHm72CPvHsyjga1gkPhkuk0X7kU/FQkJkVS9cceS8+ACK+qE2EmU8wkrydFhDuYGuhQbGZM6sgI6COjBzbEFbfyzmGpe610Ryx3IIDSn6sths6BKhVV9dnZpbPjruU5zdKBuaX662OPXy9edLTYHfmc3F9t/92z+YGuzd/PpjiwrEJX7+0z+5vDq9uOwl18Ky/3OLh7IvM8kTRVnFyOkao/38coEDeXQ+c9adLNQ2Pv7RT7MN5Mxsrz9WYMBKf+tLxqPLZnOJDY1KhHpevTz44N0P3nv//b/80Y8Yf5KMoLPY+KOHuy+frCkI9M437tAgX+4dfvDufRQo4Vn9DAJ3S5cpnvPTkoBOu3253fHP8CPFbWH48T2lwnIBVyYmEqgoVXJ3J2OBbItVrBywGH02yfH5KdkDJjXyyjTH+PVVmJ1eQFAYLOWgDsH5HkVWJrM3Qy8CaUMpvLtgKKDLuZTfyLQSV1VSgOEl6E1ceSlFlKZBsLWXG0urLUlkgojqR8T4CFIFU+AGGRVEIRmgnu6UDCmdyVLOCIssGbRsjDni/TQXz+rr1WWfB06ZJUnsF+enz/b+slbbkv3y7rfXJHyOhp1fPP+hfPVZhdKuelR0PZlcDxbrwie4xPnleO71K1s/U5clyF3Zzbmlfmytdnd3h2/QRFpsGQ31/NLqlri0SJ7SXWyNc4apO7q+UkBBXWSy6NxUxbxAKKFIWjwnqqRuq8azjAy9pD6DIaSSnttgEy7CCcWrj6RlzadYCASLGykAARvwGV/YrY1ToV2rb786Pb+aaQzPpw8PeywDG5Nyx8Dw/nDgAWCxjp9BYuN5E2HdG82NBnxyOtzc8LMlny0rSvWB1OFtxn4o4ufjMUasCJyRYTvil5FixfYOC4qSditCBBoNuZW2QyPGw36gTchYpxFprHz8OCscWYRhXx++9/6De/e/+eH7vV7v9ctXpwcHsbyveMuuZNIieBt9lZKJEgvElCk4KPzyZeBLi8OYTH2+mG4QZbJEHxconb89P9v/ZNzj6BWXygoD3B5aQ2xQtdAdTI0+jia5EipzCCy4FFhQK8aoV5yF7qeuYbO2uNZetLckoUUwra0pCaXeClnIvF1UOm/VsHHt8D+sG32kL/6SQOKzIISdVYhJMEIZp4cHrgOQn/yIkEh8jJ4HL7oYCy7WUpKLS1JBqMdgM9ZcCaB5TkwkPhXkdqfVdX4RPeKTvVRubqyegzXv/fGUbfPORrf+uuMphUwhZvbIo/MlU0ZzJYHd/qhFNdcwmvIqdkOojpocms9BX3AWth2OwOmNkcvpL1tL6HP6BPfTuGCACctimlnl7KflaMNyXiezH2gjzMiB+J4usqyIVUT2hDN5WUYe+s57KxEB6VF9ehArC5FLdg6vdwC4S+EJVGpSsogWAKx+9YkwAl8aYW6OLPFq2niiJNGCM7ZKe9ds+BWjsbw6PK3gVfoQYyypKxrJKIsl6I2wWfUyv2vBr/whfNFwDyLwIhfvEEHsxsJcM3fpBi06i25VyhRD5gd1XF33lHuzC8XljZWLSkicjeTFxAiWsZmUV4RIksgAsGWTPRUXUgucmnU86M/VV5GhTBPJF2vbmz4Jw/aS4kJdMZVf/7UPTo+frVvTMnn9i08//eKLLyYXPe7Gd959AGyM8M3Nhz/6y0++ePq61V6ttyxLnSYjpSbY4Anv6HRQ+Nonv3glUwGuIL9Wc6UvPWOUCnsLi0vsMLTtukGsrsx/+fTlw/u7f+fv/d1/9k//CepaVOZk0l9dqX/zw3dOjg4//ejj3/jNH/z4x3/xwfuP5OOdj3s7W5vPnh9ubdxbWZ69OB+2W00KoWJqvTFipFM1qGn0Hi5/K/9prnRVU6l8d+zt2Zr1yDwA0p1azcVB71RAHjs7PNynMnJ84I22ihqN+qpGqPeD+6AScyxDcnN75+X+wakCObM1VoIbCG0MY6m+sG498+WA4K5Qjljibmo153tnvdV27eZquGgXvXa85osNBQVn7Pd4996GeIx9PZhWprigEbRCwObdtzBzUw3b4rXIujC6D3wNXqmvRysLg4+ildUEHgy6WxHF2rpQQgqjbODtk3FXRenOYHq+S76p/y0fV21+ufVZrWyTJWTMJSWpEiVxLF9edqz9M2IiXM4hL9Ta48f379757re/XdxWNzjEyctXWNInP/mZ8vm8StxDwlSUaikHI8YxRLWYaHpqdDtFDUfbmQm/CnoVipMHYcgUeLkQqAwjQrEIwYAqSvEi17H1xsKMtWMQOaw7BaB5thRYml5fv3v//re64/lfPPly/6S3f3SKd+
Q7eTYH2Dm3a8ZpS4boFBDyBuwHfMw2u0CLzA0OpAigooi44iK2Wg2yOQDd8UyRoVtC1PKouHJ4hKpQBdSpsUf9Vi27W1NNe9eoKhPBVUVAr9ZasSRNmFV1WJfBDkSqFTWyXlUUQzwKA6u0ZF0i0COk7H1m36ffWrX9XdXYHa/tIiLkKNZCLwd86+6xGOyFNlvBn5xt6yBaGkHsaH3OlkoqrypjN9+DXuI3jdM7/99tk3Lr566OCRc+9cOHL0wZkz4/KqNTyC03noB7IL1DrcA0RSw0wl2nL66gzgoW0v3reN4lm/kAiLU9YETR09XOYWyszl9X1etsqW2ynTzYs/ePXxxx8xxbjT8s4KfdWXOJS6ovIdZC+8YUIWZMYe4FH9qauySkthFCLLedETf46g6Nj0ILa0zK1a01G1ISy1EKSVQgg2hexZrtABfoaKDaEjfehZK+7Qxs/yS6NwYCG6eekos5mXvyjbMCT2eUtr1jX/0gq3IUuapqp+RcYO2bwbEa4O/hz2xYa5h25YdN2GqNFDNPa7XEkF1nhRIwioIfxc8nM/3fjRRx/Tgj/xk19U5PUlfPvbf/tv/87v/BYGUk3pNNHLVpyh4koBtRQKn1LTiiYDvDIQaggPEVOZMaLIRZ5Ukc7TXYpOVlEyGiiHF1DEYuwkqQNNlz+7xODbERcNxoJaHM61oysqPtz37ure6jZ1vsPwh5vfRmmvV/fLz+C/X+Gm2+0pFohdb4tLG6BVVKTM8MJSpVEFBWgbPGKFQamHiRD0ZqVaq6sLNnguok/mRTUjfdBJM6WAbHukZlZKIQoRAKGHISJHjh5xQkeTo+iHQ3ftnBN68AVEAFmdeIj63KbblU6lq/spykozFuTyrjMMq69Hy3ZsYwOYeAIox1ZkOVn+FuFc8cGJCDg1RhNt2vDH6k7XDNNorir6K9ss5l0EJwoexPEuk2aJtbKylGBQ1KapUVlEA8nVH4p16A//8A87Vn/pl35J6OzB4ne8QqthszF49Mgxy5jFrFY0WdUuo5DlOZ1tWQhAV5pJbZ+3GORE4p//hS+99OIrh4+ccHnsuw7m/Hwibt5/sfSMYF4PT1otoyGXPZs4tP9H+8ShiMpC1C0SkZvfwRSvD1sn65LAmFpIXbyo9ldU01zm+J+VW2V/9md/9qWXX7WuYPB8SzQsUcLCD7Vz0N9OqV0ds2GehOUFD5tTRY2DlDNNiVCSGngKNdf1zPXKDF0kpcRFZrVplZBo1SYItxXuKi9etlR/6uiOrwgKnVsl6eHBt/1+rMRJm+Hl39UG59vw3E5G2u2Tu5hN3+4dWG6v4q2f/fork+UqRaKcJ1QuqgTBP5N+eHsQ1EH4WOy9aSPG4PhLkLeA/4BSuBlAi0hlRQzesPOKq3Cpi6XlOUGNJbD2n4+fOqnD+w2cr3v99m//9ve//z0buaYdbaERidiygxCZOt6uMgrTiHTpABoO0AknzhlGWXFV1ykODlz96CGAlTq569WuCXXRSZNu++du6W0/drAVkNCmjpy3dhq5xux9n13dfcQg8ptBlZbYA3f5kcbBcxd9j9zebIfrXup2hl0K1Z/mhGG6naDUtxLLtkwXWVmemdfFwtKUbqhDjD1tpq1y0Wh+Nw/Sv3//8ZO5MbLeaEVQXGpbT1NpV1emQCtWP2SmJ+PB/l5wcZgOZ+ZN/C18bqpGd+54XDXoLe0rZFE2XXMT6njWxk5ld04GjpsppRxdt4PzAcimc4O5W6IQrFKcQBWKMD2MGfAdLVE6UBPRMyDbCFAO4k1aYcs9f1EUYW9A6CwnWYAlkjNc4aVARlPaECDWJbKyiqQUlhkSph36uBYitaqpXhXXy3/rt35L1kh2ekpqsjYL2LtwwyE1BoxJqZ1AI5B+w9IIR6RKKHY1j8345m07i26lbn2lTBvSpq1f/dX/8z/7Z//M7yKd+n/wyOzDZMbEGR0aRxWlIlmdQ6zKpFmSBlrKolyLJxC5BJYNC9ZtNMiE37aR22jz5vwQRxV8lgRZxVXwJ3/yJ61bsr/wC7/wL/7Xf3n+vEOMOSqGqDOffChfSRdHC9ULn/709158kf/XHLXwDeVp67ZBrLS9Ou6muUtMs/rp2Y3rNqtcJMkCPjChLTY+b6s2f3PZrhsRV3q/tHWv+Biamm5FaOAOOlvSDOjE+PZeQnIzBpvK4cK+0kUxE0SNr8hOJ50ApwpdruYuSKmZwSRusbKq5QlXKrhdTthRbtcOGFTxCoFDs9DxJE1rrZslXKqxAJrO41pK36MNpQxMw1s1FFkTVlRPf8Cp7lYZX2L7+h/84c/82Z/zaig/yBCG06cfm9rZhqU2Txl8RB4vZMUZQrnUAJGOKkM2UDZWmjW5WSe0IAqjxpQ3XjrG73Gd7laF8W07ujvVcBu//6kWBhZQytxUjVDuBqqEn/8H8y6YfHnFyLUf4BGd1AfgN81/p+T2smBMpkispwGiq0gomZimUJua4UdH02kshRzOVi++LadqANWLZ3V6O2POCBwlk4zQjExseuKYRiQeQTGwNowNZvJvul5iP4AtJsAoqt5jR497CuCi1jNw2yM28eGeM/vYRoaZKD3oa9SHLSsujKnJh3VmeRc4iHULkWbZ1kjzABdGml8DTwNl6WJ6qukKyATol5Le2ZV3KMwqaTDnogyPe2wp8HUdP6mqcq62p9Zn/kNaRT+354NORsQ9gQ7EqOyRB3PW2Rv3mfYuVaYhud9yxzb/UBKHGdCsQLmVjYqJktHg2u1BL8RznnA8Z7FsGECz0u5RQBhl2njAzzKeUZ9+iSJFgSzArFKIrSM6HrhU31VKvIZkIQU4eoeWODNEpMHhgBFueoWY/RE1ikc1flEkMv/u3/07W38OU9j3o4oezYff9YGK26HFQ8QhC0fjlMKlTKB3xapLvealvyYw6E9mD8w8x9PqME0z/GMfe/q/++/+r87d/e7v/u6L3/uOh1gAt71efuq43ihhwBOkShH9OkOKJsguo72kgDNqndDFklBqFE2QyWtu5vzVJZyJzh6hdCZFRG9ineuSA3YFsqVpljh46PDFS1d+4zd/+3/4m3+Lkw6e/PW//quf/vTz3fFzr+LI+te//nWBMh2oY75rfPjBz73wwmtHj7340g/4o/96buBSnj53jd7poxXsMNB+4+atI4ez422pw/PBwQ+s2V7/qy5t37adsMsGnykaEl9zUbj5GTjKArLwJV7OBkfapoGAiqSa03MgM0pEI0OvpTMpZvrL2jFdsjhfZhrVV/V9Rrjn8tVWvJ8GR3Yza/CEL/MOwZh0ziKrDwNGVd5hMUadU5ib3rjtWbH+nNqGf/YeunRpSJVyjkO7+6uJWdX6BpRUiBjVK5rFMz5oeV8b8SnjfQcPb17ySbF+StwE5gvUJ08c876Sf/G//C9PP/74v/pX/+rJJ546eeLUyy/7oYLfRR23SmksM4RL5Dx6uKUd0/FQpBwwfvIzz/mhJ4t6NX7pM089pWqmGssVxCNhP9UyUny6bCKZeHambwMZskWkqXcAj6exaQtIZh4lCdBccu08ZNFWeFgp6IQ
8d1bIC9ytlDaojVzAr809wWi/I+E6oAW0oJReDXUCXQIbrtsOp2TTJ1UKvVooRN0qXEzVLJvSHVh0k3cUjODGH5zTTTdKyrqTak6cHfbSgnD4abW+glH9df1OOloFjkdqSIM1Ax7an0lZEdmqJ55GvZ4eo8hHU/1TBAeGSjmpaoeQ1XIz65EzEVvtdNwEixSKlCreFiiRJTvMaUUUaYRnilTEB6meTQTzTsw2KDq1MufPnYPoK9gAPbk0nOlbKbK0dhUBhaVEKnKc37QehEWma7RFTWlGxEBECmRxAjie0mWjdNQWkSKqV/WM6Ea8Xql7eaJrTEtRlHah0o6sazJKHMmzAehOwrOrEjlMXZ88j4L3FVmQEBWhGxvVVnN1AKV2EVHqG/3VQDMG0KLW3SDHjEjQ/v4Xv/hFN23vvHvWL2CE0M2W0e4lA/qVGLtvOXosizQgUh9aO2vVCteuS3Xs7nQPz/gbLt1DD+QMBlYsJ3ywILGi+j//8z8vi0cplvPnLx44kBuvVkoMeSmkz33y2VdefePSxctmPoJKdYgi6jubCnmbjoXKfo33A/oJts1zajvlYeeA+AA+qBeAF1l1idrpEqVLZVmpP1N4e1qgHJ3bTTHXn1K2OtNFK7j0jLZUwdQJ9xqT4ji3PGmLaNsMHVawKY1uje8PwABUR5oFK6oyVEEZimtTvDwVVFaxZdnb76fi6SejI0MY3lhVFsVEATAIndqhUBhbU+VWCr1Sf/7P/3mt6VV/GuvLX/5tu3PPffJTv/d7/+HBw0cYv/DeRXvdmPsDg4sXL1OivVxS9FyfIr8ppt9iaW/clgOKTgIsbmZFPnTP3DUfi64J68asWKmT/1Vw8/+2XUKLt2kjDrsNADORbL4Vrw9Y+NkCw5sJ0NAAbg+kqqN3AesoHKWciUiU3QUxM02+9W/TlRnZ4c31Xe6JQXwbGOe2GW7O3dXEesswjWfmLXEMKYoh/zTtDh0x9LnbyO3buMRnszgPIVKl0jUMShHoMqDrOiosDc+hTE9KGwjKZTWMZbx0RdjqKsrBfbenjxKJYAA4AeLqTEHmyo5jgEWUdt+ylV49cOI6AVVwajsUY3S7d4yCh/MocDOsFEPdVlTmXR/gSgHT1JrNYw7fwCrlldIUbfkhQ4nglv12W1ahImwt7SgSXVIoLcIGL48UKC3EpwEMKLWCAAG0Ya7syKUWFVdatgpKAaKqWXtsanv04uSuTXxPZTx59jDGBoJHNWJL0D2W8c8iKcOyQRZSRXYYRlkSWSk2URXt1q5EWcAZRstTD2UhpbAFIWX6oFzv8hTzqaefnF8f5wQ5zWfPvl02CwlZOjVoLVYzc4j01JYsqIkitbubbll2aDMSydU9FpmwhLvh++//+79hhTb1+FGwCUsouDrT4+V33nmP87w1O5i23GwJrB9pPfnkB2cPnUWpG57buhewY3D8xPELPm77/n5DS0OY4MTZ/PLSD15il1crkvVMlRsoFVTUtDWVyoJGXimgofzNwgFVq5/IKmrc0NVlcdZiGRq3ykqLYJi4xUSRmmt2lSLSD4qUsxQpTyxXdbXEMkjrTwXheKTlmTfob9q3Skovrj/oou0klYIDtjCg8LB0VrSd1tTb1d0K5OpNf9bQXj5jQjMEpE5DfOr551w5WaUoMSNpCJdTjt4YIALup1MWObdNho9bKGp1jNwizy+6UIDW78svXKXP2qQ2u5D6zk3JEKf6qzjRmPXBUt3wSpWqRVt8apQprnd41ike6lH6If/RN/P2xPCPv7uq9mW+wZKNHwP9I4qTG9daMCn+3D4PM1UFJSiLWHZFKPg1DIqsFA5B5HSlpmNvlhN09SmP0LduZVNVnKQKijoYfKm1YWo4yowHQw3F4Vk5qIV4I3L5iQOldUwfQodja2eCo3S5ohZnsnNti2E5gF4R2hDh2MpJQ1VRjq4XytJAmyJZdFI1JEXHVs5qKHON1pCJkhKv1CaOczmGSEN5EIFswOXFACX4EaRwgIxfrPiAaJozunQvPC0qT8WjaqAiUCKtXdmicWdtgy/BIk0bgXKqI22lQzx0MasaWn4Uadx+4QtfgBhanlTxUH0RRcx4M41Wg9ELqR7ETtkNKbV01k8MBOsSCgYUDA2ILFxpAZGgFE/FhQWuFJv4nzz5jKXLYYdvfeub7rF4yCVf+BXJpRbz4Ol7C1p3Sj4cJvz82XK1AWe54k/BLGZ39G/9rb9pUWHdhKXuECnPzRQuos+du+T5uedYWhbd54zdI377O991Af6Vr3zV1bqvz3PJD6VZ6uaVT9pym4gNWBTxvzbvmBcQUQKsq84KjqwiItICniKII7HJImJGBGVoRaqzxKbo/OEYtkWpIaYVwYsUlwKUKiy+GOiZ8k378lwRgKBLAR4QLE/FYrqwOJXwJAwjXhPNim39kSW1RiVZXU5pO55qLs0VxICygBIz2Ne+9jWXRyZ3pRqxOi1OThgZDrqcvd9OMo0h5aRYkVIl6zpPFlgkHMOhh8LcUb38so46i19uQ82PUgHwobblwx3Ine7dUbSToQSkpaaD8G3m56xPxqNJ2zYgImfA7gAhQs19765qguodW23jLhVpvEaw62o5eVK6okK8cw82oaZs9CWTzjg/RVl6IBHPXll6UvulBuCoCmjaYdgczIvWAY2kCJu6aQOVVESDWQyFIGgnliryZYC23IQpp6f0GGRp2eAMLVw94R1IkFYKQ/sZVSisk6IQmxNfitrn6jypmJ1R10ox2jrWk2qA0yCtOFl0Wbbw1yvi6CigRbKtPsoqghSX6nz0dLeiUhnTU5FqoLmgFKfNwJHeWAlloDwEW01OouCE0FNVu5xVglK1mOElFm8QKCQrxQYUrXQIt6+msbW0SkysbgUMJzdYrhM7VzLhYJsWFyiOOVIB10NcP6IYk5jZ0knw+9ytUqcJUGgGTNTJBpwhPigl2yI8rVF9q6slYsCGue6tFN2qQCE3fvqnf/ZLX/rFM2fe+o3f+I1f/1f/Kr+nacg90NjUbnOHTTMNbIE6tizeD6FgtyhT6TwRVEfKL1+68nu/93u9GX355R9YtPjzrW99ywThMtxgEU8vWX355Teste5W3YEJ4F/5K3/lzDtnHRh5/vnnyKJcvXLt2HG/UJ7fgL///unHTn/qU58217z51utzgOVdP4Ov2+14dZ4DZOE8lILWFyJozaKInhSgtFGkYBHLL1t622Xxl7j4K7Us1oq0ILyLAU/jDEGXgtZipaVLgcACyFwSyKrc5soSf4u4AUc3FQx7mCBOooBRs0nKzyJOPNzTZJ0KFIV7uxIUWbJKv/Od77hWMO+5tbITboF54vH8KkObuoF+5PTD2k5Xd/HBss5LdtzwyvaLb7/9plsugEKzaPDBSHFYVI/1rj/6QcNLMEHZ7P4N+sMl4tD1v+1CKB8+nzONUlt/s0TpQQG1dmflpZeHHzgszZK2na6Fheztja891lWgHIuOAphblC2Sb70omn6QdBhXcruPVmELyEKaQhS19zhXA1e3DjNFEFXS+9GnPll1MUuBSjaUxTFjI+WqFk
WWqmWLlM97oixzu0VKFQEK61sie/j24EHUfi3Fhp8e2aUNxakKaTkpB7IAgogZDimuo6C0iJKlZ49OIi3FKQ5SDDyBAKWcoXDxQNBRpOboWJ95oTyoy41ar13MsvmGz1QfsXWsKn2rzO3cVFErvHVjWVxurKotBuLwci73IICqCja7m9ZQqjDLdpVIbVUZnIiGK8QeoGFpnsXvpur73/8+W+5pFJmIbZKIm2vGNm7NucPQQyxXzdJJVspzYMxTXq+ktEnrWHGcjZtU0eppeBSB4d9vHrGJq5dSyAeLAdz9n65p3+fA4c0FEE/MVmTZ2Q0mD7TdMl0H9qQrpKVvsypjBsxMxDdXUapvyXE/+vu//x+0psXeCQulJjsXtmQFxxnCN998B249QxdSwaHw2U9+4tHHTv+7L3/FzyJUwYWZ1fnYI8cEXPx9kcTlvInZl5jfu3ixbsRn/2t0/3RIAVEwPXOTbvmarSG2QEsgiIUhhy5bOm8LpcAh6rX4UXCiSDes2yuPpaT8y9yysnVt0+KyuoR0KpRuDNrKpj4rQSktxSkrShrU6CsCBxrXs8nKYgZ0NssTCIrGggM4CgZeNS2CzjTcSoNh1OZiCK7n+6mfDu/nhnrUl7/8ZV1Okf7/+7//VbWmnCpXJ1pWoyvVyjykyhKle7j+i1p71DOl1BO2MiSsaiarzYUlNXfCttUWVevB55zkLHXbWdc6BHgltVwVjA7ZPFmYtmojSjUcHyBVm5P7y8AuovK8BCWKEYALqVQ3KN1QGuRgzjWme6XTtEi0iYisbM1jKKXhrvKq5RZPLA7urpjgogogKhVidRMsROsWIAimtnkDEOWKRvx2xUSBuCIa1GWGjLo4dJXeUA35TZJnUwfjoTZziqr/1ENN8NDgRAcN8NYLJcSpSCmUpyHnSiqcvo2zncgah6aNAxwPHECapYdCFKpKYWJ83oROFs8I3V4ay1knMei1KLK7Oknl/NYsV7WrFNR/JvxIBF7AMJAocTNByCH/+IY+ePcq1TdupK7zSdb6v5GeP7USuwM836WUHwWdnhYVqS2UaoNgbvVRGiIItvZbj0yMMY9YuhlobcBskxBD9+s1DcQ1IykbcUopwYaoGzRoNSca9aHewgEppXvaom5IaSsPK9iaQgA6/fH8oBUrnxgWbOuW+cty9cILn7NauLmZB87GTs6DeOe3UzsjfbsdZflQN1q0J20nWsTl84bygXc0u0DOL5+459zHn/25n/OUwpTkXLvtQau+yctYE0BTmFHjCAYn3aTaDLTqv/XWG1QRtyX4zMey6jsA6eystyz++I//uAr+3u/97uuvv0nwwDEvYzzn1Bf+9Llp/biu9XXsdgb+bVs8lfKvfWP6f3zeli56KIs4/aHRGE21E1vaSNoWGTUtTz9J8XbiS4tsQQNt2mi7BJLBEDcGqgIK0dypy3bwtg8k7/DbjNwWcUOnEuqmXavaf6Q5UGjIqPI028aMP4THLkNyuwpnMMaB28yDtRI4tYjW1I4mSXdRGae+Nvn+zT/61jetPW6zPAB69dWX3XupmsdajOv/eoJ2JNsFFcUYkaVQTJz1pNRCIXpMqwuAaHffxObunc6Mb0Nbfg6Cnrhh1k6se68pheZz4NQfvPO5OyqUtoX5FqJI2joSF55qRtwsPHd6kByOMlWADKj53VKDWjZ3se6yZlofuUxn6O1GDHMXnXgbr6sOBoDOjVlgH1QlAxuzLDpmNdES2CBGhZpwA721FW502loR+pVWp1S2sYbUB3ujOMtTEapwdnWks0bxIwI8dbhsKBio6tzUgPIWP7pSY74MiiAFdG5IKUGpEj4AsuVUhKcUbvCnnitt6MogbX0R6WGXSGtXtfTXmfrmXaqyvRTEqTo6Aw1uOyKb85XRL60e1z6hb/sHnXCw5uJml2nqVh1XkdL6XPEqZKK+bRybaGAAGBAhNDStKik9QCkeda8SFKoMRSuTbStvatBDDE7dg5/SirufgGCTEpc1qq0W6m4upg1xwVJeT0qvHqlSFhUxzUNWMMhWs5Zq1RRhWES4Pbn2ZAwcw0BQyoE5K5h7O+eglZoEonBzihpLehpVgtJM0ruh5elThWEe9FAejeU0R50XH68Q/OW//Jc5j2JBMkO5DJeKjPnLHRWvvvSlL5n1dGMbTRbUc+/lyZ8ljchf+kt/yTamzqP0L/7Fv2j3zwbjm2+e8fsOftKjoR548Cj9QMhUYPNP46p1s7v1UrWd5WFbhfk7wyQiYAVBO26ziYwuhGH6lQ4a5i6KjZjiERdGILaFZlUnI2GmRVEqgxSR3IiWcZOSZbGgBYlLk53lyl9ZoOJdoujBkzjMUKr+HNj2OlxQD7k3/5TyoSbgBIcl6hHDP6DIXymguTdJOpU5UJeWdWKCNcMBG6KbKjt75hW/7tDEXLp06Yap0kJlfepMZamzSjnbyaUsFLOJ0rVKdRxzF7ha5Iw7Rj/M4H392abtfJsuiLn0QWySOSydUGdlejBn/PI3b04IDpFqB9VvWxzJ97pu31ElBFugNovExHkzQ4lO7UGKN3aIzc7vzqx7iRo9ZGmB0mPQ+vUcYzyoEikiwTaGtHQiJeJErBLziEnH68mn7eIxZkWsQEw6dYBsKbKgGlA0AGZ1RkGnVr8phWYaEOlkueZkIeVR2jhoY/oLSvUJ+6cQQcBDvN6i40EBTFfzBvc25/bjbTfFiacAV0oJKE62RejlhAiaimhXdeEhoBwnJ8usg0H4gw2CoTpbKXog0rTdjCNjqxowT+e8QrlSP0FD3wVSsnxQ2voSgTPUsYdYK+g1VIdlCSqC4IQ0Sito5a8IXKipVUEMBA0bLqkFbaD+FxENFZSSxWAy1Rnw++UQuo017Ws0MkcbEWqBIhvClJtGyQKPuKxtimyI1dxyhgM0S2mgx0huPFG4yhl60DOwr3rla1ZNbSEm8XUuSLFhphBAWimIa7lqiE+bn5eF8zOf+exv/dbvCELcfsCbTW7ke+v5OFPutDA05RaEBuku0oyCPSe1toz7zAmeM9U3P7o4dvwoz90tqThtbqEU2cqz/Iinings/1M/9VOe57kC8GYEDLxFF162zIb2TskKXW9MBd8GrC89unvBSVsqu38/K/GqsBDV2S4krG+LU8+pQgQ3Iq0sllK2dB1ro9Iw2VEQouFGfpaZje5KYePVXD1kEGQsbC1jyDvmc+qBgpjfIjrA6nuKdvHY2sKqhG6WGg1sC/PXlxwnO08U55ICC1MHH8hyWP7aZQKiG0hrTil/sZUuvHBFU4ngOiceIq4k9DSgaUyADn/6DCc2EymKp1PG1PnzV2z06d32JtWOzitXL+lv586n7n7ujNIoZJzXlt0d3iIzhGdcSdZDH99fzi9SN0EbV1M9S0wrJcUORBzdvQcf/Nq140Vq79mAtcqqhctyLvEf3jhMxQUkXqGITbXBIJv5DtMy02IacSCqwKgo2S1O9uLIto6lwjG2pR88kgWjglL+4cEAOgXUMy6Wwkr14zx65LgqoCs1VMqDAVAVFTN9VGH16C5KUTA3xUOhFANPBEvRN
EQS9W+NMOAnW0P6rixAB+hSWfqrhwP6h2ZWRCGkOmtrRLM89NxUZaNoQCkpGkBNSJttN8VVHmyAxZY2VVr6kkJHbATqFdO84oyi3QraDFxAfEE5dzUrIrs7/dU6TiA+0oqTYrRZPiDWE3Q4egWjbZZMdC7B1QIiVQQpQ5m1fptAk+GEY9N2BqEGavVlXUKqJqILRj1eliemSHNuRuC5c5YozrBIbf2REtefa45dPLWCQjkcz5p36GdIESn0VrPaqrnOwBFp4wCH6cGMAgfwgt/kNCtVSlseD87Ht7xd4p//839Gj9+I6qZtxwj/aQATbcogW7AU+fGy45HuogRExa1PwLk+l+SeP/29v/f33GZZrdVRHKQ/eOUld2BwW4gmFwFXU1fxv/u7v+eHnBbFy5eca09XNE/I+mT8xtp0hq3lP9W/NKejprMFFtLsnWlapI3SdDGPhtVeEO0l7ZUKHY2behVfnBtty0qdGW2r1VfhLlL9KFUl29LbmrfcKGDxb8l3/HUTp3UsS5pGz3cXpf/4wVwPysuqiAsqPFTZM3fAR410PhTpxok8T9yG8Q7198/MZYObXGFVlZHO2k8nQwnO7ckqw2eOo+eH5J2HdTl3JQY7t4Gs7iTIvJK6qDFh1zZvKdzQt5v/GZktjrEd6BgmIwpShklKHW7YjSNcK5ObQTv7nvO+ohmUubl5+KHTNCdAs6J0dqijJVKrCM/We8tbDKGDhWDGs2DR5zmKmlqoN9N3fWaQDlO6f5rePxQ63M256EoTTXUp1G38cizvtJhZZp711HisX30/dzZAKSv1mQk42XqFu0V1r3qaLllZpSW2Xs1W4a4IVRhKl4Jy4gGKmpXWq/KQKkMt1kRUTW9s07YIgawq4J+HdHhjDiB6tue5FamIiJt+Of/I9ikonTHt19DTRtqdOCLAo6iqtGazUTNQ9zqt40FDKWK84VdUHgp1P0UWLf3Ttrv+bUEq3bpinwqdxXpOnKCsyZQUYunwpZB+pfQgduSoLB+miyVG8MYEJ/Hpz5tol14T2DAs07ShqGk1NAsfF6Zji54OJlh6dV7MIsiue/Jaa7998e5dy4BRi06EnmWLkgL9W/Qj/KVnnLIZnDmFfsrdD/3Tf/pPeyUunu6u3FFRqvTv/J2/Y45zF8UNFh0RUEsX7++9d9WjKU1jjafKL0+PHX1PT+jPse21zMNaY0383YBOwGV2fa4fH8H3Yb1TagU5lvbA3ZRdBp4sZ8opEDsXFngb4RVn0VjE2/i2L0X3Ugg3fTXbdKtZGMO5hSpnd0u44+9yAMJim2DLcW8RVxOu0ii0Erjg0CFt9tr9s0Ore5PlSMeUcalZjRHKvRXAP9+SvEcYt/Y+/K95mY9dqsqZvm256mWKCiaX+42cUjtw4OTJ3Et1uTJJ87NLlGVprQUZGnPx3WE1zqfWiFIBgXSM51PrlIKWSQv4VE+nN24h25ozkUdKShW1LbULCjZZoenKmbh7+9YH3D0ZPB1AqStZY5OjHHNCxiwj7Iy7LXBRFh/W6kqKTJ2RdqJhFLSoHnZZpY6HUpxtmJZySaOu2mEw2SHG5NyNVZu0sqSWIIQ2sSYCMPABhUJg9JJqEBt3WZrVFU9l+QlkQTnRa6guKR2RWRq2Y6AUIi2th7JkQQUp2ZqLfg6gF2qxnChdd0Z0G8ypV5cZC42iGmI3MCfT6Ky2hUTVGKppIgB+9VreuFPAA6FDWrxurFL8y9zSrBSRlFLEZivOSVt5nqkIuAAaja7rnV4TfLMt5kIbAq4zmIu1LJyeegKnjSp0LYhehvpW04gLGnCydRUdpUqk6NJKlYc2+ktsqrQg2yIImBeVxQ5ckwFbcGo0tY+54dpEANtWzZ/kL1UbsezDZRVEsTR6HCV0bkPdTgmjOy0HVWwiCb7rcXOay77x2dPWfMXR3oRlzHtbnK3wpNzE52yL18/7RfZ/+A//gbguyVUizLFyX1+XP3s4fuhqNiBqEYS2Co7a25Xdo1x2j36sd4rsibOKtC7VtHBthnK3IeLRsP2nr2xcmICk6E74EArlehSL7VdxNJWNfGxsVRWxJhkUWg2/qwpXHhYqXRHRlKXU2RntazjMWb+cIaTNLEkEtBv7quS8l+NOFz80x9z4snFHVFBofj8/r80NAx90bAMtR9IPWV/4krsreXN+fBjAY7QO0RsuEtjK6nVFpFE719Ccr71DqqqgJusnJlkTNARTuyDtbLOXeZ5VH+kcwGO0VsMo3TzwoApx6N3TjDLimGfVTdXGqLsibjnYZhPfhaeZLhvK9afI2M3sDHhfvxUxAedYKU3b0rvVQQEonCHlRdMEq3PcS6whLuQhgB781Zb84LIAvQwNFEp9QCyCIS9hHliUVYofHkXb63FZjslGcAtTvplSy6CkNRIBpaqMDjeDo8vqENoLWz0vG06Uze3VdqQR5CsprafUN3tqWgrqSQNVi1JEFKXVT8HwbhwWyWalFcEAKX/MTZXJArjhVCvN4pySJKqgqA3KPUUGnnZB9MiEoJ80Sj070WktY6ZdjhmQOEVAre3R8wc/Q0stHINSLilqab2SOrqHogjeSx94A9gqVFBpgVpIRSrFB/pLbFqe8ivCTAniIPZf0i7pKfsP/vRP/7SnRFYR64cLxzbiUoKtcDdlW3Lfv7xqLXhHXDwF1iaqxeaFFz5jjcyh5yNHfu3Xfu21196mRTtY5b3vpswohktErrjN8gqxG15D12OEeH0kBf7cc8+bkl568eX0pQ9848rp/yN+Jqya6pYZHExD/An8j+x/SuBS1e/xjfPxfxprN90loi/xKrk7bee5m74Ei1RtFRZvp0LRc0Y8na16IMWlYm6ud4+uTV3AOTjjYs6qoA9reuD57uOPP/r66xiPmyi8ptImktli6qs3Gr+3x0j1/zDp9OSs3fOPsijhj8WHUR0G8EHXck8FMYRXigG/FUSKzVDNF3lsgHlou72OhxgXGZfzTl6jx1QnJhQqyg7JGmyoDNNFb+W3IUvzAIWccanleC5mMC5muIqdwLhJwlap8dz5i+3lxrRxbeEZtxI7bjFKc+eFVkkp5cMTQ9hEHFNxPPSMVLxFhFekiFIjv2vS1lb0K+XnRPz24a6yUVLf6AFLj9lQRUhhcPlZNln64Rmom43QnP1D9Ps2FEZpWCDbaRSl/tSHZukBpHYdKL6cly1bU/QiZWtAiqOXWTrI3jV+6YRwAH9TDtBT8fKgAHgB267bLRUcRKC0zIziFw0pHFFpA1WessERKwgXUiBrQSJLUEfH6Zf22ATWUuQHJe60DEvbg7ovToK1gqFKiJAFEJR2J5XiJ8puL6qsIsAuPZUthQYM6ACFnl2FisqGXjZZDOUp0rSccPo3nL6YPsu2LuFg8S//8i//o3/0j6zKjhIb5fRg29VTx3Z1fiS8bboUWqi+8IWf6MkUM5qp5MSJbLSKrW0UXdimw9TdAFTHXAOJsYYiQpVGMaJFXtbU8fGPPevl3B6Jif9eryaee4k/dL61bih2hTaU7QzOOZFFvG+UuLGYKRr5Pcy72T2qbhdt2zfO
jMUiUb70496sMSnchY3bQ7qtc7K7RQjLgbLdL4p+EuHmSfNZrgApuHtfqS7kYsLlCMTvJSD/4l/8C4OohhDh+UyK+6HMzen2HwHmh0z5sNfOhiJX9Q39mfWC4QmMaCuooVfAo2vNPU+miA7SIsWNXStdK85PXlUtSomH9D+sMrsjs3o7j+/WRK+gzt0VkYT1QJ4wF9dMg2SbzzBXWrU8zlbnFlxZpn3zsx6dLHv6U6qVrKfu2/YfPZJnY3w1s0jtTbnN1OMswpGidHuflNK8gyEPVxK8PKDKGEsNfbT7/etE6GQZo9jm7633vVf9lkuN3rPnl/9O3PqTNy33wIvZIg/j+JZJY9N7GG0NqKJfrTs/chVgizMD2OLNEFcKIaK8pdG7FaG5gotZETYeKoJLFdUBWXj1SHkCELs2QArlb6kTStG/3a5R1AGgcTHztdrgTIBbc9CpTHVDER786eVz6VBO9GHbbARts3EeWzkxVEnXCZVKf5gQtb+1Icqmo4uqw3suEYwxdxuMsuVMmn0n86mfCrHiAYxFS+2cxyVuPCC6ksBs2jV06/zyhyfYxJ9d2priQcejKL4O3lTRquCU5GoJRUoWpUrIlg1R7WQBniKIBQNZ9LUkh1HWrGF9stXmjW34/8Jf+Av//J//cwwXL1zo5Q7O6vkTpwyRrT/1WUprb0bFyhpjQvFLavd2brD0I77zk8jUOlnjx36NM886yoGDXgtyw33VsaPXco27/9D5cxdOnnzomWc+bgFzMaHtVMoPGRuo5Xl9aLqIC/nwmt6hajcmu3h1bSO/NG+QRSfin+y2YbZyabLlXhsaBbLocL2k/NGww59pI/PZaE4RTZs+UP5WcIc2s9VOHhueO2o6lLbanYwbF/pHI9pj4JvniAZLrnguX7bxAPfbbY179eplRXYm/uE//IdEcILpBhy+Q9VHyCQqG+AzhV1+HjiUM35dooo4/WewgzG7EYNzu2OBlgZHFl0M+OamCz1B3A7DlqZrza6YB2KKCdh+wWpicrIwW3Zu3ebWxxJMm/+jyJ1GMpnSM12yjWhsuE6EoDDDnFlg4AO3hHnXflYEm32zCOTRs/9ZFbND1jzapL6z2ZqMhk13McuoMHrHedt16rYzJ9KU0eRBtlaY5eTwoSO+bOBgjE8WuOHz9np+7XvAvJtqZ/pI1DjMSTqFiedb5zczODrQAK2gVDMkitODpPgRAYTCKilFOoybaU4pQMQjLV6GaqtULZaNY1W7DCmtCalSbKoAZLt21v+yKa3+DLPq3RnhSydZP1XPpwBMP9kufeDmB7llAYoogdQfZbWLAm9RSmdVn/ptKqiluFeKtBUkC/ADRBqklBRv1kxq/92vkQxCNbJKWbTwO1dNicc8NFsYjECIuwQ3B1oHJ812RRDtdPWeDIUPNQ1RVJ9RapFa+GSTKq0Il4RUQxOJr1P9YdssV1WFgpMUQCGro+rD+mH68zQ0otBOS7EVTnS9MUeB5wrD+8tdDqsFi7/yK3/113/9/2dLnFTkCwnvHXPftuCP/5tnkBnXXWJdencE7fO70LfPnjGyL1+98vSTTz32xOPqeOLUkauXr9qDcZ3pNpVNXwvR9k6DHt4ndJlfxPnCe++prAs8ldUQ6XyHLsGfe+5Zh9Dsav7xbv2pcEyjcC/K+PqhUUoEsJVHuqQa55aOV23TFf+FKNzFh/d2IjLTTTZdpbjiNjekTbAEan9l9yDES1nIyu6hqIjIGylS65PmAG6qLl+59tTTT5jOta+P1GsvW4Xuhmd8L6/00feduth+3eOH7mN+mvWAdWWzlWIoGSscUNkTx08ZNUaf6Rpil7h4ozrrS1aQ8hPh7Ypqh+TMxJl21pxAbaNnEDWehw76qYfnBG5rDvt9ljuA/NzXcnXlfc9y3NOYxPLIiuCsok4Y3/AKwNmppGpz7Wzv0fvgOYrS6cOJCoEWLD9AoH9cuZX7J9+Ryt6l4FMYnfDpOam7//1+gBLOeURXd/nqpK9lRpa8+UaVEKlI1G/mtfbo6VS5POTKretXLlteb16zgia4SfPT49wImic9Xsx+6ezavX89SynLdN64lfczzWbn5qszSl00qBc6B7S6SsHja64R/I2gWfTW9fiTn7uZv51dnmgY9FpFEPKQfV7+RBlxSviPfvP9D7ykReNpYHqEDk2WxTYnIhPFhQUbHqbxKGr8aZvq+xuobxAMfD5sBcp8muVNnPk8Fwi+gTo/YNQKdo5zi5tfYFy/dZ0JFQH8BEzQA1AYReE5H2QVsaW6KHCl0rEVx4g0OG0pI6rEUZbmo4oIPWrn3sjC07eqN8ge73/lK1+xJim1blmNIIAUE1S5wVJr4gClPjQ+7C5bSmVJQfAIAimlEZslWfeULRDHAJYtgqA+Q8iyVcEQc2Oe1jR8cmOfPklzrCwenz+iXPba9asrOMJGlXde+32mV0y4QXz8iUd/+Zd/yZv3/vW//tes6wj6SPZbUpfMyWaU+D9vMpp9AB2bTetZHL4nmPny2hJfazywb/Nm0ozTfd/81re+8OM//tLLL3/hJ77ou0JHjx2/cPlSKmCF0w9U6IN9vjk4Tzf2+5xZaqZvT8fQUl7CeunqJbdlV29efv/KzZOHTvoi9mNPnZbNmwZ9m2k6RtzbTtYZ4bKqUdIgGwY8829VgVgkZ1JIu2wLQlQ09InItoTm6Y0UbkkbmepBrK1Sq7LaIjjUpONwwj0mzC3hHKImzA3yvSAtv+FKK4+apH4Xvdh372VEpzx7XGo29Z3oFcHpCbWOh6a7tf90HtMcsr4JYokyYfgl982b51x1+m2HO2Krilpcvnr92VMPGUTff8mbSm5YSnK04vot70afK/u8gi8fSFGF8XzrcH5r1SD4nRh0OmE9yl2H8TQ3SBkObqiMGgPZB4chJi740I8+OO8D1P/idqbfQ75mYpgbjyprqt5MoPNcmXZySo0hFmmgB2IsYM4gnGkzJwM71HGsSMFPnsy0FV8HILLGSx6PZfhFCLEjWRFf+YFCtvMCYmVRrCYeb9Ukeka6VSLP+bNlUYrVDd4po4JSlOK877xTYm0pZ4szvbcrgxTP+BBej40q0hpUIQfMuaVLOSCm9UQKMPC2fnfyQtQYUvwAQgrAsWGGR3C+jwdPgLY7TujjZ/jrHoSJyrYueCATmchSWCstrba6tPgpIQWWtorIFhQRxA9apJq7WQwWFsy5MchMlWWmshBAqkjxKpHyBFDl+7ElEgfwZotgoA0dM4osPQ1F2VAUaVxBtkSZ73RZ8XSf5NSZByR/7s/9OV0UheDM41kpZSkBNKsRSrW1iE702kKBFG8VmqKDiivlQKtTKzRTKC0zNgAnsgwlO3TEiWoyW4ZOVUktaZaILGYz3dCpa6D7IB+jgMjFS+9deOOCB3J9PoQzVmjPNJI3jwBhQlwfUyWlLAV1ItidMFJ3kpJzNWDJueCJ7LvvOoejS7sgO3jtam6yKRO5qHZrpS2DZ+JNQYBjEKlaeKO8VdZsYMdJLcXPzbDAv/H6mcRN04yH8S6XgIe9Pi96/uNhhl5bocruF4A
fxtTSo3bFU8eBRLsR/lBFi2ch1bOypHdxI+Se+pYn5V/Wd+mKVhaSS4f5rb1GZMKFqGv94w+funL12sHjXqfyvgn36vWbfhuuNGNgtsfgumNUJe3VfLK3obXe8TMi2+ygmfGm8242ALtiIRZsDBpNpIyLB9yQeBHGoTw2QmQFXZBlObRLwYzuKtf/HXq04WEam60XstGPREULiiy82SqtlnZCeA2vSqIAgphBccohlICdCierqLLoNbd1Mb2ElNKVQvDjBOhFSKmL//MyjdGf/DBUEGfpi8iECwJZiLkPv6hhk8VZi53+aGipYaxbYNAntEp9WwohAGdt1VxxIkWkoIYwtFVQanH/fPmXOVl6AIUtai1kR0H46RRbWUg9XEVLHKXaCKYhbmQfDFScIASg4IQs/QuZ8kQDM2K1tb0U1WJlK74EEeHNLs5mpaqGKA4UApT6IKVcWKQYjMAOQjdVPvXr4tEMOz/Lzy82KNFw3aNfLUUVWC5RvnAKm23EsJVS4sglQVz0IgwtnjbKCh3lc1+Vq9SR1RaWzDqQRgdbbWKYiMlOmrWK21TRqZqumYxDU7+l2qMs2QjPWvuBjY+BuscZiNGFRhsQUJQsDB8Fbl7Ppxr7zIMDguxqQFad7qFGVGfAs8kBRsMz/VCLjEvxBM3Oj9VLA128kFfY2SXIvGxcaIttE9xD/0cnqTKhph9d+g6JPUoSUj1hOg8Dm8r+cbbaSXZdqtpFv8PkTmaP9T3ZWpd25JJb/iy8y5WO5HmVTqUdL53N1tSaqTQHcc968XT+rH08wL71bZ0T1a13aXS9a0o30cY/o+CDBx20O+wHIdn0M5aZcH05aX4QVYq3tNTc2NlczaNwppQipTDasckcoARgUEpb/JzhoI6QTBP+lCrFWo3CvSJeCqXDPGN0OGuVOMBcq2zIMkyKSQgpYJZZ+lkpj7TEXUqLOqQXG/14qlZaur91csWibmDgGxxSYKXuEawe2V28DmBDrJLiWqLbUxiAWhCkvPpxIjKxpB48lOvlZqWAHlC2FslCpEppGK54RWeJpZd5cUIANiKg/hOpOKRBwKOonCiW8uqUkl36MRBcXskW+oEVeNWWR9qmLI9s9ZRtDy67xMsvXTwCyHkUpgFVrYvJzsrk9sJSZO6De/2Pc03qgm2342qR1rR6aMYgxYmyrNc0/aBFLZWiNMVs1MEL9QexDbEUolQEBQ6ygzyVgreFa87MXIRCfR6/GM+/hNqwQOkqpZUsUc6PmHSu37iq1roZ2abMqSMRFKpIwXm1KOhMo2zOeBC4E0gZhHfSeBoCE3zgnieFfn3l3ISb2tucarRHLnVMtZbChOvmDQ/e1MLxlnqlXYAni24T3Rzb0IzOaeKJw20LPzwWo3fCBDykIo3DnSw/VG7p2eW+ba52p91vE3dZt3jivHVmj8492UrorFvRO/6u6txB3Wb2+CALRNVo0k+MDm5AsOtOjz722FYub8/yTFGLe2DCRPun0prTRYLMlF8itW6xk254lGd8gXY/b1Bym7S7XLHOtPFLBJusboBCkFdUjfRm5pGtHldmpZOqIBFg6nJHYVTCMasjJVJS2DIkgEz8Gs9Q8eHGB2T1bIjQQCyl2PADgvDyM4CBIArt3WqTtebXajkxl39dSDKEUlX1pEZLRCk/0/TAC9XJO3T+VnwZEq8ypwJThUrxAT+LkF0RnhPZJRIvg+jPjuotKQpt2BCrn1pZodi4NRWBKy2D0oIeA6nO8svivHZzE7SqQiTICp5qaJyFtNkiqdV40rRqm1aPVBZwFQ+pZV0RVShlkDaLgtPvkJYGdFKFxr+UCpbuPgOC3iK40npVBtqAUkQpHEPTClaEn+6l+jY/DCbBdlnDT1YQiC/ZpbCGZCGAfsxSsGu9DCVKG0MMVYgCKZRYl9BbVIQUcHKPnPsq/OhjdGxvahcnS8/r1Wa/kMFSxFB1Ll58T0qV5cp9FYSIIghFhn3r23GBrucYLLLoOLGVuYak9wWO7lRNT+Wax2xdq4TUdfeXvvQlVwbeEqLKnNyo2hWMFNiq2o5HJAcBL9joHAZXGBCwnilasXzIPbvAMzrUbfT86SQMVRHkttt/Krq3milbVj5E8R7rS6SNtSu4KZoALvriL4W2pbBFTRdddjEIrM6Awpbucf3alYTalZSh/WCebBlBngdbsYI/kOeyOli6meuR7fsDRnN67G3N0w22VkKvfiMUon+yQrMJEyB2v4oDGQt5KhYPSVXQ+QRSgHWmW01ZpXUGBT+dNFBog7Pb3p7v6PY8x6zUA2ucebolU2F5ZYTprepm0TFQh3MWqc1Ki4i5wLYsToJwgkDWYEs1trM/hAcoFFUtqcIoNysZ25vwoeNZsGUMkZ6mXBiDVAbQhdLgWctD6ZjrUn8HVluYRYTD1YZzeQJpVa1kla1ynOXpcguntoJlkyJKEUuRXVIooKqa2gysQmlFpGA3Ww1tEapaEUhhZVuRirNCKo29nT1lW6Ot3OYvzkUhq/nj4pZIqvql1Y+5pU1z5HLqWyXYIjzRZg6C3upgg+tFiBUphQi6HSSd1SplMgUt4v9CqlMW0XSPmVpEVlplSsosC19WIC2CFJfiqasqBSdID8DZyzLjELG+VWF1lqcaUBS5xuVSVM9+2vKBZs9wjF79S2/RId1COXw/JyyCM4SBCWAYApWyOzeq4nNrKjJkpaXUaF2qrfL/kClxdkUYv7Psbq38jk2lDEraEDFEVYdeQxpqaEBEpOFBOZBf17kh5oxmNe44rNJmNDpVqqfblVbzKPhoycaZEdpVgn6Htx9Na7iJ7yqvgqV26ds1uoi7yFKykJYuwT2IsC0Kzl38foI0F5ZdWYJiXrpRA7lx/S2R9yRS43r1EWYt0k+aQfDj0VlXz0HJJgE+Sf/T/ukCSvIHaERWdEsapG3lUmRnrXIVlQVJVtO7HFKKDhBpcMa82lgflek8fMAAKeAsM7quiE1HMmQA2VGW82KjbZTi0/OSnzuPCpCkhQClEFnQQw0ocW4A7q8RpRQzulT1mjWzKK2GaqtnGCreUg6gA+LwFqlX4zcUlWRx002ZGHYWM6pJ1XOGMMsCDHygCgVAZBG3snmzg4goEhFFRMqGAdQxrwKP8OisLBMqi7KyBBuEiET9Rla2sJhlcYJonFmVRxHYipSIIRUYtS1Cb7ZVkKKXQidP4NG4nakRS9Ely4C/aqWlSOtJU1k8BZQFKPD2PxFjAicYa+8f9H7TrfOIxaUtreywxxakAa+fKDwH2NyFd0JX1OHRZpXtAMAM70rmckQpbRVvkVRYpABnnakDKJBSpMOSJLh6zNkHT55z3gSX6jpwmjeEpTpRNlt8stbmxlzHcXyvGlCMnS7ToUzV27VSBQfX5w7J8yFglVIF7jleYZZ5+OHHbcqrjupbrvhPFcAjUDSroOobLGRbJCUeQ/6fKnDsbjA87i4VA3X1rTqhZsLYhLgfEnAnx+5Wov6IE4RETbgbxmjOIaZ0DA/DrMAdfapw8dZFOKQ/QrCYMXS3J/ew9cORqqqmK7EoP5yC21y7SlCrZ4
822Vb5ttiHYhW/m2XZ0ueWzkWEAFJNFyLbnrzrVdtcke6ha7mOcX+M4eyZdzocrmnVy1f0GXu8Xlgs/mtceJLhHw9wtvfPbVbQXH/4lweOaeWqmlMyfUVFXuxkEvD7Jr0F0GlA4IQAxyv8YopUcKerby9XmYEXVLNUJ+G/2i0NeFDcZaE0RCxiA60+hpzYVqzahoG8SrKkOJ7N/gONHSHVi0fpZsBMfFf1KshkjeHHpmIoi0in+rRIilMR65BW1VpOihWl1dwsnkq1iBKlQMh2KVzthEhhQ7asoOB06FNKvFGoXbWW7QTBHBE8EL6Nic3UDCeIiFMo6wMNKHUSQ8URASUr5Ri1AA8QFqmsM8T0TEmiUf5hSSIrxUDVynIVvyK2IrCd9Jcb1Vbr2hV/RXYZ6CRIw1JSE7UFB6M7tQCLkxu0SREx5GdtEy54kabaCwO8+ovjETduKC0Dc+NddhLgmgy9UiIGV2pKJZIhMhu2ONHb2bgBZBGZAPjh+OGKqGIURYoiW4psgdH6I8VGHIO0g5yGLeNmZdX0KJYrRiGYCZoE9BrZ2QCMe9i4rQpdriwMXatQ6OT8F7/4ReuEF0q5v6HEpONJkvmFXYKyXdioon0sZoRS63B61lQ1G9/IptofBVSZKlZ4YqXhAH+WAgo7IZbSOm5Kt02s4e3xCGsWrYNOG990I0WQZvtEcXJCrb3aoLKCu1HyH/en/iyvIPSt7EfVPZW9PU6XeNU2tsvEKt1FylNK3Sj/Ls8u3uG0OJcJXQ5b6RD0wurYq6hsUo2ot9jrM33pJ/qY1jz9wGl9QxO4DHImEAM2zVKpaNs2BP2I7NKsj7HNqi7hcQDQcHCzMeUup9y3STMAD2aNgJC1V0chPdUf2RnOzVKLp1akLW1WkWxlIWwRQUThS0sRWeFe+6pXeMTbaqwufqgkSZVcfY4BgKEgLvVGDdFprCXaqWIJoggbvEXYWCUuC+dTZRFlQd0irqh2ycouqNpdDTiZUBNC1CqiE8BdqNYHtTDmWZGVKqVHOlKbo3pMtF5Flt1yMvHgsewrMmee5ao+wYTY8XPxYCPeOvpVE2aArZRdnbs1glOIDUM1tIL1h9EyUMXncsJbazyUA0UoUkWrIhSCJc7VdtkVB/yYieBBlMKJcEYHVApHV2VsxAEHAItlrmxMbM8XkZUlG+IsEvWhmunBIK1peDVQCEcEpKRVAkHngFkeTz1RVG00qxF+WTohui4RWQ0kiwjw4wSlCFQromq6HGZKaO69moCDCirSeWqUuCxVmMdEhmitU6uUHa7qaT6orail3Og+hCGtCn1+QOHTz3zc+7OtUsY/TlYcTHAm0I9j/D5aijI6ktRzVgDn+ZZY+e3MjPbWThHkbiAe4p2l6RTz9hnLIVXq6+rb90HsQJ47917NoXuvkkDbA2KUfvSlpw0kviWzcOiBjGLmVDPZg/nun1rQo76qqVSRjh5/dqCeU7RDC4oO6j/knqWLDol7OxN98U3asjtVlLYEW1iFe9j3ZO9Us8nhqeyuBkTVX/y7DJqwahELu2yLggcIAigi3Uqkk2oa3VjqQuHXfu3XvvCFL3jue+3GdQPBlGVh8aOUL//Wb7uHwpZRtv+Adtmcf+lAm2bVoHPZsd8sSS1nDj/g24mZ61TBub8o3O4Exi6BvAU765k1K+5lwUozGVNeGgnnlQFiqauHeNoZpJGe6UV1ZDkmiwFCOVmCczcRrw13g2goRty81cL6VCYjmQycMGBeSil1zC8EQ/W2blJsGNArIksETgR0XqgqbEstdxBlCzhJtZKlVCE6tkJ9a4qiCOdKS2mWKmx1A89yFc5m/VfZKscJMcAqpVSWqxBOIgprNVRhHRCu6qGzDGqK//qVrG04KVEdzIVqaBGKLB5p3cBMD4C0InpMcWy1xQoTdNYNpfQgSinhP0rt0oOOKHVScRFx1gEUOMDDZ/zUqrJ0854rnm2vAKjFZmrDj7hSCEFv3pHeDS0lC1oKqed8KKBggxfBVqmFoHct4RgfDBsMcJXlbSuIB1CiiHurjy2XyiBbHmzcqFEpbSiQRqmqZIUFrtaCD5EF4nnhQq5XAEGg71ez0304UegBcN1ZkedD3l/++c9/3ilHqwKfldJvBueq1B1VFyoL3nK1znOsiJRFtVvZNgRVRBbxh0E0ICn1skZqfAcifOzKDRZZXul/k2bM0LzrwL2V5wRGRHJ7+X62/ffvy5VuW4c4EBMc2ubeGv70qPdweBz4SBZI3JP/o8Y5YZz+vLyq5t1GrKHSF/+yXotSRVJQhWXQkQgiWoTcl3s3ik8g+mku/S4RrDy+n2mYAP1Wcx87cjQapnbaYmkzKCkkNZ06fexwruWyAcjE+hUwitFHlcUtI2F7P8QHc5Ck/tQ3SlCkdTg846qKAN2vFhEXJxyRV9yAd4xUHIU5vUg2O2D+yKzBRiOcu9VLRXVhQzEn149WGHORxVwKEXTQaY4sXIpeF6fw9kgjXh+Il4GXldJe889kB0kI8IAqVJO6R2qZgCye4uWRClfdaFo6nsXQCaWqNNL1WzlgUyVElHKMckUVgXMegyFaKWwADvC0AUpsik6quGc/i3kkNglDK1CYqwrF9C1QtYUOmFC64i8LluzubFYl1UxQdhRsLk3gPHGxVAalDCHyH0UQ6jDldbi1brZF0maltVWdnAFKiUAUgSVSBOcqUlpBRW1fClH4Uz38EQHEdhJq4YiACBxU7TKEfxUprQk6PcGhE6IZEQfSwSBSzagdBr/pqVMDYl0RJx3AmEXUw70rFFGpldT/bQu+0fw3/sbfUAWtJoAUYrbCUajUvZRDxpYrF8jWLduGrU49r5OrFuPYJkFc2VXTcq70PnSVyj/RMM1J/RbbQyaDnZNkXZna3rXB17ZY2u5GOJA9yYEEyuO+Gx9c2XdlVsMMMdB2wak7lvLDp/fxfzONfLie3dB9OOd/ulJR1hETpZ1BEXMzyvxtT16uimHxVryCUnSw9EB07UkTfcyC7AbdXfJTTz5toTp56pTv454587aPbbqHVup7uWzNPdQMOjIzOuavJ7aZowyc6c96wQM+Ua+vAriNQEXE4RiA16PU83jVS8R5WZFLH0teKphLq/SlWsEMqQi3gaKmEHRQho4aOFBEfxlkyyO7GfbkkYwWZVIuQpZAszGVu6U8GzAm9XVZUkrXMENZ2mujXRYxwlOKH1j2EUvHQ2c5i5dOAxfhELKrDpibVYQIIMWLSOu8IpxcbZaejKyBckLrZx/149cwqkOkCuGAuCUcohSP6mvO6qkz3AZUHT44BzqnpiOakJaTIAYAqXUpqRoqUYpZygR6+fWSEjEzF0tzhVIcm1IUzAJbo6UgPnBgc3COzmqrWqnSNqIiOJGpqb+bQbJKqzkF41udZB3svpKVzgWKyoZSu80uDWwpkiqFtL4QgmMnPhBBUX31wqDPKDK3GpYcJosBVMNuSgq9tnbTMtOGSKFURKkq85JqabNNOcO6PkAWM2IfL1mlfJqBhx4bjFS28uxYeEeRL/Y+9VReHq+r2HEkYk0CWd68yeLi5
e9973sugS0bDYWGFXBPmqsfT03LAtldaBHKLk85PzwVmLZhe7hdSpXyFM2CSpU+Jxr5psNs6VC1HWH31Vof8qaE7eGLVkdztCnbjveVv0/B/erF3H0kQt4NCA272Q+R2i36k9nd1XA3vgnReN7OhgdRiBaz7MJb2nRP5yyRk43tbOjkddsufZxLc+XhwZVLkO9+9zs6HlmDhYjGxbPHBHqGTF4Enm0bvbSpT7pDSnR3BcEpBbsOVxwF88yKm0UlOue9mY0kmcWJMqVpl2pbFDwLryBOgFNNOzngySzQYukCYxKr4nDMflEl1Z+8dE8XXMzoS0nFRQGlVjv9EVeEKCVYbR08ao6IuUAbhirEtjSX2Aq3GfBXZxHpYkanZ4mzXBOLooKANnRuUA5QMAjOgQey/KDLlq10zGDZXdHHXKgSKVlAlkuy0gqWzVsz6mqLimOAYCBYPfTXrkkTXWmXVaVAaduxWaUcAzi9PUyWQjgNlUWpwzW0Ugg/yxzZbVeDd5JVVHNFyimtOQjNGJounnqFBx3QVk44ZOFlk+LkXvmLN6uOshhk+UmP+FCCQsluu1QzZoBNWoYixDEARdtQba5aFr8iXWtZrAn98913z+KRtVBhqBLBAfb6Hnvsk6579Hnba4Y/ol2aK1d9Iu8cVdrOqnbhwkWI+yr7hFYv2ijhPA9Vyo2aLJyJegsHfF4UdFnEck753uQ+RbmOnqsUN4tzkv7WLTOdcx+/8Ru/ZeFkgqAuGXWaaTsJ7NU+pvmt4fWplOZ1uvHHV6/SHlvnhVG3o4rhu5X8KVIaxj0K7xOEPVz/SbKCsKt3eZJoAC3O4+mcLRKnIW9GBFzpSiG7MKKbBN0ocALzyuWrfnV+9OWj83u+uUiaO6Sb72eG18GicGbT6TdoZgHT2+bOqcuVhe2I27F53CV1T7V6WvuGE4C6kAJZppXKoXSmMnGWn60xIAzpDsVjcHuJCSeOTcWLV9uDR3I0D50UtUoNlhs3c0WFIXnc1SjfqNVYHSqxeinqVgZilxb8LcIMAYoKzTK2PEOBE6GH37I1V/76VwaVV1SoOH5I/ZTKqkwrX7aark5pi3AC5orwmWKloEaxFYxVrtauxmALmGLm7WkJS6WowlP/aUDEBmkK4RVb+KU010rZFk8dpiRqt02JkwieAh/qj2zVUoK/bDiXoTqmtymtzuopJwqkuJQ2XmFQCzglDNFg5m3d3QkQAXgAnurHWf3NtqhqS4mNAVl/MZdHuimYChZXWs0MoXCJdZwAHUXaLDrfzPuILUUx5XMbQhxn4ywLVszRQVVBFDEKUABVZLnKIKR0WUjp+Isg6gbNEqTf8576pohvPgXgo1w/8zM/46bK/FBvR4+fyV67fOUi/nPvvoeZQo8ZvvnNP7J14+0PSnVIqjDTDxHkmzP7cxg/c9KFxO8fGlr3u9gTEAptWGaBvZq37volqRtB85TrcUXckK7Q3aVhQ8AA4lsjauXKvGllin6RVVpbERh8I/nD/YnmjwjLbXIb36ay91RT9+4u+hPYvVtJHbgnfUPcjhFuAEZ1jhTtBqoRmOFTtqbVEJHpmfobOuKNOdTjdsrSNT+F2lxzw3Xg0b0Z+5qYSPub5UonBPpeESNLJwdVSxAnkNVR3YbhBC2tHimpstU9lELtKuJtVdEMkV3jDmfZFJnH1IjDHYkVodz8DM8rbnHXEiQxGL36cVXIFq/2buXjrFU8GGovkg3xEOFMGpN4ABOrkuiyDSJXQOtT8aWkajETF8rlXucRxOpZ/DEzUNP4wZa2mQFVscQlBaGQn+gqBZfWJR5mPpr5XYpTlkKlzS4liJyRtZnTmlatlFptIIUDbOUkgmi5gKA3VUQc3gaD4wFlkDWt4EGpA22UlVVEFqDAlfILvvRAtALZtmmZd1NStbVLhKtC22tPqVAQKShiqyBQDDFHFp0n9QGOeSnhBn48kDLULiInpeiYgVIUslQBOM7y1ASeIlUIL4JHEZw/9bNtTSHKuJZLEEU1R3nsjUh7Hf5O7ojqhMEvXdxJOQqoyHWf9/A6/Udh9bg2xXn+/GVXu3/4td/3OSk3UjgfzEcPch/so2tWXObUsjWCcV+RxY/ngIamkMKQk2C+J76IECK72YUzl5Kx1Us0H5hwrsw+EicbWDFOeKfv3UfNVj9HclPV/jZLF0EyY56fYrLH+eXJhyP38//DpZQS3I3PH8v/n5vBiGhMeTnt+OHxKc/wbpLVIo1tO7NpwS+m2v1yuvtQZtet1OYhYtp03+1dGSMXODJoaENok1rtnAwkaGiIDKR0qazOg06PrCIUiCyi51xKgepsSueDGShjN7Llb8tKS6keWQiK6pQuLUIbD+vPZjOwVqkjBiA6LiJYxAqrUoOCgQHMeErEuQuKiHjMIIVjXn6TRen0h8gbSmRp5hbOOkdbTWBGkRLEI1UU52bGkQU1UX4WIcVxymIoAicFx1/KUsgNlDZz3ZC9eiPLlSwpnACiUU3H1cAKirQ6TQHmOAFhsTyyNC8HSIGKkDp4+I7pvi5JNVvtUoWZSCs10klqEWcd4DYGWcwEW2VEH8OJFV8gU+N8bUmJ74jgOgBPfWz9vP/BjfdvZb48mGPrS3N9brZX4thl6xIr8AK7gFdSFCnZ2Lhrtor56Q9t4opjozMqprEwQGhDZ1H0umzwjVoiKPyJ9m1ToldtGdB3HaOtvPqRWzUPoSinhFE/jnJvQ0p0cM0Pqlh3FekuU4fUdXU5ddlnFnANanHaf+CDFz7zuU88+zF9W3PbTGtD1weGhMEd1e8POICnFXyI5+iRbC9zW6k9Ft5OH9tcNySY02prMzCx2IHVDVrlppSIzw7XH4vqG5tHSi5h9TEuSX2f5dFHHxFkCkdzglwT99OIc3j0JLE1Zo3NXNdaudIN2oKoOoOsf/+JNwNjYjrecph7PIkz9wKl9yKn192T/lGJ99OjAXaL6iTlWj/pjle7bLtFFdE6+s90APci+cpzPvi378DFSxfcgehyRHQq9MtXL2Fuy0J0JKBIcwMfiOpAoBbxwcObQzcOWXAAccO2sbU5jpvuOsAH+zEUku7wrNsKdYQM9BnsrANFHEjRzkIDb7ap7XHaqMWpO9UH2ap1d3UElSVb6xQpAFj5o86KZLHq1qnMTOj1FaW6pNgaICZHNtMhOsBW/1AAvMB7ekpZ9VREjxR9lU5NN7WlUFGiPOGbaUKUVW/zrvQJSoYcHVrfpgfOuXT2iP7BCxcuERXZ8aErqCXdM6psIl25cok4fpW4fPmiuoTNMSqz281Un3XrHjFfjqZiDY95zJyK57nYHIswBcg2XCLgOQGfx7cajQOy4uYxAYuyKk4/keL6mSyjQFFnZ6WkpnNEHJBtf3K1jnMqnt5AiayUiUxRTMwXA/Qe65OfmvqMhLM+1gGsLoPydD2XxQZrzvTXkzrDf4acLGCrHZ3acM7AhitlpQ3dLAd6maIIGyJKcXWpEvzTfDkjTjO8znO7nJtaHDjkk1RXbubnAVWFWX00AKO6XWvtfJN/2Sc/tH+WhJyAtx6xojoYR1btnbzP
SQdKVELHv3blkjgdyCfTDI/3/fxX3dD1oQvved/uVUPbSfSnn7Th93HfyHDsex5Cqce1i+fP0X/i6BG+nb902SaM2yng4QFcxBxuF6RxPN2DKt1yxk0PenA3A0RNp+7uefy3iS0ngaLCijnOFrW0WTyLudFuFt5slSDOAuxXM1km1QHo/1/72h9+/OMfszy7ttZXvfjdjeO0G2fiXmAasSg9Ah08taPAKTibgEZd5orUaphrup4k9PeCW/Mz/3uV5JryHsAj1J3IhIe5me4br92o5RPv94KGOSVVtXV489zuXiL3prV1d+Jftt2w7xHcUyQLRBFbY7XLPx1jM79vGcJJRHtJ9Vtjy3sszAOGku9Tp0Hy2D1H+DSfi1TXY9PhMyfkV8D52J+J0FqVXzQa5gSr01zqH8Fjx07UNBMQcn7MRcnVa7l812m5SpSUKzaD1xrJTOaSnN0gEp9xuqoxNI1MX3iXpQ2Q1WccMkRxN0etQcptnL6JlTGbgROgxPTse4J44IcYIy8DRvvmCbPKK65qKY9lhYAKWTgovbKtwJjI6qUUzgOBKP9u5VHKWQ1lRsHDhGxc73DZ2qr+0jGUB9FIbxRqDoMiakuEcw8RQMaZWVdm94k4iwBb/dmT0qM5qxCOH+CRamP0BfR3TiylausDXGnrpVSWhsVGqg5wD1HK0HJDKVlZqpTKYpAtsUoQ+bP0EweKqsShNPpRIrINSymI/MgC3FD3tuyWz7jl9CoN+hBgWmV1x5prBODo9LC7nKzdstWf+oCiaJXyBKUpBE91opQnrk6IZJkAEJSKFDdC2K2tFlWcgJWS/3RawrvM68waWZwq4vbITcyNm9duzb0EQaNASgRDe2zHvydMzz33nI0yCp2esAHomqZV5pW+QaTD9Q/+4A++9rWveVcsx8Zc9gxxHsoPJ29fRmh/d55kG0+et46CWetqpHTVCwNoVtq6r2wpu9lFaYdZWchGj2uT0ceyxu1JRaZ/8id/8p/8k3/ivlNbd0G9fVB9j4E92fltibWq5HooXVwbn3coq2iQ25y79F0Nu/Tby+cu9b8cfn8/b7faHd7ttOYd9PtnNNzdVjY9WcHmGIVRnB5nsWlP1ot0Wt1Alm78soi6N9BFNbTU5a5/9o/owYDYnlP9BGtdEe3Sa9fzNnBEbGM8zUehgYahdClgCwM2qgwKODYaKgWpn4YPHvz00HDV488BRBqqhEiJefG7QQVoXBx1rqwU1VKCMW8uL12WH40FXYjSWi1SQWzElZazxLsp1VDNSgHOJcuxZQgdyAJImWu9uLR66Kxa4kXUi9aWcn40JawriHS2tMpJFUFscMoJbwOgg5jZgeUG/SWjQOoYZNkd2c1cvFsXVtjFL+UntnaR+owI6Gm9RIksnpqr9eUVVbUoBUpbkTLXqLQKMVir2hlowMkuo7qUfoIN4GlakXg/OqsWXgZKaACYcUoVyUrh9Xkxy+Ipw+Kvt1og76qrnjb7jdRdvTxdgdgDAfNqIgpSOxpMxNyWtSpUoW88MaHUgLRWYWBUNp+jn3f6UUJnhvJs5GLwROpnf/Zn7QGqvuoAiGXMRp8NQEfA8aC4kYL3Y5IUehMgr3QPCq1n/YALnDOr1thk+VNKPeQGE22v8tOzooG/2V1EqSyAKAXwIqnaDiyGaQrMaQvm3EsBN4K+JIIHMpwKxTYO7+i4A12G7qCOw8uf8lB4P+aR3Xi+Rw9Nd1H+90z40BBtKr4byT3t275EickAN0QLlocUMJCB0S3Vx0CXK5t+aewBPV8pWdooqeBSBUEkiE1RGXAChhDpQJfKYi6wYpjUJXSAR7/CVltlQ6RHukts0VjYXL9mudJfjS59l4papZRGKTNGkVSRFBthWuBUF2obHbEGdhHiZCtSh1pKdomgVG3F2SXS0l2eKqm2pZPDxGmTCmVdqkI430pZ9ApShVnKFg3SmsaGgTjBmp7Nk0iXYaVlW6YhLaoU/qUhwvd6gSEiJdmOm8DWVUpWjXiIQbtgaFsQKVutoGNYIFuoJ7G65S/eUjjx1azE8cuW4uufLAJu6Gotle1tSpsGfxVyxoew8dBcoxBFcAi6KkAaCrKg/uOpCB44oiwofRfn6iKWk5/4uyowRGf5m049cjWH2SaH3o9S59WCYHzbnye66NzzOzk+0Pb44497LmVxAuPpQbdWRof7D5yUO9T3rW9966233nCCzm97ewadrFKapWxZHekvjsiKLB5Ag7rIAvrbgnB0rhLZDUI5WyPpCIUTNBookIqXfjdezpYuQcRxpyozMXHS7qWF2QfGvPl2YiUwPkiRkbtHydJ2P+Seju1xdVf2fvr10V222/hE7Hb2v1bsfvUy2P4ELq8ALmR1+2VIU+pCBrei9rd2eF2LRQuViyepcT0dYHPvZYxvusJ0VNoAPZSUDtEZ1iRpvOjnGEArQhueDrRaVFQiQTiFVKVLzZTLT0CkiBQDfhp46FkaZprjx3bWhWBDPGTDhEaKcJNB7dJFUhZfJaW0sOFyEjMEBUDixaFsKuIH2CoFAYrwoGOu68OVrZJRELewFZdyoEpIKSogLjqeUZxEKeY11GuIZqC0UniqdiEtlcUPIKqMH73K62Fxyxk6HkQpnVIUk1qRUjCXv4hUKVhZ+ouXLi2lyilpabPF27HUTqm03mIgCEeESzWcIHdmJKiUckUt1TwlNqpKidQxDHUDcYHra5y6Ncqa0ztrVwqdwoVTUlmUBfUfnapd5nqCrUhVlWExL56WCsIq2tXGN3oUtQVbF7hHU1ILvZGjLiD3ZwNRuP99v4JyfSYItvh858LPqL0B0oOpT3/60xBqdQYAN3HL4rQ4feMb3/DCUNrOn3+3tyN4lmNFWAT4AYqUV373tirLC0RZHjbVdogaF3S4QspWJfRgrjYiC/AUV7RLXNnFUGSlYgjcX9JfE3xWQVXzmqhXXnn10sXL+x9Ib6eW6aVnWflhkEotZ4js4rsa+LCb/d89fr943i8+KyAEd3l0rRahtyidLZDx7o/AalndyeRgMteUKZt5Hl6QxUytoipvl4AroraqMMt2xbJFXOXlgQP6jRQ8Fj8WrQVMk+0MhoFa2jpId9VWJxGjibghcOnCRfyUE2EXZfFQeMjY2zVMF258iHApHEJjcUpb1HpK6wQ2ggBzQRHAzHDxpnW9RVIieBaD0sKi0MYEqEiVEwGrqKWyiBXvRCA7Tm1MwMWlbCslK7iKCOInqI5Ky4Beu1UFL91cBiHSrOo3ArThrIgU1IEGUBZ/RYpfuXwZA1l2S6GBWkQi6LTJtjmrSooT4KRKiq0UzGAsxLc4M+EtvYLVzxwGJjCjwynRMz44tIn2ElGkL+qItSVbEVEi4ll7OZfReiKLMw5s/YEDpSgrhVSwNa1sS9GX5gq2lLeKNFkZWEGXVq16KcIvqzqQFhWxXGFwL9UHUU7EfeHzP65qOBezLDDkLNJs+Y3UV7/61d/5nd9xYWcNO3fu3drFD6mrbNGPmSC6vmFYKXKzZTOwDuARMURSKouIH1JKs2SxwWnDBsoj7fjHXH4pTgxD2CQ
oxapkt2jRTTtweylM0KkD6FreAuWW0XL1G7/xmz090Y5B/9K5R9se07ulS6TIhyjZldqD67d7KM1+iN178v+XIq4g7HHgT+b/rrZqaFSlhRVqHUf/AexqRKlO5dq6FMy6nF5qFOiN+pWLYTztZpRACqTQy4/Z9avOjHj5St4d2t5OFeaySY0OD3dRdCcb5noXho997GPGgq0I3awDEyc6DdwwxDJGbtxwgNYk03WOBnY5jw0zPznfCm7uk+RxY0LFZLWkor7ibp9WxF3aMXc41VE4CtutJyl0FCkRSClw0CIUaqWlVE/Z6iKdLWq2nJHfKuRScaGGtEq1xQ3ZsG5hVz/N9bPM9XN5TqLM6NuirAqIiqjlT50R2XKiE4cX4DWLDSCikKrC6iyDItmySSsIUVrBBhadXfUFtU6wGpqlRFeAl960WbJYUSisaQi1YOlcpYrwHzl8xIW2CReDbiDVEwAcJ4b6xgE40PZ1Br1FZWvVpEpRAKPNlh+lInVsWJJUyUrtqGEAKMwtKTu4iCM1xbPcipwLQcOjDs9JPHK67QMXLp73ZbETJx7yeqQf+7Ef8Vp0g1A1HziQPU+dX60pUl8pnJgZ3NEJa9WLL74owiKg0bWPVp00yNQlcXA8RSmPzBfW/SNHMkzi7bxaqc7Xf/TGBLIq1Xq1aLEpLWcR6dQ3Rsvf7Ep3+ctQzqZlg9cuBL9OpV3UDtgMFKhTD59UBUS4MLYjLRML2dW5iEWWG83W0B6elf0QPYvnv2bkfv6vxvohnb8f/67+3cCiFxBLl9VztN1cimRZmpnDHY/nVfnyp76tq+uZ1h5ZzARJ4FQEUHahnpdCueyuRVlFKLqQ4UPJOBK7RoplojNkeaqkPNWjgxl9OphHv3BKqhAPQKFNikF3Vco9DJs7R1ZRFYOpQy7ta2P5hKcydYswvYhEMAPZFqF04JWnwwOOGSweSClSPGUzSNDh+Gu6IlUlbam0OIRXraGoEZTVKvyJxu16UFxKG2ZW4HiYaC3QW3fZZQWilZkoYCiguzxHrNoitEFopmr5hgIH6GTLgwEoApqWEohSxPrDB5ReQLR2ZZDSMNKpRZujagmyQpUUgxQFaAlSNQEBNBDRV+jHtpToGYp8chqxFx/tKxhUVudb7lFSH6qNFSIrFIrQSyxSSnmqRLpEIACntPQhhCJL7YqbKlNbnsazRssAJ6heBEWV2xcuvOeWyKWlu6gf/bHPudADxipnldJmUF2/ksexaue4BOXoLgytUp5UAbtkFGIQClbKUEP1ZDlMA4WgztCDQTy9URk/wAkwtHYYUjp0nOjDlYT/YAWBQnjFF7HIbrYiUpylF6mgNEqnm0kxCCC7lLMo+93vflfqwIj40IF++IHNNWu17UmXlT30+2Vr9O7S++nR6nczh7Kt3b1L/6uh3q9e9/P/fvFphZTuUbiHfzHYUXPl4Wkqfk0MdC24rW+pXmdodHQQ0QM1d+nYyikLFBXRPUwU+r8eTtxeIJdkyy+th4waLB6CtueTRZfaQlekO/FEEUDX8drr0KnCtpZPZ+5lMQDIcqn88Z7f7bVchAMapQQgjEkNbBQybuuqheEyKAX1o0rZYB4buvlCtqWsVAk6uzVHiVI8ssRlVQCCgpkIZqpA6RBF2JTCSQE8slJF0kXE1t1OwWpFsNFTWeKQ4lVCtlVQRBaulKwUQ71iAsJnFPTyV3AxMKF1lc7kmBiqCIXYECmHgGiYz5/XXFsU0RSJGXGuhnJamkL8KKw3jJiLy4Iy4KG/JqrzxMmTiuoAthKltcVheLPYwANHNpsAcNpUs5z8aZdCbzUVEfRkVG8GmHEC1jGUGbG+oaA3AlKArqZSeoBAaSlBo1OtUfgsDmoJJ4uZNoBBxXFSwi6cUXQm8M+vgK/ZZLAYWWX9TOpHfvSzxqpzE2N2HytUUcI0ilWKVI+qe0Bl1nYvhcfY00UbYcp7AcToiROniOtEUyPOpE2Bgyl0IuqbcJoFSv/twj8iufCsQlk+cwCoGvHyy4qbUkCbiksxoEPIwnmrVFFLWzQubNbFhhp/gQgoQyJwMMF0bnnIeWrrh2IGtYP4aqcVKDciuUcPEyRwwmmgsHoWpdndFD+2Kq9I3dijQREeqZ81VqSUEmNoSmuxxC0l8939YMO5dZW4qx1+l7+eLFnZ8O+UKtpEajH9cYhufV+W0by3CvfjX3RS42icASK/zZZSc+I5vWnz1Cac00xulcXTlCrBqR1N3TMcNmfKsGl0acXLptHbkZhCkRpZZPVeHRInc/oz4tFjxzFXHKX802cyGLGRAuh4OIAClueUo+PHQ207GGYMUuD3JXo4HoMX8wyi/BoVA0jf5Sh5SusoanGsEO6iwKW1ASkQrE8MY8NcUAphW2nnaxQ4ChNVIlvXIZRXFcpWR8TLUFvYilSVVCnYtuYmOtiqQRGvKCmgbyGtiKfaEFc7lbL4m1VKVWWXIPG2jVJEIniaNlCIta49ihDBUJ0onWtIeZ0EIj0AT1NKzJWyOpZSiAaC4Ael4ORV1dImqsvJstUcP6uTFAqoBtnFXyINioZl03vqDx9YR68bsqVTi+jHx1KyiKVTwp+KVAp92arzwxtis5jtd+tC+qgiJnRZRdRaiCsuaPW8UkLESvW0/+jTguarp94q65ODbqQweEx1+tGHuWFGZsW9MjbZJYL+67/+69YnN1Vf//rXOeBWzDVWByofSLHCeuNTi/WBngK2Ph7ATLPSsknzC+xtzOs/Iop2wQbIFtpPNhp3BrBSRClmspA2E6TZUpRWtlaqbUnVqGyhqkw74qzTCYJFSxFPJ85zyby9LtyVZYIsKEIEPoKbPrMokAX1v+kusbKLUqRsrcWihHNM71GyK7u8usPcdtDhxFCeJSW78CIisIfS7N2cG7Yd/XcIbjXvFbwf/x3CezN7lWyvy41BrDsx4fxmTtCOFir9Wc/s0DAccFIltqA9xKBrD1ekh5eIh0g1t/XxG1woflZCDwbQ68v6Sg9nqgqCH71WpOWvt8XZqtv0o5QZpSI12qxSxMpuliuZuqsAa7MdUSpAl0GoiMccIo8HUjYMKgCvYG03rT1pq1QivBqaJVWonsWDjWBlpegcAEsKXt+UwstDVT0pRQp2LVYhNkgBXuV1Ej968QpKy1kiZiKdleCrqIh0bG78JFKXVl+p6VqJeL7mFeC2tCakwgtQumIpbWXbV5ZaDI3bzOy4NpfhiIrYqjiviFR/PSy+iPU5UtvfbSjCSQM34NU/TsWrFqF7lc+qL+sYcLY/VENNw5uV0gnqQOMwheljxKuto0i2fQ0DnVqSCVyjRLjozv+KnCH3Hj8Pe53xs974XG82+m5kzTMXG1ekOcauCdrdJoozFK+++tofff2PvBxdET3GtlGKX9BQ6pWUqwQ5BudAq49fuKQApZeEHG4TtF6NQytbtqpC6YAqGyLNmPE0AjSjtLSmq2SxlQGRq9hANVdKtgyyeACkVuZvzNUf4l6IiIFdIi6u0EciXzOqFMHKrirIYh7ypscqKpCFVL90IXuI6MQjMo5sBxxt+TfMTU
dFmMtvcKYV7oZ6uOg1JysUi7jLg6EOlFj+IS72HwrZ1b8r0JjvUoJv67mXrqQzm3oOz8ZVIdrxf1cKQ2G0cjzV0U/zepe8o8YvZI6c8PWrkw8ZCLrWA/kt8KZxMec9NwfT5xFXl4a323OeSLuBlF5FQGWNcEUQxBme82R47sMwtDdC4tDOTBjXppPTRrbiJVa/WpTutycVlLa+2PAU37z9oloWtay1qjLVJUuyWZwQgBOdpblMS0+tFIb6h0dph2INqySFOGuu4hWEI+LHsOhlJltKTVc/IoMYBsk6UVxaPegcKBEOwaMIsjzfVYhYVRjKg1KQbZG0GhZDWLeAGVpbW1psiYBsnalFWUg567lUHSuFvoC4IoBSvIJlwN9SypdjimSFmgE6l1e7DNWmqEAPpB7WB9kaxYlCFtRWuyzijcu5y8FZK3UJG0S66FUuC+EVKxioapsiQnShdndXRdthY7lKMJXisVuFgZKuLr3BtTK98MILvvztV1MEcSrlEikvj2bCysTi0aPH7fvB3ca9/PIPbPr1ja6X3ruMHwNtBjbHWLHyOaq0/G8FG8/Osa0+5bxqXUhxjFHQSrXUBzXwKCrAm8VTkG248APapMMVKYgsRFrlTNe6LOYue+WsCZw0KyUrrfLKRpv34WisA2XYLGNKXYMPT6ozjeNTAYmeTPXUBPzuLEpNN6Vnl3mVFpHehv3eVxneu/nLs4euQrdl78RwAqYXWRYuFtLSd9Nh3zvYw7nk/+OQPc5U2T2Jd9jhsyqM50F2qrNbNSLafsM2tdu6rXm9ki6//+vxV4grMP2BSKusTQuyiG3fFjEBlOIvfXFi0JGGGN4qlGKg38VlU+OoqloppbJ0VlZa2aVftpwrpQFeuwThUnZrMXdFSMTowqoMjtsYNvhlTRCKZKUuPA0VDOVZ6hQVp5TfxKmiGZsiRFDD1S/FgwGSwTdTOZwUOs4FiMXpUUThrioanMvHA8GgFLMsgEvrGAQdZZRvRr7sAvSKLLstQixU7W4p+hIvXRbSiiutwyiUAx7CESEVlI3bedtZWh3sGoULi7SlBPGD1ggdvywNtWXaKiiqbzg5s+mkd3ULpbVYPUQKjEJqhX7aQAxPYDEXFlHHKIUgBBuXFox0XK1CdalXOhVOIq0d3FoF1+XIWjmAUsqNBD916mJAFlEHk6J4LPVjP/ZjPn7hpopCbtPjAz3WpLfP5IgEu3rsd7/7bTdSssC7LSxXvLKbzxBOuChRCOiPhgcecDJQKgsogSvFz0NPnYkgwiEY2uI2vRGJo/BEEf4iOIu0VBaiXkuD0ljaCtJTVdgUFTBTiKKoGmRZ4diwJ8J1TApaih+U0jRvVtTTDsRzmhFpAxAUoxbRXRaK5QpRp0JZSiCyOKN3AM8WvU3ZJS5xxbv0cMsH9tKRSKXghwaVWoYWQpqz1VFi013KsqCodhdlF7mfM7sK9/BXWxngAMP9+BVhSOmdtRbrqEWfVkhpGcpZHI92zGjypOrEsaPHjQhbBUaTbrZ6Wrr4jG6GhKvOsKit0WNlem+z6GVLbxkrGh149tnsKkU0+qoZM6gedEq6iLTrVoR+oF+VGRFbmaVTv02vrh6lIFOZGcPgr4A8FQRQgUqyJIs+WhJrDOj44cYwe+XHU6IUUYq+hhPmKikzBu6u6UApfg7hx0C/FMgiQqQYaMAJqp+GFslWUBE2aYS37ypclApiVrSHs1YohICK4IFLKS9/BbHJAtl60iypgnoRXM6LGFxRxWNg1DatHjor2yx+la1ydMQyo1OCUuUYQNRNWy56+VuqD9lfqxQGRKVSOBMES4HgiSJ71g9sXh4Px4wTVGdxIorqA0rbURbe4FDVjtQmK73mWoRZttpKL0/Mj0uykGHLT4L0QV2xl0HORDiDLv3c5z6X2s3DXqsLPbIccJzPFzE8hfIsigiGN9983a0Sty2BvTCwbjk+y4QrUP1MmxME6FI9sP5zoPpVH736tWfDQhyy4hb5aUdSioBaMPq+H14NJ+byE4HQ1mrChz1NACcCIKD0ilNeilIUVvDT0OmgPHQWwVn+jQ/TuIiyHpVGvzc3B9A2o6Zutwp2Q3B6szBYClEAfhRI6cvV0lu0dEL2QDXcJkYl2Piw6KWm4C7AexcthDJLl8Nla1The+iLUvqytZCKr3S3pou4lOxSireZFp3awqJ8CIKzpXzTLbief4UGSzWnd9WK2d0wdMHkzPrp048dP3ZCx9YxalGRjkFax9a+0TktmNade4l2bAylyxJpdnjT3ABdajktkSqUOkCPwaULYWjHMx4RZdd8XuX4Eatc2mw7c0WOHz2GCMoDKb+xiRLP6CWQzACkHJ1Kah4RKK+vzY7azYUzEUoKSsnCyULKvGrerJQIQDcpSFHq5eLnGFCETRGcthpdFKXDldCAXVnWS5FiK1RD6QTLT+eu7FIyImlFgHlpo7lZFFAHSMEbz1akdKmi5Uxt4azaprXYNBqnvooaz4rQI9tU0eIppfqldQMDKdBuLosuSwrSrLS+LURpLUoRR8Fm6sSJgkGqtGFUWe8wcm0lW0EMcA26ukqVE68G3RpPNUsXXbeuBvxqASC8tjL5+aDh51dBdvz8asqjqeonwoEqYZQbNNsb7Dc7nB0gz5zf7Fq3IOWsA5T4aD3rjNonlBoPsvSQ4jwH8EMQmZMyJxslt/bdnPcWtr283dUdslcWmRM4QBUN2EKfaEtRFpSIoRRK2KogK5BCm6kUnBAURRWEcAaQbcBRGjScfMBWPdXQNBpm2Jn/hqHXXnFEdiAUNaBh5skU1a50AWKVN0UP3xYWcQ+/rKJFDLLRspeOzJnFTzGKNCJ5NnMPqObyFN8wdQDcObcoWgp3RUL80OXwHobvQ+I/zW21mkOhH9xHYjMq9/BQUlhKppmiXHPr//qA1Fo1+35HfT3gyIN531I7A6npJpm3IfUKESLlnvGLs76hgPLgVypFqa16DocQqdpawQlW1RYPVej6EqCHCP6lJ8ZWF9iGpaX4CdJDAxEAx5zjwkajHp/MVp0ymy31oN5joAgD5kpWC2J9akpJbcgqEkd6cFZzSyveeQFetyB48FchThqksuVXtOrQUnQwPyOduO8EHb01qhuUsAIIWvVNK1xClAWtCOWjb9M8xaWappxS2gB+dPxwRDjNFZcVTzoVtTpjIVk8ZcZZYpXMx1c3Q6om6g9xnGpRfhoKZGuRQhQipUAWjgGgY+gy0Loj4oE3iwGC2Gy1mc1LrLkS8bS9ZGtOyk/KdXc4JwGe6pcqrT/SQTZdn3JFagcIehkQQcTL164bct5qdvnaVQfzGHLA7/Tpx70nyWc7vCHJAqMv1S6FpHRFQE9tlfK//ca/8eMPpyds9OkzqmMxNZJVvaaZ4Kdej99ZQp8OobO1o4ddbLJrLYRbycow8dPit7ccGh+ClMAbTJrhKI4r+CRjb2WoVYpY4Hmz6kIcToSgUpSqKq4IZdfD0lXcrEStrBQPwIxTtg4sDZDq2dJjYvr2pvcSUXuezCq1OQbc8VvNw5/OhhNwFR0RLCIT8IJSyN2UR
YeotASX4FE8KbWCyX/doxRVE5/QpwoxdzdUbem7Rts06GVYbGM93pZ5ld7ycaA/DVgKq0y20HDdbYEby5NdnvaNpUQRilR8tI7rsC5UerjOgJIfUXg0OV2R1HBmltBeWg3CSoRn1IzJ9B+gCF3axi2zbM1hwIx48FAuEBEB4nZcZPYuT1VVG2KlIjhuS2tCUQGlCNfjT0ZJZtcOEMR6Zfc+mv+f/8P/K5POXKMZ20jqD4x2ArzBDdABxNip4WjeWQBxUo3Sq1casLFq+hbTmqijKMa/FLHjjSx+WbKu15USJKUUYpJCYY5+PFUiO37lQTpZFW71IDykvDVSQVLweivmc9wpbQMo5zARajFwo0rwI2JQZWltVQmRZhVBEpQBOFWAOW5j5gNi+xA6P6XEq4Ha+i9FUVSjlNUNLiHKlq1ulI1aPEARc4po4LC05thiXSnxIq0OHDAktqB2saHgVHfQvi7LFkMtZQKglAjHIMXATzrRKbF00UmJ1NuIom2GDUQjXLlyLQ06oyMrjJnxhl8lp8txpk1M57ETxx9/6kl3UU89/fSJk8c++YlnqRzL6VrY6Bz3IyIIRNTX+vRHf/RHvtv78ksvtcX5wDHptG96xaVLF/ArbawUUXXtRn7qxAfMqqMIzuFWTVo6qfLUE0bj/9x4oZPiAyKQJQWUSlkBEDwAQqGKIK7YIgKyKMSrgavcoGS05iNefOAqHsTqVEQbTyCIiuqnUibQqaKHcrgUPuo3HqJYSskiVidBd4r8RIQDFrFB6kl1VtViq04M9GOAYKCBP3WMODpoBTEAUrL8P+DV+p6T2QjwFZL5bpY0P5fy2ax9Piw6ZxSb1tJ26NXKst6KNLus41nmEAt4IIo4JpXlSaFNXwaUitf/MiiiEL3Z+L9ttaW8pYuz2pRCfEhM3JZIEfz1BwNKOYs/cCgPa3Q3/5dNiwDLknnPgGoWIrxE2tyU6AkNeLsNZkW8VRelfAAYzp97D2JAwTEA/LThATVKpM7guXEru+74pUoViVh1cg8DcXSG2ntl0Ym3SCorgHgQV1E0z++63Oy1vRThWW44CoR/c2VaXbzEispp3qR4e+laFShtWYqADEcLNMgiMgAHEFLcRec9XCm6LCLNUsBLQAk6EabrgyyE04jY8ENoKBG9oK97X6dhiI6BfjWoG8O86ZHViXIzl2mbFXcYstXDlgZghYalv2yscIn/qdLEejmGAWwd2UxPpgka0KUUQvDjCesWSqkhaSuoEJtsKa1sBeFEAB7OIAoXNhRpBUsvpaUNGuYCJe1keOCIdYcG/CgijMETC/TqkS5Qr/LjBMRJAaNFtkWy3CBCFQYI5Shqhg1+/ea1U6cevnLNMpVfLh+YG1rCxNTcXt8Ln/3sMx//mPuqY6dOup46dOTQu2+9RT9VVUiPVqDf8yqLk9/M+z2v3T9HKlA0pcvM8hPBKZ2sddTqrgtxyWoUz/nDQSsA5wcPP81wtoyF4vwP9/Dz2bBvZ6YWA1kMRMjiafVrt9WvIAYAry2yeECz3FaK2KyUrHQ83yBiogksWuiCoKXwVGH5+UyhRkdfLsHBYlNKCQpmUuPA5rLs+q3rrBq0LW206wPfisTjGQIU1lvMtS4F9UERRIq5RBoaGfygyotET3Tk65qh5HLSuqXumyBwytl1/s7nRdO1lgZ6IjjAhCyQW0TZOs+Tsq20PHUVEVIov9LKIq5aVLYm4BDQ0mWx9HIitoeUWA81AZ0osqXL4mzDQXbNJev2P90yvZQ5qRsqnVMfoNwhwAfMo4cOu8rcP9/ToQebojWhVaf4azUWW80aldWfMZRYncWJ8xDQhiKFR8OBLFEdAgSHJTwQfrJOCYalUxFvM7HMjYd7DzyydLJer6pZirne8h/UrtGBzY4L/pDwISlmTxbOG1Zbt4YPDyiF0urdNQnnUOlM4pRlZlWpOF9xYqMfojS6tiBr86a1lTYoBJWbhjhQExWUgus3Ej4FeFRBqruyvqBuy1I+FUzHhYNdhC3aSqew4igUygJFDQV8rGR2K39CM7EmpV6II5EhTS0QUsRKQWq3JtRuxaQNg07zsrIEidcKCljeVo9SUqClQtE+TT8HCCpiCNAs626gHUK2TaMPtQ/UCv00w1e2mulpWJr1iAhy6IHZGJzp3lUdVY48CIUf7fbmmDmC6E6Tm5H2z4s1KTp29MjR+Xj2z//8z3vs9OTTT+nvptRL16+efedtv6E+NcOJk/RYk6xMDl/ou96TJAV1ng+85RinOYy/wWztREApHj7gF6s6r3RVs8RWDY6NCKRqCYJSFIGlhDn6BRaRwmWozBTWVr2Co1BFvKaXHllASfml2FCqsBZZQWwfQwEoNFBezfjhVV7ZPRTjiBQ/lwN1jNoOn2qTsouiFDTLRCtVSolwxOVneaRFFAE4/VJspJquInTEXX5FVbh4MBRKWfSF4G+lFgU/32qUdfRSFgOkVvCUTbrCq/8Qx9AiysM0UE+KC1EVNlUEUbSQmqgnijglbbR3telCONu+SyeGQwezLNmHqh7pWq7osUoRqZQssMPBFhFE/QSF51q8ESBOpxQowmm5wgAvkdv1ShYdAwpZRDiKLa8yEAFLmyIijNb5RTcLYeMAPfxRSo+rri6T2KqkYTc/XJtX6NYZaf0pj5R4XkAgWIDMrpdllYK6TkAdZf3BXKhnyzAN6OHbXlLJIjKEB1AFEHWImsOMXitwRFBOPOiyHWMEm42iqSorRAS55nBSUOtVW+uYFcHtN1CCGVTbaMqBXQygDqwUDwbOt09wgyAfan03rTlsRfDgrP+kdjlrBVtBUZnxo8iW0lJFEGlBUWski07V8gcFXuWLp6Ub4fmDAjR3VSUm075cjcUPNjdw9ABFIc4yTEo0VqCIq2zf5uD6lx5satre5FETE5hJUQLH/P7NLAOG4PFjxx999JFPf/L555579rHTj+vGqmJQZKG6ctHWkPPoUXgjO2CIFsVvf/vbtvv8WMq6xXTbjocYZBnCRrktfBYhGBag0Ga04Ock/s74pHAqQgETnk3fVtS6SxVVUDdgpWx0AtpkIWWLz9vODCFVitI6g4gC6EGRBapQRDqObBLitYWO32AGyvgMIMa8Ino4UBmIomomq3T5gKE4BF0pNyD4STUgNVeFilpawWabogjFco8UvEUlwqtKWvpuHRePIj6Uh05Q5lKWbJVzEuwpWiLV2VI60THTBlEE4CgtklXHpb9GpSgiLKoYTKkdIzRgruAyt2RRllEacKIAUS1dShvxavDoRymKdNduN5PxaIh26ZZ69onZDQkipHh5Up0DeZBBipWqPfxgLpvgoDVqf8gh2G30dkspJFvxhWDQ38pf5s7eFLqdq2NlXmlr10BhkJWS5Z4w8qFEtcBTYLo8lBRBYZQUigG4iK2v7GbTphxYkVhqX4SD5RCeMZB+sAxgBjVT80QgGKpNluCCFrW0vYE4c1L8KJQpxUYEUkEMmlO2FKUgzg2QNYU3BDRoWvwNELaUjir6Zb2XDoKyNNBJVhwh9CkiojTGtgO+zI07BsytXRkqhQ5Z
dy0VwUAbfzrJlnOlSl191PO6V28xEK9CKTaACNRLEYWLCKkzijArpVBqvKHASVXtMoTiKrvVjNIxJ8Xg4q8zNRNTEk/glaWwniAC5h48kmPfiDWBYuEQKEQiWql+6nBHjz3Imyee+Pgnnvv/t3VvTZNlR3nHu6ePM9MaSeiAToRxAEKyb4RNBB/HDtsBYUf4O/rCvuQGAkM4EMJIsmYkGGkOPX3u9i/XvyrnlSClWZ0r88knM9dee++qeuvwr777r//gS19+7+4b63zrga8pvH/Hq0Be1/WXrbtvLO+D2/ccqafPPn3243/80KemPJfyBgpriFNV21c1S0pRpJKqUFJGJbHTrQ/JCMM41Z9D/OR8N1rIYOyuWRJhyGLsLlV2R5/wElRidcdF6bgUxdsaHuDl2sHVgsiiVNPCTa0Y2RDMAgODudxUZL1z2TnEmrCY1lT4jWXfqhhJtVE2r5AYKJVX3pA6VRvLuiLEwF5seuTGFV6BCZ3kYllMJFyMJeIybXF+w84l+/IsW4QxhKGDEVvR2IGjhJFIO0VFMtDryV6DQmB42SHLG4YRQ4HGqLZ4lsBFWcCdsnzlq192lDtBsIlyxEnrPI9HzoscSMr78ME7h3CeKoHBizjj0Xwm+BhVi5wJj0AiKmGkNGanA+dlacrSIWZXsPVfGAxXjfhi7BIZ2XMZlS2k1VbSLheLKaG4IkF6NOAzYa6TFYBZ8dU/Bbyap4YVY6EoNjlyAPj5G4CysBO+hEXkBF9P9ejqxMhljKKuUGOg5xXIC2aVGeNZcgUVxZICoFskPeJkJJFsYSzIhbAnpoxT+luX1xjZAfBvbdWDPMWSAaiqwtiJKBxCCC+Z5s8DVSvFS+dSHt3yiU1nr6Sq5WVnacULxMZLKFyEPaFjELJRyihXSyRqkcAYTJdBSaZGIfBG00YYdlvkZJ5TYhuEYWwHyAsfM3LildRKtTtNkZjSu16zmMKT8s6foa5//MPJK5cUUivWsxdrZcV88cQffu8P3n30yFfNzgscb81WuT8XgTtWFhg/mN5cf73c99nzZz4a9U+/+uX/+h//0yErhYwOFn5/1vKioiyES1W6qB4LWnmMBL+pXJAUMMam4elTxLkIloUFpmUpymia0K1GDCwBTBUWFUs7Sjq0Zx2mEvakqJYXEqaMja0epJAaSS+Fmhk7mnicvMIhedkhEbIXW2D8jACVAYyHvi6EvJGzE9OkXCy8BGe5Wit2JIyN16DPNyoAPK/jywtpZCFNKdFWbQyqlddIAISzV4ORBMt407W0AUwJZjzBxJpapZ3GBsCeS6lN5c2bwlipjElZRJkumLHiGR1lu1oNERZlGtiU3gpnoTtflor3nbe9qdXyzu3quvGmBZiJPberaGNrAwDISBQDiZa9OstuhOf18LRAFt2xUIhzOQCjWMJFnIwIE7CMRmD8YLFlZ1G2Mg7BMAhhtHkY42ehEHaCCLlO6VxgZRmXhf3nBwAiI8SvEV1rKgEvCclSP+FjoFumYCyEpTFjAMWJvXFWX/ZNGOMJnQehm3drnrwmVvnNrIKpsZWCL5dpDNLR7Z5p+1y+WYQQFiRgKUN4valQ2NFWrWmViFJ2yCwwpC0Vs6lYx4YCE6eRABCKxuOpDEicLIUwgtEL2SlMcmhmnQmMkV1SSBlJTWGIxJilHRMYScwY/EHby994NKIMNyMPeObd6vPBRIui8in+ICfMdhTrVm5PeqCPUIpDOBCfrv93//4Hnhn4bK+HVF5686F47/ZylkA+eTnP//xkAHEH8vmq93/+wd/+8Ic/+clPPvzko7ld/fKjL737RRgAbFu5qcdoWrB6uSrJCAp5qfAoemmKR23Xc37WmT1vy2IkpeOlTw/niENaTLq7qdH0YC8A+vIUBUBCGlXO3iKzUyoDJzsvC31u2NcNH0xs4T3MCmYUok7Sfq42U+ECm5aRXiV4FFms7ioJhnAFC2lkDLnLxSKEKxFCgSEU4QBomwY2DRagjOlVWN7Tx+VaxgvGbl9VJEtrC4YcM+8Kb8JeauECm8risiuQBaweAwRmTOGqvA3PxU7oMuLprBGSkR2ekWKhYCpPwVXI2JpvL589no+7uoM8fHueLQlMfDkPO+bwlKQt4QKukWuKOTqiprLrM60t8tnz+C9XS5whu8BOxBFGLsVjMBLmUrPj7zazKxkPWIGmpGazhKQT9nXJaH3sXhb1myJxEskVg7yMXACzE55//hNrVcvuAQQd/q4XhcpUKZLFkrve6Hi45PAlNMsipLKEMPJqEgxhRbD4Y3h4dgLPi60sWfTDXjP+QBgtF0xVYbPc8bAIh98py6vbs1nhIwRoysBITBUjxcN7s3wAlgBJuQIvQwuCn5SLS2um4QE2EaVAWXjp9QgsI710QgBqyriCf9JcZe0Cw7NQCGYoKVh4BdKlYOSqJC5GGaskOz22eOjAjAKFx89IIWKNccJYdi7CIqqkVcJCWp8TOgOLhXJmsZP3vvjIb9QKt19njzq6D+7ee+jEm+esXhDw3ROffvyJpG5O82a/93/2k5/+VNL7784blrwJozq3QYrA6tfOKWGuJmBIiLXYNilcDro2hRCxdBa1BfN0PkIWJFHZG10muFiMgY2xKYO9MdoCkRTCVYh0LJjXXhQvF10IMbVuyK1SSJZSNHW1gmfRo8XkVaFHA3QkJHBRqKqK3RKVnc6uNZjw7HQAsRRTunR0QmGRlI4t/pBcSiXwvGUJFl7gORqXB9EA7Iz1QifwxMOURYKxyEUClHQr5C3jRP762cQuBINY+6deWEy5rDCqOHllNK0YIQSbkYXQjZtamwJNRcEgNJaLsRNESIXxRuXoqAEShmXZwJALR+twBDCGL294IcCE4spaGVxKO+O5AJ5vgMTWkyEZPbjkjV9g7Zi2T1gSdqWSOw8+f8YihUpw6FRtZ2POzhSuDC46pexiU1CR9Cn3AKaQs4Zd/z24xGatiCyqBYNBEhKYAtPWBTPFqdphvz7cv/2nf/Zf15qiLAJthMaIRYCiiScD7Keqyy4/bHP1BM5lrHqu5aHHsxguRsibYhnxWC9G6SraPVUDhUsNUCwqf/ZQYR/u4zqpp3gL5AhZDUZUwkVZKW+LrWAjS40YcYAxUlosDL2PYN7M9vq1S4N6XGHVhodFLgxCtphysaCqTcqUd76eztMLANcjU+QdGG91o2CGxGbUZtulKxSdVDwYUUMPN7CVXZQaYoCsMFPi3XRWj6XrlLOCrsFaAIAnwmWRGr8pDAClpVAJHnXK2EoKhD/HZS6dwgX6ZljPor797W96OuWvTU+ePnY30qBl9KvYlsqPFUrxjW9/6zu//U0pfvGz9/241Ac/e19HRBZvdfIhO+tz+/6U8ezli1vPL8erqiTCANDDLFUpYwJPPTi9UGsEYFetmi2RnVD9ijQlXESUTaAjnPhbBF4MOEW1ICxWrx4toFJZFIANpkQtAgwZ2usJDKOYjFKwmxLMYESuhIvYbPYGC1peZTDKbnRE1IxKOJep7vBA8nKJagojF288vHSW8tYpPKNYIbIYwVYwEFP1E+l
aDVFNUQGws9QXMIVlJQY1CKlIAMICAw/AHkNskHmlYyHwAPAVn5cRkj0XGDxX9W9IKcq15eEhNzEC41kXsAVBa5HdUB0Rl91KslbAvBh4o6IDt4bA56WCyyuKssPXl0TWHDljSTEQ3h55SMElRccLp1jk3aLowEIlck4ZX7+67FUhCMU+efqZWEiFFS4kV+cyL+GyqSy+b20XhYpOyStjYAoLBni1MXr9sS5kVLZeGMXCVKq8UfEyAmwlAKbCicugqYy8xtXv352PNslLgGWn9KK3qNkc5gIUNP2f/cEIxyjASNixALRwhbBTgCWTnkJKH8CIZ5kP0+WWsLBNAUxgkOiTgnabOdyzHWHoUWU03rm+3+/gx+tgHH16BugyjfDF67lMMJIUMNIhYYFRM68o/XKFdFQOcBaXnGI/L5ilYhwkUQk8IxBO2wuGBYCRokIWn6cBaHlLKjsMaREYCYwRjNB5cRLGUndFo5MFUDr2jELkFUKndHqvBVI6XsrkO5sMUiMyTqbrk3qVq1AgpNFp5jbsG5I8i5qX+17M2/OAfcWR89ZXKHVZl/3sr5d+Uf7Dj371f/7qrxX2/DP3pqdeDJQOlUA/g8okrwcV8vqE1tt3542zvDCtKnvlMXaWHvIpkoKWl/CainXgLE6rCrNeCiovCECyG+WFj0EuUwzTy/U5k2nbQKzKgbmQEGAjOzyhl3c3XunYReGRrihTOqEgsR+sMFebMAwdWzoeAgnGWCyLpIymJ//oBMaImWKMASAYi0AuLVczvY644gkDpma9EAoet3/gCI0Epijkm9qmIiwBwoARGHb7J3AnLAsBdmSBFw/TVFIKAEs8RiHuEGFysTS1RBsbXv0s7BrMFUO58iKJwUgAqsfibFSYpgAUscCWyM6HpC8n7/IIJELg2Smmrp/GpiGtmynjOd0uD3qAtctiHRQPcBZ4rpYW0HH0Ht2WKG/tKMZpqBgKO04jErHAkZSUDrbkGZtaAa15QImEMGIgmkVeF5gpSPDHQ8+CKqNYOrabISq5rPy5GuSSglQDC5mdh5FEZ0oxjdpIcklPHJTAgnNN8PWL2DNu0ezqsIhZ8ERlytgUpykqYBb3RCSMjZSp4HjpqmcnkI2uRcNzvj16kaiApRa95PDCfUQebGnpjE1DWjXC2CHBjS3YUjHymuIs6Y6M2EhgShhHuhMSG9F+h00ugKLQ7mGLvzKQxG+axJ9dFK/Tlaua2emMpaZTEoBi1SCQsMuecLnfcKlK3xgwWWEXNLfbk9oaeggyVy4W75v44z/+Y7qvgAH2gt7Pf/5zP6D+yacfeYKlF7eocwpfHsTo+slnz548f+amNBvg1Vxk7VMJ/eFqyp7tN78sfPfhfDvty9eP7/q72RHZFVkXDLz4Tel1xKsG9q7snUK5YjACEFFGhMA2d7S8LcWuodpI/CkyIneKCokZD1ecZ31mqa1eRhguDKb0SXeym2IgQlgIDBFr3TzBslWQCJGO0bmdN7CRi6QAICQsOM+xm4ytwyGeHglkI+8SHqbLhQyAHS0Fj34hd0lLh5alqGpg36hzBbs8o+V1KFmCnRLmGG0WOjajNY+TK7a6aFquGgncMYrnNxoBZlE2b4FCKoDCyEViY1cVOwu9EC6Wyl6Y7USXFzK8wHgEcim4aZd1j2YivOkS6FypePaWt6Re7lu2a6D19zBlPsvh3fWirBWXMwaSMofy1uQlmMmUdH0YhB9skXZatMGuKeZv/3RGZRCKKbD20bLQpaboa9bk9pw4JAYZd1e3MSALrGD7OXL1UIhAEmZaOGJKeF+c18OApcAASeeCGmU6PIfWPGVavD6XhM4e1wm+LM3JMgMG1Na0gkxvRm1jvAReTRQYSlM6YdxmKHVSeVxKopObmGBjuXW5OkOSqhIiyRaQ0Yu9cgkhIQ/scinZDSRKLkhRLdbyiLKOphVvTICFuNCYRm6cio+IiqFAmPCuTfhLUWDkxsLDC182CmS0SKofA3ucRlEFgrHDYKDbW2JNdQpWSGMhdmRltAuFBFZPVxYP/30buudSNqIPV3l0DPDzn7/vl+O911zs02fzm1Jun87YDz/8JwxSYzN+9MnHXh703Qn3fH7j/u03z+2cF96l4et2nj557nVdJ+b9h/MjCLe8I2NOxRfzZQanHSMGbSLcyulEAVogdPXLBaBgx6JYAK4WJJ4AOrI6am5xAuMxpVOMJED8XJaOLkuE2Xu0KKNEUleqG7nUdIJn6xSSpVgjKgDGXB0C0w6rqes+5ooxpcgiCt40yVuUXK0DBjqkcC4VCiFNBQIQBytFd5sITKxEmDGItWIUFvUUe8l99jzd+lsKDEVhQ0ufK91VwCqAgWK6NYPJyO7ZG6WllpEFLEsFGItlJ6aVXc0IGUU5BMGMpmDlypiFzmhUJ2kd5CoErSiFseOM2Si2KAp7lbMEMy0FngSGRUemOE0h6WVJN/5GSVITeLBS94RBuOP70tlzzmiBZKhuXW6i0VabLI4vC6Ul1SZa3qLwE95SGOsXICQLwFRyvhwZiWmbQSW8Dnq9GMWyCCyXvCvb9RamADqZk9EO95NK50ALQUIilJo+68XNVzWs0nCUKaIwjCfrXHZJiQGsF2lbXFo6BwOPUhQhkMCzYKAQFrRGIYd2NhNFXlH4Q8KYZglQSUYkMJ0b5xfkL5cAWXAaz6pdHjThIZPozuzs+FkqjFkLsgjpSFSh0SkaDAYti5EAV89wnqa2PNNoK5JObBdLFCG7RGglLZadgnYvBywZ2SHpopRNqZ70vCxeWwcgEcpIacqrKYHSsXDJ3iqZ8pLwp7PZtez4jSwCeRndmTzG/NrXvuZeRWrBjUoi32HhPgTsZvbOu/NJMs+0hHzwwfv1qC8H9vF8/eOLO7PG932f6uu3zvOb1z7X7dS9by+6XUnn1vX8jTcUPP/s6Wfv3n9YqezWXCWy4FeJ8tC2AtU/mPNEH5KAAYDRwRJTAtkUOa9ptEZlsyg7S6uBH1ubgQWeJSojpJGlzUMHxl+sFKa8pvHAUyLZSuJxHPM2IsRDj4oCTwKjZa+RHQNwAYOxZzGWlLElFQtToJZNiSkAKdxY8Sph5J2Dd3YvtghjOEETRejSZTfFgD/AGiksbntIKGAUBdCVx2sql7EFzMKLmZ2UYseFsYBJSuwTzCyYTe0HU1SmwSCbGol07MYknlqgJ2AwSX0BU4xcsnC5Lg3dSSRX6UzBeCmX+POPqShZCIDW4LFxGhk7ieiHZ7YijGl70BQDgbd5WxlGwmLqeNm6dOQYIDUS23mx6dI1LwErFkxqOmScs8JvZivCWMk4rTBRIfvKlHaihABfV27+hcHW+RWsRNO8nXO9azJCik2UwTJ7aEDnmG1wCFMILlCYjJ6WFnKhOfuABXWw2HgpalWW4kocW4FrYZQIA7vRVGDFhaEjWXKweALP75GOdKg6dXmmnvnnetpsuAMef16w2BpzAZNiHb+WRRfVmf0mfwAMxDaNGVXI2vdoMap6EaIResZCWNCGVwAMQq4UhIRlV4AOv7Bc6wUWa6xUhA4HDHyFpbCUPb
xDHWHkbrEuKLajOv0hyjMqCipPCnWay3MpjXg6ZQd7Y8WHv/xHFq/FO0l0LSlmLmcKTZAvL2V0iLSn3fMWeSeSws4FxV8XPZN7M+vPWzEYKAl7R4eRkl4XRhmtgCKJKanZw3a5QjGGGapzM8OMh7315EVuLJaRAKxxkUplh3TqMm6FkEm0AB1WALqoFLQVUy4h1k3l2cuiBvaFURIhSCB5YQg7o5GuHvalzcWCXJQCyOLZHUqjagklFx5HFl7xqNiJjF3+2EkZ4Qk8WiGMYMYAki5s62Qh8MaMjQo4u0XQSAww+BtZVheiJCMkL1eyXdil6oHXAqFUXgxi4ektOAaKkgqno9VsYEh2elNJy2s6hV6fGFWkE4c3JB6x2cFMCVeVV3DhjFzqNPISPFXIFVK7KmSHeXD/84shcOHscjVVQ03hZNn62Uv06SePqy1yDMhtnm1NlFjLCMDlIJhmYUSIx9itC4ZeUnlJGBY8CUyApnRJjewWwRvZKcWyU4wk2rldnSI+X6bo7Bh1kADQhekaRYvLBWCjG3mFGEtmpAM42PCUXMatQLmmWRgJWD1IFw+L8pzDYIy8coU8odek529XwFwAZYdvarRHedl9xS1CwpiUlM4YP+bK4KpTVGgJOy8erqQy6EXBoAIzZmxktz6MYMt5EyAE7QqXEGBVyR7StKaAKaaETtxRYFZYipKR0DtpHQ60OAVSpBNiSoSYehuUHcYoRJ3uT97v50blK9J5HYhuRb5YFgPko0fv+HuVb5t1o/IyIIA7mXfyuOPU5knk78+zs1m0Mt9wez67fu/WPHJ0yCvGv68dYZ/Juv446lv3/W/2bksBVoMs3p6nHuFr5GI3sjPWjn4pescgO5cyiqI761zklgcALHzhCAm2wOXqTiCWSzpGgVIIKapcEpWrqZEAi8IGTxFITPGEzz7Xg/Nn8C4cuoAh+MtYm2KRdOwqsgrBGPOyEKlL4ZClw6RDAiw5ZUuCURXLlH59doKHPcvNFQAzVTA2CnKWmBEWhS0vgAYZ8bAgNIIROtfN+oHBgClxUopipBB45CFjrmwjDOFCTokBnkUuIwtXUzqF4MRDYmABlgIDV4DCxTpAVY4WXlS3fwBgFlIIRS+lm7KuTVku4KaQxzPr7DMhJ/Vl57OfM4ZnHnn41NTN3pGbct3MxdLKiKUbKyZw659dIONQn7KR3OThwuPvV9Uvux5d05yJHiN2/YlHimKNqEzJuljwNAJUBgsAMQW2jAUCSMQe4ZyfN32HeZYpt3UPmp1uOwrhhSEs7vNqVTFMxpaArggMxq7UEu9yYIizRBW9ZXEB2wRq2/K2JSHhR5mXf0aEAJySOKdnZ769gScvo2KePJ8/gdIZ8RurE5XW5FJDISxx4mEUyyKFWACWmmWkl5FFpzjjZ6+kaF3KAexLeSmMVQIcianwkkqRMYY4WSgLrgZ4VJ7cUAjjiumpx93aaeZYPHA3UqwN9sknj/36qPfc6uPE8c5J5+fk3Z/8UUqdWlYGxSF2B0JbMY61mxOvl/5+9KMf6suLgd5h4Q+F3hxYOxbpdK01D1nmEeK0f95362dGHAF2e9D3bvvaM58d9vOG5w+QY59Dej4+8ublq9tzdZ0rmtQIrdsuqXLVoHgARqNqH/7Wb7H4O9tn3tl8vZE4o5QKYOkwALREpl46iZBROPGyLZGr9tmJ+nfascNG8ooCEHW23GwPtXFhlqg6S81CEQhsFAUcDztwlwAW55Rjt2+ajwqmjJB04STXWmqQt4I1IkSWLRU5DIBcalOzqeObkcURR8tORMUsquxNldf0HOXLg11RvCxlN5a9aUhZwOqawsXCRapw6+TlQrg1AJc9RUkxMDp56UT9RAgMABcSS2RksSbGFTAuUdlNtVxtRjxcAFsbNiIccynQFo4hKq5Twrx5jwSDEVVs2auNN5cDvVWxZDR2O3PYJSIn/1DpF/ndO7OZ1cylYDW4BrMf2BwURmAWVKZGB5dyyIbNCcuLwQgZhoJWF5T2AG8hCKXuneXtExbrg2o5RREWJMCU2sloSqJVsIxgtjoLu18TxsNSRoH4Kx7y7rOX/oo4b6EHBQJ89vyZKwvQvZevHrye99e5kPi/fcvoO6PunvOaweuYz574mPaTqlEZBj3Mkvmm9PPnRx9pwXD3wRy8Ob3O3fHZ8/MxtHOJkVxq5aoSv5/NMxX7yluhP308Hd62a+djLp6LzrKO211oNuKodqGGz4nhYTsRojGn4l23MnL/3C9fzVr0oSuVKKS9WM0qxDzLcY4u7zCfEwmYkYWwGFtKuos43VpJmo4kBr10CHHS2aXTAjbMonjpom7NfcSyn4erpyneN+da7z1uYqtEkOOv75fzCx/n6uB79s6lRFk+fOstjwStXK3kKVuZ/mr9iYP+hS+8a2H85pMzSENf+tJ7T548/tnPfmrDfOc7njt951vf+s43vvF1P4fYLsSjQvEKQOib0L0HXQoWzeka4O/+7m+9w8KdctKM3P7lhx9ZIp2K+ujjXzqIPurrLsMv+/NnT70R2nMo++A8bLzzwptvXz13ynpmJY82/ZItHmeQ1i2NLx2UXSXGsrfCUlRhKwxg6mgqxts1gP1Z1p9t9eA7oL769a8reCiATl8DmO+buu2zJA4NTkvnFqJs9ds8CI0sOB2mjnuwjogoQgfAZgVUAmkvActiS3eUUbkYieWFZBToaSuAWCngxRqxUXh1KsRUCB0DOzyLo1NS9uqRkRczOwu77BT142xLKI8OQySNkA5s7OrT/pwFOi8AcsklvFjkGNTWVFIkhH2FKwALgOnkux6+ShLSsqiZFMuFnKvU8oo1NXLBdH0wFSuK0RiGhdDVzy6dJa0vDCy2BCNmbarKsiiyv/UKRG5qnRGCwXfg2JFgMDK2ekhg4E0BjK4vUtMBzbzWXWH+8kpBDCw8KmyC7BT/t8kZe+Dk0nLsc2lesE0htgWxOwCg5wHeWWSNKP75i3kHqR81kIslowoPcjYksKmRlMyYsKifPqfG+VqZNoD22VEJJPhNgfVI96F+Rsx+1uXVi5ce4LP7UQXFeBgOQLH+tjYGx9HHVKoNw7PX7jZ+h/vVfDHoG++4uuOhcl/B7uNWfphcrGMEKdZhwtzV1T3VQeSdo856Crt0JaU0lagZLgDTRmF00oE0AoA53rWEkLcmjbzSMBIMVh+4Q2hKTLnUF9XhngFbIy8dSUjjVgvAmwWmc9uUZK+keERJQW8KE48RLGQl3RxrBJuoqt2WKSzsJ+Fcx8Gsr+62Ki52GBZSakrZjfo0xlwlRhb9Rlhh4SvY2piuvdiubsVyAZw6b/kyiTL3N3w7UnV0eB/p/aM/+qNv+1jvN7/pbRQOusox2G0Y6teoI2/58xqgF/1sJkeQbk9LwSW7RBVTX6YYwHx22+q5tz57ermDOsqihNgUXgxzFtsoc+a6uj2c5/G8d97MVcMxP/XPycYu0EgAkvrdRQDmVcDjJ589OG+8EXLn4byZxdXZ20DaGzBKIvDOdWJBLKUsRDiAlR/HuWRDsuRN521lrABmI4BcKqkYX
lOjKKW21Sm88Rudiupp9UrEC1MNYmG6ZOMnuXjlqtNKgjQ1sifsLDjlFciImR6/wqrflAsMc4RI6LymtUNhEWuEByBlic2oTqOkMIQXA4Ux2vAsrQkGIVay+i3g8jNyicoFX6wQbK0GQA2GiWphkOyaMtIREnowozKMREgKWhlrpK6FgDHi0UtUXIQdrVj6NpgOlrdq6Xhgolp+YB+OMq1mY3iW+LMwKq9YI1lkBVQVjMBp8txR6qhlr55aK52xWH05PbVW0vv3Li9CAvc4hks6W5SFyI4NOHG8AphWpKmdVv3h2YVIRKxwuiLZgwlhlwVAMQCmzkRJXWFMJcUpBUUgb2fB5XYlHqhlQhoXi/Ri8BYpmJIAI6JnrGLTafE8GKTHyZVSfcaKDqBilu2wNeXKm8ILZuRFRYTQiRq41A9pKVlkN2WkqE0U4TLlLVZ4CgD95oiHhQCwE5YUxsiLXSoAduMUdLZa6ZQhkEWRATAEMEYLTkFlnKzXZ3UUq1RHvMJJxUCurAUYDK1ARvo5pq++8+1vv//+/+vosPD6Q5T7k7f2fetb3/ITU3aJKCEwbmOY3Qmw2QPV73U/v9XL5XblZUAwI36rSt+MFFH4RXVM6djYCXBeu9O08nhlMVZYvRgJQLLHXRTC1qGRxcIaq5OXfl5UmCtCLVA8fHMe8qKFZiemfsj46fnQnq2PoWLaTooElhrhtimp+zSkg6Js5dUmNlP3HiFo8YiisNMpGIB5MQjnZRdrSgGrCwBeSxqSi104EV4snR1DuaI1hmEnRRl1zaVaunAYuaQwBaMwAiBUIQsA2vi35nYy+1CfyqvhZmCNFA6JH4kpEc4CIDWFMJaOkdAP8HI6o2U0shMZq5YFDG1KJMirdjk3hSjh7NY5PeYAejdt5QFMiVytT0WanhJm3zq+pthQGVmM6pTdzZfOK6oQUxImyxbD7lmcKHJQMzT18GXvFmXhqsjWwRQbYSQ1UqzRtHo0EsCUUaxKjKZiKYyoKFbP7eHpk3kRNYb2JIAC0rGZAkQFZino7KXjApaRi0VUGYOFaYUlhVSM2Cx04VmqAYknUk4BJ6BYVJbFHya8G9n1Zxggas9obgokOGO1sgSTicIYODwwRQUS89aJculSKjHFwaATsXVFz8VCl6VElS6LcC5T9pIaiSjG4TpR2CBzsQgxBaAozAhPAVCbMQsMewJsjWAISymaAhcCQ0k3lgi4mrHBw7j6c1FMtyp4/FG1SizxR0vPgpDeiBlVel6NUap/A5eHnbCTjJbHr2/YRZ489Rkp34zuRgXgjuW25LGMm1AP8xkVTHd/Uq2tbOvYLt6S7tmJH5pit9TqkWUL3kYYhScwPkQMjIeuZecGfhbbwN7QGp10xYkwTjqJh1K6FLnYHURThZlup/VrbMUAgpXdKJfRumAoKpdKrOm209GsKXdlYBUyqhmhc4bFSnb0q4ounN0Y86bupGXMQiHITa1M9SMnisGGgR2VEabUjASmqqQ7ETPFZj15xZoSLgCL08MCtLzs0/s5j3A6HIwsUkxBZ7cUyyU7BgKwUqeiCixkp9WPAUxg5z5ygHa7xWfHzFgiIcGiavUqvl6EcLEQPKLEVoaq6gJnSWEo7IU0siA5BDPUi0RIEgBSF5RqaIofPpjYlqVST9BEAZxVmms9yQLTVGxlwGAQpX4pzl+LJ0k8jUJkBEiKLdxpGDkjhVEIpZFCsFWqUQqWymCXGjPpRbbsSqJYf/Li+eWJjijhaAvpEVtJJ8dVcFLBlGq0MkiEtG8tODsL+1mcy5c5bWFiMUSiBlJSVDIiYamGsrgcsXv3lr7I5eIuJpaCQXNTiGqwUHiN6AidgNHFWhFV8irXVAjhAqAYlaIBV0CA0psS4KikwGBKMU6Ow1zGSPCY0sOYhjTS8SDnDcO4dl5LaaxIdmAwSuNAr8KSKKBEPBVjWs2UGKQrLjC7GioGYFuAyciCfF2MTSli6QB5WVAtkkIGf9YmpFEUcftxMXWF1aalNjKePeNsuf27v/u7f/Inf/L1r38d3g3VK7o//OEPPVeQSzvuW131KgYGj99CdH/iQtWC2ze6gBdFqRgV2po7Bc6ChGwLwEi4iIspEi48phG2jDFjI8JLESyXkR3YcaxNU0bNAqfQywuAB9jmpEPaWCrsmZC/XflpcbdVWxAewCkdDJWqBAqZqPMY0NTSQUpdCi5GSXFaBMadsmCQ14iN8JrqVwpTunDlCcfJUstSzBqdk6gawkjKCCxFWYQ0xUwpZNlwWmexHZGlkhderOxqRtXUBqAz4qkwIQDsACwZEYJxgW0BYLwsWoswTsWcuFklQl87JX4hOEn8RshtRwoAU0InNdJq65FwCUFI8JhS8KiHxCaQnVE4hUByGYGtbTqYRYuTyxSDqMXHZhpm62FfGELTAhkryUjnEkhPWMhUc/1aSOVVPAClKV0islH4ayRMJGFYCMCCI6lCmJNzkjqyBIxLCIvaeCNfhth26txhsf7VIwpDBRi5NA6zO8F+DszOCBDMA2gMLilqkBc/wdb+DGNKbGBIJBjmqkFuFl1LRoKiyK2p7YWFxchLIRYlXUH0DYRvvWThsjNMXQQVaorh5J9BSM3QEUbOmE5RD2P2oujYjAIBctEZu7ZuSUji52K8ScWFbQ8Ab2UUi5OEXxeAtRNIKIuxsl1WEAKTMBR2MKsR2JirQ4gfT3nZhafXpvDPy3aDP1cPDOxg+uKVWssWVkgpdPTo0bu3b7kyzgNe12Jv3vMkyTsmuMruJue5gli3OvumZw+K8U4/Dylcyt3YbD6JOu6ibi5FPAqIzagexbDjpMjLiAE5nddoR3awUEEywsBHsitTd4xroTMm2hSiKlNsO/qLGR0ncpjCWRTD6DRlL5HUvSnDtBoKgUSoTiJQihaW3Wo4/YAdLNl5SeW5zIniknS3BGbhptrnquuq6nRFwsuCBz9pTTqCQii8jMUC0/MWO12cNQTLYrUpSsUWA/ypdHZjPJFUUi2EEUIBM+aNYcNjAKNEQt8pcPglUQnLzSgugSyb9wTN4hTLS6wzpH4xQOpRFgqXUTg7MRXIZa+2AsabhALDCyHpothNKUbhpnSEAOWlEHZsAIz2A8VUiANKKCjPeNmZEQJb/w7cIK71K9U7clmQ0AkXMHHUGKWwkShiCa/9AwZgGpWpqljKmyJv+MoLmSs7i2mWxYxrfsV7ysBAAhg3Bdd6AdTjCZ86EQpnsQ6MLZ31wcbI5djh6dmhjHR2UQCmLjVoGVt8RroQeEqEYKYARlc2yuWh3ynpcpJjrB9GuhxKQUGyrA4WstKd1VwKbSqQ0BnB5KI4KhlN6zYGhZpaIykokJTWMc7JdB6fIoyTnQiEzMtOJ+xqFk5KByBjXvoJneOdxQhpDGykT5ojgY1m1QbgILnssrQ4JbLKkZvWLzwqeoCyGKM6o9veFOkQWypp4Y0HafFtiFkWZ6sC7AGHWtQpZurxlqL58MWdt378D//X2rJ0j7GfXBD9meqDD372/e//Ifa/+Zu/cSZ4dovZi3vdM9yTRHm5T0qlUvzMPACj
hdWOwA4NQGWXvdXYVQUD0CZLGxcbpLpFRcIFozAunIxcLGDEeqYbka+YCiTBRJHIKXm54GGmgLvDGSCqvADwfJUE7J1I3jXqLm6P8tbdLPS5FlsfpeqFYOj8YcTJov5CKqxG8EMCtDHoLE4Ko5DKY5zU51q5Kyx8GSDTC8dmqiTpyKbgbRlRweDkNRJ4MMWHr19g05jDR4WnFPCQjBg6jlyEUSAFzEinGBdmKip+gfXOSNEgAa4GMBKD8HjsydjigYehdxTCGLEhUYADAS9dGfFEdbgv5Nok0RZFBxBO6CTmLI0sBEwNyGUkeLIzSmTaQnUBPTsBrV64ZvNvFivhoazr08nlrHVHnOdwvnVSOooGI5dL1PZ7MPNMokpwchFlQBoBGJ0vmFGBmVK4aoSFVHZRkGVsytsUrY8JazMedgpwACN9xZSUgjHFqCrVYsaZlIU9iSHANuL1G4eeyM5FgLE5vvRCWKywM4j4IIosl9MA7oRchrK2ZMUYUViXHhVWeqR0ihGegkq4aVmN9IwAXl8qylW1q6H9BNMuxE+XS8geAzqLWMjtlhEPMHvFS2rKCEzsp2UzBTMlGCqgcBaBGBLGqjUNZndGa7qYYrcYU7UJtLik1eOVlFG4wAqGPFV8fnHhZQFIqQujmouiVKHYaWQ+azBSneHF+ouUHWzqhqQYSKNL0O/93u+5ObkPeRZFfvSjH1lqbwi0AzxgEeITvl4VtBsQ+jkPZVcPKr3Q8UinGBZCQU5OFfNwgWJUQ166QDydUT0jCSMpRQEE3mY1RSgEYStgSugpqmrFWOiQaI0dXxnZK4Zi+vb5PLICYDCw+HiXMYy1C8YF4AvgTS9H+rikiF/9oiyRO7cVoLcaPh+t5pallaEjqdnsYlnwQ6bXgjMzxZpgBlAVZjzwHRH7rcZbECF7I4FsuSjwUQEzApcOVatKcSfm8jQFWBdaK5FpUSwAUVWGHlOQ51UAcnpRGeXSFzbC24rhFOW4VDkdW0YtmDrobQCcipSIV3lG5CqBN5oSBUtBKS/OhAWmevDkZbHJ6aSqKPAySkSPJwWYxMBCNxIVgtW+1AKVDVZ3WibIYYwAhNf9a4th4ZUXZwvOxTL5rueIRCxrBNA1ZtdDiaoZhp2wWx9tYlZY68lLJwBGLpUzSmGks9MpvEY6i4wVH+1NEpZgMOzVJpauHlMSc7SmKomzFLztuoMdsKqMpq3GgtnpxLVIPTC6xqYArioscFtAUo8wt//0v/13i4K0AFC6+HgZo4Ap3vJJBsOyRYva04xOWqNoC6w4UcLrKqUqhXDZXh0bgJAKIKb4eRHCE5wAMZcFP0IlWUHnDMWtkcX9VWxXh10OIdgguaS2ZDhLxEVkBGbEsBa1wVeha4FzjwhnAVYMJEVqgi07ALsUFGwELZfsQm69mk1Jyig8GDwjQBuCF6FAn1ZwJmhQRyzeM+ODvd/73vdsdxZPsXXt9kNXqh/3cLx81kqdGKRWCRLhfo8KxiJIJKqzC8bbBV2R2ftcrfucKB8SwlAlSlIhiVANmjXtulDZjK7ISNDqon7x8NZ4sSwUJcGo1khYtmUAIsqoZhiKdELoRonUs0dh1vOty12z1lgezkF7AKMMx1gUBjw++QE8JOO4PHplH9e5vqvkeC6bTS7FO+5GAoOngrmk4wrPOCnP2wWtrdWD1wXRmoUCE6IkIouDzl5hdBZe/CyouFA5WCxc0+MRU7AWP3xediG2gUNv0RgdBakp7Pjp9QipGHaVs5+S574S29ZjKgsSqWFEabYy6Gcx5u1/tSMFi5CKLykjJGGXGpt0PuHntWip27f4owW2PhqHN7ZoGmFvwSlViEoWY8UYyclzuaCDscAYcQoEJqpSrTpZCqnmdIBowUiEqJo2QlKMX/ziV7QA7wS04BTtdFCsVRmjNfJqPKqMdOm02YGIsxS1GRghmEVwWgHLaArQRlJhISkAphIByNhUMXRRxDoj5GX0go5KVE4gedlZhMOT2mcRRcpoWgrrFiEYY8JVbRVgChMbXUfs0WanlzQMTlnoLaMpsT3Ezt88xFwTXe7P0NEZgW4KPLAQLixG6Y3oRBHTAKKqTGIWI0uBtVEgvRAA65Xx5ihKuJWKB6YaYKTj7XhQlAFp2pmjPADG2qbY+rxT5fmzihGJkowCCUx6Ch3e2PoCYO68tXUiYayvCoBHSGenqBkVvW1KCWBsQd46P3ESiZGUWs3ql5eCp1JF+TZzJ4aPTDnbjX72cPbcebzjBkMXbk/b/aI+/fRjX2DBiNaNx6gMH6Ly6/KoLGmlqiR+nXqSTifA7F0mjG0X4aR0wpGYykhQCRdIB1B5goexKBYLCAAJE5IC49CYQnJRsGkcv17oFGOLicrKiMooli4w468+/kg4YTfKiJAudhiu+5lrsp7X1u1O3tiiwuZJqqXQNdoWQeXuARZ2i6/3mMNM4lM8ng4cexZRHVBTeU1l6dDUNQtR5FAcQWJK4DWSQmcnFKmJklxrkNMV7HLZ7Qe+vcpeCIsuBKocITs9UYmpBjOGVwXmUrObApfaqNqWdytUSR0BM4YRRQcu9nQ2p2SfCuji3tkNnxe+XAWyd3TijJxOIdG2huqptZLytisKLASeF54Am2ZvP9tv0uHhBaPzkoz1K0Q9p6R5ERuJo6zB7lsUiwYvNRJgUjvqEUUnEcLQ8a89FxKxRg93JGWk4wGLTThFoJFOYEgVmgaWMaPuYhDiGNne7FPPPPabNWRPYRfuzwQUIqqx5VrmMhpJNUjdosWGkFgZNSfsALJbMX3VYAyyEBZIa0IHRpjgUQPkHLBDO21P9Wc5ONhDsJumG4MJYURqpDPSUYsystiOjBQhLI3AUhTIuxJbmIwwSRkjxNNydCGrW3gYtKREwHXhsq5zZyx8hXVNVKopEcslEMOKvPVFiQqyvE2N1bbpTvILSfXAw9C5sBFgUqDyUgB8rQE9jIRuIozEi7oSeeOM0SH3pgcw15p/+2++74O93//+910aWHhdkIftlc/K33r09sO7X52He8Slx574+x//vbza1LsQZ6MFUYDtyKjrWjNq01QImBD7Wxn2FmEXKIpMrnP5YIRUEgUSf23mFX5qm0PfAsJkAaBYGZYNkSUjO6UoRrHwSgU2cpnyOrLBMMRfXtMSmdIhK5vCcjOjZ1e+NyNjRzBvJWnHInTm1EuHUtLK4MUpJH52a1h5ygbGwGKJFq/OagPDCSN7JCxc9GiNyuBFDmbswS+Fq0PGjrkKq6GxLiqGBSddVF5RWQo3tmKUjjhC4GrmEoVQJUSg0ZQAg4U8zsseFt6US166HUJQSaHNEzS7yw50r2Jv0SgEXpZSqEGIKVFGAMb4wzAiNKICo7Bb50JEVU/gAhutp/LgZa+wwne6zMWWpbEiD36cfvNGFrm8tmypLJZvnpniX06zYOFj9heuLSZvnNtXYGMw5752FAOAYQ9HZyV7yKKaIsxYX8KV56zXL0714PSaE93J/vrV/G2VOCLIBRZrxEbA8BC0GHYfCjElXI03O+VlN25fyqhThE4NU1GyhGQsHF5SdhhGeiQ
9AphHEFiMGwZaZGHsLGs0TdaIlGxu9ixgKg7GKws7i5ELef3QWXhrwJLJHsPNEY8Q6w5JYAgjXSCXFCxC6MThZDFFSOgkvGlIBdAxdE0ZxutD4wJNhdArsuLxCGEkvEQ6AIqRy1Qgb8YDmSESdphgLPO7TjdWLK8UPfo4N6pHnjb5PK8vSvJOdF/oBwOw56dwun63a49bfdzKq/ksDr/7FgBF4HR4RAGVB4OBzchySrtcK02JJeICa7mMwEYWK58Lc/ssNl73y3rcTvE4JdQZGF4shjLqSBSXUVI8hPfmrq0YIxhBiA2GjspUm73ZLwxjUndgOKUzpWvYgVSnI8eiAGMivPIYIZMaUR7O5U8RBR+YRVXZ1bZ6/GDY4PVVrptsLCpkKWPT8IwYSC78LPgDux9YW8YWk52OCgOdsXpkdwbREyF5AawDZg+6PV+H8cySaBYJgUclF8VITOFbjepsLASDxzdOKxjMGIQo0jGKSiOQ9EM/dRKYONWpJGBTFXLRec8CXB4IcmUBYBeCkLRhhAMI7CqcXi46JQBaIVuSsnGybDqwLQyMC4CF/TTldj67MbzHx4pRiX1YIOQmFUhcPJbQFKfw1uH4P99aBdo/qopNa5UqC3BdYMNQ7OpZTK2MWN7GtZTUFElRWlB5SWWRd126hm/6G3lvLhSXQCRGOlp1Wn+6MuhqQMWiEa+Q4zSF32opNklTCrzeC8E2PHvaqAZjDdBLI7JgdkKvdJEw6Iz07PFmZ1Q0CVCUKSUAXVS0wMSU8DLyWghKYErL3VoE2yguy2GK3CjElG7TlFF4gRS05GY6XXe7OvkvdyZ6GCExV1JGI1lyGElNidT0nbJEG17qMC21QK8iA2uBHhjS1IWDeHHfR6b8tLw7liM3+HP4IetCrOuC88RFwb2N3c3Ju/u89Od25ZB/9mwedVoNgUIwtxHVWY+8lAiNSIAhp4fTVBktYEaBSEyJh2x0ItAIz1un7atS8HKhJRRGY1HA4RUs1rIojyKXvEiqGRhMlCOlYNK2RkU/HHN6EPimXETI/Xvzl0W3/7Hf3F2+A/P2XNbbhZAEgyh1WgfdycJohTGox46qPEaBYCh3QbgqOBcebFapMujAGbGxt2KoGIVMeeemG4a3QAAiV3iKHmuTHcbGIMpAUv28kQDg3HowWMAOKEALHqZ0G64Rvculd+EIaxZbjXi0y14Wsck0cJ4Kz+a4vpoNxmuMXEhg2VnQGtVgJLyQBE/dVZ4pF3FcjAACjXSKUT1ZkMTPSJRRikN/2SrYHFxeCqHwqofu0Bh3TbYkJGtnpJNJcFJYHyFi7YEYwkMqpkYC339w+duV4nlz0csexhQ5L+kihrNmWejAzhfMlEoVeCq6LFoMm9rR5K1IIWpzlSiFgk+eWXlKnB1uJMhZYCjB1JOCBEwUQCXRt2sYibhcms5euDyYqE4u5DEYTW1gYEabU65Sl7Q6Y748VKkCARC1HWgr4IIxvQkoX3h23hV4FZjqRFcVV0oWe67XRsuIeQGWRpSpESdZjEXvTKtaTcLghFFDukB4Ogns/GSxZMoQGFIIQD2yu9ZXW0Z2gLI4aU07HmXMXsFGU/wwSRZ6VMYVRl4iY/x0hxOASwEU7XvcQX7wgx94G4X3zzCCSVEW9VAkdX5a4fpyZ7Kersg+WeXvUrZy7Qh88uxJ67N1yo4TiTEBoxhFsXeGqKRjxygWSe3LCFwxXKJIvYNx0QMbuUon1lSU8BTkvEKQtCCtj5ERnmBThmk8FFEET7QAjjJv+LnHPJsXJ1kI5sk1tU9q4JgpvL4Al+C3O7OUGqZKLK/AeFhIJxUqMsHXo68kZRAZK8lYnV0p8JRiSjnFyEKaYqYXqxFKlhI1CnfoXWSFaBPe1JGiC4GxsKJ4qwrAFBU7S4RKat3ARJUdM6PHQ7lcyOD1gsG5w2jaGLlcFIQrmwsVWqNwpbIDVx6wYqoNAAMvsaRclDWaVoBqD2SOZniY9g97ZXAx7orVVGxchMW0FNnhK8aUl8SAhMIol8brkbHe2YmkhIKZS4QrrTf3CNSyY+1KoiOcwrnB5CKUyI0pSGAIV4RgFCPjeoFZjIxowwzFDQlgJJIa46TErIXWGYNVhWFBaPP3EyTIKhUMwHR6O6mrJCpjgWLpRFTVtmFYTKMyYnC8jOQmT8tYoNE0XbiVr2Y6pXYol/rhcPERuhGCZDSWNZhpmcSrW4nGAjXZQYIpvKXhBY6Zi5gmXJFTEjD8wRZJQYhN1BU4x4+YxrBTGHp7GtsmWqWlgRHIWF/OfPxLwkgKSZGoxinBOjy8psqrsIrHbBr54hktEbFiZM86uiPRMyR3KbcoT6f8dcq9p3NGIlkEYlDnFx49MpURhR5csNyofEXFX/zFX1C8gKOeysBsv/ghMLlYZmuei5rRNQ5VFaKt/vqVrvprwbJQWFJ4nZPwGJQkF5dEm5SXBdWuSStwkzMLgCjIEimPBZWpXJ02SkUIpgX2BBWkRSAsdFW1jFyMRrLksw7ntrS1ySWv7xLkkgsLACpG4QLpwHqEpHTlpbduvGHgSY1gaJ0xoG3BIU0JJRhdCCQGIYdghqYCHfSKOXETmGLUpilmi2NsEVRFF1h3ptPRdVvCxFAidkh6R1MZLMK7Rjig1SkKgLDonZEuChKe0IvNrvgSFW4aRiPspkSIafbG7EIIHRKGa6eymxalL14w9bC3PSBLYbQ4lYfHFFIs0RoXS+FCghlZ6rHwMNandcDAUl9gBF54dRZrpT2hOj+WMLQOASmFM1peYDVwoWIX5QRioRMK2vSlbQpJAFqESLjAVGi0FQECGwODtQlZNqPsplFJlz3awn+Dp2nr03GR0VRIgoGdhagESTCBhJ6AmTpeYO1YU+BGGGyOjoKTeBhbCiEY4sfgegUw35YvUjMrpdw6au9SwjmEU+b1IQYw3Sq4bmKgy+2YCZdPNfTsRvnYJcYJSW/pkbQQeNjBgLcrGJymjAiNimFkoRhvdiW8aXgABQRjwZywyC4dQnaLwL6dXlHzr20HHKeQYKai5DLWBYwpMaWDBSgWXgulo1grsAAK8Irf7//+7//O7/yOXAIthbFOwwDronAu7/f74INfeLXPVd1i+gaKP//zPxfbmgDUCLwl8wsrFIVZN6m5CKWWGeFbb7rTh9d7himacyoaXe0dUt9D4umWjDoy4vR5CR/R97e0DrfnPI6Ga7tqpbNNtdBlpTVRHvKeTSqg4xIAvl2hX+0z4pdIkf3lg06qFk9U6oZnN4UkEol96We1XvlZmbce3BseMHmNkurfhQ+PjIxT+TlYWRjBkHAJpIhA3uaUyzQ2+izWOfkZpdaXFjwhtSOf+LudS+G5wagHLSo8ShVIl5eRnqDiwmzEBkAh7IwEmK5yS8prbWVUpMJMeUkrxlIUZgq7cLoRkmAwrbvatHOIF5A9X+eVtxpa1XmR7c286R8biQobXT26o8MDl7HDqnetql4nkKWj1FRVTYfXrlWiNgwswXCKKh1LfcFASio1L72DTtnLt0qUJIVewLgIGOYq3LVihzz+UW7WyeMscRz4z4
+AaMgL1F65V4KHj97e5lI7Xy80m/rVnPteusdfdhVWqinX9HndZhQZj2GiJCUVZiQqqZjs1pNRIu1ouXC0nUeoJGI8NJddLbupBeSlyCLc5sSAGSG7laGP0a97HwYj4SIUXoFS06WTQggGOoHhUg+YqVySVhsXpXDZ/SmUJYyxXgR6bO3RuSjISuWC77BS1Cw1KnYH1wNxyNv/4T/+Fw6V8ZFiOODEUEpmNCV+Om/Gs6xbWVGqIagUISs2U58r4jUVhZMxAMVZp1DTNh8YsWIlNXIRSLsNZ3VKaooZIa9CjJgZhdSkQ2L3ICdcssPwvpq23njQKJarYoSY0jEb1VC1dBina1cHDFtneBYiKqoK9m6zeoeRK0KADmeVKHUubT4X9aX3vvzeF3ztrBf9IOHZ1eNN6mgdJBgFCNcRRW0/+ek//PVf/9Vf/uVfffjhPzq4aCUiFoR+6818Kougskstj18+VBiAUqvHxYfgVAxhtBJG+lvz47q3X71xwffbWm8ePnjn3v35+Y9PH3/84rlz18/B+UvQHQx+jMxWUJ5A5KJY/MSiX5iiy2XUggWQnV5qP36jKVtKyDX13ADCWwGVi+Iysjs93K7EYhBCYbQ+Og2sC7FR5aU7rlJMzLk0+BfGwhrFEhbLxeJA+GsDey1QiBTIuaw5RS5tsleG06Z0ALwTcLrTlwPk93s++uRjl7VCZh9+8b1PHj/Wl6SYjZpyrRFualSJkmRRjxfiAERldBdhtxRgMNIRUa2ekWBDLlZ5p5ZplkBaHCOMWCMetO+88ygXvILVKda1o8sfQPw1O1U9f+Kjfg/vzTaTyCMtbE4usJVeVkUram5UDpOXWs9NzoMGxYjVFDwXGNFmC/vsxXwUiQUzgJbh9QtAMXKxYzDSz4Vk9KiMMLxFaTkXu8MkBRq63o1cBN7IZcyYxUjYGU+Uy5cVm5uTCySy68tJY/FIzEp2+e4gqiGxpDaDj4sYFYyNBcY6w5sGk71NXl6LoHijRtp47AB5hcCTS45z+vCaSgFPgZTCD4JoHAnwaWQ6Io6d7BUADxn53XmUORWWVAHsKncgHBFReLBt3t70CCMLMK89QwBkYQypmFo2hYE3FkLnah3AKjIlHno8SoJUsxubv3cMQyc8UmGEiWxKOmPe4zm/oHe2yIIXoyuZkrx0+ygAC2EBU41VHu8RdoUW6JkuL8yBj117AfCkU7ZCr0GagpUFrVjLLQTGCqLNJdZlmKVpK5JudGyEVJ5AFlQJZJIRDwDk1lblvLPh7szLfRZ6jWLhvbjnJFcbr9GDDgfbR4TdqBD6Hj/h7Mqwk7yvzztNTcWKUphwrl999KFHwR9//CsXTZzKdAqdFZ3LsUPrdoVNj8RzI5vB5d4NhiCpHbciOouMdtH1mNMdhtceErvnKHKW4q252WN+8tm88fSt2+eR+xtbZX4hEkAuNcJ4gqH3S9S9uXbIhR9GlUaYkTtvPPNQG1dLROcFFstPJ9PYdYtTEl7pIButzBBeqxeV7sTzp0jVUHyVL6MVsFydWsVWEsvkOrfGKt+aY9MFpEQKsLZV6OThxVNVs4w9eHz2/J8ezwsMs/JvzzuY5nfn7sxncYAdxB5tOP953JYEYlYDEdKK0TMK2dRxSiojnVJselMMAol+q5NenQGMBIOj6UqkHs06ZIqhAOMUW1/p8J5jv3ZZe/Pag4J3337oF6ZViEFTtdMiDPIcMgye0Rt99vTO2SHs8xVhRwTyODdVK5HsxEVfwQRkrqbngm61J+60IESFxrq24cWaEkqpuWAU00pSEBphHEMK2Im4DCxSb6xwIiOL40Nx17DM9mBb0uOcwz8lWYHAp/x5QHCyXMLrC49FxiMRACRFa1seGHsFdNBtjBahqFyieI2mstR4oxrYgcNv/W4np9TPr5CmkSsDOCp4CnE4nB1z6biuni2hEgcrC51XUvUD+0OdkhSDNmRlAHSOmCK3UDDSdSglYocRxW6aHRUBkwWbCrt0V2pIdveqmpo9Db3VmxaPWoJSAlBYiH9hIqLAEAqpRAoLgFFgH4M9gWxTaHgWTiGlDs+rUJbNGDiLkKZDe9bdFH54z24G0BUS20KTMABc7ITi6yh52xYwOhUOA+kEphOw1pQRhi67Ok0FkmoQ2/qWzlSKcb+aPeHv+6Ighbv4G1i+8U3Pmn67N0Q8ffbEb4D+1he/ZiexSOoRhNG7JExtII/OXBEUgNYtqgcXPXdx3NlPI7NFnJsS3b83z9PVT1eMi7XzzU7Df+v2vO7fCmjErQZMjZVnJACyWz4/6wlqGkBUgSwtAhg8L4u+nL8BGLV8Ag1zHI3nhP+1hzsKbq0o8KfOOSLhUU01Z2dP4vOqhZEFG2Woj0itEuqBX3Ygy/R7vgMFuRQEs8ol4iVyFTVHhpzU7KoSQjFWYczCcTrWAnn9cZG9SiCtg43FwsVYSAx+gNuDgXJNovOAlIvQFVYuAMzHP2cEr+wUXnZFAphizrjTMgKo0MiLhJHeNKSx8mLIZUEgswPI2FR2UyMYC7Fuz10Cz93lzTz4vByySsJA/MKtkQved14Z4cd+Kq8AIzuj1ETNdCR5KWqgs4OZKgAsLx1hGbOIBaNLevjm6Tg9Cy+phfAwLEIIpDEwzuSmlysjGLvCXKkrzHRdjg4pL6NEMU+O6+VIXi6xONvq3QYAAhtLHW3j1hankXCtnS5qY1OM6rl753J7AyZgjC0mnYJKdjoRQhibGum2OkXlwTbvqeL2k6dzfhUIUMFcrkim5eKlM8ajDDqjaQAjYzDL29FhEeU2z0volp2rkurFdPYEH66tzBTISCqIgh2LVwboQghXSg1koYutuPUKYTcS3lPPFKQIAgZvpOMMadxphDCU6gHOqC5Ii0UAegS3ebnACIt18VIW3e5pujCLUMFllMI0y4m+XBnpauaFrwwjEmM1C5nb71l3Fol02qMSMI+pIX3t7He/+11Utq975MPz7PDHP/4xi0C/PuVCFqH7FobuZ3g06KnPrOAciunaUHaBXprjQMLOSHcd9qQ/KuFC6Cr35EoKGOBDs1eN27PLzsGtwUMyh6lLiXBSCgo7XYUUSYUY6d218ZteLZf7nyg/woGNHb4a6HiG+iy7KTudMn2djGgZV6aL8zIdOwAY1yzKkaZc2lQewCzdWRNGtEYCa1QlhRGM1B2wo0YneVl48dhglKYxoDO9e+/yPNjxEjWvrb16/elnjzGbthMqQ7gKTwlToUNj2poY8wYodmurZlNKB3Tqv97t6NiyIJHRKhGAQihopRJrK6rKRJtCXKGMouAZIU2JqYP84Dw9MvV7lqJ04YEU5UBmPefrrCI03QYAABN5SURBVM5Rm1xzbZxNyGscmL/tHN20eqSzjBJZpSfP5gAhtJiMVs8ieIhNEVXNMool2IApXBReYwVrBAM5wMsjJ7CQYGTKO7Fg7PSMBzVTigop8fAqhkjBztiyU1CxK4wOVkjlsSvG66um+jIVJRxY44wF4pROOBHuaoAHpgKi4nIc5SLwACs4K/gQfH5+IQcORpGCDkkgRZkGMDJaZ8wKt
vjK2/YFbrVqK7y9bZsRUQ6fBiFNyxVzKYSYVjZlBZ7oVEbdiYVkaVnEClGnFYDBz8vVKl0uGXwC4JDS6yoEe3Sty527c8dmzIudEoaeCwMjqrGcMimkKC5V0hP+JWTpMcjWs1FxQhIhkNXpZ8RZrLLiGWOTwpSYghEYPdsyjsGGM26nvT7jPOlZvKX04hsvEiPC08GlQfVHi4oAEBYwa9chhxerHe+k8H4E3y0ryqsoXuVzx4IBFtvxkMvNySuEstcLpBocS8x4wIBfvHzmNLCoLjqM6pelLl6+mEuVBWDntU8cGV6pX7+Z5y58HCwv51o3N7+p9tWsGCOxriySNteOKEiBV4BUHc9fO2ST9givf1+eB+PtY0eJhR3DKezO7fvzDkCiAHb8klI6XcFMTzETYimsQHh1xs/OEhtL9VNOO/NWAieVKLHV0BoWC3wTP6nPAvLCR6JrIovKjcTOIQpGbv/M+p6LgjLUb5NN7Ovr8p6OigV7/OSzOhIlNbsoIzbKFHCECwAPI51NXhjIABTCmFIgnVcUuyixBI9Ax5G9OoEJABhiurKn8iPsxz+nDMDBXC4fj977gj+RMnMN7K05F2S0Po6X7AKnhvOHQoEw1w0ynZoS96vkcF/4S22i4JoNHIZXC+rfFmAcR4sPENJIZ5dIVfD0dVHOYljMaap0jNXMYpOYKoyeFOuAnOM/N3tRXDAs+ANgwMzOQmJgVFuNcEGWXWH1Xp2MpsAAyBlZclWDsVKjAoufvbzhC2chdEfZziTq9OvAlc1VOrHqdIVh15Rp4Uby6eN5tYMLnuI1HjsHT5UrIxiqzsQvPHpvjso5WYwCucRaz4o5zs+fAnW9RYJcGYWgTcQCVKdwbC6DKmRRgEqqttNZyKX6ggXcFEZpjBnp6MSbWkRTSl1ht1iMwEbTJeznYESJZeSKrZroLKiMBODRoy/EYMpbiOWrT1HbsNQCLQEkXUutchUK5O1I8C7AEywMm0IsvED3CTp7265FVDOwWHYwoiRgU8z4WYxysR//Wx+8/74HHZ42Iaw7eMfS36hcPbsoe61P2Q6Ge8///vgj71zH4P4EzyU1Kodtu9MaACmLg+7KGXmrBCnLKeDXDqgjps5nzy9PZPVSkQ6W8Dh3RMLtnS5cGmQ/yzaLXGDrQ89lyqUjiiPgv6JaGSRJRjrwvE/xvDUU2NqytJiqihYzo1EUAaPHQ0nPyAtJeNkp7NisHgs2K4w2fpZaDgx/6GfQI1ibk50Cw4itqlCx4HG8HCMAssbhP9vMK76OpqRErOPoIu4HSqyPeMzYyi68AsBYUFkxkpGF1B1kDbIoyQhDGLGlQILpDg9MW6UKSxSbkQgxKgle5XThtUkZ6gMwMoLZw/J8Om+wmL/sesMFgWxxWJLqETLKXKku54UCGG0oeAzsYgkS1TpSHiN6vKBsOi+8x4uyW2oLwoJ/2VBxmTaWDiBOC75dU8Ak1aNzDjKwwAoW0oLHn5GXOBoCydHnckwBUNKmjp9R5bzx64heXuDreTFXecbsQoCNXazgSeEs2NKVxy7ElF267Ac+A5f6wSjYVrjSubSA0LSVVA8eEsbKxIO5nQDvwkXEOhy8NkmxatjTisJOROF3RmAA3nqsA8tmVyEJoxjkhCJESSlGIgSnCuGxUSDxG7lYhMyF2+7hYyKmqPmUsmkYQU2VYvugNjWypGQRTllwXjws6Y2FDNUNsKRgBAlAU4BKoqiQiCJ42I1grbUs1lQ4i3D6FhYVQFT3vBvBiXHNTvd/Wd1g3FSwzds8r1cxb9Sdw2BZVHXKRTt753Si+Qm/PiBSHvO3v/UNVJ5C9TjFn52kVp03CbtI+vu7g+QrAI1Pnvhav196X4J0zlLfnOS3Ezsr9IhWwSd29hxmTc3hHM/ldmtqcu/uvBdIW2CAKkxUDaCpZ8/n7XYysiNxlBxxzGJZTtSJ9Ett58/douC5dOr0otSp7akMazLFeDvs+euCyzQv5Ofrc/7C0eXJZkEV2yvxzpazWVl0Jx3BrxingaowJwqqfQoA2K5JFkXuTsgyq3yeiunUIwM8FrP1ZAEuKfAKZjzOPfyMRrq8oii8Z9PNqSEWUgpFpuMXAmNkYQ9mjF+Hb17ccseCKYRdGZiBXV4Zgds2keBHG2cWeomAWZqyKJWwYDCNB9Vs1+vj92XYqEM1z8YUIFxHuaKKbXjPQak81T59MWcWsL9Dm94smJ14FLI8vrgyQSKR0bMrFoV1CGoHybgU73OqezKe5wEIrX9Z6K2JKRJlo1QnaRGMMHiMMVMIchiKqJS86ewaGdwRGN6ELlYiombGvNqXhb6r136rESGQ1SyFfVi4Rkyd3XYgvHCXBSMjNqNAhMTDU5ZEUbkoLBXQeKnyHHFeRmPHEVLePrOcHQl8ucqiQnjnGmTeBw/nqwZ4LYhi3B5cMUzrCw9YiWrQ72OBAW8KXuKM0yCldHQiBNIyClEeS8tIjwRAJewtHb01LxeqWlMPXcZ5scUECEU+OuEWc7PcGnDZZuQiLHSJSUgkBbKEUSEjr07CFx4gvdWBh+yyFTKq8gq/mUWp9XnOgllcJBjsEkgusRSwoqoWzAaydTTIy8jLSPF9EEKEe3zBqAzFWEdTJKZ0IbK0EbvtryVOPOcB6YS7XgsURbTJAuO7A+xX5XknhUo85Xry6WMf8rVZI1QYkg6KqO2dwqWwuzPMrX2XGh6S96Sa7cIyoHNQqoEerauk98o4f2IWCE/OagxGd4fn8mCFRSJedko8jNeQ+Urc2T/3Zz+REzsnoRCjtP6TxZQAWPpOXe1z2eXseudC205gZxRFsYz0qu0Q17h+iRAZY8YArzUtI+ww4a/+UpQFWzKF3brVajhApuwwCJEYK8lIpMNfLzCKIUN4HktJBODAWRAYU3+7+uTxp589fWLHQIote/ySUuqFIsRIlC0XhSsvl9gKo/PSq6QoSVVbbK7CeUmxRiLWVZexXWFaYdawpItngbH43oDtlO+i40MpjEN6xPpUoe0mIxuSSxvm18ucx3rYMDjujIfz8i4b2Z8+n1OM4ioJpn46ng4ufkZRLFbmMMxuJCw1xSvEeipgluxcbbgY27SQSWVXZyQxB4aX/Ve/+jhCRkgwI11qO0T7MGoDZjddWHl55cKg7PYVmMevKmljBAOgRE4BxkOqkzeAEQk7WjzHfLl5KMa0VeLKi9DyGblYDuVwSmGTaEEZuRijbZVclKRwaLylSzG+mxTelM5OgJFIuscFPzZ2SMcOIQWmZeQVRZeOAmmteNUmxGaofjBRXCyL5wJDaBQiVuWmwLf/03/+s/nnCGVFGrxF1qTSp7e351EJFhEAxBSYiF0jnYXrxdN5aUuThMKoODKR5/CwiMIsZPAvJgvXEjJKLZyFPSQLEYKDhYs4tHYGxbXJW8NNz/6el5WBxSJ5dm6flHiMerGTOmcQqk3Gsx6z7UQVS4FkKRanY2xleClGudyEvvnN35bXbQkMj2PgW5HwN8WA3xHqIE1rz+aJIK+MvAjpYXgrQGoCRnqfsM8/8XbFtySKqWD7QXjX6OPVgmc1
c+ttN+DxEyTbu7UHK9xNB+b5q7nwEblEyQKsNWL7ygKMRJ0Cuc5TsreePZ9viGAXqEU18Ip18gpESwfglUN5pniseZ8y9ljBgkB2clYScps4HnmRHPLLjpdCebMgZ1sjx6kAJOXKywIpVjpgDPAsyIVM49fHAVwCjXuAwLLgxCOc4vWrCMtinHuRHTjPwOfgjqUT7M0tX8o9P6l1FlMNSGAk1azWKoax1EgwK0/LlLyS2sxiWdDyztY5ry+ZskuXQKIiFAKDIUx4/Ad5uSVzWe1qzkUXglMW+rDMtW4+tRDMW3Hy4knhJv2BGpz+4HywAc+KFwO55NJ4pVKE6wjtvQdzOQYuI6Ni8HjPkdUGYycsdDzWmJ4R2DLitGI1rilU+kVCzuG4XIhM1QDguLu8trUA2D3E3G3j2QULr3R4ALic4JRZj3M1cCLwolJGqcsutWLAyCzFEUb9SgqDmZ2FjoEOSdGIksTmMmWMs43NIhcjgEec/aFBLMJqiESRX/3KfHMbNi9N82rB/qFgUCq7srWzi+Ydy5plxCacXV5gOhHOy1V271t+5+35/sxKLTuM7ABICKV+KylLqySvlrsLWhutoULCS+cSyy4dUQbRLxd7azXd8pk0P8gLevWbivhqZVyd5TfCN0TRuYAXzyuvaSTKooBV1uIzmiaFy8VOYu7ZG+M2go1uFeCtuFgW6yKEhWIFLQq7hWbhZcFAwlAqiRcPYyQyQroj2vcutbLgh3EYHAN7iN0frSLhLR0jNhsFxvF2LLEpg5FSajqwvAiNnc94krLTYbyMPMg7ozuQAi2wXAjBkpCHUHfTPotGUipMFopdISOlAoSUsdF0RS4hrTkjPZf9JvbV61klXnbZFMOL1rnJS3ir0JLR1Wm0/naqEC5lsNDzAsRg+i9K9XBVkhRCSsS460CvTrB0LjphN3rAfOq8/E3bKtkb6unUciAUZrqx7RxTuTq1LveHc0xDttQ+LKxIL3Z1u6o8GcNUQHXWtXrQCiHIM2YxisqiWkiBKXgiqaRWAMC+MqqQJTyGEzKrxKJIIgotxeaMkKvslPM+fPPL35zcrso1rl+XLMbnr+bhF1gjxYGhW0mHOzt+GS8n3ZVHbA02wiTZywurSIG6tkpNubBVQLBr6PybhZc4lEZRwvFQeCOJjddPH9iTXRmcuW0DJS0+BmNGqcuCh5FOIW4SyNdFgXQINnVItEuepZGxRE0RhlSnI2XqamOsux2B9ah+49K2Q4DDG2+6fPAGJwbpdC0LPABhx2NaSCNySCKk3o3EQWEpJELNdojZqwEnErHVKd3JM2tLAaMEKBfkTitjHnDBlZ6y7l0mlpsCD1wUe7GF34StrpOFiUrCl4JOIVGhhC9Fgcb1slsLU0YNnB4uZw5X54M1AvCgoLWjq8FoLYgPRLmhv31/HoM/e/lEM7wPzt9XfWvDrICv4XBazpk6pbx6/kI17m/2B/FswDv9PHnyKNu3H3368tU7777z5S9+ycv6X3rvi1//ylc/+pTnl7/4xfwZTArXC3+UUpunFN5sOC+tvJrHFDrwf2spuzoBWhl9TWHnC+tqX0mKJ63PqyfnEM4FZ3ZMLutwLgsD8R8ZnvPNFF7hmD9AXR/ZyeWlB7GWQhk2kjrp8IlG5jJ1Pj0zuqV4PT957HUg49wcvW9Z6mMfV89Em7o5qc4X9vgjx737r95SwbnxO8QO4rlSf3J90Vw6x8tZrQyrpOWqqgwtmKr2UtY/+4fLQtW+8eyEy/0SVjjj1Hmee7EAG9ei5RYcjB0bKWmYfYbKXiA7cbz2rO66aSln4/l+9/NqEpJ4pCD3vJP2etYJr9R4jJgjF5JipNdONTeViwuhUSI87ABWjyVhSWLmBUsAcvVveDAChvbqvTyqBeDq2ZXbVVOHW2EIhaihENPeyH7I3ng1hf1m0nJZRlkGfIQSlS9UoSMsxRyG6wOjSNBuFAwSgTaMNbdnmrLUAiRMgYxoEVsiJPAEzEqy0BnVEqezoFhTAGFdl5EQXlJftWmEia2O1t4U3hIRtCXaqkJml06/LJIGEG6KvCiA6eGMKqkGSMwAkRjZYYgTip1osEqMrVL8MKaEgtDtCr8aWFpPzFyWCJ6SIAGedubhxyWXLGKJWA/fkejaKAQ4UQaqqg3Ma8peYIkq9RQ+A1cWY0aWCSx3vq0s5V8cl5GX3pjyL+IVvfnKHWyrVId1aY0o1g2McckZK1JgRVMyIvf8xGIBswSgs9vQFC4h252lJIuk8wKzWGijkJWqjafNBOOhjVf2hMjVEyb3MJhIPC7/2ttfc0+yaRx7h8ToWTmwQLmk0DhmqRmJDwawl9S0Tk2Pc4aKF5iLwtgo0LrZRYdhIz5X4im2mpHYXfBJnDezWK3NXlWNv7GSYYRrhG4keOj45eqAXho7FC2REDMjEWJ9lNdmEK6qqjXKSCif93ND48KTARUFUgoMdBbT9FPS5Y8upQagyO6AenN6JAjhHTJGXrcrukNMOmTxONYCuTTILlagEN/7LilMbKVQz2Agrg8bpSBgygtsrP4C2YUQikS8kdiBSMSyE9kP0+dntXA8CR1YLAy9cYqYMi6nCST+BEYWI8GMv5C9XZnCexQCxlvNYznidlUuo7Vjj8o4dv/duOXkHftlHYaQ0TSLUZ2VwZ4sIRe89QFzFIymqqIs8hR1GeDZTYyn/Vkli+lutPYySgHmwaWXT/B3CjMCS1QLkCwCjbUghL7CRcq9RkhS5VxITNVMshuBBbJQIMPQHSBeLrpYAM/5bE7rw85SrJEI9BORYsEEmoKJpeuowuLnMuV9+WrWnzEGI53A0/Hwlp3FUvgYMqlCMLmqcCuhkPKCRRgDvb5MMRtJ+JNzBpYwRi7TjLJQ/j9nGvDexVYwcAAAAABJRU5ErkJggg==",
- "text/plain": [
- ""
- ]
- },
- "execution_count": null,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"# Lets create a prompt.\n",
"\n",
@@ -278,7 +182,7 @@
"import requests\n",
"from PIL import Image\n",
"\n",
- "from sglang.srt.conversation import chat_templates\n",
+ "from sglang.srt.parser.conversation import chat_templates\n",
"\n",
"image = Image.open(\n",
" BytesIO(\n",
@@ -312,96 +216,7 @@
"execution_count": null,
"id": "14",
"metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Loading safetensors checkpoint shards: 0% Completed | 0/50 [00:00, ?it/s]\n",
- "Loading safetensors checkpoint shards: 2% Completed | 1/50 [00:22<18:10, 22.26s/it]\n",
- "Loading safetensors checkpoint shards: 4% Completed | 2/50 [00:44<17:44, 22.17s/it]\n",
- "Loading safetensors checkpoint shards: 6% Completed | 3/50 [01:06<17:24, 22.22s/it]\n",
- "Loading safetensors checkpoint shards: 8% Completed | 4/50 [01:28<16:55, 22.07s/it]\n",
- "Loading safetensors checkpoint shards: 10% Completed | 5/50 [01:50<16:28, 21.96s/it]\n",
- "Loading safetensors checkpoint shards: 12% Completed | 6/50 [02:11<15:59, 21.80s/it]\n",
- "Loading safetensors checkpoint shards: 14% Completed | 7/50 [02:34<15:52, 22.14s/it]\n",
- "Loading safetensors checkpoint shards: 16% Completed | 8/50 [02:54<15:05, 21.57s/it]\n",
- "Loading safetensors checkpoint shards: 18% Completed | 9/50 [03:17<14:51, 21.74s/it]\n",
- "Loading safetensors checkpoint shards: 20% Completed | 10/50 [03:29<12:31, 18.79s/it]\n",
- "Loading safetensors checkpoint shards: 22% Completed | 11/50 [03:32<09:10, 14.13s/it]\n",
- "Loading safetensors checkpoint shards: 24% Completed | 12/50 [03:36<06:53, 10.89s/it]\n",
- "Loading safetensors checkpoint shards: 26% Completed | 13/50 [03:39<05:19, 8.65s/it]\n",
- "Loading safetensors checkpoint shards: 28% Completed | 14/50 [03:43<04:15, 7.09s/it]\n",
- "Loading safetensors checkpoint shards: 30% Completed | 15/50 [03:46<03:29, 6.00s/it]\n",
- "Loading safetensors checkpoint shards: 32% Completed | 16/50 [03:50<02:57, 5.23s/it]\n",
- "Loading safetensors checkpoint shards: 34% Completed | 17/50 [03:53<02:35, 4.73s/it]\n",
- "Loading safetensors checkpoint shards: 36% Completed | 18/50 [03:57<02:18, 4.33s/it]\n",
- "Loading safetensors checkpoint shards: 38% Completed | 19/50 [04:00<02:06, 4.09s/it]\n",
- "Loading safetensors checkpoint shards: 40% Completed | 20/50 [04:04<01:56, 3.87s/it]\n",
- "Loading safetensors checkpoint shards: 42% Completed | 21/50 [04:07<01:48, 3.74s/it]\n",
- "Loading safetensors checkpoint shards: 44% Completed | 22/50 [04:11<01:43, 3.71s/it]\n",
- "Loading safetensors checkpoint shards: 46% Completed | 23/50 [04:14<01:37, 3.63s/it]\n",
- "Loading safetensors checkpoint shards: 48% Completed | 24/50 [04:18<01:33, 3.60s/it]\n",
- "Loading safetensors checkpoint shards: 50% Completed | 25/50 [04:21<01:26, 3.45s/it]\n",
- "Loading safetensors checkpoint shards: 52% Completed | 26/50 [04:21<01:02, 2.61s/it]\n",
- "Loading safetensors checkpoint shards: 54% Completed | 27/50 [04:25<01:06, 2.91s/it]\n",
- "Loading safetensors checkpoint shards: 56% Completed | 28/50 [04:28<01:07, 3.09s/it]\n",
- "Loading safetensors checkpoint shards: 58% Completed | 29/50 [04:32<01:07, 3.20s/it]\n",
- "Loading safetensors checkpoint shards: 60% Completed | 30/50 [04:35<01:05, 3.25s/it]\n",
- "Loading safetensors checkpoint shards: 62% Completed | 31/50 [04:39<01:02, 3.30s/it]\n",
- "Loading safetensors checkpoint shards: 64% Completed | 32/50 [04:42<01:00, 3.37s/it]\n",
- "Loading safetensors checkpoint shards: 66% Completed | 33/50 [04:46<00:58, 3.45s/it]\n",
- "Loading safetensors checkpoint shards: 68% Completed | 34/50 [04:49<00:55, 3.45s/it]\n",
- "Loading safetensors checkpoint shards: 70% Completed | 35/50 [04:53<00:51, 3.45s/it]\n",
- "Loading safetensors checkpoint shards: 72% Completed | 36/50 [04:56<00:48, 3.46s/it]\n",
- "Loading safetensors checkpoint shards: 74% Completed | 37/50 [05:00<00:44, 3.45s/it]\n",
- "Loading safetensors checkpoint shards: 76% Completed | 38/50 [05:03<00:41, 3.45s/it]\n",
- "Loading safetensors checkpoint shards: 78% Completed | 39/50 [05:07<00:38, 3.50s/it]\n",
- "Loading safetensors checkpoint shards: 80% Completed | 40/50 [05:10<00:34, 3.49s/it]\n",
- "Loading safetensors checkpoint shards: 82% Completed | 41/50 [05:14<00:31, 3.49s/it]\n",
- "Loading safetensors checkpoint shards: 84% Completed | 42/50 [05:17<00:27, 3.47s/it]\n",
- "Loading safetensors checkpoint shards: 86% Completed | 43/50 [05:20<00:24, 3.43s/it]\n",
- "Loading safetensors checkpoint shards: 88% Completed | 44/50 [05:24<00:20, 3.46s/it]\n",
- "Loading safetensors checkpoint shards: 90% Completed | 45/50 [05:27<00:17, 3.44s/it]\n",
- "Loading safetensors checkpoint shards: 92% Completed | 46/50 [05:31<00:13, 3.44s/it]\n",
- "Loading safetensors checkpoint shards: 94% Completed | 47/50 [05:34<00:10, 3.43s/it]\n",
- "Loading safetensors checkpoint shards: 96% Completed | 48/50 [05:38<00:06, 3.43s/it]\n",
- "Loading safetensors checkpoint shards: 98% Completed | 49/50 [05:41<00:03, 3.45s/it]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Setting sliding_window_size to be attention_chunk_size: 8192Setting sliding_window_size to be attention_chunk_size: 8192\n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Loading safetensors checkpoint shards: 100% Completed | 50/50 [05:44<00:00, 3.43s/it]\n",
- "Loading safetensors checkpoint shards: 100% Completed | 50/50 [05:44<00:00, 6.90s/it]\n",
- "\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Setting sliding_window_size to be attention_chunk_size: 8192\n",
- "Setting sliding_window_size to be attention_chunk_size: 8192\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Capturing batches (bs=1 avail_mem=21.53 GB): 100%|██████████| 35/35 [00:15<00:00, 2.25it/s] \n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"from sglang.test.test_utils import is_in_ci\n",
"\n",
@@ -424,15 +239,7 @@
"execution_count": null,
"id": "15",
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "The image depicts a man ironing clothing on the back of a yellow SUV in a city street, with another yellow taxi passing by. The man is wearing a yellow shirt and appears to be ironing a blue shirt on a makeshift ironing board set up behind the SUV. The scene suggests that the man may be a street vendor or someone who is trying to make a living by providing ironing services to people on the go.\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"if not is_in_ci():\n",
" out = llm.generate(prompt=conv.get_prompt(), image_data=[image])\n",
@@ -452,22 +259,7 @@
"execution_count": null,
"id": "17",
"metadata": {},
- "outputs": [
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "0eae2e36d07d42b89bc4b5ac7d62f226",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "Loading checkpoint shards: 0%| | 0/50 [00:00, ?it/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
+ "outputs": [],
"source": [
"if not is_in_ci():\n",
" # Compute the image embeddings using Huggingface.\n",
@@ -488,16 +280,7 @@
"execution_count": null,
"id": "18",
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "processed_prompt[\"pixel_values\"].shape=torch.Size([5, 3, 336, 336])\n",
- "The image depicts a man ironing on a makeshift ironing board set up on the back of a yellow SUV, in the middle of a busy street. The man is wearing a yellow shirt and appears to be ironing a blue shirt. In the background, there are other yellow taxis and tall buildings, suggesting that the scene is set in a city, likely New York City. The overall scene is one of a person going about their daily activities in a busy urban environment.\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"if not is_in_ci():\n",
" processed_prompt = processor(\n",
diff --git a/docs/basic_usage/deepseek.md b/docs/basic_usage/deepseek.md
index 9522bba6a40..8a71696f5b0 100644
--- a/docs/basic_usage/deepseek.md
+++ b/docs/basic_usage/deepseek.md
@@ -5,9 +5,9 @@ SGLang provides many optimizations specifically designed for the DeepSeek models
This document outlines current optimizations for DeepSeek.
For an overview of the implemented features see the completed [Roadmap](https://github.com/sgl-project/sglang/issues/2591).
-## Launch DeepSeek V3 with SGLang
+## Launch DeepSeek V3.1/V3/R1 with SGLang
-To run DeepSeek V3/R1 models, the requirements are as follows:
+To run DeepSeek V3.1/V3/R1 models, the recommended settings are as follows:
| Weight Type | Configuration |
|------------|-------------------|
@@ -104,7 +104,7 @@ Overall, with these optimizations, we have achieved up to **7x** acceleration in
-**Usage**: MLA optimization is enabled by default. For MLA models on Blackwell architecture (e.g., B200), the default backend is FlashInfer. To use the optimized TRTLLM MLA backend for decode operations, explicitly specify `--attention-backend trtllm_mla`. Note that TRTLLM MLA only optimizes decode operations - prefill operations (including multimodal inputs) will fall back to FlashInfer MLA.
+**Usage**: MLA optimization is enabled by default. For MLA models on Blackwell architecture (e.g., B200), the default backend is FlashInfer. To use the optimized TRTLLM MLA backend for prefill and decode operations, explicitly specify `--attention-backend trtllm_mla`.
**Reference**: Check [Blog](https://lmsys.org/blog/2024-09-04-sglang-v0-3/#deepseek-multi-head-latent-attention-mla-throughput-optimizations) and [Slides](https://github.com/sgl-project/sgl-learning-materials/blob/main/slides/lmsys_1st_meetup_deepseek_mla.pdf) for more details.
@@ -153,7 +153,7 @@ python3 -m sglang.compile_deep_gemm --model deepseek-ai/DeepSeek-V3 --tp 8 --tru
The precompilation process typically takes around 10 minutes to complete.
### Multi-token Prediction
-**Description**: SGLang implements DeepSeek V3 Multi-Token Prediction (MTP) based on [EAGLE speculative decoding](https://docs.sglang.ai/backend/speculative_decoding.html#EAGLE-Decoding). With this optimization, the decoding speed can be improved by **1.8x** for batch size 1 and **1.5x** for batch size 32 respectively on H200 TP8 setting.
+**Description**: SGLang implements DeepSeek V3 Multi-Token Prediction (MTP) based on [EAGLE speculative decoding](https://docs.sglang.ai/advanced_features/speculative_decoding.html#EAGLE-Decoding). With this optimization, the decoding speed can be improved by **1.8x** for batch size 1 and **1.5x** for batch size 32 respectively on H200 TP8 setting.
**Usage**:
Add arguments `--speculative-algorithm`, `--speculative-num-steps`, `--speculative-eagle-topk` and `--speculative-num-draft-tokens` to enable this feature. For example:
@@ -167,9 +167,9 @@ python3 -m sglang.launch_server --model-path deepseek-ai/DeepSeek-V3-0324 --spec
- Set `--cuda-graph-bs`. It's a list of batch sizes for cuda graph capture. The default captured batch sizes for speculative decoding is set [here](https://github.com/sgl-project/sglang/blob/49420741746c8f3e80e0eb17e7d012bfaf25793a/python/sglang/srt/model_executor/cuda_graph_runner.py#L126). You can include more batch sizes into it.
-### Reasoning Content for DeepSeek R1
+### Reasoning Content for DeepSeek R1 & V3.1
-See [Separate Reasoning](https://docs.sglang.ai/backend/separate_reasoning.html).
+See [Reasoning Parser](https://docs.sglang.ai/advanced_features/separate_reasoning.html) and [Thinking Parameter for DeepSeek V3.1](https://docs.sglang.ai/basic_usage/openai_api_completions.html#Example:-DeepSeek-V3-Models).
### Function calling for DeepSeek Models
diff --git a/docs/basic_usage/gpt_oss.md b/docs/basic_usage/gpt_oss.md
index 777b518f570..240463ec4d6 100644
--- a/docs/basic_usage/gpt_oss.md
+++ b/docs/basic_usage/gpt_oss.md
@@ -1,3 +1,114 @@
# GPT OSS Usage
Please refer to [https://github.com/sgl-project/sglang/issues/8833](https://github.com/sgl-project/sglang/issues/8833).
+
+## Responses API & Built-in Tools
+
+### Responses API
+
+GPT‑OSS is compatible with the OpenAI Responses API. Use `client.responses.create(...)` with `model`, `instructions`, `input`, and optional `tools` to enable built‑in tool use.
+
+### Built-in Tools
+
+GPT‑OSS can call built‑in tools for web search and Python execution. You can use the demo tool server or connect to external MCP tool servers.
+
+#### Python Tool
+
+- Executes short Python snippets for calculations, parsing, and quick scripts.
+- By default, the tool runs in a Docker-based sandbox. To run on the host instead, set `PYTHON_EXECUTION_BACKEND=UV` (this executes model-generated code locally; use with care).
+- Ensure Docker is available if you are not using the UV backend. It is recommended to run `docker pull python:3.11` in advance.
+
+#### Web Search Tool
+
+- Uses the Exa backend for web search.
+- Requires an Exa API key; set `EXA_API_KEY` in your environment. Create a key at `https://exa.ai`.
+
+### Tool & Reasoning Parser
+
+- We support the OpenAI Reasoning and Tool Call parsers, as well as SGLang's native API for tool calling and reasoning. Refer to [reasoning parser](../advanced_features/separate_reasoning.ipynb) and [tool call parser](../advanced_features/function_calling.ipynb) for more details.
+
+
+## Notes
+
+- Use **Python 3.12** for the demo tools, and install the required `gpt-oss` packages.
+- The default demo integrates the web search tool (Exa backend) and a demo Python interpreter via Docker.
+- For search, set `EXA_API_KEY`. For Python execution, either have Docker available or set `PYTHON_EXECUTION_BACKEND=UV`.
+
+Examples:
+```bash
+export EXA_API_KEY=YOUR_EXA_KEY
+# Optional: run Python tool locally instead of Docker (use with care)
+export PYTHON_EXECUTION_BACKEND=UV
+```
+
+Launch the server with the demo tool server:
+
+`python3 -m sglang.launch_server --model-path openai/gpt-oss-120b --tool-server demo --tp 2`
+
+For production usage, SGLang can act as an MCP client for multiple services. An [example tool server](https://github.com/openai/gpt-oss/tree/main/gpt-oss-mcp-server) is provided. Start the servers and point SGLang to them:
+```bash
+mcp run -t sse browser_server.py:mcp
+mcp run -t sse python_server.py:mcp
+
+python -m sglang.launch_server ... --tool-server ip-1:port-1,ip-2:port-2
+```
+The URLs should be MCP SSE servers that expose server information and well-documented tools. These tools are added to the system prompt so the model can use them.
+
+### Quick Demo
+
+```python
+from openai import OpenAI
+
+client = OpenAI(
+ base_url="http://localhost:30000/v1",
+ api_key="sk-123456"
+)
+
+tools = [
+ {"type": "code_interpreter"},
+ {"type": "web_search_preview"},
+]
+
+# Test python tool
+response = client.responses.create(
+ model="openai/gpt-oss-120b",
+ instructions="You are a helfpul assistant, you could use python tool to execute code.",
+ input="Use python tool to calculate the sum of 29138749187 and 29138749187", # 58,277,498,374
+ tools=tools
+)
+print("====== test python tool ======")
+print(response.output_text)
+
+# Test browser tool
+response = client.responses.create(
+ model="openai/gpt-oss-120b",
+ instructions="You are a helfpul assistant, you could use browser to search the web",
+ input="Search the web for the latest news about Nvidia stock price",
+ tools=tools
+)
+print("====== test browser tool ======")
+print(response.output_text)
+```
+
+Example output:
+```
+====== test python tool ======
+The sum of 29,138,749,187 and 29,138,749,187 is **58,277,498,374**.
+====== test browser tool ======
+**Recent headlines on Nvidia (NVDA) stock**
+
+| Date (2025) | Source | Key news points | Stock‑price detail |
+|-------------|--------|----------------|--------------------|
+| **May 13** | Reuters | The market data page shows Nvidia trading “higher” at **$116.61** with no change from the previous close. | **$116.61** – latest trade (delayed ≈ 15 min)【14†L34-L38】 |
+| **Aug 18** | CNBC | Morgan Stanley kept an **overweight** rating and lifted its price target to **$206** (up from $200), implying a 14 % upside from the Friday close. The firm notes Nvidia shares have already **jumped 34 % this year**. | No exact price quoted, but the article signals strong upside expectations【9†L27-L31】 |
+| **Aug 20** | The Motley Fool | Nvidia is set to release its Q2 earnings on Aug 27. The article lists the **current price of $175.36**, down 0.16 % on the day (as of 3:58 p.m. ET). | **$175.36** – current price on Aug 20【10†L12-L15】【10†L53-L57】 |
+
+**What the news tells us**
+
+* Nvidia’s share price has risen sharply this year – up roughly a third according to Morgan Stanley – and analysts are still raising targets (now $206).
+* The most recent market quote (Reuters, May 13) was **$116.61**, but the stock has surged since then, reaching **$175.36** by mid‑August.
+* Upcoming earnings on **Aug 27** are a focal point; both the Motley Fool and Morgan Stanley expect the results could keep the rally going.
+
+**Bottom line:** Nvidia’s stock is on a strong upward trajectory in 2025, with price targets climbing toward $200‑$210 and the market price already near $175 as of late August.
+
+```
diff --git a/docs/basic_usage/native_api.ipynb b/docs/basic_usage/native_api.ipynb
index 33dffea7451..3221b4deffb 100644
--- a/docs/basic_usage/native_api.ipynb
+++ b/docs/basic_usage/native_api.ipynb
@@ -43,7 +43,7 @@
"from sglang.utils import wait_for_server, print_highlight, terminate_process\n",
"\n",
"server_process, port = launch_server_cmd(\n",
- " \"python3 -m sglang.launch_server --model-path qwen/qwen2.5-0.5b-instruct --host 0.0.0.0\"\n",
+ " \"python3 -m sglang.launch_server --model-path qwen/qwen2.5-0.5b-instruct --host 0.0.0.0 --log-level warning\"\n",
")\n",
"\n",
"wait_for_server(f\"http://localhost:{port}\")"
@@ -267,7 +267,7 @@
"embedding_process, port = launch_server_cmd(\n",
" \"\"\"\n",
"python3 -m sglang.launch_server --model-path Alibaba-NLP/gte-Qwen2-1.5B-instruct \\\n",
- " --host 0.0.0.0 --is-embedding\n",
+ " --host 0.0.0.0 --is-embedding --log-level warning\n",
"\"\"\"\n",
")\n",
"\n",
@@ -316,7 +316,7 @@
"reranker_process, port = launch_server_cmd(\n",
" \"\"\"\n",
"python3 -m sglang.launch_server --model-path BAAI/bge-reranker-v2-m3 \\\n",
- " --host 0.0.0.0 --disable-radix-cache --chunked-prefill-size -1 --attention-backend triton --is-embedding\n",
+ " --host 0.0.0.0 --disable-radix-cache --chunked-prefill-size -1 --attention-backend triton --is-embedding --log-level warning\n",
"\"\"\"\n",
")\n",
"\n",
@@ -376,7 +376,7 @@
"\n",
"reward_process, port = launch_server_cmd(\n",
" \"\"\"\n",
- "python3 -m sglang.launch_server --model-path Skywork/Skywork-Reward-Llama-3.1-8B-v0.2 --host 0.0.0.0 --is-embedding\n",
+ "python3 -m sglang.launch_server --model-path Skywork/Skywork-Reward-Llama-3.1-8B-v0.2 --host 0.0.0.0 --is-embedding --log-level warning\n",
"\"\"\"\n",
")\n",
"\n",
@@ -441,7 +441,7 @@
"outputs": [],
"source": [
"expert_record_server_process, port = launch_server_cmd(\n",
- " \"python3 -m sglang.launch_server --model-path Qwen/Qwen1.5-MoE-A2.7B --host 0.0.0.0 --expert-distribution-recorder-mode stat\"\n",
+ " \"python3 -m sglang.launch_server --model-path Qwen/Qwen1.5-MoE-A2.7B --host 0.0.0.0 --expert-distribution-recorder-mode stat --log-level warning\"\n",
")\n",
"\n",
"wait_for_server(f\"http://localhost:{port}\")"
@@ -466,7 +466,19 @@
"print_highlight(response)\n",
"\n",
"response = requests.post(f\"http://localhost:{port}/dump_expert_distribution_record\")\n",
- "print_highlight(response)"
+ "print_highlight(response)\n",
+ "\n",
+ "import glob\n",
+ "\n",
+ "output_file = glob.glob(\"expert_distribution_*.csv\")[0]\n",
+ "with open(output_file, \"r\") as f:\n",
+ " print_highlight(\"\\n| Layer ID | Expert ID | Count |\")\n",
+ " print_highlight(\"|----------|-----------|--------|\")\n",
+ " next(f)\n",
+ " for i, line in enumerate(f):\n",
+ " if i < 9:\n",
+ " layer_id, expert_id, count = line.strip().split(\",\")\n",
+ " print_highlight(f\"| {layer_id:8} | {expert_id:9} | {count:6} |\")"
]
},
{
diff --git a/docs/basic_usage/openai_api_completions.ipynb b/docs/basic_usage/openai_api_completions.ipynb
index 9d8a9a52f11..6b967709fca 100644
--- a/docs/basic_usage/openai_api_completions.ipynb
+++ b/docs/basic_usage/openai_api_completions.ipynb
@@ -36,7 +36,7 @@
"from sglang.utils import wait_for_server, print_highlight, terminate_process\n",
"\n",
"server_process, port = launch_server_cmd(\n",
- " \"python3 -m sglang.launch_server --model-path qwen/qwen2.5-0.5b-instruct --host 0.0.0.0\"\n",
+ " \"python3 -m sglang.launch_server --model-path qwen/qwen2.5-0.5b-instruct --host 0.0.0.0 --log-level warning\"\n",
")\n",
"\n",
"wait_for_server(f\"http://localhost:{port}\")\n",
@@ -78,6 +78,153 @@
"print_highlight(f\"Response: {response}\")"
]
},
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Model Thinking/Reasoning Support\n",
+ "\n",
+ "Some models support internal reasoning or thinking processes that can be exposed in the API response. SGLang provides unified support for various reasoning models through the `chat_template_kwargs` parameter and compatible reasoning parsers.\n",
+ "\n",
+ "#### Supported Models and Configuration\n",
+ "\n",
+ "| Model Family | Chat Template Parameter | Reasoning Parser | Notes |\n",
+ "|--------------|------------------------|------------------|--------|\n",
+ "| DeepSeek-R1 (R1, R1-0528, R1-Distill) | `enable_thinking` | `--reasoning-parser deepseek-r1` | Standard reasoning models |\n",
+ "| DeepSeek-V3.1 | `thinking` | `--reasoning-parser deepseek-v3` | Hybrid model (thinking/non-thinking modes) |\n",
+ "| Qwen3 (standard) | `enable_thinking` | `--reasoning-parser qwen3` | Hybrid model (thinking/non-thinking modes) |\n",
+ "| Qwen3-Thinking | N/A (always enabled) | `--reasoning-parser qwen3-thinking` | Always generates reasoning |\n",
+ "| Kimi | N/A (always enabled) | `--reasoning-parser kimi` | Kimi thinking models |\n",
+ "| Gpt-Oss | N/A (always enabled) | `--reasoning-parser gpt-oss` | Gpt-Oss thinking models |\n",
+ "\n",
+ "#### Basic Usage\n",
+ "\n",
+ "To enable reasoning output, you need to:\n",
+ "1. Launch the server with the appropriate reasoning parser\n",
+ "2. Set the model-specific parameter in `chat_template_kwargs`\n",
+ "3. Optionally use `separate_reasoning: False` to not get reasoning content separately (default to `True`)\n",
+ "\n",
+ "**Note for Qwen3-Thinking models:** These models always generate thinking content and do not support the `enable_thinking` parameter. Use `--reasoning-parser qwen3-thinking` or `--reasoning-parser qwen3` to parse the thinking content.\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### Example: Qwen3 Models\n",
+ "\n",
+ "```python\n",
+ "# Launch server:\n",
+ "# python3 -m sglang.launch_server --model Qwen/Qwen3-4B --reasoning-parser qwen3\n",
+ "\n",
+ "from openai import OpenAI\n",
+ "\n",
+ "client = OpenAI(\n",
+ " api_key=\"EMPTY\",\n",
+ " base_url=f\"http://127.0.0.1:30000/v1\",\n",
+ ")\n",
+ "\n",
+ "model = \"Qwen/Qwen3-4B\"\n",
+ "messages = [{\"role\": \"user\", \"content\": \"How many r's are in 'strawberry'?\"}]\n",
+ "\n",
+ "response = client.chat.completions.create(\n",
+ " model=model,\n",
+ " messages=messages,\n",
+ " extra_body={\n",
+ " \"chat_template_kwargs\": {\"enable_thinking\": True},\n",
+ " \"separate_reasoning\": True\n",
+ " }\n",
+ ")\n",
+ "\n",
+ "print(\"Reasoning:\", response.choices[0].message.reasoning_content)\n",
+ "print(\"-\"*100)\n",
+ "print(\"Answer:\", response.choices[0].message.content)\n",
+ "```\n",
+ "\n",
+ "**ExampleOutput:**\n",
+ "```\n",
+ "Reasoning: Okay, so the user is asking how many 'r's are in the word 'strawberry'. Let me think. First, I need to make sure I have the word spelled correctly. Strawberry... S-T-R-A-W-B-E-R-R-Y. Wait, is that right? Let me break it down.\n",
+ "\n",
+ "Starting with 'strawberry', let's write out the letters one by one. S, T, R, A, W, B, E, R, R, Y. Hmm, wait, that's 10 letters. Let me check again. S (1), T (2), R (3), A (4), W (5), B (6), E (7), R (8), R (9), Y (10). So the letters are S-T-R-A-W-B-E-R-R-Y. \n",
+ "...\n",
+ "Therefore, the answer should be three R's in 'strawberry'. But I need to make sure I'm not counting any other letters as R. Let me check again. S, T, R, A, W, B, E, R, R, Y. No other R's. So three in total. Yeah, that seems right.\n",
+ "\n",
+ "----------------------------------------------------------------------------------------------------\n",
+ "Answer: The word \"strawberry\" contains **three** letters 'r'. Here's the breakdown:\n",
+ "\n",
+ "1. **S-T-R-A-W-B-E-R-R-Y** \n",
+ " - The **third letter** is 'R'. \n",
+ " - The **eighth and ninth letters** are also 'R's. \n",
+ "\n",
+ "Thus, the total count is **3**. \n",
+ "\n",
+ "**Answer:** 3.\n",
+ "```\n",
+ "\n",
+ "**Note:** Setting `\"enable_thinking\": False` (or omitting it) will result in `reasoning_content` being `None`. Qwen3-Thinking models always generate reasoning content and don't support the `enable_thinking` parameter.\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### Example: DeepSeek-V3 Models\n",
+ "\n",
+ "DeepSeek-V3 models support thinking mode through the `thinking` parameter:\n",
+ "\n",
+ "```python\n",
+ "# Launch server:\n",
+ "# python3 -m sglang.launch_server --model deepseek-ai/DeepSeek-V3.1 --tp 8 --reasoning-parser deepseek-v3\n",
+ "\n",
+ "from openai import OpenAI\n",
+ "\n",
+ "client = OpenAI(\n",
+ " api_key=\"EMPTY\",\n",
+ " base_url=f\"http://127.0.0.1:30000/v1\",\n",
+ ")\n",
+ "\n",
+ "model = \"deepseek-ai/DeepSeek-V3.1\"\n",
+ "messages = [{\"role\": \"user\", \"content\": \"How many r's are in 'strawberry'?\"}]\n",
+ "\n",
+ "response = client.chat.completions.create(\n",
+ " model=model,\n",
+ " messages=messages,\n",
+ " extra_body={\n",
+ " \"chat_template_kwargs\": {\"thinking\": True},\n",
+ " \"separate_reasoning\": True\n",
+ " }\n",
+ ")\n",
+ "\n",
+ "print(\"Reasoning:\", response.choices[0].message.reasoning_content)\n",
+ "print(\"-\"*100)\n",
+ "print(\"Answer:\", response.choices[0].message.content)\n",
+ "```\n",
+ "\n",
+ "**Example Output:**\n",
+ "```\n",
+ "Reasoning: First, the question is: \"How many r's are in 'strawberry'?\"\n",
+ "\n",
+ "I need to count the number of times the letter 'r' appears in the word \"strawberry\".\n",
+ "\n",
+ "Let me write out the word: S-T-R-A-W-B-E-R-R-Y.\n",
+ "\n",
+ "Now, I'll go through each letter and count the 'r's.\n",
+ "...\n",
+ "So, I have three 'r's in \"strawberry\".\n",
+ "\n",
+ "I should double-check. The word is spelled S-T-R-A-W-B-E-R-R-Y. The letters are at positions: 3, 8, and 9 are 'r's. Yes, that's correct.\n",
+ "\n",
+ "Therefore, the answer should be 3.\n",
+ "----------------------------------------------------------------------------------------------------\n",
+ "Answer: The word \"strawberry\" contains **3** instances of the letter \"r\". Here's a breakdown for clarity:\n",
+ "\n",
+ "- The word is spelled: S-T-R-A-W-B-E-R-R-Y\n",
+ "- The \"r\" appears at the 3rd, 8th, and 9th positions.\n",
+ "```\n",
+ "\n",
+ "**Note:** DeepSeek-V3 models use the `thinking` parameter (not `enable_thinking`) to control reasoning output.\n"
+ ]
+ },
{
"cell_type": "markdown",
"metadata": {},
@@ -144,75 +291,6 @@
" print(chunk.choices[0].delta.content, end=\"\")"
]
},
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Enabling Model Thinking/Reasoning\n",
- "\n",
- "You can use `chat_template_kwargs` to enable or disable the model's internal thinking or reasoning process output. Set `\"enable_thinking\": True` within `chat_template_kwargs` to include the reasoning steps in the response. This requires launching the server with a compatible reasoning parser.\n",
- "\n",
- "**Reasoning Parser Options:**\n",
- "- `--reasoning-parser deepseek-r1`: For DeepSeek-R1 family models (R1, R1-0528, R1-Distill)\n",
- "- `--reasoning-parser qwen3`: For both standard Qwen3 models that support `enable_thinking` parameter and Qwen3-Thinking models\n",
- "- `--reasoning-parser qwen3-thinking`: For Qwen3-Thinking models, force reasoning version of qwen3 parser\n",
- "- `--reasoning-parser kimi`: For Kimi thinking models\n",
- "\n",
- "Here's an example demonstrating how to enable thinking and retrieve the reasoning content separately (using `separate_reasoning: True`):\n",
- "\n",
- "```python\n",
- "# For Qwen3 models with enable_thinking support:\n",
- "# python3 -m sglang.launch_server --model-path QwQ/Qwen3-32B-250415 --reasoning-parser qwen3 ...\n",
- "\n",
- "from openai import OpenAI\n",
- "\n",
- "# Modify OpenAI's API key and API base to use SGLang's API server.\n",
- "openai_api_key = \"EMPTY\"\n",
- "openai_api_base = f\"http://127.0.0.1:{port}/v1\" # Use the correct port\n",
- "\n",
- "client = OpenAI(\n",
- " api_key=openai_api_key,\n",
- " base_url=openai_api_base,\n",
- ")\n",
- "\n",
- "model = \"QwQ/Qwen3-32B-250415\" # Use the model loaded by the server\n",
- "messages = [{\"role\": \"user\", \"content\": \"9.11 and 9.8, which is greater?\"}]\n",
- "\n",
- "response = client.chat.completions.create(\n",
- " model=model,\n",
- " messages=messages,\n",
- " extra_body={\n",
- " \"chat_template_kwargs\": {\"enable_thinking\": True},\n",
- " \"separate_reasoning\": True\n",
- " }\n",
- ")\n",
- "\n",
- "print(\"response.choices[0].message.reasoning_content: \\n\", response.choices[0].message.reasoning_content)\n",
- "print(\"response.choices[0].message.content: \\n\", response.choices[0].message.content)\n",
- "```\n",
- "\n",
- "**Example Output:**\n",
- "\n",
- "```\n",
- "response.choices[0].message.reasoning_content: \n",
- " Okay, so I need to figure out which number is greater between 9.11 and 9.8. Hmm, let me think. Both numbers start with 9, right? So the whole number part is the same. That means I need to look at the decimal parts to determine which one is bigger.\n",
- "...\n",
- "Therefore, after checking multiple methods—aligning decimals, subtracting, converting to fractions, and using a real-world analogy—it's clear that 9.8 is greater than 9.11.\n",
- "\n",
- "response.choices[0].message.content: \n",
- " To determine which number is greater between **9.11** and **9.8**, follow these steps:\n",
- "...\n",
- "**Answer**: \n",
- "9.8 is greater than 9.11.\n",
- "```\n",
- "\n",
- "Setting `\"enable_thinking\": False` (or omitting it) will result in `reasoning_content` being `None`.\n",
- "\n",
- "**Note for Qwen3-Thinking models:** These models always generate thinking content and do not support the `enable_thinking` parameter. Use `--reasoning-parser qwen3-thinking` or `--reasoning-parser qwen3` to parse the thinking content.\n",
- "\n",
- "Here is an example of a detailed chat completion request using standard OpenAI parameters:"
- ]
- },
{
"cell_type": "markdown",
"metadata": {},
diff --git a/docs/basic_usage/openai_api_embeddings.ipynb b/docs/basic_usage/openai_api_embeddings.ipynb
index 9c7c99c0f19..26e95a4e7c1 100644
--- a/docs/basic_usage/openai_api_embeddings.ipynb
+++ b/docs/basic_usage/openai_api_embeddings.ipynb
@@ -33,7 +33,7 @@
"embedding_process, port = launch_server_cmd(\n",
" \"\"\"\n",
"python3 -m sglang.launch_server --model-path Alibaba-NLP/gte-Qwen2-1.5B-instruct \\\n",
- " --host 0.0.0.0 --is-embedding\n",
+ " --host 0.0.0.0 --is-embedding --log-level warning\n",
"\"\"\"\n",
")\n",
"\n",
diff --git a/docs/basic_usage/openai_api_vision.ipynb b/docs/basic_usage/openai_api_vision.ipynb
index 3669f5ca6d3..88d1ef7ddf0 100644
--- a/docs/basic_usage/openai_api_vision.ipynb
+++ b/docs/basic_usage/openai_api_vision.ipynb
@@ -35,7 +35,7 @@
"\n",
"vision_process, port = launch_server_cmd(\n",
" \"\"\"\n",
- "python3 -m sglang.launch_server --model-path Qwen/Qwen2.5-VL-7B-Instruct\n",
+ "python3 -m sglang.launch_server --model-path Qwen/Qwen2.5-VL-7B-Instruct --log-level warning\n",
"\"\"\"\n",
")\n",
"\n",
diff --git a/docs/basic_usage/qwen3.md b/docs/basic_usage/qwen3.md
new file mode 100644
index 00000000000..89295be60f8
--- /dev/null
+++ b/docs/basic_usage/qwen3.md
@@ -0,0 +1,27 @@
+# Qwen3-Next Usage
+
+SGLang has supported Qwen3-Next-80B-A3B-Instruct and Qwen3-Next-80B-A3B-Thinking since [this PR](https://github.com/sgl-project/sglang/pull/10233).
+
+## Launch Qwen3-Next with SGLang
+
+To serve Qwen3-Next models on 4xH100/H200 GPUs:
+
+```bash
+python3 -m sglang.launch_server --model Qwen/Qwen3-Next-80B-A3B-Instruct --tp 4
+```
+
+### Configuration Tips
+- `--max-mamba-cache-size`: Increase this value to enlarge the mamba cache and raise the maximum number of concurrently running requests. As a trade-off, it reduces the space available for the KV cache, so tune it according to your workload (see the example below).
+- `--mamba-ssm-dtype`: `bfloat16` or `float32`. Use `bfloat16` to reduce the mamba cache size, or `float32` for more accurate results. The default is `float32`.
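+
+A minimal sketch combining both flags (the cache-size value here is purely illustrative, not a tuned recommendation):
+
+```bash
+python3 -m sglang.launch_server --model Qwen/Qwen3-Next-80B-A3B-Instruct --tp 4 \
+  --max-mamba-cache-size 2048 --mamba-ssm-dtype bfloat16
+```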
+
+### EAGLE Speculative Decoding
+**Description**: SGLang has supported Qwen3-Next models with [EAGLE speculative decoding](https://docs.sglang.ai/advanced_features/speculative_decoding.html#EAGLE-Decoding).
+
+**Usage**:
+Add arguments `--speculative-algorithm`, `--speculative-num-steps`, `--speculative-eagle-topk` and `--speculative-num-draft-tokens` to enable this feature. For example:
+
+``` bash
+python3 -m sglang.launch_server --model Qwen/Qwen3-Next-80B-A3B-Instruct --tp 4 --speculative-num-steps 3 --speculative-eagle-topk 1 --speculative-num-draft-tokens 4 --speculative-algo NEXTN
+```
+
+Details can be seen in [this PR](https://github.com/sgl-project/sglang/pull/10233).
diff --git a/docs/basic_usage/send_request.ipynb b/docs/basic_usage/send_request.ipynb
index b53bd356037..6e457a02b12 100644
--- a/docs/basic_usage/send_request.ipynb
+++ b/docs/basic_usage/send_request.ipynb
@@ -34,7 +34,7 @@
"server_process, port = launch_server_cmd(\n",
" \"\"\"\n",
"python3 -m sglang.launch_server --model-path qwen/qwen2.5-0.5b-instruct \\\n",
- " --host 0.0.0.0\n",
+ " --host 0.0.0.0 --log-level warning\n",
"\"\"\"\n",
")\n",
"\n",
diff --git a/docs/developer_guide/bench_serving.md b/docs/developer_guide/bench_serving.md
new file mode 100644
index 00000000000..82f7aa2afe3
--- /dev/null
+++ b/docs/developer_guide/bench_serving.md
@@ -0,0 +1,334 @@
+## Bench Serving Guide
+
+This guide explains how to benchmark online serving throughput and latency using `python -m sglang.bench_serving`. It supports multiple inference backends via OpenAI-compatible and native endpoints, and produces both console metrics and optional JSONL outputs.
+
+### What it does
+
+- Generates synthetic or dataset-driven prompts and submits them to a target serving endpoint
+- Measures throughput, time-to-first-token (TTFT), inter-token latency (ITL), per-request end-to-end latency, and more
+- Supports streaming or non-streaming modes, rate control, and concurrency limits
+
+### Supported backends and endpoints
+
+- `sglang` / `sglang-native`: `POST /generate`
+- `sglang-oai`, `vllm`, `lmdeploy`: `POST /v1/completions`
+- `sglang-oai-chat`, `vllm-chat`, `lmdeploy-chat`: `POST /v1/chat/completions`
+- `trt` (TensorRT-LLM): `POST /v2/models/ensemble/generate_stream`
+- `gserver`: custom server (not yet implemented in this script)
+- `truss`: `POST /v1/models/model:predict`
+
+If `--base-url` is provided, requests are sent to it. Otherwise, `--host` and `--port` are used. When `--model` is not provided, the script will attempt to query `GET /v1/models` for an available model ID (OpenAI-compatible endpoints).
+
+### Prerequisites
+
+- Python 3.8+
+- Dependencies typically used by this script: `aiohttp`, `numpy`, `requests`, `tqdm`, `transformers`, and for some datasets `datasets`, `pillow`, `pybase64`. Install as needed.
+- An inference server running and reachable via the endpoints above
+- If your server requires authentication, set the environment variable `OPENAI_API_KEY`; it is sent as an `Authorization: Bearer <OPENAI_API_KEY>` header
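+
+For example, the dependencies listed above can be installed with pip (the last three are only needed for the mmmu and image datasets):
+
+```bash
+pip install aiohttp numpy requests tqdm transformers
+pip install datasets pillow pybase64
+```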
+
+### Quick start
+
+Run a basic benchmark against an sglang server exposing `/generate`:
+
+```bash
+python3 -m sglang.launch_server --model-path meta-llama/Llama-3.1-8B-Instruct
+```
+
+```bash
+python3 -m sglang.bench_serving \
+ --backend sglang \
+ --host 127.0.0.1 --port 30000 \
+ --num-prompts 1000 \
+ --model meta-llama/Llama-3.1-8B-Instruct
+```
+
+Or, using an OpenAI-compatible endpoint (completions):
+
+```bash
+python3 -m sglang.bench_serving \
+ --backend vllm \
+ --base-url http://127.0.0.1:8000 \
+ --num-prompts 1000 \
+ --model meta-llama/Llama-3.1-8B-Instruct
+```
+
+### Datasets
+
+Select with `--dataset-name`:
+
+- `sharegpt` (default): loads ShareGPT-style pairs; optionally restrict with `--sharegpt-context-len` and override outputs with `--sharegpt-output-len`
+- `random`: random text lengths; sampled from ShareGPT token space
+- `random-ids`: random token ids (can lead to gibberish)
+- `random-image`: generates random images and wraps them in chat messages; supports custom resolutions via 'heightxwidth' format
+- `generated-shared-prefix`: synthetic dataset with shared long system prompts and short questions
+- `mmmu`: samples from MMMU (Math split) and includes images
+
+Common dataset flags:
+
+- `--num-prompts N`: number of requests
+- `--random-input-len`, `--random-output-len`, `--random-range-ratio`: for random/random-ids/random-image
+- `--random-image-num-images`, `--random-image-resolution`: for random-image dataset (supports presets 1080p/720p/360p or custom 'heightxwidth' format)
+- `--apply-chat-template`: apply tokenizer chat template when constructing prompts
+- `--dataset-path PATH`: file path for the ShareGPT JSON; if left empty and the file is missing, it will be downloaded and cached automatically
+
+Generated Shared Prefix flags (for `generated-shared-prefix`):
+
+- `--gsp-num-groups`
+- `--gsp-prompts-per-group`
+- `--gsp-system-prompt-len`
+- `--gsp-question-len`
+- `--gsp-output-len`
+
+Random Image dataset flags (for `random-image`):
+
+- `--random-image-num-images`: Number of images per request
+- `--random-image-resolution`: Image resolution; supports presets (1080p, 720p, 360p) or custom 'heightxwidth' format (e.g., 1080x1920, 512x768)
+
+### Examples
+
+1. To benchmark the random-image dataset with 3 images per request, 500 prompts, 512 input length, and 512 output length, you can run:
+
+```bash
+python -m sglang.launch_server --model-path Qwen/Qwen2.5-VL-3B-Instruct --disable-radix-cache
+```
+
+```bash
+python -m sglang.bench_serving \
+ --backend sglang-oai-chat \
+ --dataset-name random-image \
+ --num-prompts 500 \
+ --random-image-num-images 3 \
+ --random-image-resolution 720p \
+ --random-input-len 512 \
+ --random-output-len 512
+```
+
+2. To benchmark the random dataset with 3000 prompts, 1024 input length, and 1024 output length, you can run:
+
+```bash
+python -m sglang.launch_server --model-path Qwen/Qwen2.5-3B-Instruct
+```
+
+```bash
+python3 -m sglang.bench_serving \
+ --backend sglang \
+ --dataset-name random \
+ --num-prompts 3000 \
+ --random-input-len 1024 \
+ --random-output-len 1024 \
+ --random-range-ratio 0.5
+```
+
+### Choosing model and tokenizer
+
+- `--model` is required unless the backend exposes `GET /v1/models`, in which case the first model ID is auto-selected.
+- `--tokenizer` defaults to `--model`. Both can be HF model IDs or local paths (see the sketch after this list).
+- For ModelScope workflows, setting `SGLANG_USE_MODELSCOPE=true` enables fetching via ModelScope (weights are skipped for speed).
+- If your tokenizer lacks a chat template, the script warns because token counting can be less robust for gibberish outputs.
+
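+A minimal sketch of overriding the tokenizer (the model name and tokenizer path are placeholders):
+
+```bash
+python3 -m sglang.bench_serving \
+  --backend sglang \
+  --host 127.0.0.1 --port 30000 \
+  --model meta-llama/Llama-3.1-8B-Instruct \
+  --tokenizer /path/to/local/tokenizer \
+  --num-prompts 100
+```
+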
+### Rate, concurrency, and streaming
+
+- `--request-rate`: requests per second. `inf` sends all immediately (burst). Non-infinite rate uses a Poisson process for arrival times.
+- `--max-concurrency`: caps concurrent in-flight requests regardless of the arrival rate (see the sketch below).
+- `--disable-stream`: switch to non-streaming mode when supported; TTFT then equals total latency for chat completions.
+
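+An illustrative sketch (the numbers are arbitrary): send 1000 prompts at roughly 8 requests/second while keeping at most 64 requests in flight.
+
+```bash
+python3 -m sglang.bench_serving \
+  --backend sglang \
+  --host 127.0.0.1 --port 30000 \
+  --model meta-llama/Llama-3.1-8B-Instruct \
+  --num-prompts 1000 \
+  --request-rate 8 \
+  --max-concurrency 64
+```
+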
+### Other key options
+
+- `--output-file FILE.jsonl`: append JSONL results to file; auto-named if unspecified
+- `--output-details`: include per-request arrays (generated texts, errors, ttfts, itls, input/output lens)
+- `--extra-request-body '{"top_p":0.9,"temperature":0.6}'`: merged into the request payload (sampling params, etc.); a combined example is shown after this list
+- `--disable-ignore-eos`: pass through EOS behavior (varies by backend)
+- `--warmup-requests N`: run warmup requests with short output first (default 1)
+- `--flush-cache`: call `/flush_cache` (sglang) before main run
+- `--profile`: call `/start_profile` and `/stop_profile` (requires server to enable profiling, e.g., `SGLANG_TORCH_PROFILER_DIR`)
+- `--lora-name name1 name2 ...`: randomly pick one per request and pass to backend (e.g., `lora_path` for sglang)
+- `--tokenize-prompt`: send integer IDs instead of text (currently supports `--backend sglang` only)
+
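+An illustrative combination of these flags (the values are arbitrary):
+
+```bash
+python3 -m sglang.bench_serving \
+  --backend sglang \
+  --host 127.0.0.1 --port 30000 \
+  --model meta-llama/Llama-3.1-8B-Instruct \
+  --num-prompts 500 \
+  --warmup-requests 3 \
+  --extra-request-body '{"temperature":0.6,"top_p":0.9}' \
+  --output-file my_run.jsonl --output-details
+```
+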
+### Authentication
+
+If your target endpoint requires OpenAI-style auth, set:
+
+```bash
+export OPENAI_API_KEY=sk-...yourkey...
+```
+
+The script will add `Authorization: Bearer $OPENAI_API_KEY` automatically for OpenAI-compatible routes.
+
+### Metrics explained
+
+Printed after each run:
+
+- Request throughput (req/s)
+- Input token throughput (tok/s)
+- Output token throughput (tok/s)
+- Total token throughput (tok/s)
+- Concurrency: aggregate time of all requests divided by wall time
+- End-to-End Latency (ms): mean/median/std/p99 per-request total latency
+- Time to First Token (TTFT, ms): mean/median/std/p99 for streaming mode
+- Inter-Token Latency (ITL, ms): mean/median/std/p95/p99/max between tokens
+- TPOT (ms): time per output token after the first, i.e., `(latency - ttft)/(tokens-1)`; for example, a request with 2000 ms latency, 500 ms TTFT, and 101 output tokens has a TPOT of (2000-500)/100 = 15 ms
+- Accept length (sglang-only, if available): speculative decoding accept length
+
+The script also retokenizes generated text with the configured tokenizer and reports "retokenized" counts.
+
+### JSONL output format
+
+When `--output-file` is set, one JSON object is appended per run (a quick way to inspect the latest record is shown after the field lists). Base fields:
+
+- Arguments summary: backend, dataset, request_rate, max_concurrency, etc.
+- Duration and totals: completed, total_input_tokens, total_output_tokens, retokenized totals
+- Throughputs and latency statistics as printed in the console
+- `accept_length` when available (sglang)
+
+With `--output-details`, an extended object also includes arrays:
+
+- `input_lens`, `output_lens`
+- `ttfts`, `itls` (per request: ITL arrays)
+- `generated_texts`, `errors`
+
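+A quick way to inspect the most recent record (replace `my_run.jsonl` with your `--output-file`):
+
+```bash
+tail -n 1 my_run.jsonl | python3 -m json.tool
+```
+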
+### End-to-end examples
+
+1) sglang native `/generate` (streaming):
+
+```bash
+python3 -m sglang.bench_serving \
+ --backend sglang \
+ --host 127.0.0.1 --port 30000 \
+ --model meta-llama/Llama-3.1-8B-Instruct \
+ --dataset-name random \
+ --random-input-len 1024 --random-output-len 1024 --random-range-ratio 0.5 \
+ --num-prompts 2000 \
+ --request-rate 100 \
+ --max-concurrency 512 \
+ --output-file sglang_random.jsonl --output-details
+```
+
+2) OpenAI-compatible Completions (e.g., vLLM):
+
+```bash
+python3 -m sglang.bench_serving \
+ --backend vllm \
+ --base-url http://127.0.0.1:8000 \
+ --model meta-llama/Llama-3.1-8B-Instruct \
+ --dataset-name sharegpt \
+ --num-prompts 1000 \
+ --sharegpt-output-len 256
+```
+
+3) OpenAI-compatible Chat Completions (streaming):
+
+```bash
+python3 -m sglang.bench_serving \
+ --backend vllm-chat \
+ --base-url http://127.0.0.1:8000 \
+ --model meta-llama/Llama-3.1-8B-Instruct \
+ --dataset-name random \
+ --num-prompts 500 \
+ --apply-chat-template
+```
+
+4) Random images (VLM) with chat template:
+
+```bash
+python3 -m sglang.bench_serving \
+ --backend sglang \
+ --host 127.0.0.1 --port 30000 \
+ --model your-vlm-model \
+ --dataset-name random-image \
+ --random-image-num-images 2 \
+ --random-image-resolution 720p \
+ --random-input-len 128 --random-output-len 256 \
+ --num-prompts 200 \
+ --apply-chat-template
+```
+
+4a) Random images with custom resolution:
+
+```bash
+python3 -m sglang.bench_serving \
+ --backend sglang \
+ --host 127.0.0.1 --port 30000 \
+ --model your-vlm-model \
+ --dataset-name random-image \
+ --random-image-num-images 1 \
+ --random-image-resolution 512x768 \
+ --random-input-len 64 --random-output-len 128 \
+ --num-prompts 100 \
+ --apply-chat-template
+```
+
+5) Generated shared prefix (long system prompts + short questions):
+
+```bash
+python3 -m sglang.bench_serving \
+ --backend sglang \
+ --host 127.0.0.1 --port 30000 \
+ --model meta-llama/Llama-3.1-8B-Instruct \
+ --dataset-name generated-shared-prefix \
+ --gsp-num-groups 64 --gsp-prompts-per-group 16 \
+ --gsp-system-prompt-len 2048 --gsp-question-len 128 --gsp-output-len 256 \
+ --num-prompts 1024
+```
+
+6) Tokenized prompts (ids) for strict length control (sglang only):
+
+```bash
+python3 -m sglang.bench_serving \
+ --backend sglang \
+ --host 127.0.0.1 --port 30000 \
+ --model meta-llama/Llama-3.1-8B-Instruct \
+ --dataset-name random \
+ --tokenize-prompt \
+ --random-input-len 2048 --random-output-len 256 --random-range-ratio 0.2
+```
+
+7) Profiling and cache flush (sglang):
+
+```bash
+python3 -m sglang.bench_serving \
+ --backend sglang \
+ --host 127.0.0.1 --port 30000 \
+ --model meta-llama/Llama-3.1-8B-Instruct \
+ --profile \
+ --flush-cache
+```
+
+8) TensorRT-LLM streaming endpoint:
+
+```bash
+python3 -m sglang.bench_serving \
+ --backend trt \
+ --base-url http://127.0.0.1:8000 \
+ --model your-trt-llm-model \
+ --dataset-name random \
+ --num-prompts 100 \
+ --disable-ignore-eos
+```
+
+9) Evaluating large-scale KVCache sharing with mooncake trace (sglang only):
+
+```bash
+python3 -m sglang.bench_serving \
+ --backend sglang \
+ --host 127.0.0.1 --port 30000 \
+ --model your-model-name \
+ --dataset-name mooncake \
+ --mooncake-slowdown-factor 1.0 \
+ --mooncake-num-rounds 1000 \
+ --mooncake-workload conversation|mooncake|agent|synthetic \
+ --use-trace-timestamps true \
+ --random-output-len 256
+```
+
+### Troubleshooting
+
+- All requests failed: verify `--backend`, server URL/port, `--model`, and authentication. Check warmup errors printed by the script.
+- Throughput seems too low: adjust `--request-rate` and `--max-concurrency`; verify server batch size/scheduling; ensure streaming is enabled if appropriate.
+- Token counts look odd: prefer chat/instruct models with proper chat templates; otherwise tokenization of gibberish may be inconsistent.
+- Random-image/MMMU datasets: ensure you installed extra deps (`pillow`, `datasets`, `pybase64`).
+- Authentication errors (401/403): set `OPENAI_API_KEY` or disable auth on your server.
+
+### Notes
+
+- The script raises the file descriptor soft limit (`RLIMIT_NOFILE`) to help with many concurrent connections.
+- For sglang, `/get_server_info` is queried post-run to report speculative decoding accept length when available.
diff --git a/docs/developer_guide/benchmark_and_profiling.md b/docs/developer_guide/benchmark_and_profiling.md
index 019805456c3..948c837ffaf 100644
--- a/docs/developer_guide/benchmark_and_profiling.md
+++ b/docs/developer_guide/benchmark_and_profiling.md
@@ -31,6 +31,7 @@
[Pytorch Profiler](https://pytorch.org/tutorials/recipes/recipes/profiler_recipe.html) is a convenient basic tool to inspect kernel execution time, call stack, and kernel overlap and occupancy.
### Profile a server with `sglang.bench_serving`
+
```bash
# set trace path
export SGLANG_TORCH_PROFILER_DIR=/root/sglang/profile_log
@@ -44,6 +45,8 @@ python -m sglang.bench_serving --backend sglang --model meta-llama/Llama-3.1-8B-
Please make sure that the `SGLANG_TORCH_PROFILER_DIR` should be set at both server and client side, otherwise the trace file cannot be generated correctly . A secure way will be setting `SGLANG_TORCH_PROFILER_DIR` in the `.*rc` file of shell (e.g. `~/.bashrc` for bash shells).
+For more details, please refer to [Bench Serving Guide](./bench_serving.md).
+
### Profile a server with `sglang.bench_offline_throughput`
```bash
export SGLANG_TORCH_PROFILER_DIR=/root/sglang/profile_log
diff --git a/docs/developer_guide/contribution_guide.md b/docs/developer_guide/contribution_guide.md
index db406a54470..f8e6f692da1 100644
--- a/docs/developer_guide/contribution_guide.md
+++ b/docs/developer_guide/contribution_guide.md
@@ -72,7 +72,27 @@ If you modify files protected by code owners, their approval is required to merg
- Avoid code duplication. If the same code snippet (more than five lines) appears multiple times, extract it into a shared function.
- Minimize device synchronization. Reduce expensive CPU-GPU synchronization operations, such as `tensor.item()` or `tensor.cpu()`, whenever possible. Use vectorized code.
- Keep files concise. If a file exceeds 2,000 lines of code, split it into multiple smaller files.
-- Prioritize extreme efficiency. SGLang is a runtime, and most of your code runs on the critical path for every request. Optimize every minor overhead as much as possible.
+- Prioritize extreme efficiency. SGLang is a runtime, and most of your code runs on the critical path for every request. Optimize all minor overheads as much as possible, especially in the model forward code.
+ - A common pattern is runtime checks in the model forward pass (e.g., [this](https://github.com/sgl-project/sglang/blob/f1b0eda55c2c4838e8ab90a0fac7fb1e3d7064ab/python/sglang/srt/models/deepseek_v2.py#L486-L491)). Their results are very likely the same for every layer, so cache them as a single boolean value whenever possible.
+- Strive to make functions as pure as possible. Avoid in-place modification of arguments.
+- When supporting new hardware or features, follow these guidelines:
+ - Do not drastically change existing code.
+ - Always prefer new files to introduce specific components for your new hardware (e.g., `allocator_ascend.py`).
+ - If you write multiple if/else blocks for new features, ensure the common path (e.g., NVIDIA hardware or the existing code path) is the first branch.
+
+## How to update sgl-kernel
+Since sglang and sgl-kernel are separate Python packages, our current GitHub CI infrastructure does not support updating a kernel and using it immediately within the same pull request (PR).
+To add a new kernel or modify an existing one in the sgl-kernel package, you must use multiple PRs.
+
+Follow these steps:
+
+1. Submit a PR to update the sgl-kernel source code without using it in the sglang Python package (e.g., [#8884](https://github.com/sgl-project/sglang/pull/8884/files)).
+2. Bump the version of sgl-kernel (e.g., [#9220](https://github.com/sgl-project/sglang/pull/9220/files)).
+ - Once merged, this will trigger an automatic release of the sgl-kernel wheel to PyPI.
+ - If not urgent, you can wait for other people to release the wheel. A new version will typically be released within one week.
+3. Apply the changes:
+ - Update the sgl-kernel version in `sglang/python/pyproject.toml` to use the modified kernels.
+ - Update the related caller code in sglang to use the new kernel.
## Tips for newcomers
diff --git a/docs/get_started/install.md b/docs/get_started/install.md
index 0517ba30a3c..e2e780e006f 100644
--- a/docs/get_started/install.md
+++ b/docs/get_started/install.md
@@ -12,20 +12,19 @@ It is recommended to use uv for faster installation:
```bash
pip install --upgrade pip
pip install uv
-uv pip install "sglang[all]>=0.5.0rc2"
+uv pip install "sglang[all]>=0.5.2"
```
**Quick fixes to common problems**
- If you encounter `OSError: CUDA_HOME environment variable is not set`. Please set it to your CUDA install root with either of the following solutions:
1. Use `export CUDA_HOME=/usr/local/cuda-` to set the `CUDA_HOME` environment variable.
2. Install FlashInfer first following [FlashInfer installation doc](https://docs.flashinfer.ai/installation.html), then install SGLang as described above.
-- SGLang currently uses torch 2.8 and flashinfer for torch 2.8. If you want to install flashinfer separately, please refer to [FlashInfer installation doc](https://docs.flashinfer.ai/installation.html). Please note that the FlashInfer pypi package is called `flashinfer-python` instead of `flashinfer`.
## Method 2: From source
```bash
# Use the last release branch
-git clone -b v0.5.0rc2 https://github.com/sgl-project/sglang.git
+git clone -b v0.5.2 https://github.com/sgl-project/sglang.git
cd sglang
# Install the python packages
@@ -35,7 +34,6 @@ pip install -e "python[all]"
**Quick fixes to common problems**
- If you want to develop SGLang, it is recommended to use docker. Please refer to [setup docker container](../developer_guide/development_guide_using_docker.md#setup-docker-container). The docker image is `lmsysorg/sglang:dev`.
-- SGLang currently uses torch 2.8 and flashinfer for torch 2.8. If you want to install flashinfer separately, please refer to [FlashInfer installation doc](https://docs.flashinfer.ai/installation.html). Please note that the FlashInfer pypi package is called `flashinfer-python` instead of `flashinfer`.
## Method 3: Using docker
diff --git a/docs/index.rst b/docs/index.rst
index 5eeca789280..f948fca247f 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -28,6 +28,7 @@ The core features include:
basic_usage/deepseek.md
basic_usage/gpt_oss.md
basic_usage/llama4.md
+ basic_usage/qwen3.md
.. toctree::
:maxdepth: 1
@@ -38,7 +39,7 @@ The core features include:
advanced_features/speculative_decoding.ipynb
advanced_features/structured_outputs.ipynb
advanced_features/structured_outputs_for_reasoning_models.ipynb
- advanced_features/function_calling.ipynb
+ advanced_features/tool_parser.ipynb
advanced_features/separate_reasoning.ipynb
advanced_features/quantization.md
advanced_features/lora.ipynb
@@ -79,6 +80,7 @@ The core features include:
developer_guide/contribution_guide.md
developer_guide/development_guide_using_docker.md
developer_guide/benchmark_and_profiling.md
+ developer_guide/bench_serving.md
.. toctree::
:maxdepth: 1
diff --git a/docs/platforms/amd_gpu.md b/docs/platforms/amd_gpu.md
index ff8fbd3411d..81d6d544ad5 100644
--- a/docs/platforms/amd_gpu.md
+++ b/docs/platforms/amd_gpu.md
@@ -44,7 +44,7 @@ You can install SGLang using one of the methods below.
```bash
# Use the last release branch
-git clone -b v0.5.0rc2 https://github.com/sgl-project/sglang.git
+git clone -b v0.5.2 https://github.com/sgl-project/sglang.git
cd sglang
# Compile sgl-kernel
diff --git a/docs/platforms/ascend_npu.md b/docs/platforms/ascend_npu.md
index 53fc009fb28..f57d3fe951c 100644
--- a/docs/platforms/ascend_npu.md
+++ b/docs/platforms/ascend_npu.md
@@ -1,4 +1,4 @@
-# SGLang on Ascend NPUs
+# Ascend NPUs
You can install SGLang using any of the methods below. Please go through `System Settings` section to ensure the clusters are roaring at max performance. Feel free to leave an issue [here at sglang](https://github.com/sgl-project/sglang/issues) if you encounter any issues or have any problems.
@@ -99,7 +99,7 @@ We are also providing a DeepEP-compatible Library as a drop-in replacement of de
```shell
# Use the last release branch
-git clone -b v0.5.0rc2 https://github.com/sgl-project/sglang.git
+git clone -b v0.5.2 https://github.com/sgl-project/sglang.git
cd sglang
pip install --upgrade pip
diff --git a/docs/platforms/cpu_server.md b/docs/platforms/cpu_server.md
index 348bf893695..97fad918d7b 100644
--- a/docs/platforms/cpu_server.md
+++ b/docs/platforms/cpu_server.md
@@ -84,13 +84,13 @@ git checkout
# Install SGLang dependent libs, and build SGLang main package
pip install --upgrade pip setuptools
conda install -y libsqlite==3.48.0 gperftools tbb libnuma numactl
-pip install intel-openmp
pip install -e "python[all_cpu]"
+pip install torch==2.7.1 torchvision==0.22.1 triton==3.3.1 --force-reinstall
# Build the CPU backend kernels
cd sgl-kernel
cp pyproject_cpu.toml pyproject.toml
-pip install -v .
+pip install .
# Other required environment variables
# Recommend to set these in ~/.bashrc in order not to set every time in a new terminal
@@ -134,8 +134,17 @@ Notes:
export SGLANG_CPU_OMP_THREADS_BIND="0-39|43-82|86-125|128-167|171-210|214-253"
```
-3. A warmup step is automatically triggered when the service is started.
-The server is ready when you see the log `The server is fired up and ready to roll!`.
+ Please be aware that with `SGLANG_CPU_OMP_THREADS_BIND` set,
+ the amount of memory available to each rank cannot be determined in advance.
+ You may need to set a proper `--max-total-tokens` to avoid out-of-memory errors.
+
+3. To optimize decoding with torch.compile, add the flag `--enable-torch-compile`.
+ To cap the batch size that gets compiled, set `--torch-compile-max-bs`.
+ For example, `--enable-torch-compile --torch-compile-max-bs 4` enables torch.compile with a
+ maximum batch size of 4 (see the sketch after this list).
+
+4. A warmup step is automatically triggered when the service is started.
+ The server is ready when you see the log `The server is fired up and ready to roll!`.
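+
+A minimal sketch of point 3 above (the model path is a placeholder; reuse the launch flags from the examples below):
+
+```bash
+python -m sglang.launch_server \
+  --model-path meta-llama/Llama-3.1-8B-Instruct \
+  --device cpu \
+  --enable-torch-compile --torch-compile-max-bs 4
+```
+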
## Benchmarking with Requests
@@ -159,7 +168,7 @@ python -m sglang.bench_serving -h
```
Additionally, the requests can be formed with
-[OpenAI Completions API](https://docs.sglang.ai/backend/openai_api_completions.html)
+[OpenAI Completions API](https://docs.sglang.ai/basic_usage/openai_api_completions.html)
and sent via the command line (e.g. using `curl`) or via your own script.
## Example: Running DeepSeek-R1
@@ -175,7 +184,6 @@ python -m sglang.launch_server \
--quantization w8a8_int8 \
--host 0.0.0.0 \
--mem-fraction-static 0.8 \
- --max-total-token 65536 \
--tp 6
```
@@ -189,7 +197,6 @@ python -m sglang.launch_server \
--device cpu \
--host 0.0.0.0 \
--mem-fraction-static 0.8 \
- --max-total-token 65536 \
--tp 6
```
diff --git a/docs/platforms/nvidia_jetson.md b/docs/platforms/nvidia_jetson.md
index 7a37e9426cf..362f60c8356 100644
--- a/docs/platforms/nvidia_jetson.md
+++ b/docs/platforms/nvidia_jetson.md
@@ -20,12 +20,16 @@ Run the installation script:
```
bash jetson-containers/install.sh
```
-Build the container:
+Build the container image:
```
-CUDA_VERSION=12.6 jetson-containers build sglang
+jetson-containers build sglang
```
Run the container:
```
+jetson-containers run $(autotag sglang)
+```
+Or you can also manually run a container with this command:
+```
docker run --runtime nvidia -it --rm --network=host IMAGE_NAME
```
* * * * *
@@ -69,7 +73,7 @@ Structured output with XGrammar
Please refer to [SGLang doc structured output](../advanced_features/structured_outputs.ipynb).
* * * * *
-Thanks to the support from [shahizat](https://github.com/shahizat).
+Thanks to the support from [Nurgaliyev Shakhizat](https://github.com/shahizat), [Dustin Franklin](https://github.com/dusty-nv) and [Johnny Núñez Cano](https://github.com/johnnynunez).
References
----------
diff --git a/docs/references/frontend/frontend_tutorial.ipynb b/docs/references/frontend/frontend_tutorial.ipynb
index 68fb916a1fc..836cab6273d 100644
--- a/docs/references/frontend/frontend_tutorial.ipynb
+++ b/docs/references/frontend/frontend_tutorial.ipynb
@@ -39,7 +39,7 @@
"from sglang.utils import print_highlight, terminate_process, wait_for_server\n",
"\n",
"server_process, port = launch_server_cmd(\n",
- " \"python -m sglang.launch_server --model-path Qwen/Qwen2.5-7B-Instruct --host 0.0.0.0\"\n",
+ " \"python -m sglang.launch_server --model-path Qwen/Qwen2.5-7B-Instruct --host 0.0.0.0 --log-level warning\"\n",
")\n",
"\n",
"wait_for_server(f\"http://localhost:{port}\")\n",
@@ -395,7 +395,7 @@
"outputs": [],
"source": [
"server_process, port = launch_server_cmd(\n",
- " \"python -m sglang.launch_server --model-path Qwen/Qwen2.5-VL-7B-Instruct --host 0.0.0.0\"\n",
+ " \"python -m sglang.launch_server --model-path Qwen/Qwen2.5-VL-7B-Instruct --host 0.0.0.0 --log-level warning\"\n",
")\n",
"\n",
"wait_for_server(f\"http://localhost:{port}\")\n",
diff --git a/docs/references/multi_node_deployment/lws_pd/lws-examples/lb.yaml b/docs/references/multi_node_deployment/lws_pd/lws-examples/lb.yaml
index da78615844f..4ca690969ab 100644
--- a/docs/references/multi_node_deployment/lws_pd/lws-examples/lb.yaml
+++ b/docs/references/multi_node_deployment/lws_pd/lws-examples/lb.yaml
@@ -27,7 +27,8 @@ spec:
command:
- python
- -m
- - sglang.srt.disaggregation.mini_lb
+ - sglang_router.launch_router
+ - --pd-disaggregation
- --prefill
- http://deepseekr10528-prefill-main:30000
- --decode
diff --git a/docs/references/multi_node_deployment/lws_pd/lws_pd_deploy.md b/docs/references/multi_node_deployment/lws_pd/lws_pd_deploy.md
index 617017077d6..eb8454997be 100644
--- a/docs/references/multi_node_deployment/lws_pd/lws_pd_deploy.md
+++ b/docs/references/multi_node_deployment/lws_pd/lws_pd_deploy.md
@@ -714,7 +714,8 @@ spec:
command:
- python
- -m
- - sglang.srt.disaggregation.mini_lb
+ - sglang_router.launch_router
+ - --pd-disaggregation
- --prefill
- http://deepseekr10528-prefill-main:30000
- --decode
diff --git a/docs/references/multi_node_deployment/multi_node.md b/docs/references/multi_node_deployment/multi_node.md
index 79b70e31111..204b6058693 100644
--- a/docs/references/multi_node_deployment/multi_node.md
+++ b/docs/references/multi_node_deployment/multi_node.md
@@ -20,7 +20,7 @@ python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3.1-405B-Instr
## DeepSeek V3/R1
-Please refer to [DeepSeek documents for reference](https://docs.sglang.ai/references/deepseek.html#running-examples-on-multi-node).
+Please refer to [DeepSeek documents for reference](https://docs.sglang.ai/basic_usage/deepseek.html#running-examples-on-multi-node).
## Multi-Node Inference on SLURM
diff --git a/docs/supported_models/generative_models.md b/docs/supported_models/generative_models.md
index 4d2c6eecb47..d6d3cdd4569 100644
--- a/docs/supported_models/generative_models.md
+++ b/docs/supported_models/generative_models.md
@@ -26,7 +26,8 @@ in the GitHub search bar.
| Model Family (Variants) | Example HuggingFace Identifier | Description |
|-------------------------------------|--------------------------------------------------|----------------------------------------------------------------------------------------|
| **DeepSeek** (v1, v2, v3/R1) | `deepseek-ai/DeepSeek-R1` | Series of advanced reasoning-optimized models (including a 671B MoE) trained with reinforcement learning; top performance on complex reasoning, math, and code tasks. [SGLang provides Deepseek v3/R1 model-specific optimizations](../basic_usage/deepseek.md) and [Reasoning Parser](../advanced_features/separate_reasoning.ipynb)|
-| **Qwen** (3, 3MoE, 2.5, 2 series) | `Qwen/Qwen3-0.6B`, `Qwen/Qwen3-30B-A3B` | Alibaba’s latest Qwen3 series for complex reasoning, language understanding, and generation tasks; Support for MoE variants along with previous generation 2.5, 2, etc. [SGLang provides Qwen3 specific reasoning parser](../advanced_features/separate_reasoning.ipynb)|
+| **GPT-OSS** | `openai/gpt-oss-20b`, `openai/gpt-oss-120b` | OpenAI’s latest GPT-OSS series for complex reasoning, agentic tasks, and versatile developer use cases.|
+| **Qwen** (3, 3MoE, 3Next, 2.5, 2 series) | `Qwen/Qwen3-0.6B`, `Qwen/Qwen3-30B-A3B`, `Qwen/Qwen3-Next-80B-A3B-Instruct` | Alibaba’s latest Qwen3 series for complex reasoning, language understanding, and generation tasks; Support for MoE variants along with previous generation 2.5, 2, etc. [SGLang provides Qwen3 specific reasoning parser](../advanced_features/separate_reasoning.ipynb)|
| **Llama** (2, 3.x, 4 series) | `meta-llama/Llama-4-Scout-17B-16E-Instruct` | Meta's open LLM series, spanning 7B to 400B parameters (Llama 2, 3, and new Llama 4) with well-recognized performance. [SGLang provides Llama-4 model-specific optimizations](../basic_usage/llama4.md) |
| **Mistral** (Mixtral, NeMo, Small3) | `mistralai/Mistral-7B-Instruct-v0.2` | Open 7B LLM by Mistral AI with strong performance; extended into MoE (“Mixtral”) and NeMo Megatron variants for larger scale. |
| **Gemma** (v1, v2, v3) | `google/gemma-3-1b-it` | Google’s family of efficient multilingual models (1B–27B); Gemma 3 offers a 128K context window, and its larger (4B+) variants support vision input. |
@@ -51,3 +52,5 @@ in the GitHub search bar.
| **Ling** (16.8B–290B) | `inclusionAI/Ling-lite`, `inclusionAI/Ling-plus` | InclusionAI’s open MoE models. Ling-Lite has 16.8B total / 2.75B active parameters, and Ling-Plus has 290B total / 28.8B active parameters. They are designed for high performance on NLP and complex reasoning tasks. |
| **Granite 3.0, 3.1** (IBM) | `ibm-granite/granite-3.1-8b-instruct` | IBM's open dense foundation models optimized for reasoning, code, and business AI use cases. Integrated with Red Hat and watsonx systems. |
| **Granite 3.0 MoE** (IBM) | `ibm-granite/granite-3.0-3b-a800m-instruct` | IBM’s Mixture-of-Experts models offering strong performance with cost-efficiency. MoE expert routing designed for enterprise deployment at scale. |
+| **Llama Nemotron Super** (v1, v1.5, NVIDIA) | `nvidia/Llama-3_3-Nemotron-Super-49B-v1`, `nvidia/Llama-3_3-Nemotron-Super-49B-v1_5` | The [NVIDIA Nemotron](https://www.nvidia.com/en-us/ai-data-science/foundation-models/nemotron/) family builds on the strongest open models in the ecosystem by enhancing them with greater accuracy, efficiency, and transparency using NVIDIA open synthetic datasets, advanced techniques, and tools. This enables the creation of practical, right-sized, and high-performing AI agents. |
+| **Llama Nemotron Ultra** (v1, NVIDIA) | `nvidia/Llama-3_1-Nemotron-Ultra-253B-v1` | The [NVIDIA Nemotron](https://www.nvidia.com/en-us/ai-data-science/foundation-models/nemotron/) family builds on the strongest open models in the ecosystem by enhancing them with greater accuracy, efficiency, and transparency using NVIDIA open synthetic datasets, advanced techniques, and tools. This enables the creation of practical, right-sized, and high-performing AI agents. |
diff --git a/docs/supported_models/support_new_models.md b/docs/supported_models/support_new_models.md
index 06a8842393c..511a8f3986a 100644
--- a/docs/supported_models/support_new_models.md
+++ b/docs/supported_models/support_new_models.md
@@ -135,6 +135,182 @@ ModelRegistry.models.update(import_new_model_classes())
launch_server(server_args)
```
+## Example: Implementing and Serving a Llama Wrapper Model
+
+Below is an introductory, step-by-step walkthrough on how to implement a new model end-to-end in SGLang and then run it via the [Offline Engine](https://github.com/sgl-project/sglang/blob/main/docs/basic_usage/offline_engine_api.ipynb).
+
+### Implementing Our Model
+
+To keep things simple, this new model will be a thin wrapper around [Llama 3.1-8B-Instruct](https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct), and our goal is simply to bias the output logits on each `forward` call by taking the square root of every positive logit.
+
+Let's start by defining our model in a file called `llama_wrapper.py`.
+The first step is to import the necessary libraries from SRT, which is SGLang's internal backend.
+
+```python
+# In the file `llama_wrapper.py`
+
+import torch
+from transformers import LlamaConfig
+from typing import Optional
+from sglang.srt.layers.logits_processor import LogitsProcessorOutput
+from sglang.srt.layers.quantization.base_config import QuantizationConfig
+from sglang.srt.model_executor.forward_batch_info import ForwardBatch, PPProxyTensors
+
+from sglang.srt.models.llama import LlamaForCausalLM
+```
+
+Next, we declare a new `class` for our model and have it inherit from `LlamaForCausalLM`, which allows our model to access `LlamaForCausalLM`'s predefined modules and layers, such as `LlamaAttention` and `LlamaMLP`.
+Note that almost all model implementations take in `config` and `quant_config` as arguments for their `__init__` method; `config` and `quant_config` are passed in via [`model_loader/loader.py`](https://github.com/sgl-project/sglang/blob/bf72b80122fd888bf619d17b96fa3e323ab809fc/python/sglang/srt/model_loader/loader.py#L219).
+Because we have inherited from `LlamaForCausalLM`, we can pass our parameters directly to its constructor, which will set the member variables for us.
+
+```python
+class LlamaWrapper(LlamaForCausalLM):
+ def __init__(
+ self,
+ config: LlamaConfig,
+ quant_config: Optional[QuantizationConfig] = None,
+ prefix: str = "",
+ ) -> None:
+ super().__init__(config=config, quant_config=quant_config, prefix=prefix)
+```
+
+Now, we want to define the `forward` method, which is what will be called at inference time.
+Note that the signature for `forward` is essentially the same for any model; you can take a look at the other models defined in the [`models` directory](https://github.com/sgl-project/sglang/blob/main/python/sglang/srt/models/) for references.
+To see where exactly `forward` is called in the SGLang runtime's internals, take a look at [`forward_decode`](https://github.com/sgl-project/sglang/blob/bf72b80122fd888bf619d17b96fa3e323ab809fc/python/sglang/srt/model_executor/model_runner.py#L1705) and [`forward_extend`](https://github.com/sgl-project/sglang/blob/bf72b80122fd888bf619d17b96fa3e323ab809fc/python/sglang/srt/model_executor/model_runner.py#L1724) in the [`ModelRunner` class](https://github.com/sgl-project/sglang/blob/main/python/sglang/srt/model_executor/model_runner.py).
+
+```python
+ @torch.no_grad()
+ def forward(
+ self,
+ input_ids: torch.Tensor,
+ positions: torch.Tensor,
+ forward_batch: ForwardBatch,
+ pp_proxy_tensors: Optional[PPProxyTensors] = None,
+ input_embeds: Optional[torch.Tensor] = None,
+ get_embedding: bool = False,
+ ) -> LogitsProcessorOutput:
+```
+
+We now call the `__call__` method for `self.model` (which is a member variable that `LlamaForCausalLM` defines in its `__init__` method), which eventually calls `LlamaForCausalLM`'s `forward` method.
+After that, we feed the `hidden_states` into our model's `LogitsProcessor` (again defined in `LlamaForCausalLM`).
+
+```python
+ hidden_states = self.model(
+ input_ids,
+ positions,
+ forward_batch,
+ input_embeds,
+ pp_proxy_tensors=pp_proxy_tensors,
+ )
+
+ res: LogitsProcessorOutput = self.logits_processor(
+ input_ids,
+ hidden_states,
+ self.lm_head,
+ forward_batch,
+ )
+```
+
+After receiving the logits for the next token, we can finally perform our biasing step.
+
+```python
+ orig_logits = res.next_token_logits
+ res.next_token_logits = torch.where(
+ orig_logits > 0,
+ orig_logits.sqrt(),
+ orig_logits
+ )
+
+ return res
+```
+Now, our `LlamaWrapper` model is created and ready to be served!
+
+### Serving Our Model Via SGLang's Offline Engine
+
+The next step of this walkthrough involves hosting our new model offline, so that it can be served locally and without an HTTP server.
+
+First, create a new file called `run.py`.
+Now, we must ensure that SGLang's `ModelRegistry` can find our model.
+To do this, we first download the model's configuration and weights from Huggingface.
+
+```python
+# In the file `run.py`
+
+import asyncio
+from functools import lru_cache
+from huggingface_hub import snapshot_download
+from llama_wrapper import LlamaWrapper # Make sure to import our new model!
+import sglang as sgl
+from sglang.srt.models.registry import ModelRegistry
+
+# Make sure to request access to this model on Huggingface, then export your
+# `HF_TOKEN` to download the model snapshot
+llama_dir = snapshot_download(
+ repo_id="meta-llama/Llama-3.1-8B-Instruct",
+ local_dir="./llama_ckpt",
+)
+```
+
+Now that we have our model on disk, we want to point it to `LlamaWrapper` by changing the `architectures` field in `./llama_ckpt/config.json` to be `LlamaWrapper`.
+That way, when we pass in the path of our model checkpoint to SGLang, it will know that we want to use "LlamaWrapper" instead of "LlamaForCausalLM" as our model.
+
+```python
+{
+ "architectures": [
+ # "LlamaForCausalLM"
+ "LlamaWrapper"
+ ],
+ ...
+}
+```
+
+However, if we don't link our `LlamaWrapper` class to the "LlamaWrapper" registry keyword, then SGLang won't be able to find our model.
+Thus, to register our `LlamaWrapper`, we want to follow the steps in the above section titled "Registering an External Model Implementation".
+
+```python
+@lru_cache()
+def import_new_model_classes():
+ model_arch_name_to_cls = {"LlamaWrapper": LlamaWrapper}
+ return model_arch_name_to_cls
+
+ModelRegistry.models.update(import_new_model_classes())
+```
+
+Lastly, when we create our `Engine`, we just pass in the path to the local model directory.
+Then, our `LlamaWrapper` is ready to be served; for this walkthrough, we will use SGLang `Engine`'s non-streaming asynchronous generation endpoint.
+
+```python
+def main():
+ llm = sgl.Engine(model_path="./llama_ckpt")
+ sampling_params = {"temperature": 0.2, "top_k": 5}
+ prompts = [
+ "Write a short, neutral self-introduction for a fictional character. Hello, my name is",
+ "Provide a concise factual statement about France’s capital city. The capital of France is",
+ "Explain possible future trends in artificial intelligence. The future of AI is",
+ ]
+
+ asyncio.run(run_llm(llm, sampling_params, prompts))
+
+ llm.shutdown()
+
+async def run_llm(
+ llm,
+ sampling_params,
+ prompts,
+) -> None:
+ outputs = await llm.async_generate(prompts, sampling_params)
+
+ for prompt, output in zip(prompts, outputs):
+ print(f"\nPrompt: {prompt}")
+ print(f"Generated text: {output['text']}")
+
+if __name__ == "__main__":
+ main()
+```
+
+Now, when we call `python run.py`, we will get the outputs of our newly created model!
+
+
## Documentation
Add to table of supported models in [generative_models.md](https://github.com/sgl-project/sglang/blob/main/docs/supported_models/generative_models.md) or [multimodal_language_models.md](https://github.com/sgl-project/sglang/blob/main/docs/supported_models/multimodal_language_models.md)
diff --git a/examples/chat_template/tool_chat_template_deepseekv3.jinja b/examples/chat_template/tool_chat_template_deepseekv3.jinja
index dde922d30bd..46c1b8801e6 100644
--- a/examples/chat_template/tool_chat_template_deepseekv3.jinja
+++ b/examples/chat_template/tool_chat_template_deepseekv3.jinja
@@ -12,7 +12,7 @@
{% set ns.system_prompt = ns.system_prompt + '\n\n' + message['content'] %}
{%- endif %}
{%- endif %}
-{%- endfor %}
+{%- endfor -%}
{# --- Append tool descriptions if tools are defined --- #}
{% if tools is defined and tools is not none %}
@@ -23,13 +23,13 @@
'Make sure the JSON is valid.'
'## Tools\n\n### Function\n\nYou have the following functions available:\n\n') %}
{% for tool in tools %}
- {% set tool_ns.text = tool_ns.text + '- `' + tool['name'] + '`:\n```json\n' + (tool | tojson) + '\n```\n' %}
+ {% set tool_ns.text = tool_ns.text + '\n```json\n' + (tool | tojson) + '\n```\n' %}
{% endfor %}
{% set ns.system_prompt = ns.system_prompt + '\n\n' + tool_ns.text %}
{% endif %}
-{{ bos_token }}
-{{ ns.system_prompt }}
+{{- bos_token }}
+{{- ns.system_prompt }}
{%- for message in messages %}
{%- if message['role'] == 'user' %}
@@ -41,7 +41,7 @@
{%- if message['role'] == 'assistant' and message['tool_calls'] is defined and message['tool_calls'] is not none %}
{%- set ns.is_last_user = false -%}
{%- if ns.is_tool %}
- {{'<|tool▁outputs▁end|>'}}
+ {{- '<|tool▁outputs▁end|>'}}
{%- endif %}
{%- set ns.is_first = false %}
{%- set ns.is_tool = false -%}
@@ -49,43 +49,43 @@
{%- for tool in message['tool_calls'] %}
{%- if not ns.is_first %}
{%- if message['content'] is none %}
- {{'<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}
+ {{- '<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments']|tojson + '\n' + '```' + '<|tool▁call▁end|>'}}
{%- else %}
- {{message['content'] + '<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}
+ {{- message['content'] + '<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments']|tojson + '\n' + '```' + '<|tool▁call▁end|>'}}
{%- endif %}
{%- set ns.is_first = true -%}
{%- else %}
- {{'\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}
+ {{- '\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments']|tojson + '\n' + '```' + '<|tool▁call▁end|>'}}
{%- endif %}
{%- endfor %}
- {{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}
+ {{- '<|tool▁calls▁end|><|end▁of▁sentence|>'}}
{%- endif %}
{%- if message['role'] == 'assistant' and (message['tool_calls'] is not defined or message['tool_calls'] is none)%}
{%- set ns.is_last_user = false -%}
{%- if ns.is_tool %}
- {{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}
+ {{- '<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}
{%- set ns.is_tool = false -%}
{%- else %}
{% set content = message['content'] %}
- {{content + '<|end▁of▁sentence|>'}}
+ {{- content + '<|end▁of▁sentence|>'}}
{%- endif %}
{%- endif %}
{%- if message['role'] == 'tool' %}
{%- set ns.is_last_user = false -%}
{%- set ns.is_tool = true -%}
{%- if ns.is_output_first %}
- {{ 'Use the results below to formulate an answer to the user question unless additional information is needed.' }}
- {{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}
+ {{- 'Use the results below to formulate an answer to the user question unless additional information is needed.' }}
+ {{- '<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}
{%- set ns.is_output_first = false %}
{%- else %}
- {{'\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}
+ {{- '\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}
{%- endif %}
{%- endif %}
{%- endfor -%}
{% if ns.is_tool %}
- {{"<|tool▁outputs▁end|>"}}
+ {{- '<|tool▁outputs▁end|>'}}
{% endif %}
{% if add_generation_prompt and not ns.is_last_user and not ns.is_tool %}
- {{'<|Assistant|>'}}
+ {{- '<|Assistant|>'}}
{% endif %}
diff --git a/examples/chat_template/tool_chat_template_deepseekv31.jinja b/examples/chat_template/tool_chat_template_deepseekv31.jinja
new file mode 100644
index 00000000000..08e93a30af4
--- /dev/null
+++ b/examples/chat_template/tool_chat_template_deepseekv31.jinja
@@ -0,0 +1,91 @@
+{% if not add_generation_prompt is defined %}
+ {% set add_generation_prompt = false %}
+{% endif %}
+{% if not thinking is defined %}
+ {% set thinking = false %}
+{% endif %}
+{% set ns = namespace(is_first=false, is_tool=false, system_prompt='', is_first_sp=true, is_last_user=false) %}
+{%- for message in messages %}
+ {%- if message['role'] == 'system' %}
+ {%- if ns.is_first_sp %}
+ {% set ns.system_prompt = ns.system_prompt + message['content'] %}
+ {% set ns.is_first_sp = false %}
+ {%- else %}
+ {% set ns.system_prompt = ns.system_prompt + '\n\n' + message['content'] %}
+ {%- endif %}
+ {%- endif %}
+{%- endfor %}
+
+{% if tools is defined and tools is not none %}
+ {% set tool_ns = namespace(text='## Tools\nYou have access to the following tools:\n') %}
+ {% for tool in tools %}
+ {% set tool_ns.text = tool_ns.text + '\n### ' + tool.function.name + '\nDescription: ' + tool.function.description + '\n\nParameters: ' + (tool.function.parameters | tojson) + '\n' %}
+ {% endfor %}
+ {% set tool_ns.text = tool_ns.text + "\nIMPORTANT: ALWAYS adhere to this exact format for tool use:\n<|tool▁calls▁begin|><|tool▁call▁begin|>tool_call_name<|tool▁sep|>tool_call_arguments<|tool▁call▁end|>{{additional_tool_calls}}<|tool▁calls▁end|>\n\nWhere:\n\n- `tool_call_name` must be an exact match to one of the available tools\n- `tool_call_arguments` must be valid JSON that strictly follows the tool's Parameters Schema\n- For multiple tool calls, chain them directly without separators or spaces\n" %}
+ {% set ns.system_prompt = ns.system_prompt + '\n\n' + tool_ns.text %}
+{% endif %}
+
+{{ bos_token }}{{ ns.system_prompt }}
+{%- for message in messages %}
+ {%- if message['role'] == 'user' %}
+ {%- set ns.is_tool = false -%}
+ {%- set ns.is_first = false -%}
+ {%- set ns.is_last_user = true -%}
+ {{'<|User|>' + message['content']}}
+ {%- endif %}
+ {%- if message['role'] == 'assistant' and message['tool_calls'] is defined and message['tool_calls'] is not none %}
+ {%- if ns.is_last_user %}
+ {{'<|Assistant|>'}}
+ {%- endif %}
+ {%- set ns.is_last_user = false -%}
+ {%- set ns.is_first = false %}
+ {%- set ns.is_tool = false -%}
+ {%- for tool in message['tool_calls'] %}
+ {%- if not ns.is_first %}
+ {%- if message['content'] is none %}
+ {{'<|tool▁calls▁begin|><|tool▁call▁begin|>'+ tool['function']['name'] + '<|tool▁sep|>' + tool['function']['arguments']|tojson + '<|tool▁call▁end|>'}}
+ {%- else %}
+ {{message['content'] + '<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['function']['name'] + '<|tool▁sep|>' + tool['function']['arguments']|tojson + '<|tool▁call▁end|>'}}
+ {%- endif %}
+ {%- set ns.is_first = true -%}
+ {%- else %}
+ {{'<|tool▁call▁begin|>'+ tool['function']['name'] + '<|tool▁sep|>' + tool['function']['arguments']|tojson + '<|tool▁call▁end|>'}}
+ {%- endif %}
+ {%- endfor %}
+ {{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}
+ {%- endif %}
+ {%- if message['role'] == 'assistant' and (message['tool_calls'] is not defined or message['tool_calls'] is none) %}
+ {%- if ns.is_last_user %}
+ {{'<|Assistant|>'}}
+ {%- if message['prefix'] is defined and message['prefix'] and thinking %}
+ {{'<think>'}}
+ {%- else %}
+ {{'</think>'}}
+ {%- endif %}
+ {%- endif %}
+ {%- set ns.is_last_user = false -%}
+ {%- if ns.is_tool %}
+ {{message['content'] + '<|end▁of▁sentence|>'}}
+ {%- set ns.is_tool = false -%}
+ {%- else %}
+ {%- set content = message['content'] -%}
+ {%- if '</think>' in content %}
+ {%- set content = content.split('</think>', 1)[1] -%}
+ {%- endif %}
+ {{content + '<|end▁of▁sentence|>'}}
+ {%- endif %}
+ {%- endif %}
+ {%- if message['role'] == 'tool' %}
+ {%- set ns.is_last_user = false -%}
+ {%- set ns.is_tool = true -%}
+ {{'<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}
+ {%- endif %}
+{%- endfor -%}
+{%- if add_generation_prompt and ns.is_last_user and not ns.is_tool %}
+ {{'<|Assistant|>'}}
+ {%- if not thinking %}
+ {{'</think>'}}
+ {%- else %}
+ {{'<think>'}}
+ {%- endif %}
+{% endif %}
diff --git a/examples/profiler/nsys_profile_tools/README.md b/examples/profiler/nsys_profile_tools/README.md
new file mode 100644
index 00000000000..687200e0535
--- /dev/null
+++ b/examples/profiler/nsys_profile_tools/README.md
@@ -0,0 +1,176 @@
+# gputrc2graph.py
+
+This script processes NVIDIA Nsight Systems (`nsys`) GPU trace files
+(`.nsys-rep`) collected with `-t cuda` tracing enabled, and generates
+kernel-level summaries and visualizations of GPU and non-GPU time. It is
+useful for profiling and analyzing nsys profile output.
+
+## Usage
+
+### Command-line Arguments
+
+- `--in_file`
+ **(required)**
+ List of input files and their metadata. Each entry should be in the format:
+ `<nsys-rep>,<engine>,<model>,<elapsed_nonprofiled_sec>`
+ - `nsys-rep`: Path to the `.nsys-rep` file.
+ - `engine`: Engine name (e.g., `sglang`).
+ - `model`: Model name (e.g., `llama`, `gpt-oss`, `ds`).
+ - `elapsed_nonprofiled_sec`: Wall-clock runtime (in seconds) without
+ profiling. Specify `0` to use the elapsed time from the nsys-rep file
+ (this may inflate non-GPU time if the actual runtime without profiling is
+ shorter). Multiple entries can be provided, separated by spaces.
+
+- `--out_dir`
+ Output directory for the generated CSV and HTML files.
+ If not specified, results are saved in the current directory.
+
+- `--title`
+ Title for the HTML chart/visualization.
+
+- `--nsys_cmd`
+ Path to the `nsys` command.
+ Default: `nsys` (assumes it is in your PATH).
+ Use this if `nsys` is not in your system PATH.
+
+## Notes
+
+- Make sure you have `pandas` installed; the script also uses the `regex` package and `plotly` for the HTML chart.
+- Make sure [nsys](https://developer.nvidia.com/nsight-systems/get-started) is
+ installed, and specify the path to the `nsys` command with `--nsys_cmd` if it
+ is not in your PATH. The local nsys version must be >= the nsys version that
+ was used to collect the traces on the server, so that it can process the
+ generated nsys-rep files (see the version check below).
+
+- For more details on available engines and models, see the help string in
+ the script or run:
+
+```bash
+python3 gputrc2graph.py --help
+```
+
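+To confirm that your local `nsys` is recent enough before processing a trace:
+
+```bash
+nsys --version
+```
+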
+## Example 1: analyze a single profile
+
+To analyze the GPU cycles of, for example, a Llama-3.1-8B model served with sglang:
+
+1. Run the following command to collect an nsys profile of the sglang server:
+
+ ```bash
+ nsys profile -t cuda -o nsys_res -f true --trace-fork-before-exec=true \
+ --cuda-graph-trace=node --delay <DELAY> --duration <DURATION> \
+ python3 -m sglang.launch_server --model meta-llama/Llama-3.1-8B ...
+ ```
+
+ where:
+
+ - DELAY: how many seconds to delay nsys before it starts collecting, needed so
+ that profiles aren't captured until the sglang server has come up and load
+ generation starts.
+ - DURATION: how many seconds nsys profiles before generating the
+ report. This should be longer than the duration of the run.
+2. After the server starts, run the client load-generation command. Once the
+test completes (after DURATION seconds), nsys will generate an
+nsys_res.nsys-rep file and shut down the server.
+
+3. Run step #1 again, this time starting up the server without collecting the
+profile.
+
+4. Run step #2 again, and record the total time to complete the test in
+seconds. This value will be used by the script to calculate the
+ CPU(non-GPU) seconds for the analysis.
+
+5. Say the elapsed time from step #4 is 132 seconds. Run the script to
+ analyze:
+
+ ```bash
+ python3 gputrc2graph.py \
+ --in_file run1.nsys-rep,sglang,llama,132
+ ```
+
+The command will produce 2 files for analysis:
+
+- result.html: this categorizes kernel names into different categories in a
+ stacked bar chart.
+- result.csv: shows how the kernel names are mapped to the different
+ categories.
+
+### HTML visualization with result.html
+
+The HTML file shows the elapsed seconds attributed to each GPU substage or
+category. In this example, attention kernels are the biggest category at 63
+seconds, followed by "gemm" kernels. This lets the user prioritize which
+kernels to focus on for performance optimization.
+
+There is also a data table appended underneath the bar chart that can be
+copied out to other post-processing tools.
+
+### Kernel to category mapping with result.csv
+
+Suppose the user would like to focus on improving the triton kernels. At 0.01
+sec they are not the biggest consumer of cycles, but perhaps they have not been
+optimized yet. The next step is to use result.csv to see which kernels make up
+the triton-kernel GPU cycles.
+
+## Example 2: analyze multiple profiles
+
+Suppose the user has multiple nsys trace files captured for different models,
+say llama and gpt-oss in this case, and wishes to compare their GPU/non-GPU
+time. A command like the following can be used:
+
+```bash
+python3 gputrc2graph.py \
+--in_file run1.nsys-rep,sglang,llama,100 run2.nsys-rep,sglang,gpt-oss,102 \
+--out_dir results
+```
+
+The analysis process is similar to Example 1, but now there will be multiple
+stacked bar charts that can be compared. The categories for the different
+kernels remain the same, so it is easy to compare GPU cycles across
+configurations for the same categories.
+
+Once a category is shown to have more cycles in one configuration than
+another, the next step is to use the csv file to see which kernels are mapped
+into that category, and which of them take the most time and therefore drive
+the difference for the overall category.
+
+## Example 3: add new classification for a new model
+
+To create a new engine DEF with model ABC, just add another json file in the same directory as
+gputrc2graph.py with the same format as the other json files. The script will automatically pick up all the json files in the same directory as engine/model specifications.
+
+For example, suppose this new model has 4 kernels to be classified into
+"gemm" and "attn", where the gemm kernel names contain "H" or "I" and the
+attn kernel names contain "J" or "K". The new .json file would look like the
+following:
+
+```json
+{
+ "DEF": {
+ "ABC": {
+ "H|I": "gemm",
+ "J|K": "attn",
+ "CUDA mem": "non-gpu-H_D_memops",
+ ".*": "misc"
+ }
+ }
+}
+```
+
+Each entry in the dictionary consists of:
+
+- key: a regex used to classify the kernels
+- value: the category to classify the kernels into.
+
+The last 2 entries are common for all engine/models, consisting of CUDA memory
+operations and a 'misc' for anything that's leftover and can't be classified.
+
+When invoking gputrc2graph.py, specify a trace file with this new model/engine
+like the following:
+
+```bash
+--in_file new.nsys-rep,DEF,ABC,<elapsed_nonprofiled_sec>
+```
+
+If the engine_DEF.json file already exists, just add the model as a new node in
+ the existing engine file, after the other models.
diff --git a/examples/profiler/nsys_profile_tools/gputrc2graph.py b/examples/profiler/nsys_profile_tools/gputrc2graph.py
new file mode 100755
index 00000000000..f17bd18573e
--- /dev/null
+++ b/examples/profiler/nsys_profile_tools/gputrc2graph.py
@@ -0,0 +1,344 @@
+"""
+ This generates GPU kernel analysis output from an nsys rep file. It calls
+ nsys stats -r cuda_gpu_trace, computes non-overlapped GPU cycles, then
+ generates csv and html output for analysis.
+"""
+
+import argparse
+import logging
+import os
+
+import regex as re
+
+logger = logging.getLogger(__name__)
+
+
+# helper data class for annotating kernels
+def load_engine_model():
+ """returns engine_model built from all json files in the current dir"""
+ import glob
+ import json
+
+ engine_model = {}
+
+ json_files = glob.glob(os.path.join(os.path.dirname(__file__) or ".", "*.json"))
+ for fname in json_files:
+ with open(fname, encoding="utf-8") as f:
+ engine_model.update(json.load(f))
+ return engine_model
+
+
+class GPUTrace2Graph:
+ """
+ Parses output of nsys report, generates csv and bar chart output
+ """
+
+ def __init__(self):
+ import pandas as pd # avoid importing till needed
+
+ self.pd = pd
+ self.pd.options.mode.copy_on_write = True
+
+ # helper functions for generating trace->summary csvs
+ def gen_nonoverlapped_sum_from_gputrace(self, in_file, out_file):
+ logger.info("loading %s", in_file)
+ df = self.pd.read_csv(
+ in_file, usecols=["Start (ns)", "Duration (ns)", "Device", "Strm", "Name"]
+ )
+ df["End (ns)"] = df["Start (ns)"] + df["Duration (ns)"]
+ df = self.sum_non_overlapping_intervals(df)
+ # get ready to print table with elapsed times per kernel
+ df["Instances"] = 1
+ df_sum = df.groupby("Name", as_index=False).agg(
+ {"Elapsed Time (ns)": "sum", "Duration (ns)": "sum", "Instances": "size"}
+ )
+
+ # generate csv
+ df_sum["Total Time (sec)"] = df_sum["Duration (ns)"] / 1e9
+ df_sum["Elapsed Time (sec)"] = df_sum["Elapsed Time (ns)"] / 1e9
+ df_sum = df_sum.sort_values(by="Elapsed Time (sec)", ascending=False)
+ df_sum[["Elapsed Time (sec)", "Total Time (sec)", "Instances", "Name"]].to_csv(
+ out_file, index=False
+ )
+
+ def sum_non_overlapping_intervals(self, df):
+ """
+ returns new sorted df with Elapsed Time (ns) column using
+ vectorized operations
+ """
+ logger.info("sorting %s trace records by start time", str(df.shape))
+
+ # Sort by start time and reset index
+ df = df.sort_values(by="Start (ns)").reset_index(drop=True)
+
+ # Initialize elapsed time as duration
+ df["Elapsed Time (ns)"] = df["Duration (ns)"]
+
+ # Get numpy arrays for faster operations
+ starts = df["Start (ns)"].values
+ ends = df["End (ns)"].values
+
+ # Keep track of current interval end
+ current_end = ends[0]
+ display_units = max(1, int(len(df) / 100))
+ # Update current_end for overlapping intervals
+ for i in range(1, len(df)):
+ if i % display_units == 0:
+ print(f"processing trace: {int(i/len(df) * 100)} %", end="\r")
+ if starts[i] <= current_end:
+ if ends[i] > current_end:
+ # Partial overlap
+ df.iloc[i, df.columns.get_loc("Elapsed Time (ns)")] = (
+ ends[i] - current_end
+ )
+ current_end = ends[i]
+ else:
+ # Complete overlap
+ df.iloc[i, df.columns.get_loc("Elapsed Time (ns)")] = 0
+ else:
+ # No overlap
+ current_end = ends[i]
+
+ return df
+
+ # functions for generating html files
+ def make_html(self, df, output_dir, title):
+ """make html graph from df"""
+ import plotly.express as px
+
+ if df.empty:
+ return
+ output_name = os.path.join(output_dir, "result")
+ if not title:
+ title = "Model_Engine"
+ x = "Model_Engine"
+ y = "Elapsed Time (sec)"
+ color = "Category"
+ """ generate kernel mapping table """
+ # Sort Model_Engine categories by last field after underscore
+ df["Model_Engine"] = self.pd.Categorical(
+ df["Model_Engine"],
+ sorted(df["Model_Engine"].unique(), key=lambda x: x.split("_")[-1]),
+ )
+ df[["Model_Engine", color, "Instances", "Name", y]].sort_values(
+ by=color
+ ).to_csv(f"{output_name}.csv", index=False)
+ graph = px.histogram(
+ df.round(2),
+ x=x,
+ y=y,
+ title=(f"{y} for {title}"),
+ color=color,
+ text_auto=True,
+ )
+ # wrap x axis labels
+ graph.update_xaxes(automargin=True)
+ graph.write_html(f"{output_name}.html")
+ """
+ Generate data table with columns per Model_Engine into result.html
+ """
+ pivot_df = df.pivot_table(
+ values="Elapsed Time (sec)",
+ index="Category",
+ columns="Model_Engine",
+ aggfunc="sum",
+ observed=False,
+ ).round(2)
+ # Add sum row at bottom
+ pivot_df.loc["total_elapsed_sec"] = pivot_df.sum()
+ pivot_df.fillna("").to_html("temp.html")
+ with (
+ open(f"{output_name}.html", "a", encoding="utf-8") as outfile,
+ open("temp.html", encoding="utf-8") as infile,
+ ):
+ outfile.write(infile.read())
+ os.remove("temp.html")
+
+ print(
+ f"Finished generating: \n"
+ f" {output_name}.html for stack bar chart \n"
+ f" {output_name}.csv for Kernel-Category mapping"
+ )
+
+ def anno_gpu_kernname(self, df, mapping):
+ """add "Category" column"""
+
+ def anno_gpu_kernname_helper(name):
+ for kern_name, val in mapping.items():
+ if re.search(kern_name, name):
+ return val
+
+ df["Category"] = df["Name"].apply(anno_gpu_kernname_helper)
+
+ def make_nongpu_row(self, df, nongpu_sec):
+ """this will append non-gpu time entry at end of df"""
+ nongpu_row = self.pd.DataFrame([df.iloc[-1]])
+ nongpu_row["Category"] = nongpu_row["Name"] = "CPU(non-GPU)"
+ nongpu_row["Instances"] = 1
+ nongpu_row["Elapsed Time (sec)"] = nongpu_sec
+ return nongpu_row
+
+ def is_valid_file(self, base_file):
+ """asserts if base_file is non-existent or is empty"""
+ assert (
+ os.path.isfile(base_file) and os.path.getsize(base_file) > 0
+ ), f"{base_file} doesn't exist or is empty"
+
+ def should_gen_file(self, new_file, base_file):
+ """figure out if new file should be generated from base_file"""
+ self.is_valid_file(base_file)
+ if (
+ os.path.exists(new_file)
+ and (os.path.getmtime(new_file) > os.path.getmtime(base_file))
+ and (os.path.getsize(base_file) > 0)
+ ):
+ logger.info("reusing %s", new_file)
+ return False
+ else:
+ logger.info("generating %s", new_file)
+ return True
+
+ def gen_sum_file(self, file, nsys_cmd):
+ """
+ generates sum file from nsys trace with times per kernel and
+ returns the name of the sum file
+ """
+ import subprocess
+
+ file_dir = os.path.dirname(file)
+ file_name = os.path.basename(file)
+
+ if not file_dir:
+ file_dir = "."
+ # Walk through trace and get the total non-overlapped time
+ nsys_stats_file = os.path.join(file_dir, f"{file_name}_cuda_gpu_trace.csv")
+ sum_file = os.path.join(file_dir, f"{file_name}_cuda_gpu_kernel_tracesum.csv")
+ if self.should_gen_file(nsys_stats_file, file):
+ cmd = [
+ nsys_cmd,
+ "stats",
+ "-r",
+ "cuda_gpu_trace",
+ file,
+ "-o",
+ f"{file_dir}/{file_name}",
+ ]
+ cmd_str = " ".join(cmd)
+ logger.info("+ %s", cmd_str)
+ # estimate time based on calibrated 240M/min
+ file_size_mb = os.path.getsize(file) / 1e6
+ logger.info(
+ "nsys stats for %.2f MB file expected to take %.2f min",
+ file_size_mb,
+ file_size_mb / 240,
+ )
+ try:
+ subprocess.run(cmd, check=True)
+ except (FileNotFoundError, subprocess.CalledProcessError) as e:
+ logger.error(
+ "'%s' failed: %s. Use --nsys_cmd to specify nsys path", cmd_str, e
+ )
+ exit(1)
+ logger.info("generating non-overalapped sum %s", sum_file)
+ self.gen_nonoverlapped_sum_from_gputrace(nsys_stats_file, sum_file)
+ self.is_valid_file(sum_file)
+ logger.info("Finished generating %s", sum_file)
+ return sum_file
+
+ def gen_graph(self, in_file, out_dir, title, nsys_cmd, engine_model):
+ """generates graph and csv file from in_file into out_dir"""
+ # Initialize an empty DataFrame to store combined data
+ combined_df = self.pd.DataFrame()
+ for idx, (file, engine, model, total_sec) in enumerate(in_file):
+ file_dir = os.path.dirname(file)
+ file_name = os.path.basename(file)
+ if not file_dir:
+ file_dir = "."
+ sum_file = self.gen_sum_file(file, nsys_cmd)
+ # read kernel summary file
+ df = self.pd.read_csv(sum_file)
+ # annotate kernel to their categories
+ assert engine_model.get(engine), f"engine {engine} unknown"
+ assert engine_model[engine].get(model), f"model {model} unknown"
+ # remove nsys-rep from file_name for shorter x-label
+ file_name = file_name.replace(".nsys-rep", "")
+ df["Model_Engine"] = f"{model}_{engine}_{file_name}_{idx}"
+ self.anno_gpu_kernname(df, engine_model[engine][model])
+ # patch in non-gpu time
+ gpu_sec = round(df["Elapsed Time (sec)"].sum(), 1)
+ total_sec = round(float(total_sec), 1)
+ if total_sec < gpu_sec:
+ logger.warning(
+ "Elapsed sec %.2f < GPU sec %.2f resetting Elapsed sec ",
+ total_sec,
+ gpu_sec,
+ )
+ total_sec = gpu_sec
+ nongpu_row = self.make_nongpu_row(df, total_sec - gpu_sec)
+ df = self.pd.concat([df, nongpu_row], ignore_index=True)
+ combined_df = self.pd.concat([combined_df, df], ignore_index=True)
+ if out_dir is None:
+ out_dir = "."
+ else:
+ os.makedirs(out_dir, exist_ok=True)
+ # generate html file
+ self.make_html(combined_df, out_dir, title)
+
+
+def parse_tuple(s):
+ return tuple(s.split(","))
+
+
+def main():
+ logging.basicConfig(
+ format=("%(asctime)s - %(levelname)s - %(message)s"), level=logging.INFO
+ )
+ parser = argparse.ArgumentParser(
+ description=(
+ "Process nsys rep and generate kernel non-overlapped cycles. \n"
+ "Example:\n"
+ "gputrc2graph.py --in_file d1.nsys-rep,sglang,llama,100 \n"
+ "d2.nsys-rep,sglang,gpt-oss,102 "
+ '--out_dir results/ --title "Model=gpt-oss SGLANG chart"'
+ ),
+ formatter_class=argparse.RawDescriptionHelpFormatter,
+ )
+
+ # load supported engine_model
+ engine_model_supported = load_engine_model()
+ # Get a string representation of supported engine/model combinations
+ engine_model_supported_str = ", ".join(
+ f"{engine}:[{', '.join(models.keys())}]"
+ for engine, models in engine_model_supported.items()
+ )
+ parser.add_argument(
+ "--in_file",
+ type=parse_tuple,
+ nargs="+",
+ help=(
+ "list of (nsys-rep, engine, model, elapsed_nonprofiled_sec) "
+ "separated by space. Elapsed_nonprofiled_sec is runtime without "
+ "profiling used to calculate non-gpu time. Specify 0 to use "
+ "elapsed time from nsys-rep but that might inflate non-gpu time. "
+ f"Available engine:[model] are: {engine_model_supported_str} "
+ f"Example: --infile d1.nsys-rep,sglan,llama,100 "
+ "d2.nsys-rep,sglang,gpt-oss,102"
+ ),
+ required=True,
+ )
+ parser.add_argument("--out_dir", help=("output dir for result.csv/html"))
+ parser.add_argument("--title", help=("title for html chart"))
+ parser.add_argument(
+ "--nsys_cmd",
+ help=("nsys cmd, e.g. /usr/bin/nsys, Default: nsys"),
+ default="nsys",
+ )
+ args = parser.parse_args()
+ gputrace = GPUTrace2Graph()
+ gputrace.gen_graph(
+ args.in_file, args.out_dir, args.title, args.nsys_cmd, engine_model_supported
+ )
+
+
+if __name__ == "__main__":
+ main()
diff --git a/examples/profiler/nsys_profile_tools/sglang_engine_model.json b/examples/profiler/nsys_profile_tools/sglang_engine_model.json
new file mode 100644
index 00000000000..253cc762b76
--- /dev/null
+++ b/examples/profiler/nsys_profile_tools/sglang_engine_model.json
@@ -0,0 +1,61 @@
+{
+ "sglang": {
+ "llama": {
+ "gemm|nvjet": "gemm",
+ "fused_moe_kernel|GroupProblemShape|group_gemm_starts|bmm_|GemmUniversal": "moe_gemm",
+ "moe|sigmoid": "moe",
+ "CatArrayBatched|prepare_inputs": "prepare_next",
+ "ncclDevKernel|cross_device_reduce": "nccl_and_custom_ar",
+ "_norm_|Norm": "norm",
+ "topk": "topk",
+ "act_and_mul_": "activation",
+ "Rotary": "rope",
+ "SoftMax": "softmax",
+ "flash|fmha": "attn",
+ "elementwise": "elementwise",
+ "fp8_quant|cvt_|quantize": "quantize",
+ "reduce_kernel": "reduce",
+ "triton": "triton_kernel",
+ "CUDA mem": "non-gpu-H_D_memops",
+ ".*": "misc"
+ },
+ "ds": {
+ "block_fp8_matmul": "block_fp8_gemm",
+ "gemm|matmul|nvjet": "gemm",
+ "fused_moe_kernel": "moe_gemm",
+ "moe|expert|sigmoid": "moe",
+ "CatArrayBatched|write_req_to": "prepare_next",
+ "ncclDevKernel|cross_device_reduce|all_gather": "nccl_and_custom_ar",
+ "Norm": "norm",
+ "topk": "topk",
+ "activation|act_and_mul": "activation",
+ "compute_position_kernel": "rope",
+ "elementwise": "elementwise",
+ "fp8_quant|quant_fp8|quantize": "quantize",
+ "SoftMax": "softmax",
+ "reduce": "reduce",
+ "_fwd_|create_flash|::mla::|KVCache": "attn",
+ "CUDA mem": "non-gpu-H_D_memops",
+ ".*": "misc"
+ },
+ "gpt-oss": {
+ "gemm|nvjet": "gemm",
+ "fused_moe_kernel|_group_gemm|GroupProblemShape|GemmUniversal|bmm_|matmul_ogs_|_topk_forward|_combined_routing|_sum_bitmatrix_rows|_compute_writeback_idx": "moe_gemm",
+ "moe|sigmoid": "moe",
+ "CatArrayBatched|prepare_inputs": "prepare_next",
+ "_norm_|Norm": "norm",
+ "ncclDevKernel|cross_device_reduce|allreduce": "nccl_and_custom_ar",
+ "topk|TopK": "topk",
+ "act_and_mul_": "activation",
+ "Rotary": "rope",
+ "SoftMax": "softmax",
+ "flash|fmha": "attn",
+ "elementwise": "elementwise",
+ "fp8_quant|cvt_|quantize": "quantize",
+ "reduce_kernel": "reduce",
+ "triton": "triton_kernel",
+ "CUDA mem": "non-gpu-H_D_memops",
+ ".*": "misc"
+ }
+ }
+}
diff --git a/examples/runtime/engine/offline_batch_inference_vlm.py b/examples/runtime/engine/offline_batch_inference_vlm.py
index 459a048cc55..3928239467b 100644
--- a/examples/runtime/engine/offline_batch_inference_vlm.py
+++ b/examples/runtime/engine/offline_batch_inference_vlm.py
@@ -7,7 +7,7 @@
import dataclasses
import sglang as sgl
-from sglang.srt.conversation import chat_templates
+from sglang.srt.parser.conversation import chat_templates
from sglang.srt.server_args import ServerArgs
diff --git a/examples/runtime/engine/save_remote_state.py b/examples/runtime/engine/save_remote_state.py
index 47812695f0d..a428195cadc 100644
--- a/examples/runtime/engine/save_remote_state.py
+++ b/examples/runtime/engine/save_remote_state.py
@@ -14,8 +14,7 @@
Then, the model can be loaded with
llm = Engine(
- model_path="/path/to/save",
- --remote-model-url [protocol]://[host]:[port]/[model_name],
+ model_path="[protocol]://[host]:[port]/[model_name]",
tensor_parallel_size=8,
)
"""
@@ -34,6 +33,12 @@
type=str,
help="remote address to store model weights",
)
+parser.add_argument(
+ "--remote-draft-model-save-url",
+ default=None,
+ type=str,
+ help="remote address to store draft model weights",
+)
def main(args):
@@ -43,7 +48,10 @@ def main(args):
raise ValueError("model path must be a local directory")
# Create LLM instance from arguments
llm = Engine(**dataclasses.asdict(engine_args))
- llm.save_remote_model(url=args.remote_model_save_url)
+ llm.save_remote_model(
+ url=args.remote_model_save_url, draft_url=args.remote_draft_model_save_url
+ )
+ print("save remote (draft) model successfully")
if __name__ == "__main__":
diff --git a/python/pyproject.toml b/python/pyproject.toml
old mode 100644
new mode 100755
index 14273daf99c..ceeadc4cb58
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -4,8 +4,8 @@ build-backend = "setuptools.build_meta"
[project]
name = "sglang"
-version = "0.5.0rc2"
-description = "SGLang is yet another fast serving framework for large language models and vision language models."
+version = "0.5.2"
+description = "SGLang is a fast serving framework for large language models and vision language models."
readme = "README.md"
requires-python = ">=3.10"
license = { file = "LICENSE" }
@@ -31,7 +31,7 @@ runtime_common = [
"msgspec",
"ninja",
"openai==1.99.1",
- "openai-harmony==0.0.3",
+ "openai-harmony==0.0.4",
"orjson",
"outlines==0.1.11",
"packaging",
@@ -39,9 +39,9 @@ runtime_common = [
"pillow",
"prometheus-client>=0.20.0",
"psutil",
+ "pybase64",
"pydantic",
"pynvml",
- "pybase64",
"python-multipart",
"pyzmq>=25.1.2",
"sentencepiece",
@@ -50,30 +50,31 @@ runtime_common = [
"timm==1.0.16",
"tiktoken",
"torchao==0.9.0",
- "transformers==4.55.2",
+ "transformers==4.56.1",
"uvicorn",
"uvloop",
- "xgrammar==0.1.22",
+ "xgrammar==0.1.24",
]
srt = [
"sglang[runtime_common]",
- "sgl-kernel==0.3.5",
+ "sgl-kernel==0.3.9.post2",
"torch==2.8.0",
"torchaudio==2.8.0",
"torchvision",
"cuda-python",
- "flashinfer_python==0.2.11.post3",
+ "flashinfer_python==0.3.1",
]
blackwell = [
"sglang[runtime_common]",
- "sgl-kernel",
+ "sgl-kernel==0.3.9.post2",
"torch==2.8.0",
"torchaudio==2.8.0",
"torchvision",
"cuda-python",
- "flashinfer_python==0.2.11.post3",
+ "flashinfer_python==0.3.1",
+ "nvidia-cutlass-dsl==4.1.0",
]
# HIP (Heterogeneous-computing Interface for Portability) for AMD
@@ -82,11 +83,14 @@ srt_hip = [
"sglang[runtime_common]",
"torch",
"petit_kernel==0.0.2",
- "wave-lang==1.0.1",
+ "wave-lang==3.7.0",
]
-# CPU: torch wheel for CPU needs to be installed from https://download.pytorch.org/whl/cpu
-srt_cpu = ["sglang[runtime_common]", "einops"]
+# https://docs.sglang.ai/platforms/cpu_server.html
+srt_cpu = ["sglang[runtime_common]", "intel-openmp"]
+
+# https://docs.sglang.ai/platforms/ascend_npu.html
+srt_npu = ["sglang[runtime_common]"]
# xpu is not enabled in public vllm and torch whl,
# need to follow https://docs.vllm.ai/en/latest/getting_started/xpu-installation.htmlinstall vllm
@@ -96,14 +100,12 @@ srt_xpu = ["sglang[runtime_common]"]
# https://docs.vllm.ai/en/latest/getting_started/gaudi-installation.html
srt_hpu = ["sglang[runtime_common]"]
-# https://vllm-ascend.readthedocs.io/en/latest/installation.html
-srt_npu = ["sglang[runtime_common]"]
-
openai = ["openai==1.99.1", "tiktoken"]
anthropic = ["anthropic>=0.20.0"]
litellm = ["litellm>=1.0.0"]
torch_memory_saver = ["torch_memory_saver==0.0.8"]
decord = ["decord"]
+hip-attn = ["hip-attn>=1.2.4"]
test = [
"accelerate",
"expecttest",
@@ -113,13 +115,14 @@ test = [
"peft",
"sentence_transformers",
"pytest",
+ "tabulate",
]
-all = ["sglang[srt]", "sglang[openai]", "sglang[anthropic]", "sglang[torch_memory_saver]", "sglang[decord]"]
-all_hip = ["sglang[srt_hip]", "sglang[openai]", "sglang[anthropic]", "sglang[decord]"]
-all_xpu = ["sglang[srt_xpu]", "sglang[openai]", "sglang[anthropic]", "sglang[decord]"]
-all_hpu = ["sglang[srt_hpu]", "sglang[openai]", "sglang[anthropic]", "sglang[decord]"]
-all_cpu = ["sglang[srt_cpu]", "sglang[openai]", "sglang[anthropic]", "sglang[decord]"]
-all_npu = ["sglang[srt_npu]", "sglang[openai]", "sglang[anthropic]", "sglang[decord]"]
+all = ["sglang[srt]", "sglang[openai]", "sglang[anthropic]", "sglang[torch_memory_saver]", "sglang[decord]", "sglang[hip-attn]"]
+all_hip = ["sglang[srt_hip]", "sglang[openai]", "sglang[anthropic]", "sglang[decord]", "sglang[hip-attn]"]
+all_xpu = ["sglang[srt_xpu]", "sglang[openai]", "sglang[anthropic]", "sglang[decord]", "sglang[hip-attn]"]
+all_hpu = ["sglang[srt_hpu]", "sglang[openai]", "sglang[anthropic]", "sglang[decord]", "sglang[hip-attn]"]
+all_cpu = ["sglang[srt_cpu]", "sglang[openai]", "sglang[anthropic]", "sglang[decord]", "sglang[hip-attn]"]
+all_npu = ["sglang[srt_npu]", "sglang[openai]", "sglang[anthropic]", "sglang[decord]", "sglang[hip-attn]"]
dev = ["sglang[all]", "sglang[test]"]
dev_hip = ["sglang[all_hip]", "sglang[test]"]
diff --git a/python/sglang/bench_one_batch.py b/python/sglang/bench_one_batch.py
index aa43bb027d1..3dcbbbd665a 100644
--- a/python/sglang/bench_one_batch.py
+++ b/python/sglang/bench_one_batch.py
@@ -61,6 +61,7 @@
from sglang.srt.distributed.parallel_state import destroy_distributed_environment
from sglang.srt.entrypoints.engine import _set_envs_and_config
from sglang.srt.hf_transformers_utils import get_tokenizer
+from sglang.srt.layers.moe import initialize_moe_config
from sglang.srt.managers.schedule_batch import Req, ScheduleBatch
from sglang.srt.managers.scheduler import Scheduler
from sglang.srt.model_executor.forward_batch_info import ForwardBatch
@@ -266,6 +267,7 @@ def extend(reqs, model_runner):
model_config=model_runner.model_config,
enable_overlap=False,
spec_algorithm=SpeculativeAlgorithm.NONE,
+ hip_attention_config=model_runner.server_args.hip_attention_config,
)
batch.prepare_for_extend()
_maybe_prepare_mlp_sync_batch(batch, model_runner)
@@ -509,6 +511,8 @@ def latency_test(
bench_args,
tp_rank,
):
+ initialize_moe_config(server_args)
+
# Set CPU affinity
if get_bool_env_var("SGLANG_SET_CPU_AFFINITY"):
set_gpu_proc_affinity(server_args.tp_size, server_args.nnodes, tp_rank)
diff --git a/python/sglang/bench_one_batch_server.py b/python/sglang/bench_one_batch_server.py
index d925ae8ceea..8495c110e35 100644
--- a/python/sglang/bench_one_batch_server.py
+++ b/python/sglang/bench_one_batch_server.py
@@ -18,7 +18,7 @@
import multiprocessing
import os
import time
-from typing import Tuple
+from typing import List, Tuple
import requests
@@ -26,7 +26,7 @@
from sglang.profiler import run_profile
from sglang.srt.entrypoints.http_server import launch_server
from sglang.srt.server_args import ServerArgs
-from sglang.srt.utils import kill_process_tree
+from sglang.srt.utils import is_blackwell, kill_process_tree
from sglang.test.test_utils import is_in_ci, write_github_step_summary
@@ -45,7 +45,9 @@ class BenchArgs:
skip_warmup: bool = False
show_report: bool = False
profile: bool = False
+ profile_steps: int = 3
profile_by_stage: bool = False
+ dataset_path: str = ""
@staticmethod
def add_cli_args(parser: argparse.ArgumentParser):
@@ -78,7 +80,16 @@ def add_cli_args(parser: argparse.ArgumentParser):
parser.add_argument("--skip-warmup", action="store_true")
parser.add_argument("--show-report", action="store_true")
parser.add_argument("--profile", action="store_true")
+ parser.add_argument(
+ "--profile-steps", type=int, default=BenchArgs.profile_steps
+ )
parser.add_argument("--profile-by-stage", action="store_true")
+ parser.add_argument(
+ "--dataset-path",
+ type=str,
+ default=BenchArgs.dataset_path,
+ help="Path to the dataset.",
+ )
@classmethod
def from_cli_args(cls, args: argparse.Namespace):
@@ -132,7 +143,9 @@ def run_one_case(
result_filename: str,
tokenizer,
profile: bool = False,
+ profile_steps: int = 3,
profile_by_stage: bool = False,
+ dataset_path: str = "",
):
requests.post(url + "/flush_cache")
input_requests = sample_random_requests(
@@ -141,7 +154,7 @@ def run_one_case(
num_prompts=batch_size,
range_ratio=1.0,
tokenizer=tokenizer,
- dataset_path="",
+ dataset_path=dataset_path,
random_sample=True,
return_text=False,
)
@@ -162,7 +175,7 @@ def run_one_case(
profile_link = None
if profile:
profile_link: str = run_profile(
- url, 3, ["CPU", "GPU"], None, None, profile_by_stage
+ url, profile_steps, ["CPU", "GPU"], None, None, profile_by_stage
)
tic = time.perf_counter()
@@ -247,6 +260,71 @@ def run_one_case(
)
+def get_report_summary(
+ result: List[Tuple], server_args: ServerArgs, bench_args: BenchArgs
+):
+ import tabulate
+
+ summary = (
+ f"\nInput lens: {bench_args.input_len}. Output lens: {bench_args.output_len}.\n"
+ )
+
+ headers = [
+ "batch size",
+ "latency (s)",
+ "input throughput (tok/s)",
+ "output throughput (tok/s)",
+ "acc length",
+ "ITL (ms)",
+ "input cost ($/1M)",
+ "output cost ($/1M)",
+ ]
+ if bench_args.profile:
+ headers.append("profile")
+ rows = []
+
+ for (
+ batch_size,
+ latency,
+ ttft,
+ input_throughput,
+ output_throughput,
+ _,
+ _,
+ acc_length,
+ trace_link,
+ ) in result:
+ if is_blackwell():
+ hourly_cost_per_gpu = 4 # $4/hour for one B200
+ else:
+ hourly_cost_per_gpu = 2 # $2/hour for one H100
+
+ hourly_cost = hourly_cost_per_gpu * server_args.tp_size
+ input_util = 0.7
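+ # cost per 1M tokens = 1e6 / effective throughput (tok/s) / 3600 (s/hr) * hourly cost ($/hr);
+ # input throughput is derated by input_util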
+ accept_length = round(acc_length, 2) if acc_length is not None else "n/a"
+ itl = 1 / (output_throughput / batch_size) * 1000
+ input_cost = 1e6 / (input_throughput * input_util) / 3600 * hourly_cost
+ output_cost = 1e6 / output_throughput / 3600 * hourly_cost
+ row = [
+ batch_size,
+ latency,
+ input_throughput,
+ output_throughput,
+ accept_length,
+ itl,
+ input_cost,
+ output_cost,
+ ]
+ if trace_link:
+ row.append(f"[Profile]({trace_link})")
+ rows.append(row)
+
+ summary += tabulate.tabulate(
+ rows, headers=headers, tablefmt="github", floatfmt=".2f"
+ )
+ return summary
+
+
def run_benchmark(server_args: ServerArgs, bench_args: BenchArgs):
if bench_args.base_url:
proc, base_url = None, bench_args.base_url
@@ -275,6 +353,7 @@ def run_benchmark(server_args: ServerArgs, bench_args: BenchArgs):
run_name="",
result_filename="",
tokenizer=tokenizer,
+ dataset_path=bench_args.dataset_path,
)
print("=" * 8 + " Warmup End " + "=" * 8 + "\n")
@@ -321,6 +400,7 @@ def run_benchmark(server_args: ServerArgs, bench_args: BenchArgs):
result_filename=bench_args.result_filename,
tokenizer=tokenizer,
profile=bench_args.profile,
+ profile_steps=bench_args.profile_steps,
profile_by_stage=bench_args.profile_by_stage,
)[-1],
)
@@ -337,58 +417,14 @@ def run_benchmark(server_args: ServerArgs, bench_args: BenchArgs):
if not bench_args.show_report:
return
- summary = (
- f"\nInput lens: {bench_args.input_len}. Output lens: {bench_args.output_len}.\n"
- )
- summary += "| batch size | latency (s) | input throughput (tok/s) | output throughput (tok/s) | acc length | ITL (ms) | input cost ($/1M) | output cost ($/1M) |"
-
- if bench_args.profile:
- summary += " profile |"
-
- summary += "\n"
- summary += "| ---------- | ----------- | ------------------------- | ------------------------- | ---------- | -------- | ----------------- | ------------------ |"
-
- if bench_args.profile:
- summary += "-------------|"
- summary += "\n"
-
- for (
- batch_size,
- latency,
- ttft,
- input_throughput,
- output_throughput,
- overall_throughput,
- last_gen_throughput,
- acc_length,
- trace_link,
- ) in result:
- hourly_cost = 2 * server_args.tp_size # $2/hour for one H100
- input_util = 0.7
- accept_length = round(acc_length, 2) if acc_length is not None else "n/a"
- line = (
- f"| {batch_size} | "
- f"{latency:.2f} | "
- f"{input_throughput:.2f} | "
- f"{output_throughput:.2f} | "
- f"{accept_length} | "
- f"{1 / (output_throughput/batch_size) * 1000:.2f} | "
- f"{1e6 / (input_throughput * input_util) / 3600 * hourly_cost:.2f} | "
- f"{1e6 / output_throughput / 3600 * hourly_cost:.2f} |"
- )
- if trace_link:
- line += f" [Profile]({trace_link}) |"
- line += "\n"
- summary += line
-
- # print metrics table
+ summary = get_report_summary(result, server_args, bench_args)
print(summary)
if is_in_ci():
write_github_step_summary(summary)
-if __name__ == "__main__":
+def main():
parser = argparse.ArgumentParser()
ServerArgs.add_cli_args(parser)
BenchArgs.add_cli_args(parser)
@@ -397,3 +433,7 @@ def run_benchmark(server_args: ServerArgs, bench_args: BenchArgs):
bench_args = BenchArgs.from_cli_args(args)
run_benchmark(server_args, bench_args)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/python/sglang/bench_serving.py b/python/sglang/bench_serving.py
index 4ea7e22cb13..27ff8a6dabf 100644
--- a/python/sglang/bench_serving.py
+++ b/python/sglang/bench_serving.py
@@ -12,6 +12,8 @@
import argparse
import asyncio
+import base64
+import io
import json
import os
import pickle
@@ -71,8 +73,9 @@ class RequestFuncInput:
output_len: int
model: str
lora_name: str
- image_data: str
+ image_data: Optional[List[str]]
extra_request_body: Dict[str, Any]
+ timestamp: Optional[float] = None
@dataclass
@@ -289,16 +292,19 @@ async def async_request_openai_chat_completions(
), "OpenAI Chat Completions API URL must end with 'chat/completions'."
if request_func_input.image_data:
+ # Build multi-image content: a list of image_url entries followed by the text
+ content_items = [
+ {
+ "type": "image_url",
+ "image_url": {"url": img_url},
+ }
+ for img_url in request_func_input.image_data
+ ]
+ content_items.append({"type": "text", "text": request_func_input.prompt})
messages = [
{
"role": "user",
- "content": [
- {
- "type": "image_url",
- "image_url": {"url": request_func_input.image_data},
- },
- {"type": "text", "text": request_func_input.prompt},
- ],
+ "content": content_items,
},
]
else:
@@ -497,7 +503,7 @@ async def async_request_sglang_generate(
**request_func_input.extra_request_body,
}
- # Add image data if available
+ # Add image data if available (list of image urls/base64)
if request_func_input.image_data:
payload["image_data"] = request_func_input.image_data
@@ -648,7 +654,7 @@ def get_dataset(args, tokenizer):
prompt_suffix=args.prompt_suffix,
apply_chat_template=args.apply_chat_template,
)
- elif args.dataset_name.startswith("random"):
+ elif args.dataset_name.startswith("random") and args.dataset_name != "random-image":
input_requests = sample_random_requests(
input_len=args.random_input_len,
output_len=args.random_output_len,
@@ -659,6 +665,18 @@ def get_dataset(args, tokenizer):
random_sample=args.dataset_name == "random",
return_text=not tokenize_prompt,
)
+ elif args.dataset_name == "random-image":
+ assert not tokenize_prompt, "random-image does not support --tokenize-prompt"
+ input_requests = sample_random_image_requests(
+ num_requests=args.num_prompts,
+ num_images=args.random_image_num_images,
+ input_len=args.random_input_len,
+ output_len=args.random_output_len,
+ range_ratio=args.random_range_ratio,
+ tokenizer=tokenizer,
+ apply_chat_template=args.apply_chat_template,
+ image_resolution=args.random_image_resolution,
+ )
elif args.dataset_name == "generated-shared-prefix":
assert not tokenize_prompt
input_requests = sample_generated_shared_prefix_requests(
@@ -679,6 +697,24 @@ def get_dataset(args, tokenizer):
apply_chat_template=args.apply_chat_template,
random_sample=True,
)
+ elif args.dataset_name == "mooncake":
+ # For mooncake, we don't generate the prompts here.
+ # We just load the raw trace data. The async generator will handle the rest.
+ if not args.dataset_path:
+ local_path = os.path.join("/tmp", args.mooncake_workload + "_trace.jsonl")
+ else:
+ local_path = args.dataset_path
+
+ if not os.path.exists(local_path):
+ download_and_cache_file(
+ MOONCAKE_DATASET_URL[args.mooncake_workload], local_path
+ )
+
+ with open(local_path, "r") as f:
+ all_requests_data = [json.loads(line) for line in f if line.strip()]
+
+ # Limit the number of requests based on --num-prompts
+ input_requests = all_requests_data[: args.num_prompts]
else:
raise ValueError(f"Unknown dataset: {args.dataset_name}")
return input_requests
@@ -733,6 +769,12 @@ class BenchmarkMetrics:
SHAREGPT_URL = "https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json"
+MOONCAKE_DATASET_URL = {
+ "mooncake": "https://raw.githubusercontent.com/kvcache-ai/Mooncake/main/FAST25-release/arxiv-trace/mooncake_trace.jsonl",
+ "conversation": "https://raw.githubusercontent.com/kvcache-ai/Mooncake/main/FAST25-release/traces/conversation_trace.jsonl",
+ "synthetic": "https://raw.githubusercontent.com/kvcache-ai/Mooncake/main/FAST25-release/traces/synthetic_trace.jsonl",
+ "toolagent": "https://raw.githubusercontent.com/kvcache-ai/Mooncake/main/FAST25-release/traces/toolagent_trace.jsonl",
+}
def download_and_cache_file(url: str, filename: Optional[str] = None):
@@ -790,7 +832,81 @@ class DatasetRow:
prompt: str
prompt_len: int
output_len: int
- image_data: Optional[str] = None
+ image_data: Optional[List[str]] = None
+ timestamp: Optional[float] = None
+
+
+async def get_mooncake_request_over_time(
+ input_requests: List[Dict],
+ tokenizer: PreTrainedTokenizerBase,
+ slowdown_factor: float,
+ num_rounds: int,
+) -> AsyncGenerator[DatasetRow, None]:
+ """
+ An async generator that yields requests based on the timestamps in the Mooncake trace file,
+ with support for multi-round sessions.
+ """
+ if not input_requests:
+ return
+
+ input_requests.sort(key=lambda r: r["timestamp"])
+
+ start_time = time.perf_counter()
+ trace_start_time_ms = input_requests[0]["timestamp"]
+
+ for record in input_requests:
+ # Calculate when this entire session should start
+ relative_arrival_time_s = (record["timestamp"] - trace_start_time_ms) / 1000.0
+ target_arrival_time_s = relative_arrival_time_s * slowdown_factor
+
+ current_elapsed_time_s = time.perf_counter() - start_time
+ sleep_duration_s = target_arrival_time_s - current_elapsed_time_s
+ if sleep_duration_s > 0:
+ await asyncio.sleep(sleep_duration_s)
+
+ # Once the session starts, generate all rounds for it as a burst
+ # This simulates a user engaging in a multi-turn conversation
+
+ # Base user query constructed from hash_ids
+ user_query_base = ""
+ hash_ids = record.get("hash_ids", [])
+ for hash_id in hash_ids:
+ user_query_base += f"{hash_id}" + " ".join(
+ ["hi"] * 128
+ ) # Shorter for multi-round
+ user_query_base += "Tell me a story based on this context."
+
+ output_len_per_round = record.get("output_length", 256)
+ chat_history = []
+
+ for i in range(num_rounds):
+ # Add user query for the current round
+ chat_history.append(
+ {"role": "user", "content": f"Round {i+1}: {user_query_base}"}
+ )
+
+ # Form the full prompt from history
+ try:
+ full_prompt_text = tokenizer.apply_chat_template(
+ chat_history, tokenize=False, add_generation_prompt=True
+ )
+ except Exception:
+ full_prompt_text = "\n".join(
+ [f"{msg['role']}: {msg['content']}" for msg in chat_history]
+ )
+
+ prompt_len = len(tokenizer.encode(full_prompt_text))
+
+ yield DatasetRow(
+ prompt=full_prompt_text,
+ prompt_len=prompt_len,
+ output_len=output_len_per_round,
+ )
+
+ # Add a placeholder assistant response for the next round's context
+ # We use a placeholder because we don't know the real response
+ placeholder_response = " ".join(["story"] * output_len_per_round)
+ chat_history.append({"role": "assistant", "content": placeholder_response})
def sample_mmmu_requests(
@@ -879,17 +995,25 @@ def sample_mmmu_requests(
prompt = f"Question: {question}\n\nAnswer: "
if apply_chat_template:
try:
+ is_phi4_multimodal = (
+ "phi-4-multimodal" in tokenizer.name_or_path.lower()
+ )
+ if is_phi4_multimodal:
+ # <|endoftext10|> is the image token used in the phi-4-multimodal model.
+ content = prompt.replace("image 1", "<|endoftext10|>")
+ else:
+ content = [
+ {
+ "type": "image_url",
+ "image_url": {"url": image_data},
+ },
+ {"type": "text", "text": prompt},
+ ]
prompt = tokenizer.apply_chat_template(
[
{
"role": "user",
- "content": [
- {
- "type": "image_url",
- "image_url": {"url": image_data},
- },
- {"type": "text", "text": prompt},
- ],
+ "content": content,
}
],
add_generation_prompt=True,
@@ -913,7 +1037,7 @@ def sample_mmmu_requests(
prompt=prompt,
prompt_len=prompt_len,
output_len=output_len,
- image_data=image_data,
+ image_data=[image_data],
)
)
@@ -1113,6 +1237,132 @@ def sample_random_requests(
return input_requests
+def parse_random_image_resolution(image_resolution: str) -> Tuple[int, int]:
+ """Parse image resolution into (width, height).
+
+ Supports presets '4k', '1080p', '720p', '360p' and custom 'heightxwidth' format
+ (e.g., '1080x1920' means height=1080, width=1920).
+ """
+ resolution_to_size = {
+ "4k": (3840, 2160),
+ "1080p": (1920, 1080),
+ "720p": (1280, 720),
+ "360p": (640, 360),
+ }
+ if image_resolution in resolution_to_size:
+ return resolution_to_size[image_resolution]
+
+ res = image_resolution.strip().lower()
+ if "x" in res:
+ parts = res.split("x")
+ if len(parts) == 2 and parts[0].isdigit() and parts[1].isdigit():
+ height = int(parts[0])
+ width = int(parts[1])
+ if height > 0 and width > 0:
+ return (width, height)
+
+ raise ValueError(
+ f"Unsupported random-image resolution: {image_resolution}. "
+ "Choose from 4k, 1080p, 720p, 360p, or provide custom 'heightxwidth' (e.g., 1080x1920)."
+ )
+
+
+def sample_random_image_requests(
+ num_requests: int,
+ num_images: int,
+ input_len: int,
+ output_len: int,
+ range_ratio: float,
+ tokenizer: PreTrainedTokenizerBase,
+ apply_chat_template: bool = True,
+ image_resolution: str = "1080p",
+) -> List[DatasetRow]:
+ """Generate requests with random images.
+
+ - Each request includes ``num_images`` random images.
+ - Supported resolutions: 4k (3840x2160), 1080p (1920x1080), 720p (1280x720), 360p (640x360),
+ or custom 'heightxwidth' (e.g., 1080x1920).
+ - Text lengths follow the 'random' dataset sampling rule. ``prompt_len``
+ only counts text tokens and excludes image data.
+ """
+ try:
+ import pybase64
+ from PIL import Image
+ except ImportError as e:
+ raise ImportError(
+ "Please install Pillow to generate random images: pip install pillow"
+ ) from e
+
+ # Parse resolution (supports presets and 'heightxwidth')
+ width, height = parse_random_image_resolution(image_resolution)
+
+ # Check for potentially problematic combinations and warn user
+ if width * height >= 1920 * 1080 and num_images * num_requests >= 100:
+ warnings.warn(
+ f"High resolution ({width}x{height}) with {num_images * num_requests} total images "
+ f"may take a long time. Consider reducing resolution or image count.",
+ UserWarning,
+ stacklevel=2,
+ )
+
+ # Sample text lengths
+ input_lens = np.random.randint(
+ max(int(input_len * range_ratio), 1), input_len + 1, size=num_requests
+ )
+ output_lens = np.random.randint(
+ int(output_len * range_ratio), output_len + 1, size=num_requests
+ )
+
+ def _gen_random_image_data_uri(width: int = width, height: int = height) -> str:
+ arr = (np.random.rand(height, width, 3) * 255).astype(np.uint8)
+ img = Image.fromarray(arr, mode="RGB")
+ buf = io.BytesIO()
+ img.save(buf, format="JPEG", quality=85)
+ encoded = pybase64.b64encode(buf.getvalue()).decode("utf-8")
+ return f"data:image/jpeg;base64,{encoded}"
+
+ dataset: List[DatasetRow] = []
+ for i in range(num_requests):
+ # Generate text prompt
+ text_prompt = gen_prompt(tokenizer, int(input_lens[i]))
+
+ # Generate image list
+ images = [_gen_random_image_data_uri() for _ in range(num_images)]
+
+ prompt_str = text_prompt
+ if apply_chat_template:
+ try:
+ content_items = [
+ {"type": "image_url", "image_url": {"url": img_url}}
+ for img_url in images
+ ]
+ content_items.append({"type": "text", "text": text_prompt})
+ prompt_str = tokenizer.apply_chat_template(
+ [{"role": "user", "content": content_items}],
+ add_generation_prompt=True,
+ tokenize=False,
+ )
+ except Exception:
+ # Some tokenizers do not support list content; fall back to the raw text prompt
+ prompt_str = text_prompt
+
+ prompt_token_ids = tokenizer.encode(prompt_str)
+ prompt_token_len = len(prompt_token_ids)
+
+ dataset.append(
+ DatasetRow(
+ prompt=prompt_str,
+ prompt_len=prompt_token_len,
+ output_len=int(output_lens[i]),
+ image_data=images,
+ )
+ )
+
+ print(f"#Input tokens: {np.sum([x.prompt_len for x in dataset])}")
+ print(f"#Output tokens: {np.sum([x.output_len for x in dataset])}")
+ return dataset
+
+
def gen_prompt(tokenizer, token_num):
"""Generate a random prompt of specified token length using tokenizer vocabulary."""
all_available_tokens = list(tokenizer.get_vocab().values())
@@ -1216,19 +1466,41 @@ def sample_generated_shared_prefix_requests(
async def get_request(
input_requests: List[DatasetRow],
request_rate: float,
+ use_trace_timestamps: bool = False,
+ slowdown_factor: float = 1.0,
) -> AsyncGenerator[DatasetRow, None]:
- input_requests = iter(input_requests)
- for request in input_requests:
- yield request
+ if use_trace_timestamps:
+ print(
+ f"Using trace timestamps for request generation with slowdown factor {slowdown_factor}."
+ )
+ # Sort requests by timestamp for correct replay
+ input_requests.sort(key=lambda r: r.timestamp)
- if request_rate == float("inf"):
- # If the request rate is infinity, then we don't need to wait.
- continue
+ start_time = time.perf_counter()
+ trace_start_time_ms = input_requests[0].timestamp if input_requests else 0
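+ # Trace timestamps are in milliseconds; slowdown_factor > 1 stretches the replay.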
+
+ for request in input_requests:
+ trace_time_s = (request.timestamp - trace_start_time_ms) / 1000.0
+ target_arrival_time = start_time + (trace_time_s * slowdown_factor)
+
+ sleep_duration = target_arrival_time - time.perf_counter()
+ if sleep_duration > 0:
+ await asyncio.sleep(sleep_duration)
+
+ yield request
+ else:
+ input_requests_iter = iter(input_requests)
+ for request in input_requests_iter:
+ yield request
+
+ if request_rate == float("inf"):
+ # If the request rate is infinity, then we don't need to wait.
+ continue
- # Sample the request interval from the exponential distribution.
- interval = np.random.exponential(1.0 / request_rate)
- # The next request will be sent after the interval.
- await asyncio.sleep(interval)
+ # Sample the request interval from the exponential distribution.
+ interval = np.random.exponential(1.0 / request_rate)
+ # The next request will be sent after the interval.
+ await asyncio.sleep(interval)
def calculate_metrics(
@@ -1254,7 +1526,7 @@ def calculate_metrics(
tokenizer.encode(outputs[i].generated_text, add_special_tokens=False)
)
retokenized_output_lens.append(retokenized_output_len)
- total_input += input_requests[i].prompt_len
+ total_input += outputs[i].prompt_len
if output_len > 1:
tpots.append((outputs[i].latency - outputs[i].ttft) / (output_len - 1))
itls += outputs[i].itl
@@ -1326,6 +1598,9 @@ async def benchmark(
pd_separated: bool = False,
flush_cache: bool = False,
warmup_requests: int = 1,
+ use_trace_timestamps: bool = False,
+ mooncake_slowdown_factor=1.0,
+ mooncake_num_rounds=1,
):
if backend in ASYNC_REQUEST_FUNCS:
request_func = ASYNC_REQUEST_FUNCS[backend]
@@ -1345,8 +1620,32 @@ async def limited_request_func(request_func_input, pbar):
# Warmup
print(f"Starting warmup with {warmup_requests} sequences...")
- # Use the first request for all warmup iterations
- test_request = input_requests[0]
+ # Handle the data structure difference for the warmup request
+ if args.dataset_name == "mooncake":
+ # For mooncake, input_requests is a list of dicts.
+ # We need to build a temporary DatasetRow for the warmup phase.
+ warmup_record = input_requests[0]
+
+ # Build prompt from hash_ids, just like in the async generator
+ hash_ids = warmup_record.get("hash_ids", [])
+ prompt_text = ""
+ for hash_id in hash_ids:
+ prompt_text += f"{hash_id}" + " ".join(["hi"] * 512)
+ prompt_text += "Can you tell me a detailed story in 1000 words?"
+
+ output_len = warmup_record.get("output_length", 32)
+ prompt_len = len(tokenizer.encode(prompt_text))
+
+ # Create a temporary DatasetRow object for warmup
+ test_request = DatasetRow(
+ prompt=prompt_text,
+ prompt_len=prompt_len,
+ output_len=output_len,
+ image_data=None, # Mooncake doesn't have image data
+ )
+ else:
+ # For all other datasets, input_requests is a list of DatasetRow objects
+ test_request = input_requests[0]
if lora_names is not None and len(lora_names) != 0:
lora_name = lora_names[0]
@@ -1400,12 +1699,26 @@ async def limited_request_func(request_func_input, pbar):
if profile_output.success:
print("Profiler started")
- pbar = None if disable_tqdm else tqdm(total=len(input_requests))
-
# Run all requests
benchmark_start_time = time.perf_counter()
tasks: List[asyncio.Task] = []
- async for request in get_request(input_requests, request_rate):
+ pbar_total = len(input_requests)
+ if (
+ backend == "sglang" and args.dataset_name == "mooncake"
+ ): # Assuming mooncake is mainly for sglang or similar backends
+ print("Using time-based Mooncake request scheduler, ignoring --request-rate.")
+ request_generator = get_mooncake_request_over_time(
+ input_requests, tokenizer, mooncake_slowdown_factor, mooncake_num_rounds
+ )
+ print(
+ f"Starting Mooncake trace replay. Sessions: {len(input_requests)}, Rounds per session: {mooncake_num_rounds}. Slowdown factor: {mooncake_slowdown_factor}"
+ )
+ pbar_total *= mooncake_num_rounds
+ else:
+ request_generator = get_request(input_requests, request_rate)
+
+ pbar = None if disable_tqdm else tqdm(total=pbar_total)
+ async for request in request_generator:
if lora_names is not None and len(lora_names) != 0:
idx = random.randint(0, len(lora_names) - 1)
lora_name = lora_names[idx]
@@ -1421,6 +1734,7 @@ async def limited_request_func(request_func_input, pbar):
lora_name=lora_name,
image_data=request.image_data,
extra_request_body=extra_request_body,
+ timestamp=request.timestamp,
)
tasks.append(
@@ -1466,7 +1780,11 @@ async def limited_request_func(request_func_input, pbar):
print("\n{s:{c}^{n}}".format(s=" Serving Benchmark Result ", n=50, c="="))
print("{:<40} {:<10}".format("Backend:", backend))
- print("{:<40} {:<10}".format("Traffic request rate:", request_rate))
+ print(
+ "{:<40} {:<10}".format(
+ "Traffic request rate:", "trace" if use_trace_timestamps else request_rate
+ )
+ )
print(
"{:<40} {:<10}".format(
"Max request concurrency:",
@@ -1535,7 +1853,7 @@ async def limited_request_func(request_func_input, pbar):
# Arguments
"backend": args.backend,
"dataset_name": args.dataset_name,
- "request_rate": request_rate,
+ "request_rate": "trace" if use_trace_timestamps else request_rate,
"max_concurrency": max_concurrency,
"sharegpt_output_len": args.sharegpt_output_len,
"random_input_len": args.random_input_len,
@@ -1579,10 +1897,18 @@ async def limited_request_func(request_func_input, pbar):
output_file_name = args.output_file
else:
now = datetime.now().strftime("%m%d")
- if args.dataset_name.startswith("random"):
+ if args.dataset_name == "random-image":
+ output_file_name = (
+ f"{args.backend}_{now}_{args.num_prompts}_{args.random_input_len}_"
+ f"{args.random_output_len}_{args.random_image_num_images}imgs_"
+ f"{args.random_image_resolution}.jsonl"
+ )
+ elif args.dataset_name.startswith("random"):
output_file_name = f"{args.backend}_{now}_{args.num_prompts}_{args.random_input_len}_{args.random_output_len}.jsonl"
else:
- output_file_name = f"{args.backend}_{now}_{args.num_prompts}_sharegpt.jsonl"
+ output_file_name = (
+ f"{args.backend}_{now}_{args.num_prompts}_{args.dataset_name}.jsonl"
+ )
result_details = {
"input_lens": [output.prompt_len for output in outputs],
@@ -1637,6 +1963,17 @@ def run_benchmark(args_: argparse.Namespace):
if not hasattr(args, "tokenize_prompt"):
args.tokenize_prompt = False
+ if not hasattr(args, "use_trace_timestamps"):
+ args.use_trace_timestamps = False
+ if not hasattr(args, "mooncake_slowdown_factor"):
+ args.mooncake_slowdown_factor = 1.0
+
+ if not hasattr(args, "mooncake_num_rounds"):
+ args.mooncake_num_rounds = 1
+
print(f"benchmark_args={args}")
# Set global environments
@@ -1770,6 +2107,9 @@ def run_benchmark(args_: argparse.Namespace):
pd_separated=args.pd_separated,
flush_cache=args.flush_cache,
warmup_requests=args.warmup_requests,
+ use_trace_timestamps=args.use_trace_timestamps,
+ mooncake_slowdown_factor=args.mooncake_slowdown_factor,
+ mooncake_num_rounds=args.mooncake_num_rounds,
)
)
@@ -1819,7 +2159,15 @@ def __call__(self, parser, namespace, values, option_string=None):
"--dataset-name",
type=str,
default="sharegpt",
- choices=["sharegpt", "random", "random-ids", "generated-shared-prefix", "mmmu"],
+ choices=[
+ "sharegpt",
+ "random",
+ "random-ids",
+ "generated-shared-prefix",
+ "mmmu",
+ "random-image",
+ "mooncake",
+ ],
help="Name of the dataset to benchmark on.",
)
parser.add_argument(
@@ -1872,6 +2220,22 @@ def __call__(self, parser, namespace, values, option_string=None):
help="Range of sampled ratio of input/output length, "
"used only for random dataset.",
)
+ # random-image dataset args
+ parser.add_argument(
+ "--random-image-num-images",
+ type=int,
+ default=1,
+ help="Number of images per request (only available with the random-image dataset)",
+ )
+ parser.add_argument(
+ "--random-image-resolution",
+ type=str,
+ default="1080p",
+ help=(
+ "Resolution of random images for random-image dataset. "
+ "Supports presets 4k/1080p/720p/360p or custom 'heightxwidth' (e.g., 1080x1920)."
+ ),
+ )
parser.add_argument(
"--request-rate",
type=float,
@@ -1879,6 +2243,11 @@ def __call__(self, parser, namespace, values, option_string=None):
help="Number of requests per second. If this is inf, then all the requests are sent at time 0. "
"Otherwise, we use Poisson process to synthesize the request arrival times. Default is inf.",
)
+ parser.add_argument(
+ "--use-trace-timestamps",
+ action="store_true",
+ help="Use timestamps from the trace file for request scheduling. Only valid for 'mooncake' dataset.",
+ )
parser.add_argument(
"--max-concurrency",
type=int,
@@ -2002,5 +2371,33 @@ def __call__(self, parser, namespace, values, option_string=None):
default=256,
help="Target length in tokens for outputs in generated-shared-prefix dataset",
)
+ mooncake_group = parser.add_argument_group("mooncake dataset arguments")
+ mooncake_group.add_argument(
+ "--mooncake-slowdown-factor",
+ type=float,
+ default=1.0,
+ help="Slowdown factor for replaying the mooncake trace. "
+ "A value of 2.0 means the replay is twice as slow. "
+ "NOTE: --request-rate is IGNORED in mooncake mode.",
+ )
+ mooncake_group.add_argument(
+ "--mooncake-num-rounds",
+ type=int,
+ default=1,
+ help="Number of conversation rounds for each session in the mooncake dataset. "
+ "A value > 1 will enable true multi-turn session benchmarking.",
+ )
+ mooncake_group.add_argument(
+ "--mooncake-workload",
+ type=str,
+ default="conversation",
+ choices=[
+ "mooncake",
+ "conversation",
+ "synthetic",
+ "toolagent",
+ ],
+ help="Underlying workload for the mooncake dataset.",
+ )
args = parser.parse_args()
run_benchmark(args)
diff --git a/python/sglang/eval/llama3_eval.py b/python/sglang/eval/llama3_eval.py
index 35bd4a7e4d4..253cdf27531 100644
--- a/python/sglang/eval/llama3_eval.py
+++ b/python/sglang/eval/llama3_eval.py
@@ -12,7 +12,6 @@
import httpx
import numpy as np
import openai
-import transformers
from datasets import load_dataset
from openai import AsyncOpenAI
from tqdm import tqdm
diff --git a/python/sglang/lang/interpreter.py b/python/sglang/lang/interpreter.py
index ab3457cbf34..8b8cdf9c530 100644
--- a/python/sglang/lang/interpreter.py
+++ b/python/sglang/lang/interpreter.py
@@ -740,7 +740,7 @@ def _execute_separate_reasoning(self, expr: SglSeparateReasoning):
# Execute the stored lazy generation calls
self.backend.role_end_generate(self)
- from sglang.srt.reasoning_parser import ReasoningParser
+ from sglang.srt.parser.reasoning_parser import ReasoningParser
reasoning_parser = ReasoningParser(expr.model_type)
other = expr.expr
diff --git a/python/sglang/profiler.py b/python/sglang/profiler.py
index 3503ae7fc85..d872ca32080 100644
--- a/python/sglang/profiler.py
+++ b/python/sglang/profiler.py
@@ -9,7 +9,6 @@
import json
import os
import time
-import urllib.parse
from argparse import ArgumentParser
from pathlib import Path
from typing import List, Optional
diff --git a/python/sglang/srt/configs/__init__.py b/python/sglang/srt/configs/__init__.py
index 9c300857263..ef880c911de 100644
--- a/python/sglang/srt/configs/__init__.py
+++ b/python/sglang/srt/configs/__init__.py
@@ -5,6 +5,8 @@
from sglang.srt.configs.janus_pro import MultiModalityConfig
from sglang.srt.configs.kimi_vl import KimiVLConfig
from sglang.srt.configs.kimi_vl_moonvit import MoonViTConfig
+from sglang.srt.configs.longcat_flash import LongcatFlashConfig
+from sglang.srt.configs.qwen3_next import Qwen3NextConfig
from sglang.srt.configs.step3_vl import (
Step3TextConfig,
Step3VisionEncoderConfig,
@@ -16,10 +18,12 @@
"ChatGLMConfig",
"DbrxConfig",
"DeepseekVL2Config",
+ "LongcatFlashConfig",
"MultiModalityConfig",
"KimiVLConfig",
"MoonViTConfig",
"Step3VLConfig",
"Step3TextConfig",
"Step3VisionEncoderConfig",
+ "Qwen3NextConfig",
]
diff --git a/python/sglang/srt/configs/internvl.py b/python/sglang/srt/configs/internvl.py
index 7033ef35958..3ba9c61c10e 100644
--- a/python/sglang/srt/configs/internvl.py
+++ b/python/sglang/srt/configs/internvl.py
@@ -6,11 +6,13 @@
import sentencepiece as spm
from transformers import (
TOKENIZER_MAPPING,
+ GptOssConfig,
LlamaConfig,
PretrainedConfig,
PreTrainedTokenizer,
Qwen2Config,
Qwen3Config,
+ Qwen3MoeConfig,
)
from sglang.utils import logger
@@ -316,7 +318,11 @@ def __init__(
elif llm_config.get("architectures")[0] == "Qwen2ForCausalLM":
self.llm_config = Qwen2Config(**llm_config)
elif llm_config.get("architectures")[0] == "Qwen3MoeForCausalLM":
+ self.llm_config = Qwen3MoeConfig(**llm_config)
+ elif llm_config.get("architectures")[0] == "Qwen3ForCausalLM":
self.llm_config = Qwen3Config(**llm_config)
+ elif llm_config.get("architectures")[0] == "GptOssForCausalLM":
+ self.llm_config = GptOssConfig(**llm_config)
else:
raise ValueError(
"Unsupported architecture: {}".format(
diff --git a/python/sglang/srt/configs/longcat_flash.py b/python/sglang/srt/configs/longcat_flash.py
new file mode 100644
index 00000000000..e6a2dfb026c
--- /dev/null
+++ b/python/sglang/srt/configs/longcat_flash.py
@@ -0,0 +1,104 @@
+from transformers.configuration_utils import PretrainedConfig
+from transformers.utils import logging
+
+logger = logging.get_logger(__name__)
+
+FLASH_PRETRAINED_CONFIG_ARCHIVE_MAP = {}
+
+
+class LongcatFlashConfig(PretrainedConfig):
+ model_type = "longcat_flash"
+ keys_to_ignore_at_inference = ["past_key_values"]
+
+ def __init__(
+ self,
+ vocab_size=131072,
+ hidden_size=6144,
+ intermediate_size=None,
+ ffn_hidden_size=12288,
+ expert_ffn_hidden_size=2048,
+ num_layers=28,
+ num_hidden_layers=None,
+ num_attention_heads=64,
+ ep_size=1,
+ kv_lora_rank=512,
+ q_lora_rank=1536,
+ qk_rope_head_dim=128,
+ qk_nope_head_dim=128,
+ v_head_dim=128,
+ n_routed_experts=512,
+ moe_topk=12,
+ norm_topk_prob=False,
+ max_position_embeddings=131072,
+ rms_norm_eps=1e-05,
+ use_cache=True,
+ pad_token_id=None,
+ bos_token_id=1,
+ eos_token_id=2,
+ pretraining_tp=1,
+ tie_word_embeddings=False,
+ rope_theta=10000000.0,
+ rope_scaling=None,
+ attention_bias=False,
+ attention_dropout=0.0,
+ mla_scale_q_lora=True,
+ mla_scale_kv_lora=True,
+ torch_dtype="bfloat16",
+ params_dtype="bfloat16",
+ rounter_params_dtype="float32",
+ router_bias=False,
+ topk_method=None,
+ routed_scaling_factor=6.0,
+ zero_expert_num=256,
+ zero_expert_type="identity",
+ nextn_use_scmoe=False,
+ num_nextn_predict_layers=1,
+ **kwargs,
+ ):
+ super().__init__(
+ pad_token_id=pad_token_id,
+ bos_token_id=bos_token_id,
+ eos_token_id=eos_token_id,
+ tie_word_embeddings=tie_word_embeddings,
+ torch_dtype=torch_dtype,
+ params_dtype=params_dtype,
+ rounter_params_dtype=rounter_params_dtype,
+ topk_method=topk_method,
+ router_bias=router_bias,
+ nextn_use_scmoe=nextn_use_scmoe,
+ num_nextn_predict_layers=num_nextn_predict_layers,
+ **kwargs,
+ )
+ self.vocab_size = vocab_size
+ self.max_position_embeddings = max_position_embeddings
+ self.hidden_size = hidden_size
+ self.num_hidden_layers = (
+ num_hidden_layers if num_hidden_layers is not None else num_layers
+ )
+ self.intermediate_size = (
+ intermediate_size if intermediate_size is not None else ffn_hidden_size
+ )
+ self.moe_intermediate_size = expert_ffn_hidden_size
+ self.num_attention_heads = num_attention_heads
+ self.ep_size = ep_size
+ self.kv_lora_rank = kv_lora_rank
+ self.q_lora_rank = q_lora_rank
+ self.qk_rope_head_dim = qk_rope_head_dim
+ self.v_head_dim = v_head_dim
+ self.qk_nope_head_dim = qk_nope_head_dim
+ self.n_routed_experts = n_routed_experts
+ self.moe_topk = moe_topk
+ self.norm_topk_prob = norm_topk_prob
+ self.rms_norm_eps = rms_norm_eps
+ self.pretraining_tp = pretraining_tp
+ self.use_cache = use_cache
+ self.rope_theta = rope_theta
+ self.rope_scaling = rope_scaling
+ self.attention_bias = attention_bias
+ self.attention_dropout = attention_dropout
+ self.mla_scale_q_lora = mla_scale_q_lora
+ self.mla_scale_kv_lora = mla_scale_kv_lora
+ self.zero_expert_num = zero_expert_num
+ self.zero_expert_type = zero_expert_type
+ self.routed_scaling_factor = routed_scaling_factor
+ self.hidden_act = "silu"
diff --git a/python/sglang/srt/configs/model_config.py b/python/sglang/srt/configs/model_config.py
index bdb124e5155..758e1762e35 100644
--- a/python/sglang/srt/configs/model_config.py
+++ b/python/sglang/srt/configs/model_config.py
@@ -32,6 +32,7 @@
from sglang.srt.layers.quantization import QUANTIZATION_METHODS
from sglang.srt.server_args import ServerArgs
from sglang.srt.utils import get_bool_env_var, is_hip
+from sglang.utils import is_in_ci
logger = logging.getLogger(__name__)
@@ -59,6 +60,7 @@ def __init__(
enable_multimodal: Optional[bool] = None,
dtype: str = "auto",
quantization: Optional[str] = None,
+ is_context_extended: Optional[bool] = None,
override_config_file: Optional[str] = None,
is_draft_model: bool = False,
hybrid_kvcache_ratio: Optional[float] = None,
@@ -131,6 +133,13 @@ def __init__(
if is_draft_model and self.hf_config.architectures[0] == "Glm4MoeForCausalLM":
self.hf_config.architectures[0] = "Glm4MoeForCausalLMNextN"
+ if (
+ is_draft_model
+ and self.hf_config.architectures[0] == "LongcatFlashForCausalLM"
+ ):
+ self.hf_config.architectures[0] = "LongcatFlashForCausalLMNextN"
+ self.hf_config.num_hidden_layers = self.hf_config.num_nextn_predict_layers
+
if is_draft_model and self.hf_config.architectures[0] == "MiMoForCausalLM":
self.hf_config.architectures[0] = "MiMoMTP"
if (
@@ -139,6 +148,9 @@ def __init__(
):
self.hf_config.architectures[0] = "Ernie4_5_MoeForCausalLMMTP"
+ if is_draft_model and self.hf_config.architectures[0] == "Qwen3NextForCausalLM":
+ self.hf_config.architectures[0] = "Qwen3NextForCausalLMMTP"
+
# Check model type
self.is_generation = is_generation_model(
self.hf_config.architectures, is_embedding
@@ -166,22 +178,26 @@ def __init__(
derived_context_len = get_context_length(self.hf_text_config)
if context_length is not None:
if context_length > derived_context_len:
- if get_bool_env_var(
- "SGLANG_ALLOW_OVERWRITE_LONGER_CONTEXT_LEN", default="True"
- ):
- logger.warning(
- f"Warning: User-specified context_length ({context_length}) is greater than the derived context_length ({derived_context_len}). "
- f"This may lead to incorrect model outputs or CUDA errors."
+ reason = "Target model's" if is_draft_model else "User-specified"
+ msg = (
+ f"Warning: {reason} context_length ({context_length}) is greater than the derived context_length ({derived_context_len}). "
+ f"This may lead to incorrect model outputs or CUDA errors. Note that the derived context_length may differ from max_position_embeddings in the model's config."
+ )
+ if is_context_extended:
+ logger.info(
+ f"Context length is extended from {derived_context_len} to {context_length}."
)
+ elif (
+ get_bool_env_var("SGLANG_ALLOW_OVERWRITE_LONGER_CONTEXT_LEN")
+ or is_in_ci() # FIXME: fix this special case
+ ):
+ logger.warning(msg)
self.context_len = context_length
else:
raise ValueError(
- f"User-specified context_length ({context_length}) is greater than the derived context_length ({derived_context_len}). "
- f"This may lead to incorrect model outputs or CUDA errors. Note that the derived context_length may differ from max_position_embeddings in the model's config. "
- f"To allow overriding this maximum, set the env var SGLANG_ALLOW_OVERWRITE_LONGER_CONTEXT_LEN=1"
+ f"{msg} To allow overriding this maximum, set the env var SGLANG_ALLOW_OVERWRITE_LONGER_CONTEXT_LEN=1"
)
- else:
- self.context_len = context_length
+ self.context_len = context_length
else:
self.context_len = derived_context_len
@@ -197,6 +213,8 @@ def __init__(
"DeepseekV2ForCausalLM" in self.hf_config.architectures
or "DeepseekV3ForCausalLM" in self.hf_config.architectures
or "DeepseekV3ForCausalLMNextN" in self.hf_config.architectures
+ or "LongcatFlashForCausalLM" in self.hf_config.architectures
+ or "LongcatFlashForCausalLMNextN" in self.hf_config.architectures
):
self.head_dim = 256
self.attention_arch = AttentionArch.MLA
@@ -268,6 +286,9 @@ def __init__(
self.num_key_value_heads = self.num_attention_heads
self.hidden_size = self.hf_text_config.hidden_size
self.num_hidden_layers = self.hf_text_config.num_hidden_layers
+ self.num_attention_layers = self.num_hidden_layers
+ if "LongcatFlashForCausalLM" in self.hf_config.architectures:
+ self.num_attention_layers = self.num_hidden_layers * 2
self.num_nextn_predict_layers = getattr(
self.hf_text_config, "num_nextn_predict_layers", None
)
@@ -288,11 +309,16 @@ def __init__(
) or getattr(self.hf_config, "image_token_index", None)
@staticmethod
- def from_server_args(server_args: ServerArgs, model_path: str = None, **kwargs):
+ def from_server_args(
+ server_args: ServerArgs,
+ model_path: str = None,
+ model_revision: str = None,
+ **kwargs,
+ ):
return ModelConfig(
model_path=model_path or server_args.model_path,
trust_remote_code=server_args.trust_remote_code,
- revision=server_args.revision,
+ revision=model_revision or server_args.revision,
context_length=server_args.context_length,
model_override_args=server_args.json_model_override_args,
is_embedding=server_args.is_embedding,
@@ -301,6 +327,7 @@ def from_server_args(server_args: ServerArgs, model_path: str = None, **kwargs):
quantization=server_args.quantization,
hybrid_kvcache_ratio=server_args.hybrid_kvcache_ratio,
model_impl=server_args.model_impl,
+ is_context_extended=server_args.enable_hip_attention,
**kwargs,
)
@@ -341,6 +368,19 @@ def get_total_num_kv_heads(self) -> int:
"kv_n_heads",
self.hf_config.num_attention_heads,
)
+ if self.hf_config.model_type in ["nemotron-nas"]:
+ nkvh = {
+ self.hf_config.num_attention_heads // block.attention.n_heads_in_group
+ for block in self.hf_config.block_configs
+ if not block.attention.no_op
+ }
+ if len(nkvh) == 0:
+ raise RuntimeError("Couldn't determine number of kv heads")
+ if len(nkvh) > 1:
+ raise ValueError(
+ "Variable GQA (VGQA) is not yet supported for nemotron-nas in sglang"
+ )
+ return next(iter(nkvh))
attributes = [
# For Falcon:
@@ -378,17 +418,27 @@ def _parse_quant_hf_config(self):
# compressed-tensors uses a "compression_config" key
quant_cfg = getattr(self.hf_config, "compression_config", None)
if quant_cfg is None:
- # check if is modelopt model -- modelopt doesn't have corresponding field
+            # check if this is a modelopt or mixed-precision model -- neither has a corresponding field
# in hf `config.json` but has a standalone `hf_quant_config.json` in the root directory
# example: https://huggingface.co/nvidia/Llama-3.1-8B-Instruct-FP8/tree/main
+ # example: https://huggingface.co/Barrrrry/DeepSeek-R1-W4AFP8/tree/main
is_local = os.path.exists(self.model_path)
modelopt_quant_config = {"quant_method": "modelopt"}
if not is_local:
- from huggingface_hub import HfApi
+ import huggingface_hub
+
+ try:
+ from huggingface_hub import HfApi
+
+ hf_api = HfApi()
+ if hf_api.file_exists(self.model_path, "hf_quant_config.json"):
+ quant_cfg = modelopt_quant_config
+ except huggingface_hub.errors.OfflineModeIsEnabled:
+ logger.warning(
+ "Offline mode is enabled, skipping hf_quant_config.json check"
+ )
- hf_api = HfApi()
- if hf_api.file_exists(self.model_path, "hf_quant_config.json"):
- quant_cfg = modelopt_quant_config
elif os.path.exists(os.path.join(self.model_path, "hf_quant_config.json")):
quant_config_file = os.path.join(
self.model_path, "hf_quant_config.json"
diff --git a/python/sglang/srt/configs/qwen3_next.py b/python/sglang/srt/configs/qwen3_next.py
new file mode 100644
index 00000000000..099d14d414e
--- /dev/null
+++ b/python/sglang/srt/configs/qwen3_next.py
@@ -0,0 +1,326 @@
+# coding=utf-8
+# Copyright 2024 The Qwen team, Alibaba Group and the HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Qwen3Hybrid model configuration"""
+
+import enum
+import os
+
+import numpy as np
+import torch
+from transformers.configuration_utils import PretrainedConfig
+from transformers.modeling_rope_utils import rope_config_validation
+from transformers.utils import logging
+
+from sglang.srt.distributed.utils import divide
+from sglang.srt.layers.dp_attention import get_attention_tp_size
+
+logger = logging.get_logger(__name__)
+
+
+# NOTE: HybridLayerType
+class HybridLayerType(enum.Enum):
+ full_attention = "attention"
+ swa_attention = "swa_attention"
+ linear_attention = "linear_attention"
+ mamba2 = "mamba"
+
+
+class Qwen3NextConfig(PretrainedConfig):
+ r"""
+ This is the configuration class to store the configuration of a [`Qwen3NextModel`]. It is used to instantiate a
+ Qwen3-Next model according to the specified arguments, defining the model architecture.
+ Instantiating a configuration with the defaults will yield a similar configuration to that of
+ Qwen3-Next-80B-A3B-Instruct [Qwen/Qwen3-Next-80B-A3B-Instruct](https://huggingface.co/Qwen/Qwen3-Next-80B-A3B-Instruct).
+
+ Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
+ documentation from [`PretrainedConfig`] for more information.
+
+
+ Args:
+ vocab_size (`int`, *optional*, defaults to 151936):
+ Vocabulary size of the model. Defines the number of different tokens that can be represented by the
+            `input_ids`.
+ hidden_size (`int`, *optional*, defaults to 2048):
+ Dimension of the hidden representations.
+ intermediate_size (`int`, *optional*, defaults to 5632):
+ Dimension of the MLP representations.
+ num_hidden_layers (`int`, *optional*, defaults to 48):
+ Number of hidden layers in the Transformer encoder.
+ num_attention_heads (`int`, *optional*, defaults to 16):
+ Number of attention heads for each attention layer in the Transformer encoder.
+ num_key_value_heads (`int`, *optional*, defaults to 2):
+ This is the number of key_value heads that should be used to implement Grouped Query Attention. If
+ `num_key_value_heads=num_attention_heads`, the model will use Multi Head Attention (MHA), if
+ `num_key_value_heads=1` the model will use Multi Query Attention (MQA) otherwise GQA is used. When
+ converting a multi-head checkpoint to a GQA checkpoint, each group key and value head should be constructed
+ by meanpooling all the original heads within that group. For more details checkout [this
+            paper](https://arxiv.org/pdf/2305.13245.pdf). If it is not specified, it will default to `2`.
+ hidden_act (`str`, *optional*, defaults to `"silu"`):
+ The non-linear activation function in the decoder.
+ max_position_embeddings (`int`, *optional*, defaults to 32768):
+ The maximum sequence length that this model might ever be used with.
+ initializer_range (`float`, *optional*, defaults to 0.02):
+ The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
+ rms_norm_eps (`float`, *optional*, defaults to 1e-06):
+ The epsilon used by the rms normalization layers.
+ use_cache (`bool`, *optional*, defaults to `True`):
+ Whether or not the model should return the last key/values attentions (not used by all models). Only
+ relevant if `config.is_decoder=True`.
+ tie_word_embeddings (`bool`, *optional*, defaults to `False`):
+ Whether the model's input and output word embeddings should be tied.
+ rope_theta (`float`, *optional*, defaults to 10000.0):
+ The base period of the RoPE embeddings.
+ rope_scaling (`Dict`, *optional*):
+            Dictionary containing the scaling configuration for the RoPE embeddings. NOTE: if you apply a new rope type
+            and expect the model to work on a longer `max_position_embeddings`, we recommend updating this value
+            accordingly.
+ Expected contents:
+ `rope_type` (`str`):
+ The sub-variant of RoPE to use. Can be one of ['default', 'linear', 'dynamic', 'yarn', 'longrope',
+ 'llama3'], with 'default' being the original RoPE implementation.
+ `factor` (`float`, *optional*):
+ Used with all rope types except 'default'. The scaling factor to apply to the RoPE embeddings. In
+ most scaling types, a `factor` of x will enable the model to handle sequences of length x *
+ original maximum pre-trained length.
+ `original_max_position_embeddings` (`int`, *optional*):
+ Used with 'dynamic', 'longrope' and 'llama3'. The original max position embeddings used during
+ pretraining.
+ `attention_factor` (`float`, *optional*):
+ Used with 'yarn' and 'longrope'. The scaling factor to be applied on the attention
+                computation. If unspecified, it defaults to the value recommended by the implementation, using the
+ `factor` field to infer the suggested value.
+ `beta_fast` (`float`, *optional*):
+ Only used with 'yarn'. Parameter to set the boundary for extrapolation (only) in the linear
+ ramp function. If unspecified, it defaults to 32.
+ `beta_slow` (`float`, *optional*):
+ Only used with 'yarn'. Parameter to set the boundary for interpolation (only) in the linear
+ ramp function. If unspecified, it defaults to 1.
+ `short_factor` (`List[float]`, *optional*):
+ Only used with 'longrope'. The scaling factor to be applied to short contexts (<
+ `original_max_position_embeddings`). Must be a list of numbers with the same length as the hidden
+ size divided by the number of attention heads divided by 2
+ `long_factor` (`List[float]`, *optional*):
+                Only used with 'longrope'. The scaling factor to be applied to long contexts (>
+ `original_max_position_embeddings`). Must be a list of numbers with the same length as the hidden
+ size divided by the number of attention heads divided by 2
+ `low_freq_factor` (`float`, *optional*):
+ Only used with 'llama3'. Scaling factor applied to low frequency components of the RoPE
+ `high_freq_factor` (`float`, *optional*):
+ Only used with 'llama3'. Scaling factor applied to high frequency components of the RoPE
+ partial_rotary_factor (`float`, *optional*, defaults to 0.25):
+ Percentage of the query and keys which will have rotary embedding.
+ attention_bias (`bool`, *optional*, defaults to `False`):
+ Whether to use a bias in the query, key, value and output projection layers during self-attention.
+ attention_dropout (`float`, *optional*, defaults to 0.0):
+ The dropout ratio for the attention probabilities.
+ head_dim (`int`, *optional*, defaults to 256):
+ Projection weights dimension in multi-head attention.
+ linear_conv_kernel_dim (`int`, *optional*, defaults to 4):
+ Kernel size of the convolution used in linear attention layers.
+ linear_key_head_dim (`int`, *optional*, defaults to 128):
+ Dimension of each key head in linear attention.
+ linear_value_head_dim (`int`, *optional*, defaults to 128):
+ Dimension of each value head in linear attention.
+ linear_num_key_heads (`int`, *optional*, defaults to 16):
+ Number of key heads used in linear attention layers.
+ linear_num_value_heads (`int`, *optional*, defaults to 32):
+ Number of value heads used in linear attention layers.
+ decoder_sparse_step (`int`, *optional*, defaults to 1):
+ The frequency of the MoE layer.
+ moe_intermediate_size (`int`, *optional*, defaults to 512):
+ Intermediate size of the routed expert.
+ shared_expert_intermediate_size (`int`, *optional*, defaults to 512):
+ Intermediate size of the shared expert.
+ num_experts_per_tok (`int`, *optional*, defaults to 10):
+ Number of selected experts.
+ num_experts (`int`, *optional*, defaults to 512):
+ Number of routed experts.
+ norm_topk_prob (`bool`, *optional*, defaults to `True`):
+ Whether to normalize the topk probabilities.
+ output_router_logits (`bool`, *optional*, defaults to `False`):
+ Whether or not the router logits should be returned by the model. Enabling this will also
+ allow the model to output the auxiliary loss, including load balancing loss and router z-loss.
+ router_aux_loss_coef (`float`, *optional*, defaults to 0.001):
+ The aux loss factor for the total loss.
+ mlp_only_layers (`list[int]`, *optional*, defaults to `[]`):
+            Indicates which layers use Qwen3NextMLP rather than Qwen3NextSparseMoeBlock.
+            The list contains layer indices from 0 to num_layers-1 if we have num_layers layers.
+            If `mlp_only_layers` is empty, `decoder_sparse_step` is used to determine the sparsity.
+ layer_types (`list[str]`, *optional*, defaults to None):
+ Types of each layer (attention or linear).
+
+ ```python
+ >>> from transformers import Qwen3NextModel, Qwen3NextConfig
+
+ >>> # Initializing a Qwen3Next style configuration
+ >>> configuration = Qwen3NextConfig()
+
+ >>> # Initializing a model from the Qwen3-Next-80B-A3B style configuration
+ >>> model = Qwen3NextModel(configuration)
+
+ >>> # Accessing the model configuration
+ >>> configuration = model.config
+ ```
+ """
+
+ model_type = "qwen3_next"
+ keys_to_ignore_at_inference = ["past_key_values"]
+
+ def __init__(
+ self,
+ vocab_size=151936,
+ hidden_size=2048,
+ intermediate_size=5632,
+ num_hidden_layers=48,
+ num_attention_heads=16,
+ num_key_value_heads=2,
+ hidden_act="silu",
+ max_position_embeddings=32768,
+ initializer_range=0.02,
+ rms_norm_eps=1e-6,
+ use_cache=True,
+ tie_word_embeddings=False,
+ rope_theta=10000.0,
+ rope_scaling=None,
+ partial_rotary_factor=0.25,
+ attention_bias=False,
+ attention_dropout=0.0,
+ head_dim=256,
+ linear_conv_kernel_dim=4,
+ linear_key_head_dim=128,
+ linear_value_head_dim=128,
+ linear_num_key_heads=16,
+ linear_num_value_heads=32,
+ decoder_sparse_step=1,
+ moe_intermediate_size=512,
+ shared_expert_intermediate_size=512,
+ num_experts_per_tok=10,
+ num_experts=512,
+ norm_topk_prob=True,
+ output_router_logits=False,
+ router_aux_loss_coef=0.001,
+ mlp_only_layers=[],
+ layer_types=None,
+ **kwargs,
+ ):
+ super().__init__(tie_word_embeddings=tie_word_embeddings, **kwargs)
+ self.vocab_size = vocab_size
+ self.max_position_embeddings = max_position_embeddings
+ self.hidden_size = hidden_size
+ self.intermediate_size = intermediate_size
+ self.num_hidden_layers = num_hidden_layers
+ self.num_attention_heads = num_attention_heads
+ self.num_key_value_heads = num_key_value_heads
+ self.hidden_act = hidden_act
+ self.initializer_range = initializer_range
+ self.rms_norm_eps = rms_norm_eps
+ self.use_cache = use_cache
+ self.rope_theta = rope_theta
+ self.rope_scaling = rope_scaling
+ self.partial_rotary_factor = partial_rotary_factor
+ self.attention_bias = attention_bias
+ self.attention_dropout = attention_dropout
+ self.head_dim = head_dim
+ rope_config_validation(self)
+
+        # linear attention (gated DeltaNet, a.k.a. GDN) parameters
+ self.linear_conv_kernel_dim = linear_conv_kernel_dim
+ self.linear_key_head_dim = linear_key_head_dim
+ self.linear_value_head_dim = linear_value_head_dim
+ self.linear_num_key_heads = linear_num_key_heads
+ self.linear_num_value_heads = linear_num_value_heads
+
+ # MoE arguments
+ self.decoder_sparse_step = decoder_sparse_step
+ self.moe_intermediate_size = moe_intermediate_size
+ self.shared_expert_intermediate_size = shared_expert_intermediate_size
+ self.num_experts_per_tok = num_experts_per_tok
+ self.num_experts = num_experts
+ self.norm_topk_prob = norm_topk_prob
+ self.output_router_logits = output_router_logits
+ self.router_aux_loss_coef = router_aux_loss_coef
+ self.mlp_only_layers = mlp_only_layers
+
+ @property
+ def layers_block_type(self):
+ layer_type_list = []
+
+ for l in range(self.num_hidden_layers):
+ if (l + 1) % self.full_attention_interval == 0:
+ layer_type_list.append(HybridLayerType.full_attention.value)
+ else:
+ layer_type_list.append(HybridLayerType.linear_attention.value)
+
+ return layer_type_list
+
+ @property
+ def linear_layer_ids(self):
+ return [
+ i
+ for i, type_value in enumerate(self.layers_block_type)
+ if type_value == HybridLayerType.linear_attention.value
+ ]
+
+ @property
+ def full_attention_layer_ids(self):
+ return [
+ i
+ for i, type_value in enumerate(self.layers_block_type)
+ if type_value == HybridLayerType.full_attention.value
+ ]
+
+ @property
+ def hybrid_gdn_params(self):
+ world_size = get_attention_tp_size()
+ conv_dim = (
+ self.linear_key_head_dim * self.linear_num_key_heads * 2
+ + self.linear_value_head_dim * self.linear_num_value_heads
+ )
+ conv_state_shape = (
+ divide(conv_dim, world_size),
+ self.linear_conv_kernel_dim - 1,
+ )
+
+ temporal_state_shape = (
+ divide(self.linear_num_value_heads, world_size),
+ self.linear_key_head_dim,
+ self.linear_value_head_dim,
+ )
+ conv_dtype = torch.bfloat16
+ dtype_map = {
+ "float32": torch.float32,
+ "bfloat16": torch.bfloat16,
+ }
+ ssm_dtype = dtype_map[os.environ["SGLANG_MAMBA_SSM_DTYPE"]]
+ mamba_layers = self.linear_layer_ids
+ return (
+ conv_state_shape,
+ temporal_state_shape,
+ conv_dtype,
+ ssm_dtype,
+ mamba_layers,
+ )
+
+ @property
+ def mamba_cache_per_req(self):
+ conv_state_shape, temporal_state_shape, conv_dtype, ssm_dtype, mamba_layers = (
+ self.hybrid_gdn_params
+ )
+ mamba_layers_len = len(mamba_layers)
+
+ return (
+ int(np.prod(conv_state_shape)) * conv_dtype.itemsize
+ + int(np.prod(temporal_state_shape)) * ssm_dtype.itemsize
+ ) * mamba_layers_len
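A usage sketch for the new config (not part of the patch). `full_attention_interval` is not set in `__init__`, so it is assumed to arrive through `**kwargs`; the value 4 below and the cache arithmetic (attention TP size 1, bfloat16 states) are assumptions.

from sglang.srt.configs.qwen3_next import Qwen3NextConfig

cfg = Qwen3NextConfig(num_hidden_layers=48, full_attention_interval=4)  # interval assumed
print(cfg.layers_block_type[:4])          # ['linear_attention', 'linear_attention', 'linear_attention', 'attention']
print(len(cfg.linear_layer_ids))          # 36 linear-attention (GDN) layers
print(len(cfg.full_attention_layer_ids))  # 12 full-attention layers

# Back-of-the-envelope mamba cache per request, assuming tp=1 and bfloat16 states:
#   conv_dim       = 128*16*2 + 128*32        = 8192
#   conv state     = 8192 * (4 - 1) * 2 bytes = 49,152 bytes
#   temporal state = 32 * 128 * 128 * 2 bytes = 1,048,576 bytes
#   per layer      ~ 1.05 MiB; * 36 linear layers ~ 37.7 MiB per request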
diff --git a/python/sglang/srt/configs/update_config.py b/python/sglang/srt/configs/update_config.py
index 241d9566ab5..abbd724fb14 100644
--- a/python/sglang/srt/configs/update_config.py
+++ b/python/sglang/srt/configs/update_config.py
@@ -49,14 +49,25 @@ def get_num_heads_padding_size(tp_size, weight_block_size):
def update_intermediate_size(model_config, attr_name, intermediate_padding_size):
- if hasattr(model_config.hf_config, attr_name):
+ attr_value = intermediate_padding_size
+ if hasattr(model_config, "hf_config") and hasattr(
+ model_config.hf_config, attr_name
+ ):
attr_value = getattr(model_config.hf_config, attr_name)
- if attr_value % intermediate_padding_size != 0:
- from sglang.srt.layers.vocab_parallel_embedding import pad_vocab_size
+ elif hasattr(model_config, attr_name):
+ attr_value = getattr(model_config, attr_name)
+
+ if attr_value % intermediate_padding_size != 0:
+ from sglang.srt.layers.vocab_parallel_embedding import pad_vocab_size
- attr_value = pad_vocab_size(attr_value, intermediate_padding_size)
+ attr_value = pad_vocab_size(attr_value, intermediate_padding_size)
+ if hasattr(model_config, "hf_config"):
setattr(model_config.hf_config, attr_name, attr_value)
- setattr(model_config.hf_text_config, attr_name, attr_value)
+ if hasattr(model_config, "hf_text_config"):
+ setattr(model_config.hf_text_config, attr_name, attr_value)
+ else:
+ setattr(model_config, attr_name, attr_value)
+
return model_config
@@ -118,4 +129,28 @@ def adjust_config_with_unaligned_cpu_tp(
model_config = update_intermediate_size(
model_config, "intermediate_size_mlp", intermediate_padding_size
)
+ if (
+ hasattr(model_config.hf_config, "vision_config")
+ and model_config.hf_config.vision_config.model_type == "siglip_vision_model"
+ ):
+ model_config.hf_config.vision_config.original_num_attention_heads = (
+ model_config.num_attention_heads
+ )
+ if model_config.hf_config.vision_config.num_attention_heads % tp_size != 0:
+ model_config.hf_config.vision_config.head_dim = (
+ model_config.hf_config.vision_config.hidden_size
+ // model_config.hf_config.vision_config.num_attention_heads
+ )
+ from sglang.srt.layers.vocab_parallel_embedding import pad_vocab_size
+
+ pad_size = get_num_heads_padding_size(tp_size, weight_block_size)
+ model_config.hf_config.vision_config.num_attention_heads = pad_vocab_size(
+ model_config.hf_config.vision_config.num_attention_heads, pad_size
+ )
+ model_config.hf_config.vision_config = update_intermediate_size(
+ model_config.hf_config.vision_config,
+ "intermediate_size",
+ intermediate_padding_size,
+ )
+
return model_config
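A sketch of the rounding performed above (not part of the patch): `pad_vocab_size` is reused to round an intermediate size, or a head count, up to the next multiple of the padding size. The helper below reproduces that rounding under the assumption that this is all it does here.

def round_up_to_multiple(value: int, multiple: int) -> int:
    # Same rounding that pad_vocab_size(value, multiple) is used for above.
    return ((value + multiple - 1) // multiple) * multiple

assert round_up_to_multiple(11008, 512) == 11264  # unaligned intermediate size gets padded
assert round_up_to_multiple(1024, 512) == 1024    # already aligned values are unchanged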
diff --git a/python/sglang/srt/connector/__init__.py b/python/sglang/srt/connector/__init__.py
index 829644c9196..38e1d5eabb5 100644
--- a/python/sglang/srt/connector/__init__.py
+++ b/python/sglang/srt/connector/__init__.py
@@ -20,7 +20,7 @@ class ConnectorType(str, enum.Enum):
KV = "KV"
-def create_remote_connector(url, device="cpu") -> BaseConnector:
+def create_remote_connector(url, **kwargs) -> BaseConnector:
connector_type = parse_connector_type(url)
if connector_type == "redis":
return RedisConnector(url)
diff --git a/python/sglang/srt/connector/base_connector.py b/python/sglang/srt/connector/base_connector.py
index a9c00d0c958..c9a1c36e263 100644
--- a/python/sglang/srt/connector/base_connector.py
+++ b/python/sglang/srt/connector/base_connector.py
@@ -20,9 +20,8 @@ class BaseConnector(ABC):
://files/
"""
- def __init__(self, url: str, device: torch.device = "cpu"):
+ def __init__(self, url: str):
self.url = url
- self.device = device
self.closed = False
self.local_dir = tempfile.mkdtemp()
for sig in (signal.SIGINT, signal.SIGTERM):
diff --git a/python/sglang/srt/connector/redis.py b/python/sglang/srt/connector/redis.py
index 761594f7817..cb1db3f7cc9 100644
--- a/python/sglang/srt/connector/redis.py
+++ b/python/sglang/srt/connector/redis.py
@@ -15,10 +15,10 @@
class RedisConnector(BaseKVConnector):
- def __init__(self, url: str, device: torch.device = "cpu"):
+ def __init__(self, url: str):
import redis
- super().__init__(url, device)
+ super().__init__(url)
parsed_url = urlparse(url)
self.connection = redis.Redis(host=parsed_url.hostname, port=parsed_url.port)
self.model_name = parsed_url.path.lstrip("/")
diff --git a/python/sglang/srt/connector/serde/__init__.py b/python/sglang/srt/connector/serde/__init__.py
index 394dba0a661..c05b20afa2c 100644
--- a/python/sglang/srt/connector/serde/__init__.py
+++ b/python/sglang/srt/connector/serde/__init__.py
@@ -15,7 +15,7 @@ def create_serde(serde_type: str) -> Tuple[Serializer, Deserializer]:
if serde_type == "safe":
s = SafeSerializer()
- d = SafeDeserializer(torch.uint8)
+ d = SafeDeserializer()
else:
raise ValueError(f"Unknown serde type: {serde_type}")
diff --git a/python/sglang/srt/connector/serde/safe_serde.py b/python/sglang/srt/connector/serde/safe_serde.py
index 0163af9f544..3e75f9bfc4a 100644
--- a/python/sglang/srt/connector/serde/safe_serde.py
+++ b/python/sglang/srt/connector/serde/safe_serde.py
@@ -19,11 +19,12 @@ def to_bytes(self, t: torch.Tensor) -> bytes:
class SafeDeserializer(Deserializer):
- def __init__(self, dtype):
- super().__init__(dtype)
+ def __init__(self):
+ # TODO: dtype options
+ super().__init__(torch.float32)
def from_bytes_normal(self, b: Union[bytearray, bytes]) -> torch.Tensor:
- return load(bytes(b))["tensor_bytes"].to(dtype=self.dtype)
+ return load(bytes(b))["tensor_bytes"]
def from_bytes(self, b: Union[bytearray, bytes]) -> torch.Tensor:
return self.from_bytes_normal(b)
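A note on the change above (sketch, not part of the patch): with the forced `.to(dtype=self.dtype)` removed, deserialization returns tensors in whatever dtype was serialized, since safetensors records the dtype per tensor.

import torch
from safetensors.torch import load, save

blob = save({"tensor_bytes": torch.ones(4, dtype=torch.bfloat16)})
restored = load(blob)["tensor_bytes"]
assert restored.dtype == torch.bfloat16  # dtype preserved rather than forced to a fixed one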
diff --git a/python/sglang/srt/constrained/xgrammar_backend.py b/python/sglang/srt/constrained/xgrammar_backend.py
index 6118aa22b8d..7b101df4f43 100644
--- a/python/sglang/srt/constrained/xgrammar_backend.py
+++ b/python/sglang/srt/constrained/xgrammar_backend.py
@@ -162,12 +162,16 @@ def __init__(
):
super().__init__()
- # Create TokenizerInfo with model's EOS tokens as the authoritative stop tokens
- # This ensures consistency between what the model considers EOS and what XGrammar uses
- tokenizer_info = TokenizerInfo.from_huggingface(
- tokenizer, vocab_size=vocab_size, stop_token_ids=model_eos_token_ids
- )
- override_stop_tokens = None
+ if hasattr(tokenizer, "init_xgrammar"):
+            # For special tokenizers that provide their own xgrammar initialization
+ tokenizer_info, override_stop_tokens = tokenizer.init_xgrammar()
+ else:
+ # Create TokenizerInfo with model's EOS tokens as the authoritative stop tokens
+ # This ensures consistency between what the model considers EOS and what XGrammar uses
+ tokenizer_info = TokenizerInfo.from_huggingface(
+ tokenizer, vocab_size=vocab_size, stop_token_ids=model_eos_token_ids
+ )
+ override_stop_tokens = None
self.grammar_compiler = GrammarCompiler(tokenizer_info=tokenizer_info)
self.vocab_size = vocab_size
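A sketch of the duck-typed hook used above (not part of the patch; the class and attribute names are assumptions): any tokenizer exposing `init_xgrammar()` is expected to return a `(tokenizer_info, override_stop_tokens)` pair.

class MySpecialTokenizer:
    def init_xgrammar(self):
        # Return a prebuilt xgrammar TokenizerInfo plus the stop-token ids that should
        # override the model's EOS list (use None to keep the defaults).
        return self._tokenizer_info, [self._eos_token_id]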
diff --git a/python/sglang/srt/custom_op.py b/python/sglang/srt/custom_op.py
index 8c662b5ccb5..ea3c06e6da6 100644
--- a/python/sglang/srt/custom_op.py
+++ b/python/sglang/srt/custom_op.py
@@ -1,12 +1,20 @@
from torch import nn
-from sglang.srt.utils import cpu_has_amx_support, is_cpu, is_cuda, is_hip, is_npu
+from sglang.srt.utils import (
+ cpu_has_amx_support,
+ is_cpu,
+ is_cuda,
+ is_hip,
+ is_npu,
+ is_xpu,
+)
_is_cuda = is_cuda()
_is_hip = is_hip()
_is_cpu = is_cpu()
_is_cpu_amx_available = cpu_has_amx_support()
_is_npu = is_npu()
+_is_xpu = is_xpu()
class CustomOp(nn.Module):
@@ -88,5 +96,7 @@ def dispatch_forward(self):
return self.forward_cpu
elif _is_npu:
return self.forward_npu
+ elif _is_xpu:
+ return self.forward_xpu
else:
return self.forward_native
diff --git a/python/sglang/srt/debug_utils/dump_comparator.py b/python/sglang/srt/debug_utils/dump_comparator.py
index 946cdc4fb7d..aca9c3b7af4 100644
--- a/python/sglang/srt/debug_utils/dump_comparator.py
+++ b/python/sglang/srt/debug_utils/dump_comparator.py
@@ -1,11 +1,11 @@
import argparse
import functools
-import re
from pathlib import Path
import polars as pl
import torch
+from sglang.srt.debug_utils.dump_loader import find_row, read_meta
from sglang.srt.debug_utils.dumper import get_truncated_value
@@ -26,66 +26,77 @@ def main(args):
print("df_baseline", df_baseline)
for row in df_target.iter_rows(named=True):
- rows_baseline = df_baseline.filter(
- (
- pl.col("forward_pass_id")
- == row["forward_pass_id"] - args.start_id + args.baseline_start_id
- )
- & functools.reduce(
- lambda a, b: a & b,
- [
- pl.col(col) == row[col]
- for col in row.keys()
- if col not in ["forward_pass_id", "dump_index", "filename"]
- ],
- )
+ path_target = Path(args.target_path) / row["filename"]
+
+ row_baseline = find_row(
+ df_baseline,
+ conditions=dict(
+ forward_pass_id=row["forward_pass_id"]
+ - args.start_id
+ + args.baseline_start_id,
+ **{
+ k: v
+ for k, v in row.items()
+ if k not in ["forward_pass_id", "dump_index", "filename"]
+ },
+ ),
)
- assert len(rows_baseline) == 1, f"{rows_baseline=}"
- row_baseline = rows_baseline.to_dicts()[0]
+
+ if row_baseline is None:
+ print(f"Skip: target={str(path_target)} since no baseline")
+ x_target = _load_object(path_target)
+ if x_target is not None:
+ print(f"x_target(sample)={get_truncated_value(x_target)}")
+ continue
path_baseline = Path(args.baseline_path) / row_baseline["filename"]
- path_target = Path(args.target_path) / row["filename"]
print(f"Check: target={str(path_target)} baseline={str(path_baseline)}")
- check_tensor_pair(path_baseline=path_baseline, path_target=path_target)
+ check_tensor_pair(
+ path_baseline=path_baseline, path_target=path_target, name=row["name"]
+ )
print()
-def read_meta(directory):
- directory = Path(directory)
- assert directory.is_dir(), f"{directory=} should be a directory"
-
- rows = []
- for p in directory.glob("*.pt"):
- full_kwargs = {}
- for kv in p.stem.split("___"):
- k, v = kv.split("=")
- full_kwargs[k] = v
- rows.append(
- {
- "filename": str(p.name),
- **full_kwargs,
- }
- )
+def check_tensor_pair(path_baseline, path_target, name=""):
+ x_baseline = _load_object(path_baseline)
+ x_target = _load_object(path_target)
- df = pl.DataFrame(rows)
- df = df.with_columns(
- pl.col("forward_pass_id").cast(int),
- pl.col("rank").cast(int),
+ print(
+ f"Raw "
+ f"[shape] {x_baseline.shape} vs {x_target.shape}\t"
+ f"[dtype] {x_baseline.dtype} vs {x_target.dtype}"
)
- return df
-
-def check_tensor_pair(path_baseline, path_target):
- x_baseline = torch.load(path_baseline, weights_only=True)
- x_target = torch.load(path_target, weights_only=True)
+ x_baseline, x_target = _comparison_preprocessor(x_baseline, x_target, name=name)
+ x_baseline = _try_unify_shape(x_baseline, target_shape=x_target.shape)
print(
+ f"After preprocessor "
f"[shape] {x_baseline.shape} vs {x_target.shape}\t"
f"[dtype] {x_baseline.dtype} vs {x_target.dtype}"
)
+ x_target = x_target.float()
+ x_baseline = x_baseline.float()
+
+ for name, fn in (
+ ("mean", torch.mean),
+ ("std", torch.std),
+ ("min", torch.min),
+ ("max", torch.max),
+ ("p1", functools.partial(torch.quantile, q=0.01)),
+ ("p5", functools.partial(torch.quantile, q=0.05)),
+ ("p95", functools.partial(torch.quantile, q=0.95)),
+ ("p99", functools.partial(torch.quantile, q=0.99)),
+ ):
+ value_baseline = fn(x_baseline).item()
+ value_target = fn(x_target).item()
+ print(
+ f"[{name}] {value_baseline :.4f} vs {value_target:.4f} (diff: {value_target - value_baseline:.4f})"
+ )
+
if x_baseline.shape != x_target.shape:
- print(f"❌ Shape mismatch")
+ print(f"⚠️ Shape mismatch")
return
raw_abs_diff = (x_target - x_baseline).abs()
@@ -112,6 +123,19 @@ def check_tensor_pair(path_baseline, path_target):
print(f"x_target(sample)={get_truncated_value(x_target)}")
+def _try_unify_shape(x: torch.Tensor, target_shape):
+ x_shape = x.shape
+ num_dim_to_remove = len(x_shape) - len(target_shape)
+ if (x_shape[num_dim_to_remove:] == target_shape) and all(
+ val == 1 for val in x_shape[:num_dim_to_remove]
+ ):
+ out = functools.reduce(lambda a, _: a.squeeze(0), range(num_dim_to_remove), x)
+ print(f"Unify shape: {x_shape} -> {out.shape} (to match {target_shape})")
+ return out
+
+ return x
+
+
# Copied from DeepGEMM
def _calc_rel_diff(x: torch.Tensor, y: torch.Tensor):
x, y = x.double(), y.double()
@@ -120,6 +144,19 @@ def _calc_rel_diff(x: torch.Tensor, y: torch.Tensor):
return 1 - sim
+def _comparison_preprocessor(x_baseline, x_target, name):
+    # arbitrary ad-hoc preprocessing logic can be inserted here
+ return x_baseline, x_target
+
+
+def _load_object(path):
+ x = torch.load(path, weights_only=False)
+ if not isinstance(x, torch.Tensor):
+ print(f"Skip load {path} since {type(x)=} is not a Tensor")
+ return None
+ return x.cuda()
+
+
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--baseline-path", type=str)
diff --git a/python/sglang/srt/debug_utils/dump_loader.py b/python/sglang/srt/debug_utils/dump_loader.py
new file mode 100644
index 00000000000..8e6f2c79b2f
--- /dev/null
+++ b/python/sglang/srt/debug_utils/dump_loader.py
@@ -0,0 +1,97 @@
+import functools
+import os
+from pathlib import Path
+from typing import Any, Dict
+
+import polars as pl
+import torch
+
+
+class DumpLoader:
+ def __init__(self):
+ directory = os.environ.get("SGLANG_DUMP_LOADER_DIR")
+
+ self._enable = directory is not None
+ if self._enable:
+ self._directory = Path(directory)
+ self._df = read_meta(directory)
+
+ @property
+ def enable(self):
+ return self._enable
+
+ def load(self, name, **kwargs):
+ assert self._enable, "Please call DumpLoader.load only when it is enabled"
+
+ from sglang.srt.debug_utils.dumper import dumper
+
+ forward_pass_id = dumper._forward_pass_id
+ conditions = dict(name=name, forward_pass_id=forward_pass_id, **kwargs)
+ row = find_row(self._df, conditions=conditions)
+ assert (
+ row is not None
+ ), f"DumpLoader cannot find row given query {name=} {kwargs=} {self._directory=}"
+
+ path = self._directory / row["filename"]
+ output = torch.load(path, weights_only=False)
+
+ print(
+ f"[DumpLoader] load from {path=} (query: {name=} {kwargs=}, output: {type(output)})"
+ )
+ return output
+
+
+def read_meta(directory):
+ directory = Path(directory)
+ assert directory.is_dir(), f"{directory=} should be a directory"
+
+ rows = []
+ for p in directory.glob("*.pt"):
+ full_kwargs = {}
+ for kv in p.stem.split("___"):
+ k, v = kv.split("=")
+ full_kwargs[k] = v
+ rows.append(
+ {
+ "filename": str(p.name),
+ **full_kwargs,
+ }
+ )
+
+ df = pl.DataFrame(rows)
+ df = df.with_columns(
+ pl.col("forward_pass_id").cast(int),
+ pl.col("rank").cast(int),
+ pl.col("dump_index").cast(int),
+ )
+ return df
+
+
+def find_row(df, conditions: Dict[str, Any]):
+ df_sub = df.filter(
+ functools.reduce(
+ lambda a, b: a & b,
+ [
+ pl.col(col) == _cast_to_polars_dtype(conditions[col], df.schema[col])
+ for col in conditions.keys()
+ ],
+ )
+ )
+ assert len(df_sub) <= 1
+ return df_sub.to_dicts()[0] if len(df_sub) > 0 else None
+
+
+def _cast_to_polars_dtype(value, target_dtype):
+ if target_dtype in (pl.Int64, pl.Int32, pl.UInt64, pl.UInt32):
+ return int(value)
+ elif target_dtype in (pl.Float64, pl.Float32):
+ return float(value)
+ elif target_dtype == pl.Boolean:
+ return bool(value)
+ elif target_dtype == pl.String:
+ return str(value)
+ else:
+ return value
+
+
+dump_loader = DumpLoader()
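A sketch of the filename convention `read_meta()` relies on (not part of the patch; the keys shown are examples): each dump is a `.pt` file whose stem is a series of `key=value` pairs joined by `___`.

from pathlib import Path

stem = Path("forward_pass_id=3___rank=0___dump_index=7___name=attn_out.pt").stem
meta = dict(kv.split("=") for kv in stem.split("___"))
# {'forward_pass_id': '3', 'rank': '0', 'dump_index': '7', 'name': 'attn_out'}

With SGLANG_DUMP_LOADER_DIR pointing at such a directory, `dump_loader.load("attn_out", rank=0)` returns the object dumped under the current forward pass.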
diff --git a/python/sglang/srt/debug_utils/dumper.py b/python/sglang/srt/debug_utils/dumper.py
index d10301241d7..8a9808bb71f 100644
--- a/python/sglang/srt/debug_utils/dumper.py
+++ b/python/sglang/srt/debug_utils/dumper.py
@@ -53,7 +53,7 @@ def dump(self, name, value, **kwargs):
if self._partial_name is None:
self._partial_name = _get_partial_name()
- rank = dist.get_rank()
+ rank = _get_rank()
full_kwargs = dict(
forward_pass_id=self._forward_pass_id,
rank=rank,
@@ -80,12 +80,20 @@ def dump(self, name, value, **kwargs):
def _get_partial_name():
- rank = dist.get_rank()
+ rank = _get_rank()
object_list = [str(time.time()) if rank == 0 else None]
- dist.broadcast_object_list(object_list, device="cuda")
+ if dist.is_initialized():
+ dist.broadcast_object_list(object_list, device="cuda")
return object_list[0]
+def _get_rank():
+ if dist.is_initialized():
+ return dist.get_rank()
+ else:
+ return 0
+
+
def get_truncated_value(value):
if value is None:
return None
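A usage sketch (not part of the patch; assumes dumping is otherwise enabled via the dumper's usual environment switches): with the `dist.is_initialized()` guards above, the dumper can also run from a plain single-process script, where the rank falls back to 0 and no broadcast is attempted.

import torch
from sglang.srt.debug_utils.dumper import dumper  # module-level singleton, as imported by dump_loader.py

dumper.dump("attn_out", torch.randn(4, 8), layer_id=0)  # extra kwargs become key=value parts of the filename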
diff --git a/python/sglang/srt/debug_utils/text_comparator.py b/python/sglang/srt/debug_utils/text_comparator.py
index 5917fcfb6b8..3a6df19b9ed 100644
--- a/python/sglang/srt/debug_utils/text_comparator.py
+++ b/python/sglang/srt/debug_utils/text_comparator.py
@@ -1,4 +1,5 @@
import argparse
+import hashlib
import json
from pathlib import Path
@@ -13,7 +14,11 @@
def main(args):
- df_input = _transform_df_input(_compute_df_raw(args))
+ if args.data_type == "simple_evals":
+ df_input = _compute_df_input_mode_simple_evals(args)
+ else:
+ df_input = _transform_df_input(_compute_df_raw(args))
+
assert all(
c in df_input.columns
for c in ["category", "trial_index", "prompt_id", "prompt", "output", "correct"]
@@ -37,8 +42,9 @@ def main(args):
df_meta=df_meta.to_dicts(),
df_good_to_bad=df_good_to_bad.to_dicts(),
df_bad_to_good=df_bad_to_good.to_dicts(),
- )
- )
+ ),
+ indent=4,
+ ),
)
if not args.disable_print_details:
@@ -65,19 +71,70 @@ def main(args):
print(df)
+def _compute_df_input_mode_simple_evals(args):
+ return pl.concat(
+ [
+ _compute_df_input_one_mode_simple_evals(**info)
+ for info in _get_file_infos(args=args)
+ ]
+ )
+
+
+def _compute_df_input_one_mode_simple_evals(path, category, trial_index):
+ data = json.loads(Path(path).read_text())
+ rows = []
+
+ for single_eval_result in data["metadata"]["single_eval_results"]:
+ prompt = single_eval_result["example_level_metadata"][
+ "actual_queried_prompt_messages"
+ ]
+ score = single_eval_result["score"]
+ assert score in {0.0, 1.0}, f"{score=}"
+
+ row = dict(
+ category=category,
+ trial_index=trial_index,
+ prompt_id=_compute_id_from_object(prompt),
+ prompt=json.dumps(prompt),
+ output=single_eval_result["example_level_metadata"]["response_text"],
+ correct=score == 1.0,
+ )
+ rows.append(row)
+
+ return pl.DataFrame(rows)
+
+
+def _compute_id_from_object(obj):
+ if isinstance(obj, pl.Series):
+ obj = obj.to_list()
+ json_str = json.dumps(obj, sort_keys=True, ensure_ascii=False)
+ return hashlib.sha256(json_str.encode("utf-8")).hexdigest()
+
+
def _compute_df_raw(args):
return pl.concat(
[
- _read_df_raw(p, category=category, trial_index=i)
- for category, paths in [
- ("baseline", args.baseline_path),
- ("target", args.target_path),
- ]
- for i, p in enumerate(paths)
+ _read_df_raw(
+ path=info["path"],
+ category=info["category"],
+ trial_index=info["trial_index"],
+ )
+ for info in _get_file_infos(args=args)
]
)
+def _get_file_infos(args):
+ return [
+ dict(path=path, category=category, trial_index=trial_index)
+ for category, paths in [
+ ("baseline", args.baseline_path),
+ ("target", args.target_path),
+ ]
+ for trial_index, path in enumerate(paths)
+ ]
+
+
def _read_df_raw(path: str, category: str, trial_index: int):
return pl.read_ndjson(path).with_columns(
category=pl.lit(category), trial_index=trial_index
@@ -108,7 +165,9 @@ def _transform_df_input(df: pl.DataFrame):
print("Transform mode: SGLang bench")
return df
else:
- raise Exception(f"Unknown data: {df.columns}")
+ raise Exception(
+ f"Unknown data: {df.columns}. You may need to set `--data-type` if using e.g. simple_evals."
+ )
def _compute_df_meta(df_input: pl.DataFrame):
@@ -127,7 +186,9 @@ def _compute_df_meta(df_input: pl.DataFrame):
def _handle_one_prompt(df_one_prompt: pl.DataFrame):
- assert len(set(df_one_prompt["prompt"])) == 1
+ assert (
+ len(set(_compute_id_from_object(obj) for obj in df_one_prompt["prompt"])) == 1
+ )
df_baseline = df_one_prompt.filter(pl.col("category") == "baseline")
df_target = df_one_prompt.filter(pl.col("category") == "target")
@@ -162,6 +223,7 @@ def _compute_str_prefix_len(a: str, b: str) -> int:
if __name__ == "__main__":
parser = argparse.ArgumentParser(description=_DESCRIPTION)
+ parser.add_argument("--data-type", type=str, default="auto")
parser.add_argument("--baseline-path", type=str, nargs="+")
parser.add_argument("--target-path", type=str, nargs="+")
parser.add_argument(
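A sketch of the prompt identity used by the new simple_evals mode above (not part of the patch): prompts are keyed by a sha256 over their canonical JSON, so re-ordered dict keys still map to the same id.

import hashlib
import json

def stable_prompt_id(prompt_messages) -> str:
    # Mirrors _compute_id_from_object: identical prompts hash to identical ids.
    return hashlib.sha256(
        json.dumps(prompt_messages, sort_keys=True, ensure_ascii=False).encode("utf-8")
    ).hexdigest()

assert stable_prompt_id([{"role": "user", "content": "2+2?"}]) == stable_prompt_id(
    [{"content": "2+2?", "role": "user"}]
)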
diff --git a/python/sglang/srt/disaggregation/ascend/conn.py b/python/sglang/srt/disaggregation/ascend/conn.py
index 504212e0a66..b0009fc7c75 100644
--- a/python/sglang/srt/disaggregation/ascend/conn.py
+++ b/python/sglang/srt/disaggregation/ascend/conn.py
@@ -1,6 +1,12 @@
+import concurrent.futures
import logging
+from typing import List, Tuple
+
+import numpy as np
+import numpy.typing as npt
from sglang.srt.disaggregation.ascend.transfer_engine import AscendTransferEngine
+from sglang.srt.disaggregation.common.utils import group_concurrent_contiguous
from sglang.srt.disaggregation.mooncake.conn import (
MooncakeKVBootstrapServer,
MooncakeKVManager,
@@ -23,14 +29,81 @@ def init_engine(self):
)
def register_buffer_to_engine(self):
- self.engine.register(
- self.kv_args.kv_data_ptrs[0], sum(self.kv_args.kv_data_lens)
- )
+ self.engine.batch_register(self.kv_args.kv_data_ptrs, self.kv_args.kv_data_lens)
         # The Ascend backend optimizes batch registration for small memory blocks.
self.engine.batch_register(
self.kv_args.aux_data_ptrs, self.kv_args.aux_data_lens
)
+ def send_kvcache(
+ self,
+ mooncake_session_id: str,
+ prefill_kv_indices: npt.NDArray[np.int32],
+ dst_kv_ptrs: list[int],
+ dst_kv_indices: npt.NDArray[np.int32],
+ executor: concurrent.futures.ThreadPoolExecutor,
+ ):
+ # Group by indices
+ prefill_kv_blocks, dst_kv_blocks = group_concurrent_contiguous(
+ prefill_kv_indices, dst_kv_indices
+ )
+
+ num_layers = len(self.kv_args.kv_data_ptrs)
+ layers_params = [
+ (
+ self.kv_args.kv_data_ptrs[layer_id],
+ dst_kv_ptrs[layer_id],
+ self.kv_args.kv_item_lens[layer_id],
+ )
+ for layer_id in range(num_layers)
+ ]
+
+ def set_transfer_blocks(
+ src_ptr: int, dst_ptr: int, item_len: int
+ ) -> List[Tuple[int, int, int]]:
+ transfer_blocks = []
+ for prefill_index, decode_index in zip(prefill_kv_blocks, dst_kv_blocks):
+ src_addr = src_ptr + int(prefill_index[0]) * item_len
+ dst_addr = dst_ptr + int(decode_index[0]) * item_len
+ length = item_len * len(prefill_index)
+ transfer_blocks.append((src_addr, dst_addr, length))
+ return transfer_blocks
+
+ # Worker function for processing a single layer
+ def process_layer(src_ptr: int, dst_ptr: int, item_len: int) -> int:
+ transfer_blocks = set_transfer_blocks(src_ptr, dst_ptr, item_len)
+ return self._transfer_data(mooncake_session_id, transfer_blocks)
+
+ # Worker function for processing all layers in a batch
+ def process_layers(layers_params: List[Tuple[int, int, int]]) -> int:
+ transfer_blocks = []
+ for src_ptr, dst_ptr, item_len in layers_params:
+ transfer_blocks.extend(set_transfer_blocks(src_ptr, dst_ptr, item_len))
+ return self._transfer_data(mooncake_session_id, transfer_blocks)
+
+ if self.enable_custom_mem_pool:
+ futures = [
+ executor.submit(
+ process_layer,
+ src_ptr,
+ dst_ptr,
+ item_len,
+ )
+ for (src_ptr, dst_ptr, item_len) in layers_params
+ ]
+ for future in concurrent.futures.as_completed(futures):
+ status = future.result()
+ if status != 0:
+ for f in futures:
+ f.cancel()
+ return status
+ else:
+            # Combining all layers' params into one batch transfer is more efficient
+            # than using multiple threads
+ return process_layers(layers_params)
+
+ return 0
+
class AscendKVSender(MooncakeKVSender):
pass
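A sketch of the expected behaviour of `group_concurrent_contiguous` (an assumption, since the helper is imported rather than shown): it should split the prefill/decode index arrays into runs that are contiguous in both arrays, so each run can be sent as a single (src_addr, dst_addr, length) block.

import numpy as np

def group_contiguous_pairs(src: np.ndarray, dst: np.ndarray):
    # Start a new group wherever either sequence stops increasing by exactly 1.
    if len(src) == 0:
        return [], []
    breaks = np.where((np.diff(src) != 1) | (np.diff(dst) != 1))[0] + 1
    return np.split(src, breaks), np.split(dst, breaks)

src_groups, dst_groups = group_contiguous_pairs(np.array([4, 5, 6, 9]), np.array([0, 1, 2, 3]))
# -> [[4, 5, 6], [9]] and [[0, 1, 2], [3]]: the jump 6 -> 9 on the prefill side splits the run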
diff --git a/python/sglang/srt/disaggregation/base/conn.py b/python/sglang/srt/disaggregation/base/conn.py
index 584530e6934..3f5877ea38f 100644
--- a/python/sglang/srt/disaggregation/base/conn.py
+++ b/python/sglang/srt/disaggregation/base/conn.py
@@ -131,4 +131,4 @@ def failure_exception(self):
class BaseKVBootstrapServer(ABC):
@abstractmethod
- def __init__(self, port: int): ...
+ def __init__(self, host: str, port: int): ...
diff --git a/python/sglang/srt/disaggregation/common/conn.py b/python/sglang/srt/disaggregation/common/conn.py
index da6cc721784..10b6093b95a 100644
--- a/python/sglang/srt/disaggregation/common/conn.py
+++ b/python/sglang/srt/disaggregation/common/conn.py
@@ -47,6 +47,7 @@ def __init__(
self.is_mla_backend = is_mla_backend
self.disaggregation_mode = disaggregation_mode
# for p/d multi node infer
+ self.bootstrap_host = server_args.host
self.bootstrap_port = server_args.disaggregation_bootstrap_port
self.dist_init_addr = server_args.dist_init_addr
self.tp_size = server_args.tp_size
@@ -72,6 +73,7 @@ def __init__(
def _register_to_bootstrap(self):
"""Register KVSender to bootstrap server via HTTP POST."""
if self.dist_init_addr:
+ # multi node: bootstrap server's host is dist_init_addr
if self.dist_init_addr.startswith("["): # [ipv6]:port or [ipv6]
if self.dist_init_addr.endswith("]"):
host = self.dist_init_addr
@@ -80,7 +82,8 @@ def _register_to_bootstrap(self):
else:
host = socket.gethostbyname(self.dist_init_addr.rsplit(":", 1)[0])
else:
- host = get_ip()
+            # single node: bootstrap server's host is the same as the HTTP server's host
+ host = self.bootstrap_host
host = maybe_wrap_ipv6_address(host)
bootstrap_server_url = f"{host}:{self.bootstrap_port}"
@@ -125,12 +128,11 @@ def __init__(
mgr: BaseKVManager,
bootstrap_addr: str,
bootstrap_room: Optional[int] = None,
- data_parallel_rank: Optional[int] = None,
+ prefill_dp_rank: Optional[int] = None,
):
self.bootstrap_room = bootstrap_room
self.bootstrap_addr = bootstrap_addr
self.kv_mgr = mgr
- self.data_parallel_rank = data_parallel_rank
if self.bootstrap_addr not in self.kv_mgr.prefill_dp_size_table:
self.prefill_tp_size, self.prefill_dp_size = (
@@ -166,9 +168,6 @@ def __init__(
self.required_dst_info_num = 1
self.target_tp_ranks = [self.target_tp_rank]
elif local_tp_size_per_dp_rank > prefill_tp_size_per_dp_rank:
- assert (
- self.kv_mgr.is_mla_backend
- ), "PD with different TP sizes per DP rank is not yet supported for non-MLA models"
self.target_tp_rank = (
self.kv_mgr.kv_args.engine_rank % local_tp_size_per_dp_rank
) // (local_tp_size_per_dp_rank // prefill_tp_size_per_dp_rank)
@@ -198,11 +197,14 @@ def __init__(
self.target_tp_rank = self.target_tp_ranks[0]
self.required_dst_info_num = 1
- if self.data_parallel_rank is not None:
- logger.debug(f"Targeting DP rank: {self.data_parallel_rank}")
- self.target_dp_group = self.data_parallel_rank
+ if prefill_dp_rank is not None:
+ logger.debug(f"Targeting DP rank: {prefill_dp_rank}")
+ self.prefill_dp_rank = prefill_dp_rank
else:
- self.target_dp_group = bootstrap_room % self.prefill_dp_size
+ self.prefill_dp_rank = bootstrap_room % self.prefill_dp_size
+
+ # FIXME: alias here: target_dp_group -> prefill_dp_rank
+ self.target_dp_group = self.prefill_dp_rank
# NOTE: key distinguished by bootstrap_addr, target_dp_group, and target_tp_rank
bootstrap_key = (
@@ -308,7 +310,8 @@ def failure_exception(self):
class CommonKVBootstrapServer(BaseKVBootstrapServer):
- def __init__(self, port: int):
+ def __init__(self, host: str, port: int):
+ self.host = host
self.port = port
self.app = web.Application()
self.store = dict()
@@ -412,7 +415,7 @@ def _run_server(self):
self._runner = web.AppRunner(self.app)
self._loop.run_until_complete(self._runner.setup())
- site = web.TCPSite(self._runner, port=self.port)
+ site = web.TCPSite(self._runner, host=self.host, port=self.port)
self._loop.run_until_complete(site.start())
self._loop.run_forever()
except Exception as e:
diff --git a/python/sglang/srt/disaggregation/decode.py b/python/sglang/srt/disaggregation/decode.py
index 1570b8b324b..0c9b6664333 100644
--- a/python/sglang/srt/disaggregation/decode.py
+++ b/python/sglang/srt/disaggregation/decode.py
@@ -24,7 +24,7 @@
from collections import deque
from dataclasses import dataclass
from http import HTTPStatus
-from typing import TYPE_CHECKING, List, Optional, Tuple, Union
+from typing import TYPE_CHECKING, List, Optional, Tuple, Type, Union
import torch
from torch.distributed import ProcessGroup
@@ -218,8 +218,10 @@ def _init_kv_manager(self) -> BaseKVManager:
kv_args.ib_device = self.scheduler.server_args.disaggregation_ib_device
kv_args.gpu_id = self.scheduler.gpu_id
- kv_manager_class = get_kv_class(self.transfer_backend, KVClassType.MANAGER)
- kv_manager = kv_manager_class(
+ kv_manager_class: Type[BaseKVManager] = get_kv_class(
+ self.transfer_backend, KVClassType.MANAGER
+ )
+ kv_manager: BaseKVManager = kv_manager_class(
kv_args,
DisaggregationMode.DECODE,
self.scheduler.server_args,
@@ -248,7 +250,7 @@ def add(self, req: Req, is_retracted: bool = False) -> None:
mgr=self.kv_manager,
bootstrap_addr=f"{req.bootstrap_host}:{req.bootstrap_port}",
bootstrap_room=req.bootstrap_room,
- data_parallel_rank=req.data_parallel_rank,
+ prefill_dp_rank=req.data_parallel_rank,
)
self.queue.append(
@@ -259,7 +261,7 @@ def _check_if_req_exceed_kv_capacity(self, req: Req) -> bool:
if len(req.origin_input_ids) > self.max_total_num_tokens:
message = f"Request {req.rid} exceeds the maximum number of tokens: {len(req.origin_input_ids)} > {self.max_total_num_tokens}"
logger.error(message)
- prepare_abort(req, message)
+ prepare_abort(req, message, status_code=HTTPStatus.BAD_REQUEST)
self.scheduler.stream_output([req], req.return_logprob)
return True
return False
@@ -334,6 +336,8 @@ def _update_handshake_waiters(self) -> None:
error_message,
status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
)
+ if self.scheduler.enable_metrics:
+ self.scheduler.metrics_collector.increment_bootstrap_failed_reqs()
else:
raise ValueError(f"Unexpected poll case: {poll}")
@@ -595,6 +599,8 @@ def pop_transferred(self) -> List[Req]:
# unlock the kv cache or it will have memory leak
self.tree_cache.cache_finished_req(decode_req.req)
indices_to_remove.add(i)
+ if self.scheduler.enable_metrics:
+ self.scheduler.metrics_collector.increment_transfer_failed_reqs()
continue
elif poll == KVPoll.Success:
@@ -864,6 +870,7 @@ def get_new_prebuilt_batch(self: Scheduler) -> Optional[ScheduleBatch]:
self.model_config,
self.enable_overlap,
self.spec_algorithm,
+ self.server_args.hip_attention_config,
)
# construct fake completed prefill
diff --git a/python/sglang/srt/disaggregation/fake/conn.py b/python/sglang/srt/disaggregation/fake/conn.py
index d25f47a381d..1206338247f 100644
--- a/python/sglang/srt/disaggregation/fake/conn.py
+++ b/python/sglang/srt/disaggregation/fake/conn.py
@@ -62,7 +62,7 @@ def __init__(
mgr: BaseKVManager,
bootstrap_addr: str,
bootstrap_room: Optional[int] = None,
- data_parallel_rank: Optional[int] = None,
+ prefill_dp_rank: Optional[int] = None,
):
self.has_init = False
diff --git a/python/sglang/srt/disaggregation/launch_lb.py b/python/sglang/srt/disaggregation/launch_lb.py
deleted file mode 100644
index bc116fb554a..00000000000
--- a/python/sglang/srt/disaggregation/launch_lb.py
+++ /dev/null
@@ -1,125 +0,0 @@
-import argparse
-import dataclasses
-
-from sglang.srt.disaggregation.mini_lb import PrefillConfig, run
-
-
-@dataclasses.dataclass
-class LBArgs:
- rust_lb: bool = False
- host: str = "0.0.0.0"
- port: int = 8000
- policy: str = "random"
- prefill_infos: list = dataclasses.field(default_factory=list)
- decode_infos: list = dataclasses.field(default_factory=list)
- log_interval: int = 5
- timeout: int = 600
-
- @staticmethod
- def add_cli_args(parser: argparse.ArgumentParser):
- parser.add_argument(
- "--rust-lb",
- action="store_true",
- help="Deprecated, please use SGLang Router instead, this argument will have no effect.",
- )
- parser.add_argument(
- "--host",
- type=str,
- default=LBArgs.host,
- help=f"Host to bind the server (default: {LBArgs.host})",
- )
- parser.add_argument(
- "--port",
- type=int,
- default=LBArgs.port,
- help=f"Port to bind the server (default: {LBArgs.port})",
- )
- parser.add_argument(
- "--policy",
- type=str,
- default=LBArgs.policy,
- choices=["random", "po2"],
- help=f"Policy to use for load balancing (default: {LBArgs.policy})",
- )
- parser.add_argument(
- "--prefill",
- type=str,
- default=[],
- nargs="+",
- help="URLs for prefill servers",
- )
- parser.add_argument(
- "--decode",
- type=str,
- default=[],
- nargs="+",
- help="URLs for decode servers",
- )
- parser.add_argument(
- "--prefill-bootstrap-ports",
- type=int,
- nargs="+",
- help="Bootstrap ports for prefill servers",
- )
- parser.add_argument(
- "--log-interval",
- type=int,
- default=LBArgs.log_interval,
- help=f"Log interval in seconds (default: {LBArgs.log_interval})",
- )
- parser.add_argument(
- "--timeout",
- type=int,
- default=LBArgs.timeout,
- help=f"Timeout in seconds (default: {LBArgs.timeout})",
- )
-
- @classmethod
- def from_cli_args(cls, args: argparse.Namespace) -> "LBArgs":
- bootstrap_ports = args.prefill_bootstrap_ports
- if bootstrap_ports is None:
- bootstrap_ports = [None] * len(args.prefill)
- elif len(bootstrap_ports) == 1:
- bootstrap_ports = bootstrap_ports * len(args.prefill)
- else:
- if len(bootstrap_ports) != len(args.prefill):
- raise ValueError(
- "Number of prefill URLs must match number of bootstrap ports"
- )
-
- prefill_infos = [
- (url, port) for url, port in zip(args.prefill, bootstrap_ports)
- ]
-
- return cls(
- rust_lb=args.rust_lb,
- host=args.host,
- port=args.port,
- policy=args.policy,
- prefill_infos=prefill_infos,
- decode_infos=args.decode,
- log_interval=args.log_interval,
- timeout=args.timeout,
- )
-
- def __post_init__(self):
- if not self.rust_lb:
- assert (
- self.policy == "random"
- ), "Only random policy is supported for Python load balancer"
-
-
-def main():
- parser = argparse.ArgumentParser(
- description="PD Disaggregation Load Balancer Server"
- )
- LBArgs.add_cli_args(parser)
- args = parser.parse_args()
- lb_args = LBArgs.from_cli_args(args)
-
- prefill_configs = [PrefillConfig(url, port) for url, port in lb_args.prefill_infos]
- run(prefill_configs, lb_args.decode_infos, lb_args.host, lb_args.port)
-
-
-if __name__ == "__main__":
- main()
diff --git a/python/sglang/srt/disaggregation/mini_lb.py b/python/sglang/srt/disaggregation/mini_lb.py
index a80407bca58..5aaa2a70e34 100644
--- a/python/sglang/srt/disaggregation/mini_lb.py
+++ b/python/sglang/srt/disaggregation/mini_lb.py
@@ -1,414 +1,6 @@
-"""
-Minimal HTTP load balancer for prefill and decode servers for testing.
-"""
-
-import asyncio
-import dataclasses
-import logging
-import random
-import urllib
-from itertools import chain
-from typing import List, Optional
-
-import aiohttp
-import orjson
-import uvicorn
-from fastapi import FastAPI, HTTPException
-from fastapi.responses import ORJSONResponse, Response, StreamingResponse
-
-from sglang.srt.disaggregation.utils import PDRegistryRequest
-from sglang.srt.utils import maybe_wrap_ipv6_address
-
-AIOHTTP_STREAM_READ_CHUNK_SIZE = (
- 1024 * 64
-) # 64KB, to prevent aiohttp's "Chunk too big" error
-
-
-def setup_logger():
- logger = logging.getLogger("pdlb")
- logger.setLevel(logging.INFO)
-
- formatter = logging.Formatter(
- "[PDLB (Python)] %(asctime)s - %(levelname)s - %(message)s",
- datefmt="%Y-%m-%d %H:%M:%S",
- )
-
- handler = logging.StreamHandler()
- handler.setFormatter(formatter)
- logger.addHandler(handler)
-
- return logger
-
-
-logger = setup_logger()
-
-
-@dataclasses.dataclass
-class PrefillConfig:
- url: str
- bootstrap_port: Optional[int] = None
-
-
-class MiniLoadBalancer:
- def __init__(self, prefill_configs: List[PrefillConfig], decode_servers: List[str]):
- self.prefill_configs = prefill_configs
- self.prefill_servers = [p.url for p in prefill_configs]
- self.decode_servers = decode_servers
-
- def add_prefill_server(self, new_prefill_config: PrefillConfig):
- self.prefill_configs.append(new_prefill_config)
- self.prefill_servers.append(new_prefill_config.url)
-
- def add_decode_server(self, new_decode_server: str):
- self.decode_servers.append(new_decode_server)
-
- def select_pair(self):
- # TODO: return some message instead of panic
- assert len(self.prefill_configs) > 0, "No prefill servers available"
- assert len(self.decode_servers) > 0, "No decode servers available"
-
- prefill_config = random.choice(self.prefill_configs)
- decode_server = random.choice(self.decode_servers)
- return prefill_config.url, prefill_config.bootstrap_port, decode_server
-
- async def generate(
- self, modified_request, prefill_server, decode_server, endpoint
- ) -> ORJSONResponse:
- assert endpoint[0] != "/", f"Endpoint should not start with '/': {endpoint}"
-
- async with aiohttp.ClientSession(
- timeout=aiohttp.ClientTimeout(
- total=3600
- ) # Add timeout for request reliability
- ) as session:
- tasks = [
- session.post(f"{prefill_server}/{endpoint}", json=modified_request),
- session.post(f"{decode_server}/{endpoint}", json=modified_request),
- ]
-
- # Wait for both responses to complete. Prefill should end first.
- prefill_response, decode_response = await asyncio.gather(*tasks)
-
- if "return_logprob" in modified_request:
-
- prefill_json = await prefill_response.json()
- ret_json = await decode_response.json()
-
- # merge `meta_info.input_token_logprobs` from prefill to decode
- if "meta_info" in ret_json:
- if "input_token_logprobs" in ret_json["meta_info"]:
- ret_json["meta_info"]["input_token_logprobs"] = (
- prefill_json["meta_info"]["input_token_logprobs"]
- + ret_json["meta_info"]["input_token_logprobs"]
- )
- else:
- ret_json = await decode_response.json()
-
- return ORJSONResponse(
- content=ret_json,
- status_code=decode_response.status,
- )
-
- async def generate_stream(
- self, modified_request, prefill_server, decode_server, endpoint="generate"
- ):
- assert endpoint[0] != "/", f"Endpoint should not start with '/': {endpoint}"
-
- async def stream_results():
- async with aiohttp.ClientSession(
- timeout=aiohttp.ClientTimeout(
- total=3600
- ) # Add timeout for request reliability
- ) as session:
- # Create the tasks for both prefill and decode requests
- tasks = [
- session.post(f"{prefill_server}/{endpoint}", json=modified_request),
- session.post(f"{decode_server}/{endpoint}", json=modified_request),
- ]
- # Wait for both responses to complete. Since this is streaming, they return immediately.
- prefill_response, decode_response = await asyncio.gather(*tasks)
-
- if modified_request.get("return_logprob", False):
- prefill_chunks = []
- async for chunk in prefill_response.content:
- prefill_chunks.append(chunk)
-
- first_prefill_chunk = (
- prefill_chunks[0].decode("utf-8")[5:].strip("\n")
- )
- first_prefill_chunk_json = orjson.loads(first_prefill_chunk)
-
- async for chunk in decode_response.content:
- # Note: This is inefficient
- # merge prefill input_token_logprobs, output_token_logprobs to decode
- decoded_chunk = chunk.decode("utf-8")
- if (
- decoded_chunk
- and decoded_chunk.startswith("data:")
- and "[DONE]" not in decoded_chunk
- ):
- ret_json = orjson.loads(decoded_chunk[5:].strip("\n"))
- ret_json["meta_info"]["input_token_logprobs"] = (
- first_prefill_chunk_json["meta_info"][
- "input_token_logprobs"
- ]
- + ret_json["meta_info"]["input_token_logprobs"]
- )
-
- yield b"data: " + orjson.dumps(ret_json) + b"\n\n"
- else:
- yield chunk
- else:
- async for chunk in decode_response.content.iter_chunked(
- AIOHTTP_STREAM_READ_CHUNK_SIZE
- ):
- yield chunk
-
- return StreamingResponse(
- stream_results(),
- media_type="text/event-stream",
- )
-
-
-app = FastAPI()
-load_balancer: Optional[MiniLoadBalancer] = None
-
-
-@app.get("/health")
-async def health_check():
- return Response(status_code=200)
-
-
-@app.get("/health_generate")
-async def health_check():
- prefill_servers, decode_servers = (
- load_balancer.prefill_servers,
- load_balancer.decode_servers,
- )
- async with aiohttp.ClientSession() as session:
- # Create the tasks
- tasks = []
- for server in chain(prefill_servers, decode_servers):
- tasks.append(session.post(f"{server}/health_generate"))
- for i, response in enumerate(asyncio.as_completed(tasks)):
- await response
- return Response(status_code=200)
-
-
-@app.post("/flush_cache")
-async def flush_cache():
- prefill_servers, decode_servers = (
- load_balancer.prefill_servers,
- load_balancer.decode_servers,
- )
- async with aiohttp.ClientSession() as session:
- # Create the tasks
- tasks = []
- for server in chain(prefill_servers, decode_servers):
- tasks.append(session.post(f"{server}/flush_cache"))
- for i, response in enumerate(asyncio.as_completed(tasks)):
- await response
- return Response(status_code=200)
-
-
-@app.get("/get_server_info")
-async def get_server_info():
- prefill_servers, decode_servers = (
- load_balancer.prefill_servers,
- load_balancer.decode_servers,
- )
- prefill_infos = []
- decode_infos = []
- all_internal_states = []
-
- async with aiohttp.ClientSession() as session:
- for server in chain(prefill_servers):
- server_info = await session.get(f"{server}/get_server_info")
- prefill_infos.append(await server_info.json())
- for server in chain(decode_servers):
- server_info = await session.get(f"{server}/get_server_info")
- info_json = await server_info.json()
- decode_infos.append(info_json)
- # Extract internal_states from decode servers
- if "internal_states" in info_json:
- all_internal_states.extend(info_json["internal_states"])
-
- # Return format expected by bench_one_batch_server.py
- if all_internal_states:
- return {
- "internal_states": all_internal_states,
- "prefill": prefill_infos,
- "decode": decode_infos,
- }
- else:
- # Fallback with dummy data if no internal states found
- return {
- "internal_states": [
- {
- "last_gen_throughput": 0.0,
- "avg_spec_accept_length": None,
- }
- ],
- "prefill": prefill_infos,
- "decode": decode_infos,
- }
-
-
-@app.get("/get_model_info")
-async def get_model_info():
- # Dummy model information
- model_info = {
- "model_path": "/path/to/dummy/model",
- "tokenizer_path": "/path/to/dummy/tokenizer",
- "is_generation": True,
- "preferred_sampling_params": {"temperature": 0.7, "max_new_tokens": 128},
- }
- return ORJSONResponse(content=model_info)
-
-
-@app.post("/generate")
-async def handle_generate_request(request_data: dict):
- prefill_server, bootstrap_port, decode_server = load_balancer.select_pair()
-
- # Parse and transform prefill_server for bootstrap data
- parsed_url = urllib.parse.urlparse(prefill_server)
- hostname = maybe_wrap_ipv6_address(parsed_url.hostname)
- modified_request = request_data.copy()
-
- batch_size = _get_request_batch_size(modified_request)
- if batch_size is not None:
- modified_request.update(
- {
- "bootstrap_host": [hostname] * batch_size,
- "bootstrap_port": [bootstrap_port] * batch_size,
- "bootstrap_room": [
- _generate_bootstrap_room() for _ in range(batch_size)
- ],
- }
- )
- else:
- modified_request.update(
- {
- "bootstrap_host": hostname,
- "bootstrap_port": bootstrap_port,
- "bootstrap_room": _generate_bootstrap_room(),
- }
- )
-
- if request_data.get("stream", False):
- return await load_balancer.generate_stream(
- modified_request, prefill_server, decode_server, "generate"
- )
- else:
- return await load_balancer.generate(
- modified_request, prefill_server, decode_server, "generate"
- )
-
-
-async def _forward_to_backend(request_data: dict, endpoint_name: str):
- prefill_server, bootstrap_port, decode_server = load_balancer.select_pair()
-
- # Parse and transform prefill_server for bootstrap data
- parsed_url = urllib.parse.urlparse(prefill_server)
- hostname = maybe_wrap_ipv6_address(parsed_url.hostname)
- modified_request = request_data.copy()
- modified_request.update(
- {
- "bootstrap_host": hostname,
- "bootstrap_port": bootstrap_port,
- "bootstrap_room": _generate_bootstrap_room(),
- }
- )
-
- if request_data.get("stream", False):
- return await load_balancer.generate_stream(
- modified_request,
- prefill_server,
- decode_server,
- endpoint=endpoint_name,
- )
- else:
- return await load_balancer.generate(
- modified_request,
- prefill_server,
- decode_server,
- endpoint=endpoint_name,
- )
-
-
-@app.post("/v1/chat/completions")
-async def handle_chat_completion_request(request_data: dict):
- return await _forward_to_backend(request_data, "v1/chat/completions")
-
-
-@app.post("/v1/completions")
-async def handle_completion_request(request_data: dict):
- return await _forward_to_backend(request_data, "v1/completions")
-
-
-def _generate_bootstrap_room():
- return random.randint(0, 2**63 - 1)
-
-
-# We may utilize `GenerateReqInput`'s logic later
-def _get_request_batch_size(request):
- if (text := request.get("text")) is not None:
- return None if isinstance(text, str) else len(text)
- if (input_ids := request.get("input_ids")) is not None:
- return None if isinstance(input_ids[0], int) else len(input_ids)
- return None
-
-
-@app.get("/v1/models")
-async def get_models():
- prefill_server = load_balancer.prefill_servers[0] # Get the first prefill server
- async with aiohttp.ClientSession() as session:
- try:
- response = await session.get(f"{prefill_server}/v1/models")
- if response.status != 200:
- raise HTTPException(
- status_code=response.status,
- detail=f"Prefill server error: Status {response.status}",
- )
- return ORJSONResponse(content=await response.json())
- except Exception as e:
- raise HTTPException(status_code=500, detail=str(e))
-
-
-@app.post("/register")
-async def register(obj: PDRegistryRequest):
- if obj.mode == "prefill":
- load_balancer.add_prefill_server(
- PrefillConfig(obj.registry_url, obj.bootstrap_port)
- )
- logger.info(
- f"Registered prefill server: {obj.registry_url} with bootstrap port: {obj.bootstrap_port}"
- )
- elif obj.mode == "decode":
- load_balancer.add_decode_server(obj.registry_url)
- logger.info(f"Registered decode server: {obj.registry_url}")
- else:
- raise HTTPException(
- status_code=400,
- detail="Invalid mode. Must be either PREFILL or DECODE.",
- )
-
- logger.info(
- f"#Prefill servers: {len(load_balancer.prefill_configs)}, "
- f"#Decode servers: {len(load_balancer.decode_servers)}"
- )
-
- return Response(status_code=200)
-
-
-def run(prefill_configs, decode_addrs, host, port):
- global load_balancer
- load_balancer = MiniLoadBalancer(prefill_configs, decode_addrs)
- uvicorn.run(app, host=host, port=port)
-
-
-if __name__ == "__main__":
- # FIXME: remove this, use the unified entry point: sglang.srt.disaggregation.launch_lb
- from sglang.srt.disaggregation.launch_lb import main
-
- main()
+raise RuntimeError(
+ """The 'mini_lb' module has been relocated to the 'sglang_router' package.
+ We recommend installing 'sglang-router' with Rust support for optimal performance.
+ If you encounter issues building the router with Rust, set the environment variable
+ 'SGLANG_ROUTER_BUILD_NO_RUST=1' and add '--mini-lb' to the command line to use the Python version of 'mini_lb'."""
+)
diff --git a/python/sglang/srt/disaggregation/mooncake/conn.py b/python/sglang/srt/disaggregation/mooncake/conn.py
index e58186d33e2..f69d296227e 100644
--- a/python/sglang/srt/disaggregation/mooncake/conn.py
+++ b/python/sglang/srt/disaggregation/mooncake/conn.py
@@ -2,6 +2,7 @@
import asyncio
import concurrent.futures
+import ctypes
import dataclasses
import logging
import os
@@ -138,7 +139,29 @@ def from_zmq(cls, msg: List[bytes]):
)
+class AuxDataCodec:
+ """Handles serialization and deserialization of auxiliary data buffers"""
+
+ @staticmethod
+ def serialize_data_from_buffer(src_addr, data_length):
+ """Serialize data from memory buffer to bytes"""
+ buffer = (ctypes.c_byte * data_length).from_address(src_addr)
+ return bytes(buffer)
+
+ @staticmethod
+ def deserialize_data_to_buffer(kv_args, buffer_index, aux_index, data):
+ """Deserialize bytes into target memory buffer"""
+ dst_aux_ptr = kv_args.aux_data_ptrs[buffer_index]
+ item_len = kv_args.aux_item_lens[buffer_index]
+ dst_addr = dst_aux_ptr + item_len * aux_index
+ buffer = (ctypes.c_byte * len(data)).from_address(dst_addr)
+ buffer[:] = data
+ return
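+
+    # Rough usage sketch (illustrative only, not part of this patch):
+    #   payload = AuxDataCodec.serialize_data_from_buffer(src_addr, length)
+    #   AuxDataCodec.deserialize_data_to_buffer(kv_args, buffer_index, aux_index, payload)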
+
+
class MooncakeKVManager(BaseKVManager):
+ AUX_DATA_HEADER = b"AUX_DATA"
+
def __init__(
self,
args: KVArgs,
@@ -152,6 +175,7 @@ def __init__(
self.disaggregation_mode = disaggregation_mode
self.init_engine()
# for p/d multi node infer
+ self.bootstrap_host = server_args.host
self.bootstrap_port = server_args.disaggregation_bootstrap_port
self.dist_init_addr = server_args.dist_init_addr
self.attn_tp_size = get_attention_tp_size()
@@ -283,21 +307,10 @@ def _transfer_data(self, mooncake_session_id, transfer_blocks):
if not transfer_blocks:
return 0
- # TODO(shangming): Fix me when nvlink_transport of Mooncake is bug-free
- if self.enable_custom_mem_pool:
- # batch_transfer_sync has a higher chance to trigger an accuracy drop for MNNVL, fallback to transfer_sync temporarily
- for src_addr, dst_addr, length in transfer_blocks:
- status = self.engine.transfer_sync(
- mooncake_session_id, src_addr, dst_addr, length
- )
- if status != 0:
- return status
- return 0
- else:
- src_addrs, dst_addrs, lengths = zip(*transfer_blocks)
- return self.engine.batch_transfer_sync(
- mooncake_session_id, list(src_addrs), list(dst_addrs), list(lengths)
- )
+ src_addrs, dst_addrs, lengths = zip(*transfer_blocks)
+ return self.engine.batch_transfer_sync(
+ mooncake_session_id, list(src_addrs), list(dst_addrs), list(lengths)
+ )
def send_kvcache(
self,
@@ -446,7 +459,9 @@ def send_kvcache_slice(
dst_head_start_offset = local_tp_rank_in_group * src_heads_per_rank
else:
# Send KVCache from 1 prefill instance to multiple decode instances
- src_head_start_offset = dst_tp_rank_in_group * dst_heads_per_rank
+ src_head_start_offset = (
+ dst_tp_rank_in_group * dst_heads_per_rank
+ ) % src_heads_per_rank
num_heads_to_send = dst_heads_per_rank
dst_head_start_offset = 0
@@ -570,11 +585,14 @@ def process_layer_tp_aware(layer_params):
def send_aux(
self,
- mooncake_session_id: str,
+ req: TransferInfo,
prefill_aux_index: int,
dst_aux_ptrs: list[int],
- dst_aux_index: int,
):
+ # TODO(shangming): Fix me when nvlink_transport of Mooncake is bug-free
+ if self.enable_custom_mem_pool:
+ return self.send_aux_tcp(req, prefill_aux_index, dst_aux_ptrs)
+
transfer_blocks = []
prefill_aux_ptrs = self.kv_args.aux_data_ptrs
prefill_aux_item_lens = self.kv_args.aux_item_lens
@@ -582,10 +600,59 @@ def send_aux(
for i, dst_aux_ptr in enumerate(dst_aux_ptrs):
length = prefill_aux_item_lens[i]
src_addr = prefill_aux_ptrs[i] + length * prefill_aux_index
- dst_addr = dst_aux_ptrs[i] + length * dst_aux_index
+ dst_addr = dst_aux_ptrs[i] + length * req.dst_aux_index
transfer_blocks.append((src_addr, dst_addr, length))
- return self._transfer_data(mooncake_session_id, transfer_blocks)
+ return self._transfer_data(req.mooncake_session_id, transfer_blocks)
+
+ def send_aux_tcp(
+ self,
+ req: TransferInfo,
+ prefill_aux_index: int,
+ dst_aux_ptrs: list[int],
+ ):
+ prefill_aux_ptrs = self.kv_args.aux_data_ptrs
+ prefill_aux_item_lens = self.kv_args.aux_item_lens
+
+ for i in range(len(prefill_aux_ptrs)):
+ length = prefill_aux_item_lens[i]
+ src_addr = prefill_aux_ptrs[i] + length * prefill_aux_index
+ data = AuxDataCodec.serialize_data_from_buffer(src_addr, length)
+
+ self.send_aux_data_to_endpoint(
+ remote=req.endpoint,
+ dst_port=req.dst_port,
+ room=req.room,
+ buffer_index=i,
+ aux_index=req.dst_aux_index,
+ data=data,
+ )
+
+ return 0
+
+ def send_aux_data_to_endpoint(
+ self,
+ remote: str,
+ dst_port: int,
+ room: int,
+ buffer_index: int,
+ aux_index: int,
+ data: bytes,
+ ):
+ socket = self._connect(
+ format_tcp_address(remote, dst_port), is_ipv6=is_valid_ipv6_address(remote)
+ )
+
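+        # Frame layout: [AUX_DATA_HEADER, room, buffer_index, aux_index,
+        # 4-byte big-endian payload length, payload bytes]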
+ socket.send_multipart(
+ [
+ MooncakeKVManager.AUX_DATA_HEADER,
+ str(room).encode("ascii"),
+ str(buffer_index).encode("ascii"),
+ str(aux_index).encode("ascii"),
+ struct.pack(">I", len(data)),
+ data,
+ ]
+ )
def sync_status_to_decode_endpoint(
self, remote: str, dst_port: int, room: int, status: int, prefill_rank: int
@@ -699,10 +766,9 @@ def transfer_worker(
if self.pp_group.is_last_rank:
# Only the last chunk we need to send the aux data
ret = self.send_aux(
- req.mooncake_session_id,
+ req,
kv_chunk.prefill_aux_index,
target_rank_registration_info.dst_aux_ptrs,
- req.dst_aux_index,
)
polls.append(True if ret == 0 else False)
dst_ranks_infos.append(
@@ -778,15 +844,38 @@ def bootstrap_thread():
threading.Thread(target=bootstrap_thread).start()
+ def _handle_aux_data(self, msg: List[bytes]):
+ """Handle AUX_DATA messages received by the decode thread."""
+ room = int(msg[1].decode("ascii"))
+ buffer_index = int(msg[2].decode("ascii"))
+ aux_index = int(msg[3].decode("ascii"))
+ data_length = struct.unpack(">I", msg[4])[0]
+ data = msg[5]
+
+ if len(data) != data_length:
+ logger.error(f"AUX_DATA length mismatch for bootstrap_room {room}")
+ return
+
+ AuxDataCodec.deserialize_data_to_buffer(
+ self.kv_args, buffer_index, aux_index, data
+ )
+
+ logger.debug(
+ f"Received AUX_DATA for bootstrap_room {room} with length:{len(data)}"
+ )
+
def start_decode_thread(self):
self.rank_port = get_free_port()
self._bind_server_socket()
def decode_thread():
while True:
- (bootstrap_room, status, prefill_rank) = (
- self.server_socket.recv_multipart()
- )
+ msg = self.server_socket.recv_multipart()
+ if msg[0] == MooncakeKVManager.AUX_DATA_HEADER:
+ self._handle_aux_data(msg)
+ continue
+
+ (bootstrap_room, status, prefill_rank) = msg
status = int(status.decode("ascii"))
bootstrap_room = int(bootstrap_room.decode("ascii"))
prefill_rank = int(prefill_rank.decode("ascii"))
@@ -934,6 +1023,7 @@ def get_session_id(self):
def _register_to_bootstrap(self):
"""Register KVSender to bootstrap server via HTTP POST."""
if self.dist_init_addr:
+            # multi-node case: the bootstrap server's host is dist_init_addr
if self.dist_init_addr.startswith("["): # [ipv6]:port or [ipv6]
if self.dist_init_addr.endswith("]"):
host = self.dist_init_addr
@@ -942,7 +1032,8 @@ def _register_to_bootstrap(self):
else:
host = socket.gethostbyname(self.dist_init_addr.rsplit(":", 1)[0])
else:
- host = get_ip()
+            # single-node case: the bootstrap server's host is the same as the http server's host
+ host = self.bootstrap_host
host = maybe_wrap_ipv6_address(host)
bootstrap_server_url = f"{host}:{self.bootstrap_port}"
@@ -1123,7 +1214,7 @@ def __init__(
mgr: MooncakeKVManager,
bootstrap_addr: str,
bootstrap_room: Optional[int] = None,
- data_parallel_rank: Optional[int] = None,
+ prefill_dp_rank: Optional[int] = None,
):
self.bootstrap_room = bootstrap_room
self.bootstrap_addr = bootstrap_addr
@@ -1132,7 +1223,6 @@ def __init__(
self.kv_mgr.update_status(self.bootstrap_room, KVPoll.Bootstrapping)
self.conclude_state = None
self.init_time = None
- self.data_parallel_rank = data_parallel_rank
if self.bootstrap_addr not in self.kv_mgr.prefill_dp_size_table:
(
@@ -1231,11 +1321,14 @@ def __init__(
self.prefill_attn_tp_size // self.kv_mgr.attn_tp_size
) * (self.prefill_pp_size // self.kv_mgr.pp_size)
- if self.data_parallel_rank is not None:
- logger.debug(f"Targeting DP rank: {self.data_parallel_rank}")
- self.target_dp_group = self.data_parallel_rank
+ if prefill_dp_rank is not None:
+ logger.debug(f"Targeting DP rank: {prefill_dp_rank}")
+ self.prefill_dp_rank = prefill_dp_rank
else:
- self.target_dp_group = bootstrap_room % self.prefill_dp_size
+ self.prefill_dp_rank = bootstrap_room % self.prefill_dp_size
+
+ # FIXME: alias here: target_dp_group -> prefill_dp_rank
+ self.target_dp_group = self.prefill_dp_rank
self.kv_mgr.required_prefill_response_num_table[self.bootstrap_room] = (
self.required_prefill_response_num
@@ -1459,7 +1552,8 @@ def abort(self):
class MooncakeKVBootstrapServer(BaseKVBootstrapServer):
- def __init__(self, port: int):
+ def __init__(self, host: str, port: int):
+ self.host = host
self.port = port
self.app = web.Application()
self.store = dict()
@@ -1587,7 +1681,7 @@ def _run_server(self):
self._runner = web.AppRunner(self.app, access_log=access_log)
self._loop.run_until_complete(self._runner.setup())
- site = web.TCPSite(self._runner, port=self.port)
+ site = web.TCPSite(self._runner, host=self.host, port=self.port)
self._loop.run_until_complete(site.start())
self._loop.run_forever()
except Exception as e:
diff --git a/python/sglang/srt/disaggregation/nixl/conn.py b/python/sglang/srt/disaggregation/nixl/conn.py
index 7a75d79b740..c911319ea96 100644
--- a/python/sglang/srt/disaggregation/nixl/conn.py
+++ b/python/sglang/srt/disaggregation/nixl/conn.py
@@ -78,6 +78,9 @@ class KVArgsRegisterInfo:
dst_kv_ptrs: list[int]
dst_aux_ptrs: list[int]
gpu_id: int
+ decode_tp_size: int
+ decode_tp_rank: int
+ dst_kv_item_len: int
@classmethod
def from_zmq(cls, msg: List[bytes]):
@@ -90,6 +93,9 @@ def from_zmq(cls, msg: List[bytes]):
dst_kv_ptrs=list(struct.unpack(f"{len(msg[5])//8}Q", msg[5])),
dst_aux_ptrs=list(struct.unpack(f"{len(msg[6])//8}Q", msg[6])),
gpu_id=int(msg[7].decode("ascii")),
+ decode_tp_size=int(msg[8].decode("ascii")),
+ decode_tp_rank=int(msg[9].decode("ascii")),
+ dst_kv_item_len=int(msg[10].decode("ascii")),
)
@@ -166,7 +172,7 @@ def register_buffer_to_engine(self):
self.kv_args.kv_data_ptrs, self.kv_args.kv_data_lens
):
kv_addrs.append((kv_data_ptr, kv_data_len, self.kv_args.gpu_id, ""))
- self.kv_descs = self.agent.register_memory(kv_addrs, "VRAM", is_sorted=False)
+ self.kv_descs = self.agent.register_memory(kv_addrs, "VRAM")
logger.debug(f"Register kv tensors, len(kv_addr)= {len(kv_addrs)}")
if not self.kv_descs:
raise Exception("NIXL memory registration failed for kv tensors")
@@ -175,7 +181,7 @@ def register_buffer_to_engine(self):
self.kv_args.aux_data_ptrs, self.kv_args.aux_data_lens
):
aux_addrs.append((aux_data_ptr, aux_data_len, 0, ""))
- self.aux_descs = self.agent.register_memory(aux_addrs, "DRAM", is_sorted=False)
+ self.aux_descs = self.agent.register_memory(aux_addrs, "DRAM")
logger.debug(f"Register aux tensors, len(aux_addrs)= {len(aux_addrs)}")
if not self.aux_descs:
raise Exception("NIXL memory registration failed for aux tensors")
@@ -222,8 +228,8 @@ def send_kvcache(
logger.debug(
f"len(src_addrs): before group: {len(prefill_kv_indices)}, after group: {len(src_addrs)}"
)
- src_descs = self.agent.get_xfer_descs(src_addrs, "VRAM", is_sorted=False)
- dst_descs = self.agent.get_xfer_descs(dst_addrs, "VRAM", is_sorted=False)
+ src_descs = self.agent.get_xfer_descs(src_addrs, "VRAM")
+ dst_descs = self.agent.get_xfer_descs(dst_addrs, "VRAM")
# Transfer data
xfer_handle = self.agent.initialize_xfer(
"WRITE",
@@ -239,6 +245,140 @@ def send_kvcache(
raise Exception("KVSender failed to post transfer")
return xfer_handle
+ def send_kvcache_slice(
+ self,
+ peer_name: str,
+ prefill_kv_indices: npt.NDArray[np.int32],
+ dst_kv_ptrs: list[int],
+ dst_kv_indices: npt.NDArray[np.int32],
+ dst_gpu_id: int,
+ notif: str,
+ prefill_tp_size: int,
+ decode_tp_size: int,
+ decode_tp_rank: int,
+ dst_kv_item_len: int,
+ ):
+ # Get configuration from kv_args
+ local_tp_rank_in_group = self.kv_args.engine_rank % prefill_tp_size
+ dst_tp_rank_in_group = decode_tp_rank % decode_tp_size
+ num_kv_heads = self.kv_args.kv_head_num
+
+ # Calculate head distribution
+ src_heads_per_rank = num_kv_heads
+ dst_heads_per_rank = num_kv_heads * prefill_tp_size // decode_tp_size
+
+ src_kv_item_len = self.kv_args.kv_item_lens[0]
+ page_size = self.kv_args.page_size
+
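+        # Bytes of one head's slice for a single token on the decode side
+        # (dst_kv_item_len covers a whole page, hence the division by page_size).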
+ bytes_per_head_slice_to_send = (
+ dst_kv_item_len // page_size // dst_heads_per_rank
+ )
+
+ # Determine which heads to send
+ if prefill_tp_size > decode_tp_size:
+ # Multiple prefill ranks to one decode rank
+ src_head_start_offset = 0
+ num_heads_to_send = src_heads_per_rank
+ dst_head_start_offset = local_tp_rank_in_group * src_heads_per_rank
+ else:
+ # Send KVCache from 1 prefill instance to multiple decode instances
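+            # The modulo keeps the offset within this prefill rank's local heads when
+            # several decode ranks are served by the same prefill rank.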
+ src_head_start_offset = (
+ dst_tp_rank_in_group * dst_heads_per_rank
+ ) % src_heads_per_rank
+ num_heads_to_send = dst_heads_per_rank
+ dst_head_start_offset = 0
+
+ # Create transfer descriptors
+ src_addrs = []
+ dst_addrs = []
+
+ bytes_per_token_on_prefill = src_kv_item_len // page_size
+ bytes_per_token_on_decode = dst_kv_item_len // page_size
+
+ num_kv_layers = len(self.kv_args.kv_data_ptrs) // 2
+ src_k_ptrs = self.kv_args.kv_data_ptrs[:num_kv_layers]
+ src_v_ptrs = self.kv_args.kv_data_ptrs[num_kv_layers:]
+ dst_k_ptrs = dst_kv_ptrs[0 : len(src_k_ptrs)]
+ dst_v_ptrs = dst_kv_ptrs[num_kv_layers : num_kv_layers + len(src_v_ptrs)]
+
+ # Calculate precise byte offset and length for the sub-slice within the token
+ src_head_slice_offset = src_head_start_offset * bytes_per_head_slice_to_send
+ dst_head_slice_offset = dst_head_start_offset * bytes_per_head_slice_to_send
+ heads_bytes_per_token_to_send = num_heads_to_send * bytes_per_head_slice_to_send
+
+ src_dst_ptr_pairs = [
+ (
+ src_k_ptrs[layer_id],
+ dst_k_ptrs[layer_id],
+ )
+ for layer_id in range(len(src_k_ptrs))
+ ] + [
+ (
+ src_v_ptrs[layer_id],
+ dst_v_ptrs[layer_id],
+ )
+ for layer_id in range(len(src_v_ptrs))
+ ]
+
+ for src_ptr, dst_ptr in src_dst_ptr_pairs:
+ for i in range(len(prefill_kv_indices)):
+ prefill_page_idx = int(prefill_kv_indices[i])
+ decode_page_idx = int(dst_kv_indices[i])
+
+ # Get the starting addresses for the current src and dst pages
+ src_page_start_addr = src_ptr + prefill_page_idx * src_kv_item_len
+ dst_page_start_addr = dst_ptr + decode_page_idx * dst_kv_item_len
+
+ # Iterate through each valid token slot within the current page
+ for token_slot_in_page in range(page_size):
+ # Calculate the start address of the current token slot
+ src_token_slot_start_addr = (
+ src_page_start_addr
+ + token_slot_in_page * bytes_per_token_on_prefill
+ )
+ dst_token_slot_start_addr = (
+ dst_page_start_addr
+ + token_slot_in_page * bytes_per_token_on_decode
+ )
+
+ # Calculate final src and dst addresses by applying head-slice offsets
+ src_slice_addr = src_token_slot_start_addr + src_head_slice_offset
+ dst_slice_addr = dst_token_slot_start_addr + dst_head_slice_offset
+
+ src_addrs.append(
+ (
+ src_slice_addr,
+ heads_bytes_per_token_to_send,
+ self.kv_args.gpu_id,
+ )
+ )
+ dst_addrs.append(
+ (dst_slice_addr, heads_bytes_per_token_to_send, dst_gpu_id)
+ )
+
+ # Use NIXL agent for transfer
+ src_descs = self.agent.get_xfer_descs(src_addrs, "VRAM")
+ dst_descs = self.agent.get_xfer_descs(dst_addrs, "VRAM")
+
+ xfer_handle = self.agent.initialize_xfer(
+ "WRITE", src_descs, dst_descs, peer_name, notif.encode("ascii")
+ )
+ if not xfer_handle:
+ raise Exception("Failed to create sliced KV transfer")
+
+ state = self.agent.transfer(xfer_handle)
+ if state == "ERR":
+ raise Exception("Failed to post sliced KV transfer")
+
+ return xfer_handle
+
def send_aux(
self,
peer_name: str,
@@ -255,8 +395,8 @@ def send_aux(
decode_aux_addr = dst_aux_ptrs[0] + dst_aux_index * aux_item_len
src_addrs = [(prefill_aux_addr, aux_item_len, 0)]
dst_addrs = [(decode_aux_addr, aux_item_len, 0)]
- src_descs = self.agent.get_xfer_descs(src_addrs, "DRAM", is_sorted=False)
- dst_descs = self.agent.get_xfer_descs(dst_addrs, "DRAM", is_sorted=False)
+ src_descs = self.agent.get_xfer_descs(src_addrs, "DRAM")
+ dst_descs = self.agent.get_xfer_descs(dst_addrs, "DRAM")
# Transfer data
xfer_handle = self.agent.initialize_xfer(
"WRITE",
@@ -296,14 +436,35 @@ def add_transfer_request(
assert req.agent_name in self.decode_kv_args_table
notif = "_".join([str(req.room), "kv", str(chunk_id), str(int(is_last))])
- kv_xfer_handle = self.send_kvcache(
- req.agent_name,
- kv_indices,
- self.decode_kv_args_table[req.agent_name].dst_kv_ptrs,
- chunked_dst_kv_indice,
- self.decode_kv_args_table[req.agent_name].gpu_id,
- notif,
- )
+ decode_tp_size = self.decode_kv_args_table[req.agent_name].decode_tp_size
+
+ if decode_tp_size == self.tp_size:
+ kv_xfer_handle = self.send_kvcache(
+ req.agent_name,
+ kv_indices,
+ self.decode_kv_args_table[req.agent_name].dst_kv_ptrs,
+ chunked_dst_kv_indice,
+ self.decode_kv_args_table[req.agent_name].gpu_id,
+ notif,
+ )
+ else:
+ kv_xfer_handle = self.send_kvcache_slice(
+ req.agent_name,
+ kv_indices,
+ self.decode_kv_args_table[req.agent_name].dst_kv_ptrs,
+ chunked_dst_kv_indice,
+ self.decode_kv_args_table[req.agent_name].gpu_id,
+ notif,
+ prefill_tp_size=self.tp_size,
+ decode_tp_size=decode_tp_size,
+ decode_tp_rank=self.decode_kv_args_table[
+ req.agent_name
+ ].decode_tp_rank,
+ dst_kv_item_len=self.decode_kv_args_table[
+ req.agent_name
+ ].dst_kv_item_len,
+ )
+
handles.append(kv_xfer_handle)
# Only the last chunk we need to send the aux data.
if is_last:
@@ -454,11 +615,11 @@ def __init__(
mgr: NixlKVManager,
bootstrap_addr: str,
bootstrap_room: Optional[int] = None,
- data_parallel_rank: Optional[int] = None,
+ prefill_dp_rank: Optional[int] = None,
):
self.started_transfer = False
self.conclude_state = None
- super().__init__(mgr, bootstrap_addr, bootstrap_room, data_parallel_rank)
+ super().__init__(mgr, bootstrap_addr, bootstrap_room, prefill_dp_rank)
def init(self, kv_indices: npt.NDArray[np.int32], aux_index: Optional[int] = None):
for bootstrap_info in self.bootstrap_infos:
@@ -521,6 +682,9 @@ def _register_kv_args(self):
packed_kv_data_ptrs,
packed_aux_data_ptrs,
str(self.kv_mgr.kv_args.gpu_id).encode("ascii"),
+ str(self.kv_mgr.kv_args.decode_tp_size).encode("ascii"),
+ str(self.kv_mgr.kv_args.engine_rank).encode("ascii"),
+ str(self.kv_mgr.kv_args.kv_item_lens[0]).encode("ascii"),
]
)
diff --git a/python/sglang/srt/disaggregation/prefill.py b/python/sglang/srt/disaggregation/prefill.py
index 675e3708ad7..b7074825077 100644
--- a/python/sglang/srt/disaggregation/prefill.py
+++ b/python/sglang/srt/disaggregation/prefill.py
@@ -23,7 +23,7 @@
import threading
from collections import deque
from http import HTTPStatus
-from typing import TYPE_CHECKING, List, Optional
+from typing import TYPE_CHECKING, List, Optional, Type
import torch
@@ -140,8 +140,10 @@ def _init_kv_manager(self) -> BaseKVManager:
kv_args.ib_device = self.scheduler.server_args.disaggregation_ib_device
kv_args.gpu_id = self.scheduler.gpu_id
- kv_manager_class = get_kv_class(self.transfer_backend, KVClassType.MANAGER)
- kv_manager = kv_manager_class(
+ kv_manager_class: Type[BaseKVManager] = get_kv_class(
+ self.transfer_backend, KVClassType.MANAGER
+ )
+ kv_manager: BaseKVManager = kv_manager_class(
kv_args,
DisaggregationMode.PREFILL,
self.scheduler.server_args,
@@ -178,7 +180,7 @@ def _check_if_req_exceed_kv_capacity(self, req: Req) -> bool:
if len(req.origin_input_ids) > self.max_total_num_tokens:
message = f"Request {req.rid} exceeds the maximum number of tokens: {len(req.origin_input_ids)} > {self.max_total_num_tokens}"
logger.error(message)
- prepare_abort(req, message)
+ prepare_abort(req, message, status_code=HTTPStatus.BAD_REQUEST)
self.scheduler.stream_output([req], req.return_logprob)
return True
return False
@@ -238,6 +240,8 @@ def pop_bootstrapped(
self.scheduler.stream_output([req], req.return_logprob)
indices_to_remove.add(i)
failed_reqs.append(req)
+ if self.scheduler.enable_metrics:
+ self.scheduler.metrics_collector.increment_bootstrap_failed_reqs()
continue
# KV.WaitingForInput - init here
@@ -522,6 +526,8 @@ def process_disagg_prefill_inflight_queue(
req, error_message, status_code=HTTPStatus.INTERNAL_SERVER_ERROR
)
done_reqs.append(req)
+ if self.enable_metrics:
+ self.metrics_collector.increment_transfer_failed_reqs()
else:
assert False, f"Unexpected polling state {poll=}"
@@ -563,7 +569,7 @@ def process_prefill_chunk(self: Scheduler) -> None:
# Move the chunked request out of the batch so that we can merge
# only finished requests to running_batch.
self.last_batch.filter_batch(chunked_req_to_exclude=self.chunked_req)
- self.tree_cache.cache_unfinished_req(self.chunked_req)
+ self.tree_cache.cache_unfinished_req(self.chunked_req, chunked=True)
if self.enable_overlap:
# Delay KV transfer to process_batch_result_disagg_prefill when overlap is enabled to ensure results are resolved
self.chunked_req.tmp_end_idx = min(
diff --git a/python/sglang/srt/disaggregation/utils.py b/python/sglang/srt/disaggregation/utils.py
index 720c9d5a59e..43770e3e22b 100644
--- a/python/sglang/srt/disaggregation/utils.py
+++ b/python/sglang/srt/disaggregation/utils.py
@@ -1,21 +1,17 @@
from __future__ import annotations
-import dataclasses
import os
import random
-import threading
-import warnings
from collections import deque
from contextlib import nullcontext
from enum import Enum
-from typing import TYPE_CHECKING, List, Optional
+from typing import TYPE_CHECKING, List, Optional, Type, Union
import numpy as np
-import requests
import torch
import torch.distributed as dist
-from sglang.srt.utils import get_ip, is_npu
+from sglang.srt.utils import is_npu
if TYPE_CHECKING:
from sglang.srt.managers.schedule_batch import Req
@@ -99,7 +95,8 @@ def __init__(
# For ascend backend, output tokens are placed in the NPU and will be transferred by D2D channel.
device = "npu"
elif self.custom_mem_pool:
- device = "cuda"
+ # TODO(shangming): Fix me (use 'cuda') when nvlink_transport of Mooncake is bug-free
+ device = "cpu"
with (
torch.cuda.use_mem_pool(self.custom_mem_pool)
if self.custom_mem_pool
@@ -216,7 +213,9 @@ class KVClassType(Enum):
BOOTSTRAP_SERVER = "bootstrap_server"
-def get_kv_class(transfer_backend: TransferBackend, class_type: KVClassType):
+def get_kv_class(
+ transfer_backend: TransferBackend, class_type: KVClassType
+) -> Optional[Type]:
from sglang.srt.disaggregation.fake import FakeKVReceiver, FakeKVSender
if transfer_backend == TransferBackend.MOONCAKE:
@@ -304,49 +303,6 @@ def kv_to_page_num(num_kv_indices: int, page_size: int):
return (num_kv_indices + page_size - 1) // page_size
-#########################
-# PDLB Registry
-#########################
-
-
-@dataclasses.dataclass
-class PDRegistryRequest:
- """A request to register a machine itself to the LB."""
-
- mode: str
- registry_url: str
- bootstrap_port: Optional[int] = None
-
- def __post_init__(self):
- if self.mode == "prefill" and self.bootstrap_port is None:
- raise ValueError("Bootstrap port must be set in PREFILL mode.")
- elif self.mode == "decode" and self.bootstrap_port is not None:
- raise ValueError("Bootstrap port must not be set in DECODE mode.")
- elif self.mode not in ["prefill", "decode"]:
- raise ValueError(
- f"Invalid mode: {self.mode}. Must be 'prefill' or 'decode'."
- )
-
-
-def register_disaggregation_server(
- mode: str, server_port: int, bootstrap_port: int, pdlb_url: str
-):
- boostrap_port = bootstrap_port if mode == "prefill" else None
- registry_request = PDRegistryRequest(
- mode=mode,
- registry_url=f"http://{get_ip()}:{server_port}",
- bootstrap_port=boostrap_port,
- )
- res = requests.post(
- f"{pdlb_url}/register",
- json=dataclasses.asdict(registry_request),
- )
- if res.status_code != 200:
- warnings.warn(
- f"Failed to register disaggregation server: {res.status_code} {res.text}"
- )
-
-
#########################
# Misc
#########################
diff --git a/python/sglang/srt/distributed/naive_distributed.py b/python/sglang/srt/distributed/naive_distributed.py
new file mode 100644
index 00000000000..61165d90c05
--- /dev/null
+++ b/python/sglang/srt/distributed/naive_distributed.py
@@ -0,0 +1,112 @@
+import base64
+import os
+import pickle
+import time
+from pathlib import Path
+from typing import Any, List, Optional
+
+import torch
+
+from sglang.srt.utils import MultiprocessingSerializer
+
+
+class NaiveDistributed:
+ def __init__(self, rank: int, world_size: int, rendezvous: str):
+ self._rank = rank
+ self._world_size = world_size
+ self._operation_index = 0
+ self._directory = Path(rendezvous)
+ self._directory.mkdir(parents=True, exist_ok=True)
+ assert 0 <= rank < world_size
+
+        # barrier both for safety and as a sanity check
+ self.barrier()
+
+ def get_rank(self):
+ return self._rank
+
+ def get_world_size(self):
+ return self._world_size
+
+ def scatter(
+ self, tensor: torch.Tensor, scatter_list: List[torch.Tensor], src: int = 0
+ ):
+ if self._rank == src:
+ assert len(scatter_list) == self._world_size
+ else:
+ assert scatter_list is None
+
+ gathered_objects = self.all_gather_object(
+ dict(
+ serialized_scatter_list=[
+ (
+ None
+ if item_rank == src
+ else MultiprocessingSerializer.serialize(item)
+ )
+ for item_rank, item in enumerate(scatter_list)
+ ]
+ )
+ if self._rank == src
+ else dict()
+ )
+
+ remote_serialized_tensor = gathered_objects[src]["serialized_scatter_list"][
+ self._rank
+ ]
+ if self._rank == src:
+ assert remote_serialized_tensor is None
+ remote_tensor = scatter_list[self._rank]
+ else:
+ remote_tensor = MultiprocessingSerializer.deserialize(
+ remote_serialized_tensor
+ )
+ tensor.copy_(remote_tensor)
+
+        # avoid the src tensor being deleted too early
+ self.barrier()
+
+ def all_gather_object(self, obj: Any) -> List[Any]:
+ self._operation_index += 1
+
+ text_postfix = "\n"
+
+ def _get_path(interesting_rank: int):
+ return (
+ self._directory
+ / f"rank{interesting_rank}_op{self._operation_index}.txt"
+ )
+
+ _get_path(self._rank).write_text(
+ base64.b64encode(pickle.dumps(obj)).decode("utf-8") + text_postfix
+ )
+
+ def _read_one(interesting_rank: int):
+ p = _get_path(interesting_rank)
+ while True:
+ if p.exists() and (text := p.read_text()).endswith(text_postfix):
+ return pickle.loads(base64.b64decode(text[: -len(text_postfix)]))
+ time.sleep(0.001)
+
+ return [
+ _read_one(interesting_rank) for interesting_rank in range(self._world_size)
+ ]
+
+ def barrier(self):
+ actual_objs = self.all_gather_object(self._rank)
+ assert actual_objs == list(range(self._world_size)), f"{actual_objs=}"
+
+
+# Multiple instances can be created if needed
+_instance: Optional[NaiveDistributed] = None
+
+
+def get_naive_distributed():
+ assert _instance is not None
+ return _instance
+
+
+def set_naive_distributed(instance: NaiveDistributed):
+ global _instance
+ assert _instance is None
+ _instance = instance
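+
+
+# Rough usage sketch (illustrative; assumes every rank can see the same rendezvous directory):
+#   dist = NaiveDistributed(rank=rank, world_size=world_size, rendezvous="/tmp/naive_dist")
+#   objs = dist.all_gather_object({"rank": dist.get_rank()})
+#   dist.barrier()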
diff --git a/python/sglang/srt/distributed/parallel_state.py b/python/sglang/srt/distributed/parallel_state.py
index a8a8d20f667..4f410570da3 100644
--- a/python/sglang/srt/distributed/parallel_state.py
+++ b/python/sglang/srt/distributed/parallel_state.py
@@ -43,6 +43,7 @@
direct_register_custom_op,
get_bool_env_var,
get_int_env_var,
+ is_cpu,
is_cuda_alike,
is_hip,
is_npu,
@@ -51,6 +52,9 @@
)
_is_npu = is_npu()
+_is_cpu = is_cpu()
+
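+# When each process is pinned to a single visible device, device index 0 is used
+# below instead of local_rank.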
+IS_ONE_DEVICE_PER_PROCESS = get_bool_env_var("SGLANG_ONE_DEVICE_PER_PROCESS")
@dataclass
@@ -60,6 +64,9 @@ class GraphCaptureContext:
TensorMetadata = namedtuple("TensorMetadata", ["device", "dtype", "size"])
+# use int value instead of ReduceOp.SUM to support torch compile
+REDUCE_OP_SUM = int(torch.distributed.ReduceOp.SUM)
+
def _split_tensor_dict(
tensor_dict: Dict[str, Union[torch.Tensor, Any]]
@@ -223,10 +230,12 @@ def __init__(
use_message_queue_broadcaster: bool = False,
group_name: Optional[str] = None,
):
+ # Set group info
group_name = group_name or "anonymous"
self.unique_name = _get_unique_name(group_name)
_register_group(self)
+ # Set rank info
self.rank = torch.distributed.get_rank()
self.local_rank = local_rank
self.device_group = None
@@ -250,15 +259,16 @@ def __init__(
assert self.cpu_group is not None
assert self.device_group is not None
+ device_id = 0 if IS_ONE_DEVICE_PER_PROCESS else local_rank
if is_cuda_alike():
- self.device = torch.device(f"cuda:{local_rank}")
+ self.device = torch.device(f"cuda:{device_id}")
elif _is_npu:
- self.device = torch.device(f"npu:{local_rank}")
+ self.device = torch.device(f"npu:{device_id}")
else:
self.device = torch.device("cpu")
-
self.device_module = torch.get_device_module(self.device)
+ # Import communicators
self.use_pynccl = use_pynccl
self.use_pymscclpp = use_pymscclpp
self.use_custom_allreduce = use_custom_allreduce
@@ -271,6 +281,9 @@ def __init__(
from sglang.srt.distributed.device_communicators.custom_all_reduce import (
CustomAllreduce,
)
+ from sglang.srt.distributed.device_communicators.pymscclpp import (
+ PyMscclppCommunicator,
+ )
from sglang.srt.distributed.device_communicators.pynccl import (
PyNcclCommunicator,
)
@@ -288,10 +301,6 @@ def __init__(
device=self.device,
)
- from sglang.srt.distributed.device_communicators.pymscclpp import (
- PyMscclppCommunicator,
- )
-
self.pymscclpp_comm: Optional[PyMscclppCommunicator] = None
if use_pymscclpp and self.world_size > 1:
self.pymscclpp_comm = PyMscclppCommunicator(
@@ -326,30 +335,30 @@ def __init__(
except Exception as e:
logger.warning(f"Failed to initialize QuickAllReduce: {e}")
+ # Create communicator for other hardware backends
from sglang.srt.distributed.device_communicators.hpu_communicator import (
HpuCommunicator,
)
+ from sglang.srt.distributed.device_communicators.npu_communicator import (
+ NpuCommunicator,
+ )
+ from sglang.srt.distributed.device_communicators.xpu_communicator import (
+ XpuCommunicator,
+ )
self.hpu_communicator: Optional[HpuCommunicator] = None
if use_hpu_communicator and self.world_size > 1:
self.hpu_communicator = HpuCommunicator(group=self.device_group)
- from sglang.srt.distributed.device_communicators.xpu_communicator import (
- XpuCommunicator,
- )
-
self.xpu_communicator: Optional[XpuCommunicator] = None
if use_xpu_communicator and self.world_size > 1:
self.xpu_communicator = XpuCommunicator(group=self.device_group)
- from sglang.srt.distributed.device_communicators.npu_communicator import (
- NpuCommunicator,
- )
-
self.npu_communicator: Optional[NpuCommunicator] = None
if use_npu_communicator and self.world_size > 1:
self.npu_communicator = NpuCommunicator(group=self.device_group)
+ # Create message queue
from sglang.srt.distributed.device_communicators.shm_broadcast import (
MessageQueue,
)
@@ -483,9 +492,7 @@ def all_reduce(self, input_: torch.Tensor) -> torch.Tensor:
if input_.is_cpu:
if is_shm_available(input_.dtype, self.world_size, self.local_size):
- torch.ops.sgl_kernel.shm_allreduce(
- input_, torch.distributed.ReduceOp.SUM
- )
+ torch.ops.sgl_kernel.shm_allreduce(input_, REDUCE_OP_SUM)
else:
torch.distributed.all_reduce(input_, group=self.device_group)
return input_
@@ -849,6 +856,11 @@ def broadcast_object_list(
)
return obj_list
+ def all_gather_object(self, obj: Any) -> List[Any]:
+ objs = [None] * self.world_size
+ torch.distributed.all_gather_object(objs, obj, group=self.cpu_group)
+ return objs
+
def send_object(self, obj: Any, dst: int) -> None:
"""Send the input object list to the destination rank."""
"""NOTE: `dst` is the local rank of the destination rank."""
@@ -868,17 +880,16 @@ def send_object(self, obj: Any, dst: int) -> None:
size_tensor = torch.tensor(
[object_tensor.numel()],
dtype=torch.long,
- device=torch.cuda.current_device(),
+ device="cpu",
)
-
# Send object size
- torch.distributed.send(
- size_tensor, dst=self.ranks[dst], group=self.device_group
- )
+ torch.distributed.send(size_tensor, dst=self.ranks[dst], group=self.cpu_group)
# Send object
torch.distributed.send(
- object_tensor, dst=self.ranks[dst], group=self.device_group
+ object_tensor,
+ dst=self.ranks[dst],
+ group=self.device_group,
)
return None
@@ -893,13 +904,11 @@ def recv_object(self, src: int) -> Any:
src != self.rank_in_group
), "Invalid source rank. Source rank is the same as the current rank."
- size_tensor = torch.empty(
- 1, dtype=torch.long, device=torch.cuda.current_device()
- )
+ size_tensor = torch.empty(1, dtype=torch.long, device="cpu")
# Receive object size
rank_size = torch.distributed.recv(
- size_tensor, src=self.ranks[src], group=self.device_group
+ size_tensor, src=self.ranks[src], group=self.cpu_group
)
# Tensor to receive serialized objects into.
@@ -917,7 +926,7 @@ def recv_object(self, src: int) -> Any:
rank_object == rank_size
), "Received object sender rank does not match the size sender rank."
- obj = pickle.loads(object_tensor.cpu().numpy().tobytes())
+ obj = pickle.loads(object_tensor.cpu().numpy())
return obj
@@ -1450,43 +1459,49 @@ def initialize_model_parallel(
_PDMUX_PREFILL_TP_GROUP.pynccl_comm.disabled = False
moe_ep_size = expert_model_parallel_size
-
moe_tp_size = tensor_model_parallel_size // moe_ep_size
+
global _MOE_EP
assert _MOE_EP is None, "expert model parallel group is already initialized"
- group_ranks = []
- for i in range(num_tensor_model_parallel_groups):
- for j in range(moe_tp_size):
- st = i * tensor_model_parallel_size + j
- en = (i + 1) * tensor_model_parallel_size + j
- ranks = list(range(st, en, moe_tp_size))
- group_ranks.append(ranks)
- _MOE_EP = init_model_parallel_group(
- group_ranks,
- get_world_group().local_rank,
- backend,
- use_custom_allreduce=False,
- group_name="moe_ep",
- )
+ if moe_ep_size == tensor_model_parallel_size:
+ _MOE_EP = _TP
+ else:
+ # TODO(ch-wan): use split_group to save memory
+ group_ranks = []
+ for i in range(num_tensor_model_parallel_groups):
+ for j in range(moe_tp_size):
+ st = i * tensor_model_parallel_size + j
+ en = (i + 1) * tensor_model_parallel_size + j
+ ranks = list(range(st, en, moe_tp_size))
+ group_ranks.append(ranks)
+ _MOE_EP = init_model_parallel_group(
+ group_ranks,
+ get_world_group().local_rank,
+ backend,
+ group_name="moe_ep",
+ )
global _MOE_TP
assert _MOE_TP is None, "expert model parallel group is already initialized"
- group_ranks = []
- for i in range(num_tensor_model_parallel_groups):
- for j in range(moe_ep_size):
- st = i * tensor_model_parallel_size + j * moe_tp_size
- en = i * tensor_model_parallel_size + (j + 1) * moe_tp_size
- ranks = list(range(st, en))
- group_ranks.append(ranks)
- _MOE_TP = init_model_parallel_group(
- group_ranks,
- get_world_group().local_rank,
- backend,
- use_custom_allreduce=False,
- group_name="moe_tp",
- )
+ if moe_tp_size == tensor_model_parallel_size:
+ _MOE_TP = _TP
+ else:
+ # TODO(ch-wan): use split_group to save memory
+ group_ranks = []
+ for i in range(num_tensor_model_parallel_groups):
+ for j in range(moe_ep_size):
+ st = i * tensor_model_parallel_size + j * moe_tp_size
+ en = i * tensor_model_parallel_size + (j + 1) * moe_tp_size
+ ranks = list(range(st, en))
+ group_ranks.append(ranks)
+ _MOE_TP = init_model_parallel_group(
+ group_ranks,
+ get_world_group().local_rank,
+ backend,
+ group_name="moe_tp",
+ )
# Build the pipeline model-parallel groups.
num_pipeline_model_parallel_groups: int = world_size // pipeline_model_parallel_size
@@ -1572,6 +1587,16 @@ def patch_tensor_parallel_group(tp_group: GroupCoordinator):
_TP = old_tp_group
+def get_world_size():
+ """Return world size for the world group."""
+ return get_world_group().world_size
+
+
+def get_world_rank():
+ """Return my rank for the world group."""
+ return get_world_group().rank_in_group
+
+
def get_tensor_model_parallel_world_size():
"""Return world size for the tensor model parallel group."""
return get_tp_group().world_size
@@ -1582,6 +1607,16 @@ def get_tensor_model_parallel_rank():
return get_tp_group().rank_in_group
+def get_pipeline_model_parallel_world_size():
+ """Return world size for the pipeline model parallel group."""
+ return get_pp_group().world_size
+
+
+def get_pipeline_model_parallel_rank():
+ """Return my rank for the pipeline model parallel group."""
+ return get_pp_group().rank_in_group
+
+
def get_moe_expert_parallel_world_size():
"""Return world size for the moe expert parallel group."""
return get_moe_ep_group().world_size
@@ -1634,7 +1669,7 @@ def cleanup_dist_env_and_memory(shutdown_ray: bool = False):
ray.shutdown()
gc.collect()
- if not current_platform.is_cpu():
+ if not _is_cpu:
if hasattr(torch, "cuda") and torch.cuda.is_available():
torch.cuda.empty_cache()
if hasattr(torch._C, "_host_emptyCache"):
diff --git a/python/sglang/srt/entrypoints/context.py b/python/sglang/srt/entrypoints/context.py
index ae46053747b..66f58200f31 100644
--- a/python/sglang/srt/entrypoints/context.py
+++ b/python/sglang/srt/entrypoints/context.py
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: Apache-2.0
-# Copied from vLLM: https://github.com/zyongye/vllm/blob/6a70830065701b163e36a86fd331b41b5feac401/vllm/entrypoints/context.py
+# Copied from vLLM
import json
import logging
from abc import ABC, abstractmethod
@@ -83,6 +83,14 @@ def append_output(self, output) -> None:
if isinstance(output, dict) and "output_ids" in output:
output_token_ids = output["output_ids"]
+            # TODO: remove this hack.
+            # Very hacky: find the first occurrence of token 200006 and cut from there.
+ try:
+ start_index = output_token_ids.index(200006)
+ output_token_ids = output_token_ids[start_index:]
+ except ValueError:
+ pass
+
for token_id in output_token_ids:
self.parser.process(token_id)
output_msgs = self.parser.messages
@@ -107,6 +115,8 @@ def messages(self) -> list:
return self._messages
def need_builtin_tool_call(self) -> bool:
+ if not self.messages:
+ return False
last_msg = self.messages[-1]
recipient = last_msg.recipient
return recipient is not None and (
@@ -188,6 +198,15 @@ def append_output(self, output) -> None:
# RequestOutput from SGLang with outputs
output_token_ids = output["output_ids"]
+        # TODO: remove this hack.
+        # Very hacky: find the first occurrence of token 200006 and cut from there.
+ try:
+ start_index = output_token_ids.index(200006)
+ output_token_ids = output_token_ids[start_index:]
+ except ValueError:
+ pass
+
for token_id in output_token_ids:
self.parser.process(token_id)
diff --git a/python/sglang/srt/entrypoints/engine.py b/python/sglang/srt/entrypoints/engine.py
index 854d146a40a..fbd923d910b 100644
--- a/python/sglang/srt/entrypoints/engine.py
+++ b/python/sglang/srt/entrypoints/engine.py
@@ -23,8 +23,10 @@
import logging
import multiprocessing as mp
import os
+import random
import signal
import threading
+import time
from typing import AsyncIterator, Dict, Iterator, List, Optional, Tuple, Union
import zmq
@@ -58,6 +60,7 @@
UpdateWeightsFromDistributedReqInput,
UpdateWeightsFromTensorReqInput,
)
+from sglang.srt.managers.multi_tokenizer_mixin import MultiTokenizerRouter
from sglang.srt.managers.scheduler import run_scheduler_process
from sglang.srt.managers.template_manager import TemplateManager
from sglang.srt.managers.tokenizer_manager import TokenizerManager
@@ -94,8 +97,8 @@ class Engine(EngineBase):
3. DetokenizerManager (subprocess): Detokenizes the output tokens and sends the result back to the Tokenizer Manager.
Note:
- 1. The HTTP server, Engine, and TokenizerManager both run in the main process.
- 2. Inter-process communication is done through ICP (each process uses a different port) via the ZMQ library.
+ 1. The HTTP server, Engine, and TokenizerManager all run in the main process.
+ 2. Inter-process communication (IPC) is handled via the ZMQ library, with each process using a different port.
"""
def __init__(self, **kwargs):
@@ -536,6 +539,22 @@ def resume_memory_occupation(self, tags: Optional[List[str]] = None):
self.tokenizer_manager.resume_memory_occupation(obj, None)
)
+ def freeze_gc(self):
+ """
+ To maintain a high performance server with low latency, we want to reduce the
+ stalls caused by the garbage collector scanning through a large number of objects.
+
+ It is usually helpful to start the server and warm it up with real requests to
+ initialize many of the long-lived objects that do not need to be garbage collected.
+
+        After sufficient warmup, we can call this function to freeze the garbage collector
+        so that all objects created before this point are excluded from future garbage
+        collection scans.
+ """
+
+ loop = asyncio.get_event_loop()
+ loop.run_until_complete(self.tokenizer_manager.freeze_gc())
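+
+    # Rough usage sketch (illustrative; argument values are placeholders):
+    #   engine = Engine(model_path="meta-llama/Llama-3.1-8B-Instruct")
+    #   engine.generate("warmup prompt")   # create long-lived objects first
+    #   engine.freeze_gc()                 # then exclude them from future GC scans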
+
"""
Execute an RPC call on all scheduler processes.
"""
@@ -635,6 +654,14 @@ def _set_envs_and_config(server_args: ServerArgs):
os.environ["NCCL_NVLS_ENABLE"] = str(int(server_args.enable_nccl_nvls))
os.environ["CUDA_DEVICE_MAX_CONNECTIONS"] = "4"
os.environ["CUDA_MODULE_LOADING"] = "AUTO"
+    # flashinfer uses this environment variable for various kernels, from MoE to quantization
+ if os.environ.get("TRTLLM_ENABLE_PDL", "1") != "0":
+ os.environ["TRTLLM_ENABLE_PDL"] = "1"
+
+ # Can also be passed as argument
+ os.environ["SGLANG_RUN_ID"] = (
+ f"sglang-run-{time.time()}-{random.randint(0, 100000000)}"
+ )
# Set prometheus env vars
if server_args.enable_metrics:
@@ -647,7 +674,7 @@ def _set_envs_and_config(server_args: ServerArgs):
if server_args.attention_backend == "flashinfer":
assert_pkg_version(
"flashinfer_python",
- "0.2.11.post3",
+ "0.3.1",
"Please uninstall the old version and "
"reinstall the latest version by following the instructions "
"at https://docs.flashinfer.ai/installation.html.",
@@ -655,7 +682,7 @@ def _set_envs_and_config(server_args: ServerArgs):
if _is_cuda and not get_bool_env_var("SGLANG_SKIP_SGL_KERNEL_VERSION_CHECK"):
assert_pkg_version(
"sgl-kernel",
- "0.3.5",
+ "0.3.9.post2",
"Please reinstall the latest version with `pip install sgl-kernel --force-reinstall`",
)
@@ -677,6 +704,24 @@ def launch_phase_sigquit_handler(signum, frame):
mp.set_start_method("spawn", force=True)
+def _init_tokenizer_manager(
+ server_args: ServerArgs, port_args: PortArgs
+) -> Tuple[TokenizerManager, TemplateManager]:
+ # Launch tokenizer process
+ tokenizer_manager = TokenizerManager(server_args, port_args)
+
+ # Initialize templates
+ template_manager = TemplateManager()
+ template_manager.initialize_templates(
+ tokenizer_manager=tokenizer_manager,
+ model_path=server_args.model_path,
+ chat_template=server_args.chat_template,
+ completion_template=server_args.completion_template,
+ )
+
+ return tokenizer_manager, template_manager
+
+
def _launch_subprocesses(
server_args: ServerArgs, port_args: Optional[PortArgs] = None
) -> Tuple[TokenizerManager, TemplateManager, Dict]:
@@ -790,17 +835,15 @@ def _launch_subprocesses(
)
detoken_proc.start()
- # Launch tokenizer process
- tokenizer_manager = TokenizerManager(server_args, port_args)
-
- # Initialize templates
- template_manager = TemplateManager()
- template_manager.initialize_templates(
- tokenizer_manager=tokenizer_manager,
- model_path=server_args.model_path,
- chat_template=server_args.chat_template,
- completion_template=server_args.completion_template,
- )
+ # Init tokenizer manager first, as the bootstrap server is initialized here
+ if server_args.tokenizer_worker_num > 1:
+ # Launch multi-tokenizer router
+ tokenizer_manager = MultiTokenizerRouter(server_args, port_args)
+ template_manager = None
+ else:
+ tokenizer_manager, template_manager = _init_tokenizer_manager(
+ server_args, port_args
+ )
# Wait for the model to finish loading
scheduler_infos = []
@@ -823,5 +866,7 @@ def _launch_subprocesses(
# Assume all schedulers have the same scheduler_info
scheduler_info = scheduler_infos[0]
+
tokenizer_manager.max_req_input_len = scheduler_info["max_req_input_len"]
+
return tokenizer_manager, template_manager, scheduler_info
diff --git a/python/sglang/srt/entrypoints/http_server.py b/python/sglang/srt/entrypoints/http_server.py
index 2dd2c75f1ff..57ee198b956 100644
--- a/python/sglang/srt/entrypoints/http_server.py
+++ b/python/sglang/srt/entrypoints/http_server.py
@@ -23,11 +23,15 @@
import logging
import multiprocessing as multiprocessing
import os
+import random
+import tempfile
import threading
import time
from http import HTTPStatus
from typing import Any, AsyncIterator, Callable, Dict, List, Optional
+import setproctitle
+
# Fix a bug of Python threading
setattr(threading, "_register_atexit", lambda *args, **kwargs: None)
@@ -44,11 +48,7 @@
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import ORJSONResponse, Response, StreamingResponse
-from sglang.srt.disaggregation.utils import (
- FAKE_BOOTSTRAP_HOST,
- DisaggregationMode,
- register_disaggregation_server,
-)
+from sglang.srt.disaggregation.utils import FAKE_BOOTSTRAP_HOST, DisaggregationMode
from sglang.srt.entrypoints.engine import _launch_subprocesses
from sglang.srt.entrypoints.openai.protocol import (
ChatCompletionRequest,
@@ -91,11 +91,18 @@
UpdateWeightVersionReqInput,
VertexGenerateReqInput,
)
+from sglang.srt.managers.multi_tokenizer_mixin import (
+ MultiTokenizerManager,
+ get_main_process_id,
+ monkey_patch_uvicorn_multiprocessing,
+ read_from_shared_memory,
+ write_data_for_multi_tokenizer,
+)
from sglang.srt.managers.template_manager import TemplateManager
from sglang.srt.managers.tokenizer_manager import ServerStatus, TokenizerManager
from sglang.srt.metrics.func_timer import enable_func_timer
-from sglang.srt.reasoning_parser import ReasoningParser
-from sglang.srt.server_args import ServerArgs
+from sglang.srt.parser.reasoning_parser import ReasoningParser
+from sglang.srt.server_args import PortArgs, ServerArgs
from sglang.srt.utils import (
add_api_key_middleware,
add_prometheus_middleware,
@@ -130,8 +137,72 @@ def set_global_state(global_state: _GlobalState):
_global_state = global_state
+async def init_multi_tokenizer() -> ServerArgs:
+    """Read args from shared memory and initialize the tokenizer manager for the current process."""
+ pid = os.getpid()
+ main_pid = get_main_process_id()
+    logger.info(f"current worker_id: {pid}, main process ID: {main_pid}")
+
+ # Read configuration from shared memory
+ port_args, server_args, scheduler_info = read_from_shared_memory(
+ f"multi_tokenizer_args_{main_pid}"
+ )
+ server_args: ServerArgs
+
+ # API key authentication is not supported in multi-tokenizer mode
+ assert (
+ server_args.api_key is None
+ ), "API key is not supported in multi-tokenizer mode"
+
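+    # Give each uvicorn worker process its own ZMQ IPC endpoint (a fresh temp file per worker)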
+ port_args.tokenizer_ipc_name = (
+ f"ipc://{tempfile.NamedTemporaryFile(delete=False).name}"
+ )
+
+ # Launch multi-tokenizer manager process
+ tokenizer_manager = MultiTokenizerManager(server_args, port_args)
+ template_manager = TemplateManager()
+ template_manager.initialize_templates(
+ tokenizer_manager=tokenizer_manager,
+ model_path=server_args.model_path,
+ chat_template=server_args.chat_template,
+ completion_template=server_args.completion_template,
+ )
+ # Register this tokenizer with the main tokenizer manager
+ await tokenizer_manager.register_to_main_tokenizer_manager()
+
+ tokenizer_manager.max_req_input_len = scheduler_info["max_req_input_len"]
+ set_global_state(
+ _GlobalState(
+ tokenizer_manager=tokenizer_manager,
+ template_manager=template_manager,
+ scheduler_info=scheduler_info,
+ )
+ )
+ return server_args
+
+
@asynccontextmanager
async def lifespan(fast_api_app: FastAPI):
+ if not getattr(fast_api_app, "is_single_tokenizer_mode", False):
+ # Initialize multi-tokenizer support for worker processes
+ fast_api_app.server_args: ServerArgs = await init_multi_tokenizer()
+
+ # only metrics middleware is supported in multi-tokenizer mode
+ worker_pid = os.getpid()
+ if fast_api_app.server_args.enable_metrics:
+ add_prometheus_middleware(app)
+ enable_func_timer()
+
+ logger.info(f"Worker {worker_pid} added prometheus middleware")
+ fast_api_app.warmup_thread = threading.Thread(
+ target=_wait_and_warmup,
+ args=(
+ fast_api_app.server_args,
+ None, # pipe_finish_writer not needed in worker
+ None, # launch_callback not needed in worker
+ ),
+ )
+
# Initialize OpenAI serving handlers
fast_api_app.state.openai_serving_completion = OpenAIServingCompletion(
_global_state.tokenizer_manager, _global_state.template_manager
@@ -191,7 +262,15 @@ async def lifespan(fast_api_app: FastAPI):
warmup_thread = getattr(fast_api_app, "warmup_thread", None)
if warmup_thread is not None:
warmup_thread.start()
- yield
+
+ try:
+ yield
+ finally:
+ if server_args.tokenizer_worker_num > 1:
+ pid = os.getpid()
+ logger.info(f"uvicorn worker {pid} ending...")
+ warmup_thread.join()
+ logger.info(f"uvicorn worker {pid} ended.")
# Fast API
@@ -480,6 +559,16 @@ async def flush_cache():
)
+@app.api_route("/clear_hicache_storage_backend", methods=["GET", "POST"])
+async def clear_hicache_storage_backend():
+ """Clear the hierarchical cache storage backend."""
+ ret = await _global_state.tokenizer_manager.clear_hicache_storage()
+ return Response(
+ content="Hierarchical cache storage backend cleared.\n",
+ status_code=200 if ret.success else HTTPStatus.BAD_REQUEST,
+ )
+
+
@app.api_route("/start_profile", methods=["GET", "POST"])
async def start_profile_async(obj: Optional[ProfileReqInput] = None):
"""Start profiling."""
@@ -511,6 +600,18 @@ async def stop_profile_async():
)
+@app.api_route("/freeze_gc", methods=["GET", "POST"])
+async def freeze_gc_async():
+ """
+ See engine.freeze_gc for more details.
+ """
+ await _global_state.tokenizer_manager.freeze_gc()
+ return Response(
+ content="Garbage collection frozen.\n",
+ status_code=200,
+ )
+
+
@app.api_route("/start_expert_distribution_record", methods=["GET", "POST"])
async def start_expert_distribution_record_async():
"""Start recording the expert distribution. Clear the previous record if any."""
@@ -1056,9 +1157,21 @@ def launch_server(
1. The HTTP server, Engine, and TokenizerManager both run in the main process.
2. Inter-process communication is done through IPC (each process uses a different port) via the ZMQ library.
"""
- tokenizer_manager, template_manager, scheduler_info = _launch_subprocesses(
- server_args=server_args
- )
+ if server_args.tokenizer_worker_num > 1:
+ setproctitle.setproctitle(f"sglang::http_server/multi_tokenizer_router")
+ port_args = PortArgs.init_new(server_args)
+ port_args.tokenizer_worker_ipc_name = (
+ f"ipc://{tempfile.NamedTemporaryFile(delete=False).name}"
+ )
+ tokenizer_manager, template_manager, scheduler_info = _launch_subprocesses(
+ server_args=server_args, port_args=port_args
+ )
+ else:
+ setproctitle.setproctitle(f"sglang::http_server/tokenizer_manager")
+ tokenizer_manager, template_manager, scheduler_info = _launch_subprocesses(
+ server_args=server_args,
+ )
+
set_global_state(
_GlobalState(
tokenizer_manager=tokenizer_manager,
@@ -1067,48 +1180,99 @@ def launch_server(
)
)
- # Add api key authorization
- if server_args.api_key:
- add_api_key_middleware(app, server_args.api_key)
-
- # Add prometheus middleware
- if server_args.enable_metrics:
- add_prometheus_middleware(app)
- enable_func_timer()
-
- # Send a warmup request - we will create the thread launch it
- # in the lifespan after all other warmups have fired.
- warmup_thread = threading.Thread(
- target=_wait_and_warmup,
- args=(
+ if server_args.tokenizer_worker_num > 1:
+ multi_tokenizer_args_shm = write_data_for_multi_tokenizer(
+ port_args,
server_args,
- pipe_finish_writer,
- launch_callback,
- ),
- )
- app.warmup_thread = warmup_thread
+ scheduler_info,
+ )
+ else:
+ # Add api key authorization
+ if server_args.api_key:
+ add_api_key_middleware(app, server_args.api_key)
+
+ # Add prometheus middleware
+ if server_args.enable_metrics:
+ add_prometheus_middleware(app)
+ enable_func_timer()
+
+ # Send a warmup request - we will create the thread and launch it
+ # in the lifespan after all other warmups have fired.
+ warmup_thread = threading.Thread(
+ target=_wait_and_warmup,
+ args=(
+ server_args,
+ pipe_finish_writer,
+ launch_callback,
+ ),
+ )
+ app.warmup_thread = warmup_thread
try:
# Update logging configs
set_uvicorn_logging_configs()
app.server_args = server_args
# Listen for HTTP requests
- uvicorn.run(
- app,
- host=server_args.host,
- port=server_args.port,
- log_level=server_args.log_level_http or server_args.log_level,
- timeout_keep_alive=5,
- loop="uvloop",
- )
+ if server_args.tokenizer_worker_num > 1:
+ from uvicorn.config import LOGGING_CONFIG
+
+ LOGGING_CONFIG["loggers"]["sglang.srt.entrypoints.http_server"] = {
+ "handlers": ["default"],
+ "level": "INFO",
+ "propagate": False,
+ }
+
+ monkey_patch_uvicorn_multiprocessing()
+
+ uvicorn.run(
+ "sglang.srt.entrypoints.http_server:app",
+ host=server_args.host,
+ port=server_args.port,
+ log_level=server_args.log_level_http or server_args.log_level,
+ timeout_keep_alive=5,
+ loop="uvloop",
+ workers=server_args.tokenizer_worker_num,
+ )
+ else:
+ app.is_single_tokenizer_mode = True
+ uvicorn.run(
+ app,
+ host=server_args.host,
+ port=server_args.port,
+ log_level=server_args.log_level_http or server_args.log_level,
+ timeout_keep_alive=5,
+ loop="uvloop",
+ )
finally:
- warmup_thread.join()
+ if server_args.tokenizer_worker_num > 1:
+ multi_tokenizer_args_shm.unlink()
+ _global_state.tokenizer_manager.socket_mapping.clear_all_sockets()
+ else:
+ warmup_thread.join()
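The multi-tokenizer branch above passes the application as an import string ("sglang.srt.entrypoints.http_server:app") rather than the app object, because uvicorn can only spawn more than one worker from an importable path. A minimal standalone sketch of that constraint; the module path here is illustrative.
```
# Sketch: uvicorn needs an import string, not an app object, when workers > 1.
# "myservice.app:app" is an illustrative module path, not part of this patch.
import uvicorn

if __name__ == "__main__":
    # Single process: passing the app object directly works.
    #   uvicorn.run(app, host="0.0.0.0", port=8000)
    # Multiple workers: each worker process re-imports the app from the given path.
    uvicorn.run("myservice.app:app", host="0.0.0.0", port=8000, workers=4)
```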
def _execute_server_warmup(
server_args: ServerArgs,
pipe_finish_writer: Optional[multiprocessing.connection.Connection],
):
+ def _generate_passkey_sample(length):
+ passkey = "The passkey is **000310**. " * 3
+ filler = "The grass is green. The sky is blue. The sun is yellow. Here we go. There and back again. "
+ repeat = int(length * 1024 / 24 / 2)
+ if "Llama-4" in server_args.model_path:
+ text = f"<|header_start|>user<|header_end|>\n\nYour task is to find the passkey value from the text. {filler * repeat} {passkey} {filler * repeat}.<|eot|><|header_start|>assistant<|header_end|>\n\nThe passkey is **"
+ elif "Llama-3" in server_args.model_path:
+ text = f"<|start_header_id|>user<|end_header_id|>\n\nYour task is to find the passkey value from the text. {filler * repeat} {passkey} {filler * repeat}.<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nThe passkey is **"
+ elif "Qwen3" in server_args.model_path:
+ text = f"<|im_start|>user\nYour task is to find the passkey value from the text. {filler * repeat} {passkey} {filler * repeat}.<|im_end|>\n<|im_start|>assistant\n\n\nThe passkey is **"
+ elif "GLM-4.5" in server_args.model_path:
+ text = f"[gMASK]<|user|>Your task is to find the passkey value from the text. {filler * repeat} {passkey} {filler * repeat}.<|assistant|>\nThe passkey is **"
+ elif "gpt-oss" in server_args.model_path.lower():
+ text = f"<|start|>user<|message|>Your task is to find the passkey value from the text. {filler * repeat} {passkey} {filler * repeat}.<|end|><|start|>assistant<|channel|>analysis<|message|><|end|><|start|>assistant<|channel|>final<|message|>The passkey is **"
+ else:
+ text = f"### User\n\nYour task is to find the passkey value from the text. {filler * repeat} {passkey} {filler * repeat}.\n\n### Response\n\nThe passkey is **"
+ return text
+
headers = {}
url = server_args.url()
if server_args.api_key:
@@ -1138,20 +1302,30 @@ def _execute_server_warmup(
# Send a warmup request
request_name = "/generate" if model_info["is_generation"] else "/encode"
- max_new_tokens = 8 if model_info["is_generation"] else 1
+ max_new_tokens = (
+ int(os.getenv("PASSKEY_DECODE_LEN", 128)) if model_info["is_generation"] else 1
+ )
+ # if os.getenv('SGLANG_DEBUG_EXIT_WARMUP', '0') == '1':
+ # max_new_tokens = 10
json_data = {
"sampling_params": {
"temperature": 0,
"max_new_tokens": max_new_tokens,
+ "ignore_eos": True,
+ "no_stop_trim": False,
},
}
+ print(json_data)
if server_args.skip_tokenizer_init:
json_data["input_ids"] = [[10, 11, 12] for _ in range(server_args.dp_size)]
# TODO Workaround the bug that embedding errors for list of size 1
if server_args.dp_size == 1:
json_data["input_ids"] = json_data["input_ids"][0]
else:
- json_data["text"] = ["The capital city of France is"] * server_args.dp_size
+ passkey_len = int(os.getenv("PASSKEY_LEN", "8"))
+ text = _generate_passkey_sample(passkey_len)
+
+ json_data["text"] = [text] * server_args.dp_size
# TODO Workaround the bug that embedding errors for list of size 1
if server_args.dp_size == 1:
json_data["text"] = json_data["text"][0]
@@ -1166,14 +1340,159 @@ def _execute_server_warmup(
try:
if server_args.disaggregation_mode == "null":
- res = requests.post(
- url + request_name,
- json=json_data,
- headers=headers,
- timeout=600,
- )
+ warmup_all_seq_lens = os.getenv("SRT_WARMUP_ALL_SEQ_LENS", "0") == "1"
+ if warmup_all_seq_lens:
+ import tqdm
+ import transformers
+
+ step_size = 64
+
+ safe_zero = lambda x: x if x is not None else 0
+ context_size = safe_zero(server_args.chunked_prefill_size)
+ context_size = max(safe_zero(server_args.context_length), context_size)
+ assert context_size > 0, "consider passing an explicit --context-length"
+
+ chunk_size = safe_zero(server_args.chunked_prefill_size)
+ chunk_size = chunk_size if chunk_size > 0 else context_size
+
+ tokenizer_path = model_info["tokenizer_path"]
+ tokenizer = transformers.AutoTokenizer.from_pretrained(tokenizer_path)
+
+ text = _generate_passkey_sample(context_size // 1024)
+ input_ids = tokenizer.encode(text)[:context_size]
+ num_decode = 10
+ step_size = 1024
+
+ logger.info(
+ f"Start warmup over all sequence lengths. max_context={context_size}, model={tokenizer_path}"
+ )
+
+ trial_sizes = []
+ for i_chunk in range(0, context_size, chunk_size):
+ max_context_len = min(context_size, i_chunk + chunk_size)
+ real_chunk_size = max_context_len - i_chunk
+ while real_chunk_size > 1:
+ trial_all_size = int(i_chunk + real_chunk_size)
+ trial_sizes.append((int(trial_all_size), int(0)))
+ real_chunk_size /= 2.0
+
+ trial_all_size = max_context_len
+ trial_prefix_size = trial_all_size
+ while trial_prefix_size > 1:
+ if (trial_all_size > 1024) and (
+ int(trial_all_size - trial_prefix_size) > (num_decode + 1)
+ ):
+ trial_sizes.append(
+ (
+ int(trial_prefix_size),
+ int(trial_all_size - trial_prefix_size),
+ )
+ )
+ trial_prefix_size /= 2.0
+
+ logger.info("Prefix, Input")
+ for t_prefix, t_input in trial_sizes:
+ logger.info(f"{t_prefix}, {t_input}")
+
+ for trial_prefix, trial_input in tqdm.tqdm(
+ trial_sizes, dynamic_ncols=True
+ ):
+ trial_input -= num_decode + 1
+
+ if trial_input < 1:
+ continue
+
+ input_ids = np.random.randint(10, 1000, (context_size,)).tolist()
+ new_input_ids = np.random.randint(
+ 10, 1000, (context_size,)
+ ).tolist()
+
+ prefix_input_ids = input_ids[: (trial_input + trial_prefix)]
+ cache_input_ids = new_input_ids[: (trial_input + trial_prefix)]
+
+ text_for_prefix = tokenizer.decode(prefix_input_ids)
+ text_for_cache = tokenizer.decode(
+ prefix_input_ids[:trial_prefix] + cache_input_ids[trial_prefix:]
+ )
+
+ if len(text_for_prefix) > step_size:
+
+ json_data["text"] = text_for_prefix
+ json_data["sampling_params"]["max_new_tokens"] = num_decode
+
+ t_start = time.time()
+ res = requests.post(
+ url + request_name,
+ json=json_data,
+ headers=headers,
+ timeout=6000,
+ )
+ assert res.status_code == 200, f"{res}"
+ t_end = time.time()
+
+ logger.info(
+ f"[WARMUP] {(trial_prefix, trial_input)} (no-prefix) took {(t_end - t_start):.2f} s"
+ )
+
+ if (len(text_for_cache) > step_size) and (trial_input > 0):
+
+ json_data["text"] = text_for_cache
+ json_data["sampling_params"]["max_new_tokens"] = num_decode
+
+ t_start = time.time()
+ res = requests.post(
+ url + request_name,
+ json=json_data,
+ headers=headers,
+ timeout=6000,
+ )
+ assert res.status_code == 200, f"{res}"
+ t_end = time.time()
+
+ logger.info(
+ f"[WARMUP] {(trial_prefix, trial_input)} (with-prefix) took {(t_end - t_start):.2f} s"
+ )
+
+ if (len(text_for_cache) > step_size) and (trial_input == 0):
+
+ json_data["text"] = text_for_cache
+ json_data["sampling_params"]["max_new_tokens"] = num_decode
+
+ t_start = time.time()
+ res = requests.post(
+ url + request_name,
+ json=json_data,
+ headers=headers,
+ timeout=6000,
+ )
+ assert res.status_code == 200, f"{res}"
+ t_end = time.time()
+
+ logger.info(
+ f"[WARMUP] {(trial_prefix + trial_input, 0)} (all-prefix) took {(t_end - t_start):.2f} s"
+ )
+
+ requests.get(
+ url + "/flush_cache",
+ json=json_data,
+ headers=headers,
+ timeout=6000,
+ )
+
+ logger.info("[WARM-UP DONE]")
+ else:
+ res = requests.post(
+ url + request_name,
+ json=json_data,
+ headers=headers,
+ timeout=6000,
+ )
assert res.status_code == 200, f"{res}"
_global_state.tokenizer_manager.server_status = ServerStatus.Up
+ logger.info(f"Warm-up result: {res.json()}")
+ if os.getenv("SGLANG_DEBUG_EXIT_WARMUP", "0") == "1":
+ print("shutdown after warmup")
+ kill_process_tree(os.getpid())
else:
logger.info(f"Start of pd disaggregation warmup ...")
@@ -1192,12 +1511,13 @@ def _execute_server_warmup(
],
"input_ids": [[0, 1, 2, 3]] * server_args.dp_size,
}
- res = requests.post(
- url + request_name,
- json=json_data,
- headers=headers,
- timeout=1800, # because of deep gemm precache is very long if not precache.
- )
+ for _ in range(2):
+ res = requests.post(
+ url + request_name,
+ json=json_data,
+ headers=headers,
+ timeout=1800, # because DeepGEMM precaching takes very long if nothing is precached.
+ )
if res.status_code == 200:
logger.info(
f"End of prefill disaggregation mode warmup with status {res.status_code}, resp: {res.json()}"
@@ -1249,13 +1569,5 @@ def _wait_and_warmup(
if server_args.debug_tensor_dump_input_file:
kill_process_tree(os.getpid())
- if server_args.pdlb_url is not None:
- register_disaggregation_server(
- server_args.disaggregation_mode,
- server_args.port,
- server_args.disaggregation_bootstrap_port,
- server_args.pdlb_url,
- )
-
if launch_callback is not None:
launch_callback()
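The warmup path above is controlled by a few environment variables. A small sketch listing them with the defaults the code falls back to; they must be set in the server process environment before launch, and the values shown are illustrative.
```
# Environment knobs read by the warmup code above (defaults shown are the in-code fallbacks).
import os

os.environ.setdefault("PASSKEY_LEN", "8")              # prompt length in units of 1024 tokens for _generate_passkey_sample
os.environ.setdefault("PASSKEY_DECODE_LEN", "128")     # max_new_tokens for the generation warmup request
os.environ.setdefault("SRT_WARMUP_ALL_SEQ_LENS", "0")  # "1" sweeps many prefix/input length combinations
os.environ.setdefault("SGLANG_DEBUG_EXIT_WARMUP", "0") # "1" kills the server right after warmup
```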
diff --git a/python/sglang/srt/entrypoints/openai/protocol.py b/python/sglang/srt/entrypoints/openai/protocol.py
index 9360993dfc3..730ff999d8c 100644
--- a/python/sglang/srt/entrypoints/openai/protocol.py
+++ b/python/sglang/srt/entrypoints/openai/protocol.py
@@ -13,6 +13,7 @@
# ==============================================================================
"""Pydantic models for OpenAI API protocol"""
+import os
import time
import uuid
from dataclasses import dataclass
@@ -35,6 +36,8 @@
)
from typing_extensions import Literal
+DEFAULT_MODEL_NAME = "default"
+
class ModelCard(BaseModel):
"""Model cards."""
@@ -108,6 +111,23 @@ class JsonSchemaResponseFormat(BaseModel):
strict: Optional[bool] = False
+class ResponseFormat(BaseModel):
+ type: Literal["text", "json_object", "json_schema"]
+ json_schema: Optional[JsonSchemaResponseFormat] = None
+
+
+class StructuresResponseFormat(BaseModel):
+ begin: str
+ schema_: Optional[Dict[str, object]] = Field(alias="schema", default=None)
+ end: str
+
+
+class StructuralTagResponseFormat(BaseModel):
+ type: Literal["structural_tag"]
+ structures: List[StructuresResponseFormat]
+ triggers: List[str]
+
+
class FileRequest(BaseModel):
# https://platform.openai.com/docs/api-reference/files/create
file: bytes # The File object (not file name) to be uploaded
@@ -166,7 +186,7 @@ class BatchResponse(BaseModel):
class CompletionRequest(BaseModel):
# Ordered by official OpenAI API documentation
# https://platform.openai.com/docs/api-reference/completions/create
- model: str
+ model: str = DEFAULT_MODEL_NAME
prompt: Union[List[int], List[List[int]], str, List[str]]
best_of: Optional[int] = None
echo: bool = False
@@ -200,6 +220,7 @@ class CompletionRequest(BaseModel):
skip_special_tokens: bool = True
lora_path: Optional[Union[List[Optional[str]], Optional[str]]] = None
session_params: Optional[Dict] = None
+ response_format: Optional[Union[ResponseFormat, StructuralTagResponseFormat]] = None
# For PD disaggregation
bootstrap_host: Optional[Union[List[str], str]] = None
@@ -327,7 +348,7 @@ class ToolCall(BaseModel):
class ChatCompletionMessageGenericParam(BaseModel):
- role: Literal["system", "assistant", "tool"]
+ role: Literal["system", "assistant", "tool", "function"]
content: Union[str, List[ChatCompletionMessageContentTextPart], None] = Field(
default=None
)
@@ -341,9 +362,9 @@ class ChatCompletionMessageGenericParam(BaseModel):
def _normalize_role(cls, v):
if isinstance(v, str):
v_lower = v.lower()
- if v_lower not in {"system", "assistant", "tool"}:
+ if v_lower not in {"system", "assistant", "tool", "function"}:
raise ValueError(
- "'role' must be one of 'system', 'assistant', or 'tool' (case-insensitive)."
+ "'role' must be one of 'system', 'assistant', 'tool', or 'function' (case-insensitive)."
)
return v_lower
raise ValueError("'role' must be a string")
@@ -359,23 +380,6 @@ class ChatCompletionMessageUserParam(BaseModel):
]
-class ResponseFormat(BaseModel):
- type: Literal["text", "json_object", "json_schema"]
- json_schema: Optional[JsonSchemaResponseFormat] = None
-
-
-class StructuresResponseFormat(BaseModel):
- begin: str
- schema_: Optional[Dict[str, object]] = Field(alias="schema", default=None)
- end: str
-
-
-class StructuralTagResponseFormat(BaseModel):
- type: Literal["structural_tag"]
- structures: List[StructuresResponseFormat]
- triggers: List[str]
-
-
class Function(BaseModel):
"""Function descriptions."""
@@ -409,7 +413,7 @@ class ChatCompletionRequest(BaseModel):
# Ordered by official OpenAI API documentation
# https://platform.openai.com/docs/api-reference/chat/create
messages: List[ChatCompletionMessageParam]
- model: str
+ model: str = DEFAULT_MODEL_NAME
frequency_penalty: float = 0.0
logit_bias: Optional[Dict[str, float]] = None
logprobs: bool = False
@@ -440,7 +444,7 @@ class ChatCompletionRequest(BaseModel):
) # noqa
return_hidden_states: bool = False
reasoning_effort: Optional[Literal["low", "medium", "high"]] = Field(
- default="medium",
+ default=os.getenv("SRT_DEFAULT_REASONING_EFFORT", "medium"),
description="Constrains effort on reasoning for reasoning models. "
"'low' is the least effort, 'high' is the most effort. Reducing reasoning effort can "
"result in faster responses and fewer tokens used on reasoning in a response. "
@@ -457,6 +461,66 @@ def set_tool_choice_default(cls, values):
values["tool_choice"] = "auto"
return values
+ @model_validator(mode="before")
+ @classmethod
+ def normalize_reasoning_inputs(cls, values: Dict):
+ r = values.get("reasoning")
+ if r is None:
+ return values
+
+ if isinstance(r, dict):
+ effort = r.get("effort") or r.get("reasoning_effort")
+ if effort in {"low", "medium", "high"}:
+ values["reasoning_effort"] = effort
+
+ enabled = (
+ r.get("enabled")
+ if r.get("enabled") is not None
+ else r.get("enable", False)
+ )
+ if isinstance(enabled, str):
+ enabled = enabled.strip().lower() in {"1", "true", "yes", "y", "on"}
+ if enabled:
+ ctk = values.get("chat_template_kwargs")
+ if not isinstance(ctk, dict):
+ ctk = {}
+ ctk.setdefault("thinking", True)
+ values["chat_template_kwargs"] = ctk
+
+ return values
+
+ @model_validator(mode="before")
+ @classmethod
+ def set_json_schema(cls, values):
+ response_format = values.get("response_format")
+ if not response_format:
+ return values
+
+ if response_format.get("type") != "json_schema":
+ return values
+
+ schema = response_format.pop("schema", None)
+ json_schema = response_format.get("json_schema")
+
+ if json_schema:
+ return values
+
+ if schema:
+ name_ = schema.get("title", "Schema")
+ strict_ = False
+ if "properties" in schema and "strict" in schema["properties"]:
+ item = schema["properties"].pop("strict", None)
+ if item and item.get("default", False):
+ strict_ = True
+
+ response_format["json_schema"] = {
+ "name": name_,
+ "schema": schema,
+ "strict": strict_,
+ }
+
+ return values
+
# Extra parameters for SRT backend only and will be ignored by OpenAI models.
top_k: int = -1
min_p: float = 0.0
@@ -479,9 +543,9 @@ def set_tool_choice_default(cls, values):
rid: Optional[Union[List[str], str]] = None
# For PD disaggregation
- bootstrap_host: Optional[str] = None
- bootstrap_port: Optional[int] = None
- bootstrap_room: Optional[int] = None
+ bootstrap_host: Optional[Union[List[str], str]] = None
+ bootstrap_port: Optional[Union[List[Optional[int]], int]] = None
+ bootstrap_room: Optional[Union[List[int], int]] = None
class ChatMessage(BaseModel):
@@ -571,7 +635,7 @@ class EmbeddingRequest(BaseModel):
# Ordered by official OpenAI API documentation
# https://platform.openai.com/docs/api-reference/embeddings/create
input: EmbeddingInput
- model: str
+ model: str = DEFAULT_MODEL_NAME
encoding_format: str = "float"
dimensions: Optional[int] = None
user: Optional[str] = None
@@ -605,7 +669,7 @@ class ScoringRequest(BaseModel):
)
apply_softmax: bool = False
item_first: bool = False
- model: str
+ model: str = DEFAULT_MODEL_NAME
class ScoringResponse(BaseModel):
@@ -737,8 +801,8 @@ def to_sampling_params(
else:
max_tokens = default_max_tokens
- # Avoid exceed the context length by minus 1 token
- max_tokens -= 1
+ # Avoid exceeding the context length by reserving 2 tokens
+ max_tokens -= 2
# Get parameters with defaults
temperature = self.temperature
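A minimal sketch exercising the two new before-validators on ChatCompletionRequest: a `reasoning` object is folded into `reasoning_effort` and `chat_template_kwargs`, and a bare `schema` under `response_format` is wrapped into `json_schema`. The message and schema contents are illustrative, and the sketch assumes an environment where sglang is importable.
```
# Sketch of the new @model_validator(mode="before") hooks; payload values are illustrative.
from sglang.srt.entrypoints.openai.protocol import ChatCompletionRequest

req = ChatCompletionRequest(
    messages=[{"role": "user", "content": "List three colors as JSON."}],
    reasoning={"effort": "low", "enabled": True},
    response_format={
        "type": "json_schema",
        "schema": {"title": "Colors", "type": "object", "properties": {"colors": {"type": "array"}}},
    },
)

print(req.reasoning_effort)                   # "low"
print(req.chat_template_kwargs)               # {"thinking": True}
print(req.response_format.json_schema.name)   # "Colors"
```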
diff --git a/python/sglang/srt/entrypoints/openai/serving_base.py b/python/sglang/srt/entrypoints/openai/serving_base.py
index ad7c35f2044..28b317e6dae 100644
--- a/python/sglang/srt/entrypoints/openai/serving_base.py
+++ b/python/sglang/srt/entrypoints/openai/serving_base.py
@@ -1,15 +1,19 @@
+from __future__ import annotations
+
import json
import logging
import uuid
from abc import ABC, abstractmethod
-from typing import Any, Optional, Union
+from typing import TYPE_CHECKING, Any, Optional, Union
from fastapi import HTTPException, Request
from fastapi.responses import ORJSONResponse, StreamingResponse
from sglang.srt.entrypoints.openai.protocol import ErrorResponse, OpenAIServingRequest
from sglang.srt.managers.io_struct import GenerateReqInput
-from sglang.srt.managers.tokenizer_manager import TokenizerManager
+
+if TYPE_CHECKING:
+ from sglang.srt.managers.tokenizer_manager import TokenizerManager
logger = logging.getLogger(__name__)
diff --git a/python/sglang/srt/entrypoints/openai/serving_chat.py b/python/sglang/srt/entrypoints/openai/serving_chat.py
index d87c50dd620..d67cbfde33d 100644
--- a/python/sglang/srt/entrypoints/openai/serving_chat.py
+++ b/python/sglang/srt/entrypoints/openai/serving_chat.py
@@ -1,14 +1,15 @@
+from __future__ import annotations
+
import copy
import json
import logging
import time
import uuid
-from typing import Any, AsyncGenerator, Dict, List, Optional, Union
+from typing import TYPE_CHECKING, Any, AsyncGenerator, Dict, List, Optional, Union
from fastapi import Request
from fastapi.responses import ORJSONResponse, StreamingResponse
-from sglang.srt.conversation import generate_chat_conv
from sglang.srt.entrypoints.openai.protocol import (
ChatCompletionRequest,
ChatCompletionResponse,
@@ -33,13 +34,16 @@
to_openai_style_logprobs,
)
from sglang.srt.function_call.function_call_parser import FunctionCallParser
-from sglang.srt.jinja_template_utils import process_content_for_template_format
from sglang.srt.managers.io_struct import GenerateReqInput
-from sglang.srt.managers.template_manager import TemplateManager
-from sglang.srt.managers.tokenizer_manager import TokenizerManager
-from sglang.srt.reasoning_parser import ReasoningParser
+from sglang.srt.parser.conversation import generate_chat_conv
+from sglang.srt.parser.jinja_template_utils import process_content_for_template_format
+from sglang.srt.parser.reasoning_parser import ReasoningParser
from sglang.utils import convert_json_schema_to_str
+if TYPE_CHECKING:
+ from sglang.srt.managers.template_manager import TemplateManager
+ from sglang.srt.managers.tokenizer_manager import TokenizerManager
+
logger = logging.getLogger(__name__)
@@ -53,6 +57,7 @@ def __init__(
):
super().__init__(tokenizer_manager)
self.template_manager = template_manager
+ self.tool_call_parser = self.tokenizer_manager.server_args.tool_call_parser
def _request_id_prefix(self) -> str:
return "chatcmpl-"
@@ -81,12 +86,25 @@ def _validate_request(self, request: ChatCompletionRequest) -> Optional[str]:
f"This model supports at most {server_context_length} completion tokens."
)
+ if request.response_format and request.response_format.type == "json_schema":
+ schema = getattr(request.response_format.json_schema, "schema_", None)
+ if schema is None:
+ return "schema_ is required for json_schema response format request."
+
return None
def _convert_to_internal_request(
self,
request: ChatCompletionRequest,
) -> tuple[GenerateReqInput, ChatCompletionRequest]:
"""Convert OpenAI chat completion request to internal format"""
+ reasoning_effort = (
+ request.chat_template_kwargs.pop("reasoning_effort", None)
+ if request.chat_template_kwargs
+ else None
+ )
+ if reasoning_effort is not None:
+ request.reasoning_effort = reasoning_effort
+
is_multimodal = self.tokenizer_manager.model_config.is_multimodal
@@ -135,6 +153,16 @@ def _process_messages(
self, request: ChatCompletionRequest, is_multimodal: bool
) -> MessageProcessingResult:
"""Process chat messages and apply chat template"""
+ is_gpt_oss = (
+ hasattr(self.tokenizer_manager.model_config, "hf_config")
+ and hasattr(self.tokenizer_manager.model_config.hf_config, "model_type")
+ and self.tokenizer_manager.model_config.hf_config.model_type == "gpt_oss"
+ )
+
+ # GptOss model needs to keep special tokens for harmony parsing
+ if is_gpt_oss:
+ request.skip_special_tokens = False
+
tool_call_constraint = None
# Apply chat template and its stop strings
@@ -149,10 +177,11 @@ def _process_messages(
]
else:
tools = [item.function.model_dump() for item in request.tools]
-
- tool_call_parser = self.tokenizer_manager.server_args.tool_call_parser
- parser = FunctionCallParser(request.tools, tool_call_parser)
- tool_call_constraint = parser.get_structure_constraint(request.tool_choice)
+ if self.tool_call_parser:
+ parser = FunctionCallParser(request.tools, self.tool_call_parser)
+ tool_call_constraint = parser.get_structure_constraint(
+ request.tool_choice
+ )
# Use chat template
if self.template_manager.chat_template_name is None:
@@ -194,6 +223,25 @@ def _apply_jinja_template(
audio_data,
modalities,
)
+
+ # Per the Transformers docs and maintainers, tool call arguments in
+ # assistant-role messages with tool_calls must be dicts, not JSON strings;
+ # tool-use chat templates expect them in that form going forward. So for
+ # messages that have tool_calls, parse the arguments string (received in
+ # OpenAI format) into a dict.
+ if (
+ processed_msg["role"] == "assistant"
+ and "tool_calls" in processed_msg
+ and isinstance(processed_msg["tool_calls"], list)
+ ):
+ for item in processed_msg["tool_calls"]:
+ if "arguments" in item["function"] and isinstance(
+ item["function"]["arguments"], str
+ ):
+ item["function"]["arguments"] = json.loads(
+ item["function"]["arguments"]
+ )
+
openai_compatible_messages.append(processed_msg)
# Handle assistant prefix for continue_final_message
@@ -495,7 +543,11 @@ async def _generate_chat_stream(
yield f"data: {chunk.model_dump_json()}\n\n"
# Handle tool calls
- if request.tool_choice != "none" and request.tools:
+ if (
+ request.tool_choice != "none"
+ and request.tools
+ and self.tool_call_parser
+ ):
async for chunk in self._process_tool_call_stream(
index,
delta,
@@ -685,10 +737,13 @@ def _build_chat_response(
# Handle tool calls
tool_calls = None
- if request.tool_choice != "none" and request.tools:
- tool_call_parser = self.tokenizer_manager.server_args.tool_call_parser
+ if (
+ request.tool_choice != "none"
+ and request.tools
+ and self.tool_call_parser
+ ):
tool_calls, text, finish_reason = self._process_tool_calls(
- text, request.tools, tool_call_parser, finish_reason
+ text, request.tools, finish_reason
)
choice_data = ChatCompletionResponseChoice(
@@ -782,26 +837,36 @@ def _process_tool_calls(
self,
text: str,
tools: List[Any],
- tool_call_parser: Optional[str],
finish_reason: Dict[str, Any],
) -> tuple[Optional[List[ToolCall]], str, Dict[str, Any]]:
"""Process tool calls in the response"""
- parser = FunctionCallParser(tools, tool_call_parser)
+ parser = FunctionCallParser(tools, self.tool_call_parser)
if parser.has_tool_call(text):
if finish_reason["type"] == "stop":
finish_reason["type"] = "tool_calls"
finish_reason["matched"] = None
try:
text, call_info_list = parser.parse_non_stream(text)
- tool_calls = [
- ToolCall(
- id=f"call_{uuid.uuid4().hex[:24]}",
- function=FunctionResponse(
- name=call_info.name, arguments=call_info.parameters
- ),
+ tool_calls = []
+ for call_info in call_info_list:
+ # For Kimi-K2, align tool_call_id with the model format: functions.{name}:{index}
+ if (
+ self.tool_call_parser == "kimi_k2"
+ and call_info.name is not None
+ ):
+ tool_id = f"functions.{call_info.name}:{call_info.tool_index}"
+ else:
+ tool_id = f"call_{uuid.uuid4().hex[:24]}"
+
+ tool_calls.append(
+ ToolCall(
+ id=tool_id,
+ index=getattr(call_info, "tool_index", None),
+ function=FunctionResponse(
+ name=call_info.name, arguments=call_info.parameters
+ ),
+ )
)
- for call_info in call_info_list
- ]
return tool_calls, text, finish_reason
except Exception as e:
logger.error(f"Tool call parsing error: {e}")
@@ -859,12 +924,15 @@ def _get_enable_thinking_from_request(self, request: ChatCompletionRequest) -> b
Returns:
The boolean value of 'enable_thinking' if found, otherwise False.
"""
- if (
- hasattr(request, "chat_template_kwargs")
- and request.chat_template_kwargs
- and request.chat_template_kwargs.get("enable_thinking") is not None
- ):
- return request.chat_template_kwargs.get("enable_thinking")
+ if hasattr(request, "chat_template_kwargs") and request.chat_template_kwargs:
+ # For Qwen3 models, `enable_thinking` is supported.
+ if request.chat_template_kwargs.get("enable_thinking") is not None:
+ return request.chat_template_kwargs.get("enable_thinking")
+ # For DeepSeek-V3.1 models, `thinking` is supported.
+ elif request.chat_template_kwargs.get("thinking") is not None:
+ return request.chat_template_kwargs.get("thinking")
+ else:
+ return False
return False
async def _process_tool_call_stream(
@@ -880,7 +948,7 @@ async def _process_tool_call_stream(
if index not in parser_dict:
parser_dict[index] = FunctionCallParser(
tools=request.tools,
- tool_call_parser=self.tokenizer_manager.server_args.tool_call_parser,
+ tool_call_parser=self.tool_call_parser,
)
parser = parser_dict[index]
@@ -909,7 +977,11 @@ async def _process_tool_call_stream(
# Tool call ID should be generated only once per tool call
if call_item.name:
# First chunk: include ID and function name
- tool_call_id = f"call_{uuid.uuid4().hex[:24]}"
+ if self.tool_call_parser == "kimi_k2":
+ # Align with Kimi-K2 format: functions.{name}:{index}
+ tool_call_id = f"functions.{call_item.name}:{call_item.tool_index}"
+ else:
+ tool_call_id = f"call_{uuid.uuid4().hex[:24]}"
function_name = call_item.name
else:
# Subsequent chunks: null ID and name for argument deltas
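Per the handler changes above, tool-call IDs now take two shapes depending on the configured parser. A tiny sketch follows; the function name and index are illustrative.
```
# Tool-call ID shapes produced by the handler above; name and index are illustrative.
import uuid

name, tool_index = "get_weather", 0
kimi_k2_id = f"functions.{name}:{tool_index}"  # aligned with Kimi-K2's native format
default_id = f"call_{uuid.uuid4().hex[:24]}"   # OpenAI-style opaque ID for other parsers
print(kimi_k2_id, default_id)
```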
diff --git a/python/sglang/srt/entrypoints/openai/serving_completions.py b/python/sglang/srt/entrypoints/openai/serving_completions.py
index 51fa3129699..6fe02d3254e 100644
--- a/python/sglang/srt/entrypoints/openai/serving_completions.py
+++ b/python/sglang/srt/entrypoints/openai/serving_completions.py
@@ -1,11 +1,12 @@
+from __future__ import annotations
+
import logging
import time
-from typing import Any, AsyncGenerator, Dict, List, Union
+from typing import TYPE_CHECKING, Any, AsyncGenerator, Dict, List, Optional, Union
from fastapi import Request
from fastapi.responses import ORJSONResponse, StreamingResponse
-from sglang.srt.code_completion_parser import generate_completion_prompt_from_request
from sglang.srt.entrypoints.openai.protocol import (
CompletionRequest,
CompletionResponse,
@@ -21,8 +22,14 @@
to_openai_style_logprobs,
)
from sglang.srt.managers.io_struct import GenerateReqInput
-from sglang.srt.managers.template_manager import TemplateManager
-from sglang.srt.managers.tokenizer_manager import TokenizerManager
+from sglang.srt.parser.code_completion_parser import (
+ generate_completion_prompt_from_request,
+)
+from sglang.utils import convert_json_schema_to_str
+
+if TYPE_CHECKING:
+ from sglang.srt.managers.template_manager import TemplateManager
+ from sglang.srt.managers.tokenizer_manager import TokenizerManager
logger = logging.getLogger(__name__)
@@ -41,6 +48,14 @@ def __init__(
def _request_id_prefix(self) -> str:
return "cmpl-"
+ def _validate_request(self, request: CompletionRequest) -> Optional[str]:
+ """Validate that the input is valid."""
+ prompt = request.prompt
+ if not prompt or (isinstance(prompt, list) and all(not p for p in prompt)):
+ return "Prompt cannot be empty"
+
+ return None
+
def _convert_to_internal_request(
self,
request: CompletionRequest,
@@ -117,6 +132,20 @@ def _build_sampling_params(self, request: CompletionRequest) -> Dict[str, Any]:
"logit_bias": request.logit_bias,
}
+ # Handle response_format constraints
+ if request.response_format and request.response_format.type == "json_schema":
+ sampling_params["json_schema"] = convert_json_schema_to_str(
+ request.response_format.json_schema.schema_
+ )
+ elif request.response_format and request.response_format.type == "json_object":
+ sampling_params["json_schema"] = '{"type": "object"}'
+ elif (
+ request.response_format and request.response_format.type == "structural_tag"
+ ):
+ sampling_params["structural_tag"] = convert_json_schema_to_str(
+ request.response_format.model_dump(by_alias=True)
+ )
+
return sampling_params
async def _handle_streaming_request(
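A minimal sketch of the response_format handling added to _build_sampling_params above, re-implemented standalone for illustration; convert_json_schema_to_str is approximated with json.dumps here.
```
# Standalone sketch mirroring the response_format branch above; not the handler itself.
import json

def map_response_format(response_format: dict) -> dict:
    params = {}
    if response_format.get("type") == "json_schema":
        params["json_schema"] = json.dumps(response_format["json_schema"]["schema"])
    elif response_format.get("type") == "json_object":
        params["json_schema"] = '{"type": "object"}'
    elif response_format.get("type") == "structural_tag":
        params["structural_tag"] = json.dumps(response_format)
    return params

print(map_response_format({"type": "json_object"}))  # {'json_schema': '{"type": "object"}'}
```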
diff --git a/python/sglang/srt/entrypoints/openai/serving_embedding.py b/python/sglang/srt/entrypoints/openai/serving_embedding.py
index b9ac4559f2c..63c4fc34ae8 100644
--- a/python/sglang/srt/entrypoints/openai/serving_embedding.py
+++ b/python/sglang/srt/entrypoints/openai/serving_embedding.py
@@ -1,9 +1,10 @@
-from typing import Any, Dict, List, Optional, Union
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
from fastapi import Request
from fastapi.responses import ORJSONResponse
-from sglang.srt.conversation import generate_embedding_convs
from sglang.srt.entrypoints.openai.protocol import (
EmbeddingObject,
EmbeddingRequest,
@@ -14,8 +15,11 @@
)
from sglang.srt.entrypoints.openai.serving_base import OpenAIServingBase
from sglang.srt.managers.io_struct import EmbeddingReqInput
-from sglang.srt.managers.template_manager import TemplateManager
-from sglang.srt.managers.tokenizer_manager import TokenizerManager
+from sglang.srt.parser.conversation import generate_embedding_convs
+
+if TYPE_CHECKING:
+ from sglang.srt.managers.template_manager import TemplateManager
+ from sglang.srt.managers.tokenizer_manager import TokenizerManager
class OpenAIServingEmbedding(OpenAIServingBase):
diff --git a/python/sglang/srt/entrypoints/openai/serving_responses.py b/python/sglang/srt/entrypoints/openai/serving_responses.py
index a9efe4f3b08..3f7619678e3 100644
--- a/python/sglang/srt/entrypoints/openai/serving_responses.py
+++ b/python/sglang/srt/entrypoints/openai/serving_responses.py
@@ -1,6 +1,7 @@
# SPDX-License-Identifier: Apache-2.0
# Adapted from vLLM's OpenAIServingResponses
"""Handler for /v1/responses requests"""
+from __future__ import annotations
import asyncio
import copy
@@ -9,7 +10,7 @@
import time
from contextlib import AsyncExitStack
from http import HTTPStatus
-from typing import Any, AsyncGenerator, AsyncIterator, Optional, Union
+from typing import TYPE_CHECKING, Any, AsyncGenerator, AsyncIterator, Optional, Union
import jinja2
import openai.types.responses as openai_responses_types
@@ -54,11 +55,13 @@
from sglang.srt.entrypoints.openai.serving_chat import OpenAIServingChat
from sglang.srt.entrypoints.openai.tool_server import MCPToolServer, ToolServer
from sglang.srt.managers.io_struct import GenerateReqInput
-from sglang.srt.managers.template_manager import TemplateManager
-from sglang.srt.managers.tokenizer_manager import TokenizerManager
-from sglang.srt.reasoning_parser import ReasoningParser
+from sglang.srt.parser.reasoning_parser import ReasoningParser
from sglang.srt.utils import random_uuid
+if TYPE_CHECKING:
+ from sglang.srt.managers.template_manager import TemplateManager
+ from sglang.srt.managers.tokenizer_manager import TokenizerManager
+
logger = logging.getLogger(__name__)
@@ -944,7 +947,7 @@ def _send_event(event):
type="output_text",
text="",
annotations=[],
- logprobs=[],
+ logprobs=None,
),
)
)
@@ -992,7 +995,7 @@ def _send_event(event):
type="output_text",
text="",
annotations=[],
- logprobs=[],
+ logprobs=None,
),
)
)
diff --git a/python/sglang/srt/entrypoints/tool.py b/python/sglang/srt/entrypoints/tool.py
index 05c1c8eded4..45b87ac3aca 100644
--- a/python/sglang/srt/entrypoints/tool.py
+++ b/python/sglang/srt/entrypoints/tool.py
@@ -4,6 +4,8 @@
from abc import ABC, abstractmethod
from typing import TYPE_CHECKING, Any
+from sglang.srt.utils import print_info_once, print_warning_once
+
if TYPE_CHECKING:
# Avoid circular import.
from sglang.srt.entrypoints.context import ConversationContext
@@ -25,7 +27,7 @@ def __init__(self):
exa_api_key = os.getenv("EXA_API_KEY")
if not exa_api_key:
self.enabled = False
- logger.warning_once("EXA_API_KEY is not set, browsing is disabled")
+ print_warning_once("EXA_API_KEY is not set, browsing is disabled")
return
try:
@@ -33,12 +35,12 @@ def __init__(self):
from gpt_oss.tools.simple_browser.backend import ExaBackend
except ImportError:
self.enabled = False
- logger.warning_once("gpt_oss is not installed, browsing is disabled")
+ print_warning_once("gpt_oss is not installed, browsing is disabled")
return
browser_backend = ExaBackend(source="web", api_key=exa_api_key)
self.browser_tool = SimpleBrowserTool(backend=browser_backend)
- logger.info_once("Browser tool initialized")
+ print_info_once("Browser tool initialized")
async def get_result(self, context: "ConversationContext") -> Any:
from sglang.srt.entrypoints.context import HarmonyContext
@@ -64,13 +66,11 @@ def __init__(self):
from gpt_oss.tools.python_docker.docker_tool import PythonTool
except ImportError:
self.enabled = False
- logger.warning_once(
- "gpt_oss is not installed, code interpreter is disabled"
- )
+ print_warning_once("gpt_oss is not installed, code interpreter is disabled")
return
self.python_tool = PythonTool()
- logger.info_once("Code interpreter tool initialized")
+ print_info_once("Code interpreter tool initialized")
async def get_result(self, context: "ConversationContext") -> Any:
from sglang.srt.entrypoints.context import HarmonyContext
diff --git a/python/sglang/srt/eplb/eplb_manager.py b/python/sglang/srt/eplb/eplb_manager.py
index 604e2c46493..e88a3d28e0f 100644
--- a/python/sglang/srt/eplb/eplb_manager.py
+++ b/python/sglang/srt/eplb/eplb_manager.py
@@ -55,12 +55,21 @@ def rebalance(self):
enable_timing = self._rebalance_layers_per_chunk is None
if enable_timing:
- torch.cuda.synchronize()
+ torch.get_device_module().synchronize()
time_start = time.time()
- logical_count = get_global_expert_distribution_recorder().dump_record(
+ dump_record_output = get_global_expert_distribution_recorder().dump_record(
output_mode="object"
- )["logical_count"]
+ )
+ logical_count = dump_record_output["logical_count"]
+ average_utilization_rate_over_window = dump_record_output[
+ "average_utilization_rate_over_window"
+ ]
+
+ # Check whether rebalancing is needed
+ if not self._check_rebalance_needed(average_utilization_rate_over_window):
+ return
+
expert_location_metadata = ExpertLocationMetadata.init_by_eplb(
self._server_args, self._model_runner.model_config, logical_count
)
@@ -76,11 +85,26 @@ def rebalance(self):
msg = f"[EPLBManager] rebalance end"
if enable_timing:
- torch.cuda.synchronize()
+ torch.get_device_module().synchronize()
time_end = time.time()
msg += f" time={time_end - time_start:.3f}s"
logger.info(msg)
+ def _check_rebalance_needed(self, average_utilization_rate_over_window):
+ if average_utilization_rate_over_window is None:
+ return True
+
+ if (
+ average_utilization_rate_over_window
+ > self._server_args.eplb_min_rebalancing_utilization_threshold
+ ):
+ logger.info(
+ f"[EPLBManager] Skipped ep rebalancing: current GPU utilization {average_utilization_rate_over_window:.2f} > minimum rebalance threshold {self._server_args.eplb_min_rebalancing_utilization_threshold:.2f}"
+ )
+ return False
+
+ return True
+
def _compute_update_layer_ids_chunks(self) -> List[List[int]]:
all_layer_ids = sorted(
list(self._model_runner.model.routed_experts_weights_of_layer.keys())
diff --git a/python/sglang/srt/eplb/expert_distribution.py b/python/sglang/srt/eplb/expert_distribution.py
index c4a2c38f9b3..3faf981ef38 100644
--- a/python/sglang/srt/eplb/expert_distribution.py
+++ b/python/sglang/srt/eplb/expert_distribution.py
@@ -11,23 +11,31 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
+
+from __future__ import annotations
+
import logging
+import math
import os
import time
from abc import ABC
from collections import deque
from contextlib import contextmanager
from pathlib import Path
-from typing import Any, Dict, List, Literal, Optional, Tuple, Type
+from typing import TYPE_CHECKING, Any, Dict, List, Literal, Optional, Tuple, Type
import einops
import torch
import torch.distributed
-from sglang.srt.eplb.expert_location import ExpertLocationMetadata
from sglang.srt.model_executor.forward_batch_info import ForwardBatch
from sglang.srt.server_args import ServerArgs
-from sglang.srt.utils import Withable, get_bool_env_var
+from sglang.srt.utils import Withable, get_bool_env_var, is_npu
+
+_is_npu = is_npu()
+
+if TYPE_CHECKING:
+ from sglang.srt.eplb.expert_location import ExpertLocationMetadata
logger = logging.getLogger(__name__)
@@ -42,7 +50,7 @@ class ExpertDistributionRecorder(ABC):
@staticmethod
def init_new(
server_args: ServerArgs,
- expert_location_metadata: "ExpertLocationMetadata",
+ expert_location_metadata: ExpertLocationMetadata,
rank: int,
):
if server_args.expert_distribution_recorder_mode is not None:
@@ -117,7 +125,7 @@ class _ExpertDistributionRecorderReal(ExpertDistributionRecorder):
def __init__(
self,
server_args: ServerArgs,
- expert_location_metadata: "ExpertLocationMetadata",
+ expert_location_metadata: ExpertLocationMetadata,
rank: int,
):
self._server_args = server_args
@@ -210,7 +218,9 @@ def on_deepep_dispatch_low_latency(
def _on_hook(self, hook_name: str, **kwargs):
if self._disable_all:
return
- if not (self._recording or torch.cuda.is_current_stream_capturing()):
+ if not (
+ self._recording or torch.get_device_module().is_current_stream_capturing()
+ ):
return
gatherer = self._single_pass_gatherers[
self._accumulator.get_single_pass_gatherer_key(
@@ -278,7 +288,7 @@ class _SinglePassGatherer(ABC):
@staticmethod
def init_new(
server_args: ServerArgs,
- expert_location_metadata: "ExpertLocationMetadata",
+ expert_location_metadata: ExpertLocationMetadata,
rank: int,
) -> "_SinglePassGatherer":
if server_args.expert_distribution_recorder_mode == "per_token":
@@ -306,7 +316,7 @@ def init_new(
return _SelectExpertsSinglePassGatherer(expert_location_metadata, rank)
- def __init__(self, expert_location_metadata: "ExpertLocationMetadata", rank: int):
+ def __init__(self, expert_location_metadata: ExpertLocationMetadata, rank: int):
self._expert_location_metadata = expert_location_metadata
self._rank = rank
@@ -345,7 +355,7 @@ class _DetailSinglePassGatherer(_SinglePassGatherer):
def __init__(
self,
server_args: ServerArgs,
- expert_location_metadata: "ExpertLocationMetadata",
+ expert_location_metadata: ExpertLocationMetadata,
rank: int,
):
super().__init__(expert_location_metadata, rank)
@@ -445,6 +455,10 @@ def _list_sum(a: List, b: List) -> List:
class _LayerBasedGpuSinglePassGatherer(_SinglePassGatherer):
def __init__(self, *args, enable_global_physical_experts: bool, **kwargs):
super().__init__(*args, **kwargs)
+ if not _is_npu:
+ device = "cuda"
+ else:
+ device = "npu"
self._enable_global_physical_experts = enable_global_physical_experts
self._data = torch.zeros(
(
@@ -456,7 +470,7 @@ def __init__(self, *args, enable_global_physical_experts: bool, **kwargs):
),
),
dtype=torch.int,
- device="cuda",
+ device=device,
)
def reset(self):
@@ -560,7 +574,7 @@ class _Accumulator(ABC):
@staticmethod
def init_new(
server_args: ServerArgs,
- expert_location_metadata: "ExpertLocationMetadata",
+ expert_location_metadata: ExpertLocationMetadata,
rank: int,
) -> "_Accumulator":
return _Accumulator.get_class(server_args)(
@@ -579,7 +593,7 @@ def get_class(server_args: ServerArgs) -> Type["_Accumulator"]:
def __init__(
self,
server_args: ServerArgs,
- expert_location_metadata: "ExpertLocationMetadata",
+ expert_location_metadata: ExpertLocationMetadata,
rank: int,
):
self._server_args = server_args
@@ -614,8 +628,8 @@ def __init__(self, *args, **kwargs):
self._enable = self._server_args.enable_expert_distribution_metrics
if self._enable:
- window_sizes = [10, 100, 1000]
- self._history = _DequeCollection(maxlens=window_sizes)
+ self.window_sizes = [10, 100, 1000]
+ self._history = _DequeCollection(maxlens=self.window_sizes)
self._rank = torch.distributed.get_rank()
def append(
@@ -778,7 +792,7 @@ def dump(self, output_mode: _OutputMode):
if self._first_dump:
self._first_dump = False
- torch.cuda.empty_cache()
+ torch.get_device_module().empty_cache()
torch.distributed.all_reduce(
logical_count_of_buffered_step, op=torch.distributed.ReduceOp.SUM
@@ -787,6 +801,7 @@ def dump(self, output_mode: _OutputMode):
output = dict(
rank=self._rank,
logical_count=logical_count_of_buffered_step,
+ average_utilization_rate_over_window=self._get_global_average_utilization_rate(),
)
if output_mode == "file":
@@ -797,6 +812,31 @@ def dump(self, output_mode: _OutputMode):
else:
raise NotImplementedError
+ def _get_global_average_utilization_rate(self):
+ if not self._enable or math.isclose(
+ self._server_args.eplb_min_rebalancing_utilization_threshold, 1.0
+ ):
+ return None
+
+ if self._rank == 0:
+ utilization_mean_rates = self._history.mean()
+ window_index = self.window_sizes[-1]
+ average_utilization_rate_over_window = (
+ utilization_mean_rates[window_index]
+ if window_index in utilization_mean_rates
+ else 0
+ )
+
+ avg_rate_tensor = torch.tensor(
+ [average_utilization_rate_over_window],
+ dtype=torch.float32,
+ device="cuda",
+ )
+ else:
+ avg_rate_tensor = torch.empty(1, dtype=torch.float32, device="cuda")
+ torch.distributed.broadcast(avg_rate_tensor, src=0)
+ return avg_rate_tensor.item()
+
def _dump_to_file(name, data):
save_dir = Path(os.environ.get("SGLANG_EXPERT_DISTRIBUTION_RECORDER_DIR", "/tmp"))
diff --git a/python/sglang/srt/eplb/expert_location.py b/python/sglang/srt/eplb/expert_location.py
index be0e236534b..ee5f2c7ca8b 100644
--- a/python/sglang/srt/eplb/expert_location.py
+++ b/python/sglang/srt/eplb/expert_location.py
@@ -11,21 +11,26 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
+
+from __future__ import annotations
+
import json
import logging
import random
from dataclasses import dataclass
from pathlib import Path
-from typing import List, Optional
+from typing import TYPE_CHECKING, List, Optional
import torch
import torch.distributed
import torch.nn.functional as F
-from sglang.srt.configs.model_config import ModelConfig
from sglang.srt.eplb import eplb_algorithms
from sglang.srt.model_loader import get_model_architecture
-from sglang.srt.server_args import ServerArgs
+
+if TYPE_CHECKING:
+ from sglang.srt.configs.model_config import ModelConfig
+ from sglang.srt.server_args import ServerArgs
logger = logging.getLogger(__name__)
diff --git a/python/sglang/srt/eplb/expert_location_updater.py b/python/sglang/srt/eplb/expert_location_updater.py
index 9887abc9752..772e65f1809 100644
--- a/python/sglang/srt/eplb/expert_location_updater.py
+++ b/python/sglang/srt/eplb/expert_location_updater.py
@@ -47,7 +47,7 @@ def update(
):
if self._first_execution:
self._first_execution = False
- torch.cuda.empty_cache()
+ torch.get_device_module().empty_cache()
old_expert_location_metadata = get_global_expert_location_metadata()
assert old_expert_location_metadata is not None
diff --git a/python/sglang/srt/function_call/deepseekv31_detector.py b/python/sglang/srt/function_call/deepseekv31_detector.py
new file mode 100644
index 00000000000..2045d8daae1
--- /dev/null
+++ b/python/sglang/srt/function_call/deepseekv31_detector.py
@@ -0,0 +1,222 @@
+import json
+import logging
+import re
+from typing import List
+
+from sglang.srt.entrypoints.openai.protocol import Tool
+from sglang.srt.function_call.base_format_detector import BaseFormatDetector
+from sglang.srt.function_call.core_types import (
+ StreamingParseResult,
+ StructureInfo,
+ ToolCallItem,
+ _GetInfoFunc,
+)
+from sglang.srt.function_call.ebnf_composer import EBNFComposer
+from sglang.srt.function_call.utils import _is_complete_json
+
+logger = logging.getLogger(__name__)
+
+
+class DeepSeekV31Detector(BaseFormatDetector):
+ """
+ Detector for the DeepSeek V3.1 model function call format.
+
+ The DeepSeek V3.1 format uses special Unicode tokens to delimit function calls,
+ with JSON arguments placed after the function name.
+
+ Format Structure:
+ ```
+ <|tool▁calls▁begin|><|tool▁call▁begin|>{function_name}<|tool▁sep|>{json_arguments}<|tool▁call▁end|><|tool▁calls▁end|><|end▁of▁sentence|>
+ ```
+ Examples:
+ ```
+ <|tool▁calls▁begin|><|tool▁call▁begin|>get_current_weather<|tool▁sep|>{"location": "Tokyo"}<|tool▁call▁end|><|tool▁call▁begin|>get_current_weather<|tool▁sep|>{"location": "Paris"}<|tool▁call▁end|><|tool▁calls▁end|><|end▁of▁sentence|>
+ ```
+
+ Key Components:
+ - Tool Calls Section: Wrapped between `<|tool▁calls▁begin|>` and `<|tool▁calls▁end|>`
+ - Individual Tool Call: Wrapped between `<|tool▁call▁begin|>` and `<|tool▁call▁end|>`
+ - Function Declaration: `<|tool▁call▁begin|>{function_name}<|tool▁sep|>`
+ - Arguments: JSON object between `<|tool▁sep|>` and `<|tool▁call▁end|>`
+ - Supports multiple tool calls
+
+ Reference: https://www.modelscope.cn/models/deepseek-ai/DeepSeek-V3.1
+ """
+
+ def __init__(self):
+ super().__init__()
+ self.bot_token = "<|tool▁calls▁begin|>"
+ self.eot_token = "<|tool▁calls▁end|>"
+ self.func_call_regex = r"<|tool▁call▁begin|>.*?<|tool▁call▁end|>"
+ self.func_detail_regex = (
+ r"<|tool▁call▁begin|>(.*)<|tool▁sep|>(.*)<|tool▁call▁end|>"
+ )
+ self._last_arguments = ""
+ self.current_tool_id = -1
+
+ def has_tool_call(self, text: str) -> bool:
+ """Check if the text contains a deepseek format tool call."""
+ return self.bot_token in text
+
+ def detect_and_parse(self, text: str, tools: List[Tool]) -> StreamingParseResult:
+ """
+ One-time parsing: Detects and parses tool calls in the provided text.
+
+ :param text: The complete text to parse.
+ :param tools: List of available tools.
+ :return: StreamingParseResult with any leftover normal text and the parsed tool calls.
+ """
+ idx = text.find(self.bot_token)
+ normal_text = text[:idx].strip() if idx != -1 else text
+ if self.bot_token not in text:
+ return StreamingParseResult(normal_text=normal_text, calls=[])
+ match_result_list = re.findall(self.func_call_regex, text, re.DOTALL)
+ calls = []
+ try:
+ for match_result in match_result_list:
+ # Get function name
+ func_detail = re.search(self.func_detail_regex, match_result, re.DOTALL)
+ func_name = func_detail.group(1)
+ func_args = func_detail.group(2)
+ func_args = json.loads(func_args)
+ # construct match_result for parse_base_json
+ match_result = {"name": func_name, "parameters": func_args}
+ calls.extend(self.parse_base_json(match_result, tools))
+ return StreamingParseResult(normal_text=normal_text, calls=calls)
+ except Exception as e:
+ logger.error(f"Error in detect_and_parse: {e}")
+ # return the normal text if parsing fails
+ return StreamingParseResult(normal_text=text)
+
+ def parse_streaming_increment(
+ self, new_text: str, tools: List[Tool]
+ ) -> StreamingParseResult:
+ """
+ Streaming incremental parsing of tool calls for the DeepSeek V3.1 format.
+ """
+ self._buffer += new_text
+ current_text = self._buffer
+
+ # Check if we have a tool call (either the start token or individual tool call)
+ has_tool_call = (
+ self.bot_token in current_text or "<|tool▁call▁begin|>" in current_text
+ )
+
+ if not has_tool_call:
+ self._buffer = ""
+ for e_token in [self.eot_token, "<|tool▁call▁end|>"]:
+ if e_token in new_text:
+ new_text = new_text.replace(e_token, "")
+ return StreamingParseResult(normal_text=new_text)
+
+ if not hasattr(self, "_tool_indices"):
+ self._tool_indices = self._get_tool_indices(tools)
+
+ calls: list[ToolCallItem] = []
+ try:
+ partial_match = re.search(
+ pattern=r"<|tool▁call▁begin|>(.*)<|tool▁sep|>(.*)<|tool▁call▁end|>",
+ string=current_text,
+ flags=re.DOTALL,
+ )
+ if partial_match:
+ func_name = partial_match.group(1).strip()
+ func_args_raw = partial_match.group(2).strip()
+
+ # Initialize state if this is the first tool call
+ if self.current_tool_id == -1:
+ self.current_tool_id = 0
+ self.prev_tool_call_arr = []
+ self.streamed_args_for_tool = [""]
+
+ # Ensure we have enough entries in our tracking arrays
+ while len(self.prev_tool_call_arr) <= self.current_tool_id:
+ self.prev_tool_call_arr.append({})
+ while len(self.streamed_args_for_tool) <= self.current_tool_id:
+ self.streamed_args_for_tool.append("")
+
+ if not self.current_tool_name_sent:
+ calls.append(
+ ToolCallItem(
+ tool_index=self.current_tool_id,
+ name=func_name,
+ parameters="",
+ )
+ )
+ self.current_tool_name_sent = True
+ # Store the tool call info for serving layer completions endpoint
+ self.prev_tool_call_arr[self.current_tool_id] = {
+ "name": func_name,
+ "arguments": {},
+ }
+ else:
+ argument_diff = (
+ func_args_raw[len(self._last_arguments) :]
+ if func_args_raw.startswith(self._last_arguments)
+ else func_args_raw
+ )
+
+ if argument_diff:
+ calls.append(
+ ToolCallItem(
+ tool_index=self.current_tool_id,
+ name=None,
+ parameters=argument_diff,
+ )
+ )
+ self._last_arguments += argument_diff
+ self.streamed_args_for_tool[
+ self.current_tool_id
+ ] += argument_diff
+
+ if _is_complete_json(func_args_raw):
+ # Update the stored arguments
+ try:
+ parsed_args = json.loads(func_args_raw)
+ self.prev_tool_call_arr[self.current_tool_id][
+ "arguments"
+ ] = parsed_args
+ except json.JSONDecodeError:
+ pass
+
+ # Find the end of the current tool call and remove only that part from buffer
+ tool_call_end_pattern = (
+ r"<|tool▁call▁begin|>.*?<|tool▁call▁end|>"
+ )
+ match = re.search(
+ tool_call_end_pattern, current_text, re.DOTALL
+ )
+ if match:
+ # Remove the completed tool call from buffer, keep any remaining content
+ self._buffer = current_text[match.end() :]
+ else:
+ self._buffer = ""
+
+ result = StreamingParseResult(normal_text="", calls=calls)
+ self.current_tool_id += 1
+ self._last_arguments = ""
+ self.current_tool_name_sent = False
+ return result
+
+ return StreamingParseResult(normal_text="", calls=calls)
+
+ except Exception as e:
+ logger.error(f"Error in parse_streaming_increment: {e}")
+ return StreamingParseResult(normal_text=current_text)
+
+ def structure_info(self) -> _GetInfoFunc:
+ return lambda name: StructureInfo(
+ begin="<|tool▁call▁begin|>" + name + "<|tool▁sep|>",
+ end="<|tool▁call▁end|>",
+ trigger="<|tool▁call▁begin|>" + name + "<|tool▁sep|>",
+ )
+
+ def build_ebnf(self, tools: List[Tool]):
+ return EBNFComposer.build_ebnf(
+ tools,
+ sequence_start_token=self.bot_token,
+ sequence_end_token=self.eot_token,
+ tool_call_separator="",
+ call_rule_fmt='"<|tool▁call▁begin|>{name}<|tool▁sep|>{arguments_rule}<|tool▁call▁end|>"',
+ function_format="json",
+ )
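A minimal usage sketch of the new detector; the function name and arguments are illustrative, and has_tool_call is only a substring check on the opening token.
```
# Sketch of the token layout DeepSeekV31Detector looks for; the call is illustrative.
from sglang.srt.function_call.deepseekv31_detector import DeepSeekV31Detector

detector = DeepSeekV31Detector()
text = (
    "<|tool▁calls▁begin|><|tool▁call▁begin|>get_current_weather"
    '<|tool▁sep|>{"location": "Tokyo"}<|tool▁call▁end|><|tool▁calls▁end|>'
)
print(detector.has_tool_call(text))  # True
```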
diff --git a/python/sglang/srt/function_call/deepseekv3_detector.py b/python/sglang/srt/function_call/deepseekv3_detector.py
index afd0e301270..33c4dfc44e8 100644
--- a/python/sglang/srt/function_call/deepseekv3_detector.py
+++ b/python/sglang/srt/function_call/deepseekv3_detector.py
@@ -215,6 +215,6 @@ def build_ebnf(self, tools: List[Tool]):
sequence_start_token=self.bot_token,
sequence_end_token=self.eot_token,
tool_call_separator="",
- call_rule_fmt='"<|tool▁call▁begin|>function<|tool▁sep|>{name}\\n```json\\n" {arguments_rule} "\\n```<|tool▁call▁end|>"',
+ call_rule_fmt='"<|tool▁call▁begin|>function<|tool▁sep|>{name}\\n```json\\n"{arguments_rule}"\\n```<|tool▁call▁end|>"',
function_format="json",
)
diff --git a/python/sglang/srt/function_call/ebnf_composer.py b/python/sglang/srt/function_call/ebnf_composer.py
index d41968ea749..21b31398243 100644
--- a/python/sglang/srt/function_call/ebnf_composer.py
+++ b/python/sglang/srt/function_call/ebnf_composer.py
@@ -50,19 +50,19 @@ class EBNFComposer:
CALL_RULE_MAP = {
"pythonic": 'call_{name} ::= "{name}" "(" {arguments_rule} ")"',
- "json": 'call_{name} ::= "{{" "\\"name\\"" ":" "\\"{name}\\"" ", " "\\"arguments\\"" ":" {arguments_rule} "}}"',
+ "json": 'call_{name} ::= "{{" ws "\\"name\\"" ws ":" ws "\\"{name}\\"" ws "," ws "\\"arguments\\"" ws ":" ws {arguments_rule} ws "}}"',
"xml": 'call_{name} ::= "\\n" {arguments_rule} "\\n"',
}
ARGUMENTS_RULE_MAP = {
"pythonic": "{arg_rules}",
- "json": '"{{" {arg_rules} "}}"',
+ "json": '"{{" ws {arg_rules} ws "}}"',
"xml": "{arg_rules}",
}
KEY_VALUE_RULE_MAP = {
"pythonic": '"{key}" "=" {valrule}',
- "json": '"\\"{key}\\"" ":" {valrule}',
+ "json": '"\\"{key}\\"" ws ":" ws {valrule}',
"xml": '"\\n" {valrule} "\\n"',
}
@@ -165,7 +165,7 @@ def build_ebnf(
tool_call_separator: Optional[str] = None,
call_rule_fmt: Optional[str] = None,
key_value_rule_fmt: Optional[str] = None,
- key_value_separator: str = ",",
+ key_value_separator: str = 'ws "," ws',
):
"""
Generalized EBNF builder for all detectors.
@@ -183,6 +183,10 @@ def build_ebnf(
key_value_rule_fmt: Optional custom format string for key-value pairs. It should define how each parameter is formatted,
with placeholders {key} for the parameter name and {valrule} for the value rule. If None, a default format
based on function_format will be used.
+ key_value_separator: Raw EBNF fragment inserted between key-value pairs.
+ This string is used verbatim (not auto-quoted). Pass:
+ - Quoted terminals when you need a literal token (e.g. '","' or '"\\n"').
+ - Raw/non-terminals when you need grammar tokens (e.g. 'ws "," ws').
"""
# =================================================================
# Step 1: Determine the root tool calls rule
@@ -281,9 +285,7 @@ def build_ebnf(
# Add required properties joined by commas
if required:
rule_parts.append(
- f' "{key_value_separator}" '.join(
- prop_kv_pairs[k] for k in required
- )
+ f" {key_value_separator} ".join(prop_kv_pairs[k] for k in required)
)
# Add optional properties with flexible ordering
@@ -298,14 +300,14 @@ def build_ebnf(
opt_parts.append(prop_kv_pairs[optional[j]])
else:
opt_parts.append(
- f' ( "{key_value_separator}" {prop_kv_pairs[optional[j]]} )?'
+ f" ( {key_value_separator} {prop_kv_pairs[optional[j]]} )?"
)
opt_alternatives.append("".join(opt_parts))
# Wrap with appropriate comma handling based on whether we have required properties
if required:
# Required properties exist, so optional group needs outer comma
- rule_parts.append(f' ( "{key_value_separator}" ( ')
+ rule_parts.append(f" ( {key_value_separator} ( ")
rule_parts.append(" | ".join(opt_alternatives))
rule_parts.append(" ) )?")
else:
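With the change above, key_value_separator is spliced into the grammar verbatim, so callers choose between a quoted terminal and a fragment built from non-terminals. A small sketch of the resulting rule text; the property names are illustrative.
```
# How the separator is joined verbatim between key-value rules; property names are illustrative.
kv_pairs = [
    '"\\"location\\"" ws ":" ws basic_string',
    '"\\"unit\\"" ws ":" ws basic_string',
]
default_sep = 'ws "," ws'  # new JSON default: optional whitespace around the comma
glm4_sep = '"\\n"'         # glm4_moe now passes a quoted newline terminal

print((" " + default_sep + " ").join(kv_pairs))
print((" " + glm4_sep + " ").join(kv_pairs))
```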
diff --git a/python/sglang/srt/function_call/function_call_parser.py b/python/sglang/srt/function_call/function_call_parser.py
index 97e9814bfba..18fe488e4ef 100644
--- a/python/sglang/srt/function_call/function_call_parser.py
+++ b/python/sglang/srt/function_call/function_call_parser.py
@@ -10,6 +10,7 @@
from sglang.srt.function_call.base_format_detector import BaseFormatDetector
from sglang.srt.function_call.core_types import ToolCallItem
from sglang.srt.function_call.deepseekv3_detector import DeepSeekV3Detector
+from sglang.srt.function_call.deepseekv31_detector import DeepSeekV31Detector
from sglang.srt.function_call.glm4_moe_detector import Glm4MoeDetector
from sglang.srt.function_call.gpt_oss_detector import GptOssDetector
from sglang.srt.function_call.kimik2_detector import KimiK2Detector
@@ -37,6 +38,7 @@ class FunctionCallParser:
"qwen25": Qwen25Detector,
"mistral": MistralDetector,
"deepseekv3": DeepSeekV3Detector,
+ "deepseekv31": DeepSeekV31Detector,
"pythonic": PythonicDetector,
"kimi_k2": KimiK2Detector,
"qwen3_coder": Qwen3CoderDetector,
diff --git a/python/sglang/srt/function_call/glm4_moe_detector.py b/python/sglang/srt/function_call/glm4_moe_detector.py
index 39822fb19a5..6e89fe0a167 100644
--- a/python/sglang/srt/function_call/glm4_moe_detector.py
+++ b/python/sglang/srt/function_call/glm4_moe_detector.py
@@ -160,5 +160,5 @@ def build_ebnf(self, tools: List[Tool]):
function_format="xml",
call_rule_fmt='"{name}" "\\n" ( {arguments_rule} "\\n" )?',
key_value_rule_fmt='"{key}" "\\n" "" {valrule} ""',
- key_value_separator="\\n",
+ key_value_separator='"\\n"',
)
diff --git a/python/sglang/srt/function_call/gpt_oss_detector.py b/python/sglang/srt/function_call/gpt_oss_detector.py
index 5cde6478006..7fe0a7dc8c4 100644
--- a/python/sglang/srt/function_call/gpt_oss_detector.py
+++ b/python/sglang/srt/function_call/gpt_oss_detector.py
@@ -1,7 +1,7 @@
import json
import logging
import re
-from typing import List
+from typing import List, Optional
from sglang.srt.entrypoints.openai.protocol import Tool
from sglang.srt.function_call.base_format_detector import BaseFormatDetector
@@ -10,60 +10,31 @@
ToolCallItem,
_GetInfoFunc,
)
+from sglang.srt.parser.harmony_parser import HarmonyParser
logger = logging.getLogger(__name__)
class GptOssDetector(BaseFormatDetector):
"""
- Detector for T4-style function calls with channel format.
+ Detector for T4-style function calls using HarmonyParser.
- Supports two formats:
- 1. Direct function call: <|channel|>commentary to={namespace.function}<|constrain|>json<|message|>{args}<|call|>
- 2. Commentary with action plan: <|channel|>commentary<|message|>{content}<|end|>
-
- For parallel function calls, each call is self-contained and starts with its own channel:
- <|channel|>commentary to=functions.get_weather<|constrain|>json<|message|>{"location":"SF"}<|call|>
- <|channel|>commentary to=functions.search<|constrain|>json<|message|>{"query":"SF attractions"}<|call|>
-
- Examples:
- Single: <|channel|>commentary to=functions.get_weather<|constrain|>json<|message|>{"location":"San Francisco"}<|call|>commentary
- Multiple: <|channel|>commentary to=functions.get_weather<|constrain|>json<|message|>{"location":"Paris"}<|call|>commentary<|channel|>commentary to=functions.search<|constrain|>json<|message|>{"query":"Paris tourism"}<|call|>
- With Action Plan: <|channel|>commentary<|message|>**Action plan**: 1. Do X 2. Do Y<|end|><|start|>assistant<|channel|>commentary to=functions.x<|constrain|>json<|message|>{"template": "basic_html", "path": "index.html"}<|call|>
+ Handles tool calls in the format:
+ <|channel|>commentary to={namespace.function}<|constrain|>json<|message|>{args}<|call|>
"""
def __init__(self):
super().__init__()
+ self.harmony_parser = HarmonyParser()
self.bot_token = "<|start|>assistant<|channel|>commentary"
self.eot_token = "<|call|>"
- # TODO: no clear indication how parallel tool call response format is
- self.tool_call_separator = ""
-
- # Pattern for complete function calls with to= parameter
- # Handles both <|call|> and <|call|>commentary endings
- # Also handles optional <|start|>assistant prefix and whitespace after function name
- self.function_call_pattern = re.compile(
- r"(?:<\|start\|>assistant)?<\|channel\|>commentary to=([a-zA-Z_][a-zA-Z0-9_]*(?:\.[a-zA-Z_][a-zA-Z0-9_]*)*)\s*"
- r"<\|constrain\|>json<\|message\|>(.*?)<\|call\|>(?:commentary)?",
- re.DOTALL,
- )
-
- # Pattern for streaming function calls (incomplete)
- # Also handles optional whitespace after function name
- self.streaming_pattern = re.compile(
- r"(?:<\|start\|>assistant)?<\|channel\|>commentary to=([a-zA-Z_][a-zA-Z0-9_]*(?:\.[a-zA-Z_][a-zA-Z0-9_]*)*)\s*"
- r"<\|constrain\|>json<\|message\|>(.*)",
- re.DOTALL,
- )
- # Pattern for commentary with action plan (no to= parameter)
- self.commentary_pattern = re.compile(
- r"<\|channel\|>commentary<\|message\|>(.*?)<\|end\|>",
+ # Pattern to extract function name and JSON from tool_call event content
+ self.tool_extract_pattern = re.compile(
+ r"to=([a-zA-Z_][a-zA-Z0-9_.]*)\s*<\|constrain\|>json<\|message\|>(.*?)(?:<\|call\|>|$)",
re.DOTALL,
)
- self._last_arguments = ""
-
def has_tool_call(self, text: str) -> bool:
"""Check if text contains TypeScript-style function call markers."""
return self.bot_token in text
@@ -73,259 +44,176 @@ def detect_and_parse(self, text: str, tools: List[Tool]) -> StreamingParseResult
if not self.has_tool_call(text):
return StreamingParseResult(normal_text=text, calls=[])
- tool_indices = self._get_tool_indices(tools)
+ # Parse with HarmonyParser
+ events = self.harmony_parser.parse(text)
+ # Flush buffer for complete parsing
+ events += self.harmony_parser.parse("")
+ tool_indices = self._get_tool_indices(tools)
calls = []
+ normal_parts = []
tool_index = 0
- # Process the entire text to handle mixed commentary and tool calls
- normal_text_parts = []
-
- # Find all commentary sections (both with and without to=)
- all_commentary_pattern = re.compile(
- r"<\|channel\|>commentary(?:\s+to=[^<]*)?<\|message\|>(.*?)(?:<\|end\|>|<\|call\|>)",
- re.DOTALL,
- )
-
- # Track processed positions to avoid double-processing
- processed_ranges = []
-
- # First, extract all tool calls
- for match in self.function_call_pattern.finditer(text):
- full_function_name = match.group(1)
- args_content = match.group(2)
- processed_ranges.append((match.start(), match.end()))
-
- function_name = (
- full_function_name.split(".")[-1]
- if "." in full_function_name
- else full_function_name
- )
-
- try:
- arguments = json.loads(args_content) if args_content.strip() else {}
- except json.JSONDecodeError:
- continue
-
- if function_name in tool_indices:
- calls.append(
- ToolCallItem(
- tool_index=tool_index,
- name=function_name,
- parameters=json.dumps(arguments, ensure_ascii=False),
- )
+ for event in events:
+ if event.event_type == "tool_call":
+ # Extract tool call from event content
+ tool_call = self._extract_tool_call_from_event(
+ event.raw_text if event.raw_text else event.content,
+ tool_indices,
+ tool_index,
)
- tool_index += 1
-
- # Then, find non-tool-call commentary sections for normal text
- for match in all_commentary_pattern.finditer(text):
- # Check if this match overlaps with any processed tool call
- match_start, match_end = match.start(), match.end()
- is_tool_call = any(
- start <= match_start < end or start < match_end <= end
- for start, end in processed_ranges
- )
-
- # If this commentary is not part of a tool call, include it in normal text
- if not is_tool_call:
- content = match.group(1).strip()
- if content:
- normal_text_parts.append(content)
-
- # Handle remaining text after all matches
- if processed_ranges:
- last_match_end = max(end for _, end in processed_ranges)
- if last_match_end < len(text):
- remaining_text = text[last_match_end:]
-
- # Clean up <|start|>assistant prefixes and extract final content
- # Remove standalone <|start|>assistant prefixes
- remaining_text = re.sub(r"<\|start\|>assistant(?!\w)", "", remaining_text)
-
- # Extract content from final channel if present
- final_pattern = re.compile(
- r"<\|channel\|>final<\|message\|>(.*?)(?:<\|return\|>|$)", re.DOTALL
- )
- final_match = final_pattern.search(remaining_text)
-
- if final_match:
- # Get everything before final channel + final channel content
- before_final = remaining_text[: final_match.start()].strip()
- final_content = final_match.group(1).strip()
+ if tool_call:
+ calls.append(tool_call)
+ tool_index += 1
+ elif event.event_type == "normal":
+ normal_parts.append(event.content)
+ # Ignore reasoning events in function call context
- parts = []
- if before_final:
- parts.append(before_final)
- if final_content:
- parts.append(final_content)
- remaining_text = " ".join(parts) if parts else ""
-
- remaining_text = remaining_text.strip()
-
- if remaining_text:
- normal_text_parts.append(remaining_text)
-
- # Combine all normal text parts
- final_normal_text = " ".join(part for part in normal_text_parts if part).strip()
- return StreamingParseResult(normal_text=final_normal_text, calls=calls)
+ normal_text = " ".join(normal_parts).strip()
+ return StreamingParseResult(normal_text=normal_text, calls=calls)
def parse_streaming_increment(
self, new_text: str, tools: List[Tool]
) -> StreamingParseResult:
"""Parse incremental streaming text for TypeScript-style function calls."""
self._buffer += new_text
- current_text = self._buffer
-
- # Check if we have a tool call
- has_tool_call = "<|channel|>commentary to=" in current_text
-
- if not has_tool_call and current_text:
- # Check for commentary without function calls
- commentary_match = self.commentary_pattern.search(current_text)
- if commentary_match:
- commentary_content = commentary_match.group(1)
- self._buffer = current_text[commentary_match.end() :]
- return StreamingParseResult(normal_text=commentary_content, calls=[])
-
- # Check for final channel content
- final_pattern = re.compile(
- r"<\|channel\|>final<\|message\|>(.*?)(?:<\|return\|>|$)",
- re.DOTALL,
+
+ # Always use HarmonyParser for parsing to ensure proper filtering
+ events = self.harmony_parser.parse(new_text)
+
+ # Quick check if we might have tool calls
+ if (
+ "<|channel|>commentary to=" not in self._buffer
+ and not self.current_tool_name_sent
+ ):
+ # No tool calls detected, check for final content
+ if (
+ "<|channel|>final" in self._buffer
+ or "assistantfinal" in self._buffer.lower()
+ ):
+ # Extract normal text from events
+ normal_text = "".join(
+ [e.content for e in events if e.event_type == "normal"]
+ )
+ if normal_text:
+ self._buffer = ""
+ return StreamingParseResult(normal_text=normal_text, calls=[])
+
+ # For other content, extract normal text from events (with filtering applied)
+ normal_text = "".join(
+ [e.content for e in events if e.event_type == "normal"]
)
- final_match = final_pattern.search(current_text)
- if final_match:
- final_content = final_match.group(1).strip()
+ if normal_text or events:
self._buffer = ""
- return StreamingParseResult(normal_text=final_content, calls=[])
+ return StreamingParseResult(normal_text=normal_text, calls=[])
+ else:
+ # No events processed, continue buffering
+ return StreamingParseResult(normal_text="", calls=[])
- self._buffer = ""
- return StreamingParseResult(normal_text=new_text, calls=[])
+ if not events:
+ # No complete events yet
+ return StreamingParseResult(normal_text="", calls=[])
+ # Initialize state if needed
if not hasattr(self, "_tool_indices"):
self._tool_indices = self._get_tool_indices(tools)
calls = []
- try:
- # Check for streaming function call
- match = self.streaming_pattern.search(current_text)
- if match:
- full_function_name = match.group(1)
- args_content = match.group(2)
-
- function_name = (
- full_function_name.split(".")[-1]
- if "." in full_function_name
- else full_function_name
+ normal_text = ""
+
+ for event in events:
+ if event.event_type == "tool_call":
+ # We got a complete tool call from HarmonyParser
+ tool_call_info = self._extract_tool_call_from_event(
+ event.raw_text if event.raw_text else event.content,
+ self._tool_indices,
+ self.current_tool_id if self.current_tool_id >= 0 else 0,
)
- # Initialize state if this is the first tool call
- if self.current_tool_id == -1:
- self.current_tool_id = 0
- self.prev_tool_call_arr = []
- self.streamed_args_for_tool = [""]
-
- # Ensure we have enough entries in tracking arrays
- while len(self.prev_tool_call_arr) <= self.current_tool_id:
- self.prev_tool_call_arr.append({})
- while len(self.streamed_args_for_tool) <= self.current_tool_id:
- self.streamed_args_for_tool.append("")
-
- if not self.current_tool_name_sent:
- calls.append(
- ToolCallItem(
- tool_index=self.current_tool_id,
- name=function_name,
- parameters="",
- )
- )
- self.current_tool_name_sent = True
- # Store the tool call info
+ if tool_call_info:
+ # Initialize state if first tool
+ if self.current_tool_id == -1:
+ self.current_tool_id = 0
+ self.prev_tool_call_arr = []
+ self.streamed_args_for_tool = [""]
+
+ # Ensure arrays are large enough
+ while len(self.prev_tool_call_arr) <= self.current_tool_id:
+ self.prev_tool_call_arr.append({})
+ while len(self.streamed_args_for_tool) <= self.current_tool_id:
+ self.streamed_args_for_tool.append("")
+
+ # Store tool call info
self.prev_tool_call_arr[self.current_tool_id] = {
- "name": function_name,
- "arguments": {},
+ "name": tool_call_info.name,
+ "arguments": json.loads(tool_call_info.parameters),
}
- self.streamed_args_for_tool[self.current_tool_id] = ""
-
- # Check if we have a complete function call
- complete_match = self.function_call_pattern.search(current_text)
- if complete_match:
- args_content = complete_match.group(2)
-
- try:
- parsed_args = json.loads(args_content)
- self.prev_tool_call_arr[self.current_tool_id][
- "arguments"
- ] = parsed_args
-
- # Send complete arguments if we haven't sent them yet
- if not self.streamed_args_for_tool[self.current_tool_id]:
- # Send the complete arguments as JSON string
- calls.append(
- ToolCallItem(
- tool_index=self.current_tool_id,
- name=None,
- parameters=json.dumps(
- parsed_args, ensure_ascii=False
- ),
- )
- )
- self.streamed_args_for_tool[self.current_tool_id] = (
- json.dumps(parsed_args, ensure_ascii=False)
- )
- except json.JSONDecodeError:
- pass
-
- # Remove the completed function call from buffer
- remaining_after_call = current_text[complete_match.end() :]
-
- # Clean up <|start|>assistant prefixes and extract final content
- remaining_after_call = re.sub(
- r"<\|start\|>assistant(?!\w)", "", remaining_after_call
- )
- # Extract content from final channel if present
- final_pattern = re.compile(
- r"<\|channel\|>final<\|message\|>(.*?)(?:<\|return\|>|$)",
- re.DOTALL,
+ # Emit the complete tool call at once
+ # (Could be modified to emit name first, then args, if needed)
+ calls.append(tool_call_info)
+
+ # Mark as streamed
+ self.streamed_args_for_tool[self.current_tool_id] = (
+ tool_call_info.parameters
)
- final_match = final_pattern.search(remaining_after_call)
- if final_match:
- before_final = remaining_after_call[
- : final_match.start()
- ].strip()
- final_content = final_match.group(1).strip()
+ # Move to next tool
+ self.current_tool_id += 1
+ self.current_tool_name_sent = False
+
+ elif event.event_type == "normal":
+ normal_text += event.content
- parts = []
- if before_final:
- parts.append(before_final)
- if final_content:
- parts.append(final_content)
- remaining_after_call = " ".join(parts) if parts else ""
+ # Clear buffer since HarmonyParser handles buffering
+ self._buffer = ""
- self._buffer = remaining_after_call.strip()
+ return StreamingParseResult(normal_text=normal_text, calls=calls)
- # Reset state for next tool call
- self.current_tool_name_sent = False
- self.current_tool_id += 1
+ def _extract_tool_call_from_event(
+ self, content: str, tool_indices: dict, tool_index: int
+ ) -> Optional[ToolCallItem]:
+ """
+ Extract tool call information from HarmonyParser event content.
- # Return final content if available
- final_text = ""
- if final_match and final_content:
- final_text = final_content
- elif remaining_after_call:
- final_text = remaining_after_call
+ Content format: "commentary to=functions.get_weather<|constrain|>json<|message|>{...}"
+ """
+ match = self.tool_extract_pattern.search(content)
- return StreamingParseResult(normal_text=final_text, calls=calls)
+ if not match:
+ logger.debug(f"Could not extract tool call from: {content[:100]}")
+ return None
- return StreamingParseResult(normal_text="", calls=calls)
+ full_function_name = match.group(1)
+ json_content = match.group(2)
- except Exception as e:
- logger.error(f"Error in parse_streaming_increment: {e}")
- return StreamingParseResult(normal_text=current_text, calls=[])
+ # Extract function name (last part after .)
+ function_name = (
+ full_function_name.split(".")[-1]
+ if "." in full_function_name
+ else full_function_name
+ )
+
+ # Check if tool exists
+ if function_name not in tool_indices:
+ logger.debug(f"Function {function_name} not in available tools")
+ return None
+
+ # Parse JSON arguments
+ try:
+ arguments = json.loads(json_content) if json_content.strip() else {}
+ except json.JSONDecodeError as e:
+ logger.debug(f"Failed to parse JSON arguments: {e}")
+ return None
+
+ return ToolCallItem(
+ tool_index=tool_index,
+ name=function_name,
+ parameters=json.dumps(arguments, ensure_ascii=False),
+ )
def structure_info(self) -> _GetInfoFunc:
- raise NotImplementedError()
+ raise NotImplementedError("structure_info not used with HarmonyParser")
def build_ebnf(self, tools: List[Tool]) -> str:
- raise NotImplementedError()
+ raise NotImplementedError("build_ebnf not used with HarmonyParser")
diff --git a/python/sglang/srt/function_call/qwen3_coder_detector.py b/python/sglang/srt/function_call/qwen3_coder_detector.py
index 454f5048ed3..9bd3c7c24d7 100644
--- a/python/sglang/srt/function_call/qwen3_coder_detector.py
+++ b/python/sglang/srt/function_call/qwen3_coder_detector.py
@@ -358,5 +358,5 @@ def build_ebnf(self, tools: List[Tool]):
function_format="xml",
call_rule_fmt='"\\n" {arguments_rule} "\\n"',
key_value_rule_fmt='"\\n" {valrule} "\\n"',
- key_value_separator="\\n",
+ key_value_separator='"\\n"',
)
diff --git a/python/sglang/srt/hf_transformers_utils.py b/python/sglang/srt/hf_transformers_utils.py
index 1e9b32f014a..202bb887430 100644
--- a/python/sglang/srt/hf_transformers_utils.py
+++ b/python/sglang/srt/hf_transformers_utils.py
@@ -40,7 +40,9 @@
DeepseekVL2Config,
ExaoneConfig,
KimiVLConfig,
+ LongcatFlashConfig,
MultiModalityConfig,
+ Qwen3NextConfig,
Step3VLConfig,
)
from sglang.srt.configs.internvl import InternVLChatConfig
@@ -56,6 +58,8 @@
KimiVLConfig.model_type: KimiVLConfig,
InternVLChatConfig.model_type: InternVLChatConfig,
Step3VLConfig.model_type: Step3VLConfig,
+ LongcatFlashConfig.model_type: LongcatFlashConfig,
+ Qwen3NextConfig.model_type: Qwen3NextConfig,
}
for name, cls in _CONFIG_REGISTRY.items():
@@ -126,9 +130,36 @@ def get_config(
kwargs["gguf_file"] = model
model = Path(model).parent
+ if is_remote_url(model):
+ # BaseConnector implements __del__() to clean up the local dir.
+ # Since config files need to exist all the time, we DO NOT use a
+ # `with` statement here, to avoid closing the client.
+ client = create_remote_connector(model)
+ client.pull_files(ignore_pattern=["*.pt", "*.safetensors", "*.bin"])
+ model = client.get_local_dir()
+
config = AutoConfig.from_pretrained(
model, trust_remote_code=trust_remote_code, revision=revision, **kwargs
)
+ if (
+ config.architectures is not None
+ and config.architectures[0] == "Phi4MMForCausalLM"
+ ):
+ # Phi4MMForCausalLM uses a hard-coded vision_config. See:
+ # https://github.com/vllm-project/vllm/blob/6071e989df1531b59ef35568f83f7351afb0b51e/vllm/model_executor/models/phi4mm.py#L71
+ # We set it here to support cases where num_attention_heads is not divisible by the TP size.
+ from transformers import SiglipVisionConfig
+
+ vision_config = {
+ "hidden_size": 1152,
+ "image_size": 448,
+ "intermediate_size": 4304,
+ "model_type": "siglip_vision_model",
+ "num_attention_heads": 16,
+ "num_hidden_layers": 26, # Model is originally 27-layer, we only need the first 26 layers for feature extraction.
+ "patch_size": 14,
+ }
+ config.vision_config = SiglipVisionConfig(**vision_config)
text_config = get_hf_text_config(config=config)
if isinstance(model, str) and text_config is not None:
@@ -231,6 +262,14 @@ def get_context_length(config):
return 2048
+def update_context_length(config, new_context_length: int):
+ """Update the context length of a model from a huggingface model configs."""
+ text_config = config
+ for key in CONTEXT_LENGTH_KEYS:
+ if hasattr(text_config, key):
+ setattr(text_config, key, new_context_length)
+
+
# A fast LLaMA tokenizer with the pre-processed `tokenizer.json` file.
_FAST_LLAMA_TOKENIZER = "hf-internal-testing/llama-tokenizer"
@@ -244,6 +283,11 @@ def get_tokenizer(
**kwargs,
) -> Union[PreTrainedTokenizer, PreTrainedTokenizerFast]:
"""Gets a tokenizer for the given model name via Huggingface."""
+ if tokenizer_name.endswith(".json"):
+ from sglang.srt.tokenizer.tiktoken_tokenizer import TiktokenTokenizer
+
+ return TiktokenTokenizer(tokenizer_name)
+
if tokenizer_mode == "slow":
if kwargs.get("use_fast", False):
raise ValueError("Cannot use the fast tokenizer in slow tokenizer mode.")
@@ -344,13 +388,22 @@ def get_processor(
if config.model_type not in {"llava", "clip"}:
kwargs["use_fast"] = use_fast
try:
- processor = AutoProcessor.from_pretrained(
- tokenizer_name,
- *args,
- trust_remote_code=trust_remote_code,
- revision=revision,
- **kwargs,
- )
+ if "InternVL3_5" in tokenizer_name:
+ processor = AutoTokenizer.from_pretrained(
+ tokenizer_name,
+ *args,
+ trust_remote_code=trust_remote_code,
+ revision=revision,
+ **kwargs,
+ )
+ else:
+ processor = AutoProcessor.from_pretrained(
+ tokenizer_name,
+ *args,
+ trust_remote_code=trust_remote_code,
+ revision=revision,
+ **kwargs,
+ )
except ValueError as e:
error_message = str(e)
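A minimal usage sketch of the new `update_context_length` helper. The `CONTEXT_LENGTH_KEYS` list below is an illustrative stand-in for the module-level constant the helper actually iterates over, and the config object is a bare namespace:

```python
from types import SimpleNamespace

# Hypothetical stand-in for the module-level key list scanned by the helper.
CONTEXT_LENGTH_KEYS = ["max_position_embeddings", "seq_length", "model_max_length"]


def update_context_length(config, new_context_length: int):
    """Set every known context-length attribute present on the config."""
    for key in CONTEXT_LENGTH_KEYS:
        if hasattr(config, key):
            setattr(config, key, new_context_length)


config = SimpleNamespace(max_position_embeddings=4096)
update_context_length(config, 32768)
print(config.max_position_embeddings)  # 32768
```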
diff --git a/python/sglang/srt/host_shared_memory.py b/python/sglang/srt/host_shared_memory.py
new file mode 100644
index 00000000000..c599527f9b8
--- /dev/null
+++ b/python/sglang/srt/host_shared_memory.py
@@ -0,0 +1,83 @@
+import logging
+import os
+from dataclasses import dataclass
+from multiprocessing import shared_memory
+from pathlib import Path
+from typing import List, Optional
+
+import numpy as np
+import torch
+
+from sglang.srt.distributed.naive_distributed import get_naive_distributed
+from sglang.srt.utils import check_cuda_result
+
+logger = logging.getLogger(__name__)
+
+
+class HostSharedMemoryManager:
+ def __init__(self, base_name: str):
+ self._base_name = Path(base_name)
+ self._operation_index = 0
+ self._records: List[_Record] = []
+
+ def malloc(self, *, shape, dtype):
+ meta_tensor = torch.empty(size=shape, dtype=dtype, device="meta")
+ raw = self._malloc_raw(num_bytes=meta_tensor.nbytes)
+ return raw.view(dtype).view(*shape)
+
+ def _malloc_raw(self, *, num_bytes: int) -> torch.Tensor:
+ import cuda.bindings.runtime as cuda_rt
+
+ self._operation_index += 1
+ shm_name = f"{self._base_name}_op{self._operation_index}"
+
+ # TODO handle dispose
+ if get_naive_distributed().get_rank() == 0:
+ shm = shared_memory.SharedMemory(name=shm_name, create=True, size=num_bytes)
+
+ get_naive_distributed().barrier()
+
+ if get_naive_distributed().get_rank() != 0:
+ shm = shared_memory.SharedMemory(name=shm_name)
+
+ np_array = np.ndarray((num_bytes,), dtype=np.uint8, buffer=shm.buf)
+ tensor = torch.from_numpy(np_array)
+
+ check_cuda_result(
+ cuda_rt.cudaHostRegister(
+ tensor.data_ptr(), num_bytes, cuda_rt.cudaHostRegisterPortable
+ )
+ )
+
+ get_naive_distributed().barrier()
+
+ self._records.append(
+ _Record(
+ shm=shm,
+ np_array=np_array,
+ tensor=tensor,
+ )
+ )
+ return tensor
+
+
+@dataclass
+class _Record:
+ shm: shared_memory.SharedMemory
+ np_array: np.ndarray
+ tensor: torch.Tensor
+
+
+# Multiple instances can be created if needed
+_instance: Optional[HostSharedMemoryManager] = None
+
+
+def get_host_shared_memory_manager():
+ assert _instance is not None
+ return _instance
+
+
+def set_host_shared_memory_manager(instance: HostSharedMemoryManager):
+ global _instance
+ assert _instance is None
+ _instance = instance
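A hedged usage sketch of the shared-memory manager introduced above. Only the set/get singleton wiring is exercised here; `malloc()` additionally needs the naive-distributed group and CUDA host registration, so it is shown but left commented out:

```python
from sglang.srt.host_shared_memory import (
    HostSharedMemoryManager,
    get_host_shared_memory_manager,
    set_host_shared_memory_manager,
)

# Register a process-wide manager once (asserts if one is already set).
set_host_shared_memory_manager(HostSharedMemoryManager("demo_region"))

# Later, any module can fetch the same instance and allocate a pinned,
# rank-shared host tensor (requires the naive-distributed group and a
# CUDA-capable host, hence commented out in this sketch):
manager = get_host_shared_memory_manager()
# tensor = manager.malloc(shape=(1024,), dtype=torch.float32)
```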
diff --git a/python/sglang/srt/layers/activation.py b/python/sglang/srt/layers/activation.py
index 15c2ba07727..3d973393ee4 100644
--- a/python/sglang/srt/layers/activation.py
+++ b/python/sglang/srt/layers/activation.py
@@ -35,6 +35,7 @@
is_cuda,
is_hip,
is_npu,
+ is_xpu,
set_weight_attrs,
)
from sglang.utils import resolve_obj_by_qualname
@@ -44,8 +45,9 @@
_is_cpu_amx_available = cpu_has_amx_support()
_is_cpu = is_cpu()
_is_hip = is_hip()
+_is_xpu = is_xpu()
-if _is_cuda:
+if _is_cuda or _is_xpu:
from sgl_kernel import gelu_and_mul, gelu_tanh_and_mul, silu_and_mul
elif _is_hip:
from sgl_kernel import gelu_and_mul, gelu_quick, gelu_tanh_and_mul, silu_and_mul
@@ -70,8 +72,6 @@ def forward_cuda(self, x: torch.Tensor) -> torch.Tensor:
def forward_cpu(self, x: torch.Tensor) -> torch.Tensor:
if _is_cpu_amx_available:
- d = x.shape[-1] // 2
- output_shape = x.shape[:-1] + (d,)
out = torch.ops.sgl_kernel.silu_and_mul_cpu(x)
return out
else:
@@ -81,17 +81,20 @@ def forward_npu(self, x: torch.Tensor) -> torch.Tensor:
out = torch_npu.npu_swiglu(x)
return out
+ def forward_xpu(self, x: torch.Tensor) -> torch.Tensor:
+ d = x.shape[-1] // 2
+ output_shape = x.shape[:-1] + (d,)
+ out = torch.empty(output_shape, dtype=x.dtype, device=x.device)
+ silu_and_mul(x, out)
+ return out
+
class GeluAndMul(CustomOp):
def __init__(self, approximate="tanh"):
super().__init__()
self.approximate = approximate
- def forward_native(self, x: torch.Tensor) -> torch.Tensor:
- d = x.shape[-1] // 2
- return F.gelu(x[..., :d], approximate=self.approximate) * x[..., d:]
-
- def forward_cuda(self, x: torch.Tensor) -> torch.Tensor:
+ def _forward_impl(self, x: torch.Tensor) -> torch.Tensor:
d = x.shape[-1] // 2
output_shape = x.shape[:-1] + (d,)
out = torch.empty(output_shape, dtype=x.dtype, device=x.device)
@@ -103,6 +106,33 @@ def forward_cuda(self, x: torch.Tensor) -> torch.Tensor:
raise RuntimeError("GeluAndMul only support tanh or none")
return out
+ def forward_native(self, x: torch.Tensor) -> torch.Tensor:
+ d = x.shape[-1] // 2
+ return F.gelu(x[..., :d], approximate=self.approximate) * x[..., d:]
+
+ def forward_cpu(self, x: torch.Tensor) -> torch.Tensor:
+ if _is_cpu_amx_available and self.approximate == "tanh":
+ return torch.ops.sgl_kernel.gelu_tanh_and_mul_cpu(x)
+ elif _is_cpu_amx_available and self.approximate == "none":
+ return torch.ops.sgl_kernel.gelu_and_mul_cpu(x)
+ else:
+ return self.forward_native(x)
+
+ def forward_cuda(self, x: torch.Tensor) -> torch.Tensor:
+ return self._forward_impl(x)
+
+ def forward_xpu(self, x: torch.Tensor) -> torch.Tensor:
+ return self._forward_impl(x)
+
+ def forward_npu(self, x: torch.Tensor) -> torch.Tensor:
+ y_npu, gelu_npu = torch_npu.npu_geglu(
+ x,
+ dim=-1,
+ approximate=1 if self.approximate == "tanh" else 0,
+ activate_left=True,
+ )
+ return y_npu
+
class NewGELU(CustomOp):
def forward_native(self, x: torch.Tensor) -> torch.Tensor:
@@ -137,6 +167,9 @@ def forward_hip(self, x: torch.Tensor) -> torch.Tensor:
gelu_quick(x, out)
return out
+ def forward_npu(self, x: torch.Tensor) -> torch.Tensor:
+ return torch_npu.npu_fast_gelu(x)
+
class ScaledActivation(nn.Module):
"""An activation function with post-scale parameters.
@@ -230,7 +263,9 @@ def get_cross_encoder_activation_function(config: PretrainedConfig):
return nn.Identity()
-if not (_is_cuda or _is_npu or (_is_cpu and _is_cpu_amx_available) or _is_hip):
+if not (
+ _is_cuda or _is_npu or (_is_cpu and _is_cpu_amx_available) or _is_hip or _is_xpu
+):
logger.info(
"sgl-kernel is not available on Non-NV, Non-AMD platforms or Non-AMX CPUs. Fallback to other kernel libraries."
)
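For context on the XPU/CPU/NPU branches added above, a pure-PyTorch reference of the shared gate-and-multiply contract (mirroring the `forward_native` paths shown): the last dimension of the input is split in half, the first half is activated, and the output width is halved:

```python
import torch
import torch.nn.functional as F


def silu_and_mul_ref(x: torch.Tensor) -> torch.Tensor:
    # Reference semantics for SiluAndMul: [..., 2d] -> [..., d]
    d = x.shape[-1] // 2
    return F.silu(x[..., :d]) * x[..., d:]


def gelu_and_mul_ref(x: torch.Tensor, approximate: str = "tanh") -> torch.Tensor:
    # Reference semantics for GeluAndMul (same as forward_native above)
    d = x.shape[-1] // 2
    return F.gelu(x[..., :d], approximate=approximate) * x[..., d:]


x = torch.randn(2, 8)
print(silu_and_mul_ref(x).shape)  # torch.Size([2, 4])
print(gelu_and_mul_ref(x).shape)  # torch.Size([2, 4])
```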
diff --git a/python/sglang/srt/layers/attention/aiter_backend.py b/python/sglang/srt/layers/attention/aiter_backend.py
index 8d07d993308..188d772c778 100644
--- a/python/sglang/srt/layers/attention/aiter_backend.py
+++ b/python/sglang/srt/layers/attention/aiter_backend.py
@@ -18,7 +18,10 @@
from sglang.global_config import global_config
from sglang.srt.layers.attention.base_attn_backend import AttentionBackend
from sglang.srt.layers.attention.utils import create_flashinfer_kv_indices_triton
-from sglang.srt.layers.dp_attention import get_attention_tp_size
+from sglang.srt.layers.dp_attention import (
+ get_attention_tp_size,
+ is_dp_attention_enabled,
+)
from sglang.srt.model_executor.forward_batch_info import ForwardBatch, ForwardMode
if TYPE_CHECKING:
@@ -154,6 +157,8 @@ def __init__(
(max_bs + 1,), dtype=torch.int32, device=model_runner.device
)
+ self.enable_dp_attention = is_dp_attention_enabled()
+
def init_forward_metadata(self, forward_batch: ForwardBatch):
"""Init auxiliary variables for triton attention backend."""
@@ -302,19 +307,19 @@ def init_forward_metadata(self, forward_batch: ForwardBatch):
if self.use_mla:
self.mla_indices_updater_prefill.update(
forward_batch.req_pool_indices,
- forward_batch.extend_prefix_lens,
- sum(forward_batch.extend_prefix_lens_cpu),
+ forward_batch.seq_lens,
+ forward_batch.seq_lens_sum,
forward_batch.extend_seq_lens,
- max(forward_batch.extend_seq_lens_cpu),
- forward_batch.seq_lens_cpu.max().item(),
+ forward_batch.extend_seq_lens.max().item(),
+ forward_batch.seq_lens.max().item(),
spec_info=None,
)
- self.mla_indices_updater_prefill.kv_indptr += (
- self.mla_indices_updater_prefill.qo_indptr
- )
+
+ kv_indices = self.mla_indices_updater_prefill.kv_indices
+
self.forward_metadata = ForwardMetadata(
self.mla_indices_updater_prefill.kv_indptr,
- self.mla_indices_updater_prefill.kv_indices,
+ kv_indices,
self.mla_indices_updater_prefill.qo_indptr,
self.kv_last_page_len[:bs],
self.mla_indices_updater_prefill.max_q_len,
@@ -614,66 +619,86 @@ def forward_extend(
assert len(k.shape) == 3
assert len(v.shape) == 3
- if kv_indices.shape[0] == 0:
- o = flash_attn_varlen_func(
- q,
- k,
- v,
- qo_indptr,
- qo_indptr,
- max_q_len,
- max_q_len,
- softmax_scale=layer.scaling,
- causal=True,
- )
- return o
- elif layer.qk_head_dim != (kv_lora_rank + qk_rope_head_dim):
- K_Buffer = torch.index_select(K_Buffer, 0, kv_indices)
- kvc, k_pe = torch.split(
- K_Buffer, [kv_lora_rank, qk_rope_head_dim], dim=-1
- )
- kvprefix = layer.kv_b_proj(kvc.contiguous())[0]
+ if forward_batch.forward_mode.is_extend():
+ if kv_indices.shape[0] == 0:
+ o = flash_attn_varlen_func(
+ q,
+ k,
+ v,
+ qo_indptr,
+ qo_indptr,
+ max_q_len,
+ max_q_len,
+ softmax_scale=layer.scaling,
+ causal=True,
+ )
+ return o
+ elif layer.qk_head_dim != (kv_lora_rank + qk_rope_head_dim):
+ K_Buffer = torch.index_select(K_Buffer, 0, kv_indices)
+ kvc, k_pe = torch.split(
+ K_Buffer, [kv_lora_rank, qk_rope_head_dim], dim=-1
+ )
+ kvprefix = layer.kv_b_proj(kvc.contiguous())[0]
- kvprefix = kvprefix.view(
- -1, layer.tp_k_head_num, qk_nope_head_dim + layer.v_head_dim
- )
- k_prefix, v_prefix = torch.split(
- kvprefix, [qk_nope_head_dim, layer.v_head_dim], dim=-1
- )
- k_prefix = torch.cat(
- [
- k_prefix,
- torch.broadcast_to(
- k_pe,
- (k_pe.shape[0], layer.tp_k_head_num, k_pe.shape[2]),
- ),
- ],
- dim=-1,
- )
- assert (
- forward_batch.extend_prefix_lens.shape
- == forward_batch.extend_seq_lens.shape
- )
- k_prefix = torch.split(k_prefix, forward_batch.extend_prefix_lens_cpu)
- k_extend = torch.split(k, forward_batch.extend_seq_lens_cpu)
- assert len(k_prefix) == len(forward_batch.extend_prefix_lens_cpu)
- k = torch.cat([x for el in zip(k_prefix, k_extend) for x in el])
- v_prefix = torch.split(v_prefix, forward_batch.extend_prefix_lens_cpu)
- v_extend = torch.split(v, forward_batch.extend_seq_lens_cpu)
- v = torch.cat([x for el in zip(v_prefix, v_extend) for x in el])
-
- o = flash_attn_varlen_func(
- q,
- k,
- v,
- qo_indptr,
- kv_indptr,
- max_q_len,
- max_kv_len,
- softmax_scale=layer.scaling,
- causal=True,
- )
- return o
+ kvprefix = kvprefix.view(
+ -1, layer.tp_k_head_num, qk_nope_head_dim + layer.v_head_dim
+ )
+ k_prefix, v_prefix = torch.split(
+ kvprefix, [qk_nope_head_dim, layer.v_head_dim], dim=-1
+ )
+ k_prefix = torch.cat(
+ [
+ k_prefix,
+ torch.broadcast_to(
+ k_pe,
+ (k_pe.shape[0], layer.tp_k_head_num, k_pe.shape[2]),
+ ),
+ ],
+ dim=-1,
+ )
+ assert (
+ forward_batch.extend_prefix_lens.shape
+ == forward_batch.extend_seq_lens.shape
+ )
+
+ k = k_prefix
+ v = v_prefix
+
+ o = flash_attn_varlen_func(
+ q,
+ k,
+ v,
+ qo_indptr,
+ kv_indptr,
+ max_q_len,
+ max_kv_len,
+ softmax_scale=layer.scaling,
+ causal=True,
+ )
+ return o
+
+ else:
+ if layer.qk_head_dim != layer.v_head_dim:
+ o = q.new_empty(
+ (q.shape[0], layer.tp_q_head_num * layer.v_head_dim)
+ )
+ else:
+ o = torch.empty_like(q)
+
+ mla_prefill_fwd(
+ q.view(-1, layer.tp_q_head_num, layer.qk_head_dim),
+ K_Buffer.view(-1, 1, 1, layer.qk_head_dim),
+ o.view(-1, layer.tp_q_head_num, layer.v_head_dim),
+ qo_indptr,
+ kv_indptr,
+ kv_indices,
+ self.forward_metadata.kv_last_page_len,
+ self.forward_metadata.max_q_len,
+ layer.scaling,
+ layer.logit_cap,
+ )
+ K_Buffer = K_Buffer.view(-1, layer.tp_k_head_num, layer.qk_head_dim)
+ return o
elif forward_batch.forward_mode.is_target_verify():
o = q.new_empty((q.shape[0], layer.tp_q_head_num, layer.v_head_dim))
mla_decode_fwd(
diff --git a/python/sglang/srt/layers/attention/ascend_backend.py b/python/sglang/srt/layers/attention/ascend_backend.py
index 70ee79b25ae..6d5ed0a5c82 100644
--- a/python/sglang/srt/layers/attention/ascend_backend.py
+++ b/python/sglang/srt/layers/attention/ascend_backend.py
@@ -10,13 +10,19 @@
from sglang.srt.configs.model_config import AttentionArch
from sglang.srt.layers.attention.base_attn_backend import AttentionBackend
from sglang.srt.layers.attention.torch_native_backend import TorchNativeAttnBackend
+from sglang.srt.layers.dp_attention import get_attention_tp_size
from sglang.srt.layers.radix_attention import AttentionType
from sglang.srt.model_executor.forward_batch_info import ForwardBatch
+from sglang.srt.utils import get_bool_env_var
if TYPE_CHECKING:
from sglang.srt.layers.radix_attention import RadixAttention
from sglang.srt.model_executor.model_runner import ModelRunner
+import os
+
+import numpy as np
+
@dataclass
class ForwardMetadata:
@@ -28,6 +34,7 @@ class ForwardMetadata:
extend_seq_lens_cpu_int: Optional[torch.Tensor] = None
seq_lens_cpu_int: Optional[torch.Tensor] = None
seq_lens_cpu_list: Optional[List[int]] = None
+ seq_lens_list_cumsum: Optional[List[int]] = None
class AscendAttnBackend(AttentionBackend):
@@ -54,20 +61,31 @@ def __init__(self, model_runner: ModelRunner):
super().__init__()
self.forward_metadata = None
self.device = model_runner.device
- self.gen_attention_mask(128, model_runner.dtype)
self.page_size = model_runner.page_size
self.use_mla = model_runner.model_config.attention_arch == AttentionArch.MLA
if self.use_mla:
self.kv_lora_rank = model_runner.model_config.kv_lora_rank
self.qk_rope_head_dim = model_runner.model_config.qk_rope_head_dim
- self.native_attn = TorchNativeAttnBackend(model_runner)
+ self.native_attn = TorchNativeAttnBackend(model_runner)
self.graph_metadata = {}
self.max_context_len = model_runner.model_config.context_len
self.req_to_token = model_runner.req_to_token_pool.req_to_token
self.graph_mode = False
+ self.use_fia = get_bool_env_var("ASCEND_USE_FIA", "False")
+ if not self.use_fia:
+ self.gen_attention_mask(128, model_runner.dtype)
+ mask_length = 2048
+ self.fia_mask = ~torch.tril(
+ torch.ones(
+ (mask_length, mask_length),
+ dtype=torch.bool,
+ device=model_runner.device,
+ )
+ )
def init_forward_metadata(self, forward_batch: ForwardBatch):
"""Init the metadata for a forward pass."""
+ tp_size = get_attention_tp_size()
self.forward_metadata = ForwardMetadata()
self.forward_metadata.block_tables = (
@@ -82,6 +100,13 @@ def init_forward_metadata(self, forward_batch: ForwardBatch):
)
self.forward_metadata.seq_lens_cpu_int = forward_batch.seq_lens_cpu.int()
+ seq_lens_list_cumsum = np.cumsum(forward_batch.extend_seq_lens_cpu)
+ if forward_batch.is_extend_in_batch:
+ seq_lens_list_cumsum[-1] = (
+ (seq_lens_list_cumsum[-1] - 1) // tp_size + 1
+ ) * tp_size
+ self.forward_metadata.seq_lens_list_cumsum = seq_lens_list_cumsum
+
self.graph_mode = False
def init_cuda_graph_state(self, max_bs: int, max_num_tokens: int):
@@ -140,7 +165,7 @@ def init_forward_metadata_replay_cuda_graph(
self.graph_mode = True
def get_cuda_graph_seq_len_fill_value(self):
- return 1
+ return 0
def forward_extend(
self,
@@ -149,73 +174,256 @@ def forward_extend(
v,
layer: RadixAttention,
forward_batch: ForwardBatch,
- save_kv_cache=True,
+ save_kv_cache: bool = True,
):
- if save_kv_cache:
- forward_batch.token_to_kv_pool.set_kv_buffer(
- layer, forward_batch.out_cache_loc, k, v
+ if not self.use_mla:
+ if save_kv_cache:
+ forward_batch.token_to_kv_pool.set_kv_buffer(
+ layer, forward_batch.out_cache_loc, k, v
+ )
+
+ k_cache = forward_batch.token_to_kv_pool.get_key_buffer(layer.layer_id)
+ v_cache = forward_batch.token_to_kv_pool.get_value_buffer(layer.layer_id)
+
+ if self.use_fia:
+ """FIA will support multi-bs in the later version of CANN"""
+ q = q.reshape(-1, layer.tp_q_head_num, layer.qk_head_dim)
+ attn_output = torch.empty(
+ (q.size(0), layer.tp_q_head_num, layer.v_head_dim),
+ device=q.device,
+ dtype=q.dtype,
+ )
+ q_len_offset = 0
+ for q_len in forward_batch.extend_seq_lens_cpu:
+ attn_output[q_len_offset : q_len_offset + q_len] = (
+ torch.ops.npu.npu_fused_infer_attention_score(
+ q[None, q_len_offset : q_len_offset + q_len],
+ k[None, q_len_offset : q_len_offset + q_len],
+ v[None, q_len_offset : q_len_offset + q_len],
+ num_heads=layer.tp_q_head_num,
+ num_key_value_heads=layer.tp_k_head_num,
+ input_layout="BSND", # todo, TND not supports q_heads!=k_heads
+ atten_mask=self.fia_mask.unsqueeze(0),
+ sparse_mode=3,
+ scale=layer.scaling,
+ next_tokens=0,
+ )[0]
+ )
+ q_len_offset += q_len
+ attn_output = attn_output.view(
+ -1, layer.tp_q_head_num * layer.v_head_dim
+ )
+
+ else:
+ if layer.qk_head_dim <= 128:
+ query = q.reshape(-1, layer.tp_q_head_num * layer.qk_head_dim)
+ attn_output = torch.empty(
+ (query.shape[0], layer.tp_q_head_num * layer.v_head_dim),
+ dtype=query.dtype,
+ device=query.device,
+ )
+
+ torch_npu._npu_flash_attention_qlens(
+ query=query,
+ key_cache=k_cache,
+ value_cache=v_cache,
+ mask=self.mask,
+ block_table=self.forward_metadata.block_tables,
+ seq_len=self.forward_metadata.extend_seq_lens_cpu_int,
+ context_lens=self.forward_metadata.seq_lens_cpu_int,
+ scale_value=layer.scaling,
+ num_heads=layer.tp_q_head_num,
+ num_kv_heads=layer.tp_k_head_num,
+ out=attn_output,
+ )
+ else:
+ if layer.qk_head_dim != layer.v_head_dim:
+ attn_output = q.new_empty(
+ (q.shape[0], layer.tp_q_head_num * layer.v_head_dim)
+ )
+ else:
+ attn_output = torch.empty_like(q)
+
+ use_gqa = layer.tp_q_head_num != layer.tp_k_head_num
+
+ q_ = q.view(-1, layer.tp_q_head_num, layer.qk_head_dim)
+ o_ = attn_output.view(-1, layer.tp_q_head_num, layer.v_head_dim)
+
+ causal = True
+ if (
+ layer.is_cross_attention
+ or layer.attn_type == AttentionType.ENCODER_ONLY
+ ):
+ causal = False
+
+ self.native_attn._run_sdpa_forward_extend(
+ q_,
+ o_,
+ k_cache.view(-1, layer.tp_k_head_num, layer.qk_head_dim),
+ v_cache.view(-1, layer.tp_v_head_num, layer.v_head_dim),
+ forward_batch.req_to_token_pool.req_to_token,
+ forward_batch.req_pool_indices,
+ forward_batch.seq_lens,
+ forward_batch.extend_prefix_lens,
+ forward_batch.extend_seq_lens,
+ scaling=layer.scaling,
+ enable_gqa=use_gqa,
+ causal=causal,
+ )
+ else:
+ assert (
+ layer.qk_head_dim != layer.v_head_dim
+ ), "FIA only supports qk_head_dim != v_head_dim"
+ q_nope, q_rope = q.split([layer.v_head_dim, self.qk_rope_head_dim], dim=-1)
+ k_nope, k_rope = k.split([layer.v_head_dim, self.qk_rope_head_dim], dim=-1)
+
+ attn_output, _ = torch.ops.npu.npu_fused_infer_attention_score(
+ q_nope,
+ k_nope,
+ v,
+ query_rope=q_rope,
+ key_rope=k_rope,
+ num_heads=layer.tp_q_head_num,
+ input_layout="TND",
+ atten_mask=self.fia_mask,
+ sparse_mode=3,
+ actual_seq_lengths=self.forward_metadata.seq_lens_list_cumsum,
+ actual_seq_lengths_kv=self.forward_metadata.seq_lens_list_cumsum,
+ scale=layer.scaling,
+ next_tokens=0,
)
- k_cache = forward_batch.token_to_kv_pool.get_key_buffer(layer.layer_id)
- v_cache = forward_batch.token_to_kv_pool.get_value_buffer(layer.layer_id)
+ return attn_output
+
+ def forward_decode_graph(
+ self,
+ q: torch.Tensor,
+ k: torch.Tensor,
+ v: torch.Tensor,
+ layer: RadixAttention,
+ forward_batch: ForwardBatch,
+ save_kv_cache: bool = True,
+ q_rope: Optional[torch.Tensor] = None,
+ k_rope: Optional[torch.Tensor] = None,
+ ):
+ if save_kv_cache:
+ if self.use_mla:
+ k = k.view(-1, layer.tp_k_head_num, self.kv_lora_rank)
+ k_rope = k_rope.view(-1, layer.tp_k_head_num, self.qk_rope_head_dim)
+ forward_batch.token_to_kv_pool.set_kv_buffer(
+ layer, forward_batch.out_cache_loc, k, k_rope
+ )
+ else:
+ forward_batch.token_to_kv_pool.set_kv_buffer(
+ layer, forward_batch.out_cache_loc, k, v
+ )
if not self.use_mla:
- query = q.view(-1, layer.tp_q_head_num * layer.qk_head_dim)
+ k_cache = forward_batch.token_to_kv_pool.get_key_buffer(
+ layer.layer_id
+ ).view(-1, self.page_size, layer.tp_k_head_num * layer.qk_head_dim)
+ v_cache = forward_batch.token_to_kv_pool.get_value_buffer(
+ layer.layer_id
+ ).view(-1, self.page_size, layer.tp_v_head_num * layer.v_head_dim)
+ query = q.reshape(-1, 1, layer.tp_q_head_num * layer.qk_head_dim)
+ if self.forward_metadata.seq_lens_cpu_int is None:
+ actual_seq_len_kv = self.forward_metadata.seq_lens_cpu_list
+ else:
+ actual_seq_len_kv = (
+ self.forward_metadata.seq_lens_cpu_int.cpu().int().tolist()
+ )
+ num_tokens = query.shape[0]
+ workspace = torch_npu._npu_fused_infer_attention_score_get_max_workspace(
+ query,
+ k_cache,
+ v_cache,
+ block_table=self.forward_metadata.block_tables,
+ block_size=self.page_size,
+ num_heads=layer.tp_q_head_num,
+ num_key_value_heads=layer.tp_k_head_num,
+ input_layout="BSH",
+ scale=layer.scaling,
+ actual_seq_lengths_kv=actual_seq_len_kv,
+ )
output = torch.empty(
- (query.shape[0], layer.tp_q_head_num * layer.v_head_dim),
- dtype=query.dtype,
- device=query.device,
+ (num_tokens, 1, layer.tp_q_head_num * layer.v_head_dim),
+ dtype=q.dtype,
+ device=q.device,
)
-
- torch_npu._npu_flash_attention_qlens(
- query=query,
- key_cache=k_cache,
- value_cache=v_cache,
- mask=self.mask,
+ softmax_lse = torch.empty(1, dtype=q.dtype, device=q.device)
+ torch_npu.npu_fused_infer_attention_score.out(
+ query,
+ k_cache,
+ v_cache,
block_table=self.forward_metadata.block_tables,
- seq_len=self.forward_metadata.extend_seq_lens_cpu_int,
- context_lens=self.forward_metadata.seq_lens_cpu_int,
- scale_value=layer.scaling,
+ block_size=self.page_size,
num_heads=layer.tp_q_head_num,
- num_kv_heads=layer.tp_k_head_num,
- out=output,
+ num_key_value_heads=layer.tp_k_head_num,
+ input_layout="BSH",
+ scale=layer.scaling,
+ actual_seq_lengths_kv=actual_seq_len_kv,
+ workspace=workspace,
+ out=[output, softmax_lse],
)
- return output
+ return output.view(num_tokens, layer.tp_q_head_num * layer.v_head_dim)
else:
- if layer.qk_head_dim != layer.v_head_dim:
- o = q.new_empty((q.shape[0], layer.tp_q_head_num * layer.v_head_dim))
+ c_kv, k_rope = forward_batch.token_to_kv_pool.get_kv_buffer(layer.layer_id)
+ k_rope_cache = k_rope.view(
+ -1, layer.tp_k_head_num, self.page_size, self.qk_rope_head_dim
+ )
+ c_kv_cache = c_kv.view(
+ -1, layer.tp_v_head_num, self.page_size, self.kv_lora_rank
+ )
+
+ q_nope = q.view(-1, layer.tp_q_head_num, 1, self.kv_lora_rank).contiguous()
+ q_rope = q_rope.view(-1, layer.tp_q_head_num, 1, self.qk_rope_head_dim)
+ if self.forward_metadata.seq_lens_cpu_int is None:
+ actual_seq_len_kv = self.forward_metadata.seq_lens_cpu_list
else:
- o = torch.empty_like(q)
-
- use_gqa = layer.tp_q_head_num != layer.tp_k_head_num
-
- q_ = q.view(-1, layer.tp_q_head_num, layer.qk_head_dim)
- o_ = o.view(-1, layer.tp_q_head_num, layer.v_head_dim)
-
- causal = True
- if (
- layer.is_cross_attention
- or layer.attn_type == AttentionType.ENCODER_ONLY
- ):
- causal = False
-
- self.native_attn._run_sdpa_forward_extend(
- q_,
- o_,
- k_cache.view(
- -1, layer.tp_k_head_num, (self.kv_lora_rank + self.qk_rope_head_dim)
- ),
- v_cache.view(-1, layer.tp_v_head_num, self.kv_lora_rank),
- forward_batch.req_to_token_pool.req_to_token,
- forward_batch.req_pool_indices,
- forward_batch.seq_lens,
- forward_batch.extend_prefix_lens,
- forward_batch.extend_seq_lens,
- scaling=layer.scaling,
- enable_gqa=use_gqa,
- causal=causal,
+ actual_seq_len_kv = (
+ self.forward_metadata.seq_lens_cpu_int.cpu().int().tolist()
+ )
+
+ workspace = torch_npu._npu_fused_infer_attention_score_get_max_workspace(
+ q_nope,
+ c_kv_cache,
+ c_kv_cache,
+ query_rope=q_rope,
+ key_rope=k_rope_cache,
+ num_heads=layer.tp_q_head_num,
+ num_key_value_heads=layer.tp_k_head_num,
+ block_table=self.forward_metadata.block_tables,
+ block_size=self.page_size,
+ input_layout="BNSD",
+ scale=layer.scaling,
+ actual_seq_lengths_kv=actual_seq_len_kv,
+ antiquant_mode=0,
+ antiquant_scale=None,
+ sparse_mode=0,
)
- return o
+ output = torch.zeros_like(q_nope, dtype=q.dtype, device=q.device)
+ softmax_lse = torch.empty(1, dtype=q.dtype, device=q.device)
+
+ torch_npu.npu_fused_infer_attention_score.out(
+ q_nope,
+ c_kv_cache,
+ c_kv_cache,
+ query_rope=q_rope,
+ key_rope=k_rope_cache,
+ num_heads=layer.tp_q_head_num,
+ num_key_value_heads=layer.tp_k_head_num,
+ block_table=self.forward_metadata.block_tables,
+ block_size=self.page_size,
+ input_layout="BNSD",
+ scale=layer.scaling,
+ actual_seq_lengths_kv=actual_seq_len_kv,
+ antiquant_mode=0,
+ antiquant_scale=None,
+ sparse_mode=0,
+ workspace=workspace,
+ out=[output, softmax_lse],
+ )
+ return output.view(-1, layer.tp_q_head_num * self.kv_lora_rank)
def forward_decode(
self,
@@ -224,65 +432,58 @@ def forward_decode(
v: torch.Tensor,
layer: RadixAttention,
forward_batch: ForwardBatch,
- save_kv_cache=True,
+ save_kv_cache: bool = True,
+ # For multi-head latent attention
+ q_rope: Optional[torch.Tensor] = None,
+ k_rope: Optional[torch.Tensor] = None,
):
- if save_kv_cache:
- forward_batch.token_to_kv_pool.set_kv_buffer(
- layer, forward_batch.out_cache_loc, k, v
+ if self.graph_mode:
+ return self.forward_decode_graph(
+ q,
+ k,
+ v,
+ layer,
+ forward_batch,
+ save_kv_cache,
+ q_rope=q_rope,
+ k_rope=k_rope,
)
+
if not self.use_mla:
- if self.graph_mode:
- k_cache = forward_batch.token_to_kv_pool.get_key_buffer(
- layer.layer_id
- ).view(-1, self.page_size, layer.tp_k_head_num * layer.qk_head_dim)
- v_cache = forward_batch.token_to_kv_pool.get_value_buffer(
- layer.layer_id
- ).view(-1, self.page_size, layer.tp_v_head_num * layer.v_head_dim)
- query = q.view(-1, 1, layer.tp_q_head_num * layer.qk_head_dim)
- num_tokens = query.shape[0]
- workspace = (
- torch_npu._npu_fused_infer_attention_score_get_max_workspace(
- query,
- k_cache,
- v_cache,
- block_table=self.forward_metadata.block_tables,
- block_size=self.page_size,
- num_heads=layer.tp_q_head_num,
- num_key_value_heads=layer.tp_k_head_num,
- input_layout="BSH",
- scale=layer.scaling,
- actual_seq_lengths_kv=self.forward_metadata.seq_lens_cpu_list,
- )
+ if save_kv_cache:
+ forward_batch.token_to_kv_pool.set_kv_buffer(
+ layer, forward_batch.out_cache_loc, k, v
)
- output = torch.empty(
- (num_tokens, 1, layer.tp_q_head_num * layer.v_head_dim),
- dtype=q.dtype,
- device=q.device,
- )
- softmax_lse = torch.empty(1, dtype=q.dtype, device=q.device)
- torch_npu.npu_fused_infer_attention_score.out(
- query,
- k_cache,
- v_cache,
- block_table=self.forward_metadata.block_tables,
- block_size=self.page_size,
+ num_tokens = q.shape[0]
+ k_cache = forward_batch.token_to_kv_pool.get_key_buffer(layer.layer_id)
+ v_cache = forward_batch.token_to_kv_pool.get_value_buffer(layer.layer_id)
+ if self.use_fia:
+ attn_output, _ = torch.ops.npu.npu_fused_infer_attention_score(
+ q.view(
+ forward_batch.batch_size,
+ -1,
+ layer.tp_q_head_num,
+ layer.qk_head_dim,
+ ),
+ k_cache.view(
+ -1, self.page_size, layer.tp_k_head_num * layer.qk_head_dim
+ ),
+ v_cache.view(
+ -1, self.page_size, layer.tp_v_head_num * layer.qk_head_dim
+ ),
num_heads=layer.tp_q_head_num,
num_key_value_heads=layer.tp_k_head_num,
- input_layout="BSH",
+ input_layout="BSND",
+ atten_mask=None,
+ block_size=self.page_size,
+ block_table=self.forward_metadata.block_tables,
+ actual_seq_lengths_kv=self.forward_metadata.seq_lens_cpu_int,
scale=layer.scaling,
- actual_seq_lengths_kv=self.forward_metadata.seq_lens_cpu_list,
- workspace=workspace,
- out=[output, softmax_lse],
)
else:
- k_cache = forward_batch.token_to_kv_pool.get_key_buffer(layer.layer_id)
- v_cache = forward_batch.token_to_kv_pool.get_value_buffer(
- layer.layer_id
- )
-
- query = q.view(-1, layer.tp_q_head_num, layer.qk_head_dim)
+ query = q.reshape(-1, layer.tp_q_head_num, layer.qk_head_dim)
num_tokens = query.shape[0]
- output = torch.empty(
+ attn_output = torch.empty(
(num_tokens, layer.tp_q_head_num, layer.v_head_dim),
dtype=query.dtype,
device=query.device,
@@ -297,39 +498,80 @@ def forward_decode(
scale_value=layer.scaling,
block_table=self.forward_metadata.block_tables,
context_lens=self.forward_metadata.seq_lens_cpu_int,
- out=output,
+ out=attn_output,
)
- return output.view(num_tokens, layer.tp_q_head_num * layer.v_head_dim)
+ return attn_output.view(num_tokens, layer.tp_q_head_num * layer.v_head_dim)
else:
- query = q.view(-1, layer.tp_q_head_num, layer.head_dim)
- num_tokens = query.shape[0]
- kv_c_and_k_pe_cache = forward_batch.token_to_kv_pool.get_key_buffer(
- layer.layer_id
- )
- kv_c_and_k_pe_cache = kv_c_and_k_pe_cache.view(
- -1,
- self.page_size,
- layer.tp_k_head_num,
- self.kv_lora_rank + self.qk_rope_head_dim,
- )
-
- attn_output = torch.empty(
- [num_tokens, layer.tp_q_head_num, self.kv_lora_rank],
- dtype=q.dtype,
- device=q.device,
- )
- torch_npu._npu_paged_attention_mla(
- query=query,
- key_cache=kv_c_and_k_pe_cache,
- num_kv_heads=layer.tp_k_head_num,
- num_heads=layer.tp_q_head_num,
- scale_value=layer.scaling,
- block_table=self.forward_metadata.block_tables,
- context_lens=self.forward_metadata.seq_lens_cpu_int,
- mla_vheadsize=self.kv_lora_rank,
- out=attn_output,
- )
+ if save_kv_cache:
+ forward_batch.token_to_kv_pool.set_kv_buffer(
+ layer, forward_batch.out_cache_loc, k, k_rope
+ )
+ num_tokens = q.shape[0]
+ kv_c = forward_batch.token_to_kv_pool.get_key_buffer(layer.layer_id)
+ k_pe = forward_batch.token_to_kv_pool.get_value_buffer(layer.layer_id)
+
+ if self.use_fia and (layer.tp_q_head_num // layer.tp_k_head_num) >= 8:
+ """layer.tp_q_head_num // layer.tp_k_head_num < 8 will support in the later version of CANN"""
+ kv_c = kv_c.view(
+ -1, self.page_size, layer.tp_k_head_num * self.kv_lora_rank
+ )
+ k_pe = k_pe.view(
+ -1, self.page_size, layer.tp_k_head_num * self.qk_rope_head_dim
+ )
+ q = q.view(
+ forward_batch.batch_size, -1, layer.tp_q_head_num, self.kv_lora_rank
+ )
+ q_rope = q_rope.view(
+ forward_batch.batch_size,
+ -1,
+ layer.tp_q_head_num,
+ self.qk_rope_head_dim,
+ )
+ attn_output, _ = torch.ops.npu.npu_fused_infer_attention_score(
+ q,
+ kv_c,
+ kv_c,
+ query_rope=q_rope,
+ key_rope=k_pe,
+ num_heads=layer.tp_q_head_num,
+ num_key_value_heads=layer.tp_k_head_num,
+ input_layout="BSND",
+ atten_mask=None,
+ sparse_mode=0,
+ scale=layer.scaling,
+ antiquant_mode=0,
+ antiquant_scale=None,
+ block_table=self.forward_metadata.block_tables,
+ block_size=self.page_size,
+ actual_seq_lengths_kv=self.forward_metadata.seq_lens_cpu_int,
+ )
+ else:
+ assert (
+ self.graph_mode == False
+ ) # _npu_paged_attention_mla does not support graph mode
+ q = torch.cat([q, q_rope], dim=-1)
+ query = q.view(-1, layer.tp_q_head_num, layer.head_dim)
+ kv_c_and_k_pe_cache = torch.cat([kv_c, k_pe], dim=-1)
+ kv_c_and_k_pe_cache = kv_c_and_k_pe_cache.view(
+ -1,
+ self.page_size,
+ layer.tp_k_head_num,
+ self.kv_lora_rank + self.qk_rope_head_dim,
+ )
+ attn_output = torch.empty(
+ [num_tokens, layer.tp_q_head_num, self.kv_lora_rank],
+ dtype=q.dtype,
+ device=q.device,
+ )
+ torch_npu._npu_paged_attention_mla(
+ query=query,
+ key_cache=kv_c_and_k_pe_cache,
+ num_kv_heads=layer.tp_k_head_num,
+ num_heads=layer.tp_q_head_num,
+ scale_value=layer.scaling,
+ block_table=self.forward_metadata.block_tables,
+ context_lens=self.forward_metadata.seq_lens_cpu_int,
+ mla_vheadsize=self.kv_lora_rank,
+ out=attn_output,
+ )
return attn_output.view(num_tokens, layer.tp_q_head_num * self.kv_lora_rank)
-
- def get_cuda_graph_seq_len_fill_value(self):
- return 0
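A small sketch of the `fia_mask` built in `__init__` above: an upper-triangular boolean mask (True means masked out) used together with `sparse_mode=3` for causal attention. Shown here on CPU with a tiny length; the backend allocates a 2048x2048 mask on the NPU device:

```python
import torch

mask_length = 4  # the backend uses 2048
fia_mask = ~torch.tril(torch.ones((mask_length, mask_length), dtype=torch.bool))
print(fia_mask.int())
# tensor([[0, 1, 1, 1],
#         [0, 0, 1, 1],
#         [0, 0, 0, 1],
#         [0, 0, 0, 0]])
```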
diff --git a/python/sglang/srt/layers/attention/fla/chunk.py b/python/sglang/srt/layers/attention/fla/chunk.py
new file mode 100644
index 00000000000..a48a9e649f3
--- /dev/null
+++ b/python/sglang/srt/layers/attention/fla/chunk.py
@@ -0,0 +1,242 @@
+# Adapted from https://github.com/fla-org/flash-linear-attention/blob/main/fla/ops/gated_delta_rule/chunk.py
+# -*- coding: utf-8 -*-
+# Copyright (c) 2023-2025, Songlin Yang, Yu Zhang
+
+import warnings
+from typing import Optional
+
+import torch
+from einops import rearrange
+
+from sglang.srt.layers.attention.fla.chunk_delta_h import chunk_gated_delta_rule_fwd_h
+from sglang.srt.layers.attention.fla.chunk_o import chunk_fwd_o
+from sglang.srt.layers.attention.fla.chunk_scaled_dot_kkt import (
+ chunk_scaled_dot_kkt_fwd,
+)
+from sglang.srt.layers.attention.fla.cumsum import chunk_local_cumsum
+from sglang.srt.layers.attention.fla.l2norm import l2norm_fwd
+from sglang.srt.layers.attention.fla.solve_tril import solve_tril
+from sglang.srt.layers.attention.fla.utils import (
+ SUPPRESS_LEVEL,
+ autocast_custom_fwd,
+ input_guard,
+)
+from sglang.srt.layers.attention.fla.wy_fast import recompute_w_u_fwd
+
+
+def chunk_gated_delta_rule_fwd(
+ q: torch.Tensor,
+ k: torch.Tensor,
+ v: torch.Tensor,
+ g: torch.Tensor,
+ beta: torch.Tensor,
+ scale: float,
+ initial_state: torch.Tensor,
+ output_final_state: bool,
+ cu_seqlens: Optional[torch.LongTensor] = None,
+):
+ g = chunk_local_cumsum(g, chunk_size=64, cu_seqlens=cu_seqlens)
+ # obtain WY representation. u is actually the new v.
+ A = chunk_scaled_dot_kkt_fwd(
+ k=k, beta=beta, g_cumsum=g, cu_seqlens=cu_seqlens, output_dtype=torch.float32
+ )
+ A = solve_tril(A=A, cu_seqlens=cu_seqlens, output_dtype=k.dtype)
+ w, u = recompute_w_u_fwd(
+ k=k,
+ v=v,
+ beta=beta,
+ A=A,
+ g_cumsum=g,
+ cu_seqlens=cu_seqlens,
+ )
+ h, v_new, final_state = chunk_gated_delta_rule_fwd_h(
+ k=k,
+ w=w,
+ u=u,
+ g=g,
+ initial_state=initial_state,
+ output_final_state=output_final_state,
+ cu_seqlens=cu_seqlens,
+ )
+ o = chunk_fwd_o(
+ q=q,
+ k=k,
+ v=v_new,
+ h=h,
+ g=g,
+ scale=scale,
+ cu_seqlens=cu_seqlens,
+ )
+ if SUPPRESS_LEVEL < 3:
+ return g, o, A, final_state, None, None, None
+ elif SUPPRESS_LEVEL >= 3:
+ return g, o, A, final_state, w, h, v_new
+
+
+class ChunkGatedDeltaRuleFunction(torch.autograd.Function):
+
+ @staticmethod
+ @input_guard
+ @autocast_custom_fwd
+ def forward(
+ ctx,
+ q: torch.Tensor,
+ k: torch.Tensor,
+ v: torch.Tensor,
+ g: torch.Tensor,
+ beta: torch.Tensor,
+ scale: float,
+ initial_state: torch.Tensor,
+ output_final_state: bool,
+ cu_seqlens: Optional[torch.LongTensor] = None,
+ use_qk_l2norm_in_kernel: bool = False,
+ ):
+ q_orig = q
+ k_orig = k
+
+ if use_qk_l2norm_in_kernel:
+ q = l2norm_fwd(q)
+ k = l2norm_fwd(k)
+
+ g, o, A, final_state, w, h, v_new = chunk_gated_delta_rule_fwd(
+ q=q,
+ k=k,
+ v=v,
+ g=g,
+ beta=beta,
+ scale=scale,
+ initial_state=initial_state,
+ output_final_state=output_final_state,
+ cu_seqlens=cu_seqlens,
+ )
+ return o.to(q.dtype), final_state
+
+
+@torch.compiler.disable
+def chunk_gated_delta_rule(
+ q: torch.Tensor,
+ k: torch.Tensor,
+ v: torch.Tensor,
+ g: torch.Tensor,
+ beta: torch.Tensor,
+ scale: float = None,
+ initial_state: torch.Tensor = None,
+ output_final_state: bool = False,
+ cu_seqlens: Optional[torch.LongTensor] = None,
+ head_first: bool = False,
+ use_qk_l2norm_in_kernel: bool = False,
+):
+ r"""
+ Args:
+ q (torch.Tensor):
+ queries of shape `[B, T, H, K]` if `head_first=False` else `[B, H, T, K]`.
+ k (torch.Tensor):
+ keys of shape `[B, T, H, K]` if `head_first=False` else `[B, H, T, K]`.
+ v (torch.Tensor):
+ values of shape `[B, T, H, V]` if `head_first=False` else `[B, H, T, V]`.
+ g (torch.Tensor):
+ (forget) gating tensor (in log space!) of shape `[B, T, H]` if `head_first=False` else `[B, H, T]`.
+ beta (torch.Tensor):
+ betas of shape `[B, T, H]` if `head_first=False` else `[B, H, T]`.
+ scale (Optional[int]):
+ Scale factor for the RetNet attention scores.
+ If not provided, it will default to `1 / sqrt(K)`. Default: `None`.
+ initial_state (Optional[torch.Tensor]):
+ Initial state of shape `[N, H, K, V]` for `N` input sequences.
+ For equal-length input sequences, `N` equals the batch size `B`.
+ Default: `None`.
+ output_final_state (Optional[bool]):
+ Whether to output the final state of shape `[N, H, K, V]`. Default: `False`.
+ cu_seqlens (torch.LongTensor):
+ Cumulative sequence lengths of shape `[N+1]` used for variable-length training,
+ consistent with the FlashAttention API.
+ head_first (Optional[bool]):
+ Whether the inputs are in the head-first format, which is not supported for variable-length inputs.
+ Default: `False`.
+
+ Returns:
+ o (torch.Tensor):
+ Outputs of shape `[B, T, H, V]` if `head_first=False` else `[B, H, T, V]`.
+ final_state (torch.Tensor):
+ Final state of shape `[N, H, K, V]` if `output_final_state=True` else `None`.
+
+ Examples::
+ >>> import torch
+ >>> import torch.nn.functional as F
+ >>> from einops import rearrange
+ >>> from fla.ops.gated_delta_rule import chunk_gated_delta_rule
+ # inputs with equal lengths
+ >>> B, T, H, K, V = 4, 2048, 4, 512, 512
+ >>> q = torch.randn(B, T, H, K, dtype=torch.bfloat16, device='cuda')
+ >>> k = F.normalize(torch.randn(B, T, H, K, dtype=torch.bfloat16, device='cuda'), p=2, dim=-1)
+ >>> v = torch.randn(B, T, H, V, dtype=torch.bfloat16, device='cuda')
+ >>> beta = torch.rand(B, T, H, dtype=torch.bfloat16, device='cuda').sigmoid()
+ >>> g = F.logsigmoid(torch.rand(B, T, H, dtype=torch.bfloat16, device='cuda'))
+ >>> h0 = torch.randn(B, H, K, V, dtype=torch.bfloat16, device='cuda')
+ >>> o, ht = chunk_gated_delta_rule(
+ q, k, v, g, beta,
+ initial_state=h0,
+ output_final_state=True
+ )
+ # for variable-length inputs, the batch size `B` is expected to be 1 and `cu_seqlens` is required
+ >>> q, k, v, beta, g = map(lambda x: rearrange(x, 'b t ... -> 1 (b t) ...'), (q, k, v, beta, g))
+ # for a batch with 4 sequences, `cu_seqlens` with 5 start/end positions are expected
+ >>> cu_seqlens = q.new_tensor([0, 2048, 4096, 6144, 8192], dtype=torch.long)
+ >>> o_var, ht_var = chunk_gated_delta_rule(
+ q, k, v, g, beta,
+ initial_state=h0,
+ output_final_state=True,
+ cu_seqlens=cu_seqlens
+ )
+ """
+ assert q.dtype == k.dtype == v.dtype
+ assert (
+ q.dtype != torch.float32
+ ), "ChunkGatedDeltaRuleFunction does not support float32. Please use bfloat16."
+ assert (
+ len(beta.shape) == 3
+ ), "beta must be of shape [B, T, H] if head_first=False, or [B, H, T] otherwise."
+
+ if head_first:
+ raise DeprecationWarning(
+ "head_first is deprecated and will be removed in a future version. "
+ "Please use head_first=False for now instead."
+ )
+ q, k, v, beta, g = map(
+ lambda x: rearrange(x, "b h t ... -> b t h ..."), (q, k, v, beta, g)
+ )
+ # if not head_first and q.shape[1] < q.shape[2]:
+ # warnings.warn(
+ # f"Input tensor shape suggests potential format mismatch: seq_len ({q.shape[1]}) < num_heads ({q.shape[2]}). "
+ # "This may indicate the inputs were passed in head-first format [B, H, T, ...] "
+ # "when head_first=False was specified. "
+ # "Please verify your input tensor format matches the expected shape [B, T, H, ...]."
+ # )
+ if cu_seqlens is not None:
+ if q.shape[0] != 1:
+ raise ValueError(
+ f"The batch size is expected to be 1 rather than {q.shape[0]} when using `cu_seqlens`."
+ f"Please flatten variable-length inputs before processing."
+ )
+ if initial_state is not None and initial_state.shape[0] != len(cu_seqlens) - 1:
+ raise ValueError(
+ f"The number of initial states is expected to be equal to the number of input sequences, "
+ f"i.e., {len(cu_seqlens) - 1} rather than {initial_state.shape[0]}."
+ )
+ if scale is None:
+ scale = k.shape[-1] ** -0.5
+ o, final_state = ChunkGatedDeltaRuleFunction.apply(
+ q,
+ k,
+ v,
+ g,
+ beta,
+ scale,
+ initial_state,
+ output_final_state,
+ cu_seqlens,
+ use_qk_l2norm_in_kernel,
+ )
+ if head_first:
+ o = rearrange(o, "b t h ... -> b h t ...")
+ return o, final_state
diff --git a/python/sglang/srt/layers/attention/fla/chunk_delta_h.py b/python/sglang/srt/layers/attention/fla/chunk_delta_h.py
new file mode 100644
index 00000000000..5790e0e9b44
--- /dev/null
+++ b/python/sglang/srt/layers/attention/fla/chunk_delta_h.py
@@ -0,0 +1,314 @@
+# Adapted from https://github.com/fla-org/flash-linear-attention/blob/main/fla/ops/common/chunk_delta_h.py
+# -*- coding: utf-8 -*-
+# Copyright (c) 2023-2025, Songlin Yang, Yu Zhang
+
+from typing import Optional, Tuple
+
+import torch
+import triton
+import triton.language as tl
+
+from sglang.srt.layers.attention.fla.index import (
+ prepare_chunk_indices,
+ prepare_chunk_offsets,
+)
+from sglang.srt.layers.attention.fla.op import exp, safe_exp
+from sglang.srt.layers.attention.fla.utils import is_nvidia_hopper
+
+NUM_WARPS = [2, 4] if is_nvidia_hopper else [2, 4, 8, 16]
+
+
+@triton.heuristics(
+ {
+ "USE_G": lambda args: args["g"] is not None,
+ "USE_INITIAL_STATE": lambda args: args["h0"] is not None,
+ "STORE_FINAL_STATE": lambda args: args["ht"] is not None,
+ "SAVE_NEW_VALUE": lambda args: args["v_new"] is not None,
+ "IS_VARLEN": lambda args: args["cu_seqlens"] is not None,
+ }
+)
+# @triton.autotune(
+# configs=[
+# triton.Config({"BV": BV}, num_warps=num_warps, num_stages=num_stages)
+# for num_warps in [2, 4]
+# for num_stages in [2, 3, 4]
+# for BV in [32, 64]
+# ],
+# key=["H", "K", "V", "BT", "USE_G"],
+# use_cuda_graph=use_cuda_graph,
+# )
+@triton.jit(do_not_specialize=["T"])
+def chunk_gated_delta_rule_fwd_kernel_h_blockdim64(
+ k,
+ v,
+ w,
+ v_new,
+ g,
+ h,
+ h0,
+ ht,
+ cu_seqlens,
+ chunk_offsets,
+ T,
+ H: tl.constexpr,
+ Hg: tl.constexpr,
+ K: tl.constexpr,
+ V: tl.constexpr,
+ BT: tl.constexpr,
+ BV: tl.constexpr,
+ USE_G: tl.constexpr,
+ USE_INITIAL_STATE: tl.constexpr,
+ STORE_FINAL_STATE: tl.constexpr,
+ SAVE_NEW_VALUE: tl.constexpr,
+ IS_VARLEN: tl.constexpr,
+):
+ i_v, i_nh = tl.program_id(0), tl.program_id(1)
+ i_n, i_h = i_nh // H, i_nh % H
+ if IS_VARLEN:
+ bos, eos = tl.load(cu_seqlens + i_n).to(tl.int32), tl.load(
+ cu_seqlens + i_n + 1
+ ).to(tl.int32)
+ T = eos - bos
+ NT = tl.cdiv(T, BT)
+ boh = tl.load(chunk_offsets + i_n).to(tl.int32)
+ else:
+ bos, eos = i_n * T, i_n * T + T
+ NT = tl.cdiv(T, BT)
+ boh = i_n * NT
+
+ # the [K, V] hidden state is held in up to four [64, BV] register tiles (supports K <= 256)
+ b_h1 = tl.zeros([64, BV], dtype=tl.float32)
+ if K > 64:
+ b_h2 = tl.zeros([64, BV], dtype=tl.float32)
+ if K > 128:
+ b_h3 = tl.zeros([64, BV], dtype=tl.float32)
+ if K > 192:
+ b_h4 = tl.zeros([64, BV], dtype=tl.float32)
+
+ # calculate offset
+ h += (boh * H + i_h) * K * V
+ v += (bos * H + i_h) * V
+ k += (bos * Hg + i_h // (H // Hg)) * K
+ w += (bos * H + i_h) * K
+ if SAVE_NEW_VALUE:
+ v_new += (bos * H + i_h) * V
+ stride_v = H * V
+ stride_h = H * K * V
+ stride_k = Hg * K
+ stride_w = H * K
+ if USE_INITIAL_STATE:
+ h0 = h0 + i_nh * K * V
+ if STORE_FINAL_STATE:
+ ht = ht + i_nh * K * V
+
+ # load initial state
+ if USE_INITIAL_STATE:
+ p_h0_1 = tl.make_block_ptr(h0, (K, V), (V, 1), (0, i_v * BV), (64, BV), (1, 0))
+ b_h1 += tl.load(p_h0_1, boundary_check=(0, 1)).to(tl.float32)
+ if K > 64:
+ p_h0_2 = tl.make_block_ptr(
+ h0, (K, V), (V, 1), (64, i_v * BV), (64, BV), (1, 0)
+ )
+ b_h2 += tl.load(p_h0_2, boundary_check=(0, 1)).to(tl.float32)
+ if K > 128:
+ p_h0_3 = tl.make_block_ptr(
+ h0, (K, V), (V, 1), (128, i_v * BV), (64, BV), (1, 0)
+ )
+ b_h3 += tl.load(p_h0_3, boundary_check=(0, 1)).to(tl.float32)
+ if K > 192:
+ p_h0_4 = tl.make_block_ptr(
+ h0, (K, V), (V, 1), (192, i_v * BV), (64, BV), (1, 0)
+ )
+ b_h4 += tl.load(p_h0_4, boundary_check=(0, 1)).to(tl.float32)
+
+ # main recurrence
+ for i_t in range(NT):
+ p_h1 = tl.make_block_ptr(
+ h + i_t * stride_h, (K, V), (V, 1), (0, i_v * BV), (64, BV), (1, 0)
+ )
+ tl.store(p_h1, b_h1.to(p_h1.dtype.element_ty), boundary_check=(0, 1))
+ if K > 64:
+ p_h2 = tl.make_block_ptr(
+ h + i_t * stride_h, (K, V), (V, 1), (64, i_v * BV), (64, BV), (1, 0)
+ )
+ tl.store(p_h2, b_h2.to(p_h2.dtype.element_ty), boundary_check=(0, 1))
+ if K > 128:
+ p_h3 = tl.make_block_ptr(
+ h + i_t * stride_h, (K, V), (V, 1), (128, i_v * BV), (64, BV), (1, 0)
+ )
+ tl.store(p_h3, b_h3.to(p_h3.dtype.element_ty), boundary_check=(0, 1))
+ if K > 192:
+ p_h4 = tl.make_block_ptr(
+ h + i_t * stride_h, (K, V), (V, 1), (192, i_v * BV), (64, BV), (1, 0)
+ )
+ tl.store(p_h4, b_h4.to(p_h4.dtype.element_ty), boundary_check=(0, 1))
+
+ p_v = tl.make_block_ptr(
+ v, (T, V), (stride_v, 1), (i_t * BT, i_v * BV), (BT, BV), (1, 0)
+ )
+ p_v_new = (
+ tl.make_block_ptr(
+ v_new, (T, V), (stride_v, 1), (i_t * BT, i_v * BV), (BT, BV), (1, 0)
+ )
+ if SAVE_NEW_VALUE
+ else None
+ )
+ b_v_new = tl.zeros([BT, BV], dtype=tl.float32)
+ p_w = tl.make_block_ptr(
+ w, (T, K), (stride_w, 1), (i_t * BT, 0), (BT, 64), (1, 0)
+ )
+ b_w = tl.load(p_w, boundary_check=(0, 1))
+ b_v_new += tl.dot(b_w, b_h1.to(b_w.dtype))
+ if K > 64:
+ p_w = tl.make_block_ptr(
+ w, (T, K), (stride_w, 1), (i_t * BT, 64), (BT, 64), (1, 0)
+ )
+ b_w = tl.load(p_w, boundary_check=(0, 1))
+ b_v_new += tl.dot(b_w, b_h2.to(b_w.dtype))
+ if K > 128:
+ p_w = tl.make_block_ptr(
+ w, (T, K), (stride_w, 1), (i_t * BT, 128), (BT, 64), (1, 0)
+ )
+ b_w = tl.load(p_w, boundary_check=(0, 1))
+ b_v_new += tl.dot(b_w, b_h3.to(b_w.dtype))
+ if K > 192:
+ p_w = tl.make_block_ptr(
+ w, (T, K), (stride_w, 1), (i_t * BT, 192), (BT, 64), (1, 0)
+ )
+ b_w = tl.load(p_w, boundary_check=(0, 1))
+ b_v_new += tl.dot(b_w, b_h4.to(b_w.dtype))
+ b_v_new = -b_v_new + tl.load(p_v, boundary_check=(0, 1))
+
+ if SAVE_NEW_VALUE:
+ p_v_new = tl.make_block_ptr(
+ v_new, (T, V), (stride_v, 1), (i_t * BT, i_v * BV), (BT, BV), (1, 0)
+ )
+ tl.store(
+ p_v_new, b_v_new.to(p_v_new.dtype.element_ty), boundary_check=(0, 1)
+ )
+
+ if USE_G:
+ last_idx = min((i_t + 1) * BT, T) - 1
+ b_g_last = tl.load(g + bos * H + last_idx * H + i_h)
+ p_g = tl.make_block_ptr(
+ g + bos * H + i_h, (T,), (H,), (i_t * BT,), (BT,), (0,)
+ )
+ b_g = tl.load(p_g, boundary_check=(0,))
+ b_v_new = b_v_new * safe_exp(b_g_last - b_g)[:, None]
+ b_g_last = exp(b_g_last)
+ b_h1 = b_h1 * b_g_last
+ if K > 64:
+ b_h2 = b_h2 * b_g_last
+ if K > 128:
+ b_h3 = b_h3 * b_g_last
+ if K > 192:
+ b_h4 = b_h4 * b_g_last
+ b_v_new = b_v_new.to(k.dtype.element_ty)
+ p_k = tl.make_block_ptr(
+ k, (K, T), (1, stride_k), (0, i_t * BT), (64, BT), (0, 1)
+ )
+ b_k = tl.load(p_k, boundary_check=(0, 1))
+ b_h1 += tl.dot(b_k, b_v_new)
+ if K > 64:
+ p_k = tl.make_block_ptr(
+ k, (K, T), (1, stride_k), (64, i_t * BT), (64, BT), (0, 1)
+ )
+ b_k = tl.load(p_k, boundary_check=(0, 1))
+ b_h2 += tl.dot(b_k, b_v_new)
+ if K > 128:
+ p_k = tl.make_block_ptr(
+ k, (K, T), (1, stride_k), (128, i_t * BT), (64, BT), (0, 1)
+ )
+ b_k = tl.load(p_k, boundary_check=(0, 1))
+ b_h3 += tl.dot(b_k, b_v_new)
+ if K > 192:
+ p_k = tl.make_block_ptr(
+ k, (K, T), (1, stride_k), (192, i_t * BT), (64, BT), (0, 1)
+ )
+ b_k = tl.load(p_k, boundary_check=(0, 1))
+ b_h4 += tl.dot(b_k, b_v_new)
+
+ # epilogue
+ if STORE_FINAL_STATE:
+ p_ht = tl.make_block_ptr(ht, (K, V), (V, 1), (0, i_v * BV), (64, BV), (1, 0))
+ tl.store(p_ht, b_h1.to(p_ht.dtype.element_ty), boundary_check=(0, 1))
+ if K > 64:
+ p_ht = tl.make_block_ptr(
+ ht, (K, V), (V, 1), (64, i_v * BV), (64, BV), (1, 0)
+ )
+ tl.store(p_ht, b_h2.to(p_ht.dtype.element_ty), boundary_check=(0, 1))
+ if K > 128:
+ p_ht = tl.make_block_ptr(
+ ht, (K, V), (V, 1), (128, i_v * BV), (64, BV), (1, 0)
+ )
+ tl.store(p_ht, b_h3.to(p_ht.dtype.element_ty), boundary_check=(0, 1))
+ if K > 192:
+ p_ht = tl.make_block_ptr(
+ ht, (K, V), (V, 1), (192, i_v * BV), (64, BV), (1, 0)
+ )
+ tl.store(p_ht, b_h4.to(p_ht.dtype.element_ty), boundary_check=(0, 1))
+
+
+def chunk_gated_delta_rule_fwd_h(
+ k: torch.Tensor,
+ w: torch.Tensor,
+ u: torch.Tensor,
+ g: Optional[torch.Tensor] = None,
+ initial_state: Optional[torch.Tensor] = None,
+ output_final_state: bool = False,
+ chunk_size: int = 64, # SY: remove this argument and force chunk size 64?
+ save_new_value: bool = True,
+ cu_seqlens: Optional[torch.LongTensor] = None,
+) -> Tuple[torch.Tensor, torch.Tensor]:
+ B, T, Hg, K, V = *k.shape, u.shape[-1]
+ H = u.shape[-2]
+ BT = chunk_size
+
+ chunk_indices = (
+ prepare_chunk_indices(cu_seqlens, chunk_size)
+ if cu_seqlens is not None
+ else None
+ )
+ # N: the actual number of sequences in the batch with either equal or variable lengths
+ if cu_seqlens is None:
+ N, NT, chunk_offsets = B, triton.cdiv(T, BT), None
+ else:
+ N, NT, chunk_offsets = (
+ len(cu_seqlens) - 1,
+ len(chunk_indices),
+ prepare_chunk_offsets(cu_seqlens, BT),
+ )
+ assert K <= 256, "current kernel does not support head dimension larger than 256."
+
+ h = k.new_empty(B, NT, H, K, V)
+ final_state = (
+ k.new_empty(N, H, K, V, dtype=torch.float32) if output_final_state else None
+ )
+
+ v_new = torch.empty_like(u) if save_new_value else None
+
+ def grid(meta):
+ return (triton.cdiv(V, meta["BV"]), N * H)
+
+ chunk_gated_delta_rule_fwd_kernel_h_blockdim64[grid](
+ k=k,
+ v=u,
+ w=w,
+ v_new=v_new,
+ g=g,
+ h=h,
+ h0=initial_state,
+ ht=final_state,
+ cu_seqlens=cu_seqlens,
+ chunk_offsets=chunk_offsets,
+ T=T,
+ H=H,
+ Hg=Hg,
+ K=K,
+ V=V,
+ BT=BT,
+ BV=32,
+ num_warps=4,
+ num_stages=2,
+ )
+ return h, v_new, final_state
diff --git a/python/sglang/srt/layers/attention/fla/chunk_o.py b/python/sglang/srt/layers/attention/fla/chunk_o.py
new file mode 100644
index 00000000000..d672c646beb
--- /dev/null
+++ b/python/sglang/srt/layers/attention/fla/chunk_o.py
@@ -0,0 +1,178 @@
+# Adapted from https://github.com/fla-org/flash-linear-attention/blob/main/fla/ops/common/chunk_o.py
+# -*- coding: utf-8 -*-
+# Copyright (c) 2023-2025, Songlin Yang, Yu Zhang
+
+from typing import Optional, Tuple
+
+import torch
+import triton
+import triton.language as tl
+
+from sglang.srt.layers.attention.fla.index import prepare_chunk_indices
+from sglang.srt.layers.attention.fla.op import exp, safe_exp
+from sglang.srt.layers.attention.fla.utils import check_shared_mem, is_nvidia_hopper
+
+BKV_LIST = [64, 128] if check_shared_mem() else [32, 64]
+NUM_WARPS = [2, 4] if is_nvidia_hopper else [2, 4, 8]
+
+
+@triton.heuristics(
+ {
+ "USE_G": lambda args: args["g"] is not None,
+ "IS_VARLEN": lambda args: args["cu_seqlens"] is not None,
+ }
+)
+# @triton.autotune(
+# configs=[
+# triton.Config({"BK": BK, "BV": BV}, num_warps=num_warps, num_stages=num_stages)
+# for BK in BKV_LIST
+# for BV in BKV_LIST
+# for num_warps in NUM_WARPS
+# for num_stages in [2, 3, 4]
+# ],
+# key=["H", "K", "V", "BT"],
+# )
+@triton.jit(do_not_specialize=["T"])
+def chunk_fwd_kernel_o(
+ q,
+ k,
+ v,
+ h,
+ g,
+ o,
+ cu_seqlens,
+ chunk_indices,
+ scale,
+ T,
+ H: tl.constexpr,
+ Hg: tl.constexpr,
+ K: tl.constexpr,
+ V: tl.constexpr,
+ BT: tl.constexpr,
+ BK: tl.constexpr,
+ BV: tl.constexpr,
+ USE_G: tl.constexpr,
+ IS_VARLEN: tl.constexpr,
+):
+ i_v, i_t, i_bh = tl.program_id(0), tl.program_id(1), tl.program_id(2)
+ i_b, i_h = i_bh // H, i_bh % H
+
+ if IS_VARLEN:
+ i_tg = i_t
+ i_n, i_t = tl.load(chunk_indices + i_t * 2).to(tl.int32), tl.load(
+ chunk_indices + i_t * 2 + 1
+ ).to(tl.int32)
+ bos, eos = tl.load(cu_seqlens + i_n).to(tl.int32), tl.load(
+ cu_seqlens + i_n + 1
+ ).to(tl.int32)
+ T = eos - bos
+ NT = tl.cdiv(T, BT)
+ else:
+ NT = tl.cdiv(T, BT)
+ i_tg = i_b * NT + i_t
+ bos, eos = i_b * T, i_b * T + T
+
+ # offset calculation
+ q += (bos * Hg + i_h // (H // Hg)) * K
+ k += (bos * Hg + i_h // (H // Hg)) * K
+ v += (bos * H + i_h) * V
+ o += (bos * H + i_h) * V
+ h += (i_tg * H + i_h).to(tl.int64) * K * V
+
+ b_o = tl.zeros([BT, BV], dtype=tl.float32)
+ b_A = tl.zeros([BT, BT], dtype=tl.float32)
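+ # b_o accumulates the inter-chunk contribution q @ h (through the chunk-level state h),
+ # while b_A accumulates the intra-chunk scores q @ k^T, causally masked further below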
+
+ for i_k in range(tl.cdiv(K, BK)):
+ p_q = tl.make_block_ptr(
+ q, (T, K), (Hg * K, 1), (i_t * BT, i_k * BK), (BT, BK), (1, 0)
+ )
+ p_k = tl.make_block_ptr(
+ k, (K, T), (1, Hg * K), (i_k * BK, i_t * BT), (BK, BT), (0, 1)
+ )
+ p_h = tl.make_block_ptr(
+ h, (K, V), (V, 1), (i_k * BK, i_v * BV), (BK, BV), (1, 0)
+ )
+ # [BT, BK]
+ b_q = tl.load(p_q, boundary_check=(0, 1))
+ # [BK, BT]
+ b_k = tl.load(p_k, boundary_check=(0, 1))
+ # [BK, BV]
+ b_h = tl.load(p_h, boundary_check=(0, 1))
+
+ # [BT, BK] @ [BK, BV] -> [BT, BV]
+ b_o += tl.dot(b_q, b_h)
+ # [BT, BK] @ [BK, BT] -> [BT, BT]
+ b_A += tl.dot(b_q, b_k)
+
+ if USE_G:
+ g += bos * H + i_h
+ p_g = tl.make_block_ptr(g, (T,), (H,), (i_t * BT,), (BT,), (0,))
+ b_g = tl.load(p_g, boundary_check=(0,))
+ b_o = b_o * exp(b_g)[:, None]
+ b_A = b_A * safe_exp(b_g[:, None] - b_g[None, :])
+
+ o_i = tl.arange(0, BT)
+ m_A = o_i[:, None] >= o_i[None, :]
+ b_A = tl.where(m_A, b_A, 0)
+
+ p_v = tl.make_block_ptr(
+ v, (T, V), (H * V, 1), (i_t * BT, i_v * BV), (BT, BV), (1, 0)
+ )
+ p_o = tl.make_block_ptr(
+ o, (T, V), (H * V, 1), (i_t * BT, i_v * BV), (BT, BV), (1, 0)
+ )
+ b_v = tl.load(p_v, boundary_check=(0, 1))
+
+ # scale is applied to both terms separately to avoid an mma -> mma layout conversion,
+ # a workaround that is no longer needed on Triton v3.2 or higher
+ b_o = b_o * scale + tl.dot(b_A.to(b_v.dtype), b_v) * scale
+ tl.store(p_o, b_o.to(p_o.dtype.element_ty), boundary_check=(0, 1))
+
+
+def chunk_fwd_o(
+ q: torch.Tensor,
+ k: torch.Tensor,
+ v: torch.Tensor,
+ h: torch.Tensor,
+ g: Optional[torch.Tensor] = None, # cumsum of log decay
+ scale: Optional[float] = None,
+ cu_seqlens: Optional[torch.LongTensor] = None,
+ chunk_size: int = 64,
+) -> torch.Tensor:
+ B, T, Hg, K, V = *q.shape, v.shape[-1]
+ H = v.shape[-2]
+ BT = min(chunk_size, max(16, triton.next_power_of_2(T)))
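+ # shrink the chunk for short sequences: BT is the next power of two of T, clamped to [16, chunk_size]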
+ chunk_indices = (
+ prepare_chunk_indices(cu_seqlens, BT) if cu_seqlens is not None else None
+ )
+ NT = triton.cdiv(T, BT) if cu_seqlens is None else len(chunk_indices)
+ if scale is None:
+ scale = k.shape[-1] ** -0.5
+
+ o = torch.empty_like(v)
+
+ def grid(meta):
+ return (triton.cdiv(V, meta["BV"]), NT, B * H)
+
+ chunk_fwd_kernel_o[grid](
+ q,
+ k,
+ v,
+ h,
+ g,
+ o,
+ cu_seqlens,
+ chunk_indices,
+ scale,
+ T=T,
+ H=H,
+ Hg=Hg,
+ K=K,
+ V=V,
+ BT=BT,
+ BK=128,
+ BV=64,
+ num_warps=4,
+ num_stages=2,
+ )
+ return o
diff --git a/python/sglang/srt/layers/attention/fla/chunk_scaled_dot_kkt.py b/python/sglang/srt/layers/attention/fla/chunk_scaled_dot_kkt.py
new file mode 100644
index 00000000000..699350d3174
--- /dev/null
+++ b/python/sglang/srt/layers/attention/fla/chunk_scaled_dot_kkt.py
@@ -0,0 +1,151 @@
+# Adapted from https://github.com/fla-org/flash-linear-attention/blob/main/fla/ops/common/chunk_scaled_dot_kkt.py
+# -*- coding: utf-8 -*-
+# Copyright (c) 2023-2025, Songlin Yang, Yu Zhang
+
+from typing import Optional
+
+import torch
+import triton
+import triton.language as tl
+
+from sglang.srt.layers.attention.fla.index import prepare_chunk_indices
+from sglang.srt.layers.attention.fla.op import safe_exp
+
+
+@triton.heuristics(
+ {
+ "IS_VARLEN": lambda args: args["cu_seqlens"] is not None,
+ "USE_G": lambda args: args["g_cumsum"] is not None,
+ }
+)
+# @triton.autotune(
+# configs=[
+# triton.Config({"BK": BK}, num_warps=num_warps, num_stages=num_stages)
+# for BK in [32, 64, 128]
+# for num_warps in [2, 4, 8]
+# for num_stages in [2, 3, 4]
+# ],
+# key=["H", "K", "BT", "IS_VARLEN"],
+# )
+@triton.jit(do_not_specialize=["T"])
+def chunk_scaled_dot_kkt_fwd_kernel(
+ k,
+ beta,
+ g_cumsum,
+ A,
+ cu_seqlens,
+ chunk_indices,
+ T,
+ H: tl.constexpr,
+ Hg: tl.constexpr,
+ K: tl.constexpr,
+ BT: tl.constexpr,
+ BK: tl.constexpr,
+ IS_VARLEN: tl.constexpr,
+ USE_G: tl.constexpr,
+):
+ i_t, i_bh = tl.program_id(0), tl.program_id(1)
+ i_b, i_h = i_bh // H, i_bh % H
+ if IS_VARLEN:
+ i_n, i_t = tl.load(chunk_indices + i_t * 2).to(tl.int32), tl.load(
+ chunk_indices + i_t * 2 + 1
+ ).to(tl.int32)
+ bos, eos = tl.load(cu_seqlens + i_n).to(tl.int32), tl.load(
+ cu_seqlens + i_n + 1
+ ).to(tl.int32)
+ T = eos - bos
+ else:
+ bos, eos = i_b * T, i_b * T + T
+ o_t = tl.arange(0, BT)
+
+ p_beta = tl.make_block_ptr(
+ beta + bos * H + i_h, (T,), (H,), (i_t * BT,), (BT,), (0,)
+ )
+ b_beta = tl.load(p_beta, boundary_check=(0,))
+
+ b_A = tl.zeros([BT, BT], dtype=tl.float32)
+ for i_k in range(tl.cdiv(K, BK)):
+ p_k = tl.make_block_ptr(
+ k + (bos * Hg + i_h // (H // Hg)) * K,
+ (T, K),
+ (Hg * K, 1),
+ (i_t * BT, i_k * BK),
+ (BT, BK),
+ (1, 0),
+ )
+ b_k = tl.load(p_k, boundary_check=(0, 1))
+ b_kb = b_k * b_beta[:, None]
+ b_A += tl.dot(b_kb.to(b_k.dtype), tl.trans(b_k))
+
+ if USE_G:
+ p_g = tl.make_block_ptr(
+ g_cumsum + bos * H + i_h, (T,), (H,), (i_t * BT,), (BT,), (0,)
+ )
+ b_g = tl.load(p_g, boundary_check=(0,))
+ b_g_diff = b_g[:, None] - b_g[None, :]
+ b_A = b_A * safe_exp(b_g_diff)
+
+ b_A = tl.where(o_t[:, None] > o_t[None, :], b_A, 0)
+ p_A = tl.make_block_ptr(
+ A + (bos * H + i_h) * BT, (T, BT), (BT * H, 1), (i_t * BT, 0), (BT, BT), (1, 0)
+ )
+ tl.store(p_A, b_A.to(p_A.dtype.element_ty), boundary_check=(0, 1))
+
+
+def chunk_scaled_dot_kkt_fwd(
+ k: torch.Tensor,
+ beta: torch.Tensor,
+ g_cumsum: Optional[torch.Tensor] = None,
+ cu_seqlens: Optional[torch.LongTensor] = None,
+ chunk_size: int = 64,
+ output_dtype: torch.dtype = torch.float32,
+) -> torch.Tensor:
+ r"""
+ Compute beta * K * K^T.
+
+ Args:
+ k (torch.Tensor):
+ The key tensor of shape `[B, T, H, K]`.
+ beta (torch.Tensor):
+ The beta tensor of shape `[B, T, H]`.
+ g_cumsum (torch.Tensor):
+ The cumulative sum of the gate tensor of shape `[B, T, H]`.
+ Default: None
+ cu_seqlens (torch.LongTensor):
+ The cumulative sequence lengths of the input tensor.
+ Default: None
+ chunk_size (int):
+ The chunk size. Default: 64.
+ output_dtype (torch.dtype):
+ The dtype of the output tensor. Default: `torch.float32`
+
+ Returns:
+ beta * K * K^T of shape `[B, T, H, BT]` where `BT` is the chunk size.
+ """
+
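+ # Reference semantics (an illustrative sketch, not the executed path): within each
+ # length-BT chunk and for positions t > s,
+ # A[t, s] = beta[t] * <k[t], k[s]> * exp(g_cumsum[t] - g_cumsum[s])
+ # and A[t, s] = 0 for t <= s, i.e. a strictly lower-triangular (beta * K) @ K^T,
+ # with the exponential decay factor applied only when `g_cumsum` is provided.
+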
+ B, T, Hg, K = k.shape
+
+ H = beta.shape[-1]
+ BT = chunk_size
+ chunk_indices = (
+ prepare_chunk_indices(cu_seqlens, BT) if cu_seqlens is not None else None
+ )
+ NT = triton.cdiv(T, BT) if cu_seqlens is None else len(chunk_indices)
+ A = torch.empty(B, T, H, BT, device=k.device, dtype=output_dtype)
+ chunk_scaled_dot_kkt_fwd_kernel[(NT, B * H)](
+ k=k,
+ beta=beta,
+ g_cumsum=g_cumsum,
+ A=A,
+ cu_seqlens=cu_seqlens,
+ chunk_indices=chunk_indices,
+ T=T,
+ H=H,
+ Hg=Hg,
+ K=K,
+ BT=BT,
+ BK=64,
+ num_warps=8,
+ num_stages=3,
+ )
+ return A
diff --git a/python/sglang/srt/layers/attention/fla/cumsum.py b/python/sglang/srt/layers/attention/fla/cumsum.py
new file mode 100644
index 00000000000..b8e3cdde1e7
--- /dev/null
+++ b/python/sglang/srt/layers/attention/fla/cumsum.py
@@ -0,0 +1,300 @@
+# Adapted from https://github.com/fla-org/flash-linear-attention/blob/main/fla/ops/utils/cumsum.py
+# -*- coding: utf-8 -*-
+# Copyright (c) 2023-2025, Songlin Yang, Yu Zhang
+
+from typing import Optional
+
+import torch
+import triton
+import triton.language as tl
+
+from sglang.srt.layers.attention.fla.index import prepare_chunk_indices
+from sglang.srt.layers.attention.fla.utils import check_shared_mem, input_guard
+
+BS_LIST = [32, 64] if check_shared_mem() else [16, 32]
+
+
+@triton.heuristics(
+ {
+ "HAS_SCALE": lambda args: args["scale"] is not None,
+ "IS_VARLEN": lambda args: args["cu_seqlens"] is not None,
+ }
+)
+# @triton.autotune(
+# configs=[triton.Config({}, num_warps=num_warps) for num_warps in [1, 2, 4, 8]],
+# key=["B", "H", "BT", "IS_VARLEN", "REVERSE"],
+# )
+@triton.jit(do_not_specialize=["T"])
+def chunk_local_cumsum_scalar_kernel(
+ s,
+ o,
+ scale,
+ cu_seqlens,
+ chunk_indices,
+ T,
+ B: tl.constexpr,
+ H: tl.constexpr,
+ BT: tl.constexpr,
+ REVERSE: tl.constexpr,
+ HAS_SCALE: tl.constexpr,
+ IS_VARLEN: tl.constexpr,
+ HEAD_FIRST: tl.constexpr,
+):
+ i_t, i_bh = tl.program_id(0), tl.program_id(1)
+ i_b, i_h = i_bh // H, i_bh % H
+ if IS_VARLEN:
+ i_n, i_t = tl.load(chunk_indices + i_t * 2).to(tl.int32), tl.load(
+ chunk_indices + i_t * 2 + 1
+ ).to(tl.int32)
+ bos, eos = tl.load(cu_seqlens + i_n).to(tl.int32), tl.load(
+ cu_seqlens + i_n + 1
+ ).to(tl.int32)
+ T = eos - bos
+ else:
+ bos, eos = i_b * T, i_b * T + T
+
+ if HEAD_FIRST:
+ p_s = tl.make_block_ptr(
+ s + bos * H + i_h * T, (T,), (1,), (i_t * BT,), (BT,), (0,)
+ )
+ p_o = tl.make_block_ptr(
+ o + bos * H + i_h * T, (T,), (1,), (i_t * BT,), (BT,), (0,)
+ )
+ else:
+ p_s = tl.make_block_ptr(s + bos * H + i_h, (T,), (H,), (i_t * BT,), (BT,), (0,))
+ p_o = tl.make_block_ptr(o + bos * H + i_h, (T,), (H,), (i_t * BT,), (BT,), (0,))
+ # [BT]
+ b_s = tl.load(p_s, boundary_check=(0,)).to(tl.float32)
+ b_o = tl.cumsum(b_s, axis=0)
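+ # for the reverse direction, convert prefix sums into suffix sums:
+ # rev_cumsum[i] = total - cumsum[i] + s[i]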
+ if REVERSE:
+ b_z = tl.sum(b_s, axis=0)
+ b_o = -b_o + b_z[None] + b_s
+ if HAS_SCALE:
+ b_o *= scale
+ tl.store(p_o, b_o.to(p_o.dtype.element_ty), boundary_check=(0,))
+
+
+@triton.heuristics(
+ {
+ "HAS_SCALE": lambda args: args["scale"] is not None,
+ "IS_VARLEN": lambda args: args["cu_seqlens"] is not None,
+ }
+)
+@triton.autotune(
+ configs=[
+ triton.Config({"BS": BS}, num_warps=num_warps)
+ for BS in BS_LIST
+ for num_warps in [2, 4, 8]
+ ],
+ key=["B", "H", "S", "BT", "IS_VARLEN", "REVERSE"],
+)
+@triton.jit(do_not_specialize=["T"])
+def chunk_local_cumsum_vector_kernel(
+ s,
+ o,
+ scale,
+ cu_seqlens,
+ chunk_indices,
+ T,
+ B: tl.constexpr,
+ H: tl.constexpr,
+ S: tl.constexpr,
+ BT: tl.constexpr,
+ BS: tl.constexpr,
+ REVERSE: tl.constexpr,
+ HAS_SCALE: tl.constexpr,
+ IS_VARLEN: tl.constexpr,
+ HEAD_FIRST: tl.constexpr,
+):
+ i_s, i_t, i_bh = tl.program_id(0), tl.program_id(1), tl.program_id(2)
+ i_b, i_h = i_bh // H, i_bh % H
+ if IS_VARLEN:
+ i_n, i_t = tl.load(chunk_indices + i_t * 2).to(tl.int32), tl.load(
+ chunk_indices + i_t * 2 + 1
+ ).to(tl.int32)
+ bos, eos = tl.load(cu_seqlens + i_n).to(tl.int32), tl.load(
+ cu_seqlens + i_n + 1
+ ).to(tl.int32)
+ T = eos - bos
+ else:
+ bos, eos = i_b * T, i_b * T + T
+
+ o_i = tl.arange(0, BT)
+ if REVERSE:
+ m_s = tl.where(o_i[:, None] <= o_i[None, :], 1.0, 0.0)
+ else:
+ m_s = tl.where(o_i[:, None] >= o_i[None, :], 1.0, 0.0)
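+ # m_s is a [BT, BT] triangular mask; the tl.dot below computes the within-chunk
+ # (reverse) cumulative sum of b_s as a single matrix multiplication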
+
+ if HEAD_FIRST:
+ p_s = tl.make_block_ptr(
+ s + (bos * H + i_h * T) * S,
+ (T, S),
+ (S, 1),
+ (i_t * BT, i_s * BS),
+ (BT, BS),
+ (1, 0),
+ )
+ p_o = tl.make_block_ptr(
+ o + (bos * H + i_h * T) * S,
+ (T, S),
+ (S, 1),
+ (i_t * BT, i_s * BS),
+ (BT, BS),
+ (1, 0),
+ )
+ else:
+ p_s = tl.make_block_ptr(
+ s + (bos * H + i_h) * S,
+ (T, S),
+ (H * S, 1),
+ (i_t * BT, i_s * BS),
+ (BT, BS),
+ (1, 0),
+ )
+ p_o = tl.make_block_ptr(
+ o + (bos * H + i_h) * S,
+ (T, S),
+ (H * S, 1),
+ (i_t * BT, i_s * BS),
+ (BT, BS),
+ (1, 0),
+ )
+ # [BT, BS]
+ b_s = tl.load(p_s, boundary_check=(0, 1)).to(tl.float32)
+ b_o = tl.dot(m_s, b_s, allow_tf32=False)
+ if HAS_SCALE:
+ b_o *= scale
+ tl.store(p_o, b_o.to(p_o.dtype.element_ty), boundary_check=(0, 1))
+
+
+def chunk_local_cumsum_scalar(
+ g: torch.Tensor,
+ chunk_size: int,
+ reverse: bool = False,
+ scale: float = None,
+ cu_seqlens: Optional[torch.Tensor] = None,
+ head_first: bool = False,
+ output_dtype: Optional[torch.dtype] = torch.float,
+) -> torch.Tensor:
+ if head_first:
+ B, H, T = g.shape
+ else:
+ B, T, H = g.shape
+ assert chunk_size == 2 ** (
+ chunk_size.bit_length() - 1
+ ), "chunk_size must be a power of 2"
+ BT = chunk_size
+ chunk_indices = (
+ prepare_chunk_indices(cu_seqlens, BT) if cu_seqlens is not None else None
+ )
+ NT = triton.cdiv(T, BT) if cu_seqlens is None else len(chunk_indices)
+ g_org, g = g, torch.empty_like(g, dtype=output_dtype or g.dtype)
+ grid = (NT, B * H)
+ chunk_local_cumsum_scalar_kernel[grid](
+ s=g_org,
+ o=g,
+ scale=scale,
+ cu_seqlens=cu_seqlens,
+ chunk_indices=chunk_indices,
+ T=T,
+ B=B,
+ H=H,
+ BT=BT,
+ HEAD_FIRST=head_first,
+ REVERSE=reverse,
+ num_warps=8,
+ num_stages=3,
+ )
+ return g
+
+
+def chunk_local_cumsum_vector(
+ g: torch.Tensor,
+ chunk_size: int,
+ reverse: bool = False,
+ scale: float = None,
+ cu_seqlens: Optional[torch.Tensor] = None,
+ head_first: bool = False,
+ output_dtype: Optional[torch.dtype] = torch.float,
+) -> torch.Tensor:
+ if head_first:
+ B, H, T, S = g.shape
+ else:
+ B, T, H, S = g.shape
+ BT = chunk_size
+ chunk_indices = (
+ prepare_chunk_indices(cu_seqlens, chunk_size)
+ if cu_seqlens is not None
+ else None
+ )
+ NT = triton.cdiv(T, BT) if cu_seqlens is None else len(chunk_indices)
+ assert chunk_size == 2 ** (
+ chunk_size.bit_length() - 1
+ ), "chunk_size must be a power of 2"
+
+ g_org, g = g, torch.empty_like(g, dtype=output_dtype or g.dtype)
+
+ def grid(meta):
+ return (triton.cdiv(meta["S"], meta["BS"]), NT, B * H)
+
+ # keep the cumulative sums in fp32 inside the kernel
+ # for head-first inputs this kernel is equivalent to
+ # g = g.view(B, H, NT, BT, -1).cumsum(-2).view(B, H, T, -1)
+ chunk_local_cumsum_vector_kernel[grid](
+ s=g_org,
+ o=g,
+ scale=scale,
+ cu_seqlens=cu_seqlens,
+ chunk_indices=chunk_indices,
+ T=T,
+ B=B,
+ H=H,
+ S=S,
+ BT=BT,
+ HEAD_FIRST=head_first,
+ REVERSE=reverse,
+ )
+ return g
+
+
+@input_guard
+def chunk_local_cumsum(
+ g: torch.Tensor,
+ chunk_size: int,
+ reverse: bool = False,
+ scale: float = None,
+ cu_seqlens: Optional[torch.Tensor] = None,
+ head_first: bool = False,
+ output_dtype: Optional[torch.dtype] = torch.float,
+ **kwargs,
+) -> torch.Tensor:
+ if cu_seqlens is not None:
+ assert (
+ g.shape[0] == 1
+ ), "Only batch size 1 is supported when cu_seqlens are provided"
+ if len(g.shape) == 3:
+ return chunk_local_cumsum_scalar(
+ g=g,
+ chunk_size=chunk_size,
+ reverse=reverse,
+ scale=scale,
+ cu_seqlens=cu_seqlens,
+ head_first=head_first,
+ output_dtype=output_dtype,
+ )
+ elif len(g.shape) == 4:
+ return chunk_local_cumsum_vector(
+ g=g,
+ chunk_size=chunk_size,
+ reverse=reverse,
+ scale=scale,
+ cu_seqlens=cu_seqlens,
+ head_first=head_first,
+ output_dtype=output_dtype,
+ )
+ else:
+ raise ValueError(
+ f"Unsupported input shape {g.shape}, "
+ f"which should be (B, T, H, D) if `head_first=False` "
+ f"or (B, H, T, D) otherwise"
+ )
diff --git a/python/sglang/srt/layers/attention/fla/fused_recurrent.py b/python/sglang/srt/layers/attention/fla/fused_recurrent.py
new file mode 100644
index 00000000000..fa7262ce294
--- /dev/null
+++ b/python/sglang/srt/layers/attention/fla/fused_recurrent.py
@@ -0,0 +1,640 @@
+# Adapted from https://github.com/fla-org/flash-linear-attention/blob/main/fla/ops/gated_delta_rule/fused_recurrent.py
+# -*- coding: utf-8 -*-
+# Copyright (c) 2023-2025, Songlin Yang, Yu Zhang
+
+from typing import Optional, Tuple
+
+import torch
+import triton
+import triton.language as tl
+
+from sglang.srt.layers.attention.fla.op import exp
+from sglang.srt.layers.attention.fla.utils import input_guard
+
+
+@triton.heuristics(
+ {
+ "USE_INITIAL_STATE": lambda args: args["h0"] is not None,
+ "STORE_FINAL_STATE": lambda args: args["ht"] is not None,
+ "IS_VARLEN": lambda args: args["cu_seqlens"] is not None,
+ }
+)
+@triton.jit(do_not_specialize=["T"])
+def fused_recurrent_gated_delta_rule_fwd_kernel(
+ q,
+ k,
+ v,
+ g,
+ beta,
+ o,
+ h0,
+ ht,
+ cu_seqlens,
+ scale,
+ T,
+ B: tl.constexpr,
+ H: tl.constexpr,
+ HV: tl.constexpr,
+ K: tl.constexpr,
+ V: tl.constexpr,
+ BK: tl.constexpr,
+ BV: tl.constexpr,
+ USE_INITIAL_STATE: tl.constexpr, # whether to use initial state
+ STORE_FINAL_STATE: tl.constexpr, # whether to store final state
+ IS_BETA_HEADWISE: tl.constexpr, # whether beta is headwise vector or scalar,
+ USE_QK_L2NORM_IN_KERNEL: tl.constexpr,
+ IS_VARLEN: tl.constexpr,
+):
+ i_k, i_v, i_nh = tl.program_id(0), tl.program_id(1), tl.program_id(2)
+ i_n, i_hv = i_nh // HV, i_nh % HV
+ i_h = i_hv // (HV // H)
+ if IS_VARLEN:
+ bos, eos = tl.load(cu_seqlens + i_n).to(tl.int64), tl.load(
+ cu_seqlens + i_n + 1
+ ).to(tl.int64)
+ all = T
+ T = eos - bos
+ else:
+ bos, eos = i_n * T, i_n * T + T
+ all = B * T
+ o_k = i_k * BK + tl.arange(0, BK)
+ o_v = i_v * BV + tl.arange(0, BV)
+
+ p_q = q + (bos * H + i_h) * K + o_k
+ p_k = k + (bos * H + i_h) * K + o_k
+ p_v = v + (bos * HV + i_hv) * V + o_v
+ if IS_BETA_HEADWISE:
+ p_beta = beta + (bos * HV + i_hv) * V + o_v
+ else:
+ p_beta = beta + bos * HV + i_hv
+ p_g = g + bos * HV + i_hv
+ p_o = o + ((i_k * all + bos) * HV + i_hv) * V + o_v
+
+ mask_k = o_k < K
+ mask_v = o_v < V
+ mask_h = mask_k[:, None] & mask_v[None, :]
+
+ b_h = tl.zeros([BK, BV], dtype=tl.float32)
+ if USE_INITIAL_STATE:
+ p_h0 = h0 + i_nh * K * V + o_k[:, None] * V + o_v[None, :]
+ b_h += tl.load(p_h0, mask=mask_h, other=0).to(tl.float32)
+
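+ # per-step gated delta-rule recurrence, with S denoting b_h of shape [BK, BV]:
+ # S <- exp(g_t) * S
+ # u_t = beta_t * (v_t - S^T k_t)
+ # S <- S + k_t u_t^T
+ # o_t = S^T (scale * q_t)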
+ for _ in range(0, T):
+ b_q = tl.load(p_q, mask=mask_k, other=0).to(tl.float32)
+ b_k = tl.load(p_k, mask=mask_k, other=0).to(tl.float32)
+ b_v = tl.load(p_v, mask=mask_v, other=0).to(tl.float32)
+ b_g = tl.load(p_g).to(tl.float32)
+
+ if USE_QK_L2NORM_IN_KERNEL:
+ b_q = b_q / (tl.sqrt(tl.sum(b_q * b_q)) + 1e-6)
+ b_k = b_k / (tl.sqrt(tl.sum(b_k * b_k)) + 1e-6)
+ b_q = b_q * scale
+ # [BK, BV]
+ b_h *= exp(b_g)
+ # [BV]
+ b_v -= tl.sum(b_h * b_k[:, None], 0)
+ if IS_BETA_HEADWISE:
+ b_beta = tl.load(p_beta, mask=mask_v, other=0).to(tl.float32)
+ else:
+ b_beta = tl.load(p_beta).to(tl.float32)
+ b_v *= b_beta
+ # [BK, BV]
+ b_h += b_k[:, None] * b_v[None, :]
+ # [BV]
+ b_o = tl.sum(b_h * b_q[:, None], 0)
+ tl.store(p_o, b_o.to(p_o.dtype.element_ty), mask=mask_v)
+
+ p_q += H * K
+ p_k += H * K
+ p_o += HV * V
+ p_v += HV * V
+ p_g += HV
+ p_beta += HV * (V if IS_BETA_HEADWISE else 1)
+
+ if STORE_FINAL_STATE:
+ p_ht = ht + i_nh * K * V + o_k[:, None] * V + o_v[None, :]
+ tl.store(p_ht, b_h.to(p_ht.dtype.element_ty), mask=mask_h)
+
+
+def fused_recurrent_gated_delta_rule_fwd(
+ q: torch.Tensor,
+ k: torch.Tensor,
+ v: torch.Tensor,
+ g: torch.Tensor,
+ beta: torch.Tensor,
+ scale: float,
+ initial_state: torch.Tensor,
+ output_final_state: bool,
+ use_qk_l2norm_in_kernel: bool = False,
+ cu_seqlens: Optional[torch.LongTensor] = None,
+) -> Tuple[torch.Tensor, torch.Tensor]:
+ B, T, H, K, V = *k.shape, v.shape[-1]
+ HV = v.shape[2]
+ N = B if cu_seqlens is None else len(cu_seqlens) - 1
+ BK, BV = triton.next_power_of_2(K), min(triton.next_power_of_2(V), 8)
+ NK, NV = triton.cdiv(K, BK), triton.cdiv(V, BV)
+ assert NK == 1, "NK > 1 is not supported yet"
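+ # the key dim fits in a single [BK] block (hence the NK == 1 assert), while the value
+ # dim is split into blocks of at most 8 so each program keeps a small [BK, BV] state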
+ num_stages = 3
+ num_warps = 1
+
+ o = q.new_empty(NK, *v.shape)
+ if output_final_state:
+ final_state = q.new_empty(N, HV, K, V, dtype=torch.float32)
+ else:
+ final_state = None
+
+ grid = (NK, NV, N * HV)
+ fused_recurrent_gated_delta_rule_fwd_kernel[grid](
+ q=q,
+ k=k,
+ v=v,
+ g=g,
+ beta=beta,
+ o=o,
+ h0=initial_state,
+ ht=final_state,
+ cu_seqlens=cu_seqlens,
+ scale=scale,
+ T=T,
+ B=B,
+ H=H,
+ HV=HV,
+ K=K,
+ V=V,
+ BK=BK,
+ BV=BV,
+ IS_BETA_HEADWISE=beta.ndim == v.ndim,
+ USE_QK_L2NORM_IN_KERNEL=use_qk_l2norm_in_kernel,
+ num_warps=num_warps,
+ num_stages=num_stages,
+ )
+ o = o.squeeze(0)
+ return o, final_state
+
+
+class FusedRecurrentFunction(torch.autograd.Function):
+
+ @staticmethod
+ @input_guard
+ def forward(
+ ctx,
+ q: torch.Tensor,
+ k: torch.Tensor,
+ v: torch.Tensor,
+ g: torch.Tensor,
+ beta: torch.Tensor,
+ scale: float,
+ initial_state: torch.Tensor,
+ output_final_state: bool,
+ cu_seqlens: Optional[torch.LongTensor] = None,
+ use_qk_l2norm_in_kernel: bool = False,
+ ):
+ o, final_state = fused_recurrent_gated_delta_rule_fwd(
+ q=q,
+ k=k,
+ v=v,
+ g=g,
+ beta=beta,
+ scale=scale,
+ initial_state=initial_state,
+ output_final_state=output_final_state,
+ use_qk_l2norm_in_kernel=use_qk_l2norm_in_kernel,
+ cu_seqlens=cu_seqlens,
+ )
+
+ return o, final_state
+
+ @staticmethod
+ @input_guard
+ def backward(ctx, do, dht):
+ raise NotImplementedError(
+ "Backward pass is not implemented yet and we do not have plans to implement it "
+ "because we haven't figured out how to compute dg without materializing the full "
+ "hidden states for all time steps."
+ )
+
+
+def fused_recurrent_gated_delta_rule(
+ q: torch.Tensor,
+ k: torch.Tensor,
+ v: torch.Tensor,
+ g: torch.Tensor,
+ beta: torch.Tensor = None,
+ scale: float = None,
+ initial_state: torch.Tensor = None,
+ output_final_state: bool = False,
+ cu_seqlens: Optional[torch.LongTensor] = None,
+ use_qk_l2norm_in_kernel: bool = False,
+) -> Tuple[torch.Tensor, torch.Tensor]:
+ r"""
+ Args:
+ q (torch.Tensor):
+ queries of shape `[B, T, H, K]`.
+ k (torch.Tensor):
+ keys of shape `[B, T, H, K]`.
+ v (torch.Tensor):
+ values of shape `[B, T, HV, V]`.
+ GVA is applied if `HV > H`.
+ g (torch.Tensor):
+ g (decays) of shape `[B, T, HV]`.
+ beta (torch.Tensor):
+ betas of shape `[B, T, HV]`.
+ scale (Optional[float]):
+ Scale factor for the attention scores.
+ If not provided, it will default to `1 / sqrt(K)`. Default: `None`.
+ initial_state (Optional[torch.Tensor]):
+ Initial state of shape `[N, HV, K, V]` for `N` input sequences.
+ For equal-length input sequences, `N` equals the batch size `B`.
+ Default: `None`.
+ output_final_state (Optional[bool]):
+ Whether to output the final state of shape `[N, HV, K, V]`. Default: `False`.
+ cu_seqlens (torch.LongTensor):
+ Cumulative sequence lengths of shape `[N+1]` used for variable-length training,
+ consistent with the FlashAttention API.
+ Returns:
+ o (torch.Tensor):
+ Outputs of shape `[B, T, HV, V]`.
+ final_state (torch.Tensor):
+ Final state of shape `[N, HV, K, V]` if `output_final_state=True` else `None`.
+ Examples::
+ >>> import torch
+ >>> import torch.nn.functional as F
+ >>> from einops import rearrange
+ >>> from fla.ops.gated_delta_rule import fused_recurrent_gated_delta_rule
+ # inputs with equal lengths
+ >>> B, T, H, HV, K, V = 4, 2048, 4, 8, 512, 512
+ >>> q = torch.randn(B, T, H, K, device='cuda')
+ >>> k = F.normalize(torch.randn(B, T, H, K, device='cuda'), p=2, dim=-1)
+ >>> v = torch.randn(B, T, HV, V, device='cuda')
+ >>> g = F.logsigmoid(torch.rand(B, T, HV, device='cuda'))
+ >>> beta = torch.rand(B, T, HV, device='cuda').sigmoid()
+ >>> h0 = torch.randn(B, HV, K, V, device='cuda')
+ >>> o, ht = fused_recurrent_gated_delta_rule(
+ q, k, v, g, beta,
+ initial_state=h0,
+ output_final_state=True
+ )
+ # for variable-length inputs, the batch size `B` is expected to be 1 and `cu_seqlens` is required
+ >>> q, k, v, g, beta = map(lambda x: rearrange(x, 'b t ... -> 1 (b t) ...'), (q, k, v, g, beta))
+ # for a batch with 4 sequences, `cu_seqlens` with 5 start/end positions are expected
+ >>> cu_seqlens = q.new_tensor([0, 2048, 4096, 6144, 8192], dtype=torch.long)
+ >>> o_var, ht_var = fused_recurrent_gated_delta_rule(
+ q, k, v, g, beta,
+ initial_state=h0,
+ output_final_state=True,
+ cu_seqlens=cu_seqlens
+ )
+ """
+ if cu_seqlens is not None:
+ if q.shape[0] != 1:
+ raise ValueError(
+ f"The batch size is expected to be 1 rather than {q.shape[0]} when using `cu_seqlens`."
+ f"Please flatten variable-length inputs before processing."
+ )
+ if initial_state is not None and initial_state.shape[0] != len(cu_seqlens) - 1:
+ raise ValueError(
+ f"The number of initial states is expected to be equal to the number of input sequences, "
+ f"i.e., {len(cu_seqlens) - 1} rather than {initial_state.shape[0]}."
+ )
+ if scale is None:
+ scale = k.shape[-1] ** -0.5
+ else:
+ assert scale > 0, "scale must be positive"
+ if beta is None:
+ beta = torch.ones_like(q[..., 0])
+ o, final_state = FusedRecurrentFunction.apply(
+ q,
+ k,
+ v,
+ g,
+ beta,
+ scale,
+ initial_state,
+ output_final_state,
+ cu_seqlens,
+ use_qk_l2norm_in_kernel,
+ )
+ return o, final_state
+
+
+@triton.heuristics(
+ {
+ "USE_INITIAL_STATE": lambda args: args["h0_source"] is not None,
+ "IS_VARLEN": lambda args: args["cu_seqlens"] is not None,
+ "CACHE_INTERMEDIATE_STATES": lambda args: args["intermediate_states_buffer"]
+ is not None,
+ }
+)
+@triton.jit(do_not_specialize=["T"])
+def fused_recurrent_gated_delta_rule_update_fwd_kernel(
+ q,
+ k,
+ v,
+ g,
+ beta,
+ o,
+ h0_source,
+ h0_indices,
+ cu_seqlens,
+ scale,
+ intermediate_states_buffer,
+ cache_steps,
+ T,
+ B: tl.constexpr,
+ H: tl.constexpr,
+ HV: tl.constexpr,
+ K: tl.constexpr,
+ V: tl.constexpr,
+ BK: tl.constexpr,
+ BV: tl.constexpr,
+ USE_INITIAL_STATE: tl.constexpr, # whether to use initial state
+ IS_BETA_HEADWISE: tl.constexpr, # whether beta is headwise vector or scalar,
+ USE_QK_L2NORM_IN_KERNEL: tl.constexpr,
+ IS_VARLEN: tl.constexpr,
+ DISABLE_STATE_UPDATE: tl.constexpr, # whether to disable final state update
+ DISABLE_OUTPUT_CALCULATION: tl.constexpr, # whether to disable output calculation
+ CACHE_INTERMEDIATE_STATES: tl.constexpr,
+):
+ i_k, i_v, i_nh = tl.program_id(0), tl.program_id(1), tl.program_id(2)
+ i_n, i_hv = i_nh // HV, i_nh % HV
+ i_h = i_hv // (HV // H)
+ if IS_VARLEN:
+ bos, eos = tl.load(cu_seqlens + i_n).to(tl.int64), tl.load(
+ cu_seqlens + i_n + 1
+ ).to(tl.int64)
+ all = T
+ T = eos - bos
+ else:
+ bos, eos = i_n * T, i_n * T + T
+ all = B * T
+ o_k = i_k * BK + tl.arange(0, BK)
+ o_v = i_v * BV + tl.arange(0, BV)
+
+ p_q = q + (bos * H + i_h) * K + o_k
+ p_k = k + (bos * H + i_h) * K + o_k
+ p_v = v + (bos * HV + i_hv) * V + o_v
+ if IS_BETA_HEADWISE:
+ p_beta = beta + (bos * HV + i_hv) * V + o_v
+ else:
+ p_beta = beta + bos * HV + i_hv
+ p_g = g + bos * HV + i_hv
+ p_o = o + ((i_k * all + bos) * HV + i_hv) * V + o_v
+
+ mask_k = o_k < K
+ mask_v = o_v < V
+ mask_h = mask_k[:, None] & mask_v[None, :]
+
+ b_h = tl.zeros([BK, BV], dtype=tl.float32)
+ if USE_INITIAL_STATE:
+ idx = tl.load(h0_indices + i_n)
+ # Add bounds checking for idx
+ if idx >= 0: # Assuming negative indices are invalid
+ p_h0 = (
+ h0_source
+ + idx * HV * K * V
+ + i_hv * K * V
+ + o_k[:, None] * V
+ + o_v[None, :]
+ )
+ b_h += tl.load(p_h0, mask=mask_h, other=0).to(tl.float32)
+
+ # Prepare intermediate state cache variables if enabled
+ cache_idx = -1
+ if CACHE_INTERMEDIATE_STATES:
+ cache_idx = tl.load(h0_indices + i_n)
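+ # the cache buffer is assumed to be laid out as [num_cache_slots, cache_steps, HV, K, V];
+ # cache_idx selects the slot and step_idx (below) selects the time step within it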
+
+ step_idx = 0
+ for _ in range(0, T):
+ b_q = tl.load(p_q, mask=mask_k, other=0).to(tl.float32)
+ b_k = tl.load(p_k, mask=mask_k, other=0).to(tl.float32)
+ b_v = tl.load(p_v, mask=mask_v, other=0).to(tl.float32)
+ b_g = tl.load(p_g).to(tl.float32)
+
+ if USE_QK_L2NORM_IN_KERNEL:
+ b_q = b_q / (tl.sqrt(tl.sum(b_q * b_q)) + 1e-6)
+ b_k = b_k / (tl.sqrt(tl.sum(b_k * b_k)) + 1e-6)
+ b_q = b_q * scale
+ # [BK, BV]
+ b_h *= exp(b_g)
+ # [BV]
+ b_v -= tl.sum(b_h * b_k[:, None], 0)
+ if IS_BETA_HEADWISE:
+ b_beta = tl.load(p_beta, mask=mask_v, other=0).to(tl.float32)
+ else:
+ b_beta = tl.load(p_beta).to(tl.float32)
+ b_v *= b_beta
+ # [BK, BV]
+ b_h += b_k[:, None] * b_v[None, :]
+ # [BV]
+ if not DISABLE_OUTPUT_CALCULATION:
+ b_o = tl.sum(b_h * b_q[:, None], 0)
+ # core attn output
+ tl.store(p_o, b_o.to(p_o.dtype.element_ty), mask=mask_v)
+
+ # store intermediate states if enabled
+ if CACHE_INTERMEDIATE_STATES:
+ if cache_idx >= 0:
+ # Compute cache pointer for this step
+ step_offset = step_idx * HV * K * V
+ cache_ptr = (
+ intermediate_states_buffer
+ + cache_idx * cache_steps * HV * K * V
+ + step_offset
+ + i_hv * K * V
+ + o_k[:, None] * V
+ + o_v[None, :]
+ )
+ tl.store(cache_ptr, b_h.to(cache_ptr.dtype.element_ty), mask=mask_h)
+
+ step_idx += 1
+
+ p_q += H * K
+ p_k += H * K
+ p_o += HV * V
+ p_v += HV * V
+ p_g += HV
+ p_beta += HV * (V if IS_BETA_HEADWISE else 1)
+
+ # Store final state back to h0_source with bounds checking
+ # ssm states
+ if not DISABLE_STATE_UPDATE:
+ idx = tl.load(h0_indices + i_n)
+ if idx >= 0: # Add bounds checking
+ p_h0 = (
+ h0_source
+ + idx * HV * K * V
+ + i_hv * K * V
+ + o_k[:, None] * V
+ + o_v[None, :]
+ )
+ tl.store(p_h0, b_h.to(p_h0.dtype.element_ty), mask=mask_h)
+
+
+def fused_recurrent_gated_delta_rule_update_fwd(
+ q: torch.Tensor,
+ k: torch.Tensor,
+ v: torch.Tensor,
+ g: torch.Tensor,
+ beta: torch.Tensor,
+ scale: float,
+ initial_state_source: torch.Tensor,
+ initial_state_indices: torch.Tensor,
+ use_qk_l2norm_in_kernel: bool = False,
+ cu_seqlens: Optional[torch.LongTensor] = None,
+ disable_state_update: bool = False,
+ disable_output_calculation: bool = False,
+ intermediate_states_buffer: Optional[torch.Tensor] = None,
+ cache_steps: Optional[int] = None,
+) -> torch.Tensor:
+ B, T, H, K, V = *k.shape, v.shape[-1]
+ HV = v.shape[2]
+ N = B if cu_seqlens is None else len(cu_seqlens) - 1
+ BK, BV = triton.next_power_of_2(K), min(triton.next_power_of_2(V), 8)
+ NK, NV = triton.cdiv(K, BK), triton.cdiv(V, BV)
+ assert NK == 1, "NK > 1 is not supported yet"
+ num_stages = 3
+ num_warps = 1
+
+ if disable_output_calculation:
+ # When output calculation is disabled, allocate minimal tensor
+ o = q.new_empty(NK, 1, 1, 1, 1) # minimal allocation
+ else:
+ o = q.new_empty(NK, *v.shape)
+
+ grid = (NK, NV, N * HV)
+
+ fused_recurrent_gated_delta_rule_update_fwd_kernel[grid](
+ q=q,
+ k=k,
+ v=v,
+ g=g,
+ beta=beta,
+ o=o,
+ h0_source=initial_state_source,
+ h0_indices=initial_state_indices,
+ cu_seqlens=cu_seqlens,
+ scale=scale,
+ intermediate_states_buffer=intermediate_states_buffer,
+ cache_steps=0 if cache_steps is None else cache_steps,
+ T=T,
+ B=B,
+ H=H,
+ HV=HV,
+ K=K,
+ V=V,
+ BK=BK,
+ BV=BV,
+ IS_BETA_HEADWISE=beta.ndim == v.ndim,
+ USE_QK_L2NORM_IN_KERNEL=use_qk_l2norm_in_kernel,
+ DISABLE_STATE_UPDATE=disable_state_update,
+ DISABLE_OUTPUT_CALCULATION=disable_output_calculation,
+ num_warps=num_warps,
+ num_stages=num_stages,
+ )
+ o = o.squeeze(0)
+ return o
+
+
+class FusedRecurrentUpdateFunction(torch.autograd.Function):
+
+ @staticmethod
+ @input_guard
+ def forward(
+ ctx,
+ q: torch.Tensor,
+ k: torch.Tensor,
+ v: torch.Tensor,
+ g: torch.Tensor,
+ beta: torch.Tensor,
+ scale: float,
+ initial_state_source: torch.Tensor,
+ initial_state_indices: torch.Tensor,
+ cu_seqlens: Optional[torch.LongTensor] = None,
+ use_qk_l2norm_in_kernel: bool = False,
+ disable_state_update: bool = False,
+ disable_output_calculation: bool = False,
+ intermediate_states_buffer: Optional[torch.Tensor] = None,
+ cache_steps: Optional[int] = None,
+ ):
+ o = fused_recurrent_gated_delta_rule_update_fwd(
+ q=q,
+ k=k,
+ v=v,
+ g=g,
+ beta=beta,
+ scale=scale,
+ initial_state_source=initial_state_source,
+ initial_state_indices=initial_state_indices,
+ use_qk_l2norm_in_kernel=use_qk_l2norm_in_kernel,
+ cu_seqlens=cu_seqlens,
+ disable_state_update=disable_state_update,
+ disable_output_calculation=disable_output_calculation,
+ intermediate_states_buffer=intermediate_states_buffer,
+ cache_steps=cache_steps,
+ )
+
+ return o
+
+ @staticmethod
+ @input_guard
+ def backward(ctx, do, dht):
+ raise NotImplementedError(
+ "Backward pass is not implemented yet and we do not have plans to implement it "
+ "because we haven't figured out how to compute dg without materializing the full "
+ "hidden states for all time steps."
+ )
+
+
+def fused_recurrent_gated_delta_rule_update(
+ q: torch.Tensor,
+ k: torch.Tensor,
+ v: torch.Tensor,
+ g: torch.Tensor,
+ beta: torch.Tensor = None,
+ scale: float = None,
+ initial_state_source: torch.Tensor = None,
+ initial_state_indices: torch.Tensor = None,
+ cu_seqlens: Optional[torch.LongTensor] = None,
+ use_qk_l2norm_in_kernel: bool = False,
+ disable_state_update: bool = False,
+ disable_output_calculation: bool = False,
+ intermediate_states_buffer: Optional[torch.Tensor] = None,
+ cache_steps: Optional[int] = None,
+) -> torch.Tensor:
+ if cu_seqlens is not None:
+ if q.shape[0] != 1:
+ raise ValueError(
+ f"The batch size is expected to be 1 rather than {q.shape[0]} when using `cu_seqlens`."
+ f"Please flatten variable-length inputs before processing."
+ )
+ if (
+ initial_state_source is not None
+ and initial_state_indices.shape[0] != len(cu_seqlens) - 1
+ ):
+ raise ValueError(
+ f"The number of initial states is expected to be equal to the number of input sequences, "
+ f"i.e., {len(cu_seqlens) - 1} rather than {initial_state_indices.shape[0]}."
+ )
+ if scale is None:
+ scale = k.shape[-1] ** -0.5
+ else:
+ assert scale > 0, "scale must be positive"
+ if beta is None:
+ beta = torch.ones_like(q[..., 0])
+ o = FusedRecurrentUpdateFunction.apply(
+ q,
+ k,
+ v,
+ g,
+ beta,
+ scale,
+ initial_state_source,
+ initial_state_indices,
+ cu_seqlens,
+ use_qk_l2norm_in_kernel,
+ disable_state_update,
+ disable_output_calculation,
+ intermediate_states_buffer,
+ cache_steps,
+ )
+ return o
diff --git a/python/sglang/srt/layers/attention/fla/fused_sigmoid_gating_recurrent.py b/python/sglang/srt/layers/attention/fla/fused_sigmoid_gating_recurrent.py
new file mode 100644
index 00000000000..41837b980e3
--- /dev/null
+++ b/python/sglang/srt/layers/attention/fla/fused_sigmoid_gating_recurrent.py
@@ -0,0 +1,232 @@
+from typing import Optional
+
+import torch
+import triton
+import triton.language as tl
+
+from sglang.srt.layers.attention.fla.utils import input_guard
+
+
+@triton.heuristics(
+ {
+ "USE_INITIAL_STATE": lambda args: args["h0_source"] is not None,
+ "IS_VARLEN": lambda args: args["cu_seqlens"] is not None,
+ }
+)
+@triton.jit(do_not_specialize=["T"])
+def fused_sigmoid_gating_delta_rule_update_kernel(
+ A_log,
+ a,
+ dt_bias,
+ softplus_beta,
+ softplus_threshold,
+ q,
+ k,
+ v,
+ b,
+ o,
+ h0_source,
+ h0_indices,
+ cu_seqlens,
+ scale,
+ T,
+ B: tl.constexpr,
+ H: tl.constexpr,
+ HV: tl.constexpr,
+ K: tl.constexpr,
+ V: tl.constexpr,
+ BK: tl.constexpr,
+ BV: tl.constexpr,
+ USE_INITIAL_STATE: tl.constexpr,
+ USE_QK_L2NORM_IN_KERNEL: tl.constexpr,
+ IS_VARLEN: tl.constexpr,
+):
+ """
+ Fused kernel that combines sigmoid gating computation with recurrent delta rule update.
+ """
+ i_k, i_v, i_nh = tl.program_id(0), tl.program_id(1), tl.program_id(2)
+ i_n, i_hv = i_nh // HV, i_nh % HV
+ i_h = i_hv // (HV // H)
+
+ if IS_VARLEN:
+ bos, eos = (
+ tl.load(cu_seqlens + i_n).to(tl.int64),
+ tl.load(cu_seqlens + i_n + 1).to(tl.int64),
+ )
+ all = T
+ T = eos - bos
+ else:
+ bos, eos = i_n * T, i_n * T + T
+ all = B * T
+
+ o_k = i_k * BK + tl.arange(0, BK)
+ o_v = i_v * BV + tl.arange(0, BV)
+
+ p_q = q + (bos * H + i_h) * K + o_k
+ p_k = k + (bos * H + i_h) * K + o_k
+ p_v = v + (bos * HV + i_hv) * V + o_v
+ p_b = b + bos * HV + i_hv
+ p_o = o + ((i_k * all + bos) * HV + i_hv) * V + o_v
+
+ # Gating computation pointers
+ p_A_log = A_log + i_hv
+ p_a = a + bos * HV + i_hv
+ p_dt_bias = dt_bias + i_hv
+
+ mask_k = o_k < K
+ mask_v = o_v < V
+ mask_h = mask_k[:, None] & mask_v[None, :]
+
+ b_h = tl.zeros([BK, BV], dtype=tl.float32)
+ if USE_INITIAL_STATE:
+ idx = tl.load(h0_indices + i_n)
+ if idx >= 0:
+ p_h0 = (
+ h0_source
+ + idx * HV * K * V
+ + i_hv * K * V
+ + o_k[:, None] * V
+ + o_v[None, :]
+ )
+ b_h += tl.load(p_h0, mask=mask_h, other=0).to(tl.float32)
+
+ for _ in range(0, T):
+ # Load inputs
+ b_q = tl.load(p_q, mask=mask_k, other=0).to(tl.float32)
+ b_k = tl.load(p_k, mask=mask_k, other=0).to(tl.float32)
+ b_v = tl.load(p_v, mask=mask_v, other=0).to(tl.float32)
+ b_b = tl.load(p_b).to(tl.float32)
+
+ # Compute sigmoid gating
+ # Load gating parameters
+ b_A_log = tl.load(p_A_log).to(tl.float32)
+ b_a = tl.load(p_a).to(tl.float32)
+ b_dt_bias = tl.load(p_dt_bias).to(tl.float32)
+
+ # Compute g = -exp(A_log) * softplus(a + dt_bias)
+ x = b_a + b_dt_bias
+ beta_x = softplus_beta * x
+ # Apply softplus with numerical stability
+ softplus_x = tl.where(
+ beta_x <= softplus_threshold,
+ (1.0 / softplus_beta) * tl.log(1.0 + tl.exp(beta_x)),
+ x,
+ )
+ b_g = -tl.exp(b_A_log) * softplus_x
+
+ # Compute beta = sigmoid(b)
+ b_beta = 1.0 / (1.0 + tl.exp(-b_b))
+
+ # Apply L2 normalization if enabled
+ if USE_QK_L2NORM_IN_KERNEL:
+ b_q = b_q / (tl.sqrt(tl.sum(b_q * b_q)) + 1e-6)
+ b_k = b_k / (tl.sqrt(tl.sum(b_k * b_k)) + 1e-6)
+
+ b_q = b_q * scale
+
+ # Apply gating to hidden state: h *= exp(g)
+ b_h *= tl.exp(b_g)
+
+ # Delta rule: v -= sum(h * k, dim=0)
+ b_v -= tl.sum(b_h * b_k[:, None], 0)
+
+ # Apply beta gating: v *= beta
+ b_v *= b_beta
+
+ # Update hidden state: h += k[:, None] * v[None, :]
+ b_h += b_k[:, None] * b_v[None, :]
+
+ # Compute output: o = sum(h * q, dim=0)
+ b_o = tl.sum(b_h * b_q[:, None], 0)
+ tl.store(p_o, b_o.to(p_o.dtype.element_ty), mask=mask_v)
+
+ # Update pointers for next timestep
+ p_q += H * K
+ p_k += H * K
+ p_o += HV * V
+ p_v += HV * V
+ p_b += HV
+ p_a += HV
+
+ # Store final state back to h0_source with bounds checking
+ if USE_INITIAL_STATE:
+ idx = tl.load(h0_indices + i_n)
+ if idx >= 0:
+ p_h0 = (
+ h0_source
+ + idx * HV * K * V
+ + i_hv * K * V
+ + o_k[:, None] * V
+ + o_v[None, :]
+ )
+ tl.store(p_h0, b_h.to(p_h0.dtype.element_ty), mask=mask_h)
+
+
+@input_guard
+def fused_sigmoid_gating_delta_rule_update(
+ A_log: torch.Tensor,
+ a: torch.Tensor,
+ dt_bias: torch.Tensor,
+ softplus_beta: float,
+ softplus_threshold: float,
+ q: torch.Tensor,
+ k: torch.Tensor,
+ v: torch.Tensor,
+ b: torch.Tensor,
+ initial_state_source: torch.Tensor,
+ initial_state_indices: torch.Tensor,
+ scale: Optional[float] = None,
+ use_qk_l2norm_in_kernel: bool = False,
+ cu_seqlens: Optional[torch.Tensor] = None,
+):
+ """
+ Fused triton implementation of sigmoid gating delta rule update.
+ This function uses a single fused kernel that combines both sigmoid gating computation
+ and the recurrent delta rule update for better performance.
+ """
+ B, T, H, K, V = *k.shape, v.shape[-1]
+ HV = v.shape[2]
+ N = B if cu_seqlens is None else len(cu_seqlens) - 1
+ BK, BV = triton.next_power_of_2(K), min(triton.next_power_of_2(V), 8)
+ NK, NV = triton.cdiv(K, BK), triton.cdiv(V, BV)
+ assert NK == 1, "NK > 1 is not supported yet"
+ num_stages = 3
+ num_warps = 1
+
+ if scale is None:
+ scale = k.shape[-1] ** -0.5
+ else:
+ assert scale > 0, "scale must be positive"
+
+ o = q.new_empty(NK, *v.shape)
+ grid = (NK, NV, N * HV)
+
+ fused_sigmoid_gating_delta_rule_update_kernel[grid](
+ A_log=A_log,
+ a=a,
+ dt_bias=dt_bias,
+ softplus_beta=softplus_beta,
+ softplus_threshold=softplus_threshold,
+ q=q,
+ k=k,
+ v=v,
+ b=b,
+ o=o,
+ h0_source=initial_state_source,
+ h0_indices=initial_state_indices,
+ cu_seqlens=cu_seqlens,
+ scale=scale,
+ T=T,
+ B=B,
+ H=H,
+ HV=HV,
+ K=K,
+ V=V,
+ BK=BK,
+ BV=BV,
+ USE_QK_L2NORM_IN_KERNEL=use_qk_l2norm_in_kernel,
+ num_warps=num_warps,
+ num_stages=num_stages,
+ )
+ o = o.squeeze(0)
+ return o
diff --git a/python/sglang/srt/layers/attention/fla/index.py b/python/sglang/srt/layers/attention/fla/index.py
new file mode 100644
index 00000000000..754b9871462
--- /dev/null
+++ b/python/sglang/srt/layers/attention/fla/index.py
@@ -0,0 +1,37 @@
+# Adapted from https://github.com/fla-org/flash-linear-attention/blob/main/fla/ops/utils/index.py
+# -*- coding: utf-8 -*-
+# Copyright (c) 2023-2025, Songlin Yang, Yu Zhang
+
+import torch
+import torch.nn.functional as F
+import triton
+import triton.language as tl
+
+from sglang.srt.layers.attention.fla.utils import tensor_cache
+
+
+@tensor_cache
+def prepare_lens(cu_seqlens: torch.LongTensor) -> torch.LongTensor:
+ return cu_seqlens[1:] - cu_seqlens[:-1]
+
+
+@tensor_cache
+def prepare_chunk_indices(
+ cu_seqlens: torch.LongTensor, chunk_size: int
+) -> torch.LongTensor:
+ indices = torch.cat(
+ [
+ torch.arange(n)
+ for n in triton.cdiv(prepare_lens(cu_seqlens), chunk_size).tolist()
+ ]
+ )
+ return torch.stack([indices.eq(0).cumsum(0) - 1, indices], 1).to(cu_seqlens)
+
+
+@tensor_cache
+def prepare_chunk_offsets(
+ cu_seqlens: torch.LongTensor, chunk_size: int
+) -> torch.LongTensor:
+ return torch.cat(
+ [cu_seqlens.new_tensor([0]), triton.cdiv(prepare_lens(cu_seqlens), chunk_size)]
+ ).cumsum(-1)
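+
+
+ # Illustrative example (hypothetical values): with cu_seqlens = [0, 100, 356] and
+ # chunk_size = 64, prepare_lens gives [100, 256] and the per-sequence chunk counts
+ # are [2, 4]; prepare_chunk_indices then returns the (seq_idx, chunk_idx) pairs
+ # [[0, 0], [0, 1], [1, 0], [1, 1], [1, 2], [1, 3]]
+ # and prepare_chunk_offsets returns [0, 2, 6].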
diff --git a/python/sglang/srt/layers/attention/fla/l2norm.py b/python/sglang/srt/layers/attention/fla/l2norm.py
new file mode 100644
index 00000000000..d6b6ae7f7d2
--- /dev/null
+++ b/python/sglang/srt/layers/attention/fla/l2norm.py
@@ -0,0 +1,150 @@
+# Adapted from https://github.com/fla-org/flash-linear-attention/blob/main/fla/modules/l2norm.py
+# -*- coding: utf-8 -*-
+# Copyright (c) 2023-2025, Songlin Yang, Yu Zhang
+
+from typing import Optional
+
+import torch
+import torch.nn as nn
+import triton
+import triton.language as tl
+
+from sglang.srt.layers.attention.fla.utils import input_guard
+
+BT_LIST = [8, 16, 32, 64, 128]
+
+
+# @triton.autotune(
+# configs=[
+# triton.Config({}, num_warps=num_warps) for num_warps in [1, 2, 4, 8, 16, 32]
+# ],
+# key=["D"],
+# )
+@triton.jit
+def l2norm_fwd_kernel1(
+ x,
+ y,
+ D,
+ BD: tl.constexpr,
+ eps,
+):
+ i_t = tl.program_id(0)
+ x += i_t * D
+ y += i_t * D
+ # Compute the squared L2 norm and its reciprocal square root
+ cols = tl.arange(0, BD)
+ mask = cols < D
+ b_x = tl.load(x + cols, mask=mask, other=0.0).to(tl.float32)
+ b_var = tl.sum(b_x * b_x, axis=0)
+ b_rstd = 1 / tl.sqrt(b_var + eps)
+ # tl.store(Rstd + i_t, rstd)
+ # Normalize
+ b_y = b_x * b_rstd
+ tl.store(y + cols, b_y, mask=mask)
+
+
+# @triton.autotune(
+# configs=[
+# triton.Config({"BT": BT}, num_warps=num_warps)
+# for num_warps in [1, 2, 4, 8, 16]
+# for BT in BT_LIST
+# ],
+# key=["D", "NB"],
+# )
+@triton.jit
+def l2norm_fwd_kernel(
+ x,
+ y,
+ eps,
+ NB: tl.constexpr,
+ T: tl.constexpr,
+ D: tl.constexpr,
+ BT: tl.constexpr,
+ BD: tl.constexpr,
+):
+ i_t = tl.program_id(0)
+ p_x = tl.make_block_ptr(x, (T, D), (D, 1), (i_t * BT, 0), (BT, BD), (1, 0))
+ b_x = tl.load(p_x, boundary_check=(0, 1)).to(tl.float32)
+ b_var = tl.sum(b_x * b_x, axis=1)
+ b_y = b_x / tl.sqrt(b_var + eps)[:, None]
+ p_y = tl.make_block_ptr(y, (T, D), (D, 1), (i_t * BT, 0), (BT, BD), (1, 0))
+ tl.store(p_y, b_y.to(p_y.dtype.element_ty), boundary_check=(0, 1))
+
+
+def l2norm_fwd(
+ x: torch.Tensor, eps: float = 1e-6, output_dtype: Optional[torch.dtype] = None
+):
+ x_shape_og = x.shape
+ x = x.view(-1, x.shape[-1])
+ # allocate output
+ if output_dtype is None:
+ y = torch.empty_like(x)
+ else:
+ y = torch.empty_like(x, dtype=output_dtype)
+ assert y.stride(-1) == 1
+ T, D = x.shape[0], x.shape[-1]
+ # rstd = torch.empty((T,), dtype=torch.float32, device=x.device)
+ # Less than 64KB per feature: enqueue fused kernel
+ MAX_FUSED_SIZE = 65536 // x.element_size()
+ BD = min(MAX_FUSED_SIZE, triton.next_power_of_2(D))
+ if D > BD:
+ raise RuntimeError("This layer doesn't support feature dim >= 64KB.")
+
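+ # dispatch: for small feature dims (D <= 512) each program normalizes a [BT, BD] tile
+ # of rows at once; for larger dims each program handles a single full row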
+ if D <= 512:
+ NB = triton.cdiv(T, 2048)
+
+ def grid(meta):
+ return (triton.cdiv(T, meta["BT"]),)
+
+ l2norm_fwd_kernel[grid](
+ x,
+ y,
+ eps,
+ NB=NB,
+ T=T,
+ D=D,
+ BD=BD,
+ BT=16,
+ num_warps=8,
+ num_stages=3,
+ )
+ else:
+ l2norm_fwd_kernel1[(T,)](
+ x,
+ y,
+ eps=eps,
+ D=D,
+ BD=BD,
+ num_warps=8,
+ num_stages=3,
+ )
+
+ return y.view(x_shape_og)
+
+
+class L2NormFunction(torch.autograd.Function):
+
+ @staticmethod
+ @input_guard
+ def forward(ctx, x, eps=1e-6, output_dtype=None):
+ return l2norm_fwd(x, eps, output_dtype)
+
+
+def l2norm(
+ x: torch.Tensor, eps: float = 1e-6, output_dtype: Optional[torch.dtype] = None
+) -> torch.Tensor:
+ return L2NormFunction.apply(x, eps, output_dtype)
+
+
+l2_norm = l2norm
+
+
+class L2Norm(nn.Module):
+
+ def __init__(self, eps: float = 1e-6, output_dtype: Optional[torch.dtype] = None):
+ super().__init__()
+ self.eps = eps
+ self.output_dtype = output_dtype
+
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
+ return l2norm(x, self.eps, self.output_dtype)
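# Editor's note: illustrative sketch, not part of the patch. The kernels above perform plain
# row-wise L2 normalization, y = x / sqrt(sum(x**2) + eps). A minimal sanity check against a
# PyTorch reference (assumes a CUDA device and that the module path used in this patch is
# importable):
import torch

from sglang.srt.layers.attention.fla.l2norm import l2norm

x = torch.randn(4, 1024, 128, device="cuda", dtype=torch.bfloat16)
ref = x.float() / torch.sqrt((x.float() ** 2).sum(dim=-1, keepdim=True) + 1e-6)
out = l2norm(x, eps=1e-6)
torch.testing.assert_close(out.float(), ref, rtol=2e-2, atol=2e-2)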
diff --git a/python/sglang/srt/layers/attention/fla/layernorm_gated.py b/python/sglang/srt/layers/attention/fla/layernorm_gated.py
new file mode 100644
index 00000000000..bd53d0d64b6
--- /dev/null
+++ b/python/sglang/srt/layers/attention/fla/layernorm_gated.py
@@ -0,0 +1,326 @@
+# Adapted from https://github.com/fla-org/flash-linear-attention/blob/main/fla/modules/layernorm_gated.py
+# Copyright (c) 2024, Tri Dao.
+# Based on the Triton LayerNorm tutorial: https://triton-lang.org/main/getting-started/tutorials/05-layer-norm.html
+# For the backward pass, we keep weight_grad and bias_grad in registers and accumulate.
+# This backward pass is faster for dimensions up to 8k, but after that it's much slower due to register spilling.
+# The models we train have hidden dim up to 8k anyway (e.g. Llama 70B), so this is fine.
+
+import math
+
+import torch
+import torch.nn.functional as F
+import triton
+import triton.language as tl
+from einops import rearrange
+
+
+def rms_norm_ref(
+ x,
+ weight,
+ bias,
+ z=None,
+ eps=1e-6,
+ group_size=None,
+ norm_before_gate=True,
+ upcast=True,
+):
+ dtype = x.dtype
+ N = x.shape[-1]
+ weight = weight.float()
+ bias = bias.float() if bias is not None else None
+ if upcast:
+ x = x.float()
+ z = z.float() if z is not None else z
+ if z is not None and not norm_before_gate:
+ x = x * F.silu(z)
+ if group_size is None:
+ rstd = 1 / torch.sqrt((x.square()).mean(dim=-1, keepdim=True) + eps)
+ out = (x * rstd * weight) + bias if bias is not None else (x * rstd * weight)
+ else:
+ x_group = rearrange(x, "... (g d) -> ... g d", d=group_size)
+ rstd = 1 / torch.sqrt((x_group.square()).mean(dim=-1, keepdim=True) + eps)
+ out = rearrange(x_group * rstd, "... g d -> ... (g d)") * weight
+ if bias is not None:
+ out = out + bias
+ if z is not None and norm_before_gate:
+ out *= F.silu(z)
+ return out.to(dtype)
+
+
+@triton.heuristics({"HAS_BIAS": lambda args: args["B"] is not None})
+@triton.heuristics({"HAS_Z": lambda args: args["Z"] is not None})
+@triton.jit
+def _layer_norm_fwd_1pass_kernel(
+ X, # pointer to the input
+ Y, # pointer to the output
+ W, # pointer to the weights
+ B, # pointer to the biases
+ Z, # pointer to the other branch
+ Mean, # pointer to the mean
+ Rstd, # pointer to the 1/std
+ stride_x_row, # how much to increase the pointer when moving by 1 row
+ stride_y_row,
+ stride_z_row,
+ M, # number of rows in X
+ N, # number of columns in X
+ eps, # epsilon to avoid division by zero
+ BLOCK_N: tl.constexpr,
+ HAS_BIAS: tl.constexpr,
+ HAS_Z: tl.constexpr,
+ NORM_BEFORE_GATE: tl.constexpr,
+ IS_RMS_NORM: tl.constexpr,
+):
+ # Map the program id to the row of X and Y it should compute.
+ row = tl.program_id(0)
+ group = tl.program_id(1)
+ X += row * stride_x_row + group * N
+ Y += row * stride_y_row + group * N
+ if HAS_Z:
+ Z += row * stride_z_row + group * N
+ if not IS_RMS_NORM:
+ Mean += group * M
+ Rstd += group * M
+ W += group * N
+ if HAS_BIAS:
+ B += group * N
+ # Compute mean and variance
+ cols = tl.arange(0, BLOCK_N)
+ x = tl.load(X + cols, mask=cols < N, other=0.0).to(tl.float32)
+ if HAS_Z and not NORM_BEFORE_GATE:
+ z = tl.load(Z + cols, mask=cols < N).to(tl.float32)
+ x *= z * tl.sigmoid(z)
+ if not IS_RMS_NORM:
+ mean = tl.sum(x, axis=0) / N
+ tl.store(Mean + row, mean)
+ xbar = tl.where(cols < N, x - mean, 0.0)
+ var = tl.sum(xbar * xbar, axis=0) / N
+ else:
+ xbar = tl.where(cols < N, x, 0.0)
+ var = tl.sum(xbar * xbar, axis=0) / N
+ rstd = 1 / tl.sqrt(var + eps)
+ tl.store(Rstd + row, rstd)
+ # Normalize and apply linear transformation
+ mask = cols < N
+ w = tl.load(W + cols, mask=mask).to(tl.float32)
+ if HAS_BIAS:
+ b = tl.load(B + cols, mask=mask).to(tl.float32)
+ x_hat = (x - mean) * rstd if not IS_RMS_NORM else x * rstd
+ y = x_hat * w + b if HAS_BIAS else x_hat * w
+ if HAS_Z and NORM_BEFORE_GATE:
+ z = tl.load(Z + cols, mask=mask).to(tl.float32)
+ y *= z * tl.sigmoid(z)
+ # Write output
+ tl.store(Y + cols, y, mask=mask)
+
+
+def _layer_norm_fwd(
+ x,
+ weight,
+ bias,
+ eps,
+ z=None,
+ out=None,
+ group_size=None,
+ norm_before_gate=True,
+ is_rms_norm=False,
+):
+ M, N = x.shape
+ if group_size is None:
+ group_size = N
+ assert N % group_size == 0
+ ngroups = N // group_size
+ assert x.stride(-1) == 1
+ if z is not None:
+ assert z.stride(-1) == 1
+ assert z.shape == (M, N)
+ assert weight.shape == (N,)
+ assert weight.stride(-1) == 1
+ if bias is not None:
+ assert bias.stride(-1) == 1
+ assert bias.shape == (N,)
+ # allocate output
+ if out is not None:
+ assert out.shape == x.shape
+ else:
+ out = torch.empty_like(x)
+ assert out.stride(-1) == 1
+ mean = (
+ torch.empty((ngroups * M,), dtype=torch.float32, device=x.device)
+ if not is_rms_norm
+ else None
+ )
+ rstd = torch.empty((ngroups * M,), dtype=torch.float32, device=x.device)
+ # Less than 64KB per feature: enqueue fused kernel
+ MAX_FUSED_SIZE = 65536 // x.element_size()
+ BLOCK_N = min(MAX_FUSED_SIZE, triton.next_power_of_2(group_size))
+ if group_size > BLOCK_N:
+ raise RuntimeError("This layer norm doesn't support feature dim >= 64KB.")
+ # heuristics for number of warps
+ num_warps = min(max(BLOCK_N // 256, 1), 8)
+ grid = (M, ngroups)
+ with torch.cuda.device(x.device.index):
+ _layer_norm_fwd_1pass_kernel[grid](
+ x,
+ out,
+ weight,
+ bias,
+ z,
+ mean,
+ rstd,
+ x.stride(0),
+ out.stride(0),
+ z.stride(0) if z is not None else 0,
+ M,
+ group_size,
+ eps,
+ BLOCK_N=BLOCK_N,
+ NORM_BEFORE_GATE=norm_before_gate,
+ IS_RMS_NORM=is_rms_norm,
+ num_warps=num_warps,
+ )
+ return out, mean, rstd
+
+
+class LayerNormFn(torch.autograd.Function):
+
+ @staticmethod
+ def forward(
+ ctx,
+ x,
+ weight,
+ bias,
+ z=None,
+ eps=1e-6,
+ group_size=None,
+ norm_before_gate=True,
+ is_rms_norm=False,
+ ):
+ """If z is not None, we do norm(x) * silu(z) if norm_before_gate, else norm(x * silu(z))"""
+
+ x_shape_og = x.shape
+ # reshape input data into 2D tensor
+ x = x.reshape(-1, x.shape[-1])
+ if x.stride(-1) != 1:
+ x = x.contiguous()
+ if z is not None:
+ assert z.shape == x_shape_og
+ z = z.reshape(-1, z.shape[-1])
+ if z.stride(-1) != 1:
+ z = z.contiguous()
+ weight = weight.contiguous()
+ if bias is not None:
+ bias = bias.contiguous()
+ y, mean, rstd = _layer_norm_fwd(
+ x,
+ weight,
+ bias,
+ eps,
+ z=z,
+ group_size=group_size,
+ norm_before_gate=norm_before_gate,
+ is_rms_norm=is_rms_norm,
+ )
+ return y.reshape(x_shape_og)
+
+
+def layernorm_fn(
+ x,
+ weight,
+ bias,
+ z=None,
+ eps=1e-6,
+ group_size=None,
+ norm_before_gate=True,
+ is_rms_norm=False,
+):
+ return LayerNormFn.apply(
+ x, weight, bias, z, eps, group_size, norm_before_gate, is_rms_norm
+ )
+
+
+def rmsnorm_fn(
+ x, weight, bias, z=None, eps=1e-6, group_size=None, norm_before_gate=True
+):
+ return LayerNormFn.apply(
+ x, weight, bias, z, eps, group_size, norm_before_gate, True
+ )
+
+
+class LayerNorm(torch.nn.Module):
+
+ def __init__(
+ self,
+ hidden_size,
+ eps=1e-5,
+ group_size=None,
+ norm_before_gate=True,
+ device=None,
+ dtype=None,
+ ):
+ """If group_size is not None, we do GroupNorm with each group having group_size elements.
+ group_size=None is equivalent to group_size=hidden_size (i.e. there's only 1 group).
+ """
+
+ factory_kwargs = {"device": device, "dtype": dtype}
+ super().__init__()
+ self.eps = eps
+ self.weight = torch.nn.Parameter(torch.empty(hidden_size, **factory_kwargs))
+ self.bias = torch.nn.Parameter(torch.empty(hidden_size, **factory_kwargs))
+ self.group_size = group_size
+ self.norm_before_gate = norm_before_gate
+ self.reset_parameters()
+
+ def reset_parameters(self):
+ torch.nn.init.ones_(self.weight)
+ torch.nn.init.zeros_(self.bias)
+
+ def forward(self, x, z=None):
+ """If z is not None, we do norm(x) * silu(z) if norm_before_gate, else norm(x * silu(z))"""
+ return layernorm_fn(
+ x,
+ self.weight,
+ self.bias,
+ z=z,
+ group_size=self.group_size,
+ eps=self.eps,
+ norm_before_gate=self.norm_before_gate,
+ )
+
+
+class RMSNorm(torch.nn.Module):
+
+ def __init__(
+ self,
+ hidden_size,
+ eps=1e-5,
+ group_size=None,
+ norm_before_gate=True,
+ device=None,
+ dtype=None,
+ ):
+ """If group_size is not None, we do GroupNorm with each group having group_size elements.
+ group_size=None is equivalent to group_size=hidden_size (i.e. there's only 1 group).
+ """
+ factory_kwargs = {"device": device, "dtype": dtype}
+ super().__init__()
+ self.eps = eps
+ self.weight = torch.nn.Parameter(torch.empty(hidden_size, **factory_kwargs))
+ self.register_parameter("bias", None)
+ self.group_size = group_size
+ self.norm_before_gate = norm_before_gate
+ self.reset_parameters()
+
+ def reset_parameters(self):
+ torch.nn.init.ones_(self.weight)
+
+ def forward(self, x, z=None):
+ """If z is not None, we do norm(x) * silu(z) if norm_before_gate, else norm(x * silu(z))"""
+ return rmsnorm_fn(
+ x,
+ self.weight,
+ self.bias,
+ z=z,
+ eps=self.eps,
+ group_size=self.group_size,
+ norm_before_gate=self.norm_before_gate,
+ )
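# Editor's note: illustrative sketch, not part of the patch. The gating convention implemented
# above is: norm_before_gate=True -> y = RMSNorm(x) * silu(z); norm_before_gate=False ->
# y = RMSNorm(x * silu(z)). rms_norm_ref in this file is the pure-PyTorch reference and can be
# used to cross-check the fused Triton path (assumes a CUDA device; tolerances are loose for bf16):
import torch

from sglang.srt.layers.attention.fla.layernorm_gated import RMSNorm, rms_norm_ref

x = torch.randn(2, 16, 256, device="cuda", dtype=torch.bfloat16)
z = torch.randn_like(x)
m = RMSNorm(256, eps=1e-5, norm_before_gate=True).to(device="cuda", dtype=torch.bfloat16)
ref = rms_norm_ref(x, m.weight, None, z=z, eps=1e-5, norm_before_gate=True)
out = m(x, z=z)
torch.testing.assert_close(out.float(), ref.float(), rtol=3e-2, atol=3e-2)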
diff --git a/python/sglang/srt/layers/attention/fla/op.py b/python/sglang/srt/layers/attention/fla/op.py
new file mode 100644
index 00000000000..9b3191075b7
--- /dev/null
+++ b/python/sglang/srt/layers/attention/fla/op.py
@@ -0,0 +1,66 @@
+# Adapted from https://github.com/fla-org/flash-linear-attention/blob/main/fla/ops/utils/op.py
+# -*- coding: utf-8 -*-
+# Copyright (c) 2023-2025, Songlin Yang, Yu Zhang
+
+import os
+
+import triton
+import triton.language as tl
+import triton.language.extra.libdevice as tldevice
+
+from sglang.srt.layers.attention.fla.utils import is_gather_supported
+
+if os.environ.get("FLA_USE_FAST_OPS", "0") == "1":
+ exp = tldevice.fast_expf
+ exp2 = tldevice.exp2
+ log = tldevice.fast_logf
+ log2 = tldevice.fast_log2f
+else:
+ exp = tl.exp
+ exp2 = tl.math.exp2
+ log = tl.log
+ log2 = tl.log2
+
+
+@triton.jit
+def safe_exp(x):
+ return exp(tl.where(x <= 0, x, float("-inf")))
+
+
+if not is_gather_supported:
+
+ @triton.jit
+ def gather(src, index, axis, _builder=None):
+ """
+        Fallback used when tl.gather is not supported by the installed Triton.
+        It simply returns None and exists only so that kernels referencing
+        `gather` still compile.
+ """
+ return None
+
+else:
+ gather = tl.gather
+
+
+if hasattr(triton.language, "_experimental_make_tensor_descriptor"):
+ # For Triton 3.3.x
+ make_tensor_descriptor = triton.language._experimental_make_tensor_descriptor
+elif hasattr(triton.language, "make_tensor_descriptor"):
+ # For Triton 3.4.x and later
+ make_tensor_descriptor = triton.language.make_tensor_descriptor
+else:
+ """
+ Fallback implementation when TMA is not supported.
+ Returns None to indicate TMA descriptors are unavailable.
+ Just make triton compiler happy.
+ """
+
+ @triton.jit
+ def make_tensor_descriptor(
+ base,
+ shape,
+ strides,
+ block_shape,
+ _builder=None,
+ ):
+ return None
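# Editor's note: illustrative sketch, not part of the patch. safe_exp above exponentiates only
# non-positive arguments (positive inputs are mapped to exp(-inf) == 0), which keeps log-space
# decay terms from overflowing; exp/log are swapped for libdevice fast-math variants when
# FLA_USE_FAST_OPS=1. A plain PyTorch equivalent of safe_exp, for intuition only:
import torch


def safe_exp_ref(x: torch.Tensor) -> torch.Tensor:
    # exp(x) for x <= 0, exactly 0 otherwise
    return torch.where(x <= 0, x, torch.full_like(x, float("-inf"))).exp()


print(safe_exp_ref(torch.tensor([-1.0, 0.0, 2.0])))  # tensor([0.3679, 1.0000, 0.0000])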
diff --git a/python/sglang/srt/layers/attention/fla/solve_tril.py b/python/sglang/srt/layers/attention/fla/solve_tril.py
new file mode 100644
index 00000000000..5c519507d69
--- /dev/null
+++ b/python/sglang/srt/layers/attention/fla/solve_tril.py
@@ -0,0 +1,465 @@
+# Adapted from https://github.com/fla-org/flash-linear-attention/blob/main/fla/ops/utils/solve_tril.py
+# -*- coding: utf-8 -*-
+# Copyright (c) 2023-2025, Songlin Yang, Yu Zhang
+
+from typing import Optional
+
+import torch
+import triton
+import triton.language as tl
+
+from sglang.srt.layers.attention.fla.index import prepare_chunk_indices
+from sglang.srt.layers.attention.fla.utils import input_guard
+
+
+@triton.heuristics({"IS_VARLEN": lambda args: args["cu_seqlens"] is not None})
+# @triton.autotune(
+# configs=[
+# triton.Config({}, num_warps=num_warps, num_stages=num_stages)
+# for num_warps in [1, 2, 4, 8]
+# for num_stages in [2, 3, 4, 5]
+# ],
+# key=["BT"],
+# )
+@triton.jit(do_not_specialize=["T"])
+def solve_tril_16x16_kernel(
+ A,
+ Ad,
+ cu_seqlens,
+ chunk_indices,
+ T,
+ H: tl.constexpr,
+ BT: tl.constexpr,
+ IS_VARLEN: tl.constexpr,
+):
+ i_t, i_bh = tl.program_id(0), tl.program_id(1)
+ i_b, i_h = i_bh // H, i_bh % H
+ if IS_VARLEN:
+ i_n, i_t = tl.load(chunk_indices + i_t * 2).to(tl.int32), tl.load(
+ chunk_indices + i_t * 2 + 1
+ ).to(tl.int32)
+ bos, eos = tl.load(cu_seqlens + i_n).to(tl.int32), tl.load(
+ cu_seqlens + i_n + 1
+ ).to(tl.int32)
+ T = eos - bos
+ else:
+ bos, eos = i_b * T, i_b * T + T
+
+ A = A + (bos * H + i_h) * BT
+ Ad = Ad + (bos * H + i_h) * 16
+
+ offset = (i_t * 16) % BT
+ p_A = tl.make_block_ptr(
+ A, (T, BT), (H * BT, 1), (i_t * 16, offset), (16, 16), (1, 0)
+ )
+ p_Ai = tl.make_block_ptr(Ad, (T, 16), (H * 16, 1), (i_t * 16, 0), (16, 16), (1, 0))
+ b_A = tl.load(p_A, boundary_check=(0, 1)).to(tl.float32)
+ b_A = -tl.where(tl.arange(0, 16)[:, None] > tl.arange(0, 16)[None, :], b_A, 0)
+
+ o_i = tl.arange(0, 16)
+ for i in range(1, min(16, T - i_t * 16)):
+ b_a = -tl.load(A + (i_t * 16 + i) * H * BT + o_i + offset)
+ b_a = b_a + tl.sum(b_a[:, None] * b_A, 0)
+ mask = o_i == i
+ b_A = tl.where(mask[:, None], b_a, b_A)
+ b_A += o_i[:, None] == o_i[None, :]
+ tl.store(
+ p_Ai,
+ b_A.to(p_Ai.dtype.element_ty, fp_downcast_rounding="rtne"),
+ boundary_check=(0, 1),
+ )
+
+
+@triton.heuristics({"IS_VARLEN": lambda args: args["cu_seqlens"] is not None})
+# @triton.autotune(
+# configs=[
+# triton.Config({}, num_warps=num_warps, num_stages=num_stages)
+# for num_warps in [1, 2, 4, 8]
+# for num_stages in [2, 3, 4, 5]
+# ],
+# key=["H", "BT", "IS_VARLEN"],
+# )
+@triton.jit(do_not_specialize=["T"])
+def merge_16x16_to_32x32_inverse_kernel(
+ A,
+ Ad,
+ Ai,
+ cu_seqlens,
+ chunk_indices,
+ T,
+ H: tl.constexpr,
+ BT: tl.constexpr,
+ IS_VARLEN: tl.constexpr,
+):
+ i_t, i_bh = tl.program_id(0), tl.program_id(1)
+ i_b, i_h = i_bh // H, i_bh % H
+ if IS_VARLEN:
+ i_n, i_t = tl.load(chunk_indices + i_t * 2).to(tl.int32), tl.load(
+ chunk_indices + i_t * 2 + 1
+ ).to(tl.int32)
+ bos, eos = tl.load(cu_seqlens + i_n).to(tl.int32), tl.load(
+ cu_seqlens + i_n + 1
+ ).to(tl.int32)
+ T = eos - bos
+ else:
+ bos, eos = i_b * T, i_b * T + T
+
+ A += (bos * H + i_h) * 32
+ Ad += (bos * H + i_h) * 16
+ Ai += (bos * H + i_h) * 32
+
+ p_A_21 = tl.make_block_ptr(
+ A, (T, 32), (H * 32, 1), (i_t * 32 + 16, 0), (16, 16), (1, 0)
+ )
+ p_Ad_11 = tl.make_block_ptr(
+ Ad, (T, 16), (H * 16, 1), (i_t * 32, 0), (16, 16), (1, 0)
+ )
+ p_Ad_22 = tl.make_block_ptr(
+ Ad, (T, 16), (H * 16, 1), (i_t * 32 + 16, 0), (16, 16), (1, 0)
+ )
+ p_Ai_11 = tl.make_block_ptr(
+ Ai, (T, 32), (H * 32, 1), (i_t * 32, 0), (16, 16), (1, 0)
+ )
+ p_Ai_22 = tl.make_block_ptr(
+ Ai, (T, 32), (H * 32, 1), (i_t * 32 + 16, 16), (16, 16), (1, 0)
+ )
+ p_Ai_21 = tl.make_block_ptr(
+ Ai, (T, 32), (H * 32, 1), (i_t * 32 + 16, 0), (16, 16), (1, 0)
+ )
+
+ A_21 = tl.load(p_A_21, boundary_check=(0, 1)).to(tl.float32)
+ Ai_11 = tl.load(p_Ad_11, boundary_check=(0, 1)).to(tl.float32)
+ Ai_22 = tl.load(p_Ad_22, boundary_check=(0, 1)).to(tl.float32)
+ Ai_21 = -tl.dot(
+ tl.dot(Ai_22, A_21, input_precision="ieee"), Ai_11, input_precision="ieee"
+ )
+ tl.store(
+ p_Ai_11,
+ Ai_11.to(p_Ai_11.dtype.element_ty, fp_downcast_rounding="rtne"),
+ boundary_check=(0, 1),
+ )
+ tl.store(
+ p_Ai_22,
+ Ai_22.to(p_Ai_22.dtype.element_ty, fp_downcast_rounding="rtne"),
+ boundary_check=(0, 1),
+ )
+ tl.store(
+ p_Ai_21,
+ Ai_21.to(p_Ai_21.dtype.element_ty, fp_downcast_rounding="rtne"),
+ boundary_check=(0, 1),
+ )
+
+
+@triton.heuristics({"IS_VARLEN": lambda args: args["cu_seqlens"] is not None})
+# @triton.autotune(
+# configs=[
+# triton.Config({}, num_warps=num_warps, num_stages=num_stages)
+# for num_warps in [2, 4, 8]
+# for num_stages in [2, 3, 4, 5]
+# ],
+# key=["H", "BT", "IS_VARLEN"],
+# )
+@triton.jit(do_not_specialize=["T"])
+def merge_16x16_to_64x64_inverse_kernel(
+ A,
+ Ad,
+ Ai,
+ cu_seqlens,
+ chunk_indices,
+ T,
+ H: tl.constexpr,
+ BT: tl.constexpr,
+ IS_VARLEN: tl.constexpr,
+):
+ i_t, i_bh = tl.program_id(0), tl.program_id(1)
+ i_b, i_h = i_bh // H, i_bh % H
+ if IS_VARLEN:
+ i_n, i_t = tl.load(chunk_indices + i_t * 2).to(tl.int32), tl.load(
+ chunk_indices + i_t * 2 + 1
+ ).to(tl.int32)
+ bos, eos = tl.load(cu_seqlens + i_n).to(tl.int32), tl.load(
+ cu_seqlens + i_n + 1
+ ).to(tl.int32)
+ T = eos - bos
+ else:
+ bos, eos = i_b * T, i_b * T + T
+
+ A += (bos * H + i_h) * 64
+ Ad += (bos * H + i_h) * 16
+ Ai += (bos * H + i_h) * 64
+
+ p_A_21 = tl.make_block_ptr(
+ A, (T, 64), (H * 64, 1), (i_t * 64 + 16, 0), (16, 16), (1, 0)
+ )
+ p_A_32 = tl.make_block_ptr(
+ A, (T, 64), (H * 64, 1), (i_t * 64 + 32, 16), (16, 16), (1, 0)
+ )
+ p_A_31 = tl.make_block_ptr(
+ A, (T, 64), (H * 64, 1), (i_t * 64 + 32, 0), (16, 16), (1, 0)
+ )
+ p_A_43 = tl.make_block_ptr(
+ A, (T, 64), (H * 64, 1), (i_t * 64 + 48, 32), (16, 16), (1, 0)
+ )
+ p_A_42 = tl.make_block_ptr(
+ A, (T, 64), (H * 64, 1), (i_t * 64 + 48, 16), (16, 16), (1, 0)
+ )
+ p_A_41 = tl.make_block_ptr(
+ A, (T, 64), (H * 64, 1), (i_t * 64 + 48, 0), (16, 16), (1, 0)
+ )
+ p_Ad_11 = tl.make_block_ptr(
+ Ad, (T, 16), (H * 16, 1), (i_t * 64, 0), (16, 16), (1, 0)
+ )
+ p_Ad_22 = tl.make_block_ptr(
+ Ad, (T, 16), (H * 16, 1), (i_t * 64 + 16, 0), (16, 16), (1, 0)
+ )
+ p_Ad_33 = tl.make_block_ptr(
+ Ad, (T, 16), (H * 16, 1), (i_t * 64 + 32, 0), (16, 16), (1, 0)
+ )
+ p_Ad_44 = tl.make_block_ptr(
+ Ad, (T, 16), (H * 16, 1), (i_t * 64 + 48, 0), (16, 16), (1, 0)
+ )
+
+ A_21 = tl.load(p_A_21, boundary_check=(0, 1)).to(tl.float32)
+ A_32 = tl.load(p_A_32, boundary_check=(0, 1)).to(tl.float32)
+ A_31 = tl.load(p_A_31, boundary_check=(0, 1)).to(tl.float32)
+ A_43 = tl.load(p_A_43, boundary_check=(0, 1)).to(tl.float32)
+ A_42 = tl.load(p_A_42, boundary_check=(0, 1)).to(tl.float32)
+ A_41 = tl.load(p_A_41, boundary_check=(0, 1)).to(tl.float32)
+
+ Ai_11 = tl.load(p_Ad_11, boundary_check=(0, 1)).to(tl.float32)
+ Ai_22 = tl.load(p_Ad_22, boundary_check=(0, 1)).to(tl.float32)
+ Ai_33 = tl.load(p_Ad_33, boundary_check=(0, 1)).to(tl.float32)
+ Ai_44 = tl.load(p_Ad_44, boundary_check=(0, 1)).to(tl.float32)
+
+ Ai_21 = -tl.dot(
+ tl.dot(Ai_22, A_21, input_precision="ieee"), Ai_11, input_precision="ieee"
+ )
+ Ai_32 = -tl.dot(
+ tl.dot(Ai_33, A_32, input_precision="ieee"), Ai_22, input_precision="ieee"
+ )
+ Ai_43 = -tl.dot(
+ tl.dot(Ai_44, A_43, input_precision="ieee"), Ai_33, input_precision="ieee"
+ )
+
+ Ai_31 = -tl.dot(
+ Ai_33,
+ tl.dot(A_31, Ai_11, input_precision="ieee")
+ + tl.dot(A_32, Ai_21, input_precision="ieee"),
+ input_precision="ieee",
+ )
+ Ai_42 = -tl.dot(
+ Ai_44,
+ tl.dot(A_42, Ai_22, input_precision="ieee")
+ + tl.dot(A_43, Ai_32, input_precision="ieee"),
+ input_precision="ieee",
+ )
+ Ai_41 = -tl.dot(
+ Ai_44,
+ tl.dot(A_41, Ai_11, input_precision="ieee")
+ + tl.dot(A_42, Ai_21, input_precision="ieee")
+ + tl.dot(A_43, Ai_31, input_precision="ieee"),
+ input_precision="ieee",
+ )
+
+ p_Ai_11 = tl.make_block_ptr(
+ Ai, (T, 64), (H * 64, 1), (i_t * 64, 0), (16, 16), (1, 0)
+ )
+ p_Ai_22 = tl.make_block_ptr(
+ Ai, (T, 64), (H * 64, 1), (i_t * 64 + 16, 16), (16, 16), (1, 0)
+ )
+ p_Ai_33 = tl.make_block_ptr(
+ Ai, (T, 64), (H * 64, 1), (i_t * 64 + 32, 32), (16, 16), (1, 0)
+ )
+ p_Ai_44 = tl.make_block_ptr(
+ Ai, (T, 64), (H * 64, 1), (i_t * 64 + 48, 48), (16, 16), (1, 0)
+ )
+ p_Ai_21 = tl.make_block_ptr(
+ Ai, (T, 64), (H * 64, 1), (i_t * 64 + 16, 0), (16, 16), (1, 0)
+ )
+ p_Ai_31 = tl.make_block_ptr(
+ Ai, (T, 64), (H * 64, 1), (i_t * 64 + 32, 0), (16, 16), (1, 0)
+ )
+ p_Ai_32 = tl.make_block_ptr(
+ Ai, (T, 64), (H * 64, 1), (i_t * 64 + 32, 16), (16, 16), (1, 0)
+ )
+ p_Ai_41 = tl.make_block_ptr(
+ Ai, (T, 64), (H * 64, 1), (i_t * 64 + 48, 0), (16, 16), (1, 0)
+ )
+ p_Ai_42 = tl.make_block_ptr(
+ Ai, (T, 64), (H * 64, 1), (i_t * 64 + 48, 16), (16, 16), (1, 0)
+ )
+ p_Ai_43 = tl.make_block_ptr(
+ Ai, (T, 64), (H * 64, 1), (i_t * 64 + 48, 32), (16, 16), (1, 0)
+ )
+ tl.store(
+ p_Ai_11,
+ Ai_11.to(p_Ai_11.dtype.element_ty, fp_downcast_rounding="rtne"),
+ boundary_check=(0, 1),
+ )
+ tl.store(
+ p_Ai_22,
+ Ai_22.to(p_Ai_22.dtype.element_ty, fp_downcast_rounding="rtne"),
+ boundary_check=(0, 1),
+ )
+ tl.store(
+ p_Ai_33,
+ Ai_33.to(p_Ai_33.dtype.element_ty, fp_downcast_rounding="rtne"),
+ boundary_check=(0, 1),
+ )
+ tl.store(
+ p_Ai_44,
+ Ai_44.to(p_Ai_44.dtype.element_ty, fp_downcast_rounding="rtne"),
+ boundary_check=(0, 1),
+ )
+ tl.store(
+ p_Ai_21,
+ Ai_21.to(p_Ai_21.dtype.element_ty, fp_downcast_rounding="rtne"),
+ boundary_check=(0, 1),
+ )
+ tl.store(
+ p_Ai_31,
+ Ai_31.to(p_Ai_31.dtype.element_ty, fp_downcast_rounding="rtne"),
+ boundary_check=(0, 1),
+ )
+ tl.store(
+ p_Ai_32,
+ Ai_32.to(p_Ai_32.dtype.element_ty, fp_downcast_rounding="rtne"),
+ boundary_check=(0, 1),
+ )
+ tl.store(
+ p_Ai_41,
+ Ai_41.to(p_Ai_41.dtype.element_ty, fp_downcast_rounding="rtne"),
+ boundary_check=(0, 1),
+ )
+ tl.store(
+ p_Ai_42,
+ Ai_42.to(p_Ai_42.dtype.element_ty, fp_downcast_rounding="rtne"),
+ boundary_check=(0, 1),
+ )
+ tl.store(
+ p_Ai_43,
+ Ai_43.to(p_Ai_43.dtype.element_ty, fp_downcast_rounding="rtne"),
+ boundary_check=(0, 1),
+ )
+
+ fill_zeros = tl.zeros((16, 16), dtype=tl.float32)
+ p_Ai_12 = tl.make_block_ptr(
+ Ai, (T, 64), (H * 64, 1), (i_t * 64, 16), (16, 16), (1, 0)
+ )
+ p_Ai_13 = tl.make_block_ptr(
+ Ai, (T, 64), (H * 64, 1), (i_t * 64, 32), (16, 16), (1, 0)
+ )
+ p_Ai_14 = tl.make_block_ptr(
+ Ai, (T, 64), (H * 64, 1), (i_t * 64, 48), (16, 16), (1, 0)
+ )
+ p_Ai_23 = tl.make_block_ptr(
+ Ai, (T, 64), (H * 64, 1), (i_t * 64 + 16, 32), (16, 16), (1, 0)
+ )
+ p_Ai_24 = tl.make_block_ptr(
+ Ai, (T, 64), (H * 64, 1), (i_t * 64 + 16, 48), (16, 16), (1, 0)
+ )
+ p_Ai_34 = tl.make_block_ptr(
+ Ai, (T, 64), (H * 64, 1), (i_t * 64 + 32, 48), (16, 16), (1, 0)
+ )
+ tl.store(
+ p_Ai_12,
+ fill_zeros.to(p_Ai_12.dtype.element_ty, fp_downcast_rounding="rtne"),
+ boundary_check=(0, 1),
+ )
+ tl.store(
+ p_Ai_13,
+ fill_zeros.to(p_Ai_13.dtype.element_ty, fp_downcast_rounding="rtne"),
+ boundary_check=(0, 1),
+ )
+ tl.store(
+ p_Ai_14,
+ fill_zeros.to(p_Ai_14.dtype.element_ty, fp_downcast_rounding="rtne"),
+ boundary_check=(0, 1),
+ )
+ tl.store(
+ p_Ai_23,
+ fill_zeros.to(p_Ai_23.dtype.element_ty, fp_downcast_rounding="rtne"),
+ boundary_check=(0, 1),
+ )
+ tl.store(
+ p_Ai_24,
+ fill_zeros.to(p_Ai_24.dtype.element_ty, fp_downcast_rounding="rtne"),
+ boundary_check=(0, 1),
+ )
+ tl.store(
+ p_Ai_34,
+ fill_zeros.to(p_Ai_34.dtype.element_ty, fp_downcast_rounding="rtne"),
+ boundary_check=(0, 1),
+ )
+
+
+@input_guard
+def solve_tril(
+ A: torch.Tensor,
+ cu_seqlens: Optional[torch.Tensor] = None,
+ output_dtype: torch.dtype = torch.float,
+) -> torch.Tensor:
+ """
+ Compute the inverse of the lower triangular matrix
+ A should be strictly lower triangular, i.e., A.triu() == 0.
+
+ Args:
+ A (torch.Tensor):
+ [B, T, H, K]
+ cu_seqlens (torch.Tensor):
+ The cumulative sequence lengths of the input tensor.
+ Default: None.
+ output_dtype (torch.dtype):
+ The dtype of the output tensor. Default: `torch.float`
+
+ Returns:
+ (I + A)^-1 with the same shape as A
+ """
+ assert A.shape[-1] in [16, 32, 64]
+
+ B, T, H, BT = A.shape
+ Ad = torch.empty(
+ B, T, H, 16, device=A.device, dtype=torch.float if BT != 16 else output_dtype
+ )
+
+ chunk_indices = (
+ prepare_chunk_indices(cu_seqlens, 16) if cu_seqlens is not None else None
+ )
+ NT = len(chunk_indices) if cu_seqlens is not None else triton.cdiv(T, 16)
+ solve_tril_16x16_kernel[NT, B * H](
+ A=A,
+ Ad=Ad,
+ cu_seqlens=cu_seqlens,
+ chunk_indices=chunk_indices,
+ T=T,
+ H=H,
+ BT=BT,
+ num_warps=1,
+ num_stages=4,
+ )
+ if BT == 16:
+ return Ad
+
+ Ai = torch.empty(B, T, H, BT, device=A.device, dtype=output_dtype)
+ merge_fn = (
+ merge_16x16_to_32x32_inverse_kernel
+ if BT == 32
+ else merge_16x16_to_64x64_inverse_kernel
+ )
+ chunk_indices = (
+ prepare_chunk_indices(cu_seqlens, BT) if cu_seqlens is not None else None
+ )
+ NT = len(chunk_indices) if cu_seqlens is not None else triton.cdiv(T, BT)
+ merge_fn[NT, B * H](
+ A=A,
+ Ad=Ad,
+ Ai=Ai,
+ cu_seqlens=cu_seqlens,
+ chunk_indices=chunk_indices,
+ T=T,
+ H=H,
+ BT=BT,
+ num_warps=4,
+ num_stages=3,
+ )
+ return Ai
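# Editor's note: illustrative sketch, not part of the patch. Within each BT-sized chunk along T
# (BT in {16, 32, 64}), the kernels above invert (I + A) for a strictly lower-triangular block A
# using exact blockwise substitution. A dense PyTorch cross-check for the fixed-length path
# (assumes a CUDA device and BT dividing T; entries are scaled down to keep the dense inverse
# well conditioned):
import torch

from sglang.srt.layers.attention.fla.solve_tril import solve_tril

B, T, H, BT = 2, 128, 4, 64
blocks = 0.02 * torch.randn(B, H, T // BT, BT, BT, device="cuda").tril(-1)
A = blocks.permute(0, 2, 3, 1, 4).reshape(B, T, H, BT)  # chunk-local [BT, BT] blocks

Ai = solve_tril(A)  # same shape as A, (I + A)^-1 per chunk
out = Ai.reshape(B, T // BT, BT, H, BT).permute(0, 3, 1, 2, 4)
ref = torch.linalg.inv(torch.eye(BT, device="cuda") + blocks)
torch.testing.assert_close(out, ref, rtol=1e-3, atol=1e-3)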
diff --git a/python/sglang/srt/layers/attention/fla/utils.py b/python/sglang/srt/layers/attention/fla/utils.py
new file mode 100644
index 00000000000..3caf70de5d5
--- /dev/null
+++ b/python/sglang/srt/layers/attention/fla/utils.py
@@ -0,0 +1,331 @@
+# Adapted from https://github.com/fla-org/flash-linear-attention/blob/main/fla/utils.py
+# -*- coding: utf-8 -*-
+
+import contextlib
+import functools
+import logging
+import os
+import sys
+from enum import Enum
+from functools import lru_cache
+from typing import Any, Callable, Dict, List, Literal, Optional, Tuple
+
+import torch
+import triton
+from packaging import version
+
+logger = logging.getLogger(__name__)
+
+COMPILER_MODE = os.getenv("FLA_COMPILER_MODE") == "1"
+FLA_CI_ENV = os.getenv("FLA_CI_ENV") == "1"
+
+
+@lru_cache(maxsize=1)
+def check_environments():
+ """
+ Checks the current operating system, Triton version, and Python version,
+ issuing warnings if they don't meet recommendations.
+ This function's body only runs once due to lru_cache.
+ """
+ # Check Operating System
+ if sys.platform == "win32":
+ logger.warning(
+ "Detected Windows operating system. Triton does not have an official Windows release, "
+ "thus FLA will not be adapted for Windows, and any potential errors will not be fixed. "
+ "Please consider using a Linux environment for compatibility."
+ )
+
+ triton_version = version.parse(triton.__version__)
+ required_triton_version = version.parse("3.2.0")
+
+ if triton_version < required_triton_version:
+ logger.warning(
+ f"Current Triton version {triton_version} is below the recommended 3.2.0 version. "
+ "Errors may occur and these issues will not be fixed. "
+ "Please consider upgrading Triton."
+ )
+
+ # Check Python version
+ py_version = version.parse(f"{sys.version_info.major}.{sys.version_info.minor}")
+ required_py_version = version.parse("3.11")
+
+ if py_version < required_py_version:
+ logger.warning(
+ f"Current Python version {py_version} is below the recommended 3.11 version. "
+ "It is recommended to upgrade to Python 3.11 or higher for the best experience."
+ )
+
+ return None
+
+
+check_environments()
+
+
+def get_abs_err(x, y):
+ return (x.detach() - y.detach()).flatten().abs().max().item()
+
+
+def get_err_ratio(x, y):
+ err = (x.detach() - y.detach()).flatten().square().mean().sqrt().item()
+ base = (x.detach()).flatten().square().mean().sqrt().item()
+ return err / (base + 1e-8)
+
+
+def assert_close(prefix, ref, tri, ratio, warning=False, err_atol=1e-6):
+ abs_atol = get_abs_err(ref, tri)
+ msg = f"{prefix} diff: {abs_atol:.6f} ratio: {get_err_ratio(ref, tri):.6f}"
+ logger.info(msg)
+ error_rate = get_err_ratio(ref, tri)
+ if abs_atol <= err_atol:
+ return
+ if warning or (FLA_CI_ENV and (error_rate < 0.01 or abs_atol <= 0.3)):
+ if error_rate > ratio:
+ import warnings
+
+ warnings.warn(msg)
+ else:
+ assert error_rate < ratio, msg
+
+
+SUPPRESS_LEVEL = int(os.getenv("GDN_RECOMPUTE_SUPPRESS_LEVEL", "0"))
+
+
+def tensor_cache(fn: Callable[..., torch.Tensor]) -> Callable[..., torch.Tensor]:
+ """
+ A decorator that caches the most recent results of a function with tensor inputs.
+ This decorator will store the output of the decorated function for the most recent set of input tensors.
+ The cache is limited to a fixed size (default is 4). When the cache is full, the oldest entry will be removed.
+ Args:
+ fn (Callable[..., torch.Tensor]):
+ The function to be decorated. It should take tensor inputs and return tensor outputs.
+ Returns:
+ Callable[..., torch.Tensor]:
+ A wrapped version of the input function with single-entry caching.
+ """
+
+ cache_entries: Tuple[Optional[Tuple], Optional[Dict], Any] = []
+ cache_size = 4
+
+ @functools.wraps(fn)
+ def wrapper(*args: Any, **kwargs: Any) -> Any:
+ nonlocal cache_entries, cache_size
+ for i, entry in enumerate(cache_entries):
+ last_args, last_kwargs, last_result = entry
+ if len(args) == len(last_args) and len(kwargs) == len(last_kwargs):
+ if all(a is b for a, b in zip(args, last_args)) and all(
+ k in last_kwargs and v is last_kwargs[k] for k, v in kwargs.items()
+ ):
+ cache_entries = (
+ cache_entries[:i]
+ + cache_entries[i + 1 :]
+ + [(args, kwargs, last_result)]
+ )
+ return last_result
+
+ result = fn(*args, **kwargs)
+
+ if len(cache_entries) >= cache_size:
+ cache_entries = cache_entries[1:]
+ cache_entries.append((args, kwargs, result))
+ return result
+
+ return wrapper
+
+
+def input_guard(fn: Callable[..., torch.Tensor]) -> Callable[..., torch.Tensor]:
+ """
+ A decorator to make sure all input tensors are contiguous and set the device based on input tensors.
+ """
+
+ @functools.wraps(fn)
+ def wrapper(*args, **kwargs):
+ contiguous_args = (
+ i if not isinstance(i, torch.Tensor) else i.contiguous() for i in args
+ )
+ contiguous_kwargs = {
+ k: (v if not isinstance(v, torch.Tensor) else v.contiguous())
+ for k, v in kwargs.items()
+ }
+
+ tensor = None
+ for arg in args:
+ if isinstance(arg, torch.Tensor):
+ tensor = arg
+ break
+ if tensor is None:
+ for value in kwargs.values():
+ if isinstance(value, torch.Tensor):
+ tensor = value
+ break
+
+ if tensor is not None:
+ ctx = custom_device_ctx(tensor.device.index)
+ else:
+ ctx = contextlib.nullcontext()
+
+ with ctx:
+ return fn(*contiguous_args, **contiguous_kwargs)
+
+ return wrapper
+
+
+contiguous = input_guard
+
+
+def require_version(version, hint):
+ """
+ Perform a runtime check of the dependency versions, using the exact same syntax used by pip.
+ """
+
+ def decorator(fn):
+ @functools.wraps(fn)
+ def wrapper(ctx, *args, **kwargs):
+ from transformers.utils.versions import require_version
+
+ require_version(version, hint)
+ return fn(
+ ctx,
+ *(
+ i if not isinstance(i, torch.Tensor) else i.contiguous()
+ for i in args
+ ),
+ **{
+ k: (v if not isinstance(v, torch.Tensor) else v.contiguous())
+ for k, v in kwargs.items()
+ },
+ )
+
+ return wrapper
+
+ return decorator
+
+
+def checkpoint(fn):
+ def wrapper(*args, **kwargs):
+ return torch.utils.checkpoint.checkpoint(fn, *args, **kwargs)
+
+ return wrapper
+
+
+@lru_cache(maxsize=None)
+def check_pytorch_version(version_s: str = "2.4") -> bool:
+ return version.parse(torch.__version__) >= version.parse(version_s)
+
+
+def _cpu_device_warning():
+ import warnings
+
+    warnings.warn(
+        "Triton is not supported on the current platform; falling back to CPU.",
+        stacklevel=1,
+    )
+
+
+@lru_cache(maxsize=None)
+def get_multiprocessor_count(tensor_idx: int = 0) -> int:
+ try:
+ return triton.runtime.driver.active.utils.get_device_properties(tensor_idx)[
+ "multiprocessor_count"
+ ]
+ except BaseException:
+ _cpu_device_warning()
+ return -1
+
+
+@lru_cache(maxsize=None)
+def get_available_device() -> str:
+ try:
+ return triton.runtime.driver.active.get_current_target().backend
+ except BaseException:
+ _cpu_device_warning()
+ return "cpu"
+
+
+@lru_cache(maxsize=None)
+def _check_platform() -> Literal["nvidia", "amd", "intel", "musa"]:
+ device = get_available_device()
+ if device == "cuda":
+ return "nvidia"
+ elif device == "hip":
+ return "amd"
+ elif device == "xpu":
+ return "intel"
+ else:
+ return device
+
+
+# For AMD GPUs, the triton backend is 'hip', while for Nvidia GPUs, the triton backend is 'cuda'.
+# However, the torch backend is 'cuda' for both Nvidia and AMD GPUs.
+# Therefore, we need to check the triton backend to determine the actual GPU vendor.
+device = get_available_device() if get_available_device() != "hip" else "cuda"
+device_torch_lib = getattr(torch, device)
+device_platform = _check_platform()
+
+is_amd = device_platform == "amd"
+is_intel = device_platform == "intel"
+is_nvidia = device_platform == "nvidia"
+is_intel_alchemist = is_intel and "Intel(R) Arc(TM) A" in torch.xpu.get_device_name(0)
+is_nvidia_hopper = is_nvidia and (
+ "NVIDIA H" in torch.cuda.get_device_name(0)
+ or torch.cuda.get_device_capability()[0] >= 9
+)
+use_cuda_graph = is_nvidia and os.environ.get("FLA_USE_CUDA_GRAPH", "0") == "1"
+
+# Nvidia Ampere or newer; haven't checked AMD and Intel yet.
+is_tf32_supported = is_nvidia and torch.cuda.get_device_capability(0)[0] >= 8
+is_gather_supported = hasattr(triton.language, "gather")
+
+
+def get_all_max_shared_mem():
+ try:
+ return [
+ triton.runtime.driver.active.utils.get_device_properties(i)[
+ "max_shared_mem"
+ ]
+ for i in range(device_torch_lib.device_count())
+ ]
+ except BaseException:
+ _cpu_device_warning()
+ return [-1]
+
+
+class Backend(Enum):
+ ADA = 101376 # RTX 4090
+ AMPERE = 166912 # A100
+ HOPPER = 232448 # H100
+ DEFAULT = 102400 # Default
+
+ @classmethod
+ def get_shared_memory(cls, arch: str) -> int:
+ try:
+ return cls[arch.upper()].value
+ except KeyError:
+ return cls.DEFAULT.value
+
+
+@lru_cache(maxsize=None)
+def check_shared_mem(arch: str = "none", tensor_idx: int = 0) -> bool:
+ try:
+ device_shared_mem_list = get_all_max_shared_mem()
+ max_shared_memory = device_shared_mem_list[tensor_idx]
+ return max_shared_memory >= Backend.get_shared_memory(arch)
+ except Exception:
+ return False
+
+
+if check_pytorch_version("2.4"):
+ device = "cuda" if device == "cpu" else device
+ autocast_custom_fwd = functools.partial(torch.amp.custom_fwd, device_type=device)
+ autocast_custom_bwd = functools.partial(torch.amp.custom_bwd, device_type=device)
+
+ def custom_device_ctx(index: int):
+ return device_torch_lib.device(index)
+
+else:
+ assert (
+ device == "cuda"
+ ), "Only cuda device is supported for PyTorch version < 2.4.0."
+ autocast_custom_fwd = device_torch_lib.amp.custom_fwd
+ autocast_custom_bwd = device_torch_lib.amp.custom_bwd
+
+ def custom_device_ctx(index: int):
+ return torch.cuda.device(index)
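# Editor's note: illustrative sketch, not part of the patch. input_guard (aliased as `contiguous`)
# is used throughout this package to make tensor arguments contiguous and to run the wrapped
# function under the device of the first tensor argument. Minimal usage, assuming a CUDA device:
import torch

from sglang.srt.layers.attention.fla.utils import input_guard


@input_guard
def row_sums(x: torch.Tensor) -> torch.Tensor:
    # The decorator guarantees x is contiguous here, even if the caller passed a transposed view.
    assert x.is_contiguous()
    return x.sum(dim=-1)


print(row_sums(torch.randn(8, 8, device="cuda").t()).shape)  # torch.Size([8])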
diff --git a/python/sglang/srt/layers/attention/fla/wy_fast.py b/python/sglang/srt/layers/attention/fla/wy_fast.py
new file mode 100644
index 00000000000..d51500eb459
--- /dev/null
+++ b/python/sglang/srt/layers/attention/fla/wy_fast.py
@@ -0,0 +1,158 @@
+# Adapted from https://github.com/fla-org/flash-linear-attention/blob/main/fla/ops/gated_delta_rule/wy_fast.py
+# -*- coding: utf-8 -*-
+# Copyright (c) 2023-2025, Songlin Yang, Yu Zhang
+
+from typing import Optional, Tuple
+
+import torch
+import triton
+import triton.language as tl
+
+from sglang.srt.layers.attention.fla.index import prepare_chunk_indices
+from sglang.srt.layers.attention.fla.op import safe_exp
+from sglang.srt.layers.attention.fla.utils import check_shared_mem
+
+
+@triton.heuristics({"IS_VARLEN": lambda args: args["cu_seqlens"] is not None})
+# @triton.autotune(
+# configs=[
+# triton.Config({}, num_warps=num_warps, num_stages=num_stages)
+# for num_warps in [2, 4, 8]
+# for num_stages in [2, 3, 4]
+# ],
+# key=["H", "K", "V", "BT", "BK", "BV", "IS_VARLEN"],
+# )
+@triton.jit(do_not_specialize=["T"])
+def recompute_w_u_fwd_kernel(
+ k,
+ v,
+ beta,
+ w,
+ u,
+ A,
+ g,
+ cu_seqlens,
+ chunk_indices,
+ T,
+ H: tl.constexpr,
+ Hg: tl.constexpr,
+ K: tl.constexpr,
+ V: tl.constexpr,
+ BT: tl.constexpr,
+ BK: tl.constexpr,
+ BV: tl.constexpr,
+ IS_VARLEN: tl.constexpr,
+):
+ i_t, i_bh = tl.program_id(0), tl.program_id(1)
+ i_b, i_h = i_bh // H, i_bh % H
+ if IS_VARLEN:
+ i_n, i_t = tl.load(chunk_indices + i_t * 2).to(tl.int32), tl.load(
+ chunk_indices + i_t * 2 + 1
+ ).to(tl.int32)
+ bos, eos = tl.load(cu_seqlens + i_n).to(tl.int32), tl.load(
+ cu_seqlens + i_n + 1
+ ).to(tl.int32)
+ T = eos - bos
+ else:
+ bos, eos = i_b * T, i_b * T + T
+ p_beta = tl.make_block_ptr(
+ beta + bos * H + i_h, (T,), (H,), (i_t * BT,), (BT,), (0,)
+ )
+ p_g = tl.make_block_ptr(g + (bos * H + i_h), (T,), (H,), (i_t * BT,), (BT,), (0,))
+ p_A = tl.make_block_ptr(
+ A + (bos * H + i_h) * BT, (T, BT), (H * BT, 1), (i_t * BT, 0), (BT, BT), (1, 0)
+ )
+ b_beta = tl.load(p_beta, boundary_check=(0,))
+ b_A = tl.load(p_A, boundary_check=(0, 1))
+ b_g = tl.exp(tl.load(p_g, boundary_check=(0,)))
+
+ for i_v in range(tl.cdiv(V, BV)):
+ p_v = tl.make_block_ptr(
+ v + (bos * H + i_h) * V,
+ (T, V),
+ (H * V, 1),
+ (i_t * BT, i_v * BV),
+ (BT, BV),
+ (1, 0),
+ )
+ p_u = tl.make_block_ptr(
+ u + (bos * H + i_h) * V,
+ (T, V),
+ (H * V, 1),
+ (i_t * BT, i_v * BV),
+ (BT, BV),
+ (1, 0),
+ )
+ b_v = tl.load(p_v, boundary_check=(0, 1))
+ b_vb = (b_v * b_beta[:, None]).to(b_v.dtype)
+ b_u = tl.dot(b_A, b_vb, allow_tf32=False)
+ tl.store(p_u, b_u.to(p_u.dtype.element_ty), boundary_check=(0, 1))
+
+ for i_k in range(tl.cdiv(K, BK)):
+ p_k = tl.make_block_ptr(
+ k + (bos * Hg + i_h // (H // Hg)) * K,
+ (T, K),
+ (Hg * K, 1),
+ (i_t * BT, i_k * BK),
+ (BT, BK),
+ (1, 0),
+ )
+ p_w = tl.make_block_ptr(
+ w + (bos * H + i_h) * K,
+ (T, K),
+ (H * K, 1),
+ (i_t * BT, i_k * BK),
+ (BT, BK),
+ (1, 0),
+ )
+ b_k = tl.load(p_k, boundary_check=(0, 1))
+ b_kb = (b_k * b_beta[:, None] * b_g[:, None]).to(b_k.dtype)
+ b_w = tl.dot(b_A, b_kb)
+ tl.store(p_w, b_w.to(p_w.dtype.element_ty), boundary_check=(0, 1))
+
+
+def recompute_w_u_fwd(
+ k: torch.Tensor,
+ v: torch.Tensor,
+ beta: torch.Tensor,
+ g_cumsum: torch.Tensor,
+ A: torch.Tensor,
+ cu_seqlens: Optional[torch.LongTensor],
+) -> Tuple[torch.Tensor, torch.Tensor]:
+ B, T, Hg, K, V = *k.shape, v.shape[-1]
+ H = v.shape[-2]
+ BT = A.shape[-1]
+
+ chunk_indices = (
+ prepare_chunk_indices(cu_seqlens, BT) if cu_seqlens is not None else None
+ )
+ NT = triton.cdiv(T, BT) if cu_seqlens is None else len(chunk_indices)
+ BK = 64
+ BV = 64
+ u = torch.empty_like(v)
+ w = k.new_empty(B, T, H, K)
+ recompute_w_u_fwd_kernel[(NT, B * H)](
+ k=k,
+ v=v,
+ beta=beta,
+ w=w,
+ u=u,
+ A=A,
+ g=g_cumsum,
+ cu_seqlens=cu_seqlens,
+ chunk_indices=chunk_indices,
+ T=T,
+ H=H,
+ Hg=Hg,
+ K=K,
+ V=V,
+ BT=BT,
+ BK=BK,
+ BV=BV,
+ num_warps=4,
+ num_stages=3,
+ )
+ return w, u
+
+
+fwd_recompute_w_u = recompute_w_u_fwd
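# Editor's note: illustrative sketch, not part of the patch. Per BT-sized chunk, the kernel above
# computes u = A @ (v * beta[..., None]) and w = A @ (k * beta[..., None] * exp(g)[..., None]),
# where A is the chunk-local [BT, BT] matrix. A dense reference for the ungrouped case (Hg == H),
# for intuition only:
import torch


def recompute_w_u_ref(k, v, beta, g, A, BT):
    # k, v: [B, T, H, D]; beta, g: [B, T, H]; A: [B, T, H, BT]
    B, T, H, _ = v.shape

    def to_chunks(x):
        # [B, T, H, D] -> [B, T // BT, H, BT, D]
        return x.reshape(B, T // BT, BT, H, -1).transpose(2, 3)

    Ab = to_chunks(A)  # [B, NC, H, BT, BT]
    u = Ab @ to_chunks(v * beta[..., None])
    w = Ab @ to_chunks(k * beta[..., None] * g[..., None].exp())
    return (
        w.transpose(2, 3).reshape(B, T, H, -1),
        u.transpose(2, 3).reshape(B, T, H, -1),
    )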
diff --git a/python/sglang/srt/layers/attention/flashattention_backend.py b/python/sglang/srt/layers/attention/flashattention_backend.py
index 2d4e4b263eb..151fc9c01aa 100644
--- a/python/sglang/srt/layers/attention/flashattention_backend.py
+++ b/python/sglang/srt/layers/attention/flashattention_backend.py
@@ -1,10 +1,14 @@
from __future__ import annotations
+import os
+import time
from dataclasses import dataclass
from typing import TYPE_CHECKING, Optional, Union
import numpy as np
import torch
+import triton
+import triton.language as tl
from sglang.srt.configs.model_config import AttentionArch
from sglang.srt.layers.attention.base_attn_backend import AttentionBackend
@@ -20,6 +24,28 @@
from sgl_kernel import merge_state_v2
from sgl_kernel.flash_attn import flash_attn_varlen_func, flash_attn_with_kvcache
+try:
+ from sglang.srt.distributed import (
+ get_tensor_model_parallel_rank,
+ get_tensor_model_parallel_world_size,
+ model_parallel_is_initialized,
+ tensor_model_parallel_all_gather,
+ )
+
+ SGLANG_DIST_AVAILABLE = True
+
+except ImportError:
+ SGLANG_DIST_AVAILABLE = False
+
+
+def get_local_rank():
+ if SGLANG_DIST_AVAILABLE:
+ return (
+ get_tensor_model_parallel_rank() if model_parallel_is_initialized() else 0
+ )
+ else:
+ return 0
+
@dataclass
class FlashAttentionMetadata:
@@ -64,6 +90,9 @@ class LocalAttentionMetadata:
local_attn_metadata: Optional[LocalAttentionMetadata] = None
+ # For sliding window attention topk>1 spec decoding
+ swa_spec_metadata: Optional[FlashAttentionMetadata] = None
+
# Copied from:
# https://github.com/houseroad/vllm/blob/4e45bfcaf928bdb9bd952b4ac922a3c205589ae8/vllm/v1/attention/backends/flash_attn.py
@@ -340,6 +369,15 @@ def __init__(
else None
)
+ self._last_tick = time.time()
+
+ # For each layer, the sliding_window_size can be different. This is only used for preparing SWA metadata.
+ # We use `layer.sliding_window_size` to decide whether to use SWA for each layer.
+ self.sliding_window_size = model_runner.sliding_window_size
+ self.has_swa = (
+ self.sliding_window_size is not None and self.sliding_window_size > -1
+ )
+
def init_forward_metadata(self, forward_batch: ForwardBatch):
"""Initialize forward metadata hence all layers in the forward pass can reuse it."""
metadata = FlashAttentionMetadata()
@@ -556,6 +594,12 @@ def init_forward_metadata(self, forward_batch: ForwardBatch):
(1, 0),
)
self.forward_metadata_spec_decode_expand = metadata_expand
+
+ if self.has_swa:
+ self._init_sliding_window_attn_spec_metadata(
+ metadata, metadata_expand
+ )
+
elif forward_batch.forward_mode.is_extend_or_draft_extend_or_mixed():
metadata.cache_seqlens_int32 = seqlens_in_batch.to(torch.int32)
metadata.max_seq_len_k = forward_batch.seq_lens_cpu.max().item()
@@ -657,11 +701,10 @@ def forward_extend(
# Calculate window size (can be moved to metadata if layer properties don't change)
# we don't do layer.sliding_window_size - 1 since in model.get_attention_sliding_window_size() we already - 1
# here is two side inclusive
- window_size = (
- (layer.sliding_window_size, 0)
- if layer.sliding_window_size is not None and layer.sliding_window_size > -1
- else (-1, -1)
+ is_swa = (
+ layer.sliding_window_size is not None and layer.sliding_window_size > -1
)
+ window_size = (layer.sliding_window_size, 0) if is_swa else (-1, -1)
k_descale, v_descale = None, None
# only use kv scaling if: 1) fp8 kv is explicitly enabled, 2) RadixAttention
# has corresponding quantization method so that layer.k_scale is not None,
@@ -684,8 +727,13 @@ def forward_extend(
)
# We do cascade attention for Target Verify with topk > 1
+ # We don't use cascade attention for Sliding Window Attention:
+        # - The first stage of cascade attention would need a different window size for each q, but the FA3 interface doesn't support passing in a list of window sizes.
+ # - The overhead of duplicated computation of the common prefix part is small for sliding window layers (seq_len <= window_size), so we can just expand it.
use_cascade_attn = (
- forward_batch.forward_mode.is_target_verify() and self.topk > 1
+ forward_batch.forward_mode.is_target_verify()
+ and self.topk > 1
+ and not is_swa
)
# For fa3 interface version compatibility, we put new fields into conditional keyword args
@@ -700,15 +748,31 @@ def forward_extend(
cu_seqlens_q = local_metadata.local_query_start_loc
cache_seqlens = local_metadata.local_seqused_k
max_seqlen_q = local_metadata.local_max_query_len
- max_seqlen_k = local_metadata.local_max_seq_len
+ elif is_swa and metadata.swa_spec_metadata is not None:
+ swa_spec_metadata = metadata.swa_spec_metadata
+ page_table = swa_spec_metadata.page_table
+ cu_seqlens_q = swa_spec_metadata.cu_seqlens_q
+ cache_seqlens = swa_spec_metadata.cache_seqlens_int32
+ max_seqlen_q = swa_spec_metadata.max_seq_len_q
+ cu_seqlens_k = swa_spec_metadata.cu_seqlens_k
else:
page_table = metadata.page_table
cu_seqlens_q = metadata.cu_seqlens_q
cache_seqlens = metadata.cache_seqlens_int32
max_seqlen_q = metadata.max_seq_len_q
- max_seqlen_k = metadata.max_seq_len_k
cu_seqlens_k = metadata.cu_seqlens_k
+ run_benchmark = (
+ (not torch.cuda.is_current_stream_capturing())
+ and os.getenv("HIP_DEBUG_BENCH", "0") == "1"
+ and (get_local_rank() == 0)
+ )
+
+ if run_benchmark:
+ start_event = torch.cuda.Event(True)
+ end_event = torch.cuda.Event(True)
+ start_event.record()
+
# Use Flash Attention for prefill
if not self.use_mla:
# Do multi-head attention
@@ -727,6 +791,9 @@ def forward_extend(
cu_seqlens_k = metadata.encoder_cu_seqlens_k
window_size = (-1, -1)
+ if key_cache.dtype == torch.float8_e4m3fn:
+ q = q.to(key_cache.dtype)
+
result = flash_attn_with_kvcache(
q=q.contiguous().view(-1, layer.tp_q_head_num, layer.head_dim),
k_cache=key_cache,
@@ -776,14 +843,13 @@ def forward_extend(
o = result
else:
if (
- not global_server_args_dict["disable_chunked_prefix_cache"]
- and forward_batch.attn_attend_prefix_cache is not None
+ forward_batch.attn_attend_prefix_cache is not None
and not forward_batch.forward_mode.is_target_verify()
and not forward_batch.forward_mode.is_draft_extend()
):
# Do multi-head attention with chunked prefix cache
-
if forward_batch.attn_attend_prefix_cache:
+ assert not global_server_args_dict["disable_chunked_prefix_cache"]
# MHA for chunked prefix kv cache when running model with MLA
assert forward_batch.prefix_chunk_idx is not None
assert forward_batch.prefix_chunk_cu_seq_lens is not None
@@ -792,7 +858,8 @@ def forward_extend(
chunk_idx = forward_batch.prefix_chunk_idx
assert chunk_idx >= 0
- output, lse, *rest = flash_attn_varlen_func(
+ assert forward_batch.mha_return_lse
+ output = flash_attn_varlen_func(
q=q.view(-1, layer.tp_q_head_num, layer.head_dim),
k=k.view(-1, layer.tp_k_head_num, layer.head_dim).to(q.dtype),
v=v.view(-1, layer.tp_k_head_num, layer.v_head_dim).to(q.dtype),
@@ -806,7 +873,7 @@ def forward_extend(
)
else:
# MHA for extend part of sequence without attending prefix kv cache
- output, lse, *rest = flash_attn_varlen_func(
+ output = flash_attn_varlen_func(
q=q.view(-1, layer.tp_q_head_num, layer.head_dim),
k=k.view(-1, layer.tp_k_head_num, layer.head_dim).to(q.dtype),
v=v.view(-1, layer.tp_k_head_num, layer.v_head_dim).to(q.dtype),
@@ -816,9 +883,28 @@ def forward_extend(
max_seqlen_k=metadata.max_seq_len_q,
softmax_scale=layer.scaling,
causal=True,
- return_softmax_lse=True,
+ return_softmax_lse=forward_batch.mha_return_lse,
+ )
+
+ if run_benchmark:
+ from hip_attn.v1_2.utils import capture
+
+ end_event.record()
+ end_event.synchronize()
+
+ elapsed = start_event.elapsed_time(end_event)
+ elapsed_layer = (time.time() - self._last_tick) * 1000
+ self._last_tick = time.time()
+ capture.report()
+ print(
+ f"[fa3] layer {layer.layer_id} took {elapsed:.2f} ms (from last tick: {elapsed_layer:.2f} ms)"
)
- return output, lse
+
+ if forward_batch.mha_return_lse:
+ output, lse, *rest = output
+ lse = torch.transpose(lse, 0, 1).contiguous()
+ return output, lse
+ return output
else:
# Do absorbed multi-latent attention
kv_cache = forward_batch.token_to_kv_pool.get_key_buffer(
@@ -893,6 +979,16 @@ def forward_extend(
else:
o = result
+ if run_benchmark:
+ from hip_attn.v1_2.utils import capture
+
+ end_event.record()
+ end_event.synchronize()
+
+ elapsed = start_event.elapsed_time(end_event)
+ capture.report()
+ print(f"[fa3] layer {layer.layer_id} took {elapsed:.2f} ms")
+
return o.view(-1, layer.tp_q_head_num * layer.v_head_dim)
def forward_decode(
@@ -1163,6 +1259,8 @@ def init_cuda_graph_state(self, max_bs: int, max_num_tokens: int):
This creates fixed-size tensors that will be reused during CUDA graph replay
to avoid memory allocations.
"""
+ max_num_pages = (self.max_context_len + self.page_size - 1) // self.page_size
+
# This is being used by normal decode and draft decode when topk == 1
self.decode_cuda_graph_metadata = {
"cache_seqlens": torch.zeros(max_bs, dtype=torch.int32, device=self.device),
@@ -1174,13 +1272,7 @@ def init_cuda_graph_state(self, max_bs: int, max_num_tokens: int):
),
"page_table": torch.zeros(
max_bs,
- (self.max_context_len + self.page_size - 1) // self.page_size,
- dtype=torch.int32,
- device=self.device,
- ),
- "page_table_draft_decode": torch.zeros(
- max_bs,
- (self.max_context_len + self.page_size - 1) // self.page_size,
+ max_num_pages,
dtype=torch.int32,
device=self.device,
),
@@ -1188,7 +1280,6 @@ def init_cuda_graph_state(self, max_bs: int, max_num_tokens: int):
0, self.max_context_len, self.page_size, device=self.device
),
}
-
# Only allocate local attention buffers if local attention is enabled
# This prevents OOM errors when local attention is not being used
if self.attention_chunk_size is not None:
@@ -1274,6 +1365,14 @@ def init_cuda_graph_state(self, max_bs: int, max_num_tokens: int):
self.speculative_num_draft_tokens is not None
and self.speculative_num_draft_tokens > 0
):
+ # "page_table_draft_decode" will be set only when spec decoding enabled to save memory
+ self.decode_cuda_graph_metadata["page_table_draft_decode"] = torch.zeros(
+ max_bs,
+ max_num_pages,
+ dtype=torch.int32,
+ device=self.device,
+ )
+
self.target_verify_metadata = {
"cache_seqlens": torch.zeros(
max_bs, dtype=torch.int32, device=self.device
@@ -1290,7 +1389,7 @@ def init_cuda_graph_state(self, max_bs: int, max_num_tokens: int):
),
"page_table": torch.zeros(
max_bs,
- (self.max_context_len + self.page_size - 1) // self.page_size,
+ max_num_pages,
dtype=torch.int32,
device=self.device,
),
@@ -1313,7 +1412,7 @@ def init_cuda_graph_state(self, max_bs: int, max_num_tokens: int):
),
"page_table": torch.zeros(
max_bs,
- (self.max_context_len + self.page_size - 1) // self.page_size,
+ max_num_pages,
dtype=torch.int32,
device=self.device,
),
@@ -1370,6 +1469,32 @@ def init_cuda_graph_state(self, max_bs: int, max_num_tokens: int):
),
}
+ if self.has_swa:
+ self.target_verify_metadata_topk_swa = {
+ "cache_seqlens": torch.zeros(
+ max_bs * self.speculative_num_draft_tokens,
+ dtype=torch.int32,
+ device=self.device,
+ ),
+ "cu_seqlens_k": torch.zeros(
+ max_bs * self.speculative_num_draft_tokens + 1,
+ dtype=torch.int32,
+ device=self.device,
+ ),
+ "cu_seqlens_q": torch.arange(
+ 0,
+ max_bs * self.speculative_num_draft_tokens + 1,
+ dtype=torch.int32,
+ device=self.device,
+ ),
+ "page_table": torch.zeros(
+ max_bs * self.speculative_num_draft_tokens,
+ self.max_context_len,
+ dtype=torch.int32,
+ device=self.device,
+ ),
+ }
+
self.encoder_metadata = {
"encoder_page_table": torch.zeros(
max_bs,
@@ -1557,6 +1682,28 @@ def init_forward_metadata_capture_cuda_graph(
self.target_verify_metadata_topk_normal[bs] = metadata
self.target_verify_metadata_topk_expand[bs] = metadata_expand
+
+ if self.has_swa:
+ metadata_swa = FlashAttentionMetadata()
+ metadata_swa.cache_seqlens_int32 = (
+ self.target_verify_metadata_topk_swa["cache_seqlens"][
+ : bs * self.speculative_num_draft_tokens
+ ]
+ )
+ metadata_swa.max_seq_len_q = 1
+ metadata_swa.cu_seqlens_q = self.target_verify_metadata_topk_swa[
+ "cu_seqlens_q"
+ ][: bs * self.speculative_num_draft_tokens + 1]
+ metadata_swa.cu_seqlens_k = self.target_verify_metadata_topk_swa[
+ "cu_seqlens_k"
+ ][: bs * self.speculative_num_draft_tokens + 1]
+
+ metadata_swa.page_table = self.target_verify_metadata_topk_swa[
+ "page_table"
+ ][: bs * self.speculative_num_draft_tokens]
+ self.target_verify_metadata_topk_swa[bs] = metadata_swa
+ metadata.swa_spec_metadata = metadata_swa
+
elif forward_mode.is_draft_extend():
metadata.cache_seqlens_int32 = self.draft_extend_metadata["cache_seqlens"][
:bs
@@ -1797,6 +1944,12 @@ def init_forward_metadata_replay_cuda_graph(
)
)
+ if self.has_swa:
+ metadata_swa = self.target_verify_metadata_topk_swa[bs]
+ self._init_sliding_window_attn_spec_metadata(
+ metadata, metadata_expand, metadata_swa
+ )
+
elif forward_mode.is_draft_extend():
metadata = self.draft_extend_metadata[bs]
metadata.cache_seqlens_int32.copy_(seq_lens)
@@ -2032,6 +2185,159 @@ def _update_local_attn_metadata_for_replay(
lam.local_max_query_len = int(seqlens_q_local_np.max())
lam.local_max_seq_len = int(seqlens_k_local_np.max())
+ def _init_sliding_window_attn_spec_metadata(
+ self,
+ metadata: FlashAttentionMetadata,
+ metadata_expand: FlashAttentionMetadata,
+ metadata_swa: Optional[FlashAttentionMetadata] = None,
+ ):
+ # TODO: support page_size > 1 for swa spec
+ assert (
+ self.page_size == 1
+ ), "FlashAttention backend doesn't support topk > 1 speculative decoding with page size > 1 sliding window attention"
+
+ cache_seqlens_int32 = (
+ metadata.cache_seqlens_int32.repeat_interleave(
+ self.speculative_num_draft_tokens
+ )
+ + metadata_expand.cache_seqlens_int32
+ )
+ cu_seqlens_k = torch.nn.functional.pad(
+ torch.cumsum(cache_seqlens_int32, dim=0, dtype=torch.int32), (1, 0)
+ )
+ bs = cache_seqlens_int32.shape[0]
+ page_table = (
+ metadata.page_table.new_zeros(
+ (bs, metadata.max_seq_len_k + metadata_expand.page_table.shape[1])
+ )
+ if metadata_swa is None
+ else metadata_swa.page_table
+ )
+
+ prepare_swa_spec_page_table_triton(
+ page_table,
+ metadata.page_table,
+ metadata_expand.page_table,
+ metadata.cache_seqlens_int32,
+ metadata_expand.cache_seqlens_int32,
+ self.speculative_num_draft_tokens,
+ )
+
+ if metadata_swa is None:
+ metadata_swa = FlashAttentionMetadata()
+ metadata_swa.max_seq_len_q = 1
+ metadata_swa.cu_seqlens_q = metadata_expand.cu_seqlens_q
+ metadata_swa.cache_seqlens_int32 = cache_seqlens_int32
+ metadata_swa.cu_seqlens_k = cu_seqlens_k
+ metadata_swa.page_table = page_table
+ else:
+ metadata_swa.cache_seqlens_int32.copy_(cache_seqlens_int32)
+ metadata_swa.cu_seqlens_k.copy_(cu_seqlens_k)
+
+ metadata.swa_spec_metadata = metadata_swa
+
+
+@triton.jit
+def _prepare_swa_spec_page_table_kernel(
+ dst_ptr,
+ src_a_ptr,
+ src_b_ptr,
+ seq_len_a_ptr,
+ seq_len_b_ptr,
+ dst_stride_m,
+ dst_stride_n,
+ a_stride_m,
+ a_stride_n,
+ b_stride_m,
+ b_stride_n,
+ LEN_A: tl.constexpr,
+ LEN_B: tl.constexpr,
+ REPEAT_STEP: tl.constexpr,
+ BLOCK_N: tl.constexpr,
+):
+ pid_m = tl.program_id(0)
+ pid_n = tl.program_id(1)
+
+ idx_a = pid_m // REPEAT_STEP
+ idx_b = pid_m
+ seq_len_a = tl.load(seq_len_a_ptr + idx_a)
+ seq_len_b = tl.load(seq_len_b_ptr + idx_b)
+
+ offs_n = pid_n * BLOCK_N + tl.arange(0, BLOCK_N)
+ total_len = seq_len_a + seq_len_b
+
+ if pid_n * BLOCK_N >= total_len:
+ return
+
+ mask = offs_n < total_len
+ dst = dst_ptr + pid_m * dst_stride_m + offs_n * dst_stride_n
+
+ if (pid_n + 1) * BLOCK_N < seq_len_a:
+ a_ptr = src_a_ptr + idx_a * a_stride_m + offs_n * a_stride_n
+ a_mask = mask & (offs_n < LEN_A)
+ val = tl.load(a_ptr, mask=a_mask, other=0)
+ tl.store(dst, val, mask=mask)
+ elif pid_n * BLOCK_N >= seq_len_a:
+ offs_b = offs_n - seq_len_a
+ b_ptr = src_b_ptr + idx_b * b_stride_m + offs_b * b_stride_n
+ b_mask = mask & (offs_b < LEN_B)
+ val = tl.load(b_ptr, mask=b_mask, other=0)
+ tl.store(dst, val, mask=mask)
+ else:
+ # mixed part
+ a_offs = offs_n
+ a_mask = (a_offs < seq_len_a) & (a_offs < LEN_A)
+ a_ptr = src_a_ptr + idx_a * a_stride_m + a_offs * a_stride_n
+ a_val = tl.load(a_ptr, mask=a_mask, other=0)
+
+ b_offs = offs_n - seq_len_a
+ b_mask = (b_offs >= 0) & (b_offs < seq_len_b) & (b_offs < LEN_B)
+ b_ptr = src_b_ptr + idx_b * b_stride_m + b_offs * b_stride_n
+ b_val = tl.load(b_ptr, mask=b_mask, other=0)
+
+ result = tl.where(offs_n < seq_len_a, a_val, b_val)
+ tl.store(dst, result, mask=mask)
+
+
+def prepare_swa_spec_page_table_triton(
+ page_table_dst: torch.Tensor,
+ page_table_a: torch.Tensor,
+ page_table_b: torch.Tensor, # expand page table
+ seq_len_a: torch.Tensor,
+ seq_len_b: torch.Tensor, # expand seq lens
+ speculative_num_draft_tokens: int,
+):
+ # concat page_table and expand page_table by kv seq length
+ bs = seq_len_a.numel()
+ bs_expand = seq_len_b.numel()
+ assert bs_expand == bs * speculative_num_draft_tokens
+
+ LEN_A = page_table_a.shape[1]
+ LEN_B = page_table_b.shape[1]
+ LEN_OUT = LEN_A + LEN_B
+ REPEAT_STEP = speculative_num_draft_tokens
+ BLOCK_N = 256
+
+ grid = (bs_expand, triton.cdiv(LEN_OUT, BLOCK_N))
+ _prepare_swa_spec_page_table_kernel[grid](
+ page_table_dst,
+ page_table_a,
+ page_table_b,
+ seq_len_a,
+ seq_len_b,
+ page_table_dst.stride(0),
+ page_table_dst.stride(1),
+ page_table_a.stride(0),
+ page_table_a.stride(1),
+ page_table_b.stride(0),
+ page_table_b.stride(1),
+ LEN_A=LEN_A,
+ LEN_B=LEN_B,
+ REPEAT_STEP=REPEAT_STEP,
+ BLOCK_N=BLOCK_N,
+ num_warps=4,
+ )
+
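# Editor's note: illustrative sketch, not part of the patch. For each expanded row j
# (with i = j // speculative_num_draft_tokens), the kernel above writes
#   dst[j, :len_a[i] + len_b[j]] = cat(page_table_a[i, :len_a[i]], page_table_b[j, :len_b[j]])
# i.e. it concatenates the prefix page table with the expanded draft-token page table per row.
# A plain PyTorch reference, for intuition only:
import torch


def prepare_swa_spec_page_table_ref(a, b, len_a, len_b, num_draft_tokens):
    bs_expand = b.shape[0]
    dst = a.new_zeros(bs_expand, a.shape[1] + b.shape[1])
    for j in range(bs_expand):
        i = j // num_draft_tokens
        la, lb = int(len_a[i]), int(len_b[j])
        dst[j, :la] = a[i, :la]
        dst[j, la : la + lb] = b[j, :lb]
    return dst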
class FlashAttentionMultiStepBackend:
diff --git a/python/sglang/srt/layers/attention/flashinfer_backend.py b/python/sglang/srt/layers/attention/flashinfer_backend.py
index 00d09e69d09..a5b207c779d 100644
--- a/python/sglang/srt/layers/attention/flashinfer_backend.py
+++ b/python/sglang/srt/layers/attention/flashinfer_backend.py
@@ -26,11 +26,14 @@
from sglang.srt.layers.attention.utils import create_flashinfer_kv_indices_triton
from sglang.srt.layers.dp_attention import get_attention_tp_size
from sglang.srt.layers.radix_attention import AttentionType
-from sglang.srt.layers.utils import is_sm100_supported
from sglang.srt.mem_cache.allocator import SWATokenToKVPoolAllocator
from sglang.srt.model_executor.forward_batch_info import ForwardBatch, ForwardMode
from sglang.srt.speculative.eagle_utils import EagleDraftInput, EagleVerifyInput
-from sglang.srt.utils import is_flashinfer_available, next_power_of_2
+from sglang.srt.utils import (
+ is_flashinfer_available,
+ is_sm100_supported,
+ next_power_of_2,
+)
if TYPE_CHECKING:
from sglang.srt.layers.radix_attention import RadixAttention
@@ -498,8 +501,9 @@ def forward_extend(
sm_scale=layer.scaling,
window_left=layer.sliding_window_size,
logits_soft_cap=logits_soft_cap,
- k_scale=layer.k_scale,
- v_scale=layer.v_scale,
+ # Must use _float to avoid device-to-host copy that breaks cuda graph capture.
+ k_scale=layer.k_scale_float,
+ v_scale=layer.v_scale_float,
)
else:
causal = True
@@ -577,8 +581,9 @@ def forward_decode(
forward_batch.token_to_kv_pool.get_kv_buffer(layer.layer_id),
sm_scale=layer.scaling,
logits_soft_cap=layer.logit_cap,
- k_scale=layer.k_scale,
- v_scale=layer.v_scale,
+ # Must use _float to avoid device-to-host copy that breaks cuda graph capture.
+ k_scale=layer.k_scale_float,
+ v_scale=layer.v_scale_float,
)
return o.view(-1, layer.tp_q_head_num * layer.head_dim)
@@ -1263,11 +1268,12 @@ def should_use_tensor_core(
# Calculate GQA group size
gqa_group_size = num_attention_heads // num_kv_heads
- # Determine based on dtype and GQA group size
+ # For Flashinfer, a GQA group size of at least 4 is needed to efficiently
+ # use Tensor Cores, as it fuses the head group with the token dimension in MMA.
if kv_cache_dtype in (torch.float8_e4m3fn, torch.float8_e5m2):
return True
elif kv_cache_dtype in (torch.float16, torch.half, torch.bfloat16):
- return gqa_group_size > 4
+ return gqa_group_size >= 4
else:
return False
@@ -1372,7 +1378,14 @@ def fast_decode_plan(
if self.use_tensor_cores:
# ALSO convert last_page_len to CPU
- last_page_len_host = last_page_len.cpu()
+ if page_size == 1:
+ # When page size is 1, last_page_len is always 1.
+ # Directly construct the host tensor rather than executing a device-to-host copy.
+ last_page_len_host = torch.ones(
+ (batch_size,), dtype=torch.int32, device="cpu"
+ )
+ else:
+ last_page_len_host = last_page_len.cpu()
kv_lens_arr_host = get_seq_lens(indptr_host, last_page_len_host, page_size)
diff --git a/python/sglang/srt/layers/attention/flashinfer_mla_backend.py b/python/sglang/srt/layers/attention/flashinfer_mla_backend.py
index 90576a17a15..05e9bef80c7 100644
--- a/python/sglang/srt/layers/attention/flashinfer_mla_backend.py
+++ b/python/sglang/srt/layers/attention/flashinfer_mla_backend.py
@@ -28,11 +28,14 @@
create_flashinfer_kv_indices_triton,
)
from sglang.srt.layers.dp_attention import get_attention_tp_size
-from sglang.srt.layers.utils import is_sm100_supported
from sglang.srt.managers.schedule_batch import global_server_args_dict
from sglang.srt.model_executor.forward_batch_info import ForwardBatch, ForwardMode
from sglang.srt.speculative.eagle_utils import EagleDraftInput, EagleVerifyInput
-from sglang.srt.utils import is_flashinfer_available, next_power_of_2
+from sglang.srt.utils import (
+ is_flashinfer_available,
+ is_sm100_supported,
+ next_power_of_2,
+)
if TYPE_CHECKING:
from sglang.srt.layers.radix_attention import RadixAttention
@@ -61,6 +64,117 @@ class PrefillMetadata:
global_workspace_buffer = None
+class FlashInferMhaChunkKVRunner:
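+ """Runs chunked-prefix MHA with LSE outputs for MLA models, using FlashInfer ragged prefill wrappers."""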
+ def __init__(
+ self, model_runner: ModelRunner, attn_backend: "FlashInferMlaAttnBackend"
+ ):
+ # Parse Constants
+ self.num_local_heads = (
+ model_runner.model_config.num_attention_heads // get_attention_tp_size()
+ )
+ self.qk_nope_head_dim = model_runner.model_config.qk_nope_head_dim
+ self.qk_rope_head_dim = model_runner.model_config.qk_rope_head_dim
+ self.v_head_dim = model_runner.model_config.v_head_dim
+ self.data_type = model_runner.dtype
+ self.q_data_type = model_runner.dtype
+
+ # Buffers and wrappers
+ self.qo_indptr = attn_backend.qo_indptr
+ self.workspace_buffer = attn_backend.workspace_buffer
+ self.fmha_backend = attn_backend.fmha_backend
+
+ self.chunk_ragged_wrappers = []
+ self.ragged_wrapper = attn_backend.prefill_wrapper_ragged
+
+ def update_prefix_chunks(self, num_prefix_chunks: int):
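+ # Lazily grow the pool so there is one ragged wrapper per prefix chunk; wrappers are reused across forward passes.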
+ while num_prefix_chunks > len(self.chunk_ragged_wrappers):
+ ragged_wrapper = BatchPrefillWithRaggedKVCacheWrapper(
+ self.workspace_buffer, "NHD", backend=self.fmha_backend
+ )
+ self.chunk_ragged_wrappers.append(ragged_wrapper)
+
+ def update_wrapper(
+ self,
+ forward_batch: ForwardBatch,
+ disable_flashinfer_ragged: bool = False,
+ ):
+ assert forward_batch.num_prefix_chunks is not None
+ num_prefix_chunks = forward_batch.num_prefix_chunks
+ self.update_prefix_chunks(num_prefix_chunks)
+
+ prefix_lens = forward_batch.extend_prefix_lens
+ seq_lens = forward_batch.seq_lens
+
+ bs = len(seq_lens)
+ qo_indptr = self.qo_indptr
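+ # Cumulative count of new (extend) tokens per request; prefix tokens are excluded here.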
+ qo_indptr[1 : bs + 1] = torch.cumsum(seq_lens - prefix_lens, dim=0)
+ qo_indptr = qo_indptr[: bs + 1]
+
+ for chunk_idx in range(forward_batch.num_prefix_chunks):
+ # MHA for chunked prefix kv cache when running model with MLA
+ assert forward_batch.prefix_chunk_idx is not None
+ assert forward_batch.prefix_chunk_cu_seq_lens is not None
+ assert forward_batch.prefix_chunk_max_seq_lens is not None
+
+ kv_indptr = forward_batch.prefix_chunk_cu_seq_lens[chunk_idx]
+ wrapper = self.chunk_ragged_wrappers[chunk_idx]
+ wrapper.begin_forward(
+ qo_indptr=qo_indptr,
+ kv_indptr=kv_indptr,
+ num_qo_heads=self.num_local_heads,
+ num_kv_heads=self.num_local_heads,
+ head_dim_qk=self.qk_nope_head_dim + self.qk_rope_head_dim,
+ head_dim_vo=self.v_head_dim,
+ q_data_type=self.q_data_type,
+ causal=False,
+ )
+ # ragged prefill
+ if not disable_flashinfer_ragged:
+ self.ragged_wrapper.begin_forward(
+ qo_indptr=qo_indptr,
+ kv_indptr=qo_indptr,
+ num_qo_heads=self.num_local_heads,
+ num_kv_heads=self.num_local_heads,
+ head_dim_qk=self.qk_nope_head_dim + self.qk_rope_head_dim,
+ head_dim_vo=self.v_head_dim,
+ q_data_type=self.q_data_type,
+ causal=True,
+ )
+
+ def forward(
+ self,
+ q: torch.Tensor,
+ k: torch.Tensor,
+ v: torch.Tensor,
+ layer: RadixAttention,
+ forward_batch: ForwardBatch,
+ ):
+ logits_soft_cap = layer.logit_cap
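+ # Attend either to one chunk of the prefix KV cache (non-causal) or to the current extend tokens (causal ragged attention); both paths return the output with its log-sum-exp so the caller can merge partial results.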
+ if forward_batch.attn_attend_prefix_cache:
+ chunk_idx = forward_batch.prefix_chunk_idx
+ assert chunk_idx >= 0
+ wrapper = self.chunk_ragged_wrappers[chunk_idx]
+ o1, s1 = wrapper.forward_return_lse(
+ q.view(-1, layer.tp_q_head_num, layer.head_dim),
+ k.view(-1, layer.tp_k_head_num, layer.head_dim).to(q.dtype),
+ v.view(-1, layer.tp_v_head_num, layer.v_head_dim).to(q.dtype),
+ causal=False,
+ sm_scale=layer.scaling,
+ logits_soft_cap=logits_soft_cap,
+ )
+ else:
+ o1, s1 = self.ragged_wrapper.forward_return_lse(
+ q.view(-1, layer.tp_q_head_num, layer.head_dim),
+ k.view(-1, layer.tp_k_head_num, layer.head_dim).to(q.dtype),
+ v.view(-1, layer.tp_v_head_num, layer.v_head_dim).to(q.dtype),
+ causal=True,
+ sm_scale=layer.scaling,
+ logits_soft_cap=logits_soft_cap,
+ )
+
+ return o1, s1
+
+
class FlashInferMLAAttnBackend(AttentionBackend):
"""Flashinfer attention kernels."""
@@ -77,6 +191,13 @@ def __init__(
self.max_context_len = model_runner.model_config.context_len
self.device = model_runner.device
self.skip_prefill = skip_prefill
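+ # Chunked-prefix MHA is only enabled when this backend serves prefill, the node is not a disaggregated decode node, and both the chunked prefix cache and the ragged FlashInfer path are enabled.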
+ self.enable_chunk_kv = (
+ not skip_prefill
+ and global_server_args_dict["disaggregation_mode"] != "decode"
+ and not global_server_args_dict["disable_chunked_prefix_cache"]
+ and not global_server_args_dict["flashinfer_mla_disable_ragged"]
+ )
+ self.page_size = model_runner.page_size
# Allocate buffers
global global_workspace_buffer
@@ -109,11 +230,11 @@ def __init__(
else:
self.q_indptr_decode = q_indptr_decode_buf
- fmha_backend = "auto"
+ self.fmha_backend = "auto"
if is_sm100_supported():
- fmha_backend = "cutlass"
+ self.fmha_backend = "cutlass"
self.prefill_wrapper_ragged = BatchPrefillWithRaggedKVCacheWrapper(
- self.workspace_buffer, "NHD", backend=fmha_backend
+ self.workspace_buffer, "NHD", backend=self.fmha_backend
)
if not self.skip_prefill:
@@ -137,6 +258,8 @@ def __init__(
self.indices_updater_prefill = FlashInferMLAIndicesUpdaterPrefill(
model_runner, self
)
+ if self.enable_chunk_kv:
+ self.mha_chunk_kv_cache = FlashInferMhaChunkKVRunner(model_runner, self)
self.indices_updater_decode = FlashInferMLAIndicesUpdaterDecode(
model_runner, self
@@ -370,6 +493,12 @@ def init_forward_metadata_replay_cuda_graph(
def get_cuda_graph_seq_len_fill_value(self):
return 1
+ def init_mha_chunk_metadata(
+ self, forward_batch: ForwardBatch, disable_flashinfer_ragged: bool = False
+ ):
+ """Init the metadata for a forward pass."""
+ self.mha_chunk_kv_cache.update_wrapper(forward_batch, disable_flashinfer_ragged)
+
def forward_extend(
self,
q: torch.Tensor,
@@ -381,6 +510,15 @@ def forward_extend(
q_rope: Optional[torch.Tensor] = None,
k_rope: Optional[torch.Tensor] = None,
):
+ if (
+ forward_batch.attn_attend_prefix_cache is not None
+ and forward_batch.mha_return_lse
+ ): # MHA Chunk
+ assert self.enable_chunk_kv
+ assert q_rope is None
+ assert k_rope is None
+ o1, s1 = self.mha_chunk_kv_cache.forward(q, k, v, layer, forward_batch)
+ return o1, s1
cache_loc = forward_batch.out_cache_loc
logits_soft_cap = layer.logit_cap
@@ -411,8 +549,8 @@ def forward_extend(
k = torch.cat([k, k_rope], dim=-1)
o = self.prefill_wrapper_ragged.forward(
qall,
- k.view(-1, layer.tp_k_head_num, layer.head_dim),
- v.view(-1, layer.tp_k_head_num, layer.v_head_dim),
+ k.view(-1, layer.tp_k_head_num, layer.head_dim).to(q.dtype),
+ v.view(-1, layer.tp_k_head_num, layer.v_head_dim).to(q.dtype),
causal=True,
sm_scale=layer.scaling,
logits_soft_cap=logits_soft_cap,
@@ -723,6 +861,7 @@ def call_begin_forward(
head_dim_qk=self.qk_nope_head_dim + self.qk_rope_head_dim,
head_dim_vo=self.v_head_dim,
q_data_type=self.q_data_type,
+ causal=True,
)
else:
# mla paged prefill
diff --git a/python/sglang/srt/layers/attention/hip_attention.py b/python/sglang/srt/layers/attention/hip_attention.py
new file mode 100644
index 00000000000..57b18ce2fba
--- /dev/null
+++ b/python/sglang/srt/layers/attention/hip_attention.py
@@ -0,0 +1,974 @@
+"""
+HiP Attention Backend for SGLang
+https://arxiv.org/pdf/2406.09827
+"""
+
+from __future__ import annotations
+
+import os
+
+import logging
+import time
+from typing import TYPE_CHECKING, Optional, Union
+
+import torch
+import triton
+
+from sglang.srt.layers.attention.base_attn_backend import AttentionBackend
+from sglang.srt.managers.schedule_batch import global_server_args_dict
+from sglang.srt.mem_cache.hip_offload_kv_pool_mha import MHATokenToHiPOffloadKVPool
+
+if TYPE_CHECKING:
+ from hip_attn.v1_2 import HiPAttentionConfig
+ from sglang.srt.speculative.spec_info import SpecInfo
+
+ from sglang.srt.layers.radix_attention import RadixAttention
+ from sglang.srt.model_executor.model_runner import ModelRunner
+
+from sglang.srt.model_executor.forward_batch_info import ForwardBatch, ForwardMode
+from sglang.srt.speculative.eagle_utils import EagleDraftInput, EagleVerifyInput
+
+logger = logging.getLogger(__name__)
+
+try:
+ from sglang.srt.distributed import (
+ get_tensor_model_parallel_rank,
+ get_tensor_model_parallel_world_size,
+ model_parallel_is_initialized,
+ tensor_model_parallel_all_gather,
+ )
+
+ SGLANG_DIST_AVAILABLE = True
+except ImportError:
+ SGLANG_DIST_AVAILABLE = False
+
+
+def get_local_rank():
+ if SGLANG_DIST_AVAILABLE:
+ return (
+ get_tensor_model_parallel_rank() if model_parallel_is_initialized() else 0
+ )
+ else:
+ return 0
+
+
+from sgl_kernel.flash_attn import flash_attn_varlen_func, flash_attn_with_kvcache
+
+from sglang.srt.configs.model_config import AttentionArch
+from sglang.srt.layers.attention.flashattention_backend import (
+ FlashAttentionBackend,
+ FlashAttentionMetadata,
+)
+
+
+class HiPAttentionBackend(AttentionBackend):
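+ """HiP attention backend (https://arxiv.org/pdf/2406.09827); wraps a FlashAttention backend for shared metadata and dense fallbacks."""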
+
+ def __init__(
+ self,
+ model_runner: ModelRunner,
+ skip_prefill: bool = False,
+ speculative_step_id=0,
+ topk=0,
+ speculative_num_steps=0,
+ ):
+ super().__init__()
+
+ from hip_attn.v1_2.paged_hip import PagedHiPStateful
+
+ self.use_mla = model_runner.model_config.attention_arch == AttentionArch.MLA
+ self.page_size = model_runner.page_size
+ assert self.page_size == 1
+
+ self.forward_paged_hip = PagedHiPStateful(
+ max_batch_size=32,
+ num_layers=model_runner.model_config.num_hidden_layers,
+ num_heads=model_runner.model_config.num_attention_heads
+ // model_runner.tp_size,
+ head_dim=(
+ model_runner.model_config.head_dim
+ if not hasattr(model_runner.model_config, "v_head_dim")
+ else model_runner.model_config.v_head_dim
+ ),
+ device=model_runner.device,
+ )
+
+ self.hip_config: HiPAttentionConfig = (
+ model_runner.server_args.hip_attention_config
+ )
+ self.is_kv_cache_offload_enabled = (
+ model_runner.server_args.enable_hip_kv_cache_offload
+ )
+
+ self.max_context_len = model_runner.model_config.context_len
+
+ self.tp_rank = model_runner.tp_rank
+
+ self.attention_chunk_size = model_runner.attention_chunk_size
+
+ self.flashattention_backend = FlashAttentionBackend(
+ model_runner=model_runner,
+ skip_prefill=skip_prefill,
+ speculative_step_id=speculative_step_id,
+ topk=topk,
+ speculative_num_steps=speculative_num_steps,
+ )
+
+ self._last_tick = time.time()
+
+ self._block_table: torch.Tensor = None
+
+ def init_forward_metadata(self, forward_batch: ForwardBatch):
+ _table = self.flashattention_backend.req_to_token.index_select(
+ dim=0, index=forward_batch.req_pool_indices
+ )
+
+ if self._block_table is not None:
+ # NOTE/FIXME: this in-place copy misbehaves when CUDA graphs are disabled;
+ # avoid running with --disable-cuda-graph if this line causes problems.
+ self._block_table[: _table.shape[0]] = _table
+ else:
+ self._block_table = _table
+
+ self.flashattention_backend.init_forward_metadata(forward_batch=forward_batch)
+
+ def init_cuda_graph_state(self, max_bs: int, max_num_tokens: int):
+ self._block_table = torch.zeros(
+ max_bs,
+ (self.max_context_len + self.page_size - 1) // self.page_size + 4,
+ dtype=torch.int32,
+ device=self.flashattention_backend.device,
+ )
+
+ self.flashattention_backend.init_cuda_graph_state(
+ max_bs=max_bs,
+ max_num_tokens=max_num_tokens,
+ )
+
+ def init_forward_metadata_capture_cuda_graph(
+ self,
+ bs: int,
+ num_tokens: int,
+ req_pool_indices: torch.Tensor,
+ seq_lens: torch.Tensor,
+ encoder_lens: Optional[torch.Tensor],
+ forward_mode: ForwardMode,
+ spec_info: Optional[SpecInfo],
+ ):
+ _table = self.flashattention_backend.req_to_token.index_select(
+ dim=0, index=req_pool_indices
+ )
+ self._block_table[: _table.shape[0]] = _table
+
+ self.flashattention_backend.init_forward_metadata_capture_cuda_graph(
+ bs=bs,
+ num_tokens=num_tokens,
+ req_pool_indices=req_pool_indices,
+ seq_lens=seq_lens,
+ encoder_lens=encoder_lens,
+ forward_mode=forward_mode,
+ spec_info=spec_info,
+ )
+
+ def init_forward_metadata_replay_cuda_graph(
+ self,
+ bs: int,
+ req_pool_indices: torch.Tensor,
+ seq_lens: torch.Tensor,
+ seq_lens_sum: int,
+ encoder_lens: Optional[torch.Tensor],
+ forward_mode: ForwardMode,
+ spec_info: Optional[Union[EagleDraftInput, EagleVerifyInput]],
+ seq_lens_cpu: Optional[torch.Tensor],
+ out_cache_loc: torch.Tensor = None,
+ ):
+ _table = self.flashattention_backend.req_to_token.index_select(
+ dim=0, index=req_pool_indices
+ )
+ self._block_table[: _table.shape[0]] = _table
+
+ self.flashattention_backend.init_forward_metadata_replay_cuda_graph(
+ bs=bs,
+ req_pool_indices=req_pool_indices,
+ seq_lens=seq_lens,
+ seq_lens_sum=seq_lens_sum,
+ encoder_lens=encoder_lens,
+ forward_mode=forward_mode,
+ spec_info=spec_info,
+ seq_lens_cpu=seq_lens_cpu,
+ out_cache_loc=out_cache_loc,
+ )
+
+ # print(seq_lens)
+ # cache_seqlens = seq_lens[:bs].to(torch.int32)
+ # print(cache_seqlens.shape)
+ # cu_seqlens_q = torch.arange(
+ # 0,
+ # bs + 1,
+ # 1,
+ # device=seq_lens.device,
+ # dtype=torch.int32
+ # )
+ # print(cu_seqlens_q.shape)
+ # cu_seqlens_k = cu_seqlens_q.clone()
+ # cu_seqlens_k[1:] = cache_seqlens.cumsum(-1)
+
+ # fa3_cache_seqlens=self.flashattention_backend.forward_metadata.cache_seqlens_int32[:bs]
+ # fa3_cu_seqlens_q=self.flashattention_backend.forward_metadata.cu_seqlens_q[:bs+1]
+ # fa3_cu_seqlens_k=self.flashattention_backend.forward_metadata.cu_seqlens_k[:bs+1]
+
+ # print(seq_lens[:bs], fa3_cache_seqlens, fa3_cu_seqlens_q, fa3_cu_seqlens_k)
+
+ # assert torch.all(fa3_cache_seqlens == cache_seqlens)
+ # assert torch.all(fa3_cu_seqlens_q == cu_seqlens_q)
+ # assert torch.all(fa3_cu_seqlens_k == cu_seqlens_k)
+
+ def get_cuda_graph_seq_len_fill_value(self):
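+ # Unlike the wrapped FlashAttention backend (which fills with 1), HiP captures CUDA graphs with a near-maximum sequence length.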
+ assert self.flashattention_backend.get_cuda_graph_seq_len_fill_value() == 1
+ return max(1, self.max_context_len - 1)
+
+ def forward_extend(
+ self,
+ q,
+ k,
+ v,
+ layer: RadixAttention,
+ forward_batch: ForwardBatch,
+ save_kv_cache=True,
+ # For multi-head latent attention
+ q_rope: Optional[torch.Tensor] = None,
+ k_rope: Optional[torch.Tensor] = None,
+ sinks: Optional[torch.Tensor] = None,
+ ):
+ cache_loc = (
+ forward_batch.out_cache_loc
+ if not layer.is_cross_attention
+ else forward_batch.encoder_out_cache_loc
+ )
+
+ using_chunked_sw = False
+ sw_size = layer.sliding_window_size
+ if layer.use_irope:
+ using_chunked_sw = True
+ sw_size = self.attention_chunk_size
+
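+ # Dense prefill/decode overrides and benchmarking are controlled via HIP_DEBUG_* and HIP_DELTA_ATTENTION_ARGS environment variables.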
+ using_dense_prefill = os.getenv("HIP_DEBUG_USING_DENSE_PREFILL", "0") == "1"
+ using_dense_prefill = using_dense_prefill and (
+ layer.layer_id in self.hip_config.dense_layers
+ )
+
+ force_dense_decode = os.getenv("HIP_DEBUG_FORCE_DENSE_DECODE", "0") == "1"
+
+ delta_attention_args = os.getenv("HIP_DELTA_ATTENTION_ARGS", "")
+ delta_dense_decode = "dense_decode" in delta_attention_args.split("-")
+
+ is_decode = False
+ need_dense_prefill = using_chunked_sw or using_dense_prefill
+ need_dense_decode = using_chunked_sw or delta_dense_decode
+
+ run_benchmark = (
+ (not torch.cuda.is_current_stream_capturing())
+ and os.getenv("HIP_DEBUG_BENCH", "0") == "1"
+ and (get_local_rank() == 0)
+ )
+
+ if run_benchmark:
+ start_event = torch.cuda.Event(True)
+ end_event = torch.cuda.Event(True)
+ start_event.record()
+
+ if need_dense_prefill and (not is_decode):
+ return self.flashattention_backend.forward_extend(
+ q=q,
+ k=k,
+ v=v,
+ layer=layer,
+ forward_batch=forward_batch,
+ save_kv_cache=save_kv_cache,
+ # For multi-head latent attention
+ q_rope=q_rope,
+ k_rope=k_rope,
+ )
+ else:
+ if not self.is_kv_cache_offload_enabled:
+ if k is not None:
+ assert v is not None
+ if save_kv_cache:
+ if not self.use_mla:
+ forward_batch.token_to_kv_pool.set_kv_buffer(
+ layer, cache_loc, k, v
+ )
+ else:
+ forward_batch.token_to_kv_pool.set_mla_kv_buffer(
+ layer,
+ cache_loc,
+ k,
+ k_rope,
+ )
+
+ if not self.use_mla:
+ k_cache, v_cache = forward_batch.token_to_kv_pool.get_kv_buffer(
+ layer.layer_id
+ )
+ k_chunk = k.reshape(-1, layer.tp_k_head_num, layer.head_dim)
+ v_chunk = v.reshape(-1, layer.tp_v_head_num, layer.v_head_dim)
+ else:
+ kv_cache = forward_batch.token_to_kv_pool.get_key_buffer(
+ layer.layer_id
+ )
+
+ offload_cache = None
+ offloading_metadata = None
+
+ else: # Offloading enabled
+ assert not self.use_mla
+ assert isinstance(
+ forward_batch.token_to_kv_pool, MHATokenToHiPOffloadKVPool
+ )
+ if k is not None:
+ assert v is not None
+ if save_kv_cache:
+ if not self.use_mla:
+ forward_batch.token_to_kv_pool.set_kv_buffer(
+ layer,
+ cache_loc,
+ k,
+ v,
+ async_copy=True,
+ push_to_gpu_cache=False,
+ )
+ else:
+ raise Exception("MLA is not supported when HiP KV cache offloading is enabled")
+
+ k_cache = v_cache = offload_cache = None
+ k_chunk, v_chunk, offloading_metadata = (
+ forward_batch.token_to_kv_pool.get_fetched_prefix_kv_buffer(
+ layer_id=layer.layer_id,
+ extend_seq_lens=forward_batch.extend_seq_lens,
+ extend_seq_lens_cpu=forward_batch.extend_seq_lens_cpu,
+ cache_k=k,
+ cache_v=v,
+ )
+ )
+
+ # use_cascade_attn = (
+ # forward_batch.forward_mode.is_target_verify() and self.topk > 1
+ # )
+ use_cascade_attn = False
+
+ if not self.use_mla:
+ q_reshaped = q.reshape(-1, layer.tp_q_head_num, layer.head_dim)
+
+ o, _ = self.forward_paged_hip(
+ query=q_reshaped,
+ sm_scale=layer.scaling,
+ batch_size=forward_batch.batch_size,
+ k=k_chunk,
+ v=v_chunk,
+ k_cache=k_cache,
+ v_cache=v_cache,
+ offload_cache=offload_cache,
+ positions=forward_batch.positions,
+ seq_lens=forward_batch.seq_lens,
+ req_to_tokens=forward_batch.req_to_token_pool.req_to_token,
+ req_pool_indices=forward_batch.req_pool_indices,
+ block_table=self._block_table[: forward_batch.batch_size],
+ rope_cos=layer.rope_cos,
+ rope_sin=layer.rope_sin,
+ rope_range=layer.rope_range,
+ rope_is_neox_style=layer.rope_is_neox_style,
+ layer_id=layer.layer_id,
+ logit_cap=layer.logit_cap,
+ orig_context_len=layer.orig_context_len,
+ max_context_len=self.max_context_len,
+ extend_seq_lens=forward_batch.extend_seq_lens,
+ extend_seq_lens_cpu=forward_batch.extend_seq_lens_cpu,
+ extend_prefix_lens_cpu=forward_batch.extend_prefix_lens_cpu,
+ hip_config=self.hip_config,
+ is_kv_cache_offload_enabled=self.is_kv_cache_offload_enabled,
+ online_update_cache=(
+ forward_batch.token_to_kv_pool.is_online_cache_update_enabled()
+ if self.is_kv_cache_offload_enabled
+ else None
+ ),
+ is_decode=False,
+ offloading_metadata=offloading_metadata,
+ sliding_window_size=sw_size,
+ sliding_window_sink=sinks,
+ using_chunked_sliding_window=using_chunked_sw,
+ self_extend_scale=self.hip_config.self_extend_scale,
+ )
+ else:
+ if (
+ # not global_server_args_dict["disable_chunked_prefix_cache"]
+ # and forward_batch.attn_attend_prefix_cache is not None
+ # and not forward_batch.forward_mode.is_target_verify()
+ # and not forward_batch.forward_mode.is_draft_extend()
+ not global_server_args_dict["disable_chunked_prefix_cache"]
+ # and forward_batch.attn_attend_prefix_cache is not None
+ and forward_batch.forward_mode.is_extend()
+ and not forward_batch.forward_mode.is_target_verify()
+ and not forward_batch.forward_mode.is_draft_extend()
+ ):
+ # Do multi-head attention with chunked prefix cache
+
+ assert q.shape[0] == 1, f"{q.shape=}"
+ k_reshaped = k.reshape(1, -1, layer.tp_k_head_num, layer.head_dim)
+ v_reshaped = v.reshape(1, -1, layer.tp_v_head_num, layer.v_head_dim)
+
+ assert not use_cascade_attn
+
+ o, metadata = self.forward_paged_hip(
+ query=q,
+ sm_scale=layer.scaling,
+ batch_size=forward_batch.batch_size,
+ k=k_reshaped,
+ v=v_reshaped,
+ k_cache=None,
+ v_cache=None,
+ offload_cache=offload_cache,
+ positions=forward_batch.positions,
+ seq_lens=forward_batch.seq_lens,
+ req_to_tokens=forward_batch.req_to_token_pool.req_to_token,
+ req_pool_indices=forward_batch.req_pool_indices,
+ block_table=self._block_table[: forward_batch.batch_size],
+ rope_cos=layer.rope_cos,
+ rope_sin=layer.rope_sin,
+ rope_range=layer.rope_range,
+ rope_is_neox_style=layer.rope_is_neox_style,
+ layer_id=layer.layer_id,
+ logit_cap=layer.logit_cap,
+ orig_context_len=layer.orig_context_len,
+ max_context_len=self.max_context_len,
+ extend_seq_lens=forward_batch.extend_seq_lens,
+ extend_seq_lens_cpu=forward_batch.extend_seq_lens_cpu,
+ extend_prefix_lens_cpu=forward_batch.extend_prefix_lens_cpu,
+ hip_config=self.hip_config,
+ is_kv_cache_offload_enabled=self.is_kv_cache_offload_enabled,
+ cached_metadata=None,
+ online_update_cache=(
+ forward_batch.token_to_kv_pool.is_online_cache_update_enabled()
+ if self.is_kv_cache_offload_enabled
+ else None
+ ),
+ is_decode=False,
+ offloading_metadata=offloading_metadata,
+ sliding_window_size=sw_size,
+ sliding_window_sink=sinks,
+ using_chunked_sliding_window=using_chunked_sw,
+ self_extend_scale=self.hip_config.self_extend_scale,
+ )
+ else:
+ # Do absorbed multi-latent attention
+
+ require_metadata_checkout = False
+ if forward_batch.forward_mode.is_target_verify():
+ # NOTE: this condition will be graph captured.
+ metadata = forward_batch.hip_metadata_cache_pool.get_hip_metadata_cache(
+ layer.layer_id,
+ q.shape[0],
+ forward_batch.batch_size,
+ forward_batch.hip_metadata_cached_stages,
+ block_size_q=self.hip_config.block_sparse_block_size_q,
+ )
+ require_metadata_checkout = True
+ else:
+ metadata = None
+
+ kv_cache = forward_batch.token_to_kv_pool.get_key_buffer(
+ layer.layer_id
+ )
+ nope_dim = triton.next_power_of_2(kv_cache.shape[-1]) // 2
+ rope_dim = kv_cache.shape[-1] - nope_dim
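+ # e.g. a 576-wide MLA cache (512 NoPE + 64 RoPE) gives nope_dim=512, rope_dim=64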
+ # print(q.shape, kv_cache.shape, nope_dim, rope_dim)
+
+ kv_head = kv_cache.shape[-2]
+ q_head = q.shape[-2]
+
+ k_rope = kv_cache[..., nope_dim:]
+ c_kv = kv_cache[..., :nope_dim]
+ # k_rope_cache = k_rope.view(
+ # -1,
+ # self.page_size,
+ # layer.tp_k_head_num,
+ # layer.head_dim - layer.v_head_dim,
+ # )
+ c_kv_cache = c_kv.view(-1, self.page_size, kv_head, nope_dim)
+ if q_rope is not None:
+ q_nope = q.view(-1, q_head, nope_dim)
+ q_rope = q_rope.view(-1, q_head, rope_dim)
+ else:
+ q_all = q.contiguous().view(-1, q_head, nope_dim + rope_dim)
+ q_nope = q_all[:, :, :nope_dim]
+ q_rope = q_all[:, :, nope_dim:]
+
+ assert q_nope.shape[-1] == layer.rope_range[0]
+ assert (q_rope.shape[-1] + q_nope.shape[-1]) == layer.rope_range[1]
+ q_merged = torch.cat([q_nope, q_rope], dim=-1)
+ # TODO FIXME
+ # k_cache = torch.cat([c_kv_cache, k_rope_cache], dim=-1)
+ k_cache = kv_cache
+ v_cache = c_kv_cache
+
+ if sinks is not None:
+ if forward_batch.forward_mode.is_draft_extend():
+ sw_size = 512
+ sw_sink = 128
+ else:
+ sw_sink = -1
+ else:
+ sw_sink = sinks
+
+ # print(q_merged.shape, k_cache.shape, v_cache.shape, sw_sink, sw_size)
+
+ o, metadata = self.forward_paged_hip(
+ query=q_merged,
+ sm_scale=layer.scaling,
+ batch_size=forward_batch.batch_size,
+ k=None,
+ v=None,
+ k_cache=k_cache,
+ v_cache=v_cache,
+ offload_cache=offload_cache,
+ positions=forward_batch.positions,
+ seq_lens=forward_batch.seq_lens,
+ req_to_tokens=forward_batch.req_to_token_pool.req_to_token,
+ req_pool_indices=forward_batch.req_pool_indices,
+ block_table=self._block_table[: forward_batch.batch_size],
+ rope_cos=layer.rope_cos,
+ rope_sin=layer.rope_sin,
+ rope_range=layer.rope_range,
+ rope_is_neox_style=layer.rope_is_neox_style,
+ layer_id=layer.layer_id,
+ logit_cap=layer.logit_cap,
+ orig_context_len=layer.orig_context_len,
+ max_context_len=self.max_context_len,
+ hip_config=self.hip_config,
+ is_kv_cache_offload_enabled=self.is_kv_cache_offload_enabled,
+ cached_metadata=metadata,
+ online_update_cache=(
+ forward_batch.token_to_kv_pool.is_online_cache_update_enabled()
+ if self.is_kv_cache_offload_enabled
+ else None
+ ),
+ is_decode=True,
+ offloading_metadata=offloading_metadata,
+ sliding_window_size=sw_size,
+ sliding_window_sink=sw_sink,
+ using_chunked_sliding_window=using_chunked_sw,
+ self_extend_scale=self.hip_config.self_extend_scale,
+ )
+
+ if require_metadata_checkout and (metadata is not None):
+ forward_batch.hip_metadata_cache_pool.set_hip_metadata_cache(
+ layer_id=layer.layer_id,
+ tdst=q.shape[0],
+ batch_size=forward_batch.batch_size,
+ metadata=metadata,
+ block_size_q=self.hip_config.block_sparse_block_size_q,
+ cached_stages=forward_batch.hip_metadata_cached_stages,
+ )
+
+ if self.is_kv_cache_offload_enabled:
+ offload_cache.handle_cache_miss(metadata)
+
+ if run_benchmark:
+ from hip_attn.v1_2.utils import capture
+
+ end_event.record()
+ end_event.synchronize()
+
+ elapsed = start_event.elapsed_time(end_event)
+ elapsed_layer = (time.time() - self._last_tick) * 1000
+ self._last_tick = time.time()
+ capture.report()
+ print(
+ f"[hip] layer {layer.layer_id} took {elapsed:.2f} ms (from last tick: {elapsed_layer:.2f} ms)"
+ )
+
+ return o.view(-1, layer.tp_q_head_num * layer.v_head_dim)
+
+ def forward_decode(
+ self,
+ q,
+ k,
+ v,
+ layer: RadixAttention,
+ forward_batch: ForwardBatch,
+ save_kv_cache=True,
+ # For multi-head latent attention
+ q_rope: Optional[torch.Tensor] = None,
+ k_rope: Optional[torch.Tensor] = None,
+ sinks: Optional[torch.Tensor] = None,
+ ):
+ cache_loc = (
+ forward_batch.out_cache_loc
+ if not layer.is_cross_attention
+ else forward_batch.encoder_out_cache_loc
+ )
+
+ using_chunked_sw = False
+ sw_size = layer.sliding_window_size
+ if layer.use_irope:
+ using_chunked_sw = True
+ sw_size = self.attention_chunk_size
+
+ using_dense_prefill = os.getenv("HIP_DEBUG_USING_DENSE_PREFILL", "0") == "1"
+ using_dense_prefill = using_dense_prefill and (
+ layer.layer_id in self.hip_config.dense_layers
+ )
+
+ force_dense_decode = os.getenv("HIP_DEBUG_FORCE_DENSE_DECODE", "0") == "1"
+
+ delta_attention_args = os.getenv("HIP_DELTA_ATTENTION_ARGS", "")
+ delta_dense_decode = "dense_decode" in delta_attention_args.split("-")
+
+ is_decode = False
+ need_dense_prefill = using_chunked_sw or using_dense_prefill
+ need_dense_decode = using_chunked_sw or delta_dense_decode or force_dense_decode
+
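+ # The dense FlashAttention decode fallback below is currently hard-disabled ("and False"); decode always takes the HiP path.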
+ if need_dense_decode and False:
+ o = self.flashattention_backend.forward_decode(
+ q=q,
+ k=k,
+ v=v,
+ layer=layer,
+ forward_batch=forward_batch,
+ save_kv_cache=save_kv_cache,
+ # For multi-head latent attention
+ q_rope=q_rope,
+ k_rope=k_rope,
+ )
+ else:
+ if (forward_batch.hip_metadata_cache_pool is not None) and (
+ not delta_dense_decode
+ ):
+ metadata = forward_batch.hip_metadata_cache_pool.get_hip_metadata_cache(
+ layer.layer_id,
+ q.shape[0],
+ forward_batch.batch_size,
+ forward_batch.hip_metadata_cached_stages,
+ block_size_q=self.hip_config.block_sparse_block_size_q,
+ )
+ else:
+ metadata = None
+
+ if not self.is_kv_cache_offload_enabled:
+ if k is not None:
+ assert v is not None
+ if save_kv_cache:
+ if not self.use_mla:
+ forward_batch.token_to_kv_pool.set_kv_buffer(
+ layer, cache_loc, k, v
+ )
+ else:
+ forward_batch.token_to_kv_pool.set_mla_kv_buffer(
+ layer,
+ cache_loc,
+ k,
+ k_rope,
+ )
+ if not self.use_mla:
+ k_cache, v_cache = forward_batch.token_to_kv_pool.get_kv_buffer(
+ layer.layer_id
+ )
+ else:
+ kv_cache = forward_batch.token_to_kv_pool.get_key_buffer(
+ layer.layer_id
+ )
+
+ offload_cache = offloading_metadata = None
+ else: # Offloading enabled
+ assert isinstance(
+ forward_batch.token_to_kv_pool, MHATokenToHiPOffloadKVPool
+ )
+ if k is not None:
+ assert v is not None
+ if save_kv_cache:
+ if not self.use_mla:
+ forward_batch.token_to_kv_pool.set_kv_buffer(
+ layer,
+ cache_loc,
+ k,
+ v,
+ async_copy=False,
+ push_to_gpu_cache=True,
+ )
+ else:
+ raise Exception("MLA is not supported when HiP KV cache offloading is enabled")
+
+ k_cache = v_cache = None
+ offload_cache, offloading_metadata = (
+ forward_batch.token_to_kv_pool.get_kv_buffer(layer.layer_id)
+ )
+
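+ # Sequence-length metadata is computed once per decode step (at layer 0) and reused by all subsequent layers.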
+ if layer.layer_id == 0:
+ self.cache_seqlens = (
+ forward_batch.positions.view(forward_batch.batch_size, -1)[:, -1]
+ + 1
+ ).to(torch.int32)
+ self.cu_seqlens_q = torch.arange(
+ 0,
+ forward_batch.batch_size + 1,
+ q.shape[0] // forward_batch.batch_size,
+ device=q.device,
+ dtype=torch.int32,
+ )
+ self.cu_seqlens_k = self.cu_seqlens_q.clone()
+ self.cu_seqlens_k[1:] = self.cache_seqlens.cumsum(-1)
+
+ if not self.use_mla:
+ k_descale = v_descale = None
+ if k_cache is not None:
+ if k_cache.dtype not in [
+ torch.float32,
+ torch.float16,
+ torch.bfloat16,
+ ]:
+ assert k_cache.dtype in (
+ torch.float8_e5m2,
+ torch.float8_e4m3fn,
+ ), k_cache.dtype
+ if layer.k_scale is not None:
+ descale_shape = (
+ forward_batch.batch_size,
+ layer.tp_k_head_num,
+ )
+ k_descale = layer.k_scale.expand(descale_shape)
+ v_descale = layer.v_scale.expand(descale_shape)
+ # q = q.to(k_cache.dtype)
+ # assert layer.k_scale is not None, "fp8 scale should be handled"
+
+ q_reshaped = q.reshape(-1, layer.tp_q_head_num, layer.head_dim)
+ k_reshaped = k.reshape(-1, layer.tp_k_head_num, layer.head_dim)
+ v_reshaped = v.reshape(-1, layer.tp_v_head_num, layer.v_head_dim)
+
+ # fa3_cache_seqlens=self.flashattention_backend.forward_metadata.cache_seqlens_int32
+ # fa3_cu_seqlens_q=self.flashattention_backend.forward_metadata.cu_seqlens_q
+ # fa3_cu_seqlens_k=self.flashattention_backend.forward_metadata.cu_seqlens_k
+
+ # assert torch.all(fa3_cache_seqlens == cache_seqlens)
+ # assert torch.all(fa3_cu_seqlens_q == cu_seqlens_q)
+ # assert torch.all(fa3_cu_seqlens_k == cu_seqlens_k)
+
+ o, metadata = self.forward_paged_hip(
+ query=q_reshaped,
+ sm_scale=layer.scaling,
+ batch_size=forward_batch.batch_size,
+ k=k_reshaped,
+ v=v_reshaped,
+ k_cache=k_cache,
+ v_cache=v_cache,
+ offload_cache=offload_cache,
+ positions=forward_batch.positions,
+ seq_lens=forward_batch.seq_lens,
+ req_to_tokens=forward_batch.req_to_token_pool.req_to_token,
+ req_pool_indices=forward_batch.req_pool_indices,
+ block_table=self._block_table[: forward_batch.batch_size],
+ rope_cos=layer.rope_cos,
+ rope_sin=layer.rope_sin,
+ rope_range=layer.rope_range,
+ rope_is_neox_style=layer.rope_is_neox_style,
+ layer_id=layer.layer_id,
+ logit_cap=layer.logit_cap,
+ orig_context_len=layer.orig_context_len,
+ max_context_len=self.max_context_len,
+ hip_config=self.hip_config,
+ is_kv_cache_offload_enabled=self.is_kv_cache_offload_enabled,
+ cached_metadata=metadata,
+ online_update_cache=(
+ forward_batch.token_to_kv_pool.is_online_cache_update_enabled()
+ if self.is_kv_cache_offload_enabled
+ else None
+ ),
+ is_decode=True,
+ offloading_metadata=offloading_metadata,
+ sliding_window_size=sw_size,
+ sliding_window_sink=sinks,
+ using_chunked_sliding_window=using_chunked_sw,
+ k_descale=k_descale,
+ v_descale=v_descale,
+ # cache_seqlens=self.flashattention_backend.forward_metadata.cache_seqlens_int32[:forward_batch.batch_size],
+ # cu_seqlens_q=self.flashattention_backend.forward_metadata.cu_seqlens_k[:forward_batch.batch_size + 1],
+ # cu_seqlens_k=self.flashattention_backend.forward_metadata.cu_seqlens_q[:forward_batch.batch_size + 1],
+ cache_seqlens=self.cache_seqlens,
+ cu_seqlens_q=self.cu_seqlens_q,
+ cu_seqlens_k=self.cu_seqlens_k,
+ self_extend_scale=self.hip_config.self_extend_scale,
+ )
+ else:
+ if k_cache is not None:
+ if k_cache.dtype not in [
+ torch.float32,
+ torch.float16,
+ torch.bfloat16,
+ ]:
+ assert k_cache.dtype in (torch.float8_e5m2, torch.float8_e4m3fn)
+ assert layer.k_scale is not None, "fp8 scale should be handled"
+ # print(q.shape, k.shape, q_rope.shape, k_rope.shape)
+ # torch.Size([1, 16, 512]) torch.Size([1, 1, 512]) torch.Size([1, 16, 64]) torch.Size([1, 1, 64])
+
+ k_rope = kv_cache[:, :, layer.v_head_dim :]
+ c_kv = kv_cache[:, :, : layer.v_head_dim]
+ k_rope_cache = k_rope.view(
+ -1,
+ self.page_size,
+ layer.tp_k_head_num,
+ layer.head_dim - layer.v_head_dim,
+ )
+ c_kv_cache = c_kv.view(
+ -1, self.page_size, layer.tp_v_head_num, layer.v_head_dim
+ )
+
+ if q_rope is not None:
+ q_nope = q.view(-1, layer.tp_q_head_num, layer.v_head_dim)
+ q_rope = q_rope.view(
+ -1, layer.tp_q_head_num, layer.head_dim - layer.v_head_dim
+ )
+ else:
+ q_all = q.contiguous().view(-1, layer.tp_q_head_num, layer.head_dim)
+ q_nope = q_all[:, :, : layer.v_head_dim]
+ q_rope = q_all[:, :, layer.v_head_dim :]
+ max_seqlen_q = (
+ self.flashattention_backend.forward_metadata.max_seq_len_q
+ )
+
+ # print(q_rope.shape, k_rope_cache.shape, c_kv_cache.shape, q_nope.shape)
+ # torch.Size([1, 16, 64]) torch.Size([320001, 1, 1, 64]) torch.Size([320001, 1, 1, 512]) torch.Size([1, 16, 512])
+
+ assert q_nope.shape[-1] == layer.rope_range[0]
+ assert (q_rope.shape[-1] + q_nope.shape[-1]) == layer.rope_range[1]
+ q_merged = torch.cat([q_nope, q_rope], dim=-1)
+ # TODO FIXME
+ # k_cache = torch.cat([c_kv_cache, k_rope_cache], dim=-1)
+ k_cache = kv_cache
+ v_cache = c_kv_cache
+
+ o, metadata = self.forward_paged_hip(
+ query=q_merged,
+ sm_scale=layer.scaling,
+ batch_size=forward_batch.batch_size,
+ k=None,
+ v=None,
+ k_cache=k_cache,
+ v_cache=v_cache,
+ offload_cache=offload_cache,
+ positions=forward_batch.positions,
+ seq_lens=forward_batch.seq_lens,
+ req_to_tokens=forward_batch.req_to_token_pool.req_to_token,
+ req_pool_indices=forward_batch.req_pool_indices,
+ block_table=self._block_table[: forward_batch.batch_size],
+ rope_cos=layer.rope_cos,
+ rope_sin=layer.rope_sin,
+ rope_range=layer.rope_range,
+ rope_is_neox_style=layer.rope_is_neox_style,
+ layer_id=layer.layer_id,
+ logit_cap=layer.logit_cap,
+ orig_context_len=layer.orig_context_len,
+ max_context_len=self.max_context_len,
+ hip_config=self.hip_config,
+ is_kv_cache_offload_enabled=self.is_kv_cache_offload_enabled,
+ cached_metadata=metadata,
+ online_update_cache=(
+ forward_batch.token_to_kv_pool.is_online_cache_update_enabled()
+ if self.is_kv_cache_offload_enabled
+ else None
+ ),
+ is_decode=True,
+ offloading_metadata=offloading_metadata,
+ sliding_window_size=sw_size,
+ sliding_window_sink=sinks,
+ using_chunked_sliding_window=using_chunked_sw,
+ cache_seqlens=self.cache_seqlens,
+ cu_seqlens_q=self.cu_seqlens_q,
+ cu_seqlens_k=self.cu_seqlens_k,
+ self_extend_scale=self.hip_config.self_extend_scale,
+ )
+
+ if (
+ (metadata is not None)
+ and (forward_batch.hip_metadata_cache_pool is not None)
+ and (not delta_dense_decode)
+ ):
+ forward_batch.hip_metadata_cache_pool.set_hip_metadata_cache(
+ layer_id=layer.layer_id,
+ tdst=q.shape[0],
+ batch_size=forward_batch.batch_size,
+ metadata=metadata,
+ block_size_q=self.hip_config.block_sparse_block_size_q,
+ cached_stages=forward_batch.hip_metadata_cached_stages,
+ )
+
+ if self.is_kv_cache_offload_enabled:
+ offload_cache.handle_cache_miss(metadata)
+
+ return o.view(-1, layer.tp_q_head_num * layer.v_head_dim)
+
+
+class HiPAttentionMultiStepBackend:
+
+ def __init__(
+ self, model_runner: ModelRunner, topk: int, speculative_num_steps: int
+ ):
+ self.model_runner = model_runner
+ self.topk = topk
+ self.speculative_num_steps = speculative_num_steps
+ self.attn_backends = []
+ for i in range(self.speculative_num_steps):
+ self.attn_backends.append(
+ HiPAttentionBackend(
+ model_runner,
+ speculative_step_id=i,
+ topk=self.topk,
+ speculative_num_steps=self.speculative_num_steps,
+ )
+ )
+
+ def init_forward_metadata(self, forward_batch: ForwardBatch):
+ for i in range(self.speculative_num_steps - 1):
+ self.attn_backends[i].init_forward_metadata(forward_batch)
+
+ def init_cuda_graph_state(self, max_bs: int):
+ for i in range(self.speculative_num_steps):
+ # HiPAttentionBackend.init_cuda_graph_state also requires max_num_tokens;
+ # assume max_bs * topk tokens per draft step here.
+ self.attn_backends[i].init_cuda_graph_state(
+ max_bs=max_bs, max_num_tokens=max_bs * self.topk
+ )
+
+ def init_forward_metadata_capture_cuda_graph(
+ self,
+ forward_batch: ForwardBatch,
+ ):
+ assert forward_batch.spec_info is not None
+ assert isinstance(forward_batch.spec_info, EagleDraftInput)
+
+ for i in range(self.speculative_num_steps - 1):
+ self.attn_backends[i].init_forward_metadata_capture_cuda_graph(
+ forward_batch.batch_size,
+ forward_batch.batch_size * self.topk,
+ forward_batch.req_pool_indices,
+ forward_batch.seq_lens,
+ encoder_lens=forward_batch.encoder_lens,
+ forward_mode=ForwardMode.DECODE,
+ spec_info=forward_batch.spec_info,
+ )
+
+ def init_forward_metadata_replay_cuda_graph(
+ self, forward_batch: ForwardBatch, bs: int
+ ):
+ assert forward_batch.spec_info is not None
+ assert isinstance(forward_batch.spec_info, EagleDraftInput)
+
+ for i in range(self.speculative_num_steps - 1):
+ self.attn_backends[i].init_forward_metadata_replay_cuda_graph(
+ bs,
+ forward_batch.req_pool_indices,
+ forward_batch.seq_lens,
+ forward_batch.seq_lens_sum,
+ encoder_lens=forward_batch.encoder_lens,
+ forward_mode=ForwardMode.DECODE,
+ spec_info=forward_batch.spec_info,
+ seq_lens_cpu=forward_batch.seq_lens_cpu,
+ out_cache_loc=forward_batch.out_cache_loc,
+ )
diff --git a/python/sglang/srt/layers/attention/hybrid_attn_backend.py b/python/sglang/srt/layers/attention/hybrid_attn_backend.py
index b9f829e412f..580a977ec0a 100644
--- a/python/sglang/srt/layers/attention/hybrid_attn_backend.py
+++ b/python/sglang/srt/layers/attention/hybrid_attn_backend.py
@@ -5,6 +5,7 @@
from sglang.srt.layers.attention.base_attn_backend import AttentionBackend
from sglang.srt.layers.radix_attention import RadixAttention
from sglang.srt.model_executor.forward_batch_info import ForwardBatch, ForwardMode
+from sglang.srt.model_executor.model_runner import ModelRunner
from sglang.srt.speculative.eagle_utils import EagleDraftInput, EagleVerifyInput
@@ -12,19 +13,54 @@ class HybridAttnBackend(AttentionBackend):
"""Support different backends for prefill and decode."""
def __init__(
- self, prefill_backend: AttentionBackend, decode_backend: AttentionBackend
+ self,
+ model_runner: ModelRunner,
+ prefill_backend: AttentionBackend,
+ decode_backend: AttentionBackend,
):
+ self.model_runner = model_runner
self.prefill_backend = prefill_backend
self.decode_backend = decode_backend
- def init_forward_metadata(self, forward_batch: ForwardBatch):
- if forward_batch.forward_mode.is_decode():
- self.decode_backend.init_forward_metadata(forward_batch)
+ def _select_backend(self, forward_mode: ForwardMode) -> AttentionBackend:
+ """
+ Select the appropriate attention backend based on the forward mode.
+
+ Args:
+ forward_mode: The current forward mode indicating the operation type
+
+ Returns:
+ The selected attention backend (prefill or decode)
+
+ Note:
+ - decode_or_idle: Always uses decode backend
+ - target_verify or draft_extend: Uses decode backend if speculative_attention_mode is "decode", otherwise prefill backend
+ - prefill: Always uses prefill backend
+ """
+ if forward_mode.is_decode_or_idle():
+ return self.decode_backend
+ elif forward_mode.is_target_verify() or forward_mode.is_draft_extend():
+ return (
+ self.decode_backend
+ if self.model_runner.server_args.speculative_attention_mode == "decode"
+ else self.prefill_backend
+ )
else:
- self.prefill_backend.init_forward_metadata(forward_batch)
+ return self.prefill_backend
+
+ def init_forward_metadata(self, forward_batch: ForwardBatch):
+ backend = self._select_backend(forward_batch.forward_mode)
+ backend.init_forward_metadata(forward_batch)
def init_cuda_graph_state(self, max_bs: int, max_num_tokens: int):
self.decode_backend.init_cuda_graph_state(max_bs, max_num_tokens)
+ if (
+ self.model_runner.server_args.speculative_algorithm is not None
+ and self.model_runner.server_args.speculative_attention_mode == "prefill"
+ ):
+ # When speculative decoding is enabled, we need to initialize the backend
+ # that will be used for target_verify.
+ self.prefill_backend.init_cuda_graph_state(max_bs, max_num_tokens)
def init_forward_metadata_capture_cuda_graph(
self,
@@ -36,7 +72,8 @@ def init_forward_metadata_capture_cuda_graph(
forward_mode: ForwardMode,
spec_info: Optional[Union[EagleDraftInput, EagleVerifyInput]],
):
- self.decode_backend.init_forward_metadata_capture_cuda_graph(
+ backend = self._select_backend(forward_mode)
+ backend.init_forward_metadata_capture_cuda_graph(
bs,
num_tokens,
req_pool_indices,
@@ -57,7 +94,8 @@ def init_forward_metadata_replay_cuda_graph(
spec_info: Optional[Union[EagleDraftInput, EagleVerifyInput]],
seq_lens_cpu: Optional[torch.Tensor],
):
- self.decode_backend.init_forward_metadata_replay_cuda_graph(
+ backend = self._select_backend(forward_mode)
+ backend.init_forward_metadata_replay_cuda_graph(
bs,
req_pool_indices,
seq_lens,
@@ -95,6 +133,7 @@ def forward_extend(
save_kv_cache: bool = True,
**kwargs,
):
- return self.prefill_backend.forward_extend(
+ backend = self._select_backend(forward_batch.forward_mode)
+ return backend.forward_extend(
q, k, v, layer, forward_batch, save_kv_cache, **kwargs
)
diff --git a/python/sglang/srt/layers/attention/hybrid_linear_attn_backend.py b/python/sglang/srt/layers/attention/hybrid_linear_attn_backend.py
new file mode 100644
index 00000000000..a676573f255
--- /dev/null
+++ b/python/sglang/srt/layers/attention/hybrid_linear_attn_backend.py
@@ -0,0 +1,584 @@
+from dataclasses import astuple, dataclass
+from functools import lru_cache
+from typing import Optional, Union
+
+import torch
+import torch.nn.functional as F
+
+from sglang.srt.layers.attention.base_attn_backend import AttentionBackend
+from sglang.srt.layers.attention.fla.chunk import chunk_gated_delta_rule
+from sglang.srt.layers.attention.fla.fused_recurrent import (
+ fused_recurrent_gated_delta_rule_update,
+)
+from sglang.srt.layers.attention.fla.fused_sigmoid_gating_recurrent import (
+ fused_sigmoid_gating_delta_rule_update,
+)
+from sglang.srt.layers.attention.mamba.causal_conv1d_triton import (
+ causal_conv1d_fn,
+ causal_conv1d_update,
+)
+from sglang.srt.layers.radix_attention import RadixAttention
+from sglang.srt.mem_cache.memory_pool import HybridReqToTokenPool
+from sglang.srt.model_executor.forward_batch_info import ForwardBatch, ForwardMode
+from sglang.srt.model_executor.model_runner import ModelRunner
+from sglang.srt.models.qwen3_next import Qwen3HybridLinearDecoderLayer, fused_gdn_gating
+from sglang.srt.speculative.eagle_utils import EagleDraftInput, EagleVerifyInput
+
+
+@dataclass
+class ForwardMetadata:
+ query_start_loc: Optional[torch.Tensor]
+ mamba_cache_indices: torch.Tensor
+
+
+class MambaAttnBackend(AttentionBackend):
+ """Attention backend using Mamba kernel."""
+
+ def __init__(self, model_runner: ModelRunner):
+ super().__init__()
+ self.pad_slot_id = -1 # Default pad slot id
+ self.device = model_runner.device
+ self.req_to_token_pool: HybridReqToTokenPool = model_runner.req_to_token_pool
+ self.forward_metadata: ForwardMetadata = None
+ self.state_indices_list = []
+ self.query_start_loc_list = []
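+ # Per-batch-size buffers reused during CUDA graph capture/replay; populated in init_cuda_graph_state.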
+
+ @classmethod
+ @lru_cache(maxsize=128)
+ def _get_cached_arange(cls, bs: int, device_str: str) -> torch.Tensor:
+ """Cache torch.arange tensors for common batch sizes to avoid repeated allocation."""
+ device = torch.device(device_str)
+ return torch.arange(0, bs + 1, dtype=torch.int32, device=device)
+
+ def init_forward_metadata(self, forward_batch: ForwardBatch):
+ bs = forward_batch.batch_size
+ if forward_batch.forward_mode.is_decode_or_idle():
+ query_start_loc = self._get_cached_arange(bs, str(self.device))
+ elif forward_batch.forward_mode.is_extend():
+ if forward_batch.forward_mode.is_target_verify():
+ query_start_loc = torch.arange(
+ 0,
+ forward_batch.input_ids.shape[0] + 1,
+ step=forward_batch.spec_info.draft_token_num,
+ dtype=torch.int32,
+ device=forward_batch.input_ids.device,
+ )
+ else:
+ query_start_loc = torch.empty(
+ (bs + 1,), dtype=torch.int32, device=self.device
+ )
+ query_start_loc[:bs] = forward_batch.extend_start_loc
+ query_start_loc[bs] = (
+ forward_batch.extend_start_loc[-1]
+ + forward_batch.extend_seq_lens[-1]
+ )
+ else:
+ raise ValueError(f"Invalid forward mode: {forward_batch.forward_mode=}")
+ mamba_cache_indices = self.req_to_token_pool.get_mamba_indices(
+ forward_batch.req_pool_indices
+ )
+ self.forward_metadata = ForwardMetadata(
+ query_start_loc=query_start_loc,
+ mamba_cache_indices=mamba_cache_indices,
+ )
+
+ def init_cuda_graph_state(self, max_bs: int, max_num_tokens: int):
+ for i in range(max_bs):
+ self.state_indices_list.append(
+ torch.full((i + 1,), self.pad_slot_id, dtype=torch.int32, device="cuda")
+ )
+ self.query_start_loc_list.append(
+ torch.empty((i + 2,), dtype=torch.int32, device="cuda")
+ )
+
+ def init_forward_metadata_capture_cuda_graph(
+ self,
+ bs: int,
+ num_tokens: int,
+ req_pool_indices: torch.Tensor,
+ seq_lens: torch.Tensor,
+ encoder_lens: Optional[torch.Tensor],
+ forward_mode: ForwardMode,
+ spec_info: Optional[Union[EagleDraftInput, EagleVerifyInput]],
+ ):
+ if forward_mode.is_decode_or_idle():
+ self.query_start_loc_list[bs - 1].copy_(self._get_cached_arange(bs, "cuda"))
+ elif forward_mode.is_target_verify():
+ self.query_start_loc_list[bs - 1].copy_(
+ torch.arange(
+ 0,
+ bs * spec_info.draft_token_num + 1,
+ step=spec_info.draft_token_num,
+ dtype=torch.int32,
+ device="cuda",
+ )
+ )
+ else:
+ raise ValueError(f"Invalid forward mode: {forward_mode=}")
+ mamba_indices = self.req_to_token_pool.get_mamba_indices(req_pool_indices)
+ self.state_indices_list[bs - 1][: len(mamba_indices)].copy_(mamba_indices)
+ self.forward_metadata = ForwardMetadata(
+ query_start_loc=self.query_start_loc_list[bs - 1],
+ mamba_cache_indices=self.state_indices_list[bs - 1],
+ )
+
+ def init_forward_metadata_replay_cuda_graph(
+ self,
+ bs: int,
+ req_pool_indices: torch.Tensor,
+ seq_lens: torch.Tensor,
+ seq_lens_sum: int,
+ encoder_lens: Optional[torch.Tensor],
+ forward_mode: ForwardMode,
+ spec_info: Optional[Union[EagleDraftInput, EagleVerifyInput]],
+ seq_lens_cpu: Optional[torch.Tensor],
+ ):
+ num_padding = torch.count_nonzero(
+ seq_lens_cpu == self.get_cuda_graph_seq_len_fill_value()
+ )
+ # Make sure forward metadata is correctly handled for padding reqs
+ req_pool_indices[bs - num_padding :] = 0
+ mamba_indices = self.req_to_token_pool.get_mamba_indices(req_pool_indices)
+ mamba_indices[bs - num_padding :] = -1
+ self.state_indices_list[bs - 1][: len(mamba_indices)].copy_(mamba_indices)
+ if forward_mode.is_decode_or_idle():
+ self.query_start_loc_list[bs - 1].copy_(self._get_cached_arange(bs, "cuda"))
+ if num_padding > 0:
+ self.query_start_loc_list[bs - 1][bs - num_padding :] = bs - num_padding
+ elif forward_mode.is_target_verify():
+ self.query_start_loc_list[bs - 1].copy_(
+ torch.arange(
+ 0,
+ bs * spec_info.draft_token_num + 1,
+ step=spec_info.draft_token_num,
+ dtype=torch.int32,
+ device="cuda",
+ )
+ )
+ if num_padding > 0:
+ self.query_start_loc_list[bs - 1][bs - num_padding :] = (
+ bs - num_padding
+ ) * spec_info.draft_token_num
+ else:
+ raise ValueError(f"Invalid forward mode: {forward_mode=}")
+
+ self.forward_metadata = ForwardMetadata(
+ query_start_loc=self.query_start_loc_list[bs - 1],
+ mamba_cache_indices=self.state_indices_list[bs - 1],
+ )
+
+ def get_cuda_graph_seq_len_fill_value(self):
+ return 1 # Mamba attn does not use seq lens to index kv cache
+
+ def forward_decode(
+ self,
+ q: torch.Tensor,
+ k: torch.Tensor,
+ v: torch.Tensor,
+ layer: RadixAttention,
+ forward_batch: ForwardBatch,
+ save_kv_cache: bool = True,
+ **kwargs,
+ ):
+ mixed_qkv = kwargs["mixed_qkv"]
+ conv_weights = kwargs["conv_weights"]
+ bias = kwargs["bias"]
+ activation = kwargs["activation"]
+ key_dim = kwargs["key_dim"]
+ value_dim = kwargs["value_dim"]
+ attn_tp_size = kwargs["attention_tp_size"]
+ head_k_dim = kwargs["head_k_dim"]
+ head_v_dim = kwargs["head_v_dim"]
+ a = kwargs["a"]
+ b = kwargs["b"]
+ A_log = kwargs["A_log"]
+ dt_bias = kwargs["dt_bias"]
+ layer_id = kwargs["layer_id"]
+
+ conv_states, ssm_states, *rest = self.req_to_token_pool.get_mamba_params(
+ layer_id
+ )
+ query_start_loc = self.forward_metadata.query_start_loc
+ cache_indices = self.forward_metadata.mamba_cache_indices
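+ # Decode path: single-token causal-conv update followed by the fused sigmoid-gated delta-rule recurrence.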
+
+ mixed_qkv = causal_conv1d_update(
+ mixed_qkv,
+ conv_states,
+ conv_weights,
+ bias,
+ activation,
+ conv_state_indices=cache_indices,
+ )
+
+ query, key, value = torch.split(
+ mixed_qkv,
+ [
+ key_dim // attn_tp_size,
+ key_dim // attn_tp_size,
+ value_dim // attn_tp_size,
+ ],
+ dim=-1,
+ )
+ # Reshape from [l, h*d] to [1, l, h, d]
+ seq_len = query.shape[0]
+ num_heads = query.shape[1] // head_k_dim
+ query = query.view(1, seq_len, num_heads, head_k_dim)
+ key = key.view(1, seq_len, num_heads, head_k_dim)
+ value = value.view(1, seq_len, value.shape[1] // head_v_dim, head_v_dim)
+
+ core_attn_out = fused_sigmoid_gating_delta_rule_update(
+ A_log=A_log,
+ dt_bias=dt_bias,
+ q=query,
+ k=key,
+ v=value,
+ a=a,
+ b=b,
+ initial_state_source=ssm_states,
+ initial_state_indices=cache_indices,
+ cu_seqlens=query_start_loc,
+ use_qk_l2norm_in_kernel=True,
+ softplus_beta=1.0,
+ softplus_threshold=20.0,
+ )
+
+ return core_attn_out
+
+ def forward_extend(
+ self,
+ q: torch.Tensor,
+ k: torch.Tensor,
+ v: torch.Tensor,
+ layer: RadixAttention,
+ forward_batch: ForwardBatch,
+ save_kv_cache: bool = True,
+ **kwargs,
+ ):
+ mixed_qkv = kwargs["mixed_qkv"]
+ conv_weights = kwargs["conv_weights"]
+ bias = kwargs["bias"]
+ activation = kwargs["activation"]
+ key_dim = kwargs["key_dim"]
+ value_dim = kwargs["value_dim"]
+ attn_tp_size = kwargs["attention_tp_size"]
+ head_k_dim = kwargs["head_k_dim"]
+ head_v_dim = kwargs["head_v_dim"]
+ a = kwargs["a"]
+ b = kwargs["b"]
+ A_log = kwargs["A_log"]
+ dt_bias = kwargs["dt_bias"]
+ layer_id = kwargs["layer_id"]
+ seq_len = kwargs["seq_len"]
+
+ is_target_verify = forward_batch.forward_mode.is_target_verify()
+
+ query_start_loc = self.forward_metadata.query_start_loc
+ cache_indices = self.forward_metadata.mamba_cache_indices
+
+ if is_target_verify:
+ (
+ conv_states,
+ ssm_states,
+ intermediate_state_cache,
+ intermediate_conv_window_cache,
+ ) = self.req_to_token_pool.get_mamba_params(layer_id)
+ has_initial_states = torch.ones(
+ seq_len // forward_batch.spec_info.draft_token_num,
+ dtype=torch.bool,
+ device=forward_batch.input_ids.device,
+ )
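+ # Work on a copy so the persistent conv state is not overwritten while verifying draft tokens that may be rejected.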
+ conv_states_to_use = conv_states.clone()
+ else:
+ conv_states, ssm_states, *rest = self.req_to_token_pool.get_mamba_params(
+ layer_id
+ )
+ has_initial_states = forward_batch.extend_prefix_lens > 0
+ conv_states_to_use = conv_states
+
+ if is_target_verify:
+ batch_size = seq_len // forward_batch.spec_info.draft_token_num
+ draft_token_num = forward_batch.spec_info.draft_token_num
+ mixed_qkv_reshaped = (
+ mixed_qkv.view(batch_size, draft_token_num, -1)
+ .transpose(1, 2)
+ .contiguous()
+ )
+ mixed_qkv_processed = causal_conv1d_update(
+ mixed_qkv_reshaped,
+ conv_states_to_use,
+ conv_weights,
+ bias,
+ activation,
+ conv_state_indices=cache_indices[:batch_size],
+ intermediate_conv_window=intermediate_conv_window_cache,
+ )
+ mixed_qkv = (
+ mixed_qkv_processed.transpose(1, 2).contiguous().view(seq_len, -1)
+ )
+ else:
+ mixed_qkv = causal_conv1d_fn(
+ mixed_qkv.transpose(0, 1),
+ conv_weights,
+ bias,
+ activation=activation,
+ conv_states=conv_states_to_use,
+ has_initial_state=has_initial_states,
+ cache_indices=cache_indices,
+ query_start_loc=query_start_loc,
+ ).transpose(0, 1)[:seq_len]
+
+ key_split_dim = key_dim // attn_tp_size
+ value_split_dim = value_dim // attn_tp_size
+
+ query, key, value = torch.split(
+ mixed_qkv,
+ [key_split_dim, key_split_dim, value_split_dim],
+ dim=-1,
+ )
+
+ actual_seq_len = query.shape[0]
+ num_heads = query.shape[1] // head_k_dim
+ num_value_heads = value.shape[1] // head_v_dim
+
+ query = query.view(1, actual_seq_len, num_heads, head_k_dim)
+ key = key.view(1, actual_seq_len, num_heads, head_k_dim)
+ value = value.view(1, actual_seq_len, num_value_heads, head_v_dim)
+
+ beta = b.sigmoid()
+ g = fused_gdn_gating(A_log, a, dt_bias)
+
+ g = g.unsqueeze(0)
+ beta = beta.unsqueeze(0)
+
+ if is_target_verify:
+ core_attn_out = fused_recurrent_gated_delta_rule_update(
+ q=query,
+ k=key,
+ v=value,
+ g=g,
+ beta=beta,
+ initial_state_source=ssm_states,
+ initial_state_indices=cache_indices,
+ cu_seqlens=query_start_loc,
+ use_qk_l2norm_in_kernel=True,
+ disable_state_update=True,
+ intermediate_states_buffer=intermediate_state_cache,
+ cache_steps=forward_batch.spec_info.draft_token_num,
+ )
+ else:
+ recurrent_state = ssm_states[cache_indices]
+ core_attn_out, last_recurrent_state = chunk_gated_delta_rule(
+ q=query,
+ k=key,
+ v=value,
+ g=g,
+ beta=beta,
+ initial_state=recurrent_state,
+ output_final_state=True,
+ cu_seqlens=query_start_loc,
+ head_first=False,
+ use_qk_l2norm_in_kernel=True,
+ )
+ last_recurrent_state = last_recurrent_state.to(ssm_states.dtype, copy=False)
+ ssm_states[cache_indices] = last_recurrent_state
+
+ return core_attn_out
+
+
+class HybridLinearAttnBackend(AttentionBackend):
+ """Support different backends for prefill and decode."""
+
+ def __init__(
+ self,
+ full_attn_backend: AttentionBackend,
+ linear_attn_backend: AttentionBackend,
+ full_attn_layers: list[int],
+ ):
+ self.full_attn_layers = full_attn_layers
+ self.attn_backend_list = [full_attn_backend, linear_attn_backend]
+
+ def init_forward_metadata(self, forward_batch: ForwardBatch):
+ for attn_backend in self.attn_backend_list:
+ attn_backend.init_forward_metadata(forward_batch)
+
+ def init_cuda_graph_state(self, max_bs: int, max_num_tokens: int):
+ for attn_backend in self.attn_backend_list:
+ attn_backend.init_cuda_graph_state(max_bs, max_num_tokens)
+
+ def init_forward_metadata_capture_cuda_graph(
+ self,
+ bs: int,
+ num_tokens: int,
+ req_pool_indices: torch.Tensor,
+ seq_lens: torch.Tensor,
+ encoder_lens: Optional[torch.Tensor],
+ forward_mode: ForwardMode,
+ spec_info: Optional[Union[EagleDraftInput, EagleVerifyInput]],
+ ):
+ for attn_backend in self.attn_backend_list:
+ attn_backend.init_forward_metadata_capture_cuda_graph(
+ bs,
+ num_tokens,
+ req_pool_indices,
+ seq_lens,
+ encoder_lens,
+ forward_mode,
+ spec_info,
+ )
+
+ def init_forward_metadata_replay_cuda_graph(
+ self,
+ bs: int,
+ req_pool_indices: torch.Tensor,
+ seq_lens: torch.Tensor,
+ seq_lens_sum: int,
+ encoder_lens: Optional[torch.Tensor],
+ forward_mode: ForwardMode,
+ spec_info: Optional[Union[EagleDraftInput, EagleVerifyInput]],
+ seq_lens_cpu: Optional[torch.Tensor],
+ ):
+ for attn_backend in self.attn_backend_list:
+ attn_backend.init_forward_metadata_replay_cuda_graph(
+ bs,
+ req_pool_indices,
+ seq_lens,
+ seq_lens_sum,
+ encoder_lens,
+ forward_mode,
+ spec_info,
+ seq_lens_cpu,
+ )
+
+ def get_cuda_graph_seq_len_fill_value(self):
+ return self.attn_backend_list[0].get_cuda_graph_seq_len_fill_value()
+
+ def forward_decode(
+ self,
+ q: torch.Tensor,
+ k: torch.Tensor,
+ v: torch.Tensor,
+ layer: RadixAttention,
+ forward_batch: ForwardBatch,
+ save_kv_cache: bool = True,
+ **kwargs,
+ ):
+ layer_id = layer.layer_id if layer else kwargs["layer_id"]
+ if layer_id in self.full_attn_layers:
+ return self.attn_backend_list[0].forward_decode(
+ q, k, v, layer, forward_batch, save_kv_cache, **kwargs
+ )
+ return self.attn_backend_list[1].forward_decode(
+ q, k, v, layer, forward_batch, save_kv_cache, **kwargs
+ )
+
+ def forward_extend(
+ self,
+ q: torch.Tensor,
+ k: torch.Tensor,
+ v: torch.Tensor,
+ layer: RadixAttention,
+ forward_batch: ForwardBatch,
+ save_kv_cache: bool = True,
+ **kwargs,
+ ):
+ layer_id = layer.layer_id if layer else kwargs["layer_id"]
+ if layer_id in self.full_attn_layers:
+ return self.attn_backend_list[0].forward_extend(
+ q, k, v, layer, forward_batch, save_kv_cache, **kwargs
+ )
+ return self.attn_backend_list[1].forward_extend(
+ q, k, v, layer, forward_batch, save_kv_cache, **kwargs
+ )
+
+ def forward(
+ self,
+ q: torch.Tensor,
+ k: torch.Tensor,
+ v: torch.Tensor,
+ layer: RadixAttention,
+ forward_batch: ForwardBatch,
+ save_kv_cache: bool = True,
+ **kwargs,
+ ):
+ """Run forward on an attention layer."""
+ if forward_batch.forward_mode.is_idle():
+ if layer is None:
+ return torch.empty_like(kwargs["z"])
+ return q.new_empty(q.shape[0], layer.tp_q_head_num * layer.v_head_dim)
+ elif forward_batch.forward_mode.is_decode():
+ return self.forward_decode(
+ q,
+ k,
+ v,
+ layer,
+ forward_batch,
+ save_kv_cache=save_kv_cache,
+ **kwargs,
+ )
+ else:
+ return self.forward_extend(
+ q,
+ k,
+ v,
+ layer,
+ forward_batch,
+ save_kv_cache=save_kv_cache,
+ **kwargs,
+ )
+
+ def update_mamba_state_after_mtp_verify(self, accepted_length, model):
+ request_number = accepted_length.shape[0]
+
+ state_indices_tensor = self.attn_backend_list[
+ 1
+ ].forward_metadata.mamba_cache_indices[:request_number]
+
+ mamba_caches = self.attn_backend_list[
+ 1
+ ].req_to_token_pool.get_mamba_params_all_layers()
+
+ (
+ conv_states,
+ ssm_states,
+ intermediate_state_cache,
+ intermediate_conv_window_cache,
+ ) = mamba_caches
+
+ # SSM state updates (chunked to reduce peak memory)
+ valid_mask = accepted_length > 0
+
+ # Compute common indices once to avoid duplication
+ last_steps_all = (accepted_length - 1).to(torch.int64)
+ valid_state_indices = state_indices_tensor[valid_mask].to(torch.int64)
+ last_steps = last_steps_all[valid_mask].to(torch.int64)
+
+ if valid_state_indices.numel() > 0:
+ chunk = 256
+ num_valid = valid_state_indices.numel()
+
+ # SSM state updates
+ for i in range(0, num_valid, chunk):
+ idx = valid_state_indices[i : i + chunk]
+ steps = last_steps[i : i + chunk]
+ # per (cache line, step)
+ for j in range(idx.numel()):
+ ci = idx[j].item()
+ st = steps[j].item()
+ ssm_states[:, ci, :].copy_(
+ intermediate_state_cache[:, ci, st].to(
+ ssm_states.dtype, copy=False
+ )
+ )
+
+ # Conv window updates
+ for i in range(0, num_valid, chunk):
+ idx = valid_state_indices[i : i + chunk]
+ steps = last_steps[i : i + chunk]
+ for j in range(idx.numel()):
+ ci = idx[j].item()
+ st = steps[j].item()
+ conv_states[:, ci, :, :].copy_(
+ intermediate_conv_window_cache[:, ci, st].to(
+ conv_states.dtype, copy=False
+ )
+ )
diff --git a/python/sglang/srt/layers/attention/intel_amx_backend.py b/python/sglang/srt/layers/attention/intel_amx_backend.py
index 9f2f7ece4d8..39e5c7428ad 100644
--- a/python/sglang/srt/layers/attention/intel_amx_backend.py
+++ b/python/sglang/srt/layers/attention/intel_amx_backend.py
@@ -49,6 +49,9 @@ def init_forward_metadata(self, forward_batch: ForwardBatch):
max_extend_len = torch.max(forward_batch.extend_seq_lens).item()
self.forward_metadata = (attn_logits, max_extend_len)
+ def get_graph_seq_len_fill_value(self):
+ return 1
+
def forward_extend(
self,
q,
diff --git a/python/sglang/srt/layers/attention/mamba/causal_conv1d.py b/python/sglang/srt/layers/attention/mamba/causal_conv1d.py
new file mode 100644
index 00000000000..d004337ffa9
--- /dev/null
+++ b/python/sglang/srt/layers/attention/mamba/causal_conv1d.py
@@ -0,0 +1,128 @@
+# Adapted from https://github.com/vllm-project/vllm/blob/main/vllm/model_executor/layers/mamba/ops/causal_conv1d.py
+# SPDX-License-Identifier: Apache-2.0
+
+# Copyright (c) 2024, Tri Dao.
+# Adapted from https://github.com/Dao-AILab/causal-conv1d/blob/main/causal_conv1d/causal_conv1d_interface.py
+
+from typing import Optional
+
+import torch
+from sgl_kernel import causal_conv1d_fwd
+from sgl_kernel import causal_conv1d_update as causal_conv1d_update_kernel
+
+PAD_SLOT_ID = -1
+
+
+def causal_conv1d_fn(
+ x: torch.Tensor,
+ weight: torch.Tensor,
+ bias: Optional[torch.Tensor] = None,
+ query_start_loc: Optional[torch.Tensor] = None,
+ cache_indices: Optional[torch.Tensor] = None,
+ has_initial_state: Optional[torch.Tensor] = None,
+ conv_states: Optional[torch.Tensor] = None,
+ activation: Optional[str] = "silu",
+ pad_slot_id: int = PAD_SLOT_ID,
+):
+ """
+ x: (batch, dim, seqlen) or (dim,cu_seq_len) for varlen
+ sequences are concatenated from left to right for varlen
+ weight: (dim, width)
+ bias: (dim,)
+ query_start_loc: (batch + 1) int32
+ The cumulative sequence lengths of the sequences in
+        the batch, used to index into the sequences; prepended with 0.
+ for example: query_start_loc = torch.Tensor([0,10,16,17]),
+ x.shape=(dim,17)
+ cache_indices: (batch) int32
+ indicates the corresponding state index,
+ like so: conv_state = conv_states[cache_indices[batch_id]]
+ has_initial_state: (batch) bool
+        indicates whether the kernel should take the current state as the
+        initial state for the calculations
+ conv_states: (...,dim,width - 1) itype
+ updated inplace if provided
+ activation: either None or "silu" or "swish"
+ pad_slot_id: int
+ if cache_indices is passed, lets the kernel identify padded
+ entries that will not be processed,
+ for example: cache_indices = [pad_slot_id, 1, 20, pad_slot_id]
+ in this case, the kernel will not process entries at
+ indices 0 and 3
+
+
+ out: (batch, dim, seqlen)
+ """
+ if activation not in [None, "silu", "swish"]:
+ raise NotImplementedError("activation must be None, silu, or swish")
+ if x.stride(-1) != 1:
+ x = x.contiguous()
+ bias = bias.contiguous() if bias is not None else None
+
+ causal_conv1d_fwd(
+ x,
+ weight,
+ bias,
+ conv_states,
+ query_start_loc,
+ cache_indices,
+ has_initial_state,
+ activation in ["silu", "swish"],
+ pad_slot_id,
+ )
+ return x
+
+
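+# Illustrative usage sketch (an assumption, not part of the upstream API): how the
+# varlen layout described in the docstring above maps onto the arguments. Shapes,
+# dtypes, and the batch composition are made up for demonstration; running this
+# requires a CUDA device and the sgl_kernel package.
+def _example_varlen_prefill():
+    device = "cuda"
+    dim, width = 64, 4
+    seq_lens = [10, 6, 1]  # three sequences concatenated left to right
+    total = sum(seq_lens)  # 17 tokens -> x.shape == (dim, 17)
+    x = torch.randn(dim, total, device=device, dtype=torch.float16)
+    weight = torch.randn(dim, width, device=device, dtype=torch.float16)
+    bias = None
+    # cumulative lengths prepended with 0, as in the docstring example
+    query_start_loc = torch.tensor([0, 10, 16, 17], dtype=torch.int32, device=device)
+    # one conv-state cache line per sequence, none carrying a previous state
+    cache_indices = torch.tensor([0, 1, 2], dtype=torch.int32, device=device)
+    has_initial_state = torch.zeros(3, dtype=torch.bool, device=device)
+    conv_states = torch.zeros(3, dim, width - 1, device=device, dtype=torch.float16)
+    out = causal_conv1d_fn(
+        x,
+        weight,
+        bias,
+        query_start_loc=query_start_loc,
+        cache_indices=cache_indices,
+        has_initial_state=has_initial_state,
+        conv_states=conv_states,
+        activation="silu",
+    )
+    return out  # shares storage with x: the kernel writes its result in place
+
+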
+def causal_conv1d_update(
+ x: torch.Tensor,
+ conv_state: torch.Tensor,
+ weight: torch.Tensor,
+ bias: Optional[torch.Tensor] = None,
+ activation: Optional[str] = None,
+ cache_seqlens: Optional[torch.Tensor] = None,
+ conv_state_indices: Optional[torch.Tensor] = None,
+ pad_slot_id: int = PAD_SLOT_ID,
+):
+ """
+ x: (batch, dim) or (batch, dim, seqlen)
+ conv_state: (batch, dim, state_len), where state_len >= width - 1
+ weight: (dim, width)
+ bias: (dim,)
+ cache_seqlens: (batch,), dtype int32.
+ If not None, the conv_state is treated as a circular buffer.
+ The conv_state will be updated by copying x to the conv_state
+ starting at the index
+ @cache_seqlens % state_len.
+ conv_state_indices: (batch,), dtype int32
+ If not None, the conv_state is a larger tensor along the batch dim,
+ and we are selecting the batch coords specified by conv_state_indices.
+ Useful for a continuous batching scenario.
+ pad_slot_id: int
+ if cache_indices is passed, lets the kernel identify padded
+ entries that will not be processed,
+        for example: cache_indices = [pad_slot_id, 1, 20, pad_slot_id]
+ in this case, the kernel will not process entries at
+ indices 0 and 3
+ out: (batch, dim) or (batch, dim, seqlen)
+ """
+ if activation not in [None, "silu", "swish"]:
+ raise NotImplementedError(
+ f"activation must be None, silu, or swish, actual: {activation}"
+ )
+ activation_val = activation in ["silu", "swish"]
+ unsqueeze = x.dim() == 2
+ if unsqueeze:
+ x = x.unsqueeze(-1)
+ causal_conv1d_update_kernel(
+ x,
+ conv_state,
+ weight,
+ bias,
+ activation_val,
+ cache_seqlens,
+ conv_state_indices,
+ pad_slot_id,
+ )
+ if unsqueeze:
+ x = x.squeeze(-1)
+ return x
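+
+
+# Illustrative sketch (an assumption, not part of the upstream file): a single-token
+# decode step with per-request cache lines selected via conv_state_indices.
+# Requires a CUDA device and the sgl_kernel package.
+if __name__ == "__main__":
+    device = "cuda"
+    batch, dim, width = 2, 64, 4
+    x = torch.randn(batch, dim, device=device, dtype=torch.float16)  # one new token per request
+    weight = torch.randn(dim, width, device=device, dtype=torch.float16)
+    # cache pool with more lines than the batch; requests 0 and 1 use lines 3 and 7
+    conv_state = torch.zeros(8, dim, width - 1, device=device, dtype=torch.float16)
+    conv_state_indices = torch.tensor([3, 7], dtype=torch.int32, device=device)
+    y = causal_conv1d_update(
+        x,
+        conv_state,
+        weight,
+        activation="silu",
+        conv_state_indices=conv_state_indices,
+    )
+    print(y.shape)  # (batch, dim): a 2-D input is unsqueezed and squeezed internally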
diff --git a/python/sglang/srt/layers/attention/mamba/causal_conv1d_triton.py b/python/sglang/srt/layers/attention/mamba/causal_conv1d_triton.py
new file mode 100644
index 00000000000..3c1bdec48d7
--- /dev/null
+++ b/python/sglang/srt/layers/attention/mamba/causal_conv1d_triton.py
@@ -0,0 +1,1052 @@
+# Copyright (c) 2024, Tri Dao.
+# Adapted from https://github.com/Dao-AILab/causal-conv1d/blob/main/causal_conv1d/causal_conv1d_interface.py
+# and https://github.com/vllm-project/vllm/blob/main/vllm/model_executor/layers/mamba/ops/causal_conv1d.py
+
+from typing import Optional, Union
+
+import numpy as np
+import torch
+
+PAD_SLOT_ID = -1
+import triton
+import triton.language as tl
+
+
+@triton.jit()
+def _causal_conv1d_fwd_kernel( # continuous batching
+ # Pointers to matrices
+ x_ptr, # (dim, cu_seqlen) holding `batch` of actual sequences + padded sequences
+ w_ptr, # (dim, width)
+ bias_ptr,
+ initial_states_ptr, # conv_states_ptr
+ cache_indices_ptr, # conv_state_indices_ptr
+ has_initial_states_ptr,
+ query_start_loc_ptr,
+ batch_ptr,
+ token_chunk_offset_ptr,
+ o_ptr, # (dim, seqlen) - actually pointing to x_ptr
+ # Matrix dimensions
+ batch: tl.int32, # actually padded_batch
+ dim: tl.constexpr,
+ seqlen: tl.int32, # cu_seqlen
+ num_cache_lines: tl.constexpr, # added to support vLLM larger cache lines
+ # Strides
+ stride_x_seq: tl.constexpr, # stride to get to next sequence,
+ stride_x_dim: tl.constexpr, # stride to get to next feature-value,
+ stride_x_token: tl.constexpr, # stride to get to next token (same feature-index, same sequence-index)
+ stride_w_dim: tl.constexpr, # stride to get to next dim-axis value
+ stride_w_width: tl.constexpr, # stride to get to next width-axis value
+ stride_istate_seq: tl.constexpr,
+ stride_istate_dim: tl.constexpr,
+ stride_istate_token: tl.constexpr,
+ stride_o_seq: tl.constexpr,
+ stride_o_dim: tl.constexpr,
+ stride_o_token: tl.constexpr,
+ # others
+ pad_slot_id: tl.constexpr,
+ # Meta-parameters
+ HAS_BIAS: tl.constexpr,
+ KERNEL_WIDTH: tl.constexpr,
+ SILU_ACTIVATION: tl.constexpr,
+ HAS_INITIAL_STATES: tl.constexpr,
+ HAS_CACHE: tl.constexpr,
+ IS_CONTINUOUS_BATCHING: tl.constexpr,
+ USE_PAD_SLOT: tl.constexpr,
+ NP2_STATELEN: tl.constexpr,
+ BLOCK_M: tl.constexpr,
+ BLOCK_N: tl.constexpr,
+):
+ conv_states_ptr = initial_states_ptr
+ conv_state_indices_ptr = cache_indices_ptr
+ stride_conv_state_seq = stride_istate_seq
+ stride_conv_state_dim = stride_istate_dim
+ stride_conv_state_tok = stride_istate_token
+ state_len = (
+ KERNEL_WIDTH - 1
+ ) # can be passed via argument if it's not the same as this value
+
+ # one program handles one chunk in a single sequence
+    # rather than mixing sequences - to make updating initial_states across sequences efficient
+
+ # single-sequence id
+ idx_seq = tl.load(batch_ptr + tl.program_id(0))
+ chunk_offset = tl.load(token_chunk_offset_ptr + tl.program_id(0))
+
+ # BLOCK_N elements along the feature-dimension (channel)
+ idx_feats = tl.program_id(1) * BLOCK_N + tl.arange(0, BLOCK_N)
+
+ if idx_seq == pad_slot_id:
+ return
+
+ sequence_start_index = tl.load(query_start_loc_ptr + idx_seq)
+ sequence_end_index = tl.load(query_start_loc_ptr + idx_seq + 1)
+ # find the actual sequence length
+ seqlen = sequence_end_index - sequence_start_index
+
+ token_offset = BLOCK_M * chunk_offset
+ segment_len = min(BLOCK_M, seqlen - token_offset)
+
+ # base of the sequence
+ x_base = (
+ x_ptr + sequence_start_index * stride_x_token + idx_feats * stride_x_dim
+ ) # [BLOCK_N,]
+
+ if IS_CONTINUOUS_BATCHING:
+ # cache_idx
+ conv_state_batch_coord = tl.load(conv_state_indices_ptr + idx_seq).to(tl.int64)
+ else:
+ # cache_idx
+ conv_state_batch_coord = idx_seq
+ if USE_PAD_SLOT: # noqa
+ if conv_state_batch_coord == pad_slot_id:
+ # not processing as this is not the actual sequence
+ return
+ conv_states_base = (
+ conv_states_ptr
+ + (conv_state_batch_coord * stride_conv_state_seq)
+ + (idx_feats * stride_conv_state_dim)
+ ) # [BLOCK_N,]
+
+ w_base = w_ptr + (idx_feats * stride_w_dim) # [BLOCK_N,]
+
+ # Does 2 things:
+    # 1. READ prior-block init-state data - [done by every Triton program]
+    # 2. update conv_state with new data [only by the Triton program that handles chunk_offset=0]
+ if chunk_offset == 0:
+ # read from conv_states
+ load_init_state = False
+ if HAS_INITIAL_STATES: # the new HAS_INITIAL_STATES
+ load_init_state = tl.load(has_initial_states_ptr + idx_seq).to(tl.int1)
+ if load_init_state:
+ # load from conv_states
+ prior_tokens = conv_states_base + (state_len - 1) * stride_conv_state_tok
+ mask_w = idx_feats < dim
+ if KERNEL_WIDTH == 2:
+ conv_states_ptrs = prior_tokens # [BLOCK_N]
+ col0 = tl.load(conv_states_ptrs, mask_w, 0.0)
+ if KERNEL_WIDTH == 3:
+ conv_states_ptrs = prior_tokens # [BLOCK_N]
+ col1 = tl.load(conv_states_ptrs, mask_w, 0.0)
+ conv_states_ptrs = prior_tokens - 1 * stride_conv_state_tok # [BLOCK_N]
+ col0 = tl.load(conv_states_ptrs, mask_w, 0.0)
+ if KERNEL_WIDTH == 4:
+ conv_states_ptrs = prior_tokens # [BLOCK_N]
+ col2 = tl.load(conv_states_ptrs, mask_w, 0.0)
+ conv_states_ptrs = prior_tokens - 1 * stride_conv_state_tok # [BLOCK_N]
+ col1 = tl.load(conv_states_ptrs, mask_w, 0.0)
+ conv_states_ptrs = prior_tokens - 2 * stride_conv_state_tok # [BLOCK_N]
+ col0 = tl.load(conv_states_ptrs, mask_w, 0.0)
+ if KERNEL_WIDTH == 5:
+ conv_states_ptrs = prior_tokens # [BLOCK_N]
+ col3 = tl.load(conv_states_ptrs, mask_w, 0.0)
+ conv_states_ptrs = prior_tokens - 1 * stride_conv_state_tok # [BLOCK_N]
+ col2 = tl.load(conv_states_ptrs, mask_w, 0.0)
+ conv_states_ptrs = prior_tokens - 2 * stride_conv_state_tok # [BLOCK_N]
+ col1 = tl.load(conv_states_ptrs, mask_w, 0.0)
+ conv_states_ptrs = prior_tokens - 3 * stride_conv_state_tok # [BLOCK_N]
+ col0 = tl.load(conv_states_ptrs, mask_w, 0.0)
+ else:
+ # prior-tokens are zeros
+ if KERNEL_WIDTH >= 2: # STRATEGY1
+ # first chunk and does not have prior-token, so just set to 0
+ col0 = tl.zeros((BLOCK_N,), dtype=x_ptr.dtype.element_ty)
+ if KERNEL_WIDTH >= 3: # STRATEGY1
+ col1 = tl.zeros((BLOCK_N,), dtype=x_ptr.dtype.element_ty)
+ if KERNEL_WIDTH >= 4: # STRATEGY1
+ col2 = tl.zeros((BLOCK_N,), dtype=x_ptr.dtype.element_ty)
+ if KERNEL_WIDTH >= 5: # STRATEGY1
+ col3 = tl.zeros((BLOCK_N,), dtype=x_ptr.dtype.element_ty)
+
+ # STEP 2:
+ # here prepare data for updating conv_state
+ if (
+ state_len <= seqlen
+ ): # SMALL_CACHE=True (only move part of 'x' into conv_state cache)
+ # just read from 'x'
+ # copy 'x' data to conv_state
+ # load only 'x' data (and set 0 before 'x' if seqlen < state_len)
+ idx_tokens_last = (seqlen - state_len) + tl.arange(
+ 0, NP2_STATELEN
+ ) # [BLOCK_M]
+ x_ptrs = (
+ x_ptr
+ + ((sequence_start_index + idx_tokens_last) * stride_x_token)[:, None]
+ + (idx_feats * stride_x_dim)[None, :]
+ ) # [BLOCK_M,BLOCK_N,]
+ mask_x = (
+ (idx_tokens_last >= 0)[:, None]
+ & (idx_tokens_last < seqlen)[:, None]
+ & (idx_feats < dim)[None, :]
+ ) # token-index # token-index # feature-index
+ loaded_x = tl.load(x_ptrs, mask_x, 0.0)
+ new_conv_state = tl.load(x_ptrs, mask_x, 0.0)
+ idx_tokens_conv = tl.arange(0, NP2_STATELEN) # [BLOCK_M]
+ conv_states_ptrs_target = (
+ conv_states_base[None, :]
+ + (idx_tokens_conv * stride_conv_state_tok)[:, None]
+ )
+
+ mask = (idx_tokens_conv < state_len)[:, None] & (idx_feats < dim)[None, :]
+ tl.debug_barrier() # NOTE: use this due to bug in Triton compiler
+ tl.store(conv_states_ptrs_target, new_conv_state, mask)
+
+ else:
+ if load_init_state:
+ # update conv_state by shifting left, i.e. take last few cols from conv_state + cols from 'x'
+ idx_tokens_conv = tl.arange(0, NP2_STATELEN) # [BLOCK_M]
+
+ conv_states_ptrs_source = (
+ conv_states_ptr
+ + (conv_state_batch_coord * stride_conv_state_seq)
+ + (idx_feats * stride_conv_state_dim)[None, :]
+ + ((idx_tokens_conv + seqlen) * stride_conv_state_tok)[:, None]
+ ) # [BLOCK_M, BLOCK_N]
+ mask = (
+ (conv_state_batch_coord < num_cache_lines)
+ & ((idx_tokens_conv + seqlen) < state_len)[:, None]
+ & (idx_feats < dim)[None, :]
+ )
+ conv_state = tl.load(conv_states_ptrs_source, mask, other=0.0)
+
+ VAL = state_len - seqlen
+
+ x_ptrs = (
+ x_base[None, :]
+ + ((idx_tokens_conv - VAL) * stride_x_token)[:, None]
+ ) # [BLOCK_M, BLOCK_N]
+
+ mask_x = (
+ (idx_tokens_conv - VAL >= 0)[:, None]
+ & (idx_tokens_conv - VAL < seqlen)[:, None]
+ & (idx_feats < dim)[None, :]
+ ) # token-index # token-index # feature-index
+ loaded_x = tl.load(x_ptrs, mask_x, 0.0)
+
+ tl.debug_barrier() # need this due to the bug in tl.where not enforcing this when data is the result of another tl.load
+ new_conv_state = tl.where(
+ mask, conv_state, loaded_x
+ ) # BUG in 'tl.where' which requires a barrier before this
+ conv_states_ptrs_target = (
+ conv_states_base
+ + (idx_tokens_conv * stride_conv_state_tok)[:, None]
+ ) # [BLOCK_M, BLOCK_N]
+ mask = (idx_tokens_conv < state_len)[:, None] & (idx_feats < dim)[
+ None, :
+ ]
+ tl.store(conv_states_ptrs_target, new_conv_state, mask)
+ else: # load_init_state == False
+ # update conv_state by shifting left, BUT
+ # set cols prior to 'x' as zeros + cols from 'x'
+ idx_tokens_conv = tl.arange(0, NP2_STATELEN) # [BLOCK_M]
+
+ VAL = state_len - seqlen
+
+ x_ptrs = (
+ x_base[None, :]
+ + ((idx_tokens_conv - VAL) * stride_x_token)[:, None]
+ ) # [BLOCK_M, BLOCK_N]
+
+ mask_x = (
+ (idx_tokens_conv - VAL >= 0)[:, None]
+ & (idx_tokens_conv - VAL < seqlen)[:, None]
+ & (idx_feats < dim)[None, :]
+ ) # token-index # token-index # feature-index
+ new_conv_state = tl.load(x_ptrs, mask_x, 0.0)
+
+ conv_states_ptrs_target = (
+ conv_states_base
+ + (idx_tokens_conv * stride_conv_state_tok)[:, None]
+ ) # [BLOCK_M, BLOCK_N]
+ mask = (idx_tokens_conv < state_len)[:, None] & (idx_feats < dim)[
+ None, :
+ ]
+ tl.store(conv_states_ptrs_target, new_conv_state, mask)
+
+ else: # chunk_offset > 0
+ # read prior-token data from `x`
+ load_init_state = True
+ prior_tokens = x_base + (token_offset - 1) * stride_x_token
+ mask_w = idx_feats < dim
+ if KERNEL_WIDTH == 2:
+ conv_states_ptrs = prior_tokens # [BLOCK_N]
+ col0 = tl.load(conv_states_ptrs, mask_w, 0.0, cache_modifier=".ca")
+ if KERNEL_WIDTH == 3:
+ conv_states_ptrs = prior_tokens # [BLOCK_N]
+ col1 = tl.load(conv_states_ptrs, mask_w, 0.0, cache_modifier=".ca")
+ conv_states_ptrs = prior_tokens - 1 * stride_x_token # [BLOCK_N]
+ col0 = tl.load(conv_states_ptrs, mask_w, 0.0, cache_modifier=".ca")
+ if KERNEL_WIDTH == 4:
+ conv_states_ptrs = prior_tokens # [BLOCK_N]
+ col2 = tl.load(conv_states_ptrs, mask_w, 0.0, cache_modifier=".ca")
+ conv_states_ptrs = prior_tokens - 1 * stride_x_token # [BLOCK_N]
+ col1 = tl.load(conv_states_ptrs, mask_w, 0.0, cache_modifier=".ca")
+ conv_states_ptrs = prior_tokens - 2 * stride_x_token # [BLOCK_N]
+ col0 = tl.load(conv_states_ptrs, mask_w, 0.0, cache_modifier=".ca")
+ if KERNEL_WIDTH == 5:
+ # ruff: noqa: F841
+ conv_states_ptrs = prior_tokens # [BLOCK_N]
+ col3 = tl.load(conv_states_ptrs, mask_w, 0.0, cache_modifier=".ca")
+ conv_states_ptrs = prior_tokens - 1 * stride_x_token # [BLOCK_N]
+ col2 = tl.load(conv_states_ptrs, mask_w, 0.0, cache_modifier=".ca")
+ conv_states_ptrs = prior_tokens - 2 * stride_x_token # [BLOCK_N]
+ col1 = tl.load(conv_states_ptrs, mask_w, 0.0, cache_modifier=".ca")
+ conv_states_ptrs = prior_tokens - 3 * stride_x_token # [BLOCK_N]
+ col0 = tl.load(conv_states_ptrs, mask_w, 0.0, cache_modifier=".ca")
+
+ if HAS_BIAS:
+ bias = bias_ptr + idx_feats
+ mask_bias = idx_feats < dim
+ acc_preload = tl.load(bias, mask=mask_bias, other=0.0).to(
+ tl.float32
+ ) # [BLOCK_N]
+ else:
+ acc_preload = tl.zeros((BLOCK_N,), dtype=tl.float32)
+
+ x_base_1d = x_base + token_offset * stride_x_token # starting of chunk
+
+ # PRE-LOAD WEIGHTS
+ mask_w = idx_feats < dim
+ if KERNEL_WIDTH >= 2:
+ w_ptrs = w_base + (0 * stride_w_width) # [BLOCK_N] tensor
+ w_col0 = tl.load(w_ptrs, mask_w, other=0.0)
+ w_ptrs = w_base + (1 * stride_w_width) # [BLOCK_N] tensor
+ w_col1 = tl.load(w_ptrs, mask_w, other=0.0)
+ if KERNEL_WIDTH >= 3:
+ w_ptrs = w_base + (2 * stride_w_width) # [BLOCK_N] tensor
+ w_col2 = tl.load(w_ptrs, mask_w, other=0.0)
+ if KERNEL_WIDTH >= 4:
+ w_ptrs = w_base + (3 * stride_w_width) # [BLOCK_N] tensor
+ w_col3 = tl.load(w_ptrs, mask_w, other=0.0)
+ mask_x_1d = idx_feats < dim
+ for idx_token in range(segment_len):
+ acc = acc_preload
+
+ matrix_w = w_col0
+ matrix_x = col0
+ for j in tl.static_range(KERNEL_WIDTH):
+
+ if KERNEL_WIDTH == 2:
+ if j == 1: # KERNEL_WIDTH-1:
+ matrix_w = w_col1
+ x_ptrs_1d = x_base_1d + idx_token * stride_x_token # [BLOCK_N]
+ matrix_x = tl.load(x_ptrs_1d, mask=mask_x_1d)
+ elif KERNEL_WIDTH == 3:
+ if j == 1:
+ matrix_w = w_col1
+ matrix_x = col1
+ elif j == 2:
+ matrix_w = w_col2
+ x_ptrs_1d = x_base_1d + idx_token * stride_x_token # [BLOCK_N]
+ matrix_x = tl.load(x_ptrs_1d, mask=mask_x_1d)
+ elif KERNEL_WIDTH == 4:
+ if j == 1:
+ matrix_w = w_col1
+ matrix_x = col1
+ elif j == 2:
+ matrix_w = w_col2
+ matrix_x = col2
+ elif j == 3:
+ matrix_w = w_col3
+ x_ptrs_1d = x_base_1d + idx_token * stride_x_token # [BLOCK_N]
+ matrix_x = tl.load(x_ptrs_1d, mask=mask_x_1d)
+
+ acc += matrix_x * matrix_w # [BLOCK_N]
+
+ if KERNEL_WIDTH == 2:
+ col0 = matrix_x
+ elif KERNEL_WIDTH == 3:
+ col0 = col1
+ col1 = matrix_x
+ elif KERNEL_WIDTH == 4:
+ col0 = col1
+ col1 = col2
+ col2 = matrix_x
+
+ if SILU_ACTIVATION:
+ acc = acc / (1 + tl.exp(-acc))
+ mask_1d = (idx_token < segment_len) & (
+ idx_feats < dim
+ ) # token-index # feature-index
+ o_ptrs = (
+ o_ptr
+ + (sequence_start_index + token_offset + idx_token) * stride_o_token
+ + (idx_feats * stride_o_dim)
+ )
+
+ tl.store(o_ptrs, acc, mask=mask_1d)
+
+
+def causal_conv1d_fn(
+ x: torch.Tensor,
+ weight: torch.Tensor,
+ bias: Union[torch.Tensor, None],
+ conv_states: torch.Tensor,
+ query_start_loc: torch.Tensor,
+ cache_indices: Optional[torch.Tensor] = None,
+ has_initial_state: Optional[torch.Tensor] = None,
+ activation: Optional[str] = "silu",
+ pad_slot_id: int = PAD_SLOT_ID,
+ metadata=None,
+ validate_data=False,
+):
+ """support varlen + continuous batching when x is 2D tensor
+
+ x: (dim,cu_seq_len)
+ cu_seq_len = total tokens of all seqs in that batch
+ sequences are concatenated from left to right for varlen
+ weight: (dim, width)
+ conv_states: (...,dim,width - 1) itype
+ updated inplace if provided
+        [it uses `cache_indices` to get the index into the cache of conv_state for that sequence
+
+        conv_state[cache_indices[i]] for seq-i - to be used as initial_state when has_initial_state[i] = True
+        and after that conv_state[cache_indices[i]] needs to be shifted left and updated with values from 'x'
+ ]
+ query_start_loc: (batch + 1) int32
+ The cumulative sequence lengths of the sequences in
+        the batch, used to index into the sequences; prepended with 0.
+ if
+ x = [5, 1, 1, 1] <- continuous batching (batch=4)
+ then
+        query_start_loc = [0, 5, 6, 7, 8] <- each entry is the starting index of a sequence, and the last value is
+ the ending index of the last sequence
+ [length(query_start_loc)-1 == batch]
+ for example: query_start_loc = torch.Tensor([0,10,16,17]),
+ x.shape=(dim,17)
+ cache_indices: (batch) int32
+ indicates the corresponding state index,
+ like so: conv_state = conv_states[cache_indices[batch_id]]
+ has_initial_state: (batch) bool
+        indicates whether the kernel should take the current state as the
+        initial state for the calculations
+ [single boolean for each sequence in the batch: True or False]
+ bias: (dim,)
+ activation: either None or "silu" or "swish" or True
+ pad_slot_id: int
+ if cache_indices is passed, lets the kernel identify padded
+ entries that will not be processed,
+ for example: cache_indices = [pad_slot_id, 1, 20, pad_slot_id]
+ in this case, the kernel will not process entries at
+ indices 0 and 3
+
+ out: same shape as `x`
+ """
+ if isinstance(activation, bool) and activation:
+ activation = "silu"
+
+ args = None
+ out = torch.empty_like(x)
+ if metadata is not None:
+ cu_seqlen = metadata.cu_seqlen
+ nums_dict = metadata.nums_dict
+ # x = metadata.x
+ args = nums_dict
+ batch_ptr = metadata.batch_ptr
+ token_chunk_offset_ptr = metadata.token_chunk_offset_ptr
+ else:
+ seqlens = np.diff(query_start_loc.to("cpu"))
+ args = seqlens
+ MAX_NUM_PROGRAMS = 1024
+
+ batch_ptr = torch.full(
+ (MAX_NUM_PROGRAMS,), PAD_SLOT_ID, dtype=torch.int32, device=x.device
+ ) # tracking which seq-idx the Triton program is handling
+ token_chunk_offset_ptr = torch.full(
+ (MAX_NUM_PROGRAMS,), PAD_SLOT_ID, dtype=torch.int32, device=x.device
+ ) # tracking BLOCK_M-based index in the sequence the Triton program is handling
+
+ is_channel_last = (x.stride(0) == 1) & (x.stride(1) > 1)
+ dim, cu_seqlen = x.shape
+ _, width = weight.shape
+ state_len = width - 1
+ np2_statelen = triton.next_power_of_2(state_len)
+
+ padded_batch = query_start_loc.size(0) - 1
+ stride_x_seq = 0
+ stride_x_dim = x.stride(0)
+ stride_x_token = x.stride(1)
+ stride_w_dim = weight.stride(0)
+ stride_w_width = weight.stride(1)
+ stride_istate_seq = 0
+ stride_istate_dim = 0
+ stride_istate_token = 0
+ num_cache_lines = 0
+ if conv_states is not None:
+ # extensions to support vLLM:
+        # 1. conv_states is used to replace initial_states
+        # 2. conv_states serves as a cache whose number of cache lines can be larger than the batch size
+        # 3. each sequence x[idx] is mapped to the cache line at the index specified via cache_indices[idx]
+ # 4. computation can be skipped if cache_indices[idx] == pad_slot_id
+ num_cache_lines = conv_states.size(0)
+ assert (
+ num_cache_lines == conv_states.shape[0]
+ and dim == conv_states.shape[1]
+ and width - 1 <= conv_states.shape[2]
+ )
+ stride_istate_seq = conv_states.stride(0)
+ stride_istate_dim = conv_states.stride(1)
+ stride_istate_token = conv_states.stride(2)
+ # assert stride_istate_dim == 1
+ if out.dim() == 2:
+ stride_o_seq = 0
+ stride_o_dim = out.stride(0)
+ stride_o_token = out.stride(1)
+ else:
+ stride_o_seq = out.stride(0)
+ stride_o_dim = out.stride(1)
+ stride_o_token = out.stride(2)
+
+ if validate_data:
+ assert x.dim() == 2
+ assert query_start_loc is not None
+ assert query_start_loc.dim() == 1
+ assert x.stride(0) == 1 or x.stride(1) == 1
+ if bias is not None:
+ assert bias.dim() == 1
+ assert dim == bias.size(0)
+ if cache_indices is not None:
+ assert cache_indices.dim() == 1
+ assert padded_batch == cache_indices.size(0)
+ if has_initial_state is not None:
+ assert has_initial_state.size() == (padded_batch,)
+ assert (
+ conv_states is not None
+ ), "ERROR: `has_initial_state` is used, which needs also `conv_states`"
+ assert weight.stride(1) == 1
+ assert (dim, width) == weight.shape
+ assert is_channel_last, "Need to run in channel-last layout"
+
+ if metadata is None:
+
+ def num_program(META, seqlens):
+ tot = 0
+
+ mlist = []
+ offsetlist = [] # type: ignore
+
+ nums = -(-seqlens // META["BLOCK_M"])
+
+ tot = nums.sum().item()
+ mlist = np.repeat(np.arange(len(nums)), nums)
+ for idx, num in enumerate(nums):
+ offsetlist.extend(
+ range(num)
+ ) # chunk-idx if a sequence is split into multiple chunks
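+            # Worked example (illustrative): seqlens = [10, 3] with BLOCK_M = 8 gives
+            # nums = [2, 1], so mlist = [0, 0, 1] (sequence id per program) and
+            # offsetlist = [0, 1, 0] (chunk index within each sequence).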
+
+ if META["batch_ptr"].nelement() < len(mlist):
+ newlen = len(mlist) + 1
+ META["batch_ptr"].resize_(newlen).fill_(PAD_SLOT_ID)
+ META["token_chunk_offset_ptr"].resize_(newlen).fill_(PAD_SLOT_ID)
+
+ if META["batch_ptr"].nelement() >= len(mlist):
+ META["batch_ptr"][0 : len(mlist)].copy_(
+ torch.from_numpy(np.array(mlist))
+ )
+ META["token_chunk_offset_ptr"][0 : len(mlist)].copy_(
+ torch.from_numpy(np.array(offsetlist))
+ )
+
+ META["batch_ptr"] = META["batch_ptr"].to(META["x_ptr"].device)
+ META["token_chunk_offset_ptr"] = META["token_chunk_offset_ptr"].to(
+ META["x_ptr"].device
+ )
+ return tot
+
+ else:
+
+ def num_program(META, nums_dict):
+ tot = nums_dict[META["BLOCK_M"]]["tot"]
+
+ mlist = nums_dict[META["BLOCK_M"]]["mlist"]
+ mlist_len = nums_dict[META["BLOCK_M"]]["mlist_len"]
+
+ offsetlist = nums_dict[META["BLOCK_M"]]["offsetlist"]
+
+ if nums_dict[META["BLOCK_M"]]["batch_ptr"] is not None:
+ META["batch_ptr"] = nums_dict[META["BLOCK_M"]]["batch_ptr"]
+ META["token_chunk_offset_ptr"] = nums_dict[META["BLOCK_M"]][
+ "token_chunk_offset_ptr"
+ ]
+ else:
+ if META["batch_ptr"].nelement() < mlist_len:
+ newlen = mlist_len + 1
+ META["batch_ptr"].resize_(newlen).fill_(PAD_SLOT_ID)
+ META["token_chunk_offset_ptr"].resize_(newlen).fill_(PAD_SLOT_ID)
+
+ if META["batch_ptr"].nelement() >= mlist_len:
+ META["batch_ptr"][0:mlist_len].copy_(mlist)
+ META["token_chunk_offset_ptr"][0:mlist_len].copy_(offsetlist)
+ return tot
+
+ def grid(META):
+ return (
+ num_program(META, args),
+ triton.cdiv(dim, META["BLOCK_N"]),
+ )
+
+ if batch_ptr.device != x.device:
+ batch_ptr = batch_ptr.to(x.device)
+ token_chunk_offset_ptr = token_chunk_offset_ptr.to(x.device)
+
+ _causal_conv1d_fwd_kernel[grid](
+ # Pointers to matrices
+ x,
+ weight,
+ bias,
+ conv_states,
+ cache_indices,
+ has_initial_state,
+ query_start_loc,
+ batch_ptr,
+ token_chunk_offset_ptr,
+ out,
+ # Matrix dimensions
+ padded_batch,
+ dim,
+ cu_seqlen,
+ num_cache_lines,
+ # stride
+ stride_x_seq,
+ stride_x_dim,
+ stride_x_token,
+ stride_w_dim,
+ stride_w_width,
+ stride_istate_seq,
+ stride_istate_dim,
+ stride_istate_token,
+ stride_o_seq,
+ stride_o_dim,
+ stride_o_token,
+ # others
+ pad_slot_id,
+ # META
+ HAS_BIAS=bias is not None,
+ KERNEL_WIDTH=width,
+ SILU_ACTIVATION=activation in ["silu", "swish"],
+ HAS_INITIAL_STATES=has_initial_state is not None,
+ HAS_CACHE=conv_states is not None,
+ IS_CONTINUOUS_BATCHING=cache_indices is not None,
+ USE_PAD_SLOT=pad_slot_id is not None,
+ NP2_STATELEN=np2_statelen,
+ # launch_cooperative_grid=True
+ BLOCK_M=8,
+ BLOCK_N=256,
+ num_stages=2,
+ )
+ return out
+
+
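+# Illustrative sketch (an assumption, not upstream code): building the varlen
+# metadata described in the docstring of causal_conv1d_fn from per-request prompt
+# lengths. Note the kernel also expects `x` in channel-last layout (x.stride(0) == 1).
+def _example_build_varlen_metadata(seq_lens=(5, 1, 1, 1), device="cuda"):
+    lens = torch.tensor(seq_lens, dtype=torch.int32, device=device)
+    # query_start_loc == [0, 5, 6, 7, 8] for seq_lens == (5, 1, 1, 1)
+    query_start_loc = torch.zeros(len(seq_lens) + 1, dtype=torch.int32, device=device)
+    query_start_loc[1:] = torch.cumsum(lens, dim=0)
+    # one cache line per request; none of them carries a previous state here
+    cache_indices = torch.arange(len(seq_lens), dtype=torch.int32, device=device)
+    has_initial_state = torch.zeros(len(seq_lens), dtype=torch.bool, device=device)
+    return query_start_loc, cache_indices, has_initial_state
+
+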
+@triton.jit()
+def _causal_conv1d_update_kernel(
+ # Pointers to matrices
+ x_ptr, # (batch, dim, seqlen)
+ w_ptr, # (dim, width)
+ bias_ptr,
+ conv_state_ptr,
+ cache_seqlens_ptr, # circular buffer
+ conv_state_indices_ptr,
+ num_accepted_tokens_ptr,
+ intermediate_conv_window_ptr,
+ o_ptr, # (batch, dim, seqlen)
+ # Matrix dimensions
+ batch: int,
+ dim: tl.constexpr,
+ seqlen: tl.constexpr,
+ state_len: tl.constexpr,
+ num_cache_lines: tl.constexpr, # added to support vLLM larger cache lines
+ # Strides
+ stride_x_seq: tl.constexpr,
+ stride_x_dim: tl.constexpr,
+ stride_x_token: tl.constexpr,
+ stride_w_dim: tl.constexpr,
+ stride_w_width: tl.constexpr,
+ stride_conv_state_seq: tl.constexpr,
+ stride_conv_state_dim: tl.constexpr,
+ stride_conv_state_tok: tl.constexpr,
+ stride_state_indices: tl.constexpr,
+ stride_inter_seq: tl.constexpr,
+ stride_inter_step: tl.constexpr,
+ stride_inter_dim: tl.constexpr,
+ stride_inter_win: tl.constexpr,
+ stride_o_seq: tl.constexpr,
+ stride_o_dim: tl.constexpr,
+ stride_o_token: tl.constexpr,
+ # others
+ pad_slot_id: tl.constexpr,
+ # Meta-parameters
+ HAS_BIAS: tl.constexpr,
+ KERNEL_WIDTH: tl.constexpr,
+ SILU_ACTIVATION: tl.constexpr,
+ IS_CONTINUOUS_BATCHING: tl.constexpr,
+ IS_SPEC_DECODING: tl.constexpr,
+ NP2_STATELEN: tl.constexpr,
+ USE_PAD_SLOT: tl.constexpr,
+ BLOCK_N: tl.constexpr,
+ SAVE_INTERMEDIATE: tl.constexpr,
+):
+ # ruff: noqa: E501
+ idx_seq = tl.program_id(0)
+ if idx_seq >= batch:
+ return
+
+ # [BLOCK_N,] elements along the feature-dimension (channel)
+ idx_feats = tl.program_id(1) * BLOCK_N + tl.arange(0, BLOCK_N)
+
+ if IS_CONTINUOUS_BATCHING:
+ # mask = idx_seq < batch
+ conv_state_batch_coord = tl.load(
+ conv_state_indices_ptr + idx_seq * stride_state_indices
+ ).to(tl.int64)
+ else:
+ conv_state_batch_coord = idx_seq
+ if USE_PAD_SLOT: # noqa
+ if conv_state_batch_coord == pad_slot_id:
+ # not processing as this is not the actual sequence
+ return
+
+ if IS_SPEC_DECODING:
+ # The rolling of conv state:
+ #
+ # Before forward, the conv_state is:
+ # [history1, history2, ..., historyM].
+ #
+ # After forward, the conv_state becomes:
+ # [history2, ..., historyM, draft1, draft2, ..., draftN].
+ #
+ # After acceptance, it becomes:
+ #
+ # - accept 1 tokens: [history2, ..., historyM, draft1]
+ # - accept 2 tokens: [history3, ..., historyM, draft1, draft2]
+ # - and so on.
+ conv_state_token_offset = tl.load(num_accepted_tokens_ptr + idx_seq) - 1
+ else:
+ conv_state_token_offset = 0
+
+ # STEP 1: READ init_state data
+ conv_states_base = (
+ conv_state_ptr
+ + (conv_state_batch_coord * stride_conv_state_seq)
+ + (idx_feats * stride_conv_state_dim)
+ )
+ mask_w = idx_feats < dim
+
+ prior_tokens = conv_states_base + conv_state_token_offset * stride_conv_state_tok
+ if KERNEL_WIDTH >= 2:
+ conv_states_ptrs = prior_tokens # [BLOCK_N]
+ col0 = tl.load(conv_states_ptrs, mask_w, 0.0)
+ if KERNEL_WIDTH >= 3:
+ conv_states_ptrs = prior_tokens + 1 * stride_conv_state_tok # [BLOCK_N]
+ col1 = tl.load(conv_states_ptrs, mask_w, 0.0)
+ if KERNEL_WIDTH >= 4:
+ conv_states_ptrs = prior_tokens + 2 * stride_conv_state_tok # [BLOCK_N]
+ col2 = tl.load(conv_states_ptrs, mask_w, 0.0)
+ if KERNEL_WIDTH == 5:
+ conv_states_ptrs = prior_tokens + 3 * stride_conv_state_tok # [BLOCK_N]
+ col3 = tl.load(conv_states_ptrs, mask_w, 0.0)
+
+ # STEP 2: assume state_len > seqlen
+ idx_tokens = tl.arange(0, NP2_STATELEN) # [BLOCK_M]
+
+    # The conv_state update works in a sliding-window manner:
+    # at each forward pass, the tokens are shifted by 1, so we
+    # load starting from idx_tokens + 1.
+ conv_state_ptrs_source = (
+ conv_state_ptr
+ + (conv_state_batch_coord * stride_conv_state_seq)
+ + conv_state_token_offset * stride_conv_state_tok
+ + (idx_feats * stride_conv_state_dim)[None, :]
+ + ((idx_tokens + 1) * stride_conv_state_tok)[:, None]
+ ) # [BLOCK_M, BLOCK_N]
+ mask = (
+ (conv_state_batch_coord < num_cache_lines)
+ & ((idx_tokens + seqlen) < state_len)[:, None]
+ & (idx_feats < dim)[None, :]
+ )
+ conv_state = tl.load(conv_state_ptrs_source, mask, other=0.0)
+
+ VAL = state_len - seqlen
+ x_base = x_ptr + (idx_seq * stride_x_seq) + (idx_feats * stride_x_dim) # [BLOCK_N]
+
+ x_ptrs = (
+ x_base[None, :] + ((idx_tokens - VAL) * stride_x_token)[:, None]
+ ) # [BLOCK_M, BLOCK_N]
+
+ mask_x = (
+ (idx_tokens - VAL >= 0)[:, None]
+ & (idx_tokens - VAL < seqlen)[:, None]
+ & (idx_feats < dim)[None, :]
+ ) # token-index # token-index # feature-index
+ loaded_x = tl.load(x_ptrs, mask_x, 0.0)
+ tl.debug_barrier()
+
+ new_conv_state = tl.where(mask, conv_state, loaded_x)
+
+ conv_state_base = (
+ conv_state_ptr
+ + (conv_state_batch_coord * stride_conv_state_seq)
+ + (idx_feats * stride_conv_state_dim)
+ ) # [BLOCK_N,]
+ conv_state_ptrs_target = (
+ conv_state_base + (idx_tokens * stride_conv_state_tok)[:, None]
+ ) # [BLOCK_M, BLOCK_N]
+ mask = (idx_tokens < state_len)[:, None] & (idx_feats < dim)[None, :]
+ tl.store(conv_state_ptrs_target, new_conv_state, mask)
+
+ # STEP 3: init accumulator
+ if HAS_BIAS:
+ bias = bias_ptr + idx_feats
+ mask_bias = idx_feats < dim
+ acc_preload = tl.load(bias, mask=mask_bias, other=0.0).to(
+ tl.float32
+ ) # [BLOCK_N]
+ else:
+ acc_preload = tl.zeros((BLOCK_N,), dtype=tl.float32)
+
+ # STEP 4:
+ # PRE-LOAD WEIGHTS
+ # first kernel column, configured for weights to handle BLOCK_N features in range
+ w_base = w_ptr + (idx_feats * stride_w_dim) # [BLOCK_N,]
+ mask_w = idx_feats < dim
+ if KERNEL_WIDTH >= 2:
+ w_ptrs = w_base + (0 * stride_w_width) # [BLOCK_N] tensor
+ w_col0 = tl.load(w_ptrs, mask_w, other=0.0)
+ w_ptrs = w_base + (1 * stride_w_width) # [BLOCK_N] tensor
+ w_col1 = tl.load(w_ptrs, mask_w, other=0.0)
+ if KERNEL_WIDTH >= 3:
+ w_ptrs = w_base + (2 * stride_w_width) # [BLOCK_N] tensor
+ w_col2 = tl.load(w_ptrs, mask_w, other=0.0)
+ if KERNEL_WIDTH >= 4:
+ w_ptrs = w_base + (3 * stride_w_width) # [BLOCK_N] tensor
+ w_col3 = tl.load(w_ptrs, mask_w, other=0.0)
+
+ x_base_1d = x_base # starting of chunk [BLOCK_N]
+ mask_x_1d = idx_feats < dim
+
+ # STEP 5: compute each token
+ for idx_token in tl.static_range(seqlen):
+ acc = acc_preload
+
+ matrix_w = w_col0
+ matrix_x = col0
+ for j in tl.static_range(KERNEL_WIDTH):
+ if KERNEL_WIDTH == 2:
+ if j == 1: # KERNEL_WIDTH-1:
+ matrix_w = w_col1
+ x_ptrs_1d = x_base_1d + idx_token * stride_x_token # [BLOCK_N]
+ matrix_x = tl.load(x_ptrs_1d, mask=mask_x_1d)
+ elif KERNEL_WIDTH == 3:
+ if j == 1:
+ matrix_w = w_col1
+ matrix_x = col1
+ elif j == 2:
+ matrix_w = w_col2
+ x_ptrs_1d = x_base_1d + idx_token * stride_x_token # [BLOCK_N]
+ matrix_x = tl.load(x_ptrs_1d, mask=mask_x_1d)
+ elif KERNEL_WIDTH == 4:
+ if j == 1:
+ matrix_w = w_col1
+ matrix_x = col1
+ elif j == 2:
+ matrix_w = w_col2
+ matrix_x = col2
+ elif j == 3:
+ matrix_w = w_col3
+ x_ptrs_1d = x_base_1d + idx_token * stride_x_token # [BLOCK_N]
+ matrix_x = tl.load(x_ptrs_1d, mask=mask_x_1d)
+
+ acc += matrix_x * matrix_w # [BLOCK_N]
+
+ if KERNEL_WIDTH == 2:
+ col0 = matrix_x
+ elif KERNEL_WIDTH == 3:
+ col0 = col1
+ col1 = matrix_x
+ elif KERNEL_WIDTH == 4:
+ col0 = col1
+ col1 = col2
+ col2 = matrix_x
+
+ if SILU_ACTIVATION:
+ acc = acc / (1 + tl.exp(-acc))
+ mask_1d = (idx_token < seqlen) & (
+ idx_feats < dim
+ ) # token-index # feature-index
+ o_ptrs = (
+ o_ptr
+ + (idx_seq) * stride_o_seq
+ + idx_token * stride_o_token
+ + (idx_feats * stride_o_dim)
+ )
+
+ tl.store(o_ptrs, acc, mask=mask_1d)
+
+ if SAVE_INTERMEDIATE:
+ # Save the window state after consuming this token
+ # Layout: [seq(cache line), step, dim, win(K-1)]
+ base_ptr = (
+ intermediate_conv_window_ptr
+ + conv_state_batch_coord * stride_inter_seq
+ + idx_token * stride_inter_step
+ + idx_feats * stride_inter_dim
+ )
+ if KERNEL_WIDTH >= 2:
+ tl.store(base_ptr + 0 * stride_inter_win, col0, mask=mask_w)
+ if KERNEL_WIDTH >= 3:
+ tl.store(base_ptr + 1 * stride_inter_win, col1, mask=mask_w)
+ if KERNEL_WIDTH >= 4:
+ tl.store(base_ptr + 2 * stride_inter_win, col2, mask=mask_w)
+
+
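+# Illustrative sketch (an assumption, not upstream code) of the conv-state rolling
+# described inside the kernel above for speculative decoding: after drafting, only
+# the accepted draft tokens are kept and the window is shifted accordingly.
+def _example_spec_decode_rolling(history, drafts, num_accepted):
+    # e.g. history=[h1, h2, h3], drafts=[d1, d2], num_accepted=1 -> [h2, h3, d1]
+    rolled = history + drafts[:num_accepted]
+    return rolled[-len(history) :]
+
+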
+def causal_conv1d_update(
+ x: torch.Tensor,
+ conv_state: torch.Tensor,
+ weight: torch.Tensor,
+ bias: Optional[torch.Tensor] = None,
+ activation: Union[bool, str, None] = None,
+ cache_seqlens: Optional[torch.Tensor] = None,
+ conv_state_indices: Optional[torch.Tensor] = None,
+ num_accepted_tokens: Optional[torch.Tensor] = None,
+ intermediate_conv_window: Optional[torch.Tensor] = None,
+ pad_slot_id: int = PAD_SLOT_ID,
+ metadata=None,
+ validate_data=False,
+):
+ """
+ x: (batch, dim) or (batch, dim, seqlen)
+ [shape=2: single token prediction]
+ [shape=3: single or multiple tokens prediction]
+ conv_state: (..., dim, state_len), where state_len >= width - 1
+ weight: (dim, width)
+ bias: (dim,)
+ cache_seqlens: (batch,), dtype int32.
+ If not None, the conv_state is treated as a circular buffer.
+ The conv_state will be updated by copying x to the conv_state
+ starting at the index
+ @cache_seqlens % state_len.
+ conv_state_indices: (batch,), dtype int32
+ If not None, the conv_state is a larger tensor along the batch dim,
+ and we are selecting the batch coords specified by conv_state_indices.
+ Useful for a continuous batching scenario.
+ pad_slot_id: int
+ if cache_indices is passed, lets the kernel identify padded
+ entries that will not be processed,
+        for example: cache_indices = [pad_slot_id, 1, 20, pad_slot_id]
+ in this case, the kernel will not process entries at
+ indices 0 and 3
+ out: (batch, dim) or (batch, dim, seqlen)
+ """
+ if validate_data:
+ assert cache_seqlens is None # not implemented yet - ok for vLLM
+ assert pad_slot_id is not None
+ assert x.stride(1) == 1
+ if isinstance(activation, bool):
+ activation = "silu" if activation is True else None
+ elif activation is not None:
+ assert activation in ["silu", "swish"]
+ unsqueeze = x.dim() == 2
+ if unsqueeze:
+ # make it (batch, dim, seqlen) with seqlen == 1
+ x = x.unsqueeze(-1)
+ batch, dim, seqlen = x.shape
+ _, width = weight.shape
+ # conv_state: (..., dim, state_len), where state_len >= width - 1
+ num_cache_lines, _, state_len = conv_state.size()
+
+ if validate_data:
+ assert dim == weight.size(0)
+ assert (
+ conv_state.stride(-2) == 1
+ ), f"ERROR: expect contiguous along feat-dim of conv_state (currently stride={conv_state.stride()})"
+ assert state_len >= width - 1
+ # when above happens, we don't shift-left to keep any records in conv_state
+ assert dim == conv_state.size(1)
+ if conv_state_indices is None:
+ assert conv_state.size(0) >= batch
+ else:
+ assert (batch,) == conv_state_indices.shape
+
+ assert num_cache_lines >= batch
+ assert weight.stride(1) == 1 # Need this
+ assert cache_seqlens is None # not needed for vLLM - circular buffer
+
+ # adopt the strategy in vLLM that overwrite on 'x' directly, rather than creating a new tensor 'o'
+ out = x
+ stride_w_dim, stride_w_width = weight.stride()
+
+ stride_x_seq, stride_x_dim, stride_x_token = x.stride() # X (batch, dim, seqlen)
+
+ stride_o_seq, stride_o_dim, stride_o_token = out.stride()
+ stride_istate_seq, stride_istate_dim, stride_istate_token = conv_state.stride()
+ stride_state_indices = (
+ conv_state_indices.stride(0) if conv_state_indices is not None else 0
+ )
+ state_len = width - 1 + (seqlen - 1) # effective state_len needed
+ np2_statelen = triton.next_power_of_2(state_len)
+
+ def grid(META):
+ return (
+ batch,
+ triton.cdiv(dim, META["BLOCK_N"]),
+ )
+
+ # prepare intermediate buffer strides if provided
+ if intermediate_conv_window is not None:
+ stride_inter_seq, stride_inter_step, stride_inter_dim, stride_inter_win = (
+ intermediate_conv_window.stride(0),
+ intermediate_conv_window.stride(1),
+ intermediate_conv_window.stride(2),
+ intermediate_conv_window.stride(3),
+ )
+ else:
+ stride_inter_seq = stride_inter_step = stride_inter_dim = stride_inter_win = 0
+
+ _causal_conv1d_update_kernel[grid](
+ # Pointers to matrices
+ x,
+ weight,
+ bias,
+ conv_state,
+ cache_seqlens,
+ conv_state_indices,
+ num_accepted_tokens,
+ intermediate_conv_window if intermediate_conv_window is not None else x,
+ out,
+ # Matrix dimensions
+ batch,
+ dim,
+ seqlen,
+ state_len,
+ num_cache_lines,
+ # stride
+ stride_x_seq,
+ stride_x_dim,
+ stride_x_token,
+ stride_w_dim,
+ stride_w_width,
+ stride_istate_seq,
+ stride_istate_dim,
+ stride_istate_token,
+ stride_state_indices,
+ stride_inter_seq,
+ stride_inter_step,
+ stride_inter_dim,
+ stride_inter_win,
+ stride_o_seq,
+ stride_o_dim,
+ stride_o_token,
+ # others
+ pad_slot_id,
+ # META
+ HAS_BIAS=bias is not None,
+ KERNEL_WIDTH=width,
+ SILU_ACTIVATION=activation in ["silu", "swish"],
+ IS_CONTINUOUS_BATCHING=conv_state_indices is not None,
+ IS_SPEC_DECODING=num_accepted_tokens is not None,
+ NP2_STATELEN=np2_statelen,
+ USE_PAD_SLOT=pad_slot_id is not None,
+ BLOCK_N=256,
+ SAVE_INTERMEDIATE=intermediate_conv_window is not None,
+ )
+ if unsqueeze:
+ out = out.squeeze(-1)
+ return out
diff --git a/python/sglang/srt/layers/attention/mamba/mamba.py b/python/sglang/srt/layers/attention/mamba/mamba.py
new file mode 100644
index 00000000000..045a0404867
--- /dev/null
+++ b/python/sglang/srt/layers/attention/mamba/mamba.py
@@ -0,0 +1,64 @@
+from typing import Callable, List, Tuple
+
+import torch
+
+LoaderFunction = Callable[[torch.Tensor, torch.Tensor], None]
+
+
+def mamba_v2_sharded_weight_loader(
+ shard_spec: List[Tuple[int, int, float]],
+ tp_size: int,
+ tp_rank: int,
+) -> LoaderFunction:
+ """Create a weight loader for mamba v2. This ensures that the projections
+ are correctly sharded so that they can be split into x, B, C. It also
+    ensures that all the groups corresponding to a head shard are placed
+    together with it.
+ """
+
+ def loader(param: torch.Tensor, loaded_weight: torch.Tensor) -> None:
+
+ # - track boundary of (sharded) param, and loaded_weight, respectively
+ boundary, loaded_boundary = 0, 0
+
+ # - iterate over the shard specs
+ for full_dim, extra, duplicate_groups in shard_spec:
+ # - full dim is the model dim (before TP).
+            # - extra > 0 means there is an expected overall increase
+            #   in dimensions due to replication.
+            # - ratio is used to map the tp_rank to the actual shard
+ # rank. This is useful when there is replication of
+ # groups to accompany head shards.
+
+ # - size of the loaded shard
+ shard_size = full_dim // tp_size
+
+ # - compute the rank into the loaded shard.
+ # - if there is replication, different TP shards will
+ # take from the same rank.
+ # NOTE: currently we only support duplication
+ # in the case where num_groups == 1
+ rank = 0 if duplicate_groups else tp_rank
+
+ # - leftmost boundary index into loaded weight.
+ loaded_skip = rank * shard_size
+ loaded_start_idx = loaded_boundary + loaded_skip
+
+ # - take these many dims from the loaded weight.
+ take = min(shard_size, full_dim - extra - loaded_skip)
+
+ # - always shard on dim 0
+ # - the ignore is for a mundane mypy error as it does not
+ # seem to handle slices well.
+ # https://github.com/python/mypy/issues/2410
+ param.data[
+ boundary : (boundary + take), ... # type: ignore[misc]
+ ] = loaded_weight[
+ loaded_start_idx : (loaded_start_idx + take) # type: ignore[misc]
+ ] # type: ignore[misc]
+
+ # move indexing boundaries
+ boundary += shard_size
+ loaded_boundary += full_dim - extra
+
+ return loader
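+
+
+# Illustrative sketch (an assumption, not upstream code): a tiny shard_spec with two
+# segments - an 8-wide projection split across tp_size=2, and a 4-wide group block
+# whose checkpoint stores only 2 rows (extra=2) that every rank duplicates.
+def _example_sharded_load(tp_rank: int = 0) -> torch.Tensor:
+    tp_size = 2
+    shard_spec = [(8, 0, False), (4, 2, True)]  # (full_dim, extra, duplicate_groups)
+    param = torch.zeros(8 // tp_size + 4 // tp_size, 3)  # per-rank parameter, shape (6, 3)
+    loaded_weight = torch.arange(10 * 3, dtype=torch.float32).reshape(10, 3)  # checkpoint (8 + 2, 3)
+    loader = mamba_v2_sharded_weight_loader(shard_spec, tp_size, tp_rank)
+    loader(param, loaded_weight)
+    # rank 0 receives rows 0..3 of the projection, rank 1 receives rows 4..7;
+    # both ranks copy the duplicated rows 8..9 into their last two slots.
+    return param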
diff --git a/python/sglang/srt/layers/attention/torch_native_backend.py b/python/sglang/srt/layers/attention/torch_native_backend.py
index bb06076c118..6a67ea9476e 100644
--- a/python/sglang/srt/layers/attention/torch_native_backend.py
+++ b/python/sglang/srt/layers/attention/torch_native_backend.py
@@ -193,10 +193,13 @@ def forward_extend(
else:
o = torch.empty_like(q)
+ if layer.is_cross_attention:
+ cache_loc = forward_batch.encoder_out_cache_loc
+ else:
+ cache_loc = forward_batch.out_cache_loc
+
if save_kv_cache:
- forward_batch.token_to_kv_pool.set_kv_buffer(
- layer, forward_batch.out_cache_loc, k, v
- )
+ forward_batch.token_to_kv_pool.set_kv_buffer(layer, cache_loc, k, v)
use_gqa = layer.tp_q_head_num != layer.tp_k_head_num
@@ -241,10 +244,13 @@ def forward_decode(
else:
o = torch.empty_like(q)
+ if layer.is_cross_attention:
+ cache_loc = forward_batch.encoder_out_cache_loc
+ else:
+ cache_loc = forward_batch.out_cache_loc
+
if save_kv_cache:
- forward_batch.token_to_kv_pool.set_kv_buffer(
- layer, forward_batch.out_cache_loc, k, v
- )
+ forward_batch.token_to_kv_pool.set_kv_buffer(layer, cache_loc, k, v)
use_gqa = layer.tp_q_head_num != layer.tp_k_head_num
diff --git a/python/sglang/srt/layers/attention/triton_backend.py b/python/sglang/srt/layers/attention/triton_backend.py
index a3d8f88eb15..26241d8493b 100644
--- a/python/sglang/srt/layers/attention/triton_backend.py
+++ b/python/sglang/srt/layers/attention/triton_backend.py
@@ -20,6 +20,14 @@
from sglang.srt.speculative.eagle_utils import EagleDraftInput, EagleVerifyInput
+def logit_capping_mod(logit_capping_method, logit_cap):
+ # positive logit_cap -> tanh cap
+ if logit_capping_method == "tanh":
+ return logit_cap
+ else:
+        raise ValueError(f"Unsupported logit_capping_method: {logit_capping_method}")
+
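+
+# Illustrative reference (not part of this change): the returned value feeds kernels
+# that apply logit_cap * tanh(qk / logit_cap), bounding attention scores to
+# (-logit_cap, logit_cap); a minimal PyTorch equivalent for clarity:
+def _tanh_soft_cap_reference(qk: torch.Tensor, logit_cap: float) -> torch.Tensor:
+    return logit_cap * torch.tanh(qk / logit_cap) if logit_cap > 0 else qk
+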
+
@dataclass
class ForwardMetadata:
attn_logits: torch.Tensor
@@ -35,6 +43,7 @@ class ForwardMetadata:
window_kv_indptr: torch.Tensor
window_kv_indices: torch.Tensor
window_num_kv_splits: torch.Tensor
+ window_kv_offsets: torch.Tensor
class TritonAttnBackend(AttentionBackend):
@@ -163,6 +172,7 @@ def init_forward_metadata(self, forward_batch: ForwardBatch):
window_kv_indptr = self.window_kv_indptr
window_kv_indices = None
window_num_kv_splits = None
+ window_kv_offsets = None
spec_info = forward_batch.spec_info
if forward_batch.forward_mode.is_decode_or_idle():
@@ -170,7 +180,7 @@ def init_forward_metadata(self, forward_batch: ForwardBatch):
kv_indptr[1 : bs + 1] = torch.cumsum(forward_batch.seq_lens, dim=0)
kv_indptr = kv_indptr[: bs + 1]
kv_indices = torch.empty(
- forward_batch.seq_lens_sum, dtype=torch.int32, device=self.device
+ forward_batch.seq_lens_sum, dtype=torch.int64, device=self.device
)
create_flashinfer_kv_indices_triton[(bs,)](
self.req_to_token,
@@ -186,7 +196,7 @@ def init_forward_metadata(self, forward_batch: ForwardBatch):
self.sliding_window_size is not None
and self.sliding_window_size > 0
):
- window_kv_indptr, window_kv_indices, window_kv_lens = (
+ window_kv_indptr, window_kv_indices, window_kv_lens, _ = (
update_sliding_window_buffer(
self.window_kv_indptr,
self.req_to_token,
@@ -236,7 +246,7 @@ def init_forward_metadata(self, forward_batch: ForwardBatch):
kv_indptr[1 : bs + 1] = torch.cumsum(forward_batch.seq_lens, dim=0)
kv_indptr = kv_indptr[: bs + 1]
kv_indices = torch.empty(
- kv_indptr[-1], dtype=torch.int32, device=self.device
+ kv_indptr[-1], dtype=torch.int64, device=self.device
)
create_flashinfer_kv_indices_triton[(bs,)](
self.req_to_token,
@@ -249,17 +259,21 @@ def init_forward_metadata(self, forward_batch: ForwardBatch):
)
if self.sliding_window_size is not None and self.sliding_window_size > 0:
- window_kv_indptr, window_kv_indices, window_kv_lens = (
- update_sliding_window_buffer(
- self.window_kv_indptr,
- self.req_to_token,
- self.sliding_window_size,
- forward_batch.seq_lens,
- forward_batch.req_pool_indices,
- bs,
- self.device,
- self.token_to_kv_pool_allocator,
- )
+                # window_kv_offsets is used to calculate the start position in the custom mask
+ (
+ window_kv_indptr,
+ window_kv_indices,
+ window_kv_lens,
+ window_kv_offsets,
+ ) = update_sliding_window_buffer(
+ self.window_kv_indptr,
+ self.req_to_token,
+ self.sliding_window_size,
+ forward_batch.seq_lens,
+ forward_batch.req_pool_indices,
+ bs,
+ self.device,
+ self.token_to_kv_pool_allocator,
)
custom_mask = spec_info.custom_mask
@@ -283,6 +297,7 @@ def init_forward_metadata(self, forward_batch: ForwardBatch):
self.req_to_token,
)
)
+ kv_indices = kv_indices.to(torch.int64)
mask_indptr = None
# TODO(FIXME): This will trigger an invalid Eagle tree when using
# `max(spec_info.accept_length_cpu)`.
@@ -298,7 +313,7 @@ def init_forward_metadata(self, forward_batch: ForwardBatch):
kv_indptr = kv_indptr[: bs + 1]
kv_indices = torch.empty(
forward_batch.extend_prefix_lens.sum().item(),
- dtype=torch.int32,
+ dtype=torch.int64,
device=self.device,
)
create_flashinfer_kv_indices_triton[(bs,)](
@@ -312,15 +327,17 @@ def init_forward_metadata(self, forward_batch: ForwardBatch):
)
# Sliding window
if self.sliding_window_size is not None and self.sliding_window_size > 0:
- window_kv_indptr, window_kv_indices, _ = update_sliding_window_buffer(
- self.window_kv_indptr,
- self.req_to_token,
- self.sliding_window_size,
- forward_batch.extend_prefix_lens,
- forward_batch.req_pool_indices,
- bs,
- self.device,
- self.token_to_kv_pool_allocator,
+ window_kv_indptr, window_kv_indices, _, _ = (
+ update_sliding_window_buffer(
+ self.window_kv_indptr,
+ self.req_to_token,
+ self.sliding_window_size,
+ forward_batch.extend_prefix_lens,
+ forward_batch.req_pool_indices,
+ bs,
+ self.device,
+ self.token_to_kv_pool_allocator,
+ )
)
qo_indptr = self.qo_indptr
@@ -346,6 +363,7 @@ def init_forward_metadata(self, forward_batch: ForwardBatch):
window_kv_indptr,
window_kv_indices,
window_num_kv_splits,
+ window_kv_offsets,
)
def init_cuda_graph_state(
@@ -370,7 +388,7 @@ def init_cuda_graph_state(
if kv_indices_buf is None:
self.cuda_graph_kv_indices = torch.zeros(
(max_num_tokens * self.max_context_len),
- dtype=torch.int32,
+ dtype=torch.int64,
device=self.device,
)
else:
@@ -387,7 +405,7 @@ def init_cuda_graph_state(
if kv_indices_buf is None:
self.cuda_graph_window_kv_indices = torch.zeros(
(max_num_tokens * self.sliding_window_size),
- dtype=torch.int32,
+ dtype=torch.int64,
device=self.device,
)
else:
@@ -400,6 +418,12 @@ def init_cuda_graph_state(
device=self.device,
)
+ self.cuda_graph_window_kv_offsets = torch.zeros(
+ (max_bs,),
+ dtype=torch.int32,
+ device=self.device,
+ )
+
def init_forward_metadata_capture_cuda_graph(
self,
bs: int,
@@ -414,6 +438,7 @@ def init_forward_metadata_capture_cuda_graph(
window_kv_indptr = self.window_kv_indptr
window_kv_indices = None
window_num_kv_splits = None
+ window_kv_offsets = None
if forward_mode.is_decode_or_idle():
if spec_info is None:
@@ -436,7 +461,7 @@ def init_forward_metadata_capture_cuda_graph(
):
window_kv_indices = self.cuda_graph_window_kv_indices
window_num_kv_splits = self.cuda_graph_window_num_kv_splits
- window_kv_indptr, window_kv_indices, _ = (
+ window_kv_indptr, window_kv_indices, _, _ = (
update_sliding_window_buffer_cuda_graph(
self.window_kv_indptr,
window_kv_indices,
@@ -483,13 +508,14 @@ def init_forward_metadata_capture_cuda_graph(
if self.sliding_window_size is not None and self.sliding_window_size > 0:
window_kv_indices = self.cuda_graph_window_kv_indices
window_num_kv_splits = self.cuda_graph_window_num_kv_splits
- window_kv_indptr, window_kv_indices, _ = (
+ window_kv_offsets = self.cuda_graph_window_kv_offsets
+ window_kv_indptr, window_kv_indices, _, window_kv_offsets[:bs] = (
update_sliding_window_buffer_cuda_graph(
self.window_kv_indptr,
window_kv_indices,
self.req_to_token,
self.sliding_window_size,
- seq_lens,
+ seq_lens[:bs],
req_pool_indices,
bs,
self.token_to_kv_pool_allocator,
@@ -551,6 +577,7 @@ def init_forward_metadata_capture_cuda_graph(
window_kv_indptr,
window_kv_indices,
window_num_kv_splits,
+ window_kv_offsets,
)
def init_forward_metadata_replay_cuda_graph(
@@ -589,7 +616,7 @@ def init_forward_metadata_replay_cuda_graph(
):
window_num_kv_splits = self.cuda_graph_window_num_kv_splits
window_kv_indices = self.cuda_graph_window_kv_indices
- _, _, window_kv_lens = update_sliding_window_buffer_cuda_graph(
+ _, _, window_kv_lens, _ = update_sliding_window_buffer_cuda_graph(
self.window_kv_indptr,
window_kv_indices,
self.req_to_token,
@@ -635,15 +662,18 @@ def init_forward_metadata_replay_cuda_graph(
if self.sliding_window_size is not None and self.sliding_window_size > 0:
window_num_kv_splits = self.cuda_graph_window_num_kv_splits
window_kv_indices = self.cuda_graph_window_kv_indices
- _, _, window_kv_lens = update_sliding_window_buffer_cuda_graph(
- self.window_kv_indptr,
- window_kv_indices,
- self.req_to_token,
- self.sliding_window_size,
- seq_lens,
- req_pool_indices,
- bs,
- self.token_to_kv_pool_allocator,
+ window_kv_offsets = self.cuda_graph_window_kv_offsets
+ _, _, window_kv_lens, window_kv_offsets[:bs] = (
+ update_sliding_window_buffer_cuda_graph(
+ self.window_kv_indptr,
+ window_kv_indices,
+ self.req_to_token,
+ self.sliding_window_size,
+ seq_lens[:bs],
+ req_pool_indices,
+ bs,
+ self.token_to_kv_pool_allocator,
+ )
)
custom_mask = self.cuda_graph_custom_mask
custom_mask[: spec_info.custom_mask.shape[0]] = spec_info.custom_mask
@@ -696,6 +726,8 @@ def forward_extend(
layer, forward_batch.out_cache_loc, k, v
)
+ logits_soft_cap = logit_capping_mod(layer.logit_capping_method, layer.logit_cap)
+
causal = True
if layer.attn_type == AttentionType.ENCODER_ONLY:
causal = False
@@ -706,10 +738,12 @@ def forward_extend(
) # Needed for sliding window mask
kv_indptr = self.forward_metadata.window_kv_indptr
kv_indices = self.forward_metadata.window_kv_indices
+ window_kv_offsets = self.forward_metadata.window_kv_offsets
else:
sliding_window_size = -1
kv_indptr = self.forward_metadata.kv_indptr
kv_indices = self.forward_metadata.kv_indices
+ window_kv_offsets = None
self.extend_attention_fwd(
q.view(-1, layer.tp_q_head_num, layer.qk_head_dim),
@@ -726,9 +760,11 @@ def forward_extend(
self.forward_metadata.mask_indptr,
self.forward_metadata.max_extend_len,
layer.scaling,
- layer.logit_cap,
+ logit_cap=logits_soft_cap,
sliding_window_size=sliding_window_size,
sinks=sinks,
+ window_kv_offsets=window_kv_offsets,
+ xai_temperature_len=layer.xai_temperature_len,
)
return o
@@ -752,6 +788,8 @@ def forward_decode(
else:
o = torch.empty_like(q)
+ logits_soft_cap = logit_capping_mod(layer.logit_capping_method, layer.logit_cap)
+
if save_kv_cache:
forward_batch.token_to_kv_pool.set_kv_buffer(
layer, forward_batch.out_cache_loc, k, v
@@ -776,8 +814,9 @@ def forward_decode(
self.forward_metadata.num_kv_splits,
self.max_kv_splits,
layer.scaling,
- layer.logit_cap,
+ logit_cap=logits_soft_cap,
sinks=sinks,
+ xai_temperature_len=layer.xai_temperature_len,
)
return o
@@ -864,7 +903,7 @@ def init_forward_metadata(self, forward_batch: ForwardBatch):
self.speculative_num_steps,
forward_batch.batch_size * self.topk * self.max_context_len,
),
- dtype=torch.int32,
+ dtype=torch.int64,
device=self.device,
)
@@ -882,7 +921,7 @@ def call_fn(i, forward_batch):
def init_cuda_graph_state(self, max_bs: int, max_num_tokens: int):
self.cuda_graph_kv_indices = torch.zeros(
(self.speculative_num_steps, max_num_tokens * self.max_context_len),
- dtype=torch.int32,
+ dtype=torch.int64,
device=self.device,
)
for i in range(self.speculative_num_steps):
@@ -991,7 +1030,7 @@ def update_sliding_window_buffer(
window_kv_indptr[1 : bs + 1] = torch.cumsum(window_kv_lens, dim=0)
window_kv_indptr = window_kv_indptr[: bs + 1]
window_kv_indices = torch.empty(
- window_kv_indptr[-1], dtype=torch.int32, device=device
+ window_kv_indptr[-1], dtype=torch.int64, device=device
)
window_kv_start_idx = seq_lens - window_kv_lens
create_flashinfer_kv_indices_triton[(bs,)](
@@ -1011,7 +1050,7 @@ def update_sliding_window_buffer(
window_kv_indices[:kv_last_index]
)
)
- return window_kv_indptr, window_kv_indices, window_kv_lens
+ return window_kv_indptr, window_kv_indices, window_kv_lens, window_kv_start_idx
def update_sliding_window_buffer_cuda_graph(
@@ -1048,4 +1087,4 @@ def update_sliding_window_buffer_cuda_graph(
window_kv_indices[:kv_last_index]
)
)
- return window_kv_indptr, window_kv_indices, window_kv_lens
+ return window_kv_indptr, window_kv_indices, window_kv_lens, window_kv_start_idx
diff --git a/python/sglang/srt/layers/attention/triton_ops/decode_attention.py b/python/sglang/srt/layers/attention/triton_ops/decode_attention.py
index 014eadab794..1ba5d463d1b 100644
--- a/python/sglang/srt/layers/attention/triton_ops/decode_attention.py
+++ b/python/sglang/srt/layers/attention/triton_ops/decode_attention.py
@@ -69,6 +69,7 @@ def _fwd_kernel_stage1(
logit_cap: tl.constexpr,
Lk: tl.constexpr,
Lv: tl.constexpr,
+ xai_temperature_len: tl.constexpr,
):
cur_batch = tl.program_id(0)
cur_head = tl.program_id(1)
@@ -85,6 +86,12 @@ def _fwd_kernel_stage1(
cur_batch_seq_len = tl.load(kv_indptr + cur_batch + 1) - cur_batch_kv_start_idx
kv_splits = tl.load(num_kv_splits + cur_batch)
+ if xai_temperature_len > 0:
+ offs_qidx = cur_batch_seq_len - 1
+ xai_temperature_scale = 1.0 / tl.log2(float(xai_temperature_len))
+ _qtemp = tl.log2(offs_qidx.to(tl.float32)) * xai_temperature_scale
+ xai_temperature_reg = tl.where(offs_qidx > xai_temperature_len, _qtemp, 1.0)
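+        # Worked example (illustrative): with xai_temperature_len = 8192, a query at
+        # position 16384 has its scores scaled by log2(16384) / log2(8192) = 14 / 13
+        # ~= 1.08, and a query at position 8192**2 by exactly 2; positions <= 8192
+        # keep a scale of 1.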
+
off_q = cur_batch * stride_qbs + cur_head * stride_qh + offs_d
kv_len_per_split = (
@@ -122,6 +129,9 @@ def _fwd_kernel_stage1(
if logit_cap > 0:
qk = logit_cap * tanh(qk / logit_cap)
+ if xai_temperature_len > 0:
+ qk *= xai_temperature_reg
+
qk = tl.where(offs_n < split_kv_end, qk, float("-inf"))
offs_buf_v = (
@@ -181,6 +191,7 @@ def _decode_att_m_fwd(
max_kv_splits,
sm_scale,
logit_cap,
+ xai_temperature_len=-1,
):
BLOCK = 64
# [TODO] work around SGPR limit on MI3xx
@@ -190,7 +201,7 @@ def _decode_att_m_fwd(
Lk = k_buffer.shape[-1]
Lv = v_buffer.shape[-1]
- batch, head_num = kv_indptr.shape[0] - 1, q.shape[1]
+ batch, head_num = q.shape[0], q.shape[1]
grid = (batch, head_num, MAX_KV_SPLITS)
kv_group_num = q.shape[1] // k_buffer.shape[1]
@@ -230,6 +241,7 @@ def _decode_att_m_fwd(
BLOCK_N=BLOCK,
MIN_BLOCK_KV=_MIN_BLOCK_KV,
logit_cap=logit_cap,
+ xai_temperature_len=xai_temperature_len,
num_warps=num_warps,
num_stages=2,
Lk=Lk,
@@ -266,6 +278,7 @@ def _fwd_grouped_kernel_stage1(
BLOCK_H: tl.constexpr,
MIN_BLOCK_KV: tl.constexpr,
logit_cap: tl.constexpr,
+ xai_temperature_len: tl.constexpr,
Lk: tl.constexpr,
Lv: tl.constexpr,
):
@@ -291,6 +304,12 @@ def _fwd_grouped_kernel_stage1(
cur_batch_seq_len = tl.load(kv_indptr + cur_batch + 1) - cur_batch_kv_start_idx
kv_splits = tl.load(num_kv_splits + cur_batch)
+ if xai_temperature_len > 0:
+ offs_qidx = cur_batch_seq_len - 1
+ xai_temperature_scale = 1.0 / tl.log2(float(xai_temperature_len))
+ _qtemp = tl.log2(offs_qidx.to(tl.float32)) * xai_temperature_scale
+ xai_temperature_reg = tl.where(offs_qidx > xai_temperature_len, _qtemp, 1.0)
+
offs_q = cur_batch * stride_qbs + cur_head[:, None] * stride_qh + offs_d[None, :]
if BLOCK_DPE > 0:
@@ -351,6 +370,9 @@ def _fwd_grouped_kernel_stage1(
if logit_cap > 0:
qk = logit_cap * tanh(qk / logit_cap)
+ if xai_temperature_len > 0:
+ qk *= xai_temperature_reg[:, None]
+
qk = tl.where(
mask_h[:, None] & (offs_n[None, :] < split_kv_end), qk, float("-inf")
)
@@ -413,6 +435,7 @@ def _decode_grouped_att_m_fwd(
max_kv_splits,
sm_scale,
logit_cap,
+ xai_temperature_len=-1,
):
BLOCK = 32
Lk = k_buffer.shape[-1]
@@ -433,7 +456,7 @@ def _decode_grouped_att_m_fwd(
BLOCK_DPE = 0
BLOCK_DV = triton.next_power_of_2(Lv)
- batch, head_num = kv_indptr.shape[0] - 1, q.shape[1]
+ batch, head_num = q.shape[0], q.shape[1]
kv_group_num = q.shape[1] // k_buffer.shape[1]
BLOCK_H = 16
@@ -480,6 +503,7 @@ def _decode_grouped_att_m_fwd(
BLOCK_H=BLOCK_H,
MIN_BLOCK_KV=_MIN_BLOCK_KV,
logit_cap=logit_cap,
+ xai_temperature_len=xai_temperature_len,
num_warps=4,
num_stages=num_stages,
Lk=Lk,
@@ -620,6 +644,7 @@ def decode_attention_fwd_normal(
sm_scale,
logit_cap=0.0,
sinks=None,
+ xai_temperature_len=-1,
):
_decode_att_m_fwd(
q,
@@ -633,6 +658,7 @@ def decode_attention_fwd_normal(
max_kv_splits,
sm_scale,
logit_cap,
+ xai_temperature_len,
)
_decode_softmax_reducev_fwd(
attn_logits,
@@ -661,6 +687,7 @@ def decode_attention_fwd_grouped(
sm_scale,
logit_cap=0.0,
sinks=None,
+ xai_temperature_len=-1,
):
_decode_grouped_att_m_fwd(
q,
@@ -674,6 +701,7 @@ def decode_attention_fwd_grouped(
max_kv_splits,
sm_scale,
logit_cap,
+ xai_temperature_len,
)
_decode_softmax_reducev_fwd(
attn_logits,
@@ -702,6 +730,7 @@ def decode_attention_fwd(
sm_scale,
logit_cap=0.0,
sinks=None,
+ xai_temperature_len=-1,
):
assert max_kv_splits == attn_logits.shape[2]
assert q.shape[0] <= kv_indptr.shape[0] - 1
@@ -725,6 +754,7 @@ def decode_attention_fwd(
sm_scale,
logit_cap=logit_cap,
sinks=sinks,
+ xai_temperature_len=xai_temperature_len,
)
else:
# GQA/MQA/MLA
@@ -742,4 +772,5 @@ def decode_attention_fwd(
sm_scale,
logit_cap=logit_cap,
sinks=sinks,
+ xai_temperature_len=xai_temperature_len,
)
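The new `xai_temperature_len` argument applies a length-dependent temperature to the attention logits. Below is a plain-PyTorch restatement of the factor the Triton kernels compute; the names are illustrative, not kernel symbols.

```python
import math
import torch

def xai_temperature_factor(positions: torch.Tensor, xai_temperature_len: int) -> torch.Tensor:
    # Scores are left unchanged while the query position is within xai_temperature_len,
    # then scaled by log2(position) / log2(xai_temperature_len) beyond it.
    scale = 1.0 / math.log2(xai_temperature_len)
    factor = torch.log2(positions.float()) * scale
    return torch.where(positions > xai_temperature_len, factor, torch.ones_like(factor))

scores = torch.randn(4, 8)                       # qk logits, one row per query position
positions = torch.tensor([10, 100, 1000, 10000])
scores = scores * xai_temperature_factor(positions, xai_temperature_len=100).unsqueeze(-1)
```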
diff --git a/python/sglang/srt/layers/attention/triton_ops/extend_attention.py b/python/sglang/srt/layers/attention/triton_ops/extend_attention.py
index 8b459861d41..e9146774345 100644
--- a/python/sglang/srt/layers/attention/triton_ops/extend_attention.py
+++ b/python/sglang/srt/layers/attention/triton_ops/extend_attention.py
@@ -52,6 +52,7 @@ def _fwd_kernel(
mask_ptr,
mask_indptr,
sink_ptr,
+ window_kv_offset_ptr,
sm_scale,
kv_group_num,
stride_qbs,
@@ -68,6 +69,7 @@ def _fwd_kernel(
stride_buf_vh,
SLIDING_WINDOW_SIZE: tl.constexpr,
logit_cap: tl.constexpr,
+ xai_temperature_len: tl.constexpr,
Lq: tl.constexpr,
Lv: tl.constexpr,
BLOCK_DMODEL: tl.constexpr,
@@ -95,6 +97,11 @@ def _fwd_kernel(
if USE_CUSTOM_MASK:
cur_seq_mask_start_idx = tl.load(mask_indptr + cur_seq)
+ # For SWA, we should only load the mask in the sliding window
+ window_kv_offset = 0
+ if USE_CUSTOM_MASK and SLIDING_WINDOW_SIZE > 0:
+ window_kv_offset = tl.load(window_kv_offset_ptr + cur_seq)
+
offs_d = tl.arange(0, BLOCK_DMODEL)
offs_dv = tl.arange(0, BLOCK_DV)
offs_m = tl.arange(0, BLOCK_M)
@@ -103,6 +110,15 @@ def _fwd_kernel(
mask_d = offs_d < Lq
mask_dv = offs_dv < Lv
+ if xai_temperature_len > 0:
+ offs_qidx = cur_seq_len_prefix + cur_block_m * BLOCK_M + offs_m
+ xai_temperature_scale = 1.0 / tl.log2(float(xai_temperature_len))
+ xai_temperature_reg = tl.where(
+ offs_qidx > xai_temperature_len,
+ tl.log2(offs_qidx.to(tl.float32)) * xai_temperature_scale,
+ 1.0,
+ )
+
offs_q = (
(cur_seq_extend_start_idx + cur_block_m * BLOCK_M + offs_m[:, None])
* stride_qbs
@@ -139,7 +155,9 @@ def _fwd_kernel(
custom_mask = tl.load(
mask_ptr
+ cur_seq_mask_start_idx
- + (cur_block_m * BLOCK_M + offs_m[:, None]) * cur_seq_len
+ + (cur_block_m * BLOCK_M + offs_m[:, None])
+ * (cur_seq_len + window_kv_offset)
+ + window_kv_offset
+ start_n
+ offs_n[None, :],
mask=(mask_m[:, None] & mask_n[None, :]),
@@ -195,6 +213,9 @@ def _fwd_kernel(
if logit_cap > 0:
qk = logit_cap * tanh(qk / logit_cap)
+ if xai_temperature_len > 0:
+ qk *= xai_temperature_reg[:, None]
+
qk = tl.where(final_mask, qk, float("-inf"))
row_max = tl.max(qk, 1)
@@ -236,7 +257,9 @@ def _fwd_kernel(
custom_mask = tl.load(
mask_ptr
+ cur_seq_mask_start_idx
- + (cur_block_m * BLOCK_M + offs_m[:, None]) * cur_seq_len
+ + (cur_block_m * BLOCK_M + offs_m[:, None])
+ * (cur_seq_len + window_kv_offset)
+ + window_kv_offset
+ cur_seq_len_prefix
+ start_n
+ offs_n[None, :],
@@ -296,6 +319,9 @@ def _fwd_kernel(
if logit_cap > 0:
qk = logit_cap * tanh(qk / logit_cap)
+ if xai_temperature_len > 0:
+ qk *= xai_temperature_reg[:, None]
+
qk = tl.where(final_mask, qk, float("-inf"))
row_max = tl.max(qk, 1)
@@ -362,6 +388,8 @@ def extend_attention_fwd(
skip_prefix_custom_mask=True,
sliding_window_size=-1,
sinks=None,
+ window_kv_offsets=None,
+ xai_temperature_len=-1,
):
"""
q_extend, k_extend, v_extend, o_extend: contiguous tensors
@@ -449,6 +477,7 @@ def extend_attention_fwd(
custom_mask,
mask_indptr,
sinks,
+ window_kv_offsets,
sm_scale,
kv_group_num,
q_extend.stride(0),
@@ -465,6 +494,7 @@ def extend_attention_fwd(
v_buffer.stride(1),
SLIDING_WINDOW_SIZE=sliding_window_size,
logit_cap=logit_cap,
+ xai_temperature_len=xai_temperature_len,
BLOCK_DMODEL=BLOCK_DMODEL,
BLOCK_DPE=BLOCK_DPE,
BLOCK_DV=BLOCK_DV,
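The custom-mask indexing change above reads each mask row with a stride of `cur_seq_len + window_kv_offset` and skips the first `window_kv_offset` columns, so a mask laid out for the full sequence can be consumed by a kernel that only walks the in-window KV positions. A small illustrative check of that flat-index arithmetic (variable names mirror the kernel, but this is not kernel code):

```python
import torch

window_kv_offset, cur_seq_len = 3, 5                     # tokens outside / inside the window
full_len = cur_seq_len + window_kv_offset                # mask row width for the full sequence
mask = torch.arange(2 * full_len).reshape(2, full_len)   # 2 query rows, 8 KV columns
flat = mask.flatten()

row, col = 1, 2                                          # second query row, third in-window column
idx = row * (cur_seq_len + window_kv_offset) + window_kv_offset + col
assert flat[idx] == mask[row, window_kv_offset + col]
```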
diff --git a/python/sglang/srt/layers/attention/trtllm_mha_backend.py b/python/sglang/srt/layers/attention/trtllm_mha_backend.py
index d8cb8aa0bae..a48cc979479 100644
--- a/python/sglang/srt/layers/attention/trtllm_mha_backend.py
+++ b/python/sglang/srt/layers/attention/trtllm_mha_backend.py
@@ -10,13 +10,18 @@
import torch
-from sglang.srt.layers.attention.flashinfer_backend import FlashInferAttnBackend
+from sglang.srt.layers.attention.flashinfer_backend import (
+ FlashInferAttnBackend,
+ FlashInferMultiStepDraftBackend,
+)
from sglang.srt.model_executor.forward_batch_info import ForwardBatch, ForwardMode
from sglang.srt.utils import is_flashinfer_available
if is_flashinfer_available():
import flashinfer
+from sglang.srt.speculative.eagle_utils import EagleDraftInput
+
if TYPE_CHECKING:
from sglang.srt.layers.radix_attention import RadixAttention
from sglang.srt.model_executor.model_runner import ModelRunner
@@ -55,9 +60,12 @@ def __init__(
model_runner: ModelRunner,
skip_prefill: bool = False,
kv_indptr_buf: Optional[torch.Tensor] = None,
- q_indptr_decode_buf: Optional[torch.Tensor] = None,
+ kv_last_page_len_buf: Optional[torch.Tensor] = None,
+ speculative_step_id: int = 0,
):
- super().__init__(model_runner, skip_prefill, kv_indptr_buf, q_indptr_decode_buf)
+ super().__init__(
+ model_runner, skip_prefill, kv_indptr_buf, kv_last_page_len_buf
+ )
config = model_runner.model_config
@@ -87,6 +95,16 @@ def __init__(
# CUDA graph state
self.decode_cuda_graph_metadata = {}
+ # Speculative decoding
+ # Only support topk <= 1 for now.
+ self.topk = model_runner.server_args.speculative_eagle_topk or 0
+ self.speculative_step_id = speculative_step_id
+ self.target_verify_metadata = {}
+
+ self.speculative_num_draft_tokens = (
+ model_runner.server_args.speculative_num_draft_tokens
+ )
+
# Forward metadata
self.forward_metadata: Optional[TRTLLMMHAMetadata] = None
@@ -97,11 +115,12 @@ def init_cuda_graph_state(
kv_indices_buf: Optional[torch.Tensor] = None,
):
"""Initialize CUDA graph state for TRTLLM MHA."""
+ max_num_pages = (self.max_context_len + self.page_size - 1) // self.page_size
self.decode_cuda_graph_metadata = {
"cache_seqlens": torch.zeros(max_bs, dtype=torch.int32, device=self.device),
"page_table": torch.zeros(
max_bs,
- (self.max_context_len + self.page_size - 1) // self.page_size,
+ max_num_pages,
dtype=torch.int32,
device=self.device,
),
@@ -110,6 +129,70 @@ def init_cuda_graph_state(
),
}
+ if (
+ self.speculative_num_draft_tokens is not None
+ and self.speculative_num_draft_tokens > 0
+ ):
+ self.decode_cuda_graph_metadata["cu_seqlens_q"] = torch.arange(
+ 0, max_bs + 1, dtype=torch.int32, device=self.device
+ )
+ self.decode_cuda_graph_metadata["cu_seqlens_k"] = torch.zeros(
+ max_bs + 1, dtype=torch.int32, device=self.device
+ )
+ self.decode_cuda_graph_metadata["page_table_draft_decode"] = torch.zeros(
+ max_bs,
+ max_num_pages,
+ dtype=torch.int32,
+ device=self.device,
+ )
+ self.target_verify_metadata = {
+ "cache_seqlens": torch.zeros(
+ max_bs, dtype=torch.int32, device=self.device
+ ),
+ "cu_seqlens_q": torch.arange(
+ 0,
+ max_bs * self.speculative_num_draft_tokens + 1,
+ step=self.speculative_num_draft_tokens,
+ dtype=torch.int32,
+ device=self.device,
+ ),
+ "cu_seqlens_k": torch.zeros(
+ max_bs + 1, dtype=torch.int32, device=self.device
+ ),
+ "page_table": torch.zeros(
+ max_bs,
+ max_num_pages,
+ dtype=torch.int32,
+ device=self.device,
+ ),
+ "strided_indices": torch.arange(
+ 0, self.max_context_len, self.page_size, device=self.device
+ ),
+ }
+
+ self.draft_extend_metadata = {
+ "cache_seqlens": torch.zeros(
+ max_bs, dtype=torch.int32, device=self.device
+ ),
+ "cu_seqlens_q": torch.zeros(
+ max_bs + 1,
+ dtype=torch.int32,
+ device=self.device,
+ ),
+ "cu_seqlens_k": torch.zeros(
+ max_bs + 1, dtype=torch.int32, device=self.device
+ ),
+ "page_table": torch.zeros(
+ max_bs,
+ max_num_pages,
+ dtype=torch.int32,
+ device=self.device,
+ ),
+ "strided_indices": torch.arange(
+ 0, self.max_context_len, self.page_size, device=self.device
+ ),
+ }
+
def init_forward_metadata_capture_cuda_graph(
self,
bs: int,
@@ -122,16 +205,105 @@ def init_forward_metadata_capture_cuda_graph(
):
"""Initialize metadata for CUDA graph capture."""
metadata = TRTLLMMHAMetadata()
+ device = seq_lens.device
- # Get sequence information
- metadata.cache_seqlens_int32 = seq_lens[:bs].to(torch.int32)
+ if forward_mode.is_decode_or_idle():
+ if spec_info is not None:
+ # Draft Decode
+ # Here we only support topk = 1 for now.
+ metadata.cache_seqlens_int32 = self.decode_cuda_graph_metadata[
+ "cache_seqlens"
+ ][:bs]
+ metadata.max_seq_len_k = seq_lens.max().item() + (
+ self.speculative_step_id + 1
+ )
+ metadata.cu_seqlens_q = self.decode_cuda_graph_metadata["cu_seqlens_q"][
+ : bs + 1
+ ]
+ metadata.cu_seqlens_k = torch.nn.functional.pad(
+ torch.cumsum(
+ metadata.cache_seqlens_int32, dim=0, dtype=torch.int32
+ ),
+ (1, 0),
+ )
+ metadata.page_table = self.decode_cuda_graph_metadata[
+ "page_table_draft_decode"
+ ][:bs, :]
+ self.decode_cuda_graph_metadata[bs] = metadata
+ else:
+ # Normal Decode
+ # Get sequence information
+ metadata.cache_seqlens_int32 = seq_lens[:bs].to(torch.int32)
+ batch_size = len(seq_lens)
+ metadata.cu_seqlens_k = torch.nn.functional.pad(
+ torch.cumsum(seq_lens, dim=0, dtype=torch.int32), (1, 0)
+ )
- # Precompute maximum sequence length
- metadata.max_seq_len_k = self.max_context_len
+ # Precompute maximum sequence length
+ metadata.max_seq_len_k = seq_lens.max().item()
+ # Precompute cumulative sequence lengths
+ metadata.cu_seqlens_q = torch.arange(
+ 0, batch_size + 1, dtype=torch.int32, device=device
+ )
+ # Precompute page table
+ metadata.page_table = self.decode_cuda_graph_metadata["page_table"][
+ :bs, :
+ ]
+ self.decode_cuda_graph_metadata[bs] = metadata
+ elif forward_mode.is_target_verify():
+ # Target Verify
+ # Here we only support topk = 1 for now.
+ metadata.cache_seqlens_int32 = self.target_verify_metadata["cache_seqlens"][
+ :bs
+ ]
+ metadata.cache_seqlens_int32.copy_(
+ (seq_lens + self.speculative_num_draft_tokens)
+ )
- # Precompute page table
- metadata.page_table = self.decode_cuda_graph_metadata["page_table"][:bs, :]
- self.decode_cuda_graph_metadata[bs] = metadata
+ metadata.cu_seqlens_q = torch.arange(
+ 0,
+ bs * self.speculative_num_draft_tokens + 1,
+ self.speculative_num_draft_tokens,
+ dtype=torch.int32,
+ device=device,
+ )
+
+ metadata.cu_seqlens_k = self.target_verify_metadata["cu_seqlens_k"][
+ : (bs + 1)
+ ]
+
+ metadata.max_seq_len_q = self.speculative_num_draft_tokens
+ metadata.max_seq_len_k = (
+ seq_lens.max().item() + self.speculative_num_draft_tokens
+ )
+
+ metadata.page_table = self.target_verify_metadata["page_table"][:bs, :]
+
+ self.target_verify_metadata[bs] = metadata
+ elif forward_mode.is_draft_extend():
+ metadata.cache_seqlens_int32 = self.draft_extend_metadata["cache_seqlens"][
+ :bs
+ ]
+ metadata.cache_seqlens_int32.copy_(seq_lens)
+ num_tokens_per_bs = num_tokens // bs
+ metadata.cu_seqlens_q = torch.arange(
+ 0,
+ bs * num_tokens_per_bs + 1,
+ num_tokens_per_bs,
+ dtype=torch.int32,
+ device=device,
+ )
+
+ metadata.cu_seqlens_k = self.draft_extend_metadata["cu_seqlens_k"][
+ : (bs + 1)
+ ]
+ metadata.max_seq_len_q = num_tokens_per_bs
+ metadata.max_seq_len_k = seq_lens.max().item()
+
+ metadata.page_table = self.draft_extend_metadata["page_table"][:bs, :]
+
+ self.draft_extend_metadata[bs] = metadata
self.forward_metadata = metadata
def init_forward_metadata_replay_cuda_graph(
@@ -149,21 +321,91 @@ def init_forward_metadata_replay_cuda_graph(
seq_lens = seq_lens[:bs]
seq_lens_cpu = seq_lens_cpu[:bs]
req_pool_indices = req_pool_indices[:bs]
- device = seq_lens.device
metadata = None
+ if forward_mode.is_decode_or_idle():
+ if spec_info is not None:
+ # Draft Decode
+ # Here we only support topk = 1 for now.
+ metadata = self.decode_cuda_graph_metadata[bs]
+ max_len = seq_lens_cpu.max().item()
+ metadata.max_seq_len_k = max_len + self.speculative_step_id + 1
+
+ max_seq_pages = (
+ metadata.max_seq_len_k + self.page_size - 1
+ ) // self.page_size
+
+ metadata.cache_seqlens_int32.copy_(
+ seq_lens + self.speculative_step_id + 1
+ )
+ else:
+ # Normal Decode
+ metadata = self.decode_cuda_graph_metadata[bs]
+ max_len = seq_lens_cpu.max().item()
+ max_seq_pages = (max_len + self.page_size - 1) // self.page_size
+ metadata.max_seq_len_k = max_len
+
+ metadata.cache_seqlens_int32.copy_(seq_lens)
+
+ metadata.cu_seqlens_k[1:].copy_(
+ torch.cumsum(metadata.cache_seqlens_int32, dim=0, dtype=torch.int32)
+ )
+ page_indices = self.req_to_token[
+ req_pool_indices[:, None],
+ self.decode_cuda_graph_metadata["strided_indices"][:max_seq_pages][
+ None, :
+ ],
+ ]
+ metadata.page_table[:, :max_seq_pages].copy_(page_indices // self.page_size)
+ elif forward_mode.is_target_verify():
+ # Here we only support topk = 1 for now.
+ metadata = self.target_verify_metadata[bs]
+ metadata.cache_seqlens_int32.copy_(
+ (seq_lens + self.speculative_num_draft_tokens)
+ )
- # Normal Decode
- metadata = self.decode_cuda_graph_metadata[bs]
- max_len = seq_lens_cpu.max().item()
- max_seq_pages = (max_len + self.page_size - 1) // self.page_size
- metadata.max_seq_len_k = self.max_context_len
-
- metadata.cache_seqlens_int32.copy_(seq_lens)
- page_indices = self.req_to_token[
- req_pool_indices[:, None],
- self.decode_cuda_graph_metadata["strided_indices"][:max_seq_pages][None, :],
- ]
- metadata.page_table[:, :max_seq_pages].copy_(page_indices // self.page_size)
+ metadata.max_seq_len_k = (
+ seq_lens_cpu.max().item() + self.speculative_num_draft_tokens
+ )
+ max_len = seq_lens_cpu.max().item()
+ metadata.cu_seqlens_k[1:].copy_(
+ torch.cumsum(metadata.cache_seqlens_int32, dim=0, dtype=torch.int32)
+ )
+ max_seq_pages = (
+ metadata.max_seq_len_k + self.page_size - 1
+ ) // self.page_size
+ page_indices = self.req_to_token[
+ req_pool_indices[:, None],
+ self.decode_cuda_graph_metadata["strided_indices"][:max_seq_pages],
+ ]
+ page_indices //= self.page_size
+ metadata.page_table[:, :max_seq_pages].copy_(page_indices)
+ elif forward_mode.is_draft_extend():
+ metadata = self.draft_extend_metadata[bs]
+ metadata.cache_seqlens_int32.copy_(seq_lens)
+
+ metadata.max_seq_len_k = seq_lens_cpu.max().item()
+ max_len = seq_lens_cpu.max().item()
+ metadata.cu_seqlens_k[1:].copy_(
+ torch.cumsum(metadata.cache_seqlens_int32, dim=0, dtype=torch.int32)
+ )
+ accept_length = spec_info.accept_length[:bs]
+ if spec_info.accept_length_cpu:
+ metadata.max_seq_len_q = max(spec_info.accept_length_cpu) + 1
+ else:
+ metadata.max_seq_len_q = 1
+
+ metadata.cu_seqlens_q[1:].copy_(
+ torch.cumsum(accept_length, dim=0, dtype=torch.int32)
+ )
+
+ max_seq_pages = (
+ metadata.max_seq_len_k + self.page_size - 1
+ ) // self.page_size
+ page_indices = self.req_to_token[
+ req_pool_indices[:, None],
+ self.draft_extend_metadata["strided_indices"][:max_seq_pages],
+ ]
+ metadata.page_table[:, :max_seq_pages].copy_(page_indices // self.page_size)
self.forward_metadata = metadata
def get_cuda_graph_seq_len_fill_value(self) -> int:
@@ -179,12 +421,65 @@ def init_forward_metadata(self, forward_batch: ForwardBatch):
device = seqlens_in_batch.device
if forward_batch.forward_mode.is_decode_or_idle():
- # Normal Decode
- metadata.cache_seqlens_int32 = seqlens_in_batch.to(torch.int32)
- metadata.max_seq_len_k = forward_batch.seq_lens_cpu.max().item()
+ if forward_batch.spec_info is not None:
+ # Draft Decode
+ # Here we only support topk = 1 for now.
+ metadata.cache_seqlens_int32 = (
+ seqlens_in_batch + (self.speculative_step_id + 1)
+ ).to(torch.int32)
+ metadata.max_seq_len_k = forward_batch.seq_lens_cpu.max().item() + (
+ self.speculative_step_id + 1
+ )
+ metadata.cu_seqlens_q = torch.arange(
+ 0, batch_size + 1, dtype=torch.int32, device=device
+ )
+ metadata.cu_seqlens_k = torch.nn.functional.pad(
+ torch.cumsum(
+ metadata.cache_seqlens_int32, dim=0, dtype=torch.int32
+ ),
+ (1, 0),
+ )
+ metadata.page_table = forward_batch.req_to_token_pool.req_to_token[
+ forward_batch.req_pool_indices, : metadata.max_seq_len_k
+ ]
+ else:
+ # Normal Decode
+ metadata.cache_seqlens_int32 = seqlens_in_batch.to(torch.int32)
+ metadata.max_seq_len_k = forward_batch.seq_lens_cpu.max().item()
+ metadata.cu_seqlens_q = torch.arange(
+ 0, batch_size + 1, dtype=torch.int32, device=device
+ )
+ metadata.cu_seqlens_k = torch.nn.functional.pad(
+ torch.cumsum(seqlens_in_batch, dim=0, dtype=torch.int32), (1, 0)
+ )
+ metadata.page_table = forward_batch.req_to_token_pool.req_to_token[
+ forward_batch.req_pool_indices, : metadata.max_seq_len_k
+ ]
+ elif forward_batch.forward_mode.is_target_verify():
+ # Only support topk = 1 for now.
+ metadata.cache_seqlens_int32 = (
+ forward_batch.seq_lens + self.speculative_num_draft_tokens
+ ).to(torch.int32)
+ metadata.max_seq_len_q = self.speculative_num_draft_tokens
+ metadata.max_seq_len_k = (
+ forward_batch.seq_lens_cpu.max().item()
+ + self.speculative_num_draft_tokens
+ )
+ metadata.cu_seqlens_q = torch.arange(
+ 0,
+ batch_size * self.speculative_num_draft_tokens + 1,
+ self.speculative_num_draft_tokens,
+ dtype=torch.int32,
+ device=device,
+ )
+ metadata.cu_seqlens_k = torch.nn.functional.pad(
+ torch.cumsum(metadata.cache_seqlens_int32, dim=0, dtype=torch.int32),
+ (1, 0),
+ )
metadata.page_table = forward_batch.req_to_token_pool.req_to_token[
forward_batch.req_pool_indices, : metadata.max_seq_len_k
]
+
else:
metadata.cache_seqlens_int32 = seqlens_in_batch.to(torch.int32)
metadata.max_seq_len_k = forward_batch.seq_lens_cpu.max().item()
@@ -195,7 +490,10 @@ def init_forward_metadata(self, forward_batch: ForwardBatch):
forward_batch.req_pool_indices, : metadata.max_seq_len_k
]
- if any(forward_batch.extend_prefix_lens_cpu):
+ if (
+ any(forward_batch.extend_prefix_lens_cpu)
+ or forward_batch.forward_mode == ForwardMode.DRAFT_EXTEND
+ ):
extend_seq_lens = forward_batch.extend_seq_lens
metadata.max_seq_len_q = max(forward_batch.extend_seq_lens_cpu)
metadata.cu_seqlens_q = torch.nn.functional.pad(
@@ -265,7 +563,7 @@ def forward_decode(
workspace_buffer=self.workspace_buffer,
block_tables=self.forward_metadata.page_table,
seq_lens=self.forward_metadata.cache_seqlens_int32,
- max_seq_len=self.forward_metadata.max_seq_len_k,
+ max_seq_len=self.max_context_len,
bmm1_scale=bmm1_scale,
bmm2_scale=bmm2_scale,
window_left=layer.sliding_window_size,
@@ -320,7 +618,7 @@ def forward_extend(
block_tables=self.forward_metadata.page_table,
seq_lens=self.forward_metadata.cache_seqlens_int32,
max_q_len=self.forward_metadata.max_seq_len_q,
- max_kv_len=self.forward_metadata.max_seq_len_k,
+ max_kv_len=self.max_context_len,
bmm1_scale=bmm1_scale,
bmm2_scale=bmm2_scale,
batch_size=forward_batch.batch_size,
@@ -332,3 +630,65 @@ def forward_extend(
)
return o.view(-1, layer.tp_q_head_num * layer.head_dim)
+
+
+class TRTLLMHAAttnMultiStepDraftBackend(FlashInferMultiStepDraftBackend):
+ """Multi-step TRTLLM MHA attention kernel used by EAGLE."""
+
+ def __init__(
+ self, model_runner: ModelRunner, topk: int, speculative_num_steps: int
+ ):
+ super().__init__(model_runner, topk, speculative_num_steps)
+ for i in range(speculative_num_steps):
+ self.attn_backends[i] = TRTLLMHAAttnBackend(
+ model_runner,
+ skip_prefill=True,
+ kv_indptr_buf=self.kv_indptr[i],
+ kv_last_page_len_buf=self.kv_last_page_len,
+ speculative_step_id=i,
+ )
+
+ def init_forward_metadata(self, forward_batch: ForwardBatch):
+ for i in range(self.speculative_num_steps - 1):
+ self.attn_backends[i].init_forward_metadata(forward_batch)
+
+ def init_cuda_graph_state(self, max_bs: int, max_num_tokens: int):
+ for i in range(self.speculative_num_steps):
+ self.attn_backends[i].init_cuda_graph_state(max_bs, max_num_tokens)
+
+ def init_forward_metadata_capture_cuda_graph(
+ self,
+ forward_batch: ForwardBatch,
+ ):
+ assert forward_batch.spec_info is not None
+ assert isinstance(forward_batch.spec_info, EagleDraftInput)
+
+ for i in range(self.speculative_num_steps - 1):
+ self.attn_backends[i].init_forward_metadata_capture_cuda_graph(
+ forward_batch.batch_size,
+ forward_batch.batch_size * self.topk,
+ forward_batch.req_pool_indices,
+ forward_batch.seq_lens,
+ encoder_lens=forward_batch.encoder_lens,
+ forward_mode=ForwardMode.DECODE,
+ spec_info=forward_batch.spec_info,
+ )
+
+ def init_forward_metadata_replay_cuda_graph(
+ self, forward_batch: ForwardBatch, bs: int
+ ):
+ assert forward_batch.spec_info is not None
+ assert isinstance(forward_batch.spec_info, EagleDraftInput)
+
+ for i in range(self.speculative_num_steps - 1):
+
+ self.attn_backends[i].init_forward_metadata_replay_cuda_graph(
+ bs,
+ forward_batch.req_pool_indices,
+ forward_batch.seq_lens,
+ forward_batch.seq_lens_sum,
+ encoder_lens=forward_batch.encoder_lens,
+ forward_mode=ForwardMode.DECODE,
+ spec_info=forward_batch.spec_info,
+ seq_lens_cpu=forward_batch.seq_lens_cpu,
+ )
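For the speculative paths added above, the query-side cumulative lengths are fixed-stride (every request verifies the same number of draft tokens, topk = 1), while the KV side is the cumulative sum of the draft-extended cache lengths. A framework-free sketch with made-up sizes:

```python
import torch

bs, num_draft_tokens = 3, 4
seq_lens = torch.tensor([7, 12, 5], dtype=torch.int32)

cu_seqlens_q = torch.arange(0, bs * num_draft_tokens + 1, num_draft_tokens, dtype=torch.int32)
cache_seqlens = seq_lens + num_draft_tokens   # each request verifies num_draft_tokens extra tokens
cu_seqlens_k = torch.nn.functional.pad(
    torch.cumsum(cache_seqlens, dim=0, dtype=torch.int32), (1, 0)
)

print(cu_seqlens_q)  # tensor([ 0,  4,  8, 12], dtype=torch.int32)
print(cu_seqlens_k)  # tensor([ 0, 11, 27, 36], dtype=torch.int32)
```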
diff --git a/python/sglang/srt/layers/attention/trtllm_mla_backend.py b/python/sglang/srt/layers/attention/trtllm_mla_backend.py
index 7aeb00d6b22..b8d62c3fa00 100755
--- a/python/sglang/srt/layers/attention/trtllm_mla_backend.py
+++ b/python/sglang/srt/layers/attention/trtllm_mla_backend.py
@@ -11,7 +11,10 @@
import torch
import triton
-from sglang.srt.layers.attention.flashinfer_mla_backend import FlashInferMLAAttnBackend
+from sglang.srt.layers.attention.flashinfer_mla_backend import (
+ FlashInferMLAAttnBackend,
+ FlashInferMLAMultiStepDraftBackend,
+)
from sglang.srt.layers.attention.utils import (
TRITON_PAD_NUM_PAGE_PER_BLOCK,
create_flashmla_kv_indices_triton,
@@ -42,12 +45,21 @@
global_zero_init_workspace_buffer = None
+@dataclass
+class TRTLLMMLAPrefillMetadata:
+ """Metadata for TRTLLM MLA prefill operations."""
+
+ max_seq_len: int
+ cum_seq_lens: torch.Tensor
+ seq_lens: torch.Tensor
+
+
@dataclass
class TRTLLMMLADecodeMetadata:
"""Metadata for TRTLLM MLA decode operations."""
- workspace: Optional[torch.Tensor] = None
block_kv_indices: Optional[torch.Tensor] = None
+ max_seq_len: Optional[int] = None
class TRTLLMMLABackend(FlashInferMLAAttnBackend):
@@ -96,8 +108,9 @@ def __init__(
# CUDA graph state
self.decode_cuda_graph_metadata = {}
- self.cuda_graph_kv_indices = None
- self.forward_metadata: Union[TRTLLMMLADecodeMetadata, None] = None
+ self.decode_cuda_graph_kv_indices = None
+ self.forward_prefill_metadata: Optional[TRTLLMMLAPrefillMetadata] = None
+ self.forward_decode_metadata: Union[TRTLLMMLADecodeMetadata, None] = None
def _calc_padded_blocks(self, max_seq_len: int) -> int:
"""
@@ -167,14 +180,14 @@ def init_cuda_graph_state(
kv_indices_buf: Optional[torch.Tensor] = None,
):
"""Initialize CUDA graph state for TRTLLM MLA."""
+
max_blocks_per_seq = self._calc_padded_blocks(self.max_context_len)
- self.cuda_graph_kv_indices = torch.full(
+ self.decode_cuda_graph_kv_indices = torch.full(
(max_bs, max_blocks_per_seq), -1, dtype=torch.int32, device=self.device
)
- self.cuda_graph_workspace = torch.empty(
- self.workspace_size, dtype=torch.int8, device=self.device
- )
+
+ super().init_cuda_graph_state(max_bs, max_num_tokens, kv_indices_buf)
def init_forward_metadata_capture_cuda_graph(
self,
@@ -187,8 +200,9 @@ def init_forward_metadata_capture_cuda_graph(
spec_info: Optional[SpecInfo],
):
"""Initialize metadata for CUDA graph capture."""
- # Delegate to parent for non-decode modes or when speculative execution is used.
- if not (forward_mode.is_decode_or_idle() and spec_info is None):
+
+ # Delegate to parent for non-decode modes.
+ if not forward_mode.is_decode_or_idle():
return super().init_forward_metadata_capture_cuda_graph(
bs,
num_tokens,
@@ -199,9 +213,10 @@ def init_forward_metadata_capture_cuda_graph(
spec_info,
)
- # Custom fast-path for decode/idle without speculative execution.
- max_seqlen_pad = self._calc_padded_blocks(seq_lens.max().item())
- block_kv_indices = self.cuda_graph_kv_indices[:bs, :max_seqlen_pad]
+ # Custom fast-path for decode/idle.
+ # Capture with full width so future longer sequences are safe during replay
+ max_blocks_per_seq = self._calc_padded_blocks(self.max_context_len)
+ block_kv_indices = self.decode_cuda_graph_kv_indices[:bs, :max_blocks_per_seq]
create_flashmla_kv_indices_triton[(bs,)](
self.req_to_token,
@@ -210,14 +225,22 @@ def init_forward_metadata_capture_cuda_graph(
None,
block_kv_indices,
self.req_to_token.stride(0),
- max_seqlen_pad,
+ max_blocks_per_seq,
NUM_PAGE_PER_BLOCK=TRITON_PAD_NUM_PAGE_PER_BLOCK,
PAGED_SIZE=self.page_size,
)
- metadata = TRTLLMMLADecodeMetadata(self.cuda_graph_workspace, block_kv_indices)
+ # Record the true maximum sequence length for this capture batch so that
+ # the kernel launch path (which requires an int not a tensor) can reuse
+ # it safely during both capture and replay.
+ max_seq_len_val = int(seq_lens.max().item())
+
+ metadata = TRTLLMMLADecodeMetadata(
+ block_kv_indices,
+ max_seq_len_val,
+ )
self.decode_cuda_graph_metadata[bs] = metadata
- self.forward_metadata = metadata
+ self.forward_decode_metadata = metadata
def init_forward_metadata_replay_cuda_graph(
self,
@@ -231,8 +254,8 @@ def init_forward_metadata_replay_cuda_graph(
seq_lens_cpu: Optional[torch.Tensor],
):
"""Replay CUDA graph with new inputs."""
- # Delegate to parent for non-decode modes or when speculative execution is used.
- if not (forward_mode.is_decode_or_idle() and spec_info is None):
+ # Delegate to parent for non-decode modes.
+ if not forward_mode.is_decode_or_idle():
return super().init_forward_metadata_replay_cuda_graph(
bs,
req_pool_indices,
@@ -259,40 +282,66 @@ def init_forward_metadata_replay_cuda_graph(
PAGED_SIZE=self.page_size,
)
+ # Update stored max_seq_len so subsequent kernel calls use the correct value
+ # Prefer CPU tensor to avoid GPU synchronization when available.
+ if seq_lens_cpu is not None:
+ metadata.max_seq_len = int(seq_lens_cpu.max().item())
+ else:
+ metadata.max_seq_len = int(seq_lens.max().item())
+
def get_cuda_graph_seq_len_fill_value(self) -> int:
"""Get the fill value for sequence lengths in CUDA graph."""
return 1
def init_forward_metadata(self, forward_batch: ForwardBatch):
"""Initialize the metadata for a forward pass."""
- # Delegate to parent for non-decode modes or when speculative execution is used.
- if not (
- forward_batch.forward_mode.is_decode_or_idle()
- and forward_batch.spec_info is None
+ # Delegate to parent for non-decode modes.
+ if (
+ forward_batch.forward_mode.is_extend()
+ and not forward_batch.forward_mode.is_target_verify()
+ and not forward_batch.forward_mode.is_draft_extend()
):
- return super().init_forward_metadata(forward_batch)
+ seq_lens = forward_batch.seq_lens - forward_batch.extend_prefix_lens
+ cum_seq_lens_q = torch.cat(
+ (
+ torch.tensor([0], device=forward_batch.seq_lens.device),
+ torch.cumsum(seq_lens, dim=0),
+ )
+ ).int()
+ max_seq_len = max(forward_batch.extend_seq_lens_cpu)
+ self.forward_prefill_metadata = TRTLLMMLAPrefillMetadata(
+ max_seq_len,
+ cum_seq_lens_q,
+ seq_lens,
+ )
+ elif forward_batch.forward_mode.is_decode_or_idle():
+ bs = forward_batch.batch_size
- bs = forward_batch.batch_size
+ # Get maximum sequence length.
+ if getattr(forward_batch, "seq_lens_cpu", None) is not None:
+ max_seq = forward_batch.seq_lens_cpu.max().item()
+ else:
+ max_seq = forward_batch.seq_lens.max().item()
- # Get maximum sequence length.
- if getattr(forward_batch, "seq_lens_cpu", None) is not None:
- max_seq = forward_batch.seq_lens_cpu.max().item()
+ max_seqlen_pad = self._calc_padded_blocks(max_seq)
+ block_kv_indices = self._create_block_kv_indices(
+ bs,
+ max_seqlen_pad,
+ forward_batch.req_pool_indices,
+ forward_batch.seq_lens,
+ forward_batch.seq_lens.device,
+ )
+
+ max_seq_len_val = int(max_seq)
+ self.forward_decode_metadata = TRTLLMMLADecodeMetadata(
+ block_kv_indices, max_seq_len_val
+ )
+ forward_batch.decode_trtllm_mla_metadata = self.forward_decode_metadata
else:
- max_seq = forward_batch.seq_lens.max().item()
-
- max_seqlen_pad = self._calc_padded_blocks(max_seq)
- block_kv_indices = self._create_block_kv_indices(
- bs,
- max_seqlen_pad,
- forward_batch.req_pool_indices,
- forward_batch.seq_lens,
- forward_batch.seq_lens.device,
- )
+ return super().init_forward_metadata(forward_batch)
- self.forward_metadata = TRTLLMMLADecodeMetadata(
- self.workspace_buffer, block_kv_indices
- )
- forward_batch.decode_trtllm_mla_metadata = self.forward_metadata
+ def init_mha_chunk_metadata(self, forward_batch: ForwardBatch):
+ super().init_mha_chunk_metadata(forward_batch, disable_flashinfer_ragged=True)
def quantize_and_rope_for_fp8(
self,
@@ -436,7 +485,7 @@ def forward_decode(
# Get metadata
metadata = (
getattr(forward_batch, "decode_trtllm_mla_metadata", None)
- or self.forward_metadata
+ or self.forward_decode_metadata
)
# Scale computation for TRTLLM MLA kernel BMM1 operation:
@@ -459,18 +508,82 @@ def forward_decode(
raw_out = flashinfer.decode.trtllm_batch_decode_with_kv_cache_mla(
query=query,
kv_cache=kv_cache,
- workspace_buffer=metadata.workspace,
+ workspace_buffer=self.workspace_buffer,
qk_nope_head_dim=self.qk_nope_head_dim,
kv_lora_rank=self.kv_lora_rank,
qk_rope_head_dim=self.qk_rope_head_dim,
block_tables=metadata.block_kv_indices,
seq_lens=forward_batch.seq_lens.to(torch.int32),
- max_seq_len=int(metadata.block_kv_indices.shape[1] * self.page_size),
+ max_seq_len=metadata.max_seq_len,
bmm1_scale=bmm1_scale,
)
- # Extract value projection part and reshape
- raw_out_v = raw_out[..., : layer.v_head_dim].contiguous()
- output = raw_out_v.view(-1, layer.tp_q_head_num * layer.v_head_dim)
+ # Reshape output directly without slicing
+ output = raw_out.view(-1, layer.tp_q_head_num * layer.v_head_dim)
+ return output
+
+ def forward_extend(
+ self,
+ q: torch.Tensor,
+ k: torch.Tensor,
+ v: torch.Tensor,
+ layer: RadixAttention,
+ forward_batch: ForwardBatch,
+ save_kv_cache: bool = True,
+ q_rope: Optional[torch.Tensor] = None,
+ k_rope: Optional[torch.Tensor] = None,
+ ) -> torch.Tensor:
+ if (
+ forward_batch.forward_mode.is_target_verify()
+ or forward_batch.forward_mode.is_draft_extend()
+ ):
+ return super().forward_extend(
+ q, k, v, layer, forward_batch, save_kv_cache, q_rope, k_rope
+ )
+ if not forward_batch.attn_attend_prefix_cache:
+ q = q.view(-1, layer.tp_q_head_num, layer.head_dim)
+ k = k.view(-1, layer.tp_k_head_num, layer.head_dim)
+ v = v.view(-1, layer.tp_k_head_num, layer.v_head_dim)
+ output = flashinfer.prefill.trtllm_ragged_attention_deepseek(
+ query=q,
+ key=k,
+ value=v,
+ workspace_buffer=self.workspace_buffer,
+ seq_lens=self.forward_prefill_metadata.seq_lens,
+ max_q_len=self.forward_prefill_metadata.max_seq_len,
+ max_kv_len=self.forward_prefill_metadata.max_seq_len,
+ bmm1_scale=layer.scaling,
+ bmm2_scale=1.0,
+ o_sf_scale=1.0,
+ batch_size=forward_batch.batch_size,
+ window_left=-1,
+ cum_seq_lens_q=self.forward_prefill_metadata.cum_seq_lens,
+ cum_seq_lens_kv=self.forward_prefill_metadata.cum_seq_lens,
+ enable_pdl=False,
+ is_causal=True,
+ return_lse=forward_batch.mha_return_lse,
+ )
+ else:
+            # TODO: replace with trtllm ragged attention once accuracy is resolved.
+ output = super().forward_extend(
+ q, k, v, layer, forward_batch, save_kv_cache, q_rope, k_rope
+ )
return output
+
+
+class TRTLLMMLAMultiStepDraftBackend(FlashInferMLAMultiStepDraftBackend):
+ """Multi-step draft backend for TRT-LLM MLA used by EAGLE."""
+
+ def __init__(
+ self, model_runner: "ModelRunner", topk: int, speculative_num_steps: int
+ ):
+ super().__init__(model_runner, topk, speculative_num_steps)
+
+ for i in range(self.speculative_num_steps):
+ self.attn_backends[i] = TRTLLMMLABackend(
+ model_runner,
+ skip_prefill=True,
+ kv_indptr_buf=self.kv_indptr[i],
+ q_indptr_decode_buf=self.q_indptr_decode,
+ )
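The MLA decode path now sizes its CUDA-graph block table for `max_context_len` at capture time but records the real per-batch `max_seq_len` in the metadata, refreshing it from `seq_lens_cpu` on replay to avoid a device sync. A rough sketch of the page/padding arithmetic involved; the 64s below are assumed values, the real constants (`page_size`, `TRITON_PAD_NUM_PAGE_PER_BLOCK`) live outside this diff.

```python
def padded_blocks(max_seq_len: int, page_size: int = 64, pad_to: int = 64) -> int:
    blocks = (max_seq_len + page_size - 1) // page_size   # ceil-divide tokens into pages
    return ((blocks + pad_to - 1) // pad_to) * pad_to     # round up to the padding unit

# Capture sizes the table for the context limit; replay only tightens max_seq_len.
print(padded_blocks(131072))  # 2048
print(padded_blocks(1000))    # 16 pages -> padded to 64
```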
diff --git a/python/sglang/srt/layers/attention/vision.py b/python/sglang/srt/layers/attention/vision.py
index 5c8200f572a..10534ef3517 100644
--- a/python/sglang/srt/layers/attention/vision.py
+++ b/python/sglang/srt/layers/attention/vision.py
@@ -12,7 +12,12 @@
from einops import rearrange
from sglang.srt.layers.dp_attention import get_attention_tp_rank, get_attention_tp_size
-from sglang.srt.utils import is_cuda, print_info_once
+from sglang.srt.utils import (
+ get_device_capability,
+ is_blackwell,
+ is_cuda,
+ print_info_once,
+)
_is_cuda = is_cuda()
@@ -20,7 +25,6 @@
from sgl_kernel.flash_attn import flash_attn_varlen_func
from sglang.srt.distributed import (
- parallel_state,
split_tensor_along_last_dim,
tensor_model_parallel_all_gather,
)
@@ -267,8 +271,8 @@ def forward(
k,
v,
output,
- cu_seqlens.cuda(),
- seq_lens.cuda(),
+ cu_seqlens.to(q.device),
+ seq_lens.to(q.device),
max_seqlen,
is_causal=False,
)
@@ -402,18 +406,14 @@ def __init__(
self.dummy_dim, eps=layer_norm_eps, var_hidden_size=embed_dim
)
- # priority: server_args > passed qkv_backend > sdpa
- if global_server_args_dict["mm_attention_backend"] is None:
- if qkv_backend is None:
- if is_cuda():
- # Double prefill throughput by setting attn backend to Triton on CUDA
- qkv_backend = "triton_attn"
- else:
- qkv_backend = "sdpa"
+ # Select attention backend via a unified method
+ _passed_backend = qkv_backend
+ qkv_backend = self._determine_attention_backend(_passed_backend)
+ if (
+ global_server_args_dict["mm_attention_backend"] is None
+ and _passed_backend is None
+ ):
print_info_once(f"Multimodal attention backend not set. Use {qkv_backend}.")
- else:
- qkv_backend = global_server_args_dict["mm_attention_backend"]
-
print_info_once(f"Using {qkv_backend} as multimodal attention backend.")
self.customized_position_embedding_applier = (
@@ -461,6 +461,33 @@ def __init__(
prefix=add_prefix("proj", prefix),
)
+ def _determine_attention_backend(self, passed_backend: Optional[str]) -> str:
+ """Decide the multimodal attention backend string.
+
+ Priority: server args override > constructor arg > platform default.
+
+ Platform defaults:
+ - CUDA: "triton_attn"
+ - Non-CUDA: "sdpa"
+ """
+ override_backend = global_server_args_dict["mm_attention_backend"]
+ if override_backend is not None:
+ backend = override_backend
+ elif passed_backend is not None:
+ backend = passed_backend
+ elif is_cuda():
+ major, minor = get_device_capability()
+ if major == 9:
+ backend = "fa3"
+ else:
+ backend = "triton_attn"
+ else:
+ backend = "sdpa"
+ if backend == "fa3" and is_blackwell():
+ raise ValueError("The 'fa3' backend is not supported on Blackwell GPUs")
+
+ return backend
+
def _apply_qk_norm(self, q: torch.Tensor, k: torch.Tensor):
"""apply qk norm for internvl vit attn"""
q = q.flatten(1, 2)
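A standalone restatement of the selection order implemented by `_determine_attention_backend`, with the runtime probes (`global_server_args_dict`, `get_device_capability`, `is_blackwell`) replaced by plain arguments; the function name is illustrative only.

```python
from typing import Optional

def pick_mm_attention_backend(
    server_arg: Optional[str],
    passed_backend: Optional[str],
    on_cuda: bool,
    sm_major: int,
    on_blackwell: bool,
) -> str:
    if server_arg is not None:
        backend = server_arg                                   # 1. server args override everything
    elif passed_backend is not None:
        backend = passed_backend                               # 2. then the constructor argument
    elif on_cuda:
        backend = "fa3" if sm_major == 9 else "triton_attn"    # 3. platform default
    else:
        backend = "sdpa"
    if backend == "fa3" and on_blackwell:
        raise ValueError("The 'fa3' backend is not supported on Blackwell GPUs")
    return backend

assert pick_mm_attention_backend(None, None, True, 9, False) == "fa3"
assert pick_mm_attention_backend("sdpa", "triton_attn", True, 9, False) == "sdpa"
```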
diff --git a/python/sglang/srt/layers/attention/vision_utils.py b/python/sglang/srt/layers/attention/vision_utils.py
new file mode 100644
index 00000000000..ecccb1f8528
--- /dev/null
+++ b/python/sglang/srt/layers/attention/vision_utils.py
@@ -0,0 +1,65 @@
+"""Utility functions for vision attention layers."""
+
+import torch
+
+from sglang.srt.layers.dp_attention import get_attention_tp_size
+
+
+def update_vit_attn_dummy_heads_config(config):
+ """Update HF config to ensure vision attention num_attention_heads is divisible by tp_size"""
+ tp_size = get_attention_tp_size()
+ num_heads = getattr(
+ config.vision_config,
+ "num_heads",
+ getattr(config.vision_config, "num_attention_heads", None),
+ )
+ head_dim = config.vision_config.hidden_size // num_heads
+ num_dummy_heads = 0
+
+ if num_heads % tp_size != 0:
+ num_dummy_heads = ((num_heads + tp_size - 1) // tp_size) * tp_size - num_heads
+
+ setattr(config.vision_config, "head_dim", head_dim)
+ setattr(config.vision_config, "num_dummy_heads", num_dummy_heads)
+
+
+def pad_vit_attn_dummy_heads(config, name: str, loaded_weight: torch.Tensor):
+ """Pad attention qkv weights for dummy heads"""
+ num_dummy_heads = config.vision_config.num_dummy_heads
+ if num_dummy_heads == 0:
+ return loaded_weight
+ head_dim = config.vision_config.head_dim
+
+ if "attn.qkv_proj" in name:
+ wq, wk, wv = loaded_weight.chunk(3, dim=0)
+ if name.endswith(".weight"):
+ dummy_shape = [num_dummy_heads, head_dim, wq.shape[-1]]
+ elif name.endswith(".bias"):
+ dummy_shape = [num_dummy_heads, head_dim]
+ else:
+ raise RuntimeError(f"Unsupported weight with name={name}")
+ pad_func = lambda x: torch.cat(
+ [x.unflatten(0, (-1, head_dim)), x.new_zeros(dummy_shape)], dim=0
+ ).flatten(0, 1)
+ wq, wk, wv = pad_func(wq), pad_func(wk), pad_func(wv)
+ loaded_weight = torch.cat([wq, wk, wv], dim=0)
+ elif any([_ in name for _ in ["attn.q_proj", "attn.k_proj", "attn.v_proj"]]):
+ if name.endswith(".weight"):
+ dummy_shape = [num_dummy_heads, head_dim, loaded_weight.shape[-1]]
+ elif name.endswith(".bias"):
+ dummy_shape = [num_dummy_heads, head_dim]
+ else:
+ raise RuntimeError(f"Unsupported weight with name={name}")
+ padded_weight = loaded_weight.new_zeros(dummy_shape)
+ loaded_weight = torch.cat(
+ [loaded_weight.unflatten(0, (-1, head_dim)), padded_weight], dim=0
+ ).flatten(0, 1)
+ elif "attn.proj.weight" in name:
+ padded_weight = loaded_weight.new_zeros(
+ loaded_weight.shape[0], head_dim * num_dummy_heads
+ )
+ loaded_weight = torch.cat([loaded_weight, padded_weight], dim=-1)
+ elif "attn.q_norm.weight" in name or "attn.k_norm.weight" in name:
+ padded_weight = loaded_weight.new_zeros(head_dim * num_dummy_heads)
+ loaded_weight = torch.cat([loaded_weight, padded_weight], dim=0)
+ return loaded_weight
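The helper above pads the ViT head count up to the next multiple of the attention TP size and zero-fills the extra heads. The arithmetic, restated as a quick check:

```python
def num_dummy_heads(num_heads: int, tp_size: int) -> int:
    if num_heads % tp_size == 0:
        return 0
    return ((num_heads + tp_size - 1) // tp_size) * tp_size - num_heads

assert num_dummy_heads(16, 8) == 0
assert num_dummy_heads(25, 8) == 7   # 25 heads -> padded to 32, with 7 zero-initialized dummies
```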
diff --git a/python/sglang/srt/layers/attention/wave_ops/decode_attention.py b/python/sglang/srt/layers/attention/wave_ops/decode_attention.py
index cb89697bddb..c76bee9af56 100644
--- a/python/sglang/srt/layers/attention/wave_ops/decode_attention.py
+++ b/python/sglang/srt/layers/attention/wave_ops/decode_attention.py
@@ -64,8 +64,7 @@ def get_wave_kernel(
subs=hyperparams_0,
canonicalize=True,
run_bench=False,
- use_buffer_load_ops=True,
- use_buffer_store_ops=True,
+ use_buffer_ops=True,
waves_per_eu=2,
dynamic_symbols=dynamic_symbols_0,
wave_runtime=True,
@@ -77,8 +76,7 @@ def get_wave_kernel(
subs=hyperparams_1,
canonicalize=True,
run_bench=False,
- use_buffer_load_ops=False,
- use_buffer_store_ops=False,
+ use_buffer_ops=False,
waves_per_eu=4,
dynamic_symbols=dynamic_symbols_1,
wave_runtime=True,
diff --git a/python/sglang/srt/layers/attention/wave_ops/extend_attention.py b/python/sglang/srt/layers/attention/wave_ops/extend_attention.py
index 35a53d3e289..27e674db247 100644
--- a/python/sglang/srt/layers/attention/wave_ops/extend_attention.py
+++ b/python/sglang/srt/layers/attention/wave_ops/extend_attention.py
@@ -67,11 +67,9 @@ def get_wave_kernel(
schedule=SchedulingType.NONE,
use_scheduling_barriers=False,
dynamic_symbols=dynamic_symbols,
- use_buffer_load_ops=True,
- use_buffer_store_ops=True,
+ use_buffer_ops=True,
waves_per_eu=2,
denorm_fp_math_f32="preserve-sign",
- gpu_native_math_precision=True,
wave_runtime=True,
)
options = set_default_run_config(options)
diff --git a/python/sglang/srt/layers/communicator.py b/python/sglang/srt/layers/communicator.py
index 73a9030f7c2..fba8d8f18b9 100644
--- a/python/sglang/srt/layers/communicator.py
+++ b/python/sglang/srt/layers/communicator.py
@@ -34,18 +34,34 @@
get_attention_tp_size,
get_global_dp_buffer,
get_local_dp_buffer,
+ is_dp_attention_enabled,
)
from sglang.srt.layers.moe import (
get_moe_a2a_backend,
should_use_flashinfer_cutlass_moe_fp4_allgather,
)
-from sglang.srt.layers.utils import is_sm100_supported
from sglang.srt.managers.schedule_batch import global_server_args_dict
from sglang.srt.model_executor.forward_batch_info import ForwardBatch
-from sglang.srt.utils import is_cuda, is_flashinfer_available
+from sglang.srt.utils import (
+ get_bool_env_var,
+ is_cuda,
+ is_flashinfer_available,
+ is_gfx95_supported,
+ is_hip,
+ is_sm90_supported,
+ is_sm100_supported,
+)
_is_flashinfer_available = is_flashinfer_available()
+_is_sm90_supported = is_cuda() and is_sm90_supported()
_is_sm100_supported = is_cuda() and is_sm100_supported()
+_use_aiter = get_bool_env_var("SGLANG_USE_AITER") and is_hip()
+_is_gfx95_supported = is_gfx95_supported()
+
+if _use_aiter and _is_gfx95_supported:
+ from sglang.srt.layers.quantization.rocm_mxfp4_utils import fused_rms_mxfp4_quant
+
+FUSE_ALLREDUCE_MAX_BATCH_SIZE = 2048
class ScatterMode(Enum):
@@ -162,11 +178,13 @@ def __init__(
post_attention_layernorm: torch.nn.Module,
# Reduce scatter requires skipping all-reduce in model code after MoE/MLP, so only enable for models which have that implemented. Remove flag once done for all models that use LayerCommunicator.
allow_reduce_scatter: bool = False,
+ is_last_layer: bool = False,
):
self.layer_scatter_modes = layer_scatter_modes
self.input_layernorm = input_layernorm
self.post_attention_layernorm = post_attention_layernorm
self.allow_reduce_scatter = allow_reduce_scatter
+ self.is_last_layer = is_last_layer
self._context = CommunicateContext.init_new()
self._communicate_simple_fn = CommunicateSimpleFn.get_fn(
@@ -197,6 +215,7 @@ def prepare_attn(
hidden_states: torch.Tensor,
residual: torch.Tensor,
forward_batch: ForwardBatch,
+ qaunt_format: str = "",
):
if hidden_states.shape[0] == 0:
residual = hidden_states
@@ -214,11 +233,34 @@ def prepare_attn(
else:
if residual is None:
residual = hidden_states
- hidden_states = self.input_layernorm(hidden_states)
+
+ if _use_aiter and _is_gfx95_supported and ("mxfp4" in qaunt_format):
+ hidden_states = fused_rms_mxfp4_quant(
+ hidden_states,
+ self.input_layernorm.weight,
+ self.input_layernorm.variance_epsilon,
+ None,
+ None,
+ None,
+ None,
+ )
+ else:
+ hidden_states = self.input_layernorm(hidden_states)
else:
- hidden_states, residual = self.input_layernorm(
- hidden_states, residual
- )
+ if _use_aiter and _is_gfx95_supported and ("mxfp4" in qaunt_format):
+ hidden_states, residual = fused_rms_mxfp4_quant(
+ hidden_states,
+ self.input_layernorm.weight,
+ self.input_layernorm.variance_epsilon,
+ None,
+ None,
+ None,
+ residual,
+ )
+ else:
+ hidden_states, residual = self.input_layernorm(
+ hidden_states, residual
+ )
hidden_states = self._communicate_simple_fn(
hidden_states=hidden_states,
@@ -264,6 +306,41 @@ def should_use_reduce_scatter(self, forward_batch: ForwardBatch):
and forward_batch.dp_padding_mode.is_max_len()
)
+ def should_fuse_mlp_allreduce_with_next_layer(
+ self, forward_batch: ForwardBatch
+ ) -> bool:
+ speculative_algo = global_server_args_dict.get("speculative_algorithm", None)
+ if (
+ is_dp_attention_enabled()
+ and speculative_algo is not None
+ and speculative_algo.is_eagle()
+ ):
+ return False
+
+ batch_size = (
+ forward_batch.input_ids.shape[0]
+ if hasattr(forward_batch, "input_ids")
+ else 0
+ )
+ if batch_size > FUSE_ALLREDUCE_MAX_BATCH_SIZE:
+ return False
+
+ static_conditions_met = (
+ (not self.is_last_layer)
+ and (self._context.tp_size > 1)
+ and global_server_args_dict.get("enable_flashinfer_allreduce_fusion", False)
+ and _is_flashinfer_available
+ )
+
+ if not static_conditions_met:
+ return False
+
+ return (
+ batch_size > 0
+ and batch_size <= FUSE_ALLREDUCE_MAX_BATCH_SIZE
+ and (not self.is_last_layer)
+ )
+
@dataclass
class CommunicateContext:
@@ -445,11 +522,11 @@ def _gather_hidden_states_and_residual(
# According to the discussion in https://github.com/flashinfer-ai/flashinfer/issues/1223#issuecomment-3047256465
# We set the max token num to 128 for allreduce fusion with min-latency case(use_oneshot=True).
if (
- _is_sm100_supported
+ (_is_sm100_supported or _is_sm90_supported)
and _is_flashinfer_available
and hasattr(layernorm, "forward_with_allreduce_fusion")
and global_server_args_dict["enable_flashinfer_allreduce_fusion"]
- and hidden_states.shape[0] <= 2048
+ and hidden_states.shape[0] <= 4096
):
hidden_states, residual = layernorm.forward_with_allreduce_fusion(
hidden_states, residual
diff --git a/python/sglang/srt/layers/dp_attention.py b/python/sglang/srt/layers/dp_attention.py
index 58f6e0f9c73..1250636eb90 100644
--- a/python/sglang/srt/layers/dp_attention.py
+++ b/python/sglang/srt/layers/dp_attention.py
@@ -234,7 +234,7 @@ def initialize_dp_attention(
_DpGatheredBufferWrapper.set_metadata(
hidden_size=model_config.hidden_size,
dtype=model_config.dtype,
- device=torch.device("cuda"),
+ device=torch.device(server_args.device),
)
diff --git a/python/sglang/srt/layers/elementwise.py b/python/sglang/srt/layers/elementwise.py
index 3134e2bc18e..e05d88b32a5 100644
--- a/python/sglang/srt/layers/elementwise.py
+++ b/python/sglang/srt/layers/elementwise.py
@@ -486,3 +486,97 @@ def gelu_and_mul_triton(
return out_hidden_states, out_scales
else:
return out_hidden_states, None
+
+
+# silu on first half of vector
+@triton.jit
+def silu_and_mul_kernel(
+ out_hidden_states_ptr, # (bs, hidden_dim)
+ out_scales_ptr, # (bs,)
+ hidden_states_ptr, # (bs, hidden_dim * 2)
+ quant_max: tl.constexpr,
+ static_scale: tl.constexpr,
+ hidden_dim: tl.constexpr, # the output hidden_dim
+ BLOCK_SIZE: tl.constexpr,
+):
+ pid = tl.program_id(axis=0)
+
+ input_start = pid * hidden_dim * 2
+ output_start = pid * hidden_dim
+
+ input1_offs = tl.arange(0, BLOCK_SIZE)
+ mask = tl.arange(0, BLOCK_SIZE) < hidden_dim # shared for input1, input3, output
+ input3_offs = hidden_dim + tl.arange(0, BLOCK_SIZE)
+ output_offs = tl.arange(0, BLOCK_SIZE)
+
+ x1 = tl.load(
+ hidden_states_ptr + input_start + input1_offs, mask=mask, other=0.0
+ ).to(tl.float32)
+ x3 = tl.load(
+ hidden_states_ptr + input_start + input3_offs, mask=mask, other=0.0
+ ).to(tl.float32)
+
+ # silu
+ # cast down before mul to better match training?
+ silu_x1 = x1 * tl.sigmoid(x1)
+ out = x3 * silu_x1.to(hidden_states_ptr.dtype.element_ty)
+
+ if quant_max is not None:
+ raise NotImplementedError()
+
+ tl.store(out_hidden_states_ptr + output_start + output_offs, out, mask=mask)
+
+
+def silu_and_mul_triton(
+ hidden_states,
+ scales=None,
+ quantize=None, # dtype to quantize to
+ out=None,
+):
+ bs, in_hidden_dim = hidden_states.shape
+ hidden_dim = in_hidden_dim // 2
+
+ if out is None:
+ out_hidden_states = torch.empty(
+ (bs, hidden_dim),
+ dtype=quantize or hidden_states.dtype,
+ device=hidden_states.device,
+ )
+ else:
+ assert out.shape == (bs, hidden_dim)
+ assert out.dtype == (quantize or hidden_states.dtype)
+ out_hidden_states = out
+ out_scales = None
+ static_scale = False
+ if quantize is not None:
+ if scales is None:
+ out_scales = torch.empty(
+ (bs,), dtype=torch.float32, device=hidden_states.device
+ )
+ else:
+ out_scales = scales
+ static_scale = True
+
+ max_warps = 16 if _is_hip else 32
+ config = {
+ # 8 ele per thread (not tuned)
+ "num_warps": max(
+ min(triton.next_power_of_2(triton.cdiv(hidden_dim, 8 * 32)), max_warps), 4
+ ),
+ }
+
+ silu_and_mul_kernel[(bs,)](
+ out_hidden_states,
+ out_scales,
+ hidden_states,
+ quant_max=torch.finfo(quantize).max if quantize is not None else None,
+ static_scale=static_scale,
+ hidden_dim=hidden_dim,
+ BLOCK_SIZE=triton.next_power_of_2(hidden_dim),
+ **config,
+ )
+
+ if quantize is not None:
+ return out_hidden_states, out_scales
+ else:
+ return out_hidden_states, None
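An eager-mode reference for the new `silu_and_mul_triton` path (the quantization branch is omitted, since the kernel currently raises for it): SiLU of the first half of the last dimension, multiplied with the second half.

```python
import torch

def silu_and_mul_reference(hidden_states: torch.Tensor) -> torch.Tensor:
    hidden_dim = hidden_states.shape[-1] // 2
    x1, x3 = hidden_states[..., :hidden_dim], hidden_states[..., hidden_dim:]
    # The Triton kernel casts the SiLU result back to the input dtype before multiplying;
    # this reference keeps everything in the input dtype for simplicity.
    return x3 * torch.nn.functional.silu(x1)

x = torch.randn(4, 2 * 11)
assert silu_and_mul_reference(x).shape == (4, 11)
```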
diff --git a/python/sglang/srt/layers/flashinfer_comm_fusion.py b/python/sglang/srt/layers/flashinfer_comm_fusion.py
index 023db709c35..81280db0a6c 100644
--- a/python/sglang/srt/layers/flashinfer_comm_fusion.py
+++ b/python/sglang/srt/layers/flashinfer_comm_fusion.py
@@ -5,7 +5,11 @@
import torch.distributed as dist
from sglang.srt.distributed import get_tensor_model_parallel_world_size
-from sglang.srt.utils import is_flashinfer_available
+from sglang.srt.utils import (
+ direct_register_custom_op,
+ is_flashinfer_available,
+ supports_custom_op,
+)
logger = logging.getLogger(__name__)
@@ -196,6 +200,30 @@ def flashinfer_allreduce_residual_rmsnorm(
return norm_out, residual_out
+def fake_flashinfer_allreduce_residual_rmsnorm(
+ input_tensor: torch.Tensor,
+ residual: torch.Tensor,
+ weight: torch.Tensor,
+ eps: float = 1e-6,
+ max_token_num: int = 2048,
+ use_oneshot: Optional[bool] = None,
+ trigger_completion_at_end: bool = False,
+ fp32_acc: bool = False,
+) -> Tuple[torch.Tensor, torch.Tensor]:
+ residual_out = torch.empty_like(residual)
+ norm_out = torch.empty_like(input_tensor)
+ return norm_out, residual_out
+
+
+if supports_custom_op():
+ direct_register_custom_op(
+ "flashinfer_allreduce_residual_rmsnorm",
+ flashinfer_allreduce_residual_rmsnorm,
+ mutates_args=["input_tensor", "residual", "weight"],
+ fake_impl=fake_flashinfer_allreduce_residual_rmsnorm,
+ )
+
+
def cleanup_flashinfer_workspace():
global _workspace_manager
if _workspace_manager is not None:
diff --git a/python/sglang/srt/layers/layernorm.py b/python/sglang/srt/layers/layernorm.py
index 4c1f2268b32..59489cdb8a5 100644
--- a/python/sglang/srt/layers/layernorm.py
+++ b/python/sglang/srt/layers/layernorm.py
@@ -18,6 +18,7 @@
import torch
import torch.nn as nn
+from packaging.version import Version
from sglang.srt.custom_op import CustomOp
from sglang.srt.utils import (
@@ -25,34 +26,41 @@
get_bool_env_var,
is_cpu,
is_cuda,
+ is_flashinfer_available,
is_hip,
is_npu,
+ is_xpu,
+ supports_custom_op,
)
_is_cuda = is_cuda()
+_is_flashinfer_available = is_flashinfer_available()
_is_hip = is_hip()
_is_npu = is_npu()
_use_aiter = get_bool_env_var("SGLANG_USE_AITER") and _is_hip
_is_cpu_amx_available = cpu_has_amx_support()
_is_cpu = is_cpu()
+_is_xpu = is_xpu()
if _is_cuda:
- from sgl_kernel import (
- fused_add_rmsnorm,
- gemma_fused_add_rmsnorm,
- gemma_rmsnorm,
- rmsnorm,
- )
+ if _is_flashinfer_available:
+ from flashinfer.norm import fused_add_rmsnorm
+ else:
+ from sgl_kernel import fused_add_rmsnorm
+ from sgl_kernel import gemma_fused_add_rmsnorm, gemma_rmsnorm, rmsnorm
if _use_aiter:
from aiter import rmsnorm2d_fwd as rms_norm
from aiter import rmsnorm2d_fwd_with_add as fused_add_rms_norm
elif _is_hip:
+ import vllm
from vllm._custom_ops import fused_add_rms_norm, rms_norm
+ _vllm_version = Version(vllm.__version__)
+
logger = logging.getLogger(__name__)
-if is_npu():
+if _is_npu:
import torch_npu
@@ -126,8 +134,21 @@ def forward_hip(
# NOTE: Remove this if aiter kernel supports discontinuous input
x = x.contiguous()
if residual is not None:
- fused_add_rms_norm(x, residual, self.weight.data, self.variance_epsilon)
- return x, residual
+ if _vllm_version < Version("0.9"):
+ fused_add_rms_norm(x, residual, self.weight.data, self.variance_epsilon)
+ return x, residual
+ else:
+ residual_out = torch.empty_like(x)
+ output = torch.empty_like(x)
+ fused_add_rms_norm(
+ output,
+ x,
+ residual_out,
+ residual,
+ self.weight.data,
+ self.variance_epsilon,
+ )
+ return output, residual_out
out = torch.empty_like(x)
rms_norm(out, x, self.weight.data, self.variance_epsilon)
return out
@@ -202,8 +223,14 @@ def forward_with_allreduce_fusion(
flashinfer_allreduce_residual_rmsnorm,
)
+ fused_op = (
+ torch.ops.sglang.flashinfer_allreduce_residual_rmsnorm
+ if supports_custom_op()
+ else flashinfer_allreduce_residual_rmsnorm
+ )
+
if get_tensor_model_parallel_world_size() > 1:
- fused_result = flashinfer_allreduce_residual_rmsnorm(
+ fused_result = fused_op(
input_tensor=x,
residual=residual,
weight=self.weight,
@@ -259,28 +286,50 @@ def forward_cuda(
out = gemma_rmsnorm(x, self.weight.data, self.variance_epsilon)
return out
+ def forward_npu(
+ self,
+ x: torch.Tensor,
+ residual: Optional[torch.Tensor] = None,
+ ) -> Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]:
+ if residual is not None:
+ x = x + residual
+ residual = x
+
+ x, _ = torch_npu.npu_gemma_rms_norm(x, self.weight, self.variance_epsilon)
+ return x if residual is None else (x, residual)
+
-class Gemma3RMSNorm(nn.Module):
+class Gemma3RMSNorm(CustomOp):
def __init__(self, dim: int, eps: float = 1e-6):
super().__init__()
self.eps = eps
self.weight = nn.Parameter(torch.zeros(dim))
+    # CustomOp re-dispatches forward() to the platform-specific forward_* methods below.
def _norm(self, x):
return x * torch.rsqrt(x.pow(2).mean(-1, keepdim=True) + self.eps)
- def forward(self, x):
+ def forward_native(self, x):
output = self._norm(x.float())
# Llama does x.to(float16) * w whilst Gemma3 is (x * w).to(float16)
# See https://github.com/huggingface/transformers/pull/29402
output = output * (1.0 + self.weight.float())
return output.type_as(x)
+ def forward_cuda(self, x):
+ return self.forward_native(x)
+
+ def forward_npu(self, x):
+ output, _ = torch_npu.npu_gemma_rms_norm(x, self.weight, self.eps)
+ return output
+
def extra_repr(self):
return f"{tuple(self.weight.shape)}, eps={self.eps}"
-if not (_is_cuda or _is_hip or _is_npu or (_is_cpu and _is_cpu_amx_available)):
+if not (
+ _is_cuda or _is_hip or _is_npu or (_is_cpu and _is_cpu_amx_available) or _is_xpu
+):
logger.info(
"sgl-kernel layernorm implementation is not available on current platform. Fallback to other kernel libraries."
)
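Whichever fused kernel is selected above (flashinfer, sgl_kernel, aiter, or vLLM), the contract is the same: add the residual first, RMS-normalize the sum, and return both the normalized output and the updated residual. A minimal eager sketch of that contract, not a drop-in replacement for any of the kernels:

```python
import torch

def fused_add_rmsnorm_reference(x, residual, weight, eps=1e-6):
    residual = x + residual
    variance = residual.float().pow(2).mean(-1, keepdim=True)
    out = (residual.float() * torch.rsqrt(variance + eps)).type_as(x) * weight
    return out, residual

x = torch.randn(2, 8, dtype=torch.float16)
res = torch.randn(2, 8, dtype=torch.float16)
w = torch.ones(8, dtype=torch.float16)
out, new_res = fused_add_rmsnorm_reference(x, res, w)
```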
diff --git a/python/sglang/srt/layers/linear.py b/python/sglang/srt/layers/linear.py
index 9a3104fc2f8..df2b77e0844 100644
--- a/python/sglang/srt/layers/linear.py
+++ b/python/sglang/srt/layers/linear.py
@@ -110,6 +110,20 @@ def adjust_scalar_to_fused_array(param, loaded_weight, shard_id):
return param[shard_id], loaded_weight
+def adjust_shard_offsets(shard_offsets, loaded_weight, dim):
+ actual_weight_size = loaded_weight.size(dim)
+ target_weight_size = shard_offsets[-1][-1] + shard_offsets[-1][-2]
+ if actual_weight_size != target_weight_size:
+ new_shard_offsets = []
+ new_offset = 0
+ for shard_id, shard_offset, shard_size in shard_offsets:
+ actual_shard_size = actual_weight_size * shard_size // target_weight_size
+ new_shard_offsets.append((shard_id, new_offset, actual_shard_size))
+ new_offset += actual_shard_size
+ return new_shard_offsets
+ return shard_offsets
+
+
class LinearBase(torch.nn.Module):
"""Base linear layer.
@@ -535,6 +549,11 @@ def weight_loader(
packed_dim = getattr(param, "packed_dim", None)
use_bitsandbytes_4bit = getattr(param, "use_bitsandbytes_4bit", False)
+ if _is_cpu:
+ shard_offsets = adjust_shard_offsets(
+ shard_offsets, loaded_weight, output_dim
+ )
+
for shard_id, shard_offset, shard_size in shard_offsets:
# Special case for Quantization.
# If quantized, we need to adjust the offset and size to account
@@ -977,6 +996,11 @@ def weight_loader(
use_bitsandbytes_4bit = getattr(param, "use_bitsandbytes_4bit", False)
packed_dim = getattr(param, "packed_dim", None)
+ if _is_cpu:
+ shard_offsets = adjust_shard_offsets(
+ shard_offsets, loaded_weight, output_dim
+ )
+
for shard_id, shard_offset, shard_size in shard_offsets:
# Special case for Quantized Weights.
# If quantized, we need to adjust the offset and size to account
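
The adjust_shard_offsets helper added above handles the CPU path where the loaded fused weight can be smaller than the layout the offsets were computed for; it rescales every shard proportionally and re-packs the offsets contiguously. A self-contained sketch of the same arithmetic (rescale_shard_offsets is an illustrative name, not the sglang function):

def rescale_shard_offsets(shard_offsets, actual_size):
    # shard_offsets: list of (shard_id, offset, size) describing the expected layout.
    expected_size = shard_offsets[-1][1] + shard_offsets[-1][2]
    if actual_size == expected_size:
        return shard_offsets
    rescaled, offset = [], 0
    for shard_id, _, size in shard_offsets:
        new_size = actual_size * size // expected_size  # keep per-shard proportions
        rescaled.append((shard_id, offset, new_size))
        offset += new_size
    return rescaled

# Expected fused layout of 96 rows, but the checkpoint tensor only has 48:
print(rescale_shard_offsets([("q", 0, 64), ("k", 64, 16), ("v", 80, 16)], 48))
# [('q', 0, 32), ('k', 32, 8), ('v', 40, 8)]
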
diff --git a/python/sglang/srt/layers/logits_processor.py b/python/sglang/srt/layers/logits_processor.py
index 00b30a84809..f6603907a39 100644
--- a/python/sglang/srt/layers/logits_processor.py
+++ b/python/sglang/srt/layers/logits_processor.py
@@ -46,10 +46,12 @@
ForwardBatch,
ForwardMode,
)
-from sglang.srt.utils import dump_to_file, use_intel_amx_backend
+from sglang.srt.utils import dump_to_file, is_npu, use_intel_amx_backend
logger = logging.getLogger(__name__)
+_is_npu = is_npu()
+
@dataclasses.dataclass
class LogitsProcessorOutput:
@@ -61,7 +63,7 @@ class LogitsProcessorOutput:
hidden_states: Optional[torch.Tensor] = None
## Part 2: This part will be assigned in python/sglang/srt/layers/sampler.py::Sampler
- # The logprobs of the next tokens. shape: [#seq]
+    # The logprobs of output tokens. shape: [#seq]. If RETURN_ORIGINAL_LOGPROB = True, these are the logprobs before applying temperature; if False, after applying temperature.
next_token_logprobs: Optional[torch.Tensor] = None
# The logprobs and ids of the top-k tokens in output positions. shape: [#seq, k]
next_token_top_logprobs_val: Optional[List] = None
@@ -517,7 +519,12 @@ def _get_logits(
logits = logits[:, : self.config.vocab_size].float()
if self.final_logit_softcapping:
- fused_softcap(logits, self.final_logit_softcapping)
+ if not _is_npu:
+ fused_softcap(logits, self.final_logit_softcapping)
+ else:
+ logits = self.final_logit_softcapping * torch.tanh(
+ logits / self.final_logit_softcapping
+ )
return logits
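
On NPU, the branch above replaces the fused_softcap kernel with the equivalent elementwise formula logits = cap * tanh(logits / cap), which smoothly bounds every logit to the open interval (-cap, cap) while leaving small logits almost unchanged. A quick sanity check in plain PyTorch:

import torch

cap = 30.0
logits = torch.tensor([-100.0, -5.0, 0.0, 5.0, 100.0])
print(cap * torch.tanh(logits / cap))
# large values are squashed toward +/-30, values near zero are nearly untouched
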
diff --git a/python/sglang/srt/model_parallel.py b/python/sglang/srt/layers/model_parallel.py
similarity index 100%
rename from python/sglang/srt/model_parallel.py
rename to python/sglang/srt/layers/model_parallel.py
diff --git a/python/sglang/srt/layers/moe/__init__.py b/python/sglang/srt/layers/moe/__init__.py
index e5e5930a26b..5c75a368268 100644
--- a/python/sglang/srt/layers/moe/__init__.py
+++ b/python/sglang/srt/layers/moe/__init__.py
@@ -1,4 +1,4 @@
-from sglang.srt.layers.moe.moe_runner import MoeRunnerConfig
+from sglang.srt.layers.moe.moe_runner import MoeRunner, MoeRunnerConfig
from sglang.srt.layers.moe.utils import (
DeepEPMode,
MoeA2ABackend,
@@ -17,6 +17,7 @@
__all__ = [
"DeepEPMode",
"MoeA2ABackend",
+ "MoeRunner",
"MoeRunnerConfig",
"MoeRunnerBackend",
"initialize_moe_config",
diff --git a/python/sglang/srt/layers/moe/cutlass_moe.py b/python/sglang/srt/layers/moe/cutlass_moe.py
index 262f1ae3937..d0fb4e3ef48 100755
--- a/python/sglang/srt/layers/moe/cutlass_moe.py
+++ b/python/sglang/srt/layers/moe/cutlass_moe.py
@@ -1,20 +1,12 @@
"""CUTLASS based Fused MoE kernels."""
-import functools
-import json
-import logging
-import os
-from typing import Any, Callable, Dict, List, Optional, Tuple
-
import torch
from sglang.srt.layers.moe.cutlass_moe_params import CutlassMoEParams
-from sglang.srt.layers.utils import is_sm90_supported, is_sm100_supported
from sglang.srt.utils import is_cuda
_is_cuda = is_cuda()
if _is_cuda:
- import sgl_kernel
from sgl_kernel import (
apply_shuffle_mul_sum,
cutlass_fp4_group_mm,
@@ -157,10 +149,6 @@ def cutlass_fused_experts_fp8(
rep_a_q = shuffle_rows(a_q, a_map, (m * topk, k))
rep_a1_scales = shuffle_rows(a1_scale, a_map, (m * topk, int(k / 128)))
- if not is_sm100_supported():
- rep_a1_scales = per_group_transpose(rep_a1_scales, expert_offsets)
- w1_scale = w1_scale.contiguous()
-
c1 = torch.empty((m * topk, n * 2), device=device, dtype=out_dtype)
c2 = torch.empty((m * topk, k), device=device, dtype=out_dtype)
@@ -192,9 +180,6 @@ def cutlass_fused_experts_fp8(
silu_and_mul(c1, intermediate)
intemediate_q, a2_scale = sglang_per_token_group_quant_fp8(intermediate, 128)
- if not is_sm100_supported():
- a2_scale = per_group_transpose(a2_scale, expert_offsets)
- w2_scale = w2_scale.contiguous()
fp8_blockwise_scaled_grouped_mm(
c2,
diff --git a/python/sglang/srt/layers/moe/cutlass_w4a8_moe.py b/python/sglang/srt/layers/moe/cutlass_w4a8_moe.py
index 7a03511c4d8..216424eea14 100644
--- a/python/sglang/srt/layers/moe/cutlass_w4a8_moe.py
+++ b/python/sglang/srt/layers/moe/cutlass_w4a8_moe.py
@@ -91,18 +91,10 @@ def cutlass_w4a8_moe(
assert w1_q.shape[0] == w2_q.shape[0], "Expert number mismatch"
assert w1_q.shape[0] == w1_scale.shape[0], "w1 scales expert number mismatch"
assert w1_q.shape[0] == w2_scale.shape[0], "w2 scales expert number mismatch"
- assert (
- w1_scale.shape[1] == w1_q.shape[2] * 2 / 512
- and w1_scale.shape[2] == w1_q.shape[1] * 4
- ), "W1 scale shape mismatch"
- assert (
- w2_scale.shape[1] == w2_q.shape[2] * 2 / 512
- and w2_scale.shape[2] == w2_q.shape[1] * 4
- ), "W2 scale shape mismatch"
assert a_strides1.shape[0] == w1_q.shape[0], "A Strides 1 expert number mismatch"
assert b_strides1.shape[0] == w1_q.shape[0], "B Strides 1 expert number mismatch"
- assert a_strides2.shape[0] == w2_q.shape[0], "A Strides 2 expert number mismatch"
+ assert a_strides2.shape[0] == w2_q.shape[0], "A Strides 2 expert number mismatch"
assert b_strides2.shape[0] == w2_q.shape[0], "B Strides 2 expert number mismatch"
num_experts = w1_q.size(0)
m = a.size(0)
@@ -155,8 +147,8 @@ def cutlass_w4a8_moe(
k,
)
- c1 = torch.empty((m * topk, n * 2), device=device, dtype=torch.half)
- c2 = torch.zeros((m * topk, k), device=device, dtype=torch.half)
+ c1 = torch.empty((m * topk, n * 2), device=device, dtype=torch.bfloat16)
+ c2 = torch.zeros((m * topk, k), device=device, dtype=torch.bfloat16)
cutlass_w4a8_moe_mm(
c1,
@@ -174,7 +166,7 @@ def cutlass_w4a8_moe(
topk,
)
- intermediate = torch.empty((m * topk, n), device=device, dtype=torch.half)
+ intermediate = torch.empty((m * topk, n), device=device, dtype=torch.bfloat16)
silu_and_mul(c1, intermediate)
intermediate_q = torch.empty(
diff --git a/python/sglang/srt/layers/moe/ep_moe/kernels.py b/python/sglang/srt/layers/moe/ep_moe/kernels.py
index f1649d5c92f..08660812d1e 100644
--- a/python/sglang/srt/layers/moe/ep_moe/kernels.py
+++ b/python/sglang/srt/layers/moe/ep_moe/kernels.py
@@ -1362,3 +1362,77 @@ def moe_ep_deepgemm_preprocess(
gateup_input,
gateup_input_scale,
)
+
+
+@triton.jit
+def compute_identity_kernel(
+ top_k,
+ hidden_states_ptr,
+ expert_scales_ptr,
+ num_tokens,
+ output_ptr,
+ hidden_dim,
+ scales_stride,
+ BLOCK_SIZE: tl.constexpr,
+):
+ pid = tl.program_id(0)
+
+ batch_id = pid // (hidden_dim // BLOCK_SIZE)
+ dim_offset = pid % (hidden_dim // BLOCK_SIZE) * BLOCK_SIZE
+
+ if batch_id >= num_tokens or dim_offset >= hidden_dim:
+ return
+
+ h = tl.load(
+ hidden_states_ptr
+ + batch_id * hidden_dim
+ + dim_offset
+ + tl.arange(0, BLOCK_SIZE),
+ mask=(dim_offset + tl.arange(0, BLOCK_SIZE)) < hidden_dim,
+ )
+
+ result = tl.zeros([BLOCK_SIZE], dtype=tl.float32)
+ for i in range(top_k):
+ scale = tl.load(expert_scales_ptr + batch_id * scales_stride + i)
+ result += h * scale
+
+ tl.store(
+ output_ptr + batch_id * hidden_dim + dim_offset + tl.arange(0, BLOCK_SIZE),
+ result,
+ mask=(dim_offset + tl.arange(0, BLOCK_SIZE)) < hidden_dim,
+ )
+
+
+def zero_experts_compute_triton(
+ expert_indices, expert_scales, num_experts, zero_expert_type, hidden_states
+):
+ N = expert_indices.numel()
+ top_k = expert_indices.size(-1)
+ grid = lambda meta: (triton.cdiv(N, meta["BLOCK_SIZE"]),)
+
+ if zero_expert_type == "identity":
+ zero_expert_mask = expert_indices < num_experts
+ zero_expert_scales = expert_scales.clone()
+ zero_expert_scales[zero_expert_mask] = 0.0
+
+ normal_expert_mask = expert_indices >= num_experts
+ expert_indices[normal_expert_mask] = -1
+ expert_scales[normal_expert_mask] = 0.0
+
+ output = torch.zeros_like(hidden_states).to(hidden_states.device)
+ hidden_dim = hidden_states.size(-1)
+ num_tokens = hidden_states.size(0)
+
+ grid = lambda meta: (num_tokens * (hidden_dim // meta["BLOCK_SIZE"]),)
+ compute_identity_kernel[grid](
+ top_k,
+ hidden_states,
+ zero_expert_scales,
+ num_tokens,
+ output,
+ hidden_dim,
+ zero_expert_scales.stride(0),
+ BLOCK_SIZE=256,
+ )
+
+ return output
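
For the "identity" zero-expert type, the Triton kernel above reduces to scaling each token's hidden state by the summed routing weights of its zero experts (scales routed to normal experts are zeroed out first). A dense PyTorch reference of that reduction, written for clarity rather than performance (identity_zero_experts_reference is an illustrative name):

import torch

def identity_zero_experts_reference(hidden_states: torch.Tensor,
                                    zero_expert_scales: torch.Tensor) -> torch.Tensor:
    # hidden_states: [num_tokens, hidden_dim]
    # zero_expert_scales: [num_tokens, top_k], with zeros in slots routed to real experts
    return hidden_states * zero_expert_scales.sum(dim=-1, keepdim=True)

h = torch.randn(4, 16)
scales = torch.tensor([[0.0, 0.3], [0.0, 0.0], [0.5, 0.2], [1.0, 0.0]])
print(identity_zero_experts_reference(h, scales).shape)  # torch.Size([4, 16])
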
diff --git a/python/sglang/srt/layers/moe/ep_moe/layer.py b/python/sglang/srt/layers/moe/ep_moe/layer.py
index 32684c6064f..ef33665c3ca 100644
--- a/python/sglang/srt/layers/moe/ep_moe/layer.py
+++ b/python/sglang/srt/layers/moe/ep_moe/layer.py
@@ -35,7 +35,6 @@
if TYPE_CHECKING:
from sglang.srt.layers.moe.token_dispatcher import (
- AscendDeepEPLLOutput,
DeepEPLLOutput,
DeepEPNormalOutput,
DispatchOutput,
@@ -52,7 +51,6 @@
if _use_aiter:
from aiter import ActivationType, QuantType
from aiter.fused_moe import fused_moe
- from aiter.ops.shuffle import shuffle_weight
logger = logging.getLogger(__name__)
@@ -115,9 +113,6 @@ def __init__(
with_bias=with_bias,
)
- self.start_expert_id = self.moe_ep_rank * self.num_local_experts
- self.end_expert_id = self.start_expert_id + self.num_local_experts - 1
-
self.intermediate_size = intermediate_size
if isinstance(quant_config, Fp8Config):
@@ -233,7 +228,7 @@ def forward_deepgemm(
(
_cast_to_e8m0_with_rounding_up(gateup_input_scale)
if deep_gemm_wrapper.DEEPGEMM_SCALE_UE8M0
- else deep_gemm_wrapper.get_col_major_tma_aligned_tensor(
+ else deep_gemm_wrapper.get_mn_major_tma_aligned_tensor(
gateup_input_scale
)
),
@@ -249,7 +244,6 @@ def forward_deepgemm(
gateup_output,
masked_m,
expected_m,
- recipe=(1, 128, 128) if deep_gemm_wrapper.DEEPGEMM_BLACKWELL else None,
)
del gateup_input
del gateup_input_fp8
@@ -291,9 +285,7 @@ def forward_deepgemm(
(
down_input_scale
if deep_gemm_wrapper.DEEPGEMM_SCALE_UE8M0
- else deep_gemm_wrapper.get_col_major_tma_aligned_tensor(
- down_input_scale
- )
+ else deep_gemm_wrapper.get_mn_major_tma_aligned_tensor(down_input_scale)
),
)
down_output = torch.empty(
@@ -305,7 +297,6 @@ def forward_deepgemm(
down_output,
masked_m,
expected_m,
- recipe=(1, 128, 128) if deep_gemm_wrapper.DEEPGEMM_BLACKWELL else None,
)
del down_input
del down_input_fp8
@@ -462,7 +453,7 @@ def moe_impl(self, dispatch_output: DispatchOutput):
# in forward_aiter, we skip token permutation and unpermutation, which have been fused inside aiter kernel
return self.forward_aiter(dispatch_output)
if _is_npu:
- assert DispatchOutputChecker.format_is_ascent_ll(dispatch_output)
+ assert DispatchOutputChecker.format_is_deepep(dispatch_output)
return self.forward_npu(dispatch_output)
if DispatchOutputChecker.format_is_deepep_normal(dispatch_output):
assert deep_gemm_wrapper.ENABLE_JIT_DEEPGEMM and self.use_fp8_w8a8
@@ -668,7 +659,6 @@ def forward_deepgemm_masked(
gateup_output,
masked_m,
expected_m,
- recipe=(1, 128, 128) if deep_gemm_wrapper.DEEPGEMM_BLACKWELL else None,
)
dispose_tensor(hidden_states_fp8[0])
@@ -709,9 +699,7 @@ def forward_deepgemm_masked(
(
down_input_scale
if deep_gemm_wrapper.DEEPGEMM_SCALE_UE8M0
- else deep_gemm_wrapper.get_col_major_tma_aligned_tensor(
- down_input_scale
- )
+ else deep_gemm_wrapper.get_mn_major_tma_aligned_tensor(down_input_scale)
),
)
down_output = torch.empty(
@@ -723,73 +711,143 @@ def forward_deepgemm_masked(
down_output,
masked_m,
expected_m,
- recipe=(1, 128, 128) if deep_gemm_wrapper.DEEPGEMM_BLACKWELL else None,
)
return down_output
def forward_npu(
self,
- dispatch_output: DeepEPLLOutput,
+ dispatch_output: Union[DeepEPNormalOutput, DeepEPLLOutput],
):
- if TYPE_CHECKING:
- assert isinstance(dispatch_output, AscendDeepEPLLOutput)
- hidden_states, topk_idx, topk_weights, _, seg_indptr, _ = dispatch_output
assert self.quant_method is not None
- assert self.activation == "silu"
+ assert self.moe_runner_config.activation == "silu"
+
+ import torch_npu
+
+ from sglang.srt.layers.moe.token_dispatcher import DispatchOutputChecker
# NOTE: Ascend's Dispatch & Combine does not support FP16
output_dtype = torch.bfloat16
+ group_list_type = 1
+
+ def _forward_normal(dispatch_output: DeepEPNormalOutput):
+ if TYPE_CHECKING:
+ assert isinstance(dispatch_output, DeepEPNormalOutput)
+ hidden_states, _, _, num_recv_tokens_per_expert = dispatch_output
+
+ if isinstance(hidden_states, tuple):
+ per_token_scale = hidden_states[1]
+ hidden_states = hidden_states[0]
+ else:
+ # dynamic quant
+ hidden_states, per_token_scale = torch_npu.npu_dynamic_quant(
+ hidden_states
+ )
- pertoken_scale = hidden_states[1]
- hidden_states = hidden_states[0]
+ group_list = torch.tensor(num_recv_tokens_per_expert, dtype=torch.int64).to(
+ hidden_states.device
+ )
- group_list_type = 1
- seg_indptr = seg_indptr.to(torch.int64)
+ # gmm1: gate_up_proj
+ hidden_states = torch_npu.npu_grouped_matmul(
+ x=[hidden_states],
+ weight=[self.w13_weight],
+ scale=[self.w13_weight_scale.to(output_dtype)],
+ per_token_scale=[per_token_scale],
+ split_item=2,
+ group_list_type=group_list_type,
+ group_type=0,
+ group_list=group_list,
+ output_dtype=output_dtype,
+ )[0]
+
+ # act_fn: swiglu
+ hidden_states = torch_npu.npu_swiglu(hidden_states)
+ hidden_states, swiglu_out_scale = torch_npu.npu_dynamic_quant(hidden_states)
+
+ # gmm2: down_proj
+ hidden_states = torch_npu.npu_grouped_matmul(
+ x=[hidden_states],
+ weight=[self.w2_weight],
+ scale=[self.w2_weight_scale.to(output_dtype)],
+ per_token_scale=[swiglu_out_scale],
+ split_item=2,
+ group_list_type=group_list_type,
+ group_type=0,
+ group_list=group_list,
+ output_dtype=output_dtype,
+ )[0]
- import torch_npu
+ return hidden_states
- # gmm1: gate_up_proj
- hidden_states = torch_npu.npu_grouped_matmul(
- x=[hidden_states],
- weight=[self.w13_weight],
- scale=[self.w13_weight_scale.to(output_dtype)],
- per_token_scale=[pertoken_scale],
- split_item=2,
- group_list_type=group_list_type,
- group_type=0,
- group_list=seg_indptr,
- output_dtype=output_dtype,
- )[0]
-
- # act_fn: swiglu
- hidden_states = torch_npu.npu_swiglu(hidden_states)
-
- hidden_states, swiglu_out_scale = torch_npu.npu_dynamic_quant(hidden_states)
-
- # gmm2: down_proj
- hidden_states = torch_npu.npu_grouped_matmul(
- x=[hidden_states],
- weight=[self.w2_weight],
- scale=[self.w2_weight_scale.to(output_dtype)],
- per_token_scale=[swiglu_out_scale],
- split_item=2,
- group_list_type=group_list_type,
- group_type=0,
- group_list=seg_indptr,
- output_dtype=output_dtype,
- )[0]
+ def _forward_ll(dispatch_output: DeepEPLLOutput):
+ if TYPE_CHECKING:
+ assert isinstance(dispatch_output, DeepEPLLOutput)
+ hidden_states, topk_idx, topk_weights, group_list, _ = dispatch_output
+
+ per_token_scale = hidden_states[1]
+ hidden_states = hidden_states[0]
+
+ group_list = group_list.to(torch.int64)
+
+ # gmm1: gate_up_proj
+ hidden_states = torch_npu.npu_grouped_matmul(
+ x=[hidden_states],
+ weight=[self.w13_weight],
+ split_item=2,
+ group_list_type=group_list_type,
+ group_type=0,
+ group_list=group_list,
+ output_dtype=torch.int32,
+ )[0]
+
+ # act_fn: swiglu
+ hidden_states, swiglu_out_scale = torch_npu.npu_dequant_swiglu_quant(
+ x=hidden_states,
+ weight_scale=self.w13_weight_scale.to(torch.float32),
+ activation_scale=per_token_scale,
+ bias=None,
+ quant_scale=None,
+ quant_offset=None,
+ group_index=group_list,
+ activate_left=True,
+ quant_mode=1,
+ )
- return hidden_states
+ # gmm2: down_proj
+ hidden_states = torch_npu.npu_grouped_matmul(
+ x=[hidden_states],
+ weight=[self.w2_weight],
+ scale=[self.w2_weight_scale.to(output_dtype)],
+ per_token_scale=[swiglu_out_scale],
+ split_item=2,
+ group_list_type=group_list_type,
+ group_type=0,
+ group_list=group_list,
+ output_dtype=output_dtype,
+ )[0]
+
+ return hidden_states
+
+ if DispatchOutputChecker.format_is_deepep_normal(dispatch_output):
+ return _forward_normal(dispatch_output)
+ elif DispatchOutputChecker.format_is_deepep_ll(dispatch_output):
+ return _forward_ll(dispatch_output)
+ else:
+            raise ValueError(f"Unsupported DeepEP format {dispatch_output.format}")
-def get_moe_impl_class():
+def get_moe_impl_class(quant_config: Optional[QuantizationConfig] = None):
if get_moe_a2a_backend().is_deepep():
return DeepEPMoE
# NEW: Direct FP4 detection (bypasses EP requirements)
# Check for FP4 quantization with TRTLLM flag, regardless of EP
if get_moe_runner_backend().is_flashinfer_trtllm():
+ # FlashInferFP4MoE must be paired with ModelOptNvFp4FusedMoEMethod.
+ # If UnquantizedFusedMoEMethod is detected, fall back to FusedMoE instead.
+ if quant_config is None:
+ return FusedMoE
try:
# Check the quantization argument directly
quantization = global_server_args_dict.get("quantization")
diff --git a/python/sglang/srt/layers/moe/fused_moe_native.py b/python/sglang/srt/layers/moe/fused_moe_native.py
index 92b88b1b754..a3d3a09bfba 100644
--- a/python/sglang/srt/layers/moe/fused_moe_native.py
+++ b/python/sglang/srt/layers/moe/fused_moe_native.py
@@ -8,16 +8,18 @@
from sglang.srt.layers.activation import GeluAndMul, SiluAndMul
from sglang.srt.layers.moe.moe_runner import MoeRunnerConfig
+from sglang.srt.layers.moe.token_dispatcher import StandardDispatchOutput
from sglang.srt.layers.moe.topk import StandardTopKOutput
def fused_moe_forward_native(
layer: torch.nn.Module,
- x: torch.Tensor,
- topk_output: StandardTopKOutput,
- moe_runner_config: MoeRunnerConfig,
+ dispatch_output: StandardDispatchOutput,
) -> torch.Tensor:
+ x, topk_output = dispatch_output
+ moe_runner_config = layer.moe_runner_config
+
if moe_runner_config.apply_router_weight_on_input:
raise NotImplementedError()
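
fused_moe_forward_native now receives a single StandardDispatchOutput and reads the runner config from the layer instead of taking them as separate arguments. Assuming StandardDispatchOutput unpacks to (hidden_states, topk_output) as the assignment above implies, a self-contained sketch of the new calling convention (DispatchOutputSketch and forward_native_sketch are illustrative stand-ins):

from typing import NamedTuple

import torch

class DispatchOutputSketch(NamedTuple):
    # Stand-in for StandardDispatchOutput; field order matters because the
    # function unpacks it as `x, topk_output = dispatch_output`.
    hidden_states: torch.Tensor
    topk_output: object

def forward_native_sketch(apply_router_weight_on_input: bool,
                          dispatch_output: DispatchOutputSketch) -> torch.Tensor:
    x, topk_output = dispatch_output  # new-style unpack
    if apply_router_weight_on_input:
        raise NotImplementedError()
    return x  # the real function continues with the routed expert computation

out = forward_native_sketch(False, DispatchOutputSketch(torch.randn(2, 4), topk_output=None))
print(out.shape)  # torch.Size([2, 4])
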
diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/__init__.py b/python/sglang/srt/layers/moe/fused_moe_triton/__init__.py
index 6d8aee85293..be3ed3af412 100644
--- a/python/sglang/srt/layers/moe/fused_moe_triton/__init__.py
+++ b/python/sglang/srt/layers/moe/fused_moe_triton/__init__.py
@@ -1,16 +1,18 @@
from contextlib import contextmanager
from typing import Any, Dict, Optional
-from sglang.srt.layers.moe.fused_moe_triton.fused_moe import (
- fused_experts,
+from sglang.srt.layers.moe.fused_moe_triton.fused_moe import fused_experts
+from sglang.srt.layers.moe.fused_moe_triton.fused_moe_triton_config import (
get_config_file_name,
- moe_align_block_size,
try_get_optimal_moe_config,
)
from sglang.srt.layers.moe.fused_moe_triton.layer import (
FusedMoE,
FusedMoeWeightScaleSupported,
)
+from sglang.srt.layers.moe.fused_moe_triton.moe_align_block_size import (
+ moe_align_block_size,
+)
_config: Optional[Dict[str, Any]] = None
diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_3_1/E=128,N=768,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_3_1/E=128,N=768,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json
new file mode 100644
index 00000000000..379708af4e2
--- /dev/null
+++ b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_3_1/E=128,N=768,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json
@@ -0,0 +1,146 @@
+{
+ "1": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 64,
+ "BLOCK_SIZE_K": 128,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 5
+ },
+ "2": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 64,
+ "BLOCK_SIZE_K": 128,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "4": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 128,
+ "GROUP_SIZE_M": 32,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "8": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 128,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "16": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 128,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "24": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 128,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "32": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 128,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "48": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 128,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "64": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 128,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "96": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 128,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "128": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 128,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "256": {
+ "BLOCK_SIZE_M": 64,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 128,
+ "GROUP_SIZE_M": 32,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "512": {
+ "BLOCK_SIZE_M": 64,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 128,
+ "GROUP_SIZE_M": 32,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "1024": {
+ "BLOCK_SIZE_M": 64,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 128,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "1536": {
+ "BLOCK_SIZE_M": 64,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 128,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 4
+ },
+ "2048": {
+ "BLOCK_SIZE_M": 64,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 128,
+ "GROUP_SIZE_M": 16,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "3072": {
+ "BLOCK_SIZE_M": 64,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 128,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "4096": {
+ "BLOCK_SIZE_M": 64,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 128,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 3
+ }
+}
diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_4_0/E=129,N=352,device_name=NVIDIA_B200,dtype=fp8_w8a8.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_4_0/E=129,N=352,device_name=NVIDIA_B200,dtype=fp8_w8a8.json
new file mode 100644
index 00000000000..41d97b17b56
--- /dev/null
+++ b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_4_0/E=129,N=352,device_name=NVIDIA_B200,dtype=fp8_w8a8.json
@@ -0,0 +1,146 @@
+{
+ "1": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 64,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 5
+ },
+ "2": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 128,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 4
+ },
+ "4": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 128,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 5
+ },
+ "8": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 128,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 4
+ },
+ "16": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 128,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "24": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 128,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "32": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 128,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "48": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 256,
+ "BLOCK_SIZE_K": 128,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "64": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 256,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "96": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 256,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 4
+ },
+ "128": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 256,
+ "BLOCK_SIZE_K": 128,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "256": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 256,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "512": {
+ "BLOCK_SIZE_M": 64,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 128,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 8,
+ "num_stages": 3
+ },
+ "1024": {
+ "BLOCK_SIZE_M": 64,
+ "BLOCK_SIZE_N": 256,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 16,
+ "num_warps": 8,
+ "num_stages": 5
+ },
+ "1536": {
+ "BLOCK_SIZE_M": 128,
+ "BLOCK_SIZE_N": 256,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 16,
+ "num_warps": 4,
+ "num_stages": 4
+ },
+ "2048": {
+ "BLOCK_SIZE_M": 128,
+ "BLOCK_SIZE_N": 256,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 4
+ },
+ "3072": {
+ "BLOCK_SIZE_M": 256,
+ "BLOCK_SIZE_N": 256,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 16,
+ "num_warps": 8,
+ "num_stages": 5
+ },
+ "4096": {
+ "BLOCK_SIZE_M": 256,
+ "BLOCK_SIZE_N": 256,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 16,
+ "num_warps": 8,
+ "num_stages": 5
+ }
+}
diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_4_0/E=129,N=352,device_name=NVIDIA_RTX_PRO_6000_Blackwell_Max-Q_Workstation_Edition,dtype=fp8_w8a8.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_4_0/E=129,N=352,device_name=NVIDIA_RTX_PRO_6000_Blackwell_Max-Q_Workstation_Edition,dtype=fp8_w8a8.json
new file mode 100644
index 00000000000..f8fd97b5e41
--- /dev/null
+++ b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_4_0/E=129,N=352,device_name=NVIDIA_RTX_PRO_6000_Blackwell_Max-Q_Workstation_Edition,dtype=fp8_w8a8.json
@@ -0,0 +1,146 @@
+{
+ "1": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 64,
+ "BLOCK_SIZE_K": 128,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 2
+ },
+ "2": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 64,
+ "BLOCK_SIZE_K": 128,
+ "GROUP_SIZE_M": 32,
+ "num_warps": 4,
+ "num_stages": 2
+ },
+ "4": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 32,
+ "BLOCK_SIZE_K": 256,
+ "GROUP_SIZE_M": 64,
+ "num_warps": 4,
+ "num_stages": 2
+ },
+ "8": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 64,
+ "BLOCK_SIZE_K": 256,
+ "GROUP_SIZE_M": 32,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "16": {
+ "BLOCK_SIZE_M": 32,
+ "BLOCK_SIZE_N": 64,
+ "BLOCK_SIZE_K": 256,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 8,
+ "num_stages": 2
+ },
+ "24": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 64,
+ "BLOCK_SIZE_K": 256,
+ "GROUP_SIZE_M": 32,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "32": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 32,
+ "BLOCK_SIZE_K": 128,
+ "GROUP_SIZE_M": 16,
+ "num_warps": 4,
+ "num_stages": 4
+ },
+ "48": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 32,
+ "BLOCK_SIZE_K": 128,
+ "GROUP_SIZE_M": 16,
+ "num_warps": 8,
+ "num_stages": 3
+ },
+ "64": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 32,
+ "BLOCK_SIZE_K": 256,
+ "GROUP_SIZE_M": 16,
+ "num_warps": 8,
+ "num_stages": 2
+ },
+ "96": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 64,
+ "BLOCK_SIZE_K": 256,
+ "GROUP_SIZE_M": 16,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "128": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 64,
+ "BLOCK_SIZE_K": 256,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 2
+ },
+ "256": {
+ "BLOCK_SIZE_M": 32,
+ "BLOCK_SIZE_N": 64,
+ "BLOCK_SIZE_K": 256,
+ "GROUP_SIZE_M": 32,
+ "num_warps": 4,
+ "num_stages": 2
+ },
+ "512": {
+ "BLOCK_SIZE_M": 32,
+ "BLOCK_SIZE_N": 64,
+ "BLOCK_SIZE_K": 256,
+ "GROUP_SIZE_M": 64,
+ "num_warps": 4,
+ "num_stages": 2
+ },
+ "1024": {
+ "BLOCK_SIZE_M": 64,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 256,
+ "GROUP_SIZE_M": 16,
+ "num_warps": 4,
+ "num_stages": 2
+ },
+ "1536": {
+ "BLOCK_SIZE_M": 128,
+ "BLOCK_SIZE_N": 64,
+ "BLOCK_SIZE_K": 256,
+ "GROUP_SIZE_M": 16,
+ "num_warps": 4,
+ "num_stages": 2
+ },
+ "2048": {
+ "BLOCK_SIZE_M": 64,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 128,
+ "GROUP_SIZE_M": 32,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "3072": {
+ "BLOCK_SIZE_M": 128,
+ "BLOCK_SIZE_N": 256,
+ "BLOCK_SIZE_K": 128,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 8,
+ "num_stages": 3
+ },
+ "4096": {
+ "BLOCK_SIZE_M": 128,
+ "BLOCK_SIZE_N": 256,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 32,
+ "num_warps": 8,
+ "num_stages": 3
+ }
+}
diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_4_0/E=129,N=704,device_name=NVIDIA_B200,dtype=fp8_w8a8.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_4_0/E=129,N=704,device_name=NVIDIA_B200,dtype=fp8_w8a8.json
new file mode 100644
index 00000000000..b962d19506c
--- /dev/null
+++ b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_4_0/E=129,N=704,device_name=NVIDIA_B200,dtype=fp8_w8a8.json
@@ -0,0 +1,146 @@
+{
+ "1": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 128,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 5
+ },
+ "2": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 128,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 4
+ },
+ "4": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 128,
+ "GROUP_SIZE_M": 16,
+ "num_warps": 4,
+ "num_stages": 4
+ },
+ "8": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 64,
+ "BLOCK_SIZE_K": 128,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "16": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 128,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 4
+ },
+ "24": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 128,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 4
+ },
+ "32": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 128,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 5
+ },
+ "48": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 128,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 5
+ },
+ "64": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 128,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "96": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 128,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "128": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 128,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "256": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 128,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "512": {
+ "BLOCK_SIZE_M": 64,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 8,
+ "num_stages": 4
+ },
+ "1024": {
+ "BLOCK_SIZE_M": 128,
+ "BLOCK_SIZE_N": 256,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 4
+ },
+ "1536": {
+ "BLOCK_SIZE_M": 128,
+ "BLOCK_SIZE_N": 256,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 4
+ },
+ "2048": {
+ "BLOCK_SIZE_M": 128,
+ "BLOCK_SIZE_N": 256,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 4
+ },
+ "3072": {
+ "BLOCK_SIZE_M": 256,
+ "BLOCK_SIZE_N": 256,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 8,
+ "num_stages": 5
+ },
+ "4096": {
+ "BLOCK_SIZE_M": 128,
+ "BLOCK_SIZE_N": 256,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 4
+ }
+}
diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_4_0/E=161,N=384,device_name=NVIDIA_RTX_PRO_6000_Blackwell_Max-Q_Workstation_Edition,dtype=fp8_w8a8.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_4_0/E=161,N=384,device_name=NVIDIA_RTX_PRO_6000_Blackwell_Max-Q_Workstation_Edition,dtype=fp8_w8a8.json
new file mode 100644
index 00000000000..f8fd97b5e41
--- /dev/null
+++ b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_4_0/E=161,N=384,device_name=NVIDIA_RTX_PRO_6000_Blackwell_Max-Q_Workstation_Edition,dtype=fp8_w8a8.json
@@ -0,0 +1,146 @@
+{
+ "1": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 64,
+ "BLOCK_SIZE_K": 128,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 2
+ },
+ "2": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 64,
+ "BLOCK_SIZE_K": 128,
+ "GROUP_SIZE_M": 32,
+ "num_warps": 4,
+ "num_stages": 2
+ },
+ "4": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 32,
+ "BLOCK_SIZE_K": 256,
+ "GROUP_SIZE_M": 64,
+ "num_warps": 4,
+ "num_stages": 2
+ },
+ "8": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 64,
+ "BLOCK_SIZE_K": 256,
+ "GROUP_SIZE_M": 32,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "16": {
+ "BLOCK_SIZE_M": 32,
+ "BLOCK_SIZE_N": 64,
+ "BLOCK_SIZE_K": 256,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 8,
+ "num_stages": 2
+ },
+ "24": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 64,
+ "BLOCK_SIZE_K": 256,
+ "GROUP_SIZE_M": 32,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "32": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 32,
+ "BLOCK_SIZE_K": 128,
+ "GROUP_SIZE_M": 16,
+ "num_warps": 4,
+ "num_stages": 4
+ },
+ "48": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 32,
+ "BLOCK_SIZE_K": 128,
+ "GROUP_SIZE_M": 16,
+ "num_warps": 8,
+ "num_stages": 3
+ },
+ "64": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 32,
+ "BLOCK_SIZE_K": 256,
+ "GROUP_SIZE_M": 16,
+ "num_warps": 8,
+ "num_stages": 2
+ },
+ "96": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 64,
+ "BLOCK_SIZE_K": 256,
+ "GROUP_SIZE_M": 16,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "128": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 64,
+ "BLOCK_SIZE_K": 256,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 2
+ },
+ "256": {
+ "BLOCK_SIZE_M": 32,
+ "BLOCK_SIZE_N": 64,
+ "BLOCK_SIZE_K": 256,
+ "GROUP_SIZE_M": 32,
+ "num_warps": 4,
+ "num_stages": 2
+ },
+ "512": {
+ "BLOCK_SIZE_M": 32,
+ "BLOCK_SIZE_N": 64,
+ "BLOCK_SIZE_K": 256,
+ "GROUP_SIZE_M": 64,
+ "num_warps": 4,
+ "num_stages": 2
+ },
+ "1024": {
+ "BLOCK_SIZE_M": 64,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 256,
+ "GROUP_SIZE_M": 16,
+ "num_warps": 4,
+ "num_stages": 2
+ },
+ "1536": {
+ "BLOCK_SIZE_M": 128,
+ "BLOCK_SIZE_N": 64,
+ "BLOCK_SIZE_K": 256,
+ "GROUP_SIZE_M": 16,
+ "num_warps": 4,
+ "num_stages": 2
+ },
+ "2048": {
+ "BLOCK_SIZE_M": 64,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 128,
+ "GROUP_SIZE_M": 32,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "3072": {
+ "BLOCK_SIZE_M": 128,
+ "BLOCK_SIZE_N": 256,
+ "BLOCK_SIZE_K": 128,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 8,
+ "num_stages": 3
+ },
+ "4096": {
+ "BLOCK_SIZE_M": 128,
+ "BLOCK_SIZE_N": 256,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 32,
+ "num_warps": 8,
+ "num_stages": 3
+ }
+}
diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_4_0/E=128,N=768,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_4_0/E=256,N=256,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json
similarity index 80%
rename from python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_4_0/E=128,N=768,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json
rename to python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_4_0/E=256,N=256,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json
index b9dc2d71f6d..4e36c1544df 100644
--- a/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_4_0/E=128,N=768,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json
+++ b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_4_0/E=256,N=256,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json
@@ -1,24 +1,24 @@
{
"1": {
"BLOCK_SIZE_M": 16,
- "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 128,
- "GROUP_SIZE_M": 1,
+ "GROUP_SIZE_M": 32,
"num_warps": 4,
"num_stages": 4
},
"2": {
"BLOCK_SIZE_M": 16,
- "BLOCK_SIZE_N": 256,
+ "BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 128,
"GROUP_SIZE_M": 1,
"num_warps": 4,
- "num_stages": 5
+ "num_stages": 4
},
"4": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 128,
- "BLOCK_SIZE_K": 256,
+ "BLOCK_SIZE_K": 128,
"GROUP_SIZE_M": 1,
"num_warps": 4,
"num_stages": 4
@@ -34,39 +34,39 @@
"16": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 128,
- "BLOCK_SIZE_K": 256,
+ "BLOCK_SIZE_K": 128,
"GROUP_SIZE_M": 1,
"num_warps": 4,
- "num_stages": 4
+ "num_stages": 3
},
"24": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 128,
- "BLOCK_SIZE_K": 256,
+ "BLOCK_SIZE_K": 128,
"GROUP_SIZE_M": 1,
"num_warps": 4,
- "num_stages": 4
+ "num_stages": 3
},
"32": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 128,
- "BLOCK_SIZE_K": 256,
+ "BLOCK_SIZE_K": 64,
"GROUP_SIZE_M": 1,
"num_warps": 4,
- "num_stages": 4
+ "num_stages": 3
},
"48": {
"BLOCK_SIZE_M": 16,
- "BLOCK_SIZE_N": 128,
- "BLOCK_SIZE_K": 128,
+ "BLOCK_SIZE_N": 256,
+ "BLOCK_SIZE_K": 64,
"GROUP_SIZE_M": 1,
"num_warps": 4,
- "num_stages": 4
+ "num_stages": 3
},
"64": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 128,
- "BLOCK_SIZE_K": 256,
+ "BLOCK_SIZE_K": 128,
"GROUP_SIZE_M": 1,
"num_warps": 4,
"num_stages": 4
@@ -74,18 +74,18 @@
"96": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 128,
- "BLOCK_SIZE_K": 256,
+ "BLOCK_SIZE_K": 128,
"GROUP_SIZE_M": 1,
"num_warps": 4,
- "num_stages": 4
+ "num_stages": 3
},
"128": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 128,
- "BLOCK_SIZE_K": 256,
+ "BLOCK_SIZE_K": 128,
"GROUP_SIZE_M": 1,
"num_warps": 4,
- "num_stages": 4
+ "num_stages": 3
},
"256": {
"BLOCK_SIZE_M": 16,
@@ -96,34 +96,34 @@
"num_stages": 3
},
"512": {
- "BLOCK_SIZE_M": 64,
+ "BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 128,
- "BLOCK_SIZE_K": 256,
- "GROUP_SIZE_M": 32,
+ "BLOCK_SIZE_K": 128,
+ "GROUP_SIZE_M": 1,
"num_warps": 4,
- "num_stages": 4
+ "num_stages": 3
},
"1024": {
- "BLOCK_SIZE_M": 64,
+ "BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 128,
- "GROUP_SIZE_M": 32,
+ "GROUP_SIZE_M": 1,
"num_warps": 4,
- "num_stages": 4
+ "num_stages": 3
},
"1536": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 128,
- "GROUP_SIZE_M": 1,
+ "GROUP_SIZE_M": 32,
"num_warps": 4,
"num_stages": 4
},
"2048": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
- "BLOCK_SIZE_K": 256,
- "GROUP_SIZE_M": 32,
+ "BLOCK_SIZE_K": 128,
+ "GROUP_SIZE_M": 16,
"num_warps": 4,
"num_stages": 4
},
@@ -131,16 +131,16 @@
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 128,
- "GROUP_SIZE_M": 32,
+ "GROUP_SIZE_M": 1,
"num_warps": 4,
"num_stages": 4
},
"4096": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
- "BLOCK_SIZE_K": 256,
- "GROUP_SIZE_M": 16,
+ "BLOCK_SIZE_K": 128,
+ "GROUP_SIZE_M": 64,
"num_warps": 4,
- "num_stages": 4
+ "num_stages": 3
}
}
diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_4_0/E=257,N=128,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_4_0/E=257,N=128,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json
new file mode 100644
index 00000000000..a6c635be47e
--- /dev/null
+++ b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_4_0/E=257,N=128,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json
@@ -0,0 +1,146 @@
+{
+ "1": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 128,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 4
+ },
+ "2": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 128,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 4
+ },
+ "4": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 128,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 4
+ },
+ "8": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 128,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 4
+ },
+ "16": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 128,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "24": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 128,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "32": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 128,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "48": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 128,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "64": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 128,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "96": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 128,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "128": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 128,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "256": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 128,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "512": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 128,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "1024": {
+ "BLOCK_SIZE_M": 64,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 128,
+ "GROUP_SIZE_M": 32,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "1536": {
+ "BLOCK_SIZE_M": 64,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 128,
+ "GROUP_SIZE_M": 32,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "2048": {
+ "BLOCK_SIZE_M": 64,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 128,
+ "GROUP_SIZE_M": 64,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "3072": {
+ "BLOCK_SIZE_M": 64,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 128,
+ "GROUP_SIZE_M": 32,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "4096": {
+ "BLOCK_SIZE_M": 64,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 128,
+ "GROUP_SIZE_M": 64,
+ "num_warps": 4,
+ "num_stages": 4
+ }
+}
diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_4_0/E=257,N=64,device_name=NVIDIA_A100-SXM4-80GB.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_4_0/E=257,N=64,device_name=NVIDIA_A100-SXM4-80GB.json
new file mode 100644
index 00000000000..dc8d6d68b66
--- /dev/null
+++ b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_4_0/E=257,N=64,device_name=NVIDIA_A100-SXM4-80GB.json
@@ -0,0 +1,146 @@
+{
+ "1": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 64,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 64,
+ "num_warps": 4,
+ "num_stages": 4
+ },
+ "2": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 64,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 64,
+ "num_warps": 4,
+ "num_stages": 4
+ },
+ "4": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 64,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 64,
+ "num_warps": 4,
+ "num_stages": 4
+ },
+ "8": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 64,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "16": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "24": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "32": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 64,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "48": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 64,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "64": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 64,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "96": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "128": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "256": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "512": {
+ "BLOCK_SIZE_M": 32,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "1024": {
+ "BLOCK_SIZE_M": 32,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 2
+ },
+ "1536": {
+ "BLOCK_SIZE_M": 64,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 32,
+ "num_warps": 4,
+ "num_stages": 2
+ },
+ "2048": {
+ "BLOCK_SIZE_M": 32,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 2
+ },
+ "3072": {
+ "BLOCK_SIZE_M": 128,
+ "BLOCK_SIZE_N": 64,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "4096": {
+ "BLOCK_SIZE_M": 64,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 64,
+ "num_warps": 4,
+ "num_stages": 2
+ }
+}
diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_4_0/E=512,N=128,device_name=NVIDIA_H100_80GB_HBM3.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_4_0/E=512,N=128,device_name=NVIDIA_H100_80GB_HBM3.json
new file mode 100644
index 00000000000..b8f35b62e2d
--- /dev/null
+++ b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_4_0/E=512,N=128,device_name=NVIDIA_H100_80GB_HBM3.json
@@ -0,0 +1,146 @@
+{
+ "1": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 64,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 4
+ },
+ "2": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 4
+ },
+ "4": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 4
+ },
+ "8": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "16": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "24": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "32": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 8,
+ "num_stages": 3
+ },
+ "48": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 128,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 8,
+ "num_stages": 2
+ },
+ "64": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 128,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 8,
+ "num_stages": 2
+ },
+ "96": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 128,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 8,
+ "num_stages": 3
+ },
+ "128": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 8,
+ "num_stages": 3
+ },
+ "256": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 8,
+ "num_stages": 3
+ },
+ "512": {
+ "BLOCK_SIZE_M": 32,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "1024": {
+ "BLOCK_SIZE_M": 32,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "1536": {
+ "BLOCK_SIZE_M": 64,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 32,
+ "num_warps": 8,
+ "num_stages": 3
+ },
+ "2048": {
+ "BLOCK_SIZE_M": 64,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 16,
+ "num_warps": 8,
+ "num_stages": 3
+ },
+ "3072": {
+ "BLOCK_SIZE_M": 64,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 64,
+ "num_warps": 8,
+ "num_stages": 3
+ },
+ "4096": {
+ "BLOCK_SIZE_M": 128,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 32,
+ "num_warps": 8,
+ "num_stages": 3
+ }
+}
diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_4_0/E=512,N=128,device_name=NVIDIA_H20-3e.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_4_0/E=512,N=128,device_name=NVIDIA_H20-3e.json
new file mode 100644
index 00000000000..039d5ade739
--- /dev/null
+++ b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_4_0/E=512,N=128,device_name=NVIDIA_H20-3e.json
@@ -0,0 +1,146 @@
+{
+ "1": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 64,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "2": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "4": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "8": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "16": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "24": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "32": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "48": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 64,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "64": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 64,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "96": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "128": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "256": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 64,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "512": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "1024": {
+ "BLOCK_SIZE_M": 32,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "1536": {
+ "BLOCK_SIZE_M": 64,
+ "BLOCK_SIZE_N": 64,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 32,
+ "num_warps": 4,
+ "num_stages": 2
+ },
+ "2048": {
+ "BLOCK_SIZE_M": 64,
+ "BLOCK_SIZE_N": 64,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 64,
+ "num_warps": 4,
+ "num_stages": 2
+ },
+ "3072": {
+ "BLOCK_SIZE_M": 64,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 32,
+ "num_warps": 8,
+ "num_stages": 3
+ },
+ "4096": {
+ "BLOCK_SIZE_M": 64,
+ "BLOCK_SIZE_N": 64,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 2
+ }
+}
diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_4_0/E=512,N=256,device_name=NVIDIA_H20-3e.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_4_0/E=512,N=256,device_name=NVIDIA_H20-3e.json
new file mode 100644
index 00000000000..991b315f704
--- /dev/null
+++ b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_4_0/E=512,N=256,device_name=NVIDIA_H20-3e.json
@@ -0,0 +1,146 @@
+{
+ "1": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "2": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "4": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 64,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "8": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "16": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 64,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 4
+ },
+ "24": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "32": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 64,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "48": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "64": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 64,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "96": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 64,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "128": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 32,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "256": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "512": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 16,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "1024": {
+ "BLOCK_SIZE_M": 32,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "1536": {
+ "BLOCK_SIZE_M": 64,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 64,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "2048": {
+ "BLOCK_SIZE_M": 64,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 64,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "3072": {
+ "BLOCK_SIZE_M": 64,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "4096": {
+ "BLOCK_SIZE_M": 64,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 16,
+ "num_warps": 4,
+ "num_stages": 3
+ }
+}
diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_4_0/E=512,N=64,device_name=NVIDIA_H100_80GB_HBM3.json b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_4_0/E=512,N=64,device_name=NVIDIA_H100_80GB_HBM3.json
new file mode 100644
index 00000000000..64861b390c9
--- /dev/null
+++ b/python/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_4_0/E=512,N=64,device_name=NVIDIA_H100_80GB_HBM3.json
@@ -0,0 +1,146 @@
+{
+ "1": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 64,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 4
+ },
+ "2": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "4": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 4
+ },
+ "8": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 4
+ },
+ "16": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "24": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "32": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "48": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "64": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "96": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "128": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "256": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "512": {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 2
+ },
+ "1024": {
+ "BLOCK_SIZE_M": 32,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 3
+ },
+ "1536": {
+ "BLOCK_SIZE_M": 64,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 16,
+ "num_warps": 8,
+ "num_stages": 2
+ },
+ "2048": {
+ "BLOCK_SIZE_M": 64,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 64,
+ "num_warps": 8,
+ "num_stages": 2
+ },
+ "3072": {
+ "BLOCK_SIZE_M": 64,
+ "BLOCK_SIZE_N": 64,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 64,
+ "num_warps": 4,
+ "num_stages": 2
+ },
+ "4096": {
+ "BLOCK_SIZE_M": 128,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 32,
+ "num_warps": 8,
+ "num_stages": 3
+ }
+}
diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/fused_moe.py b/python/sglang/srt/layers/moe/fused_moe_triton/fused_moe.py
index 0d89ebc8818..6d3fb53b051 100644
--- a/python/sglang/srt/layers/moe/fused_moe_triton/fused_moe.py
+++ b/python/sglang/srt/layers/moe/fused_moe_triton/fused_moe.py
@@ -1,3 +1,4 @@
+# NOTE: this file will be separated into sglang/srt/layers/moe/moe_runner/triton_utils.py
# Adapted from https://github.com/vllm-project/vllm/blob/a6221a144af772fd1a68fe7e627935dc53e81738/vllm/model_executor/layers/fused_moe/fused_moe.py
"""Fused MoE kernel."""
@@ -5,39 +6,29 @@
from __future__ import annotations
import functools
-import json
-import logging
import os
-from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple
+from typing import TYPE_CHECKING, List, Optional
import torch
-import triton
import triton.language as tl
from sglang.srt.layers.moe.moe_runner import MoeRunnerConfig
-from sglang.srt.layers.moe.topk import StandardTopKOutput
-from sglang.srt.layers.quantization.fp8_kernel import (
- per_token_group_quant_fp8,
- scaled_fp8_quant,
- sglang_per_token_group_quant_fp8,
-)
-from sglang.srt.layers.quantization.int8_kernel import (
- per_token_group_quant_int8,
- per_token_quant_int8,
- sglang_per_token_group_quant_int8,
-)
from sglang.srt.utils import (
- ceil_div,
cpu_has_amx_support,
direct_register_custom_op,
get_bool_env_var,
- get_device_name,
is_cpu,
is_cuda,
is_hip,
- next_power_of_2,
)
+from .fused_moe_triton_config import get_config_dtype_str, try_get_optimal_moe_config
+from .fused_moe_triton_kernels import invoke_fused_moe_kernel, moe_sum_reduce_triton
+from .moe_align_block_size import moe_align_block_size
+
+if TYPE_CHECKING:
+ from sglang.srt.layers.moe.topk import StandardTopKOutput
+
_is_hip = is_hip()
_is_cuda = is_cuda()
_is_cpu_amx_available = cpu_has_amx_support()
@@ -49,960 +40,17 @@
elif _is_cpu and _is_cpu_amx_available:
pass
elif _is_hip:
- from vllm import _custom_ops as vllm_ops # gelu_and_mul, silu_and_mul
+ from sgl_kernel import gelu_and_mul, silu_and_mul
if _use_aiter:
try:
from aiter import moe_sum
except ImportError:
raise ImportError("aiter is required when SGLANG_USE_AITER is set to True")
-
-
-if _is_cuda or _is_hip:
- from sgl_kernel import moe_align_block_size as sgl_moe_align_block_size
-
-
-logger = logging.getLogger(__name__)
-padding_size = 128 if bool(int(os.getenv("SGLANG_MOE_PADDING", "0"))) else 0
-
-
-@triton.jit
-def write_zeros_to_output(
- c_ptr,
- stride_cm,
- stride_cn,
- pid_n,
- N,
- offs_token,
- token_mask,
- BLOCK_SIZE_M,
- BLOCK_SIZE_N,
- compute_type,
-):
- accumulator = tl.zeros((BLOCK_SIZE_M, BLOCK_SIZE_N), dtype=compute_type)
- offs_cn = pid_n * BLOCK_SIZE_N + tl.arange(0, BLOCK_SIZE_N)
- c_ptrs = c_ptr + stride_cm * offs_token[:, None] + stride_cn * offs_cn[None, :]
- c_mask = token_mask[:, None] & (offs_cn[None, :] < N)
- tl.store(c_ptrs, accumulator, mask=c_mask)
-
-
-@triton.jit
-def fused_moe_kernel_gptq_awq(
- # Pointers to matrices
- a_ptr,
- b_ptr,
- c_ptr,
- b_scale_ptr,
- b_zp_ptr,
- topk_weights_ptr,
- sorted_token_ids_ptr,
- expert_ids_ptr,
- num_tokens_post_padded_ptr,
- # Matrix dimensions
- N: tl.constexpr,
- K: tl.constexpr,
- EM,
- num_valid_tokens,
- # The stride variables represent how much to increase the ptr by when
- # moving by 1 element in a particular dimension. E.g. `stride_am` is
- # how much to increase `a_ptr` by to get the element one row down
- # (A has M rows).
- stride_am,
- stride_ak,
- stride_be,
- stride_bk,
- stride_bn,
- stride_cm,
- stride_cn,
- stride_bse,
- stride_bsk,
- stride_bsn,
- stride_bze,
- stride_bzk,
- stride_bzn,
- group_size: tl.constexpr,
- # Meta-parameters
- BLOCK_SIZE_M: tl.constexpr,
- BLOCK_SIZE_N: tl.constexpr,
- BLOCK_SIZE_K: tl.constexpr,
- GROUP_SIZE_M: tl.constexpr,
- MUL_ROUTED_WEIGHT: tl.constexpr,
- top_k: tl.constexpr,
- compute_type: tl.constexpr,
- has_zp: tl.constexpr,
- use_int4_w4a16: tl.constexpr,
- use_int8_w8a16: tl.constexpr,
- even_Ks: tl.constexpr,
-):
- """
- Implements the fused computation for a Mixture of Experts (MOE) using
- token and expert matrices.
- Key Parameters:
- - A: The input tensor representing tokens with shape (*, K), where '*' can
- be any shape representing batches and K is the feature dimension of
- each token.
- - B: The stacked MOE weight tensor with shape (E, N, K), where E is
- the number of experts, K is the input feature dimension, and N is
- the output feature dimension.
- - C: The output cache tensor with shape (M, topk, N), where M is the
- total number of tokens post padding, topk is the number of times
- each token is repeated, and N is the output feature dimension.
- - sorted_token_ids: A tensor containing the sorted indices of tokens,
- repeated topk times and arranged by the expert index they are
- assigned to.
- - expert_ids: A tensor containing the indices of the expert for each
- block. It determines which expert matrix from B should be used for
- each block in A.
- This kernel performs the multiplication of a token by its corresponding
- expert matrix as determined by `expert_ids`. The sorting of
- `sorted_token_ids` by expert index and padding ensures divisibility by
- BLOCK_SIZE_M, which is necessary to maintain consistency in block matrix
- multiplication across different blocks processed by the same expert.
- """
- # -----------------------------------------------------------
- # Map program ids `pid` to the block of C it should compute.
- # This is done in a grouped ordering to promote L2 data reuse.
- pid = tl.program_id(axis=0)
- num_pid_m = tl.cdiv(EM, BLOCK_SIZE_M)
- num_pid_n = tl.cdiv(N, BLOCK_SIZE_N)
- num_pid_in_group = GROUP_SIZE_M * num_pid_n
- group_id = pid // num_pid_in_group
- first_pid_m = group_id * GROUP_SIZE_M
- group_size_m = min(num_pid_m - first_pid_m, GROUP_SIZE_M)
- pid_m = first_pid_m + ((pid % num_pid_in_group) % group_size_m)
- pid_n = (pid % num_pid_in_group) // group_size_m
-
- # ----------------------------------------------------------
- # Create pointers for the first blocks of A and B.
- # We will advance this pointer as we move in the K direction
- # and accumulate
- # `a_ptrs` is a block of [BLOCK_SIZE_M, BLOCK_SIZE_K] pointers
- # `b_ptrs` is a block of [BLOCK_SIZE_K, BLOCK_SIZE_N] pointers
- num_tokens_post_padded = tl.load(num_tokens_post_padded_ptr)
- if pid_m * BLOCK_SIZE_M >= num_tokens_post_padded:
- return
- offs_token_id = pid_m * BLOCK_SIZE_M + tl.arange(0, BLOCK_SIZE_M).to(tl.int64)
- offs_token = tl.load(sorted_token_ids_ptr + offs_token_id)
- token_mask = offs_token < num_valid_tokens
-
- off_experts = tl.load(expert_ids_ptr + pid_m).to(tl.int64)
- if off_experts == -1:
- # -----------------------------------------------------------
- # Write back zeros to the output when the expert is not
- # in the current expert parallel rank.
- write_zeros_to_output(
- c_ptr,
- stride_cm,
- stride_cn,
- pid_n,
- N,
- offs_token,
- token_mask,
- BLOCK_SIZE_M,
- BLOCK_SIZE_N,
- compute_type,
- )
- return
-
- offs_bn = (pid_n * BLOCK_SIZE_N + tl.arange(0, BLOCK_SIZE_N).to(tl.int64)) % N
- offs_k = tl.arange(0, BLOCK_SIZE_K)
- a_ptrs = a_ptr + (
- offs_token[:, None] // top_k * stride_am + offs_k[None, :] * stride_ak
- )
-
- if use_int4_w4a16:
- b_ptrs = (
- b_ptr
- + off_experts * stride_be
- + (offs_k[:, None] // 2) * stride_bk
- + offs_bn[None, :] * stride_bn
- )
- b_shifter = (offs_k[:, None] % 2) * 4
- elif use_int8_w8a16:
- b_ptrs = (
- b_ptr
- + off_experts * stride_be
- + offs_k[:, None] * stride_bk
- + offs_bn[None, :] * stride_bn
- )
-
- if not has_zp and use_int4_w4a16:
- b_zp_num = 8
- if not has_zp and use_int8_w8a16:
- b_zp_num = 128
- elif has_zp and use_int4_w4a16:
- b_zp_shifter = (offs_bn[None, :] % 2) * 4
-
- # -----------------------------------------------------------
- # Iterate to compute a block of the C matrix.
- # We accumulate into a `[BLOCK_SIZE_M, BLOCK_SIZE_N]` block
- # of fp32 values for higher accuracy.
- # `accumulator` will be converted back to fp16 after the loop.
- accumulator = tl.zeros((BLOCK_SIZE_M, BLOCK_SIZE_N), dtype=tl.float32)
- for k in range(0, tl.cdiv(K, BLOCK_SIZE_K)):
- # Load the next block of A and B, generate a mask by checking the
- # K dimension.
-
- if not even_Ks:
- k_mask = offs_k[:, None] < K - k * BLOCK_SIZE_K
- k_other = 0.0
- else:
- k_mask = None
- k_other = None
-
- a = tl.load(
- a_ptrs,
- mask=token_mask[:, None] & (offs_k[None, :] < K - k * BLOCK_SIZE_K),
- other=0.0,
- )
- b = tl.load(b_ptrs)
- if use_int4_w4a16:
- b = (b >> b_shifter) & 0xF
-
- b_scale_ptrs = (
- b_scale_ptr
- + off_experts * stride_bse
- + offs_bn[None, :] * stride_bsn
- + ((offs_k[:, None] + BLOCK_SIZE_K * k) // group_size) * stride_bsk
- )
- b_scale = tl.load(b_scale_ptrs, mask=k_mask, other=k_other)
- b_scale = b_scale.to(tl.float32)
-
- if has_zp and use_int4_w4a16:
- offs_k_true = (offs_k[:, None] + BLOCK_SIZE_K * k) // group_size
- b_zp_ptrs = (
- b_zp_ptr
- + off_experts * stride_bze
- + (offs_bn[None, :] // 2) * stride_bzn
- + offs_k_true * stride_bzk
- )
- b_zp = tl.load(b_zp_ptrs, mask=k_mask, other=k_other)
- b_zp = (b_zp >> b_zp_shifter) & 0xF
- b_zp = b_zp.to(tl.float32)
- elif has_zp and use_int8_w8a16:
- offs_k_true = (offs_k[:, None] + BLOCK_SIZE_K * k) // group_size
- b_zp_ptrs = (
- b_zp_ptr
- + off_experts * stride_bze
- + offs_bn[None, :] * stride_bzn
- + offs_k_true * stride_bzk
- )
- b_zp = tl.load(b_zp_ptrs, mask=k_mask, other=k_other)
- b_zp = b_zp.to(tl.float32)
-
- # We accumulate along the K dimension.
- if has_zp:
- b = ((b.to(tl.float32) - b_zp) * b_scale).to(compute_type)
- else:
- b = ((b.to(tl.float32) - b_zp_num) * b_scale).to(compute_type)
- accumulator = tl.dot(a, b, acc=accumulator)
-
- # Advance the ptrs to the next K block.
- a_ptrs += BLOCK_SIZE_K * stride_ak
- if use_int4_w4a16:
- b_ptrs += (BLOCK_SIZE_K // 2) * stride_bk
- else:
- b_ptrs += BLOCK_SIZE_K * stride_bk
-
- if MUL_ROUTED_WEIGHT:
- moe_weight = tl.load(topk_weights_ptr + offs_token, mask=token_mask, other=0)
- accumulator = accumulator * moe_weight[:, None]
-
- accumulator = accumulator.to(compute_type)
- # -----------------------------------------------------------
- # Write back the block of the output
- offs_cn = pid_n * BLOCK_SIZE_N + tl.arange(0, BLOCK_SIZE_N)
- c_ptrs = c_ptr + stride_cm * offs_token[:, None] + stride_cn * offs_cn[None, :]
- c_mask = token_mask[:, None] & (offs_cn[None, :] < N)
- tl.store(c_ptrs, accumulator, mask=c_mask)
-
-
-@triton.jit
-def fused_moe_kernel(
- # Pointers to matrices
- a_ptr,
- b_ptr,
- bias_ptr,
- c_ptr,
- a_scale_ptr,
- b_scale_ptr,
- topk_weights_ptr,
- sorted_token_ids_ptr,
- expert_ids_ptr,
- num_tokens_post_padded_ptr,
- # Matrix dimensions
- N,
- K,
- EM,
- num_valid_tokens,
- # The stride variables represent how much to increase the ptr by when
- # moving by 1 element in a particular dimension. E.g. `stride_am` is
- # how much to increase `a_ptr` by to get the element one row down
- # (A has M rows).
- stride_am,
- stride_ak,
- stride_be,
- stride_bk,
- stride_bn,
- stride_bias_e,
- stride_bias_n,
- stride_cm,
- stride_cn,
- stride_asm,
- stride_ask,
- stride_bse,
- stride_bsk,
- stride_bsn,
- # Block size for block-wise quantization
- group_n: tl.constexpr,
- group_k: tl.constexpr,
- # Meta-parameters
- BLOCK_SIZE_M: tl.constexpr,
- BLOCK_SIZE_N: tl.constexpr,
- BLOCK_SIZE_K: tl.constexpr,
- GROUP_SIZE_M: tl.constexpr,
- MUL_ROUTED_WEIGHT: tl.constexpr,
- top_k: tl.constexpr,
- compute_type: tl.constexpr,
- use_fp8_w8a8: tl.constexpr,
- use_int8_w8a8: tl.constexpr,
- use_int8_w8a16: tl.constexpr,
- per_channel_quant: tl.constexpr,
- even_Ks: tl.constexpr,
-):
- """
- Implements the fused computation for a Mixture of Experts (MOE) using
- token and expert matrices.
-
- Key Parameters:
- - A: The input tensor representing tokens with shape (*, K), where '*' can
- be any shape representing batches and K is the feature dimension of
- each token.
- - B: The stacked MOE weight tensor with shape (E, N, K), where E is
- the number of experts, K is the input feature dimension, and N is
- the output feature dimension.
- - C: The output cache tensor with shape (M, topk, N), where M is the
- total number of tokens post padding, topk is the number of times
- each token is repeated, and N is the output feature dimension.
- - sorted_token_ids: A tensor containing the sorted indices of tokens,
- repeated topk times and arranged by the expert index they are
- assigned to.
- - expert_ids: A tensor containing the indices of the expert for each
- block. It determines which expert matrix from B should be used for
- each block in A.
-
- This kernel performs the multiplication of a token by its corresponding
- expert matrix as determined by `expert_ids`. The sorting of
- `sorted_token_ids` by expert index and padding ensures divisibility by
- BLOCK_SIZE_M, which is necessary to maintain consistency in block matrix
- multiplication across different blocks processed by the same expert.
- """
- # -----------------------------------------------------------
- # Map program ids `pid` to the block of C it should compute.
- # This is done in a grouped ordering to promote L2 data reuse.
- pid = tl.program_id(axis=0)
- num_pid_m = tl.cdiv(EM, BLOCK_SIZE_M)
- num_pid_n = tl.cdiv(N, BLOCK_SIZE_N)
- num_pid_in_group = GROUP_SIZE_M * num_pid_n
- group_id = pid // num_pid_in_group
- first_pid_m = group_id * GROUP_SIZE_M
- group_size_m = min(num_pid_m - first_pid_m, GROUP_SIZE_M)
- pid_m = first_pid_m + ((pid % num_pid_in_group) % group_size_m)
- pid_n = (pid % num_pid_in_group) // group_size_m
-
- # ----------------------------------------------------------
- # Create pointers for the first blocks of A and B.
- # We will advance this pointer as we move in the K direction
- # and accumulate
- # `a_ptrs` is a block of [BLOCK_SIZE_M, BLOCK_SIZE_K] pointers
- # `b_ptrs` is a block of [BLOCK_SIZE_K, BLOCK_SIZE_N] pointers
- num_tokens_post_padded = tl.load(num_tokens_post_padded_ptr)
- if pid_m * BLOCK_SIZE_M >= num_tokens_post_padded:
- return
- offs_token_id = pid_m * BLOCK_SIZE_M + tl.arange(0, BLOCK_SIZE_M).to(tl.int64)
- offs_token = tl.load(sorted_token_ids_ptr + offs_token_id)
- offs_token = offs_token.to(tl.int64)
- token_mask = offs_token < num_valid_tokens
-
- off_experts = tl.load(expert_ids_ptr + pid_m).to(tl.int64)
-
- if off_experts == -1:
- # -----------------------------------------------------------
- # Write back zeros to the output when the expert is not
- # in the current expert parallel rank.
- write_zeros_to_output(
- c_ptr,
- stride_cm,
- stride_cn,
- pid_n,
- N,
- offs_token,
- token_mask,
- BLOCK_SIZE_M,
- BLOCK_SIZE_N,
- compute_type,
- )
- return
-
- offs_bn = (pid_n * BLOCK_SIZE_N + tl.arange(0, BLOCK_SIZE_N).to(tl.int64)) % N
- offs_k = tl.arange(0, BLOCK_SIZE_K)
- a_ptrs = a_ptr + (
- offs_token[:, None] // top_k * stride_am + offs_k[None, :] * stride_ak
- )
-
- b_ptrs = (
- b_ptr
- + off_experts * stride_be
- + (offs_k[:, None] * stride_bk + offs_bn[None, :] * stride_bn)
- )
- if bias_ptr is not None:
- bias = tl.load(
- bias_ptr + off_experts * stride_bias_e + offs_bn[None, :] * stride_bias_n
- )
- if use_int8_w8a16:
- b_scale_ptrs = (
- b_scale_ptr + off_experts * stride_bse + offs_bn[None, :] * stride_bsn
- )
- b_scale = tl.load(b_scale_ptrs)
-
- if use_fp8_w8a8 or use_int8_w8a8:
- # block-wise
- if group_k > 0 and group_n > 0:
- a_scale_ptrs = a_scale_ptr + (offs_token // top_k) * stride_asm
- offs_bsn = offs_bn // group_n
- b_scale_ptrs = (
- b_scale_ptr + off_experts * stride_bse + offs_bsn * stride_bsn
- )
- # channel-wise
- elif per_channel_quant:
- b_scale_ptrs = (
- b_scale_ptr + off_experts * stride_bse + offs_bn[None, :] * stride_bsn
- )
- b_scale = tl.load(b_scale_ptrs)
- # Load per-token scale for activations
- a_scale_ptrs = a_scale_ptr + (offs_token // top_k) * stride_asm
- a_scale = tl.load(a_scale_ptrs, mask=token_mask, other=0.0)[:, None]
- # tensor-wise
- else:
- a_scale = tl.load(a_scale_ptr)
- b_scale = tl.load(b_scale_ptr + off_experts)
-
- # -----------------------------------------------------------
- # Iterate to compute a block of the C matrix.
- # We accumulate into a `[BLOCK_SIZE_M, BLOCK_SIZE_N]` block
- # of fp32 values for higher accuracy.
- # `accumulator` will be converted back to fp16 after the loop.
- accumulator = tl.zeros((BLOCK_SIZE_M, BLOCK_SIZE_N), dtype=tl.float32)
-
- for k in range(0, tl.cdiv(K, BLOCK_SIZE_K)):
- # Load the next block of A and B, generate a mask by checking the
- # K dimension.
- if even_Ks:
- a = tl.load(
- a_ptrs,
- mask=token_mask[:, None],
- other=0.0,
- )
- b = tl.load(b_ptrs)
- else:
- a = tl.load(
- a_ptrs,
- mask=token_mask[:, None] & (offs_k[None, :] < K - k * BLOCK_SIZE_K),
- other=0.0,
- )
- b = tl.load(b_ptrs, mask=offs_k[:, None] < K - k * BLOCK_SIZE_K, other=0.0)
-
- # We accumulate along the K dimension.
- if use_int8_w8a16:
- accumulator = tl.dot(a, b.to(compute_type), acc=accumulator)
- elif use_fp8_w8a8 or use_int8_w8a8:
- if group_k > 0 and group_n > 0:
- k_start = k * BLOCK_SIZE_K
- offs_ks = k_start // group_k
- a_scale = tl.load(
- a_scale_ptrs + offs_ks * stride_ask, mask=token_mask, other=0.0
- )
- b_scale = tl.load(b_scale_ptrs + offs_ks * stride_bsk)
-
- accumulator += tl.dot(a, b) * a_scale[:, None] * b_scale[None, :]
- else:
- if use_fp8_w8a8:
- accumulator = tl.dot(a, b, acc=accumulator)
- else:
- accumulator += tl.dot(a, b)
- else:
- accumulator += tl.dot(a, b)
- # Advance the ptrs to the next K block.
- a_ptrs += BLOCK_SIZE_K * stride_ak
- b_ptrs += BLOCK_SIZE_K * stride_bk
-
- if use_int8_w8a16:
- accumulator *= b_scale
- elif use_fp8_w8a8 or use_int8_w8a8:
- if group_k == 0 or group_n == 0:
- accumulator *= a_scale * b_scale
-
- if bias_ptr is not None:
- accumulator += bias
-
- if MUL_ROUTED_WEIGHT:
- moe_weight = tl.load(topk_weights_ptr + offs_token, mask=token_mask, other=0)
- accumulator *= moe_weight[:, None]
-
- accumulator = accumulator.to(compute_type)
- # -----------------------------------------------------------
- # Write back the block of the output
- offs_cn = pid_n * BLOCK_SIZE_N + tl.arange(0, BLOCK_SIZE_N)
- c_ptrs = c_ptr + stride_cm * offs_token[:, None] + stride_cn * offs_cn[None, :]
- c_mask = token_mask[:, None] & (offs_cn[None, :] < N)
- tl.store(c_ptrs, accumulator, mask=c_mask)
-
-
-def moe_align_block_size(
- topk_ids: torch.Tensor, block_size: int, num_experts: int
-) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
- """
- Aligns the token distribution across experts to be compatible with block
- size for matrix multiplication.
-
- Parameters:
- - topk_ids: A tensor of shape [total_tokens, top_k] representing the
- top-k expert indices for each token.
- - block_size: The block size used in block matrix multiplication.
- - num_experts: The total number of experts.
-
- Returns:
- - sorted_token_ids: A tensor containing the sorted token indices according
- to their allocated expert.
- - expert_ids: A tensor indicating the assigned expert index for each block.
- - num_tokens_post_padded: The total number of tokens after padding,
- ensuring divisibility by block_size.
-
- This function pads the number of tokens that each expert needs to process
- so that it is divisible by block_size.
- Padding ensures that during block matrix multiplication, the dimensions
- align correctly.
-
- Example:
- Given topk_ids = [[2, 3, 4], [1, 2, 4], [1, 3, 4], [1, 2, 3]],
- block_size = 4, and num_experts = 4:
- - We initially have 12 tokens (after repeating 'top_k' times) and 4 experts,
- with each expert needing to process 3 tokens.
- - As block_size is 4, we pad 1 token for each expert.
- - First, flatten topk_ids to [2, 3, 4, 1, 2, 4, 1, 3, 4, 1, 2, 3].
- - Then append padding tokens [12, 12, 12, 12] for each block.
- - After sorting by expert index, we obtain token_ids
- [3, 6, 9, 12, 0, 4, 10, 12, 1, 7, 11, 12, 2, 5, 8, 12].
- Tokens 12 are non-existent (padding) and are ignored in
- the subsequent matrix multiplication.
- - The padding ensures that the total number of tokens is now divisible
- by block_size for proper block matrix operations.
- """
- max_num_tokens_padded = topk_ids.numel() + (num_experts + 1) * (block_size - 1)
- sorted_ids = torch.empty(
- (max_num_tokens_padded,), dtype=torch.int32, device=topk_ids.device
- )
- max_num_m_blocks = triton.cdiv(max_num_tokens_padded, block_size)
- expert_ids = torch.empty(
- (max_num_m_blocks,), dtype=torch.int32, device=topk_ids.device
- )
- num_tokens_post_pad = torch.empty((1), dtype=torch.int32, device=topk_ids.device)
-
- # In EP, expert_ids for filtered experts are -1. We have num_experts + 1 ids in total.
- cumsum_buffer = torch.empty(
- (num_experts + 2,), dtype=torch.int32, device=topk_ids.device
- )
-
- # Threshold based on benchmark results
- fuse_sorted_ids_padding = sorted_ids.shape[0] <= 4096
- if not fuse_sorted_ids_padding:
- sorted_ids.fill_(topk_ids.numel())
-
- sgl_moe_align_block_size(
- topk_ids,
- num_experts + 1,
- block_size,
- sorted_ids,
- expert_ids,
- num_tokens_post_pad,
- cumsum_buffer,
- fuse_sorted_ids_padding,
- )
- return sorted_ids, expert_ids, num_tokens_post_pad
-
-
-def invoke_fused_moe_kernel(
- A: torch.Tensor,
- B: torch.Tensor,
- bias: Optional[torch.Tensor],
- C: torch.Tensor,
- A_scale: Optional[torch.Tensor],
- B_scale: Optional[torch.Tensor],
- B_zp: Optional[torch.Tensor],
- topk_weights: torch.Tensor,
- topk_ids: torch.Tensor,
- sorted_token_ids: torch.Tensor,
- expert_ids: torch.Tensor,
- num_tokens_post_padded: torch.Tensor,
- mul_routed_weight: bool,
- top_k: int,
- config: Dict[str, Any],
- compute_type: tl.dtype,
- use_fp8_w8a8: bool,
- use_int8_w8a8: bool,
- use_int8_w8a16: bool,
- use_int4_w4a16: bool,
- per_channel_quant: bool,
- block_shape: Optional[List[int]] = None,
- no_combine: bool = False,
-) -> None:
- assert topk_weights.stride(1) == 1
- assert sorted_token_ids.stride(0) == 1
-
- padded_size = 0
- if use_fp8_w8a8:
- assert B_scale is not None
- if block_shape is None:
- # activation tensor-wise fp8 quantization, dynamic or static
- padded_size = padding_size
- # activations apply per-token quantization when weights apply per-channel quantization by default
- A, A_scale = scaled_fp8_quant(
- A, A_scale, use_per_token_if_dynamic=per_channel_quant
- )
- else:
- # activation block-wise fp8 quantization
- assert len(block_shape) == 2
- block_n, block_k = block_shape[0], block_shape[1]
- if _is_cuda:
- A, A_scale = sglang_per_token_group_quant_fp8(A, block_k)
- else:
- A, A_scale = per_token_group_quant_fp8(A, block_k)
- assert triton.cdiv(A.shape[-1], block_k) == A_scale.shape[-1]
- assert triton.cdiv(B.shape[-2], block_n) == B_scale.shape[-2]
- assert triton.cdiv(B.shape[-1], block_k) == B_scale.shape[-1]
- elif use_int8_w8a8:
- assert B_scale is not None
- if block_shape is None:
- # activation channel-wise int8 quantization
- assert (
- per_channel_quant
- ), "int8 quantization only supports channel-wise quantization except for block-wise quantization"
- A, A_scale = per_token_quant_int8(A)
- else:
- # activation block-wise int8 quantization
- assert len(block_shape) == 2
- block_n, block_k = block_shape[0], block_shape[1]
- if _is_cuda:
- A, A_scale = sglang_per_token_group_quant_int8(A, block_k)
- else:
- A, A_scale = per_token_group_quant_int8(A, block_k)
- assert triton.cdiv(A.shape[-1], block_k) == A_scale.shape[-1]
- assert triton.cdiv(B.shape[-2], block_n) == B_scale.shape[-2]
- assert triton.cdiv(B.shape[-1], block_k) == B_scale.shape[-1]
- elif use_int8_w8a16 or use_int4_w4a16:
- assert B_scale is not None
- assert block_shape is None or block_shape[0] == 0
- else:
- assert A_scale is None
- assert B_scale is None
-
- grid = lambda META: (
- triton.cdiv(sorted_token_ids.shape[0], META["BLOCK_SIZE_M"])
- * triton.cdiv(B.shape[1], META["BLOCK_SIZE_N"]),
- )
-
- K = B.shape[2] - padded_size
- if K % config["BLOCK_SIZE_K"] == 0:
- even_Ks = True
- else:
- even_Ks = False
-
- if (
- (use_int8_w8a16 or use_int4_w4a16)
- and block_shape is not None
- and block_shape[1] > 0
- ):
- assert B_scale is not None and B_scale.ndim == 3
- assert B_zp is None or B_zp.ndim == 3
- assert bias is None
- fused_moe_kernel_gptq_awq[grid](
- A,
- B,
- C,
- B_scale,
- B_zp,
- topk_weights,
- sorted_token_ids,
- expert_ids,
- num_tokens_post_padded,
- B.shape[1],
- A.shape[1],
- sorted_token_ids.shape[0],
- topk_ids.numel(),
- A.stride(0),
- A.stride(1),
- B.stride(0),
- B.stride(2),
- B.stride(1),
- C.stride(1),
- C.stride(2),
- B_scale.stride(0),
- B_scale.stride(2),
- B_scale.stride(1),
- B_zp.stride(0) if B_zp is not None else 0,
- B_zp.stride(2) if B_zp is not None else 0,
- B_zp.stride(1) if B_zp is not None else 0,
- group_size=block_shape[1],
- MUL_ROUTED_WEIGHT=mul_routed_weight,
- top_k=top_k,
- compute_type=compute_type,
- has_zp=B_zp is not None,
- use_int4_w4a16=use_int4_w4a16,
- use_int8_w8a16=use_int8_w8a16,
- even_Ks=even_Ks,
- **config,
- )
-
else:
+ from vllm import _custom_ops as vllm_ops
- fused_moe_kernel[grid](
- A,
- B,
- bias,
- C,
- A_scale,
- B_scale,
- topk_weights,
- sorted_token_ids,
- expert_ids,
- num_tokens_post_padded,
- B.shape[1],
- B.shape[2] - padded_size,
- sorted_token_ids.shape[0],
- topk_ids.numel(),
- A.stride(0),
- A.stride(1),
- B.stride(0),
- B.stride(2),
- B.stride(1),
- bias.stride(0) if bias is not None else 0,
- bias.stride(1) if bias is not None else 0,
- C.stride(1),
- C.stride(2),
- A_scale.stride(0) if A_scale is not None and A_scale.ndim == 2 else 0,
- A_scale.stride(1) if A_scale is not None and A_scale.ndim == 2 else 0,
- B_scale.stride(0) if B_scale is not None and B_scale.ndim >= 2 else 0,
- B_scale.stride(2) if B_scale is not None and B_scale.ndim == 3 else 0,
- B_scale.stride(1) if B_scale is not None and B_scale.ndim >= 2 else 0,
- 0 if block_shape is None else block_shape[0],
- 0 if block_shape is None else block_shape[1],
- MUL_ROUTED_WEIGHT=mul_routed_weight,
- top_k=top_k,
- compute_type=compute_type,
- use_fp8_w8a8=use_fp8_w8a8,
- use_int8_w8a8=use_int8_w8a8,
- use_int8_w8a16=use_int8_w8a16,
- per_channel_quant=per_channel_quant,
- even_Ks=even_Ks,
- **config,
- )
-
-
-def get_config_file_name(
- E: int, N: int, dtype: Optional[str], block_shape: Optional[int] = None
-) -> str:
- device_name = get_device_name().replace(" ", "_")
- dtype_selector = "" if not dtype else f",dtype={dtype}"
- block_shape_selector = (
- "" if not block_shape or not all(block_shape) else f",block_shape={block_shape}"
- )
- return f"E={E},N={N},device_name={device_name}{dtype_selector}{block_shape_selector}.json"
-
-
-@functools.lru_cache
-def get_moe_configs(
- E: int,
- N: int,
- dtype: Optional[str],
- block_n: Optional[int] = 0,
- block_k: Optional[int] = 0,
-) -> Optional[Dict[int, Any]]:
- """
- Return optimized configurations for the fused MoE kernel.
-
- The return value will be a dictionary that maps an irregular grid of
- batch sizes to configurations of the fused_moe kernel. To evaluate the
- kernel on a given batch size bs, the closest batch size in the grid should
- be picked and the associated configuration chosen to invoke the kernel.
- """
- # Supported Triton versions, should be sorted from the newest to the oldest
- supported_triton_versions = ["3.3.1", "3.2.0", "3.1.0"]
-
- # First look up if an optimized configuration is available in the configs
- # directory
- json_file_name = get_config_file_name(E, N, dtype, [block_n, block_k])
-
- # We found that using the fused_moe_kernel config from Triton 3.1.0 with Triton 3.2.0 results in negative performance gains,
- # so we also include the Triton version as a key for finding the fused_moe_kernel config to achieve the best performance.
- triton_version = triton.__version__
- version_dir = f"triton_{triton_version.replace('.', '_')}"
- config_file_path = os.path.join(
- os.path.dirname(os.path.realpath(__file__)),
- "configs",
- version_dir,
- json_file_name,
- )
- if os.path.exists(config_file_path):
- with open(config_file_path) as f:
- # Please note that although we find the config files, performance might still be suboptimal.
- # This is because the tuning environment might differ from your current environment.
- # For example, updating the Triton version might cause all old configs to become suboptimal.
- # To achieve the best performance, consider re-tuning the Triton fused MOE kernel in your environment.
- # For the tuning method, refer to: https://github.com/sgl-project/sglang/tree/main/benchmark/kernels/fused_moe_triton
- logger.info(f"Using MoE kernel config from {config_file_path}.")
- # If a configuration has been found, return it
- return {int(key): val for key, val in json.load(f).items()}
-
- # Searching for other triton versions that supports the same config
- for try_triton_version in supported_triton_versions:
- if try_triton_version == triton_version:
- continue
- try_config_file_path = os.path.join(
- os.path.dirname(os.path.realpath(__file__)),
- "configs",
- f"triton_{try_triton_version.replace('.', '_')}",
- json_file_name,
- )
- if os.path.exists(try_config_file_path):
- with open(try_config_file_path) as f:
- logger.warning(
- f"Config file not found at {config_file_path}. Fallback to triton version {try_triton_version} and use MoE kernel config from {try_config_file_path}. Performance might be sub-optimal!",
- )
- # If a configuration has been found, return it
- return {int(key): val for key, val in json.load(f).items()}
-
- # If no optimized configuration is available, we will use the default
- # configuration
- logger.warning(
- (
- "Using default MoE kernel config. Performance might be sub-optimal! "
- "Config file not found at %s, you can create them with https://github.com/sgl-project/sglang/tree/main/benchmark/kernels/fused_moe_triton"
- ),
- config_file_path,
- )
- return None
-
-
-def get_default_config(
- M: int,
- E: int,
- N: int,
- K: int,
- topk: int,
- dtype: Optional[str],
- is_marlin: bool,
- block_shape: Optional[List[int]] = None,
-) -> Dict[str, int]:
- if dtype == "fp8_w8a8":
- if block_shape is None:
- config = {
- "BLOCK_SIZE_M": 128,
- "BLOCK_SIZE_N": 256,
- "BLOCK_SIZE_K": 128,
- "GROUP_SIZE_M": 32,
- "num_warps": 8,
- "num_stages": 2 if _is_hip else 4,
- }
- if M <= E:
- config = {
- "BLOCK_SIZE_M": 64,
- "BLOCK_SIZE_N": 128,
- "BLOCK_SIZE_K": 128,
- "GROUP_SIZE_M": 1,
- "num_warps": 4,
- "num_stages": 2 if _is_hip else 4,
- }
- else:
- # Block-wise quant: BLOCK_SIZE_K must be divisible by block_shape[1]
- config = {
- "BLOCK_SIZE_M": 64,
- "BLOCK_SIZE_N": block_shape[0],
- "BLOCK_SIZE_K": block_shape[1],
- "GROUP_SIZE_M": 32,
- "num_warps": 4,
- "num_stages": 2 if _is_hip else 3,
- }
- else:
- config = {
- "BLOCK_SIZE_M": 64,
- "BLOCK_SIZE_N": 64,
- "BLOCK_SIZE_K": 32,
- "GROUP_SIZE_M": 8,
- }
- # A heuristic: fused marlin works faster with this config for small M
- if M <= E or (is_marlin and M <= 32):
- config = {
- "BLOCK_SIZE_M": 16,
- "BLOCK_SIZE_N": 32,
- "BLOCK_SIZE_K": 64,
- "GROUP_SIZE_M": 1,
- }
- return config
-
-
-def try_get_optimal_moe_config(
- w1_shape: Tuple[int, ...],
- w2_shape: Tuple[int, ...],
- top_k: int,
- dtype: Optional[str],
- M: int,
- is_marlin: bool = False,
- block_shape: Optional[List[int]] = None,
-):
- from sglang.srt.layers.moe.fused_moe_triton import get_config
-
- override_config = get_config()
- if override_config:
- config = override_config
- else:
- # First try to load optimal config from the file
- E, _, N = w2_shape
- block_n = block_shape[0] if block_shape else 0
- block_k = block_shape[1] if block_shape else 0
- configs = get_moe_configs(E, N, dtype, block_n, block_k)
-
- if configs:
- # If an optimal configuration map has been found, look up the
- # optimal config
- config = configs[min(configs.keys(), key=lambda x: abs(x - M))]
- else:
- # Else use the default config
- config = get_default_config(
- M, E, N, w1_shape[2], top_k, dtype, is_marlin, block_shape
- )
- return config
-
-
-def get_config_dtype_str(
- dtype: torch.dtype,
- use_int8_w8a16: Optional[bool] = False,
- use_int4_w4a16: Optional[bool] = False,
- use_fp8_w8a8: Optional[bool] = False,
- use_int8_w8a8: Optional[bool] = False,
-):
- if use_fp8_w8a8:
- return "fp8_w8a8"
- elif use_int8_w8a8:
- return "int8_w8a8"
- elif use_int4_w4a16:
- return "int4_w4a16"
- elif use_int8_w8a16:
- return "int8_w8a16"
- elif dtype == torch.float:
- # avoiding cases where kernel fails when float32 MoE
- # use fp16/bfloat16 configs
- return "float32"
- return None
+padding_size = 128 if bool(int(os.getenv("SGLANG_MOE_PADDING", "0"))) else 0
def inplace_fused_experts(
@@ -1274,92 +322,6 @@ def fused_experts(
)
-# _moe_sum_reduce_kernel kernel modified from https://github.com/ModelTC/lightllm/blob/main/lightllm/common/fused_moe/moe_sum_reduce.py
-@triton.jit
-def _moe_sum_reduce_kernel(
- input_ptr,
- input_stride_0,
- input_stride_1,
- input_stride_2,
- output_ptr,
- output_stride_0,
- output_stride_1,
- token_num: int,
- topk_num: int,
- hidden_dim: int,
- routed_scaling_factor: tl.constexpr,
- BLOCK_M: tl.constexpr,
- BLOCK_DIM: tl.constexpr,
- NUM_STAGE: tl.constexpr,
-):
- input_stride_0 = tl.cast(input_stride_0, dtype=tl.int64)
- input_stride_1 = tl.cast(input_stride_1, dtype=tl.int64)
- output_stride_0 = tl.cast(output_stride_0, dtype=tl.int64)
-
- token_block_id = tl.program_id(0)
- dim_block_id = tl.program_id(1)
-
- token_start = token_block_id * BLOCK_M
- token_end = min((token_block_id + 1) * BLOCK_M, token_num)
-
- dim_start = dim_block_id * BLOCK_DIM
- dim_end = min((dim_block_id + 1) * BLOCK_DIM, hidden_dim)
-
- offs_dim = dim_start + tl.arange(0, BLOCK_DIM)
-
- for token_index in range(token_start, token_end):
- accumulator = tl.zeros((BLOCK_DIM,), dtype=tl.float32)
- input_t_ptr = input_ptr + token_index * input_stride_0 + offs_dim
- for i in tl.range(0, topk_num, num_stages=NUM_STAGE):
- tmp = tl.load(
- input_t_ptr + i * input_stride_1, mask=offs_dim < dim_end, other=0.0
- )
- accumulator += tmp
- accumulator = accumulator * routed_scaling_factor
- store_t_ptr = output_ptr + token_index * output_stride_0 + offs_dim
- tl.store(
- store_t_ptr,
- accumulator.to(input_ptr.dtype.element_ty),
- mask=offs_dim < dim_end,
- )
-
-
-def moe_sum_reduce_triton(
- input: torch.Tensor, output: torch.Tensor, routed_scaling_factor: float
-):
- assert input.is_contiguous()
- assert output.is_contiguous()
-
- token_num, topk_num, hidden_dim = input.shape
- assert output.shape[0] == token_num and output.shape[1] == hidden_dim
-
- BLOCK_M = 1
- BLOCK_DIM = 2048
- NUM_STAGE = 1
- num_warps = 8
-
- grid = (
- triton.cdiv(token_num, BLOCK_M),
- triton.cdiv(hidden_dim, BLOCK_DIM),
- )
-
- _moe_sum_reduce_kernel[grid](
- input,
- *input.stride(),
- output,
- *output.stride(),
- token_num=token_num,
- topk_num=topk_num,
- hidden_dim=hidden_dim,
- routed_scaling_factor=routed_scaling_factor,
- BLOCK_M=BLOCK_M,
- BLOCK_DIM=BLOCK_DIM,
- NUM_STAGE=NUM_STAGE,
- num_warps=num_warps,
- )
- return
-
-
@torch.compile
def moe_sum_reduce_torch_compile(x, out, routed_scaling_factor):
torch.sum(x, dim=1, out=out)
@@ -1537,7 +499,7 @@ def fused_experts_impl(
gemm1_alpha,
gemm1_limit,
)
- elif _is_cuda:
+ elif _is_cuda or _is_hip:
silu_and_mul(intermediate_cache1.view(-1, N), intermediate_cache2)
else:
vllm_ops.silu_and_mul(
@@ -1546,7 +508,7 @@ def fused_experts_impl(
elif activation == "gelu":
assert gemm1_alpha is None, "gemm1_alpha is not supported for gelu"
assert gemm1_limit is None, "gemm1_limit is not supported for gelu"
- if _is_cuda:
+ if _is_cuda or _is_hip:
gelu_and_mul(intermediate_cache1.view(-1, N), intermediate_cache2)
else:
vllm_ops.gelu_and_mul(
@@ -1619,10 +581,19 @@ def fused_experts_impl(
out_hidden_states[begin_chunk_idx:end_chunk_idx],
)
else:
- vllm_ops.moe_sum(
- intermediate_cache3.view(*intermediate_cache3.shape),
- out_hidden_states[begin_chunk_idx:end_chunk_idx],
- )
+            # According to micro-benchmark results, torch.compile achieves better performance for small token counts.
+ if tokens_in_chunk <= 32:
+ moe_sum_reduce_torch_compile(
+ intermediate_cache3.view(*intermediate_cache3.shape),
+ out_hidden_states[begin_chunk_idx:end_chunk_idx],
+ routed_scaling_factor,
+ )
+ else:
+ moe_sum_reduce_triton(
+ intermediate_cache3.view(*intermediate_cache3.shape),
+ out_hidden_states[begin_chunk_idx:end_chunk_idx],
+ routed_scaling_factor,
+ )
else:
vllm_ops.moe_sum(
intermediate_cache3.view(*intermediate_cache3.shape),
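A minimal sketch (not part of the patch) of the reduction dispatch introduced in the hunk above, pulled out as a standalone helper. It assumes moe_sum_reduce_torch_compile (kept in fused_moe.py) and moe_sum_reduce_triton (now in fused_moe_triton_kernels.py) are importable from their new locations; the wrapper name combine_expert_outputs is hypothetical.

    import torch

    from sglang.srt.layers.moe.fused_moe_triton.fused_moe import (
        moe_sum_reduce_torch_compile,
    )
    from sglang.srt.layers.moe.fused_moe_triton.fused_moe_triton_kernels import (
        moe_sum_reduce_triton,
    )

    def combine_expert_outputs(
        intermediate: torch.Tensor,  # [tokens, topk, hidden] per-expert partial outputs
        out: torch.Tensor,           # [tokens, hidden] combined output, written in place
        routed_scaling_factor: float,
    ) -> None:
        tokens_in_chunk = intermediate.shape[0]
        if tokens_in_chunk <= 32:
            # Small chunks: the torch.compile reduction wins per the benchmark note above.
            moe_sum_reduce_torch_compile(intermediate, out, routed_scaling_factor)
        else:
            # Larger chunks: the Triton reduction kernel is faster.
            moe_sum_reduce_triton(intermediate, out, routed_scaling_factor)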
diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/fused_moe_triton_config.py b/python/sglang/srt/layers/moe/fused_moe_triton/fused_moe_triton_config.py
new file mode 100644
index 00000000000..51114aadeb6
--- /dev/null
+++ b/python/sglang/srt/layers/moe/fused_moe_triton/fused_moe_triton_config.py
@@ -0,0 +1,212 @@
+from __future__ import annotations
+
+import functools
+import json
+import logging
+import os
+from typing import Any, Dict, List, Optional, Tuple
+
+import torch
+import triton
+
+from sglang.srt.utils import get_device_name, is_hip
+
+logger = logging.getLogger(__name__)
+_is_hip = is_hip()
+
+
+def get_config_file_name(
+ E: int, N: int, dtype: Optional[str], block_shape: Optional[int] = None
+) -> str:
+ device_name = get_device_name().replace(" ", "_")
+ dtype_selector = "" if not dtype else f",dtype={dtype}"
+ block_shape_selector = (
+ "" if not block_shape or not all(block_shape) else f",block_shape={block_shape}"
+ )
+ return f"E={E},N={N},device_name={device_name}{dtype_selector}{block_shape_selector}.json"
+
+
+@functools.lru_cache
+def get_moe_configs(
+ E: int,
+ N: int,
+ dtype: Optional[str],
+ block_n: Optional[int] = 0,
+ block_k: Optional[int] = 0,
+) -> Optional[Dict[int, Any]]:
+ """
+ Return optimized configurations for the fused MoE kernel.
+
+ The return value will be a dictionary that maps an irregular grid of
+ batch sizes to configurations of the fused_moe kernel. To evaluate the
+ kernel on a given batch size bs, the closest batch size in the grid should
+ be picked and the associated configuration chosen to invoke the kernel.
+ """
+ # Supported Triton versions, should be sorted from the newest to the oldest
+ supported_triton_versions = ["3.3.1", "3.2.0", "3.1.0"]
+
+ # First look up if an optimized configuration is available in the configs
+ # directory
+ json_file_name = get_config_file_name(E, N, dtype, [block_n, block_k])
+
+    # We found that reusing the fused_moe_kernel config tuned for Triton 3.1.0 under Triton 3.2.0 causes performance regressions,
+    # so we also include the Triton version as a key when looking up the fused_moe_kernel config.
+ triton_version = triton.__version__
+ version_dir = f"triton_{triton_version.replace('.', '_')}"
+ config_file_path = os.path.join(
+ os.path.dirname(os.path.realpath(__file__)),
+ "configs",
+ version_dir,
+ json_file_name,
+ )
+ if os.path.exists(config_file_path):
+ with open(config_file_path) as f:
+            # Note that even when a config file is found, performance might still be suboptimal,
+            # because the tuning environment might differ from your current environment.
+ # For example, updating the Triton version might cause all old configs to become suboptimal.
+ # To achieve the best performance, consider re-tuning the Triton fused MOE kernel in your environment.
+ # For the tuning method, refer to: https://github.com/sgl-project/sglang/tree/main/benchmark/kernels/fused_moe_triton
+ logger.info(f"Using MoE kernel config from {config_file_path}.")
+ # If a configuration has been found, return it
+ return {int(key): val for key, val in json.load(f).items()}
+
+    # Search for other Triton versions that support the same config
+ for try_triton_version in supported_triton_versions:
+ if try_triton_version == triton_version:
+ continue
+ try_config_file_path = os.path.join(
+ os.path.dirname(os.path.realpath(__file__)),
+ "configs",
+ f"triton_{try_triton_version.replace('.', '_')}",
+ json_file_name,
+ )
+ if os.path.exists(try_config_file_path):
+ with open(try_config_file_path) as f:
+ logger.warning(
+ f"Config file not found at {config_file_path}. Fallback to triton version {try_triton_version} and use MoE kernel config from {try_config_file_path}. Performance might be sub-optimal!",
+ )
+ # If a configuration has been found, return it
+ return {int(key): val for key, val in json.load(f).items()}
+
+ # If no optimized configuration is available, we will use the default
+ # configuration
+ logger.warning(
+ (
+ "Using default MoE kernel config. Performance might be sub-optimal! "
+ "Config file not found at %s, you can create them with https://github.com/sgl-project/sglang/tree/main/benchmark/kernels/fused_moe_triton"
+ ),
+ config_file_path,
+ )
+ return None
+
+
+def get_default_config(
+ M: int,
+ E: int,
+ N: int,
+ K: int,
+ topk: int,
+ dtype: Optional[str],
+ is_marlin: bool,
+ block_shape: Optional[List[int]] = None,
+) -> Dict[str, int]:
+ if dtype == "fp8_w8a8":
+ if block_shape is None:
+ config = {
+ "BLOCK_SIZE_M": 128,
+ "BLOCK_SIZE_N": 256,
+ "BLOCK_SIZE_K": 128,
+ "GROUP_SIZE_M": 32,
+ "num_warps": 8,
+ "num_stages": 2 if _is_hip else 4,
+ }
+ if M <= E:
+ config = {
+ "BLOCK_SIZE_M": 64,
+ "BLOCK_SIZE_N": 128,
+ "BLOCK_SIZE_K": 128,
+ "GROUP_SIZE_M": 1,
+ "num_warps": 4,
+ "num_stages": 2 if _is_hip else 4,
+ }
+ else:
+ # Block-wise quant: BLOCK_SIZE_K must be divisible by block_shape[1]
+ config = {
+ "BLOCK_SIZE_M": 64,
+ "BLOCK_SIZE_N": block_shape[0],
+ "BLOCK_SIZE_K": block_shape[1],
+ "GROUP_SIZE_M": 32,
+ "num_warps": 4,
+ "num_stages": 2 if _is_hip else 3,
+ }
+ else:
+ config = {
+ "BLOCK_SIZE_M": 64,
+ "BLOCK_SIZE_N": 64,
+ "BLOCK_SIZE_K": 32,
+ "GROUP_SIZE_M": 8,
+ }
+ # A heuristic: fused marlin works faster with this config for small M
+ if M <= E or (is_marlin and M <= 32):
+ config = {
+ "BLOCK_SIZE_M": 16,
+ "BLOCK_SIZE_N": 32,
+ "BLOCK_SIZE_K": 64,
+ "GROUP_SIZE_M": 1,
+ }
+ return config
+
+
+def try_get_optimal_moe_config(
+ w1_shape: Tuple[int, ...],
+ w2_shape: Tuple[int, ...],
+ top_k: int,
+ dtype: Optional[str],
+ M: int,
+ is_marlin: bool = False,
+ block_shape: Optional[List[int]] = None,
+):
+ from sglang.srt.layers.moe.fused_moe_triton import get_config
+
+ override_config = get_config()
+ if override_config:
+ config = override_config
+ else:
+ # First try to load optimal config from the file
+ E, _, N = w2_shape
+ block_n = block_shape[0] if block_shape else 0
+ block_k = block_shape[1] if block_shape else 0
+ configs = get_moe_configs(E, N, dtype, block_n, block_k)
+
+ if configs:
+ # If an optimal configuration map has been found, look up the
+ # optimal config
+ config = configs[min(configs.keys(), key=lambda x: abs(x - M))]
+ else:
+ # Else use the default config
+ config = get_default_config(
+ M, E, N, w1_shape[2], top_k, dtype, is_marlin, block_shape
+ )
+ return config
+
+
+def get_config_dtype_str(
+ dtype: torch.dtype,
+ use_int8_w8a16: Optional[bool] = False,
+ use_int4_w4a16: Optional[bool] = False,
+ use_fp8_w8a8: Optional[bool] = False,
+ use_int8_w8a8: Optional[bool] = False,
+):
+ if use_fp8_w8a8:
+ return "fp8_w8a8"
+ elif use_int8_w8a8:
+ return "int8_w8a8"
+ elif use_int4_w4a16:
+ return "int4_w4a16"
+ elif use_int8_w8a16:
+ return "int8_w8a16"
+ elif dtype == torch.float:
+        # Avoid cases where the kernel fails when a float32 MoE
+        # uses fp16/bfloat16 configs
+ return "float32"
+ return None
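A quick illustration (not part of the patch) of what the lookup in get_moe_configs above resolves to on disk, using made-up values for E, N, the device name, and the Triton version.

    # Mirrors get_config_file_name() with example inputs; all concrete values are hypothetical.
    E, N, dtype, block_shape = 8, 14336, "fp8_w8a8", [128, 128]
    device_name = "NVIDIA_H100_80GB_HBM3"  # get_device_name().replace(" ", "_")
    json_file_name = (
        f"E={E},N={N},device_name={device_name},dtype={dtype},block_shape={block_shape}.json"
    )
    # -> "E=8,N=14336,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json"
    #
    # get_moe_configs() looks for this file under configs/triton_<current version>/ first,
    # then under the other supported Triton versions, and finally falls back to
    # get_default_config() when nothing is found.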
diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/fused_moe_triton_kernels.py b/python/sglang/srt/layers/moe/fused_moe_triton/fused_moe_triton_kernels.py
new file mode 100644
index 00000000000..6a7229a9b1f
--- /dev/null
+++ b/python/sglang/srt/layers/moe/fused_moe_triton/fused_moe_triton_kernels.py
@@ -0,0 +1,799 @@
+from __future__ import annotations
+
+import os
+from typing import Any, Dict, List, Optional
+
+import torch
+import triton
+import triton.language as tl
+
+from sglang.srt.layers.quantization.fp8_kernel import (
+ per_token_group_quant_fp8,
+ scaled_fp8_quant,
+ sglang_per_token_group_quant_fp8,
+)
+from sglang.srt.layers.quantization.int8_kernel import (
+ per_token_group_quant_int8,
+ per_token_quant_int8,
+ sglang_per_token_group_quant_int8,
+)
+from sglang.srt.utils import (
+ cpu_has_amx_support,
+ get_bool_env_var,
+ is_cpu,
+ is_cuda,
+ is_hip,
+)
+
+_is_hip = is_hip()
+_is_cuda = is_cuda()
+_is_cpu_amx_available = cpu_has_amx_support()
+_is_cpu = is_cpu()
+_use_aiter = get_bool_env_var("SGLANG_USE_AITER") and _is_hip
+
+if _is_cuda:
+ pass
+elif _is_cpu and _is_cpu_amx_available:
+ pass
+elif _is_hip:
+ pass
+
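+# SGLANG_MOE_PADDING=1 indicates that the fp8 expert weights are assumed to have been
+# padded by 128 elements along the K dimension; invoke_fused_moe_kernel below subtracts
+# this padding when computing K on the tensor-wise fp8 path.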
+padding_size = 128 if bool(int(os.getenv("SGLANG_MOE_PADDING", "0"))) else 0
+
+
+@triton.jit
+def write_zeros_to_output(
+ c_ptr,
+ stride_cm,
+ stride_cn,
+ pid_n,
+ N,
+ offs_token,
+ token_mask,
+ BLOCK_SIZE_M,
+ BLOCK_SIZE_N,
+ compute_type,
+):
+ accumulator = tl.zeros((BLOCK_SIZE_M, BLOCK_SIZE_N), dtype=compute_type)
+ offs_cn = pid_n * BLOCK_SIZE_N + tl.arange(0, BLOCK_SIZE_N)
+ c_ptrs = c_ptr + stride_cm * offs_token[:, None] + stride_cn * offs_cn[None, :]
+ c_mask = token_mask[:, None] & (offs_cn[None, :] < N)
+ tl.store(c_ptrs, accumulator, mask=c_mask)
+
+
+@triton.jit
+def fused_moe_kernel_gptq_awq(
+ # Pointers to matrices
+ a_ptr,
+ b_ptr,
+ c_ptr,
+ b_scale_ptr,
+ b_zp_ptr,
+ topk_weights_ptr,
+ sorted_token_ids_ptr,
+ expert_ids_ptr,
+ num_tokens_post_padded_ptr,
+ # Matrix dimensions
+ N: tl.constexpr,
+ K: tl.constexpr,
+ EM,
+ num_valid_tokens,
+ # The stride variables represent how much to increase the ptr by when
+ # moving by 1 element in a particular dimension. E.g. `stride_am` is
+ # how much to increase `a_ptr` by to get the element one row down
+ # (A has M rows).
+ stride_am,
+ stride_ak,
+ stride_be,
+ stride_bk,
+ stride_bn,
+ stride_cm,
+ stride_cn,
+ stride_bse,
+ stride_bsk,
+ stride_bsn,
+ stride_bze,
+ stride_bzk,
+ stride_bzn,
+ group_size: tl.constexpr,
+ # Meta-parameters
+ BLOCK_SIZE_M: tl.constexpr,
+ BLOCK_SIZE_N: tl.constexpr,
+ BLOCK_SIZE_K: tl.constexpr,
+ GROUP_SIZE_M: tl.constexpr,
+ MUL_ROUTED_WEIGHT: tl.constexpr,
+ top_k: tl.constexpr,
+ compute_type: tl.constexpr,
+ has_zp: tl.constexpr,
+ use_int4_w4a16: tl.constexpr,
+ use_int8_w8a16: tl.constexpr,
+ even_Ks: tl.constexpr,
+):
+ """
+ Implements the fused computation for a Mixture of Experts (MOE) using
+ token and expert matrices.
+ Key Parameters:
+ - A: The input tensor representing tokens with shape (*, K), where '*' can
+ be any shape representing batches and K is the feature dimension of
+ each token.
+ - B: The stacked MOE weight tensor with shape (E, N, K), where E is
+ the number of experts, K is the input feature dimension, and N is
+ the output feature dimension.
+ - C: The output cache tensor with shape (M, topk, N), where M is the
+ total number of tokens post padding, topk is the number of times
+ each token is repeated, and N is the output feature dimension.
+ - sorted_token_ids: A tensor containing the sorted indices of tokens,
+ repeated topk times and arranged by the expert index they are
+ assigned to.
+ - expert_ids: A tensor containing the indices of the expert for each
+ block. It determines which expert matrix from B should be used for
+ each block in A.
+ This kernel performs the multiplication of a token by its corresponding
+ expert matrix as determined by `expert_ids`. The sorting of
+ `sorted_token_ids` by expert index and padding ensures divisibility by
+ BLOCK_SIZE_M, which is necessary to maintain consistency in block matrix
+ multiplication across different blocks processed by the same expert.
+ """
+ # -----------------------------------------------------------
+ # Map program ids `pid` to the block of C it should compute.
+ # This is done in a grouped ordering to promote L2 data reuse.
+ pid = tl.program_id(axis=0)
+ num_pid_m = tl.cdiv(EM, BLOCK_SIZE_M)
+ num_pid_n = tl.cdiv(N, BLOCK_SIZE_N)
+ num_pid_in_group = GROUP_SIZE_M * num_pid_n
+ group_id = pid // num_pid_in_group
+ first_pid_m = group_id * GROUP_SIZE_M
+ group_size_m = min(num_pid_m - first_pid_m, GROUP_SIZE_M)
+ pid_m = first_pid_m + ((pid % num_pid_in_group) % group_size_m)
+ pid_n = (pid % num_pid_in_group) // group_size_m
+
+ # ----------------------------------------------------------
+ # Create pointers for the first blocks of A and B.
+ # We will advance this pointer as we move in the K direction
+ # and accumulate
+ # `a_ptrs` is a block of [BLOCK_SIZE_M, BLOCK_SIZE_K] pointers
+ # `b_ptrs` is a block of [BLOCK_SIZE_K, BLOCK_SIZE_N] pointers
+ num_tokens_post_padded = tl.load(num_tokens_post_padded_ptr)
+ if pid_m * BLOCK_SIZE_M >= num_tokens_post_padded:
+ return
+ offs_token_id = pid_m * BLOCK_SIZE_M + tl.arange(0, BLOCK_SIZE_M).to(tl.int64)
+ offs_token = tl.load(sorted_token_ids_ptr + offs_token_id)
+ token_mask = offs_token < num_valid_tokens
+
+ off_experts = tl.load(expert_ids_ptr + pid_m).to(tl.int64)
+ if off_experts == -1:
+ # -----------------------------------------------------------
+ # Write back zeros to the output when the expert is not
+ # in the current expert parallel rank.
+ write_zeros_to_output(
+ c_ptr,
+ stride_cm,
+ stride_cn,
+ pid_n,
+ N,
+ offs_token,
+ token_mask,
+ BLOCK_SIZE_M,
+ BLOCK_SIZE_N,
+ compute_type,
+ )
+ return
+
+ offs_bn = (pid_n * BLOCK_SIZE_N + tl.arange(0, BLOCK_SIZE_N).to(tl.int64)) % N
+ offs_k = tl.arange(0, BLOCK_SIZE_K)
+ a_ptrs = a_ptr + (
+ offs_token[:, None] // top_k * stride_am + offs_k[None, :] * stride_ak
+ )
+
+ if use_int4_w4a16:
+ b_ptrs = (
+ b_ptr
+ + off_experts * stride_be
+ + (offs_k[:, None] // 2) * stride_bk
+ + offs_bn[None, :] * stride_bn
+ )
+ b_shifter = (offs_k[:, None] % 2) * 4
+ elif use_int8_w8a16:
+ b_ptrs = (
+ b_ptr
+ + off_experts * stride_be
+ + offs_k[:, None] * stride_bk
+ + offs_bn[None, :] * stride_bn
+ )
+
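+    # Note: on the int4 path two weights are packed into each byte, so b_shifter above
+    # selects the low or high nibble. When no zero-point tensor is provided, conventional
+    # symmetric defaults are used instead (8 for unsigned int4, 128 for unsigned int8)
+    # and subtracted during dequantization to recenter the weights around zero.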
+ if not has_zp and use_int4_w4a16:
+ b_zp_num = 8
+ if not has_zp and use_int8_w8a16:
+ b_zp_num = 128
+ elif has_zp and use_int4_w4a16:
+ b_zp_shifter = (offs_bn[None, :] % 2) * 4
+
+ # -----------------------------------------------------------
+ # Iterate to compute a block of the C matrix.
+ # We accumulate into a `[BLOCK_SIZE_M, BLOCK_SIZE_N]` block
+ # of fp32 values for higher accuracy.
+ # `accumulator` will be converted back to fp16 after the loop.
+ accumulator = tl.zeros((BLOCK_SIZE_M, BLOCK_SIZE_N), dtype=tl.float32)
+ for k in range(0, tl.cdiv(K, BLOCK_SIZE_K)):
+ # Load the next block of A and B, generate a mask by checking the
+ # K dimension.
+
+ if not even_Ks:
+ k_mask = offs_k[:, None] < K - k * BLOCK_SIZE_K
+ k_other = 0.0
+ else:
+ k_mask = None
+ k_other = None
+
+ a = tl.load(
+ a_ptrs,
+ mask=token_mask[:, None] & (offs_k[None, :] < K - k * BLOCK_SIZE_K),
+ other=0.0,
+ )
+ b = tl.load(b_ptrs)
+ if use_int4_w4a16:
+ b = (b >> b_shifter) & 0xF
+
+ b_scale_ptrs = (
+ b_scale_ptr
+ + off_experts * stride_bse
+ + offs_bn[None, :] * stride_bsn
+ + ((offs_k[:, None] + BLOCK_SIZE_K * k) // group_size) * stride_bsk
+ )
+ b_scale = tl.load(b_scale_ptrs, mask=k_mask, other=k_other)
+ b_scale = b_scale.to(tl.float32)
+
+ if has_zp and use_int4_w4a16:
+ offs_k_true = (offs_k[:, None] + BLOCK_SIZE_K * k) // group_size
+ b_zp_ptrs = (
+ b_zp_ptr
+ + off_experts * stride_bze
+ + (offs_bn[None, :] // 2) * stride_bzn
+ + offs_k_true * stride_bzk
+ )
+ b_zp = tl.load(b_zp_ptrs, mask=k_mask, other=k_other)
+ b_zp = (b_zp >> b_zp_shifter) & 0xF
+ b_zp = b_zp.to(tl.float32)
+ elif has_zp and use_int8_w8a16:
+ offs_k_true = (offs_k[:, None] + BLOCK_SIZE_K * k) // group_size
+ b_zp_ptrs = (
+ b_zp_ptr
+ + off_experts * stride_bze
+ + offs_bn[None, :] * stride_bzn
+ + offs_k_true * stride_bzk
+ )
+ b_zp = tl.load(b_zp_ptrs, mask=k_mask, other=k_other)
+ b_zp = b_zp.to(tl.float32)
+
+ # We accumulate along the K dimension.
+ if has_zp:
+ b = ((b.to(tl.float32) - b_zp) * b_scale).to(compute_type)
+ else:
+ b = ((b.to(tl.float32) - b_zp_num) * b_scale).to(compute_type)
+ accumulator = tl.dot(a, b, acc=accumulator)
+
+ # Advance the ptrs to the next K block.
+ a_ptrs += BLOCK_SIZE_K * stride_ak
+ if use_int4_w4a16:
+ b_ptrs += (BLOCK_SIZE_K // 2) * stride_bk
+ else:
+ b_ptrs += BLOCK_SIZE_K * stride_bk
+
+ if MUL_ROUTED_WEIGHT:
+ moe_weight = tl.load(topk_weights_ptr + offs_token, mask=token_mask, other=0)
+ accumulator = accumulator * moe_weight[:, None]
+
+ accumulator = accumulator.to(compute_type)
+ # -----------------------------------------------------------
+ # Write back the block of the output
+ offs_cn = pid_n * BLOCK_SIZE_N + tl.arange(0, BLOCK_SIZE_N)
+ c_ptrs = c_ptr + stride_cm * offs_token[:, None] + stride_cn * offs_cn[None, :]
+ c_mask = token_mask[:, None] & (offs_cn[None, :] < N)
+ tl.store(c_ptrs, accumulator, mask=c_mask)
+
+
+@triton.jit
+def fused_moe_kernel(
+ # Pointers to matrices
+ a_ptr,
+ b_ptr,
+ bias_ptr,
+ c_ptr,
+ a_scale_ptr,
+ b_scale_ptr,
+ topk_weights_ptr,
+ sorted_token_ids_ptr,
+ expert_ids_ptr,
+ num_tokens_post_padded_ptr,
+ # Matrix dimensions
+ N,
+ K,
+ EM,
+ num_valid_tokens,
+ # The stride variables represent how much to increase the ptr by when
+ # moving by 1 element in a particular dimension. E.g. `stride_am` is
+ # how much to increase `a_ptr` by to get the element one row down
+ # (A has M rows).
+ stride_am,
+ stride_ak,
+ stride_be,
+ stride_bk,
+ stride_bn,
+ stride_bias_e,
+ stride_bias_n,
+ stride_cm,
+ stride_cn,
+ stride_asm,
+ stride_ask,
+ stride_bse,
+ stride_bsk,
+ stride_bsn,
+ # Block size for block-wise quantization
+ group_n: tl.constexpr,
+ group_k: tl.constexpr,
+ # Meta-parameters
+ BLOCK_SIZE_M: tl.constexpr,
+ BLOCK_SIZE_N: tl.constexpr,
+ BLOCK_SIZE_K: tl.constexpr,
+ GROUP_SIZE_M: tl.constexpr,
+ MUL_ROUTED_WEIGHT: tl.constexpr,
+ top_k: tl.constexpr,
+ compute_type: tl.constexpr,
+ use_fp8_w8a8: tl.constexpr,
+ use_int8_w8a8: tl.constexpr,
+ use_int8_w8a16: tl.constexpr,
+ per_channel_quant: tl.constexpr,
+ even_Ks: tl.constexpr,
+):
+ """
+ Implements the fused computation for a Mixture of Experts (MOE) using
+ token and expert matrices.
+
+ Key Parameters:
+ - A: The input tensor representing tokens with shape (*, K), where '*' can
+ be any shape representing batches and K is the feature dimension of
+ each token.
+ - B: The stacked MOE weight tensor with shape (E, N, K), where E is
+ the number of experts, K is the input feature dimension, and N is
+ the output feature dimension.
+ - C: The output cache tensor with shape (M, topk, N), where M is the
+ total number of tokens post padding, topk is the number of times
+ each token is repeated, and N is the output feature dimension.
+ - sorted_token_ids: A tensor containing the sorted indices of tokens,
+ repeated topk times and arranged by the expert index they are
+ assigned to.
+ - expert_ids: A tensor containing the indices of the expert for each
+ block. It determines which expert matrix from B should be used for
+ each block in A.
+
+ This kernel performs the multiplication of a token by its corresponding
+ expert matrix as determined by `expert_ids`. The sorting of
+ `sorted_token_ids` by expert index and padding ensures divisibility by
+ BLOCK_SIZE_M, which is necessary to maintain consistency in block matrix
+ multiplication across different blocks processed by the same expert.
+ """
+ # -----------------------------------------------------------
+ # Map program ids `pid` to the block of C it should compute.
+ # This is done in a grouped ordering to promote L2 data reuse.
+ pid = tl.program_id(axis=0)
+ num_pid_m = tl.cdiv(EM, BLOCK_SIZE_M)
+ num_pid_n = tl.cdiv(N, BLOCK_SIZE_N)
+ num_pid_in_group = GROUP_SIZE_M * num_pid_n
+ group_id = pid // num_pid_in_group
+ first_pid_m = group_id * GROUP_SIZE_M
+ group_size_m = min(num_pid_m - first_pid_m, GROUP_SIZE_M)
+ pid_m = first_pid_m + ((pid % num_pid_in_group) % group_size_m)
+ pid_n = (pid % num_pid_in_group) // group_size_m
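+    # Worked example of the grouped ordering: with GROUP_SIZE_M=2 and num_pid_n=4,
+    # num_pid_in_group=8 and pids 0..7 map to (m0,n0), (m1,n0), (m0,n1), (m1,n1), ...,
+    # i.e. each group sweeps GROUP_SIZE_M row-blocks column by column, so B tiles are
+    # reused by adjacent programs and only GROUP_SIZE_M A row-blocks stay hot in L2.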
+
+ # ----------------------------------------------------------
+ # Create pointers for the first blocks of A and B.
+ # We will advance this pointer as we move in the K direction
+ # and accumulate
+ # `a_ptrs` is a block of [BLOCK_SIZE_M, BLOCK_SIZE_K] pointers
+ # `b_ptrs` is a block of [BLOCK_SIZE_K, BLOCK_SIZE_N] pointers
+ num_tokens_post_padded = tl.load(num_tokens_post_padded_ptr)
+ if pid_m * BLOCK_SIZE_M >= num_tokens_post_padded:
+ return
+ offs_token_id = pid_m * BLOCK_SIZE_M + tl.arange(0, BLOCK_SIZE_M).to(tl.int64)
+ offs_token = tl.load(sorted_token_ids_ptr + offs_token_id)
+ offs_token = offs_token.to(tl.int64)
+ token_mask = offs_token < num_valid_tokens
+
+ off_experts = tl.load(expert_ids_ptr + pid_m).to(tl.int64)
+
+ if off_experts == -1:
+ # -----------------------------------------------------------
+ # Write back zeros to the output when the expert is not
+ # in the current expert parallel rank.
+ write_zeros_to_output(
+ c_ptr,
+ stride_cm,
+ stride_cn,
+ pid_n,
+ N,
+ offs_token,
+ token_mask,
+ BLOCK_SIZE_M,
+ BLOCK_SIZE_N,
+ compute_type,
+ )
+ return
+
+ offs_bn = (pid_n * BLOCK_SIZE_N + tl.arange(0, BLOCK_SIZE_N).to(tl.int64)) % N
+ offs_k = tl.arange(0, BLOCK_SIZE_K)
+ a_ptrs = a_ptr + (
+ offs_token[:, None] // top_k * stride_am + offs_k[None, :] * stride_ak
+ )
+
+ b_ptrs = (
+ b_ptr
+ + off_experts * stride_be
+ + (offs_k[:, None] * stride_bk + offs_bn[None, :] * stride_bn)
+ )
+ if bias_ptr is not None:
+ bias = tl.load(
+ bias_ptr + off_experts * stride_bias_e + offs_bn[None, :] * stride_bias_n
+ )
+ if use_int8_w8a16:
+ b_scale_ptrs = (
+ b_scale_ptr + off_experts * stride_bse + offs_bn[None, :] * stride_bsn
+ )
+ b_scale = tl.load(b_scale_ptrs)
+
+ if use_fp8_w8a8 or use_int8_w8a8:
+ # block-wise
+ if group_k > 0 and group_n > 0:
+ a_scale_ptrs = a_scale_ptr + (offs_token // top_k) * stride_asm
+ offs_bsn = offs_bn // group_n
+ b_scale_ptrs = (
+ b_scale_ptr + off_experts * stride_bse + offs_bsn * stride_bsn
+ )
+ # channel-wise
+ elif per_channel_quant:
+ b_scale_ptrs = (
+ b_scale_ptr + off_experts * stride_bse + offs_bn[None, :] * stride_bsn
+ )
+ b_scale = tl.load(b_scale_ptrs)
+ # Load per-token scale for activations
+ a_scale_ptrs = a_scale_ptr + (offs_token // top_k) * stride_asm
+ a_scale = tl.load(a_scale_ptrs, mask=token_mask, other=0.0)[:, None]
+ # tensor-wise
+ else:
+ a_scale = tl.load(a_scale_ptr)
+ b_scale = tl.load(b_scale_ptr + off_experts)
+
+ # -----------------------------------------------------------
+ # Iterate to compute a block of the C matrix.
+ # We accumulate into a `[BLOCK_SIZE_M, BLOCK_SIZE_N]` block
+ # of fp32 values for higher accuracy.
+ # `accumulator` will be converted back to fp16 after the loop.
+ accumulator = tl.zeros((BLOCK_SIZE_M, BLOCK_SIZE_N), dtype=tl.float32)
+
+ for k in range(0, tl.cdiv(K, BLOCK_SIZE_K)):
+ # Load the next block of A and B, generate a mask by checking the
+ # K dimension.
+ if even_Ks:
+ a = tl.load(
+ a_ptrs,
+ mask=token_mask[:, None],
+ other=0.0,
+ )
+ b = tl.load(b_ptrs)
+ else:
+ a = tl.load(
+ a_ptrs,
+ mask=token_mask[:, None] & (offs_k[None, :] < K - k * BLOCK_SIZE_K),
+ other=0.0,
+ )
+ b = tl.load(b_ptrs, mask=offs_k[:, None] < K - k * BLOCK_SIZE_K, other=0.0)
+
+ # We accumulate along the K dimension.
+ if use_int8_w8a16:
+ accumulator = tl.dot(a, b.to(compute_type), acc=accumulator)
+ elif use_fp8_w8a8 or use_int8_w8a8:
+ if group_k > 0 and group_n > 0:
+ k_start = k * BLOCK_SIZE_K
+ offs_ks = k_start // group_k
+ a_scale = tl.load(
+ a_scale_ptrs + offs_ks * stride_ask, mask=token_mask, other=0.0
+ )
+ b_scale = tl.load(b_scale_ptrs + offs_ks * stride_bsk)
+
+ accumulator += tl.dot(a, b) * a_scale[:, None] * b_scale[None, :]
+ else:
+ if use_fp8_w8a8:
+ accumulator = tl.dot(a, b, acc=accumulator)
+ else:
+ accumulator += tl.dot(a, b)
+ else:
+ accumulator += tl.dot(a, b)
+ # Advance the ptrs to the next K block.
+ a_ptrs += BLOCK_SIZE_K * stride_ak
+ b_ptrs += BLOCK_SIZE_K * stride_bk
+
+ if use_int8_w8a16:
+ accumulator *= b_scale
+ elif use_fp8_w8a8 or use_int8_w8a8:
+ if group_k == 0 or group_n == 0:
+ accumulator *= a_scale * b_scale
+
+ if bias_ptr is not None:
+ accumulator += bias
+
+ if MUL_ROUTED_WEIGHT:
+ moe_weight = tl.load(topk_weights_ptr + offs_token, mask=token_mask, other=0)
+ accumulator *= moe_weight[:, None]
+
+ accumulator = accumulator.to(compute_type)
+ # -----------------------------------------------------------
+ # Write back the block of the output
+ offs_cn = pid_n * BLOCK_SIZE_N + tl.arange(0, BLOCK_SIZE_N)
+ c_ptrs = c_ptr + stride_cm * offs_token[:, None] + stride_cn * offs_cn[None, :]
+ c_mask = token_mask[:, None] & (offs_cn[None, :] < N)
+ tl.store(c_ptrs, accumulator, mask=c_mask)
+
+
+def invoke_fused_moe_kernel(
+ A: torch.Tensor,
+ B: torch.Tensor,
+ bias: Optional[torch.Tensor],
+ C: torch.Tensor,
+ A_scale: Optional[torch.Tensor],
+ B_scale: Optional[torch.Tensor],
+ B_zp: Optional[torch.Tensor],
+ topk_weights: torch.Tensor,
+ topk_ids: torch.Tensor,
+ sorted_token_ids: torch.Tensor,
+ expert_ids: torch.Tensor,
+ num_tokens_post_padded: torch.Tensor,
+ mul_routed_weight: bool,
+ top_k: int,
+ config: Dict[str, Any],
+ compute_type: tl.dtype,
+ use_fp8_w8a8: bool,
+ use_int8_w8a8: bool,
+ use_int8_w8a16: bool,
+ use_int4_w4a16: bool,
+ per_channel_quant: bool,
+ block_shape: Optional[List[int]] = None,
+ no_combine: bool = False,
+) -> None:
+ assert topk_weights.stride(1) == 1
+ assert sorted_token_ids.stride(0) == 1
+
+ padded_size = 0
+ if use_fp8_w8a8:
+ assert B_scale is not None
+ if block_shape is None:
+ # activation tensor-wise fp8 quantization, dynamic or static
+ padded_size = padding_size
+ # By default, activations use per-token quantization when weights use per-channel quantization
+ A, A_scale = scaled_fp8_quant(
+ A, A_scale, use_per_token_if_dynamic=per_channel_quant
+ )
+ else:
+ # activation block-wise fp8 quantization
+ assert len(block_shape) == 2
+ block_n, block_k = block_shape[0], block_shape[1]
+ if _is_cuda:
+ A, A_scale = sglang_per_token_group_quant_fp8(A, block_k)
+ else:
+ A, A_scale = per_token_group_quant_fp8(A, block_k)
+ assert triton.cdiv(A.shape[-1], block_k) == A_scale.shape[-1]
+ assert triton.cdiv(B.shape[-2], block_n) == B_scale.shape[-2]
+ assert triton.cdiv(B.shape[-1], block_k) == B_scale.shape[-1]
+ elif use_int8_w8a8:
+ assert B_scale is not None
+ if block_shape is None:
+ # activation channel-wise int8 quantization
+ assert (
+ per_channel_quant
+ ), "int8 quantization only supports channel-wise quantization except for block-wise quantization"
+ A, A_scale = per_token_quant_int8(A)
+ else:
+ # activation block-wise int8 quantization
+ assert len(block_shape) == 2
+ block_n, block_k = block_shape[0], block_shape[1]
+ if _is_cuda:
+ A, A_scale = sglang_per_token_group_quant_int8(A, block_k)
+ else:
+ A, A_scale = per_token_group_quant_int8(A, block_k)
+ assert triton.cdiv(A.shape[-1], block_k) == A_scale.shape[-1]
+ assert triton.cdiv(B.shape[-2], block_n) == B_scale.shape[-2]
+ assert triton.cdiv(B.shape[-1], block_k) == B_scale.shape[-1]
+ elif use_int8_w8a16 or use_int4_w4a16:
+ assert B_scale is not None
+ assert block_shape is None or block_shape[0] == 0
+ else:
+ assert A_scale is None
+ assert B_scale is None
+
+ grid = lambda META: (
+ triton.cdiv(sorted_token_ids.shape[0], META["BLOCK_SIZE_M"])
+ * triton.cdiv(B.shape[1], META["BLOCK_SIZE_N"]),
+ )
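+ # The launch grid is 1-D: the (M-blocks x N-blocks) tiling is flattened here and
+ # recovered inside the kernel from the program id.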
+
+ K = B.shape[2] - padded_size
+ even_Ks = K % config["BLOCK_SIZE_K"] == 0
+
+ if (
+ (use_int8_w8a16 or use_int4_w4a16)
+ and block_shape is not None
+ and block_shape[1] > 0
+ ):
+ assert B_scale is not None and B_scale.ndim == 3
+ assert B_zp is None or B_zp.ndim == 3
+ assert bias is None
+ fused_moe_kernel_gptq_awq[grid](
+ A,
+ B,
+ C,
+ B_scale,
+ B_zp,
+ topk_weights,
+ sorted_token_ids,
+ expert_ids,
+ num_tokens_post_padded,
+ B.shape[1],
+ A.shape[1],
+ sorted_token_ids.shape[0],
+ topk_ids.numel(),
+ A.stride(0),
+ A.stride(1),
+ B.stride(0),
+ B.stride(2),
+ B.stride(1),
+ C.stride(1),
+ C.stride(2),
+ B_scale.stride(0),
+ B_scale.stride(2),
+ B_scale.stride(1),
+ B_zp.stride(0) if B_zp is not None else 0,
+ B_zp.stride(2) if B_zp is not None else 0,
+ B_zp.stride(1) if B_zp is not None else 0,
+ group_size=block_shape[1],
+ MUL_ROUTED_WEIGHT=mul_routed_weight,
+ top_k=top_k,
+ compute_type=compute_type,
+ has_zp=B_zp is not None,
+ use_int4_w4a16=use_int4_w4a16,
+ use_int8_w8a16=use_int8_w8a16,
+ even_Ks=even_Ks,
+ **config,
+ )
+
+ else:
+
+ fused_moe_kernel[grid](
+ A,
+ B,
+ bias,
+ C,
+ A_scale,
+ B_scale,
+ topk_weights,
+ sorted_token_ids,
+ expert_ids,
+ num_tokens_post_padded,
+ B.shape[1],
+ B.shape[2] - padded_size,
+ sorted_token_ids.shape[0],
+ topk_ids.numel(),
+ A.stride(0),
+ A.stride(1),
+ B.stride(0),
+ B.stride(2),
+ B.stride(1),
+ bias.stride(0) if bias is not None else 0,
+ bias.stride(1) if bias is not None else 0,
+ C.stride(1),
+ C.stride(2),
+ A_scale.stride(0) if A_scale is not None and A_scale.ndim == 2 else 0,
+ A_scale.stride(1) if A_scale is not None and A_scale.ndim == 2 else 0,
+ B_scale.stride(0) if B_scale is not None and B_scale.ndim >= 2 else 0,
+ B_scale.stride(2) if B_scale is not None and B_scale.ndim == 3 else 0,
+ B_scale.stride(1) if B_scale is not None and B_scale.ndim >= 2 else 0,
+ 0 if block_shape is None else block_shape[0],
+ 0 if block_shape is None else block_shape[1],
+ MUL_ROUTED_WEIGHT=mul_routed_weight,
+ top_k=top_k,
+ compute_type=compute_type,
+ use_fp8_w8a8=use_fp8_w8a8,
+ use_int8_w8a8=use_int8_w8a8,
+ use_int8_w8a16=use_int8_w8a16,
+ per_channel_quant=per_channel_quant,
+ even_Ks=even_Ks,
+ **config,
+ )
+
+
+# _moe_sum_reduce_kernel is modified from https://github.com/ModelTC/lightllm/blob/main/lightllm/common/fused_moe/moe_sum_reduce.py
+@triton.jit
+def _moe_sum_reduce_kernel(
+ input_ptr,
+ input_stride_0,
+ input_stride_1,
+ input_stride_2,
+ output_ptr,
+ output_stride_0,
+ output_stride_1,
+ token_num: int,
+ topk_num: int,
+ hidden_dim: int,
+ routed_scaling_factor: tl.constexpr,
+ BLOCK_M: tl.constexpr,
+ BLOCK_DIM: tl.constexpr,
+ NUM_STAGE: tl.constexpr,
+):
+ input_stride_0 = tl.cast(input_stride_0, dtype=tl.int64)
+ input_stride_1 = tl.cast(input_stride_1, dtype=tl.int64)
+ output_stride_0 = tl.cast(output_stride_0, dtype=tl.int64)
+
+ token_block_id = tl.program_id(0)
+ dim_block_id = tl.program_id(1)
+
+ offs_token = token_block_id * BLOCK_M + tl.arange(0, BLOCK_M)
+ offs_dim = dim_block_id * BLOCK_DIM + tl.arange(0, BLOCK_DIM)
+
+ mask_token = offs_token < token_num
+ mask_dim = offs_dim < hidden_dim
+
+ base_ptrs = input_ptr + offs_token[:, None] * input_stride_0 + offs_dim[None, :]
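+ # The last dimension is assumed contiguous (stride 1), so offs_dim is not
+ # scaled by input_stride_2.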
+
+ accumulator = tl.zeros((BLOCK_M, BLOCK_DIM), dtype=tl.float32)
+
+ for i in tl.range(0, topk_num, num_stages=NUM_STAGE):
+ tile = tl.load(
+ base_ptrs + i * input_stride_1,
+ mask=mask_token[:, None] & mask_dim[None, :],
+ other=0.0,
+ )
+ accumulator += tile.to(tl.float32)
+ accumulator *= routed_scaling_factor
+
+ # -------- Write back --------
+ store_ptrs = output_ptr + offs_token[:, None] * output_stride_0 + offs_dim[None, :]
+ tl.store(
+ store_ptrs,
+ accumulator.to(input_ptr.dtype.element_ty),
+ mask=mask_token[:, None] & mask_dim[None, :],
+ )
+
+
+def moe_sum_reduce_triton(
+ input: torch.Tensor, output: torch.Tensor, routed_scaling_factor: float
+):
+ assert input.is_contiguous()
+ assert output.is_contiguous()
+
+ token_num, topk_num, hidden_dim = input.shape
+ assert output.shape[0] == token_num and output.shape[1] == hidden_dim
+
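+ # Fixed launch parameters: each program reduces the top-k slices of one token
+ # row over a 2048-wide chunk of the hidden dimension.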
+ BLOCK_M = 1
+ BLOCK_DIM = 2048
+ NUM_STAGE = 1
+ num_warps = 16
+
+ grid = (
+ triton.cdiv(token_num, BLOCK_M),
+ triton.cdiv(hidden_dim, BLOCK_DIM),
+ )
+
+ _moe_sum_reduce_kernel[grid](
+ input,
+ *input.stride(),
+ output,
+ *output.stride(),
+ token_num=token_num,
+ topk_num=topk_num,
+ hidden_dim=hidden_dim,
+ routed_scaling_factor=routed_scaling_factor,
+ BLOCK_M=BLOCK_M,
+ BLOCK_DIM=BLOCK_DIM,
+ NUM_STAGE=NUM_STAGE,
+ num_warps=num_warps,
+ )
+ return
diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/layer.py b/python/sglang/srt/layers/moe/fused_moe_triton/layer.py
index 766f7940466..0ea1fa1ebfe 100644
--- a/python/sglang/srt/layers/moe/fused_moe_triton/layer.py
+++ b/python/sglang/srt/layers/moe/fused_moe_triton/layer.py
@@ -23,11 +23,17 @@
get_moe_runner_backend,
should_use_flashinfer_trtllm_moe,
)
+from sglang.srt.layers.moe.token_dispatcher.standard import (
+ CombineInput,
+ StandardDispatcher,
+)
from sglang.srt.layers.moe.topk import TopKOutput, TopKOutputChecker
from sglang.srt.layers.quantization.base_config import (
+ FusedMoEMethodBase,
QuantizationConfig,
QuantizeMethodBase,
)
+from sglang.srt.layers.quantization.fp8 import Fp8MoEMethod
from sglang.srt.layers.quantization.modelopt_quant import ModelOptNvFp4FusedMoEMethod
from sglang.srt.layers.quantization.unquant import UnquantizedFusedMoEMethod
from sglang.srt.managers.schedule_batch import global_server_args_dict
@@ -110,9 +116,8 @@ class FusedMoE(torch.nn.Module):
hidden_size: Input hidden state size of the transformer
intermediate_size: Intermediate size of the experts
params_dtype: Data type for the parameters.
- reduce_results: Whether to all all_reduce on the output of the layer
- renomalize: Whether to renormalize the logits in the fused_moe kernel
- quant_config: Quantization configure.
+ reduce_results: Whether to apply all_reduce on the output of the layer
+ quant_config: Quantization configuration.
inplace: suggestion to compute inplace (modify input activation).
"""
@@ -152,16 +157,6 @@ def __init__(
self.expert_map_cpu = None
self.expert_map_gpu = None
- self.moe_runner_config = MoeRunnerConfig(
- activation=activation,
- apply_router_weight_on_input=apply_router_weight_on_input,
- inplace=inplace,
- no_combine=no_combine,
- routed_scaling_factor=routed_scaling_factor,
- gemm1_alpha=gemm1_alpha,
- gemm1_clamp_limit=gemm1_clamp_limit,
- )
-
enable_flashinfer_cutlass_moe = get_moe_runner_backend().is_flashinfer_cutlass()
if enable_flashinfer_cutlass_moe and quant_config is None:
@@ -175,15 +170,14 @@ def __init__(
self.moe_tp_rank = get_moe_tensor_parallel_rank()
assert num_experts % self.moe_ep_size == 0
self.num_local_experts = num_experts // self.moe_ep_size
+ self.start_expert_id = self.moe_ep_rank * self.num_local_experts
+ self.end_expert_id = self.start_expert_id + self.num_local_experts - 1
if self.moe_ep_size > 1:
# TODO(ch-wan): support shared experts fusion
# Create a tensor of size num_experts filled with -1
self.expert_map_cpu = torch.full(
(self.num_experts,), -1, dtype=torch.int32, device="cpu"
)
- self.expert_map_cpu = torch.full(
- (self.num_experts,), -1, dtype=torch.int32, device="cpu"
- )
# Create a expert map for the local experts
self.expert_map_cpu[
self.moe_ep_rank
@@ -197,13 +191,6 @@ def __init__(
self.use_presharded_weights = use_presharded_weights
self.use_triton_kernels = get_moe_runner_backend().is_triton_kernel()
- if quant_config is None:
- self.quant_method: Optional[QuantizeMethodBase] = UnquantizedFusedMoEMethod(
- self.use_triton_kernels
- )
- else:
- self.quant_method = quant_config.get_quant_method(self, prefix)
- assert self.quant_method is not None
self.quant_config = quant_config
self.use_flashinfer_mxfp4_moe = get_moe_runner_backend().is_flashinfer_mxfp4()
@@ -214,12 +201,40 @@ def __init__(
and self.use_flashinfer_mxfp4_moe
):
hidden_size = round_up(hidden_size, 256)
+ self.hidden_size = hidden_size
+
+ self.moe_runner_config = MoeRunnerConfig(
+ num_experts=num_experts,
+ num_local_experts=self.num_local_experts,
+ hidden_size=hidden_size,
+ intermediate_size_per_partition=self.intermediate_size_per_partition,
+ layer_id=layer_id,
+ top_k=top_k,
+ num_fused_shared_experts=num_fused_shared_experts,
+ params_dtype=params_dtype,
+ activation=activation,
+ apply_router_weight_on_input=apply_router_weight_on_input,
+ inplace=inplace,
+ no_combine=no_combine,
+ routed_scaling_factor=routed_scaling_factor,
+ gemm1_alpha=gemm1_alpha,
+ gemm1_clamp_limit=gemm1_clamp_limit,
+ )
+
+ if quant_config is None:
+ self.quant_method: FusedMoEMethodBase = UnquantizedFusedMoEMethod(
+ self.use_triton_kernels
+ )
+ else:
+ self.quant_method: FusedMoEMethodBase = quant_config.get_quant_method(
+ self, prefix
+ )
+ assert self.quant_method is not None
+
self.quant_method.create_weights(
layer=self,
num_experts=self.num_local_experts,
hidden_size=hidden_size,
- # FIXME: figure out which intermediate_size to use
- intermediate_size=self.intermediate_size_per_partition,
intermediate_size_per_partition=self.intermediate_size_per_partition,
params_dtype=params_dtype,
weight_loader=(
@@ -230,6 +245,9 @@ def __init__(
with_bias=with_bias,
)
+ self.quant_method.create_moe_runner(self, self.moe_runner_config)
+ self.dispatcher = StandardDispatcher()
+
def _load_per_tensor_weight_scale(
self,
shard_id: str,
@@ -595,9 +613,12 @@ def _weight_loader_impl(
loaded_weight = loaded_weight.to(param.data.device)
if (
- "compressed" in self.quant_method.__class__.__name__.lower()
- and param.data[expert_id] != 1
- and (param.data[expert_id] - loaded_weight).abs() > 1e-5
+ (
+ "compressed" in self.quant_method.__class__.__name__.lower()
+ or "w4afp8" in self.quant_config.get_name()
+ )
+ and (param.data[expert_id] != 1).any()
+ and ((param.data[expert_id] - loaded_weight).abs() > 1e-5).any()
):
raise ValueError(
"input_scales of w1 and w3 of a layer "
@@ -811,16 +832,17 @@ def forward(self, hidden_states: torch.Tensor, topk_output: TopKOutput):
elif TopKOutputChecker.format_is_triton_kernel(topk_output):
raise NotImplementedError()
- # Matrix multiply.
- with use_symmetric_memory(get_tp_group()) as sm:
+ dispatch_output = self.dispatcher.dispatch(
+ hidden_states=hidden_states, topk_output=topk_output
+ )
- final_hidden_states = self.quant_method.apply(
- layer=self,
- x=hidden_states,
- topk_output=topk_output,
- moe_runner_config=self.moe_runner_config,
- )
- sm.tag(final_hidden_states)
+ # TODO: consider using symmetric memory
+ combine_input = self.quant_method.apply(
+ layer=self,
+ dispatch_output=dispatch_output,
+ )
+
+ final_hidden_states = self.dispatcher.combine(combine_input)
final_hidden_states = final_hidden_states[
..., :origin_hidden_states_dim
@@ -923,6 +945,12 @@ def make_expert_input_scale_params_mapping(
for shard_id in ["w1", "w2", "w3"]
]
+ def should_fuse_routed_scaling_factor_in_topk(self):
+ return isinstance(self.quant_method, ModelOptNvFp4FusedMoEMethod) or (
+ isinstance(self.quant_method, Fp8MoEMethod)
+ and self.quant_method.use_cutlass_fused_experts_fp8
+ )
+
class FlashInferFusedMoE(FusedMoE):
def __init__(self, *args, **kwargs):
@@ -932,11 +960,11 @@ def __init__(self, *args, **kwargs):
def forward(self, hidden_states: torch.Tensor, topk_output: TopKOutput):
assert self.use_flashinfer_trtllm_moe
assert (
- self.activation == "silu"
+ self.moe_runner_config.activation == "silu"
), "Only silu is supported for flashinfer blockscale fp8 moe"
assert self.quant_method is not None
assert (
- self.renormalize
+ topk_output.topk_config.renormalize
), "Renormalize is required for flashinfer blockscale fp8 moe"
assert (
self.num_fused_shared_experts == 0
@@ -949,7 +977,6 @@ def forward(self, hidden_states: torch.Tensor, topk_output: TopKOutput):
layer=self,
x=hidden_states,
topk_output=topk_output,
- moe_runner_config=self.moe_runner_config,
)
if self.reduce_results and (self.moe_tp_size > 1 or self.moe_ep_size > 1):
@@ -1001,6 +1028,8 @@ def forward(self, hidden_states: torch.Tensor, topk_output: TopKOutput):
hidden_states: Input tensor
topk_output: TopKOutput object with Bypassed format
"""
+ assert isinstance(self.quant_method, ModelOptNvFp4FusedMoEMethod)
+
assert TopKOutputChecker.format_is_bypassed(topk_output)
router_logits = topk_output.router_logits
@@ -1047,16 +1076,3 @@ def forward(self, hidden_states: torch.Tensor, topk_output: TopKOutput):
)[0]
return result
-
-
-def get_fused_moe_impl_class():
- """Factory function to get the appropriate FusedMoE implementation class."""
- if should_use_flashinfer_trtllm_moe() and _is_fp4_quantization_enabled():
- # Use FP4 variant when FP4 quantization is enabled
- return FlashInferFP4MoE
- elif should_use_flashinfer_trtllm_moe():
- # Use regular FlashInfer variant for non-FP4 FlashInfer cases
- return FlashInferFusedMoE
- else:
- # Default case
- return FusedMoE
diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/moe_align_block_size.py b/python/sglang/srt/layers/moe/fused_moe_triton/moe_align_block_size.py
new file mode 100644
index 00000000000..64d0126d627
--- /dev/null
+++ b/python/sglang/srt/layers/moe/fused_moe_triton/moe_align_block_size.py
@@ -0,0 +1,87 @@
+from __future__ import annotations
+
+from typing import Tuple
+
+import torch
+import triton
+
+from sglang.srt.utils import is_cuda, is_hip
+
+_is_cuda = is_cuda()
+_is_hip = is_hip()
+
+if _is_cuda or _is_hip:
+ from sgl_kernel import moe_align_block_size as sgl_moe_align_block_size
+
+
+def moe_align_block_size(
+ topk_ids: torch.Tensor, block_size: int, num_experts: int
+) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+ """
+ Aligns the token distribution across experts to be compatible with block
+ size for matrix multiplication.
+
+ Parameters:
+ - topk_ids: A tensor of shape [total_tokens, top_k] representing the
+ top-k expert indices for each token.
+ - block_size: The block size used in block matrix multiplication.
+ - num_experts: The total number of experts.
+
+ Returns:
+ - sorted_token_ids: A tensor containing the sorted token indices according
+ to their allocated expert.
+ - expert_ids: A tensor indicating the assigned expert index for each block.
+ - num_tokens_post_padded: The total number of tokens after padding,
+ ensuring divisibility by block_size.
+
+ This function pads the number of tokens that each expert needs to process
+ so that it is divisible by block_size.
+ Padding ensures that during block matrix multiplication, the dimensions
+ align correctly.
+
+ Example:
+ Given topk_ids = [[2, 3, 4], [1, 2, 4], [1, 3, 4], [1, 2, 3]],
+ block_size = 4, and num_experts = 4:
+ - We initially have 12 tokens (after repeating 'top_k' times) and 4 experts,
+ with each expert needing to process 3 tokens.
+ - As block_size is 4, we pad 1 token for each expert.
+ - First, flatten topk_ids to [2, 3, 4, 1, 2, 4, 1, 3, 4, 1, 2, 3].
+ - Then append the padding tokens [12, 12, 12, 12], one for each expert,
+ using the sentinel value topk_ids.numel() = 12.
+ - After sorting by expert index, we obtain token_ids
+ [3, 6, 9, 12, 0, 4, 10, 12, 1, 7, 11, 12, 2, 5, 8, 12].
+ Tokens 12 are non-existent (padding) and are ignored in
+ the subsequent matrix multiplication.
+ - The padding ensures that the total number of tokens is now divisible
+ by block_size for proper block matrix operations.
+ """
+ max_num_tokens_padded = topk_ids.numel() + (num_experts + 1) * (block_size - 1)
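+ # Worst case: every one of the num_experts + 1 buckets (including the bucket for
+ # experts filtered out under EP) needs up to block_size - 1 padding slots.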
+ sorted_ids = torch.empty(
+ (max_num_tokens_padded,), dtype=torch.int32, device=topk_ids.device
+ )
+ max_num_m_blocks = triton.cdiv(max_num_tokens_padded, block_size)
+ expert_ids = torch.empty(
+ (max_num_m_blocks,), dtype=torch.int32, device=topk_ids.device
+ )
+ num_tokens_post_pad = torch.empty((1,), dtype=torch.int32, device=topk_ids.device)
+
+ # In EP, expert_ids for filtered experts are -1. We have num_experts + 1 ids in total.
+ cumsum_buffer = torch.empty(
+ (num_experts + 2,), dtype=torch.int32, device=topk_ids.device
+ )
+
+ # Threshold based on benchmark results
+ fuse_sorted_ids_padding = sorted_ids.shape[0] <= 4096
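+ # When padding is not fused into the kernel, pre-fill sorted_ids with the
+ # out-of-range sentinel topk_ids.numel() so padded slots are ignored downstream.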
+ if not fuse_sorted_ids_padding:
+ sorted_ids.fill_(topk_ids.numel())
+
+ sgl_moe_align_block_size(
+ topk_ids,
+ num_experts + 1,
+ block_size,
+ sorted_ids,
+ expert_ids,
+ num_tokens_post_pad,
+ cumsum_buffer,
+ fuse_sorted_ids_padding,
+ )
+ return sorted_ids, expert_ids, num_tokens_post_pad
diff --git a/python/sglang/srt/layers/moe/moe_runner/__init__.py b/python/sglang/srt/layers/moe/moe_runner/__init__.py
index 9a7fa9c2962..3320a78751e 100644
--- a/python/sglang/srt/layers/moe/moe_runner/__init__.py
+++ b/python/sglang/srt/layers/moe/moe_runner/__init__.py
@@ -1,3 +1,4 @@
from sglang.srt.layers.moe.moe_runner.base import MoeRunnerConfig
+from sglang.srt.layers.moe.moe_runner.runner import MoeRunner
-__all__ = ["MoeRunnerConfig"]
+__all__ = ["MoeRunnerConfig", "MoeRunner"]
diff --git a/python/sglang/srt/layers/moe/moe_runner/base.py b/python/sglang/srt/layers/moe/moe_runner/base.py
index 854aeb0e623..4d95540e6cb 100644
--- a/python/sglang/srt/layers/moe/moe_runner/base.py
+++ b/python/sglang/srt/layers/moe/moe_runner/base.py
@@ -1,9 +1,41 @@
+from __future__ import annotations
+
+from abc import ABC, abstractmethod
from dataclasses import dataclass
-from typing import Optional
+from typing import TYPE_CHECKING, Callable, Optional, Tuple, TypeGuard
+
+import torch
+
+from sglang.srt.layers.moe.utils import MoeA2ABackend, MoeRunnerBackend
+
+if TYPE_CHECKING:
+ from sglang.srt.layers.moe.moe_runner.triton import (
+ TritonRunnerCore,
+ TritonRunnerInput,
+ TritonRunnerOutput,
+ )
+ from sglang.srt.layers.moe.token_dispatcher import (
+ CombineInput,
+ CombineInputFormat,
+ DispatchOutput,
+ DispatchOutputFormat,
+ )
@dataclass
class MoeRunnerConfig:
+
+ # MoE parameters
+ num_experts: Optional[int] = None
+ num_local_experts: Optional[int] = None
+ hidden_size: Optional[int] = None
+ intermediate_size_per_partition: Optional[int] = None
+ layer_id: Optional[int] = None
+ top_k: Optional[int] = None
+ num_fused_shared_experts: Optional[int] = None
+ params_dtype: Optional[torch.dtype] = None
+
+ # Runner configuration
activation: str = "silu"
apply_router_weight_on_input: bool = False
inplace: bool = True
@@ -11,3 +43,244 @@ class MoeRunnerConfig:
routed_scaling_factor: Optional[float] = None
gemm1_alpha: Optional[float] = None
gemm1_clamp_limit: Optional[float] = None
+
+
+@dataclass
+class RunnerInput(ABC):
+
+ @property
+ @abstractmethod
+ def runner_backend(self) -> MoeRunnerBackend: ...
+
+ def runner_backend_is_triton(self) -> TypeGuard[TritonRunnerInput]:
+ return self.runner_backend == MoeRunnerBackend.TRITON
+
+
+class RunnerOutput(ABC):
+
+ @property
+ @abstractmethod
+ def runner_backend(self) -> MoeRunnerBackend: ...
+
+ def runner_backend_is_triton(self) -> TypeGuard[TritonRunnerOutput]:
+ return self.runner_backend == MoeRunnerBackend.TRITON
+
+
+@dataclass
+class MoeQuantInfo(ABC):
+ """Moe quantization data."""
+
+ pass
+
+
+class MoeRunnerCore(ABC):
+
+ def __init__(self, config: MoeRunnerConfig):
+ self.config = config
+
+ @abstractmethod
+ def run(
+ self, runner_input: RunnerInput, quant_info: MoeQuantInfo, running_state: dict
+ ) -> RunnerOutput:
+ pass
+
+ @property
+ @abstractmethod
+ def runner_backend(self) -> MoeRunnerBackend: ...
+
+ def runner_backend_is_triton(self) -> TypeGuard[TritonRunnerCore]:
+ return self.runner_backend == MoeRunnerBackend.TRITON
+
+
+class FusedOpPool:
+
+ _fused_funcs: dict[str, Callable] = {}
+
+ @classmethod
+ def register_fused_func(
+ cls, a2a_backend_name: str, runner_backend_name: str, fused_func: Callable
+ ):
+ key = (a2a_backend_name, runner_backend_name)
+ if key in cls._fused_funcs:
+ raise ValueError(
+ f"Fused function for {a2a_backend_name} to {runner_backend_name} is already registered."
+ )
+ assert MoeA2ABackend(
+ a2a_backend_name
+ ), f"Invalid dispatch name: {a2a_backend_name}"
+ assert MoeRunnerBackend(
+ runner_backend_name
+ ), f"Invalid runner name: {runner_backend_name}"
+ cls._fused_funcs[key] = fused_func
+
+ @classmethod
+ def get_fused_func(cls, dispatch_name: str, runner_name: str) -> Optional[Callable]:
+ key = (dispatch_name, runner_name)
+ fused_func = cls._fused_funcs.get(key)
+ return fused_func
+
+
+class PermuteMethodPool:
+
+ _pre_permute_methods: dict[
+ Tuple[DispatchOutputFormat, MoeRunnerBackend], Callable
+ ] = {}
+ _post_permute_methods: dict[
+ Tuple[MoeRunnerBackend, CombineInputFormat], Callable
+ ] = {}
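+ # NOTE: at runtime the keys are plain string names (the enums' `.value`),
+ # which is what MoeRunner and the register_* decorators pass in.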
+
+ @classmethod
+ def register_pre_permute(
+ cls,
+ dispatch_output_name: str,
+ runner_backend_name: str,
+ permute_func: Callable,
+ ):
+ """
+ Register a customized pre-permute function for the given DispatchOutputFormat and MoeRunnerBackend.
+
+ :param dispatch_output_name: The DispatchOutputFormat name.
+ :param runner_backend_name: The MoeRunnerBackend name.
+ :param permute_func: The permute function to register.
+ """
+ # TODO: check if registration is valid
+ key = (dispatch_output_name, runner_backend_name)
+ if key in cls._pre_permute_methods:
+ raise ValueError(
+ f"Pre-permute method for {dispatch_output_name} to {runner_backend_name} is already registered."
+ )
+ cls._pre_permute_methods[key] = permute_func
+
+ @classmethod
+ def register_post_permute(
+ cls,
+ runner_backend_name: str,
+ combine_input_name: str,
+ permute_func: Callable,
+ ):
+ """
+ Register a customized post-permute function for the given MoeRunnerBackend and CombineInputFormat.
+
+ :param runner_backend_name: The MoeRunnerBackend name.
+ :param combine_input_name: The CombineInputFormat name.
+ :param permute_func: The permute function to register.
+ """
+ # TODO: check if registration is valid
+ key = (runner_backend_name, combine_input_name)
+ if key in cls._post_permute_methods:
+ raise ValueError(
+ f"Post-permute method for {runner_backend_name} to {combine_input_name} is already registered."
+ )
+ cls._post_permute_methods[key] = permute_func
+
+ @classmethod
+ def get_pre_permute(
+ cls,
+ dispatch_output_format: DispatchOutputFormat,
+ runner_input_format: MoeRunnerBackend,
+ ) -> Callable:
+ """
+ Retrieve the pre-permute function for the given DispatchOutputFormat and MoeRunnerBackend.
+
+ :param dispatch_output_format: The DispatchOutputFormat type.
+ :param runner_input_format: The MoeRunnerBackend type.
+ :return: The registered permute function or None if not found.
+ """
+ key = (dispatch_output_format, runner_input_format)
+ pre_permute_func = cls._pre_permute_methods.get(key)
+ assert (
+ pre_permute_func is not None
+ ), f"Pre-permute function for {dispatch_output_format} to {runner_input_format} is not registered"
+ return pre_permute_func
+
+ @classmethod
+ def get_post_permute(
+ cls,
+ runner_output_format: MoeRunnerBackend,
+ combine_input_format: CombineInputFormat,
+ ) -> Callable:
+ """
+ Retrieve the post-permute function for the given MoeRunnerBackend and CombineInputFormat.
+
+ :param runner_output_format: The MoeRunnerBackend type.
+ :param combine_input_format: The CombineInputFormat type.
+ :return: The registered permute function or None if not found.
+ """
+ key = (runner_output_format, combine_input_format)
+ post_permute_func = cls._post_permute_methods.get(key)
+ assert (
+ post_permute_func is not None
+ ), f"Post-permute function for {runner_output_format} to {combine_input_format} is not registered"
+ return post_permute_func
+
+
+def register_fused_func(
+ a2a_backend_name: str,
+ runner_backend_name: str,
+) -> Callable:
+ """
+ Decorator to register a fused function for the given DispatchOutputFormat and MoeRunnerBackend.
+
+ :param a2a_backend_name: The A2A backend name.
+ :param runner_backend_name: The MoeRunnerBackend name.
+ :return: The decorator function.
+ """
+
+ def decorator(fused_func: Callable):
+ FusedOpPool.register_fused_func(
+ a2a_backend_name, runner_backend_name, fused_func
+ )
+ return fused_func
+
+ return decorator
+
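+ # Illustrative usage sketch (mirrors the registration done in moe_runner/triton.py):
+ #
+ # @register_fused_func("none", "triton")
+ # def fused_experts_none_to_triton(dispatch_output, quant_info, runner_config):
+ # ...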
+
+def register_pre_permute(
+ dispatch_output_name: str,
+ runner_backend_name: str,
+) -> Callable:
+ """
+ Decorator to register a pre-permute function for the given DispatchOutputFormat and MoeRunnerBackend.
+
+ :param dispatch_output_name: The DispatchOutputFormat name.
+ :param runner_backend_name: The MoeRunnerBackend name.
+ :return: The decorator function.
+ """
+
+ def decorator(
+ permute_func: Callable[
+ [DispatchOutput, MoeQuantInfo, MoeRunnerConfig, dict], RunnerInput
+ ]
+ ) -> Callable:
+
+ PermuteMethodPool.register_pre_permute(
+ dispatch_output_name, runner_backend_name, permute_func
+ )
+ return permute_func
+
+ return decorator
+
+
+def register_post_permute(
+ runner_backend_name: str,
+ combine_input_name: str,
+) -> Callable:
+ """
+ Decorator to register a post-permute function for the given MoeRunnerBackend and CombineInputFormat.
+
+ :param runner_backend_name: The MoeRunnerBackend name.
+ :param combine_input_name: The CombineInputFormat name.
+ :return: The decorator function.
+ """
+
+ def decorator(
+ permute_func: Callable[
+ [RunnerOutput, MoeQuantInfo, MoeRunnerConfig, dict], CombineInput
+ ]
+ ) -> Callable:
+ PermuteMethodPool.register_post_permute(
+ runner_backend_name, combine_input_name, permute_func
+ )
+ return permute_func
+
+ return decorator
diff --git a/python/sglang/srt/layers/moe/moe_runner/runner.py b/python/sglang/srt/layers/moe/moe_runner/runner.py
new file mode 100644
index 00000000000..3b6fcd980d5
--- /dev/null
+++ b/python/sglang/srt/layers/moe/moe_runner/runner.py
@@ -0,0 +1,80 @@
+from __future__ import annotations
+
+import logging
+import os
+from typing import TYPE_CHECKING
+
+from sglang.srt.layers.moe.moe_runner.base import (
+ FusedOpPool,
+ MoeRunnerConfig,
+ PermuteMethodPool,
+)
+from sglang.srt.layers.moe.moe_runner.triton import TritonRunnerCore
+from sglang.srt.layers.moe.utils import get_moe_a2a_backend
+
+if TYPE_CHECKING:
+ from sglang.srt.layers.moe.moe_runner.base import MoeQuantInfo
+ from sglang.srt.layers.moe.token_dispatcher.base import CombineInput, DispatchOutput
+ from sglang.srt.layers.moe.utils import MoeRunnerBackend
+
+logger = logging.getLogger(__name__)
+
+
+class MoeRunner:
+
+ def __init__(self, runner_backend: MoeRunnerBackend, config: MoeRunnerConfig):
+ self.runner_backend = runner_backend
+ self.config = config
+
+ self.fused_func = None
+
+ if runner_backend.is_triton():
+ self.runner_core = TritonRunnerCore(config)
+ else:
+ raise NotImplementedError(f"Unsupported runner backend: {runner_backend}")
+
+ a2a_backend_name = get_moe_a2a_backend().value
+ runner_backend_name = runner_backend.value
+
+ self.fused_func = FusedOpPool.get_fused_func(
+ a2a_backend_name, runner_backend_name
+ )
+
+ SGLANG_CI_DISABLE_MOE_FUSED_FUNC = os.environ.get(
+ "SGLANG_CI_DISABLE_MOE_FUSED_FUNC", "0"
+ )
+ if SGLANG_CI_DISABLE_MOE_FUSED_FUNC == "1":
+ logger.info(
+ "SGLANG_CI_DISABLE_MOE_FUSED_FUNC is set to 1, disabling fused func"
+ )
+ self.fused_func = None
+
+ def run(
+ self, dispatch_output: DispatchOutput, quant_info: MoeQuantInfo
+ ) -> CombineInput:
+
+ if self.fused_func is not None:
+ return self.fused_func(dispatch_output, quant_info, self.config)
+
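+ # No fused path: convert the dispatch output into the runner's input format,
+ # run the core kernel, then convert the runner output into a combine input.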
+ dispatch_format = dispatch_output.format.value
+ runner_format = self.runner_core.runner_backend.value
+ self.pre_permute_func = PermuteMethodPool.get_pre_permute(
+ dispatch_format, runner_format
+ )
+
+ running_state = {}
+ runner_input = self.pre_permute_func(
+ dispatch_output, quant_info, self.config, running_state
+ )
+ runner_output = self.runner_core.run(runner_input, quant_info, running_state)
+
+ runner_format = self.runner_core.runner_backend.value
+ combine_format = dispatch_output.format.value
+ self.post_permute_func = PermuteMethodPool.get_post_permute(
+ runner_format, combine_format
+ )
+ combine_input = self.post_permute_func(
+ runner_output, quant_info, self.config, running_state
+ )
+
+ return combine_input
diff --git a/python/sglang/srt/layers/moe/moe_runner/triton.py b/python/sglang/srt/layers/moe/moe_runner/triton.py
new file mode 100644
index 00000000000..116fdcaa019
--- /dev/null
+++ b/python/sglang/srt/layers/moe/moe_runner/triton.py
@@ -0,0 +1,448 @@
+from __future__ import annotations
+
+import functools
+import os
+from dataclasses import dataclass
+from typing import TYPE_CHECKING, List, Optional
+
+import torch
+import triton.language as tl
+
+from sglang.srt.layers.moe.moe_runner.base import (
+ MoeQuantInfo,
+ MoeRunnerConfig,
+ MoeRunnerCore,
+ RunnerInput,
+ RunnerOutput,
+ register_fused_func,
+ register_post_permute,
+ register_pre_permute,
+)
+from sglang.srt.layers.moe.utils import MoeRunnerBackend
+from sglang.srt.utils import cpu_has_amx_support, is_cpu, is_cuda, is_hip
+
+if TYPE_CHECKING:
+ from sglang.srt.layers.moe.token_dispatcher.standard import (
+ StandardCombineInput,
+ StandardDispatchOutput,
+ )
+
+
+_is_hip = is_hip()
+_is_cuda = is_cuda()
+_is_cpu_amx_available = cpu_has_amx_support()
+_is_cpu = is_cpu()
+_use_aiter = bool(int(os.getenv("SGLANG_MOE_USE_AITER", "0")))
+_MOE_PADDING_SIZE = 128 if bool(int(os.getenv("SGLANG_MOE_PADDING", "0"))) else 0
+
+
+if _is_cuda:
+ from sgl_kernel import gelu_and_mul, silu_and_mul
+elif _is_cpu and _is_cpu_amx_available:
+ pass
+elif _is_hip:
+ from vllm import _custom_ops as vllm_ops # gelu_and_mul, silu_and_mul
+
+ if _use_aiter:
+ try:
+ from aiter import moe_sum
+ except ImportError:
+ raise ImportError("aiter is required when SGLANG_MOE_USE_AITER is set to True")
+
+
+if _is_cuda or _is_hip:
+ from sgl_kernel import moe_align_block_size as sgl_moe_align_block_size
+
+
+@dataclass
+class TritonRunnerInput(RunnerInput):
+
+ hidden_states: torch.Tensor
+ topk_weights: torch.Tensor
+ topk_ids: torch.Tensor
+ sorted_token_ids: torch.Tensor
+ expert_ids: torch.Tensor
+ num_tokens_post_padded: torch.Tensor
+
+ @property
+ def runner_backend(self) -> MoeRunnerBackend:
+ return MoeRunnerBackend.TRITON
+
+
+@dataclass
+class TritonRunnerOutput(RunnerOutput):
+
+ hidden_states: torch.Tensor
+
+ @property
+ def runner_backend(self) -> MoeRunnerBackend:
+ return MoeRunnerBackend.TRITON
+
+
+@dataclass
+class TritonMoeQuantInfo(MoeQuantInfo):
+ w13_weight: torch.Tensor
+ w2_weight: torch.Tensor
+ b13: Optional[torch.Tensor] = None
+ b2: Optional[torch.Tensor] = None
+ use_fp8_w8a8: bool = False
+ use_int8_w8a8: bool = False
+ use_int8_w8a16: bool = False
+ use_int4_w4a16: bool = False
+ per_channel_quant: bool = False
+ w13_scale: Optional[torch.Tensor] = None
+ w2_scale: Optional[torch.Tensor] = None
+ w13_zp: Optional[torch.Tensor] = None
+ w2_zp: Optional[torch.Tensor] = None
+ a13_scale: Optional[torch.Tensor] = None
+ a2_scale: Optional[torch.Tensor] = None
+ block_shape: Optional[List[int]] = None
+
+
+class TritonRunnerCore(MoeRunnerCore):
+
+ def __init__(self, config: MoeRunnerConfig):
+ super().__init__(config)
+
+ def run(
+ self,
+ runner_input: TritonRunnerInput,
+ quant_info: TritonMoeQuantInfo,
+ running_state: dict,
+ ) -> TritonRunnerOutput:
+
+ # TODO: move these functions to the triton runner
+ from sglang.srt.layers.moe.fused_moe_triton.fused_moe import (
+ invoke_fused_moe_kernel,
+ moe_sum_reduce_torch_compile,
+ moe_sum_reduce_triton,
+ swiglu_with_alpha_and_limit,
+ )
+
+ hidden_states = runner_input.hidden_states
+ topk_weights = runner_input.topk_weights
+ topk_ids = runner_input.topk_ids
+ sorted_token_ids = runner_input.sorted_token_ids
+ expert_ids = runner_input.expert_ids
+ num_tokens_post_padded = runner_input.num_tokens_post_padded
+
+ w13 = quant_info.w13_weight
+ w2 = quant_info.w2_weight
+ b13 = quant_info.b13
+ b2 = quant_info.b2
+ a13_scale = quant_info.a13_scale
+ a2_scale = quant_info.a2_scale
+ w13_scale = quant_info.w13_scale
+ w2_scale = quant_info.w2_scale
+ w13_zp = quant_info.w13_zp
+ w2_zp = quant_info.w2_zp
+ block_shape = quant_info.block_shape
+ per_channel_quant = quant_info.per_channel_quant
+ use_fp8_w8a8 = quant_info.use_fp8_w8a8
+ use_int8_w8a8 = quant_info.use_int8_w8a8
+ use_int8_w8a16 = quant_info.use_int8_w8a16
+ use_int4_w4a16 = quant_info.use_int4_w4a16
+
+ activation = self.config.activation
+ no_combine = self.config.no_combine
+ inplace = self.config.inplace
+ gemm1_alpha = self.config.gemm1_alpha
+ gemm1_limit = self.config.gemm1_clamp_limit
+ routed_scaling_factor = self.config.routed_scaling_factor
+ apply_router_weight_on_input = self.config.apply_router_weight_on_input
+
+ M = hidden_states.shape[0]
+ E, N, _ = w13.shape
+ compute_type = (
+ tl.bfloat16 if hidden_states.dtype == torch.bfloat16 else tl.float16
+ )
+
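+ # Workspace layout: cache1 is (M, top_k, N) from the first grouped GEMM, cache2 is
+ # (M * top_k, N // 2) after the gated activation, cache3 is (M, top_k, w2.shape[1])
+ # from the second grouped GEMM.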
+ intermediate_cache1 = torch.empty(
+ (M, topk_ids.shape[1], N),
+ device=hidden_states.device,
+ dtype=hidden_states.dtype,
+ )
+
+ invoke_fused_moe_kernel(
+ hidden_states,
+ w13,
+ b13,
+ intermediate_cache1,
+ a13_scale,
+ w13_scale,
+ w13_zp,
+ topk_weights,
+ topk_ids,
+ sorted_token_ids,
+ expert_ids,
+ num_tokens_post_padded,
+ apply_router_weight_on_input,
+ topk_ids.shape[1],
+ running_state["config"],
+ compute_type=compute_type,
+ use_fp8_w8a8=use_fp8_w8a8,
+ use_int8_w8a8=use_int8_w8a8,
+ use_int8_w8a16=use_int8_w8a16,
+ use_int4_w4a16=use_int4_w4a16,
+ per_channel_quant=per_channel_quant,
+ block_shape=block_shape,
+ )
+
+ intermediate_cache2 = torch.empty(
+ (M * topk_ids.shape[1], N // 2),
+ device=hidden_states.device,
+ dtype=hidden_states.dtype,
+ )
+
+ if activation == "silu":
+ if gemm1_alpha is not None:
+ assert gemm1_limit is not None
+ intermediate_cache2 = swiglu_with_alpha_and_limit(
+ intermediate_cache1.view(-1, N),
+ gemm1_alpha,
+ gemm1_limit,
+ )
+ elif _is_cuda:
+ silu_and_mul(intermediate_cache1.view(-1, N), intermediate_cache2)
+ else:
+ vllm_ops.silu_and_mul(
+ intermediate_cache2, intermediate_cache1.view(-1, N)
+ )
+ elif activation == "gelu":
+ assert gemm1_alpha is None, "gemm1_alpha is not supported for gelu"
+ assert gemm1_limit is None, "gemm1_limit is not supported for gelu"
+ if _is_cuda:
+ gelu_and_mul(intermediate_cache1.view(-1, N), intermediate_cache2)
+ else:
+ vllm_ops.gelu_and_mul(
+ intermediate_cache2, intermediate_cache1.view(-1, N)
+ )
+ else:
+ raise ValueError(f"Unsupported activation: {activation=}")
+
+ intermediate_cache3 = torch.empty(
+ (M, topk_ids.shape[1], w2.shape[1]),
+ device=hidden_states.device,
+ dtype=hidden_states.dtype,
+ )
+
+ if no_combine:
+ assert not inplace
+ out_hidden_states = torch.empty(
+ (M, topk_ids.shape[1], w2.shape[1]),
+ device=hidden_states.device,
+ dtype=hidden_states.dtype,
+ )
+ elif inplace:
+ out_hidden_states = hidden_states
+ else:
+ out_hidden_states = torch.empty_like(hidden_states)
+
+ invoke_fused_moe_kernel(
+ intermediate_cache2,
+ w2,
+ b2,
+ (
+ intermediate_cache3
+ if not no_combine and topk_ids.shape[1] != 1
+ else out_hidden_states.unsqueeze(0)
+ ),
+ a2_scale,
+ w2_scale,
+ w2_zp,
+ topk_weights,
+ topk_ids,
+ sorted_token_ids,
+ expert_ids,
+ num_tokens_post_padded,
+ not apply_router_weight_on_input,
+ 1,
+ running_state["config"],
+ compute_type=compute_type,
+ use_fp8_w8a8=use_fp8_w8a8,
+ use_int8_w8a8=use_int8_w8a8,
+ use_int8_w8a16=use_int8_w8a16,
+ use_int4_w4a16=use_int4_w4a16,
+ per_channel_quant=per_channel_quant,
+ block_shape=block_shape,
+ )
+
+ if routed_scaling_factor is None:
+ routed_scaling_factor = 1.0
+
+ if no_combine:
+ pass
+ elif _is_cuda:
+ if topk_ids.shape[1] == 1 and routed_scaling_factor == 1.0:
+ pass # we write directly into out_hidden_states
+ elif topk_ids.shape[1] == 2 and routed_scaling_factor == 1.0:
+ torch.add(
+ intermediate_cache3[:, 0],
+ intermediate_cache3[:, 1],
+ out=out_hidden_states,
+ ).squeeze(dim=1)
+ else:
+ # According to microbenchmark results, torch.compile gives better performance for small token counts.
+ if M <= 32:
+ moe_sum_reduce_torch_compile(
+ intermediate_cache3.view(*intermediate_cache3.shape),
+ out_hidden_states,
+ routed_scaling_factor,
+ )
+ else:
+ moe_sum_reduce_triton(
+ intermediate_cache3.view(*intermediate_cache3.shape),
+ out_hidden_states,
+ routed_scaling_factor,
+ )
+ elif _is_hip:
+ if _use_aiter:
+ moe_sum(
+ intermediate_cache3.view(*intermediate_cache3.shape),
+ out_hidden_states,
+ )
+ else:
+ vllm_ops.moe_sum(
+ intermediate_cache3.view(*intermediate_cache3.shape),
+ out_hidden_states,
+ )
+ else:
+ vllm_ops.moe_sum(
+ intermediate_cache3.view(*intermediate_cache3.shape),
+ out_hidden_states,
+ )
+
+ return TritonRunnerOutput(
+ hidden_states=out_hidden_states,
+ )
+
+ @property
+ def runner_backend(self) -> MoeRunnerBackend:
+ return MoeRunnerBackend.TRITON
+
+
+@register_fused_func("none", "triton")
+def fused_experts_none_to_triton(
+ dispatch_output: StandardDispatchOutput,
+ quant_info: TritonMoeQuantInfo,
+ runner_config: MoeRunnerConfig,
+) -> StandardCombineInput:
+ from sglang.srt.layers.moe.fused_moe_triton.fused_moe import fused_experts
+ from sglang.srt.layers.moe.token_dispatcher.standard import StandardCombineInput
+
+ output = fused_experts(
+ hidden_states=dispatch_output.hidden_states,
+ w1=quant_info.w13_weight,
+ w2=quant_info.w2_weight,
+ topk_output=dispatch_output.topk_output,
+ moe_runner_config=runner_config,
+ b1=quant_info.b13,
+ b2=quant_info.b2,
+ use_fp8_w8a8=quant_info.use_fp8_w8a8,
+ use_int8_w8a8=quant_info.use_int8_w8a8,
+ use_int8_w8a16=quant_info.use_int8_w8a16,
+ use_int4_w4a16=quant_info.use_int4_w4a16,
+ per_channel_quant=quant_info.per_channel_quant,
+ w1_scale=quant_info.w13_scale,
+ w2_scale=quant_info.w2_scale,
+ w1_zp=quant_info.w13_zp,
+ w2_zp=quant_info.w2_zp,
+ a1_scale=quant_info.a13_scale,
+ a2_scale=quant_info.a2_scale,
+ block_shape=quant_info.block_shape,
+ )
+
+ return StandardCombineInput(
+ hidden_states=output,
+ )
+
+
+@register_pre_permute("standard", "triton")
+def pre_permute_standard_to_triton(
+ dispatch_output: StandardDispatchOutput,
+ quant_info: TritonMoeQuantInfo,
+ runner_config: MoeRunnerConfig,
+ running_state: dict,
+) -> TritonRunnerInput:
+
+ # NOTE: this path is effectively dead code, since a fused function is registered
+ # for the standard format. It is kept for testing and as a reference example.
+
+ from sglang.srt.layers.moe.fused_moe_triton.fused_moe import (
+ get_config_dtype_str,
+ moe_align_block_size,
+ try_get_optimal_moe_config,
+ )
+ from sglang.srt.layers.moe.topk import TopKOutputChecker
+
+ hidden_states, topk_output = dispatch_output
+
+ assert TopKOutputChecker.format_is_standard(topk_output)
+
+ num_tokens = hidden_states.shape[0]
+ num_local_experts = runner_config.num_local_experts
+
+ if (
+ not (quant_info.use_fp8_w8a8 or quant_info.use_int8_w8a8)
+ or quant_info.block_shape is not None
+ or _use_aiter
+ ):
+ padding_size = 0
+ else:
+ padding_size = _MOE_PADDING_SIZE
+
+ config_dtype = get_config_dtype_str(
+ use_fp8_w8a8=quant_info.use_fp8_w8a8,
+ use_int8_w8a8=quant_info.use_int8_w8a8,
+ use_int8_w8a16=quant_info.use_int8_w8a16,
+ use_int4_w4a16=quant_info.use_int4_w4a16,
+ dtype=hidden_states.dtype,
+ )
+
+ get_config_func = functools.partial(
+ try_get_optimal_moe_config,
+ quant_info.w13_weight.shape,
+ (
+ num_local_experts,
+ quant_info.w2_weight.shape[1],
+ quant_info.w2_weight.shape[2] - padding_size,
+ ),
+ topk_output.topk_ids.shape[1],
+ config_dtype,
+ block_shape=quant_info.block_shape,
+ )
+
+ config = get_config_func(num_tokens)
+
+ sorted_token_ids, expert_ids, num_tokens_post_padded = moe_align_block_size(
+ topk_output.topk_ids, config["BLOCK_SIZE_M"], num_local_experts
+ )
+
+ running_state["config"] = config
+
+ return TritonRunnerInput(
+ hidden_states=hidden_states,
+ topk_weights=topk_output.topk_weights,
+ topk_ids=topk_output.topk_ids,
+ sorted_token_ids=sorted_token_ids,
+ expert_ids=expert_ids,
+ num_tokens_post_padded=num_tokens_post_padded,
+ )
+
+
+@register_post_permute("triton", "standard")
+def post_permute_triton_to_standard(
+ runner_output: TritonRunnerOutput,
+ quant_info: TritonMoeQuantInfo,
+ runner_config: MoeRunnerConfig,
+ running_state: dict,
+) -> StandardCombineInput:
+
+ # NOTE: this path is effectively dead code, since a fused function is registered
+ # for the standard format. It is kept for testing and as a reference example.
+
+ from sglang.srt.layers.moe.token_dispatcher.standard import StandardCombineInput
+
+ return StandardCombineInput(
+ hidden_states=runner_output.hidden_states,
+ )
diff --git a/python/sglang/srt/layers/moe/rocm_moe_utils.py b/python/sglang/srt/layers/moe/rocm_moe_utils.py
new file mode 100644
index 00000000000..5fe2de1e584
--- /dev/null
+++ b/python/sglang/srt/layers/moe/rocm_moe_utils.py
@@ -0,0 +1,141 @@
+# Adapted from https://github.com/vllm-project/vllm/blob/v0.9.1rc2/vllm/model_executor/layers/fused_moe/rocm_aiter_fused_moe.py
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+from enum import IntEnum
+from functools import cache
+from typing import Optional
+
+import torch
+
+from sglang.srt.utils import direct_register_custom_op, get_bool_env_var, is_hip
+
+_is_hip = is_hip()
+_use_aiter = get_bool_env_var("SGLANG_USE_AITER") and _is_hip
+
+
+class ActivationMethod(IntEnum):
+ # This allows interfacing with AITER ActivationType enum
+ # without importing the ActivationType enum from AITER globally.
+ SILU = 0
+ GELU = 1
+
+
+def rocm_aiter_asm_moe_tkw1_impl(
+ hidden_states: torch.Tensor,
+ w1: torch.Tensor,
+ w2: torch.Tensor,
+ topk_weights: torch.Tensor,
+ topk_ids: torch.Tensor,
+ fc1_scale: Optional[torch.Tensor] = None,
+ fc2_scale: Optional[torch.Tensor] = None,
+ fc1_smooth_scale: Optional[torch.Tensor] = None,
+ fc2_smooth_scale: Optional[torch.Tensor] = None,
+ a16: bool = False,
+ per_tensor_quant_scale: Optional[torch.Tensor] = None,
+ expert_mask: Optional[torch.Tensor] = None,
+ activation_method: int = ActivationMethod.SILU.value,
+) -> torch.Tensor:
+
+ from aiter import ActivationType
+ from aiter.fused_moe_bf16_asm import asm_moe_tkw1
+
+ activation = ActivationType(activation_method)
+
+ return asm_moe_tkw1(
+ hidden_states,
+ w1,
+ w2,
+ topk_weights,
+ topk_ids,
+ fc1_scale=fc1_scale,
+ fc2_scale=fc2_scale,
+ fc1_smooth_scale=fc1_smooth_scale,
+ fc2_smooth_scale=fc2_smooth_scale,
+ a16=a16,
+ per_tensor_quant_scale=per_tensor_quant_scale,
+ expert_mask=expert_mask,
+ activation=activation,
+ )
+
+
+def rocm_aiter_asm_moe_tkw1_fake(
+ hidden_states: torch.Tensor,
+ w1: torch.Tensor,
+ w2: torch.Tensor,
+ topk_weights: torch.Tensor,
+ topk_ids: torch.Tensor,
+ fc1_scale: Optional[torch.Tensor] = None,
+ fc2_scale: Optional[torch.Tensor] = None,
+ fc1_smooth_scale: Optional[torch.Tensor] = None,
+ fc2_smooth_scale: Optional[torch.Tensor] = None,
+ a16: bool = False,
+ per_tensor_quant_scale: Optional[torch.Tensor] = None,
+ expert_mask: Optional[torch.Tensor] = None,
+ activation_method: int = ActivationMethod.SILU.value,
+) -> torch.Tensor:
+ return torch.empty_like(hidden_states)
+
+
+if _use_aiter:
+
+ direct_register_custom_op(
+ op_name="rocm_aiter_asm_moe_tkw1",
+ op_func=rocm_aiter_asm_moe_tkw1_impl,
+ mutates_args=[],
+ fake_impl=rocm_aiter_asm_moe_tkw1_fake,
+ )
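+ # Registering the custom op exposes the AITER path as
+ # torch.ops.sglang.rocm_aiter_asm_moe_tkw1 (used below), with a fake impl for tracing.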
+
+
+def rocm_fused_experts_tkw1(
+ hidden_states: torch.Tensor,
+ w1: torch.Tensor,
+ w2: torch.Tensor,
+ topk_weights: torch.Tensor,
+ topk_ids: torch.Tensor,
+ activation: str = "silu",
+ apply_router_weight_on_input: bool = False,
+ use_fp8_w8a8: bool = False,
+ per_channel_quant: bool = False,
+ w1_scale: Optional[torch.Tensor] = None,
+ w2_scale: Optional[torch.Tensor] = None,
+ a1_scale: Optional[torch.Tensor] = None,
+ a2_scale: Optional[torch.Tensor] = None,
+ block_shape: Optional[list[int]] = None,
+) -> torch.Tensor:
+
+ activation_method = (
+ ActivationMethod.SILU if activation == "silu" else ActivationMethod.GELU
+ )
+ # All AITER fused MoE kernels expect the following data types
+ topk_weights = topk_weights.to(torch.float32)
+ topk_ids = topk_ids.to(torch.int32)
+
+ # w8a8 per-channel quantization
+ if per_channel_quant and apply_router_weight_on_input and use_fp8_w8a8:
+ # AITER tkw1 kernel for FP8 models with `apply_router_weight_on_input`.
+ # It applies topk_weights to the GEMM output of the first FC layer
+ # rather than to the second FC layer.
+ assert (
+ topk_weights.dim() == 2
+ ), "`topk_weights` should be in shape (num_tokens, topk)"
+ assert topk_weights.shape[-1] == 1, (
+ "Only support topk=1 when" " `apply_router_weight_on_input` is True"
+ )
+
+ return torch.ops.sglang.rocm_aiter_asm_moe_tkw1(
+ hidden_states,
+ w1,
+ w2,
+ topk_weights,
+ topk_ids,
+ fc1_scale=w1_scale,
+ fc2_scale=w2_scale,
+ fc1_smooth_scale=None,
+ fc2_smooth_scale=None,
+ a16=False,
+ per_tensor_quant_scale=None,
+ expert_mask=None,
+ activation_method=activation_method,
+ )
+ else:
+ assert False, "rocm_fused_experts_tkw1 only supports the per-channel FP8 path with apply_router_weight_on_input; other configurations should not reach this function."
diff --git a/python/sglang/srt/layers/moe/router.py b/python/sglang/srt/layers/moe/router.py
index d78437f7bfe..0138dcdad48 100644
--- a/python/sglang/srt/layers/moe/router.py
+++ b/python/sglang/srt/layers/moe/router.py
@@ -45,11 +45,14 @@ def fused_moe_router_kernel(
logits = tl.sum((w_router.to(tl.float32) * x[None, :].to(tl.float32)), axis=-1)
# logit softcap
- logits_scaled = logits / moe_softcapping
- exped = tl.exp(2 * logits_scaled)
- top = exped - 1
- bottom = exped + 1
- logits_softcapped = top / bottom * moe_softcapping
+ if moe_softcapping == 0:
+ logits_softcapped = logits
+ else:
+ logits_scaled = logits / moe_softcapping
+ exped = tl.exp(2 * logits_scaled)
+ top = exped - 1
+ bottom = exped + 1
+ logits_softcapped = top / bottom * moe_softcapping
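+ # Equivalent to moe_softcapping * tanh(logits / moe_softcapping), since
+ # (exp(2x) - 1) / (exp(2x) + 1) == tanh(x).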
# Add bias after softcapping
if is_correction_bias:
@@ -207,9 +210,12 @@ def fused_moe_router_large_bs_kernel(
b_ptrs += BLOCK_SIZE_K
# 4. logit softcap
- logits_scaled = acc / moe_softcapping
- exped = tl.exp(2 * logits_scaled)
- logits_softcapped = (exped - 1) / (exped + 1) * moe_softcapping
+ if moe_softcapping == 0:
+ logits_softcapped = acc
+ else:
+ logits_scaled = acc / moe_softcapping
+ exped = tl.exp(2 * logits_scaled)
+ logits_softcapped = (exped - 1) / (exped + 1) * moe_softcapping
# 5. top1
arange_block_size_n = tl.arange(0, BLOCK_SIZE_N)[None, :]
@@ -234,7 +240,7 @@ def fused_moe_router_large_bs_kernel(
# 7. handle topk == 2
if topk == 2:
- cond_top2 = (arange_block_size_n < num_experts) and (
+ cond_top2 = (arange_block_size_n < num_experts) & (
arange_block_size_n != top1[:, None]
)
top2 = tl.argmax(
diff --git a/python/sglang/srt/layers/moe/token_dispatcher/__init__.py b/python/sglang/srt/layers/moe/token_dispatcher/__init__.py
index 7802968ac8e..e1dbcdd447e 100644
--- a/python/sglang/srt/layers/moe/token_dispatcher/__init__.py
+++ b/python/sglang/srt/layers/moe/token_dispatcher/__init__.py
@@ -1,29 +1,41 @@
-from sglang.srt.layers.moe.token_dispatcher.base_dispatcher import (
+from sglang.srt.layers.moe.token_dispatcher.base import (
BaseDispatcher,
BaseDispatcherConfig,
+ CombineInput,
+ CombineInputChecker,
+ CombineInputFormat,
DispatchOutput,
DispatchOutputChecker,
DispatchOutputFormat,
)
from sglang.srt.layers.moe.token_dispatcher.deepep import (
- AscendDeepEPLLOutput,
DeepEPConfig,
DeepEPDispatcher,
+ DeepEPLLCombineInput,
DeepEPLLOutput,
+ DeepEPNormalCombineInput,
DeepEPNormalOutput,
)
-from sglang.srt.layers.moe.token_dispatcher.standard import StandardDispatchOutput
+from sglang.srt.layers.moe.token_dispatcher.standard import (
+ StandardCombineInput,
+ StandardDispatchOutput,
+)
__all__ = [
- "AscendDeepEPLLOutput",
"BaseDispatcher",
"BaseDispatcherConfig",
+ "CombineInput",
+ "CombineInputChecker",
+ "CombineInputFormat",
"DispatchOutput",
"DispatchOutputFormat",
"DispatchOutputChecker",
"StandardDispatchOutput",
+ "StandardCombineInput",
"DeepEPConfig",
"DeepEPDispatcher",
"DeepEPNormalOutput",
"DeepEPLLOutput",
+ "DeepEPLLCombineInput",
+ "DeepEPNormalCombineInput",
]
diff --git a/python/sglang/srt/layers/moe/token_dispatcher/base_dispatcher.py b/python/sglang/srt/layers/moe/token_dispatcher/base.py
similarity index 50%
rename from python/sglang/srt/layers/moe/token_dispatcher/base_dispatcher.py
rename to python/sglang/srt/layers/moe/token_dispatcher/base.py
index d5ff8cf7749..15586088682 100644
--- a/python/sglang/srt/layers/moe/token_dispatcher/base_dispatcher.py
+++ b/python/sglang/srt/layers/moe/token_dispatcher/base.py
@@ -1,18 +1,23 @@
from __future__ import annotations
from abc import ABC, abstractmethod
-from enum import Enum, auto
+from enum import Enum
from typing import TYPE_CHECKING, Protocol, TypeGuard, Union, runtime_checkable
import torch
if TYPE_CHECKING:
from sglang.srt.layers.moe.token_dispatcher import (
- AscendDeepEPLLOutput,
+ DeepEPLLCombineInput,
DeepEPLLOutput,
+ DeepEPNormalCombineInput,
DeepEPNormalOutput,
+ StandardCombineInput,
StandardDispatchOutput,
)
+ from sglang.srt.layers.moe.topk import TopKOutput
+
+# ------------------------------ Dispatch Output -------------------------------------
class DispatchOutputChecker:
@@ -41,19 +46,12 @@ def format_is_deepep(
) -> TypeGuard[Union[DeepEPNormalOutput, DeepEPLLOutput]]:
return dispatch_output.format.is_deepep()
- @staticmethod
- def format_is_ascent_ll(
- dispatch_output: DispatchOutput,
- ) -> TypeGuard[AscendDeepEPLLOutput]:
- return dispatch_output.format.is_ascent_ll()
-
class DispatchOutputFormat(Enum):
- STANDARD = auto()
- DEEPEP_NORMAL = auto()
- DEEPEP_LL = auto()
- ASCENT_LL = auto()
+ STANDARD = "standard"
+ DEEPEP_NORMAL = "deepep_normal"
+ DEEPEP_LL = "deepep_ll"
def is_standard(self) -> bool:
return self == DispatchOutputFormat.STANDARD
@@ -70,18 +68,68 @@ def is_deepep(self) -> bool:
DispatchOutputFormat.DEEPEP_LL,
]
- def is_ascent_ll(self) -> bool:
- return self == DispatchOutputFormat.ASCENT_LL
-
@runtime_checkable
class DispatchOutput(Protocol):
"""Protocol for dispatch outputs in different formats."""
+ # TODO: add hidden_states to the protocol
+
@property
def format(self) -> DispatchOutputFormat: ...
+# ------------------------------ Combine Input -------------------------------------
+
+
+class CombineInputChecker:
+ @staticmethod
+ def format_is_standard(
+ combine_input: CombineInput,
+ ) -> TypeGuard[StandardCombineInput]:
+ return combine_input.format == CombineInputFormat.STANDARD
+
+ @staticmethod
+ def format_is_deepep_normal(
+ combine_input: CombineInput,
+ ) -> TypeGuard[DeepEPNormalCombineInput]:
+ return combine_input.format == CombineInputFormat.DEEPEP_NORMAL
+
+ @staticmethod
+ def format_is_deepep_ll(
+ combine_input: CombineInput,
+ ) -> TypeGuard[DeepEPLLCombineInput]:
+ return combine_input.format == CombineInputFormat.DEEPEP_LL
+
+ @staticmethod
+ def format_is_deepep(
+ combine_input: CombineInput,
+ ) -> TypeGuard[Union[DeepEPNormalCombineInput, DeepEPLLCombineInput]]:
+ return combine_input.format in [
+ CombineInputFormat.DEEPEP_NORMAL,
+ CombineInputFormat.DEEPEP_LL,
+ ]
+
+
+class CombineInputFormat(Enum):
+ STANDARD = "standard"
+ DEEPEP_NORMAL = "deepep_normal"
+ DEEPEP_LL = "deepep_ll"
+
+
+@runtime_checkable
+class CombineInput(Protocol):
+ """Protocol for combine inputs in different formats."""
+
+ # TODO: add hidden_states to the protocol
+
+ @property
+ def format(self) -> CombineInputFormat: ...
+
+
+# ------------------------------ Base Dispatcher -------------------------------------
+
+
class BaseDispatcherConfig(ABC):
"""Base class for dispatcher configs."""
@@ -92,9 +140,11 @@ class BaseDispatcher(ABC):
"""Base class for dispatchers."""
@abstractmethod
- def dispatch(self, *args, **kwargs) -> DispatchOutput:
+ def dispatch(
+ self, hidden_states: torch.Tensor, topk_output: TopKOutput, **kwargs
+ ) -> DispatchOutput:
pass
@abstractmethod
- def combine(self, *args, **kwargs) -> torch.Tensor:
+ def combine(self, combine_input: CombineInput, **kwargs) -> torch.Tensor:
pass
diff --git a/python/sglang/srt/layers/moe/token_dispatcher/deepep.py b/python/sglang/srt/layers/moe/token_dispatcher/deepep.py
index 3e070d8145b..450cff0cb7c 100644
--- a/python/sglang/srt/layers/moe/token_dispatcher/deepep.py
+++ b/python/sglang/srt/layers/moe/token_dispatcher/deepep.py
@@ -5,13 +5,15 @@
from typing import TYPE_CHECKING, List, NamedTuple, Optional, Tuple, Union
from sglang.srt.eplb.expert_distribution import get_global_expert_distribution_recorder
-from sglang.srt.layers.moe import DeepEPMode, get_deepep_config, is_tbo_enabled
-from sglang.srt.layers.moe.token_dispatcher.base_dispatcher import (
+from sglang.srt.layers.moe.token_dispatcher.base import (
BaseDispatcher,
BaseDispatcherConfig,
+ CombineInput,
+ CombineInputFormat,
DispatchOutput,
DispatchOutputFormat,
)
+from sglang.srt.layers.moe.utils import DeepEPMode, get_deepep_config, is_tbo_enabled
from sglang.srt.layers.quantization import deep_gemm_wrapper
from sglang.srt.utils import (
get_bool_env_var,
@@ -40,11 +42,6 @@
import torch
import torch.distributed as dist
-from sglang.srt.layers.moe.ep_moe.kernels import (
- deepep_permute_triton_kernel,
- deepep_post_reorder_triton_kernel,
- deepep_run_moe_deep_preprocess,
-)
from sglang.srt.model_executor.forward_batch_info import ForwardBatch
_use_aiter = get_bool_env_var("SGLANG_USE_AITER") and is_hip()
@@ -56,6 +53,7 @@ class DeepEPNormalOutput(NamedTuple):
"""DeepEP normal dispatch output."""
hidden_states: torch.Tensor | Tuple[torch.Tensor, torch.Tensor]
+ # the tuple form above is (hidden_states, hidden_states_scale)
topk_idx: torch.Tensor
topk_weights: torch.Tensor
num_recv_tokens_per_expert: List[int]
@@ -79,24 +77,32 @@ def format(self) -> DispatchOutputFormat:
return DispatchOutputFormat.DEEPEP_LL
-class AscendDeepEPLLOutput(NamedTuple):
- """AscendDeepEP low latency dispatch output."""
+assert isinstance(DeepEPNormalOutput, DispatchOutput)
+assert isinstance(DeepEPLLOutput, DispatchOutput)
- hidden_states_fp8: Tuple[torch.Tensor, torch.Tensor]
- topk_idx: torch.Tensor
- topk_weights: torch.Tensor
- masked_m: torch.Tensor
- seg_indptr: torch.Tensor
- expected_m: int
+
+class DeepEPNormalCombineInput(NamedTuple):
+ """DeepEP normal combine input."""
+
+ pass
@property
- def format(self) -> DispatchOutputFormat:
- return DispatchOutputFormat.ASCENT_LL
+ def format(self) -> CombineInputFormat:
+ return CombineInputFormat.DEEPEP_NORMAL
-assert isinstance(DeepEPNormalOutput, DispatchOutput)
-assert isinstance(DeepEPLLOutput, DispatchOutput)
-assert isinstance(AscendDeepEPLLOutput, DispatchOutput)
+class DeepEPLLCombineInput(NamedTuple):
+ """DeepEP low latency combine input."""
+
+ pass
+
+ @property
+ def format(self) -> CombineInputFormat:
+ return CombineInputFormat.DEEPEP_LL
+
+
+assert isinstance(DeepEPNormalCombineInput, CombineInput)
+assert isinstance(DeepEPLLCombineInput, CombineInput)
class DeepEPDispatchMode(IntEnum):
@@ -272,6 +278,9 @@ def __init__(
self.num_max_dispatch_tokens_per_rank = get_int_env_var(
"SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK", 128
)
+ # DeepEP internode_ll dispatch uses FINISHED_SUM_TAG=1024, so the number of
+ # tokens sent from one rank to another must stay below this value.
+ assert self.num_max_dispatch_tokens_per_rank <= 1024
self.handle = None
@@ -409,7 +418,11 @@ def combine_a(
topk_idx: torch.Tensor,
topk_weights: torch.Tensor,
):
- if deep_gemm_wrapper.ENABLE_JIT_DEEPGEMM or _use_aiter:
+ from sglang.srt.layers.moe.ep_moe.kernels import (
+ deepep_post_reorder_triton_kernel,
+ )
+
+ if deep_gemm_wrapper.ENABLE_JIT_DEEPGEMM or _use_aiter or _is_npu:
output = hidden_states
else:
if hidden_states.shape[0] > 0:
@@ -523,23 +536,13 @@ def dispatch_b(
masked_m
)
- if _is_npu:
- deepep_output = AscendDeepEPLLOutput(
- hidden_states,
- topk_idx,
- topk_weights,
- masked_m,
- self.handle[1],
- expected_m,
- )
- else:
- deepep_output = DeepEPLLOutput(
- hidden_states,
- topk_idx,
- topk_weights,
- masked_m,
- expected_m,
- )
+ deepep_output = DeepEPLLOutput(
+ hidden_states,
+ topk_idx,
+ topk_weights,
+ masked_m,
+ expected_m,
+ )
return deepep_output
def _dispatch_core(
diff --git a/python/sglang/srt/layers/moe/token_dispatcher/standard.py b/python/sglang/srt/layers/moe/token_dispatcher/standard.py
index 3e09e0bf67a..f984104f605 100644
--- a/python/sglang/srt/layers/moe/token_dispatcher/standard.py
+++ b/python/sglang/srt/layers/moe/token_dispatcher/standard.py
@@ -1,19 +1,61 @@
from __future__ import annotations
-from typing import NamedTuple
+from typing import TYPE_CHECKING, NamedTuple
-from sglang.srt.layers.moe.token_dispatcher.base_dispatcher import (
+import torch
+
+from sglang.srt.layers.moe.token_dispatcher.base import (
+ BaseDispatcher,
+ CombineInput,
+ CombineInputFormat,
DispatchOutput,
DispatchOutputFormat,
)
+if TYPE_CHECKING:
+ from sglang.srt.layers.moe.topk import TopKOutput
+
class StandardDispatchOutput(NamedTuple):
"""Standard dispatch output."""
+ hidden_states: torch.Tensor
+ topk_output: TopKOutput
+
@property
def format(self) -> DispatchOutputFormat:
return DispatchOutputFormat.STANDARD
assert isinstance(StandardDispatchOutput, DispatchOutput)
+
+
+class StandardCombineInput(NamedTuple):
+ """Standard combine input."""
+
+ hidden_states: torch.Tensor
+
+ @property
+ def format(self) -> CombineInputFormat:
+ return CombineInputFormat.STANDARD
+
+
+assert isinstance(StandardCombineInput, CombineInput)
+
+
+class StandardDispatcher(BaseDispatcher):
+
+ def dispatch(
+ self, hidden_states: torch.Tensor, topk_output: TopKOutput
+ ) -> DispatchOutput:
+ return StandardDispatchOutput(
+ hidden_states=hidden_states, topk_output=topk_output
+ )
+
+ def combine(self, combine_input: CombineInput) -> torch.Tensor:
+ if isinstance(combine_input, StandardCombineInput):
+ return combine_input.hidden_states
+ else:
+ # TODO: this branch should be removed in the future
+ assert isinstance(combine_input, torch.Tensor)
+ return combine_input
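
A short usage sketch of the `StandardDispatcher` added above; the tensors are placeholders and `topk_output` stands in for whatever the TopK layer returned.

    import torch

    from sglang.srt.layers.moe.token_dispatcher.standard import (
        StandardCombineInput,
        StandardDispatcher,
    )

    dispatcher = StandardDispatcher()
    hidden_states = torch.randn(4, 16)
    topk_output = None  # placeholder for the value produced by the TopK layer

    dispatch_output = dispatcher.dispatch(hidden_states, topk_output)
    expert_output = dispatch_output.hidden_states  # expert computation would go here

    # Preferred path: wrap the expert output in StandardCombineInput ...
    out = dispatcher.combine(StandardCombineInput(hidden_states=expert_output))
    # ... the bare-tensor fallback still works until the TODO branch is removed.
    out = dispatcher.combine(expert_output)
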
diff --git a/python/sglang/srt/layers/moe/topk.py b/python/sglang/srt/layers/moe/topk.py
index 3b939bca855..b8f73473c44 100644
--- a/python/sglang/srt/layers/moe/topk.py
+++ b/python/sglang/srt/layers/moe/topk.py
@@ -85,8 +85,8 @@
class TopKConfig:
top_k: int
use_grouped_topk: bool = False
- topk_group: int = 0
- num_expert_group: int = 0
+ topk_group: Optional[int] = None
+ num_expert_group: Optional[int] = None
renormalize: bool = True
num_fused_shared_experts: int = 0
custom_routing_function: Optional[Callable] = None
@@ -189,14 +189,16 @@ def __init__(
top_k: int,
*,
use_grouped_topk: bool = False,
- topk_group: int = 0,
- num_expert_group: int = 0,
+ topk_group: Optional[int] = None,
+ num_expert_group: Optional[int] = None,
renormalize: bool = True,
num_fused_shared_experts: int = 0,
custom_routing_function: Optional[Callable] = None,
scoring_func: str = "softmax",
correction_bias: Optional[torch.Tensor] = None,
routed_scaling_factor: Optional[float] = None,
+ apply_routed_scaling_factor_on_output: Optional[bool] = False,
+ force_topk: bool = False,
):
# NOTE: scoring_func is not used for now, but we keep it for future use
# see https://github.com/sgl-project/sglang/pull/4505 for more details
@@ -215,9 +217,11 @@ def __init__(
custom_routing_function=custom_routing_function,
correction_bias=correction_bias,
routed_scaling_factor=routed_scaling_factor,
+ apply_routed_scaling_factor_on_output=apply_routed_scaling_factor_on_output,
)
self.use_triton_kernels = get_moe_runner_backend().is_triton_kernel()
+ self.force_topk = force_topk
def forward_native(
self,
@@ -252,7 +256,7 @@ def forward_cuda(
sm_first=not self.topk_config.renormalize,
)
return TritonKernelTopKOutput(routing_data, gather_idx, scatter_idx)
- elif (
+ elif not self.force_topk and (
should_use_flashinfer_trtllm_moe()
or get_moe_runner_backend().is_flashinfer_mxfp4()
):
@@ -300,12 +304,12 @@ def forward_npu(
global_num_experts = router_logits.shape[-1]
# NOTE: now npu_moe_gating_top_k can only support `group_count=256` pattern
- if global_num_experts == 256 and self.topk_config.renormalize is False:
+ if global_num_experts == 256:
routed_scaling_factor = self.topk_config.routed_scaling_factor or 1
router_logits = router_logits.to(torch.float32)
- return torch_npu.npu_moe_gating_top_k(
+ topk_weights, topk_ids, _ = torch_npu.npu_moe_gating_top_k(
router_logits,
k=self.topk_config.top_k,
bias=self.topk_config.correction_bias.to(torch.float32),
@@ -317,6 +321,24 @@ def forward_npu(
routed_scaling_factor=routed_scaling_factor,
eps=float(1e-20),
)
+
+ if self.topk_config.renormalize:
+ topk_weights_sum = (
+ topk_weights.sum(dim=-1, keepdim=True)
+ if self.topk_config.num_fused_shared_experts == 0
+ else topk_weights[:, :-1].sum(dim=-1, keepdim=True)
+ )
+ topk_weights = topk_weights / topk_weights_sum
+
+ if expert_location_dispatch_info is not None:
+ topk_ids = topk_ids_logical_to_physical(
+ topk_ids, expert_location_dispatch_info
+ )
+ get_global_expert_distribution_recorder().on_select_experts(
+ topk_ids=topk_ids
+ )
+
+ return StandardTopKOutput(topk_weights, topk_ids, _)
else:
self.topk_config.torch_native = True
return select_experts(
@@ -343,17 +365,28 @@ def fused_topk_torch_native(
gating_output: torch.Tensor,
topk: int,
renormalize: bool,
+ correction_bias: Optional[torch.Tensor] = None,
):
- assert (
- hidden_states.shape[0] == gating_output.shape[0]
- ), f"Number of tokens mismatch, {hidden_states.shape=} vs {gating_output.shape=}"
- M, _ = hidden_states.shape
- topk_weights = torch.empty(
- M, topk, dtype=torch.float32, device=hidden_states.device
- )
- topk_ids = torch.empty(M, topk, dtype=torch.int32, device=hidden_states.device)
- topk_weights = F.softmax(gating_output.float(), dim=-1)
- topk_weights, topk_ids = torch.topk(topk_weights, topk, dim=-1)
+ if correction_bias is not None:
+ n_routed_experts = gating_output.shape[-1]
+ scores = gating_output.softmax(dim=-1)
+ scores_for_choice = scores.view(
+ -1, n_routed_experts
+ ) + correction_bias.unsqueeze(0)
+ topk_ids = torch.topk(scores_for_choice, k=topk, dim=-1, sorted=False)[1]
+ topk_weights = scores.gather(1, topk_ids)
+ else:
+ assert (
+ hidden_states.shape[0] == gating_output.shape[0]
+ ), f"Number of tokens mismatch, {hidden_states.shape=} vs {gating_output.shape=}"
+ M, _ = hidden_states.shape
+ topk_weights = torch.empty(
+ M, topk, dtype=torch.float32, device=hidden_states.device
+ )
+ topk_ids = torch.empty(M, topk, dtype=torch.int32, device=hidden_states.device)
+ topk_weights = F.softmax(gating_output.float(), dim=-1)
+ topk_weights, topk_ids = torch.topk(topk_weights, topk, dim=-1)
+
if renormalize:
topk_weights = topk_weights / topk_weights.sum(dim=-1, keepdim=True)
return topk_weights, topk_ids
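
To make the new `correction_bias` branch above concrete: the bias only influences which experts are selected, while the returned weights are still read from the unbiased softmax scores. A standalone sketch with toy numbers:

    import torch

    gating_output = torch.tensor([[2.0, 1.0, 0.5, 0.1]])  # one token, four experts
    correction_bias = torch.tensor([0.0, 0.0, 0.6, 0.0])  # nudges expert 2 upward

    scores = gating_output.softmax(dim=-1)
    scores_for_choice = scores + correction_bias.unsqueeze(0)

    topk_ids = torch.topk(scores_for_choice, k=2, dim=-1, sorted=False)[1]
    topk_weights = scores.gather(1, topk_ids)  # weights come from the unbiased scores

    # Without the bias, experts 0 and 1 win; with it, expert 2 displaces expert 1,
    # but its weight is still its original softmax probability.
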
@@ -366,6 +399,7 @@ def fused_topk_cpu(
renormalize: bool,
num_token_non_padded: Optional[torch.Tensor] = None,
expert_location_dispatch_info: Optional[ExpertLocationDispatchInfo] = None,
+ correction_bias: Optional[torch.Tensor] = None,
):
topk_weights, topk_ids = torch.ops.sgl_kernel.topk_softmax_cpu(
hidden_states=hidden_states,
@@ -427,12 +461,13 @@ def grouped_topk_gpu(
gating_output: torch.Tensor,
topk: int,
renormalize: bool,
- num_expert_group: int = 0,
- topk_group: int = 0,
+ num_expert_group: Optional[int] = None,
+ topk_group: Optional[int] = None,
num_fused_shared_experts: int = 0,
routed_scaling_factor: Optional[float] = None,
num_token_non_padded: Optional[torch.Tensor] = None,
expert_location_dispatch_info: Optional[ExpertLocationDispatchInfo] = None,
+ apply_routed_scaling_factor_on_output: Optional[bool] = False,
):
assert hidden_states.shape[0] == gating_output.shape[0], "Number of tokens mismatch"
@@ -480,6 +515,8 @@ def grouped_topk_gpu(
else topk_weights[:, :-1].sum(dim=-1, keepdim=True)
)
topk_weights = topk_weights / topk_weights_sum
+ if apply_routed_scaling_factor_on_output:
+ topk_weights *= routed_scaling_factor
topk_weights, topk_ids = topk_weights.to(torch.float32), topk_ids.to(torch.int32)
topk_ids = topk_ids_logical_to_physical(topk_ids, expert_location_dispatch_info)
@@ -492,8 +529,8 @@ def grouped_topk_cpu(
gating_output: torch.Tensor,
topk: int,
renormalize: bool,
- num_expert_group: int = 0,
- topk_group: int = 0,
+ num_expert_group: Optional[int] = None,
+ topk_group: Optional[int] = None,
num_fused_shared_experts: int = 0,
routed_scaling_factor: Optional[float] = None,
num_token_non_padded: Optional[torch.Tensor] = None,
@@ -522,12 +559,13 @@ def biased_grouped_topk_impl(
correction_bias: torch.Tensor,
topk: int,
renormalize: bool,
- num_expert_group: int = 0,
- topk_group: int = 0,
+ num_expert_group: Optional[int] = None,
+ topk_group: Optional[int] = None,
num_fused_shared_experts: int = 0,
routed_scaling_factor: Optional[float] = None,
num_token_non_padded: Optional[torch.Tensor] = None,
expert_location_dispatch_info: Optional[ExpertLocationDispatchInfo] = None,
+ apply_routed_scaling_factor_on_output: Optional[bool] = False,
):
assert hidden_states.shape[0] == gating_output.shape[0], "Number of tokens mismatch"
@@ -579,6 +617,8 @@ def biased_grouped_topk_impl(
else topk_weights[:, :-1].sum(dim=-1, keepdim=True)
)
topk_weights = topk_weights / topk_weights_sum
+ if apply_routed_scaling_factor_on_output:
+ topk_weights *= routed_scaling_factor
topk_weights, topk_ids = topk_weights.to(torch.float32), topk_ids.to(torch.int32)
topk_ids = topk_ids_logical_to_physical(topk_ids, expert_location_dispatch_info)
@@ -615,12 +655,13 @@ def biased_grouped_topk_gpu(
correction_bias: torch.Tensor,
topk: int,
renormalize: bool,
- num_expert_group: int = 0,
- topk_group: int = 0,
+ num_expert_group: Optional[int] = None,
+ topk_group: Optional[int] = None,
num_fused_shared_experts: int = 0,
routed_scaling_factor: Optional[float] = None,
num_token_non_padded: Optional[torch.Tensor] = None,
expert_location_dispatch_info: Optional[ExpertLocationDispatchInfo] = None,
+ apply_routed_scaling_factor_on_output: Optional[bool] = False,
):
assert (
routed_scaling_factor is not None
@@ -640,6 +681,7 @@ def biased_grouped_topk_gpu(
topk,
num_fused_shared_experts,
routed_scaling_factor,
+ apply_routed_scaling_factor_on_output,
)
# TODO merge into kernel
if (expert_location_dispatch_info is not None) or (
@@ -650,6 +692,7 @@ def biased_grouped_topk_gpu(
)
return topk_weights, topk_ids
elif _use_aiter:
+ assert not apply_routed_scaling_factor_on_output, "Not implemented"
token = gating_output.shape[0]
device = gating_output.device
assert (
@@ -681,6 +724,7 @@ def biased_grouped_topk_gpu(
routed_scaling_factor=routed_scaling_factor,
num_token_non_padded=num_token_non_padded,
expert_location_dispatch_info=expert_location_dispatch_info,
+ apply_routed_scaling_factor_on_output=apply_routed_scaling_factor_on_output,
)
@@ -690,15 +734,17 @@ def biased_grouped_topk_cpu(
correction_bias: torch.Tensor,
topk: int,
renormalize: bool,
- num_expert_group: int = 0,
- topk_group: int = 0,
+ num_expert_group: Optional[int] = None,
+ topk_group: Optional[int] = None,
compiled: bool = True,
num_fused_shared_experts: int = 0,
routed_scaling_factor: Optional[float] = None,
num_token_non_padded: Optional[torch.Tensor] = None,
expert_location_dispatch_info: Optional[ExpertLocationDispatchInfo] = None,
+ apply_routed_scaling_factor_on_output: Optional[bool] = False,
):
assert expert_location_dispatch_info is None
+ assert not apply_routed_scaling_factor_on_output, "Not implemented"
return torch.ops.sgl_kernel.biased_grouped_topk_cpu(
hidden_states,
gating_output,
@@ -743,6 +789,9 @@ def select_experts(
correction_bias = topk_config.correction_bias
torch_native = topk_config.torch_native
routed_scaling_factor = topk_config.routed_scaling_factor
+ apply_routed_scaling_factor_on_output = (
+ topk_config.apply_routed_scaling_factor_on_output
+ )
router_logits, correction_bias = (
expert_location_dispatch.transform_select_experts_inputs(
@@ -768,6 +817,7 @@ def select_experts(
routed_scaling_factor=routed_scaling_factor,
num_token_non_padded=num_token_non_padded,
expert_location_dispatch_info=expert_location_dispatch_info,
+ apply_routed_scaling_factor_on_output=apply_routed_scaling_factor_on_output,
)
else:
topk_weights, topk_ids = biased_grouped_topk(
@@ -782,19 +832,23 @@ def select_experts(
routed_scaling_factor=routed_scaling_factor,
num_token_non_padded=num_token_non_padded,
expert_location_dispatch_info=expert_location_dispatch_info,
+ apply_routed_scaling_factor_on_output=apply_routed_scaling_factor_on_output,
)
elif torch_native and custom_routing_function is None:
assert (
num_token_non_padded is None
), "num_token_non_padded is not yet supported in fused_topk_native"
assert expert_location_dispatch_info is None
+ assert not apply_routed_scaling_factor_on_output, "Not implemented"
topk_weights, topk_ids = fused_topk_native(
hidden_states=hidden_states,
gating_output=router_logits,
topk=top_k,
renormalize=renormalize,
+ correction_bias=correction_bias,
)
elif custom_routing_function is None:
+ assert not apply_routed_scaling_factor_on_output, "Not implemented"
# Qwen3MOE uses fused_topk
topk_weights, topk_ids = fused_topk(
hidden_states=hidden_states,
@@ -809,6 +863,7 @@ def select_experts(
num_token_non_padded is None
), "num_token_non_padded is not yet supported in custom_routing_function"
assert expert_location_dispatch_info is None
+ assert not apply_routed_scaling_factor_on_output, "Not implemented"
topk_weights, topk_ids = custom_routing_function(
hidden_states=hidden_states,
gating_output=router_logits,
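
The new `apply_routed_scaling_factor_on_output` flag is threaded from `TopKConfig` through `select_experts` into the grouped and biased-grouped paths; when set, the renormalized top-k weights are multiplied by `routed_scaling_factor` before being returned, and the backends that do not support it assert explicitly. A numeric sketch of the weight transform in isolation:

    import torch

    topk_weights = torch.tensor([[0.3, 0.2, 0.1]])
    routed_scaling_factor = 2.5
    apply_routed_scaling_factor_on_output = True

    # Renormalize over the selected experts (no fused shared experts in this sketch) ...
    topk_weights = topk_weights / topk_weights.sum(dim=-1, keepdim=True)
    # ... then optionally fold the routed scaling factor into the weights themselves
    # instead of scaling the MoE output afterwards.
    if apply_routed_scaling_factor_on_output:
        topk_weights = topk_weights * routed_scaling_factor

    print(topk_weights)  # tensor([[1.2500, 0.8333, 0.4167]]) -- sums to routed_scaling_factor
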
diff --git a/python/sglang/srt/layers/moe/utils.py b/python/sglang/srt/layers/moe/utils.py
index 2fbab220fcb..b4e4ec4249b 100644
--- a/python/sglang/srt/layers/moe/utils.py
+++ b/python/sglang/srt/layers/moe/utils.py
@@ -1,6 +1,7 @@
from __future__ import annotations
import importlib.util
+import logging
from enum import Enum
from functools import lru_cache
from typing import TYPE_CHECKING, Optional
@@ -12,11 +13,12 @@
get_attention_dp_size,
is_dp_attention_enabled,
)
-from sglang.srt.utils import logger
if TYPE_CHECKING:
from sglang.srt.server_args import ServerArgs
+logger = logging.getLogger(__name__)
+
class MoeA2ABackend(Enum):
@@ -131,7 +133,7 @@ def get_moe_a2a_backend() -> MoeA2ABackend:
global MOE_A2A_BACKEND
if MOE_A2A_BACKEND is None:
logger.warning("MOE_A2A_BACKEND is not initialized, using default backend")
- MOE_A2A_BACKEND = MoeA2ABackend(None)
+ MOE_A2A_BACKEND = MoeA2ABackend.NONE
return MOE_A2A_BACKEND
@@ -139,7 +141,7 @@ def get_moe_runner_backend() -> MoeRunnerBackend:
global MOE_RUNNER_BACKEND
if MOE_RUNNER_BACKEND is None:
logger.warning("MOE_RUNNER_BACKEND is not initialized, using triton backend")
- MOE_RUNNER_BACKEND = MoeRunnerBackend("triton")
+ MOE_RUNNER_BACKEND = MoeRunnerBackend.AUTO
return MOE_RUNNER_BACKEND
@@ -147,7 +149,7 @@ def get_deepep_mode() -> DeepEPMode:
global DEEPEP_MODE
if DEEPEP_MODE is None:
logger.warning("DEEPEP_MODE is not initialized, using auto mode")
- DEEPEP_MODE = DeepEPMode("auto")
+ DEEPEP_MODE = DeepEPMode.AUTO
return DEEPEP_MODE
@@ -162,7 +164,6 @@ def get_deepep_config() -> str:
def is_tbo_enabled() -> bool:
global IS_TBO_ENABLED
if IS_TBO_ENABLED is None:
- logger.warning("IS_TBO_ENABLED is not initialized, using False")
IS_TBO_ENABLED = False
return IS_TBO_ENABLED
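
The fallback changes above swap value-based enum construction (`MoeA2ABackend(None)`, `MoeRunnerBackend("triton")`) for member access (`MoeA2ABackend.NONE`, `MoeRunnerBackend.AUTO`), which does not depend on the enum defining those exact values and is friendlier to type checkers. A small generic illustration; the `Backend` enum here is hypothetical:

    from enum import Enum


    class Backend(Enum):
        NONE = "none"
        AUTO = "auto"
        TRITON = "triton"


    Backend("triton")  # value lookup: works only because "triton" is a defined value
    # Backend(None)    # value lookup with an undefined value raises ValueError
    Backend.AUTO       # member access: independent of the stored value
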
diff --git a/python/sglang/srt/layers/quantization/__init__.py b/python/sglang/srt/layers/quantization/__init__.py
index d001bb646c0..ff3c2b14839 100644
--- a/python/sglang/srt/layers/quantization/__init__.py
+++ b/python/sglang/srt/layers/quantization/__init__.py
@@ -16,7 +16,6 @@
)
from vllm.model_executor.layers.quantization.deepspeedfp import DeepSpeedFPConfig
from vllm.model_executor.layers.quantization.experts_int8 import ExpertsInt8Config
- from vllm.model_executor.layers.quantization.fbgemm_fp8 import FBGEMMFp8Config
from vllm.model_executor.layers.quantization.gguf import GGUFConfig
from vllm.model_executor.layers.quantization.gptq_marlin_24 import (
GPTQMarlin24Config,
@@ -37,9 +36,9 @@ def override_quantization_method(self, *args, **kwargs):
AQLMConfig = BitsAndBytesConfig = CompressedTensorsConfig = DeepSpeedFPConfig = (
ExpertsInt8Config
- ) = FBGEMMFp8Config = GGUFConfig = GPTQMarlin24Config = MarlinConfig = QQQConfig = (
- Int8TpuConfig
- ) = DummyConfig
+ ) = GGUFConfig = GPTQMarlin24Config = MarlinConfig = QQQConfig = Int8TpuConfig = (
+ DummyConfig
+ )
from sglang.srt.layers.quantization.awq import AWQConfig, AWQMarlinConfig
@@ -49,6 +48,7 @@ def override_quantization_method(self, *args, **kwargs):
CompressedTensorsConfig,
)
from sglang.srt.layers.quantization.fp8 import Fp8Config
+from sglang.srt.layers.quantization.fpgemm_fp8 import FBGEMMFp8Config
from sglang.srt.layers.quantization.gptq import GPTQConfig, GPTQMarlinConfig
from sglang.srt.layers.quantization.modelopt_quant import (
ModelOptFp4Config,
@@ -85,6 +85,7 @@ def override_quantization_method(self, *args, **kwargs):
"qoq": QoQConfig,
"w4afp8": W4AFp8Config,
"petit_nvfp4": PetitNvFp4Config,
+ "fbgemm_fp8": FBGEMMFp8Config,
}
@@ -109,7 +110,6 @@ def override_quantization_method(self, *args, **kwargs):
"aqlm": AQLMConfig,
"deepspeedfp": DeepSpeedFPConfig,
"tpu_int8": Int8TpuConfig,
- "fbgemm_fp8": FBGEMMFp8Config,
"marlin": MarlinConfig,
"gguf": GGUFConfig,
"gptq_marlin_24": GPTQMarlin24Config,
diff --git a/python/sglang/srt/layers/quantization/awq.py b/python/sglang/srt/layers/quantization/awq.py
index 19deb7dd12e..9cba60c2b53 100644
--- a/python/sglang/srt/layers/quantization/awq.py
+++ b/python/sglang/srt/layers/quantization/awq.py
@@ -34,7 +34,10 @@
if TYPE_CHECKING:
from sglang.srt.layers.moe.moe_runner import MoeRunnerConfig
- from sglang.srt.layers.moe.topk import StandardTopKOutput
+ from sglang.srt.layers.moe.token_dispatcher import (
+ StandardDispatchOutput,
+ CombineInput,
+ )
from sglang.srt.utils import is_cuda, is_hip
@@ -736,24 +739,32 @@ def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
)
replace_parameter(layer, "w2_qzeros", marlin_w2_zp)
+ def create_moe_runner(
+ self, layer: torch.nn.Module, moe_runner_config: MoeRunnerConfig
+ ):
+ self.moe_runner_config = moe_runner_config
+
def apply(
self,
layer: torch.nn.Module,
- x: torch.Tensor,
- topk_output: StandardTopKOutput,
- moe_runner_config: MoeRunnerConfig,
- ) -> torch.Tensor:
+ dispatch_output: StandardDispatchOutput,
+ ) -> CombineInput:
+ from sglang.srt.layers.moe.token_dispatcher import StandardCombineInput
+
assert (
- moe_runner_config.activation == "silu"
+ self.moe_runner_config.activation == "silu"
), "Only SiLU activation is supported."
# The input must currently be float16
+ x = dispatch_output.hidden_states
+ topk_output = dispatch_output.topk_output
+
orig_dtype = x.dtype
x = x.half()
topk_weights, topk_ids, router_logits = topk_output
- return fused_marlin_moe(
+ output = fused_marlin_moe(
x,
layer.w13_qweight,
layer.w2_qweight,
@@ -768,3 +779,4 @@ def apply(
w2_zeros=layer.w2_qzeros,
num_bits=self.quant_config.weight_bits,
).to(orig_dtype)
+ return StandardCombineInput(hidden_states=output)
diff --git a/python/sglang/srt/layers/quantization/base_config.py b/python/sglang/srt/layers/quantization/base_config.py
index ec2b4edb107..4a5b7905eee 100644
--- a/python/sglang/srt/layers/quantization/base_config.py
+++ b/python/sglang/srt/layers/quantization/base_config.py
@@ -3,6 +3,7 @@
import inspect
from abc import ABC, abstractmethod
+from dataclasses import dataclass
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Type
import torch
@@ -10,7 +11,7 @@
if TYPE_CHECKING:
from sglang.srt.layers.moe.moe_runner import MoeRunnerConfig
- from sglang.srt.layers.moe.topk import TopKOutput
+ from sglang.srt.layers.moe.token_dispatcher import CombineInput, DispatchOutput
class QuantizeMethodBase(ABC):
@@ -89,20 +90,24 @@ def create_weights(
layer: torch.nn.Module,
num_experts: int,
hidden_size: int,
- intermediate_size: int,
+ intermediate_size_per_partition: int,
params_dtype: torch.dtype,
**extra_weight_attrs,
):
raise NotImplementedError
+ @abstractmethod
+ def create_moe_runner(
+ self, layer: torch.nn.Module, moe_runner_config: MoeRunnerConfig
+ ):
+ raise NotImplementedError
+
@abstractmethod
def apply(
self,
layer: torch.nn.Module,
- x: torch.Tensor,
- topk_output: TopKOutput,
- moe_runner_config: MoeRunnerConfig,
- ) -> torch.Tensor:
+ dispatch_output: DispatchOutput,
+ ) -> CombineInput:
raise NotImplementedError
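
With the abstract interface above, a fused-MoE quantization method now splits configuration (`create_moe_runner`) from execution (`apply`), and `apply` speaks the dispatcher protocol on both ends. A minimal sketch of a conforming method, assuming no additional abstract methods beyond those shown in this diff; `SketchMoEMethod` is hypothetical and performs no expert computation:

    import torch

    from sglang.srt.layers.moe import MoeRunnerConfig
    from sglang.srt.layers.moe.token_dispatcher import (
        CombineInput,
        StandardCombineInput,
        StandardDispatchOutput,
    )
    from sglang.srt.layers.quantization.base_config import FusedMoEMethodBase


    class SketchMoEMethod(FusedMoEMethodBase):
        def create_weights(self, layer, num_experts, hidden_size,
                           intermediate_size_per_partition, params_dtype, **extra_weight_attrs):
            pass  # a real method registers w13/w2 parameters here

        def create_moe_runner(self, layer: torch.nn.Module, moe_runner_config: MoeRunnerConfig):
            # Store the config; Triton-backed methods also build a MoeRunner here.
            self.moe_runner_config = moe_runner_config

        def apply(self, layer: torch.nn.Module,
                  dispatch_output: StandardDispatchOutput) -> CombineInput:
            x = dispatch_output.hidden_states
            _topk_output = dispatch_output.topk_output  # routing info for the expert kernels
            return StandardCombineInput(hidden_states=x)  # passthrough: no expert compute
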
diff --git a/python/sglang/srt/layers/quantization/blockwise_int8.py b/python/sglang/srt/layers/quantization/blockwise_int8.py
index a5966c4d59c..60d4e3929b0 100644
--- a/python/sglang/srt/layers/quantization/blockwise_int8.py
+++ b/python/sglang/srt/layers/quantization/blockwise_int8.py
@@ -9,6 +9,8 @@
from torch.nn import Module
from sglang.srt.distributed import get_tensor_model_parallel_world_size
+from sglang.srt.layers.moe import MoeRunner, MoeRunnerBackend, MoeRunnerConfig
+from sglang.srt.layers.moe.moe_runner.triton import TritonMoeQuantInfo
from sglang.srt.layers.parameter import BlockQuantScaleParameter, ModelWeightParameter
from sglang.srt.layers.quantization.base_config import (
FusedMoEMethodBase,
@@ -22,8 +24,10 @@
from sglang.srt.utils import set_weight_attrs
if TYPE_CHECKING:
- from sglang.srt.layers.moe.moe_runner import MoeRunnerConfig
- from sglang.srt.layers.moe.topk import TopKOutput
+ from sglang.srt.layers.moe.token_dispatcher import (
+ CombineInput,
+ StandardDispatchOutput,
+ )
ACTIVATION_SCHEMES = ["static", "dynamic"]
@@ -257,7 +261,7 @@ def create_weights(
layer: Module,
num_experts: int,
hidden_size: int,
- intermediate_size: int,
+ intermediate_size_per_partition: int,
params_dtype: torch.dtype,
**extra_weight_attrs,
):
@@ -273,25 +277,28 @@ def create_weights(
)
# NOTE(HandH1998): To ensure proper alignment of the block-wise quantization scales, the output_size of the weights for both the gate and up layers must be divisible by block_n.
# Required by column parallel or enabling merged weights
- if intermediate_size % block_n != 0:
+ if intermediate_size_per_partition % block_n != 0:
raise ValueError(
f"The output_size of gate's and up's weight = "
- f"{intermediate_size} is not divisible by "
+ f"{intermediate_size_per_partition} is not divisible by "
f"weight quantization block_n = {block_n}."
)
if tp_size > 1:
# Required by row parallel
- if intermediate_size % block_k != 0:
+ if intermediate_size_per_partition % block_k != 0:
raise ValueError(
f"The input_size of down's weight = "
- f"{intermediate_size} is not divisible by "
+ f"{intermediate_size_per_partition} is not divisible by "
f"weight quantization block_k = {block_k}."
)
# WEIGHTS
w13_weight = torch.nn.Parameter(
torch.empty(
- num_experts, 2 * intermediate_size, hidden_size, dtype=params_dtype
+ num_experts,
+ 2 * intermediate_size_per_partition,
+ hidden_size,
+ dtype=params_dtype,
),
requires_grad=False,
)
@@ -300,7 +307,10 @@ def create_weights(
w2_weight = torch.nn.Parameter(
torch.empty(
- num_experts, hidden_size, intermediate_size, dtype=params_dtype
+ num_experts,
+ hidden_size,
+ intermediate_size_per_partition,
+ dtype=params_dtype,
),
requires_grad=False,
)
@@ -311,7 +321,7 @@ def create_weights(
w13_weight_scale = torch.nn.Parameter(
torch.ones(
num_experts,
- 2 * ((intermediate_size + block_n - 1) // block_n),
+ 2 * ((intermediate_size_per_partition + block_n - 1) // block_n),
(hidden_size + block_k - 1) // block_k,
dtype=torch.float32,
),
@@ -321,7 +331,7 @@ def create_weights(
torch.ones(
num_experts,
(hidden_size + block_n - 1) // block_n,
- (intermediate_size + block_k - 1) // block_k,
+ (intermediate_size_per_partition + block_k - 1) // block_k,
dtype=torch.float32,
),
requires_grad=False,
@@ -344,26 +354,27 @@ def process_weights_after_loading(self, layer: Module) -> None:
# Block quant doesn't need to process weights after loading
return
+ def create_moe_runner(
+ self, layer: torch.nn.Module, moe_runner_config: MoeRunnerConfig
+ ):
+ self.moe_runner_config = moe_runner_config
+ self.runner = MoeRunner(MoeRunnerBackend.TRITON, moe_runner_config)
+
def apply(
self,
layer: torch.nn.Module,
- x: torch.Tensor,
- topk_output: TopKOutput,
- moe_runner_config: MoeRunnerConfig,
- ) -> torch.Tensor:
- from sglang.srt.layers.moe.fused_moe_triton.fused_moe import fused_experts
-
- # Expert fusion with INT8 quantization
- return fused_experts(
- x,
- layer.w13_weight,
- layer.w2_weight,
- topk_output=topk_output,
- moe_runner_config=moe_runner_config,
+ dispatch_output: StandardDispatchOutput,
+ ) -> CombineInput:
+
+ quant_info = TritonMoeQuantInfo(
+ w13_weight=layer.w13_weight,
+ w2_weight=layer.w2_weight,
use_int8_w8a8=True,
- w1_scale=(layer.w13_weight_scale_inv),
- w2_scale=(layer.w2_weight_scale_inv),
- a1_scale=layer.w13_input_scale,
+ w13_scale=layer.w13_weight_scale_inv,
+ w2_scale=layer.w2_weight_scale_inv,
+ a13_scale=layer.w13_input_scale,
a2_scale=layer.w2_input_scale,
block_shape=self.quant_config.weight_block_size,
)
+
+ return self.runner.run(dispatch_output, quant_info)
diff --git a/python/sglang/srt/layers/quantization/compressed_tensors/compressed_tensors_moe.py b/python/sglang/srt/layers/quantization/compressed_tensors/compressed_tensors_moe.py
index c1051510736..e2ff25e6868 100644
--- a/python/sglang/srt/layers/quantization/compressed_tensors/compressed_tensors_moe.py
+++ b/python/sglang/srt/layers/quantization/compressed_tensors/compressed_tensors_moe.py
@@ -11,6 +11,8 @@
from compressed_tensors import CompressionFormat
from compressed_tensors.quantization import QuantizationStrategy
+from sglang.srt.layers.moe import MoeRunner, MoeRunnerBackend, MoeRunnerConfig
+from sglang.srt.layers.moe.moe_runner.triton import TritonMoeQuantInfo
from sglang.srt.layers.quantization.base_config import FusedMoEMethodBase
from sglang.srt.layers.quantization.fp8_kernel import is_fp8_fnuz, scaled_fp8_quant
from sglang.srt.layers.quantization.fp8_utils import normalize_e4m3fn_to_e4m3fnuz
@@ -19,16 +21,32 @@
per_tensor_dequantize,
replace_parameter,
)
-from sglang.srt.utils import is_cpu, is_cuda, is_hip, is_npu, set_weight_attrs
+from sglang.srt.utils import (
+ get_bool_env_var,
+ is_cpu,
+ is_cuda,
+ is_hip,
+ is_npu,
+ set_weight_attrs,
+)
if TYPE_CHECKING:
from sglang.srt.layers.moe.fused_moe_triton import FusedMoE
- from sglang.srt.layers.moe.moe_runner import MoeRunnerConfig
- from sglang.srt.layers.moe.topk import TopKOutput
+ from sglang.srt.layers.moe.token_dispatcher import (
+ CombineInput,
+ StandardDispatchOutput,
+ )
from sglang.srt.layers.quantization.compressed_tensors.compressed_tensors import (
CompressedTensorsConfig,
)
+_is_hip = is_hip()
+_use_aiter = get_bool_env_var("SGLANG_USE_AITER") and _is_hip
+
+if _use_aiter:
+ from aiter.ops.shuffle import shuffle_weight
+
+ from sglang.srt.layers.moe.rocm_moe_utils import rocm_fused_experts_tkw1
try:
import vllm
@@ -265,29 +283,75 @@ def process_weights_after_loading(self, layer: FusedMoE) -> None:
max_w13_scales, requires_grad=False
)
+ if _use_aiter:
+ with torch.no_grad():
+ # Pre-shuffle weights
+ layer.w13_weight = torch.nn.Parameter(
+ shuffle_weight(layer.w13_weight.data, (16, 16)),
+ requires_grad=False,
+ )
+ torch.cuda.empty_cache()
+ layer.w2_weight = torch.nn.Parameter(
+ shuffle_weight(layer.w2_weight.data, (16, 16)),
+ requires_grad=False,
+ )
+ torch.cuda.empty_cache()
+
+ def create_moe_runner(
+ self, layer: torch.nn.Module, moe_runner_config: MoeRunnerConfig
+ ):
+ self.moe_runner_config = moe_runner_config
+ self.runner = MoeRunner(MoeRunnerBackend.TRITON, moe_runner_config)
+
def apply(
self,
layer: torch.nn.Module,
- x: torch.Tensor,
- topk_output: TopKOutput,
- moe_runner_config: MoeRunnerConfig,
- ) -> torch.Tensor:
- from sglang.srt.layers.moe.fused_moe_triton import fused_experts
+ dispatch_output: StandardDispatchOutput,
+ ) -> CombineInput:
- return fused_experts(
- x,
- layer.w13_weight,
- layer.w2_weight,
- topk_output=topk_output,
- moe_runner_config=moe_runner_config,
- use_fp8_w8a8=True,
- per_channel_quant=self.weight_quant.strategy
- == QuantizationStrategy.CHANNEL,
- w1_scale=layer.w13_weight_scale,
- w2_scale=layer.w2_weight_scale,
- a1_scale=layer.w13_input_scale,
- a2_scale=layer.w2_input_scale,
- )
+ from sglang.srt.layers.moe.token_dispatcher import StandardCombineInput
+
+ x = dispatch_output.hidden_states
+ topk_output = dispatch_output.topk_output
+
+ moe_runner_config = self.moe_runner_config
+
+ if (
+ _use_aiter
+ and self.weight_quant.strategy == QuantizationStrategy.CHANNEL
+ and moe_runner_config.apply_router_weight_on_input
+ ):
+ topk_weights, topk_ids, _ = topk_output
+ output = rocm_fused_experts_tkw1(
+ hidden_states=x,
+ w1=layer.w13_weight,
+ w2=layer.w2_weight,
+ topk_weights=topk_weights,
+ topk_ids=topk_ids,
+ activation=moe_runner_config.activation,
+ apply_router_weight_on_input=moe_runner_config.apply_router_weight_on_input,
+ use_fp8_w8a8=True,
+ per_channel_quant=self.weight_quant.strategy
+ == QuantizationStrategy.CHANNEL,
+ w1_scale=layer.w13_weight_scale,
+ w2_scale=layer.w2_weight_scale,
+ a1_scale=layer.w13_input_scale,
+ a2_scale=layer.w2_input_scale,
+ )
+ return StandardCombineInput(hidden_states=output)
+ else:
+ quant_info = TritonMoeQuantInfo(
+ w13_weight=layer.w13_weight,
+ w2_weight=layer.w2_weight,
+ use_fp8_w8a8=True,
+ per_channel_quant=self.weight_quant.strategy
+ == QuantizationStrategy.CHANNEL,
+ w13_scale=layer.w13_weight_scale,
+ w2_scale=layer.w2_weight_scale,
+ a13_scale=layer.w13_input_scale,
+ a2_scale=layer.w2_input_scale,
+ )
+ return self.runner.run(dispatch_output, quant_info)
class CompressedTensorsWNA16MoEMethod(CompressedTensorsMoEMethod):
@@ -329,8 +393,6 @@ def create_weights(
params_dtype == torch.float16
), "float16 is required for MoE compressed models. Set dtype=torch.float16" # noqa: E501
- intermediate_size_full = extra_weight_attrs.pop("intermediate_size_full")
-
# Will transpose the loaded weight along the
# intermediate and hidden dim sizes. Will
# shard for TP along the transposed dims
@@ -364,13 +426,13 @@ def create_weights(
# In the case where we have actorder/g_idx,
# we do not partition the w2 scales
load_full_w2 = self.actorder and self.group_size != -1
- w2_scales_size = (
- intermediate_size_full if load_full_w2 else intermediate_size_per_partition
- )
- self.is_k_full = (not self.actorder) or (
- intermediate_size_per_partition == intermediate_size_full
- )
+ if load_full_w2:
+ w2_scales_size = intermediate_size_per_partition * layer.moe_tp_size
+ else:
+ w2_scales_size = intermediate_size_per_partition
+
+ self.is_k_full = (not self.actorder) or layer.moe_tp_size == 1
if self.strategy == "channel":
num_groups_w2 = num_groups_w13 = 1
@@ -589,21 +651,29 @@ def marlin_moe_permute_scales(
)
replace_tensor("w2_weight_scale", marlin_w2_scales)
+ def create_moe_runner(
+ self, layer: torch.nn.Module, moe_runner_config: MoeRunnerConfig
+ ):
+ self.moe_runner_config = moe_runner_config
+
def apply(
self,
layer: torch.nn.Module,
- x: torch.Tensor,
- topk_output: TopKOutput,
- moe_runner_config: MoeRunnerConfig,
- ) -> torch.Tensor:
+ dispatch_output: StandardDispatchOutput,
+ ) -> CombineInput:
+
+ from sglang.srt.layers.moe.token_dispatcher import StandardCombineInput
assert (
- moe_runner_config.activation == "silu"
+ self.moe_runner_config.activation == "silu"
), "Only SiLU activation is supported."
+ x = dispatch_output.hidden_states
+ topk_output = dispatch_output.topk_output
+
topk_weights, topk_ids, router_logits = topk_output
- return torch.ops.vllm.fused_marlin_moe(
+ output = torch.ops.vllm.fused_marlin_moe(
x,
layer.w13_weight_packed,
layer.w2_weight_packed,
@@ -619,3 +689,4 @@ def apply(
num_bits=self.num_bits,
is_k_full=self.is_k_full,
)
+ return StandardCombineInput(hidden_states=output)
diff --git a/python/sglang/srt/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_fp8.py b/python/sglang/srt/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_fp8.py
index 210a24f6946..a157ebc3e94 100644
--- a/python/sglang/srt/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_fp8.py
+++ b/python/sglang/srt/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_fp8.py
@@ -21,9 +21,15 @@
normalize_e4m3fn_to_e4m3fnuz,
)
from sglang.srt.layers.quantization.utils import requantize_with_max_scale
+from sglang.srt.utils import get_bool_env_var, is_hip
__all__ = ["CompressedTensorsW8A8Fp8"]
+_is_hip = is_hip()
+_use_aiter = get_bool_env_var("SGLANG_USE_AITER") and _is_hip
+if _use_aiter:
+ from aiter.ops.shuffle import shuffle_weight
+
class CompressedTensorsW8A8Fp8(CompressedTensorsScheme):
@@ -76,7 +82,13 @@ def process_weights_after_loading(self, layer) -> None:
else:
weight_scale = layer.weight_scale.data
- layer.weight = Parameter(weight.t(), requires_grad=False)
+ if _use_aiter:
+ layer.weight = Parameter(
+ shuffle_weight(weight, (16, 16)), requires_grad=False
+ )
+ else:
+ layer.weight = Parameter(weight.t(), requires_grad=False)
+
# required by torch.compile to be torch.nn.Parameter
layer.weight_scale = Parameter(weight_scale, requires_grad=False)
diff --git a/python/sglang/srt/layers/quantization/deep_gemm_wrapper/compile_utils.py b/python/sglang/srt/layers/quantization/deep_gemm_wrapper/compile_utils.py
index c3043f38917..e374759c433 100644
--- a/python/sglang/srt/layers/quantization/deep_gemm_wrapper/compile_utils.py
+++ b/python/sglang/srt/layers/quantization/deep_gemm_wrapper/compile_utils.py
@@ -1,26 +1,22 @@
import logging
import os
from contextlib import contextmanager
-from dataclasses import dataclass
from enum import IntEnum, auto
-from typing import Callable, Dict, List, Optional, Tuple
+from typing import Dict, List, Tuple
-from tqdm.contrib.concurrent import thread_map
+import torch
+from tqdm import tqdm
from sglang.srt.layers.quantization.deep_gemm_wrapper.configurer import (
- DEEPGEMM_BLACKWELL,
ENABLE_JIT_DEEPGEMM,
)
from sglang.srt.server_args import ServerArgs
-from sglang.srt.utils import get_bool_env_var, get_int_env_var
+from sglang.srt.utils import ceil_div, get_bool_env_var, get_int_env_var
logger = logging.getLogger(__name__)
-if ENABLE_JIT_DEEPGEMM and not DEEPGEMM_BLACKWELL:
- from deep_gemm import get_num_sms
- from deep_gemm.jit import build
- from deep_gemm.jit_kernels.gemm import get_best_configs
- from deep_gemm.jit_kernels.runtime import FP8GemmRuntime, GemmType
+if ENABLE_JIT_DEEPGEMM:
+ import deep_gemm
_BUILTIN_M_LIST = list(range(1, 1024 * 16 + 1))
@@ -40,19 +36,7 @@
# Refer to https://github.com/deepseek-ai/DeepGEMM/commit/d75b218b7b8f4a5dd5406ac87905039ead3ae42f
# NVRTC may have performance loss with some cases.
# And NVCC JIT speed is also 9x faster in the ref commit
-_USE_NVRTC_DEFAULT = "0"
-if ENABLE_JIT_DEEPGEMM:
- try:
- from deep_gemm.jit.compiler import get_nvcc_compiler
-
- get_nvcc_compiler()
- except:
- logger.warning(
- "NVCC Compiler not found, use NVRTC for DeepGEMM JIT "
- "and may have performance loss with some cases."
- )
- _USE_NVRTC_DEFAULT = "1"
-os.environ["DG_JIT_USE_NVRTC"] = os.getenv("SGL_DG_USE_NVRTC", _USE_NVRTC_DEFAULT)
+os.environ["DG_JIT_USE_NVRTC"] = os.getenv("SGL_DG_USE_NVRTC", "0")
def update_deep_gemm_config(gpu_id: int, server_args: ServerArgs):
@@ -75,7 +59,7 @@ def update_deep_gemm_config(gpu_id: int, server_args: ServerArgs):
# Default each rank will try compile all Ms to
# load all symbols at the launch stages.
# Avoid loading symbols at the serving stages.
- _DO_COMPILE_ALL = _IS_FIRST_RANK_ON_NODE or not _IN_PRECOMPILE_STAGE
+ _DO_COMPILE_ALL = _IS_FIRST_RANK_ON_NODE
class DeepGemmKernelType(IntEnum):
@@ -84,185 +68,15 @@ class DeepGemmKernelType(IntEnum):
GEMM_NT_F8F8BF16 = auto()
-@dataclass
-class DeepGemmKernelHelper:
- name: str
- compile_func: Callable[
- [
- int,
- int,
- int,
- Tuple[int, int, int, int, Tuple[int, bool], Tuple[int, int, int]],
- ],
- None,
- ]
- configure_func: Callable[
- [int, int, int, int, int],
- Tuple[int, int, int, int, Tuple[int, bool], Tuple[int, int, int]],
- ]
-
-
_INITIALIZATION_DICT: Dict[Tuple[DeepGemmKernelType, int, int, int], bool] = dict()
-# TODO improve naming
-def _compile_warning_1():
- if not _IN_PRECOMPILE_STAGE and _IS_FIRST_RANK_ON_NODE:
- logger.warning(
- "Entering DeepGEMM JIT Pre-Compile session. "
- "It may takes a long time (typically 10-20 mins) "
- "if you have not run `sglang.compile_deep_gemm`. "
- "It is recommended to run `sglang.compile_deep_gemm` with same args as `sglang.launch_server`"
- " for pre-compilation to reduce the overhead if you have not run it before. "
- "For example: "
- "`python3 -m sglang.compile_deep_gemm --model deepseek-ai/DeepSeek-V3 --tp 8 --trust-remote-code`"
- )
-
-
-# TODO improve naming
-def _compile_warning_2():
- logger.warning(
- "Entering DeepGEMM JIT Single Kernel Compile session. "
- "And it will makes inference throughput becomes flaky. "
- "Please run `sglang.compile_deep_gemm` with same args as `sglang.launch_server`"
- " for pre-compilation to solve this issue. "
- "For example: "
- "`python3 -m sglang.compile_deep_gemm --model deepseek-ai/DeepSeek-V3 --tp 8 --trust-remote-code`"
- )
-
-
-def _compile_grouped_gemm_nt_f8f8bf16_masked_one(
- n: int,
- k: int,
- num_groups: int,
- config: Tuple[int, int, int, int, Tuple[int, bool], Tuple[int, int, int]],
-) -> None:
- num_sms, block_m, block_n, num_stages, tma_multicast_config, smem_config = config
- block_k = 128
- num_tma_threads = 128
- num_math_threads_per_group = 128
-
- kwargs = {
- "GEMM_TYPE": GemmType.GroupedMasked,
- "NUM_TMA_THREADS": num_tma_threads,
- "NUM_MATH_THREADS_PER_GROUP": num_math_threads_per_group,
- "N": n,
- "K": k,
- "NUM_GROUPS": num_groups,
- "BLOCK_M": block_m,
- "BLOCK_N": block_n,
- "BLOCK_K": block_k,
- "SWIZZLE_D_MODE": smem_config[1],
- "BLOCK_N_PADDING": smem_config[2],
- "NUM_STAGES": num_stages,
- "NUM_TMA_MULTICAST": tma_multicast_config[0],
- "IS_TMA_MULTICAST_ON_A": tma_multicast_config[1],
- "NUM_SMS": num_sms,
- "SMEM_SIZE": smem_config[0],
- }
-
- code = FP8GemmRuntime.generate(kwargs)
- _ = build("m_grouped_gemm_fp8_fp8_bf16_nt", code, FP8GemmRuntime, kwargs)
-
-
-def _compile_grouped_gemm_nt_f8f8bf16_contig_one(
- n: int,
- k: int,
- num_groups: int,
- config: Tuple[int, int, int, int, Tuple[int, bool], Tuple[int, int, int]],
-) -> None:
- num_sms, block_m, block_n, num_stages, tma_multicast_config, smem_config = config
- block_k = 128
- num_tma_threads = 128
- num_math_threads_per_group = 128
- kwargs = {
- "GEMM_TYPE": GemmType.GroupedContiguous,
- "NUM_TMA_THREADS": num_tma_threads,
- "NUM_MATH_THREADS_PER_GROUP": num_math_threads_per_group,
- "N": n,
- "K": k,
- "NUM_GROUPS": 1,
- "BLOCK_M": block_m,
- "BLOCK_N": block_n,
- "BLOCK_K": block_k,
- "SWIZZLE_D_MODE": smem_config[1],
- "BLOCK_N_PADDING": smem_config[2],
- "NUM_STAGES": num_stages,
- "NUM_TMA_MULTICAST": tma_multicast_config[0],
- "IS_TMA_MULTICAST_ON_A": tma_multicast_config[1],
- "NUM_SMS": num_sms,
- "SMEM_SIZE": smem_config[0],
- }
-
- code = FP8GemmRuntime.generate(kwargs)
- _ = build("m_grouped_gemm_fp8_fp8_bf16_nt", code, FP8GemmRuntime, kwargs)
-
-
-def _compile_gemm_nt_f8f8bf16_one(
- n: int,
- k: int,
- _: int, # _ is a dummy parameter to align with other interfaces
- config: Tuple[int, int, int, int, Tuple[int, bool], Tuple[int, int, int]],
-) -> None:
- num_sms, block_m, block_n, num_stages, tma_multicast_config, smem_config = config
- block_k = 128
- num_tma_threads = 128
- num_math_threads_per_group = 128
- kwargs = {
- "GEMM_TYPE": GemmType.Normal,
- "NUM_TMA_THREADS": num_tma_threads,
- "NUM_MATH_THREADS_PER_GROUP": num_math_threads_per_group,
- "N": n,
- "K": k,
- "NUM_GROUPS": 1,
- "BLOCK_M": block_m,
- "BLOCK_N": block_n,
- "BLOCK_K": block_k,
- "SWIZZLE_D_MODE": smem_config[1],
- "BLOCK_N_PADDING": smem_config[2],
- "NUM_STAGES": num_stages,
- "NUM_TMA_MULTICAST": tma_multicast_config[0],
- "IS_TMA_MULTICAST_ON_A": tma_multicast_config[1],
- "NUM_SMS": num_sms,
- "SMEM_SIZE": smem_config[0],
- }
-
- code = FP8GemmRuntime.generate(kwargs)
- _ = build("gemm_fp8_fp8_bf16_nt", code, FP8GemmRuntime, kwargs)
-
-
-# TODO further refactor warmup-related
-_KERNEL_HELPER_DICT: Dict[DeepGemmKernelType, DeepGemmKernelHelper] = {
- DeepGemmKernelType.GROUPED_GEMM_NT_F8F8BF16_MASKED: DeepGemmKernelHelper(
- name="m_grouped_gemm_fp8_fp8_bf16_nt_masked",
- compile_func=_compile_grouped_gemm_nt_f8f8bf16_masked_one,
- configure_func=lambda m, n, k, num_groups, num_sms: get_best_configs(
- m, n, k, num_groups, num_sms, is_grouped_masked=True
- ),
- ),
- DeepGemmKernelType.GROUPED_GEMM_NT_F8F8BF16_CONTIG: DeepGemmKernelHelper(
- name="m_grouped_gemm_fp8_fp8_bf16_nt_contiguous",
- compile_func=_compile_grouped_gemm_nt_f8f8bf16_contig_one,
- configure_func=lambda m, n, k, _, num_sms: get_best_configs(
- m, n, k, 1, num_sms, is_grouped_contiguous=True
- ),
- ),
- DeepGemmKernelType.GEMM_NT_F8F8BF16: DeepGemmKernelHelper(
- name="gemm_fp8_fp8_bf16_nt",
- compile_func=_compile_gemm_nt_f8f8bf16_one,
- configure_func=lambda m, n, k, _, num_sms: get_best_configs(
- m, n, k, 1, num_sms
- ),
- ),
-}
-
-
+# TODO improve code
def _maybe_compile_deep_gemm_one_type_all(
kernel_type: DeepGemmKernelType,
n: int,
k: int,
num_groups: int,
- m_list: Optional[List[int]] = None,
) -> None:
global _INITIALIZATION_DICT
global _BUILTIN_M_LIST
@@ -275,61 +89,153 @@ def _maybe_compile_deep_gemm_one_type_all(
):
_INITIALIZATION_DICT[query_key] = True
- kernel_helper = _KERNEL_HELPER_DICT[kernel_type]
- _compile_warning_1()
+ # TODO maybe improve logs
+ if not _IN_PRECOMPILE_STAGE and _IS_FIRST_RANK_ON_NODE:
+ logger.warning(
+ "Entering DeepGEMM JIT Pre-Compile session. "
+ "It may take a long time (typically 10-20 mins) "
+ "if you have not run `sglang.compile_deep_gemm`. "
+ "It is recommended to run `sglang.compile_deep_gemm` with same args as `sglang.launch_server`"
+ " for pre-compilation to reduce the overhead if you have not run it before. "
+ "For example: "
+ "`python3 -m sglang.compile_deep_gemm --model deepseek-ai/DeepSeek-V3 --tp 8 --trust-remote-code`"
+ )
+
logger.info(
f"Try DeepGEMM JIT Compiling for "
- f"<{kernel_helper.name}> N={n}, K={k}, num_groups={num_groups} with all Ms."
+ f"<{kernel_type.name}> N={n}, K={k}, num_groups={num_groups} with all Ms."
f"{' It only takes a little time (typically 1 sec) if you have run `python3 -m sglang.compile_deep_gemm`. ' if not _IN_PRECOMPILE_STAGE else ''}"
)
- # NOTE(alcanderian): get_num_sms should be change when 2-batch-overlap is introduced
- num_sms = get_num_sms()
- collected_configs = set()
- for m in m_list if m_list is not None else _BUILTIN_M_LIST:
- # Put config into set to get unique configs and reduce cases to be compiled
- collected_configs.add(
- kernel_helper.configure_func(m, n, k, num_groups, num_sms)
- )
- compile_func = lambda config: kernel_helper.compile_func(
- n, k, num_groups, config
+ _compile_deep_gemm_one_type_all(
+ kernel_type=kernel_type,
+ n=n,
+ k=k,
+ num_groups=num_groups,
+ m_list=_BUILTIN_M_LIST,
)
- thread_map(compile_func, collected_configs, max_workers=_COMPILE_WORKERS)
-@contextmanager
-def _log_jit_build(M: int, N: int, K: int, kernel_type: DeepGemmKernelType):
- if _IN_PRECOMPILE_STAGE:
- yield
- return
+# NOTE(alcanderian): get_num_sms should be changed when 2-batch-overlap is introduced
+def _compile_deep_gemm_one_type_all(
+ kernel_type: DeepGemmKernelType,
+ n: int,
+ k: int,
+ num_groups: int,
+ m_list: List[int],
+) -> None:
+ if kernel_type == DeepGemmKernelType.GROUPED_GEMM_NT_F8F8BF16_CONTIG:
+ m_alignment = deep_gemm.get_mk_alignment_for_contiguous_layout()
+ m_list = sorted(list(set(m for m in m_list if m % m_alignment == 0)))
+
+ executor = _BaseWarmupExecutor.create(
+ kernel_type, max_m=max(m_list), n=n, k=k, num_groups=num_groups
+ )
- from deep_gemm.jit.runtime import RuntimeCache
+ old_compile_mode = deep_gemm.get_compile_mode()
+ deep_gemm.set_compile_mode(1)
+ # TODO can use multi thread
+ for m in tqdm(m_list, desc="DeepGEMM warmup"):
+ executor.execute(m=m)
+ deep_gemm.set_compile_mode(old_compile_mode)
+
+ # clean up input buffers
+ torch.cuda.current_stream().synchronize()
+ del executor
+ torch.cuda.empty_cache()
+
+
+class _BaseWarmupExecutor:
+ @staticmethod
+ def create(kernel_type: DeepGemmKernelType, **kwargs):
+ return {
+ DeepGemmKernelType.GEMM_NT_F8F8BF16: _NormalWarmupExecutor,
+ DeepGemmKernelType.GROUPED_GEMM_NT_F8F8BF16_CONTIG: _GroupedContWarmupExecutor,
+ DeepGemmKernelType.GROUPED_GEMM_NT_F8F8BF16_MASKED: _GroupedMaskedWarmupExecutor,
+ }[kernel_type](**kwargs)
+
+ def execute(self, m):
+ raise NotImplementedError
+
+
+def _empty_token_fp8(size):
+ *dims, k = size
+ return (
+ torch.empty(size, device="cuda", dtype=torch.float8_e4m3fn),
+ torch.empty(
+ (*dims, ceil_div(k, _BLOCK_SIZE)), device="cuda", dtype=torch.float32
+ ),
+ )
- origin_func = RuntimeCache.get
- def __patched_func(self, *args, **kwargs):
- ret = origin_func(self, *args, **kwargs)
- if ret is None:
- kernel_helper = _KERNEL_HELPER_DICT[kernel_type]
- if not DEEPGEMM_BLACKWELL:
- _compile_warning_2()
- logger.warning(
- f"DeepGEMM JIT Compiling for <{kernel_helper.name}> M={M}, N={N}, K={K}. Please wait."
- )
- return ret
+def _empty_block_fp8(size):
+ *dims, n, k = size
+ return (
+ torch.empty(size, device="cuda", dtype=torch.float8_e4m3fn),
+ torch.empty(
+ (*dims, ceil_div(n, _BLOCK_SIZE), ceil_div(k, _BLOCK_SIZE)),
+ device="cuda",
+ dtype=torch.float32,
+ ),
+ )
- RuntimeCache.get = __patched_func
- yield
- RuntimeCache.get = origin_func
+
+_BLOCK_SIZE = 128
+
+
+class _NormalWarmupExecutor(_BaseWarmupExecutor):
+ def __init__(self, max_m: int, n: int, k: int, num_groups: int):
+ self.lhs_q, self.lhs_s = _empty_token_fp8((max_m, k))
+ self.rhs_q, self.rhs_s = _empty_block_fp8((n, k))
+ self.out = torch.empty((max_m, n), device="cuda", dtype=torch.bfloat16)
+
+ def execute(self, m):
+ deep_gemm.fp8_gemm_nt(
+ (self.lhs_q[:m], self.lhs_s[:m]),
+ (self.rhs_q, self.rhs_s),
+ self.out[:m],
+ )
+
+
+class _GroupedContWarmupExecutor(_BaseWarmupExecutor):
+ def __init__(self, max_m: int, n: int, k: int, num_groups: int):
+ self.lhs_q, self.lhs_s = _empty_token_fp8((max_m, k))
+ self.rhs_q, self.rhs_s = _empty_block_fp8((num_groups, n, k))
+ self.m_indices = torch.zeros((max_m,), device="cuda", dtype=torch.int32)
+ self.out = torch.empty((max_m, n), device="cuda", dtype=torch.bfloat16)
+
+ def execute(self, m):
+ deep_gemm.m_grouped_fp8_gemm_nt_contiguous(
+ (self.lhs_q[:m], self.lhs_s[:m]),
+ (self.rhs_q, self.rhs_s),
+ self.out[:m],
+ m_indices=self.m_indices[:m],
+ )
+
+
+class _GroupedMaskedWarmupExecutor(_BaseWarmupExecutor):
+ def __init__(self, max_m: int, n: int, k: int, num_groups: int):
+ self.lhs_q, self.lhs_s = _empty_token_fp8((num_groups, max_m, k))
+ self.rhs_q, self.rhs_s = _empty_block_fp8((num_groups, n, k))
+ self.masked_m = torch.zeros((num_groups,), device="cuda", dtype=torch.int32)
+ self.out = torch.empty(
+ (num_groups, max_m, n), device="cuda", dtype=torch.bfloat16
+ )
+
+ def execute(self, m):
+ deep_gemm.fp8_m_grouped_gemm_nt_masked(
+ (self.lhs_q, self.lhs_s),
+ (self.rhs_q, self.rhs_s),
+ self.out,
+ masked_m=self.masked_m,
+ # DeepGEMM uses `expected_m` instead of the input shape for `get_best_config`
+ expected_m=m,
+ )
@contextmanager
def deep_gemm_execution_hook(
m: int, n: int, k: int, num_groups: int, kernel_type: DeepGemmKernelType
):
- # not supported yet
- if not DEEPGEMM_BLACKWELL:
- _maybe_compile_deep_gemm_one_type_all(kernel_type, n, k, num_groups)
-
- with _log_jit_build(m, n, k, kernel_type):
- yield
+ _maybe_compile_deep_gemm_one_type_all(kernel_type, n, k, num_groups)
+ yield
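
The warmup executors above pre-allocate FP8 operands in DeepGEMM's expected scale layout: per-token scales for the activations (one scale per 128-wide slice of k) and per-128x128-block scales for the weights. A small sketch of the shape arithmetic, with a local `ceil_div` equivalent to the helper imported from `sglang.srt.utils` and example dimensions chosen only for illustration:

    def ceil_div(a: int, b: int) -> int:
        return (a + b - 1) // b


    _BLOCK_SIZE = 128
    max_m, n, k, num_groups = 4096, 4608, 7168, 8

    # Activation (token-wise) scales: one float32 scale per token per 128-wide k slice.
    lhs_scale_shape = (max_m, ceil_div(k, _BLOCK_SIZE))  # (4096, 56)

    # Weight (block-wise) scales: one float32 scale per 128x128 tile of each expert's weight.
    rhs_scale_shape = (num_groups, ceil_div(n, _BLOCK_SIZE), ceil_div(k, _BLOCK_SIZE))  # (8, 36, 56)
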
diff --git a/python/sglang/srt/layers/quantization/deep_gemm_wrapper/configurer.py b/python/sglang/srt/layers/quantization/deep_gemm_wrapper/configurer.py
index 4288fff6e34..ecf7d1647f8 100644
--- a/python/sglang/srt/layers/quantization/deep_gemm_wrapper/configurer.py
+++ b/python/sglang/srt/layers/quantization/deep_gemm_wrapper/configurer.py
@@ -1,5 +1,7 @@
import logging
+import torch
+
from sglang.srt.utils import get_bool_env_var, get_device_sm
logger = logging.getLogger(__name__)
@@ -19,14 +21,12 @@ def _compute_enable_deep_gemm():
return get_bool_env_var("SGL_ENABLE_JIT_DEEPGEMM", default="true")
-ENABLE_JIT_DEEPGEMM = _compute_enable_deep_gemm()
+def _is_blackwell_arch() -> bool:
+ major, minor = torch.cuda.get_device_capability(torch.cuda.current_device())
+ return major == 10
-try:
- from deep_gemm import fp8_gemm_nt
- # They have not given a name to this breaking change
- DEEPGEMM_BLACKWELL = True
-except ImportError:
- DEEPGEMM_BLACKWELL = False
+ENABLE_JIT_DEEPGEMM = _compute_enable_deep_gemm()
+DEEPGEMM_BLACKWELL = ENABLE_JIT_DEEPGEMM and _is_blackwell_arch()
DEEPGEMM_SCALE_UE8M0 = DEEPGEMM_BLACKWELL
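
`DEEPGEMM_BLACKWELL` is now derived from the device's compute capability instead of probing which DeepGEMM symbols import; SM major version 10 corresponds to Blackwell-class GPUs, while Hopper reports major 9. A standalone sketch of the same check:

    import torch


    def is_blackwell() -> bool:
        if not torch.cuda.is_available():
            return False
        major, _minor = torch.cuda.get_device_capability(torch.cuda.current_device())
        return major == 10  # e.g. B200 reports (10, 0); H100 reports (9, 0)
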
diff --git a/python/sglang/srt/layers/quantization/deep_gemm_wrapper/entrypoint.py b/python/sglang/srt/layers/quantization/deep_gemm_wrapper/entrypoint.py
index 9dad33f9e91..02945f44961 100644
--- a/python/sglang/srt/layers/quantization/deep_gemm_wrapper/entrypoint.py
+++ b/python/sglang/srt/layers/quantization/deep_gemm_wrapper/entrypoint.py
@@ -11,53 +11,41 @@
ENABLE_JIT_DEEPGEMM,
)
from sglang.srt.server_args import ServerArgs
+from sglang.srt.utils import get_bool_env_var
logger = logging.getLogger(__name__)
if ENABLE_JIT_DEEPGEMM:
import deep_gemm
+ from deep_gemm.utils.layout import get_mn_major_tma_aligned_tensor
- if DEEPGEMM_BLACKWELL:
- from deep_gemm import fp8_gemm_nt as _gemm_nt_f8f8bf16_raw
- from deep_gemm import (
- fp8_m_grouped_gemm_nt_masked as _grouped_gemm_nt_f8f8bf16_masked_raw,
- )
- from deep_gemm import (
- m_grouped_fp8_gemm_nt_contiguous as _grouped_gemm_nt_f8f8bf16_contig_raw,
- )
- else:
- from deep_gemm import gemm_fp8_fp8_bf16_nt as _gemm_nt_f8f8bf16_raw
- from deep_gemm import get_col_major_tma_aligned_tensor
- from deep_gemm import (
- m_grouped_gemm_fp8_fp8_bf16_nt_contiguous as _grouped_gemm_nt_f8f8bf16_contig_raw,
- )
- from deep_gemm import (
- m_grouped_gemm_fp8_fp8_bf16_nt_masked as _grouped_gemm_nt_f8f8bf16_masked_raw,
- )
+_SANITY_CHECK = get_bool_env_var("SGLANG_DEEPGEMM_SANITY_CHECK")
+# TODO maybe rename these functions
def grouped_gemm_nt_f8f8bf16_masked(
lhs: Tuple[torch.Tensor, torch.Tensor],
rhs: Tuple[torch.Tensor, torch.Tensor],
out: torch.Tensor,
masked_m: torch.Tensor,
expected_m: int,
- recipe=None,
):
num_groups, _, k = lhs[0].shape
_, n, _ = rhs[0].shape
kernel_type = compile_utils.DeepGemmKernelType.GROUPED_GEMM_NT_F8F8BF16_MASKED
+ _sanity_check_input(lhs)
+ _sanity_check_input(rhs)
+
with compile_utils.deep_gemm_execution_hook(
expected_m, n, k, num_groups, kernel_type
):
- _grouped_gemm_nt_f8f8bf16_masked_raw(
+ deep_gemm.fp8_m_grouped_gemm_nt_masked(
lhs,
rhs,
out,
masked_m,
expected_m,
- **({"recipe": recipe} if DEEPGEMM_BLACKWELL else {})
)
@@ -71,8 +59,11 @@ def grouped_gemm_nt_f8f8bf16_contig(
num_groups, n, _ = rhs[0].shape
kernel_type = compile_utils.DeepGemmKernelType.GROUPED_GEMM_NT_F8F8BF16_CONTIG
+ _sanity_check_input(lhs)
+ _sanity_check_input(rhs)
+
with compile_utils.deep_gemm_execution_hook(m, n, k, num_groups, kernel_type):
- _grouped_gemm_nt_f8f8bf16_contig_raw(lhs, rhs, out, m_indices)
+ deep_gemm.m_grouped_fp8_gemm_nt_contiguous(lhs, rhs, out, m_indices)
def gemm_nt_f8f8bf16(
@@ -85,8 +76,11 @@ def gemm_nt_f8f8bf16(
num_groups = 1
kernel_type = compile_utils.DeepGemmKernelType.GEMM_NT_F8F8BF16
+ _sanity_check_input(lhs)
+ _sanity_check_input(rhs)
+
with compile_utils.deep_gemm_execution_hook(m, n, k, num_groups, kernel_type):
- _gemm_nt_f8f8bf16_raw(
+ deep_gemm.fp8_gemm_nt(
lhs,
rhs,
out,
@@ -108,3 +102,18 @@ def configure_deep_gemm_num_sms(num_sms):
yield
finally:
deep_gemm.set_num_sms(original_num_sms)
+
+
+def _sanity_check_input(x_fp8: Tuple[torch.Tensor, torch.Tensor]):
+ if not _SANITY_CHECK:
+ return
+
+ x, x_scale = x_fp8
+
+ if x_scale.dtype == torch.int:
+ return
+
+ from sglang.srt.layers.quantization.fp8_utils import ceil_to_ue8m0
+
+ x_scale_ceil = ceil_to_ue8m0(x_scale)
+ assert torch.all(x_scale == x_scale_ceil), f"{x_scale=} {x_scale_ceil=}"
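
The `_sanity_check_input` guard above asserts that FP8 scale factors are already UE8M0-aligned, i.e. exact powers of two, which is what `ceil_to_ue8m0` is expected to enforce for the Blackwell scale recipe. A standalone illustration of the invariant, with a local power-of-two rounding helper standing in for `ceil_to_ue8m0` (an assumption about its behavior, not a copy of its implementation):

    import torch


    def round_up_to_pow2(x: torch.Tensor) -> torch.Tensor:
        # UE8M0 stores only an exponent, so valid scales are powers of two.
        return torch.exp2(torch.ceil(torch.log2(x)))


    aligned = torch.tensor([0.5, 1.0, 8.0])
    unaligned = torch.tensor([0.3, 1.0, 8.0])

    assert torch.all(aligned == round_up_to_pow2(aligned))          # passes the sanity check
    assert not torch.all(unaligned == round_up_to_pow2(unaligned))  # would trip the assert
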
diff --git a/python/sglang/srt/layers/quantization/fp8.py b/python/sglang/srt/layers/quantization/fp8.py
index 14ce92f36e7..b020e41887d 100644
--- a/python/sglang/srt/layers/quantization/fp8.py
+++ b/python/sglang/srt/layers/quantization/fp8.py
@@ -30,6 +30,9 @@ def dummy_func(*args, **kwargs):
from sglang.srt.distributed import get_tensor_model_parallel_world_size
from sglang.srt.layers.amx_utils import _amx_process_weight_after_loading
+from sglang.srt.layers.moe import MoeRunner, MoeRunnerBackend, MoeRunnerConfig
+from sglang.srt.layers.moe.moe_runner.triton import TritonMoeQuantInfo
+from sglang.srt.layers.moe.token_dispatcher.base import DispatchOutputChecker
from sglang.srt.layers.parameter import (
BlockQuantScaleParameter,
ModelWeightParameter,
@@ -49,6 +52,7 @@ def dummy_func(*args, **kwargs):
)
from sglang.srt.layers.quantization.fp8_utils import (
apply_fp8_linear,
+ can_auto_enable_marlin_fp8,
cutlass_fp8_supported,
dispatch_w8a8_block_fp8_linear,
input_to_float8,
@@ -63,7 +67,6 @@ def dummy_func(*args, **kwargs):
per_tensor_dequantize,
requantize_with_max_scale,
)
-from sglang.srt.layers.utils import is_sm90_supported, is_sm100_supported
from sglang.srt.utils import (
cpu_has_amx_support,
get_bool_env_var,
@@ -71,6 +74,8 @@ def dummy_func(*args, **kwargs):
is_cuda,
is_hip,
is_npu,
+ is_sm90_supported,
+ is_sm100_supported,
log_info_on_rank0,
next_power_of_2,
print_warning_once,
@@ -79,7 +84,11 @@ def dummy_func(*args, **kwargs):
)
if TYPE_CHECKING:
- from sglang.srt.layers.moe.moe_runner import MoeRunnerConfig
+ from sglang.srt.layers.moe.token_dispatcher import (
+ CombineInput,
+ DispatchOutput,
+ StandardDispatchOutput,
+ )
from sglang.srt.layers.moe.topk import TopKOutput
from sglang.srt.layers.quantization.w4afp8 import W4AFp8Config
@@ -209,17 +218,13 @@ def __init__(self, quant_config: Union[Fp8Config, W4AFp8Config]):
# For GPUs that lack FP8 hardware support, we can leverage the Marlin
# kernel for fast weight-only FP8 quantization
- self.use_marlin = (
- get_bool_env_var("SGLANG_FORCE_FP8_MARLIN") and MARLIN_FP8_AVAILABLE
- )
- # Disable marlin for ROCm
- if _is_hip:
- self.use_marlin = False
+ self.use_marlin = False
+ if _is_cuda and MARLIN_FP8_AVAILABLE:
+ force_marlin = get_bool_env_var("SGLANG_FORCE_FP8_MARLIN")
+ auto_enable = can_auto_enable_marlin_fp8()
+ self.use_marlin = force_marlin or auto_enable
self.block_quant = self.quant_config.weight_block_size is not None
- if self.block_quant:
- # Marlin doesn't support block-wise fp8
- self.use_marlin = False
self.w8a8_block_fp8_linear = dispatch_w8a8_block_fp8_linear()
@@ -332,7 +337,6 @@ def create_weights(
layer.register_parameter("input_scale", None)
def process_weights_after_loading(self, layer: Module) -> None:
- # Block quant doesn't need to process weights after loading
if self.block_quant:
# If ROCm, normalize the weights and scales to e4m3fnuz
if _is_fp8_fnuz:
@@ -342,100 +346,106 @@ def process_weights_after_loading(self, layer: Module) -> None:
weight_scale=layer.weight_scale_inv,
input_scale=None,
)
-
layer.input_scale = None
elif _is_cpu:
assert (
_is_cpu_amx_available
), "Fp8LinearMethod on CPU requires that CPU has AMX support"
_amx_process_weight_after_loading(layer, ["weight"])
+ layer.weight_scale_inv = torch.nn.Parameter(
+ layer.weight_scale_inv.data, requires_grad=False
+ )
return
else:
weight, weight_scale = layer.weight.data, layer.weight_scale_inv.data
- layer.weight = torch.nn.Parameter(weight, requires_grad=False)
- layer.weight_scale_inv = torch.nn.Parameter(
- weight_scale, requires_grad=False
- )
- return
+ layer.weight = Parameter(weight, requires_grad=False)
+ layer.weight_scale_inv = Parameter(weight_scale, requires_grad=False)
+ else:
+ layer.weight = Parameter(layer.weight.data, requires_grad=False)
- layer.weight = torch.nn.Parameter(layer.weight.data, requires_grad=False)
+ # If the checkpoint is not serialized in fp8, quantize the weights.
+ if not self.quant_config.is_checkpoint_fp8_serialized:
+ if self.cutlass_fp8_supported or self.use_marlin:
+ # apply per-channel quantization by default, as the
+ # cutlass sgl-kernel and marlin only support per-channel scales
+ qweight, weight_scale = per_token_group_quant_fp8(
+ layer.weight, layer.weight.shape[-1]
+ )
+ weight_scale = weight_scale.t().contiguous()
+ else:
+ # per-tensor quantization
+ qweight, weight_scale = input_to_float8(layer.weight)
+
+ # Update the layer with the new values.
+ layer.weight = Parameter(qweight.t(), requires_grad=False)
+ layer.weight_scale = Parameter(weight_scale, requires_grad=False)
+ layer.input_scale = None
- # If checkpoint not serialized fp8, quantize the weights.
- if not self.quant_config.is_checkpoint_fp8_serialized:
- if self.cutlass_fp8_supported or self.use_marlin:
- # apply per-channel quantization default, as cutlass sgl-kernel and marlin only support per-channel scale
- qweight, weight_scale = per_token_group_quant_fp8(
- layer.weight, layer.weight.shape[-1]
- )
- weight_scale = weight_scale.t().contiguous()
+ # If checkpoint is fp8, handle that there are N scales for N
+ # shards in a fused module
else:
- # per-tensor quantization
- qweight, weight_scale = input_to_float8(layer.weight)
-
- # Update the layer with the new values.
- layer.weight = Parameter(qweight.t(), requires_grad=False)
- layer.weight_scale = Parameter(weight_scale, requires_grad=False)
- layer.input_scale = None
-
- # If checkpoint is fp8, handle that there are N scales for N
- # shards in a fused module
- else:
- layer.weight_scale = torch.nn.Parameter(
- layer.weight_scale.data, requires_grad=False
- )
- if (
- hasattr(self.quant_config, "activation_scheme")
- and self.quant_config.activation_scheme == "static"
- ) or (
- hasattr(self.quant_config, "linear_activation_scheme")
- and self.quant_config.linear_activation_scheme == "static"
- ):
- layer.input_scale = torch.nn.Parameter(
- layer.input_scale.data, requires_grad=False
+ layer.weight_scale = Parameter(
+ layer.weight_scale.data, requires_grad=False
)
+ if (
+ hasattr(self.quant_config, "activation_scheme")
+ and self.quant_config.activation_scheme == "static"
+ ) or (
+ hasattr(self.quant_config, "linear_activation_scheme")
+ and self.quant_config.linear_activation_scheme == "static"
+ ):
+ layer.input_scale = Parameter(
+ layer.input_scale.data, requires_grad=False
+ )
- # cutlass sgl-kernel and marlin only support per-channel scale
- if self.cutlass_fp8_supported or self.use_marlin:
- weight = layer.weight
- weight_scale = convert_to_channelwise(
- layer.weight_scale, layer.logical_widths
- )
- else:
- # Dequant -> Quant with max scale so we can run per tensor.
- weight = layer.weight
- weight_scale = layer.weight_scale
- # If ROCm, normalize the weights and scales to e4m3fnuz
- if _is_fp8_fnuz:
- weight, weight_scale, input_scale = normalize_e4m3fn_to_e4m3fnuz(
+ # cutlass sgl-kernel and marlin only support per-channel scale
+ if self.cutlass_fp8_supported or self.use_marlin:
+ weight = layer.weight
+ weight_scale = convert_to_channelwise(
+ layer.weight_scale, layer.logical_widths
+ )
+ else:
+ # Dequant -> Quant with max scale so we can run per tensor.
+ weight = layer.weight
+ weight_scale = layer.weight_scale
+ # If ROCm, normalize the weights and scales to e4m3fnuz
+ if _is_fp8_fnuz:
+ weight, weight_scale, input_scale = (
+ normalize_e4m3fn_to_e4m3fnuz(
+ weight=weight,
+ weight_scale=weight_scale,
+ input_scale=layer.input_scale,
+ )
+ )
+ if input_scale is not None:
+ layer.input_scale = Parameter(
+ input_scale, requires_grad=False
+ )
+
+ weight_scale, weight = requantize_with_max_scale(
weight=weight,
weight_scale=weight_scale,
- input_scale=layer.input_scale,
+ logical_widths=layer.logical_widths,
)
- if input_scale is not None:
- layer.input_scale = Parameter(input_scale, requires_grad=False)
-
- weight_scale, weight = requantize_with_max_scale(
- weight=weight,
- weight_scale=weight_scale,
- logical_widths=layer.logical_widths,
- )
- # Update layer with new values.
- layer.weight = Parameter(weight.t(), requires_grad=False)
- layer.weight_scale = Parameter(weight_scale, requires_grad=False)
- if (
- hasattr(self.quant_config, "activation_scheme")
- and self.quant_config.activation_scheme == "static"
- ) or (
- hasattr(self.quant_config, "linear_activation_scheme")
- and self.quant_config.linear_activation_scheme == "static"
- ):
- layer.input_scale = Parameter(
- layer.input_scale.max(), requires_grad=False
- )
+ # Update layer with new values.
+ layer.weight = Parameter(weight.t(), requires_grad=False)
+ layer.weight_scale = Parameter(weight_scale, requires_grad=False)
+ if (
+ hasattr(self.quant_config, "activation_scheme")
+ and self.quant_config.activation_scheme == "static"
+ ) or (
+ hasattr(self.quant_config, "linear_activation_scheme")
+ and self.quant_config.linear_activation_scheme == "static"
+ ):
+ layer.input_scale = Parameter(
+ layer.input_scale.max(), requires_grad=False
+ )
if self.use_marlin:
- prepare_fp8_layer_for_marlin(layer)
+ if self.block_quant:
+ layer.weight_block_size = self.quant_config.weight_block_size
+ prepare_fp8_layer_for_marlin(layer, not self.block_quant)
# Activations not quantized for marlin.
del layer.input_scale
@@ -445,7 +455,6 @@ def apply(
x: torch.Tensor,
bias: Optional[torch.Tensor] = None,
) -> torch.Tensor:
-
if self.use_marlin:
return apply_fp8_marlin_linear(
input=x,
@@ -516,13 +525,19 @@ def __init__(self, quant_config: Fp8Config):
self.quant_config = quant_config
self.block_quant = self.quant_config.weight_block_size is not None
self.cutlass_fp8_supported = cutlass_fp8_supported()
+ self.use_cutlass_fused_experts_fp8 = (
+ get_bool_env_var("SGLANG_CUTLASS_MOE")
+ and self.cutlass_fp8_supported
+ and self.block_quant
+ and (is_sm100_supported() or is_sm90_supported())
+ )
def create_weights(
self,
layer: Module,
num_experts: int,
hidden_size: int,
- intermediate_size: int,
+ intermediate_size_per_partition: int,
params_dtype: torch.dtype,
**extra_weight_attrs,
):
@@ -538,18 +553,18 @@ def create_weights(
)
# NOTE(HandH1998): To ensure proper alignment of the block-wise quantization scales, the output_size of the weights for both the gate and up layers must be divisible by block_n.
# Required by column parallel or enabling merged weights
- if intermediate_size % block_n != 0:
+ if intermediate_size_per_partition % block_n != 0:
raise ValueError(
f"The output_size of gate's and up's weight = "
- f"{intermediate_size} is not divisible by "
+ f"{intermediate_size_per_partition} is not divisible by "
f"weight quantization block_n = {block_n}."
)
if tp_size > 1:
# Required by row parallel
- if intermediate_size % block_k != 0:
+ if intermediate_size_per_partition % block_k != 0:
raise ValueError(
f"The input_size of down's weight = "
- f"{intermediate_size} is not divisible by "
+ f"{intermediate_size_per_partition} is not divisible by "
f"weight quantization block_k = {block_k}."
)
@@ -559,7 +574,7 @@ def create_weights(
w13_weight = torch.nn.Parameter(
torch.empty(
num_experts,
- 2 * intermediate_size,
+ 2 * intermediate_size_per_partition,
hidden_size // 8,
dtype=params_dtype,
),
@@ -567,20 +582,29 @@ def create_weights(
)
w2_weight = torch.nn.Parameter(
torch.empty(
- num_experts, hidden_size, intermediate_size // 8, dtype=params_dtype
+ num_experts,
+ hidden_size,
+ intermediate_size_per_partition // 8,
+ dtype=params_dtype,
),
requires_grad=False,
)
else:
w13_weight = torch.nn.Parameter(
torch.empty(
- num_experts, 2 * intermediate_size, hidden_size, dtype=params_dtype
+ num_experts,
+ 2 * intermediate_size_per_partition,
+ hidden_size,
+ dtype=params_dtype,
),
requires_grad=False,
)
w2_weight = torch.nn.Parameter(
torch.empty(
- num_experts, hidden_size, intermediate_size, dtype=params_dtype
+ num_experts,
+ hidden_size,
+ intermediate_size_per_partition,
+ dtype=params_dtype,
),
requires_grad=False,
)
@@ -596,7 +620,7 @@ def create_weights(
w13_weight_scale = torch.nn.Parameter(
torch.ones(
num_experts,
- 2 * ((intermediate_size + block_n - 1) // block_n),
+ 2 * ((intermediate_size_per_partition + block_n - 1) // block_n),
(hidden_size + block_k - 1) // block_k,
dtype=torch.float32,
),
@@ -606,7 +630,7 @@ def create_weights(
torch.ones(
num_experts,
(hidden_size + block_n - 1) // block_n,
- (intermediate_size + block_k - 1) // block_k,
+ (intermediate_size_per_partition + block_k - 1) // block_k,
dtype=torch.float32,
),
requires_grad=False,
@@ -614,11 +638,7 @@ def create_weights(
layer.register_parameter("w13_weight_scale_inv", w13_weight_scale)
layer.register_parameter("w2_weight_scale_inv", w2_weight_scale)
assert self.quant_config.activation_scheme == "dynamic"
- if (
- get_bool_env_var("SGLANG_CUTLASS_MOE")
- and self.cutlass_fp8_supported
- and (is_sm100_supported() or is_sm90_supported())
- ):
+ if self.use_cutlass_fused_experts_fp8:
self.ab_strides1 = torch.full(
(num_experts,),
hidden_size,
@@ -627,13 +647,13 @@ def create_weights(
)
self.c_strides1 = torch.full(
(num_experts,),
- 2 * intermediate_size,
+ 2 * intermediate_size_per_partition,
device=w13_weight.device,
dtype=torch.int64,
)
self.ab_strides2 = torch.full(
(num_experts,),
- intermediate_size,
+ intermediate_size_per_partition,
device=w2_weight.device,
dtype=torch.int64,
)
@@ -686,7 +706,11 @@ def create_weights(
if _is_hip: # _use_aiter: TODO: add check back after triton kernel
# ROCm - using column scaling, duplicate scaling numbers in case per tensor scaling
w13_weight_scale1 = torch.nn.Parameter(
- torch.ones(num_experts, 2 * intermediate_size, dtype=torch.float32),
+ torch.ones(
+ num_experts,
+ 2 * intermediate_size_per_partition,
+ dtype=torch.float32,
+ ),
requires_grad=False,
)
w2_weight_scale1 = torch.nn.Parameter(
@@ -962,6 +986,7 @@ def process_weights_hip_scale_padding(self, layer: Module):
requires_grad=False,
)
torch.cuda.empty_cache()
+
# ROCm (_use_aiter): using column-wise scaling
layer.w13_weight_scale1 *= layer.w13_weight_scale.unsqueeze(-1)
layer.w2_weight_scale1 *= layer.w2_weight_scale.unsqueeze(-1)
@@ -978,14 +1003,23 @@ def process_weights_hip_scale_padding(self, layer: Module):
)
torch.cuda.empty_cache()
+ def create_moe_runner(
+ self, layer: torch.nn.Module, moe_runner_config: MoeRunnerConfig
+ ):
+ self.moe_runner_config = moe_runner_config
+ self.runner = MoeRunner(MoeRunnerBackend.TRITON, moe_runner_config)
+
def apply(
self,
layer: torch.nn.Module,
- x: torch.Tensor,
- topk_output: TopKOutput,
- moe_runner_config: MoeRunnerConfig,
- ) -> torch.Tensor:
- from sglang.srt.layers.moe.fused_moe_triton.fused_moe import fused_experts
+ dispatch_output: DispatchOutput,
+ ) -> CombineInput:
+
+ from sglang.srt.layers.moe.token_dispatcher import StandardCombineInput
+
+ x = dispatch_output.hidden_states
+ topk_output = dispatch_output.topk_output
+ moe_runner_config = self.moe_runner_config
if use_intel_amx_backend(layer):
from sglang.srt.layers.moe.topk import apply_topk_weights_cpu
@@ -995,7 +1029,7 @@ def apply(
moe_runner_config.apply_router_weight_on_input, topk_weights, x
)
- return torch.ops.sgl_kernel.fused_experts_cpu(
+ output = torch.ops.sgl_kernel.fused_experts_cpu(
x,
layer.w13_weight,
layer.w2_weight,
@@ -1011,6 +1045,7 @@ def apply(
None, # a2_scale
True, # is_vnni
)
+ return StandardCombineInput(hidden_states=output)
if _is_hip:
ret = self.maybe_apply_hip_fused_experts(
@@ -1021,14 +1056,9 @@ def apply(
moe_runner_config.no_combine,
)
if ret is not None:
- return ret
+ return StandardCombineInput(hidden_states=ret)
- if (
- get_bool_env_var("SGLANG_CUTLASS_MOE")
- and self.cutlass_fp8_supported
- and self.block_quant
- and (is_sm100_supported() or is_sm90_supported())
- ):
+ if self.use_cutlass_fused_experts_fp8:
from sglang.srt.layers.moe.cutlass_moe import cutlass_fused_experts_fp8
topk_weights, topk_ids, _ = topk_output
@@ -1055,19 +1085,13 @@ def apply(
self.problem_sizes2,
use_fp8_blockscale=True,
)
- # TODO: Fuse into select_experts
- if moe_runner_config.routed_scaling_factor is not None:
- output *= moe_runner_config.routed_scaling_factor
- return output
- # Expert fusion with FP8 quantization
- return fused_experts(
- x,
- layer.w13_weight,
- layer.w2_weight,
- topk_output=topk_output,
- moe_runner_config=moe_runner_config,
+ return StandardCombineInput(hidden_states=output)
+
+ quant_info = TritonMoeQuantInfo(
+ w13_weight=layer.w13_weight,
+ w2_weight=layer.w2_weight,
use_fp8_w8a8=True,
- w1_scale=(
+ w13_scale=(
layer.w13_weight_scale_inv
if self.block_quant
else layer.w13_weight_scale
@@ -1075,21 +1099,22 @@ def apply(
w2_scale=(
layer.w2_weight_scale_inv if self.block_quant else layer.w2_weight_scale
),
- a1_scale=layer.w13_input_scale,
+ a13_scale=layer.w13_input_scale,
a2_scale=layer.w2_input_scale,
block_shape=self.quant_config.weight_block_size,
)
+ return self.runner.run(dispatch_output, quant_info)
def apply_with_router_logits(
self,
layer: torch.nn.Module,
- x: torch.Tensor,
- topk_output: TopKOutput,
- moe_runner_config: MoeRunnerConfig,
+ dispatch_output: StandardDispatchOutput,
) -> torch.Tensor:
+ x = dispatch_output.hidden_states
+ topk_output = dispatch_output.topk_output
- activation = moe_runner_config.activation
- routed_scaling_factor = moe_runner_config.routed_scaling_factor
+ activation = self.moe_runner_config.activation
+ routed_scaling_factor = self.moe_runner_config.routed_scaling_factor
from flashinfer.fused_moe import trtllm_fp8_block_scale_moe
@@ -1105,9 +1130,20 @@ def apply_with_router_logits(
# NOTE: scales of hidden states have to be transposed!
a_sf_t = a_sf.t().contiguous()
+ assert (
+ topk_config.num_expert_group is not None
+ and topk_config.topk_group is not None
+ ), "Current trtllm_fp8_block_scale_moe kernel does not support these two arguments as None"
+
+ correction_bias = (
+ None
+ if topk_config.correction_bias is None
+ else topk_config.correction_bias.to(x.dtype)
+ )
+
return trtllm_fp8_block_scale_moe(
routing_logits=router_logits.to(torch.float32),
- routing_bias=layer.correction_bias.to(x.dtype),
+ routing_bias=correction_bias,
hidden_states=a_q,
hidden_states_scale=a_sf_t,
gemm1_weights=layer.w13_weight,
@@ -1121,9 +1157,11 @@ def apply_with_router_logits(
intermediate_size=layer.w2_weight.shape[2],
local_expert_offset=layer.moe_ep_rank * layer.num_local_experts,
local_num_experts=layer.num_local_experts,
- routed_scaling_factor=routed_scaling_factor,
+ routed_scaling_factor=(
+ routed_scaling_factor if routed_scaling_factor is not None else 1.0
+ ),
tile_tokens_dim=get_tile_tokens_dim(
- x.shape[0], layer.top_k, layer.num_experts
+ x.shape[0], topk_config.top_k, layer.num_experts
),
routing_method_type=2, # DeepSeek-styled routing method
use_shuffled_weight=False,
diff --git a/python/sglang/srt/layers/quantization/fp8_kernel.py b/python/sglang/srt/layers/quantization/fp8_kernel.py
index e9df65a1560..f0512365b40 100644
--- a/python/sglang/srt/layers/quantization/fp8_kernel.py
+++ b/python/sglang/srt/layers/quantization/fp8_kernel.py
@@ -113,7 +113,7 @@ def deep_gemm_fp8_fp8_bf16_nt_fake(
@triton.jit
-def _per_token_group_quant_fp8(
+def _per_token_group_quant_8bit(
# Pointers to inputs and output
y_ptr,
y_q_ptr,
@@ -125,8 +125,8 @@ def _per_token_group_quant_fp8(
# Avoid to divide zero
eps,
# Information for float8
- fp8_min,
- fp8_max,
+ bit8_min,
+ bit8_max,
# Meta-parameters
BLOCK: tl.constexpr,
):
@@ -147,16 +147,16 @@ def _per_token_group_quant_fp8(
y = tl.load(y_ptr + cols, mask=mask, other=0.0).to(tl.float32)
# Quant
_absmax = tl.maximum(tl.max(tl.abs(y)), eps)
- y_s = _absmax / fp8_max
+ y_s = _absmax / bit8_max
y_s_inv = 1.0 / y_s
- y_q = tl.clamp(y * y_s_inv, fp8_min, fp8_max).to(y_q_ptr.dtype.element_ty)
+ y_q = tl.clamp(y * y_s_inv, bit8_min, bit8_max).to(y_q_ptr.dtype.element_ty)
tl.store(y_q_ptr + cols, y_q, mask=mask)
tl.store(y_s_ptr, y_s)
@triton.jit
-def _per_token_group_quant_fp8_colmajor(
+def _per_token_group_quant_8bit_colmajor(
# Pointers to inputs and output
y_ptr,
y_q_ptr,
@@ -169,8 +169,8 @@ def _per_token_group_quant_fp8_colmajor(
# Avoid to divide zero
eps,
# Information for float8
- fp8_min,
- fp8_max,
+ bit8_min,
+ bit8_max,
# Meta-parameters
BLOCK: tl.constexpr,
SCALE_UE8M0: tl.constexpr,
@@ -197,19 +197,20 @@ def _per_token_group_quant_fp8_colmajor(
y = tl.load(y_ptr + cols, mask=mask, other=0.0).to(tl.float32)
# Quant
_absmax = tl.maximum(tl.max(tl.abs(y)), eps)
- y_s = _absmax / fp8_max
+ y_s = _absmax / bit8_max
if SCALE_UE8M0:
y_s = tl.exp2(tl.ceil(tl.log2(tl.abs(y_s))))
- y_q = tl.clamp(y / y_s, fp8_min, fp8_max).to(y_q_ptr.dtype.element_ty)
+ y_q = tl.clamp(y / y_s, bit8_min, bit8_max).to(y_q_ptr.dtype.element_ty)
tl.store(y_q_ptr + cols, y_q, mask=mask)
tl.store(y_s_ptr, y_s)
-def per_token_group_quant_fp8(
+def _per_token_group_quant_8bit_raw(
x: torch.Tensor,
group_size: int,
eps: float = 1e-10,
+ dtype: torch.dtype = fp8_dtype,
column_major_scales: bool = False,
scale_tma_aligned: bool = False,
scale_ue8m0: bool = False,
@@ -223,6 +224,7 @@ def per_token_group_quant_fp8(
x: The input tenosr with ndim >= 2.
group_size: The group size used for quantization.
eps: The minimum to avoid dividing zero.
+ dtype: The dtype of the output tensor.
Returns:
Tuple[torch.Tensor, torch.Tensor]: The quantized tensor and the scaling factor for quantization.
@@ -232,7 +234,21 @@ def per_token_group_quant_fp8(
), "the last dimension of `x` cannot be divisible by `group_size`"
assert x.is_contiguous(), "`x` is not contiguous"
- x_q = torch.empty_like(x, device=x.device, dtype=fp8_dtype)
+ if _is_hip:
+ if dtype == torch.int8:
+ bit8_max = 127.0
+ else:
+ bit8_max = 224.0
+ bit8_min = -bit8_max # TODO incorrect for int8
+ else:
+ if dtype == torch.int8:
+ info = torch.iinfo(dtype)
+ else:
+ info = torch.finfo(dtype)
+ bit8_max = info.max
+ bit8_min = info.min
+
+ x_q = torch.empty_like(x, device=x.device, dtype=dtype)
x_s = create_per_token_group_quant_fp8_output_scale(
x_shape=x.shape,
device=x.device,
@@ -250,7 +266,7 @@ def per_token_group_quant_fp8(
num_warps = min(max(BLOCK // 256, 1), 8)
num_stages = 1
if column_major_scales:
- _per_token_group_quant_fp8_colmajor[(M,)](
+ _per_token_group_quant_8bit_colmajor[(M,)](
x,
x_q,
x_s,
@@ -258,8 +274,8 @@ def per_token_group_quant_fp8(
x.shape[1],
x_s.stride(1),
eps,
- fp8_min=fp8_min,
- fp8_max=fp8_max,
+ bit8_min=bit8_min,
+ bit8_max=bit8_max,
BLOCK=BLOCK,
num_warps=num_warps,
num_stages=num_stages,
@@ -267,22 +283,22 @@ def per_token_group_quant_fp8(
)
else:
assert not scale_ue8m0
- _per_token_group_quant_fp8[(M,)](
+ _per_token_group_quant_8bit[(M,)](
x,
x_q,
x_s,
group_size,
N,
eps,
- fp8_min=fp8_min,
- fp8_max=fp8_max,
+ bit8_min=bit8_min,
+ bit8_max=bit8_max,
BLOCK=BLOCK,
num_warps=num_warps,
num_stages=num_stages,
)
if scale_ue8m0:
- from deep_gemm.utils.layout import transform_sf_into_required_layout
+ from deep_gemm import transform_sf_into_required_layout
assert group_size == 128
x_s = transform_sf_into_required_layout(
@@ -297,6 +313,117 @@ def per_token_group_quant_fp8(
return x_q, x_s
+# backward compatibility
+per_token_group_quant_fp8 = _per_token_group_quant_8bit_raw
+
+
+def _per_token_group_quant_8bit_fuse_silu_and_mul(
+ x: torch.Tensor,
+ group_size: int,
+ dst_dtype: torch.dtype,
+ column_major_scales: bool,
+ scale_tma_aligned: bool,
+ scale_ue8m0: bool,
+ masked_m: Optional[torch.Tensor],
+) -> Tuple[torch.Tensor, torch.Tensor]:
+ # An alternative implementation (useful e.g. for comparison tests):
+ # from sgl_kernel import silu_and_mul
+ # x_after_silu_and_mul = silu_and_mul(x)
+ # return per_token_group_quant_fp8(
+ # x_after_silu_and_mul,
+ # group_size=group_size,
+ # eps=eps,
+ # column_major_scales=column_major_scales,
+ # scale_tma_aligned=scale_tma_aligned,
+ # scale_ue8m0=scale_ue8m0,
+ # )
+
+ from deep_gemm import transform_sf_into_required_layout
+
+ from sglang.srt.layers.moe.ep_moe.kernels import silu_and_mul_masked_post_quant_fwd
+
+ assert column_major_scales
+ assert scale_tma_aligned
+ assert scale_ue8m0
+
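+ # The masked kernel expects a 3D (num_groups, tokens, hidden) layout, so a 2D
+ # input is treated as a single group whose mask covers all of its tokens.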
+ needs_unsqueeze = x.dim() == 2
+ if needs_unsqueeze:
+ num_tokens, _ = x.shape
+ x = x.unsqueeze(0)
+ assert masked_m is None
+ masked_m = torch.tensor([num_tokens], device=x.device, dtype=torch.int32)
+
+ # Use `zeros` for easier testing
+ output = torch.zeros(
+ (*x.shape[:-1], x.shape[-1] // 2),
+ device=x.device,
+ dtype=dst_dtype,
+ )
+ # Use `zeros` for easier testing
+ output_scale_for_kernel = torch.zeros(
+ (*x.shape[:-1], x.shape[-1] // 2 // group_size),
+ device=x.device,
+ dtype=torch.float32,
+ )
+ silu_and_mul_masked_post_quant_fwd(
+ input=x,
+ output=output,
+ output_scale=output_scale_for_kernel,
+ quant_group_size=group_size,
+ masked_m=masked_m,
+ scale_ue8m0=scale_ue8m0,
+ )
+
+ assert group_size == 128
+ output_scale = transform_sf_into_required_layout(
+ output_scale_for_kernel,
+ num_groups=output.shape[0],
+ mn=output.shape[-2],
+ k=output.shape[-1],
+ recipe=(1, group_size, group_size),
+ is_sfa=True,
+ )
+
+ if needs_unsqueeze:
+ output = output.squeeze(0)
+ output_scale = output_scale.squeeze(0)
+
+ return output, output_scale
+
+
+def per_token_group_quant_8bit(
+ x: torch.Tensor,
+ group_size: int,
+ dst_dtype: torch.dtype,
+ eps: float = 1e-10,
+ column_major_scales: bool = False,
+ scale_tma_aligned: bool = False,
+ scale_ue8m0: bool = False,
+ fuse_silu_and_mul: bool = False,
+ masked_m: Optional[torch.Tensor] = None,
+) -> Tuple[torch.Tensor, torch.Tensor]:
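+ # When fuse_silu_and_mul is set, x holds concatenated (gate, up) halves;
+ # SiLU(gate) * up is applied before quantization, so the quantized output has
+ # half the last dimension of x.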
+ if fuse_silu_and_mul:
+ return _per_token_group_quant_8bit_fuse_silu_and_mul(
+ x=x,
+ group_size=group_size,
+ dst_dtype=dst_dtype,
+ column_major_scales=column_major_scales,
+ scale_tma_aligned=scale_tma_aligned,
+ scale_ue8m0=scale_ue8m0,
+ masked_m=masked_m,
+ )
+ else:
+ return _per_token_group_quant_8bit_raw(
+ x=x,
+ group_size=group_size,
+ eps=eps,
+ column_major_scales=column_major_scales,
+ scale_tma_aligned=scale_tma_aligned,
+ scale_ue8m0=scale_ue8m0,
+ dtype=dst_dtype,
+ )
+
+
def create_per_token_group_quant_fp8_output_scale(
x_shape,
device,
@@ -307,16 +434,16 @@ def create_per_token_group_quant_fp8_output_scale(
):
if scale_ue8m0:
assert column_major_scales and scale_tma_aligned
- x_q_mn, x_q_k = x_shape
+ *x_batch, x_q_mn, x_q_k = x_shape
x_s_mn, x_s_k = x_q_mn, x_q_k // 128
aligned_mn = align(x_s_mn, 4)
aligned_k = align(x_s_k, 4)
# TODO(FIXME): Fix cuda kernel and recover here to empty.
- return torch.zeros(
- (aligned_k // 4, aligned_mn),
+ return torch.empty(
+ (*x_batch, aligned_k // 4, aligned_mn),
device=device,
dtype=torch.int,
- ).transpose(0, 1)[:x_s_mn, :]
+ ).transpose(-1, -2)[..., :x_s_mn, :]
elif column_major_scales:
if scale_tma_aligned:
# TODO extract "align" function
@@ -348,15 +475,19 @@ def sglang_per_token_group_quant_fp8(
column_major_scales: bool = False,
scale_tma_aligned: bool = False,
scale_ue8m0: bool = False,
+ fuse_silu_and_mul: bool = False,
+ masked_m: Optional[torch.Tensor] = None,
):
assert (
x.shape[-1] % group_size == 0
), "the last dimension of `x` cannot be divisible by `group_size`"
assert x.is_contiguous(), "`x` is not contiguous"
- x_q = torch.empty_like(x, device=x.device, dtype=fp8_dtype)
+ out_shape = (*x.shape[:-1], x.shape[-1] // (2 if fuse_silu_and_mul else 1))
+
+ x_q = torch.empty(out_shape, device=x.device, dtype=fp8_dtype)
x_s = create_per_token_group_quant_fp8_output_scale(
- x_shape=x.shape,
+ x_shape=out_shape,
device=x.device,
group_size=group_size,
column_major_scales=column_major_scales,
@@ -372,6 +503,46 @@ def sglang_per_token_group_quant_fp8(
return x_q, x_s
+# TODO maybe unify int8 and fp8 code later
+def sglang_per_token_group_quant_8bit(
+ x: torch.Tensor,
+ group_size: int,
+ dst_dtype: torch.dtype,
+ eps: float = 1e-10,
+ column_major_scales: bool = False,
+ scale_tma_aligned: bool = False,
+ scale_ue8m0: bool = False,
+ fuse_silu_and_mul: bool = False,
+ masked_m: Optional[torch.Tensor] = None,
+):
+ from sglang.srt.layers.quantization.int8_kernel import (
+ sglang_per_token_group_quant_int8,
+ )
+
+ if dst_dtype == torch.int8:
+ assert not column_major_scales
+ assert not scale_tma_aligned
+ assert not fuse_silu_and_mul
+ assert masked_m is None
+ return sglang_per_token_group_quant_int8(
+ x=x,
+ group_size=group_size,
+ eps=eps,
+ dtype=dst_dtype,
+ )
+
+ return sglang_per_token_group_quant_fp8(
+ x=x,
+ group_size=group_size,
+ eps=eps,
+ column_major_scales=column_major_scales,
+ scale_tma_aligned=scale_tma_aligned,
+ scale_ue8m0=scale_ue8m0,
+ fuse_silu_and_mul=fuse_silu_and_mul,
+ masked_m=masked_m,
+ )
+
+
def sglang_per_token_quant_fp8(
x: torch.Tensor,
dtype: torch.dtype = fp8_dtype,
diff --git a/python/sglang/srt/layers/quantization/fp8_utils.py b/python/sglang/srt/layers/quantization/fp8_utils.py
index 259d0098b1b..998423b8632 100644
--- a/python/sglang/srt/layers/quantization/fp8_utils.py
+++ b/python/sglang/srt/layers/quantization/fp8_utils.py
@@ -5,7 +5,7 @@
from sglang.srt.layers.quantization import deep_gemm_wrapper
from sglang.srt.layers.quantization.fp8_kernel import sglang_per_token_group_quant_fp8
from sglang.srt.layers.quantization.mxfp4_tensor import MXFP4QuantizeUtil
-from sglang.srt.layers.utils import is_sm100_supported
+from sglang.srt.utils import is_sm100_supported
try:
from vllm import _custom_ops as ops
@@ -45,7 +45,7 @@
if _use_aiter:
import aiter
- from aiter import gemm_a8w8_blockscale, get_hip_quant
+ from aiter import gemm_a8w8_blockscale, gemm_a8w8_bpreshuffle, get_hip_quant
aiter_per1x128_quant = get_hip_quant(aiter.QuantType.per_1x128)
@@ -53,6 +53,7 @@
from sgl_kernel import fp8_blockwise_scaled_mm, fp8_scaled_mm
use_vllm_cutlass_w8a8_fp8_kernel = get_bool_env_var("USE_VLLM_CUTLASS_W8A8_FP8_KERNEL")
+use_triton_w8a8_fp8_kernel = get_bool_env_var("USE_TRITON_W8A8_FP8_KERNEL")
# Input scaling factors are no longer optional in _scaled_mm starting
# from pytorch 2.5. Allocating a dummy tensor to pass as input_scale
@@ -247,11 +248,6 @@ def deepgemm_w8a8_block_fp8_linear_with_fallback(
scale_ue8m0=deep_gemm_wrapper.DEEPGEMM_SCALE_UE8M0,
)
- # NOTE(alcanderian): Useless when scale is packed to int32
- # if get_bool_env_var("SGLANG_W8A8_DEEPGEMM_SANITY_CHECK_UE8M0"):
- # _check_ue8m0("x_scale", x_scale)
- # _check_ue8m0("weight_scale", ws)
-
output = w8a8_block_fp8_matmul_deepgemm(
q_input, weight, x_scale, weight_scale, block_size, output_dtype=output_dtype
)
@@ -260,11 +256,6 @@ def deepgemm_w8a8_block_fp8_linear_with_fallback(
return output.to(dtype=output_dtype).view(*output_shape)
-def _check_ue8m0(name, x):
- x_ceil = ceil_to_ue8m0(x)
- assert torch.all(x == x_ceil), f"{name=} {x=} {x_ceil=}"
-
-
def aiter_w8a8_block_fp8_linear(
input: torch.Tensor,
weight: torch.Tensor,
@@ -458,7 +449,7 @@ def _transform_scale(sf, mn: int):
import deep_gemm.utils.layout
sf = sf.index_select(-2, torch.arange(mn, device=sf.device) // 128)
- sf = deep_gemm.utils.layout.get_col_major_tma_aligned_packed_tensor(sf)
+ sf = deep_gemm.utils.layout.get_mn_major_tma_aligned_packed_ue8m0_tensor(sf)
return sf
out_s = _transform_scale(out_s, mn=out_w.shape[-2])
@@ -556,7 +547,10 @@ def apply_fp8_linear(
# We also don't pad when using torch.compile,
# as it breaks with dynamic shapes.
if pad_output is None:
- pad_output = not get_bool_env_var("SGLANG_ENABLE_TORCH_COMPILE")
+ pad_output = (
+ not get_bool_env_var("SGLANG_ENABLE_TORCH_COMPILE")
+ and not cutlass_fp8_supported
+ )
output_padding = 17 if pad_output else None
# View input as 2D matrix for fp8 methods
@@ -592,7 +586,7 @@ def apply_fp8_linear(
cutlass_compatible_b = (
weight.shape[0] % 16 == 0 and weight.shape[1] % 16 == 0
)
- if not cutlass_compatible_b:
+ if not cutlass_compatible_b or use_triton_w8a8_fp8_kernel:
# Massage the input to be 2D
qinput = qinput.view(-1, qinput.shape[-1])
output = triton_scaled_mm(
@@ -648,25 +642,49 @@ def apply_fp8_linear(
use_per_token_if_dynamic
and not per_tensor_weights
and not per_tensor_activations
- and USE_ROWWISE_TORCH_SCALED_MM
+ and (USE_ROWWISE_TORCH_SCALED_MM or _use_aiter)
):
- # For now validated on ROCm platform
- # fp8 rowwise scaling in torch._scaled_mm is introduced in
- # https://github.com/pytorch/pytorch/pull/144432 using hipBLASLt
- # and ROCm 6.3, which only exists in torch 2.7 and above.
- # For CUDA platform please validate if the
- # torch._scaled_mm support rowwise scaled GEMM
- # Fused GEMM_DQ Rowwise GEMM
- output = torch._scaled_mm(
- qinput,
- weight,
- out_dtype=input.dtype,
- scale_a=x_scale,
- scale_b=weight_scale.t(),
- bias=bias,
- )
- return _process_scaled_mm_output(output, input_2d.shape, output_shape)
-
+ # Reaching this branch means dynamic per-token / per-channel quantization:
+ # per-token scales for the input matrix (one scale per row, i.e. per token) and
+ # per-channel scales for the weight matrix (one scale per column, i.e. per channel).
+ if _use_aiter:
+ # gemm_a8w8_bpreshuffle(XQ, WQ, x_scale, w_scale, dtype)
+ # XQ -> input tensor, shape = (m, k)
+ # WQ -> weight tensor, shape = (n, k), preshuffled for better performance
+ # x_scale -> input scale tensor, shape = (m, 1)
+ # w_scale -> weight scale tensor, shape = (n, 1)
+ # dtype -> output dtype
+ output = gemm_a8w8_bpreshuffle(
+ XQ=qinput,
+ WQ=weight,
+ x_scale=x_scale,
+ w_scale=weight_scale,
+ dtype=input.dtype,
+ )
+ if bias is not None:
+ output += bias
+ return _process_scaled_mm_output(
+ output, input_2d.shape, [*input.shape[:-1], weight.shape[0]]
+ )
+ else:
+ # For now validated on ROCm platform
+ # fp8 rowwise scaling in torch._scaled_mm is introduced in
+ # https://github.com/pytorch/pytorch/pull/144432 using hipBLASLt
+ # and ROCm 6.3, which only exists in torch 2.7 and above.
+ # For CUDA platform please validate if the
+ # torch._scaled_mm support rowwise scaled GEMM
+ # Fused GEMM_DQ Rowwise GEMM
+ output = torch._scaled_mm(
+ qinput,
+ weight,
+ out_dtype=input.dtype,
+ scale_a=x_scale,
+ scale_b=weight_scale.t(),
+ bias=bias,
+ )
+ return _process_scaled_mm_output(
+ output, input_2d.shape, output_shape
+ )
else:
# Fallback for channelwise case, where we use unfused DQ
# due to limitations with scaled_mm
@@ -735,14 +753,25 @@ def apply_fp8_linear(
assert (
weight_scale.numel() == weight.shape[1]
), "cutlass w8a8 fp8 sgl-kernel only supports per-channel scale"
- output = fp8_scaled_mm(
- qinput,
- weight,
- x_scale,
- weight_scale,
- out_dtype=input.dtype,
- bias=bias,
+
+ cutlass_compatible_b = (
+ weight.shape[0] % 16 == 0 and weight.shape[1] % 16 == 0
)
+ if not cutlass_compatible_b or use_triton_w8a8_fp8_kernel:
+ # Massage the input to be 2D
+ qinput = qinput.view(-1, qinput.shape[-1])
+ output = triton_scaled_mm(
+ qinput, weight, x_scale, weight_scale, input.dtype, bias
+ )
+ else:
+ output = fp8_scaled_mm(
+ qinput,
+ weight,
+ x_scale,
+ weight_scale,
+ out_dtype=input.dtype,
+ bias=bias,
+ )
return output.view(*output_shape)
except (ImportError, NameError, AttributeError):
pass
@@ -789,3 +818,12 @@ def apply_fp8_linear(
bias,
input.dtype,
)
+
+
+def can_auto_enable_marlin_fp8() -> bool:
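+ # SM 8.0-8.8 GPUs (e.g. Ampere) have no native FP8 tensor cores, so the Marlin
+ # weight-only FP8 path is auto-enabled there; SM 8.9+ (Ada, Hopper) supports
+ # FP8 natively and keeps the regular FP8 GEMM kernels.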
+ try:
+ major, minor = get_device_capability()
+ sm = major * 10 + minor
+ return 80 <= sm < 89
+ except Exception:
+ return False
diff --git a/python/sglang/srt/layers/quantization/fpgemm_fp8.py b/python/sglang/srt/layers/quantization/fpgemm_fp8.py
new file mode 100644
index 00000000000..5a78626ff3c
--- /dev/null
+++ b/python/sglang/srt/layers/quantization/fpgemm_fp8.py
@@ -0,0 +1,203 @@
+# SPDX-License-Identifier: Apache-2.0
+from __future__ import annotations
+
+import logging
+from typing import Any, Optional
+
+import torch
+from torch.nn import Module
+from torch.nn.parameter import Parameter
+
+from sglang.srt.layers.linear import LinearBase
+from sglang.srt.layers.parameter import ChannelQuantScaleParameter, ModelWeightParameter
+from sglang.srt.layers.quantization.base_config import (
+ FusedMoEMethodBase,
+ LinearMethodBase,
+ QuantizationConfig,
+ QuantizeMethodBase,
+)
+from sglang.srt.layers.quantization.fp8_kernel import is_fp8_fnuz
+from sglang.srt.layers.quantization.fp8_utils import (
+ apply_fp8_linear,
+ can_auto_enable_marlin_fp8,
+ cutlass_fp8_supported,
+ normalize_e4m3fn_to_e4m3fnuz,
+)
+from sglang.srt.layers.quantization.marlin_utils_fp8 import (
+ apply_fp8_marlin_linear,
+ prepare_fp8_layer_for_marlin,
+)
+from sglang.srt.layers.quantization.unquant import UnquantizedLinearMethod
+from sglang.srt.layers.quantization.utils import is_layer_skipped, replace_parameter
+from sglang.srt.utils import get_bool_env_var, is_cuda
+
+_is_cuda = is_cuda()
+_is_fp8_fnuz = is_fp8_fnuz()
+
+logger = logging.getLogger(__name__)
+
+
+class FBGEMMFp8Config(QuantizationConfig):
+ """Config class for FBGEMM Fp8."""
+
+ def __init__(self, ignore_list: list[str], input_scale_ub: float):
+ super().__init__()
+ self.ignore_list = ignore_list if ignore_list else []
+ self.input_scale_ub = input_scale_ub
+
+ # For GPUs that lack FP8 hardware support, we can leverage the Marlin
+ # kernel for fast weight-only FP8 quantization
+ # self.use_marlin = not marlin_fp8_supported()
+ self.use_marlin = False
+ if _is_cuda:
+ force_marlin = get_bool_env_var("SGLANG_FORCE_FP8_MARLIN")
+ auto_enable = can_auto_enable_marlin_fp8()
+ self.use_marlin = force_marlin or auto_enable
+
+ @classmethod
+ def get_name(cls) -> str:
+ return "fbgemm_fp8"
+
+ @classmethod
+ def get_supported_act_dtypes(cls) -> list[torch.dtype]:
+ return [torch.bfloat16, torch.float16]
+
+ @classmethod
+ def get_min_capability(cls) -> int:
+ return 80
+
+ @classmethod
+ def get_config_filenames(cls) -> list[str]:
+ return []
+
+ @classmethod
+ def from_config(cls, config: dict[str, Any]) -> FBGEMMFp8Config:
+ ignore_list = cls.get_from_keys(config, ["modules_to_not_convert"])
+ input_scale_ub = cls.get_from_keys(config, ["activation_scale_ub"])
+ return cls(ignore_list=ignore_list, input_scale_ub=input_scale_ub)
+
+ def get_quant_method(
+ self, layer: torch.nn.Module, prefix: str
+ ) -> Optional[QuantizeMethodBase]:
+ if isinstance(layer, LinearBase):
+ if is_layer_skipped(
+ prefix=prefix,
+ ignored_layers=self.ignore_list,
+ fused_mapping=self.packed_modules_mapping,
+ ):
+ return UnquantizedLinearMethod()
+ return FBGEMMFp8LinearMethod(self)
+ return None
+
+ def get_scaled_act_names(self) -> list[str]:
+ return []
+
+
+class FBGEMMFp8LinearMethod(LinearMethodBase):
+
+ def __init__(self, quant_config: FBGEMMFp8Config):
+ self.quant_config = quant_config
+ # self.fp8_linear = Fp8LinearOp(
+ # act_quant_static=False, act_quant_group_shape=GroupShape.PER_TOKEN)
+ self.out_dtype = torch.get_default_dtype()
+ self.cutlass_fp8_supported = cutlass_fp8_supported()
+
+ def create_weights(
+ self,
+ layer: torch.nn.Module,
+ input_size_per_partition: int,
+ output_partition_sizes: list[int],
+ input_size: int,
+ output_size: int,
+ params_dtype: torch.dtype,
+ **extra_weight_attrs,
+ ):
+ # maybe_create_device_identity()
+ weight_loader = extra_weight_attrs.get("weight_loader")
+ del input_size, output_size
+ output_size_per_partition = sum(output_partition_sizes)
+
+ layer.logical_widths = output_partition_sizes
+
+ layer.input_size_per_partition = input_size_per_partition
+ layer.output_size_per_partition = output_size_per_partition
+ layer.orig_dtype = params_dtype
+
+ # WEIGHT
+ weight = ModelWeightParameter(
+ data=torch.empty(
+ output_size_per_partition,
+ input_size_per_partition,
+ dtype=torch.float8_e4m3fn,
+ ),
+ input_dim=1,
+ output_dim=0,
+ weight_loader=weight_loader,
+ )
+ layer.register_parameter("weight", weight)
+
+ # WEIGHT SCALE
+ weight_scale = ChannelQuantScaleParameter(
+ data=torch.empty((sum(output_partition_sizes), 1), dtype=torch.float32),
+ output_dim=0,
+ weight_loader=weight_loader,
+ )
+ weight_scale[:] = torch.finfo(torch.float32).min
+ layer.register_parameter("weight_scale", weight_scale)
+
+ # INPUT SCALE UPPER BOUND
+ input_scale_ub = torch.nn.Parameter(
+ torch.tensor((self.quant_config.input_scale_ub), dtype=torch.float32),
+ requires_grad=False,
+ )
+ layer.input_scale_ub = input_scale_ub
+
+ def process_weights_after_loading(self, layer: Module) -> None:
+ # required by torch.compile
+ layer.weight_scale = Parameter(layer.weight_scale.data, requires_grad=False)
+ layer.weight = Parameter(layer.weight.data, requires_grad=False)
+
+ weight = layer.weight
+
+ if _is_fp8_fnuz:
+ weight, weight_scale, input_scale = normalize_e4m3fn_to_e4m3fnuz(
+ weight=weight, weight_scale=layer.weight_scale, input_scale=None
+ )
+ if input_scale is not None:
+ layer.input_scale = Parameter(input_scale, requires_grad=False)
+ layer.weight_scale = Parameter(weight_scale, requires_grad=False)
+
+ layer.weight = Parameter(weight.t(), requires_grad=False)
+ if self.quant_config.use_marlin:
+ prepare_fp8_layer_for_marlin(layer)
+ # Activations not quantized for marlin.
+ del layer.input_scale_ub
+
+ def apply(
+ self,
+ layer: torch.nn.Module,
+ x: torch.Tensor,
+ bias: Optional[torch.Tensor] = None,
+ ) -> torch.Tensor:
+
+ if self.quant_config.use_marlin:
+ return apply_fp8_marlin_linear(
+ input=x,
+ weight=layer.weight,
+ weight_scale=layer.weight_scale,
+ workspace=layer.workspace,
+ size_n=layer.output_size_per_partition,
+ size_k=layer.input_size_per_partition,
+ bias=bias,
+ )
+
+ return apply_fp8_linear(
+ input=x,
+ weight=layer.weight,
+ weight_scale=layer.weight_scale,
+ input_scale=None,
+ input_scale_ub=layer.input_scale_ub,
+ bias=bias,
+ cutlass_fp8_supported=self.cutlass_fp8_supported,
+ use_per_token_if_dynamic=False,
+ )
diff --git a/python/sglang/srt/layers/quantization/gptq.py b/python/sglang/srt/layers/quantization/gptq.py
index c770708b0fc..ccd3d46f705 100644
--- a/python/sglang/srt/layers/quantization/gptq.py
+++ b/python/sglang/srt/layers/quantization/gptq.py
@@ -45,7 +45,10 @@
if TYPE_CHECKING:
from sglang.srt.layers.moe.moe_runner import MoeRunnerConfig
- from sglang.srt.layers.moe.topk import TopKOutput
+ from sglang.srt.layers.moe.token_dispatcher import (
+ StandardDispatchOutput,
+ CombineInput,
+ )
from sglang.srt.utils import is_cuda
@@ -838,19 +841,14 @@ def create_weights(
from sglang.srt.layers.linear import set_weight_attrs
from sglang.srt.layers.moe.fused_moe_triton import FusedMoeWeightScaleSupported
- intermediate_size = extra_weight_attrs.pop("intermediate_size")
-
- self.is_k_full = (not self.quant_config.desc_act) or (
- intermediate_size_per_partition == intermediate_size
- )
+ self.is_k_full = (not self.quant_config.desc_act) or layer.moe_tp_size == 1
if self.quant_config.group_size != -1:
scales_size13 = hidden_size // self.quant_config.group_size
- w2_scales_size = (
- intermediate_size
- if self.quant_config.desc_act
- else intermediate_size_per_partition
- )
+ if self.quant_config.desc_act:
+ w2_scales_size = intermediate_size_per_partition
+ else:
+ w2_scales_size = intermediate_size_per_partition * layer.moe_tp_size
scales_size2 = w2_scales_size // self.quant_config.group_size
strategy = FusedMoeWeightScaleSupported.GROUP.value
else:
@@ -1052,17 +1050,26 @@ def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
)
replace_parameter(layer, "w2_scales", marlin_w2_scales)
+ def create_moe_runner(
+ self, layer: torch.nn.Module, moe_runner_config: MoeRunnerConfig
+ ):
+ self.moe_runner_config = moe_runner_config
+
def apply(
self,
layer: torch.nn.Module,
- x: torch.Tensor,
- topk_output: TopKOutput,
- moe_runner_config: MoeRunnerConfig,
- ) -> torch.Tensor:
+ dispatch_output: StandardDispatchOutput,
+ ) -> CombineInput:
+
+ from sglang.srt.layers.moe.token_dispatcher import StandardCombineInput
+
+ x = dispatch_output.hidden_states
+ topk_output = dispatch_output.topk_output
+
# Delay the import to avoid circular dependency
assert (
- moe_runner_config.activation == "silu"
+ self.moe_runner_config.activation == "silu"
), "Only SiLU activation is supported."
# The input must currently be float16
@@ -1071,7 +1078,7 @@ def apply(
topk_weights, topk_ids, router_logits = topk_output
- return fused_marlin_moe(
+ output = fused_marlin_moe(
x,
layer.w13_qweight,
layer.w2_qweight,
@@ -1087,3 +1094,4 @@ def apply(
num_bits=self.quant_config.weight_bits,
is_k_full=self.is_k_full,
).to(orig_dtype)
+ return StandardCombineInput(hidden_states=output)
diff --git a/python/sglang/srt/layers/quantization/marlin_utils.py b/python/sglang/srt/layers/quantization/marlin_utils.py
index d76b900ae9b..e0b398c251e 100644
--- a/python/sglang/srt/layers/quantization/marlin_utils.py
+++ b/python/sglang/srt/layers/quantization/marlin_utils.py
@@ -306,6 +306,13 @@ def marlin_permute_scales(
return s
+def marlin_permute_bias(s: torch.Tensor) -> torch.Tensor:
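+ # Reorder the per-output-channel bias with the same permutation used for
+ # single-group (per-channel) scales.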
+ origin_shape = s.shape
+ _, scale_perm_single = get_scale_perms()
+ s = s.reshape((-1, len(scale_perm_single)))[:, scale_perm_single]
+ return s.reshape(*origin_shape).contiguous()
+
+
def marlin_moe_permute_scales(
s: torch.Tensor,
size_k: int,
diff --git a/python/sglang/srt/layers/quantization/marlin_utils_fp8.py b/python/sglang/srt/layers/quantization/marlin_utils_fp8.py
new file mode 100644
index 00000000000..94326d71e54
--- /dev/null
+++ b/python/sglang/srt/layers/quantization/marlin_utils_fp8.py
@@ -0,0 +1,352 @@
+# SPDX-License-Identifier: Apache-2.0
+
+import logging
+from typing import Optional
+
+import torch
+
+from sglang.srt.layers.quantization.marlin_utils import (
+ USE_FP32_REDUCE_DEFAULT,
+ marlin_make_workspace,
+ marlin_permute_bias,
+ marlin_permute_scales,
+ should_use_atomic_add_reduce,
+)
+from sglang.srt.layers.quantization.utils import get_scalar_types
+from sglang.srt.utils import is_cuda
+
+_is_cuda = is_cuda()
+if _is_cuda:
+ from sgl_kernel import gptq_marlin_gemm, gptq_marlin_repack
+
+ScalarType, scalar_types = get_scalar_types()
+
+logger = logging.getLogger(__name__)
+
+
+def fp8_fused_exponent_bias_into_scales(scales):
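+ # 2**exponent_bias equals the gap between the output dtype's exponent bias
+ # (15 for fp16, 127 for bf16) and the fp8 e4m3 bias (7); folding it into the
+ # scales removes the need for a separate bias correction when upcasting.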
+ fp8_exponent = 4
+ if scales.dtype == torch.half:
+ target_exponent = 5
+ elif scales.dtype == torch.bfloat16:
+ target_exponent = 8
+ # exponent_bias_fp16 = 2 ** 4 - 2 ** 3 = 8
+ # exponent_bias_bf16 = 2 ** 7 - 2 ** 3 = 120
+ exponent_bias = 2 ** (target_exponent - 1) - 2 ** (fp8_exponent - 1)
+ s = torch.ones_like(scales) * 2
+ s = s**exponent_bias
+ return scales * s
+
+
+def apply_fp8_marlin_linear(
+ input: torch.Tensor,
+ weight: torch.Tensor,
+ weight_scale: torch.Tensor,
+ workspace: torch.Tensor,
+ size_n: int,
+ size_k: int,
+ bias: Optional[torch.Tensor],
+ use_fp32_reduce: bool = USE_FP32_REDUCE_DEFAULT,
+) -> torch.Tensor:
+ # For GPUs that lack FP8 hardware support, we can leverage the
+ # Marlin kernel for fast weight-only FP8 quantization
+
+ reshaped_x = input.reshape(-1, input.shape[-1])
+ out_shape = input.shape[:-1] + (size_n,)
+
+ use_atomic_add = should_use_atomic_add_reduce(
+ m=reshaped_x.size(0), n=size_n, k=size_k, device=input.device, dtype=input.dtype
+ )
+
+ output = gptq_marlin_gemm(
+ a=reshaped_x,
+ c=None,
+ b_q_weight=weight,
+ b_bias=bias,
+ b_scales=weight_scale,
+ global_scale=None,
+ b_zeros=None,
+ g_idx=None,
+ perm=None,
+ workspace=workspace,
+ b_q_type=scalar_types.float8_e4m3fn,
+ size_m=reshaped_x.size(0),
+ size_n=size_n,
+ size_k=size_k,
+ use_atomic_add=use_atomic_add,
+ use_fp32_reduce=use_fp32_reduce,
+ )
+
+ return output.reshape(out_shape)
+
+
+def prepare_fp8_layer_for_marlin(
+ layer: torch.nn.Module, size_k_first: bool = True
+) -> None:
+ logger.warning_once(
+ "Your GPU does not have native support for FP8 computation but "
+ "FP8 quantization is being used. Weight-only FP8 compression will "
+ "be used leveraging the Marlin kernel. This may degrade "
+ "performance for compute-heavy workloads."
+ )
+
+ part_size_n = layer.output_size_per_partition
+ part_size_k = layer.input_size_per_partition
+ weight_block_size = getattr(layer, "weight_block_size", None)
+
+ if size_k_first:
+ assert layer.weight.shape == (part_size_k, part_size_n)
+ else:
+ assert layer.weight.shape == (part_size_n, part_size_k)
+
+ device = layer.weight.device
+
+ # WORKSPACE
+ layer.workspace = marlin_make_workspace(device)
+
+ # WEIGHT
+ # Repack weights to marlin format
+ perm = torch.empty(0, dtype=torch.int, device=device)
+ qweight = pack_fp8_to_int32(layer.weight, size_k_first)
+ if not size_k_first:
+ qweight = qweight.T.contiguous()
+
+ marlin_qweight = gptq_marlin_repack(
+ b_q_weight=qweight,
+ perm=perm,
+ size_k=part_size_k,
+ size_n=part_size_n,
+ num_bits=8,
+ )
+ layer.weight = torch.nn.Parameter(marlin_qweight, requires_grad=False)
+
+ # WEIGHT SCALES
+ # Permute scales
+ if "weight_scale" in dir(layer):
+ scales = layer.weight_scale.to(layer.orig_dtype)
+ elif "weight_scale_inv" in dir(layer):
+ scales = layer.weight_scale_inv.to(layer.orig_dtype)
+ del layer.weight_scale_inv
+
+ group_size = -1 if weight_block_size is None else weight_block_size[1]
+
+ # the marlin kernel only supports channel-wise and group-wise quantization,
+ # so we need to convert the scales
+ if weight_block_size is None:
+ if scales.nelement() == 1:
+ # tensor-wise quantization -> channel-wise quantization
+ # (1, 1) =>(repeat)=> (1, size_n)
+ scales = scales.view(1, 1).repeat_interleave(part_size_n, 1)
+ elif scales.nelement() > 1 and scales.nelement() != part_size_n:
+ assert part_size_n % scales.nelement() == 0
+ s_size = scales.nelement()
+ # tensor-wise quantization (for gate-up proj)
+ # -> channel-wise quantization
+ # (1, s_size) =>(repeat)=> (1, size_n)
+ scales = scales.view(1, s_size)
+ scales = scales.repeat_interleave(part_size_n // s_size, 1)
+ else:
+ # channel-wise quantization
+ # (1, size_n)
+ scales = scales.view(1, part_size_n)
+ else:
+ # block-wise quantization -> group-wise quantization
+ # (size_k // block_size[1], ceil(size_n / block_size[0]))
+ # =>(repeat)=> (size_k // block_size[1], size_n)
+ if not size_k_first:
+ scales = scales.T.contiguous()
+ block_n = weight_block_size[0]
+ scales = scales.repeat_interleave(block_n, 1)
+ # size_n may not be divisible by block_size[0]
+ scales = scales[:, :part_size_n]
+
+ marlin_scales = marlin_permute_scales(
+ s=scales, size_k=part_size_k, size_n=part_size_n, group_size=group_size
+ )
+ marlin_scales = fp8_fused_exponent_bias_into_scales(marlin_scales)
+ layer.weight_scale = torch.nn.Parameter(marlin_scales, requires_grad=False)
+
+ if hasattr(layer, "bias") and layer.bias is not None:
+ assert layer.bias.shape == (part_size_n,)
+ bias = marlin_permute_bias(layer.bias)
+ layer.bias = torch.nn.Parameter(bias, requires_grad=False)
+
+
+def prepare_moe_fp8_layer_for_marlin(
+ layer: torch.nn.Module, size_k_first: bool = True
+) -> None:
+ logger.warning_once(
+ "Your GPU does not have native support for FP8 computation but "
+ "FP8 quantization is being used. Weight-only FP8 compression will "
+ "be used leveraging the Marlin kernel. This may degrade "
+ "performance for compute-heavy workloads."
+ )
+
+ e = layer.num_experts
+ k = layer.hidden_size
+ n = layer.intermediate_size_per_partition
+ weight_block_size = getattr(layer, "weight_block_size", None)
+
+ # WORKSPACE
+ device = layer.w13_weight.device
+ layer.workspace = marlin_make_workspace(device, 4)
+ perm = torch.empty(0, dtype=torch.int, device=device)
+
+ # WEIGHT
+ # Repack weights to marlin format
+ for name in ["w13_weight", "w2_weight"]:
+ weight = getattr(layer, name)
+ tensor_list = []
+ if "w13" in name:
+ size_n, size_k = n * 2, k
+ else:
+ size_n, size_k = k, n
+
+ if size_k_first:
+ assert weight.shape == (e, size_k, size_n)
+ else:
+ assert weight.shape == (e, size_n, size_k)
+
+ for i in range(e):
+ qweight = pack_fp8_to_int32(weight[i], size_k_first)
+ if not size_k_first:
+ qweight = qweight.T.contiguous()
+
+ marlin_qweight = gptq_marlin_repack(
+ b_q_weight=qweight, perm=perm, size_k=size_k, size_n=size_n, num_bits=8
+ )
+ tensor_list.append(marlin_qweight)
+
+ weight = torch.cat([x.unsqueeze(0) for x in tensor_list], 0)
+ weight = torch.nn.Parameter(weight, requires_grad=False)
+
+ setattr(layer, name, weight)
+
+ # WEIGHT SCALES
+ # Permute scales
+ group_size = -1 if weight_block_size is None else weight_block_size[1]
+
+ for name in ["w13", "w2"]:
+ if name + "_weight_scale" in dir(layer):
+ new_name = name + "_weight_scale"
+ scales = getattr(layer, new_name).to(layer.orig_dtype)
+ delattr(layer, new_name)
+ elif name + "_weight_scale_inv" in dir(layer):
+ new_name = name + "_weight_scale_inv"
+ scales = getattr(layer, new_name).to(layer.orig_dtype)
+ delattr(layer, new_name)
+
+ tensor_list = []
+ if "w13" in name:
+ size_n, size_k = n * 2, k
+ else:
+ size_n, size_k = k, n
+
+ # the marlin kernel only supports channel-wise and group-wise quantization,
+ # so we need to convert the scales
+ if weight_block_size is None:
+ if scales.nelement() == e:
+ # tensor-wise quantization -> channel-wise quantization
+ # (e, 1, 1) =>(repeat)=> (e, 1, size_n)
+ scales = scales.view(e, 1, 1).repeat_interleave(size_n, 2)
+ elif scales.nelement() > e and scales.nelement() != e * size_n:
+ assert (e * size_n) % scales.nelement() == 0
+ s_size = scales.nelement() // e
+ # tensor-wise quantization (for gate-up proj)
+ # -> channel-wise quantization
+ # (e, 1, s_size) =>(repeat)=> (e, 1, size_n)
+ scales = scales.view(e, 1, s_size)
+ scales = scales.repeat_interleave(size_n // s_size, 2)
+ else:
+ # channel-wise quantization
+ # (e, 1, size_n)
+ scales = scales.view(e, 1, size_n)
+ else:
+ # block-wise quantization -> group-wise quantization
+ # (e, size_k // block_size[1], ceil(size_n / block_size[0]))
+ # =>(repeat)=> (e, size_k // block_size[1], size_n)
+ if not size_k_first:
+ scales = scales.permute(0, 2, 1)
+ block_n = weight_block_size[0]
+ scales = scales.repeat_interleave(block_n, 2)
+ # size_n may not be divisible by block_size[0]
+ scales = scales[..., :size_n].contiguous()
+
+ for i in range(e):
+ marlin_scales = marlin_permute_scales(
+ s=scales[i], size_k=size_k, size_n=size_n, group_size=group_size
+ )
+ tensor_list.append(marlin_scales)
+
+ scales = torch.cat([x.unsqueeze(0) for x in tensor_list], 0)
+ scales = fp8_fused_exponent_bias_into_scales(scales)
+ scales = torch.nn.Parameter(scales, requires_grad=False)
+
+ setattr(layer, name + "_weight_scale", scales)
+
+ # BIAS
+ # Permute bias
+ for name in ["w13_bias", "w2_bias"]:
+ if not hasattr(layer, name):
+ continue
+ bias = getattr(layer, name).to(layer.orig_dtype)
+
+ tensor_list = []
+ for i in range(e):
+ expert_bias = bias[i]
+
+ tensor_list.append(marlin_permute_bias(expert_bias))
+
+ bias = torch.cat([x.unsqueeze(0) for x in tensor_list], 0)
+ bias = torch.nn.Parameter(bias, requires_grad=False)
+ setattr(layer, name, bias)
+
+
+def pack_fp8_to_int32(
+ fp8_tensor: torch.Tensor, size_k_first: bool = True
+) -> torch.Tensor:
+ """
+ Repack FP8 weights to gptq format (packed int32 elements)
+ """
+ assert fp8_tensor.dtype == torch.float8_e4m3fn
+ assert fp8_tensor.ndim == 2
+
+ fp8_tensor = fp8_tensor.T if size_k_first else fp8_tensor
+ fp8_tensor = fp8_tensor.contiguous()
+ # fp8_tensor is contiguous and has shape (N, K) now
+ # with `.view(torch.int32)`, it becomes (N, K // 4)
+ int32_tensor = fp8_tensor.view(torch.int32)
+ return int32_tensor.T.contiguous() if size_k_first else int32_tensor
+
+
+def marlin_quant_fp8_torch(weight, group_size):
+ size_n, size_k = weight.shape
+ device = weight.device
+
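+ # 448 is the maximum finite value of float8_e4m3fn; dividing the per-group
+ # absmax by it maps each group onto the representable fp8 range.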
+ if group_size != -1:
+ scales = weight.view(size_n, -1, group_size).abs().max(-1)[0] / 448
+ repeated_scales = scales.repeat_interleave(group_size, 1)
+ fp8_weight = (weight / repeated_scales).to(torch.float8_e4m3fn)
+ weight_ref = fp8_weight.to(weight.dtype) * repeated_scales
+ else:
+ scales = weight.view(size_n, 1, group_size).abs().max(-1)[0] / 448
+ repeated_scales = scales.repeat_interleave(size_k, 1)
+ fp8_weight = (weight / repeated_scales).to(torch.float8_e4m3fn)
+ weight_ref = fp8_weight.to(weight.dtype) * repeated_scales
+
+ packed_weight = pack_fp8_to_int32(fp8_weight, False).T.contiguous()
+ marlin_qweight = gptq_marlin_repack(
+ b_q_weight=packed_weight,
+ perm=torch.empty(0, dtype=torch.int, device=device),
+ size_k=size_k,
+ size_n=size_n,
+ num_bits=8,
+ )
+
+ marlin_scales = marlin_permute_scales(
+ s=scales.T, size_k=size_k, size_n=size_n, group_size=group_size
+ )
+
+ marlin_scales = fp8_fused_exponent_bias_into_scales(marlin_scales)
+
+ return weight_ref.T, marlin_qweight, marlin_scales
diff --git a/python/sglang/srt/layers/quantization/modelopt_quant.py b/python/sglang/srt/layers/quantization/modelopt_quant.py
index 7647ec30b02..89ecd44f55c 100755
--- a/python/sglang/srt/layers/quantization/modelopt_quant.py
+++ b/python/sglang/srt/layers/quantization/modelopt_quant.py
@@ -10,10 +10,14 @@
from sglang.srt.distributed import get_tp_group
from sglang.srt.layers.dp_attention import get_dp_global_num_tokens, get_local_dp_buffer
from sglang.srt.layers.moe import (
+ MoeRunner,
+ MoeRunnerBackend,
+ MoeRunnerConfig,
should_use_flashinfer_cutlass_moe_fp4_allgather,
should_use_flashinfer_trtllm_moe,
)
from sglang.srt.layers.moe.cutlass_moe_params import CutlassMoEParams, CutlassMoEType
+from sglang.srt.layers.moe.moe_runner.triton import TritonMoeQuantInfo
from sglang.srt.layers.parameter import ModelWeightParameter, PerTensorScaleParameter
from sglang.srt.layers.quantization.base_config import (
FusedMoEMethodBase,
@@ -39,8 +43,10 @@
if TYPE_CHECKING:
from sglang.srt.layers.moe.fused_moe_triton.layer import FusedMoE
- from sglang.srt.layers.moe.moe_runner import MoeRunnerConfig
- from sglang.srt.layers.moe.topk import TopKOutput
+ from sglang.srt.layers.moe.token_dispatcher import (
+ CombineInput,
+ StandardDispatchOutput,
+ )
if is_cuda():
from sgl_kernel import scaled_fp4_quant
@@ -111,18 +117,52 @@ def get_config_filenames(cls) -> List[str]:
@classmethod
def from_config(cls, config: Dict[str, Any]) -> ModelOptFp8Config:
- quant_method = cls.get_from_keys(config, ["quantization"]).get("quant_algo")
- kv_cache_quant_method = cls.get_from_keys(config, ["quantization"]).get(
- "kv_cache_quant_algo"
- )
- exclude_modules = cls.get_from_keys(config, ["quantization"]).get(
- "exclude_modules"
- )
+ # Handle two different config formats:
+ # 1. hf_quant_config.json format: {"quantization": {"quant_algo": "FP8", ...}}
+ # 2. config.json quantization_config format: {"quant_algo": "FP8", ...}
+ # In future modelopt will deprecate hf_quant_config.json, and only keep config.json.
+ # For legacy reasons, we keep hf_quant_config.json for now.
+
+ # Initialize variables
+ kv_cache_quant_method = None
+ exclude_modules = None
+
+ # Try flat format first (config.json quantization_config - preferred format)
+ quant_method = config.get("quant_algo")
+ if quant_method is not None:
+ # Flat format (config.json quantization_config)
+ # For kv_cache, check if kv_cache_scheme exists and extract algo
+ kv_cache_scheme = config.get("kv_cache_scheme")
+ if (
+ kv_cache_scheme
+ and kv_cache_scheme.get("type") == "float"
+ and kv_cache_scheme.get("num_bits") == 8
+ ):
+ kv_cache_quant_method = "FP8"
+ # Map 'ignore' field to 'exclude_modules'
+ exclude_modules = config.get("ignore")
+ else:
+ # Fall back to nested format (hf_quant_config.json - legacy format)
+ try:
+ quantization_section = cls.get_from_keys(config, ["quantization"])
+ quant_method = quantization_section.get("quant_algo")
+ kv_cache_quant_method = quantization_section.get("kv_cache_quant_algo")
+ exclude_modules = quantization_section.get("exclude_modules")
+ except ValueError:
+ raise ValueError(
+ "Cannot find 'quant_algo' in the model's quantization config. "
+ "Expected either flat format (config.json) or nested format (hf_quant_config.json)."
+ )
+ if quant_method is None:
+ raise ValueError(
+ "Cannot find 'quant_algo' in the model's quantization config. "
+ )
if "FP8" not in quant_method:
raise ValueError(
- "ModelOpt only supports static FP8 quantization in SGLang. "
- "Check the `hf_quant_config.json` file for your model's configuration."
+ "ModelOptFp8Config only supports static FP8 quantization in SGLang. "
+ "For FP4 quantization, use ModelOptFp4Config. "
+ "Check the quantization config for your model's configuration."
)
return cls(
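For reference, a minimal sketch of the two config shapes the rewritten `from_config` accepts (field values are illustrative, not taken from any real checkpoint):

    # Flat format, i.e. the "quantization_config" block of config.json (preferred)
    flat_cfg = {
        "quant_algo": "FP8",
        "kv_cache_scheme": {"type": "float", "num_bits": 8},  # -> kv_cache_quant_method = "FP8"
        "ignore": ["lm_head"],                                 # -> exclude_modules
    }

    # Nested legacy format, i.e. hf_quant_config.json
    nested_cfg = {
        "quantization": {
            "quant_algo": "FP8",
            "kv_cache_quant_algo": "FP8",
            "exclude_modules": ["lm_head"],
        }
    }

    # Both calls produce an equivalent ModelOptFp8Config
    ModelOptFp8Config.from_config(flat_cfg)
    ModelOptFp8Config.from_config(nested_cfg)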
@@ -288,7 +328,7 @@ def create_weights(
layer: torch.nn.Module,
num_experts: int,
hidden_size: int,
- intermediate_size: int,
+ intermediate_size_per_partition: int,
params_dtype: torch.dtype,
**extra_weight_attrs,
):
@@ -304,7 +344,10 @@ def create_weights(
w13_weight = ModelWeightParameter(
data=torch.empty(
- num_experts, 2 * intermediate_size, hidden_size, dtype=weight_dtype
+ num_experts,
+ 2 * intermediate_size_per_partition,
+ hidden_size,
+ dtype=weight_dtype,
),
input_dim=2,
output_dim=1,
@@ -314,7 +357,10 @@ def create_weights(
w2_weight = ModelWeightParameter(
data=torch.empty(
- num_experts, hidden_size, intermediate_size, dtype=weight_dtype
+ num_experts,
+ hidden_size,
+ intermediate_size_per_partition,
+ dtype=weight_dtype,
),
input_dim=2,
output_dim=1,
@@ -380,28 +426,28 @@ def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
max_w13_scales = layer.w13_weight_scale.max(dim=1).values
# Requantize each expert's weights using the combined scale
- # w13_weight has shape (num_experts, 2 * intermediate_size, hidden_size)
- # where the first intermediate_size rows are w1, the next are w3
- intermediate_size = layer.w13_weight.shape[1] // 2
+ # w13_weight has shape (num_experts, 2 * intermediate_size_per_partition, hidden_size)
+ # where the first intermediate_size_per_partition rows are w1, the next are w3
+ intermediate_size_per_partition = layer.w13_weight.shape[1] // 2
for expert_id in range(layer.w13_weight.shape[0]):
start = 0
for shard_id in range(2): # w1 and w3
# Dequantize using the original scale for this shard
dq_weight = per_tensor_dequantize(
layer.w13_weight[expert_id][
- start : start + intermediate_size, :
+ start : start + intermediate_size_per_partition, :
],
layer.w13_weight_scale[expert_id][shard_id],
)
# Requantize using the combined max scale
(
layer.w13_weight[expert_id][
- start : start + intermediate_size, :
+ start : start + intermediate_size_per_partition, :
],
_,
) = scaled_fp8_quant(dq_weight, max_w13_scales[expert_id])
- start += intermediate_size
+ start += intermediate_size_per_partition
# Update the scale parameter to be per-expert instead of per-shard
layer.w13_weight_scale = Parameter(max_w13_scales, requires_grad=False)
@@ -423,29 +469,31 @@ def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
layer.w2_input_scale.max(), requires_grad=False
)
+ def create_moe_runner(
+ self, layer: torch.nn.Module, moe_runner_config: MoeRunnerConfig
+ ):
+ self.moe_runner_config = moe_runner_config
+ self.runner = MoeRunner(MoeRunnerBackend.TRITON, moe_runner_config)
+
def apply(
self,
layer: torch.nn.Module,
- x: torch.Tensor,
- topk_output: TopKOutput,
- moe_runner_config: MoeRunnerConfig,
- ) -> torch.Tensor:
- from sglang.srt.layers.moe.fused_moe_triton.fused_moe import fused_experts
-
- return fused_experts(
- x,
- layer.w13_weight,
- layer.w2_weight,
- topk_output=topk_output,
- moe_runner_config=moe_runner_config,
+ dispatch_output: StandardDispatchOutput,
+ ) -> CombineInput:
+
+ quant_info = TritonMoeQuantInfo(
+ w13_weight=layer.w13_weight,
+ w2_weight=layer.w2_weight,
use_fp8_w8a8=True,
- per_channel_quant=False, # ModelOpt uses per-tensor quantization
- w1_scale=layer.w13_weight_scale,
+ per_channel_quant=False,
+ w13_scale=layer.w13_weight_scale,
w2_scale=layer.w2_weight_scale,
- a1_scale=layer.w13_input_scale,
+ a13_scale=layer.w13_input_scale,
a2_scale=layer.w2_input_scale,
)
+ return self.runner.run(dispatch_output, quant_info)
+
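The `apply` rewrite above reflects the new MoE runner interface used throughout this diff: quant methods gain a `create_moe_runner` hook, consume a `StandardDispatchOutput` instead of raw `x`/`topk_output`/`moe_runner_config`, and return a `CombineInput`. A rough sketch of the intended call flow from the owning MoE layer's side (the `dispatch`/`combine` names are placeholders for the token dispatcher's real entry points, not verbatim API):

    # one-time setup, after create_weights() / process_weights_after_loading()
    quant_method.create_moe_runner(layer, moe_runner_config)

    # per forward pass
    dispatch_output = token_dispatcher.dispatch(hidden_states, topk_output)  # StandardDispatchOutput
    combine_input = quant_method.apply(layer, dispatch_output)               # CombineInput
    output = token_dispatcher.combine(combine_input)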
class ModelOptFp4Config(QuantizationConfig):
"""Config class for FP4."""
@@ -483,24 +531,98 @@ def get_min_capability(cls) -> int:
def get_config_filenames(cls) -> List[str]:
return ["hf_quant_config.json"]
+ @staticmethod
+ def common_group_size(cfg: dict) -> int:
+ """Return the unique group_size across the config; raise if missing/mismatched."""
+ sizes = set()
+
+ # Top-level and 'quantization' block
+ v = cfg.get("group_size")
+ if isinstance(v, int):
+ sizes.add(v)
+ q = cfg.get("quantization")
+ if isinstance(q, dict):
+ v = q.get("group_size")
+ if isinstance(v, int):
+ sizes.add(v)
+
+ # config_groups: accept group-level or nested dicts (e.g., weights/input_activations)
+ for g in (cfg.get("config_groups") or {}).values():
+ if isinstance(g, dict):
+ v = g.get("group_size")
+ if isinstance(v, int):
+ sizes.add(v)
+ for sub in g.values():
+ if isinstance(sub, dict):
+ v = sub.get("group_size")
+ if isinstance(v, int):
+ sizes.add(v)
+
+ if not sizes:
+ raise ValueError("No group_size found in config.")
+ if len(sizes) > 1:
+ raise ValueError(f"Inconsistent group_size values: {sorted(sizes)}")
+ return next(iter(sizes))
+
@classmethod
def from_config(cls, config: Dict[str, Any]) -> ModelOptFp4Config:
- quant_config = cls.get_from_keys(config, ["quantization"])
- quant_method = quant_config["quant_algo"]
+ # Handle two different config formats:
+ # 1. hf_quant_config.json format: {"quantization": {"quant_algo": "NVFP4", ...}}
+ # 2. config.json quantization_config format: {"quant_algo": "NVFP4", ...}
+        # In the future, ModelOpt will deprecate hf_quant_config.json and keep only config.json.
+        # For legacy reasons, hf_quant_config.json is still supported for now.
+
+ # Initialize variables
+ kv_cache_quant_algo = None
+ group_size = None
+ exclude_modules = []
+
+ # Try flat format first (config.json quantization_config - preferred format)
+ quant_method = config.get("quant_algo")
+ if quant_method is not None:
+ # Flat format (config.json quantization_config)
+ # Note: FP4 models in config.json format may not have all the detailed fields
+ # that are present in hf_quant_config.json, so we need to handle defaults
+ kv_cache_quant_algo = config.get("kv_cache_quant_algo")
+ if not kv_cache_quant_algo:
+ # For config.json format, derive from kv_cache_scheme if available
+ kv_cache_scheme = config.get("kv_cache_scheme")
+ if (
+ kv_cache_scheme
+ and kv_cache_scheme.get("type") == "float"
+ and kv_cache_scheme.get("num_bits") == 8
+ ):
+ kv_cache_quant_algo = "FP8"
+ else:
+ kv_cache_quant_algo = "auto"
+
+ group_size = ModelOptFp4Config.common_group_size(config)
+ exclude_modules = config.get("ignore", [])
+ else:
+ # Fall back to nested format (hf_quant_config.json - legacy format)
+ try:
+ quant_config = cls.get_from_keys(config, ["quantization"])
+ quant_method = quant_config["quant_algo"]
+ kv_cache_quant_algo = quant_config.get("kv_cache_quant_algo")
+ if not kv_cache_quant_algo:
+ kv_cache_quant_algo = "auto"
+ group_size = ModelOptFp4Config.common_group_size(config)
+ exclude_modules = quant_config.get("exclude_modules", [])
+ except (ValueError, KeyError):
+ raise ValueError(
+ "Cannot find 'quant_algo' in the model's quantization config. "
+ "Expected either flat format (config.json) or nested format (hf_quant_config.json)."
+ )
+
+        if quant_method not in ["FP8", "NVFP4"]:
raise ValueError(
f"ModelOpt currently only supports: FP8, NVFP4"
" quantizations in sglang. Please check the "
- "`hf_quant_config.json` file for your model's "
- "quant configuration."
+ "quantization config for your model's configuration."
)
is_checkpoint_nvfp4_serialized = "NVFP4" in quant_method
- kv_cache_quant_algo = quant_config["kv_cache_quant_algo"]
- if not kv_cache_quant_algo:
- kv_cache_quant_algo = "auto"
- group_size = quant_config["group_size"]
- exclude_modules = quant_config["exclude_modules"]
- if not (group_size and kv_cache_quant_algo and exclude_modules):
+
+ if not (group_size and kv_cache_quant_algo) or exclude_modules is None:
logger.warning(
f"group_size: {group_size},"
f"kv_cache_quant_algo: {kv_cache_quant_algo},"
@@ -508,8 +630,7 @@ def from_config(cls, config: Dict[str, Any]) -> ModelOptFp4Config:
)
raise ValueError(
"NVFP4 quantization requires group size and "
- "kv_cache_quant_algo specified in "
- "hf_quant_config.json"
+ "kv_cache_quant_algo specified in the quantization config"
)
return cls(
is_checkpoint_nvfp4_serialized,
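The new `common_group_size` helper above is what lets the flat path work for checkpoints that only carry `config_groups`; a few illustrative inputs (all resolve to 16, and inconsistent values raise):

    ModelOptFp4Config.common_group_size({"group_size": 16})                     # top-level
    ModelOptFp4Config.common_group_size({"quantization": {"group_size": 16}})   # nested block
    ModelOptFp4Config.common_group_size(
        {"config_groups": {"group_0": {"weights": {"group_size": 16}}}}         # per-group, possibly nested
    )
    # Mixed values raise: {"group_size": 16, "quantization": {"group_size": 32}} -> ValueError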
@@ -521,10 +642,22 @@ def from_config(cls, config: Dict[str, Any]) -> ModelOptFp4Config:
def is_layer_excluded(self, prefix: str, exclude_modules: list):
import regex as re
+ fused_patterns = ["q_a_proj", "q_b_proj", "kv_a_proj_with_mqa", "kv_b_proj"]
+ prefix_split = prefix.split(".")
for pattern in exclude_modules:
regex_str = pattern.replace(".", r"\.").replace("*", r".*")
+ pattern_split = pattern.split(".")
if re.fullmatch(regex_str, prefix):
return True
+ elif (
+ pattern_split[-1] in fused_patterns
+ and pattern_split[-1] in prefix_split[-1]
+ ):
+ # Check if the last part of the excluded pattern is contained in the last part of the prefix
+ # This handles fused modules like fused_qkv_a_proj_with_mqa that contain q_a_proj and kv_a_proj_with_mqa
+ # e.g., model.layers.{i}.self_attn.{fused_weight_name}
+ assert len(prefix_split) == 5 and len(pattern_split) == 5
+ return True
return False
def get_quant_method(
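To illustrate the fused-module branch added to `is_layer_excluded` above (module names are illustrative; `fp4_cfg` is an already-constructed ModelOptFp4Config):

    fp4_cfg.is_layer_excluded(
        prefix="model.layers.0.self_attn.fused_qkv_a_proj_with_mqa",
        exclude_modules=["model.layers.*.self_attn.kv_a_proj_with_mqa"],
    )
    # -> True: the regex does not match the fused name, but the pattern's last component
    #    "kv_a_proj_with_mqa" is a substring of "fused_qkv_a_proj_with_mqa".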
@@ -802,7 +935,6 @@ def create_weights(
data=torch.empty(
layer.num_local_experts,
2 * intermediate_size_per_partition,
- # 2 fp4 items are packed in the input dimension
hidden_size // self.quant_config.group_size,
dtype=weight_scale_dtype,
),
@@ -812,11 +944,15 @@ def create_weights(
)
layer.register_parameter("w13_weight_scale", w13_weight_scale)
+        # swizzle_blockscale is called here only to produce the swizzled shape; the real values are copied in after weight loading
+ layer.w13_blockscale_swizzled = Parameter(
+ self.swizzle_blockscale(layer.w13_weight_scale), requires_grad=False
+ )
+
w2_weight_scale = ModelWeightParameter(
data=torch.empty(
layer.num_local_experts,
hidden_size,
- # 2 fp4 items are packed in the input dimension
intermediate_size_per_partition // self.quant_config.group_size,
dtype=weight_scale_dtype,
),
@@ -826,6 +962,10 @@ def create_weights(
)
layer.register_parameter("w2_weight_scale", w2_weight_scale)
+ layer.w2_blockscale_swizzled = Parameter(
+ self.swizzle_blockscale(layer.w2_weight_scale), requires_grad=False
+ )
+
from sglang.srt.layers.moe.fused_moe_triton import FusedMoeWeightScaleSupported
extra_weight_attrs.update(
@@ -1122,27 +1262,22 @@ def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
layer.w13_weight_scale,
)
- logger.info_once("Applied flashinfer weight processing for both w13 and w2")
-
else:
# CUTLASS processing - handle w13 and w2 separately
# Process w13 weights
w13_blockscale_swizzled = self.swizzle_blockscale(layer.w13_weight_scale)
- layer.w13_blockscale_swizzled = Parameter(
- w13_blockscale_swizzled, requires_grad=False
- )
+ del layer.w13_weight_scale
+ layer.w13_blockscale_swizzled.data.copy_(w13_blockscale_swizzled)
layer.w13_weight = Parameter(layer.w13_weight.data, requires_grad=False)
# Process w2 weights
w2_blockscale_swizzled = self.swizzle_blockscale(layer.w2_weight_scale)
- layer.w2_blockscale_swizzled = Parameter(
- w2_blockscale_swizzled, requires_grad=False
- )
+ del layer.w2_weight_scale
+ layer.w2_blockscale_swizzled.data.copy_(w2_blockscale_swizzled)
layer.w2_weight = Parameter(layer.w2_weight.data, requires_grad=False)
# Both flashinfer cutlass and regular cutlass use same processing for w2
- logger.info_once("Applied weight processing for both w13 and w2")
# Set up CUTLASS MoE parameters
device = layer.w13_weight.device
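Taken together with the `create_weights` hunk above, the blockscale handling now follows a register-then-fill pattern: the swizzled parameters are registered up front with their final shape (the values at that point are placeholders), and the real swizzled scales are copied into the existing storage after loading, so the parameter objects are never replaced. A condensed view of the two hunks (w13 shown; w2 is handled identically):

    # create_weights: register the parameter with its post-swizzle shape only
    layer.w13_blockscale_swizzled = Parameter(
        self.swizzle_blockscale(layer.w13_weight_scale), requires_grad=False
    )

    # process_weights_after_loading (CUTLASS path): fill it in place, drop the raw scales
    w13_blockscale_swizzled = self.swizzle_blockscale(layer.w13_weight_scale)
    del layer.w13_weight_scale
    layer.w13_blockscale_swizzled.data.copy_(w13_blockscale_swizzled)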
@@ -1159,21 +1294,32 @@ def load_up_proj_weight_first(self) -> bool:
# FlashInfer CUTLASS kernel assumes [Up, Gate] Proj as W13
return self.enable_flashinfer_cutlass_moe
+ def create_moe_runner(
+ self, layer: torch.nn.Module, moe_runner_config: MoeRunnerConfig
+ ):
+ self.moe_runner_config = moe_runner_config
+
def apply(
self,
layer: FusedMoE,
- x: torch.Tensor,
- topk_output: TopKOutput,
- moe_runner_config: MoeRunnerConfig,
- ) -> torch.Tensor:
+ dispatch_output: StandardDispatchOutput,
+ ) -> CombineInput:
+
+ from sglang.srt.layers.moe.token_dispatcher import StandardCombineInput
+
+ x = dispatch_output.hidden_states
+ topk_output = dispatch_output.topk_output
+
assert (
- moe_runner_config.activation == "silu"
+ self.moe_runner_config.activation == "silu"
), "Only SiLU activation is supported."
+ moe_runner_config = self.moe_runner_config
+
# Check if this is a FlashInferFP4MoE layer that should handle its own forward
if hasattr(layer, "gemm1_weights_fp4_shuffled"):
# This layer was processed with flashinfer TRTLLM - delegate to its own forward
- return layer.forward(x, topk_output)
+ return StandardCombineInput(hidden_states=layer.forward(x, topk_output))
if self.enable_flashinfer_cutlass_moe:
assert (
@@ -1226,14 +1372,12 @@ def apply(
tp_rank=layer.moe_tp_rank,
tune_max_num_tokens=next_power_of_2(x.shape[0]),
)[0]
- if moe_runner_config.routed_scaling_factor is not None:
- output *= moe_runner_config.routed_scaling_factor
if should_use_flashinfer_cutlass_moe_fp4_allgather():
output, global_output = get_local_dp_buffer(), output
get_tp_group().reduce_scatterv(
global_output, output=output, sizes=get_dp_global_num_tokens()
)
- return output
+ return StandardCombineInput(hidden_states=output)
from sglang.srt.layers.moe.cutlass_moe import cutlass_moe_fp4
@@ -1253,6 +1397,6 @@ def apply(
params=layer.cutlass_moe_params,
apply_router_weight_on_input=moe_runner_config.apply_router_weight_on_input,
).to(x.dtype)
- if moe_runner_config.routed_scaling_factor is not None:
- output *= moe_runner_config.routed_scaling_factor
- return output
+        # Scaling by routed_scaling_factor is already fused into select_experts.
+
+ return StandardCombineInput(hidden_states=output)
diff --git a/python/sglang/srt/layers/quantization/moe_wna16.py b/python/sglang/srt/layers/quantization/moe_wna16.py
index 7f2c78cbbd9..531e4271f1b 100644
--- a/python/sglang/srt/layers/quantization/moe_wna16.py
+++ b/python/sglang/srt/layers/quantization/moe_wna16.py
@@ -9,6 +9,8 @@
from sglang.srt.distributed import get_tensor_model_parallel_rank
from sglang.srt.distributed.parallel_state import get_tp_group
+from sglang.srt.layers.moe import MoeRunner, MoeRunnerBackend, MoeRunnerConfig
+from sglang.srt.layers.moe.moe_runner.triton import TritonMoeQuantInfo
from sglang.srt.layers.quantization.awq import AWQConfig
from sglang.srt.layers.quantization.base_config import (
FusedMoEMethodBase,
@@ -22,8 +24,10 @@
logger = logging.getLogger(__name__)
if TYPE_CHECKING:
- from sglang.srt.layers.moe.moe_runner import MoeRunnerConfig
- from sglang.srt.layers.moe.topk import TopKOutput
+ from sglang.srt.layers.moe.token_dispatcher import (
+ CombineInput,
+ StandardDispatchOutput,
+ )
def get_weight_perm(num_bits: int):
@@ -349,37 +353,36 @@ def create_weights(
layer.register_parameter(key, param)
set_weight_attrs(param, extra_weight_attrs)
+ def create_moe_runner(
+ self, layer: torch.nn.Module, moe_runner_config: MoeRunnerConfig
+ ):
+ self.moe_runner_config = moe_runner_config
+ self.runner = MoeRunner(MoeRunnerBackend.TRITON, moe_runner_config)
+
def apply(
self,
layer: torch.nn.Module,
- x: torch.Tensor,
- topk_output: TopKOutput,
- moe_runner_config: MoeRunnerConfig,
- ) -> torch.Tensor:
- # avoid circular import
- from sglang.srt.layers.moe.fused_moe_triton.fused_moe import fused_experts
-
+ dispatch_output: StandardDispatchOutput,
+ ) -> CombineInput:
assert (
- moe_runner_config.activation == "silu"
+ self.moe_runner_config.activation == "silu"
), "Only SiLU activation is supported."
weight_bits = self.quant_config.weight_bits
has_zp = self.quant_config.has_zp
- return fused_experts(
- x,
- layer.w13_qweight,
- layer.w2_qweight,
- topk_output=topk_output,
- moe_runner_config=moe_runner_config,
+ quant_info = TritonMoeQuantInfo(
+ w13_weight=layer.w13_qweight,
+ w2_weight=layer.w2_qweight,
use_int4_w4a16=weight_bits == 4,
use_int8_w8a16=weight_bits == 8,
- w1_scale=layer.w13_scales,
+ w13_scale=layer.w13_scales,
w2_scale=layer.w2_scales,
- w1_zp=layer.w13_qzeros if has_zp else None,
+ w13_zp=layer.w13_qzeros if has_zp else None,
w2_zp=layer.w2_qzeros if has_zp else None,
block_shape=[0, layer.group_size],
)
+ return self.runner.run(dispatch_output, quant_info)
@staticmethod
def get_weight_loader(layer, weight_loader):
diff --git a/python/sglang/srt/layers/quantization/mxfp4.py b/python/sglang/srt/layers/quantization/mxfp4.py
index 4cb28d4219a..0d98d00d63b 100644
--- a/python/sglang/srt/layers/quantization/mxfp4.py
+++ b/python/sglang/srt/layers/quantization/mxfp4.py
@@ -22,6 +22,8 @@
import torch
from torch.nn.parameter import Parameter
+from sglang.srt.layers.moe import MoeRunner, MoeRunnerBackend, MoeRunnerConfig
+from sglang.srt.layers.moe.moe_runner.triton import TritonMoeQuantInfo
from sglang.srt.layers.moe.utils import get_moe_runner_backend
from sglang.srt.layers.quantization.base_config import (
FusedMoEMethodBase,
@@ -29,13 +31,13 @@
QuantizeMethodBase,
)
from sglang.srt.layers.quantization.utils import is_layer_skipped
-from sglang.srt.layers.utils import is_sm100_supported
+from sglang.srt.managers.schedule_batch import global_server_args_dict
from sglang.srt.utils import (
direct_register_custom_op,
- get_bool_env_var,
is_cuda,
is_flashinfer_available,
is_hip,
+ is_sm100_supported,
is_triton_kernels_available,
log_info_on_rank0,
mxfp_supported,
@@ -59,17 +61,24 @@
logger = logging.getLogger(__name__)
if TYPE_CHECKING:
- from sglang.srt.layers.moe.moe_runner import MoeRunnerConfig
- from sglang.srt.layers.moe.topk import TopKOutput
+ from sglang.srt.layers.moe.token_dispatcher import (
+ CombineInput,
+ StandardDispatchOutput,
+ )
_is_hip = is_hip()
if _is_hip:
# import aiter
- from aiter import ActivationType, QuantType, dtypes
- from aiter.fused_moe import fused_moe
- from aiter.ops.triton.quant import dynamic_mxfp4_quant
- from aiter.utility.fp4_utils import e8m0_shuffle
+ try:
+ from aiter import ActivationType, QuantType, dtypes
+ from aiter.fused_moe import fused_moe
+ from aiter.ops.triton.quant import dynamic_mxfp4_quant
+ from aiter.utility.fp4_utils import e8m0_shuffle
+ except ImportError as err:
+ ActivationType = QuantType = dtypes = fused_moe = dynamic_mxfp4_quant = (
+ e8m0_shuffle
+ ) = err
def _swizzle_mxfp4(quant_tensor, scale, num_warps):
@@ -145,27 +154,21 @@ def _quant_dequant_mxfp4_fake(
return torch.empty_like(x)
-try:
- direct_register_custom_op(
- op_name="dequant_mxfp4",
- op_func=_dequant_mxfp4,
- mutates_args=[],
- fake_impl=_dequant_mxfp4_fake,
- )
- dequant_mxfp4 = torch.ops.sglang.dequant_mxfp4
-except AttributeError as error:
- raise error
-
-try:
- direct_register_custom_op(
- op_name="quant_dequant_mxfp4",
- op_func=_quant_dequant_mxfp4,
- mutates_args=[],
- fake_impl=_quant_dequant_mxfp4_fake,
- )
- quant_dequant_mxfp4 = torch.ops.sglang.quant_dequant_mxfp4
-except AttributeError as error:
- raise error
+direct_register_custom_op(
+ op_name="dequant_mxfp4",
+ op_func=_dequant_mxfp4,
+ mutates_args=[],
+ fake_impl=_dequant_mxfp4_fake,
+)
+dequant_mxfp4 = torch.ops.sglang.dequant_mxfp4
+
+direct_register_custom_op(
+ op_name="quant_dequant_mxfp4",
+ op_func=_quant_dequant_mxfp4,
+ mutates_args=[],
+ fake_impl=_quant_dequant_mxfp4_fake,
+)
+quant_dequant_mxfp4 = torch.ops.sglang.quant_dequant_mxfp4
class Mxfp4Config(QuantizationConfig):
@@ -262,6 +265,9 @@ def __init__(
self.use_triton_kernels = get_moe_runner_backend().is_triton_kernel()
self.with_bias = False
self.use_flashinfer = get_moe_runner_backend().is_flashinfer_mxfp4()
+ self.flashinfer_mxfp4_moe_precision = global_server_args_dict[
+ "flashinfer_mxfp4_moe_precision"
+ ]
self.triton_kernel_moe_forward = None
self.triton_kernel_moe_with_bias_forward = None
@@ -281,7 +287,7 @@ def create_weights(
layer: torch.nn.Module,
num_experts: int,
hidden_size: int,
- intermediate_size: int,
+ intermediate_size_per_partition: int,
params_dtype: torch.dtype,
with_bias: bool = False,
**extra_weight_attrs,
@@ -294,19 +300,26 @@ def create_weights(
# pad the intermediate size to be a multiple of 2 * mxfp4_block
# for to hold non-uniform sharded tensor as well as swizzling
- intermediate_size_per_partition_after_pad = intermediate_size
+ intermediate_size_per_partition_after_pad = intermediate_size_per_partition
if _is_sm100_supported:
if self.use_flashinfer:
intermediate_size_per_partition_after_pad = round_up(
- intermediate_size, 256
+ intermediate_size_per_partition, 256
)
hidden_size = round_up(hidden_size, 256)
else:
intermediate_size_per_partition_after_pad = round_up(
- intermediate_size, 64
+ intermediate_size_per_partition, 64
)
+ elif has_triton_kernels:
+ # TODO: this is a hack to make
+ # intermediate_size_per_partition_after_pad the same as the
+ # per_rank_intermediate_size during weight loading
+ intermediate_size_per_partition_after_pad = round_up(
+ intermediate_size_per_partition, mxfp4_block
+ )
- self.intermediate_size = intermediate_size_per_partition_after_pad
+ self.intermediate_size_per_partition = intermediate_size_per_partition_after_pad
self.hidden_size = hidden_size
# Fused gate_up_proj (column parallel)
@@ -401,31 +414,35 @@ def process_weights_after_loading(self, layer):
assert (
layer.w13_weight.dim() == 3
and layer.w13_weight.shape[0] == self.num_experts
- and layer.w13_weight.shape[1] == self.intermediate_size * 2
+ and layer.w13_weight.shape[1]
+ == self.intermediate_size_per_partition * 2
and layer.w13_weight.shape[2] == self.hidden_size // 2
)
assert (
layer.w13_weight_scale.dim() == 3
and layer.w13_weight_scale.shape[0] == self.num_experts
- and layer.w13_weight_scale.shape[1] == self.intermediate_size * 2
+ and layer.w13_weight_scale.shape[1]
+ == self.intermediate_size_per_partition * 2
and layer.w13_weight_scale.shape[2] == self.hidden_size // sf_block_size
)
assert (
layer.w2_weight.dim() == 3
and layer.w2_weight.shape[0] == self.num_experts
and layer.w2_weight.shape[1] == self.hidden_size
- and layer.w2_weight.shape[2] == self.intermediate_size // 2
+ and layer.w2_weight.shape[2]
+ == self.intermediate_size_per_partition // 2
)
assert (
layer.w2_weight_scale.dim() == 3
and layer.w2_weight_scale.shape[1] == self.hidden_size
and layer.w2_weight_scale.shape[2]
- == self.intermediate_size // sf_block_size
+ == self.intermediate_size_per_partition // sf_block_size
)
assert (
layer.w13_weight_bias.dim() == 2
and layer.w13_weight_bias.shape[0] == self.num_experts
- and layer.w13_weight_bias.shape[1] == self.intermediate_size * 2
+ and layer.w13_weight_bias.shape[1]
+ == self.intermediate_size_per_partition * 2
)
assert (
layer.w2_weight_bias.dim() == 2
@@ -502,7 +519,7 @@ def swap_every_two_rows(x, axis=-1):
torch.stack(gemm1_scales_mxfp4_shuffled)
.reshape(
self.num_experts,
- 2 * self.intermediate_size,
+ 2 * self.intermediate_size_per_partition,
self.hidden_size // sf_block_size,
)
.view(torch.float8_e4m3fn)
@@ -514,7 +531,7 @@ def swap_every_two_rows(x, axis=-1):
.reshape(
self.num_experts,
self.hidden_size,
- self.intermediate_size // sf_block_size,
+ self.intermediate_size_per_partition // sf_block_size,
)
.view(torch.float8_e4m3fn)
)
@@ -604,22 +621,50 @@ def _get_tile_tokens_dim(self, x: torch.Tensor, top_k: int):
return tile_tokens_dim
+ def create_moe_runner(
+ self, layer: torch.nn.Module, moe_runner_config: MoeRunnerConfig
+ ):
+ self.moe_runner_config = moe_runner_config
+ self.runner = MoeRunner(MoeRunnerBackend.TRITON, moe_runner_config)
+
def apply(
self,
layer: torch.nn.Module,
- x: torch.Tensor,
- topk_output: TopKOutput,
- moe_runner_config: MoeRunnerConfig,
- ) -> torch.Tensor:
+ dispatch_output: StandardDispatchOutput,
+ ) -> CombineInput:
+ from sglang.srt.layers.moe.token_dispatcher import StandardCombineInput
from sglang.srt.layers.moe.topk import TopKOutputChecker
+ x = dispatch_output.hidden_states
+ topk_output = dispatch_output.topk_output
+
+ moe_runner_config = self.moe_runner_config
+
if self.use_flashinfer:
- # Based on profiling results, we need to quantize x to mxfp8 here to achieve better performance
- x_quant, x_scale = mxfp8_quantize(
- x, False, alignment=self.hidden_size
- ) # to mxfp8
- x_scale = x_scale.view(torch.float8_e4m3fn).reshape(-1)
+            # When bf16 mode is enabled, we don't need to quantize the input here:
+            # TRT-LLM handles quantization inside the kernel and pipelines it with the GEMM operations,
+            # which can theoretically improve performance.
+ if self.flashinfer_mxfp4_moe_precision == "bf16":
+ assert x.dtype == torch.bfloat16
+ x_quant = x
+ x_scale = None
+
+ # May be fused later if this code branch is frequently needed
+ origin_hidden_states_dim = x_quant.shape[-1]
+ if self.hidden_size != origin_hidden_states_dim:
+ x_quant = torch.nn.functional.pad(
+ x_quant,
+ (0, self.hidden_size - origin_hidden_states_dim),
+ mode="constant",
+ value=0.0,
+ )
+ elif self.flashinfer_mxfp4_moe_precision == "default":
+ x_quant, x_scale = mxfp8_quantize(x, False, alignment=self.hidden_size)
+ x_scale = x_scale.view(torch.float8_e4m3fn).reshape(-1)
+ else:
+ raise NotImplementedError
+
assert x_quant.shape[-1] == self.hidden_size
assert TopKOutputChecker.format_is_bypassed(topk_output)
@@ -647,7 +692,7 @@ def apply(
top_k,
None, # n_group # TODO: support n_group
None, # topk_group # TODO: support topk_group
- self.intermediate_size, # padded to multiple of 256
+ self.intermediate_size_per_partition, # padded to multiple of 256
layer.moe_ep_rank * layer.num_local_experts, # local_expert_offset
layer.num_local_experts, # local num experts
None,
@@ -655,14 +700,14 @@ def apply(
1, # routing_method_type, renormalize
True, # do finalize
)[0]
- return trtllm_gen_output
+ return StandardCombineInput(hidden_states=trtllm_gen_output)
if self.use_triton_kernels:
assert (
layer.moe_ep_size == 1
), "Expert parallel is not supported when using triton kernels"
if self.with_bias:
- return self.triton_kernel_moe_with_bias_forward(
+ output = self.triton_kernel_moe_with_bias_forward(
hidden_states=x,
w1=self.w13_weight_triton_tensor,
w1_pcg=self.w13_precision_config,
@@ -674,25 +719,22 @@ def apply(
moe_runner_config=moe_runner_config,
)
else:
- return self.triton_kernel_moe_forward(
+ output = self.triton_kernel_moe_forward(
hidden_states=x,
w1=layer.w13_weight,
w2=layer.w2_weight,
topk_output=topk_output,
moe_runner_config=moe_runner_config,
)
+ return StandardCombineInput(hidden_states=output)
else:
- from sglang.srt.layers.moe.fused_moe_triton.fused_moe import fused_experts
-
- return fused_experts(
- hidden_states=x,
- w1=layer.w13_weight,
- w2=layer.w2_weight,
- topk_output=topk_output,
- moe_runner_config=moe_runner_config,
- b1=layer.w13_weight_bias,
- b2=layer.w2_weight_bias,
+ quant_info = TritonMoeQuantInfo(
+ w13_weight=layer.w13_weight,
+ w2_weight=layer.w2_weight,
+ w13_weight_bias=layer.w13_weight_bias,
+ w2_weight_bias=layer.w2_weight_bias,
)
+ return self.runner.run(dispatch_output, quant_info)
class Mxfp4DynamicQuantMoEMethod(FusedMoEMethodBase):
@@ -771,7 +813,7 @@ def mxfp4_quantize(self, w):
return w, mx_scales
- def process_weights_after_loading(self, layer: Module) -> None:
+ def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
w13, w13_mx_scales = self.mxfp4_quantize(layer.w13_weight.data)
w2, w2_mx_scales = self.mxfp4_quantize(layer.w2_weight.data)
@@ -781,16 +823,27 @@ def process_weights_after_loading(self, layer: Module) -> None:
layer.w2_weight = torch.nn.Parameter(w2, requires_grad=False)
layer.w2_weight_scale = torch.nn.Parameter(w2_mx_scales, requires_grad=False)
+ def create_moe_runner(
+ self, layer: torch.nn.Module, moe_runner_config: MoeRunnerConfig
+ ):
+ self.moe_runner_config = moe_runner_config
+
def apply(
self,
layer: torch.nn.Module,
- x: torch.Tensor,
- topk_output: TopKOutput,
- moe_runner_config: MoeRunnerConfig,
- ) -> torch.Tensor:
- topk_weights, topk_ids, _ = topk_output
+ dispatch_output: StandardDispatchOutput,
+ ) -> CombineInput:
+ from sglang.srt.layers.moe.token_dispatcher import StandardCombineInput
+
+ x = dispatch_output.hidden_states
+ topk_output = dispatch_output.topk_output
- return fused_moe(
+ topk_weights, topk_ids, _ = topk_output
+ if _is_hip:
+ topk_weights = topk_weights.to(
+ torch.float32
+ ) # aiter's moe_sorting requires topk_weights to be FP32
+ output = fused_moe(
x,
layer.w13_weight,
layer.w2_weight,
@@ -801,8 +854,9 @@ def apply(
w2_scale=layer.w2_weight_scale,
activation=(
ActivationType.Silu
- if moe_runner_config.activation == "silu"
+ if self.moe_runner_config.activation == "silu"
else ActivationType.Gelu
),
doweight_stage1=False,
)
+ return StandardCombineInput(hidden_states=output)
diff --git a/python/sglang/srt/layers/quantization/mxfp4_tensor.py b/python/sglang/srt/layers/quantization/mxfp4_tensor.py
index e7b9a83467d..76cb92c544f 100644
--- a/python/sglang/srt/layers/quantization/mxfp4_tensor.py
+++ b/python/sglang/srt/layers/quantization/mxfp4_tensor.py
@@ -13,6 +13,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+from typing import Optional
+
import torch
@@ -24,7 +26,7 @@ class MXFP4QuantizeUtil:
E2M1_bounds = torch.tensor([0.25, 0.75, 1.25, 1.75, 2.5, 3.5, 5])
@classmethod
- def quantize(cls, input: torch.Tensor, block_size: int | None) -> tuple:
+ def quantize(cls, input: torch.Tensor, block_size: Optional[int]) -> tuple:
"""Converting a tensor to a quantized format based on MXFP4 quantization. Only E4M3 is supported.
Args:
input (torch.Tensor): The input tensor to be quantized.
diff --git a/python/sglang/srt/layers/quantization/quark/quark_moe.py b/python/sglang/srt/layers/quantization/quark/quark_moe.py
index 194fa414d76..f6e750a2cbb 100644
--- a/python/sglang/srt/layers/quantization/quark/quark_moe.py
+++ b/python/sglang/srt/layers/quantization/quark/quark_moe.py
@@ -10,8 +10,17 @@
from aiter.fused_moe import fused_moe
from aiter.utility.fp4_utils import e8m0_shuffle
+from sglang.srt.layers.moe import MoeRunnerConfig
+from sglang.srt.layers.quantization.base_config import FusedMoEMethodBase
from sglang.srt.utils import get_bool_env_var, mxfp_supported, set_weight_attrs
+if TYPE_CHECKING:
+ from sglang.srt.layers.moe.token_dispatcher import (
+ CombineInput,
+ StandardDispatchOutput,
+ )
+ from sglang.srt.layers.quantization.quark.quark import QuarkConfig
+
logger = logging.getLogger(__name__)
__all__ = ["QuarkMoEMethod", "QuarkW4A4MXFp4MoEMethod"]
@@ -19,31 +28,17 @@
OCP_MX_BLOCK_SIZE = 32
if TYPE_CHECKING:
- from sglang.srt.layers.moe.topk import TopKOutput
-
-
-class QuarkMoEMethod:
- def __new__(cls, *args, **kwargs):
- from sglang.srt.layers.quantization.base_config import FusedMoEMethodBase
-
- if not hasattr(cls, "_initialized"):
- original_init = cls.__init__
- new_cls = type(
- cls.__name__,
- (FusedMoEMethodBase,),
- {
- "__init__": original_init,
- **{k: v for k, v in cls.__dict__.items() if k != "__dict__"},
- },
- )
- obj = super(new_cls, new_cls).__new__(new_cls)
- obj.__init__(*args, **kwargs)
- return obj
- return super().__new__(cls)
+ from sglang.srt.layers.quantization import QuarkConfig
+
+
+class QuarkMoEMethod(FusedMoEMethodBase):
+
+ def __init__(self, quant_config: QuarkConfig):
+ self.quant_config = quant_config
@staticmethod
def get_moe_method(
- quant_config: "QuarkConfig", # type: ignore # noqa E501 # noqa F821
+ quant_config: QuarkConfig, # type: ignore # noqa E501 # noqa F821
module: torch.nn.Module,
layer_name: str,
) -> "QuarkMoEMethod":
@@ -170,16 +165,25 @@ def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
# layer.w2_weight_scale = torch.nn.Parameter(w2_weight_scale, requires_grad=False)
layer.w2_weight_scale.data = w2_weight_scale.view(s0, s1, -1)
+ def create_moe_runner(
+ self, layer: torch.nn.Module, moe_runner_config: MoeRunnerConfig
+ ):
+ self.moe_runner_config = moe_runner_config
+
def apply(
self,
layer: torch.nn.Module,
- x: torch.Tensor,
- topk_output: TopKOutput,
- moe_runner_config: MoeRunnerConfig,
- ) -> torch.Tensor:
+ dispatch_output: StandardDispatchOutput,
+ ) -> CombineInput:
+
+ from sglang.srt.layers.moe.token_dispatcher import StandardCombineInput
+
+ x = dispatch_output.hidden_states
+ topk_output = dispatch_output.topk_output
+ moe_runner_config = self.moe_runner_config
topk_weights, topk_ids, _ = topk_output
- return fused_moe(
+ output = fused_moe(
x,
layer.w13_weight,
layer.w2_weight,
@@ -195,3 +199,4 @@ def apply(
),
doweight_stage1=False,
)
+ return StandardCombineInput(hidden_states=output)
diff --git a/python/sglang/srt/layers/quantization/quark/schemes/quark_w4a4_mxfp4.py b/python/sglang/srt/layers/quantization/quark/schemes/quark_w4a4_mxfp4.py
index e5fc22797d4..a0787baaf0f 100644
--- a/python/sglang/srt/layers/quantization/quark/schemes/quark_w4a4_mxfp4.py
+++ b/python/sglang/srt/layers/quantization/quark/schemes/quark_w4a4_mxfp4.py
@@ -8,6 +8,7 @@
from aiter.ops.gemm_op_a4w4 import gemm_a4w4
from aiter.ops.shuffle import shuffle_weight
from aiter.ops.triton.gemm_afp4wfp4 import gemm_afp4wfp4
+from aiter.ops.triton.gemm_afp4wfp4_pre_quant_atomic import gemm_afp4wfp4_pre_quant
from aiter.ops.triton.quant import dynamic_mxfp4_quant
from aiter.utility import dtypes
from aiter.utility.fp4_utils import e8m0_shuffle
@@ -38,15 +39,6 @@ def get_min_capability(cls) -> int:
def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
return
- # for aiter implement
- # wshuffle = shuffle_weight(layer.weight.data, layout=(16, 16))
- # w_scales_shuffle = e8m0_shuffle(layer.weight_scale.data).view(dtypes.fp8_e8m0)
-
- # layer.weight = torch.nn.Parameter(wshuffle,
- # requires_grad=False)
- # layer.weight_scale = torch.nn.Parameter(w_scales_shuffle,
- # requires_grad=False)
-
def create_weights(
self,
layer: torch.nn.Module,
@@ -93,26 +85,53 @@ def apply_weights(
x: torch.Tensor,
bias: Optional[torch.Tensor] = None,
) -> torch.Tensor:
-
- out_dtype = x.dtype
- # M = x.shape[0]
- # N = layer.weight.shape[0]
-
- # quant_func = aiter.get_triton_quant(aiter.QuantType.per_1x32)
- # x, x_scales_shuffle = quant_func(x, shuffle=True)
-
- # y = torch.zeros((M + 255) // 256 * 256, N, device=x.device, dtype=self.out_dtype)
-
- # out = gemm_a4w4(x, layer.weight.data, x_scales_shuffle, layer.weight_scale.data, y, bias=bias)
-
- # return out[:M]
-
- # triton implement
- x_q, x_s = dynamic_mxfp4_quant(x)
- y = torch.empty(
- x_q.shape[0], layer.weight.shape[0], device=x_q.device, dtype=out_dtype
+ # This path does not have support for bias currently
+ assert bias is None, "bias is not supported"
+
+ three_d = False
+ x_s = None
+ y = None
+ if isinstance(x, tuple):
+ assert len(x) in [
+ 2,
+ 3,
+ ], "For tuple input, only (x, x_s) or (x, x_s, y) formats are accepted"
+ if len(x) == 2:
+ x, x_s = x
+ elif len(x) == 3:
+ x, x_s, y = x
+
+ use_fused_quant_gemm = (
+ x_s is None and y is not None and layer.weight.shape[0] == y.shape[1]
)
- out = gemm_afp4wfp4(x_q, layer.weight, x_s, layer.weight_scale, out_dtype, y)
-
- return out
+ if x.dim() == 3:
+ three_d = True
+ x = x.view(-1, x.shape[-1])
+ output_shape = [*x.shape[:-1], layer.weight.shape[0]]
+
+        # If use_fused_quant_gemm is True, x_q stays a bf16/fp16 tensor (quantization is fused into the GEMM kernel);
+        # if x_s is not None, x_q is already a quantized uint8 tensor, so it is used as-is.
+ if use_fused_quant_gemm or x_s is not None:
+ x_q = x
+ else:
+ x_q, x_s = dynamic_mxfp4_quant(x)
+
+ if y is None:
+ y = torch.empty(
+ x_q.shape[0],
+ layer.weight.shape[0],
+ device=x_q.device,
+ dtype=self.out_dtype,
+ )
+
+ if use_fused_quant_gemm:
+ gemm_afp4wfp4_pre_quant(x_q, layer.weight, layer.weight_scale, y.dtype, y)
+ y = y.to(x.dtype)
+ else:
+ gemm_afp4wfp4(x_q, layer.weight, x_s, layer.weight_scale, self.out_dtype, y)
+
+ if three_d:
+ return y.view(*output_shape)
+
+ return y
diff --git a/python/sglang/srt/layers/quantization/quark/utils.py b/python/sglang/srt/layers/quantization/quark/utils.py
index 5ea91b5d890..eacbf3ba915 100644
--- a/python/sglang/srt/layers/quantization/quark/utils.py
+++ b/python/sglang/srt/layers/quantization/quark/utils.py
@@ -5,6 +5,10 @@
from types import MappingProxyType
from typing import Any, Optional
+import torch
+from aiter.ops.triton.quant import dynamic_mxfp4_quant
+from torch import nn
+
def deep_compare(dict1: Any, dict2: Any) -> bool:
if type(dict1) is not type(dict2):
@@ -105,3 +109,96 @@ def _is_equal_or_regex_match(
elif target == value:
return True
return False
+
+
+# Utility for 3-D tensors: flatten to 2-D, run dynamic_mxfp4_quant, and restore the batch shape.
+def b_dynamic_mxfp4_quant(x):
+ h, b, d = x.shape
+ x, x_scales = dynamic_mxfp4_quant(x.reshape(-1, d))
+ return x.view(h, b, d // 2), x_scales.view(h, b, d // 32)
+
+
+def mxfp4_to_f32(x, is_threed):
+ # 2 because we pack fp4 in uint8.
+ x = x.repeat_interleave(2, dim=-1)
+ if is_threed:
+ x[..., ::2] = x[..., ::2] & 0xF
+ x[..., 1::2] = x[..., 1::2] >> 4
+ else:
+ x[:, ::2] = x[:, ::2] & 0xF
+ x[:, 1::2] = x[:, 1::2] >> 4
+
+ mxfp4_list = [
+ 0.0,
+ 0.5,
+ 1.0,
+ 1.5,
+ 2.0,
+ 3.0,
+ 4.0,
+ 6.0,
+ -0.0,
+ -0.5,
+ -1.0,
+ -1.5,
+ -2.0,
+ -3.0,
+ -4.0,
+ -6.0,
+ ]
+ mxfp4_in_f32 = torch.tensor(mxfp4_list, dtype=torch.float32, device="cuda")
+ return mxfp4_in_f32[x.long()]
+
+
+def e8m0_to_f32(x):
+ # Convert the input tensor `x` (assumed to be in e8m0 format) to float32.
+ # e8m0 is a custom 8-bit floating point format with 8 bits for exponent, 0 for mantissa.
+ # This means the value is essentially 2^(exponent - 127), similar to how IEEE-754 stores floats.
+
+ # Convert x to float32 for computation, and compute the power of 2 by subtracting the bias (127).
+ x_f32 = 2 ** ((x.to(torch.float32)) - 127)
+
+    # An exponent byte of 255 is the e8m0 encoding for NaN (there is no mantissa),
+    # so map those entries to NaN instead of 2 ** 128.
+    x_f32[x == 255] = float("nan")
+ return x_f32
+
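A tiny worked example of the two decode helpers above (purely illustrative; the fp4 lookup table is allocated on CUDA, so the second call assumes a CUDA device is available):

    import torch

    # e8m0: value = 2 ** (byte - 127)
    e8m0_to_f32(torch.tensor([120, 127, 130], dtype=torch.uint8))
    # -> tensor([0.0078, 1.0000, 8.0000])

    # mxfp4: each byte packs two 4-bit codes, low nibble first
    mxfp4_to_f32(torch.tensor([[0x2E]], dtype=torch.uint8, device="cuda"), is_threed=False)
    # -> tensor([[-4., 1.]], device='cuda:0')   (code 0xE -> -4.0, code 0x2 -> 1.0)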
+
+def quark_post_load_weights(self_attn: nn.Module, w: torch.Tensor, quant_format: str):
+ if "mxfp4" in quant_format:
+        # When the dtype is bf16, dynamically quantize the bf16 tensors to uint8:
+        # quantize w_kc (bf16) first to get w_kc (uint8) and w_s_kc (uint8),
+        # then repeat the same procedure for w_vc to get w_vc (uint8) and w_s_vc (uint8).
+ if w.dtype == torch.bfloat16:
+ w_kc, w_vc = w.unflatten(
+ 0, (-1, self_attn.qk_nope_head_dim + self_attn.v_head_dim)
+ ).split([self_attn.qk_nope_head_dim, self_attn.v_head_dim], dim=1)
+ w_kc, w_s_kc = b_dynamic_mxfp4_quant(w_kc.transpose(-2, -1))
+ w_kc = w_kc.transpose(-2, -1)
+ w_s_kc = w_s_kc.transpose(-2, -1)
+ w_vc, w_s_vc = b_dynamic_mxfp4_quant(w_vc)
+ w_s_kc = w_s_kc.transpose(1, 2).contiguous().transpose(1, 2)
+ w_s_vc = w_s_vc.contiguous().transpose(1, 2)
+ elif w.dtype == torch.uint8: # static quant for mxfp4
+            # When the dtype is uint8, w has already been statically quantized to mxfp4,
+            # but it still needs to be split into w_kc and w_vc.
+            # The packed tensor is only half the original size and each scale covers 32 elements,
+            # so transposing the packed representation directly would be incorrect.
+            # Upcast to fp32 first, split into w_kc and w_vc so the transposes behave correctly,
+            # and then re-quantize to mxfp4.
+ w = mxfp4_to_f32(w, True).to(torch.bfloat16)
+ w_scales = self_attn.kv_b_proj.weight_scale.repeat_interleave(32, dim=-1)
+ w_scales = e8m0_to_f32(w_scales).to(torch.bfloat16)
+ w = w * w_scales
+ w_kc, w_vc = w.unflatten(
+ 0, (-1, (self_attn.qk_nope_head_dim + self_attn.v_head_dim))
+ ).split([self_attn.qk_nope_head_dim, self_attn.v_head_dim], dim=1)
+ w_kc, w_s_kc = b_dynamic_mxfp4_quant(w_kc.transpose(-2, -1))
+ w_kc = w_kc.transpose(-2, -1)
+ w_s_kc = w_s_kc.transpose(-2, -1)
+ w_vc, w_s_vc = b_dynamic_mxfp4_quant(w_vc)
+ w_s_kc = w_s_kc.transpose(1, 2).contiguous().transpose(1, 2)
+ w_s_vc = w_s_vc.contiguous().transpose(1, 2)
+
+ return w_kc, w_s_kc, w_vc, w_s_vc
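A rough usage sketch for `quark_post_load_weights` (hypothetical call site; the attention module is only assumed to expose the attributes the function reads — `qk_nope_head_dim`, `v_head_dim`, and `kv_b_proj.weight_scale` for the statically quantized path):

    w = self_attn.kv_b_proj.weight  # bf16 weights, or uint8 weights already packed as mxfp4
    w_kc, w_s_kc, w_vc, w_s_vc = quark_post_load_weights(self_attn, w, quant_format="mxfp4")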
diff --git a/python/sglang/srt/layers/quantization/rocm_mxfp4_utils.py b/python/sglang/srt/layers/quantization/rocm_mxfp4_utils.py
new file mode 100644
index 00000000000..4659f76bd87
--- /dev/null
+++ b/python/sglang/srt/layers/quantization/rocm_mxfp4_utils.py
@@ -0,0 +1,13 @@
+from aiter.ops.triton.batched_gemm_afp4wfp4_pre_quant import (
+ batched_gemm_afp4wfp4_pre_quant,
+)
+from aiter.ops.triton.fused_mxfp4_quant import (
+ fused_flatten_mxfp4_quant,
+ fused_rms_mxfp4_quant,
+)
+
+__all__ = [
+ "fused_rms_mxfp4_quant",
+ "fused_flatten_mxfp4_quant",
+ "batched_gemm_afp4wfp4_pre_quant",
+]
diff --git a/python/sglang/srt/layers/quantization/unquant.py b/python/sglang/srt/layers/quantization/unquant.py
index 67d3ce3275a..495beb00900 100644
--- a/python/sglang/srt/layers/quantization/unquant.py
+++ b/python/sglang/srt/layers/quantization/unquant.py
@@ -9,6 +9,8 @@
from sglang.srt.custom_op import CustomOp
from sglang.srt.layers.amx_utils import _amx_process_weight_after_loading
+from sglang.srt.layers.moe import MoeRunner, MoeRunnerBackend, MoeRunnerConfig
+from sglang.srt.layers.moe.moe_runner.triton import TritonMoeQuantInfo
from sglang.srt.layers.quantization.base_config import (
FusedMoEMethodBase,
LinearMethodBase,
@@ -24,8 +26,10 @@
)
if TYPE_CHECKING:
- from sglang.srt.layers.moe.moe_runner import MoeRunnerConfig
- from sglang.srt.layers.moe.topk import TopKOutput
+ from sglang.srt.layers.moe.token_dispatcher import (
+ CombineInput,
+ StandardDispatchOutput,
+ )
has_triton_kernels = importlib.util.find_spec("triton_kernels") is not None
@@ -116,9 +120,15 @@ def apply(
) -> torch.Tensor:
if use_intel_amx_backend(layer):
- return torch.ops.sgl_kernel.weight_packed_linear(
+ x_shapes = x.shape
+ if len(x_shapes) == 3:
+ x = x.view(-1, x.shape[-1])
+ output = torch.ops.sgl_kernel.weight_packed_linear(
x, layer.weight, bias, True # is_vnni
)
+ if len(x_shapes) == 3:
+ output = output.view(x_shapes[0], x_shapes[1], -1)
+ return output
return F.linear(x, layer.weight, bias)
@@ -149,7 +159,7 @@ def create_weights(
layer: torch.nn.Module,
num_experts: int,
hidden_size: int,
- intermediate_size: int,
+ intermediate_size_per_partition: int,
params_dtype: torch.dtype,
with_bias: bool = False,
**extra_weight_attrs,
@@ -157,7 +167,7 @@ def create_weights(
self.with_bias = with_bias
# Fused gate_up_proj (column parallel)
- w13_weight_n, w13_weight_k = 2 * intermediate_size, hidden_size
+ w13_weight_n, w13_weight_k = 2 * intermediate_size_per_partition, hidden_size
if self.use_triton_kernels:
w13_weight_n, w13_weight_k = w13_weight_k, w13_weight_n
w13_weight = torch.nn.Parameter(
@@ -169,7 +179,11 @@ def create_weights(
if self.with_bias:
w13_weight_bias = torch.nn.Parameter(
- torch.empty(num_experts, 2 * intermediate_size, dtype=torch.float32),
+ torch.empty(
+ num_experts,
+ 2 * intermediate_size_per_partition,
+ dtype=torch.float32,
+ ),
requires_grad=False,
)
layer.register_parameter("w13_weight_bias", w13_weight_bias)
@@ -178,7 +192,7 @@ def create_weights(
# down_proj (row parallel)
w2_weight_n, w2_weight_k = (
hidden_size,
- intermediate_size,
+ intermediate_size_per_partition,
)
if self.use_triton_kernels:
w2_weight_n, w2_weight_k = w2_weight_k, w2_weight_n
@@ -216,33 +230,40 @@ def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
return
+ def create_moe_runner(
+ self, layer: torch.nn.Module, moe_runner_config: MoeRunnerConfig
+ ):
+ self.moe_runner_config = moe_runner_config
+ self.runner = MoeRunner(MoeRunnerBackend.TRITON, moe_runner_config)
+
def apply(
self,
layer: torch.nn.Module,
- x: torch.Tensor,
- topk_output: TopKOutput,
- moe_runner_config: MoeRunnerConfig,
- ) -> torch.Tensor:
+ dispatch_output: StandardDispatchOutput,
+ ) -> CombineInput:
return self.forward(
- x=x,
layer=layer,
- topk_output=topk_output,
- moe_runner_config=moe_runner_config,
+ dispatch_output=dispatch_output,
)
def forward_cuda(
self,
layer: torch.nn.Module,
- x: torch.Tensor,
- topk_output: TopKOutput,
- moe_runner_config: MoeRunnerConfig,
- ) -> torch.Tensor:
+ dispatch_output: StandardDispatchOutput,
+ ) -> CombineInput:
+
+ from sglang.srt.layers.moe.token_dispatcher import StandardCombineInput
+
+ x = dispatch_output.hidden_states
+ topk_output = dispatch_output.topk_output
+
+ moe_runner_config = self.moe_runner_config
if self.use_triton_kernels:
if self.with_bias:
assert self.triton_kernel_moe_with_bias_forward is not None
- return self.triton_kernel_moe_with_bias_forward(
+ output = self.triton_kernel_moe_with_bias_forward(
hidden_states=x,
w1=layer.w13_weight,
w2=layer.w2_weight,
@@ -255,13 +276,14 @@ def forward_cuda(
)
else:
assert self.triton_kernel_moe_forward is not None
- return self.triton_kernel_moe_forward(
+ output = self.triton_kernel_moe_forward(
hidden_states=x,
w1=layer.w13_weight,
w2=layer.w2_weight,
topk_output=topk_output,
moe_runner_config=moe_runner_config,
)
+ return StandardCombineInput(hidden_states=output)
else:
if _use_aiter:
assert not moe_runner_config.no_combine, "unsupported"
@@ -278,7 +300,7 @@ def forward_cuda(
topk_weights = torch.ones_like(
topk_weights, dtype=torch.float32
) # topk_weights must be FP32 (float32)
- return fused_moe(
+ output = fused_moe(
x,
layer.w13_weight,
layer.w2_weight,
@@ -290,28 +312,30 @@ def forward_cuda(
else ActivationType.Gelu
),
)
+ return StandardCombineInput(hidden_states=output)
else:
- from sglang.srt.layers.moe.fused_moe_triton.fused_moe import (
- fused_experts,
- )
- return fused_experts(
- hidden_states=x,
- w1=layer.w13_weight,
- w2=layer.w2_weight,
- b1=getattr(layer, "w13_weight_bias", None),
+ quant_info = TritonMoeQuantInfo(
+ w13_weight=layer.w13_weight,
+ w2_weight=layer.w2_weight,
+ b13=getattr(layer, "w13_weight_bias", None),
b2=getattr(layer, "w2_weight_bias", None),
- topk_output=topk_output,
- moe_runner_config=moe_runner_config,
)
+ return self.runner.run(dispatch_output, quant_info)
def forward_cpu(
self,
layer: torch.nn.Module,
- x: torch.Tensor,
- topk_output: TopKOutput,
- moe_runner_config: MoeRunnerConfig,
- ) -> torch.Tensor:
+ dispatch_output: StandardDispatchOutput,
+ ) -> CombineInput:
+
+ from sglang.srt.layers.moe.token_dispatcher import StandardCombineInput
+
+ x = dispatch_output.hidden_states
+ topk_output = dispatch_output.topk_output
+
+ moe_runner_config = self.moe_runner_config
+
assert (
moe_runner_config.activation == "silu"
), f"activation = {moe_runner_config.activation} is not supported."
@@ -326,7 +350,7 @@ def forward_cpu(
x, topk_weights = apply_topk_weights_cpu(
moe_runner_config.apply_router_weight_on_input, topk_weights, x
)
- return torch.ops.sgl_kernel.fused_experts_cpu(
+ output = torch.ops.sgl_kernel.fused_experts_cpu(
x,
layer.w13_weight,
layer.w2_weight,
@@ -342,33 +366,103 @@ def forward_cpu(
None, # a2_scale
True, # is_vnni
)
+ return StandardCombineInput(hidden_states=output)
else:
from sglang.srt.layers.moe.fused_moe_native import moe_forward_native
- return moe_forward_native(
+ output = moe_forward_native(
layer,
x,
topk_output,
moe_runner_config,
)
+ return StandardCombineInput(hidden_states=output)
def forward_npu(
self,
layer: torch.nn.Module,
- x: torch.Tensor,
- topk_output: TopKOutput,
- moe_runner_config: MoeRunnerConfig,
- ) -> torch.Tensor:
- from sglang.srt.layers.moe.fused_moe_native import moe_forward_native
+ dispatch_output: StandardDispatchOutput,
+ ) -> CombineInput:
+
+ import torch_npu
+
+ from sglang.srt.layers.moe.token_dispatcher import StandardCombineInput
+
+ x = dispatch_output.hidden_states
+ topk_weights, topk_ids, _ = dispatch_output.topk_output
+
+ original_dtype = x.dtype
+ num_tokens = x.shape[0]
+ topk_weights = topk_weights.to(x.dtype)
+ topk_ids = topk_ids.to(torch.int32)
+ num_experts = layer.num_experts
+ top_k = layer.top_k
+ row_idx_len = num_tokens * top_k
+ row_idx = (
+ torch.arange(0, row_idx_len, dtype=torch.int32, device=topk_weights.device)
+ .view(top_k, -1)
+ .permute(1, 0)
+ .contiguous()
+ )
- return moe_forward_native(
- layer,
- x,
- topk_output,
- moe_runner_config,
+ hidden_states, expanded_row_idx, expanded_expert_idx = (
+ torch_npu.npu_moe_init_routing(
+ x, row_idx=row_idx, expert_idx=topk_ids, active_num=num_tokens
+ )
+ )
+
+ expert_tokens = torch_npu.npu_moe_compute_expert_tokens(
+ expanded_expert_idx, num_experts
)
- def forward_tpu(self, *args, **kwargs) -> torch.Tensor:
+ expert_tokens = expert_tokens.to(torch.int64)
+ if layer.w13_weight.shape[-1] == layer.hidden_size:
+ w13 = layer.w13_weight.transpose(1, 2)
+ w2 = layer.w2_weight.transpose(1, 2)
+
+ # gmm1: gate_up_proj
+ hidden_states = torch_npu.npu_grouped_matmul(
+ x=[hidden_states],
+ weight=[w13],
+ split_item=2,
+ group_list_type=0,
+ group_type=0,
+ group_list=expert_tokens,
+ output_dtype=original_dtype,
+ )[0]
+
+ # act_fn:
+ if self.moe_runner_config.activation == "silu":
+ hidden_states = torch_npu.npu_swiglu(hidden_states)
+ else:
+ from sglang.srt.layers.activation import GeluAndMul
+
+ hidden_states = GeluAndMul()(hidden_states)
+
+ # gmm2: down_proj
+ hidden_states = torch_npu.npu_grouped_matmul(
+ x=[hidden_states],
+ weight=[w2],
+ split_item=2,
+ group_list_type=0,
+ group_type=0,
+ group_list=expert_tokens,
+ output_dtype=original_dtype,
+ )[0]
+
+ final_hidden_states = torch_npu.npu_moe_finalize_routing(
+ hidden_states,
+ skip1=None,
+ skip2=None,
+ bias=None,
+ scales=topk_weights,
+ expanded_src_to_dst_row=expanded_row_idx,
+ export_for_source_row=topk_ids,
+ )
+
+ return StandardCombineInput(hidden_states=final_hidden_states)
+
+ def forward_tpu(self, *args, **kwargs) -> CombineInput:
raise NotImplementedError("The TPU backend currently does not support MoE.")
forward_native = forward_cpu
diff --git a/python/sglang/srt/layers/quantization/utils.py b/python/sglang/srt/layers/quantization/utils.py
index d2c7975ba59..63b8b6eb797 100644
--- a/python/sglang/srt/layers/quantization/utils.py
+++ b/python/sglang/srt/layers/quantization/utils.py
@@ -77,6 +77,19 @@ def is_layer_skipped(
)
else:
is_skipped = prefix in ignored_layers
+ if "gate_up_proj" in prefix:
+ prefix_gate = prefix.replace("gate_up_proj", "gate_proj")
+ prefix_up = prefix.replace("gate_up_proj", "up_proj")
+ if prefix_gate in ignored_layers and prefix_up in ignored_layers:
+ is_skipped = True
+ elif "experts" in prefix:
+ is_skipped = any(
+ [
+ prefix in layer_name
+ for layer_name in ignored_layers
+ if "experts" in layer_name
+ ]
+ )
assert is_skipped is not None
return is_skipped
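Two illustrative calls for the new branches in `is_layer_skipped` (plain, non-regex entries assumed in `ignored_layers`):

    # A fused gate_up_proj is skipped when both of its halves are ignored
    is_layer_skipped(
        "model.layers.0.mlp.gate_up_proj",
        ["model.layers.0.mlp.gate_proj", "model.layers.0.mlp.up_proj"],
    )  # -> True

    # An experts prefix is skipped if any ignored entry containing "experts" contains that prefix
    is_layer_skipped(
        "model.layers.0.mlp.experts",
        ["model.layers.0.mlp.experts.w13_weight"],
    )  # -> True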
@@ -146,6 +159,10 @@ def requantize_with_max_scale(
return max_w_scale, weight
+def update_tensor_inplace(old: torch.Tensor, new: torch.Tensor) -> None:
+ old.copy_(new)
+
+
# Adapted from https://github.com/vllm-project/vllm/blob/main/vllm/model_executor/layers/quantization/utils/layer_utils.py
# Newly generated tensors need to replace existing tensors that are
# already registered as parameters by vLLM (and won't be freed)
@@ -172,6 +189,27 @@ def replace_parameter(
mod.register_parameter(name, torch.nn.Parameter(new, requires_grad=False))
+def assert_fp8_all_close(a: torch.Tensor, b: torch.Tensor):
+ assert a.shape == b.shape
+ assert a.dtype == b.dtype == torch.float8_e4m3fn
+
+ a_u8 = a.view(torch.uint8)
+ b_u8 = b.view(torch.uint8)
+ diff_u8 = (a_u8.to(torch.int16) - b_u8.to(torch.int16)).abs()
+
+ numel = a.numel()
+
+    # fp8 e4m3 stores the sign in the top bit of the byte, so compare bit 7 of the two views
+    count_diff_sign = ((a_u8 >= 128) != (b_u8 >= 128)).sum().item()
+ count_tiny_diff = (diff_u8 >= 1).sum().item()
+ count_large_diff = (diff_u8 >= 2).sum().item()
+
+ assert (
+ (count_diff_sign == 0)
+ and (count_tiny_diff / numel < 0.005)
+ and (count_large_diff == 0)
+ ), f"{count_diff_sign=} {count_tiny_diff=} {count_large_diff=} {numel=}"
+
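A minimal usage sketch of `assert_fp8_all_close` (requires a torch build with `float8_e4m3fn`): the check tolerates a small fraction of 1-ULP differences in the raw e4m3 encoding, but no sign flips and no differences of 2 ULPs or more.

    t = torch.randn(4, 16).to(torch.float8_e4m3fn)
    assert_fp8_all_close(t, t.clone())  # identical tensors always pass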
+
# Match dynamic rules with module name (prefix) and override quantize
# config if module (prefix) matches a rule
def override_config(config: QuantizationConfig, prefix: str):
diff --git a/python/sglang/srt/layers/quantization/w4afp8.py b/python/sglang/srt/layers/quantization/w4afp8.py
index 9be54d05ae8..e952470419a 100644
--- a/python/sglang/srt/layers/quantization/w4afp8.py
+++ b/python/sglang/srt/layers/quantization/w4afp8.py
@@ -1,12 +1,14 @@
from __future__ import annotations
import logging
-from typing import TYPE_CHECKING, Any, Dict, List, Optional
+from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional
import torch
from torch.nn import Module
from torch.nn.parameter import Parameter
+from sglang.srt.distributed.parallel_state import get_moe_expert_parallel_world_size
+from sglang.srt.layers.linear import LinearBase, UnquantizedLinearMethod
from sglang.srt.layers.quantization.base_config import (
FusedMoEMethodBase,
QuantizationConfig,
@@ -15,12 +17,19 @@
from sglang.srt.layers.quantization.fp8 import Fp8LinearMethod
from sglang.srt.layers.quantization.unquant import UnquantizedLinearMethod
from sglang.srt.layers.quantization.utils import is_layer_skipped
-from sglang.srt.utils import set_weight_attrs
+from sglang.srt.utils import is_npu, set_weight_attrs
+
+_is_npu = is_npu()
+if not _is_npu:
+ from sglang.srt.layers.moe.cutlass_w4a8_moe import cutlass_w4a8_moe
if TYPE_CHECKING:
from sglang.srt.layers.moe import MoeRunnerConfig
from sglang.srt.layers.moe.ep_moe.layer import EPMoE
- from sglang.srt.layers.moe.topk import StandardTopKOutput
+ from sglang.srt.layers.moe.token_dispatcher import (
+ CombineInput,
+ StandardDispatchOutput,
+ )
ACTIVATION_SCHEMES = ["static", "dynamic"]
@@ -91,12 +100,13 @@ def get_quant_method(
from sglang.srt.layers.linear import LinearBase
from sglang.srt.layers.moe.ep_moe.layer import EPMoE
from sglang.srt.layers.moe.fused_moe_triton import FusedMoE
+ from sglang.srt.managers.schedule_batch import global_server_args_dict
if isinstance(layer, LinearBase):
if is_layer_skipped(prefix, self.ignored_layers):
return UnquantizedLinearMethod()
return Fp8LinearMethod(self)
- elif isinstance(layer, EPMoE):
+ elif isinstance(layer, FusedMoE):
return W4AFp8MoEMethod(self)
return None
@@ -104,8 +114,24 @@ def get_scaled_act_names(self) -> List[str]:
return []
-class W4AFp8MoEMethod(FusedMoEMethodBase):
+def interleave_scales(scales: torch.Tensor) -> torch.Tensor:
+ """Interleave scales in groups of 4 similar to TRT-LLM implementation."""
+ s_shape = scales.shape
+ # Reshape to separate groups of 4
+ alignment = 4 if s_shape[2] % 4 == 0 else 1
+ scales_interleaved = scales.reshape(
+ s_shape[0], s_shape[1], (s_shape[2] // alignment), alignment
+ )
+ # Permute dimensions to interleave
+ scales_interleaved = scales_interleaved.permute(0, 2, 1, 3)
+ # Reshape back to original dimensions but with interleaved values
+ scales_interleaved = scales_interleaved.reshape(
+ s_shape[0], s_shape[2] // alignment, s_shape[1] * alignment
+ )
+ return scales_interleaved.contiguous()
+
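A quick shape check for the hoisted `interleave_scales` helper (sizes are illustrative): an (experts, N, K) scale tensor comes back as (experts, K // 4, N * 4) when K is a multiple of 4; otherwise the alignment falls back to 1 and the last two dims are simply transposed.

    s = torch.randn(2, 16, 8)      # (num_experts, N, K)
    interleave_scales(s).shape     # torch.Size([2, 2, 64])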
+class W4AFp8MoEMethod(FusedMoEMethodBase):
def __init__(self, quant_config: W4AFp8Config):
self.quant_config = quant_config
@@ -114,7 +140,7 @@ def create_weights(
layer: EPMoE,
num_experts: int,
hidden_size: int,
- intermediate_size: int,
+ intermediate_size_per_partition: int,
params_dtype: torch.dtype,
**extra_weight_attrs,
):
@@ -126,7 +152,7 @@ def create_weights(
w13_weight = torch.nn.Parameter(
torch.empty(
num_experts,
- intermediate_size * 2,
+ intermediate_size_per_partition * 2,
hidden_size // 2,
dtype=torch.int8,
),
@@ -140,7 +166,7 @@ def create_weights(
torch.empty(
num_experts,
hidden_size,
- intermediate_size // 2,
+ intermediate_size_per_partition // 2,
dtype=torch.int8,
),
requires_grad=False,
@@ -154,7 +180,7 @@ def create_weights(
w13_weight_scale = torch.nn.Parameter(
torch.zeros(
num_experts,
- 2 * intermediate_size,
+ 2 * intermediate_size_per_partition,
hidden_size // self.quant_config.group_size,
dtype=torch.float32,
),
@@ -167,7 +193,7 @@ def create_weights(
torch.zeros(
num_experts,
hidden_size,
- intermediate_size // self.quant_config.group_size,
+ intermediate_size_per_partition // self.quant_config.group_size,
dtype=torch.float32,
),
requires_grad=False,
@@ -201,13 +227,13 @@ def create_weights(
)
self.c_strides1 = torch.full(
(num_experts, 3),
- 2 * intermediate_size,
+ 2 * intermediate_size_per_partition,
device=device,
dtype=torch.int64,
)
self.a_strides2 = torch.full(
(num_experts, 3),
- intermediate_size,
+ intermediate_size_per_partition,
device=device,
dtype=torch.int64,
)
@@ -234,33 +260,18 @@ def create_weights(
return
- def _interleave_scales(self, scales: torch.Tensor) -> torch.Tensor:
- """Interleave scales in groups of 4 similar to TRT-LLM implementation."""
- s_shape = scales.shape
- # Reshape to separate groups of 4
- scales_interleaved = scales.reshape(
- s_shape[0], s_shape[1], (s_shape[2] // 4), 4
- )
- # Permute dimensions to interleave
- scales_interleaved = scales_interleaved.permute(0, 2, 1, 3)
- # Reshape back to original dimensions but with interleaved values
- scales_interleaved = scales_interleaved.reshape(
- s_shape[0], s_shape[2] // 4, s_shape[1] * 4
- )
- return scales_interleaved.contiguous()
-
def process_weights_after_loading(self, layer: Module) -> None:
dtype = torch.bfloat16
device = layer.w2_weight.device
# Interleave w13_weight_scale (gate_up_proj)
w13_weight_scale = layer.w13_weight_scale_inv.to(dtype)
- w13_weight_scale = self._interleave_scales(w13_weight_scale)
+ w13_weight_scale = interleave_scales(w13_weight_scale)
layer.w13_weight_scale_inv = Parameter(w13_weight_scale, requires_grad=False)
# Interleave w2_weight_scale (down_proj)
w2_weight_scale = layer.w2_weight_scale_inv.to(dtype)
- w2_weight_scale = self._interleave_scales(w2_weight_scale)
+ w2_weight_scale = interleave_scales(w2_weight_scale)
layer.w2_weight_scale_inv = Parameter(w2_weight_scale, requires_grad=False)
# Process input scales
@@ -278,24 +289,31 @@ def process_weights_after_loading(self, layer: Module) -> None:
)
layer.w2_input_scale = Parameter(new_w2_input_scale, requires_grad=False)
+ def create_moe_runner(
+ self, layer: torch.nn.Module, moe_runner_config: MoeRunnerConfig
+ ):
+ self.moe_runner_config = moe_runner_config
+
def apply(
self,
layer: EPMoE,
- x: torch.Tensor,
- topk_output: StandardTopKOutput,
- moe_runner_config: MoeRunnerConfig,
- ) -> torch.Tensor:
+ dispatch_output: StandardDispatchOutput,
+ ) -> CombineInput:
- # TODO(ch-wan): move it out of this class
from sglang.srt.layers.moe.cutlass_w4a8_moe import cutlass_w4a8_moe
+ from sglang.srt.layers.moe.token_dispatcher import StandardCombineInput
+
+ x = dispatch_output.hidden_states
+ topk_output = dispatch_output.topk_output
topk_weights, topk_ids, _ = topk_output
local_topk_ids = topk_ids
- local_topk_ids = torch.where(
- topk_ids == -1,
- layer.num_experts,
- topk_ids,
- )
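+ # Only remap pruned expert ids (-1) to the out-of-range id `num_experts` when expert parallelism is enabled.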
+ if get_moe_expert_parallel_world_size() > 1:
+ local_topk_ids = torch.where(
+ topk_ids == -1,
+ layer.num_experts,
+ topk_ids,
+ )
output = cutlass_w4a8_moe(
layer.start_expert_id,
@@ -323,6 +341,6 @@ def apply(
layer.w13_input_scale,
layer.w2_input_scale,
)
- if moe_runner_config.routed_scaling_factor is not None:
- output *= moe_runner_config.routed_scaling_factor
- return output
+ if self.moe_runner_config.routed_scaling_factor is not None:
+ output *= self.moe_runner_config.routed_scaling_factor
+ return StandardCombineInput(hidden_states=output)
diff --git a/python/sglang/srt/layers/quantization/w8a8_fp8.py b/python/sglang/srt/layers/quantization/w8a8_fp8.py
index 5e1aa41a60e..c68591fc6e8 100644
--- a/python/sglang/srt/layers/quantization/w8a8_fp8.py
+++ b/python/sglang/srt/layers/quantization/w8a8_fp8.py
@@ -5,6 +5,7 @@
import torch
from torch.nn.parameter import Parameter
+from sglang.srt.layers.moe.moe_runner.triton import TritonMoeQuantInfo
from sglang.srt.layers.parameter import ChannelQuantScaleParameter, ModelWeightParameter
from sglang.srt.layers.quantization.base_config import (
FusedMoEMethodBase,
@@ -26,8 +27,11 @@
from sglang.srt.utils import set_weight_attrs
if TYPE_CHECKING:
- from sglang.srt.layers.moe.moe_runner import MoeRunnerConfig
- from sglang.srt.layers.moe.topk import StandardTopKOutput
+ from sglang.srt.layers.moe import MoeRunner, MoeRunnerBackend, MoeRunnerConfig
+ from sglang.srt.layers.moe.token_dispatcher import (
+ CombineInput,
+ StandardDispatchOutput,
+ )
_is_fp8_fnuz = is_fp8_fnuz()
@@ -209,7 +213,7 @@ def create_weights(
layer: torch.nn.Module,
num_experts: int,
hidden_size: int,
- intermediate_size: int,
+ intermediate_size_per_partition: int,
params_dtype: torch.dtype,
**extra_weight_attrs,
):
@@ -218,7 +222,10 @@ def create_weights(
# WEIGHTS
w13_weight = torch.nn.Parameter(
torch.empty(
- num_experts, 2 * intermediate_size, hidden_size, dtype=fp8_dtype
+ num_experts,
+ 2 * intermediate_size_per_partition,
+ hidden_size,
+ dtype=fp8_dtype,
),
requires_grad=False,
)
@@ -226,14 +233,21 @@ def create_weights(
set_weight_attrs(w13_weight, extra_weight_attrs)
w2_weight = torch.nn.Parameter(
- torch.empty(num_experts, hidden_size, intermediate_size, dtype=fp8_dtype),
+ torch.empty(
+ num_experts,
+ hidden_size,
+ intermediate_size_per_partition,
+ dtype=fp8_dtype,
+ ),
requires_grad=False,
)
layer.register_parameter("w2_weight", w2_weight)
set_weight_attrs(w2_weight, extra_weight_attrs)
w13_weight_scale = torch.nn.Parameter(
- torch.ones(num_experts, 2 * intermediate_size, 1, dtype=torch.float32),
+ torch.ones(
+ num_experts, 2 * intermediate_size_per_partition, 1, dtype=torch.float32
+ ),
requires_grad=False,
)
w2_weight_scale = torch.nn.Parameter(
@@ -266,25 +280,26 @@ def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
layer.w2_weight_scale.data, requires_grad=False
)
+ def create_moe_runner(
+ self, layer: torch.nn.Module, moe_runner_config: MoeRunnerConfig
+ ):
+ self.moe_runner_config = moe_runner_config
+ self.runner = MoeRunner(MoeRunnerBackend.TRITON, moe_runner_config)
+
def apply(
self,
layer: torch.nn.Module,
- x: torch.Tensor,
- topk_output: StandardTopKOutput,
- moe_runner_config: MoeRunnerConfig,
- ) -> torch.Tensor:
- from sglang.srt.layers.moe.fused_moe_triton.fused_moe import fused_experts
+ dispatch_output: StandardDispatchOutput,
+ ) -> CombineInput:
- return fused_experts(
- x,
- layer.w13_weight,
- layer.w2_weight,
- topk_output=topk_output,
- moe_runner_config=moe_runner_config,
+ quant_info = TritonMoeQuantInfo(
+ w13_weight=layer.w13_weight,
+ w2_weight=layer.w2_weight,
use_fp8_w8a8=True,
per_channel_quant=True,
- w1_scale=(layer.w13_weight_scale),
- w2_scale=(layer.w2_weight_scale),
- a1_scale=layer.w13_input_scale,
+ w13_scale=layer.w13_weight_scale,
+ w2_scale=layer.w2_weight_scale,
+ a13_scale=layer.w13_input_scale,
a2_scale=layer.w2_input_scale,
)
+ return self.runner.run(dispatch_output, quant_info)
diff --git a/python/sglang/srt/layers/quantization/w8a8_int8.py b/python/sglang/srt/layers/quantization/w8a8_int8.py
index abcf334e00e..5ccb0259da3 100644
--- a/python/sglang/srt/layers/quantization/w8a8_int8.py
+++ b/python/sglang/srt/layers/quantization/w8a8_int8.py
@@ -24,6 +24,8 @@
get_tensor_model_parallel_world_size,
)
from sglang.srt.layers.amx_utils import _amx_process_weight_after_loading
+from sglang.srt.layers.moe import MoeRunner, MoeRunnerBackend, MoeRunnerConfig
+from sglang.srt.layers.moe.moe_runner.triton import TritonMoeQuantInfo
from sglang.srt.layers.parameter import (
ChannelQuantScaleParameter,
ModelWeightParameter,
@@ -49,8 +51,10 @@
)
if TYPE_CHECKING:
- from sglang.srt.layers.moe.moe_runner import MoeRunnerConfig
- from sglang.srt.layers.moe.topk import TopKOutput
+ from sglang.srt.layers.moe.token_dispatcher import (
+ CombineInput,
+ StandardDispatchOutput,
+ )
_is_cuda = is_cuda()
_is_cpu_amx_available = cpu_has_amx_support()
@@ -339,9 +343,8 @@ def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
_is_cpu_amx_available
), "W8A8Int8LinearMethod on CPU requires that CPU has AMX support"
_amx_process_weight_after_loading(layer, ["weight"])
- return
-
- layer.weight = Parameter(layer.weight.t(), requires_grad=False)
+ else:
+ layer.weight = Parameter(layer.weight.t(), requires_grad=False)
layer.weight_scale = Parameter(layer.weight_scale.data, requires_grad=False)
def create_weights(
@@ -417,7 +420,7 @@ def create_weights(
layer: torch.nn.Module,
num_experts: int,
hidden_size: int,
- intermediate_size: int,
+ intermediate_size_per_partition: int,
params_dtype: torch.dtype,
**extra_weight_attrs,
):
@@ -428,7 +431,10 @@ def create_weights(
# WEIGHTS
w13_weight = torch.nn.Parameter(
torch.empty(
- num_experts, 2 * intermediate_size, hidden_size, dtype=torch.int8
+ num_experts,
+ 2 * intermediate_size_per_partition,
+ hidden_size,
+ dtype=torch.int8,
),
requires_grad=False,
)
@@ -436,14 +442,21 @@ def create_weights(
set_weight_attrs(w13_weight, extra_weight_attrs)
w2_weight = torch.nn.Parameter(
- torch.empty(num_experts, hidden_size, intermediate_size, dtype=torch.int8),
+ torch.empty(
+ num_experts,
+ hidden_size,
+ intermediate_size_per_partition,
+ dtype=torch.int8,
+ ),
requires_grad=False,
)
layer.register_parameter("w2_weight", w2_weight)
set_weight_attrs(w2_weight, extra_weight_attrs)
w13_weight_scale = torch.nn.Parameter(
- torch.ones(num_experts, 2 * intermediate_size, 1, dtype=torch.float32),
+ torch.ones(
+ num_experts, 2 * intermediate_size_per_partition, 1, dtype=torch.float32
+ ),
requires_grad=False,
)
w2_weight_scale = torch.nn.Parameter(
@@ -472,10 +485,9 @@ def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
_is_cpu_amx_available
), "W8A8Int8MoEMethod on CPU requires that CPU has AMX support"
_amx_process_weight_after_loading(layer, ["w13_weight", "w2_weight"])
- return
-
- layer.w13_weight = Parameter(layer.w13_weight, requires_grad=False)
- layer.w2_weight = Parameter(layer.w2_weight, requires_grad=False)
+ else:
+ layer.w13_weight = Parameter(layer.w13_weight, requires_grad=False)
+ layer.w2_weight = Parameter(layer.w2_weight, requires_grad=False)
layer.w13_weight_scale = Parameter(
layer.w13_weight_scale.data, requires_grad=False
)
@@ -483,23 +495,30 @@ def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
layer.w2_weight_scale.data, requires_grad=False
)
+ def create_moe_runner(
+ self, layer: torch.nn.Module, moe_runner_config: MoeRunnerConfig
+ ):
+ self.moe_runner_config = moe_runner_config
+ self.runner = MoeRunner(MoeRunnerBackend.TRITON, moe_runner_config)
+
def apply(
self,
layer: torch.nn.Module,
- x: torch.Tensor,
- topk_output: TopKOutput,
- moe_runner_config: MoeRunnerConfig,
+ dispatch_output: StandardDispatchOutput,
- ) -> torch.Tensor:
+ ) -> CombineInput:
- from sglang.srt.layers.moe.fused_moe_triton.fused_moe import fused_experts
+ from sglang.srt.layers.moe.token_dispatcher import StandardCombineInput
+
+ x = dispatch_output.hidden_states
+ topk_output = dispatch_output.topk_output
if use_intel_amx_backend(layer):
from sglang.srt.layers.moe.topk import apply_topk_weights_cpu
topk_weights, topk_ids, _ = topk_output
x, topk_weights = apply_topk_weights_cpu(
- moe_runner_config.apply_router_weight_on_input, topk_weights, x
+ self.moe_runner_config.apply_router_weight_on_input, topk_weights, x
)
- return torch.ops.sgl_kernel.fused_experts_cpu(
+ output = torch.ops.sgl_kernel.fused_experts_cpu(
x,
layer.w13_weight,
layer.w2_weight,
@@ -515,20 +534,19 @@ def apply(
layer.w2_input_scale, # a2_scale
True, # is_vnni
)
+ return StandardCombineInput(hidden_states=output)
- return fused_experts(
- x,
- layer.w13_weight,
- layer.w2_weight,
- topk_output=topk_output,
- moe_runner_config=moe_runner_config,
+ quant_info = TritonMoeQuantInfo(
+ w13_weight=layer.w13_weight,
+ w2_weight=layer.w2_weight,
use_int8_w8a8=True,
per_channel_quant=True,
- w1_scale=(layer.w13_weight_scale),
- w2_scale=(layer.w2_weight_scale),
- a1_scale=layer.w13_input_scale,
+ w13_scale=layer.w13_weight_scale,
+ w2_scale=layer.w2_weight_scale,
+ a13_scale=layer.w13_input_scale,
a2_scale=layer.w2_input_scale,
)
+ return self.runner.run(dispatch_output, quant_info)
class NPU_W8A8LinearMethodImpl:
@@ -551,7 +569,7 @@ def get_weight(
def get_pertensor_param(params_dtype: torch.dtype) -> Dict[str, Any]:
params_dict = {}
params_dict["input_scale"] = torch.empty(1, dtype=params_dtype)
- params_dict["input_offset"] = torch.empty(1, dtype=torch.int8)
+ params_dict["input_offset"] = torch.empty(1, dtype=params_dtype)
return params_dict
@staticmethod
@@ -582,11 +600,11 @@ def apply(
if original_dtype != torch.int8:
x = torch_npu.npu_quantize(
x,
- layer.aclnn_input_scale,
+ layer.aclnn_input_scale_reciprocal,
layer.aclnn_input_offset,
torch.qint8,
-1,
- True,
+ False,
)
# Only fuse bias add into GEMM for rank 0 (this ensures that
# bias will not get added more than once in Attention TP>1 case)
@@ -608,6 +626,10 @@ def process_weights_after_loading(self, layer):
layer.input_scale.data.repeat(expanding_factor).to(device="npu"),
requires_grad=False,
)
+ layer.aclnn_input_scale_reciprocal = 1 / torch.nn.Parameter(
+ layer.input_scale.data.repeat(expanding_factor).to(device="npu"),
+ requires_grad=False,
+ )
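+ # Precomputed reciprocal of the input scale; apply() now passes this to npu_quantize instead of the scale itself.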
layer.aclnn_input_offset = torch.nn.Parameter(
layer.input_offset.data.repeat(expanding_factor).to(device="npu"),
requires_grad=False,
@@ -896,7 +918,7 @@ def create_weights(
layer: torch.nn.Module,
num_experts: int,
hidden_size: int,
- intermediate_size: int,
+ intermediate_size_per_partition: int,
params_dtype: torch.dtype,
**extra_weight_attrs,
) -> None:
@@ -910,21 +932,31 @@ def create_weights(
# weight
w13_weight = torch.nn.Parameter(
torch.empty(
- num_experts, 2 * intermediate_size, hidden_size, dtype=torch.int8
+ num_experts,
+ 2 * intermediate_size_per_partition,
+ hidden_size,
+ dtype=torch.int8,
),
requires_grad=False,
)
layer.register_parameter("w13_weight", w13_weight)
set_weight_attrs(w13_weight, extra_weight_attrs)
w2_weight = torch.nn.Parameter(
- torch.empty(num_experts, hidden_size, intermediate_size, dtype=torch.int8),
+ torch.empty(
+ num_experts,
+ hidden_size,
+ intermediate_size_per_partition,
+ dtype=torch.int8,
+ ),
requires_grad=False,
)
layer.register_parameter("w2_weight", w2_weight)
set_weight_attrs(w2_weight, extra_weight_attrs)
# scale
w13_weight_scale = torch.nn.Parameter(
- torch.empty(num_experts, 2 * intermediate_size, 1, dtype=torch.float32),
+ torch.empty(
+ num_experts, 2 * intermediate_size_per_partition, 1, dtype=torch.float32
+ ),
requires_grad=False,
)
layer.register_parameter("w13_weight_scale", w13_weight_scale)
@@ -937,7 +969,9 @@ def create_weights(
set_weight_attrs(w2_weight_scale, extra_weight_attrs)
# offset
w13_weight_offset = torch.nn.Parameter(
- torch.empty(num_experts, 2 * intermediate_size, 1, dtype=torch.float32),
+ torch.empty(
+ num_experts, 2 * intermediate_size_per_partition, 1, dtype=torch.float32
+ ),
requires_grad=False,
)
layer.register_parameter("w13_weight_offset", w13_weight_offset)
@@ -969,18 +1003,25 @@ def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
layer.w2_weight_offset.data.squeeze(-1).contiguous(), requires_grad=False
)
+ def create_moe_runner(
+ self, layer: torch.nn.Module, moe_runner_config: MoeRunnerConfig
+ ):
+ self.moe_runner_config = moe_runner_config
+
def apply(
self,
layer,
- x,
- topk_output: TopKOutput,
- moe_runner_config: MoeRunnerConfig,
- ) -> torch.Tensor:
+ dispatch_output: StandardDispatchOutput,
+ ) -> CombineInput:
+ from sglang.srt.layers.moe.token_dispatcher import StandardCombineInput
+
+ x = dispatch_output.hidden_states
+ topk_output = dispatch_output.topk_output
topk_weights, topk_ids, _ = topk_output
topk_ids = topk_ids.to(torch.int32)
topk_weights = topk_weights.to(x.dtype)
- return npu_fused_experts(
+ output = npu_fused_experts(
hidden_states=x,
w13=layer.w13_weight,
w13_scale=layer.w13_weight_scale,
@@ -990,3 +1031,4 @@ def apply(
topk_ids=topk_ids,
top_k=topk_ids.shape[1],
)
+ return StandardCombineInput(hidden_states=output)
diff --git a/python/sglang/srt/layers/radix_attention.py b/python/sglang/srt/layers/radix_attention.py
index 8004fc7c9c4..2e422b9b3df 100644
--- a/python/sglang/srt/layers/radix_attention.py
+++ b/python/sglang/srt/layers/radix_attention.py
@@ -19,6 +19,8 @@
from torch import nn
+from sglang.srt.layers.rotary_embedding import RotaryEmbedding, DualChunkRotaryEmbedding
+
if TYPE_CHECKING:
from sglang.srt.layers.quantization.base_config import QuantizationConfig
from sglang.srt.model_executor.forward_batch_info import ForwardBatch
@@ -52,6 +54,11 @@ def __init__(
v_head_dim: int = -1,
sliding_window_size: int = -1,
is_cross_attention: bool = False,
+ pos_encoding_mode: str = "NONE",
+ logit_capping_method: str = "tanh",
+ orig_context_len: Optional[int] = None,
+ rope: Optional[RotaryEmbedding] = None,
+ rope_range: Optional[tuple[int, int]] = None,
quant_config: Optional[QuantizationConfig] = None,
attn_type: AttentionType = AttentionType.DECODER,
use_irope: bool = False,
@@ -81,6 +88,38 @@ def __init__(
self.quant_method.create_weights(self)
self.attn_type = attn_type
+ self.pos_encoding_mode = pos_encoding_mode
+ self.logit_capping_method = logit_capping_method
+ self.xai_temperature_len = -1
+
+ self.orig_context_len = orig_context_len
+
+ # Store RoPE for context extension
+ if rope is not None:
+ if isinstance(rope, (list, tuple)):
+ _, self.rope_cos, self.rope_sin = rope
+ self.rope_is_neox_style = True
+ elif isinstance(rope, DualChunkRotaryEmbedding):
+ self.rope_cos = None
+ self.rope_sin = None
+ self.rope_is_neox_style = True
+ else:
+ assert isinstance(rope, RotaryEmbedding), type(rope)
+ self.rope_is_neox_style = rope.is_neox_style
+ if hasattr(rope, "repeated_cos_sin_cache"):
+ self.rope_cos, self.rope_sin = rope.repeated_cos_sin_cache
+ else:
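+ # Expand the packed half-width cos/sin cache to the full rotary dim and memoize it on the rope module so later layers can reuse it.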
+ cos_sin = rope.cos_sin_cache
+ cos, sin = cos_sin.chunk(2, dim=-1)
+ self.rope_cos = cos.repeat(1, 2)
+ self.rope_sin = sin.repeat(1, 2)
+ rope.repeated_cos_sin_cache = (self.rope_cos, self.rope_sin)
+ else:
+ self.rope_cos = self.rope_sin = None
+ self.rope_is_neox_style = None
+
+ self.rope_range = rope_range
+
def forward(
self,
q,
diff --git a/python/sglang/srt/layers/rocm_linear_utils.py b/python/sglang/srt/layers/rocm_linear_utils.py
new file mode 100644
index 00000000000..ee7dd1f59ed
--- /dev/null
+++ b/python/sglang/srt/layers/rocm_linear_utils.py
@@ -0,0 +1,44 @@
+import torch
+from aiter.ops.triton.fused_qk_concat import fused_qk_rope_cat
+from aiter.ops.triton.gemm_a16w16 import gemm_a16w16
+from aiter.ops.triton.gemm_a16w16_atomic import gemm_a16w16_atomic
+
+from sglang.srt.utils import BumpAllocator
+
+__all__ = ["fused_qk_rope_cat"]
+
+
+def aiter_dsv3_router_gemm(
+ hidden_states: torch.Tensor,
+ weight: torch.Tensor,
+ gemm_output_zero_allocator: BumpAllocator = None,
+):
+ M = hidden_states.shape[0]
+ N = weight.shape[0]
+ y = None
+
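+ # For small batches (M <= 256), accumulate into a preallocated fp32 buffer (taken from the bump
+ # allocator when available) via the atomic GEMM; larger batches use the plain a16w16 GEMM below.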
+ if M <= 256:
+ # TODO (cagri): convert to bfloat16 as part of another kernel to save time;
+ # for now it is also coupled with the zero allocator.
+ if gemm_output_zero_allocator is not None:
+ y = gemm_output_zero_allocator.allocate(M * N).view(M, N)
+ else:
+ y = torch.zeros((M, N), dtype=torch.float32, device=hidden_states.device)
+
+ if y is not None:
+ logits = gemm_a16w16_atomic(hidden_states, weight, y=y).to(hidden_states.dtype)
+ else:
+ logits = gemm_a16w16(hidden_states, weight)
+
+ return logits
+
+
+def get_dsv3_gemm_output_zero_allocator_size(
+ n_routed_experts: int, num_moe_layers: int, allocate_size: int, embedding_dim: int
+):
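+ # The zero buffer is only sized for the DeepSeek-V3 router shape (hidden size 7168, 256 routed experts).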
+ if embedding_dim != 7168 or n_routed_experts != 256:
+ return 0
+
+ per_layer_size = 256 * (allocate_size + n_routed_experts)
+
+ return num_moe_layers * per_layer_size
diff --git a/python/sglang/srt/layers/rotary_embedding.py b/python/sglang/srt/layers/rotary_embedding.py
index 8f8de70280a..05f06855725 100644
--- a/python/sglang/srt/layers/rotary_embedding.py
+++ b/python/sglang/srt/layers/rotary_embedding.py
@@ -1029,6 +1029,7 @@ def __init__(
f"Corrected mrope_section: {self.mrope_section} (sum={sum(self.mrope_section)})"
)
+ @torch.compile(dynamic=True)
def forward(
self,
positions: torch.Tensor,
@@ -1432,24 +1433,6 @@ def get_rope_index_glm4v(
return position_ids, mrope_position_deltas
- @staticmethod
- def get_next_input_positions(
- mrope_position_delta: int,
- context_len: int,
- seq_len: int,
- ) -> torch.Tensor:
- return torch.tensor(
- [
- list(
- range(
- context_len + mrope_position_delta,
- seq_len + mrope_position_delta,
- )
- )
- for _ in range(3)
- ]
- )
-
class DualChunkRotaryEmbedding(CustomOp):
"""Rotary positional embedding for Dual Chunk Attention."""
@@ -1875,7 +1858,7 @@ def rotate_half(x):
return torch.cat((-x2, x1), dim=-1)
-def apply_rotary_pos_emb(
+def apply_rotary_pos_emb_native(
q: torch.Tensor,
k: torch.Tensor,
cos: torch.Tensor,
@@ -1898,6 +1881,33 @@ def apply_rotary_pos_emb(
return q_embed, k_embed
+def apply_rotary_pos_emb_npu(
+ q: torch.Tensor,
+ k: torch.Tensor,
+ cos: torch.Tensor,
+ sin: torch.Tensor,
+ unsqueeze_dim=1,
+) -> Tuple[torch.Tensor, torch.Tensor]:
+ if q.shape[1] != 128:
+ return apply_rotary_pos_emb_native(q, k, cos, sin, unsqueeze_dim)
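+ # The fused NPU kernel works on a layout with dims 1 and 2 swapped, so transpose
+ # cos/sin/q/k before the call and transpose the results back afterwards.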
+ cos = cos.unsqueeze(unsqueeze_dim)
+ cos = torch.transpose(cos, 1, 2)
+ sin = sin.unsqueeze(unsqueeze_dim)
+ sin = torch.transpose(sin, 1, 2)
+ q = torch.transpose(q, 1, 2)
+ k = torch.transpose(k, 1, 2)
+ q_embed, k_embed = torch_npu.npu_apply_rotary_pos_emb(q, k, cos, sin)
+ q_embed = torch.transpose(q_embed, 1, 2)
+ k_embed = torch.transpose(k_embed, 1, 2)
+ return q_embed, k_embed
+
+
+if _is_npu:
+ apply_rotary_pos_emb = apply_rotary_pos_emb_npu
+else:
+ apply_rotary_pos_emb = apply_rotary_pos_emb_native
+
+
def get_rope_cpu(
head_size: int,
rotary_dim: int,
diff --git a/python/sglang/srt/layers/sampler.py b/python/sglang/srt/layers/sampler.py
index cf4222cc73a..56a831f2daf 100644
--- a/python/sglang/srt/layers/sampler.py
+++ b/python/sglang/srt/layers/sampler.py
@@ -27,6 +27,7 @@
logger = logging.getLogger(__name__)
SYNC_TOKEN_IDS_ACROSS_TP = get_bool_env_var("SYNC_TOKEN_IDS_ACROSS_TP")
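+# When set, log probabilities are computed from the original logits, i.e. before temperature
+# scaling and the other sampling post-processing steps below.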
+RETURN_ORIGINAL_LOGPROB = get_bool_env_var("RETURN_ORIGINAL_LOGPROB")
class Sampler(nn.Module):
@@ -77,7 +78,12 @@ def forward(
batch_next_token_ids = torch.argmax(logits, -1)
if return_logprob:
logprobs = torch.nn.functional.log_softmax(logits, dim=-1)
+
else:
+ # Compute probabilities from the original logits (before temperature rescaling) when requested.
+ if return_logprob and RETURN_ORIGINAL_LOGPROB:
+ logprobs = torch.softmax(logits, dim=-1)
+
# Post process logits
logits.div_(sampling_info.temperatures)
logits[:] = torch.softmax(logits, dim=-1)
@@ -116,7 +122,12 @@ def forward(
if return_logprob:
# clamp to avoid -inf
- logprobs = torch.log(probs).clamp(min=torch.finfo(probs.dtype).min)
+ if RETURN_ORIGINAL_LOGPROB:
+ logprobs = torch.log(logprobs).clamp(
+ min=torch.finfo(logprobs.dtype).min
+ )
+ else:
+ logprobs = torch.log(probs).clamp(min=torch.finfo(probs.dtype).min)
# Attach logprobs to logits_output (in-place modification)
if return_logprob:
@@ -201,7 +212,10 @@ def top_p_normalize_probs_torch(
return torch.zeros_like(probs_sort).scatter_(-1, probs_idx, probs_sort)
-def get_top_logprobs(logprobs: torch.Tensor, top_logprobs_nums: List[int]):
+def get_top_logprobs(
+ logprobs: torch.Tensor,
+ top_logprobs_nums: List[int],
+):
max_k = max(top_logprobs_nums)
ret = logprobs.topk(max_k, dim=1)
values = ret.values.tolist()
@@ -212,10 +226,17 @@ def get_top_logprobs(logprobs: torch.Tensor, top_logprobs_nums: List[int]):
for i, k in enumerate(top_logprobs_nums):
output_top_logprobs_val.append(values[i][:k])
output_top_logprobs_idx.append(indices[i][:k])
- return output_top_logprobs_val, output_top_logprobs_idx
+
+ return (
+ output_top_logprobs_val,
+ output_top_logprobs_idx,
+ )
-def get_token_ids_logprobs(logprobs: torch.Tensor, token_ids_logprobs: List[List[int]]):
+def get_token_ids_logprobs(
+ logprobs: torch.Tensor,
+ token_ids_logprobs: List[List[int]],
+):
output_token_ids_logprobs_val = []
output_token_ids_logprobs_idx = []
for i, token_ids in enumerate(token_ids_logprobs):
@@ -226,7 +247,10 @@ def get_token_ids_logprobs(logprobs: torch.Tensor, token_ids_logprobs: List[List
output_token_ids_logprobs_val.append([])
output_token_ids_logprobs_idx.append([])
- return output_token_ids_logprobs_val, output_token_ids_logprobs_idx
+ return (
+ output_token_ids_logprobs_val,
+ output_token_ids_logprobs_idx,
+ )
def apply_custom_logit_processor(
diff --git a/python/sglang/srt/layers/utils.py b/python/sglang/srt/layers/utils.py
index ac0ddb65ce7..d79ccc663cc 100644
--- a/python/sglang/srt/layers/utils.py
+++ b/python/sglang/srt/layers/utils.py
@@ -34,17 +34,3 @@ def forward(self, *args, **kwargs):
"""
input = args[0] if args else next(iter(kwargs.values()))
return (input,) if self.return_tuple else input
-
-
-@lru_cache(maxsize=1)
-def is_sm100_supported(device=None) -> bool:
- return (torch.cuda.get_device_capability(device)[0] == 10) and (
- torch.version.cuda >= "12.8"
- )
-
-
-@lru_cache(maxsize=1)
-def is_sm90_supported(device=None) -> bool:
- return (torch.cuda.get_device_capability(device)[0] == 9) and (
- torch.version.cuda >= "12.3"
- )
diff --git a/python/sglang/srt/lora/backend/base_backend.py b/python/sglang/srt/lora/backend/base_backend.py
index fe8bd3d20e3..7c2c232d539 100644
--- a/python/sglang/srt/lora/backend/base_backend.py
+++ b/python/sglang/srt/lora/backend/base_backend.py
@@ -1,8 +1,9 @@
-from typing import Tuple, Union
+from typing import Optional, Tuple, Union
import torch
from sglang.srt.lora.utils import LoRABatchInfo
+from sglang.srt.model_executor.forward_batch_info import ForwardBatch
class BaseLoRABackend:
@@ -10,13 +11,14 @@ class BaseLoRABackend:
Each backend has its own implementation of Lora kernels.
Args:
- name: name of backend
- batch_info: information of current batch for use
+ max_loras_per_batch: maximum number of different lora weights
+ that can be applied in a single forward batch.
+ device: the device where the backend runs.
"""
- def __init__(self, name: str, batch_info: LoRABatchInfo = None):
- self.name = name
- self.batch_info = batch_info
+ def __init__(self, max_loras_per_batch: int, device: torch.device):
+ self.max_loras_per_batch = max_loras_per_batch
+ self.device = device
def run_lora_a_sgemm(
self, x: torch.Tensor, weights: torch.Tensor, *args, **kwargs
@@ -93,8 +95,44 @@ def run_gate_up_lora(
"""
pass
- def set_batch_info(self, batch_info: LoRABatchInfo):
- self.batch_info = batch_info
+ def init_cuda_graph_batch_info(
+ self,
+ cuda_graph_batch_info: LoRABatchInfo,
+ max_bs_in_cuda_graph: int,
+ ):
+ """Initialize the batch info for CUDA Graph mode.
+
+ This method provides a hook for each backend to conduct its own initialization
+ logic for CUDA Graph mode.
+
+ Args:
+ cuda_graph_batch_info: the LoRABatchInfo object created in LoraManager
+ max_bs_in_cuda_graph: maximum batch size for CUDA Graph mode
+ """
+ pass
+
+ def prepare_lora_batch(
+ self,
+ forward_batch: ForwardBatch,
+ weight_indices: list[int],
+ lora_ranks: list[int],
+ scalings: list[float],
+ batch_info: Optional[LoRABatchInfo] = None,
+ ):
+ """Prepare the lora weights and batch info for current forward batch.
+
+ This method provides a hook for each backend to conduct its own preparation
+ logic for each forward batch.
+
+ Args:
+ forward_batch: the ForwardBatch object for current forward pass
+ weight_indices: list of indices of lora weights to be applied for current batch
+ lora_ranks: list of lora ranks corresponding to weight_indices
+ scalings: list of scaling factors corresponding to weight_indices
+ batch_info: optional LoRABatchInfo object; if not provided, the backend should use its own
+ internal batch info (e.g., self.cuda_graph_batch_info for CUDA Graph mode)
+ """
+ pass
def get_backend_from_name(name: str) -> BaseLoRABackend:
@@ -105,6 +143,10 @@ def get_backend_from_name(name: str) -> BaseLoRABackend:
from sglang.srt.lora.backend.triton_backend import TritonLoRABackend
return TritonLoRABackend
+ # elif name == "csgmv":
+ # from sglang.srt.lora.backend.chunked_backend import ChunkedSgmvLoRABackend
+
+ # return ChunkedSgmvLoRABackend
elif name == "flashinfer":
raise ValueError(
"FlashInfer LoRA backend has been deprecated, please use `triton` instead."
diff --git a/python/sglang/srt/lora/backend/triton_backend.py b/python/sglang/srt/lora/backend/triton_backend.py
index d3a854b40fd..7abeef770ac 100644
--- a/python/sglang/srt/lora/backend/triton_backend.py
+++ b/python/sglang/srt/lora/backend/triton_backend.py
@@ -1,3 +1,5 @@
+from typing import Optional
+
import torch
from sglang.srt.lora.backend.base_backend import BaseLoRABackend
@@ -8,12 +10,14 @@
sgemm_lora_b_fwd,
)
from sglang.srt.lora.utils import LoRABatchInfo
+from sglang.srt.model_executor.forward_batch_info import ForwardBatch
class TritonLoRABackend(BaseLoRABackend):
+ name = "triton"
- def __init__(self, name: str, batch_info: LoRABatchInfo = None):
- super().__init__(name, batch_info)
+ def __init__(self, max_loras_per_batch: int, device: torch.device):
+ super().__init__(max_loras_per_batch, device)
def run_lora_a_sgemm(
self, x: torch.Tensor, weights: torch.Tensor, *args, **kwargs
@@ -86,3 +90,87 @@ def run_gate_up_lora(
base_output,
)
return lora_output
+
+ def init_cuda_graph_batch_info(
+ self, cuda_graph_batch_info: LoRABatchInfo, max_bs_in_cuda_graph: int
+ ):
+ # Initialize seg_lens and seg_indptr for CUDA graph as they remain constant
+ # across batches.
+ cuda_graph_batch_info.seg_lens[:max_bs_in_cuda_graph].fill_(1)
+ torch.cumsum(
+ cuda_graph_batch_info.seg_lens[:max_bs_in_cuda_graph],
+ dim=0,
+ out=cuda_graph_batch_info.seg_indptr[1 : max_bs_in_cuda_graph + 1],
+ )
+
+ def prepare_lora_batch(
+ self,
+ forward_batch: ForwardBatch,
+ weight_indices: list[int],
+ lora_ranks: list[int],
+ scalings: list[float],
+ batch_info: Optional[LoRABatchInfo] = None,
+ ):
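+ # When batch_info is provided (CUDA graph capture/replay), update it in place;
+ # otherwise build a fresh LoRABatchInfo for this forward batch further below.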
+ # Use pinned memory to avoid synchronizations during host-to-device transfer
+ weight_indices_tensor = torch.tensor(
+ weight_indices, dtype=torch.int32, pin_memory=True, device="cpu"
+ )
+ lora_ranks_tensor = torch.tensor(
+ lora_ranks, dtype=torch.int32, pin_memory=True, device="cpu"
+ )
+ scalings_tensor = torch.tensor(
+ scalings, dtype=torch.float, pin_memory=True, device="cpu"
+ )
+
+ bs = forward_batch.batch_size
+
+ if batch_info is not None:
+ assert (
+ batch_info.use_cuda_graph
+ ), "batch_info.use_cuda_graph must be True when batch_info is provided"
+ batch_info.bs = forward_batch.batch_size
+ batch_info.num_segments = forward_batch.batch_size
+ else:
+ max_len = (
+ # Calculate max_len from the CPU copy to avoid D2H transfer.
+ max(forward_batch.extend_seq_lens_cpu)
+ if forward_batch.forward_mode.is_extend()
+ else 1
+ )
+ seg_lens = (
+ forward_batch.extend_seq_lens
+ if forward_batch.forward_mode.is_extend()
+ else torch.ones(bs, device=self.device)
+ )
+ seg_indptr = torch.zeros((bs + 1,), dtype=torch.int32, device=self.device)
+ seg_indptr[1:] = torch.cumsum(seg_lens, dim=0)
+
+ batch_info = LoRABatchInfo(
+ bs=forward_batch.batch_size,
+ num_segments=forward_batch.batch_size,
+ max_len=max_len,
+ use_cuda_graph=False,
+ seg_lens=seg_lens,
+ seg_indptr=seg_indptr,
+ weight_indices=torch.empty(
+ (bs,), dtype=torch.int32, device=self.device
+ ),
+ lora_ranks=torch.empty(
+ (self.max_loras_per_batch,), dtype=torch.int64, device=self.device
+ ),
+ scalings=torch.empty(
+ (self.max_loras_per_batch,), dtype=torch.float, device=self.device
+ ),
+ permutation=None,
+ )
+
+ # Copy to device asynchronously
+ batch_info.lora_ranks[: self.max_loras_per_batch].copy_(
+ lora_ranks_tensor, non_blocking=True
+ )
+ batch_info.scalings[: self.max_loras_per_batch].copy_(
+ scalings_tensor, non_blocking=True
+ )
+ batch_info.weight_indices[:bs].copy_(weight_indices_tensor, non_blocking=True)
+
+ self.batch_info = batch_info
diff --git a/python/sglang/srt/lora/layers.py b/python/sglang/srt/lora/layers.py
index f9a877cd56f..4426faccba7 100644
--- a/python/sglang/srt/lora/layers.py
+++ b/python/sglang/srt/lora/layers.py
@@ -66,6 +66,15 @@ def __init__(
lora_backend: BaseLoRABackend,
) -> None:
super().__init__(base_layer, lora_backend)
+ shard_size = self.base_layer.output_partition_sizes[0]
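+ # Offsets delimiting each output slice of the LoRA B projection in the fused output
+ # dimension; passed to the backend kernel as `output_offset`.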
+ self.output_offset = torch.tensor(
+ [
+ 0,
+ shard_size,
+ ],
+ dtype=torch.int32,
+ device=next(self.base_layer.parameters()).device,
+ )
def set_lora_info(
self,
@@ -81,6 +90,7 @@ def apply_lora(self, base_output: torch.Tensor, x: torch.Tensor) -> torch.Tensor
lora_output = self.lora_backend.run_lora_b_sgemm(
x=lora_a_output,
weights=self.B_buffer,
+ output_offset=self.output_offset,
base_output=base_output,
)
return lora_output
@@ -130,11 +140,23 @@ def set_lora_info(
self.A_buffer_gate_up = A_buffer
self.B_buffer_gate_up = B_buffer
+ shard_size = self.base_layer.output_partition_sizes[0]
+ self.output_offset = torch.tensor(
+ [
+ 0,
+ shard_size,
+ 2 * shard_size,
+ ],
+ dtype=torch.int32,
+ device=next(self.base_layer.parameters()).device,
+ )
+
def apply_lora(self, base_output: torch.Tensor, x: torch.Tensor) -> torch.Tensor:
lora_output = self.lora_backend.run_gate_up_lora(
x=x,
gate_up_lora_a=self.A_buffer_gate_up,
gate_up_lora_b=self.B_buffer_gate_up,
+ output_offset=self.output_offset,
base_output=base_output,
)
return lora_output
@@ -243,12 +265,22 @@ def set_lora_info(self, A_buffer: torch.Tensor, B_buffer: torch.Tensor):
self.set_lora = True
self.A_buffer = A_buffer
self.B_buffer = B_buffer
+ output_size = self.base_layer.output_size
+ self.output_offset = torch.tensor(
+ [
+ 0,
+ output_size,
+ ],
+ dtype=torch.int32,
+ device=next(self.base_layer.parameters()).device,
+ )
def apply_lora(self, base_output: torch.Tensor, x: torch.Tensor) -> torch.Tensor:
lora_a_output = self.lora_backend.run_lora_a_sgemm(x, self.A_buffer)
lora_output = self.lora_backend.run_lora_b_sgemm(
x=lora_a_output,
weights=self.B_buffer,
+ output_offset=self.output_offset,
base_output=base_output,
)
return lora_output
diff --git a/python/sglang/srt/lora/lora.py b/python/sglang/srt/lora/lora.py
index dfd5acda971..e7569624ccf 100644
--- a/python/sglang/srt/lora/lora.py
+++ b/python/sglang/srt/lora/lora.py
@@ -28,6 +28,9 @@
from sglang.srt.configs.load_config import LoadConfig
from sglang.srt.hf_transformers_utils import AutoConfig
from sglang.srt.lora.backend.base_backend import BaseLoRABackend
+
+# from sglang.srt.lora.backend.chunked_backend import ChunkedSgmvLoRABackend
+from sglang.srt.lora.backend.triton_backend import TritonLoRABackend
from sglang.srt.lora.lora_config import LoRAConfig
from sglang.srt.model_loader.loader import DefaultModelLoader
@@ -156,7 +159,7 @@ def normalize_gate_up_proj(
gate_up_name = weight_name.replace("gate_proj", "gate_up_proj")
if up_name not in weights:
weights[up_name] = torch.zeros_like(weights[weight_name])
- assert self.lora_backend.name == "triton", (
+ assert isinstance(self.lora_backend, TritonLoRABackend), (
f"LoRA weight initialization currently only supported for 'triton' backend. "
f"Received backend: {self.lora_backend.name}. Please verify your backend configuration "
f"or consider implementing custom initialization logic for other backends."
diff --git a/python/sglang/srt/lora/lora_manager.py b/python/sglang/srt/lora/lora_manager.py
index 3ab93c73b0d..baf120ca265 100644
--- a/python/sglang/srt/lora/lora_manager.py
+++ b/python/sglang/srt/lora/lora_manager.py
@@ -32,8 +32,8 @@
LoRABatchInfo,
LoRAType,
get_layer_id,
- get_normalized_lora_weight_names,
- get_weight_name,
+ get_normalized_target_modules,
+ get_target_module_name,
)
from sglang.srt.managers.io_struct import LoRAUpdateResult
from sglang.srt.model_executor.forward_batch_info import ForwardBatch
@@ -55,7 +55,7 @@ def __init__(
tp_rank: int = 0,
max_lora_rank: Optional[int] = None,
target_modules: Optional[Iterable[str]] = None,
- lora_paths: Optional[Dict[str, LoRARef]] = None,
+ lora_paths: Optional[List[LoRARef]] = None,
):
self.base_model: torch.nn.Module = base_model
self.base_hf_config: AutoConfig = base_hf_config
@@ -69,7 +69,10 @@ def __init__(
# LoRA backend for running sgemm kernels
logger.info(f"Using {lora_backend} as backend of LoRA kernels.")
backend_type = get_backend_from_name(lora_backend)
- self.lora_backend: BaseLoRABackend = backend_type(lora_backend)
+ self.lora_backend: BaseLoRABackend = backend_type(
+ max_loras_per_batch=max_loras_per_batch,
+ device=self.device,
+ )
# Initialize mutable internal state of the LoRAManager.
self.init_state(
@@ -82,29 +85,22 @@ def init_cuda_graph_batch_info(self, max_bs_in_cuda_graph: int):
self.max_bs_in_cuda_graph = max_bs_in_cuda_graph
with torch.device("cuda"):
self.cuda_graph_batch_info = LoRABatchInfo(
- bs=self.max_bs_in_cuda_graph,
- seg_lens=torch.zeros(self.max_bs_in_cuda_graph, dtype=torch.int32),
- seg_indptr=torch.zeros(
- self.max_bs_in_cuda_graph + 1, dtype=torch.int32
- ),
+ bs=max_bs_in_cuda_graph,
+ use_cuda_graph=True,
+ num_segments=None,
+ seg_lens=torch.zeros(max_bs_in_cuda_graph, dtype=torch.int32),
+ seg_indptr=torch.zeros(max_bs_in_cuda_graph + 1, dtype=torch.int32),
max_len=1,
- weight_indices=torch.zeros(
- self.max_bs_in_cuda_graph, dtype=torch.int32
- ),
+ weight_indices=torch.zeros(max_bs_in_cuda_graph, dtype=torch.int32),
+ permutation=torch.zeros(max_bs_in_cuda_graph, dtype=torch.int32),
lora_ranks=torch.zeros(self.max_loras_per_batch, dtype=torch.int32),
scalings=torch.zeros(self.max_loras_per_batch, dtype=torch.float),
)
- # Initialize seg_lens and seg_indptr for CUDA graph as they remain constant
- # across batches.
- self.cuda_graph_batch_info.seg_lens[: self.max_bs_in_cuda_graph].fill_(1)
- torch.cumsum(
- self.cuda_graph_batch_info.seg_lens[: self.max_bs_in_cuda_graph],
- dim=0,
- out=self.cuda_graph_batch_info.seg_indptr[
- 1 : self.max_bs_in_cuda_graph + 1
- ],
- )
+ self.lora_backend.init_cuda_graph_batch_info(
+ cuda_graph_batch_info=self.cuda_graph_batch_info,
+ max_bs_in_cuda_graph=max_bs_in_cuda_graph,
+ )
def create_lora_update_result(
self, success: bool, error_message: str = ""
@@ -232,7 +228,6 @@ def validate_lora_batch(self, lora_ids: set[str]) -> bool:
return required_slots <= mem_pool_vacancy
def prepare_lora_batch(self, forward_batch: ForwardBatch):
-
# Load active loras into lora memory pool
cur_uids = set(forward_batch.lora_ids)
@@ -247,102 +242,30 @@ def prepare_lora_batch(self, forward_batch: ForwardBatch):
# set up batch info shared by all lora modules
bs = forward_batch.batch_size
- def transfer_adapter_info(
- weight_indices_out: torch.Tensor,
- lora_ranks_out: torch.Tensor,
- scalings_out: torch.Tensor,
- ):
- """
- Transfer adapter metadata (weight indices, LoRA rank, scalings) from host
- to device (CUDA) asynchronously.
- """
- weight_indices = [0] * len(forward_batch.lora_ids)
- lora_ranks = [0] * self.max_loras_per_batch
- scalings = [0] * self.max_loras_per_batch
- for i, uid in enumerate(forward_batch.lora_ids):
- weight_indices[i] = self.memory_pool.get_buffer_id(uid)
- if uid is not None:
- lora = self.loras[uid]
- lora_ranks[weight_indices[i]] = lora.config.r
- scalings[weight_indices[i]] = lora.scaling
-
- # Use pinned memory to avoid synchronizations during host-to-device transfer
- weight_indices_tensor = torch.tensor(
- weight_indices, dtype=torch.int32, pin_memory=True, device="cpu"
- )
- lora_ranks_tensor = torch.tensor(
- lora_ranks, dtype=torch.int32, pin_memory=True, device="cpu"
- )
- scalings_tensor = torch.tensor(
- scalings, dtype=torch.float, pin_memory=True, device="cpu"
- )
-
- # Copy to device tensors asynchronously
- weight_indices_out[:bs].copy_(weight_indices_tensor, non_blocking=True)
- lora_ranks_out[: self.max_loras_per_batch].copy_(
- lora_ranks_tensor, non_blocking=True
- )
- scalings_out[: self.max_loras_per_batch].copy_(
- scalings_tensor, non_blocking=True
- )
-
- if (
+ use_cuda_graph = (
hasattr(self, "max_bs_in_cuda_graph")
and bs <= self.max_bs_in_cuda_graph
and forward_batch.forward_mode.is_cuda_graph()
- ):
- # Do in-place updates when CUDA graph is enabled and the batch forward mode
- # could use CUDA graph.
-
- transfer_adapter_info(
- self.cuda_graph_batch_info.weight_indices,
- self.cuda_graph_batch_info.lora_ranks,
- self.cuda_graph_batch_info.scalings,
- )
-
- self.cuda_graph_batch_info.bs = bs
- self.cuda_graph_batch_info.max_len = 1
- batch_info = self.cuda_graph_batch_info
- else:
- weight_indices = torch.empty((bs,), dtype=torch.int32, device=self.device)
- lora_ranks = torch.zeros(
- (self.max_loras_per_batch,), dtype=torch.int64, device=self.device
- )
- scalings = torch.zeros(
- (self.max_loras_per_batch,), dtype=torch.float, device=self.device
- )
- transfer_adapter_info(
- weight_indices,
- lora_ranks,
- scalings,
- )
-
- seg_lens = (
- forward_batch.extend_seq_lens
- if forward_batch.forward_mode.is_extend()
- else torch.ones(bs, device=self.device)
- )
-
- max_len = (
- # Calculate max_len from the CPU copy to avoid D2H transfer.
- max(forward_batch.extend_seq_lens_cpu)
- if forward_batch.forward_mode.is_extend()
- else 1
- )
+ )
- seg_indptr = torch.zeros((bs + 1,), dtype=torch.int32, device=self.device)
- seg_indptr[1:] = torch.cumsum(seg_lens, dim=0)
-
- batch_info = LoRABatchInfo(
- bs=bs,
- seg_lens=seg_lens,
- seg_indptr=seg_indptr,
- max_len=max_len,
- weight_indices=weight_indices,
- lora_ranks=lora_ranks,
- scalings=scalings,
- )
- self.lora_backend.set_batch_info(batch_info)
+ weight_indices = [0] * len(forward_batch.lora_ids)
+ lora_ranks = [0] * self.max_loras_per_batch
+ scalings = [0] * self.max_loras_per_batch
+ for i, uid in enumerate(forward_batch.lora_ids):
+ weight_indices[i] = self.memory_pool.get_buffer_id(uid)
+ if uid is not None:
+ lora = self.loras[uid]
+ lora_ranks[weight_indices[i]] = lora.config.r
+ scalings[weight_indices[i]] = lora.scaling
+ # The backend does in-place updates of the CUDA graph batch info when CUDA graph is
+ # enabled and this batch's forward mode can use it; otherwise it builds a fresh batch info.
+ self.lora_backend.prepare_lora_batch(
+ forward_batch=forward_batch,
+ weight_indices=weight_indices,
+ lora_ranks=lora_ranks,
+ scalings=scalings,
+ batch_info=self.cuda_graph_batch_info if use_cuda_graph else None,
+ )
def update_lora_info(self):
"""
@@ -350,19 +273,27 @@ def update_lora_info(self):
"""
for layer_id, layer_modules in enumerate(self.lora_modules):
for module_name, module in layer_modules.items():
- weight_name = get_weight_name(
- module_name, self.memory_pool.lora_weight_names
+ target_module = get_target_module_name(
+ module_name, self.memory_pool.target_modules
)
module.set_lora_info(
- self.memory_pool.get_tensor(weight_name, layer_id, LoRAType.LORA_A),
- self.memory_pool.get_tensor(weight_name, layer_id, LoRAType.LORA_B),
+ self.memory_pool.get_tensor(
+ target_module=target_module,
+ layer_id=layer_id,
+ lora_type=LoRAType.LORA_A,
+ ),
+ self.memory_pool.get_tensor(
+ target_module=target_module,
+ layer_id=layer_id,
+ lora_type=LoRAType.LORA_B,
+ ),
)
def init_state(
self,
max_lora_rank: Optional[int] = None,
target_modules: Optional[Iterable[str]] = None,
- lora_paths: Optional[Dict[str, LoRARef]] = None,
+ lora_paths: Optional[List[LoRARef]] = None,
):
"""
Initialize the internal (mutable) state of the LoRAManager.
@@ -380,12 +311,11 @@ def init_state(
max_lora_rank=max_lora_rank,
target_modules=target_modules,
)
- self.init_lora_weight_names()
self.init_lora_modules()
self.init_memory_pool()
self.update_lora_info()
- def init_lora_adapters(self, lora_paths: Optional[Dict[str, LoRARef]] = None):
+ def init_lora_adapters(self, lora_paths: Optional[List[LoRARef]] = None):
# Configs of all active LoRA adapters, indexed by LoRA ID.
self.configs: Dict[str, LoRAConfig] = {}
@@ -399,7 +329,7 @@ def init_lora_adapters(self, lora_paths: Optional[Dict[str, LoRARef]] = None):
self.num_pinned_loras: int = 0
if lora_paths:
- for lora_ref in lora_paths.values():
+ for lora_ref in lora_paths:
result = self.load_lora_adapter(lora_ref)
if not result.success:
raise RuntimeError(
@@ -413,19 +343,37 @@ def init_lora_shapes(
):
"""Infer LoRA target modules and max_lora_rank from loaded adapters if not provided."""
- if target_modules is not None:
- self.target_modules = set(target_modules)
- else:
- self.target_modules = set()
- for config in self.configs.values():
- if not isinstance(config.target_modules, list):
+ self.target_modules = (
+ get_normalized_target_modules(target_modules) if target_modules else set()
+ )
+
+ for lora_id, config in self.configs.items():
+ if not isinstance(config.target_modules, list):
+ raise ValueError(
+ f"SGLang currently only supports inferring LoRA target modules when a list of "
+ "suffixes is provided in `target_modules` field of PEFT config. Please explicitly "
+ "specify `--lora-target-modules` during server startup. You can specify `all` to "
+ "enable all support modules types. "
+ )
+
+ adapter_target_modules = get_normalized_target_modules(
+ config.target_modules
+ )
+
+ if target_modules is not None:
+ # When `--lora-target-modules` is provided, validate that the adapter's target modules are a subset of the specified target modules.
+ if not adapter_target_modules.issubset(self.target_modules):
+ unsupported_modules = adapter_target_modules - self.target_modules
+ lora_name = self.lora_refs[lora_id].lora_name
raise ValueError(
- f"SGLang currently only supports inferring LoRA target modules when a list of "
- "suffixes is provided in `target_modules` field of PEFT config. Please explicitly "
- "specify `--lora-target-modules` during server startup. You can specify `all` to "
- "enable all support modules types. "
+ f"LoRA adapter '{lora_name}' contains target modules {sorted(unsupported_modules)} "
+ f"that are not included in the specified --lora-target-modules {sorted(self.target_modules)}. "
+ f"Please update --lora-target-modules to include all required modules: "
+ f"{sorted(self.target_modules | adapter_target_modules)}, or use 'all' to enable all supported modules."
)
- self.target_modules.update(config.target_modules)
+ else:
+ # Otherwise, infer target_modules from adapter configs.
+ self.target_modules.update(adapter_target_modules)
if max_lora_rank is not None:
self.max_lora_rank = max_lora_rank
@@ -435,15 +383,6 @@ def init_lora_shapes(
default=0,
)
- def init_lora_weight_names(self):
- """
- Add new LoRA weight names if needed based on the current `self.configs`.
- """
-
- self.lora_weight_names: Set[str] = get_normalized_lora_weight_names(
- self.target_modules
- )
-
def load_lora_weights(self, lora_ref: LoRARef):
"""
Load the weights of a LoRA adapter to CPU memory and conducts post-loading validation.
@@ -467,7 +406,7 @@ def init_memory_pool(self):
tp_size=self.tp_size,
tp_rank=self.tp_rank,
max_lora_rank=self.max_lora_rank,
- lora_weight_names=self.lora_weight_names,
+ target_modules=self.target_modules,
base_model=self.base_model,
)
@@ -494,7 +433,7 @@ def init_lora_modules(self):
continue
# The module should be converted if it is included in target_names
- if module_name.split(".")[-1] in self.lora_weight_names:
+ if module_name.split(".")[-1] in self.target_modules:
layer_id = get_layer_id(module_name)
self.lora_modules[layer_id][module_name] = self.set_lora_module(
module_name, module
diff --git a/python/sglang/srt/lora/lora_registry.py b/python/sglang/srt/lora/lora_registry.py
index 535ab47b41e..51d2b0e6651 100644
--- a/python/sglang/srt/lora/lora_registry.py
+++ b/python/sglang/srt/lora/lora_registry.py
@@ -59,9 +59,9 @@ class LoRARegistry:
update / eventual consistency model between the tokenizer manager process and the scheduler processes.
"""
- def __init__(self, lora_paths: Optional[Dict[str, LoRARef]] = None):
+ def __init__(self, lora_paths: Optional[List[LoRARef]] = None):
assert lora_paths is None or all(
- isinstance(lora, LoRARef) for lora in lora_paths.values()
+ isinstance(lora, LoRARef) for lora in lora_paths
), (
"server_args.lora_paths should have been normalized to LoRARef objects during server initialization. "
"Please file an issue if you see this error."
@@ -78,7 +78,7 @@ def __init__(self, lora_paths: Optional[Dict[str, LoRARef]] = None):
# Initialize the registry with provided LoRA paths, if present.
if lora_paths:
- for lora_ref in lora_paths.values():
+ for lora_ref in lora_paths:
self._register_adapter(lora_ref)
async def register(self, lora_ref: LoRARef):
diff --git a/python/sglang/srt/lora/mem_pool.py b/python/sglang/srt/lora/mem_pool.py
index 56cd39d675f..cdf1707e8be 100644
--- a/python/sglang/srt/lora/mem_pool.py
+++ b/python/sglang/srt/lora/mem_pool.py
@@ -13,9 +13,9 @@
ROW_PARALLELISM_LINEAR_LORA_NAMES,
LoRAType,
get_hidden_dim,
- get_normalized_lora_weight_names,
+ get_normalized_target_modules,
get_stacked_multiply,
- get_weight_name,
+ get_target_module_name,
)
logger = logging.getLogger(__name__)
@@ -52,7 +52,7 @@ def __init__(
tp_size: int,
tp_rank: int,
max_lora_rank: int,
- lora_weight_names: Set[str],
+ target_modules: Set[str],
base_model: torch.nn.Module,
):
self.base_hf_config: AutoConfig = base_hf_config
@@ -62,7 +62,7 @@ def __init__(
self.tp_size: int = tp_size
self.tp_rank: int = tp_rank
self.max_lora_rank: int = max_lora_rank
- self.lora_weight_names: Set[str] = lora_weight_names
+ self.target_modules: Set[str] = target_modules
# Both A_buffer and B_buffer maps lora weight names to its buffer space.
# A_buffer contains num_layer number of row-major tensors with shape
@@ -95,8 +95,8 @@ def _can_support(config: LoRAConfig) -> bool:
"""
if config.r > self.max_lora_rank:
return False
- weights = get_normalized_lora_weight_names(config.target_modules)
- return weights.issubset(self.lora_weight_names)
+ target_module_names = get_normalized_target_modules(config.target_modules)
+ return target_module_names.issubset(self.target_modules)
if isinstance(config, LoRAConfig):
return _can_support(config)
@@ -104,12 +104,18 @@ def _can_support(config: LoRAConfig) -> bool:
return all(_can_support(x) for x in config)
def get_lora_A_shape(
- self, module_name: str, base_model: torch.nn.Module, max_lora_dim: int
+ self,
+ module_name: str,
+ base_model: torch.nn.Module,
+ max_lora_dim: int,
+ layer_idx: int,
) -> Tuple[int]:
"""
Given a module_name (might be a stacked name), return the hidden dims of modules' input and output.
"""
- input_dim, _ = get_hidden_dim(module_name, self.base_hf_config, base_model)
+ input_dim, _ = get_hidden_dim(
+ module_name, self.base_hf_config, base_model, layer_idx
+ )
c = get_stacked_multiply(module_name)
if self.tp_size > 1 and module_name in ROW_PARALLELISM_LINEAR_LORA_NAMES:
input_dim = divide(input_dim, self.tp_size)
@@ -120,12 +126,18 @@ def get_lora_A_shape(
)
def get_lora_B_shape(
- self, module_name: str, base_model: torch.nn.Module, max_lora_dim: int
+ self,
+ module_name: str,
+ base_model: torch.nn.Module,
+ max_lora_dim: int,
+ layer_idx: int,
) -> Tuple[int]:
"""
Given a module_name (might be a stacked name), return the hidden dims of modules' input and output.
"""
- _, output_dim = get_hidden_dim(module_name, self.base_hf_config, base_model)
+ _, output_dim = get_hidden_dim(
+ module_name, self.base_hf_config, base_model, layer_idx
+ )
if self.tp_size > 1 and module_name not in ROW_PARALLELISM_LINEAR_LORA_NAMES:
output_dim = divide(output_dim, self.tp_size)
return (
@@ -139,31 +151,33 @@ def init_buffers(self, base_model: torch.nn.Module):
def init_buffer(
buffer: Dict[str, List[torch.Tensor]],
- lora_weight_names: Set[str],
- get_lora_shape_fn: Callable[[str, torch.nn.Module, int], Tuple[int]],
+ target_modules: Set[str],
+ get_lora_shape_fn: Callable[[str, torch.nn.Module, int, int], Tuple[int]],
):
- for module_name in lora_weight_names:
- lora_shape = get_lora_shape_fn(
- module_name, base_model, self.max_lora_rank
- )
+ for module_name in target_modules:
buffer[module_name] = [
torch.empty(
- lora_shape,
+ get_lora_shape_fn(
+ module_name,
+ base_model,
+ self.max_lora_rank,
+ idx,
+ ),
dtype=self.dtype,
device=device,
)
- for _ in range(self.num_layer)
+ for idx in range(self.num_layer)
]
init_buffer(
self.A_buffer,
- self.lora_weight_names,
+ self.target_modules,
self.get_lora_A_shape,
)
init_buffer(
self.B_buffer,
- self.lora_weight_names,
+ self.target_modules,
self.get_lora_B_shape,
)
@@ -242,32 +256,34 @@ def load_lora_weight_tensor(
for layer_id in range(self.num_layer):
layer_weights = lora_adapter.layers[layer_id].weights
temp_A_buffer: Dict[str, Optional[torch.Tensor]] = {
- weight_name: None for weight_name in self.A_buffer
+ target_module: None for target_module in self.A_buffer
}
temp_B_buffer: Dict[str, Optional[torch.Tensor]] = {
- weight_name: None for weight_name in self.B_buffer
+ target_module: None for target_module in self.B_buffer
}
for name, weights in layer_weights.items():
- lora_weight_name = get_weight_name(name, self.lora_weight_names)
+ target_module = get_target_module_name(name, self.target_modules)
if "lora_A" in name:
- temp_A_buffer[lora_weight_name] = weights
+ temp_A_buffer[target_module] = weights
else:
- temp_B_buffer[lora_weight_name] = weights
+ temp_B_buffer[target_module] = weights
if self.tp_size > 1:
cur_layer_modules = lora_modules[layer_id]
for module_name, module in cur_layer_modules.items():
- weight_name = get_weight_name(module_name, self.lora_weight_names)
+ target_module = get_target_module_name(
+ module_name, self.target_modules
+ )
- if temp_A_buffer[weight_name] is None:
+ if temp_A_buffer[target_module] is None:
# Skip weight slicing if the weight is not present in the adapter
continue
- temp_A_buffer[weight_name] = module.slice_lora_a_weights(
- temp_A_buffer[weight_name], self.tp_rank
+ temp_A_buffer[target_module] = module.slice_lora_a_weights(
+ temp_A_buffer[target_module], self.tp_rank
)
- temp_B_buffer[weight_name] = module.slice_lora_b_weights(
- temp_B_buffer[weight_name], self.tp_rank
+ temp_B_buffer[target_module] = module.slice_lora_b_weights(
+ temp_B_buffer[target_module], self.tp_rank
)
for name, weights in temp_A_buffer.items():
@@ -282,12 +298,12 @@ def load_lora_weight_tensor(
load_lora_weight_tensor(buffer_view, weights)
def get_tensor(
- self, weight_name: str, layer_id: int, lora_type: LoRAType
+ self, target_module: str, layer_id: int, lora_type: LoRAType
) -> torch.Tensor:
if lora_type == LoRAType.LORA_A:
- return self.A_buffer[weight_name][layer_id]
+ return self.A_buffer[target_module][layer_id]
- return self.B_buffer[weight_name][layer_id]
+ return self.B_buffer[target_module][layer_id]
def get_buffer_id(self, lora_uid: str):
return self.uid_to_buffer_id[lora_uid]
diff --git a/python/sglang/srt/lora/utils.py b/python/sglang/srt/lora/utils.py
index e5aa43effef..459c943b73c 100644
--- a/python/sglang/srt/lora/utils.py
+++ b/python/sglang/srt/lora/utils.py
@@ -10,19 +10,19 @@
@dataclass
class LoRABatchInfo:
+ # Whether the current forward pass runs under CUDA Graph.
+ use_cuda_graph: bool
+
# Batch size
bs: int
- # Lengths of each sequence in shape (bs,)
- seg_lens: torch.Tensor
+ # Number of segments. For the Triton backend, this equals the batch size.
+ num_segments: int
- # Indice pointers of each sequence in shape (bs + 1, )
+ # Index pointers of each segment, in shape (num_segments + 1,)
seg_indptr: torch.Tensor
- # Maximum sequence length of current batch
- max_len: int
-
- # The index of lora adapter used by each sequence, in shape (bs,)
+ # The index of lora adapter used by each segment, in shape (num_segments,)
weight_indices: torch.Tensor
# ranks of each lora adapter, in shape (lora_num,)
@@ -31,6 +31,15 @@ class LoRABatchInfo:
# scaling of each lora adapter, in shape (lora_num,)
scalings: torch.Tensor
+ # Lengths of each segment, in shape (num_segments,)
+ seg_lens: Optional[torch.Tensor]
+
+ # Maximum segment length of current batch
+ max_len: Optional[int]
+
+ # The logical (re)ordering of input rows (tokens), in shape (num_tokens,)
+ permutation: Optional[torch.Tensor]
+
class LoRAType(Enum):
LORA_A = 0
@@ -48,14 +57,14 @@ def get_layer_id(name: str) -> int:
def get_hidden_dim(
- module_name: str, config: AutoConfig, base_model: torch.nn.Module
+ module_name: str, config: AutoConfig, base_model: torch.nn.Module, layer_idx: int
) -> Tuple[int]:
"""
Given a module_name (might be a stacked name), return the hidden dims of modules' input and output.
"""
if hasattr(base_model, "get_hidden_dim"):
- return base_model.get_hidden_dim(module_name)
+ return base_model.get_hidden_dim(module_name, layer_idx)
else:
"""
WARNING: get_hidden_dim() is not defined,
@@ -84,7 +93,7 @@ def get_hidden_dim(
raise NotImplementedError()
-def get_normalized_lora_weight_names(
+def get_normalized_target_modules(
target_modules: Iterable[str],
) -> set[str]:
"""
@@ -100,8 +109,8 @@ def get_normalized_lora_weight_names(
result = set()
for name in target_modules:
- weight_name = params_mapping.get(name, name)
- result.add(weight_name)
+ normalized_name = params_mapping.get(name, name)
+ result.add(normalized_name)
return result
@@ -116,20 +125,18 @@ def get_stacked_multiply(module_name: str) -> int:
return stacked_rank[module_name] if module_name in stacked_rank else 1
-def get_weight_name(
- target_name: str, lora_weight_names: Tuple[Set[str]]
-) -> Optional[str]:
+def get_target_module_name(full_module_name: str, target_modules: Set[str]) -> str:
"""
- Get the weight name in lora_weight_names that can match target_name.
+ Get the target module name in target_modules that can match full_module_name.
- If there is a weight name in lora_weight_names that can match target_name, return this name
+ If there is a target module name in target_modules that can match full_module_name, return this name.
Else raise ValueError.
"""
- for weight_name in lora_weight_names:
- if weight_name in target_name:
- return weight_name
+ for target_module in target_modules:
+ if target_module in full_module_name:
+ return target_module
raise ValueError(
- f"Cannot find weight name for {target_name} in {lora_weight_names}"
+ f"Cannot find target module name for {full_module_name} in {target_modules}"
)
diff --git a/python/sglang/srt/managers/cache_controller.py b/python/sglang/srt/managers/cache_controller.py
index b25bf4032b0..f9d45b2f76f 100644
--- a/python/sglang/srt/managers/cache_controller.py
+++ b/python/sglang/srt/managers/cache_controller.py
@@ -18,51 +18,78 @@
import threading
import time
from queue import Empty, Full, PriorityQueue, Queue
-from typing import TYPE_CHECKING, List, Optional
+from typing import TYPE_CHECKING, List, NamedTuple, Optional, Set, Tuple
import torch
+from sglang.srt.mem_cache.hicache_storage import HiCacheStorageConfig
+
if TYPE_CHECKING:
from sglang.srt.mem_cache.allocator import BaseTokenToKVPoolAllocator
from sglang.srt.mem_cache.memory_pool_host import HostKVCache
+from sglang.srt.distributed import (
+ get_tensor_model_parallel_rank,
+ get_tensor_model_parallel_world_size,
+)
+from sglang.srt.layers.dp_attention import (
+ get_attention_dp_rank,
+ get_attention_tp_rank,
+ get_attention_tp_size,
+ is_dp_attention_enabled,
+)
+from sglang.srt.mem_cache.memory_pool import MHATokenToKVPool, MLATokenToKVPool
logger = logging.getLogger(__name__)
+class LayerLoadingEvent:
+ def __init__(self, num_layers: int):
+ self._num_layers = num_layers
+ self.load_events = [torch.cuda.Event() for _ in range(num_layers)]
+ self.start_event = torch.cuda.Event() # start event on controller stream
+
+ def complete(self, layer_index: int):
+ assert 0 <= layer_index < self._num_layers
+ self.load_events[layer_index].record()
+
+ def wait(self, layer_index: int):
+ torch.cuda.current_stream().wait_event(self.load_events[layer_index])
+
+ @property
+ def finish_event(self):
+ return self.load_events[-1]
+
+
class LayerDoneCounter:
- def __init__(self, num_layers):
+ def __init__(self, num_layers: int):
self.num_layers = num_layers
# extra producer and consumer counters for overlap mode
self.num_counters = 3
- self.counters = [num_layers] * self.num_counters
- self.conditions = [threading.Condition() for _ in range(self.num_counters)]
- self.producer_index = 0
- self.consumer_index = 0
-
- def next_producer(self):
- return (self.producer_index + 1) % self.num_counters
+ self.events = [LayerLoadingEvent(num_layers) for _ in range(self.num_counters)]
+ self.producer_index = -1
+ self.consumer_index = -1
def update_producer(self):
- self.producer_index = self.next_producer()
+ self.producer_index = (self.producer_index + 1) % self.num_counters
+ assert self.events[
+ self.producer_index
+ ].finish_event.query(), (
+ "Producer finish event should be ready before being reused."
+ )
return self.producer_index
- def set_consumer(self, index):
+ def set_consumer(self, index: int):
self.consumer_index = index
- def increment(self):
- with self.conditions[self.producer_index]:
- self.counters[self.producer_index] += 1
- self.conditions[self.producer_index].notify_all()
-
- def wait_until(self, threshold):
- with self.conditions[self.consumer_index]:
- while self.counters[self.consumer_index] <= threshold:
- self.conditions[self.consumer_index].wait()
+ def wait_until(self, threshold: int):
+ if self.consumer_index < 0:
+ return
+ self.events[self.consumer_index].wait(threshold)
def reset(self):
- with self.conditions[self.producer_index]:
- self.counters[self.producer_index] = 0
+ self.producer_index = -1
+ self.consumer_index = -1
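The classes above replace the previous condition-variable counters with per-layer CUDA events. Below is a self-contained sketch of that producer/consumer pattern, assuming a CUDA device is available; tensor shapes and names are illustrative only.

# Sketch: per-layer synchronization with CUDA events. A loader stream records
# an event after each layer is copied; the compute stream waits on the event
# for the layer it is about to read (cf. LayerLoadingEvent.complete/wait).
import torch

num_layers = 4
load_stream = torch.cuda.Stream()
layer_events = [torch.cuda.Event() for _ in range(num_layers)]

host_layers = [torch.randn(1024, pin_memory=True) for _ in range(num_layers)]
device_layers = [torch.empty(1024, device="cuda") for _ in range(num_layers)]

with torch.cuda.stream(load_stream):
    for i in range(num_layers):
        device_layers[i].copy_(host_layers[i], non_blocking=True)
        layer_events[i].record()  # corresponds to complete(i)

for i in range(num_layers):
    # corresponds to wait(i): the current stream blocks until layer i is loaded
    torch.cuda.current_stream().wait_event(layer_events[i])
    device_layers[i].mul_(2.0)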
class CacheOperation:
@@ -86,36 +113,30 @@ def __init__(
# default priority is the order of creation
self.priority = priority if priority is not None else self.id
- def merge(self, other: "CacheOperation") -> None:
- # multiple operations can be merged into a single operation for batch processing
- self.host_indices = torch.cat([self.host_indices, other.host_indices])
- self.device_indices = torch.cat([self.device_indices, other.device_indices])
- self.priority = min(self.priority, other.priority)
- self.node_ids.extend(other.node_ids)
-
- def split(self, factor) -> List["CacheOperation"]:
- # split an operation into smaller operations to reduce the size of intermediate buffers
- if factor <= 1:
- return [self]
-
- chunk_size = math.ceil(len(self.host_indices) / factor)
- split_ops = []
- for i in range(0, len(self.host_indices), chunk_size):
- split_ops.append(
- CacheOperation(
- host_indices=self.host_indices[i : i + chunk_size],
- device_indices=self.device_indices[i : i + chunk_size],
- node_id=0,
- )
- )
- # Inherit the node_ids on the final chunk
- if split_ops:
- split_ops[-1].node_ids = self.node_ids
+ @staticmethod
+ def merge_ops(ops: List[CacheOperation]) -> CacheOperation:
+ assert len(ops) > 0
+ if len(ops) == 1:
+ return ops[0]
+
+ host_indices = torch.cat([op.host_indices for op in ops])
+ device_indices = torch.cat([op.device_indices for op in ops])
+ node_ids = []
+ priority = min(op.priority for op in ops)
+ for op in ops:
+ node_ids.extend(op.node_ids)
+ merged_op = CacheOperation(host_indices, device_indices, -1, priority)
+ merged_op.node_ids = node_ids
+ return merged_op
+
+ def __lt__(self, other: CacheOperation):
+ return self.priority < other.priority
- return split_ops
- def __lt__(self, other: "CacheOperation"):
- return self.priority < other.priority
+class HiCacheAck(NamedTuple):
+ start_event: torch.cuda.Event
+ finish_event: torch.cuda.Event
+ node_ids: List[int]
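A small sketch of what merge_ops does with pending operations, using a simplified stand-in class; the field names mirror CacheOperation, but this is not the production type.

# Sketch: merge several pending cache operations into one batched copy,
# concatenating index tensors and keeping the smallest (highest) priority.
from typing import List

import torch


class _Op:  # simplified stand-in for CacheOperation
    def __init__(self, host_indices, device_indices, node_ids, priority):
        self.host_indices = host_indices
        self.device_indices = device_indices
        self.node_ids = node_ids
        self.priority = priority


def merge_ops(ops: List[_Op]) -> _Op:
    assert len(ops) > 0
    if len(ops) == 1:
        return ops[0]
    return _Op(
        torch.cat([op.host_indices for op in ops]),
        torch.cat([op.device_indices for op in ops]),
        [nid for op in ops for nid in op.node_ids],
        min(op.priority for op in ops),
    )


a = _Op(torch.tensor([0, 1]), torch.tensor([10, 11]), [101], priority=5)
b = _Op(torch.tensor([2, 3]), torch.tensor([12, 13]), [102], priority=3)
merged = merge_ops([a, b])
print(merged.host_indices.tolist(), merged.node_ids, merged.priority)  # [0, 1, 2, 3] [101, 102] 3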
class TransferBuffer:
@@ -194,26 +215,25 @@ def __init__(
):
self.request_id = request_id
- self._done_flag = False
self._lock = threading.Lock()
-
+ self._terminated_flag = False
self.start_time = time.monotonic()
super().__init__(host_indices, token_ids, last_hash)
def increment(self, num_tokens: int):
with self._lock:
- if self._done_flag:
+ if self._terminated_flag:
return False
self.completed_tokens += num_tokens
return True
- def mark_done(self):
+ def mark_terminate(self):
with self._lock:
- self._done_flag = True
+ self._terminated_flag = True
- def is_done(self) -> bool:
- return self._done_flag
+ def is_terminated(self) -> bool:
+ return self._terminated_flag
class HiCacheController:
@@ -224,11 +244,13 @@ def __init__(
mem_pool_host: HostKVCache,
page_size: int,
tp_group: torch.distributed.ProcessGroup,
- load_cache_event: threading.Event = None,
+ load_cache_event: threading.Event,
write_policy: str = "write_through_selective",
io_backend: str = "",
storage_backend: Optional[str] = None,
prefetch_threshold: int = 256,
+ model_name: Optional[str] = None,
+ storage_backend_extra_config: Optional[str] = None,
):
self.mem_pool_device_allocator = token_to_kv_pool_allocator
self.mem_pool_device = token_to_kv_pool_allocator.get_kvcache()
@@ -236,56 +258,69 @@ def __init__(
self.write_policy = write_policy
self.page_size = page_size
self.io_backend = io_backend
-
self.enable_storage = False
- # todo: move backend initialization to storage backend module
+
if storage_backend is not None:
self.storage_backend_type = storage_backend
- from sglang.srt.mem_cache.hicache_storage import HiCacheFile, get_hash_str
+ from sglang.srt.mem_cache.hicache_storage import get_hash_str
+
+ self.get_hash_str = get_hash_str
+ self.storage_config = self._generate_storage_config(
+ model_name, storage_backend_extra_config
+ )
+ # for MLA models, only one rank needs to back up the KV cache
+ self.backup_skip = (
+ self.storage_config.is_mla_model
+ # todo: load balancing
+ and self.storage_config.tp_rank != 0
+ )
if storage_backend == "file":
- self.storage_backend = HiCacheFile()
- self.get_hash_str = get_hash_str
+ from sglang.srt.mem_cache.hicache_storage import HiCacheFile
+
+ self.storage_backend = HiCacheFile(self.storage_config)
elif storage_backend == "nixl":
from sglang.srt.mem_cache.storage.nixl.hicache_nixl import HiCacheNixl
self.storage_backend = HiCacheNixl()
- self.get_hash_str = get_hash_str
elif storage_backend == "mooncake":
from sglang.srt.mem_cache.storage.mooncake_store.mooncake_store import (
MooncakeStore,
- get_hash_str_mooncake,
)
- self.storage_backend = MooncakeStore()
- self.get_hash_str = get_hash_str_mooncake
+ self.storage_backend = MooncakeStore(self.storage_config)
self.storage_backend.register_buffer(self.mem_pool_host.kv_buffer)
assert self.mem_pool_host.layout == "page_first"
elif storage_backend == "hf3fs":
- from sglang.srt.distributed import get_tensor_model_parallel_rank
from sglang.srt.mem_cache.storage.hf3fs.storage_hf3fs import (
HiCacheHF3FS,
)
- rank = get_tensor_model_parallel_rank()
- bytes_per_page = (
- mem_pool_host.get_size_per_token() * mem_pool_host.page_size
- )
+ if self.mem_pool_host.layout == "page_first":
+ bytes_per_page = (
+ mem_pool_host.get_ksize_per_token() * mem_pool_host.page_size
+ )
+ elif self.mem_pool_host.layout == "layer_first":
+ bytes_per_page = (
+ mem_pool_host.get_size_per_token() * mem_pool_host.page_size
+ )
dtype = mem_pool_host.dtype
self.storage_backend = HiCacheHF3FS.from_env_config(
- rank, bytes_per_page, dtype
+ bytes_per_page, dtype, self.storage_config
)
- self.get_hash_str = get_hash_str
else:
raise NotImplementedError(
f"Unsupported storage backend: {storage_backend}"
)
+
self.enable_storage = True
# todo: threshold policy for prefetching
self.prefetch_threshold = max(prefetch_threshold, self.page_size)
self.prefetch_capacity_limit = int(
0.8 * (self.mem_pool_host.size - self.mem_pool_device.size)
)
+ # granularity of batch storage IO operations, in number of pages
+ self.storage_batch_size = 128
# tracking the number of tokens locked in prefetching, updated by the main scheduler thread
self.prefetch_tokens_occupied = 0
@@ -296,15 +331,26 @@ def __init__(
self.prefetch_tp_group = torch.distributed.new_group(
group_ranks, backend="gloo"
)
- self.prefetch_io_tp_group = torch.distributed.new_group(
- group_ranks, backend="gloo"
- )
- self.backup_tp_group = torch.distributed.new_group(
- group_ranks, backend="gloo"
- )
- self.load_cache_event = load_cache_event
- self.layer_done_counter = LayerDoneCounter(self.mem_pool_device.layer_num)
+ # Select the get and set functions
+ self.page_get_func = self._generic_page_get
+ self.page_set_func = self._generic_page_set
+ self.batch_exists_func = self.storage_backend.batch_exists
+ self.is_3fs_zerocopy = (
+ self.storage_backend_type == "hf3fs"
+ and self.mem_pool_host.layout == "page_first"
+ )
+ if self.storage_backend_type == "mooncake":
+ self.page_get_func = self._mooncake_page_get
+ self.page_set_func = self._mooncake_page_set
+ elif self.is_3fs_zerocopy:
+ self.page_get_func = self._3fs_zero_copy_page_get
+ self.page_set_func = self._3fs_zero_copy_page_set
+ self.batch_exists_func = self._3fs_zero_copy_batch_exists
+
+ self.device = self.mem_pool_device.device
+ self.layer_num = self.mem_pool_device.layer_num
+ self.layer_done_counter = LayerDoneCounter(self.layer_num)
self.mem_pool_device.register_layer_transfer_counter(self.layer_done_counter)
if write_policy not in [
@@ -314,11 +360,11 @@ def __init__(
]:
raise ValueError(f"Invalid write policy: {write_policy}")
- self.write_queue = PriorityQueue()
- self.load_queue = PriorityQueue()
-
- self.ack_write_queue = Queue()
- self.ack_load_queue = Queue()
+ # self.write_queue = PriorityQueue[CacheOperation]()
+ self.load_queue: List[CacheOperation] = []
+ self.write_queue: List[CacheOperation] = []
+ self.ack_load_queue: List[HiCacheAck] = []
+ self.ack_write_queue: List[HiCacheAck] = []
self.stop_event = threading.Event()
self.write_buffer = TransferBuffer(self.stop_event)
@@ -329,16 +375,6 @@ def __init__(
self.write_stream = torch.cuda.Stream()
self.load_stream = torch.cuda.Stream()
- self.write_thread = threading.Thread(
- target=self.write_thread_func_direct, daemon=True
- )
- self.load_thread = threading.Thread(
- target=self.load_thread_func_layer_by_layer, daemon=True
- )
-
- self.write_thread.start()
- self.load_thread.start()
-
if self.enable_storage:
self.prefetch_thread = threading.Thread(
target=self.prefetch_thread_func, daemon=True
@@ -351,21 +387,57 @@ def __init__(
self.prefetch_revoke_queue = Queue()
self.ack_backup_queue = Queue()
+ self.host_mem_release_queue = Queue()
self.prefetch_thread.start()
self.backup_thread.start()
+ def _generate_storage_config(
+ self,
+ model_name: Optional[str] = None,
+ storage_backend_extra_config: Optional[str] = None,
+ ):
+
+ if is_dp_attention_enabled():
+ self.tp_rank = get_attention_tp_rank()
+ self.tp_size = get_attention_tp_size()
+ self.dp_rank = get_attention_dp_rank()
+ else:
+ self.tp_rank = get_tensor_model_parallel_rank()
+ self.tp_size = get_tensor_model_parallel_world_size()
+ self.dp_rank = 0
+
+ # Currently, AscendMLAPagedTokenToKVPool is a subclass of MLATokenToKVPool.
+ is_mla_backend = isinstance(self.mem_pool_device, MLATokenToKVPool)
+
+ # Parse extra config JSON if provided
+ extra_config = None
+ if storage_backend_extra_config:
+ try:
+ import json
+
+ extra_config = json.loads(storage_backend_extra_config)
+ except Exception as e:
+ logger.error(f"Invalid backend extra config JSON: {e}")
+
+ return HiCacheStorageConfig(
+ tp_rank=self.tp_rank,
+ tp_size=self.tp_size,
+ is_mla_model=is_mla_backend,
+ is_page_first_layout=self.mem_pool_host.layout == "page_first",
+ model_name=model_name,
+ extra_config=extra_config,
+ )
+
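The extra-config handling above tolerates malformed JSON by logging an error and continuing. A minimal sketch of that defensive parsing follows; the example key is hypothetical and not taken from any real backend.

# Sketch: defensive parsing of an optional JSON string, as in
# _generate_storage_config above.
import json
import logging
from typing import Any, Dict, Optional

logger = logging.getLogger(__name__)


def parse_extra_config(raw: Optional[str]) -> Optional[Dict[str, Any]]:
    if not raw:
        return None
    try:
        return json.loads(raw)
    except Exception as e:
        # Malformed JSON should not crash startup; it is logged and ignored.
        logger.error(f"Invalid backend extra config JSON: {e}")
        return None


print(parse_extra_config('{"example_endpoint": "127.0.0.1:50051"}'))
print(parse_extra_config("not json"))  # logs an error and returns None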
def reset(self):
self.stop_event.set()
- self.write_thread.join()
- self.load_thread.join()
- self.write_queue.queue.clear()
- self.load_queue.queue.clear()
+ self.write_queue.clear()
+ self.load_queue.clear()
self.write_buffer.clear()
self.load_buffer.clear()
- self.ack_write_queue.queue.clear()
- self.ack_load_queue.queue.clear()
+ self.ack_write_queue.clear()
+ self.ack_load_queue.clear()
if self.enable_storage:
self.prefetch_thread.join()
self.backup_thread.join()
@@ -374,15 +446,7 @@ def reset(self):
self.prefetch_revoke_queue.queue.clear()
self.ack_backup_queue.queue.clear()
- self.write_thread = threading.Thread(
- target=self.write_thread_func_direct, daemon=True
- )
- self.load_thread = threading.Thread(
- target=self.load_thread_func_layer_by_layer, daemon=True
- )
self.stop_event.clear()
- self.write_thread.start()
- self.load_thread.start()
if self.enable_storage:
self.prefetch_thread = threading.Thread(
@@ -398,7 +462,7 @@ def write(
self,
device_indices: torch.Tensor,
priority: Optional[int] = None,
- node_id: int = 0,
+ node_id: int = -1,
) -> Optional[torch.Tensor]:
"""
Back up KV caches from device memory to host memory.
@@ -407,17 +471,46 @@ def write(
if host_indices is None:
return None
self.mem_pool_host.protect_write(host_indices)
- torch.cuda.current_stream().synchronize()
- self.write_queue.put(
+ self.write_queue.append(
CacheOperation(host_indices, device_indices, node_id, priority)
)
+ self.start_writing()
return host_indices
+ def start_writing(self) -> None:
+ if len(self.write_queue) == 0:
+ return
+
+ op = CacheOperation.merge_ops(self.write_queue)
+ host_indices, device_indices = self.move_indices(op)
+ self.write_queue.clear()
+
+ start_event = torch.cuda.Event()
+ finish_event = torch.cuda.Event()
+
+ start_event.record()
+ with torch.cuda.stream(self.write_stream):
+ start_event.wait(self.write_stream)
+ self.mem_pool_host.backup_from_device_all_layer(
+ self.mem_pool_device, host_indices, device_indices, self.io_backend
+ )
+ self.mem_pool_host.complete_io(op.host_indices)
+ finish_event.record()
+ # NOTE: We must save the host indices and device indices here
+ # because we need to guarantee that these tensors are still alive
+ # when the write stream is executing.
+ if host_indices.is_cuda:
+ host_indices.record_stream(self.write_stream)
+ if device_indices.is_cuda:
+ device_indices.record_stream(self.write_stream)
+
+ self.ack_write_queue.append(HiCacheAck(start_event, finish_event, op.node_ids))
+
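The NOTE in start_writing relies on Tensor.record_stream to keep the index tensors alive while a side stream consumes them. A standalone sketch of the pattern (CUDA required; sizes are arbitrary):

# Sketch: event-gated work on a side stream plus record_stream, so the
# caching allocator does not recycle `indices` while the side stream is
# still reading it (cf. start_writing above).
import torch

side_stream = torch.cuda.Stream()
indices = torch.arange(1 << 20, device="cuda")

start_event = torch.cuda.Event()
finish_event = torch.cuda.Event()

start_event.record()
with torch.cuda.stream(side_stream):
    start_event.wait(side_stream)  # do not run ahead of the producer stream
    checksum = indices.sum()       # asynchronous consumption on the side stream
    finish_event.record()
    indices.record_stream(side_stream)

finish_event.synchronize()
print(int(checksum))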
def load(
self,
host_indices: torch.Tensor,
priority: Optional[int] = None,
- node_id: int = 0,
+ node_id: int = -1,
) -> Optional[torch.Tensor]:
"""
Load KV caches from host memory to device memory.
@@ -426,17 +519,18 @@ def load(
if device_indices is None:
return None
self.mem_pool_host.protect_load(host_indices)
- # to ensure the device indices are ready before accessed by another CUDA stream
- torch.cuda.current_stream().synchronize()
- self.load_queue.put(
+ self.load_queue.append(
CacheOperation(host_indices, device_indices, node_id, priority)
)
return device_indices
- def move_indices(self, host_indices, device_indices):
+ def move_indices(self, op: CacheOperation):
+ host_indices, device_indices = op.host_indices, op.device_indices
# move indices to GPU if using kernels, to host if using direct indexing
if self.io_backend == "kernel":
- return host_indices.to(self.mem_pool_device.device), device_indices
+ if not host_indices.is_cuda:
+ host_indices = host_indices.to(self.device, non_blocking=True)
+ return host_indices, device_indices
elif self.io_backend == "direct":
device_indices = device_indices.cpu()
host_indices, idx = host_indices.sort()
@@ -444,58 +538,20 @@ def move_indices(self, host_indices, device_indices):
else:
raise ValueError(f"Unsupported io backend")
- def write_thread_func_direct(self):
- """
- Directly write through KV caches to host memory without buffering.
- """
- torch.cuda.set_stream(self.write_stream)
- while not self.stop_event.is_set():
- try:
- operation = self.write_queue.get(block=True, timeout=1)
- host_indices, device_indices = self.move_indices(
- operation.host_indices, operation.device_indices
- )
- self.mem_pool_host.backup_from_device_all_layer(
- self.mem_pool_device, host_indices, device_indices, self.io_backend
- )
- self.write_stream.synchronize()
- self.mem_pool_host.complete_io(operation.host_indices)
- for node_id in operation.node_ids:
- if node_id != 0:
- self.ack_write_queue.put(node_id)
- except Empty:
- continue
- except Exception as e:
- logger.error(e)
+ def start_loading(self) -> int:
+ if len(self.load_queue) == 0:
+ return -1
- def load_thread_func_layer_by_layer(self):
- """
- Load KV caches from host memory to device memory layer by layer.
- """
- torch.cuda.set_stream(self.load_stream)
- while not self.stop_event.is_set():
- self.load_cache_event.wait(timeout=1)
- if not self.load_cache_event.is_set():
- continue
- self.load_cache_event.clear()
- self.layer_done_counter.update_producer()
-
- batch_operation = None
- while self.load_queue.qsize() > 0:
- op = self.load_queue.get(block=True)
- if batch_operation is None:
- batch_operation = op
- else:
- batch_operation.merge(op)
- if batch_operation is None:
- continue
+ producer_id = self.layer_done_counter.update_producer()
+ op = CacheOperation.merge_ops(self.load_queue)
+ host_indices, device_indices = self.move_indices(op)
+ self.load_queue.clear()
+ producer_event = self.layer_done_counter.events[producer_id]
+ producer_event.start_event.record()
- # start layer-wise KV cache transfer from CPU to GPU
- self.layer_done_counter.reset()
- host_indices, device_indices = self.move_indices(
- batch_operation.host_indices, batch_operation.device_indices
- )
- for i in range(self.mem_pool_host.layer_num):
+ with torch.cuda.stream(self.load_stream):
+ producer_event.start_event.wait(self.load_stream)
+ for i in range(self.layer_num):
self.mem_pool_host.load_to_device_per_layer(
self.mem_pool_device,
host_indices,
@@ -503,13 +559,24 @@ def load_thread_func_layer_by_layer(self):
i,
self.io_backend,
)
- self.load_stream.synchronize()
- self.layer_done_counter.increment()
-
- self.mem_pool_host.complete_io(batch_operation.host_indices)
- for node_id in batch_operation.node_ids:
- if node_id != 0:
- self.ack_load_queue.put(node_id)
+ producer_event.complete(i)
+ self.mem_pool_host.complete_io(op.host_indices)
+ # NOTE: We must save the host indices and device indices here
+ # because we need to guarantee that these tensors are still alive
+ # when the load stream is executing.
+ if host_indices.is_cuda:
+ host_indices.record_stream(self.load_stream)
+ if device_indices.is_cuda:
+ device_indices.record_stream(self.load_stream)
+
+ self.ack_load_queue.append(
+ HiCacheAck(
+ start_event=producer_event.start_event,
+ finish_event=producer_event.finish_event,
+ node_ids=op.node_ids,
+ )
+ )
+ return producer_id
def evict_device(
self, device_indices: torch.Tensor, host_indices: torch.Tensor
@@ -552,42 +619,93 @@ def prefetch(
return operation
def terminate_prefetch(self, operation):
- operation.mark_done()
+ operation.mark_terminate()
return operation.completed_tokens, operation.hash_value
- def generic_page_transfer(self, operation, batch_size=8):
- for i in range(0, len(operation.hash_value), batch_size):
- page_hashes = operation.hash_value[i : i + batch_size]
- # todo: zero copy
- dummy_page_dst = [self.mem_pool_host.get_dummy_flat_data_page()] * len(
- page_hashes
+ def append_host_mem_release(self, host_indices: torch.Tensor):
+ chunks = host_indices.split(self.mem_pool_host.page_size)
+ for chunk in chunks:
+ self.host_mem_release_queue.put(chunk)
+
+ def _3fs_zero_copy_batch_exists(self, batch_hashes):
+ _batch_hashes, _, factor = self.mem_pool_host.get_buffer_with_hash(batch_hashes)
+ hit_page_num = self.storage_backend.batch_exists(_batch_hashes) // factor
+ return hit_page_num
+
+ def _3fs_zero_copy_page_get(self, operation, hash_values, host_indices):
+ hashes, dsts, factor = self.mem_pool_host.get_buffer_with_hash(
+ hash_values, host_indices
+ )
+ page_data = self.storage_backend.batch_get(hashes, dsts)
+ if page_data:
+ inc = self.page_size * len(hashes) // factor
+ operation.increment(inc)
+ else:
+ logger.warning(
+ f"Prefetch operation {operation.request_id} failed to retrieve page {hashes}."
)
- page_data = self.storage_backend.batch_get(page_hashes, dummy_page_dst)
- if page_data is None:
+
+ def _mooncake_page_get(self, operation, hash_values, host_indices):
+ key_strs, buffer_ptrs, buffer_sizes = self.mem_pool_host.get_buffer_meta(
+ hash_values,
+ host_indices,
+ self.storage_config.tp_rank,
+ )
+ get_result = self.storage_backend.batch_get(
+ key_strs,
+ target_locations=buffer_ptrs,
+ target_sizes=buffer_sizes,
+ )
+ if get_result != len(hash_values):
+ logger.warning(
+ f"Prefetch operation {operation.request_id} failed or partially failed."
+ )
+ if get_result != 0:
+ operation.increment(get_result * self.page_size)
+
+ def _generic_page_get(self, operation, hash_values, host_indices):
+ dummy_page_dst = [
+ self.mem_pool_host.get_dummy_flat_data_page() for _ in hash_values
+ ]
+ page_data = self.storage_backend.batch_get(hash_values, dummy_page_dst)
+ if page_data is None:
+ return
+ for i in range(len(hash_values)):
+ if page_data[i] is None:
logger.warning(
- f"Prefetch operation {operation.request_id} failed to retrieve page {page_hashes}."
+ f"Prefetch operation {operation.request_id} failed to retrieve page {hash_values[i]}."
)
break
- completed_tokens = operation.completed_tokens
- if operation.increment(self.page_size * len(page_hashes)):
- for i in range(len(page_hashes)):
- self.mem_pool_host.set_from_flat_data_page(
- operation.host_indices[completed_tokens],
- page_data[i],
- )
- completed_tokens += self.page_size
- else:
- break
-
- def mooncake_page_transfer(self, operation):
- key_strs, buffer_ptrs, buffer_sizes = self.mem_pool_host.get_buffer_meta(
- operation.hash_value, operation.host_indices
+ # Must set the data before increasing the completed tokens.
+ # Otherwise this page may be read before being set.
+ self.mem_pool_host.set_from_flat_data_page(
+ host_indices[i * self.page_size],
+ page_data[i],
+ )
+ if not operation.increment(self.page_size):
+ break # Operation terminated by controller
+
+ def _page_transfer(self, operation):
+ # Transfer batch by batch
+ for i in range(0, len(operation.hash_value), self.storage_batch_size):
+ batch_hashes = operation.hash_value[i : i + self.storage_batch_size]
+ batch_host_indices = operation.host_indices[
+ i * self.page_size : (i + len(batch_hashes)) * self.page_size
+ ]
+ prev_completed_tokens = operation.completed_tokens
+ # Get one batch of tokens and update completed_tokens if the get succeeds
+ self.page_get_func(operation, batch_hashes, batch_host_indices)
+ # Check termination
+ if (
+ operation.completed_tokens
+ != prev_completed_tokens + len(batch_hashes) * self.page_size
+ ):
+ operation.mark_terminate()
+ break # Some pages failed to load, or the operation was terminated by the controller
+ # release pre-allocated memory
+ self.append_host_mem_release(
+ operation.host_indices[operation.completed_tokens :]
)
- self.storage_backend.batch_get(key_strs, buffer_ptrs, buffer_sizes)
- operation.increment(len(operation.hash_value) * self.page_size)
-
- def is_mooncake_backend(self):
- return self.storage_backend_type == "mooncake"
def prefetch_io_aux_func(self):
"""
@@ -596,32 +714,50 @@ def prefetch_io_aux_func(self):
while not self.stop_event.is_set():
try:
operation = self.prefetch_buffer.get(block=True, timeout=1)
- if self.is_mooncake_backend():
- self.mooncake_page_transfer(operation)
- elif self.storage_backend_type == "hf3fs":
- self.generic_page_transfer(operation, batch_size=128)
- else:
- self.generic_page_transfer(operation)
-
- if self.tp_world_size > 1:
- # to ensure all TP workers release the host memory at the same time
- torch.distributed.barrier(group=self.prefetch_io_tp_group)
+ self._page_transfer(operation)
# operation terminated by controller, release pre-allocated memory
- self.mem_pool_host.free(
+ self.append_host_mem_release(
operation.host_indices[operation.completed_tokens :]
)
except Empty:
continue
- def prefetch_rate_limit_check(self) -> bool:
+ def prefetch_rate_limited(self) -> bool:
"""
Rate limit the prefetching operations to avoid overwhelming the storage backend.
"""
# cancel prefetch if too much memory is occupied
if self.prefetch_tokens_occupied >= self.prefetch_capacity_limit:
- return False
+ return True
# todo: more sophisticated rate limiting based on storage backend performance
- return True
+ return False
+
+ def _storage_hit_query(self, operation) -> tuple[list[str], int]:
+ last_hash = operation.last_hash
+ tokens_to_fetch = operation.token_ids
+
+ storage_query_count = 0
+ hash_value = []
+
+ for start in range(
+ 0, len(tokens_to_fetch), self.page_size * self.storage_batch_size
+ ):
+ end = min(
+ start + self.page_size * self.storage_batch_size, len(tokens_to_fetch)
+ )
+ batch_tokens = tokens_to_fetch[start:end]
+ batch_hashes = []
+ for i in range(0, len(batch_tokens), self.page_size):
+ last_hash = self.get_hash_str(
+ batch_tokens[i : i + self.page_size], last_hash
+ )
+ batch_hashes.append(last_hash)
+ hit_page_num = self.batch_exists_func(batch_hashes)
+ hash_value.extend(batch_hashes[:hit_page_num])
+ storage_query_count += hit_page_num * self.page_size
+ if hit_page_num < len(batch_hashes):
+ break
+ return hash_value, storage_query_count
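A hedged sketch of the batched prefix-hit query implemented by _storage_hit_query: pages are hashed cumulatively, the backend is probed one batch at a time, and the scan stops at the first miss. hash_page and the backend callable are stand-ins for get_hash_str and batch_exists_func.

# Sketch: batched prefix-hit query. Constants and helper names are
# illustrative; the real code uses self.page_size / self.storage_batch_size.
import hashlib
from typing import Callable, List, Optional, Tuple

PAGE_SIZE = 4
BATCH_PAGES = 2


def hash_page(tokens: List[int], prev: Optional[str]) -> str:
    h = hashlib.sha256((prev or "").encode())
    h.update(str(tokens).encode())
    return h.hexdigest()


def storage_hit_query(
    tokens: List[int],
    backend_exists: Callable[[List[str]], int],  # returns count of leading hits
    last_hash: Optional[str] = None,
) -> Tuple[List[str], int]:
    hit_hashes: List[str] = []
    hit_tokens = 0
    for start in range(0, len(tokens), PAGE_SIZE * BATCH_PAGES):
        batch = tokens[start : start + PAGE_SIZE * BATCH_PAGES]
        batch_hashes = []
        for i in range(0, len(batch), PAGE_SIZE):
            last_hash = hash_page(batch[i : i + PAGE_SIZE], last_hash)
            batch_hashes.append(last_hash)
        hit_pages = backend_exists(batch_hashes)
        hit_hashes.extend(batch_hashes[:hit_pages])
        hit_tokens += hit_pages * PAGE_SIZE
        if hit_pages < len(batch_hashes):
            break  # the contiguous prefix ends at the first miss
    return hit_hashes, hit_tokens


# Toy backend that "has" every page it is asked about:
print(storage_hit_query(list(range(16)), lambda hashes: len(hashes)))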
def prefetch_thread_func(self):
"""
@@ -636,39 +772,7 @@ def prefetch_thread_func(self):
if operation is None:
continue
- storage_hit_count = 0
- if (
- operation.host_indices is not None
- ) and self.prefetch_rate_limit_check():
- last_hash = operation.last_hash
- tokens_to_fetch = operation.token_ids
-
- remaining_tokens = len(tokens_to_fetch)
- hash_value = []
- while remaining_tokens >= self.page_size:
- last_hash = self.get_hash_str(
- tokens_to_fetch[
- storage_hit_count : storage_hit_count + self.page_size
- ],
- last_hash,
- )
-
- # todo, more unified interface
- if not self.is_mooncake_backend():
- if not self.storage_backend.exists(last_hash):
- break
- hash_value.append(last_hash)
- storage_hit_count += self.page_size
- remaining_tokens -= self.page_size
-
- if self.is_mooncake_backend():
- # deferring to batch exists for mooncake store
- exist_result = self.storage_backend.exists(hash_value)
- storage_hit_count = (
- sum(1 for v in exist_result.values() if v != 0)
- * self.page_size
- )
-
+ hash_value, storage_hit_count = self._storage_hit_query(operation)
if self.tp_world_size > 1:
storage_hit_count_tensor = torch.tensor(
storage_hit_count, dtype=torch.int
@@ -683,8 +787,7 @@ def prefetch_thread_func(self):
if storage_hit_count < self.prefetch_threshold:
# not to prefetch if not enough benefits
self.prefetch_revoke_queue.put(operation.request_id)
- if operation.host_indices is not None:
- self.mem_pool_host.free(operation.host_indices)
+ self.append_host_mem_release(operation.host_indices)
logger.debug(
f"Revoking prefetch for request {operation.request_id} due to insufficient hits ({storage_hit_count})."
)
@@ -693,7 +796,9 @@ def prefetch_thread_func(self):
: (storage_hit_count // self.page_size)
]
# free the pre-allocated memory for pages that are not hit
- self.mem_pool_host.free(operation.host_indices[storage_hit_count:])
+ self.append_host_mem_release(
+ operation.host_indices[storage_hit_count:]
+ )
operation.host_indices = operation.host_indices[:storage_hit_count]
logger.debug(
f"Prefetching {len(operation.hash_value)} pages for request {operation.request_id}."
@@ -716,46 +821,52 @@ def write_storage(
self.backup_queue.put(operation)
return operation.id
- def generic_page_backup(self, operation, batch_size=8):
- for i in range(0, len(operation.hash_value), batch_size):
- page_hashes = operation.hash_value[i : i + batch_size]
- page_data = [
- self.mem_pool_host.get_flat_data_page(
- operation.host_indices[j * self.page_size]
- )
- for j in range(i, i + len(page_hashes))
+ # non-zero copy
+ def _generic_page_set(self, hash_values, host_indices) -> bool:
+ data = [
+ self.mem_pool_host.get_flat_data_page(host_indices[i * self.page_size])
+ for i in range(len(hash_values))
+ ]
+ return self.storage_backend.batch_set(hash_values, data)
+
+ # zero copy
+ def _mooncake_page_set(self, hash_values, host_indices) -> bool:
+ key_strs, buffer_ptrs, buffer_sizes = self.mem_pool_host.get_buffer_meta(
+ hash_values,
+ host_indices,
+ self.storage_config.tp_rank,
+ )
+ success = self.storage_backend.batch_set(
+ key_strs,
+ target_locations=buffer_ptrs,
+ target_sizes=buffer_sizes,
+ )
+ return success
+
+ # zero copy
+ def _3fs_zero_copy_page_set(self, hash_values, host_indices) -> bool:
+ hashes, dsts, _ = self.mem_pool_host.get_buffer_with_hash(
+ hash_values, host_indices
+ )
+ return self.storage_backend.batch_set(hashes, dsts)
+
+ def _page_backup(self, operation):
+ # Backup batch by batch
+ for i in range(0, len(operation.hash_value), self.storage_batch_size):
+ batch_hashes = operation.hash_value[i : i + self.storage_batch_size]
+ batch_host_indices = operation.host_indices[
+ i * self.page_size : (i + len(batch_hashes)) * self.page_size
]
- success = self.storage_backend.batch_set(page_hashes, page_data)
+ # Set one batch of tokens and record whether it succeeded.
+ # todo: allow partial success
+ success = self.page_set_func(batch_hashes, batch_host_indices)
if not success:
- logger.warning(f"Failed to write page {page_hashes} to storage.")
- break
- operation.completed_tokens += self.page_size * len(page_hashes)
-
- def mooncake_page_backup(self, operation):
- if len(operation.hash_value):
- exist_hashvalues = self.storage_backend.exists(operation.hash_value)
- indices = operation.host_indices.tolist()
- non_exist_keys = []
- non_exist_indices = []
- for i in range(len(operation.hash_value)):
- if not exist_hashvalues[operation.hash_value[i]]:
- non_exist_keys.append(operation.hash_value[i])
- non_exist_indices.extend(
- indices[i * self.page_size : (i + 1) * self.page_size]
- )
- if len(non_exist_keys) > 0:
- key_strs, buffer_ptrs, buffer_sizes = (
- self.mem_pool_host.get_buffer_meta(
- non_exist_keys, non_exist_indices
- )
- )
- # TODO: check the return value of batch set to see how many tokens are set successfully
- self.storage_backend.batch_set(
- key_strs,
- target_location=buffer_ptrs,
- target_sizes=buffer_sizes,
+ logger.warning(
+ f"Write page to storage: {len(batch_hashes)} pages failed."
)
- operation.completed_tokens += len(operation.hash_value) * self.page_size
+ break
+ operation.completed_tokens += self.page_size * len(batch_hashes)
def backup_thread_func(self):
"""
@@ -767,31 +878,9 @@ def backup_thread_func(self):
if operation is None:
continue
- if self.is_mooncake_backend():
- self.mooncake_page_backup(operation)
- elif self.storage_backend_type == "hf3fs":
- self.generic_page_backup(operation, batch_size=128)
- else:
- self.generic_page_backup(operation)
-
- min_completed_tokens = operation.completed_tokens
- if self.tp_world_size > 1:
- completed_tokens_tensor = torch.tensor(
- min_completed_tokens, dtype=torch.int
- )
- torch.distributed.all_reduce(
- completed_tokens_tensor,
- op=torch.distributed.ReduceOp.MIN,
- group=self.backup_tp_group,
- )
- min_completed_tokens = completed_tokens_tensor.item()
-
- self.ack_backup_queue.put(
- (
- operation.id,
- min_completed_tokens,
- )
- )
+ if not self.backup_skip:
+ self._page_backup(operation)
+ self.ack_backup_queue.put(operation)
except Empty:
continue
diff --git a/python/sglang/srt/managers/data_parallel_controller.py b/python/sglang/srt/managers/data_parallel_controller.py
index 76b9e1a018a..a7bb6d13a67 100644
--- a/python/sglang/srt/managers/data_parallel_controller.py
+++ b/python/sglang/srt/managers/data_parallel_controller.py
@@ -13,6 +13,7 @@
# ==============================================================================
"""A controller that dispatches requests to multiple data parallel workers."""
+import faulthandler
import logging
import multiprocessing as mp
import signal
@@ -39,7 +40,12 @@
from sglang.srt.managers.utils import DPBalanceMeta
from sglang.srt.server_args import PortArgs, ServerArgs
from sglang.srt.torch_memory_saver_adapter import TorchMemorySaverAdapter
-from sglang.srt.utils import bind_port, configure_logger, get_zmq_socket
+from sglang.srt.utils import (
+ bind_port,
+ configure_logger,
+ get_zmq_socket,
+ kill_itself_when_parent_died,
+)
from sglang.utils import get_exception_traceback
logger = logging.getLogger(__name__)
@@ -100,7 +106,7 @@ def __init__(
# Launch data parallel workers
self.scheduler_procs = []
- self.workers = [None] * server_args.dp_size
+ self.workers: List[zmq.Socket] = [None] * server_args.dp_size
if server_args.enable_dp_attention:
dp_port_args = self.launch_dp_attention_schedulers(server_args, port_args)
@@ -266,27 +272,34 @@ def launch_tensor_parallel_group(
self.max_total_num_tokens = scheduler_info[0]["max_total_num_tokens"]
self.max_req_input_len = scheduler_info[0]["max_req_input_len"]
+ def maybe_external_dp_rank_routing(self, req: Req):
+ if req.data_parallel_rank is not None:
+ logger.debug(f"Direct routing to DP rank {req.data_parallel_rank}")
+ self.workers[req.data_parallel_rank].send_pyobj(req)
+ return True
+ return False
+
def round_robin_scheduler(self, req: Req):
+ if self.maybe_external_dp_rank_routing(req):
+ return
+
if self.server_args.disaggregation_mode == "null":
- if req.data_parallel_rank is not None:
- logger.debug(f"Direct routing to DP rank {req.data_parallel_rank}")
- self.workers[req.data_parallel_rank].send_pyobj(req)
- else:
- self.workers[self.round_robin_counter].send_pyobj(req)
- self.round_robin_counter = (self.round_robin_counter + 1) % len(
- self.workers
- )
+ self.workers[self.round_robin_counter].send_pyobj(req)
+ self.round_robin_counter = (self.round_robin_counter + 1) % len(
+ self.workers
+ )
else:
- if req.data_parallel_rank is not None:
- logger.debug(f"Direct routing to DP rank {req.data_parallel_rank}")
- self.workers[req.data_parallel_rank].send_pyobj(req)
- else:
- self.workers[req.bootstrap_room % len(self.workers)].send_pyobj(req)
+ self.workers[req.bootstrap_room % len(self.workers)].send_pyobj(req)
def shortest_queue_scheduler(self, input_requests):
+ if self.maybe_external_dp_rank_routing(input_requests):
+ return
raise NotImplementedError()
def minimum_tokens_scheduler(self, req):
+ if self.maybe_external_dp_rank_routing(req):
+ return
+
# This variable corresponds to the balance_id in TokenizedGenerateReqInput.
# We use it to control the number of in-flight tokens (requests dispatched to workers but not yet received).
def get_next_global_balance_id() -> int:
@@ -343,7 +356,9 @@ def run_data_parallel_controller_process(
port_args: PortArgs,
pipe_writer,
):
+ kill_itself_when_parent_died()
setproctitle.setproctitle("sglang::data_parallel_controller")
+ faulthandler.enable()
configure_logger(server_args)
parent_process = psutil.Process().parent()
balance_meta = DPBalanceMeta(server_args.dp_size)
diff --git a/python/sglang/srt/managers/detokenizer_manager.py b/python/sglang/srt/managers/detokenizer_manager.py
index 34a29ec17dd..efa4275ad1e 100644
--- a/python/sglang/srt/managers/detokenizer_manager.py
+++ b/python/sglang/srt/managers/detokenizer_manager.py
@@ -31,10 +31,14 @@
BatchMultimodalOut,
BatchStrOut,
BatchTokenIDOut,
+ FreezeGCReq,
+ MultiTokenizerRegisterReq,
)
+from sglang.srt.managers.multi_tokenizer_mixin import MultiHttpWorkerDetokenizerMixin
from sglang.srt.server_args import PortArgs, ServerArgs
from sglang.srt.utils import (
configure_logger,
+ freeze_gc,
get_zmq_socket,
kill_itself_when_parent_died,
)
@@ -65,7 +69,7 @@ class DecodeStatus:
sent_offset: int = 0
-class DetokenizerManager:
+class DetokenizerManager(MultiHttpWorkerDetokenizerMixin):
"""DetokenizerManager is a process that detokenizes the token ids."""
def __init__(
@@ -100,15 +104,20 @@ def __init__(
(BatchEmbeddingOut, self.handle_batch_embedding_out),
(BatchTokenIDOut, self.handle_batch_token_id_out),
(BatchMultimodalDecodeReq, self.handle_multimodal_decode_req),
+ (MultiTokenizerRegisterReq, lambda x: x),
+ (FreezeGCReq, self.handle_freeze_gc_req),
]
)
+ self.is_tool_call_parser_gpt_oss = server_args.tool_call_parser == "gpt-oss"
+
def event_loop(self):
"""The event loop that handles requests"""
while True:
recv_obj = self.recv_from_scheduler.recv_pyobj()
output = self._request_dispatcher(recv_obj)
- self.send_to_tokenizer.send_pyobj(output)
+ if output is not None:
+ self.send_to_tokenizer.send_pyobj(output)
def trim_matched_stop(
self, output: Union[str, List[int]], finished_reason: Dict, no_stop_trim: bool
@@ -129,6 +138,9 @@ def trim_matched_stop(
# Trim stop token.
if isinstance(matched, int) and isinstance(output, list):
+ # 200012 (<|call|>) is the tool-call token and one of the EOS tokens for the gpt-oss model
+ if output[-1] == 200012 and self.is_tool_call_parser_gpt_oss:
+ return output
assert len(output) > 0
return output[:-1]
return output
@@ -165,15 +177,16 @@ def handle_batch_token_id_out(self, recv_obj: BatchTokenIDOut):
)
surr_ids.append(s.decode_ids[s.surr_offset : s.read_offset])
+ force_show = os.getenv("SRT_FORCE_SPECIAL_TOKENS", "0") == "1"
# TODO(lmzheng): handle skip_special_tokens/spaces_between_special_tokens per request
surr_texts = self.tokenizer.batch_decode(
surr_ids,
- skip_special_tokens=recv_obj.skip_special_tokens[0],
+ skip_special_tokens=recv_obj.skip_special_tokens[0] and not force_show,
spaces_between_special_tokens=recv_obj.spaces_between_special_tokens[0],
)
read_texts = self.tokenizer.batch_decode(
read_ids,
- skip_special_tokens=recv_obj.skip_special_tokens[0],
+ skip_special_tokens=recv_obj.skip_special_tokens[0] and not force_show,
spaces_between_special_tokens=recv_obj.spaces_between_special_tokens[0],
)
@@ -216,7 +229,7 @@ def handle_batch_token_id_out(self, recv_obj: BatchTokenIDOut):
rids=recv_obj.rids,
finished_reasons=recv_obj.finished_reasons,
output_strs=output_strs,
- output_ids=recv_obj.output_ids,
+ output_ids=recv_obj.decode_ids,
prompt_tokens=recv_obj.prompt_tokens,
completion_tokens=recv_obj.completion_tokens,
cached_tokens=recv_obj.cached_tokens,
@@ -234,6 +247,8 @@ def handle_batch_token_id_out(self, recv_obj: BatchTokenIDOut):
output_token_ids_logprobs_val=recv_obj.output_token_ids_logprobs_val,
output_token_ids_logprobs_idx=recv_obj.output_token_ids_logprobs_idx,
output_hidden_states=recv_obj.output_hidden_states,
+ placeholder_tokens_idx=None,
+ placeholder_tokens_val=None,
)
def handle_multimodal_decode_req(self, recv_obj: BatchMultimodalDecodeReq):
@@ -245,8 +260,14 @@ def handle_multimodal_decode_req(self, recv_obj: BatchMultimodalDecodeReq):
prompt_tokens=recv_obj.prompt_tokens,
completion_tokens=recv_obj.completion_tokens,
cached_tokens=recv_obj.cached_tokens,
+ placeholder_tokens_idx=None,
+ placeholder_tokens_val=None,
)
+ def handle_freeze_gc_req(self, recv_req: FreezeGCReq):
+ freeze_gc("Detokenizer Manager")
+ return None
+
class LimitedCapacityDict(OrderedDict):
def __init__(self, capacity: int, *args, **kwargs):
@@ -272,8 +293,12 @@ def run_detokenizer_process(
try:
manager = DetokenizerManager(server_args, port_args)
- manager.event_loop()
+ if server_args.tokenizer_worker_num > 1:
+ manager.multi_http_worker_event_loop()
+ else:
+ manager.event_loop()
except Exception:
+ manager.socket_mapping.clear_all_sockets()
traceback = get_exception_traceback()
logger.error(f"DetokenizerManager hit an exception: {traceback}")
parent_process.send_signal(signal.SIGQUIT)
diff --git a/python/sglang/srt/managers/disagg_service.py b/python/sglang/srt/managers/disagg_service.py
new file mode 100644
index 00000000000..df0eac48b4d
--- /dev/null
+++ b/python/sglang/srt/managers/disagg_service.py
@@ -0,0 +1,46 @@
+"""Start bootstrap/kv-store-related server"""
+
+import os
+from typing import Type
+
+from sglang.srt.disaggregation.base import BaseKVBootstrapServer
+from sglang.srt.disaggregation.utils import (
+ DisaggregationMode,
+ KVClassType,
+ TransferBackend,
+ get_kv_class,
+)
+from sglang.srt.server_args import ServerArgs
+
+
+def start_disagg_service(
+ server_args: ServerArgs,
+):
+ # Start the KV bootstrap server on prefill
+ disagg_mode = DisaggregationMode(server_args.disaggregation_mode)
+ transfer_backend = TransferBackend(server_args.disaggregation_transfer_backend)
+
+ if disagg_mode == DisaggregationMode.PREFILL:
+ # only start bootstrap server on prefill tm
+ kv_bootstrap_server_class: Type[BaseKVBootstrapServer] = get_kv_class(
+ transfer_backend, KVClassType.BOOTSTRAP_SERVER
+ )
+ bootstrap_server: BaseKVBootstrapServer = kv_bootstrap_server_class(
+ host=server_args.host,
+ port=server_args.disaggregation_bootstrap_port,
+ )
+ is_create_store = (
+ server_args.node_rank == 0 and transfer_backend == TransferBackend.ASCEND
+ )
+ if is_create_store:
+ try:
+ from mf_adapter import create_config_store
+
+ ascend_url = os.getenv("ASCEND_MF_STORE_URL")
+ create_config_store(ascend_url)
+ except Exception as e:
+ error_message = f"Failed create mf store, invalid ascend_url."
+ error_message += f" With exception {e}"
+ raise error_message
+
+ return bootstrap_server
diff --git a/python/sglang/srt/managers/io_struct.py b/python/sglang/srt/managers/io_struct.py
index dfa49d70a0e..6237cd38338 100644
--- a/python/sglang/srt/managers/io_struct.py
+++ b/python/sglang/srt/managers/io_struct.py
@@ -121,6 +121,7 @@ class GenerateReqInput:
bootstrap_host: Optional[Union[List[str], str]] = None
bootstrap_port: Optional[Union[List[Optional[int]], int]] = None
bootstrap_room: Optional[Union[List[int], int]] = None
+ bootstrap_pair_key: Optional[Union[List[str], str]] = None
# For data parallel rank routing
data_parallel_rank: Optional[int] = None
@@ -128,6 +129,18 @@ class GenerateReqInput:
# For background responses (OpenAI responses API)
background: bool = False
+ # Conversation id used for tracking requests
+ conversation_id: Optional[str] = None
+
+ # Label for the request
+ label: Optional[str] = None
+
+ # Priority for the request
+ priority: Optional[int] = None
+
+ # Image gen grpc migration
+ return_bytes: bool = False
+
def contains_mm_input(self) -> bool:
return (
has_valid_data(self.image_data)
@@ -258,6 +271,7 @@ def _normalize_batch_inputs(self):
self._normalize_sampling_params(num)
self._normalize_logprob_params(num)
self._normalize_custom_logit_processor(num)
+ self._normalize_bootstrap_params(num)
def _expand_inputs(self, num):
"""Expand the main inputs (text, input_ids, input_embeds) for parallel sampling."""
@@ -297,6 +311,11 @@ def _normalize_image_data(self, num):
self.image_data = [[self.image_data]] * num
self.modalities = ["image"] * num
elif isinstance(self.image_data, list):
+ # Handle empty list case - treat as no images
+ if len(self.image_data) == 0:
+ self.image_data = [None] * num
+ return
+
if len(self.image_data) != self.batch_size:
raise ValueError(
"The length of image_data should be equal to the batch size."
@@ -421,6 +440,40 @@ def _normalize_custom_logit_processor(self, num):
"Cannot use list custom_logit_processor with parallel_sample_num > 1"
)
+ def _normalize_bootstrap_params(self, num):
+ """Normalize bootstrap parameters for batch processing."""
+ # Normalize bootstrap_host
+ if self.bootstrap_host is None:
+ self.bootstrap_host = [None] * num
+ elif not isinstance(self.bootstrap_host, list):
+ self.bootstrap_host = [self.bootstrap_host] * num
+ elif isinstance(self.bootstrap_host, list):
+ self.bootstrap_host = self.bootstrap_host * self.parallel_sample_num
+
+ # Normalize bootstrap_port
+ if self.bootstrap_port is None:
+ self.bootstrap_port = [None] * num
+ elif not isinstance(self.bootstrap_port, list):
+ self.bootstrap_port = [self.bootstrap_port] * num
+ elif isinstance(self.bootstrap_port, list):
+ self.bootstrap_port = self.bootstrap_port * self.parallel_sample_num
+
+ # Normalize bootstrap_room
+ if self.bootstrap_room is None:
+ self.bootstrap_room = [None] * num
+ elif not isinstance(self.bootstrap_room, list):
+ self.bootstrap_room = [self.bootstrap_room + i for i in range(num)]
+ elif isinstance(self.bootstrap_room, list):
+ self.bootstrap_room = self.bootstrap_room * self.parallel_sample_num
+
+ # Normalize bootstrap_pair_key
+ if self.bootstrap_pair_key is None:
+ self.bootstrap_pair_key = [None] * num
+ elif not isinstance(self.bootstrap_pair_key, list):
+ self.bootstrap_pair_key = [self.bootstrap_pair_key] * num
+ elif isinstance(self.bootstrap_pair_key, list):
+ self.bootstrap_pair_key = self.bootstrap_pair_key * self.parallel_sample_num
+
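The bootstrap_* normalization above follows one rule: a missing field becomes a list of None, a scalar is broadcast, and an existing list is repeated per parallel sample. A compact sketch of that rule (the function name is illustrative):

# Sketch: normalize a field that may be None, a scalar, or a list so that
# batched indexing always sees a list of length `num`.
from typing import Any, List, Union


def normalize_field(
    value: Union[None, Any, List[Any]], num: int, parallel_sample_num: int = 1
) -> List[Any]:
    if value is None:
        return [None] * num
    if not isinstance(value, list):
        return [value] * num
    return value * parallel_sample_num


print(normalize_field(None, 4))             # [None, None, None, None]
print(normalize_field("host-a", 4))         # ['host-a', 'host-a', 'host-a', 'host-a']
print(normalize_field(["h1", "h2"], 4, 2))  # ['h1', 'h2', 'h1', 'h2']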
def _validate_session_params(self):
"""Validate that session parameters are properly formatted."""
if self.session_params is not None:
@@ -453,7 +506,13 @@ def __getitem__(self, i):
return_text_in_logprobs=self.return_text_in_logprobs,
stream=self.stream,
log_metrics=self.log_metrics,
+ return_hidden_states=(
+ self.return_hidden_states[i]
+ if isinstance(self.return_hidden_states, list)
+ else self.return_hidden_states
+ ),
modalities=self.modalities[i] if self.modalities else None,
+ session_params=self.session_params,
lora_path=self.lora_path[i] if self.lora_path is not None else None,
lora_id=self.lora_id[i] if self.lora_id is not None else None,
custom_logit_processor=(
@@ -461,11 +520,6 @@ def __getitem__(self, i):
if self.custom_logit_processor is not None
else None
),
- return_hidden_states=(
- self.return_hidden_states[i]
- if isinstance(self.return_hidden_states, list)
- else self.return_hidden_states
- ),
# if `__getitem__` is called, the bootstrap_host, bootstrap_port, bootstrap_room must be a list
bootstrap_host=(
self.bootstrap_host[i] if self.bootstrap_host is not None else None
@@ -476,9 +530,18 @@ def __getitem__(self, i):
bootstrap_room=(
self.bootstrap_room[i] if self.bootstrap_room is not None else None
),
+ bootstrap_pair_key=(
+ self.bootstrap_pair_key[i]
+ if self.bootstrap_pair_key is not None
+ else None
+ ),
data_parallel_rank=(
self.data_parallel_rank if self.data_parallel_rank is not None else None
),
+ conversation_id=self.conversation_id,
+ label=self.label,
+ priority=self.priority,
+ return_bytes=self.return_bytes,
)
@@ -504,27 +567,28 @@ class TokenizedGenerateReqInput:
token_ids_logprob: List[int]
# Whether to stream output
stream: bool
+ # Whether to return hidden states
+ return_hidden_states: bool = False
- # LoRA related
- lora_id: Optional[str] = None # None means just use the base model
# The input embeds
input_embeds: Optional[Union[List[List[List[float]]], List[List[float]]]] = None
# Session info for continual prompting
session_params: Optional[SessionParams] = None
+ # LoRA related
+ lora_id: Optional[str] = None # None means just use the base model
+
# Custom logit processor for advanced sampling control. Must be a serialized instance
# of `CustomLogitProcessor` in python/sglang/srt/sampling/custom_logit_processor.py
# Use the processor's `to_str()` method to generate the serialized string.
custom_logit_processor: Optional[str] = None
- # Whether to return hidden states
- return_hidden_states: bool = False
-
# For disaggregated inference
bootstrap_host: Optional[str] = None
bootstrap_port: Optional[int] = None
bootstrap_room: Optional[int] = None
+ bootstrap_pair_key: Optional[str] = None
# For data parallel rank routing
data_parallel_rank: Optional[int] = None
@@ -532,6 +596,30 @@ class TokenizedGenerateReqInput:
# For dp balance
dp_balance_id: int = -1
+ # Label for the request
+ label: Optional[str] = None
+
+ # Priority for the request
+ priority: Optional[int] = None
+
+ # Image gen grpc migration
+ return_bytes: bool = False
+
+
+@dataclass
+class BatchTokenizedGenerateReqInput:
+ # The batch of tokenized requests
+ batch: List[TokenizedGenerateReqInput]
+
+ def __len__(self):
+ return len(self.batch)
+
+ def __getitem__(self, i):
+ return self.batch[i]
+
+ def __iter__(self):
+ return iter(self.batch)
+
@dataclass
class EmbeddingReqInput:
@@ -668,6 +756,21 @@ class TokenizedEmbeddingReqInput:
dp_balance_id: int = -1
+@dataclass
+class BatchTokenizedEmbeddingReqInput:
+ # The batch of tokenized embedding requests
+ batch: List[TokenizedEmbeddingReqInput]
+
+ def __len__(self):
+ return len(self.batch)
+
+ def __getitem__(self, i):
+ return self.batch[i]
+
+ def __iter__(self):
+ return iter(self.batch)
+
+
@dataclass
class BatchTokenIDOut:
# The request id
@@ -708,9 +811,26 @@ class BatchTokenIDOut:
# Hidden states
output_hidden_states: List[List[float]]
+ # The information of placeholder tokens (e.g., image token)
+ # idx is the index of the token in the prompt after expansion.
+ # val is the length of padded tokens after expansion.
+ placeholder_tokens_idx: List[Optional[List[int]]]
+ placeholder_tokens_val: List[Optional[List[int]]]
+
@dataclass
class BatchMultimodalDecodeReq:
+ decoded_ids: List[int]
+ input_token_logprobs_val: List[float]
+ input_token_logprobs_idx: List[int]
+ output_token_logprobs_val: List[float]
+ output_token_logprobs_idx: List[int]
+ read_offsets: List[int]
+ skip_special_tokens: List[bool]
+ spaces_between_special_tokens: List[bool]
+ image_resolutions: List[List[int]]
+ resize_image_resolutions: List[List[int]]
+
# The request id
rids: List[str]
finished_reasons: List[BaseFinishReason]
@@ -720,6 +840,12 @@ class BatchMultimodalDecodeReq:
completion_tokens: List[int]
cached_tokens: List[int]
+ # Placeholder token info
+ placeholder_tokens_idx: List[Optional[List[int]]]
+ placeholder_tokens_val: List[Optional[List[int]]]
+
+ return_bytes: bool = False
+
@dataclass
class BatchStrOut:
@@ -755,6 +881,9 @@ class BatchStrOut:
# Hidden states
output_hidden_states: List[List[float]]
+ placeholder_tokens_idx: List[Optional[List[int]]]
+ placeholder_tokens_val: List[Optional[List[int]]]
+
@dataclass
class BatchMultimodalOut:
@@ -762,14 +891,26 @@ class BatchMultimodalOut:
rids: List[str]
# The finish reason
finished_reasons: List[dict]
+ decoded_ids: List[List[int]]
# The outputs
- outputs: List[List[Dict]]
+ outputs: Union[List[str | bytes], List[List[Dict]]]
+
+ # probability values for input tokens and output tokens
+ input_token_logprobs_val: List[List[float]]
+ input_token_logprobs_idx: List[List[int]]
+ output_token_logprobs_val: List[List[float]]
+ output_token_logprobs_idx: List[List[int]]
# Token counts
prompt_tokens: List[int]
completion_tokens: List[int]
cached_tokens: List[int]
+ placeholder_tokens_idx: List[Optional[List[int]]]
+ placeholder_tokens_val: List[Optional[List[int]]]
+
+ return_bytes: List[bool]
+
@dataclass
class BatchEmbeddingOut:
@@ -782,6 +923,19 @@ class BatchEmbeddingOut:
# Token counts
prompt_tokens: List[int]
cached_tokens: List[int]
+ # Placeholder token info
+ placeholder_tokens_idx: List[Optional[List[int]]]
+ placeholder_tokens_val: List[Optional[List[int]]]
+
+
+@dataclass
+class ClearHiCacheReqInput:
+ pass
+
+
+@dataclass
+class ClearHiCacheReqOutput:
+ success: bool
@dataclass
@@ -804,6 +958,12 @@ class UpdateWeightFromDiskReqInput:
abort_all_requests: bool = False
# Optional: Update weight version along with weights
weight_version: Optional[str] = None
+ # Whether to update weights asynchronously
+ is_async: bool = False
+ # Whether to empty torch cache
+ torch_empty_cache: bool = False
+ # Whether to keep the scheduler paused after weight update
+ keep_pause: bool = False
@dataclass
@@ -943,6 +1103,12 @@ class AbortReq:
abort_all: bool = False
# The finished reason data
finished_reason: Optional[Dict[str, Any]] = None
+ abort_reason: Optional[str] = None
+ # used in MultiTokenizerManager mode
+ rids: Optional[Union[List[str], str]] = None
+
+ def __post_init__(self):
+ self.rids = self.rid
@dataclass
@@ -1005,12 +1171,18 @@ class ProfileReqOutput:
message: str
+@dataclass
+class FreezeGCReq:
+ pass
+
+
@dataclass
class ConfigureLoggingReq:
log_requests: Optional[bool] = None
log_requests_level: Optional[int] = None
dump_requests_folder: Optional[str] = None
dump_requests_threshold: Optional[int] = None
+ crash_dump_folder: Optional[str] = None
@dataclass
@@ -1138,6 +1310,18 @@ class LoRAUpdateResult:
LoadLoRAAdapterReqOutput = UnloadLoRAAdapterReqOutput = LoRAUpdateResult
+@dataclass
+class MultiTokenizerRegisterReq:
+ rids: Optional[Union[List[str], str]] = None
+ ipc_name: Optional[str] = None
+
+
+@dataclass
+class MultiTokenizerWrapper:
+ worker_id: int
+ obj: Optional[Any] = None
+
+
class BlockReqType(Enum):
BLOCK = 1
UNBLOCK = 2
diff --git a/python/sglang/srt/managers/mm_utils.py b/python/sglang/srt/managers/mm_utils.py
index 7d4ae186a61..f495904d560 100644
--- a/python/sglang/srt/managers/mm_utils.py
+++ b/python/sglang/srt/managers/mm_utils.py
@@ -20,9 +20,11 @@
)
from sglang.srt.mem_cache.multimodal_cache import MultiModalCache
from sglang.srt.model_executor.forward_batch_info import ForwardBatch
-from sglang.srt.utils import flatten_nested_list, print_warning_once
+from sglang.srt.utils import flatten_nested_list, is_npu, print_warning_once
from sglang.utils import logger
+_is_npu = is_npu()
+
# NOTE: Using the shared logger from sglang.utils instead of creating a module-specific logger
# to ensure consistent logging behavior across the codebase. This prevents issues with log
# propagation that can cause some log messages (like 'server is fired up') to not appear
@@ -486,6 +488,8 @@ def get_embedding_and_mask(
if embedding is None:
return None, None
# 2. Get mask
+ if _is_npu:
+ torch.npu.current_stream().synchronize()
special_multimodal_mask = _get_multimodal_mask(input_ids, placeholder_tensor)
# 3. Adjust embedding length if needed
embedding = _adjust_embedding_length(embedding, special_multimodal_mask, logger)
@@ -625,6 +629,7 @@ def general_mm_embed_routine(
embed_tokens = language_model.get_input_embeddings()
if (
not forward_batch.forward_mode.is_decode()
+ and not forward_batch.forward_mode.is_target_verify()
and forward_batch.contains_mm_inputs()
):
mm_inputs_list = [
diff --git a/python/sglang/srt/managers/multi_tokenizer_mixin.py b/python/sglang/srt/managers/multi_tokenizer_mixin.py
new file mode 100644
index 00000000000..0aadfba2c83
--- /dev/null
+++ b/python/sglang/srt/managers/multi_tokenizer_mixin.py
@@ -0,0 +1,579 @@
+# Copyright 2023-2024 SGLang Team
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""MultiTokenizerMixin is a class that provides nesscary methods for MultiTokenizerManager and DetokenizerManager."""
+import asyncio
+import logging
+import multiprocessing
+import os
+import pickle
+import sys
+import threading
+from functools import partialmethod
+from multiprocessing import shared_memory
+from typing import Any, Dict
+
+import setproctitle
+import zmq
+import zmq.asyncio
+
+from sglang.srt.disaggregation.utils import DisaggregationMode, TransferBackend
+from sglang.srt.managers.disagg_service import start_disagg_service
+from sglang.srt.managers.io_struct import (
+ BatchEmbeddingOut,
+ BatchMultimodalOut,
+ BatchStrOut,
+ BatchTokenIDOut,
+ MultiTokenizerRegisterReq,
+ MultiTokenizerWrapper,
+)
+from sglang.srt.managers.tokenizer_communicator_mixin import _Communicator
+from sglang.srt.managers.tokenizer_manager import TokenizerManager
+from sglang.srt.server_args import PortArgs, ServerArgs
+from sglang.srt.utils import get_zmq_socket, kill_process_tree
+from sglang.utils import get_exception_traceback
+
+logger = logging.getLogger(__name__)
+
+
+class SocketMapping:
+ def __init__(self):
+ self._zmq_context = zmq.Context()
+ self._mapping: Dict[str, zmq.Socket] = {}
+
+ def clear_all_sockets(self):
+ for socket in self._mapping.values():
+ socket.close()
+ self._mapping.clear()
+
+ def register_ipc_mapping(
+ self, recv_obj: MultiTokenizerRegisterReq, worker_id: str, is_tokenizer: bool
+ ):
+ type_str = "tokenizer" if is_tokenizer else "detokenizer"
+ if worker_id in self._mapping:
+ logger.warning(
+ f"{type_str} already registered with worker {worker_id}, skipping..."
+ )
+ return
+ logger.info(
+ f"{type_str} not registered with worker {worker_id}, registering..."
+ )
+ socket = get_zmq_socket(self._zmq_context, zmq.PUSH, recv_obj.ipc_name, False)
+ self._mapping[worker_id] = socket
+ self._mapping[worker_id].send_pyobj(recv_obj)
+
+ def send_output(self, worker_id: str, output: Any):
+ if worker_id not in self._mapping:
+ logger.error(
+ f"worker ID {worker_id} not registered. Check if the server Process is alive"
+ )
+ return
+ self._mapping[worker_id].send_pyobj(output)
+
+
+def _handle_output_by_index(output, i):
+ """NOTE: A maintainable method is better here."""
+ if isinstance(output, BatchTokenIDOut):
+ new_output = BatchTokenIDOut(
+ rids=[output.rids[i]],
+ finished_reasons=(
+ [output.finished_reasons[i]]
+ if len(output.finished_reasons) > i
+ else None
+ ),
+ decoded_texts=(
+ [output.decoded_texts[i]] if len(output.decoded_texts) > i else None
+ ),
+ decode_ids=([output.decode_ids[i]] if len(output.decode_ids) > i else None),
+ read_offsets=(
+ [output.read_offsets[i]] if len(output.read_offsets) > i else None
+ ),
+ output_ids=(
+ [output.output_ids[i]]
+ if output.output_ids and len(output.output_ids) > i
+ else None
+ ),
+ skip_special_tokens=(
+ [output.skip_special_tokens[i]]
+ if len(output.skip_special_tokens) > i
+ else None
+ ),
+ spaces_between_special_tokens=(
+ [output.spaces_between_special_tokens[i]]
+ if len(output.spaces_between_special_tokens) > i
+ else None
+ ),
+ no_stop_trim=(
+ [output.no_stop_trim[i]] if len(output.no_stop_trim) > i else None
+ ),
+ prompt_tokens=(
+ [output.prompt_tokens[i]] if len(output.prompt_tokens) > i else None
+ ),
+ completion_tokens=(
+ [output.completion_tokens[i]]
+ if len(output.completion_tokens) > i
+ else None
+ ),
+ cached_tokens=(
+ [output.cached_tokens[i]] if len(output.cached_tokens) > i else None
+ ),
+ spec_verify_ct=(
+ [output.spec_verify_ct[i]] if len(output.spec_verify_ct) > i else None
+ ),
+ input_token_logprobs_val=(
+ [output.input_token_logprobs_val[i]]
+ if output.input_token_logprobs_val
+ else None
+ ),
+ input_token_logprobs_idx=(
+ [output.input_token_logprobs_idx[i]]
+ if output.input_token_logprobs_idx
+ else None
+ ),
+ output_token_logprobs_val=(
+ [output.output_token_logprobs_val[i]]
+ if output.output_token_logprobs_val
+ else None
+ ),
+ output_token_logprobs_idx=(
+ [output.output_token_logprobs_idx[i]]
+ if output.output_token_logprobs_idx
+ else None
+ ),
+ input_top_logprobs_val=(
+ [output.input_top_logprobs_val[i]]
+ if output.input_top_logprobs_val
+ else None
+ ),
+ input_top_logprobs_idx=(
+ [output.input_top_logprobs_idx[i]]
+ if output.input_top_logprobs_idx
+ else None
+ ),
+ output_top_logprobs_val=(
+ [output.output_top_logprobs_val[i]]
+ if output.output_top_logprobs_val
+ else None
+ ),
+ output_top_logprobs_idx=(
+ [output.output_top_logprobs_idx[i]]
+ if output.output_top_logprobs_idx
+ else None
+ ),
+ input_token_ids_logprobs_val=(
+ [output.input_token_ids_logprobs_val[i]]
+ if output.input_token_ids_logprobs_val
+ else None
+ ),
+ input_token_ids_logprobs_idx=(
+ [output.input_token_ids_logprobs_idx[i]]
+ if output.input_token_ids_logprobs_idx
+ else None
+ ),
+ output_token_ids_logprobs_val=(
+ [output.output_token_ids_logprobs_val[i]]
+ if output.output_token_ids_logprobs_val
+ else None
+ ),
+ output_token_ids_logprobs_idx=(
+ [output.output_token_ids_logprobs_idx[i]]
+ if output.output_token_ids_logprobs_idx
+ else None
+ ),
+ output_hidden_states=(
+ [output.output_hidden_states[i]]
+ if output.output_hidden_states
+ else None
+ ),
+ placeholder_tokens_idx=None,
+ placeholder_tokens_val=None,
+ )
+ elif isinstance(output, BatchEmbeddingOut):
+ new_output = BatchEmbeddingOut(
+ rids=[output.rids[i]],
+ finished_reasons=(
+ [output.finished_reasons[i]]
+ if len(output.finished_reasons) > i
+ else None
+ ),
+ embeddings=([output.embeddings[i]] if len(output.embeddings) > i else None),
+ prompt_tokens=(
+ [output.prompt_tokens[i]] if len(output.prompt_tokens) > i else None
+ ),
+ cached_tokens=(
+ [output.cached_tokens[i]] if len(output.cached_tokens) > i else None
+ ),
+ placeholder_tokens_idx=None,
+ placeholder_tokens_val=None,
+ )
+ elif isinstance(output, BatchStrOut):
+ new_output = BatchStrOut(
+ rids=[output.rids[i]],
+ finished_reasons=(
+ [output.finished_reasons[i]]
+ if len(output.finished_reasons) > i
+ else None
+ ),
+ output_strs=(
+ [output.output_strs[i]] if len(output.output_strs) > i else None
+ ),
+ output_ids=(
+ [output.output_ids[i]]
+ if output.output_ids and len(output.output_ids) > i
+ else None
+ ),
+ prompt_tokens=(
+ [output.prompt_tokens[i]] if len(output.prompt_tokens) > i else None
+ ),
+ completion_tokens=(
+ [output.completion_tokens[i]]
+ if len(output.completion_tokens) > i
+ else None
+ ),
+ cached_tokens=(
+ [output.cached_tokens[i]] if len(output.cached_tokens) > i else None
+ ),
+ spec_verify_ct=(
+ [output.spec_verify_ct[i]] if len(output.spec_verify_ct) > i else None
+ ),
+ input_token_logprobs_val=(
+ [output.input_token_logprobs_val[i]]
+ if output.input_token_logprobs_val
+ else None
+ ),
+ input_token_logprobs_idx=(
+ [output.input_token_logprobs_idx[i]]
+ if output.input_token_logprobs_idx
+ else None
+ ),
+ output_token_logprobs_val=(
+ [output.output_token_logprobs_val[i]]
+ if output.output_token_logprobs_val
+ else None
+ ),
+ output_token_logprobs_idx=(
+ [output.output_token_logprobs_idx[i]]
+ if output.output_token_logprobs_idx
+ else None
+ ),
+ input_top_logprobs_val=(
+ [output.input_top_logprobs_val[i]]
+ if output.input_top_logprobs_val
+ else None
+ ),
+ input_top_logprobs_idx=(
+ [output.input_top_logprobs_idx[i]]
+ if output.input_top_logprobs_idx
+ else None
+ ),
+ output_top_logprobs_val=(
+ [output.output_top_logprobs_val[i]]
+ if output.output_top_logprobs_val
+ else None
+ ),
+ output_top_logprobs_idx=(
+ [output.output_top_logprobs_idx[i]]
+ if output.output_top_logprobs_idx
+ else None
+ ),
+ input_token_ids_logprobs_val=(
+ [output.input_token_ids_logprobs_val[i]]
+ if output.input_token_ids_logprobs_val
+ else None
+ ),
+ input_token_ids_logprobs_idx=(
+ [output.input_token_ids_logprobs_idx[i]]
+ if output.input_token_ids_logprobs_idx
+ else None
+ ),
+ output_token_ids_logprobs_val=(
+ [output.output_token_ids_logprobs_val[i]]
+ if output.output_token_ids_logprobs_val
+ else None
+ ),
+ output_token_ids_logprobs_idx=(
+ [output.output_token_ids_logprobs_idx[i]]
+ if output.output_token_ids_logprobs_idx
+ else None
+ ),
+ output_hidden_states=(
+ [output.output_hidden_states[i]]
+ if output.output_hidden_states
+ else None
+ ),
+ placeholder_tokens_idx=None,
+ placeholder_tokens_val=None,
+ )
+ elif isinstance(output, BatchMultimodalOut):
+ new_output = BatchMultimodalOut(
+ rids=[output.rids[i]],
+ finished_reasons=(
+ [output.finished_reasons[i]]
+ if len(output.finished_reasons) > i
+ else None
+ ),
+ outputs=([output.outputs[i]] if len(output.outputs) > i else None),
+ prompt_tokens=(
+ [output.prompt_tokens[i]] if len(output.prompt_tokens) > i else None
+ ),
+ completion_tokens=(
+ [output.completion_tokens[i]]
+ if len(output.completion_tokens) > i
+ else None
+ ),
+ cached_tokens=(
+ [output.cached_tokens[i]] if len(output.cached_tokens) > i else None
+ ),
+ placeholder_tokens_idx=None,
+ placeholder_tokens_val=None,
+ )
+ else:
+ new_output = output
+ return new_output
+
+
+class MultiHttpWorkerDetokenizerMixin:
+ """Mixin class for MultiTokenizerManager and DetokenizerManager"""
+
+ def get_worker_ids_from_req_rids(self, rids):
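+ # Request IDs are prefixed with the owning worker's PID ("<worker_id>_<rid>"),
+ # so the worker ID can be recovered by splitting on "_".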
+ if isinstance(rids, list):
+ worker_ids = [int(rid.split("_")[0]) for rid in rids]
+ elif isinstance(rids, str):
+ worker_ids = [int(rids.split("_")[0])]
+ else:
+ worker_ids = []
+ return worker_ids
+
+ def multi_http_worker_event_loop(self):
+ """The event loop that handles requests, for multi multi-http-worker mode"""
+ self.socket_mapping = SocketMapping()
+ while True:
+ recv_obj = self.recv_from_scheduler.recv_pyobj()
+ output = self._request_dispatcher(recv_obj)
+ if output is None:
+ continue
+ # Extract worker_id from rid
+ if isinstance(recv_obj.rids, list):
+ worker_ids = self.get_worker_ids_from_req_rids(recv_obj.rids)
+ else:
+ raise RuntimeError(
+ "For tokenizer_worker_num > 1, recv_obj.rids must be a list"
+ )
+
+ # Send data using the corresponding socket
+ for i, worker_id in enumerate(worker_ids):
+ if isinstance(recv_obj, MultiTokenizerRegisterReq):
+ self.socket_mapping.register_ipc_mapping(
+ recv_obj, worker_id, is_tokenizer=False
+ )
+ else:
+ new_output = _handle_output_by_index(output, i)
+ self.socket_mapping.send_output(worker_id, new_output)
+
+
+class MultiTokenizerRouter:
+ """A router to receive requests from MultiTokenizerManager"""
+
+ def __init__(
+ self,
+ server_args: ServerArgs,
+ port_args: PortArgs,
+ ):
+ self.server_args = server_args
+ context = zmq.asyncio.Context(3)
+ self.recv_from_detokenizer = get_zmq_socket(
+ context, zmq.PULL, port_args.tokenizer_ipc_name, True
+ )
+ self.send_to_scheduler = get_zmq_socket(
+ context, zmq.PUSH, port_args.scheduler_input_ipc_name, True
+ )
+ self.receive_from_worker = get_zmq_socket(
+ context, zmq.PULL, port_args.tokenizer_worker_ipc_name, True
+ )
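+ # Run the router coroutines on a dedicated event loop in a background thread.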
+ self._loop = asyncio.new_event_loop()
+ self._thread = threading.Thread(target=self._run_loop, daemon=True)
+ self._thread.start()
+ self._task = asyncio.run_coroutine_threadsafe(
+ self.router_worker_obj(), self._loop
+ )
+ # Start handle_loop simultaneously
+ self._handle_task = asyncio.run_coroutine_threadsafe(
+ print_exception_wrapper(self.handle_loop), self._loop
+ )
+ self.disaggregation_bootstrap_server = start_disagg_service(self.server_args)
+
+ def _run_loop(self):
+ self._loop.run_forever()
+
+ async def router_worker_obj(self):
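+ # Forward requests received from the tokenizer workers to the scheduler.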
+ while True:
+ recv_obj = await self.receive_from_worker.recv_pyobj()
+ await self.send_to_scheduler.send_pyobj(recv_obj)
+
+ async def handle_loop(self):
+ # Special requests are received from the scheduler and need to be routed to the right worker.
+ self.socket_mapping = SocketMapping()
+ while True:
+ recv_obj = await self.recv_from_detokenizer.recv_pyobj()
+ await self._distribute_result_to_workers(recv_obj)
+
+ async def _distribute_result_to_workers(self, recv_obj):
+ """Distribute result to corresponding workers based on rid"""
+ if isinstance(recv_obj, MultiTokenizerWrapper):
+ worker_ids = [recv_obj.worker_id]
+ recv_obj = recv_obj.obj
+ else:
+ worker_ids = self.get_worker_ids_from_req_rids(recv_obj.rids)
+
+ if len(worker_ids) == 0:
+ logger.error(f"Cannot find worker_id from rids {recv_obj.rids}")
+ return
+
+ # Distribute result to each worker
+ for i, worker_id in enumerate(worker_ids):
+ if isinstance(recv_obj, MultiTokenizerRegisterReq):
+ self.socket_mapping.register_ipc_mapping(
+ recv_obj, worker_id, is_tokenizer=True
+ )
+ else:
+ new_recv_obj = _handle_output_by_index(recv_obj, i)
+ self.socket_mapping.send_output(worker_id, new_recv_obj)
+
+
+class MultiTokenizerManager(TokenizerManager):
+ """Multi Process Tokenizer Manager that tokenizes the text."""
+
+ def __init__(
+ self,
+ server_args: ServerArgs,
+ port_args: PortArgs,
+ ):
+ setproctitle.setproctitle(f"sglang::tokenizer_worker:{os.getpid()}")
+ # Prevent initializing the prefill bootstrap server again.
+ disaggregation_mode = server_args.disaggregation_mode
+ server_args.disaggregation_mode = "null"
+ super().__init__(server_args, port_args)
+
+ self.worker_id = os.getpid()
+ self.tokenizer_ipc_name = port_args.tokenizer_ipc_name
+
+ # For PD disaggregation
+ self.server_args.disaggregation_mode = disaggregation_mode
+ self.disaggregation_mode = DisaggregationMode(
+ self.server_args.disaggregation_mode
+ )
+ self.disaggregation_transfer_backend = TransferBackend(
+ self.server_args.disaggregation_transfer_backend
+ )
+ # Communicator
+ self.register_multi_tokenizer_communicator = _Communicator(
+ self.send_to_scheduler, 2
+ )
+ self._result_dispatcher._mapping.append(
+ (
+ MultiTokenizerRegisterReq,
+ self.register_multi_tokenizer_communicator.handle_recv,
+ )
+ )
+
+ async def register_to_main_tokenizer_manager(self):
+ """Register this worker to the main TokenizerManager"""
+ # Create a handle loop to receive messages from the main TokenizerManager.
+ self.auto_create_handle_loop()
+ req = MultiTokenizerRegisterReq(rids=[f"{self.worker_id}_register"])
+ req.ipc_name = self.tokenizer_ipc_name
+ _Communicator.enable_multi_tokenizer = True
+ await self.register_multi_tokenizer_communicator(req)
+
+
+async def print_exception_wrapper(func):
+ """
+ Sometimes an asyncio function does not print exception.
+ We do another wrapper to handle the exception.
+ """
+ try:
+ await func()
+ except Exception:
+ traceback = get_exception_traceback()
+ logger.error(f"MultiTokenizerRouter hit an exception: {traceback}")
+ if hasattr(func, "__self__") and isinstance(
+ func.__self__, MultiTokenizerRouter
+ ):
+ func.__self__.dump_requests_before_crash()
+ kill_process_tree(os.getpid(), include_parent=True)
+ sys.exit(1)
+
+
+def get_main_process_id() -> int:
+ """Get the main process ID"""
+ return multiprocessing.current_process()._parent_pid
+
+
+def write_to_shared_memory(obj, name: str) -> shared_memory.SharedMemory:
+ """Write data to shared memory"""
+ serialized = pickle.dumps(obj)
+ size = len(serialized)
+ try:
+ # Try to open existing shared memory
+ shm = shared_memory.SharedMemory(name=name)
+ # If size is insufficient, close and recreate
+ if shm.size < size:
+ shm.close()
+ shm.unlink()
+ shm = shared_memory.SharedMemory(create=True, size=size, name=name)
+ except FileNotFoundError:
+ # If not present, create new shared memory
+ shm = shared_memory.SharedMemory(create=True, size=size, name=name)
+
+ shm.buf[:size] = serialized
+ return shm
+
+
+def read_from_shared_memory(name: str) -> Any:
+ """Read data from shared memory"""
+ try:
+ shm = shared_memory.SharedMemory(name=name)
+ data = pickle.loads(bytes(shm.buf))
+ shm.close()
+ return data
+ except FileNotFoundError:
+ raise FileNotFoundError(f"Shared memory {name} not found")
+
+
+def write_data_for_multi_tokenizer(
+ port_args: PortArgs, server_args: ServerArgs, scheduler_info: Dict
+):
+ """Write args information to share memory for multi-tokenizer"""
+ # get main process ID
+ main_pid = get_main_process_id()
+ current_pid = os.getpid()
+ logger.info(f"main process ID: {main_pid}, current process ID: {current_pid}")
+ args = (port_args, server_args, scheduler_info)
+ args_shm = write_to_shared_memory(args, f"multi_tokenizer_args_{current_pid}")
+ args_shm.close()
+
+ return args_shm
+
+
+def monkey_patch_uvicorn_multiprocessing(timeout: float = 10):
+ """Monkey patch uvicorn multiprocessing is_alive timeout"""
+ # from default 5s -> 10s
+ try:
+ from uvicorn.supervisors.multiprocess import Process
+
+ Process.is_alive = partialmethod(Process.is_alive, timeout=timeout)
+
+ except ImportError:
+ logger.warning(
+ "uvicorn.supervisors.multiprocess not found, skipping monkey patch"
+ )
diff --git a/python/sglang/srt/managers/schedule_batch.py b/python/sglang/srt/managers/schedule_batch.py
index 5b45154db4a..dd0676ddf41 100644
--- a/python/sglang/srt/managers/schedule_batch.py
+++ b/python/sglang/srt/managers/schedule_batch.py
@@ -38,7 +38,7 @@
from enum import Enum, auto
from http import HTTPStatus
from itertools import chain
-from typing import TYPE_CHECKING, Any, List, Optional, Set, Tuple, Union
+from typing import TYPE_CHECKING, Any, Dict, List, Optional, Set, Tuple, Union
import numpy as np
import torch
@@ -52,7 +52,6 @@
ScheduleBatchDisaggregationDecodeMixin,
)
from sglang.srt.distributed.parallel_state import get_tensor_model_parallel_rank
-from sglang.srt.layers.moe import is_tbo_enabled
from sglang.srt.mem_cache.allocator import (
BaseTokenToKVPoolAllocator,
SWATokenToKVPoolAllocator,
@@ -60,7 +59,7 @@
from sglang.srt.mem_cache.base_prefix_cache import BasePrefixCache
from sglang.srt.mem_cache.chunk_cache import ChunkCache, SWAChunkCache
from sglang.srt.mem_cache.lora_radix_cache import LoRAKey, LoRARadixCache
-from sglang.srt.mem_cache.memory_pool import ReqToTokenPool
+from sglang.srt.mem_cache.memory_pool import HybridReqToTokenPool, ReqToTokenPool
from sglang.srt.mem_cache.swa_radix_cache import SWARadixCache
from sglang.srt.metrics.collector import TimeStats
from sglang.srt.model_executor.forward_batch_info import CaptureHiddenMode, ForwardMode
@@ -70,7 +69,10 @@
from sglang.srt.utils import flatten_nested_list, support_triton
if TYPE_CHECKING:
+ from hip_attn.v1_2 import HiPAttentionConfig, HiPMaskRefreshState
+
from sglang.srt.configs.model_config import ModelConfig
+ from sglang.srt.server_args import ServerArgs
from sglang.srt.speculative.eagle_utils import EagleDraftInput, EagleVerifyInput
from sglang.srt.speculative.spec_info import SpeculativeAlgorithm
@@ -87,6 +89,7 @@
"disable_flashinfer_cutlass_moe_fp4_allgather",
"disable_radix_cache",
"enable_dp_lm_head",
+ "flashinfer_mxfp4_moe_precision",
"enable_flashinfer_allreduce_fusion",
"moe_dense_tp_size",
"ep_dispatch_algorithm",
@@ -98,6 +101,7 @@
"sampling_backend",
"speculative_accept_threshold_single",
"speculative_accept_threshold_acc",
+ "speculative_attention_mode",
"torchao_config",
"triton_attention_reduce_in_fp32",
"num_reserved_decode_tokens",
@@ -106,6 +110,7 @@
"enable_symm_mem",
"quantization",
"enable_custom_logit_processor",
+ "disaggregation_mode",
]
# Put some global args for easy access
@@ -271,6 +276,11 @@ def is_image(self):
def is_video(self):
return self.modality == Modality.VIDEO
+ def is_audio(self):
+ return (
+ self.is_modality(Modality.AUDIO)
+ ) and not MultimodalDataItem.is_empty_list(self.audio_features)
+
def is_valid(self) -> bool:
return self.is_image() or self.is_video() or self.is_audio()
@@ -902,6 +912,10 @@ class ScheduleBatch(ScheduleBatchDisaggregationDecodeMixin):
spec_algorithm: SpeculativeAlgorithm = None
spec_info: Optional[Union[EagleDraftInput, EagleVerifyInput]] = None
+ # For HiP Attention
+ hip_mask_refresh_state: Optional[HiPMaskRefreshState] = None
+ hip_metadata_cached_stages: Optional[int] = None
+
# Whether to return hidden states
return_hidden_states: bool = False
@@ -909,7 +923,7 @@ class ScheduleBatch(ScheduleBatchDisaggregationDecodeMixin):
is_prefill_only: bool = False
# hicache pointer for synchronizing data loading from CPU to GPU
- hicache_consumer_index: int = 0
+ hicache_consumer_index: int = -1
@classmethod
def init_new(
@@ -921,8 +935,16 @@ def init_new(
model_config: ModelConfig,
enable_overlap: bool,
spec_algorithm: SpeculativeAlgorithm,
+ hip_attention_config: Optional[HiPAttentionConfig] = None,
chunked_req: Optional[Req] = None,
):
+ hip_mask_refresh_state = None
+ if hip_attention_config is not None:
+ from hip_attn.v1_2 import HiPMaskRefreshState
+
+ # For keeping track of HiP attention mask refresh cycles
+ hip_mask_refresh_state = HiPMaskRefreshState(hip_attention_config)
+
return_logprob = any(req.return_logprob for req in reqs)
is_hybrid = False
@@ -948,6 +970,7 @@ def init_new(
device=req_to_token_pool.device,
spec_algorithm=spec_algorithm,
return_hidden_states=any(req.return_hidden_states for req in reqs),
+ hip_mask_refresh_state=hip_mask_refresh_state,
is_prefill_only=all(
req.sampling_params.max_new_tokens == 0 for req in reqs
),
@@ -960,8 +983,11 @@ def batch_size(self):
def is_empty(self):
return len(self.reqs) == 0
- def alloc_req_slots(self, num_reqs: int):
- req_pool_indices = self.req_to_token_pool.alloc(num_reqs)
+ def alloc_req_slots(self, num_reqs: int, reqs: Optional[List[Req]] = None):
+ if isinstance(self.req_to_token_pool, HybridReqToTokenPool):
+ req_pool_indices = self.req_to_token_pool.alloc(num_reqs, reqs)
+ else:
+ req_pool_indices = self.req_to_token_pool.alloc(num_reqs)
if req_pool_indices is None:
raise RuntimeError(
"alloc_req_slots runs out of memory. "
@@ -1136,7 +1162,7 @@ def prepare_for_extend(self):
# Allocate req slots
bs = len(self.reqs)
- req_pool_indices = self.alloc_req_slots(bs)
+ req_pool_indices = self.alloc_req_slots(bs, self.reqs)
# Init tensors
reqs = self.reqs
@@ -1370,21 +1396,28 @@ def mix_with_running(self, running_batch: "ScheduleBatch"):
# TODO (lianmin): Revisit this. It should be seq_len - 1
self.extend_logprob_start_lens.extend([0] * running_bs)
- def new_page_count_next_decode(self):
+ def new_page_count_next_decode(self, selected_indices: Optional[List[int]] = None):
page_size = self.token_to_kv_pool_allocator.page_size
+ requests = (
+ self.reqs
+ if selected_indices is None
+ else [self.reqs[i] for i in selected_indices]
+ )
if page_size == 1:
- return len(self.reqs)
+ return len(requests)
# In the decoding phase, the length of a request's KV cache should be
# the total length of the request minus 1
return (
- sum(1 for req in self.reqs if req.seqlen % page_size == 0)
+ sum(1 for req in requests if req.seqlen % page_size == 0)
if self.enable_overlap
- else sum(1 for req in self.reqs if (req.seqlen - 1) % page_size == 0)
+ else sum(1 for req in requests if (req.seqlen - 1) % page_size == 0)
)
- def check_decode_mem(self, buf_multiplier=1):
+ def check_decode_mem(
+ self, buf_multiplier=1, selected_indices: Optional[List[int]] = None
+ ):
num_tokens = (
- self.new_page_count_next_decode()
+ self.new_page_count_next_decode(selected_indices)
* buf_multiplier
* self.token_to_kv_pool_allocator.page_size
)
@@ -1410,34 +1443,11 @@ def retract_decode(self, server_args: ServerArgs):
reverse=True,
)
- def get_required_tokens(num_reqs: int):
- headroom_for_spec_decode = 0
- if server_args.speculative_algorithm:
- headroom_for_spec_decode += (
- num_reqs
- * server_args.speculative_eagle_topk
- * server_args.speculative_num_steps
- + num_reqs * server_args.speculative_num_draft_tokens
- )
- return (
- num_reqs * global_config.retract_decode_steps + headroom_for_spec_decode
- )
-
- def _get_available_size():
- if self.is_hybrid:
- return min(
- self.token_to_kv_pool_allocator.full_available_size(),
- self.token_to_kv_pool_allocator.swa_available_size(),
- )
- else:
- return self.token_to_kv_pool_allocator.available_size()
-
retracted_reqs = []
seq_lens_cpu = self.seq_lens.cpu().numpy()
first_iter = True
- while (
- _get_available_size() < get_required_tokens(len(sorted_indices))
- or first_iter
+ while first_iter or (
+ not self.check_decode_mem(selected_indices=sorted_indices)
):
if len(sorted_indices) == 1:
# Corner case: only one request left
@@ -1491,10 +1501,6 @@ def _get_available_size():
else:
self.tree_cache.dec_lock_ref(req.last_node)
- # NOTE(lsyin): we should use the newly evictable memory instantly.
- num_tokens = len(sorted_indices) * global_config.retract_decode_steps
- self._evict_tree_cache_if_needed(num_tokens)
-
req.reset_for_retract()
if len(retracted_reqs) == 0:
@@ -1538,7 +1544,7 @@ def prepare_for_decode(self):
self.forward_mode = ForwardMode.DECODE
bs = len(self.reqs)
- if self.spec_algorithm.is_eagle():
+ if self.spec_algorithm.is_eagle() or self.spec_algorithm.is_standalone():
# if spec decoding is used, the decode batch is prepared inside
# `forward_batch_speculative_generation` after running draft models.
return
@@ -1608,6 +1614,9 @@ def prepare_for_decode(self):
(self.req_pool_indices, locs), self.out_cache_loc.to(torch.int32)
)
+ if self.hip_mask_refresh_state is not None:
+ self.hip_metadata_cached_stages = self.hip_mask_refresh_state.update()
+
def filter_batch(
self,
chunked_req_to_exclude: Optional[Union[Req, List[Req]]] = None,
@@ -1777,6 +1786,7 @@ def get_model_worker_batch(
)
),
extend_input_logprob_token_ids=self.extend_input_logprob_token_ids,
+ hip_metadata_cached_stages=self.hip_metadata_cached_stages,
launch_done=self.launch_done,
)
@@ -1915,7 +1925,10 @@ class ModelWorkerBatch:
spec_info: Optional[Union[EagleVerifyInput, EagleDraftInput]] = None
# If set, the output of the batch contains the hidden states of the run.
capture_hidden_mode: CaptureHiddenMode = None
- hicache_consumer_index: int = 0
+ hicache_consumer_index: int = -1
+
+ # Use cached mask for HiP Attention
+ hip_metadata_cached_stages: Optional[int] = None
# Overlap event
launch_done: Optional[threading.Event] = None
diff --git a/python/sglang/srt/managers/schedule_policy.py b/python/sglang/srt/managers/schedule_policy.py
index 4665207c1a4..0a3723e0be6 100644
--- a/python/sglang/srt/managers/schedule_policy.py
+++ b/python/sglang/srt/managers/schedule_policy.py
@@ -380,8 +380,9 @@ def _update_prefill_budget(
self.log_input_tokens += extend_input_len
def add_chunked_req(self, req: Req):
- truncated = req.extend_input_len > self.rem_chunk_tokens
- req.extend_input_len = min(req.extend_input_len, self.rem_chunk_tokens)
+ _rem_tokens = min(self.rem_chunk_tokens, int(self.rem_total_tokens))
+ truncated = req.extend_input_len > _rem_tokens
+ req.extend_input_len = min(req.extend_input_len, _rem_tokens)
req.fill_ids = req.fill_ids[: len(req.prefix_indices) + req.extend_input_len]
self.can_run_list.append(req)
self._update_prefill_budget(
@@ -549,7 +550,7 @@ def add_one_req(self, req: Req, has_chunked_req: bool):
)
else:
# Make sure at least one page is available
- trunc_len = self.rem_chunk_tokens - self.page_size + 1
+ trunc_len = self.rem_chunk_tokens // self.page_size * self.page_size
if trunc_len <= 0:
return AddReqResult.OTHER
diff --git a/python/sglang/srt/managers/scheduler.py b/python/sglang/srt/managers/scheduler.py
index 05878fe4eed..e6d8d40b94d 100644
--- a/python/sglang/srt/managers/scheduler.py
+++ b/python/sglang/srt/managers/scheduler.py
@@ -67,11 +67,16 @@
from sglang.srt.layers.moe import initialize_moe_config
from sglang.srt.managers.io_struct import (
AbortReq,
+ BatchTokenizedEmbeddingReqInput,
+ BatchTokenizedGenerateReqInput,
+ ClearHiCacheReqInput,
+ ClearHiCacheReqOutput,
CloseSessionReqInput,
ExpertDistributionReq,
ExpertDistributionReqOutput,
FlushCacheReqInput,
FlushCacheReqOutput,
+ FreezeGCReq,
GetInternalStateReq,
GetInternalStateReqOutput,
GetWeightsByNameReqInput,
@@ -79,6 +84,8 @@
InitWeightsUpdateGroupReqInput,
LoadLoRAAdapterReqInput,
LoadLoRAAdapterReqOutput,
+ MultiTokenizerRegisterReq,
+ MultiTokenizerWrapper,
OpenSessionReqInput,
OpenSessionReqOutput,
ProfileReq,
@@ -134,7 +141,7 @@
from sglang.srt.mem_cache.radix_cache import RadixCache
from sglang.srt.mem_cache.swa_radix_cache import SWARadixCache
from sglang.srt.model_executor.forward_batch_info import ForwardMode, PPProxyTensors
-from sglang.srt.reasoning_parser import ReasoningParser
+from sglang.srt.parser.reasoning_parser import ReasoningParser
from sglang.srt.server_args import PortArgs, ServerArgs
from sglang.srt.speculative.spec_info import SpeculativeAlgorithm
from sglang.srt.torch_memory_saver_adapter import TorchMemorySaverAdapter
@@ -145,11 +152,13 @@
configure_gc_logger,
configure_logger,
disable_request_logging,
+ freeze_gc,
get_available_gpu_memory,
get_bool_env_var,
get_zmq_socket,
is_cpu,
kill_itself_when_parent_died,
+ numa_bind_to_node,
point_to_point_pyobj,
pyspy_dump_schedulers,
require_mlp_sync,
@@ -251,7 +260,6 @@ def __init__(
# Init inter-process communication
context = zmq.Context(2)
self.idle_sleeper = None
-
if self.pp_rank == 0 and self.attn_tp_rank == 0:
self.recv_from_tokenizer = get_zmq_socket(
context, zmq.PULL, port_args.scheduler_input_ipc_name, False
@@ -341,6 +349,18 @@ def __init__(
target_worker=self.tp_worker,
dp_rank=dp_rank,
)
+ elif self.spec_algorithm.is_standalone():
+ from sglang.srt.speculative.standalone_worker import StandaloneWorker
+
+ self.draft_worker = StandaloneWorker(
+ gpu_id=gpu_id,
+ tp_rank=tp_rank,
+ moe_ep_rank=moe_ep_rank,
+ server_args=server_args,
+ nccl_port=port_args.nccl_port,
+ target_worker=self.tp_worker,
+ dp_rank=dp_rank,
+ )
else:
self.draft_worker = None
@@ -394,7 +414,7 @@ def __init__(
f"max_prefill_tokens={self.max_prefill_tokens}, "
f"max_running_requests={self.max_running_requests}, "
f"context_len={self.model_config.context_len}, "
- f"available_gpu_mem={avail_mem:.2f} GB"
+ f"{'available_cpu_mem' if self.device == 'cpu' else 'available_gpu_mem'}={avail_mem:.2f} GB"
)
# Init memory pool and cache
@@ -481,7 +501,7 @@ def __init__(
enable=server_args.enable_memory_saver
)
self.offload_tags = set()
- self.init_profier()
+ self.init_profiler()
self.recv_skipper = SchedulerRecvSkipper.maybe_create(server_args)
self.input_blocker = (
@@ -493,6 +513,7 @@ def __init__(
# Init metrics stats
self.init_metrics(tp_rank, pp_rank, dp_rank)
self.init_kv_events(server_args.kv_events_config)
+ self.init_dp_balance(dp_balance_meta)
# Init disaggregation
self.disaggregation_mode = DisaggregationMode(
@@ -508,7 +529,10 @@ def __init__(
[
(TokenizedGenerateReqInput, self.handle_generate_request),
(TokenizedEmbeddingReqInput, self.handle_embedding_request),
+ (BatchTokenizedGenerateReqInput, self.handle_batch_generate_request),
+ (BatchTokenizedEmbeddingReqInput, self.handle_batch_embedding_request),
(FlushCacheReqInput, self.flush_cache_wrapped),
+ (ClearHiCacheReqInput, self.clear_hicache_storage_wrapped),
(AbortReq, self.abort_request),
(OpenSessionReqInput, self.open_session),
(CloseSessionReqInput, self.close_session),
@@ -524,24 +548,17 @@ def __init__(
(ResumeMemoryOccupationReqInput, self.resume_memory_occupation),
(SlowDownReqInput, self.slow_down),
(ProfileReq, self.profile),
+ (FreezeGCReq, self.handle_freeze_gc),
(GetInternalStateReq, self.get_internal_state),
(SetInternalStateReq, self.set_internal_state),
(RpcReqInput, self.handle_rpc_request),
(ExpertDistributionReq, self.expert_distribution_handle),
(LoadLoRAAdapterReqInput, self.load_lora_adapter),
(UnloadLoRAAdapterReqInput, self.unload_lora_adapter),
+ (MultiTokenizerRegisterReq, self.register_multi_tokenizer),
]
)
- self.balance_meta = dp_balance_meta
- if (
- server_args.enable_dp_attention
- and server_args.load_balance_method == "minimum_tokens"
- ):
- assert dp_balance_meta is not None
-
- self.recv_dp_balance_id_this_term = []
-
def init_tokenizer(self):
server_args = self.server_args
self.is_generation = self.model_config.is_generation
@@ -618,8 +635,11 @@ def init_memory_pool_and_cache(self):
hicache_write_policy=server_args.hicache_write_policy,
hicache_io_backend=server_args.hicache_io_backend,
hicache_mem_layout=server_args.hicache_mem_layout,
+ enable_metrics=self.enable_metrics,
hicache_storage_backend=server_args.hicache_storage_backend,
hicache_storage_prefetch_policy=server_args.hicache_storage_prefetch_policy,
+ model_name=server_args.served_model_name,
+ storage_backend_extra_config=server_args.hicache_storage_backend_extra_config,
)
self.tp_worker.register_hicache_layer_transfer_counter(
self.tree_cache.cache_controller.layer_done_counter
@@ -648,6 +668,21 @@ def init_memory_pool_and_cache(self):
page_size=self.page_size,
disable=server_args.disable_radix_cache,
)
+ elif server_args.enable_lmcache:
+ from sglang.srt.mem_cache.storage.lmcache.lmc_radix_cache import (
+ LMCRadixCache,
+ )
+
+ self.tree_cache = LMCRadixCache(
+ req_to_token_pool=self.req_to_token_pool,
+ token_to_kv_pool_allocator=self.token_to_kv_pool_allocator,
+ page_size=self.page_size,
+ disable=server_args.disable_radix_cache,
+ model_config=self.model_config,
+ tp_size=self.tp_size,
+ rank=self.tp_rank,
+ tp_group=self.tp_group,
+ )
else:
self.tree_cache = RadixCache(
req_to_token_pool=self.req_to_token_pool,
@@ -1015,14 +1050,26 @@ def recv_requests(self) -> List[Req]:
req
for req in recv_reqs
if isinstance(
- req, (TokenizedGenerateReqInput, TokenizedEmbeddingReqInput)
+ req,
+ (
+ TokenizedGenerateReqInput,
+ TokenizedEmbeddingReqInput,
+ BatchTokenizedGenerateReqInput,
+ BatchTokenizedEmbeddingReqInput,
+ ),
)
]
control_reqs = [
req
for req in recv_reqs
if not isinstance(
- req, (TokenizedGenerateReqInput, TokenizedEmbeddingReqInput)
+ req,
+ (
+ TokenizedGenerateReqInput,
+ TokenizedEmbeddingReqInput,
+ BatchTokenizedGenerateReqInput,
+ BatchTokenizedEmbeddingReqInput,
+ ),
)
]
else:
@@ -1077,6 +1124,17 @@ def process_input_requests(self, recv_reqs: List):
)
self.send_to_tokenizer.send_pyobj(abort_req)
continue
+
+ # If it is a MultiTokenizerWrapper, unwrap it and handle the inner request.
+ if isinstance(recv_req, MultiTokenizerWrapper):
+ worker_id = recv_req.worker_id
+ recv_req = recv_req.obj
+ output = self._request_dispatcher(recv_req)
+ if output is not None:
+ output = MultiTokenizerWrapper(worker_id, output)
+ self.send_to_tokenizer.send_pyobj(output)
+ continue
+
output = self._request_dispatcher(recv_req)
if output is not None:
if isinstance(output, RpcReqOutput):
@@ -1089,11 +1147,7 @@ def handle_generate_request(
self,
recv_req: TokenizedGenerateReqInput,
):
- if (
- self.server_args.enable_dp_attention
- and self.server_args.load_balance_method == "minimum_tokens"
- ):
- self.recv_dp_balance_id_this_term.append(recv_req.dp_balance_id)
+ self.maybe_update_dp_balance_data(recv_req)
# Create a new request
if (
@@ -1141,7 +1195,7 @@ def handle_generate_request(
f"boostrap room id. {req.rid=}"
)
logger.error(error_msg)
- prepare_abort(req, error_msg)
+ prepare_abort(req, error_msg, status_code=HTTPStatus.BAD_REQUEST)
self.stream_output([req], req.return_logprob)
return
@@ -1250,6 +1304,17 @@ def handle_generate_request(
else:
self._add_request_to_queue(req)
+ def handle_batch_generate_request(
+ self,
+ recv_req: BatchTokenizedGenerateReqInput,
+ ):
+ """Handle optimized batch generate request."""
+ logger.debug(f"Processing batch generate request with {len(recv_req)} requests")
+
+ # Process each request in the batch
+ for tokenized_req in recv_req:
+ self.handle_generate_request(tokenized_req)
+
def _add_request_to_queue(self, req: Req):
req.queue_time_start = time.perf_counter()
if self.disaggregation_mode == DisaggregationMode.PREFILL:
@@ -1266,10 +1331,11 @@ def _add_request_to_queue(self, req: Req):
def _prefetch_kvcache(self, req: Req):
if self.enable_hicache_storage:
req.init_next_round_input(self.tree_cache)
- last_hash = req.last_host_node.get_last_hash_value()
- matched_len = len(req.prefix_indices) + req.host_hit_length
- # todo, free-form fetching, calculating hash keys on the fly
- if (matched_len > 0 and last_hash is not None) or matched_len == 0:
+ if req.last_node.backuped:
+ # Only initiate the prefetch if the last node has been backed up;
+ # otherwise, the allocated GPU memory must be locked for integrity.
+ last_hash = req.last_host_node.get_last_hash_value()
+ matched_len = len(req.prefix_indices) + req.host_hit_length
new_input_tokens = req.fill_ids[matched_len:]
self.tree_cache.prefetch_from_storage(
req.rid, req.last_host_node, new_input_tokens, last_hash
@@ -1332,6 +1398,19 @@ def handle_embedding_request(
req.logprob_start_len = len(req.origin_input_ids) - 1
self._add_request_to_queue(req)
+ def handle_batch_embedding_request(
+ self,
+ recv_req: BatchTokenizedEmbeddingReqInput,
+ ):
+ """Handle optimized batch embedding request."""
+ logger.debug(
+ f"Processing batch embedding request with {len(recv_req)} requests"
+ )
+
+ # Process each request in the batch
+ for tokenized_req in recv_req:
+ self.handle_embedding_request(tokenized_req)
+
def self_check_during_idle(self):
self.check_memory()
self.check_tree_cache()
@@ -1359,9 +1438,11 @@ def check_memory(self):
_, _, available_size, evictable_size = self._get_token_info()
protected_size = self.tree_cache.protected_size()
memory_leak = (available_size + evictable_size) != (
+ # self.max_total_num_tokens
+ # if not self.enable_hierarchical_cache
+ # else self.max_total_num_tokens - protected_size
self.max_total_num_tokens
- if not self.enable_hierarchical_cache
- else self.max_total_num_tokens - protected_size
+ - protected_size
)
token_msg = f"{self.max_total_num_tokens=}, {available_size=}, {evictable_size=}, {protected_size=}\n"
@@ -1457,9 +1538,14 @@ def get_next_batch_to_run(self) -> Optional[ScheduleBatch]:
# Move the chunked request out of the batch so that we can merge
# only finished requests to running_batch.
chunked_req_to_exclude.add(self.chunked_req)
- self.tree_cache.cache_unfinished_req(self.chunked_req)
+ self.tree_cache.cache_unfinished_req(self.chunked_req, chunked=True)
# chunked request keeps its rid but will get a new req_pool_idx
- self.req_to_token_pool.free(self.chunked_req.req_pool_idx)
+ if self.tp_worker.worker.model_runner.is_hybrid_gdn:
+ self.req_to_token_pool.free(
+ self.chunked_req.req_pool_idx, free_mamba_cache=False
+ )
+ else:
+ self.req_to_token_pool.free(self.chunked_req.req_pool_idx)
if self.last_batch and self.last_batch.forward_mode.is_extend():
if self.last_batch.chunked_req is not None:
# In the context pipeline parallelism, after the last chunk, the current microbatch still track outdated chunked_req.
@@ -1506,11 +1592,7 @@ def get_next_batch_to_run(self) -> Optional[ScheduleBatch]:
# Handle DP attention
if need_dp_attn_preparation:
- if (
- self.server_args.load_balance_method == "minimum_tokens"
- and self.forward_ct % 40 == 0
- ):
- self.handle_dp_balance_data(ret)
+ self.maybe_handle_dp_balance_data()
ret = self.prepare_mlp_sync_batch(ret)
return ret
@@ -1643,6 +1725,7 @@ def get_new_batch_prefill(self) -> Optional[ScheduleBatch]:
self.model_config,
self.enable_overlap,
self.spec_algorithm,
+ self.server_args.hip_attention_config,
chunked_req=self.chunked_req,
)
if self.enable_hierarchical_cache:
@@ -1730,10 +1813,6 @@ def run_batch(
if self.spec_algorithm.is_none():
model_worker_batch = batch.get_model_worker_batch()
- # update the consumer index of hicache to the running batch
- self.tp_worker.set_hicache_consumer(
- model_worker_batch.hicache_consumer_index
- )
if self.pp_group.is_last_rank:
logits_output, next_token_ids, can_run_cuda_graph = (
self.tp_worker.forward_batch_generation(model_worker_batch)
@@ -1835,86 +1914,6 @@ def prepare_mlp_sync_batch(self, local_batch: ScheduleBatch):
disable_overlap_schedule=self.server_args.disable_overlap_schedule,
)
- def handle_dp_balance_data(self, local_batch: ScheduleBatch):
- def gather_dp_balance_info(holding_tokens_list) -> Union[None, List[List[int]]]:
- """gather recv_dp_balance_id_this_term and holding tokens per worker for dp balance"""
- recv_list = self.recv_dp_balance_id_this_term
- assert len(recv_list) <= 511, (
- "The number of requests received this round is too large. "
- "Please increase gather_tensor_size and onfly_info_size."
- )
- # The maximum size of the tensor used for gathering data from all workers.
- gather_tensor_size = 512
-
- # recv_tensor: | holding_tokens | len(recv_dp_balance_id) | recv_dp_balance_ids
- recv_tensor = torch.zeros(gather_tensor_size, dtype=torch.int32)
- recv_tensor[0] = holding_tokens_list
- recv_tensor[1] = len(
- recv_list
- ) # The first element is the length of the list.
- recv_tensor[2 : len(recv_list) + 2] = torch.tensor(
- recv_list, dtype=torch.int32
- )
-
- if self.tp_rank == 0:
- gathered_list = [
- torch.zeros(gather_tensor_size, dtype=torch.int32)
- for _ in range(self.balance_meta.num_workers)
- ]
- else:
- gathered_list = None
-
- torch.distributed.gather(
- recv_tensor, gathered_list, group=self.tp_cpu_group
- )
-
- gathered_id_list_per_worker = None
- if self.tp_rank == 0:
- gathered_id_list_per_worker = []
- holding_tokens_list = []
- for tensor in gathered_list:
- holding_tokens_list.append(tensor[0].item())
- list_length = tensor[1].item()
- gathered_id_list_per_worker.append(
- tensor[2 : list_length + 2].tolist()
- )
-
- return gathered_id_list_per_worker, holding_tokens_list
-
- def write_shared_dp_balance_info(new_recv_rid_lists, local_tokens):
- meta = self.balance_meta
-
- with meta.mutex:
- onfly_list: List[Dict[int, int]] = meta.get_shared_onfly()
- assert len(new_recv_rid_lists) == len(
- onfly_list
- ), "num_worker not equal"
- # 1.Check if the rid received by each worker this round is present in onfly.
- # If it is, remove the corresponding onfly item.
- worker_id = 0
- for new_recv_rids, on_fly_reqs in zip(new_recv_rid_lists, onfly_list):
- for new_recv_rid in new_recv_rids:
- assert (
- new_recv_rid in on_fly_reqs
- ), f"{new_recv_rid=} not in {worker_id=} {on_fly_reqs=}, data consistency is wrong"
- del on_fly_reqs[new_recv_rid]
- worker_id += 1
- # 2. Atomically write local_tokens and onfly into shm under the mutex
- meta.set_shared_onfly_info(onfly_list)
- meta.set_shared_local_tokens(local_tokens)
-
- holding_tokens = self.get_load()
-
- new_recv_dp_balance_id_list, holding_token_list = gather_dp_balance_info(
- holding_tokens
- )
-
- self.recv_dp_balance_id_this_term.clear()
- if self.tp_rank == 0: # only first worker write info
- write_shared_dp_balance_info(
- new_recv_dp_balance_id_list, holding_token_list
- )
-
@staticmethod
def prepare_mlp_sync_batch_raw(
local_batch: ScheduleBatch,
@@ -2028,6 +2027,7 @@ def get_idle_batch(self):
self.model_config,
self.enable_overlap,
self.spec_algorithm,
+ self.server_args.hip_attention_config,
)
idle_batch.prepare_for_idle()
return idle_batch
@@ -2161,6 +2161,16 @@ def flush_cache_wrapped(self, recv_req: FlushCacheReqInput):
success = self.flush_cache()
return FlushCacheReqOutput(success=success)
+ def clear_hicache_storage_wrapped(self, recv_req: ClearHiCacheReqInput):
+ if self.enable_hierarchical_cache:
+ self.tree_cache.clear_storage_backend()
+ logger.info("Hierarchical cache cleared successfully!")
+ if_success = True
+ else:
+ logger.warning("Hierarchical cache is not enabled.")
+ if_success = False
+ return ClearHiCacheReqOutput(success=if_success)
+
def flush_cache(self):
"""Flush the memory pool and cache."""
if (
@@ -2245,10 +2255,9 @@ def get_internal_state(self, recv_req: GetInternalStateReq):
"token_capacity": int(self.max_total_num_tokens),
}
- if not _is_cpu:
- ret["memory_usage"]["cuda_graph"] = round(
- self.tp_worker.worker.model_runner.cuda_graph_mem_usage, 2
- )
+ ret["memory_usage"]["graph"] = round(
+ self.tp_worker.worker.model_runner.graph_mem_usage, 2
+ )
if not self.spec_algorithm.is_none() and self.cum_spec_accept_count > 0:
ret["avg_spec_accept_length"] = (
@@ -2331,7 +2340,14 @@ def abort_request(self, recv_req: AbortReq):
# This only works for requests that have not started anything.
# We still need to send something back to TokenizerManager to clean up the state.
req = self.waiting_queue.pop(i)
+ if self.enable_hicache_storage:
+ # to release prefetch events associated with the request
+ self.tree_cache.release_aborted_request(req.rid)
self.send_to_tokenizer.send_pyobj(AbortReq(req.rid))
+ # For disaggregation decode mode, the request in the waiting queue has KV cache allocated.
+ if self.disaggregation_mode == DisaggregationMode.DECODE:
+ self.tree_cache.cache_finished_req(req)
+
logger.debug(f"Abort queued request. {req.rid=}")
# Delete the requests in the grammar queue
@@ -2411,6 +2427,10 @@ def unload_lora_adapter(
result = self.tp_worker.unload_lora_adapter(recv_req)
return result
+ def register_multi_tokenizer(self, recv_req: MultiTokenizerRegisterReq):
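+ # Forward the registration to the detokenizer and echo it back to the tokenizer side.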
+ self.send_to_detokenizer.send_pyobj(recv_req)
+ return recv_req
+
def slow_down(self, recv_req: SlowDownReqInput):
t = recv_req.forward_sleep_time
if t is not None and t <= 0:
@@ -2469,6 +2489,12 @@ def maybe_sleep_on_idle(self):
if self.idle_sleeper is not None:
self.idle_sleeper.maybe_sleep()
+ def handle_freeze_gc(self, recv_req: FreezeGCReq):
+ """Handle freeze_gc request: freeze scheduler's GC and forward to detokenizer."""
+ freeze_gc("Scheduler")
+ self.send_to_detokenizer.send_pyobj(recv_req)
+ return None
+
class IdleSleeper:
"""
@@ -2504,7 +2530,15 @@ def is_health_check_generate_req(recv_req):
def is_work_request(recv_req):
- return isinstance(recv_req, (TokenizedGenerateReqInput, TokenizedEmbeddingReqInput))
+ return isinstance(
+ recv_req,
+ (
+ TokenizedGenerateReqInput,
+ TokenizedEmbeddingReqInput,
+ BatchTokenizedGenerateReqInput,
+ BatchTokenizedEmbeddingReqInput,
+ ),
+ )
def run_scheduler_process(
@@ -2518,6 +2552,9 @@ def run_scheduler_process(
pipe_writer,
balance_meta: Optional[DPBalanceMeta] = None,
):
+ if (numa_node := server_args.numa_node) is not None:
+ numa_bind_to_node(numa_node[gpu_id])
+
# Generate the prefix
prefix = ""
if dp_rank is not None:
diff --git a/python/sglang/srt/managers/scheduler_metrics_mixin.py b/python/sglang/srt/managers/scheduler_metrics_mixin.py
index a6497ffde5c..3d8572e342f 100644
--- a/python/sglang/srt/managers/scheduler_metrics_mixin.py
+++ b/python/sglang/srt/managers/scheduler_metrics_mixin.py
@@ -1,15 +1,24 @@
+from __future__ import annotations
+
import logging
import time
from collections import defaultdict
-from typing import List, Optional
+from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Union
+
+import torch
from sglang.srt.disaggregation.kv_events import EventPublisherFactory, KVEventBatch
from sglang.srt.disaggregation.utils import DisaggregationMode
+from sglang.srt.managers.io_struct import TokenizedGenerateReqInput
from sglang.srt.managers.schedule_policy import PrefillAdder
from sglang.srt.managers.scheduler import Req, ScheduleBatch
+from sglang.srt.managers.utils import DPBalanceMeta
from sglang.srt.metrics.collector import SchedulerMetricsCollector, SchedulerStats
from sglang.srt.utils import get_bool_env_var
+if TYPE_CHECKING:
+ from sglang.srt.managers.scheduler import Scheduler
+
logger = logging.getLogger(__name__)
RECORD_STEP_TIME = get_bool_env_var("SGLANG_RECORD_STEP_TIME")
@@ -28,7 +37,9 @@ def __init__(self):
class SchedulerMetricsMixin:
- def init_metrics(self, tp_rank: int, pp_rank: int, dp_rank: Optional[int]):
+ def init_metrics(
+ self: Scheduler, tp_rank: int, pp_rank: int, dp_rank: Optional[int]
+ ):
self.last_gen_throughput: float = 0.0
self.last_input_throughput: float = 0.0
self.step_time_dict = defaultdict(list) # Dict[batch size -> step time]
@@ -50,14 +61,24 @@ def init_metrics(self, tp_rank: int, pp_rank: int, dp_rank: Optional[int]):
labels["dp_rank"] = dp_rank
self.metrics_collector = SchedulerMetricsCollector(labels=labels)
- def init_kv_events(self, kv_events_config: Optional[str]):
+ def init_dp_balance(self: Scheduler, dp_balance_meta: Optional[DPBalanceMeta]):
+ self.balance_meta = dp_balance_meta
+ if (
+ self.server_args.enable_dp_attention
+ and self.server_args.load_balance_method == "minimum_tokens"
+ ):
+ assert dp_balance_meta is not None
+
+ self.recv_dp_balance_id_this_term = []
+
+ def init_kv_events(self: Scheduler, kv_events_config: Optional[str]):
if self.enable_kv_cache_events:
self.kv_event_publisher = EventPublisherFactory.create(
kv_events_config, self.attn_dp_rank
)
def log_prefill_stats(
- self,
+ self: Scheduler,
adder: PrefillAdder,
can_run_list: List[Req],
running_bs: int,
@@ -125,12 +146,20 @@ def log_prefill_stats(
total_queue_latency += req.queue_time_end - req.queue_time_start
self.stats.avg_request_queue_latency = total_queue_latency / num_new_seq
+ if self.disaggregation_mode == DisaggregationMode.PREFILL:
+ self.stats.num_prefill_prealloc_queue_reqs = len(
+ self.disagg_prefill_bootstrap_queue.queue
+ )
+ self.stats.num_prefill_inflight_queue_reqs = len(
+ self.disagg_prefill_inflight_queue
+ )
+
self.metrics_collector.log_stats(self.stats)
self._emit_kv_metrics()
self._publish_kv_events()
def log_decode_stats(
- self, can_run_cuda_graph: bool, running_batch: ScheduleBatch = None
+ self: Scheduler, can_run_cuda_graph: bool, running_batch: ScheduleBatch = None
):
batch = running_batch or self.running_batch
@@ -185,7 +214,7 @@ def log_decode_stats(
msg += f"#retracted-req: {len(self.disagg_decode_prealloc_queue.retracted_queue)}, "
msg += (
- f"cuda graph: {can_run_cuda_graph}, "
+ f"{'cpu graph' if self.device == 'cpu' else 'cuda graph'}: {can_run_cuda_graph}, "
f"gen throughput (token/s): {self.last_gen_throughput:.2f}, "
f"#queue-req: {len(self.waiting_queue)}, "
)
@@ -202,10 +231,17 @@ def log_decode_stats(
self.stats.spec_accept_length = spec_accept_length
self.stats.total_retracted_reqs = self.total_retracted_reqs
self.metrics_collector.log_stats(self.stats)
+ if self.disaggregation_mode == DisaggregationMode.DECODE:
+ self.stats.num_decode_prealloc_queue_reqs = len(
+ self.disagg_decode_prealloc_queue.queue
+ )
+ self.stats.num_decode_transfer_queue_reqs = len(
+ self.disagg_decode_transfer_queue.queue
+ )
self._emit_kv_metrics()
self._publish_kv_events()
- def _emit_kv_metrics(self):
+ def _emit_kv_metrics(self: Scheduler):
kv_metrics = KvMetrics()
kv_metrics.request_active_slots = self.stats.num_running_reqs
kv_metrics.request_total_slots = self.max_running_requests
@@ -221,9 +257,94 @@ def _emit_kv_metrics(self):
if not self.send_metrics_from_scheduler.closed:
self.send_metrics_from_scheduler.send_pyobj(kv_metrics)
- def _publish_kv_events(self):
+ def _publish_kv_events(self: Scheduler):
if self.enable_kv_cache_events:
events = self.tree_cache.take_events()
if events:
batch = KVEventBatch(ts=time.time(), events=events)
self.kv_event_publisher.publish(batch)
+
+ def maybe_update_dp_balance_data(
+ self: Scheduler, recv_req: TokenizedGenerateReqInput
+ ):
+ if (
+ self.server_args.enable_dp_attention
+ and self.server_args.load_balance_method == "minimum_tokens"
+ ):
+ self.recv_dp_balance_id_this_term.append(recv_req.dp_balance_id)
+
+ def maybe_handle_dp_balance_data(self: Scheduler):
+ if (
+ self.server_args.load_balance_method == "minimum_tokens"
+ and self.forward_ct % 40 == 0
+ ):
+ holding_tokens = self.get_load()
+
+ new_recv_dp_balance_id_list, holding_token_list = (
+ self.gather_dp_balance_info(holding_tokens)
+ )
+
+ self.recv_dp_balance_id_this_term.clear()
+ if self.tp_rank == 0: # only first worker write info
+ self.write_shared_dp_balance_info(
+ new_recv_dp_balance_id_list, holding_token_list
+ )
+
+ def gather_dp_balance_info(
+ self: Scheduler, holding_tokens_list
+ ) -> Tuple[Optional[List[List[int]]], List[int]]:
+ """Gather recv_dp_balance_id_this_term and the holding tokens per worker for DP balancing."""
+ recv_list = self.recv_dp_balance_id_this_term
+ assert len(recv_list) <= 511, (
+ "The number of requests received this round is too large. "
+ "Please increase gather_tensor_size and onfly_info_size."
+ )
+ # The maximum size of the tensor used for gathering data from all workers.
+ gather_tensor_size = 512
+
+ # recv_tensor: | holding_tokens | len(recv_dp_balance_id) | recv_dp_balance_ids
+ recv_tensor = torch.zeros(gather_tensor_size, dtype=torch.int32)
+ recv_tensor[0] = holding_tokens_list
+ recv_tensor[1] = len(recv_list) # The first element is the length of the list.
+ recv_tensor[2 : len(recv_list) + 2] = torch.tensor(recv_list, dtype=torch.int32)
+
+ if self.tp_rank == 0:
+ gathered_list = [
+ torch.zeros(gather_tensor_size, dtype=torch.int32)
+ for _ in range(self.balance_meta.num_workers)
+ ]
+ else:
+ gathered_list = None
+
+ torch.distributed.gather(recv_tensor, gathered_list, group=self.tp_cpu_group)
+
+ gathered_id_list_per_worker = None
+ if self.tp_rank == 0:
+ gathered_id_list_per_worker = []
+ holding_tokens_list = []
+ for tensor in gathered_list:
+ holding_tokens_list.append(tensor[0].item())
+ list_length = tensor[1].item()
+ gathered_id_list_per_worker.append(tensor[2 : list_length + 2].tolist())
+
+ return gathered_id_list_per_worker, holding_tokens_list
+
+ def write_shared_dp_balance_info(self: Scheduler, new_recv_rid_lists, local_tokens):
+ meta = self.balance_meta
+
+ with meta.mutex:
+ onfly_list: List[Dict[int, int]] = meta.get_shared_onfly()
+ assert len(new_recv_rid_lists) == len(onfly_list), "num_worker not equal"
+ # 1. Check if the rid received by each worker this round is present in onfly.
+ # If it is, remove the corresponding onfly item.
+ worker_id = 0
+ for new_recv_rids, on_fly_reqs in zip(new_recv_rid_lists, onfly_list):
+ for new_recv_rid in new_recv_rids:
+ assert (
+ new_recv_rid in on_fly_reqs
+ ), f"{new_recv_rid=} not in {worker_id=} {on_fly_reqs=}, data consistency is wrong"
+ del on_fly_reqs[new_recv_rid]
+ worker_id += 1
+ # 2. Atomically write local_tokens and onfly into shm under the mutex
+ meta.set_shared_onfly_info(onfly_list)
+ meta.set_shared_local_tokens(local_tokens)
diff --git a/python/sglang/srt/managers/scheduler_output_processor_mixin.py b/python/sglang/srt/managers/scheduler_output_processor_mixin.py
index a86899f6e79..d931759bbfa 100644
--- a/python/sglang/srt/managers/scheduler_output_processor_mixin.py
+++ b/python/sglang/srt/managers/scheduler_output_processor_mixin.py
@@ -93,20 +93,21 @@ def process_batch_result_prefill(
# This updates radix so others can match
self.tree_cache.cache_unfinished_req(req)
- if req.return_logprob:
+ if batch.return_logprob:
assert extend_logprob_start_len_per_req is not None
assert extend_input_len_per_req is not None
extend_logprob_start_len = extend_logprob_start_len_per_req[i]
extend_input_len = extend_input_len_per_req[i]
num_input_logprobs = extend_input_len - extend_logprob_start_len
- self.add_logprob_return_values(
- i,
- req,
- logprob_pt,
- next_token_ids,
- num_input_logprobs,
- logits_output,
- )
+ if req.return_logprob:
+ self.add_logprob_return_values(
+ i,
+ req,
+ logprob_pt,
+ next_token_ids,
+ num_input_logprobs,
+ logits_output,
+ )
logprob_pt += num_input_logprobs
if (
@@ -146,7 +147,7 @@ def process_batch_result_prefill(
skip_stream_req = req
# Incrementally update input logprobs.
- if req.return_logprob:
+ if batch.return_logprob:
extend_logprob_start_len = extend_logprob_start_len_per_req[i]
extend_input_len = extend_input_len_per_req[i]
if extend_logprob_start_len < extend_input_len:
@@ -154,14 +155,15 @@ def process_batch_result_prefill(
num_input_logprobs = (
extend_input_len - extend_logprob_start_len
)
- self.add_input_logprob_return_values(
- i,
- req,
- logits_output,
- logprob_pt,
- num_input_logprobs,
- last_prefill_chunk=False,
- )
+ if req.return_logprob:
+ self.add_input_logprob_return_values(
+ i,
+ req,
+ logits_output,
+ logprob_pt,
+ num_input_logprobs,
+ last_prefill_chunk=False,
+ )
logprob_pt += num_input_logprobs
self.set_next_batch_sampling_info_done(batch)
@@ -698,6 +700,8 @@ def stream_output_generation(
output_token_ids_logprobs_val,
output_token_ids_logprobs_idx,
output_hidden_states,
+ placeholder_tokens_idx=None,
+ placeholder_tokens_val=None,
)
)
@@ -717,6 +721,12 @@ def stream_output_embedding(self: Scheduler, reqs: List[Req]):
cached_tokens.append(req.cached_tokens)
self.send_to_detokenizer.send_pyobj(
BatchEmbeddingOut(
- rids, finished_reasons, embeddings, prompt_tokens, cached_tokens
+ rids,
+ finished_reasons,
+ embeddings,
+ prompt_tokens,
+ cached_tokens,
+ placeholder_tokens_idx=None,
+ placeholder_tokens_val=None,
)
)
diff --git a/python/sglang/srt/managers/scheduler_profiler_mixin.py b/python/sglang/srt/managers/scheduler_profiler_mixin.py
index afbab82058f..116a8d84264 100644
--- a/python/sglang/srt/managers/scheduler_profiler_mixin.py
+++ b/python/sglang/srt/managers/scheduler_profiler_mixin.py
@@ -26,7 +26,7 @@
class SchedulerProfilerMixin:
- def init_profier(self):
+ def init_profiler(self):
self.torch_profiler = None
self.torch_profiler_output_dir: Optional[str] = None
self.profiler_activities: Optional[List[str]] = None
diff --git a/python/sglang/srt/managers/scheduler_update_weights_mixin.py b/python/sglang/srt/managers/scheduler_update_weights_mixin.py
index 8da3d07be13..fdae2142cd3 100644
--- a/python/sglang/srt/managers/scheduler_update_weights_mixin.py
+++ b/python/sglang/srt/managers/scheduler_update_weights_mixin.py
@@ -121,9 +121,16 @@ def save_remote_model(self, params):
url = params["url"]
worker = self.tp_worker.worker
-
worker.model_runner.save_remote_model(url)
+ if self.draft_worker is not None:
+ draft_url = params.get("draft_url", None)
+ assert (
+ draft_url is not None
+ ), "draft_url must be provided when draft model is enabled"
+ draft_worker = self.draft_worker.worker
+ draft_worker.model_runner.save_remote_model(draft_url)
+
def save_sharded_model(self, params):
worker = self.tp_worker.worker
diff --git a/python/sglang/srt/managers/template_manager.py b/python/sglang/srt/managers/template_manager.py
index 2327f942bb3..1d9bbea8186 100644
--- a/python/sglang/srt/managers/template_manager.py
+++ b/python/sglang/srt/managers/template_manager.py
@@ -24,20 +24,20 @@
import re
from typing import Optional
-from sglang.srt.code_completion_parser import (
+from sglang.srt.parser.code_completion_parser import (
CompletionTemplate,
FimPosition,
completion_template_exists,
register_completion_template,
)
-from sglang.srt.conversation import (
+from sglang.srt.parser.conversation import (
Conversation,
SeparatorStyle,
chat_template_exists,
get_conv_template_by_model_path,
register_conv_template,
)
-from sglang.srt.jinja_template_utils import detect_jinja_template_content_format
+from sglang.srt.parser.jinja_template_utils import detect_jinja_template_content_format
logger = logging.getLogger(__name__)
@@ -89,6 +89,7 @@ def _detect_reasoning_pattern(self, template: str) -> bool:
if template is None:
return False
+ # TODO: remove this hard-coded reasoning pattern
force_reasoning_pattern = r"<\|im_start\|>assistant\\n\\n"
has_reasoning = re.search(force_reasoning_pattern, template) is not None
@@ -128,11 +129,12 @@ def load_chat_template(
logger.info(
f"Using default HuggingFace chat template with detected content format: {self._jinja_template_content_format}"
)
- return
-
- # Default to string content format if no template was found
- self._jinja_template_content_format = "string"
- logger.info("No chat template found, defaulting to 'string' content format")
+ else:
+ # Default to string content format if no template was found
+ self._jinja_template_content_format = "string"
+ logger.info(
+ "No chat template found, defaulting to 'string' content format"
+ )
# Detect reasoning pattern from chat template
if tokenizer_manager.tokenizer:
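A small self-contained sketch of the hard-coded reasoning-pattern check above; the template string is a hypothetical snippet of a chat template as stored in tokenizer_config.json, where newlines appear as the literal two characters backslash + n.

import re

# Same pattern as _detect_reasoning_pattern above.
force_reasoning_pattern = r"<\|im_start\|>assistant\\n\\n"

# Hypothetical template text; "\\n" is the two-character sequence backslash + n.
template = "{% if add_generation_prompt %}<|im_start|>assistant\\n\\n{% endif %}"

has_reasoning = re.search(force_reasoning_pattern, template) is not None
assert has_reasoning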
diff --git a/python/sglang/srt/managers/tokenizer_communicator_mixin.py b/python/sglang/srt/managers/tokenizer_communicator_mixin.py
new file mode 100644
index 00000000000..e59d3f296c8
--- /dev/null
+++ b/python/sglang/srt/managers/tokenizer_communicator_mixin.py
@@ -0,0 +1,491 @@
+from __future__ import annotations
+
+import asyncio
+import logging
+import os
+import time
+from collections import deque
+from typing import (
+ TYPE_CHECKING,
+ Any,
+ Deque,
+ Dict,
+ Generic,
+ List,
+ Optional,
+ Tuple,
+ TypeVar,
+)
+
+import fastapi
+
+from sglang.srt.managers.io_struct import (
+ ClearHiCacheReqInput,
+ ClearHiCacheReqOutput,
+ ExpertDistributionReq,
+ ExpertDistributionReqOutput,
+ FlushCacheReqInput,
+ FlushCacheReqOutput,
+ GetInternalStateReq,
+ GetInternalStateReqOutput,
+ GetWeightsByNameReqInput,
+ GetWeightsByNameReqOutput,
+ InitWeightsUpdateGroupReqInput,
+ InitWeightsUpdateGroupReqOutput,
+ LoadLoRAAdapterReqInput,
+ LoadLoRAAdapterReqOutput,
+ LoRAUpdateResult,
+ MultiTokenizerWrapper,
+ ProfileReq,
+ ProfileReqOutput,
+ ProfileReqType,
+ ReleaseMemoryOccupationReqInput,
+ ReleaseMemoryOccupationReqOutput,
+ ResumeMemoryOccupationReqInput,
+ ResumeMemoryOccupationReqOutput,
+ SetInternalStateReq,
+ SetInternalStateReqOutput,
+ SlowDownReqInput,
+ SlowDownReqOutput,
+ UnloadLoRAAdapterReqInput,
+ UnloadLoRAAdapterReqOutput,
+ UpdateWeightsFromDistributedReqInput,
+ UpdateWeightsFromDistributedReqOutput,
+ UpdateWeightsFromTensorReqInput,
+ UpdateWeightsFromTensorReqOutput,
+)
+from sglang.srt.server_args import LoRARef, ServerArgs
+from sglang.srt.utils import get_bool_env_var
+from sglang.utils import TypeBasedDispatcher
+
+if TYPE_CHECKING:
+ from sglang.srt.managers.tokenizer_manager import TokenizerManager
+
+T = TypeVar("T")
+
+logger = logging.getLogger(__name__)
+
+
+class _Communicator(Generic[T]):
+ """Note: The communicator now only run up to 1 in-flight request at any time."""
+
+ enable_multi_tokenizer = False
+
+ def __init__(self, sender, fan_out: int):
+ self._sender = sender
+ self._fan_out = fan_out
+ self._result_event: Optional[asyncio.Event] = None
+ self._result_values: Optional[List[T]] = None
+ self._ready_queue: Deque[asyncio.Future] = deque()
+
+ async def __call__(self, obj):
+ ready_event = asyncio.Event()
+ if self._result_event is not None or len(self._ready_queue) > 0:
+ self._ready_queue.append(ready_event)
+ await ready_event.wait()
+ assert self._result_event is None
+ assert self._result_values is None
+
+ if obj:
+ if _Communicator.enable_multi_tokenizer:
+ obj = MultiTokenizerWrapper(worker_id=os.getpid(), obj=obj)
+ self._sender.send_pyobj(obj)
+
+ self._result_event = asyncio.Event()
+ self._result_values = []
+ await self._result_event.wait()
+ result_values = self._result_values
+ self._result_event = self._result_values = None
+
+ if len(self._ready_queue) > 0:
+ self._ready_queue.popleft().set()
+
+ return result_values
+
+ def handle_recv(self, recv_obj: T):
+ self._result_values.append(recv_obj)
+ if len(self._result_values) == self._fan_out:
+ self._result_event.set()
+
+
+class TokenizerCommunicatorMixin:
+ """Mixin class for TokenizerManager to handle communication with the scheduler."""
+
+ def init_communicators(self: TokenizerManager, server_args: ServerArgs):
+ # Communicators
+ self.init_weights_update_group_communicator = _Communicator(
+ self.send_to_scheduler, server_args.dp_size
+ )
+ self.update_weights_from_distributed_communicator = _Communicator(
+ self.send_to_scheduler, server_args.dp_size
+ )
+ self.update_weights_from_tensor_communicator = _Communicator(
+ self.send_to_scheduler, server_args.dp_size
+ )
+ self.get_weights_by_name_communicator = _Communicator(
+ self.send_to_scheduler, server_args.dp_size
+ )
+ self.release_memory_occupation_communicator = _Communicator(
+ self.send_to_scheduler, server_args.dp_size
+ )
+ self.resume_memory_occupation_communicator = _Communicator(
+ self.send_to_scheduler, server_args.dp_size
+ )
+ self.slow_down_communicator = _Communicator(
+ self.send_to_scheduler, server_args.dp_size
+ )
+ self.flush_cache_communicator = _Communicator(
+ self.send_to_scheduler, server_args.dp_size
+ )
+ self.clear_hicache_storage_communicator = _Communicator(
+ self.send_to_scheduler, server_args.dp_size
+ )
+ self.profile_communicator = _Communicator(
+ self.send_to_scheduler, server_args.dp_size
+ )
+ self.get_internal_state_communicator = _Communicator(
+ self.send_to_scheduler, server_args.dp_size
+ )
+ self.set_internal_state_communicator = _Communicator(
+ self.send_to_scheduler, server_args.dp_size
+ )
+ self.expert_distribution_communicator = _Communicator(
+ self.send_to_scheduler, server_args.dp_size
+ )
+ self.update_lora_adapter_communicator = _Communicator(
+ self.send_to_scheduler, server_args.dp_size
+ )
+
+ self._result_dispatcher += self._get_communicator_dispatcher()
+
+ def _get_communicator_dispatcher(self: TokenizerManager):
+ return TypeBasedDispatcher(
+ [
+ (
+ InitWeightsUpdateGroupReqOutput,
+ self.init_weights_update_group_communicator.handle_recv,
+ ),
+ (
+ UpdateWeightsFromDistributedReqOutput,
+ self.update_weights_from_distributed_communicator.handle_recv,
+ ),
+ (
+ UpdateWeightsFromTensorReqOutput,
+ self.update_weights_from_tensor_communicator.handle_recv,
+ ),
+ (
+ GetWeightsByNameReqOutput,
+ self.get_weights_by_name_communicator.handle_recv,
+ ),
+ (
+ ReleaseMemoryOccupationReqOutput,
+ self.release_memory_occupation_communicator.handle_recv,
+ ),
+ (
+ ResumeMemoryOccupationReqOutput,
+ self.resume_memory_occupation_communicator.handle_recv,
+ ),
+ (
+ SlowDownReqOutput,
+ self.slow_down_communicator.handle_recv,
+ ),
+ (
+ ClearHiCacheReqOutput,
+ self.clear_hicache_storage_communicator.handle_recv,
+ ),
+ (
+ FlushCacheReqOutput,
+ self.flush_cache_communicator.handle_recv,
+ ),
+ (
+ ProfileReqOutput,
+ self.profile_communicator.handle_recv,
+ ),
+ (
+ GetInternalStateReqOutput,
+ self.get_internal_state_communicator.handle_recv,
+ ),
+ (
+ SetInternalStateReqOutput,
+ self.set_internal_state_communicator.handle_recv,
+ ),
+ (
+ ExpertDistributionReqOutput,
+ self.expert_distribution_communicator.handle_recv,
+ ),
+ (
+ LoRAUpdateResult,
+ self.update_lora_adapter_communicator.handle_recv,
+ ),
+ ]
+ )
+
+ async def flush_cache(self: TokenizerManager) -> FlushCacheReqOutput:
+ return (await self.flush_cache_communicator(FlushCacheReqInput()))[0]
+
+ async def clear_hicache_storage(self: TokenizerManager) -> ClearHiCacheReqOutput:
+ """Clear the hierarchical cache storage."""
+ # Delegate to the scheduler to handle HiCacheStorage clearing
+ return (await self.clear_hicache_storage_communicator(ClearHiCacheReqInput()))[
+ 0
+ ]
+
+ async def start_profile(
+ self: TokenizerManager,
+ output_dir: Optional[str] = None,
+ start_step: Optional[int] = None,
+ num_steps: Optional[int] = None,
+ activities: Optional[List[str]] = None,
+ with_stack: Optional[bool] = None,
+ record_shapes: Optional[bool] = None,
+ profile_by_stage: bool = False,
+ ):
+ self.auto_create_handle_loop()
+ env_with_stack: bool = get_bool_env_var("SGLANG_PROFILE_WITH_STACK", "true")
+ with_stack = False if with_stack is False or env_with_stack is False else True
+ req = ProfileReq(
+ type=ProfileReqType.START_PROFILE,
+ output_dir=output_dir,
+ start_step=start_step,
+ num_steps=num_steps,
+ activities=activities,
+ with_stack=with_stack,
+ record_shapes=record_shapes,
+ profile_by_stage=profile_by_stage,
+ profile_id=str(time.time()),
+ )
+ return await self._execute_profile(req)
+
+ async def stop_profile(self: TokenizerManager):
+ self.auto_create_handle_loop()
+ req = ProfileReq(type=ProfileReqType.STOP_PROFILE)
+ return await self._execute_profile(req)
+
+ async def _execute_profile(self: TokenizerManager, req: ProfileReq):
+ result = (await self.profile_communicator(req))[0]
+ if not result.success:
+ raise RuntimeError(result.message)
+ return result
+
+ async def start_expert_distribution_record(self: TokenizerManager):
+ self.auto_create_handle_loop()
+ await self.expert_distribution_communicator(ExpertDistributionReq.START_RECORD)
+
+ async def stop_expert_distribution_record(self: TokenizerManager):
+ self.auto_create_handle_loop()
+ await self.expert_distribution_communicator(ExpertDistributionReq.STOP_RECORD)
+
+ async def dump_expert_distribution_record(self: TokenizerManager):
+ self.auto_create_handle_loop()
+ await self.expert_distribution_communicator(ExpertDistributionReq.DUMP_RECORD)
+
+ async def init_weights_update_group(
+ self: TokenizerManager,
+ obj: InitWeightsUpdateGroupReqInput,
+ request: Optional[fastapi.Request] = None,
+ ) -> Tuple[bool, str]:
+ self.auto_create_handle_loop()
+ assert (
+ self.server_args.dp_size == 1
+ ), "dp_size must be 1 for init parameter update group"
+ result = (await self.init_weights_update_group_communicator(obj))[0]
+ return result.success, result.message
+
+ async def update_weights_from_distributed(
+ self: TokenizerManager,
+ obj: UpdateWeightsFromDistributedReqInput,
+ request: Optional[fastapi.Request] = None,
+ ) -> Tuple[bool, str]:
+ self.auto_create_handle_loop()
+ assert (
+ self.server_args.dp_size == 1 or self.server_args.enable_dp_attention
+ ), "dp_size must be 1 or dp attention must be enabled for update weights from distributed"
+
+ if obj.abort_all_requests:
+ self.abort_request(abort_all=True)
+
+ # This means that weight sync
+ # cannot run while requests are in progress.
+ async with self.model_update_lock.writer_lock:
+ result = (await self.update_weights_from_distributed_communicator(obj))[0]
+ return result.success, result.message
+
+ async def update_weights_from_tensor(
+ self: TokenizerManager,
+ obj: UpdateWeightsFromTensorReqInput,
+ request: Optional[fastapi.Request] = None,
+ ) -> Tuple[bool, str]:
+ self.auto_create_handle_loop()
+ assert (
+ self.server_args.dp_size == 1 or self.server_args.enable_dp_attention
+ ), "dp_size must be 1 or dp attention must be enabled for update weights from tensor"
+
+ if obj.abort_all_requests:
+ self.abort_request(abort_all=True)
+
+ # This means that weight sync
+ # cannot run while requests are in progress.
+ async with self.model_update_lock.writer_lock:
+ result = (await self.update_weights_from_tensor_communicator(obj))[0]
+ return result.success, result.message
+
+ async def load_lora_adapter(
+ self: TokenizerManager,
+ obj: LoadLoRAAdapterReqInput,
+ _: Optional[fastapi.Request] = None,
+ ) -> LoadLoRAAdapterReqOutput:
+ self.auto_create_handle_loop()
+
+ try:
+ if not self.server_args.enable_lora:
+ raise ValueError(
+ "LoRA is not enabled. Please set `--enable-lora` to enable LoRA."
+ )
+
+ # TODO (lifuhuang): Remove this after we verify that dynamic lora loading works
+ # with dp_size > 1.
+ assert (
+ self.server_args.dp_size == 1
+ ), "dp_size must be 1 for dynamic lora loading"
+ logger.info(
+ "Start load Lora adapter. Lora name=%s, path=%s",
+ obj.lora_name,
+ obj.lora_path,
+ )
+
+ async with self.lora_update_lock:
+ if (
+ self.server_args.max_loaded_loras is not None
+ and self.lora_registry.num_registered_loras
+ >= self.server_args.max_loaded_loras
+ ):
+ raise ValueError(
+ f"Cannot load LoRA adapter {obj.lora_name} at path {obj.lora_path}. "
+ f"Maximum number of loaded LoRA adapters is {self.server_args.max_loaded_loras}. "
+ "Please unload some LoRA adapters before loading new ones."
+ )
+
+ # Generate new uniquely identifiable LoRARef object.
+ new_adapter = LoRARef(
+ lora_name=obj.lora_name,
+ lora_path=obj.lora_path,
+ pinned=obj.pinned,
+ )
+
+ # Trigger the actual loading operation at the backend processes.
+ obj.lora_id = new_adapter.lora_id
+ result = (await self.update_lora_adapter_communicator(obj))[0]
+
+ # Register the LoRA adapter only after loading is successful.
+ if result.success:
+ await self.lora_registry.register(new_adapter)
+
+ return result
+ except ValueError as e:
+ return LoadLoRAAdapterReqOutput(
+ success=False,
+ error_message=str(e),
+ )
+
+ async def unload_lora_adapter(
+ self: TokenizerManager,
+ obj: UnloadLoRAAdapterReqInput,
+ _: Optional[fastapi.Request] = None,
+ ) -> UnloadLoRAAdapterReqOutput:
+ self.auto_create_handle_loop()
+
+ try:
+ if not self.server_args.enable_lora:
+ raise ValueError(
+ "LoRA is not enabled. Please set `--enable-lora` to enable LoRA."
+ )
+
+ assert (
+ obj.lora_name is not None
+ ), "lora_name must be provided to unload LoRA adapter"
+
+ # TODO (lifuhuang): Remove this after we verify that dynamic lora loading works
+ # with dp_size > 1.
+ assert (
+ self.server_args.dp_size == 1
+ ), "dp_size must be 1 for dynamic lora loading"
+ logger.info(
+ "Start unload Lora adapter. Lora name=%s",
+ obj.lora_name,
+ )
+
+ async with self.lora_update_lock:
+ # Unregister the LoRA adapter from the registry to stop new requests for this adapter
+ # from being started.
+ lora_id = await self.lora_registry.unregister(obj.lora_name)
+ obj.lora_id = lora_id
+
+ # Initiate the actual unloading operation at the backend processes only after all
+ # ongoing requests using this LoRA adapter are finished.
+ await self.lora_registry.wait_for_unload(lora_id)
+ result = (await self.update_lora_adapter_communicator(obj))[0]
+
+ return result
+ except ValueError as e:
+ return UnloadLoRAAdapterReqOutput(success=False, error_message=str(e))
+
+ async def get_weights_by_name(
+ self: TokenizerManager,
+ obj: GetWeightsByNameReqInput,
+ request: Optional[fastapi.Request] = None,
+ ):
+ self.auto_create_handle_loop()
+ results = await self.get_weights_by_name_communicator(obj)
+ all_parameters = [r.parameter for r in results]
+ if self.server_args.dp_size == 1:
+ return all_parameters[0]
+ else:
+ return all_parameters
+
+ async def release_memory_occupation(
+ self: TokenizerManager,
+ obj: ReleaseMemoryOccupationReqInput,
+ request: Optional[fastapi.Request] = None,
+ ):
+ self.auto_create_handle_loop()
+ await self.release_memory_occupation_communicator(obj)
+
+ async def resume_memory_occupation(
+ self: TokenizerManager,
+ obj: ResumeMemoryOccupationReqInput,
+ request: Optional[fastapi.Request] = None,
+ ):
+ self.auto_create_handle_loop()
+ await self.resume_memory_occupation_communicator(obj)
+
+ async def slow_down(
+ self: TokenizerManager,
+ obj: SlowDownReqInput,
+ request: Optional[fastapi.Request] = None,
+ ):
+ self.auto_create_handle_loop()
+ await self.slow_down_communicator(obj)
+
+ async def get_internal_state(self: TokenizerManager) -> List[Dict[Any, Any]]:
+ req = GetInternalStateReq()
+ responses: List[GetInternalStateReqOutput] = (
+ await self.get_internal_state_communicator(req)
+ )
+ # Many DP ranks
+ return [res.internal_state for res in responses]
+
+ async def set_internal_state(
+ self: TokenizerManager, obj: SetInternalStateReq
+ ) -> List[bool]:
+ responses: List[SetInternalStateReqOutput] = (
+ await self.set_internal_state_communicator(obj)
+ )
+ return [res.updated for res in responses]
+
+ async def get_load(self: TokenizerManager) -> dict:
+ # TODO(lsyin): fake load report server
+ if not self.current_load_lock.locked():
+ async with self.current_load_lock:
+ internal_state = await self.get_internal_state()
+ self.current_load = internal_state[0]["load"]
+ return {"load": self.current_load}
diff --git a/python/sglang/srt/managers/tokenizer_manager.py b/python/sglang/srt/managers/tokenizer_manager.py
index 58220b1d6ce..a13278463eb 100644
--- a/python/sglang/srt/managers/tokenizer_manager.py
+++ b/python/sglang/srt/managers/tokenizer_manager.py
@@ -31,18 +31,7 @@
from datetime import datetime
from enum import Enum
from http import HTTPStatus
-from typing import (
- Any,
- Awaitable,
- Deque,
- Dict,
- Generic,
- List,
- Optional,
- Tuple,
- TypeVar,
- Union,
-)
+from typing import Any, Awaitable, Dict, List, Optional, Tuple, Union
import fastapi
import torch
@@ -53,77 +42,51 @@
from sglang.srt.aio_rwlock import RWLock
from sglang.srt.configs.model_config import ModelConfig
-from sglang.srt.disaggregation.utils import (
- DisaggregationMode,
- KVClassType,
- TransferBackend,
- get_kv_class,
-)
+from sglang.srt.disaggregation.utils import DisaggregationMode
from sglang.srt.hf_transformers_utils import (
get_processor,
get_tokenizer,
get_tokenizer_from_processor,
)
from sglang.srt.lora.lora_registry import LoRARef, LoRARegistry
+from sglang.srt.managers.disagg_service import start_disagg_service
from sglang.srt.managers.io_struct import (
AbortReq,
BatchEmbeddingOut,
BatchMultimodalOut,
BatchStrOut,
BatchTokenIDOut,
+ BatchTokenizedEmbeddingReqInput,
+ BatchTokenizedGenerateReqInput,
CloseSessionReqInput,
ConfigureLoggingReq,
EmbeddingReqInput,
- ExpertDistributionReq,
- ExpertDistributionReqOutput,
- FlushCacheReqInput,
- FlushCacheReqOutput,
+ FreezeGCReq,
GenerateReqInput,
- GetInternalStateReq,
- GetInternalStateReqOutput,
- GetWeightsByNameReqInput,
- GetWeightsByNameReqOutput,
HealthCheckOutput,
- InitWeightsUpdateGroupReqInput,
- InitWeightsUpdateGroupReqOutput,
- LoadLoRAAdapterReqInput,
- LoadLoRAAdapterReqOutput,
- LoRAUpdateResult,
+ MultiTokenizerWrapper,
OpenSessionReqInput,
OpenSessionReqOutput,
- ProfileReq,
- ProfileReqOutput,
- ProfileReqType,
- ReleaseMemoryOccupationReqInput,
- ReleaseMemoryOccupationReqOutput,
- ResumeMemoryOccupationReqInput,
- ResumeMemoryOccupationReqOutput,
SessionParams,
- SetInternalStateReq,
- SetInternalStateReqOutput,
- SlowDownReqInput,
- SlowDownReqOutput,
TokenizedEmbeddingReqInput,
TokenizedGenerateReqInput,
- UnloadLoRAAdapterReqInput,
- UnloadLoRAAdapterReqOutput,
UpdateWeightFromDiskReqInput,
UpdateWeightFromDiskReqOutput,
- UpdateWeightsFromDistributedReqInput,
- UpdateWeightsFromDistributedReqOutput,
- UpdateWeightsFromTensorReqInput,
- UpdateWeightsFromTensorReqOutput,
)
from sglang.srt.managers.mm_utils import TensorTransportMode
from sglang.srt.managers.multimodal_processor import get_mm_processor, import_processors
from sglang.srt.managers.scheduler import is_health_check_generate_req
from sglang.srt.managers.scheduler_input_blocker import input_blocker_guard_region
+from sglang.srt.managers.tokenizer_communicator_mixin import TokenizerCommunicatorMixin
from sglang.srt.metrics.collector import TokenizerMetricsCollector
from sglang.srt.sampling.sampling_params import SamplingParams
from sglang.srt.server_args import PortArgs, ServerArgs
from sglang.srt.utils import (
+ configure_gc_warning,
dataclass_to_string_truncated,
+ freeze_gc,
get_bool_env_var,
+ get_origin_rid,
get_zmq_socket,
kill_process_tree,
)
@@ -171,7 +134,7 @@ class ReqState:
output_token_ids_logprobs_idx: List = dataclasses.field(default_factory=list)
-class TokenizerManager:
+class TokenizerManager(TokenizerCommunicatorMixin):
"""TokenizerManager is a process that tokenizes the text."""
def __init__(
@@ -259,9 +222,15 @@ def __init__(
self.recv_from_detokenizer = get_zmq_socket(
context, zmq.PULL, port_args.tokenizer_ipc_name, True
)
- self.send_to_scheduler = get_zmq_socket(
- context, zmq.PUSH, port_args.scheduler_input_ipc_name, True
- )
+ if self.server_args.tokenizer_worker_num > 1:
+ # Use tokenizer_worker_ipc_name in multi-tokenizer mode
+ self.send_to_scheduler = get_zmq_socket(
+ context, zmq.PUSH, port_args.tokenizer_worker_ipc_name, False
+ )
+ else:
+ self.send_to_scheduler = get_zmq_socket(
+ context, zmq.PUSH, port_args.scheduler_input_ipc_name, True
+ )
# Request states
self.no_create_loop = False
@@ -298,42 +267,16 @@ def __init__(
# The registry dynamically updates as adapters are loaded / unloaded during runtime. It
# serves as the source of truth for available adapters and maps user-friendly LoRA names
# to internally used unique LoRA IDs.
- self.lora_registry = LoRARegistry(self.server_args.lora_paths or {})
+ self.lora_registry = LoRARegistry(self.server_args.lora_paths)
# Lock to serialize LoRA update operations.
# Please note that, unlike `model_update_lock`, this does not block inference, allowing
# LoRA updates and inference to overlap.
self.lora_update_lock = asyncio.Lock()
- # For PD disaggregtion
self.disaggregation_mode = DisaggregationMode(
self.server_args.disaggregation_mode
)
- self.disaggregation_transfer_backend = TransferBackend(
- self.server_args.disaggregation_transfer_backend
- )
- # Start kv boostrap server on prefill
- if self.disaggregation_mode == DisaggregationMode.PREFILL:
- # only start bootstrap server on prefill tm
- kv_bootstrap_server_class = get_kv_class(
- self.disaggregation_transfer_backend, KVClassType.BOOTSTRAP_SERVER
- )
- self.bootstrap_server = kv_bootstrap_server_class(
- self.server_args.disaggregation_bootstrap_port
- )
- is_create_store = (
- self.server_args.node_rank == 0
- and self.server_args.disaggregation_transfer_backend == "ascend"
- )
- if is_create_store:
- try:
- from mf_adapter import create_config_store
-
- ascend_url = os.getenv("ASCEND_MF_STORE_URL")
- create_config_store(ascend_url)
- except Exception as e:
- error_message = f"Failed create mf store, invalid ascend_url."
- error_message += f" With exception {e}"
- raise error_message
+ self.bootstrap_server = start_disagg_service(self.server_args)
# For load balancing
self.current_load = 0
@@ -342,6 +285,7 @@ def __init__(
# Metrics
if self.enable_metrics:
self.metrics_collector = TokenizerMetricsCollector(
+ server_args=server_args,
labels={
"model_name": self.server_args.served_model_name,
# TODO: Add lora name/path in the future,
@@ -352,46 +296,9 @@ def __init__(
collect_tokens_histogram=self.server_args.collect_tokens_histogram,
)
- # Communicators
- self.init_weights_update_group_communicator = _Communicator(
- self.send_to_scheduler, server_args.dp_size
- )
- self.update_weights_from_distributed_communicator = _Communicator(
- self.send_to_scheduler, server_args.dp_size
- )
- self.update_weights_from_tensor_communicator = _Communicator(
- self.send_to_scheduler, server_args.dp_size
- )
- self.get_weights_by_name_communicator = _Communicator(
- self.send_to_scheduler, server_args.dp_size
- )
- self.release_memory_occupation_communicator = _Communicator(
- self.send_to_scheduler, server_args.dp_size
- )
- self.resume_memory_occupation_communicator = _Communicator(
- self.send_to_scheduler, server_args.dp_size
- )
- self.slow_down_communicator = _Communicator(
- self.send_to_scheduler, server_args.dp_size
- )
- self.flush_cache_communicator = _Communicator(
- self.send_to_scheduler, server_args.dp_size
- )
- self.profile_communicator = _Communicator(
- self.send_to_scheduler, server_args.dp_size
- )
- self.get_internal_state_communicator = _Communicator(
- self.send_to_scheduler, server_args.dp_size
- )
- self.set_internal_state_communicator = _Communicator(
- self.send_to_scheduler, server_args.dp_size
- )
- self.expert_distribution_communicator = _Communicator(
- self.send_to_scheduler, server_args.dp_size
- )
- self.update_lora_adapter_communicator = _Communicator(
- self.send_to_scheduler, server_args.dp_size
- )
+ # Configure GC warning
+ if self.server_args.gc_warning_threshold_secs > 0.0:
+ configure_gc_warning(self.server_args.gc_warning_threshold_secs)
self._result_dispatcher = TypeBasedDispatcher(
[
@@ -411,61 +318,15 @@ def __init__(
self._handle_update_weights_from_disk_req_output,
),
(
- InitWeightsUpdateGroupReqOutput,
- self.init_weights_update_group_communicator.handle_recv,
- ),
- (
- UpdateWeightsFromDistributedReqOutput,
- self.update_weights_from_distributed_communicator.handle_recv,
- ),
- (
- UpdateWeightsFromTensorReqOutput,
- self.update_weights_from_tensor_communicator.handle_recv,
- ),
- (
- GetWeightsByNameReqOutput,
- self.get_weights_by_name_communicator.handle_recv,
- ),
- (
- ReleaseMemoryOccupationReqOutput,
- self.release_memory_occupation_communicator.handle_recv,
- ),
- (
- ResumeMemoryOccupationReqOutput,
- self.resume_memory_occupation_communicator.handle_recv,
- ),
- (
- SlowDownReqOutput,
- self.slow_down_communicator.handle_recv,
- ),
- (
- FlushCacheReqOutput,
- self.flush_cache_communicator.handle_recv,
- ),
- (
- ProfileReqOutput,
- self.profile_communicator.handle_recv,
- ),
- (
- GetInternalStateReqOutput,
- self.get_internal_state_communicator.handle_recv,
- ),
- (
- SetInternalStateReqOutput,
- self.set_internal_state_communicator.handle_recv,
- ),
- (
- ExpertDistributionReqOutput,
- self.expert_distribution_communicator.handle_recv,
- ),
- (
- LoRAUpdateResult,
- self.update_lora_adapter_communicator.handle_recv,
- ),
+ FreezeGCReq,
+ lambda x: None,
+ ), # Ignore FreezeGCReq when the scheduler skips the detokenizer and forwards it back to the tokenizer manager.
(HealthCheckOutput, lambda x: None),
]
)
+ self.init_communicators(server_args)
+
async def generate_request(
self,
obj: Union[GenerateReqInput, EmbeddingReqInput],
@@ -475,6 +336,15 @@ async def generate_request(
self.auto_create_handle_loop()
obj.normalize_batch_and_arguments()
+ if self.server_args.tokenizer_worker_num > 1:
+ # Prefix the rid with the worker_id in multi-tokenizer mode
+ if isinstance(obj.rid, list):
+ # If it's a list, add the worker_id prefix to each element
+ obj.rid = [f"{self.worker_id}_{rid}" for rid in obj.rid]
+ else:
+ # If it's a single value, add worker_id prefix
+ obj.rid = f"{self.worker_id}_{obj.rid}"
+
if self.log_requests:
max_length, skip_names, _ = self.log_request_metadata
logger.info(
@@ -565,14 +435,24 @@ def _validate_one_request(
self, obj: Union[GenerateReqInput, EmbeddingReqInput], input_ids: List[int]
) -> None:
"""Validates that the input token count and the requested token count doesn't exceed the model's context length."""
+ # FIXME: unify the length validation logic with the one in the scheduler.
+ _max_req_len = self.context_len
input_token_num = len(input_ids) if input_ids is not None else 0
- # Check if input alone exceeds context length
if input_token_num >= self.context_len:
- raise ValueError(
- f"The input ({input_token_num} tokens) is longer than the "
- f"model's context length ({self.context_len} tokens)."
- )
+ if self.server_args.allow_auto_truncate:
+ logger.warning(
+ f"The input ({input_token_num} tokens) is longer than the "
+ f"model's context length ({self.context_len} tokens). "
+ "Truncating the input."
+ )
+ del input_ids[_max_req_len:]
+ input_token_num = len(input_ids)
+ else:
+ raise ValueError(
+ f"The input ({input_token_num} tokens) is longer than the "
+ f"model's context length ({self.context_len} tokens)."
+ )
if isinstance(obj, EmbeddingReqInput) and self.is_generation:
raise ValueError(
@@ -584,17 +464,37 @@ def _validate_one_request(
max_new_tokens = obj.sampling_params.get("max_new_tokens")
if (
max_new_tokens is not None
- and (max_new_tokens + input_token_num) >= self.context_len
+ and (max_new_tokens + input_token_num) >= _max_req_len
):
total_tokens = max_new_tokens + input_token_num
- error_msg = (
- f"Requested token count exceeds the model's maximum context length "
- f"of {self.context_len} tokens. You requested a total of {total_tokens} "
- f"tokens: {input_token_num} tokens from the input messages and "
- f"{max_new_tokens} tokens for the completion. Please reduce the number "
- f"of tokens in the input messages or the completion to fit within the limit."
- )
- raise ValueError(error_msg)
+ if self.server_args.allow_auto_truncate:
+ logger.warning(
+ f"Requested token count ({input_token_num} input + {max_new_tokens} new) "
+ f"exceeds the model's context length ({self.context_len} tokens). "
+ "Truncating inputs."
+ # NOTE(hj): SGLang upstream truncates max_new_tokens, but we truncate the inputs instead.
+ )
+ # obj.sampling_params["max_new_tokens"] = max(
+ # 0, _max_req_len - input_token_num
+ # )
+ num_trunc = total_tokens - self.context_len + 1
+ trunc_first = num_trunc // 2
+ for _ in range(num_trunc):
+ input_ids.pop(input_token_num // 2 + 1 - trunc_first)
+ assert (
+ max_new_tokens + len(input_ids)
+ ) < self.context_len, (
+ f"({max_new_tokens} + {len(input_ids)}) < {self.context_len}"
+ )
+ else:
+ error_msg = (
+ f"Requested token count exceeds the model's maximum context length "
+ f"of {self.context_len} tokens. You requested a total of {total_tokens} "
+ f"tokens: {input_token_num} tokens from the input messages and "
+ f"{max_new_tokens} tokens for the completion. Please reduce the number "
+ f"of tokens in the input messages or the completion to fit within the limit."
+ )
+ raise ValueError(error_msg)
if isinstance(obj, GenerateReqInput):
if (
@@ -737,6 +637,30 @@ def _send_one_request(
self.rid_to_state[obj.rid] = state
return state
+ def _send_batch_request(
+ self,
+ obj: Union[GenerateReqInput, EmbeddingReqInput],
+ tokenized_objs: List[
+ Union[TokenizedGenerateReqInput, TokenizedEmbeddingReqInput]
+ ],
+ created_time: Optional[float] = None,
+ ):
+ """Send a batch of tokenized requests as a single batched request to the scheduler."""
+ if isinstance(tokenized_objs[0], TokenizedGenerateReqInput):
+ batch_req = BatchTokenizedGenerateReqInput(batch=tokenized_objs)
+ else:
+ batch_req = BatchTokenizedEmbeddingReqInput(batch=tokenized_objs)
+
+ self.send_to_scheduler.send_pyobj(batch_req)
+
+ # Create states for each individual request in the batch
+ for i, tokenized_obj in enumerate(tokenized_objs):
+ tmp_obj = obj[i]
+ state = ReqState(
+ [], False, asyncio.Event(), tmp_obj, created_time=created_time
+ )
+ self.rid_to_state[tmp_obj.rid] = state
+
async def _wait_one_response(
self,
obj: Union[GenerateReqInput, EmbeddingReqInput],
@@ -782,15 +706,17 @@ async def _wait_one_response(
):
raise ValueError(finish_reason["message"])
- if (
- finish_reason.get("type") == "abort"
- and finish_reason.get("status_code")
- == HTTPStatus.SERVICE_UNAVAILABLE
+ if finish_reason.get("type") == "abort" and finish_reason.get(
+ "status_code"
+ ) in (
+ HTTPStatus.SERVICE_UNAVAILABLE,
+ HTTPStatus.INTERNAL_SERVER_ERROR,
):
# This is an abort request initiated by scheduler.
# Delete the key to prevent resending abort request to the scheduler and
# to ensure aborted request state is cleaned up.
- del self.rid_to_state[state.obj.rid]
+ if state.obj.rid in self.rid_to_state:
+ del self.rid_to_state[state.obj.rid]
# Mark ongoing LoRA request as finished.
if self.server_args.enable_lora and state.obj.lora_path:
@@ -837,10 +763,17 @@ async def _handle_batch_request(
tokenized_objs = await self._batch_tokenize_and_process(batch_size, obj)
- for i, tokenized_obj in enumerate(tokenized_objs):
+ # Send as a single batched request
+ self._send_batch_request(obj, tokenized_objs, created_time)
+
+ # Set up generators for each request in the batch
+ for i in range(batch_size):
tmp_obj = obj[i]
- state = self._send_one_request(tmp_obj, tokenized_obj, created_time)
- generators.append(self._wait_one_response(tmp_obj, state, request))
+ generators.append(
+ self._wait_one_response(
+ tmp_obj, self.rid_to_state[tmp_obj.rid], request
+ )
+ )
rids.append(tmp_obj.rid)
else:
# Sequential tokenization and processing
@@ -919,9 +852,6 @@ async def _handle_batch_request(
except StopAsyncIteration:
pass
- async def flush_cache(self) -> FlushCacheReqOutput:
- return (await self.flush_cache_communicator(FlushCacheReqInput()))[0]
-
def abort_request(self, rid: str = "", abort_all: bool = False):
if not abort_all and rid not in self.rid_to_state:
return
@@ -931,55 +861,6 @@ def abort_request(self, rid: str = "", abort_all: bool = False):
if self.enable_metrics:
self.metrics_collector.observe_one_aborted_request()
- async def start_profile(
- self,
- output_dir: Optional[str] = None,
- start_step: Optional[int] = None,
- num_steps: Optional[int] = None,
- activities: Optional[List[str]] = None,
- with_stack: Optional[bool] = None,
- record_shapes: Optional[bool] = None,
- profile_by_stage: bool = False,
- ):
- self.auto_create_handle_loop()
- env_with_stack: bool = get_bool_env_var("SGLANG_PROFILE_WITH_STACK", "true")
- with_stack = False if with_stack is False or env_with_stack is False else True
- req = ProfileReq(
- type=ProfileReqType.START_PROFILE,
- output_dir=output_dir,
- start_step=start_step,
- num_steps=num_steps,
- activities=activities,
- with_stack=with_stack,
- record_shapes=record_shapes,
- profile_by_stage=profile_by_stage,
- profile_id=str(time.time()),
- )
- return await self._execute_profile(req)
-
- async def stop_profile(self):
- self.auto_create_handle_loop()
- req = ProfileReq(type=ProfileReqType.STOP_PROFILE)
- return await self._execute_profile(req)
-
- async def _execute_profile(self, req: ProfileReq):
- result = (await self.profile_communicator(req))[0]
- if not result.success:
- raise RuntimeError(result.message)
- return result
-
- async def start_expert_distribution_record(self):
- self.auto_create_handle_loop()
- await self.expert_distribution_communicator(ExpertDistributionReq.START_RECORD)
-
- async def stop_expert_distribution_record(self):
- self.auto_create_handle_loop()
- await self.expert_distribution_communicator(ExpertDistributionReq.STOP_RECORD)
-
- async def dump_expert_distribution_record(self):
- self.auto_create_handle_loop()
- await self.expert_distribution_communicator(ExpertDistributionReq.DUMP_RECORD)
-
async def pause_generation(self):
async with self.is_pause_cond:
self.is_pause = True
@@ -1014,6 +895,8 @@ async def update_weights_from_disk(
async def _wait_for_model_update_from_disk(
self, obj: UpdateWeightFromDiskReqInput
) -> Tuple[bool, str]:
+ if self.server_args.tokenizer_worker_num > 1:
+ obj = MultiTokenizerWrapper(self.worker_id, obj)
self.send_to_scheduler.send_pyobj(obj)
self.model_update_result = asyncio.Future()
if self.server_args.dp_size == 1:
@@ -1038,191 +921,6 @@ async def _wait_for_model_update_from_disk(
all_paused_requests = [r.num_paused_requests for r in result]
return all_success, all_message, all_paused_requests
- async def init_weights_update_group(
- self,
- obj: InitWeightsUpdateGroupReqInput,
- request: Optional[fastapi.Request] = None,
- ) -> Tuple[bool, str]:
- self.auto_create_handle_loop()
- assert (
- self.server_args.dp_size == 1
- ), "dp_size must be 1 for init parameter update group"
- result = (await self.init_weights_update_group_communicator(obj))[0]
- return result.success, result.message
-
- async def update_weights_from_distributed(
- self,
- obj: UpdateWeightsFromDistributedReqInput,
- request: Optional[fastapi.Request] = None,
- ) -> Tuple[bool, str]:
- self.auto_create_handle_loop()
- assert (
- self.server_args.dp_size == 1 or self.server_args.enable_dp_attention
- ), "dp_size must be 1 or dp attention must be enabled for update weights from distributed"
-
- if obj.abort_all_requests:
- self.abort_request(abort_all=True)
-
- # This means that weight sync
- # cannot run while requests are in progress.
- async with self.model_update_lock.writer_lock:
- result = (await self.update_weights_from_distributed_communicator(obj))[0]
- return result.success, result.message
-
- async def update_weights_from_tensor(
- self,
- obj: UpdateWeightsFromTensorReqInput,
- request: Optional[fastapi.Request] = None,
- ) -> Tuple[bool, str]:
- self.auto_create_handle_loop()
- assert (
- self.server_args.dp_size == 1 or self.server_args.enable_dp_attention
- ), "dp_size must be 1 or dp attention must be enabled for update weights from tensor"
-
- if obj.abort_all_requests:
- self.abort_request(abort_all=True)
-
- # This means that weight sync
- # cannot run while requests are in progress.
- async with self.model_update_lock.writer_lock:
- result = (await self.update_weights_from_tensor_communicator(obj))[0]
- return result.success, result.message
-
- async def load_lora_adapter(
- self,
- obj: LoadLoRAAdapterReqInput,
- _: Optional[fastapi.Request] = None,
- ) -> LoadLoRAAdapterReqOutput:
- self.auto_create_handle_loop()
-
- try:
- if not self.server_args.enable_lora:
- raise ValueError(
- "LoRA is not enabled. Please set `--enable-lora` to enable LoRA."
- )
-
- # TODO (lifuhuang): Remove this after we verify that dynamic lora loading works
- # with dp_size > 1.
- assert (
- self.server_args.dp_size == 1
- ), "dp_size must be 1 for dynamic lora loading"
- logger.info(
- "Start load Lora adapter. Lora name=%s, path=%s",
- obj.lora_name,
- obj.lora_path,
- )
-
- async with self.lora_update_lock:
- if (
- self.server_args.max_loaded_loras is not None
- and self.lora_registry.num_registered_loras
- >= self.server_args.max_loaded_loras
- ):
- raise ValueError(
- f"Cannot load LoRA adapter {obj.lora_name} at path {obj.lora_path}. "
- f"Maximum number of loaded LoRA adapters is {self.server_args.max_loaded_loras}. "
- "Please unload some LoRA adapters before loading new ones."
- )
-
- # Generate new uniquely identifiable LoRARef object.
- new_adapter = LoRARef(
- lora_name=obj.lora_name,
- lora_path=obj.lora_path,
- pinned=obj.pinned,
- )
-
- # Trigger the actual loading operation at the backend processes.
- obj.lora_id = new_adapter.lora_id
- result = (await self.update_lora_adapter_communicator(obj))[0]
-
- # Register the LoRA adapter only after loading is successful.
- if result.success:
- await self.lora_registry.register(new_adapter)
-
- return result
- except ValueError as e:
- return LoadLoRAAdapterReqOutput(
- success=False,
- error_message=str(e),
- )
-
- async def unload_lora_adapter(
- self,
- obj: UnloadLoRAAdapterReqInput,
- _: Optional[fastapi.Request] = None,
- ) -> UnloadLoRAAdapterReqOutput:
- self.auto_create_handle_loop()
-
- try:
- if not self.server_args.enable_lora:
- raise ValueError(
- "LoRA is not enabled. Please set `--enable-lora` to enable LoRA."
- )
-
- assert (
- obj.lora_name is not None
- ), "lora_name must be provided to unload LoRA adapter"
-
- # TODO (lifuhuang): Remove this after we verify that dynamic lora loading works
- # with dp_size > 1.
- assert (
- self.server_args.dp_size == 1
- ), "dp_size must be 1 for dynamic lora loading"
- logger.info(
- "Start unload Lora adapter. Lora name=%s",
- obj.lora_name,
- )
-
- async with self.lora_update_lock:
- # Unregister the LoRA adapter from the registry to stop new requests for this adapter
- # from being started.
- lora_id = await self.lora_registry.unregister(obj.lora_name)
- obj.lora_id = lora_id
-
- # Initiate the actual unloading operation at the backend processes only after all
- # ongoing requests using this LoRA adapter are finished.
- await self.lora_registry.wait_for_unload(lora_id)
- result = (await self.update_lora_adapter_communicator(obj))[0]
-
- return result
- except ValueError as e:
- return UnloadLoRAAdapterReqOutput(success=False, error_message=str(e))
-
- async def get_weights_by_name(
- self, obj: GetWeightsByNameReqInput, request: Optional[fastapi.Request] = None
- ):
- self.auto_create_handle_loop()
- results = await self.get_weights_by_name_communicator(obj)
- all_parameters = [r.parameter for r in results]
- if self.server_args.dp_size == 1:
- return all_parameters[0]
- else:
- return all_parameters
-
- async def release_memory_occupation(
- self,
- obj: ReleaseMemoryOccupationReqInput,
- request: Optional[fastapi.Request] = None,
- ):
- self.auto_create_handle_loop()
- await self.release_memory_occupation_communicator(obj)
-
- async def resume_memory_occupation(
- self,
- obj: ResumeMemoryOccupationReqInput,
- request: Optional[fastapi.Request] = None,
- ):
- self.auto_create_handle_loop()
- await self.resume_memory_occupation_communicator(obj)
-
- async def slow_down(
- self,
- obj: SlowDownReqInput,
- request: Optional[fastapi.Request] = None,
- ):
- self.auto_create_handle_loop()
- await self.slow_down_communicator(obj)
-
async def open_session(
self, obj: OpenSessionReqInput, request: Optional[fastapi.Request] = None
):
@@ -1233,6 +931,8 @@ async def open_session(
elif obj.session_id in self.session_futures:
return None
+ if self.server_args.tokenizer_worker_num > 1:
+ obj = MultiTokenizerWrapper(self.worker_id, obj)
self.send_to_scheduler.send_pyobj(obj)
self.session_futures[obj.session_id] = asyncio.Future()
@@ -1245,30 +945,6 @@ async def close_session(
):
await self.send_to_scheduler.send_pyobj(obj)
- async def get_internal_state(self) -> List[Dict[Any, Any]]:
- req = GetInternalStateReq()
- responses: List[GetInternalStateReqOutput] = (
- await self.get_internal_state_communicator(req)
- )
- # Many DP ranks
- return [res.internal_state for res in responses]
-
- async def set_internal_state(
- self, obj: SetInternalStateReq
- ) -> SetInternalStateReqOutput:
- responses: List[SetInternalStateReqOutput] = (
- await self.set_internal_state_communicator(obj)
- )
- return [res.internal_state for res in responses]
-
- async def get_load(self) -> dict:
- # TODO(lsyin): fake load report server
- if not self.current_load_lock.locked():
- async with self.current_load_lock:
- internal_state = await self.get_internal_state()
- self.current_load = internal_state[0]["load"]
- return {"load": self.current_load}
-
def get_log_request_metadata(self):
max_length = None
skip_names = None
@@ -1337,6 +1013,12 @@ def configure_logging(self, obj: ConfigureLoggingReq):
logging.info(f"Config logging: {obj=}")
self.log_request_metadata = self.get_log_request_metadata()
+ async def freeze_gc(self):
+ """Send a freeze_gc message to the scheduler first, then freeze locally."""
+ self.send_to_scheduler.send_pyobj(FreezeGCReq())
+ freeze_gc("Tokenizer Manager")
+ return None
+
def create_abort_task(self, obj: GenerateReqInput):
# Abort the request if the client is disconnected.
async def abort_request():
@@ -1504,7 +1186,6 @@ async def sigterm_watchdog(self):
async def handle_loop(self):
"""The event loop that handles requests"""
-
while True:
recv_obj = await self.recv_from_detokenizer.recv_pyobj()
self._result_dispatcher(recv_obj)
@@ -1524,9 +1205,12 @@ def _handle_batch_output(
)
continue
+ origin_rid = rid
+ if self.server_args.tokenizer_worker_num > 1:
+ origin_rid = get_origin_rid(rid)
# Build meta_info and return value
meta_info = {
- "id": rid,
+ "id": origin_rid,
"finish_reason": recv_obj.finished_reasons[i],
"prompt_tokens": recv_obj.prompt_tokens[i],
"weight_version": self.server_args.weight_version,
@@ -1832,6 +1516,9 @@ def _handle_abort_req(self, recv_obj):
if is_health_check_generate_req(recv_obj):
return
state = self.rid_to_state[recv_obj.rid]
+ origin_rid = recv_obj.rid
+ if self.server_args.tokenizer_worker_num > 1:
+ origin_rid = get_origin_rid(origin_rid)
state.finished = True
if recv_obj.finished_reason:
out = {
@@ -1844,7 +1531,7 @@ def _handle_abort_req(self, recv_obj):
out = {
"text": "",
"meta_info": {
- "id": recv_obj.rid,
+ "id": origin_rid,
"finish_reason": {
"type": "abort",
"message": "Abort before prefill",
@@ -2024,47 +1711,6 @@ def running_phase_sigquit_handler(self, signum=None, frame=None):
kill_process_tree(os.getpid())
-T = TypeVar("T")
-
-
-class _Communicator(Generic[T]):
- """Note: The communicator now only run up to 1 in-flight request at any time."""
-
- def __init__(self, sender, fan_out: int):
- self._sender = sender
- self._fan_out = fan_out
- self._result_event: Optional[asyncio.Event] = None
- self._result_values: Optional[List[T]] = None
- self._ready_queue: Deque[asyncio.Future] = deque()
-
- async def __call__(self, obj):
- ready_event = asyncio.Event()
- if self._result_event is not None or len(self._ready_queue) > 0:
- self._ready_queue.append(ready_event)
- await ready_event.wait()
- assert self._result_event is None
- assert self._result_values is None
-
- if obj:
- self._sender.send_pyobj(obj)
-
- self._result_event = asyncio.Event()
- self._result_values = []
- await self._result_event.wait()
- result_values = self._result_values
- self._result_event = self._result_values = None
-
- if len(self._ready_queue) > 0:
- self._ready_queue.popleft().set()
-
- return result_values
-
- def handle_recv(self, recv_obj: T):
- self._result_values.append(recv_obj)
- if len(self._result_values) == self._fan_out:
- self._result_event.set()
-
-
# Note: request abort handling logic
# We should handle all of the following cases correctly.
#
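A small sketch of the rid handling shown above for multi-tokenizer mode; get_origin_rid_sketch is a hypothetical stand-in for sglang.srt.utils.get_origin_rid, assuming the worker-id prefix is separated by the first underscore.

import os

# Sketch of the multi-tokenizer rid scheme above.
# Assumption: get_origin_rid strips the "<worker_id>_" prefix added by the tokenizer worker.
worker_id = os.getpid()

def add_worker_prefix(rid):
    if isinstance(rid, list):
        return [f"{worker_id}_{r}" for r in rid]
    return f"{worker_id}_{rid}"

def get_origin_rid_sketch(rid: str) -> str:
    # Hypothetical stand-in for sglang.srt.utils.get_origin_rid.
    return rid.split("_", 1)[1] if "_" in rid else rid

prefixed = add_worker_prefix("req-123")
assert get_origin_rid_sketch(prefixed) == "req-123"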
diff --git a/python/sglang/srt/managers/tp_worker.py b/python/sglang/srt/managers/tp_worker.py
index 77dac1ea6c6..1cdc48c2591 100644
--- a/python/sglang/srt/managers/tp_worker.py
+++ b/python/sglang/srt/managers/tp_worker.py
@@ -12,10 +12,11 @@
# limitations under the License.
# ==============================================================================
"""A tensor parallel worker."""
+from __future__ import annotations
import logging
import threading
-from typing import Optional, Tuple, Union
+from typing import TYPE_CHECKING, Optional, Tuple, Union
import torch
@@ -45,6 +46,9 @@
from sglang.srt.server_args import ServerArgs
from sglang.srt.utils import MultiprocessingSerializer, broadcast_pyobj, set_random_seed
+if TYPE_CHECKING:
+ from sglang.srt.managers.cache_controller import LayerDoneCounter
+
logger = logging.getLogger(__name__)
@@ -78,6 +82,11 @@ def __init__(
if not is_draft_worker
else server_args.speculative_draft_model_path
),
+ model_revision=(
+ server_args.revision
+ if not is_draft_worker
+ else server_args.speculative_draft_model_revision
+ ),
is_draft_model=is_draft_worker,
)
@@ -92,6 +101,7 @@ def __init__(
pp_rank=pp_rank,
pp_size=server_args.pp_size,
nccl_port=nccl_port,
+ dp_rank=dp_rank,
server_args=server_args,
is_draft_worker=is_draft_worker,
req_to_token_pool=req_to_token_pool,
@@ -136,7 +146,7 @@ def __init__(
assert self.max_running_requests > 0, "max_running_request is zero"
self.max_queued_requests = server_args.max_queued_requests
assert (
- self.max_running_requests > 0
+ self.max_queued_requests > 0
), "max_queued_requests is zero. We need to be at least 1 to schedule a request."
self.max_req_len = min(
self.model_config.context_len - 1,
@@ -161,10 +171,10 @@ def __init__(
self.hicache_layer_transfer_counter = None
- def register_hicache_layer_transfer_counter(self, counter):
+ def register_hicache_layer_transfer_counter(self, counter: LayerDoneCounter):
self.hicache_layer_transfer_counter = counter
- def set_hicache_consumer(self, consumer_index):
+ def set_hicache_consumer(self, consumer_index: int):
if self.hicache_layer_transfer_counter is not None:
self.hicache_layer_transfer_counter.set_consumer(consumer_index)
@@ -224,6 +234,9 @@ def forward_batch_generation(
) -> Tuple[
Union[LogitsProcessorOutput, torch.Tensor], Optional[torch.Tensor], bool
]:
+ # Update the HiCache consumer index for the running batch
+ self.set_hicache_consumer(model_worker_batch.hicache_consumer_index)
+
forward_batch = ForwardBatch.init_new(model_worker_batch, self.model_runner)
pp_proxy_tensors = None
diff --git a/python/sglang/srt/managers/tp_worker_overlap_thread.py b/python/sglang/srt/managers/tp_worker_overlap_thread.py
index 674a941955c..e72d4fb6e3f 100644
--- a/python/sglang/srt/managers/tp_worker_overlap_thread.py
+++ b/python/sglang/srt/managers/tp_worker_overlap_thread.py
@@ -12,13 +12,14 @@
# limitations under the License.
# ==============================================================================
"""A tensor parallel worker."""
+from __future__ import annotations
import dataclasses
import logging
import signal
import threading
from queue import Queue
-from typing import Optional, Tuple
+from typing import TYPE_CHECKING, List, Optional, Tuple
import psutil
import torch
@@ -38,6 +39,9 @@
from sglang.srt.utils import DynamicGradMode, get_compiler_backend
from sglang.utils import get_exception_traceback
+if TYPE_CHECKING:
+ from sglang.srt.managers.cache_controller import LayerDoneCounter
+
logger = logging.getLogger(__name__)
@@ -79,7 +83,7 @@ def __init__(
)
# Launch threads
- self.input_queue = Queue()
+ self.input_queue = Queue[Tuple[ModelWorkerBatch, int, torch.Event]]()
self.output_queue = Queue()
self.forward_stream = torch.get_device_module(self.device).Stream()
self.forward_thread = threading.Thread(
@@ -93,13 +97,9 @@ def __init__(
self.hicache_layer_transfer_counter = None
- def register_hicache_layer_transfer_counter(self, counter):
+ def register_hicache_layer_transfer_counter(self, counter: LayerDoneCounter):
self.hicache_layer_transfer_counter = counter
- def set_hicache_consumer(self, consumer_index):
- if self.hicache_layer_transfer_counter is not None:
- self.hicache_layer_transfer_counter.set_consumer(consumer_index)
-
def get_worker_info(self):
return self.worker.get_worker_info()
@@ -147,7 +147,7 @@ def forward_thread_func(self):
@DynamicGradMode()
def forward_thread_func_(self):
batch_pt = 0
- batch_lists = [None] * 2
+ batch_lists: List = [None] * 2
while True:
model_worker_batch, future_token_ids_ct, sync_event = self.input_queue.get()
@@ -169,8 +169,6 @@ def forward_thread_func_(self):
input_ids = model_worker_batch.input_ids
resolve_future_token_ids(input_ids, self.future_token_ids_map)
- # update the consumer index of hicache to the running batch
- self.set_hicache_consumer(model_worker_batch.hicache_consumer_index)
# Run forward
logits_output, next_token_ids, can_run_cuda_graph = (
self.worker.forward_batch_generation(
diff --git a/python/sglang/srt/managers/utils.py b/python/sglang/srt/managers/utils.py
index de83c459086..7b7369150ec 100644
--- a/python/sglang/srt/managers/utils.py
+++ b/python/sglang/srt/managers/utils.py
@@ -35,7 +35,14 @@ def validate_input_length(
"the max context length. Truncated. "
f"{len(req.origin_input_ids)=}, {max_req_input_len=}."
)
- req.origin_input_ids = req.origin_input_ids[:max_req_input_len]
+ input_len = len(req.origin_input_ids)
+ num_to_truncate = input_len - max_req_input_len
+ trunc_first = num_to_truncate // 2
+ trunc_last = num_to_truncate - trunc_first
+ req.origin_input_ids = (
+ req.origin_input_ids[: input_len // 2 - trunc_first]
+ + req.origin_input_ids[input_len // 2 + trunc_last :]
+ )
return None
else:
error_msg = (
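A standalone sketch of the middle-truncation strategy introduced above: instead of cutting the tail, tokens are dropped from the middle so both the head and the tail of the prompt survive.

def truncate_middle(input_ids, max_len):
    """Drop tokens from the middle (mirrors the validate_input_length change above)."""
    input_len = len(input_ids)
    if input_len <= max_len:
        return input_ids
    num_to_truncate = input_len - max_len
    trunc_first = num_to_truncate // 2
    trunc_last = num_to_truncate - trunc_first
    return input_ids[: input_len // 2 - trunc_first] + input_ids[input_len // 2 + trunc_last :]

ids = list(range(10))
print(truncate_middle(ids, 6))  # [0, 1, 2, 7, 8, 9]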
diff --git a/python/sglang/srt/mem_cache/allocator.py b/python/sglang/srt/mem_cache/allocator.py
index 64c2fe3186b..497331673d3 100644
--- a/python/sglang/srt/mem_cache/allocator.py
+++ b/python/sglang/srt/mem_cache/allocator.py
@@ -283,7 +283,7 @@ def clear(self):
self.swa_attn_allocator.clear()
self.full_attn_allocator.clear()
self.full_to_swa_index_mapping.fill_(0)
- self.is_in_free_group = False
+ self.is_not_in_free_group = True
self.free_group = []
@@ -434,15 +434,12 @@ def __init__(
device: str,
kvcache: KVCache,
need_sort: bool,
- max_num_extend_tokens: int,
):
super().__init__(size, page_size, dtype, device, kvcache, need_sort)
self.num_pages = size // page_size
- self.max_num_extend_tokens_next_power_of_2 = next_power_of_2(
- max_num_extend_tokens
- )
self.debug_mode = get_bool_env_var("SGLANG_DEBUG_MEMORY_POOL")
self.ret_values = torch.empty((), dtype=torch.int64, device=self.device)
+ self.seen_max_num_extend_tokens_next_power_of_2 = 1
self.clear()
def alloc(self, need_size: int):
@@ -480,6 +477,11 @@ def alloc_extend(
(last_loc + 1) % self.page_size == prefix_lens % self.page_size
)
+ self.seen_max_num_extend_tokens_next_power_of_2 = max(
+ self.seen_max_num_extend_tokens_next_power_of_2,
+ next_power_of_2(extend_num_tokens),
+ )
+
bs = len(prefix_lens)
if self.need_sort and extend_num_tokens // self.page_size + bs + 1 > len(
self.free_pages
@@ -498,7 +500,7 @@ def alloc_extend(
self.ret_values,
next_power_of_2(bs),
self.page_size,
- self.max_num_extend_tokens_next_power_of_2,
+ self.seen_max_num_extend_tokens_next_power_of_2,
)
if self.debug_mode:
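A rough sketch of the new bookkeeping above: the allocator no longer fixes the kernel's token bound at construction time but tracks the largest extend size seen so far, rounded up to a power of two (next_power_of_2 below is a local stand-in for the helper used in the diff).

def next_power_of_2(n: int) -> int:
    # Stand-in helper: smallest power of two >= n.
    return 1 if n <= 1 else 1 << (n - 1).bit_length()

seen_max = 1
for extend_num_tokens in (17, 1024, 300):
    seen_max = max(seen_max, next_power_of_2(extend_num_tokens))
print(seen_max)  # 1024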
diff --git a/python/sglang/srt/mem_cache/allocator_ascend.py b/python/sglang/srt/mem_cache/allocator_ascend.py
index 94bbaafebda..2af138a6cb7 100644
--- a/python/sglang/srt/mem_cache/allocator_ascend.py
+++ b/python/sglang/srt/mem_cache/allocator_ascend.py
@@ -66,17 +66,6 @@ def alloc_extend_kernel_ascend(
class AscendPagedTokenToKVPoolAllocator(PagedTokenToKVPoolAllocator):
- def __init__(
- self,
- size: int,
- page_size: int,
- dtype: torch.dtype,
- device: str,
- kvcache: KVCache,
- need_sort: bool,
- ):
- super().__init__(size, page_size, dtype, device, kvcache, need_sort, 1)
-
def alloc_extend(
self,
prefix_lens: torch.Tensor,
diff --git a/python/sglang/srt/mem_cache/chunk_cache.py b/python/sglang/srt/mem_cache/chunk_cache.py
index 88d923b4605..1a576bfa2dd 100644
--- a/python/sglang/srt/mem_cache/chunk_cache.py
+++ b/python/sglang/srt/mem_cache/chunk_cache.py
@@ -47,7 +47,7 @@ def cache_finished_req(self, req: Req):
self.req_to_token_pool.free(req.req_pool_idx)
self.token_to_kv_pool_allocator.free(kv_indices)
- def cache_unfinished_req(self, req: Req):
+ def cache_unfinished_req(self, req: Req, chunked=False):
kv_indices = self.req_to_token_pool.req_to_token[
req.req_pool_idx, : len(req.fill_ids)
]
diff --git a/python/sglang/srt/mem_cache/hicache_storage.py b/python/sglang/srt/mem_cache/hicache_storage.py
index 90a468cc36c..6ec077db58c 100644
--- a/python/sglang/srt/mem_cache/hicache_storage.py
+++ b/python/sglang/srt/mem_cache/hicache_storage.py
@@ -2,6 +2,7 @@
import logging
import os
from abc import ABC, abstractmethod
+from dataclasses import dataclass
from typing import Any, List, Optional
import torch
@@ -9,12 +10,6 @@
logger = logging.getLogger(__name__)
-from sglang.srt.distributed import (
- get_tensor_model_parallel_rank,
- get_tensor_model_parallel_world_size,
-)
-
-
def get_hash_str(token_ids: List[int], prior_hash: str = None) -> str:
hasher = hashlib.sha256()
@@ -27,6 +22,16 @@ def get_hash_str(token_ids: List[int], prior_hash: str = None) -> str:
return hasher.hexdigest()
+@dataclass
+class HiCacheStorageConfig:
+ tp_rank: int
+ tp_size: int
+ is_mla_model: bool
+ is_page_first_layout: bool
+ model_name: Optional[str]
+ extra_config: Optional[dict] = None
+
+
class HiCacheStorage(ABC):
"""
HiCacheStorage is a class that provides a generic key-value interface for storing and retrieving KV cache.
@@ -55,7 +60,7 @@ def batch_get(
keys: List[str],
target_locations: Optional[Any] = None,
target_sizes: Optional[Any] = None,
- ) -> List[torch.Tensor | None]:
+ ) -> List[torch.Tensor | None] | int:
"""
Retrieve values for multiple keys.
Returns a list of tensors or None for each key.
@@ -91,27 +96,53 @@ def batch_set(
pass
@abstractmethod
- def exists(self, key: str) -> bool | dict:
+ def exists(self, key: str) -> bool:
"""
Check if the key exists in the storage.
Returns True if the key exists, False otherwise.
"""
pass
+ def batch_exists(self, keys: List[str]) -> int:
+ """
+ Check if the keys exist in the storage.
+ Return the number of consecutive existing keys from the start.
+ Can be overridden by subclasses for a more efficient implementation.
+ """
+ for i in range(len(keys)):
+ if not self.exists(keys[i]):
+ return i
+ return len(keys)
+
+ def get_stats(self):
+ return None
+
class HiCacheFile(HiCacheStorage):
- def __init__(self, file_path: str = "/tmp/hicache"):
+ def __init__(
+ self, storage_config: HiCacheStorageConfig, file_path: str = "/tmp/hicache"
+ ):
self.file_path = os.getenv("SGLANG_HICACHE_FILE_BACKEND_STORAGE_DIR", file_path)
- tp_rank = get_tensor_model_parallel_rank()
- tp_size = get_tensor_model_parallel_world_size()
- self.tp_suffix = f"_{tp_rank}_{tp_size}" if tp_size > 1 else ""
+
+ tp_rank, tp_size, model_name, is_mla_model = (
+ storage_config.tp_rank,
+ storage_config.tp_size,
+ storage_config.model_name,
+ storage_config.is_mla_model,
+ )
+ model_name = "-".join(model_name.split("/")) if model_name else ""
+ if is_mla_model:
+ self.config_suffix = f"_{model_name}"
+ else:
+ self.config_suffix = f"_{model_name}_{tp_rank}_{tp_size}"
+
if not os.path.exists(self.file_path) and tp_rank == 0:
os.makedirs(self.file_path)
logger.info(f"Created HiCacheFile storage directory at {self.file_path}")
def _get_suffixed_key(self, key: str) -> str:
- return key + self.tp_suffix
+ return key + self.config_suffix
def get(
self,
@@ -122,13 +153,11 @@ def get(
key = self._get_suffixed_key(key)
tensor_path = os.path.join(self.file_path, f"{key}.bin")
try:
- # Load directly into target_location's memory buffer
- with open(tensor_path, "rb") as f:
- target_location.set_(
- torch.frombuffer(f.read(), dtype=target_location.dtype)
- .reshape(target_location.shape)
- .untyped_storage()
- )
+ expected = target_location.numel() * target_location.element_size()
+ with open(tensor_path, "rb", buffering=0) as f:
+ buf = memoryview(target_location.view(torch.uint8).contiguous().numpy())
+ if f.readinto(buf) != expected:
+ raise IOError(f"Short read for {key}")
return target_location
except FileNotFoundError:
logger.warning(f"Failed to fetch {key} from HiCacheFile storage.")
@@ -154,11 +183,12 @@ def set(
target_location: Optional[Any] = None,
target_sizes: Optional[Any] = None,
) -> bool:
- key = self._get_suffixed_key(key)
- tensor_path = os.path.join(self.file_path, f"{key}.bin")
if self.exists(key):
logger.debug(f"Key {key} already exists. Skipped.")
return True
+
+ key = self._get_suffixed_key(key)
+ tensor_path = os.path.join(self.file_path, f"{key}.bin")
try:
value.contiguous().view(dtype=torch.uint8).numpy().tofile(tensor_path)
return True
@@ -183,21 +213,14 @@ def exists(self, key: str) -> bool:
tensor_path = os.path.join(self.file_path, f"{key}.bin")
return os.path.exists(tensor_path)
- def delete(self, key: str) -> None:
- key = self._get_suffixed_key(key)
- tensor_path = os.path.join(self.file_path, f"{key}.bin")
- try:
- os.remove(tensor_path)
- except FileNotFoundError:
- logger.warning(f"Key {key} does not exist. Cannot delete.")
- return
-
- def clear(self) -> None:
+ def clear(self) -> bool:
try:
for filename in os.listdir(self.file_path):
file_path = os.path.join(self.file_path, filename)
if os.path.isfile(file_path):
os.remove(file_path)
logger.info("Cleared all entries in HiCacheFile storage.")
+ return True
except Exception as e:
logger.error(f"Failed to clear HiCacheFile storage: {e}")
+ return False
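Note on the default batch_exists added above: it reports the length of the leading run of existing keys, not the total number of hits, which matches how pages are consumed in order. A standalone sketch of that semantics, using a hypothetical dict-backed store (ToyStore is illustrative, not part of the patched API):

# Toy store mirroring only the consecutive-prefix logic of batch_exists.
from typing import List

class ToyStore:
    def __init__(self):
        self._data = {}

    def exists(self, key: str) -> bool:
        return key in self._data

    def batch_exists(self, keys: List[str]) -> int:
        # Return the number of consecutive existing keys from the start.
        for i, key in enumerate(keys):
            if not self.exists(key):
                return i
        return len(keys)

store = ToyStore()
store._data.update({"page0": b"k", "page1": b"k", "page3": b"k"})
# "page2" is missing, so only the first two keys count even though "page3" exists.
assert store.batch_exists(["page0", "page1", "page2", "page3"]) == 2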
diff --git a/python/sglang/srt/mem_cache/hip_offload_kv_pool_mha.py b/python/sglang/srt/mem_cache/hip_offload_kv_pool_mha.py
new file mode 100644
index 00000000000..9ba68c55cc4
--- /dev/null
+++ b/python/sglang/srt/mem_cache/hip_offload_kv_pool_mha.py
@@ -0,0 +1,162 @@
+from __future__ import annotations
+
+import logging
+from typing import TYPE_CHECKING, Any, List, Optional, Tuple, Union
+
+import torch
+from torch import Tensor
+
+from sglang.srt.layers.radix_attention import RadixAttention
+from sglang.srt.mem_cache.memory_pool import KVCache
+from sglang.srt.model_executor.forward_batch_info import ForwardBatch
+
+if TYPE_CHECKING:
+ from hip_attn.v1_2 import HiPAttentionConfig, HiPOffloadCache
+
+logger = logging.getLogger(__name__)
+
+
+class MHATokenToHiPOffloadKVPool(KVCache):
+
+ def __init__(
+ self,
+ max_token_size: int,
+ max_mask_cache_factor: Union[float, List[float]],
+ max_mask_cache_size: Optional[Union[int, List[int]]],
+ max_sa_cache_factor: Union[float, List[float]],
+ max_sa_cache_size: Optional[Union[int, List[int]]],
+ dtype: torch.dtype,
+ head_num: int,
+ head_dim: int,
+ layer_num: int,
+ device: torch.device,
+ hip_config: HiPAttentionConfig,
+ chunked_attention_size: int = 0,
+ irope_offset: int = 0,
+ irope_interval: int = 0,
+ enable_memory_saver: bool = False,
+ ):
+ super().__init__(
+ size=max_token_size,
+ page_size=1,
+ dtype=dtype,
+ layer_num=layer_num,
+ device=device,
+ enable_memory_saver=enable_memory_saver,
+ )
+ self.size = max_token_size
+ self.dtype = dtype
+ self.device = device
+
+ assert isinstance(device, torch.device)
+ assert device.index is not None
+
+ from hip_attn.v1_2 import HiPModelOffloadCache
+
+ self.offload_cache = HiPModelOffloadCache(
+ max_token_size=max_token_size,
+ max_mask_cache_factor=max_mask_cache_factor,
+ max_mask_cache_token_size=max_mask_cache_size,
+ max_sa_cache_factor=max_sa_cache_factor,
+ max_sa_cache_token_size=max_sa_cache_size,
+ dtype=dtype,
+ head_num=head_num,
+ head_dim=head_dim,
+ layer_num=layer_num,
+ device=device,
+ hip_config=hip_config,
+ chunked_attention_size=chunked_attention_size,
+ irope_offset=irope_offset,
+ irope_interval=irope_interval,
+ )
+
+ def get_key_buffer(self, layer_id: int):
+ raise NotImplementedError()
+
+ def get_value_buffer(self, layer_id: int):
+ raise NotImplementedError()
+
+ def get_kv_buffer(self, layer_id: int) -> Tuple[HiPOffloadCache, Any]:
+ return self.offload_cache.get_kv_buffer(layer_id)
+
+ def get_fetched_prefix_kv_buffer(
+ self,
+ layer_id: int,
+ extend_seq_lens: Tensor,
+ extend_seq_lens_cpu: List[int],
+ # you need to pass KV for extend
+ cache_k: Tensor,
+ cache_v: Tensor,
+ ) -> Tuple[Tensor, Tensor, Any]:
+ return self.offload_cache.get_fetched_prefix_kv_buffer(
+ layer_id,
+ cache_k=cache_k,
+ cache_v=cache_v,
+ extend_seq_lens=extend_seq_lens,
+ extend_seq_lens_cpu=extend_seq_lens_cpu,
+ )
+
+ def set_kv_buffer(
+ self,
+ layer: RadixAttention,
+ table: torch.Tensor,
+ cache_k: torch.Tensor,
+ cache_v: torch.Tensor,
+ async_copy: bool = False,
+ push_to_gpu_cache: bool = False,
+ ):
+ self.offload_cache.set_kv_buffer(
+ layer.layer_id, table, cache_k, cache_v, async_copy, push_to_gpu_cache
+ )
+
+ def get_flat_data(self, indices):
+ raise NotImplementedError()
+
+ def transfer(self, indices, flat_data):
+ raise NotImplementedError()
+
+ def transfer_per_layer(self, indices, flat_data, layer_id):
+ raise NotImplementedError()
+
+ def on_model_start(self, forward_batch: ForwardBatch):
+ assert forward_batch.token_to_kv_pool == self
+
+ self.offload_cache.on_model_start(
+ forward_batch.forward_mode.is_extend(),
+ forward_batch.batch_size,
+ forward_batch.req_to_token_pool.req_to_token,
+ forward_batch.req_pool_indices,
+ forward_batch.extend_prefix_lens_cpu,
+ forward_batch.extend_seq_lens_cpu,
+ )
+
+ def on_model_end(self, forward_batch: ForwardBatch):
+ assert forward_batch.token_to_kv_pool == self
+
+ self.offload_cache.on_model_end(
+ forward_batch.forward_mode.is_extend(),
+ )
+
+ def on_layer_start(self, forward_batch: ForwardBatch, layer_id: int):
+ assert forward_batch.token_to_kv_pool == self
+
+ self.offload_cache.on_layer_start(
+ layer_id,
+ forward_batch.forward_mode.is_extend(),
+ forward_batch.batch_size,
+ forward_batch.req_to_token_pool.req_to_token,
+ forward_batch.req_pool_indices,
+ forward_batch.extend_prefix_lens_cpu,
+ forward_batch.extend_seq_lens_cpu,
+ )
+
+ def on_layer_end(self, forward_batch: ForwardBatch, layer_id: int):
+ assert forward_batch.token_to_kv_pool == self
+
+ self.offload_cache.on_layer_end(
+ layer_id,
+ forward_batch.forward_mode.is_extend(),
+ )
+
+ def is_online_cache_update_enabled(self):
+ return self.offload_cache.is_online_cache_update_enabled()
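MHATokenToHiPOffloadKVPool above mostly delegates to HiPModelOffloadCache, but it is driven through the on_model_start / on_layer_start / on_layer_end / on_model_end hooks that the base KVCache gains later in this diff. A minimal sketch of the call order a model runner would follow (RecordingCache and run_forward are illustrative stand-ins, not the sglang executor):

class RecordingCache:
    def __init__(self):
        self.events = []

    def on_model_start(self, batch):
        self.events.append("model_start")

    def on_layer_start(self, batch, layer_id):
        self.events.append(f"layer_start:{layer_id}")

    def on_layer_end(self, batch, layer_id):
        self.events.append(f"layer_end:{layer_id}")

    def on_model_end(self, batch):
        self.events.append("model_end")

def run_forward(cache, batch, num_layers):
    # The runner brackets the whole forward pass and each layer with the hooks.
    cache.on_model_start(batch)
    for layer_id in range(num_layers):
        cache.on_layer_start(batch, layer_id)
        # ... attention / MLP compute for this layer would run here ...
        cache.on_layer_end(batch, layer_id)
    cache.on_model_end(batch)

cache = RecordingCache()
run_forward(cache, batch=None, num_layers=2)
assert cache.events[0] == "model_start" and cache.events[-1] == "model_end"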
diff --git a/python/sglang/srt/mem_cache/hiradix_cache.py b/python/sglang/srt/mem_cache/hiradix_cache.py
index d4ff703ba18..3b00e4619da 100644
--- a/python/sglang/srt/mem_cache/hiradix_cache.py
+++ b/python/sglang/srt/mem_cache/hiradix_cache.py
@@ -20,6 +20,7 @@
MLATokenToKVPoolHost,
)
from sglang.srt.mem_cache.radix_cache import RadixCache, TreeNode
+from sglang.srt.metrics.collector import StorageMetricsCollector
logger = logging.getLogger(__name__)
@@ -37,8 +38,11 @@ def __init__(
hicache_write_policy: str,
hicache_io_backend: str,
hicache_mem_layout: str,
+ enable_metrics: bool,
hicache_storage_backend: Optional[str] = None,
hicache_storage_prefetch_policy: Optional[str] = "best_effort",
+ model_name: Optional[str] = None,
+ storage_backend_extra_config: Optional[str] = None,
):
if hicache_io_backend == "direct":
@@ -71,6 +75,8 @@ def __init__(
self.tp_group = tp_cache_group
self.tp_world_size = torch.distributed.get_world_size(group=self.tp_group)
self.enable_storage = hicache_storage_backend is not None
+ self.enable_storage_metrics = self.enable_storage and enable_metrics
+
# todo: customizable storage prefetch threshold and timeout
self.prefetch_threshold = 256
self.prefetch_timeout = 3 # seconds
@@ -87,7 +93,17 @@ def __init__(
io_backend=hicache_io_backend,
storage_backend=hicache_storage_backend,
prefetch_threshold=self.prefetch_threshold,
+ model_name=model_name,
+ storage_backend_extra_config=storage_backend_extra_config,
)
+ if self.enable_storage_metrics:
+ # TODO: support pp
+ labels = {
+ "storage_backend": hicache_storage_backend,
+ "tp_rank": self.cache_controller.tp_rank,
+ "dp_rank": self.cache_controller.dp_rank,
+ }
+ self.metrics_collector = StorageMetricsCollector(labels=labels)
# record the nodes with ongoing write through
self.ongoing_write_through = {}
@@ -98,10 +114,7 @@ def __init__(
self.ongoing_backup = {}
# todo: dynamically adjust the threshold
self.write_through_threshold = (
- 1 if hicache_write_policy == "write_through" else 3
- )
- self.write_through_threshold_storage = (
- 1 if hicache_write_policy == "write_through" else 3
+ 1 if hicache_write_policy == "write_through" else 2
)
self.load_back_threshold = 10
super().__init__(
@@ -121,6 +134,28 @@ def get_height(self, node: TreeNode):
height += 1
return height
+ def clear_storage_backend(self) -> bool:
+ if self.enable_storage:
+ try:
+ # Check if the storage backend has a clear method (for nixl backends)
+ if hasattr(self.cache_controller.storage_backend, "clear"):
+ self.cache_controller.storage_backend.clear()
+ logger.info(
+ "Hierarchical cache storage backend cleared successfully!"
+ )
+ return True
+ else:
+ logger.warning(
+ f"Storage backend {type(self.cache_controller.storage_backend).__name__} does not support clear operation."
+ )
+ return False
+ except Exception as e:
+ logger.error(f"Failed to clear hierarchical cache storage backend: {e}")
+ return False
+ else:
+ logger.warning("Hierarchical cache storage backend is not enabled.")
+ return False
+
def write_backup(self, node: TreeNode, write_back=False):
host_indices = self.cache_controller.write(
device_indices=node.value,
@@ -151,8 +186,9 @@ def write_backup_storage(self, node: TreeNode):
self.ongoing_backup[operation_id] = node
node.protect_host()
- def inc_hit_count(self, node: TreeNode):
- if self.cache_controller.write_policy == "write_back":
+ def _inc_hit_count(self, node: TreeNode, chunked=False):
+ # skip the hit count update for chunked requests
+ if self.cache_controller.write_policy == "write_back" or chunked:
return
node.hit_count += 1
@@ -160,51 +196,62 @@ def inc_hit_count(self, node: TreeNode):
if node.hit_count >= self.write_through_threshold:
# write to host if the node is not backuped
self.write_backup(node)
- else:
- if (
- self.enable_storage
- and (not node.backuped_storage)
- and node.hit_count >= self.write_through_threshold_storage
- ):
- # if the node is backuped on host memory but not on storage
- self.write_backup_storage(node)
def writing_check(self, write_back=False):
if write_back:
# blocking till all write back complete
while len(self.ongoing_write_through) > 0:
- ack_id = self.cache_controller.ack_write_queue.get()
- del self.ongoing_write_through[ack_id]
+ for _, finish_event, ack_list in self.cache_controller.ack_write_queue:
+ finish_event.synchronize()
+ for ack_id in ack_list:
+ del self.ongoing_write_through[ack_id]
+ self.cache_controller.ack_write_queue.clear()
+ assert len(self.ongoing_write_through) == 0
return
- queue_size = torch.tensor(
- self.cache_controller.ack_write_queue.qsize(), dtype=torch.int
- )
+
+ # NOTE: all ranks have the same ongoing_write_through, so the sync can be skipped when it is empty
+ if len(self.ongoing_write_through) == 0:
+ return
+
+ finish_count = 0
+ for _, finish_event, ack_list in self.cache_controller.ack_write_queue:
+ if not finish_event.query():
+ break
+ finish_count += 1
+ queue_size = torch.tensor(finish_count, dtype=torch.int, device="cpu")
if self.tp_world_size > 1:
- # synchrnoize TP workers to make the same update to radix cache
+ # synchronize TP workers to make the same update to radix cache
torch.distributed.all_reduce(
queue_size,
op=torch.distributed.ReduceOp.MIN,
group=self.tp_group,
)
- for _ in range(queue_size.item()):
- ack_id = self.cache_controller.ack_write_queue.get()
- self.dec_lock_ref(self.ongoing_write_through[ack_id])
- del self.ongoing_write_through[ack_id]
+
+ finish_count = int(queue_size.item())
+ while finish_count > 0:
+ _, finish_event, ack_list = self.cache_controller.ack_write_queue.pop(0)
+ finish_event.synchronize()
+ for ack_id in ack_list:
+ backuped_node = self.ongoing_write_through.pop(ack_id)
+ self.dec_lock_ref(backuped_node)
+ if self.enable_storage:
+ self.write_backup_storage(backuped_node)
+ finish_count -= 1
def loading_check(self):
- while not self.cache_controller.ack_load_queue.empty():
- try:
- ack_id = self.cache_controller.ack_load_queue.get_nowait()
- start_node, end_node = self.ongoing_load_back[ack_id]
- self.dec_lock_ref(end_node)
- while end_node != start_node:
- assert end_node.loading
- end_node.loading = False
- end_node = end_node.parent
- # clear the reference
- del self.ongoing_load_back[ack_id]
- except Exception:
+ finish_count = 0
+ for _, finish_event, ack_list in self.cache_controller.ack_load_queue:
+ if not finish_event.query():
+ # the KV cache loading is still ongoing
break
+ finish_count += 1
+ # no need to sync across TP workers as batch forwarding is synced
+ for ack_id in ack_list:
+ end_node = self.ongoing_load_back.pop(ack_id)
+ self.dec_lock_ref(end_node)
+
+ # drop the acknowledged entries from the load ack queue
+ del self.cache_controller.ack_load_queue[:finish_count]
def evictable_size(self):
return self.evictable_size_
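The reworked writing_check and loading_check above replace qsize polling with per-batch finish events: only the leading run of completed events is acknowledged, so entries are released in submission order. A standalone sketch of that "count consecutive finished from the front" step, with threading.Event standing in for CUDA events (purely illustrative):

import threading

# Ack queue entries are (ack_ids, finish_event), kept in submission order.
ack_queue = []
for ack_ids in (["a"], ["b", "c"], ["d"]):
    ack_queue.append((ack_ids, threading.Event()))

ack_queue[0][1].set()  # the first two batches have finished
ack_queue[1][1].set()

finish_count = 0
for _, finish_event in ack_queue:
    if not finish_event.is_set():  # stand-in for a non-blocking Event.query()
        break
    finish_count += 1

finished = ack_queue[:finish_count]
del ack_queue[:finish_count]
assert [ids for ids, _ in finished] == [["a"], ["b", "c"]] and len(ack_queue) == 1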
@@ -329,12 +376,11 @@ def load_back(
# no sufficient GPU memory to load back KV caches
return None
- self.ongoing_load_back[last_hit_node.id] = (ancester_node, last_hit_node)
+ self.ongoing_load_back[last_hit_node.id] = last_hit_node
offset = 0
for node in nodes_to_load:
node.value = device_indices[offset : offset + len(node.host_value)]
offset += len(node.host_value)
- node.loading = True
self.evictable_size_ += len(device_indices)
self.inc_lock_ref(last_hit_node)
@@ -363,66 +409,72 @@ def init_load_back(
last_node,
)
- def ready_to_load_host_cache(self):
- producer_index = self.cache_controller.layer_done_counter.next_producer()
- self.load_cache_event.set()
- return producer_index
+ def ready_to_load_host_cache(self) -> int:
+ """
+ Notify the cache controller to start the KV cache loading.
+ Return the consumer index for the schedule batch manager to track.
+ """
+ return self.cache_controller.start_loading()
def check_hicache_events(self):
self.writing_check()
self.loading_check()
if self.enable_storage:
- self.check_revoked_prefetch()
- self.check_backup_progress()
-
- def check_revoked_prefetch(self):
- queue_size = torch.tensor(
- self.cache_controller.prefetch_revoke_queue.qsize(), dtype=torch.int
- )
- if self.tp_world_size > 1:
- # synchrnoize TP workers to make the same update to hiradix cache
- torch.distributed.all_reduce(
- queue_size,
- op=torch.distributed.ReduceOp.MIN,
- group=self.tp_group,
+ self.drain_storage_control_queues()
+ if self.enable_storage_metrics:
+ self.metrics_collector.log_storage_metrics(
+ self.cache_controller.storage_backend.get_stats()
)
- for _ in range(queue_size.item()):
- req_id = self.cache_controller.prefetch_revoke_queue.get()
- if req_id in self.ongoing_prefetch:
- last_host_node, token_ids, _, _ = self.ongoing_prefetch[req_id]
- last_host_node.release_host()
- del self.ongoing_prefetch[req_id]
- self.cache_controller.prefetch_tokens_occupied -= len(token_ids)
- else:
- # the revoked operation already got terminated
- pass
- def check_backup_progress(self):
- queue_size = torch.tensor(
- self.cache_controller.ack_backup_queue.qsize(), dtype=torch.int
+ def drain_storage_control_queues(self):
+ """
+ Combine prefetch revoke, backup ack, and host mem release checks
+ to minimize TP synchronization and Python overhead.
+ """
+ cc = self.cache_controller
+
+ qsizes = torch.tensor(
+ [
+ cc.prefetch_revoke_queue.qsize(),
+ cc.ack_backup_queue.qsize(),
+ cc.host_mem_release_queue.qsize(),
+ ],
+ dtype=torch.int,
)
if self.tp_world_size > 1:
- # synchrnoize TP workers to make the same update to hiradix cache
torch.distributed.all_reduce(
- queue_size,
- op=torch.distributed.ReduceOp.MIN,
- group=self.tp_group,
+ qsizes, op=torch.distributed.ReduceOp.MIN, group=self.tp_group
)
- for _ in range(queue_size.item()):
- ack_id, completed_tokens = self.cache_controller.ack_backup_queue.get()
- host_node = self.ongoing_backup[ack_id]
-
- if completed_tokens > 0:
- if completed_tokens < len(host_node.key):
- # backup is only partially successful, split the node
- new_node = self._split_node(
- host_node.key, host_node, completed_tokens
- )
- new_node.backuped_storage = True
- else:
- host_node.backuped_storage = True
- host_node.release_host()
- del self.ongoing_backup[ack_id]
+
+ n_revoke, n_backup, n_release = map(int, qsizes.tolist())
+
+ # process prefetch revokes
+ for _ in range(n_revoke):
+ req_id = cc.prefetch_revoke_queue.get()
+ info = self.ongoing_prefetch.pop(req_id, None)
+ if info is not None:
+ last_host_node, token_ids, _, _ = info
+ last_host_node.release_host()
+ cc.prefetch_tokens_occupied -= len(token_ids)
+ # else: the revoked operation already got terminated, nothing to do
+
+ # process backup acks
+ for _ in range(n_backup):
+ operation = cc.ack_backup_queue.get()
+ ack_id = operation.id
+ entry = self.ongoing_backup.pop(ack_id, None)
+ if entry is not None:
+ entry.release_host()
+ if self.enable_storage_metrics:
+ self.metrics_collector.log_backuped_tokens(operation.completed_tokens)
+
+ # release host memory
+ host_indices_list = []
+ for _ in range(n_release):
+ host_indices_list.append(cc.host_mem_release_queue.get())
+ if host_indices_list:
+ host_indices = torch.cat(host_indices_list, dim=0)
+ cc.mem_pool_host.free(host_indices)
def can_terminate_prefetch(self, operation: PrefetchOperation):
can_terminate = True
@@ -430,9 +482,12 @@ def can_terminate_prefetch(self, operation: PrefetchOperation):
if self.prefetch_stop_policy == "best_effort":
return can_terminate
- completed = (
- operation.completed_tokens == len(operation.hash_value) * self.page_size
- )
+ if len(operation.hash_value) == 0:
+ completed = False
+ else:
+ completed = (
+ operation.completed_tokens == len(operation.hash_value) * self.page_size
+ )
if self.prefetch_stop_policy == "wait_complete":
can_terminate = completed
@@ -444,15 +499,22 @@ def can_terminate_prefetch(self, operation: PrefetchOperation):
# unknown prefetch stop policy, just return True
return True
+ operation_terminated = operation.is_terminated()
if self.tp_world_size > 1:
- can_terminate = torch.tensor(can_terminate, dtype=torch.int)
+ states = torch.tensor(
+ [1 - int(can_terminate), int(operation_terminated)],
+ dtype=torch.int,
+ )
torch.distributed.all_reduce(
- can_terminate,
- op=torch.distributed.ReduceOp.MIN,
+ states,
+ op=torch.distributed.ReduceOp.MAX,
group=self.tp_group,
)
- can_terminate = bool(can_terminate.item())
-
+ can_terminate = states[0].item() == 0
+ operation_terminated = states[1].item() == 1
+ # the operation should be terminated if it is already terminated on any TP worker
+ # or it meets the termination condition on all TP workers
+ can_terminate = can_terminate or operation_terminated
return can_terminate
def check_prefetch_progress(self, req_id: str) -> bool:
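The tensor reduced in can_terminate_prefetch above packs two flags: 1 - can_terminate and operation_terminated. A MAX reduce over the first flag implements "all ranks can terminate" (no rank objects), while a MAX over the second implements "already terminated on any rank". A pure-Python sketch of that decision, with per-rank flag lists standing in for the all_reduce:

from typing import List

def consensus_can_terminate(can_terminate: List[bool], terminated: List[bool]) -> bool:
    # MAX over (1 - can_terminate): stays 0 only if every rank can terminate.
    all_can_terminate = max(1 - int(c) for c in can_terminate) == 0
    # MAX over terminated: becomes 1 if any rank already terminated the operation.
    any_terminated = max(int(t) for t in terminated) == 1
    return all_can_terminate or any_terminated

assert consensus_can_terminate([True, True], [False, False]) is True
assert consensus_can_terminate([True, False], [False, False]) is False
assert consensus_can_terminate([True, False], [False, True]) is True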
@@ -479,7 +541,7 @@ def check_prefetch_progress(self, req_id: str) -> bool:
logger.debug(f"Prefetch {req_id} completed with {completed_tokens} tokens")
min_completed_tokens = completed_tokens
- if self.tp_world_size > 1 and self.prefetch_stop_policy != "wait_complete":
+ if self.tp_world_size > 1:
# synchrnoize TP workers to make the same update to hiradix cache
completed_tokens_tensor = torch.tensor(
min_completed_tokens, dtype=torch.int
@@ -502,13 +564,18 @@ def check_prefetch_progress(self, req_id: str) -> bool:
self.cache_controller.mem_pool_host.update_prefetch(written_indices)
self.cache_controller.mem_pool_host.free(host_indices[:matched_length])
- self.cache_controller.mem_pool_host.free(
+ self.cache_controller.append_host_mem_release(
host_indices[min_completed_tokens:completed_tokens]
)
last_host_node.release_host()
del self.ongoing_prefetch[req_id]
self.cache_controller.prefetch_tokens_occupied -= len(token_ids)
+ if self.enable_storage_metrics:
+ self.metrics_collector.log_prefetched_tokens(
+ min_completed_tokens - matched_length
+ )
+
return True
def match_prefix(self, key: List[int], **kwargs):
@@ -536,6 +603,8 @@ def match_prefix(self, key: List[int], **kwargs):
while last_node.evicted:
host_hit_length += len(last_node.host_value)
last_node = last_node.parent
+ while not last_host_node.backuped:
+ last_host_node = last_host_node.parent
return MatchResult(
device_indices=value,
@@ -556,7 +625,11 @@ def prefetch_from_storage(
len(new_input_tokens) % self.page_size
)
new_input_tokens = new_input_tokens[:prefetch_length]
- if not self.enable_storage or prefetch_length < self.prefetch_threshold:
+ if (
+ not self.enable_storage
+ or prefetch_length < self.prefetch_threshold
+ or self.cache_controller.prefetch_rate_limited()
+ ):
return
last_host_node.protect_host()
@@ -564,6 +637,10 @@ def prefetch_from_storage(
if host_indices is None:
self.evict_host(prefetch_length)
host_indices = self.cache_controller.mem_pool_host.alloc(prefetch_length)
+ if host_indices is None:
+ last_host_node.release_host()
+ # no sufficient host memory for prefetch
+ return
operation = self.cache_controller.prefetch(
req_id, host_indices, new_input_tokens, last_hash
)
@@ -642,7 +719,6 @@ def _split_node(self, key, child: TreeNode, split_len: int):
new_node.parent = child.parent
new_node.lock_ref = child.lock_ref
new_node.key = child.key[:split_len]
- new_node.loading = child.loading
new_node.hit_count = child.hit_count
# split value and host value if exists
@@ -663,11 +739,11 @@ def _split_node(self, key, child: TreeNode, split_len: int):
new_node.parent.children[self.get_child_key_fn(key)] = new_node
return new_node
- def _insert_helper(self, node: TreeNode, key: List, value):
- node.last_access_time = time.monotonic()
+ def insert(self, key: List, value, chunked=False):
if len(key) == 0:
return 0
+ node = self.root_node
child_key = self.get_child_key_fn(key)
total_prefix_length = 0
@@ -684,7 +760,7 @@ def _insert_helper(self, node: TreeNode, key: List, value):
self.token_to_kv_pool_host.update_synced(node.host_value)
self.evictable_size_ += len(node.value)
else:
- self.inc_hit_count(node)
+ self._inc_hit_count(node, chunked)
total_prefix_length += prefix_len
else:
# partial match, split the node
@@ -694,7 +770,7 @@ def _insert_helper(self, node: TreeNode, key: List, value):
self.token_to_kv_pool_host.update_synced(new_node.host_value)
self.evictable_size_ += len(new_node.value)
else:
- self.inc_hit_count(new_node)
+ self._inc_hit_count(new_node, chunked)
total_prefix_length += prefix_len
node = new_node
@@ -728,7 +804,7 @@ def _insert_helper(self, node: TreeNode, key: List, value):
last_hash = new_node.hash_value[-1]
if self.cache_controller.write_policy != "write_back":
- self.inc_hit_count(new_node)
+ self._inc_hit_count(new_node, chunked)
return total_prefix_length
def _collect_leaves_device(self):
@@ -755,3 +831,19 @@ def is_leaf(node):
if not cur_child.evicted:
stack.append(cur_child)
return ret_list
+
+ def release_aborted_request(self, rid: str):
+ if rid not in self.ongoing_prefetch:
+ return
+
+ last_host_node, token_ids, host_indices, operation = self.ongoing_prefetch[rid]
+ if operation.host_indices is None:
+ return
+
+ completed_tokens, _ = self.cache_controller.terminate_prefetch(operation)
+ if self.tp_world_size > 1:
+ torch.distributed.barrier(group=self.tp_group)
+ last_host_node.release_host()
+ del self.ongoing_prefetch[rid]
+ self.cache_controller.append_host_mem_release(host_indices[:completed_tokens])
+ self.cache_controller.prefetch_tokens_occupied -= len(token_ids)
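drain_storage_control_queues above folds what used to be separate TP synchronizations into a single all_reduce over a length-3 tensor of queue sizes, then drains exactly the MIN count from each queue so every rank applies the same updates. A small sketch of that batching idea, simulating the element-wise MIN across ranks without torch.distributed:

# rank -> [prefetch_revoke, backup_ack, host_mem_release] queue sizes
per_rank_qsizes = {
    0: [3, 1, 2],
    1: [2, 1, 4],
}

# One element-wise MIN across ranks plays the role of the single all_reduce.
n_revoke, n_backup, n_release = (
    min(sizes[i] for sizes in per_rank_qsizes.values()) for i in range(3)
)
assert (n_revoke, n_backup, n_release) == (2, 1, 2)
# Each rank then pops exactly that many items from its local queues, keeping
# the radix-cache updates identical across TP workers.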
diff --git a/python/sglang/srt/mem_cache/lora_radix_cache.py b/python/sglang/srt/mem_cache/lora_radix_cache.py
index fa562601253..32b115cb66d 100644
--- a/python/sglang/srt/mem_cache/lora_radix_cache.py
+++ b/python/sglang/srt/mem_cache/lora_radix_cache.py
@@ -183,7 +183,7 @@ def cache_finished_req(self, req: Req):
self.req_to_token_pool.free(req.req_pool_idx)
self.dec_lock_ref(req.last_node)
- def cache_unfinished_req(self, req: Req):
+ def cache_unfinished_req(self, req: Req, chunked=False):
"""Cache request when it is unfinished."""
if self.disable:
return
diff --git a/python/sglang/srt/mem_cache/memory_pool.py b/python/sglang/srt/mem_cache/memory_pool.py
index 07d7f5234cd..4405b6c0b6b 100644
--- a/python/sglang/srt/mem_cache/memory_pool.py
+++ b/python/sglang/srt/mem_cache/memory_pool.py
@@ -13,6 +13,8 @@
limitations under the License.
"""
+from __future__ import annotations
+
from sglang.srt.torch_memory_saver_adapter import TorchMemorySaverAdapter
"""
@@ -27,7 +29,7 @@
import abc
import logging
from contextlib import nullcontext
-from typing import Dict, List, Optional, Tuple, Union
+from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Union
import numpy as np
import torch
@@ -36,12 +38,19 @@
from sglang.srt.constants import GPU_MEMORY_TYPE_KV_CACHE
from sglang.srt.layers.radix_attention import RadixAttention
-from sglang.srt.utils import get_bool_env_var, is_cuda, next_power_of_2
+from sglang.srt.model_executor.forward_batch_info import ForwardBatch
+from sglang.srt.utils import get_bool_env_var, is_cuda, is_npu, next_power_of_2
+
+if TYPE_CHECKING:
+ from sglang.srt.managers.cache_controller import LayerDoneCounter
logger = logging.getLogger(__name__)
GB = 1024 * 1024 * 1024
_is_cuda = is_cuda()
+_is_npu = is_npu()
+if _is_npu:
+ import torch_npu
class ReqToTokenPool:
@@ -94,6 +103,207 @@ def clear(self):
self.free_slots = list(range(self.size))
+class MambaPool:
+ def __init__(
+ self,
+ size: int,
+ conv_dtype: torch.dtype,
+ ssm_dtype: torch.dtype,
+ num_mamba_layers: int,
+ conv_state_shape: Tuple[int, int],
+ temporal_state_shape: Tuple[int, int],
+ device: str,
+ speculative_num_draft_tokens: Optional[int] = None,
+ ):
+ conv_state = torch.zeros(
+ size=(num_mamba_layers, size + 1) + conv_state_shape,
+ dtype=conv_dtype,
+ device=device,
+ )
+ temporal_state = torch.zeros(
+ size=(num_mamba_layers, size + 1) + temporal_state_shape,
+ dtype=ssm_dtype,
+ device=device,
+ )
+ if speculative_num_draft_tokens is not None:
+ # Cache intermediate SSM states per draft token during target verify
+ # Shape: [num_layers, size + 1, speculative_num_draft_tokens, HV, K, V]
+ intermediate_ssm_state_cache = torch.empty(
+ size=(
+ num_mamba_layers,
+ size + 1,
+ speculative_num_draft_tokens,
+ temporal_state_shape[0],
+ temporal_state_shape[1],
+ temporal_state_shape[2],
+ ),
+ dtype=ssm_dtype,
+ device="cuda",
+ )
+ # Cache intermediate conv windows (last K-1 inputs) per draft token during target verify
+ # Shape: [num_layers, size + 1, speculative_num_draft_tokens, dim, K-1]
+ intermediate_conv_window_cache = torch.empty(
+ size=(
+ num_mamba_layers,
+ size + 1,
+ speculative_num_draft_tokens,
+ conv_state_shape[0],
+ conv_state_shape[1],
+ ),
+ dtype=conv_dtype,
+ device="cuda",
+ )
+ self.mamba_cache = (
+ conv_state,
+ temporal_state,
+ intermediate_ssm_state_cache,
+ intermediate_conv_window_cache,
+ )
+ else:
+ self.mamba_cache = (conv_state, temporal_state)
+ self.size = size
+ self.free_slots = list(range(size))
+ self.mem_usage = self.get_mamba_size() / GB
+ logger.info(
+ f"Mamba Cache is allocated. "
+ f"conv_state size: {conv_state.numel() * conv_state.itemsize / GB:.2f}GB, "
+ f"ssm_state size: {temporal_state.numel() * temporal_state.itemsize / GB:.2f}GB "
+ )
+
+ def get_mamba_params_all_layers(self):
+ return [self.mamba_cache[i] for i in range(len(self.mamba_cache))]
+
+ def get_mamba_params(self, layer_id: int):
+ return [self.mamba_cache[i][layer_id] for i in range(len(self.mamba_cache))]
+
+ def get_mamba_size(self):
+ return (
+ np.prod(self.mamba_cache[0].shape) * self.mamba_cache[0].dtype.itemsize
+ + np.prod(self.mamba_cache[1].shape) * self.mamba_cache[1].dtype.itemsize
+ )
+
+ def available_size(self):
+ return len(self.free_slots)
+
+ def alloc(self, need_size: int) -> Optional[List[int]]:
+ if need_size > len(self.free_slots):
+ return None
+
+ select_index = self.free_slots[:need_size]
+ self.free_slots = self.free_slots[need_size:]
+
+ return select_index
+
+ def free(self, free_index: Union[int, List[int]]):
+ if isinstance(free_index, (int,)):
+ self.free_slots.append(free_index)
+ else:
+ self.free_slots.extend(free_index)
+ self.mamba_cache[0][:, free_index] = self.mamba_cache[1][:, free_index] = 0
+
+ def clear(self):
+ self.free_slots = list(range(self.size))
+
+
+class HybridReqToTokenPool(ReqToTokenPool):
+ """A memory pool that maps a request to its token locations."""
+
+ def __init__(
+ self,
+ size: int,
+ max_context_len: int,
+ device: str,
+ enable_memory_saver: bool,
+ conv_dtype: torch.dtype,
+ ssm_dtype: torch.dtype,
+ mamba_layers: List[int],
+ conv_state_shape: Tuple[int, int],
+ temporal_state_shape: Tuple[int, int],
+ speculative_num_draft_tokens: int,
+ ):
+ super().__init__(
+ size=size,
+ max_context_len=max_context_len,
+ device=device,
+ enable_memory_saver=enable_memory_saver,
+ )
+
+ self.mamba_pool = MambaPool(
+ size,
+ conv_dtype,
+ ssm_dtype,
+ len(mamba_layers),
+ conv_state_shape,
+ temporal_state_shape,
+ device,
+ speculative_num_draft_tokens,
+ )
+ self.mamba_map = {layer_id: i for i, layer_id in enumerate(mamba_layers)}
+
+ self.device = device
+ self.req_index_to_mamba_index_mapping: torch.Tensor = torch.empty(
+ size, dtype=torch.int32, device=self.device
+ )
+
+ self.rid_to_mamba_index_mapping: Dict[str, int] = {}
+ self.mamba_index_to_rid_mapping: Dict[int, str] = {}
+
+ # For a chunked prefill request, we do not need to allocate a new mamba cache;
+ # we can reuse the mamba cache already allocated for the request.
+ def alloc(
+ self, need_size: int, reqs: Optional[List["Req"]] = None
+ ) -> Optional[List[int]]:
+ select_index = super().alloc(need_size)
+ if select_index is None:
+ return None
+
+ mamba_index = []
+ for req in reqs:
+ rid = req.rid
+ if rid in self.rid_to_mamba_index_mapping:
+ mid = self.rid_to_mamba_index_mapping[rid]
+ elif (mid := self.mamba_pool.alloc(1)) is not None:
+ mid = mid[0]
+ self.rid_to_mamba_index_mapping[rid] = mid
+ self.mamba_index_to_rid_mapping[mid] = rid
+ mamba_index.append(mid)
+ assert len(select_index) == len(
+ mamba_index
+ ), f"Not enough space for mamba cache, try to increase --max-mamba-cache-size."
+ self.req_index_to_mamba_index_mapping[select_index] = torch.tensor(
+ mamba_index, dtype=torch.int32, device=self.device
+ )
+ return select_index
+
+ def get_mamba_indices(self, req_indices: torch.Tensor) -> torch.Tensor:
+ return self.req_index_to_mamba_index_mapping[req_indices]
+
+ def get_mamba_params(self, layer_id: int):
+ assert layer_id in self.mamba_map
+ return self.mamba_pool.get_mamba_params(self.mamba_map[layer_id])
+
+ def get_mamba_params_all_layers(self):
+ return self.mamba_pool.get_mamba_params_all_layers()
+
+ # For chunked prefill, we cannot free the mamba cache here; it is still needed by later chunks.
+ def free(self, free_index: Union[int, List[int]], free_mamba_cache: bool = True):
+ super().free(free_index)
+ if free_mamba_cache:
+ mamba_index = self.req_index_to_mamba_index_mapping[free_index]
+ mamba_index_list = mamba_index.tolist()
+ if isinstance(mamba_index_list, int):
+ mamba_index_list = [mamba_index_list]
+ self.mamba_pool.free(mamba_index_list)
+ for mid in mamba_index_list:
+ rid = self.mamba_index_to_rid_mapping[mid]
+ self.mamba_index_to_rid_mapping.pop(mid)
+ self.rid_to_mamba_index_mapping.pop(rid)
+
+ def clear(self):
+ super().clear()
+ self.mamba_pool.clear()
+
+
class KVCache(abc.ABC):
@abc.abstractmethod
def __init__(
@@ -127,6 +337,29 @@ def __init__(
# used for chunked cpu-offloading
self.cpu_offloading_chunk_size = 8192
+ # default state for optional layer-wise transfer control
+ self.layer_transfer_counter = None
+
+ def _finalize_allocation_log(self, num_tokens: int):
+ """Common logging and mem_usage computation for KV cache allocation.
+ Supports both tuple (K, V) size returns and single KV size returns.
+ """
+ kv_size_bytes = self.get_kv_size_bytes()
+ if isinstance(kv_size_bytes, tuple):
+ k_size, v_size = kv_size_bytes
+ k_size_GB = k_size / GB
+ v_size_GB = v_size / GB
+ logger.info(
+ f"KV Cache is allocated. #tokens: {num_tokens}, K size: {k_size_GB:.2f} GB, V size: {v_size_GB:.2f} GB"
+ )
+ self.mem_usage = k_size_GB + v_size_GB
+ else:
+ kv_size_GB = kv_size_bytes / GB
+ logger.info(
+ f"KV Cache is allocated. #tokens: {num_tokens}, KV size: {kv_size_GB:.2f} GB"
+ )
+ self.mem_usage = kv_size_GB
+
@abc.abstractmethod
def get_key_buffer(self, layer_id: int) -> torch.Tensor:
raise NotImplementedError()
@@ -149,7 +382,7 @@ def set_kv_buffer(
) -> None:
raise NotImplementedError()
- def register_layer_transfer_counter(self, layer_transfer_counter):
+ def register_layer_transfer_counter(self, layer_transfer_counter: LayerDoneCounter):
self.layer_transfer_counter = layer_transfer_counter
def get_cpu_copy(self, indices):
@@ -158,6 +391,90 @@ def get_cpu_copy(self, indices):
def load_cpu_copy(self, kv_cache_cpu, indices):
raise NotImplementedError()
+
+ def on_model_start(self, forward_batch: ForwardBatch):
+ pass
+
+ def on_model_end(self, forward_batch: ForwardBatch):
+ pass
+
+ def on_layer_start(self, forward_batch: ForwardBatch, layer_id: int):
+ pass
+
+ def on_layer_end(self, forward_batch: ForwardBatch, layer_id: int):
+ pass
+
+
+class TokenToKVPoolAllocator:
+ """An allocator managing the indices to kv cache data."""
+
+ def __init__(
+ self,
+ size: int,
+ dtype: torch.dtype,
+ device: str,
+ kvcache: KVCache,
+ ):
+ self.size = size
+ self.dtype = dtype
+ self.device = device
+ self.page_size = 1
+
+ self.free_slots = None
+ self.is_not_in_free_group = True
+ self.free_group = []
+ self.clear()
+
+ self._kvcache = kvcache
+
+ def available_size(self):
+ return len(self.free_slots)
+
+ def get_kvcache(self):
+ return self._kvcache
+
+ def alloc(self, need_size: int):
+ if need_size > len(self.free_slots):
+ return None
+
+ select_index = self.free_slots[:need_size]
+ self.free_slots = self.free_slots[need_size:]
+ return select_index
+
+ def free(self, free_index: torch.Tensor):
+ if free_index.numel() == 0:
+ return
+
+ if self.is_not_in_free_group:
+ self.free_slots = torch.cat((self.free_slots, free_index))
+ else:
+ self.free_group.append(free_index)
+
+ def free_group_begin(self):
+ self.is_not_in_free_group = False
+ self.free_group = []
+
+ def free_group_end(self):
+ self.is_not_in_free_group = True
+ if self.free_group:
+ self.free(torch.cat(self.free_group))
+
+ def backup_state(self):
+ return self.free_slots
+
+ def restore_state(self, free_slots):
+ self.free_slots = free_slots
+
+ def clear(self):
+ # The padded slot 0 is used for writing dummy outputs from padded tokens.
+ self.free_slots = torch.arange(
+ 1, self.size + 1, dtype=torch.int64, device=self.device
+ )
+ self.is_not_in_free_group = True
+ self.free_group = []
+
class MHATokenToKVPool(KVCache):
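The free_group_begin / free_group_end pair on the TokenToKVPoolAllocator added above defers individual frees and concatenates them once, avoiding repeated torch.cat calls when many requests finish in the same step. A hedged usage sketch (assuming the patched sglang.srt.mem_cache.memory_pool module is importable; kvcache=None is only a placeholder for this illustration):

import torch

from sglang.srt.mem_cache.memory_pool import TokenToKVPoolAllocator

alloc = TokenToKVPoolAllocator(size=16, dtype=torch.float16, device="cpu", kvcache=None)
a = alloc.alloc(4)
b = alloc.alloc(4)

alloc.free_group_begin()   # start batching frees
alloc.free(a)              # queued, not yet returned to free_slots
alloc.free(b)
alloc.free_group_end()     # a single torch.cat returns both at once
assert alloc.available_size() == 16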
@@ -202,15 +519,9 @@ def __init__(
self._create_buffers()
- self.layer_transfer_counter = None
self.device_module = torch.get_device_module(self.device)
self.alt_stream = self.device_module.Stream() if _is_cuda else None
-
- k_size, v_size = self.get_kv_size_bytes()
- logger.info(
- f"KV Cache is allocated. #tokens: {size}, K size: {k_size / GB:.2f} GB, V size: {v_size / GB:.2f} GB"
- )
- self.mem_usage = (k_size + v_size) / GB
+ self._finalize_allocation_log(size)
def _create_buffers(self):
with self.memory_saver_adapter.region(GPU_MEMORY_TYPE_KV_CACHE):
@@ -349,7 +660,6 @@ def get_key_buffer(self, layer_id: int):
# same applies to get_value_buffer and get_kv_buffer
if self.layer_transfer_counter is not None:
self.layer_transfer_counter.wait_until(layer_id - self.start_layer)
-
return self._get_key_buffer(layer_id)
def _get_value_buffer(self, layer_id: int):
@@ -376,7 +686,7 @@ def set_kv_buffer(
v_scale: Optional[float] = None,
layer_id_override: Optional[int] = None,
):
- from sglang.srt.model_executor.graph_runner import get_is_capture_mode
+ from sglang.srt.model_executor.cuda_graph_runner import get_is_capture_mode
if layer_id_override is not None:
layer_id = layer_id_override
@@ -417,50 +727,119 @@ def move_kv_cache(self, tgt_loc: torch.Tensor, src_loc: torch.Tensor):
)
-class SWAKVPool(KVCache):
- """KV cache with separate pools for full and SWA attention layers."""
+class HybridLinearKVPool(KVCache):
+ """KV cache with separate pools for full and linear attention layers."""
def __init__(
self,
size: int,
- size_swa: int,
dtype: torch.dtype,
head_num: int,
head_dim: int,
- swa_attention_layer_ids: List[int],
full_attention_layer_ids: List[int],
enable_kvcache_transpose: bool,
device: str,
):
self.size = size
- self.size_swa = size_swa
self.dtype = dtype
self.device = device
- self.swa_layer_nums = len(swa_attention_layer_ids)
self.full_layer_nums = len(full_attention_layer_ids)
self.page_size = 1
# TODO MHATransposedTokenToKVPool if enable_kvcache_transpose is True
assert not enable_kvcache_transpose
- TokenToKVPoolClass = MHATokenToKVPool
- self.swa_kv_pool = TokenToKVPoolClass(
- size=size_swa,
+ self.full_kv_pool = MHATokenToKVPool(
+ size=size,
page_size=self.page_size,
dtype=dtype,
head_num=head_num,
head_dim=head_dim,
- layer_num=self.swa_layer_nums,
+ layer_num=self.full_layer_nums,
device=device,
enable_memory_saver=False,
)
- self.full_kv_pool = TokenToKVPoolClass(
+ self.full_attention_layer_id_mapping = {
+ id: i for i, id in enumerate(full_attention_layer_ids)
+ }
+ k_size, v_size = self.get_kv_size_bytes()
+ self.mem_usage = (k_size + v_size) / GB
+
+ def get_kv_size_bytes(self):
+ return self.full_kv_pool.get_kv_size_bytes()
+
+ def get_contiguous_buf_infos(self):
+ return self.full_kv_pool.get_contiguous_buf_infos()
+
+ def _transfer_full_attention_id(self, layer_id: int):
+ if layer_id not in self.full_attention_layer_id_mapping:
+ raise ValueError(
+ f"{layer_id=} not in full attention layers: {self.full_attention_layer_id_mapping.keys()}"
+ )
+ return self.full_attention_layer_id_mapping[layer_id]
+
+ def get_key_buffer(self, layer_id: int):
+ layer_id = self._transfer_full_attention_id(layer_id)
+ return self.full_kv_pool.get_key_buffer(layer_id)
+
+ def get_value_buffer(self, layer_id: int):
+ layer_id = self._transfer_full_attention_id(layer_id)
+ return self.full_kv_pool.get_value_buffer(layer_id)
+
+ def get_kv_buffer(self, layer_id: int):
+ layer_id = self._transfer_full_attention_id(layer_id)
+ return self.full_kv_pool.get_kv_buffer(layer_id)
+
+ def set_kv_buffer(
+ self,
+ layer: RadixAttention,
+ loc: torch.Tensor,
+ cache_k: torch.Tensor,
+ cache_v: torch.Tensor,
+ k_scale: float = 1.0,
+ v_scale: float = 1.0,
+ ):
+ layer_id = self._transfer_full_attention_id(layer.layer_id)
+ self.full_kv_pool.set_kv_buffer(
+ None,
+ loc,
+ cache_k,
+ cache_v,
+ k_scale,
+ v_scale,
+ layer_id_override=layer_id,
+ )
+
+
+class SWAKVPool(KVCache):
+ """KV cache with separate pools for full and SWA attention layers."""
+
+ def __init__(
+ self,
+ size: int,
+ size_swa: int,
+ swa_attention_layer_ids: List[int],
+ full_attention_layer_ids: List[int],
+ enable_kvcache_transpose: bool,
+ token_to_kv_pool_class: KVCache = MHATokenToKVPool,
+ **kwargs,
+ ):
+ self.size = size
+ self.size_swa = size_swa
+ self.swa_layer_nums = len(swa_attention_layer_ids)
+ self.full_layer_nums = len(full_attention_layer_ids)
+ kwargs["page_size"] = 1
+ kwargs["enable_memory_saver"] = False
+ # TODO MHATransposedTokenToKVPool if enable_kvcache_transpose is True
+ assert not enable_kvcache_transpose
+
+ self.swa_kv_pool = token_to_kv_pool_class(
+ size=size_swa,
+ layer_num=self.swa_layer_nums,
+ **kwargs,
+ )
+ self.full_kv_pool = token_to_kv_pool_class(
size=size,
- page_size=self.page_size,
- dtype=dtype,
- head_num=head_num,
- head_dim=head_dim,
layer_num=self.full_layer_nums,
- device=device,
- enable_memory_saver=False,
+ **kwargs,
)
self.layers_mapping: Dict[int, Tuple[int, bool]] = {}
for full_attn_layer_id, global_layer_id in enumerate(full_attention_layer_ids):
@@ -624,8 +1003,6 @@ def set_kv_buffer(
cache_k = cache_k.view(self.store_dtype)
cache_v = cache_v.view(self.store_dtype)
- import torch_npu
-
torch_npu._npu_reshape_and_cache(
key=cache_k,
value=cache_v,
@@ -767,13 +1144,7 @@ def __init__(
dtype=torch.uint64,
device=self.device,
)
- self.layer_transfer_counter = None
-
- kv_size = self.get_kv_size_bytes()
- logger.info(
- f"KV Cache is allocated. #tokens: {size}, KV size: {kv_size / GB:.2f} GB"
- )
- self.mem_usage = kv_size / GB
+ self._finalize_allocation_log(size)
def get_kv_size_bytes(self):
assert hasattr(self, "kv_buffer")
@@ -912,31 +1283,77 @@ def __init__(
with self.memory_saver_adapter.region(GPU_MEMORY_TYPE_KV_CACHE):
# The padded slot 0 is used for writing dummy outputs from padded tokens.
- self.kv_buffer = torch.zeros(
+ self.k_buffer = torch.zeros(
+ (
+ layer_num,
+ self.size // self.page_size + 1,
+ self.page_size,
+ 1,
+ self.kv_lora_rank,
+ ),
+ dtype=self.store_dtype,
+ device=self.device,
+ )
+ self.v_buffer = torch.zeros(
(
layer_num,
self.size // self.page_size + 1,
self.page_size,
- self.kv_lora_rank + self.qk_rope_head_dim,
+ 1,
+ self.qk_rope_head_dim,
),
dtype=self.store_dtype,
device=self.device,
)
- self.layer_transfer_counter = None
+ self._finalize_allocation_log(size)
- kv_size = self.get_kv_size_bytes()
- logger.info(
- f"KV Cache is allocated. #tokens: {size}, KV size: {kv_size / GB:.2f} GB"
+ def get_kv_size_bytes(self):
+ assert hasattr(self, "k_buffer")
+ assert hasattr(self, "v_buffer")
+ kv_size_bytes = 0
+ for k_cache in self.k_buffer:
+ kv_size_bytes += np.prod(k_cache.shape) * k_cache.dtype.itemsize
+ for v_cache in self.v_buffer:
+ kv_size_bytes += np.prod(v_cache.shape) * v_cache.dtype.itemsize
+ return kv_size_bytes
+
+ def get_kv_buffer(self, layer_id: int):
+ if self.layer_transfer_counter is not None:
+ self.layer_transfer_counter.wait_until(layer_id - self.start_layer)
+ return (
+ self.k_buffer[layer_id - self.start_layer],
+ self.v_buffer[layer_id - self.start_layer],
)
- self.mem_usage = kv_size / GB
+
+ def get_key_buffer(self, layer_id: int):
+ if self.layer_transfer_counter is not None:
+ self.layer_transfer_counter.wait_until(layer_id - self.start_layer)
+
+ if self.store_dtype != self.dtype:
+ return self.k_buffer[layer_id - self.start_layer].view(self.dtype)
+ return self.k_buffer[layer_id - self.start_layer]
+
+ def get_value_buffer(self, layer_id: int):
+ if self.layer_transfer_counter is not None:
+ self.layer_transfer_counter.wait_until(layer_id - self.start_layer)
+
+ if self.store_dtype != self.dtype:
+ return self.v_buffer[layer_id - self.start_layer].view(self.dtype)
+ return self.v_buffer[layer_id - self.start_layer]
# for disagg
def get_contiguous_buf_infos(self):
# MLA has only one kv_buffer, so only the information of this buffer needs to be returned.
- kv_data_ptrs = [self.kv_buffer[i].data_ptr() for i in range(self.layer_num)]
- kv_data_lens = [self.kv_buffer[i].nbytes for i in range(self.layer_num)]
- kv_item_lens = [self.kv_buffer[i][0].nbytes for i in range(self.layer_num)]
+ kv_data_ptrs = [self.k_buffer[i].data_ptr() for i in range(self.layer_num)] + [
+ self.v_buffer[i].data_ptr() for i in range(self.layer_num)
+ ]
+ kv_data_lens = [self.k_buffer[i].nbytes for i in range(self.layer_num)] + [
+ self.v_buffer[i].nbytes for i in range(self.layer_num)
+ ]
+ kv_item_lens = [self.k_buffer[i][0].nbytes for i in range(self.layer_num)] + [
+ self.v_buffer[i][0].nbytes for i in range(self.layer_num)
+ ]
return kv_data_ptrs, kv_data_lens, kv_item_lens
def set_kv_buffer(
@@ -949,18 +1366,28 @@ def set_kv_buffer(
layer_id = layer.layer_id
if cache_k.dtype != self.dtype:
cache_k = cache_k.to(self.dtype)
+ cache_v = cache_v.to(self.dtype)
if self.store_dtype != self.dtype:
cache_k = cache_k.view(self.store_dtype)
+ cache_v = cache_v.view(self.store_dtype)
- import torch_npu
+ if cache_v is None:
+ cache_k, cache_v = cache_k.split(
+ [self.kv_lora_rank, self.qk_rope_head_dim], dim=-1
+ )
- torch_npu._npu_reshape_and_cache_siso(
- key=cache_k.view(-1, 1, self.kv_lora_rank + self.qk_rope_head_dim),
- key_cache=self.kv_buffer[layer_id - self.start_layer].view(
- -1, 1, 1, self.kv_lora_rank + self.qk_rope_head_dim
+ torch_npu.npu_scatter_nd_update_(
+ self.k_buffer[layer_id - self.start_layer].view(-1, 1, self.kv_lora_rank),
+ loc.view(-1, 1),
+ cache_k.view(-1, 1, self.kv_lora_rank),
+ )
+ torch_npu.npu_scatter_nd_update_(
+ self.v_buffer[layer_id - self.start_layer].view(
+ -1, 1, self.qk_rope_head_dim
),
- slot_indices=loc,
+ loc.view(-1, 1),
+ cache_v.view(-1, 1, self.qk_rope_head_dim),
)
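The NPU MLA pool above now keeps the latent part (kv_lora_rank) and the rope part (qk_rope_head_dim) in separate k_buffer / v_buffer tensors, splitting a packed cache tensor when cache_v is not provided. A small torch sketch of that split (the sizes 512 and 64 are illustrative stand-ins for kv_lora_rank and qk_rope_head_dim):

import torch

kv_lora_rank, qk_rope_head_dim = 512, 64
packed = torch.randn(8, 1, kv_lora_rank + qk_rope_head_dim)  # [tokens, 1, 576]

latent, rope = packed.split([kv_lora_rank, qk_rope_head_dim], dim=-1)
assert latent.shape == (8, 1, kv_lora_rank)
assert rope.shape == (8, 1, qk_rope_head_dim)
# The latent part is scattered into k_buffer and the rope part into v_buffer,
# so the two can be handled by separate cache updates.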
diff --git a/python/sglang/srt/mem_cache/memory_pool_host.py b/python/sglang/srt/mem_cache/memory_pool_host.py
index cfc7f36c52a..dc27eaa03bb 100644
--- a/python/sglang/srt/mem_cache/memory_pool_host.py
+++ b/python/sglang/srt/mem_cache/memory_pool_host.py
@@ -3,15 +3,17 @@
import threading
from enum import IntEnum
from functools import wraps
+from typing import Optional
import psutil
import torch
from sglang.srt.mem_cache.memory_pool import KVCache, MHATokenToKVPool, MLATokenToKVPool
-from sglang.srt.utils import is_npu
+from sglang.srt.utils import is_npu, is_xpu
_is_npu = is_npu()
-if not _is_npu:
+_is_xpu = is_xpu()
+if not (_is_npu or _is_xpu):
from sgl_kernel.kvcacheio import (
transfer_kv_all_layer,
transfer_kv_all_layer_lf_pf,
@@ -168,7 +170,7 @@ def available_size(self):
return len(self.free_slots)
@synchronized()
- def alloc(self, need_size: int) -> torch.Tensor:
+ def alloc(self, need_size: int) -> Optional[torch.Tensor]:
assert (
need_size % self.page_size == 0
), "The requested size should be a multiple of the page size."
@@ -307,6 +309,9 @@ def get_size_per_token(self):
return self.head_dim * self.head_num * self.layer_num * self.dtype.itemsize * 2
+ def get_ksize_per_token(self):
+ return self.get_size_per_token() // 2
+
def init_kv_buffer(self):
if self.layout == "layer_first":
dims = (2, self.layer_num, self.size, self.head_num, self.head_dim)
@@ -460,10 +465,11 @@ def set_from_flat_data_page(self, index: int, data_page: torch.Tensor) -> None:
else:
raise ValueError(f"Unsupported layout: {self.layout}")
- def get_buffer_meta(self, keys, indices):
+ def get_buffer_meta(self, keys, indices, local_rank):
ptr_list = []
key_list = []
kv_buffer_data_ptr = self.kv_buffer.data_ptr()
+ indices = indices.tolist()
v_offset = (
self.layer_num
* self.size
@@ -484,8 +490,8 @@ def get_buffer_meta(self, keys, indices):
ptr_list.append(k_ptr)
ptr_list.append(v_ptr)
key_ = keys[index // self.page_size]
- key_list.append(f"{key_}_k")
- key_list.append(f"{key_}_v")
+ key_list.append(f"{key_}_{local_rank}_k")
+ key_list.append(f"{key_}_{local_rank}_v")
element_size = (
self.layer_num
* self.dtype.itemsize
@@ -496,6 +502,24 @@ def get_buffer_meta(self, keys, indices):
element_size_list = [element_size] * len(key_list)
return key_list, ptr_list, element_size_list
+ def get_buffer_with_hash(self, keys, indices=None):
+ assert self.layout == "page_first"
+ assert indices is None or (len(keys) == (len(indices) // self.page_size))
+
+ key_list = []
+ buf_list = []
+
+ for i in range(len(keys)):
+ key = keys[i]
+ key_list.append(f"{key}-k")
+ key_list.append(f"{key}-v")
+ if indices is not None:
+ index = indices[i * self.page_size]
+ buf_list.append(self.k_buffer[index : index + self.page_size])
+ buf_list.append(self.v_buffer[index : index + self.page_size])
+
+ return key_list, buf_list, 2
+
class MLATokenToKVPoolHost(HostKVCache):
device_pool: MLATokenToKVPool
@@ -538,6 +562,9 @@ def get_size_per_token(self):
* self.layer_num
)
+ def get_ksize_per_token(self):
+ return self.get_size_per_token()
+
def init_kv_buffer(self):
if self.layout == "layer_first":
dims = (
@@ -681,10 +708,11 @@ def set_from_flat_data_page(self, index: int, data_page: torch.Tensor) -> None:
else:
raise ValueError(f"Unsupported layout: {self.layout}")
- def get_buffer_meta(self, keys, indices):
+ def get_buffer_meta(self, keys, indices, local_rank):
ptr_list = []
key_list = []
kv_buffer_data_ptr = self.kv_buffer.data_ptr()
+ indices = indices.tolist()
for index in range(0, len(indices), self.page_size):
k_ptr = (
kv_buffer_data_ptr
@@ -704,3 +732,16 @@ def get_buffer_meta(self, keys, indices):
)
element_size_list = [element_size] * len(key_list)
return key_list, ptr_list, element_size_list
+
+ def get_buffer_with_hash(self, keys, indices=None):
+ assert self.layout == "page_first"
+ assert indices is None or (len(keys) == (len(indices) // self.page_size))
+
+ buf_list = []
+
+ if indices is not None:
+ for i in range(len(keys)):
+ index = indices[i * self.page_size]
+ buf_list.append(self.kv_buffer[index : index + self.page_size])
+
+ return keys, buf_list, 1
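get_buffer_with_hash above emits two entries per page for the MHA host pool ("-k" and "-v" suffixed keys) and one per page for the MLA pool, each paired with a page-sized slice of the host buffer. A toy sketch of the MHA-style key/slice pairing (buffer shapes are illustrative; only the pairing logic mirrors the patch):

import torch

page_size = 4
k_buffer = torch.zeros(16, 2, 8)  # [tokens, heads, head_dim], toy sizes
v_buffer = torch.zeros(16, 2, 8)

keys = ["hashA", "hashB"]
indices = torch.tensor([0, 1, 2, 3, 8, 9, 10, 11])  # page-aligned token slots

key_list, buf_list = [], []
for i, key in enumerate(keys):
    key_list.extend([f"{key}-k", f"{key}-v"])
    start = int(indices[i * page_size])
    buf_list.append(k_buffer[start : start + page_size])
    buf_list.append(v_buffer[start : start + page_size])

assert key_list == ["hashA-k", "hashA-v", "hashB-k", "hashB-v"]
assert all(buf.shape[0] == page_size for buf in buf_list)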
diff --git a/python/sglang/srt/mem_cache/radix_cache.py b/python/sglang/srt/mem_cache/radix_cache.py
index 847a7dbbf11..2ad9ae4154c 100644
--- a/python/sglang/srt/mem_cache/radix_cache.py
+++ b/python/sglang/srt/mem_cache/radix_cache.py
@@ -21,6 +21,7 @@
import heapq
import time
+import warnings
from collections import defaultdict
from functools import partial
from typing import TYPE_CHECKING, List, Optional
@@ -53,8 +54,6 @@ def __init__(self, id: Optional[int] = None):
self.last_access_time = time.monotonic()
self.hit_count = 0
- # indicating the node is loading KV cache from host
- self.loading = False
# indicating the node is locked to protect from eviction
# incremented when the node is referenced by a storage operation
self.host_ref_counter = 0
@@ -62,7 +61,6 @@ def __init__(self, id: Optional[int] = None):
self.host_value: Optional[torch.Tensor] = None
# store hash values of each pages
self.hash_value: Optional[List[str]] = None
- self.backuped_storage = False
self.id = TreeNode.counter if id is None else id
TreeNode.counter += 1
@@ -152,6 +150,7 @@ def reset(self):
self.root_node = TreeNode()
self.root_node.key = []
self.root_node.value = []
+ self.root_node.host_value = []
self.root_node.lock_ref = 1
self.evictable_size_ = 0
self.protected_size_ = 0
@@ -194,7 +193,7 @@ def match_prefix(self, key: List[int], **kwargs) -> MatchResult:
last_host_node=last_node,
)
- def insert(self, key: List, value=None):
+ def insert(self, key: List, value=None, chunked=False):
if self.disable:
return 0
@@ -239,7 +238,7 @@ def cache_finished_req(self, req: Req):
self.req_to_token_pool.free(req.req_pool_idx)
self.dec_lock_ref(req.last_node)
- def cache_unfinished_req(self, req: Req):
+ def cache_unfinished_req(self, req: Req, chunked=False):
"""Cache request when it is unfinished."""
if self.disable:
return
@@ -260,7 +259,9 @@ def cache_unfinished_req(self, req: Req):
page_aligned_token_ids = token_ids[:page_aligned_len]
# Radix Cache takes one ref in memory pool
- new_prefix_len = self.insert(page_aligned_token_ids, page_aligned_kv_indices)
+ new_prefix_len = self.insert(
+ page_aligned_token_ids, page_aligned_kv_indices, chunked=chunked
+ )
self.token_to_kv_pool_allocator.free(
kv_indices[len(req.prefix_indices) : new_prefix_len]
)
@@ -334,6 +335,10 @@ def dec_lock_ref(self, node: TreeNode):
if self.disable:
return 0
+ if node is None:
+ warnings.warn("this should not happen")
+ return 0
+
delta = 0
while node != self.root_node:
if node.lock_ref == 1:
@@ -342,6 +347,9 @@ def dec_lock_ref(self, node: TreeNode):
delta += len(node.value)
node.lock_ref -= 1
node = node.parent
+ if node is None:
+ warnings.warn("this should not happen")
+ break
return delta
def evictable_size(self):
diff --git a/python/sglang/srt/mem_cache/radix_cache_cpp.py b/python/sglang/srt/mem_cache/radix_cache_cpp.py
index 5234f1a0fbf..e9512e83f05 100644
--- a/python/sglang/srt/mem_cache/radix_cache_cpp.py
+++ b/python/sglang/srt/mem_cache/radix_cache_cpp.py
@@ -181,7 +181,7 @@ def cache_finished_req(self, req: Req):
self.dec_lock_ref(req.last_node)
self.req_to_token_pool.free(req.req_pool_idx)
- def cache_unfinished_req(self, req: Req):
+ def cache_unfinished_req(self, req: Req, chunked=False):
"""Cache request when it is unfinished."""
assert req.req_pool_idx is not None
token_ids = req.fill_ids
diff --git a/python/sglang/srt/mem_cache/storage/hf3fs/docs/setup_usrbio_client.md b/python/sglang/srt/mem_cache/storage/hf3fs/docs/setup_usrbio_client.md
index 5fa1fa4c236..7c7c0bfb264 100644
--- a/python/sglang/srt/mem_cache/storage/hf3fs/docs/setup_usrbio_client.md
+++ b/python/sglang/srt/mem_cache/storage/hf3fs/docs/setup_usrbio_client.md
@@ -34,6 +34,9 @@ apt-get update \
python3 python3-pip \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*
+# apt install python3.12 python3.12-venv python3.12-dev
+# curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py
+# python3.12 get-pip.py
# Generated wheel location: dist/hf3fs_py_usrbio-1.2.9+2db69ce-cp310-cp310-linux_x86_64.whl
python3 setup.py bdist_wheel
@@ -60,6 +63,6 @@ apt update && apt install -y \
libuv1-dev
# Install Python Package
-pip install hf3fs_py_usrbio-1.2.9+2db69ce-cp310-cp310-linux_x86_64.whl
-export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib/python3.10/dist-packages
+pip install hf3fs_py_usrbio-1.2.9+394583d-cp312-cp312-linux_x86_64.whl
+export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib/python3.12/dist-packages
```
diff --git a/python/sglang/srt/mem_cache/storage/hf3fs/hf3fs_client.py b/python/sglang/srt/mem_cache/storage/hf3fs/hf3fs_client.py
new file mode 100644
index 00000000000..c7a485fa048
--- /dev/null
+++ b/python/sglang/srt/mem_cache/storage/hf3fs/hf3fs_client.py
@@ -0,0 +1,164 @@
+import logging
+import os
+import threading
+from abc import ABC, abstractmethod
+from typing import List
+
+import torch
+
+
+class Hf3fsClient(ABC):
+ """Abstract interface for HF3FS clients."""
+
+ @abstractmethod
+ def __init__(self, path: str, size: int, bytes_per_page: int, entries: int):
+ """Initialize the HF3FS client.
+
+ Args:
+ path: File path for storage
+ size: Total size of storage file
+ bytes_per_page: Bytes per page
+ entries: Number of entries for batch operations
+ """
+ pass
+
+ @abstractmethod
+ def batch_read(self, offsets: List[int], tensors: List[torch.Tensor]) -> List[int]:
+ """Batch read from storage."""
+ pass
+
+ @abstractmethod
+ def batch_write(self, offsets: List[int], tensors: List[torch.Tensor]) -> List[int]:
+ """Batch write to storage."""
+ pass
+
+ @abstractmethod
+ def check(self, offsets: List[int], tensors: List[torch.Tensor]) -> None:
+ """Validate batch operation parameters."""
+ pass
+
+ @abstractmethod
+ def get_size(self) -> int:
+ """Get total storage size."""
+ pass
+
+ @abstractmethod
+ def close(self) -> None:
+ """Close the client and cleanup resources."""
+ pass
+
+ @abstractmethod
+ def flush(self) -> None:
+ """Flush data to disk."""
+ pass
+
+
+logger = logging.getLogger(__name__)
+
+
+class Hf3fsMockClient(Hf3fsClient):
+ """Mock implementation of Hf3fsClient for CI testing purposes."""
+
+ def __init__(self, path: str, size: int, bytes_per_page: int, entries: int):
+ """Initialize mock HF3FS client."""
+ self.path = path
+ self.size = size
+ self.bytes_per_page = bytes_per_page
+ self.entries = entries
+
+ # Create directory if it doesn't exist
+ os.makedirs(os.path.dirname(self.path), exist_ok=True)
+
+ # Create and initialize the file
+ self.file = os.open(self.path, os.O_RDWR | os.O_CREAT)
+ os.ftruncate(self.file, size)
+
+ logger.info(
+ f"Hf3fsMockClient initialized: path={path}, size={size}, "
+ f"bytes_per_page={bytes_per_page}, entries={entries}"
+ )
+
+ def batch_read(self, offsets: List[int], tensors: List[torch.Tensor]) -> List[int]:
+ """Batch read from mock storage."""
+ self.check(offsets, tensors)
+
+ results = []
+
+ for offset, tensor in zip(offsets, tensors):
+ size = tensor.numel() * tensor.itemsize
+
+ try:
+ os.lseek(self.file, offset, os.SEEK_SET)
+ bytes_read = os.read(self.file, size)
+
+ if len(bytes_read) == size:
+ # Convert bytes to tensor and copy to target
+ bytes_tensor = torch.frombuffer(bytes_read, dtype=torch.uint8)
+ typed_tensor = bytes_tensor.view(tensor.dtype).view(tensor.shape)
+ tensor.copy_(typed_tensor)
+ results.append(size)
+ else:
+ logger.warning(
+ f"Short read: expected {size}, got {len(bytes_read)}"
+ )
+ results.append(len(bytes_read))
+
+ except Exception as e:
+ logger.error(f"Error reading from offset {offset}: {e}")
+ results.append(0)
+
+ return results
+
+ def batch_write(self, offsets: List[int], tensors: List[torch.Tensor]) -> List[int]:
+ """Batch write to mock storage."""
+ self.check(offsets, tensors)
+
+ results = []
+
+ for offset, tensor in zip(offsets, tensors):
+ size = tensor.numel() * tensor.itemsize
+
+ try:
+ # Convert tensor to bytes and write directly to file
+ tensor_bytes = tensor.contiguous().view(torch.uint8).flatten()
+ data = tensor_bytes.numpy().tobytes()
+
+ os.lseek(self.file, offset, os.SEEK_SET)
+ bytes_written = os.write(self.file, data)
+
+ if bytes_written == size:
+ results.append(size)
+ else:
+ logger.warning(f"Short write: expected {size}, got {bytes_written}")
+ results.append(bytes_written)
+
+ except Exception as e:
+ logger.error(f"Error writing to offset {offset}: {e}")
+ results.append(0)
+
+ return results
+
+ def check(self, offsets: List[int], tensors: List[torch.Tensor]) -> None:
+ """Validate batch operation parameters."""
+ pass
+
+ def get_size(self) -> int:
+ """Get total storage size."""
+ return self.size
+
+ def close(self) -> None:
+ """Close the mock client and cleanup resources."""
+ try:
+ if hasattr(self, "file") and self.file >= 0:
+ os.close(self.file)
+ self.file = -1 # Mark as closed
+ logger.info(f"MockHf3fsClient closed: {self.path}")
+ except Exception as e:
+ logger.error(f"Error closing MockHf3fsClient: {e}")
+
+ def flush(self) -> None:
+ """Flush data to disk."""
+ try:
+ os.fsync(self.file)
+ except Exception as e:
+ logger.error(f"Error flushing MockHf3fsClient: {e}")
diff --git a/python/sglang/srt/mem_cache/storage/hf3fs/client_hf3fs.py b/python/sglang/srt/mem_cache/storage/hf3fs/hf3fs_usrbio_client.py
similarity index 96%
rename from python/sglang/srt/mem_cache/storage/hf3fs/client_hf3fs.py
rename to python/sglang/srt/mem_cache/storage/hf3fs/hf3fs_usrbio_client.py
index 399a9011811..480c18ed1c6 100644
--- a/python/sglang/srt/mem_cache/storage/hf3fs/client_hf3fs.py
+++ b/python/sglang/srt/mem_cache/storage/hf3fs/hf3fs_usrbio_client.py
@@ -9,6 +9,8 @@
import torch
from torch.utils.cpp_extension import load
+from sglang.srt.mem_cache.storage.hf3fs.hf3fs_client import Hf3fsClient
+
root = Path(__file__).parent.resolve()
hf3fs_utils = load(name="hf3fs_utils", sources=[f"{root}/hf3fs_utils.cpp"])
@@ -51,7 +53,9 @@ def wrapper(self, *args, **kwargs):
return _decorator
-class Hf3fsClient:
+class Hf3fsUsrBioClient(Hf3fsClient):
+ """HF3FS client implementation using usrbio."""
+
def __init__(self, path: str, size: int, bytes_per_page: int, entries: int):
if not HF3FS_AVAILABLE:
raise ImportError(
diff --git a/python/sglang/srt/mem_cache/storage/hf3fs/mini_3fs_metadata_server.py b/python/sglang/srt/mem_cache/storage/hf3fs/mini_3fs_metadata_server.py
index 1967259ac06..414d13adc18 100644
--- a/python/sglang/srt/mem_cache/storage/hf3fs/mini_3fs_metadata_server.py
+++ b/python/sglang/srt/mem_cache/storage/hf3fs/mini_3fs_metadata_server.py
@@ -4,10 +4,12 @@
import logging
import threading
from pathlib import Path
-from typing import Dict, List, Optional, Tuple
+from typing import Dict, List, Optional, OrderedDict, Tuple
+import orjson
import requests
-from fastapi import FastAPI, HTTPException, Request, status
+from fastapi import FastAPI, HTTPException, Request, Response
+from fastapi.responses import ORJSONResponse
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
@@ -24,10 +26,10 @@ class RankMetadata:
"""Holds all metadata for a single rank."""
def __init__(self, num_pages: int):
- self.lock = threading.RLock()
+ self.lock = threading.Lock()
self.num_pages = num_pages
self.free_pages: List[int] = list(range(num_pages))
- self.key_to_index: Dict[str, int] = {}
+ self.key_to_index: OrderedDict[str, int] = OrderedDict()
# Todo: Support multi files for HF3FS
def exists_keys(self, keys: List[str]) -> List[bool]:
@@ -46,16 +48,18 @@ def reserve_and_allocate_page_indices(
for i, (key, prefix_key) in enumerate(keys):
if key in self.key_to_index:
results[i] = (True, self.key_to_index[key])
+ self.key_to_index.move_to_end(key)
else:
new_keys_to_process.append((i, key, prefix_key))
# Todo: Implementing data eviction logic after HiCache supports prefix information pass-through
for i, key, prefix_key in new_keys_to_process:
if len(self.free_pages) > 0:
- page_idx = self.free_pages.pop()
- results[i] = (False, page_idx)
+ page_index = self.free_pages.pop()
else:
- results[i] = (False, -1)
+ page_index = self.key_to_index.popitem(last=False)[1]
+
+ results[i] = (False, page_index)
return results
@@ -68,6 +72,7 @@ def confirm_write(
with self.lock:
for key, page_index in written_keys_to_confirm:
self.key_to_index[key] = page_index
+ self.key_to_index.move_to_end(key)
for page_index in pages_to_release:
if page_index not in self.free_pages:
@@ -94,7 +99,14 @@ def clear_all(self) -> None:
def get_page_indices(self, keys: List[str]) -> List[Optional[int]]:
"""Get page indices for keys."""
with self.lock:
- return [self.key_to_index.get(key) for key in keys]
+ results = []
+ for key in keys:
+ if key in self.key_to_index:
+ results.append(self.key_to_index[key])
+ self.key_to_index.move_to_end(key)
+ else:
+ results.append(None)
+ return results
class GlobalMetadataState:
@@ -182,7 +194,8 @@ class Hf3fsMetadataServer:
def __init__(self, persistence_path: Optional[str] = None, save_interval: int = 60):
self.state = GlobalMetadataState(persistence_path, save_interval)
- self.app = FastAPI()
+ self.app = FastAPI(default_response_class=ORJSONResponse)
+
self._setup_routes()
def _setup_routes(self):
@@ -199,17 +212,25 @@ def _setup_routes(self):
def get_rank_metadata(self, rank: int) -> RankMetadata:
"""Get rank metadata with proper error handling."""
- with self.state.global_lock:
- if rank not in self.state.ranks:
- raise HTTPException(
- status_code=404,
- detail=f"Rank {rank} not initialized. Please call /{{rank}}/initialize first.",
- )
- return self.state.ranks[rank]
+ if rank not in self.state.ranks:
+ raise HTTPException(
+ status_code=404,
+ detail=f"Rank {rank} not initialized. Please call /{rank}/initialize first.",
+ )
+ return self.state.ranks[rank]
+
+ async def _read_json(self, request: Request) -> dict:
+ """Parse request JSON using orjson if available."""
+ body = await request.body()
+ return orjson.loads(body)
+
+ def _json_response(self, content: dict):
+ """Return ORJSONResponse when available to bypass jsonable_encoder."""
+ return ORJSONResponse(content)
async def initialize(self, rank: int, request: Request):
"""Initialize a rank with specified number of pages."""
- data = await request.json()
+ data = await self._read_json(request)
num_pages = data["num_pages"]
with self.state.global_lock:
if rank in self.state.ranks:
@@ -223,57 +244,55 @@ async def initialize(self, rank: int, request: Request):
else:
logging.info(f"Initializing new Rank {rank} with {num_pages} pages.")
self.state.ranks[rank] = RankMetadata(num_pages)
- return {"message": f"Rank {rank} is ready."}
+ return Response(status_code=204)
async def exists(self, rank: int, request: Request):
"""Check if keys exist in metadata."""
- data = await request.json()
+ data = await self._read_json(request)
keys = data["keys"]
metadata = self.get_rank_metadata(rank)
results = metadata.exists_keys(keys)
- return {"exists": results}
+ return self._json_response({"exists": results})
async def reserve_and_allocate_page_indices(self, rank: int, request: Request):
"""Reserve and allocate page indices for keys."""
- data = await request.json()
+ data = await self._read_json(request)
metadata = self.get_rank_metadata(rank)
keys = data["keys"]
results = metadata.reserve_and_allocate_page_indices(keys)
- return {"indices": results}
+ return self._json_response({"indices": results})
async def confirm_write(self, rank: int, request: Request):
"""Confirm write operations and release pages."""
- data = await request.json()
+ data = await self._read_json(request)
metadata = self.get_rank_metadata(rank)
success_written_keys = data.get("written_keys_to_confirm", [])
released_pages = data.get("pages_to_release", [])
metadata.confirm_write(success_written_keys, released_pages)
- return {
- "message": f"Rank {rank}: Write confirmed for {len(success_written_keys)} keys. {len(released_pages)} pages released."
- }
+ return Response(status_code=204)
async def delete_keys(self, rank: int, request: Request):
"""Delete keys from metadata."""
- data = await request.json()
+ data = await self._read_json(request)
metadata = self.get_rank_metadata(rank)
count = metadata.delete_keys(data["keys"])
- return {"message": f"Rank {rank}: {count} keys deleted."}
+ return Response(status_code=204)
async def clear(self, rank: int):
"""Clear all metadata for a rank."""
metadata = self.get_rank_metadata(rank)
metadata.clear_all()
- return {"message": f"Rank {rank}: Metadata cleared."}
+ return Response(status_code=204)
async def get_page_indices(self, rank: int, request: Request):
"""Get page indices for keys."""
- data = await request.json()
+ data = await self._read_json(request)
metadata = self.get_rank_metadata(rank)
keys = data["keys"]
results = metadata.get_page_indices(keys)
- return {"indices": results}
+ return self._json_response({"indices": results})
def run(self, host: str = "0.0.0.0", port: int = 18000):
"""Run the metadata server."""
@@ -309,14 +328,22 @@ def __init__(self, base_url: str, max_retries: int = 3):
status_forcelist=[500, 502, 503, 504],
allowed_methods=["GET", "POST"],
)
- adapter = HTTPAdapter(max_retries=retry_strategy)
+ adapter = HTTPAdapter(
+ max_retries=retry_strategy, pool_connections=256, pool_maxsize=256
+ )
self._session.mount("http://", adapter)
def _post(self, endpoint: str, json_data: dict) -> dict:
try:
- response = self._session.post(f"{self.base_url}/{endpoint}", json=json_data)
+ url = f"{self.base_url}/{endpoint}"
+ headers = {"Content-Type": "application/json"}
+ payload = orjson.dumps(json_data) # type: ignore[union-attr]
+ response = self._session.post(url, data=payload, headers=headers)
response.raise_for_status()
- return response.json()
+
+ if response.status_code == 204 or not response.content:
+ return {}
+ return orjson.loads(response.content) # type: ignore[union-attr]
except requests.exceptions.RequestException as e:
logging.error(f"Failed to POST to {endpoint} after retries: {e}")
raise RuntimeError(f"Failed to connect to metadata server: {e}") from e
diff --git a/python/sglang/srt/mem_cache/storage/hf3fs/storage_hf3fs.py b/python/sglang/srt/mem_cache/storage/hf3fs/storage_hf3fs.py
index e7dd01c7379..9595e720498 100644
--- a/python/sglang/srt/mem_cache/storage/hf3fs/storage_hf3fs.py
+++ b/python/sglang/srt/mem_cache/storage/hf3fs/storage_hf3fs.py
@@ -5,14 +5,16 @@
import os
import signal
import threading
+import time
from abc import ABC, abstractmethod
from functools import wraps
-from typing import List, Optional, Tuple
+from typing import Any, List, Optional, Tuple
import torch
-from sglang.srt.mem_cache.hicache_storage import HiCacheStorage
-from sglang.srt.mem_cache.storage.hf3fs.client_hf3fs import Hf3fsClient
+from sglang.srt.mem_cache.hicache_storage import HiCacheStorage, HiCacheStorageConfig
+from sglang.srt.mem_cache.storage.hf3fs.hf3fs_client import Hf3fsClient
+from sglang.srt.metrics.collector import StorageMetrics
logger = logging.getLogger(__name__)
@@ -112,7 +114,36 @@ def wrapper(self, *args, **kwargs):
return _decorator
+def create_hf3fs_client(
+ path: str, size: int, bytes_per_page: int, entries: int, use_mock: bool = False
+) -> Hf3fsClient:
+ """Factory function to create appropriate HF3FS client.
+
+ Args:
+ path: File path for storage
+ size: Total size of storage file
+ bytes_per_page: Bytes per page
+ entries: Number of entries for batch operations
+ use_mock: Whether to use mock client instead of real usrbio client
+
+    Returns:
+        An `Hf3fsClient` implementation: `Hf3fsMockClient` when `use_mock` is True,
+        otherwise `Hf3fsUsrBioClient`.
+    """
+ if use_mock:
+ from sglang.srt.mem_cache.storage.hf3fs.hf3fs_client import Hf3fsMockClient
+
+        logger.info("Using Hf3fsMockClient for testing")
+ return Hf3fsMockClient(path, size, bytes_per_page, entries)
+ else:
+ from sglang.srt.mem_cache.storage.hf3fs.hf3fs_usrbio_client import (
+ Hf3fsUsrBioClient,
+ )
+
+ return Hf3fsUsrBioClient(path, size, bytes_per_page, entries)
+
+
class HiCacheHF3FS(HiCacheStorage):
+ """HiCache backend that stores KV cache pages in HF3FS files."""
+
default_env_var: str = "SGLANG_HICACHE_HF3FS_CONFIG_PATH"
def __init__(
@@ -125,18 +156,27 @@ def __init__(
entries: int,
dtype: torch.dtype,
metadata_client: Hf3fsMetadataInterface,
+ is_mla_model: bool = False,
+ is_page_first_layout: bool = False,
+ use_mock_client: bool = False,
):
self.rank = rank
self.file_path = file_path
self.file_size = file_size
self.numjobs = numjobs
self.bytes_per_page = bytes_per_page
+ self.gb_per_page = bytes_per_page / (1 << 30)
self.entries = entries
self.dtype = dtype
self.metadata_client = metadata_client
-
+ self.is_mla_model = is_mla_model
+ self.is_page_first_layout = is_page_first_layout
self.numel = self.bytes_per_page // self.dtype.itemsize
self.num_pages = self.file_size // self.bytes_per_page
+ self.skip_backup = False
+ if self.is_mla_model and self.rank != 0:
+ self.skip_backup = True
+ self.rank = 0
logger.info(
f"[Rank {self.rank}] HiCacheHF3FS Client Initializing: "
@@ -147,8 +187,12 @@ def __init__(
self.ac = AtomicCounter(self.numjobs)
self.clients = [
- Hf3fsClient(
- self.file_path, self.file_size, self.bytes_per_page, self.entries
+ create_hf3fs_client(
+ self.file_path,
+ self.file_size,
+ self.bytes_per_page,
+ self.entries,
+ use_mock_client,
)
for _ in range(numjobs)
]
@@ -165,17 +209,57 @@ def __init__(
signal.signal(signal.SIGTERM, lambda sig, frame: self.close())
signal.signal(signal.SIGQUIT, lambda sig, frame: self.close())
+ self.prefetch_pgs = []
+ self.backup_pgs = []
+ self.prefetch_bandwidth = []
+ self.backup_bandwidth = []
+
@staticmethod
def from_env_config(
- rank: int, bytes_per_page: int, dtype: torch.dtype
+ bytes_per_page: int,
+ dtype: torch.dtype,
+ storage_config: HiCacheStorageConfig = None,
) -> "HiCacheHF3FS":
+ """Create a HiCacheHF3FS instance from environment configuration.
+
+ Environment:
+ - Uses env var stored in `HiCacheHF3FS.default_env_var` to locate a JSON config.
+ - Falls back to a local single-machine config when the env var is not set.
+
+ Raises:
+            ValueError: If an MLA model is used without a global metadata server, or if required config keys are missing.
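+
+        Example:
+            An illustrative JSON config (values are placeholders; the keys match
+            those read by this method, and `metadata_server_url` is optional and
+            enables the global metadata client):
+
+            {
+                "file_path_prefix": "/data/hicache",
+                "file_size": 1099511627776,
+                "numjobs": 16,
+                "entries": 8,
+                "metadata_server_url": "http://127.0.0.1:18000"
+            }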
+ """
from sglang.srt.mem_cache.storage.hf3fs.mini_3fs_metadata_server import (
Hf3fsGlobalMetadataClient,
Hf3fsLocalMetadataClient,
)
+ use_mock_client = False
+ if storage_config is not None:
+ rank, is_mla_model, is_page_first_layout = (
+ storage_config.tp_rank,
+ storage_config.is_mla_model,
+ storage_config.is_page_first_layout,
+ )
+
+ if storage_config.extra_config is not None:
+ use_mock_client = storage_config.extra_config.get(
+ "use_mock_hf3fs_client", False
+ )
+ else:
+ rank, is_mla_model, is_page_first_layout = (
+ 0,
+ False,
+ False,
+ )
+
+        mla_unsupported_msg = (
+            "MLA models are not supported without a global metadata server; please refer to "
+            "https://github.com/sgl-project/sglang/blob/main/python/sglang/srt/mem_cache/storage/hf3fs/docs/deploy_sglang_3fs_multinode.md"
+        )
+
config_path = os.getenv(HiCacheHF3FS.default_env_var)
if not config_path:
+ if is_mla_model:
+ raise ValueError(mla_unsupported_msg)
+
return HiCacheHF3FS(
rank=rank,
file_path=f"/data/hicache.{rank}.bin",
@@ -185,6 +269,8 @@ def from_env_config(
entries=8,
dtype=dtype,
metadata_client=Hf3fsLocalMetadataClient(),
+ is_page_first_layout=is_page_first_layout,
+ use_mock_client=use_mock_client,
)
try:
@@ -205,38 +291,56 @@ def from_env_config(
raise ValueError(f"Missing required keys in config: {missing_keys}")
# Choose metadata client based on configuration
- if "metadata_server_url" in config and config["metadata_server_url"]:
+ if config.get("metadata_server_url"):
# Use global metadata client to connect to metadata server
metadata_server_url = config["metadata_server_url"]
metadata_client = Hf3fsGlobalMetadataClient(metadata_server_url)
+
logger.info(
f"Using global metadata client with server url: {metadata_server_url}"
)
else:
+            # MLA models require the global metadata client; reject them when only local metadata is available
+ if is_mla_model:
+ raise ValueError(mla_unsupported_msg)
+
# Use local metadata client for single-machine deployment
metadata_client = Hf3fsLocalMetadataClient()
+ rank_for_path = 0 if is_mla_model else rank
return HiCacheHF3FS(
rank=rank,
- file_path=f"{config['file_path_prefix']}.{rank}.bin",
+ # Let all ranks use the same file path for MLA model
+ file_path=f"{config['file_path_prefix']}.{rank_for_path}.bin",
file_size=int(config["file_size"]),
numjobs=int(config["numjobs"]),
bytes_per_page=bytes_per_page,
entries=int(config["entries"]),
dtype=dtype,
metadata_client=metadata_client,
+ is_mla_model=is_mla_model,
+ is_page_first_layout=is_page_first_layout,
+ use_mock_client=use_mock_client,
)
def get(
- self, key: str, target_location: Optional[torch.Tensor] = None
+ self,
+ key: str,
+ target_location: Optional[Any] = None,
+ target_sizes: Optional[Any] = None,
) -> torch.Tensor | None:
- return self.batch_get([key], [target_location] if target_location else None)[0]
+ return self.batch_get(
+ [key],
+ [target_location] if target_location is not None else None,
+ [target_sizes] if target_sizes is not None else None,
+ )[0]
@synchronized()
def batch_get(
self,
keys: List[str],
- target_locations: Optional[List[torch.Tensor]] = None,
+ target_locations: Optional[Any] = None,
+ target_sizes: Optional[Any] = None,
) -> List[torch.Tensor | None]:
page_indices = self.metadata_client.get_page_indices(self.rank, keys)
@@ -246,9 +350,17 @@ def batch_get(
batch_indices.append(i)
file_offsets.append(page_index * self.bytes_per_page)
- file_results = [
- torch.empty(self.numel, dtype=self.dtype) for _ in range(len(batch_indices))
- ]
+ if target_locations is not None:
+ for target_location in target_locations:
+ assert target_location.is_contiguous()
+ file_results = target_locations
+ else:
+ file_results = [
+ torch.empty(self.numel, dtype=self.dtype)
+ for _ in range(len(batch_indices))
+ ]
+
+ start_time = time.perf_counter()
futures = [
self.executor.submit(
@@ -260,6 +372,13 @@ def batch_get(
]
read_results = [result for future in futures for result in future.result()]
+ end_time = time.perf_counter()
+ ionum = len(batch_indices)
+ self.prefetch_pgs.append(ionum)
+ self.prefetch_bandwidth.append(
+ ionum / (end_time - start_time) * self.gb_per_page
+ )
+
results = [None] * len(keys)
for batch_index, file_result, read_result in zip(
batch_indices, file_results, read_results
@@ -273,10 +392,32 @@ def batch_get(
return results
- def set(self, key: str, value: torch.Tensor) -> bool:
- return self.batch_set([key], [value])
+ def set(
+ self,
+ key: str,
+ value: Optional[Any] = None,
+ target_location: Optional[Any] = None,
+ target_sizes: Optional[Any] = None,
+ ) -> bool:
+ return self.batch_set(
+ [key],
+ [value] if value is not None else None,
+ [target_location] if target_location is not None else None,
+ [target_sizes] if target_sizes is not None else None,
+ )
+
+ @synchronized()
+ def batch_set(
+ self,
+ keys: List[str],
+ values: Optional[Any] = None,
+ target_locations: Optional[Any] = None,
+ target_sizes: Optional[Any] = None,
+ ) -> bool:
+ # In MLA backend, only one rank needs to backup the KV cache
+ if self.skip_backup:
+ return True
- def batch_set(self, keys: List[str], values: List[torch.Tensor]) -> bool:
# Todo: Add prefix block's hash key
key_with_prefix = [(key, "") for key in keys]
indices = self.metadata_client.reserve_and_allocate_page_indices(
@@ -292,7 +433,10 @@ def batch_set(self, keys: List[str], values: List[torch.Tensor]) -> bool:
batch_indices.append(i)
file_offsets.append(page_index * self.bytes_per_page)
- file_values.append(value.contiguous())
+ assert value.is_contiguous()
+ file_values.append(value)
+
+ start_time = time.perf_counter()
futures = [
self.executor.submit(
@@ -308,6 +452,11 @@ def batch_set(self, keys: List[str], values: List[torch.Tensor]) -> bool:
for result in future.result()
]
+ end_time = time.perf_counter()
+ ionum = len(batch_indices)
+ self.backup_pgs.append(ionum)
+ self.backup_bandwidth.append(ionum / (end_time - start_time) * self.gb_per_page)
+
written_keys_to_confirm = []
results = [index[0] for index in indices]
for batch_index, write_result in zip(batch_indices, write_results):
@@ -327,18 +476,29 @@ def batch_set(self, keys: List[str], values: List[torch.Tensor]) -> bool:
return all(results)
- @synchronized()
def delete(self, key: str) -> None:
self.metadata_client.delete_keys(self.rank, [key])
- @synchronized()
def exists(self, key: str) -> bool:
result = self.metadata_client.exists(self.rank, [key])
return result[0] if result else False
- @synchronized()
- def clear(self) -> None:
- self.metadata_client.clear(self.rank)
+ def batch_exists(self, keys: List[str]) -> int:
+ results = self.metadata_client.exists(self.rank, keys)
+ for i in range(len(keys)):
+ if not results[i]:
+ return i
+
+ return len(keys)
+
+ def clear(self) -> bool:
+ try:
+ self.metadata_client.clear(self.rank)
+ logger.info(f"Cleared HiCacheHF3FS for rank {self.rank}")
+ return True
+ except Exception as e:
+ logger.error(f"Failed to clear HiCacheHF3FS: {e}")
+ return False
def close(self) -> None:
try:
@@ -348,3 +508,16 @@ def close(self) -> None:
except Exception as e:
logger.error(f"close HiCacheHF3FS: {e}")
logger.info("close HiCacheHF3FS")
+
+ @synchronized()
+ def get_stats(self):
+ storage_metrics = StorageMetrics()
+ storage_metrics.prefetch_pgs.extend(self.prefetch_pgs)
+ storage_metrics.backup_pgs.extend(self.backup_pgs)
+ storage_metrics.prefetch_bandwidth.extend(self.prefetch_bandwidth)
+ storage_metrics.backup_bandwidth.extend(self.backup_bandwidth)
+ self.prefetch_pgs.clear()
+ self.backup_pgs.clear()
+ self.prefetch_bandwidth.clear()
+ self.backup_bandwidth.clear()
+ return storage_metrics
diff --git a/python/sglang/srt/mem_cache/storage/lmcache/README.md b/python/sglang/srt/mem_cache/storage/lmcache/README.md
new file mode 100644
index 00000000000..7177e21e5f5
--- /dev/null
+++ b/python/sglang/srt/mem_cache/storage/lmcache/README.md
@@ -0,0 +1,43 @@
+# LMCache Connector for SGLang
+
+This document describes how to use LMCache as the KV cache management backend for the SGLang engine.
+For more details about LMCache, please refer to: https://lmcache.ai
+
+## Install LMCache
+
+### Method 1: with pip
+
+```bash
+pip install lmcache
+```
+
+### Method 2: from source
+
+Clone LMCache project:
+
+```bash
+git clone https://github.com/LMCache/LMCache
+```
+
+Install:
+
+```bash
+cd LMCache
+pip install -e . --no-build-isolation
+```
+
+
+## Use LMCache
+
+First, set up the LMCache config. An example config is provided at `example_config.yaml`; a minimal sketch is shown below. For more settings, please refer to https://docs.lmcache.ai/api_reference/configurations.html.
+
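+A minimal config, mirroring `example_config.yaml` in this directory (values are illustrative and can be tuned):
+
+```yaml
+# Basic configurations
+chunk_size: 256
+
+# CPU offloading configurations
+local_cpu: true
+use_layerwise: true
+max_local_cpu_size: 10  # maximum CPU cache size, in GB
+```
+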
+Second, launch the SGLang server with LMCache enabled:
+
+```bash
+export LMCACHE_USE_EXPERIMENTAL=True
+export LMCACHE_CONFIG_FILE=example_config.yaml
+
+python -m sglang.launch_server \
+ --model-path MODEL \
+ --enable-lmcache
+```
diff --git a/python/sglang/srt/mem_cache/storage/lmcache/example_config.yaml b/python/sglang/srt/mem_cache/storage/lmcache/example_config.yaml
new file mode 100644
index 00000000000..549110b7cd4
--- /dev/null
+++ b/python/sglang/srt/mem_cache/storage/lmcache/example_config.yaml
@@ -0,0 +1,7 @@
+# Basic configurations
+chunk_size: 256
+
+# CPU offloading configurations
+local_cpu: true
+use_layerwise: true
+max_local_cpu_size: 10  # maximum CPU cache size, in GB
diff --git a/python/sglang/srt/mem_cache/storage/lmcache/lmc_radix_cache.py b/python/sglang/srt/mem_cache/storage/lmcache/lmc_radix_cache.py
new file mode 100644
index 00000000000..f8690aec4bf
--- /dev/null
+++ b/python/sglang/srt/mem_cache/storage/lmcache/lmc_radix_cache.py
@@ -0,0 +1,280 @@
+from __future__ import annotations
+
+import logging
+import threading
+from typing import TYPE_CHECKING, List, Optional
+
+import torch
+
+from sglang.srt.mem_cache.allocator import BaseTokenToKVPoolAllocator
+from sglang.srt.mem_cache.base_prefix_cache import MatchResult
+from sglang.srt.mem_cache.memory_pool import ReqToTokenPool
+from sglang.srt.mem_cache.radix_cache import RadixCache, TreeNode
+
+try:
+ from lmcache.integration.sglang.sglang_adapter import (
+ LMCacheLayerwiseConnector,
+ LoadMetadata,
+ StoreMetadata,
+ )
+except ImportError as e:
+ raise RuntimeError(
+ "LMCache is not installed. Please install it by running `pip install lmcache`"
+ ) from e
+
+if TYPE_CHECKING:
+ from sglang.srt.configs.model_config import ModelConfig
+ from sglang.srt.managers.schedule_batch import Req
+
+logger = logging.getLogger(__name__)
+
+
+class LayerTransferCounter:
+ """Minimal adapter that lets the memory pool notify LMCache per-layer.
+
+ The KV pool calls `wait_until(layer_id)` after finishing a layer, which we
+ translate into a `load_kv_layerwise(layer_id)` call on the LMCache connector
+ within the provided CUDA stream.
+ """
+
+ def __init__(
+ self,
+ num_layers: int,
+ load_stream: torch.cuda.Stream,
+ lmc_connector: LMCacheLayerwiseConnector,
+ printable: bool = False,
+ ):
+ self.num_layers = num_layers
+ self.load_stream = load_stream
+ self.lmc_connector = lmc_connector
+
+ def wait_until(self, layer_id: int):
+ # Ensure ordering of the async loads wrt compute stream(s).
+ self.load_stream.synchronize()
+ with self.load_stream:
+ self.lmc_connector.load_kv_layerwise(layer_id)
+
+
+class LMCRadixCache(RadixCache):
+ """RadixCache + LMCache IO.
+
+ This subclass adds:
+ - LMCache connector setup (device/host buffers, TP rank/size)
+ - Two CUDA streams for async load/store
+ - Layer-wise transfer executor wiring to the KV cache
+ - Overridden `match_prefix` to fetch missing prefix chunks from LMCache
+ - Extended cache_finalization paths to store back into LMCache
+ - Eviction barrier that respects any in-flight host->device stores
+ """
+
+ def __init__(
+ self,
+ req_to_token_pool: ReqToTokenPool,
+ token_to_kv_pool_allocator: BaseTokenToKVPoolAllocator,
+ page_size: int,
+ disable: bool = False,
+ enable_kv_cache_events: bool = False,
+ model_config: Optional["ModelConfig"] = None,
+ tp_size: int = 1,
+ rank: int = 0,
+ tp_group: Optional[torch.distributed.ProcessGroup] = None,
+ ):
+ super().__init__(
+ req_to_token_pool=req_to_token_pool,
+ token_to_kv_pool_allocator=token_to_kv_pool_allocator,
+ page_size=page_size,
+ disable=disable,
+ enable_kv_cache_events=enable_kv_cache_events,
+ )
+
+ kvcache = self.token_to_kv_pool_allocator.get_kvcache()
+ self.lmcache_connector = LMCacheLayerwiseConnector(
+ sgl_config=model_config,
+ tp_size=tp_size,
+ rank=rank,
+ # NOTE: The original implementation accessed private buffers via
+ # `_kvcache.k_buffer` / `.v_buffer`. We prefer public accessors when
+ # available; fall back to private fields if needed.
+ k_pool=getattr(
+ kvcache,
+ "k_buffer",
+ getattr(self.token_to_kv_pool_allocator._kvcache, "k_buffer"),
+ ),
+ v_pool=getattr(
+ kvcache,
+ "v_buffer",
+ getattr(self.token_to_kv_pool_allocator._kvcache, "v_buffer"),
+ ),
+ tp_group=tp_group,
+ )
+
+ self.load_stream = torch.cuda.Stream()
+ self.store_stream = torch.cuda.Stream()
+
+ self.layer_done_executor = LayerTransferCounter(
+ num_layers=(
+ model_config.num_hidden_layers if model_config is not None else 0
+ ),
+ load_stream=self.load_stream,
+ lmc_connector=self.lmcache_connector,
+ )
+ kvcache.register_layer_transfer_counter(self.layer_done_executor)
+
+ self._in_flight_nodes: list[TreeNode] = []
+ self._node_lock = threading.Lock()
+
+ def reset(self): # type: ignore[override]
+ super().reset()
+ if hasattr(self, "_in_flight_nodes"):
+ with self._node_lock:
+ self._in_flight_nodes.clear()
+
+ def match_prefix(self, key: List[int], **kwargs) -> MatchResult: # type: ignore[override]
+ """Match cached prefix; if there's a tail miss, prefetch from LMCache.
+
+ Reuses the base matching logic to obtain (value, last_node). If there
+ remains a *page-aligned* uncached suffix and there is room (or after
+ eviction), we allocate token slots and trigger an async LMCache load
+ into those slots, then materialize a new child node for the retrieved
+ chunk.
+ """
+ if self.disable or not key:
+ return super().match_prefix(key, **kwargs)
+
+ if self.page_size != 1:
+ aligned_len = len(key) // self.page_size * self.page_size
+ key = key[:aligned_len]
+
+ base_res = super().match_prefix(key, **kwargs)
+ value: torch.Tensor = base_res.device_indices
+ last_node: TreeNode = base_res.last_device_node
+
+ if value.numel() == len(key):
+ return base_res
+
+ uncached_len = len(key) - value.numel()
+ if uncached_len == 0:
+ return base_res
+
+ chunk_size = self.lmcache_connector.chunk_size()
+ prefix_pad = value.numel() % chunk_size
+
+ if self.token_to_kv_pool_allocator.available_size() < uncached_len:
+ self.evict(uncached_len)
+
+ token_slots = self.token_to_kv_pool_allocator.alloc(uncached_len)
+ if token_slots is None:
+ return base_res
+
+ slot_mapping = torch.cat(
+ [
+ torch.full((value.numel(),), -1, dtype=torch.int64, device=self.device),
+ token_slots.detach().clone().to(torch.int64).to(self.device),
+ ]
+ )
+
+ with torch.cuda.stream(self.load_stream):
+ num_retrieved = self.lmcache_connector.start_load_kv(
+ LoadMetadata(
+ token_ids=key, # full page-aligned key
+ slot_mapping=slot_mapping,
+ offset=value.numel() - prefix_pad, # LMCache offset convention
+ )
+ )
+ logger.debug("num_retrieved_tokens: %s", num_retrieved)
+
+ if num_retrieved > 0:
+ self.token_to_kv_pool_allocator.free(
+ token_slots[(num_retrieved - prefix_pad) :]
+ )
+ else:
+ self.token_to_kv_pool_allocator.free(token_slots)
+
+ if num_retrieved > 0:
+ fetched = num_retrieved - prefix_pad
+ new_node = TreeNode()
+ start = value.numel()
+ end = start + fetched
+ new_node.key = key[start:end]
+ new_node.value = token_slots[:fetched]
+ new_node.parent = last_node
+ last_node.children[self.get_child_key_fn(new_node.key)] = new_node
+ last_node = new_node
+
+ value = torch.cat([value, token_slots[:fetched]])
+ self.evictable_size_ += fetched
+
+ self._record_store_event(new_node.parent)
+ self._record_store_event(new_node)
+
+ return MatchResult(
+ device_indices=value,
+ last_device_node=last_node,
+ last_host_node=last_node,
+ )
+
+ return base_res
+
+ def cache_finished_req(self, req: "Req") -> None: # type: ignore[override]
+ """On request completion, insert device KV into radix and store to LMCache."""
+
+ super().cache_finished_req(req)
+
+ token_ids = (req.origin_input_ids + req.output_ids)[:-1]
+ kv_indices = self.req_to_token_pool.req_to_token[
+ req.req_pool_idx, : len(token_ids)
+ ]
+
+ _, new_last_node, _, _ = self.match_prefix(token_ids)
+ assert new_last_node is not None
+
+ self.inc_lock_ref(new_last_node)
+ store_md = StoreMetadata(
+ last_node=new_last_node,
+ token_ids=token_ids,
+ kv_indices=kv_indices,
+ offset=0,
+ )
+ with torch.cuda.stream(self.store_stream):
+ self.lmcache_connector.store_kv(store_md)
+ with self._node_lock:
+ self._in_flight_nodes.append(new_last_node)
+
+ def evict(self, num_tokens: int) -> None: # type: ignore[override]
+ """Before base eviction, wait for any outstanding stores and release locks."""
+ if self.disable:
+ return
+
+ self.store_stream.synchronize()
+ with self._node_lock:
+ for node in self._in_flight_nodes:
+ self.dec_lock_ref(node)
+ self._in_flight_nodes.clear()
+
+ super().evict(num_tokens)
+
+ def pretty_print(self): # type: ignore[override]
+ super().pretty_print()
+ try:
+ logger.debug(
+ "evictable=%d protected=%d", self.evictable_size_, self.protected_size_
+ )
+ except Exception: # pragma: no cover
+ pass
+
+
+if __name__ == "__main__":
+    # NOTE: illustrative usage only. Running this block requires real pool, allocator,
+    # and model_config objects; the None placeholders below will not construct a working cache.
+    cache = LMCRadixCache(
+ req_to_token_pool=None,
+ token_to_kv_pool_allocator=None,
+ page_size=1,
+ disable=False,
+ enable_kv_cache_events=False,
+ model_config=None,
+ tp_size=1,
+ rank=0,
+ tp_group=None,
+ )
+ cache.insert([1, 2, 3], torch.tensor([10, 11, 12], dtype=torch.int64))
+ cache.insert([1, 2, 3, 4], torch.tensor([10, 11, 12, 13], dtype=torch.int64))
+ cache.pretty_print()
diff --git a/python/sglang/srt/mem_cache/storage/lmcache/unit_test.py b/python/sglang/srt/mem_cache/storage/lmcache/unit_test.py
new file mode 100644
index 00000000000..68dfe939d69
--- /dev/null
+++ b/python/sglang/srt/mem_cache/storage/lmcache/unit_test.py
@@ -0,0 +1,121 @@
+try:
+ from lmcache.integration.sglang.sglang_adapter import (
+ LMCacheLayerwiseConnector,
+ LoadMetadata,
+ StoreMetadata,
+ )
+except ImportError as e:
+    raise RuntimeError(
+        "LMCache is not installed. Please install it by running `pip install lmcache`, "
+        "or `pip install -e . --no-build-isolation` from the root directory of LMCache."
+    ) from e
+
+import os
+
+import torch
+
+from sglang.srt.configs.model_config import ModelConfig
+
+os.environ["LMCACHE_USE_EXPERIMENTAL"] = "True"
+os.environ["LMCACHE_CONFIG_FILE"] = "example_config.yaml"
+
+
+def test_load_store_metadata():
+ model_config = ModelConfig(
+ model_path="Qwen/Qwen3-4B",
+ )
+
+ # Generate Dummy KV Cache
+ head_num = model_config.num_key_value_heads
+ head_dim = model_config.head_dim
+ layer_num = model_config.num_hidden_layers
+ buffer_size = 256
+ input_id_len = 16
+
+ k_buffer = [
+ torch.randn(buffer_size, head_num, head_dim, dtype=torch.bfloat16).cuda()
+ for _ in range(layer_num)
+ ]
+ v_buffer = [
+ torch.randn(buffer_size, head_num, head_dim, dtype=torch.bfloat16).cuda()
+ for _ in range(layer_num)
+ ]
+
+ connector = LMCacheLayerwiseConnector(model_config, 1, 0, k_buffer, v_buffer)
+
+ fake_token_ids = torch.randint(0, model_config.vocab_size, (input_id_len,)).tolist()
+ fake_kv_indices = torch.randint(0, buffer_size, (input_id_len,))
+ offset = 0
+
+ store_metadata = StoreMetadata(
+ last_node=None,
+ token_ids=fake_token_ids,
+ kv_indices=fake_kv_indices,
+ offset=offset,
+ )
+
+ load_metadata = LoadMetadata(
+ token_ids=fake_token_ids,
+ slot_mapping=fake_kv_indices,
+ offset=offset,
+ )
+
+ current_stream = torch.cuda.current_stream()
+
+ retrieve_token_num = connector.start_load_kv(load_metadata)
+ assert retrieve_token_num == 0
+
+ connector.store_kv(store_metadata)
+ current_stream.synchronize()
+
+ # check retrieve
+ gt_key_buffer = [
+ torch.zeros(input_id_len, head_num, head_dim, dtype=torch.bfloat16).cuda()
+ for _ in range(layer_num)
+ ]
+ gt_value_buffer = [
+ torch.zeros(input_id_len, head_num, head_dim, dtype=torch.bfloat16).cuda()
+ for _ in range(layer_num)
+ ]
+
+ for i in range(layer_num):
+ gt_key_buffer[i] = k_buffer[i][fake_kv_indices]
+ gt_value_buffer[i] = v_buffer[i][fake_kv_indices]
+
+    # clear the k_buffer and v_buffer for every layer
+    for i in range(layer_num):
+        k_buffer[i].zero_()
+        v_buffer[i].zero_()
+
+ retrieve_token_num = connector.start_load_kv(load_metadata)
+ assert retrieve_token_num == input_id_len
+
+ for i in range(layer_num):
+ current_stream.synchronize()
+ connector.load_kv_layerwise(i)
+
+ current_stream.synchronize()
+ test_key_buffer = [
+ torch.zeros(input_id_len, head_num, head_dim, dtype=torch.bfloat16).cuda()
+ for _ in range(layer_num)
+ ]
+ test_value_buffer = [
+ torch.zeros(input_id_len, head_num, head_dim, dtype=torch.bfloat16).cuda()
+ for _ in range(layer_num)
+ ]
+
+ for i in range(layer_num):
+ test_key_buffer[i] = k_buffer[i][fake_kv_indices]
+ test_value_buffer[i] = v_buffer[i][fake_kv_indices]
+
+ for i in range(layer_num):
+ assert torch.allclose(test_key_buffer[i], gt_key_buffer[i])
+ assert torch.allclose(test_value_buffer[i], gt_value_buffer[i])
+
+ print("================================================")
+ print("TEST_LOAD_STORE_METADATA PASSED!")
+ print("================================================")
+ connector.close()
+
+
+if __name__ == "__main__":
+ test_load_store_metadata()
diff --git a/python/sglang/srt/mem_cache/storage/mooncake_store/README.md b/python/sglang/srt/mem_cache/storage/mooncake_store/README.md
index 6ad71821ead..e815122bd37 100644
--- a/python/sglang/srt/mem_cache/storage/mooncake_store/README.md
+++ b/python/sglang/srt/mem_cache/storage/mooncake_store/README.md
@@ -1,7 +1,12 @@
# Mooncake as L3 KV Cache
This document describes how to use Mooncake as the L3 KV cache for SGLang.
-For more details about Mooncake, please refer to: https://kvcache-ai.github.io/
+
+## About Mooncake
+
+Mooncake aims to enhance the inference efficiency of large language models (LLMs), especially in slow object storage environments, by constructing a multi-level caching pool on high-speed interconnected DRAM/SSD resources. Compared to traditional caching systems, Mooncake utilizes (GPUDirect) RDMA technology to transfer data directly in a zero-copy manner, while maximizing the use of multi-NIC resources on a single machine.
+
+For more details about Mooncake, please refer to [Mooncake project](https://github.com/kvcache-ai/Mooncake) and [Mooncake documents](https://kvcache-ai.github.io/Mooncake/).
## Install Mooncake
@@ -41,31 +46,122 @@ Install Mooncake:
sudo make install
```
-## Use Mooncake
+## Deploy Mooncake
+
+**Mooncake** is a distributed system that efficiently aggregates memory resources across multiple servers. It can also be deployed on a single server for simpler setups.
-Launch Mooncake master server:
+When integrated with **SGLang**, the system conceptually consists of four key components: `the master service`, `metadata service`, `store service`, and the `SGLang server`. Among them, the `master service` and `metadata service` are responsible for object and metadata maintenance. The `store service` manages a contiguous memory segment that contributes to the distributed KV cache, making its memory accessible to both local and remote `SGLang servers`. Data transfer occurs directly between the `store service` and `SGLang servers`, bypassing the `master service`.
+
+### Single Server Deployment
+
+**Launch Mooncake `metadata service`:**
+
+```bash
+python -m mooncake.http_metadata_server
+```
+
+**Launch Mooncake `master service`:**
```bash
mooncake_master
```
-Launch Mooncake meta server:
+**Launch Mooncake `store service`:**
+
+First, create and save a configuration file in JSON format. For example:
+
+```json
+{
+ "local_hostname": "localhost",
+ "metadata_server": "http://localhost:8080/metadata",
+ "master_server_address": "localhost:50051",
+ "protocol": "rdma",
+ "device_name": "mlx5_0,mlx5_1",
+ "global_segment_size": 2684354560,
+ "local_buffer_size": 0
+}
+```
+
+Parameter Explanation:
+
+* `local_hostname`: The hostname of the `store service`.
+* `metadata_server`: The network address of the `metadata service`. The default port is 8080.
+* `master_server_address`: The network address of the `master service`. The default port is 50051.
+* `protocol`: The protocol used by Mooncake. Supported values are `"rdma"` or `"tcp"`. For optimal performance, `"rdma"` is recommended.
+* `device_name`: The RDMA devices used by Mooncake. This parameter is required only when the protocol is set to `"rdma"`. Available devices can be listed using the `ibv_devices` command.
+* `global_segment_size`: The amount of memory (in bytes) contributed to the global memory pool. A larger value allows Mooncake to cache more KV tensors.
+* `local_buffer_size`: The local buffer is used for request operations such as `Get` or `Put`. Here it is set to 0 because this instance functions solely as a storage server, contributing memory to the global pool without issuing any request operations.
+
+Then start the `store service`:
```bash
-python -m mooncake.http_metadata_server
+python -m mooncake.mooncake_store_service --config=[config_path]
```
-Start the SGLang server with Mooncake enabled. Mooncake configuration can be provided via environment variables:
+Note: To get started quickly, if `MOONCAKE_GLOBAL_SEGMENT_SIZE` is set to a non-zero value when starting the `SGLang server`, launching the `store service` can be skipped. In this case, the `SGLang server` also fulfills the role of the `store service`.
+
+**Start the `SGLang server` with Mooncake enabled:**
+
+Mooncake configuration can be provided via environment variables. Note that, for optimal performance, the Mooncake backend currently supports only the `page_first` layout (which optimizes memory access patterns for KV cache operations).
+
+There are two ways to configure Mooncake: using environment variables, or using the `--hicache-storage-backend-extra-config` argument of SGLang.
+
+**Using environment variables to configure Mooncake**
```bash
MOONCAKE_TE_META_DATA_SERVER="http://127.0.0.1:8080/metadata" \
-MOONCAKE_GLOBAL_SEGMENT_SIZE=4294967296 \
-MOONCAKE_LOCAL_BUFFER_SIZE=134217728 \
-MOONCAKE_PROTOCOL="rdma" \
-MOONCAKE_DEVICE="erdma_0,erdma_1" \
MOONCAKE_MASTER=127.0.0.1:50051 \
+MOONCAKE_PROTOCOL="rdma" \
+MOONCAKE_DEVICE="mlx5_0,mlx5_1" \
+MOONCAKE_GLOBAL_SEGMENT_SIZE=4294967296 \
python -m sglang.launch_server \
--enable-hierarchical-cache \
--hicache-storage-backend mooncake\
--model-path [model_path]
```
+
+Parameter Explanation:
+
+* `MOONCAKE_TE_META_DATA_SERVER`: The network address of the `metadata service`. The default port is 8080.
+* `MOONCAKE_MASTER`: The network address of the `master service`. The default port is 50051.
+* `MOONCAKE_PROTOCOL`: The protocol used by Mooncake. Supported values are `"rdma"` or `"tcp"`. For optimal performance, `"rdma"` is recommended.
+* `MOONCAKE_DEVICE`: The RDMA devices used by Mooncake. This parameter is required only when the protocol is set to `"rdma"`. Available devices can be listed using the `ibv_devices` command.
+* `MOONCAKE_GLOBAL_SEGMENT_SIZE`: The amount of memory (in bytes) contributed to the global memory pool. If at least one `store service` is launched, this value can be set to `0`, in which case the `SGLang server` will not contribute any memory to the system. Note that KV tensors cached in the contributed memory will be lost once this process terminates; however, this will not cause any system errors.
+
+**Using the SGLang `--hicache-storage-backend-extra-config` argument to configure Mooncake**
+
+```bash
+python -m sglang.launch_server \
+ --enable-hierarchical-cache \
+ --hicache-storage-backend mooncake \
+ --model-path [model_path] \
+ --hicache-storage-backend-extra-config '{"master_server_address": "127.0.0.1:50051", "local_hostname": "localhost", "metadata_server": "http://127.0.0.1:8080/metadata", "global_segment_size": 4294967296, "local_buffer_size": 16777216, "protocol": "rdma", "device_name": "mlx5_0,mlx5_1"}'
+```
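+
+For readability, the JSON passed to `--hicache-storage-backend-extra-config` above expands to (same illustrative values):
+
+```json
+{
+  "master_server_address": "127.0.0.1:50051",
+  "local_hostname": "localhost",
+  "metadata_server": "http://127.0.0.1:8080/metadata",
+  "global_segment_size": 4294967296,
+  "local_buffer_size": 16777216,
+  "protocol": "rdma",
+  "device_name": "mlx5_0,mlx5_1"
+}
+```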
+
+**Important: Understanding Global Segment Size**
+
+`global_segment_size` (for the `store service`) and `MOONCAKE_GLOBAL_SEGMENT_SIZE` (for the `SGLang server`) specify the amount of memory each instance contributes to the distributed memory pool. The total memory available for KV cache storage across the cluster is the sum of the memory contributed by all instances.
+
+Adjust these values according to the system's available memory and expected cache requirements.
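+
+For example, assuming four `store service` instances each configured with `global_segment_size = 2684354560` (2.5 GiB) and two `SGLang servers` each started with `MOONCAKE_GLOBAL_SEGMENT_SIZE=4294967296` (4 GiB), the cluster-wide KV cache pool is 4 × 2.5 GiB + 2 × 4 GiB = 18 GiB.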
+
+### Distributed Deployment
+
+Distributed deployment of Mooncake is straightforward. Similar to the single-node setup, start one `metadata service` and one `master service` for this cluster. Then start a `store service` on each server.
+
+Mooncake also supports a high-availability mode. This mode enhances fault tolerance by running the `master service` as a cluster of multiple master nodes coordinated through an `etcd` cluster. The master nodes use `etcd` to elect a leader, which is responsible for handling client requests. For more details about how to deploy in this mode, please refer to the [Mooncake documentation](https://kvcache-ai.github.io/Mooncake/).
+
+## Test Mooncake Store
+
+This test is intended for developers to quickly verify that the MooncakeStore class interfaces are functioning correctly.
+
+First, start the `metadata service` and `master service`. Then run `test_mooncake_store.py`. A 16 MB global segment size is enough for this test.
+
+```bash
+MOONCAKE_TE_META_DATA_SERVER="http://127.0.0.1:8080/metadata" \
+MOONCAKE_MASTER=127.0.0.1:50051 \
+MOONCAKE_PROTOCOL="rdma" \
+MOONCAKE_DEVICE="mlx5_0,mlx5_1" \
+MOONCAKE_GLOBAL_SEGMENT_SIZE=16777216 \
+python3 [path of test_mooncake_store.py]
+```
+
+If all tests pass, the message "✅ All tests passed" will be printed at the end.
diff --git a/python/sglang/srt/mem_cache/storage/mooncake_store/mooncake_store.py b/python/sglang/srt/mem_cache/storage/mooncake_store/mooncake_store.py
index 51b47335e5c..caab04b5cfc 100644
--- a/python/sglang/srt/mem_cache/storage/mooncake_store/mooncake_store.py
+++ b/python/sglang/srt/mem_cache/storage/mooncake_store/mooncake_store.py
@@ -1,4 +1,3 @@
-import hashlib
import json
import logging
import os
@@ -6,29 +5,16 @@
from dataclasses import dataclass
from typing import Any, List, Optional
-import numpy as np
import torch
-from sglang.srt.distributed import get_tensor_model_parallel_rank
-from sglang.srt.mem_cache.hicache_storage import HiCacheStorage
+from sglang.srt.mem_cache.hicache_storage import HiCacheStorage, HiCacheStorageConfig
DEFAULT_GLOBAL_SEGMENT_SIZE = 4 * 1024 * 1024 * 1024 # 4 GiB
-DEFAULT_LOCAL_BUFFER_SIZE = 128 * 1024 * 1024 # 128 MB
+DEFAULT_LOCAL_BUFFER_SIZE = 16 * 1024 * 1024 # 16 MB
logger = logging.getLogger(__name__)
-def get_hash_str_mooncake(token_ids: List[int], prior_hash: str = None):
- local_rank = get_tensor_model_parallel_rank()
- prefix_str = ""
- if prior_hash:
- prefix_str = hashlib.sha256(prior_hash.encode()).hexdigest()
- current_token_ids_bytes = np.array(token_ids).tobytes()
- current_hash_object = hashlib.sha256(current_token_ids_bytes)
- current_hash_hex = current_hash_object.hexdigest()
- return f"{prefix_str}_{int(current_hash_hex[:16], 16)}_{local_rank}"
-
-
@dataclass
class MooncakeStoreConfig:
local_hostname: str
@@ -55,9 +41,8 @@ def from_file() -> "MooncakeStoreConfig":
global_segment_size=config.get(
"global_segment_size", DEFAULT_GLOBAL_SEGMENT_SIZE
),
- local_buffer_size=config.get(
- "local_buffer_size", DEFAULT_LOCAL_BUFFER_SIZE
- ),
+ # Zero copy interface does not need local buffer
+ local_buffer_size=DEFAULT_LOCAL_BUFFER_SIZE,
protocol=config.get("protocol", "tcp"),
device_name=config.get("device_name", "auto"),
master_server_address=config.get("master_server_address"),
@@ -80,14 +65,33 @@ def load_from_env() -> "MooncakeStoreConfig":
global_segment_size=int(
os.getenv("MOONCAKE_GLOBAL_SEGMENT_SIZE", DEFAULT_GLOBAL_SEGMENT_SIZE)
),
- local_buffer_size=int(
- os.getenv("MOONCAKE_LOCAL_BUFFER_SIZE", DEFAULT_LOCAL_BUFFER_SIZE)
- ),
+ # Zero copy interface does not need local buffer
+ local_buffer_size=DEFAULT_LOCAL_BUFFER_SIZE,
protocol=os.getenv("MOONCAKE_PROTOCOL", "tcp"),
device_name=os.getenv("MOONCAKE_DEVICE", "auto"),
master_server_address=os.getenv("MOONCAKE_MASTER"),
)
+ @staticmethod
+ def load_from_extra_config(extra_config: dict) -> "MooncakeStoreConfig":
+ """Load config from extra_config dictionary."""
+ if "master_server_address" not in extra_config:
+ raise ValueError("master_server_address is required in extra_config")
+
+ return MooncakeStoreConfig(
+ local_hostname=extra_config.get("local_hostname", "localhost"),
+ metadata_server=extra_config.get("metadata_server", "P2PHANDSHAKE"),
+ global_segment_size=extra_config.get(
+ "global_segment_size", DEFAULT_GLOBAL_SEGMENT_SIZE
+ ),
+ local_buffer_size=extra_config.get(
+ "local_buffer_size", DEFAULT_LOCAL_BUFFER_SIZE
+ ),
+ protocol=extra_config.get("protocol", "tcp"),
+ device_name=extra_config.get("device_name", "auto"),
+ master_server_address=extra_config["master_server_address"],
+ )
+
def __post_init__(self):
if self.device_name == "auto":
os.environ["MC_MS_AUTO_DISC"] = "1"
@@ -97,7 +101,7 @@ def __post_init__(self):
class MooncakeStore(HiCacheStorage):
- def __init__(self):
+ def __init__(self, storage_config: HiCacheStorageConfig = None):
try:
from mooncake.store import MooncakeDistributedStore
except ImportError as e:
@@ -109,8 +113,26 @@ def __init__(self):
try:
self.store = MooncakeDistributedStore()
- self.config = MooncakeStoreConfig.load_from_env()
- logger.info("Mooncake Configuration loaded from env successfully.")
+
+ extra_config = (
+ getattr(storage_config, "extra_config", None)
+ if storage_config
+ else None
+ )
+ # Load configuration with master_server_address prioritized from extra_config if available
+ if (
+ extra_config is not None
+ and extra_config.get("master_server_address") is not None
+ ):
+ # Load from extra_config
+ self.config = MooncakeStoreConfig.load_from_extra_config(extra_config)
+ logger.info(
+ "Mooncake Configuration loaded from extra_config successfully."
+ )
+ else:
+ # Load from environment variables
+ self.config = MooncakeStoreConfig.load_from_env()
+ logger.info("Mooncake Configuration loaded from env successfully.")
ret_code = self.store.setup(
self.config.local_hostname,
@@ -128,6 +150,13 @@ def __init__(self):
self.warmup()
logger.info("Mooncake store warmup successfully.")
+ if storage_config is not None:
+ self.is_mla_backend = storage_config.is_mla_model
+ self.local_rank = storage_config.tp_rank
+ else:
+ self.is_mla_backend = False
+ self.local_rank = 0
+
except ValueError as e:
logger.error("Configuration loading failed: %s", e)
raise
@@ -137,12 +166,10 @@ def __init__(self):
def warmup(self):
warmup_key = "sglang_mooncake_store_warmup_key" + uuid.uuid4().hex
- # 10 MB
- warmup_value = bytes(10 * 1024 * 1024)
- self.store.put(warmup_key, warmup_value)
+ warmup_value = bytes(4 * 1024) # 4 KB
+ assert self.store.put(warmup_key, warmup_value) == 0
assert self.store.is_exist(warmup_key) == 1
- self.store.get(warmup_key)
- self.store.remove(warmup_key)
+ assert self.store.get(warmup_key) == warmup_value
def register_buffer(self, buffer: torch.Tensor) -> None:
try:
@@ -162,77 +189,118 @@ def set(
target_location: Optional[List[int]] = None,
target_sizes: Optional[List[int]] = None,
) -> bool:
- assert len(key) == len(target_location) == len(target_sizes)
- if len(key) == 0:
- return
-
- for i in range(len(key)):
- if key[i] is None or target_location[i] is None or target_sizes[i] is None:
- return
-
- self._put_batch_zero_copy_impl(key, target_location, target_sizes)
+ # Only support zero copy set for now
+ assert target_location is not None and target_sizes is not None
+ exist_result = self._batch_exist([key])
+ if exist_result[0] == 1:
+ return True
+ put_result = self._put_batch_zero_copy_impl(
+ [key], [target_location], [target_sizes]
+ )
+ return put_result[0] == 0
def batch_set(
self,
keys: List[str],
- value: Optional[Any] = None,
- target_location: Optional[List[int]] = None,
+ values: Optional[List[torch.Tensor]] = None,
+ target_locations: Optional[List[int]] = None,
target_sizes: Optional[List[int]] = None,
) -> bool:
- assert len(keys) == len(target_location) == len(target_sizes)
+ # Only support zero copy set for now
+ assert target_locations is not None and target_sizes is not None
+ assert len(keys) == len(target_locations) == len(target_sizes)
+
if len(keys) == 0:
- return
+ return False
for i in range(len(keys)):
- if keys[i] is None or target_location[i] is None or target_sizes[i] is None:
- return
+ if (
+ keys[i] is None
+ or target_locations[i] is None
+ or target_sizes[i] is None
+ ):
+ return False
+
+ exist_result = self._batch_exist(keys)
+ set_keys = []
+ set_target_locations = []
+ set_target_sizes = []
+ set_indices = []
+ for i in range(len(keys)):
+ if exist_result[i] != 1:
+ set_keys.append(keys[i])
+ set_target_locations.append(target_locations[i])
+ set_target_sizes.append(target_sizes[i])
+ set_indices.append(i)
+ # Only set non-existing keys to storage
+ put_result = self._put_batch_zero_copy_impl(
+ set_keys, set_target_locations, set_target_sizes
+ )
+ for i in range(len(set_indices)):
+ if put_result[i] == 0:
+ exist_result[set_indices[i]] = 1
- self._put_batch_zero_copy_impl(keys, target_location, target_sizes)
+ success_count = 0
+ for i in range(len(keys)):
+ if exist_result[i] == 0:
+ break
+ success_count += 1
+ # TODO: return the number of consecutive successful operations from the start.
+ return success_count == len(keys)
def get(
self,
key,
target_location: Optional[Any] = None,
target_sizes: Optional[Any] = None,
- ) -> torch.Tensor | None:
- assert len(key) == len(target_location) == len(target_sizes)
- if len(key) == 0:
- return
-
- for i in range(len(key)):
- if key[i] is None or target_location[i] is None or target_sizes[i] is None:
- return
-
- return self._get_batch_zero_copy_impl(key, target_location, target_sizes)
+ ) -> bool:
+ assert target_location is not None and target_sizes is not None
+ get_result = self._get_batch_zero_copy_impl(
+ [key], [target_location], [target_sizes]
+ )
+ return get_result[0] >= 0
def batch_get(
self,
keys: List[str],
- target_location: Optional[Any] = None,
+ target_locations: Optional[Any] = None,
target_sizes: Optional[Any] = None,
- ) -> torch.Tensor | None:
- assert len(keys) == len(target_location) == len(target_sizes)
+ ) -> int:
+ assert len(keys) == len(target_locations) == len(target_sizes)
if len(keys) == 0:
- return
-
+ return 0
+ get_result = self._get_batch_zero_copy_impl(
+ keys, target_locations, target_sizes
+ )
+ if self.is_mla_backend:
+ key_multiplier = 1
+ else:
+ key_multiplier = 2
for i in range(len(keys)):
- if keys[i] is None or target_location[i] is None or target_sizes[i] is None:
- return
-
- return self._get_batch_zero_copy_impl(keys, target_location, target_sizes)
-
- def exists(self, keys) -> bool | dict:
- _keys = []
- for key in keys:
- if key is None:
- return None
-
- _keys.append(f"{key}_k")
- result = {k: v for k, v in zip(keys, self.store.batch_is_exist(_keys))}
- return result
-
- def delete(self, key) -> None:
- raise (NotImplementedError)
+ if get_result[i] < 0:
+ return i // key_multiplier
+ return len(keys) // key_multiplier
+
+ def exists(self, key) -> bool:
+ exist_result = self._batch_exist([key])
+ return exist_result[0] == 1
+
+ def batch_exists(self, keys) -> int:
+ if self.is_mla_backend:
+ query_keys = [f"{key}_k" for key in keys]
+ key_multiplier = 1
+ else:
+ query_keys = []
+ for key in keys:
+ query_keys.append(f"{key}_{self.local_rank}_k")
+ query_keys.append(f"{key}_{self.local_rank}_v")
+ key_multiplier = 2
+
+ exist_result = self._batch_exist(query_keys)
+ for i in range(len(query_keys)):
+ if exist_result[i] != 1:
+ return i // key_multiplier
+ return len(query_keys) // key_multiplier
def close(self):
# MooncakeDistributedStore will automatically call the destructor, so
@@ -240,22 +308,17 @@ def close(self):
pass
def clear(self) -> None:
- raise (NotImplementedError)
+ self.store.remove_all()
def _put_batch_zero_copy_impl(
self, key_strs: List[str], buffer_ptrs: List[int], buffer_sizes: List[int]
- ) -> None:
- try:
- self.store.batch_put_from(key_strs, buffer_ptrs, buffer_sizes)
- except TypeError as err:
- logger.error("Failed to put value to Mooncake Store: %s", err)
- raise TypeError("Mooncake Store Put Type Error.") from err
+ ) -> List[int]:
+ return self.store.batch_put_from(key_strs, buffer_ptrs, buffer_sizes)
def _get_batch_zero_copy_impl(
self, key_strs: List[str], buffer_ptrs: List[int], buffer_sizes: List[int]
- ) -> None:
- try:
- self.store.batch_get_into(key_strs, buffer_ptrs, buffer_sizes)
- except TypeError as err:
- logger.error("Failed to get value from Mooncake Store: %s", err)
- raise TypeError("Mooncake Store Get Type Error.") from err
+ ) -> List[int]:
+ return self.store.batch_get_into(key_strs, buffer_ptrs, buffer_sizes)
+
+ def _batch_exist(self, key_strs: List[str]) -> List[int]:
+ return self.store.batch_is_exist(key_strs)
diff --git a/python/sglang/srt/mem_cache/storage/mooncake_store/test_mooncake_store.py b/python/sglang/srt/mem_cache/storage/mooncake_store/test_mooncake_store.py
new file mode 100644
index 00000000000..3083abe22cf
--- /dev/null
+++ b/python/sglang/srt/mem_cache/storage/mooncake_store/test_mooncake_store.py
@@ -0,0 +1,161 @@
+import logging
+import uuid
+
+import torch
+from mooncake_store import MooncakeStore
+
+from sglang.srt.mem_cache.hicache_storage import HiCacheStorageConfig
+
+logging.basicConfig(
+ level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
+)
+logger = logging.getLogger(__name__)
+
+
+def generate_batch_query_keys(kv_num: int, config: HiCacheStorageConfig):
+ keys = []
+ for _ in range(kv_num):
+ key = "test_" + str(uuid.uuid4())
+ keys.append(key)
+ set_keys = []
+ for key in keys:
+ if config.is_mla_model:
+ set_keys.append(key + "_k")
+ else:
+ set_keys.append(key + f"_{config.tp_rank}_k")
+ set_keys.append(key + f"_{config.tp_rank}_v")
+ get_keys = set_keys
+ exist_keys = keys
+ return set_keys, get_keys, exist_keys
+
+
+def test_single_operation():
+ """Test the set API with a single key-value pair."""
+ print("=" * 100)
+ print("Testing single operation")
+
+ buffer_size = 1024 * 1024 * 16 # 16MB
+ value_elements = 1024
+ store = MooncakeStore()
+ buffer = torch.randn(buffer_size, dtype=torch.float32)
+ store.register_buffer(buffer)
+ value_size = value_elements * buffer.element_size()
+
+ key = str(uuid.uuid4())
+ set_slice = buffer[:value_elements]
+ get_slice = buffer[value_elements : 2 * value_elements]
+ set_location = set_slice.data_ptr()
+ get_location = get_slice.data_ptr()
+
+ # Test set operation
+ result = store.set(key, target_location=set_location, target_sizes=value_size)
+ assert result is True, f"❌set operation failed for key: {key}"
+
+ # Test exists operation
+ assert store.exists(key), f"❌key {key} should exist after set operation"
+
+ # Test get operation
+ result = store.get(key, target_location=get_location, target_sizes=value_size)
+ assert result is True, f"❌get operation failed for key: {key}"
+
+ # Compare the data using proper tensor indices
+ assert torch.allclose(
+ set_slice, get_slice, atol=1e-6
+ ), f"❌get operation failed for key: {key}"
+
+ logger.info(f"✅ Single operation passed")
+
+
+def test_batch_operation(config: HiCacheStorageConfig):
+ """Test the batch set/get APIs with multiple key-value pairs."""
+ print("=" * 100)
+ print(f"Testing batch operation with config: {config}")
+
+ buffer_size = 1024 * 1024 * 16 # 16MB
+ value_elements = 256
+ kv_num = 13
+ store = MooncakeStore(config)
+ buffer = torch.randn(buffer_size, dtype=torch.float32)
+ store.register_buffer(buffer)
+ value_size = value_elements * buffer.element_size()
+
+ set_keys, get_keys, exist_keys = generate_batch_query_keys(kv_num, config)
+ set_slices = [
+ buffer[i * value_elements : (i + 1) * value_elements]
+ for i in range(len(set_keys))
+ ]
+ set_locations = [set_slice.data_ptr() for set_slice in set_slices]
+ target_sizes = [value_size for _ in range(len(set_keys))]
+
+ # Test batch set operation
+ result = store.batch_set(
+ set_keys, target_locations=set_locations, target_sizes=target_sizes
+ )
+ assert result is True, f"❌batch set operation failed"
+
+ # Test batch exists operation
+ assert store.batch_exists(
+ exist_keys
+ ), f"❌keys should exist after batch set operation"
+
+ # Test batch get operation
+ get_slices = [
+ buffer[
+ (len(set_keys) + i)
+ * value_elements : (len(set_keys) + i + 1)
+ * value_elements
+ ]
+ for i in range(len(get_keys))
+ ]
+ get_locations = [get_slice.data_ptr() for get_slice in get_slices]
+ result = store.batch_get(
+ get_keys, target_locations=get_locations, target_sizes=target_sizes
+ )
+ assert result == kv_num, f"❌batch get operation failed"
+ for i in range(len(get_keys)):
+ assert torch.allclose(
+ set_slices[i], get_slices[i], atol=1e-6
+ ), f"❌batch get operation failed for key: {get_keys[i]}"
+
+ logger.info(f"✅ Batch operation passed")
+
+
+if __name__ == "__main__":
+ test_single_operation()
+ test_batch_operation(
+ HiCacheStorageConfig(
+ is_mla_model=False,
+ tp_rank=0,
+ tp_size=1,
+ model_name=None,
+ is_page_first_layout=True,
+ )
+ )
+ test_batch_operation(
+ HiCacheStorageConfig(
+ is_mla_model=True,
+ tp_rank=0,
+ tp_size=1,
+ model_name=None,
+ is_page_first_layout=True,
+ )
+ )
+ test_batch_operation(
+ HiCacheStorageConfig(
+ is_mla_model=False,
+ tp_rank=1,
+ tp_size=4,
+ model_name=None,
+ is_page_first_layout=True,
+ )
+ )
+ test_batch_operation(
+ HiCacheStorageConfig(
+ is_mla_model=True,
+ tp_rank=3,
+ tp_size=8,
+ model_name=None,
+ is_page_first_layout=True,
+ )
+ )
+ logger.info(f"✅ All tests passed")
diff --git a/python/sglang/srt/mem_cache/storage/mooncake_store/unit_test.py b/python/sglang/srt/mem_cache/storage/mooncake_store/unit_test.py
deleted file mode 100644
index 801b0ec1bc3..00000000000
--- a/python/sglang/srt/mem_cache/storage/mooncake_store/unit_test.py
+++ /dev/null
@@ -1,40 +0,0 @@
-import torch
-from mooncake_store import MooncakeStore
-
-
-def test_init_and_warmup():
- store = MooncakeStore()
- assert store.store is not None
-
-
-def test_register_buffer():
- store = MooncakeStore()
- tensor = torch.zeros(1024, dtype=torch.float32)
- store.register_buffer(tensor)
-
-
-def test_set_and_get():
- store = MooncakeStore()
-
- key = ["test_key_" + str(i) for i in range(2)]
- tensor = torch.arange(256, dtype=torch.float32).cuda()
- ptrs = [tensor.data_ptr(), tensor.data_ptr()]
- sizes = [tensor.numel() * tensor.element_size()] * 2
-
- store.set(key, target_location=ptrs, target_sizes=sizes)
- store.get(key, target_location=ptrs, target_sizes=sizes)
-
-
-def test_exists():
- store = MooncakeStore()
- keys = ["test_key_0", "non_existent_key"]
- result = store.exists(keys)
- assert isinstance(result, dict)
- assert "test_key_0" in result
-
-
-if __name__ == "__main__":
- test_init_and_warmup()
- test_register_buffer()
- test_set_and_get()
- test_exists()
diff --git a/python/sglang/srt/mem_cache/storage/nixl/README.md b/python/sglang/srt/mem_cache/storage/nixl/README.md
index b00e0774e33..d33cd5d0542 100644
--- a/python/sglang/srt/mem_cache/storage/nixl/README.md
+++ b/python/sglang/srt/mem_cache/storage/nixl/README.md
@@ -36,6 +36,21 @@ Consolidated utility classes:
- **NixlRegistration** - Manages memory registration for tensors, files and objects
- **NixlFileManager** - Handles file system operations and NIXL tuple creation
+## Using NIXL for HiCache backend
+When running the SGLang server, pass `nixl` as the `--hicache-storage-backend` parameter, for instance:
+
+```bash
+python3 -m sglang.launch_server --model-path --host --port --page-size 64 --enable-hierarchical-cache --hicache-ratio 2 --hicache-size 64 --hicache-write-policy write_through --hicache-storage-backend nixl
+```
+
+To customize the base directory for files, you can set the following environment variable:
+
+```bash
+export SGLANG_HICACHE_NIXL_BACKEND_STORAGE_DIR=/path/to/desired/dir
+```
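+
+For example, assuming a hypothetical local directory `/mnt/nvme/hicache` and a placeholder `<model>` path (both illustrative, not defaults), the directory override and the server flag can be combined like this:
+
+```bash
+export SGLANG_HICACHE_NIXL_BACKEND_STORAGE_DIR=/mnt/nvme/hicache
+python3 -m sglang.launch_server --model-path <model> --page-size 64 --enable-hierarchical-cache --hicache-storage-backend nixl
+```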
+
+Selecting any storage backend (for example, 3FS) requires that the corresponding library is available on the system; the backend is then chosen according to the plugin priority described above.
+
## Running Unit Tests
### Prerequisites
@@ -43,33 +58,26 @@ Consolidated utility classes:
- PyTorch installed
- Python 3.8+
-### Unit tests from Project root
-Navigate to the project root directory (`/path/to/sglang`) and run:
+### Unit tests from the current directory
+From the current directory, run:
#### Run all NIXL tests:
```bash
-PYTHONPATH=. python -m pytest test/srt/test_hicache_nixl_storage.py -o asyncio_mode=strict
+PYTHONPATH=. python -m pytest test_hicache_nixl_storage.py -o asyncio_mode=strict
```
#### Run with verbose output:
```bash
-PYTHONPATH=. python -m pytest test/srt/test_hicache_nixl_storage.py -v -o asyncio_mode=strict
+PYTHONPATH=. python -m pytest test_hicache_nixl_storage.py -v -o asyncio_mode=strict
```
Note: The `-v` flag provides more detailed output, showing each test case name and its result.
#### Run a specific test:
```bash
-PYTHONPATH=. python -m pytest test/srt/test_hicache_nixl_storage.py -v -k test_single_set_get -o asyncio_mode=strict
+PYTHONPATH=. python -m pytest test_hicache_nixl_storage.py -v -k test_single_set_get -o asyncio_mode=strict
```
-### From Tests Directory
-Navigate to the tests directory and run:
-
-```bash
-cd test/srt
-PYTHONPATH=../.. python -m pytest test_hicache_nixl_storage.py -o asyncio_mode=strict
-```
Note: The `-o asyncio_mode=strict` flag is added to suppress warnings about asyncio configuration. This is not required for test functionality but provides cleaner output.
## Test Coverage
diff --git a/python/sglang/srt/mem_cache/storage/nixl/hicache_nixl.py b/python/sglang/srt/mem_cache/storage/nixl/hicache_nixl.py
index 35d8ec38ad4..327c905025c 100644
--- a/python/sglang/srt/mem_cache/storage/nixl/hicache_nixl.py
+++ b/python/sglang/srt/mem_cache/storage/nixl/hicache_nixl.py
@@ -3,7 +3,7 @@
import os
import time
import uuid
-from typing import Dict, List, Optional, Tuple, Union
+from typing import Any, Dict, List, Optional, Tuple, Union
import torch
@@ -28,6 +28,8 @@ class HiCacheNixl(HiCacheStorage):
def __init__(self, file_path: str = "/tmp/hicache_storage", plugin: str = "auto"):
"""Initialize NIXL storage connector."""
+ # Might be better to unify this across HiCache backends and move it to HiCacheController
+ file_path = os.getenv("SGLANG_HICACHE_NIXL_BACKEND_STORAGE_DIR", file_path)
self.file_manager = (
NixlFileManager(file_path)
if plugin not in NixlBackendSelection.OBJ_PLUGINS
@@ -44,59 +46,109 @@ def __init__(self, file_path: str = "/tmp/hicache_storage", plugin: str = "auto"
self.registration = NixlRegistration(self.agent)
+ def register_buffers(
+ self, buffers: Union[torch.Tensor, List[torch.Tensor], List[tuple]]
+ ) -> Optional[Any]:
+ """Register tensor(s) or target locations in host memory (list of addr,len tuples) with NIXL."""
+ if isinstance(buffers[0], tuple):
+ tuples = [(x[0], x[1], 0, "") for x in buffers]
+ return self.registration._register_memory(tuples, "DRAM")
+ else:
+ return self.registration._register_memory(buffers)
+
+ def register_files(
+ self, file_paths: List[str], open_file: Optional[bool] = True
+ ) -> Optional[Any]:
+ """Register files with NIXL."""
+ tuples = self.file_manager.files_to_nixl_tuples(file_paths)
+ return self.registration._register_memory(tuples, "FILE")
+
+ def register_objects(
+ self, keys: List[str], sizes: Optional[List[int]] = None
+ ) -> Optional[Any]:
+ """Register objects with NIXL."""
+ if not keys:
+ return None
+ tuples = [(0, 0, key, "") for key in keys]
+ return self.registration._register_memory(tuples, "OBJ")
+
def _execute_transfer(
- self, tensors: List[torch.Tensor], keys: List[str], direction: str
+ self,
+ buffers: Optional[List[torch.Tensor | tuple]],
+ keys: List[str],
+ direction: str,
) -> bool:
- if len(tensors) != len(keys):
- logger.error("Mismatch between number of tensors and files/objects")
+ if len(buffers) != len(keys):
+ logger.error("Mismatch between number of tensors/buffers and files/objects")
return False
- if not self.registration.register_buffers(tensors):
- logger.error("Failed to register tensors")
- return False
-
- # Get transfer tuples based on backend type
- tensor_sizes = [tensor.element_size() * tensor.numel() for tensor in tensors]
+ # File and object keys are registered per transfer; to be updated once
+ # pre-registration for files and objects is added to HiCache.
if self.backend_selector.mem_type == "FILE":
- file_tuples = self.file_manager.files_to_nixl_tuples(keys)
- if not file_tuples or not self.registration.register_files(file_tuples):
+ tuples = self.file_manager.files_to_nixl_tuples(keys)
+ if not tuples or not self.registration._register_memory(tuples, "FILE"):
logger.error("Failed to prepare files for transfer")
return False
- transfer_tuples = [
- (x[0], s, x[2]) for x, s in zip(file_tuples, tensor_sizes)
- ]
- else:
- if not self.registration.register_objects(keys, tensors):
+ else: # mem_type == "OBJ"
+ tuples = [(0, 0, key, "") for key in keys]
+ if not tuples or not self.registration._register_memory(tuples, "OBJ"):
logger.error("Failed to register objects")
return False
- transfer_tuples = [(0, s, key) for s, key in zip(tensor_sizes, keys)]
+ # Prepare transfer descriptors
+ if isinstance(buffers[0], torch.Tensor):
+ tensor_sizes = [
+ tensor.element_size() * tensor.numel() for tensor in buffers
+ ]
+ storage_tuples = [(x[0], s, x[2]) for x, s in zip(tuples, tensor_sizes)]
+ host_descs = self.agent.get_xfer_descs(buffers)
+ elif isinstance(buffers[0], tuple):
+ storage_tuples = [(x[0], y[1], x[2]) for x, y in zip(tuples, buffers)]
+ host_descs = self.agent.get_xfer_descs(
+ [(x[0], x[1], 0) for x in buffers], "DRAM"
+ )
+ else:
+ return False
+
+ storage_descs = self.agent.get_xfer_descs(
+ storage_tuples, self.backend_selector.mem_type
+ )
+
+ if (host_descs is None) or (storage_descs is None):
+ logger.error("Failed to get transfer descriptors")
+ return False
+
+ # Initialize transfer, default assumption that tensor was registered
try:
- # Get transfer descriptors
- if (tensor_descs := self.agent.get_xfer_descs(tensors)) is None or (
- file_descs := self.agent.get_xfer_descs(
- transfer_tuples, self.backend_selector.mem_type
- )
- ) is None:
- logger.error("Failed to get transfer descriptors")
+ xfer_req = self.agent.initialize_xfer(
+ direction, host_descs, storage_descs, self.agent_name
+ )
+ except Exception:
+ # Check if it was due to missing pre-registration
+ if not self.register_buffers(buffers):
+ logger.error("Failed to register tensors/buffers")
return False
- # Initialize and execute transfer
- if (
- xfer_req := self.agent.initialize_xfer(
- direction, tensor_descs, file_descs, self.agent_name
+ try:
+ xfer_req = self.agent.initialize_xfer(
+ direction, host_descs, storage_descs, self.agent_name
)
- ) is None:
- logger.error("Failed to create transfer request")
+ except Exception as e:
+ logger.error(f"Failed to create transfer request: {e}")
return False
+ # Execute transfer and wait for its completion
+ try:
state = self.agent.transfer(xfer_req)
while state != "DONE":
state = self.agent.check_xfer_state(xfer_req)
if state == "ERR":
+ self.agent.release_xfer_handle(xfer_req)
logger.error("Transfer failed")
return False
- time.sleep(0.0001) # Can be changed to os.sched_yield() or parametrized
+ time.sleep(0.0001) # Can be changed to os.sched_yield() or parametrized
+
+ self.agent.release_xfer_handle(xfer_req)
return True
except Exception as e:
@@ -106,45 +158,87 @@ def _execute_transfer(
logger.error(f"Traceback: {traceback.format_exc()}")
return False
- def batch_set(self, keys: List[str], values: List[torch.Tensor]) -> bool:
- if not keys:
- return True
-
- if self.backend_selector.mem_type == "FILE":
- file_paths = []
- for key in keys:
- tensor_path = self.file_manager.get_file_path(key)
- if not self.file_manager.create_file(tensor_path):
- logger.error(f"Failed to create file {tensor_path}")
- return False
- file_paths.append(tensor_path)
- return self._execute_transfer(values, file_paths, "WRITE")
- else:
- return self._execute_transfer(values, keys, "WRITE")
-
- def set(self, key: str, value: torch.Tensor) -> bool:
- return self.batch_set([key], [value])
-
def get(
- self, key: str, dst_tensor: Optional[torch.Tensor] = None
+ self,
+ key: str,
+ target_location: Optional[torch.Tensor | int] = None,
+ target_sizes: Optional[int] = None,
) -> torch.Tensor | None:
- if dst_tensor is None: # To be removed, being compatible with the current API
+ # To be removed, being compatible with the current API
+ if target_location is None:
return None
- result = self.batch_get([key], [dst_tensor])
+ if target_sizes:
+ result = self.batch_get([key], [target_location], [target_sizes])
+ else:
+ result = self.batch_get([key], [target_location])
return result[0] if result else None
def batch_get(
- self, keys: List[str], dst_tensors: List[torch.Tensor]
- ) -> List[Optional[torch.Tensor]]:
+ self,
+ keys: List[str],
+ target_locations: Optional[List[torch.Tensor | int]] = None,
+ target_sizes: Optional[List[int]] = None,
+ ) -> List[torch.Tensor | None]:
if not keys:
return []
+ # To be removed, being compatible with the current API
+ if not target_locations:
+ return [None] * len(keys)
+
+ if target_sizes and (len(target_sizes) != len(target_locations)):
+ logger.error("Mismatch between number of target_locations and target_sizes")
+ return [None] * len(keys)
+ if target_sizes:
+ dest = list(zip(target_locations, target_sizes))
+ else:
+ dest = target_locations
+
if self.backend_selector.mem_type == "FILE":
file_paths = [self.file_manager.get_file_path(key) for key in keys]
- success = self._execute_transfer(dst_tensors, file_paths, "READ")
+ success = self._execute_transfer(dest, file_paths, "READ")
else:
- success = self._execute_transfer(dst_tensors, keys, "READ")
- return dst_tensors if success else [None] * len(keys)
+ success = self._execute_transfer(dest, keys, "READ")
+ return target_locations if success and not target_sizes else [None] * len(keys)
+
+ def set(
+ self,
+ key: str,
+ value: Optional[torch.Tensor] = None,
+ target_location: Optional[int] = None,
+ target_sizes: Optional[int] = None,
+ ) -> bool:
+ if target_location and target_sizes:
+ return self.batch_set([key], None, [target_location], [target_sizes])
+ else:
+ return self.batch_set([key], [value])
+
+ def batch_set(
+ self,
+ keys: List[str],
+ values: Optional[List[torch.Tensor]] = None,
+ target_locations: Optional[List[int]] = None,
+ target_sizes: Optional[List[int]] = None,
+ ) -> bool:
+ if not keys or (not values and (not target_locations or not target_sizes)):
+ logger.error("Keys or values were not passed")
+ return False
+
+ if not values:
+ values = list(zip(target_locations, target_sizes))
+
+ if self.backend_selector.mem_type == "FILE":
+ file_paths = []
+ for key in keys:
+ file_path = self.file_manager.get_file_path(key)
+ # New file per set; to be updated once partial-write support is added to HiCache
+ if not self.file_manager.create_file(file_path):
+ logger.error(f"Failed to create file {file_path}")
+ return False
+ file_paths.append(file_path)
+ return self._execute_transfer(values, file_paths, "WRITE")
+ else: # mem_type == "OBJ"
+ return self._execute_transfer(values, keys, "WRITE")
def exists(self, key: str) -> bool:
tuples = self.registration.create_query_tuples(
diff --git a/python/sglang/srt/mem_cache/storage/nixl/nixl_utils.py b/python/sglang/srt/mem_cache/storage/nixl/nixl_utils.py
index 476aed3a475..6e3d2a900cc 100644
--- a/python/sglang/srt/mem_cache/storage/nixl/nixl_utils.py
+++ b/python/sglang/srt/mem_cache/storage/nixl/nixl_utils.py
@@ -109,66 +109,35 @@ def create_query_tuples(
return [(0, 0, key)]
def _register_memory(
- self, items: Union[List[tuple], List[torch.Tensor]], mem_type: str, desc: str
+ self,
+ items: Union[List[tuple], torch.Tensor, List[torch.Tensor]],
+ mem_type: Optional[str] = None,
) -> Optional[Any]:
"""Common registration logic for files, objects, and buffers.
Args:
items: List of tuples or tensors to register
- mem_type: Memory type ("FILE", "OBJ", "DRAM", "VRAM")
- desc: Description for logging
+ mem_type: Memory type ("FILE", "OBJ") or None for tensor or list of tensors
"""
- try:
- if not items:
- return None
-
- reg_descs = self.agent.get_reg_descs(items, mem_type)
- if reg_descs is None:
- logger.error("Failed to create registration descriptors")
- return None
-
- registered_memory = self.agent.register_memory(reg_descs)
- if registered_memory:
- return registered_memory
- else:
- logger.error("Failed to register with NIXL")
- return None
-
- except Exception as e:
- logger.error(f"Failed to register {desc}: {e}")
+ if isinstance(items, list) and not items:
return None
- def register_buffers(
- self, buffers: Union[torch.Tensor, List[torch.Tensor]]
- ) -> Optional[Any]:
- """Register tensors/buffers with NIXL."""
- if isinstance(buffers, torch.Tensor):
- buffers = [buffers]
-
- if not buffers:
+ reg_descs = self.agent.get_reg_descs(items, mem_type)
+ if reg_descs is None:
+ logger.error("Failed to create registration descriptors")
return None
- # Determine memory type based on tensor device
- mem_type = "VRAM" if buffers[0].device.type == "cuda" else "DRAM"
- return self._register_memory(buffers, mem_type, "buffers")
-
- def register_files(self, tuples: List[tuple]) -> Optional[Any]:
- """Register files with NIXL using (0, 0, fd, file_path) tuples."""
- return self._register_memory(tuples, "FILE", "files")
-
- def register_objects(
- self, keys: List[str], tensors: Optional[List[torch.Tensor]] = None
- ) -> Optional[Any]:
- """Register objects with NIXL."""
- if not keys:
+ try:
+ registered_memory = self.agent.register_memory(reg_descs)
+ return registered_memory # Could be None in case of error
+ except Exception as e:
+ if not mem_type:
+ logger.error(f"Failed to register Tensors with NIXL: {e}")
+ else:
+ logger.error(
+ f"Failed to register memory of type {mem_type} with NIXL: {e}"
+ )
return None
- # Create object tuples with proper sizes
- tuples = [
- (0, tensor.element_size() * tensor.numel() if tensor else 0, key)
- for key, tensor in zip(keys, tensors or [None] * len(keys))
- ]
- return self._register_memory(tuples, "OBJ", "objects")
-
class NixlFileManager:
"""Handles file system operations for NIXL."""
@@ -221,12 +190,9 @@ def close_file(self, fd: int) -> bool:
return False
def files_to_nixl_tuples(
- self, file_paths: List[str], open_file: bool = True
+ self, file_paths: List[str]
) -> List[Tuple[int, int, int, str]]:
"""Create NIXL tuples (offset, length, fd, file_path) for given files."""
- if not open_file:
- return [(0, 0, 0, path) for path in file_paths]
-
tuples = []
for path in file_paths:
if (fd := self.open_file(path)) is None:
diff --git a/python/sglang/srt/mem_cache/storage/nixl/test_hicache_nixl_storage.py b/python/sglang/srt/mem_cache/storage/nixl/test_hicache_nixl_storage.py
index 572a032bf99..951e5a4ea03 100755
--- a/python/sglang/srt/mem_cache/storage/nixl/test_hicache_nixl_storage.py
+++ b/python/sglang/srt/mem_cache/storage/nixl/test_hicache_nixl_storage.py
@@ -7,8 +7,11 @@
import torch
-from sglang.srt.mem_cache.nixl.hicache_nixl import HiCacheNixl
-from sglang.srt.mem_cache.nixl.nixl_utils import NixlFileManager, NixlRegistration
+from sglang.srt.mem_cache.storage.nixl.hicache_nixl import HiCacheNixl
+from sglang.srt.mem_cache.storage.nixl.nixl_utils import (
+ NixlFileManager,
+ NixlRegistration,
+)
class TestNixlUnified(unittest.TestCase):
@@ -88,8 +91,27 @@ def test_single_set_get(self):
# Test get
retrieved = self.hicache.get(key, dst_tensor)
+ self.verify_tensors_equal(value, dst_tensor)
self.verify_tensors_equal(value, retrieved)
+ # Same test in addr,len mode with another key and dst_tensor
+ key2 = "test_key2"
+ dst_tensor2 = torch.zeros_like(value, device="cpu")
+ src_addr, src_len = value.data_ptr(), value.numel() * value.element_size()
+ dst_addr, dst_len = (
+ dst_tensor2.data_ptr(),
+ dst_tensor2.numel() * dst_tensor2.element_size(),
+ )
+
+ # Test set
+ self.assertTrue(self.hicache.set(key2, None, src_addr, src_len))
+ self.assertTrue(self.hicache.exists(key2))
+
+ # Test get
+ retrieved2 = self.hicache.get(key2, dst_addr, dst_len)
+ self.assertIsNone(retrieved2)
+ self.verify_tensors_equal(value, dst_tensor2)
+
def test_batch_set_get(self):
"""Test batch tensor set/get operations."""
keys = ["key1", "key2", "key3"]
@@ -108,6 +130,23 @@ def test_batch_set_get(self):
retrieved = self.hicache.batch_get(keys, dst_tensors)
self.verify_tensor_lists_equal(values, retrieved)
+ # Same test in addr,len mode with another key and dst_tensor
+ keys2 = ["key4", "key5", "key6"]
+ dst_tensors2 = [torch.zeros_like(v, device="cpu") for v in values]
+ src_addrs = [v.data_ptr() for v in values]
+ src_lens = [v.numel() * v.element_size() for v in values]
+ dst_addrs = [dt.data_ptr() for dt in dst_tensors2]
+ dst_lens = [dt.numel() * dt.element_size() for dt in dst_tensors2]
+
+ # Test batch set
+ self.assertTrue(self.hicache.batch_set(keys2, None, src_addrs, src_lens))
+ self.assertTrue(all(self.hicache.exists(key) for key in keys2))
+
+ # Test batch get
+ retrieved2 = self.hicache.batch_get(keys2, dst_addrs, dst_lens)
+ self.assertTrue(all(ret is None for ret in retrieved2))
+ self.verify_tensor_lists_equal(values, dst_tensors2)
+
def test_mixed_operations(self):
"""Test mixing single and batch operations."""
# Test interleaved set/get operations
@@ -170,7 +209,7 @@ def test_create_nixl_tuples(self):
self.file_manager.create_file(test_file)
# Test tuple creation
- tuples = self.file_manager.files_to_nixl_tuples([test_file], False)
+ tuples = self.file_manager.files_to_nixl_tuples([test_file])
self.assertIsNotNone(tuples)
self.assertTrue(len(tuples) > 0)
@@ -190,11 +229,11 @@ def test_register_buffers(self):
tensor = torch.randn(10, 10)
# Test buffer registration
- self.assertIsNotNone(self.registration.register_buffers(tensor))
+ self.assertIsNotNone(self.hicache.register_buffers(tensor))
# Test batch registration
tensors = [torch.randn(5, 5) for _ in range(3)]
- self.assertIsNotNone(self.registration.register_buffers(tensors))
+ self.assertIsNotNone(self.hicache.register_buffers(tensors))
def test_register_files_with_tuples(self):
"""Test registration of files using NIXL tuples."""
@@ -203,8 +242,8 @@ def test_register_files_with_tuples(self):
self.file_manager.create_file(file)
# Create tuples and register
- tuples = self.file_manager.files_to_nixl_tuples(files, False)
- self.registration.register_files(tuples)
+ tuples = self.file_manager.files_to_nixl_tuples(files)
+ self.hicache.register_files(files)
# Verify tuples
self.assertEqual(len(tuples), len(files))
diff --git a/python/sglang/srt/mem_cache/swa_radix_cache.py b/python/sglang/srt/mem_cache/swa_radix_cache.py
index 7a23eb85612..686fc6ab014 100644
--- a/python/sglang/srt/mem_cache/swa_radix_cache.py
+++ b/python/sglang/srt/mem_cache/swa_radix_cache.py
@@ -60,8 +60,6 @@ def __init__(self, id: Optional[int] = None):
self.last_access_time = time.monotonic()
self.hit_count = 0
- # indicating the node is loading KV cache from host
- self.loading = False
# store the host indices of KV cache
self.host_value = None
@@ -464,7 +462,7 @@ def cache_finished_req(self, req: Req) -> None:
self.req_to_token_pool.free(req.req_pool_idx)
self.dec_lock_ref(req.last_node, req.swa_uuid_for_lock)
- def cache_unfinished_req(self, req: Req) -> None:
+ def cache_unfinished_req(self, req: Req, chunked=False) -> None:
"""Cache request when it is unfinished."""
if self.disable:
kv_indices = self.req_to_token_pool.req_to_token[
diff --git a/python/sglang/srt/metrics/collector.py b/python/sglang/srt/metrics/collector.py
index 4c32b8fc634..7cbcb694909 100644
--- a/python/sglang/srt/metrics/collector.py
+++ b/python/sglang/srt/metrics/collector.py
@@ -14,10 +14,12 @@
"""Utilities for Prometheus Metrics Collection."""
import time
-from dataclasses import dataclass
+from dataclasses import dataclass, field
from enum import Enum
from typing import Dict, List, Optional, Union
+from sglang.srt.metrics.utils import generate_buckets
+from sglang.srt.server_args import ServerArgs
from sglang.srt.utils import get_bool_env_var
SGLANG_TEST_REQUEST_TIME_STATS = get_bool_env_var("SGLANG_TEST_REQUEST_TIME_STATS")
@@ -48,6 +50,9 @@ class RequestType(Enum):
DECODE = "decode"
INVALID = "invalid"
+ def get_queueing_time(self) -> float:
+ return self.forward_entry_time - self.wait_queue_entry_time
+
def __str__(self) -> str:
# if unified
_type = self.get_type()
@@ -132,27 +137,48 @@ def get_type(self) -> RequestType:
@dataclass
class SchedulerStats:
+ # Basics
num_running_reqs: int = 0
num_used_tokens: int = 0
token_usage: float = 0.0
+ swa_token_usage: float = 0.0
gen_throughput: float = 0.0
num_queue_reqs: int = 0
- cache_hit_rate: float = 0.0
num_grammar_queue_reqs: int = 0
- spec_accept_length: float = 0.0
+ num_running_reqs_offline_batch: int = 0
avg_request_queue_latency: float = 0.0
+ cache_hit_rate: float = 0.0
+
+ # Speculative decoding
+ spec_accept_length: float = 0.0
+
+ # PD disaggregation
num_prefill_prealloc_queue_reqs: int = 0
- num_prefill_infight_queue_reqs: int = 0
+ num_prefill_inflight_queue_reqs: int = 0
num_decode_prealloc_queue_reqs: int = 0
num_decode_transfer_queue_reqs: int = 0
+ kv_transfer_speed_gb_s: float = 0.0
+ kv_transfer_latency_ms: float = 0.0
+
+ # Retract
total_retracted_reqs: int = 0
+ num_retracted_reqs: int = 0
+ num_paused_reqs: int = 0
+
+ # Utilization
+ utilization: float = 0.0
+ max_running_requests_under_SLO: Optional[int] = None
+
+ # Engine startup
+ engine_startup_time: float = 0.0
+ engine_load_weights_time: float = 0.0
class SchedulerMetricsCollector:
def __init__(self, labels: Dict[str, str]) -> None:
# We need to import prometheus_client after setting the env variable `PROMETHEUS_MULTIPROC_DIR`
- from prometheus_client import Counter, Gauge
+ from prometheus_client import Counter, Gauge, Histogram
self.labels = labels
self.last_log_time = time.perf_counter()
@@ -163,42 +189,54 @@ def __init__(self, labels: Dict[str, str]) -> None:
labelnames=labels.keys(),
multiprocess_mode="mostrecent",
)
-
self.num_used_tokens = Gauge(
name="sglang:num_used_tokens",
documentation="The number of used tokens.",
labelnames=labels.keys(),
multiprocess_mode="mostrecent",
)
-
self.token_usage = Gauge(
name="sglang:token_usage",
documentation="The token usage.",
labelnames=labels.keys(),
multiprocess_mode="mostrecent",
)
-
+ self.swa_token_usage = Gauge(
+ name="sglang:swa_token_usage",
+ documentation="The token usage for SWA layers.",
+ labelnames=labels.keys(),
+ multiprocess_mode="mostrecent",
+ )
self.gen_throughput = Gauge(
name="sglang:gen_throughput",
documentation="The generation throughput (token/s).",
labelnames=labels.keys(),
multiprocess_mode="mostrecent",
)
-
self.num_queue_reqs = Gauge(
name="sglang:num_queue_reqs",
documentation="The number of requests in the waiting queue.",
labelnames=labels.keys(),
multiprocess_mode="mostrecent",
)
-
self.num_grammar_queue_reqs = Gauge(
name="sglang:num_grammar_queue_reqs",
documentation="The number of requests in the grammar waiting queue.",
labelnames=labels.keys(),
multiprocess_mode="mostrecent",
)
-
+ self.num_running_reqs_offline_batch = Gauge(
+ name="sglang:num_running_reqs_offline_batch",
+ documentation="The number of running low-priority offline batch requests(label is 'batch').",
+ labelnames=labels.keys(),
+ multiprocess_mode="mostrecent",
+ )
+ self.avg_request_queue_latency = Gauge(
+ name="sglang:avg_request_queue_latency",
+ documentation="The average request queue latency for the last batch of requests in seconds.",
+ labelnames=labels.keys(),
+ multiprocess_mode="mostrecent",
+ )
self.cache_hit_rate = Gauge(
name="sglang:cache_hit_rate",
documentation="The prefix cache hit rate.",
@@ -206,6 +244,7 @@ def __init__(self, labels: Dict[str, str]) -> None:
multiprocess_mode="mostrecent",
)
+ # Speculative decoding
self.spec_accept_length = Gauge(
name="sglang:spec_accept_length",
documentation="The average acceptance length of speculative decoding.",
@@ -213,65 +252,275 @@ def __init__(self, labels: Dict[str, str]) -> None:
multiprocess_mode="mostrecent",
)
- self.avg_request_queue_latency = Gauge(
- name="sglang:avg_request_queue_latency",
- documentation="The average request queue latency for the last batch of requests in seconds.",
+ # PD disaggregation
+ self.num_prefill_prealloc_queue_reqs = Gauge(
+ name="sglang:num_prefill_prealloc_queue_reqs",
+ documentation="The number of requests in the prefill prealloc queue.",
+ labelnames=labels.keys(),
+ multiprocess_mode="mostrecent",
+ )
+ self.num_prefill_inflight_queue_reqs = Gauge(
+ name="sglang:num_prefill_inflight_queue_reqs",
+ documentation="The number of requests in the prefill inflight queue.",
+ labelnames=labels.keys(),
+ multiprocess_mode="mostrecent",
+ )
+ self.num_decode_prealloc_queue_reqs = Gauge(
+ name="sglang:num_decode_prealloc_queue_reqs",
+ documentation="The number of requests in the decode prealloc queue.",
+ labelnames=labels.keys(),
+ multiprocess_mode="mostrecent",
+ )
+ self.num_decode_transfer_queue_reqs = Gauge(
+ name="sglang:num_decode_transfer_queue_reqs",
+ documentation="The number of requests in the decode transfer queue.",
+ labelnames=labels.keys(),
+ multiprocess_mode="mostrecent",
+ )
+ self.num_bootstrap_failed_reqs = Counter(
+ name="sglang:num_bootstrap_failed_reqs_total",
+ documentation="The number of bootstrap failed requests.",
+ labelnames=labels.keys(),
+ )
+ self.num_transfer_failed_reqs = Counter(
+ name="sglang:num_transfer_failed_reqs_total",
+ documentation="The number of transfer failed requests.",
+ labelnames=labels.keys(),
+ )
+ self.kv_transfer_speed_gb_s = Gauge(
+ name="sglang:kv_transfer_speed_gb_s",
+ documentation="The transfer speed of the KV cache in GB/s.",
+ labelnames=labels.keys(),
+ multiprocess_mode="mostrecent",
+ )
+ self.kv_transfer_latency_ms = Gauge(
+ name="sglang:kv_transfer_latency_ms",
+ documentation="The transfer latency of the KV cache in ms.",
labelnames=labels.keys(),
multiprocess_mode="mostrecent",
)
+ # Retract
self.total_retracted_reqs = Gauge(
name="sglang:total_retracted_reqs",
documentation="The total number of retracted requests due to kvcache full.",
labelnames=labels.keys(),
multiprocess_mode="mostrecent",
)
+ self.num_retracted_reqs = Gauge(
+ name="sglang:num_retracted_reqs",
+ documentation="The number of retracted requests.",
+ labelnames=labels.keys(),
+ )
+ self.num_paused_reqs = Gauge(
+ name="sglang:num_paused_reqs",
+ documentation="The number of paused requests by async weight sync.",
+ labelnames=labels.keys(),
+ )
- # Disaggregation queue metrics
- self.num_prefill_prealloc_queue_reqs = Gauge(
- name="sglang:num_prefill_prealloc_queue_reqs",
- documentation="The number of requests in the prefill prealloc queue.",
+ # Utilization
+ self.utilization = Gauge(
+ name="sglang:utilization",
+ documentation="The utilization.",
labelnames=labels.keys(),
multiprocess_mode="mostrecent",
)
-
- self.num_prefill_infight_queue_reqs = Gauge(
- name="sglang:num_prefill_infight_queue_reqs",
- documentation="The number of requests in the prefill infight queue.",
+ self.max_running_requests_under_SLO = Gauge(
+ name="sglang:max_running_requests_under_SLO",
+ documentation="The maximum number of running requests under SLO.",
labelnames=labels.keys(),
multiprocess_mode="mostrecent",
)
- self.num_decode_prealloc_queue_reqs = Gauge(
- name="sglang:num_decode_prealloc_queue_reqs",
- documentation="The number of requests in the decode prealloc queue.",
+ # Engine startup
+ self.engine_startup_time = Gauge(
+ name="sglang:engine_startup_time",
+ documentation="The time taken for the engine to start up.",
labelnames=labels.keys(),
multiprocess_mode="mostrecent",
)
-
- self.num_decode_transfer_queue_reqs = Gauge(
- name="sglang:num_decode_transfer_queue_reqs",
- documentation="The number of requests in the decode transfer queue.",
+ self.engine_load_weights_time = Gauge(
+ name="sglang:engine_load_weights_time",
+ documentation="The time taken for the engine to load weights.",
labelnames=labels.keys(),
multiprocess_mode="mostrecent",
)
- self.num_bootstrap_failed_reqs = Counter(
- name="sglang:num_bootstrap_failed_reqs",
- documentation="The number of bootstrap failed requests.",
+ # Additional queueing time histogram
+ self.queue_time = Histogram(
+ name="sglang:queue_time_s",
+ documentation="Histogram of queueing time in seconds.",
labelnames=labels.keys(),
+ buckets=[
+ 0.0,
+ 0.1,
+ 0.2,
+ 0.5,
+ 1,
+ 2,
+ 3,
+ 4,
+ 5,
+ 10,
+ 15,
+ 20,
+ 30,
+ 40,
+ 50,
+ 60,
+ 70,
+ 80,
+ 90,
+ 100,
+ 200,
+ 300,
+ 400,
+ 500,
+ 600,
+ 700,
+ 800,
+ 900,
+ 1000,
+ 1200,
+ 1400,
+ 1600,
+ 1800,
+ 2000,
+ 2500,
+ 3000,
+ ],
)
- self.num_transfer_failed_reqs = Counter(
- name="sglang:num_transfer_failed_reqs",
- documentation="The number of transfer failed requests.",
+ # Grammar metrics
+ self.grammar_compilation_time = Histogram(
+ name="sglang:grammar_compilation_time_seconds",
+ documentation="Histogram of grammar compilation time in seconds.",
+ labelnames=labels.keys(),
+ buckets=[
+ 0.0,
+ 0.01,
+ 0.02,
+ 0.05,
+ 0.1,
+ 0.2,
+ 0.5,
+ 1,
+ 2,
+ 5,
+ 10,
+ 20,
+ 30,
+ 60,
+ 90,
+ 120,
+ 240,
+ ],
+ )
+ self.num_grammar_cache_hit = Counter(
+ name="sglang:num_grammar_cache_hit_total",
+ documentation="Number of grammar cache hits.",
labelnames=labels.keys(),
)
+ self.num_grammar_aborted = Counter(
+ name="sglang:num_grammar_aborted_total",
+ documentation="Number of grammar aborted requests.",
+ labelnames=labels.keys(),
+ )
+ self.num_grammar_total = Counter(
+ name="sglang:num_grammar_total",
+ documentation="Number of the total grammar requests.",
+ labelnames=labels.keys(),
+ )
+ self.grammar_schema_count = Histogram(
+ name="sglang:grammar_schema_count",
+ documentation="Histogram of grammar schema count.",
+ labelnames=labels.keys(),
+ buckets=[
+ 0,
+ 1,
+ 2,
+ 5,
+ 10,
+ 20,
+ 30,
+ 40,
+ 60,
+ 80,
+ 100,
+ 120,
+ 140,
+ 160,
+ 180,
+ 200,
+ 300,
+ 400,
+ 500,
+ 700,
+ 1000,
+ ],
+ )
+ self.grammar_ebnf_size = Histogram(
+ name="sglang:grammar_ebnf_size",
+ documentation="Histogram of grammar EBNF size.",
+ labelnames=labels.keys(),
+ buckets=[
+ 0,
+ 50,
+ 100,
+ 200,
+ 300,
+ 500,
+ 1000,
+ 2000,
+ 3000,
+ 5000,
+ 10000,
+ 20000,
+ 30000,
+ 50000,
+ 100000,
+ ],
+ )
+
+ tree_traversal_time_buckets = [
+ 0.0,
+ 0.01,
+ 0.02,
+ 0.05,
+ 0.1,
+ 0.2,
+ 0.5,
+ 1,
+ 2,
+ 5,
+ 10,
+ 15,
+ 30,
+ 60,
+ 90,
+ 120,
+ 240,
+ ]
+ self.grammar_tree_traversal_time_avg = Histogram(
+ name="sglang:grammar_tree_traversal_time_avg",
+ documentation="Histogram of average grammar tree traversal time in seconds.",
+ labelnames=labels.keys(),
+ buckets=tree_traversal_time_buckets,
+ )
+ self.grammar_tree_traversal_time_max = Histogram(
+ name="sglang:grammar_tree_traversal_time_max",
+ documentation="Histogram of max grammar tree traversal time in seconds.",
+ labelnames=labels.keys(),
+ buckets=tree_traversal_time_buckets,
+ )
def _log_gauge(self, gauge, data: Union[int, float]) -> None:
# Convenience function for logging to gauge.
gauge.labels(**self.labels).set(data)
+ def log_histogram(self, histogram, data: Union[int, float]) -> None:
+ histogram.labels(**self.labels).observe(data)
+
def increment_bootstrap_failed_reqs(self) -> None:
self.num_bootstrap_failed_reqs.labels(**self.labels).inc(1)
@@ -282,19 +531,24 @@ def log_stats(self, stats: SchedulerStats) -> None:
self._log_gauge(self.num_running_reqs, stats.num_running_reqs)
self._log_gauge(self.num_used_tokens, stats.num_used_tokens)
self._log_gauge(self.token_usage, stats.token_usage)
+ self._log_gauge(self.swa_token_usage, stats.swa_token_usage)
self._log_gauge(self.gen_throughput, stats.gen_throughput)
self._log_gauge(self.num_queue_reqs, stats.num_queue_reqs)
self._log_gauge(self.num_grammar_queue_reqs, stats.num_grammar_queue_reqs)
+ self._log_gauge(
+ self.num_running_reqs_offline_batch, stats.num_running_reqs_offline_batch
+ )
self._log_gauge(self.cache_hit_rate, stats.cache_hit_rate)
+
+ # Speculative decoding
self._log_gauge(self.spec_accept_length, stats.spec_accept_length)
- self._log_gauge(self.total_retracted_reqs, stats.total_retracted_reqs)
- # Disaggregation metrics
+ # PD disaggregation
self._log_gauge(
self.num_prefill_prealloc_queue_reqs, stats.num_prefill_prealloc_queue_reqs
)
self._log_gauge(
- self.num_prefill_infight_queue_reqs, stats.num_prefill_infight_queue_reqs
+ self.num_prefill_inflight_queue_reqs, stats.num_prefill_inflight_queue_reqs
)
self._log_gauge(
self.num_decode_prealloc_queue_reqs, stats.num_decode_prealloc_queue_reqs
@@ -302,14 +556,59 @@ def log_stats(self, stats: SchedulerStats) -> None:
self._log_gauge(
self.num_decode_transfer_queue_reqs, stats.num_decode_transfer_queue_reqs
)
+ self._log_gauge(self.kv_transfer_speed_gb_s, stats.kv_transfer_speed_gb_s)
+ self._log_gauge(self.kv_transfer_latency_ms, stats.kv_transfer_latency_ms)
+
+ # Retract
+ self._log_gauge(self.total_retracted_reqs, stats.total_retracted_reqs)
+ self._log_gauge(self.num_retracted_reqs, stats.num_retracted_reqs)
+ self._log_gauge(self.num_paused_reqs, stats.num_paused_reqs)
+
+ # Utilization
+ self._log_gauge(self.utilization, stats.utilization)
+ if stats.max_running_requests_under_SLO is not None:
+ self._log_gauge(
+ self.max_running_requests_under_SLO,
+ stats.max_running_requests_under_SLO,
+ )
+
+ # Engine startup time
+ self._log_gauge(self.engine_startup_time, stats.engine_startup_time)
+ if stats.engine_load_weights_time is not None:
+ self._log_gauge(
+ self.engine_load_weights_time, stats.engine_load_weights_time
+ )
self.last_log_time = time.perf_counter()
+ def log_grammar_stats(self, grammar_stats) -> None:
+ # Duck-typed GrammarStats to avoid cross-package dependency
+ if getattr(grammar_stats, "compilation_time", None) is not None:
+ self.log_histogram(
+ self.grammar_compilation_time, grammar_stats.compilation_time
+ )
+ if getattr(grammar_stats, "schema_count", None) is not None:
+ self.log_histogram(self.grammar_schema_count, grammar_stats.schema_count)
+ if getattr(grammar_stats, "ebnf_size", None) is not None:
+ self.log_histogram(self.grammar_ebnf_size, grammar_stats.ebnf_size)
+ tree_times = getattr(grammar_stats, "tree_traversal_time", None)
+ if tree_times:
+ max_time = max(tree_times)
+ avg_time = sum(tree_times) / len(tree_times)
+ self.log_histogram(self.grammar_tree_traversal_time_max, max_time)
+ self.log_histogram(self.grammar_tree_traversal_time_avg, avg_time)
+ if getattr(grammar_stats, "is_cache_hit", False):
+ self.num_grammar_cache_hit.labels(**self.labels).inc(1)
+ if getattr(grammar_stats, "is_grammar_aborted", False):
+ self.num_grammar_aborted.labels(**self.labels).inc(1)
+ self.num_grammar_total.labels(**self.labels).inc(1)
+
class TokenizerMetricsCollector:
def __init__(
self,
- labels: Dict[str, str],
+ server_args: Optional[ServerArgs] = None,
+ labels: Optional[Dict[str, str]] = None,
bucket_time_to_first_token: Optional[List[float]] = None,
bucket_inter_token_latency: Optional[List[float]] = None,
bucket_e2e_request_latency: Optional[List[float]] = None,
@@ -318,7 +617,7 @@ def __init__(
# We need to import prometheus_client after setting the env variable `PROMETHEUS_MULTIPROC_DIR`
from prometheus_client import Counter, Histogram
- self.labels = labels
+ self.labels = labels or {}
self.collect_tokens_histogram = collect_tokens_histogram
self.prompt_tokens_total = Counter(
@@ -334,7 +633,7 @@ def __init__(
)
if collect_tokens_histogram:
- bucket_prompt_tokens = [
+ default_bucket_prompt_tokens = [
100,
300,
500,
@@ -358,39 +657,30 @@ def __init__(
30000,
35000,
40000,
+ 66000,
+ 99000,
+ 132000,
+ 300000,
+ 600000,
+ 900000,
+ 1100000,
]
self.prompt_tokens_histogram = Histogram(
name="sglang:prompt_tokens_histogram",
documentation="Histogram of prompt token length.",
labelnames=labels.keys(),
- buckets=bucket_prompt_tokens,
+ buckets=generate_buckets(
+ server_args.prompt_tokens_buckets, default_bucket_prompt_tokens
+ ),
)
- bucket_generation_tokens = [
- 100,
- 300,
- 500,
- 1000,
- 1200,
- 1500,
- 1700,
- 2000,
- 2500,
- 3000,
- 3500,
- 4000,
- 4500,
- 5000,
- 6000,
- 7000,
- 8000,
- 9000,
- 10000,
- ]
self.generation_tokens_histogram = Histogram(
name="sglang:generation_tokens_histogram",
documentation="Histogram of generation token length.",
labelnames=labels.keys(),
- buckets=bucket_generation_tokens,
+ buckets=generate_buckets(
+ server_args.generation_tokens_buckets,
+ default_bucket_prompt_tokens,
+ ),
)
self.cached_tokens_total = Counter(
@@ -459,7 +749,10 @@ def __init__(
100,
200,
400,
- 800,
+ 600,
+ 1200,
+ 1800,
+ 2400,
]
if bucket_inter_token_latency is None:
@@ -510,6 +803,14 @@ def __init__(
buckets=bucket_e2e_request_latency,
)
+ # Offline batch specific TTFB histogram
+ self.histogram_time_to_first_token_offline_batch = Histogram(
+ name="sglang:time_to_first_token_seconds_offline_batch",
+ documentation="Histogram of time to first token in seconds for offline batch requests.",
+ labelnames=labels.keys(),
+ buckets=bucket_time_to_first_token,
+ )
+
def _log_histogram(self, histogram, data: Union[int, float]) -> None:
histogram.labels(**self.labels).observe(data)
@@ -533,8 +834,26 @@ def observe_one_finished_request(
self._log_histogram(self.prompt_tokens_histogram, prompt_tokens)
self._log_histogram(self.generation_tokens_histogram, generation_tokens)
- def observe_time_to_first_token(self, value: float):
- self.histogram_time_to_first_token.labels(**self.labels).observe(value)
+ def observe_time_to_first_token(self, value: float, label: str = ""):
+ if label == "batch":
+ self.histogram_time_to_first_token_offline_batch.labels(
+ **self.labels
+ ).observe(value)
+ else:
+ self.histogram_time_to_first_token.labels(**self.labels).observe(value)
+
+ def check_time_to_first_token_straggler(self, value: float) -> bool:
+ his = self.histogram_time_to_first_token.labels(**self.labels)
+ total_observations = sum(bucket._value for bucket in his._buckets)
+ if total_observations < 100:
+ return False
+ p99_threshold = total_observations * 0.99
+ cumulative_count = 0
+ for i, bucket in enumerate(his._buckets):
+ cumulative_count += bucket._value
+ if cumulative_count > p99_threshold:
+ return value >= his._upper_bounds[i]
+ return False
def observe_inter_token_latency(self, internval: float, num_new_tokens: int):
adjusted_interval = internval / num_new_tokens
@@ -551,3 +870,105 @@ def observe_inter_token_latency(self, internval: float, num_new_tokens: int):
def observe_one_aborted_request(self):
self.num_aborted_requests_total.labels(**self.labels).inc(1)
+
+
+@dataclass
+class StorageMetrics:
+ prefetch_pgs: List[int] = field(default_factory=list)
+ backup_pgs: List[int] = field(default_factory=list)
+ prefetch_bandwidth: List[float] = field(default_factory=list)
+ backup_bandwidth: List[float] = field(default_factory=list)
+
+
+class StorageMetricsCollector:
+ def __init__(
+ self,
+ labels: Dict[str, str],
+ ):
+ from prometheus_client import Counter, Histogram
+
+ self.labels = labels
+
+ self.prefetched_tokens_total = Counter(
+ name="sglang:prefetched_tokens_total",
+ documentation="Number of prefetched prompt tokens.",
+ labelnames=labels.keys(),
+ )
+
+ self.backuped_tokens_total = Counter(
+ name="sglang:backuped_tokens_total",
+ documentation="Number of backuped tokens.",
+ labelnames=labels.keys(),
+ )
+
+ bucket_io = [
+ 1,
+ 5,
+ 10,
+ 50,
+ 100,
+ ]
+
+ bucket_bandwidth = [
+ 0.1,
+ 0.5,
+ 1,
+ 5,
+ 10,
+ 50,
+ 100,
+ ]
+
+ self.histogram_prefetch_pgs = Histogram(
+ name="sglang:prefetch_pgs",
+ documentation="Histogram of prefetch pages of batches.",
+ labelnames=labels.keys(),
+ buckets=bucket_io,
+ )
+
+ self.histogram_backup_pgs = Histogram(
+ name="sglang:backup_pgs",
+ documentation="Histogram of backup pages of batches.",
+ labelnames=labels.keys(),
+ buckets=bucket_io,
+ )
+
+ self.histogram_prefetch_bandwidth = Histogram(
+ name="sglang:prefetch_bandwidth",
+ documentation="Histogram of prefetch bandwidth in GB/s.",
+ labelnames=labels.keys(),
+ buckets=bucket_bandwidth,
+ )
+
+ self.histogram_backup_bandwidth = Histogram(
+ name="sglang:backup_bandwidth",
+ documentation="Histogram of backup bandwidth in GB/s.",
+ labelnames=labels.keys(),
+ buckets=bucket_bandwidth,
+ )
+
+ def log_prefetched_tokens(self, prefetched_tokens: int):
+ if prefetched_tokens > 0:
+ self.prefetched_tokens_total.labels(**self.labels).inc(prefetched_tokens)
+
+ def log_backuped_tokens(self, backuped_tokens: int):
+ if backuped_tokens > 0:
+ self.backuped_tokens_total.labels(**self.labels).inc(backuped_tokens)
+
+ def _log_histogram(self, histogram, data: Union[int, float]):
+ histogram.labels(**self.labels).observe(data)
+
+ def log_storage_metrics(self, storage_metrics: Optional[StorageMetrics] = None):
+ if storage_metrics is None:
+ return
+
+ assert isinstance(storage_metrics, StorageMetrics)
+
+ for v in storage_metrics.prefetch_pgs:
+ self._log_histogram(self.histogram_prefetch_pgs, v)
+ for v in storage_metrics.backup_pgs:
+ self._log_histogram(self.histogram_backup_pgs, v)
+ for v in storage_metrics.prefetch_bandwidth:
+ self._log_histogram(self.histogram_prefetch_bandwidth, v)
+ for v in storage_metrics.backup_bandwidth:
+ self._log_histogram(self.histogram_backup_bandwidth, v)
diff --git a/python/sglang/srt/metrics/startup_func_log_and_timer.py b/python/sglang/srt/metrics/startup_func_log_and_timer.py
new file mode 100644
index 00000000000..752daccbd71
--- /dev/null
+++ b/python/sglang/srt/metrics/startup_func_log_and_timer.py
@@ -0,0 +1,150 @@
+"""
+Records startup latency breakdown by context using gauge metrics in seconds
+"""
+
+import logging
+import time
+from contextlib import contextmanager
+from functools import wraps
+from typing import Any, Callable, Dict, Generator, Optional
+
+logger = logging.getLogger(__name__)
+
+enable_startup_metrics = False
+STARTUP_LATENCY_SECONDS = None
+# Track maximum durations for each context
+_max_durations: Dict[str, float] = {}
+
+
+def enable_startup_timer():
+ """Initialize startup latency metrics when metrics are enabled"""
+ # We need to import prometheus_client after setting the env variable `PROMETHEUS_MULTIPROC_DIR`
+ from prometheus_client import Gauge
+
+ global enable_startup_metrics, STARTUP_LATENCY_SECONDS
+ enable_startup_metrics = True
+
+ STARTUP_LATENCY_SECONDS = Gauge(
+ "sglang:startup_latency_breakdown_seconds_max",
+ "Startup latency breakdown in seconds by context, only records the maximum duration if the context is called multiple times.",
+ labelnames=["context"],
+ multiprocess_mode="mostrecent",
+ )
+
+
+def set_startup_metric(context: str, value: float, should_log: bool = True):
+ """Set the startup metric for a given context"""
+ if should_log:
+ logger.info(f"Setting startup metric: {context} took {value:.3f}s")
+
+ if not enable_startup_metrics:
+ return
+ current_max = _max_durations.get(context, 0.0)
+ if value > current_max:
+ _max_durations[context] = value
+ STARTUP_LATENCY_SECONDS.labels(context=context).set(value)
+
+
+def reset_startup_timers():
+ """Reset all recorded maximum durations. Useful for testing or reinitialization."""
+ global _max_durations
+ _max_durations.clear()
+
+
+def get_max_duration(context: str) -> Optional[float]:
+ """Get the maximum recorded duration for a context name."""
+ return _max_durations.get(context)
+
+
+@contextmanager
+def startup_timer(name: str, log_only: bool = False) -> Generator[None, None, None]:
+ """
+ Context manager to measure startup latency for arbitrary code blocks.
+ Only records the maximum duration if the context is called multiple times.
+
+ Usage:
+ with startup_timer("model_loading"):
+ # model loading code
+ model = load_model()
+
+ with startup_timer("memory_allocation"):
+ # memory setup code
+ allocate_memory()
+ """
+ start_time = time.monotonic()
+ try:
+ yield
+ finally:
+ duration_seconds = time.monotonic() - start_time
+
+ # Track the maximum duration for this context name
+ current_max = _max_durations.get(name, 0.0)
+ is_new_max = duration_seconds > current_max
+
+ if is_new_max:
+ _max_durations[name] = duration_seconds
+
+ # Only update Prometheus gauge if this is a new maximum
+ if enable_startup_metrics and not log_only:
+ STARTUP_LATENCY_SECONDS.labels(context=name).set(duration_seconds)
+
+ # Log the measured duration
+ logger.info(f"Startup timing: {name} took {duration_seconds:.3f}s")
+
+
+def time_startup_latency(
+ func: Callable = None, name: Optional[str] = None, log_only: bool = False
+) -> Callable[..., Any]:
+ """
+ A decorator to measure startup context latency and record it in seconds.
+ Only records the maximum duration if the context is called multiple times.
+
+ Usage:
+ @time_startup_latency
+ def load_model():
+ # model loading code
+
+ @time_startup_latency(name="custom_init")
+ def initialize_something():
+ # initialization code
+
+ @time_startup_latency(name="debug_only", log_only=True)
+ def debug_function():
+ # This will only log, not record to Prometheus
+ """
+
+ def measure(func: Callable[..., Any]) -> Callable[..., Any]:
+ nonlocal name
+ name = name or func.__name__
+
+ @wraps(func)
+ def wrapper(*args, **kwargs):
+ start_time = time.monotonic()
+ try:
+ result = func(*args, **kwargs)
+ return result
+ finally:
+ duration_seconds = time.monotonic() - start_time
+
+ # Track the maximum duration for this context name
+ current_max = _max_durations.get(name, 0.0)
+ is_new_max = duration_seconds > current_max
+
+ if is_new_max:
+ _max_durations[name] = duration_seconds
+
+ # Only update Prometheus gauge if this is a new maximum
+ if enable_startup_metrics and not log_only:
+ STARTUP_LATENCY_SECONDS.labels(context=name).set(
+ duration_seconds
+ )
+
+ # Log the timing
+ logger.info(f"Startup timing: {name} took {duration_seconds:.3f}s")
+
+ return wrapper
+
+ if func:
+ return measure(func)
+ else:
+ return measure
diff --git a/python/sglang/srt/metrics/utils.py b/python/sglang/srt/metrics/utils.py
new file mode 100644
index 00000000000..ffc7e106665
--- /dev/null
+++ b/python/sglang/srt/metrics/utils.py
@@ -0,0 +1,48 @@
+# Copyright 2023-2025 SGLang Team
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Utilities for Prometheus Metrics."""
+import math
+from typing import List
+
+
+def two_sides_exponential_buckets(
+ middle: float, base: float, count: int
+) -> List[float]:
+ buckets = []
+ half_count = math.ceil(count / 2)
+ distance = 1
+ buckets.append(middle)
+ for i in range(half_count):
+ distance *= base
+ buckets.append(middle + distance)
+ buckets.append(max(0, middle - distance))
+ return sorted(set(buckets))
+
+
+def generate_buckets(
+ buckets_rule: List[str], default_buckets: List[float]
+) -> List[float]:
+ if not buckets_rule:
+ buckets_rule = ["default"]
+
+ assert len(buckets_rule) > 0
+ rule = buckets_rule[0]
+ if rule == "tse":
+ middle, base, count = buckets_rule[1:]
+ assert float(base) > 1.0, "Base must be greater than 1.0"
+ return two_sides_exponential_buckets(float(middle), float(base), int(count))
+ if rule == "default":
+ return sorted(set(default_buckets))
+ assert rule == "customer"
+ return sorted(set([float(x) for x in buckets_rule[1:]]))
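+
+
+# Illustrative usage of the bucket rules above (a rough sketch; the example
+# values below are made up and not taken from any real SGLang configuration):
+#
+#   generate_buckets(["default"], [0.1, 1, 10])
+#       -> [0.1, 1, 10]            # fall back to the caller-provided defaults
+#   generate_buckets(["tse", "100", "2", "3"], [0.1, 1, 10])
+#       -> [96.0, 98.0, 100.0, 102.0, 104.0]
+#          # exponential spread on both sides of middle=100 with base=2
+#   generate_buckets(["customer", "1", "5", "25"], [0.1, 1, 10])
+#       -> [1.0, 5.0, 25.0]        # user-supplied bucket edges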
diff --git a/python/sglang/srt/model_executor/cpu_graph_runner.py b/python/sglang/srt/model_executor/cpu_graph_runner.py
new file mode 100644
index 00000000000..bc1e5c5b877
--- /dev/null
+++ b/python/sglang/srt/model_executor/cpu_graph_runner.py
@@ -0,0 +1,640 @@
+# Copyright 2023-2024 SGLang Team
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Run the model with cpu torch compile."""
+
+# The implementation of CPUGraphRunner follows the CudaGraphRunner
+
+from __future__ import annotations
+
+import logging
+from contextlib import contextmanager
+from typing import TYPE_CHECKING, Callable, Optional, Union
+
+import psutil
+import torch
+import tqdm
+
+from sglang.srt.distributed import get_tensor_model_parallel_rank
+from sglang.srt.distributed.parallel_state import GroupCoordinator
+from sglang.srt.layers.logits_processor import LogitsProcessorOutput
+from sglang.srt.model_executor.forward_batch_info import (
+ CaptureHiddenMode,
+ ForwardBatch,
+ ForwardMode,
+ PPProxyTensors,
+)
+from sglang.srt.patch_torch import monkey_patch_torch_compile
+from sglang.srt.speculative.spec_info import SpeculativeAlgorithm
+from sglang.srt.utils import (
+ log_info_on_rank0,
+ require_attn_tp_gather,
+ require_gathered_buffer,
+ require_mlp_sync,
+ require_mlp_tp_gather,
+)
+
+logger = logging.getLogger(__name__)
+
+if TYPE_CHECKING:
+ from sglang.srt.model_executor.model_runner import ModelRunner
+
+
+@contextmanager
+def patch_model(
+ model: torch.nn.Module,
+ enable_compile: bool,
+ num_tokens: int,
+ tp_group: GroupCoordinator,
+):
+ """Patch the model to make it compatible with torch.compile"""
+ backup_ca_comm = None
+
+ try:
+ if enable_compile:
+ backup_ca_comm = tp_group.ca_comm
+ # Use custom-allreduce here.
+ # We found the custom allreduce is much faster than the built-in allreduce in torch,
+ # even with ENABLE_INTRA_NODE_COMM=1.
+ # tp_group.ca_comm = None
+ yield torch.compile(
+ torch.no_grad()(model.forward),
+ dynamic=False,
+ )
+ else:
+ yield model.forward
+ finally:
+ if enable_compile:
+ tp_group.ca_comm = backup_ca_comm
+
+
+def set_torch_compile_config():
+ import torch._dynamo.config
+ import torch._inductor.config
+
+ torch._inductor.config.fx_graph_cache = True # Experimental feature to reduce compilation times, will be on by default in future
+ torch._inductor.config.freezing = True
+ torch._dynamo.config.accumulated_cache_size_limit = 1024
+ if hasattr(torch._dynamo.config, "cache_size_limit"):
+ torch._dynamo.config.cache_size_limit = 1024
+ monkey_patch_torch_compile()
+
+
+def get_batch_sizes_to_capture(model_runner: ModelRunner):
+ server_args = model_runner.server_args
+ # cpu torch compile only speeds up decoding by
+ # reducing python overhead when bs is small
+ capture_bs = list(range(1, 17))
+ capture_bs = [bs for bs in capture_bs if bs <= server_args.torch_compile_max_bs]
+ capture_bs = [bs for bs in capture_bs if bs <= model_runner.req_to_token_pool.size]
+ capture_bs = list(sorted(set(capture_bs)))
+ assert len(capture_bs) > 0 and capture_bs[0] > 0, f"{capture_bs=}"
+ return capture_bs
+
+
+def register_fake_ops():
+ """
+ Registers fake/meta implementations for all custom sgl_kernel CPU operators
+ using torch.library.register_fake to support torch.compile
+ """
+
+ none_return_ops = [
+ "shm_allreduce",
+ "bmm_cpu",
+ "fused_add_rmsnorm_cpu",
+ "decode_attention_cpu",
+ "extend_attention_cpu",
+ ]
+ for op in none_return_ops:
+
+ @torch.library.register_fake(f"sgl_kernel::{op}")
+ def _(*args, **kwargs):
+ return
+
+ for op in [
+ "rmsnorm_cpu",
+ "l2norm_cpu",
+ "fused_experts_cpu",
+ "shared_expert_cpu",
+ ]:
+
+ @torch.library.register_fake(f"sgl_kernel::{op}")
+ def _(input, *args, **kwargs):
+ return torch.empty_like(input)
+
+ @torch.library.register_fake("sgl_kernel::qkv_proj_with_rope")
+ def _(
+ hidden_states,
+ q_a_proj_weight,
+ q_b_proj_weight,
+ kv_a_proj_weight,
+ w_kc,
+ q_a_layernorm_weight,
+ kv_a_layernorm_weight,
+ positions,
+ cos_sin_cache,
+ eps,
+ use_int8_w8a8,
+ use_fp8_w8a16,
+ q_a_proj_scale,
+ q_b_proj_scale,
+ kv_a_proj_scale,
+ is_vnni,
+ block_size,
+ ):
+ num_seqs = hidden_states.shape[0]
+ num_heads = w_kc.shape[0]
+ kv_lora_rank = w_kc.shape[1]
+ qk_rope_head_dim = kv_a_proj_weight.shape[0] - kv_lora_rank
+ q_input = torch.empty(
+ num_seqs,
+ num_heads,
+ kv_lora_rank + qk_rope_head_dim,
+ dtype=hidden_states.dtype,
+ device=hidden_states.device,
+ )
+ k_input = torch.empty(
+ num_seqs,
+ 1,
+ kv_lora_rank + qk_rope_head_dim,
+ dtype=hidden_states.dtype,
+ device=hidden_states.device,
+ )
+ v_input = k_input.narrow(-1, 0, kv_lora_rank)
+ return q_input, k_input, v_input
+
+ @torch.library.register_fake("sgl_kernel::rotary_embedding_cpu")
+ def _(positions, query, key, head_size, cos_sin_cache, is_neox):
+ if query.ndim == 2:
+ return query, key
+ else:
+ return torch.empty_like(query), torch.empty_like(key)
+
+ @torch.library.register_fake("sgl_kernel::qkv_proj_with_rope_fused_weight")
+ def _(
+ hidden_states,
+ q_a_proj_weight,
+ q_b_proj_weight,
+ w_kc,
+ q_a_layernorm_weight,
+ kv_a_layernorm_weight,
+ positions,
+ cos_sin_cache,
+ eps,
+ use_int8_w8a8,
+ use_fp8_w8a16,
+ qkv_a_proj_scale,
+ q_b_proj_scale,
+ is_vnni,
+ block_size,
+ q_lora_rank,
+ kv_lora_rank,
+ qk_rope_head_dim,
+ ):
+ num_seqs = hidden_states.shape[0]
+ num_heads = w_kc.shape[0]
+ kv_lora_rank = w_kc.shape[1]
+ weight_chunks = torch.split(
+ q_a_proj_weight, [q_lora_rank, kv_lora_rank + qk_rope_head_dim], dim=0
+ )
+ qk_rope_head_dim = weight_chunks[1].shape[0] - kv_lora_rank
+ q_input = torch.empty(
+ num_seqs,
+ num_heads,
+ kv_lora_rank + qk_rope_head_dim,
+ dtype=hidden_states.dtype,
+ device=hidden_states.device,
+ )
+ k_input = torch.empty(
+ num_seqs,
+ 1,
+ kv_lora_rank + qk_rope_head_dim,
+ dtype=hidden_states.dtype,
+ device=hidden_states.device,
+ )
+ v_input = k_input.narrow(-1, 0, kv_lora_rank)
+ return q_input, k_input, v_input
+
+ @torch.library.register_fake("sgl_kernel::weight_packed_linear")
+ def _(x, weight, bias, is_vnni):
+ return x.new_empty(x.shape[0], weight.shape[0])
+
+ @torch.library.register_fake("sgl_kernel::per_token_quant_int8_cpu")
+ def _(input):
+ M = input.shape[0]
+ K = input.shape[1]
+ Aq = input.new_empty(M, K, dtype=torch.int8)
+ As = input.new_empty(M, dtype=torch.float32)
+ return Aq, As
+
+ @torch.library.register_fake("sgl_kernel::int8_scaled_mm_cpu")
+ def _(mat1, mat2, scales1, scales2, bias, out_dtype, is_vnni):
+ M = mat1.shape[0]
+ N = mat2.shape[0]
+ out = mat1.new_empty(M, N, dtype=out_dtype)
+ return out
+
+ @torch.library.register_fake("sgl_kernel::grouped_topk_cpu")
+ def _(
+ hidden_states,
+ gating_output,
+ topk,
+ renormalize,
+ num_expert_group,
+ topk_group,
+ num_fused_shared_experts,
+ routed_scaling_factor,
+ num_token_non_padded,
+ ):
+ num_tokens = hidden_states.shape[0]
+ shape = (num_tokens, topk)
+ device = hidden_states.device
+ topk_weights = torch.empty(shape, device=device, dtype=torch.float32)
+ topk_ids = torch.empty(shape, device=device, dtype=torch.int)
+ return topk_weights, topk_ids
+
+ @torch.library.register_fake("sgl_kernel::biased_grouped_topk_cpu")
+ def _(
+ hidden_states,
+ gating_output,
+ correction_bias,
+ topk,
+ renormalize,
+ num_expert_group,
+ topk_group,
+ num_fused_shared_experts,
+ routed_scaling_factor,
+ num_token_non_padded,
+ ):
+ num_tokens = hidden_states.shape[0]
+ shape = (num_tokens, topk)
+ device = hidden_states.device
+ topk_weights = torch.empty(shape, device=device, dtype=torch.float32)
+ topk_ids = torch.empty(shape, device=device, dtype=torch.int)
+ return topk_weights, topk_ids
+
+ @torch.library.register_fake("sgl_kernel::topk_sigmoid_cpu")
+ def _(hidden_states, gating_output, topk, renormalize):
+ num_tokens = hidden_states.shape[0]
+ shape = (num_tokens, topk)
+ return (
+ torch.empty(shape, device=hidden_states.device, dtype=torch.float),
+ torch.empty(shape, device=hidden_states.device, dtype=torch.int),
+ )
+
+ @torch.library.register_fake("sgl_kernel::topk_softmax_cpu")
+ def _(
+ hidden_states,
+ gating_output,
+ topk,
+ renormalize,
+ ):
+ num_tokens = hidden_states.shape[0]
+ shape = (num_tokens, topk)
+ return (
+ torch.empty(shape, device=hidden_states.device, dtype=torch.float),
+ torch.empty(shape, device=hidden_states.device, dtype=torch.int),
+ )
+
+ @torch.library.register_fake("sgl_kernel::silu_and_mul_cpu")
+ def _(input):
+ return input.new_empty(input.shape[0], input.shape[1] // 2)
+
+ @torch.library.register_fake("sgl_kernel::int8_scaled_mm_with_quant")
+ def _(
+ mat1,
+ mat2,
+ scales2,
+ bias,
+ out_dtype,
+ is_vnni,
+ ):
+ M = mat1.shape[0]
+ N = mat2.shape[0]
+ return mat1.new_empty(M, N, dtype=out_dtype)
+
+ @torch.library.register_fake("sgl_kernel::fp8_scaled_mm_cpu")
+ def _(
+ mat1,
+ mat2,
+ scales2,
+ block_size,
+ bias,
+ out_dtype,
+ is_vnni,
+ ):
+ M = mat1.shape[0]
+ N = mat2.shape[0]
+ return mat1.new_empty(M, N, dtype=out_dtype)
+
+
+# TODO Remove unnecessary settings for CPUGraphRunner.
+# Re-abstract the graph runner and restructure CPUGraphRunner to reuse the same logic.
+class CPUGraphRunner:
+ """A CPUGraphRunner runs the forward pass of a model with cpu torch.compile."""
+
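+ # Typical usage (sketch): construct the runner once, then on each decode step check
+ # can_run(forward_batch) and call replay(forward_batch) to run the compiled forward.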
+ def __init__(self, model_runner: ModelRunner):
+ # Parse args
+ self.model_runner = model_runner
+ self.device = model_runner.device
+ self.graphs = {}
+ self.output_buffers = {}
+ self.enable_torch_compile = model_runner.server_args.enable_torch_compile
+ self.disable_padding = model_runner.server_args.disable_cuda_graph_padding
+ self.is_encoder_decoder = model_runner.model_config.is_encoder_decoder
+ self.require_gathered_buffer = require_gathered_buffer(model_runner.server_args)
+ self.require_mlp_tp_gather = require_mlp_tp_gather(model_runner.server_args)
+ self.require_mlp_sync = require_mlp_sync(model_runner.server_args)
+ self.require_attn_tp_gather = require_attn_tp_gather(model_runner.server_args)
+ self.enable_two_batch_overlap = (
+ model_runner.server_args.enable_two_batch_overlap
+ )
+ self.speculative_algorithm = model_runner.server_args.speculative_algorithm
+ self.enable_profile_cuda_graph = (
+ model_runner.server_args.enable_profile_cuda_graph
+ )
+ self.tp_size = model_runner.server_args.tp_size
+ self.dp_size = model_runner.server_args.dp_size
+ self.pp_size = model_runner.server_args.pp_size
+
+ self.capture_forward_mode = ForwardMode.DECODE
+ self.capture_hidden_mode = CaptureHiddenMode.NULL
+ self.num_tokens_per_bs = 1
+
+ # If returning hidden states is enabled, set initial capture hidden mode to full to avoid double-capture on startup
+ if model_runner.server_args.enable_return_hidden_states:
+ self.capture_hidden_mode = CaptureHiddenMode.FULL
+
+ assert (
+ not self.model_runner.server_args.enable_lora
+ ), "CPUGraphRunner does not support LoRA yet."
+ assert (
+ not self.enable_two_batch_overlap
+ ), "CPUGraphRunner does not support two batch overlap yet."
+ assert (
+ not self.require_mlp_tp_gather
+ ), "CPUGraphRunner does not support MLP TP gather yet."
+ assert (
+ not self.require_mlp_sync
+ ), "CPUGraphRunner does not support MLP sync yet."
+ assert (
+ not self.require_gathered_buffer
+ ), "CPUGraphRunner does not support gathered buffer yet."
+ assert (
+ model_runner.spec_algorithm == SpeculativeAlgorithm.NONE
+ ), "CPUGraphRunner does not support speculative inference yet."
+ # TODO add compile support for encoder-decoder models
+ assert (
+ not self.is_encoder_decoder
+ ), "CPUGraphRunner does not support encoder-decoder models yet."
+ assert self.dp_size == 1, "CPUGraphRunner does not support DP yet."
+ assert self.pp_size == 1, "CPUGraphRunner does not support PP yet."
+
+ # Batch sizes to capture
+ self.capture_bs = get_batch_sizes_to_capture(model_runner)
+ log_info_on_rank0(logger, f"Capture cpu graph bs {self.capture_bs}")
+ # Attention backend
+ self.max_bs = max(self.capture_bs)
+ self.max_num_token = self.max_bs * self.num_tokens_per_bs
+
+ self.seq_len_fill_value = (
+ self.model_runner.attn_backend.get_graph_seq_len_fill_value()
+ )
+
+ if self.enable_torch_compile:
+ register_fake_ops()
+ set_torch_compile_config()
+
+ # Graph inputs
+ with torch.device(self.device):
+ self.input_ids = torch.zeros((self.max_num_token,), dtype=torch.int64)
+ self.req_pool_indices = torch.zeros((self.max_bs,), dtype=torch.int64)
+ self.seq_lens = torch.full(
+ (self.max_bs,), self.seq_len_fill_value, dtype=torch.int64
+ )
+ self.out_cache_loc = torch.zeros((self.max_num_token,), dtype=torch.int64)
+ self.positions = torch.zeros((self.max_num_token,), dtype=torch.int64)
+ self.mrope_positions = torch.zeros((3, self.max_bs), dtype=torch.int64)
+ self.num_token_non_padded = torch.zeros((1,), dtype=torch.int64)
+ self.custom_mask = torch.ones(
+ (
+ (self.seq_lens.sum().item() + self.max_num_token)
+ * self.num_tokens_per_bs
+ ),
+ dtype=torch.bool,
+ device=self.device,
+ )
+
+ # Capture
+ try:
+ self.capture()
+ except RuntimeError as e:
+ raise Exception(
+ f"Capture CPU graph failed: {e}\n{CPU_GRAPH_CAPTURE_FAILED_MSG}"
+ )
+
+ def can_run(self, forward_batch: ForwardBatch):
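+ # CPUGraphRunner does not pad batches yet (see the TODO above replay), so the
+ # exact batch size must have been captured.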
+ is_bs_supported = forward_batch.batch_size in self.graphs
+
+ requested_capture_hidden_mode = max(
+ forward_batch.capture_hidden_mode,
+ (
+ forward_batch.spec_info.capture_hidden_mode
+ if getattr(forward_batch.spec_info, "capture_hidden_mode", None)
+ is not None
+ else CaptureHiddenMode.NULL
+ ),
+ )
+ capture_hidden_mode_matches = (
+ requested_capture_hidden_mode == CaptureHiddenMode.NULL
+ or requested_capture_hidden_mode == self.capture_hidden_mode
+ )
+
+ return is_bs_supported and capture_hidden_mode_matches
+
+ def capture(self) -> None:
+ capture_range = (
+ tqdm.tqdm(list(reversed(self.capture_bs)))
+ if get_tensor_model_parallel_rank() == 0
+ else reversed(self.capture_bs)
+ )
+ for bs in capture_range:
+ if get_tensor_model_parallel_rank() == 0:
+ avail_mem = psutil.virtual_memory().available / (1 << 30)
+ capture_range.set_description(
+ f"Capturing batches ({bs=} {avail_mem=:.2f} GB)"
+ )
+
+ with patch_model(
+ self.model_runner.model,
+ bs in self.capture_bs,
+ num_tokens=bs * self.num_tokens_per_bs,
+ tp_group=self.model_runner.tp_group,
+ ) as forward:
+ (
+ graph,
+ output_buffers,
+ ) = self.capture_one_batch_size(bs, forward)
+ self.graphs[bs] = graph
+ self.output_buffers[bs] = output_buffers
+
+ def capture_one_batch_size(self, bs: int, forward: Callable):
+ num_tokens = bs * self.num_tokens_per_bs
+
+ # Graph inputs
+ input_ids = self.input_ids[:num_tokens]
+ req_pool_indices = self.req_pool_indices[:bs]
+ seq_lens = self.seq_lens[:bs]
+ out_cache_loc = self.out_cache_loc[:num_tokens]
+ positions = self.positions[:num_tokens]
+ mrope_positions = self.mrope_positions[:, :bs]
+ self.num_token_non_padded[...] = num_tokens
+
+ spec_info = self.get_spec_info(num_tokens)
+ if self.capture_hidden_mode != CaptureHiddenMode.FULL:
+ self.capture_hidden_mode = (
+ spec_info.capture_hidden_mode if spec_info else CaptureHiddenMode.NULL
+ )
+
+ forward_batch = ForwardBatch(
+ forward_mode=self.capture_forward_mode,
+ batch_size=bs,
+ input_ids=input_ids,
+ req_pool_indices=req_pool_indices,
+ seq_lens=seq_lens,
+ req_to_token_pool=self.model_runner.req_to_token_pool,
+ token_to_kv_pool=self.model_runner.token_to_kv_pool,
+ attn_backend=self.model_runner.attn_backend,
+ out_cache_loc=out_cache_loc,
+ seq_lens_sum=seq_lens.sum().item(),
+ return_logprob=False,
+ positions=positions,
+ mrope_positions=mrope_positions,
+ spec_algorithm=self.model_runner.spec_algorithm,
+ spec_info=spec_info,
+ capture_hidden_mode=self.capture_hidden_mode,
+ num_token_non_padded=self.num_token_non_padded,
+ global_forward_mode=self.capture_forward_mode,
+ )
+
+ # Attention backend
+ self.model_runner.attn_backend.init_forward_metadata(forward_batch)
+ # Run one eager forward pass first so that attributes are not set at runtime
+ # (e.g., self.attn_mha.kv_b_proj = self.kv_b_proj), which is required for
+ # full-graph compile on CPU.
+ self.model_runner.model.forward(
+ forward_batch.input_ids,
+ forward_batch.positions,
+ forward_batch,
+ )
+
+ # Run and capture
+ def run_once():
+ # Clean intermediate result cache for DP attention
+ forward_batch.dp_local_start_pos = forward_batch.dp_local_num_tokens = None
+ logits_output_or_pp_proxy_tensors = forward(
+ input_ids,
+ forward_batch.positions,
+ forward_batch,
+ )
+ return logits_output_or_pp_proxy_tensors
+
+ with torch.no_grad():
+ for _ in range(2):
+ self.model_runner.tp_group.barrier()
+ out = run_once()
+ return forward, out
+
+ def recapture_if_needed(self, forward_batch: ForwardBatch):
+
+ # If the required capture_hidden_mode changes, we need to recapture the graph
+
+ # These are the different factors that can influence the capture_hidden_mode
+ capture_hidden_mode_required_by_forward_batch = (
+ forward_batch.capture_hidden_mode
+ )
+ capture_hidden_mode_required_by_spec_info = getattr(
+ forward_batch.spec_info, "capture_hidden_mode", CaptureHiddenMode.NULL
+ )
+ capture_hidden_mode_required_for_returning_hidden_states = (
+ CaptureHiddenMode.FULL
+ if self.model_runner.server_args.enable_return_hidden_states
+ else CaptureHiddenMode.NULL
+ )
+
+ # Determine the highest capture_hidden_mode required
+ # (If we have FULL, we can emulate LAST or NULL)
+ # (If we have LAST, we can emulate NULL)
+ required_capture_hidden_mode = max(
+ capture_hidden_mode_required_by_forward_batch,
+ capture_hidden_mode_required_by_spec_info,
+ capture_hidden_mode_required_for_returning_hidden_states,
+ )
+
+ # If the current hidden mode is no longer aligned with the required hidden mode, we need to set it to what is required and re-capture
+ if self.capture_hidden_mode != required_capture_hidden_mode:
+ self.capture_hidden_mode = required_capture_hidden_mode
+ self.capture()
+
+ # TODO add padding support for CPUGraphRunner
+ def replay(
+ self,
+ forward_batch: ForwardBatch,
+ skip_attn_backend_init: bool = False,
+ pp_proxy_tensors: Optional[PPProxyTensors] = None,
+ ) -> Union[LogitsProcessorOutput, PPProxyTensors]:
+ assert (
+ pp_proxy_tensors is None
+ ), "PPProxyTensors is not supported in CPUGraphRunner yet."
+ self.recapture_if_needed(forward_batch)
+ self.model_runner.attn_backend.init_forward_metadata(forward_batch)
+ output = self.graphs[forward_batch.batch_size](
+ forward_batch.input_ids,
+ forward_batch.positions,
+ forward_batch,
+ )
+ return output
+
+ def get_spec_info(self, num_tokens: int):
+ spec_info = None
+ if self.model_runner.spec_algorithm.is_eagle():
+ from sglang.srt.speculative.eagle_utils import EagleVerifyInput
+
+ if self.model_runner.is_draft_worker:
+ raise RuntimeError("This should not happen.")
+ else:
+ spec_info = EagleVerifyInput(
+ draft_token=None,
+ custom_mask=self.custom_mask,
+ positions=None,
+ retrive_index=None,
+ retrive_next_token=None,
+ retrive_next_sibling=None,
+ retrive_cum_len=None,
+ spec_steps=self.model_runner.server_args.speculative_num_steps,
+ topk=self.model_runner.server_args.speculative_eagle_topk,
+ draft_token_num=self.model_runner.server_args.speculative_num_draft_tokens,
+ capture_hidden_mode=CaptureHiddenMode.FULL,
+ seq_lens_sum=None,
+ seq_lens_cpu=None,
+ )
+
+ return spec_info
+
+
+CPU_GRAPH_CAPTURE_FAILED_MSG = (
+ "Possible solutions:\n"
+ "1. set --mem-fraction-static to a smaller value (e.g., 0.8 or 0.7)\n"
+ "2. set --torch-compile-max-bs to a smaller value (e.g., 8)\n"
+ "3. disable torch compile by not using --enable-torch-compile\n"
+ "Open an issue on GitHub https://github.com/sgl-project/sglang/issues/new/choose \n"
+)
diff --git a/python/sglang/srt/model_executor/cuda_graph_runner.py b/python/sglang/srt/model_executor/cuda_graph_runner.py
index aeca8dcb7e2..cb15c9afb0c 100644
--- a/python/sglang/srt/model_executor/cuda_graph_runner.py
+++ b/python/sglang/srt/model_executor/cuda_graph_runner.py
@@ -15,22 +15,908 @@
from __future__ import annotations
-from typing import TYPE_CHECKING
+import bisect
+import gc
+import inspect
+import logging
+import os
+from contextlib import contextmanager
+from typing import TYPE_CHECKING, Callable, Optional, Union
import torch
+import tqdm
+from torch.profiler import ProfilerActivity, profile
-from sglang.srt.model_executor.graph_runner import GraphRunner
+from sglang.srt.custom_op import CustomOp
+from sglang.srt.distributed import get_tensor_model_parallel_rank
+from sglang.srt.distributed.device_communicators.pynccl_allocator import (
+ set_graph_pool_id,
+)
+from sglang.srt.distributed.parallel_state import GroupCoordinator, graph_capture
+from sglang.srt.layers.dp_attention import (
+ DpPaddingMode,
+ get_attention_tp_rank,
+ get_attention_tp_size,
+ set_dp_buffer_len,
+)
+from sglang.srt.layers.logits_processor import LogitsProcessorOutput
+from sglang.srt.layers.torchao_utils import save_gemlite_cache
+from sglang.srt.model_executor.forward_batch_info import (
+ CaptureHiddenMode,
+ ForwardBatch,
+ ForwardMode,
+ PPProxyTensors,
+ enable_num_token_non_padded,
+)
+from sglang.srt.patch_torch import monkey_patch_torch_compile
+from sglang.srt.two_batch_overlap import TboCudaGraphRunnerPlugin
+from sglang.srt.utils import (
+ empty_context,
+ get_available_gpu_memory,
+ get_device_memory_capacity,
+ log_info_on_rank0,
+ require_attn_tp_gather,
+ require_gathered_buffer,
+ require_mlp_sync,
+ require_mlp_tp_gather,
+)
+
+logger = logging.getLogger(__name__)
+
+try:
+ import torch.distributed as dist
+
+ from sglang.srt.distributed import (
+ get_tensor_model_parallel_rank,
+ get_tensor_model_parallel_world_size,
+ split_tensor_along_last_dim,
+ tensor_model_parallel_all_gather,
+ tensor_model_parallel_all_reduce,
+ )
+
+ SGLANG_DIST_ACTIVATED = True
+except ImportError as ex:
+ SGLANG_DIST_ACTIVATED = False
+
+
+def get_local_rank() -> int:
+ if SGLANG_DIST_ACTIVATED:
+ return get_tensor_model_parallel_rank()
+ else:
+ return 0
if TYPE_CHECKING:
from sglang.srt.model_executor.model_runner import ModelRunner
+# Detect whether the current forward pass is in capture mode
+is_capture_mode = False
+
+
+def get_is_capture_mode():
+ return is_capture_mode
+
+
+@contextmanager
+def model_capture_mode():
+ global is_capture_mode
+ is_capture_mode = True
+
+ yield
+
+ is_capture_mode = False
+
+
+@contextmanager
+def freeze_gc(enable_cudagraph_gc: bool):
+ """
+ Optimize garbage collection during CUDA graph capture.
+ Clean up, then freeze all remaining objects from being included
+ in future collections if GC is disabled during capture.
+ """
+ gc.collect()
+ should_freeze = not enable_cudagraph_gc
+ if should_freeze:
+ gc.freeze()
+ try:
+ yield
+ finally:
+ if should_freeze:
+ gc.unfreeze()
+
+
+def _to_torch(model: torch.nn.Module, reverse: bool, num_tokens: int):
+ for sub in model._modules.values():
+ if isinstance(sub, CustomOp):
+ if reverse:
+ sub.leave_torch_compile()
+ else:
+ sub.enter_torch_compile(num_tokens=num_tokens)
+ if isinstance(sub, torch.nn.Module):
+ _to_torch(sub, reverse, num_tokens)
+
+
+@contextmanager
+def patch_model(
+ model: torch.nn.Module,
+ enable_compile: bool,
+ num_tokens: int,
+ tp_group: GroupCoordinator,
+):
+ """Patch the model to make it compatible with with torch.compile"""
+ backup_ca_comm = None
+
+ try:
+ if enable_compile:
+ _to_torch(model, reverse=False, num_tokens=num_tokens)
+ backup_ca_comm = tp_group.ca_comm
+ # Use custom-allreduce here.
+ # We found the custom allreduce is much faster than the built-in allreduce in torch,
+ # even with ENABLE_INTRA_NODE_COMM=1.
+ # tp_group.ca_comm = None
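+ # The compile mode defaults to "max-autotune-no-cudagraphs" (CUDA graph capture is
+ # handled by the graph runner itself) and can be overridden via SGLANG_TORCH_COMPILE_MODE.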
+ yield torch.compile(
+ torch.no_grad()(model.forward),
+ mode=os.environ.get(
+ "SGLANG_TORCH_COMPILE_MODE", "max-autotune-no-cudagraphs"
+ ),
+ dynamic=False,
+ )
+ else:
+ yield model.forward
+ finally:
+ if enable_compile:
+ _to_torch(model, reverse=True, num_tokens=num_tokens)
+ tp_group.ca_comm = backup_ca_comm
+
+
+def set_torch_compile_config():
+ import torch._dynamo.config
+ import torch._inductor.config
+
+ torch._inductor.config.coordinate_descent_tuning = True
+ torch._inductor.config.triton.unique_kernel_names = True
+ torch._inductor.config.fx_graph_cache = True # Experimental feature to reduce compilation times; it will be on by default in a future release
+
+ # FIXME: tmp workaround
+ torch._dynamo.config.accumulated_cache_size_limit = 1024
+ if hasattr(torch._dynamo.config, "cache_size_limit"):
+ torch._dynamo.config.cache_size_limit = 1024
+
+ monkey_patch_torch_compile()
+
+
+def get_batch_sizes_to_capture(model_runner: ModelRunner):
+ server_args = model_runner.server_args
+ capture_bs = server_args.cuda_graph_bs
-class CudaGraphRunner(GraphRunner):
+ if capture_bs is None:
+ if server_args.speculative_algorithm is None:
+ if server_args.disable_cuda_graph_padding:
+ capture_bs = list(range(1, 33)) + list(range(48, 161, 16))
+ else:
+ capture_bs = [1, 2, 4, 8] + list(range(16, 161, 8))
+ else:
+ # Since speculative decoding requires more cuda graph memory, we
+ # capture less.
+ capture_bs = (
+ list(range(1, 9))
+ + list(range(10, 33, 2))
+ + list(range(40, 64, 8))
+ + list(range(80, 161, 16))
+ )
+
+ gpu_mem = get_device_memory_capacity()
+ if gpu_mem is not None:
+ if gpu_mem > 90 * 1024: # H200, H20
+ capture_bs += list(range(160, 257, 8))
+ if gpu_mem > 160 * 1000: # B200, MI300
+ capture_bs += list(range(256, 513, 16))
+
+ if max(capture_bs) > model_runner.req_to_token_pool.size:
+ # In some cases (e.g., with a small GPU or --max-running-requests), the #max-running-requests
+ # is very small. We add more values here to make sure we capture the maximum bs.
+ capture_bs += [model_runner.req_to_token_pool.size]
+
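+ # Keep only batch sizes divisible by mul_base: two-batch-overlap splits a batch
+ # into two micro-batches, and gathered buffers require the batch size to be a
+ # multiple of the attention TP size.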
+ mul_base = 1
+
+ if server_args.enable_two_batch_overlap:
+ mul_base *= 2
+
+ if require_gathered_buffer(server_args):
+ mul_base *= get_attention_tp_size()
+
+ capture_bs = [bs for bs in capture_bs if bs % mul_base == 0]
+
+ if server_args.cuda_graph_max_bs:
+ capture_bs = [bs for bs in capture_bs if bs <= server_args.cuda_graph_max_bs]
+ if max(capture_bs) < server_args.cuda_graph_max_bs:
+ capture_bs += list(
+ range(max(capture_bs), server_args.cuda_graph_max_bs + 1, 16)
+ )
+ capture_bs = [bs for bs in capture_bs if bs <= model_runner.req_to_token_pool.size]
+ capture_bs = list(sorted(set(capture_bs)))
+ assert len(capture_bs) > 0 and capture_bs[0] > 0, f"{capture_bs=}"
+ compile_bs = (
+ [bs for bs in capture_bs if bs <= server_args.torch_compile_max_bs]
+ if server_args.enable_torch_compile
+ else []
+ )
+ return capture_bs, compile_bs
+
+
+# Reuse this memory pool across all cuda graph runners.
+global_graph_memory_pool = None
+
+
+def get_global_graph_memory_pool():
+ return global_graph_memory_pool
+
+
+def set_global_graph_memory_pool(val):
+ global global_graph_memory_pool
+ global_graph_memory_pool = val
+
+
+class CudaGraphRunner:
"""A CudaGraphRunner runs the forward pass of a model with cuda graph and torch.compile."""
def __init__(self, model_runner: ModelRunner):
# Parse args
- super().__init__(model_runner)
+ self.model_runner = model_runner
+ self.device = model_runner.device
+ self.device_module = torch.get_device_module(self.device)
+ self.graphs = {}
+ self.output_buffers = {}
+ self.enable_torch_compile = model_runner.server_args.enable_torch_compile
+ self.disable_padding = model_runner.server_args.disable_cuda_graph_padding
+ self.is_encoder_decoder = model_runner.model_config.is_encoder_decoder
+ self.require_gathered_buffer = require_gathered_buffer(model_runner.server_args)
+ self.require_mlp_tp_gather = require_mlp_tp_gather(model_runner.server_args)
+ self.require_mlp_sync = require_mlp_sync(model_runner.server_args)
+ self.require_attn_tp_gather = require_attn_tp_gather(model_runner.server_args)
+ self.enable_two_batch_overlap = (
+ model_runner.server_args.enable_two_batch_overlap
+ )
+ self.speculative_algorithm = model_runner.server_args.speculative_algorithm
+ self.enable_profile_cuda_graph = (
+ model_runner.server_args.enable_profile_cuda_graph
+ )
+ self.hip_config = model_runner.server_args.hip_attention_config
+ self.enable_hip_attention = model_runner.server_args.enable_hip_attention
+ if self.enable_hip_attention:
+ from hip_attn.v1_2.paged_hip import cuda_graph_capture_configs
+ self.capture_configs = cuda_graph_capture_configs(self.hip_config)
+ else:
+ self.capture_configs = [()]
+ self.tp_size = model_runner.server_args.tp_size
+ self.dp_size = model_runner.server_args.dp_size
+ self.pp_size = model_runner.server_args.pp_size
+
+ self.attn_tp_size = get_attention_tp_size()
+ self.attn_tp_rank = get_attention_tp_rank()
+
+ # Batch sizes to capture
+ self.capture_bs, self.compile_bs = get_batch_sizes_to_capture(model_runner)
+ log_info_on_rank0(logger, f"Capture cuda graph bs {self.capture_bs}")
+ self.capture_forward_mode = ForwardMode.DECODE
+ self.capture_hidden_mode = CaptureHiddenMode.NULL
+ self.num_tokens_per_bs = 1
+ if (
+ model_runner.spec_algorithm.is_eagle()
+ or model_runner.spec_algorithm.is_standalone()
+ ):
+ if self.model_runner.is_draft_worker:
+ raise RuntimeError("This should not happen")
+ else:
+ self.capture_forward_mode = ForwardMode.TARGET_VERIFY
+ self.num_tokens_per_bs = (
+ self.model_runner.server_args.speculative_num_draft_tokens
+ )
+
+ # If returning hidden states is enabled, set initial capture hidden mode to full to avoid double-capture on startup
+ if model_runner.server_args.enable_return_hidden_states:
+ self.capture_hidden_mode = CaptureHiddenMode.FULL
+
+ # Attention backend
+ self.max_bs = max(self.capture_bs)
+ self.max_num_token = self.max_bs * self.num_tokens_per_bs
+ self.model_runner.attn_backend.init_cuda_graph_state(
+ self.max_bs, self.max_num_token
+ )
+ self.seq_len_fill_value = (
+ self.model_runner.attn_backend.get_cuda_graph_seq_len_fill_value()
+ )
+
+ # FIXME(lsyin): leave it here for now, I don't know whether it is necessary
+ self.encoder_len_fill_value = 0
+ self.seq_lens_cpu = torch.full(
+ (self.max_bs,), self.seq_len_fill_value, dtype=torch.int32
+ )
+
+ if self.enable_torch_compile:
+ set_torch_compile_config()
+
+ if self.model_runner.server_args.enable_lora:
+ self.model_runner.lora_manager.init_cuda_graph_batch_info(self.max_bs)
+
+ # Graph inputs
+ with torch.device(self.device):
+ self.input_ids = torch.zeros((self.max_num_token,), dtype=torch.int64)
+ self.req_pool_indices = torch.zeros((self.max_bs,), dtype=torch.int32)
+ self.seq_lens = torch.full(
+ (self.max_bs,), self.seq_len_fill_value, dtype=torch.int32
+ )
+ self.out_cache_loc = torch.zeros(
+ (self.max_num_token,), dtype=self._cache_loc_dtype()
+ )
+ self.positions = torch.zeros((self.max_num_token,), dtype=torch.int64)
+ self.mrope_positions = torch.zeros(
+ (3, self.max_num_token), dtype=torch.int64
+ )
+ self.num_token_non_padded = torch.zeros((1,), dtype=torch.int32)
+ self.tbo_plugin = TboCudaGraphRunnerPlugin()
+
+ # pipeline parallelism
+ if self.pp_size > 1:
+ self.pp_proxy_tensors = {
+ "hidden_states": torch.zeros(
+ (self.max_bs, self.model_runner.model_config.hidden_size),
+ dtype=torch.bfloat16,
+ ),
+ "residual": torch.zeros(
+ (self.max_bs, self.model_runner.model_config.hidden_size),
+ dtype=torch.bfloat16,
+ ),
+ }
+
+ # Speculative_inference
+ if model_runner.spec_algorithm.is_eagle3():
+ self.model_runner.model.set_eagle3_layers_to_capture()
+
+ if self.is_encoder_decoder:
+ # NOTE: encoder_lens can influence the full_text_row_masked_out_mask tensor when doing mixed batch
+ self.encoder_lens = torch.full(
+ (self.max_bs,), self.encoder_len_fill_value, dtype=torch.int32
+ )
+ else:
+ self.encoder_lens = None
+
+ if self.require_gathered_buffer:
+ if self.require_mlp_tp_gather:
+ self.global_num_tokens_gpu = torch.zeros(
+ (self.dp_size,), dtype=torch.int32
+ )
+ self.global_num_tokens_for_logprob_gpu = torch.zeros(
+ (self.dp_size,), dtype=torch.int32
+ )
+ else:
+ assert self.require_attn_tp_gather
+ self.global_num_tokens_gpu = torch.zeros((1,), dtype=torch.int32)
+ self.global_num_tokens_for_logprob_gpu = torch.zeros(
+ (1,), dtype=torch.int32
+ )
+ else:
+ self.global_num_tokens_gpu = None
+ self.global_num_tokens_for_logprob_gpu = None
+
+ self.custom_mask = torch.ones(
+ (
+ (self.seq_lens.sum().item() + self.max_num_token)
+ * self.num_tokens_per_bs
+ ),
+ dtype=torch.bool,
+ device=self.device,
+ )
+ self.next_token_logits_buffer = torch.zeros(
+ (self.max_num_token, self.model_runner.model_config.vocab_size),
+ dtype=torch.float,
+ device=self.device,
+ )
+
+ # Capture
+ try:
+ with model_capture_mode():
+ self.capture()
+ except RuntimeError as e:
+ raise Exception(
+ f"Capture cuda graph failed: {e}\n{CUDA_GRAPH_CAPTURE_FAILED_MSG}"
+ )
+
+ def _cache_loc_dtype(self):
+ return torch.int64
+
+ def can_run(self, forward_batch: ForwardBatch):
+ if self.require_mlp_tp_gather:
+ cuda_graph_bs = (
+ max(forward_batch.global_num_tokens_cpu) // self.num_tokens_per_bs
+ if self.model_runner.spec_algorithm.is_eagle()
+ else max(forward_batch.global_num_tokens_cpu)
+ )
+ else:
+ cuda_graph_bs = forward_batch.batch_size
+
+ recorded_batch_sizes = {bs for bs, *_ in self.graphs}
+ is_bs_supported = (
+ cuda_graph_bs in recorded_batch_sizes
+ if self.disable_padding
+ else cuda_graph_bs <= self.max_bs
+ )
+
+ if self.require_mlp_sync:
+ is_bs_supported = is_bs_supported and forward_batch.can_run_dp_cuda_graph
+
+ # NOTE: cuda graph cannot handle mixed batch (encoder_len = 0)
+ # If mixed batch cannot be supported, then encoder_lens can be removed in cuda graph
+ # because the full_text_row_masked_out_mask tensor will always be ones
+ is_encoder_lens_supported = (
+ torch.all(forward_batch.encoder_lens > 0)
+ if self.is_encoder_decoder
+ else True
+ )
+
+ requested_capture_hidden_mode = max(
+ forward_batch.capture_hidden_mode,
+ (
+ forward_batch.spec_info.capture_hidden_mode
+ if getattr(forward_batch.spec_info, "capture_hidden_mode", None)
+ is not None
+ else CaptureHiddenMode.NULL
+ ),
+ )
+ capture_hidden_mode_matches = (
+ requested_capture_hidden_mode == CaptureHiddenMode.NULL
+ or requested_capture_hidden_mode == self.capture_hidden_mode
+ )
+ is_tbo_supported = (
+ forward_batch.can_run_tbo if self.enable_two_batch_overlap else True
+ )
+
+ return (
+ is_bs_supported
+ and is_encoder_lens_supported
+ and is_tbo_supported
+ and capture_hidden_mode_matches
+ )
+
+ def capture(self) -> None:
+ profile_context = empty_context()
+ if self.enable_profile_cuda_graph:
+ profile_context = profile(
+ activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA],
+ record_shapes=True,
+ )
+
+ # Trigger CUDA graph capture for specific shapes.
+ # Capture the large shapes first so that the smaller shapes
+ # can reuse the memory pool allocated for the large shapes.
+ with freeze_gc(
+ self.model_runner.server_args.enable_cudagraph_gc
+ ), graph_capture() as graph_capture_context:
+ with profile_context as prof:
+ self.stream = graph_capture_context.stream
+ avail_mem = get_available_gpu_memory(
+ self.model_runner.device,
+ self.model_runner.gpu_id,
+ empty_cache=False,
+ )
+ # Reverse the order to enable better memory sharing across cuda graphs.
+ capture_range = (
+ tqdm.tqdm(list(reversed(self.capture_bs)))
+ if get_tensor_model_parallel_rank() == 0
+ else reversed(self.capture_bs)
+ )
+ for i, bs in enumerate(capture_range):
+ if get_tensor_model_parallel_rank() == 0:
+ avail_mem = get_available_gpu_memory(
+ self.model_runner.device,
+ self.model_runner.gpu_id,
+ empty_cache=False,
+ )
+ capture_range.set_description(
+ f"Capturing batches ({bs=} {avail_mem=:.2f} GB)"
+ )
+
+ for capture_config in self.capture_configs:
+ with patch_model(
+ self.model_runner.model,
+ bs in self.compile_bs,
+ num_tokens=bs * self.num_tokens_per_bs,
+ tp_group=self.model_runner.tp_group,
+ ) as forward:
+ (
+ graph,
+ output_buffers,
+ ) = self.capture_one_batch_size(bs, forward, capture_config)
+ graph_handle = (bs, *capture_config)
+ self.graphs[graph_handle] = graph
+ self.output_buffers[graph_handle] = output_buffers
+
+ # Save gemlite cache after each capture
+ save_gemlite_cache()
+
+ if self.enable_profile_cuda_graph:
+ log_message = (
+ "Sorted by CUDA Time:\n"
+ + prof.key_averages(group_by_input_shape=True).table(
+ sort_by="cuda_time_total", row_limit=10
+ )
+ + "\n\nSorted by CPU Time:\n"
+ + prof.key_averages(group_by_input_shape=True).table(
+ sort_by="cpu_time_total", row_limit=10
+ )
+ )
+ logger.info(log_message)
+
+ def _capture_graph(self, graph, pool, stream, run_once_fn):
+ with self.device_module.graph(graph, pool=pool, stream=stream):
+ out = run_once_fn()
+ return out
def _create_device_graph(self):
return torch.cuda.CUDAGraph()
+
+ def capture_one_batch_size(self, bs: int, forward: Callable, capture_config: tuple):
+ graph = self._create_device_graph()
+ stream = self.stream
+ num_tokens = bs * self.num_tokens_per_bs
+
+ # Graph inputs
+ input_ids = self.input_ids[:num_tokens]
+ req_pool_indices = self.req_pool_indices[:bs]
+ seq_lens = self.seq_lens[:bs]
+ out_cache_loc = self.out_cache_loc[:num_tokens]
+ positions = self.positions[:num_tokens]
+ if self.is_encoder_decoder:
+ encoder_lens = self.encoder_lens[:bs]
+ else:
+ encoder_lens = None
+ mrope_positions = self.mrope_positions[:, :num_tokens]
+ next_token_logits_buffer = self.next_token_logits_buffer[:num_tokens]
+ self.num_token_non_padded[...] = num_tokens
+
+ # pipeline parallelism
+ if self.pp_size > 1:
+ pp_proxy_tensors = PPProxyTensors(
+ {k: v[:num_tokens] for k, v in self.pp_proxy_tensors.items()}
+ )
+
+ if self.require_mlp_tp_gather:
+ self.global_num_tokens_gpu.copy_(
+ torch.tensor(
+ [num_tokens] * self.dp_size,
+ dtype=torch.int32,
+ device=input_ids.device,
+ )
+ )
+ self.global_num_tokens_for_logprob_gpu.copy_(
+ torch.tensor(
+ [num_tokens] * self.dp_size,
+ dtype=torch.int32,
+ device=input_ids.device,
+ )
+ )
+ global_dp_buffer_len = num_tokens * self.dp_size
+ elif self.require_attn_tp_gather:
+ self.global_num_tokens_gpu.copy_(
+ torch.tensor(
+ [num_tokens],
+ dtype=torch.int32,
+ device=input_ids.device,
+ )
+ )
+ self.global_num_tokens_for_logprob_gpu.copy_(
+ torch.tensor(
+ [num_tokens],
+ dtype=torch.int32,
+ device=input_ids.device,
+ )
+ )
+ global_dp_buffer_len = num_tokens
+ else:
+ global_dp_buffer_len = None
+
+ spec_info = self.get_spec_info(num_tokens)
+ if self.capture_hidden_mode != CaptureHiddenMode.FULL:
+ self.capture_hidden_mode = (
+ spec_info.capture_hidden_mode if spec_info else CaptureHiddenMode.NULL
+ )
+
+ if self.model_runner.server_args.enable_lora:
+ # It is safe to capture CUDA graph using empty LoRA id, as the LoRA kernels will always be launched whenever
+ # `--enable-lora` is set to True (and return immediately if the LoRA id is empty for perf optimization).
+ lora_ids = [None] * bs
+ else:
+ lora_ids = None
+
+ hip_num_cached_stages = None
+ if self.enable_hip_attention:
+ (hip_num_cached_stages,) = capture_config
+
+ forward_batch = ForwardBatch(
+ forward_mode=self.capture_forward_mode,
+ batch_size=bs,
+ input_ids=input_ids,
+ req_pool_indices=req_pool_indices,
+ seq_lens=seq_lens,
+ next_token_logits_buffer=next_token_logits_buffer,
+ orig_seq_lens=seq_lens,
+ req_to_token_pool=self.model_runner.req_to_token_pool,
+ token_to_kv_pool=self.model_runner.token_to_kv_pool,
+ attn_backend=self.model_runner.attn_backend,
+ hip_metadata_cache_pool=self.model_runner.hip_metadata_cache_pool,
+ hip_metadata_cached_stages=hip_num_cached_stages,
+ out_cache_loc=out_cache_loc,
+ seq_lens_sum=seq_lens.sum().item(),
+ encoder_lens=encoder_lens,
+ return_logprob=False,
+ positions=positions,
+ global_num_tokens_gpu=self.global_num_tokens_gpu,
+ global_num_tokens_for_logprob_gpu=self.global_num_tokens_for_logprob_gpu,
+ dp_padding_mode=DpPaddingMode.get_default_mode_in_cuda_graph(),
+ global_dp_buffer_len=global_dp_buffer_len,
+ mrope_positions=mrope_positions,
+ spec_algorithm=self.model_runner.spec_algorithm,
+ spec_info=spec_info,
+ capture_hidden_mode=self.capture_hidden_mode,
+ num_token_non_padded=self.num_token_non_padded,
+ global_forward_mode=self.capture_forward_mode,
+ lora_ids=lora_ids,
+ )
+ self.tbo_plugin.capture_one_batch_size(forward_batch, num_tokens=num_tokens)
+
+ if lora_ids is not None:
+ self.model_runner.lora_manager.prepare_lora_batch(forward_batch)
+
+ # Attention backend
+ self.model_runner.attn_backend.init_forward_metadata_capture_cuda_graph(
+ bs,
+ num_tokens,
+ req_pool_indices,
+ seq_lens,
+ encoder_lens,
+ forward_batch.forward_mode,
+ forward_batch.spec_info,
+ )
+
+ # Run and capture
+ def run_once():
+ # Clean intermediate result cache for DP attention
+ forward_batch.dp_local_start_pos = forward_batch.dp_local_num_tokens = None
+ set_dp_buffer_len(global_dp_buffer_len, num_tokens)
+
+ kwargs = {}
+ if (
+ self.pp_size > 1
+ and "pp_proxy_tensors" in inspect.signature(forward).parameters
+ ):
+ kwargs["pp_proxy_tensors"] = PPProxyTensors(
+ {k: v.clone() for k, v in pp_proxy_tensors.tensors.items()}
+ )
+
+ logits_output_or_pp_proxy_tensors = forward(
+ input_ids,
+ forward_batch.positions,
+ forward_batch,
+ **kwargs,
+ )
+ return logits_output_or_pp_proxy_tensors
+
+ for _ in range(2):
+ self.device_module.synchronize()
+ self.model_runner.tp_group.barrier()
+ run_once()
+
+ if get_global_graph_memory_pool() is None:
+ set_global_graph_memory_pool(self.device_module.graph_pool_handle())
+ # Set graph pool id globally to be able to use symmetric memory
+ set_graph_pool_id(get_global_graph_memory_pool())
+ out = self._capture_graph(
+ graph, get_global_graph_memory_pool(), stream, run_once
+ )
+
+ return graph, out
+
+ def recapture_if_needed(self, forward_batch: ForwardBatch):
+
+ # If the required capture_hidden_mode changes, we need to recapture the graph
+
+ # These are the different factors that can influence the capture_hidden_mode
+ capture_hidden_mode_required_by_forward_batch = (
+ forward_batch.capture_hidden_mode
+ )
+ capture_hidden_mode_required_by_spec_info = getattr(
+ forward_batch.spec_info, "capture_hidden_mode", CaptureHiddenMode.NULL
+ )
+ capture_hidden_mode_required_for_returning_hidden_states = (
+ CaptureHiddenMode.FULL
+ if self.model_runner.server_args.enable_return_hidden_states
+ else CaptureHiddenMode.NULL
+ )
+
+ # Determine the highest capture_hidden_mode required
+ # (If we have FULL, we can emulate LAST or NULL)
+ # (If we have LAST, we can emulate NULL)
+ required_capture_hidden_mode = max(
+ capture_hidden_mode_required_by_forward_batch,
+ capture_hidden_mode_required_by_spec_info,
+ capture_hidden_mode_required_for_returning_hidden_states,
+ )
+
+ # If the current hidden mode is no longer aligned with the required hidden mode, we need to set it to what is required and re-capture
+ if self.capture_hidden_mode != required_capture_hidden_mode:
+ self.capture_hidden_mode = required_capture_hidden_mode
+ self.capture()
+
+ def replay_prepare(
+ self,
+ forward_batch: ForwardBatch,
+ pp_proxy_tensors: Optional[PPProxyTensors] = None,
+ ):
+ self.recapture_if_needed(forward_batch)
+
+ raw_bs = forward_batch.batch_size
+ raw_num_token = raw_bs * self.num_tokens_per_bs
+
+ # Pad
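+ # (round the runtime batch size up to the nearest captured batch size; padded
+ # slots keep their fill values and the extra outputs are sliced off in replay)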
+ if self.require_mlp_tp_gather:
+ max_num_tokens = max(forward_batch.global_num_tokens_cpu)
+ max_batch_size = (
+ max_num_tokens / self.num_tokens_per_bs
+ if self.model_runner.spec_algorithm.is_eagle()
+ else max_num_tokens
+ )
+ index = bisect.bisect_left(self.capture_bs, max_batch_size)
+ else:
+ index = bisect.bisect_left(self.capture_bs, raw_bs)
+ bs = self.capture_bs[index]
+ if bs != raw_bs:
+ self.seq_lens.fill_(self.seq_len_fill_value)
+ self.out_cache_loc.zero_()
+
+ # Common inputs
+ self.input_ids[:raw_num_token].copy_(forward_batch.input_ids)
+ self.req_pool_indices[:raw_bs].copy_(forward_batch.req_pool_indices)
+ self.seq_lens[:raw_bs].copy_(forward_batch.seq_lens)
+ self.out_cache_loc[:raw_num_token].copy_(forward_batch.out_cache_loc)
+ self.positions[:raw_num_token].copy_(forward_batch.positions)
+
+ seq_lens_cpu = None
+ if forward_batch.seq_lens_cpu is not None:
+ if bs != raw_bs:
+ self.seq_lens_cpu.fill_(self.seq_len_fill_value)
+ self.seq_lens_cpu[:raw_bs].copy_(forward_batch.seq_lens_cpu)
+ seq_lens_cpu = self.seq_lens_cpu[:bs]
+
+ if pp_proxy_tensors:
+ for key in self.pp_proxy_tensors.keys():
+ dim = pp_proxy_tensors[key].shape[0]
+ self.pp_proxy_tensors[key][:dim].copy_(pp_proxy_tensors[key])
+
+ if self.is_encoder_decoder:
+ self.encoder_lens[:raw_bs].copy_(forward_batch.encoder_lens)
+ if forward_batch.mrope_positions is not None:
+ self.mrope_positions[:, :raw_num_token].copy_(forward_batch.mrope_positions)
+ if self.require_gathered_buffer:
+ self.global_num_tokens_gpu.fill_(bs * self.num_tokens_per_bs)
+ self.global_num_tokens_for_logprob_gpu.fill_(bs * self.num_tokens_per_bs)
+ if enable_num_token_non_padded(self.model_runner.server_args):
+ num_token_non_padded = forward_batch.num_token_non_padded
+ if self.require_gathered_buffer:
+ tokens_per_rank = bs // self.attn_tp_size * self.num_tokens_per_bs
+ num_local_token_non_padded = torch.clamp(
+ num_token_non_padded - tokens_per_rank * self.attn_tp_rank,
+ min=0,
+ max=tokens_per_rank,
+ )
+ self.num_token_non_padded.copy_(num_local_token_non_padded)
+ else:
+ self.num_token_non_padded.copy_(num_token_non_padded)
+ if self.enable_two_batch_overlap:
+ self.tbo_plugin.replay_prepare(
+ forward_mode=self.capture_forward_mode,
+ bs=bs,
+ num_token_non_padded=len(forward_batch.input_ids),
+ spec_info=forward_batch.spec_info,
+ )
+ if forward_batch.forward_mode.is_idle() and forward_batch.spec_info is not None:
+ forward_batch.spec_info.custom_mask = self.custom_mask
+ # Attention backend
+ self.model_runner.attn_backend.init_forward_metadata_replay_cuda_graph(
+ bs,
+ self.req_pool_indices[:bs],
+ self.seq_lens[:bs],
+ forward_batch.seq_lens_sum + (bs - raw_bs) * self.seq_len_fill_value,
+ self.encoder_lens[:bs] if self.is_encoder_decoder else None,
+ self.capture_forward_mode,
+ forward_batch.spec_info,
+ seq_lens_cpu=seq_lens_cpu,
+ )
+
+ # Store fields
+ self.raw_bs = raw_bs
+ self.raw_num_token = raw_num_token
+ self.bs = bs
+
+ def replay(
+ self,
+ forward_batch: ForwardBatch,
+ skip_attn_backend_init: bool = False,
+ pp_proxy_tensors: Optional[PPProxyTensors] = None,
+ ) -> Union[LogitsProcessorOutput, PPProxyTensors]:
+ if not skip_attn_backend_init:
+ self.replay_prepare(forward_batch, pp_proxy_tensors)
+ else:
+ # In speculative decoding, these two fields are still needed.
+ self.input_ids[: self.raw_num_token].copy_(forward_batch.input_ids)
+ self.positions[: self.raw_num_token].copy_(forward_batch.positions)
+
+ # Replay
+ graph_handle = (self.bs,)
+ if self.enable_hip_attention:
+ graph_handle = (self.bs, forward_batch.hip_metadata_cached_stages)
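+ # Optionally time the graph replay (rank 0 only) when the HIP_DEBUG_BENCH /
+ # HIP_DEBUG_BENCH_DECODE environment variables are set.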
+ run_bench = os.getenv("HIP_DEBUG_BENCH", "0") == "1" and get_local_rank() == 0
+ run_bench = os.getenv("HIP_DEBUG_BENCH_DECODE", "1") == "1" and run_bench
+ if run_bench:
+ start = torch.cuda.Event(True)
+ end = torch.cuda.Event(True)
+ start.record()
+ self.graphs[graph_handle].replay()
+ if run_bench:
+ end.record()
+ end.synchronize()
+ elapsed = start.elapsed_time(end)
+ print(
+ f"[CudaGraphRunner.replay] graph {graph_handle} took {elapsed:.2f} ms"
+ )
+ output = self.output_buffers[graph_handle]
+
+ if isinstance(output, LogitsProcessorOutput):
+ return LogitsProcessorOutput(
+ next_token_logits=output.next_token_logits[: self.raw_num_token],
+ hidden_states=(
+ output.hidden_states[: self.raw_num_token]
+ if output.hidden_states is not None
+ else None
+ ),
+ )
+ else:
+ assert isinstance(output, PPProxyTensors)
+ return PPProxyTensors({k: v[: self.bs] for k, v in output.tensors.items()})
+
+ def get_spec_info(self, num_tokens: int):
+ spec_info = None
+ if (
+ self.model_runner.spec_algorithm.is_eagle()
+ or self.model_runner.spec_algorithm.is_standalone()
+ ):
+ from sglang.srt.speculative.eagle_utils import EagleVerifyInput
+
+ if self.model_runner.is_draft_worker:
+ raise RuntimeError("This should not happen.")
+ else:
+ spec_info = EagleVerifyInput(
+ draft_token=None,
+ custom_mask=self.custom_mask,
+ positions=None,
+ retrive_index=None,
+ retrive_next_token=None,
+ retrive_next_sibling=None,
+ retrive_cum_len=None,
+ spec_steps=self.model_runner.server_args.speculative_num_steps,
+ topk=self.model_runner.server_args.speculative_eagle_topk,
+ draft_token_num=self.model_runner.server_args.speculative_num_draft_tokens,
+ capture_hidden_mode=CaptureHiddenMode.FULL,
+ seq_lens_sum=None,
+ seq_lens_cpu=None,
+ )
+
+ return spec_info
+
+
+CUDA_GRAPH_CAPTURE_FAILED_MSG = (
+ "Possible solutions:\n"
+ "1. set --mem-fraction-static to a smaller value (e.g., 0.8 or 0.7)\n"
+ "2. set --cuda-graph-max-bs to a smaller value (e.g., 16)\n"
+ "3. disable torch compile by not using --enable-torch-compile\n"
+ "4. disable CUDA graph by --disable-cuda-graph. (Not recommended. Huge performance loss)\n"
+ "Open an issue on GitHub https://github.com/sgl-project/sglang/issues/new/choose \n"
+)
diff --git a/python/sglang/srt/model_executor/forward_batch_info.py b/python/sglang/srt/model_executor/forward_batch_info.py
index bceb0759efa..497cb44faa6 100644
--- a/python/sglang/srt/model_executor/forward_batch_info.py
+++ b/python/sglang/srt/model_executor/forward_batch_info.py
@@ -54,6 +54,8 @@
)
if TYPE_CHECKING:
+ from hip_attn.v1_2 import HiPMetadataCachePool
+
from sglang.srt.layers.attention.base_attn_backend import AttentionBackend
from sglang.srt.layers.logits_processor import LogitsProcessorOutput
from sglang.srt.managers.schedule_batch import ModelWorkerBatch, MultimodalInputs
@@ -132,6 +134,9 @@ def is_cuda_graph(self):
or self == ForwardMode.IDLE
)
+ def is_cpu_graph(self):
+ return self == ForwardMode.DECODE
+
def is_dummy_first(self):
return self == ForwardMode.DUMMY_FIRST
@@ -241,6 +246,9 @@ class ForwardBatch:
prefix_chunk_num_tokens: Optional[List[int]] = None
# KV Indices for each chunk
prefix_chunk_kv_indices: Optional[List[torch.Tensor]] = None
+ # For MLA chunked prefix cache used in chunked prefill
+ # Tell attention backend whether lse needs to be returned
+ mha_return_lse: Optional[bool] = None
# For multimodal
mm_inputs: Optional[List[MultimodalInputs]] = None
@@ -268,6 +276,10 @@ class ForwardBatch:
token_to_kv_pool: KVCache = None
attn_backend: AttentionBackend = None
+ # For HiP attention
+ hip_metadata_cache_pool: Optional[HiPMetadataCachePool] = None
+ hip_metadata_cached_stages: Optional[int] = None
+
# For DP attention
global_num_tokens_cpu: Optional[List[int]] = None
global_num_tokens_gpu: Optional[torch.Tensor] = None
@@ -438,7 +450,24 @@ def init_new(
ret.extend_logprob_start_lens_cpu = batch.extend_logprob_start_lens
if model_runner.model_is_mrope:
- ret._compute_mrope_positions(model_runner, batch)
+ if (
+ ret.spec_info is not None
+ and getattr(ret.spec_info, "positions", None) is not None
+ ):
+ ret._compute_spec_mrope_positions(model_runner, batch)
+ else:
+ ret._compute_mrope_positions(model_runner, batch)
+
+ # Init HiP attention information
+ if model_runner.hip_metadata_cache_pool is not None:
+ ret.hip_metadata_cache_pool = model_runner.hip_metadata_cache_pool
+ if isinstance(batch.hip_metadata_cached_stages, int) and (
+ batch.hip_metadata_cached_stages < 0
+ ):
+ ret.hip_metadata_cache_pool.reset_decode_phase()
+ ret.hip_metadata_cached_stages = 0
+ else:
+ ret.hip_metadata_cached_stages = batch.hip_metadata_cached_stages
# Init lora information
if model_runner.server_args.enable_lora:
@@ -504,6 +533,52 @@ def contains_mm_inputs(self) -> bool:
or self.contains_image_inputs()
)
+ def _compute_spec_mrope_positions(
+ self, model_runner: ModelRunner, batch: ModelWorkerBatch
+ ):
+ # TODO support batched deltas
+ batch_size = self.seq_lens.shape[0]
+ device = model_runner.device
+ mm_inputs = batch.multimodal_inputs
+
+ if batch.forward_mode.is_draft_extend(): # draft_extend_after_decode
+ mrope_deltas = []
+ extend_lens = []
+ for batch_idx in range(batch_size):
+ extend_seq_len = batch.extend_seq_lens[batch_idx]
+ extend_lens.append(extend_seq_len)
+ mrope_delta = (
+ torch.zeros(1, dtype=torch.int64)
+ if mm_inputs[batch_idx] is None
+ else mm_inputs[batch_idx].mrope_position_delta.squeeze(0)
+ )
+ mrope_deltas.append(mrope_delta.to(device=device))
+ position_chunks = torch.split(batch.spec_info.positions, extend_lens)
+ mrope_positions_list = [
+ pos_chunk + delta
+ for pos_chunk, delta in zip(position_chunks, mrope_deltas)
+ ]
+ next_input_positions = (
+ torch.cat(mrope_positions_list, dim=0).unsqueeze(0).repeat(3, 1)
+ )
+
+ else: # target_verify or draft_decode
+ seq_positions = batch.spec_info.positions.view(batch_size, -1)
+ mrope_deltas = [
+ (
+ torch.tensor([0], dtype=torch.int64)
+ if mm_inputs[i] is None
+ else mm_inputs[i].mrope_position_delta.squeeze(0)
+ )
+ for i in range(batch_size)
+ ]
+ mrope_delta_tensor = torch.stack(mrope_deltas, dim=0).to(device=device)
+ next_input_positions = (
+ (seq_positions + mrope_delta_tensor).flatten().unsqueeze(0).repeat(3, 1)
+ )
+
+ self.mrope_positions = next_input_positions
+
def _compute_mrope_positions(
self, model_runner: ModelRunner, batch: ModelWorkerBatch
):
@@ -513,24 +588,23 @@ def _compute_mrope_positions(
for batch_idx in range(batch_size):
mm_input = batch.multimodal_inputs[batch_idx]
if self.forward_mode.is_decode():
- mrope_position_deltas = (
- [0]
- if mm_input is None
- else flatten_nested_list(mm_input.mrope_position_delta.tolist())
- )
- next_input_positions = []
- for mrope_position_delta in mrope_position_deltas:
- # batched deltas needs to be processed separately
- # Convert list of lists to tensor with shape [3, seq_len]
- next_input_positions += [
- MRotaryEmbedding.get_next_input_positions(
- mrope_position_delta,
- int(self.seq_lens[batch_idx]) - 1,
- int(self.seq_lens[batch_idx]),
- )
- ]
# 3 * N
- mrope_positions_list[batch_idx] = torch.cat(next_input_positions, dim=1)
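+ # In decode, each request generates exactly one token, so its mrope position is
+ # (seq_len - 1) shifted by the request's mrope delta (0 if there is no multimodal
+ # input), repeated across the 3 rope sections.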
+ if mm_input is None:
+ mrope_positions_list[batch_idx] = torch.full(
+ (3, 1),
+ self.seq_lens[batch_idx] - 1,
+ dtype=torch.int64,
+ device=model_runner.device,
+ )
+ else:
+ mrope_position_deltas = mm_input.mrope_position_delta.flatten().to(
+ model_runner.device, non_blocking=True
+ )
+ mrope_positions_list[batch_idx] = (
+ (mrope_position_deltas + self.seq_lens[batch_idx] - 1)
+ .unsqueeze(0)
+ .repeat(3, 1)
+ )
elif self.forward_mode.is_extend():
extend_seq_len, extend_prefix_len = (
batch.extend_seq_lens[batch_idx],
@@ -864,6 +938,18 @@ def prepare_chunked_prefix_cache_info(self, device: torch.device):
# Precompute the kv indices for each chunk
self.prepare_chunked_kv_indices(device)
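+ # Lifecycle hooks forwarded to the KV cache pool so it can do per-forward and
+ # per-layer bookkeeping around model execution.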
+ def on_model_start(self):
+ self.token_to_kv_pool.on_model_start(self)
+
+ def on_model_end(self):
+ self.token_to_kv_pool.on_model_end(self)
+
+ def on_layer_start(self, layer_id: int):
+ self.token_to_kv_pool.on_layer_start(self, layer_id)
+
+ def on_layer_end(self, layer_id: int):
+ self.token_to_kv_pool.on_layer_end(self, layer_id)
+
@property
def can_run_tbo(self):
return self.tbo_split_seq_index is not None
diff --git a/python/sglang/srt/model_executor/graph_runner.py b/python/sglang/srt/model_executor/graph_runner.py
deleted file mode 100644
index afcb00b4e76..00000000000
--- a/python/sglang/srt/model_executor/graph_runner.py
+++ /dev/null
@@ -1,860 +0,0 @@
-# Copyright 2023-2024 SGLang Team
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Run the model with device graph and torch.compile."""
-
-from __future__ import annotations
-
-import bisect
-import gc
-import inspect
-import logging
-import os
-from contextlib import contextmanager
-from typing import TYPE_CHECKING, Callable, Optional, Union
-
-import torch
-import tqdm
-from torch.profiler import ProfilerActivity, profile
-
-from sglang.srt.custom_op import CustomOp
-from sglang.srt.distributed import get_tensor_model_parallel_rank
-from sglang.srt.distributed.device_communicators.pynccl_allocator import (
- set_graph_pool_id,
-)
-from sglang.srt.distributed.parallel_state import GroupCoordinator, graph_capture
-from sglang.srt.layers.dp_attention import (
- DpPaddingMode,
- get_attention_tp_rank,
- get_attention_tp_size,
- set_dp_buffer_len,
-)
-from sglang.srt.layers.logits_processor import LogitsProcessorOutput
-from sglang.srt.layers.torchao_utils import save_gemlite_cache
-from sglang.srt.model_executor.forward_batch_info import (
- CaptureHiddenMode,
- ForwardBatch,
- ForwardMode,
- PPProxyTensors,
- enable_num_token_non_padded,
-)
-from sglang.srt.patch_torch import monkey_patch_torch_compile
-from sglang.srt.two_batch_overlap import TboCudaGraphRunnerPlugin
-from sglang.srt.utils import (
- empty_context,
- get_available_gpu_memory,
- get_device_memory_capacity,
- rank0_log,
- require_attn_tp_gather,
- require_gathered_buffer,
- require_mlp_sync,
- require_mlp_tp_gather,
-)
-
-logger = logging.getLogger(__name__)
-
-if TYPE_CHECKING:
- from sglang.srt.model_executor.model_runner import ModelRunner
-
-# Detect whether the current forward pass is in capture mode
-is_capture_mode = False
-
-
-def get_is_capture_mode():
- return is_capture_mode
-
-
-@contextmanager
-def model_capture_mode():
- global is_capture_mode
- is_capture_mode = True
-
- yield
-
- is_capture_mode = False
-
-
-@contextmanager
-def freeze_gc(enable_cudagraph_gc: bool):
- """
- Optimize garbage collection during CUDA graph capture.
- Clean up, then freeze all remaining objects from being included
- in future collections if GC is disabled during capture.
- """
- gc.collect()
- should_freeze = not enable_cudagraph_gc
- if should_freeze:
- gc.freeze()
- try:
- yield
- finally:
- if should_freeze:
- gc.unfreeze()
-
-
-def _to_torch(model: torch.nn.Module, reverse: bool, num_tokens: int):
- for sub in model._modules.values():
- if isinstance(sub, CustomOp):
- if reverse:
- sub.leave_torch_compile()
- else:
- sub.enter_torch_compile(num_tokens=num_tokens)
- if isinstance(sub, torch.nn.Module):
- _to_torch(sub, reverse, num_tokens)
-
-
-@contextmanager
-def patch_model(
- model: torch.nn.Module,
- enable_compile: bool,
- num_tokens: int,
- tp_group: GroupCoordinator,
-):
- """Patch the model to make it compatible with with torch.compile"""
- backup_ca_comm = None
-
- try:
- if enable_compile:
- _to_torch(model, reverse=False, num_tokens=num_tokens)
- backup_ca_comm = tp_group.ca_comm
- # Use custom-allreduce here.
- # We found the custom allreduce is much faster than the built-in allreduce in torch,
- # even with ENABLE_INTRA_NODE_COMM=1.
- # tp_group.ca_comm = None
- yield torch.compile(
- torch.no_grad()(model.forward),
- mode=os.environ.get(
- "SGLANG_TORCH_COMPILE_MODE", "max-autotune-no-cudagraphs"
- ),
- dynamic=False,
- )
- else:
- yield model.forward
- finally:
- if enable_compile:
- _to_torch(model, reverse=True, num_tokens=num_tokens)
- tp_group.ca_comm = backup_ca_comm
-
-
-def set_torch_compile_config():
- import torch._dynamo.config
- import torch._inductor.config
-
- torch._inductor.config.coordinate_descent_tuning = True
- torch._inductor.config.triton.unique_kernel_names = True
- torch._inductor.config.fx_graph_cache = True # Experimental feature to reduce compilation times, will be on by default in future
-
- # FIXME: tmp workaround
- torch._dynamo.config.accumulated_cache_size_limit = 1024
- if hasattr(torch._dynamo.config, "cache_size_limit"):
- torch._dynamo.config.cache_size_limit = 1024
-
- monkey_patch_torch_compile()
-
-
-def get_batch_sizes_to_capture(model_runner: ModelRunner):
- server_args = model_runner.server_args
- capture_bs = server_args.cuda_graph_bs
-
- if capture_bs is None:
- if server_args.speculative_algorithm is None:
- if server_args.disable_cuda_graph_padding:
- capture_bs = list(range(1, 33)) + list(range(48, 161, 16))
- else:
- capture_bs = [1, 2, 4, 8] + list(range(16, 161, 8))
- else:
- # Since speculative decoding requires more cuda graph memory, we
- # capture less.
- capture_bs = (
- list(range(1, 9))
- + list(range(10, 33, 2))
- + list(range(40, 64, 8))
- + list(range(80, 161, 16))
- )
-
- gpu_mem = get_device_memory_capacity()
- if gpu_mem is not None:
- if gpu_mem > 90 * 1024: # H200, H20
- capture_bs += list(range(160, 257, 8))
- if gpu_mem > 160 * 1000: # B200, MI300
- capture_bs += list(range(256, 513, 16))
-
- if max(capture_bs) > model_runner.req_to_token_pool.size:
- # In some cases (e.g., with a small GPU or --max-running-requests), the #max-running-requests
- # is very small. We add more values here to make sure we capture the maximum bs.
- capture_bs += [model_runner.req_to_token_pool.size]
-
- mul_base = 1
-
- if server_args.enable_two_batch_overlap:
- mul_base *= 2
-
- if require_gathered_buffer(server_args):
- mul_base *= get_attention_tp_size()
-
- capture_bs = [bs for bs in capture_bs if bs % mul_base == 0]
-
- if server_args.cuda_graph_max_bs:
- capture_bs = [bs for bs in capture_bs if bs <= server_args.cuda_graph_max_bs]
- if max(capture_bs) < server_args.cuda_graph_max_bs:
- capture_bs += list(
- range(max(capture_bs), server_args.cuda_graph_max_bs + 1, 16)
- )
- capture_bs = [bs for bs in capture_bs if bs <= model_runner.req_to_token_pool.size]
- capture_bs = list(sorted(set(capture_bs)))
- assert len(capture_bs) > 0 and capture_bs[0] > 0, f"{capture_bs=}"
- compile_bs = (
- [bs for bs in capture_bs if bs <= server_args.torch_compile_max_bs]
- if server_args.enable_torch_compile
- else []
- )
- return capture_bs, compile_bs
-
-
-# Reuse this memory pool across all device graph runners.
-global_graph_memory_pool = None
-
-
-def get_global_graph_memory_pool():
- return global_graph_memory_pool
-
-
-def set_global_graph_memory_pool(val):
- global global_graph_memory_pool
- global_graph_memory_pool = val
-
-
-class GraphRunner:
- """A GraphRunner is a base class to run the forward pass of a model with device graph and torch.compile."""
-
- def __init__(self, model_runner: ModelRunner):
- # Parse args
- self.model_runner = model_runner
- self.device = model_runner.device
- self.device_module = torch.get_device_module(self.device)
- self.graphs = {}
- self.output_buffers = {}
- self.enable_torch_compile = model_runner.server_args.enable_torch_compile
- self.disable_padding = model_runner.server_args.disable_cuda_graph_padding
- self.is_encoder_decoder = model_runner.model_config.is_encoder_decoder
- self.require_gathered_buffer = require_gathered_buffer(model_runner.server_args)
- self.require_mlp_tp_gather = require_mlp_tp_gather(model_runner.server_args)
- self.require_mlp_sync = require_mlp_sync(model_runner.server_args)
- self.require_attn_tp_gather = require_attn_tp_gather(model_runner.server_args)
- self.enable_two_batch_overlap = (
- model_runner.server_args.enable_two_batch_overlap
- )
- self.speculative_algorithm = model_runner.server_args.speculative_algorithm
- self.enable_profile_cuda_graph = (
- model_runner.server_args.enable_profile_cuda_graph
- )
- self.tp_size = model_runner.server_args.tp_size
- self.dp_size = model_runner.server_args.dp_size
- self.pp_size = model_runner.server_args.pp_size
-
- self.attn_tp_size = get_attention_tp_size()
- self.attn_tp_rank = get_attention_tp_rank()
-
- # Batch sizes to capture
- self.capture_bs, self.compile_bs = get_batch_sizes_to_capture(model_runner)
- rank0_log(f"Capture graph bs {self.capture_bs}")
- self.capture_forward_mode = ForwardMode.DECODE
- self.capture_hidden_mode = CaptureHiddenMode.NULL
- self.num_tokens_per_bs = 1
- if model_runner.spec_algorithm.is_eagle():
- if self.model_runner.is_draft_worker:
- raise RuntimeError("This should not happen")
- else:
- self.capture_forward_mode = ForwardMode.TARGET_VERIFY
- self.num_tokens_per_bs = (
- self.model_runner.server_args.speculative_num_draft_tokens
- )
-
- # If returning hidden states is enabled, set initial capture hidden mode to full to avoid double-capture on startup
- if model_runner.server_args.enable_return_hidden_states:
- self.capture_hidden_mode = CaptureHiddenMode.FULL
-
- # Attention backend
- self.max_bs = max(self.capture_bs)
- self.max_num_token = self.max_bs * self.num_tokens_per_bs
- self.model_runner.attn_backend.init_cuda_graph_state(
- self.max_bs, self.max_num_token
- )
- self.seq_len_fill_value = (
- self.model_runner.attn_backend.get_cuda_graph_seq_len_fill_value()
- )
-
- # FIXME(lsyin): leave this here for now; it is unclear whether it is necessary
- self.encoder_len_fill_value = 0
- self.seq_lens_cpu = torch.full(
- (self.max_bs,), self.seq_len_fill_value, dtype=torch.int32
- )
-
- if self.enable_torch_compile:
- set_torch_compile_config()
-
- if self.model_runner.server_args.enable_lora:
- self.model_runner.lora_manager.init_cuda_graph_batch_info(self.max_bs)
-
- # Graph inputs
- with torch.device(self.device):
- self.input_ids = torch.zeros((self.max_num_token,), dtype=torch.int64)
- self.req_pool_indices = torch.zeros((self.max_bs,), dtype=torch.int32)
- self.seq_lens = torch.full(
- (self.max_bs,), self.seq_len_fill_value, dtype=torch.int32
- )
- self.out_cache_loc = torch.zeros(
- (self.max_num_token,), dtype=self._cache_loc_dtype()
- )
- self.positions = torch.zeros((self.max_num_token,), dtype=torch.int64)
- self.mrope_positions = torch.zeros((3, self.max_bs), dtype=torch.int64)
- self.num_token_non_padded = torch.zeros((1,), dtype=torch.int32)
- self.tbo_plugin = TboCudaGraphRunnerPlugin()
-
- # pipeline parallelism
- if self.pp_size > 1:
- self.pp_proxy_tensors = {
- "hidden_states": torch.zeros(
- (self.max_bs, self.model_runner.model_config.hidden_size),
- dtype=torch.bfloat16,
- ),
- "residual": torch.zeros(
- (self.max_bs, self.model_runner.model_config.hidden_size),
- dtype=torch.bfloat16,
- ),
- }
-
- # Speculative inference
- if model_runner.spec_algorithm.is_eagle3():
- self.model_runner.model.set_eagle3_layers_to_capture()
-
- if self.is_encoder_decoder:
- # NOTE: encoder_lens can influence the full_text_row_masked_out_mask tensor when doing mixed batch
- self.encoder_lens = torch.full(
- (self.max_bs,), self.encoder_len_fill_value, dtype=torch.int32
- )
- else:
- self.encoder_lens = None
-
- if self.require_gathered_buffer:
- if self.require_mlp_tp_gather:
- self.global_num_tokens_gpu = torch.zeros(
- (self.dp_size,), dtype=torch.int32
- )
- self.global_num_tokens_for_logprob_gpu = torch.zeros(
- (self.dp_size,), dtype=torch.int32
- )
- else:
- assert self.require_attn_tp_gather
- self.global_num_tokens_gpu = torch.zeros((1,), dtype=torch.int32)
- self.global_num_tokens_for_logprob_gpu = torch.zeros(
- (1,), dtype=torch.int32
- )
- else:
- self.global_num_tokens_gpu = None
- self.global_num_tokens_for_logprob_gpu = None
-
- self.custom_mask = torch.ones(
- (
- (self.seq_lens.sum().item() + self.max_num_token)
- * self.num_tokens_per_bs
- ),
- dtype=torch.bool,
- device=self.device,
- )
- self.next_token_logits_buffer = torch.zeros(
- (self.max_num_token, self.model_runner.model_config.vocab_size),
- dtype=torch.float,
- device=self.device,
- )
-
- # Capture
- try:
- with model_capture_mode():
- self.capture()
- except RuntimeError as e:
- raise Exception(
- f"Capture device graph failed: {e}\n{GRAPH_CAPTURE_FAILED_MSG}"
- )
-
- def _cache_loc_dtype(self):
- return torch.int64
-
- def can_run(self, forward_batch: ForwardBatch):
- if self.require_mlp_tp_gather:
- cuda_graph_bs = (
- max(forward_batch.global_num_tokens_cpu) // self.num_tokens_per_bs
- if self.model_runner.spec_algorithm.is_eagle()
- else max(forward_batch.global_num_tokens_cpu)
- )
- else:
- cuda_graph_bs = forward_batch.batch_size
-
- is_bs_supported = (
- cuda_graph_bs in self.graphs
- if self.disable_padding
- else cuda_graph_bs <= self.max_bs
- )
-
- if self.require_mlp_sync:
- is_bs_supported = is_bs_supported and forward_batch.can_run_dp_cuda_graph
-
- # NOTE: CUDA graph cannot handle a mixed batch (encoder_len = 0).
- # If mixed batches are not supported, encoder_lens can be removed from the CUDA graph
- # because the full_text_row_masked_out_mask tensor will always be all ones.
- is_encoder_lens_supported = (
- torch.all(forward_batch.encoder_lens > 0)
- if self.is_encoder_decoder
- else True
- )
-
- requested_capture_hidden_mode = max(
- forward_batch.capture_hidden_mode,
- (
- forward_batch.spec_info.capture_hidden_mode
- if getattr(forward_batch.spec_info, "capture_hidden_mode", None)
- is not None
- else CaptureHiddenMode.NULL
- ),
- )
- capture_hidden_mode_matches = (
- requested_capture_hidden_mode == CaptureHiddenMode.NULL
- or requested_capture_hidden_mode == self.capture_hidden_mode
- )
- is_tbo_supported = (
- forward_batch.can_run_tbo if self.enable_two_batch_overlap else True
- )
-
- return (
- is_bs_supported
- and is_encoder_lens_supported
- and is_tbo_supported
- and capture_hidden_mode_matches
- )
-
- def capture(self) -> None:
- profile_context = empty_context()
- if self.enable_profile_cuda_graph:
- profile_context = profile(
- activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA],
- record_shapes=True,
- )
-
- # Trigger CUDA graph capture for specific shapes.
- # Capture the large shapes first so that the smaller shapes
- # can reuse the memory pool allocated for the large shapes.
- with freeze_gc(
- self.model_runner.server_args.enable_cudagraph_gc
- ), graph_capture() as graph_capture_context:
- with profile_context as prof:
- self.stream = graph_capture_context.stream
- avail_mem = get_available_gpu_memory(
- self.model_runner.device,
- self.model_runner.gpu_id,
- empty_cache=False,
- )
- # Reverse the order to enable better memory sharing across cuda graphs.
- capture_range = (
- tqdm.tqdm(list(reversed(self.capture_bs)))
- if get_tensor_model_parallel_rank() == 0
- else reversed(self.capture_bs)
- )
- for i, bs in enumerate(capture_range):
- if get_tensor_model_parallel_rank() == 0:
- avail_mem = get_available_gpu_memory(
- self.model_runner.device,
- self.model_runner.gpu_id,
- empty_cache=False,
- )
- capture_range.set_description(
- f"Capturing batches ({bs=} {avail_mem=:.2f} GB)"
- )
-
- with patch_model(
- self.model_runner.model,
- bs in self.compile_bs,
- num_tokens=bs * self.num_tokens_per_bs,
- tp_group=self.model_runner.tp_group,
- ) as forward:
- (
- graph,
- output_buffers,
- ) = self.capture_one_batch_size(bs, forward)
- self.graphs[bs] = graph
- self.output_buffers[bs] = output_buffers
-
- # Save gemlite cache after each capture
- save_gemlite_cache()
-
- if self.enable_profile_cuda_graph:
- log_message = (
- "Sorted by CUDA Time:\n"
- + prof.key_averages(group_by_input_shape=True).table(
- sort_by="cuda_time_total", row_limit=10
- )
- + "\n\nSorted by CPU Time:\n"
- + prof.key_averages(group_by_input_shape=True).table(
- sort_by="cpu_time_total", row_limit=10
- )
- )
- logger.info(log_message)
-
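The capture loop above uses one shared graph memory pool and captures the largest shapes first so smaller graphs can reuse the same allocations. A minimal PyTorch-only sketch of that pattern, with a toy linear layer standing in for the real model forward, might look like this:

```python
import torch

# Minimal sketch of the shared-pool, largest-first capture pattern used above.
model = torch.nn.Linear(16, 16).cuda()
pool = torch.cuda.graph_pool_handle()          # one pool shared across all graphs
graphs, static_inputs, static_outputs = {}, {}, {}

for bs in sorted([1, 2, 4, 8], reverse=True):  # largest first for better memory sharing
    x = torch.zeros(bs, 16, device="cuda")
    for _ in range(2):                         # warm up, like the double run_once() above
        model(x)
    torch.cuda.synchronize()

    g = torch.cuda.CUDAGraph()
    with torch.cuda.graph(g, pool=pool):
        y = model(x)

    graphs[bs], static_inputs[bs], static_outputs[bs] = g, x, y

# Replay: copy new data into the static input buffer, then replay the graph.
static_inputs[4].copy_(torch.randn(4, 16, device="cuda"))
graphs[4].replay()
print(static_outputs[4].shape)  # torch.Size([4, 16])
```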
- def _capture_graph(self, graph, pool, stream, run_once_fn):
- with self.device_module.graph(graph, pool=pool, stream=stream):
- out = run_once_fn()
- return out
-
- def _create_device_graph(self):
- pass
-
- def capture_one_batch_size(self, bs: int, forward: Callable):
- graph = self._create_device_graph()
- stream = self.stream
- num_tokens = bs * self.num_tokens_per_bs
-
- # Graph inputs
- input_ids = self.input_ids[:num_tokens]
- req_pool_indices = self.req_pool_indices[:bs]
- seq_lens = self.seq_lens[:bs]
- out_cache_loc = self.out_cache_loc[:num_tokens]
- positions = self.positions[:num_tokens]
- if self.is_encoder_decoder:
- encoder_lens = self.encoder_lens[:bs]
- else:
- encoder_lens = None
- mrope_positions = self.mrope_positions[:, :bs]
- next_token_logits_buffer = self.next_token_logits_buffer[:num_tokens]
- self.num_token_non_padded[...] = num_tokens
-
- # pipeline parallelism
- if self.pp_size > 1:
- pp_proxy_tensors = PPProxyTensors(
- {k: v[:num_tokens] for k, v in self.pp_proxy_tensors.items()}
- )
-
- if self.require_mlp_tp_gather:
- self.global_num_tokens_gpu.copy_(
- torch.tensor(
- [num_tokens] * self.dp_size,
- dtype=torch.int32,
- device=input_ids.device,
- )
- )
- self.global_num_tokens_for_logprob_gpu.copy_(
- torch.tensor(
- [num_tokens] * self.dp_size,
- dtype=torch.int32,
- device=input_ids.device,
- )
- )
- global_dp_buffer_len = num_tokens * self.dp_size
- elif self.require_attn_tp_gather:
- self.global_num_tokens_gpu.copy_(
- torch.tensor(
- [num_tokens],
- dtype=torch.int32,
- device=input_ids.device,
- )
- )
- self.global_num_tokens_for_logprob_gpu.copy_(
- torch.tensor(
- [num_tokens],
- dtype=torch.int32,
- device=input_ids.device,
- )
- )
- global_dp_buffer_len = num_tokens
- else:
- global_dp_buffer_len = None
-
- spec_info = self.get_spec_info(num_tokens)
- if self.capture_hidden_mode != CaptureHiddenMode.FULL:
- self.capture_hidden_mode = (
- spec_info.capture_hidden_mode if spec_info else CaptureHiddenMode.NULL
- )
-
- if self.model_runner.server_args.enable_lora:
- # It is safe to capture the CUDA graph with an empty LoRA id, as the LoRA kernels are always launched whenever
- # `--enable-lora` is set (and return immediately if the LoRA id is empty, as a perf optimization).
- lora_ids = [None] * bs
- else:
- lora_ids = None
-
- forward_batch = ForwardBatch(
- forward_mode=self.capture_forward_mode,
- batch_size=bs,
- input_ids=input_ids,
- req_pool_indices=req_pool_indices,
- seq_lens=seq_lens,
- next_token_logits_buffer=next_token_logits_buffer,
- orig_seq_lens=seq_lens,
- req_to_token_pool=self.model_runner.req_to_token_pool,
- token_to_kv_pool=self.model_runner.token_to_kv_pool,
- attn_backend=self.model_runner.attn_backend,
- out_cache_loc=out_cache_loc,
- seq_lens_sum=seq_lens.sum().item(),
- encoder_lens=encoder_lens,
- return_logprob=False,
- positions=positions,
- global_num_tokens_gpu=self.global_num_tokens_gpu,
- global_num_tokens_for_logprob_gpu=self.global_num_tokens_for_logprob_gpu,
- dp_padding_mode=DpPaddingMode.get_default_mode_in_cuda_graph(),
- global_dp_buffer_len=global_dp_buffer_len,
- mrope_positions=mrope_positions,
- spec_algorithm=self.model_runner.spec_algorithm,
- spec_info=spec_info,
- capture_hidden_mode=self.capture_hidden_mode,
- num_token_non_padded=self.num_token_non_padded,
- global_forward_mode=self.capture_forward_mode,
- lora_ids=lora_ids,
- )
- self.tbo_plugin.capture_one_batch_size(forward_batch, num_tokens=num_tokens)
-
- if lora_ids is not None:
- self.model_runner.lora_manager.prepare_lora_batch(forward_batch)
-
- # Attention backend
- self.model_runner.attn_backend.init_forward_metadata_capture_cuda_graph(
- bs,
- num_tokens,
- req_pool_indices,
- seq_lens,
- encoder_lens,
- forward_batch.forward_mode,
- forward_batch.spec_info,
- )
-
- # Run and capture
- def run_once():
- # Clean intermediate result cache for DP attention
- forward_batch.dp_local_start_pos = forward_batch.dp_local_num_tokens = None
- set_dp_buffer_len(global_dp_buffer_len, num_tokens)
-
- kwargs = {}
- if (
- self.pp_size > 1
- and "pp_proxy_tensors" in inspect.signature(forward).parameters
- ):
- kwargs["pp_proxy_tensors"] = PPProxyTensors(
- {k: v.clone() for k, v in pp_proxy_tensors.tensors.items()}
- )
-
- logits_output_or_pp_proxy_tensors = forward(
- input_ids,
- forward_batch.positions,
- forward_batch,
- **kwargs,
- )
- return logits_output_or_pp_proxy_tensors
-
- for _ in range(2):
- self.device_module.synchronize()
- self.model_runner.tp_group.barrier()
- run_once()
-
- if get_global_graph_memory_pool() is None:
- set_global_graph_memory_pool(self.device_module.graph_pool_handle())
- # Set graph pool id globally to be able to use symmetric memory
- set_graph_pool_id(get_global_graph_memory_pool())
- out = self._capture_graph(
- graph, get_global_graph_memory_pool(), stream, run_once
- )
-
- return graph, out
-
- def recapture_if_needed(self, forward_batch: ForwardBatch):
-
- # If the required capture_hidden_mode changes, we need to recapture the graph
-
- # These are the different factors that can influence the capture_hidden_mode
- capture_hidden_mode_required_by_forward_batch = (
- forward_batch.capture_hidden_mode
- )
- capture_hidden_mode_required_by_spec_info = getattr(
- forward_batch.spec_info, "capture_hidden_mode", CaptureHiddenMode.NULL
- )
- capture_hidden_mode_required_for_returning_hidden_states = (
- CaptureHiddenMode.FULL
- if self.model_runner.server_args.enable_return_hidden_states
- else CaptureHiddenMode.NULL
- )
-
- # Determine the highest capture_hidden_mode required
- # (If we have FULL, we can emulate LAST or NULL)
- # (If we have LAST, we can emulate NULL)
- required_capture_hidden_mode = max(
- capture_hidden_mode_required_by_forward_batch,
- capture_hidden_mode_required_by_spec_info,
- capture_hidden_mode_required_for_returning_hidden_states,
- )
-
- # If the current capture_hidden_mode no longer matches the required mode, update it and re-capture.
- if self.capture_hidden_mode != required_capture_hidden_mode:
- self.capture_hidden_mode = required_capture_hidden_mode
- self.capture()
-
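The `max()` over capture hidden modes above (here and in `can_run`) relies on the modes being comparable, with NULL < LAST < FULL so that a stronger mode can always emulate a weaker one. A small self-contained illustration of that idea, using a stand-in enum (an assumption: the real `CaptureHiddenMode` is an ordered enum with this ranking):

```python
from enum import IntEnum

# Illustrative stand-in for CaptureHiddenMode; the ordering NULL < LAST < FULL
# is what makes max() pick the strongest requirement.
class CaptureHiddenModeSketch(IntEnum):
    NULL = 0   # no hidden states captured
    LAST = 1   # only the last token's hidden state
    FULL = 2   # hidden states for all tokens

requirements = [
    CaptureHiddenModeSketch.NULL,   # from the forward batch
    CaptureHiddenModeSketch.LAST,   # from spec_info
    CaptureHiddenModeSketch.FULL,   # from --enable-return-hidden-states
]

required = max(requirements)        # FULL can emulate LAST and NULL
print(required)                     # CaptureHiddenModeSketch.FULL
```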
- def replay_prepare(
- self,
- forward_batch: ForwardBatch,
- pp_proxy_tensors: Optional[PPProxyTensors] = None,
- ):
- self.recapture_if_needed(forward_batch)
-
- raw_bs = forward_batch.batch_size
- raw_num_token = raw_bs * self.num_tokens_per_bs
-
- # Pad
- if self.require_mlp_tp_gather:
- max_num_tokens = max(forward_batch.global_num_tokens_cpu)
- max_batch_size = (
- max_num_tokens / self.num_tokens_per_bs
- if self.model_runner.spec_algorithm.is_eagle()
- else max_num_tokens
- )
- index = bisect.bisect_left(self.capture_bs, max_batch_size)
- else:
- index = bisect.bisect_left(self.capture_bs, raw_bs)
- bs = self.capture_bs[index]
- if bs != raw_bs:
- self.seq_lens.fill_(self.seq_len_fill_value)
- self.out_cache_loc.zero_()
-
- # Common inputs
- self.input_ids[:raw_num_token].copy_(forward_batch.input_ids)
- self.req_pool_indices[:raw_bs].copy_(forward_batch.req_pool_indices)
- self.seq_lens[:raw_bs].copy_(forward_batch.seq_lens)
- self.out_cache_loc[:raw_num_token].copy_(forward_batch.out_cache_loc)
- self.positions[:raw_num_token].copy_(forward_batch.positions)
-
- seq_lens_cpu = None
- if forward_batch.seq_lens_cpu is not None:
- if bs != raw_bs:
- self.seq_lens_cpu.fill_(self.seq_len_fill_value)
- self.seq_lens_cpu[:raw_bs].copy_(forward_batch.seq_lens_cpu)
- seq_lens_cpu = self.seq_lens_cpu[:bs]
-
- if pp_proxy_tensors:
- for key in self.pp_proxy_tensors.keys():
- dim = pp_proxy_tensors[key].shape[0]
- self.pp_proxy_tensors[key][:dim].copy_(pp_proxy_tensors[key])
-
- if self.is_encoder_decoder:
- self.encoder_lens[:raw_bs].copy_(forward_batch.encoder_lens)
- if forward_batch.mrope_positions is not None:
- self.mrope_positions[:, :raw_bs].copy_(forward_batch.mrope_positions)
- if self.require_gathered_buffer:
- self.global_num_tokens_gpu.fill_(bs * self.num_tokens_per_bs)
- self.global_num_tokens_for_logprob_gpu.fill_(bs * self.num_tokens_per_bs)
- if enable_num_token_non_padded(self.model_runner.server_args):
- num_token_non_padded = forward_batch.num_token_non_padded
- if self.require_gathered_buffer:
- tokens_per_rank = bs // self.attn_tp_size * self.num_tokens_per_bs
- num_local_token_non_padded = torch.clamp(
- num_token_non_padded - tokens_per_rank * self.attn_tp_rank,
- min=0,
- max=tokens_per_rank,
- )
- self.num_token_non_padded.copy_(num_local_token_non_padded)
- else:
- self.num_token_non_padded.copy_(num_token_non_padded)
- if self.enable_two_batch_overlap:
- self.tbo_plugin.replay_prepare(
- forward_mode=self.capture_forward_mode,
- bs=bs,
- num_token_non_padded=len(forward_batch.input_ids),
- spec_info=forward_batch.spec_info,
- )
- if forward_batch.forward_mode.is_idle() and forward_batch.spec_info is not None:
- forward_batch.spec_info.custom_mask = self.custom_mask
- # Attention backend
- self.model_runner.attn_backend.init_forward_metadata_replay_cuda_graph(
- bs,
- self.req_pool_indices[:bs],
- self.seq_lens[:bs],
- forward_batch.seq_lens_sum + (bs - raw_bs) * self.seq_len_fill_value,
- self.encoder_lens[:bs] if self.is_encoder_decoder else None,
- self.capture_forward_mode,
- forward_batch.spec_info,
- seq_lens_cpu=seq_lens_cpu,
- )
-
- # Store fields
- self.raw_bs = raw_bs
- self.raw_num_token = raw_num_token
- self.bs = bs
-
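The padding step in `replay_prepare` above picks the smallest captured batch size that can hold the real batch via `bisect_left`. A small self-contained example of that lookup:

```python
import bisect

# Captured batch sizes (sorted), e.g. the start of the default capture schedule.
capture_bs = [1, 2, 4, 8, 16, 24, 32, 40, 48]

def padded_bs(raw_bs: int) -> int:
    # bisect_left returns the first index whose value is >= raw_bs,
    # i.e. the smallest captured graph that can hold this batch.
    return capture_bs[bisect.bisect_left(capture_bs, raw_bs)]

print(padded_bs(3))   # 4  -> replay the bs=4 graph; the extra slot is padding
print(padded_bs(8))   # 8  -> exact match, no padding
print(padded_bs(20))  # 24
```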
- def replay(
- self,
- forward_batch: ForwardBatch,
- skip_attn_backend_init: bool = False,
- pp_proxy_tensors: Optional[PPProxyTensors] = None,
- ) -> Union[LogitsProcessorOutput, PPProxyTensors]:
- if not skip_attn_backend_init:
- self.replay_prepare(forward_batch, pp_proxy_tensors)
- else:
- # In speculative decoding, these two fields are still needed.
- self.input_ids[: self.raw_num_token].copy_(forward_batch.input_ids)
- self.positions[: self.raw_num_token].copy_(forward_batch.positions)
-
- # Replay
- self.graphs[self.bs].replay()
-
- output = self.output_buffers[self.bs]
- if isinstance(output, LogitsProcessorOutput):
- return LogitsProcessorOutput(
- next_token_logits=output.next_token_logits[: self.raw_num_token],
- hidden_states=(
- output.hidden_states[: self.raw_num_token]
- if output.hidden_states is not None
- else None
- ),
- )
- else:
- assert isinstance(output, PPProxyTensors)
- return PPProxyTensors({k: v[: self.bs] for k, v in output.tensors.items()})
-
- def get_spec_info(self, num_tokens: int):
- spec_info = None
- if self.model_runner.spec_algorithm.is_eagle():
- from sglang.srt.speculative.eagle_utils import EagleVerifyInput
-
- if self.model_runner.is_draft_worker:
- raise RuntimeError("This should not happen.")
- else:
- spec_info = EagleVerifyInput(
- draft_token=None,
- custom_mask=self.custom_mask,
- positions=None,
- retrive_index=None,
- retrive_next_token=None,
- retrive_next_sibling=None,
- retrive_cum_len=None,
- spec_steps=self.model_runner.server_args.speculative_num_steps,
- topk=self.model_runner.server_args.speculative_eagle_topk,
- draft_token_num=self.model_runner.server_args.speculative_num_draft_tokens,
- capture_hidden_mode=CaptureHiddenMode.FULL,
- seq_lens_sum=None,
- seq_lens_cpu=None,
- )
-
- return spec_info
-
-
-GRAPH_CAPTURE_FAILED_MSG = (
- "Possible solutions:\n"
- "1. set --mem-fraction-static to a smaller value (e.g., 0.8 or 0.7)\n"
- "2. set --cuda-graph-max-bs to a smaller value (e.g., 16)\n"
- "3. disable torch compile by not using --enable-torch-compile\n"
- "4. disable CUDA graph by --disable-cuda-graph. (Not recommended. Huge performance loss)\n"
- "Open an issue on GitHub https://github.com/sgl-project/sglang/issues/new/choose \n"
-)
diff --git a/python/sglang/srt/model_executor/model_runner.py b/python/sglang/srt/model_executor/model_runner.py
index 41b9ce93fa2..d90190d7f80 100644
--- a/python/sglang/srt/model_executor/model_runner.py
+++ b/python/sglang/srt/model_executor/model_runner.py
@@ -20,6 +20,7 @@
import logging
import os
import time
+from collections import defaultdict
from dataclasses import dataclass
from typing import List, Optional, Tuple, Union
@@ -32,6 +33,7 @@
from sglang.srt.configs.update_config import adjust_config_with_unaligned_cpu_tp
from sglang.srt.constants import GPU_MEMORY_TYPE_WEIGHTS
from sglang.srt.distributed import (
+ get_pp_group,
get_tp_group,
get_world_group,
init_distributed_environment,
@@ -53,6 +55,7 @@
set_global_expert_location_metadata,
)
from sglang.srt.eplb.expert_location_updater import ExpertLocationUpdater
+from sglang.srt.hf_transformers_utils import get_context_length, update_context_length
from sglang.srt.layers.attention.tbo_backend import TboAttnBackend
from sglang.srt.layers.dp_attention import (
get_attention_tp_group,
@@ -66,7 +69,6 @@
)
from sglang.srt.layers.sampler import Sampler
from sglang.srt.layers.torchao_utils import apply_torchao_config_to_model
-from sglang.srt.layers.utils import is_sm100_supported
from sglang.srt.lora.lora_manager import LoRAManager
from sglang.srt.lora.lora_registry import LoRARef
from sglang.srt.managers.schedule_batch import (
@@ -80,15 +82,19 @@
TokenToKVPoolAllocator,
)
from sglang.srt.mem_cache.allocator_ascend import AscendPagedTokenToKVPoolAllocator
+from sglang.srt.mem_cache.hip_offload_kv_pool_mha import MHATokenToHiPOffloadKVPool
from sglang.srt.mem_cache.memory_pool import (
AscendMLAPagedTokenToKVPool,
AscendTokenToKVPool,
DoubleSparseTokenToKVPool,
+ HybridLinearKVPool,
+ HybridReqToTokenPool,
MHATokenToKVPool,
MLATokenToKVPool,
ReqToTokenPool,
SWAKVPool,
)
+from sglang.srt.model_executor.cpu_graph_runner import CPUGraphRunner
from sglang.srt.model_executor.cuda_graph_runner import CudaGraphRunner
from sglang.srt.model_executor.forward_batch_info import ForwardBatch, PPProxyTensors
from sglang.srt.model_executor.npu_graph_runner import NPUGraphRunner
@@ -96,6 +102,11 @@
from sglang.srt.model_loader.loader import DefaultModelLoader, get_model_loader
from sglang.srt.model_loader.utils import set_default_torch_dtype
from sglang.srt.model_loader.weight_utils import default_weight_loader
+from sglang.srt.offloader import (
+ create_offloader_from_server_args,
+ get_offloader,
+ set_offloader,
+)
from sglang.srt.patch_torch import monkey_patch_torch_reductions
from sglang.srt.sampling.sampling_batch_info import SamplingBatchInfo
from sglang.srt.server_args import ServerArgs
@@ -116,9 +127,9 @@
is_hopper_with_cuda_12_3,
is_no_spec_infer_or_topk_one,
is_npu,
+ is_sm100_supported,
monkey_patch_p2p_access_check,
monkey_patch_vllm_gguf_config,
- set_cpu_offload_max_bytes,
set_cuda_arch,
)
from sglang.srt.weight_sync.tensor_bucket import (
@@ -168,6 +179,7 @@ def __init__(
pp_size: int,
nccl_port: int,
server_args: ServerArgs,
+ dp_rank: Optional[int] = None,
is_draft_worker: bool = False,
req_to_token_pool: Optional[ReqToTokenPool] = None,
token_to_kv_pool_allocator: Optional[BaseTokenToKVPoolAllocator] = None,
@@ -222,9 +234,6 @@ def __init__(
}
)
- # CPU offload
- set_cpu_offload_max_bytes(int(server_args.cpu_offload_gb * 1024**3))
-
# Init OpenMP threads binding for CPU
if self.device == "cpu":
self.init_threads_binding()
@@ -232,6 +241,9 @@ def __init__(
# Get memory before model loading
min_per_gpu_memory = self.init_torch_distributed()
+ # CPU offload
+ set_offloader(create_offloader_from_server_args(server_args, dp_rank=dp_rank))
+
# Update deep gemm configure
if deep_gemm_wrapper.ENABLE_JIT_DEEPGEMM:
deep_gemm_wrapper.update_deep_gemm_config(gpu_id, server_args)
@@ -295,6 +307,26 @@ def initialize(self, min_per_gpu_memory: float):
if architectures and not any("Llama4" in arch for arch in architectures):
self.is_hybrid = self.model_config.is_hybrid = True
+ if self.is_hybrid_gdn:
+ logger.warning("Hybrid GDN model detected, disable radix cache")
+ self.server_args.disable_radix_cache = True
+ self.server_args.attention_backend = "hybrid_linear_attn"
+ if self.server_args.max_mamba_cache_size is None:
+ if self.server_args.max_running_requests is not None:
+ self.server_args.max_mamba_cache_size = (
+ self.server_args.max_running_requests
+ )
+ else:
+ self.server_args.max_mamba_cache_size = 512
+ self.server_args.max_mamba_cache_size = (
+ self.server_args.max_mamba_cache_size
+ // (
+ self.server_args.dp_size
+ if self.server_args.enable_dp_attention
+ else 1
+ )
+ )
+
# For MTP models like DeepSeek-V3 or GLM-4.5, the MTP layer(s) are used separately as draft
# models for speculative decoding. In those cases, `num_nextn_predict_layers` is used to
# determine the number of layers.
@@ -302,7 +334,10 @@ def initialize(self, min_per_gpu_memory: float):
model_num_layers = (
self.model_config.num_nextn_predict_layers
if self.is_draft_worker and model_has_mtp_layers
- else self.model_config.num_hidden_layers
+ else max(
+ self.model_config.num_hidden_layers,
+ self.model_config.num_attention_layers,
+ )
)
self.start_layer = getattr(self.model, "start_layer", 0)
self.end_layer = getattr(self.model, "end_layer", model_num_layers)
@@ -333,6 +368,14 @@ def initialize(self, min_per_gpu_memory: float):
if server_args.enable_lora:
self.init_lora_manager()
+ # Init Double Sparsity
+ if server_args.enable_double_sparsity:
+ if server_args.ds_heavy_channel_type is None:
+ raise ValueError(
+ "Please specify the heavy channel type for double sparsity optimization."
+ )
+ self.init_double_sparsity_channel_config(server_args.ds_heavy_channel_type)
+
# Init memory pool and attention backends
self.init_memory_pool(
min_per_gpu_memory,
@@ -343,12 +386,12 @@ def initialize(self, min_per_gpu_memory: float):
self.init_cublas()
self.init_attention_backend()
self.init_device_graphs()
- elif self.device == "npu":
+ elif self.device in ["npu", "cpu"]:
self.init_attention_backend()
self.init_device_graphs()
else:
self.graph_runner = None
- self.cuda_graph_mem_usage = 0
+ self.graph_mem_usage = 0
self.init_attention_backend()
# auxiliary hidden capture mode. TODO: expose this to server args?
@@ -466,6 +509,7 @@ def model_specific_adjustment(self):
"triton",
"flashmla",
"cutlass_mla",
+ "hip_attention",
"trtllm_mla",
"ascend",
]:
@@ -498,11 +542,10 @@ def model_specific_adjustment(self):
)
server_args.attention_backend = "triton"
server_args.disable_cuda_graph = True
- if server_args.ds_heavy_channel_type is None:
- raise ValueError(
- "Please specify the heavy channel type for double sparsity optimization."
- )
- self.init_double_sparsity_channel_config(server_args.ds_heavy_channel_type)
+
+ elif server_args.enable_hip_attention:
+ logger.info("HIP attention is turned on.")
+ server_args.attention_backend = "hip_attention"
if self.is_multimodal:
if not self.is_multimodal_chunked_prefill_supported:
@@ -514,8 +557,16 @@ def model_specific_adjustment(self):
if not self.use_mla_backend:
server_args.disable_chunked_prefix_cache = True
- elif self.page_size > 1:
- logger.info("Disable chunked prefix cache when page size > 1.")
+ # TODO(kaixih@nvidia): remove this once we have a better solution for DP attention.
+ # For more details, see: https://github.com/sgl-project/sglang/issues/8616
+ elif (
+ self.dp_size > 1
+ and is_sm100_supported()
+ and server_args.attention_backend != "triton"
+ ):
+ logger.info(
+ "Disable chunked prefix cache when dp size > 1 and attention backend is not triton."
+ )
server_args.disable_chunked_prefix_cache = True
if not server_args.disable_chunked_prefix_cache:
@@ -588,6 +639,11 @@ def init_torch_distributed(self):
# Set local size to hint SGLang to use shared memory based AllReduce
os.environ["LOCAL_SIZE"] = str(self.tp_size)
torch.ops.sgl_kernel.initialize(self.tp_size, self.tp_rank)
+
+ @torch.library.register_fake("sgl_kernel::shm_allgather")
+ def _(data, dim):
+ return torch.cat([data] * self.tp_size, dim=dim)
+
else:
logger.warning(
"init_cpu_threads_env and shared memory based AllReduce is disabled since intel amx backend is not available"
@@ -620,6 +676,7 @@ def init_torch_distributed(self):
cpu_group=get_world_group().cpu_group,
)
self.tp_group = get_tp_group()
+ self.pp_group = get_pp_group()
self.attention_tp_group = get_attention_tp_group()
# Check memory for tensor parallelism
@@ -631,11 +688,11 @@ def init_torch_distributed(self):
"The memory capacity is unbalanced. Some GPUs may be occupied by other processes. "
f"{min_per_gpu_memory=}, {local_gpu_memory=}, {local_gpu_memory * 0.9=}"
)
- else:
- raise ValueError(
- "The memory capacity is unbalanced. Some GPUs may be occupied by other processes. "
- f"{min_per_gpu_memory=}, {local_gpu_memory=}, {local_gpu_memory * 0.9=}"
- )
+ # else:
+ # raise ValueError(
+ # "The memory capacity is unbalanced. Some GPUs may be occupied by other processes. "
+ # f"{min_per_gpu_memory=}, {local_gpu_memory=}, {local_gpu_memory * 0.9=}"
+ # )
logger.info(
f"Init torch distributed ends. mem usage={(before_avail_memory - local_gpu_memory):.2f} GB"
@@ -676,6 +733,42 @@ def load_model(self):
if self.server_args.load_format == "gguf":
monkey_patch_vllm_gguf_config()
+ if self.server_args.enable_hip_attention:
+ if hasattr(self.model_config.hf_config, "text_config"):
+ orig_context_length = get_context_length(
+ self.model_config.hf_config.text_config
+ )
+ new_context_length = (
+ max(orig_context_length, self.server_args.context_length)
+ if self.server_args.context_length is not None
+ else orig_context_length
+ )
+ if self.server_args.context_length is None:
+ new_context_length = orig_context_length
+ update_context_length(self.model_config.hf_config, new_context_length)
+ update_context_length(
+ self.model_config.hf_config.text_config, new_context_length
+ )
+ self.model_config.hf_config.orig_context_len = orig_context_length
+ self.model_config.hf_config.text_config.orig_context_len = (
+ orig_context_length
+ )
+ else:
+ orig_context_length = get_context_length(self.model_config.hf_config)
+ new_context_length = (
+ max(orig_context_length, self.server_args.context_length)
+ if self.server_args.context_length is not None
+ else orig_context_length
+ )
+ if self.server_args.context_length is None:
+ new_context_length = orig_context_length
+ update_context_length(self.model_config.hf_config, new_context_length)
+ self.model_config.hf_config.orig_context_len = orig_context_length
+ logger.info(
+ f"Update model config for HiP context extension "
+ f"{orig_context_length} -> {new_context_length}."
+ )
+
# Load the model
# Remove monkey_patch when linear.py quant remove dependencies with vllm
monkey_patch_vllm_parallel_state()
@@ -690,6 +783,8 @@ def load_model(self):
monkey_patch_vllm_parallel_state(reverse=True)
monkey_patch_isinstance_for_vllm_base_layer(reverse=True)
+ get_offloader().post_init()
+
if self.server_args.kv_cache_dtype == "fp8_e4m3":
if self.server_args.quantization_param_path is not None:
if callable(getattr(self.model, "load_kv_cache_scales", None)):
@@ -713,6 +808,23 @@ def load_model(self):
"This may lead to less accurate results!"
)
+ if self.server_args.enable_hip_attention:
+ model_supports_hip_attention = getattr(
+ self.model, "hip_attention_supported", False
+ )
+ if self.server_args.hip_attention_config.using_extend:
+ if not model_supports_hip_attention:
+ raise RuntimeError(
+ "Model does not support HiP attention context length extension. "
+ "Try disabling context extension in --hip-attention-config."
+ )
+ if self.server_args.enable_hip_kv_cache_offload:
+ if not model_supports_hip_attention:
+ raise RuntimeError(
+ "Model does not support HiP attention KV cache offloading. "
+ "Try disabling --enable-hip-kv-cache-offload."
+ )
+
# Parse other args
self.sliding_window_size = None
if hasattr(self.model, "get_attention_sliding_window_size"):
@@ -1050,6 +1162,8 @@ def profile_max_num_token(self, total_gpu_memory: int):
"num_nextn_predict_layers",
self.num_effective_layers,
)
+ elif self.is_hybrid_gdn:
+ num_layers = len(self.model_config.hf_config.full_attention_layer_ids)
else:
num_layers = self.num_effective_layers
if self.use_mla_backend:
@@ -1069,9 +1183,22 @@ def profile_max_num_token(self, total_gpu_memory: int):
rest_memory = available_gpu_memory - total_gpu_memory * (
1 - self.mem_fraction_static
)
+ if self.is_hybrid_gdn:
+ rest_memory -= (
+ self.server_args.max_mamba_cache_size
+ * self.model_config.hf_config.mamba_cache_per_req
+ / (1 << 30)
+ )
max_num_token = int(rest_memory * (1 << 30) // cell_size)
return max_num_token
+ @property
+ def is_hybrid_gdn(self):
+ return self.model_config.hf_config.architectures[0] in [
+ "Qwen3NextForCausalLM",
+ "Qwen3NextForCausalLMMTP",
+ ]
+
def set_num_token_hybrid(self):
if (
"Llama4ForConditionalGeneration"
@@ -1192,6 +1319,8 @@ def init_memory_pool(
),
4096,
)
+ if self.is_hybrid_gdn:
+ max_num_reqs = min(max_num_reqs, self.server_args.max_mamba_cache_size)
if not self.spec_algorithm.is_none():
if self.is_draft_worker:
@@ -1217,19 +1346,34 @@ def init_memory_pool(
self.server_args.max_num_reqs = max_num_reqs
if max_total_tokens is not None:
- if max_total_tokens > self.max_total_num_tokens:
- logging.warning(
- f"max_total_tokens={max_total_tokens} is larger than the profiled value "
- f"{self.max_total_num_tokens}. "
- f"Use the profiled value instead."
+ if self.server_args.enable_hip_kv_cache_offload:
+ self.max_total_num_tokens = max_total_tokens
+ else:
+ if max_total_tokens > self.max_total_num_tokens:
+ logging.warning(
+ f"max_total_tokens={max_total_tokens} is larger than the profiled value "
+ f"{self.max_total_num_tokens}. "
+ f"Use the profiled value instead."
+ )
+ self.max_total_num_tokens = min(
+ self.max_total_num_tokens, max_total_tokens
)
- self.max_total_num_tokens = min(self.max_total_num_tokens, max_total_tokens)
self.max_total_num_tokens = (
self.max_total_num_tokens
// self.server_args.page_size
* self.server_args.page_size
)
+ # Different pp ranks may have different numbers of layers, so reduce max_total_num_tokens to the minimum across ranks.
+ if self.pp_size > 1:
+ tensor = torch.tensor(self.max_total_num_tokens, dtype=torch.int64)
+ torch.distributed.all_reduce(
+ tensor,
+ op=torch.distributed.ReduceOp.MIN,
+ group=get_world_group().cpu_group,
+ )
+ self.max_total_num_tokens = tensor.item()
+
# create token size for hybrid cache
if self.is_hybrid:
self.set_num_token_hybrid()
@@ -1241,6 +1385,11 @@ def init_memory_pool(
# Initialize req_to_token_pool
if self.req_to_token_pool is None:
+ # FIXME(lsyin): this is a temporary fix for the context length issue when using speculative decoding
+ extra_max_context_len = 4
+ if self.server_args.speculative_num_draft_tokens is not None:
+ extra_max_context_len += self.server_args.speculative_num_draft_tokens
+
if self.server_args.disaggregation_mode == "decode":
from sglang.srt.disaggregation.decode import DecodeReqToTokenPool
@@ -1249,15 +1398,39 @@ def init_memory_pool(
pre_alloc_size = max_num_reqs * 2 if max_num_reqs <= 32 else 0
self.req_to_token_pool = DecodeReqToTokenPool(
size=max_num_reqs,
- max_context_len=self.model_config.context_len + 4,
+ max_context_len=self.model_config.context_len
+ + extra_max_context_len,
device=self.device,
enable_memory_saver=self.server_args.enable_memory_saver,
pre_alloc_size=pre_alloc_size,
)
+ elif self.is_hybrid_gdn:
+ config = self.model_config.hf_config
+ (
+ conv_state_shape,
+ temporal_state_shape,
+ conv_dtype,
+ ssm_dtype,
+ mamba_layers,
+ ) = config.hybrid_gdn_params
+ self.req_to_token_pool = HybridReqToTokenPool(
+ size=max_num_reqs,
+ max_context_len=self.model_config.context_len
+ + extra_max_context_len,
+ device=self.device,
+ enable_memory_saver=self.server_args.enable_memory_saver,
+ conv_state_shape=conv_state_shape,
+ temporal_state_shape=temporal_state_shape,
+ conv_dtype=conv_dtype,
+ ssm_dtype=ssm_dtype,
+ mamba_layers=mamba_layers,
+ speculative_num_draft_tokens=self.server_args.speculative_num_draft_tokens,
+ )
else:
self.req_to_token_pool = ReqToTokenPool(
size=max_num_reqs,
- max_context_len=self.model_config.context_len + 4,
+ max_context_len=self.model_config.context_len
+ + extra_max_context_len,
device=self.device,
enable_memory_saver=self.server_args.enable_memory_saver,
)
@@ -1320,6 +1493,77 @@ def init_memory_pool(
start_layer=self.start_layer,
end_layer=self.end_layer,
)
+ elif (
+ self.server_args.enable_hip_attention
+ and self.server_args.enable_hip_kv_cache_offload
+ ):
+ if self.model_config.attention_chunk_size is not None:
+ # NOTE: for now, this only handles Llama4.
+ if self.model_config.hf_config.architectures[0] not in [
+ "Llama4ForConditionalGeneration",
+ ]:
+ raise RuntimeError(
+ f"Unsupported model for chunked attention with HiP Attention: {self.model_config.hf_config.architectures[0]}"
+ )
+
+ num_layers = self.model_config.num_hidden_layers
+ attention_chunk_size = self.model_config.attention_chunk_size
+
+ mask_factors = []
+ mask_sizes = []
+ sa_factors = []
+ sa_sizes = []
+
+ irope_offset = 1
+ irope_interval = 4
+
+ for layer_id in range(num_layers):
+ use_rope = (layer_id + irope_offset) % irope_interval != 0
+ if use_rope:
+ # Chunked attention
+ mask_factors.append(None)
+ mask_sizes.append(1)
+ sa_factors.append(None)
+ sa_sizes.append(int(attention_chunk_size * 1.5))
+ else:
+ # NoPE attention
+ mask_factors.append(self.server_args.hip_max_mask_cache_factor)
+ mask_sizes.append(self.server_args.hip_max_mask_cache_size)
+ sa_factors.append(self.server_args.hip_max_sa_cache_factor)
+ sa_sizes.append(self.server_args.hip_max_sa_cache_size)
+
+ self.token_to_kv_pool = MHATokenToHiPOffloadKVPool(
+ max_token_size=self.max_total_num_tokens,
+ max_mask_cache_factor=mask_factors,
+ max_mask_cache_size=mask_sizes,
+ max_sa_cache_factor=sa_factors,
+ max_sa_cache_size=sa_sizes,
+ dtype=self.kv_cache_dtype,
+ head_num=self.model_config.get_num_kv_heads(self.tp_size),
+ head_dim=self.model_config.head_dim,
+ layer_num=self.model_config.num_hidden_layers,
+ device=torch.device(self.gpu_id),
+ hip_config=self.server_args.hip_attention_config,
+ chunked_attention_size=attention_chunk_size,
+ irope_offset=irope_offset,
+ irope_interval=irope_interval,
+ enable_memory_saver=self.server_args.enable_memory_saver,
+ )
+ else:
+ self.token_to_kv_pool = MHATokenToHiPOffloadKVPool(
+ max_token_size=self.max_total_num_tokens,
+ max_mask_cache_factor=self.server_args.hip_max_mask_cache_factor,
+ max_mask_cache_size=self.server_args.hip_max_mask_cache_size,
+ max_sa_cache_factor=self.server_args.hip_max_sa_cache_factor,
+ max_sa_cache_size=self.server_args.hip_max_sa_cache_size,
+ dtype=self.kv_cache_dtype,
+ head_num=self.model_config.get_num_kv_heads(self.tp_size),
+ head_dim=self.model_config.head_dim,
+ layer_num=self.model_config.num_hidden_layers,
+ device=torch.device(self.gpu_id),
+ hip_config=self.server_args.hip_attention_config,
+ enable_memory_saver=self.server_args.enable_memory_saver,
+ )
else:
if self.is_hybrid:
self.token_to_kv_pool = SWAKVPool(
@@ -1335,6 +1579,23 @@ def init_memory_pool(
enable_kvcache_transpose=False,
device=self.device,
)
+ elif self.is_hybrid_gdn:
+ self.token_to_kv_pool = HybridLinearKVPool(
+ size=self.max_total_num_tokens,
+ dtype=self.kv_cache_dtype,
+ head_num=self.model_config.get_num_kv_heads(
+ get_attention_tp_size()
+ ),
+ head_dim=self.model_config.head_dim,
+ # If this is a draft worker, we only need one attention layer's KV pool.
+ full_attention_layer_ids=(
+ [0]
+ if self.is_draft_worker
+ else self.model_config.hf_config.full_attention_layer_ids
+ ),
+ enable_kvcache_transpose=False,
+ device=self.device,
+ )
else:
self.token_to_kv_pool = MHATokenToKVPool(
self.max_total_num_tokens,
@@ -1353,11 +1614,6 @@ def init_memory_pool(
# Initialize token_to_kv_pool_allocator
need_sort = self.server_args.disaggregation_mode in ("decode", "prefill")
- max_num_extend_tokens = (
- self.server_args.chunked_prefill_size
- if self.server_args.chunked_prefill_size > 0
- else self.server_args.max_prefill_tokens
- )
if self.token_to_kv_pool_allocator is None:
if self.server_args.attention_backend == "ascend":
self.token_to_kv_pool_allocator = AscendPagedTokenToKVPoolAllocator(
@@ -1396,11 +1652,25 @@ def init_memory_pool(
device=self.device,
kvcache=self.token_to_kv_pool,
need_sort=need_sort,
- max_num_extend_tokens=max_num_extend_tokens,
)
else:
assert self.is_draft_worker
+ self.hip_metadata_cache_pool = None
+ if self.server_args.enable_hip_attention:
+ from hip_attn.v1_2 import HiPMetadataCachePool
+
+ self.hip_metadata_cache_pool = HiPMetadataCachePool(
+ self.max_total_num_tokens,
+ query_head_num=(
+ self.model_config.num_attention_heads // self.server_args.tp_size
+ ),
+ layer_num=self.model_config.num_hidden_layers,
+ context_length=self.model_config.context_len,
+ device=self.device,
+ hip_config=self.server_args.hip_attention_config,
+ )
+
logger.info(
f"Memory pool end. "
f"avail mem={get_available_gpu_memory(self.device, self.gpu_id):.2f} GB"
@@ -1435,14 +1705,12 @@ def _get_attention_backend(self):
else self.server_args.attention_backend
)
if self.decode_attention_backend_str != self.prefill_attention_backend_str:
- assert (
- self.server_args.speculative_algorithm is None
- ), "Currently HybridAttentionBackend does not support speculative decoding."
from sglang.srt.layers.attention.hybrid_attn_backend import (
HybridAttnBackend,
)
attn_backend = HybridAttnBackend(
+ self,
decode_backend=self._get_attention_backend_from_str(
self.decode_attention_backend_str
),
@@ -1473,7 +1741,11 @@ def _get_attention_backend(self):
return attn_backend
def _get_attention_backend_from_str(self, backend_str: str):
- if backend_str == "flashinfer":
+ if backend_str == "hip_attention":
+ from sglang.srt.layers.attention.hip_attention import HiPAttentionBackend
+
+ return HiPAttentionBackend(self)
+ elif backend_str == "flashinfer":
if not self.use_mla_backend:
from sglang.srt.layers.attention.flashinfer_backend import (
FlashInferAttnBackend,
@@ -1576,6 +1848,24 @@ def _get_attention_backend_from_str(self, backend_str: str):
)
return DualChunkFlashAttentionBackend(self)
+ elif backend_str == "hybrid_linear_attn":
+ assert (
+ self.is_hybrid_gdn
+ ), "hybrid_linear_attn backend can only be used with hybrid GDN models."
+ from sglang.srt.layers.attention.flashattention_backend import (
+ FlashAttentionBackend,
+ )
+ from sglang.srt.layers.attention.hybrid_linear_attn_backend import (
+ HybridLinearAttnBackend,
+ MambaAttnBackend,
+ )
+
+ full_attn_backend = FlashAttentionBackend(self)
+ linear_attn_backend = MambaAttnBackend(self)
+ full_attn_layers = self.model_config.hf_config.full_attention_layer_ids
+ return HybridLinearAttnBackend(
+ full_attn_backend, linear_attn_backend, full_attn_layers
+ )
else:
raise ValueError(f"Invalid attention backend: {backend_str}")
@@ -1597,38 +1887,46 @@ def init_double_sparsity_channel_config(self, selected_channel):
)
def init_device_graphs(self):
- """Capture cuda graphs."""
+ """Capture device graphs."""
self.graph_runner = None
- self.cuda_graph_mem_usage = 0
+ self.graph_mem_usage = 0
if not self.is_generation:
# TODO: Currently, cuda graph only captures decode steps, which only exist for generation models
return
- if self.server_args.disable_cuda_graph:
+ if self.device != "cpu" and self.server_args.disable_cuda_graph:
+ return
+
+ if self.device == "cpu" and not self.server_args.enable_torch_compile:
return
tic = time.perf_counter()
before_mem = get_available_gpu_memory(self.device, self.gpu_id)
logger.info(
- f"Capture cuda graph begin. This can take up to several minutes. avail mem={before_mem:.2f} GB"
+ f"Capture {'cpu graph' if self.device == 'cpu' else 'cuda graph'} begin. This can take up to several minutes. avail mem={before_mem:.2f} GB"
)
- self.graph_runner = (
- CudaGraphRunner(self) if not _is_npu else NPUGraphRunner(self)
+ graph_runners = defaultdict(
+ lambda: CudaGraphRunner,
+ {
+ "cpu": CPUGraphRunner,
+ "npu": NPUGraphRunner,
+ },
)
+ self.graph_runner = graph_runners[self.device](self)
+
after_mem = get_available_gpu_memory(self.device, self.gpu_id)
- self.cuda_graph_mem_usage = before_mem - after_mem
+ self.graph_mem_usage = before_mem - after_mem
logger.info(
- f"Capture cuda graph end. Time elapsed: {time.perf_counter() - tic:.2f} s. "
- f"mem usage={self.cuda_graph_mem_usage:.2f} GB. avail mem={after_mem:.2f} GB."
+ f"Capture {'cpu graph' if self.device == 'cpu' else 'cuda graph'} end. Time elapsed: {time.perf_counter() - tic:.2f} s. "
+ f"mem usage={self.graph_mem_usage:.2f} GB. avail mem={after_mem:.2f} GB."
)
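The `defaultdict` introduced above acts as a device-to-runner dispatch table whose default factory returns `CudaGraphRunner` for any device not explicitly listed. A tiny standalone illustration of that idiom, with toy classes standing in for the real runners:

```python
from collections import defaultdict

# Toy stand-ins for the runner classes selected in init_device_graphs().
class CudaRunner: ...
class CpuRunner: ...
class NpuRunner: ...

runners = defaultdict(
    lambda: CudaRunner,          # default factory: any unknown device -> CUDA runner
    {"cpu": CpuRunner, "npu": NpuRunner},
)

print(runners["cpu"].__name__)   # CpuRunner
print(runners["cuda"].__name__)  # CudaRunner (falls back to the default factory)
print(runners["xpu"].__name__)   # CudaRunner
```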
def init_threads_binding(self):
omp_cpuids = os.environ.get("SGLANG_CPU_OMP_THREADS_BIND", "all")
+ cpu_ids_by_node = get_cpu_ids_by_node()
+ n_numa_node = len(cpu_ids_by_node)
if omp_cpuids == "all":
- cpu_ids_by_node = get_cpu_ids_by_node()
- n_numa_node = len(cpu_ids_by_node)
-
assert self.tp_size <= n_numa_node, (
f"SGLANG_CPU_OMP_THREADS_BIND is not set, in this case, "
f"tp_size {self.tp_size} should be smaller than or equal to number of numa node on the machine {n_numa_node}. "
@@ -1645,11 +1943,22 @@ def init_threads_binding(self):
)
self.local_omp_cpuid = cpu_ids_by_node[self.tp_rank]
else:
- self.local_omp_cpuid = omp_cpuids.split("|")[self.tp_rank]
+ threads_bind_list = omp_cpuids.split("|")
+ assert self.tp_size == len(threads_bind_list), (
+ f"SGLANG_CPU_OMP_THREADS_BIND setting must be aligned with TP size parameter ({self.tp_size}). "
+ f"Please double check your settings."
+ )
+ self.local_omp_cpuid = threads_bind_list[self.tp_rank]
+ if self.tp_size > n_numa_node:
+ logger.warning(
+ f"TP size ({self.tp_size})is larger than numa node number ({n_numa_node}), "
+ f"in this case the available memory amount of each rank cannot be determined in prior. "
+ f"Please set proper `--max-total-tokens` to avoid the out-of-memory error."
+ )
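For reference, the branch above expects `SGLANG_CPU_OMP_THREADS_BIND` to hold one CPU-core binding per TP rank, separated by `|`. A small sketch of how such a value could be set and checked; the specific core ranges are made up for illustration and must match the actual host topology:

```python
import os

# Hypothetical binding for tp_size == 2 on a 2-socket machine; the core ranges
# "0-31" and "32-63" are illustrative only.
os.environ["SGLANG_CPU_OMP_THREADS_BIND"] = "0-31|32-63"

tp_size = 2
threads_bind_list = os.environ["SGLANG_CPU_OMP_THREADS_BIND"].split("|")
assert len(threads_bind_list) == tp_size, "one core binding per TP rank"

for tp_rank, local_omp_cpuid in enumerate(threads_bind_list):
    print(f"rank {tp_rank} -> cores {local_omp_cpuid}")
# rank 0 -> cores 0-31
# rank 1 -> cores 32-63
```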
def apply_torch_tp(self):
logger.info(f"Enabling torch tensor parallelism on {self.tp_size} devices.")
- from sglang.srt.model_parallel import tensor_parallel
+ from sglang.srt.layers.model_parallel import tensor_parallel
device_mesh = torch.distributed.init_device_mesh(self.device, (self.tp_size,))
tensor_parallel(self.model, device_mesh)
@@ -1765,18 +2074,24 @@ def _forward_raw(
reinit_attn_backend: bool = False,
split_forward_count: int = 1,
) -> Tuple[Union[LogitsProcessorOutput, PPProxyTensors], bool]:
- can_run_cuda_graph = bool(
- forward_batch.forward_mode.is_cuda_graph()
+ mode_check = (
+ forward_batch.forward_mode.is_cpu_graph
+ if self.device == "cpu"
+ else forward_batch.forward_mode.is_cuda_graph
+ )
+ can_run_graph = bool(
+ mode_check()
and self.graph_runner
and self.graph_runner.can_run(forward_batch)
)
- if can_run_cuda_graph:
+
+ if can_run_graph:
ret = self.graph_runner.replay(
forward_batch,
skip_attn_backend_init=skip_attn_backend_init,
pp_proxy_tensors=pp_proxy_tensors,
)
- return ret, can_run_cuda_graph
+ return ret, can_run_graph
# For MLP sync
if forward_batch.global_num_tokens_cpu is not None:
@@ -1805,10 +2120,13 @@ def _forward_raw(
else:
raise ValueError(f"Invalid forward mode: {forward_batch.forward_mode}")
- if forward_batch.global_num_tokens_cpu is not None:
+ if (
+ forward_batch.global_num_tokens_cpu is not None
+ and self.pp_group.is_last_rank
+ ):
forward_batch.post_forward_mlp_sync_batch(ret)
- return ret, can_run_cuda_graph
+ return ret, can_run_graph
def _preprocess_logits(
self, logits_output: LogitsProcessorOutput, sampling_info: SamplingBatchInfo
diff --git a/python/sglang/srt/model_executor/npu_graph_runner.py b/python/sglang/srt/model_executor/npu_graph_runner.py
index 582b5b7c612..0ff19d58275 100644
--- a/python/sglang/srt/model_executor/npu_graph_runner.py
+++ b/python/sglang/srt/model_executor/npu_graph_runner.py
@@ -17,11 +17,11 @@
import logging
import threading
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, Optional, Union
import torch
-from sglang.srt.model_executor.graph_runner import GraphRunner
+from sglang.srt.model_executor.cuda_graph_runner import CudaGraphRunner
logger = logging.getLogger(__name__)
@@ -32,7 +32,7 @@
from sglang.srt.model_executor.forward_batch_info import ForwardBatch, PPProxyTensors
-class NPUGraphRunner(GraphRunner):
+class NPUGraphRunner(CudaGraphRunner):
"""A NPUGraphRunner runs the forward pass of a model with npu graph and torch.compile."""
def __init__(self, model_runner: ModelRunner):
diff --git a/python/sglang/srt/model_loader/__init__.py b/python/sglang/srt/model_loader/__init__.py
index fa2386e3a4b..63f110204ba 100644
--- a/python/sglang/srt/model_loader/__init__.py
+++ b/python/sglang/srt/model_loader/__init__.py
@@ -1,16 +1,22 @@
# Adapted from https://github.com/vllm-project/vllm/blob/v0.6.4.post1/vllm/model_executor/model_loader/__init__.py
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
from torch import nn
-from sglang.srt.configs.device_config import DeviceConfig
-from sglang.srt.configs.load_config import LoadConfig
-from sglang.srt.configs.model_config import ModelConfig
from sglang.srt.model_loader.loader import BaseModelLoader, get_model_loader
from sglang.srt.model_loader.utils import (
get_architecture_class_name,
get_model_architecture,
)
+if TYPE_CHECKING:
+ from sglang.srt.configs.device_config import DeviceConfig
+ from sglang.srt.configs.load_config import LoadConfig
+ from sglang.srt.configs.model_config import ModelConfig
+
def get_model(
*,
diff --git a/python/sglang/srt/model_loader/loader.py b/python/sglang/srt/model_loader/loader.py
index 95d41a05018..d2b4c6bfcc7 100644
--- a/python/sglang/srt/model_loader/loader.py
+++ b/python/sglang/srt/model_loader/loader.py
@@ -1,5 +1,7 @@
# Adapted from https://github.com/vllm-project/vllm/blob/v0.6.3.post1/vllm/model_executor/model_loader/loader.py
+from __future__ import annotations
+
# ruff: noqa: SIM117
import collections
import concurrent
@@ -14,7 +16,17 @@
from abc import ABC, abstractmethod
from concurrent.futures import ThreadPoolExecutor
from contextlib import contextmanager
-from typing import Any, Dict, Generator, Iterable, List, Optional, Tuple, cast
+from typing import (
+ TYPE_CHECKING,
+ Any,
+ Dict,
+ Generator,
+ Iterable,
+ List,
+ Optional,
+ Tuple,
+ cast,
+)
import huggingface_hub
import numpy as np
@@ -26,9 +38,7 @@
from transformers import AutoModelForCausalLM
from transformers.utils import SAFE_WEIGHTS_INDEX_NAME
-from sglang.srt.configs.device_config import DeviceConfig
from sglang.srt.configs.load_config import LoadConfig, LoadFormat
-from sglang.srt.configs.model_config import ModelConfig
from sglang.srt.connector import (
ConnectorType,
create_remote_connector,
@@ -39,9 +49,9 @@
get_tensor_model_parallel_rank,
get_tensor_model_parallel_world_size,
)
-from sglang.srt.layers.quantization.base_config import QuantizationConfig
from sglang.srt.model_loader.utils import (
get_model_architecture,
+ post_load_weights,
set_default_torch_dtype,
)
from sglang.srt.model_loader.weight_utils import (
@@ -69,6 +79,11 @@
set_weight_attrs,
)
+if TYPE_CHECKING:
+ from sglang.srt.configs.device_config import DeviceConfig
+ from sglang.srt.configs.model_config import ModelConfig
+ from sglang.srt.layers.quantization.base_config import QuantizationConfig
+
_is_npu = is_npu()
@@ -79,13 +94,19 @@ def device_loading_context(module: torch.nn.Module, target_device: torch.device)
yield module
return
- original_device_states: Dict[str, torch.device] = {}
+ original_infos: Dict[str, Dict] = {}
# Store original device states and move parameters to GPU if they're on CPU
for name, p in module.named_parameters():
if p.device.type == "cpu":
- original_device_states[name] = p.device
- p.data = p.data.to(target_device)
+ original_data = p.data
+ device_data = p.data.to(target_device)
+ original_infos[name] = dict(
+ device=p.device,
+ original_data=original_data,
+ device_data=device_data,
+ )
+ p.data = device_data
# Parameters already on target device are not touched
try:
@@ -95,9 +116,21 @@ def device_loading_context(module: torch.nn.Module, target_device: torch.device)
# Restore parameters to their original devices, ignoring new parameters
pin_memory = is_pin_memory_available()
for name, p in module.named_parameters():
- if name in original_device_states:
- original_device: torch.device = original_device_states[name]
- if original_device.type == "cpu":
+ if name in original_infos:
+ original_info = original_infos[name]
+ device_data = original_info["device_data"]
+ original_data = original_info["original_data"]
+ original_device: torch.device = original_info["device"]
+
+ if (
+ (device_data.device == p.data.device)
+ and (device_data.data_ptr() == p.data.data_ptr())
+ and (device_data.shape == p.data.shape)
+ and (device_data.dtype == p.data.dtype)
+ ):
+ original_data.copy_(p.data.to(original_data.device))
+ p.data = original_data
+ elif original_device.type == "cpu":
# `torch.empty_like` does not support `pin_memory` argument
cpu_data = torch.empty_strided(
size=p.data.size(),
@@ -582,18 +615,7 @@ def load_model(
# random values to the weights.
initialize_dummy_weights(model)
- # Model weight loading consists of two stages:
- # 1. Initial weight loading.
- # 2. Post-processing of weights, including assigning specific member variables.
- # For `dummy_init`, only the second stage is required.
- if hasattr(model, "post_load_weights"):
- if (
- model_config.hf_config.architectures[0]
- == "DeepseekV3ForCausalLMNextN"
- ):
- model.post_load_weights(is_nextn=True)
- else:
- model.post_load_weights()
+ post_load_weights(model, model_config)
return model.eval()
@@ -733,6 +755,9 @@ def load_model(
state_dict.pop(key)
if state_dict:
raise ValueError(f"Missing keys {tuple(state_dict)} in loaded state!")
+
+ post_load_weights(model, model_config)
+
return model.eval()
@staticmethod
@@ -1403,18 +1428,16 @@ def save_model(
# ignore hidden files
if file_name.startswith("."):
continue
- if os.path.splitext(file_name)[1] not in (
- ".bin",
- ".pt",
- ".safetensors",
- ):
+ if os.path.splitext(file_name)[1] in (".json", ".py"):
file_path = os.path.join(root, file_name)
with open(file_path, encoding="utf-8") as file:
file_content = file.read()
f_key = f"{model_name}/files/{file_name}"
client.setstr(f_key, file_content)
- def _load_model_from_remote_kv(self, model: nn.Module, client):
+ def _load_model_from_remote_kv(
+ self, model: nn.Module, model_config: ModelConfig, client
+ ):
for _, module in model.named_modules():
quant_method = getattr(module, "quant_method", None)
if quant_method is not None:
@@ -1442,6 +1465,8 @@ def _load_model_from_remote_kv(self, model: nn.Module, client):
if state_dict:
raise ValueError(f"Missing keys {tuple(state_dict)} in loaded state!")
+ post_load_weights(model, model_config)
+
def _load_model_from_remote_fs(
self, model, client, model_config: ModelConfig, device_config: DeviceConfig
) -> nn.Module:
@@ -1483,15 +1508,13 @@ def load_model(
with set_default_torch_dtype(model_config.dtype):
with torch.device(device_config.device):
model = _initialize_model(model_config, self.load_config)
- for _, module in model.named_modules():
- quant_method = getattr(module, "quant_method", None)
- if quant_method is not None:
- quant_method.process_weights_after_loading(module)
- with create_remote_connector(model_weights, device_config.device) as client:
+ with create_remote_connector(
+ model_weights, device=device_config.device
+ ) as client:
connector_type = get_connector_type(client)
if connector_type == ConnectorType.KV:
- self._load_model_from_remote_kv(model, client)
+ self._load_model_from_remote_kv(model, model_config, client)
elif connector_type == ConnectorType.FS:
self._load_model_from_remote_fs(
model, client, model_config, device_config
diff --git a/python/sglang/srt/model_loader/utils.py b/python/sglang/srt/model_loader/utils.py
index dfbbd154d62..f6ad79010c9 100644
--- a/python/sglang/srt/model_loader/utils.py
+++ b/python/sglang/srt/model_loader/utils.py
@@ -105,3 +105,15 @@ def get_model_architecture(model_config: ModelConfig) -> Tuple[Type[nn.Module],
def get_architecture_class_name(model_config: ModelConfig) -> str:
return get_model_architecture(model_config)[1]
+
+
+def post_load_weights(model: nn.Module, model_config: ModelConfig):
+ # Model weight loading consists of two stages:
+ # 1. Initial weight loading.
+ # 2. Post-processing of weights, including assigning specific member variables.
+ # For `dummy_init`, only the second stage is required.
+ if hasattr(model, "post_load_weights"):
+ if model_config.hf_config.architectures[0] == "DeepseekV3ForCausalLMNextN":
+ model.post_load_weights(is_nextn=True)
+ else:
+ model.post_load_weights()
diff --git a/python/sglang/srt/model_loader/weight_utils.py b/python/sglang/srt/model_loader/weight_utils.py
index a326e3f10aa..397d9e91358 100644
--- a/python/sglang/srt/model_loader/weight_utils.py
+++ b/python/sglang/srt/model_loader/weight_utils.py
@@ -35,6 +35,7 @@
from sglang.srt.configs.load_config import LoadConfig
from sglang.srt.configs.model_config import ModelConfig
from sglang.srt.distributed import get_tensor_model_parallel_rank
+from sglang.srt.layers.dp_attention import get_attention_tp_rank
from sglang.srt.layers.quantization import QuantizationConfig, get_quantization_config
from sglang.srt.layers.quantization.modelopt_quant import ModelOptFp4Config
from sglang.srt.utils import print_warning_once
@@ -680,7 +681,7 @@ def sharded_weight_loader(shard_axis: int) -> LoaderFunction:
"""Create a weight loader that shards the weights along the given axis"""
def loader(param: torch.Tensor, loaded_weight: torch.Tensor) -> None:
- tp_rank = get_tensor_model_parallel_rank()
+ tp_rank = get_attention_tp_rank()
shard_size = param.data.shape[shard_axis]
start_idx = tp_rank * shard_size
diff --git a/python/sglang/srt/models/deepseek_v2.py b/python/sglang/srt/models/deepseek_v2.py
index 37274e45b30..1ee71337cf7 100644
--- a/python/sglang/srt/models/deepseek_v2.py
+++ b/python/sglang/srt/models/deepseek_v2.py
@@ -17,6 +17,7 @@
"""Inference-only DeepseekV2 model."""
import concurrent.futures
+import copy
import logging
import os
from enum import IntEnum, auto
@@ -67,7 +68,10 @@
should_use_flashinfer_cutlass_moe_fp4_allgather,
)
from sglang.srt.layers.moe.ep_moe.layer import DeepEPMoE, get_moe_impl_class
-from sglang.srt.layers.moe.fused_moe_triton.layer import FusedMoE
+from sglang.srt.layers.moe.fused_moe_triton.layer import (
+ FusedMoE,
+ _is_fp4_quantization_enabled,
+)
from sglang.srt.layers.moe.topk import TopK
from sglang.srt.layers.quantization import deep_gemm_wrapper
from sglang.srt.layers.quantization.base_config import QuantizationConfig
@@ -87,8 +91,8 @@
block_dequant as int8_block_dequant,
)
from sglang.srt.layers.radix_attention import RadixAttention
-from sglang.srt.layers.rotary_embedding import get_rope, get_rope_wrapper
-from sglang.srt.layers.utils import PPMissingLayer, get_layer_id, is_sm100_supported
+from sglang.srt.layers.rotary_embedding import get_rope_wrapper
+from sglang.srt.layers.utils import PPMissingLayer, get_layer_id
from sglang.srt.layers.vocab_parallel_embedding import (
ParallelLMHead,
VocabParallelEmbedding,
@@ -112,8 +116,11 @@
is_cpu,
is_cuda,
is_flashinfer_available,
+ is_gfx95_supported,
is_hip,
is_non_idle_and_non_empty,
+ is_npu,
+ is_sm100_supported,
log_info_on_rank0,
make_layers,
use_intel_amx_backend,
@@ -121,11 +128,28 @@
_is_hip = is_hip()
_is_cuda = is_cuda()
+_is_npu = is_npu()
_is_fp8_fnuz = is_fp8_fnuz()
_use_aiter = get_bool_env_var("SGLANG_USE_AITER") and _is_hip
_is_cpu_amx_available = cpu_has_amx_support()
_is_cpu = is_cpu()
_device_sm = get_device_sm()
+_is_gfx95_supported = is_gfx95_supported()
+
+_use_aiter_gfx95 = _use_aiter and _is_gfx95_supported
+
+if _use_aiter_gfx95:
+ from sglang.srt.layers.quantization.quark.utils import quark_post_load_weights
+ from sglang.srt.layers.quantization.rocm_mxfp4_utils import (
+ batched_gemm_afp4wfp4_pre_quant,
+ fused_flatten_mxfp4_quant,
+ fused_rms_mxfp4_quant,
+ )
+ from sglang.srt.layers.rocm_linear_utils import (
+ aiter_dsv3_router_gemm,
+ fused_qk_rope_cat,
+ get_dsv3_gemm_output_zero_allocator_size,
+ )
if _is_cuda:
from sgl_kernel import (
@@ -173,6 +197,8 @@ class AttnForwardMethod(IntEnum):
# Use MLA with fused RoPE kernel for CPU
MLA_FUSED_ROPE_CPU = auto()
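+    # Use MHA and rebuild the prefix K/V from the latent KV cache (HiP Attention)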
+ MHA_FROM_CACHE = auto()
+
class DeepseekV2MLP(nn.Module):
def __init__(
@@ -221,10 +247,21 @@ def forward(
forward_batch=None,
should_allreduce_fusion: bool = False,
use_reduce_scatter: bool = False,
+ gemm_output_zero_allocator: BumpAllocator = None,
):
if (self.tp_size == 1) and x.shape[0] == 0:
return x
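+        # For small batches with mxfp4 (uint8) gate_up weights on gfx95, pack the
+        # activations together with a zero output buffer pre-allocated from the bump
+        # allocator, sized for the gate_up projection output.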
+ if (
+ gemm_output_zero_allocator is not None
+ and x.shape[0] <= 256
+ and self.gate_up_proj.weight.dtype == torch.uint8
+ ):
+ y = gemm_output_zero_allocator.allocate(
+ x.shape[0] * self.gate_up_proj.output_size_per_partition
+ ).view(x.shape[0], self.gate_up_proj.output_size_per_partition)
+ x = (x, None, y)
+
gate_up, _ = self.gate_up_proj(x)
x = self.act_fn(gate_up)
x, _ = self.down_proj(
@@ -254,7 +291,7 @@ def __init__(
if _is_cpu and _is_cpu_amx_available:
self.quant_method = PackWeightMethod(weight_names=["weight"])
- def forward(self, hidden_states):
+ def forward(self, hidden_states, gemm_output_zero_allocator: BumpAllocator = None):
if use_intel_amx_backend(self):
return torch.ops.sgl_kernel.weight_packed_linear(
hidden_states,
@@ -272,7 +309,13 @@ def forward(self, hidden_states):
and _device_sm >= 90
):
# router gemm output float32
- logits = dsv3_router_gemm(hidden_states, self.weight)
+ logits = dsv3_router_gemm(
+ hidden_states, self.weight, out_dtype=torch.float32
+ )
+ elif _use_aiter_gfx95 and hidden_states.shape[0] <= 256:
+ logits = aiter_dsv3_router_gemm(
+ hidden_states, self.weight, gemm_output_zero_allocator
+ )
else:
logits = F.linear(hidden_states, self.weight, None)
@@ -319,18 +362,7 @@ def __init__(
config=config, prefix=add_prefix("gate", prefix), is_nextn=is_nextn
)
- self.topk = TopK(
- top_k=config.num_experts_per_tok + self.num_fused_shared_experts,
- renormalize=config.norm_topk_prob,
- use_grouped_topk=True,
- num_expert_group=config.n_group,
- num_fused_shared_experts=self.num_fused_shared_experts,
- topk_group=config.topk_group,
- correction_bias=self.gate.e_score_correction_bias,
- routed_scaling_factor=self.routed_scaling_factor,
- )
-
- self.experts = get_moe_impl_class()(
+ self.experts = get_moe_impl_class(quant_config)(
num_experts=config.n_routed_experts
+ self.num_fused_shared_experts
+ global_server_args_dict["ep_num_redundant_experts"],
@@ -344,6 +376,22 @@ def __init__(
prefix=add_prefix("experts", prefix),
)
+ correction_bias = self.gate.e_score_correction_bias
+ if _is_fp4_quantization_enabled():
+ correction_bias = correction_bias.to(torch.bfloat16)
+ self.topk = TopK(
+ top_k=config.num_experts_per_tok + self.num_fused_shared_experts,
+ renormalize=config.norm_topk_prob,
+ use_grouped_topk=True,
+ num_expert_group=config.n_group,
+ num_fused_shared_experts=self.num_fused_shared_experts,
+ topk_group=config.topk_group,
+ correction_bias=correction_bias,
+ routed_scaling_factor=self.routed_scaling_factor,
+ apply_routed_scaling_factor_on_output=self.experts.should_fuse_routed_scaling_factor_in_topk(),
+ force_topk=quant_config is None,
+ )
+
self.shared_experts_is_int8 = False
self.shared_experts_is_fp8 = False
self.shared_experts_weight_block_size = None
@@ -434,6 +482,7 @@ def forward(
forward_batch: Optional[ForwardBatch] = None,
should_allreduce_fusion: bool = False,
use_reduce_scatter: bool = False,
+ gemm_output_zero_allocator: BumpAllocator = None,
) -> torch.Tensor:
if not self._enable_deepep_moe:
DUAL_STREAM_TOKEN_THRESHOLD = 1024
@@ -447,12 +496,14 @@ def forward(
hidden_states,
should_allreduce_fusion,
use_reduce_scatter,
+ gemm_output_zero_allocator,
)
else:
return self.forward_normal(
hidden_states,
should_allreduce_fusion,
use_reduce_scatter,
+ gemm_output_zero_allocator,
)
else:
return self.forward_deepep(hidden_states, forward_batch)
@@ -462,15 +513,18 @@ def forward_normal_dual_stream(
hidden_states: torch.Tensor,
should_allreduce_fusion: bool = False,
use_reduce_scatter: bool = False,
+ gemm_output_zero_allocator: BumpAllocator = None,
) -> torch.Tensor:
current_stream = torch.cuda.current_stream()
self.alt_stream.wait_stream(current_stream)
- shared_output = self._forward_shared_experts(hidden_states)
+ shared_output = self._forward_shared_experts(
+ hidden_states, gemm_output_zero_allocator
+ )
with torch.cuda.stream(self.alt_stream):
# router_logits: (num_tokens, n_experts)
- router_logits = self.gate(hidden_states)
+ router_logits = self.gate(hidden_states, gemm_output_zero_allocator)
topk_output = self.topk(hidden_states, router_logits)
final_hidden_states = self.experts(hidden_states, topk_output)
if not _is_cuda:
@@ -497,6 +551,7 @@ def forward_normal(
hidden_states: torch.Tensor,
should_allreduce_fusion: bool = False,
use_reduce_scatter: bool = False,
+ gemm_output_zero_allocator: BumpAllocator = None,
) -> torch.Tensor:
if hasattr(self, "shared_experts") and use_intel_amx_backend(
self.shared_experts.gate_up_proj
@@ -504,9 +559,11 @@ def forward_normal(
return self.forward_cpu(hidden_states, should_allreduce_fusion)
if hidden_states.shape[0] > 0:
- shared_output = self._forward_shared_experts(hidden_states)
+ shared_output = self._forward_shared_experts(
+ hidden_states, gemm_output_zero_allocator
+ )
# router_logits: (num_tokens, n_experts)
- router_logits = self.gate(hidden_states)
+ router_logits = self.gate(hidden_states, gemm_output_zero_allocator)
topk_output = self.topk(hidden_states, router_logits)
else:
shared_output = None
@@ -626,9 +683,13 @@ def forward_deepep(
return final_hidden_states
- def _forward_shared_experts(self, hidden_states):
+ def _forward_shared_experts(
+ self, hidden_states, gemm_output_zero_allocator: BumpAllocator = None
+ ):
if self.num_fused_shared_experts == 0:
- return self.shared_experts(hidden_states)
+ return self.shared_experts(
+ hidden_states, gemm_output_zero_allocator=gemm_output_zero_allocator
+ )
else:
return None
@@ -865,6 +926,11 @@ def __init__(
num_kv_heads=1,
layer_id=layer_id,
v_head_dim=self.kv_lora_rank,
+ orig_context_len=getattr(
+ config, "orig_context_len", max_position_embeddings
+ ),
+ rope=self.rotary_emb,
+ rope_range=(self.kv_lora_rank, self.kv_lora_rank + self.qk_rope_head_dim),
quant_config=quant_config,
prefix=add_prefix("attn_mqa", prefix),
)
@@ -876,6 +942,11 @@ def __init__(
num_kv_heads=self.num_local_heads,
layer_id=layer_id,
v_head_dim=self.v_head_dim,
+ orig_context_len=getattr(
+ config, "orig_context_len", max_position_embeddings
+ ),
+ rope=self.rotary_emb,
+ rope_range=(self.qk_nope_head_dim, self.qk_head_dim),
quant_config=quant_config,
prefix=add_prefix("attn_mha", prefix),
)
@@ -987,48 +1058,117 @@ def _dispatch_mla_subtype():
# Determine attention backend used by current forward batch
if forward_batch.forward_mode.is_decode_or_idle():
attention_backend = global_server_args_dict["decode_attention_backend"]
+ elif (
+ forward_batch.forward_mode.is_target_verify()
+ or forward_batch.forward_mode.is_draft_extend()
+ ):
+ # Use the specified backend for speculative operations (both verify and draft extend)
+ if global_server_args_dict["speculative_attention_mode"] == "decode":
+ attention_backend = global_server_args_dict["decode_attention_backend"]
+ else: # default to prefill
+ attention_backend = global_server_args_dict["prefill_attention_backend"]
else:
attention_backend = global_server_args_dict["prefill_attention_backend"]
self.current_attention_backend = attention_backend
if attention_backend == "ascend":
- return AttnForwardMethod.MLA
- elif attention_backend == "flashinfer":
- # Flashinfer MLA: Do not absorb when enabling ragged prefill
if (
- not self.flashinfer_mla_disable_ragged
- and forward_batch.forward_mode.is_extend()
+ forward_batch.forward_mode.is_extend()
and not forward_batch.forward_mode.is_target_verify()
and not forward_batch.forward_mode.is_draft_extend()
- and sum(forward_batch.extend_prefix_lens_cpu) == 0
):
return AttnForwardMethod.MHA
else:
- return _dispatch_mla_subtype()
- elif attention_backend == "fa3":
+ return AttnForwardMethod.MLA
+ elif attention_backend in ["hip_attention"]:
# Flash Attention: Use MHA with chunked KV cache when prefilling on long sequences.
if forward_batch.extend_prefix_lens_cpu is not None:
sum_extend_prefix_lens = sum(forward_batch.extend_prefix_lens_cpu)
if (
- forward_batch.forward_mode.is_extend()
- and not self.disable_chunked_prefix_cache
+ not global_server_args_dict["disable_chunked_prefix_cache"]
+ and forward_batch.forward_mode.is_extend()
+ and not forward_batch.forward_mode.is_target_verify()
+ and not forward_batch.forward_mode.is_draft_extend()
+ ):
+ return AttnForwardMethod.MHA_FROM_CACHE
+ else:
+ # if forward_batch.forward_mode == ForwardMode.EXTEND:
+ # # FIXME: this should be MLA, but bug.
+ # return AttnForwardMethod.MHA_FROM_CACHE
+ return AttnForwardMethod.MLA
+ elif (
+ attention_backend == "flashinfer"
+ or attention_backend == "fa3"
+ or attention_backend == "flashmla"
+ or attention_backend == "cutlass_mla"
+ ):
+ # Use MHA with chunked KV cache when prefilling on long sequences.
+ sum_extend_prefix_lens = (
+ sum(forward_batch.extend_prefix_lens_cpu)
+ if forward_batch.extend_prefix_lens_cpu is not None
+ else 0
+ )
+ # Flashinfer MLA: Do not absorb when enabling ragged prefill
+ disable_ragged = (
+ attention_backend == "flashinfer" or attention_backend == "flashmla"
+ ) and self.flashinfer_mla_disable_ragged
+ if (
+ not disable_ragged
+ and forward_batch.forward_mode.is_extend()
and not forward_batch.forward_mode.is_target_verify()
and not forward_batch.forward_mode.is_draft_extend()
and (
- sum_extend_prefix_lens >= self.chunked_prefix_cache_threshold
+ (
+ sum_extend_prefix_lens >= self.chunked_prefix_cache_threshold
+ and not self.disable_chunked_prefix_cache
+ )
or sum_extend_prefix_lens == 0
)
):
return AttnForwardMethod.MHA_CHUNKED_KV
else:
return _dispatch_mla_subtype()
+ elif attention_backend == "trtllm_mla":
+ if (
+ forward_batch.forward_mode.is_extend()
+ and not forward_batch.forward_mode.is_target_verify()
+ and not forward_batch.forward_mode.is_draft_extend()
+ ):
+ return AttnForwardMethod.MHA_CHUNKED_KV
+ else:
+ return _dispatch_mla_subtype()
elif attention_backend == "aiter":
if (
forward_batch.forward_mode.is_extend()
and not forward_batch.forward_mode.is_target_verify()
and not forward_batch.forward_mode.is_draft_extend()
):
- return AttnForwardMethod.MHA
+ if is_dp_attention_enabled():
+ if sum(forward_batch.extend_prefix_lens_cpu) == 0:
+ return AttnForwardMethod.MHA
+ else:
+ return AttnForwardMethod.MLA
+ else:
+ return AttnForwardMethod.MHA
else:
return AttnForwardMethod.MLA
else:
@@ -1081,11 +1221,19 @@ def forward_prepare(
if self.attn_mha.kv_b_proj is None:
self.attn_mha.kv_b_proj = self.kv_b_proj
- if hidden_states.shape[0] == 0:
- assert (
- not self.o_proj.reduce_results
- ), "short-circuiting allreduce will lead to hangs"
- return hidden_states, None, forward_batch, None
+        # When hidden_states is a tuple of tensors, it carries the quantized tensor together with its scale
+ if isinstance(hidden_states, tuple):
+ if hidden_states[0].shape[0] == 0:
+ assert (
+ not self.o_proj.reduce_results
+ ), "short-circuiting allreduce will lead to hangs"
+ return hidden_states[0]
+ else:
+ if hidden_states.shape[0] == 0:
+ assert (
+ not self.o_proj.reduce_results
+ ), "short-circuiting allreduce will lead to hangs"
+ return hidden_states, None, forward_batch, None
attn_forward_method = self.dispatch_attn_forward_method(forward_batch)
@@ -1161,19 +1309,32 @@ def forward_normal_prepare(
k_nope = kv[..., : self.qk_nope_head_dim]
v = kv[..., self.qk_nope_head_dim :]
k_pe = latent_cache[:, :, self.kv_lora_rank :]
- q_pe, k_pe = self.rotary_emb(positions, q_pe, k_pe)
+
+ # RoPE is applied inside the attention kernel in HiP Attention
+ if not (
+ forward_batch.hip_metadata_cache_pool is not None
+ and forward_batch.hip_metadata_cache_pool.hip_config.using_extend
+ ):
+ q_pe, k_pe = self.rotary_emb(positions, q_pe, k_pe)
+
q[..., self.qk_nope_head_dim :] = q_pe
k = torch.empty_like(q)
k[..., : self.qk_nope_head_dim] = k_nope
k[..., self.qk_nope_head_dim :] = k_pe
- latent_cache[:, :, : self.kv_lora_rank] = kv_a.unsqueeze(1)
- latent_cache[:, :, self.kv_lora_rank :] = k_pe
+ if not _is_npu:
+ latent_cache[:, :, : self.kv_lora_rank] = kv_a.unsqueeze(1)
+ latent_cache[:, :, self.kv_lora_rank :] = k_pe
- # Save latent cache
- forward_batch.token_to_kv_pool.set_kv_buffer(
- self.attn_mha, forward_batch.out_cache_loc, latent_cache, None
- )
+ # Save latent cache
+ forward_batch.token_to_kv_pool.set_kv_buffer(
+ self.attn_mha, forward_batch.out_cache_loc, latent_cache, None
+ )
+ else:
+            # Write kv_a and k_pe separately to avoid a costly split of the latent cache later
+ forward_batch.token_to_kv_pool.set_kv_buffer(
+ self.attn_mha, forward_batch.out_cache_loc, kv_a.unsqueeze(1), k_pe
+ )
return q, k, v, forward_batch
@@ -1200,10 +1361,14 @@ def forward_absorb_prepare(
forward_batch: ForwardBatch,
zero_allocator: BumpAllocator,
):
- from sglang.srt.model_executor.graph_runner import get_is_capture_mode
+ from sglang.srt.model_executor.cuda_graph_runner import get_is_capture_mode
if self.q_lora_rank is not None:
- if hidden_states.shape[0] <= 16 and self.use_min_latency_fused_a_gemm:
+ if (
+ (not isinstance(hidden_states, tuple))
+ and hidden_states.shape[0] <= 16
+ and self.use_min_latency_fused_a_gemm
+ ):
fused_qkv_a_proj_out = dsv3_fused_a_gemm(
hidden_states, self.fused_qkv_a_proj_with_mqa.weight.T
)
@@ -1223,8 +1388,18 @@ def forward_absorb_prepare(
k_nope = self.kv_a_layernorm(k_nope)
current_stream.wait_stream(self.alt_stream)
else:
- q = self.q_a_layernorm(q)
- k_nope = self.kv_a_layernorm(k_nope)
+ if _use_aiter_gfx95 and self.q_b_proj.weight.dtype == torch.uint8:
+ q, k_nope = fused_rms_mxfp4_quant(
+ q,
+ self.q_a_layernorm.weight,
+ self.q_a_layernorm.variance_epsilon,
+ k_nope,
+ self.kv_a_layernorm.weight,
+ self.kv_a_layernorm.variance_epsilon,
+ )
+ else:
+ q = self.q_a_layernorm(q)
+ k_nope = self.kv_a_layernorm(k_nope)
k_nope = k_nope.unsqueeze(1)
q = self.q_b_proj(q)[0].view(-1, self.num_local_heads, self.qk_head_dim)
@@ -1256,10 +1431,27 @@ def forward_absorb_prepare(
q_nope_out = q_nope_out[:, :expected_m, :]
elif _is_hip:
# TODO(haishaw): add bmm_fp8 to ROCm
- q_nope_out = torch.bmm(
- q_nope.to(torch.bfloat16).transpose(0, 1),
- self.w_kc.to(torch.bfloat16) * self.w_scale,
- )
+ if _use_aiter_gfx95 and self.w_kc.dtype == torch.uint8:
+ x = q_nope.transpose(0, 1)
+ q_nope_out = torch.empty(
+ x.shape[0],
+ x.shape[1],
+ self.w_kc.shape[2],
+ device=x.device,
+ dtype=torch.bfloat16,
+ )
+ batched_gemm_afp4wfp4_pre_quant(
+ x,
+ self.w_kc.transpose(-2, -1),
+ self.w_scale_k.transpose(-2, -1),
+ torch.bfloat16,
+ q_nope_out,
+ )
+ else:
+ q_nope_out = torch.bmm(
+ q_nope.to(torch.bfloat16).transpose(0, 1),
+ self.w_kc.to(torch.bfloat16) * self.w_scale,
+ )
elif self.w_kc.dtype == torch.float8_e4m3fn:
q_nope_val, q_nope_scale = per_tensor_quant_mla_fp8(
q_nope.transpose(0, 1),
@@ -1273,19 +1465,30 @@ def forward_absorb_prepare(
q_nope_out = q_nope_out.transpose(0, 1)
- if not self._fuse_rope_for_trtllm_mla(forward_batch):
+        if (
+            (not self._fuse_rope_for_trtllm_mla(forward_batch))
+            and not _use_aiter_gfx95
+            and not (
+                forward_batch.hip_metadata_cache_pool is not None
+                and forward_batch.hip_metadata_cache_pool.hip_config.using_extend
+            )
+        ):
q_pe, k_pe = self.rotary_emb(positions, q_pe, k_pe)
- return q_pe, k_pe, q_nope_out, k_nope, forward_batch, zero_allocator
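+        # positions is now returned as well so forward_absorb_core can apply RoPE
+        # late (e.g. via the fused QK RoPE + concat path on gfx95).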
+ return q_pe, k_pe, q_nope_out, k_nope, forward_batch, zero_allocator, positions
def forward_absorb_core(
- self, q_pe, k_pe, q_nope_out, k_nope, forward_batch, zero_allocator
+ self, q_pe, k_pe, q_nope_out, k_nope, forward_batch, zero_allocator, positions
):
if (
self.current_attention_backend == "fa3"
+ or self.current_attention_backend == "hip_attention"
or self.current_attention_backend == "flashinfer"
or self.current_attention_backend == "cutlass_mla"
or self.current_attention_backend == "trtllm_mla"
+ or self.current_attention_backend == "ascend"
):
extra_args = {}
if self._fuse_rope_for_trtllm_mla(forward_batch):
@@ -1303,8 +1506,23 @@ def forward_absorb_core(
**extra_args,
)
else:
- q = torch.cat([q_nope_out, q_pe], dim=-1)
- k = torch.cat([k_nope, k_pe], dim=-1)
+ if _use_aiter_gfx95:
+ cos = self.rotary_emb.cos_cache
+ sin = self.rotary_emb.sin_cache
+ q, k = fused_qk_rope_cat(
+ q_nope_out,
+ q_pe,
+ k_nope,
+ k_pe,
+ positions,
+ cos,
+ sin,
+ self.rotary_emb.is_neox_style,
+ )
+ else:
+ q = torch.cat([q_nope_out, q_pe], dim=-1)
+ k = torch.cat([k_nope, k_pe], dim=-1)
+
attn_output = self.attn_mqa(q, k, k_nope, forward_batch)
attn_output = attn_output.view(-1, self.num_local_heads, self.kv_lora_rank)
@@ -1329,11 +1547,34 @@ def forward_absorb_core(
)
elif _is_hip:
# TODO(haishaw): add bmm_fp8 to ROCm
- attn_bmm_output = torch.bmm(
- attn_output.to(torch.bfloat16).transpose(0, 1),
- self.w_vc.to(torch.bfloat16) * self.w_scale,
- )
- attn_bmm_output = attn_bmm_output.transpose(0, 1).flatten(1, 2)
+ if _use_aiter_gfx95 and self.w_vc.dtype == torch.uint8:
+ x = attn_output.transpose(0, 1)
+ attn_bmm_output = torch.empty(
+ x.shape[0],
+ x.shape[1],
+ self.w_vc.shape[2],
+ device=x.device,
+ dtype=torch.bfloat16,
+ )
+ batched_gemm_afp4wfp4_pre_quant(
+ x,
+ self.w_vc.transpose(-2, -1),
+ self.w_scale_v.transpose(-2, -1),
+ torch.bfloat16,
+ attn_bmm_output,
+ )
+ else:
+ attn_bmm_output = torch.bmm(
+ attn_output.to(torch.bfloat16).transpose(0, 1),
+ self.w_vc.to(torch.bfloat16) * self.w_scale,
+ )
+
+ if self.o_proj.weight.dtype == torch.uint8:
+ attn_bmm_output = attn_bmm_output.transpose(0, 1)
+ attn_bmm_output = fused_flatten_mxfp4_quant(attn_bmm_output)
+ else:
+ attn_bmm_output = attn_bmm_output.transpose(0, 1).flatten(1, 2)
+
elif self.w_vc.dtype == torch.float8_e4m3fn:
attn_output_val, attn_output_scale = per_tensor_quant_mla_fp8(
attn_output.transpose(0, 1),
@@ -1371,6 +1612,8 @@ def forward_absorb_fused_mla_rope_prepare(
forward_batch: ForwardBatch,
zero_allocator: BumpAllocator,
):
+ if forward_batch.hip_metadata_cache_pool is not None:
+ raise ValueError("HiP Attention does not support fused MLA with RoPE")
enable_rope_fusion = (
os.getenv("SGLANG_FUSED_MLA_ENABLE_ROPE_FUSION", "1") == "1"
)
@@ -1416,7 +1659,11 @@ def forward_absorb_fused_mla_rope_prepare(
if not enable_rope_fusion:
k_pe = k_input[..., self.kv_lora_rank :]
- q_pe, k_pe = self.rotary_emb(positions, q_pe, k_pe)
+ if not (
+ forward_batch.hip_metadata_cache_pool is not None
+ and forward_batch.hip_metadata_cache_pool.hip_config.using_extend
+ ):
+ q_pe, k_pe = self.rotary_emb(positions, q_pe, k_pe)
q_input[..., self.kv_lora_rank :] = q_pe
k_input[..., self.kv_lora_rank :] = k_pe
k_pe_output = None
@@ -1602,6 +1849,153 @@ def forward_absorb_fused_mla_rope_core(
return output
+ def forward_normal_from_cache(
+ self,
+ positions: torch.Tensor,
+ hidden_states: torch.Tensor,
+ forward_batch: ForwardBatch,
+ ):
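+        # MHA path used with HiP Attention: re-project the prefix latent KV cache
+        # into full K/V per request and attend each new chunk against prefix + chunk,
+        # without re-saving the prefix to the cache.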
+ if self.q_lora_rank is not None:
+ q, latent_cache = self.fused_qkv_a_proj_with_mqa(hidden_states)[0].split(
+ [self.q_lora_rank, self.kv_lora_rank + self.qk_rope_head_dim], dim=-1
+ )
+ q = self.q_a_layernorm(q)
+ q = self.q_b_proj(q)[0].view(-1, self.num_local_heads, self.qk_head_dim)
+ else:
+ q = self.q_proj(hidden_states)[0].view(
+ -1, self.num_local_heads, self.qk_head_dim
+ )
+ latent_cache = self.kv_a_proj_with_mqa(hidden_states)[0]
+ _, q_pe = q.split([self.qk_nope_head_dim, self.qk_rope_head_dim], dim=-1)
+ kv_a, _ = latent_cache.split([self.kv_lora_rank, self.qk_rope_head_dim], dim=-1)
+ latent_cache = latent_cache.unsqueeze(1)
+ kv_a = self.kv_a_layernorm(kv_a.contiguous())
+ kv = self.kv_b_proj(kv_a)[0]
+ kv = kv.view(-1, self.num_local_heads, self.qk_nope_head_dim + self.v_head_dim)
+ k_nope = kv[..., : self.qk_nope_head_dim]
+ v = kv[..., self.qk_nope_head_dim :]
+ k_pe = latent_cache[:, :, self.kv_lora_rank :]
+
+ if not (
+ forward_batch.hip_metadata_cache_pool is not None
+ and forward_batch.hip_metadata_cache_pool.hip_config.using_extend
+ ):
+ q_pe, k_pe = self.rotary_emb(positions, q_pe, k_pe)
+
+ q[..., self.qk_nope_head_dim :] = q_pe
+ k = torch.empty_like(q)
+ k[..., : self.qk_nope_head_dim] = k_nope
+ k[..., self.qk_nope_head_dim :] = k_pe
+
+ latent_cache[:, :, : self.kv_lora_rank] = kv_a.unsqueeze(1)
+ latent_cache[:, :, self.kv_lora_rank :] = k_pe
+
+ # Save latent cache
+ forward_batch.token_to_kv_pool.set_kv_buffer(
+ self.attn_mha, forward_batch.out_cache_loc, latent_cache, None
+ )
+
+ k_current = k
+ v_current = v
+
+ # Fetch latent cache from memory pool with precomputed chunked kv indices
+ latent_cache_buf = forward_batch.token_to_kv_pool.get_key_buffer(
+ self.attn_mha.layer_id
+ )
+ block_table = forward_batch.req_to_token_pool.req_to_token.index_select(
+ dim=0, index=forward_batch.req_pool_indices
+ )
+ batch_size = block_table.shape[0]
+
+ outputs = []
+ acc_chunk_len = 0
+ for ibatch in range(batch_size):
+ prefix_len = forward_batch.extend_prefix_lens_cpu[ibatch]
+ chunk_len = forward_batch.extend_seq_lens_cpu[ibatch]
+
+ q_chunk = q[acc_chunk_len : acc_chunk_len + chunk_len][None, ...]
+ k_chunk = k_current[acc_chunk_len : acc_chunk_len + chunk_len][None, ...]
+ v_chunk = v_current[acc_chunk_len : acc_chunk_len + chunk_len][None, ...]
+
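+            # fp8 caches are gathered through a uint8 view, then reinterpreted as
+            # fp8 and upcast to the query dtype.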
+ if latent_cache_buf.dtype in (torch.float8_e5m2,):
+ latent_cache = (
+ latent_cache_buf.view(torch.uint8)[
+ block_table[ibatch : ibatch + 1, :prefix_len]
+ ]
+ .view(latent_cache_buf.dtype)
+ .to(q_chunk.dtype)
+ )
+ else:
+ latent_cache = latent_cache_buf[
+ block_table[ibatch : ibatch + 1, :prefix_len]
+ ]
+ kv_a_normed, k_pe = latent_cache.split(
+ [self.kv_lora_rank, self.qk_rope_head_dim], dim=-1
+ )
+ kv_a_normed = kv_a_normed.squeeze(1).contiguous()
+ kv = self.kv_b_proj(kv_a_normed)[0]
+ kv = kv.view(
+ -1, self.num_local_heads, self.qk_nope_head_dim + self.v_head_dim
+ )
+ v = kv[..., self.qk_nope_head_dim :]
+ k_nope = kv[..., : self.qk_nope_head_dim]
+
+ k = torch.zeros(
+ (
+ k_nope.shape[0],
+ self.num_local_heads,
+ self.qk_nope_head_dim + self.qk_rope_head_dim,
+ ),
+ dtype=v.dtype,
+ device=v.device,
+ )
+ k[..., : self.qk_nope_head_dim] = k_nope
+ k[..., self.qk_nope_head_dim :] = k_pe
+
+ k = torch.cat([k, k_chunk[0]], dim=0)
+ v = torch.cat([v, v_chunk[0]], dim=0)
+
+ current_forward_batch = copy.copy(forward_batch)
+ current_forward_batch.batch_size = 1
+ current_forward_batch.req_pool_indices = forward_batch.req_pool_indices[
+ ibatch : ibatch + 1
+ ]
+ current_forward_batch.extend_seq_lens = forward_batch.extend_seq_lens[
+ ibatch : ibatch + 1
+ ]
+ current_forward_batch.extend_seq_lens_cpu = (
+ forward_batch.extend_seq_lens_cpu[ibatch : ibatch + 1]
+ )
+ current_forward_batch.extend_prefix_lens_cpu = (
+ forward_batch.extend_prefix_lens_cpu[ibatch : ibatch + 1]
+ )
+ current_forward_batch.positions = forward_batch.positions[
+ acc_chunk_len : acc_chunk_len + chunk_len
+ ]
+ assert current_forward_batch.extend_prefix_lens_cpu is not None
+ assert not self.attn_mha.is_cross_attention
+            current_forward_batch.out_cache_loc = forward_batch.out_cache_loc[
+                acc_chunk_len : acc_chunk_len + chunk_len
+            ]
+            # Advance the running offset only after every per-chunk slice above has
+            # been taken, so positions/out_cache_loc line up with this chunk.
+            acc_chunk_len += chunk_len
+
+ output = self.attn_mha(q_chunk, k, v, forward_batch, save_kv_cache=False)
+
+ outputs.append(output)
+ attn_output = torch.cat(outputs, dim=0)
+ attn_output = attn_output.reshape(-1, self.num_local_heads * self.v_head_dim)
+ output, _ = self.o_proj(attn_output)
+ return output
+
def forward_absorb_fused_mla_rope_cpu_core(
self, q_input, k_input, v_input, forward_batch, zero_allocator
):
@@ -1655,9 +2049,11 @@ def _chunked_prefix_attn_mha(
latent_cache_buf = forward_batch.token_to_kv_pool.get_key_buffer(
self.attn_mha.layer_id
)
- latent_cache = latent_cache_buf[
- forward_batch.prefix_chunk_kv_indices[i]
- ].contiguous()
+ latent_cache = (
+ latent_cache_buf[forward_batch.prefix_chunk_kv_indices[i]]
+ .contiguous()
+ .to(q.dtype)
+ )
kv_a_normed, k_pe = latent_cache.split(
[self.kv_lora_rank, self.qk_rope_head_dim], dim=-1
@@ -1683,7 +2079,6 @@ def _chunked_prefix_attn_mha(
k[..., self.qk_nope_head_dim :] = k_pe
output, lse = self.attn_mha(q, k, v, forward_batch, save_kv_cache=False)
- lse = torch.transpose(lse, 0, 1).contiguous()
tmp_output = torch.empty_like(accum_output)
tmp_lse = torch.empty_like(accum_lse)
merge_state_v2(output, lse, accum_output, accum_lse, tmp_output, tmp_lse)
@@ -1698,6 +2093,7 @@ def forward_normal_chunked_kv_prepare(
forward_batch: ForwardBatch,
zero_allocator: BumpAllocator,
):
+ assert not torch.cuda.is_current_stream_capturing()
# In normal mha, the k and v tensors will become overly large when the prefix length is long.
# To avoid this, we split the kv cache into chunks and process them one after another.
# Since mha is compute friendly, the for loop induced here will not introduce significant overhead.
@@ -1705,55 +2101,26 @@ def forward_normal_chunked_kv_prepare(
# will be helpful for understanding the purpose of this function.
# First do normal mha forward to get output for extended part
- if self.q_lora_rank is not None:
- q, latent_cache = self.fused_qkv_a_proj_with_mqa(hidden_states)[0].split(
- [self.q_lora_rank, self.kv_lora_rank + self.qk_rope_head_dim], dim=-1
- )
- q = self.q_a_layernorm(q)
- q = self.q_b_proj(q)[0].view(-1, self.num_local_heads, self.qk_head_dim)
- else:
- q = self.q_proj(hidden_states)[0].view(
- -1, self.num_local_heads, self.qk_head_dim
- )
- latent_cache = self.kv_a_proj_with_mqa(hidden_states)[0]
- _, q_pe = q.split([self.qk_nope_head_dim, self.qk_rope_head_dim], dim=-1)
- kv_a, _ = latent_cache.split([self.kv_lora_rank, self.qk_rope_head_dim], dim=-1)
- latent_cache = latent_cache.unsqueeze(1)
- kv_a = self.kv_a_layernorm(kv_a)
- kv = self.kv_b_proj(kv_a)[0]
- kv = kv.view(-1, self.num_local_heads, self.qk_nope_head_dim + self.v_head_dim)
- k_nope = kv[..., : self.qk_nope_head_dim]
- v = kv[..., self.qk_nope_head_dim :]
- k_pe = latent_cache[:, :, self.kv_lora_rank :]
-
- q_pe, k_pe = self.rotary_emb(positions, q_pe, k_pe)
- q[..., self.qk_nope_head_dim :] = q_pe
- k = torch.empty_like(q)
- k[..., : self.qk_nope_head_dim] = k_nope
- k[..., self.qk_nope_head_dim :] = k_pe
-
- latent_cache[:, :, : self.kv_lora_rank] = kv_a.unsqueeze(1)
- latent_cache[:, :, self.kv_lora_rank :] = k_pe
-
- # Save latent cache
- forward_batch.token_to_kv_pool.set_kv_buffer(
- self.attn_mha, forward_batch.out_cache_loc, latent_cache, None
+ return self.forward_normal_prepare(
+ positions, hidden_states, forward_batch, zero_allocator
)
- return q, k, v, forward_batch
-
def forward_normal_chunked_kv_core(self, q, k, v, forward_batch):
+ has_extend_prefix = any(forward_batch.extend_prefix_lens_cpu)
+ # Only initialize the info once
+ if has_extend_prefix and forward_batch.num_prefix_chunks is None:
+ forward_batch.prepare_chunked_prefix_cache_info(q.device)
+ if hasattr(forward_batch.attn_backend, "init_mha_chunk_metadata"):
+ forward_batch.attn_backend.init_mha_chunk_metadata(forward_batch)
+
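+        # Ask the backend for LSE only when prefix chunks must be merged in below.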
+ forward_batch.mha_return_lse = has_extend_prefix
# Do mha for extended part without prefix
forward_batch.set_attn_attend_prefix_cache(False)
- attn_output, lse = self.attn_mha(q, k, v, forward_batch, save_kv_cache=False)
- lse = torch.transpose(lse, 0, 1).contiguous()
+ attn_output = self.attn_mha(q, k, v, forward_batch, save_kv_cache=False)
# Do mha attention with chunked prefix cache if there are any sequence with prefix
- if any(forward_batch.extend_prefix_lens_cpu):
- # Only initialize the info once
- if forward_batch.num_prefix_chunks is None:
- forward_batch.prepare_chunked_prefix_cache_info(q.device)
-
+ if has_extend_prefix:
+ attn_output, lse = attn_output
forward_batch.set_attn_attend_prefix_cache(True)
attn_output = self._chunked_prefix_attn_mha(
q=q,
@@ -1852,10 +2219,11 @@ def __init__(
input_layernorm=self.input_layernorm,
post_attention_layernorm=self.post_attention_layernorm,
allow_reduce_scatter=True,
+ is_last_layer=(
+ is_nextn or (self.layer_id == self.config.num_hidden_layers - 1)
+ ),
)
- self._fuse_allreduce_lookup_table = self._build_fuse_allreduce_lookup_table()
-
def _is_layer_sparse(self, layer_id: int, is_nextn: bool) -> bool:
return is_nextn or (
self.config.n_routed_experts is not None
@@ -1863,20 +2231,6 @@ def _is_layer_sparse(self, layer_id: int, is_nextn: bool) -> bool:
and layer_id % self.config.moe_layer_freq == 0
)
- def _should_fuse_mlp_allreduce_with_next_layer(self, forward_batch) -> bool:
- """Check if MLP allreduce can be fused with next layer's residual_rmsnorm"""
-
- batch_size = (
- forward_batch.input_ids.shape[0]
- if hasattr(forward_batch, "input_ids")
- else 0
- )
-
- if batch_size > 128:
- return False
-
- return self._fuse_allreduce_lookup_table.get(batch_size, False)
-
def forward(
self,
positions: torch.Tensor,
@@ -1884,10 +2238,24 @@ def forward(
forward_batch: ForwardBatch,
residual: Optional[torch.Tensor],
zero_allocator: BumpAllocator,
+ gemm_output_zero_allocator: BumpAllocator = None,
) -> torch.Tensor:
+ quant_format = (
+ "mxfp4"
+ if _is_gfx95_supported
+ and getattr(self.self_attn, "fused_qkv_a_proj_with_mqa", None) is not None
+ and getattr(self.self_attn.fused_qkv_a_proj_with_mqa, "weight", None)
+ is not None
+ and self.self_attn.fused_qkv_a_proj_with_mqa.weight.dtype == torch.uint8
+ else ""
+ )
+
hidden_states, residual = self.layer_communicator.prepare_attn(
- hidden_states, residual, forward_batch
+ hidden_states,
+ residual,
+ forward_batch,
+ quant_format,
)
hidden_states = self.self_attn(
@@ -1902,19 +2270,25 @@ def forward(
)
should_allreduce_fusion = (
- self._should_fuse_mlp_allreduce_with_next_layer(forward_batch)
- and not (
- is_dp_attention_enabled() and self.speculative_algorithm.is_eagle()
+ self.layer_communicator.should_fuse_mlp_allreduce_with_next_layer(
+ forward_batch
)
- and not self.is_nextn
)
# For DP with padding, reduce scatter can be used instead of all-reduce.
use_reduce_scatter = self.layer_communicator.should_use_reduce_scatter(
forward_batch
)
+
+ if isinstance(self.mlp, DeepseekV2MLP):
+ gemm_output_zero_allocator = None
+
hidden_states = self.mlp(
- hidden_states, forward_batch, should_allreduce_fusion, use_reduce_scatter
+ hidden_states,
+ forward_batch,
+ should_allreduce_fusion,
+ use_reduce_scatter,
+ gemm_output_zero_allocator,
)
if should_allreduce_fusion:
@@ -1997,26 +2371,6 @@ def op_comm_postprocess_layer(self, state):
)
return output
- def _build_fuse_allreduce_lookup_table(self):
- static_conditions_met = (
- self.layer_id != self.config.num_hidden_layers - 1
- and get_tensor_model_parallel_world_size() > 1
- and global_server_args_dict.get("enable_flashinfer_allreduce_fusion", False)
- and _is_sm100_supported
- and _is_flashinfer_available
- )
-
- if not static_conditions_met:
- return {}
-
- lookup_table = {}
- for batch_size in range(129): # 0 to 128
- is_last_layer = self.layer_id == self.config.num_hidden_layers - 1
- should_fuse = batch_size > 0 and batch_size <= 128 and not is_last_layer
- lookup_table[batch_size] = should_fuse
-
- return lookup_table
-
class DeepseekV2Model(nn.Module):
fall_back_to_pt_during_load = False
@@ -2055,12 +2409,60 @@ def __init__(
pp_rank=self.pp_group.rank_in_group,
pp_size=self.pp_group.world_size,
prefix=add_prefix("layers", prefix),
+ offloader_kwargs=dict(
+ submodule_accessor=lambda layer: (
+ layer.mlp.experts
+ if isinstance(layer.mlp, DeepseekV2MoE)
+ else layer.mlp
+ ),
+ whitelist_param_names_creator=lambda module: (
+ [
+ "w13_weight",
+ "w2_weight",
+ "w13_blockscale_swizzled",
+ "w2_blockscale_swizzled",
+ ]
+ if isinstance(module, FusedMoE)
+ else []
+ ),
+ ),
)
if self.pp_group.is_last_rank:
self.norm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps)
else:
self.norm = PPMissingLayer(return_tuple=True)
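+
+        # Size of a shared zero-initialized scratch buffer used by the AITER gfx95
+        # GEMM paths (router gemm and the shared experts' gate_up projection).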
+ self.gemm_output_zero_allocator_size = 0
+ if (
+ _use_aiter_gfx95
+ and config.n_routed_experts == 256
+ and self.embed_tokens.embedding_dim == 7168
+ ):
+ num_moe_layers = sum(
+ [
+ 1
+ for i in range(len(self.layers))
+ if isinstance(self.layers[i].mlp, DeepseekV2MoE)
+ ]
+ )
+
+ allocate_size = 0
+ for i in range(len(self.layers)):
+ if isinstance(self.layers[i].mlp, DeepseekV2MoE):
+ allocate_size = self.layers[
+ i
+ ].mlp.shared_experts.gate_up_proj.output_size_per_partition
+ break
+
+ self.gemm_output_zero_allocator_size = (
+ get_dsv3_gemm_output_zero_allocator_size(
+ config.n_routed_experts,
+ num_moe_layers,
+ allocate_size,
+ self.embed_tokens.embedding_dim,
+ )
+ )
+
def get_input_embeddings(self) -> torch.Tensor:
return self.embed_tokens
@@ -2080,6 +2482,21 @@ def forward(
device=device,
)
+ has_gemm_output_zero_allocator = hasattr(
+ self, "gemm_output_zero_allocator_size"
+ )
+
+ gemm_output_zero_allocator = (
+ BumpAllocator(
+ buffer_size=self.gemm_output_zero_allocator_size,
+ dtype=torch.float32,
+ device=device,
+ )
+ if has_gemm_output_zero_allocator
+ and self.gemm_output_zero_allocator_size > 0
+ else None
+ )
+
if self.pp_group.is_first_rank:
if input_embeds is None:
hidden_states = self.embed_tokens(input_ids)
@@ -2102,12 +2519,21 @@ def forward(
elif self.first_k_dense_replace < normal_start_layer:
normal_end_layer = normal_start_layer = 0
+ forward_batch.on_model_start()
for i in range(normal_start_layer, normal_end_layer):
with get_global_expert_distribution_recorder().with_current_layer(i):
+ forward_batch.on_layer_start(i)
layer = self.layers[i]
hidden_states, residual = layer(
- positions, hidden_states, forward_batch, residual, zero_allocator
+ positions,
+ hidden_states,
+ forward_batch,
+ residual,
+ zero_allocator,
+ gemm_output_zero_allocator,
)
+ forward_batch.on_layer_end(i)
+ forward_batch.on_model_end()
if normal_end_layer != self.end_layer:
hidden_states, residual = model_forward_maybe_tbo(
@@ -2143,6 +2569,9 @@ class DeepseekV2ForCausalLM(nn.Module):
# for quark model load
packed_modules_mapping = {}
+ # for hip attention
+ hip_attention_supported = True
+
def __init__(
self,
config: PretrainedConfig,
@@ -2210,6 +2639,8 @@ def determine_num_fused_shared_experts(
disable_reason = "Only Deepseek V3/R1 on NV-platform with capability >= 80 can use shared experts fusion optimization."
elif get_moe_expert_parallel_world_size() > 1:
disable_reason = "Deepseek V3/R1 can not use shared experts fusion optimization under expert parallelism."
+ elif self.quant_config.get_name() == "w4afp8":
+            disable_reason = "Deepseek V3/R1 W4AFP8 model uses a different quant method for routed experts and shared experts."
if disable_reason is not None:
global_server_args_dict["disable_shared_experts_fusion"] = True
@@ -2377,6 +2808,16 @@ def post_load_weights(self, is_nextn=False, weight_names=None):
w_kc, w_vc = w.unflatten(
0, (-1, self_attn.qk_nope_head_dim + self_attn.v_head_dim)
).split([self_attn.qk_nope_head_dim, self_attn.v_head_dim], dim=1)
+
+ if (
+ _use_aiter_gfx95
+ and self.quant_config is not None
+ and self.quant_config.get_name() == "quark"
+ ):
+ w_kc, self_attn.w_scale_k, w_vc, self_attn.w_scale_v = (
+ quark_post_load_weights(self_attn, w, "mxfp4")
+ )
+
if not use_deep_gemm_bmm:
self_attn.w_kc = bind_or_assign(
self_attn.w_kc, w_kc.transpose(1, 2).contiguous().transpose(1, 2)
@@ -2439,18 +2880,26 @@ def _weight_requant_ue8m0(self, is_nextn=False):
)
num_hidden_layers = 1 if is_nextn else self.config.num_hidden_layers
+
for layer_id in range(num_hidden_layers):
if is_nextn:
layer = self.model.decoder
else:
layer = self.model.layers[layer_id]
- for module in [
- layer.self_attn.fused_qkv_a_proj_with_mqa,
- layer.self_attn.q_b_proj,
+ module_list = [
layer.self_attn.kv_b_proj,
layer.self_attn.o_proj,
- ]:
+ ]
+
+ if self.config.q_lora_rank is not None:
+ module_list.append(layer.self_attn.fused_qkv_a_proj_with_mqa)
+ module_list.append(layer.self_attn.q_b_proj)
+ else:
+ module_list.append(layer.self_attn.kv_a_proj_with_mqa)
+ module_list.append(layer.self_attn.q_proj)
+
+ for module in module_list:
requant_weight_ue8m0_inplace(
module.weight, module.weight_scale_inv, weight_block_size
)
@@ -2513,6 +2962,9 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]], is_nextn=Fal
ckpt_up_proj_name="up_proj",
num_experts=self.config.n_routed_experts + self.num_fused_shared_experts,
)
+ # Params for special naming rules in mixed-precision models, for example:
+ # model.layers.xx.mlp.experts.xx.w1.input_scale. For details,
+ # see https://huggingface.co/Barrrrry/DeepSeek-R1-W4AFP8/blob/main.
if self.quant_config and self.quant_config.get_name() == "w4afp8":
expert_params_mapping += FusedMoE.make_expert_input_scale_params_mapping(
num_experts=self.config.n_routed_experts
diff --git a/python/sglang/srt/models/exaone.py b/python/sglang/srt/models/exaone.py
index 1e4dfb3df21..f597c6c9736 100644
--- a/python/sglang/srt/models/exaone.py
+++ b/python/sglang/srt/models/exaone.py
@@ -156,6 +156,10 @@ def __init__(
num_kv_heads=self.num_kv_heads,
layer_id=layer_id,
quant_config=quant_config,
+ orig_context_len=getattr(
+ config, "orig_context_len", max_position_embeddings
+ ),
+ rope=self.rotary_emb,
)
def forward(
@@ -166,7 +170,14 @@ def forward(
) -> torch.Tensor:
qkv, _ = self.qkv_proj(hidden_states)
q, k, v = qkv.split([self.q_size, self.kv_size, self.kv_size], dim=-1)
- q, k = self.rotary_emb(positions, q, k)
+
+ # RoPE is applied inside the attention kernel in HiP Attention
+ if not (
+ forward_batch.hip_metadata_cache_pool is not None
+ and forward_batch.hip_metadata_cache_pool.hip_config.using_extend
+ ):
+ q, k = self.rotary_emb(positions, q, k)
+
attn_output = self.attn(q, k, v, forward_batch)
output, _ = self.out_proj(attn_output)
return output
@@ -282,7 +293,10 @@ def forward(
else:
hidden_states = input_embeds
residual = None
+
+ forward_batch.on_model_start()
for i in range(len(self.h)):
+ forward_batch.on_layer_start(i)
layer = self.h[i]
hidden_states, residual = layer(
positions,
@@ -290,11 +304,16 @@ def forward(
forward_batch,
residual,
)
+ forward_batch.on_layer_end(i)
+ forward_batch.on_model_end()
+
hidden_states, _ = self.ln_f(hidden_states, residual)
return hidden_states
class ExaoneForCausalLM(nn.Module):
+ hip_attention_supported = True
+
def __init__(
self,
config,
diff --git a/python/sglang/srt/models/gemma3n_mm.py b/python/sglang/srt/models/gemma3n_mm.py
index fa9a10c85cb..995db26027c 100644
--- a/python/sglang/srt/models/gemma3n_mm.py
+++ b/python/sglang/srt/models/gemma3n_mm.py
@@ -499,7 +499,7 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
def should_apply_lora(self, module_name: str) -> bool:
return bool(self.lora_pattern.match(module_name))
- def get_hidden_dim(self, module_name):
+ def get_hidden_dim(self, module_name, layer_idx):
# return input_dim, output_dim
if module_name == "qkv_proj":
return (
diff --git a/python/sglang/srt/models/glm4_moe.py b/python/sglang/srt/models/glm4_moe.py
index bf6ceaeb875..59462836eab 100644
--- a/python/sglang/srt/models/glm4_moe.py
+++ b/python/sglang/srt/models/glm4_moe.py
@@ -68,8 +68,8 @@
VocabParallelEmbedding,
)
from sglang.srt.managers.schedule_batch import global_server_args_dict
+from sglang.srt.model_executor.cuda_graph_runner import get_is_capture_mode
from sglang.srt.model_executor.forward_batch_info import ForwardBatch
-from sglang.srt.model_executor.graph_runner import get_is_capture_mode
from sglang.srt.model_loader.weight_utils import default_weight_loader
from sglang.srt.models.deepseek_v2 import (
DeepseekV2DecoderLayer,
@@ -153,7 +153,13 @@ def __init__(
)
self.act_fn = SiluAndMul()
- def forward(self, x, forward_batch=None, should_allreduce_fusion=False):
+ def forward(
+ self,
+ x,
+ forward_batch=None,
+ should_allreduce_fusion=False,
+ gemm_output_zero_allocator: BumpAllocator = None,
+ ):
if (self.tp_size == 1) and x.shape[0] == 0:
return x
@@ -181,6 +187,7 @@ def __init__(
use_qk_norm: bool = False,
prefix: str = "",
alt_stream: Optional[torch.cuda.Stream] = None,
+ config: PretrainedConfig = None,
) -> None:
super().__init__()
self.hidden_size = hidden_size
@@ -241,13 +248,19 @@ def __init__(
base=rope_theta,
rope_scaling=rope_scaling,
)
+ assert partial_rotary_factor == 0.5
self.attn = RadixAttention(
self.num_heads,
self.head_dim,
self.scaling,
num_kv_heads=self.num_kv_heads,
layer_id=layer_id,
+ orig_context_len=getattr(
+ config, "orig_context_len", max_position_embeddings
+ ),
+ rope=self.rotary_emb,
prefix=add_prefix("attn", prefix),
+ rope_range=(self.head_dim // 2, self.head_dim),
)
if self.use_qk_norm:
@@ -301,7 +314,25 @@ def forward_prepare(
q, k, v = qkv.split([self.q_size, self.kv_size, self.kv_size], dim=-1)
if self.use_qk_norm:
q, k = self._apply_qk_norm(q, k)
- q, k = self.rotary_emb(positions, q, k)
+
+ # RoPE is applied inside the attention kernel in HiP Attention
+ if (
+ forward_batch.hip_metadata_cache_pool is not None
+ and forward_batch.hip_metadata_cache_pool.hip_config.using_extend
+ ):
+
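+            # The kernel applies RoPE over rope_range (the back half of each head),
+            # while partial RoPE rotates the front half, so swap the halves of q/k.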
+ def rotate(t: torch.Tensor):
+ t_shape = t.shape
+ t = t.reshape(-1, self.head_dim)
+ HID = t.shape[-1]
+ t = torch.cat([t[..., HID // 2 :], t[..., : HID // 2]], dim=-1)
+ return t.reshape(t_shape)
+
+ q = rotate(q)
+ k = rotate(k)
+ else:
+ q, k = self.rotary_emb(positions, q, k)
+
inner_state = q, k, v, forward_batch
return None, forward_batch, inner_state
@@ -501,6 +532,7 @@ def forward_normal_dual_stream(
hidden_states: torch.Tensor,
should_allreduce_fusion: bool = False,
use_reduce_scatter: bool = False,
+ gemm_output_zero_allocator: BumpAllocator = None,
) -> torch.Tensor:
current_stream = torch.cuda.current_stream()
@@ -543,6 +575,7 @@ def forward_normal(
hidden_states: torch.Tensor,
should_allreduce_fusion: bool = False,
use_reduce_scatter: bool = False,
+ gemm_output_zero_allocator: BumpAllocator = None,
) -> torch.Tensor:
if hasattr(self, "shared_experts") and use_intel_amx_backend(
self.shared_experts.gate_up_proj
@@ -612,6 +645,7 @@ def __init__(
quant_config=quant_config,
prefix=add_prefix("self_attn", prefix),
use_qk_norm=config.use_qk_norm,
+ config=config,
)
self.is_layer_sparse = self._is_layer_sparse(layer_id, is_nextn=is_nextn)
@@ -666,6 +700,7 @@ def forward(
forward_batch: ForwardBatch,
residual: Optional[torch.Tensor],
zero_allocator: BumpAllocator,
+ gemm_output_zero_allocator: BumpAllocator = None,
) -> torch.Tensor:
hidden_states, residual = self.layer_communicator.prepare_attn(
hidden_states, residual, forward_batch
@@ -727,6 +762,7 @@ def __init__(
class Glm4MoeForCausalLM(DeepseekV2ForCausalLM):
+ hip_attention_supported = True
def __init__(
self,
diff --git a/python/sglang/srt/models/glm4v.py b/python/sglang/srt/models/glm4v.py
index fbd757849a8..63c955a7246 100644
--- a/python/sglang/srt/models/glm4v.py
+++ b/python/sglang/srt/models/glm4v.py
@@ -9,6 +9,7 @@
from sglang.srt.hf_transformers_utils import get_processor
from sglang.srt.layers.activation import SiluAndMul
+from sglang.srt.layers.attention import vision_utils
from sglang.srt.layers.layernorm import RMSNorm
from sglang.srt.layers.linear import (
ColumnParallelLinear,
@@ -91,9 +92,9 @@ def __init__(
norm_layer=norm_layer,
quant_config=quant_config,
prefix=prefix,
+ num_dummy_heads=config.num_dummy_heads,
+ rms_norm_eps=config.rms_norm_eps,
)
- self.norm1 = Glm4vRMSNorm(config.hidden_size, eps=config.rms_norm_eps)
- self.norm2 = Glm4vRMSNorm(config.hidden_size, eps=config.rms_norm_eps)
self.mlp = Glm4vVisionMLP(
config.hidden_size,
@@ -469,7 +470,7 @@ def __init__(
nn.Module.__init__(self)
self.config = config
-
+ vision_utils.update_vit_attn_dummy_heads_config(self.config)
self.model = Glm4Model(
config,
quant_config,
@@ -496,6 +497,9 @@ def __init__(
self.pooler = Pooler(pooling_type=PoolingType.LAST, normalize=True)
self.is_mrope_enabled = "mrope_section" in self.config.rope_scaling
+ # For EAGLE3 support
+ self.capture_aux_hidden_states = False
+
def get_image_feature(self, items: List[MultimodalDataItem]) -> torch.Tensor:
pixel_values = torch.cat(
[item.feature.squeeze(0) for item in items], dim=0
@@ -537,6 +541,51 @@ def get_video_feature(self, items: List[MultimodalDataItem]) -> torch.Tensor:
video_embeds = torch.split(video_embeds, split_sizes)
return torch.cat(video_embeds)
+ def _update_hf_config(self):
+        """Update the HF config so the vision attention num_attention_heads is divisible by tp_size."""
+ tp_size = get_attention_tp_size()
+ num_heads = self.config.vision_config.num_heads
+ head_dim = self.config.vision_config.hidden_size // num_heads
+ num_dummy_heads = 0
+
+ if num_heads % tp_size != 0:
+ num_dummy_heads = (
+ (num_heads + tp_size - 1) // tp_size
+ ) * tp_size - num_heads
+
+ setattr(self.config.vision_config, "head_dim", head_dim)
+ setattr(self.config.vision_config, "num_dummy_heads", num_dummy_heads)
+
+ def _pad_vit_attn_dummy_heads(self, name: str, loaded_weight: torch.Tensor):
+        """Pad attention qkv weights with zeros for the dummy heads."""
+ num_dummy_heads = self.config.vision_config.num_dummy_heads
+ if num_dummy_heads == 0:
+ return loaded_weight
+ head_dim = self.config.vision_config.head_dim
+
+ if "attn.qkv_proj" in name:
+ wq, wk, wv = loaded_weight.chunk(3, dim=0)
+ if name.endswith(".weight"):
+ dummy_shape = [num_dummy_heads, head_dim, wq.shape[-1]]
+ elif name.endswith(".bias"):
+ dummy_shape = [num_dummy_heads, head_dim]
+ else:
+ raise RuntimeError(f"Unsupported weight with name={name}")
+ pad_func = lambda x: torch.cat(
+ [x.unflatten(0, (-1, head_dim)), x.new_zeros(dummy_shape)], dim=0
+ ).flatten(0, 1)
+ wq, wk, wv = pad_func(wq), pad_func(wk), pad_func(wv)
+ loaded_weight = torch.cat([wq, wk, wv], dim=0)
+ elif "attn.proj.weight" in name:
+ padded_weight = loaded_weight.new_zeros(
+ loaded_weight.shape[0], head_dim * num_dummy_heads
+ )
+ loaded_weight = torch.cat([loaded_weight, padded_weight], dim=-1)
+ elif "attn.q_norm.weight" in name or "attn.k_norm.weight" in name:
+ padded_weight = loaded_weight.new_zeros(head_dim * num_dummy_heads)
+ loaded_weight = torch.cat([loaded_weight, padded_weight], dim=0)
+ return loaded_weight
+
def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
stacked_params_mapping = [
# (param_name, shard_name, shard_id)
@@ -583,6 +632,10 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
raise
weight_loader = getattr(param, "weight_loader", default_weight_loader)
+ if "visual" in name:
+ loaded_weight = vision_utils.pad_vit_attn_dummy_heads(
+ self.config, name, loaded_weight
+ )
weight_loader(param, loaded_weight)
diff --git a/python/sglang/srt/models/glm4v_moe.py b/python/sglang/srt/models/glm4v_moe.py
index 576cb349022..86cca4ab246 100644
--- a/python/sglang/srt/models/glm4v_moe.py
+++ b/python/sglang/srt/models/glm4v_moe.py
@@ -11,6 +11,7 @@
get_tensor_model_parallel_world_size,
)
from sglang.srt.hf_transformers_utils import get_processor
+from sglang.srt.layers.attention import vision_utils
from sglang.srt.layers.logits_processor import LogitsProcessor
from sglang.srt.layers.moe.fused_moe_triton import FusedMoE
from sglang.srt.layers.pooler import Pooler, PoolingType
@@ -40,6 +41,7 @@ def __init__(
config.moe_layer_freq = 1
self.config = config
+ vision_utils.update_vit_attn_dummy_heads_config(self.config)
self.tp_size = get_tensor_model_parallel_world_size()
self.quant_config = quant_config
self.determine_num_fused_shared_experts("Glm4MoeForCausalLM")
@@ -385,6 +387,10 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]], is_nextn=Fal
weight_loader = getattr(
param, "weight_loader", default_weight_loader
)
+ if "visual" in name:
+ loaded_weight = vision_utils.pad_vit_attn_dummy_heads(
+ self.config, name, loaded_weight
+ )
weight_loader(param, loaded_weight)
diff --git a/python/sglang/srt/models/gpt_oss.py b/python/sglang/srt/models/gpt_oss.py
index 93c4bda4904..3a5aacaaf64 100644
--- a/python/sglang/srt/models/gpt_oss.py
+++ b/python/sglang/srt/models/gpt_oss.py
@@ -16,6 +16,7 @@
"""Inference-only GptOss model compatible with HuggingFace weights."""
import logging
+import math
from collections.abc import Iterable
from functools import partial
from typing import Any, Dict, List, Optional, Tuple, Union
@@ -57,7 +58,7 @@
from sglang.srt.layers.quantization.fp8_utils import dequant_mxfp4
from sglang.srt.layers.radix_attention import RadixAttention
from sglang.srt.layers.rotary_embedding import get_rope
-from sglang.srt.layers.utils import PPMissingLayer, get_layer_id, is_sm100_supported
+from sglang.srt.layers.utils import PPMissingLayer, get_layer_id
from sglang.srt.layers.vocab_parallel_embedding import (
ParallelLMHead,
VocabParallelEmbedding,
@@ -70,6 +71,7 @@
add_prefix,
is_cuda,
is_flashinfer_available,
+ is_sm100_supported,
make_layers,
)
@@ -191,8 +193,9 @@ def forward_normal(
return ans
-def _enable_fused_set_kv_buffer():
- return _is_cuda
+def _enable_fused_set_kv_buffer(forward_batch: ForwardBatch):
+ """Enable fused set_kv_buffer only on CUDA with bfloat16 KV cache."""
+ return _is_cuda and forward_batch.token_to_kv_pool.dtype == torch.bfloat16
# TODO maybe move to a model-common utils
@@ -235,6 +238,7 @@ def __init__(
sliding_window_size: int = -1, # if -1, normal attention, else, window attention.
layer_type: str = "",
params_dtype: torch.dtype = torch.bfloat16,
+ config: GptOssConfig = None,
) -> None:
super().__init__()
self.hidden_size = hidden_size
@@ -313,6 +317,10 @@ def __init__(
self.scaling,
num_kv_heads=self.num_kv_heads,
layer_id=layer_id,
+ orig_context_len=getattr(
+ config, "orig_context_len", max_position_embeddings
+ ),
+ rope=self.rotary_emb,
prefix=add_prefix("attn", prefix),
sliding_window_size=(sliding_window_size if use_sliding_window else -1),
)
@@ -329,20 +337,29 @@ def forward_prepare(
qkv, _ = self.qkv_proj(hidden_states)
q, k, v = qkv.split([self.q_size, self.kv_size, self.kv_size], dim=-1)
- q, k = self.rotary_emb(
- positions,
- q,
- k,
- fused_set_kv_buffer_arg=(
- _create_fused_set_kv_buffer_arg(
- value=v,
- layer=self.attn,
- forward_batch=forward_batch,
- )
- if _enable_fused_set_kv_buffer()
- else None
- ),
- )
+ # RoPE is applied inside the attention kernel in HiP Attention
+ if not (
+ forward_batch.hip_metadata_cache_pool is not None
+ and forward_batch.hip_metadata_cache_pool.hip_config.using_extend
+ ):
+ q, k = self.rotary_emb(
+ positions,
+ q,
+ k,
+ fused_set_kv_buffer_arg=(
+ _create_fused_set_kv_buffer_arg(
+ value=v,
+ layer=self.attn,
+ forward_batch=forward_batch,
+ )
+ if (
+ _enable_fused_set_kv_buffer(forward_batch)
+ and (forward_batch.hip_metadata_cache_pool is None)
+ )
+ else None
+ ),
+ )
+
inner_state = q, k, v, forward_batch
return None, forward_batch, inner_state
@@ -353,7 +370,10 @@ def forward_core(self, intermediate_state):
attn_output = self.attn(
*inner_state,
sinks=self.sinks,
- save_kv_cache=not _enable_fused_set_kv_buffer(),
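+            # Skip the explicit KV-cache write when the fused RoPE + set_kv_buffer
+            # path in forward_prepare has already stored K/V.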
+ save_kv_cache=not (
+ _enable_fused_set_kv_buffer(forward_batch)
+ and (forward_batch.hip_metadata_cache_pool is None)
+ ),
)
output, _ = self.o_proj(attn_output)
return output
@@ -413,6 +433,7 @@ def __init__(
sliding_window_size=self.sliding_window_size,
layer_type=config.layer_types[layer_id],
params_dtype=config.torch_dtype,
+ config=config,
)
self.layer_id = layer_id
@@ -453,44 +474,11 @@ def __init__(
layer_scatter_modes=self.layer_scatter_modes,
input_layernorm=self.input_layernorm,
post_attention_layernorm=self.post_attention_layernorm,
+ is_last_layer=(
+ self.is_nextn or (self.layer_id == self.config.num_hidden_layers - 1)
+ ),
)
- self._fuse_allreduce_lookup_table = self._build_fuse_allreduce_lookup_table()
-
- def _should_fuse_mlp_allreduce_with_next_layer(self, forward_batch) -> bool:
- """Check if MLP allreduce can be fused with next layer's residual_rmsnorm"""
-
- batch_size = (
- forward_batch.input_ids.shape[0]
- if hasattr(forward_batch, "input_ids")
- else 0
- )
-
- if batch_size > 128:
- return False
-
- return self._fuse_allreduce_lookup_table.get(batch_size, False)
-
- def _build_fuse_allreduce_lookup_table(self):
- static_conditions_met = (
- self.layer_id != self.config.num_hidden_layers - 1
- and get_tensor_model_parallel_world_size() > 1
- and global_server_args_dict.get("enable_flashinfer_allreduce_fusion", False)
- and _is_sm100_supported
- and _is_flashinfer_available
- )
-
- if not static_conditions_met:
- return {}
-
- lookup_table = {}
- for batch_size in range(129): # 0 to 128
- is_last_layer = self.layer_id == self.config.num_hidden_layers - 1
- should_fuse = batch_size > 0 and batch_size <= 128 and not is_last_layer
- lookup_table[batch_size] = should_fuse
-
- return lookup_table
-
def forward(
self,
positions: torch.Tensor,
@@ -514,8 +502,9 @@ def forward(
)
should_allreduce_fusion = (
- self._should_fuse_mlp_allreduce_with_next_layer(forward_batch)
- and not self.is_nextn
+ self.layer_communicator.should_fuse_mlp_allreduce_with_next_layer(
+ forward_batch
+ )
)
hidden_states = self.mlp(hidden_states, forward_batch, should_allreduce_fusion)
@@ -595,14 +584,20 @@ def forward(
residual = pp_proxy_tensors["residual"]
aux_hidden_states = []
+
+ forward_batch.on_model_start()
for i in range(self.start_layer, self.end_layer):
with get_global_expert_distribution_recorder().with_current_layer(i):
if i in self.layers_to_capture:
aux_hidden_states.append(hidden_states + residual)
+ forward_batch.on_layer_start(i)
layer = self.layers[i]
hidden_states, residual = layer(
positions, hidden_states, forward_batch, residual
)
+ forward_batch.on_layer_end(i)
+ forward_batch.on_model_end()
+
if not self.pp_group.is_last_rank:
return PPProxyTensors(
{
@@ -624,6 +619,7 @@ def forward(
class GptOssForCausalLM(nn.Module):
fall_back_to_pt_during_load = False
+ hip_attention_supported = True
def __init__(
self,
@@ -820,18 +816,27 @@ def _load_mxfp4_experts_weights(self, weights):
moe_ep_size = get_moe_expert_parallel_world_size()
intermediate_size = self.config.intermediate_size
+ assert (
+ intermediate_size % mxfp4_block == 0
+ ), f"{intermediate_size=} must be divisible by {mxfp4_block=}"
intermediate_size_block = intermediate_size // mxfp4_block
- per_rank_intermediate_size_block = intermediate_size_block // moe_tp_size
+
+ per_rank_intermediate_size_block = math.ceil(
+ intermediate_size_block / moe_tp_size
+ )
+
per_rank_intermediate_size = per_rank_intermediate_size_block * mxfp4_block
# Calculate common slicing bounds for current rank
assert self.config.num_local_experts % moe_ep_size == 0
moe_num_global_experts = self.config.num_local_experts
moe_num_local_experts = self.config.num_local_experts // moe_ep_size
+
moe_tp_rank_start = moe_tp_rank * per_rank_intermediate_size
moe_tp_rank_end = min(
(moe_tp_rank + 1) * per_rank_intermediate_size, intermediate_size
)
+
moe_ep_rank_start = moe_ep_rank * moe_num_local_experts
moe_ep_rank_end = (moe_ep_rank + 1) * moe_num_local_experts
@@ -1050,10 +1055,6 @@ def _load_normal_weights(
)
params_dict = dict(self.named_parameters())
- params_checker = {k: False for k, v in params_dict.items()}
-
- for other_loaded_param_name in other_loaded_param_names:
- params_checker[other_loaded_param_name] = True
for name, loaded_weight in weights:
loaded_weight = _WeightCreator.maybe_materialize(loaded_weight)
@@ -1090,7 +1091,6 @@ def _load_normal_weights(
param = params_dict[name]
weight_loader = param.weight_loader
weight_loader(param, loaded_weight, shard_id)
- params_checker[name] = True
break
else:
for mapping in expert_params_mapping:
@@ -1113,7 +1113,6 @@ def _load_normal_weights(
name,
shard_id=shard_id,
)
- params_checker[name] = True
break
else:
if name.endswith(".bias") and name not in params_dict:
@@ -1123,7 +1122,7 @@ def _load_normal_weights(
if name in params_dict.keys():
param = params_dict[name]
if "sinks" in name:
- start = tp_rank * param.numel()
+ start = get_attention_tp_rank() * param.numel()
param.data.copy_(
loaded_weight[start : start + param.numel()]
)
@@ -1132,17 +1131,9 @@ def _load_normal_weights(
param, "weight_loader", default_weight_loader
)
weight_loader(param, loaded_weight)
- params_checker[name] = True
else:
logger.warning(f"Parameter {name} not found in params_dict")
- not_loaded_params = [k for k, v in params_checker.items() if not v]
- if tp_rank == 0:
- if len(not_loaded_params) > 0:
- raise Exception(f"Not all parameters loaded: {not_loaded_params}")
- else:
- logging.info("All parameters loaded successfully.")
-
def get_embed_and_head(self):
return self.model.embed_tokens.weight, self.lm_head.weight
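
# Illustrative sketch (reviewer annotation, not part of this patch): why the
# switch to math.ceil above matters. When moe_tp_size does not evenly divide
# the number of mxfp4 blocks, floor division under-allocates and the last rank
# silently drops columns; ceil division plus the min() clamp gives every rank a
# full share except the last. The numbers below are toy values, not model configs.
import math

def per_rank_bounds(intermediate_size, mxfp4_block, moe_tp_size, moe_tp_rank):
    assert intermediate_size % mxfp4_block == 0
    blocks = intermediate_size // mxfp4_block
    per_rank_blocks = math.ceil(blocks / moe_tp_size)  # ceil, as in the diff
    per_rank = per_rank_blocks * mxfp4_block
    start = moe_tp_rank * per_rank
    end = min((moe_tp_rank + 1) * per_rank, intermediate_size)  # clamp last rank
    return start, end

# 90 blocks of 32 over 4 ranks -> 23 blocks per rank, last rank clamped to 2880.
print([per_rank_bounds(90 * 32, 32, 4, r) for r in range(4)])
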
diff --git a/python/sglang/srt/models/grok.py b/python/sglang/srt/models/grok.py
index 254d46d7bbc..a35420993d9 100644
--- a/python/sglang/srt/models/grok.py
+++ b/python/sglang/srt/models/grok.py
@@ -16,7 +16,6 @@
# https://github.com/vllm-project/vllm/blob/c7f2cf2b7f67bce5842fedfdba508440fe257375/vllm/model_executor/models/mixtral.py#L1
"""Inference-only Grok1 model."""
import functools
-import json
import logging
import math
import os
@@ -35,9 +34,16 @@
tensor_model_parallel_all_gather,
tensor_model_parallel_all_reduce,
)
-from sglang.srt.layers.elementwise import fused_dual_residual_rmsnorm, fused_rmsnorm
+from sglang.srt.layers.activation import GeluAndMul
+from sglang.srt.layers.elementwise import (
+ experts_combine_triton,
+ fused_dual_residual_rmsnorm,
+ fused_rmsnorm,
+ gelu_and_mul_triton,
+)
from sglang.srt.layers.layernorm import RMSNorm
from sglang.srt.layers.linear import (
+ MergedColumnParallelLinear,
QKVParallelLinear,
ReplicatedLinear,
RowParallelLinear,
@@ -49,7 +55,12 @@
from sglang.srt.layers.moe.topk import TopK
from sglang.srt.layers.quantization.base_config import QuantizationConfig
from sglang.srt.layers.radix_attention import RadixAttention
-from sglang.srt.layers.rotary_embedding import get_rope
+from sglang.srt.layers.rotary_embedding import (
+ RotaryEmbedding,
+ _yarn_find_correction_range,
+ _yarn_get_mscale,
+ get_rope,
+)
from sglang.srt.layers.vocab_parallel_embedding import (
ParallelLMHead,
VocabParallelEmbedding,
@@ -58,13 +69,60 @@
from sglang.srt.model_executor.forward_batch_info import ForwardBatch
from sglang.srt.model_loader.loader import DefaultModelLoader
from sglang.srt.model_loader.weight_utils import default_weight_loader
-from sglang.srt.utils import dump_to_file
+from sglang.srt.utils import add_prefix, dispose_tensor, dump_to_file
logger = logging.getLogger(__name__)
+# Dump tensors for debugging
debug_tensor_dump_output_folder = None
+debug_tensor_dump_prefill_only = False
+# Skip all the other tensor dumps, only dump the target logits
+debug_tensor_dump_only_target_logprobs = False
debug_tensor_dump_inject = False
+debug_tensor_dump_layers = None
+debug_tensor_dump_test = False
+
+
+class Grok1MLP(nn.Module):
+ def __init__(
+ self,
+ hidden_size: int,
+ intermediate_size: int,
+ layer_id: int,
+ quant_config: Optional[QuantizationConfig] = None,
+ prefix: str = "",
+ reduce_results=True,
+ use_presharded_weights: bool = False,
+ split_gate_up: bool = False,
+ ) -> None:
+ super().__init__()
+
+ self.gate_up_proj = MergedColumnParallelLinear(
+ hidden_size,
+ [intermediate_size] * 2,
+ bias=False,
+ quant_config=quant_config,
+ prefix=add_prefix("gate_up_proj", prefix),
+ use_presharded_weights=use_presharded_weights,
+ )
+ self.down_proj = RowParallelLinear(
+ intermediate_size,
+ hidden_size,
+ bias=False,
+ quant_config=quant_config,
+ prefix=add_prefix("down_proj", prefix),
+ reduce_results=reduce_results,
+ use_presharded_weights=use_presharded_weights,
+ )
+ self.act_fn = GeluAndMul(approximate="tanh")
+ self.layer_id = layer_id
+
+ def forward(self, x):
+ gate_up, _ = self.gate_up_proj(x)
+ x, _ = gelu_and_mul_triton(gate_up)
+ x, _ = self.down_proj(x)
+ return x
class Grok1MoE(nn.Module):
@@ -87,10 +145,11 @@ def __init__(
params_dtype: Optional[torch.dtype] = None,
quant_config: Optional[QuantizationConfig] = None,
tp_size: Optional[int] = None,
- reduce_results=True,
+ reduce_results: bool = True,
use_presharded_weights: bool = False,
inplace: bool = True,
no_combine: bool = False,
+ prefix: str = "",
):
super().__init__()
self.hidden_size = hidden_size
@@ -145,6 +204,135 @@ def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
return self.experts(hidden_states, topk_output)
+def _yarn_linear_ramp_mask(
+ low: float, high: float, dim: int, dtype: torch.dtype
+) -> torch.Tensor:
+ if low == high:
+ low -= 0.001 # Prevent singularity
+
+ linear_func = (torch.arange(dim, dtype=dtype) - low) / (high - low)
+ ramp_func = torch.clamp(linear_func, 0, 1)
+ return ramp_func
+
+
+def get_rope_scaling(config):
+ rope_type = getattr(config, "rope_type", None)
+ if rope_type:
+ original_max_position_embeddings = getattr(
+ config, "original_max_position_embeddings", None
+ )
+ scaling_factor = getattr(config, "scaling_factor", None)
+ extrapolation_factor = getattr(config, "extrapolation_factor", 1.0)
+ attn_factor = getattr(config, "attn_factor", 1.0)
+ beta_fast = getattr(config, "beta_fast", 32)
+ beta_slow = getattr(config, "beta_slow", 1)
+ rope_scaling = {
+ "extra_method": rope_type,
+ "max_position_embeddings": original_max_position_embeddings,
+ "scaling_factor": scaling_factor,
+ "extrapolation_factor": extrapolation_factor,
+ "attn_factor": attn_factor,
+ "beta_fast": beta_fast,
+ "beta_slow": beta_slow,
+ "dtype": torch.float,
+ }
+ return rope_scaling
+ else:
+ return None
+
+
+class ScalingRotaryEmbedding(RotaryEmbedding):
+ """Scale the RotaryEmbedding in a way similar to YaRN method. https://arxiv.org/pdf/2309.00071."""
+
+ def __init__(
+ self,
+ head_size: int,
+ rotary_dim: int,
+ max_position_embeddings: int,
+ base: int,
+ is_neox_style: bool,
+ scaling_factor: float,
+ dtype: torch.dtype,
+ *,
+ extra_method: str = "yarn_log",
+ extrapolation_factor: float = 1,
+ attn_factor: float = 1,
+ beta_fast: int = 32,
+ beta_slow: int = 1,
+ ) -> None:
+ self.scaling_factor = scaling_factor
+ self.extra_method = extra_method
+ self.extrapolation_factor = extrapolation_factor
+ self.attn_factor = attn_factor
+ self.beta_fast = beta_fast
+ self.beta_slow = beta_slow
+ # Get n-d magnitude scaling corrected for interpolation
+ self.mscale = float(_yarn_get_mscale(self.scaling_factor) * attn_factor)
+ super().__init__(
+ head_size, rotary_dim, max_position_embeddings, base, is_neox_style, dtype
+ )
+
+ def _compute_inv_freq(self, scaling_factor: float) -> torch.Tensor:
+ pos_freqs = self.base ** (
+ torch.arange(0, self.rotary_dim, 2, dtype=torch.float) / self.rotary_dim
+ )
+ inv_freq_extrapolation = 1.0 / pos_freqs
+ inv_freq_interpolation = 1.0 / (scaling_factor * pos_freqs)
+
+ low, high = _yarn_find_correction_range(
+ self.beta_fast,
+ self.beta_slow,
+ self.rotary_dim,
+ self.base,
+ self.max_position_embeddings,
+ )
+ # Get n-d rotational scaling corrected for extrapolation
+ inv_freq_mask = (
+ 1
+ - _yarn_linear_ramp_mask(low, high, self.rotary_dim // 2, dtype=torch.float)
+ ) * self.extrapolation_factor
+ if self.extra_method in ["original"]:
+ inv_freq = inv_freq_extrapolation
+ elif self.extra_method in ["yarn", "yarn_linear"]:
+ inv_freq = (
+ inv_freq_interpolation * (1 - inv_freq_mask)
+ + inv_freq_extrapolation * inv_freq_mask
+ )
+ elif self.extra_method == "yarn_log":
+ inv_freq = torch.exp(
+ torch.log(inv_freq_extrapolation) * inv_freq_mask
+ + torch.log(inv_freq_interpolation) * (1.0 - inv_freq_mask)
+ )
+ elif self.extra_method == "theta_scale":
+ exponents = torch.arange(0, self.rotary_dim, 2, dtype=torch.float)
+ theta_scale_exponent = self.base ** (
+ math.log(
+ self.max_position_embeddings * self.scaling_factor / (2 * math.pi)
+ )
+ / math.log(self.max_position_embeddings / (2 * math.pi))
+ )
+ inv_freq = torch.tensor(
+ 1.0 / (theta_scale_exponent ** (exponents / self.rotary_dim)),
+ dtype=torch.float32,
+ )
+ else:
+ raise ValueError(f"Unknown extrapolation method: {self.extra_method}")
+ return inv_freq
+
+ def _compute_cos_sin_cache(self) -> torch.Tensor:
+ inv_freq = self._compute_inv_freq(self.scaling_factor)
+ t = torch.arange(
+ self.max_position_embeddings * self.scaling_factor, dtype=torch.float32
+ )
+ freqs = torch.einsum("i,j -> ij", t, inv_freq)
+ # cos = freqs.cos() * self.mscale
+ # sin = freqs.sin() * self.mscale
+ cos = freqs.cos()
+ sin = freqs.sin()
+ cache = torch.cat((cos, sin), dim=-1)
+ return cache
+
+
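
# Illustrative sketch (reviewer annotation, not part of this patch): the core
# of the "yarn_log" branch above, isolated. The low/high correction bounds
# normally come from _yarn_find_correction_range; here they are plain inputs
# (assumed low < high) so the blend itself can be inspected on its own.
import torch

def yarn_log_inv_freq(rotary_dim, base, scaling_factor, low, high, extrapolation_factor=1.0):
    pos_freqs = base ** (torch.arange(0, rotary_dim, 2, dtype=torch.float) / rotary_dim)
    inv_freq_extra = 1.0 / pos_freqs                     # original frequencies
    inv_freq_inter = 1.0 / (scaling_factor * pos_freqs)  # interpolated frequencies
    ramp = torch.clamp(
        (torch.arange(rotary_dim // 2, dtype=torch.float) - low) / (high - low), 0, 1
    )
    mask = (1 - ramp) * extrapolation_factor
    # Log-space (geometric) blend between extrapolated and interpolated frequencies.
    return torch.exp(
        torch.log(inv_freq_extra) * mask + torch.log(inv_freq_inter) * (1.0 - mask)
    )

# e.g. yarn_log_inv_freq(rotary_dim=128, base=10000, scaling_factor=8.0, low=4, high=28)
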
class Grok1Attention(nn.Module):
def __init__(
self,
@@ -157,7 +345,9 @@ def __init__(
rope_theta: float = 10000,
quant_config: Optional[QuantizationConfig] = None,
reduce_results: bool = True,
+ alt_stream: Optional[torch.cuda.Stream] = None,
load_presharded_attn: bool = False,
+ prefix: str = "",
) -> None:
super().__init__()
self.config = config
@@ -183,7 +373,9 @@ def __init__(
self.kv_size = self.num_kv_heads * self.head_dim
self.scaling = self.head_dim**-0.5
self.rope_theta = rope_theta
+ rope_scaling = get_rope_scaling(config)
self.load_presharded_attn = load_presharded_attn
+ self.alt_stream = alt_stream or torch.cuda.Stream()
self.qkv_proj = QKVParallelLinear(
hidden_size,
@@ -195,6 +387,7 @@ def __init__(
tp_rank=attn_tp_rank,
tp_size=attn_tp_size,
load_presharded_attn=self.load_presharded_attn,
+ prefix=add_prefix("qkv_proj", prefix),
)
self.o_proj = RowParallelLinear(
self.total_num_heads * self.head_dim,
@@ -205,6 +398,7 @@ def __init__(
tp_rank=attn_tp_rank,
tp_size=attn_tp_size,
use_presharded_weights=self.load_presharded_attn,
+ prefix=add_prefix("o_proj", prefix),
)
self.rotary_emb = get_rope(
self.head_dim,
@@ -214,7 +408,37 @@ def __init__(
is_neox_style=True,
)
+ self.rope_rotate_half_dims = getattr(config, "rope_rotate_half_dims", False)
+
+ if rope_scaling is not None:
+ self.rotary_emb = ScalingRotaryEmbedding(
+ self.head_dim,
+ rotary_dim=(
+ self.head_dim
+ if not self.rope_rotate_half_dims
+ else self.head_dim // 2
+ ),
+ base=int(self.rope_theta),
+ is_neox_style=True,
+ **rope_scaling,
+ )
+ pos_encoding_mode = "NONE"
+ else:
+ self.rotary_emb = get_rope(
+ self.head_dim,
+ rotary_dim=(
+ self.head_dim
+ if not self.rope_rotate_half_dims
+ else self.head_dim // 2
+ ),
+ max_position=max_position,
+ base=int(self.rope_theta),
+ is_neox_style=True,
+ )
+ pos_encoding_mode = "NONE"
+
logit_cap = max(getattr(config, "attn_logit_softcapping", 30.0), 0.0)
+ logit_capping_method = getattr(config, "attn_logit_softcapping_method", "tanh")
self.attn = RadixAttention(
self.num_heads,
@@ -224,7 +448,11 @@ def __init__(
layer_id=layer_id,
logit_cap=logit_cap,
quant_config=quant_config,
+ pos_encoding_mode=pos_encoding_mode,
+ logit_capping_method=logit_capping_method,
+ prefix=add_prefix("attn", prefix),
)
+ self.attn.xai_temperature_len = getattr(self.config, "attn_temperature_len", -1)
def forward(
self,
@@ -256,6 +484,8 @@ def forward(
)
qkv, _ = self.qkv_proj(hidden_states)
+ dispose_tensor(hidden_states)
+
q, k, v = qkv.split([self.q_size, self.kv_size, self.kv_size], dim=-1)
q, k = self.rotary_emb(positions, q, k)
@@ -288,6 +518,7 @@ def forward(
)
attn_output = self.attn(q, k, v, forward_batch)
+ del q, k, v, qkv
if debug_tensor_dump_output_folder:
dump_to_file(
@@ -312,49 +543,89 @@ def __init__(
load_presharded_moe: bool = False,
load_presharded_attn: bool = False,
load_presharded_mlp: bool = False,
+ alt_stream: Optional[torch.cuda.Stream] = None,
+ skip_moe: bool = False,
+ prefix: str = "",
) -> None:
super().__init__()
self.num_experts = config.num_local_experts
self.hidden_size = config.hidden_size
+ self.residual_moe = getattr(config, "residual_moe", False)
self.layer_id = layer_id
+ self.alt_stream = alt_stream or torch.cuda.Stream()
rope_theta = getattr(config, "rope_theta", 10000)
self.self_attn = Grok1Attention(
config=config,
hidden_size=self.hidden_size,
num_heads=config.num_attention_heads,
- max_position=config.max_position_embeddings,
+ max_position=(
+ config.context_len
+ if hasattr(config, "context_len")
+ else config.max_position_embeddings
+ ),
num_kv_heads=config.num_key_value_heads,
layer_id=layer_id,
rope_theta=rope_theta,
quant_config=quant_config,
reduce_results=False,
+ alt_stream=self.alt_stream,
load_presharded_attn=load_presharded_attn,
+ prefix=add_prefix("attn", prefix),
)
- self.block_sparse_moe = Grok1MoE(
- config=config,
- layer_id=layer_id,
- num_experts=config.num_local_experts,
- top_k=config.num_experts_per_tok,
- hidden_size=config.hidden_size,
- intermediate_size=getattr(
- config,
- "moe_intermediate_size",
- getattr(config, "intermediate_size", None),
- ),
- quant_config=quant_config,
- reduce_results=True,
- use_presharded_weights=load_presharded_moe,
- inplace=True,
- no_combine=False, # just a suggestion to not combine topk
- )
+
+ split_gate_up = not getattr(config, "merge_gate_up", True)
+ if self.num_experts > 0:
+ self.block_sparse_moe = Grok1MoE(
+ config=config,
+ layer_id=layer_id,
+ num_experts=config.num_local_experts,
+ top_k=config.num_experts_per_tok,
+ hidden_size=config.hidden_size,
+ intermediate_size=getattr(
+ config,
+ "moe_intermediate_size",
+ getattr(config, "intermediate_size", None),
+ ),
+ quant_config=quant_config,
+ reduce_results=not self.residual_moe,
+ use_presharded_weights=load_presharded_moe,
+ inplace=False, # not self.residual_moe,
+ no_combine=False, # self.residual_moe, # just a suggestion to not combine topk
+ prefix=add_prefix("block_sparse_moe", prefix),
+ )
+ if self.residual_moe:
+ self.mlp = Grok1MLP(
+ hidden_size=config.hidden_size,
+ intermediate_size=config.intermediate_size,
+ quant_config=quant_config,
+ reduce_results=False,
+ use_presharded_weights=load_presharded_mlp,
+ layer_id=layer_id,
+ split_gate_up=split_gate_up,
+ )
+ else:
+ raise NotImplementedError()
self.pre_attn_norm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps)
self.post_attn_norm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps)
self.pre_moe_norm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps)
self.post_moe_norm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps)
- self.ffn = self.block_sparse_moe
+ if self.num_experts > 0:
+ if self.residual_moe:
+ # NOTE: self.block_sparse_moe modifies the input in-place, so it must be
+ # called after the dense MLP (see moe_with_rmoe); watch for ordering-related errors.
+ if get_tensor_model_parallel_world_size() > 1:
+ self.ffn = lambda x: tensor_model_parallel_all_reduce(
+ self.moe_with_rmoe(x)
+ )
+ else:
+ self.ffn = self.moe_with_rmoe
+ else:
+ self.ffn = self.block_sparse_moe
+ else:
+ raise NotImplementedError()
def forward(
self,
@@ -364,6 +635,10 @@ def forward(
residual: Optional[torch.Tensor] = None,
deferred_norm: Optional[RMSNorm] = None,
) -> Tuple[torch.Tensor, torch.Tensor, RMSNorm]:
+
+ hidden_states_original = hidden_states
+ residual_original = residual
+
# Self Attention
if deferred_norm is not None:
assert residual is not None
@@ -386,6 +661,14 @@ def forward(
hidden_states,
)
+ if residual_original is not None:
+ dispose_tensor(residual_original)
+
+ dispose_flag = False
+ if residual is not hidden_states_original:
+ dispose_flag = True
+ dispose_tensor(hidden_states_original)
+
hidden_states = self.self_attn(
positions=positions,
hidden_states=hidden_states,
@@ -403,10 +686,23 @@ def forward(
self.post_attn_norm.variance_epsilon,
)
+ if not dispose_flag:
+ dispose_tensor(hidden_states_original)
+
# Fully Connected
hidden_states = self.ffn(hidden_states)
return hidden_states, residual, self.post_moe_norm # defer layernorm
+ def moe_with_rmoe(self, x):
+ current_stream = torch.cuda.current_stream()
+ self.alt_stream.wait_stream(current_stream)
+ mlp_result = self.mlp(x)
+ with torch.cuda.stream(self.alt_stream):
+ # moe should not be inplace because of stream race condition
+ moe_result = self.block_sparse_moe(x)
+ current_stream.wait_stream(self.alt_stream)
+ return (mlp_result + moe_result) / 1.4142135623730951
+
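
# Illustrative sketch (reviewer annotation, not part of this patch): the
# two-stream overlap pattern used by moe_with_rmoe above, written generically.
# The divisor 1.4142135623730951 is sqrt(2), i.e. the two branch outputs are
# combined with equal variance weighting. The branch callables are placeholders.
import math
import torch

def overlap_two_branches(x, branch_a, branch_b, alt_stream):
    current = torch.cuda.current_stream()
    alt_stream.wait_stream(current)      # alt stream sees all prior work on x
    out_a = branch_a(x)                  # runs on the current stream
    with torch.cuda.stream(alt_stream):
        out_b = branch_b(x)              # runs concurrently on the alt stream
    current.wait_stream(alt_stream)      # join before combining the results
    # Neither branch may modify x in place, or the two streams would race.
    return (out_a + out_b) / math.sqrt(2)
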
class Grok1Model(nn.Module):
def __init__(
@@ -417,6 +713,8 @@ def __init__(
load_presharded_embedding: bool = False,
load_presharded_attn: bool = False,
load_presharded_mlp: bool = False,
+ replicate_embedding: bool = False,
+ prefix: str = "",
) -> None:
super().__init__()
self.config = config
@@ -427,7 +725,11 @@ def __init__(
config.vocab_size,
config.hidden_size,
use_presharded_weights=load_presharded_embedding,
+ enable_tp=not replicate_embedding,
+ prefix=add_prefix("embed_tokens", prefix),
)
+
+ self.alt_stream = torch.cuda.Stream()
self.layers = nn.ModuleList(
[
Grok1DecoderLayer(
@@ -437,6 +739,7 @@ def __init__(
load_presharded_moe=load_presharded_moe,
load_presharded_attn=load_presharded_attn,
load_presharded_mlp=load_presharded_mlp,
+ alt_stream=self.alt_stream,
)
for i in range(config.num_hidden_layers)
]
@@ -506,6 +809,7 @@ def __init__(
self,
config: PretrainedConfig,
quant_config: Optional[QuantizationConfig] = None,
+ prefix: str = "",
) -> None:
super().__init__()
self.config = config
@@ -514,7 +818,8 @@ def __init__(
# Get presharded weights.
self.load_presharded_mlp = getattr(config, "load_presharded_mlp", False)
self.load_presharded_moe = (
- self.config.num_local_experts > 0
+ getattr(config, "load_presharded_moe", True)
+ and self.config.num_local_experts > 0
and get_tensor_model_parallel_world_size() > 1
)
self.load_presharded_attn = getattr(config, "load_presharded_attn", False)
@@ -529,14 +834,16 @@ def __init__(
or self.load_presharded_embedding
)
- if self.is_weights_presharded:
- setattr(DefaultModelLoader, "_prepare_weights", _prepare_presharded_weights)
-
default_replicate_lm_head = False
self.replicate_lm_head = getattr(
config, "replicate_lm_head", default_replicate_lm_head
)
+ if self.is_weights_presharded:
+ setattr(DefaultModelLoader, "_prepare_weights", _prepare_presharded_weights)
+
+ self.replicate_embedding = getattr(config, "replicate_embedding", False)
+
self.model = Grok1Model(
config,
quant_config=quant_config,
@@ -544,6 +851,8 @@ def __init__(
load_presharded_embedding=self.load_presharded_embedding,
load_presharded_attn=self.load_presharded_attn,
load_presharded_mlp=self.load_presharded_mlp,
+ replicate_embedding=self.replicate_embedding,
+ prefix=add_prefix("model", prefix),
)
lm_head_params_dtype = None
@@ -553,6 +862,7 @@ def __init__(
config.vocab_size,
bias=False,
params_dtype=lm_head_params_dtype,
+ prefix=add_prefix("lm_head", prefix),
)
self.logits_processor = LogitsProcessor(config, skip_all_gather=True)
else:
@@ -561,6 +871,7 @@ def __init__(
config.hidden_size,
use_presharded_weights=self.load_presharded_embedding,
params_dtype=lm_head_params_dtype,
+ prefix=add_prefix("lm_head", prefix),
)
self.logits_processor = LogitsProcessor(config)
@@ -577,6 +888,7 @@ def __init__(
f"#parameters (analytical): {self.get_num_params_analytical() / 1e9:.2f} B, "
f"#parameters (actual): {self.get_num_params_torch() / 1e9:.2f} B"
)
+ self.loaded_param_names = set()
def forward(
self,
@@ -596,11 +908,13 @@ def forward(
def load_weights(
self,
weights: Iterable[Tuple[str, torch.Tensor]],
- num_experts: Optional[int] = None,
ignore_parent_name: bool = False,
+ check_hit_names: bool = True,
+ model_config: PretrainedConfig | None = None,
) -> dict[str, torch.Tensor]:
- if num_experts is None:
- num_experts = self.config.num_local_experts
+ if model_config is None:
+ model_config = self.config
+
stacked_params_mapping = []
stacked_params_mapping += [
# (param_name, shard_name, shard_id)
@@ -616,6 +930,7 @@ def load_weights(
# Params for weights, fp8 weight scales, fp8 activation scales
# (param_name, weight_name, expert_id, shard_id)
+ num_experts = model_config.num_local_experts
expert_params_mapping = FusedMoE.make_expert_params_mapping(
ckpt_gate_proj_name="w1",
ckpt_down_proj_name="w2",
@@ -630,23 +945,26 @@ def load_weights(
def load_weight_wrapper(
name: str, loaded_weight: torch.Tensor, *args, **kwargs
):
- if ignore_parent_name:
- name = name.split(".")[-1]
-
- if name not in params_dict:
- return
-
# Fuse constant multipliers into the weights
if "lm_head" in name:
loaded_weight = (
loaded_weight.to(torch.float32)
- * self.config.output_multiplier_scale
+ * model_config.output_multiplier_scale
)
+ original_name = name
+ if ignore_parent_name:
+ name = name.split(".")[-1]
+
+ if name not in params_dict:
+ logger.info(f"Skipping {name=} in load_weights_wrapper")
+ return
+
param = params_dict[name]
weight_loader = getattr(param, "weight_loader", default_weight_loader)
weight_loader(param, loaded_weight, *args, **kwargs)
hit_names.add(name)
+ self.loaded_param_names.add(original_name)
for name, loaded_weight in weights:
if "rotary_emb.inv_freq" in name:
@@ -685,19 +1003,22 @@ def load_weight_wrapper(
load_weight_wrapper(name=name, loaded_weight=loaded_weight)
- if len(hit_names) > 5:
- missing = all_names - hit_names
- missing_exclude_scales = {x for x in missing if "scale" not in x}
- logger.info(
- f"#all_names: {len(all_names)}, #hit_names: {len(hit_names)}, #missing_exclude_scales: {len(missing_exclude_scales)}",
- )
- if len(missing_exclude_scales) > 0:
- raise ValueError(
- f"load_weights failed because some weights are missing: {missing_exclude_scales=}."
+ if check_hit_names:
+ if len(hit_names) > 5:
+ missing = all_names - hit_names
+ missing_exclude_scales = {x for x in missing if "scale" not in x}
+ logger.info(
+ f"#all_names: {len(all_names)}, #hit_names: {len(hit_names)}, #missing_exclude_scales: {len(missing_exclude_scales)}",
)
+ if len(missing_exclude_scales) > 0:
+ raise ValueError(
+ f"load_weights failed because some weights are missing: {missing_exclude_scales=}."
+ )
- elif len(hit_names) == 0:
- raise ValueError("load_weights failed because it did not hit any names.")
+ elif len(hit_names) == 0:
+ raise ValueError(
+ f"load_weights failed because it did not hit any names. {all_names=} {hit_names=}"
+ )
return hit_names
@@ -708,7 +1029,11 @@ def get_num_params_analytical(self):
"moe_intermediate_size",
getattr(cfg, "intermediate_size", None),
)
- num_experts = cfg.num_local_experts
+ residual_moe = getattr(cfg, "residual_moe", False)
+ if cfg.num_local_experts > 0:
+ num_experts = cfg.num_local_experts + (1 if residual_moe else 0)
+ else:
+ num_experts = 1
wq = (
cfg.num_hidden_layers
diff --git a/python/sglang/srt/models/interns1.py b/python/sglang/srt/models/interns1.py
index d72deca41e5..c7383ed2583 100644
--- a/python/sglang/srt/models/interns1.py
+++ b/python/sglang/srt/models/interns1.py
@@ -4,7 +4,7 @@
from torch import nn
from transformers import PretrainedConfig
-from sglang.srt.distributed import parallel_state
+from sglang.srt.layers.attention import vision_utils
from sglang.srt.layers.moe.ep_moe.layer import get_moe_impl_class
from sglang.srt.layers.moe.fused_moe_triton.layer import FusedMoE
from sglang.srt.layers.quantization.base_config import QuantizationConfig
@@ -21,6 +21,7 @@
from sglang.srt.model_loader.weight_utils import default_weight_loader
from sglang.srt.models.internvl import InternVisionModel
from sglang.srt.models.qwen2 import Qwen2ForCausalLM
+from sglang.srt.models.qwen3 import Qwen3ForCausalLM
from sglang.srt.models.qwen3_moe import Qwen3MoeForCausalLM
from sglang.utils import logger
@@ -35,7 +36,7 @@ def __init__(
super().__init__()
self.config = config
self.quant_config = quant_config
- self._update_hf_config()
+ vision_utils.update_vit_attn_dummy_heads_config(self.config)
image_size = (
getattr(config, "force_image_size", None) or config.vision_config.image_size
)
@@ -70,6 +71,10 @@ def __init__(
self.language_model = Qwen3MoeForCausalLM(
config=config.text_config, quant_config=quant_config
)
+ elif config.text_config.architectures[0] == "Qwen3ForCausalLM":
+ self.language_model = Qwen3ForCausalLM(
+ config=config.text_config, quant_config=quant_config
+ )
else:
raise NotImplementedError(
f"{config.text_config.architectures[0]} is not implemented."
@@ -87,21 +92,6 @@ def __init__(
nn.Linear(llm_hidden_size, llm_hidden_size),
)
- def _update_hf_config(self):
- """update hf config to support tp"""
- world_size = parallel_state.get_tensor_model_parallel_world_size()
- num_heads = self.config.vision_config.num_attention_heads
- head_dim = self.config.vision_config.hidden_size // num_heads
- num_dummy_heads = 0
-
- if num_heads % world_size != 0:
- num_dummy_heads = (
- (num_heads + world_size) // world_size
- ) * world_size - num_heads
-
- setattr(self.config.vision_config, "head_dim", head_dim)
- setattr(self.config.vision_config, "num_dummy_heads", num_dummy_heads)
-
def pixel_shuffle(self, x, scale_factor=0.5):
n, w, h, c = x.size()
# N, W, H, C --> N, W, H * scale, C // scale
@@ -184,34 +174,6 @@ def pad_input_ids(self, input_ids: List[int], mm_inputs: MultimodalInputs):
return helper.pad_input_tokens(input_ids, mm_inputs)
- def _pad_vit_attn_dummy_heads(self, name: str, loaded_weight: torch.Tensor):
- """pad attn qkv weights for dummy heads"""
- num_dummy_heads = self.config.vision_config.num_dummy_heads
- if num_dummy_heads == 0:
- return loaded_weight
- head_dim = self.config.vision_config.head_dim
-
- if any([_ in name for _ in ["attn.q_proj", "attn.k_proj", "attn.v_proj"]]):
- if name.endswith(".weight"):
- dummy_shape = [num_dummy_heads, head_dim, loaded_weight.shape[-1]]
- elif name.endswith(".bias"):
- dummy_shape = [num_dummy_heads, head_dim]
- else:
- raise RuntimeError(f"Unsupported weight with name={name}")
- padded_weight = loaded_weight.new_zeros(dummy_shape)
- loaded_weight = torch.cat(
- [loaded_weight.unflatten(0, (-1, head_dim)), padded_weight], dim=0
- ).flatten(0, 1)
- if "attn.proj.weight" in name:
- padded_weight = loaded_weight.new_zeros(
- loaded_weight.shape[0], head_dim * num_dummy_heads
- )
- loaded_weight = torch.cat([loaded_weight, padded_weight], dim=-1)
- if "attn.q_norm.weight" in name or "attn.k_norm.weight" in name:
- padded_weight = loaded_weight.new_zeros(head_dim * num_dummy_heads)
- loaded_weight = torch.cat([loaded_weight, padded_weight], dim=0)
- return loaded_weight
-
def _mapping_interns1_name(self, name):
names_map = {
"lm_head.weight": "language_model.lm_head.weight",
@@ -270,7 +232,9 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
continue
name = self._mapping_interns1_name(name)
if "vision_model" in name:
- loaded_weight = self._pad_vit_attn_dummy_heads(name, loaded_weight)
+ loaded_weight = vision_utils.pad_vit_attn_dummy_heads(
+ self.config, name, loaded_weight
+ )
for param_name, weight_name, shard_id in stacked_params_mapping:
if weight_name not in name:
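
# Illustrative sketch (reviewer annotation, not part of this patch): the
# dummy-head arithmetic that the removed _update_hf_config performed and that
# vision_utils.update_vit_attn_dummy_heads_config now centralizes. Padding the
# ViT head count up to the next multiple of the TP world size lets every rank
# hold the same number of (possibly zero-initialized) heads.
def num_dummy_heads(num_heads: int, world_size: int) -> int:
    if num_heads % world_size == 0:
        return 0
    return ((num_heads + world_size) // world_size) * world_size - num_heads

# e.g. 25 heads on 4 ranks -> pad with 3 dummy heads so each rank owns 7.
assert num_dummy_heads(25, 4) == 3
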
diff --git a/python/sglang/srt/models/internvl.py b/python/sglang/srt/models/internvl.py
index 94470cc0ad7..b146da0e5d0 100644
--- a/python/sglang/srt/models/internvl.py
+++ b/python/sglang/srt/models/internvl.py
@@ -10,7 +10,7 @@
from transformers.activations import ACT2FN
from transformers.modeling_outputs import BaseModelOutput, BaseModelOutputWithPooling
-from sglang.srt.distributed import parallel_state
+from sglang.srt.layers.attention import vision_utils
from sglang.srt.layers.attention.vision import SingletonCache, VisionAttention
from sglang.srt.layers.moe.fused_moe_triton.layer import FusedMoE
from sglang.srt.layers.quantization.base_config import QuantizationConfig
@@ -26,8 +26,10 @@
from sglang.srt.model_executor.forward_batch_info import ForwardBatch
from sglang.srt.model_loader.weight_utils import default_weight_loader
from sglang.srt.models.deepseek_janus_pro import DropPath
+from sglang.srt.models.gpt_oss import GptOssForCausalLM
from sglang.srt.models.internlm2 import InternLM2ForCausalLM
from sglang.srt.models.qwen2 import Qwen2ForCausalLM
+from sglang.srt.models.qwen3 import Qwen3ForCausalLM
from sglang.srt.models.qwen3_moe import Qwen3MoeForCausalLM
from sglang.utils import logger
@@ -412,7 +414,7 @@ def __init__(
super().__init__()
self.config = config
self.quant_config = quant_config
- self._update_vision_config()
+ vision_utils.update_vit_attn_dummy_heads_config(self.config)
image_size = config.force_image_size or config.vision_config.image_size
patch_size = config.vision_config.patch_size
self.patch_size = patch_size
@@ -445,6 +447,14 @@ def __init__(
self.language_model = Qwen3MoeForCausalLM(
config=config.llm_config, quant_config=quant_config
)
+ elif config.llm_config.architectures[0] == "GptOssForCausalLM":
+ self.language_model = GptOssForCausalLM(
+ config=config.llm_config, quant_config=quant_config
+ )
+ elif config.llm_config.architectures[0] == "Qwen3ForCausalLM":
+ self.language_model = Qwen3ForCausalLM(
+ config=config.llm_config, quant_config=quant_config
+ )
else:
raise NotImplementedError(
f"{config.llm_config.architectures[0]} is not implemented."
@@ -462,21 +472,6 @@ def __init__(
nn.Linear(llm_hidden_size, llm_hidden_size),
)
- def _update_vision_config(self):
- """update vision config to support tp"""
- world_size = parallel_state.get_tensor_model_parallel_world_size()
- num_heads = self.config.vision_config.num_attention_heads
- head_dim = self.config.vision_config.hidden_size // num_heads
- num_dummy_heads = 0
-
- if num_heads % world_size != 0:
- num_dummy_heads = (
- (num_heads + world_size) // world_size
- ) * world_size - num_heads
-
- setattr(self.config.vision_config, "head_dim", head_dim)
- setattr(self.config.vision_config, "num_dummy_heads", num_dummy_heads)
-
def pixel_shuffle(self, x, scale_factor=0.5):
n, w, h, c = x.size()
# N, W, H, C --> N, W, H * scale, C // scale
@@ -559,36 +554,6 @@ def pad_input_ids(self, input_ids: List[int], mm_inputs: MultimodalInputs):
return helper.pad_input_tokens(input_ids, mm_inputs)
- def _pad_vit_attn_dummy_heads(self, name: str, loaded_weight: torch.Tensor):
- """pad attn qkv weights for dummy heads"""
- num_dummy_heads = self.config.vision_config.num_dummy_heads
- if num_dummy_heads == 0:
- return loaded_weight
- head_dim = self.config.vision_config.head_dim
-
- if "attn.qkv_proj" in name:
- wq, wk, wv = loaded_weight.chunk(3, dim=0)
- if name.endswith(".weight"):
- dummy_shape = [num_dummy_heads, head_dim, wq.shape[-1]]
- elif name.endswith(".bias"):
- dummy_shape = [num_dummy_heads, head_dim]
- else:
- raise RuntimeError(f"Unsupported weight with name={name}")
- pad_func = lambda x: torch.cat(
- [x.unflatten(0, (-1, head_dim)), x.new_zeros(dummy_shape)], dim=0
- ).flatten(0, 1)
- wq, wk, wv = pad_func(wq), pad_func(wk), pad_func(wv)
- loaded_weight = torch.cat([wq, wk, wv], dim=0)
- if "attn.proj.weight" in name:
- padded_weight = loaded_weight.new_zeros(
- loaded_weight.shape[0], head_dim * num_dummy_heads
- )
- loaded_weight = torch.cat([loaded_weight, padded_weight], dim=-1)
- if "attn.q_norm.weight" in name or "attn.k_norm.weight" in name:
- padded_weight = loaded_weight.new_zeros(head_dim * num_dummy_heads)
- loaded_weight = torch.cat([loaded_weight, padded_weight], dim=0)
- return loaded_weight
-
def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
expert_params_mapping = []
if "InternLM2ForCausalLM" in self.config.llm_config.architectures:
@@ -622,6 +587,15 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
ckpt_up_proj_name="up_proj",
num_experts=self.config.num_experts,
)
+ elif "Qwen3ForCausalLM" in self.config.llm_config.architectures:
+ stacked_params_mapping = [
+ # (param_name, shard_name, shard_id)
+ ("qkv_proj", "q_proj", "q"),
+ ("qkv_proj", "k_proj", "k"),
+ ("qkv_proj", "v_proj", "v"),
+ ("gate_up_proj", "gate_proj", 0),
+ ("gate_up_proj", "up_proj", 1),
+ ]
params_dict = dict(self.named_parameters())
loaded_params: Set[str] = set()
@@ -699,13 +673,22 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
param, "weight_loader", default_weight_loader
)
if "vision_model" in name:
- loaded_weight = self._pad_vit_attn_dummy_heads(
- name, loaded_weight
+ loaded_weight = vision_utils.pad_vit_attn_dummy_heads(
+ self.config, name, loaded_weight
)
weight_loader(param, loaded_weight)
loaded_params.add(name)
unloaded_params = params_dict.keys() - loaded_params
+ # Skip params that are created by quantization wrappers and are not expected in the ckpt
+ _quant_only_fragments = (
+ "weight_scale", # per-matrix FP8 scales (e.g., w2_weight_scale, w13_weight_scale)
+ )
+ unloaded_params = {
+ n
+ for n in unloaded_params
+ if not any(frag in n for frag in _quant_only_fragments)
+ }
if unloaded_params:
raise RuntimeError(
f"Some weights are not initialized from checkpoints: {unloaded_params}"
diff --git a/python/sglang/srt/models/llama.py b/python/sglang/srt/models/llama.py
index fc0ce930a69..53c991a2ff1 100644
--- a/python/sglang/srt/models/llama.py
+++ b/python/sglang/srt/models/llama.py
@@ -181,6 +181,10 @@ def __init__(
self.scaling,
num_kv_heads=self.num_kv_heads,
layer_id=layer_id,
+ orig_context_len=getattr(
+ config, "orig_context_len", max_position_embeddings
+ ),
+ rope=self.rotary_emb,
quant_config=quant_config,
prefix=add_prefix("attn", prefix),
)
@@ -193,7 +197,14 @@ def forward(
) -> torch.Tensor:
qkv, _ = self.qkv_proj(hidden_states)
q, k, v = qkv.split([self.q_size, self.kv_size, self.kv_size], dim=-1)
- q, k = self.rotary_emb(positions, q, k)
+
+ # RoPE is applied inside the attention kernel in HiP Attention
+ if not (
+ forward_batch.hip_metadata_cache_pool is not None
+ and forward_batch.hip_metadata_cache_pool.hip_config.using_extend
+ ):
+ q, k = self.rotary_emb(positions, q, k)
+
attn_output = self.attn(q, k, v, forward_batch)
output, _ = self.o_proj(attn_output)
return output
@@ -300,7 +311,7 @@ def __init__(
self.layers, self.start_layer, self.end_layer = make_layers(
config.num_hidden_layers,
lambda idx, prefix: LlamaDecoderLayer(
- config=config, quant_config=quant_config, layer_id=idx, prefix=prefix
+ config=config, layer_id=idx, quant_config=quant_config, prefix=prefix
),
pp_rank=self.pp_group.rank_in_group,
pp_size=self.pp_group.world_size,
@@ -335,9 +346,12 @@ def forward(
deferred_norm = None
aux_hidden_states = []
+
+ forward_batch.on_model_start()
for i in range(self.start_layer, self.end_layer):
if i in self.layers_to_capture:
aux_hidden_states.append(hidden_states + residual)
+ forward_batch.on_layer_start(i)
layer = self.layers[i]
hidden_states, residual = layer(
positions,
@@ -345,6 +359,8 @@ def forward(
forward_batch,
residual,
)
+ forward_batch.on_layer_end(i)
+ forward_batch.on_model_end()
if not self.pp_group.is_last_rank:
return PPProxyTensors(
@@ -407,6 +423,7 @@ class LlamaForCausalLM(nn.Module):
".gate_proj": (".gate_up_proj", 0),
".up_proj": (".gate_up_proj", 1),
}
+ hip_attention_supported = True
def __init__(
self,
diff --git a/python/sglang/srt/models/llama4.py b/python/sglang/srt/models/llama4.py
index e05d96527d0..ffc61bb432d 100644
--- a/python/sglang/srt/models/llama4.py
+++ b/python/sglang/srt/models/llama4.py
@@ -302,6 +302,9 @@ def __init__(
layer_id=layer_id,
prefix=add_prefix("attn", prefix),
use_irope=self.use_rope,
+ orig_context_len=getattr(
+ config, "orig_context_len", max_position_embeddings
+ ),
)
def _get_attn_scale(self, positions: torch.Tensor) -> torch.Tensor:
@@ -423,6 +426,12 @@ def _is_moe_layer(self, layer_id: int) -> bool:
return self.config.num_local_experts > 0
return (layer_id + 1) % self.config.interleave_moe_layer_step == 0
+ def get_intermediate_size(self) -> int:
+ if isinstance(self.feed_forward, Llama4MoE):
+ return self.config.intermediate_size
+ else:
+ return self.config.intermediate_size_mlp
+
def forward(
self,
positions: torch.Tensor,
@@ -504,9 +513,12 @@ def forward(
hidden_states = input_embeds
residual = None
aux_hidden_states = []
+
+ forward_batch.on_model_start()
for i in range(len(self.layers)):
if i in self.layers_to_capture:
aux_hidden_states.append(hidden_states + residual)
+ forward_batch.on_layer_start(i)
layer = self.layers[i]
hidden_states, residual = layer(
positions,
@@ -514,6 +526,9 @@ def forward(
forward_batch,
residual,
)
+ forward_batch.on_layer_end(i)
+ forward_batch.on_model_end()
+
if not forward_batch.forward_mode.is_idle():
hidden_states, _ = self.norm(hidden_states, residual)
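
# Illustrative sketch (reviewer annotation, not part of this patch): the hook
# call order that the forward loops in gpt_oss, llama and llama4 now assume on
# ForwardBatch. This recording stand-in is only meant for checking the loop
# wiring in a unit test; the real hooks live on ForwardBatch in this diff.
class RecordingHooks:
    def __init__(self):
        self.events = []

    def on_model_start(self):
        self.events.append("model_start")

    def on_layer_start(self, layer_id: int):
        self.events.append(("layer_start", layer_id))

    def on_layer_end(self, layer_id: int):
        self.events.append(("layer_end", layer_id))

    def on_model_end(self):
        self.events.append("model_end")

# Expected order for a 2-layer model:
# model_start, (layer_start, 0), (layer_end, 0), (layer_start, 1), (layer_end, 1), model_end
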
@@ -528,6 +543,9 @@ class Llama4ForCausalLM(LlamaForCausalLM):
"qkv_proj": ["q_proj", "k_proj", "v_proj"],
"gate_up_proj": ["gate_proj", "up_proj"],
}
+ # TODO(ainl): prefetch 4 layers at the same time
+ # TODO(ainl): check sliding window fetch shape
+ hip_attention_supported = True
def __init__(
self,
@@ -540,6 +558,9 @@ def __init__(
def get_input_embeddings(self):
return self.model.embed_tokens
+ def get_layers(self):
+ return self.model.layers
+
def _init_model(
self,
config: Llama4TextConfig,
diff --git a/python/sglang/srt/models/llama_eagle3.py b/python/sglang/srt/models/llama_eagle3.py
index f8d7b608c37..87ae7ade5d5 100644
--- a/python/sglang/srt/models/llama_eagle3.py
+++ b/python/sglang/srt/models/llama_eagle3.py
@@ -109,6 +109,16 @@ def __init__(
) -> None:
super().__init__()
self.config = config
+
+ self.is_mrope_enabled = (
+ hasattr(config, "rope_scaling")
+ and config.rope_scaling is not None
+ and "mrope_section" in config.rope_scaling
+ )
+ # fix rope_scaling for qwen2.5-vl
+ if self.is_mrope_enabled:
+ config.rope_scaling["rope_type"] = "default"
+
self.vocab_size = config.vocab_size
self.embed_tokens = VocabParallelEmbedding(
config.vocab_size,
@@ -144,6 +154,9 @@ def forward(
else:
embeds = input_embeds
+ if self.is_mrope_enabled:
+ positions = forward_batch.mrope_positions
+
hidden_states = forward_batch.spec_info.hidden_states
if hidden_states.shape[-1] != embeds.shape[-1]:
hidden_states = self.fc(hidden_states)
@@ -185,9 +198,13 @@ def __init__(
)
# Llama 3.2 1B Instruct set tie_word_embeddings to True
# Llama 3.1 8B Instruct set tie_word_embeddings to False
+ self.load_lm_head_from_target = False
if self.config.tie_word_embeddings:
self.lm_head = self.model.embed_tokens
else:
+ if config.draft_vocab_size is None:
+ self.load_lm_head_from_target = True
+ config.draft_vocab_size = config.vocab_size
self.lm_head = ParallelLMHead(
config.draft_vocab_size,
config.hidden_size,
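
# Illustrative sketch (reviewer annotation, not part of this patch): the
# lm_head selection added to the EAGLE3 draft model above, written out as a
# small decision helper. The names mirror the diff; the helper itself is
# hypothetical.
def resolve_draft_lm_head(tie_word_embeddings: bool, draft_vocab_size, vocab_size):
    """Return (reuse_embed_tokens, load_lm_head_from_target, head_vocab_size)."""
    if tie_word_embeddings:
        # Reuse the embedding matrix as the output head (e.g. Llama 3.2 1B).
        return True, False, vocab_size
    if draft_vocab_size is None:
        # The draft checkpoint has no dedicated head: take it from the target model.
        return False, True, vocab_size
    return False, False, draft_vocab_size
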
diff --git a/python/sglang/srt/models/longcat_flash.py b/python/sglang/srt/models/longcat_flash.py
new file mode 100644
index 00000000000..9531cb83ef2
--- /dev/null
+++ b/python/sglang/srt/models/longcat_flash.py
@@ -0,0 +1,1026 @@
+# Apache License, Version 2.0:
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# MIT License:
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+import concurrent.futures
+import logging
+import os
+from enum import IntEnum, auto
+from typing import Any, Dict, Iterable, Optional, Tuple, Union
+
+import torch
+import torch.nn.functional as F
+from torch import nn
+from tqdm import tqdm
+
+from sglang.srt.configs import LongcatFlashConfig
+from sglang.srt.distributed import (
+ get_tensor_model_parallel_world_size,
+ tensor_model_parallel_all_reduce,
+)
+from sglang.srt.eplb.expert_distribution import get_global_expert_distribution_recorder
+from sglang.srt.eplb.expert_location import ModelConfigForExpertLocation
+from sglang.srt.eplb.expert_location_dispatch import ExpertLocationDispatchInfo
+from sglang.srt.layers.activation import SiluAndMul
+from sglang.srt.layers.amx_utils import PackWeightMethod
+from sglang.srt.layers.communicator import LayerCommunicator, LayerScatterModes
+from sglang.srt.layers.dp_attention import (
+ get_attention_tp_rank,
+ get_attention_tp_size,
+ is_dp_attention_enabled,
+)
+from sglang.srt.layers.layernorm import RMSNorm
+from sglang.srt.layers.linear import (
+ MergedColumnParallelLinear,
+ ReplicatedLinear,
+ RowParallelLinear,
+)
+from sglang.srt.layers.logits_processor import LogitsProcessor
+from sglang.srt.layers.moe.ep_moe.kernels import zero_experts_compute_triton
+from sglang.srt.layers.moe.ep_moe.layer import DeepEPMoE, get_moe_impl_class
+from sglang.srt.layers.moe.fused_moe_triton.layer import FusedMoE
+from sglang.srt.layers.moe.topk import StandardTopKOutput, TopK
+from sglang.srt.layers.quantization import deep_gemm_wrapper
+from sglang.srt.layers.quantization.base_config import QuantizationConfig
+from sglang.srt.layers.quantization.fp8_kernel import is_fp8_fnuz
+from sglang.srt.layers.quantization.fp8_utils import (
+ block_quant_dequant,
+ block_quant_to_tensor_quant,
+ channel_quant_to_tensor_quant,
+ normalize_e4m3fn_to_e4m3fnuz,
+ requant_weight_ue8m0_inplace,
+)
+from sglang.srt.layers.quantization.int8_utils import (
+ block_dequant as int8_block_dequant,
+)
+from sglang.srt.layers.vocab_parallel_embedding import (
+ ParallelLMHead,
+ VocabParallelEmbedding,
+)
+from sglang.srt.managers.schedule_batch import global_server_args_dict
+from sglang.srt.model_executor.forward_batch_info import ForwardBatch
+from sglang.srt.model_loader.weight_utils import default_weight_loader
+from sglang.srt.models.deepseek_v2 import DeepseekV2AttentionMLA
+from sglang.srt.utils import (
+ BumpAllocator,
+ LazyValue,
+ add_prefix,
+ bind_or_assign,
+ cpu_has_amx_support,
+ get_bool_env_var,
+ get_device_sm,
+ get_int_env_var,
+ is_cpu,
+ is_cuda,
+ is_flashinfer_available,
+ is_hip,
+ is_non_idle_and_non_empty,
+ is_npu,
+ is_sm100_supported,
+)
+
+_is_hip = is_hip()
+_is_cuda = is_cuda()
+_is_npu = is_npu()
+_is_fp8_fnuz = is_fp8_fnuz()
+_use_aiter = get_bool_env_var("SGLANG_USE_AITER") and _is_hip
+_is_cpu_amx_available = cpu_has_amx_support()
+_is_cpu = is_cpu()
+_device_sm = get_device_sm()
+
+if _is_cuda:
+ from sgl_kernel import (
+ awq_dequantize,
+ bmm_fp8,
+ dsv3_fused_a_gemm,
+ dsv3_router_gemm,
+ merge_state_v2,
+ )
+elif _is_cpu and _is_cpu_amx_available:
+ pass
+elif _is_hip:
+ from sglang.srt.layers.quantization.awq_triton import (
+ awq_dequantize_triton as awq_dequantize,
+ )
+else:
+ from vllm._custom_ops import awq_dequantize
+
+logger = logging.getLogger(__name__)
+
+
+class LongcatFlashMLP(nn.Module):
+ def __init__(
+ self,
+ hidden_size: int,
+ intermediate_size: int,
+ hidden_act: str,
+ quant_config: Optional[QuantizationConfig] = None,
+ reduce_results: bool = False,
+ prefix: str = "",
+ ) -> None:
+ super().__init__()
+ self.gate_up_proj = MergedColumnParallelLinear(
+ hidden_size,
+ [intermediate_size] * 2,
+ bias=False,
+ quant_config=quant_config,
+ prefix=add_prefix("gate_up_proj", prefix),
+ )
+ self.down_proj = RowParallelLinear(
+ intermediate_size,
+ hidden_size,
+ bias=False,
+ quant_config=quant_config,
+ reduce_results=reduce_results,
+ prefix=add_prefix("down_proj", prefix),
+ )
+ if hidden_act != "silu":
+ raise ValueError(
+ f"Unsupported activation: {hidden_act}. "
+ "Only silu is supported for now."
+ )
+ self.act_fn = SiluAndMul()
+
+ def forward(
+ self,
+ x,
+ ):
+ gate_up, _ = self.gate_up_proj(x)
+ x = self.act_fn(gate_up)
+ x, _ = self.down_proj(x)
+ return x
+
+
+class LongcatFlashRouter(nn.Module):
+ def __init__(
+ self,
+ config,
+ zero_expert_num=0,
+ rounter_params_dtype=torch.float32,
+ prefix: str = "",
+ ):
+ super().__init__()
+ self.n_routed_experts = config.n_routed_experts
+ self.n_routed_experts = self.n_routed_experts + zero_expert_num
+ self.rounter_params_dtype = rounter_params_dtype
+ self.classifier = ReplicatedLinear(
+ config.hidden_size,
+ self.n_routed_experts,
+ bias=config.router_bias,
+ params_dtype=rounter_params_dtype,
+ quant_config=None,
+ prefix=add_prefix("classifier", prefix),
+ )
+ self.e_score_correction_bias = nn.Parameter(
+ torch.zeros((self.n_routed_experts), dtype=rounter_params_dtype)
+ )
+
+ def forward(self, hidden_states):
+ logits, _ = self.classifier(hidden_states.to(self.rounter_params_dtype))
+ return logits
+
+
+class LongcatFlashMoE(nn.Module):
+
+ def __init__(
+ self,
+ config: LongcatFlashConfig,
+ layer_id: int,
+ quant_config: Optional[QuantizationConfig] = None,
+ prefix: str = "",
+ ):
+ super().__init__()
+ self.config = config
+ self.layer_id = layer_id
+ self.routed_scaling_factor = config.routed_scaling_factor
+ self.num_experts = config.n_routed_experts
+ self.top_k = config.moe_topk
+ self.zero_expert_num = config.zero_expert_num
+ self.zero_expert_type = config.zero_expert_type
+
+ if config.rounter_params_dtype == "float32":
+ self.rounter_params_dtype = torch.float32
+ else:
+ self.rounter_params_dtype = torch.bfloat16
+
+ self.tp_size = get_tensor_model_parallel_world_size()
+
+ if self.tp_size > config.n_routed_experts:
+ raise ValueError(
+ f"Tensor parallel size {self.tp_size} is greater than "
+ f"the number of experts {config.n_routed_experts}."
+ )
+
+ if config.hidden_act != "silu":
+ raise ValueError(
+ f"Unsupported activation: {config.hidden_act}. "
+ "Only silu is supported for now."
+ )
+
+ self.router = LongcatFlashRouter(
+ config=self.config,
+ zero_expert_num=self.zero_expert_num,
+ rounter_params_dtype=self.rounter_params_dtype,
+ prefix=add_prefix("router", prefix),
+ )
+
+ self.topk = TopK(
+ top_k=self.top_k,
+ renormalize=False,
+ use_grouped_topk=False,
+ correction_bias=self.router.e_score_correction_bias.data,
+ )
+ self.topk.forward = self.topk.forward_native
+
+ self.experts = get_moe_impl_class()(
+ num_experts=self.num_experts,
+ top_k=self.top_k,
+ layer_id=self.layer_id,
+ hidden_size=config.hidden_size,
+ intermediate_size=config.moe_intermediate_size,
+ quant_config=quant_config,
+ prefix=add_prefix("experts", prefix),
+ )
+
+ def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
+ num_tokens, hidden_dim = hidden_states.shape
+ hidden_states = hidden_states.view(-1, hidden_dim)
+
+ # router_logits: (num_tokens, n_experts)
+ router_logits = self.router(hidden_states)
+ topk_weights, topk_idx, _ = self.topk(
+ hidden_states,
+ router_logits,
+ )
+ if self.zero_expert_type is not None:
+ zero_expert_result = zero_experts_compute_triton(
+ expert_indices=topk_idx,
+ expert_scales=topk_weights,
+ num_experts=self.num_experts,
+ zero_expert_type=self.zero_expert_type,
+ hidden_states=hidden_states,
+ )
+ topk_output = StandardTopKOutput(topk_weights, topk_idx, _)
+
+ final_hidden_states = self.experts(hidden_states, topk_output)
+ final_hidden_states *= self.routed_scaling_factor
+
+ if self.zero_expert_type is not None and hidden_states.shape[0] > 0:
+ final_hidden_states += zero_expert_result.to(final_hidden_states.device)
+
+ if self.tp_size > 1:
+ final_hidden_states = tensor_model_parallel_all_reduce(final_hidden_states)
+
+ return final_hidden_states.view(num_tokens, hidden_dim)
+
+ def get_moe_weights(self):
+ return [
+ x.data
+ for name, x in self.experts.named_parameters()
+ if name not in ["correction_bias"]
+ ]
+
+
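
# Illustrative sketch (reviewer annotation, not part of this patch): how the
# extra "zero expert" logits above relate to the real experts. The router
# classifier scores n_routed_experts + zero_expert_num slots, so a top-k index
# >= num_experts selects a zero expert, which zero_experts_compute_triton
# handles outside the fused MoE. The split below is a simplified stand-in and
# may differ from the kernel's exact behavior.
import torch

def split_zero_experts(topk_idx: torch.Tensor, topk_weights: torch.Tensor, num_experts: int):
    zero_mask = topk_idx >= num_experts                 # slots routed to zero experts
    real_idx = topk_idx.masked_fill(zero_mask, 0)       # keep shapes for the MoE kernel
    real_weights = topk_weights.masked_fill(zero_mask, 0.0)
    return real_idx, real_weights, zero_mask
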
+class LongcatFlashDecoderLayer(nn.Module):
+
+ def __init__(
+ self,
+ config: LongcatFlashConfig,
+ layer_id: int,
+ quant_config: Optional[QuantizationConfig] = None,
+ prefix: str = "",
+ alt_stream: Optional[torch.cuda.Stream] = None,
+ ) -> None:
+ super().__init__()
+ self.config = config
+ self.hidden_size = config.hidden_size
+ self.layer_id = layer_id
+ self.alt_stream = alt_stream
+ self.self_attn = nn.ModuleList(
+ [
+ DeepseekV2AttentionMLA(
+ config=config,
+ hidden_size=config.hidden_size,
+ num_heads=config.num_attention_heads,
+ qk_nope_head_dim=config.qk_nope_head_dim,
+ qk_rope_head_dim=config.qk_rope_head_dim,
+ v_head_dim=config.v_head_dim,
+ q_lora_rank=config.q_lora_rank,
+ kv_lora_rank=config.kv_lora_rank,
+ rope_theta=config.rope_theta,
+ rope_scaling=None,
+ max_position_embeddings=config.max_position_embeddings,
+ quant_config=(
+ None
+ if "self_attn" in getattr(config, "disable_quant_module", [])
+ else quant_config
+ ),
+ layer_id=layer_id * 2 + i,
+ reduce_results=False,
+ prefix=add_prefix(f"self_attn.{i}", prefix),
+ alt_stream=self.alt_stream,
+ )
+ for i in range(2)
+ ]
+ )
+
+ self.input_layernorm = nn.ModuleList(
+ [RMSNorm(config.hidden_size, eps=config.rms_norm_eps) for i in range(2)]
+ )
+ self.post_attention_layernorm = nn.ModuleList(
+ [RMSNorm(config.hidden_size, eps=config.rms_norm_eps) for i in range(2)]
+ )
+
+ self.mlps = nn.ModuleList(
+ [
+ LongcatFlashMLP(
+ hidden_size=config.hidden_size,
+ intermediate_size=config.intermediate_size,
+ hidden_act=config.hidden_act,
+ quant_config=(
+ None
+ if "mlps" in getattr(config, "disable_quant_module", [])
+ else quant_config
+ ),
+ prefix=add_prefix(f"mlps.{i}", prefix),
+ )
+ for i in range(2)
+ ]
+ )
+
+ self.mlp = LongcatFlashMoE(
+ layer_id=self.layer_id,
+ config=config,
+ quant_config=quant_config,
+ prefix=add_prefix("mlp", prefix),
+ )
+
+ self.attn_tp_size = get_attention_tp_size()
+ self.attn_tp_rank = get_attention_tp_rank()
+
+ self.mlp_layer_scatter_modes = [
+ LayerScatterModes.init_new(
+ layer_id=self.layer_id * 2 + i,
+ num_layers=config.num_hidden_layers,
+ is_layer_sparse=False,
+ is_previous_layer_sparse=False,
+ )
+ for i in range(2)
+ ]
+ self.mlp_layer_communicator = [
+ LayerCommunicator(
+ layer_scatter_modes=self.mlp_layer_scatter_modes[i],
+ input_layernorm=self.input_layernorm[i],
+ post_attention_layernorm=self.post_attention_layernorm[i],
+ )
+ for i in range(2)
+ ]
+
+ self.moe_layer_scatter_modes = LayerScatterModes.init_new(
+ layer_id=self.layer_id,
+ num_layers=config.num_hidden_layers,
+ is_layer_sparse=True,
+ is_previous_layer_sparse=True,
+ )
+ self.moe_layer_communicator = LayerCommunicator(
+ layer_scatter_modes=self.moe_layer_scatter_modes,
+ input_layernorm=self.input_layernorm[0],
+ post_attention_layernorm=self.post_attention_layernorm[0],
+ )
+
+ def forward(
+ self,
+ positions: torch.Tensor,
+ hidden_states: torch.Tensor,
+ forward_batch: ForwardBatch,
+ residual: Optional[torch.Tensor],
+ zero_allocator: BumpAllocator,
+ ) -> torch.Tensor:
+ # first_attn
+ hidden_states, residual = self.moe_layer_communicator.prepare_attn(
+ hidden_states, residual, forward_batch
+ )
+ if hidden_states.shape[0] != 0:
+ hidden_states = self.self_attn[0](
+ positions=positions,
+ hidden_states=hidden_states,
+ forward_batch=forward_batch,
+ zero_allocator=zero_allocator,
+ )
+
+ # moe
+ hidden_states, residual = self.moe_layer_communicator.prepare_mlp(
+ hidden_states, residual, forward_batch
+ )
+ moe_hidden_states = hidden_states.clone()
+ moe_residual = residual.clone()
+ moe_hidden_states = self.mlp(moe_hidden_states)
+ moe_hidden_states, moe_residual = self.moe_layer_communicator.postprocess_layer(
+ moe_hidden_states, moe_residual, forward_batch
+ )
+
+ hidden_states, residual = self.forward_mlp(
+ hidden_states, positions, residual, forward_batch, zero_allocator
+ )
+
+ hidden_states = moe_hidden_states + hidden_states
+ return hidden_states, residual
+
+ def forward_mlp(
+ self, hidden_states, positions, residual, forward_batch, zero_allocator
+ ):
+ # first_mlp
+ hidden_states = self.mlps[0](hidden_states)
+ # TP all_reduce
+ hidden_states = tensor_model_parallel_all_reduce(hidden_states)
+
+ # second_attn
+ hidden_states, residual = self.mlp_layer_communicator[1].prepare_attn(
+ hidden_states, residual, forward_batch
+ )
+ if hidden_states.shape[0] != 0:
+ hidden_states = self.self_attn[1](
+ positions=positions,
+ hidden_states=hidden_states,
+ forward_batch=forward_batch,
+ zero_allocator=zero_allocator,
+ )
+
+ # second_mlp
+ hidden_states, residual = self.mlp_layer_communicator[1].prepare_mlp(
+ hidden_states, residual, forward_batch
+ )
+ hidden_states = self.mlps[1](hidden_states)
+ # TP all_reduce
+ hidden_states = tensor_model_parallel_all_reduce(hidden_states)
+
+ hidden_states, residual = self.mlp_layer_communicator[1].postprocess_layer(
+ hidden_states, residual, forward_batch
+ )
+
+ return hidden_states, residual
+
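
# Illustrative sketch (reviewer annotation, not part of this patch): the data
# flow of LongcatFlashDecoderLayer.forward above, flattened. One "layer" holds
# two MLA attention blocks (KV-cache layer ids 2*layer_id and 2*layer_id + 1)
# and two dense MLPs on the main path, while the MoE branch is taken from the
# first attention's output and added back at the end. Layer norms, residuals
# and TP all-reduces are omitted here.
def longcat_layer_flow(x, attn0, mlp0, attn1, mlp1, moe):
    h = attn0(x)
    moe_out = moe(h)       # shortcut MoE branch off the first attention output
    h = mlp0(h)
    h = attn1(h)
    h = mlp1(h)
    return h + moe_out     # shortcut rejoins the main path
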
+
+class LongcatFlashModel(nn.Module):
+ fall_back_to_pt_during_load = False
+
+ def __init__(
+ self,
+ config: LongcatFlashConfig,
+ quant_config: Optional[QuantizationConfig] = None,
+ prefix: str = "",
+ ) -> None:
+ super().__init__()
+ self.vocab_size = config.vocab_size
+
+ self.embed_tokens = VocabParallelEmbedding(
+ config.vocab_size,
+ config.hidden_size,
+ enable_tp=not is_dp_attention_enabled(),
+ )
+
+ self.alt_stream = torch.cuda.Stream()
+ self.layers = nn.ModuleList(
+ [
+ LongcatFlashDecoderLayer(
+ config,
+ layer_id,
+ quant_config=quant_config,
+ prefix=add_prefix(f"layers.{layer_id}", prefix),
+ alt_stream=self.alt_stream,
+ )
+ for layer_id in range(config.num_hidden_layers)
+ ]
+ )
+ self.norm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps)
+
+ def get_input_embeddings(self) -> torch.Tensor:
+ return self.embed_tokens
+
+ def forward(
+ self,
+ input_ids: torch.Tensor,
+ positions: torch.Tensor,
+ forward_batch: ForwardBatch,
+ input_embeds: torch.Tensor = None,
+ ) -> torch.Tensor:
+ total_num_layers = len(self.layers)
+ device = input_embeds.device if input_embeds is not None else input_ids.device
+ zero_allocator = BumpAllocator(
+ buffer_size=total_num_layers * 2 * (2 if forward_batch.can_run_tbo else 1),
+ dtype=torch.float32,
+ device=device,
+ )
+ if input_embeds is None:
+ hidden_states = self.embed_tokens(input_ids)
+ else:
+ hidden_states = input_embeds
+
+ residual = None
+
+ for i in range(total_num_layers):
+ with get_global_expert_distribution_recorder().with_current_layer(i):
+ layer = self.layers[i]
+ hidden_states, residual = layer(
+ positions, hidden_states, forward_batch, residual, zero_allocator
+ )
+
+ if hidden_states.shape[0] != 0:
+ if residual is None:
+ hidden_states = self.norm(hidden_states)
+ else:
+ hidden_states, _ = self.norm(hidden_states, residual)
+ return hidden_states
+
+
+class LongcatFlashForCausalLM(nn.Module):
+ # for quark model load
+ packed_modules_mapping = {}
+
+ def __init__(
+ self,
+ config: LongcatFlashConfig,
+ quant_config: Optional[QuantizationConfig] = None,
+ prefix: str = "",
+ ) -> None:
+ super().__init__()
+
+ # for quark model load
+ # Fuse q_a_proj and kv_a_proj_with_mqa along output dimension when q_lora_rank is not None
+ self.fuse_qkv_a_proj = (
+ hasattr(config, "q_lora_rank") and config.q_lora_rank is not None
+ )
+ if self.fuse_qkv_a_proj:
+ self.packed_modules_mapping["fused_qkv_a_proj_with_mqa"] = [
+ "q_a_proj",
+ "kv_a_proj_with_mqa",
+ ]
+
+ self.config = config
+ self.tp_size = get_tensor_model_parallel_world_size()
+ self.quant_config = quant_config
+ self.model = LongcatFlashModel(
+ config, quant_config, prefix=add_prefix("model", prefix)
+ )
+ self.lm_head = ParallelLMHead(
+ config.vocab_size,
+ config.hidden_size,
+ quant_config=quant_config,
+ prefix=add_prefix("lm_head", prefix),
+ use_attn_tp_group=global_server_args_dict["enable_dp_lm_head"],
+ )
+ self.logits_processor = LogitsProcessor(config)
+
+ def get_input_embeddings(self) -> nn.Embedding:
+ return self.model.embed_tokens
+
+ @torch.no_grad()
+ def forward(
+ self,
+ input_ids: torch.Tensor,
+ positions: torch.Tensor,
+ forward_batch: ForwardBatch,
+ input_embeds: torch.Tensor = None,
+ ) -> torch.Tensor:
+ hidden_states = self.model(input_ids, positions, forward_batch, input_embeds)
+
+ return self.logits_processor(
+ input_ids, hidden_states, self.lm_head, forward_batch
+ )
+
+ def post_load_weights(self, weight_names=None):
+
+ # Perform post-processing after loading weights
+ if weight_names is None:
+ layer_ids = range(self.config.num_hidden_layers)
+ else:
+ layer_ids = set()
+ for name in weight_names:
+ if "kv_b_proj" in name:
+ layer_id = int(name.split(".")[2])
+ if layer_id < self.config.num_hidden_layers:
+ layer_ids.add(layer_id)
+
+ for layer_id in layer_ids:
+ for i in range(2):
+ self_attn = self.model.layers[layer_id].self_attn[i]
+ if hasattr(self_attn.kv_b_proj, "qweight"):
+ # AWQ compatible
+ if _is_cuda or _is_hip:
+ w = awq_dequantize(
+ self_attn.kv_b_proj.qweight,
+ self_attn.kv_b_proj.scales,
+ self_attn.kv_b_proj.qzeros,
+ ).T
+ else:
+ w = awq_dequantize(
+ self_attn.kv_b_proj.qweight,
+ self_attn.kv_b_proj.scales,
+ self_attn.kv_b_proj.qzeros,
+ 0,
+ 0,
+ 0,
+ ).T
+ else:
+ w = self_attn.kv_b_proj.weight
+ use_deep_gemm_bmm = False
+
+ if w.dtype in (
+ torch.float8_e4m3fn,
+ torch.float8_e4m3fnuz,
+ ):
+ if (
+ hasattr(self.quant_config, "weight_block_size")
+ and self.quant_config.weight_block_size is not None
+ ):
+ weight_block_size = self.quant_config.weight_block_size
+ assert hasattr(self_attn.kv_b_proj, "weight_scale_inv")
+ if _is_fp8_fnuz:
+ weight, weight_scale, _ = normalize_e4m3fn_to_e4m3fnuz(
+ weight=w,
+ weight_scale=self_attn.kv_b_proj.weight_scale_inv,
+ input_scale=None,
+ )
+ else:
+ weight = w
+ weight_scale = self_attn.kv_b_proj.weight_scale_inv
+
+ if (
+ _is_cuda
+ and weight_block_size[0] == 128
+ and weight_block_size[1] == 128
+ ):
+ if (
+ deep_gemm_wrapper.ENABLE_JIT_DEEPGEMM
+ and not deep_gemm_wrapper.DEEPGEMM_BLACKWELL
+ and get_bool_env_var("SGL_USE_DEEPGEMM_BMM", "false")
+ ):
+ block_scale = weight_scale
+ use_deep_gemm_bmm = True
+ else:
+ w = block_quant_dequant(
+ weight,
+ weight_scale,
+ weight_block_size,
+ torch.bfloat16,
+ )
+ else:
+ w, scale = block_quant_to_tensor_quant(
+ weight, weight_scale, weight_block_size
+ )
+ self_attn.w_scale = scale
+ else:
+ if _is_fp8_fnuz:
+ weight, weight_scale, _ = normalize_e4m3fn_to_e4m3fnuz(
+ weight=w,
+ weight_scale=self_attn.kv_b_proj.weight_scale,
+ input_scale=None,
+ )
+ else:
+ weight = w
+ weight_scale = self_attn.kv_b_proj.weight_scale
+
+ w, scale = channel_quant_to_tensor_quant(weight, weight_scale)
+ self_attn.w_scale = scale
+
+ if w.dtype == torch.int8:
+ if hasattr(self.quant_config, "weight_block_size"):
+                        # Block-wise int8 requires weight_scale_inv for dequantization
+ weight_block_size = self.quant_config.weight_block_size
+ if weight_block_size is not None:
+ assert hasattr(self_attn.kv_b_proj, "weight_scale_inv")
+ weight = w
+ weight_scale = self_attn.kv_b_proj.weight_scale_inv
+ w = int8_block_dequant(
+ weight, weight_scale, weight_block_size
+ ).to(torch.bfloat16)
+ else:
+                        # Channel-wise int8 requires weight_scale for dequantization
+ w = w.to(torch.bfloat16) * self_attn.kv_b_proj.weight_scale.to(
+ torch.bfloat16
+ )
+
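+                # Split the (dequantized) kv_b_proj weight into the per-head
+                # K-nope and V projection matrices (w_kc, w_vc) consumed by the
+                # MLA attention.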
+ w_kc, w_vc = w.unflatten(
+ 0, (-1, self_attn.qk_nope_head_dim + self_attn.v_head_dim)
+ ).split([self_attn.qk_nope_head_dim, self_attn.v_head_dim], dim=1)
+ if not use_deep_gemm_bmm:
+ self_attn.w_kc = bind_or_assign(
+ self_attn.w_kc,
+ w_kc.transpose(1, 2).contiguous().transpose(1, 2),
+ )
+ self_attn.w_vc = bind_or_assign(
+ self_attn.w_vc, w_vc.contiguous().transpose(1, 2)
+ )
+ if (
+ hasattr(self_attn.kv_b_proj, "weight_scale")
+ and self_attn.w_scale is None
+ ):
+ self_attn.w_scale = bind_or_assign(
+ self_attn.w_scale, self_attn.kv_b_proj.weight_scale
+ )
+ if _is_hip:
+ self_attn.w_scale *= 2.0
+ # TODO: remove this after adding FP8 support in bmm cpu kernel
+ if (
+ _is_cpu
+ and _is_cpu_amx_available
+ and w.dtype == torch.float8_e4m3fn
+ ):
+ self_attn.w_kc = (
+ self_attn.w_kc.to(torch.bfloat16) * self_attn.w_scale
+ )
+ self_attn.w_vc = (
+ self_attn.w_vc.to(torch.bfloat16) * self_attn.w_scale
+ )
+ else:
+ num_tiles_k = self_attn.qk_nope_head_dim // weight_block_size[1]
+ num_tiles_n = self_attn.v_head_dim // weight_block_size[0]
+ ws_kc, ws_vc = block_scale.unflatten(
+ 0, (-1, (num_tiles_k + num_tiles_n))
+ ).split([num_tiles_k, num_tiles_n], dim=1)
+ self_attn.w_scale_k = bind_or_assign(
+ self_attn.w_scale_k, ws_kc.transpose(1, 2).contiguous()
+ )
+ self_attn.w_scale_v = bind_or_assign(
+ self_attn.w_scale_v, ws_vc.contiguous()
+ )
+ self_attn.w_kc = bind_or_assign(
+ self_attn.w_kc, w_kc.transpose(1, 2).contiguous()
+ )
+ self_attn.w_vc = bind_or_assign(self_attn.w_vc, w_vc.contiguous())
+ self_attn.use_deep_gemm_bmm = True
+
+ if self.config.mla_scale_q_lora:
+ self_attn.q_a_layernorm.weight.data *= (
+ self.config.hidden_size / self.config.q_lora_rank
+ ) ** 0.5
+ if self.config.mla_scale_kv_lora:
+ self_attn.kv_a_layernorm.weight.data *= (
+ self.config.hidden_size / self.config.kv_lora_rank
+ ) ** 0.5
+
+        # TODO(linguoyuan): EPMoE does not support DEEPGEMM_BLACKWELL; DeepEP support needs to be added in the future.
+ deep_gemm_wrapper.DEEPGEMM_SCALE_UE8M0 = False
+
+ if (
+ deep_gemm_wrapper.ENABLE_JIT_DEEPGEMM
+ and deep_gemm_wrapper.DEEPGEMM_SCALE_UE8M0
+ and hasattr(self.quant_config, "weight_block_size")
+ and self.quant_config.weight_block_size is not None
+ ):
+ self._weight_requant_ue8m0()
+
+ def _weight_requant_ue8m0(self):
+ weight_block_size = self.quant_config.weight_block_size
+
+ for layer_id in range(self.config.num_hidden_layers):
+ layer = self.model.layers[layer_id]
+ for i in range(2):
+ self_attn = layer.self_attn[i]
+ module_list = [
+ self_attn.kv_b_proj,
+ self_attn.o_proj,
+ ]
+
+ if self.config.q_lora_rank is not None:
+ module_list.append(self_attn.fused_qkv_a_proj_with_mqa)
+ module_list.append(self_attn.q_b_proj)
+ else:
+ module_list.append(self_attn.kv_a_proj_with_mqa)
+ module_list.append(self_attn.q_proj)
+
+ for module in module_list:
+ if hasattr(module, "weight_scale_inv"):
+ requant_weight_ue8m0_inplace(
+ module.weight, module.weight_scale_inv, weight_block_size
+ )
+
+ mlp = layer.mlps[i]
+ assert isinstance(mlp, LongcatFlashMLP)
+ for module in [
+ mlp.gate_up_proj,
+ mlp.down_proj,
+ ]:
+ if hasattr(module, "weight_scale_inv"):
+ requant_weight_ue8m0_inplace(
+ module.weight, module.weight_scale_inv, weight_block_size
+ )
+
+        for layer_id in range(self.config.num_hidden_layers):
+            layer = self.model.layers[layer_id]
+            experts = layer.mlp.experts
+ if isinstance(experts, DeepEPMoE):
+ for w in [
+ experts.w13_weight_fp8,
+ experts.w2_weight_fp8,
+ ]:
+ requant_weight_ue8m0_inplace(w[0], w[1], weight_block_size)
+
+ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
+
+ stacked_params_mapping = [
+ # (param_name, shard_name, shard_id)
+ ("gate_up_proj", "gate_proj", 0),
+ ("gate_up_proj", "up_proj", 1),
+ ]
+
+ # Params for weights, fp8 weight scales, fp8 activation scales
+ # (param_name, weight_name, expert_id, shard_id)
+ expert_params_mapping = get_moe_impl_class().make_expert_params_mapping(
+ ckpt_gate_proj_name="gate_proj",
+ ckpt_down_proj_name="down_proj",
+ ckpt_up_proj_name="up_proj",
+ num_experts=self.config.n_routed_experts,
+ )
+
+ # Fuse q_a_proj and kv_a_proj_with_mqa along output dimension when q_lora_rank is not None
+ fuse_qkv_a_proj = hasattr(self.config, "q_lora_rank") and (
+ self.config.q_lora_rank is not None
+ )
+ cached_a_proj = {} if fuse_qkv_a_proj else None
+
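+        # Weight shards are handed to a thread pool; all submitted loads are
+        # awaited (and their exceptions re-raised) after the loop.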
+ with concurrent.futures.ThreadPoolExecutor() as executor:
+ futures = []
+ params_dict = dict(self.named_parameters())
+ weight_names = []
+ for name, loaded_weight in weights:
+ if "mtp" in name:
+ continue
+ weight_names.append(name)
+ if "rotary_emb.inv_freq" in name:
+ continue
+ for param_name, weight_name, shard_id in stacked_params_mapping:
+ # Skip non-stacked layers and experts (experts handled below).
+ if weight_name not in name:
+ continue
+ # We have mlp.experts[0].gate_proj in the checkpoint.
+ # Since we handle the experts below in expert_params_mapping,
+ # we need to skip here BEFORE we update the name, otherwise
+ # name will be updated to mlp.experts[0].gate_up_proj, which
+ # will then be updated below in expert_params_mapping
+ # for mlp.experts[0].gate_gate_up_proj, which breaks load.
+ if ("mlp.experts." in name) and name not in params_dict:
+ continue
+ name = name.replace(weight_name, param_name)
+ # Skip loading extra bias for GPTQ models.
+ if name.endswith(".bias") and name not in params_dict:
+ continue
+ param = params_dict[name]
+ weight_loader = param.weight_loader
+ futures.append(
+ executor.submit(weight_loader, param, loaded_weight, shard_id)
+ )
+ break
+ else:
+ for mapping in expert_params_mapping:
+ param_name, weight_name, expert_id, shard_id = mapping
+ if weight_name not in name:
+ continue
+ name = name.replace(weight_name, param_name)
+ param = params_dict[name]
+ weight_loader = param.weight_loader
+ futures.append(
+ executor.submit(
+ weight_loader,
+ param,
+ loaded_weight,
+ name,
+ shard_id=shard_id,
+ expert_id=expert_id,
+ )
+ )
+ break
+ else:
+ # Skip loading extra bias for GPTQ models.
+ if name.endswith(".bias") and name not in params_dict:
+ continue
+ if fuse_qkv_a_proj and (
+ "q_a_proj" in name or "kv_a_proj_with_mqa" in name
+ ):
+ cached_a_proj[name] = loaded_weight
+ q_a_proj_name = (
+ name
+ if "q_a_proj" in name
+ else name.replace("kv_a_proj_with_mqa", "q_a_proj")
+ )
+ kv_a_proj_name = (
+ name
+ if "kv_a_proj_with_mqa" in name
+ else name.replace("q_a_proj", "kv_a_proj_with_mqa")
+ )
+
+                        # Once both q_a_proj and kv_a_proj_with_mqa have been cached, load the fused weight into the parameter
+ if (
+ q_a_proj_name in cached_a_proj
+ and kv_a_proj_name in cached_a_proj
+ ):
+ q_a_proj_weight = cached_a_proj[q_a_proj_name]
+ kv_a_proj_weight = cached_a_proj[kv_a_proj_name]
+ cat_dim = 0
+ if self.quant_config is not None and (
+ self.quant_config.get_name() == "awq"
+ or self.quant_config.get_name() == "awq_marlin"
+ or self.quant_config.get_name() == "moe_wna16"
+ ):
+ cat_dim = 1
+ fused_weight = torch.cat(
+ [q_a_proj_weight, kv_a_proj_weight], dim=cat_dim
+ )
+ param_name = (
+ name.replace(
+ "q_a_proj", "fused_qkv_a_proj_with_mqa"
+ )
+ if "q_a_proj" in name
+ else name.replace(
+ "kv_a_proj_with_mqa",
+ "fused_qkv_a_proj_with_mqa",
+ )
+ )
+ param = params_dict[param_name]
+
+ weight_loader = getattr(
+ param, "weight_loader", default_weight_loader
+ )
+ futures.append(
+ executor.submit(weight_loader, param, fused_weight)
+ )
+ cached_a_proj.pop(q_a_proj_name)
+ cached_a_proj.pop(kv_a_proj_name)
+ else:
+ if (
+ "k_scale" in name or "v_scale" in name
+ ) and name not in params_dict:
+ # modelopt attn kv scale is named differently
+ for scale in ["k_scale", "v_scale"]:
+ if scale in name:
+ name = name.replace(
+ f"{scale[0]}_proj", "attn_mqa"
+ )
+ break
+ if name not in params_dict:
+                                # The modelopt checkpoint contains weights for the MTP module that are not needed here:
+ # model.decoder.self_attn.attn_mqa.v_scale and
+ # model.decoder.self_attn.attn_mqa.k_scale
+ logger.warning(f"{name} not found in params_dict.")
+ continue
+ param = params_dict[name]
+ weight_loader = getattr(
+ param, "weight_loader", default_weight_loader
+ )
+ futures.append(
+ executor.submit(weight_loader, param, loaded_weight)
+ )
+
+ # Wait for all tasks to complete and raise any exceptions.
+ for future in concurrent.futures.as_completed(futures):
+ future.result()
+
+ self.post_load_weights(weight_names=weight_names)
+
+ def get_embed_and_head(self):
+ return self.model.embed_tokens.weight, self.lm_head.weight
+
+ def set_embed_and_head(self, embed, head):
+ del self.model.embed_tokens.weight
+ del self.lm_head.weight
+ self.model.embed_tokens.weight = embed
+ self.lm_head.weight = head
+ torch.cuda.empty_cache()
+ torch.cuda.synchronize()
+
+ @classmethod
+ def get_model_config_for_expert_location(cls, config):
+ return ModelConfigForExpertLocation(
+ num_layers=config.num_hidden_layers,
+ num_logical_experts=config.n_routed_experts,
+ )
+
+
+EntryClass = [LongcatFlashForCausalLM]
diff --git a/python/sglang/srt/models/longcat_flash_nextn.py b/python/sglang/srt/models/longcat_flash_nextn.py
new file mode 100644
index 00000000000..64a4265c582
--- /dev/null
+++ b/python/sglang/srt/models/longcat_flash_nextn.py
@@ -0,0 +1,699 @@
+# Apache License, Version 2.0:
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# MIT License:
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+import concurrent.futures
+import logging
+import os
+from enum import IntEnum, auto
+from typing import Any, Dict, Iterable, Optional, Tuple, Union
+
+import torch
+import torch.nn.functional as F
+from torch import nn
+from tqdm import tqdm
+
+from sglang.srt.configs import LongcatFlashConfig
+from sglang.srt.eplb.expert_distribution import get_global_expert_distribution_recorder
+from sglang.srt.layers.communicator import LayerCommunicator, LayerScatterModes
+from sglang.srt.layers.dp_attention import (
+ get_attention_tp_rank,
+ get_attention_tp_size,
+ is_dp_attention_enabled,
+)
+from sglang.srt.layers.layernorm import RMSNorm
+from sglang.srt.layers.linear import ReplicatedLinear
+from sglang.srt.layers.logits_processor import LogitsProcessor
+from sglang.srt.layers.quantization import deep_gemm_wrapper
+from sglang.srt.layers.quantization.base_config import QuantizationConfig
+from sglang.srt.layers.quantization.fp8_kernel import is_fp8_fnuz
+from sglang.srt.layers.quantization.fp8_utils import (
+ block_quant_dequant,
+ block_quant_to_tensor_quant,
+ channel_quant_to_tensor_quant,
+ normalize_e4m3fn_to_e4m3fnuz,
+ requant_weight_ue8m0_inplace,
+)
+from sglang.srt.layers.quantization.int8_utils import (
+ block_dequant as int8_block_dequant,
+)
+from sglang.srt.layers.vocab_parallel_embedding import (
+ ParallelLMHead,
+ VocabParallelEmbedding,
+)
+from sglang.srt.model_executor.forward_batch_info import ForwardBatch
+from sglang.srt.model_loader.weight_utils import default_weight_loader
+from sglang.srt.models.deepseek_v2 import DeepseekV2AttentionMLA
+from sglang.srt.models.longcat_flash import LongcatFlashForCausalLM, LongcatFlashMLP
+from sglang.srt.utils import (
+ BumpAllocator,
+ LazyValue,
+ add_prefix,
+ bind_or_assign,
+ cpu_has_amx_support,
+ get_bool_env_var,
+ get_device_sm,
+ is_cpu,
+ is_cuda,
+ is_hip,
+ is_npu,
+)
+
+_is_hip = is_hip()
+_is_cuda = is_cuda()
+_is_npu = is_npu()
+_is_fp8_fnuz = is_fp8_fnuz()
+_use_aiter = get_bool_env_var("SGLANG_USE_AITER") and _is_hip
+_is_cpu_amx_available = cpu_has_amx_support()
+_is_cpu = is_cpu()
+_device_sm = get_device_sm()
+
+if _is_cuda:
+ from sgl_kernel import (
+ awq_dequantize,
+ bmm_fp8,
+ dsv3_fused_a_gemm,
+ dsv3_router_gemm,
+ merge_state_v2,
+ )
+elif _is_cpu and _is_cpu_amx_available:
+ pass
+elif _is_hip:
+ from sglang.srt.layers.quantization.awq_triton import (
+ awq_dequantize_triton as awq_dequantize,
+ )
+else:
+ from vllm._custom_ops import awq_dequantize
+
+
+logger = logging.getLogger(__name__)
+
+
+class LongcatFlashDenseDecoderLayer(nn.Module):
+
+ def __init__(
+ self,
+ config: LongcatFlashConfig,
+ layer_id: int,
+ quant_config: Optional[QuantizationConfig] = None,
+ prefix: str = "",
+ alt_stream: Optional[torch.cuda.Stream] = None,
+ ) -> None:
+ super().__init__()
+ self.config = config
+ self.hidden_size = config.hidden_size
+ self.layer_id = layer_id
+ self.alt_stream = alt_stream
+
+ self.self_attn = DeepseekV2AttentionMLA(
+ config=config,
+ hidden_size=config.hidden_size,
+ num_heads=config.num_attention_heads,
+ qk_nope_head_dim=config.qk_nope_head_dim,
+ qk_rope_head_dim=config.qk_rope_head_dim,
+ v_head_dim=config.v_head_dim,
+ q_lora_rank=config.q_lora_rank,
+ kv_lora_rank=config.kv_lora_rank,
+ rope_theta=config.rope_theta,
+ rope_scaling=None,
+ max_position_embeddings=config.max_position_embeddings,
+ quant_config=quant_config,
+ layer_id=layer_id,
+ reduce_results=False,
+ prefix=add_prefix(f"self_attn", prefix),
+ alt_stream=self.alt_stream,
+ )
+
+ self.mlp = LongcatFlashMLP(
+ hidden_size=config.hidden_size,
+ intermediate_size=config.intermediate_size,
+ hidden_act=config.hidden_act,
+ quant_config=quant_config,
+ prefix=add_prefix(f"mlps", prefix),
+ )
+ self.input_layernorm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps)
+ self.post_attention_layernorm = RMSNorm(
+ config.hidden_size, eps=config.rms_norm_eps
+ )
+
+ self.attn_tp_size = get_attention_tp_size()
+ self.attn_tp_rank = get_attention_tp_rank()
+ self.layer_scatter_modes = LayerScatterModes.init_new(
+ layer_id=self.layer_id,
+ num_layers=config.num_hidden_layers,
+ is_layer_sparse=False,
+ is_previous_layer_sparse=False,
+ )
+ self.layer_communicator = LayerCommunicator(
+ layer_scatter_modes=self.layer_scatter_modes,
+ input_layernorm=self.input_layernorm,
+ post_attention_layernorm=self.post_attention_layernorm,
+ )
+
+ def forward(
+ self,
+ positions: torch.Tensor,
+ hidden_states: torch.Tensor,
+ forward_batch: ForwardBatch,
+ residual: Optional[torch.Tensor],
+ zero_allocator: BumpAllocator,
+    ) -> Tuple[torch.Tensor, torch.Tensor]:
+
+ hidden_states, residual = self.layer_communicator.prepare_attn(
+ hidden_states, residual, forward_batch
+ )
+ if hidden_states.shape[0] != 0:
+ hidden_states = self.self_attn(
+ positions=positions,
+ hidden_states=hidden_states,
+ forward_batch=forward_batch,
+ zero_allocator=zero_allocator,
+ )
+
+ hidden_states, residual = self.layer_communicator.prepare_mlp(
+ hidden_states, residual, forward_batch
+ )
+ hidden_states = self.mlp(hidden_states)
+ hidden_states, residual = self.layer_communicator.postprocess_layer(
+ hidden_states, residual, forward_batch
+ )
+ return hidden_states, residual
+
+
+class LongcatFlashModelNextN(nn.Module):
+ def __init__(
+ self,
+ config: LongcatFlashConfig,
+ quant_config: Optional[QuantizationConfig] = None,
+ prefix: str = "",
+ ) -> None:
+ super().__init__()
+ self.vocab_size = config.vocab_size
+ self.alt_stream = torch.cuda.Stream()
+
+ self.embed_tokens = VocabParallelEmbedding(
+ config.vocab_size,
+ config.hidden_size,
+ enable_tp=not is_dp_attention_enabled(),
+ prefix=add_prefix("embed_tokens", prefix),
+ )
+
+ self.enorm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps)
+ self.hnorm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps)
+
+ self.eh_proj = ReplicatedLinear(
+ 2 * config.hidden_size,
+ config.hidden_size,
+ bias=False,
+ quant_config=quant_config,
+ prefix=add_prefix("eh_proj", ""),
+ )
+ self.decoder = LongcatFlashDenseDecoderLayer(
+ config, 0, quant_config=quant_config, alt_stream=self.alt_stream
+ )
+
+ self.final_layernorm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps)
+
+ def get_input_embeddings(self) -> torch.Tensor:
+ return self.embed_tokens
+
+ def forward(
+ self,
+ input_ids: torch.Tensor,
+ positions: torch.Tensor,
+ forward_batch: ForwardBatch,
+ input_embeds: torch.Tensor = None,
+ ) -> torch.Tensor:
+ total_num_layers = 1
+ device = input_embeds.device if input_embeds is not None else input_ids.device
+ zero_allocator = BumpAllocator(
+ buffer_size=total_num_layers * 2 * (2 if forward_batch.can_run_tbo else 1),
+ dtype=torch.float32,
+ device=device,
+ )
+ if input_embeds is None:
+ hidden_states = self.embed_tokens(input_ids)
+ else:
+ hidden_states = input_embeds
+
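+        # Fuse the token embedding with the hidden states carried in
+        # forward_batch.spec_info (speculative decoding) through eh_proj.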
+ if hidden_states.shape[0] > 0:
+ hidden_states, _ = self.eh_proj(
+ torch.cat(
+ (
+ self.enorm(hidden_states),
+ self.hnorm(forward_batch.spec_info.hidden_states),
+ ),
+ dim=-1,
+ )
+ )
+
+ residual = None
+ with get_global_expert_distribution_recorder().disable_this_region():
+ hidden_states, residual = self.decoder(
+ positions, hidden_states, forward_batch, residual, zero_allocator
+ )
+
+ if not forward_batch.forward_mode.is_idle():
+ if residual is not None:
+ hidden_states, _ = self.final_layernorm(hidden_states, residual)
+ else:
+ hidden_states = self.final_layernorm(hidden_states)
+ return hidden_states
+
+
+class LongcatFlashForCausalLMNextN(LongcatFlashForCausalLM):
+
+ def __init__(
+ self,
+ config: LongcatFlashConfig,
+ quant_config: Optional[QuantizationConfig] = None,
+ ) -> None:
+ nn.Module.__init__(self)
+ self.config = config
+ self.quant_config = (
+ None
+ if "mtp" in getattr(config, "disable_quant_module", [])
+ else quant_config
+ )
+ self.model = LongcatFlashModelNextN(config, self.quant_config)
+ self.lm_head = ParallelLMHead(
+ config.vocab_size,
+ config.hidden_size,
+ quant_config=self.quant_config,
+ )
+ self.logits_processor = LogitsProcessor(config)
+
+ @torch.no_grad()
+ def forward(
+ self,
+ input_ids: torch.Tensor,
+ positions: torch.Tensor,
+ forward_batch: ForwardBatch,
+ ) -> torch.Tensor:
+ hidden_states = self.model(input_ids, positions, forward_batch)
+ return self.logits_processor(
+ input_ids, hidden_states, self.lm_head, forward_batch
+ )
+
+ def post_load_weights(self):
+ self_attn = self.model.decoder.self_attn
+ if hasattr(self_attn.kv_b_proj, "qweight"):
+ # AWQ compatible
+ if _is_cuda or _is_hip:
+ w = awq_dequantize(
+ self_attn.kv_b_proj.qweight,
+ self_attn.kv_b_proj.scales,
+ self_attn.kv_b_proj.qzeros,
+ ).T
+ else:
+ w = awq_dequantize(
+ self_attn.kv_b_proj.qweight,
+ self_attn.kv_b_proj.scales,
+ self_attn.kv_b_proj.qzeros,
+ 0,
+ 0,
+ 0,
+ ).T
+ else:
+ w = self_attn.kv_b_proj.weight
+ use_deep_gemm_bmm = False
+ if w.dtype in (
+ torch.float8_e4m3fn,
+ torch.float8_e4m3fnuz,
+ ):
+ if (
+ hasattr(self.quant_config, "weight_block_size")
+ and self.quant_config.weight_block_size is not None
+ ):
+ weight_block_size = self.quant_config.weight_block_size
+ assert hasattr(self_attn.kv_b_proj, "weight_scale_inv")
+ if _is_fp8_fnuz:
+ weight, weight_scale, _ = normalize_e4m3fn_to_e4m3fnuz(
+ weight=w,
+ weight_scale=self_attn.kv_b_proj.weight_scale_inv,
+ input_scale=None,
+ )
+ else:
+ weight = w
+ weight_scale = self_attn.kv_b_proj.weight_scale_inv
+ if (
+ _is_cuda
+ and weight_block_size[0] == 128
+ and weight_block_size[1] == 128
+ ):
+ if (
+ deep_gemm_wrapper.ENABLE_JIT_DEEPGEMM
+ and not deep_gemm_wrapper.DEEPGEMM_BLACKWELL
+ and get_bool_env_var("SGL_USE_DEEPGEMM_BMM", "false")
+ ):
+ block_scale = weight_scale
+ use_deep_gemm_bmm = True
+ else:
+ w = block_quant_dequant(
+ weight,
+ weight_scale,
+ weight_block_size,
+ torch.bfloat16,
+ )
+ else:
+ w, scale = block_quant_to_tensor_quant(
+ weight, weight_scale, weight_block_size
+ )
+ self_attn.w_scale = scale
+ else:
+ if _is_fp8_fnuz:
+ weight, weight_scale, _ = normalize_e4m3fn_to_e4m3fnuz(
+ weight=w,
+ weight_scale=self_attn.kv_b_proj.weight_scale,
+ input_scale=None,
+ )
+ else:
+ weight = w
+ weight_scale = self_attn.kv_b_proj.weight_scale
+ w, scale = channel_quant_to_tensor_quant(weight, weight_scale)
+ self_attn.w_scale = scale
+ if w.dtype == torch.int8:
+ if hasattr(self.quant_config, "weight_block_size"):
+                # Block-wise int8 requires weight_scale_inv for dequantization
+ weight_block_size = self.quant_config.weight_block_size
+ if weight_block_size is not None:
+ assert hasattr(self_attn.kv_b_proj, "weight_scale_inv")
+ weight = w
+ weight_scale = self_attn.kv_b_proj.weight_scale_inv
+ w = int8_block_dequant(weight, weight_scale, weight_block_size).to(
+ torch.bfloat16
+ )
+ else:
+                # Channel-wise int8 requires weight_scale for dequantization
+ w = w.to(torch.bfloat16) * self_attn.kv_b_proj.weight_scale.to(
+ torch.bfloat16
+ )
+ w_kc, w_vc = w.unflatten(
+ 0, (-1, self_attn.qk_nope_head_dim + self_attn.v_head_dim)
+ ).split([self_attn.qk_nope_head_dim, self_attn.v_head_dim], dim=1)
+ if not use_deep_gemm_bmm:
+ self_attn.w_kc = bind_or_assign(
+ self_attn.w_kc, w_kc.transpose(1, 2).contiguous().transpose(1, 2)
+ )
+ self_attn.w_vc = bind_or_assign(
+ self_attn.w_vc, w_vc.contiguous().transpose(1, 2)
+ )
+ if (
+ hasattr(self_attn.kv_b_proj, "weight_scale")
+ and self_attn.w_scale is None
+ ):
+ self_attn.w_scale = bind_or_assign(
+ self_attn.w_scale, self_attn.kv_b_proj.weight_scale
+ )
+ if _is_hip:
+ self_attn.w_scale *= 2.0
+ # TODO: remove this after adding FP8 support in bmm cpu kernel
+ if _is_cpu and _is_cpu_amx_available and w.dtype == torch.float8_e4m3fn:
+ self_attn.w_kc = self_attn.w_kc.to(torch.bfloat16) * self_attn.w_scale
+ self_attn.w_vc = self_attn.w_vc.to(torch.bfloat16) * self_attn.w_scale
+ else:
+ num_tiles_k = self_attn.qk_nope_head_dim // weight_block_size[1]
+ num_tiles_n = self_attn.v_head_dim // weight_block_size[0]
+ ws_kc, ws_vc = block_scale.unflatten(
+ 0, (-1, (num_tiles_k + num_tiles_n))
+ ).split([num_tiles_k, num_tiles_n], dim=1)
+ self_attn.w_scale_k = bind_or_assign(
+ self_attn.w_scale_k, ws_kc.transpose(1, 2).contiguous()
+ )
+ self_attn.w_scale_v = bind_or_assign(
+ self_attn.w_scale_v, ws_vc.contiguous()
+ )
+ self_attn.w_kc = bind_or_assign(
+ self_attn.w_kc, w_kc.transpose(1, 2).contiguous()
+ )
+ self_attn.w_vc = bind_or_assign(self_attn.w_vc, w_vc.contiguous())
+ self_attn.use_deep_gemm_bmm = True
+
+ if self.config.mla_scale_q_lora:
+ self_attn.q_a_layernorm.weight.data *= (
+ self.config.hidden_size / self.config.q_lora_rank
+ ) ** 0.5
+ if self.config.mla_scale_kv_lora:
+ self_attn.kv_a_layernorm.weight.data *= (
+ self.config.hidden_size / self.config.kv_lora_rank
+ ) ** 0.5
+
+ if (
+ deep_gemm_wrapper.ENABLE_JIT_DEEPGEMM
+ and deep_gemm_wrapper.DEEPGEMM_SCALE_UE8M0
+ and hasattr(self.quant_config, "weight_block_size")
+ and self.quant_config.weight_block_size is not None
+ ):
+ self._weight_requant_ue8m0()
+
+ def _weight_requant_ue8m0(self):
+ weight_block_size = self.quant_config.weight_block_size
+ layer = self.model.decoder
+ self_attn = layer.self_attn
+ module_list = [
+ self_attn.kv_b_proj,
+ self_attn.o_proj,
+ ]
+
+ if self.config.q_lora_rank is not None:
+ module_list.append(self_attn.fused_qkv_a_proj_with_mqa)
+ module_list.append(self_attn.q_b_proj)
+ else:
+ module_list.append(self_attn.kv_a_proj_with_mqa)
+ module_list.append(self_attn.q_proj)
+
+ for module in module_list:
+ if hasattr(module, "weight_scale_inv"):
+ requant_weight_ue8m0_inplace(
+ module.weight, module.weight_scale_inv, weight_block_size
+ )
+
+ mlp = layer.mlps
+ assert isinstance(mlp, LongcatFlashMLP)
+ for module in [
+ mlp.gate_up_proj,
+ mlp.down_proj,
+ ]:
+ if hasattr(module, "weight_scale_inv"):
+ requant_weight_ue8m0_inplace(
+ module.weight, module.weight_scale_inv, weight_block_size
+ )
+
+ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
+ stacked_params_mapping = [
+ # (param_name, shard_name, shard_id)
+ ("gate_up_proj", "gate_proj", 0),
+ ("gate_up_proj", "up_proj", 1),
+ ]
+
+ # Fuse q_a_proj and kv_a_proj_with_mqa along output dimension when q_lora_rank is not None
+ fuse_qkv_a_proj = hasattr(self.config, "q_lora_rank") and (
+ self.config.q_lora_rank is not None
+ )
+ cached_a_proj = {} if fuse_qkv_a_proj else None
+
+ nextn_layer_prefix = "model.layers.0"
+ nextn_spec_weight_names = [
+ "shared_head.norm",
+ "eh_proj",
+ "enorm",
+ "hnorm",
+ "final_layernorm",
+ ]
+
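+        # Remap MTP (multi-token prediction) checkpoint names to this NextN
+        # module's parameter layout before the generic renaming below.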
+ weight_names_mapping = {
+ "model.mtp.embed_tokens.weight": "embed_tokens.weight",
+ "model.mtp.layers.0.eh_proj.weight": "eh_proj.weight",
+ "model.mtp.layers.0.eh_proj.weight_scale_inv": "eh_proj.weight_scale_inv",
+ "model.mtp.layers.0.enorm.m.weight": "enorm.weight",
+ "model.mtp.layers.0.hnorm.m.weight": "hnorm.weight",
+ "model.mtp.layers.0.input_layernorm.weight": "layers.0.input_layernorm.weight",
+ "model.mtp.layers.0.post_attention_layernorm.weight": "layers.0.post_attention_layernorm.weight",
+ "model.mtp.layers.0.self_attn.kv_a_layernorm.weight": "layers.0.self_attn.kv_a_layernorm.weight",
+ "model.mtp.layers.0.self_attn.kv_a_proj_with_mqa.weight": "layers.0.self_attn.kv_a_proj_with_mqa.weight",
+ "model.mtp.layers.0.self_attn.kv_a_proj_with_mqa.weight_scale_inv": "layers.0.self_attn.kv_a_proj_with_mqa.weight_scale_inv",
+ "model.mtp.layers.0.self_attn.kv_b_proj.weight": "layers.0.self_attn.kv_b_proj.weight",
+ "model.mtp.layers.0.self_attn.kv_b_proj.weight_scale_inv": "layers.0.self_attn.kv_b_proj.weight_scale_inv",
+ "model.mtp.layers.0.self_attn.o_proj.weight": "layers.0.self_attn.o_proj.weight",
+ "model.mtp.layers.0.self_attn.o_proj.weight_scale_inv": "layers.0.self_attn.o_proj.weight_scale_inv",
+ "model.mtp.layers.0.self_attn.q_a_layernorm.weight": "layers.0.self_attn.q_a_layernorm.weight",
+ "model.mtp.layers.0.self_attn.q_a_proj.weight": "layers.0.self_attn.q_a_proj.weight",
+ "model.mtp.layers.0.self_attn.q_a_proj.weight_scale_inv": "layers.0.self_attn.q_a_proj.weight_scale_inv",
+ "model.mtp.layers.0.self_attn.q_b_proj.weight": "layers.0.self_attn.q_b_proj.weight",
+ "model.mtp.layers.0.self_attn.q_b_proj.weight_scale_inv": "layers.0.self_attn.q_b_proj.weight_scale_inv",
+ "model.mtp.layers.0.transformer_layer.mlp.down_proj.weight": "layers.0.mlp.down_proj.weight",
+ "model.mtp.layers.0.transformer_layer.mlp.down_proj.weight_scale_inv": "layers.0.mlp.down_proj.weight_scale_inv",
+ "model.mtp.layers.0.transformer_layer.mlp.gate_proj.weight": "layers.0.mlp.gate_proj.weight",
+ "model.mtp.layers.0.transformer_layer.mlp.gate_proj.weight_scale_inv": "layers.0.mlp.gate_proj.weight_scale_inv",
+ "model.mtp.layers.0.transformer_layer.mlp.up_proj.weight": "layers.0.mlp.up_proj.weight",
+ "model.mtp.layers.0.transformer_layer.mlp.up_proj.weight_scale_inv": "layers.0.mlp.up_proj.weight_scale_inv",
+ "model.mtp.norm.weight": "layers.0.final_layernorm.weight",
+ }
+ with concurrent.futures.ThreadPoolExecutor() as executor:
+ futures = []
+ params_dict = dict(self.named_parameters())
+ weight_names = []
+ for name, loaded_weight in weights:
+ if ".mtp." not in name:
+ continue
+ if name in weight_names_mapping:
+ name = weight_names_mapping[name]
+ if name.startswith("layers.0"):
+ name = "model." + name
+ if (
+ name.startswith("enorm")
+ or name.startswith("hnorm")
+ or name.startswith("eh_proj")
+ ):
+ name = nextn_layer_prefix + "." + name
+ if not name.startswith(nextn_layer_prefix):
+ continue
+
+ # Use shared head and embed weights from target model
+ if "shared_head.head" in name or "embed_tokens" in name:
+ continue
+
+ is_decoder = True
+ # For nextn specific weights
+ for weight_name in nextn_spec_weight_names:
+ if weight_name in name:
+ name = name.replace(nextn_layer_prefix, "model")
+ is_decoder = False
+ break
+ # For decoder layer weights
+ if is_decoder:
+ name = name.replace(nextn_layer_prefix, "model.decoder")
+
+ weight_names.append(name)
+ if "rotary_emb.inv_freq" in name:
+ continue
+ for param_name, weight_name, shard_id in stacked_params_mapping:
+ # Skip non-stacked layers and experts (experts handled below).
+ if weight_name not in name:
+ continue
+                    # Expert weights (mlp.experts[...]) are not part of this
+                    # NextN module, so skip them before the name is rewritten.
+ if ("mlp.experts." in name) and name not in params_dict:
+ continue
+ name = name.replace(weight_name, param_name)
+ # Skip loading extra bias for GPTQ models.
+ if name.endswith(".bias") and name not in params_dict:
+ continue
+ param = params_dict[name]
+ weight_loader = param.weight_loader
+ futures.append(
+ executor.submit(weight_loader, param, loaded_weight, shard_id)
+ )
+ break
+ else:
+ # Skip loading extra bias for GPTQ models.
+ if name.endswith(".bias") and name not in params_dict:
+ continue
+ if fuse_qkv_a_proj and (
+ "q_a_proj" in name or "kv_a_proj_with_mqa" in name
+ ):
+ cached_a_proj[name] = loaded_weight
+ q_a_proj_name = (
+ name
+ if "q_a_proj" in name
+ else name.replace("kv_a_proj_with_mqa", "q_a_proj")
+ )
+ kv_a_proj_name = (
+ name
+ if "kv_a_proj_with_mqa" in name
+ else name.replace("q_a_proj", "kv_a_proj_with_mqa")
+ )
+
+                        # Once both q_a_proj and kv_a_proj_with_mqa have been cached, load the fused weight into the parameter
+ if (
+ q_a_proj_name in cached_a_proj
+ and kv_a_proj_name in cached_a_proj
+ ):
+ q_a_proj_weight = cached_a_proj[q_a_proj_name]
+ kv_a_proj_weight = cached_a_proj[kv_a_proj_name]
+ cat_dim = 0
+ if self.quant_config is not None and (
+ self.quant_config.get_name() == "awq"
+ or self.quant_config.get_name() == "awq_marlin"
+ or self.quant_config.get_name() == "moe_wna16"
+ ):
+ cat_dim = 1
+ fused_weight = torch.cat(
+ [q_a_proj_weight, kv_a_proj_weight], dim=cat_dim
+ )
+ param_name = (
+ name.replace("q_a_proj", "fused_qkv_a_proj_with_mqa")
+ if "q_a_proj" in name
+ else name.replace(
+ "kv_a_proj_with_mqa",
+ "fused_qkv_a_proj_with_mqa",
+ )
+ )
+ param = params_dict[param_name]
+
+ weight_loader = getattr(
+ param, "weight_loader", default_weight_loader
+ )
+ futures.append(
+ executor.submit(weight_loader, param, fused_weight)
+ )
+ cached_a_proj.pop(q_a_proj_name)
+ cached_a_proj.pop(kv_a_proj_name)
+ else:
+ if (
+ "k_scale" in name or "v_scale" in name
+ ) and name not in params_dict:
+ # modelopt attn kv scale is named differently
+ for scale in ["k_scale", "v_scale"]:
+ if scale in name:
+ name = name.replace(f"{scale[0]}_proj", "attn_mqa")
+ break
+ if name not in params_dict:
+                            # The modelopt checkpoint contains weights for the MTP module that are not needed here:
+ # model.decoder.self_attn.attn_mqa.v_scale and
+ # model.decoder.self_attn.attn_mqa.k_scale
+ logger.warning(f"{name} not found in params_dict.")
+ continue
+ param = params_dict[name]
+ weight_loader = getattr(
+ param, "weight_loader", default_weight_loader
+ )
+ futures.append(
+ executor.submit(weight_loader, param, loaded_weight)
+ )
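+        # Leaving the "with" block shuts the executor down and waits for all
+        # submitted loads to finish before post-processing the weights.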
+ self.post_load_weights()
+
+
+EntryClass = [LongcatFlashForCausalLMNextN]
diff --git a/python/sglang/srt/models/minicpmv.py b/python/sglang/srt/models/minicpmv.py
index 8166d1646ad..e621676fcd5 100644
--- a/python/sglang/srt/models/minicpmv.py
+++ b/python/sglang/srt/models/minicpmv.py
@@ -54,6 +54,7 @@
from sglang.srt.model_loader.utils import set_default_torch_dtype
from sglang.srt.model_loader.weight_utils import default_weight_loader
from sglang.srt.models.idefics2 import Idefics2VisionTransformer
+from sglang.srt.models.llama import LlamaConfig, LlamaForCausalLM
from sglang.srt.models.qwen2 import Qwen2Config, Qwen2ForCausalLM
from sglang.srt.utils import add_prefix, flatten_nested_list
@@ -581,7 +582,7 @@ def forward(
def init_llm(
self,
- config: Qwen2Config,
+ config: PretrainedConfig,
quant_config: Optional[QuantizationConfig] = None,
prefix: str = "",
) -> nn.Module:
@@ -774,7 +775,168 @@ def pad_input_ids(self, input_ids: List[int], image_inputs: MultimodalInputs):
return pattern.pad_input_tokens(input_ids, image_inputs)
-_SUPPORT_VERSION = {(2, 6): MiniCPMV2_6}
+class MiniCPMV4_0(MiniCPMBaseModel):
+ packed_modules_mapping = {
+ "qkv_proj": [
+ "q_proj",
+ "k_proj",
+ "v_proj",
+ ],
+ "gate_up_proj": [
+ "gate_proj",
+ "up_proj",
+ ],
+ }
+ # LoRA specific attributes
+ supported_lora_modules = [
+ # vision encoder
+ "fc1",
+ "fc2",
+ "out_proj",
+ # language model
+ "qkv_proj", # same name with vision encoder
+ "o_proj",
+ "gate_up_proj",
+ "down_proj",
+ # resampler
+ "kv_proj",
+ ]
+
+    # BitsAndBytes-specific attributes
+ bitsandbytes_stacked_params_mapping = {
+ # shard_name, weight_name, index
+ "q_proj": ("qkv_proj", 0),
+ "k_proj": ("qkv_proj", 1),
+ "v_proj": ("qkv_proj", 2),
+ "gate_proj": ("gate_up_proj", 0),
+ "up_proj": ("gate_up_proj", 1),
+ }
+
+ embedding_modules = {}
+ embedding_padding_modules = []
+
+ def __init__(
+ self,
+ config: PretrainedConfig,
+ quant_config: Optional[QuantizationConfig] = None,
+ prefix: str = "",
+ ):
+ super().__init__(config=config, quant_config=quant_config, prefix=prefix)
+ assert self.version == (4, 0)
+
+ def init_llm(
+ self,
+ config: LlamaConfig,
+ quant_config: Optional[QuantizationConfig] = None,
+ prefix: str = "",
+ ) -> nn.Module:
+ return LlamaForCausalLM(config=config, quant_config=quant_config, prefix=prefix)
+
+ def init_vision_module(
+ self,
+ config: PretrainedConfig,
+ quant_config: Optional[QuantizationConfig],
+ prefix: str = "",
+ ) -> nn.Module:
+ model = Idefics2VisionTransformer(
+ config=config.vision_config, quant_config=quant_config, prefix=prefix
+ )
+ if self.config.drop_vision_last_layer:
+ model.encoder.layers = model.encoder.layers[:-1]
+
+ setattr(model, "embed_dim", model.embeddings.embed_dim)
+ setattr(model, "patch_size", model.embeddings.patch_size)
+ return model
+
+ def init_resampler(
+ self,
+ embed_dim: int,
+ vision_dim: int,
+ quant_config: Optional[QuantizationConfig] = None,
+ prefix: str = "",
+ ) -> nn.Module:
+ with set_default_torch_dtype(torch.float16):
+            # MiniCPM-V 4.0 reuses the same resampler architecture as 2.5/2.6 (Resampler2_5).
+ resampler = Resampler2_5(
+ num_queries=self.config.query_num,
+ embed_dim=embed_dim,
+ num_heads=embed_dim // 128,
+ kv_dim=vision_dim,
+ quant_config=quant_config,
+ prefix=prefix,
+ )
+
+ return resampler.to(device="cuda", dtype=torch.get_default_dtype())
+
+ def get_vision_embedding(
+ self,
+ pixel_values: List[torch.Tensor],
+ patch_attn_mask: Optional[torch.Tensor] = None,
+ tgt_sizes: Optional[torch.Tensor] = None,
+ ) -> torch.Tensor:
+ vision_embedding = self.vpm(
+ pixel_values,
+ patch_attention_mask=patch_attn_mask,
+ tgt_sizes=tgt_sizes,
+ )
+ return vision_embedding
+
+ def get_image_feature(self, items: List[MultimodalDataItem]) -> torch.Tensor:
+ # list of tensors
+ pixel_values = flatten_nested_list([item.feature for item in items])
+ tgt_sizes = torch.stack(
+ flatten_nested_list([item.tgt_size for item in items]), dim=0
+ )
+ assert len(pixel_values) == tgt_sizes.shape[0]
+
+ device = self.vpm.embeddings.position_embedding.weight.device
+ dtype = self.vpm.embeddings.position_embedding.weight.dtype
+ all_pixel_values_lst = [
+ i.flatten(end_dim=1).permute(1, 0) for i in pixel_values
+ ]
+
+ max_patches = (tgt_sizes[:, 0] * tgt_sizes[:, 1]).max().item()
+ assert isinstance(max_patches, int)
+ all_pixel_values = torch.nn.utils.rnn.pad_sequence(
+ all_pixel_values_lst, batch_first=True, padding_value=0.0
+ )
+
+ B, L, _ = all_pixel_values.shape
+ all_pixel_values = all_pixel_values.permute(0, 2, 1).reshape(B, 3, -1, L)
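+        # Build a boolean mask so the vision encoder only attends to the real
+        # (non-padded) patches of each image.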
+ patch_attn_mask = torch.zeros(
+ (B, 1, max_patches), dtype=torch.bool, device=device
+ )
+
+ tgt_sizes_tensor = tgt_sizes.clone().to(device=patch_attn_mask.device)
+ mask_shapes = tgt_sizes_tensor[:, 0] * tgt_sizes_tensor[:, 1]
+ patch_attn_mask[:, 0, :] = torch.arange(
+ patch_attn_mask.size(2), device=patch_attn_mask.device
+ ).unsqueeze(0) < mask_shapes.unsqueeze(1)
+
+ vision_embedding = self.vpm(
+ all_pixel_values.type(dtype),
+ patch_attention_mask=patch_attn_mask,
+ tgt_sizes=tgt_sizes,
+ )
+ return self.resampler(vision_embedding, tgt_sizes)
+
+ def pad_input_ids(self, input_ids: List[int], image_inputs: MultimodalInputs):
+ # Get all special token IDs
+ im_start_id: int = image_inputs.im_start_id
+ im_end_id: int = image_inputs.im_end_id
+ slice_start_id: int = image_inputs.slice_start_id
+ slice_end_id: int = image_inputs.slice_end_id
+
+ media_token_pairs = [(im_start_id, im_end_id), (slice_start_id, slice_end_id)]
+ pattern = MultiModalityDataPaddingPatternTokenPairs(media_token_pairs)
+
+ return pattern.pad_input_tokens(input_ids, image_inputs)
+
+
+_SUPPORT_VERSION = {
+ (2, 6): MiniCPMV2_6,
+ (4, 0): MiniCPMV4_0,
+}
class MiniCPMV:
@@ -809,7 +971,7 @@ def __init__(
# Dispatch class based on version
instance_class = _SUPPORT_VERSION.get(version)
if instance_class is None:
- raise ValueError("Currently, MiniCPMV only supports versions 2.6")
+ raise ValueError("Currently, MiniCPMV only supports versions 2.6 and 4.0")
try:
minicpmv = instance_class(
diff --git a/python/sglang/srt/models/mllama.py b/python/sglang/srt/models/mllama.py
index 3ba736c7a94..fa294ddcd0c 100644
--- a/python/sglang/srt/models/mllama.py
+++ b/python/sglang/srt/models/mllama.py
@@ -966,7 +966,7 @@ def forward(
positions: torch.Tensor,
forward_batch: ForwardBatch,
) -> Union[Tuple, CausalLMOutputWithPast]:
- from sglang.srt.model_executor.graph_runner import get_is_capture_mode
+ from sglang.srt.model_executor.cuda_graph_runner import get_is_capture_mode
batched_images, batched_ar_ids, batched_ar_mask, encoder_lens_need = (
self._batch_image_inputs(forward_batch)
diff --git a/python/sglang/srt/models/mllama4.py b/python/sglang/srt/models/mllama4.py
index b57d637f052..5f2847bddda 100644
--- a/python/sglang/srt/models/mllama4.py
+++ b/python/sglang/srt/models/mllama4.py
@@ -421,6 +421,7 @@ class Llama4ForConditionalGeneration(nn.Module):
"qkv_proj": ["q_proj", "k_proj", "v_proj"],
"gate_up_proj": ["gate_proj", "up_proj"],
}
+ hip_attention_supported = True
def __init__(
self,
@@ -559,10 +560,9 @@ def forward(
input_ids=input_ids,
forward_batch=forward_batch,
language_model=self.language_model,
- data_embedding_funcs={
- Modality.IMAGE: self.get_image_feature,
- },
+ multimodal_model=self,
positions=positions,
+ data_embedding_funcs=self.get_image_feature,
)
return hs
@@ -961,5 +961,30 @@ def get_embed(self):
def set_embed(self, embed):
return self.language_model.set_embed(embed)
+ def get_hidden_dim(self, module_name, layer_idx):
+ # return input_dim, output_dim
+ if module_name == "qkv_proj":
+ return (
+ self.config.hidden_size,
+ self.config.head_dim
+ * (
+ self.config.num_attention_heads
+ + self.config.num_key_value_heads * 2
+ ),
+ )
+ elif module_name == "o_proj":
+ return (
+ self.config.head_dim * self.config.num_attention_heads,
+ self.config.hidden_size,
+ )
+ elif module_name == "gate_up_proj":
+ return self.config.hidden_size, self.config.intermediate_size * 2
+ elif module_name == "down_proj":
+ decoder_layer = self.language_model.get_layers()[layer_idx]
+ intermediate_size = decoder_layer.get_intermediate_size()
+ return intermediate_size, self.config.hidden_size
+ else:
+ raise NotImplementedError()
+
EntryClass = Llama4ForConditionalGeneration
diff --git a/python/sglang/srt/models/nemotron_nas.py b/python/sglang/srt/models/nemotron_nas.py
new file mode 100644
index 00000000000..ebf49f95a4a
--- /dev/null
+++ b/python/sglang/srt/models/nemotron_nas.py
@@ -0,0 +1,435 @@
+# Copyright 2023-2025 SGLang Team
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+# Adapted from https://github.com/vllm-project/vllm/blob/main/vllm/model_executor/models/nemotron_nas.py
+
+"""Inference-only deci model compatible with HuggingFace weights."""
+from typing import Iterable, Optional, Tuple, Type, Union
+
+import torch
+from torch import nn
+from transformers import LlamaConfig
+
+from sglang.srt.distributed import get_pp_group
+from sglang.srt.layers.layernorm import RMSNorm
+from sglang.srt.layers.logits_processor import LogitsProcessor, LogitsProcessorOutput
+from sglang.srt.layers.pooler import Pooler, PoolingType
+from sglang.srt.layers.quantization import QuantizationConfig
+from sglang.srt.layers.utils import PPMissingLayer
+from sglang.srt.layers.vocab_parallel_embedding import (
+ DEFAULT_VOCAB_PADDING_SIZE,
+ ParallelLMHead,
+ VocabParallelEmbedding,
+)
+from sglang.srt.model_executor.forward_batch_info import ForwardBatch, PPProxyTensors
+from sglang.srt.model_loader.weight_utils import (
+ default_weight_loader,
+ maybe_remap_kv_scale_name,
+)
+from sglang.srt.models.llama import LlamaAttention, LlamaMLP
+from sglang.srt.utils import add_prefix, make_layers
+from sglang.utils import logger
+
+
+def _ffn_mult_to_intermediate_size(ffn_mult: float, n_embd: int) -> int:
+ # DeciLM-specific code
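+    # Illustrative values: ffn_mult=2.625, n_embd=4096 gives int(2*2.625*4096/3)
+    # = 7168, which is already a multiple of 256.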
+ intermediate_size = int(2 * ffn_mult * n_embd / 3)
+ return _find_multiple(intermediate_size, 256)
+
+
+def _find_multiple(n: int, k: int) -> int:
+ # DeciLM-specific code
+ if n % k == 0:
+ return n
+ return n + k - (n % k)
+
+
+class DeciLMDecoderLayer(nn.Module):
+
+ def __init__(
+ self,
+ config: LlamaConfig,
+ layer_idx: int,
+ quant_config: Optional[QuantizationConfig] = None,
+ prefix: str = "",
+ ) -> None:
+ super().__init__()
+ block_config = config.block_configs[layer_idx]
+ self._is_no_op_attention = block_config.attention.no_op
+ self._is_no_op_ffn = block_config.ffn.no_op
+
+ self.hidden_size = config.hidden_size
+ rope_theta = getattr(config, "rope_theta", 10000)
+ rope_scaling = getattr(config, "rope_scaling", None)
+ if rope_scaling is not None and getattr(
+ config, "original_max_position_embeddings", None
+ ):
+ rope_scaling["original_max_position_embeddings"] = (
+ config.original_max_position_embeddings
+ )
+ max_position_embeddings = getattr(config, "max_position_embeddings", 8192)
+ # Support abacusai/Smaug-72B-v0.1 with attention_bias
+ # Support internlm/internlm-7b with bias
+ rope_is_neox_style = getattr(config, "rope_is_neox_style", True)
+ attention_bias = getattr(config, "attention_bias", False) or getattr(
+ config, "bias", False
+ )
+ # support internlm/internlm3-8b with qkv_bias
+ if hasattr(config, "qkv_bias"):
+ attention_bias = config.qkv_bias
+
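+        # Nemotron-NAS block configs may replace attention and/or the FFN of a
+        # layer with a no-op; skipped sub-blocks act as identity in forward().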
+ if not self._is_no_op_attention:
+ num_kv_heads = (
+ config.num_attention_heads // block_config.attention.n_heads_in_group
+ )
+ self.self_attn = LlamaAttention(
+ config=config,
+ hidden_size=self.hidden_size,
+ num_heads=config.num_attention_heads,
+ num_kv_heads=num_kv_heads,
+ layer_id=layer_idx,
+ rope_theta=rope_theta,
+ rope_scaling=rope_scaling,
+ rope_is_neox_style=rope_is_neox_style,
+ max_position_embeddings=max_position_embeddings,
+ quant_config=quant_config,
+ prefix=add_prefix("self_attn", prefix),
+ bias=attention_bias,
+ )
+ self.input_layernorm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps)
+
+ if not self._is_no_op_ffn:
+ ffn_mult = block_config.ffn.ffn_mult
+ intermediate_size = _ffn_mult_to_intermediate_size(
+ ffn_mult, config.hidden_size
+ )
+ self.mlp = LlamaMLP(
+ hidden_size=self.hidden_size,
+ intermediate_size=intermediate_size,
+ hidden_act=config.hidden_act,
+ quant_config=quant_config,
+ prefix=add_prefix("mlp", prefix),
+ )
+ self.post_attention_layernorm = RMSNorm(
+ config.hidden_size, eps=config.rms_norm_eps
+ )
+
+ def forward(
+ self,
+ positions: torch.Tensor,
+ hidden_states: torch.Tensor,
+ forward_batch: ForwardBatch,
+ residual: Optional[torch.Tensor],
+ ) -> Tuple[torch.Tensor, torch.Tensor]:
+ # Self Attention
+
+ if self._is_no_op_attention:
+ pass
+ else:
+ if residual is None:
+ residual = hidden_states
+ hidden_states = self.input_layernorm(hidden_states)
+ else:
+ hidden_states, residual = self.input_layernorm(hidden_states, residual)
+ hidden_states = self.self_attn(
+ positions=positions,
+ hidden_states=hidden_states,
+ forward_batch=forward_batch,
+ )
+
+ # Fully Connected
+ if not self._is_no_op_ffn:
+ hidden_states, residual = self.post_attention_layernorm(
+ hidden_states, residual
+ )
+ hidden_states = self.mlp(hidden_states)
+ return hidden_states, residual
+
+
+class DeciModel(nn.Module):
+ def __init__(
+ self,
+ *,
+ config: LlamaConfig,
+ quant_config: Optional[QuantizationConfig] = None,
+ prefix: str = "",
+ layer_type: Type[DeciLMDecoderLayer] = DeciLMDecoderLayer,
+ ):
+ super().__init__()
+
+ lora_config = None
+ self.config = config
+ self.quant_config = quant_config
+ self.padding_idx = config.pad_token_id
+ lora_vocab = (
+ (lora_config.lora_extra_vocab_size * (lora_config.max_loras or 1))
+ if lora_config
+ else 0
+ )
+ vocab_size = config.vocab_size + lora_vocab
+ if get_pp_group().is_first_rank:
+ self.embed_tokens = VocabParallelEmbedding(
+ vocab_size,
+ config.hidden_size,
+ org_num_embeddings=config.vocab_size,
+ quant_config=quant_config,
+ )
+ else:
+ self.embed_tokens = PPMissingLayer()
+
+ def get_layer(idx: int, prefix: str):
+ return layer_type(
+ config,
+ layer_idx=idx,
+ quant_config=quant_config,
+ prefix=prefix,
+ )
+
+ self.layers, self.start_layer, self.end_layer = make_layers(
+ config.num_hidden_layers,
+ get_layer,
+ pp_rank=get_pp_group().rank_in_group,
+ pp_size=get_pp_group().world_size,
+ prefix=add_prefix("layers", prefix),
+ )
+ if get_pp_group().is_last_rank:
+ self.norm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps)
+ else:
+ self.norm = PPMissingLayer(return_tuple=True)
+
+ def get_input_embeddings(self, input_ids: torch.Tensor) -> torch.Tensor:
+ return self.embed_tokens(input_ids)
+
+ def forward(
+ self,
+ input_ids: Optional[torch.Tensor],
+ positions: torch.Tensor,
+ forward_batch: ForwardBatch,
+ inputs_embeds: Optional[torch.Tensor] = None,
+ pp_proxy_tensors: Optional[PPProxyTensors] = None,
+ ) -> Union[torch.Tensor, PPProxyTensors]:
+ if get_pp_group().is_first_rank:
+ if inputs_embeds is not None:
+ hidden_states = inputs_embeds
+ else:
+ hidden_states = self.get_input_embeddings(input_ids)
+ residual = None
+ else:
+ assert pp_proxy_tensors is not None
+ hidden_states = pp_proxy_tensors["hidden_states"]
+ residual = pp_proxy_tensors["residual"]
+
+ kv_cache_index = 0
+ for i in range(self.start_layer, self.end_layer):
+ layer = self.layers[i]
+ if not layer._is_no_op_attention:
+ hidden_states, residual = layer(
+ positions, hidden_states, forward_batch, residual
+ )
+ kv_cache_index += 1
+ else:
+ hidden_states, residual = layer(
+ positions, hidden_states, forward_batch, residual
+ )
+
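+        # Intermediate pipeline-parallel ranks hand hidden_states and residual to
+        # the next rank instead of applying the final norm.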
+ if not get_pp_group().is_last_rank:
+ return PPProxyTensors(
+ {"hidden_states": hidden_states, "residual": residual}
+ )
+
+ hidden_states, _ = self.norm(hidden_states, residual)
+ return hidden_states
+
+
+class DeciLMForCausalLM(nn.Module):
+ packed_modules_mapping = {
+ "qkv_proj": ["q_proj", "k_proj", "v_proj"],
+ "gate_up_proj": ["gate_proj", "up_proj"],
+ }
+
+ # LoRA specific attributes
+ supported_lora_modules = [
+ "qkv_proj",
+ "o_proj",
+ "gate_up_proj",
+ "down_proj",
+ "embed_tokens",
+ "lm_head",
+ ]
+ embedding_modules = {
+ "embed_tokens": "input_embeddings",
+ "lm_head": "output_embeddings",
+ }
+ embedding_padding_modules = ["lm_head"]
+
+ # Mistral/Llama models can also be loaded with --load-format mistral
+ # from consolidated.safetensors checkpoints
+ mistral_mapping = {
+ "layers": "model.layers",
+ "attention": "self_attn",
+ "wq": "q_proj",
+ "wk": "k_proj",
+ "wv": "v_proj",
+ "wo": "o_proj",
+ "attention_norm": "input_layernorm",
+ "feed_forward": "mlp",
+ "w1": "gate_proj",
+ "w2": "down_proj",
+ "w3": "up_proj",
+ "ffn_norm": "post_attention_layernorm",
+ "tok_embeddings": "model.embed_tokens",
+ "output": "lm_head",
+ "norm": "model.norm",
+ }
+
+ def __init__(
+ self,
+ *,
+ config: LlamaConfig,
+ quant_config: Optional[QuantizationConfig] = None,
+ prefix: str = "",
+ ):
+ super().__init__()
+ lora_config = None
+ self.config = config
+ self.lora_config = lora_config
+
+ self.model = self._init_model(
+ config=config, quant_config=quant_config, prefix=add_prefix("model", prefix)
+ )
+ if self.config.tie_word_embeddings:
+ self.lm_head = self.model.embed_tokens
+ else:
+ self.unpadded_vocab_size = config.vocab_size
+ if lora_config:
+ self.unpadded_vocab_size += lora_config.lora_extra_vocab_size
+ self.lm_head = ParallelLMHead(
+ self.unpadded_vocab_size,
+ config.hidden_size,
+ org_num_embeddings=config.vocab_size,
+ padding_size=(
+ DEFAULT_VOCAB_PADDING_SIZE
+ # We need bigger padding if using lora for kernel
+ # compatibility
+ if not lora_config
+ else lora_config.lora_vocab_padding_size
+ ),
+ quant_config=quant_config,
+ prefix=add_prefix("lm_head", prefix),
+ )
+ self.logits_processor = LogitsProcessor(config)
+ self.pooler = Pooler(pooling_type=PoolingType.LAST, normalize=True)
+
+ def _init_model(
+ self,
+ config: LlamaConfig,
+ quant_config: Optional[QuantizationConfig] = None,
+ prefix: str = "",
+ ):
+ return DeciModel(config=config, quant_config=quant_config, prefix=prefix)
+
+ def get_input_embeddings(self, input_ids: torch.Tensor) -> torch.Tensor:
+ return self.model.get_input_embeddings(input_ids)
+
+ @torch.no_grad()
+ def forward(
+ self,
+ input_ids: torch.Tensor,
+ positions: torch.Tensor,
+ forward_batch: ForwardBatch,
+ inputs_embeds: Optional[torch.Tensor] = None,
+ get_embedding: bool = False,
+ pp_proxy_tensors: Optional[PPProxyTensors] = None,
+ ) -> LogitsProcessorOutput:
+ hidden_states = self.model(
+ input_ids,
+ positions,
+ forward_batch,
+ inputs_embeds,
+ pp_proxy_tensors=pp_proxy_tensors,
+ )
+ if get_pp_group().is_last_rank:
+ if not get_embedding:
+ return self.logits_processor(
+ input_ids, hidden_states, self.lm_head, forward_batch
+ )
+ else:
+ return self.pooler(hidden_states, forward_batch)
+ else:
+ return hidden_states
+
+ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]) -> None:
+ stacked_params_mapping = [
+ # (param_name, shard_name, shard_id)
+ (".qkv_proj", ".q_proj", "q"),
+ (".qkv_proj", ".k_proj", "k"),
+ (".qkv_proj", ".v_proj", "v"),
+ (".gate_up_proj", ".gate_proj", 0),
+ (".gate_up_proj", ".up_proj", 1),
+ ]
+
+ params_dict = dict(self.named_parameters())
+
+ for name, loaded_weight in weights:
+ if "rotary_emb.inv_freq" in name:
+ continue
+ if "rotary_emb.cos_cached" in name or "rotary_emb.sin_cached" in name:
+ # Models trained using ColossalAI may include these tensors in
+ # the checkpoint. Skip them.
+ continue
+ if self.config.tie_word_embeddings and "lm_head.weight" in name:
+ continue
+ if self.model.quant_config is not None and (
+ scale_name := self.model.quant_config.get_cache_scale(name)
+ ):
+ # Loading kv cache quantization scales
+ param = params_dict[scale_name]
+ weight_loader = getattr(param, "weight_loader", default_weight_loader)
+ loaded_weight = (
+ loaded_weight if loaded_weight.dim() == 0 else loaded_weight[0]
+ )
+ weight_loader(param, loaded_weight)
+ continue
+ if "scale" in name:
+ name = maybe_remap_kv_scale_name(name, params_dict)
+ if name is None:
+ continue
+
+ for param_name, weight_name, shard_id in stacked_params_mapping:
+ if weight_name not in name:
+ continue
+ name = name.replace(weight_name, param_name)
+ # Skip loading extra bias for GPTQ models.
+ if name.endswith(".bias") and name not in params_dict:
+ continue
+ if name not in params_dict:
+ continue
+ param = params_dict[name]
+ weight_loader = param.weight_loader
+ weight_loader(param, loaded_weight, shard_id)
+ break
+ else:
+ # Skip loading extra bias for GPTQ models.
+ if name.endswith(".bias") and name not in params_dict:
+ continue
+ if name in params_dict:
+ param = params_dict[name]
+ weight_loader = getattr(
+ param, "weight_loader", default_weight_loader
+ )
+ weight_loader(param, loaded_weight)
+ else:
+ logger.warning(f"Parameter {name} not found in params_dict")
+
+
+EntryClass = [DeciLMForCausalLM]
diff --git a/python/sglang/srt/models/opt.py b/python/sglang/srt/models/opt.py
new file mode 100644
index 00000000000..a571e8937be
--- /dev/null
+++ b/python/sglang/srt/models/opt.py
@@ -0,0 +1,637 @@
+# Copyright 2023-2024 SGLang Team
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Inference-only OPT model compatible with HuggingFace weights."""
+import logging
+from collections.abc import Iterable
+from typing import Optional, Union
+
+import torch
+import torch.nn.functional as F
+from torch import nn
+from transformers import OPTConfig
+
+from sglang.srt.distributed import (
+ get_pp_group,
+ get_tensor_model_parallel_rank,
+ get_tensor_model_parallel_world_size,
+)
+from sglang.srt.layers.activation import get_act_fn
+from sglang.srt.layers.linear import (
+ ColumnParallelLinear,
+ MergedColumnParallelLinear,
+ QKVParallelLinear,
+ ReplicatedLinear,
+ RowParallelLinear,
+)
+from sglang.srt.layers.logits_processor import LogitsProcessor, LogitsProcessorOutput
+from sglang.srt.layers.pooler import Pooler, PoolingType
+from sglang.srt.layers.quantization.base_config import QuantizationConfig
+from sglang.srt.layers.radix_attention import RadixAttention
+from sglang.srt.layers.utils import PPMissingLayer, get_layer_id
+from sglang.srt.layers.vocab_parallel_embedding import (
+ ParallelLMHead,
+ VocabParallelEmbedding,
+)
+from sglang.srt.model_executor.forward_batch_info import ForwardBatch, PPProxyTensors
+from sglang.srt.model_loader.weight_utils import (
+ default_weight_loader,
+ kv_cache_scales_loader,
+ maybe_remap_kv_scale_name,
+)
+from sglang.srt.utils import add_prefix, get_exception_traceback, make_layers
+
+logger = logging.getLogger(__name__)
+
+
+def get_activation(name="relu"):
+ """Select an activation function by name.
+
+ Args:
+ name: str
+ Activation function name, one of ["relu", "gelu", "sigmoid"]
+ (default "relu"). Any other name falls back to nn.Identity().
+ """
+ name = name.lower()
+ if name == "relu":
+ return nn.ReLU()
+ if name == "gelu":
+ return nn.GELU()
+ if name == "sigmoid":
+ return nn.Sigmoid()
+ return nn.Identity()
+
+
+class OPTLearnedPositionalEmbedding(nn.Embedding):
+
+ def __init__(self, num_embeddings: int, embedding_dim: int):
+ # OPT is set up so that if padding_idx is specified then offset the
+ # embedding ids by 2 and adjust num_embeddings appropriately. Other
+ # models don't have this hack
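+ # e.g. position 0 is looked up at embedding index 2.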
+ self.offset = 2
+ super().__init__(num_embeddings + self.offset, embedding_dim)
+
+ def forward(self, positions: torch.Tensor):
+ return super().forward(positions + self.offset)
+
+
+class OPTAttention(nn.Module):
+
+ def __init__(
+ self,
+ embed_dim: int,
+ num_heads: int,
+ layer_id: int = 0,
+ bias: bool = True,
+ quant_config: Optional[QuantizationConfig] = None,
+ prefix: str = "",
+ ) -> None:
+ super().__init__()
+ self.embed_dim = embed_dim
+ tensor_model_parallel_world_size = get_tensor_model_parallel_world_size()
+ total_num_heads = num_heads
+ assert num_heads % tensor_model_parallel_world_size == 0
+ self.num_heads = total_num_heads // tensor_model_parallel_world_size
+ self.head_dim = embed_dim // total_num_heads
+ self.scaling = self.head_dim**-0.5
+
+ self.qkv_proj = QKVParallelLinear(
+ embed_dim,
+ self.head_dim,
+ total_num_heads,
+ bias=bias,
+ quant_config=quant_config,
+ prefix=add_prefix("qkv_proj", prefix),
+ )
+ self.out_proj = RowParallelLinear(
+ embed_dim,
+ embed_dim,
+ bias=bias,
+ quant_config=quant_config,
+ prefix=add_prefix("o_proj", prefix),
+ )
+
+ self.attn = RadixAttention(
+ self.num_heads,
+ self.head_dim,
+ self.scaling,
+ num_kv_heads=self.num_heads,
+ layer_id=layer_id,
+ quant_config=quant_config,
+ prefix=add_prefix("attn", prefix),
+ )
+
+ def forward(
+ self,
+ hidden_states: torch.Tensor,
+ forward_batch: ForwardBatch,
+ ) -> torch.Tensor:
+ qkv, _ = self.qkv_proj(hidden_states)
+ q, k, v = qkv.chunk(chunks=3, dim=-1)
+ attn_output = self.attn(q, k, v, forward_batch)
+ output, _ = self.out_proj(attn_output)
+ return output
+
+
+class OPTDecoderLayer(nn.Module):
+
+ def __init__(
+ self,
+ config: OPTConfig,
+ layer_id: int = 0,
+ quant_config: Optional[QuantizationConfig] = None,
+ prefix: str = "",
+ ):
+ super().__init__()
+ self.config = config
+ self.embed_dim = config.hidden_size
+ self.self_attn = OPTAttention(
+ embed_dim=self.embed_dim,
+ num_heads=config.num_attention_heads,
+ layer_id=layer_id,
+ bias=config.enable_bias,
+ quant_config=quant_config,
+ prefix=add_prefix("self_attn", prefix),
+ )
+ self.do_layer_norm_before = config.do_layer_norm_before
+
+ self.self_attn_layer_norm = nn.LayerNorm(
+ self.embed_dim, elementwise_affine=config.layer_norm_elementwise_affine
+ )
+ self.fc1 = ColumnParallelLinear(
+ self.embed_dim,
+ config.ffn_dim,
+ bias=config.enable_bias,
+ quant_config=quant_config,
+ prefix=add_prefix("fc1", prefix),
+ )
+ self.activation_fn = get_activation(config.activation_function)
+ self.fc2 = RowParallelLinear(
+ config.ffn_dim,
+ self.embed_dim,
+ bias=config.enable_bias,
+ quant_config=quant_config,
+ prefix=add_prefix("fc2", prefix),
+ )
+ self.final_layer_norm = nn.LayerNorm(
+ self.embed_dim, elementwise_affine=config.layer_norm_elementwise_affine
+ )
+
+ def forward(
+ self,
+ hidden_states: torch.Tensor,
+ forward_batch: ForwardBatch,
+ ) -> torch.Tensor:
+ # Self Attention
+ residual = hidden_states
+ # 125m, 1.3B, ..., 175B apply layer norm BEFORE attention
+ if self.do_layer_norm_before:
+ hidden_states = self.self_attn_layer_norm(hidden_states)
+ hidden_states = self.self_attn(
+ hidden_states=hidden_states, forward_batch=forward_batch
+ )
+ hidden_states = residual + hidden_states
+ # 350m applies layer norm AFTER attention
+ if not self.do_layer_norm_before:
+ hidden_states = self.self_attn_layer_norm(hidden_states)
+
+ # Fully Connected
+ residual = hidden_states
+ # 125m, 1.3B, ..., 175B apply layer norm BEFORE the FFN
+ if self.do_layer_norm_before:
+ hidden_states = self.final_layer_norm(hidden_states)
+ hidden_states, _ = self.fc1(hidden_states)
+ hidden_states = self.activation_fn(hidden_states)
+ hidden_states, _ = self.fc2(hidden_states)
+ hidden_states = residual + hidden_states
+ # 350m applies layer norm AFTER the FFN
+ if not self.do_layer_norm_before:
+ hidden_states = self.final_layer_norm(hidden_states)
+ return hidden_states
+
+
+class OPTDecoder(nn.Module):
+
+ def __init__(
+ self,
+ config: OPTConfig,
+ layer_id: int = 0,
+ quant_config: Optional[QuantizationConfig] = None,
+ prefix: str = "",
+ ):
+ super().__init__()
+ self.config = config
+ self.max_target_positions = config.max_position_embeddings
+ self.vocab_size = config.vocab_size
+
+ self.pp_group = get_pp_group()
+
+ self.embed_tokens = VocabParallelEmbedding(
+ config.vocab_size,
+ config.word_embed_proj_dim,
+ prefix=add_prefix("embed_tokens", prefix),
+ )
+ # Positional embeddings are replicated (not sharded).
+ self.embed_positions = OPTLearnedPositionalEmbedding(
+ config.max_position_embeddings, config.hidden_size
+ )
+
+ # Project out & in will be replicated if they exist.
+ if config.word_embed_proj_dim != config.hidden_size:
+ self.project_out = ReplicatedLinear(
+ config.hidden_size,
+ config.word_embed_proj_dim,
+ bias=False,
+ quant_config=quant_config,
+ prefix=add_prefix("project_out", prefix),
+ )
+ else:
+ self.project_out = None
+
+ if config.word_embed_proj_dim != config.hidden_size:
+ self.project_in = ReplicatedLinear(
+ config.word_embed_proj_dim,
+ config.hidden_size,
+ bias=False,
+ quant_config=quant_config,
+ prefix=add_prefix("project_in", prefix),
+ )
+ else:
+ self.project_in = None
+
+ # Note that the only purpose of `config._remove_final_layer_norm` is to
+ # keep backward compatibility with checkpoints that have been fine-tuned
+ # before transformers v4.20.1
+ # see https://github.com/facebookresearch/metaseq/pull/164
+ if config.do_layer_norm_before and not config._remove_final_layer_norm:
+ self.final_layer_norm = nn.LayerNorm(
+ config.hidden_size,
+ elementwise_affine=config.layer_norm_elementwise_affine,
+ )
+ else:
+ self.final_layer_norm = None
+
+ self.layers, self.start_layer, self.end_layer = make_layers(
+ config.num_hidden_layers,
+ lambda idx, prefix: OPTDecoderLayer(
+ config=config, layer_id=idx, quant_config=quant_config, prefix=prefix
+ ),
+ pp_rank=self.pp_group.rank_in_group,
+ pp_size=self.pp_group.world_size,
+ prefix="model.layers",
+ )
+
+ def forward(
+ self,
+ input_ids: torch.Tensor,
+ positions: torch.Tensor,
+ forward_batch: ForwardBatch,
+ pp_proxy_tensors: Optional[PPProxyTensors] = None,
+ input_embeds: Optional[torch.Tensor] = None,
+ ) -> Union[torch.Tensor, PPProxyTensors]:
+ if self.pp_group.is_first_rank:
+ if input_embeds is None:
+ input_embeds = self.embed_tokens(input_ids)
+ pos_embeds = self.embed_positions(positions)
+ if self.project_in is not None:
+ input_embeds, _ = self.project_in(input_embeds)
+ hidden_states = input_embeds + pos_embeds
+ else:
+ assert pp_proxy_tensors is not None
+ hidden_states = pp_proxy_tensors["hidden_states"]
+
+ for layer in self.layers[self.start_layer : self.end_layer]:
+ hidden_states = layer(
+ hidden_states=hidden_states, forward_batch=forward_batch
+ )
+ if not self.pp_group.is_last_rank:
+ return PPProxyTensors({"hidden_states": hidden_states})
+ if self.final_layer_norm is not None:
+ hidden_states = self.final_layer_norm(hidden_states)
+ # project_out only exists when word_embed_proj_dim != hidden_size
+ if self.project_out is not None:
+ hidden_states, _ = self.project_out(hidden_states)
+ return hidden_states
+
+
+class OPTModel(nn.Module):
+
+ def __init__(
+ self,
+ config: OPTConfig,
+ quant_config: Optional[QuantizationConfig] = None,
+ prefix: str = "",
+ ) -> None:
+ super().__init__()
+
+ self.config = config
+ self.padding_idx = config.pad_token_id
+ self.vocab_size = config.vocab_size
+ self.pp_group = get_pp_group()
+
+ self.decoder = OPTDecoder(
+ config=config,
+ quant_config=quant_config,
+ prefix=add_prefix("decoder", prefix),
+ )
+
+ def forward(
+ self,
+ input_ids: torch.Tensor,
+ positions: torch.Tensor,
+ forward_batch: ForwardBatch,
+ pp_proxy_tensors: Optional[PPProxyTensors],
+ input_embeds: Optional[torch.Tensor] = None,
+ ) -> Union[torch.Tensor, PPProxyTensors]:
+ return self.decoder(
+ input_ids,
+ positions,
+ pp_proxy_tensors=pp_proxy_tensors,
+ input_embeds=input_embeds,
+ forward_batch=forward_batch,
+ )
+
+ def load_kv_cache_scales(self, quantization_param_path: str) -> None:
+ tp_size = get_tensor_model_parallel_world_size()
+ tp_rank = get_tensor_model_parallel_rank()
+ for layer_idx, scaling_factor in kv_cache_scales_loader(
+ quantization_param_path,
+ tp_rank,
+ tp_size,
+ self.config.num_hidden_layers,
+ self.config.__class__.model_type,
+ ):
+ if not isinstance(self.decoder.layers[layer_idx], nn.Identity):
+ layer_self_attn = self.decoder.layers[layer_idx].self_attn
+
+ if hasattr(layer_self_attn.attn, "k_scale"):
+ layer_self_attn.attn.k_scale = scaling_factor
+ layer_self_attn.attn.v_scale = scaling_factor
+ else:
+ raise RuntimeError(
+ "Self attention has no KV cache scaling factor attribute!"
+ )
+
+
+class OPTForCausalLM(nn.Module):
+ # BitandBytes specific attributes
+ # in TP, these weights are partitioned along the column dimension (dim=-1)
+ column_parallel_weights_modules = [".down_proj.", ".o_proj."]
+
+ def __init__(
+ self,
+ config: OPTConfig,
+ quant_config: Optional[QuantizationConfig] = None,
+ prefix: str = "",
+ ):
+ super().__init__()
+ self.config = config
+ self.quant_config = quant_config
+
+ self.model = OPTModel(
+ config=config, quant_config=quant_config, prefix=add_prefix("model", prefix)
+ )
+ if self.config.tie_word_embeddings:
+ self.lm_head = self.model.decoder.embed_tokens
+ else:
+ self.lm_head = ParallelLMHead(
+ config.vocab_size,
+ config.word_embed_proj_dim,
+ prefix=add_prefix("lm_head", prefix),
+ )
+ self.logits_processor = LogitsProcessor(config)
+ self.pooler = Pooler(pooling_type=PoolingType.LAST, normalize=True)
+ self.capture_aux_hidden_states = False
+ self.pp_group = get_pp_group()
+ self.stacked_params_mapping = [
+ # (param_name, shard_name, shard_id)
+ (".qkv_proj", ".q_proj", "q"),
+ (".qkv_proj", ".k_proj", "k"),
+ (".qkv_proj", ".v_proj", "v"),
+ ]
+
+ def forward(
+ self,
+ input_ids: torch.Tensor,
+ positions: torch.Tensor,
+ forward_batch: ForwardBatch,
+ pp_proxy_tensors: Optional[PPProxyTensors] = None,
+ input_embeds: Optional[torch.Tensor] = None,
+ get_embedding: bool = False,
+ ) -> LogitsProcessorOutput:
+ hidden_states = self.model(
+ input_ids=input_ids,
+ positions=positions,
+ forward_batch=forward_batch,
+ input_embeds=input_embeds,
+ pp_proxy_tensors=pp_proxy_tensors,
+ )
+ aux_hidden_states = None
+ if self.capture_aux_hidden_states:
+ hidden_states, aux_hidden_states = hidden_states
+
+ if self.pp_group.is_last_rank:
+ if not get_embedding:
+ return self.logits_processor(
+ input_ids,
+ hidden_states,
+ self.lm_head,
+ forward_batch,
+ aux_hidden_states=aux_hidden_states,
+ )
+ else:
+ return self.pooler(hidden_states, forward_batch)
+ else:
+ return hidden_states
+
+ def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> None:
+ stacked_params_mapping = [
+ # (param_name, shard_name, shard_id)
+ ("qkv_proj", "q_proj", "q"),
+ ("qkv_proj", "k_proj", "k"),
+ ("qkv_proj", "v_proj", "v"),
+ ]
+ params_dict = dict(self.named_parameters(remove_duplicate=False))
+
+ for name, loaded_weight in weights:
+ if name.startswith("decoder"):
+ name = name.replace("decoder.", "model.decoder.")
+ layer_id = get_layer_id(name)
+ if (
+ layer_id is not None
+ and hasattr(self.model, "start_layer")
+ and (
+ layer_id < self.model.start_layer
+ or layer_id >= self.model.end_layer
+ )
+ ):
+ continue
+ for param_name, weight_name, shard_id in stacked_params_mapping:
+ if weight_name not in name:
+ continue
+ name = name.replace(weight_name, param_name)
+ # Skip loading extra bias for GPTQ models.
+ if name.endswith(".bias") and name not in params_dict:
+ continue
+ param = params_dict[name]
+ weight_loader = param.weight_loader
+ weight_loader(param, loaded_weight, shard_id)
+ break
+ else:
+ # Skip loading extra bias for GPTQ models.
+ if name.endswith(".bias") and name not in params_dict:
+ continue
+ if name in params_dict:
+ param = params_dict[name]
+ weight_loader = getattr(
+ param, "weight_loader", default_weight_loader
+ )
+ weight_loader(param, loaded_weight)
+ else:
+ logger.warning(f"Parameter {name} not found in params_dict")
+
+ @property
+ def start_layer(self):
+ return self.model.start_layer
+
+ @property
+ def end_layer(self):
+ return self.model.end_layer
+
+ def get_input_embeddings(self) -> nn.Embedding:
+ return self.model.decoder.embed_tokens
+
+ def get_module_name_from_weight_name(self, name):
+ # self.stacked_params_mapping only fuses q/k/v into qkv_proj (3 shards).
+ for param_name, weight_name, shard_id in self.stacked_params_mapping:
+ if weight_name in name:
+ return (
+ name.replace(weight_name, param_name)[: -len(".weight")],
+ 3,
+ )
+ return name[: -len(".weight")], 1
+
+ def get_num_params(self):
+ params_dict = dict(self.named_parameters())
+ return len(params_dict)
+
+ def get_weights_by_name(
+ self, name: str, truncate_size: int = 100, tp_size: int = 1
+ ) -> Optional[torch.Tensor]:
+ """Get the weights of the parameter by its name. Similar to `get_parameter` in Hugging Face.
+
+ Only used for unit test with an unoptimized performance.
+ For optimized performance, please use torch.save and torch.load.
+ """
+ try:
+ if name == "lm_head.weight" and self.config.tie_word_embeddings:
+ logger.info(
+ "word embedding is tied for this model, return embed_tokens.weight as lm_head.weight."
+ )
+ return (
+ self.model.decoder.embed_tokens.weight.cpu()
+ .to(torch.float32)
+ .numpy()
+ .tolist()[:truncate_size]
+ )
+
+ mapped_name = name
+ mapped_shard_id = None
+ for param_name, weight_name, shard_id in self.stacked_params_mapping:
+ if weight_name in name:
+ mapped_name = name.replace(weight_name, param_name)
+ mapped_shard_id = shard_id
+ break
+ params_dict = dict(self.named_parameters())
+ param = params_dict[mapped_name]
+ if mapped_shard_id is not None:
+ if mapped_shard_id in ["q", "k", "v"]:
+ num_heads = self.config.num_attention_heads // tp_size
+ num_kv_heads = self.config.num_attention_heads // tp_size
+ head_dim = (
+ self.config.hidden_size // self.config.num_attention_heads
+ )
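+ # The fused qkv_proj weight is laid out as [q | k | v] along dim 0;
+ # narrow() selects the slice for the requested shard.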
+ if mapped_shard_id == "q":
+ offset = 0
+ size = num_heads * head_dim
+ elif mapped_shard_id == "k":
+ offset = num_heads * head_dim
+ size = num_kv_heads * head_dim
+ elif mapped_shard_id == "v":
+ offset = (num_heads + num_kv_heads) * head_dim
+ size = num_kv_heads * head_dim
+ weight = param.data.narrow(0, offset, size)
+ elif mapped_shard_id in [0, 1]:
+ intermediate_size = self.config.ffn_dim
+ slice_size = intermediate_size // tp_size
+ if mapped_shard_id == 0: # gate_proj
+ offset = 0
+ size = slice_size
+ elif mapped_shard_id == 1: # up_proj
+ offset = slice_size
+ size = slice_size
+
+ weight = param.data.narrow(0, offset, size)
+ else:
+ weight = param.data
+ else:
+ weight = param.data
+ if tp_size > 1 and ("o_proj" in name or "down_proj" in name):
+ gathered_weights = [torch.zeros_like(weight) for _ in range(tp_size)]
+ torch.distributed.all_gather(gathered_weights, weight)
+ weight = torch.cat(gathered_weights, dim=1)
+ return weight.cpu().to(torch.float32).numpy().tolist()[:truncate_size]
+
+ except Exception:
+ logger.error(
+ f"Error getting weights by name {name} in OPTForCausalLM: {get_exception_traceback()}"
+ )
+ return None
+
+ def get_embed_and_head(self):
+ return self.model.decoder.embed_tokens.weight, self.lm_head.weight
+
+ def set_embed_and_head(self, embed, head):
+ del self.model.decoder.embed_tokens.weight
+ del self.lm_head.weight
+ self.model.decoder.embed_tokens.weight = embed
+ self.lm_head.weight = head
+ torch.cuda.empty_cache()
+ torch.cuda.synchronize()
+
+ def get_embed(self):
+ return self.model.decoder.embed_tokens.weight
+
+ def set_embed(self, embed):
+ # NOTE: If draft hidden size != target hidden size, the embed weight cannot be shared for EAGLE3
+ if (
+ hasattr(self.config, "target_hidden_size")
+ and self.config.target_hidden_size != self.config.hidden_size
+ ):
+ return
+ del self.model.decoder.embed_tokens.weight
+ self.model.decoder.embed_tokens.weight = embed
+ torch.cuda.empty_cache()
+ torch.cuda.synchronize()
+
+ def load_kv_cache_scales(self, quantization_param_path: str) -> None:
+ self.model.load_kv_cache_scales(quantization_param_path)
+
+
+EntryClass = [OPTForCausalLM]
diff --git a/python/sglang/srt/models/phi4mm.py b/python/sglang/srt/models/phi4mm.py
index e1c5fee7837..37a638acb5c 100644
--- a/python/sglang/srt/models/phi4mm.py
+++ b/python/sglang/srt/models/phi4mm.py
@@ -54,25 +54,6 @@
}
-def get_navit_vision_model():
- vision_config = {
- "hidden_size": 1152,
- "image_size": 448,
- "intermediate_size": 4304,
- "model_type": "siglip_vision_model",
- "num_attention_heads": 16,
- "num_hidden_layers": 26, # Model is originally 27-layer, we only need the first 26 layers for feature extraction.
- "patch_size": 14,
- }
- model_config = SiglipVisionConfig(**vision_config)
-
- vision_model = Idefics2VisionTransformer(
- config=model_config, require_post_norm=False
- )
-
- return vision_model
-
-
class Phi4MMImageEncoder(nn.Module):
"""Image embedding."""
@@ -88,8 +69,9 @@ def __init__(
# n_embed or hidden_size
hidden_size = config.n_embd if hasattr(config, "n_embd") else config.hidden_size
self.type_feature = "patch"
-
- self.img_processor = get_navit_vision_model()
+ self.img_processor = Idefics2VisionTransformer(
+ config=config.vision_config, require_post_norm=False
+ )
pe_weight = self.img_processor.embeddings.position_embedding.weight
L, D = pe_weight.size()
diff --git a/python/sglang/srt/models/qwen2.py b/python/sglang/srt/models/qwen2.py
index 2b1ea57fd89..847d7afa2d3 100644
--- a/python/sglang/srt/models/qwen2.py
+++ b/python/sglang/srt/models/qwen2.py
@@ -16,7 +16,7 @@
# Modify details for the adaptation of Qwen2 model.
"""Inference-only Qwen2 model compatible with HuggingFace weights."""
import logging
-from typing import Any, Dict, Iterable, Optional, Tuple, Union
+from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
import torch
from torch import nn
@@ -98,6 +98,7 @@ def forward(self, x):
class Qwen2Attention(nn.Module):
def __init__(
self,
+ config: Qwen2Config,
hidden_size: int,
num_heads: int,
num_kv_heads: int,
@@ -107,7 +108,7 @@ def __init__(
rope_scaling: Optional[Dict[str, Any]] = None,
max_position_embeddings: int = 32768,
quant_config: Optional[QuantizationConfig] = None,
- dual_chunk_attention_config: Optional[dict[str, Any]] = None,
+ previous_layer: Optional["Qwen2Attention"] = None,
prefix: str = "",
) -> None:
super().__init__()
@@ -153,14 +154,17 @@ def __init__(
prefix=add_prefix("o_proj", prefix),
)
- self.rotary_emb = get_rope(
- self.head_dim,
- rotary_dim=self.head_dim,
- max_position=max_position_embeddings,
- base=rope_theta,
- rope_scaling=rope_scaling,
- dual_chunk_attention_config=dual_chunk_attention_config,
- )
+ if previous_layer is None:
+ self.rotary_emb = get_rope(
+ self.head_dim,
+ rotary_dim=self.head_dim,
+ max_position=max_position_embeddings,
+ base=rope_theta,
+ rope_scaling=rope_scaling,
+ )
+ else:
+ assert self.head_dim == previous_layer.head_dim
+ self.rotary_emb = previous_layer.rotary_emb
self.attn = RadixAttention(
self.num_heads,
self.head_dim,
@@ -169,6 +173,10 @@ def __init__(
layer_id=layer_id,
quant_config=quant_config,
prefix=add_prefix("attn", prefix),
+ orig_context_len=getattr(
+ config, "orig_context_len", max_position_embeddings
+ ),
+ rope=self.rotary_emb,
)
def forward(
@@ -179,7 +187,13 @@ def forward(
) -> torch.Tensor:
qkv, _ = self.qkv_proj(hidden_states)
q, k, v = qkv.split([self.q_size, self.kv_size, self.kv_size], dim=-1)
- q, k = self.rotary_emb(positions, q, k)
+
+ # RoPE is applied inside the attention kernel in HiP Attention
+ if (forward_batch.hip_metadata_cache_pool is None) or (
+ not forward_batch.hip_metadata_cache_pool.hip_config.using_extend
+ ):
+ q, k = self.rotary_emb(positions, q, k)
+
attn_output = self.attn(q, k, v, forward_batch)
output, _ = self.o_proj(attn_output)
return output
@@ -204,6 +218,7 @@ def __init__(
config, "dual_chunk_attention_config", None
)
self.self_attn = Qwen2Attention(
+ config=config,
hidden_size=self.hidden_size,
num_heads=config.num_attention_heads,
num_kv_heads=config.num_key_value_heads,
@@ -213,7 +228,9 @@ def __init__(
rope_scaling=rope_scaling,
max_position_embeddings=max_position_embeddings,
quant_config=quant_config,
- dual_chunk_attention_config=dual_chunk_attention_config,
+ previous_layer=(
+ previous_layer.self_attn if previous_layer is not None else None
+ ),
prefix=add_prefix("self_attn", prefix),
)
self.mlp = Qwen2MLP(
@@ -330,8 +347,11 @@ def forward(
hidden_states = pp_proxy_tensors["hidden_states"]
residual = pp_proxy_tensors["residual"]
+ forward_batch.on_model_start()
+
aux_hidden_states = []
for i in range(self.start_layer, self.end_layer):
+ forward_batch.on_layer_start(i)
if i in self.layers_to_capture:
aux_hidden_states.append(
hidden_states + residual if residual is not None else hidden_states
@@ -343,6 +363,8 @@ def forward(
forward_batch,
residual,
)
+ forward_batch.on_layer_end(i)
+ forward_batch.on_model_end()
if not self.pp_group.is_last_rank:
return PPProxyTensors(
{
@@ -405,6 +427,7 @@ class Qwen2ForCausalLM(nn.Module):
"gate_proj": ("gate_up_proj", 0),
"up_proj": ("gate_up_proj", 1),
}
+ hip_attention_supported = True
def __init__(
self,
@@ -431,7 +454,6 @@ def __init__(
quant_config=quant_config,
prefix=add_prefix("lm_head", prefix),
)
-
else:
# ranks other than the last rank will have a placeholder layer
self.lm_head = PPMissingLayer()
@@ -452,6 +474,11 @@ def __init__(
self.logits_processor = LogitsProcessor(config)
self.pooler = Pooler(pooling_type=PoolingType.LAST, normalize=True)
+ # For EAGLE3 support
+ self.capture_aux_hidden_states = False
+
def get_input_embedding(self, input_ids: torch.Tensor) -> torch.Tensor:
return self.model.get_input_embedding(input_ids)
@@ -476,11 +503,22 @@ def forward(
input_embeds,
pp_proxy_tensors=pp_proxy_tensors,
)
+ aux_hidden_states = None
+ if self.capture_aux_hidden_states:
+ hidden_states, aux_hidden_states = hidden_states
+
if self.pp_group.is_last_rank:
if not get_embedding:
return self.logits_processor(
- input_ids, hidden_states, self.lm_head, forward_batch
+ input_ids,
+ hidden_states,
+ self.lm_head,
+ forward_batch,
+ aux_hidden_states,
)
else:
return self.pooler(hidden_states, forward_batch)
@@ -619,5 +657,20 @@ def set_embed_and_head(self, embed, head):
def load_kv_cache_scales(self, quantization_param_path: str) -> None:
self.model.load_kv_cache_scales(quantization_param_path)
+ def set_eagle3_layers_to_capture(self, layer_ids: Optional[List[int]] = None):
+ if not self.pp_group.is_last_rank:
+ return
+
+ self.capture_aux_hidden_states = True
+ if layer_ids is None:
+ num_layers = self.config.num_hidden_layers
+ self.model.layers_to_capture = [
+ 2,
+ num_layers // 2,
+ num_layers - 3,
+ ] # Specific layers for EAGLE3 support
+ else:
+ self.model.layers_to_capture = [val + 1 for val in layer_ids]
+
EntryClass = Qwen2ForCausalLM
diff --git a/python/sglang/srt/models/qwen2_5_vl.py b/python/sglang/srt/models/qwen2_5_vl.py
index 3d7567d2c59..84156a0fbde 100644
--- a/python/sglang/srt/models/qwen2_5_vl.py
+++ b/python/sglang/srt/models/qwen2_5_vl.py
@@ -31,7 +31,6 @@
import torch.nn.functional as F
from einops import rearrange
from transformers.activations import ACT2FN
-from transformers.models.qwen2.modeling_qwen2 import Qwen2RMSNorm
from transformers.models.qwen2_5_vl.configuration_qwen2_5_vl import (
Qwen2_5_VLConfig,
Qwen2_5_VLVisionConfig,
@@ -43,7 +42,12 @@
from sglang.srt.hf_transformers_utils import get_processor
from sglang.srt.layers.attention.vision import VisionAttention
-from sglang.srt.layers.linear import ColumnParallelLinear, RowParallelLinear
+from sglang.srt.layers.layernorm import RMSNorm
+from sglang.srt.layers.linear import (
+ ColumnParallelLinear,
+ MergedColumnParallelLinear,
+ RowParallelLinear,
+)
from sglang.srt.layers.logits_processor import LogitsProcessor
from sglang.srt.layers.pooler import Pooler, PoolingType
from sglang.srt.layers.quantization.base_config import QuantizationConfig
@@ -62,7 +66,6 @@
class Qwen2_5_VLMLP(nn.Module):
-
def __init__(
self,
in_features: int,
@@ -73,19 +76,12 @@ def __init__(
prefix: str = "",
):
super().__init__()
- self.gate_proj = ColumnParallelLinear(
- in_features,
- hidden_features,
+ self.gate_up_proj = MergedColumnParallelLinear(
+ input_size=in_features,
+ output_sizes=[hidden_features] * 2, # [gate_proj, up_proj]
bias=bias,
quant_config=quant_config,
- prefix=add_prefix("gate_proj", prefix),
- )
- self.up_proj = ColumnParallelLinear(
- in_features,
- hidden_features,
- bias=bias,
- quant_config=quant_config,
- prefix=add_prefix("up_proj", prefix),
+ prefix=add_prefix("gate_up_proj", prefix),
)
self.down_proj = RowParallelLinear(
hidden_features,
@@ -97,12 +93,11 @@ def __init__(
self.act = ACT2FN[hidden_act]
def forward(self, x: torch.Tensor) -> torch.Tensor:
- x_parallel_gate, _ = self.gate_proj(x)
- x_parallel_gate = self.act(x_parallel_gate)
- x_parallel_up, _ = self.up_proj(x)
- x_parallel = x_parallel_gate * x_parallel_up
- x, _ = self.down_proj(x_parallel)
- return x
+ gate_up, _ = self.gate_up_proj(x)
+ gate, up = gate_up.chunk(2, dim=-1)
+ x = self.act(gate) * up
+ x_down, _ = self.down_proj(x)
+ return x_down
class Qwen2_5_VisionBlock(nn.Module):
@@ -117,12 +112,14 @@ def __init__(
attn_implementation: Optional[str] = None,
quant_config: Optional[QuantizationConfig] = None,
prefix: str = "",
+ num_dummy_heads: int = 0,
+ rms_norm_eps: float = 1e-6,
) -> None:
super().__init__()
if norm_layer is None:
norm_layer = partial(nn.LayerNorm, eps=1e-6)
- self.norm1 = Qwen2RMSNorm(dim, eps=1e-6)
- self.norm2 = Qwen2RMSNorm(dim, eps=1e-6)
+ self.norm1 = RMSNorm(dim, eps=rms_norm_eps)
+ self.norm2 = RMSNorm(dim, eps=rms_norm_eps)
if attn_implementation is None:
softmax_in_single_precision = False
@@ -157,6 +154,7 @@ def __init__(
flatten_batch=flatten_batch,
quant_config=quant_config,
prefix=add_prefix("attn", prefix),
+ num_dummy_heads=num_dummy_heads,
)
self.mlp = Qwen2_5_VLMLP(
dim,
@@ -172,18 +170,29 @@ def forward(
cu_seqlens: torch.Tensor,
position_embeddings: torch.Tensor,
) -> torch.Tensor:
- hidden_states = self.norm1(x)
- hidden_states = rearrange(hidden_states, "s b ... -> b s ...")
+ S, B, H = x.shape
+ # norm1: flatten to 2D -> [S*B, H], then reshape back
+ x2d = x.reshape(-1, H)
+ hidden_states = self.norm1(x2d).reshape(S, B, H)
+
+ # Attention expects [B, S, H]
+ hidden_states = rearrange(hidden_states, "s b h -> b s h")
attn = self.attn(
hidden_states,
cu_seqlens=cu_seqlens,
position_embeddings=position_embeddings,
)
- attn = rearrange(attn, "b s ... -> s b ...")
- x = x + attn
- norm2 = self.norm2(x)
- mlp = self.mlp(norm2)
- x = x + mlp
+ attn = rearrange(attn, "b s h -> s b h")
+
+ # norm2 with fused residual-add: also 2D
+ attn2d = attn.reshape(-1, H)
+ x_norm_2d, x_after_add_2d = self.norm2(x2d, residual=attn2d)
+ x_norm = x_norm_2d.reshape(S, B, H)
+ x_after_add = x_after_add_2d.reshape(S, B, H)
+
+ # MLP and final residual
+ mlp_out = self.mlp(x_norm)
+ x = x_after_add + mlp_out
return x
@@ -199,7 +208,7 @@ def __init__(
) -> None:
super().__init__()
self.hidden_size = context_dim * (spatial_merge_size**2)
- self.ln_q = Qwen2RMSNorm(context_dim, eps=1e-6)
+ self.ln_q = RMSNorm(context_dim, eps=1e-6)
self.mlp = nn.ModuleList(
[
ColumnParallelLinear(
@@ -221,11 +230,13 @@ def __init__(
)
def forward(self, x: torch.Tensor) -> torch.Tensor:
- x = self.ln_q(x)
- x = x.view(-1, self.hidden_size)
-
+ # x expected shape: [S, B, context_dim]
+ S, B, D = x.shape
+ x2d = x.reshape(-1, D)
+ x2d = self.ln_q(x2d) # RMSNorm expects 2D
+ x2d = x2d.view(-1, self.hidden_size) # group into spatial_merge_unit
mlp_fc1, mlp_act, mlp_fc2 = self.mlp
- x_parallel, _ = mlp_fc1(x)
+ x_parallel, _ = mlp_fc1(x2d)
x_parallel = mlp_act(x_parallel)
out, _ = mlp_fc2(x_parallel)
return out
@@ -338,7 +349,7 @@ def dtype(self) -> torch.dtype:
@property
def device(self) -> torch.device:
- return self.blocks[0].mlp.gate_proj.weight.device
+ return self.patch_embed.proj.weight.device
def rot_pos_emb(self, grid_thw: torch.Tensor) -> torch.Tensor:
pos_ids = []
@@ -392,6 +403,12 @@ def forward(
)
cu_window_seqlens = torch.unique_consecutive(cu_window_seqlens)
+ # Move window_index to the same device as x before using it to index x
+ window_index = window_index.to(device=x.device)
+
+ # Ensure rotary_pos_emb is on the same device/dtype as x
+ rotary_pos_emb = rotary_pos_emb.to(device=x.device, dtype=x.dtype)
+
seq_len, _ = x.size()
x = x.reshape(seq_len // self.spatial_merge_unit, self.spatial_merge_unit, -1)
@@ -404,12 +421,19 @@ def forward(
rotary_pos_emb = rotary_pos_emb.reshape(seq_len, -1)
emb = torch.cat((rotary_pos_emb, rotary_pos_emb), dim=-1)
position_embeddings = (emb.cos(), emb.sin())
+ # After building position_embeddings, make sure both cos and sin are on the same device/dtype as the attention input
+ position_embeddings = (
+ position_embeddings[0].to(x.device, x.dtype),
+ position_embeddings[1].to(x.device, x.dtype),
+ )
- # compute cu_seqlens
+ # compute cu_seqlens - move cu_seqlens to GPU and make it int32
cu_seqlens = torch.cat(
[
- torch.tensor([0], device=grid_thw.device),
- (grid_thw[:, 0] * grid_thw[:, 1] * grid_thw[:, 2]).cumsum(dim=0),
+ torch.tensor([0], device=x.device, dtype=torch.int32),
+ (grid_thw[:, 0] * grid_thw[:, 1] * grid_thw[:, 2])
+ .cumsum(dim=0)
+ .to(device=x.device, dtype=torch.int32),
]
)
cu_seqlens = F.pad(cu_seqlens, (1, 0), "constant", 0)
@@ -440,9 +464,8 @@ def forward(
class Qwen2_5_VLForConditionalGeneration(nn.Module):
# BitandBytes specific attributes
default_bitsandbytes_target_modules = [
- ".gate_proj.",
+ ".gate_up_proj.",
".down_proj.",
- ".up_proj.",
".q_proj.",
".k_proj.",
".v_proj.",
@@ -456,6 +479,7 @@ class Qwen2_5_VLForConditionalGeneration(nn.Module):
"gate_proj": ("gate_up_proj", 0),
"up_proj": ("gate_up_proj", 1),
}
+ hip_attention_supported = True
def __init__(
self,
@@ -495,6 +519,9 @@ def __init__(
self.logits_processor = LogitsProcessor(config)
self.pooler = Pooler(pooling_type=PoolingType.LAST, normalize=True)
+ # For EAGLE3 support
+ self.capture_aux_hidden_states = False
+
def pad_input_ids(self, input_ids: List[int], mm_inputs: MultimodalInputs):
pattern = MultiModalityDataPaddingPatternMultimodalTokens()
return pattern.pad_input_tokens(input_ids, mm_inputs)
@@ -524,6 +551,7 @@ def get_video_feature(self, items: List[MultimodalDataItem]) -> torch.Tensor:
def get_input_embeddings(self):
return self.model.embed_tokens
+ @torch.no_grad()
def forward(
self,
input_ids: torch.Tensor,
@@ -564,9 +592,13 @@ def forward(
positions=positions,
)
+ aux_hidden_states = None
+ if self.capture_aux_hidden_states:
+ hidden_states, aux_hidden_states = hidden_states
+
if not get_embedding:
return self.logits_processor(
- input_ids, hidden_states, self.lm_head, forward_batch
+ input_ids, hidden_states, self.lm_head, forward_batch, aux_hidden_states
)
else:
return self.pooler(hidden_states, forward_batch)
@@ -588,7 +620,11 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
for param_name, weight_name, shard_id in stacked_params_mapping:
if weight_name not in name:
continue
- if "visual" in name:
+ if (
+ "visual" in name
+ and "up_proj" not in name
+ and "gate_proj" not in name
+ ):
continue
name = name.replace(weight_name, param_name)
@@ -616,5 +652,21 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
weight_loader = getattr(param, "weight_loader", default_weight_loader)
weight_loader(param, loaded_weight)
+ def get_embed_and_head(self):
+ return self.model.embed_tokens.weight, self.lm_head.weight
+
+ def set_eagle3_layers_to_capture(self, layer_ids: Optional[List[int]] = None):
+ self.capture_aux_hidden_states = True
+ self.model.capture_aux_hidden_states = True
+ if layer_ids is None:
+ num_layers = self.config.num_hidden_layers
+ self.model.layers_to_capture = [
+ 2,
+ num_layers // 2,
+ num_layers - 3,
+ ] # Specific layers for EAGLE3 support
+ else:
+ self.model.layers_to_capture = [val + 1 for val in layer_ids]
+
EntryClass = [Qwen2_5_VLForConditionalGeneration]
diff --git a/python/sglang/srt/models/qwen2_moe.py b/python/sglang/srt/models/qwen2_moe.py
index a3427e068c9..d0ff693ecd6 100644
--- a/python/sglang/srt/models/qwen2_moe.py
+++ b/python/sglang/srt/models/qwen2_moe.py
@@ -17,7 +17,7 @@
"""Inference-only Qwen2MoE model compatible with HuggingFace weights."""
import logging
-from typing import Any, Dict, Iterable, Optional, Tuple, Union
+from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
import torch
import torch.nn.functional as F
@@ -65,10 +65,12 @@
from sglang.srt.model_executor.forward_batch_info import ForwardBatch, PPProxyTensors
from sglang.srt.model_loader.weight_utils import default_weight_loader
from sglang.srt.two_batch_overlap import model_forward_maybe_tbo
-from sglang.srt.utils import add_prefix, make_layers
+from sglang.srt.utils import add_prefix, is_cuda, make_layers
logger = logging.getLogger(__name__)
+_is_cuda = is_cuda()
+
class Qwen2MoeMLP(nn.Module):
def __init__(
@@ -105,11 +107,14 @@ def __init__(
def forward(
self,
x,
+ should_allreduce_fusion: bool = False,
use_reduce_scatter: bool = False,
):
gate_up, _ = self.gate_up_proj(x)
x = self.act_fn(gate_up)
- x, _ = self.down_proj(x, skip_all_reduce=use_reduce_scatter)
+ x, _ = self.down_proj(
+ x, skip_all_reduce=should_allreduce_fusion or use_reduce_scatter
+ )
return x
@@ -119,11 +124,13 @@ def __init__(
layer_id: int,
config: PretrainedConfig,
quant_config: Optional[QuantizationConfig] = None,
+ alt_stream: Optional[torch.cuda.Stream] = None,
prefix: str = "",
):
super().__init__()
self.tp_size = get_tensor_model_parallel_world_size()
self.layer_id = layer_id
+ self.alt_stream = alt_stream
if self.tp_size > config.num_experts:
raise ValueError(
f"Tensor parallel size {self.tp_size} is greater than "
@@ -165,14 +172,7 @@ def __init__(
self.shared_expert = None
self.shared_expert_gate = torch.nn.Linear(config.hidden_size, 1, bias=False)
- def forward(
- self,
- hidden_states: torch.Tensor,
- forward_batch: Optional[ForwardBatch] = None,
- use_reduce_scatter: bool = False,
- ) -> torch.Tensor:
- num_tokens, hidden_dim = hidden_states.shape
- hidden_states = hidden_states.view(-1, hidden_dim)
+ def _forward_shared_experts(self, hidden_states: torch.Tensor):
shared_output = None
if self.shared_expert is not None:
shared_output = self.shared_expert(hidden_states)
@@ -180,11 +180,51 @@ def forward(
shared_output = (
F.sigmoid(self.shared_expert_gate(hidden_states)) * shared_output
)
+ return shared_output
+ def _forward_router_experts(self, hidden_states: torch.Tensor):
# router_logits: (num_tokens, n_experts)
router_logits, _ = self.gate(hidden_states)
topk_output = self.topk(hidden_states, router_logits)
- final_hidden_states = self.experts(hidden_states, topk_output)
+ return self.experts(hidden_states, topk_output)
+
+ def forward_normal_dual_stream(
+ self,
+ hidden_states: torch.Tensor,
+ ) -> Tuple[torch.Tensor, torch.Tensor]:
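+ # Overlap the shared-expert MLP (current stream) with the routed experts
+ # (alt_stream); only used for small batches, see forward() below.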
+ current_stream = torch.cuda.current_stream()
+ self.alt_stream.wait_stream(current_stream)
+ shared_output = self._forward_shared_experts(hidden_states)
+
+ with torch.cuda.stream(self.alt_stream):
+ router_output = self._forward_router_experts(hidden_states)
+
+ current_stream.wait_stream(self.alt_stream)
+
+ return router_output, shared_output
+
+ def forward(
+ self,
+ hidden_states: torch.Tensor,
+ forward_batch: Optional[ForwardBatch] = None,
+ use_reduce_scatter: bool = False,
+ ) -> torch.Tensor:
+ num_tokens, hidden_dim = hidden_states.shape
+ hidden_states = hidden_states.view(-1, hidden_dim)
+
+ DUAL_STREAM_TOKEN_THRESHOLD = 1024
+ if (
+ self.alt_stream is not None
+ and hidden_states.shape[0] > 0
+ and hidden_states.shape[0] <= DUAL_STREAM_TOKEN_THRESHOLD
+ ):
+ final_hidden_states, shared_output = self.forward_normal_dual_stream(
+ hidden_states
+ )
+ else:
+ shared_output = self._forward_shared_experts(hidden_states)
+ final_hidden_states = self._forward_router_experts(hidden_states)
+
if shared_output is not None:
final_hidden_states = final_hidden_states + shared_output
if self.tp_size > 1 and not use_reduce_scatter:
@@ -196,6 +236,7 @@ def forward(
class Qwen2MoeAttention(nn.Module):
def __init__(
self,
+ config: PretrainedConfig,
hidden_size: int,
num_heads: int,
num_kv_heads: int,
@@ -273,6 +314,10 @@ def __init__(
layer_id=layer_id,
quant_config=quant_config,
prefix=add_prefix("attn", prefix),
+ orig_context_len=getattr(
+ config, "orig_context_len", max_position_embeddings
+ ),
+ rope=self.rotary_emb,
)
def forward(
@@ -283,7 +328,13 @@ def forward(
) -> torch.Tensor:
qkv, _ = self.qkv_proj(hidden_states)
q, k, v = qkv.split([self.q_size, self.kv_size, self.kv_size], dim=-1)
- q, k = self.rotary_emb(positions, q, k)
+
+ # RoPE is applied inside the attention kernel in HiP Attention
+ if (forward_batch.hip_metadata_cache_pool is None) or (
+ not forward_batch.hip_metadata_cache_pool.hip_config.using_extend
+ ):
+ q, k = self.rotary_emb(positions, q, k)
+
attn_output = self.attn(q, k, v, forward_batch)
output, _ = self.o_proj(attn_output)
return output
@@ -309,6 +360,7 @@ def __init__(
config, "dual_chunk_attention_config", None
)
self.self_attn = Qwen2MoeAttention(
+ config=config,
hidden_size=self.hidden_size,
num_heads=config.num_attention_heads,
num_kv_heads=config.num_key_value_heads,
@@ -343,6 +395,7 @@ def __init__(
layer_id=layer_id,
config=config,
quant_config=quant_config,
+ alt_stream=alt_stream,
prefix=add_prefix("mlp", prefix),
)
else:
@@ -480,6 +533,7 @@ def forward(
residual=residual,
)
else:
+ forward_batch.on_model_start()
for i in range(self.start_layer, self.end_layer):
if i in self.layers_to_capture:
aux_hidden_states.append(
@@ -488,10 +542,13 @@ def forward(
else hidden_states
)
with get_global_expert_distribution_recorder().with_current_layer(i):
+ forward_batch.on_layer_start(i)
layer = self.layers[i]
hidden_states, residual = layer(
positions, hidden_states, forward_batch, residual
)
+ forward_batch.on_layer_end(i)
+ forward_batch.on_model_end()
if not self.pp_group.is_last_rank:
return PPProxyTensors(
{
@@ -515,6 +572,8 @@ def forward(
class Qwen2MoeForCausalLM(nn.Module):
fall_back_to_pt_during_load = False
+ hip_attention_supported = True
+
def __init__(
self,
config: PretrainedConfig,
@@ -525,8 +584,12 @@ def __init__(
self.pp_group = get_pp_group()
self.config = config
self.quant_config = quant_config
+ alt_stream = torch.cuda.Stream() if _is_cuda else None
self.model = Qwen2MoeModel(
- config, quant_config, prefix=add_prefix("model", prefix)
+ config,
+ quant_config,
+ prefix=add_prefix("model", prefix),
+ alt_stream=alt_stream,
)
self.lm_head = ParallelLMHead(
config.vocab_size,
@@ -536,6 +599,8 @@ def __init__(
use_attn_tp_group=global_server_args_dict["enable_dp_lm_head"],
)
self.logits_processor = LogitsProcessor(config)
+ # For EAGLE3 support
+ self.capture_aux_hidden_states = False
@torch.no_grad()
def forward(
@@ -553,9 +618,12 @@ def forward(
input_embeds,
pp_proxy_tensors=pp_proxy_tensors,
)
+ aux_hidden_states = None
+ if self.capture_aux_hidden_states:
+ hidden_states, aux_hidden_states = hidden_states
if self.pp_group.is_last_rank:
return self.logits_processor(
- input_ids, hidden_states, self.lm_head, forward_batch
+ input_ids, hidden_states, self.lm_head, forward_batch, aux_hidden_states
)
else:
return hidden_states
@@ -705,5 +773,20 @@ def get_model_config_for_expert_location(cls, config):
num_groups=None,
)
+ def set_eagle3_layers_to_capture(self, layer_ids: Optional[List[int]] = None):
+ if not self.pp_group.is_last_rank:
+ return
+
+ self.capture_aux_hidden_states = True
+ if layer_ids is None:
+ num_layers = self.config.num_hidden_layers
+ self.model.layers_to_capture = [
+ 2,
+ num_layers // 2,
+ num_layers - 3,
+ ] # Specific layers for EAGLE3 support
+ else:
+ self.model.layers_to_capture = [val + 1 for val in layer_ids]
+
EntryClass = Qwen2MoeForCausalLM
diff --git a/python/sglang/srt/models/qwen3.py b/python/sglang/srt/models/qwen3.py
index 04120e77b3a..855c1217c05 100644
--- a/python/sglang/srt/models/qwen3.py
+++ b/python/sglang/srt/models/qwen3.py
@@ -22,9 +22,12 @@
from sglang.srt.layers.rotary_embedding import get_rope
from sglang.srt.layers.utils import PPMissingLayer, get_layer_id
from sglang.srt.layers.vocab_parallel_embedding import ParallelLMHead
+from sglang.srt.model_executor.cuda_graph_runner import get_is_capture_mode
from sglang.srt.model_executor.forward_batch_info import ForwardBatch, PPProxyTensors
-from sglang.srt.model_executor.graph_runner import get_is_capture_mode
-from sglang.srt.model_loader.weight_utils import default_weight_loader
+from sglang.srt.model_loader.weight_utils import (
+ default_weight_loader,
+ maybe_remap_kv_scale_name,
+)
from sglang.srt.models.qwen2 import Qwen2MLP as Qwen3MLP
from sglang.srt.models.qwen2 import Qwen2Model
from sglang.srt.utils import add_prefix, is_cuda
@@ -38,6 +41,7 @@
class Qwen3Attention(nn.Module):
def __init__(
self,
+ config: Qwen3Config,
hidden_size: int,
num_heads: int,
num_kv_heads: int,
@@ -118,6 +122,10 @@ def __init__(
num_kv_heads=self.num_kv_heads,
layer_id=layer_id,
prefix=add_prefix("attn", prefix),
+ orig_context_len=getattr(
+ config, "orig_context_len", max_position_embeddings
+ ),
+ rope=self.rotary_emb,
)
self.alt_stream = alt_stream
@@ -152,7 +160,13 @@ def forward(
qkv, _ = self.qkv_proj(hidden_states)
q, k, v = qkv.split([self.q_size, self.kv_size, self.kv_size], dim=-1)
q, k = self._apply_qk_norm(q, k)
- q, k = self.rotary_emb(positions, q, k)
+
+ # RoPE is applied inside the attention kernel in HiP Attention
+ if (forward_batch.hip_metadata_cache_pool is None) or (
+ not forward_batch.hip_metadata_cache_pool.hip_config.using_extend
+ ):
+ q, k = self.rotary_emb(positions, q, k)
+
attn_output = self.attn(q, k, v, forward_batch)
output, _ = self.o_proj(attn_output)
return output
@@ -174,6 +188,7 @@ def __init__(
max_position_embeddings = getattr(config, "max_position_embeddings", 32768)
head_dim = getattr(config, "head_dim", None)
self.self_attn = Qwen3Attention(
+ config=config,
hidden_size=self.hidden_size,
num_heads=config.num_attention_heads,
num_kv_heads=config.num_key_value_heads,
@@ -278,6 +293,8 @@ class Qwen3ForCausalLM(nn.Module):
"up_proj": ("gate_up_proj", 1),
}
+ hip_attention_supported = True
+
def __init__(
self,
config: Qwen3Config,
@@ -327,8 +344,8 @@ def __init__(
# For EAGLE3 support
self.capture_aux_hidden_states = False
- def get_input_embeddings(self, input_ids: torch.Tensor) -> torch.Tensor:
- return self.model.get_input_embeddings(input_ids)
+ def get_input_embeddings(self) -> nn.Embedding:
+ return self.model.get_input_embeddings()
@torch.no_grad()
def forward(
@@ -458,7 +475,10 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
continue
if name.startswith("model.vision_tower") and name not in params_dict:
continue
-
+ if "scale" in name:
+ name = maybe_remap_kv_scale_name(name, params_dict)
+ if name is None:
+ continue
for param_name, weight_name, shard_id in stacked_params_mapping:
if weight_name not in name:
continue
diff --git a/python/sglang/srt/models/qwen3_classification.py b/python/sglang/srt/models/qwen3_classification.py
index 54802b558bd..a59d6769bcd 100644
--- a/python/sglang/srt/models/qwen3_classification.py
+++ b/python/sglang/srt/models/qwen3_classification.py
@@ -42,7 +42,13 @@ def __init__(
# Use normalize=True for qwen3 embedding based on official implementation
# Reference: https://github.com/QwenLM/Qwen3-Embedding/blob/main/examples/qwen3_embedding_transformers.py#L55
# Official code: output = F.normalize(output, p=2, dim=1)
- self.pooler = Pooler(pooling_type=PoolingType.LAST, normalize=True)
+ normalize = True
+
+ # We don't want to normalize the embedding if we have a classification head
+ if config.id2label is not None or config.label2id is not None:
+ normalize = False
+
+ self.pooler = Pooler(pooling_type=PoolingType.LAST, normalize=normalize)
self.eos_token_id = config.eos_token_id
diff --git a/python/sglang/srt/models/qwen3_moe.py b/python/sglang/srt/models/qwen3_moe.py
index 26971c119c5..b4069aef678 100644
--- a/python/sglang/srt/models/qwen3_moe.py
+++ b/python/sglang/srt/models/qwen3_moe.py
@@ -42,7 +42,10 @@
RowParallelLinear,
)
from sglang.srt.layers.logits_processor import LogitsProcessor
-from sglang.srt.layers.moe import get_moe_a2a_backend
+from sglang.srt.layers.moe import (
+ get_moe_a2a_backend,
+ should_use_flashinfer_cutlass_moe_fp4_allgather,
+)
from sglang.srt.layers.moe.ep_moe.layer import get_moe_impl_class
from sglang.srt.layers.moe.fused_moe_triton.layer import FusedMoE
from sglang.srt.layers.moe.topk import TopK
@@ -52,15 +55,22 @@
from sglang.srt.layers.utils import get_layer_id
from sglang.srt.layers.vocab_parallel_embedding import ParallelLMHead
from sglang.srt.managers.schedule_batch import global_server_args_dict
+from sglang.srt.model_executor.cuda_graph_runner import get_is_capture_mode
from sglang.srt.model_executor.forward_batch_info import ForwardBatch, PPProxyTensors
-from sglang.srt.model_executor.graph_runner import get_is_capture_mode
from sglang.srt.model_loader.weight_utils import default_weight_loader
from sglang.srt.models.qwen2_moe import Qwen2MoeMLP as Qwen3MoeMLP
from sglang.srt.models.qwen2_moe import Qwen2MoeModel
-from sglang.srt.utils import add_prefix, is_cuda, is_non_idle_and_non_empty
+from sglang.srt.utils import (
+ add_prefix,
+ is_cuda,
+ is_flashinfer_available,
+ is_non_idle_and_non_empty,
+)
Qwen3MoeConfig = None
+_is_flashinfer_available = is_flashinfer_available()
+
logger = logging.getLogger(__name__)
_is_cuda = is_cuda()
@@ -119,11 +129,14 @@ def forward(
self,
hidden_states: torch.Tensor,
forward_batch: Optional[ForwardBatch] = None,
+ should_allreduce_fusion: bool = False,
use_reduce_scatter: bool = False,
) -> torch.Tensor:
if not get_moe_a2a_backend().is_deepep():
- return self.forward_normal(hidden_states, use_reduce_scatter)
+ return self.forward_normal(
+ hidden_states, should_allreduce_fusion, use_reduce_scatter
+ )
else:
return self.forward_deepep(hidden_states, forward_batch)
@@ -137,6 +150,7 @@ def get_moe_weights(self):
def forward_normal(
self,
hidden_states: torch.Tensor,
+ should_allreduce_fusion: bool = False,
use_reduce_scatter: bool = False,
) -> torch.Tensor:
num_tokens, hidden_dim = hidden_states.shape
@@ -146,7 +160,12 @@ def forward_normal(
router_logits, _ = self.gate(hidden_states)
topk_output = self.topk(hidden_states, router_logits)
final_hidden_states = self.experts(hidden_states, topk_output)
- if self.tp_size > 1 and not use_reduce_scatter:
+ if (
+ self.tp_size > 1
+ and not should_allreduce_fusion
+ and not use_reduce_scatter
+ and not should_use_flashinfer_cutlass_moe_fp4_allgather()
+ ):
final_hidden_states = tensor_model_parallel_all_reduce(final_hidden_states)
return final_hidden_states.view(num_tokens, hidden_dim)
@@ -262,6 +281,7 @@ def op_output(self, state):
class Qwen3MoeAttention(nn.Module):
def __init__(
self,
+ config: Qwen3MoeConfig,
hidden_size: int,
num_heads: int,
num_kv_heads: int,
@@ -342,6 +362,10 @@ def __init__(
num_kv_heads=self.num_kv_heads,
layer_id=layer_id,
prefix=add_prefix("attn", prefix),
+ orig_context_len=getattr(
+ config, "orig_context_len", max_position_embeddings
+ ),
+ rope=self.rotary_emb,
)
self.q_norm = RMSNorm(self.head_dim, eps=rms_norm_eps)
@@ -393,7 +417,11 @@ def forward_prepare(
qkv, _ = self.qkv_proj(hidden_states)
q, k, v = qkv.split([self.q_size, self.kv_size, self.kv_size], dim=-1)
q, k = self._apply_qk_norm(q, k)
- q, k = self.rotary_emb(positions, q, k)
+ # RoPE is applied inside the attention kernel in HiP Attention
+ if (forward_batch.hip_metadata_cache_pool is None) or (
+ not forward_batch.hip_metadata_cache_pool.hip_config.using_extend
+ ):
+ q, k = self.rotary_emb(positions, q, k)
inner_state = q, k, v, forward_batch
return None, forward_batch, inner_state
@@ -443,6 +471,7 @@ def __init__(
config, "dual_chunk_attention_config", None
)
self.self_attn = Qwen3MoeAttention(
+ config=config,
hidden_size=self.hidden_size,
num_heads=config.num_attention_heads,
num_kv_heads=config.num_key_value_heads,
@@ -500,6 +529,7 @@ def __init__(
input_layernorm=self.input_layernorm,
post_attention_layernorm=self.post_attention_layernorm,
allow_reduce_scatter=True,
+ is_last_layer=(self.layer_id == self.config.num_hidden_layers - 1),
)
def forward(
@@ -525,17 +555,28 @@ def forward(
hidden_states, residual, forward_batch
)
+ should_allreduce_fusion = (
+ self.layer_communicator.should_fuse_mlp_allreduce_with_next_layer(
+ forward_batch
+ )
+ )
+
# For DP with padding, reduce scatter can be used instead of all-reduce.
use_reduce_scatter = self.layer_communicator.should_use_reduce_scatter(
forward_batch
)
- hidden_states = self.mlp(hidden_states, forward_batch, use_reduce_scatter)
-
- hidden_states, residual = self.layer_communicator.postprocess_layer(
- hidden_states, residual, forward_batch
+ hidden_states = self.mlp(
+ hidden_states, forward_batch, should_allreduce_fusion, use_reduce_scatter
)
+ if should_allreduce_fusion:
+ hidden_states._sglang_needs_allreduce_fusion = True
+ else:
+ hidden_states, residual = self.layer_communicator.postprocess_layer(
+ hidden_states, residual, forward_batch
+ )
+
return hidden_states, residual
def op_comm_prepare_attn(
@@ -616,6 +657,8 @@ def __init__(
class Qwen3MoeForCausalLM(nn.Module):
fall_back_to_pt_during_load = False
+ hip_attention_supported = True
+
def __init__(
self,
config: Qwen3MoeConfig,
diff --git a/python/sglang/srt/models/qwen3_next.py b/python/sglang/srt/models/qwen3_next.py
new file mode 100644
index 00000000000..927158e7a5d
--- /dev/null
+++ b/python/sglang/srt/models/qwen3_next.py
@@ -0,0 +1,1043 @@
+import enum
+import logging
+from typing import Any, Dict, Iterable, Optional, Set, Tuple
+
+import torch
+import torch.nn.functional as F
+from torch import nn
+
+from sglang.srt.configs.qwen3_next import Qwen3NextConfig
+from sglang.srt.distributed import (
+ divide,
+ get_pp_group,
+ get_tensor_model_parallel_rank,
+ get_tensor_model_parallel_world_size,
+)
+from sglang.srt.eplb.expert_location import ModelConfigForExpertLocation
+from sglang.srt.layers.attention.fla.layernorm_gated import RMSNorm as RMSNormGated
+from sglang.srt.layers.attention.mamba.mamba import mamba_v2_sharded_weight_loader
+from sglang.srt.layers.communicator import LayerCommunicator, LayerScatterModes
+from sglang.srt.layers.dp_attention import (
+ get_attention_tp_rank,
+ get_attention_tp_size,
+ is_dp_attention_enabled,
+)
+from sglang.srt.layers.layernorm import GemmaRMSNorm, RMSNorm
+from sglang.srt.layers.linear import (
+ ColumnParallelLinear,
+ MergedColumnParallelLinear,
+ QKVParallelLinear,
+ RowParallelLinear,
+)
+from sglang.srt.layers.logits_processor import LogitsProcessor
+from sglang.srt.layers.moe.ep_moe.layer import get_moe_impl_class
+from sglang.srt.layers.quantization.base_config import QuantizationConfig
+from sglang.srt.layers.radix_attention import RadixAttention
+from sglang.srt.layers.rotary_embedding import get_rope
+from sglang.srt.layers.vocab_parallel_embedding import (
+ ParallelLMHead,
+ VocabParallelEmbedding,
+)
+from sglang.srt.managers.schedule_batch import global_server_args_dict
+from sglang.srt.model_executor.cuda_graph_runner import get_is_capture_mode
+from sglang.srt.model_executor.forward_batch_info import ForwardBatch
+from sglang.srt.model_loader.weight_utils import (
+ default_weight_loader,
+ sharded_weight_loader,
+)
+from sglang.srt.models.qwen2_moe import Qwen2MoeMLP, Qwen2MoeSparseMoeBlock
+from sglang.srt.utils import add_prefix, is_cuda, make_layers, set_weight_attrs
+
+logger = logging.getLogger(__name__)
+_is_cuda = is_cuda()
+
+import triton
+import triton.language as tl
+
+
+@triton.jit
+def fused_qkvzba_split_reshape_cat_kernel(
+ mixed_qkv,
+ z,
+ b,
+ a,
+ mixed_qkvz,
+ mixed_ba,
+ NUM_HEADS_QK: tl.constexpr,
+ NUM_HEADS_V: tl.constexpr,
+ HEAD_QK: tl.constexpr,
+ HEAD_V: tl.constexpr,
+):
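+ # One program per (token, qk-head): copy the interleaved [q | k | v | z]
+ # slices of mixed_qkvz into the packed [q | k | v] layout of mixed_qkv and
+ # the separate z tensor, and split the per-head [b | a] pairs of mixed_ba
+ # into the b and a outputs.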
+ i_bs, i_qk = tl.program_id(0), tl.program_id(1)
+ QKVZ_DIM_T: tl.constexpr = HEAD_QK * 2 + NUM_HEADS_V // NUM_HEADS_QK * HEAD_V * 2
+ BA_DIM_T: tl.constexpr = NUM_HEADS_V // NUM_HEADS_QK * 2
+ QKV_DIM_T: tl.constexpr = HEAD_QK * 2 + NUM_HEADS_V // NUM_HEADS_QK * HEAD_V
+ q_end: tl.constexpr = HEAD_QK
+ blk_q_ptr = (
+ mixed_qkvz
+ + i_bs * NUM_HEADS_QK * QKVZ_DIM_T
+ + i_qk * QKVZ_DIM_T
+ + tl.arange(0, q_end)
+ )
+ k_end: tl.constexpr = q_end + HEAD_QK
+ blk_k_ptr = (
+ mixed_qkvz
+ + i_bs * NUM_HEADS_QK * QKVZ_DIM_T
+ + i_qk * QKVZ_DIM_T
+ + tl.arange(q_end, k_end)
+ )
+ v_end: tl.constexpr = k_end + NUM_HEADS_V // NUM_HEADS_QK * HEAD_V
+ blk_v_ptr = (
+ mixed_qkvz
+ + i_bs * NUM_HEADS_QK * QKVZ_DIM_T
+ + i_qk * QKVZ_DIM_T
+ + tl.arange(k_end, v_end)
+ )
+ z_end: tl.constexpr = v_end + NUM_HEADS_V // NUM_HEADS_QK * HEAD_V
+ blk_z_ptr = (
+ mixed_qkvz
+ + i_bs * NUM_HEADS_QK * QKVZ_DIM_T
+ + i_qk * QKVZ_DIM_T
+ + tl.arange(v_end, z_end)
+ )
+ blk_q_st_ptr = (
+ mixed_qkv
+ + i_bs * NUM_HEADS_QK * QKV_DIM_T
+ + i_qk * HEAD_QK
+ + tl.arange(0, HEAD_QK)
+ )
+ blk_k_st_ptr = (
+ mixed_qkv
+ + i_bs * NUM_HEADS_QK * QKV_DIM_T
+ + NUM_HEADS_QK * HEAD_QK
+ + i_qk * HEAD_QK
+ + tl.arange(0, HEAD_QK)
+ )
+ blk_v_st_ptr = (
+ mixed_qkv
+ + i_bs * NUM_HEADS_QK * QKV_DIM_T
+ + NUM_HEADS_QK * HEAD_QK * 2
+ + i_qk * HEAD_V * NUM_HEADS_V // NUM_HEADS_QK
+ + tl.arange(0, HEAD_V * NUM_HEADS_V // NUM_HEADS_QK)
+ )
+ blk_z_st_ptr = (
+ z
+ + i_bs * NUM_HEADS_V * HEAD_V
+ + i_qk * HEAD_V * NUM_HEADS_V // NUM_HEADS_QK
+ + tl.arange(0, HEAD_V * NUM_HEADS_V // NUM_HEADS_QK)
+ )
+ tl.store(blk_q_st_ptr, tl.load(blk_q_ptr))
+ tl.store(blk_k_st_ptr, tl.load(blk_k_ptr))
+ tl.store(blk_v_st_ptr, tl.load(blk_v_ptr))
+ tl.store(blk_z_st_ptr, tl.load(blk_z_ptr))
+ b_end: tl.constexpr = NUM_HEADS_V // NUM_HEADS_QK
+ a_end: tl.constexpr = b_end + NUM_HEADS_V // NUM_HEADS_QK
+ for i in tl.static_range(b_end):
+ blk_b_ptr = mixed_ba + i_bs * NUM_HEADS_QK * BA_DIM_T + i_qk * BA_DIM_T + i
+ blk_b_st_ptr = b + i_bs * NUM_HEADS_V + i_qk * NUM_HEADS_V // NUM_HEADS_QK + i
+ tl.store(blk_b_st_ptr, tl.load(blk_b_ptr))
+ for i in tl.static_range(b_end, a_end):
+ blk_a_ptr = mixed_ba + i_bs * NUM_HEADS_QK * BA_DIM_T + i_qk * BA_DIM_T + i
+ blk_a_st_ptr = (
+ a + i_bs * NUM_HEADS_V + i_qk * NUM_HEADS_V // NUM_HEADS_QK + (i - b_end)
+ )
+ tl.store(blk_a_st_ptr, tl.load(blk_a_ptr))
+
+
+def fused_qkvzba_split_reshape_cat(
+ mixed_qkvz,
+ mixed_ba,
+ num_heads_qk,
+ num_heads_v,
+ head_qk,
+ head_v,
+):
+ batch, seq_len = mixed_qkvz.shape[0], 1
+ qkv_dim_t = num_heads_qk * head_qk * 2 + num_heads_v * head_v
+ mixed_qkv = torch.empty(
+ [batch * seq_len, qkv_dim_t],
+ dtype=mixed_qkvz.dtype,
+ device=mixed_qkvz.device,
+ )
+ z = torch.empty(
+ [batch * seq_len, num_heads_v, head_v],
+ dtype=mixed_qkvz.dtype,
+ device=mixed_qkvz.device,
+ )
+ b = torch.empty(
+ [batch * seq_len, num_heads_v],
+ dtype=mixed_ba.dtype,
+ device=mixed_ba.device,
+ )
+ a = torch.empty_like(b)
+ grid = (batch * seq_len, num_heads_qk)
+ fused_qkvzba_split_reshape_cat_kernel[grid](
+ mixed_qkv,
+ z,
+ b,
+ a,
+ mixed_qkvz,
+ mixed_ba,
+ num_heads_qk,
+ num_heads_v,
+ head_qk,
+ head_v,
+ num_warps=1,
+ num_stages=3,
+ )
+ return mixed_qkv, z, b, a
+
+
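For readers less familiar with the packed layout, the fused Triton kernel above is doing the same split as the eager sketch below (illustration only; it assumes the same per-head packing that `fix_query_key_value_ordering` uses further down in this file).

```python
import torch

def reference_qkvzba_split(mixed_qkvz, mixed_ba, num_heads_qk, num_heads_v, head_qk, head_v):
    # Eager-mode sketch of the layout produced by the fused Triton kernel (illustration only).
    ratio = num_heads_v // num_heads_qk
    # Per QK head, mixed_qkvz packs [q | k | v (ratio heads) | z (ratio heads)].
    qkvz = mixed_qkvz.view(-1, num_heads_qk, 2 * head_qk + 2 * ratio * head_v)
    q, k, v, z = torch.split(qkvz, [head_qk, head_qk, ratio * head_v, ratio * head_v], dim=-1)
    # mixed_qkv concatenates all q heads, then all k heads, then all v values.
    mixed_qkv = torch.cat([t.reshape(t.shape[0], -1) for t in (q, k, v)], dim=-1)
    z = z.reshape(z.shape[0], num_heads_v, head_v)
    b, a = torch.split(mixed_ba.view(-1, num_heads_qk, 2 * ratio), [ratio, ratio], dim=-1)
    return mixed_qkv, z, b.reshape(-1, num_heads_v), a.reshape(-1, num_heads_v)
```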
+# g = -self.A_log.float().exp() * F.softplus(a.float() + self.dt_bias)
+@triton.jit
+def fused_gdn_gating_kernel(
+ g,
+ A_log,
+ a,
+ dt_bias,
+ seq_len,
+ NUM_HEADS: tl.constexpr,
+ beta: tl.constexpr,
+ threshold: tl.constexpr,
+ BLK_HEADS: tl.constexpr,
+):
+ i_b, i_s, i_d = tl.program_id(0), tl.program_id(1), tl.program_id(2)
+ head_off = i_d * BLK_HEADS + tl.arange(0, BLK_HEADS)
+ off = i_b * seq_len * NUM_HEADS + i_s * NUM_HEADS + head_off
+ mask = head_off < NUM_HEADS
+ blk_A_log = tl.load(A_log + head_off, mask=mask)
+ blk_a = tl.load(a + off, mask=mask)
+ blk_bias = tl.load(dt_bias + head_off, mask=mask)
+ x = blk_a.to(tl.float32) + blk_bias.to(tl.float32)
+ softplus_x = tl.where(
+ beta * x <= threshold, (1 / beta) * tl.log(1 + tl.exp(beta * x)), x
+ )
+ blk_g = -tl.exp(blk_A_log.to(tl.float32)) * softplus_x
+ tl.store(g + off, blk_g.to(g.dtype.element_ty), mask=mask)
+
+
+def fused_gdn_gating(
+ A_log: torch.Tensor,
+ a: torch.Tensor,
+ dt_bias: torch.Tensor,
+ beta: float = 1.0,
+ threshold: float = 20.0,
+) -> torch.Tensor:
+ batch, num_heads = a.shape
+ seq_len = 1
+ grid = (batch, seq_len, triton.cdiv(num_heads, 8))
+ g = torch.empty_like(a, dtype=torch.float32)
+ fused_gdn_gating_kernel[grid](
+ g, A_log, a, dt_bias, seq_len, num_heads, beta, threshold, 8, num_warps=1
+ )
+ return g
+
+
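The comment above `fused_gdn_gating_kernel` states the gate formula it implements; a minimal eager-mode reference (illustration only) is:

```python
import torch
import torch.nn.functional as F

def reference_gdn_gating(A_log, a, dt_bias, beta: float = 1.0, threshold: float = 20.0):
    # g = -exp(A_log) * softplus(a + dt_bias), computed in float32 like the kernel.
    # F.softplus applies the same linear fallback above `threshold` as the kernel's branch.
    x = a.float() + dt_bias.float()
    return -A_log.float().exp() * F.softplus(x, beta=beta, threshold=threshold)
```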
+class Qwen3GatedDeltaNet(nn.Module):
+ def __init__(
+ self,
+ config: Qwen3NextConfig,
+ layer_id: int,
+ alt_stream: Optional[torch.cuda.Stream] = None,
+ ) -> None:
+ super().__init__()
+ self.config = config
+ self.attn_tp_rank = get_attention_tp_rank()
+ self.attn_tp_size = get_attention_tp_size()
+ self.hidden_size = config.hidden_size
+ self.num_v_heads = config.linear_num_value_heads
+ self.num_k_heads = config.linear_num_key_heads
+ self.head_k_dim = config.linear_key_head_dim
+ self.head_v_dim = config.linear_value_head_dim
+ self.key_dim = self.head_k_dim * self.num_k_heads
+ self.value_dim = self.head_v_dim * self.num_v_heads
+ self.alt_stream = alt_stream
+
+ self.conv_kernel_size = config.linear_conv_kernel_dim
+ self.layer_id = layer_id
+ self.activation = config.hidden_act
+ self.layer_norm_epsilon = config.rms_norm_eps
+
+ # QKV
+ self.conv_dim = self.key_dim * 2 + self.value_dim
+ self.conv1d = ColumnParallelLinear(
+ input_size=self.conv_kernel_size,
+ output_size=self.conv_dim,
+ bias=False,
+ quant_config=None,
+ tp_rank=self.attn_tp_rank,
+ tp_size=self.attn_tp_size,
+ )
+ self.conv1d.weight.data = self.conv1d.weight.data.unsqueeze(1)
+ # projection of the input hidden states
+ projection_size_qkvz = self.key_dim * 2 + self.value_dim * 2
+ projection_size_ba = self.num_v_heads * 2
+
+ self.in_proj_qkvz = ColumnParallelLinear(
+ input_size=self.hidden_size,
+ output_size=projection_size_qkvz,
+ bias=False,
+ tp_rank=self.attn_tp_rank,
+ tp_size=self.attn_tp_size,
+ )
+ self.in_proj_ba = ColumnParallelLinear(
+ input_size=self.hidden_size,
+ output_size=projection_size_ba,
+ bias=False,
+ tp_rank=self.attn_tp_rank,
+ tp_size=self.attn_tp_size,
+ )
+
+ query_key_settings = (self.key_dim, 0, False)
+ value_settings = (self.value_dim, 0, False)
+
+ delattr(self.conv1d.weight, "weight_loader")
+ set_weight_attrs(
+ self.conv1d.weight,
+ {
+ "weight_loader": mamba_v2_sharded_weight_loader(
+ [
+ query_key_settings,
+ query_key_settings,
+ value_settings,
+ ],
+ self.attn_tp_size,
+ self.attn_tp_rank,
+ )
+ },
+ )
+
+ # selective projection used to make dt, B and C input dependent
+
+ # time step projection (discretization)
+ # instantiate once and copy inv_dt in init_weights of PretrainedModel
+ self.dt_bias = nn.Parameter(torch.ones(self.num_v_heads // self.attn_tp_size))
+
+ A = torch.empty(
+ divide(self.num_v_heads, self.attn_tp_size), dtype=torch.float32
+ ).uniform_(0, 16)
+ self.A_log = nn.Parameter(torch.log(A))
+ self.A_log._no_weight_decay = True
+
+ set_weight_attrs(self.A_log, {"weight_loader": sharded_weight_loader(0)})
+ set_weight_attrs(self.dt_bias, {"weight_loader": sharded_weight_loader(0)})
+
+ self.norm = RMSNormGated(
+ self.head_v_dim,
+ eps=self.layer_norm_epsilon,
+ group_size=None,
+ norm_before_gate=True,
+ device=torch.cuda.current_device(),
+ dtype=config.torch_dtype,
+ )
+
+ self.out_proj = RowParallelLinear(
+ self.value_dim,
+ self.hidden_size,
+ bias=False,
+ input_is_parallel=True,
+ reduce_results=False,
+ tp_rank=self.attn_tp_rank,
+ tp_size=self.attn_tp_size,
+ )
+
+ def fix_query_key_value_ordering(self, mixed_qkvz, mixed_ba):
+ """
+ Derives `query`, `key` and `value` tensors from `mixed_qkvzba`.
+ """
+ new_tensor_shape_qkvz = mixed_qkvz.size()[:-1] + (
+ self.num_k_heads // self.attn_tp_size,
+ (
+ self.head_k_dim
+ + self.head_k_dim
+ + (self.head_v_dim + self.head_v_dim)
+ * self.num_v_heads
+ // self.num_k_heads
+ ),
+ )
+ new_tensor_shape_ba = mixed_ba.size()[:-1] + (
+ self.num_k_heads // self.attn_tp_size,
+ 2 * self.num_v_heads // self.num_k_heads,
+ )
+
+ mixed_qkvz = mixed_qkvz.view(*new_tensor_shape_qkvz)
+ mixed_ba = mixed_ba.view(*new_tensor_shape_ba)
+
+ split_arg_list_qkvz = [
+ self.head_k_dim,
+ self.head_k_dim,
+ (self.num_v_heads // self.num_k_heads * self.head_v_dim),
+ (self.num_v_heads // self.num_k_heads * self.head_v_dim),
+ ]
+ split_arg_list_ba = [
+ self.num_v_heads // self.num_k_heads,
+ self.num_v_heads // self.num_k_heads,
+ ]
+
+ # [b, sq, ng, (hn + hn + np/ng * hn + np/ng + np/ng)]
+ # --> [b, sq, ng, hn], [b, sq, ng, hn], [b, sq, ng, np/ng * hn], [b, sq, ng, np/ng * hn], [b, sq, ng, np/ng], [b, sq, ng, np/ng]
+ (query, key, value, z) = torch.split(mixed_qkvz, split_arg_list_qkvz, dim=2)
+ (b, a) = torch.split(mixed_ba, split_arg_list_ba, dim=2)
+
+ # [b, sq, ng, np/ng * hn] -> [b, sq, np, hn]
+ value = value.reshape(value.size(0), -1, self.head_v_dim)
+ z = z.reshape(z.size(0), -1, self.head_v_dim)
+ b = b.reshape(b.size(0), self.num_v_heads // self.attn_tp_size)
+ a = a.reshape(a.size(0), self.num_v_heads // self.attn_tp_size)
+
+ return query, key, value, z, b, a
+
+ def _forward_input_proj(self, hidden_states: torch.Tensor):
+ DUAL_STREAM_TOKEN_THRESHOLD = 1024
+ seq_len, _ = hidden_states.shape
+ if seq_len < DUAL_STREAM_TOKEN_THRESHOLD:
+ current_stream = torch.cuda.current_stream()
+ self.alt_stream.wait_stream(current_stream)
+ projected_states_qkvz, _ = self.in_proj_qkvz(hidden_states)
+ with torch.cuda.stream(self.alt_stream):
+ projected_states_ba, _ = self.in_proj_ba(hidden_states)
+ current_stream.wait_stream(self.alt_stream)
+ else:
+ projected_states_qkvz, _ = self.in_proj_qkvz(hidden_states)
+ projected_states_ba, _ = self.in_proj_ba(hidden_states)
+ return projected_states_qkvz, projected_states_ba
+
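`_forward_input_proj` overlaps the two input projections on an alternate CUDA stream for small token counts. A standalone sketch of that overlap pattern (the projections, shapes, and names here are placeholders, not part of this change):

```python
import torch

def run_overlapped(x, proj_a, proj_b, alt_stream: torch.cuda.Stream):
    # Launch proj_a on the current stream and proj_b on alt_stream, then re-join so
    # that later kernels on the current stream see both results.
    current = torch.cuda.current_stream()
    alt_stream.wait_stream(current)  # alt_stream must see the producer of x
    out_a = proj_a(x)
    with torch.cuda.stream(alt_stream):
        out_b = proj_b(x)
    current.wait_stream(alt_stream)
    return out_a, out_b
```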
+ def forward(
+ self,
+ hidden_states: torch.Tensor,
+ forward_batch: ForwardBatch,
+ ):
+ seq_len, _ = hidden_states.shape
+ is_cuda_graph = forward_batch.forward_mode.is_cuda_graph()
+
+ projected_states_qkvz, projected_states_ba = self._forward_input_proj(
+ hidden_states
+ )
+
+ if self.num_v_heads // self.num_k_heads in [1, 2, 4] and is_cuda_graph:
+ mixed_qkv, z, b, a = fused_qkvzba_split_reshape_cat(
+ projected_states_qkvz,
+ projected_states_ba,
+ triton.cdiv(self.num_k_heads, self.attn_tp_size),
+ triton.cdiv(self.num_v_heads, self.attn_tp_size),
+ self.head_k_dim,
+ self.head_v_dim,
+ )
+ else:
+ query, key, value, z, b, a = self.fix_query_key_value_ordering(
+ projected_states_qkvz, projected_states_ba
+ )
+ query, key, value = map(
+ lambda x: x.reshape(x.shape[0], -1), (query, key, value)
+ )
+ mixed_qkv = torch.cat((query, key, value), dim=-1)
+ # mixed_qkv = rearrange(mixed_qkv, "b l d -> b d l")
+
+ # 2. Convolution sequence transformation
+ conv_weights = self.conv1d.weight.view(
+ self.conv1d.weight.size(0), self.conv1d.weight.size(2)
+ )
+
+ kwargs = {
+ "mixed_qkv": mixed_qkv,
+ "conv_weights": conv_weights,
+ "bias": self.conv1d.bias,
+ "activation": self.activation,
+ "key_dim": self.key_dim,
+ "value_dim": self.value_dim,
+ "attention_tp_size": self.attn_tp_size,
+ "head_k_dim": self.head_k_dim,
+ "head_v_dim": self.head_v_dim,
+ "a": a,
+ "b": b,
+ "A_log": self.A_log,
+ "dt_bias": self.dt_bias,
+ "layer_id": self.layer_id,
+ "seq_len": seq_len,
+ "z": z,
+ }
+
+ core_attn_out = forward_batch.attn_backend.forward(
+ q=None,
+ k=None,
+ v=None,
+ layer=None,
+ forward_batch=forward_batch,
+ **kwargs,
+ )
+
+ z_shape_og = z.shape
+ # reshape input data into 2D tensor
+ core_attn_out = core_attn_out.reshape(-1, core_attn_out.shape[-1])
+ z = z.reshape(-1, z.shape[-1])
+ core_attn_out = self.norm(core_attn_out, z)
+ core_attn_out = core_attn_out.reshape(z_shape_og)
+ core_attn_out = core_attn_out.reshape(*core_attn_out.shape[:-2], -1)
+
+ output, _ = self.out_proj(core_attn_out)
+ return output
+
+
+class Qwen3HybridLinearDecoderLayer(nn.Module):
+
+ def __init__(
+ self,
+ config: Qwen3NextConfig,
+ layer_id: int,
+ quant_config: Optional[QuantizationConfig] = None,
+ prefix: str = "",
+ alt_stream: Optional[torch.cuda.Stream] = None,
+ ) -> None:
+ super().__init__()
+ self.config = config
+ self.linear_attn = Qwen3GatedDeltaNet(config, layer_id, alt_stream)
+
+        # All Qwen3Next layers are sparse, and there is no NextN layer for now.
+ self.is_layer_sparse = True
+ is_previous_layer_sparse = True
+ self.layer_id = layer_id
+
+ self.layer_scatter_modes = LayerScatterModes.init_new(
+ layer_id=layer_id,
+ num_layers=config.num_hidden_layers,
+ is_layer_sparse=self.is_layer_sparse,
+ is_previous_layer_sparse=is_previous_layer_sparse,
+ )
+
+ if self.is_layer_sparse:
+ self.mlp = Qwen2MoeSparseMoeBlock(
+ layer_id=layer_id,
+ config=config,
+ quant_config=quant_config,
+ alt_stream=alt_stream,
+ )
+ else:
+ self.mlp = Qwen2MoeMLP(
+ hidden_size=config.hidden_size,
+ intermediate_size=config.intermediate_size,
+ hidden_act=config.hidden_act,
+ quant_config=quant_config,
+ )
+ self.input_layernorm = GemmaRMSNorm(config.hidden_size, eps=config.rms_norm_eps)
+ self.post_attention_layernorm = GemmaRMSNorm(
+ config.hidden_size, eps=config.rms_norm_eps
+ )
+ self.layer_communicator = LayerCommunicator(
+ layer_scatter_modes=self.layer_scatter_modes,
+ input_layernorm=self.input_layernorm,
+ post_attention_layernorm=self.post_attention_layernorm,
+ allow_reduce_scatter=True,
+ )
+
+ def forward(
+ self,
+ hidden_states: torch.Tensor,
+ residual: Optional[torch.Tensor],
+ **kwargs,
+ ):
+ forward_batch = kwargs.get("forward_batch", None)
+
+ hidden_states, residual = self.layer_communicator.prepare_attn(
+ hidden_states, residual, forward_batch
+ )
+
+ if not forward_batch.forward_mode.is_idle():
+ hidden_states = self.linear_attn(
+ hidden_states,
+ forward_batch,
+ )
+ # Fully Connected
+ hidden_states, residual = self.layer_communicator.prepare_mlp(
+ hidden_states, residual, forward_batch
+ )
+
+ use_reduce_scatter = self.layer_communicator.should_use_reduce_scatter(
+ forward_batch
+ )
+ hidden_states = self.mlp(hidden_states, forward_batch, use_reduce_scatter)
+
+ hidden_states, residual = self.layer_communicator.postprocess_layer(
+ hidden_states, residual, forward_batch
+ )
+
+ return hidden_states, residual
+
+
+class Qwen3HybridAttentionDecoderLayer(nn.Module):
+
+ def __init__(
+ self,
+ config: Qwen3NextConfig,
+ layer_id: int,
+ quant_config: Optional[QuantizationConfig] = None,
+ prefix: str = "",
+ alt_stream: Optional[torch.cuda.Stream] = None,
+ ) -> None:
+ super().__init__()
+ self.config = config
+ self.hidden_size = config.hidden_size
+ self.attn_tp_rank = get_attention_tp_rank()
+ self.attn_tp_size = get_attention_tp_size()
+ self.total_num_heads = config.num_attention_heads
+ assert self.total_num_heads % self.attn_tp_size == 0
+ self.num_heads = self.total_num_heads // self.attn_tp_size
+ self.total_num_kv_heads = config.num_key_value_heads
+ if self.total_num_kv_heads >= self.attn_tp_size:
+            # Number of KV heads is greater than or equal to TP size, so we
+            # partition the KV heads across multiple tensor parallel GPUs.
+ assert self.total_num_kv_heads % self.attn_tp_size == 0
+ else:
+ # Number of KV heads is less than TP size, so we replicate
+ # the KV heads across multiple tensor parallel GPUs.
+ assert self.attn_tp_size % self.total_num_kv_heads == 0
+ self.num_kv_heads = max(1, self.total_num_kv_heads // self.attn_tp_size)
+ self.head_dim = config.head_dim or (self.hidden_size // self.num_heads)
+ self.q_size = self.num_heads * self.head_dim
+ self.kv_size = self.num_kv_heads * self.head_dim
+ self.scaling = self.head_dim**-0.5
+ self.rope_theta = getattr(config, "rope_theta", 10000)
+ self.max_position_embeddings = getattr(config, "max_position_embeddings", 8192)
+ self.rope_scaling = getattr(config, "rope_scaling", None)
+ self.partial_rotary_factor = config.partial_rotary_factor
+ self.layer_id = layer_id
+
+ self.attn_output_gate = getattr(config, "attn_output_gate", True)
+ if self.attn_output_gate:
+ logger.warning_once("using attn output gate!")
+
+ self.rotary_emb = get_rope(
+ head_size=self.head_dim,
+ rotary_dim=self.head_dim,
+ max_position=self.max_position_embeddings,
+ rope_scaling=self.rope_scaling,
+ base=self.rope_theta,
+ partial_rotary_factor=self.partial_rotary_factor,
+ is_neox_style=True,
+ dtype=torch.get_default_dtype(), # see impl of get_rope
+ )
+
+ self.qkv_proj = QKVParallelLinear(
+ config.hidden_size,
+ self.head_dim,
+ self.total_num_heads * (1 + self.attn_output_gate),
+ self.total_num_kv_heads,
+ bias=False,
+ quant_config=quant_config,
+ tp_rank=self.attn_tp_rank,
+ tp_size=self.attn_tp_size,
+ )
+
+ self.o_proj = RowParallelLinear(
+ self.total_num_heads * self.head_dim,
+ config.hidden_size,
+ bias=False,
+ quant_config=quant_config,
+ reduce_results=False,
+ tp_rank=self.attn_tp_rank,
+ tp_size=self.attn_tp_size,
+ )
+
+ self.attn = RadixAttention(
+ self.num_heads,
+ self.head_dim,
+ self.scaling,
+ num_kv_heads=self.num_kv_heads,
+ layer_id=layer_id,
+ prefix=f"{prefix}.attn",
+ orig_context_len=getattr(
+ config, "orig_context_len", self.max_position_embeddings
+ ),
+ rope=self.rotary_emb,
+ )
+
+        # All Qwen3Next layers are sparse, and there is no NextN layer for now.
+ self.is_layer_sparse = True
+ is_previous_layer_sparse = True
+
+ self.layer_scatter_modes = LayerScatterModes.init_new(
+ layer_id=layer_id,
+ num_layers=config.num_hidden_layers,
+ is_layer_sparse=self.is_layer_sparse,
+ is_previous_layer_sparse=is_previous_layer_sparse,
+ )
+
+ if self.is_layer_sparse:
+ self.mlp = Qwen2MoeSparseMoeBlock(
+ layer_id=layer_id,
+ config=config,
+ quant_config=quant_config,
+ alt_stream=alt_stream,
+ )
+ else:
+ self.mlp = Qwen2MoeMLP(
+ hidden_size=config.hidden_size,
+ intermediate_size=config.intermediate_size,
+ hidden_act=config.hidden_act,
+ quant_config=quant_config,
+ )
+ self.input_layernorm = GemmaRMSNorm(config.hidden_size, eps=config.rms_norm_eps)
+ self.post_attention_layernorm = GemmaRMSNorm(
+ config.hidden_size, eps=config.rms_norm_eps
+ )
+
+ self.q_norm = GemmaRMSNorm(self.head_dim, eps=config.rms_norm_eps)
+ self.k_norm = GemmaRMSNorm(self.head_dim, eps=config.rms_norm_eps)
+
+ self.layer_communicator = LayerCommunicator(
+ layer_scatter_modes=self.layer_scatter_modes,
+ input_layernorm=self.input_layernorm,
+ post_attention_layernorm=self.post_attention_layernorm,
+ allow_reduce_scatter=True,
+ )
+
+ self.alt_stream = alt_stream
+
+ def _apply_qk_norm(
+ self, q: torch.Tensor, k: torch.Tensor
+ ) -> Tuple[torch.Tensor, torch.Tensor]:
+ # overlap qk norm
+ if self.alt_stream is not None and get_is_capture_mode():
+ current_stream = torch.cuda.current_stream()
+ self.alt_stream.wait_stream(current_stream)
+ q_by_head = q.reshape(-1, self.head_dim)
+ q_by_head = self.q_norm(q_by_head)
+ with torch.cuda.stream(self.alt_stream):
+ k_by_head = k.reshape(-1, self.head_dim)
+ k_by_head = self.k_norm(k_by_head)
+ current_stream.wait_stream(self.alt_stream)
+ else:
+ q_by_head = q.reshape(-1, self.head_dim)
+ q_by_head = self.q_norm(q_by_head)
+ k_by_head = k.reshape(-1, self.head_dim)
+ k_by_head = self.k_norm(k_by_head)
+ q = q_by_head.view(q.shape)
+ k = k_by_head.view(k.shape)
+ return q, k
+
+ def self_attention(
+ self,
+ positions: torch.Tensor,
+ hidden_states: torch.Tensor,
+ forward_batch: ForwardBatch,
+ ) -> torch.Tensor:
+ qkv, _ = self.qkv_proj(hidden_states)
+
+ if self.attn_output_gate:
+ q_gate, k, v = qkv.split(
+ [self.q_size * 2, self.kv_size, self.kv_size], dim=-1
+ )
+ orig_shape = q_gate.shape[:-1]
+ q_gate = q_gate.view(*orig_shape, self.num_heads, -1)
+ q, gate = torch.chunk(q_gate, 2, dim=-1)
+ q = q.reshape(*orig_shape, -1)
+ gate = gate.reshape(*orig_shape, -1)
+ else:
+ q, k, v = qkv.split([self.q_size, self.kv_size, self.kv_size], dim=-1)
+
+ q, k = self._apply_qk_norm(q, k)
+
+ q, k = self.rotary_emb(positions, q, k)
+
+ attn_output = self.attn(q, k, v, forward_batch)
+
+ if self.attn_output_gate:
+ gate = torch.sigmoid(gate)
+ attn_output = attn_output * gate
+
+ output, _ = self.o_proj(attn_output)
+ return output
+
+ def forward(
+ self,
+ positions: torch.Tensor,
+ hidden_states: torch.Tensor,
+ residual: Optional[torch.Tensor],
+ forward_batch: ForwardBatch,
+ **kwargs: Any,
+ ):
+ hidden_states, residual = self.layer_communicator.prepare_attn(
+ hidden_states, residual, forward_batch
+ )
+
+ if not forward_batch.forward_mode.is_idle():
+ hidden_states = self.self_attention(
+ positions=positions,
+ hidden_states=hidden_states,
+ forward_batch=forward_batch,
+ )
+
+ # Fully Connected
+ hidden_states, residual = self.layer_communicator.prepare_mlp(
+ hidden_states, residual, forward_batch
+ )
+ use_reduce_scatter = self.layer_communicator.should_use_reduce_scatter(
+ forward_batch
+ )
+ hidden_states = self.mlp(hidden_states, forward_batch, use_reduce_scatter)
+
+ hidden_states, residual = self.layer_communicator.postprocess_layer(
+ hidden_states, residual, forward_batch
+ )
+
+ return hidden_states, residual
+
+
+ALL_DECODER_LAYER_TYPES = {
+ "attention": Qwen3HybridAttentionDecoderLayer,
+ "linear_attention": Qwen3HybridLinearDecoderLayer,
+}
+
+
+class Qwen3NextModel(nn.Module):
+ def __init__(
+ self,
+ config: Qwen3NextConfig,
+ quant_config: Optional[QuantizationConfig] = None,
+ prefix: str = "",
+ ) -> None:
+ super().__init__()
+ self.config = config
+
+ alt_stream = torch.cuda.Stream() if _is_cuda else None
+
+ self.embed_tokens = VocabParallelEmbedding(
+ config.vocab_size,
+ config.hidden_size,
+ org_num_embeddings=config.vocab_size,
+ enable_tp=not is_dp_attention_enabled(),
+ )
+
+ def get_layer(idx: int, prefix: str):
+ layer_class = ALL_DECODER_LAYER_TYPES[config.layers_block_type[idx]]
+ return layer_class(
+ config,
+ idx,
+ quant_config=quant_config,
+ prefix=prefix,
+ alt_stream=alt_stream,
+ )
+
+ self.layers = make_layers(
+ config.num_hidden_layers, get_layer, prefix=f"{prefix}.layers"
+ )
+
+ self.norm = GemmaRMSNorm(config.hidden_size, eps=config.rms_norm_eps)
+ self.infer_count = 0
+
+ def forward(
+ self,
+ input_ids: torch.Tensor,
+ positions: torch.Tensor,
+ forward_batch: ForwardBatch,
+ # mamba_cache_params: MambaCacheParams,
+ inputs_embeds: Optional[torch.Tensor] = None,
+ ) -> torch.Tensor:
+
+        # Pass a sequence index tensor, which is required for proper
+        # continuous batching computation, including chunked prefill.
+ if inputs_embeds is not None:
+ hidden_states = inputs_embeds
+ else:
+ hidden_states = self.embed_tokens(input_ids)
+
+ residual = None
+ for i in range(len(self.layers)):
+ layer = self.layers[i]
+ hidden_states, residual = layer(
+ layer_id=i,
+ positions=positions,
+ hidden_states=hidden_states,
+ residual=residual,
+ forward_batch=forward_batch,
+ )
+
+ if not forward_batch.forward_mode.is_idle():
+ if residual is None:
+ hidden_states = self.norm(hidden_states)
+ else:
+ hidden_states, _ = self.norm(hidden_states, residual)
+
+ return hidden_states
+
+
+class HybridLayerType(enum.Enum):
+ full_attention = "attention"
+ swa_attention = "swa_attention"
+ linear_attention = "linear_attention"
+ mamba2 = "mamba"
+
+
+class Qwen3NextForCausalLM(nn.Module):
+ fall_back_to_pt_during_load = False
+
+ def __init__(
+ self,
+ config: Qwen3NextConfig,
+ quant_config: Optional[QuantizationConfig] = None,
+ prefix: str = "",
+ ) -> None:
+ super().__init__()
+ self.config = config
+ self.pp_group = get_pp_group()
+ assert self.pp_group.is_first_rank and self.pp_group.is_last_rank
+ self.quant_config = quant_config
+ self.model = Qwen3NextModel(
+ config, quant_config, prefix=add_prefix("model", prefix)
+ )
+ self.lm_head = ParallelLMHead(
+ config.vocab_size,
+ config.hidden_size,
+ quant_config=quant_config,
+ org_num_embeddings=config.vocab_size,
+ prefix=add_prefix("lm_head", prefix),
+ use_attn_tp_group=global_server_args_dict["enable_dp_lm_head"],
+ )
+ self.lm_head = self.lm_head.float()
+ self.logits_processor = LogitsProcessor(config)
+
+ @torch.no_grad()
+ def forward(
+ self,
+ input_ids: torch.Tensor,
+ positions: torch.Tensor,
+ forward_batch: ForwardBatch,
+ inputs_embeds: Optional[torch.Tensor] = None,
+ **kwargs,
+ ):
+ hidden_states = self.model(input_ids, positions, forward_batch, inputs_embeds)
+
+ return self.logits_processor(
+ input_ids, hidden_states, self.lm_head, forward_batch
+ )
+
+ def get_embed_and_head(self):
+ return self.model.embed_tokens.weight, self.lm_head.weight
+
+ def set_embed_and_head(self, embed, head):
+ del self.model.embed_tokens.weight
+ del self.lm_head.weight
+ self.model.embed_tokens.weight = embed
+ self.lm_head.weight = head
+ torch.cuda.empty_cache()
+ torch.cuda.synchronize()
+
+ def load_weights(
+ self, weights: Iterable[Tuple[str, torch.Tensor]], is_mtp: bool = False
+ ) -> Set[str]:
+ stacked_params_mapping = [
+ # (param_name, shard_name, shard_id)
+ ("qkv_proj", "q_proj", "q"),
+ ("qkv_proj", "k_proj", "k"),
+ ("qkv_proj", "v_proj", "v"),
+ ("gate_up_proj", "gate_proj", 0),
+ ("gate_up_proj", "up_proj", 1),
+ ]
+
+ # Params for weights, fp8 weight scales, fp8 activation scales
+ # (param_name, weight_name, expert_id, shard_id)
+ expert_params_mapping = get_moe_impl_class().make_expert_params_mapping(
+ ckpt_gate_proj_name="gate_proj",
+ ckpt_down_proj_name="down_proj",
+ ckpt_up_proj_name="up_proj",
+ num_experts=self.config.num_experts,
+ )
+
+ params_dict = dict(self.named_parameters())
+ loaded_params: Set[str] = set()
+ for name, loaded_weight in weights:
+
+ if is_mtp:
+
+ if "mtp" not in name:
+ continue
+
+ if name in [
+ "mtp.fc.weight",
+ "mtp.pre_fc_norm_embedding.weight",
+ "mtp.pre_fc_norm_hidden.weight",
+ ]:
+ name = name.replace("mtp.", "")
+ else:
+ name = name.replace("mtp", "model")
+
+ if not is_mtp and "mtp" in name:
+ continue
+
+ if "rotary_emb.inv_freq" in name:
+ continue
+
+ if ".self_attn." in name:
+ name = name.replace(".self_attn", "")
+
+ for param_name, weight_name, shard_id in stacked_params_mapping:
+ if weight_name not in name:
+ continue
+
+ # TODO(fix mtp loading)
+ if "mlp.experts" in name:
+ continue
+
+ name = name.replace(weight_name, param_name)
+ # Skip loading extra bias for GPTQ models.
+ if name.endswith(".bias") and name not in params_dict:
+ continue
+ # Skip layers on other devices.
+ # if is_pp_missing_parameter(name, self):
+ # continue
+ if name not in params_dict:
+ continue
+ param = params_dict[name]
+ weight_loader = getattr(param, "weight_loader")
+ weight_loader(param, loaded_weight, shard_id)
+ break
+ else:
+ for mapping in expert_params_mapping:
+ param_name, weight_name, expert_id, shard_id = mapping
+ if weight_name not in name:
+ continue
+ name = name.replace(weight_name, param_name)
+ # Skip layers on other devices.
+ # if is_pp_missing_parameter(name, self):
+ # continue
+ # Skip loading extra bias for GPTQ models.
+ if (
+ name.endswith(".bias") or name.endswith("_bias")
+ ) and name not in params_dict:
+ continue
+ param = params_dict[name]
+
+ weight_loader = getattr(param, "weight_loader")
+ weight_loader(
+ param,
+ loaded_weight,
+ name,
+ shard_id=shard_id,
+ expert_id=expert_id,
+ )
+ break
+ else:
+ # Skip loading extra bias for GPTQ models.
+ if name.endswith(".bias") and name not in params_dict:
+ continue
+ # if is_pp_missing_parameter(name, self):
+ # continue
+
+ param = params_dict[name]
+ weight_loader = getattr(
+ param, "weight_loader", default_weight_loader
+ )
+ weight_loader(param, loaded_weight)
+ loaded_params.add(name)
+ return loaded_params
+
+ @classmethod
+ def get_model_config_for_expert_location(cls, config):
+ return ModelConfigForExpertLocation(
+ num_layers=config.num_hidden_layers,
+ num_logical_experts=config.num_experts,
+ num_groups=None,
+ )
+
+
+EntryClass = Qwen3NextForCausalLM
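`EntryClass` is how the model loader discovers this model class. A hypothetical offline-engine invocation for reference (the checkpoint path and sampling parameters are placeholders, not part of this change):

```python
import sglang as sgl

if __name__ == "__main__":
    # Placeholder path; any Qwen3-Next checkpoint whose architecture maps to Qwen3NextForCausalLM.
    llm = sgl.Engine(model_path="/path/to/qwen3-next-checkpoint")
    outputs = llm.generate(["The capital of France is"], {"max_new_tokens": 16})
    print(outputs)
```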
diff --git a/python/sglang/srt/models/qwen3_next_mtp.py b/python/sglang/srt/models/qwen3_next_mtp.py
new file mode 100644
index 00000000000..a9da0867df9
--- /dev/null
+++ b/python/sglang/srt/models/qwen3_next_mtp.py
@@ -0,0 +1,109 @@
+# Copyright 2023-2024 SGLang Team
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Inference-only Qwen3Next MTP Speculative Decoding."""
+import logging
+from typing import Iterable, Optional, Tuple
+
+import torch
+from torch import nn
+from transformers import PretrainedConfig
+
+from sglang.srt.distributed import get_pp_group, get_tensor_model_parallel_world_size
+from sglang.srt.layers.layernorm import GemmaRMSNorm, RMSNorm
+from sglang.srt.layers.logits_processor import LogitsProcessor
+from sglang.srt.layers.quantization.base_config import QuantizationConfig
+from sglang.srt.layers.vocab_parallel_embedding import ParallelLMHead
+from sglang.srt.managers.schedule_batch import global_server_args_dict
+from sglang.srt.model_executor.forward_batch_info import ForwardBatch
+from sglang.srt.models.qwen3_moe import Qwen3MoeModel
+from sglang.srt.models.qwen3_next import Qwen3NextForCausalLM, Qwen3NextModel
+from sglang.srt.utils import add_prefix
+
+logger = logging.getLogger(__name__)
+
+
+class Qwen3NextForCausalLMMTP(Qwen3NextForCausalLM):
+
+ def __init__(
+ self,
+ config: PretrainedConfig,
+ quant_config: Optional[QuantizationConfig] = None,
+ prefix: str = "",
+ ) -> None:
+ nn.Module.__init__(self)
+ self.config = config
+ self.tp_size = get_tensor_model_parallel_world_size()
+ self.quant_config = quant_config
+        # If not set, weight loading breaks in Qwen3NextForCausalLM.load_weights().
+ self.pp_group = get_pp_group()
+ # self.determine_num_fused_shared_experts("Qwen3NextForCausalLMMTP")
+
+        # Based on the currently available checkpoint, we:
+        # (1) do not use dedicated MTP embeddings (none are provided) and directly reuse the target model's embeddings
+        # (2) hardcode bias=False, since no bias is provided
+ self.fc = nn.Linear(2 * config.hidden_size, config.hidden_size, bias=False)
+ RMSNorm_cls = GemmaRMSNorm
+ self.pre_fc_norm_embedding = RMSNorm_cls(
+ config.hidden_size, config.rms_norm_eps
+ )
+ self.pre_fc_norm_hidden = RMSNorm_cls(config.hidden_size, config.rms_norm_eps)
+ config.num_hidden_layers = 1
+ config.full_attention_interval = 1
+ self.model = Qwen3NextModel(
+ config, quant_config, prefix=add_prefix("model", prefix)
+ )
+ self.lm_head = ParallelLMHead(
+ config.vocab_size,
+ config.hidden_size,
+ quant_config=quant_config,
+ prefix=add_prefix("model.shared_head.head", prefix),
+ use_attn_tp_group=global_server_args_dict["enable_dp_lm_head"],
+ )
+ self.logits_processor = LogitsProcessor(config)
+
+ @torch.no_grad()
+ def forward(
+ self,
+ input_ids: torch.Tensor,
+ positions: torch.Tensor,
+ forward_batch: ForwardBatch,
+ input_embeds: Optional[torch.Tensor] = None,
+ **kwargs,
+ ):
+ if input_embeds is None:
+ input_embeds = self.model.embed_tokens(input_ids)
+
+ input_embeds = self.pre_fc_norm_embedding(input_embeds)
+ hidden_states = self.pre_fc_norm_hidden(forward_batch.spec_info.hidden_states)
+ hidden_states = self.fc(torch.cat((input_embeds, hidden_states), dim=-1))
+
+ hidden_states = self.model(
+ input_ids,
+ positions,
+ forward_batch,
+ hidden_states,
+ )
+
+ return self.logits_processor(
+ input_ids, hidden_states, self.lm_head, forward_batch
+ )
+
+ def load_weights(
+ self, weights: Iterable[Tuple[str, torch.Tensor]], is_mtp: bool = False
+ ):
+ super().load_weights(weights, is_mtp=True)
+
+
+EntryClass = [Qwen3NextForCausalLMMTP]
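As the comments in `__init__` note, the MTP head normalizes the token embedding and the target model's hidden state separately, concatenates them, and projects back to `hidden_size` before running the single draft layer. A compact sketch of just that combine step (illustration only; `nn.RMSNorm` stands in for `GemmaRMSNorm`, which additionally adds 1 to its weight):

```python
import torch
from torch import nn

class MtpCombine(nn.Module):
    # Illustrative stand-in for pre_fc_norm_embedding / pre_fc_norm_hidden / fc above.
    def __init__(self, hidden_size: int, eps: float = 1e-6):
        super().__init__()
        self.norm_embed = nn.RMSNorm(hidden_size, eps=eps)
        self.norm_hidden = nn.RMSNorm(hidden_size, eps=eps)
        self.fc = nn.Linear(2 * hidden_size, hidden_size, bias=False)

    def forward(self, token_embeds: torch.Tensor, target_hidden: torch.Tensor) -> torch.Tensor:
        # [num_tokens, hidden] x 2 -> [num_tokens, hidden]
        combined = torch.cat((self.norm_embed(token_embeds), self.norm_hidden(target_hidden)), dim=-1)
        return self.fc(combined)
```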
diff --git a/python/sglang/srt/models/torch_native_llama.py b/python/sglang/srt/models/torch_native_llama.py
index 630e5feb8a6..00499ce666f 100644
--- a/python/sglang/srt/models/torch_native_llama.py
+++ b/python/sglang/srt/models/torch_native_llama.py
@@ -22,7 +22,7 @@
Here is a quick example to enable TP:
```python
-from sglang.srt.model_parallel import tensor_parallel
+from sglang.srt.layers.model_parallel import tensor_parallel
device_mesh = torch.distributed.init_device_mesh("cuda", (tp_size,))
tensor_parallel(model, device_mesh)
diff --git a/python/sglang/srt/models/transformers.py b/python/sglang/srt/models/transformers.py
index a8d33c6aa01..40e7edcaf42 100644
--- a/python/sglang/srt/models/transformers.py
+++ b/python/sglang/srt/models/transformers.py
@@ -213,7 +213,7 @@ def tensor_parallel(self, tp_size: int):
"""
tp_plan = getattr(self.model.config, "base_model_tp_plan", None) or {}
- if not tp_plan and self.tp_size > 1:
+ if not tp_plan and tp_size > 1:
raise ValueError(
f"{type(self.model)} does not support tensor parallel yet!"
)
diff --git a/python/sglang/srt/multimodal/processors/base_processor.py b/python/sglang/srt/multimodal/processors/base_processor.py
index d650535cb0c..cc14f691fb9 100644
--- a/python/sglang/srt/multimodal/processors/base_processor.py
+++ b/python/sglang/srt/multimodal/processors/base_processor.py
@@ -13,7 +13,9 @@
from transformers import BaseImageProcessorFast
from sglang.srt.managers.schedule_batch import Modality, MultimodalDataItem
-from sglang.srt.utils import load_audio, load_image, load_video, logger
+from sglang.srt.utils import is_npu, load_audio, load_image, load_video, logger
+
+_is_npu = is_npu()
@dataclasses.dataclass
@@ -232,7 +234,7 @@ def process_mm_data(
and isinstance(processor.image_processor, BaseImageProcessorFast)
and not self.server_args.disable_fast_image_processor
):
- kwargs["device"] = "cuda"
+ kwargs["device"] = "cuda" if not _is_npu else "npu"
result = processor.__call__(
text=[input_text],
padding=True,
diff --git a/python/sglang/srt/multimodal/processors/glm4v.py b/python/sglang/srt/multimodal/processors/glm4v.py
index 58c55c0f85f..e3c8edc9283 100644
--- a/python/sglang/srt/multimodal/processors/glm4v.py
+++ b/python/sglang/srt/multimodal/processors/glm4v.py
@@ -2,7 +2,6 @@
from typing import List, Union
from decord import VideoReader
-from transformers.video_utils import VideoMetadata
from sglang.srt.layers.rotary_embedding import MRotaryEmbedding
from sglang.srt.models.glm4v import Glm4vForConditionalGeneration
@@ -66,17 +65,18 @@ async def preprocess_video(self, vr: VideoReader):
total_num_frames = len(vr)
duration = total_num_frames / video_fps if video_fps else 0
- metadata = VideoMetadata(
- total_num_frames=int(total_num_frames),
- fps=float(video_fps),
- duration=float(duration),
- video_backend="decord",
- )
-
# Extract all frames
indices = list(range(total_num_frames))
frames = vr.get_batch(indices).asnumpy()
- metadata.frames_indices = indices
+
+ # Return metadata as dict so transformers can properly create VideoMetadata objects
+ metadata = {
+ "total_num_frames": int(total_num_frames),
+ "fps": float(video_fps),
+ "duration": float(duration),
+ "video_backend": "decord",
+ "frames_indices": indices,
+ }
return frames, metadata
diff --git a/python/sglang/srt/multimodal/processors/internvl.py b/python/sglang/srt/multimodal/processors/internvl.py
index 6ab17b1a9b1..9c20664d6c9 100644
--- a/python/sglang/srt/multimodal/processors/internvl.py
+++ b/python/sglang/srt/multimodal/processors/internvl.py
@@ -2,8 +2,10 @@
import numpy as np
import torch
-from decord import VideoReader, cpu
+import torchvision.transforms as T
+from decord import VideoReader, cpu, gpu
from PIL import Image
+from torchvision.transforms import InterpolationMode
from sglang.srt.managers.schedule_batch import Modality, MultimodalDataItem
from sglang.srt.models.interns1 import InternS1ForConditionalGeneration
@@ -44,103 +46,10 @@ def __init__(self, hf_config, server_args, _image_processor, *args, **kwargs):
self.img_start_token_id = tokenizer.convert_tokens_to_ids(self.IMG_START_TOKEN)
self.img_end_token_id = tokenizer.convert_tokens_to_ids(self.IMG_END_TOKEN)
self.mm_tokens = MultimodalSpecialTokens(
- image_token="",
+ image_token="",
image_token_id=tokenizer.convert_tokens_to_ids(self.IMG_CONTEXT_TOKEN),
).build(_image_processor)
- @staticmethod
- def build_transform(input_size):
- IMAGENET_MEAN = (0.485, 0.456, 0.406)
- IMAGENET_STD = (0.229, 0.224, 0.225)
-
- def resize_image(img, size):
- return img.resize((size, size), Image.Resampling.BICUBIC)
-
- def to_tensor(img):
- # Convert PIL Image to numpy array
- img_array = np.array(img).astype(np.float32) / 255.0
- # Convert HWC to CHW format
- img_array = img_array.transpose(2, 0, 1)
- return torch.from_numpy(img_array)
-
- def normalize(tensor, mean, std):
- mean = torch.tensor(mean).view(-1, 1, 1)
- std = torch.tensor(std).view(-1, 1, 1)
- return (tensor - mean) / std
-
- def transform(img):
- img = img.convert("RGB") if img.mode != "RGB" else img
- img = resize_image(img, input_size)
- tensor = to_tensor(img)
- tensor = normalize(tensor, IMAGENET_MEAN, IMAGENET_STD)
- return tensor
-
- return transform
-
- @staticmethod
- def dynamic_preprocess(
- image, min_num=1, max_num=12, image_size=448, use_thumbnail=False
- ):
-
- def find_closest_aspect_ratio(
- aspect_ratio, target_ratios, width, height, image_size
- ):
- best_ratio_diff = float("inf")
- best_ratio = (1, 1)
- area = width * height
- for ratio in target_ratios:
- target_aspect_ratio = ratio[0] / ratio[1]
- ratio_diff = abs(aspect_ratio - target_aspect_ratio)
- if ratio_diff < best_ratio_diff:
- best_ratio_diff = ratio_diff
- best_ratio = ratio
- elif ratio_diff == best_ratio_diff:
- if area > 0.5 * image_size * image_size * ratio[0] * ratio[1]:
- best_ratio = ratio
- return best_ratio
-
- orig_width, orig_height = image.size
- aspect_ratio = orig_width / orig_height
-
- # calculate the existing image aspect ratio
- target_ratios = set(
- (i, j)
- for n in range(min_num, max_num + 1)
- for i in range(1, n + 1)
- for j in range(1, n + 1)
- if i * j <= max_num and i * j >= min_num
- )
- target_ratios = sorted(target_ratios, key=lambda x: x[0] * x[1])
-
- # find the closest aspect ratio to the target
- target_aspect_ratio = find_closest_aspect_ratio(
- aspect_ratio, target_ratios, orig_width, orig_height, image_size
- )
-
- # calculate the target width and height
- target_width = image_size * target_aspect_ratio[0]
- target_height = image_size * target_aspect_ratio[1]
- blocks = target_aspect_ratio[0] * target_aspect_ratio[1]
-
- # resize the image
- resized_img = image.resize((target_width, target_height))
- processed_images = []
- for i in range(blocks):
- box = (
- (i % (target_width // image_size)) * image_size,
- (i // (target_width // image_size)) * image_size,
- ((i % (target_width // image_size)) + 1) * image_size,
- ((i // (target_width // image_size)) + 1) * image_size,
- )
- # split the image
- split_img = resized_img.crop(box)
- processed_images.append(split_img)
- assert len(processed_images) == blocks
- if use_thumbnail and len(processed_images) != 1:
- thumbnail_img = image.resize((image_size, image_size))
- processed_images.append(thumbnail_img)
- return processed_images
-
@staticmethod
def get_index(bound, fps, max_frame, first_idx=0, num_segments=32):
if bound:
@@ -160,27 +69,112 @@ def get_index(bound, fps, max_frame, first_idx=0, num_segments=32):
@staticmethod
def load_video(video_path, bound=None, input_size=448, max_num=1, num_segments=32):
- vr = VideoReader(video_path, ctx=cpu(0), num_threads=1)
+ try:
+ vr = VideoReader(video_path, ctx=gpu(0), num_threads=1)
+ use_gpu = True
+ except (RuntimeError, OSError) as e:
+ print(
+ f"[WARNING] Load video on gpu decoding failed: {e}. Falling back to CPU."
+ )
+ vr = VideoReader(video_path, ctx=cpu(0), num_threads=1)
+ use_gpu = False
+
max_frame = len(vr) - 1
fps = float(vr.get_avg_fps())
- pixel_values_list, num_patches_list = [], []
- transform = InternVLImageProcessor.build_transform(input_size=input_size)
+ pixel_values_list = []
+ num_patches_list = []
frame_indices = InternVLImageProcessor.get_index(
bound, fps, max_frame, first_idx=0, num_segments=num_segments
)
+
for frame_index in frame_indices:
- img = Image.fromarray(vr[frame_index].asnumpy()).convert("RGB")
- img = InternVLImageProcessor.dynamic_preprocess(
- img, image_size=input_size, use_thumbnail=True, max_num=max_num
+ # Load frame
+ frame = vr[frame_index]
+ if use_gpu:
+ img = frame.cuda().permute(2, 0, 1).float() / 255.0
+ else:
+ img_np = frame.asnumpy()
+ img = torch.from_numpy(img_np).permute(2, 0, 1).cuda().float() / 255.0
+
+            # Normalize with each frame's own mean/std; applying the fixed ImageNet statistics to all inputs can cause accuracy issues.
+ mean = img.mean(dim=[1, 2], keepdim=True)
+ # Prevent division by zero; clamp to minimum value of 1e-6
+ std = img.std(dim=[1, 2], keepdim=True).clamp(min=1e-6)
+ img = (img - mean) / std
+
+ tiles = InternVLImageProcessor.dynamic_preprocess(
+ img, image_size=input_size, max_num=max_num, use_thumbnail=True
)
- pixel_values = [transform(tile) for tile in img]
- pixel_values = torch.stack(pixel_values)
- num_patches_list.append(pixel_values.shape[0])
- pixel_values_list.append(pixel_values)
- pixel_values = torch.cat(pixel_values_list)
+
+ pixel_values_list.append(tiles)
+ num_patches_list.append(tiles.shape[0])
+
+ pixel_values = torch.cat(pixel_values_list, dim=0)
return pixel_values, num_patches_list
+ @staticmethod
+ def dynamic_preprocess(tensor, image_size=448, max_num=12, use_thumbnail=False):
+ C, H, W = tensor.shape
+ aspect_ratio = W / H
+
+ # Generate all possible aspect ratios
+ target_ratios = set(
+ (i, j)
+ for n in range(1, max_num + 1)
+ for i in range(1, n + 1)
+ for j in range(1, n + 1)
+ if i * j <= max_num
+ )
+ target_ratios = sorted(target_ratios, key=lambda x: x[0] * x[1])
+
+ # Find closest ratio
+ best_ratio_diff = float("inf")
+ best_ratio = (1, 1)
+
+ for x, y in target_ratios:
+ target_ar = x / y
+ diff = abs(aspect_ratio - target_ar)
+ blocks = x * y
+ best_blocks = best_ratio[0] * best_ratio[1]
+
+ if diff < best_ratio_diff:
+ best_ratio_diff = diff
+ best_ratio = (x, y)
+ elif diff == best_ratio_diff and blocks > best_blocks:
+ best_ratio = (x, y)
+
+ target_w, target_h = image_size * best_ratio[0], image_size * best_ratio[1]
+ blocks = best_ratio[0] * best_ratio[1]
+
+ # Resize on GPU
+ resized = torch.nn.functional.interpolate(
+ tensor.unsqueeze(0),
+ size=(target_h, target_w),
+ mode="bicubic",
+ align_corners=False,
+ ).squeeze(0)
+
+ # Split into tiles
+ tiles = []
+ for i in range(blocks):
+ x = (i % best_ratio[0]) * image_size
+ y = (i // best_ratio[0]) * image_size
+ tile = resized[:, y : y + image_size, x : x + image_size]
+ tiles.append(tile)
+
+ # Add thumbnail if needed
+ if use_thumbnail and len(tiles) > 1:
+ thumb = torch.nn.functional.interpolate(
+ tensor.unsqueeze(0),
+ size=(image_size, image_size),
+ mode="bicubic",
+ align_corners=False,
+ ).squeeze(0)
+ tiles.append(thumb)
+
+ return torch.stack(tiles).to(torch.bfloat16)
+
async def process_mm_data_async(
self, image_data, input_text, request_obj, **kwargs
):
@@ -191,48 +185,71 @@ async def process_mm_data_async(
discard_alpha_channel=True,
)
- def process_image_internvl(image, input_size=448, max_num=12):
- transform = InternVLImageProcessor.build_transform(input_size=input_size)
- images = InternVLImageProcessor.dynamic_preprocess(
- image, image_size=input_size, use_thumbnail=True, max_num=max_num
- )
- pixel_values = [transform(image) for image in images]
- pixel_values = torch.stack(pixel_values)
- return pixel_values
-
num_patches_list = []
pixel_values = []
+
# Process each input with allocated frames
- for image_index, (image) in enumerate(base_output.images):
+ for image_index, image in enumerate(base_output.images):
try:
# TODO: video input
- raw_image = process_image_internvl(image)
- pixel_value = [raw_image.to(torch.bfloat16)]
- pixel_values += pixel_value
- num_patches = raw_image.shape[0]
- num_patches_list += [num_patches]
-
- except FileNotFoundError as e:
- print(e)
+ # Convert PIL to GPU tensor
+ if isinstance(image, Image.Image):
+ img_np = np.array(image.convert("RGB"))
+ tensor = (
+ torch.from_numpy(img_np).permute(2, 0, 1).cuda().float() / 255.0
+ )
+ else:
+ tensor = image.cuda() # assume already tensor
+
+            # Normalize with each image's own mean/std; applying the fixed ImageNet statistics to all inputs can cause accuracy issues.
+ mean = tensor.mean(dim=[1, 2], keepdim=True)
+ # Prevent division by zero; clamp to minimum value of 1e-6
+ std = tensor.std(dim=[1, 2], keepdim=True).clamp(min=1e-6)
+ tensor = (tensor - mean) / std
+ tiles = self.dynamic_preprocess(
+ tensor, image_size=448, max_num=12, use_thumbnail=True
+ )
+
+ pixel_values.append(tiles)
+ num_patches_list.append(tiles.shape[0])
+
+ except Exception as e:
+ print(f"[Error] Failed to process image {image_index}: {e}")
return None
+ # Concatenate all
pixel_values = torch.cat(pixel_values, dim=0)
- for idx, num_patches in enumerate(num_patches_list):
+ original_placeholder = "<<<__IMG_CONTEXT_PLACEHOLDER__>>>"
+ input_text = input_text.replace(self.IMG_CONTEXT_TOKEN, original_placeholder)
+
+ input_text_updated = input_text
+ for num_patches in num_patches_list:
image_tokens = (
self.IMG_START_TOKEN
+ self.IMG_CONTEXT_TOKEN * self.num_image_token * num_patches
+ self.IMG_END_TOKEN
)
- input_text = input_text.replace("", image_tokens, 1)
+ input_text_updated = input_text_updated.replace(
+ original_placeholder, image_tokens, 1
+ )
- input_ids = self.tokenizer(input_text, return_tensors="pt")[
+ input_text_updated = input_text_updated.replace(
+ original_placeholder, self.IMG_CONTEXT_TOKEN
+ )
+
+ # Tokenize
+ input_ids_tensor = self.tokenizer(input_text_updated, return_tensors="pt")[
"input_ids"
].flatten()
+ input_ids = input_ids_tensor.tolist()
+
+ # Get image token offsets
image_offsets = self.get_mm_items_offset(
- input_ids=input_ids,
+ input_ids=input_ids_tensor.to("cuda"),
mm_token_id=self.mm_tokens.image_token_id,
)
+
items = [
MultimodalDataItem(
feature=pixel_values,
@@ -242,7 +259,7 @@ def process_image_internvl(image, input_size=448, max_num=12):
]
return {
- "input_ids": input_ids.tolist(),
+ "input_ids": input_ids,
"mm_items": items,
"im_start_id": self.img_start_token_id,
"im_end_id": self.img_end_token_id,
diff --git a/python/sglang/srt/multimodal/processors/qwen_vl.py b/python/sglang/srt/multimodal/processors/qwen_vl.py
index f67f72b95d8..6181a7622bf 100644
--- a/python/sglang/srt/multimodal/processors/qwen_vl.py
+++ b/python/sglang/srt/multimodal/processors/qwen_vl.py
@@ -243,6 +243,7 @@ async def process_mm_data_async(
resize_tasks = [resize_image_async(image) for image in base_output.images]
base_output.images = await asyncio.gather(*resize_tasks)
+ videos = None
if base_output.videos:
base_output.videos = [
await preprocess_video(video) for video in base_output.videos
diff --git a/python/sglang/srt/offloader.py b/python/sglang/srt/offloader.py
new file mode 100644
index 00000000000..aea7d7f2330
--- /dev/null
+++ b/python/sglang/srt/offloader.py
@@ -0,0 +1,548 @@
+import logging
+import os
+from abc import ABC
+from typing import Callable, Generator, List, Optional
+
+import torch
+from torch.func import functional_call
+
+from sglang.srt.distributed.naive_distributed import (
+ NaiveDistributed,
+ get_naive_distributed,
+ set_naive_distributed,
+)
+from sglang.srt.host_shared_memory import (
+ HostSharedMemoryManager,
+ get_host_shared_memory_manager,
+ set_host_shared_memory_manager,
+)
+from sglang.srt.layers.parameter import ModelWeightParameter
+from sglang.srt.server_args import ServerArgs
+from sglang.srt.utils import MultiprocessingSerializer, is_pin_memory_available
+
+logger = logging.getLogger(__name__)
+
+_SubmoduleAccessor = Callable[[torch.nn.Module], torch.nn.Module]
+_WhitelistParamNamesCreator = Callable[[torch.nn.Module], List[str]]
+
+
+class BaseOffloader(ABC):
+ def wrap_modules(
+ self,
+ all_modules_generator: Generator[torch.nn.Module, None, None],
+ submodule_accessor: Optional[_SubmoduleAccessor] = None,
+ whitelist_param_names_creator: Optional[_WhitelistParamNamesCreator] = None,
+ ):
+ return list(all_modules_generator)
+
+ def post_init(self):
+ pass
+
+
+class NoopOffloader(BaseOffloader):
+ pass
+
+
+# For simplicity this is a singleton, but multiple instances could be supported.
+_instance: Optional[BaseOffloader] = NoopOffloader()
+
+
+def get_offloader():
+ assert _instance is not None
+ return _instance
+
+
+def set_offloader(instance: BaseOffloader):
+ global _instance
+ _instance = instance
+
+
+def create_offloader_from_server_args(server_args: ServerArgs, dp_rank: int):
+ if server_args.cpu_offload_gb > 0:
+ return OffloaderV1(
+ cpu_offload_max_bytes=int(server_args.cpu_offload_gb * 1024**3)
+ )
+ if server_args.offload_group_size > 0:
+ assert (
+ server_args.cpu_offload_gb == 0
+ ), "V2 offload does not support cpu_offload_gb yet"
+ return OffloaderV2(
+ group_size=server_args.offload_group_size,
+ num_in_group=server_args.offload_num_in_group,
+ prefetch_step=server_args.offload_prefetch_step,
+ mode=server_args.offload_mode,
+ dp_rank=dp_rank,
+ dp_size=server_args.dp_size,
+ )
+ return NoopOffloader()
+
+
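A quick sketch of which offloader the factory above selects for a given configuration. A `SimpleNamespace` stands in for `ServerArgs` here, with field names taken from the factory; `SGLANG_RUN_ID` is set only because `OffloaderV2.__init__` reads it.

```python
import os
from types import SimpleNamespace

from sglang.srt.offloader import create_offloader_from_server_args

os.environ.setdefault("SGLANG_RUN_ID", "demo")  # required by OffloaderV2.__init__

v1_args = SimpleNamespace(cpu_offload_gb=4, offload_group_size=0)
v2_args = SimpleNamespace(
    cpu_offload_gb=0, offload_group_size=8, offload_num_in_group=1,
    offload_prefetch_step=1, offload_mode="cpu", dp_size=1,
)
print(type(create_offloader_from_server_args(v1_args, dp_rank=0)).__name__)  # OffloaderV1
print(type(create_offloader_from_server_args(v2_args, dp_rank=0)).__name__)  # OffloaderV2
```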
+class OffloaderV1(BaseOffloader):
+ def __init__(self, cpu_offload_max_bytes: int):
+ self._cpu_offload_bytes = 0
+ self._cpu_offload_max_bytes = cpu_offload_max_bytes
+
+ def wrap_modules(
+ self,
+ all_modules_generator: Generator[torch.nn.Module, None, None],
+ submodule_accessor: Optional[_SubmoduleAccessor] = None,
+ whitelist_param_names_creator: Optional[_WhitelistParamNamesCreator] = None,
+ ):
+ return [self.maybe_offload_to_cpu(module) for module in all_modules_generator]
+
+ def maybe_offload_to_cpu(self, module: torch.nn.Module) -> torch.nn.Module:
+ if (params := next(module.parameters(), None)) is None:
+ return module
+
+ device = params.device
+
+ if device == torch.device("cpu"):
+ return module
+
+ if self._cpu_offload_bytes >= self._cpu_offload_max_bytes:
+ return module
+
+ pin_memory = is_pin_memory_available()
+ # offload parameters to CPU
+ # use pin_memory if possible, which helps cudagraph capture speed
+ offloaded_parameters = False
+ for p in module.parameters():
+ if self._cpu_offload_bytes >= self._cpu_offload_max_bytes:
+ # we use per-parameter offloading
+ # one module might have some parameters offloaded and some not
+ break
+
+ # `torch.empty_like` does not support `pin_memory` argument
+ cpu_data = torch.empty_strided(
+ size=p.data.size(),
+ stride=p.data.stride(),
+ dtype=p.data.dtype,
+ layout=p.data.layout,
+ device="cpu",
+ pin_memory=pin_memory,
+ )
+ cpu_data.copy_(p.data)
+ p.data = cpu_data
+ self._cpu_offload_bytes += p.data.numel() * p.data.element_size()
+ offloaded_parameters = True
+
+ if offloaded_parameters:
+ original_forward = module.forward
+
+ def forward(*args, **kwargs):
+ module.forward = original_forward
+ device_state = {
+ # here we blindly call `to(device)`
+ # if the parameter is already on the device, it will be a no-op
+ k: v.to(device, non_blocking=True)
+ for k, v in module.state_dict().items()
+ }
+ output = functional_call(module, device_state, args=args, kwargs=kwargs)
+ module.forward = forward
+ return output
+
+ module.forward = forward
+
+ return module
+
+
+class OffloaderV2(BaseOffloader):
+ def __init__(
+ self,
+ group_size: int,
+ num_in_group: int,
+ prefetch_step: int,
+ mode: str,
+ dp_rank: int,
+ dp_size: int,
+ ):
+ self.group_size = group_size
+ self.num_in_group = num_in_group
+ self.prefetch_step = prefetch_step
+ self.mode = mode
+
+ run_id = os.environ["SGLANG_RUN_ID"]
+
+        # Temporarily initialized inside the Offloader; move it out if other modules also need it.
+ if self.mode in {"sharded_gpu", "shm_cpu"}:
+ from sglang.srt.distributed import get_tensor_model_parallel_world_size
+
+ assert (
+ get_tensor_model_parallel_world_size() == 1
+ ), "not yet support tp_size!=1"
+ set_naive_distributed(
+ NaiveDistributed(
+ rank=dp_rank,
+ world_size=dp_size,
+ rendezvous=f"/tmp/{run_id}",
+ )
+ )
+ if self.mode in {"shm_cpu"}:
+ set_host_shared_memory_manager(
+ HostSharedMemoryManager(
+ base_name=run_id,
+ )
+ )
+
+ self.offloaders = []
+
+ def wrap_modules(
+ self,
+ all_modules_generator: Generator[torch.nn.Module, None, None],
+ submodule_accessor: Optional[_SubmoduleAccessor] = None,
+ whitelist_param_names_creator: Optional[_WhitelistParamNamesCreator] = None,
+ ):
+ assert len(self.offloaders) == 0, "should only call wrap_modules once"
+
+ alt_stream = torch.cuda.Stream()
+
+ all_modules = []
+ offload_submodules = []
+ for module_index, module in enumerate(all_modules_generator):
+ all_modules.append(module)
+ if module_index % self.group_size >= self.group_size - self.num_in_group:
+ submodule = submodule_accessor(module)
+ whitelist_param_names = whitelist_param_names_creator(submodule)
+ logger.info(
+ f"[offloader] offload {module_index=} submodule={type(submodule)} params={whitelist_param_names} memory_allocated={torch.cuda.memory_allocated()}"
+ )
+ offload_submodules.append(submodule)
+ self.offloaders.append(
+ _ModuleOffloader(
+ mode=self.mode,
+ module=submodule,
+ alt_stream=alt_stream,
+ whitelist_param_names=whitelist_param_names,
+ )
+ )
+
+ for index, module in enumerate(offload_submodules):
+ _hook_module_forward_for_offloader(
+ index=index,
+ module=module,
+ offloaders=self.offloaders,
+ prefetch_step=self.prefetch_step,
+ )
+
+ return all_modules
+
+ def post_init(self):
+ for offloader in self.offloaders:
+ offloader.post_init()
+
+ for i in range(self.prefetch_step):
+ self.offloaders[i].start_onload()
+
+
+def _hook_module_forward_for_offloader(index, module, offloaders, prefetch_step):
+ def _on_forward_end():
+ offloaders[(index + prefetch_step) % len(offloaders)].start_onload()
+ offloaders[index].offload()
+
+ _hook_module_forward_raw(
+ module,
+ on_forward_end=_on_forward_end,
+ get_parameter_and_buffer_dicts=lambda: offloaders[
+ index
+ ].wait_and_get_device_tensors(),
+ )
+
+
+def _hook_module_forward_raw(module, on_forward_end, get_parameter_and_buffer_dicts):
+ original_forward = module.forward
+
+ def forward(*args, **kwargs):
+ module.forward = original_forward
+ output = functional_call(
+ module, get_parameter_and_buffer_dicts(), args=args, kwargs=kwargs
+ )
+ on_forward_end()
+ module.forward = forward
+ return output
+
+ module.forward = forward
+
+
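The hook above yields a simple ring schedule: `prefetch_step` groups are onloaded up front, and finishing group `i` onloads group `(i + prefetch_step) % N` while freeing group `i`. A toy trace of that schedule (no GPU work, illustration only):

```python
def prefetch_schedule(num_groups: int, prefetch_step: int):
    # Mirrors OffloaderV2.post_init() and _on_forward_end() above.
    events = [f"onload({i})" for i in range(prefetch_step)]
    for i in range(num_groups):
        events += [f"forward({i})",
                   f"onload({(i + prefetch_step) % num_groups})",
                   f"offload({i})"]
    return events

print(prefetch_schedule(num_groups=4, prefetch_step=1))
# ['onload(0)', 'forward(0)', 'onload(1)', 'offload(0)', 'forward(1)', 'onload(2)', 'offload(1)', ...]
```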
+class _ModuleOffloader(ABC):
+ def __init__(
+ self,
+ mode: str,
+ module: torch.nn.Module,
+ alt_stream: torch.cuda.Stream,
+ whitelist_param_names: List[str],
+ ):
+ self.mode = mode
+ self.module = module
+ self.device = next(module.parameters()).device
+ self.alt_stream = alt_stream
+
+ assert self.device != torch.device(
+ "cpu"
+ ), "not handled device=cpu case yet (should skip this tensor)"
+
+ self._device_tensors = None
+ self._load_event = None
+
+ param_dict = dict(self.module.named_parameters())
+ assert all(
+ name in param_dict for name in whitelist_param_names
+ ), f"{whitelist_param_names=} {list(param_dict.keys())=}"
+
+ self._param_offloaders = {
+ name: _BaseParamOffloader.create(mode, module=module, param_name=name)
+ for name in whitelist_param_names
+ }
+
+ def post_init(self):
+ for name, param_offloader in self._param_offloaders.items():
+ param_offloader.post_init()
+
+ def start_onload(self):
+ self.alt_stream.wait_stream(torch.cuda.current_stream())
+ with torch.cuda.stream(self.alt_stream):
+ self._device_tensors = self._create_device_tensors()
+ self._load_event = torch.cuda.Event()
+ self._load_event.record()
+
+ def offload(self):
+ self._device_tensors = None
+ self._load_event = None
+
+ def wait_and_get_device_tensors(self):
+ assert self._device_tensors is not None
+ self._load_event.wait()
+ return self._device_tensors
+
+ def _create_device_tensors(self):
+ return {k: v.create_device_tensor() for k, v in self._param_offloaders.items()}
+
+
+class _BaseParamOffloader(ABC):
+ @staticmethod
+ def create(mode: str, **kwargs) -> "_BaseParamOffloader":
+ return {
+ "meta": _MetaParamOffloader,
+ "cpu": _CpuParamOffloader,
+ "shm_cpu": _ShmCpuParamOffloader,
+ "sharded_gpu": _ShardedGpuParamOffloader,
+ }[mode](**kwargs)
+
+ def __init__(self, module, param_name):
+ self._module = module
+ self._param_name = param_name
+
+ @property
+ def _param(self):
+ return getattr(self._module, self._param_name)
+
+ def post_init(self):
+ pass
+
+ def create_device_tensor(self):
+ raise NotImplementedError
+
+
+class _MetaParamOffloader(_BaseParamOffloader):
+ """Usually used for debugging."""
+
+ def __init__(self, module, param_name):
+ super().__init__(module, param_name)
+ _move_param_to_meta(module, param_name)
+
+ def create_device_tensor(self):
+ return torch.empty_like(self._param.data, device="cuda")
+
+
+class _CpuParamOffloader(_BaseParamOffloader):
+ def __init__(self, module, param_name):
+ super().__init__(module, param_name)
+ _move_param_to_cpu(self._param, pin_memory=True)
+
+ def create_device_tensor(self):
+ return self._param.to("cuda", non_blocking=True)
+
+
+class _ShmCpuParamOffloader(_BaseParamOffloader):
+ def __init__(self, module, param_name):
+ super().__init__(module, param_name)
+ self._rank = get_naive_distributed().get_rank()
+ self._world_size = get_naive_distributed().get_world_size()
+
+ from sglang.srt.distributed import get_tensor_model_parallel_world_size
+
+ assert get_tensor_model_parallel_world_size() == 1, "not yet support tp_size!=1"
+ assert (
+ self._param.data.is_contiguous()
+ ), f"not yet support non-contiguous tensor {self._param.shape=} {self._param.stride()=}"
+
+ self.shm_cpu_data = get_host_shared_memory_manager().malloc(
+ shape=self._param.shape, dtype=self._param.dtype
+ )
+
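+ # Rank 0 keeps the single host shared-memory copy of this weight; the other
+ # ranks release theirs (moved to meta) and on-load from the same SHM buffer.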
+ if self._rank == 0:
+ self.shm_cpu_data.copy_(self._param.data.to("cpu"))
+ self._param.data = self.shm_cpu_data
+ else:
+ _move_param_to_meta(self._module, self._param_name)
+ get_naive_distributed().barrier()
+
+ def post_init(self):
+ if self._rank == 0:
+ assert (
+ self.shm_cpu_data.data_ptr() == self._param.data.data_ptr()
+ ), f"{self.shm_cpu_data.data_ptr()=} {self._param.data.data_ptr()=} {self.shm_cpu_data=} {self._param.data=}"
+
+ _move_param_to_meta(self._module, self._param_name)
+
+ def create_device_tensor(self):
+ return self.shm_cpu_data.to("cuda", non_blocking=True)
+
+
+def _move_param_to_cpu(param, pin_memory: bool):
+ cpu_data = _empty_strided_like(
+ param.data,
+ device="cpu",
+ pin_memory=pin_memory,
+ )
+ cpu_data.copy_(param.data)
+ param.data = cpu_data
+
+
+def _move_param_to_meta(module, param_name):
+ old_param = getattr(module, param_name)
+ old_param_type = type(old_param)
+
+ new_data = old_param.data.to("meta")
+
+ if old_param_type == ModelWeightParameter:
+ # manually checked how `w13_weight` and `w2_weight` are constructed
+ new_param = ModelWeightParameter(
+ data=new_data,
+ **{
+ k: getattr(old_param, k)
+ for k in ["input_dim", "output_dim", "weight_loader"]
+ },
+ )
+ elif old_param_type == torch.nn.Parameter:
+ new_param = torch.nn.Parameter(
+ data=new_data,
+ requires_grad=False,
+ )
+ else:
+ raise ValueError(f"Unknown {old_param_type=} {old_param=}")
+
+ setattr(module, param_name, new_param)
+
+
+def _empty_strided_like(x: torch.Tensor, device, pin_memory=False):
+ return torch.empty_strided(
+ size=x.size(),
+ stride=x.stride(),
+ dtype=x.dtype,
+ layout=x.layout,
+ device=device,
+ pin_memory=pin_memory,
+ )
+
+
+# ----------------------------------------- ShardedGpu ------------------------------------------------------
+
+
+# TODO unify with ShmCpu mode
+class _ShardedGpuParamOffloader(_BaseParamOffloader):
+ def __init__(self, module, param_name):
+ super().__init__(module, param_name)
+ self._rank = get_naive_distributed().get_rank()
+ self._world_size = get_naive_distributed().get_world_size()
+
+ from sglang.srt.distributed import get_tensor_model_parallel_world_size
+
+ assert get_tensor_model_parallel_world_size() == 1, "not yet support tp_size!=1"
+ assert (
+ self._param.data.is_contiguous()
+ ), f"not yet support non-contiguous tensor {self._param.shape=} {self._param.stride()=}"
+
+ if self._rank == 0:
+ _move_param_to_cpu(self._param, pin_memory=True)
+ else:
+ _move_param_to_meta(self._module, self._param_name)
+
+ self.sharded_param_handles = None
+
+ def post_init(self):
+ # check again since it may have changed
+ assert (
+ self._param.data.is_contiguous()
+ ), f"not yet support non-contiguous tensor {self._param.shape=} {self._param.stride()=}"
+
+ scatter_src = self._param.data
+
+ logger.info(
+ f"[offloader] post_init {scatter_src.nbytes=} {scatter_src.dtype=} {scatter_src.shape=} {torch.cuda.memory_allocated()=}"
+ )
+
+ if self._rank == 0:
+ scatter_src = scatter_src.to("cuda")
+ scatter_list = _even_chunk(scatter_src, self._world_size)
+
+ sharded_param = torch.empty(
+ scatter_list[0].shape, dtype=scatter_list[0].dtype, device="cuda"
+ )
+ self.sharded_param_handles = _create_shared_buffer_tensors(
+ local_tensor=sharded_param
+ )
+
+ get_naive_distributed().scatter(
+ sharded_param, scatter_list if self._rank == 0 else None
+ )
+
+ _move_param_to_meta(self._module, self._param_name)
+
+ def create_device_tensor(self):
+ output = _empty_strided_like(self._param, device="cuda")
+ output_chunks = output.chunk(self._world_size)
+
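+ # Stagger the source rank per iteration so that peers read from different
+ # shard owners at the same time instead of all copying from rank 0 first.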
+ for index in range(self._world_size):
+ src_rank = (self._rank + index) % self._world_size
+ src_buf = self.sharded_param_handles[src_rank]
+ output_chunks[src_rank].copy_(src_buf)
+
+ return output
+
+
+def _even_chunk(x: torch.Tensor, chunks: int):
+ assert x.shape[0] % chunks == 0, f"{x.shape=} {chunks=}"
+ return list(x.chunk(chunks))
+
+
+def _create_shared_buffer_tensors(local_tensor: torch.Tensor) -> List[torch.Tensor]:
+ self_rank = get_naive_distributed().get_rank()
+ world_size = get_naive_distributed().get_world_size()
+
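+ # Each rank serializes a handle to its local shard buffer once per peer and
+ # all-gathers them, so every rank ends up with a direct view of every other
+ # rank's shard buffer.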
+ object_list = get_naive_distributed().all_gather_object(
+ dict(
+ dup_serialized_local_tensor=[
+ (
+ None
+ if interesting_rank == self_rank
+ else MultiprocessingSerializer.serialize(local_tensor)
+ )
+ for interesting_rank in range(world_size)
+ ]
+ )
+ )
+
+ output_tensors = []
+ for output_rank in range(world_size):
+ remote_serialized_tensor = object_list[output_rank][
+ "dup_serialized_local_tensor"
+ ][self_rank]
+ if output_rank == self_rank:
+ assert remote_serialized_tensor is None
+ output_tensors.append(local_tensor)
+ else:
+ output_tensors.append(
+ MultiprocessingSerializer.deserialize(remote_serialized_tensor)
+ )
+
+ return output_tensors
diff --git a/python/sglang/srt/code_completion_parser.py b/python/sglang/srt/parser/code_completion_parser.py
similarity index 100%
rename from python/sglang/srt/code_completion_parser.py
rename to python/sglang/srt/parser/code_completion_parser.py
diff --git a/python/sglang/srt/conversation.py b/python/sglang/srt/parser/conversation.py
similarity index 96%
rename from python/sglang/srt/conversation.py
rename to python/sglang/srt/parser/conversation.py
index 84cb1db36b5..8a2fe4e7f06 100644
--- a/python/sglang/srt/conversation.py
+++ b/python/sglang/srt/parser/conversation.py
@@ -26,6 +26,8 @@
# Adapted from
# https://github.com/lm-sys/FastChat/blob/main/fastchat/conversation.py
import dataclasses
+import json
+import os
import re
from enum import IntEnum, auto
from typing import Callable, Dict, List, Optional, Tuple, Union
@@ -625,7 +627,7 @@ def generate_chat_conv(
real_content += content.text
elif content.type == "image_url":
# NOTE: works for llava and intervl2_5
- if conv.name in ["internvl-2-5", "interns1"]:
+ if conv.name in ["internvl-2-5"]:
real_content = image_token + real_content
else:
real_content += image_token
@@ -817,20 +819,7 @@ def generate_chat_conv(
sep_style=SeparatorStyle.MPT,
sep="<|im_end|>\n",
stop_str=["<|im_end|>", "<|action_end|>"],
- image_token="",
- )
-)
-
-register_conv_template(
- Conversation(
- name="interns1",
- system_template="<|im_start|>system\n{system_message}",
- system_message="You are an AI assistant whose name is Intern-S1 (书生大模型).\n- Intern-S1 (书生大模型) is a vision-language model that is developed by Shanghai AI Laboratory (上海人工智能实验室). It is designed to be helpful, honest, and harmless.\n- Intern-S1 (书生大模型) can understand and communicate fluently in the language chosen by the user such as English and 中文.\nYou are an expert reasoner with extensive experience in all areas. You approach problems through systematic thinking and rigorous reasoning. Your response should reflect deep understanding and precise logical thinking, making your solution path and reasoning clear to others. Please put your thinking process within ... tags.",
- roles=("<|im_start|>user\n", "<|im_start|>assistant\n"),
- sep_style=SeparatorStyle.MPT,
- sep="<|im_end|>\n",
- stop_str=["<|im_end|>", "<|action_end|>"],
- image_token="",
+ image_token="",
)
)
@@ -972,16 +961,42 @@ def generate_chat_conv(
)
+MODEL_TYPE_TO_TEMPLATE = {
+ "internvl_chat": "internvl-2-5",
+ "deepseek_vl_v2": "deepseek-vl2",
+ "multi_modality": "janus-pro",
+ "phi4mm": "phi-4-mm",
+ "minicpmv": "minicpmv",
+ "minicpmo": "minicpmo",
+}
+
+
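+# Fallback used by the matching functions below: when the model path does not
+# reveal the family, read `model_type` from the checkpoint's config.json and
+# map it to a chat template via MODEL_TYPE_TO_TEMPLATE.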
+def get_model_type(model_path: str) -> Optional[str]:
+ config_path = os.path.join(model_path, "config.json")
+ if not os.path.exists(config_path):
+ return None
+ try:
+ with open(config_path, "r", encoding="utf-8") as f:
+ config = json.load(f)
+ return config.get("model_type")
+ except (IOError, json.JSONDecodeError):
+ return None
+
+
@register_conv_template_matching_function
def match_internvl(model_path: str):
if re.search(r"internvl", model_path, re.IGNORECASE):
return "internvl-2-5"
+ model_type = get_model_type(model_path)
+ return MODEL_TYPE_TO_TEMPLATE.get(model_type)
@register_conv_template_matching_function
def match_deepseek_janus_pro(model_path: str):
if re.search(r"janus", model_path, re.IGNORECASE):
return "janus-pro"
+ model_type = get_model_type(model_path)
+ return MODEL_TYPE_TO_TEMPLATE.get(model_type)
@register_conv_template_matching_function
@@ -994,6 +1009,8 @@ def match_vicuna(model_path: str):
def match_deepseek_vl(model_path: str):
if re.search(r"deepseek.*vl2", model_path, re.IGNORECASE):
return "deepseek-vl2"
+ model_type = get_model_type(model_path)
+ return MODEL_TYPE_TO_TEMPLATE.get(model_type)
@register_conv_template_matching_function
@@ -1007,14 +1024,17 @@ def match_qwen_chat_ml(model_path: str):
@register_conv_template_matching_function
-def match_openbmb_minicpm(model_path: str):
- if re.search(r"minicpm-v", model_path, re.IGNORECASE):
- return "minicpmv"
- elif re.search(r"minicpm-o", model_path, re.IGNORECASE):
- return "minicpmo"
+def match_minicpm(model_path: str):
+ match = re.search(r"minicpm-(v|o)", model_path, re.IGNORECASE)
+ if match:
+ return f"minicpm{match.group(1).lower()}"
+ model_type = get_model_type(model_path)
+ return MODEL_TYPE_TO_TEMPLATE.get(model_type)
@register_conv_template_matching_function
def match_phi_4_mm(model_path: str):
if "phi-4-multimodal" in model_path.lower():
return "phi-4-mm"
+ model_type = get_model_type(model_path)
+ return MODEL_TYPE_TO_TEMPLATE.get(model_type)
diff --git a/python/sglang/srt/parser/harmony_parser.py b/python/sglang/srt/parser/harmony_parser.py
new file mode 100644
index 00000000000..ffc0be95ec7
--- /dev/null
+++ b/python/sglang/srt/parser/harmony_parser.py
@@ -0,0 +1,588 @@
+import re
+from dataclasses import dataclass
+from typing import Iterator, List, Optional, Tuple
+
+
+@dataclass
+class Event:
+ """Represents a parsed event from the Harmony stream."""
+
+ event_type: str
+ content: str
+ raw_text: Optional[str] = None # Original text including structural markers
+
+
+@dataclass
+class Token:
+ """A structural token in the Harmony format."""
+
+ type: str
+ start: int
+ end: int
+
+
+def prefix_hold(text: str, tokens: List[str]) -> Tuple[str, str]:
+ """
+ Holds back the longest suffix of `text` that could be a prefix of any token.
+ Returns (emit_now, keep_for_later).
+ """
+ if not text:
+ return "", ""
+ max_hold = 0
+ for tok in tokens:
+ if not tok:
+ continue
+ # Check for prefixes of tok in the suffix of text
+ L = min(len(tok) - 1, len(text))
+ for k in range(L, 0, -1):
+ if tok.startswith(text[-k:]):
+ max_hold = max(max_hold, k)
+ break
+ if max_hold == 0:
+ return text, ""
+ return text[:-max_hold], text[-max_hold:]
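+# Illustrative example: prefix_hold("stream<|chan", ["<|channel|>"]) returns
+# ("stream", "<|chan"), since "<|chan" could still grow into "<|channel|>".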
+
+
+def iter_tokens(text: str, start_pos: int = 0) -> Iterator[Token]:
+ """Iterate over structural tokens in left-to-right order."""
+ TOKENS = {
+ "<|start|>": "START",
+ "<|channel|>": "CHANNEL",
+ "<|message|>": "MESSAGE",
+ "<|constrain|>": "CONSTRAIN",
+ "<|end|>": "END",
+ "<|call|>": "CALL",
+ "<|return|>": "RETURN",
+ }
+
+ pos = start_pos
+ has_unknown_tokens = False
+ while pos < len(text):
+ # Find next "<|"
+ marker_pos = text.find("<|", pos)
+ if marker_pos == -1:
+ break
+
+ # Emit any text before the marker
+ if marker_pos > pos:
+ yield Token("TEXT", pos, marker_pos)
+
+ # Check which token it is
+ found_token = False
+
+ for literal, token_type in TOKENS.items():
+ if text.startswith(literal, marker_pos):
+ yield Token(token_type, marker_pos, marker_pos + len(literal))
+ pos = marker_pos + len(literal)
+ found_token = True
+ break
+ if not found_token:
+ tail = text[marker_pos:]
+ is_partial = any(lit.startswith(tail) for lit in TOKENS)
+ if is_partial:
+ # Hold whole tail (partial token)
+ yield Token("TEXT", marker_pos, len(text))
+ pos = len(text)
+ break
+ else:
+ # Unknown token like <|weird|> ...
+ has_unknown_tokens = True
+ # Emit the "<|" as a TEXT token first
+ yield Token("TEXT", marker_pos, marker_pos + 2)
+
+ # Try to find a closing "|>" for this unknown token
+ close_pos = text.find("|>", marker_pos + 2)
+ if close_pos != -1:
+ # Look ahead to the next structural token after the unknown close
+ next_marker = text.find("<|", close_pos + 2)
+ if next_marker != -1:
+ # Emit the unknown body + any following plain text up to next marker
+ yield Token("TEXT", marker_pos + 2, next_marker)
+ pos = next_marker
+ else:
+ # Emit until the end
+ yield Token("TEXT", marker_pos + 2, len(text))
+ pos = len(text)
+ break
+ else:
+ # No closing; advance past "<|" and continue scanning
+ pos = marker_pos + 2
+
+ # Emit any remaining text
+ if pos < len(text):
+ yield Token("TEXT", pos, len(text))
+ elif pos == len(text) and has_unknown_tokens:
+ # Add an empty trailing TEXT token only when we encountered unknown tokens
+ # and the text ends with a known structural token. This matches expected tests.
+ for literal in TOKENS.keys():
+ if text.endswith(literal):
+ yield Token("TEXT", pos, pos)
+ break
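+# Illustrative example: iter_tokens("<|channel|>analysis<|message|>Hi<|end|>")
+# yields CHANNEL, TEXT("analysis"), MESSAGE, TEXT("Hi"), END in that order.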
+
+
+class CanonicalStrategy:
+ """Parses the canonical Harmony format with channel markers."""
+
+ def __init__(self):
+ self.guard_tokens = [
+ "<|start|>",
+ "<|channel|>",
+ "<|message|>",
+ "<|constrain|>",
+ "<|end|>",
+ "<|call|>",
+ "<|return|>",
+ ]
+
+ def parse(self, text: str) -> Tuple[List[Event], str]:
+ events = []
+ tokens = list(iter_tokens(text))
+
+ if not tokens:
+ return events, ""
+
+ pos = 0
+ while pos < len(tokens):
+ token = tokens[pos]
+
+ if token.type == "TEXT":
+ # Check if this might be incomplete
+ if pos == len(tokens) - 1: # Last token
+ emit, hold = prefix_hold(
+ text[token.start : token.end], self.guard_tokens
+ )
+ if emit:
+ events.append(Event("normal", emit))
+ return events, hold
+ else:
+ # Check if this might be commentary filler between blocks
+ if self._is_commentary_filler_between_blocks(text, tokens, pos):
+ # Skip this filler text - don't emit as normal content
+ pos += 1
+ else:
+ content = text[token.start : token.end]
+ # Skip standalone structural tokens that shouldn't be emitted as normal text
+ if not self._is_standalone_structural_token(content):
+ events.append(Event("normal", content))
+ pos += 1
+
+ elif token.type in ("START", "CHANNEL"):
+ # Parse a channel block starting here
+ block_result = self._parse_block(text, tokens, pos)
+ if block_result is None:
+ # Incomplete block - check if we can emit partial reasoning content
+ partial_result = self._parse_partial_analysis(text, tokens, pos)
+ if partial_result:
+ event, remaining_text = partial_result
+ events.append(event)
+ return events, remaining_text
+ # No partial content, hold entire remaining text
+ remaining_start = tokens[pos].start
+ return events, text[remaining_start:]
+ event, new_pos = block_result
+ if event:
+ events.append(event)
+ pos = new_pos
+
+ else:
+ # Check if this might be commentary filler between blocks
+ if self._is_commentary_filler_between_blocks(text, tokens, pos):
+ # Skip this filler text - don't emit as normal content
+ pos += 1
+ else:
+ # Unexpected token - only emit as text if it's not a standalone structural token
+ content = text[token.start : token.end]
+ if not self._is_standalone_structural_token(content):
+ events.append(Event("normal", content))
+ pos += 1
+
+ return events, ""
+
+ def _parse_partial_analysis(
+ self, text: str, tokens: List[Token], start_pos: int
+ ) -> Optional[Tuple[Event, str]]:
+ """Try to parse partial analysis content for incremental streaming."""
+ pos = start_pos
+
+ # Skip <|start|> if present
+ if pos < len(tokens) and tokens[pos].type == "START":
+ pos += 1
+
+ # Look for <|channel|> followed by analysis
+ channel_pos = None
+ message_pos = None
+
+ for i in range(pos, len(tokens)):
+ if tokens[i].type == "CHANNEL" and channel_pos is None:
+ channel_pos = i
+ elif tokens[i].type == "MESSAGE":
+ message_pos = i
+ break
+
+ if channel_pos is None or message_pos is None:
+ return None
+
+ # Extract channel type
+ channel_start = (
+ tokens[channel_pos + 1].start
+ if channel_pos + 1 < len(tokens)
+ else tokens[channel_pos].end
+ )
+ channel_end = tokens[message_pos].start
+ channel_header = text[channel_start:channel_end]
+
+ channel_type = self._extract_channel_type(channel_header)
+ if channel_type != "analysis":
+ return None # Only stream analysis content - tool calls wait for completion
+
+ # Extract partial content after <|message|>
+ content_start = tokens[message_pos].end
+ content = text[content_start:]
+
+ # Return partial reasoning content and preserve the channel structure for next parse
+ remaining_text = text[tokens[start_pos].start : content_start]
+ return Event("reasoning", content), remaining_text
+
+ def _extract_channel_type(self, header_text: str) -> Optional[str]:
+ """Extract channel type from header, ignoring other attributes like to=... or <|constrain|>..."""
+ # Look for channel type at the start of the header (case insensitive)
+ header_clean = header_text.strip()
+
+ if header_clean.lower().startswith("analysis"):
+ return "analysis"
+ elif header_clean.lower().startswith("commentary"):
+ return "commentary"
+ elif header_clean.lower().startswith("final"):
+ return "final"
+ else:
+ return None # Unknown channel type
+
+ def _parse_block(
+ self, text: str, tokens: List[Token], start_pos: int
+ ) -> Optional[Tuple[Optional[Event], int]]:
+ """Parse a channel block. Returns (event, next_pos) or None if incomplete."""
+ pos = start_pos
+
+ # Skip <|start|> if present
+ if pos < len(tokens) and tokens[pos].type == "START":
+ pos += 1
+
+ # Look for <|channel|> or <|message|> (tool responses go direct to message)
+ channel_pos = None
+ message_pos = None
+
+ for i in range(pos, len(tokens)):
+ if tokens[i].type == "CHANNEL" and channel_pos is None:
+ channel_pos = i
+ elif tokens[i].type == "MESSAGE":
+ message_pos = i
+ break
+
+ if message_pos is None:
+ return None # No message token found
+
+ # If no channel found, this is a tool response - treat as normal text
+ if channel_pos is None:
+ content_start = tokens[message_pos].end
+ # Find end token after message
+ end_token_pos = None
+ for i in range(message_pos + 1, len(tokens)):
+ if tokens[i].type in ("END", "CALL", "RETURN"):
+ end_token_pos = i
+ break
+ if end_token_pos is None:
+ return None # Incomplete
+ content = text[content_start : tokens[end_token_pos].start]
+ return Event("normal", content), end_token_pos + 1
+
+ # Standard channel block processing - message_pos is already found above
+ pos = channel_pos + 1 # Skip CHANNEL token
+
+ # Extract channel type from header (ignoring other attributes like to=... or <|constrain|>...)
+ channel_start = tokens[pos].start if pos < len(tokens) else tokens[pos - 1].end
+ channel_end = tokens[message_pos].start
+ channel_header = text[channel_start:channel_end]
+
+ channel_type = self._extract_channel_type(channel_header)
+ if not channel_type:
+ return None # Unknown or malformed channel
+
+ pos = message_pos + 1 # Skip MESSAGE token
+
+ # Find content and end token
+ content_start = tokens[message_pos].end
+ end_pos = pos
+
+ # Each channel type has specific valid end tokens
+ if channel_type == "final":
+ while end_pos < len(tokens) and tokens[end_pos].type != "RETURN":
+ end_pos += 1
+ elif channel_type == "analysis":
+ while end_pos < len(tokens) and tokens[end_pos].type not in ("END", "CALL"):
+ end_pos += 1
+ else: # commentary
+ while end_pos < len(tokens) and tokens[end_pos].type not in ("END", "CALL"):
+ end_pos += 1
+
+ if end_pos >= len(tokens):
+ # No end token found
+ if channel_type == "final":
+ # Final blocks can end at end of input without requiring <|return|>
+ content = text[content_start:]
+ return Event("normal", content), end_pos
+ return None # Analysis and commentary need proper end tokens
+
+ end_token = tokens[end_pos]
+ content = text[content_start : end_token.start]
+
+ # Create event based on channel and end token
+ if channel_type == "analysis":
+ if end_token.type == "CALL":
+ # Built-in tools (browser, python) use analysis channel with <|call|>
+ raw_text = text[tokens[start_pos].start : end_token.end]
+ return Event("tool_call", content.strip(), raw_text), end_pos + 1
+ else:
+ return Event("reasoning", content), end_pos + 1
+ elif channel_type == "commentary":
+ if end_token.type == "CALL":
+ raw_text = text[tokens[start_pos].start : end_token.end]
+ return Event("tool_call", content.strip(), raw_text), end_pos + 1
+ else:
+ return Event("normal", content), end_pos + 1
+ elif channel_type == "final":
+ # For final blocks, include any trailing TEXT immediately after <|return|>
+ final_content = content
+ if end_token.type == "RETURN" and end_pos + 1 < len(tokens):
+ next_token = tokens[end_pos + 1]
+ if next_token.type == "TEXT":
+ final_content += text[next_token.start : next_token.end]
+ return Event("normal", final_content), end_pos + 2
+ return Event("normal", final_content), end_pos + 1
+
+ return None, end_pos + 1
+
+ def _is_commentary_filler_between_blocks(
+ self, text: str, tokens: List[Token], pos: int
+ ) -> bool:
+ """Check if this is commentary filler text or problematic structural tokens in malformed sequences."""
+ current_token = tokens[pos]
+ current_text = text[current_token.start : current_token.end].strip()
+
+ # Check for commentary filler between CALL and CHANNEL
+ if pos > 0 and pos + 1 < len(tokens):
+ prev_token = tokens[pos - 1]
+ next_token = tokens[pos + 1]
+
+ # Check if we have CALL -> TEXT("commentary") -> CHANNEL pattern
+ if (
+ prev_token.type == "CALL"
+ and next_token.type == "CHANNEL"
+ and current_text.lower() == "commentary"
+ ):
+ return True
+
+ # Check for problematic patterns after CALL tokens (malformed sequences)
+ if pos > 0:
+ prev_token = tokens[pos - 1]
+
+ # Only filter structural tokens that appear immediately after CALL in malformed sequences
+ # These patterns indicate the content is malformed and the structural tokens are noise
+ if prev_token.type == "CALL":
+ # Filter MESSAGE tokens after CALL (should not happen in well-formed content)
+ if current_token.type == "MESSAGE":
+ return True
+
+ # Filter standalone "commentary" text after CALL
+ if (
+ current_token.type == "TEXT"
+ and current_text.lower() == "commentary"
+ ):
+ return True
+
+ return False
+
+ def _is_standalone_structural_token(self, content: str) -> bool:
+ """Check if content is just a standalone structural token that should be filtered."""
+ content_stripped = content.strip()
+ structural_tokens = [
+ "<|start|>",
+ "<|channel|>",
+ "<|message|>",
+ "<|constrain|>",
+ "<|end|>",
+ "<|call|>",
+ "<|return|>",
+ ]
+ return content_stripped in structural_tokens
+
+
+class TextStrategy:
+ """Parses the text-based Harmony fallback format."""
+
+ def __init__(self):
+ self.buffer_context = ""
+ self.patterns = {
+ "analysis_then_final": re.compile(
+ r"^\s*(?:assistant)?\s*(analysis|commentary)(.*?)\s*assistantfinal\s*(.*)\s*$",
+ re.IGNORECASE | re.DOTALL,
+ ),
+ "final_only": re.compile(
+ r"^\s*assistantfinal\s*(.*)\s*$", re.IGNORECASE | re.DOTALL
+ ),
+ "analysis_only": re.compile(
+ r"^\s*(?:assistant)?\s*(analysis|commentary)(.*)\s*$",
+ re.IGNORECASE | re.DOTALL,
+ ),
+ }
+
+ def set_buffer_context(self, buffer: str):
+ self.buffer_context = buffer
+
+ def parse(self, text: str) -> Tuple[List[Event], str]:
+ events = []
+
+ m = self.patterns["analysis_then_final"].match(text)
+ if m:
+ channel, reasoning, final = m.groups()
+ if channel.lower() == "analysis" and reasoning.strip():
+ events.append(Event("reasoning", reasoning.strip()))
+ elif channel.lower() == "commentary" and reasoning.strip():
+ events.append(Event("normal", reasoning.strip()))
+ if final.strip():
+ events.append(Event("normal", final.strip()))
+ return events, ""
+
+ # If assistantfinal appears to be incomplete (e.g., 'assistantfin'), hold entire buffer
+ if re.search(
+ r"(?:^|\s)(?:assistant)?\s*(analysis|commentary)", text, re.IGNORECASE
+ ):
+ low = text.lower()
+ if "assistantfin" in low and "assistantfinal" not in low:
+ return events, text
+
+ m = self.patterns["final_only"].match(text)
+ if m:
+ final = m.group(1)
+ if final.strip():
+ events.append(Event("normal", final.strip()))
+ return events, ""
+
+ m = self.patterns["analysis_only"].match(text)
+ if m:
+ channel, content = m.groups()
+ emit, hold = prefix_hold(content, ["assistantfinal"])
+ if channel.lower() == "analysis" and emit:
+ # Stream reasoning content as-is based on structural markers only.
+ events.append(Event("reasoning", emit))
+ # Keep the channel header in the remaining buffer to continue parsing
+ # subsequent chunks in the text fallback format. Preserve any held
+ # prefix that may complete into "assistantfinal".
+ if hold:
+ return events, text[: m.start(2)] + hold
+ else:
+ return events, channel
+ elif channel.lower() == "commentary" and emit:
+ # For commentary, stream as normal text. Preserve spaces unless holding.
+ content_out = emit if hold else emit.strip()
+ events.append(Event("normal", content_out))
+ if hold:
+ return events, text[: m.start(2)] + hold
+ else:
+ return events, ""
+ # If no emit, just return the held content
+ return events, text[: m.start(2)] + hold
+
+ emit, hold = prefix_hold(text, ["analysis", "commentary", "assistantfinal"])
+ if emit:
+ events.append(Event("normal", emit))
+ return events, hold
+
+
+class HarmonyParser:
+ """Facade for parsing Harmony format, switching between strategies."""
+
+ def __init__(self):
+ self.strategy = None
+ self._buffer = ""
+ self._should_filter_commentary = (
+ False # Track if we should filter commentary in next chunks
+ )
+ self._partial_commentary = (
+ "" # Track partial commentary being built across chunks
+ )
+
+ def parse(self, chunk: str) -> List[Event]:
+ self._buffer += chunk
+
+ if self.strategy is None:
+ if "<|channel|>" in self._buffer or "<|start|>" in self._buffer:
+ self.strategy = CanonicalStrategy()
+ elif re.search(
+ r"(?:^|\s)(?:assistant)?\s*(analysis|commentary|assistantfinal)",
+ self._buffer,
+ re.IGNORECASE,
+ ):
+ self.strategy = TextStrategy()
+ else:
+ # Not yet determined, hold
+ return []
+
+ if hasattr(self.strategy, "set_buffer_context"):
+ # Provide full buffer context to strategy for smarter whitespace handling
+ self.strategy.set_buffer_context(self._buffer)
+
+ events, remaining = self.strategy.parse(self._buffer)
+
+ # Check if we should start filtering commentary (after <|call|> token or tool_call event)
+ buffer_has_call_token = self._buffer.rstrip().endswith("<|call|>")
+
+ self._buffer = remaining
+
+ # Filter events for streaming case
+ filtered_events = []
+ for event in events:
+ should_filter = False
+
+ if event.event_type == "normal":
+ # Check if we're in a commentary filtering state
+ if self._should_filter_commentary or self._partial_commentary:
+ # Try to build partial commentary
+ potential_commentary = (
+ self._partial_commentary + event.content.strip().lower()
+ )
+
+ if potential_commentary == "commentary":
+ # Complete commentary found - filter it
+ should_filter = True
+ self._partial_commentary = "" # Reset
+ self._should_filter_commentary = False # Done filtering
+ elif "commentary".startswith(potential_commentary):
+ # Partial match - accumulate and filter this chunk
+ should_filter = True
+ self._partial_commentary = potential_commentary
+ else:
+ # Not commentary - reset and keep the event
+ self._partial_commentary = ""
+ self._should_filter_commentary = False
+ else:
+ # Not in commentary filtering state - reset partial state
+ self._partial_commentary = ""
+
+ if should_filter:
+ # Skip this commentary filler
+ continue
+
+ # Update filtering state based on events and buffer state
+ if event.event_type == "tool_call":
+ self._should_filter_commentary = (
+ True # Filter commentary after tool calls
+ )
+ self._partial_commentary = "" # Reset on tool call
+ elif buffer_has_call_token:
+ self._should_filter_commentary = (
+ True # Filter commentary after <|call|> token
+ )
+
+ filtered_events.append(event)
+
+ return filtered_events
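+
+
+# Minimal usage sketch (the input string is illustrative):
+#   parser = HarmonyParser()
+#   events = parser.parse("<|channel|>analysis<|message|>thinking...<|end|>")
+#   # -> a single Event with event_type="reasoning" and content "thinking..."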
diff --git a/python/sglang/srt/jinja_template_utils.py b/python/sglang/srt/parser/jinja_template_utils.py
similarity index 100%
rename from python/sglang/srt/jinja_template_utils.py
rename to python/sglang/srt/parser/jinja_template_utils.py
diff --git a/python/sglang/srt/parser/reasoning_parser.py b/python/sglang/srt/parser/reasoning_parser.py
new file mode 100644
index 00000000000..f50368aed9c
--- /dev/null
+++ b/python/sglang/srt/parser/reasoning_parser.py
@@ -0,0 +1,309 @@
+import re
+from typing import Dict, Optional, Tuple, Type
+
+from sglang.srt.parser.harmony_parser import HarmonyParser
+
+
+class StreamingParseResult:
+ """Result of streaming incremental parsing."""
+
+ def __init__(
+ self,
+ normal_text: Optional[str] = None,
+ reasoning_text: Optional[str] = None,
+ ):
+ self.normal_text = normal_text or ""
+ self.reasoning_text = reasoning_text or ""
+
+
+class BaseReasoningFormatDetector:
+ """Base class providing two sets of interfaces: one-time and streaming incremental."""
+
+ def __init__(
+ self,
+ think_start_token: str,
+ think_end_token: str,
+ force_reasoning: bool = False,
+ stream_reasoning: bool = True,
+ ):
+ self.think_start_token = think_start_token
+ self.think_end_token = think_end_token
+ self._in_reasoning = force_reasoning
+ self.stream_reasoning = stream_reasoning
+
+ self._buffer = ""
+ self.stripped_think_start = False
+
+ def detect_and_parse(self, text: str) -> StreamingParseResult:
+ """
+ One-time parsing: Detects and parses reasoning sections in the provided text.
+ Returns both reasoning content and normal text separately.
+ """
+ in_reasoning = self._in_reasoning or self.think_start_token in text
+
+ if not in_reasoning:
+ return StreamingParseResult(normal_text=text)
+
+ # The text is considered to be in a reasoning block.
+ processed_text = text.replace(self.think_start_token, "").strip()
+
+ if self.think_end_token not in processed_text:
+ # Assume reasoning was truncated before `</think>` token
+ return StreamingParseResult(reasoning_text=processed_text)
+
+ # Extract reasoning content
+ splits = processed_text.split(self.think_end_token, maxsplit=1)
+ reasoning_text = splits[0]
+ normal_text = splits[1].strip()
+
+ return StreamingParseResult(
+ normal_text=normal_text, reasoning_text=reasoning_text
+ )
+
+ def parse_streaming_increment(self, new_text: str) -> StreamingParseResult:
+ """
+ Streaming incremental parsing for reasoning content.
+ Handles partial reasoning tags and content.
+
+ If stream_reasoning is False:
+ Accumulates reasoning content until the end tag is found
+ If stream_reasoning is True:
+ Streams reasoning content as it arrives
+ """
+ self._buffer += new_text
+ current_text = self._buffer
+
+ # If the current text is a prefix of the think token, keep buffering
+ if any(
+ token.startswith(current_text) and token != current_text
+ for token in [self.think_start_token, self.think_end_token]
+ ):
+ return StreamingParseResult()
+
+ # Strip `<think>` token if present
+ if not self.stripped_think_start and self.think_start_token in current_text:
+ current_text = current_text.replace(self.think_start_token, "")
+ self.stripped_think_start = True
+ self._in_reasoning = True
+
+ # Handle end of reasoning block
+ if self._in_reasoning and self.think_end_token in current_text:
+ end_idx = current_text.find(self.think_end_token)
+
+ reasoning_text = current_text[:end_idx]
+
+ self._buffer = ""
+ self._in_reasoning = False
+ normal_text = current_text[end_idx + len(self.think_end_token) :]
+
+ return StreamingParseResult(
+ normal_text=normal_text, reasoning_text=reasoning_text.rstrip()
+ )
+
+ # Continue with reasoning content
+ if self._in_reasoning:
+ if self.stream_reasoning:
+ # Stream the content immediately
+ self._buffer = ""
+ return StreamingParseResult(reasoning_text=current_text)
+ else:
+ return StreamingParseResult()
+
+ # If we're not in a reasoning block return as normal text
+ if not self._in_reasoning:
+ self._buffer = ""
+ return StreamingParseResult(normal_text=current_text)
+
+ return StreamingParseResult()
+
+
+class DeepSeekR1Detector(BaseReasoningFormatDetector):
+ """
+ Detector for DeepSeek-R1 model.
+ Assumes reasoning format:
+ (<think>)*(.*)</think>
+ Returns all the text before the </think> tag as `reasoning_text`
+ and the rest of the text as `normal_text`.
+
+ Supported models:
+ - DeepSeek-R1: Always generates thinking content without <think> start tag
+ - DeepSeek-R1-0528: Generates thinking content with <think> start tag
+
+ Format patterns:
+ - DeepSeek-R1: "I need to think about this...</think>The answer is 42."
+ - DeepSeek-R1-0528: "<think>I need to think about this...</think>The answer is 42."
+
+ Args:
+ stream_reasoning (bool): If False, accumulates reasoning content until the end tag.
+ If True, streams reasoning content as it arrives.
+ """
+
+ def __init__(self, stream_reasoning: bool = True, force_reasoning: bool = True):
+ # DeepSeek-R1 is assumed to be reasoning until `</think>` token
+ super().__init__(
+ "<think>",
+ "</think>",
+ force_reasoning=True,
+ stream_reasoning=stream_reasoning,
+ )
+ # https://github.com/sgl-project/sglang/pull/3202#discussion_r1950153599
+
+
+class Qwen3Detector(BaseReasoningFormatDetector):
+ """
+ Detector for Qwen3 models (e.g., Qwen/Qwen3-235B-A22B).
+ Assumes reasoning format:
+ (<think>)*(.*)</think>
+
+ Qwen3 models released before 07/2025 support switching between thinking mode and normal
+ mode using the `enable_thinking` request parameter.
+ - enable_thinking=True: "<think>reasoning content</think>The answer is 42."
+ - enable_thinking=False: "The answer is 42." (no thinking tokens)
+
+ Args:
+ stream_reasoning (bool): If False, accumulates reasoning content until the end tag.
+ If True, streams reasoning content as it arrives.
+ """
+
+ def __init__(self, stream_reasoning: bool = True, force_reasoning: bool = False):
+ super().__init__(
+ "",
+ "",
+ force_reasoning=force_reasoning,
+ stream_reasoning=stream_reasoning,
+ )
+
+
+class KimiDetector(BaseReasoningFormatDetector):
+ """
+ Detector for Kimi Thinking model.
+ Assumes reasoning format:
+ ◁think▷*(.*)◁/think▷
+ Returns all the text before the ◁/think▷ tag as `reasoning_text`
+ and the rest of the text as `normal_text`.
+ """
+
+ def __init__(self, stream_reasoning: bool = True, force_reasoning: bool = False):
+ super().__init__(
+ "◁think▷",
+ "◁/think▷",
+ force_reasoning=False,
+ stream_reasoning=stream_reasoning,
+ )
+
+
+class GptOssDetector(BaseReasoningFormatDetector):
+ """
+ Detector for T4-style reasoning format (GPT-OSS), using the HarmonyParser.
+ """
+
+ def __init__(self, stream_reasoning: bool = True, force_reasoning: bool = True):
+ super().__init__(
+ "<|channel|>analysis<|message|>",
+ "<|end|>",
+ force_reasoning=force_reasoning,
+ stream_reasoning=stream_reasoning,
+ )
+ self.parser = HarmonyParser()
+
+ def detect_and_parse(self, text: str) -> StreamingParseResult:
+ events = self.parser.parse(text)
+ # Flush the buffer for one-shot parsing
+ events += self.parser.parse("")
+
+ reasoning_text = "".join(
+ [e.content for e in events if e.event_type == "reasoning"]
+ )
+ normal_parts = []
+ for e in events:
+ if e.event_type == "normal":
+ normal_parts.append(e.content)
+ elif e.event_type == "tool_call":
+ # Use raw_text to preserve structural markers for function call detector
+ normal_parts.append(e.raw_text if e.raw_text else e.content)
+ normal_text = "".join(normal_parts)
+ # Tool call events preserve raw text with structural markers
+
+ return StreamingParseResult(
+ normal_text=normal_text,
+ reasoning_text=reasoning_text,
+ )
+
+ def parse_streaming_increment(self, new_text: str) -> StreamingParseResult:
+ events = self.parser.parse(new_text)
+
+ reasoning_text = "".join(
+ [e.content for e in events if e.event_type == "reasoning"]
+ )
+ normal_parts = []
+ for e in events:
+ if e.event_type == "normal":
+ normal_parts.append(e.content)
+ elif e.event_type == "tool_call":
+ # Use raw_text to preserve structural markers for function call detector
+ normal_parts.append(e.raw_text if e.raw_text else e.content)
+ normal_text = "".join(normal_parts)
+
+ return StreamingParseResult(
+ normal_text=normal_text,
+ reasoning_text=reasoning_text,
+ )
+
+
+class ReasoningParser:
+ """
+ Parser that handles both streaming and non-streaming scenarios for extracting
+ reasoning content from model outputs.
+
+ Args:
+ model_type (str): Type of model to parse reasoning from
+ stream_reasoning (bool): If False, accumulates reasoning content until complete.
+ If True, streams reasoning content as it arrives.
+ """
+
+ DetectorMap: Dict[str, Type[BaseReasoningFormatDetector]] = {
+ "deepseek-r1": DeepSeekR1Detector,
+ "deepseek-v3": Qwen3Detector,
+ "glm45": Qwen3Detector,
+ "gpt-oss": GptOssDetector,
+ "kimi": KimiDetector,
+ "qwen3": Qwen3Detector,
+ "qwen3-thinking": Qwen3Detector,
+ "step3": DeepSeekR1Detector,
+ }
+
+ def __init__(
+ self,
+ model_type: Optional[str] = None,
+ stream_reasoning: bool = True,
+ force_reasoning: Optional[bool] = None,
+ ):
+ if not model_type:
+ raise ValueError("Model type must be specified")
+
+ detector_class = self.DetectorMap.get(model_type.lower())
+ if not detector_class:
+ raise ValueError(f"Unsupported model type: {model_type}")
+
+ # Special cases where we override force_reasoning
+ if model_type.lower() in {"qwen3-thinking", "gpt-oss"}:
+ force_reasoning = True
+
+ # Only pass force_reasoning if explicitly set, let detectors use their defaults
+ kwargs = {"stream_reasoning": stream_reasoning}
+ if force_reasoning is not None:
+ kwargs["force_reasoning"] = force_reasoning
+
+ self.detector = detector_class(**kwargs)
+
+ def parse_non_stream(self, full_text: str) -> Tuple[Optional[str], Optional[str]]:
+ """Non-streaming call: one-time parsing"""
+ ret = self.detector.detect_and_parse(full_text)
+ return ret.reasoning_text, ret.normal_text
+
+ def parse_stream_chunk(
+ self, chunk_text: str
+ ) -> Tuple[Optional[str], Optional[str]]:
+ """Streaming call: incremental parsing"""
+ ret = self.detector.parse_streaming_increment(chunk_text)
+ return ret.reasoning_text, ret.normal_text
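+
+
+# Minimal usage sketch (the model output string is illustrative):
+#   parser = ReasoningParser(model_type="deepseek-r1")
+#   reasoning, answer = parser.parse_non_stream("Let me think...</think>The answer is 42.")
+#   # reasoning == "Let me think...", answer == "The answer is 42."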
diff --git a/python/sglang/srt/reasoning_parser.py b/python/sglang/srt/reasoning_parser.py
deleted file mode 100644
index 46899a5c21f..00000000000
--- a/python/sglang/srt/reasoning_parser.py
+++ /dev/null
@@ -1,552 +0,0 @@
-import re
-from typing import Dict, Optional, Tuple, Type
-
-
-class StreamingParseResult:
- """Result of streaming incremental parsing."""
-
- def __init__(self, normal_text: str = "", reasoning_text: str = ""):
- self.normal_text = normal_text
- self.reasoning_text = reasoning_text
-
-
-class BaseReasoningFormatDetector:
- """Base class providing two sets of interfaces: one-time and streaming incremental."""
-
- def __init__(
- self,
- think_start_token: str,
- think_end_token: str,
- force_reasoning: bool = False,
- stream_reasoning: bool = True,
- ):
- self.think_start_token = think_start_token
- self.think_end_token = think_end_token
- self._in_reasoning = force_reasoning
- self.stream_reasoning = stream_reasoning
-
- self._buffer = ""
- self.stripped_think_start = False
-
- def detect_and_parse(self, text: str) -> StreamingParseResult:
- """
- One-time parsing: Detects and parses reasoning sections in the provided text.
- Returns both reasoning content and normal text separately.
- """
- in_reasoning = self._in_reasoning or self.think_start_token in text
-
- if not in_reasoning:
- return StreamingParseResult(normal_text=text)
-
- # The text is considered to be in a reasoning block.
- processed_text = text.replace(self.think_start_token, "").strip()
-
- if self.think_end_token not in processed_text:
- # Assume reasoning was truncated before `</think>` token
- return StreamingParseResult(reasoning_text=processed_text)
-
- # Extract reasoning content
- splits = processed_text.split(self.think_end_token, maxsplit=1)
- reasoning_text = splits[0]
- normal_text = splits[1].strip()
-
- return StreamingParseResult(
- normal_text=normal_text, reasoning_text=reasoning_text
- )
-
- def parse_streaming_increment(self, new_text: str) -> StreamingParseResult:
- """
- Streaming incremental parsing for reasoning content.
- Handles partial reasoning tags and content.
-
- If stream_reasoning is False:
- Accumulates reasoning content until the end tag is found
- If stream_reasoning is True:
- Streams reasoning content as it arrives
- """
- self._buffer += new_text
- current_text = self._buffer
-
- # If the current text is a prefix of the think token, keep buffering
- if any(
- token.startswith(current_text) and token != current_text
- for token in [self.think_start_token, self.think_end_token]
- ):
- return StreamingParseResult()
-
- # Strip `<think>` token if present
- if not self.stripped_think_start and self.think_start_token in current_text:
- current_text = current_text.replace(self.think_start_token, "")
- self.stripped_think_start = True
- self._in_reasoning = True
-
- # Handle end of reasoning block
- if self._in_reasoning and self.think_end_token in current_text:
- end_idx = current_text.find(self.think_end_token)
-
- reasoning_text = current_text[:end_idx]
-
- self._buffer = ""
- self._in_reasoning = False
- normal_text = current_text[end_idx + len(self.think_end_token) :]
-
- return StreamingParseResult(
- normal_text=normal_text, reasoning_text=reasoning_text.rstrip()
- )
-
- # Continue with reasoning content
- if self._in_reasoning:
- if self.stream_reasoning:
- # Stream the content immediately
- self._buffer = ""
- return StreamingParseResult(reasoning_text=current_text)
- else:
- return StreamingParseResult()
-
- # If we're not in a reasoning block return as normal text
- if not self._in_reasoning:
- self._buffer = ""
- return StreamingParseResult(normal_text=current_text)
-
- return StreamingParseResult()
-
-
-class DeepSeekR1Detector(BaseReasoningFormatDetector):
- """
- Detector for DeepSeek-R1 model.
- Assumes reasoning format:
- (<think>)*(.*)</think>
- Returns all the text before the </think> tag as `reasoning_text`
- and the rest of the text as `normal_text`.
-
- Supported models:
- - DeepSeek-R1: Always generates thinking content without <think> start tag
- - DeepSeek-R1-0528: Generates thinking content with <think> start tag
-
- Format patterns:
- - DeepSeek-R1: "I need to think about this...</think>The answer is 42."
- - DeepSeek-R1-0528: "<think>I need to think about this...</think>The answer is 42."
-
- Args:
- stream_reasoning (bool): If False, accumulates reasoning content until the end tag.
- If True, streams reasoning content as it arrives.
- """
-
- def __init__(self, stream_reasoning: bool = True, force_reasoning: bool = True):
- # DeepSeek-R1 is assumed to be reasoning until `</think>` token
- super().__init__(
- "<think>",
- "</think>",
- force_reasoning=True,
- stream_reasoning=stream_reasoning,
- )
- # https://github.com/sgl-project/sglang/pull/3202#discussion_r1950153599
-
-
-class Qwen3Detector(BaseReasoningFormatDetector):
- """
- Detector for Qwen3 models (e.g., Qwen/Qwen3-235B-A22B).
- Assumes reasoning format:
- (<think>)*(.*)</think>
-
- Qwen3 models released before 07/2025 support switching between thinking mode and normal
- mode using the `enable_thinking` request parameter.
- - enable_thinking=True: "<think>reasoning content</think>The answer is 42."
- - enable_thinking=False: "The answer is 42." (no thinking tokens)
-
- Args:
- stream_reasoning (bool): If False, accumulates reasoning content until the end tag.
- If True, streams reasoning content as it arrives.
- """
-
- def __init__(self, stream_reasoning: bool = True, force_reasoning: bool = False):
- super().__init__(
- "",
- "",
- force_reasoning=force_reasoning,
- stream_reasoning=stream_reasoning,
- )
-
-
-class KimiDetector(BaseReasoningFormatDetector):
- """
- Detector for Kimi Thinking model.
- Assumes reasoning format:
- ◁think▷*(.*)◁/think▷
- Returns all the text before the ◁/think▷ tag as `reasoning_text`
- and the rest of the text as `normal_text`.
- """
-
- def __init__(self, stream_reasoning: bool = True, force_reasoning: bool = False):
- super().__init__(
- "◁think▷",
- "◁/think▷",
- force_reasoning=False,
- stream_reasoning=stream_reasoning,
- )
-
-
-class GptOssDetector(BaseReasoningFormatDetector):
- """
- Detector for T4-style reasoning format.
-
- Assumes reasoning format with two channels:
- <|channel|>analysis<|message|>...reasoning content...<|end|>
- <|start|>assistant<|channel|>final<|message|>...final answer...<|return|>
-
- Returns content from 'analysis' channel as reasoning_text
- and content from 'final' channel as normal_text.
-
- Args:
- stream_reasoning (bool): If False, accumulates reasoning content until complete.
- If True, streams reasoning content as it arrives.
- """
-
- def __init__(self, stream_reasoning: bool = True, force_reasoning: bool = True):
- # TypeScript uses channel tokens instead of simple start/end tokens
- super().__init__(
- "<|channel|>analysis<|message|>",
- "<|end|>",
- force_reasoning=True,
- stream_reasoning=stream_reasoning,
- )
- self.final_channel_start = "<|start|>assistant<|channel|>final<|message|>"
- self.final_channel_end = "<|return|>"
- self._in_final_channel = False
- self._analysis_complete = False
- self._in_reasoning = True
-
- def detect_and_parse(self, text: str) -> StreamingParseResult:
- """
- One-time parsing: Detects and parses both analysis and final channels.
- Tool call channels are preserved in normal_text for downstream processing.
-
- HACK: Also handles simplified format where text starts with "analysis" and transitions
- to "assistantfinal" without full channel markers.
- """
- # HACK: Handle simplified format (analysis...assistantfinal) without channel markers
- if (
- text.startswith("analysis")
- and "assistantfinal" in text
- and "<|channel|>" not in text
- ):
- # Split on "assistantfinal"
- parts = text.split("assistantfinal", 1)
- self._in_reasoning = False
- if len(parts) == 2:
- reasoning_text = parts[0][
- len("analysis") :
- ].strip() # Remove "analysis" prefix
- normal_text = parts[1].strip()
- return StreamingParseResult(
- normal_text=normal_text, reasoning_text=reasoning_text
- )
-
- reasoning_parts = []
- normal_parts = []
- current_pos = 0
-
- # Process text sequentially to preserve tool calls between analysis sections
- while current_pos < len(text):
- # Look for next analysis channel
- analysis_start_idx = text.find(self.think_start_token, current_pos)
-
- if analysis_start_idx == -1:
- # No more analysis channels, rest goes to remaining
- break
-
- # Preserve any content before this analysis channel (could include tool calls)
- if analysis_start_idx > current_pos:
- between_content = text[current_pos:analysis_start_idx]
- # This content will be added to normal_parts later
- normal_parts.append(between_content)
-
- # Extract analysis content
- analysis_content_start = analysis_start_idx + len(self.think_start_token)
- analysis_end_idx = text.find(self.think_end_token, analysis_content_start)
-
- if analysis_end_idx != -1:
- reasoning_parts.append(
- text[analysis_content_start:analysis_end_idx].strip()
- )
- current_pos = analysis_end_idx + len(self.think_end_token)
- else:
- # Analysis not complete
- reasoning_parts.append(text[analysis_content_start:].strip())
- reasoning_text = "".join(reasoning_parts)
- return StreamingParseResult(reasoning_text=reasoning_text)
-
- # Add any remaining text after all analysis sections
- if current_pos < len(text):
- remaining = text[current_pos:]
- normal_parts.append(remaining)
-
- # Process non-analysis content for commentary sections
- full_normal_text = "".join(normal_parts)
-
- # Extract reasoning from non-tool-call commentary sections
- # Tool calls have "to=" in their header, regular commentary does not
- commentary_pattern = re.compile(
- r"<\|start\|>assistant<\|channel\|>commentary<\|message\|>(.*?)(?:<\|end\|>|<\|call\|>)",
- re.DOTALL,
- )
-
- cleaned_text = full_normal_text
- for match in reversed(list(commentary_pattern.finditer(full_normal_text))):
- # Check if this commentary is a tool call by looking at the text before <|message|>
- match_start = match.start()
- # Find where "<|channel|>commentary" starts within the matched pattern
- # The pattern starts with "<|start|>assistant<|channel|>commentary"
- # So we look for the text between "commentary" and "<|message|>" in the match
- match_text = full_normal_text[match_start : match.end()]
- commentary_idx = match_text.find("<|channel|>commentary")
- if commentary_idx != -1:
- message_idx = match_text.find("<|message|>", commentary_idx)
- if message_idx != -1:
- between_text = match_text[commentary_idx:message_idx]
- # If no "to=" found, this is regular commentary (reasoning content)
- if " to=" not in between_text:
- content = match.group(1).strip()
- reasoning_parts.append(content)
- # Remove this commentary section from normal text
- cleaned_text = (
- cleaned_text[: match.start()] + cleaned_text[match.end() :]
- )
-
- full_normal_text = cleaned_text
-
- # Combine all reasoning parts
- reasoning_text = "".join(reasoning_parts)
-
- # Process full_normal_text for final output
- normal_text = ""
- if self.final_channel_start in full_normal_text:
- final_start = full_normal_text.find(self.final_channel_start)
- final_content_start = final_start + len(self.final_channel_start)
- final_end = full_normal_text.find(
- self.final_channel_end, final_content_start
- )
-
- if final_end != -1:
- # Extract content before final channel (includes tool calls)
- before_final = full_normal_text[:final_start].strip()
- # Extract ONLY the final channel content (not the channel markers)
- final_text = full_normal_text[final_content_start:final_end].strip()
- # Extract content after final channel
- after_final = full_normal_text[
- final_end + len(self.final_channel_end) :
- ].strip()
-
- # For tool calls + final answer: concatenate tool calls with final text
- parts = []
- if before_final:
- parts.append(before_final)
- if final_text:
- parts.append(final_text)
- if after_final:
- parts.append(after_final)
- normal_text = " ".join(parts)
- else:
- # Final channel not complete - extract what we have
- # Look for just <|channel|>final<|message|> without <|return|>
- alt_final_start = full_normal_text.find("<|channel|>final<|message|>")
- if alt_final_start != -1:
- before_alt_final = full_normal_text[:alt_final_start].strip()
- alt_final_content = full_normal_text[
- alt_final_start + len("<|channel|>final<|message|>") :
- ].strip()
-
- parts = []
- if before_alt_final:
- parts.append(before_alt_final)
- if alt_final_content:
- parts.append(alt_final_content)
- normal_text = " ".join(parts)
- else:
- normal_text = full_normal_text.strip()
- else:
- # No final channel, treat all as normal text (includes tool calls)
- normal_text = full_normal_text.strip()
-
- return StreamingParseResult(
- normal_text=normal_text, reasoning_text=reasoning_text
- )
-
- def parse_streaming_increment(self, new_text: str) -> StreamingParseResult:
- """
- Streaming incremental parsing for GPT-OSS format.
-
- This is a simplified streaming implementation that accumulates content
- and delegates to the non-streaming parser for complex multi-channel parsing.
- TODO: Implement proper incremental parsing for better streaming performance.
- """
- self._buffer += new_text
-
- if not self._in_reasoning:
- return StreamingParseResult(normal_text=new_text)
-
- # Check if we have complete sections to process
- # For GPT-OSS, we need to wait for complete channel sections
- # HACK: For now, use simplified approach - wait for key markers before processing
- key_markers = ["<|end|>", "<|call|>", "<|return|>", "assistantfinal"]
- has_complete_section = any(marker in self._buffer for marker in key_markers)
-
- if not has_complete_section:
- # Still accumulating, don't process yet
- return StreamingParseResult()
-
- # Handle simplified format (analysis...assistantfinal) with true incremental streaming
- if (
- "<|channel|>" not in self._buffer
- ): # Simplified format without channel markers
- if self._buffer.startswith("analysis"):
- # Check if we have the transition to assistantfinal
- if "assistantfinal" in self._buffer:
- self._in_reasoning = False
- # Complete reasoning section - extract and stream it
- parts = self._buffer.split("assistantfinal", 1)
- reasoning_text = parts[0][len("analysis") :].strip()
- final_content = parts[1].strip()
-
- # Clear buffer and return both reasoning and final content
- self._buffer = ""
- return StreamingParseResult(
- reasoning_text=reasoning_text if self.stream_reasoning else "",
- normal_text=final_content,
- )
- elif self.stream_reasoning:
- # Stream reasoning content incrementally as it arrives
- current_reasoning = self._buffer[len("analysis") :].strip()
- self._buffer = ""
- return StreamingParseResult(reasoning_text=current_reasoning)
- else:
- # Wait for assistantfinal
- return StreamingParseResult()
- elif self._buffer.startswith("assistantfinal"):
- # Direct final content without analysis
- final_content = self._buffer[len("assistantfinal") :].strip()
- self._buffer = ""
- return StreamingParseResult(normal_text=final_content)
-
- # For full channel format, process sections as they complete
- result = StreamingParseResult()
-
- # Process complete analysis sections
- while (
- self.think_start_token in self._buffer
- and self.think_end_token in self._buffer
- ):
- start_idx = self._buffer.find(self.think_start_token)
- start_pos = start_idx + len(self.think_start_token)
- end_pos = self._buffer.find(self.think_end_token, start_pos)
-
- if end_pos != -1:
- reasoning_content = self._buffer[start_pos:end_pos].strip()
- if self.stream_reasoning and reasoning_content:
- result.reasoning_text += reasoning_content
-
- # Remove processed analysis section
- self._buffer = (
- self._buffer[:start_idx]
- + self._buffer[end_pos + len(self.think_end_token) :]
- )
- else:
- break
-
- # Process complete commentary sections
- commentary_pattern = re.compile(
- r"<\|start\|>assistant<\|channel\|>commentary<\|message\|>(.*?)(?:<\|end\|>|<\|call\|>)",
- re.DOTALL,
- )
-
- for match in reversed(list(commentary_pattern.finditer(self._buffer))):
- # Check if this is a tool call
- start_pos = match.start()
- commentary_content = match.group(1).strip()
- if self.stream_reasoning and commentary_content:
- result.reasoning_text += commentary_content
-
- # Remove this commentary section
- self._buffer = self._buffer[: match.start()] + self._buffer[match.end() :]
- # Clean up any standalone <|start|>assistant
- self._buffer = re.sub(
- r"<\|start\|>assistant(?=<\|start\|>assistant)", "", self._buffer
- )
-
- # Handle final channel completion
- if self.final_channel_start in self._buffer:
- final_start = self._buffer.find(self.final_channel_start)
- final_content_start = final_start + len(self.final_channel_start)
-
- # Check if final channel is complete
- final_end = self._buffer.find(self.final_channel_end, final_content_start)
- if final_end != -1:
- # Complete final channel - process everything
- final_result = self.detect_and_parse(self._buffer)
- self._buffer = ""
- return StreamingParseResult(
- normal_text=final_result.normal_text,
- reasoning_text=result.reasoning_text + final_result.reasoning_text,
- )
- else:
- # Extract content before final channel (e.g. tool calls)
- before_final = self._buffer[:final_start]
- if before_final:
- # Output tool calls for processing
- result.normal_text += before_final
- # Keep the final channel part in buffer
- self._buffer = self._buffer[final_start:]
-
- return result
-
-
-class ReasoningParser:
- """
- Parser that handles both streaming and non-streaming scenarios for extracting
- reasoning content from model outputs.
-
- Args:
- model_type (str): Type of model to parse reasoning from
- stream_reasoning (bool): If False, accumulates reasoning content until complete.
- If True, streams reasoning content as it arrives.
- """
-
- DetectorMap: Dict[str, Type[BaseReasoningFormatDetector]] = {
- "deepseek-r1": DeepSeekR1Detector,
- "qwen3": Qwen3Detector,
- "qwen3-thinking": Qwen3Detector,
- "glm45": Qwen3Detector,
- "kimi": KimiDetector,
- "step3": DeepSeekR1Detector,
- "gpt-oss": GptOssDetector,
- }
-
- def __init__(
- self,
- model_type: Optional[str] = None,
- stream_reasoning: bool = True,
- force_reasoning: bool = False,
- ):
- if not model_type:
- raise ValueError("Model type must be specified")
-
- detector_class = self.DetectorMap.get(model_type.lower())
- if not detector_class:
- raise ValueError(f"Unsupported model type: {model_type}")
-
- if model_type.lower() == "qwen3-thinking":
- force_reasoning = True
-
- self.detector = detector_class(
- stream_reasoning=stream_reasoning, force_reasoning=force_reasoning
- )
-
- def parse_non_stream(self, full_text: str) -> Tuple[str, str]:
- """Non-streaming call: one-time parsing"""
- ret = self.detector.detect_and_parse(full_text)
- return ret.reasoning_text, ret.normal_text
-
- def parse_stream_chunk(self, chunk_text: str) -> Tuple[str, str]:
- """Streaming call: incremental parsing"""
- ret = self.detector.parse_streaming_increment(chunk_text)
- return ret.reasoning_text, ret.normal_text
diff --git a/python/sglang/srt/sampling/penaltylib/orchestrator.py b/python/sglang/srt/sampling/penaltylib/orchestrator.py
index a75d5e9bbf5..1abd255cb54 100644
--- a/python/sglang/srt/sampling/penaltylib/orchestrator.py
+++ b/python/sglang/srt/sampling/penaltylib/orchestrator.py
@@ -1,7 +1,8 @@
from __future__ import annotations
import abc
-from typing import TYPE_CHECKING, Set, Type
+import weakref
+from typing import TYPE_CHECKING, Optional, Set, Type
import torch
@@ -17,7 +18,7 @@ def __init__(
penalizers: Set[Type["_BatchedPenalizer"]],
):
self.vocab_size = vocab_size
- self.batch = batch
+ self._batch_ref = weakref.ref(batch)
self.device = batch.device
self.penalizers = {Penalizer: Penalizer(self) for Penalizer in penalizers}
@@ -27,6 +28,17 @@ def __init__(
is_required |= pen_is_required
self.is_required = is_required
+ @property
+ def batch(self) -> ScheduleBatch | None:
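+ # The batch is held through a weak reference, so this orchestrator does not keep
+ # the ScheduleBatch alive; the property returns None once the batch has been freed.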
+ return self._batch_ref()
+
+ @batch.setter
+ def batch(self, value: Optional[ScheduleBatch]):
+ if value is None:
+ self._batch_ref = lambda: None
+ else:
+ self._batch_ref = weakref.ref(value)
+
def reqs(self):
return self.batch.reqs
diff --git a/python/sglang/srt/sampling/sampling_batch_info.py b/python/sglang/srt/sampling/sampling_batch_info.py
index ec649f47936..6ba8a77770c 100644
--- a/python/sglang/srt/sampling/sampling_batch_info.py
+++ b/python/sglang/srt/sampling/sampling_batch_info.py
@@ -67,28 +67,31 @@ class SamplingBatchInfo:
logit_bias: Optional[torch.Tensor] = None
@classmethod
- def from_schedule_batch(cls, batch: ScheduleBatch, vocab_size: int):
+ def _get_global_server_args_dict(cls):
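+ # The import below stays inside the method to avoid a circular import with
+ # schedule_batch; exposing it as a classmethod also makes it easy to override (assumed intent).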
from sglang.srt.managers.schedule_batch import global_server_args_dict
+ return global_server_args_dict
+
+ @classmethod
+ def from_schedule_batch(cls, batch: ScheduleBatch, vocab_size: int):
+ global_server_args_dict = cls._get_global_server_args_dict()
+
reqs = batch.reqs
device = batch.device
- temperatures = (
- torch.tensor(
- [r.sampling_params.temperature for r in reqs],
- dtype=torch.float,
- )
- .view(-1, 1)
- .to(device, non_blocking=True)
- )
+ temperatures = torch.tensor(
+ [r.sampling_params.temperature for r in reqs],
+ dtype=torch.float,
+ device=device,
+ ).view(-1, 1)
top_ps = torch.tensor(
- [r.sampling_params.top_p for r in reqs], dtype=torch.float
- ).to(device, non_blocking=True)
+ [r.sampling_params.top_p for r in reqs], dtype=torch.float, device=device
+ )
top_ks = torch.tensor(
- [r.sampling_params.top_k for r in reqs], dtype=torch.int32
- ).to(device, non_blocking=True)
+ [r.sampling_params.top_k for r in reqs], dtype=torch.int32, device=device
+ )
min_ps = torch.tensor(
- [r.sampling_params.min_p for r in reqs], dtype=torch.float
- ).to(device, non_blocking=True)
+ [r.sampling_params.min_p for r in reqs], dtype=torch.float, device=device
+ )
logit_bias = None
if any(r.sampling_params.logit_bias is not None for r in reqs):
diff --git a/python/sglang/srt/server_args.py b/python/sglang/srt/server_args.py
index fdd1f80ddf8..7123e4384c5 100644
--- a/python/sglang/srt/server_args.py
+++ b/python/sglang/srt/server_args.py
@@ -13,6 +13,8 @@
# ==============================================================================
"""The arguments of the server."""
+from __future__ import annotations
+
import argparse
import dataclasses
import json
@@ -21,12 +23,12 @@
import random
import sys
import tempfile
-from typing import List, Literal, Optional, Union
+from typing import TYPE_CHECKING, List, Literal, Optional, Union
+from sglang.srt.function_call.function_call_parser import FunctionCallParser
from sglang.srt.hf_transformers_utils import check_gguf_file, get_config
-from sglang.srt.layers.utils import is_sm90_supported, is_sm100_supported
from sglang.srt.lora.lora_registry import LoRARef
-from sglang.srt.reasoning_parser import ReasoningParser
+from sglang.srt.parser.reasoning_parser import ReasoningParser
from sglang.srt.utils import (
LORA_TARGET_ALL_MODULES,
SUPPORTED_LORA_TARGET_MODULES,
@@ -38,20 +40,109 @@
is_hip,
is_port_available,
is_remote_url,
+ is_sm90_supported,
+ is_sm100_supported,
is_triton_kernels_available,
is_valid_ipv6_address,
nullable_str,
)
+from sglang.utils import is_in_ci
+
+if TYPE_CHECKING:
+ from hip_attn.v1_2 import HiPAttentionConfig
logger = logging.getLogger(__name__)
+# Define constants
+LOAD_FORMAT_CHOICES = [
+ "auto",
+ "pt",
+ "safetensors",
+ "npcache",
+ "dummy",
+ "sharded_state",
+ "gguf",
+ "bitsandbytes",
+ "layered",
+ "remote",
+]
+
+QUANTIZATION_CHOICES = [
+ "awq",
+ "fp8",
+ "gptq",
+ "marlin",
+ "gptq_marlin",
+ "awq_marlin",
+ "bitsandbytes",
+ "gguf",
+ "modelopt",
+ "modelopt_fp4",
+ "petit_nvfp4",
+ "w8a8_int8",
+ "w8a8_fp8",
+ "moe_wna16",
+ "qoq",
+ "w4afp8",
+ "mxfp4",
+]
+
+ATTENTION_BACKEND_CHOICES = [
+ # Common
+ "triton",
+ "torch_native",
+ "hip_attention",
+ # NVIDIA specific
+ "cutlass_mla",
+ "fa3",
+ "flashinfer",
+ "flashmla",
+ "trtllm_mla",
+ "trtllm_mha",
+ "dual_chunk_flash_attn",
+ "hybrid_linear_attn",
+ # AMD specific
+ "aiter",
+ "wave",
+ # Other platforms
+ "intel_amx",
+ "ascend",
+]
+
+DISAGG_TRANSFER_BACKEND_CHOICES = ["mooncake", "nixl", "ascend", "fake"]
+
+GRAMMAR_BACKEND_CHOICES = ["xgrammar", "outlines", "llguidance", "none"]
+
+
+# Allow external code to add more choices
+def add_load_format_choices(choices):
+ LOAD_FORMAT_CHOICES.extend(choices)
+
+
+def add_quantization_method_choices(choices):
+ QUANTIZATION_CHOICES.extend(choices)
+
+
+def add_attention_backend_choices(choices):
+ ATTENTION_BACKEND_CHOICES.extend(choices)
+
+
+def add_disagg_transfer_backend_choices(choices):
+ DISAGG_TRANSFER_BACKEND_CHOICES.extend(choices)
+
+
+def add_grammar_backend_choices(choices):
+ GRAMMAR_BACKEND_CHOICES.extend(choices)
+
+
@dataclasses.dataclass
class ServerArgs:
# Model and tokenizer
model_path: str
tokenizer_path: Optional[str] = None
tokenizer_mode: str = "auto"
+ tokenizer_worker_num: int = 1
skip_tokenizer_init: bool = False
load_format: str = "auto"
model_loader_extra_config: str = "{}"
@@ -84,7 +175,6 @@ class ServerArgs:
max_prefill_tokens: int = 16384
schedule_policy: str = "fcfs"
schedule_conservativeness: float = 1.0
- cpu_offload_gb: int = 0
page_size: Optional[int] = None
hybrid_kvcache_ratio: Optional[float] = None
swa_full_tokens_ratio: float = 0.8
@@ -119,9 +209,12 @@ class ServerArgs:
bucket_inter_token_latency: Optional[List[float]] = None
bucket_e2e_request_latency: Optional[List[float]] = None
collect_tokens_histogram: bool = False
+ prompt_tokens_buckets: Optional[List[str]] = None
+ generation_tokens_buckets: Optional[List[str]] = None
decode_log_interval: int = 40
enable_request_time_stats_logging: bool = False
kv_events_config: Optional[str] = None
+ gc_warning_threshold_secs: float = 0.0
# API related
api_key: Optional[str] = None
@@ -138,6 +231,8 @@ class ServerArgs:
# Data parallelism
dp_size: int = 1
load_balance_method: str = "round_robin"
+ # FIXME: remove this after dp rank scheduling is fully supported with PD-Disaggregation
+ prefill_round_robin_balance: bool = False
# Multi-node distributed serving
dist_init_addr: Optional[str] = None
@@ -148,11 +243,28 @@ class ServerArgs:
json_model_override_args: str = "{}"
preferred_sampling_params: Optional[str] = None
+ # HiP Attention
+ enable_hip_attention: bool = False
+ hip_attention_config: Optional[HiPAttentionConfig] = None
+
+ # HiP Attention Offload
+ enable_hip_kv_cache_offload: bool = False
+ # On-GPU cache size factor for sparse top-k mask estimation
+ hip_max_mask_cache_factor: float = 1.2
+ # If this size is not None, it overrides hip_max_mask_cache_factor for precise control of the cache size, in tokens.
+ hip_max_mask_cache_size: Optional[int] = None
+ # On-GPU cache size factor for sparse attention
+ hip_max_sa_cache_factor: float = 1.2
+ # If this size is not None, it overrides hip_max_sa_cache_factor for precise control of the cache size, in tokens.
+ hip_max_sa_cache_size: Optional[int] = None
+
# LoRA
enable_lora: Optional[bool] = None
max_lora_rank: Optional[int] = None
lora_target_modules: Optional[Union[set[str], List[str]]] = None
- lora_paths: Optional[Union[dict[str, str], dict[str, LoRARef], List[str]]] = None
+ lora_paths: Optional[
+ Union[dict[str, str], List[dict[str, str]], List[str], List[LoRARef]]
+ ] = None
max_loaded_loras: Optional[int] = None
max_loras_per_batch: int = 8
lora_backend: str = "triton"
@@ -168,12 +280,14 @@ class ServerArgs:
# Speculative decoding
speculative_algorithm: Optional[str] = None
speculative_draft_model_path: Optional[str] = None
+ speculative_draft_model_revision: Optional[str] = None
speculative_num_steps: Optional[int] = None
speculative_eagle_topk: Optional[int] = None
speculative_num_draft_tokens: Optional[int] = None
speculative_accept_threshold_single: float = 1.0
speculative_accept_threshold_acc: float = 1.0
speculative_token_map: Optional[str] = None
+ speculative_attention_mode: str = "prefill"
# Expert parallelism
ep_size: int = 1
@@ -186,6 +300,7 @@ class ServerArgs:
"flashinfer_cutlass",
"flashinfer_mxfp4",
] = "auto"
+ flashinfer_mxfp4_moe_precision: Literal["default", "bf16"] = "default"
enable_flashinfer_allreduce_fusion: bool = False
deepep_mode: Literal["auto", "normal", "low_latency"] = "auto"
ep_num_redundant_experts: int = 0
@@ -195,6 +310,7 @@ class ServerArgs:
eplb_algorithm: str = "auto"
eplb_rebalance_num_iterations: int = 1000
eplb_rebalance_layers_per_chunk: Optional[int] = None
+ eplb_min_rebalancing_utilization_threshold: float = 1.0
expert_distribution_recorder_mode: Optional[
Literal["stat", "stat_approx", "per_pass", "per_token"]
] = None
@@ -207,11 +323,14 @@ class ServerArgs:
enable_hierarchical_cache: bool = False
hicache_ratio: float = 2.0
hicache_size: int = 0
- hicache_write_policy: str = "write_through_selective"
+ hicache_write_policy: str = "write_through"
hicache_io_backend: str = "kernel"
hicache_mem_layout: str = "layer_first"
hicache_storage_backend: Optional[str] = None
hicache_storage_prefetch_policy: str = "best_effort"
+ hicache_storage_backend_extra_config: Optional[str] = None
+ # LMCache
+ enable_lmcache: bool = False
# Double Sparsity
enable_double_sparsity: bool = False
@@ -221,6 +340,13 @@ class ServerArgs:
ds_heavy_channel_type: str = "qk"
ds_sparse_decode_threshold: int = 4096
+ # Offloading
+ cpu_offload_gb: int = 0
+ offload_group_size: int = -1
+ offload_num_in_group: int = 1
+ offload_prefetch_step: int = 1
+ offload_mode: str = "cpu"
+
# Optimization/debug options
disable_radix_cache: bool = False
cuda_graph_max_bs: Optional[int] = None
@@ -260,6 +386,7 @@ class ServerArgs:
disable_fast_image_processor: bool = False
enable_return_hidden_states: bool = False
scheduler_recv_interval: int = 1
+ numa_node: Optional[List[int]] = None
# Debug tensor dumps
debug_tensor_dump_output_folder: Optional[str] = None
@@ -276,7 +403,6 @@ class ServerArgs:
disaggregation_prefill_pp: Optional[int] = 1
disaggregation_ib_device: Optional[str] = None
num_reserved_decode_tokens: int = 512 # used for decode kv cache offload in PD
- pdlb_url: Optional[str] = None
# For model weight update
custom_weight_loader: Optional[List[str]] = None
@@ -286,6 +412,10 @@ class ServerArgs:
enable_pdmux: bool = False
sm_group_num: int = 3
+ # Mamba cache
+ max_mamba_cache_size: Optional[int] = None
+ mamba_ssm_dtype: str = "float32"
+
# Deprecated arguments
enable_ep_moe: bool = False
enable_deepep_moe: bool = False
@@ -296,9 +426,6 @@ class ServerArgs:
def __post_init__(self):
# Check deprecated arguments
- def print_deprecated_warning(message: str):
- logger.warning(f"\033[33m{message}\033[0m")
-
if self.enable_ep_moe:
self.ep_size = self.tp_size
print_deprecated_warning(
@@ -376,9 +503,14 @@ def print_deprecated_warning(message: str):
# B200, MI300. (chunked_prefill_size 16k, cuda_graph_max_bs 512)
reserved_mem = 32 * 1024
+ # draft model and larger cuda graph buffers
if self.speculative_algorithm is not None:
- # draft model and larger cuda graph buffers
- reserved_mem += 2 * 1024
+ if self.speculative_algorithm == "STANDALONE":
+ # Standalone speculative decoding needs more memory than other speculative
+ # decoding algorithms since the draft model is typically larger.
+ reserved_mem += 6 * 1024
+ else:
+ reserved_mem += 2 * 1024
if self.enable_dp_attention:
reserved_mem += 4 * 1024
@@ -479,11 +611,6 @@ def print_deprecated_warning(message: str):
)
self.page_size = 64
- if self.speculative_algorithm is not None:
- raise ValueError(
- "trtllm_mla backend does not support speculative decoding yet."
- )
-
if self.kv_cache_dtype not in ["fp8_e4m3", "auto"]:
raise ValueError(
"TensorRT-LLM MLA backend only supports kv-cache-dtype of fp8_e4m3 or auto."
@@ -505,11 +632,6 @@ def print_deprecated_warning(message: str):
)
self.page_size = 64
- if self.speculative_algorithm is not None:
- raise ValueError(
- "trtllm_mha backend does not support speculative decoding yet."
- )
-
if self.attention_backend == "dual_chunk_flash_attn":
logger.warning(
"Mixed chunk, radix cache, and cuda graphs are disabled because of using dual chunk flash attention backend"
@@ -530,12 +652,12 @@ def print_deprecated_warning(message: str):
if self.grammar_backend is None:
self.grammar_backend = "xgrammar"
+ if self.dp_size == 1:
+ self.enable_dp_attention = False
+
# Data parallelism attention
if self.enable_dp_attention:
self.schedule_conservativeness = self.schedule_conservativeness * 0.3
- assert (
- self.dp_size > 1
- ), "Please set a dp-size > 1. You can use 1 < dp-size <= tp-size "
assert self.tp_size % self.dp_size == 0
self.chunked_prefill_size = self.chunked_prefill_size // self.dp_size
logger.warning(
@@ -552,18 +674,19 @@ def print_deprecated_warning(message: str):
assert (
self.quantization == "modelopt_fp4"
), "modelopt_fp4 quantization is required for Flashinfer MOE"
- os.environ["TRTLLM_ENABLE_PDL"] = "1"
assert self.ep_size in [
1,
self.tp_size,
], "The expert parallel size must be 1 or the same as the tensor parallel size"
if self.moe_runner_backend == "flashinfer_trtllm":
- if not self.disable_shared_experts_fusion:
- self.disable_shared_experts_fusion = True
- logger.warning(
- "FlashInfer TRTLLM MoE is enabled. --disable-shared-experts-fusion is automatically set."
- )
+ assert (
+ self.quantization == "modelopt_fp4" or self.quantization == "fp8"
+ ), "modelopt_fp4 quantization is required for Flashinfer TRTLLM MoE"
+ self.disable_shared_experts_fusion = True
+ logger.warning(
+ "FlashInfer TRTLLM MoE is enabled. --disable-shared-experts-fusion is automatically set."
+ )
# DeepEP MoE
if self.moe_a2a_backend == "deepep":
@@ -618,7 +741,12 @@ def print_deprecated_warning(message: str):
# NEXTN shares the same implementation of EAGLE
self.speculative_algorithm = "EAGLE"
- if self.speculative_algorithm in ("EAGLE", "EAGLE3"):
+ if self.speculative_algorithm in ("EAGLE", "EAGLE3", "STANDALONE"):
+ if self.speculative_algorithm == "STANDALONE":
+ # TODO: support dp attention for standalone speculative decoding
+ assert (
+ self.enable_dp_attention is False
+ ), "Currently standalone speculative decoding does not support dp attention."
if self.max_running_requests is None:
self.max_running_requests = 48
self.disable_overlap_schedule = True
@@ -655,6 +783,16 @@ def print_deprecated_warning(message: str):
self.speculative_num_draft_tokens,
) = auto_choose_speculative_params(self)
+ if (
+ self.attention_backend == "trtllm_mha"
+ or self.decode_attention_backend == "trtllm_mha"
+ or self.prefill_attention_backend == "trtllm_mha"
+ ):
+ if self.speculative_eagle_topk > 1:
+ raise ValueError(
+ "trtllm_mha backend only supports topk = 1 for speculative decoding."
+ )
+
if (
self.speculative_eagle_topk == 1
and self.speculative_num_draft_tokens != self.speculative_num_steps + 1
@@ -664,6 +802,15 @@ def print_deprecated_warning(message: str):
)
self.speculative_num_draft_tokens = self.speculative_num_steps + 1
+ if (
+ self.speculative_eagle_topk > 1
+ and self.page_size > 1
+ and self.attention_backend != "flashinfer"
+ ):
+ raise ValueError(
+ "speculative_eagle_topk > 1 with page_size > 1 is unstable and produces incorrect results for paged attention backends. This combination is only supported for the 'flashinfer' backend."
+ )
+
# The token generated from the verify step is counted.
# If speculative_num_steps >= speculative_num_draft_tokens, the additional tokens will definitely be discarded.
# assert self.speculative_num_steps < self.speculative_num_draft_tokens
@@ -691,6 +838,13 @@ def print_deprecated_warning(message: str):
self.disable_radix_cache = True
logger.warning("KV cache is forced as chunk cache for decode server")
+
+ if self.dp_size > 1 and not is_in_ci():
+ assert self.prefill_round_robin_balance, (
+ "Prefill round robin balance is required when dp size > 1. "
+ "Please make sure that the prefill instance is launched with `--load-balance-method round_robin`"
+ " and `--prefill-round-robin-balance` is set for decode server."
+ )
elif self.disaggregation_mode == "prefill":
if self.disaggregation_decode_tp is None:
self.disaggregation_decode_tp = self.tp_size
@@ -707,11 +861,19 @@ def print_deprecated_warning(message: str):
os.environ["SGLANG_ENABLE_TORCH_COMPILE"] = (
"1" if self.enable_torch_compile else "0"
)
+ os.environ["SGLANG_MAMBA_SSM_DTYPE"] = self.mamba_ssm_dtype
+
# Set env var before grammar backends init
os.environ["SGLANG_DISABLE_OUTLINES_DISK_CACHE"] = (
"1" if self.disable_outlines_disk_cache else "0"
)
+ if self.enable_hierarchical_cache and self.disable_radix_cache:
+ raise ValueError(
+ "The arguments enable-hierarchical-cache and disable-radix-cache are mutually exclusive "
+ "and cannot be used at the same time. Please use only one of them."
+ )
+
@staticmethod
def add_cli_args(parser: argparse.ArgumentParser):
# Model and tokenizer
@@ -737,6 +899,12 @@ def add_cli_args(parser: argparse.ArgumentParser):
"tokenizer if available, and 'slow' will "
"always use the slow tokenizer.",
)
+ parser.add_argument(
+ "--tokenizer-worker-num",
+ type=int,
+ default=ServerArgs.tokenizer_worker_num,
+ help="The worker num of the tokenizer manager.",
+ )
parser.add_argument(
"--skip-tokenizer-init",
action="store_true",
@@ -746,18 +914,7 @@ def add_cli_args(parser: argparse.ArgumentParser):
"--load-format",
type=str,
default=ServerArgs.load_format,
- choices=[
- "auto",
- "pt",
- "safetensors",
- "npcache",
- "dummy",
- "sharded_state",
- "gguf",
- "bitsandbytes",
- "layered",
- "remote",
- ],
+ choices=LOAD_FORMAT_CHOICES,
help="The format of the model weights to load. "
'"auto" will try to load the weights in the safetensors format '
"and fall back to the pytorch bin format if safetensors format "
@@ -876,25 +1033,7 @@ def add_cli_args(parser: argparse.ArgumentParser):
"--quantization",
type=str,
default=ServerArgs.quantization,
- choices=[
- "awq",
- "fp8",
- "gptq",
- "marlin",
- "gptq_marlin",
- "awq_marlin",
- "bitsandbytes",
- "gguf",
- "modelopt",
- "modelopt_fp4",
- "petit_nvfp4",
- "w8a8_int8",
- "w8a8_fp8",
- "moe_wna16",
- "qoq",
- "w4afp8",
- "mxfp4",
- ],
+ choices=QUANTIZATION_CHOICES,
help="The quantization method.",
)
parser.add_argument(
@@ -965,12 +1104,6 @@ def add_cli_args(parser: argparse.ArgumentParser):
default=ServerArgs.schedule_conservativeness,
help="How conservative the schedule policy is. A larger value means more conservative scheduling. Use a larger value if you see requests being retracted frequently.",
)
- parser.add_argument(
- "--cpu-offload-gb",
- type=int,
- default=ServerArgs.cpu_offload_gb,
- help="How many GBs of RAM to reserve for CPU offloading.",
- )
parser.add_argument(
"--page-size",
type=int,
@@ -1163,6 +1296,32 @@ def add_cli_args(parser: argparse.ArgumentParser):
default=ServerArgs.collect_tokens_histogram,
help="Collect prompt/generation tokens histogram.",
)
+ bucket_rule = (
+ "Supports 3 rule types: 'default' uses predefined buckets; 'tse ' "
+ "generates two sides exponential distributed buckets (e.g., 'tse 1000 2 8' generates buckets "
+ "[984.0, 992.0, 996.0, 998.0, 1000.0, 1002.0, 1004.0, 1008.0, 1016.0]).); 'customer "
+ " ...' uses custom bucket values (e.g., 'customer 10 50 100 500')."
+ )
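+ # Illustrative sketch of the 'tse' rule described above (the actual bucket generation
+ # lives in the metrics code, not here): for 'tse 1000 2 8',
+ #   [middle - base**i for i in range(count // 2, 0, -1)] + [middle]
+ #   + [middle + base**i for i in range(1, count // 2 + 1)]
+ # yields [984.0, 992.0, 996.0, 998.0, 1000.0, 1002.0, 1004.0, 1008.0, 1016.0].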
+ parser.add_argument(
+ "--prompt-tokens-buckets",
+ type=str,
+ nargs="+",
+ default=ServerArgs.prompt_tokens_buckets,
+ help=f"The buckets rule of prompt tokens. {bucket_rule}",
+ )
+ parser.add_argument(
+ "--generation-tokens-buckets",
+ type=str,
+ nargs="+",
+ default=ServerArgs.generation_tokens_buckets,
+ help=f"The buckets rule for generation tokens histogram. {bucket_rule}",
+ )
+ parser.add_argument(
+ "--gc-warning-threshold-secs",
+ type=float,
+ default=ServerArgs.gc_warning_threshold_secs,
+ help="The threshold for long GC warning. If a GC takes longer than this, a warning will be logged. Set to 0 to disable.",
+ )
parser.add_argument(
"--decode-log-interval",
type=int,
@@ -1231,23 +1390,13 @@ def add_cli_args(parser: argparse.ArgumentParser):
default=ServerArgs.reasoning_parser,
help=f"Specify the parser for reasoning models, supported parsers are: {list(ReasoningParser.DetectorMap.keys())}.",
)
+ tool_call_parser_choices = list(FunctionCallParser.ToolCallParserEnum.keys())
parser.add_argument(
"--tool-call-parser",
type=str,
- choices=[ # TODO: use FunctionCallParser.DetectorMap.keys()
- "qwen25",
- "mistral",
- "llama3",
- "deepseekv3",
- "pythonic",
- "kimi_k2",
- "qwen3_coder",
- "glm45",
- "step3",
- "gpt-oss",
- ],
+ choices=tool_call_parser_choices,
default=ServerArgs.tool_call_parser,
- help="Specify the parser for handling tool-call interactions. Options include: 'qwen25', 'mistral', 'llama3', 'deepseekv3', 'pythonic', 'kimi_k2', 'qwen3_coder', 'glm45', and 'step3'.",
+ help=f"Specify the parser for handling tool-call interactions. Options include: {tool_call_parser_choices}.",
)
parser.add_argument(
"--tool-server",
@@ -1275,6 +1424,12 @@ def add_cli_args(parser: argparse.ArgumentParser):
"minimum_tokens",
],
)
+ parser.add_argument(
+ "--prefill-round-robin-balance",
+ default=ServerArgs.prefill_round_robin_balance,
+ action="store_true",
+ help="Prefill is round robin balanced. This is used to promise decode server can get the correct dp rank.",
+ )
# Multi-node distributed serving
parser.add_argument(
@@ -1303,6 +1458,71 @@ def add_cli_args(parser: argparse.ArgumentParser):
help="json-formatted sampling settings that will be returned in /get_model_info",
)
+ # HiP Attention
+ parser.add_argument(
+ "--enable-hip-attention",
+ action="store_true",
+ help="Enable HiP attention. This flag is not compatible with other sparse attention flags (e.g., double sparsity).",
+ )
+ parser.add_argument(
+ "--hip-attention-config",
+ "--hip-attention-config-path",
+ type=str,
+ default=ServerArgs.hip_attention_config,
+ help="Path to the HiP attention config file, or the json in string format.",
+ )
+ parser.add_argument(
+ "--hip-attention-config-override-json",
+ type=str,
+ default=None,
+ help="JSON string to override imported HiP Attention configs.",
+ )
+
+ # HiP Attention Offload
+ parser.add_argument(
+ "--enable-hip-kv-cache-offload",
+ action="store_true",
+ help="Enable HiP KV cache offloading. This option should be set with --enable-hip-attention.",
+ )
+ parser.add_argument(
+ "--hip-max-mask-cache-factor",
+ type=float,
+ default=ServerArgs.hip_max_mask_cache_factor,
+ help=(
+ "On-GPU cache size factor for HiP sparse top-k mask estimation kernels. "
+ "A cache of size proportional to this value will be allocated on the GPU. "
+ "This will be a major determining factor for mask-refreshing decoding step latency."
+ ),
+ )
+ parser.add_argument(
+ "--hip-max-mask-cache-size",
+ type=int,
+ default=ServerArgs.hip_max_mask_cache_size,
+ help=(
+ "On-GPU cache size for HiP sparse top-k mask estimation kernels. "
+ "Overrides --hip-max-sa-cache-factor. Only use this for precise control of the cache size."
+ ),
+ )
+ parser.add_argument(
+ "--hip-max-sa-cache-factor",
+ type=float,
+ default=ServerArgs.hip_max_sa_cache_factor,
+ help=(
+ "On-GPU cache size factor for HiP sparse attention kernels, in tokens per layer. "
+ "A cache of size proportional to this value will be allocated on the GPU`. "
+ "This will be a major determining factor for mask-cached decoding step latency."
+ ),
+ )
+ parser.add_argument(
+ "--hip-max-sa-cache-size",
+ type=int,
+ default=ServerArgs.hip_max_sa_cache_size,
+ help=(
+ "On-GPU cache size for HiP sparse attention kernels, in tokens per layer. "
+ "Overrides --hip-max-sa-cache-factor. Only use this for precise control of the cache size."
+ ),
+ )
+
# LoRA
parser.add_argument(
"--enable-lora",
@@ -1332,7 +1552,7 @@ def add_cli_args(parser: argparse.ArgumentParser):
nargs="*",
default=None,
action=LoRAPathAction,
- help="The list of LoRA adapters. You can provide a list of either path in str or renamed path in the format {name}={path}.",
+ help='The list of LoRA adapters to load. Each adapter must be specified in one of the following formats: <path> | <name>=<path> | JSON with schema {"lora_name":str,"lora_path":str,"pinned":bool}',
)
parser.add_argument(
"--max-loras-per-batch",
@@ -1354,43 +1574,24 @@ def add_cli_args(parser: argparse.ArgumentParser):
)
# Kernel backend
- ATTN_BACKENDS = [
- # Common
- "triton",
- "torch_native",
- # NVIDIA specific
- "cutlass_mla",
- "fa3",
- "flashinfer",
- "flashmla",
- "trtllm_mla",
- "trtllm_mha",
- "dual_chunk_flash_attn",
- # AMD specific
- "aiter",
- "wave",
- # Other platforms
- "intel_amx",
- "ascend",
- ]
parser.add_argument(
"--attention-backend",
type=str,
- choices=ATTN_BACKENDS,
+ choices=ATTENTION_BACKEND_CHOICES,
default=ServerArgs.attention_backend,
help="Choose the kernels for attention layers.",
)
parser.add_argument(
"--prefill-attention-backend",
type=str,
- choices=ATTN_BACKENDS,
+ choices=ATTENTION_BACKEND_CHOICES,
default=ServerArgs.prefill_attention_backend,
help="Choose the kernels for prefill attention layers (have priority over --attention-backend).",
)
parser.add_argument(
"--decode-attention-backend",
type=str,
- choices=ATTN_BACKENDS,
+ choices=ATTENTION_BACKEND_CHOICES,
default=ServerArgs.decode_attention_backend,
help="Choose the kernels for decode attention layers (have priority over --attention-backend).",
)
@@ -1404,7 +1605,7 @@ def add_cli_args(parser: argparse.ArgumentParser):
parser.add_argument(
"--grammar-backend",
type=str,
- choices=["xgrammar", "outlines", "llguidance", "none"],
+ choices=GRAMMAR_BACKEND_CHOICES,
default=ServerArgs.grammar_backend,
help="Choose the backend for grammar-guided decoding.",
)
@@ -1420,14 +1621,23 @@ def add_cli_args(parser: argparse.ArgumentParser):
parser.add_argument(
"--speculative-algorithm",
type=str,
- choices=["EAGLE", "EAGLE3", "NEXTN"],
+ choices=["EAGLE", "EAGLE3", "NEXTN", "STANDALONE"],
help="Speculative algorithm.",
)
parser.add_argument(
"--speculative-draft-model-path",
+ "--speculative-draft-model",
type=str,
help="The path of the draft model weights. This can be a local folder or a Hugging Face repo ID.",
)
+ parser.add_argument(
+ "--speculative-draft-model-revision",
+ type=str,
+ default=None,
+ help="The specific draft model version to use. It can be a branch "
+ "name, a tag name, or a commit id. If unspecified, will use "
+ "the default version.",
+ )
parser.add_argument(
"--speculative-num-steps",
type=int,
@@ -1464,6 +1674,13 @@ def add_cli_args(parser: argparse.ArgumentParser):
help="The path of the draft model's small vocab table.",
default=ServerArgs.speculative_token_map,
)
+ parser.add_argument(
+ "--speculative-attention-mode",
+ type=str,
+ choices=["prefill", "decode"],
+ help="Attention backend for speculative decoding operations (both target verify and draft extend). Can be one of 'prefill' (default) or 'decode'.",
+ default=ServerArgs.speculative_attention_mode,
+ )
# Expert parallelism
parser.add_argument(
@@ -1490,10 +1707,18 @@ def add_cli_args(parser: argparse.ArgumentParser):
"triton_kernel",
"flashinfer_trtllm",
"flashinfer_cutlass",
+ "flashinfer_mxfp4",
],
default=ServerArgs.moe_runner_backend,
help="Choose the runner backend for MoE.",
)
+ parser.add_argument(
+ "--flashinfer-mxfp4-moe-precision",
+ type=str,
+ choices=["default", "bf16"],
+ default=ServerArgs.flashinfer_mxfp4_moe_precision,
+ help="Choose the computation precision of flashinfer mxfp4 moe",
+ )
parser.add_argument(
"--enable-flashinfer-allreduce-fusion",
action="store_true",
@@ -1547,6 +1772,12 @@ def add_cli_args(parser: argparse.ArgumentParser):
default=ServerArgs.eplb_rebalance_layers_per_chunk,
help="Number of layers to rebalance per forward pass.",
)
+ parser.add_argument(
+ "--eplb-min-rebalancing-utilization-threshold",
+ type=float,
+ default=ServerArgs.eplb_min_rebalancing_utilization_threshold,
+ help="Minimum threshold for GPU average utilization to trigger EPLB rebalancing. Must be in the range [0.0, 1.0].",
+ )
parser.add_argument(
"--expert-distribution-recorder-mode",
type=str,
@@ -1577,6 +1808,21 @@ def add_cli_args(parser: argparse.ArgumentParser):
help="TP size for MoE dense MLP layers. This flag is useful when, with large TP size, there are errors caused by weights in MLP layers having dimension smaller than the min dimension GEMM supports.",
)
+ # Mamba Cache
+ parser.add_argument(
+ "--max-mamba-cache-size",
+ type=int,
+ default=ServerArgs.max_mamba_cache_size,
+ help="The maximum size of the mamba cache.",
+ )
+ parser.add_argument(
+ "--mamba-ssm-dtype",
+ type=str,
+ default=ServerArgs.mamba_ssm_dtype,
+ choices=["float32", "bfloat16"],
+ help="The data type of the SSM states in mamba cache.",
+ )
+
# Hierarchical cache
parser.add_argument(
"--enable-hierarchical-cache",
@@ -1630,6 +1876,18 @@ def add_cli_args(parser: argparse.ArgumentParser):
default=ServerArgs.hicache_storage_prefetch_policy,
help="Control when prefetching from the storage backend should stop.",
)
+ parser.add_argument(
+ "--hicache-storage-backend-extra-config",
+ type=str,
+ default=ServerArgs.hicache_storage_backend_extra_config,
+ help="A dictionary in JSON string format containing extra configuration for the storage backend.",
+ )
+ # LMCache
+ parser.add_argument(
+ "--enable-lmcache",
+ action="store_true",
+ help="Using LMCache as an alternative hierarchical cache solution",
+ )
# Double Sparsity
parser.add_argument(
@@ -1668,6 +1926,38 @@ def add_cli_args(parser: argparse.ArgumentParser):
help="The type of heavy channels in double sparsity attention",
)
+ # Offloading
+ parser.add_argument(
+ "--cpu-offload-gb",
+ type=int,
+ default=ServerArgs.cpu_offload_gb,
+ help="How many GBs of RAM to reserve for CPU offloading.",
+ )
+ parser.add_argument(
+ "--offload-group-size",
+ type=int,
+ default=ServerArgs.offload_group_size,
+ help="Number of layers per group in offloading.",
+ )
+ parser.add_argument(
+ "--offload-num-in-group",
+ type=int,
+ default=ServerArgs.offload_num_in_group,
+ help="Number of layers to be offloaded within a group.",
+ )
+ parser.add_argument(
+ "--offload-prefetch-step",
+ type=int,
+ default=ServerArgs.offload_prefetch_step,
+ help="Steps to prefetch in offloading.",
+ )
+ parser.add_argument(
+ "--offload-mode",
+ type=str,
+ default=ServerArgs.offload_mode,
+ help="Mode of offloading.",
+ )
+
# Optimization/debug options
parser.add_argument(
"--disable-radix-cache",
@@ -1870,6 +2160,12 @@ def add_cli_args(parser: argparse.ArgumentParser):
default=ServerArgs.scheduler_recv_interval,
help="The interval to poll requests in scheduler. Can be set to >1 to reduce the overhead of this.",
)
+ parser.add_argument(
+ "--numa-node",
+ type=int,
+ nargs="+",
+ help="Sets the numa node for the subprocesses. i-th element corresponds to i-th subprocess.",
+ )
# Debug tensor dumps
parser.add_argument(
@@ -1908,7 +2204,7 @@ def add_cli_args(parser: argparse.ArgumentParser):
"--disaggregation-transfer-backend",
type=str,
default=ServerArgs.disaggregation_transfer_backend,
- choices=["mooncake", "nixl", "ascend"],
+ choices=DISAGG_TRANSFER_BACKEND_CHOICES,
help="The backend for disaggregation transfer. Default is mooncake.",
)
parser.add_argument(
@@ -1949,12 +2245,6 @@ def add_cli_args(parser: argparse.ArgumentParser):
default=ServerArgs.num_reserved_decode_tokens,
help="Number of decode tokens that will have memory reserved when adding new request to the running batch.",
)
- parser.add_argument(
- "--pdlb-url",
- type=str,
- default=None,
- help="The URL of the PD disaggregation load balancer. If set, the prefill/decode server will register with the load balancer.",
- )
# Custom weight loader
parser.add_argument(
@@ -1964,24 +2254,25 @@ def add_cli_args(parser: argparse.ArgumentParser):
default=None,
help="The custom dataloader which used to update the model. Should be set with a valid import path, such as my_package.weight_load_func",
)
+ parser.add_argument(
+ "--weight-loader-disable-mmap",
+ action="store_true",
+ help="Disable mmap while loading weight using safetensors.",
+ )
+
+ # For PD-Multiplexing
parser.add_argument(
"--enable-pdmux",
action="store_true",
help="Enable PD-Multiplexing, PD running on greenctx stream.",
)
- # For PD-Multiplexing
parser.add_argument(
"--sm-group-num",
type=int,
default=ServerArgs.sm_group_num,
help="Number of sm partition groups.",
)
- parser.add_argument(
- "--weight-loader-disable-mmap",
- action="store_true",
- help="Disable mmap while loading weight using safetensors.",
- )
# Deprecated arguments
parser.add_argument(
@@ -2021,6 +2312,27 @@ def from_cli_args(cls, args: argparse.Namespace):
args.pp_size = args.pipeline_parallel_size
args.dp_size = args.data_parallel_size
args.ep_size = args.expert_parallel_size
+
+ if args.attention_backend == "hip_attention":
+ args.enable_hip_attention = True
+
+ if args.enable_hip_attention:
+ from hip_attn.v1_2 import HiPAttentionConfig
+
+ json_or_path = args.hip_attention_config
+
+ args.hip_attention_config = HiPAttentionConfig(
+ json_or_path=json_or_path,
+ json_override=args.hip_attention_config_override_json,
+ )
+ if args.attention_backend != "hip_attention":
+ logger.info(
+ f"attention_backend changed {args.attention_backend} -> hip_attention"
+ )
+ args.attention_backend = "hip_attention"
+ else:
+ args.hip_attention_config = None
+
attrs = [attr.name for attr in dataclasses.fields(cls)]
return cls(**{attr: getattr(args, attr) for attr in attrs})
@@ -2082,6 +2394,15 @@ def check_server_args(self):
self.chunked_prefill_size % self.page_size == 0
), "chunked_prefill_size must be divisible by page_size"
+ # Check multi-tokenizer settings
+ assert self.tokenizer_worker_num > 0, "tokenizer_worker_num must be >= 1"
+ self.validate_buckets_rule(
+ "--prompt-tokens-buckets", self.prompt_tokens_buckets
+ )
+ self.validate_buckets_rule(
+ "--generation-tokens-buckets", self.generation_tokens_buckets
+ )
+
def check_lora_server_args(self):
assert self.max_loras_per_batch > 0, "max_loras_per_batch must be positive"
@@ -2098,28 +2419,42 @@ def check_lora_server_args(self):
)
if self.enable_lora:
- # Normalize lora_paths to a dictionary if it is a list.
- # TODO (lifuhuang): support specifying pinned adapters in server_args.
if isinstance(self.lora_paths, list):
lora_paths = self.lora_paths
- self.lora_paths = {}
+ self.lora_paths = []
for lora_path in lora_paths:
- if "=" in lora_path:
- name, path = lora_path.split("=", 1)
- self.lora_paths[name] = LoRARef(
- lora_name=name, lora_path=path, pinned=False
+ if isinstance(lora_path, str):
+ if "=" in lora_path:
+ name, path = lora_path.split("=", 1)
+ lora_ref = LoRARef(
+ lora_name=name, lora_path=path, pinned=False
+ )
+ else:
+ lora_ref = LoRARef(
+ lora_name=lora_path, lora_path=lora_path, pinned=False
+ )
+ elif isinstance(lora_path, dict):
+ assert (
+ "lora_name" in lora_path and "lora_path" in lora_path
+ ), f"When providing LoRA paths as a list of dict, each dict should contain 'lora_name' and 'lora_path' keys. Got: {lora_path}"
+ lora_ref = LoRARef(
+ lora_name=lora_path["lora_name"],
+ lora_path=lora_path["lora_path"],
+ pinned=lora_path.get("pinned", False),
)
else:
- self.lora_paths[lora_path] = LoRARef(
- lora_name=lora_path, lora_path=lora_path, pinned=False
+ raise ValueError(
+ f"Invalid type for item in --lora-paths list: {type(lora_path)}. "
+ "Expected a string or a dictionary."
)
+ self.lora_paths.append(lora_ref)
elif isinstance(self.lora_paths, dict):
- self.lora_paths = {
- k: LoRARef(lora_name=k, lora_path=v, pinned=False)
+ self.lora_paths = [
+ LoRARef(lora_name=k, lora_path=v, pinned=False)
for k, v in self.lora_paths.items()
- }
+ ]
elif self.lora_paths is None:
- self.lora_paths = {}
+ self.lora_paths = []
else:
raise ValueError(
f"Invalid type for --lora-paths: {type(self.lora_paths)}. "
@@ -2146,9 +2481,7 @@ def check_lora_server_args(self):
"max_loaded_loras should be greater than or equal to max_loras_per_batch. "
f"max_loaded_loras={self.max_loaded_loras}, max_loras_per_batch={self.max_loras_per_batch}"
)
- assert (
- not self.lora_paths or len(self.lora_paths) <= self.max_loaded_loras
- ), (
+ assert len(self.lora_paths) <= self.max_loaded_loras, (
"The number of LoRA paths should not exceed max_loaded_loras. "
f"max_loaded_loras={self.max_loaded_loras}, lora_paths={len(self.lora_paths)}"
)
@@ -2161,6 +2494,54 @@ def validate_disagg_tp_size(self, prefill_tp: int, decode_tp: int):
f"decode_tp={decode_tp}, prefill_tp={prefill_tp}"
)
+ def validate_buckets_rule(self, arg_name: str, buckets_rule: List[str]):
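+ # Accepted CLI forms (values are space-separated), e.g.:
+ #   --prompt-tokens-buckets default
+ #   --prompt-tokens-buckets tse 1000 2 8
+ #   --prompt-tokens-buckets customer 10 50 100 500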
+ if not buckets_rule:
+ return
+
+ assert len(buckets_rule) > 0, f"{arg_name} cannot be empty list"
+ rule = buckets_rule[0]
+ assert rule in [
+ "tse",
+ "default",
+ "customer",
+ ], f"Unsupported {arg_name} rule type: '{rule}'. Must be one of: 'tse', 'default', 'customer'"
+
+ if rule == "tse":
+ assert (
+ len(buckets_rule) == 4
+ ), f"{arg_name} TSE rule requires exactly 4 parameters: ['tse', middle, base, count], got {len(buckets_rule)}"
+ try:
+ middle = float(buckets_rule[1])
+ base = float(buckets_rule[2])
+ count = int(buckets_rule[3])
+ except (ValueError, IndexError):
+ assert (
+ False
+ ), f"{arg_name} TSE rule parameters must be: ['tse', , , ]"
+ assert base > 1, f"{arg_name} TSE base must be larger than 1, got: {base}"
+ assert count > 0, f"{arg_name} TSE count must be positive, got: {count}"
+ assert middle > 0, f"{arg_name} TSE middle must be positive, got: {middle}"
+
+ elif rule == "default":
+ assert (
+ len(buckets_rule) == 1
+ ), f"{arg_name} default rule should only have one parameter: ['default'], got {len(buckets_rule)}"
+
+ elif rule == "customer":
+ assert (
+ len(buckets_rule) >= 2
+ ), f"{arg_name} customer rule requires at least one bucket value: ['customer', value1, ...]"
+ try:
+ bucket_values = [float(x) for x in buckets_rule[1:]]
+ except ValueError:
+ assert False, f"{arg_name} customer rule bucket values must be numeric"
+ assert len(set(bucket_values)) == len(
+ bucket_values
+ ), f"{arg_name} customer rule bucket values should not contain duplicates"
+ assert all(
+ val >= 0 for val in bucket_values
+ ), f"{arg_name} customer rule bucket values should be non-negative"
+
def model_specific_adjustments(self):
hf_config = self.get_hf_config()
model_arch = hf_config.architectures[0]
@@ -2172,7 +2553,7 @@ def model_specific_adjustments(self):
self.attention_backend = "fa3"
else:
self.attention_backend = "triton"
- supported_backends = ["triton", "trtllm_mha", "fa3"]
+ supported_backends = ["triton", "trtllm_mha", "fa3", "hip_attention"]
logger.info(
f"Use {self.attention_backend} as attention backend for GptOssForCausalLM"
)
@@ -2181,10 +2562,11 @@ def model_specific_adjustments(self):
), f"GptOssForCausalLM requires one of {supported_backends} attention backend, but got '{self.attention_backend}'"
if is_sm100_supported():
- self.enable_flashinfer_allreduce_fusion = True
- logger.info(
- "Enable FlashInfer AllReduce Fusion on sm100 for GptOssForCausalLM"
- )
+ if not self.enable_dp_attention:
+ self.enable_flashinfer_allreduce_fusion = True
+ logger.info(
+ "Enable FlashInfer AllReduce Fusion on sm100 for GptOssForCausalLM"
+ )
quantization_config = getattr(hf_config, "quantization_config", None)
is_mxfp4_quant_format = (
quantization_config is not None
@@ -2214,8 +2596,13 @@ def model_specific_adjustments(self):
if is_mxfp4_quant_format:
# use bf16 for mxfp4 triton kernels
self.dtype = "bfloat16"
+
elif "Llama4" in model_arch:
- assert self.attention_backend == "fa3", "fa3 is required for Llama4 model"
+ assert self.attention_backend in {
+ "fa3",
+ "aiter",
+ "triton",
+ }, "fa3, aiter, or triton is required for Llama4 model"
elif model_arch in [
"Gemma2ForCausalLM",
"Gemma3ForCausalLM",
@@ -2308,6 +2695,9 @@ class PortArgs:
# The ipc filename for Scheduler to send metrics
metrics_ipc_name: str
+ # The ipc filename for Tokenizer and worker tokenizer
+ tokenizer_worker_ipc_name: Optional[str]
+
@staticmethod
def init_new(server_args, dp_rank: Optional[int] = None) -> "PortArgs":
if server_args.nccl_port is None:
@@ -2331,6 +2721,7 @@ def init_new(server_args, dp_rank: Optional[int] = None) -> "PortArgs":
nccl_port=nccl_port,
rpc_ipc_name=f"ipc://{tempfile.NamedTemporaryFile(delete=False).name}",
metrics_ipc_name=f"ipc://{tempfile.NamedTemporaryFile(delete=False).name}",
+ tokenizer_worker_ipc_name=None,
)
else:
# DP attention. Use TCP + port to handle both single-node and multi-node.
@@ -2364,18 +2755,28 @@ def init_new(server_args, dp_rank: Optional[int] = None) -> "PortArgs":
nccl_port=nccl_port,
rpc_ipc_name=f"tcp://{dist_init_host}:{rpc_port}",
metrics_ipc_name=f"tcp://{dist_init_host}:{metrics_ipc_name}",
+ tokenizer_worker_ipc_name=None,
)
class LoRAPathAction(argparse.Action):
def __call__(self, parser, namespace, values, option_string=None):
- setattr(namespace, self.dest, {})
- for lora_path in values:
- if "=" in lora_path:
- name, path = lora_path.split("=", 1)
- getattr(namespace, self.dest)[name] = path
- else:
- getattr(namespace, self.dest)[lora_path] = lora_path
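+ # Each value may be a plain path, a "name=path" pair, or a JSON object such as
+ # {"lora_name": "my_adapter", "lora_path": "/path/to/adapter", "pinned": true}
+ # (names and paths here are illustrative).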
+ lora_paths = []
+ if values:
+ assert isinstance(values, list), "Expected a list of LoRA paths."
+ for lora_path in values:
+ lora_path = lora_path.strip()
+ if lora_path.startswith("{") and lora_path.endswith("}"):
+ obj = json.loads(lora_path)
+ assert "lora_path" in obj and "lora_name" in obj, (
+ f"{repr(lora_path)} looks like a JSON str, "
+ "but it does not contain 'lora_name' and 'lora_path' keys."
+ )
+ lora_paths.append(obj)
+ else:
+ lora_paths.append(lora_path)
+
+ setattr(namespace, self.dest, lora_paths)
class DeprecatedAction(argparse.Action):
@@ -2388,6 +2789,10 @@ def __call__(self, parser, namespace, values, option_string=None):
raise ValueError(self.help)
+def print_deprecated_warning(message: str):
+ logger.warning(f"\033[33m{message}\033[0m")
+
+
def auto_choose_speculative_params(self: ServerArgs):
"""
Automatically choose the parameters for speculative decoding.
@@ -2396,12 +2801,18 @@ def auto_choose_speculative_params(self: ServerArgs):
"""
hf_config = self.get_hf_config()
arch = hf_config.architectures[0]
-
+ if self.speculative_algorithm == "STANDALONE":
+ # The default value for standalone speculative decoding
+ return (3, 1, 4)
if arch in ["LlamaForCausalLM"]:
# The default value for llama
return (5, 4, 8)
- elif arch in ["DeepseekV3ForCausalLM", "DeepseekV2ForCausalLM"]:
- # The default value for deepseek
+ elif arch in [
+ "DeepseekV3ForCausalLM",
+ "DeepseekV2ForCausalLM",
+ "GptOssForCausalLM",
+ ]:
+ # The default value for deepseek and gpt-oss
return (3, 1, 4)
elif arch in ["Grok1ForCausalLM", "Grok1VForCausalLM"]:
return (5, 4, 8)
diff --git a/python/sglang/srt/speculative/eagle_draft_cuda_graph_runner.py b/python/sglang/srt/speculative/eagle_draft_cuda_graph_runner.py
index 3401e2738b2..dbd8cc2f416 100644
--- a/python/sglang/srt/speculative/eagle_draft_cuda_graph_runner.py
+++ b/python/sglang/srt/speculative/eagle_draft_cuda_graph_runner.py
@@ -6,20 +6,21 @@
import torch
from sglang.srt.layers.dp_attention import DpPaddingMode, set_dp_buffer_len
-from sglang.srt.model_executor.cuda_graph_runner import CudaGraphRunner
-from sglang.srt.model_executor.forward_batch_info import (
- CaptureHiddenMode,
- ForwardBatch,
- ForwardMode,
-)
-from sglang.srt.model_executor.graph_runner import (
- GRAPH_CAPTURE_FAILED_MSG,
+from sglang.srt.model_executor.cuda_graph_runner import (
+ CUDA_GRAPH_CAPTURE_FAILED_MSG,
+ CudaGraphRunner,
get_batch_sizes_to_capture,
+ get_capture_configs,
get_global_graph_memory_pool,
model_capture_mode,
set_global_graph_memory_pool,
set_torch_compile_config,
)
+from sglang.srt.model_executor.forward_batch_info import (
+ CaptureHiddenMode,
+ ForwardBatch,
+ ForwardMode,
+)
from sglang.srt.speculative.eagle_utils import EagleDraftInput
from sglang.srt.utils import (
require_attn_tp_gather,
@@ -41,6 +42,7 @@ def __init__(self, eagle_worker: EAGLEWorker):
# Parse args
self.eagle_worker = eagle_worker
self.model_runner = model_runner = eagle_worker.model_runner
+ self.model_runner: EAGLEWorker
self.graphs = {}
self.output_buffers = {}
self.enable_torch_compile = model_runner.server_args.enable_torch_compile
@@ -54,6 +56,7 @@ def __init__(self, eagle_worker: EAGLEWorker):
self.tp_size = self.model_runner.tp_size
self.topk = model_runner.server_args.speculative_eagle_topk
self.speculative_num_steps = model_runner.server_args.speculative_num_steps
+ self.enable_hip_attention = model_runner.server_args.enable_hip_attention
self.enable_profile_cuda_graph = (
model_runner.server_args.enable_profile_cuda_graph
)
@@ -62,6 +65,7 @@ def __init__(self, eagle_worker: EAGLEWorker):
# Batch sizes to capture
self.capture_bs, self.compile_bs = get_batch_sizes_to_capture(model_runner)
self.num_tokens_per_bs = server_args.speculative_eagle_topk
+ self.capture_configs = get_capture_configs(model_runner.server_args)
# Attention backend
self.max_bs = max(self.capture_bs)
@@ -90,6 +94,9 @@ def __init__(self, eagle_worker: EAGLEWorker):
(self.max_num_token * self.speculative_num_steps,), dtype=torch.int64
)
self.positions = torch.zeros((self.max_num_token,), dtype=torch.int64)
+ self.mrope_positions = torch.zeros(
+ (3, self.max_num_token), dtype=torch.int64
+ )
self.topk_p = torch.zeros((self.max_bs, self.topk), dtype=torch.float32)
self.topk_index = torch.zeros((self.max_bs, self.topk), dtype=torch.int64)
self.hidden_states = torch.zeros(
@@ -121,7 +128,7 @@ def __init__(self, eagle_worker: EAGLEWorker):
self.capture()
except RuntimeError as e:
raise Exception(
- f"Capture cuda graph failed: {e}\n{GRAPH_CAPTURE_FAILED_MSG}"
+ f"Capture cuda graph failed: {e}\n{CUDA_GRAPH_CAPTURE_FAILED_MSG}"
)
def can_run(self, forward_batch: ForwardBatch):
@@ -134,8 +141,9 @@ def can_run(self, forward_batch: ForwardBatch):
else:
cuda_graph_bs = forward_batch.batch_size
+ recorded_batch_sizes = {bs for bs, *_ in self.graphs}
is_bs_supported = (
- cuda_graph_bs in self.graphs
+ forward_batch.batch_size in recorded_batch_sizes
if self.disable_padding
else cuda_graph_bs <= self.max_bs
)
@@ -148,7 +156,9 @@ def can_run(self, forward_batch: ForwardBatch):
def capture(self):
CudaGraphRunner.capture(self)
- def capture_one_batch_size(self, num_seqs: int, forward: Callable):
+ def capture_one_batch_size(
+ self, num_seqs: int, forward: Callable, capture_config: tuple
+ ):
graph = torch.cuda.CUDAGraph()
stream = self.stream
num_tokens = num_seqs * self.num_tokens_per_bs
@@ -158,6 +168,7 @@ def capture_one_batch_size(self, num_seqs: int, forward: Callable):
seq_lens = self.seq_lens[:num_seqs]
out_cache_loc = self.out_cache_loc[: num_tokens * self.speculative_num_steps]
positions = self.positions[:num_tokens]
+ mrope_positions = self.mrope_positions[:, :num_tokens]
topk_p = self.topk_p[:num_seqs]
topk_index = self.topk_index[:num_seqs]
hidden_states = self.hidden_states[:num_seqs]
@@ -223,6 +234,7 @@ def capture_one_batch_size(self, num_seqs: int, forward: Callable):
seq_lens_sum=seq_lens.sum().item(),
return_logprob=False,
positions=positions,
+ mrope_positions=mrope_positions,
global_num_tokens_gpu=global_num_tokens,
dp_padding_mode=DpPaddingMode.get_default_mode_in_cuda_graph(),
global_dp_buffer_len=global_dp_buffer_len,
@@ -334,8 +346,11 @@ def replay(self, forward_batch: ForwardBatch):
# TODO: The forward_batch.seq_len_sum might need to be updated to reflect the padding in the cuda graph
# Replay
- self.graphs[bs].replay()
- out = self.output_buffers[bs]
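+ # The graph cache is keyed by batch size, plus the current HiP cached-stage index
+ # when HiP attention is enabled (see the handle built below).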
+ graph_handle = (bs,)
+ if self.enable_hip_attention:
+ graph_handle = (bs, forward_batch.hip_metadata_cached_stages)
+ self.graphs[graph_handle].replay()
+ out = self.output_buffers[graph_handle]
if bs != raw_bs:
out = self._postprocess_output_to_raw_bs(out, raw_bs)
diff --git a/python/sglang/srt/speculative/eagle_draft_extend_cuda_graph_runner.py b/python/sglang/srt/speculative/eagle_draft_extend_cuda_graph_runner.py
index b40db90dd98..8340b0ca892 100644
--- a/python/sglang/srt/speculative/eagle_draft_extend_cuda_graph_runner.py
+++ b/python/sglang/srt/speculative/eagle_draft_extend_cuda_graph_runner.py
@@ -6,14 +6,9 @@
import torch
from sglang.srt.layers.dp_attention import DpPaddingMode, set_dp_buffer_len
-from sglang.srt.model_executor.cuda_graph_runner import CudaGraphRunner
-from sglang.srt.model_executor.forward_batch_info import (
- CaptureHiddenMode,
- ForwardBatch,
- ForwardMode,
-)
-from sglang.srt.model_executor.graph_runner import (
- GRAPH_CAPTURE_FAILED_MSG,
+from sglang.srt.model_executor.cuda_graph_runner import (
+ CUDA_GRAPH_CAPTURE_FAILED_MSG,
+ CudaGraphRunner,
LogitsProcessorOutput,
get_batch_sizes_to_capture,
get_global_graph_memory_pool,
@@ -21,6 +16,11 @@
set_global_graph_memory_pool,
set_torch_compile_config,
)
+from sglang.srt.model_executor.forward_batch_info import (
+ CaptureHiddenMode,
+ ForwardBatch,
+ ForwardMode,
+)
from sglang.srt.speculative.eagle_utils import EagleDraftInput, fast_topk
from sglang.srt.utils import (
require_attn_tp_gather,
@@ -80,6 +80,9 @@ def __init__(self, eagle_worker: EAGLEWorker):
self.req_pool_indices = torch.zeros((self.max_bs,), dtype=torch.int32)
self.out_cache_loc = torch.ones((self.max_num_token,), dtype=torch.int64)
self.positions = torch.zeros((self.max_num_token,), dtype=torch.int64)
+ self.mrope_positions = torch.zeros(
+ (3, self.max_num_token), dtype=torch.int64
+ )
if self.eagle_worker.speculative_algorithm.is_eagle3():
self.hidden_states = torch.zeros(
@@ -149,7 +152,7 @@ def __init__(self, eagle_worker: EAGLEWorker):
self.capture()
except RuntimeError as e:
raise Exception(
- f"Capture cuda graph failed: {e}\n{GRAPH_CAPTURE_FAILED_MSG}"
+ f"Capture cuda graph failed: {e}\n{CUDA_GRAPH_CAPTURE_FAILED_MSG}"
)
def can_run(self, forward_batch: ForwardBatch):
@@ -189,6 +192,7 @@ def capture_one_batch_size(self, bs: int, forward: Callable):
accept_length = self.accept_length[:bs]
out_cache_loc = self.out_cache_loc[:num_tokens]
positions = self.positions[:num_tokens]
+ mrope_positions = self.mrope_positions[:, :num_tokens]
hidden_states = self.hidden_states[:num_tokens]
next_token_logits_buffer = self.next_token_logits_buffer[:bs]
@@ -247,6 +251,7 @@ def capture_one_batch_size(self, bs: int, forward: Callable):
seq_lens_sum=seq_lens.sum().item(),
return_logprob=False,
positions=positions,
+ mrope_positions=mrope_positions,
global_num_tokens_gpu=self.global_num_tokens_gpu,
global_num_tokens_for_logprob_gpu=self.global_num_tokens_for_logprob_gpu,
dp_padding_mode=DpPaddingMode.get_default_mode_in_cuda_graph(),
@@ -336,7 +341,11 @@ def replay(self, forward_batch: ForwardBatch):
self.extend_seq_lens[:raw_bs].copy_(forward_batch.extend_seq_lens)
self.out_cache_loc[:num_tokens].copy_(forward_batch.out_cache_loc)
self.positions[:num_tokens].copy_(forward_batch.positions)
- self.hidden_states[:num_tokens].copy_(forward_batch.spec_info.hidden_states)
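+ # Only copy when the hidden dimension matches the captured buffer; a mismatch
+ # (presumably a draft/target hidden-size difference) is skipped silently.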
+ if (
+ forward_batch.spec_info.hidden_states.shape[1]
+ == self.hidden_states.shape[1]
+ ):
+ self.hidden_states[:num_tokens].copy_(forward_batch.spec_info.hidden_states)
if forward_batch.spec_info.accept_length is not None:
self.accept_length[:raw_bs].copy_(forward_batch.spec_info.accept_length)
self.req_pool_indices[:raw_bs].copy_(forward_batch.req_pool_indices)
diff --git a/python/sglang/srt/speculative/eagle_utils.py b/python/sglang/srt/speculative/eagle_utils.py
index d4741144d29..b8289b053ad 100644
--- a/python/sglang/srt/speculative/eagle_utils.py
+++ b/python/sglang/srt/speculative/eagle_utils.py
@@ -49,6 +49,8 @@
TREE_TRAVERSE_TIME_THRESHOLD = 1 # TODO: set this properly
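+# tree_speculative_sampling_target_only is only imported on some builds (it is missing
+# on AMD/HIP builds, see the greedy fallback in verify()), so availability is probed
+# via globals() rather than a direct import.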
+TREE_SPEC_KERNEL_AVAILABLE = "tree_speculative_sampling_target_only" in globals()
+
@dataclass
class EagleDraftInput:
@@ -302,6 +304,9 @@ def prepare_for_verify(self, batch: ScheduleBatch, page_size: int):
next_power_of_2(bs),
)
+ if batch.hip_mask_refresh_state is not None:
+ batch.hip_metadata_cached_stages = batch.hip_mask_refresh_state.update()
+
def generate_attn_arg_prefill(
self,
req_pool_indices: torch.Tensor,
@@ -423,8 +428,15 @@ def verify(
logits=logits_output.next_token_logits, vocab_mask=vocab_mask
)
- # Sample tokens
- if batch.sampling_info.is_all_greedy:
+ # Sample tokens. Fall back to greedy verification when the tree speculative sampling kernel is unavailable (e.g., AMD/HIP builds).
+ is_all_greedy = sampling_info.is_all_greedy
+ if (not is_all_greedy) and (not TREE_SPEC_KERNEL_AVAILABLE):
+ logger.warning(
+ "Tree speculative sampling kernel unavailable (likely AMD/HIP build). "
+ "Falling back to greedy verification."
+ )
+
+ if is_all_greedy or not TREE_SPEC_KERNEL_AVAILABLE:
target_predict = torch.argmax(logits_output.next_token_logits, dim=-1)
target_predict = target_predict.reshape(bs, self.draft_token_num)
@@ -453,12 +465,13 @@ def verify(
sampling_info.top_ks, self.draft_token_num, dim=0
),
) # (bs * draft_token_num, vocab_size)
- target_probs = top_p_renorm_prob(
- target_probs,
- torch.repeat_interleave(
- sampling_info.top_ps, self.draft_token_num, dim=0
- ),
- )
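+ # Skip the top-p renormalization when every request uses top_p == 1.0, where it would be a no-op.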
+ if not torch.all(sampling_info.top_ps == 1.0):
+ target_probs = top_p_renorm_prob(
+ target_probs,
+ torch.repeat_interleave(
+ sampling_info.top_ps, self.draft_token_num, dim=0
+ ),
+ )
target_probs = target_probs.reshape(bs, self.draft_token_num, -1)
draft_probs = torch.zeros(
diff --git a/python/sglang/srt/speculative/eagle_worker.py b/python/sglang/srt/speculative/eagle_worker.py
index 972d7182d81..a9abbede320 100644
--- a/python/sglang/srt/speculative/eagle_worker.py
+++ b/python/sglang/srt/speculative/eagle_worker.py
@@ -9,12 +9,12 @@
from sglang.srt.distributed import (
GroupCoordinator,
- get_tensor_model_parallel_world_size,
get_tp_group,
patch_tensor_parallel_group,
)
from sglang.srt.layers.logits_processor import LogitsProcessorOutput
from sglang.srt.layers.sampler import get_token_ids_logprobs, get_top_logprobs
+from sglang.srt.managers.mm_utils import embed_mm_inputs
from sglang.srt.managers.schedule_batch import (
ScheduleBatch,
get_last_loc,
@@ -47,6 +47,7 @@
from sglang.srt.utils import (
empty_context,
get_available_gpu_memory,
+ get_bool_env_var,
is_cuda,
next_power_of_2,
)
@@ -55,6 +56,7 @@
from sgl_kernel import segment_packbits
logger = logging.getLogger(__name__)
+RETURN_ORIGINAL_LOGPROB = get_bool_env_var("RETURN_ORIGINAL_LOGPROB")
@contextmanager
@@ -92,7 +94,7 @@ def __init__(
)
self.padded_static_len = -1
- # Override context length with target model's context length
+ # Override the context length of the draft model to be the same as the target model.
server_args.context_length = target_worker.model_runner.model_config.context_len
# Do not capture cuda graph in `super().__init__()`
@@ -138,8 +140,15 @@ def __init__(
embed, head = self.target_worker.model_runner.model.get_embed_and_head()
if self.speculative_algorithm.is_eagle3():
- # EAGLE3 models don't share lm_head
- self.draft_model_runner.model.set_embed(embed)
+ # In most cases, EAGLE3 models do not share the lm_head,
+ # but some models (e.g., nvidia/gpt-oss-120b-Eagle3) do.
+ if (
+ hasattr(self.draft_model_runner.model, "load_lm_head_from_target")
+ and self.draft_model_runner.model.load_lm_head_from_target
+ ):
+ self.draft_model_runner.model.set_embed_and_head(embed, head)
+ else:
+ self.draft_model_runner.model.set_embed(embed)
# grab hot token ids
if self.draft_model_runner.model.hot_token_id is not None:
@@ -179,100 +188,190 @@ def init_attention_backend(self):
self.has_prefill_wrapper_verify = False
self.draft_extend_attn_backend = None
- if self.server_args.attention_backend == "flashinfer":
- if not global_server_args_dict["use_mla_backend"]:
- from sglang.srt.layers.attention.flashinfer_backend import (
- FlashInferAttnBackend,
- FlashInferMultiStepDraftBackend,
- )
+ # Initialize decode attention backend
+ self.draft_attn_backend = self._create_decode_backend()
- self.draft_attn_backend = FlashInferMultiStepDraftBackend(
- self.draft_model_runner,
- self.topk,
- self.speculative_num_steps,
- )
- self.draft_extend_attn_backend = FlashInferAttnBackend(
- self.draft_model_runner,
- skip_prefill=False,
- )
- else:
- from sglang.srt.layers.attention.flashinfer_mla_backend import (
- FlashInferMLAAttnBackend,
- FlashInferMLAMultiStepDraftBackend,
- )
+ # Initialize draft extend attention backend (respects speculative_attention_mode setting)
+ self.draft_extend_attn_backend = self._create_draft_extend_backend()
- self.draft_attn_backend = FlashInferMLAMultiStepDraftBackend(
- self.draft_model_runner,
- self.topk,
- self.speculative_num_steps,
- )
- self.draft_extend_attn_backend = FlashInferMLAAttnBackend(
- self.draft_model_runner,
- skip_prefill=False,
- )
- self.has_prefill_wrapper_verify = True
- elif self.server_args.attention_backend == "triton":
- from sglang.srt.layers.attention.triton_backend import (
- TritonAttnBackend,
- TritonMultiStepDraftBackend,
- )
+ self.draft_model_runner.draft_attn_backend = self.draft_attn_backend
- self.draft_attn_backend = TritonMultiStepDraftBackend(
- self.draft_model_runner,
- self.topk,
- self.speculative_num_steps,
- )
- self.draft_extend_attn_backend = TritonAttnBackend(
- self.draft_model_runner,
- skip_prefill=False,
- )
- elif self.server_args.attention_backend == "aiter":
- from sglang.srt.layers.attention.aiter_backend import (
- AiterAttnBackend,
- AiterMultiStepDraftBackend,
- )
+ def _create_backend(
+ self, backend_name: str, backend_map: dict, error_template: str
+ ):
+ backend_type = getattr(self.server_args, backend_name)
+ if backend_type is None:
+ backend_type = self.server_args.attention_backend
+
+ if backend_type not in backend_map:
+ raise ValueError(error_template.format(backend_type=backend_type))
+
+ return backend_map[backend_type]()
+
+ def _create_decode_backend(self):
+ backend_map = {
+ "flashinfer": self._create_flashinfer_decode_backend,
+ "triton": self._create_triton_decode_backend,
+ "aiter": self._create_aiter_decode_backend,
+ "fa3": self._create_fa3_decode_backend,
+ "hip_attention": self._create_fa3_decode_backend,
+ "hybrid_linear_attn": self._create_fa3_decode_backend,
+ "flashmla": self._create_flashmla_decode_backend,
+ "trtllm_mha": self._create_trtllm_mha_decode_backend,
+ "trtllm_mla": self._create_trtllm_mla_decode_backend,
+ }
+
+ return self._create_backend(
+ "decode_attention_backend",
+ backend_map,
+ "EAGLE is not supported in decode attention backend {backend_type}",
+ )
- self.draft_attn_backend = AiterMultiStepDraftBackend(
- self.draft_model_runner,
- self.topk,
- self.speculative_num_steps,
+ def _create_draft_extend_backend(self):
+ backend_map = {
+ "flashinfer": self._create_flashinfer_prefill_backend,
+ "triton": self._create_triton_prefill_backend,
+ "aiter": self._create_aiter_prefill_backend,
+ "fa3": self._create_fa3_prefill_backend,
+ "hybrid_linear_attn": self._create_fa3_prefill_backend,
+ "trtllm_mha": self._create_trtllm_mha_prefill_backend,
+ "trtllm_mla": self._create_trtllm_mla_prefill_backend,
+ }
+ backend_name = (
+ "decode_attention_backend"
+ if self.server_args.speculative_attention_mode == "decode"
+ else "prefill_attention_backend"
+ )
+ return self._create_backend(
+ backend_name,
+ backend_map,
+ "EAGLE is not supported in attention backend {backend_type}",
+ )
+
+ def _create_flashinfer_decode_backend(self):
+ if not global_server_args_dict["use_mla_backend"]:
+ from sglang.srt.layers.attention.flashinfer_backend import (
+ FlashInferMultiStepDraftBackend,
)
- self.draft_extend_attn_backend = AiterAttnBackend(
- self.draft_model_runner,
- skip_prefill=False,
+
+ self.has_prefill_wrapper_verify = True
+ return FlashInferMultiStepDraftBackend(
+ self.draft_model_runner, self.topk, self.speculative_num_steps
)
- self.has_prefill_wrapper_verify = False
- elif self.server_args.attention_backend == "fa3":
- from sglang.srt.layers.attention.flashattention_backend import (
- FlashAttentionBackend,
- FlashAttentionMultiStepBackend,
+ else:
+ from sglang.srt.layers.attention.flashinfer_mla_backend import (
+ FlashInferMLAMultiStepDraftBackend,
)
- self.draft_attn_backend = FlashAttentionMultiStepBackend(
- self.draft_model_runner,
- self.topk,
- self.speculative_num_steps,
- )
- self.draft_extend_attn_backend = FlashAttentionBackend(
- self.draft_model_runner,
- skip_prefill=False,
+ self.has_prefill_wrapper_verify = True
+ return FlashInferMLAMultiStepDraftBackend(
+ self.draft_model_runner, self.topk, self.speculative_num_steps
)
- elif self.server_args.attention_backend == "flashmla":
- from sglang.srt.layers.attention.flashmla_backend import (
- FlashMLAMultiStepDraftBackend,
+
+ def _create_triton_decode_backend(self):
+ from sglang.srt.layers.attention.triton_backend import (
+ TritonMultiStepDraftBackend,
+ )
+
+ return TritonMultiStepDraftBackend(
+ self.draft_model_runner, self.topk, self.speculative_num_steps
+ )
+
+ def _create_aiter_decode_backend(self):
+ from sglang.srt.layers.attention.aiter_backend import AiterMultiStepDraftBackend
+
+ return AiterMultiStepDraftBackend(
+ self.draft_model_runner, self.topk, self.speculative_num_steps
+ )
+
+ def _create_fa3_decode_backend(self):
+ from sglang.srt.layers.attention.flashattention_backend import (
+ FlashAttentionMultiStepBackend,
+ )
+
+ return FlashAttentionMultiStepBackend(
+ self.draft_model_runner, self.topk, self.speculative_num_steps
+ )
+
+ def _create_flashmla_decode_backend(self):
+ from sglang.srt.layers.attention.flashmla_backend import (
+ FlashMLAMultiStepDraftBackend,
+ )
+
+ return FlashMLAMultiStepDraftBackend(
+ self.draft_model_runner, self.topk, self.speculative_num_steps
+ )
+
+ def _create_trtllm_mha_decode_backend(self):
+ from sglang.srt.layers.attention.trtllm_mha_backend import (
+ TRTLLMHAAttnMultiStepDraftBackend,
+ )
+
+ self.has_prefill_wrapper_verify = True
+ return TRTLLMHAAttnMultiStepDraftBackend(
+ self.draft_model_runner, self.topk, self.speculative_num_steps
+ )
+
+ def _create_trtllm_mla_decode_backend(self):
+ if not global_server_args_dict["use_mla_backend"]:
+ raise ValueError(
+ "trtllm_mla backend requires MLA model (use_mla_backend=True)."
)
- self.draft_attn_backend = FlashMLAMultiStepDraftBackend(
- self.draft_model_runner,
- self.topk,
- self.speculative_num_steps,
+ from sglang.srt.layers.attention.trtllm_mla_backend import (
+ TRTLLMMLAMultiStepDraftBackend,
+ )
+
+ self.has_prefill_wrapper_verify = True
+ return TRTLLMMLAMultiStepDraftBackend(
+ self.draft_model_runner, self.topk, self.speculative_num_steps
+ )
+
+ def _create_flashinfer_prefill_backend(self):
+ if not global_server_args_dict["use_mla_backend"]:
+ from sglang.srt.layers.attention.flashinfer_backend import (
+ FlashInferAttnBackend,
)
+
+ return FlashInferAttnBackend(self.draft_model_runner, skip_prefill=False)
else:
+ from sglang.srt.layers.attention.flashinfer_mla_backend import (
+ FlashInferMLAAttnBackend,
+ )
+
+ return FlashInferMLAAttnBackend(self.draft_model_runner, skip_prefill=False)
+
+ def _create_triton_prefill_backend(self):
+ from sglang.srt.layers.attention.triton_backend import TritonAttnBackend
+
+ return TritonAttnBackend(self.draft_model_runner, skip_prefill=False)
+
+ def _create_aiter_prefill_backend(self):
+ from sglang.srt.layers.attention.aiter_backend import AiterAttnBackend
+
+ return AiterAttnBackend(self.draft_model_runner, skip_prefill=False)
+
+ def _create_fa3_prefill_backend(self):
+ from sglang.srt.layers.attention.flashattention_backend import (
+ FlashAttentionBackend,
+ )
+
+ return FlashAttentionBackend(self.draft_model_runner, skip_prefill=False)
+
+ def _create_trtllm_mha_prefill_backend(self):
+ from sglang.srt.layers.attention.trtllm_mha_backend import TRTLLMHAAttnBackend
+
+ return TRTLLMHAAttnBackend(self.draft_model_runner, skip_prefill=False)
+
+ def _create_trtllm_mla_prefill_backend(self):
+ if not global_server_args_dict["use_mla_backend"]:
raise ValueError(
- f"EAGLE is not supported in attention backend {self.server_args.attention_backend}"
+ "trtllm_mla backend requires MLA model (use_mla_backend=True)."
)
- self.draft_model_runner.draft_attn_backend = self.draft_attn_backend
+ from sglang.srt.layers.attention.trtllm_mla_backend import TRTLLMMLABackend
+
+ return TRTLLMMLABackend(self.draft_model_runner, skip_prefill=False)
def init_cuda_graphs(self):
"""Capture cuda graphs."""
@@ -638,6 +737,14 @@ def draft_forward(self, forward_batch: ForwardBatch):
# Set inputs
forward_batch.input_ids = input_ids
+ # This is a temporary fix for the case where the user runs standalone
+ # speculative decoding with a gpt-oss draft model: the gpt-oss rope
+ # kernel needs cache_loc to be contiguous.
+ if (
+ self.server_args.speculative_algorithm == "STANDALONE"
+ and self.model_config.hf_config.architectures[0] == "GptOssForCausalLM"
+ ):
+ out_cache_loc = out_cache_loc.contiguous()
forward_batch.out_cache_loc = out_cache_loc[i]
forward_batch.positions.add_(1)
forward_batch.attn_backend = self.draft_attn_backend.attn_backends[i]
@@ -722,6 +829,21 @@ def verify(self, batch: ScheduleBatch, spec_info: EagleVerifyInput):
]
logits_output.hidden_states = logits_output.hidden_states[res.accepted_indices]
+ # TODO: this can be optimized
+ if self.target_worker.model_runner.is_hybrid_gdn:
+ # res.draft_input.accept_length is on the GPU but may be empty for the last verify, so rebuild it from the CPU copy.
+ accepted_length = (
+ torch.tensor(
+ res.accept_length_per_req_cpu,
+ device=logits_output.hidden_states.device,
+ dtype=torch.int32,
+ )
+ + 1
+ )
+ self.target_worker.model_runner.attn_backend.update_mamba_state_after_mtp_verify(
+ accepted_length, self.target_worker.model_runner.model
+ )
+
if batch.return_logprob:
self.add_logprob_values(batch, res, logits_output)
@@ -745,15 +867,20 @@ def add_logprob_values(
token_ids_logprobs = batch.token_ids_logprobs
accepted_indices = res.accepted_indices
assert len(accepted_indices) == len(logits_output.next_token_logits)
+
temperatures = batch.sampling_info.temperatures
num_draft_tokens = batch.spec_info.draft_token_num
# acceptance indices are the indices in a "flattened" batch.
# dividing it to num_draft_tokens will yield the actual batch index.
temperatures = temperatures[accepted_indices // num_draft_tokens]
-
- logprobs = torch.nn.functional.log_softmax(
- logits_output.next_token_logits / temperatures, dim=-1
- )
+ if RETURN_ORIGINAL_LOGPROB:
+ logprobs = torch.nn.functional.log_softmax(
+ logits_output.next_token_logits, dim=-1
+ )
+ else:
+ logprobs = torch.nn.functional.log_softmax(
+ logits_output.next_token_logits / temperatures, dim=-1
+ )
batch_next_token_ids = res.verified_id
num_tokens_per_req = [accept + 1 for accept in res.accept_length_per_req_cpu]
@@ -770,13 +897,19 @@ def add_logprob_values(
(
logits_output.next_token_top_logprobs_val,
logits_output.next_token_top_logprobs_idx,
- ) = get_top_logprobs(logprobs, top_logprobs_nums_repeat_interleaved)
+ ) = get_top_logprobs(
+ logprobs,
+ top_logprobs_nums_repeat_interleaved,
+ )
if any(x is not None for x in token_ids_logprobs):
(
logits_output.next_token_token_ids_logprobs_val,
logits_output.next_token_token_ids_logprobs_idx,
- ) = get_token_ids_logprobs(logprobs, token_ids_logprobs_repeat_interleaved)
+ ) = get_token_ids_logprobs(
+ logprobs,
+ token_ids_logprobs_repeat_interleaved,
+ )
logits_output.next_token_logprobs = logprobs[
torch.arange(len(batch_next_token_ids), device=batch.sampling_info.device),
@@ -981,7 +1114,9 @@ def get_last_loc_large_page_size_top_k_1(
return prefix_lens, seq_lens, last_loc
-@torch.compile(dynamic=True)
+# torch.compile is disabled for this function because the compiled
+# version is even slower than eager mode.
+# @torch.compile(dynamic=True)
def get_last_loc_large_page_size_large_top_k(
req_to_token: torch.Tensor,
req_pool_indices: torch.Tensor,
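For context on the RETURN_ORIGINAL_LOGPROB switch used in add_logprob_values above: when the environment variable is set, logprobs come from the raw logits rather than the temperature-scaled ones. A minimal standalone sketch of the difference (tensor shapes are illustrative, not taken from the worker code):

import torch
import torch.nn.functional as F

logits = torch.randn(4, 32000)          # [num_accepted_tokens, vocab_size]
temperatures = torch.full((4, 1), 0.7)  # per-token sampling temperature

# Default: logprobs of the temperature-scaled sampling distribution.
scaled_logprobs = F.log_softmax(logits / temperatures, dim=-1)

# With RETURN_ORIGINAL_LOGPROB=1: logprobs of the unscaled model distribution.
original_logprobs = F.log_softmax(logits, dim=-1)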
diff --git a/python/sglang/srt/speculative/spec_info.py b/python/sglang/srt/speculative/spec_info.py
index af556b99c05..a8096347121 100644
--- a/python/sglang/srt/speculative/spec_info.py
+++ b/python/sglang/srt/speculative/spec_info.py
@@ -5,6 +5,7 @@ class SpeculativeAlgorithm(IntEnum):
NONE = auto()
EAGLE = auto()
EAGLE3 = auto()
+ STANDALONE = auto()
def is_none(self):
return self == SpeculativeAlgorithm.NONE
@@ -15,11 +16,15 @@ def is_eagle(self):
def is_eagle3(self):
return self == SpeculativeAlgorithm.EAGLE3
+ def is_standalone(self):
+ return self == SpeculativeAlgorithm.STANDALONE
+
@staticmethod
def from_string(name: str):
name_map = {
"EAGLE": SpeculativeAlgorithm.EAGLE,
"EAGLE3": SpeculativeAlgorithm.EAGLE3,
+ "STANDALONE": SpeculativeAlgorithm.STANDALONE,
None: SpeculativeAlgorithm.NONE,
}
if name is not None:
diff --git a/python/sglang/srt/speculative/standalone_worker.py b/python/sglang/srt/speculative/standalone_worker.py
new file mode 100644
index 00000000000..b6004ea013b
--- /dev/null
+++ b/python/sglang/srt/speculative/standalone_worker.py
@@ -0,0 +1,109 @@
+import logging
+from contextlib import contextmanager
+from typing import Optional
+
+import torch
+
+from sglang.srt.distributed import GroupCoordinator, patch_tensor_parallel_group
+from sglang.srt.managers.tp_worker import TpModelWorker
+from sglang.srt.server_args import ServerArgs
+from sglang.srt.speculative.eagle_worker import EAGLEWorker, load_token_map
+from sglang.srt.speculative.spec_info import SpeculativeAlgorithm
+from sglang.srt.utils import empty_context, get_bool_env_var, is_cuda
+
+if is_cuda():
+ from sgl_kernel import segment_packbits
+
+logger = logging.getLogger(__name__)
+RETURN_ORIGINAL_LOGPROB = get_bool_env_var("RETURN_ORIGINAL_LOGPROB")
+
+
+@contextmanager
+def draft_tp_context(tp_group: GroupCoordinator):
+ # The draft model doesn't use DP and has its own TP group.
+ # mscclpp is disabled for now because it doesn't support two comm groups.
+ with patch_tensor_parallel_group(tp_group):
+ yield
+
+
+class StandaloneWorker(EAGLEWorker):
+
+ def __init__(
+ self,
+ server_args: ServerArgs,
+ gpu_id: int,
+ tp_rank: int,
+ dp_rank: Optional[int],
+ moe_ep_rank: int,
+ nccl_port: int,
+ target_worker: TpModelWorker,
+ ):
+ # Parse arguments
+ self.server_args = server_args
+ self.topk = server_args.speculative_eagle_topk
+ self.speculative_num_steps = server_args.speculative_num_steps
+ self.speculative_num_draft_tokens = server_args.speculative_num_draft_tokens
+ self.enable_nan_detection = server_args.enable_nan_detection
+ self.gpu_id = gpu_id
+ self.device = server_args.device
+ self.target_worker = target_worker
+ self.page_size = server_args.page_size
+ self.speculative_algorithm = SpeculativeAlgorithm.from_string(
+ server_args.speculative_algorithm
+ )
+ self.padded_static_len = -1
+
+ # Override the context length of the draft model to be the same as the target model.
+ server_args.context_length = target_worker.model_runner.model_config.context_len
+
+ # Do not capture cuda graph in `super().__init__()`
+ # It will be captured later.
+ backup_disable_cuda_graph = server_args.disable_cuda_graph
+ server_args.disable_cuda_graph = True
+ # Share the allocator with the target worker.
+ # The draft and target workers each own their own KV cache pool.
+ self.req_to_token_pool, self.token_to_kv_pool_allocator = (
+ target_worker.get_memory_pool()
+ )
+
+ # Load hot token ids
+ if server_args.speculative_token_map is not None:
+ self.hot_token_id = load_token_map(server_args.speculative_token_map)
+ server_args.json_model_override_args = (
+ f'{{"hot_vocab_size": {len(self.hot_token_id)}}}'
+ )
+ else:
+ self.hot_token_id = None
+
+ # Init draft worker
+ with empty_context():
+ TpModelWorker.__init__(
+ self,
+ server_args=server_args,
+ gpu_id=gpu_id,
+ tp_rank=tp_rank,
+ pp_rank=0, # FIXME
+ dp_rank=dp_rank,
+ moe_ep_rank=moe_ep_rank,
+ nccl_port=nccl_port,
+ is_draft_worker=True,
+ req_to_token_pool=self.req_to_token_pool,
+ token_to_kv_pool_allocator=self.token_to_kv_pool_allocator,
+ )
+
+ # Init attention backend and cuda graphs
+ self.draft_model_runner.server_args.disable_cuda_graph = (
+ backup_disable_cuda_graph
+ )
+ self.draft_tp_context = (
+ draft_tp_context if server_args.enable_dp_attention else empty_context
+ )
+ with self.draft_tp_context(self.draft_model_runner.tp_group):
+ self.init_attention_backend()
+ self.init_cuda_graphs()
+
+ # Some dummy tensors
+ self.num_new_pages_per_topk = torch.empty(
+ (), dtype=torch.int64, device=self.device
+ )
+ self.extend_lens = torch.empty((), dtype=torch.int64, device=self.device)
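The new STANDALONE speculative algorithm reuses the EAGLE machinery (StandaloneWorker subclasses EAGLEWorker) while loading an ordinary model as the draft. A small sketch of the enum plumbing added in spec_info.py, using only the methods shown in this diff:

from sglang.srt.speculative.spec_info import SpeculativeAlgorithm

algo = SpeculativeAlgorithm.from_string("STANDALONE")
assert algo.is_standalone()
assert not algo.is_none()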
diff --git a/python/sglang/srt/tokenizer/tiktoken_tokenizer.py b/python/sglang/srt/tokenizer/tiktoken_tokenizer.py
new file mode 100644
index 00000000000..98df443e5eb
--- /dev/null
+++ b/python/sglang/srt/tokenizer/tiktoken_tokenizer.py
@@ -0,0 +1,166 @@
+import functools
+import json
+from typing import AbstractSet, Collection, List, Literal, Union
+
+
+class TiktokenProcessor:
+ def __init__(self, name: str):
+ self.tokenizer = TiktokenTokenizer(name)
+
+ def image_processor(self, image):
+ return {"pixel_values": [image]}
+
+
+RESERVED_TOKEN_TEXTS = [f"<|reserved_{i}|>" for i in range(3, 128)]
+CONTROL_TOKEN_TEXTS = [f"<|control{i}|>" for i in range(1, 705)]
+
+
+PAD = "<|pad|>"
+EOS = "<|eos|>"
+SEP = "<|separator|>"
+
+DEFAULT_SPECIAL_TOKENS = [PAD, SEP, EOS]
+DEFAULT_CONTROL_TOKENS = {"pad": PAD, "sep": SEP, "eos": EOS}
+
+# default + separate each single digit
+PAT_STR_B = r"""(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+"""
+
+
+class TiktokenTokenizer:
+ def __init__(self, tokenizer_path):
+ import tiktoken
+ from jinja2 import Template
+
+ # Read the JSON
+ with open(tokenizer_path, "rb") as fin:
+ xtok_dict = json.load(fin)
+
+ # Copy from train/xlm/tokenizers/tiktoken_wrapper.py::Encoding::from_xtok_dict
+ mergeable_ranks = {
+ bytes(item["bytes"]): item["token"] for item in xtok_dict["regular_tokens"]
+ }
+ special_tokens = {
+ bytes(item["bytes"]).decode(): item["token"]
+ for item in xtok_dict["special_tokens"]
+ }
+ if xtok_dict["word_split"] == "V1":
+ pat_str = PAT_STR_B
+ else:
+ assert False, f"Unknown word_split: {xtok_dict['word_split']}"
+ pat_str = xtok_dict.get("pat_str", pat_str)
+
+ kwargs = {
+ "name": tokenizer_path,
+ "pat_str": pad_str,
+ "mergeable_ranks": mergeable_ranks,
+ "special_tokens": special_tokens,
+ }
+ if "default_allowed_special" in xtok_dict:
+ default_allowed_special = set(
+ [
+ bytes(bytes_list).decode()
+ for bytes_list in xtok_dict["default_allowed_special"]
+ ]
+ )
+ if "vocab_size" in xtok_dict:
+ kwargs["explicit_n_vocab"] = xtok_dict["vocab_size"]
+
+ # Copy from train/xlm/tokenizers/tiktoken_wrapper.py::Encoding::__init__
+ default_allowed_special = None
+ control_tokens = DEFAULT_CONTROL_TOKENS
+ tokenizer = tiktoken.Encoding(**kwargs)
+ tokenizer._default_allowed_special = default_allowed_special or set()
+ tokenizer._control_tokens = control_tokens
+
+ def encode_patched(
+ self,
+ text: str,
+ *,
+ allowed_special: Union[
+ Literal["all"], AbstractSet[str]
+ ] = set(), # noqa: B006
+ disallowed_special: Union[Literal["all"], Collection[str]] = "all",
+ ) -> List[int]:
+ if isinstance(allowed_special, set):
+ allowed_special |= self._default_allowed_special
+ return tiktoken.Encoding.encode(
+ self,
+ text,
+ allowed_special=allowed_special,
+ disallowed_special=(),
+ )
+
+ tokenizer.encode = functools.partial(encode_patched, tokenizer)
+
+ # Allow more special tokens to prevent crashes when they appear in inputs
+ tokenizer._default_allowed_special |= set(DEFAULT_CONTROL_TOKENS.values())
+ tokenizer._default_allowed_special |= set(
+ CONTROL_TOKEN_TEXTS + RESERVED_TOKEN_TEXTS
+ )
+
+ # Convert to HF interface
+ self.tokenizer = tokenizer
+ self.bos_token_id = None
+ self.eos_token_id = tokenizer._special_tokens[EOS]
+ self.vocab_size = tokenizer.n_vocab
+ self.chat_template = "{% for message in messages %}{% if message['role'] == 'user' %}{{ 'Human: ' + message['content'].strip() + '<|separator|>\n\n' }}{% elif message['role'] == 'system' %}{{ 'System: ' + message['content'].strip() + '<|separator|>\n\n' }}{% elif message['role'] == 'assistant' %}{{ 'Assistant: ' + message['content'] + '<|separator|>\n\n' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ 'Assistant:' }}{% endif %}"
+ self.chat_template_jinja = Template(self.chat_template)
+ self.additional_stop_token_ids = None
+
+ def encode(self, x, add_special_tokens=False):
+ return self.tokenizer.encode(x)
+
+ def decode(self, x, *args, **kwargs):
+ return self.tokenizer.decode(x)
+
+ def batch_decode(
+ self, batch, skip_special_tokens=True, spaces_between_special_tokens=False
+ ):
+ if len(batch) > 0 and isinstance(batch[0], int):
+ batch = [[x] for x in batch]
+ return self.tokenizer.decode_batch(batch)
+
+ def apply_chat_template(
+ self,
+ messages,
+ tokenize,
+ add_generation_prompt,
+ tools=None,
+ reasoning_effort=None,
+ ):
+ ret = self.chat_template_jinja.render(
+ messages=messages, add_generation_prompt=add_generation_prompt
+ )
+ return self.encode(ret) if tokenize else ret
+
+ def __call__(self, text, **kwargs):
+ return {
+ "input_ids": self.encode(text),
+ }
+
+ def init_xgrammar(self):
+ from xgrammar import TokenizerInfo
+
+ XGRAMMAR_SPECIAL_TOKEN_TEMPLATE = "<|xg_special_token_{}|>"
+
+ enc = self.tokenizer
+ encoded_vocab = {**enc._mergeable_ranks, **enc._special_tokens}
+ encoded_vocab = [
+ token for token, _ in sorted(encoded_vocab.items(), key=lambda x: x[1])
+ ]
+ override_stop_tokens = [2] # eos
+ # These are treated as special tokens in xgrammar; we want to avoid them
+ # For now, xgrammar treats anything starting with b'\x00' as a special token
+ xgrammar_special_token_ids = []
+ for i, token in enumerate(encoded_vocab):
+ if isinstance(token, bytes) and token.startswith(b"\x00"):
+ xgrammar_special_token_ids.append(i)
+
+ for i, id in enumerate(xgrammar_special_token_ids):
+ encoded_vocab[id] = XGRAMMAR_SPECIAL_TOKEN_TEMPLATE.format(i)
+ tokenizer_info = TokenizerInfo(
+ encoded_vocab, stop_token_ids=override_stop_tokens
+ )
+ assert len(tokenizer_info.special_token_ids) == 0
+
+ return tokenizer_info, override_stop_tokens
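A hedged usage sketch for the new TiktokenTokenizer wrapper; "tokenizer.json" below is a placeholder path to an xtok-format tokenizer file, not something shipped with this change:

from sglang.srt.tokenizer.tiktoken_tokenizer import TiktokenTokenizer

tok = TiktokenTokenizer("tokenizer.json")  # placeholder path

messages = [{"role": "user", "content": "Hello"}]
prompt = tok.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
ids = tok.encode(prompt)
text = tok.decode(ids)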
diff --git a/python/sglang/srt/utils.py b/python/sglang/srt/utils.py
index d15ef2a93f9..fbb9af9277f 100644
--- a/python/sglang/srt/utils.py
+++ b/python/sglang/srt/utils.py
@@ -76,6 +76,7 @@
import torch.distributed as dist
import triton
import zmq
+from decord import VideoReader, cpu, gpu
from fastapi.responses import ORJSONResponse
from packaging import version as pkg_version
from PIL import Image
@@ -172,6 +173,20 @@ def is_blackwell():
return torch.cuda.get_device_capability()[0] == 10
+@lru_cache(maxsize=1)
+def is_sm100_supported(device=None) -> bool:
+ return (torch.cuda.get_device_capability(device)[0] == 10) and (
+ torch.version.cuda >= "12.8"
+ )
+
+
+@lru_cache(maxsize=1)
+def is_sm90_supported(device=None) -> bool:
+ return (torch.cuda.get_device_capability(device)[0] == 9) and (
+ torch.version.cuda >= "12.3"
+ )
+
+
_warned_bool_env_var_keys = set()
@@ -216,8 +231,16 @@ def support_triton(backend: str) -> bool:
is_intel_amx_backend_available = False
+try:
+ # Moved torch._C._cpu._is_amx_tile_supported() out of cpu_has_amx_support()
+ # so that the function stays torch.compile friendly.
+ is_amx_tile_supported = torch._C._cpu._is_amx_tile_supported()
+except Exception:
+ is_amx_tile_supported = False
+
+
def cpu_has_amx_support():
- return torch._C._cpu._is_amx_tile_supported() and is_intel_amx_backend_available
+ return is_amx_tile_supported and is_intel_amx_backend_available
def use_intel_amx_backend(layer):
@@ -412,7 +435,9 @@ def get_available_gpu_memory(
elif device == "cpu":
# TODO: rename the variables in the current function to be not GPU specific
- free_gpu_memory = psutil.virtual_memory().available
+ total_free_memory = psutil.virtual_memory().available
+ n_numa_node: int = len(get_cpu_ids_by_node())
+ free_gpu_memory = round(total_free_memory / n_numa_node, 3)
elif device == "npu":
num_gpus = torch.npu.device_count()
assert gpu_id < num_gpus
@@ -438,70 +463,6 @@ def is_pin_memory_available() -> bool:
return torch.cuda.is_available()
-_CPU_OFFLOAD_BYTES = 0
-_CPU_OFFLOAD_MAX_BYTES = 0
-
-
-def set_cpu_offload_max_bytes(max_bytes: int) -> None:
- global _CPU_OFFLOAD_MAX_BYTES, _CPU_OFFLOAD_BYTES
- _CPU_OFFLOAD_BYTES = 0
- _CPU_OFFLOAD_MAX_BYTES = max_bytes
-
-
-def maybe_offload_to_cpu(module: torch.nn.Module) -> torch.nn.Module:
- device = next(module.parameters()).device
-
- if device == torch.device("cpu"):
- return module
-
- global _CPU_OFFLOAD_MAX_BYTES, _CPU_OFFLOAD_BYTES
- if _CPU_OFFLOAD_BYTES >= _CPU_OFFLOAD_MAX_BYTES:
- return module
-
- pin_memory = is_pin_memory_available()
- # offload parameters to CPU
- # use pin_memory if possible, which helps cudagraph capture speed
- offloaded_parameters = False
- for p in module.parameters():
- if _CPU_OFFLOAD_BYTES >= _CPU_OFFLOAD_MAX_BYTES:
- # we use per-parameter offloading
- # one module might have some parameters offloaded and some not
- break
-
- # `torch.empty_like` does not support `pin_memory` argument
- cpu_data = torch.empty_strided(
- size=p.data.size(),
- stride=p.data.stride(),
- dtype=p.data.dtype,
- layout=p.data.layout,
- device="cpu",
- pin_memory=pin_memory,
- )
- cpu_data.copy_(p.data)
- p.data = cpu_data
- _CPU_OFFLOAD_BYTES += p.data.numel() * p.data.element_size()
- offloaded_parameters = True
-
- if offloaded_parameters:
- original_forward = module.forward
-
- def forward(*args, **kwargs):
- module.forward = original_forward
- device_state = {
- # here we blindly call `to(device)`
- # if the parameter is already on the device, it will be a no-op
- k: v.to(device, non_blocking=True)
- for k, v in module.state_dict().items()
- }
- output = functional_call(module, device_state, args=args, kwargs=kwargs)
- module.forward = forward
- return output
-
- module.forward = forward
-
- return module
-
-
class LayerFn(Protocol):
def __call__(self, layer_id: int, prefix: str) -> torch.nn.Module: ...
@@ -514,11 +475,13 @@ def make_layers(
pp_size: Optional[int] = None,
prefix: str = "",
return_tuple: bool = False,
+ offloader_kwargs: Dict[str, Any] = {},
) -> Tuple[int, int, torch.nn.ModuleList]:
"""Make a list of layers with the given layer function"""
# circula imports
from sglang.srt.distributed import get_pp_indices
from sglang.srt.layers.utils import PPMissingLayer
+ from sglang.srt.offloader import get_offloader
assert not pp_size or num_hidden_layers >= pp_size
start_layer, end_layer = (
@@ -532,10 +495,13 @@ def make_layers(
)
modules = torch.nn.ModuleList(
[PPMissingLayer(return_tuple=return_tuple) for _ in range(start_layer)]
- + [
- maybe_offload_to_cpu(layer_fn(idx=idx, prefix=add_prefix(idx, prefix)))
- for idx in range(start_layer, end_layer)
- ]
+ + get_offloader().wrap_modules(
+ (
+ layer_fn(idx=idx, prefix=add_prefix(idx, prefix))
+ for idx in range(start_layer, end_layer)
+ ),
+ **offloader_kwargs,
+ )
+ [
PPMissingLayer(return_tuple=return_tuple)
for _ in range(end_layer, num_hidden_layers)
@@ -1724,9 +1690,29 @@ def direct_register_custom_op(
IMPORTANT: the lifetime of the operator is tied to the lifetime of the
library object. If you want to bind the operator to a different library,
make sure the library object is alive when the operator is used.
+
+ Note: This function will silently skip registration if the operator
+ with the same name is already registered to avoid RuntimeError in
+ multi-engine scenarios (e.g., VERL framework).
"""
import torch.library
+ my_lib = target_lib or sglang_lib
+
+ # Check if operator is already registered to avoid duplicate registration
+ # This is important for scenarios where multiple SGLang engines run in the same process
+ try:
+ # Try to access the operator to see if it's already registered
+ lib_name = my_lib.m.name if hasattr(my_lib.m, "name") else "sglang"
+ if hasattr(torch.ops, lib_name) and hasattr(
+ getattr(torch.ops, lib_name), op_name
+ ):
+ # Operator already exists, skip registration
+ return
+ except (AttributeError, RuntimeError):
+ # Operator doesn't exist, proceed with registration
+ pass
+
if hasattr(torch.library, "infer_schema"):
schema_str = torch.library.infer_schema(op_func, mutates_args=mutates_args)
else:
@@ -1735,11 +1721,22 @@ def direct_register_custom_op(
schema_str = torch._custom_op.impl.infer_schema(op_func, mutates_args)
- my_lib = target_lib or sglang_lib
- my_lib.define(op_name + schema_str)
- my_lib.impl(op_name, op_func, "CUDA")
- if fake_impl is not None:
- my_lib._register_fake(op_name, fake_impl)
+ try:
+ my_lib.define(op_name + schema_str)
+ my_lib.impl(op_name, op_func, "CUDA")
+ if fake_impl is not None:
+ my_lib._register_fake(op_name, fake_impl)
+ except RuntimeError as error:
+ if "Tried to register an operator" in str(e) and "multiple times" in str(e):
+ # Silently ignore duplicate registration errors
+ # This can happen in multi-engine scenarios
+ pass
+ else:
+ # Re-raise other RuntimeErrors
+ raise error
+ except AttributeError as error:
+ # Always re-raise AttributeError as it indicates missing dependencies
+ raise error
def set_gpu_proc_affinity(
@@ -1978,6 +1975,15 @@ def get_ip() -> str:
except Exception:
pass
+ # try using hostname
+ hostname = socket.gethostname()
+ try:
+ ip_addr = socket.gethostbyname(hostname)
+ warnings.warn("using local ip address: {}".format(ip_addr))
+ return ip_addr
+ except Exception:
+ pass
+
warnings.warn(
"Failed to get the IP address, using 0.0.0.0 by default."
"The value can be set by the environment variable"
@@ -2061,13 +2067,6 @@ def configure_ipv6(dist_init_addr):
return port, host
-def rank0_log(msg: str):
- from sglang.srt.distributed import get_tensor_model_parallel_rank
-
- if get_tensor_model_parallel_rank() == 0:
- logger.info(msg)
-
-
def launch_dummy_health_check_server(host, port, enable_metrics):
import asyncio
@@ -2343,6 +2342,7 @@ def is_fa3_default_architecture(hf_config):
"Qwen3ForCausalLM",
"Qwen3MoeForCausalLM",
"Glm4MoeForCausalLM",
+ "Glm4vMoeForConditionalGeneration",
"Step3VLForConditionalGeneration",
}
return architectures[0] in default_archs
@@ -2599,6 +2599,50 @@ def dynamic_import(func_path: str):
return func
+def gc_object_counts():
+ import gc
+
+ g0 = len(gc.get_objects(0))
+ g1 = len(gc.get_objects(1))
+ g2 = len(gc.get_objects(2))
+ return g0, g1, g2
+
+
+def configure_gc_warning(warn_threshold_secs):
+ import gc
+
+ gc_start_time = {}
+
+ def gc_callback(phase, info):
+ gen = info.get("generation", "?")
+ if phase == "start":
+ gc_start_time[gen] = time.time()
+ elif phase == "stop":
+ duration = time.time() - gc_start_time.get(gen, time.time())
+ if duration > warn_threshold_secs:
+ g0, g1, g2 = gc_object_counts()
+ logger.warning(
+ f"LONG GARBAGE COLLECTION DETECTED | Generation {gen} | Duration: {duration:.4f}s | # Objects: gen0={g0}, gen1={g1}, gen2={g2} | "
+ f"This may cause latency jitter. Consider calling the freeze_gc API after sending a few warmup requests."
+ )
+
+ gc.callbacks.append(gc_callback)
+
+
+def freeze_gc(context: str):
+ import gc
+
+ g0_before, g1_before, g2_before = gc_object_counts()
+ gc.freeze()
+ g0_after, g1_after, g2_after = gc_object_counts()
+ logger.info(
+ f"Freezing GC in {context} process. "
+ f"gen0: {g0_before}->{g0_after}, "
+ f"gen1: {g1_before}->{g1_after}, "
+ f"gen2: {g2_before}->{g2_after}"
+ )
+
+
def configure_gc_logger():
logger.info("Enable GC Logger")
@@ -2754,6 +2798,10 @@ def wrapper(*args, **kwargs):
return decorator
+def get_origin_rid(rid):
+ return rid.split("_", 1)[1] if "_" in rid else rid
+
+
def apply_module_patch(target_module, target_function, wrappers):
original_module, original_function = parse_module_path(
target_module, target_function, False
@@ -2863,6 +2911,18 @@ def mxfp_supported():
return False
+@lru_cache(maxsize=1)
+def is_gfx95_supported():
+ """
+ Returns whether the current platform is a gfx95* GPU (which supports MX types).
+ """
+ if torch.version.hip:
+ gcn_arch = torch.cuda.get_device_properties(0).gcnArchName
+ return any(gfx in gcn_arch for gfx in ["gfx95"])
+ else:
+ return False
+
+
# LoRA-related constants and utilities
SUPPORTED_LORA_TARGET_MODULES = [
"q_proj",
@@ -2872,6 +2932,8 @@ def mxfp_supported():
"gate_proj",
"up_proj",
"down_proj",
+ "qkv_proj",
+ "gate_up_proj",
]
LORA_TARGET_ALL_MODULES = "all"
@@ -2966,3 +3028,22 @@ async def wait_for_zero(self):
@lru_cache(maxsize=1)
def is_triton_kernels_available() -> bool:
return importlib.util.find_spec("triton_kernels") is not None
+
+
+def check_cuda_result(raw_output):
+ import cuda.bindings.runtime as cuda_rt
+
+ err, *results = raw_output
+ if err != cuda_rt.cudaError_t.cudaSuccess:
+ raise Exception(f"CUDA error: {err}")
+
+ return results
+
+
+def numa_bind_to_node(node: int):
+ libnuma = ctypes.CDLL("libnuma.so")
+ if libnuma.numa_available() < 0:
+ raise SystemError("numa not available on this system")
+
+ libnuma.numa_run_on_node(ctypes.c_int(node))
+ libnuma.numa_set_localalloc()
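The GC helpers added above (configure_gc_warning / freeze_gc) are intended to be called once at startup and once after warmup, respectively. A minimal sketch, assuming the import paths introduced in this diff:

import gc

from sglang.srt.utils import configure_gc_warning, freeze_gc

# Warn whenever a single GC pass takes longer than 0.1s.
configure_gc_warning(warn_threshold_secs=0.1)

# After a few warmup requests, freeze surviving objects so the collector
# stops rescanning them on every pass.
freeze_gc("example")

gc.collect()  # collections longer than the threshold are now logged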
diff --git a/python/sglang/srt/weight_sync/utils.py b/python/sglang/srt/weight_sync/utils.py
index 8f3c8adb788..f308207e286 100644
--- a/python/sglang/srt/weight_sync/utils.py
+++ b/python/sglang/srt/weight_sync/utils.py
@@ -6,7 +6,7 @@
from torch.distributed.tensor import DTensor
from sglang.srt.entrypoints.engine import Engine
-from sglang.srt.managers.tokenizer_manager import UpdateWeightsFromTensorReqInput
+from sglang.srt.managers.io_struct import UpdateWeightsFromTensorReqInput
from sglang.srt.model_executor.model_runner import LocalSerializedTensor
from sglang.srt.utils import MultiprocessingSerializer
diff --git a/python/sglang/test/attention/test_trtllm_mla_backend.py b/python/sglang/test/attention/test_trtllm_mla_backend.py
index 18a7f77ea5f..6f610baf039 100755
--- a/python/sglang/test/attention/test_trtllm_mla_backend.py
+++ b/python/sglang/test/attention/test_trtllm_mla_backend.py
@@ -41,6 +41,10 @@
"v_head_dim": 512,
"num_kv_heads": 1,
"layer_id": 0,
+ "tp_q_head_num": 128,
+ "tp_k_head_num": 128,
+ "prefill_head_dim": 192,
+ "prefill_v_head_dim": 128,
}
ROPE_BASE = 10000
@@ -92,7 +96,7 @@ def build_rotary_emb(config, device=None):
"description": "Medium-scale batch",
},
],
- "decode_output_match": [
+ "output_match": [
{
"name": "single_fp16",
"batch_size": 1,
@@ -208,6 +212,15 @@ def __init__(self, config):
self.kv_cache_dtype = config["kv_cache_dtype"]
self.page_size = config["page_size"]
+ # Server args stub - needed by attention backends
+ self.server_args = type(
+ "ServerArgs",
+ (),
+ {
+ "enable_dp_attention": False, # Default value for testing
+ },
+ )
+
# Model-config stub with MLA attributes
self.model_config = type(
"ModelConfig",
@@ -313,7 +326,7 @@ def _merge_config(self, test_case):
config.update(test_case)
return config
- def _create_model_components(self, config):
+ def _create_model_components(self, config, is_prefill=False):
"""Create model runners, backends, and layer for testing."""
# Create model runners
model_runner_trtllm = MockModelRunner(config)
@@ -323,14 +336,23 @@ def _create_model_components(self, config):
trtllm_backend = TRTLLMMLABackend(model_runner_trtllm)
reference_backend = FlashInferMLAAttnBackend(model_runner_reference)
+ head_dim = (
+ config["kv_lora_rank"] + config["qk_rope_head_dim"]
+ if not is_prefill
+ else config["prefill_head_dim"]
+ )
+ v_head_dim = (
+ config["v_head_dim"] if not is_prefill else config["prefill_v_head_dim"]
+ )
+
# Create RadixAttention layer
layer = RadixAttention(
num_heads=config["num_attention_heads"],
- head_dim=config["kv_lora_rank"] + config["qk_rope_head_dim"],
+ head_dim=head_dim,
scaling=model_runner_trtllm.model_config.scaling,
num_kv_heads=config["num_kv_heads"],
layer_id=config["layer_id"],
- v_head_dim=config["v_head_dim"],
+ v_head_dim=v_head_dim,
prefix="attn_mqa",
)
@@ -515,7 +537,7 @@ def test_decode_output_match(self):
"""Test that TRTLLM and FlashInfer MLA backends produce matching outputs."""
print(f"\nRunning decode output matching tests...")
- for test_case in TEST_CASES["decode_output_match"]:
+ for test_case in TEST_CASES["output_match"]:
with self.subTest(test_case=test_case["name"]):
print(f" Testing {test_case['name']}: {test_case['description']}")
@@ -833,7 +855,7 @@ def test_metadata_initialization(self):
# Test workspace properties
self.assertEqual(metadata.workspace.device.type, "cuda")
- self.assertEqual(metadata.workspace.dtype, torch.int8)
+ self.assertEqual(metadata.workspace.dtype, torch.uint8)
self.assertGreater(
metadata.workspace.numel(), 0, "Workspace should have non-zero size"
)
@@ -993,8 +1015,8 @@ def test_metadata_cuda_graph_compatibility(self):
)
# Verify CUDA graph buffers are allocated
- self.assertIsNotNone(backend.cuda_graph_kv_indices)
- self.assertIsNotNone(backend.cuda_graph_workspace)
+ self.assertIsNotNone(backend.decode_cuda_graph_kv_indices)
+ self.assertIsNotNone(backend.decode_cuda_graph_workspace)
# Test capture metadata
seq_lens = torch.full(
@@ -1090,6 +1112,157 @@ def test_metadata_consistency_across_calls(self):
self.assertIsNotNone(metadata_3.block_kv_indices)
self.assertEqual(metadata_3.block_kv_indices.shape[0], config["batch_size"])
+ def test_prefill_output_match_self_attention(self):
+ """Test prefill (forward) behavior of TRTLLM MLA backend vs reference."""
+ print(f"\nRunning prefill output tests...")
+
+ for test_case in TEST_CASES["output_match"][:2]: # Just a subset for speed
+ with self.subTest(test_case=test_case["name"]):
+ print(
+ f"Prefill Testing {test_case['name']}: {test_case['description']}"
+ )
+
+ config = self._merge_config(test_case)
+ batch_size = config["batch_size"]
+ max_seq_len = config["max_seq_len"]
+
+ # Create components
+ (
+ model_runner_trtllm,
+ model_runner_reference,
+ trtllm_backend,
+ reference_backend,
+ layer,
+ ) = self._create_model_components(config, is_prefill=True)
+
+ # Prefill uses full sequences
+ seq_lens = torch.full(
+ (batch_size,), max_seq_len, device=config["device"]
+ )
+
+ def _create_forward_batch_prefill(
+ batch_size,
+ seq_lens,
+ extend_prefix_lens,
+ backend,
+ model_runner,
+ config,
+ ):
+ """Create a forward batch for the given backend."""
+
+ fb = ForwardBatch(
+ batch_size=batch_size,
+ input_ids=torch.randint(
+ 0, 100, (batch_size, 1), device=config["device"]
+ ),
+ out_cache_loc=torch.arange(batch_size, device=config["device"]),
+ seq_lens_sum=int(seq_lens.sum().item()),
+ extend_prefix_lens=extend_prefix_lens,
+ extend_prefix_lens_cpu=extend_prefix_lens.cpu().int().tolist(),
+ extend_seq_lens_cpu=(seq_lens - extend_prefix_lens)
+ .cpu()
+ .int()
+ .tolist(),
+ forward_mode=ForwardMode.EXTEND,
+ req_pool_indices=torch.arange(
+ batch_size, device=config["device"]
+ ),
+ seq_lens=seq_lens,
+ seq_lens_cpu=seq_lens.cpu(),
+ attn_attend_prefix_cache=False,
+ mha_return_lse=False,
+ attn_backend=backend,
+ )
+ fb.req_to_token_pool = model_runner.req_to_token_pool
+ fb.token_to_kv_pool = model_runner.token_to_kv_pool
+
+ # Add position information for RoPE
+ fb.positions = torch.arange(batch_size, device=config["device"])
+
+ return fb
+
+ # Create forward batches
+ fb_trtllm = _create_forward_batch_prefill(
+ batch_size,
+ seq_lens.clone(),
+ torch.zeros(batch_size, device=config["device"], dtype=torch.int32),
+ trtllm_backend,
+ model_runner_trtllm,
+ config,
+ )
+ fb_reference = _create_forward_batch_prefill(
+ batch_size,
+ seq_lens.clone(),
+ torch.zeros(batch_size, device=config["device"], dtype=torch.int32),
+ reference_backend,
+ model_runner_reference,
+ config,
+ )
+
+ # Initialize metadata for both backends
+ trtllm_backend.init_forward_metadata(fb_trtllm)
+ reference_backend.init_forward_metadata(fb_reference)
+
+ # Create Q, K, V tensors for prefill
+ torch.manual_seed(config["seed_qkv"])
+
+ def _create_qkv_tensors_prefill(
+ batch_size, seq_len, config, dtype_override=None
+ ):
+ """Create Q, K, V tensors for prefill, using config for head_num and head_dim."""
+ device = config["device"]
+ dtype = dtype_override or config["dtype"]
+
+ total_tokens = batch_size * seq_len
+
+ tp_q_head_num = config["tp_q_head_num"]
+ tp_k_head_num = config["tp_k_head_num"]
+ head_dim = config["prefill_head_dim"]
+ v_head_dim = config["prefill_v_head_dim"]
+
+ q = torch.randn(
+ (total_tokens, tp_q_head_num * head_dim),
+ dtype=dtype,
+ device=device,
+ )
+ k = torch.randn(
+ (total_tokens, tp_k_head_num * head_dim),
+ dtype=dtype,
+ device=device,
+ )
+ v = torch.randn(
+ (total_tokens, tp_k_head_num * v_head_dim),
+ dtype=dtype,
+ device=device,
+ )
+
+ # Reshape as requested
+ q = q.view(-1, tp_q_head_num, head_dim)
+ k = k.view(-1, tp_k_head_num, head_dim)
+ v = v.view(-1, tp_k_head_num, v_head_dim)
+
+ return q, k, v
+
+ q, k, v = _create_qkv_tensors_prefill(batch_size, max_seq_len, config)
+ # Run prefill on both backends
+ out_trtllm = trtllm_backend.forward_extend(
+ q, k, v, layer, fb_trtllm, False
+ ).view(-1, layer.tp_q_head_num * layer.v_head_dim)
+ out_reference = reference_backend.forward_extend(
+ q, k, v, layer, fb_reference, False
+ )
+
+ tolerance = config.get("tolerance", 1e-2)
+ comparison_passed = compare_outputs(
+ out_trtllm, out_reference, tolerance=tolerance
+ )
+ self.assertTrue(
+ comparison_passed,
+ f"TRTLLM and Reference prefill outputs differ beyond tolerance. "
+ f"Config: {test_case['name']}, "
+ f"Max diff: {(out_trtllm - out_reference).abs().max().item()}",
+ )
+
if __name__ == "__main__":
unittest.main()
diff --git a/python/sglang/test/few_shot_gsm8k.py b/python/sglang/test/few_shot_gsm8k.py
index e9971fa90f1..7dafcd423f4 100644
--- a/python/sglang/test/few_shot_gsm8k.py
+++ b/python/sglang/test/few_shot_gsm8k.py
@@ -129,6 +129,7 @@ def few_shot_gsm8k(s, question):
return {
"accuracy": acc,
+ "invalid": invalid,
"latency": latency,
"output_throughput": output_throughput,
}
diff --git a/python/sglang/test/runners.py b/python/sglang/test/runners.py
index ba1519951a8..8ce2e2e20a5 100644
--- a/python/sglang/test/runners.py
+++ b/python/sglang/test/runners.py
@@ -231,11 +231,14 @@ def start_model_process(self, in_queue, out_queue, model_path, torch_dtype):
# Load the model and tokenizer
if self.model_type == "generation":
- config = AutoConfig.from_pretrained(model_path)
- if model_archs := getattr(config, "architectures"):
- model_cls = getattr(transformers, model_archs[0])
- else:
+ config = AutoConfig.from_pretrained(
+ model_path, trust_remote_code=self.trust_remote_code
+ )
+ if self.trust_remote_code:
model_cls = AutoModelForCausalLM
+ else:
+ model_arch = getattr(config, "architectures")[0]
+ model_cls = getattr(transformers, model_arch)
self.base_model = model_cls.from_pretrained(
model_path,
torch_dtype=torch_dtype,
@@ -488,7 +491,7 @@ def __init__(
tp_size: int = 1,
model_impl: str = "auto",
port: int = DEFAULT_PORT_FOR_SRT_TEST_RUNNER,
- lora_paths: List[str] = None,
+ lora_paths: Optional[Union[List[str], List[dict[str, str]]]] = None,
max_loras_per_batch: int = 4,
attention_backend: Optional[str] = None,
prefill_attention_backend: Optional[str] = None,
@@ -502,6 +505,7 @@ def __init__(
mem_fraction_static: float = 0.65,
trust_remote_code: bool = False,
speculative_draft_model_path: Optional[str] = None,
+ speculative_draft_model_revision: Optional[str] = None,
speculative_algorithm: Optional[str] = None,
speculative_num_steps: Optional[int] = None,
speculative_eagle_topk: Optional[int] = None,
@@ -523,6 +527,9 @@ def __init__(
spec_kwargs = {}
if speculative_draft_model_path:
spec_kwargs["speculative_draft_model_path"] = speculative_draft_model_path
+ spec_kwargs["speculative_draft_model_revision"] = (
+ speculative_draft_model_revision
+ )
spec_kwargs["speculative_algorithm"] = speculative_algorithm
spec_kwargs["speculative_num_steps"] = speculative_num_steps
spec_kwargs["speculative_eagle_topk"] = speculative_eagle_topk
diff --git a/python/sglang/test/test_cutlass_moe.py b/python/sglang/test/test_cutlass_moe.py
index 496e6d4877d..56f276c8198 100755
--- a/python/sglang/test/test_cutlass_moe.py
+++ b/python/sglang/test/test_cutlass_moe.py
@@ -8,11 +8,21 @@
from sglang.srt.layers.moe.cutlass_moe import cutlass_fused_experts_fp8
from sglang.srt.layers.moe.fused_moe_triton.fused_moe import fused_experts
+from sglang.srt.layers.moe.moe_runner.base import MoeRunnerConfig
+from sglang.srt.layers.moe.topk import StandardTopKOutput
+
+
+# Copy from: https://github.com/deepseek-ai/DeepGEMM/blob/main/deep_gemm/utils.py
+def calc_diff(x, y):
+ x, y = x.double(), y.double()
+ denominator = (x * x + y * y).sum()
+ sim = 2 * (x * y).sum() / denominator
+ return 1 - sim
def get_model_config(tp_size: int):
config = AutoConfig.from_pretrained(
- "deepseek-ai/deepseek-R1", trust_remote_code=True
+ "deepseek-ai/Deepseek-R1", trust_remote_code=True
)
E = config.n_routed_experts
topk = config.num_experts_per_tok
@@ -69,16 +79,11 @@ def run_test(tp_size, batch_size, model_config, check=False):
# --- Input Data ---
# Use bf16/fp16 for input activation based on model config
- x = torch.randn((batch_size, H), device="cuda", dtype=dtype) * 0.0001
+ x = torch.randn((batch_size, H), device="cuda", dtype=dtype)
# --- Weights (Generate in higher precision, then convert to FP8) ---
# Generate weights suitable for FP8 conversion (e.g., scaled appropriately)
- w1_hp = (
- torch.randn((E, I, H), device="cuda", dtype=torch.float32) * 0.00001 + 0.00001
- )
- w2_hp = (
- torch.randn((E, H, I // 2), device="cuda", dtype=torch.float32) * 0.00001
- + 0.00001
- )
+ w1_hp = torch.randn((E, I, H), device="cuda", dtype=torch.float32)
+ w2_hp = torch.randn((E, H, I // 2), device="cuda", dtype=torch.float32)
w1 = to_fp8(w1_hp)
w2 = to_fp8(w2_hp)
@@ -148,15 +153,31 @@ def run_test(tp_size, batch_size, model_config, check=False):
problem_sizes2,
)
+ topk_output = StandardTopKOutput(
+ topk_weights=topk_weights,
+ topk_ids=topk_ids,
+ router_logits=torch.randn(
+ (batch_size, topk), device=topk_weights.device, dtype=dtype
+ ),
+ )
+
+ moe_runner_config = MoeRunnerConfig(
+ num_experts=E,
+ top_k=topk,
+ hidden_size=H,
+ intermediate_size_per_partition=I,
+ params_dtype=dtype,
+ activation="silu",
+ inplace=False,
+ )
+
# Note: Triton expects non-transposed weights
triton_lambda = lambda: fused_experts(
x,
w1,
w2,
- topk_weights,
- topk_ids,
- inplace=False, # Use False for benchmarking to avoid side effects if run multiple times
- activation="silu", # Assuming SiLU activation common in MoEs
+ topk_output,
+ moe_runner_config,
use_fp8_w8a8=True,
w1_scale=w1_scale,
w2_scale=w2_scale,
@@ -221,34 +242,20 @@ def run_test(tp_size, batch_size, model_config, check=False):
x,
w1, # Original shape
w2, # Original shape
- topk_weights,
- topk_ids,
- inplace=False, # Important: Use False to get output tensor
- activation="silu",
+ topk_output,
+ moe_runner_config,
use_fp8_w8a8=True,
w1_scale=w1_scale,
w2_scale=w2_scale,
block_shape=block_shape,
)
- # Ensure outputs are same dtype for comparison
- y_cutlass = y_cutlass.to(dtype)
- y_triton = y_triton.to(dtype)
-
- abs_error = torch.abs(y_cutlass - y_triton)
- rel_error = abs_error / torch.clamp(torch.abs(y_triton), min=1e-2)
-
- max_abs_err = abs_error.max().item()
- max_rel_err = rel_error.max().item()
-
- print("y_cutlass:", y_cutlass[:, :10])
- print("y_triton:", y_triton[:, :10])
- print(f"Max absolute error: {max_abs_err:.6f}")
- print(f"Max relative error: {max_rel_err:.6f}")
+ diff = calc_diff(y_cutlass, y_triton)
+ print(f"Diff: {diff:.6f}")
# Tolerance might need adjustment based on FP8 specifics and kernel differences
# FP8 comparisons often require higher tolerance than FP16/BF16
- assert max_rel_err < 5e-1, f"Relative error too high! {max_rel_err}"
+ assert diff < 1e-4, f"Diff too high! {diff}"
print("Correctness check passed.")
@@ -266,7 +273,21 @@ def main(tp_size=8, batch_sizes=[1, 4, 8, 16, 32, 64, 128, 256, 512], check=Fals
"--batch-sizes",
type=int,
nargs="+",
- default=[1, 4, 8, 16, 32, 64, 128, 256, 512], # Adjusted default
+ default=[
+ 1,
+ 4,
+ 8,
+ 16,
+ 32,
+ 64,
+ 128,
+ 256,
+ 512,
+ 1024,
+ 2048,
+ 4096,
+ 8192,
+ ], # Adjusted default
help="List of batch sizes to test",
)
parser.add_argument("--check", action="store_true", help="Enable check mode")
diff --git a/python/sglang/test/test_cutlass_w4a8_moe.py b/python/sglang/test/test_cutlass_w4a8_moe.py
index 622941f006f..6706fc962c8 100644
--- a/python/sglang/test/test_cutlass_w4a8_moe.py
+++ b/python/sglang/test/test_cutlass_w4a8_moe.py
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: Apache-2.0
-from typing import Optional
+from typing import Literal, Optional
import pytest
import torch
@@ -25,7 +25,7 @@ def pack_int4_values_to_int8(int4_values_interleaved: torch.Tensor) -> torch.Ten
return packed_tensor.to(torch.int8)
-def pack_interleave(num_experts, ref_weight, ref_scale):
+def pack_interleave(num_experts, ref_weight, ref_scale, alignment=4):
n, k = ref_weight.shape[1], ref_weight.shape[2]
weight = pack_int4_values_to_int8(ref_weight.cpu()).cuda()
@@ -33,11 +33,16 @@ def pack_interleave(num_experts, ref_weight, ref_scale):
w_q = w_q.contiguous()
scale_interleaved = ref_scale.reshape(
- ref_scale.shape[0], ref_scale.shape[1], (ref_scale.shape[2] // 4), 4
+ ref_scale.shape[0],
+ ref_scale.shape[1],
+ (ref_scale.shape[2] // alignment),
+ alignment,
) # [E, N, K/4, 4]
scale_interleaved = scale_interleaved.permute(0, 2, 1, 3) # [E, K/4, N, 4]
scale_interleaved = scale_interleaved.reshape(
- ref_scale.shape[0], ref_scale.shape[2] // 4, ref_scale.shape[1] * 4
+ ref_scale.shape[0],
+ ref_scale.shape[2] // alignment,
+ ref_scale.shape[1] * alignment,
) # [E, K/4, N*4]
w_scale = scale_interleaved.contiguous()
@@ -48,12 +53,17 @@ def pack_interleave(num_experts, ref_weight, ref_scale):
@pytest.mark.parametrize("N", [2048])
@pytest.mark.parametrize("K", [7168])
@pytest.mark.parametrize("E", [256])
-@pytest.mark.parametrize("ep_size", [8])
+@pytest.mark.parametrize("tp_size", [8])
+@pytest.mark.parametrize("use_ep_moe", [True, False])
@pytest.mark.parametrize("topk", [8])
@pytest.mark.parametrize("group_size", [128])
@pytest.mark.parametrize("dtype", [torch.bfloat16])
-def test_cutlass_w4a8_moe(M, N, K, E, ep_size, topk, group_size, dtype):
- local_e = E // ep_size
+def test_cutlass_w4a8_moe(M, N, K, E, tp_size, use_ep_moe, topk, group_size, dtype):
+ if use_ep_moe:
+ local_e = E // tp_size
+ else: # tp mode
+ local_e = E
+ N = N // tp_size
debug = False
if debug:
@@ -87,7 +97,10 @@ def test_cutlass_w4a8_moe(M, N, K, E, ep_size, topk, group_size, dtype):
)
w1_q, w1_scale = pack_interleave(local_e, ref_weight_1, scale_1)
- w2_q, w2_scale = pack_interleave(local_e, ref_weight_2, scale_2)
+ if use_ep_moe:
+ w2_q, w2_scale = pack_interleave(local_e, ref_weight_2, scale_2)
+ else:
+ w2_q, w2_scale = pack_interleave(local_e, ref_weight_2, scale_2, 1)
device = "cuda"
a_strides1 = torch.full((local_e, 3), K, device=device, dtype=torch.int64)
@@ -265,7 +278,9 @@ def ref(
gate, fc1 = fc1.chunk(2, dim=-1)
fc1 = fc1 * torch.nn.functional.silu(gate)
- act = (fc1 / pre_quant_scale_2.float()).to(torch.float8_e4m3fn)
+ act = torch.clamp((fc1 / pre_quant_scale_2.float()), -448.0, 448.0).to(
+ torch.float8_e4m3fn
+ )
act = act.to(dtype)
w2 = ref_weight_2[e_idx]
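The clamp added to the reference path above bounds activations to the finite range of float8_e4m3fn (448 is the largest finite magnitude in that format) before casting, so out-of-range values saturate instead of overflowing. A tiny standalone check:

import torch

x = torch.tensor([500.0, -1000.0, 3.0])

# Clamping first guarantees the cast stays within the representable range.
safe = torch.clamp(x, -448.0, 448.0).to(torch.float8_e4m3fn)
print(safe.float())  # tensor([ 448., -448.,    3.])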
diff --git a/python/sglang/test/test_disaggregation_utils.py b/python/sglang/test/test_disaggregation_utils.py
new file mode 100644
index 00000000000..f61b71a9df5
--- /dev/null
+++ b/python/sglang/test/test_disaggregation_utils.py
@@ -0,0 +1,66 @@
+import time
+
+import requests
+
+from sglang.srt.utils import kill_process_tree
+from sglang.test.test_utils import (
+ DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
+ CustomTestCase,
+ popen_with_error_check,
+)
+
+
+class TestDisaggregationBase(CustomTestCase):
+ @classmethod
+ def setUpClass(cls):
+ cls.process_lb, cls.process_decode, cls.process_prefill = None, None, None
+ pass
+
+ @classmethod
+ def launch_lb(cls):
+ lb_command = [
+ "python3",
+ "-m",
+ "sglang_router.launch_router",
+ "--pd-disaggregation",
+ "--mini-lb", # FIXME: remove this
+ "--prefill",
+ cls.prefill_url,
+ "--decode",
+ cls.decode_url,
+ "--host",
+ cls.base_host,
+ "--port",
+ cls.lb_port,
+ ]
+ print("Starting load balancer:", " ".join(lb_command))
+ cls.process_lb = popen_with_error_check(lb_command)
+ cls.wait_server_ready(cls.lb_url + "/health")
+
+ @classmethod
+ def wait_server_ready(cls, url, timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH):
+ start_time = time.perf_counter()
+ while True:
+ try:
+ response = requests.get(url)
+ if response.status_code == 200:
+ print(f"Server {url} is ready")
+ return
+ except Exception:
+ pass
+
+ if time.perf_counter() - start_time > timeout:
+ raise RuntimeError(f"Server {url} failed to start in {timeout}s")
+ time.sleep(1)
+
+ @classmethod
+ def tearDownClass(cls):
+ for process in [cls.process_lb, cls.process_decode, cls.process_prefill]:
+ if process:
+ try:
+ kill_process_tree(process.pid)
+ except Exception as e:
+ print(f"Error killing process {process.pid}: {e}")
+
+ # Give the processes a few seconds to shut down cleanly.
+ time.sleep(5)
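A hedged sketch of how a concrete test could build on the new TestDisaggregationBase; the hosts, ports, and URLs below are placeholders (real tests set these while launching the prefill and decode servers):

from sglang.test.test_disaggregation_utils import TestDisaggregationBase

class TestMyPDSetup(TestDisaggregationBase):
    @classmethod
    def setUpClass(cls):
        super().setUpClass()
        # Placeholders: real tests launch prefill/decode servers first.
        cls.base_host = "127.0.0.1"
        cls.lb_port = "8000"
        cls.lb_url = f"http://{cls.base_host}:{cls.lb_port}"
        cls.prefill_url = "http://127.0.0.1:30000"
        cls.decode_url = "http://127.0.0.1:30001"
        cls.launch_lb()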
diff --git a/python/sglang/test/test_utils.py b/python/sglang/test/test_utils.py
index 8b4cb903c20..0d3d769f419 100644
--- a/python/sglang/test/test_utils.py
+++ b/python/sglang/test/test_utils.py
@@ -42,7 +42,8 @@
DEFAULT_SMALL_MODEL_NAME_FOR_TEST = "meta-llama/Llama-3.2-1B-Instruct"
DEFAULT_SMALL_MODEL_NAME_FOR_TEST_BASE = "meta-llama/Llama-3.2-1B"
DEFAULT_MOE_MODEL_NAME_FOR_TEST = "mistralai/Mixtral-8x7B-Instruct-v0.1"
-DEFAULT_SMALL_MOE_MODEL_NAME_FOR_TEST = "Qwen/Qwen1.5-MoE-A2.7B"
+DEFAULT_SMALL_MOE_MODEL_NAME_FOR_TEST_BASE = "Qwen/Qwen1.5-MoE-A2.7B"
+DEFAULT_SMALL_MOE_MODEL_NAME_FOR_TEST_CHAT = "Qwen/Qwen1.5-MoE-A2.7B-Chat"
# MLA test models
DEFAULT_SMALL_EMBEDDING_MODEL_NAME_FOR_TEST = "Alibaba-NLP/gte-Qwen2-1.5B-instruct"
@@ -61,11 +62,21 @@
DEFAULT_MODEL_NAME_FOR_MODELOPT_QUANT_ACCURACY_TEST_FP8 = (
"nvidia/Llama-3.1-8B-Instruct-FP8"
)
+DEFAULT_MODEL_NAME_FOR_TEST_QWEN_FP8 = "Qwen/Qwen3-1.7B-FP8"
+DEFAULT_MODEL_NAME_FOR_TEST_FP8_WITH_MOE = "gaunernst/DeepSeek-V2-Lite-Chat-FP8"
+
+# W8A8 models
+DEFAULT_MODEL_NAME_FOR_TEST_W8A8 = "RedHatAI/Llama-3.2-3B-quantized.w8a8"
+DEFAULT_MODEL_NAME_FOR_TEST_W8A8_WITH_MOE = "nytopop/Qwen3-30B-A3B.w8a8"
# EAGLE
DEFAULT_EAGLE_TARGET_MODEL_FOR_TEST = "meta-llama/Llama-2-7b-chat-hf"
DEFAULT_EAGLE_DRAFT_MODEL_FOR_TEST = "lmsys/sglang-EAGLE-llama2-chat-7B"
DEFAULT_MODEL_NAME_FOR_TEST_EAGLE3 = "jamesliu1/sglang-EAGLE3-Llama-3.1-Instruct-8B"
+DEFAULT_STANDALONE_SPECULATIVE_TARGET_MODEL_FOR_TEST = (
+ "meta-llama/Llama-3.1-8B-Instruct"
+)
+DEFAULT_STANDALONE_SPECULATIVE_DRAFT_MODEL_FOR_TEST = "meta-llama/Llama-3.2-1B-Instruct"
# Other use cases
DEFAULT_MODEL_NAME_FOR_TEST_LOCAL_ATTENTION = (
@@ -460,6 +471,25 @@ def try_cached_model(model_repo: str):
return model_dir if model_dir else model_repo
+def popen_with_error_check(command: list[str], allow_exit: bool = False):
+ process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+
+ def _run_and_check():
+ # communicate() blocks until the process exits and drains stdout/stderr.
+ stdout, stderr = process.communicate()
+
+ if not allow_exit or process.returncode != 0:
+ raise Exception(
+ f"{command} exited with code {process.returncode}\n{stdout=}\n{stderr=}"
+ )
+
+ t = threading.Thread(target=_run_and_check)
+ t.start()
+ return process
+
+
def popen_launch_server(
model: str,
base_url: str,
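popen_with_error_check above spawns a watcher thread that raises (from that thread) if the child process exits when it should not. A usage sketch with an arbitrary long-running command:

from sglang.test.test_utils import popen_with_error_check

# Used like launch_lb() above: start a server process that should keep running;
# if it exits, the background checker raises with its captured stdout/stderr.
proc = popen_with_error_check(["python3", "-m", "http.server", "18080"])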
diff --git a/python/sglang/utils.py b/python/sglang/utils.py
index 09f7916bc55..f6bf20c4294 100644
--- a/python/sglang/utils.py
+++ b/python/sglang/utils.py
@@ -5,7 +5,6 @@
import logging
import os
import random
-import signal
import socket
import subprocess
import sys
@@ -458,6 +457,7 @@ def wait_for_server(base_url: str, timeout: int = None) -> None:
NOTE: Typically, the server runs in a separate terminal.
In this notebook, we run the server and notebook code together, so their outputs are combined.
To improve clarity, the server logs are displayed in the original black color, while the notebook outputs are highlighted in blue.
+ To reduce the log length, we set the log level to warning for the server, the default log level is info.
We are running those notebooks in a CI environment, so the throughput is not representative of the actual performance.
"""
)
@@ -473,6 +473,10 @@ class TypeBasedDispatcher:
def __init__(self, mapping: List[Tuple[Type, Callable]]):
self._mapping = mapping
+ def __iadd__(self, other: "TypeBasedDispatcher"):
+ self._mapping.extend(other._mapping)
+ return self
+
def __call__(self, obj: Any):
for ty, fn in self._mapping:
if isinstance(obj, ty):
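The new __iadd__ on TypeBasedDispatcher lets one dispatcher absorb another's mapping in place; a minimal sketch:

from sglang.utils import TypeBasedDispatcher

d1 = TypeBasedDispatcher([(int, lambda x: f"int:{x}")])
d2 = TypeBasedDispatcher([(str, lambda x: f"str:{x}")])

d1 += d2          # extends d1's (type, handler) list with d2's entries
print(d1(3))      # "int:3"
print(d1("hi"))   # "str:hi"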
diff --git a/python/sglang/version.py b/python/sglang/version.py
index bdc1cd94747..722515271fb 100644
--- a/python/sglang/version.py
+++ b/python/sglang/version.py
@@ -1 +1 @@
-__version__ = "0.5.0rc2"
+__version__ = "0.5.2"
diff --git a/scripts/ci/amd_ci_exec.sh b/scripts/ci/amd_ci_exec.sh
index 411fe2a7566..3bd940eb1a5 100755
--- a/scripts/ci/amd_ci_exec.sh
+++ b/scripts/ci/amd_ci_exec.sh
@@ -1,6 +1,18 @@
#!/bin/bash
set -euo pipefail
+# Detect GPU family from hostname (e.g., linux-mi35x-gpu-1-xxxxx-runner-zzzzz)
+HOSTNAME_VALUE=$(hostname)
+GPU_FAMILY=""
+
+# Host names look like: linux-mi35x-gpu-1-xxxxx-runner-zzzzz
+if [[ "${HOSTNAME_VALUE}" =~ ^linux-(mi[0-9]+[a-z]*)-gpu-[0-9]+ ]]; then
+ GPU_FAMILY="${BASH_REMATCH[1]}"
+ echo "Detected GPU family from hostname: ${GPU_FAMILY}"
+else
+ echo "Warning: could not parse GPU family from '${HOSTNAME_VALUE}'"
+fi
+
WORKDIR="/sglang-checkout/test/srt"
declare -A ENV_MAP=(
[SGLANG_AMD_CI]=1
@@ -8,6 +20,11 @@ declare -A ENV_MAP=(
[SGLANG_USE_AITER]=1
)
+# Conditionally add GPU_ARCHS only for mi35x
+if [[ "${GPU_FAMILY}" == "mi35x" ]]; then
+ ENV_MAP[GPU_ARCHS]="gfx950"
+fi
+
# Parse -w/--workdir and -e ENV=VAL
while [[ $# -gt 0 ]]; do
case "$1" in
diff --git a/scripts/ci/amd_ci_install_dependency.sh b/scripts/ci/amd_ci_install_dependency.sh
index 3c8061351b3..518f0dde9ce 100755
--- a/scripts/ci/amd_ci_install_dependency.sh
+++ b/scripts/ci/amd_ci_install_dependency.sh
@@ -1,19 +1,44 @@
#!/bin/bash
set -euo pipefail
+HOSTNAME_VALUE=$(hostname)
+GPU_ARCH="mi30x" # default
+
+# Host names look like: linux-mi35x-gpu-1-xxxxx-runner-zzzzz
+if [[ "${HOSTNAME_VALUE}" =~ ^linux-(mi[0-9]+[a-z]*)-gpu-[0-9]+ ]]; then
+ GPU_ARCH="${BASH_REMATCH[1]}"
+ echo "Detected GPU architecture from hostname: ${GPU_ARCH}"
+else
+ echo "Warning: could not parse GPU architecture from '${HOSTNAME_VALUE}', defaulting to ${GPU_ARCH}"
+fi
# Install the required dependencies in CI.
docker exec ci_sglang pip install --upgrade pip
docker exec ci_sglang pip uninstall sgl-kernel -y || true
docker exec -w /sglang-checkout/sgl-kernel ci_sglang bash -c "rm -f pyproject.toml && mv pyproject_rocm.toml pyproject.toml && python3 setup_rocm.py install"
-docker exec ci_sglang pip install -e "python[dev_hip]"
+
+case "${GPU_ARCH}" in
+ mi35x)
+ echo "Runner uses ${GPU_ARCH}; will fetch mi35x image."
+ docker exec ci_sglang pip install -e "python[dev_hip]" --no-deps # TODO: only for mi35x
+ # For lmms_evals evaluating MMMU
+ docker exec -w / ci_sglang git clone --branch v0.3.3 --depth 1 https://github.com/EvolvingLMMs-Lab/lmms-eval.git
+ docker exec -w /lmms-eval ci_sglang pip install -e . --no-deps # TODO: only for mi35x
+ ;;
+ mi30x|mi300|mi325)
+ echo "Runner uses ${GPU_ARCH}; will fetch mi30x image."
+ docker exec ci_sglang pip install -e "python[dev_hip]"
+ # For lmms_evals evaluating MMMU
+ docker exec -w / ci_sglang git clone --branch v0.3.3 --depth 1 https://github.com/EvolvingLMMs-Lab/lmms-eval.git
+ docker exec -w /lmms-eval ci_sglang pip install -e .
+ ;;
+ *)
+ echo "Runner architecture '${GPU_ARCH}' unrecognised;" >&2
+ ;;
+esac
docker exec -w / ci_sglang git clone https://github.com/merrymercy/human-eval.git
docker exec -w /human-eval ci_sglang pip install -e .
-# For lmms_evals evaluating MMMU
-docker exec -w / ci_sglang git clone --branch v0.3.3 --depth 1 https://github.com/EvolvingLMMs-Lab/lmms-eval.git
-docker exec -w /lmms-eval ci_sglang pip install -e .
-
docker exec -w / ci_sglang mkdir -p /dummy-grok
mkdir -p dummy-grok && wget https://sharkpublic.blob.core.windows.net/sharkpublic/sglang/dummy_grok.json -O dummy-grok/config.json
docker cp ./dummy-grok ci_sglang:/
diff --git a/scripts/ci/amd_ci_start_container.sh b/scripts/ci/amd_ci_start_container.sh
index 352d9634789..a1f281c8d99 100755
--- a/scripts/ci/amd_ci_start_container.sh
+++ b/scripts/ci/amd_ci_start_container.sh
@@ -3,7 +3,7 @@ set -euo pipefail
# Get version from SGLang version.py file
SGLANG_VERSION_FILE="$(dirname "$0")/../../python/sglang/version.py"
-SGLANG_VERSION="v0.5.0rc0" # Default version, will be overridden if version.py is found
+SGLANG_VERSION="v0.5.0rc0" # Default version, will be overridden if version.py is found
if [ -f "$SGLANG_VERSION_FILE" ]; then
VERSION_FROM_FILE=$(python3 -c '
@@ -25,130 +25,102 @@ else
echo "Warning: version.py not found, using default version: $SGLANG_VERSION" >&2
fi
+
# Default base tags (can be overridden by command line arguments)
DEFAULT_MI30X_BASE_TAG="${SGLANG_VERSION}-rocm630-mi30x"
DEFAULT_MI35X_BASE_TAG="${SGLANG_VERSION}-rocm700-mi35x"
# Parse command line arguments
-MI30X_BASE_TAG="$DEFAULT_MI30X_BASE_TAG"
-MI35X_BASE_TAG="$DEFAULT_MI35X_BASE_TAG"
+MI30X_BASE_TAG="${DEFAULT_MI30X_BASE_TAG}"
+MI35X_BASE_TAG="${DEFAULT_MI35X_BASE_TAG}"
while [[ $# -gt 0 ]]; do
case $1 in
- --mi30x-base-tag)
- MI30X_BASE_TAG="$2"
- shift 2
- ;;
- --mi35x-base-tag)
- MI35X_BASE_TAG="$2"
- shift 2
- ;;
+ --mi30x-base-tag) MI30X_BASE_TAG="$2"; shift 2;;
+ --mi35x-base-tag) MI35X_BASE_TAG="$2"; shift 2;;
-h|--help)
echo "Usage: $0 [--mi30x-base-tag TAG] [--mi35x-base-tag TAG]"
- echo " --mi30x-base-tag TAG Base tag for mi30x images (default: $DEFAULT_MI30X_BASE_TAG)"
- echo " --mi35x-base-tag TAG Base tag for mi35x images (default: $DEFAULT_MI35X_BASE_TAG)"
exit 0
;;
- *)
- echo "Unknown option $1"
- echo "Use --help for usage information"
- exit 1
- ;;
+ *) echo "Unknown option $1"; exit 1;;
esac
done
+
+
+# Detect GPU architecture from the Kubernetes runner hostname
+HOSTNAME_VALUE=$(hostname)
+GPU_ARCH="mi30x" # default
+
+# Host names look like: linux-mi35x-gpu-1-xxxxx-runner-zzzzz
+if [[ "${HOSTNAME_VALUE}" =~ ^linux-(mi[0-9]+[a-z]*)-gpu-[0-9]+ ]]; then
+ GPU_ARCH="${BASH_REMATCH[1]}"
+ echo "Detected GPU architecture from hostname: ${GPU_ARCH}"
+else
+ echo "Warning: could not parse GPU architecture from '${HOSTNAME_VALUE}', defaulting to ${GPU_ARCH}"
+fi
+
+# Normalise / collapse architectures we don’t yet build specifically for
+case "${GPU_ARCH}" in
+ mi35x)
+ echo "Runner uses ${GPU_ARCH}; will fetch mi35x image."
+ ;;
+ mi30x|mi300|mi325)
+ echo "Runner uses ${GPU_ARCH}; will fetch mi30x image."
+ GPU_ARCH="mi30x"
+ ;;
+ *)
+ echo "Runner architecture '${GPU_ARCH}' unrecognised; defaulting to mi30x image." >&2
+ GPU_ARCH="mi30x"
+ ;;
+esac
+
+
# Set up DEVICE_FLAG based on Kubernetes pod info
-if [ -f "/etc/podinfo/gha-render-devices" ]; then
+if [[ -f /etc/podinfo/gha-render-devices ]]; then
DEVICE_FLAG=$(cat /etc/podinfo/gha-render-devices)
else
DEVICE_FLAG="--device /dev/dri"
fi
-
-# Function to find latest available image for a given GPU architecture
+# Find the latest image
find_latest_image() {
local gpu_arch=$1
- local base_tag
-
- if [ "$gpu_arch" == "mi30x" ]; then
- base_tag="$MI30X_BASE_TAG"
- elif [ "$gpu_arch" == "mi35x" ]; then
- base_tag="$MI35X_BASE_TAG"
- else
- echo "Error: Unsupported GPU architecture '$gpu_arch'" >&2
- return 1
- fi
+ local base_tag days_back image_tag
- local days_back=0
-
- while [ $days_back -lt 7 ]; do
- local check_date=$(date -d "$days_back days ago" +%Y%m%d)
- local image_tag="${base_tag}-${check_date}"
+ case "${gpu_arch}" in
+ mi30x) base_tag="${MI30X_BASE_TAG}" ;;
+ mi35x) base_tag="${MI35X_BASE_TAG}" ;;
+ *) echo "Error: unsupported GPU architecture '${gpu_arch}'" >&2; return 1 ;;
+ esac
+ for days_back in {0..6}; do
+ image_tag="${base_tag}-$(date -d "${days_back} days ago" +%Y%m%d)"
echo "Checking for image: rocm/sgl-dev:${image_tag}" >&2
-
- # Check if the image exists by trying to get its manifest
if docker manifest inspect "rocm/sgl-dev:${image_tag}" >/dev/null 2>&1; then
echo "Found available image: rocm/sgl-dev:${image_tag}" >&2
echo "rocm/sgl-dev:${image_tag}"
return 0
fi
-
- days_back=$((days_back + 1))
done
- echo "Error: No ${gpu_arch} image found in the last 7 days for version ${base_tag}" >&2
-
- # Final fallback to specific hardcoded images
- echo "Using final fallback images..." >&2
- if [ "$gpu_arch" == "mi30x" ]; then
- echo "rocm/sgl-dev:v0.5.0rc0-rocm630-mi30x-20250812"
- elif [ "$gpu_arch" == "mi35x" ]; then
+ echo "Error: no ${gpu_arch} image found in the last 7 days for base ${base_tag}" >&2
+ echo "Using hard-coded fallback…" >&2
+ if [[ "${gpu_arch}" == "mi35x" ]]; then
echo "rocm/sgl-dev:v0.5.0rc0-rocm700-mi35x-20250812"
else
- echo "rocm/sgl-dev:v0.5.0rc0-rocm630-mi30x-20250812" # Default to mi30x
+ echo "rocm/sgl-dev:v0.5.0rc0-rocm630-mi30x-20250812"
fi
-
- return 0
}
-# Determine image finder and fallback based on runner
-# In Kubernetes, the hostname contains the GPU type (e.g., linux-mi300-gpu-1-bgg8r-runner-vknlb)
-# Extract the GPU type from hostname
-HOSTNAME_VALUE=$(hostname)
-RUNNER_NAME="unknown"
-
-if [[ "${HOSTNAME_VALUE}" =~ ^(linux-mi[0-9]+-gpu-[0-9]+) ]]; then
- RUNNER_NAME="${BASH_REMATCH[1]}"
- echo "Extracted runner from hostname: ${RUNNER_NAME}"
-else
- echo "Could not extract runner info from hostname: ${HOSTNAME_VALUE}"
-fi
-
-echo "The runner is: ${RUNNER_NAME}"
-GPU_ARCH="mi30x"
-
-# Check for mi350/mi355 runners
-if [[ "${RUNNER_NAME}" =~ ^linux-mi350-gpu-[0-9]+$ ]] || [[ "${RUNNER_NAME}" =~ ^linux-mi355-gpu-[0-9]+$ ]]; then
- echo "Runner is ${RUNNER_NAME}, will find mi35x image."
- GPU_ARCH="mi35x"
-# Check for mi300/mi325 runners
-elif [[ "${RUNNER_NAME}" =~ ^linux-mi300-gpu-[0-9]+$ ]] || [[ "${RUNNER_NAME}" =~ ^linux-mi325-gpu-[0-9]+$ ]]; then
- echo "Runner is ${RUNNER_NAME}, will find mi30x image."
-else
- echo "Runner type not recognized: '${RUNNER_NAME}'"
- echo "Defaulting to find mi30x image"
-fi
-
-# Find and pull the latest image
+# Pull and run the latest image
IMAGE=$(find_latest_image "${GPU_ARCH}")
-echo "Pulling Docker image: $IMAGE"
-docker pull "$IMAGE"
+echo "Pulling Docker image: ${IMAGE}"
+docker pull "${IMAGE}"
-# Run the container
-echo "Starting container: ci_sglang"
-docker run -dt --user root --device=/dev/kfd $DEVICE_FLAG \
+echo "Launching container: ci_sglang"
+docker run -dt --user root --device=/dev/kfd ${DEVICE_FLAG} \
-v "${GITHUB_WORKSPACE:-$PWD}:/sglang-checkout" \
--ipc=host --group-add video \
--shm-size 32g \
@@ -157,4 +129,4 @@ docker run -dt --user root --device=/dev/kfd $DEVICE_FLAG \
--security-opt seccomp=unconfined \
-w /sglang-checkout \
--name ci_sglang \
- "$IMAGE"
+ "${IMAGE}"
diff --git a/scripts/ci/ci_install_dependency.sh b/scripts/ci/ci_install_dependency.sh
index 3f1bae5245a..e007121a361 100755
--- a/scripts/ci/ci_install_dependency.sh
+++ b/scripts/ci/ci_install_dependency.sh
@@ -45,10 +45,16 @@ fi
# Install the main package
$PIP_CMD install -e "python[dev]" --extra-index-url https://download.pytorch.org/whl/${CU_VERSION} $PIP_INSTALL_SUFFIX
+# Install router for pd-disagg test
+SGLANG_ROUTER_BUILD_NO_RUST=1 $PIP_CMD install -e "sgl-router" $PIP_INSTALL_SUFFIX
+
+
+SGL_KERNEL_VERSION=0.3.9.post2
if [ "$IS_BLACKWELL" = "1" ]; then
# TODO auto determine sgl-kernel version
- SGL_KERNEL_VERSION=0.3.2
- $PIP_CMD install https://github.com/sgl-project/whl/releases/download/v${SGL_KERNEL_VERSION}/sgl_kernel-${SGL_KERNEL_VERSION}-cp39-abi3-manylinux2014_x86_64.whl --force-reinstall $PIP_INSTALL_SUFFIX
+ $PIP_CMD install https://github.com/sgl-project/whl/releases/download/v${SGL_KERNEL_VERSION}/sgl_kernel-${SGL_KERNEL_VERSION}+cu128-cp310-abi3-manylinux2014_x86_64.whl --force-reinstall $PIP_INSTALL_SUFFIX
+else
+ $PIP_CMD install https://github.com/sgl-project/whl/releases/download/v${SGL_KERNEL_VERSION}/sgl_kernel-${SGL_KERNEL_VERSION}+cu124-cp310-abi3-manylinux2014_x86_64.whl --force-reinstall $PIP_INSTALL_SUFFIX
fi
# Show current packages
diff --git a/scripts/ci/ci_install_rust.sh b/scripts/ci/ci_install_rust.sh
index 519155dfbe8..ac042fc9adb 100755
--- a/scripts/ci/ci_install_rust.sh
+++ b/scripts/ci/ci_install_rust.sh
@@ -4,10 +4,10 @@ set -euxo pipefail
# Check if sudo is available
if command -v sudo >/dev/null 2>&1; then
sudo apt-get update
- sudo apt-get install -y libssl-dev pkg-config
+ sudo apt-get install -y libssl-dev pkg-config protobuf-compiler
else
apt-get update
- apt-get install -y libssl-dev pkg-config
+ apt-get install -y libssl-dev pkg-config protobuf-compiler
fi
# Install rustup (Rust installer and version manager)
@@ -21,3 +21,4 @@ source $HOME/.cargo/env
# Verify installation
rustc --version
cargo --version
+protoc --version
diff --git a/scripts/ci/npu_ci_install_dependency.sh b/scripts/ci/npu_ci_install_dependency.sh
index 5226071f40e..71cf46f7f66 100755
--- a/scripts/ci/npu_ci_install_dependency.sh
+++ b/scripts/ci/npu_ci_install_dependency.sh
@@ -51,5 +51,11 @@ ${PIP_INSTALL} attrs==24.2.0 numpy==1.26.4 scipy==1.13.1 decorator==5.1.1 psutil
wget -O "${TRITON_ASCEND_NAME}" "${TRITON_ASCEND_URL}" && ${PIP_INSTALL} "./${TRITON_ASCEND_NAME}"
+### Install sgl-kernel-npu
+SGL_KERNEL_NPU_TAG="20250901"
+git clone --depth 1 https://github.com/sgl-project/sgl-kernel-npu.git --branch ${SGL_KERNEL_NPU_TAG}
+(cd sgl-kernel-npu && bash ./build.sh -a deepep && pip install output/deep_ep*.whl && cd "$(pip show deep-ep | grep -E '^Location:' | awk '{print $2}')" && ln -s deep_ep/deep_ep_cpp*.so)
+
+
### Install SGLang
${PIP_INSTALL} -v -e "python[srt_npu]"
diff --git a/scripts/code_sync/copy_from_oss.py b/scripts/code_sync/copy_from_oss.py
new file mode 100644
index 00000000000..5590a73a090
--- /dev/null
+++ b/scripts/code_sync/copy_from_oss.py
@@ -0,0 +1,293 @@
+"""
+Sync code from OSS repo to the local repo and open a PR if changes exist.
+
+NOTE:
+1. You need to execute this script in the git root folder.
+2. A GH_TOKEN environment variable is required to create the pull request.
+ - see also https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/managing-your-personal-access-tokens
+
+This script will:
+1. Clone the sgl-project/sglang repository (or use a local copy).
+2. Sync specified files and directories using rsync.
+3. Check if the sync operation resulted in any changes.
+4. If there are changes:
+ a. Create a new branch.
+ b. Commit and push the changes.
+ c. Open a pull request using the GitHub CLI (gh).
+
+Usage:
+# Run the full sync and PR creation process
+python3 scripts/code_sync/copy_from_oss.py
+
+# Perform a dry run without making any actual changes
+python3 scripts/code_sync/copy_from_oss.py --dry-run
+
+# Use a local directory as the source instead of cloning
+python3 scripts/code_sync/copy_from_oss.py --local-dir ~/projects/sglang
+"""
+
+import argparse
+import datetime
+import os
+import shutil
+import subprocess
+import tempfile
+
+# --- Configuration Begin ---
+# List of folders and files to copy from the OSS repo.
+# Changes outside these paths will be ignored.
+folder_names = [
+ "3rdparty",
+ "assets",
+ "benchmark",
+ "docker",
+ "docs",
+ "examples",
+ "sgl-kernel",
+ "README.md",
+ "python/sglang/lang",
+ "python/sglang/srt",
+ "python/sglang/test",
+ "test/lang",
+ "test/srt",
+]
+
+private_repo = "your-org/sglang-private-repo"
+# --- Configuration End ---
+
+
+def write_github_step_summary(content):
+ if not os.environ.get("GITHUB_STEP_SUMMARY"):
+ return
+
+ with open(os.environ["GITHUB_STEP_SUMMARY"], "a") as f:
+ f.write(content)
+
+
+def check_dependencies():
+ """Check for required command-line tools."""
+ if not shutil.which("git"):
+ raise EnvironmentError("git is not installed or not in PATH.")
+ if not shutil.which("gh"):
+ raise EnvironmentError("GitHub CLI (gh) is not installed or not in PATH.")
+ print("✅ All dependencies (git, gh) are available.")
+
+
+def checkout_main(dry_run):
+ """Checkout to the main branch."""
+ commands = [
+ "git checkout main",
+ "git reset --hard",
+ ]
+ for cmd in commands:
+ print(f"Run: {cmd}")
+ if not dry_run:
+ try:
+ subprocess.run(cmd, shell=True, check=True, capture_output=True)
+ except subprocess.CalledProcessError as e:
+ print(f"Git command failed: {e.stderr.decode()}")
+ raise
+ print("✅ Checkout the main branch.")
+
+
+def get_source_folder(args):
+ """
+ Prepare the source repository, either by cloning from GitHub or using a local directory.
+ Returns the path to the source repo root, a temporary directory path (if created),
+ and the short commit hash.
+ """
+ temp_dir = None
+ if args.local_dir:
+ oss_root = os.path.expanduser(args.local_dir)
+ if not os.path.exists(oss_root):
+ raise FileNotFoundError(
+ f"Specified local directory {oss_root} does not exist."
+ )
+ print(f"Using local directory as the source: {oss_root}")
+ else:
+ temp_dir = tempfile.mkdtemp()
+ oss_root = temp_dir
+ print(f"Created temporary directory: {oss_root}")
+
+ repo_url = "https://github.com/sgl-project/sglang.git"
+ try:
+ subprocess.run(
+ [
+ "git",
+ "clone",
+ "--single-branch",
+ "--branch",
+ "main",
+ repo_url,
+ temp_dir,
+ ],
+ check=True,
+ capture_output=True,
+ )
+ print(f"Successfully cloned repository to {temp_dir}")
+ except subprocess.CalledProcessError as e:
+ print(f"Error cloning repository: {e.stderr.decode()}")
+ raise
+
+ commit_hash = subprocess.run(
+ ["git", "-C", oss_root, "rev-parse", "HEAD"],
+ capture_output=True,
+ text=True,
+ check=True,
+ ).stdout.strip()[:8]
+ print(f"✅ Get source OSS code at commit: {commit_hash}")
+ return oss_root, temp_dir, commit_hash
+
+
+def sync_directories(oss_root, folder_names, dry_run):
+ """Sync specified directories from oss_root to current working directory."""
+ rsync_commands = []
+ for folder_name in folder_names:
+ target_name = f"{oss_root}/{folder_name}"
+ src_name = "./" + "/".join(folder_name.split("/")[:-1])
+ cmd = f"rsync -r --delete {target_name} {src_name}"
+ rsync_commands.append(cmd)
+
+ for cmd in rsync_commands:
+ try:
+ print(f"Run: {cmd}")
+ if not dry_run:
+ subprocess.run(cmd, shell=True, check=True)
+ except subprocess.CalledProcessError as e:
+ print(f"Error executing command '{cmd}': {e}")
+ raise
+ print(f"✅ Sync all folders.")
+
+
+def check_for_changes():
+ """Check if there are any uncommitted git changes."""
+ # This command exits with 1 if there are changes, 0 otherwise.
+ result = subprocess.run(["git", "diff", "--quiet"])
+ return result.returncode != 0
+
+
+def create_and_push_branch(branch_name, commit_message, dry_run):
+ """Create a new branch, commit all changes, and push to origin."""
+ commands = [
+ f"git checkout -b {branch_name}",
+ "git config user.name 'github-actions[bot]'",
+ "git config user.email 'github-actions[bot]@users.noreply.github.com'",
+ "git add .",
+ f"git commit -m '{commit_message}'",
+ f"git push origin {branch_name} --force",
+ ]
+ print("\nCreating and pushing git branch...")
+ for cmd in commands:
+ print(f"Run: {cmd}")
+ if not dry_run:
+ try:
+ subprocess.run(cmd, shell=True, check=True, capture_output=True)
+ except subprocess.CalledProcessError as e:
+ print(f"Git command failed: {e.stderr.decode()}")
+ raise
+
+
+def create_pull_request(branch_name, title, body, dry_run):
+ """Create a pull request using the GitHub CLI."""
+ gh_token = os.getenv("GH_TOKEN")
+ if not gh_token:
+ print(
+ "\n⚠️ Warning: GH_TOKEN environment variable not set. Skipping PR creation."
+ )
+ if not dry_run:
+ return
+
+ print("\nCreating pull request...")
+ command = [
+ "gh",
+ "pr",
+ "create",
+ "--base",
+ "main",
+ "--head",
+ branch_name,
+ "--repo",
+ private_repo,
+ "--title",
+ title,
+ "--body",
+ body,
+ ]
+ print(f"Run: {' '.join(command)}")
+ if not dry_run:
+ env = os.environ.copy()
+ env["GH_TOKEN"] = gh_token
+ try:
+ result = subprocess.run(
+ command, check=True, capture_output=True, text=True, env=env
+ )
+ pr_url = result.stdout.strip()
+ msg = f"✅ Successfully created pull request: {pr_url}"
+ print(msg)
+ write_github_step_summary(msg)
+ except subprocess.CalledProcessError as e:
+ print(f"Error creating pull request: {e.stderr}")
+ raise
+
+
+def main():
+ parser = argparse.ArgumentParser(
+ description="Copy code from OSS and open a PR if changes are detected."
+ )
+ parser.add_argument(
+ "--local-dir",
+ type=str,
+ help="Path to local SGLang directory to use instead of cloning from GitHub.",
+ )
+ parser.add_argument(
+ "--dry-run",
+ action="store_true",
+ help="Dry run the script without executing git, rsync, or gh commands.",
+ )
+ args = parser.parse_args()
+
+ check_dependencies()
+ checkout_main(args.dry_run)
+
+ oss_root, temp_dir, oss_commit = get_source_folder(args)
+
+ try:
+ # Sync directories
+ sync_directories(oss_root, folder_names, args.dry_run)
+
+ # Check for changes and create PR if necessary
+ if not check_for_changes():
+ msg = "😴 No changes detected. The code is already in sync."
+ print(msg)
+ write_github_step_summary(msg)
+ return
+
+ print("✅ Changes detected. Proceeding to create a PR.")
+
+ current_date = datetime.datetime.now().strftime("%Y%m%d")
+ branch_name = f"copy-from-oss-{oss_commit}-{current_date}"
+ commit_message = f"Copy OSS code from {oss_commit} on {current_date}"
+ pr_title = (
+ f"[Automated PR] Copy OSS code from commit {oss_commit} on {current_date}"
+ )
+ pr_body = (
+ f"Copy OSS code from https://github.com/sgl-project/sglang/commit/{oss_commit} on {current_date}."
+ "\n\n---\n\n"
+ "*This is an automated PR created by scripts/copy_from_oss.py.*"
+ )
+
+ create_and_push_branch(branch_name, commit_message, args.dry_run)
+ create_pull_request(branch_name, pr_title, pr_body, args.dry_run)
+
+ finally:
+ # Remove temporary directory if it was created
+ if temp_dir:
+ try:
+ shutil.rmtree(temp_dir)
+ print(f"\nRemoved temporary directory: {temp_dir}")
+ except OSError as e:
+ print(f"Error removing temporary directory {temp_dir}: {e}")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/scripts/code_sync/copy_to_oss.py b/scripts/code_sync/copy_to_oss.py
new file mode 100644
index 00000000000..cd931ffd6e5
--- /dev/null
+++ b/scripts/code_sync/copy_to_oss.py
@@ -0,0 +1,425 @@
+"""
+Sync a specific commit from the local private repo to the OSS upstream and open a PR.
+
+NOTE:
+1. You need to execute this script in the git root folder.
+2. A GH_TOKEN environment variable is required to create the pull request.
+ - see also https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/managing-your-personal-access-tokens
+
+This script will:
+1. Take a commit hash as an argument (or use the latest commit by default).
+2. Create a patch for that commit.
+3. Filter the patch to only include changes in specified directories.
+4. Clone the sgl-project/sglang repository.
+5. Create a new branch in the OSS repo.
+6. Apply the filtered patch, commit, and force push.
+7. Open a pull request to the OSS repo using the GitHub CLI (gh).
+
+Usage:
+# Sync the latest commit from the current branch
+python3 scripts/code_sync/copy_to_oss.py
+
+# Run the full sync and PR creation process for a given commit
+python3 scripts/code_sync/copy_to_oss.py --commit <commit_sha>
+
+# Perform a dry run without making any actual changes
+python3 scripts/code_sync/copy_to_oss.py --commit <commit_sha> --dry-run
+"""
+
+import argparse
+import datetime
+import os
+import shutil
+import subprocess
+import tempfile
+
+# --- Configuration Begin ---
+# List of folders and files to copy to the OSS repo.
+# Changes outside these paths will be ignored.
+folder_names = [
+ "3rdparty",
+ "assets",
+ "benchmark",
+ "docker",
+ "docs",
+ "examples",
+ "sgl-kernel",
+ "README.md",
+ "python/sglang/lang",
+ "python/sglang/srt",
+ "python/sglang/test",
+ "test/lang",
+ "test/srt",
+]
+
+# --- Configuration End ---
+
+
+def write_github_step_summary(content):
+ if not os.environ.get("GITHUB_STEP_SUMMARY"):
+ return
+
+ with open(os.environ["GITHUB_STEP_SUMMARY"], "a") as f:
+ f.write(content)
+
+
+def get_commit_info(commit_ref):
+ """
+ Retrieves the hash and message of a specific commit.
+
+ Args:
+ commit_ref (str): The commit hash, tag, or branch to inspect (e.g., 'HEAD').
+
+ Returns:
+ A tuple containing the (commit_hash, commit_message),
+ or (None, None) if an error occurs.
+ """
+ try:
+ # Use a custom format to get the hash (%H) and the full message (%B)
+ # separated by a null character for safe parsing.
+ command = ["git", "log", "-1", f"--pretty=%H%x00%B", commit_ref]
+ result = subprocess.run(
+ command, capture_output=True, text=True, check=True, encoding="utf-8"
+ )
+
+ # Split the output by the null character separator
+ commit_hash, commit_message = result.stdout.strip().split("\x00", 1)
+ return commit_hash, commit_message
+
+ except FileNotFoundError:
+ print("❌ Error: 'git' command not found. Is Git installed and in your PATH?")
+ except subprocess.CalledProcessError as e:
+ print(f"❌ Error getting commit info for '{commit_ref}': {e.stderr.strip()}")
+ print(
+ "Hint: Make sure you are running this from within a Git repository and the commit exists."
+ )
+
+ return None, None
+
+
+def check_dependencies():
+ """Check for required command-line tools."""
+ if not shutil.which("git"):
+ raise EnvironmentError("git is not installed or not in PATH.")
+ if not shutil.which("gh"):
+ raise EnvironmentError("GitHub CLI (gh) is not installed or not in PATH.")
+ print("✅ All dependencies (git, gh) are available.")
+
+
+def create_filtered_patch(commit_hash, dry_run):
+ """
+ Create a patch file for the given commit, containing only changes
+ to files and directories specified in `folder_names`.
+ """
+ print(f"Creating a filtered patch for commit {commit_hash}")
+
+ try:
+ # Get the list of all files changed in the commit
+ changed_files_raw = subprocess.run(
+ ["git", "diff-tree", "--no-commit-id", "--name-only", "-r", commit_hash],
+ capture_output=True,
+ text=True,
+ check=True,
+ ).stdout
+ changed_files = changed_files_raw.strip().split("\n")
+
+ # Filter the list of files
+ relevant_files = [
+ f for f in changed_files if any(f.startswith(path) for path in folder_names)
+ ]
+
+ if not relevant_files:
+ msg = "\n😴 No relevant file changes found in this commit. Exiting."
+ print(msg)
+ write_github_step_summary(msg)
+ return None, None
+
+ print("Found relevant changes in the following files:")
+ for f in relevant_files:
+ print(f" - {f}")
+
+ # Create a patch containing only the changes for the relevant files
+ patch_command = [
+ "git",
+ "format-patch",
+ "--stdout",
+ f"{commit_hash}^..{commit_hash}",
+ "--",
+ ] + relevant_files
+
+ print(f"Run: {' '.join(patch_command)}")
+
+ patch_content = subprocess.run(
+ patch_command, capture_output=True, text=True, check=True
+ ).stdout
+
+ # Save the patch to a temporary file
+ patch_file = tempfile.NamedTemporaryFile(
+ mode="w", delete=False, suffix=".patch", encoding="utf-8"
+ )
+ patch_file.write(patch_content)
+ patch_file.close()
+
+ print(f"✅ Filtered patch created successfully at: {patch_file.name}")
+ return patch_file.name, relevant_files
+
+ except subprocess.CalledProcessError as e:
+ print(f"Error creating patch: {e.stderr}")
+ raise
+
+
+def get_oss_repo(dry_run):
+ """
+ Clones the OSS repository into a temporary directory.
+ Returns the path to the repo root and the temp directory itself.
+ """
+    gh_token = os.getenv("GH_TOKEN")
+    if not gh_token and not dry_run:
+        # The token is required to clone over HTTPS and to push the sync branch later.
+        raise EnvironmentError(
+            "GH_TOKEN environment variable is not set; cannot clone the OSS repo."
+        )
+
+ temp_dir = tempfile.mkdtemp()
+ oss_root = os.path.join(temp_dir, "sglang")
+ print(f"\nCreated temporary directory for OSS repo: {temp_dir}")
+
+ repo_url = f"https://{gh_token}@github.com/sgl-project/sglang.git"
+ command = ["git", "clone", "--branch", "main", repo_url, oss_root]
+
+ print(f"Run: {' '.join(command)}")
+ if not dry_run:
+ try:
+ subprocess.run(command, check=True, capture_output=True)
+ print(f"✅ Successfully cloned repository to {oss_root}")
+ except subprocess.CalledProcessError as e:
+ print(f"Error cloning repository: {e.stderr.decode()}")
+ shutil.rmtree(temp_dir)
+ raise
+
+ return oss_root, temp_dir
+
+
+def apply_patch_and_push(oss_root, patch_file, branch_name, commit_message, dry_run):
+ """
+ In the OSS repo, create a branch, apply the patch, commit, and push.
+ """
+ print("\nApplying patch and pushing to OSS repo...")
+
+ original_cwd = os.getcwd()
+ if not dry_run:
+ os.chdir(oss_root)
+
+ try:
+ # Define commands as lists to avoid shell injection issues
+ commands_to_run = [
+ ["git", "checkout", "-b", branch_name],
+ ["git", "apply", patch_file],
+ ["git", "config", "user.name", "github-actions[bot]"],
+ [
+ "git",
+ "config",
+ "user.email",
+ "github-actions[bot]@users.noreply.github.com",
+ ],
+ ["git", "add", "."],
+ ]
+
+ for cmd_list in commands_to_run:
+ print(f"Run: {' '.join(cmd_list)}")
+ if not dry_run:
+ subprocess.run(cmd_list, check=True, capture_output=True, text=True)
+
+ # Handle commit separately to pass multi-line message safely via stdin
+ commit_cmd = ["git", "commit", "-F", "-"]
+ print(f"Run: {' '.join(commit_cmd)}")
+ if not dry_run:
+ print(f"Commit Message:\n---\n{commit_message}\n---")
+ subprocess.run(
+ commit_cmd,
+ input=commit_message,
+ text=True,
+ check=True,
+ capture_output=True,
+ )
+
+ # Push the changes
+ push_cmd = ["git", "push", "origin", branch_name, "--force"]
+ print(f"Run: {' '.join(push_cmd)}")
+ if not dry_run:
+ subprocess.run(push_cmd, check=True, capture_output=True, text=True)
+
+ except subprocess.CalledProcessError as e:
+ print(f"Git command failed: {e.stderr}")
+ raise
+ finally:
+ if not dry_run:
+ os.chdir(original_cwd)
+
+ print("✅ Branch created, patch applied, and pushed successfully.")
+
+
+def create_pull_request(oss_root, branch_name, title, body, dry_run):
+ """Create a pull request in the OSS repo using the GitHub CLI."""
+ gh_token = os.getenv("GH_TOKEN")
+ if not gh_token:
+ print("⚠️ Warning: GH_TOKEN environment variable not set. Skipping PR creation.")
+ if not dry_run:
+ return
+
+ print("\nCreating pull request...")
+ command = [
+ "gh",
+ "pr",
+ "create",
+ "--base",
+ "main",
+ "--head",
+ branch_name,
+ "--repo",
+ "sgl-project/sglang",
+ "--title",
+ title,
+ "--body",
+ body,
+ ]
+
+ print(f"Run: {' '.join(command)}")
+ if not dry_run:
+ env = os.environ.copy()
+ env["GH_TOKEN"] = gh_token
+ try:
+ result = subprocess.run(
+ command,
+ check=True,
+ capture_output=True,
+ text=True,
+ env=env,
+ cwd=oss_root,
+ )
+ msg = f"✅ Successfully created pull request: {result.stdout.strip()}"
+ print(msg)
+ write_github_step_summary(msg)
+ except subprocess.CalledProcessError as e:
+ print(f"Error creating pull request: {e.stderr}")
+ # Check if a PR already exists
+ if "A pull request for" in e.stderr and "already exists" in e.stderr:
+ print("ℹ️ A PR for this branch likely already exists.")
+ else:
+ raise
+
+
+def get_commit_author(commit_hash):
+ """Get the author name and email of a commit."""
+ try:
+ author_name = subprocess.run(
+ ["git", "show", "-s", "--format=%an", commit_hash],
+ capture_output=True,
+ text=True,
+ check=True,
+ ).stdout.strip()
+ author_email = subprocess.run(
+ ["git", "show", "-s", "--format=%ae", commit_hash],
+ capture_output=True,
+ text=True,
+ check=True,
+ ).stdout.strip()
+ return author_name, author_email
+ except subprocess.CalledProcessError as e:
+ print(f"Error getting commit author for {commit_hash}: {e.stderr}")
+ raise
+
+
+def main():
+ parser = argparse.ArgumentParser(
+ description="Copy a commit from the private repo to OSS and open a PR."
+ )
+ parser.add_argument(
+ "--commit",
+ type=str,
+ default="LAST",
+ help="The commit hash to sync. Defaults to 'LAST' to use the latest commit.",
+ )
+ parser.add_argument(
+ "--dry-run",
+ action="store_true",
+ help="Dry run the script without executing git, rsync, or gh commands.",
+ )
+ args = parser.parse_args()
+
+ check_dependencies()
+
+ commit_ref = "HEAD" if args.commit == "LAST" else args.commit
+ commit_hash, original_commit_message = get_commit_info(commit_ref)
+
+ if not commit_hash:
+ return # Exit if we couldn't get commit info
+
+ # Display the details of the commit being processed
+ if args.commit == "LAST":
+ summary = (
+ f"\nℹ️ No commit specified. Using the last commit:\n"
+ f" - **Hash:** `{commit_hash}`\n"
+ f" - **Message:** {original_commit_message}\n\n"
+ )
+ else:
+ summary = (
+ f"\nℹ️ Using specified commit:\n"
+ f" - **Hash:** `{commit_hash}`\n"
+ f" - **Message:** {original_commit_message}\n\n"
+ )
+ print(summary)
+ write_github_step_summary(summary)
+
+ short_hash = commit_hash[:8]
+
+ patch_file = None
+ temp_dir = None
+ try:
+ # 1. Create a filtered patch from the local repo
+ patch_file, relevant_files = create_filtered_patch(commit_hash, args.dry_run)
+ if not patch_file:
+ return
+
+ # 2. Get the OSS repo
+ oss_root, temp_dir = get_oss_repo(args.dry_run)
+
+ # 3. Get original commit author for the co-author line
+ author_name, author_email = get_commit_author(commit_hash)
+
+ # 4. Prepare content for the commit and PR based on changed files
+ file_list_str = "\n".join([f"- {f}" for f in relevant_files])
+ filename_list_str = ", ".join([f.split("/")[-1] for f in relevant_files])
+ if len(filename_list_str) > 40:
+ filename_list_str = filename_list_str[:40] + "..."
+ current_date = datetime.datetime.now().strftime("%Y%m%d")
+ pr_title = f"[Auto Sync] Update {filename_list_str} ({current_date})"
+ pr_body = (
+ f"Sync changes from commit `{short_hash}`.\n\n"
+ f"**Relevant Files Changed:**\n{file_list_str}"
+ "\n\n---\n\n"
+ "*This is an automated PR created by a script.*"
+ )
+
+ # 5. Create branch, apply patch, and push
+ branch_name = f"sync-{short_hash}-{current_date}"
+ co_author_line = f"Co-authored-by: {author_name} <{author_email}>"
+ commit_message = f"{pr_title}\n\n{co_author_line}"
+ apply_patch_and_push(
+ oss_root, patch_file, branch_name, commit_message, args.dry_run
+ )
+
+ # 6. Create Pull Request
+ create_pull_request(oss_root, branch_name, pr_title, pr_body, args.dry_run)
+
+ finally:
+ # Cleanup temporary files
+ if patch_file and os.path.exists(patch_file):
+ os.remove(patch_file)
+ print(f"\nRemoved temporary patch file: {patch_file}")
+ if temp_dir and os.path.exists(temp_dir):
+ shutil.rmtree(temp_dir)
+ print(f"Removed temporary directory: {temp_dir}")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/scripts/code_sync/guideline.md b/scripts/code_sync/guideline.md
new file mode 100644
index 00000000000..52f08eb4b0a
--- /dev/null
+++ b/scripts/code_sync/guideline.md
@@ -0,0 +1,27 @@
+### Sync Code Between OSS and Private Fork
+
+You can use the following principles and tools to sync the code between a private fork and the OSS repo [sgl-project/sglang](https://github.com/sgl-project/sglang/tree/main).
+The approach is modeled on [Copybara](https://github.com/google/copybara), a tool used at Google to keep open-source code in sync with internal code.
+
+## Principles
+
+- The core folders (e.g., `python/sglang/srt`) are 100% mirrored between the private fork and the OSS repo.
+- The OSS repo is the single source of truth. If a commit changes `python/sglang/srt` in the private repo, the change should be synced to the OSS repo as soon as possible with Action B below.
+- Common code (e.g., base classes, well-known techniques in the industry without private secrets) goes to `python/sglang/srt`. Private-specific code (e.g., private-only features, confidential info) goes to `python/sglang/private`.
+- Anytime you want to make private changes to a file or class under `python/sglang/srt`, duplicate the file under `python/sglang/private`. You can achieve code reuse by importing and inheriting, as sketched below.
+
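+A minimal sketch of this reuse pattern (the module and class names below, e.g. `sglang.srt.managers.scheduler.Scheduler` and `PrivateScheduler`, are illustrative assumptions; adapt them to the file you duplicate):
+
+```python
+# python/sglang/private/srt/scheduler.py  (hypothetical private copy)
+# Reuse the OSS implementation by importing and inheriting from it; keep only
+# the private-specific overrides under python/sglang/private.
+from sglang.srt.managers.scheduler import Scheduler  # assumed OSS base class
+
+
+class PrivateScheduler(Scheduler):
+    """Private extension of the OSS scheduler; unchanged behavior is inherited."""
+
+    # Add private-only methods or overrides here.
+```
+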
+## How to sync the code bidirectionally
+### Action A: Copy code from OSS to private
+
+- We can run this action: [Open A PR to Copy Code From OSS](https://github.com/sgl-project/sglang/tree/main/.github/workflows/open-pr-copy-from-oss.yml)
+ - It opens a PR to copy all files under certain folders (e.g., `python/sglang/srt`, `test/srt`, `sgl-kernel`) from the OSS main branch to the private fork.
+ - Since the OSS repo is the single source of truth, this action copies files and overwrites any changes in the private fork. To prevent the private changes from being overwritten, you need to ensure all private changes are merged into the OSS repo before running this action.
+- This action will be run automatically every day and can also be triggered manually.
+
+### Action B: Copy diff from private to OSS
+
+- We can run this action: [Open A PR to Copy Code To OSS](https://github.com/sgl-project/sglang/tree/main/.github/workflows/open-pr-copy-to-oss.yml)
+ - It opens a PR to apply the diff of one specific commit of the private fork to the OSS main branch. It will only pick the changes under certain folders (e.g., `python/sglang/srt`, `test/srt`, `sgl-kernel`) and ignore changes under private folders (e.g., `python/sglang/private`).
+ - For example, you can have a PR that changes both `python/sglang/srt` and `python/sglang/private/srt`. Once you merge the PR into the private repo, `python/sglang/srt` becomes desynced between the two repos. You need to run this action on your merge commit immediately to open a PR that sends your diff to the OSS repo, and that OSS PR should be merged as soon as possible. Once it is merged, we can run Action A again.
+ - Action A copies files directly, while Action B applies a diff. Because the OSS repo is the source of truth, Action A can simply overwrite the private copies; Action B cannot overwrite the OSS repo, so it applies only the relevant diff instead.
+- This action currently requires a manual trigger in order to prevent accidental code leaks. It could also be made automatic in the future.
diff --git a/scripts/code_sync/install_github_cli.sh b/scripts/code_sync/install_github_cli.sh
new file mode 100755
index 00000000000..2ef1db02395
--- /dev/null
+++ b/scripts/code_sync/install_github_cli.sh
@@ -0,0 +1,18 @@
+#!/bin/bash
+
+# Check if gh is installed before attempting to install it
+if ! command -v gh &> /dev/null; then
+    echo "GitHub CLI not found. Installing now..."
+    (type -p wget >/dev/null || (apt update && apt install wget -y)) \
+        && mkdir -p -m 755 /etc/apt/keyrings \
+        && out=$(mktemp) && wget -nv -O$out https://cli.github.com/packages/githubcli-archive-keyring.gpg \
+        && cat $out | tee /etc/apt/keyrings/githubcli-archive-keyring.gpg > /dev/null \
+        && chmod go+r /etc/apt/keyrings/githubcli-archive-keyring.gpg \
+        && mkdir -p -m 755 /etc/apt/sources.list.d \
+        && echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" | tee /etc/apt/sources.list.d/github-cli.list > /dev/null \
+        && apt update \
+        && apt install gh -y
+else
+    echo "GitHub CLI is already installed. Skipping installation."
+fi
diff --git a/scripts/playground/bench_speculative.py b/scripts/playground/bench_speculative.py
index f16ff4460a2..c89e99242f1 100644
--- a/scripts/playground/bench_speculative.py
+++ b/scripts/playground/bench_speculative.py
@@ -16,8 +16,14 @@
import numpy as np
import requests
+from transformers import AutoTokenizer
-from sglang.bench_serving import DatasetRow, benchmark, set_global_args
+from sglang.bench_serving import (
+ DatasetRow,
+ benchmark,
+ sample_mmmu_requests,
+ set_global_args,
+)
from sglang.srt.server_args import ServerArgs
from sglang.test.test_utils import (
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
@@ -48,20 +54,33 @@ def encode(self, text: str, add_special_tokens: bool = False):
return []
-def send_one_batch(base_url, num_prompts, batch_size):
- padded_prompts = (prompts * ((num_prompts + len(prompts) - 1) // len(prompts)))[
- :num_prompts
- ]
-
+def send_one_batch(base_url, num_prompts, batch_size, tokenizer, is_multimodal):
# format: (prompt, input_len, output len). We set input_len as a dummy value 0.
- input_requests: List[DatasetRow] = [DatasetRow(p, 0, 512) for p in padded_prompts]
+ if is_multimodal:
+ input_requests = sample_mmmu_requests(
+ num_prompts,
+ tokenizer,
+ 512,
+ apply_chat_template=False,
+ )
+ backend = "sglang-oai-chat"
+ api_url = f"{base_url}/v1/chat/completions"
+ else:
+ padded_prompts = (prompts * ((num_prompts + len(prompts) - 1) // len(prompts)))[
+ :num_prompts
+ ]
+ input_requests: List[DatasetRow] = [
+ DatasetRow(p, 0, 512) for p in padded_prompts
+ ]
+ backend = "sglang"
+ api_url = f"{base_url}/generate"
# We need to set some dummy values in order to call `benchmark` below.
args = SimpleNamespace(
disable_ignore_eos=False,
disable_stream=False,
return_logprob=False,
- backend="sglang",
+ backend=backend,
dataset_name="custom",
num_prompts=None,
sharegpt_output_len=None,
@@ -73,13 +92,12 @@ def send_one_batch(base_url, num_prompts, batch_size):
output_details=False,
)
set_global_args(args)
- tokenizer = FakeTokenizer()
# Run benchmark
results = asyncio.run(
benchmark(
- backend="sglang",
- api_url=f"{base_url}/generate",
+ backend=backend,
+ api_url=api_url,
base_url=base_url,
model_id="default",
tokenizer=tokenizer,
@@ -143,8 +161,6 @@ def main(args, server_args):
other_args = []
else:
other_args = [
- "--speculative-algorithm",
- "EAGLE",
"--speculative-num-steps",
steps,
"--speculative-eagle-topk",
@@ -157,6 +173,8 @@ def main(args, server_args):
[
"--speculative-draft-model-path",
server_args.speculative_draft_model_path,
+ "--speculative-algorithm",
+ server_args.speculative_algorithm,
]
)
@@ -207,13 +225,23 @@ def main(args, server_args):
},
)
+ tokenizer = AutoTokenizer.from_pretrained(
+ args.model_path, trust_remote_code=server_args.trust_remote_code
+ )
+
try:
# Warmup
- send_one_batch(base_url, batch_size, batch_size)
+ send_one_batch(
+ base_url, batch_size, batch_size, tokenizer, args.is_multimodal
+ )
# Benchmark
acc_length, step_time, speed, completion_tokens = send_one_batch(
- base_url, max(args.num_prompts, batch_size), batch_size
+ base_url,
+ max(args.num_prompts, batch_size),
+ batch_size,
+ tokenizer,
+ args.is_multimodal,
)
finally:
kill_process_tree(process.pid)
@@ -273,6 +301,7 @@ def main(args, server_args):
parser.add_argument("--start", type=int, default=0)
parser.add_argument("--end", type=int)
parser.add_argument("--output", type=str, default="output.jsonl")
+ parser.add_argument("--is-multimodal", action="store_true", default=False)
args = parser.parse_args()
server_args: ServerArgs = ServerArgs.from_cli_args(args)
diff --git a/scripts/playground/frontend_reasoning.ipynb b/scripts/playground/frontend_reasoning.ipynb
index c0ce4910ceb..fcdce25aba2 100644
--- a/scripts/playground/frontend_reasoning.ipynb
+++ b/scripts/playground/frontend_reasoning.ipynb
@@ -13,63 +13,7 @@
"cell_type": "code",
"execution_count": null,
"metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/workspaces/sglang/.venv/lib/python3.12/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
- " from .autonotebook import tqdm as notebook_tqdm\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "[2025-05-05 17:53:32] server_args=ServerArgs(model_path='Qwen/Qwen3-4B', tokenizer_path='Qwen/Qwen3-4B', tokenizer_mode='auto', skip_tokenizer_init=False, enable_tokenizer_batch_encode=False, load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization=None, quantization_param_path=None, context_length=None, device='cuda', served_model_name='Qwen/Qwen3-4B', chat_template=None, completion_template=None, is_embedding=False, revision=None, host='0.0.0.0', port=38475, mem_fraction_static=0.88, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=8192, max_prefill_tokens=16384, schedule_policy='fcfs', schedule_conservativeness=1.0, cpu_offload_gb=0, page_size=1, tp_size=1, pp_size=1, max_micro_batch_size=None, stream_interval=1, stream_output=False, random_seed=376691526, constrained_json_whitespace_pattern=None, watchdog_timeout=300, dist_timeout=None, download_dir=None, base_gpu_id=0, gpu_id_step=1, log_level='info', log_level_http=None, log_requests=False, log_requests_level=0, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_path='sglang_storage', enable_cache_report=False, reasoning_parser='qwen3', dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, lora_backend='triton', attention_backend=None, sampling_backend='flashinfer', grammar_backend='xgrammar', speculative_algorithm=None, speculative_draft_model_path=None, speculative_num_steps=None, speculative_eagle_topk=None, speculative_num_draft_tokens=None, speculative_accept_threshold_single=1.0, speculative_accept_threshold_acc=1.0, speculative_token_map=None, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, enable_nccl_nvls=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, enable_multimodal=None, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_deepep_moe=False, deepep_mode='auto', enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=None, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None, enable_hierarchical_cache=False, hicache_ratio=2.0, hicache_size=0, hicache_write_policy='write_through_selective', flashinfer_mla_disable_ragged=False, warmups=None, moe_dense_tp_size=None, n_share_experts_fusion=0, disable_chunked_prefix_cache=False, disable_fast_image_processor=False, debug_tensor_dump_output_folder=None, debug_tensor_dump_input_file=None, debug_tensor_dump_inject=False, disaggregation_mode='null', disaggregation_bootstrap_port=8998, disaggregation_transfer_backend='mooncake', disaggregation_ib_device=None)\n",
- "[2025-05-05 17:53:38] Attention backend not set. Use flashinfer backend by default.\n",
- "[2025-05-05 17:53:38] Init torch distributed begin.\n",
- "[2025-05-05 17:53:38] Init torch distributed ends. mem usage=0.00 GB\n",
- "[2025-05-05 17:53:38] Load weight begin. avail mem=43.89 GB\n",
- "[2025-05-05 17:53:39] Using model weights format ['*.safetensors']\n",
- "Loading safetensors checkpoint shards: 0% Completed | 0/3 [00:00, ?it/s]\n",
- "Loading safetensors checkpoint shards: 67% Completed | 2/3 [00:00<00:00, 4.06it/s]\n",
- "Loading safetensors checkpoint shards: 100% Completed | 3/3 [00:01<00:00, 2.52it/s]\n",
- "Loading safetensors checkpoint shards: 100% Completed | 3/3 [00:01<00:00, 2.73it/s]\n",
- "\n",
- "[2025-05-05 17:53:40] Load weight end. type=Qwen3ForCausalLM, dtype=torch.bfloat16, avail mem=36.25 GB, mem usage=7.63 GB.\n",
- "[2025-05-05 17:53:40] KV Cache is allocated. #tokens: 225647, K size: 15.49 GB, V size: 15.49 GB\n",
- "[2025-05-05 17:53:40] Memory pool end. avail mem=4.71 GB\n",
- "2025-05-05 17:53:41,152 - INFO - flashinfer.jit: Prebuilt kernels not found, using JIT backend\n",
- "[2025-05-05 17:53:41] Capture cuda graph begin. This can take up to several minutes. avail mem=4.09 GB\n",
- "[2025-05-05 17:53:41] Capture cuda graph bs [1, 2, 4, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120, 128, 136, 144, 152, 160]\n",
- "Capturing batches (avail_mem=4.06 GB): 0%| | 0/23 [00:00, ?it/s]2025-05-05 17:53:41,620 - INFO - flashinfer.jit: Loading JIT ops: batch_decode_with_kv_cache_dtype_q_bf16_dtype_kv_bf16_dtype_o_bf16_dtype_idx_i32_head_dim_qk_128_head_dim_vo_128_posenc_0_use_swa_False_use_logits_cap_False\n",
- "2025-05-05 17:53:41,642 - INFO - flashinfer.jit: Finished loading JIT ops: batch_decode_with_kv_cache_dtype_q_bf16_dtype_kv_bf16_dtype_o_bf16_dtype_idx_i32_head_dim_qk_128_head_dim_vo_128_posenc_0_use_swa_False_use_logits_cap_False\n",
- "Capturing batches (avail_mem=2.68 GB): 100%|██████████| 23/23 [00:06<00:00, 3.75it/s]\n",
- "[2025-05-05 17:53:47] Capture cuda graph end. Time elapsed: 6.18 s. mem usage=1.41 GB. avail mem=2.67 GB.\n",
- "[2025-05-05 17:53:47] max_total_num_tokens=225647, chunked_prefill_size=8192, max_prefill_tokens=16384, max_running_requests=2821, context_len=40960\n",
- "[2025-05-05 17:53:48] INFO: Started server process [1104179]\n",
- "[2025-05-05 17:53:48] INFO: Waiting for application startup.\n",
- "[2025-05-05 17:53:48] INFO: Application startup complete.\n",
- "[2025-05-05 17:53:48] INFO: Uvicorn running on http://0.0.0.0:38475 (Press CTRL+C to quit)\n",
- "[2025-05-05 17:53:48] INFO: 127.0.0.1:37502 - \"GET /v1/models HTTP/1.1\" 200 OK\n",
- "[2025-05-05 17:53:49] INFO: 127.0.0.1:37516 - \"GET /get_model_info HTTP/1.1\" 200 OK\n",
- "[2025-05-05 17:53:49] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, token usage: 0.00, #running-req: 0, #queue-req: 0\n",
- "2025-05-05 17:53:49,777 - INFO - flashinfer.jit: Loading JIT ops: batch_prefill_with_kv_cache_dtype_q_bf16_dtype_kv_bf16_dtype_o_bf16_dtype_idx_i32_head_dim_qk_128_head_dim_vo_128_posenc_0_use_swa_False_use_logits_cap_False_f16qk_False\n",
- "2025-05-05 17:53:49,799 - INFO - flashinfer.jit: Finished loading JIT ops: batch_prefill_with_kv_cache_dtype_q_bf16_dtype_kv_bf16_dtype_o_bf16_dtype_idx_i32_head_dim_qk_128_head_dim_vo_128_posenc_0_use_swa_False_use_logits_cap_False_f16qk_False\n",
- "[2025-05-05 17:53:50] INFO: 127.0.0.1:37526 - \"POST /generate HTTP/1.1\" 200 OK\n",
- "[2025-05-05 17:53:50] The server is fired up and ready to roll!\n",
- "\n",
- "\n",
- " NOTE: Typically, the server runs in a separate terminal.\n",
- " In this notebook, we run the server and notebook code together, so their outputs are combined.\n",
- " To improve clarity, the server logs are displayed in the original black color, while the notebook outputs are highlighted in blue.\n",
- " We are running those notebooks in a CI parallel environment, so the throughput is not representative of the actual performance.\n",
- " \n",
- "Server started on http://localhost:38475\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"from sglang import separate_reasoning, assistant_begin, assistant_end\n",
"from sglang import assistant, function, gen, system, user\n",
@@ -105,15 +49,7 @@
"cell_type": "code",
"execution_count": null,
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "[2025-05-05 17:53:53] INFO: 127.0.0.1:37530 - \"GET /get_model_info HTTP/1.1\" 200 OK\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"set_default_backend(\n",
" RuntimeEndpoint(f\"http://localhost:{port}\", chat_template_name=\"qwen\")\n",
@@ -131,41 +67,7 @@
"cell_type": "code",
"execution_count": null,
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "[2025-05-05 17:53:53] Prefill batch. #new-seq: 1, #new-token: 31, #cached-token: 0, token usage: 0.00, #running-req: 0, #queue-req: 0\n",
- "[2025-05-05 17:53:54] Decode batch. #running-req: 1, #token: 64, token usage: 0.00, gen throughput (token/s): 6.00, #queue-req: 0\n",
- "[2025-05-05 17:53:54] Decode batch. #running-req: 1, #token: 104, token usage: 0.00, gen throughput (token/s): 82.06, #queue-req: 0\n",
- "[2025-05-05 17:53:55] Decode batch. #running-req: 1, #token: 144, token usage: 0.00, gen throughput (token/s): 81.56, #queue-req: 0\n",
- "[2025-05-05 17:53:55] Decode batch. #running-req: 1, #token: 184, token usage: 0.00, gen throughput (token/s): 81.14, #queue-req: 0\n",
- "[2025-05-05 17:53:56] Decode batch. #running-req: 1, #token: 224, token usage: 0.00, gen throughput (token/s): 80.91, #queue-req: 0\n",
- "[2025-05-05 17:53:56] Decode batch. #running-req: 1, #token: 264, token usage: 0.00, gen throughput (token/s): 80.55, #queue-req: 0\n",
- "[2025-05-05 17:53:56] INFO: 127.0.0.1:37538 - \"POST /generate HTTP/1.1\" 200 OK\n",
- "\n",
- "Okay, the user is asking for three countries and their capitals. Let me think about which countries to choose. I should pick some well-known ones to make it easy for the user.\n",
- "\n",
- "First, France is a good start because its capital is Paris, which is a major city. Then maybe Germany with Berlin. Those are both in Europe and have clear capitals. \n",
- "\n",
- "Next, I need a country from another continent. Let's go with Japan, which has Tokyo as its capital. That covers Asia. \n",
- "\n",
- "Wait, should I check if there are any countries with non-obvious capitals? Maybe not necessary. The user probably wants straightforward answers. \n",
- "\n",
- "Let me confirm the capitals again. France - Paris, Germany - Berlin, Japan - Tokyo. Yep, that's correct. \n",
- "\n",
- "I should present them in a clear list. Maybe number them and list each with the capital. Keep it simple and to the point. No need for extra info unless the user asks. \n",
- "\n",
- "Alright, that should cover it. Three countries, their capitals, correct and easy to understand.\n",
- "\n",
- "\n",
- "1. **France** - Paris \n",
- "2. **Germany** - Berlin \n",
- "3. **Japan** - Tokyo\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"@function\n",
"def basic_qa(s, question):\n",
@@ -191,38 +93,7 @@
"cell_type": "code",
"execution_count": null,
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "dict_keys(['answer', 'answer_reasoning_content'])\n",
- "[2025-05-05 17:56:44] Prefill batch. #new-seq: 1, #new-token: 1, #cached-token: 30, token usage: 0.00, #running-req: 0, #queue-req: 0\n",
- "[2025-05-05 17:56:44] Decode batch. #running-req: 1, #token: 63, token usage: 0.00, gen throughput (token/s): 3.77, #queue-req: 0\n",
- "[2025-05-05 17:56:45] Decode batch. #running-req: 1, #token: 103, token usage: 0.00, gen throughput (token/s): 82.12, #queue-req: 0\n",
- "[2025-05-05 17:56:45] Decode batch. #running-req: 1, #token: 143, token usage: 0.00, gen throughput (token/s): 81.60, #queue-req: 0\n",
- "[2025-05-05 17:56:46] Decode batch. #running-req: 1, #token: 183, token usage: 0.00, gen throughput (token/s): 81.17, #queue-req: 0\n",
- "[2025-05-05 17:56:46] Decode batch. #running-req: 1, #token: 223, token usage: 0.00, gen throughput (token/s): 80.90, #queue-req: 0\n",
- "[2025-05-05 17:56:46] INFO: 127.0.0.1:45282 - \"POST /generate HTTP/1.1\" 200 OK\n",
- "\n",
- "Separated Reasoning Content:\n",
- "Okay, the user is asking for three countries and their capitals. Let me think. I need to make sure the countries are correct and their capitals are properly matched.\n",
- "\n",
- "First, I should start with a well-known country. France is a good example. Its capital is Paris. That's straightforward. Next, maybe a country in Asia. Japan's capital is Tokyo. That's correct. Then, perhaps a country in Africa. Egypt's capital is Cairo. Wait, is that right? Yes, Egypt's capital is indeed Cairo. Let me double-check. France - Paris, Japan - Tokyo, Egypt - Cairo. Those are all correct. I should present them in a clear list format. Make sure the country names are spelled correctly and the capitals are properly capitalized. No need for any extra information, just the three pairs. That should answer the user's question effectively.\n",
- "\n",
- "\n",
- "\n",
- "Content:\n",
- "1. **France** - Paris \n",
- "2. **Japan** - Tokyo \n",
- "3. **Egypt** - Cairo\n",
- "\n",
- "\n",
- "Messages:\n",
- "{'role': 'assistant', 'content': '1. **France** - Paris \\n2. **Japan** - Tokyo \\n3. **Egypt** - Cairo'}\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"@function\n",
"def basic_qa_separate_reasoning(s, question):\n",
@@ -254,71 +125,7 @@
"cell_type": "code",
"execution_count": null,
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "[2025-05-05 17:54:03] Decode batch. #running-req: 1, #token: 0, token usage: 0.00, gen throughput (token/s): 79.25, #queue-req: 0\n",
- "[2025-05-05 17:54:03] Prefill batch. #new-seq: 1, #new-token: 18, #cached-token: 18, token usage: 0.00, #running-req: 0, #queue-req: 0\n",
- "[2025-05-05 17:54:03] Decode batch. #running-req: 1, #token: 77, token usage: 0.00, gen throughput (token/s): 75.90, #queue-req: 0\n",
- "[2025-05-05 17:54:04] Decode batch. #running-req: 1, #token: 117, token usage: 0.00, gen throughput (token/s): 81.85, #queue-req: 0\n",
- "[2025-05-05 17:54:04] Decode batch. #running-req: 1, #token: 157, token usage: 0.00, gen throughput (token/s): 81.36, #queue-req: 0\n",
- "[2025-05-05 17:54:05] Decode batch. #running-req: 1, #token: 197, token usage: 0.00, gen throughput (token/s): 81.01, #queue-req: 0\n",
- "[2025-05-05 17:54:05] Decode batch. #running-req: 1, #token: 237, token usage: 0.00, gen throughput (token/s): 80.80, #queue-req: 0\n",
- "[2025-05-05 17:54:06] Decode batch. #running-req: 1, #token: 277, token usage: 0.00, gen throughput (token/s): 80.43, #queue-req: 0\n",
- "[2025-05-05 17:54:06] Decode batch. #running-req: 1, #token: 317, token usage: 0.00, gen throughput (token/s): 80.10, #queue-req: 0\n",
- "[2025-05-05 17:54:07] Decode batch. #running-req: 1, #token: 357, token usage: 0.00, gen throughput (token/s): 79.83, #queue-req: 0\n",
- "[2025-05-05 17:54:07] INFO: 127.0.0.1:41424 - \"POST /generate HTTP/1.1\" 200 OK\n",
- "\n",
- "\n",
- "first_answer:\n",
- "Here’s a list of three countries and their capitals:\n",
- "\n",
- "1. **France** – **Paris** \n",
- "2. **United States** – **Washington, D.C.** \n",
- "3. **Brazil** – **Brasília** \n",
- "\n",
- "Let me know if you'd like more examples! 😊\n",
- "\n",
- "\n",
- "first_answer_reasoning_content:\n",
- "Okay, the user is asking for a list of three countries and their capitals. Let me think about which countries to choose. They might be a student studying geography or someone just curious. I should pick well-known countries to make it easier for them.\n",
- "\n",
- "First, I'll start with the most obvious ones. France and its capital Paris are a classic example. Then, maybe the United States with Washington, D.C. That's another common one. For the third country, perhaps Brazil with Brasília? Wait, I should make sure I'm correct about the capitals. Let me double-check: France is Paris, USA is Washington, D.C., and Brazil is indeed Brasília. \n",
- "\n",
- "Alternatively, maybe including a country from a different continent could be better? Like Japan with Tokyo? But the user didn't specify any particular region. Since the first two are from Europe and North America, adding a South American country might be a good mix. \n",
- "\n",
- "Wait, but the user just asked for three, so as long as they're accurate, it's fine. I'll go with France, USA, and Brazil. Let me make sure I get the spelling right. Paris, Washington D.C., Brasília. Yeah, that's correct. I should present them in a clear list format. The user might need this for a school assignment or a quiz. Alright, that should cover it.\n",
- "\n",
- "[2025-05-05 17:54:07] Prefill batch. #new-seq: 1, #new-token: 83, #cached-token: 36, token usage: 0.00, #running-req: 0, #queue-req: 0\n",
- "[2025-05-05 17:54:07] Decode batch. #running-req: 1, #token: 138, token usage: 0.00, gen throughput (token/s): 76.16, #queue-req: 0\n",
- "[2025-05-05 17:54:08] Decode batch. #running-req: 1, #token: 178, token usage: 0.00, gen throughput (token/s): 81.10, #queue-req: 0\n",
- "[2025-05-05 17:54:08] Decode batch. #running-req: 1, #token: 218, token usage: 0.00, gen throughput (token/s): 80.91, #queue-req: 0\n",
- "[2025-05-05 17:54:09] Decode batch. #running-req: 1, #token: 258, token usage: 0.00, gen throughput (token/s): 80.63, #queue-req: 0\n",
- "[2025-05-05 17:54:09] Decode batch. #running-req: 1, #token: 298, token usage: 0.00, gen throughput (token/s): 80.29, #queue-req: 0\n",
- "[2025-05-05 17:54:10] Decode batch. #running-req: 1, #token: 338, token usage: 0.00, gen throughput (token/s): 79.96, #queue-req: 0\n",
- "[2025-05-05 17:54:10] INFO: 127.0.0.1:47266 - \"POST /generate HTTP/1.1\" 200 OK\n",
- "\n",
- "\n",
- "second_answer:\n",
- "Here’s another list of three countries and their capitals:\n",
- "\n",
- "1. **Nigeria** – **Lagos** \n",
- "2. **Japan** – **Tokyo** \n",
- "3. **Argentina** – **Buenos Aires** \n",
- "\n",
- "Let me know if you'd like more examples! 😊\n",
- "\n",
- "\n",
- "second_answer_reasoning_content:\n",
- "Okay, the user asked for another list of three countries and their capitals. Let me think about what they might need. They previously got France, the US, and Brazil. Maybe they want more variety or different regions? I should pick countries from different continents to cover a broad range.\n",
- "\n",
- "First, maybe include a country from Africa. Lagos is the capital of Nigeria, which is a common example. Then, Asia – maybe Japan, with Tokyo. That's a major country. Then, a country from South America, like Argentina with Buenos Aires. That gives a good mix. I should check if those capitals are correct. Lagos is right for Nigeria, Tokyo for Japan, and Buenos Aires for Argentina. Yeah, that works. I'll present them in a list format again, making sure to mention each country and its capital clearly. Make sure the response is friendly and offers further help if needed.\n",
- "\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"@function\n",
"def multi_turn_qa(s):\n",
@@ -360,23 +167,7 @@
"cell_type": "code",
"execution_count": null,
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "[2025-05-05 17:54:10] Prefill batch. #new-seq: 1, #new-token: 9, #cached-token: 26, token usage: 0.00, #running-req: 0, #queue-req: 0\n",
- "[2025-05-05 17:54:10] Decode batch. #running-req: 1, #token: 51, token usage: 0.00, gen throughput (token/s): 76.50, #queue-req: 0\n",
- "[2025-05-05 17:54:10] INFO: 127.0.0.1:47276 - \"POST /generate HTTP/1.1\" 200 OK\n",
- "Reasoning Content:\n",
- " \n",
- "Content:\n",
- " 1. France - Paris \n",
- "2. Germany - Berlin \n",
- "3. Japan - Tokyo\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"reasoning_state = basic_qa_separate_reasoning(\n",
" \"List 3 countries and their capitals. /no_think\"\n",
@@ -423,37 +214,7 @@
"cell_type": "code",
"execution_count": null,
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "[2025-05-05 17:54:11] Prefill batch. #new-seq: 1, #new-token: 26, #cached-token: 8, token usage: 0.00, #running-req: 0, #queue-req: 0\n",
- "[2025-05-05 17:54:11] Decode batch. #running-req: 1, #token: 68, token usage: 0.00, gen throughput (token/s): 47.33, #queue-req: 0\n",
- "[2025-05-05 17:54:12] Decode batch. #running-req: 1, #token: 108, token usage: 0.00, gen throughput (token/s): 83.03, #queue-req: 0\n",
- "[2025-05-05 17:54:12] Decode batch. #running-req: 1, #token: 148, token usage: 0.00, gen throughput (token/s): 82.51, #queue-req: 0\n",
- "[2025-05-05 17:54:13] Decode batch. #running-req: 1, #token: 188, token usage: 0.00, gen throughput (token/s): 82.06, #queue-req: 0\n",
- "[2025-05-05 17:54:13] Decode batch. #running-req: 1, #token: 228, token usage: 0.00, gen throughput (token/s): 81.80, #queue-req: 0\n",
- "[2025-05-05 17:54:14] Decode batch. #running-req: 1, #token: 268, token usage: 0.00, gen throughput (token/s): 81.48, #queue-req: 0\n",
- "[2025-05-05 17:54:14] Decode batch. #running-req: 1, #token: 308, token usage: 0.00, gen throughput (token/s): 81.14, #queue-req: 0\n",
- "[2025-05-05 17:54:15] Decode batch. #running-req: 1, #token: 348, token usage: 0.00, gen throughput (token/s): 80.84, #queue-req: 0\n",
- "[2025-05-05 17:54:15] INFO: 127.0.0.1:47290 - \"POST /generate HTTP/1.1\" 200 OK\n",
- "Answer:\n",
- "2023-10-05\n",
- "\n",
- "\n",
- "Reasoning Content:\n",
- "Okay, the user is asking for the IP addresses of Google's DNS servers. Let me recall what I know about DNS servers. Google provides two public DNS servers, right? They're commonly used for their reliability and speed.\n",
- "\n",
- "I think the primary one is 8.8.8.8. Wait, isn't there another one? Oh yeah, 8.8.4.4. Those are the two main ones. Let me make sure I'm not mixing them up with other providers. For example, Cloudflare uses 1.1.1.1 and 1.0.0.1. But Google's are definitely 8.8.8.8 and 8.8.4.4. \n",
- "\n",
- "I should check if there are any other IP addresses, but I don't think so. They have two main ones. The user might be looking to set up their DNS settings, so providing both is important. Also, maybe mention that they're both in the same range, which is 8.8.0.0/14. But the user just asked for the IP addresses, so maybe just list them. \n",
- "\n",
- "Wait, the user said \"just provide the answer,\" so maybe they don't need extra info. But to be thorough, I should confirm that those are the correct ones. Let me think if there's any chance of confusion. No, 8.8.8.8 is the primary, and 8.8.4.4 is the secondary. Yeah, that's right. So the answer is those two IPs.\n",
- "\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"print_highlight(f\"Answer:\\n{reasoning_state['answer']}\")\n",
"print_highlight(\n",
diff --git a/scripts/playground/replay_request_dump.py b/scripts/playground/replay_request_dump.py
index 93d0d7d2614..301cf948edd 100644
--- a/scripts/playground/replay_request_dump.py
+++ b/scripts/playground/replay_request_dump.py
@@ -36,7 +36,7 @@ def read_records(files):
def run_one_request_internal(record):
(req, output, replay_init_time, start_time, end_time, idx) = record
- time.sleep(max(0, start_time - (time.time() - replay_init_time)))
+ time.sleep(max(0, (start_time - (time.time() - replay_init_time)) / args.speed))
if "completion_tokens" in output.get("meta_info", {}):
recorded_completion_tokens = output["meta_info"]["completion_tokens"]
@@ -121,6 +121,7 @@ def main(records):
parser.add_argument("--parallel", type=int, default=512)
parser.add_argument("--idx", type=int, default=None)
parser.add_argument("--ignore-eos", action="store_true")
+ parser.add_argument("--speed", type=float, default=1)
args = parser.parse_args()
set_ulimit()
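
For reference, the new `--speed` flag divides the recorded delay before each request, so `--speed 2` replays the dump at roughly twice the original request rate while preserving relative ordering. A minimal sketch of that scheduling step (the helper name is hypothetical; only the arithmetic mirrors the patch):

```python
import time

def sleep_until_scaled_start(start_time: float, replay_init_time: float, speed: float) -> None:
    """Sleep until a recorded request's start offset, compressed by `speed`.

    start_time: offset (seconds) of the request in the original dump.
    replay_init_time: time.time() captured when the replay began.
    speed: values > 1 shrink the remaining wait, replaying faster.
    """
    elapsed = time.time() - replay_init_time
    remaining = (start_time - elapsed) / speed
    time.sleep(max(0.0, remaining))
```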
diff --git a/sgl-kernel/CMakeLists.txt b/sgl-kernel/CMakeLists.txt
index 89e0a591830..80f29921f2c 100644
--- a/sgl-kernel/CMakeLists.txt
+++ b/sgl-kernel/CMakeLists.txt
@@ -3,6 +3,7 @@ project(sgl-kernel LANGUAGES CXX CUDA)
# CMake
cmake_policy(SET CMP0169 OLD)
+cmake_policy(SET CMP0177 NEW)
include(${CMAKE_CURRENT_LIST_DIR}/cmake/utils.cmake)
set(CMAKE_COLOR_DIAGNOSTICS ON)
set(CMAKE_VERBOSE_MAKEFILE ON CACHE BOOL "ON")
@@ -45,11 +46,20 @@ include(FetchContent)
FetchContent_Declare(
repo-cutlass
GIT_REPOSITORY https://github.com/NVIDIA/cutlass
- GIT_TAG 664c4f7b3ed1959414905025728eef5568209479
+ GIT_TAG a49a78ffefc86a87160dfe0ccc3a3a2d1622c918
GIT_SHALLOW OFF
)
FetchContent_Populate(repo-cutlass)
+# DeepGEMM
+FetchContent_Declare(
+ repo-deepgemm
+ GIT_REPOSITORY https://github.com/sgl-project/DeepGEMM
+ GIT_TAG sgl
+ GIT_SHALLOW OFF
+)
+FetchContent_Populate(repo-deepgemm)
+
FetchContent_Declare(
repo-fmt
GIT_REPOSITORY https://github.com/fmtlib/fmt
@@ -57,13 +67,6 @@ FetchContent_Declare(
GIT_SHALLOW OFF
)
FetchContent_Populate(repo-fmt)
-FetchContent_Declare(
- repo-deepgemm
- GIT_REPOSITORY https://github.com/sgl-project/DeepGEMM
- GIT_TAG cabi
- GIT_SHALLOW OFF
-)
-FetchContent_Populate(repo-deepgemm)
# Triton
FetchContent_Declare(
@@ -78,7 +81,7 @@ FetchContent_Populate(repo-triton)
FetchContent_Declare(
repo-flashinfer
GIT_REPOSITORY https://github.com/flashinfer-ai/flashinfer.git
- GIT_TAG 9220fb3443b5a5d274f00ca5552f798e225239b7
+ GIT_TAG 018b551825c8e5579206e6eb9d3229fa679202b3
GIT_SHALLOW OFF
)
FetchContent_Populate(repo-flashinfer)
@@ -92,6 +95,15 @@ FetchContent_Declare(
)
FetchContent_Populate(repo-flash-attention)
+# flash-attention upstream (Dao-AILab)
+FetchContent_Declare(
+ repo-flash-attention-origin
+ GIT_REPOSITORY https://github.com/Dao-AILab/flash-attention.git
+ GIT_TAG 203b9b3dba39d5d08dffb49c09aa622984dff07d
+ GIT_SHALLOW OFF
+)
+FetchContent_Populate(repo-flash-attention-origin)
+
# mscclpp
FetchContent_Declare(
repo-mscclpp
@@ -145,23 +157,59 @@ set(SGL_KERNEL_CUDA_FLAGS
"-DCUTLASS_DEBUG_TRACE_LEVEL=0"
"--expt-relaxed-constexpr"
"--expt-extended-lambda"
- "--threads=32"
-
- # Suppress warnings
- "-Xcompiler=-Wconversion"
- "-Xcompiler=-fno-strict-aliasing"
+ # A hard-coded "--threads=32" breaks CMAKE_BUILD_PARALLEL_LEVEL and can
+ # trigger OOM on low-memory hosts, so the thread count is exposed as the
+ # SGL_KERNEL_COMPILE_THREADS option (default: 32).
+ # "--threads=32"
+
+ # Suppress warnings
+ "-Xcompiler=-Wno-clang-format-violations"
+ "-Xcompiler=-Wno-conversion"
+ "-Xcompiler=-Wno-deprecated-declarations"
+ "-Xcompiler=-Wno-terminate"
+ "-Xcompiler=-Wfatal-errors"
+ "-Xcompiler=-ftemplate-backtrace-limit=1"
+ "-Xcudafe=--diag_suppress=177" # variable was declared but never referenced
# uncomment to debug
# "--ptxas-options=-v"
# "--ptxas-options=--verbose,--register-usage-level=10,--warn-on-local-memory-usage"
)
-option(SGL_KERNEL_ENABLE_SM100A "Enable SM100A" OFF)
-option(SGL_KERNEL_ENABLE_SM90A "Enable SM90A" OFF)
+set(SGL_KERNEL_COMPILE_THREADS 32 CACHE STRING "Set compilation threads, default 32")
+
+# Validate SGL_KERNEL_COMPILE_THREADS: it must be an integer, and values below 1 are clamped to 1
+if (NOT SGL_KERNEL_COMPILE_THREADS MATCHES "^[0-9]+$")
+ message(FATAL_ERROR "SGL_KERNEL_COMPILE_THREADS must be an integer, but was set to '${SGL_KERNEL_COMPILE_THREADS}'.")
+elseif (SGL_KERNEL_COMPILE_THREADS LESS 1)
+ message(STATUS "SGL_KERNEL_COMPILE_THREADS was set to a value less than 1. Using 1 instead.")
+ set(SGL_KERNEL_COMPILE_THREADS 1)
+endif()
+
+list(APPEND SGL_KERNEL_CUDA_FLAGS
+ "--threads=${SGL_KERNEL_COMPILE_THREADS}"
+)
+
option(SGL_KERNEL_ENABLE_BF16 "Enable BF16" ON)
option(SGL_KERNEL_ENABLE_FP8 "Enable FP8" ON)
option(SGL_KERNEL_ENABLE_FP4 "Enable FP4" OFF)
option(SGL_KERNEL_ENABLE_FA3 "Enable FA3" OFF)
+option(SGL_KERNEL_ENABLE_SM90A "Enable SM90A" OFF)
+option(SGL_KERNEL_ENABLE_SM100A "Enable SM100A" OFF)
+
+if (SGL_KERNEL_ENABLE_BF16)
+ list(APPEND SGL_KERNEL_CUDA_FLAGS
+ "-DFLASHINFER_ENABLE_BF16"
+ )
+endif()
+
+if (SGL_KERNEL_ENABLE_FP8)
+ list(APPEND SGL_KERNEL_CUDA_FLAGS
+ "-DFLASHINFER_ENABLE_FP8"
+ "-DFLASHINFER_ENABLE_FP8_E4M3"
+ "-DFLASHINFER_ENABLE_FP8_E5M2"
+ )
+endif()
if (ENABLE_BELOW_SM90)
list(APPEND SGL_KERNEL_CUDA_FLAGS
@@ -172,13 +220,24 @@ endif()
if ("${CUDA_VERSION}" VERSION_GREATER_EQUAL "12.8" OR SGL_KERNEL_ENABLE_SM100A)
list(APPEND SGL_KERNEL_CUDA_FLAGS
- "-gencode=arch=compute_100,code=sm_100"
"-gencode=arch=compute_100a,code=sm_100a"
- "-gencode=arch=compute_101,code=sm_101"
- "-gencode=arch=compute_101a,code=sm_101a"
- "-gencode=arch=compute_120,code=sm_120"
"-gencode=arch=compute_120a,code=sm_120a"
)
+
+ # See https://github.com/pytorch/pytorch/pull/156176 for the sm_121, sm_110 and sm_101 descriptions
+ if ("${CUDA_VERSION}" VERSION_GREATER_EQUAL "13.0")
+ list(APPEND SGL_KERNEL_CUDA_FLAGS
+ "-gencode=arch=compute_103a,code=sm_103a"
+ "-gencode=arch=compute_110a,code=sm_110a"
+ "-gencode=arch=compute_121a,code=sm_121a"
+ "--compress-mode=size"
+ )
+ else()
+ list(APPEND SGL_KERNEL_CUDA_FLAGS
+ "-gencode=arch=compute_101a,code=sm_101a"
+ )
+ endif()
+
else()
list(APPEND SGL_KERNEL_CUDA_FLAGS
"-use_fast_math"
@@ -192,43 +251,28 @@ if ("${CUDA_VERSION}" VERSION_GREATER_EQUAL "12.4" OR SGL_KERNEL_ENABLE_SM90A)
)
endif()
-if (SGL_KERNEL_ENABLE_BF16)
- list(APPEND SGL_KERNEL_CUDA_FLAGS
- "-DFLASHINFER_ENABLE_BF16"
- )
-endif()
-
-if (SGL_KERNEL_ENABLE_FP8)
- list(APPEND SGL_KERNEL_CUDA_FLAGS
- "-DFLASHINFER_ENABLE_FP8"
- "-DFLASHINFER_ENABLE_FP8_E4M3"
- "-DFLASHINFER_ENABLE_FP8_E5M2"
- )
-endif()
-
if ("${CUDA_VERSION}" VERSION_GREATER_EQUAL "12.8" OR SGL_KERNEL_ENABLE_FP4)
list(APPEND SGL_KERNEL_CUDA_FLAGS
"-DENABLE_NVFP4=1"
)
endif()
-string(REPLACE "-D__CUDA_NO_HALF_OPERATORS__" "" CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS}")
-string(REPLACE "-D__CUDA_NO_HALF_CONVERSIONS__" "" CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS}")
-string(REPLACE "-D__CUDA_NO_BFLOAT16_CONVERSIONS__" "" CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS}")
-string(REPLACE "-D__CUDA_NO_HALF2_OPERATORS__" "" CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS}")
-
set(SOURCES
- "csrc/allreduce/mscclpp_allreduce.cu"
"csrc/allreduce/custom_all_reduce.cu"
+ "csrc/allreduce/mscclpp_allreduce.cu"
"csrc/attention/cascade.cu"
- "csrc/attention/merge_attn_states.cu"
"csrc/attention/cutlass_mla_kernel.cu"
- "csrc/attention/vertical_slash_index.cu"
"csrc/attention/lightning_attention_decode_kernel.cu"
+ "csrc/attention/merge_attn_states.cu"
+ "csrc/attention/vertical_slash_index.cu"
"csrc/elementwise/activation.cu"
+ "csrc/elementwise/cast.cu"
+ "csrc/elementwise/copy.cu"
+ "csrc/elementwise/concat_mla.cu"
"csrc/elementwise/fused_add_rms_norm_kernel.cu"
"csrc/elementwise/rope.cu"
"csrc/common_extension.cc"
+
"csrc/gemm/awq_kernel.cu"
"csrc/gemm/bmm_fp8.cu"
"csrc/gemm/dsv3_fused_a_gemm.cu"
@@ -252,33 +296,32 @@ set(SOURCES
"csrc/gemm/marlin/gptq_marlin_repack.cu"
"csrc/gemm/marlin/awq_marlin_repack.cu"
"csrc/gemm/gptq/gptq_kernel.cu"
+
"csrc/grammar/apply_token_bitmask_inplace_cuda.cu"
+
"csrc/moe/cutlass_moe/w4a8/scaled_mm_entry.cu"
"csrc/moe/cutlass_moe/w4a8/w4a8_moe_data.cu"
"csrc/moe/cutlass_moe/w4a8/w4a8_grouped_mm_c3x.cu"
"csrc/moe/marlin_moe_wna16/ops.cu"
- "csrc/moe/marlin_moe_wna16/kernel_bf16_ku4.cu"
- "csrc/moe/marlin_moe_wna16/kernel_bf16_ku4b8.cu"
- "csrc/moe/marlin_moe_wna16/kernel_bf16_ku8b128.cu"
- "csrc/moe/marlin_moe_wna16/kernel_fp16_ku4.cu"
- "csrc/moe/marlin_moe_wna16/kernel_fp16_ku4b8.cu"
- "csrc/moe/marlin_moe_wna16/kernel_fp16_ku8b128.cu"
+ "csrc/mamba/causal_conv1d.cu"
"csrc/moe/moe_align_kernel.cu"
"csrc/moe/moe_fused_gate.cu"
"csrc/moe/moe_topk_softmax_kernels.cu"
"csrc/moe/nvfp4_blockwise_moe.cu"
"csrc/moe/fp8_blockwise_moe_kernel.cu"
"csrc/moe/prepare_moe_input.cu"
- "csrc/moe/ep_moe_reorder_kernel.cu"
- "csrc/moe/ep_moe_silu_and_mul_kernel.cu"
+
+ "csrc/memory/store.cu"
"csrc/kvcacheio/transfer.cu"
+
"csrc/speculative/eagle_utils.cu"
"csrc/speculative/packbit.cu"
"csrc/speculative/speculative_sampling.cu"
- "csrc/memory/store.cu"
+
"${repo-flashinfer_SOURCE_DIR}/csrc/norm.cu"
"${repo-flashinfer_SOURCE_DIR}/csrc/renorm.cu"
"${repo-flashinfer_SOURCE_DIR}/csrc/sampling.cu"
+
"${repo-flash-attention_SOURCE_DIR}/csrc/flash_attn/src/flash_fwd_sparse_hdim128_bf16_causal_sm80.cu"
"${repo-flash-attention_SOURCE_DIR}/csrc/flash_attn/src/flash_fwd_sparse_hdim128_bf16_sm80.cu"
"${repo-flash-attention_SOURCE_DIR}/csrc/flash_attn/src/flash_fwd_sparse_hdim128_fp16_causal_sm80.cu"
@@ -294,8 +337,6 @@ target_include_directories(common_ops PRIVATE
${repo-cutlass_SOURCE_DIR}/examples/common
${repo-flash-attention_SOURCE_DIR}/csrc/flash_attn/src
)
-set_source_files_properties("csrc/gemm/per_token_group_quant_8bit" PROPERTIES COMPILE_OPTIONS "--use_fast_math")
-
find_package(Python3 COMPONENTS Interpreter REQUIRED)
execute_process(
@@ -317,7 +358,10 @@ endif()
set(MSCCLPP_USE_CUDA ON)
set(MSCCLPP_BYPASS_GPU_CHECK ON)
set(MSCCLPP_BUILD_TESTS OFF)
-add_subdirectory(${repo-mscclpp_SOURCE_DIR})
+add_subdirectory(
+ ${repo-mscclpp_SOURCE_DIR}
+ ${CMAKE_CURRENT_BINARY_DIR}/mscclpp-build
+)
target_link_libraries(common_ops PRIVATE ${TORCH_LIBRARIES} c10 cuda cublas cublasLt mscclpp_static)
# flash attention
@@ -478,3 +522,13 @@ install(DIRECTORY "${repo-triton_SOURCE_DIR}/python/triton_kernels/triton_kernel
DESTINATION "triton_kernels"
PATTERN ".git*" EXCLUDE
PATTERN "__pycache__" EXCLUDE)
+
+# flash attention 4
+# TODO: find a better install condition.
+if ("${CUDA_VERSION}" VERSION_GREATER_EQUAL "12.8" OR SGL_KERNEL_ENABLE_SM100A)
+ # flash_attn/cute
+ install(DIRECTORY "${repo-flash-attention-origin_SOURCE_DIR}/flash_attn/cute/"
+ DESTINATION "flash_attn/cute"
+ PATTERN ".git*" EXCLUDE
+ PATTERN "__pycache__" EXCLUDE)
+endif()
diff --git a/sgl-kernel/Makefile b/sgl-kernel/Makefile
index 382c4e0c42e..c40489800ef 100644
--- a/sgl-kernel/Makefile
+++ b/sgl-kernel/Makefile
@@ -21,12 +21,11 @@ submodule: ## Initialize and update git submodules
ln: submodule ## Create compilation database
@rm -rf build && mkdir build && cd build && cmake .. -DCMAKE_EXPORT_COMPILE_COMMANDS=YES -DCMAKE_POLICY_VERSION_MINIMUM=3.5
-
install: submodule ## Install package in development mode
@pip install -e . --no-build-isolation
build: install-deps submodule ## Build and install wheel package
- @rm -rf dist/* || true && export MAX_JOBS=$(nproc) && CMAKE_POLICY_VERSION_MINIMUM=3.5 CMAKE_BUILD_PARALLEL_LEVEL=$(nproc) uv build --wheel -Cbuild-dir=build . --verbose --color=always --no-build-isolation && pip3 install dist/*whl --force-reinstall --no-deps
+ @rm -rf dist/* || true && CMAKE_POLICY_VERSION_MINIMUM=3.5 MAX_JOBS=$(nproc) CMAKE_BUILD_PARALLEL_LEVEL=$(nproc) uv build --wheel -Cbuild-dir=build . --verbose --color=always --no-build-isolation && pip3 install dist/*whl --force-reinstall --no-deps
clean: ## Remove build artifacts
@rm -rf build dist *.egg-info
@@ -48,7 +47,8 @@ FILES_TO_UPDATE = python/sgl_kernel/version.py \
pyproject.toml \
pyproject_rocm.toml \
pyproject_cpu.toml \
- ../docker/Dockerfile
+ ../docker/Dockerfile \
+ ../.github/workflows/pr-test-pd-router.yml
update: ## Update version numbers across project files. Usage: make update
@if [ -z "$(filter-out $@,$(MAKECMDGOALS))" ]; then \
diff --git a/sgl-kernel/README.md b/sgl-kernel/README.md
index c81a2af0b52..47f3dea54ec 100644
--- a/sgl-kernel/README.md
+++ b/sgl-kernel/README.md
@@ -52,10 +52,12 @@ See CMakeLists.txt for more options.
### Parallel Build
We highly recommend you build sgl-kernel with Ninja. Ninja can automatically build sgl-kernel in parallel.
-And if you build the sgl-kernel with cmake, you need to add `CMAKE_BUILD_PARALLEL_LEVEL` for parallel build like:
+If you build sgl-kernel with CMake, set `CMAKE_BUILD_PARALLEL_LEVEL` for a parallel build and limit
+nvcc to a single thread by setting `SGL_KERNEL_COMPILE_THREADS=1`, for example:
```bash
-CMAKE_BUILD_PARALLEL_LEVEL=$(nproc) python -m uv build --wheel -Cbuild-dir=build --color=always .
+CMAKE_BUILD_PARALLEL_LEVEL=$(nproc) python -m uv build --wheel -Cbuild-dir=build \
+-Ccmake.define.SGL_KERNEL_COMPILE_THREADS=1 --color=always .
```
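
As a rough, back-of-the-envelope assumption (not something benchmarked here), the peak number of concurrent nvcc compilation threads is about `CMAKE_BUILD_PARALLEL_LEVEL × SGL_KERNEL_COMPILE_THREADS`, which is why the command above pins the per-invocation threads to 1 once the parallel level is already `$(nproc)`:

```python
from typing import Optional
import os

def approx_concurrent_nvcc_threads(parallel_level: Optional[int] = None,
                                   compile_threads: int = 1) -> int:
    """Rough upper bound: translation units compiled in parallel by CMake,
    times the --threads value passed to each nvcc invocation."""
    if parallel_level is None:
        parallel_level = os.cpu_count() or 1
    return parallel_level * compile_threads

# On a 64-core host: the default --threads=32 allows up to 2048 compile threads,
# while SGL_KERNEL_COMPILE_THREADS=1 keeps it at 64.
print(approx_concurrent_nvcc_threads(64, 32), approx_concurrent_nvcc_threads(64, 1))
```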
### ⚠️ Compilation Issue with `sgl-kernel` and CUDA 12.6
@@ -121,7 +123,7 @@ ptxas --version
## Development Environment Setup
-Use Docker to set up the development environment. See [Docker setup guide](https://github.com/sgl-project/sglang/blob/main/docs/references/development_guide_using_docker.md#setup-docker-container).
+Use Docker to set up the development environment. See [Docker setup guide](https://github.com/sgl-project/sglang/blob/main/docs/developer_guide/development_guide_using_docker.md#setup-docker-container).
Create and enter development container:
```bash
diff --git a/sgl-kernel/benchmark/bench_moe_ep_post_reorder.py b/sgl-kernel/benchmark/bench_moe_ep_post_reorder.py
index 078e2c13185..faadd769841 100644
--- a/sgl-kernel/benchmark/bench_moe_ep_post_reorder.py
+++ b/sgl-kernel/benchmark/bench_moe_ep_post_reorder.py
@@ -1,6 +1,5 @@
import torch
import triton
-from sgl_kernel import ep_moe_post_reorder
from sglang.srt.layers.moe.ep_moe.kernels import post_reorder_triton_kernel
@@ -13,9 +12,9 @@
x_names=["batch_size"],
x_vals=[list(_) for _ in configs],
line_arg="provider",
- line_vals=["cuda", "triton"],
- line_names=["CUDA Kernel", "Triton Kernel"],
- styles=[("green", "-"), ("orange", "-")],
+ line_vals=["triton"],
+ line_names=["Triton Kernel"],
+ styles=[("orange", "-")],
ylabel="us",
plot_name="ep-moe-post-reorder-performance",
args={},
@@ -46,24 +45,7 @@ def alloc_tensors():
quantiles = [0.5, 0.2, 0.8]
- if provider == "cuda":
- d_out, out, s2d, tk_ids, tk_weights = alloc_tensors()
-
- def run_cuda():
- ep_moe_post_reorder(
- d_out,
- out,
- s2d,
- tk_ids,
- tk_weights,
- start_expert_id,
- end_expert_id,
- topk,
- )
-
- ms, min_ms, max_ms = triton.testing.do_bench(run_cuda, quantiles=quantiles)
-
- elif provider == "triton":
+ if provider == "triton":
d_out, out, s2d, tk_ids, tk_weights = alloc_tensors()
def run_triton():
diff --git a/sgl-kernel/benchmark/bench_moe_ep_pre_reorder.py b/sgl-kernel/benchmark/bench_moe_ep_pre_reorder.py
deleted file mode 100644
index 7623d310979..00000000000
--- a/sgl-kernel/benchmark/bench_moe_ep_pre_reorder.py
+++ /dev/null
@@ -1,103 +0,0 @@
-import torch
-import triton
-from sgl_kernel import ep_moe_pre_reorder
-
-from sglang.srt.layers.moe.ep_moe.kernels import pre_reorder_triton_kernel
-
-batch_sizes = [64, 128, 256, 512, 640, 768, 1024, 2048, 4096]
-configs = [(bs,) for bs in batch_sizes]
-
-
-@triton.testing.perf_report(
- triton.testing.Benchmark(
- x_names=["batch_size"],
- x_vals=[list(_) for _ in configs],
- line_arg="provider",
- line_vals=["cuda", "triton"],
- line_names=["CUDA Kernel", "Triton Kernel"],
- styles=[("green", "-"), ("orange", "-")],
- ylabel="us",
- plot_name="ep-moe-pre-reorder-performance",
- args={},
- )
-)
-def benchmark(batch_size, provider):
- dtype = torch.bfloat16
- device = torch.device("cuda")
- hidden_size, topk, start_expert_id, end_expert_id, block_size = (
- 4096,
- 8,
- 0,
- 255,
- 512,
- )
-
- # Allocate fresh tensors for every run to match bench_moe_fused_gate style
- def alloc_tensors():
- input_ = torch.randn(batch_size, hidden_size, dtype=dtype, device=device)
- gateup_input = torch.zeros(
- batch_size * topk, hidden_size, dtype=dtype, device=device
- )
- src2dst = torch.randint(
- 0, batch_size * topk, (batch_size, topk), dtype=torch.int32, device=device
- )
- topk_ids = torch.randint(
- start_expert_id,
- end_expert_id + 1,
- (batch_size, topk),
- dtype=torch.int32,
- device=device,
- )
- a1_scales = torch.rand(
- end_expert_id - start_expert_id + 1, dtype=torch.float32, device=device
- )
- return input_, gateup_input, src2dst, topk_ids, a1_scales
-
- quantiles = [0.5, 0.2, 0.8]
-
- if provider == "cuda":
- inp, gout, s2d, tk_ids, scales = alloc_tensors()
-
- def run_cuda():
- ep_moe_pre_reorder(
- inp,
- gout,
- s2d,
- tk_ids,
- scales,
- start_expert_id,
- end_expert_id,
- topk,
- True,
- )
-
- ms, min_ms, max_ms = triton.testing.do_bench(run_cuda, quantiles=quantiles)
-
- elif provider == "triton":
- inp, gout, s2d, tk_ids, scales = alloc_tensors()
-
- def run_triton():
- pre_reorder_triton_kernel[(batch_size,)](
- inp.view(-1),
- gout.view(-1),
- s2d.view(-1),
- tk_ids.view(-1),
- scales,
- start_expert_id,
- end_expert_id,
- topk,
- hidden_size,
- block_size,
- True,
- )
-
- ms, min_ms, max_ms = triton.testing.do_bench(run_triton, quantiles=quantiles)
-
- else:
- raise ValueError(f"Unknown provider: {provider}")
-
- return 1000 * ms, 1000 * max_ms, 1000 * min_ms
-
-
-if __name__ == "__main__":
- benchmark.run(print_data=True)
diff --git a/sgl-kernel/benchmark/bench_moe_silu_and_mul.py b/sgl-kernel/benchmark/bench_moe_silu_and_mul.py
deleted file mode 100644
index 68f54bd327b..00000000000
--- a/sgl-kernel/benchmark/bench_moe_silu_and_mul.py
+++ /dev/null
@@ -1,92 +0,0 @@
-import itertools
-
-import torch
-import triton
-from sgl_kernel import ep_moe_silu_and_mul
-
-from sglang.srt.layers.moe.ep_moe.kernels import silu_and_mul_triton_kernel
-
-batch_size_range = [64, 128, 256, 512, 640, 768, 1024, 2048, 4096]
-hidden_size_range = [1024, 2048, 4096, 8192]
-block_size_range = [128, 256, 512]
-configs = list(itertools.product(batch_size_range, hidden_size_range, block_size_range))
-
-
-@triton.testing.perf_report(
- triton.testing.Benchmark(
- x_names=["batch_size", "hidden_size", "block_size"],
- x_vals=[list(cfg) for cfg in configs],
- line_arg="provider",
- line_vals=["cuda", "triton"],
- line_names=["CUDA Kernel", "Triton Kernel"],
- styles=[("green", "-"), ("orange", "-")],
- ylabel="us",
- plot_name="ep-moe-silu-and-mul-performance",
- args={},
- )
-)
-def benchmark(batch_size, hidden_size, block_size, provider):
- dtype = torch.bfloat16
- device = torch.device("cuda")
-
- half_hidden_size = hidden_size // 2
- start_expert_id, end_expert_id = 0, 255
- block_size = 512
- quantiles = [0.5, 0.2, 0.8]
-
- def alloc_tensors():
- gateup_output = torch.randn(batch_size, hidden_size, dtype=dtype, device=device)
- down_input = torch.empty(
- batch_size, half_hidden_size, dtype=dtype, device=device
- )
- reorder_topk_ids = torch.randint(
- start_expert_id,
- end_expert_id + 1,
- (batch_size,),
- dtype=torch.int32,
- device=device,
- )
- scales = torch.rand(
- end_expert_id - start_expert_id + 1, dtype=torch.float32, device=device
- )
- return gateup_output, down_input, reorder_topk_ids, scales
-
- if provider == "cuda":
- gateup, down, ids, scales = alloc_tensors()
-
- def run_cuda():
- ep_moe_silu_and_mul(
- gateup,
- down,
- ids,
- scales,
- start_expert_id,
- end_expert_id,
- )
-
- ms, min_ms, max_ms = triton.testing.do_bench(run_cuda, quantiles=quantiles)
-
- elif provider == "triton":
- gateup, down, ids, scales = alloc_tensors()
-
- def run_triton():
- silu_and_mul_triton_kernel[(batch_size,)](
- gateup.view(-1),
- down.view(-1),
- hidden_size,
- ids,
- scales,
- start_expert_id,
- end_expert_id,
- block_size,
- )
-
- ms, min_ms, max_ms = triton.testing.do_bench(run_triton, quantiles=quantiles)
- else:
- raise ValueError(f"Unknown provider: {provider}")
-
- return 1000 * ms, 1000 * max_ms, 1000 * min_ms
-
-
-if __name__ == "__main__":
- benchmark.run(print_data=True)
diff --git a/sgl-kernel/benchmark/bench_per_token_group_quant_8bit.py b/sgl-kernel/benchmark/bench_per_token_group_quant_8bit.py
index 5a924898281..3f37a3248a5 100644
--- a/sgl-kernel/benchmark/bench_per_token_group_quant_8bit.py
+++ b/sgl-kernel/benchmark/bench_per_token_group_quant_8bit.py
@@ -1,189 +1,68 @@
import itertools
-from typing import Tuple
+import time
+from functools import partial
+from pathlib import Path
import torch
import triton
-import triton.language as tl
-from sgl_kernel import sgl_per_token_group_quant_fp8, sgl_per_token_group_quant_int8
+from sglang.srt.bench_utils import bench_kineto
+from sglang.srt.layers.quantization.fp8_kernel import (
+ create_per_token_group_quant_fp8_output_scale,
+)
+from sglang.srt.layers.quantization.fp8_kernel import (
+ per_token_group_quant_8bit as triton_per_token_group_quant_8bit,
+)
+from sglang.srt.layers.quantization.fp8_kernel import sglang_per_token_group_quant_8bit
from sglang.srt.utils import is_hip
_is_hip = is_hip()
fp8_type_ = torch.float8_e4m3fnuz if _is_hip else torch.float8_e4m3fn
-@triton.jit
-def _per_token_group_quant_8bit(
- # Pointers to inputs and output
- y_ptr,
- y_q_ptr,
- y_s_ptr,
- # Stride of input
- y_stride,
- # Columns of input
- N,
- # Avoid to divide zero
- eps,
- # Information for 8bit data type (int8 or fp8_type_)
- max_8bit,
- min_8bit,
- # Meta-parameters
- BLOCK: tl.constexpr,
-):
- """A Triton-accelerated function to perform per-token-group quantization on a
- tensor.
- This function converts the tensor values into 8bit values.
- """
- # Map the program id to the row of X and Y it should compute.
- g_id = tl.program_id(0)
- y_ptr += g_id * y_stride
- y_q_ptr += g_id * y_stride
- y_s_ptr += g_id
-
- cols = tl.arange(0, BLOCK) # N <= BLOCK
- mask = cols < N
-
- y = tl.load(y_ptr + cols, mask=mask, other=0.0).to(tl.float32)
- # Quant
- _absmax = tl.maximum(tl.max(tl.abs(y)), eps)
- y_s = _absmax / max_8bit
- y_q = tl.clamp(y / y_s, min_8bit, max_8bit).to(y_q_ptr.dtype.element_ty)
-
- tl.store(y_q_ptr + cols, y_q, mask=mask)
- tl.store(y_s_ptr, y_s)
-
-
-def triton_per_token_group_quant_8bit(
- x: torch.Tensor,
- group_size: int,
- dst_dtype: torch.dtype,
- eps: float = 1e-10,
-) -> Tuple[torch.Tensor, torch.Tensor]:
- """Function to perform per-token-group quantization on an input tensor `x`.
- It converts the tensor values into signed float8 values and returns the
- quantized tensor along with the scaling factor used for quantization.
- Args:
- x: The input tenosr with ndim >= 2.
- group_size: The group size used for quantization.
- eps: The minimum to avoid dividing zero.
- dtype: The dype of output tensor. Note that only `torch.float8_e4m3fn` is supported for now.
- Returns:
- Tuple[torch.Tensor, torch.Tensor]: The quantized tensor and the scaling factor for quantization.
- """
- assert (
- x.shape[-1] % group_size == 0
- ), "the last dimension of `x` cannot be divisible by `group_size`"
- assert x.is_contiguous(), "`x` is not contiguous"
-
- if dst_dtype == torch.int8:
- iinfo = torch.iinfo(dst_dtype)
- max_8bit = iinfo.max
- min_8bit = iinfo.min
- else:
- finfo = torch.finfo(dst_dtype)
- max_8bit = finfo.max
- min_8bit = finfo.min
-
- x_q = torch.empty_like(x, device=x.device, dtype=dst_dtype)
- M = x.numel() // group_size
- N = group_size
- x_s = torch.empty(
- x.shape[:-1] + (x.shape[-1] // group_size,),
- device=x.device,
- dtype=torch.float32,
- )
-
- BLOCK = triton.next_power_of_2(N)
- # heuristics for number of warps
- num_warps = min(max(BLOCK // 256, 1), 8)
- num_stages = 1
- _per_token_group_quant_8bit[(M,)](
- x,
- x_q,
- x_s,
- group_size,
- N,
- eps,
- max_8bit,
- min_8bit,
- BLOCK=BLOCK,
- num_warps=num_warps,
- num_stages=num_stages,
- )
-
- return x_q, x_s
-
-
-def sglang_per_token_group_quant_8bit(
- x: torch.Tensor,
- group_size: int,
- dst_dtype: torch.dtype,
- eps: float = 1e-10,
-):
- assert (
- x.shape[-1] % group_size == 0
- ), "the last dimension of `x` cannot be divisible by `group_size`"
- assert x.is_contiguous(), "`x` is not contiguous"
-
- x_q = torch.empty_like(x, device=x.device, dtype=dst_dtype)
- x_s = torch.empty(
- x.shape[:-1] + (x.shape[-1] // group_size,),
- device=x.device,
- dtype=torch.float32,
- )
-
- if dst_dtype == torch.int8:
- iinfo = torch.iinfo(dst_dtype)
- int8_max = iinfo.max
- int8_min = iinfo.min
- sgl_per_token_group_quant_int8(x, x_q, x_s, group_size, eps, int8_min, int8_max)
- else:
- f8_info = torch.finfo(dst_dtype)
- fp8_max = f8_info.max
- fp8_min = f8_info.min
- sgl_per_token_group_quant_fp8(x, x_q, x_s, group_size, eps, fp8_min, fp8_max)
-
- return x_q, x_s
-
-
-def calculate_diff(batch_size, seq_len, group_size, dst_dtype):
- device = torch.device("cuda")
- hidden_dim = 7168
-
- x = torch.randn(
- batch_size * seq_len, hidden_dim, device=device, dtype=torch.float16
- )
-
- x_q_triton, x_s_triton = triton_per_token_group_quant_8bit(
- x.clone(), group_size, dst_dtype
- )
- x_q_sglang, x_s_sglang = sglang_per_token_group_quant_8bit(
- x.clone(), group_size, dst_dtype
- )
-
- if torch.allclose(
- x_q_triton.to(torch.float32), x_q_sglang.to(torch.float32), rtol=1e-3, atol=1e-5
- ) and torch.allclose(x_s_triton, x_s_sglang, rtol=1e-3, atol=1e-5):
- print(f"✅ {dst_dtype} implementations match")
- else:
- print("❌ Implementations differ")
-
-
-batch_size_range = [1, 2, 4, 8, 16, 32, 64]
-seq_len_range = [64, 128, 256, 512, 1024, 2048]
+num_tokens_range = [1, 4, 16, 64, 256, 768, 2048, 8192, 16384]
+hidden_dim_range = [1536, 7168, 18432] # For DeepSeek V3/R1
group_size_range = [128] # For DeepSeek V3/R1
-dst_dtype_range = [torch.int8, fp8_type_]
+# TODO test int8
+dst_dtype_range = [fp8_type_]
+flags_range = [
+ dict(
+ column_major_scales=False,
+ scale_tma_aligned=False,
+ scale_ue8m0=False,
+ ),
+ dict(
+ column_major_scales=True,
+ scale_tma_aligned=False,
+ scale_ue8m0=False,
+ ),
+ dict(
+ column_major_scales=True,
+ scale_tma_aligned=True,
+ scale_ue8m0=False,
+ ),
+ dict(
+ column_major_scales=True,
+ scale_tma_aligned=True,
+ scale_ue8m0=True,
+ ),
+]
+
configs = list(
itertools.product(
- batch_size_range, seq_len_range, group_size_range, dst_dtype_range
+ num_tokens_range,
+ hidden_dim_range,
+ group_size_range,
+ dst_dtype_range,
+ flags_range,
)
)
@triton.testing.perf_report(
triton.testing.Benchmark(
- x_names=["batch_size", "seq_len", "group_size", "dst_dtype"],
+ x_names=["num_tokens", "hidden_dim", "group_size", "dst_dtype", "flags"],
x_vals=configs,
line_arg="provider",
line_vals=["triton", "sglang"],
@@ -194,29 +73,26 @@ def calculate_diff(batch_size, seq_len, group_size, dst_dtype):
args={},
)
)
-def benchmark(batch_size, seq_len, group_size, dst_dtype, provider):
- device = torch.device("cuda")
- hidden_dim = 7168
+def benchmark(num_tokens, hidden_dim, group_size, dst_dtype, flags, provider):
+ if flags["scale_ue8m0"] and group_size != 128:
+ return
- x = torch.randn(
- batch_size * seq_len, hidden_dim, device=device, dtype=torch.float16
- )
-
- quantiles = [0.5, 0.2, 0.8]
+ device = torch.device("cuda")
- if provider == "triton":
- fn = lambda: triton_per_token_group_quant_8bit(x, group_size, dst_dtype)
- elif provider == "sglang":
- fn = lambda: sglang_per_token_group_quant_8bit(x, group_size, dst_dtype)
+ x = torch.randn(num_tokens, hidden_dim, device=device, dtype=torch.bfloat16)
- ms, min_ms, max_ms = triton.testing.do_bench(fn, quantiles=quantiles)
+ fn, kernel_names = {
+ "triton": (triton_per_token_group_quant_8bit, "_per_token_group_quant_fp8"),
+ "sglang": (
+ sglang_per_token_group_quant_8bit,
+ "per_token_group_quant_8bit_kernel",
+ ),
+ }[provider]
+ bench_fn = lambda: fn(x=x, group_size=group_size, dst_dtype=dst_dtype, **flags)
- return 1000 * ms, 1000 * max_ms, 1000 * min_ms
+ time_s = bench_kineto(bench_fn, kernel_names=kernel_names)
+ return time_s * 1e6
if __name__ == "__main__":
-
- calculate_diff(batch_size=4, seq_len=128, group_size=64, dst_dtype=torch.int8)
- calculate_diff(batch_size=4, seq_len=128, group_size=64, dst_dtype=fp8_type_)
-
benchmark.run(print_data=True)
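
For readers skimming the benchmark: per-token-group quantization splits each row into groups of `group_size` values, derives one scale per group as `absmax / dtype_max` (floored at `eps`), and clamps `x / scale` into the 8-bit range — the same math the deleted Triton reference implemented. A minimal PyTorch sketch, ignoring the column-major / TMA-aligned / UE8M0 scale layouts exercised by the `flags` above:

```python
import torch

def per_token_group_quant_fp8_ref(x: torch.Tensor, group_size: int, eps: float = 1e-10):
    """Reference per-token-group FP8 quantization (row-major scales only)."""
    assert x.shape[-1] % group_size == 0 and x.is_contiguous()
    finfo = torch.finfo(torch.float8_e4m3fn)
    # view each row as (num_groups, group_size) and compute one scale per group
    g = x.view(*x.shape[:-1], x.shape[-1] // group_size, group_size).float()
    scales = g.abs().amax(dim=-1, keepdim=True).clamp_min(eps) / finfo.max
    q = (g / scales).clamp(finfo.min, finfo.max).to(torch.float8_e4m3fn)
    return q.view_as(x), scales.squeeze(-1)

x = torch.randn(4, 256, dtype=torch.bfloat16)
x_q, x_s = per_token_group_quant_fp8_ref(x, group_size=128)
print(x_q.shape, x_s.shape)  # torch.Size([4, 256]) torch.Size([4, 2])
```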
diff --git a/sgl-kernel/build.sh b/sgl-kernel/build.sh
index 4b430d30f7e..0bf5a07ed0b 100755
--- a/sgl-kernel/build.sh
+++ b/sgl-kernel/build.sh
@@ -15,7 +15,6 @@ echo "ARCH: $ARCH"
if [ ${ARCH} = "aarch64" ]; then
LIBCUDA_ARCH="sbsa"
BUILDER_NAME="pytorch/manylinuxaarch64-builder"
- CMAKE_BUILD_PARALLEL_LEVEL=16
else
LIBCUDA_ARCH=${ARCH}
BUILDER_NAME="pytorch/manylinux2_28-builder"
@@ -40,6 +39,7 @@ docker run --rm \
export CMAKE_VERSION_MAJOR=3.31
export CMAKE_VERSION_MINOR=1
# Setting these flags to reduce OOM chance only on ARM
+ export CMAKE_BUILD_PARALLEL_LEVEL=$(( $(nproc)/3 < 48 ? $(nproc)/3 : 48 ))
if [ \"${ARCH}\" = \"aarch64\" ]; then
export CUDA_NVCC_FLAGS=\"-Xcudafe --threads=2\"
export MAKEFLAGS='-j2'
diff --git a/sgl-kernel/csrc/allreduce/mscclpp_allreduce.cuh b/sgl-kernel/csrc/allreduce/mscclpp_allreduce.cuh
index 2e064d704a9..ba0bc33fd8d 100644
--- a/sgl-kernel/csrc/allreduce/mscclpp_allreduce.cuh
+++ b/sgl-kernel/csrc/allreduce/mscclpp_allreduce.cuh
@@ -1,7 +1,7 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.
#pragma once
-#if defined(__HIP_PLATFORM_AMD__)
+#ifdef USE_ROCM
#include
#else
#include
diff --git a/sgl-kernel/csrc/attention/cutlass_mla_kernel.cu b/sgl-kernel/csrc/attention/cutlass_mla_kernel.cu
index 88c4c89e230..a41779c1b01 100644
--- a/sgl-kernel/csrc/attention/cutlass_mla_kernel.cu
+++ b/sgl-kernel/csrc/attention/cutlass_mla_kernel.cu
@@ -26,6 +26,7 @@ limitations under the License.
#include "cutlass_sm100_mla/device/sm100_mla.hpp"
#include "cutlass_sm100_mla/kernel/sm100_mla_tile_scheduler.hpp"
+#include "utils.h"
// clang-format off
#if !defined(CUDA_VERSION) || CUDA_VERSION < 12040
@@ -162,7 +163,7 @@ typename T::Fmha::Arguments args_from_options(
// TODO(trevor-m): Change split_kv back to -1 when
// https://github.com/NVIDIA/cutlass/issues/2274 is fixed. Split_kv=1 will
// perform worse with larger context length and smaller batch sizes.
- num_kv_splits, // split_kv
+ static_cast(num_kv_splits), // split_kv
nullptr, // is_var_split_kv
};
// TODO(kaixih@nvidia): When split_kv=-1 and is_var_split_kv=false, we compute
@@ -217,6 +218,10 @@ void cutlass_mla_decode(
torch::Tensor const& workspace,
double sm_scale,
int64_t num_kv_splits) {
+ auto sm_version = getSMVersion();
+ // On SM103a, half of the accuracy tests are failing.
+ TORCH_CHECK(sm_version == 100, "cutlass_mla_decode is only supported on compute capability 10.0, but found sm version ", sm_version);
+
auto in_dtype = q_nope.dtype();
at::cuda::CUDAGuard device_guard{(char)q_nope.get_device()};
const cudaStream_t stream = at::cuda::getCurrentCUDAStream(q_nope.get_device());
@@ -259,7 +264,7 @@ int64_t cutlass_mla_get_workspace_size(int64_t max_seq_len, int64_t num_batches,
// Assumes device 0 when getting sm_count.
arguments.hw_info.sm_count =
sm_count <= 0 ? cutlass::KernelHardwareInfo::query_device_multiprocessor_count(/*device_id=*/0) : sm_count;
- arguments.split_kv = num_kv_splits;
+ arguments.split_kv = static_cast(num_kv_splits);
MlaSm100Type::Fmha::set_split_kv(arguments);
return MlaSm100Type::Fmha::get_workspace_size(arguments);
diff --git a/sgl-kernel/csrc/common_extension.cc b/sgl-kernel/csrc/common_extension.cc
index d11fe5b3a49..4f95c9138b9 100644
--- a/sgl-kernel/csrc/common_extension.cc
+++ b/sgl-kernel/csrc/common_extension.cc
@@ -17,6 +17,7 @@ limitations under the License.
#include
#include "sgl_kernel_ops.h"
+
TORCH_LIBRARY_FRAGMENT(sgl_kernel, m) {
/*
* From csrc/allreduce
@@ -89,10 +90,15 @@ TORCH_LIBRARY_FRAGMENT(sgl_kernel, m) {
m.def(
"apply_rope_pos_ids_cos_sin_cache(Tensor q, Tensor k, Tensor! q_rope, Tensor! k_rope, Tensor cos_sin_cache, "
- "Tensor pos_ids, bool interleave, int cuda_stream, "
+ "Tensor pos_ids, bool interleave, bool enable_pdl, int cuda_stream, "
"Tensor? v, Tensor!? k_buffer, Tensor!? v_buffer, Tensor? kv_cache_loc) -> ()");
m.impl("apply_rope_pos_ids_cos_sin_cache", torch::kCUDA, &apply_rope_pos_ids_cos_sin_cache);
+ m.def(
+ "downcast_fp8(Tensor k, Tensor v, Tensor k_out, Tensor v_out, Tensor k_scale, Tensor v_scale, Tensor loc, int "
+ "mult, int offset, int cuda_stream) -> ()");
+ m.impl("downcast_fp8", torch::kCUDA, &downcast_fp8);
+
/*
* From csrc/gemm
*/
@@ -151,6 +157,11 @@ TORCH_LIBRARY_FRAGMENT(sgl_kernel, m) {
"Tensor output_scale_offset_by_experts) -> ()");
m.impl("scaled_fp4_experts_quant", torch::kCUDA, &scaled_fp4_experts_quant);
+ m.def(
+ "silu_and_mul_scaled_fp4_experts_quant(Tensor! output, Tensor! output_scale,"
+ "Tensor input, Tensor input_global_scale, Tensor mask, bool use_silu_and_mul) -> ()");
+ m.impl("silu_and_mul_scaled_fp4_experts_quant", torch::kCUDA, &silu_and_mul_scaled_fp4_experts_quant);
+
m.def(
"cutlass_fp4_group_mm(Tensor! output, Tensor a, Tensor b,"
"Tensor a_blockscale, Tensor b_blockscale, Tensor alphas,"
@@ -161,7 +172,9 @@ TORCH_LIBRARY_FRAGMENT(sgl_kernel, m) {
m.def("dsv3_router_gemm(Tensor! output, Tensor mat_a, Tensor mat_b) -> ()");
m.impl("dsv3_router_gemm", torch::kCUDA, &dsv3_router_gemm);
- // GPTQ related method
+ /*
+ * From csrc/gemm/gptq
+ */
m.def(
"gptq_marlin_gemm(Tensor! a, Tensor? c_or_none,"
"Tensor! b_q_weight, Tensor! b_scales, Tensor? global_scale_or_none,"
@@ -183,6 +196,7 @@ TORCH_LIBRARY_FRAGMENT(sgl_kernel, m) {
m.def("awq_marlin_repack(Tensor! b_q_weight, int size_k, int size_n, int num_bits) -> Tensor");
m.impl("awq_marlin_repack", torch::kCUDA, &awq_marlin_repack);
+
/*
* From csrc/moe
*/
@@ -200,18 +214,6 @@ TORCH_LIBRARY_FRAGMENT(sgl_kernel, m) {
"num_fused_shared_experts, float routed_scaling_factor, bool apply_routed_scaling_factor_on_output) -> "
"(Tensor[])");
m.impl("moe_fused_gate", torch::kCUDA, &moe_fused_gate);
- m.def(
- "ep_moe_pre_reorder(Tensor input, Tensor gateup_input, Tensor src2dst, Tensor topk_ids, Tensor "
- "a1_scales, int start_expert_id, int end_expert_id, int topk, bool use_per_token_if_dynamic) -> ()");
- m.impl("ep_moe_pre_reorder", torch::kCUDA, &ep_moe_pre_reorder);
- m.def(
- "ep_moe_silu_and_mul(Tensor gateup_output, Tensor down_input, Tensor reorder_topk_ids, Tensor scales, int "
- "start_expert_id, int end_expert_id) -> ()");
- m.impl("ep_moe_silu_and_mul", torch::kCUDA, &ep_moe_silu_and_mul);
- m.def(
- "ep_moe_post_reorder(Tensor down_output, Tensor output, Tensor src2dst, Tensor topk_ids, Tensor "
- "topk_weights, int start_expert_id, int end_expert_id, int topk) -> ()");
- m.impl("ep_moe_post_reorder", torch::kCUDA, &ep_moe_post_reorder);
m.def(
"fp8_blockwise_scaled_grouped_mm(Tensor output, Tensor a_ptrs, Tensor b_ptrs, Tensor out_ptrs, Tensor "
"a_scales_ptrs, Tensor b_scales_ptrs, Tensor a, Tensor b, Tensor scales_a, Tensor scales_b, Tensor "
@@ -229,6 +231,41 @@ TORCH_LIBRARY_FRAGMENT(sgl_kernel, m) {
m.def("apply_shuffle_mul_sum(Tensor input, Tensor output, Tensor permutation, Tensor? factors) -> ()");
m.impl("apply_shuffle_mul_sum", torch::kCUDA, &apply_shuffle_mul_sum);
+ /*
+ * From csrc/moe/marlin_moe_wna16
+ */
+ m.def(
+ "moe_wna16_marlin_gemm(Tensor! a, Tensor? c_or_none,"
+ "Tensor! b_q_weight, Tensor! b_scales, Tensor? b_zeros_or_none,"
+ "Tensor? g_idx_or_none, Tensor? perm_or_none, Tensor! workspace,"
+ "Tensor sorted_token_ids,"
+ "Tensor! expert_ids, Tensor! num_tokens_past_padded,"
+ "Tensor! topk_weights, int moe_block_size, int top_k, "
+ "bool mul_topk_weights, bool is_ep, int b_q_type_id,"
+ "int size_m, int size_n, int size_k,"
+ "bool is_k_full, bool use_atomic_add,"
+ "bool use_fp32_reduce, bool is_zp_float) -> Tensor");
+ m.impl("moe_wna16_marlin_gemm", torch::kCUDA, &moe_wna16_marlin_gemm);
+
+ /*
+ * From csrc/moe/cutlass_moe/w4a8
+ */
+ m.def(
+ "get_cutlass_w4a8_moe_mm_data(Tensor topk_ids, Tensor! expert_offsets, "
+ " Tensor! problem_sizes1, Tensor! problem_sizes2, "
+ " Tensor! input_permutation, "
+ " Tensor! output_permutation, int num_experts, "
+ " int n, int k) -> ()");
+ m.impl("get_cutlass_w4a8_moe_mm_data", torch::kCUDA, &get_cutlass_w4a8_moe_mm_data);
+
+ m.def(
+ "cutlass_w4a8_moe_mm(Tensor! d, Tensor a, Tensor b, "
+ " Tensor a_scales, Tensor b_scales, Tensor expert_offsets, "
+ " Tensor problem_sizes, Tensor a_strides, "
+ " Tensor b_strides, Tensor d_strides, Tensor s_strides,"
+ " int chunk_size, int topk) -> ()");
+ m.impl("cutlass_w4a8_moe_mm", torch::kCUDA, &cutlass_w4a8_moe_mm);
+
/*
* From csrc/speculative
*/
@@ -299,6 +336,14 @@ TORCH_LIBRARY_FRAGMENT(sgl_kernel, m) {
"transfer_kv_direct(Tensor[] src_layers, Tensor[] dst_layers, Tensor src_indices, Tensor dst_indices, int "
"page_size) -> ()");
m.impl("transfer_kv_direct", torch::kCUDA, &transfer_kv_direct);
+ m.def(
+ "transfer_kv_per_layer_direct_pf_lf(Tensor[] src_ptrs, Tensor[] dst_ptrs, Tensor src_indices, "
+ "Tensor dst_indices, int layer_id, int page_size)->() ");
+ m.impl("transfer_kv_per_layer_direct_pf_lf", torch::kCUDA, &transfer_kv_per_layer_direct_pf_lf);
+ m.def(
+ "transfer_kv_all_layer_direct_lf_pf(Tensor[] src_ptrs, Tensor[] dst_ptrs, Tensor src_indices, "
+ "Tensor dst_indices, int page_size) ->() ");
+ m.impl("transfer_kv_all_layer_direct_lf_pf", torch::kCUDA, &transfer_kv_all_layer_direct_lf_pf);
/*
* From csrc/memory
@@ -306,25 +351,6 @@ TORCH_LIBRARY_FRAGMENT(sgl_kernel, m) {
m.def("store_kv_cache(Tensor k_cache, Tensor v_cache, Tensor out_loc, Tensor k, Tensor v) -> ()");
m.impl("store_kv_cache", &store_kv_cache);
- /*
- * From csrc/moe/cutlass_moe/w4a8
- */
- m.def(
- "get_cutlass_w4a8_moe_mm_data(Tensor topk_ids, Tensor! expert_offsets, "
- " Tensor! problem_sizes1, Tensor! problem_sizes2, "
- " Tensor! input_permutation, "
- " Tensor! output_permutation, int num_experts, "
- " int n, int k) -> ()");
- m.impl("get_cutlass_w4a8_moe_mm_data", torch::kCUDA, &get_cutlass_w4a8_moe_mm_data);
-
- m.def(
- "cutlass_w4a8_moe_mm(Tensor! d, Tensor a, Tensor b, "
- " Tensor a_scales, Tensor b_scales, Tensor expert_offsets, "
- " Tensor problem_sizes, Tensor a_strides, "
- " Tensor b_strides, Tensor d_strides, Tensor s_strides,"
- " int chunk_size, int topk) -> ()");
- m.impl("cutlass_w4a8_moe_mm", torch::kCUDA, &cutlass_w4a8_moe_mm);
-
/*
* From FlashInfer
*/
@@ -358,19 +384,6 @@ TORCH_LIBRARY_FRAGMENT(sgl_kernel, m) {
m.def("top_k_mask_logits(Tensor logits, Tensor mask_logits, Tensor? maybe_top_k_arr, int top_k_val) -> ()");
m.impl("top_k_mask_logits", torch::kCUDA, &top_k_mask_logits);
- m.def(
- "moe_wna16_marlin_gemm(Tensor! a, Tensor? c_or_none,"
- "Tensor! b_q_weight, Tensor! b_scales, Tensor? b_zeros_or_none,"
- "Tensor? g_idx_or_none, Tensor? perm_or_none, Tensor! workspace,"
- "Tensor sorted_token_ids,"
- "Tensor! expert_ids, Tensor! num_tokens_past_padded,"
- "Tensor! topk_weights, int moe_block_size, int top_k, "
- "bool mul_topk_weights, bool is_ep, int b_q_type_id,"
- "int size_m, int size_n, int size_k,"
- "bool is_full_k, bool use_atomic_add,"
- "bool use_fp32_reduce, bool is_zp_float) -> Tensor");
- m.impl("moe_wna16_marlin_gemm", torch::kCUDA, &moe_wna16_marlin_gemm);
-
/*
* From Sparse Flash Attention
*/
@@ -433,6 +446,36 @@ TORCH_LIBRARY_FRAGMENT(sgl_kernel, m) {
"qserve_w4a8_per_group_gemm(Tensor _in_feats, Tensor _kernel, Tensor _zeros, Tensor _scales_i8, Tensor _wscales, "
"Tensor _ascales, Tensor! _out_feats) -> ()");
m.impl("qserve_w4a8_per_group_gemm", torch::kCUDA, &qserve_w4a8_per_group_gemm);
+
+ m.def("copy_to_gpu_no_ce(Tensor input, Tensor! output) -> ()");
+ m.impl("copy_to_gpu_no_ce", torch::kCUDA, ©_to_gpu_no_ce);
+ m.def("concat_mla_k(Tensor! k, Tensor k_nope, Tensor k_rope) -> ()");
+ m.impl("concat_mla_k", torch::kCUDA, &concat_mla_k);
+
+ /*
+ * From csrc/mamba
+ */
+ m.def(
+ "causal_conv1d_update(Tensor! x,"
+ "Tensor! conv_state,"
+ "Tensor! weight,"
+ "Tensor? bias_,"
+ "bool silu_activation,"
+ "Tensor? cache_seqlens_,"
+ "Tensor? conv_state_indices,"
+ "int pad_slot_id) -> ()");
+ m.impl("causal_conv1d_update", torch::kCUDA, &causal_conv1d_update);
+
+ m.def(
+ "causal_conv1d_fwd(Tensor! x, Tensor! weight,"
+ "Tensor? bias_,"
+ "Tensor!? conv_states,"
+ "Tensor? query_start_loc,"
+ "Tensor? cache_indices,"
+ "Tensor? has_initial_state,"
+ "bool silu_activation,"
+ "int pad_slot_id) -> ()");
+ m.impl("causal_conv1d_fwd", torch::kCUDA, &causal_conv1d_fwd);
}
REGISTER_EXTENSION(common_ops)
diff --git a/sgl-kernel/csrc/common_extension_rocm.cc b/sgl-kernel/csrc/common_extension_rocm.cc
index a97f1733684..f4e14d0d514 100644
--- a/sgl-kernel/csrc/common_extension_rocm.cc
+++ b/sgl-kernel/csrc/common_extension_rocm.cc
@@ -33,6 +33,7 @@ TORCH_LIBRARY_EXPAND(sgl_kernel, m) {
m.def("gelu_quick(Tensor! out, Tensor input) -> ()");
m.impl("gelu_quick", torch::kCUDA, &gelu_quick);
+
/*
* From csrc/allreduce
*/
@@ -120,6 +121,56 @@ TORCH_LIBRARY_EXPAND(sgl_kernel, m) {
*/
m.def("apply_token_bitmask_inplace_cuda(Tensor logits, Tensor bitmask, Tensor? indices=None) -> ()");
m.impl("apply_token_bitmask_inplace_cuda", &ApplyTokenBitmaskInplace);
+
+ /*
+ * From csrc/kvcacheio
+ */
+ m.def(
+ "transfer_kv_per_layer(Tensor src_k, Tensor dst_k, Tensor src_v, Tensor dst_v, Tensor src_indices, Tensor "
+ "dst_indices, int item_size, int block_quota, int num_warps_per_block) -> ()");
+ m.impl("transfer_kv_per_layer", torch::kCUDA, &transfer_kv_per_layer);
+ m.def(
+ "transfer_kv_per_layer_pf_lf(Tensor src_k, Tensor dst_k, Tensor src_v, Tensor dst_v, Tensor src_indices, Tensor "
+ "dst_indices, int layer_id, int item_size, int src_layout_dim, int block_quota, int num_warps_per_block) -> ()");
+ m.impl("transfer_kv_per_layer_pf_lf", torch::kCUDA, &transfer_kv_per_layer_pf_lf);
+ m.def(
+ "transfer_kv_all_layer(Tensor src_k_layers, Tensor dst_k_layers, Tensor src_v_layers, Tensor dst_v_layers, "
+ "Tensor src_indices, Tensor dst_indices, int item_size, int num_layers, int block_quota, int "
+ "num_warps_per_block) -> ()");
+ m.impl("transfer_kv_all_layer", torch::kCUDA, &transfer_kv_all_layer);
+ m.def(
+ "transfer_kv_all_layer_lf_pf(Tensor src_k_layers, Tensor dst_k, Tensor src_v_layers, Tensor dst_v, "
+ "Tensor src_indices, Tensor dst_indices, int item_size, int dst_layout_dim, int num_layers, int block_quota, int "
+ "num_warps_per_block) -> ()");
+ m.impl("transfer_kv_all_layer_lf_pf", torch::kCUDA, &transfer_kv_all_layer_lf_pf);
+ m.def(
+ "transfer_kv_per_layer_mla(Tensor src, Tensor dst, Tensor src_indices, Tensor dst_indices, int item_size, int "
+ "block_quota, int num_warps_per_block) -> ()");
+ m.impl("transfer_kv_per_layer_mla", torch::kCUDA, &transfer_kv_per_layer_mla);
+ m.def(
+ "transfer_kv_per_layer_mla_pf_lf(Tensor src, Tensor dst, Tensor src_indices, Tensor dst_indices, int layer_id, "
+ "int item_size, int src_layout_dim, int block_quota, int num_warps_per_block) -> ()");
+ m.impl("transfer_kv_per_layer_mla_pf_lf", torch::kCUDA, &transfer_kv_per_layer_mla_pf_lf);
+ m.def(
+ "transfer_kv_all_layer_mla(Tensor src_layers, Tensor dst_layers, Tensor src_indices, Tensor dst_indices, int "
+ "item_size, int num_layers, int block_quota, int num_warps_per_block) -> ()");
+ m.impl("transfer_kv_all_layer_mla", torch::kCUDA, &transfer_kv_all_layer_mla);
+ m.def(
+ "transfer_kv_all_layer_mla_lf_pf(Tensor src_layers, Tensor dst, Tensor src_indices, Tensor dst_indices, "
+ "int item_size, int dst_layout_dim, int num_layers, int block_quota, int num_warps_per_block) -> ()");
+ m.impl("transfer_kv_all_layer_mla_lf_pf", torch::kCUDA, &transfer_kv_all_layer_mla_lf_pf);
+ m.def(
+ "transfer_kv_direct(Tensor[] src_layers, Tensor[] dst_layers, Tensor src_indices, Tensor dst_indices, int "
+ "page_size) -> ()");
+ m.impl("transfer_kv_direct", torch::kCUDA, &transfer_kv_direct);
+ m.def(
+ "transfer_kv_per_layer_direct_pf_lf(Tensor[] src_ptrs, Tensor[] dst_ptrs, Tensor src_indices, "
+ "Tensor dst_indices, int layer_id, int page_size)->() ");
+ m.impl("transfer_kv_per_layer_direct_pf_lf", torch::kCUDA, &transfer_kv_per_layer_direct_pf_lf);
+ m.def(
+ "transfer_kv_all_layer_direct_lf_pf(Tensor[] src_ptrs, Tensor[] dst_ptrs, Tensor src_indices, "
+ "Tensor dst_indices, int page_size) ->() ");
+ m.impl("transfer_kv_all_layer_direct_lf_pf", torch::kCUDA, &transfer_kv_all_layer_direct_lf_pf);
}
REGISTER_EXTENSION(common_ops)
diff --git a/sgl-kernel/csrc/cpu/activation.cpp b/sgl-kernel/csrc/cpu/activation.cpp
index debf5b2447e..70756776b91 100644
--- a/sgl-kernel/csrc/cpu/activation.cpp
+++ b/sgl-kernel/csrc/cpu/activation.cpp
@@ -77,3 +77,59 @@ at::Tensor silu_and_mul_cpu(at::Tensor& input) {
});
return out;
}
+
+at::Tensor gelu_tanh_and_mul_cpu(const at::Tensor& input) {
+ RECORD_FUNCTION("sgl-kernel::gelu_tanh_and_mul_cpu", std::vector({input}));
+ auto sizes = input.sizes().vec();
+ int64_t last_dim = input.ndimension() - 1;
+ int64_t d = sizes[last_dim] / 2;
+ sizes[last_dim] = d;
+ int64_t num_tokens = input.numel() / input.size(-1);
+ at::Tensor out = at::empty(sizes, input.options());
+ const float sqrt_2_div_pi = std::sqrt(2.f / M_PI);
+
+ AT_DISPATCH_REDUCED_FLOATING_TYPES(input.scalar_type(), "gelu_tanh_and_mul", [&] {
+ using Vec = at::vec::Vectorized;
+ act_and_mul_kernel_impl(
+ out.data_ptr(),
+ input.data_ptr(),
+ num_tokens,
+ d,
+ [sqrt_2_div_pi](float x) {
+ float x3 = x * x * x;
+ float tanh_arg = sqrt_2_div_pi * (x + 0.044715f * x3);
+ return 0.5f * x * (1.f + std::tanh(tanh_arg));
+ },
+ [sqrt_2_div_pi](Vec x) {
+ Vec x3 = x * x * x;
+ Vec tanh_arg = Vec(sqrt_2_div_pi) * (x + Vec(0.044715f) * x3);
+ return Vec(0.5f) * x * (Vec(1.f) + tanh_arg.tanh());
+ });
+ });
+
+ return out;
+}
+
+at::Tensor gelu_and_mul_cpu(const at::Tensor& input) {
+ RECORD_FUNCTION("sgl-kernel::gelu_and_mul_cpu", std::vector({input}));
+ auto sizes = input.sizes().vec();
+ int64_t last_dim = input.ndimension() - 1;
+ int64_t d = sizes[last_dim] / 2;
+ sizes[last_dim] = d;
+ int64_t num_tokens = input.numel() / input.size(-1);
+ at::Tensor out = at::empty(sizes, input.options());
+
+ AT_DISPATCH_REDUCED_FLOATING_TYPES(input.scalar_type(), "gelu_and_mul", [&] {
+ using Vec = at::vec::Vectorized;
+ const float inv_sqrt2 = 1.0f / std::sqrt(2.0f);
+ act_and_mul_kernel_impl(
+ out.data_ptr(),
+ input.data_ptr(),
+ num_tokens,
+ d,
+ [inv_sqrt2](float x) { return 0.5f * x * (1.f + std::erf(x * inv_sqrt2)); },
+ [inv_sqrt2](Vec x) { return Vec(0.5f) * x * (Vec(1.f) + (x * Vec(inv_sqrt2)).erf()); });
+ });
+
+ return out;
+}
diff --git a/sgl-kernel/csrc/cpu/common.h b/sgl-kernel/csrc/cpu/common.h
index 1bf45ee4bc8..0fb13260768 100644
--- a/sgl-kernel/csrc/cpu/common.h
+++ b/sgl-kernel/csrc/cpu/common.h
@@ -105,7 +105,19 @@ namespace {
#define CHECK_EQ(a, b) TORCH_CHECK((a) == (b), "CHECK_EQ(" #a ", " #b ") failed. ", a, " vs ", b)
-// parallel routines
+// [NB] Parallel Routines
+//
+// * at::parallel_for - covers most generic use cases; compiled against
+//   OpenMP in the default torch release.
+//
+// * parallel_for - same purpose as above, but the payload partition scheme
+//   can be chosen via balance211.
+//
+// * parallel_2d - parallelizes over 2 dimensions (used in GEMM, etc.) and
+//   balances the payload across both dimensions.
+//
+
+// grain size for each thread
constexpr int GRAIN_SIZE = 1024;
template ::value, int>::type = 0>
@@ -113,6 +125,17 @@ inline T div_up(T x, T y) {
return (x + y - 1) / y;
}
+// at::get_thread_num() can only be used inside at::parallel_for(): its thread
+// pool is lazily initialized, so outside of it it always returns 0.
+inline int get_thread_num() {
+#if defined(_OPENMP)
+ return omp_get_thread_num();
+#else
+ return 0;
+#endif
+}
+
+// balance payload across each thread
template
inline void balance211(T n, T nth, T ith, T& n_start, T& n_end) {
#if 0
@@ -153,6 +176,100 @@ inline void parallel_for(int n, const func_t& f) {
#endif
}
+// for 1d parallelism, use `actual_nth`
+// for 2d parallelism, round down to an even number of threads, e.g. 43 -> 42
+int inline adjust_num_threads(int m) {
+ int actual_nth = at::get_num_threads();
+ if (m == 1) {
+ return actual_nth;
+ }
+ return std::max(1, (actual_nth >> 1) * 2);
+}
+
+template
+inline void parallel_2d(int m, int n, const func_t& f) {
+ // make sure we have even num_threads
+ int nth = adjust_num_threads(m);
+
+ // [NOTE] thread blocking:
+ //
+ // 1) prefer square block per thread
+ // 2) use even number of CPU cores
+ // 3) use all `num_threads` cores
+ //
+ // we have:
+ // TM * TN = T
+ // BM / TM = BN / TN
+ // then:
+ // TM = ((BM / BN) * T) ^ 0.5
+ //
+ float r = float(m) / n;
+ int nth_m = std::ceil(std::sqrt(r * nth));
+ int nth_n = 1;
+ for (; nth_m > 0; --nth_m) {
+ nth_n = nth / nth_m;
+ if (nth_m * nth_n == nth) {
+ break;
+ }
+ }
+
+#if defined(_OPENMP)
+#pragma omp parallel num_threads(nth)
+ {
+ int ith = omp_get_thread_num();
+ int ith_m = ith / nth_n;
+ int ith_n = ith % nth_n;
+
+ int thread_block_m = div_up(m, nth_m);
+ int thread_block_n = div_up(n, nth_n);
+
+ int begin_m = ith_m * thread_block_m;
+ int end_m = std::min(m, begin_m + thread_block_m);
+ int begin_n = ith_n * thread_block_n;
+ int end_n = std::min(n, begin_n + thread_block_n);
+
+ f(begin_m, end_m, begin_n, end_n);
+ }
+#else
+ f(0, m, 0, n);
+#endif
+}
+
+// Limit the maximum number of cache blocks when weights need to be
+// pre-unpacked, e.g. fp8.
+#define MAX_CACHE_BLOCK_SIZE 4
+
+template
+inline int get_cache_blocks(int chunk_size) {
+ // L2 2MB and ratio of 50%
+ const int L2_size = 2048 * 1024 >> 1;
+ return std::max(1, int(L2_size / (chunk_size * sizeof(T))));
+}
+
+template <>
+inline int get_cache_blocks(int chunk_size) {
+ // fp8 uses bf16 as accumulate type
+ int cache_block_size = get_cache_blocks(chunk_size);
+ return std::min(MAX_CACHE_BLOCK_SIZE, cache_block_size);
+}
+
+// 2d sequential loop in range : [mb0, mb1), [nb0, nb1)
+template
+inline void loop_2d(int64_t mb0, int64_t mb1, int64_t nb0, int64_t nb1, int64_t chunk_size, const func_t& f) {
+ // get number of blocks for L2 in most inner loop
+ int64_t cache_blocks_nb = get_cache_blocks(chunk_size);
+
+ // loop order: [NB / cache_blocks_nb, MB, cache_blocks_nb]
+ // TODO: implement reverse order of [MB / cache_blocks_mb, NB, cache_blocks_mb]
+ for (int64_t nbb = nb0; nbb < nb1; nbb += cache_blocks_nb) {
+ for (int64_t mb = mb0; mb < mb1; ++mb) {
+ for (int64_t nb = nbb; nb < std::min(nbb + cache_blocks_nb, nb1); ++nb) {
+ f(mb, nb, nb - nbb);
+ }
+ }
+ }
+}
+
// data indexing for dimension collapse
template
inline T data_index_init(T offset) {
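
To make the thread-blocking note in `parallel_2d` above concrete: the heuristic picks `nth_m ≈ sqrt((m / n) * nth)` so per-thread blocks stay roughly square, then walks `nth_m` down until `nth_m * nth_n == nth` so every thread is used. A small Python sketch of the same arithmetic (an illustration, not code from the patch):

```python
import math

def split_threads_2d(m: int, n: int, nth: int):
    """Mirror of the parallel_2d heuristic: nth_m ~ sqrt((m / n) * nth),
    decremented until it divides nth exactly."""
    nth_m = max(1, math.ceil(math.sqrt(m / n * nth)))
    nth_n = 1
    while nth_m > 0:
        nth_n = nth // nth_m
        if nth_m * nth_n == nth:
            break
        nth_m -= 1
    return nth_m, nth_n

def thread_ranges(m: int, n: int, nth_m: int, nth_n: int):
    """Per-thread (begin_m, end_m, begin_n, end_n) blocks, as in parallel_2d."""
    bm = -(-m // nth_m)  # div_up
    bn = -(-n // nth_n)
    for ith in range(nth_m * nth_n):
        ith_m, ith_n = divmod(ith, nth_n)
        yield (ith_m * bm, min(m, (ith_m + 1) * bm),
               ith_n * bn, min(n, (ith_n + 1) * bn))

print(split_threads_2d(1024, 4096, 32))  # (2, 16): 2 row blocks x 16 column blocks
```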
diff --git a/sgl-kernel/csrc/cpu/gemm.cpp b/sgl-kernel/csrc/cpu/gemm.cpp
index 2cce8ddac5a..48655b9f702 100644
--- a/sgl-kernel/csrc/cpu/gemm.cpp
+++ b/sgl-kernel/csrc/cpu/gemm.cpp
@@ -254,7 +254,7 @@ void tinygemm_kernel(
return;
}
- // pattern: 1-4-16
+ // pattern: 1-4-16, N = 16, 32, 48, 64
constexpr int64_t BLOCK_M = 4;
constexpr int64_t BLOCK_N = 64;
const int64_t MB = div_up(M, BLOCK_M);
@@ -268,35 +268,59 @@ void tinygemm_kernel(
switch (mb_size << 4 | nb_size >> 4) {
// mb_size = 1
+ case 0x11:
+ LAUNCH_TINYGEMM_KERNEL_NN(1, 16);
+ break;
case 0x12:
LAUNCH_TINYGEMM_KERNEL_NN(1, 32);
break;
+ case 0x13:
+ LAUNCH_TINYGEMM_KERNEL_NN(1, 48);
+ break;
case 0x14:
LAUNCH_TINYGEMM_KERNEL_NN(1, 64);
break;
// mb_size = 2
+ case 0x21:
+ LAUNCH_TINYGEMM_KERNEL_NN(2, 16);
+ break;
case 0x22:
LAUNCH_TINYGEMM_KERNEL_NN(2, 32);
break;
+ case 0x23:
+ LAUNCH_TINYGEMM_KERNEL_NN(2, 48);
+ break;
case 0x24:
LAUNCH_TINYGEMM_KERNEL_NN(2, 64);
break;
// mb_size = 3
+ case 0x31:
+ LAUNCH_TINYGEMM_KERNEL_NN(3, 16);
+ break;
case 0x32:
LAUNCH_TINYGEMM_KERNEL_NN(3, 32);
break;
+ case 0x33:
+ LAUNCH_TINYGEMM_KERNEL_NN(3, 48);
+ break;
case 0x34:
LAUNCH_TINYGEMM_KERNEL_NN(3, 64);
break;
// mb_size = 4
+ case 0x41:
+ LAUNCH_TINYGEMM_KERNEL_NN(4, 16);
+ break;
case 0x42:
LAUNCH_TINYGEMM_KERNEL_NN(4, 32);
break;
+ case 0x43:
+ LAUNCH_TINYGEMM_KERNEL_NN(4, 48);
+ break;
case 0x44:
LAUNCH_TINYGEMM_KERNEL_NN(4, 64);
break;
default:
- TORCH_CHECK(false, "Unexpected block size, ", mb_size, "x", "nb_size");
+ TORCH_CHECK(false, "Unexpected block size, ", mb_size, " x ", nb_size);
}
}
}
@@ -318,20 +342,15 @@ void weight_packed_linear_kernel_impl(
const int64_t MB = div_up(M, BLOCK_M);
const int64_t NB = div_up(N, BLOCK_N);
- // use avx512-bf16 when a) M is small; b) dtype is bfloat16, otherwise use amx c) N is small
- const bool use_brgemm = (M > 4) || (!std::is_same_v<scalar_t, at::BFloat16>) || (N < 64);
+ const bool use_brgemm = can_use_brgemm(M);
// parallel on [MB, NB]
AT_DISPATCH_BOOL(bias != nullptr, has_bias, [&] {
- at::parallel_for(0, MB * NB, 0, [&](int64_t begin, int64_t end) {
- int64_t mb{0}, nb{0};
- data_index_init(begin, mb, MB, nb, NB);
-
+ parallel_2d(MB, NB, [&](int64_t mb0, int64_t mb1, int64_t nb0, int64_t nb1) {
// for brgemm, use float32 for accumulate
alignas(64) float Ctmp[BLOCK_M * BLOCK_N];
- for (int64_t i = begin; i < end; ++i) {
- UNUSED(i);
+ loop_2d(mb0, mb1, nb0, nb1, BLOCK_N * K, [&](int64_t mb, int64_t nb, int64_t nb_offset) {
int64_t mb_start = mb * BLOCK_M;
int64_t mb_size = std::min(M - mb_start, BLOCK_M);
int64_t nb_start = nb * BLOCK_N;
@@ -350,10 +369,7 @@ void weight_packed_linear_kernel_impl(
/* ldb */ nb_size,
/* ldc */ out_strideM,
/* brg */ use_brgemm);
-
- // move to the next index
- data_index_step(mb, MB, nb, NB);
- }
+ });
if (use_brgemm) {
at::native::cpublas::brgemm_release();
diff --git a/sgl-kernel/csrc/cpu/gemm.h b/sgl-kernel/csrc/cpu/gemm.h
index eabbfb7c8fa..6a16a298554 100644
--- a/sgl-kernel/csrc/cpu/gemm.h
+++ b/sgl-kernel/csrc/cpu/gemm.h
@@ -27,10 +27,10 @@ template <>
inline bool can_use_brgemm(int M) {
return true;
}
-// TODO: add u8s8 brgemm, this requires PyTorch 2.7
+// u8s8 brgemm requires PyTorch 2.7 or above
template <>
inline bool can_use_brgemm(int M) {
- return false;
+ return M > 4;
}
template <>
@@ -198,4 +198,5 @@ void tinygemm_kernel(
int64_t ldb,
int64_t ldc,
bool brg,
- int64_t block_size_K);
+ int64_t block_size_K,
+ bool do_unpack = true);
diff --git a/sgl-kernel/csrc/cpu/gemm_fp8.cpp b/sgl-kernel/csrc/cpu/gemm_fp8.cpp
index 3bba4078636..008f8329846 100644
--- a/sgl-kernel/csrc/cpu/gemm_fp8.cpp
+++ b/sgl-kernel/csrc/cpu/gemm_fp8.cpp
@@ -2,9 +2,6 @@
#include "gemm.h"
#include "vec.h"
-// we use 4x32 for BLOCK_M
-#define BLOCK_SIZE_M_SCALE 4
-
namespace {
template
@@ -250,7 +247,8 @@ struct brgemm {
int K,
int lda,
int ldb,
- int ldc) {
+ int ldc,
+ bool do_unpack = true) {
TORCH_CHECK(false, "struct brgemm: primary template not implemented!");
}
};
@@ -270,17 +268,20 @@ struct brgemm {
int K,
int lda,
int ldb,
- int ldc) {
+ int ldc,
+ bool do_unpack = true) {
constexpr int BLOCK_N = block_size_n();
// [K, BLOCK_N] -> [K / 2, BLOCK_N * 2]
const int ldb_tmp = BLOCK_N;
- for (int k = 0; k < K; k += BLOCK_K) {
- int kb_size = std::min(BLOCK_K, K - k);
+ if (do_unpack) {
+ for (int k = 0; k < K; k += BLOCK_K) {
+ int kb_size = std::min(BLOCK_K, K - k);
- int idx = k >> 7; // k / BLOCK_K where BLOCK_K = 128
- unpack_B(Btmp + k * ldb_tmp, B + k * ldb, N, kb_size, ldb, ldb_tmp, scale[idx]);
+ int idx = k >> 7; // k / BLOCK_K where BLOCK_K = 128
+ unpack_B(Btmp + k * ldb_tmp, B + k * ldb, N, kb_size, ldb, ldb_tmp, scale[idx]);
+ }
}
at::native::cpublas::brgemm(M, N, K, lda, ldb_tmp, BLOCK_N, /* add_C */ false, A, Btmp, Ctmp);
@@ -312,9 +313,11 @@ void tinygemm_kernel(
int64_t ldb,
int64_t ldc,
bool brg,
- int64_t block_size_K) {
+ int64_t block_size_K,
+ bool do_unpack = true) {
if (brg) {
- brgemm::apply(A, B, C, Btmp, Ctmp, bias, scale, M, N, K, lda, ldb, ldc);
+ brgemm::apply(
+ A, B, C, Btmp, Ctmp, bias, scale, M, N, K, lda, ldb, ldc, do_unpack);
return;
}
@@ -366,7 +369,7 @@ void fp8_scaled_mm_kernel_impl(
int64_t block_size_N,
int64_t block_size_K,
int64_t buffer_size_per_thread) {
- constexpr int64_t BLOCK_M = block_size_m() * BLOCK_SIZE_M_SCALE;
+ constexpr int64_t BLOCK_M = block_size_m();
constexpr int64_t BLOCK_N = block_size_n();
const int64_t MB = div_up(M, BLOCK_M);
const int64_t NB = div_up(N, BLOCK_N);
@@ -378,16 +381,12 @@ void fp8_scaled_mm_kernel_impl(
// parallel on [MB, NB]
AT_DISPATCH_BOOL(bias != nullptr, has_bias, [&] {
- at::parallel_for(0, MB * NB, 0, [&](int64_t begin, int64_t end) {
- int64_t mb{0}, nb{0};
- data_index_init(begin, mb, MB, nb, NB);
-
- int tid = at::get_thread_num();
+ parallel_2d(MB, NB, [&](int64_t mb0, int64_t mb1, int64_t nb0, int64_t nb1) {
+ int tid = get_thread_num();
scalar_t* __restrict__ Btmp = buffer + tid * buffer_size_per_thread;
- float* __restrict__ Ctmp = (float*)((void*)(Btmp + BLOCK_N * K));
+ float* __restrict__ Ctmp = (float*)((void*)(Btmp + MAX_CACHE_BLOCK_SIZE * BLOCK_N * K));
- for (int64_t i = begin; i < end; ++i) {
- UNUSED(i);
+ loop_2d(mb0, mb1, nb0, nb1, BLOCK_N * K, [&](int64_t mb, int64_t nb, int64_t nb_offset) {
const float* scale_ptr = scales2 + (nb / blocks_n_per_group) * scale_size_K;
int64_t mb_start = mb * BLOCK_M;
@@ -395,11 +394,14 @@ void fp8_scaled_mm_kernel_impl(
int64_t nb_start = nb * BLOCK_N;
int64_t nb_size = std::min(N - nb_start, BLOCK_N);
+ // only do unpacking for the first row
+ bool do_unpack = (mb == mb0);
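+ // Btmp is indexed by nb_offset below, so rows mb > mb0 of this thread's tile reuse the
+ // already-unpacked B block instead of converting it from fp8 again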
+
tinygemm_kernel(
/* A */ mat1 + mb_start * mat1_strideM,
/* B */ mat2 + nb_start * K, // nb * BLOCK_N * K
/* C */ out + mb_start * out_strideM + nb_start,
- /* Btmp */ Btmp,
+ /* Btmp */ Btmp + nb_offset * BLOCK_N * K,
/* Ctmp */ Ctmp,
/* scale */ scale_ptr,
/* bias */ bias + nb_start,
@@ -410,11 +412,9 @@ void fp8_scaled_mm_kernel_impl(
/* ldb */ nb_size,
/* ldc */ out_strideM,
/* brg */ use_brgemm,
- /* block_size_K */ block_size_K);
-
- // move to the next index
- data_index_step(mb, MB, nb, NB);
- }
+ /* block_size_K */ block_size_K,
+ /* do_unpack */ do_unpack);
+ });
if (use_brgemm) {
at::native::cpublas::brgemm_release();
@@ -441,8 +441,10 @@ void tinygemm_kernel(
int64_t ldb,
int64_t ldc,
bool brg,
- int64_t block_size_K) {
- tinygemm_kernel(A, B, C, Btmp, Ctmp, scale, nullptr, M, N, K, lda, ldb, ldc, brg, block_size_K);
+ int64_t block_size_K,
+ bool do_unpack) {
+ tinygemm_kernel(
+ A, B, C, Btmp, Ctmp, scale, nullptr, M, N, K, lda, ldb, ldc, brg, block_size_K, do_unpack);
}
#define INSTANTIATE_TINYGEMM_TEMPLATE(TYPE) \
@@ -460,7 +462,8 @@ void tinygemm_kernel(
int64_t ldb, \
int64_t ldc, \
bool brg, \
- int64_t block_size_K)
+ int64_t block_size_K, \
+ bool do_unpack)
INSTANTIATE_TINYGEMM_TEMPLATE(at::BFloat16);
INSTANTIATE_TINYGEMM_TEMPLATE(at::Half);
@@ -495,7 +498,7 @@ at::Tensor fp8_scaled_mm_cpu(
int64_t block_size_N = block_size[0];
int64_t block_size_K = block_size[1];
- constexpr int64_t BLOCK_M = block_size_m() * BLOCK_SIZE_M_SCALE;
+ constexpr int64_t BLOCK_M = block_size_m();
constexpr int64_t BLOCK_N = block_size_n();
TORCH_CHECK(block_size_N % BLOCK_N == 0, "fp8_scaled_mm_cpu: expect block_size_N to be multiples of BLOCK_N");
TORCH_CHECK(block_size_K == BLOCK_K, "fp8_scaled_mm_cpu: expect block_size_K equals to BLOCK_K");
@@ -523,7 +526,7 @@ at::Tensor fp8_scaled_mm_cpu(
// Btmp : [T, BLOCK_N * K]
// Ctmp : [T, BLOCK_M * BLOCK_N]
int num_threads = at::get_num_threads();
- int64_t size_per_thread = BLOCK_N * K + BLOCK_M * BLOCK_N * 2;
+ int64_t size_per_thread = MAX_CACHE_BLOCK_SIZE * BLOCK_N * K + BLOCK_M * BLOCK_N * 2;
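+ // each thread now holds up to MAX_CACHE_BLOCK_SIZE unpacked B blocks, i.e. [MAX_CACHE_BLOCK_SIZE, BLOCK_N, K],
+ // matching the nb_offset indexing inside loop_2d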
auto buffer = at::empty({num_threads, size_per_thread}, mat1.options());
AT_DISPATCH_REDUCED_FLOATING_TYPES(out_dtype, "fp8_scaled_mm_kernel_impl", [&] {
diff --git a/sgl-kernel/csrc/cpu/gemm_int8.cpp b/sgl-kernel/csrc/cpu/gemm_int8.cpp
index f0f013cd167..cb6146607f1 100644
--- a/sgl-kernel/csrc/cpu/gemm_int8.cpp
+++ b/sgl-kernel/csrc/cpu/gemm_int8.cpp
@@ -4,6 +4,61 @@
namespace {
+template
+struct scale_C {
+ static inline void apply(
+ scalar_t* __restrict__ C,
+ const int32_t* __restrict__ Ctmp,
+ const int32_t* __restrict__ Bcomp,
+ const float* __restrict__ bias,
+ float As,
+ const float* __restrict__ Bs) {
+ TORCH_CHECK(false, "scale_C: scalar path not implemented!");
+ }
+};
+
+#if defined(CPU_CAPABILITY_AVX512)
+template
+struct scale_C {
+ static inline void apply(
+ at::BFloat16* __restrict__ C,
+ const int32_t* __restrict__ Ctmp,
+ const int32_t* __restrict__ Bcomp,
+ const float* __restrict__ bias,
+ float As,
+ const float* __restrict__ Bs) {
+ constexpr int COLS = BLOCK_N / 16;
+ static_assert(COLS % 2 == 0);
+
+ __m512 vc[COLS];
+ __m512 vd0 = _mm512_set1_ps(As);
+
+ auto compute = [&](auto col) {
+ __m512 vd1 = _mm512_loadu_ps(Bs + col * 16);
+ __m512i vcomp = _mm512_loadu_si512(Bcomp + col * 16);
+ __m512i vc32 = _mm512_loadu_si512(Ctmp + col * 16);
+ vc[col] = _mm512_cvtepi32_ps(_mm512_sub_epi32(vc32, vcomp));
+ if constexpr (has_bias) {
+ __m512 vbias = _mm512_loadu_ps(bias + col * 16);
+ vc[col] = _mm512_fmadd_ps(_mm512_mul_ps(vc[col], vd0), vd1, vbias);
+ } else {
+ vc[col] = _mm512_mul_ps(_mm512_mul_ps(vc[col], vd0), vd1);
+ }
+ };
+ Unroll<COLS>{}(compute);
+
+ auto storec = [&](auto col) {
+ // for COLS = 2, 4 use 512bit store
+ if constexpr (col % 2 == 0) {
+ _mm512_storeu_si512(
+ reinterpret_cast<__m512i*>((C + col * 16)), (__m512i)(_mm512_cvtne2ps_pbh(vc[col + 1], vc[col + 0])));
+ }
+ };
+ Unroll<COLS>{}(storec);
+ }
+};
+#endif
+
template
struct tinygemm_kernel_nn {
static inline void apply(
@@ -169,6 +224,17 @@ void tinygemm_kernel(
// B compensation
const int32_t* Bcomp = reinterpret_cast<const int32_t*>(B + block_size_n() * K);
+ if (brg) {
+ constexpr int BLOCK_N = block_size_n();
+ at::native::cpublas::brgemm(M, N, K, lda, ldb, BLOCK_N, /* add_C */ false, A, B, Ctmp);
+
+ // apply compensation and scale
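+ // Bcomp holds per-column compensation packed after the int8 weights;
+ // dequant: C = As[m] * (Ctmp - Bcomp) * Bs (+ bias)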
+ for (int64_t m = 0; m < M; ++m) {
+ scale_C::apply(C + m * ldc, Ctmp + m * BLOCK_N, Bcomp, bias, As[m], Bs);
+ }
+ return;
+ }
+
// pattern: 1-4-16
constexpr int64_t BLOCK_M = 4;
constexpr int64_t BLOCK_N = 64;
@@ -233,22 +299,17 @@ void int8_scaled_mm_kernel_impl(
const int64_t MB = div_up(M, BLOCK_M);
const int64_t NB = div_up(N, BLOCK_N);
- // TODO: brgemm u8s8 depends on PyTorch 2.7 release.
- const bool use_brgemm = false;
+ const bool use_brgemm = can_use_brgemm(M);
// K + 4 after compensation
const int64_t packed_row_size = get_row_size(K);
AT_DISPATCH_BOOL(bias != nullptr, has_bias, [&] {
- at::parallel_for(0, MB * NB, 0, [&](int64_t begin, int64_t end) {
- int64_t mb{0}, nb{0};
- data_index_init(begin, mb, MB, nb, NB);
-
+ parallel_2d(MB, NB, [&](int64_t mb0, int64_t mb1, int64_t nb0, int64_t nb1) {
// for brgemm, use int32_t for accumulate
alignas(64) int32_t Ctmp[BLOCK_M * BLOCK_N];
- for (int i = begin; i < end; ++i) {
- UNUSED(i);
+ loop_2d(mb0, mb1, nb0, nb1, BLOCK_N * K, [&](int64_t mb, int64_t nb, int64_t nb_offset) {
int mb_start = mb * BLOCK_M;
int mb_size = std::min(M - mb_start, BLOCK_M);
int nb_start = nb * BLOCK_N;
@@ -269,10 +330,7 @@ void int8_scaled_mm_kernel_impl(
/* ldb */ nb_size,
/* ldc */ N,
/* brg */ use_brgemm);
-
- // move to the next index
- data_index_step(mb, MB, nb, NB);
- }
+ });
if (use_brgemm) {
at::native::cpublas::brgemm_release();
diff --git a/sgl-kernel/csrc/cpu/moe.cpp b/sgl-kernel/csrc/cpu/moe.cpp
index 88d84c83022..c3d66cec7f9 100644
--- a/sgl-kernel/csrc/cpu/moe.cpp
+++ b/sgl-kernel/csrc/cpu/moe.cpp
@@ -579,36 +579,31 @@ void fused_experts_kernel_impl(
const int64_t stride_e = 2 * N * K;
const int64_t stride_n = K;
+ int64_t avg_M = std::max(int64_t(1), M * topk / E);
+ const bool use_brgemm = can_use_brgemm(avg_M);
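+ // m_size varies per expert block, so decide brgemm once from the average tokens per expert
+ // (M * topk / E) instead of per iteration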
+
// here we only parallel on half of 2N to fuse silu_and_mul with gemm
- at::parallel_for(0, MB * NB, 0, [&](int64_t begin, int64_t end) {
+ parallel_2d(MB, NB, [&](int64_t mb0, int64_t mb1, int64_t nb0, int64_t nb1) {
// get local pointers
- int tid = at::get_thread_num();
+ int tid = get_thread_num();
scalar_t* __restrict__ A = A_tmp + tid * BLOCK_M * K;
float* __restrict__ C0 = C_tmp + tid * 2 * BLOCK_M * BLOCK_N;
float* __restrict__ C1 = C0 + BLOCK_M * BLOCK_N;
- bool is_brgemm_used = false;
-
- for (int64_t i = begin; i < end; ++i) {
- int64_t mb = i / NB;
- int64_t nb = i % NB;
-
- // nb0 from top half and nb1 from bottom half
- int64_t nb0 = nb, nb1 = nb + NB;
- int64_t n_size = std::min(N - nb0 * BLOCK_N, BLOCK_N);
+ loop_2d(mb0, mb1, nb0, nb1, BLOCK_N * K * 2, [&](int64_t mb, int64_t nb, int64_t nb_offset) {
+ // nb_upper from top half and nb_lower from bottom half
+ int64_t nb_upper = nb, nb_lower = nb + NB;
+ int64_t n_size = std::min(N - nb * BLOCK_N, BLOCK_N);
// B shape [K, n_size] in vnni format
int32_t expert_id = expert_ids[mb];
- const scalar_t* __restrict__ B0 = packed_w1 + expert_id * stride_e + nb0 * BLOCK_N * stride_n;
- const scalar_t* __restrict__ B1 = packed_w1 + expert_id * stride_e + nb1 * BLOCK_N * stride_n;
+ const scalar_t* __restrict__ B0 = packed_w1 + expert_id * stride_e + nb_upper * BLOCK_N * stride_n;
+ const scalar_t* __restrict__ B1 = packed_w1 + expert_id * stride_e + nb_lower * BLOCK_N * stride_n;
// 1.a load A
const int32_t* A_ids = sorted_ids + mb * BLOCK_M;
int64_t m_size = offsets[mb + 1] - offsets[mb];
- const bool use_brgemm = can_use_brgemm(m_size);
- is_brgemm_used = is_brgemm_used || use_brgemm;
-
for (int64_t m = 0; m < m_size; ++m) {
int32_t index = A_ids[m] / topk;
copy_stub(A + m * K, input + index * K, K);
@@ -659,9 +654,9 @@ void fused_experts_kernel_impl(
/* ldb */ n_size,
/* ldc */ N);
}
- }
+ });
- if (is_brgemm_used) {
+ if (use_brgemm) {
at::native::cpublas::brgemm_release();
}
});
@@ -676,24 +671,16 @@ void fused_experts_kernel_impl(
const int64_t stride_oc = IC;
// parallel on [MB2, NB2]
- at::parallel_for(0, MB2 * NB2, 0, [&](int64_t begin, int64_t end) {
+ parallel_2d(MB2, NB2, [&](int64_t mb0, int64_t mb1, int64_t nb0, int64_t nb1) {
// get local pointers
- int tid = at::get_thread_num();
+ int tid = get_thread_num();
// we won't be using C1 for gemm2
float* __restrict__ C = C_tmp + tid * 2 * BLOCK_M * BLOCK_N;
- bool is_brgemm_used = false;
-
- for (int64_t i = begin; i < end; ++i) {
- int64_t mb = i / NB2;
- int64_t nb = i % NB2;
-
+ loop_2d(mb0, mb1, nb0, nb1, BLOCK_N * IC, [&](int64_t mb, int64_t nb, int64_t nb_offset) {
int64_t m_size = offsets[mb + 1] - offsets[mb];
int64_t n_size = std::min(OC - nb * BLOCK_N, BLOCK_N);
- const bool use_brgemm = can_use_brgemm(m_size);
- is_brgemm_used = is_brgemm_used || use_brgemm;
-
// A ptr from ic1 of [M * topk, N] in sorted order
// so as to avoid copy A to tmp buffer again
const scalar_t* __restrict__ A = ic1 + offsets[mb] * N;
@@ -736,9 +723,9 @@ void fused_experts_kernel_impl(
float weight = topk_weights[index];
copy_mul_stub(ic2 + index * K + nb * BLOCK_N, C + m * BLOCK_N, weight, n_size);
}
- }
+ });
- if (is_brgemm_used) {
+ if (use_brgemm) {
at::native::cpublas::brgemm_release();
}
});
@@ -776,36 +763,27 @@ void shared_expert_kernel_impl(
TORCH_CHECK(N % BLOCK_N == 0, "Fixme when N is not multiples of ", BLOCK_N);
const int64_t stride_n = K;
+ const bool use_brgemm = can_use_brgemm(M);
+
// here we only parallel on half of 2N to fuse silu_and_mul with gemm
- at::parallel_for(0, MB * NB, 0, [&](int64_t begin, int64_t end) {
+ parallel_2d(MB, NB, [&](int64_t mb0, int64_t mb1, int64_t nb0, int64_t nb1) {
// get local pointers
- int tid = at::get_thread_num();
+ int tid = get_thread_num();
float* __restrict__ C0 = C_tmp + tid * 2 * BLOCK_M * BLOCK_N;
float* __restrict__ C1 = C0 + BLOCK_M * BLOCK_N;
- bool is_brgemm_used = false;
-
- for (int64_t i = begin; i < end; ++i) {
- int64_t mb = i / NB;
- int64_t nb = i % NB;
-
- // nb0 from top half and nb1 from bottom half
- int64_t nb0 = nb, nb1 = nb + NB;
- int64_t n_size = std::min(N - nb0 * BLOCK_N, BLOCK_N);
+ loop_2d(mb0, mb1, nb0, nb1, BLOCK_N * K * 2, [&](int64_t mb, int64_t nb, int64_t nb_offset) {
+ // nb_upper from top half and nb_lower from bottom half
+ int64_t nb_upper = nb, nb_lower = nb + NB;
+ int64_t n_size = std::min(N - nb * BLOCK_N, BLOCK_N);
int64_t m_size = std::min(M - mb * BLOCK_M, BLOCK_M);
- // int64_t mb_start = mb * BLOCK_M;
- // int64_t mb_size = std::min(M - mb_start, BLOCK_M);
-
// A shape [m_size, K]
const scalar_t* A = input + mb * BLOCK_M * K;
// B shape [K, n_size] in vnni format
- const scalar_t* __restrict__ B0 = packed_w1 + nb0 * BLOCK_N * stride_n;
- const scalar_t* __restrict__ B1 = packed_w1 + nb1 * BLOCK_N * stride_n;
-
- const bool use_brgemm = can_use_brgemm(m_size);
- is_brgemm_used = is_brgemm_used || use_brgemm;
+ const scalar_t* __restrict__ B0 = packed_w1 + nb_upper * BLOCK_N * stride_n;
+ const scalar_t* __restrict__ B1 = packed_w1 + nb_lower * BLOCK_N * stride_n;
if (use_brgemm) {
// 1.b gemm: C0 = A @ B0
@@ -850,9 +828,9 @@ void shared_expert_kernel_impl(
/* ldb */ n_size,
/* ldc */ N);
}
- }
+ });
- if (is_brgemm_used) {
+ if (use_brgemm) {
at::native::cpublas::brgemm_release();
}
});
@@ -866,24 +844,16 @@ void shared_expert_kernel_impl(
const int64_t stride_oc = IC;
// parallel on [MB2, NB2]
- at::parallel_for(0, MB2 * NB2, 0, [&](int64_t begin, int64_t end) {
+ parallel_2d(MB2, NB2, [&](int64_t mb0, int64_t mb1, int64_t nb0, int64_t nb1) {
// get local pointers
- int tid = at::get_thread_num();
+ int tid = get_thread_num();
// we won't be using C1 for gemm2
float* __restrict__ C = C_tmp + tid * 2 * BLOCK_M * BLOCK_N;
- bool is_brgemm_used = false;
-
- for (int64_t i = begin; i < end; ++i) {
- int64_t mb = i / NB2;
- int64_t nb = i % NB2;
-
+ loop_2d(mb0, mb1, nb0, nb1, BLOCK_N * IC, [&](int64_t mb, int64_t nb, int64_t nb_offset) {
int64_t m_size = std::min(M - mb * BLOCK_M, BLOCK_M);
int64_t n_size = std::min(OC - nb * BLOCK_N, BLOCK_N);
- const bool use_brgemm = can_use_brgemm(m_size);
- is_brgemm_used = is_brgemm_used || use_brgemm;
-
// A shape [m_size, IC]
const scalar_t* __restrict__ A = ic1 + mb * BLOCK_M * N;
@@ -922,9 +892,9 @@ void shared_expert_kernel_impl(
for (int64_t m = 0; m < m_size; ++m) {
add_mul_stub(out + m * K, C + m * BLOCK_N, fused_out + m * K, routed_scaling_factor, n_size);
}
- }
+ });
- if (is_brgemm_used) {
+ if (use_brgemm) {
at::native::cpublas::brgemm_release();
}
});
@@ -1086,7 +1056,7 @@ at::Tensor fused_experts_cpu(
//
// for fp8 w8a16:
// 7. intermediate_cache0 : [M * topk, 2N]
- // 8. B_tmp : [T, BLOCK_N, std::max(K, N)]
+ // 8. B_tmp : [T, MAX_CACHE_BLOCK_SIZE, BLOCK_N, std::max(K, N)]
//
int64_t buffer_size_nbytes = M * topk * N * 2 + M * topk * K * 2 +
num_threads * BLOCK_M * K * (use_int8_w8a8 ? 1 : 2) +
@@ -1096,7 +1066,7 @@ at::Tensor fused_experts_cpu(
buffer_size_nbytes += std::max(M * K, M * topk * N) + M * topk * sizeof(float);
}
if (use_fp8_w8a16) {
- buffer_size_nbytes += M * topk * 2 * N * 2 + num_threads * BLOCK_N * std::max(K, N) * 2;
+ buffer_size_nbytes += M * topk * 2 * N * 2 + num_threads * MAX_CACHE_BLOCK_SIZE * BLOCK_N * std::max(K, N) * 2;
}
auto buffer2 = at::empty({buffer_size_nbytes}, hidden_states.options().dtype(at::kChar));
@@ -1268,7 +1238,7 @@ at::Tensor shared_expert_cpu(
//
// for fp8 w8a16:
// 5. intermediate_cache0 : [M, 2N]
- // 6. B_tmp: [T, BLOCK_M, max(K, N)]
+ // 6. B_tmp: [T, MAX_CACHE_BLOCK_SIZE, BLOCK_M, max(K, N)]
//
int num_threads = at::get_num_threads();
int64_t buffer_size_nbytes = M * N * 2 + num_threads * 2 * BLOCK_M * BLOCK_N * sizeof(float);
@@ -1277,7 +1247,7 @@ at::Tensor shared_expert_cpu(
buffer_size_nbytes += std::max(M * K, M * N) + M * sizeof(float);
}
if (use_fp8_w8a16) {
- buffer_size_nbytes += M * 2 * N * 2 + num_threads * BLOCK_M * std::max(K, N) * 2;
+ buffer_size_nbytes += M * 2 * N * 2 + num_threads * MAX_CACHE_BLOCK_SIZE * BLOCK_M * std::max(K, N) * 2;
}
auto buffer = at::empty({buffer_size_nbytes}, hidden_states.options().dtype(at::kChar));
diff --git a/sgl-kernel/csrc/cpu/moe_fp8.cpp b/sgl-kernel/csrc/cpu/moe_fp8.cpp
index cb891fca28a..281c0089713 100644
--- a/sgl-kernel/csrc/cpu/moe_fp8.cpp
+++ b/sgl-kernel/csrc/cpu/moe_fp8.cpp
@@ -174,18 +174,18 @@ void fused_experts_fp8_kernel_impl(
const int64_t stride_e = 2 * N * K;
const int64_t stride_n = K;
+ int64_t avg_M = std::max(int64_t(1), M * topk / E);
+ const bool use_brgemm = can_use_brgemm(avg_M);
+
+ int64_t B_tmp_size_per_thread = MAX_CACHE_BLOCK_SIZE * BLOCK_N * std::max(K, N);
+
// here we only parallel on half of 2N to fuse silu_and_mul with gemm
- at::parallel_for(0, MB * NB, 0, [&](int64_t begin, int64_t end) {
+ parallel_2d(MB, NB, [&](int64_t mb0, int64_t mb1, int64_t nb0, int64_t nb1) {
// get local pointers
- int tid = at::get_thread_num();
+ int tid = get_thread_num();
scalar_t* __restrict__ A = A_tmp + tid * BLOCK_M * K;
- bool is_brgemm_used = false;
-
- for (int64_t i = begin; i < end; ++i) {
- int64_t mb = i / NB;
- int64_t nb = i % NB;
-
+ loop_2d(mb0, mb1, nb0, nb1, BLOCK_N * K, [&](int64_t mb, int64_t nb, int64_t nb_offset) {
int64_t n_size = std::min(2 * N - nb * BLOCK_N, BLOCK_N);
// B shape [K, n_size] in vnni format
@@ -194,13 +194,14 @@ void fused_experts_fp8_kernel_impl(
const float* __restrict__ Bs =
w1s + expert_id * scale_size_N * scale_size_K + (nb / blocks_n_per_group) * scale_size_K;
+ // do unpacking for the first row or a new expert
+ int32_t pre_expert_id = mb == 0 ? -1 : expert_ids[mb - 1];
+ bool do_unpack = (mb == mb0) || (expert_id != pre_expert_id);
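+ // a different expert means different weights, so the cached unpacked B block cannot be reused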
+
// 1.a load A
const int32_t* A_ids = sorted_ids + mb * BLOCK_M;
int64_t m_size = offsets[mb + 1] - offsets[mb];
- const bool use_brgemm = can_use_brgemm(m_size);
- is_brgemm_used = is_brgemm_used || use_brgemm;
-
for (int64_t m = 0; m < m_size; ++m) {
int32_t index = A_ids[m] / topk;
copy_stub(A + m * K, input + index * K, K);
@@ -211,7 +212,7 @@ void fused_experts_fp8_kernel_impl(
/* A */ A,
/* B */ B,
/* C */ ic0 + offset * 2 * N + nb * BLOCK_N,
- /* Btmp */ B_tmp + tid * BLOCK_N * std::max(K, N),
+ /* Btmp */ B_tmp + tid * B_tmp_size_per_thread + nb_offset * BLOCK_N * K,
/* Ctmp */ C_tmp + tid * 2 * BLOCK_M * BLOCK_N,
/* scale */ Bs,
/* M */ m_size,
@@ -221,10 +222,11 @@ void fused_experts_fp8_kernel_impl(
/* ldb */ n_size,
/* ldc */ 2 * N,
/* brg */ use_brgemm,
- /* block_size_K */ block_size_K);
- }
+ /* block_size_K */ block_size_K,
+ /* do_unpack */ do_unpack);
+ });
- if (is_brgemm_used) {
+ if (use_brgemm) {
at::native::cpublas::brgemm_release();
}
});
@@ -248,22 +250,14 @@ void fused_experts_fp8_kernel_impl(
const int64_t stride_oc = IC;
// parallel on [MB2, NB2]
- at::parallel_for(0, MB2 * NB2, 0, [&](int64_t begin, int64_t end) {
- int tid = at::get_thread_num();
+ parallel_2d(MB2, NB2, [&](int64_t mb0, int64_t mb1, int64_t nb0, int64_t nb1) {
+ int tid = get_thread_num();
alignas(64) scalar_t C[BLOCK_M * BLOCK_K];
- bool is_brgemm_used = false;
-
- for (int64_t i = begin; i < end; ++i) {
- int64_t mb = i / NB2;
- int64_t nb = i % NB2;
-
+ loop_2d(mb0, mb1, nb0, nb1, BLOCK_N * IC, [&](int64_t mb, int64_t nb, int64_t nb_offset) {
int64_t m_size = offsets[mb + 1] - offsets[mb];
int64_t n_size = std::min(OC - nb * BLOCK_N, BLOCK_N);
- const bool use_brgemm = can_use_brgemm(m_size);
- is_brgemm_used = is_brgemm_used || use_brgemm;
-
// A ptr from ic1 of [M * topk, N] in sorted order
// so as to avoid copy A to tmp buffer again
const scalar_t* __restrict__ A = ic1 + offsets[mb] * N;
@@ -275,11 +269,15 @@ void fused_experts_fp8_kernel_impl(
const float* __restrict__ Bs =
w2s + expert_id * scale_size_N * scale_size_K + (nb / blocks_n_per_group) * scale_size_K;
+ // do unpacking for the first row or a new expert
+ int32_t pre_expert_id = mb == 0 ? -1 : expert_ids[mb - 1];
+ bool do_unpack = (mb == mb0) || (expert_id != pre_expert_id);
+
tinygemm_kernel(
/* A */ A,
/* B */ B,
/* C */ C,
- /* Btmp */ B_tmp + tid * BLOCK_N * std::max(K, N),
+ /* Btmp */ B_tmp + tid * B_tmp_size_per_thread + nb_offset * BLOCK_N * IC,
/* Ctmp */ C_tmp + tid * 2 * BLOCK_M * BLOCK_N,
/* scale */ Bs,
/* M */ m_size,
@@ -289,7 +287,8 @@ void fused_experts_fp8_kernel_impl(
/* ldb */ n_size,
/* ldc */ BLOCK_N,
/* brg */ use_brgemm,
- /* block_size_K */ block_size_K);
+ /* block_size_K */ block_size_K,
+ /* do_unpack */ do_unpack);
// 2.b copy from C to ic2 in original order
// and also mul topk_weights in float32
@@ -298,9 +297,9 @@ void fused_experts_fp8_kernel_impl(
float weight = topk_weights[index];
copy_mul_stub(ic2 + index * K + nb * BLOCK_N, C + m * BLOCK_N, weight, n_size);
}
- }
+ });
- if (is_brgemm_used) {
+ if (use_brgemm) {
at::native::cpublas::brgemm_release();
}
});
@@ -374,20 +373,23 @@ void shared_expert_fp8_kernel_impl(
const bool use_brgemm = can_use_brgemm(M);
- at::parallel_for(0, MB * NB, 0, [&](int64_t begin, int64_t end) {
- int tid = at::get_thread_num();
+ int64_t B_tmp_size_per_thread = MAX_CACHE_BLOCK_SIZE * BLOCK_N * std::max(K, N);
+
+ parallel_2d(MB, NB, [&](int64_t mb0, int64_t mb1, int64_t nb0, int64_t nb1) {
+ int tid = get_thread_num();
- for (int64_t i = begin; i < end; ++i) {
- int64_t mb = i / NB;
- int64_t nb = i % NB;
+ loop_2d(mb0, mb1, nb0, nb1, BLOCK_N * K, [&](int64_t mb, int64_t nb, int64_t nb_offset) {
int64_t m_size = std::min(M - mb * BLOCK_M, BLOCK_M);
int64_t n_size = std::min(2 * N - nb * BLOCK_N, BLOCK_N);
+ // do unpacking for the first row
+ bool do_unpack = (mb == mb0);
+
tinygemm_kernel(
/* A */ input + mb * BLOCK_M * K,
/* B */ packed_w1 + nb * BLOCK_N * K,
/* C */ ic0 + mb * BLOCK_M * 2 * N + nb * BLOCK_N,
- /* Btmp */ B_tmp + tid * BLOCK_N * std::max(K, N),
+ /* Btmp */ B_tmp + tid * B_tmp_size_per_thread + nb_offset * BLOCK_N * K,
/* Ctmp */ C_tmp + tid * 2 * BLOCK_M * BLOCK_N,
/* scale */ w1s + (nb / blocks_n_per_group) * scale_size_K,
/* M */ m_size,
@@ -397,8 +399,9 @@ void shared_expert_fp8_kernel_impl(
/* ldb */ n_size,
/* ldc */ 2 * N,
/* brg */ use_brgemm,
- /* block_size_K */ block_size_K);
- }
+ /* block_size_K */ block_size_K,
+ /* do_unpack */ do_unpack);
+ });
if (use_brgemm) {
at::native::cpublas::brgemm_release();
@@ -421,22 +424,23 @@ void shared_expert_fp8_kernel_impl(
scale_size_K = div_up(N, block_size_K);
// parallel on [MB2, NB2]
- at::parallel_for(0, MB2 * NB2, 0, [&](int64_t begin, int64_t end) {
- int tid = at::get_thread_num();
+ parallel_2d(MB2, NB2, [&](int64_t mb0, int64_t mb1, int64_t nb0, int64_t nb1) {
+ int tid = get_thread_num();
alignas(64) scalar_t C[BLOCK_M * BLOCK_K];
- for (int64_t i = begin; i < end; ++i) {
- int64_t mb = i / NB2;
- int64_t nb = i % NB2;
+ loop_2d(mb0, mb1, nb0, nb1, BLOCK_N * IC, [&](int64_t mb, int64_t nb, int64_t nb_offset) {
int64_t m_size = std::min(M - mb * BLOCK_M, BLOCK_M);
int64_t n_size = std::min(OC - nb * BLOCK_N, BLOCK_N);
+ // do unpacking for the first row
+ bool do_unpack = (mb == mb0);
+
// 2.a gemm: C = A @ B
tinygemm_kernel(
/* A */ ic1 + mb * BLOCK_M * N,
/* B */ packed_w2 + nb * BLOCK_N * N,
/* C */ C,
- /* Btmp */ B_tmp + tid * BLOCK_N * std::max(K, N),
+ /* Btmp */ B_tmp + tid * B_tmp_size_per_thread + nb_offset * BLOCK_N * IC,
/* Ctmp */ C_tmp + tid * 2 * BLOCK_M * BLOCK_N,
/* scale */ w2s + (nb / blocks_n_per_group) * scale_size_K,
/* M */ m_size,
@@ -446,7 +450,8 @@ void shared_expert_fp8_kernel_impl(
/* ldb */ n_size,
/* ldc */ BLOCK_N,
/* brg */ use_brgemm,
- /* block_size_K */ block_size_K);
+ /* block_size_K */ block_size_K,
+ /* do_unpack */ do_unpack);
// 2.b copy from C to output and add fused_experts_out
scalar_t* __restrict__ out = output + mb * BLOCK_M * K + nb * BLOCK_N;
@@ -454,7 +459,7 @@ void shared_expert_fp8_kernel_impl(
for (int64_t m = 0; m < m_size; ++m) {
add_mul_stub(out + m * K, C + m * BLOCK_N, fused_out + m * K, routed_scaling_factor, n_size);
}
- }
+ });
});
if (use_brgemm) {
diff --git a/sgl-kernel/csrc/cpu/moe_int8.cpp b/sgl-kernel/csrc/cpu/moe_int8.cpp
index e12e5e7cfc6..8fbac902fcc 100644
--- a/sgl-kernel/csrc/cpu/moe_int8.cpp
+++ b/sgl-kernel/csrc/cpu/moe_int8.cpp
@@ -109,6 +109,120 @@ inline void add_mul_stub(
}
}
+template <typename scalar_t, int BLOCK_N>
+inline void silu_and_mul(
+ scalar_t* __restrict__ C,
+ const int32_t* __restrict__ C0, // x: x0, x1
+ const int32_t* __restrict__ C1, // y: y0, y1
+ const float* __restrict__ As,
+ const float* __restrict__ Bs0,
+ const float* __restrict__ Bs1,
+ const int32_t* __restrict__ Bcomp0,
+ const int32_t* __restrict__ Bcomp1,
+ int64_t m_size,
+ int64_t N) {
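+ // computes C = silu(As * (C0 - Bcomp0) * Bs0) * (As * (C1 - Bcomp1) * Bs1) row by row,
+ // storing the result in the output dtype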
+#if defined(CPU_CAPABILITY_AVX512)
+ constexpr int COLS = BLOCK_N / 16;
+ static_assert(COLS % 2 == 0);
+
+ __m512 vc0[COLS];
+ __m512 vc1[COLS];
+ __m512i vcomp0[COLS];
+ __m512i vcomp1[COLS];
+ __m512 vas;
+ __m512 vbs0[COLS];
+ __m512 vbs1[COLS];
+
+ auto load_scale_and_comp = [&](auto col) {
+ vcomp0[col] = _mm512_loadu_si512(Bcomp0 + col * 16);
+ vcomp1[col] = _mm512_loadu_si512(Bcomp1 + col * 16);
+ vbs0[col] = _mm512_loadu_ps(Bs0 + col * 16);
+ vbs1[col] = _mm512_loadu_ps(Bs1 + col * 16);
+ };
+ Unroll<COLS>{}(load_scale_and_comp);
+
+ auto scalec = [&](auto col, int64_t m) {
+ // update As
+ vas = _mm512_set1_ps(As[m]);
+ // C = As * (C - Bcomp) * Bs
+ __m512i vc32_0 = _mm512_loadu_si512(C0 + m * BLOCK_N + col * 16);
+ __m512i vc32_1 = _mm512_loadu_si512(C1 + m * BLOCK_N + col * 16);
+ vc0[col] = _mm512_cvtepi32_ps(_mm512_sub_epi32(vc32_0, vcomp0[col]));
+ vc1[col] = _mm512_cvtepi32_ps(_mm512_sub_epi32(vc32_1, vcomp1[col]));
+ vc0[col] = _mm512_mul_ps(_mm512_mul_ps(vc0[col], vas), vbs0[col]);
+ vc1[col] = _mm512_mul_ps(_mm512_mul_ps(vc1[col], vas), vbs1[col]);
+ };
+
+ using bVec = at::vec::Vectorized<scalar_t>;
+ using fVec = at::vec::Vectorized<float>;
+ const fVec one = fVec(1.f);
+ auto silu_and_mul = [&](auto col) {
+ fVec x = fVec(vc0[col]);
+ fVec y = fVec(vc1[col]);
+ x = x / (one + x.neg().exp_u20());
+ vc0[col] = x * y;
+ };
+
+ auto storec = [&](auto col, int64_t m) {
+ if constexpr (col % 2 == 0) {
+ fVec x0 = fVec(vc0[col + 0]);
+ fVec x1 = fVec(vc0[col + 1]);
+ bVec out_vec = convert_from_float_ext(x0, x1);
+ out_vec.store(C + m * N + col * 16);
+ }
+ };
+
+ for (int64_t m = 0; m < m_size; ++m) {
+ Unroll<COLS>{}(scalec, m);
+ Unroll<COLS>{}(silu_and_mul);
+ Unroll<COLS>{}(storec, m);
+ }
+#else
+ TORCH_CHECK(false, "silu_and_mul: scalar path not implemented!");
+#endif
+}
+
+template <int BLOCK_N>
+inline void scale_C(
+ float* __restrict__ C,
+ const int32_t* __restrict__ Ctmp,
+ const float* __restrict__ As,
+ const float* __restrict__ Bs,
+ const int32_t* __restrict__ Bcomp,
+ int64_t m_size) {
+#if defined(CPU_CAPABILITY_AVX512)
+ constexpr int COLS = BLOCK_N / 16;
+ static_assert(COLS % 2 == 0);
+
+ __m512 vc[COLS];
+ __m512i vcomp[COLS];
+ __m512 vas;
+ __m512 vbs[COLS];
+
+ auto load_scale_and_comp = [&](auto col) {
+ vcomp[col] = _mm512_loadu_si512(Bcomp + col * 16);
+ vbs[col] = _mm512_loadu_ps(Bs + col * 16);
+ };
+ Unroll<COLS>{}(load_scale_and_comp);
+
+ auto scalec = [&](auto col, int64_t m) {
+ // update As
+ vas = _mm512_set1_ps(As[m]);
+ // C = As * (C - Bcomp) * Bs
+ __m512i vc32 = _mm512_loadu_si512(Ctmp + m * BLOCK_N + col * 16);
+ vc[col] = _mm512_cvtepi32_ps(_mm512_sub_epi32(vc32, vcomp[col]));
+ vc[col] = _mm512_mul_ps(_mm512_mul_ps(vc[col], vas), vbs[col]);
+ _mm512_storeu_ps(C + m * BLOCK_N + col * 16, vc[col]);
+ };
+
+ for (int64_t m = 0; m < m_size; ++m) {
+ Unroll<COLS>{}(scalec, m);
+ }
+#else
+ TORCH_CHECK(false, "scale_C: scalar path not implemented!");
+#endif
+}
+
/// gemm for w13
template
struct tinygemm_kernel_vnni {
@@ -515,28 +629,31 @@ void fused_experts_int8_kernel_impl(
const int64_t stride_e = 2 * N * packed_K;
const int64_t stride_n = packed_K;
+
+ int64_t avg_M = std::max(int64_t(1), M * topk / E);
+ const bool use_brgemm = can_use_brgemm(avg_M);
+
// here we only parallel on half of 2N to fuse silu_and_mul with gemm
- at::parallel_for(0, MB * NB, 0, [&](int64_t begin, int64_t end) {
+ parallel_2d(MB, NB, [&](int64_t mb0, int64_t mb1, int64_t nb0, int64_t nb1) {
// get local pointers
- int tid = at::get_thread_num();
+ int tid = get_thread_num();
uint8_t* __restrict__ A = A_tmp + tid * BLOCK_M * K;
+ int32_t* __restrict__ C0 = reinterpret_cast<int32_t*>(C_tmp) + tid * 2 * BLOCK_M * BLOCK_N;
+ int32_t* __restrict__ C1 = C0 + BLOCK_M * BLOCK_N;
alignas(64) float As[BLOCK_M];
- for (int64_t i = begin; i < end; ++i) {
- int64_t mb = i / NB;
- int64_t nb = i % NB;
-
- // nb0 from top half and nb1 from bottom half
- int64_t nb0 = nb, nb1 = nb + NB;
- int64_t n_size = std::min(N - nb0 * BLOCK_N, BLOCK_N);
+ loop_2d(mb0, mb1, nb0, nb1, BLOCK_N * K * 2, [&](int64_t mb, int64_t nb, int64_t nb_offset) {
+ // nb_upper from top half and nb_lower from bottom half
+ int64_t nb_upper = nb, nb_lower = nb + NB;
+ int64_t n_size = std::min(N - nb * BLOCK_N, BLOCK_N);
// B shape [K, n_size] in vnni format
int32_t expert_id = expert_ids[mb];
- const int8_t* __restrict__ B0 = packed_w1 + expert_id * stride_e + nb0 * BLOCK_N * stride_n;
- const int8_t* __restrict__ B1 = packed_w1 + expert_id * stride_e + nb1 * BLOCK_N * stride_n;
- const float* __restrict__ Bs0 = w1s + expert_id * 2 * N + nb0 * BLOCK_N;
- const float* __restrict__ Bs1 = w1s + expert_id * 2 * N + nb1 * BLOCK_N;
+ const int8_t* __restrict__ B0 = packed_w1 + expert_id * stride_e + nb_upper * BLOCK_N * stride_n;
+ const int8_t* __restrict__ B1 = packed_w1 + expert_id * stride_e + nb_lower * BLOCK_N * stride_n;
+ const float* __restrict__ Bs0 = w1s + expert_id * 2 * N + nb_upper * BLOCK_N;
+ const float* __restrict__ Bs1 = w1s + expert_id * 2 * N + nb_lower * BLOCK_N;
// 1.a load A
const int32_t* A_ids = sorted_ids + mb * BLOCK_M;
@@ -548,22 +665,62 @@ void fused_experts_int8_kernel_impl(
As[m] = As_tmp[index];
}
- // fused 1.b: silu_and_mul(A @ B0, A @ B1)
- const int64_t offset = offsets[mb];
- tinygemm_kernel(
- /* A */ A,
- /* B0 */ B0,
- /* B1 */ B1,
- /* C */ ic1 + offset * N + nb * BLOCK_N,
- /* As */ As,
- /* Bs0 */ Bs0,
- /* Bs1 */ Bs1,
- /* M */ m_size,
- /* N */ n_size,
- /* K */ K,
- /* lda */ K,
- /* ldb */ n_size,
- /* ldc */ N);
+ if (use_brgemm) {
+ // 1.b gemm: C0 = A @ B0
+ at::native::cpublas::brgemm(
+ /* M */ m_size,
+ /* N */ n_size,
+ /* K */ K,
+ /* lda */ K,
+ /* ldb */ n_size,
+ /* ldc */ BLOCK_N,
+ /* add_C */ false,
+ /* A */ A,
+ /* B */ B0,
+ /* C */ C0);
+
+ // 1.c gemm: C1 = A @ B1
+ at::native::cpublas::brgemm(
+ /* M */ m_size,
+ /* N */ n_size,
+ /* K */ K,
+ /* lda */ K,
+ /* ldb */ n_size,
+ /* ldc */ BLOCK_N,
+ /* add_C */ false,
+ /* A */ A,
+ /* B */ B1,
+ /* C */ C1);
+
+ const int32_t* Bcomp0 = reinterpret_cast<const int32_t*>(B0 + block_size_n() * K);
+ const int32_t* Bcomp1 = reinterpret_cast<const int32_t*>(B1 + block_size_n() * K);
+
+ // 1.d silu and mul
+ const int64_t offset = offsets[mb];
+ silu_and_mul(
+ ic1 + offset * N + nb * BLOCK_N, C0, C1, As, Bs0, Bs1, Bcomp0, Bcomp1, m_size, N);
+ } else {
+ // fused 1.bcd: silu_and_mul(A @ B0, A @ B1)
+ const int64_t offset = offsets[mb];
+ tinygemm_kernel(
+ /* A */ A,
+ /* B0 */ B0,
+ /* B1 */ B1,
+ /* C */ ic1 + offset * N + nb * BLOCK_N,
+ /* As */ As,
+ /* Bs0 */ Bs0,
+ /* Bs1 */ Bs1,
+ /* M */ m_size,
+ /* N */ n_size,
+ /* K */ K,
+ /* lda */ K,
+ /* ldb */ n_size,
+ /* ldc */ N);
+ }
+ });
+
+ if (use_brgemm) {
+ at::native::cpublas::brgemm_release();
}
});
@@ -584,16 +741,13 @@ void fused_experts_int8_kernel_impl(
const int64_t stride_oc = packed_N;
// parallel on [MB2, NB2]
- at::parallel_for(0, MB2 * NB2, 0, [&](int64_t begin, int64_t end) {
+ parallel_2d(MB2, NB2, [&](int64_t mb0, int64_t mb1, int64_t nb0, int64_t nb1) {
// get local pointers
- int tid = at::get_thread_num();
- // we won't be using C1 for gemm2
+ int tid = get_thread_num();
float* __restrict__ C = C_tmp + tid * 2 * BLOCK_M * BLOCK_N;
+ int32_t* __restrict__ C32 = reinterpret_cast<int32_t*>(C + BLOCK_M * BLOCK_N);
- for (int64_t i = begin; i < end; ++i) {
- int64_t mb = i / NB2;
- int64_t nb = i % NB2;
-
+ loop_2d(mb0, mb1, nb0, nb1, BLOCK_N * IC, [&](int64_t mb, int64_t nb, int64_t nb_offset) {
int64_t m_size = offsets[mb + 1] - offsets[mb];
int64_t n_size = std::min(OC - nb * BLOCK_N, BLOCK_N);
@@ -609,18 +763,36 @@ void fused_experts_int8_kernel_impl(
const float* __restrict__ Bs = w2s + expert_id * K + nb * BLOCK_N;
// 2.a gemm: C = A @ B
- tinygemm_kernel(
- /* A */ A,
- /* B */ B,
- /* C */ C,
- /* As */ As,
- /* Bs */ Bs,
- /* M */ m_size,
- /* N */ n_size,
- /* K */ IC,
- /* lda */ IC,
- /* ldb */ n_size,
- /* ldc */ BLOCK_N);
+ if (use_brgemm) {
+ at::native::cpublas::brgemm(
+ /* M */ m_size,
+ /* N */ n_size,
+ /* K */ IC,
+ /* lda */ IC,
+ /* ldb */ n_size,
+ /* ldc */ BLOCK_N,
+ /* add_C */ false,
+ /* A */ A,
+ /* B */ B,
+ /* C */ C32);
+
+ // apply scales
+ const int32_t* Bcomp = reinterpret_cast<const int32_t*>(B + block_size_n() * IC);
+ scale_C<BLOCK_N>(C, C32, As, Bs, Bcomp, m_size);
+ } else {
+ tinygemm_kernel(
+ /* A */ A,
+ /* B */ B,
+ /* C */ C,
+ /* As */ As,
+ /* Bs */ Bs,
+ /* M */ m_size,
+ /* N */ n_size,
+ /* K */ IC,
+ /* lda */ IC,
+ /* ldb */ n_size,
+ /* ldc */ BLOCK_N);
+ }
// 2.b copy from C to ic2 in original order
// and also mul topk_weights in float32
@@ -629,6 +801,10 @@ void fused_experts_int8_kernel_impl(
float weight = topk_weights[index];
copy_mul_stub(ic2 + index * K + nb * BLOCK_N, C + m * BLOCK_N, weight, n_size);
}
+ });
+
+ if (use_brgemm) {
+ at::native::cpublas::brgemm_release();
}
});
@@ -708,15 +884,19 @@ void shared_expert_int8_kernel_impl(
const int64_t packed_N = get_row_size(N);
const int64_t stride_n = packed_K;
+ const bool use_brgemm = can_use_brgemm(M);
+
// here we only parallel on half of 2N to fuse silu_and_mul with gemm
- at::parallel_for(0, MB * NB, 0, [&](int64_t begin, int64_t end) {
- for (int64_t i = begin; i < end; ++i) {
- int64_t mb = i / NB;
- int64_t nb = i % NB;
-
- // nb0 from top half and nb1 from bottom half
- int64_t nb0 = nb, nb1 = nb + NB;
- int64_t n_size = std::min(N - nb0 * BLOCK_N, BLOCK_N);
+ parallel_2d(MB, NB, [&](int64_t mb0, int64_t mb1, int64_t nb0, int64_t nb1) {
+ // get local pointers
+ int tid = get_thread_num();
+ int32_t* __restrict__ C0 = reinterpret_cast<int32_t*>(C_tmp) + tid * 2 * BLOCK_M * BLOCK_N;
+ int32_t* __restrict__ C1 = C0 + BLOCK_M * BLOCK_N;
+
+ loop_2d(mb0, mb1, nb0, nb1, BLOCK_N * K * 2, [&](int64_t mb, int64_t nb, int64_t nb_offset) {
+ // nb_upper from top half and nb_lower from bottom half
+ int64_t nb_upper = nb, nb_lower = nb + NB;
+ int64_t n_size = std::min(N - nb * BLOCK_N, BLOCK_N);
int64_t m_size = std::min(M - mb * BLOCK_M, BLOCK_M);
// A shape [m_size, K]
@@ -724,26 +904,65 @@ void shared_expert_int8_kernel_impl(
const float* As = As_tmp + mb * BLOCK_M;
// B shape [K, n_size] in vnni format
- const int8_t* __restrict__ B0 = packed_w1 + nb0 * BLOCK_N * stride_n;
- const int8_t* __restrict__ B1 = packed_w1 + nb1 * BLOCK_N * stride_n;
- const float* __restrict__ Bs0 = w1s + nb0 * BLOCK_N;
- const float* __restrict__ Bs1 = w1s + nb1 * BLOCK_N;
-
- // fused 1.b: silu_and_mul(A @ B0, A @ B1)
- tinygemm_kernel(
- /* A */ A,
- /* B0 */ B0,
- /* B1 */ B1,
- /* C */ ic1 + mb * BLOCK_M * N + nb * BLOCK_N,
- /* As */ As,
- /* Bs0 */ Bs0,
- /* Bs1 */ Bs1,
- /* M */ m_size,
- /* N */ n_size,
- /* K */ K,
- /* lda */ K,
- /* ldb */ n_size,
- /* ldc */ N);
+ const int8_t* __restrict__ B0 = packed_w1 + nb_upper * BLOCK_N * stride_n;
+ const int8_t* __restrict__ B1 = packed_w1 + nb_lower * BLOCK_N * stride_n;
+ const float* __restrict__ Bs0 = w1s + nb_upper * BLOCK_N;
+ const float* __restrict__ Bs1 = w1s + nb_lower * BLOCK_N;
+
+ if (use_brgemm) {
+ // 1.b gemm: C0 = A @ B0
+ at::native::cpublas::brgemm(
+ /* M */ m_size,
+ /* N */ n_size,
+ /* K */ K,
+ /* lda */ K,
+ /* ldb */ n_size,
+ /* ldc */ BLOCK_N,
+ /* add_C */ false,
+ /* A */ A,
+ /* B */ B0,
+ /* C */ C0);
+
+ // 1.c gemm: C1 = A @ B1
+ at::native::cpublas::brgemm(
+ /* M */ m_size,
+ /* N */ n_size,
+ /* K */ K,
+ /* lda */ K,
+ /* ldb */ n_size,
+ /* ldc */ BLOCK_N,
+ /* add_C */ false,
+ /* A */ A,
+ /* B */ B1,
+ /* C */ C1);
+
+ const int32_t* Bcomp0 = reinterpret_cast<const int32_t*>(B0 + block_size_n() * K);
+ const int32_t* Bcomp1 = reinterpret_cast<const int32_t*>(B1 + block_size_n() * K);
+
+ // 1.d silu and mul
+ silu_and_mul(
+ ic1 + mb * BLOCK_M * N + nb * BLOCK_N, C0, C1, As, Bs0, Bs1, Bcomp0, Bcomp1, m_size, N);
+ } else {
+ // fused 1.bcd: silu_and_mul(A @ B0, A @ B1)
+ tinygemm_kernel(
+ /* A */ A,
+ /* B0 */ B0,
+ /* B1 */ B1,
+ /* C */ ic1 + mb * BLOCK_M * N + nb * BLOCK_N,
+ /* As */ As,
+ /* Bs0 */ Bs0,
+ /* Bs1 */ Bs1,
+ /* M */ m_size,
+ /* N */ n_size,
+ /* K */ K,
+ /* lda */ K,
+ /* ldb */ n_size,
+ /* ldc */ N);
+ }
+ });
+
+ if (use_brgemm) {
+ at::native::cpublas::brgemm_release();
}
});
@@ -763,16 +982,13 @@ void shared_expert_int8_kernel_impl(
const int64_t stride_oc = packed_N;
// parallel on [MB2, NB2]
- at::parallel_for(0, MB2 * NB2, 0, [&](int64_t begin, int64_t end) {
+ parallel_2d(MB2, NB2, [&](int64_t mb0, int64_t mb1, int64_t nb0, int64_t nb1) {
// get local pointers
- int tid = at::get_thread_num();
- // we won't be using C1 for gemm2
+ int tid = get_thread_num();
float* __restrict__ C = C_tmp + tid * 2 * BLOCK_M * BLOCK_N;
+ int32_t* __restrict__ C32 = reinterpret_cast<int32_t*>(C + BLOCK_M * BLOCK_N);
- for (int64_t i = begin; i < end; ++i) {
- int64_t mb = i / NB2;
- int64_t nb = i % NB2;
-
+ loop_2d(mb0, mb1, nb0, nb1, BLOCK_N * IC, [&](int64_t mb, int64_t nb, int64_t nb_offset) {
int64_t m_size = std::min(M - mb * BLOCK_M, BLOCK_M);
int64_t n_size = std::min(OC - nb * BLOCK_N, BLOCK_N);
@@ -784,19 +1000,37 @@ void shared_expert_int8_kernel_impl(
const int8_t* __restrict__ B = packed_w2 + nb * BLOCK_N * stride_oc;
const float* __restrict__ Bs = w2s + nb * BLOCK_N;
- // 2.a gemm: C = A @ B
- tinygemm_kernel(
- /* A */ A,
- /* B */ B,
- /* C */ C,
- /* As */ As,
- /* Bs */ Bs,
- /* M */ m_size,
- /* N */ n_size,
- /* K */ IC,
- /* lda */ IC,
- /* ldb */ n_size,
- /* ldc */ BLOCK_N);
+ if (use_brgemm) {
+ at::native::cpublas::brgemm(
+ /* M */ m_size,
+ /* N */ n_size,
+ /* K */ IC,
+ /* lda */ IC,
+ /* ldb */ n_size,
+ /* ldc */ BLOCK_N,
+ /* add_C */ false,
+ /* A */ A,
+ /* B */ B,
+ /* C */ C32);
+
+ // apply scales
+ const int32_t* Bcomp = reinterpret_cast<const int32_t*>(B + block_size_n() * IC);
+ scale_C<BLOCK_N>(C, C32, As, Bs, Bcomp, m_size);
+ } else {
+ // 2.a gemm: C = A @ B
+ tinygemm_kernel(
+ /* A */ A,
+ /* B */ B,
+ /* C */ C,
+ /* As */ As,
+ /* Bs */ Bs,
+ /* M */ m_size,
+ /* N */ n_size,
+ /* K */ IC,
+ /* lda */ IC,
+ /* ldb */ n_size,
+ /* ldc */ BLOCK_N);
+ }
// 2.b copy from C to output and add fused_experts_out
scalar_t* __restrict__ out = output + mb * BLOCK_M * K + nb * BLOCK_N;
@@ -804,6 +1038,10 @@ void shared_expert_int8_kernel_impl(
for (int64_t m = 0; m < m_size; ++m) {
add_mul_stub(out + m * K, C + m * BLOCK_N, fused_out + m * K, routed_scaling_factor, n_size);
}
+ });
+
+ if (use_brgemm) {
+ at::native::cpublas::brgemm_release();
}
});
}
diff --git a/sgl-kernel/csrc/cpu/qkv_proj.cpp b/sgl-kernel/csrc/cpu/qkv_proj.cpp
index 8d663e84aff..b3e2072e8ca 100644
--- a/sgl-kernel/csrc/cpu/qkv_proj.cpp
+++ b/sgl-kernel/csrc/cpu/qkv_proj.cpp
@@ -100,8 +100,7 @@ void segment_gemm_kernel_impl(
const int64_t NB1 = div_up(N1, BLOCK_N);
const int64_t NB = NB0 + NB1;
- // TODO: brgemm u8s8 depends on PyTorch 2.7 release.
- const bool use_brgemm = false;
+ const bool use_brgemm = can_use_brgemm(M);
// K + 4 after compensation
const int64_t packed_row_size = get_row_size(K);
diff --git a/sgl-kernel/csrc/cpu/torch_extension_cpu.cpp b/sgl-kernel/csrc/cpu/torch_extension_cpu.cpp
index 44257dec5e0..2c8d9e3ecec 100644
--- a/sgl-kernel/csrc/cpu/torch_extension_cpu.cpp
+++ b/sgl-kernel/csrc/cpu/torch_extension_cpu.cpp
@@ -23,6 +23,10 @@ limitations under the License.
// silu_and_mul
at::Tensor silu_and_mul_cpu(at::Tensor& input);
+// gelu_and_mul
+at::Tensor gelu_tanh_and_mul_cpu(const at::Tensor& input);
+at::Tensor gelu_and_mul_cpu(const at::Tensor& input);
+
// l2norm
at::Tensor l2norm_cpu(at::Tensor& input, double eps);
@@ -233,13 +237,17 @@ TORCH_LIBRARY_FRAGMENT(sgl_kernel, m) {
// activation
m.def("silu_and_mul_cpu(Tensor input) -> Tensor");
m.impl("silu_and_mul_cpu", torch::kCPU, &silu_and_mul_cpu);
+ m.def("gelu_tanh_and_mul_cpu(Tensor input) -> Tensor");
+ m.impl("gelu_tanh_and_mul_cpu", torch::kCPU, &gelu_tanh_and_mul_cpu);
+ m.def("gelu_and_mul_cpu(Tensor input) -> Tensor");
+ m.impl("gelu_and_mul_cpu", torch::kCPU, &gelu_and_mul_cpu);
// norm
m.def("rmsnorm_cpu(Tensor input, Tensor weight, float eps) -> Tensor");
m.impl("rmsnorm_cpu", torch::kCPU, &rmsnorm_cpu);
m.def("l2norm_cpu(Tensor input, float eps) -> Tensor");
m.impl("l2norm_cpu", torch::kCPU, &l2norm_cpu);
- m.def("fused_add_rmsnorm_cpu(Tensor input, Tensor residual, Tensor weight, float eps) -> ()");
+ m.def("fused_add_rmsnorm_cpu(Tensor(a!) input, Tensor residual, Tensor weight, float eps) -> ()");
m.impl("fused_add_rmsnorm_cpu", torch::kCPU, &fused_add_rmsnorm_cpu);
// topk
@@ -262,14 +270,14 @@ TORCH_LIBRARY_FRAGMENT(sgl_kernel, m) {
// decode
m.def(
- "decode_attention_cpu(Tensor query, Tensor k_cache, Tensor v_cahce, Tensor output, Tensor key, Tensor value, "
+ "decode_attention_cpu(Tensor query, Tensor k_cache, Tensor v_cahce, Tensor(a!) output, Tensor key, Tensor value, "
"Tensor loc, Tensor attn_logits, Tensor req_to_token, Tensor req_pool_indices, Tensor seq_lens, float sm_scale, "
"float logit_cap) -> ()");
m.impl("decode_attention_cpu", torch::kCPU, &decode_attention_cpu);
// extend
m.def(
- "extend_attention_cpu(Tensor q_extend, Tensor k_extend, Tensor v_extend, Tensor o_extend, Tensor k_buffer, "
+ "extend_attention_cpu(Tensor q_extend, Tensor k_extend, Tensor v_extend, Tensor(a!) o_extend, Tensor k_buffer, "
"Tensor v_buffer, Tensor req_to_token, Tensor req_pool_indices, Tensor seq_lens, Tensor extend_seq_lens, Tensor "
"extend_start_loc, int max_len_extend, float sm_scale, float logit_cap) -> ()");
m.impl("extend_attention_cpu", torch::kCPU, &extend_attention_cpu);
@@ -305,7 +313,7 @@ TORCH_LIBRARY_FRAGMENT(sgl_kernel, m) {
m.impl("int8_scaled_mm_with_quant", torch::kCPU, &int8_scaled_mm_with_quant);
// bmm
- m.def("bmm_cpu(Tensor out, Tensor mat1, Tensor mat2, bool is_vnni, Tensor? scale) -> ()");
+ m.def("bmm_cpu(Tensor(a!) out, Tensor mat1, Tensor mat2, bool is_vnni, Tensor? scale) -> ()");
m.impl("bmm_cpu", torch::kCPU, &bmm_cpu);
// moe
@@ -342,7 +350,7 @@ TORCH_LIBRARY_FRAGMENT(sgl_kernel, m) {
// all reduce
m.def("initialize(int size, int rank) -> ()");
- m.def("shm_allreduce(Tensor data, int reduce_op) -> ()");
+ m.def("shm_allreduce(Tensor(a!) data, int reduce_op) -> ()");
m.impl("shm_allreduce", torch::kCPU, &shm_allreduce);
m.def("shm_allgather(Tensor data, int dim) -> Tensor");
m.impl("shm_allgather", torch::kCPU, &shm_allgather);
diff --git a/sgl-kernel/csrc/cutlass_extensions/gemm/collective/sm90_mma_array_tma_gmma_rs_warpspecialized_mixed_input_.hpp b/sgl-kernel/csrc/cutlass_extensions/gemm/collective/sm90_mma_array_tma_gmma_rs_warpspecialized_mixed_input_.hpp
index 13e890e35c5..b37d5696cdc 100644
--- a/sgl-kernel/csrc/cutlass_extensions/gemm/collective/sm90_mma_array_tma_gmma_rs_warpspecialized_mixed_input_.hpp
+++ b/sgl-kernel/csrc/cutlass_extensions/gemm/collective/sm90_mma_array_tma_gmma_rs_warpspecialized_mixed_input_.hpp
@@ -1488,6 +1488,10 @@ struct CollectiveMmaArrayMixedInput<
template
CUTLASS_DEVICE void
tensormaps_cp_fence_release(TensorMapStorage& shared_tensormaps, cute::tuple const& input_tensormaps) {
+ if (cute::elect_one_sync()) {
+ cute::tma_desc_commit_group();
+ cute::tma_desc_wait_group();
+ }
// Entire warp must do this (i.e. it's aligned)
tma_descriptor_cp_fence_release(get<0>(input_tensormaps), shared_tensormaps.smem_tensormap_A);
tma_descriptor_cp_fence_release(get<1>(input_tensormaps), shared_tensormaps.smem_tensormap_B);
diff --git a/sgl-kernel/csrc/elementwise/activation.cu b/sgl-kernel/csrc/elementwise/activation.cu
index 20b88953014..43617f87f31 100644
--- a/sgl-kernel/csrc/elementwise/activation.cu
+++ b/sgl-kernel/csrc/elementwise/activation.cu
@@ -25,7 +25,7 @@
#include "utils.h"
#else
-#include "hip_act_and_mul.cuh"
+#include "hip/hip_act_and_mul.cuh"
#endif
// Adapted from flashinfer activation
diff --git a/sgl-kernel/csrc/elementwise/cast.cu b/sgl-kernel/csrc/elementwise/cast.cu
new file mode 100644
index 00000000000..a1ff8703f88
--- /dev/null
+++ b/sgl-kernel/csrc/elementwise/cast.cu
@@ -0,0 +1,171 @@
+#include "pytorch_extension_utils.h"
+
+template <typename T>
+struct ConvertToFP8 {
+ static __device__ __nv_fp8_storage_t convert_to_fp8(T value) {
+ return 0;
+ }
+};
+
+template <>
+struct ConvertToFP8<__nv_bfloat16> {
+ static __device__ __nv_fp8_storage_t convert_to_fp8(__nv_bfloat16 value) {
+ return __nv_cvt_bfloat16raw_to_fp8(value, __NV_SATFINITE, __NV_E4M3);
+ }
+};
+
+template <>
+struct ConvertToFP8<half> {
+ static __device__ __nv_fp8_storage_t convert_to_fp8(half value) {
+ return __nv_cvt_halfraw_to_fp8(value, __NV_SATFINITE, __NV_E4M3);
+ }
+};
+
+template <typename T>
+struct ConvertFromFloat {
+ static __device__ T convert_from_float(float value) {
+ return 0;
+ }
+};
+
+template <>
+struct ConvertFromFloat<__nv_bfloat16> {
+ static __device__ __nv_bfloat16 convert_from_float(float value) {
+ return __float2bfloat16(value);
+ }
+};
+
+template <>
+struct ConvertFromFloat<half> {
+ static __device__ half convert_from_float(float value) {
+ return __float2half(value);
+ }
+};
+
+template <typename T>
+__global__ void fused_downcast_kernel(
+ const T* cache_k,
+ const T* cache_v,
+ const float* k_scale,
+ const float* v_scale,
+ __nv_fp8_storage_t* output_k,
+ __nv_fp8_storage_t* output_v,
+ const int input_sl,
+ const int head,
+ const int dim,
+ const T max_fp8,
+ const T min_fp8,
+ const int64_t mult,
+ const int64_t offset,
+ const int64_t* loc) {
+ // TODO: change name
+ int token_idx = blockIdx.x;
+ int thread_idx = threadIdx.x;
+ int total_threads = blockDim.x;
+
+ T k_scale_val = ConvertFromFloat<T>::convert_from_float(k_scale[0]);
+ T v_scale_val = ConvertFromFloat<T>::convert_from_float(v_scale[0]);
+
+ T k_scale_inv = static_cast<T>(1.f) / k_scale_val;
+ T v_scale_inv = static_cast<T>(1.f) / v_scale_val;
+
+ auto clamp = [&](T val) { return val > max_fp8 ? max_fp8 : (min_fp8 > val ? min_fp8 : val); };
+
+ if (token_idx < input_sl) {
+ int out_seq_idx = loc[token_idx];
+
+#pragma unroll
+ for (int i = thread_idx; i < head * dim; i += total_threads) {
+ int in_idx = token_idx * head * dim + i;
+ int out_idx = (out_seq_idx * mult + offset) * head * dim + i;
+
+ T k_val = cache_k[in_idx] * k_scale_inv;
+ k_val = clamp(k_val);
+ output_k[out_idx] = ConvertToFP8<T>::convert_to_fp8(k_val);
+
+ T v_val = cache_v[in_idx] * v_scale_inv;
+ v_val = clamp(v_val);
+ output_v[out_idx] = ConvertToFP8<T>::convert_to_fp8(v_val);
+ }
+ }
+}
+
+template <typename T>
+void downcast_fp8_impl(
+ at::Tensor& k,
+ at::Tensor& v,
+ at::Tensor& k_out,
+ at::Tensor& v_out,
+ at::Tensor& k_scale,
+ at::Tensor& v_scale,
+ at::Tensor& loc,
+ int64_t mult,
+ int64_t offset,
+ cudaStream_t stream) {
+ CHECK_INPUT(k);
+ CHECK_INPUT(v);
+ CHECK_INPUT(k_out);
+ CHECK_INPUT(v_out);
+ CHECK_INPUT(k_scale);
+ CHECK_INPUT(v_scale);
+ CHECK_INPUT(loc);
+
+ int64_t input_sl = k.size(0);
+ int64_t head = k.size(1);
+ int64_t dim = k.size(2);
+
+ dim3 grid(input_sl * head);
+ int vec_size = 8;
+ dim3 block(std::min(int(dim) / vec_size, 1024));
+
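+ // 448 is the largest finite value representable in fp8 E4M3 (__NV_E4M3); inputs are clamped to this range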
+ const T max_fp8 = static_cast<T>(448.0f);
+ const T min_fp8 = static_cast<T>(-448.0f);
+
+ fused_downcast_kernel<T><<<grid, block, 0, stream>>>(
+ static_cast<const T*>(k.data_ptr()),
+ static_cast<const T*>(v.data_ptr()),
+ static_cast<const float*>(k_scale.data_ptr()),
+ static_cast<const float*>(v_scale.data_ptr()),
+ static_cast<__nv_fp8_storage_t*>(k_out.data_ptr()),
+ static_cast<__nv_fp8_storage_t*>(v_out.data_ptr()),
+ input_sl,
+ head,
+ dim,
+ max_fp8,
+ min_fp8,
+ mult,
+ offset,
+ static_cast<const int64_t*>(loc.data_ptr()));
+
+ cudaError_t status = cudaGetLastError();
+ TORCH_CHECK(status == cudaSuccess, "Kernel launch failed: " + std::string(cudaGetErrorString(status)));
+}
+
+void downcast_fp8(
+ at::Tensor& k,
+ at::Tensor& v,
+ at::Tensor& k_out,
+ at::Tensor& v_out,
+ at::Tensor& k_scale,
+ at::Tensor& v_scale,
+ at::Tensor& loc,
+ int64_t mult,
+ int64_t offset,
+ int64_t cuda_stream) {
+ CHECK_INPUT(k);
+ CHECK_INPUT(v);
+ CHECK_INPUT(k_out);
+ CHECK_INPUT(v_out);
+
+ cudaStream_t stream = reinterpret_cast<cudaStream_t>(cuda_stream);
+ switch (k.scalar_type()) {
+ case at::ScalarType::BFloat16:
+ downcast_fp8_impl<__nv_bfloat16>(k, v, k_out, v_out, k_scale, v_scale, loc, mult, offset, stream);
+ break;
+ case at::ScalarType::Half:
+ downcast_fp8_impl<__half>(k, v, k_out, v_out, k_scale, v_scale, loc, mult, offset, stream);
+ break;
+ default:
+ TORCH_CHECK(false, "Unsupported input type for downcast_fp8. Expected bfloat16 or float16.");
+ }
+}
diff --git a/sgl-kernel/csrc/elementwise/concat_mla.cu b/sgl-kernel/csrc/elementwise/concat_mla.cu
new file mode 100644
index 00000000000..b6c23633339
--- /dev/null
+++ b/sgl-kernel/csrc/elementwise/concat_mla.cu
@@ -0,0 +1,117 @@
+#include
+#include
+#include
+
+#include "pytorch_extension_utils.h"
+
+constexpr int NUM_LOCAL_HEADS = 128;
+constexpr int QK_NOPE_HEAD_DIM = 128;
+constexpr int QK_ROPE_HEAD_DIM = 64;
+constexpr int K_HEAD_DIM = QK_NOPE_HEAD_DIM + QK_ROPE_HEAD_DIM;
+
+constexpr int HEAD_CHUNK_SIZE = 16;
+constexpr int NUM_HEAD_CHUNKS = NUM_LOCAL_HEADS / HEAD_CHUNK_SIZE;
+
+__forceinline__ __device__ int get_lane_id() {
+ int lane_id;
+ asm("mov.s32 %0, %laneid;" : "=r"(lane_id));
+ return lane_id;
+}
+
+int ceil_div(int a, int b) {
+ return (a + b - 1) / b;
+}
+
+__global__ void concat_mla_k_kernel(
+ nv_bfloat16* k,
+ nv_bfloat16* k_nope,
+ nv_bfloat16* k_rope,
+ const int num_tokens,
+ const int k_stride_0,
+ const int k_stride_1,
+ const int k_nope_stride_0,
+ const int k_nope_stride_1,
+ const int k_rope_stride_0) {
+ const int flat_warp_id = (blockIdx.x * blockDim.x + threadIdx.x) / 32;
+ const int token_id = flat_warp_id / NUM_HEAD_CHUNKS;
+ const int head_chunk_id = flat_warp_id % NUM_HEAD_CHUNKS;
+ const int lane_id = get_lane_id();
+
+ if (token_id >= num_tokens) {
+ return;
+ }
+
+ using KNopeBufType = int2;
+ static_assert(sizeof(KNopeBufType) == QK_NOPE_HEAD_DIM * sizeof(k[0]) / 32);
+ KNopeBufType k_nope_buf[HEAD_CHUNK_SIZE];
+
+ using KRopeBufType = int;
+ static_assert(sizeof(KRopeBufType) == QK_ROPE_HEAD_DIM * sizeof(k[0]) / 32);
+ KRopeBufType k_rope_buf;
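+ // each of the 32 lanes moves 8 B (int2) of the 128 nope dims (256 B per head in bf16)
+ // and 4 B (int) of the 64 rope dims (128 B), so a warp handles one head per iteration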
+
+ {
+ const int* base_addr = reinterpret_cast<const int*>(k_rope + token_id * k_rope_stride_0);
+ k_rope_buf = *(base_addr + lane_id);
+ }
+
+#pragma unroll
+ for (int i = 0; i < HEAD_CHUNK_SIZE; ++i) {
+ const int head_id = head_chunk_id * HEAD_CHUNK_SIZE + i;
+ const int2* base_addr = reinterpret_cast<const int2*>(k_nope + token_id * k_nope_stride_0 + head_id * k_nope_stride_1);
+ k_nope_buf[i] = *(base_addr + lane_id);
+ }
+
+#pragma unroll
+ for (int i = 0; i < HEAD_CHUNK_SIZE; ++i) {
+ const int head_id = head_chunk_id * HEAD_CHUNK_SIZE + i;
+
+ {
+ int2* base_addr = reinterpret_cast<int2*>(k + token_id * k_stride_0 + head_id * k_stride_1);
+ *(base_addr + lane_id) = k_nope_buf[i];
+ }
+ {
+ int* base_addr = reinterpret_cast<int*>(k + token_id * k_stride_0 + head_id * k_stride_1 + QK_NOPE_HEAD_DIM);
+ *(base_addr + lane_id) = k_rope_buf;
+ }
+ }
+}
+
+inline void check_tensor(const at::Tensor& t, int64_t shape0, int64_t shape1, int64_t shape2, c10::ScalarType dtype) {
+ TORCH_CHECK_EQ(t.dim(), 3);
+ TORCH_CHECK_EQ(t.size(0), shape0);
+ TORCH_CHECK_EQ(t.size(1), shape1);
+ TORCH_CHECK_EQ(t.size(2), shape2);
+ TORCH_CHECK_EQ(t.dtype(), dtype);
+ TORCH_CHECK(t.device().is_cuda());
+ TORCH_CHECK_EQ(((int64_t)t.data_ptr()) % 16, 0); // alignment
+}
+
+void concat_mla_k(at::Tensor k, at::Tensor k_nope, at::Tensor k_rope) {
+ const int num_tokens = k.size(0);
+
+ check_tensor(k, num_tokens, NUM_LOCAL_HEADS, K_HEAD_DIM, at::kBFloat16);
+ check_tensor(k_nope, num_tokens, NUM_LOCAL_HEADS, QK_NOPE_HEAD_DIM, at::kBFloat16);
+ check_tensor(k_rope, num_tokens, 1, QK_ROPE_HEAD_DIM, at::kBFloat16);
+ TORCH_CHECK_EQ(k.stride(2), 1);
+ TORCH_CHECK_EQ(k_nope.stride(2), 1);
+ TORCH_CHECK_EQ(k_rope.stride(2), 1);
+
+ const auto stream = at::cuda::getCurrentCUDAStream().stream();
+
+ constexpr int num_warps_per_block = 32;
+ const int grid_size = ceil_div(num_tokens * NUM_HEAD_CHUNKS, num_warps_per_block);
+ const int block_size = num_warps_per_block * 32;
+
+ concat_mla_k_kernel<<<grid_size, block_size, 0, stream>>>(
+ reinterpret_cast<nv_bfloat16*>(k.data_ptr()),
+ reinterpret_cast<nv_bfloat16*>(k_nope.data_ptr()),
+ reinterpret_cast<nv_bfloat16*>(k_rope.data_ptr()),
+ num_tokens,
+ k.stride(0),
+ k.stride(1),
+ k_nope.stride(0),
+ k_nope.stride(1),
+ k_rope.stride(0));
+ cudaError_t err = cudaGetLastError();
+ TORCH_CHECK(err == cudaSuccess, "CUDA kernel launch failed: ", cudaGetErrorString(err));
+}
diff --git a/sgl-kernel/csrc/elementwise/copy.cu b/sgl-kernel/csrc/elementwise/copy.cu
new file mode 100644
index 00000000000..09719f51070
--- /dev/null
+++ b/sgl-kernel/csrc/elementwise/copy.cu
@@ -0,0 +1,58 @@
+#include
+#include
+#include
+
+#include
+
+template <int N>
+struct InputArray {
+ int values[N];
+};
+
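+// The host values travel inside the kernel-parameter buffer (InputArray is passed by value), so the
+// transfer happens as part of the launch itself, with no separate cudaMemcpy / copy-engine ("CE") transfer.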
+template <int N>
+__global__ void copy_to_gpu_no_ce_kernel(const InputArray<N> input_array, int* output) {
+ int idx = threadIdx.x + blockIdx.x * blockDim.x;
+ if (idx < N) {
+ output[idx] = input_array.values[idx];
+ }
+}
+
+template <int N>
+void copy_to_gpu_no_ce_impl(const at::Tensor& input, at::Tensor& output) {
+ TORCH_CHECK(input.dim() == 1, "input must be 1-D");
+  TORCH_CHECK(static_cast<int>(input.numel()) == N, "input numel must equal template N");
+ TORCH_CHECK(input.is_contiguous(), "input must be contiguous");
+ TORCH_CHECK(input.dtype() == torch::kInt32, "input dtype must be int32");
+
+ TORCH_CHECK(output.dim() == 1, "output dim");
+  TORCH_CHECK(static_cast<int>(output.numel()) == N, "output size");
+ TORCH_CHECK(output.is_contiguous(), "output contiguous");
+ TORCH_CHECK(output.dtype() == torch::kInt32, "output dtype");
+
+ TORCH_CHECK(input.device().is_cpu(), "input must be a CPU tensor");
+ TORCH_CHECK(output.device().is_cuda(), "output must be a CUDA tensor");
+
+  InputArray<N> input_array;
+  const int* input_ptr = input.data_ptr<int>();
+ for (int i = 0; i < N; ++i)
+ input_array.values[i] = input_ptr[i];
+
+  // May use multiple thread blocks if this becomes a performance bottleneck.
+ dim3 grid(1);
+ dim3 block(static_cast(input.numel()));
+ cudaStream_t stream = at::cuda::getCurrentCUDAStream();
+  copy_to_gpu_no_ce_kernel<N><<<grid, block, 0, stream>>>(input_array, output.data_ptr<int>());
+ C10_CUDA_KERNEL_LAUNCH_CHECK();
+}
+
+void copy_to_gpu_no_ce(const at::Tensor& input, at::Tensor& output) {
+ int N = static_cast(input.numel());
+  // Use a dispatch macro if more values of N are needed.
+ if (N == 72) {
+ copy_to_gpu_no_ce_impl<72>(input, output);
+ } else if (N == 64) {
+ copy_to_gpu_no_ce_impl<64>(input, output);
+ } else {
+ TORCH_CHECK(false, "unexpected N");
+ }
+}
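A hedged usage sketch for copy_to_gpu_no_ce (tensor names below are illustrative): the host values are packed into InputArray and passed by value, so they reach the device through the kernel's parameter buffer rather than through cudaMemcpy, avoiding a copy-engine (CE) transfer for these tiny arrays.

#include <torch/all.h>

// Illustrative only; assumes the declarations from copy.cu are visible.
void copy_small_metadata_example() {
  at::Tensor host_vals = at::arange(64, at::dtype(at::kInt));  // CPU, int32, numel == 64
  at::Tensor dev_vals = at::empty({64}, at::dtype(at::kInt).device(at::kCUDA));
  copy_to_gpu_no_ce(host_vals, dev_vals);  // dispatches to copy_to_gpu_no_ce_impl<64>
}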
diff --git a/sgl-kernel/csrc/elementwise/pos_enc.cuh b/sgl-kernel/csrc/elementwise/pos_enc.cuh
index 5388f0e74bd..a2e4e2ebb91 100644
--- a/sgl-kernel/csrc/elementwise/pos_enc.cuh
+++ b/sgl-kernel/csrc/elementwise/pos_enc.cuh
@@ -104,6 +104,10 @@ __global__ void BatchQKApplyRotaryPosIdsCosSinCacheEnhancedHeadParallelismKernel
uint32_t by = blockIdx.y;
const uint32_t bdy = blockDim.y;
+#if (defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 900))
+ asm volatile("griddepcontrol.wait;");
+#endif
+
   vec_t<float, vec_size> cos, sin;
if (bx * bdy + ty < nnz) {
const uint32_t idx = bx * bdy + ty;
@@ -178,6 +182,10 @@ __global__ void BatchQKApplyRotaryPosIdsCosSinCacheEnhancedHeadParallelismKernel
}
}
}
+
+#if (defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 900))
+ asm volatile("griddepcontrol.launch_dependents;");
+#endif
}
template <
@@ -220,6 +228,10 @@ __global__ void BatchQKApplyRotaryPosIdsCosSinCacheEnhancedKernel(
uint32_t bx = blockIdx.x, tx = threadIdx.x, ty = threadIdx.y;
const uint32_t bdy = blockDim.y;
+#if (defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 900))
+ asm volatile("griddepcontrol.wait;");
+#endif
+
   vec_t<float, vec_size> cos, sin;
if (bx * bdy + ty < nnz) {
const uint32_t idx = bx * bdy + ty;
@@ -296,6 +308,10 @@ __global__ void BatchQKApplyRotaryPosIdsCosSinCacheEnhancedKernel(
}
}
}
+
+#if (defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 900))
+ asm volatile("griddepcontrol.launch_dependents;");
+#endif
}
#define DISPATCH_SAVE_KV_CACHE(save_kv_cache, SAVE_KV_CACHE, ...) \
@@ -340,12 +356,59 @@ cudaError_t BatchQKApplyRotaryPosIdsCosSinCacheEnhanced(
IdType* kv_cache_loc,
bool interleave,
bool save_kv_cache,
+ bool enable_pdl,
cudaStream_t stream = nullptr) {
int dev_id = 0;
int num_sms = 0;
FLASHINFER_CUDA_CALL(cudaGetDevice(&dev_id));
FLASHINFER_CUDA_CALL(cudaDeviceGetAttribute(&num_sms, cudaDevAttrMultiProcessorCount, dev_id));
+#define LAUNCH_KERNEL_RAW(kernel_name) \
+ do { \
+ cudaLaunchConfig_t config = {}; \
+ config.gridDim = nblks; \
+ config.blockDim = nthrs; \
+ config.dynamicSmemBytes = 0; \
+ config.stream = stream; \
+ cudaLaunchAttribute attrs[1] = {}; \
+ attrs[0].id = cudaLaunchAttributeProgrammaticStreamSerialization; \
+ attrs[0].val.programmaticStreamSerializationAllowed = enable_pdl; \
+ config.numAttrs = 1; \
+ config.attrs = attrs; \
+ \
+ FLASHINFER_CUDA_CALL(cudaLaunchKernelEx( \
+ &config, \
+ kernel_name, \
+ q, \
+ k, \
+ v, \
+ q_rope, \
+ k_rope, \
+ k_buffer, \
+ v_buffer, \
+ cos_sin_cache, \
+ pos_ids, \
+ nnz, \
+ num_qo_heads, \
+ num_kv_heads, \
+ rotary_dim, \
+ q_stride_n, \
+ q_stride_h, \
+ k_stride_n, \
+ k_stride_h, \
+ v_stride_n, \
+ v_stride_h, \
+ q_rope_stride_n, \
+ q_rope_stride_h, \
+ k_rope_stride_n, \
+ k_rope_stride_h, \
+ k_buffer_stride_n, \
+ k_buffer_stride_h, \
+ v_buffer_stride_n, \
+ v_buffer_stride_h, \
+ kv_cache_loc)); \
+ } while (0)
+
DISPATCH_SAVE_KV_CACHE(save_kv_cache, SAVE_KV_CACHE, {
DISPATCH_INTERLEAVE(interleave, INTERLEAVE, {
DISPATCH_HEAD_DIM(head_dim, HEAD_DIM, {
@@ -359,35 +422,7 @@ cudaError_t BatchQKApplyRotaryPosIdsCosSinCacheEnhanced(
uint32_t bdy = num_threads / bdx;
// how many blocks needed to process all tokens
uint32_t nblks_x = (nnz + bdy - 1) / bdy;
- void* args[] = {
- (void*)&q,
- (void*)&k,
- (void*)&v,
- (void*)&q_rope,
- (void*)&k_rope,
- (void*)&k_buffer,
- (void*)&v_buffer,
- (void*)&cos_sin_cache,
- (void*)&pos_ids,
- (void*)&nnz,
- (void*)&num_qo_heads,
- (void*)&num_kv_heads,
- (void*)&rotary_dim,
- (void*)&q_stride_n,
- (void*)&q_stride_h,
- (void*)&k_stride_n,
- (void*)&k_stride_h,
- (void*)&v_stride_n,
- (void*)&v_stride_h,
- (void*)&q_rope_stride_n,
- (void*)&q_rope_stride_h,
- (void*)&k_rope_stride_n,
- (void*)&k_rope_stride_h,
- (void*)&k_buffer_stride_n,
- (void*)&k_buffer_stride_h,
- (void*)&v_buffer_stride_n,
- (void*)&v_buffer_stride_h,
- (void*)&kv_cache_loc};
+
auto kernel_0 = BatchQKApplyRotaryPosIdsCosSinCacheEnhancedKernel<
SAVE_KV_CACHE,
INTERLEAVE,
@@ -405,7 +440,7 @@ cudaError_t BatchQKApplyRotaryPosIdsCosSinCacheEnhanced(
if ((nnz + bdy - 1) / bdy >= num_ctas_0) {
dim3 nblks(nblks_x);
dim3 nthrs(bdx, bdy);
- FLASHINFER_CUDA_CALL(cudaLaunchKernel((void*)kernel_0, nblks, nthrs, args, 0, stream));
+ LAUNCH_KERNEL_RAW(kernel_0);
} else {
dim3 nblks(nblks_x, num_qo_heads + num_kv_heads);
dim3 nthrs(bdx, bdy);
@@ -417,11 +452,12 @@ cudaError_t BatchQKApplyRotaryPosIdsCosSinCacheEnhanced(
bdx,
DType,
IdType>;
- FLASHINFER_CUDA_CALL(cudaLaunchKernel((void*)kernel_1, nblks, nthrs, args, 0, stream));
+ LAUNCH_KERNEL_RAW(kernel_1);
}
});
});
});
+#undef LAUNCH_KERNEL_RAW
return cudaSuccess;
}
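For context on the griddepcontrol additions above: they enable programmatic dependent launch (PDL), where a dependent kernel is allowed to start early and synchronizes with its predecessor inside the kernel instead of at a stream boundary. A minimal sketch using the CUDA device-runtime intrinsics that correspond to the inline PTX (SM90+ only; kernel and variable names are illustrative):

__global__ void pdl_consumer_sketch(const float* in, float* out, int n) {
#if (defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 900))
  // Counterpart of "griddepcontrol.wait;": block until the preceding kernel's
  // writes are visible before reading its output.
  cudaGridDependencySynchronize();
#endif
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < n) out[i] = in[i] * 2.0f;
#if (defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 900))
  // Counterpart of "griddepcontrol.launch_dependents;": let kernels that depend
  // on this one begin their prologue early.
  cudaTriggerProgrammaticLaunchCompletion();
#endif
}

On the host side, the launch must opt in by setting cudaLaunchAttributeProgrammaticStreamSerialization via cudaLaunchKernelEx, which is exactly what the LAUNCH_KERNEL_RAW macro above does when enable_pdl is true.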
diff --git a/sgl-kernel/csrc/elementwise/rope.cu b/sgl-kernel/csrc/elementwise/rope.cu
index 41cad7dd418..041558f61e0 100644
--- a/sgl-kernel/csrc/elementwise/rope.cu
+++ b/sgl-kernel/csrc/elementwise/rope.cu
@@ -27,6 +27,7 @@ void apply_rope_pos_ids_cos_sin_cache(
at::Tensor cos_sin_cache,
at::Tensor pos_ids,
bool interleave,
+ bool enable_pdl,
int64_t cuda_stream,
     const std::optional<at::Tensor>& v,
     const std::optional<at::Tensor>& k_buffer,
@@ -124,12 +125,14 @@ void apply_rope_pos_ids_cos_sin_cache(
kv_cache_loc_ptr,
interleave,
save_kv_cache,
+ enable_pdl,
stream);
TORCH_CHECK(
status == cudaSuccess,
"BatchQKApplyRotaryPosIdsCosSinCacheEnhanced failed with error code " +
std::string(cudaGetErrorString(status)));
} else {
+ TORCH_CHECK(!enable_pdl);
cudaError_t status = BatchQKApplyRotaryPosIdsCosSinCache(
static_cast(q.data_ptr()),
static_cast(k.data_ptr()),
diff --git a/sgl-kernel/csrc/gemm/dsv3_fused_a_gemm.cu b/sgl-kernel/csrc/gemm/dsv3_fused_a_gemm.cu
index 28dcaaee14d..37aff1b9a85 100644
--- a/sgl-kernel/csrc/gemm/dsv3_fused_a_gemm.cu
+++ b/sgl-kernel/csrc/gemm/dsv3_fused_a_gemm.cu
@@ -131,6 +131,7 @@ __device__ bool try_wait_barrier(uint64_t* smem_ptr, int phase_bit) {
: "r"(smem_int_ptr), "r"(phase_bit));
   return static_cast<bool>(wait_complete);
#endif
+ return false;
}
// Barrier arrive
diff --git a/sgl-kernel/csrc/gemm/fp8_blockwise_gemm_kernel.cu b/sgl-kernel/csrc/gemm/fp8_blockwise_gemm_kernel.cu
index e69167a4d29..b8b23c42746 100644
--- a/sgl-kernel/csrc/gemm/fp8_blockwise_gemm_kernel.cu
+++ b/sgl-kernel/csrc/gemm/fp8_blockwise_gemm_kernel.cu
@@ -195,6 +195,176 @@ void sm100_fp8_blockwise_dispatch_shape(
}
}
+template <
+ typename OutType,
+ typename MmaTileShape,
+ typename PerSmTileShape,
+ typename EpilogueTileShape,
+ typename ScalesPerTile,
+ int TileSizeM_ = 128,
+ class ClusterShape = Shape<_1, _1, _1>>
+void launch_sm120_fp8_blockwise_scaled_mm(
+ torch::Tensor& out,
+ const torch::Tensor& a,
+ const torch::Tensor& b,
+ const torch::Tensor& scales_a,
+ const torch::Tensor& scales_b) {
+ using ElementBlockScale = float;
+
+ // A matrix configuration
+ using ElementA = cutlass::float_e4m3_t; // Element type for A matrix operand
+ using LayoutATag = cutlass::layout::RowMajor; // Layout type for A matrix operand
+ constexpr int AlignmentA =
+      128 / cutlass::sizeof_bits<ElementA>::value;  // Memory access granularity/alignment of A matrix in units of
+ // elements (up to 16 bytes)
+
+ // B matrix configuration
+ using ElementB = cutlass::float_e4m3_t; // Element type for B matrix operand
+ using LayoutBTag = cutlass::layout::ColumnMajor; // Layout type for B matrix operand
+ constexpr int AlignmentB =
+      128 / cutlass::sizeof_bits<ElementB>::value;  // Memory access granularity/alignment of B matrix in units of
+ // elements (up to 16 bytes)
+
+ // C/D matrix configuration
+ using ElementD = OutType; // Element type for D matrix operand
+ using ElementC = void; // Element type for C matrix operand
+ using LayoutCTag = cutlass::layout::RowMajor; // Layout type for C matrix operand
+ using LayoutDTag = cutlass::layout::RowMajor; // Layout type for D matrix operand
+ constexpr int AlignmentD =
+      128 / cutlass::sizeof_bits<ElementD>::value;  // Memory access granularity/alignment of D matrix in units of
+ // elements (up to 16 bytes)
+ constexpr int AlignmentC =
+ AlignmentD; // Memory access granularity/alignment of C matrix in units of elements (up to 16 bytes)
+
+ // Kernel functional config
+ using ElementAccumulator = float; // Element type for internal accumulation
+ using ArchTag = cutlass::arch::Sm120; // Tag indicating the minimum SM that supports the intended feature
+ using OperatorClass = cutlass::arch::OpClassTensorOp; // Operator class tag - changed from OpClassBlockScaledTensorOp
+
+ static constexpr int ScaleMsPerTile = size<0>(ScalesPerTile{});
+ static constexpr int ScaleGranularityM = size<0>(MmaTileShape{}) / ScaleMsPerTile;
+ static constexpr int ScaleGranularityN = size<1>(MmaTileShape{}) / size<1>(ScalesPerTile{});
+ static constexpr int ScaleGranularityK = size<2>(MmaTileShape{}) / size<2>(ScalesPerTile{});
+
+ using ScaleConfig = cutlass::detail::Sm120BlockwiseScaleConfig<
+ ScaleGranularityM,
+ ScaleGranularityN,
+ ScaleGranularityK,
+ cute::UMMA::Major::MN,
+ cute::UMMA::Major::K>;
+ // FP8 Block-wise scaling configuration
+ using LayoutSFA = decltype(ScaleConfig::deduce_layoutSFA()); // Layout type for SFA matrix operand
+ using LayoutSFB = decltype(ScaleConfig::deduce_layoutSFB()); // Layout type for SFB matrix operand
+
+ using CollectiveEpilogue = typename cutlass::epilogue::collective::CollectiveBuilder<
+ ArchTag,
+ OperatorClass,
+ PerSmTileShape,
+ ClusterShape,
+ cutlass::epilogue::collective::EpilogueTileAuto,
+ ElementAccumulator,
+ ElementAccumulator,
+ ElementC,
+ LayoutCTag,
+ AlignmentC,
+ ElementD,
+ LayoutDTag,
+ AlignmentD,
+ cutlass::epilogue::collective::EpilogueScheduleAuto // Epilogue schedule policy
+ >::CollectiveOp;
+
+ using CollectiveMainloop = typename cutlass::gemm::collective::CollectiveBuilder<
+ ArchTag,
+ OperatorClass,
+ ElementA,
+      cute::tuple<LayoutATag, LayoutSFA>,
+ AlignmentA,
+ ElementB,
+      cute::tuple<LayoutBTag, LayoutSFB>,
+ AlignmentB,
+ ElementAccumulator,
+ MmaTileShape,
+ ClusterShape,
+      cutlass::gemm::collective::StageCountAutoCarveout<static_cast<int>(
+          sizeof(typename CollectiveEpilogue::SharedStorage))>,
+ cutlass::gemm::collective::KernelScheduleAuto // Kernel schedule policy. Auto defaults to cooperative kernel
+ // schedule
+ >::CollectiveOp;
+
+ using GemmKernel = cutlass::gemm::kernel::GemmUniversal<
+      Shape<int, int, int, int>,  // Indicates ProblemShape
+ CollectiveMainloop,
+ CollectiveEpilogue,
+ void>;
+
+  using Gemm = cutlass::gemm::device::GemmUniversalAdapter<GemmKernel>;
+
+ Gemm gemm_op;
+
+ int m = a.size(0);
+ int k = a.size(1);
+ int n = b.size(1);
+
+  auto a_ptr = static_cast<ElementA*>(a.data_ptr());
+  auto b_ptr = static_cast<ElementB*>(b.data_ptr());
+  auto c_ptr = static_cast<ElementD*>(out.data_ptr());
+
+  auto scales_a_ptr = static_cast<ElementBlockScale*>(scales_a.data_ptr());
+  auto scales_b_ptr = static_cast<ElementBlockScale*>(scales_b.data_ptr());
+
+ using StrideA = typename Gemm::GemmKernel::StrideA;
+ using StrideB = typename Gemm::GemmKernel::StrideB;
+ using StrideD = typename Gemm::GemmKernel::StrideD;
+ using StrideC = typename Gemm::GemmKernel::StrideD;
+
+ StrideA stride_a = cutlass::make_cute_packed_stride(StrideA{}, cute::make_shape(m, k, 1));
+ StrideB stride_b = cutlass::make_cute_packed_stride(StrideB{}, cute::make_shape(n, k, 1));
+ StrideC stride_c = cutlass::make_cute_packed_stride(StrideC{}, cute::make_shape(m, n, 1));
+ LayoutSFA layout_SFA = ScaleConfig::tile_atom_to_shape_SFA(make_shape(m, n, k, 1));
+ LayoutSFB layout_SFB = ScaleConfig::tile_atom_to_shape_SFB(make_shape(m, n, k, 1));
+
+ typename GemmKernel::MainloopArguments mainloop_args{
+ a_ptr, stride_a, b_ptr, stride_b, scales_a_ptr, layout_SFA, scales_b_ptr, layout_SFB};
+
+ typename GemmKernel::EpilogueArguments epilogue_args{{}, c_ptr, stride_c, c_ptr, stride_c};
+ epilogue_args.thread.alpha = 1.0f;
+
+ typename Gemm::Arguments args = {
+ cutlass::gemm::GemmUniversalMode::kGemm,
+ {m, n, k, 1},
+ mainloop_args,
+ epilogue_args,
+ };
+
+ auto can_implement = gemm_op.can_implement(args);
+  TORCH_CHECK(can_implement == cutlass::Status::kSuccess, cutlassGetStatusString(can_implement));
+
+ size_t workspace_size = gemm_op.get_workspace_size(args);
+  cutlass::device_memory::allocation<uint8_t> workspace(workspace_size);
+
+ auto init_status = gemm_op.initialize(args, workspace.get());
+ TORCH_CHECK(init_status == cutlass::Status::kSuccess, cutlassGetStatusString(init_status));
+
+ auto stream = at::cuda::getCurrentCUDAStream(a.get_device());
+ auto status = gemm_op.run(stream);
+  TORCH_CHECK(status == cutlass::Status::kSuccess, cutlassGetStatusString(status));
+}
+
+template <typename OutType>
+void sm120_fp8_blockwise_dispatch_shape(
+ torch::Tensor& out,
+ const torch::Tensor& a,
+ const torch::Tensor& b,
+ const torch::Tensor& scales_a,
+ const torch::Tensor& scales_b) {
+ using MmaTileShape = Shape<_128, _128, _128>;
+ using PerSmTileShape = Shape<_128, _128, _128>;
+ using EpilogueTileShape = Shape<_128, _64>;
+ using ScalesPerTile = Shape<_128, _1, _1>;
+  launch_sm120_fp8_blockwise_scaled_mm<OutType, MmaTileShape, PerSmTileShape, EpilogueTileShape, ScalesPerTile>(
+ out, a, b, scales_a, scales_b);
+}
+
torch::Tensor fp8_blockwise_scaled_mm(
const torch::Tensor& mat_a,
const torch::Tensor& mat_b,
@@ -260,7 +430,11 @@ torch::Tensor fp8_blockwise_scaled_mm(
#if defined(CUTLASS_ARCH_MMA_SM100A_SUPPORTED) || defined(CUTLASS_ARCH_MMA_SM100_SUPPORTED)
#if defined CUDA_VERSION && CUDA_VERSION >= 12080
- if (sm_version == 100) {
+ if (sm_version == 100
+#if CUDA_VERSION >= 12090
+ || sm_version == 103
+#endif
+ ) {
if (out_dtype == torch::kBFloat16) {
       sm100_fp8_blockwise_dispatch_shape<cutlass::bfloat16_t>(
out_padded, mat_a_padded, mat_b, scales_a_padded, scales_b);
@@ -271,6 +445,21 @@ torch::Tensor fp8_blockwise_scaled_mm(
}
#endif
#endif
+
+#if defined(CUTLASS_ARCH_MMA_SM120A_SUPPORTED) || defined(CUTLASS_ARCH_MMA_SM120_SUPPORTED)
+#if defined(CUDA_VERSION) && CUDA_VERSION >= 12080
+ if (sm_version == 120) {
+ if (out_dtype == torch::kBFloat16) {
+      sm120_fp8_blockwise_dispatch_shape<cutlass::bfloat16_t>(
+ out_padded, mat_a_padded, mat_b, scales_a_padded, scales_b);
+ } else {
+      sm120_fp8_blockwise_dispatch_shape<cutlass::half_t>(out_padded, mat_a_padded, mat_b, scales_a_padded, scales_b);
+ }
+ return out_padded.slice(0, 0, original_rows);
+ }
+#endif
+#endif
+
TORCH_CHECK_NOT_IMPLEMENTED(
false, "No implemented fp8_blockwise_scaled_mm for current compute capability: ", sm_version);
}
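A worked check of the scale-granularity arithmetic in sm120_fp8_blockwise_dispatch_shape above (standalone C++, illustration only): with MmaTileShape = 128x128x128 and ScalesPerTile = 128x1x1, the granularities evaluate to 1x128x128, i.e. one scale per row of A and one scale per 128x128 block of B, the usual DeepSeek-style blockwise FP8 layout.

// Granularity along a mode = MMA tile extent / number of scales per tile.
constexpr int scale_granularity(int tile_extent, int scales_per_tile) {
  return tile_extent / scales_per_tile;
}
static_assert(scale_granularity(128, 128) == 1, "M: per-row scales for A");
static_assert(scale_granularity(128, 1) == 128, "N: one scale per 128 columns of B");
static_assert(scale_granularity(128, 1) == 128, "K: one scale per 128-deep K slice");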
diff --git a/sgl-kernel/csrc/gemm/fp8_gemm_kernel.cu b/sgl-kernel/csrc/gemm/fp8_gemm_kernel.cu
index 77b5c500f04..0a9e6b7a535 100644
--- a/sgl-kernel/csrc/gemm/fp8_gemm_kernel.cu
+++ b/sgl-kernel/csrc/gemm/fp8_gemm_kernel.cu
@@ -1212,7 +1212,11 @@ torch::Tensor fp8_scaled_mm(
auto sm_version = getSMVersion();
#if defined CUDA_VERSION && CUDA_VERSION >= 12080
- if (sm_version >= 100) {
+ if (sm_version == 100
+#if CUDA_VERSION >= 12090
+ || sm_version == 103
+#endif
+ ) {
if (out_dtype == torch::kBFloat16) {
       sm100_fp8_dispatch_shape<cutlass::bfloat16_t>(out, mat_a, mat_b, scales_a, scales_b, bias);
} else {
diff --git a/sgl-kernel/csrc/gemm/nvfp4_expert_quant.cu b/sgl-kernel/csrc/gemm/nvfp4_expert_quant.cu
index af52196f662..e18f2057bab 100644
--- a/sgl-kernel/csrc/gemm/nvfp4_expert_quant.cu
+++ b/sgl-kernel/csrc/gemm/nvfp4_expert_quant.cu
@@ -1,169 +1,11 @@
#include
#include
-#include
#include
+#include
#include
-template
-struct TypeConverter {
- using Type = half2;
-}; // keep for generality
-
-template <>
-struct TypeConverter {
- using Type = half;
-};
-
-template <>
-struct TypeConverter {
- using Type = half2;
-};
-
-template <>
-struct TypeConverter<__nv_bfloat162> {
- using Type = __nv_bfloat16;
-};
-
-template <>
-struct TypeConverter<__nv_bfloat16> {
- using Type = __nv_bfloat162;
-};
-
-#define ELTS_PER_THREAD 8
-
-constexpr int CVT_FP4_ELTS_PER_THREAD = 8;
-constexpr int CVT_FP4_SF_VEC_SIZE = 16;
-
-// Convert 8 float32 values into 8 e2m1 values (represented as one uint32_t).
-inline __device__ uint32_t fp32_vec_to_e2m1(float (&array)[8]) {
- // PTX instructions used here requires sm100a.
-#if CUDA_VERSION >= 12080
-#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 1000) && __CUDA_ARCH_HAS_FEATURE__(SM100_ALL)
- uint32_t val;
- asm volatile(
- "{\n"
- ".reg .b8 byte0;\n"
- ".reg .b8 byte1;\n"
- ".reg .b8 byte2;\n"
- ".reg .b8 byte3;\n"
- "cvt.rn.satfinite.e2m1x2.f32 byte0, %2, %1;\n"
- "cvt.rn.satfinite.e2m1x2.f32 byte1, %4, %3;\n"
- "cvt.rn.satfinite.e2m1x2.f32 byte2, %6, %5;\n"
- "cvt.rn.satfinite.e2m1x2.f32 byte3, %8, %7;\n"
- "mov.b32 %0, {byte0, byte1, byte2, byte3};\n"
- "}"
- : "=r"(val)
- : "f"(array[0]),
- "f"(array[1]),
- "f"(array[2]),
- "f"(array[3]),
- "f"(array[4]),
- "f"(array[5]),
- "f"(array[6]),
- "f"(array[7]));
- return val;
-#else
- return 0;
-#endif
-#endif
-}
-
-// Convert 4 float2 values into 8 e2m1 values (represented as one uint32_t).
-inline __device__ uint32_t fp32_vec_to_e2m1(float2 (&array)[4]) {
- // PTX instructions used here requires sm100a.
-#if CUDA_VERSION >= 12080
-#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 1000) && __CUDA_ARCH_HAS_FEATURE__(SM100_ALL)
- uint32_t val;
- asm volatile(
- "{\n"
- ".reg .b8 byte0;\n"
- ".reg .b8 byte1;\n"
- ".reg .b8 byte2;\n"
- ".reg .b8 byte3;\n"
- "cvt.rn.satfinite.e2m1x2.f32 byte0, %2, %1;\n"
- "cvt.rn.satfinite.e2m1x2.f32 byte1, %4, %3;\n"
- "cvt.rn.satfinite.e2m1x2.f32 byte2, %6, %5;\n"
- "cvt.rn.satfinite.e2m1x2.f32 byte3, %8, %7;\n"
- "mov.b32 %0, {byte0, byte1, byte2, byte3};\n"
- "}"
- : "=r"(val)
- : "f"(array[0].x),
- "f"(array[0].y),
- "f"(array[1].x),
- "f"(array[1].y),
- "f"(array[2].x),
- "f"(array[2].y),
- "f"(array[3].x),
- "f"(array[3].y));
- return val;
-#else
- return 0;
-#endif
-#endif
-}
-
-// Fast reciprocal.
-inline __device__ float reciprocal_approximate_ftz(float a) {
- float b;
- asm volatile("rcp.approx.ftz.f32 %0, %1;\n" : "=f"(b) : "f"(a));
- return b;
-}
-
-template
-__device__ uint8_t* cvt_quant_to_fp4_get_sf_out_offset(int rowIdx, int colIdx, int numCols, SFType* SFout) {
-#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 1000)
- static_assert(CVT_FP4_NUM_THREADS_PER_SF == 1 || CVT_FP4_NUM_THREADS_PER_SF == 2);
-
- // One pair of threads write one SF to global memory.
- // TODO: stage through smem for packed STG.32
- // is it better than STG.8 from 4 threads ?
- if (threadIdx.x % CVT_FP4_NUM_THREADS_PER_SF == 0) {
- // SF vector index (16 elements share one SF in the K dimension).
- int32_t kIdx = colIdx / CVT_FP4_NUM_THREADS_PER_SF;
- int32_t mIdx = rowIdx;
-
- // SF layout [numMTiles, numKTiles, 32 (mTile), 4 (mTile), 4(kTile)]
- // --> index [mTileIdx, kTileIdx, outerMIdx, innerMIdx, innerKIdx]
-
- int32_t mTileIdx = mIdx / (32 * 4);
- // SF vector size 16.
- int factor = CVT_FP4_SF_VEC_SIZE * 4;
- int32_t numKTiles = (numCols + factor - 1) / factor;
- int64_t mTileStride = numKTiles * 32 * 4 * 4;
-
- int32_t kTileIdx = (kIdx / 4);
- int64_t kTileStride = 32 * 4 * 4;
-
- // M tile layout [32, 4] is column-major.
- int32_t outerMIdx = (mIdx % 32);
- int64_t outerMStride = 4 * 4;
-
- int32_t innerMIdx = (mIdx % (32 * 4)) / 32;
- int64_t innerMStride = 4;
-
- int32_t innerKIdx = (kIdx % 4);
- int64_t innerKStride = 1;
-
- // Compute the global offset.
- int64_t SFOffset = mTileIdx * mTileStride + kTileIdx * kTileStride + outerMIdx * outerMStride +
- innerMIdx * innerMStride + innerKIdx * innerKStride;
-
- return reinterpret_cast(SFout) + SFOffset;
- }
-#endif
- return nullptr;
-}
-
-// Define a 16 bytes packed data type.
-template
-struct PackedVec {
- typename TypeConverter::Type elts[4];
-};
-
-template <>
-struct PackedVec<__nv_fp8_e4m3> {
- __nv_fp8x2_e4m3 elts[8];
-};
+#include "nvfp4_quant.cuh"
+#include "utils.h"
// Quantizes the provided PackedVec into the uint32_t output
template
@@ -239,6 +81,33 @@ __device__ uint32_t cvt_warp_fp16_to_fp4(PackedVec& vec, float SFScaleVal,
#endif
}
+__device__ __forceinline__ float silu(const float& val) {
+ return val / (1.0f + __expf(-val));
+}
+
+template <class Type>
+inline __device__ void silu_and_mul(PackedVec<Type>& x_vec, const PackedVec<Type>& y_vec) {
+ float2 x[CVT_FP4_ELTS_PER_THREAD / 2];
+ float2 y[CVT_FP4_ELTS_PER_THREAD / 2];
+
+#pragma unroll
+ for (int i = 0; i < CVT_FP4_ELTS_PER_THREAD / 2; i++) {
+    if constexpr (std::is_same_v<Type, half>) {
+ x[i] = __half22float2(x_vec.elts[i]);
+ y[i] = __half22float2(y_vec.elts[i]);
+ x[i].x = silu(x[i].x) * y[i].x;
+ x[i].y = silu(x[i].y) * y[i].y;
+ x_vec.elts[i] = __float22half2_rn(x[i]);
+ } else {
+ x[i] = __bfloat1622float2(x_vec.elts[i]);
+ y[i] = __bfloat1622float2(y_vec.elts[i]);
+ x[i].x = silu(x[i].x) * y[i].x;
+ x[i].y = silu(x[i].y) * y[i].y;
+ x_vec.elts[i] = __float22bfloat162_rn(x[i]);
+ }
+ }
+}
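+// Example: for x = 1.0f and y = 2.0f, silu(1.0f) = 1.0f / (1.0f + exp(-1.0f))
+// is roughly 0.7311f, so silu_and_mul produces roughly 1.4622f for that element.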
+
// Use UE4M3 by default.
template
__global__ void
@@ -255,6 +124,7 @@ cvt_fp16_to_fp4(
uint32_t* SFout,
uint32_t* input_offset_by_experts,
uint32_t* output_scale_offset_by_experts,
+ int32_t* mask,
int n_experts,
bool low_latency) {
#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 1000)
@@ -265,6 +135,11 @@ cvt_fp16_to_fp4(
// Input tensor row/col loops.
int tid = blockIdx.x * blockDim.x + threadIdx.x;
int colsPerRow = numCols / CVT_FP4_ELTS_PER_THREAD;
+ // TODO(kaixih@nvidia): For now, we assume mask is used together with
+  // silu_and_mul. We may want a more general mask behavior later. In the
+ // silu case, the input last dim doubles.
+ bool use_mask = mask != nullptr;
+ int actualColsPerRow = use_mask ? colsPerRow * 2 : colsPerRow;
// Each global thread processes one element
for (int globalIdx = tid; globalIdx < numRows * colsPerRow; globalIdx += gridDim.x * blockDim.x) {
@@ -272,13 +147,6 @@ cvt_fp16_to_fp4(
int rowIdx = globalIdx / colsPerRow;
int colIdx = globalIdx % colsPerRow;
- int64_t inOffset = rowIdx * colsPerRow + colIdx;
- PackedVec in_vec = reinterpret_cast(in)[inOffset];
- // Get the output tensor offset.
- // Same as inOffset because 8 elements are packed into one uint32_t.
- int64_t outOffset = inOffset;
- auto& out_pos = out[outOffset];
-
// Find index within the experts using different strategies based on expert
// count
int rowIdx_in_expert = 0;
@@ -321,6 +189,23 @@ cvt_fp16_to_fp4(
}
}
+ // Early exit when using masks.
+ if (use_mask && rowIdx_in_expert >= mask[expert_idx]) {
+ continue;
+ }
+
+ int64_t inOffset = rowIdx * actualColsPerRow + colIdx;
+    PackedVec in_vec = reinterpret_cast<PackedVec const*>(in)[inOffset];
+ if (use_mask) {
+      PackedVec in_vec_mul = reinterpret_cast<PackedVec const*>(in)[inOffset + colsPerRow];
+ silu_and_mul(in_vec, in_vec_mul);
+ }
+
+ // Get the output tensor offset.
+ // Same as inOffset because 8 elements are packed into one uint32_t.
+ int64_t outOffset = rowIdx * colsPerRow + colIdx;
+ auto& out_pos = out[outOffset];
+
// Get the global scaling factor, which will be applied to the SF.
// Note SFScale is the same as next GEMM's alpha, which is
// (448.f / (Alpha_A / 6.f)).
@@ -340,6 +225,107 @@ cvt_fp16_to_fp4(
#endif
}
+// Use UE4M3 by default.
+template <class Type, bool UE8M0_SF = false>
+__global__ void
+#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 1000)
+__launch_bounds__(512, 4) cvt_fp16_to_fp4_expert(
+#else
+cvt_fp16_to_fp4_expert(
+#endif
+ int32_t numRows,
+ int32_t numCols,
+ Type const* in,
+ float const* SFScale,
+ uint32_t* out,
+ uint32_t* SFout,
+ int32_t* mask,
+ bool use_silu_and_mul,
+ int n_experts) {
+#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 1000)
+  using PackedVec = PackedVec<Type>;
+ static constexpr int CVT_FP4_NUM_THREADS_PER_SF = (CVT_FP4_SF_VEC_SIZE / CVT_FP4_ELTS_PER_THREAD);
+ static_assert(sizeof(PackedVec) == sizeof(Type) * CVT_FP4_ELTS_PER_THREAD, "Vec size is not matched.");
+
+ // Input tensor row/col loops.
+ int tid = blockIdx.x * blockDim.x + threadIdx.x;
+ int stride = (gridDim.x * blockDim.x) / n_experts;
+ int remainder = (gridDim.x * blockDim.x) % n_experts;
+ int expert_idx;
+ int tid_in_expert;
+ int actual_stride;
+ if (remainder > 0) {
+ int bound = remainder * (stride + 1);
+ if (tid < bound) {
+ expert_idx = tid / (stride + 1);
+ tid_in_expert = tid % (stride + 1);
+ actual_stride = stride + 1;
+ } else {
+ expert_idx = remainder + (tid - bound) / stride;
+ tid_in_expert = (tid - bound) % stride;
+ actual_stride = stride;
+ }
+ } else {
+ expert_idx = tid / stride;
+ tid_in_expert = tid % stride;
+ actual_stride = stride;
+ }
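+  // Example of the partition above: with 10 total threads and n_experts = 4,
+  // stride = 2 and remainder = 2, so tids 0-5 cover experts 0-1 with 3 threads
+  // each, and tids 6-9 cover experts 2-3 with 2 threads each.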
+ int m = numRows / n_experts;
+ int padded_m = (m + (128 - 1)) / 128 * 128;
+
+ int colsPerRow = numCols / CVT_FP4_ELTS_PER_THREAD;
+ // TODO(kaixih@nvidia): For now, we assume mask is used together with
+  // silu_and_mul. We may want a more general mask behavior later. In the
+ // silu case, the input last dim doubles.
+ bool use_mask = mask != nullptr;
+ int actualColsPerRow = use_silu_and_mul ? colsPerRow * 2 : colsPerRow;
+
+ // Each global thread processes one element
+ for (int globalIdx = tid_in_expert + expert_idx * m * colsPerRow; globalIdx < (expert_idx + 1) * m * colsPerRow;
+ globalIdx += actual_stride) {
+ // Calculate which row and column this global thread should process
+ int rowIdx = globalIdx / colsPerRow;
+ int colIdx = globalIdx % colsPerRow;
+
+ // Find index within the experts
+ int rowIdx_in_expert = rowIdx - expert_idx * m;
+
+ // Early exit when using masks.
+ if (use_mask && rowIdx_in_expert >= mask[expert_idx]) {
+ break;
+ }
+
+ int64_t inOffset = rowIdx * actualColsPerRow + colIdx;
+    PackedVec in_vec = reinterpret_cast<PackedVec const*>(in)[inOffset];
+ if (use_silu_and_mul) {
+      PackedVec in_vec_mul = reinterpret_cast<PackedVec const*>(in)[inOffset + colsPerRow];
+ silu_and_mul(in_vec, in_vec_mul);
+ }
+
+ // Get the output tensor offset.
+ // Same as inOffset because 8 elements are packed into one uint32_t.
+ int64_t outOffset = rowIdx * colsPerRow + colIdx;
+ auto& out_pos = out[outOffset];
+
+ // Get the global scaling factor, which will be applied to the SF.
+ // Note SFScale is the same as next GEMM's alpha, which is
+ // (448.f / (Alpha_A / 6.f)).
+ float const SFScaleVal = SFScale == nullptr ? 1.0f : SFScale[expert_idx];
+
+ int factor = CVT_FP4_SF_VEC_SIZE * 4;
+ // The actual output_scales dim is computed from the padded numCols.
+ int32_t numCols_padded = (numCols + factor - 1) / factor * factor;
+ int numCols_SFout = numCols_padded / CVT_FP4_SF_VEC_SIZE / 4;
+ uint32_t* SFout_in_expert = SFout + expert_idx * padded_m * numCols_SFout;
+
+    auto sf_out = cvt_quant_to_fp4_get_sf_out_offset<uint32_t, CVT_FP4_NUM_THREADS_PER_SF>(
+ rowIdx_in_expert, colIdx, numCols, SFout_in_expert);
+
+    out_pos = cvt_warp_fp16_to_fp4<Type, UE8M0_SF>(in_vec, SFScaleVal, sf_out);
+ }
+#endif
+}
+
// Kernel for LARGE_M_TOPK = true (large m_topk optimized version)
template
__global__ void
@@ -356,6 +342,7 @@ cvt_fp16_to_fp4(
uint32_t* SFout,
uint32_t* input_offset_by_experts,
uint32_t* output_scale_offset_by_experts,
+ int32_t* mask,
int n_experts) {
#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 1000)
   using PackedVec = PackedVec<Type>;
@@ -383,6 +370,8 @@ cvt_fp16_to_fp4(
int tid = blockIdx.x * blockDim.x + threadIdx.x;
int colsPerRow = numCols / CVT_FP4_ELTS_PER_THREAD;
+ bool use_mask = mask != nullptr;
+ int actualColsPerRow = use_mask ? colsPerRow * 2 : colsPerRow;
// Each global thread processes one element
for (int globalIdx = tid; globalIdx < numRows * colsPerRow; globalIdx += gridDim.x * blockDim.x) {
@@ -390,11 +379,6 @@ cvt_fp16_to_fp4(
int rowIdx = globalIdx / colsPerRow;
int colIdx = globalIdx % colsPerRow;
- int64_t inOffset = rowIdx * colsPerRow + colIdx;
- PackedVec in_vec = reinterpret_cast(in)[inOffset];
- int64_t outOffset = inOffset;
- auto& out_pos = out[outOffset];
-
// Find expert using binary search for better performance with large m_topk
int rowIdx_in_expert = 0;
int expert_idx = 0;
@@ -419,6 +403,21 @@ cvt_fp16_to_fp4(
}
}
+ if (use_mask && rowIdx_in_expert >= mask[expert_idx]) {
+ continue;
+ }
+
+ int64_t inOffset = rowIdx * actualColsPerRow + colIdx;
+
+    PackedVec in_vec = reinterpret_cast<PackedVec const*>(in)[inOffset];
+ if (use_mask) {
+      PackedVec in_vec_mul = reinterpret_cast<PackedVec const*>(in)[inOffset + colsPerRow];
+ silu_and_mul(in_vec, in_vec_mul);
+ }
+
+ int64_t outOffset = rowIdx * colsPerRow + colIdx;
+ auto& out_pos = out[outOffset];
+
float const SFScaleVal = SFScale == nullptr ? 1.0f : SFScale[expert_idx];
int factor = CVT_FP4_SF_VEC_SIZE * 4;
@@ -442,6 +441,8 @@ void quant_impl(
void* input_global_scale,
void* input_offset_by_experts,
void* output_scale_offset_by_experts,
+ void* mask,
+ bool use_silu_and_mul,
int m_topk,
int k,
int n_experts,
@@ -465,6 +466,22 @@ void quant_impl(
block.x = (block.x + 1) / 2;
}
+ // TODO(kaixih@nvidia): Should relax this to allow any grid size.
+ if (mask != nullptr) {
+ grid.x = (grid.x + n_experts - 1) / n_experts * n_experts;
+    cvt_fp16_to_fp4_expert<T><<<grid, block, 0, stream>>>(
+ m_topk,
+ k,
+        reinterpret_cast<T*>(input),
+        reinterpret_cast<float*>(input_global_scale),
+        reinterpret_cast<uint32_t*>(output),
+        reinterpret_cast<uint32_t*>(output_scale),
+        reinterpret_cast<int32_t*>(mask),
+ use_silu_and_mul,
+ n_experts);
+ return;
+ }
+
int const blockRepeat = (totalWorkSize + block.x * grid.x - 1) / (block.x * grid.x);
if (blockRepeat > 1) {
size_t shared_mem_size = (n_experts + 1) * sizeof(uint32_t);
@@ -478,6 +495,7 @@ void quant_impl(
           reinterpret_cast<uint32_t*>(output_scale),
           reinterpret_cast<uint32_t*>(input_offset_by_experts),
           reinterpret_cast<uint32_t*>(output_scale_offset_by_experts),
+          reinterpret_cast<int32_t*>(mask),
n_experts);
} else {
cvt_fp16_to_fp4<<>>(
@@ -489,6 +507,7 @@ void quant_impl(
           reinterpret_cast<uint32_t*>(output_scale),
           reinterpret_cast<uint32_t*>(input_offset_by_experts),
           reinterpret_cast<uint32_t*>(output_scale_offset_by_experts),
+          reinterpret_cast<int32_t*>(mask),
n_experts);
}
} else {
@@ -502,6 +521,7 @@ void quant_impl(
           reinterpret_cast<uint32_t*>(output_scale),
           reinterpret_cast<uint32_t*>(input_offset_by_experts),
reinterpret_cast