Skip to content

Commit 12c9d3c

Browse files
committed
Restore dpskv4 GB300 non-MTP disagg to staging image + deepep backend
1 parent 4f63034 commit 12c9d3c

8 files changed

Lines changed: 160 additions & 73 deletions

.github/configs/nvidia-master.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -8759,7 +8759,7 @@ dsv4-fp4-gb300-dynamo-vllm:
87598759
dp-attn: true
87608760

87618761
dsv4-fp4-gb300-dynamo-sglang:
8762-
image: lmsysorg/sglang:nightly-dev-cu13-20260519-dbac4647
8762+
image: lmsysorg/sglang-staging:deepseek-v4-grace-blackwell-dev
87638763
model: deepseek-ai/DeepSeek-V4-Pro
87648764
model-prefix: dsv4
87658765
runner: gb300-cw

benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-gb300-10p1d-dep4-dep16-14-c8192.yaml

Lines changed: 28 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -33,7 +33,7 @@ name: "disagg-gb300-10p1d-dep4-dep16-14-c8192"
3333

3434
model:
3535
path: "deepseek-v4-pro"
36-
container: "lmsysorg/sglang:nightly-dev-cu13-20260519-dbac4647"
36+
container: "lmsysorg/sglang-staging:deepseek-v4-grace-blackwell-dev"
3737
precision: "fp4"
3838

3939
dynamo:
@@ -74,14 +74,24 @@ backend:
7474

7575
prefill_environment:
7676
PYTHONUNBUFFERED: "1"
77-
SGLANG_RADIX_FORCE_MISS: "1"
77+
SGLANG_RADIX_DISABLE_REUSE: "1"
7878
SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1"
79-
SGLANG_DEFAULT_THINKING: "1"
80-
SGLANG_DSV4_REASONING_EFFORT: "max"
79+
SGLANG_ENABLE_THINKING: "1"
80+
SGLANG_REASONING_EFFORT: "max"
8181
SGLANG_OPT_SWA_SPLIT_LEAF_ON_INSERT: "1"
8282
SGLANG_OPT_SWA_EVICT_DROP_PAGE_MARGIN: "1"
83+
SGLANG_OPT_USE_JIT_NORM: "1"
84+
SGLANG_OPT_USE_JIT_INDEXER_METADATA: "1"
85+
SGLANG_OPT_USE_TOPK_V2: "1"
86+
SGLANG_OPT_USE_CUSTOM_ALL_REDUCE_V2: "1"
87+
SGLANG_OPT_USE_DEEPGEMM_MEGA_MOE: "1"
88+
SGLANG_OPT_FIX_HASH_MEGA_MOE: "1"
89+
SGLANG_OPT_USE_FAST_MASK_EP: "1"
90+
SGLANG_OPT_FIX_MEGA_MOE_MEMORY: "1"
8391
SGLANG_OPT_DEEPGEMM_MEGA_MOE_NUM_MAX_TOKENS_PER_RANK: "8192"
92+
SGLANG_OPT_FIX_NEXTN_MEGA_MOE: "1"
8493
SGLANG_OPT_USE_ONLINE_COMPRESS: "1"
94+
SGLANG_OPT_FP8_WO_A_GEMM: "1"
8595
SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK: "0"
8696
NCCL_MNNVL_ENABLE: "1"
8797
NCCL_CUMEM_ENABLE: "1"
@@ -94,17 +104,24 @@ backend:
94104
SGLANG_LOG_FORWARD_ITERS: "1"
95105
SGLANG_LOG_MS: "1"
96106
SGLANG_REQUEST_STATE_WAIT_TIMEOUT: "60"
97-
SGLANG_OPT_FP8_WO_A_GEMM: "0"
98107

99108
decode_environment:
100109
PYTHONUNBUFFERED: "1"
101-
SGLANG_RADIX_FORCE_MISS: "1"
110+
SGLANG_RADIX_DISABLE_REUSE: "1"
102111
SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1"
103-
SGLANG_DEFAULT_THINKING: "1"
104-
SGLANG_DSV4_REASONING_EFFORT: "max"
112+
SGLANG_ENABLE_THINKING: "1"
113+
SGLANG_REASONING_EFFORT: "max"
105114
SGLANG_OPT_SWA_SPLIT_LEAF_ON_INSERT: "1"
106115
SGLANG_OPT_SWA_EVICT_DROP_PAGE_MARGIN: "1"
116+
SGLANG_OPT_USE_JIT_NORM: "1"
117+
SGLANG_OPT_USE_JIT_INDEXER_METADATA: "1"
118+
SGLANG_OPT_USE_TOPK_V2: "1"
119+
SGLANG_OPT_USE_DEEPGEMM_MEGA_MOE: "1"
120+
SGLANG_OPT_FIX_HASH_MEGA_MOE: "1"
121+
SGLANG_OPT_USE_FAST_MASK_EP: "1"
122+
SGLANG_OPT_FIX_MEGA_MOE_MEMORY: "1"
107123
SGLANG_OPT_DEEPGEMM_MEGA_MOE_NUM_MAX_TOKENS_PER_RANK: "1280"
124+
SGLANG_OPT_FIX_NEXTN_MEGA_MOE: "1"
108125
SGLANG_OPT_USE_ONLINE_COMPRESS: "1"
109126
SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK: "0"
110127
NCCL_MNNVL_ENABLE: "1"
@@ -119,7 +136,7 @@ backend:
119136
SGLANG_LOG_FORWARD_ITERS: "1"
120137
SGLANG_LOG_MS: "1"
121138
SGLANG_REQUEST_STATE_WAIT_TIMEOUT: "60"
122-
SGLANG_OPT_FP8_WO_A_GEMM: "0"
139+
# SGLANG_OPT_USE_CUSTOM_ALL_REDUCE_V2 intentionally NOT set: CAR_V2
123140
# is single-node only and corrupts results in 2-node decode setups.
124141

125142
sglang_config:
@@ -135,7 +152,7 @@ backend:
135152
expert-parallel-size: 4
136153

137154
enable-dp-attention: true
138-
moe-a2a-backend: "megamoe"
155+
moe-a2a-backend: "deepep"
139156
deepep-config: '{"normal_dispatch":{"num_sms":88,"num_max_nvl_chunked_send_tokens":28,"num_max_nvl_chunked_recv_tokens":512},"normal_combine": {"num_sms":88,"num_max_nvl_chunked_send_tokens":16,"num_max_nvl_chunked_recv_tokens":512}}'
140157
moe-dense-tp-size: 1
141158

@@ -154,7 +171,7 @@ backend:
154171
stream-interval: 60
155172

156173
load-balance-method: "total_requests"
157-
moe-a2a-backend: "megamoe"
174+
moe-a2a-backend: "deepep"
158175

159176
disaggregation-mode: "decode"
160177
disaggregation-transfer-backend: mooncake
@@ -179,4 +196,3 @@ benchmark:
179196
concurrencies: "8192"
180197
req_rate: "inf"
181198
use_chat_template: false
182-
custom_tokenizer: "sa_bench_tokenizers.sglang_deepseek_v4.SGLangDeepseekV4Tokenizer"

benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-gb300-12p1d-dep4-dep12-15-c21504.yaml

Lines changed: 28 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -33,7 +33,7 @@ name: "disagg-gb300-12p1d-dep4-dep12-15-c21504"
3333

3434
model:
3535
path: "deepseek-v4-pro"
36-
container: "lmsysorg/sglang:nightly-dev-cu13-20260519-dbac4647"
36+
container: "lmsysorg/sglang-staging:deepseek-v4-grace-blackwell-dev"
3737
precision: "fp4"
3838

3939
dynamo:
@@ -74,14 +74,24 @@ backend:
7474

7575
prefill_environment:
7676
PYTHONUNBUFFERED: "1"
77-
SGLANG_RADIX_FORCE_MISS: "1"
77+
SGLANG_RADIX_DISABLE_REUSE: "1"
7878
SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1"
79-
SGLANG_DEFAULT_THINKING: "1"
80-
SGLANG_DSV4_REASONING_EFFORT: "max"
79+
SGLANG_ENABLE_THINKING: "1"
80+
SGLANG_REASONING_EFFORT: "max"
8181
SGLANG_OPT_SWA_SPLIT_LEAF_ON_INSERT: "1"
8282
SGLANG_OPT_SWA_EVICT_DROP_PAGE_MARGIN: "1"
83+
SGLANG_OPT_USE_JIT_NORM: "1"
84+
SGLANG_OPT_USE_JIT_INDEXER_METADATA: "1"
85+
SGLANG_OPT_USE_TOPK_V2: "1"
86+
SGLANG_OPT_USE_CUSTOM_ALL_REDUCE_V2: "1"
87+
SGLANG_OPT_USE_DEEPGEMM_MEGA_MOE: "1"
88+
SGLANG_OPT_FIX_HASH_MEGA_MOE: "1"
89+
SGLANG_OPT_USE_FAST_MASK_EP: "1"
90+
SGLANG_OPT_FIX_MEGA_MOE_MEMORY: "1"
8391
SGLANG_OPT_DEEPGEMM_MEGA_MOE_NUM_MAX_TOKENS_PER_RANK: "8192"
92+
SGLANG_OPT_FIX_NEXTN_MEGA_MOE: "1"
8493
SGLANG_OPT_USE_ONLINE_COMPRESS: "1"
94+
SGLANG_OPT_FP8_WO_A_GEMM: "1"
8595
SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK: "0"
8696
NCCL_MNNVL_ENABLE: "1"
8797
NCCL_CUMEM_ENABLE: "1"
@@ -94,17 +104,24 @@ backend:
94104
SGLANG_LOG_FORWARD_ITERS: "1"
95105
SGLANG_LOG_MS: "1"
96106
SGLANG_REQUEST_STATE_WAIT_TIMEOUT: "60"
97-
SGLANG_OPT_FP8_WO_A_GEMM: "0"
98107

99108
decode_environment:
100109
PYTHONUNBUFFERED: "1"
101-
SGLANG_RADIX_FORCE_MISS: "1"
110+
SGLANG_RADIX_DISABLE_REUSE: "1"
102111
SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1"
103-
SGLANG_DEFAULT_THINKING: "1"
104-
SGLANG_DSV4_REASONING_EFFORT: "max"
112+
SGLANG_ENABLE_THINKING: "1"
113+
SGLANG_REASONING_EFFORT: "max"
105114
SGLANG_OPT_SWA_SPLIT_LEAF_ON_INSERT: "1"
106115
SGLANG_OPT_SWA_EVICT_DROP_PAGE_MARGIN: "1"
116+
SGLANG_OPT_USE_JIT_NORM: "1"
117+
SGLANG_OPT_USE_JIT_INDEXER_METADATA: "1"
118+
SGLANG_OPT_USE_TOPK_V2: "1"
119+
SGLANG_OPT_USE_DEEPGEMM_MEGA_MOE: "1"
120+
SGLANG_OPT_FIX_HASH_MEGA_MOE: "1"
121+
SGLANG_OPT_USE_FAST_MASK_EP: "1"
122+
SGLANG_OPT_FIX_MEGA_MOE_MEMORY: "1"
107123
SGLANG_OPT_DEEPGEMM_MEGA_MOE_NUM_MAX_TOKENS_PER_RANK: "1280"
124+
SGLANG_OPT_FIX_NEXTN_MEGA_MOE: "1"
108125
SGLANG_OPT_USE_ONLINE_COMPRESS: "1"
109126
SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK: "0"
110127
NCCL_MNNVL_ENABLE: "1"
@@ -119,7 +136,7 @@ backend:
119136
SGLANG_LOG_FORWARD_ITERS: "1"
120137
SGLANG_LOG_MS: "1"
121138
SGLANG_REQUEST_STATE_WAIT_TIMEOUT: "60"
122-
SGLANG_OPT_FP8_WO_A_GEMM: "0"
139+
# SGLANG_OPT_USE_CUSTOM_ALL_REDUCE_V2 intentionally NOT set: CAR_V2
123140
# is single-node only and corrupts results in 2-node decode setups.
124141

125142
sglang_config:
@@ -135,7 +152,7 @@ backend:
135152
expert-parallel-size: 4
136153

137154
enable-dp-attention: true
138-
moe-a2a-backend: "megamoe"
155+
moe-a2a-backend: "deepep"
139156
deepep-config: '{"normal_dispatch":{"num_sms":88,"num_max_nvl_chunked_send_tokens":28,"num_max_nvl_chunked_recv_tokens":512},"normal_combine": {"num_sms":88,"num_max_nvl_chunked_send_tokens":16,"num_max_nvl_chunked_recv_tokens":512}}'
140157
moe-dense-tp-size: 1
141158

@@ -154,7 +171,7 @@ backend:
154171
stream-interval: 60
155172

156173
load-balance-method: "total_requests"
157-
moe-a2a-backend: "megamoe"
174+
moe-a2a-backend: "deepep"
158175

159176
disaggregation-mode: "decode"
160177
disaggregation-transfer-backend: mooncake
@@ -179,4 +196,3 @@ benchmark:
179196
concurrencies: "21504"
180197
req_rate: "inf"
181198
use_chat_template: false
182-
custom_tokenizer: "sa_bench_tokenizers.sglang_deepseek_v4.SGLangDeepseekV4Tokenizer"

benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-gb300-1p1d-dep4-dep16-5-c1024.yaml

Lines changed: 28 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -33,7 +33,7 @@ name: "disagg-gb300-1p1d-dep4-dep16-5-c1024"
3333

3434
model:
3535
path: "deepseek-v4-pro"
36-
container: "lmsysorg/sglang:nightly-dev-cu13-20260519-dbac4647"
36+
container: "lmsysorg/sglang-staging:deepseek-v4-grace-blackwell-dev"
3737
precision: "fp4"
3838

3939
dynamo:
@@ -74,14 +74,24 @@ backend:
7474

7575
prefill_environment:
7676
PYTHONUNBUFFERED: "1"
77-
SGLANG_RADIX_FORCE_MISS: "1"
77+
SGLANG_RADIX_DISABLE_REUSE: "1"
7878
SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1"
79-
SGLANG_DEFAULT_THINKING: "1"
80-
SGLANG_DSV4_REASONING_EFFORT: "max"
79+
SGLANG_ENABLE_THINKING: "1"
80+
SGLANG_REASONING_EFFORT: "max"
8181
SGLANG_OPT_SWA_SPLIT_LEAF_ON_INSERT: "1"
8282
SGLANG_OPT_SWA_EVICT_DROP_PAGE_MARGIN: "1"
83+
SGLANG_OPT_USE_JIT_NORM: "1"
84+
SGLANG_OPT_USE_JIT_INDEXER_METADATA: "1"
85+
SGLANG_OPT_USE_TOPK_V2: "1"
86+
SGLANG_OPT_USE_CUSTOM_ALL_REDUCE_V2: "1"
87+
SGLANG_OPT_USE_DEEPGEMM_MEGA_MOE: "1"
88+
SGLANG_OPT_FIX_HASH_MEGA_MOE: "1"
89+
SGLANG_OPT_USE_FAST_MASK_EP: "1"
90+
SGLANG_OPT_FIX_MEGA_MOE_MEMORY: "1"
8391
SGLANG_OPT_DEEPGEMM_MEGA_MOE_NUM_MAX_TOKENS_PER_RANK: "8192"
92+
SGLANG_OPT_FIX_NEXTN_MEGA_MOE: "1"
8493
SGLANG_OPT_USE_ONLINE_COMPRESS: "1"
94+
SGLANG_OPT_FP8_WO_A_GEMM: "1"
8595
SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK: "0"
8696
NCCL_MNNVL_ENABLE: "1"
8797
NCCL_CUMEM_ENABLE: "1"
@@ -94,17 +104,24 @@ backend:
94104
SGLANG_LOG_FORWARD_ITERS: "1"
95105
SGLANG_LOG_MS: "1"
96106
SGLANG_REQUEST_STATE_WAIT_TIMEOUT: "60"
97-
SGLANG_OPT_FP8_WO_A_GEMM: "0"
98107

99108
decode_environment:
100109
PYTHONUNBUFFERED: "1"
101-
SGLANG_RADIX_FORCE_MISS: "1"
110+
SGLANG_RADIX_DISABLE_REUSE: "1"
102111
SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1"
103-
SGLANG_DEFAULT_THINKING: "1"
104-
SGLANG_DSV4_REASONING_EFFORT: "max"
112+
SGLANG_ENABLE_THINKING: "1"
113+
SGLANG_REASONING_EFFORT: "max"
105114
SGLANG_OPT_SWA_SPLIT_LEAF_ON_INSERT: "1"
106115
SGLANG_OPT_SWA_EVICT_DROP_PAGE_MARGIN: "1"
116+
SGLANG_OPT_USE_JIT_NORM: "1"
117+
SGLANG_OPT_USE_JIT_INDEXER_METADATA: "1"
118+
SGLANG_OPT_USE_TOPK_V2: "1"
119+
SGLANG_OPT_USE_DEEPGEMM_MEGA_MOE: "1"
120+
SGLANG_OPT_FIX_HASH_MEGA_MOE: "1"
121+
SGLANG_OPT_USE_FAST_MASK_EP: "1"
122+
SGLANG_OPT_FIX_MEGA_MOE_MEMORY: "1"
107123
SGLANG_OPT_DEEPGEMM_MEGA_MOE_NUM_MAX_TOKENS_PER_RANK: "1280"
124+
SGLANG_OPT_FIX_NEXTN_MEGA_MOE: "1"
108125
SGLANG_OPT_USE_ONLINE_COMPRESS: "1"
109126
SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK: "0"
110127
NCCL_MNNVL_ENABLE: "1"
@@ -119,7 +136,7 @@ backend:
119136
SGLANG_LOG_FORWARD_ITERS: "1"
120137
SGLANG_LOG_MS: "1"
121138
SGLANG_REQUEST_STATE_WAIT_TIMEOUT: "60"
122-
SGLANG_OPT_FP8_WO_A_GEMM: "0"
139+
# SGLANG_OPT_USE_CUSTOM_ALL_REDUCE_V2 intentionally NOT set: CAR_V2
123140
# is single-node only and corrupts results in 2-node decode setups.
124141

125142
sglang_config:
@@ -135,7 +152,7 @@ backend:
135152
expert-parallel-size: 4
136153

137154
enable-dp-attention: true
138-
moe-a2a-backend: "megamoe"
155+
moe-a2a-backend: "deepep"
139156
deepep-config: '{"normal_dispatch":{"num_sms":88,"num_max_nvl_chunked_send_tokens":28,"num_max_nvl_chunked_recv_tokens":512},"normal_combine": {"num_sms":88,"num_max_nvl_chunked_send_tokens":16,"num_max_nvl_chunked_recv_tokens":512}}'
140157
moe-dense-tp-size: 1
141158

@@ -154,7 +171,7 @@ backend:
154171
stream-interval: 60
155172

156173
load-balance-method: "total_requests"
157-
moe-a2a-backend: "megamoe"
174+
moe-a2a-backend: "deepep"
158175

159176
disaggregation-mode: "decode"
160177
disaggregation-transfer-backend: mooncake
@@ -179,4 +196,3 @@ benchmark:
179196
concurrencies: "1024"
180197
req_rate: "inf"
181198
use_chat_template: false
182-
custom_tokenizer: "sa_bench_tokenizers.sglang_deepseek_v4.SGLangDeepseekV4Tokenizer"

benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-gb300-1p1d-tp4-tp4-2-c1.yaml

Lines changed: 14 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -33,7 +33,7 @@ name: "disagg-gb300-1p1d-tp4-tp4-2-c1"
3333

3434
model:
3535
path: "deepseek-v4-pro"
36-
container: "lmsysorg/sglang:nightly-dev-cu13-20260519-dbac4647"
36+
container: "lmsysorg/sglang-staging:deepseek-v4-grace-blackwell-dev"
3737
precision: "fp4"
3838

3939
# See ../1k1k/disagg-gb200-1p1d-dep8-tep8.yaml for the dynamo pin
@@ -80,37 +80,40 @@ backend:
8080

8181
prefill_environment:
8282
PYTHONUNBUFFERED: "1"
83-
SGLANG_RADIX_FORCE_MISS: "1"
83+
SGLANG_RADIX_DISABLE_REUSE: "1"
8484
SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1"
85-
SGLANG_DEFAULT_THINKING: "1"
86-
SGLANG_DSV4_REASONING_EFFORT: "max"
85+
SGLANG_ENABLE_THINKING: "1"
86+
SGLANG_REASONING_EFFORT: "max"
8787
SGLANG_OPT_SWA_SPLIT_LEAF_ON_INSERT: "1"
88-
SGLANG_OPT_SWA_EVICT_DROP_PAGE_MARGIN: "1"
88+
SGLANG_OPT_USE_JIT_NORM: "1"
89+
SGLANG_OPT_USE_JIT_INDEXER_METADATA: "1"
90+
SGLANG_OPT_USE_TOPK_V2: "1"
8991
NCCL_MNNVL_ENABLE: "1"
9092
NCCL_CUMEM_ENABLE: "1"
9193
SGLANG_MOONCAKE_CUSTOM_MEM_POOL: "True"
9294
MC_FORCE_MNNVL: "1"
9395
SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: "100000"
9496
SGLANG_DISAGGREGATION_WAITING_TIMEOUT: "100000"
9597
SGLANG_OPT_SWA_RELEASE_LEAF_LOCK_AFTER_WINDOW: "1"
96-
SGLANG_OPT_FP8_WO_A_GEMM: "0"
9798

9899
decode_environment:
99100
PYTHONUNBUFFERED: "1"
100-
SGLANG_RADIX_FORCE_MISS: "1"
101+
SGLANG_RADIX_DISABLE_REUSE: "1"
101102
SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1"
102-
SGLANG_DEFAULT_THINKING: "1"
103-
SGLANG_DSV4_REASONING_EFFORT: "max"
103+
SGLANG_ENABLE_THINKING: "1"
104+
SGLANG_REASONING_EFFORT: "max"
104105
SGLANG_OPT_SWA_SPLIT_LEAF_ON_INSERT: "1"
105-
SGLANG_OPT_SWA_EVICT_DROP_PAGE_MARGIN: "1"
106+
SGLANG_OPT_USE_JIT_NORM: "1"
107+
SGLANG_OPT_USE_JIT_INDEXER_METADATA: "1"
108+
SGLANG_OPT_USE_TOPK_V2: "1"
106109
NCCL_MNNVL_ENABLE: "1"
107110
NCCL_CUMEM_ENABLE: "1"
108111
SGLANG_MOONCAKE_CUSTOM_MEM_POOL: "True"
109112
MC_FORCE_MNNVL: "1"
110113
SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: "100000"
111114
SGLANG_DISAGGREGATION_WAITING_TIMEOUT: "100000"
112115
SGLANG_OPT_SWA_RELEASE_LEAF_LOCK_AFTER_WINDOW: "1"
113-
SGLANG_OPT_FP8_WO_A_GEMM: "0"
116+
# SGLANG_OPT_USE_CUSTOM_ALL_REDUCE_V2 intentionally NOT set: CAR_V2
114117
# is single-node only and corrupts results in 2-node decode setups.
115118

116119
sglang_config:
@@ -164,4 +167,3 @@ benchmark:
164167
concurrencies: "1"
165168
req_rate: "inf"
166169
use_chat_template: false
167-
custom_tokenizer: "sa_bench_tokenizers.sglang_deepseek_v4.SGLangDeepseekV4Tokenizer"

0 commit comments

Comments
 (0)