Skip to content

Commit 0c7c630

Browse files
committed
Update dpskv4 GB300 MTP SGLang image to nightly-20260518 and clean env vars
1 parent 7f60bc7 commit 0c7c630

7 files changed

Lines changed: 35 additions & 95 deletions

benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-low-latency-1p1d-tp4-tp4-mtp.yaml

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ dynamo:
1111

1212
model:
1313
path: "deepseek-v4-pro"
14-
container: "lmsysorg/sglang:nightly-dev-cu13-20260510-2473659e"
14+
container: "lmsysorg/sglang:nightly-dev-cu13-20260518-c67b2870"
1515
precision: "mxfp4"
1616

1717
sbatch_directives:
@@ -31,14 +31,12 @@ backend:
3131

3232
prefill_environment:
3333
PYTHONUNBUFFERED: "1"
34-
SGLANG_RADIX_DISABLE_REUSE: "1"
34+
SGLANG_RADIX_FORCE_MISS: "1"
3535
SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1"
3636
SGLANG_DEFAULT_THINKING: "1"
3737
SGLANG_DSV4_REASONING_EFFORT: "max"
3838
SGLANG_OPT_SWA_SPLIT_LEAF_ON_INSERT: "1"
39-
SGLANG_OPT_USE_JIT_NORM: "1"
40-
SGLANG_OPT_USE_JIT_INDEXER_METADATA: "1"
41-
SGLANG_OPT_USE_TOPK_V2: "1"
39+
SGLANG_OPT_SWA_EVICT_DROP_PAGE_MARGIN: "1"
4240
NCCL_MNNVL_ENABLE: "1"
4341
NCCL_CUMEM_ENABLE: "1"
4442
SGLANG_MOONCAKE_CUSTOM_MEM_POOL: "True"
@@ -49,14 +47,12 @@ backend:
4947

5048
decode_environment:
5149
PYTHONUNBUFFERED: "1"
52-
SGLANG_RADIX_DISABLE_REUSE: "1"
50+
SGLANG_RADIX_FORCE_MISS: "1"
5351
SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1"
5452
SGLANG_DEFAULT_THINKING: "1"
5553
SGLANG_DSV4_REASONING_EFFORT: "max"
5654
SGLANG_OPT_SWA_SPLIT_LEAF_ON_INSERT: "1"
57-
SGLANG_OPT_USE_JIT_NORM: "1"
58-
SGLANG_OPT_USE_JIT_INDEXER_METADATA: "1"
59-
SGLANG_OPT_USE_TOPK_V2: "1"
55+
SGLANG_OPT_SWA_EVICT_DROP_PAGE_MARGIN: "1"
6056
NCCL_MNNVL_ENABLE: "1"
6157
NCCL_CUMEM_ENABLE: "1"
6258
SGLANG_MOONCAKE_CUSTOM_MEM_POOL: "True"

benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-low-latency-1p6d-dep4-tp4-mtp.yaml

Lines changed: 4 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ dynamo:
1111

1212
model:
1313
path: "deepseek-v4-pro"
14-
container: "lmsysorg/sglang:nightly-dev-cu13-20260510-2473659e"
14+
container: "lmsysorg/sglang:nightly-dev-cu13-20260518-c67b2870"
1515
precision: "mxfp4"
1616

1717
sbatch_directives:
@@ -31,23 +31,16 @@ backend:
3131

3232
prefill_environment:
3333
PYTHONUNBUFFERED: "1"
34-
SGLANG_RADIX_DISABLE_REUSE: "1"
34+
SGLANG_RADIX_FORCE_MISS: "1"
3535
SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1"
3636
SGLANG_DEFAULT_THINKING: "1"
3737
SGLANG_DSV4_REASONING_EFFORT: "max"
3838
SGLANG_OPT_SWA_SPLIT_LEAF_ON_INSERT: "1"
39-
SGLANG_OPT_USE_JIT_NORM: "1"
40-
SGLANG_OPT_USE_JIT_INDEXER_METADATA: "1"
41-
SGLANG_OPT_USE_TOPK_V2: "1"
42-
4339
SGLANG_OPT_SWA_EVICT_DROP_PAGE_MARGIN: "1"
44-
SGLANG_OPT_USE_CUSTOM_ALL_REDUCE_V2: "1"
45-
SGLANG_OPT_USE_FAST_MASK_EP: "1"
40+
4641
SGLANG_OPT_USE_DEEPGEMM_MEGA_MOE: "1"
47-
SGLANG_OPT_FIX_HASH_MEGA_MOE: "1"
4842
SGLANG_OPT_DEEPGEMM_MEGA_MOE_NUM_MAX_TOKENS_PER_RANK: "9216"
4943
SGLANG_OPT_FIX_MEGA_MOE_MEMORY: "1"
50-
SGLANG_OPT_FIX_NEXTN_MEGA_MOE: "1"
5144
SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK: "0"
5245

5346
NCCL_MNNVL_ENABLE: "1"
@@ -60,14 +53,11 @@ backend:
6053

6154
decode_environment:
6255
PYTHONUNBUFFERED: "1"
63-
SGLANG_RADIX_DISABLE_REUSE: "1"
56+
SGLANG_RADIX_FORCE_MISS: "1"
6457
SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1"
6558
SGLANG_DEFAULT_THINKING: "1"
6659
SGLANG_DSV4_REASONING_EFFORT: "max"
6760
SGLANG_OPT_SWA_SPLIT_LEAF_ON_INSERT: "1"
68-
SGLANG_OPT_USE_JIT_NORM: "1"
69-
SGLANG_OPT_USE_JIT_INDEXER_METADATA: "1"
70-
SGLANG_OPT_USE_TOPK_V2: "1"
7161
NCCL_MNNVL_ENABLE: "1"
7262
NCCL_CUMEM_ENABLE: "1"
7363
SGLANG_MOONCAKE_CUSTOM_MEM_POOL: "True"

benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-1p1d-dep4-dep16-mtp.yaml

Lines changed: 5 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ dynamo:
1111

1212
model:
1313
path: "deepseek-v4-pro"
14-
container: "lmsysorg/sglang:nightly-dev-cu13-20260510-2473659e"
14+
container: "lmsysorg/sglang:nightly-dev-cu13-20260518-c67b2870"
1515
precision: "mxfp4"
1616

1717
sbatch_directives:
@@ -33,23 +33,16 @@ backend:
3333

3434
prefill_environment:
3535
PYTHONUNBUFFERED: "1"
36-
SGLANG_RADIX_DISABLE_REUSE: "1"
36+
SGLANG_RADIX_FORCE_MISS: "1"
3737
SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1"
3838
SGLANG_DEFAULT_THINKING: "1"
3939
SGLANG_DSV4_REASONING_EFFORT: "max"
4040
SGLANG_OPT_SWA_SPLIT_LEAF_ON_INSERT: "1"
41-
SGLANG_OPT_USE_JIT_NORM: "1"
42-
SGLANG_OPT_USE_JIT_INDEXER_METADATA: "1"
43-
SGLANG_OPT_USE_TOPK_V2: "1"
44-
4541
SGLANG_OPT_SWA_EVICT_DROP_PAGE_MARGIN: "1"
46-
SGLANG_OPT_USE_CUSTOM_ALL_REDUCE_V2: "1"
47-
SGLANG_OPT_USE_FAST_MASK_EP: "1"
42+
4843
SGLANG_OPT_USE_DEEPGEMM_MEGA_MOE: "1"
49-
SGLANG_OPT_FIX_HASH_MEGA_MOE: "1"
5044
SGLANG_OPT_DEEPGEMM_MEGA_MOE_NUM_MAX_TOKENS_PER_RANK: "9216"
5145
SGLANG_OPT_FIX_MEGA_MOE_MEMORY: "1"
52-
SGLANG_OPT_FIX_NEXTN_MEGA_MOE: "1"
5346
SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK: "0"
5447

5548
NCCL_MNNVL_ENABLE: "1"
@@ -62,22 +55,16 @@ backend:
6255

6356
decode_environment:
6457
PYTHONUNBUFFERED: "1"
65-
SGLANG_RADIX_DISABLE_REUSE: "1"
58+
SGLANG_RADIX_FORCE_MISS: "1"
6659
SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1"
6760
SGLANG_DEFAULT_THINKING: "1"
6861
SGLANG_DSV4_REASONING_EFFORT: "max"
6962
SGLANG_OPT_SWA_SPLIT_LEAF_ON_INSERT: "1"
70-
SGLANG_OPT_USE_JIT_NORM: "1"
71-
SGLANG_OPT_USE_JIT_INDEXER_METADATA: "1"
72-
SGLANG_OPT_USE_TOPK_V2: "1"
73-
7463
SGLANG_OPT_SWA_EVICT_DROP_PAGE_MARGIN: "1"
75-
SGLANG_OPT_USE_FAST_MASK_EP: "1"
64+
7665
SGLANG_OPT_USE_DEEPGEMM_MEGA_MOE: "1"
77-
SGLANG_OPT_FIX_HASH_MEGA_MOE: "1"
7866
SGLANG_OPT_DEEPGEMM_MEGA_MOE_NUM_MAX_TOKENS_PER_RANK: "2048"
7967
SGLANG_OPT_FIX_MEGA_MOE_MEMORY: "1"
80-
SGLANG_OPT_FIX_NEXTN_MEGA_MOE: "1"
8168
SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK: "0"
8269

8370
NCCL_MNNVL_ENABLE: "1"

benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-1p1d-dep4-dep8-mtp.yaml

Lines changed: 5 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ dynamo:
1111

1212
model:
1313
path: "deepseek-v4-pro"
14-
container: "lmsysorg/sglang:nightly-dev-cu13-20260510-2473659e"
14+
container: "lmsysorg/sglang:nightly-dev-cu13-20260518-c67b2870"
1515
precision: "mxfp4"
1616

1717
sbatch_directives:
@@ -33,23 +33,16 @@ backend:
3333

3434
prefill_environment:
3535
PYTHONUNBUFFERED: "1"
36-
SGLANG_RADIX_DISABLE_REUSE: "1"
36+
SGLANG_RADIX_FORCE_MISS: "1"
3737
SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1"
3838
SGLANG_DEFAULT_THINKING: "1"
3939
SGLANG_DSV4_REASONING_EFFORT: "max"
4040
SGLANG_OPT_SWA_SPLIT_LEAF_ON_INSERT: "1"
41-
SGLANG_OPT_USE_JIT_NORM: "1"
42-
SGLANG_OPT_USE_JIT_INDEXER_METADATA: "1"
43-
SGLANG_OPT_USE_TOPK_V2: "1"
44-
4541
SGLANG_OPT_SWA_EVICT_DROP_PAGE_MARGIN: "1"
46-
SGLANG_OPT_USE_CUSTOM_ALL_REDUCE_V2: "1"
47-
SGLANG_OPT_USE_FAST_MASK_EP: "1"
42+
4843
SGLANG_OPT_USE_DEEPGEMM_MEGA_MOE: "1"
49-
SGLANG_OPT_FIX_HASH_MEGA_MOE: "1"
5044
SGLANG_OPT_DEEPGEMM_MEGA_MOE_NUM_MAX_TOKENS_PER_RANK: "9216"
5145
SGLANG_OPT_FIX_MEGA_MOE_MEMORY: "1"
52-
SGLANG_OPT_FIX_NEXTN_MEGA_MOE: "1"
5346
SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK: "0"
5447

5548
NCCL_MNNVL_ENABLE: "1"
@@ -62,22 +55,16 @@ backend:
6255

6356
decode_environment:
6457
PYTHONUNBUFFERED: "1"
65-
SGLANG_RADIX_DISABLE_REUSE: "1"
58+
SGLANG_RADIX_FORCE_MISS: "1"
6659
SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1"
6760
SGLANG_DEFAULT_THINKING: "1"
6861
SGLANG_DSV4_REASONING_EFFORT: "max"
6962
SGLANG_OPT_SWA_SPLIT_LEAF_ON_INSERT: "1"
70-
SGLANG_OPT_USE_JIT_NORM: "1"
71-
SGLANG_OPT_USE_JIT_INDEXER_METADATA: "1"
72-
SGLANG_OPT_USE_TOPK_V2: "1"
73-
7463
SGLANG_OPT_SWA_EVICT_DROP_PAGE_MARGIN: "1"
75-
SGLANG_OPT_USE_FAST_MASK_EP: "1"
64+
7665
SGLANG_OPT_USE_DEEPGEMM_MEGA_MOE: "1"
77-
SGLANG_OPT_FIX_HASH_MEGA_MOE: "1"
7866
SGLANG_OPT_DEEPGEMM_MEGA_MOE_NUM_MAX_TOKENS_PER_RANK: "2048"
7967
SGLANG_OPT_FIX_MEGA_MOE_MEMORY: "1"
80-
SGLANG_OPT_FIX_NEXTN_MEGA_MOE: "1"
8168
SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK: "0"
8269

8370
NCCL_MNNVL_ENABLE: "1"

benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-2p1d-dep4-dep8-mtp.yaml

Lines changed: 5 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ dynamo:
1111

1212
model:
1313
path: "deepseek-v4-pro"
14-
container: "lmsysorg/sglang:nightly-dev-cu13-20260510-2473659e"
14+
container: "lmsysorg/sglang:nightly-dev-cu13-20260518-c67b2870"
1515
precision: "mxfp4"
1616

1717
sbatch_directives:
@@ -33,23 +33,16 @@ backend:
3333

3434
prefill_environment:
3535
PYTHONUNBUFFERED: "1"
36-
SGLANG_RADIX_DISABLE_REUSE: "1"
36+
SGLANG_RADIX_FORCE_MISS: "1"
3737
SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1"
3838
SGLANG_DEFAULT_THINKING: "1"
3939
SGLANG_DSV4_REASONING_EFFORT: "max"
4040
SGLANG_OPT_SWA_SPLIT_LEAF_ON_INSERT: "1"
41-
SGLANG_OPT_USE_JIT_NORM: "1"
42-
SGLANG_OPT_USE_JIT_INDEXER_METADATA: "1"
43-
SGLANG_OPT_USE_TOPK_V2: "1"
44-
4541
SGLANG_OPT_SWA_EVICT_DROP_PAGE_MARGIN: "1"
46-
SGLANG_OPT_USE_CUSTOM_ALL_REDUCE_V2: "1"
47-
SGLANG_OPT_USE_FAST_MASK_EP: "1"
42+
4843
SGLANG_OPT_USE_DEEPGEMM_MEGA_MOE: "1"
49-
SGLANG_OPT_FIX_HASH_MEGA_MOE: "1"
5044
SGLANG_OPT_DEEPGEMM_MEGA_MOE_NUM_MAX_TOKENS_PER_RANK: "9216"
5145
SGLANG_OPT_FIX_MEGA_MOE_MEMORY: "1"
52-
SGLANG_OPT_FIX_NEXTN_MEGA_MOE: "1"
5346
SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK: "0"
5447

5548
NCCL_MNNVL_ENABLE: "1"
@@ -62,22 +55,16 @@ backend:
6255

6356
decode_environment:
6457
PYTHONUNBUFFERED: "1"
65-
SGLANG_RADIX_DISABLE_REUSE: "1"
58+
SGLANG_RADIX_FORCE_MISS: "1"
6659
SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1"
6760
SGLANG_DEFAULT_THINKING: "1"
6861
SGLANG_DSV4_REASONING_EFFORT: "max"
6962
SGLANG_OPT_SWA_SPLIT_LEAF_ON_INSERT: "1"
70-
SGLANG_OPT_USE_JIT_NORM: "1"
71-
SGLANG_OPT_USE_JIT_INDEXER_METADATA: "1"
72-
SGLANG_OPT_USE_TOPK_V2: "1"
73-
7463
SGLANG_OPT_SWA_EVICT_DROP_PAGE_MARGIN: "1"
75-
SGLANG_OPT_USE_FAST_MASK_EP: "1"
64+
7665
SGLANG_OPT_USE_DEEPGEMM_MEGA_MOE: "1"
77-
SGLANG_OPT_FIX_HASH_MEGA_MOE: "1"
7866
SGLANG_OPT_DEEPGEMM_MEGA_MOE_NUM_MAX_TOKENS_PER_RANK: "4096"
7967
SGLANG_OPT_FIX_MEGA_MOE_MEMORY: "1"
80-
SGLANG_OPT_FIX_NEXTN_MEGA_MOE: "1"
8168
SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK: "0"
8269

8370
NCCL_MNNVL_ENABLE: "1"

benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-4p1d-dep4-dep8-mtp.yaml

Lines changed: 5 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ dynamo:
1111

1212
model:
1313
path: "deepseek-v4-pro"
14-
container: "lmsysorg/sglang:nightly-dev-cu13-20260510-2473659e"
14+
container: "lmsysorg/sglang:nightly-dev-cu13-20260518-c67b2870"
1515
precision: "mxfp4"
1616

1717
sbatch_directives:
@@ -33,23 +33,16 @@ backend:
3333

3434
prefill_environment:
3535
PYTHONUNBUFFERED: "1"
36-
SGLANG_RADIX_DISABLE_REUSE: "1"
36+
SGLANG_RADIX_FORCE_MISS: "1"
3737
SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1"
3838
SGLANG_DEFAULT_THINKING: "1"
3939
SGLANG_DSV4_REASONING_EFFORT: "max"
4040
SGLANG_OPT_SWA_SPLIT_LEAF_ON_INSERT: "1"
41-
SGLANG_OPT_USE_JIT_NORM: "1"
42-
SGLANG_OPT_USE_JIT_INDEXER_METADATA: "1"
43-
SGLANG_OPT_USE_TOPK_V2: "1"
44-
4541
SGLANG_OPT_SWA_EVICT_DROP_PAGE_MARGIN: "1"
46-
SGLANG_OPT_USE_CUSTOM_ALL_REDUCE_V2: "1"
47-
SGLANG_OPT_USE_FAST_MASK_EP: "1"
42+
4843
SGLANG_OPT_USE_DEEPGEMM_MEGA_MOE: "1"
49-
SGLANG_OPT_FIX_HASH_MEGA_MOE: "1"
5044
SGLANG_OPT_DEEPGEMM_MEGA_MOE_NUM_MAX_TOKENS_PER_RANK: "9216"
5145
SGLANG_OPT_FIX_MEGA_MOE_MEMORY: "1"
52-
SGLANG_OPT_FIX_NEXTN_MEGA_MOE: "1"
5346
SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK: "0"
5447

5548
NCCL_MNNVL_ENABLE: "1"
@@ -62,22 +55,16 @@ backend:
6255

6356
decode_environment:
6457
PYTHONUNBUFFERED: "1"
65-
SGLANG_RADIX_DISABLE_REUSE: "1"
58+
SGLANG_RADIX_FORCE_MISS: "1"
6659
SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1"
6760
SGLANG_DEFAULT_THINKING: "1"
6861
SGLANG_DSV4_REASONING_EFFORT: "max"
6962
SGLANG_OPT_SWA_SPLIT_LEAF_ON_INSERT: "1"
70-
SGLANG_OPT_USE_JIT_NORM: "1"
71-
SGLANG_OPT_USE_JIT_INDEXER_METADATA: "1"
72-
SGLANG_OPT_USE_TOPK_V2: "1"
73-
7463
SGLANG_OPT_SWA_EVICT_DROP_PAGE_MARGIN: "1"
75-
SGLANG_OPT_USE_FAST_MASK_EP: "1"
64+
7665
SGLANG_OPT_USE_DEEPGEMM_MEGA_MOE: "1"
77-
SGLANG_OPT_FIX_HASH_MEGA_MOE: "1"
7866
SGLANG_OPT_DEEPGEMM_MEGA_MOE_NUM_MAX_TOKENS_PER_RANK: "4096"
7967
SGLANG_OPT_FIX_MEGA_MOE_MEMORY: "1"
80-
SGLANG_OPT_FIX_NEXTN_MEGA_MOE: "1"
8168
SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK: "0"
8269

8370
NCCL_MNNVL_ENABLE: "1"

perf-changelog.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2741,3 +2741,9 @@
27412741
description:
27422742
- "Update vLLM image from v0.19.0-cu130 (26d old) to v0.21.0"
27432743
pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1454
2744+
2745+
- config-keys:
2746+
- dsv4-fp4-gb300-dynamo-sglang-mtp
2747+
description:
2748+
- "Update SGLang image to nightly-20260518 and remove obsolete env vars for MTP configs"
2749+
pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1478

0 commit comments

Comments
 (0)