Skip to content

Commit 03c49ca

Browse files
yhyang201 authored and github-actions[bot] committed
Update dpskv4 GB300 MTP SGLang image to v0.5.12 and clean env vars
1 parent 0c4bf82 commit 03c49ca

7 files changed

Lines changed: 24 additions & 95 deletions

benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-low-latency-1p1d-tp4-tp4-mtp.yaml

Lines changed: 3 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@ dynamo:
1111

1212
model:
1313
path: "deepseek-v4-pro"
14-
container: "lmsysorg/sglang:nightly-dev-cu13-20260510-2473659e"
14+
container: "lmsysorg/sglang:v0.5.12"
1515
precision: "mxfp4"
1616

1717
sbatch_directives:
@@ -31,14 +31,11 @@ backend:
3131

3232
prefill_environment:
3333
PYTHONUNBUFFERED: "1"
34-
SGLANG_RADIX_DISABLE_REUSE: "1"
34+
SGLANG_RADIX_FORCE_MISS: "1"
3535
SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1"
3636
SGLANG_DEFAULT_THINKING: "1"
3737
SGLANG_DSV4_REASONING_EFFORT: "max"
3838
SGLANG_OPT_SWA_SPLIT_LEAF_ON_INSERT: "1"
39-
SGLANG_OPT_USE_JIT_NORM: "1"
40-
SGLANG_OPT_USE_JIT_INDEXER_METADATA: "1"
41-
SGLANG_OPT_USE_TOPK_V2: "1"
4239
NCCL_MNNVL_ENABLE: "1"
4340
NCCL_CUMEM_ENABLE: "1"
4441
SGLANG_MOONCAKE_CUSTOM_MEM_POOL: "True"
@@ -49,14 +46,11 @@ backend:
4946

5047
decode_environment:
5148
PYTHONUNBUFFERED: "1"
52-
SGLANG_RADIX_DISABLE_REUSE: "1"
49+
SGLANG_RADIX_FORCE_MISS: "1"
5350
SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1"
5451
SGLANG_DEFAULT_THINKING: "1"
5552
SGLANG_DSV4_REASONING_EFFORT: "max"
5653
SGLANG_OPT_SWA_SPLIT_LEAF_ON_INSERT: "1"
57-
SGLANG_OPT_USE_JIT_NORM: "1"
58-
SGLANG_OPT_USE_JIT_INDEXER_METADATA: "1"
59-
SGLANG_OPT_USE_TOPK_V2: "1"
6054
NCCL_MNNVL_ENABLE: "1"
6155
NCCL_CUMEM_ENABLE: "1"
6256
SGLANG_MOONCAKE_CUSTOM_MEM_POOL: "True"

benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-low-latency-1p6d-dep4-tp4-mtp.yaml

Lines changed: 3 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@ dynamo:
1111

1212
model:
1313
path: "deepseek-v4-pro"
14-
container: "lmsysorg/sglang:nightly-dev-cu13-20260510-2473659e"
14+
container: "lmsysorg/sglang:v0.5.12"
1515
precision: "mxfp4"
1616

1717
sbatch_directives:
@@ -31,23 +31,15 @@ backend:
3131

3232
prefill_environment:
3333
PYTHONUNBUFFERED: "1"
34-
SGLANG_RADIX_DISABLE_REUSE: "1"
34+
SGLANG_RADIX_FORCE_MISS: "1"
3535
SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1"
3636
SGLANG_DEFAULT_THINKING: "1"
3737
SGLANG_DSV4_REASONING_EFFORT: "max"
3838
SGLANG_OPT_SWA_SPLIT_LEAF_ON_INSERT: "1"
39-
SGLANG_OPT_USE_JIT_NORM: "1"
40-
SGLANG_OPT_USE_JIT_INDEXER_METADATA: "1"
41-
SGLANG_OPT_USE_TOPK_V2: "1"
4239

43-
SGLANG_OPT_SWA_EVICT_DROP_PAGE_MARGIN: "1"
44-
SGLANG_OPT_USE_CUSTOM_ALL_REDUCE_V2: "1"
45-
SGLANG_OPT_USE_FAST_MASK_EP: "1"
4640
SGLANG_OPT_USE_DEEPGEMM_MEGA_MOE: "1"
47-
SGLANG_OPT_FIX_HASH_MEGA_MOE: "1"
4841
SGLANG_OPT_DEEPGEMM_MEGA_MOE_NUM_MAX_TOKENS_PER_RANK: "9216"
4942
SGLANG_OPT_FIX_MEGA_MOE_MEMORY: "1"
50-
SGLANG_OPT_FIX_NEXTN_MEGA_MOE: "1"
5143
SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK: "0"
5244

5345
NCCL_MNNVL_ENABLE: "1"
@@ -60,14 +52,11 @@ backend:
6052

6153
decode_environment:
6254
PYTHONUNBUFFERED: "1"
63-
SGLANG_RADIX_DISABLE_REUSE: "1"
55+
SGLANG_RADIX_FORCE_MISS: "1"
6456
SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1"
6557
SGLANG_DEFAULT_THINKING: "1"
6658
SGLANG_DSV4_REASONING_EFFORT: "max"
6759
SGLANG_OPT_SWA_SPLIT_LEAF_ON_INSERT: "1"
68-
SGLANG_OPT_USE_JIT_NORM: "1"
69-
SGLANG_OPT_USE_JIT_INDEXER_METADATA: "1"
70-
SGLANG_OPT_USE_TOPK_V2: "1"
7160
NCCL_MNNVL_ENABLE: "1"
7261
NCCL_CUMEM_ENABLE: "1"
7362
SGLANG_MOONCAKE_CUSTOM_MEM_POOL: "True"

benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-1p1d-dep4-dep16-mtp.yaml

Lines changed: 3 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@ dynamo:
1111

1212
model:
1313
path: "deepseek-v4-pro"
14-
container: "lmsysorg/sglang:nightly-dev-cu13-20260510-2473659e"
14+
container: "lmsysorg/sglang:v0.5.12"
1515
precision: "mxfp4"
1616

1717
sbatch_directives:
@@ -33,23 +33,15 @@ backend:
3333

3434
prefill_environment:
3535
PYTHONUNBUFFERED: "1"
36-
SGLANG_RADIX_DISABLE_REUSE: "1"
36+
SGLANG_RADIX_FORCE_MISS: "1"
3737
SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1"
3838
SGLANG_DEFAULT_THINKING: "1"
3939
SGLANG_DSV4_REASONING_EFFORT: "max"
4040
SGLANG_OPT_SWA_SPLIT_LEAF_ON_INSERT: "1"
41-
SGLANG_OPT_USE_JIT_NORM: "1"
42-
SGLANG_OPT_USE_JIT_INDEXER_METADATA: "1"
43-
SGLANG_OPT_USE_TOPK_V2: "1"
4441

45-
SGLANG_OPT_SWA_EVICT_DROP_PAGE_MARGIN: "1"
46-
SGLANG_OPT_USE_CUSTOM_ALL_REDUCE_V2: "1"
47-
SGLANG_OPT_USE_FAST_MASK_EP: "1"
4842
SGLANG_OPT_USE_DEEPGEMM_MEGA_MOE: "1"
49-
SGLANG_OPT_FIX_HASH_MEGA_MOE: "1"
5043
SGLANG_OPT_DEEPGEMM_MEGA_MOE_NUM_MAX_TOKENS_PER_RANK: "9216"
5144
SGLANG_OPT_FIX_MEGA_MOE_MEMORY: "1"
52-
SGLANG_OPT_FIX_NEXTN_MEGA_MOE: "1"
5345
SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK: "0"
5446

5547
NCCL_MNNVL_ENABLE: "1"
@@ -62,22 +54,15 @@ backend:
6254

6355
decode_environment:
6456
PYTHONUNBUFFERED: "1"
65-
SGLANG_RADIX_DISABLE_REUSE: "1"
57+
SGLANG_RADIX_FORCE_MISS: "1"
6658
SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1"
6759
SGLANG_DEFAULT_THINKING: "1"
6860
SGLANG_DSV4_REASONING_EFFORT: "max"
6961
SGLANG_OPT_SWA_SPLIT_LEAF_ON_INSERT: "1"
70-
SGLANG_OPT_USE_JIT_NORM: "1"
71-
SGLANG_OPT_USE_JIT_INDEXER_METADATA: "1"
72-
SGLANG_OPT_USE_TOPK_V2: "1"
7362

74-
SGLANG_OPT_SWA_EVICT_DROP_PAGE_MARGIN: "1"
75-
SGLANG_OPT_USE_FAST_MASK_EP: "1"
7663
SGLANG_OPT_USE_DEEPGEMM_MEGA_MOE: "1"
77-
SGLANG_OPT_FIX_HASH_MEGA_MOE: "1"
7864
SGLANG_OPT_DEEPGEMM_MEGA_MOE_NUM_MAX_TOKENS_PER_RANK: "2048"
7965
SGLANG_OPT_FIX_MEGA_MOE_MEMORY: "1"
80-
SGLANG_OPT_FIX_NEXTN_MEGA_MOE: "1"
8166
SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK: "0"
8267

8368
NCCL_MNNVL_ENABLE: "1"

benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-1p1d-dep4-dep8-mtp.yaml

Lines changed: 3 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@ dynamo:
1111

1212
model:
1313
path: "deepseek-v4-pro"
14-
container: "lmsysorg/sglang:nightly-dev-cu13-20260510-2473659e"
14+
container: "lmsysorg/sglang:v0.5.12"
1515
precision: "mxfp4"
1616

1717
sbatch_directives:
@@ -33,23 +33,15 @@ backend:
3333

3434
prefill_environment:
3535
PYTHONUNBUFFERED: "1"
36-
SGLANG_RADIX_DISABLE_REUSE: "1"
36+
SGLANG_RADIX_FORCE_MISS: "1"
3737
SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1"
3838
SGLANG_DEFAULT_THINKING: "1"
3939
SGLANG_DSV4_REASONING_EFFORT: "max"
4040
SGLANG_OPT_SWA_SPLIT_LEAF_ON_INSERT: "1"
41-
SGLANG_OPT_USE_JIT_NORM: "1"
42-
SGLANG_OPT_USE_JIT_INDEXER_METADATA: "1"
43-
SGLANG_OPT_USE_TOPK_V2: "1"
4441

45-
SGLANG_OPT_SWA_EVICT_DROP_PAGE_MARGIN: "1"
46-
SGLANG_OPT_USE_CUSTOM_ALL_REDUCE_V2: "1"
47-
SGLANG_OPT_USE_FAST_MASK_EP: "1"
4842
SGLANG_OPT_USE_DEEPGEMM_MEGA_MOE: "1"
49-
SGLANG_OPT_FIX_HASH_MEGA_MOE: "1"
5043
SGLANG_OPT_DEEPGEMM_MEGA_MOE_NUM_MAX_TOKENS_PER_RANK: "9216"
5144
SGLANG_OPT_FIX_MEGA_MOE_MEMORY: "1"
52-
SGLANG_OPT_FIX_NEXTN_MEGA_MOE: "1"
5345
SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK: "0"
5446

5547
NCCL_MNNVL_ENABLE: "1"
@@ -62,22 +54,15 @@ backend:
6254

6355
decode_environment:
6456
PYTHONUNBUFFERED: "1"
65-
SGLANG_RADIX_DISABLE_REUSE: "1"
57+
SGLANG_RADIX_FORCE_MISS: "1"
6658
SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1"
6759
SGLANG_DEFAULT_THINKING: "1"
6860
SGLANG_DSV4_REASONING_EFFORT: "max"
6961
SGLANG_OPT_SWA_SPLIT_LEAF_ON_INSERT: "1"
70-
SGLANG_OPT_USE_JIT_NORM: "1"
71-
SGLANG_OPT_USE_JIT_INDEXER_METADATA: "1"
72-
SGLANG_OPT_USE_TOPK_V2: "1"
7362

74-
SGLANG_OPT_SWA_EVICT_DROP_PAGE_MARGIN: "1"
75-
SGLANG_OPT_USE_FAST_MASK_EP: "1"
7663
SGLANG_OPT_USE_DEEPGEMM_MEGA_MOE: "1"
77-
SGLANG_OPT_FIX_HASH_MEGA_MOE: "1"
7864
SGLANG_OPT_DEEPGEMM_MEGA_MOE_NUM_MAX_TOKENS_PER_RANK: "2048"
7965
SGLANG_OPT_FIX_MEGA_MOE_MEMORY: "1"
80-
SGLANG_OPT_FIX_NEXTN_MEGA_MOE: "1"
8166
SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK: "0"
8267

8368
NCCL_MNNVL_ENABLE: "1"

benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-2p1d-dep4-dep8-mtp.yaml

Lines changed: 3 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@ dynamo:
1111

1212
model:
1313
path: "deepseek-v4-pro"
14-
container: "lmsysorg/sglang:nightly-dev-cu13-20260510-2473659e"
14+
container: "lmsysorg/sglang:v0.5.12"
1515
precision: "mxfp4"
1616

1717
sbatch_directives:
@@ -33,23 +33,15 @@ backend:
3333

3434
prefill_environment:
3535
PYTHONUNBUFFERED: "1"
36-
SGLANG_RADIX_DISABLE_REUSE: "1"
36+
SGLANG_RADIX_FORCE_MISS: "1"
3737
SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1"
3838
SGLANG_DEFAULT_THINKING: "1"
3939
SGLANG_DSV4_REASONING_EFFORT: "max"
4040
SGLANG_OPT_SWA_SPLIT_LEAF_ON_INSERT: "1"
41-
SGLANG_OPT_USE_JIT_NORM: "1"
42-
SGLANG_OPT_USE_JIT_INDEXER_METADATA: "1"
43-
SGLANG_OPT_USE_TOPK_V2: "1"
4441

45-
SGLANG_OPT_SWA_EVICT_DROP_PAGE_MARGIN: "1"
46-
SGLANG_OPT_USE_CUSTOM_ALL_REDUCE_V2: "1"
47-
SGLANG_OPT_USE_FAST_MASK_EP: "1"
4842
SGLANG_OPT_USE_DEEPGEMM_MEGA_MOE: "1"
49-
SGLANG_OPT_FIX_HASH_MEGA_MOE: "1"
5043
SGLANG_OPT_DEEPGEMM_MEGA_MOE_NUM_MAX_TOKENS_PER_RANK: "9216"
5144
SGLANG_OPT_FIX_MEGA_MOE_MEMORY: "1"
52-
SGLANG_OPT_FIX_NEXTN_MEGA_MOE: "1"
5345
SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK: "0"
5446

5547
NCCL_MNNVL_ENABLE: "1"
@@ -62,22 +54,15 @@ backend:
6254

6355
decode_environment:
6456
PYTHONUNBUFFERED: "1"
65-
SGLANG_RADIX_DISABLE_REUSE: "1"
57+
SGLANG_RADIX_FORCE_MISS: "1"
6658
SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1"
6759
SGLANG_DEFAULT_THINKING: "1"
6860
SGLANG_DSV4_REASONING_EFFORT: "max"
6961
SGLANG_OPT_SWA_SPLIT_LEAF_ON_INSERT: "1"
70-
SGLANG_OPT_USE_JIT_NORM: "1"
71-
SGLANG_OPT_USE_JIT_INDEXER_METADATA: "1"
72-
SGLANG_OPT_USE_TOPK_V2: "1"
7362

74-
SGLANG_OPT_SWA_EVICT_DROP_PAGE_MARGIN: "1"
75-
SGLANG_OPT_USE_FAST_MASK_EP: "1"
7663
SGLANG_OPT_USE_DEEPGEMM_MEGA_MOE: "1"
77-
SGLANG_OPT_FIX_HASH_MEGA_MOE: "1"
7864
SGLANG_OPT_DEEPGEMM_MEGA_MOE_NUM_MAX_TOKENS_PER_RANK: "4096"
7965
SGLANG_OPT_FIX_MEGA_MOE_MEMORY: "1"
80-
SGLANG_OPT_FIX_NEXTN_MEGA_MOE: "1"
8166
SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK: "0"
8267

8368
NCCL_MNNVL_ENABLE: "1"

benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-4p1d-dep4-dep8-mtp.yaml

Lines changed: 3 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@ dynamo:
1111

1212
model:
1313
path: "deepseek-v4-pro"
14-
container: "lmsysorg/sglang:nightly-dev-cu13-20260510-2473659e"
14+
container: "lmsysorg/sglang:v0.5.12"
1515
precision: "mxfp4"
1616

1717
sbatch_directives:
@@ -33,23 +33,15 @@ backend:
3333

3434
prefill_environment:
3535
PYTHONUNBUFFERED: "1"
36-
SGLANG_RADIX_DISABLE_REUSE: "1"
36+
SGLANG_RADIX_FORCE_MISS: "1"
3737
SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1"
3838
SGLANG_DEFAULT_THINKING: "1"
3939
SGLANG_DSV4_REASONING_EFFORT: "max"
4040
SGLANG_OPT_SWA_SPLIT_LEAF_ON_INSERT: "1"
41-
SGLANG_OPT_USE_JIT_NORM: "1"
42-
SGLANG_OPT_USE_JIT_INDEXER_METADATA: "1"
43-
SGLANG_OPT_USE_TOPK_V2: "1"
4441

45-
SGLANG_OPT_SWA_EVICT_DROP_PAGE_MARGIN: "1"
46-
SGLANG_OPT_USE_CUSTOM_ALL_REDUCE_V2: "1"
47-
SGLANG_OPT_USE_FAST_MASK_EP: "1"
4842
SGLANG_OPT_USE_DEEPGEMM_MEGA_MOE: "1"
49-
SGLANG_OPT_FIX_HASH_MEGA_MOE: "1"
5043
SGLANG_OPT_DEEPGEMM_MEGA_MOE_NUM_MAX_TOKENS_PER_RANK: "9216"
5144
SGLANG_OPT_FIX_MEGA_MOE_MEMORY: "1"
52-
SGLANG_OPT_FIX_NEXTN_MEGA_MOE: "1"
5345
SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK: "0"
5446

5547
NCCL_MNNVL_ENABLE: "1"
@@ -62,22 +54,15 @@ backend:
6254

6355
decode_environment:
6456
PYTHONUNBUFFERED: "1"
65-
SGLANG_RADIX_DISABLE_REUSE: "1"
57+
SGLANG_RADIX_FORCE_MISS: "1"
6658
SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1"
6759
SGLANG_DEFAULT_THINKING: "1"
6860
SGLANG_DSV4_REASONING_EFFORT: "max"
6961
SGLANG_OPT_SWA_SPLIT_LEAF_ON_INSERT: "1"
70-
SGLANG_OPT_USE_JIT_NORM: "1"
71-
SGLANG_OPT_USE_JIT_INDEXER_METADATA: "1"
72-
SGLANG_OPT_USE_TOPK_V2: "1"
7362

74-
SGLANG_OPT_SWA_EVICT_DROP_PAGE_MARGIN: "1"
75-
SGLANG_OPT_USE_FAST_MASK_EP: "1"
7663
SGLANG_OPT_USE_DEEPGEMM_MEGA_MOE: "1"
77-
SGLANG_OPT_FIX_HASH_MEGA_MOE: "1"
7864
SGLANG_OPT_DEEPGEMM_MEGA_MOE_NUM_MAX_TOKENS_PER_RANK: "4096"
7965
SGLANG_OPT_FIX_MEGA_MOE_MEMORY: "1"
80-
SGLANG_OPT_FIX_NEXTN_MEGA_MOE: "1"
8166
SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK: "0"
8267

8368
NCCL_MNNVL_ENABLE: "1"

perf-changelog.yaml

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2674,3 +2674,9 @@
26742674
description:
26752675
- "Update SGLang image from nightly-dev-20260216-d3bae71e (86d old) to v0.5.12-cu130"
26762676
pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1446
2677+
2678+
- config-keys:
2679+
- dsv4-fp4-gb300-dynamo-sglang-mtp
2680+
description:
2681+
- "Update SGLang image to v0.5.12 and remove obsolete env vars for MTP configs"
2682+
pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1478

0 commit comments

Comments
 (0)