Skip to content

Commit 3dc973e

Browse files
Fridge003 and claude committed
Bump image to nightly-dev-20260527-14f81a67, clean envs/flags, bump dynamo hash
- Replace SGLang image with nightly-dev-20260527-14f81a67 in all 6 dsv4 8k1k mtp recipes
- Apply the #1559 environ/flag cleanup (drop default-on JIT/topk envs, auto-set MegaMoE companions, and the removed SGLANG_RADIX_DISABLE_REUSE / SGLANG_OPT_USE_FAST_MASK_EP; switch moe-a2a-backend deepep->megamoe and drop deepep-config), while keeping the W4A4 flags SGLANG_OPT_DEEPGEMM_MEGA_MOE_USE_FP4_ACTS / SGLANG_OPT_DEEPGEMM_MEGA_MOE_USE_MXF4_KIND
- Bump dynamo hash to 81d0555ee23519cea80a42b4fe824e30368b7300 (matching #1559)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 75d54d9 commit 3dc973e

7 files changed

Lines changed: 25 additions & 134 deletions

benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-low-latency-1p1d-tp4-tp4-mtp.yaml

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,12 @@ frontend:
66
num_additional_frontends: 8
77

88
dynamo:
9-
hash: "34d55a596fb8d3d44daefe425ec1e303131f4d2c"
9+
hash: "81d0555ee23519cea80a42b4fe824e30368b7300"
1010
install: true
1111

1212
model:
1313
path: "deepseek-v4-pro"
14-
container: "lmsysorg/sglang:nightly-dev-20260522-c9153da5"
14+
container: "lmsysorg/sglang:nightly-dev-20260527-14f81a67"
1515
precision: "mxfp4"
1616

1717
sbatch_directives:
@@ -31,14 +31,10 @@ backend:
3131

3232
prefill_environment:
3333
PYTHONUNBUFFERED: "1"
34-
SGLANG_RADIX_DISABLE_REUSE: "1"
3534
SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1"
3635
SGLANG_DEFAULT_THINKING: "1"
3736
SGLANG_DSV4_REASONING_EFFORT: "max"
3837
SGLANG_OPT_SWA_SPLIT_LEAF_ON_INSERT: "1"
39-
SGLANG_OPT_USE_JIT_NORM: "1"
40-
SGLANG_OPT_USE_JIT_INDEXER_METADATA: "1"
41-
SGLANG_OPT_USE_TOPK_V2: "1"
4238
NCCL_MNNVL_ENABLE: "1"
4339
NCCL_CUMEM_ENABLE: "1"
4440
SGLANG_MOONCAKE_CUSTOM_MEM_POOL: "True"
@@ -49,14 +45,10 @@ backend:
4945

5046
decode_environment:
5147
PYTHONUNBUFFERED: "1"
52-
SGLANG_RADIX_DISABLE_REUSE: "1"
5348
SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1"
5449
SGLANG_DEFAULT_THINKING: "1"
5550
SGLANG_DSV4_REASONING_EFFORT: "max"
5651
SGLANG_OPT_SWA_SPLIT_LEAF_ON_INSERT: "1"
57-
SGLANG_OPT_USE_JIT_NORM: "1"
58-
SGLANG_OPT_USE_JIT_INDEXER_METADATA: "1"
59-
SGLANG_OPT_USE_TOPK_V2: "1"
6052
NCCL_MNNVL_ENABLE: "1"
6153
NCCL_CUMEM_ENABLE: "1"
6254
SGLANG_MOONCAKE_CUSTOM_MEM_POOL: "True"

benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-low-latency-1p6d-dep4-tp4-mtp.yaml

Lines changed: 3 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,12 @@ frontend:
66
num_additional_frontends: 8
77

88
dynamo:
9-
hash: "34d55a596fb8d3d44daefe425ec1e303131f4d2c"
9+
hash: "81d0555ee23519cea80a42b4fe824e30368b7300"
1010
install: true
1111

1212
model:
1313
path: "deepseek-v4-pro"
14-
container: "lmsysorg/sglang:nightly-dev-20260522-c9153da5"
14+
container: "lmsysorg/sglang:nightly-dev-20260527-14f81a67"
1515
precision: "mxfp4"
1616

1717
sbatch_directives:
@@ -31,26 +31,16 @@ backend:
3131

3232
prefill_environment:
3333
PYTHONUNBUFFERED: "1"
34-
SGLANG_RADIX_DISABLE_REUSE: "1"
3534
SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1"
3635
SGLANG_DEFAULT_THINKING: "1"
3736
SGLANG_DSV4_REASONING_EFFORT: "max"
3837
SGLANG_OPT_SWA_SPLIT_LEAF_ON_INSERT: "1"
39-
SGLANG_OPT_USE_JIT_NORM: "1"
40-
SGLANG_OPT_USE_JIT_INDEXER_METADATA: "1"
41-
SGLANG_OPT_USE_TOPK_V2: "1"
4238

4339
SGLANG_OPT_SWA_EVICT_DROP_PAGE_MARGIN: "1"
4440
SGLANG_OPT_USE_CUSTOM_ALL_REDUCE_V2: "1"
45-
SGLANG_OPT_USE_FAST_MASK_EP: "1"
46-
SGLANG_OPT_USE_DEEPGEMM_MEGA_MOE: "1"
4741
SGLANG_OPT_DEEPGEMM_MEGA_MOE_USE_FP4_ACTS: "1"
4842
SGLANG_OPT_DEEPGEMM_MEGA_MOE_USE_MXF4_KIND: "1"
49-
SGLANG_OPT_FIX_HASH_MEGA_MOE: "1"
5043
SGLANG_OPT_DEEPGEMM_MEGA_MOE_NUM_MAX_TOKENS_PER_RANK: "9216"
51-
SGLANG_OPT_FIX_MEGA_MOE_MEMORY: "1"
52-
SGLANG_OPT_FIX_NEXTN_MEGA_MOE: "1"
53-
SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK: "0"
5444

5545
NCCL_MNNVL_ENABLE: "1"
5646
NCCL_CUMEM_ENABLE: "1"
@@ -62,14 +52,10 @@ backend:
6252

6353
decode_environment:
6454
PYTHONUNBUFFERED: "1"
65-
SGLANG_RADIX_DISABLE_REUSE: "1"
6655
SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1"
6756
SGLANG_DEFAULT_THINKING: "1"
6857
SGLANG_DSV4_REASONING_EFFORT: "max"
6958
SGLANG_OPT_SWA_SPLIT_LEAF_ON_INSERT: "1"
70-
SGLANG_OPT_USE_JIT_NORM: "1"
71-
SGLANG_OPT_USE_JIT_INDEXER_METADATA: "1"
72-
SGLANG_OPT_USE_TOPK_V2: "1"
7359
NCCL_MNNVL_ENABLE: "1"
7460
NCCL_CUMEM_ENABLE: "1"
7561
SGLANG_MOONCAKE_CUSTOM_MEM_POOL: "True"
@@ -97,8 +83,7 @@ backend:
9783
enable-dp-attention: true
9884
enable-dp-lm-head: true
9985

100-
moe-a2a-backend: "deepep"
101-
deepep-config: '{"normal_dispatch":{"num_sms":96},"normal_combine":{"num_sms":96}}'
86+
moe-a2a-backend: "megamoe"
10287

10388
mem-fraction-static: 0.9
10489
max-running-requests: 128

benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-1p1d-dep4-dep16-mtp.yaml

Lines changed: 4 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,12 @@ frontend:
66
num_additional_frontends: 8
77

88
dynamo:
9-
hash: "34d55a596fb8d3d44daefe425ec1e303131f4d2c"
9+
hash: "81d0555ee23519cea80a42b4fe824e30368b7300"
1010
install: true
1111

1212
model:
1313
path: "deepseek-v4-pro"
14-
container: "lmsysorg/sglang:nightly-dev-20260522-c9153da5"
14+
container: "lmsysorg/sglang:nightly-dev-20260527-14f81a67"
1515
precision: "mxfp4"
1616

1717
sbatch_directives:
@@ -33,26 +33,16 @@ backend:
3333

3434
prefill_environment:
3535
PYTHONUNBUFFERED: "1"
36-
SGLANG_RADIX_DISABLE_REUSE: "1"
3736
SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1"
3837
SGLANG_DEFAULT_THINKING: "1"
3938
SGLANG_DSV4_REASONING_EFFORT: "max"
4039
SGLANG_OPT_SWA_SPLIT_LEAF_ON_INSERT: "1"
41-
SGLANG_OPT_USE_JIT_NORM: "1"
42-
SGLANG_OPT_USE_JIT_INDEXER_METADATA: "1"
43-
SGLANG_OPT_USE_TOPK_V2: "1"
4440

4541
SGLANG_OPT_SWA_EVICT_DROP_PAGE_MARGIN: "1"
4642
SGLANG_OPT_USE_CUSTOM_ALL_REDUCE_V2: "1"
47-
SGLANG_OPT_USE_FAST_MASK_EP: "1"
48-
SGLANG_OPT_USE_DEEPGEMM_MEGA_MOE: "1"
4943
SGLANG_OPT_DEEPGEMM_MEGA_MOE_USE_FP4_ACTS: "1"
5044
SGLANG_OPT_DEEPGEMM_MEGA_MOE_USE_MXF4_KIND: "1"
51-
SGLANG_OPT_FIX_HASH_MEGA_MOE: "1"
5245
SGLANG_OPT_DEEPGEMM_MEGA_MOE_NUM_MAX_TOKENS_PER_RANK: "9216"
53-
SGLANG_OPT_FIX_MEGA_MOE_MEMORY: "1"
54-
SGLANG_OPT_FIX_NEXTN_MEGA_MOE: "1"
55-
SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK: "0"
5646

5747
NCCL_MNNVL_ENABLE: "1"
5848
NCCL_CUMEM_ENABLE: "1"
@@ -64,25 +54,15 @@ backend:
6454

6555
decode_environment:
6656
PYTHONUNBUFFERED: "1"
67-
SGLANG_RADIX_DISABLE_REUSE: "1"
6857
SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1"
6958
SGLANG_DEFAULT_THINKING: "1"
7059
SGLANG_DSV4_REASONING_EFFORT: "max"
7160
SGLANG_OPT_SWA_SPLIT_LEAF_ON_INSERT: "1"
72-
SGLANG_OPT_USE_JIT_NORM: "1"
73-
SGLANG_OPT_USE_JIT_INDEXER_METADATA: "1"
74-
SGLANG_OPT_USE_TOPK_V2: "1"
7561

7662
SGLANG_OPT_SWA_EVICT_DROP_PAGE_MARGIN: "1"
77-
SGLANG_OPT_USE_FAST_MASK_EP: "1"
78-
SGLANG_OPT_USE_DEEPGEMM_MEGA_MOE: "1"
7963
SGLANG_OPT_DEEPGEMM_MEGA_MOE_USE_FP4_ACTS: "1"
8064
SGLANG_OPT_DEEPGEMM_MEGA_MOE_USE_MXF4_KIND: "1"
81-
SGLANG_OPT_FIX_HASH_MEGA_MOE: "1"
8265
SGLANG_OPT_DEEPGEMM_MEGA_MOE_NUM_MAX_TOKENS_PER_RANK: "2048"
83-
SGLANG_OPT_FIX_MEGA_MOE_MEMORY: "1"
84-
SGLANG_OPT_FIX_NEXTN_MEGA_MOE: "1"
85-
SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK: "0"
8666

8767
NCCL_MNNVL_ENABLE: "1"
8868
NCCL_CUMEM_ENABLE: "1"
@@ -110,8 +90,7 @@ backend:
11090
enable-dp-attention: true
11191
enable-dp-lm-head: true
11292

113-
moe-a2a-backend: "deepep"
114-
deepep-config: '{"normal_dispatch":{"num_sms":96},"normal_combine":{"num_sms":96}}'
93+
moe-a2a-backend: "megamoe"
11594

11695
mem-fraction-static: 0.9
11796
max-running-requests: 256
@@ -135,8 +114,7 @@ backend:
135114
enable-dp-attention: true
136115
enable-dp-lm-head: true
137116

138-
moe-a2a-backend: "deepep"
139-
deepep-config: '{"normal_dispatch":{"num_sms":96},"normal_combine":{"num_sms":96}}'
117+
moe-a2a-backend: "megamoe"
140118

141119
speculative-algo: "EAGLE"
142120
speculative-num-steps: 3

benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-1p1d-dep4-dep8-mtp.yaml

Lines changed: 4 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,12 @@ frontend:
66
num_additional_frontends: 8
77

88
dynamo:
9-
hash: "34d55a596fb8d3d44daefe425ec1e303131f4d2c"
9+
hash: "81d0555ee23519cea80a42b4fe824e30368b7300"
1010
install: true
1111

1212
model:
1313
path: "deepseek-v4-pro"
14-
container: "lmsysorg/sglang:nightly-dev-20260522-c9153da5"
14+
container: "lmsysorg/sglang:nightly-dev-20260527-14f81a67"
1515
precision: "mxfp4"
1616

1717
sbatch_directives:
@@ -33,26 +33,16 @@ backend:
3333

3434
prefill_environment:
3535
PYTHONUNBUFFERED: "1"
36-
SGLANG_RADIX_DISABLE_REUSE: "1"
3736
SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1"
3837
SGLANG_DEFAULT_THINKING: "1"
3938
SGLANG_DSV4_REASONING_EFFORT: "max"
4039
SGLANG_OPT_SWA_SPLIT_LEAF_ON_INSERT: "1"
41-
SGLANG_OPT_USE_JIT_NORM: "1"
42-
SGLANG_OPT_USE_JIT_INDEXER_METADATA: "1"
43-
SGLANG_OPT_USE_TOPK_V2: "1"
4440

4541
SGLANG_OPT_SWA_EVICT_DROP_PAGE_MARGIN: "1"
4642
SGLANG_OPT_USE_CUSTOM_ALL_REDUCE_V2: "1"
47-
SGLANG_OPT_USE_FAST_MASK_EP: "1"
48-
SGLANG_OPT_USE_DEEPGEMM_MEGA_MOE: "1"
4943
SGLANG_OPT_DEEPGEMM_MEGA_MOE_USE_FP4_ACTS: "1"
5044
SGLANG_OPT_DEEPGEMM_MEGA_MOE_USE_MXF4_KIND: "1"
51-
SGLANG_OPT_FIX_HASH_MEGA_MOE: "1"
5245
SGLANG_OPT_DEEPGEMM_MEGA_MOE_NUM_MAX_TOKENS_PER_RANK: "9216"
53-
SGLANG_OPT_FIX_MEGA_MOE_MEMORY: "1"
54-
SGLANG_OPT_FIX_NEXTN_MEGA_MOE: "1"
55-
SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK: "0"
5646

5747
NCCL_MNNVL_ENABLE: "1"
5848
NCCL_CUMEM_ENABLE: "1"
@@ -64,25 +54,15 @@ backend:
6454

6555
decode_environment:
6656
PYTHONUNBUFFERED: "1"
67-
SGLANG_RADIX_DISABLE_REUSE: "1"
6857
SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1"
6958
SGLANG_DEFAULT_THINKING: "1"
7059
SGLANG_DSV4_REASONING_EFFORT: "max"
7160
SGLANG_OPT_SWA_SPLIT_LEAF_ON_INSERT: "1"
72-
SGLANG_OPT_USE_JIT_NORM: "1"
73-
SGLANG_OPT_USE_JIT_INDEXER_METADATA: "1"
74-
SGLANG_OPT_USE_TOPK_V2: "1"
7561

7662
SGLANG_OPT_SWA_EVICT_DROP_PAGE_MARGIN: "1"
77-
SGLANG_OPT_USE_FAST_MASK_EP: "1"
78-
SGLANG_OPT_USE_DEEPGEMM_MEGA_MOE: "1"
7963
SGLANG_OPT_DEEPGEMM_MEGA_MOE_USE_FP4_ACTS: "1"
8064
SGLANG_OPT_DEEPGEMM_MEGA_MOE_USE_MXF4_KIND: "1"
81-
SGLANG_OPT_FIX_HASH_MEGA_MOE: "1"
8265
SGLANG_OPT_DEEPGEMM_MEGA_MOE_NUM_MAX_TOKENS_PER_RANK: "2048"
83-
SGLANG_OPT_FIX_MEGA_MOE_MEMORY: "1"
84-
SGLANG_OPT_FIX_NEXTN_MEGA_MOE: "1"
85-
SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK: "0"
8666

8767
NCCL_MNNVL_ENABLE: "1"
8868
NCCL_CUMEM_ENABLE: "1"
@@ -110,8 +90,7 @@ backend:
11090
enable-dp-attention: true
11191
enable-dp-lm-head: true
11292

113-
moe-a2a-backend: "deepep"
114-
deepep-config: '{"normal_dispatch":{"num_sms":96},"normal_combine":{"num_sms":96}}'
93+
moe-a2a-backend: "megamoe"
11594

11695
mem-fraction-static: 0.9
11796
max-running-requests: 256
@@ -135,8 +114,7 @@ backend:
135114
enable-dp-attention: true
136115
enable-dp-lm-head: true
137116

138-
moe-a2a-backend: "deepep"
139-
deepep-config: '{"normal_dispatch":{"num_sms":96},"normal_combine":{"num_sms":96}}'
117+
moe-a2a-backend: "megamoe"
140118

141119
speculative-algo: "EAGLE"
142120
speculative-num-steps: 3

benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-2p1d-dep4-dep8-mtp.yaml

Lines changed: 4 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,12 @@ frontend:
66
num_additional_frontends: 8
77

88
dynamo:
9-
hash: "34d55a596fb8d3d44daefe425ec1e303131f4d2c"
9+
hash: "81d0555ee23519cea80a42b4fe824e30368b7300"
1010
install: true
1111

1212
model:
1313
path: "deepseek-v4-pro"
14-
container: "lmsysorg/sglang:nightly-dev-20260522-c9153da5"
14+
container: "lmsysorg/sglang:nightly-dev-20260527-14f81a67"
1515
precision: "mxfp4"
1616

1717
sbatch_directives:
@@ -33,26 +33,16 @@ backend:
3333

3434
prefill_environment:
3535
PYTHONUNBUFFERED: "1"
36-
SGLANG_RADIX_DISABLE_REUSE: "1"
3736
SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1"
3837
SGLANG_DEFAULT_THINKING: "1"
3938
SGLANG_DSV4_REASONING_EFFORT: "max"
4039
SGLANG_OPT_SWA_SPLIT_LEAF_ON_INSERT: "1"
41-
SGLANG_OPT_USE_JIT_NORM: "1"
42-
SGLANG_OPT_USE_JIT_INDEXER_METADATA: "1"
43-
SGLANG_OPT_USE_TOPK_V2: "1"
4440

4541
SGLANG_OPT_SWA_EVICT_DROP_PAGE_MARGIN: "1"
4642
SGLANG_OPT_USE_CUSTOM_ALL_REDUCE_V2: "1"
47-
SGLANG_OPT_USE_FAST_MASK_EP: "1"
48-
SGLANG_OPT_USE_DEEPGEMM_MEGA_MOE: "1"
4943
SGLANG_OPT_DEEPGEMM_MEGA_MOE_USE_FP4_ACTS: "1"
5044
SGLANG_OPT_DEEPGEMM_MEGA_MOE_USE_MXF4_KIND: "1"
51-
SGLANG_OPT_FIX_HASH_MEGA_MOE: "1"
5245
SGLANG_OPT_DEEPGEMM_MEGA_MOE_NUM_MAX_TOKENS_PER_RANK: "9216"
53-
SGLANG_OPT_FIX_MEGA_MOE_MEMORY: "1"
54-
SGLANG_OPT_FIX_NEXTN_MEGA_MOE: "1"
55-
SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK: "0"
5646

5747
NCCL_MNNVL_ENABLE: "1"
5848
NCCL_CUMEM_ENABLE: "1"
@@ -64,25 +54,15 @@ backend:
6454

6555
decode_environment:
6656
PYTHONUNBUFFERED: "1"
67-
SGLANG_RADIX_DISABLE_REUSE: "1"
6857
SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1"
6958
SGLANG_DEFAULT_THINKING: "1"
7059
SGLANG_DSV4_REASONING_EFFORT: "max"
7160
SGLANG_OPT_SWA_SPLIT_LEAF_ON_INSERT: "1"
72-
SGLANG_OPT_USE_JIT_NORM: "1"
73-
SGLANG_OPT_USE_JIT_INDEXER_METADATA: "1"
74-
SGLANG_OPT_USE_TOPK_V2: "1"
7561

7662
SGLANG_OPT_SWA_EVICT_DROP_PAGE_MARGIN: "1"
77-
SGLANG_OPT_USE_FAST_MASK_EP: "1"
78-
SGLANG_OPT_USE_DEEPGEMM_MEGA_MOE: "1"
7963
SGLANG_OPT_DEEPGEMM_MEGA_MOE_USE_FP4_ACTS: "1"
8064
SGLANG_OPT_DEEPGEMM_MEGA_MOE_USE_MXF4_KIND: "1"
81-
SGLANG_OPT_FIX_HASH_MEGA_MOE: "1"
8265
SGLANG_OPT_DEEPGEMM_MEGA_MOE_NUM_MAX_TOKENS_PER_RANK: "4096"
83-
SGLANG_OPT_FIX_MEGA_MOE_MEMORY: "1"
84-
SGLANG_OPT_FIX_NEXTN_MEGA_MOE: "1"
85-
SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK: "0"
8666

8767
NCCL_MNNVL_ENABLE: "1"
8868
NCCL_CUMEM_ENABLE: "1"
@@ -110,8 +90,7 @@ backend:
11090
enable-dp-attention: true
11191
enable-dp-lm-head: true
11292

113-
moe-a2a-backend: "deepep"
114-
deepep-config: '{"normal_dispatch":{"num_sms":96},"normal_combine":{"num_sms":96}}'
93+
moe-a2a-backend: "megamoe"
11594

11695
mem-fraction-static: 0.9
11796
max-running-requests: 512
@@ -135,8 +114,7 @@ backend:
135114
enable-dp-attention: true
136115
enable-dp-lm-head: true
137116

138-
moe-a2a-backend: "deepep"
139-
deepep-config: '{"normal_dispatch":{"num_sms":96},"normal_combine":{"num_sms":96}}'
117+
moe-a2a-backend: "megamoe"
140118

141119
speculative-algo: "EAGLE"
142120
speculative-num-steps: 3

0 commit comments

Comments
 (0)