Skip to content

Commit cc55442

Browse files
committed
Update dpskv4 GB300 MTP SGLang image to v0.5.12 and clean env vars
1 parent 891a72c commit cc55442

11 files changed

Lines changed: 55 additions & 105 deletions

.github/configs/amd-master.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ dsr1-fp8-mi300x-sglang:
8585
- { tp: 8, conc-start: 4, conc-end: 64 }
8686

8787
dsr1-fp8-mi325x-sglang:
88-
image: lmsysorg/sglang:v0.5.9-rocm700-mi30x
88+
image: lmsysorg/sglang:v0.5.12-rocm700-mi30x
8989
model: deepseek-ai/DeepSeek-R1-0528
9090
model-prefix: dsr1
9191
runner: mi325x

.github/configs/nvidia-master.yaml

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1670,7 +1670,7 @@ dsr1-fp8-b300-dynamo-trt:
16701670
ep: 8
16711671
dp-attn: true
16721672
dsr1-fp4-b200-sglang:
1673-
image: lmsysorg/sglang:v0.5.9-cu130
1673+
image: lmsysorg/sglang:v0.5.12-cu130
16741674
model: nvidia/DeepSeek-R1-0528-FP4-V2
16751675
model-prefix: dsr1
16761676
runner: b200
@@ -1689,11 +1689,14 @@ dsr1-fp4-b200-sglang:
16891689
search-space:
16901690
- { tp: 4, ep: 4, conc-start: 4, conc-end: 128 }
16911691
- { tp: 8, ep: 8, conc-start: 4, conc-end: 16 }
1692-
agentic-coding:
1693-
- duration: 1800
1694-
search-space:
1695-
- { tp: 4, ep: 4, offloading: none, conc-list: [1, 2, 4, 8, 12, 16, 24, 32, 48, 64, 128, 256] }
1696-
- { tp: 8, ep: 8, offloading: none, conc-list: [1, 2, 4, 8, 12, 16, 32, 64, 128, 256, 512] }
1692+
# agentic-coding: temporarily disabled — blocked by an artifact-name mismatch:
1693+
# e2e-tests.yml downloads `agentic_*`, but benchmark-tmpl.yml uploads
1694+
# `bmk_agentic_*`. Re-enable once the two workflows are aligned.
1695+
# agentic-coding:
1696+
# - duration: 1800
1697+
# search-space:
1698+
# - { tp: 4, ep: 4, offloading: none, conc-list: [1, 2, 4, 8, 12, 16, 24, 32, 48, 64, 128, 256] }
1699+
# - { tp: 8, ep: 8, offloading: none, conc-list: [1, 2, 4, 8, 12, 16, 32, 64, 128, 256, 512] }
16971700

16981701
dsv4-fp4-b200-sglang:
16991702
image: lmsysorg/sglang:deepseek-v4-blackwell@sha256:df18bfc4aa9ecf59451002b49ba00cae58042de9e2a96378bbd21b404dd62c7b

benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-low-latency-1p1d-tp4-tp4-mtp.yaml

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ dynamo:
1111

1212
model:
1313
path: "deepseek-v4-pro"
14-
container: "lmsysorg/sglang:nightly-dev-cu13-20260510-2473659e"
14+
container: "lmsysorg/sglang:v0.5.12"
1515
precision: "mxfp4"
1616

1717
sbatch_directives:
@@ -31,14 +31,11 @@ backend:
3131

3232
prefill_environment:
3333
PYTHONUNBUFFERED: "1"
34-
SGLANG_RADIX_DISABLE_REUSE: "1"
34+
SGLANG_RADIX_FORCE_MISS: "1"
3535
SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1"
3636
SGLANG_DEFAULT_THINKING: "1"
3737
SGLANG_DSV4_REASONING_EFFORT: "max"
3838
SGLANG_OPT_SWA_SPLIT_LEAF_ON_INSERT: "1"
39-
SGLANG_OPT_USE_JIT_NORM: "1"
40-
SGLANG_OPT_USE_JIT_INDEXER_METADATA: "1"
41-
SGLANG_OPT_USE_TOPK_V2: "1"
4239
NCCL_MNNVL_ENABLE: "1"
4340
NCCL_CUMEM_ENABLE: "1"
4441
SGLANG_MOONCAKE_CUSTOM_MEM_POOL: "True"
@@ -49,14 +46,11 @@ backend:
4946

5047
decode_environment:
5148
PYTHONUNBUFFERED: "1"
52-
SGLANG_RADIX_DISABLE_REUSE: "1"
49+
SGLANG_RADIX_FORCE_MISS: "1"
5350
SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1"
5451
SGLANG_DEFAULT_THINKING: "1"
5552
SGLANG_DSV4_REASONING_EFFORT: "max"
5653
SGLANG_OPT_SWA_SPLIT_LEAF_ON_INSERT: "1"
57-
SGLANG_OPT_USE_JIT_NORM: "1"
58-
SGLANG_OPT_USE_JIT_INDEXER_METADATA: "1"
59-
SGLANG_OPT_USE_TOPK_V2: "1"
6054
NCCL_MNNVL_ENABLE: "1"
6155
NCCL_CUMEM_ENABLE: "1"
6256
SGLANG_MOONCAKE_CUSTOM_MEM_POOL: "True"

benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-low-latency-1p6d-dep4-tp4-mtp.yaml

Lines changed: 3 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ dynamo:
1111

1212
model:
1313
path: "deepseek-v4-pro"
14-
container: "lmsysorg/sglang:nightly-dev-cu13-20260510-2473659e"
14+
container: "lmsysorg/sglang:v0.5.12"
1515
precision: "mxfp4"
1616

1717
sbatch_directives:
@@ -31,23 +31,15 @@ backend:
3131

3232
prefill_environment:
3333
PYTHONUNBUFFERED: "1"
34-
SGLANG_RADIX_DISABLE_REUSE: "1"
34+
SGLANG_RADIX_FORCE_MISS: "1"
3535
SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1"
3636
SGLANG_DEFAULT_THINKING: "1"
3737
SGLANG_DSV4_REASONING_EFFORT: "max"
3838
SGLANG_OPT_SWA_SPLIT_LEAF_ON_INSERT: "1"
39-
SGLANG_OPT_USE_JIT_NORM: "1"
40-
SGLANG_OPT_USE_JIT_INDEXER_METADATA: "1"
41-
SGLANG_OPT_USE_TOPK_V2: "1"
4239

43-
SGLANG_OPT_SWA_EVICT_DROP_PAGE_MARGIN: "1"
44-
SGLANG_OPT_USE_CUSTOM_ALL_REDUCE_V2: "1"
45-
SGLANG_OPT_USE_FAST_MASK_EP: "1"
4640
SGLANG_OPT_USE_DEEPGEMM_MEGA_MOE: "1"
47-
SGLANG_OPT_FIX_HASH_MEGA_MOE: "1"
4841
SGLANG_OPT_DEEPGEMM_MEGA_MOE_NUM_MAX_TOKENS_PER_RANK: "9216"
4942
SGLANG_OPT_FIX_MEGA_MOE_MEMORY: "1"
50-
SGLANG_OPT_FIX_NEXTN_MEGA_MOE: "1"
5143
SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK: "0"
5244

5345
NCCL_MNNVL_ENABLE: "1"
@@ -60,14 +52,11 @@ backend:
6052

6153
decode_environment:
6254
PYTHONUNBUFFERED: "1"
63-
SGLANG_RADIX_DISABLE_REUSE: "1"
55+
SGLANG_RADIX_FORCE_MISS: "1"
6456
SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1"
6557
SGLANG_DEFAULT_THINKING: "1"
6658
SGLANG_DSV4_REASONING_EFFORT: "max"
6759
SGLANG_OPT_SWA_SPLIT_LEAF_ON_INSERT: "1"
68-
SGLANG_OPT_USE_JIT_NORM: "1"
69-
SGLANG_OPT_USE_JIT_INDEXER_METADATA: "1"
70-
SGLANG_OPT_USE_TOPK_V2: "1"
7160
NCCL_MNNVL_ENABLE: "1"
7261
NCCL_CUMEM_ENABLE: "1"
7362
SGLANG_MOONCAKE_CUSTOM_MEM_POOL: "True"

benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-1p1d-dep4-dep16-mtp.yaml

Lines changed: 3 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ dynamo:
1111

1212
model:
1313
path: "deepseek-v4-pro"
14-
container: "lmsysorg/sglang:nightly-dev-cu13-20260510-2473659e"
14+
container: "lmsysorg/sglang:v0.5.12"
1515
precision: "mxfp4"
1616

1717
sbatch_directives:
@@ -33,23 +33,15 @@ backend:
3333

3434
prefill_environment:
3535
PYTHONUNBUFFERED: "1"
36-
SGLANG_RADIX_DISABLE_REUSE: "1"
36+
SGLANG_RADIX_FORCE_MISS: "1"
3737
SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1"
3838
SGLANG_DEFAULT_THINKING: "1"
3939
SGLANG_DSV4_REASONING_EFFORT: "max"
4040
SGLANG_OPT_SWA_SPLIT_LEAF_ON_INSERT: "1"
41-
SGLANG_OPT_USE_JIT_NORM: "1"
42-
SGLANG_OPT_USE_JIT_INDEXER_METADATA: "1"
43-
SGLANG_OPT_USE_TOPK_V2: "1"
4441

45-
SGLANG_OPT_SWA_EVICT_DROP_PAGE_MARGIN: "1"
46-
SGLANG_OPT_USE_CUSTOM_ALL_REDUCE_V2: "1"
47-
SGLANG_OPT_USE_FAST_MASK_EP: "1"
4842
SGLANG_OPT_USE_DEEPGEMM_MEGA_MOE: "1"
49-
SGLANG_OPT_FIX_HASH_MEGA_MOE: "1"
5043
SGLANG_OPT_DEEPGEMM_MEGA_MOE_NUM_MAX_TOKENS_PER_RANK: "9216"
5144
SGLANG_OPT_FIX_MEGA_MOE_MEMORY: "1"
52-
SGLANG_OPT_FIX_NEXTN_MEGA_MOE: "1"
5345
SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK: "0"
5446

5547
NCCL_MNNVL_ENABLE: "1"
@@ -62,22 +54,15 @@ backend:
6254

6355
decode_environment:
6456
PYTHONUNBUFFERED: "1"
65-
SGLANG_RADIX_DISABLE_REUSE: "1"
57+
SGLANG_RADIX_FORCE_MISS: "1"
6658
SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1"
6759
SGLANG_DEFAULT_THINKING: "1"
6860
SGLANG_DSV4_REASONING_EFFORT: "max"
6961
SGLANG_OPT_SWA_SPLIT_LEAF_ON_INSERT: "1"
70-
SGLANG_OPT_USE_JIT_NORM: "1"
71-
SGLANG_OPT_USE_JIT_INDEXER_METADATA: "1"
72-
SGLANG_OPT_USE_TOPK_V2: "1"
7362

74-
SGLANG_OPT_SWA_EVICT_DROP_PAGE_MARGIN: "1"
75-
SGLANG_OPT_USE_FAST_MASK_EP: "1"
7663
SGLANG_OPT_USE_DEEPGEMM_MEGA_MOE: "1"
77-
SGLANG_OPT_FIX_HASH_MEGA_MOE: "1"
7864
SGLANG_OPT_DEEPGEMM_MEGA_MOE_NUM_MAX_TOKENS_PER_RANK: "2048"
7965
SGLANG_OPT_FIX_MEGA_MOE_MEMORY: "1"
80-
SGLANG_OPT_FIX_NEXTN_MEGA_MOE: "1"
8166
SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK: "0"
8267

8368
NCCL_MNNVL_ENABLE: "1"

benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-1p1d-dep4-dep8-mtp.yaml

Lines changed: 3 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ dynamo:
1111

1212
model:
1313
path: "deepseek-v4-pro"
14-
container: "lmsysorg/sglang:nightly-dev-cu13-20260510-2473659e"
14+
container: "lmsysorg/sglang:v0.5.12"
1515
precision: "mxfp4"
1616

1717
sbatch_directives:
@@ -33,23 +33,15 @@ backend:
3333

3434
prefill_environment:
3535
PYTHONUNBUFFERED: "1"
36-
SGLANG_RADIX_DISABLE_REUSE: "1"
36+
SGLANG_RADIX_FORCE_MISS: "1"
3737
SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1"
3838
SGLANG_DEFAULT_THINKING: "1"
3939
SGLANG_DSV4_REASONING_EFFORT: "max"
4040
SGLANG_OPT_SWA_SPLIT_LEAF_ON_INSERT: "1"
41-
SGLANG_OPT_USE_JIT_NORM: "1"
42-
SGLANG_OPT_USE_JIT_INDEXER_METADATA: "1"
43-
SGLANG_OPT_USE_TOPK_V2: "1"
4441

45-
SGLANG_OPT_SWA_EVICT_DROP_PAGE_MARGIN: "1"
46-
SGLANG_OPT_USE_CUSTOM_ALL_REDUCE_V2: "1"
47-
SGLANG_OPT_USE_FAST_MASK_EP: "1"
4842
SGLANG_OPT_USE_DEEPGEMM_MEGA_MOE: "1"
49-
SGLANG_OPT_FIX_HASH_MEGA_MOE: "1"
5043
SGLANG_OPT_DEEPGEMM_MEGA_MOE_NUM_MAX_TOKENS_PER_RANK: "9216"
5144
SGLANG_OPT_FIX_MEGA_MOE_MEMORY: "1"
52-
SGLANG_OPT_FIX_NEXTN_MEGA_MOE: "1"
5345
SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK: "0"
5446

5547
NCCL_MNNVL_ENABLE: "1"
@@ -62,22 +54,15 @@ backend:
6254

6355
decode_environment:
6456
PYTHONUNBUFFERED: "1"
65-
SGLANG_RADIX_DISABLE_REUSE: "1"
57+
SGLANG_RADIX_FORCE_MISS: "1"
6658
SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1"
6759
SGLANG_DEFAULT_THINKING: "1"
6860
SGLANG_DSV4_REASONING_EFFORT: "max"
6961
SGLANG_OPT_SWA_SPLIT_LEAF_ON_INSERT: "1"
70-
SGLANG_OPT_USE_JIT_NORM: "1"
71-
SGLANG_OPT_USE_JIT_INDEXER_METADATA: "1"
72-
SGLANG_OPT_USE_TOPK_V2: "1"
7362

74-
SGLANG_OPT_SWA_EVICT_DROP_PAGE_MARGIN: "1"
75-
SGLANG_OPT_USE_FAST_MASK_EP: "1"
7663
SGLANG_OPT_USE_DEEPGEMM_MEGA_MOE: "1"
77-
SGLANG_OPT_FIX_HASH_MEGA_MOE: "1"
7864
SGLANG_OPT_DEEPGEMM_MEGA_MOE_NUM_MAX_TOKENS_PER_RANK: "2048"
7965
SGLANG_OPT_FIX_MEGA_MOE_MEMORY: "1"
80-
SGLANG_OPT_FIX_NEXTN_MEGA_MOE: "1"
8166
SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK: "0"
8267

8368
NCCL_MNNVL_ENABLE: "1"

benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-2p1d-dep4-dep8-mtp.yaml

Lines changed: 3 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ dynamo:
1111

1212
model:
1313
path: "deepseek-v4-pro"
14-
container: "lmsysorg/sglang:nightly-dev-cu13-20260510-2473659e"
14+
container: "lmsysorg/sglang:v0.5.12"
1515
precision: "mxfp4"
1616

1717
sbatch_directives:
@@ -33,23 +33,15 @@ backend:
3333

3434
prefill_environment:
3535
PYTHONUNBUFFERED: "1"
36-
SGLANG_RADIX_DISABLE_REUSE: "1"
36+
SGLANG_RADIX_FORCE_MISS: "1"
3737
SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1"
3838
SGLANG_DEFAULT_THINKING: "1"
3939
SGLANG_DSV4_REASONING_EFFORT: "max"
4040
SGLANG_OPT_SWA_SPLIT_LEAF_ON_INSERT: "1"
41-
SGLANG_OPT_USE_JIT_NORM: "1"
42-
SGLANG_OPT_USE_JIT_INDEXER_METADATA: "1"
43-
SGLANG_OPT_USE_TOPK_V2: "1"
4441

45-
SGLANG_OPT_SWA_EVICT_DROP_PAGE_MARGIN: "1"
46-
SGLANG_OPT_USE_CUSTOM_ALL_REDUCE_V2: "1"
47-
SGLANG_OPT_USE_FAST_MASK_EP: "1"
4842
SGLANG_OPT_USE_DEEPGEMM_MEGA_MOE: "1"
49-
SGLANG_OPT_FIX_HASH_MEGA_MOE: "1"
5043
SGLANG_OPT_DEEPGEMM_MEGA_MOE_NUM_MAX_TOKENS_PER_RANK: "9216"
5144
SGLANG_OPT_FIX_MEGA_MOE_MEMORY: "1"
52-
SGLANG_OPT_FIX_NEXTN_MEGA_MOE: "1"
5345
SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK: "0"
5446

5547
NCCL_MNNVL_ENABLE: "1"
@@ -62,22 +54,15 @@ backend:
6254

6355
decode_environment:
6456
PYTHONUNBUFFERED: "1"
65-
SGLANG_RADIX_DISABLE_REUSE: "1"
57+
SGLANG_RADIX_FORCE_MISS: "1"
6658
SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1"
6759
SGLANG_DEFAULT_THINKING: "1"
6860
SGLANG_DSV4_REASONING_EFFORT: "max"
6961
SGLANG_OPT_SWA_SPLIT_LEAF_ON_INSERT: "1"
70-
SGLANG_OPT_USE_JIT_NORM: "1"
71-
SGLANG_OPT_USE_JIT_INDEXER_METADATA: "1"
72-
SGLANG_OPT_USE_TOPK_V2: "1"
7362

74-
SGLANG_OPT_SWA_EVICT_DROP_PAGE_MARGIN: "1"
75-
SGLANG_OPT_USE_FAST_MASK_EP: "1"
7663
SGLANG_OPT_USE_DEEPGEMM_MEGA_MOE: "1"
77-
SGLANG_OPT_FIX_HASH_MEGA_MOE: "1"
7864
SGLANG_OPT_DEEPGEMM_MEGA_MOE_NUM_MAX_TOKENS_PER_RANK: "4096"
7965
SGLANG_OPT_FIX_MEGA_MOE_MEMORY: "1"
80-
SGLANG_OPT_FIX_NEXTN_MEGA_MOE: "1"
8166
SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK: "0"
8267

8368
NCCL_MNNVL_ENABLE: "1"

benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-4p1d-dep4-dep8-mtp.yaml

Lines changed: 3 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ dynamo:
1111

1212
model:
1313
path: "deepseek-v4-pro"
14-
container: "lmsysorg/sglang:nightly-dev-cu13-20260510-2473659e"
14+
container: "lmsysorg/sglang:v0.5.12"
1515
precision: "mxfp4"
1616

1717
sbatch_directives:
@@ -33,23 +33,15 @@ backend:
3333

3434
prefill_environment:
3535
PYTHONUNBUFFERED: "1"
36-
SGLANG_RADIX_DISABLE_REUSE: "1"
36+
SGLANG_RADIX_FORCE_MISS: "1"
3737
SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1"
3838
SGLANG_DEFAULT_THINKING: "1"
3939
SGLANG_DSV4_REASONING_EFFORT: "max"
4040
SGLANG_OPT_SWA_SPLIT_LEAF_ON_INSERT: "1"
41-
SGLANG_OPT_USE_JIT_NORM: "1"
42-
SGLANG_OPT_USE_JIT_INDEXER_METADATA: "1"
43-
SGLANG_OPT_USE_TOPK_V2: "1"
4441

45-
SGLANG_OPT_SWA_EVICT_DROP_PAGE_MARGIN: "1"
46-
SGLANG_OPT_USE_CUSTOM_ALL_REDUCE_V2: "1"
47-
SGLANG_OPT_USE_FAST_MASK_EP: "1"
4842
SGLANG_OPT_USE_DEEPGEMM_MEGA_MOE: "1"
49-
SGLANG_OPT_FIX_HASH_MEGA_MOE: "1"
5043
SGLANG_OPT_DEEPGEMM_MEGA_MOE_NUM_MAX_TOKENS_PER_RANK: "9216"
5144
SGLANG_OPT_FIX_MEGA_MOE_MEMORY: "1"
52-
SGLANG_OPT_FIX_NEXTN_MEGA_MOE: "1"
5345
SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK: "0"
5446

5547
NCCL_MNNVL_ENABLE: "1"
@@ -62,22 +54,15 @@ backend:
6254

6355
decode_environment:
6456
PYTHONUNBUFFERED: "1"
65-
SGLANG_RADIX_DISABLE_REUSE: "1"
57+
SGLANG_RADIX_FORCE_MISS: "1"
6658
SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1"
6759
SGLANG_DEFAULT_THINKING: "1"
6860
SGLANG_DSV4_REASONING_EFFORT: "max"
6961
SGLANG_OPT_SWA_SPLIT_LEAF_ON_INSERT: "1"
70-
SGLANG_OPT_USE_JIT_NORM: "1"
71-
SGLANG_OPT_USE_JIT_INDEXER_METADATA: "1"
72-
SGLANG_OPT_USE_TOPK_V2: "1"
7362

74-
SGLANG_OPT_SWA_EVICT_DROP_PAGE_MARGIN: "1"
75-
SGLANG_OPT_USE_FAST_MASK_EP: "1"
7663
SGLANG_OPT_USE_DEEPGEMM_MEGA_MOE: "1"
77-
SGLANG_OPT_FIX_HASH_MEGA_MOE: "1"
7864
SGLANG_OPT_DEEPGEMM_MEGA_MOE_NUM_MAX_TOKENS_PER_RANK: "4096"
7965
SGLANG_OPT_FIX_MEGA_MOE_MEMORY: "1"
80-
SGLANG_OPT_FIX_NEXTN_MEGA_MOE: "1"
8166
SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK: "0"
8267

8368
NCCL_MNNVL_ENABLE: "1"

perf-changelog.yaml

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2653,3 +2653,23 @@
26532653
description:
26542654
- "Update SGLang image from v0.5.9-cu129-amd64 (74d old) to v0.5.12-cu130"
26552655
pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1458
2656+
2657+
- config-keys:
2658+
- dsr1-fp8-mi325x-sglang
2659+
description:
2660+
- "Update SGLang image from v0.5.9-rocm700-mi30x to v0.5.12-rocm700-mi30x"
2661+
- "Work around LlamaTokenizer.all_special_tokens_extended removal in newer transformers: prefer backend_request_func.get_tokenizer over vLLM's"
2662+
pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1428
2663+
2664+
- config-keys:
2665+
- dsr1-fp4-b200-sglang
2666+
description:
2667+
- "Update SGLang image from v0.5.9-cu130 to v0.5.12-cu130"
2668+
- "Temporarily disable agentic-coding scenario (blocked by e2e-tests.yml artifact-name mismatch)"
2669+
pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1415
2670+
2671+
- config-keys:
2672+
- dsv4-fp4-gb300-dynamo-sglang-mtp
2673+
description:
2674+
- "Update SGLang image to v0.5.12 and remove obsolete env vars for MTP configs"
2675+
pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1478

runners/launch_mi325x-amds.sh

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,11 @@ LOCK_FILE="${SQUASH_FILE}.lock"
99

1010
set -x
1111

12-
JOB_ID=$(salloc --partition=$PARTITION --gres=gpu:$TP --cpus-per-task=256 --time=480 --no-shell --job-name="$RUNNER_NAME" 2>&1 | tee /dev/stderr | grep -oP 'Granted job allocation \K[0-9]+')
12+
# Exclude known-broken mi325x nodes:
13+
# chi-mi325x-pod1-121: enroot-aufs2ovlfs setcap fails on this node's NFS-backed
14+
# squash dir; container image import never completes
15+
# (root-caused via #1467/#1468/#1469 sweep failures).
16+
JOB_ID=$(salloc --partition=$PARTITION --exclude=chi-mi325x-pod1-121.ord.vultr.cpe.ice.amd.com --gres=gpu:$TP --cpus-per-task=256 --time=480 --no-shell --job-name="$RUNNER_NAME" 2>&1 | tee /dev/stderr | grep -oP 'Granted job allocation \K[0-9]+')
1317

1418
if [ -z "$JOB_ID" ]; then
1519
echo "ERROR: salloc failed to allocate a job"

0 commit comments

Comments
 (0)