Skip to content

Commit 7034272

Browse files
committed
[DONOTMERGE] Update dpskv4 GB300 non-MTP disagg SGLang image to nightly-20260520
1 parent 4f63034 commit 7034272

8 files changed

Lines changed: 15 additions & 20 deletions

.github/configs/nvidia-master.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8759,7 +8759,7 @@ dsv4-fp4-gb300-dynamo-vllm:
87598759
dp-attn: true
87608760

87618761
dsv4-fp4-gb300-dynamo-sglang:
8762-
image: lmsysorg/sglang:nightly-dev-cu13-20260519-dbac4647
8762+
image: lmsysorg/sglang:nightly-dev-cu13-20260520-425dffbd
87638763
model: deepseek-ai/DeepSeek-V4-Pro
87648764
model-prefix: dsv4
87658765
runner: gb300-cw

benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-gb300-10p1d-dep4-dep16-14-c8192.yaml

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ name: "disagg-gb300-10p1d-dep4-dep16-14-c8192"
3333

3434
model:
3535
path: "deepseek-v4-pro"
36-
container: "lmsysorg/sglang:nightly-dev-cu13-20260519-dbac4647"
36+
container: "lmsysorg/sglang:nightly-dev-cu13-20260520-425dffbd"
3737
precision: "fp4"
3838

3939
dynamo:
@@ -94,7 +94,6 @@ backend:
9494
SGLANG_LOG_FORWARD_ITERS: "1"
9595
SGLANG_LOG_MS: "1"
9696
SGLANG_REQUEST_STATE_WAIT_TIMEOUT: "60"
97-
SGLANG_OPT_FP8_WO_A_GEMM: "0"
9897

9998
decode_environment:
10099
PYTHONUNBUFFERED: "1"
@@ -119,7 +118,6 @@ backend:
119118
SGLANG_LOG_FORWARD_ITERS: "1"
120119
SGLANG_LOG_MS: "1"
121120
SGLANG_REQUEST_STATE_WAIT_TIMEOUT: "60"
122-
SGLANG_OPT_FP8_WO_A_GEMM: "0"
123121
# is single-node only and corrupts results in 2-node decode setups.
124122

125123
sglang_config:

benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-gb300-12p1d-dep4-dep12-15-c21504.yaml

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ name: "disagg-gb300-12p1d-dep4-dep12-15-c21504"
3333

3434
model:
3535
path: "deepseek-v4-pro"
36-
container: "lmsysorg/sglang:nightly-dev-cu13-20260519-dbac4647"
36+
container: "lmsysorg/sglang:nightly-dev-cu13-20260520-425dffbd"
3737
precision: "fp4"
3838

3939
dynamo:
@@ -94,7 +94,6 @@ backend:
9494
SGLANG_LOG_FORWARD_ITERS: "1"
9595
SGLANG_LOG_MS: "1"
9696
SGLANG_REQUEST_STATE_WAIT_TIMEOUT: "60"
97-
SGLANG_OPT_FP8_WO_A_GEMM: "0"
9897

9998
decode_environment:
10099
PYTHONUNBUFFERED: "1"
@@ -119,7 +118,6 @@ backend:
119118
SGLANG_LOG_FORWARD_ITERS: "1"
120119
SGLANG_LOG_MS: "1"
121120
SGLANG_REQUEST_STATE_WAIT_TIMEOUT: "60"
122-
SGLANG_OPT_FP8_WO_A_GEMM: "0"
123121
# is single-node only and corrupts results in 2-node decode setups.
124122

125123
sglang_config:

benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-gb300-1p1d-dep4-dep16-5-c1024.yaml

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ name: "disagg-gb300-1p1d-dep4-dep16-5-c1024"
3333

3434
model:
3535
path: "deepseek-v4-pro"
36-
container: "lmsysorg/sglang:nightly-dev-cu13-20260519-dbac4647"
36+
container: "lmsysorg/sglang:nightly-dev-cu13-20260520-425dffbd"
3737
precision: "fp4"
3838

3939
dynamo:
@@ -94,7 +94,6 @@ backend:
9494
SGLANG_LOG_FORWARD_ITERS: "1"
9595
SGLANG_LOG_MS: "1"
9696
SGLANG_REQUEST_STATE_WAIT_TIMEOUT: "60"
97-
SGLANG_OPT_FP8_WO_A_GEMM: "0"
9897

9998
decode_environment:
10099
PYTHONUNBUFFERED: "1"
@@ -119,7 +118,6 @@ backend:
119118
SGLANG_LOG_FORWARD_ITERS: "1"
120119
SGLANG_LOG_MS: "1"
121120
SGLANG_REQUEST_STATE_WAIT_TIMEOUT: "60"
122-
SGLANG_OPT_FP8_WO_A_GEMM: "0"
123121
# is single-node only and corrupts results in 2-node decode setups.
124122

125123
sglang_config:

benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-gb300-1p1d-tp4-tp4-2-c1.yaml

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ name: "disagg-gb300-1p1d-tp4-tp4-2-c1"
3333

3434
model:
3535
path: "deepseek-v4-pro"
36-
container: "lmsysorg/sglang:nightly-dev-cu13-20260519-dbac4647"
36+
container: "lmsysorg/sglang:nightly-dev-cu13-20260520-425dffbd"
3737
precision: "fp4"
3838

3939
# See ../1k1k/disagg-gb200-1p1d-dep8-tep8.yaml for the dynamo pin
@@ -93,7 +93,6 @@ backend:
9393
SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: "100000"
9494
SGLANG_DISAGGREGATION_WAITING_TIMEOUT: "100000"
9595
SGLANG_OPT_SWA_RELEASE_LEAF_LOCK_AFTER_WINDOW: "1"
96-
SGLANG_OPT_FP8_WO_A_GEMM: "0"
9796

9897
decode_environment:
9998
PYTHONUNBUFFERED: "1"
@@ -110,7 +109,6 @@ backend:
110109
SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: "100000"
111110
SGLANG_DISAGGREGATION_WAITING_TIMEOUT: "100000"
112111
SGLANG_OPT_SWA_RELEASE_LEAF_LOCK_AFTER_WINDOW: "1"
113-
SGLANG_OPT_FP8_WO_A_GEMM: "0"
114112
# is single-node only and corrupts results in 2-node decode setups.
115113

116114
sglang_config:

benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-gb300-4p1d-dep4-dep16-8-c1024.yaml

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ name: "disagg-gb300-4p1d-dep4-dep16-8-c1024"
3333

3434
model:
3535
path: "deepseek-v4-pro"
36-
container: "lmsysorg/sglang:nightly-dev-cu13-20260519-dbac4647"
36+
container: "lmsysorg/sglang:nightly-dev-cu13-20260520-425dffbd"
3737
precision: "fp4"
3838

3939
dynamo:
@@ -94,7 +94,6 @@ backend:
9494
SGLANG_LOG_FORWARD_ITERS: "1"
9595
SGLANG_LOG_MS: "1"
9696
SGLANG_REQUEST_STATE_WAIT_TIMEOUT: "60"
97-
SGLANG_OPT_FP8_WO_A_GEMM: "0"
9897

9998
decode_environment:
10099
PYTHONUNBUFFERED: "1"
@@ -119,7 +118,6 @@ backend:
119118
SGLANG_LOG_FORWARD_ITERS: "1"
120119
SGLANG_LOG_MS: "1"
121120
SGLANG_REQUEST_STATE_WAIT_TIMEOUT: "60"
122-
SGLANG_OPT_FP8_WO_A_GEMM: "0"
123121
# is single-node only and corrupts results in 2-node decode setups.
124122

125123
sglang_config:

benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-gb300-8p1d-dep4-dep16-12-c4096.yaml

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ name: "disagg-gb300-8p1d-dep4-dep16-12-c4096"
3333

3434
model:
3535
path: "deepseek-v4-pro"
36-
container: "lmsysorg/sglang:nightly-dev-cu13-20260519-dbac4647"
36+
container: "lmsysorg/sglang:nightly-dev-cu13-20260520-425dffbd"
3737
precision: "fp4"
3838

3939
dynamo:
@@ -94,7 +94,6 @@ backend:
9494
SGLANG_LOG_FORWARD_ITERS: "1"
9595
SGLANG_LOG_MS: "1"
9696
SGLANG_REQUEST_STATE_WAIT_TIMEOUT: "60"
97-
SGLANG_OPT_FP8_WO_A_GEMM: "0"
9897

9998
decode_environment:
10099
PYTHONUNBUFFERED: "1"
@@ -119,7 +118,6 @@ backend:
119118
SGLANG_LOG_FORWARD_ITERS: "1"
120119
SGLANG_LOG_MS: "1"
121120
SGLANG_REQUEST_STATE_WAIT_TIMEOUT: "60"
122-
SGLANG_OPT_FP8_WO_A_GEMM: "0"
123121
# is single-node only and corrupts results in 2-node decode setups.
124122

125123
sglang_config:

perf-changelog.yaml

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3035,7 +3035,14 @@
30353035
- "Bump ATOM image to rocm/atom:rocm7.2.3_ubuntu24.04_py3.12_pytorch_release_2.10.0_atom20260511"
30363036
- "TP=4 shows +3.2% to +16.3% throughput improvement across 1k1k and 8k1k workloads (concurrency 4-256)"
30373037
pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1411
3038-
3038+
3039+
- config-keys:
3040+
- dsv4-fp4-gb300-dynamo-sglang
3041+
description:
3042+
- "Update SGLang image from nightly-dev-cu13-20260519-dbac4647 to nightly-dev-cu13-20260520-425dffbd for all non-MTP disagg configs"
3043+
- "Remove SGLANG_OPT_FP8_WO_A_GEMM=0 workaround (topk_v2 crash fixed upstream in sgl-project/sglang#25805)"
3044+
pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1528
3045+
30393046

30403047
- config-keys:
30413048
- qwen3.5-fp4-b300-sglang

0 commit comments

Comments
 (0)