Skip to content

Commit f8a6681

Browse files
committed
Revert "fix: switch gb200 sglang mtp recipes to NGC prebuilt dynamo image"
This reverts commit d38a486.
1 parent d38a486 commit f8a6681

7 files changed

Lines changed: 35 additions & 28 deletions

benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-gb200-low-latency-1p1d-tp8-tp8-mtp.yaml

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,17 +6,18 @@ frontend:
66
num_additional_frontends: 8
77

88
dynamo:
9-
install: false # NGC sglang-runtime image ships dynamo prebuilt.
9+
hash: "34d55a596fb8d3d44daefe425ec1e303131f4d2c"
10+
install: true
1011

11-
# 100-min readiness wall (default 30 min) tolerates first-time NGC
12-
# image pull on each worker node plus DSV4-Pro multi-node load.
12+
# 100-min readiness wall (default 30 min) so the per-worker dynamo
13+
# source build has room to finish before health-poll gives up.
1314
health_check:
1415
max_attempts: 600
1516
interval_seconds: 10
1617

1718
model:
1819
path: "deepseek-v4-pro"
19-
container: "nvcr.io/nvidia/ai-dynamo/sglang-runtime:1.2.0-deepseek-v4-cuda13-dev.3"
20+
container: "lmsysorg/sglang:nightly-dev-cu13-20260510-2473659e"
2021
precision: "mxfp4"
2122

2223
sbatch_directives:

benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-gb200-low-latency-1p6d-dep8-tp8-mtp.yaml

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,17 +6,18 @@ frontend:
66
num_additional_frontends: 8
77

88
dynamo:
9-
install: false # NGC sglang-runtime image ships dynamo prebuilt.
9+
hash: "34d55a596fb8d3d44daefe425ec1e303131f4d2c"
10+
install: true
1011

11-
# 100-min readiness wall (default 30 min) tolerates first-time NGC
12-
# image pull on each worker node plus DSV4-Pro multi-node load.
12+
# 100-min readiness wall (default 30 min) so the per-worker dynamo
13+
# source build has room to finish before health-poll gives up.
1314
health_check:
1415
max_attempts: 600
1516
interval_seconds: 10
1617

1718
model:
1819
path: "deepseek-v4-pro"
19-
container: "nvcr.io/nvidia/ai-dynamo/sglang-runtime:1.2.0-deepseek-v4-cuda13-dev.3"
20+
container: "lmsysorg/sglang:nightly-dev-cu13-20260510-2473659e"
2021
precision: "mxfp4"
2122

2223
sbatch_directives:

benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-gb200-mid-curve-1p1d-dep8-dep16-mtp.yaml

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,17 +6,18 @@ frontend:
66
num_additional_frontends: 8
77

88
dynamo:
9-
install: false # NGC sglang-runtime image ships dynamo prebuilt.
9+
hash: "34d55a596fb8d3d44daefe425ec1e303131f4d2c"
10+
install: true
1011

11-
# 100-min readiness wall (default 30 min) tolerates first-time NGC
12-
# image pull on each worker node plus DSV4-Pro multi-node load.
12+
# 100-min readiness wall (default 30 min) so the per-worker dynamo
13+
# source build has room to finish before health-poll gives up.
1314
health_check:
1415
max_attempts: 600
1516
interval_seconds: 10
1617

1718
model:
1819
path: "deepseek-v4-pro"
19-
container: "nvcr.io/nvidia/ai-dynamo/sglang-runtime:1.2.0-deepseek-v4-cuda13-dev.3"
20+
container: "lmsysorg/sglang:nightly-dev-cu13-20260510-2473659e"
2021
precision: "mxfp4"
2122

2223
sbatch_directives:

benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-gb200-mid-curve-1p1d-dep8-dep8-mtp.yaml

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,17 +6,18 @@ frontend:
66
num_additional_frontends: 8
77

88
dynamo:
9-
install: false # NGC sglang-runtime image ships dynamo prebuilt.
9+
hash: "34d55a596fb8d3d44daefe425ec1e303131f4d2c"
10+
install: true
1011

11-
# 100-min readiness wall (default 30 min) tolerates first-time NGC
12-
# image pull on each worker node plus DSV4-Pro multi-node load.
12+
# 100-min readiness wall (default 30 min) so the per-worker dynamo
13+
# source build has room to finish before health-poll gives up.
1314
health_check:
1415
max_attempts: 600
1516
interval_seconds: 10
1617

1718
model:
1819
path: "deepseek-v4-pro"
19-
container: "nvcr.io/nvidia/ai-dynamo/sglang-runtime:1.2.0-deepseek-v4-cuda13-dev.3"
20+
container: "lmsysorg/sglang:nightly-dev-cu13-20260510-2473659e"
2021
precision: "mxfp4"
2122

2223
sbatch_directives:

benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-gb200-mid-curve-4p1d-dep8-dep8-mtp-c8192.yaml

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,17 +6,18 @@ frontend:
66
num_additional_frontends: 8
77

88
dynamo:
9-
install: false # NGC sglang-runtime image ships dynamo prebuilt.
9+
hash: "34d55a596fb8d3d44daefe425ec1e303131f4d2c"
10+
install: true
1011

11-
# 100-min readiness wall (default 30 min) tolerates first-time NGC
12-
# image pull on each worker node plus DSV4-Pro multi-node load.
12+
# 100-min readiness wall (default 30 min) so the per-worker dynamo
13+
# source build has room to finish before health-poll gives up.
1314
health_check:
1415
max_attempts: 600
1516
interval_seconds: 10
1617

1718
model:
1819
path: "deepseek-v4-pro"
19-
container: "nvcr.io/nvidia/ai-dynamo/sglang-runtime:1.2.0-deepseek-v4-cuda13-dev.3"
20+
container: "lmsysorg/sglang:nightly-dev-cu13-20260510-2473659e"
2021
precision: "mxfp4"
2122

2223
sbatch_directives:

benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-gb200-mid-curve-5p1d-dep8-dep8-mtp-c12288.yaml

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,17 +6,18 @@ frontend:
66
num_additional_frontends: 8
77

88
dynamo:
9-
install: false # NGC sglang-runtime image ships dynamo prebuilt.
9+
hash: "34d55a596fb8d3d44daefe425ec1e303131f4d2c"
10+
install: true
1011

11-
# 100-min readiness wall (default 30 min) tolerates first-time NGC
12-
# image pull on each worker node plus DSV4-Pro multi-node load.
12+
# 100-min readiness wall (default 30 min) so the per-worker dynamo
13+
# source build has room to finish before health-poll gives up.
1314
health_check:
1415
max_attempts: 600
1516
interval_seconds: 10
1617

1718
model:
1819
path: "deepseek-v4-pro"
19-
container: "nvcr.io/nvidia/ai-dynamo/sglang-runtime:1.2.0-deepseek-v4-cuda13-dev.3"
20+
container: "lmsysorg/sglang:nightly-dev-cu13-20260510-2473659e"
2021
precision: "mxfp4"
2122

2223
sbatch_directives:

benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-gb200-mid-curve-6p1d-dep8-dep8-mtp-c16384.yaml

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,17 +6,18 @@ frontend:
66
num_additional_frontends: 8
77

88
dynamo:
9-
install: false # NGC sglang-runtime image ships dynamo prebuilt.
9+
hash: "34d55a596fb8d3d44daefe425ec1e303131f4d2c"
10+
install: true
1011

11-
# 100-min readiness wall (default 30 min) tolerates first-time NGC
12-
# image pull on each worker node plus DSV4-Pro multi-node load.
12+
# 100-min readiness wall (default 30 min) so the per-worker dynamo
13+
# source build has room to finish before health-poll gives up.
1314
health_check:
1415
max_attempts: 600
1516
interval_seconds: 10
1617

1718
model:
1819
path: "deepseek-v4-pro"
19-
container: "nvcr.io/nvidia/ai-dynamo/sglang-runtime:1.2.0-deepseek-v4-cuda13-dev.3"
20+
container: "lmsysorg/sglang:nightly-dev-cu13-20260510-2473659e"
2021
precision: "mxfp4"
2122

2223
sbatch_directives:

0 commit comments

Comments
 (0)