Revert "fix: switch gb200 sglang mtp recipes to NGC prebuilt dynamo image"

Oseltamivir · Oseltamivir · commit f8a66816b614 · 2026-05-13T14:53:54.000-07:00
This reverts commit d38a486.
diff --git a/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-gb200-low-latency-1p1d-tp8-tp8-mtp.yaml b/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-gb200-low-latency-1p1d-tp8-tp8-mtp.yaml
@@ -6,17 +6,18 @@ frontend:
   num_additional_frontends: 8
 
 dynamo:
-  install: false  # NGC sglang-runtime image ships dynamo prebuilt.
+  hash: "34d55a596fb8d3d44daefe425ec1e303131f4d2c"
+  install: true
 
-# 100-min readiness wall (default 30 min) tolerates first-time NGC
-# image pull on each worker node plus DSV4-Pro multi-node load.
+# 100-min readiness wall (default 30 min) so the per-worker dynamo
+# source build has room to finish before health-poll gives up.
 health_check:
   max_attempts: 600
   interval_seconds: 10
 
 model:
   path: "deepseek-v4-pro"
-  container: "nvcr.io/nvidia/ai-dynamo/sglang-runtime:1.2.0-deepseek-v4-cuda13-dev.3"
+  container: "lmsysorg/sglang:nightly-dev-cu13-20260510-2473659e"
   precision: "mxfp4"
 
 sbatch_directives:
diff --git a/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-gb200-low-latency-1p6d-dep8-tp8-mtp.yaml b/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-gb200-low-latency-1p6d-dep8-tp8-mtp.yaml
@@ -6,17 +6,18 @@ frontend:
   num_additional_frontends: 8
 
 dynamo:
-  install: false  # NGC sglang-runtime image ships dynamo prebuilt.
+  hash: "34d55a596fb8d3d44daefe425ec1e303131f4d2c"
+  install: true
 
-# 100-min readiness wall (default 30 min) tolerates first-time NGC
-# image pull on each worker node plus DSV4-Pro multi-node load.
+# 100-min readiness wall (default 30 min) so the per-worker dynamo
+# source build has room to finish before health-poll gives up.
 health_check:
   max_attempts: 600
   interval_seconds: 10
 
 model:
   path: "deepseek-v4-pro"
-  container: "nvcr.io/nvidia/ai-dynamo/sglang-runtime:1.2.0-deepseek-v4-cuda13-dev.3"
+  container: "lmsysorg/sglang:nightly-dev-cu13-20260510-2473659e"
   precision: "mxfp4"
 
 sbatch_directives:
diff --git a/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-gb200-mid-curve-1p1d-dep8-dep16-mtp.yaml b/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-gb200-mid-curve-1p1d-dep8-dep16-mtp.yaml
@@ -6,17 +6,18 @@ frontend:
   num_additional_frontends: 8
 
 dynamo:
-  install: false  # NGC sglang-runtime image ships dynamo prebuilt.
+  hash: "34d55a596fb8d3d44daefe425ec1e303131f4d2c"
+  install: true
 
-# 100-min readiness wall (default 30 min) tolerates first-time NGC
-# image pull on each worker node plus DSV4-Pro multi-node load.
+# 100-min readiness wall (default 30 min) so the per-worker dynamo
+# source build has room to finish before health-poll gives up.
 health_check:
   max_attempts: 600
   interval_seconds: 10
 
 model:
   path: "deepseek-v4-pro"
-  container: "nvcr.io/nvidia/ai-dynamo/sglang-runtime:1.2.0-deepseek-v4-cuda13-dev.3"
+  container: "lmsysorg/sglang:nightly-dev-cu13-20260510-2473659e"
   precision: "mxfp4"
 
 sbatch_directives:
diff --git a/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-gb200-mid-curve-1p1d-dep8-dep8-mtp.yaml b/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-gb200-mid-curve-1p1d-dep8-dep8-mtp.yaml
@@ -6,17 +6,18 @@ frontend:
   num_additional_frontends: 8
 
 dynamo:
-  install: false  # NGC sglang-runtime image ships dynamo prebuilt.
+  hash: "34d55a596fb8d3d44daefe425ec1e303131f4d2c"
+  install: true
 
-# 100-min readiness wall (default 30 min) tolerates first-time NGC
-# image pull on each worker node plus DSV4-Pro multi-node load.
+# 100-min readiness wall (default 30 min) so the per-worker dynamo
+# source build has room to finish before health-poll gives up.
 health_check:
   max_attempts: 600
   interval_seconds: 10
 
 model:
   path: "deepseek-v4-pro"
-  container: "nvcr.io/nvidia/ai-dynamo/sglang-runtime:1.2.0-deepseek-v4-cuda13-dev.3"
+  container: "lmsysorg/sglang:nightly-dev-cu13-20260510-2473659e"
   precision: "mxfp4"
 
 sbatch_directives:
diff --git a/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-gb200-mid-curve-4p1d-dep8-dep8-mtp-c8192.yaml b/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-gb200-mid-curve-4p1d-dep8-dep8-mtp-c8192.yaml
@@ -6,17 +6,18 @@ frontend:
   num_additional_frontends: 8
 
 dynamo:
-  install: false  # NGC sglang-runtime image ships dynamo prebuilt.
+  hash: "34d55a596fb8d3d44daefe425ec1e303131f4d2c"
+  install: true
 
-# 100-min readiness wall (default 30 min) tolerates first-time NGC
-# image pull on each worker node plus DSV4-Pro multi-node load.
+# 100-min readiness wall (default 30 min) so the per-worker dynamo
+# source build has room to finish before health-poll gives up.
 health_check:
   max_attempts: 600
   interval_seconds: 10
 
 model:
   path: "deepseek-v4-pro"
-  container: "nvcr.io/nvidia/ai-dynamo/sglang-runtime:1.2.0-deepseek-v4-cuda13-dev.3"
+  container: "lmsysorg/sglang:nightly-dev-cu13-20260510-2473659e"
   precision: "mxfp4"
 
 sbatch_directives:
diff --git a/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-gb200-mid-curve-5p1d-dep8-dep8-mtp-c12288.yaml b/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-gb200-mid-curve-5p1d-dep8-dep8-mtp-c12288.yaml
@@ -6,17 +6,18 @@ frontend:
   num_additional_frontends: 8
 
 dynamo:
-  install: false  # NGC sglang-runtime image ships dynamo prebuilt.
+  hash: "34d55a596fb8d3d44daefe425ec1e303131f4d2c"
+  install: true
 
-# 100-min readiness wall (default 30 min) tolerates first-time NGC
-# image pull on each worker node plus DSV4-Pro multi-node load.
+# 100-min readiness wall (default 30 min) so the per-worker dynamo
+# source build has room to finish before health-poll gives up.
 health_check:
   max_attempts: 600
   interval_seconds: 10
 
 model:
   path: "deepseek-v4-pro"
-  container: "nvcr.io/nvidia/ai-dynamo/sglang-runtime:1.2.0-deepseek-v4-cuda13-dev.3"
+  container: "lmsysorg/sglang:nightly-dev-cu13-20260510-2473659e"
   precision: "mxfp4"
 
 sbatch_directives:
diff --git a/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-gb200-mid-curve-6p1d-dep8-dep8-mtp-c16384.yaml b/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-gb200-mid-curve-6p1d-dep8-dep8-mtp-c16384.yaml
@@ -6,17 +6,18 @@ frontend:
   num_additional_frontends: 8
 
 dynamo:
-  install: false  # NGC sglang-runtime image ships dynamo prebuilt.
+  hash: "34d55a596fb8d3d44daefe425ec1e303131f4d2c"
+  install: true
 
-# 100-min readiness wall (default 30 min) tolerates first-time NGC
-# image pull on each worker node plus DSV4-Pro multi-node load.
+# 100-min readiness wall (default 30 min) so the per-worker dynamo
+# source build has room to finish before health-poll gives up.
 health_check:
   max_attempts: 600
   interval_seconds: 10
 
 model:
   path: "deepseek-v4-pro"
-  container: "nvcr.io/nvidia/ai-dynamo/sglang-runtime:1.2.0-deepseek-v4-cuda13-dev.3"
+  container: "lmsysorg/sglang:nightly-dev-cu13-20260510-2473659e"
   precision: "mxfp4"
 
 sbatch_directives: