Revert "Update glm5-fp4-b300-sglang and -mtp SGLang image to v0.5.12-cu130 (#1420)" (#1563)

functionstackx · web-flow · commit 5481fbfc6638 · 2026-05-25T16:05:13.000-04:00
This reverts commit 8862360.
diff --git a/.github/configs/nvidia-master.yaml b/.github/configs/nvidia-master.yaml
@@ -2352,7 +2352,7 @@ glm5-fp4-b200-sglang-mtp:
   # does not have a B300-specific recipe, so this config reuses the existing
   # GLM-5 FP4 B200 SGLang recipe as-is until B300-specific tuning is available.
 glm5-fp4-b300-sglang:
-  image: lmsysorg/sglang:v0.5.12-cu130
+  image: lmsysorg/sglang:v0.5.11-cu130
   model: nvidia/GLM-5-NVFP4
   model-prefix: glm5
   runner: b300
@@ -2373,7 +2373,7 @@ glm5-fp4-b300-sglang:
       - { tp: 4, ep: 1, conc-start: 4, conc-end: 256 }
 
 glm5-fp4-b300-sglang-mtp:
-  image: lmsysorg/sglang:v0.5.12-cu130
+  image: lmsysorg/sglang:v0.5.11-cu130
   model: nvidia/GLM-5-NVFP4
   model-prefix: glm5
   runner: b300
diff --git a/benchmarks/single_node/glm5_fp4_b300.sh b/benchmarks/single_node/glm5_fp4_b300.sh
@@ -24,13 +24,6 @@ nvidia-smi
 
 if [[ "$MODEL" != /* ]]; then hf download "$MODEL"; fi
 
-# Downgrade flashinfer to the version pinned in sglang v0.5.11 to test the
-# trtllm batched-GEMM regression suspicion from sgl-project/sglang#25563
-# (suggested by @trevor-m). sglang v0.5.12's pyproject.toml moved from
-# flashinfer_python==0.6.8.post1 → 0.6.11.post1, and the trtllm GEMM crash
-# at bs=128 + EAGLE on B300 appeared in the same image bump.
-pip install --no-deps "flashinfer_python==0.6.8.post1" "flashinfer_cubin==0.6.8.post1"
-
 SERVER_LOG=/workspace/server.log
 PORT=${PORT:-8888}
 
diff --git a/benchmarks/single_node/glm5_fp4_b300_mtp.sh b/benchmarks/single_node/glm5_fp4_b300_mtp.sh
@@ -24,12 +24,6 @@ nvidia-smi
 if [[ "$MODEL" != /* ]]; then hf download "$MODEL"; fi
 
 pip install --no-deps "transformers==5.2.0" "huggingface-hub==1.4.1"
-# Downgrade flashinfer to the version pinned in sglang v0.5.11 to test the
-# trtllm batched-GEMM regression suspicion from sgl-project/sglang#25563
-# (suggested by @trevor-m). sglang v0.5.12's pyproject.toml moved from
-# flashinfer_python==0.6.8.post1 → 0.6.11.post1, and the trtllm GEMM crash
-# at bs=128 + EAGLE on B300 appeared in the same image bump.
-pip install --no-deps "flashinfer_python==0.6.8.post1" "flashinfer_cubin==0.6.8.post1"
 
 export SGL_ENABLE_JIT_DEEPGEMM=1
 export SGLANG_ENABLE_SPEC_V2=1
diff --git a/perf-changelog.yaml b/perf-changelog.yaml
@@ -3043,13 +3043,6 @@
     - "Update SGLang image from nightly-dev-cu13-20260518-c67b2870 to nightly-dev-cu13-20260519-dbac4647"
   pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1492
 
-- config-keys:
-    - glm5-fp4-b300-sglang
-    - glm5-fp4-b300-sglang-mtp
-  description:
-    - "Update SGLang image from v0.5.11-cu130 to v0.5.12-cu130"
-  pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1420
-
 - config-keys:
     - dsr1-fp4-b200-sglang-mtp
   description: