SemiAnalysisAI
diff --git a/.github/configs/nvidia-master.yaml b/.github/configs/nvidia-master.yaml
Lines changed: 16 additions & 16 deletions
diff --git a/.github/workflows/README.md b/.github/workflows/README.md
Lines changed: 36 additions & 0 deletions
@@ -1827,7 +1827,7 @@ dsv4-fp4-b200-vllm-mtp:
 # does not have a B300-specific recipe, so this config reuses the existing DSR1 FP4
 # B200 SGLang recipe as-is until B300-specific tuning is available.
 dsr1-fp4-b300-sglang:
-  image: lmsysorg/sglang:v0.5.10.post1-cu130
+  image: lmsysorg/sglang:v0.5.11-cu130
   model: nvidia/DeepSeek-R1-0528-FP4-V2
   model-prefix: dsr1
   runner: b300
@@ -1936,7 +1936,7 @@ dsr1-fp8-b200-sglang:
   # does not have a B300-specific recipe, so this config reuses the existing DSR1 FP8
   # B200 SGLang recipe as-is until B300-specific tuning is available.
 dsr1-fp8-b300-sglang:
-  image: lmsysorg/sglang:v0.5.10.post1-cu130
+  image: lmsysorg/sglang:v0.5.11-cu130
   model: deepseek-ai/DeepSeek-R1-0528
   model-prefix: dsr1
   runner: b300
@@ -2169,7 +2169,7 @@ glm5-fp8-b200-sglang-mtp:
   # does not have a B300-specific recipe, so this config reuses the existing GLM5 FP8
   # B200 SGLang recipe as-is until B300-specific tuning is available.
 glm5-fp8-b300-sglang:
-  image: lmsysorg/sglang:v0.5.10.post1-cu130
+  image: lmsysorg/sglang:v0.5.11-cu130
   model: zai-org/GLM-5-FP8
   model-prefix: glm5
   runner: b300
@@ -2188,7 +2188,7 @@ glm5-fp8-b300-sglang:
       - { tp: 8, ep: 1, conc-start: 4, conc-end: 256 }
 
 glm5-fp8-b300-sglang-mtp:
-  image: lmsysorg/sglang:v0.5.10.post1-cu130
+  image: lmsysorg/sglang:v0.5.11-cu130
   model: zai-org/GLM-5-FP8
   model-prefix: glm5
   runner: b300
@@ -2252,7 +2252,7 @@ glm5-fp4-b200-sglang-mtp:
   # does not have a B300-specific recipe, so this config reuses the existing
   # GLM-5 FP4 B200 SGLang recipe as-is until B300-specific tuning is available.
 glm5-fp4-b300-sglang:
-  image: lmsysorg/sglang:v0.5.10.post1-cu130
+  image: lmsysorg/sglang:v0.5.11-cu130
   model: nvidia/GLM-5-NVFP4
   model-prefix: glm5
   runner: b300
@@ -2273,7 +2273,7 @@ glm5-fp4-b300-sglang:
       - { tp: 4, ep: 1, conc-start: 4, conc-end: 256 }
 
 glm5-fp4-b300-sglang-mtp:
-  image: lmsysorg/sglang:v0.5.10.post1-cu130
+  image: lmsysorg/sglang:v0.5.11-cu130
   model: nvidia/GLM-5-NVFP4
   model-prefix: glm5
   runner: b300
@@ -2316,7 +2316,7 @@ qwen3.5-fp8-b200-sglang-mtp:
 
 
 qwen3.5-fp8-b300-sglang-mtp:
-  image: lmsysorg/sglang:v0.5.10.post1-cu130
+  image: lmsysorg/sglang:v0.5.11-cu130
   model: Qwen/Qwen3.5-397B-A17B-FP8
   model-prefix: qwen3.5
   runner: b300
@@ -2354,7 +2354,7 @@ qwen3.5-fp8-b300-sglang:
       - { tp: 4, ep: 1, conc-start: 4, conc-end: 256 }
 
 qwen3.5-fp4-b300-sglang:
-  image: lmsysorg/sglang:v0.5.10.post1-cu130
+  image: lmsysorg/sglang:v0.5.11-cu130
   model: nvidia/Qwen3.5-397B-A17B-NVFP4
   model-prefix: qwen3.5
   runner: b300
@@ -2375,7 +2375,7 @@ qwen3.5-fp4-b300-sglang:
       - { tp: 2, ep: 2, conc-start: 4, conc-end: 128 }
 
 qwen3.5-fp4-b300-sglang-mtp:
-  image: lmsysorg/sglang:v0.5.10.post1-cu130
+  image: lmsysorg/sglang:v0.5.11-cu130
   model: nvidia/Qwen3.5-397B-A17B-NVFP4
   model-prefix: qwen3.5
   runner: b300
@@ -2396,7 +2396,7 @@ qwen3.5-fp4-b300-sglang-mtp:
       - { tp: 2, ep: 2, conc-start: 4, conc-end: 128, spec-decoding: mtp }
 
 qwen3.5-bf16-b300-sglang:
-  image: lmsysorg/sglang:v0.5.10.post1-cu130
+  image: lmsysorg/sglang:v0.5.11-cu130
   model: Qwen/Qwen3.5-397B-A17B
   model-prefix: qwen3.5
   runner: b300
@@ -2417,7 +2417,7 @@ qwen3.5-bf16-b300-sglang:
       - { tp: 4, ep: 1, conc-start: 4, conc-end: 64 }
 
 qwen3.5-bf16-b300-sglang-mtp:
-  image: lmsysorg/sglang:v0.5.10.post1-cu130
+  image: lmsysorg/sglang:v0.5.11-cu130
   model: Qwen/Qwen3.5-397B-A17B
   model-prefix: qwen3.5
   runner: b300
@@ -2634,7 +2634,7 @@ dsr1-fp8-b200-trt-mtp:
       - { tp: 8, ep: 1, conc-start: 4, conc-end: 256, spec-decoding: mtp }
 
 dsr1-fp8-h200-sglang:
-  image: lmsysorg/sglang:v0.5.9-cu130
+  image: lmsysorg/sglang:v0.5.11-cu130
   model: deepseek-ai/DeepSeek-R1-0528
   model-prefix: dsr1
   runner: h200
@@ -2866,7 +2866,7 @@ qwen3.5-fp8-h200-sglang:
       - { tp: 8, ep: 8, conc-start: 4, conc-end: 64 }
 
 qwen3.5-fp8-h200-sglang-mtp:
-  image: lmsysorg/sglang:v0.5.10.post1
+  image: lmsysorg/sglang:v0.5.11
   model: Qwen/Qwen3.5-397B-A17B-FP8
   model-prefix: qwen3.5
   runner: h200
@@ -4102,7 +4102,7 @@ minimaxm2.5-fp4-b300-vllm:
       - { tp: 8, conc-start: 4, conc-end: 4 }
 
 gptoss-fp4-h100-vllm:
-  image: vllm/vllm-openai:v0.18.0
+  image: vllm/vllm-openai:v0.20.2
   model: openai/gpt-oss-120b
   model-prefix: gptoss
   runner: h100
@@ -4125,7 +4125,7 @@ gptoss-fp4-h100-vllm:
       - { tp: 8, conc-start: 4, conc-end: 16 }
 
 minimaxm2.5-fp8-h100-vllm:
-  image: vllm/vllm-openai:v0.18.0
+  image: vllm/vllm-openai:v0.20.2
   model: MiniMaxAI/MiniMax-M2.5
   model-prefix: minimaxm2.5
   runner: h100
@@ -4331,7 +4331,7 @@ gptoss-fp4-h200-vllm:
       - { tp: 8, conc-start: 4, conc-end: 32 }
 
 minimaxm2.5-fp8-h200-vllm:
-  image: vllm/vllm-openai:v0.18.0
+  image: vllm/vllm-openai:v0.20.2
   model: MiniMaxAI/MiniMax-M2.5
   model-prefix: minimaxm2.5
   runner: h200
 
@@ -178,6 +178,42 @@ test-config --config-keys dsr1-fp4-b200-sglang gptoss* --config-files .github/co
 test-config --config-keys *-b200-* --conc 4 8 --config-files .github/configs/nvidia-master.yaml
 ```
 
+## Reusing an Approved PR Full Sweep
+
+If a PR has already run the full untrimmed sweep (`full-sweep-enabled` label),
+a maintainer can avoid running the same sweep again after merge by leaving a
+PR comment before merging:
+
+```
+/reuse-sweep-run
+```
+
+That reuses the latest successful `run-sweep.yml` `pull_request` run for the
+PR's current head SHA. If the PR was rebased or had to merge `main` after the
+successful sweep — so the current head no longer has a matching run — pin the
+source run explicitly:
+
+```
+/reuse-sweep-run <run_id>
+```
+
+The comment only authorizes reuse; adding it does not trigger or cancel a PR
+sweep. On the push-to-main run, `run-sweep.yml` resolves the merged PR
+from the merge commit, verifies the source run is a successful `pull_request`
+`run-sweep.yml` run for the same PR, downloads the ingest-relevant artifacts,
+validates that `results_bmk` covers the merge run's expected benchmark matrix,
+and uploads them as `reused-ingest-artifacts`. The normal database ingest then
+publishes those artifacts with the merge run's changelog metadata.
+
+Only comments from `OWNER`, `MEMBER`, or `COLLABORATOR` users authorize reuse.
+The most recent matching comment wins, so a maintainer can supersede an earlier
+pin by leaving a new `/reuse-sweep-run [<run_id>]` comment.
+
+Reuse fails closed: if the comment is present but the `full-sweep-enabled`
+label, source PR run, or artifacts cannot be validated, the push-to-main
+workflow fails instead of falling back to a cluster sweep. Without the comment,
+the push-to-main workflow runs the normal full sweep.
+
 ## Validation Architecture
 
 The benchmarking system uses a strict validation methodology to ensure correctness at every stage. This is implemented in `utils/matrix_logic/validation.py` using Pydantic models.