Skip to content

Commit 72c0408

Browse files
authored
Merge branch 'main' into claude/issue-1154-kimik2.5-fp4-b200-vllm
2 parents bd3726a + 0552ead commit 72c0408

12 files changed

Lines changed: 1496 additions & 56 deletions

.github/configs/nvidia-master.yaml

Lines changed: 16 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -1827,7 +1827,7 @@ dsv4-fp4-b200-vllm-mtp:
18271827
# does not have a B300-specific recipe, so this config reuses the existing DSR1 FP4
18281828
# B200 SGLang recipe as-is until B300-specific tuning is available.
18291829
dsr1-fp4-b300-sglang:
1830-
image: lmsysorg/sglang:v0.5.10.post1-cu130
1830+
image: lmsysorg/sglang:v0.5.11-cu130
18311831
model: nvidia/DeepSeek-R1-0528-FP4-V2
18321832
model-prefix: dsr1
18331833
runner: b300
@@ -1936,7 +1936,7 @@ dsr1-fp8-b200-sglang:
19361936
# does not have a B300-specific recipe, so this config reuses the existing DSR1 FP8
19371937
# B200 SGLang recipe as-is until B300-specific tuning is available.
19381938
dsr1-fp8-b300-sglang:
1939-
image: lmsysorg/sglang:v0.5.10.post1-cu130
1939+
image: lmsysorg/sglang:v0.5.11-cu130
19401940
model: deepseek-ai/DeepSeek-R1-0528
19411941
model-prefix: dsr1
19421942
runner: b300
@@ -2169,7 +2169,7 @@ glm5-fp8-b200-sglang-mtp:
21692169
# does not have a B300-specific recipe, so this config reuses the existing GLM5 FP8
21702170
# B200 SGLang recipe as-is until B300-specific tuning is available.
21712171
glm5-fp8-b300-sglang:
2172-
image: lmsysorg/sglang:v0.5.10.post1-cu130
2172+
image: lmsysorg/sglang:v0.5.11-cu130
21732173
model: zai-org/GLM-5-FP8
21742174
model-prefix: glm5
21752175
runner: b300
@@ -2188,7 +2188,7 @@ glm5-fp8-b300-sglang:
21882188
- { tp: 8, ep: 1, conc-start: 4, conc-end: 256 }
21892189

21902190
glm5-fp8-b300-sglang-mtp:
2191-
image: lmsysorg/sglang:v0.5.10.post1-cu130
2191+
image: lmsysorg/sglang:v0.5.11-cu130
21922192
model: zai-org/GLM-5-FP8
21932193
model-prefix: glm5
21942194
runner: b300
@@ -2252,7 +2252,7 @@ glm5-fp4-b200-sglang-mtp:
22522252
# does not have a B300-specific recipe, so this config reuses the existing
22532253
# GLM-5 FP4 B200 SGLang recipe as-is until B300-specific tuning is available.
22542254
glm5-fp4-b300-sglang:
2255-
image: lmsysorg/sglang:v0.5.10.post1-cu130
2255+
image: lmsysorg/sglang:v0.5.11-cu130
22562256
model: nvidia/GLM-5-NVFP4
22572257
model-prefix: glm5
22582258
runner: b300
@@ -2273,7 +2273,7 @@ glm5-fp4-b300-sglang:
22732273
- { tp: 4, ep: 1, conc-start: 4, conc-end: 256 }
22742274

22752275
glm5-fp4-b300-sglang-mtp:
2276-
image: lmsysorg/sglang:v0.5.10.post1-cu130
2276+
image: lmsysorg/sglang:v0.5.11-cu130
22772277
model: nvidia/GLM-5-NVFP4
22782278
model-prefix: glm5
22792279
runner: b300
@@ -2316,7 +2316,7 @@ qwen3.5-fp8-b200-sglang-mtp:
23162316

23172317

23182318
qwen3.5-fp8-b300-sglang-mtp:
2319-
image: lmsysorg/sglang:v0.5.10.post1-cu130
2319+
image: lmsysorg/sglang:v0.5.11-cu130
23202320
model: Qwen/Qwen3.5-397B-A17B-FP8
23212321
model-prefix: qwen3.5
23222322
runner: b300
@@ -2354,7 +2354,7 @@ qwen3.5-fp8-b300-sglang:
23542354
- { tp: 4, ep: 1, conc-start: 4, conc-end: 256 }
23552355

23562356
qwen3.5-fp4-b300-sglang:
2357-
image: lmsysorg/sglang:v0.5.10.post1-cu130
2357+
image: lmsysorg/sglang:v0.5.11-cu130
23582358
model: nvidia/Qwen3.5-397B-A17B-NVFP4
23592359
model-prefix: qwen3.5
23602360
runner: b300
@@ -2375,7 +2375,7 @@ qwen3.5-fp4-b300-sglang:
23752375
- { tp: 2, ep: 2, conc-start: 4, conc-end: 128 }
23762376

23772377
qwen3.5-fp4-b300-sglang-mtp:
2378-
image: lmsysorg/sglang:v0.5.10.post1-cu130
2378+
image: lmsysorg/sglang:v0.5.11-cu130
23792379
model: nvidia/Qwen3.5-397B-A17B-NVFP4
23802380
model-prefix: qwen3.5
23812381
runner: b300
@@ -2396,7 +2396,7 @@ qwen3.5-fp4-b300-sglang-mtp:
23962396
- { tp: 2, ep: 2, conc-start: 4, conc-end: 128, spec-decoding: mtp }
23972397

23982398
qwen3.5-bf16-b300-sglang:
2399-
image: lmsysorg/sglang:v0.5.10.post1-cu130
2399+
image: lmsysorg/sglang:v0.5.11-cu130
24002400
model: Qwen/Qwen3.5-397B-A17B
24012401
model-prefix: qwen3.5
24022402
runner: b300
@@ -2417,7 +2417,7 @@ qwen3.5-bf16-b300-sglang:
24172417
- { tp: 4, ep: 1, conc-start: 4, conc-end: 64 }
24182418

24192419
qwen3.5-bf16-b300-sglang-mtp:
2420-
image: lmsysorg/sglang:v0.5.10.post1-cu130
2420+
image: lmsysorg/sglang:v0.5.11-cu130
24212421
model: Qwen/Qwen3.5-397B-A17B
24222422
model-prefix: qwen3.5
24232423
runner: b300
@@ -2634,7 +2634,7 @@ dsr1-fp8-b200-trt-mtp:
26342634
- { tp: 8, ep: 1, conc-start: 4, conc-end: 256, spec-decoding: mtp }
26352635

26362636
dsr1-fp8-h200-sglang:
2637-
image: lmsysorg/sglang:v0.5.9-cu130
2637+
image: lmsysorg/sglang:v0.5.11-cu130
26382638
model: deepseek-ai/DeepSeek-R1-0528
26392639
model-prefix: dsr1
26402640
runner: h200
@@ -2866,7 +2866,7 @@ qwen3.5-fp8-h200-sglang:
28662866
- { tp: 8, ep: 8, conc-start: 4, conc-end: 64 }
28672867

28682868
qwen3.5-fp8-h200-sglang-mtp:
2869-
image: lmsysorg/sglang:v0.5.10.post1
2869+
image: lmsysorg/sglang:v0.5.11
28702870
model: Qwen/Qwen3.5-397B-A17B-FP8
28712871
model-prefix: qwen3.5
28722872
runner: h200
@@ -4102,7 +4102,7 @@ minimaxm2.5-fp4-b300-vllm:
41024102
- { tp: 8, conc-start: 4, conc-end: 4 }
41034103

41044104
gptoss-fp4-h100-vllm:
4105-
image: vllm/vllm-openai:v0.18.0
4105+
image: vllm/vllm-openai:v0.20.2
41064106
model: openai/gpt-oss-120b
41074107
model-prefix: gptoss
41084108
runner: h100
@@ -4125,7 +4125,7 @@ gptoss-fp4-h100-vllm:
41254125
- { tp: 8, conc-start: 4, conc-end: 16 }
41264126

41274127
minimaxm2.5-fp8-h100-vllm:
4128-
image: vllm/vllm-openai:v0.18.0
4128+
image: vllm/vllm-openai:v0.20.2
41294129
model: MiniMaxAI/MiniMax-M2.5
41304130
model-prefix: minimaxm2.5
41314131
runner: h100
@@ -4331,7 +4331,7 @@ gptoss-fp4-h200-vllm:
43314331
- { tp: 8, conc-start: 4, conc-end: 32 }
43324332

43334333
minimaxm2.5-fp8-h200-vllm:
4334-
image: vllm/vllm-openai:v0.18.0
4334+
image: vllm/vllm-openai:v0.20.2
43354335
model: MiniMaxAI/MiniMax-M2.5
43364336
model-prefix: minimaxm2.5
43374337
runner: h200

.github/workflows/README.md

Lines changed: 36 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -178,6 +178,42 @@ test-config --config-keys dsr1-fp4-b200-sglang gptoss* --config-files .github/co
178178
test-config --config-keys *-b200-* --conc 4 8 --config-files .github/configs/nvidia-master.yaml
179179
```
180180

181+
## Reusing an Approved PR Full Sweep
182+
183+
If a PR has already run the full untrimmed sweep (`full-sweep-enabled` label),
184+
a maintainer can avoid running the same sweep again after merge by leaving a
185+
PR comment before merging:
186+
187+
```
188+
/reuse-sweep-run
189+
```
190+
191+
That reuses the latest successful `run-sweep.yml` `pull_request` run for the
192+
PR's current head SHA. If the PR was rebased or had to merge `main` after the
193+
successful sweep — so the current head no longer has a matching run — pin the
194+
source run explicitly:
195+
196+
```
197+
/reuse-sweep-run <run_id>
198+
```
199+
200+
The comment is the reuse authorization, so adding it does not trigger or cancel
201+
a PR sweep. On the push-to-main run, `run-sweep.yml` resolves the merged PR
202+
from the merge commit, verifies the source run is a successful `pull_request`
203+
`run-sweep.yml` run for the same PR, downloads the ingest-relevant artifacts,
204+
validates that `results_bmk` covers the merge run's expected benchmark matrix,
205+
and uploads them as `reused-ingest-artifacts`. The normal database ingest then
206+
publishes those artifacts with the merge run's changelog metadata.
207+
208+
Only comments from `OWNER`, `MEMBER`, or `COLLABORATOR` users authorize reuse.
209+
The most recent matching comment wins, so a maintainer can supersede an earlier
210+
pin by leaving a new `/reuse-sweep-run [<run_id>]` comment.
211+
212+
Reuse fails closed: if the comment is present but the `full-sweep-enabled`
213+
label, source PR run, or artifacts cannot be validated, the push-to-main
214+
workflow fails instead of falling back to a cluster sweep. Without the comment,
215+
the push-to-main workflow runs the normal full sweep.
216+
181217
## Validation Architecture
182218

183219
The benchmarking system uses a strict validation methodology to ensure correctness at every stage. This is implemented in `utils/matrix_logic/validation.py` using Pydantic models.

0 commit comments

Comments
 (0)