Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/configs/nvidia-master.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1734,7 +1734,7 @@ dsv4-fp4-b200-sglang:
- { tp: 8, ep: 8, dp-attn: true, conc-start: 256, conc-end: 512 }

dsv4-fp4-b200-vllm:
image: vllm/vllm-openai:v0.20.0-cu130
image: vllm/vllm-openai:v0.21.0
model: deepseek-ai/DeepSeek-V4-Pro
model-prefix: dsv4
runner: b200-dsv4
Expand Down Expand Up @@ -1822,7 +1822,7 @@ dsv4-fp4-b200-trt-mtp:
# MTP variant of dsv4-fp4-b200-vllm. Mirrors the base search space and adds
# --speculative-config '{"method":"mtp","num_speculative_tokens":2}'.
dsv4-fp4-b200-vllm-mtp:
image: vllm/vllm-openai:v0.20.0-cu130
image: vllm/vllm-openai:v0.21.0
model: deepseek-ai/DeepSeek-V4-Pro
model-prefix: dsv4
runner: b200-dsv4
Expand Down
7 changes: 7 additions & 0 deletions perf-changelog.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2653,3 +2653,10 @@
description:
- "Update SGLang image from v0.5.9-cu129-amd64 (74d old) to v0.5.12-cu130"
pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1458

- config-keys:
- dsv4-fp4-b200-vllm
- dsv4-fp4-b200-vllm-mtp
description:
- "Update vLLM image from v0.20.0-cu130 (20d/18d old) to v0.21.0"
pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1476