From e34230f6cfd55cd5cc963a9a3e2a5187fc0ba4ae Mon Sep 17 00:00:00 2001 From: functionstackx <47992694+functionstackx@users.noreply.github.com> Date: Mon, 18 May 2026 14:02:32 -0400 Subject: [PATCH 1/2] Update kimik2.5-fp4-b200-vllm vLLM image to v0.21.0 Update vLLM image from v0.20.2 to v0.21.0 Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/configs/nvidia-master.yaml | 2 +- perf-changelog.yaml | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/.github/configs/nvidia-master.yaml b/.github/configs/nvidia-master.yaml index 419a98c09..df3f389c5 100644 --- a/.github/configs/nvidia-master.yaml +++ b/.github/configs/nvidia-master.yaml @@ -2632,7 +2632,7 @@ kimik2.5-int4-h200-vllm-agentic: - { tp: 8, offloading: cpu, conc-list: [6, 7, 8, 9, 10, 11, 12, 13, 14] } kimik2.5-fp4-b200-vllm: - image: vllm/vllm-openai:v0.20.2 + image: vllm/vllm-openai:v0.21.0 model: nvidia/Kimi-K2.5-NVFP4 model-prefix: kimik2.5 runner: b200 diff --git a/perf-changelog.yaml b/perf-changelog.yaml index 8a2bc8fe8..8c8c14c2d 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -2826,3 +2826,9 @@ description: - "Update TensorRT-LLM image from v1.2.0rc2.post2 (102d old) to v1.3.0rc14 (latest pre-release)" pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1490 + +- config-keys: + - kimik2.5-fp4-b200-vllm + description: + - "Update vLLM image from v0.20.2 to v0.21.0" + pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/XXX From ae4efc343ceb33d12da988125de947832149a260 Mon Sep 17 00:00:00 2001 From: functionstackx <47992694+functionstackx@users.noreply.github.com> Date: Mon, 18 May 2026 14:02:37 -0400 Subject: [PATCH 2/2] chore: fill pr-link for #1504 --- perf-changelog.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/perf-changelog.yaml b/perf-changelog.yaml index 8c8c14c2d..e17073c05 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -2831,4 +2831,4 @@ - kimik2.5-fp4-b200-vllm description: - "Update vLLM image from v0.20.2 to v0.21.0" - pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/XXX + pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1504