From 380b685df7cd81917d3ac25d326fef0e4a1793cb Mon Sep 17 00:00:00 2001
From: jasonlizhengjian <jasonlizhengjian@gmail.com>
Date: Wed, 3 Jun 2026 10:39:11 -0700
Subject: [PATCH 1/2] Add Kimi K2.5 EP sweep points

---
 .github/configs/nvidia-master.yaml                       | 8 ++++++++
 .../single_node/fixed_seq_len/kimik2.5_fp4_b200.sh       | 9 ++++++++-
 .../single_node/fixed_seq_len/kimik2.5_fp4_b300.sh       | 7 +++++++
 3 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/.github/configs/nvidia-master.yaml b/.github/configs/nvidia-master.yaml
index 3ee32f6c6..b10247c5c 100644
--- a/.github/configs/nvidia-master.yaml
+++ b/.github/configs/nvidia-master.yaml
@@ -2661,12 +2661,16 @@ kimik2.5-fp4-b200-vllm:
       osl: 1024
       search-space:
       - { tp: 8, ep: 1, conc-start: 4, conc-end: 4 }
+      - { tp: 8, ep: 8, conc-start: 4, conc-end: 4 }  # EP counterpart of the entry above
       - { tp: 4, ep: 1, conc-start: 4, conc-end: 64 }
+      - { tp: 4, ep: 4, conc-start: 4, conc-end: 64 }  # EP counterpart of the entry above
     - isl: 8192
       osl: 1024
       search-space:
       - { tp: 8, ep: 1, conc-start: 4, conc-end: 4 }
+      - { tp: 8, ep: 8, conc-start: 4, conc-end: 4 }  # EP counterpart of the entry above
       - { tp: 4, ep: 1, conc-start: 4, conc-end: 64 }
+      - { tp: 4, ep: 4, conc-start: 4, conc-end: 64 }  # EP counterpart of the entry above
 
 # NOTE: At the time of submission, https://docs.vllm.ai/projects/recipes/en/latest/moonshotai/Kimi-K2.5.html
 # does not have a B300-specific recipe, so this config reuses the existing
@@ -2714,12 +2718,16 @@ kimik2.5-fp4-b300-vllm:
       osl: 1024
       search-space:
       - { tp: 8, ep: 1, conc-start: 4, conc-end: 4 }
+      - { tp: 8, ep: 8, conc-start: 4, conc-end: 4 }  # EP counterpart of the entry above
       - { tp: 4, ep: 1, conc-start: 4, conc-end: 64 }
+      - { tp: 4, ep: 4, conc-start: 4, conc-end: 64 }  # EP counterpart of the entry above
     - isl: 8192
       osl: 1024
       search-space:
       - { tp: 8, ep: 1, conc-start: 4, conc-end: 4 }
+      - { tp: 8, ep: 8, conc-start: 4, conc-end: 4 }  # EP counterpart of the entry above
       - { tp: 4, ep: 1, conc-start: 4, conc-end: 64 }
+      - { tp: 4, ep: 4, conc-start: 4, conc-end: 64 }  # EP counterpart of the entry above
 
 dsr1-fp8-b200-sglang-mtp:
   image: lmsysorg/sglang:v0.5.12-cu130
diff --git a/benchmarks/single_node/fixed_seq_len/kimik2.5_fp4_b200.sh b/benchmarks/single_node/fixed_seq_len/kimik2.5_fp4_b200.sh
index 59b55c90c..a625d86ae 100644
--- a/benchmarks/single_node/fixed_seq_len/kimik2.5_fp4_b200.sh
+++ b/benchmarks/single_node/fixed_seq_len/kimik2.5_fp4_b200.sh
@@ -5,6 +5,7 @@ source "$(dirname "$0")/../../benchmark_lib.sh"
 check_env_vars \
     MODEL \
     TP \
+    EP_SIZE \
     CONC \
     ISL \
     OSL \
@@ -39,9 +40,15 @@ start_gpu_monitor
 # benchmarks/single_node/agentic/kimik2.5_fp4_b200.sh).
 export VLLM_MEMORY_PROFILER_ESTIMATE_CUDAGRAPHS=0
 
+EP_ARGS=()  # flags spliced into vllm serve below; stays empty unless EP_SIZE > 1
+if [ "$EP_SIZE" -gt 1 ]; then  # integer test; EP_SIZE is listed in check_env_vars
+    EP_ARGS=(--enable-expert-parallel)  # presumably EP_SIZE mirrors the config's ep value - confirm
+fi
+
 set -x
 vllm serve $MODEL --host 0.0.0.0 --port $PORT \
 --tensor-parallel-size=$TP \
+"${EP_ARGS[@]}" \
 --gpu-memory-utilization 0.90 \
 --max-model-len $MAX_MODEL_LEN \
 --max-num-seqs $CONC \
@@ -82,4 +89,4 @@ fi
 
 # Stop GPU monitoring
 stop_gpu_monitor
-set +x
\ No newline at end of file
+set +x
diff --git a/benchmarks/single_node/fixed_seq_len/kimik2.5_fp4_b300.sh b/benchmarks/single_node/fixed_seq_len/kimik2.5_fp4_b300.sh
index db6d3fb0d..7e2a65eec 100755
--- a/benchmarks/single_node/fixed_seq_len/kimik2.5_fp4_b300.sh
+++ b/benchmarks/single_node/fixed_seq_len/kimik2.5_fp4_b300.sh
@@ -9,6 +9,7 @@ source "$(dirname "$0")/../../benchmark_lib.sh"
 check_env_vars \
     MODEL \
     TP \
+    EP_SIZE \
     CONC \
     ISL \
     OSL \
@@ -47,9 +48,15 @@ fi
 # Start GPU monitoring (power, temperature, clocks every second)
 start_gpu_monitor
 
+EP_ARGS=()  # flags spliced into vllm serve below; stays empty unless EP_SIZE > 1
+if [ "$EP_SIZE" -gt 1 ]; then  # integer test; EP_SIZE is listed in check_env_vars
+    EP_ARGS=(--enable-expert-parallel)  # presumably EP_SIZE mirrors the config's ep value - confirm
+fi
+
 set -x
 vllm serve $MODEL_PATH --served-model-name $MODEL --host 0.0.0.0 --port $PORT \
 --tensor-parallel-size $TP \
+"${EP_ARGS[@]}" \
 --gpu-memory-utilization 0.90 \
 --max-model-len $MAX_MODEL_LEN \
 --max-num-seqs $CONC \

From 8c2f79338f7ee8c4cfd10b002f28cc190d1c37f1 Mon Sep 17 00:00:00 2001
From: jasonlizhengjian <jasonlizhengjian@gmail.com>
Date: Wed, 3 Jun 2026 10:44:57 -0700
Subject: [PATCH 2/2] Add Kimi K2.5 EP sweep changelog

---
 perf-changelog.yaml | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/perf-changelog.yaml b/perf-changelog.yaml
index 1b9d2f0db..0c078c175 100644
--- a/perf-changelog.yaml
+++ b/perf-changelog.yaml
@@ -3430,3 +3430,10 @@
     - "Image: vllm/vllm-openai:v0.20.1"
     - "Same 1k/1k and 8k/1k search space as gb300, plus a new tp8-1p1d at low concurrencies for both ISLs"
   pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1652
+
+- config-keys:
+    - kimik2.5-fp4-b200-vllm
+    - kimik2.5-fp4-b300-vllm
+  description:
+    - "Add expert-parallel sweep points for Kimi K2.5 FP4 B200/B300 vLLM aggregate benchmarks"
+  pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1658