Skip to content

Commit c79bf86

Browse files
cquil11 and claude committed
revert: drop MAX_MODEL_LEN=131072 default from Kimi MI355X/B200 launchers
Per the agentic benchmark design, launchers must not cap the context length. This removes the MAX_MODEL_LEN=131072 default, and the --max-model-len flag that consumed it, from the kimik2.5_fp4_mi355x, kimik2.5_fp4_b200, and kimik2.5_int4_b200 launchers so that vLLM uses the model's native context window. This matches the H100/H200/B300 launchers, which already have no cap.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
1 parent d7411dd commit c79bf86

3 files changed

Lines changed: 0 additions & 14 deletions

File tree

benchmarks/single_node/agentic/kimik2.5_fp4_b200.sh

Lines changed: 0 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -16,11 +16,6 @@ DURATION=${DURATION:-1800}
1616
MAX_DELAY=${MAX_DELAY:-60}
1717
ADVANCE_MIN=${ADVANCE_MIN:-0.0}
1818
ADVANCE_MAX=${ADVANCE_MAX:-0.7}
19-
# Agentic matrix entries don't set max-model-len, so the workflow passes 0.
20-
# ${:-DEFAULT} only fires on unset/empty, so handle 0 explicitly.
21-
if [ -z "${MAX_MODEL_LEN:-}" ] || [ "$MAX_MODEL_LEN" = "0" ]; then
22-
MAX_MODEL_LEN=131072
23-
fi
2419

2520
if [[ -n "${SLURM_JOB_ID:-}" ]]; then
2621
echo "JOB $SLURM_JOB_ID running on ${SLURMD_NODENAME:-unknown}"
@@ -64,7 +59,6 @@ vllm serve $MODEL \
6459
--port $PORT \
6560
--tensor-parallel-size=$TP \
6661
--gpu-memory-utilization 0.90 \
67-
--max-model-len $MAX_MODEL_LEN \
6862
--max-num-seqs $CONC \
6963
--reasoning-parser kimi_k2 \
7064
--tool-call-parser kimi_k2 \

benchmarks/single_node/agentic/kimik2.5_fp4_mi355x.sh

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -17,9 +17,6 @@ MAX_DELAY=${MAX_DELAY:-60}
1717
ADVANCE_MIN=${ADVANCE_MIN:-0.0}
1818
ADVANCE_MAX=${ADVANCE_MAX:-0.7}
1919
EP_SIZE=${EP_SIZE:-1}
20-
if [ -z "${MAX_MODEL_LEN:-}" ] || [ "$MAX_MODEL_LEN" = "0" ]; then
21-
MAX_MODEL_LEN=131072
22-
fi
2320

2421
if [[ -n "${SLURM_JOB_ID:-}" ]]; then
2522
echo "JOB $SLURM_JOB_ID running on ${SLURMD_NODENAME:-unknown}"
@@ -84,7 +81,6 @@ vllm serve $MODEL \
8481
--tensor-parallel-size=$TP \
8582
$EP \
8683
--gpu-memory-utilization 0.90 \
87-
--max-model-len $MAX_MODEL_LEN \
8884
--block-size=1 \
8985
--trust-remote-code \
9086
--max-num-seqs $CONC \

benchmarks/single_node/agentic/kimik2.5_int4_b200.sh

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -16,9 +16,6 @@ DURATION=${DURATION:-1800}
1616
MAX_DELAY=${MAX_DELAY:-60}
1717
ADVANCE_MIN=${ADVANCE_MIN:-0.0}
1818
ADVANCE_MAX=${ADVANCE_MAX:-0.7}
19-
if [ -z "${MAX_MODEL_LEN:-}" ] || [ "$MAX_MODEL_LEN" = "0" ]; then
20-
MAX_MODEL_LEN=131072
21-
fi
2219

2320
if [[ -n "${SLURM_JOB_ID:-}" ]]; then
2421
echo "JOB $SLURM_JOB_ID running on ${SLURMD_NODENAME:-unknown}"
@@ -58,7 +55,6 @@ vllm serve $MODEL \
5855
--port $PORT \
5956
--gpu-memory-utilization 0.95 \
6057
--tensor-parallel-size $TP \
61-
--max-model-len $MAX_MODEL_LEN \
6258
--max-num-seqs $CONC \
6359
--reasoning-parser kimi_k2 \
6460
--tool-call-parser kimi_k2 \

0 commit comments

Comments
 (0)