Skip to content

Commit 5819b31

Browse files
committed
fix(agentic): filter Kimi MI355X replay context
1 parent 5db2668 commit 5819b31

2 files changed

Lines changed: 6 additions & 1 deletion

File tree

benchmarks/single_node/agentic/kimik2.5_fp4_mi355x.sh

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,10 @@ MAX_DELAY=${MAX_DELAY:-60}
2222
ADVANCE_MIN=${ADVANCE_MIN:-0.0}
2323
ADVANCE_MAX=${ADVANCE_MAX:-0.7}
2424
EP_SIZE=${EP_SIZE:-1}
25+
# Kimi-K2.5 advertises a 262144-token context window in vLLM 0.21.0.
26+
# Keep the benchmark loader's trace filter aligned with the server so
27+
# prompt+max_tokens overflows are removed before replay.
28+
MAX_MODEL_LEN=${MAX_MODEL_LEN:-262144}
2529

2630
if [[ -n "${SLURM_JOB_ID:-}" ]]; then
2731
echo "JOB $SLURM_JOB_ID running on ${SLURMD_NODENAME:-unknown}"
@@ -512,6 +516,7 @@ VLLM_CMD=(
512516
--gpu-memory-utilization 0.90
513517
--block-size=1
514518
--trust-remote-code
519+
--max-model-len "$MAX_MODEL_LEN"
515520
--max-num-seqs "$CONC"
516521
--mm-encoder-tp-mode data
517522
"${PREFIX_CACHE_ARGS[@]}"

0 commit comments

Comments
 (0)