test: DSv4-Pro B300 vLLM agentic - drop --max-model-len

cquil11 · claude · cquil11 · commit f8a1f5ed3657 · 2026-05-13T15:25:36.000-05:00
Drop the explicit --max-model-len so vLLM picks up DSv4-Pro's full
native context window. The previous value of 1,000,000 was already
above the trace dataset's max (937K), so behavior is unchanged for
this particular dataset; the cleanup just makes the launcher
consistent with the Kimi B200/B300 sister launchers and removes the
synthetic cap from the server config entirely.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
diff --git a/benchmarks/single_node/agentic/dsv4_fp4_b300_vllm.sh b/benchmarks/single_node/agentic/dsv4_fp4_b300_vllm.sh
@@ -31,9 +31,6 @@ ADVANCE_MIN=${ADVANCE_MIN:-0.0}
 ADVANCE_MAX=${ADVANCE_MAX:-0.7}
 EP_SIZE=${EP_SIZE:-1}
 DP_ATTENTION=${DP_ATTENTION:-false}
-if [ -z "${MAX_MODEL_LEN:-}" ] || [ "$MAX_MODEL_LEN" = "0" ]; then
-    MAX_MODEL_LEN=1000000
-fi
 
 if [[ -n "${SLURM_JOB_ID:-}" ]]; then
     echo "JOB $SLURM_JOB_ID running on ${SLURMD_NODENAME:-unknown}"
@@ -136,7 +133,6 @@ vllm serve "$MODEL" \
 --reasoning-parser deepseek_v4 \
 --enable-prefix-caching \
 --no-disable-hybrid-kv-cache-manager \
---max-model-len "$MAX_MODEL_LEN" \
 --max-num-seqs "$PER_ENGINE_MAX_NUM_SEQS" \
 $OFFLOAD_ARGS > "$SERVER_LOG" 2>&1 &
 SERVER_PID=$!