@@ -55,11 +55,28 @@ nvidia-smi
resolve_trace_source
install_agentic_deps

# ---- Optional vllm-router front-end -----------------------------------------
# vllm-project/router expands the one HTTP backend into one logical worker per
# DP rank and sends X-data-parallel-rank on forwarded requests. aiperf's
# X-Correlation-ID is stable for every turn of a conversation; alias it to the
# router's preferred X-Session-ID header.
#
# Port layout:
#   DP_ATTENTION != true : vLLM is served on $PORT, no router.
#   DP_ATTENTION == true : vLLM is served on PORT+1 and the router (started
#                          later in this script) listens on $PORT; router
#                          Prometheus metrics use PORT+10000.
# NOTE(review): PORT+10000 exceeds 65535 when PORT > 55535 -- confirm callers
# never pass a port that high.
USE_VLLM_ROUTER=false
VLLM_BACKEND_PORT="$PORT"
if [ "$DP_ATTENTION" = "true" ]; then
    USE_VLLM_ROUTER=true
    VLLM_BACKEND_PORT=$(( PORT + 1 ))
    # Pinned so benchmark runs are reproducible across router releases.
    VLLM_ROUTER_VERSION=0.1.14
    VLLM_ROUTER_POLICY=consistent_hash
    VLLM_ROUTER_METRICS_PORT=$(( PORT + 10000 ))
    export AIPERF_HTTP_X_SESSION_ID_FROM_CORRELATION_ID=1
    agentic_pip_install --quiet "vllm-router==$VLLM_ROUTER_VERSION"
fi

# DeepSeek-V4-Pro weights are large; engine startup can exceed default 600s.
export VLLM_ENGINE_READY_TIMEOUT_S=3600

# ---- Server config ----------------------------------------------------------
SERVER_LOG="$RESULT_DIR/server.log"
ROUTER_LOG="$RESULT_DIR/router.log"
LMCACHE_LOG="$RESULT_DIR/lmcache_server.log"
mkdir -p "$RESULT_DIR"
6582
@@ -233,7 +250,7 @@ export VLLM_FLOAT32_MATMUL_PRECISION=high
233250VLLM_CMD=(
234251 vllm serve " $MODEL_PATH " --served-model-name " $MODEL "
235252 --host 0.0.0.0
236- --port " $PORT "
253+ --port " $VLLM_BACKEND_PORT "
237254 --trust-remote-code
238255 --kv-cache-dtype fp8
239256 --block-size 256
@@ -257,7 +274,23 @@ printf '\n' | tee -a "$RESULT_DIR/vllm_command.txt"
SERVER_PID=$!
echo "Server PID: $SERVER_PID"

# Readiness is checked on the backend port: when the router is enabled, vLLM
# itself is not listening on $PORT.
wait_for_server_ready --port "$VLLM_BACKEND_PORT" --server-log "$SERVER_LOG" --server-pid "$SERVER_PID"

if [ "$USE_VLLM_ROUTER" = "true" ]; then
    echo "Starting native vLLM router on port $PORT for $TP DP ranks..."
    # A single worker URL is passed; --intra-node-data-parallel-size tells the
    # router how many DP ranks sit behind it. Router output goes to its own log.
    vllm-router \
        --worker-urls "http://localhost:$VLLM_BACKEND_PORT" \
        --policy "$VLLM_ROUTER_POLICY" \
        --intra-node-data-parallel-size "$TP" \
        --host 0.0.0.0 \
        --port "$PORT" \
        --prometheus-host 127.0.0.1 \
        --prometheus-port "$VLLM_ROUTER_METRICS_PORT" \
        --disable-retries > "$ROUTER_LOG" 2>&1 &
    ROUTER_PID=$!
    echo "Router PID: $ROUTER_PID"
    # NOTE(review): confirm the script's cleanup path also stops ROUTER_PID.
    wait_for_server_ready --port "$PORT" --server-log "$ROUTER_LOG" --server-pid "$ROUTER_PID"
fi

# ---- Run benchmark ----------------------------------------------------------
build_replay_cmd "$RESULT_DIR"
0 commit comments