Skip to content

Commit 923186d

Browse files
committed
feat(agentic): route B200 DEP traffic through native vLLM router
1 parent 76a3f09 commit 923186d

1 file changed

Lines changed: 35 additions & 2 deletions

File tree

benchmarks/single_node/agentic/dsv4_fp4_b200_vllm.sh

Lines changed: 35 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -55,11 +55,28 @@ nvidia-smi
5555
resolve_trace_source
5656
install_agentic_deps
5757

58+
# vllm-project/router expands the one HTTP backend into one logical worker per
59+
# DP rank and sends X-data-parallel-rank on forwarded requests. aiperf's
60+
# X-Correlation-ID is stable for every turn of a conversation; alias it to the
61+
# router's preferred X-Session-ID header.
62+
USE_VLLM_ROUTER=false
63+
VLLM_BACKEND_PORT="$PORT"
64+
if [ "$DP_ATTENTION" = "true" ]; then
65+
USE_VLLM_ROUTER=true
66+
VLLM_BACKEND_PORT=$((PORT + 1))
67+
VLLM_ROUTER_VERSION=0.1.14
68+
VLLM_ROUTER_POLICY=consistent_hash
69+
VLLM_ROUTER_METRICS_PORT=$((PORT + 10000))
70+
export AIPERF_HTTP_X_SESSION_ID_FROM_CORRELATION_ID=1
71+
agentic_pip_install --quiet "vllm-router==$VLLM_ROUTER_VERSION"
72+
fi
73+
5874
# DeepSeek-V4-Pro weights are large; engine startup can exceed default 600s.
5975
export VLLM_ENGINE_READY_TIMEOUT_S=3600
6076

6177
# ---- Server config ----------------------------------------------------------
6278
SERVER_LOG="$RESULT_DIR/server.log"
79+
ROUTER_LOG="$RESULT_DIR/router.log"
6380
LMCACHE_LOG="$RESULT_DIR/lmcache_server.log"
6481
mkdir -p "$RESULT_DIR"
6582

@@ -233,7 +250,7 @@ export VLLM_FLOAT32_MATMUL_PRECISION=high
233250
VLLM_CMD=(
234251
vllm serve "$MODEL_PATH" --served-model-name "$MODEL"
235252
--host 0.0.0.0
236-
--port "$PORT"
253+
--port "$VLLM_BACKEND_PORT"
237254
--trust-remote-code
238255
--kv-cache-dtype fp8
239256
--block-size 256
@@ -257,7 +274,23 @@ printf '\n' | tee -a "$RESULT_DIR/vllm_command.txt"
257274
SERVER_PID=$!
258275
echo "Server PID: $SERVER_PID"
259276

260-
wait_for_server_ready --port "$PORT" --server-log "$SERVER_LOG" --server-pid "$SERVER_PID"
277+
wait_for_server_ready --port "$VLLM_BACKEND_PORT" --server-log "$SERVER_LOG" --server-pid "$SERVER_PID"
278+
279+
if [ "$USE_VLLM_ROUTER" = "true" ]; then
280+
echo "Starting native vLLM router on port $PORT for $TP DP ranks..."
281+
vllm-router \
282+
--worker-urls "http://localhost:$VLLM_BACKEND_PORT" \
283+
--policy "$VLLM_ROUTER_POLICY" \
284+
--intra-node-data-parallel-size "$TP" \
285+
--host 0.0.0.0 \
286+
--port "$PORT" \
287+
--prometheus-host 127.0.0.1 \
288+
--prometheus-port "$VLLM_ROUTER_METRICS_PORT" \
289+
--disable-retries > "$ROUTER_LOG" 2>&1 &
290+
ROUTER_PID=$!
291+
echo "Router PID: $ROUTER_PID"
292+
wait_for_server_ready --port "$PORT" --server-log "$ROUTER_LOG" --server-pid "$ROUTER_PID"
293+
fi
261294

262295
# ---- Run benchmark ----------------------------------------------------------
263296
build_replay_cmd "$RESULT_DIR"

0 commit comments

Comments
 (0)