File tree Expand file tree Collapse file tree
benchmarks/multi_node/llm-d Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -167,11 +167,15 @@ vllm serve "$MODEL" "${COMMON_ARGS[@]}" $ROLE_EXTRA_ARGS \
167167 > "$VLLM_LOG" 2>&1 &
168168VLLM_PID=$!
169169
170+ # Every rank waits for its own engine to bind /health before falling
171+ # through. For wide-EP (LWS_GROUP_SIZE > 1) this prevents the bench
172+ # from starting before the worker-side DP shards have come up; for the
173+ # single-node case it adds nothing beyond the wait the leader already performed.
174+ wait_for_server_ready --port "$VLLM_PORT" --server-log "$VLLM_LOG" --server-pid "$VLLM_PID"
175+ echo "vLLM ready on rank $NODE_RANK ($ROLE worker_index=$LWS_WORKER_INDEX)"
176+
170177# Only the leader of each instance accepts external requests on $VLLM_PORT.
171178if [[ "$LWS_WORKER_INDEX" -eq 0 ]]; then
172- wait_for_server_ready --port "$VLLM_PORT" --server-log "$VLLM_LOG" --server-pid "$VLLM_PID"
173- echo "vLLM leader ready on rank $NODE_RANK"
174-
175179 # ------------------------------------------------------------
176180 # Start pd-sidecar on each leader (prefill leader and decode leader).
177181 # The decode-side sidecar is what EPP routes to; the prefill-side
You can’t perform that action at this time.
0 commit comments