Skip to content

Commit 37cc47c

Browse files
committed
function-ize the waiting for server to start
1 parent c84fae8 commit 37cc47c

27 files changed

Lines changed: 170 additions & 235 deletions

benchmarks/benchmark_lib.sh

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,74 @@
22

33
# Shared benchmarking utilities for InferenceMAX
44

5+
# Wait for the server to be ready by polling its health endpoint.
# While waiting, the server log is streamed to stdout via `tail -f`.
#
# Parameters:
#   --port <port>            Server port (required)
#   --server-log <path>      Path to server log file (required)
#   --server-pid <pid>       Server process ID (required)
#   --sleep-interval <secs>  Sleep interval between health checks
#                            (optional, default: 5)
#
# Returns:
#   0 once http://0.0.0.0:<port>/health answers successfully.
#   1 on an unknown parameter, a flag without a value, or a missing
#     required parameter.
# Exits the calling shell with status 1 if the server process dies before
# becoming healthy (callers invoke this without checking its status).
# Side effect: turns off xtrace (`set +x`) and leaves it off.
wait_for_server_ready() {
  local port=""
  local server_log=""
  local server_pid=""
  local sleep_interval=5
  local flag=""
  local tail_pid=""

  # Parse arguments
  while [[ $# -gt 0 ]]; do
    flag="$1"
    case "$flag" in
      --port | --server-log | --server-pid | --sleep-interval) ;;
      *)
        echo "Unknown parameter: $flag" >&2
        return 1
        ;;
    esac

    # `shift 2` fails and shifts nothing when only the flag is left, which
    # would make this loop spin forever; reject a flag without a value.
    if [[ $# -lt 2 ]]; then
      echo "Error: $flag requires a value" >&2
      return 1
    fi

    case "$flag" in
      --port) port="$2" ;;
      --server-log) server_log="$2" ;;
      --server-pid) server_pid="$2" ;;
      --sleep-interval) sleep_interval="$2" ;;
    esac
    shift 2
  done

  # Validate required parameters
  if [[ -z "$port" ]]; then
    echo "Error: --port is required" >&2
    return 1
  fi
  if [[ -z "$server_log" ]]; then
    echo "Error: --server-log is required" >&2
    return 1
  fi
  if [[ -z "$server_pid" ]]; then
    echo "Error: --server-pid is required" >&2
    return 1
  fi

  # Show logs until server is ready
  tail -f "$server_log" &
  tail_pid=$!
  # Keep the polling loop out of the trace output.
  set +x
  until curl --output /dev/null --silent --fail "http://0.0.0.0:${port}/health"; do
    # kill -0 sends no signal; it only tests whether the process still exists.
    if ! kill -0 "$server_pid" 2>/dev/null; then
      echo "Server died before becoming healthy. Exiting." >&2
      kill "$tail_pid" 2>/dev/null || true
      exit 1
    fi
    sleep "$sleep_interval"
  done

  # Stop the log stream. tail may already be gone, and that must not turn a
  # healthy server into a non-zero return status.
  kill "$tail_pid" 2>/dev/null || true
  # Reap tail so the shell does not print a "Terminated" job notice later.
  wait "$tail_pid" 2>/dev/null || true
  return 0
}
72+
573
# Run benchmark serving with standardized parameters
674
# All parameters are required
775
# Parameters:

benchmarks/dsr1_fp4_b200_docker.sh

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -26,20 +26,16 @@ PYTHONNOUSERSITE=1 python3 -m sglang.launch_server --model-path $MODEL --host 0.
2626
--ep-size $EP_SIZE --quantization modelopt_fp4 --enable-flashinfer-allreduce-fusion --scheduler-recv-interval $SCHEDULER_RECV_INTERVAL \
2727
--enable-symm-mem --disable-radix-cache --attention-backend trtllm_mla --moe-runner-backend flashinfer_trtllm --stream-interval 10 > $SERVER_LOG 2>&1 &
2828

29-
# Show logs until server is ready
30-
tail -f $SERVER_LOG &
31-
TAIL_PID=$!
32-
set +x
33-
until curl --output /dev/null --silent --fail http://0.0.0.0:$PORT/health; do
34-
sleep 5
35-
done
36-
kill $TAIL_PID
37-
38-
pip install -q datasets pandas
29+
SERVER_PID=$!
3930

4031
# Source benchmark utilities
4132
source "$(dirname "$0")/benchmark_lib.sh"
4233

34+
# Wait for server to be ready
35+
wait_for_server_ready --port "$PORT" --server-log "$SERVER_LOG" --server-pid "$SERVER_PID"
36+
37+
pip install -q datasets pandas
38+
4339
set -x
4440
run_benchmark_serving \
4541
--model "$MODEL" \

benchmarks/dsr1_fp4_b200_trt_slurm.sh

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -100,19 +100,14 @@ mpirun -n 1 --oversubscribe --allow-run-as-root \
100100
--extra_llm_api_options=$EXTRA_CONFIG_FILE \
101101
> $SERVER_LOG 2>&1 &
102102

103-
104-
# Show logs until server is ready
105-
tail -f $SERVER_LOG &
106-
TAIL_PID=$!
107-
set +x
108-
until curl --output /dev/null --silent --fail http://0.0.0.0:$PORT/health; do
109-
sleep 5
110-
done
111-
kill $TAIL_PID
103+
SERVER_PID=$!
112104

113105
# Source benchmark utilities
114106
source "$(dirname "$0")/benchmark_lib.sh"
115107

108+
# Wait for server to be ready
109+
wait_for_server_ready --port "$PORT" --server-log "$SERVER_LOG" --server-pid "$SERVER_PID"
110+
116111
set -x
117112
run_benchmark_serving \
118113
--model "$MODEL" \

benchmarks/dsr1_fp4_mi355x_docker.sh

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -31,18 +31,14 @@ python3 -m sglang.launch_server --model-path=$MODEL --trust-remote-code \
3131
--max-prefill-tokens=$PREFILL_SIZE \
3232
--cuda-graph-max-bs=128 > $SERVER_LOG 2>&1 &
3333

34-
# Show logs until server is ready
35-
tail -f $SERVER_LOG &
36-
TAIL_PID=$!
37-
set +x
38-
until curl --output /dev/null --silent --fail http://0.0.0.0:$PORT/health; do
39-
sleep 5
40-
done
41-
kill $TAIL_PID
34+
SERVER_PID=$!
4235

4336
# Source benchmark utilities
4437
source "$(dirname "$0")/benchmark_lib.sh"
4538

39+
# Wait for server to be ready
40+
wait_for_server_ready --port "$PORT" --server-log "$SERVER_LOG" --server-pid "$SERVER_PID"
41+
4642
set -x
4743
run_benchmark_serving \
4844
--model "$MODEL" \

benchmarks/dsr1_fp4_mi355x_slurm.sh

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -34,18 +34,12 @@ python3 -m sglang.launch_server --model-path=$MODEL --trust-remote-code \
3434
--cuda-graph-max-bs=128 \
3535
> $SERVER_LOG 2>&1 &
3636

37-
# Show logs until server is ready
38-
tail -f $SERVER_LOG &
39-
TAIL_PID=$!
40-
set +x
41-
until curl --output /dev/null --silent --fail http://0.0.0.0:$PORT/health; do
42-
sleep 5
43-
done
44-
kill $TAIL_PID
45-
4637
# Source benchmark utilities
4738
source "$(dirname "$0")/benchmark_lib.sh"
4839

40+
# Wait for server to be ready
41+
wait_for_server_ready --port "$PORT" --server-log "$SERVER_LOG" --server-pid "$SERVER_PID"
42+
4943
set -x
5044
run_benchmark_serving \
5145
--model "$MODEL" \

benchmarks/dsr1_fp8_b200_docker.sh

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -37,20 +37,16 @@ PYTHONNOUSERSITE=1 python3 -m sglang.launch_server --model-path=$MODEL --host=0.
3737
--enable-flashinfer-allreduce-fusion --scheduler-recv-interval $SCHEDULER_RECV_INTERVAL --disable-radix-cache \
3838
--attention-backend trtllm_mla --stream-interval 30 --ep-size $EP_SIZE --moe-runner-backend flashinfer_trtllm --quantization fp8 > $SERVER_LOG 2>&1 &
3939

40-
# Show logs until server is ready
41-
tail -f $SERVER_LOG &
42-
TAIL_PID=$!
43-
set +x
44-
until curl --output /dev/null --silent --fail http://0.0.0.0:$PORT/health; do
45-
sleep 5
46-
done
47-
kill $TAIL_PID
48-
49-
pip install -q datasets pandas
40+
SERVER_PID=$!
5041

5142
# Source benchmark utilities
5243
source "$(dirname "$0")/benchmark_lib.sh"
5344

45+
# Wait for server to be ready
46+
wait_for_server_ready --port "$PORT" --server-log "$SERVER_LOG" --server-pid "$SERVER_PID"
47+
48+
pip install -q datasets pandas
49+
5450
set -x
5551
run_benchmark_serving \
5652
--model "$MODEL" \

benchmarks/dsr1_fp8_b200_trt_slurm.sh

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -72,18 +72,12 @@ mpirun -n 1 --oversubscribe --allow-run-as-root \
7272

7373
SERVER_PID=$!
7474

75-
# Show logs until server is ready
76-
tail -f $SERVER_LOG &
77-
TAIL_PID=$!
78-
set +x
79-
until curl --output /dev/null --silent --fail http://0.0.0.0:$PORT/health; do
80-
sleep 5
81-
done
82-
kill $TAIL_PID
83-
8475
# Source benchmark utilities
8576
source "$(dirname "$0")/benchmark_lib.sh"
8677

78+
# Wait for server to be ready
79+
wait_for_server_ready --port "$PORT" --server-log "$SERVER_LOG" --server-pid "$SERVER_PID"
80+
8781
set -x
8882
run_benchmark_serving \
8983
--model "$MODEL" \

benchmarks/dsr1_fp8_h200_slurm.sh

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -44,18 +44,14 @@ else
4444
> $SERVER_LOG 2>&1 &
4545
fi
4646

47-
# Show logs until server is ready
48-
tail -f $SERVER_LOG &
49-
TAIL_PID=$!
50-
set +x
51-
until curl --output /dev/null --silent --fail http://0.0.0.0:$PORT/health; do
52-
sleep 5
53-
done
54-
kill $TAIL_PID
47+
SERVER_PID=$!
5548

5649
# Source benchmark utilities
5750
source "$(dirname "$0")/benchmark_lib.sh"
5851

52+
# Wait for server to be ready
53+
wait_for_server_ready --port "$PORT" --server-log "$SERVER_LOG" --server-pid "$SERVER_PID"
54+
5955
set -x
6056
run_benchmark_serving \
6157
--model "$MODEL" \

benchmarks/dsr1_fp8_h200_trt_slurm.sh

Lines changed: 3 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -72,22 +72,12 @@ PYTHONNOUSERSITE=1 mpirun -n 1 --oversubscribe --allow-run-as-root \
7272

7373
SERVER_PID=$!
7474

75-
# Show logs until server is ready
76-
tail -f $SERVER_LOG &
77-
TAIL_PID=$!
78-
set +x
79-
until curl --output /dev/null --silent --fail http://0.0.0.0:$PORT/health; do
80-
if ! kill -0 $SERVER_PID 2>/dev/null; then
81-
echo "Server died before becoming healthy. Exiting."
82-
exit 1
83-
fi
84-
sleep 5
85-
done
86-
kill $TAIL_PID
87-
8875
# Source benchmark utilities
8976
source "$(dirname "$0")/benchmark_lib.sh"
9077

78+
# Wait for server to be ready
79+
wait_for_server_ready --port "$PORT" --server-log "$SERVER_LOG" --server-pid "$SERVER_PID"
80+
9181
set -x
9282
run_benchmark_serving \
9383
--model "$MODEL" \

benchmarks/dsr1_fp8_mi300x_docker.sh

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,9 @@ kill $TAIL_PID
5050
# Source benchmark utilities
5151
source "$(dirname "$0")/benchmark_lib.sh"
5252

53+
# Wait for server to be ready
54+
wait_for_server_ready --port "$PORT" --server-log "$SERVER_LOG" --server-pid "$SERVER_PID"
55+
5356
set -x
5457
run_benchmark_serving \
5558
--model "$MODEL" \

0 commit comments

Comments
 (0)