Skip to content

Commit b4eb57e

Browse files
committed
cleaning up
1 parent 9806d30 commit b4eb57e

7 files changed

Lines changed: 72 additions & 50 deletions

benchmarks/dsr1_fp4_mi355x_docker.sh

Lines changed: 26 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,8 @@ if [[ "$ISL" == "8192" && "$OSL" == "1024" ]]; then
1818
fi
1919
fi
2020

21+
SERVER_LOG=$(mktemp /tmp/server-XXXXXX.log)
22+
2123
set -x
2224
python3 -m sglang.launch_server --model-path=$MODEL --trust-remote-code \
2325
--host=0.0.0.0 --port=$PORT \
@@ -27,5 +29,28 @@ python3 -m sglang.launch_server --model-path=$MODEL --trust-remote-code \
2729
--disable-radix-cache \
2830
--num-continuous-decode-steps=4 \
2931
--max-prefill-tokens=$PREFILL_SIZE \
30-
--cuda-graph-max-bs=128
32+
--cuda-graph-max-bs=128 > $SERVER_LOG 2>&1 &
33+
34+
# Show logs until server is ready
35+
tail -f $SERVER_LOG &
36+
TAIL_PID=$!
37+
set +x
38+
until curl --output /dev/null --silent --fail http://0.0.0.0:$PORT/health; do
39+
sleep 5
40+
done
41+
kill $TAIL_PID
42+
43+
set -x
44+
BENCH_SERVING_DIR=$(mktemp -d /tmp/bmk-XXXXXX)
45+
git clone https://github.com/kimbochen/bench_serving.git $BENCH_SERVING_DIR
46+
python3 $BENCH_SERVING_DIR/benchmark_serving.py \
47+
--model=$MODEL --backend=vllm --base-url="http://localhost:$PORT" \
48+
--dataset-name=random \
49+
--random-input-len=$ISL --random-output-len=$OSL --random-range-ratio=$RANDOM_RANGE_RATIO \
50+
--num-prompts=$NUM_PROMPTS \
51+
--max-concurrency=$CONC \
52+
--request-rate=inf --ignore-eos \
53+
--save-result --percentile-metrics="ttft,tpot,itl,e2el" \
54+
--result-dir=/workspace/ --result-filename=$RESULT_FILENAME.json
55+
3156

benchmarks/dsr1_fp4_mi355x_slurm.sh

Lines changed: 3 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -34,17 +34,10 @@ python3 -m sglang.launch_server --model-path=$MODEL --trust-remote-code \
3434
--cuda-graph-max-bs=128 \
3535
> $SERVER_LOG 2>&1 &
3636

37-
set +x
38-
while IFS= read -r line; do
39-
printf '%s\n' "$line"
40-
if [[ "$line" == *"The server is fired up and ready to roll"* ]]; then
41-
break
42-
fi
43-
done < <(tail -F -n0 "$SERVER_LOG")
44-
4537
set -x
46-
git clone https://github.com/kimbochen/bench_serving.git
47-
python3 bench_serving/benchmark_serving.py \
38+
BENCH_SERVING_DIR=$(mktemp -d /tmp/bmk-XXXXXX)
39+
git clone https://github.com/kimbochen/bench_serving.git $BENCH_SERVING_DIR
40+
python3 $BENCH_SERVING_DIR/benchmark_serving.py \
4841
--model $MODEL --backend vllm \
4942
--base-url "http://0.0.0.0:$PORT" \
5043
--dataset-name random \

benchmarks/dsr1_fp8_h200_slurm.sh

Lines changed: 3 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -44,17 +44,10 @@ else
4444
> $SERVER_LOG 2>&1 &
4545
fi
4646

47-
set +x
48-
while IFS= read -r line; do
49-
printf '%s\n' "$line"
50-
if [[ "$line" == *"Application startup complete"* ]]; then
51-
break
52-
fi
53-
done < <(tail -F -n0 "$SERVER_LOG")
54-
5547
set -x
56-
git clone https://github.com/kimbochen/bench_serving.git
57-
python3 bench_serving/benchmark_serving.py \
48+
BENCH_SERVING_DIR=$(mktemp -d /tmp/bmk-XXXXXX)
49+
git clone https://github.com/kimbochen/bench_serving.git $BENCH_SERVING_DIR
50+
python3 $BENCH_SERVING_DIR/benchmark_serving.py \
5851
--model $MODEL --backend vllm \
5952
--base-url http://0.0.0.0:$PORT \
6053
--dataset-name random \

benchmarks/dsr1_fp8_mi355x_docker.sh

Lines changed: 9 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,8 @@
1414

1515
export SGLANG_USE_AITER=1
1616

17+
SERVER_LOG=$(mktemp /tmp/server-XXXXXX.log)
18+
1719
python3 -m sglang.launch_server \
1820
--model-path $MODEL \
1921
--host=0.0.0.0 \
@@ -24,13 +26,16 @@ python3 -m sglang.launch_server \
2426
--mem-fraction-static 0.8 --disable-radix-cache \
2527
--num-continuous-decode-steps 4 \
2628
--max-prefill-tokens 196608 \
27-
--cuda-graph-max-bs 128 | tee $(mktemp /tmp/server-XXXXXX.log) &
29+
--cuda-graph-max-bs 128 > $SERVER_LOG 2>&1 &
2830

31+
# Show logs until server is ready
32+
tail -f $SERVER_LOG &
33+
TAIL_PID=$!
2934
set +x
30-
until curl --output /dev/null --silent --fail http://localhost:$PORT/health; do
35+
until curl --output /dev/null --silent --fail http://0.0.0.0:$PORT/health; do
3136
sleep 5
3237
done
33-
pkill -P $$ tee 2>/dev/null
38+
kill $TAIL_PID
3439

3540
if [[ "$MODEL" == "amd/DeepSeek-R1-0528-MXFP4-Preview" || "$MODEL" == "deepseek-ai/DeepSeek-R1-0528" ]]; then
3641
if [[ "$OSL" == "8192" ]]; then
@@ -42,9 +47,9 @@ else
4247
NUM_PROMPTS=$(( CONC * 10 ))
4348
fi
4449

50+
set -x
4551
BENCH_SERVING_DIR=$(mktemp -d /tmp/bmk-XXXXXX)
4652
git clone https://github.com/kimbochen/bench_serving.git $BENCH_SERVING_DIR
47-
set -x
4853
python3 $BENCH_SERVING_DIR/benchmark_serving.py \
4954
--model=$MODEL --backend=vllm --base-url="http://localhost:$PORT" \
5055
--dataset-name=random \

benchmarks/dsr1_fp8_mi355x_slurm.sh

Lines changed: 10 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -32,17 +32,19 @@ python3 -m sglang.launch_server \
3232
--max-prefill-tokens 196608 \
3333
--cuda-graph-max-bs 128 > $SERVER_LOG 2>&1 &
3434

35+
# Show logs until server is ready
36+
tail -f $SERVER_LOG &
37+
TAIL_PID=$!
3538
set +x
36-
while IFS= read -r line; do
37-
printf '%s\n' "$line"
38-
if [[ "$line" == *"The server is fired up and ready to roll"* ]]; then
39-
break
40-
fi
41-
done < <(tail -F -n0 "$SERVER_LOG")
39+
until curl --output /dev/null --silent --fail http://0.0.0.0:$PORT/health; do
40+
sleep 5
41+
done
42+
kill $TAIL_PID
4243

4344
set -x
44-
git clone https://github.com/kimbochen/bench_serving.git
45-
python3 bench_serving/benchmark_serving.py \
45+
BENCH_SERVING_DIR=$(mktemp -d /tmp/bmk-XXXXXX)
46+
git clone https://github.com/kimbochen/bench_serving.git $BENCH_SERVING_DIR
47+
python3 $BENCH_SERVING_DIR/benchmark_serving.py \
4648
--model $MODEL --backend vllm \
4749
--base-url "http://0.0.0.0:$PORT" \
4850
--dataset-name random \

benchmarks/gptoss_fp4_b200_trt_slurm.sh

Lines changed: 11 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -79,17 +79,19 @@ mpirun -n 1 --oversubscribe --allow-run-as-root \
7979
> $SERVER_LOG 2>&1 &
8080

8181

82+
# Show logs until server is ready
83+
tail -f $SERVER_LOG &
84+
TAIL_PID=$!
8285
set +x
83-
while IFS= read -r line; do
84-
printf '%s\n' "$line"
85-
if [[ "$line" == *"Application startup complete"* ]]; then
86-
break
87-
fi
88-
done < <(tail -F -n0 "$SERVER_LOG")
89-
90-
git clone https://github.com/kimbochen/bench_serving.git
86+
until curl --output /dev/null --silent --fail http://0.0.0.0:$PORT/health; do
87+
sleep 5
88+
done
89+
kill $TAIL_PID
90+
9191
set -x
92-
python3 bench_serving/benchmark_serving.py \
92+
BENCH_SERVING_DIR=$(mktemp -d /tmp/bmk-XXXXXX)
93+
git clone https://github.com/kimbochen/bench_serving.git $BENCH_SERVING_DIR
94+
python3 $BENCH_SERVING_DIR/benchmark_serving.py \
9395
--model $MODEL --backend openai \
9496
--base-url http://0.0.0.0:$PORT \
9597
--dataset-name random \

benchmarks/gptoss_fp4_h200_slurm.sh

Lines changed: 10 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -48,17 +48,19 @@ PYTHONNOUSERSITE=1 vllm serve $MODEL --host 0.0.0.0 --port $PORT --config config
4848
--gpu-memory-utilization 0.9 --tensor-parallel-size $TP --max-num-seqs $CONC \
4949
--disable-log-requests > $SERVER_LOG 2>&1 &
5050

51+
# Show logs until server is ready
52+
tail -f $SERVER_LOG &
53+
TAIL_PID=$!
5154
set +x
52-
while IFS= read -r line; do
53-
printf '%s\n' "$line"
54-
if [[ "$line" == *"Application startup complete"* ]]; then
55-
break
56-
fi
57-
done < <(tail -F -n0 "$SERVER_LOG")
55+
until curl --output /dev/null --silent --fail http://0.0.0.0:$PORT/health; do
56+
sleep 5
57+
done
58+
kill $TAIL_PID
5859

5960
set -x
60-
git clone https://github.com/kimbochen/bench_serving.git
61-
python3 bench_serving/benchmark_serving.py \
61+
BENCH_SERVING_DIR=$(mktemp -d /tmp/bmk-XXXXXX)
62+
git clone https://github.com/kimbochen/bench_serving.git $BENCH_SERVING_DIR
63+
python3 $BENCH_SERVING_DIR/benchmark_serving.py \
6264
--model $MODEL --backend vllm \
6365
--base-url http://0.0.0.0:$PORT \
6466
--dataset-name random \

0 commit comments

Comments
 (0)