Skip to content

Commit f62ca37

Browse files
committed
cleanup
1 parent f4e10c9 commit f62ca37

5 files changed

Lines changed: 39 additions & 35 deletions

File tree

benchmarks/dsr1_fp8_h200_trt_slurm.sh

Lines changed: 10 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -71,17 +71,19 @@ PYTHONNOUSERSITE=1 mpirun -n 1 --oversubscribe --allow-run-as-root \
7171
> $SERVER_LOG 2>&1 &
7272

7373

74+
# Show logs until server is ready
75+
tail -f $SERVER_LOG &
76+
TAIL_PID=$!
7477
set +x
75-
while IFS= read -r line; do
76-
printf '%s\n' "$line"
77-
if [[ "$line" == *"Application startup complete"* ]]; then
78-
break
79-
fi
80-
done < <(tail -F -n0 "$SERVER_LOG")
78+
until curl --output /dev/null --silent --fail http://0.0.0.0:$PORT/health; do
79+
sleep 5
80+
done
81+
kill $TAIL_PID
8182

82-
git clone https://github.com/kimbochen/bench_serving.git
8383
set -x
84-
python3 bench_serving/benchmark_serving.py \
84+
BENCH_SERVING_DIR=$(mktemp -d /tmp/bmk-XXXXXX)
85+
git clone https://github.com/kimbochen/bench_serving.git $BENCH_SERVING_DIR
86+
python3 $BENCH_SERVING_DIR/benchmark_serving.py \
8587
--model $MODEL --backend openai \
8688
--base-url http://0.0.0.0:$PORT \
8789
--dataset-name random \

benchmarks/gptoss_fp4_h100_docker.sh

Lines changed: 8 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -22,26 +22,29 @@ max-model-len: 10240
2222
EOF
2323

2424
export PYTHONNOUSERSITE=1
25+
SERVER_LOG=$(mktemp /tmp/server-XXXXXX.log)
2526

2627
set -x
2728
vllm serve $MODEL --host=0.0.0.0 --port=$PORT \
2829
--config config.yaml \
2930
--gpu-memory-utilization=0.9 \
3031
--tensor-parallel-size=$TP \
3132
--max-num-seqs=$CONC \
32-
--disable-log-requests 2>&1 | tee $(mktemp /tmp/server-XXXXXX.log) &
33+
--disable-log-requests > $SERVER_LOG 2>&1 &
3334

34-
# Show server logs til' it is up, then stop showing
35+
# Show logs until server is ready
36+
tail -f $SERVER_LOG &
37+
TAIL_PID=$!
3538
set +x
36-
until curl --output /dev/null --silent --fail http://localhost:$PORT/health; do
39+
until curl --output /dev/null --silent --fail http://0.0.0.0:$PORT/health; do
3740
sleep 5
3841
done
39-
pkill -P $$ tee 2>/dev/null
42+
kill $TAIL_PID
4043

4144
pip install -q datasets pandas
45+
set -x
4246
BENCH_SERVING_DIR=$(mktemp -d /tmp/bmk-XXXXXX)
4347
git clone https://github.com/kimbochen/bench_serving.git $BENCH_SERVING_DIR
44-
set -x
4548
python3 $BENCH_SERVING_DIR/benchmark_serving.py \
4649
--model=$MODEL \
4750
--backend=vllm \

benchmarks/gptoss_fp4_h100_slurm.sh

Lines changed: 10 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -35,18 +35,20 @@ PYTHONNOUSERSITE=1 vllm serve $MODEL --host=0.0.0.0 --port=$PORT \
3535
--max-num-seqs=$CONC \
3636
--disable-log-requests > $SERVER_LOG 2>&1 &
3737

38+
# Show logs until server is ready
39+
tail -f $SERVER_LOG &
40+
TAIL_PID=$!
3841
set +x
39-
while IFS= read -r line; do
40-
printf '%s\n' "$line"
41-
if [[ "$line" == *"Application startup complete"* ]]; then
42-
break
43-
fi
44-
done < <(tail -F -n0 "$SERVER_LOG")
42+
until curl --output /dev/null --silent --fail http://0.0.0.0:$PORT/health; do
43+
sleep 5
44+
done
45+
kill $TAIL_PID
4546

4647
pip install -q datasets pandas
47-
git clone https://github.com/kimbochen/bench_serving.git
4848
set -x
49-
python3 bench_serving/benchmark_serving.py \
49+
BENCH_SERVING_DIR=$(mktemp -d /tmp/bmk-XXXXXX)
50+
git clone https://github.com/kimbochen/bench_serving.git $BENCH_SERVING_DIR
51+
python3 $BENCH_SERVING_DIR/benchmark_serving.py \
5052
--model=$MODEL \
5153
--backend=vllm \
5254
--base-url="http://0.0.0.0:$PORT" \

benchmarks/gptoss_fp4_h200_trt_slurm.sh

Lines changed: 1 addition & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,7 @@
1919
echo "JOB $SLURM_JOB_ID running on $SLURMD_NODENAME"
2020

2121
hf download $MODEL
22-
# SERVER_LOG=$(mktemp /tmp/server-XXXXXX.log)
22+
SERVER_LOG=$(mktemp /tmp/server-XXXXXX.log)
2323
PORT=$(( 8888 + $PORT_OFFSET ))
2424

2525

@@ -44,9 +44,6 @@ print_iter_log: true
4444
stream_interval: 20
4545
EOF
4646

47-
SERVER_LOG=$(mktemp /tmp/server-XXXXXX.log)
48-
49-
#mpirun -n 1 --oversubscribe --allow-run-as-root trtllm-serve $MODEL --tp_size $TP --trust_remote_code --max_seq_len $MAX_MODEL_LEN --max_num_tokens $MAX_MODEL_LEN --num_postprocess_workers 2 --extra_llm_api_options llama-config.yml --port $PORT > $SERVER_LOG 2>&1 &
5047
mpirun -n 1 --oversubscribe --allow-run-as-root \
5148
trtllm-serve $MODEL \
5249
--max_batch_size $CONC \
@@ -65,8 +62,6 @@ trtllm-serve $MODEL \
6562
# Show logs until server is ready
6663
tail -f $SERVER_LOG &
6764
TAIL_PID=$!
68-
69-
# Show server logs til' it is up, then stop showing
7065
set +x
7166
until curl --output /dev/null --silent --fail http://0.0.0.0:$PORT/health; do
7267
sleep 5

benchmarks/gptoss_fp4_mi355x_slurm.sh

Lines changed: 10 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -38,17 +38,19 @@ vllm serve $MODEL --port $PORT \
3838
--disable-log-requests \
3939
--async-scheduling > $SERVER_LOG 2>&1 &
4040

41+
# Show logs until server is ready
42+
tail -f $SERVER_LOG &
43+
TAIL_PID=$!
4144
set +x
42-
while IFS= read -r line; do
43-
printf '%s\n' "$line"
44-
if [[ "$line" == *"Application startup complete"* ]]; then
45-
break
46-
fi
47-
done < <(tail -F -n0 "$SERVER_LOG")
45+
until curl --output /dev/null --silent --fail http://0.0.0.0:$PORT/health; do
46+
sleep 5
47+
done
48+
kill $TAIL_PID
4849

4950
set -x
50-
git clone https://github.com/kimbochen/bench_serving.git
51-
python3 bench_serving/benchmark_serving.py \
51+
BENCH_SERVING_DIR=$(mktemp -d /tmp/bmk-XXXXXX)
52+
git clone https://github.com/kimbochen/bench_serving.git $BENCH_SERVING_DIR
53+
python3 $BENCH_SERVING_DIR/benchmark_serving.py \
5254
--model $MODEL --backend vllm \
5355
--base-url "http://0.0.0.0:$PORT" \
5456
--dataset-name random \

0 commit comments

Comments
 (0)