File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -71,17 +71,19 @@ PYTHONNOUSERSITE=1 mpirun -n 1 --oversubscribe --allow-run-as-root \
7171 > $SERVER_LOG 2>&1 &
7272
7373
74+ # Show logs until server is ready
75+ tail -f $SERVER_LOG &
76+ TAIL_PID=$!
7477set +x
75- while IFS= read -r line; do
76- printf ' %s\n' " $line "
77- if [[ " $line " == * " Application startup complete" * ]]; then
78- break
79- fi
80- done < <( tail -F -n0 " $SERVER_LOG " )
78+ until curl --output /dev/null --silent --fail http://0.0.0.0:$PORT /health; do
79+ sleep 5
80+ done
81+ kill $TAIL_PID
8182
82- git clone https://github.com/kimbochen/bench_serving.git
8383set -x
84- python3 bench_serving/benchmark_serving.py \
84+ BENCH_SERVING_DIR=$( mktemp -d /tmp/bmk-XXXXXX)
85+ git clone https://github.com/kimbochen/bench_serving.git $BENCH_SERVING_DIR
86+ python3 $BENCH_SERVING_DIR /benchmark_serving.py \
8587--model $MODEL --backend openai \
8688--base-url http://0.0.0.0:$PORT \
8789--dataset-name random \
Original file line number Diff line number Diff line change @@ -22,26 +22,29 @@ max-model-len: 10240
2222EOF
2323
2424export PYTHONNOUSERSITE=1
25+ SERVER_LOG=$( mktemp /tmp/server-XXXXXX.log)
2526
2627set -x
2728vllm serve $MODEL --host=0.0.0.0 --port=$PORT \
2829--config config.yaml \
2930--gpu-memory-utilization=0.9 \
3031--tensor-parallel-size=$TP \
3132--max-num-seqs=$CONC \
32- --disable-log-requests 2>&1 | tee $( mktemp /tmp/server-XXXXXX.log ) &
33+ --disable-log-requests > $SERVER_LOG 2>&1 &
3334
34- # Show server logs til' it is up, then stop showing
35+ # Show logs until server is ready
36+ tail -f $SERVER_LOG &
37+ TAIL_PID=$!
3538set +x
36- until curl --output /dev/null --silent --fail http://localhost :$PORT /health; do
39+ until curl --output /dev/null --silent --fail http://0.0.0.0 :$PORT /health; do
3740 sleep 5
3841done
39- pkill -P $$ tee 2> /dev/null
42+ kill $TAIL_PID
4043
4144pip install -q datasets pandas
45+ set -x
4246BENCH_SERVING_DIR=$( mktemp -d /tmp/bmk-XXXXXX)
4347git clone https://github.com/kimbochen/bench_serving.git $BENCH_SERVING_DIR
44- set -x
4548python3 $BENCH_SERVING_DIR /benchmark_serving.py \
4649--model=$MODEL \
4750--backend=vllm \
Original file line number Diff line number Diff line change @@ -35,18 +35,20 @@ PYTHONNOUSERSITE=1 vllm serve $MODEL --host=0.0.0.0 --port=$PORT \
3535--max-num-seqs=$CONC \
3636--disable-log-requests > $SERVER_LOG 2>&1 &
3737
38+ # Show logs until server is ready
39+ tail -f $SERVER_LOG &
40+ TAIL_PID=$!
3841set +x
39- while IFS= read -r line; do
40- printf ' %s\n' " $line "
41- if [[ " $line " == * " Application startup complete" * ]]; then
42- break
43- fi
44- done < <( tail -F -n0 " $SERVER_LOG " )
42+ until curl --output /dev/null --silent --fail http://0.0.0.0:$PORT /health; do
43+ sleep 5
44+ done
45+ kill $TAIL_PID
4546
4647pip install -q datasets pandas
47- git clone https://github.com/kimbochen/bench_serving.git
4848set -x
49- python3 bench_serving/benchmark_serving.py \
49+ BENCH_SERVING_DIR=$( mktemp -d /tmp/bmk-XXXXXX)
50+ git clone https://github.com/kimbochen/bench_serving.git $BENCH_SERVING_DIR
51+ python3 $BENCH_SERVING_DIR /benchmark_serving.py \
5052--model=$MODEL \
5153--backend=vllm \
5254--base-url=" http://0.0.0.0:$PORT " \
Original file line number Diff line number Diff line change 1919echo " JOB $SLURM_JOB_ID running on $SLURMD_NODENAME "
2020
2121hf download $MODEL
22- # SERVER_LOG=$(mktemp /tmp/server-XXXXXX.log)
22+ SERVER_LOG=$( mktemp /tmp/server-XXXXXX.log)
2323PORT=$(( 8888 + $PORT_OFFSET ))
2424
2525
@@ -44,9 +44,6 @@ print_iter_log: true
4444stream_interval: 20
4545EOF
4646
47- SERVER_LOG=$( mktemp /tmp/server-XXXXXX.log)
48-
49- # mpirun -n 1 --oversubscribe --allow-run-as-root trtllm-serve $MODEL --tp_size $TP --trust_remote_code --max_seq_len $MAX_MODEL_LEN --max_num_tokens $MAX_MODEL_LEN --num_postprocess_workers 2 --extra_llm_api_options llama-config.yml --port $PORT > $SERVER_LOG 2>&1 &
5047mpirun -n 1 --oversubscribe --allow-run-as-root \
5148trtllm-serve $MODEL \
5249--max_batch_size $CONC \
@@ -65,8 +62,6 @@ trtllm-serve $MODEL \
6562# Show logs until server is ready
6663tail -f $SERVER_LOG &
6764TAIL_PID=$!
68-
69- # Show server logs til' it is up, then stop showing
7065set +x
7166until curl --output /dev/null --silent --fail http://0.0.0.0:$PORT /health; do
7267 sleep 5
Original file line number Diff line number Diff line change @@ -38,17 +38,19 @@ vllm serve $MODEL --port $PORT \
3838--disable-log-requests \
3939--async-scheduling > $SERVER_LOG 2>&1 &
4040
41+ # Show logs until server is ready
42+ tail -f $SERVER_LOG &
43+ TAIL_PID=$!
4144set +x
42- while IFS= read -r line; do
43- printf ' %s\n' " $line "
44- if [[ " $line " == * " Application startup complete" * ]]; then
45- break
46- fi
47- done < <( tail -F -n0 " $SERVER_LOG " )
45+ until curl --output /dev/null --silent --fail http://0.0.0.0:$PORT /health; do
46+ sleep 5
47+ done
48+ kill $TAIL_PID
4849
4950set -x
50- git clone https://github.com/kimbochen/bench_serving.git
51- python3 bench_serving/benchmark_serving.py \
51+ BENCH_SERVING_DIR=$( mktemp -d /tmp/bmk-XXXXXX)
52+ git clone https://github.com/kimbochen/bench_serving.git $BENCH_SERVING_DIR
53+ python3 $BENCH_SERVING_DIR /benchmark_serving.py \
5254--model $MODEL --backend vllm \
5355--base-url " http://0.0.0.0:$PORT " \
5456--dataset-name random \
You can’t perform that action at this time.
0 commit comments