1919echo " JOB $SLURM_JOB_ID running on $SLURMD_NODENAME "
2020
2121hf download $MODEL
22- SERVER_LOG=$( mktemp /tmp/server-XXXXXX.log)
22+ # SERVER_LOG=$(mktemp /tmp/server-XXXXXX.log)
2323PORT=$(( 8888 + $PORT_OFFSET ))
2424
2525
@@ -45,20 +45,32 @@ stream_interval: 20
4545EOF
4646
4747# mpirun -n 1 --oversubscribe --allow-run-as-root trtllm-serve $MODEL --tp_size $TP --trust_remote_code --max_seq_len $MAX_MODEL_LEN --max_num_tokens $MAX_MODEL_LEN --num_postprocess_workers 2 --extra_llm_api_options llama-config.yml --port $PORT > $SERVER_LOG 2>&1 &
48- mpirun -n 1 --oversubscribe --allow-run-as-root trtllm-serve $MODEL --max_batch_size $CONC --max_num_tokens 20000 --backend pytorch --extra_llm_api_options gptoss-config.yml --ep_size=$EP_SIZE --trust_remote_code --gpus_per_node 8 --host 0.0.0.0 --port $PORT --tp_size=$TP --pp_size=1 > $SERVER_LOG 2>&1 &
49-
48+ mpirun -n 1 --oversubscribe --allow-run-as-root \
49+ trtllm-serve $MODEL \
50+ --max_batch_size $CONC \
51+ --max_num_tokens 20000 \
52+ --backend pytorch \
53+ --extra_llm_api_options gptoss-config.yml \
54+ --ep_size=$EP_SIZE \
55+ --trust_remote_code \
56+ --gpus_per_node 8 \
57+ --host 0.0.0.0 \
58+ --port $PORT \
59+ --tp_size=$TP \
60+ --pp_size=1 \
61+ 2>&1 | tee $( mktemp /tmp/server-XXXXXX.log) &
5062
63+ # Show server logs until it is up, then stop showing
5164set +x
52- while IFS= read -r line; do
53- printf ' %s\n' " $line "
54- if [[ " $line " == * " Application startup complete" * ]]; then
55- break
56- fi
57- done < <( tail -F -n0 " $SERVER_LOG " )
65+ until curl --output /dev/null --silent --fail http://0.0.0.0:$PORT/health; do
66+ sleep 5
67+ done
68+ pkill -P $$ tee 2> /dev/null
5869
5970set -x
60- git clone https://github.com/kimbochen/bench_serving.git
61- python3 bench_serving/benchmark_serving.py \
71+ BENCH_SERVING_DIR=$( mktemp -d /tmp/bmk-XXXXXX)
72+ git clone https://github.com/kimbochen/bench_serving.git $BENCH_SERVING_DIR
73+ python3 $BENCH_SERVING_DIR/benchmark_serving.py \
6274--model $MODEL --backend openai \
6375--base-url http://0.0.0.0:$PORT \
6476--dataset-name random \
0 commit comments