Skip to content

Commit fb34a45

Browse files
committed
adding h200 initial refactor
1 parent 6b37939 commit fb34a45

1 file changed

Lines changed: 23 additions & 11 deletions

File tree

benchmarks/gptoss_fp4_h200_trt_slurm.sh

Lines changed: 23 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
echo "JOB $SLURM_JOB_ID running on $SLURMD_NODENAME"
2020

2121
hf download $MODEL
22-
SERVER_LOG=$(mktemp /tmp/server-XXXXXX.log)
22+
# SERVER_LOG=$(mktemp /tmp/server-XXXXXX.log)
2323
PORT=$(( 8888 + $PORT_OFFSET ))
2424

2525

@@ -45,20 +45,32 @@ stream_interval: 20
4545
EOF
4646

4747
#mpirun -n 1 --oversubscribe --allow-run-as-root trtllm-serve $MODEL --tp_size $TP --trust_remote_code --max_seq_len $MAX_MODEL_LEN --max_num_tokens $MAX_MODEL_LEN --num_postprocess_workers 2 --extra_llm_api_options llama-config.yml --port $PORT > $SERVER_LOG 2>&1 &
48-
mpirun -n 1 --oversubscribe --allow-run-as-root trtllm-serve $MODEL --max_batch_size $CONC --max_num_tokens 20000 --backend pytorch --extra_llm_api_options gptoss-config.yml --ep_size=$EP_SIZE --trust_remote_code --gpus_per_node 8 --host 0.0.0.0 --port $PORT --tp_size=$TP --pp_size=1 > $SERVER_LOG 2>&1 &
49-
48+
mpirun -n 1 --oversubscribe --allow-run-as-root \
49+
trtllm-serve $MODEL \
50+
--max_batch_size $CONC \
51+
--max_num_tokens 20000 \
52+
--backend pytorch \
53+
--extra_llm_api_options gptoss-config.yml \
54+
--ep_size=$EP_SIZE \
55+
--trust_remote_code \
56+
--gpus_per_node 8 \
57+
--host 0.0.0.0 \
58+
--port $PORT \
59+
--tp_size=$TP \
60+
--pp_size=1 \
61+
2>&1 | tee $(mktemp /tmp/server-XXXXXX.log) &
5062

63+
# Show server logs until the server is up, then stop showing them
5164
set +x
52-
while IFS= read -r line; do
53-
printf '%s\n' "$line"
54-
if [[ "$line" == *"Application startup complete"* ]]; then
55-
break
56-
fi
57-
done < <(tail -F -n0 "$SERVER_LOG")
65+
until curl --output /dev/null --silent --fail http://0.0.0.0:$PORT/health; do
66+
sleep 5
67+
done
68+
pkill -P $$ tee 2>/dev/null
5869

5970
set -x
60-
git clone https://github.com/kimbochen/bench_serving.git
61-
python3 bench_serving/benchmark_serving.py \
71+
BENCH_SERVING_DIR=$(mktemp -d /tmp/bmk-XXXXXX)
72+
git clone https://github.com/kimbochen/bench_serving.git $BENCH_SERVING_DIR
73+
python3 $BENCH_SERVING_DIR/benchmark_serving.py \
6274
--model $MODEL --backend openai \
6375
--base-url http://0.0.0.0:$PORT \
6476
--dataset-name random \

0 commit comments

Comments
 (0)