We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent a99755d commit 07fc926 Copy full SHA for 07fc926
1 file changed
benchmarks/gptoss_fp4_h100_docker.sh
@@ -29,18 +29,16 @@ vllm serve $MODEL --host=0.0.0.0 --port=$PORT \
29
--gpu-memory-utilization=0.9 \
30
--tensor-parallel-size=$TP \
31
--max-num-seqs=$CONC \
32
---disable-log-requests &
+--disable-log-requests 2>&1 | tee $(mktemp /tmp/server-XXXXXX.log) &
33
34
-SERVER_PID=$!
+VLLM_PID=$!
35
set +x
36
-tail -f /tmp/vllm_server.log &
37
-TAIL_PID=$!
38
39
until curl --output /dev/null --silent --fail http://localhost:$PORT/health; do
40
sleep 5
41
done
42
43
-kill $TAIL_PID 2>/dev/null
+pkill -P $$ tee 2>/dev/null
44
45
pip install -q datasets pandas
46
git clone https://github.com/kimbochen/bench_serving.git
0 commit comments