We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent dc6d76d · commit 5e2d39e · Copy full SHA for 5e2d39e
1 file changed
benchmarks/gptoss_fp4_h100_docker.sh
@@ -29,10 +29,10 @@ vllm serve $MODEL --host=0.0.0.0 --port=$PORT \
29
--gpu-memory-utilization=0.9 \
30
--tensor-parallel-size=$TP \
31
--max-num-seqs=$CONC \
32
---disable-log-requests
+--disable-log-requests &
33
34
set +x
35
-until curl --output /dev/null --silent --head --fail http://localhost:$PORT/health; do
+until curl --output /dev/null --silent --fail http://localhost:$PORT/health; do
36
sleep 5
37
done
38
0 commit comments