@@ -30,4 +30,40 @@ vllm serve $MODEL --port $PORT \
3030--block-size=64 \
3131--no-enable-prefix-caching \
3232--disable-log-requests \
33- --async-scheduling
33+ --async-scheduling | tee $( mktemp /tmp/server-XXXXXX.log) &
34+
# Show server logs until the server is up, then stop showing them.
#
# $! is the PID of the most recent background job. For the
# `vllm serve ... | tee ... &` pipeline launched just above, that is
# presumably the trailing `tee`, not the server itself -- TODO confirm before
# using VLLM_PID to signal the server.
VLLM_PID=$!
set +x  # keep the polling loop out of the xtrace output

# Poll the health endpoint every 5 seconds until it answers successfully
# (--fail makes curl return non-zero on HTTP errors).
until curl --output /dev/null --silent --fail "http://localhost:${PORT}/health"; do
  # If the background pipeline has already exited, the server can never become
  # healthy; bail out instead of polling forever.
  if [[ -n "$VLLM_PID" ]] && ! kill -0 "$VLLM_PID" 2> /dev/null; then
    echo "Server pipeline (PID $VLLM_PID) exited before becoming healthy" >&2
    exit 1
  fi
  sleep 5
done

# Stop mirroring the server log to this shell's output by killing the `tee`
# child of this script; errors (e.g. no such process) are deliberately ignored.
# NOTE(review): after this the server's stdout pipe has no reader -- confirm
# the server tolerates writing to a closed pipe.
pkill -P "$$" tee 2> /dev/null
42+
#######################################
# Print how many benchmark prompts to send.
# The two DeepSeek-R1-0528 model ids use 20 prompts per concurrent request
# when the output length is 8192 and 50 otherwise; every other model uses 10.
# Arguments:
#   $1 - model id
#   $2 - output sequence length
#   $3 - max concurrency
# Outputs:
#   the prompt count on stdout
#######################################
compute_num_prompts() {
  local model=$1 osl=$2 conc=$3
  local multiplier=10

  case "$model" in
    amd/DeepSeek-R1-0528-MXFP4-Preview | deepseek-ai/DeepSeek-R1-0528)
      if [[ "$osl" == "8192" ]]; then
        multiplier=20
      else
        multiplier=50
      fi
      ;;
  esac

  printf '%s\n' "$(( conc * multiplier ))"
}

NUM_PROMPTS=$(compute_num_prompts "$MODEL" "$OSL" "$CONC")
52+
# Fetch the benchmark client. Skip the clone when the checkout is already
# present so a re-run in the same directory does not fail on an existing path.
if [[ ! -d bench_serving ]]; then
  git clone https://github.com/kimbochen/bench_serving.git
fi

# Run the benchmark client in a throwaway container, tracing the command.
# $GITHUB_WORKSPACE is mounted at /workspace/, which is also the working
# directory and the result directory. The script path bench_serving/... is
# relative to it, so the clone above presumably runs inside $GITHUB_WORKSPACE
# -- TODO confirm.
# Every expansion is quoted so each option reaches docker/python as one word.
set -x
docker run --rm --network="$network_name" --name="$client_name" \
  -v "${GITHUB_WORKSPACE}:/workspace/" -w /workspace/ \
  -e HF_TOKEN -e PYTHONPYCACHEPREFIX=/tmp/pycache/ \
  --entrypoint=python3 \
  "$IMAGE" \
  bench_serving/benchmark_serving.py \
  --model="$MODEL" --backend=vllm --base-url="http://localhost:${PORT}" \
  --dataset-name=random \
  --random-input-len="$ISL" --random-output-len="$OSL" --random-range-ratio="$RANDOM_RANGE_RATIO" \
  --num-prompts="$NUM_PROMPTS" \
  --max-concurrency="$CONC" \
  --request-rate=inf --ignore-eos \
  --save-result --percentile-metrics="ttft,tpot,itl,e2el" \
  --result-dir=/workspace/ --result-filename="${RESULT_FILENAME}.json"
0 commit comments