|
1 | 1 | #!/usr/bin/env bash |
2 | 2 |
|
3 | | -# Source benchmark utilities early |
4 | | -source "$(dirname "$0")/benchmark_lib.sh" |
5 | | - |
6 | | -check_env_vars \ |
7 | | - MODEL \ |
8 | | - PORT \ |
9 | | - TP \ |
10 | | - CONC \ |
11 | | - ISL \ |
12 | | - OSL \ |
13 | | - MAX_MODEL_LEN \ |
14 | | - RANDOM_RANGE_RATIO \ |
15 | | - RESULT_FILENAME |
| 3 | +# ========= Required Env Vars ========= |
| 4 | +# HF_TOKEN |
| 5 | +# HF_HUB_CACHE |
| 6 | +# MODEL |
| 7 | +# PORT |
| 8 | +# TP |
| 9 | +# CONC |
| 10 | +# MAX_MODEL_LEN (the benchmark step below also reads ISL, OSL, RANDOM_RANGE_RATIO, RESULT_FILENAME) |
16 | 11 |
|
17 | 12 | # If the machine runs a MEC FW older than 177, RCCL |
18 | 13 | # cannot reclaim some memory. |
@@ -42,19 +37,24 @@ vllm serve $MODEL --port $PORT \ |
42 | 37 | --disable-log-requests \ |
43 | 38 | --async-scheduling > $SERVER_LOG 2>&1 & |
44 | 39 |
|
45 | | -SERVER_PID=$! |
46 | | - |
47 | | -# Wait for server to be ready |
48 | | -wait_for_server_ready --port "$PORT" --server-log "$SERVER_LOG" --server-pid "$SERVER_PID" |
| 40 | +# Show logs until server is ready |
| 41 | +tail -f $SERVER_LOG & |
| 42 | +TAIL_PID=$! |
| 43 | +set +x |
| 44 | +until curl --output /dev/null --silent --fail http://0.0.0.0:$PORT/health; do |
| 45 | + sleep 5 |
| 46 | +done |
| 47 | +kill $TAIL_PID |
49 | 48 |
|
50 | | -run_benchmark_serving \ |
51 | | - --model "$MODEL" \ |
52 | | - --port "$PORT" \ |
53 | | - --backend vllm \ |
54 | | - --input-len "$ISL" \ |
55 | | - --output-len "$OSL" \ |
56 | | - --random-range-ratio "$RANDOM_RANGE_RATIO" \ |
57 | | - --num-prompts $(( $CONC * 10 )) \ |
58 | | - --max-concurrency "$CONC" \ |
59 | | - --result-filename "$RESULT_FILENAME" \ |
60 | | - --result-dir /workspace/ |
| 49 | +set -x |
| 50 | +BENCH_SERVING_DIR=$(mktemp -d /tmp/bmk-XXXXXX) |
| 51 | +git clone https://github.com/kimbochen/bench_serving.git $BENCH_SERVING_DIR |
| 52 | +python3 $BENCH_SERVING_DIR/benchmark_serving.py \ |
| 53 | +--model=$MODEL --backend=vllm --base-url=http://0.0.0.0:$PORT \ |
| 54 | +--dataset-name=random \ |
| 55 | +--random-input-len=$ISL --random-output-len=$OSL --random-range-ratio=$RANDOM_RANGE_RATIO \ |
| 56 | +--num-prompts=$(( $CONC * 10 )) \ |
| 57 | +--max-concurrency=$CONC \ |
| 58 | +--request-rate=inf --ignore-eos \ |
| 59 | +--save-result --percentile-metrics="ttft,tpot,itl,e2el" \ |
| 60 | +--result-dir=/workspace/ --result-filename=$RESULT_FILENAME.json |
0 commit comments