Skip to content

Commit 7b0cca0

Browse files
committed
add utils function for benchmark
1 parent 9cc83d7 commit 7b0cca0

27 files changed

Lines changed: 512 additions & 301 deletions

benchmarks/benchmark_lib.sh

Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
1+
#!/usr/bin/env bash
2+
3+
# Shared benchmarking utilities for InferenceMAX
4+
5+
# Run benchmark serving with standardized parameters.
#
# Clones https://github.com/kimbochen/bench_serving.git into a fresh temporary
# directory under /tmp, runs its benchmark_serving.py against
# http://0.0.0.0:<port> with the "random" dataset, and removes the temporary
# clone afterwards.
#
# All parameters are required and each one takes exactly one non-empty value.
# Parameters:
#   --model: Model name
#   --port: Server port
#   --backend: Backend type - 'vllm' or 'openai'
#   --input-len: Random input sequence length
#   --output-len: Random output sequence length
#   --random-range-ratio: Random range ratio
#   --num-prompts: Number of prompts
#   --max-concurrency: Max concurrency
#   --result-filename: Result filename without extension (".json" is appended)
#   --result-dir: Result directory
# Outputs:
#   Argument and setup errors are written to stderr.
# Returns:
#   1 when a parameter is unknown, has no value, or is missing/empty, or when
#   the temporary directory or the clone cannot be created; otherwise the exit
#   status of benchmark_serving.py.
run_benchmark_serving() {
  local model=""
  local port=""
  local backend=""
  local input_len=""
  local output_len=""
  local random_range_ratio=""
  local num_prompts=""
  local max_concurrency=""
  local result_filename=""
  local result_dir=""

  # Parse arguments
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --model | --port | --backend | --input-len | --output-len | \
        --random-range-ratio | --num-prompts | --max-concurrency | \
        --result-filename | --result-dir)
        # "shift 2" fails WITHOUT shifting when only one argument is left, so
        # a trailing option with no value would otherwise loop forever.
        if [[ $# -lt 2 ]]; then
          echo "Error: $1 requires a value" >&2
          return 1
        fi
        ;;
      *)
        echo "Unknown parameter: $1" >&2
        return 1
        ;;
    esac

    case "$1" in
      --model) model="$2" ;;
      --port) port="$2" ;;
      --backend) backend="$2" ;;
      --input-len) input_len="$2" ;;
      --output-len) output_len="$2" ;;
      --random-range-ratio) random_range_ratio="$2" ;;
      --num-prompts) num_prompts="$2" ;;
      --max-concurrency) max_concurrency="$2" ;;
      --result-filename) result_filename="$2" ;;
      --result-dir) result_dir="$2" ;;
    esac
    shift 2
  done

  # Validate all required parameters. The variable names mirror the option
  # names (underscores instead of dashes), so the flag for the message is
  # derived from the variable name; the first missing one is reported.
  local required_var
  for required_var in model port backend input_len output_len \
    random_range_ratio num_prompts max_concurrency result_filename result_dir; do
    if [[ -z "${!required_var}" ]]; then
      echo "Error: --${required_var//_/-} is required" >&2
      return 1
    fi
  done

  # Clone benchmark serving repo. Declaration and assignment are separate so
  # that a mktemp failure is not masked by the exit status of "local".
  local bench_serving_dir
  if ! bench_serving_dir=$(mktemp -d /tmp/bmk-XXXXXX); then
    echo "Error: failed to create temporary directory" >&2
    return 1
  fi
  if ! git clone https://github.com/kimbochen/bench_serving.git "$bench_serving_dir"; then
    echo "Error: failed to clone bench_serving" >&2
    rm -rf -- "$bench_serving_dir"
    return 1
  fi

  # Run benchmark. The status is captured so the temporary clone can be
  # removed before the benchmark's exit status is handed back to the caller.
  local status=0
  python3 "$bench_serving_dir/benchmark_serving.py" \
    --model "$model" \
    --backend "$backend" \
    --base-url "http://0.0.0.0:$port" \
    --dataset-name random \
    --random-input-len "$input_len" \
    --random-output-len "$output_len" \
    --random-range-ratio "$random_range_ratio" \
    --num-prompts "$num_prompts" \
    --max-concurrency "$max_concurrency" \
    --request-rate inf \
    --ignore-eos \
    --save-result \
    --percentile-metrics 'ttft,tpot,itl,e2el' \
    --result-dir "$result_dir" \
    --result-filename "$result_filename.json" || status=$?

  rm -rf -- "$bench_serving_dir"
  return "$status"
}

benchmarks/dsr1_fp4_b200_docker.sh

Lines changed: 15 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -36,16 +36,20 @@ done
3636
kill $TAIL_PID
3737

3838
pip install -q datasets pandas
39+
40+
# Source benchmark utilities
41+
source "$(dirname "$0")/benchmark_lib.sh"
42+
3943
set -x
40-
BENCH_SERVING_DIR=$(mktemp -d /tmp/bmk-XXXXXX)
41-
git clone https://github.com/kimbochen/bench_serving.git $BENCH_SERVING_DIR
42-
python3 $BENCH_SERVING_DIR/benchmark_serving.py \
43-
--model $MODEL --backend vllm --base-url http://localhost:$PORT \
44-
--dataset-name random \
45-
--random-input-len $ISL --random-output-len $OSL --random-range-ratio $RANDOM_RANGE_RATIO \
46-
--num-prompts $NUM_PROMPTS \
47-
--max-concurrency $CONC \
48-
--request-rate inf --ignore-eos \
49-
--save-result --percentile-metrics 'ttft,tpot,itl,e2el' \
50-
--result-dir /workspace/ --result-filename $RESULT_FILENAME.json
44+
run_benchmark_serving \
45+
--model "$MODEL" \
46+
--port "$PORT" \
47+
--backend vllm \
48+
--input-len "$ISL" \
49+
--output-len "$OSL" \
50+
--random-range-ratio "$RANDOM_RANGE_RATIO" \
51+
--num-prompts "$NUM_PROMPTS" \
52+
--max-concurrency "$CONC" \
53+
--result-filename "$RESULT_FILENAME" \
54+
--result-dir /workspace/
5155

benchmarks/dsr1_fp4_b200_trt_slurm.sh

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -110,16 +110,18 @@ until curl --output /dev/null --silent --fail http://0.0.0.0:$PORT/health; do
110110
done
111111
kill $TAIL_PID
112112

113+
# Source benchmark utilities
114+
source "$(dirname "$0")/benchmark_lib.sh"
115+
113116
set -x
114-
BENCH_SERVING_DIR=$(mktemp -d /tmp/bmk-XXXXXX)
115-
git clone https://github.com/kimbochen/bench_serving.git $BENCH_SERVING_DIR
116-
python3 $BENCH_SERVING_DIR/benchmark_serving.py \
117-
--model $MODEL --backend openai \
118-
--base-url http://0.0.0.0:$PORT \
119-
--dataset-name random \
120-
--random-input-len $ISL --random-output-len $OSL --random-range-ratio $RANDOM_RANGE_RATIO \
121-
--num-prompts $(( $CONC * 10 )) --max-concurrency $CONC \
122-
--request-rate inf --ignore-eos \
123-
--save-result --percentile-metrics 'ttft,tpot,itl,e2el' \
124-
--result-dir /workspace/ \
125-
--result-filename $RESULT_FILENAME.json
117+
run_benchmark_serving \
118+
--model "$MODEL" \
119+
--port "$PORT" \
120+
--backend openai \
121+
--input-len "$ISL" \
122+
--output-len "$OSL" \
123+
--random-range-ratio "$RANDOM_RANGE_RATIO" \
124+
--num-prompts $(( $CONC * 10 )) \
125+
--max-concurrency "$CONC" \
126+
--result-filename "$RESULT_FILENAME" \
127+
--result-dir /workspace/

benchmarks/dsr1_fp4_mi355x_docker.sh

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -40,17 +40,20 @@ until curl --output /dev/null --silent --fail http://0.0.0.0:$PORT/health; do
4040
done
4141
kill $TAIL_PID
4242

43+
# Source benchmark utilities
44+
source "$(dirname "$0")/benchmark_lib.sh"
45+
4346
set -x
44-
BENCH_SERVING_DIR=$(mktemp -d /tmp/bmk-XXXXXX)
45-
git clone https://github.com/kimbochen/bench_serving.git $BENCH_SERVING_DIR
46-
python3 $BENCH_SERVING_DIR/benchmark_serving.py \
47-
--model=$MODEL --backend=vllm --base-url="http://localhost:$PORT" \
48-
--dataset-name=random \
49-
--random-input-len=$ISL --random-output-len=$OSL --random-range-ratio=$RANDOM_RANGE_RATIO \
50-
--num-prompts=$NUM_PROMPTS \
51-
--max-concurrency=$CONC \
52-
--request-rate=inf --ignore-eos \
53-
--save-result --percentile-metrics="ttft,tpot,itl,e2el" \
54-
--result-dir=/workspace/ --result-filename=$RESULT_FILENAME.json
47+
run_benchmark_serving \
48+
--model "$MODEL" \
49+
--port "$PORT" \
50+
--backend vllm \
51+
--input-len "$ISL" \
52+
--output-len "$OSL" \
53+
--random-range-ratio "$RANDOM_RANGE_RATIO" \
54+
--num-prompts "$NUM_PROMPTS" \
55+
--max-concurrency "$CONC" \
56+
--result-filename "$RESULT_FILENAME" \
57+
--result-dir /workspace/
5558

5659

benchmarks/dsr1_fp4_mi355x_slurm.sh

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -43,16 +43,19 @@ until curl --output /dev/null --silent --fail http://0.0.0.0:$PORT/health; do
4343
done
4444
kill $TAIL_PID
4545

46+
# Source benchmark utilities
47+
source "$(dirname "$0")/benchmark_lib.sh"
48+
4649
set -x
47-
BENCH_SERVING_DIR=$(mktemp -d /tmp/bmk-XXXXXX)
48-
git clone https://github.com/kimbochen/bench_serving.git $BENCH_SERVING_DIR
49-
python3 $BENCH_SERVING_DIR/benchmark_serving.py \
50-
--model $MODEL --backend vllm \
51-
--base-url "http://0.0.0.0:$PORT" \
52-
--dataset-name random \
53-
--random-input-len $ISL --random-output-len $OSL --random-range-ratio $RANDOM_RANGE_RATIO \
54-
--num-prompts $(( $CONC * 10 )) --max-concurrency $CONC \
55-
--request-rate inf --ignore-eos \
56-
--save-result --percentile-metrics "ttft,tpot,itl,e2el" \
57-
--result-dir /workspace/ --result-filename $RESULT_FILENAME.json
50+
run_benchmark_serving \
51+
--model "$MODEL" \
52+
--port "$PORT" \
53+
--backend vllm \
54+
--input-len "$ISL" \
55+
--output-len "$OSL" \
56+
--random-range-ratio "$RANDOM_RANGE_RATIO" \
57+
--num-prompts $(( $CONC * 10 )) \
58+
--max-concurrency "$CONC" \
59+
--result-filename "$RESULT_FILENAME" \
60+
--result-dir /workspace/
5861

benchmarks/dsr1_fp8_b200_docker.sh

Lines changed: 15 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -47,15 +47,19 @@ done
4747
kill $TAIL_PID
4848

4949
pip install -q datasets pandas
50+
51+
# Source benchmark utilities
52+
source "$(dirname "$0")/benchmark_lib.sh"
53+
5054
set -x
51-
BENCH_SERVING_DIR=$(mktemp -d /tmp/bmk-XXXXXX)
52-
git clone https://github.com/kimbochen/bench_serving.git $BENCH_SERVING_DIR
53-
python3 $BENCH_SERVING_DIR/benchmark_serving.py \
54-
--model $MODEL --backend vllm --base-url http://localhost:$PORT \
55-
--dataset-name random \
56-
--random-input-len $ISL --random-output-len $OSL --random-range-ratio $RANDOM_RANGE_RATIO \
57-
--num-prompts $NUM_PROMPTS \
58-
--max-concurrency $CONC \
59-
--request-rate inf --ignore-eos \
60-
--save-result --percentile-metrics 'ttft,tpot,itl,e2el' \
61-
--result-dir /workspace/ --result-filename $RESULT_FILENAME.json
55+
run_benchmark_serving \
56+
--model "$MODEL" \
57+
--port "$PORT" \
58+
--backend vllm \
59+
--input-len "$ISL" \
60+
--output-len "$OSL" \
61+
--random-range-ratio "$RANDOM_RANGE_RATIO" \
62+
--num-prompts "$NUM_PROMPTS" \
63+
--max-concurrency "$CONC" \
64+
--result-filename "$RESULT_FILENAME" \
65+
--result-dir /workspace/

benchmarks/dsr1_fp8_b200_trt_slurm.sh

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -81,16 +81,18 @@ until curl --output /dev/null --silent --fail http://0.0.0.0:$PORT/health; do
8181
done
8282
kill $TAIL_PID
8383

84+
# Source benchmark utilities
85+
source "$(dirname "$0")/benchmark_lib.sh"
86+
8487
set -x
85-
BENCH_SERVING_DIR=$(mktemp -d /tmp/bmk-XXXXXX)
86-
git clone https://github.com/kimbochen/bench_serving.git $BENCH_SERVING_DIR
87-
python3 $BENCH_SERVING_DIR/benchmark_serving.py \
88-
--model $MODEL --backend openai \
89-
--base-url http://0.0.0.0:$PORT \
90-
--dataset-name random \
91-
--random-input-len $ISL --random-output-len $OSL --random-range-ratio $RANDOM_RANGE_RATIO \
92-
--num-prompts $(( $CONC * 10 )) --max-concurrency $CONC \
93-
--request-rate inf --ignore-eos \
94-
--save-result --percentile-metrics 'ttft,tpot,itl,e2el' \
95-
--result-dir /workspace/ \
96-
--result-filename $RESULT_FILENAME.json
88+
run_benchmark_serving \
89+
--model "$MODEL" \
90+
--port "$PORT" \
91+
--backend openai \
92+
--input-len "$ISL" \
93+
--output-len "$OSL" \
94+
--random-range-ratio "$RANDOM_RANGE_RATIO" \
95+
--num-prompts $(( $CONC * 10 )) \
96+
--max-concurrency "$CONC" \
97+
--result-filename "$RESULT_FILENAME" \
98+
--result-dir /workspace/

benchmarks/dsr1_fp8_h200_slurm.sh

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -53,16 +53,18 @@ until curl --output /dev/null --silent --fail http://0.0.0.0:$PORT/health; do
5353
done
5454
kill $TAIL_PID
5555

56+
# Source benchmark utilities
57+
source "$(dirname "$0")/benchmark_lib.sh"
58+
5659
set -x
57-
BENCH_SERVING_DIR=$(mktemp -d /tmp/bmk-XXXXXX)
58-
git clone https://github.com/kimbochen/bench_serving.git $BENCH_SERVING_DIR
59-
python3 $BENCH_SERVING_DIR/benchmark_serving.py \
60-
--model $MODEL --backend vllm \
61-
--base-url http://0.0.0.0:$PORT \
62-
--dataset-name random \
63-
--random-input-len $ISL --random-output-len $OSL --random-range-ratio $RANDOM_RANGE_RATIO \
64-
--num-prompts $(( $CONC * 10 )) --max-concurrency $CONC \
65-
--request-rate inf --ignore-eos \
66-
--save-result --percentile-metrics 'ttft,tpot,itl,e2el' \
67-
--result-dir /workspace/ \
68-
--result-filename $RESULT_FILENAME.json
60+
run_benchmark_serving \
61+
--model "$MODEL" \
62+
--port "$PORT" \
63+
--backend vllm \
64+
--input-len "$ISL" \
65+
--output-len "$OSL" \
66+
--random-range-ratio "$RANDOM_RANGE_RATIO" \
67+
--num-prompts $(( $CONC * 10 )) \
68+
--max-concurrency "$CONC" \
69+
--result-filename "$RESULT_FILENAME" \
70+
--result-dir /workspace/

benchmarks/dsr1_fp8_h200_trt_slurm.sh

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -85,16 +85,18 @@ until curl --output /dev/null --silent --fail http://0.0.0.0:$PORT/health; do
8585
done
8686
kill $TAIL_PID
8787

88+
# Source benchmark utilities
89+
source "$(dirname "$0")/benchmark_lib.sh"
90+
8891
set -x
89-
BENCH_SERVING_DIR=$(mktemp -d /tmp/bmk-XXXXXX)
90-
git clone https://github.com/kimbochen/bench_serving.git $BENCH_SERVING_DIR
91-
python3 $BENCH_SERVING_DIR/benchmark_serving.py \
92-
--model $MODEL --backend openai \
93-
--base-url http://0.0.0.0:$PORT \
94-
--dataset-name random \
95-
--random-input-len $ISL --random-output-len $OSL --random-range-ratio $RANDOM_RANGE_RATIO \
96-
--num-prompts $(( $CONC * 10 )) --max-concurrency $CONC \
97-
--request-rate inf --ignore-eos \
98-
--save-result --percentile-metrics 'ttft,tpot,itl,e2el' \
99-
--result-dir /workspace/ \
100-
--result-filename $RESULT_FILENAME.json
92+
run_benchmark_serving \
93+
--model "$MODEL" \
94+
--port "$PORT" \
95+
--backend openai \
96+
--input-len "$ISL" \
97+
--output-len "$OSL" \
98+
--random-range-ratio "$RANDOM_RANGE_RATIO" \
99+
--num-prompts $(( $CONC * 10 )) \
100+
--max-concurrency "$CONC" \
101+
--result-filename "$RESULT_FILENAME" \
102+
--result-dir /workspace/

0 commit comments

Comments
 (0)