Skip to content

Commit 167b1f2

Browse files
committed
initial poc
1 parent 7daaa8b commit 167b1f2

3 files changed

Lines changed: 62 additions & 36 deletions

File tree

benchmarks/gptoss_fp4_h100_docker.sh

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,9 @@
77
# MAX_MODEL_LEN
88
# TP
99
# CONC
10+
# ISL
11+
# OSL
12+
1013

1114
cat > config.yaml << EOF
1215
compilation-config: '{"cudagraph_mode":"PIECEWISE"}'
@@ -18,11 +21,35 @@ max-model-len: 10240
1821
EOF
1922

2023
export PYTHONNOUSERSITE=1
24+
SERVER_LOG=$(mktemp /tmp/server-XXXXXX.log)
2125

2226
set -x
2327
vllm serve $MODEL --host=0.0.0.0 --port=$PORT \
2428
--config config.yaml \
2529
--gpu-memory-utilization=0.9 \
2630
--tensor-parallel-size=$TP \
2731
--max-num-seqs=$CONC \
28-
--disable-log-requests
32+
--disable-log-requests > $SERVER_LOG 2>&1 &
33+
34+
set +x
35+
while IFS= read -r line; do  # Mirror the server log to stdout until vLLM reports it is ready.
36+
printf '%s\n' "$line"
37+
if [[ "$line" =~ Application\ startup\ complete ]]; then
38+
break
39+
fi
40+
done < <(tail -F -n +1 "$SERVER_LOG")  # -n +1: read from the first line so output written before tail attached is not skipped.
41+
42+
pip install -q datasets pandas
43+
git clone https://github.com/kimbochen/bench_serving.git
44+
set -x  # Trace the benchmark command; it runs directly in this script (not inside a bash -lc "..." string), so arguments use plain quoting.
45+
python3 bench_serving/benchmark_serving.py \
46+
--model="$MODEL" \
47+
--backend=vllm \
48+
--base-url="http://localhost:$PORT" \
49+
--dataset-name=random \
50+
--random-input-len="$ISL" --random-output-len="$OSL" --random-range-ratio="$RANDOM_RANGE_RATIO" \
51+
--num-prompts=$(( $CONC * 10 )) --max-concurrency="$CONC" \
52+
--request-rate=inf --ignore-eos \
53+
--save-result --percentile-metrics='ttft,tpot,itl,e2el' \
54+
--result-dir=/workspace/ \
55+
--result-filename="${RESULT_FILENAME}.json"

benchmarks/gptoss_fp4_h100_slurm.sh

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
# === Required Env Vars ===
44
# HF_TOKEN
55
# HF_HUB_CACHE
6-
# IMAGE
76
# MODEL
87
# ISL
98
# OSL

runners/launch_h100-cr.sh

Lines changed: 34 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ HF_HUB_CACHE_MOUNT="/home/ubuntu/hf_hub_cache/"
44
PORT=8888
55

66
server_name="bmk-server"
7-
client_name="bmk-client"
7+
# client_name="bmk-client"
88

99
set -x
1010
docker run --rm -d --network=host --name=$server_name \
@@ -17,38 +17,38 @@ docker run --rm -d --network=host --name=$server_name \
1717
$IMAGE \
1818
benchmarks/"${EXP_NAME%%_*}_${PRECISION}_h100_docker.sh"
1919

20-
set +x
21-
while IFS= read -r line; do
22-
printf '%s\n' "$line"
23-
if [[ "$line" =~ Application\ startup\ complete ]]; then
24-
break
25-
fi
26-
done < <(docker logs -f --tail=0 $server_name 2>&1)
27-
28-
if ! docker ps --format "{{.Names}}" | grep -q "$server_name"; then
29-
echo "Server container launch failed."
30-
exit 1
31-
fi
32-
33-
git clone https://github.com/kimbochen/bench_serving.git
34-
35-
set -x
36-
docker run --rm --network=host --name=$client_name \
37-
-v $GITHUB_WORKSPACE:/workspace/ -w /workspace/ \
38-
-e HF_TOKEN -e PYTHONPYCACHEPREFIX=/tmp/pycache/ \
39-
--entrypoint=/bin/bash \
40-
$IMAGE \
41-
-lc "pip install -q datasets pandas && \
42-
python3 bench_serving/benchmark_serving.py \
43-
--model=$MODEL \
44-
--backend=vllm \
45-
--base-url=\"http://localhost:$PORT\" \
46-
--dataset-name=random \
47-
--random-input-len=$ISL --random-output-len=$OSL --random-range-ratio=$RANDOM_RANGE_RATIO \
48-
--num-prompts=$(( $CONC * 10 )) --max-concurrency=$CONC \
49-
--request-rate=inf --ignore-eos \
50-
--save-result --percentile-metrics='ttft,tpot,itl,e2el' \
51-
--result-dir=/workspace/ \
52-
--result-filename=$RESULT_FILENAME.json"
20+
# set +x
21+
# while IFS= read -r line; do
22+
# printf '%s\n' "$line"
23+
# if [[ "$line" =~ Application\ startup\ complete ]]; then
24+
# break
25+
# fi
26+
# done < <(docker logs -f --tail=0 $server_name 2>&1)
27+
28+
# if ! docker ps --format "{{.Names}}" | grep -q "$server_name"; then
29+
# echo "Server container launch failed."
30+
# exit 1
31+
# fi
32+
33+
# git clone https://github.com/kimbochen/bench_serving.git
34+
35+
# set -x
36+
# docker run --rm --network=host --name=$client_name \
37+
# -v $GITHUB_WORKSPACE:/workspace/ -w /workspace/ \
38+
# -e HF_TOKEN -e PYTHONPYCACHEPREFIX=/tmp/pycache/ \
39+
# --entrypoint=/bin/bash \
40+
# $IMAGE \
41+
# -lc "pip install -q datasets pandas && \
42+
# python3 bench_serving/benchmark_serving.py \
43+
# --model=$MODEL \
44+
# --backend=vllm \
45+
# --base-url=\"http://localhost:$PORT\" \
46+
# --dataset-name=random \
47+
# --random-input-len=$ISL --random-output-len=$OSL --random-range-ratio=$RANDOM_RANGE_RATIO \
48+
# --num-prompts=$(( $CONC * 10 )) --max-concurrency=$CONC \
49+
# --request-rate=inf --ignore-eos \
50+
# --save-result --percentile-metrics='ttft,tpot,itl,e2el' \
51+
# --result-dir=/workspace/ \
52+
# --result-filename=$RESULT_FILENAME.json"
5353

5454
docker stop $server_name

0 commit comments

Comments
 (0)