Skip to content

Commit 634768c

Browse files
committed
fixing mi300x and updating 325x
1 parent c0a5c62 commit 634768c

6 files changed

Lines changed: 74 additions & 58 deletions

benchmarks/dsr1_fp8_mi325x_docker.sh

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414

1515
export SGLANG_USE_AITER=1
1616

17+
SERVER_LOG=$(mktemp /tmp/server-XXXXXX.log)
18+
1719
python3 -m sglang.launch_server \
1820
--model-path $MODEL \
1921
--host=0.0.0.0 \
@@ -24,5 +26,27 @@ python3 -m sglang.launch_server \
2426
--mem-fraction-static 0.8 --disable-radix-cache \
2527
--num-continuous-decode-steps 4 \
2628
--max-prefill-tokens 196608 \
27-
--cuda-graph-max-bs 128
29+
--cuda-graph-max-bs 128 > $SERVER_LOG 2>&1 &
30+
31+
# Show logs until server is ready
32+
tail -f $SERVER_LOG &
33+
TAIL_PID=$!
34+
set +x
35+
until curl --output /dev/null --silent --fail http://0.0.0.0:$PORT/health; do
36+
sleep 5
37+
done
38+
kill $TAIL_PID
39+
40+
set -x
41+
BENCH_SERVING_DIR=$(mktemp -d /tmp/bmk-XXXXXX)
42+
git clone https://github.com/kimbochen/bench_serving.git $BENCH_SERVING_DIR
43+
python3 $BENCH_SERVING_DIR/benchmark_serving.py \
44+
--model=$MODEL --backend=vllm --base-url=http://0.0.0.0:$PORT \
45+
--dataset-name=random \
46+
--random-input-len=$ISL --random-output-len=$OSL --random-range-ratio=$RANDOM_RANGE_RATIO \
47+
--num-prompts=$(( $CONC * 10 )) \
48+
--max-concurrency=$CONC \
49+
--request-rate=inf --ignore-eos \
50+
--save-result --percentile-metrics="ttft,tpot,itl,e2el" \
51+
--result-dir=/workspace/ --result-filename=$RESULT_FILENAME.json
2852

benchmarks/dsr1_fp8_mi325x_slurm.sh

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -23,17 +23,19 @@ python3 -m sglang.launch_server \
2323
--disable-radix-cache \
2424
> $SERVER_LOG 2>&1 &
2525

26+
# Show logs until server is ready
27+
tail -f $SERVER_LOG &
28+
TAIL_PID=$!
2629
set +x
27-
while IFS= read -r line; do
28-
printf '%s\n' "$line"
29-
if [[ "$line" == *"The server is fired up and ready to roll"* ]]; then
30-
break
31-
fi
32-
done < <(tail -F -n0 "$SERVER_LOG")
30+
until curl --output /dev/null --silent --fail http://0.0.0.0:$PORT/health; do
31+
sleep 5
32+
done
33+
kill $TAIL_PID
3334

3435
set -x
35-
git clone https://github.com/kimbochen/bench_serving.git
36-
python3 bench_serving/benchmark_serving.py \
36+
BENCH_SERVING_DIR=$(mktemp -d /tmp/bmk-XXXXXX)
37+
git clone https://github.com/kimbochen/bench_serving.git $BENCH_SERVING_DIR
38+
python3 $BENCH_SERVING_DIR/benchmark_serving.py \
3739
--model $MODEL --backend vllm \
3840
--base-url http://0.0.0.0:$PORT \
3941
--dataset-name random \

benchmarks/gptoss_fp4_mi300x_docker.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,11 +51,11 @@ set -x
5151
BENCH_SERVING_DIR=$(mktemp -d /tmp/bmk-XXXXXX)
5252
git clone https://github.com/kimbochen/bench_serving.git $BENCH_SERVING_DIR
5353
python3 $BENCH_SERVING_DIR/benchmark_serving.py \
54-
--model=$MODEL --backend=vllm --base-url=http://$server_name:$PORT \
54+
--model=$MODEL --backend=vllm --base-url=http://0.0.0.0:$PORT \
5555
--dataset-name=random \
5656
--random-input-len=$ISL --random-output-len=$OSL --random-range-ratio=$RANDOM_RANGE_RATIO \
5757
--num-prompts=$(( $CONC * 10 )) \
5858
--max-concurrency=$CONC \
5959
--request-rate=inf --ignore-eos \
6060
--save-result --percentile-metrics="ttft,tpot,itl,e2el" \
61-
--result-dir=/workspace/ --result-filename=$RESULT_FILENAME.json
61+
--result-dir=/workspace/ --result-filename=$RESULT_FILENAME.json

benchmarks/gptoss_fp4_mi325x_docker.sh

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@ export VLLM_USE_AITER_UNIFIED_ATTENTION=1
2323
export VLLM_ROCM_USE_AITER_MHA=0
2424
export VLLM_ROCM_USE_AITER_TRITON_BF16_GEMM=0
2525

26+
SERVER_LOG=$(mktemp /tmp/server-XXXXXX.log)
27+
2628
set -x
2729
vllm serve $MODEL --port $PORT \
2830
--tensor-parallel-size=$TP \
@@ -33,4 +35,26 @@ vllm serve $MODEL --port $PORT \
3335
--block-size=64 \
3436
--no-enable-prefix-caching \
3537
--disable-log-requests \
36-
--async-scheduling
38+
--async-scheduling > $SERVER_LOG 2>&1 &
39+
40+
# Show logs until server is ready
41+
tail -f $SERVER_LOG &
42+
TAIL_PID=$!
43+
set +x
44+
until curl --output /dev/null --silent --fail http://0.0.0.0:$PORT/health; do
45+
sleep 5
46+
done
47+
kill $TAIL_PID
48+
49+
set -x
50+
BENCH_SERVING_DIR=$(mktemp -d /tmp/bmk-XXXXXX)
51+
git clone https://github.com/kimbochen/bench_serving.git $BENCH_SERVING_DIR
52+
python3 $BENCH_SERVING_DIR/benchmark_serving.py \
53+
--model=$MODEL --backend=vllm --base-url=http://0.0.0.0:$PORT \
54+
--dataset-name=random \
55+
--random-input-len=$ISL --random-output-len=$OSL --random-range-ratio=$RANDOM_RANGE_RATIO \
56+
--num-prompts=$(( $CONC * 10 )) \
57+
--max-concurrency=$CONC \
58+
--request-rate=inf --ignore-eos \
59+
--save-result --percentile-metrics="ttft,tpot,itl,e2el" \
60+
--result-dir=/workspace/ --result-filename=$RESULT_FILENAME.json

benchmarks/gptoss_fp4_mi325x_slurm.sh

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -48,17 +48,19 @@ vllm serve $MODEL --port $PORT \
4848
--async-scheduling \
4949
> $SERVER_LOG 2>&1 &
5050

51+
# Show logs until server is ready
52+
tail -f $SERVER_LOG &
53+
TAIL_PID=$!
5154
set +x
52-
while IFS= read -r line; do
53-
printf '%s\n' "$line"
54-
if [[ "$line" == *"Application startup complete"* ]]; then
55-
break
56-
fi
57-
done < <(tail -F -n0 "$SERVER_LOG")
55+
until curl --output /dev/null --silent --fail http://0.0.0.0:$PORT/health; do
56+
sleep 5
57+
done
58+
kill $TAIL_PID
5859

5960
set -x
60-
git clone https://github.com/kimbochen/bench_serving.git
61-
python3 bench_serving/benchmark_serving.py \
61+
BENCH_SERVING_DIR=$(mktemp -d /tmp/bmk-XXXXXX)
62+
git clone https://github.com/kimbochen/bench_serving.git $BENCH_SERVING_DIR
63+
python3 $BENCH_SERVING_DIR/benchmark_serving.py \
6264
--model $MODEL --backend vllm \
6365
--base-url http://0.0.0.0:$PORT \
6466
--dataset-name random \

runners/launch_mi325x-amd.sh

Lines changed: 2 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -5,52 +5,16 @@ sudo sh -c 'echo 0 > /proc/sys/kernel/numa_balancing'
55
HF_HUB_CACHE_MOUNT="/home/kimbosemianalysis/hf_hub_cache/"
66
PORT=8888
77

8-
network_name="bmk-net"
98
server_name="bmk-server"
10-
client_name="bmk-client"
11-
12-
docker network create $network_name
139

1410
set -x
15-
docker run --rm -d --ipc=host --shm-size=16g --network=$network_name --name=$server_name \
11+
docker run --rm --ipc=host --shm-size=16g --network=host --name=$server_name \
1612
--privileged --cap-add=CAP_SYS_ADMIN --device=/dev/kfd --device=/dev/dri --device=/dev/mem \
1713
--cap-add=SYS_PTRACE --security-opt seccomp=unconfined \
1814
-v $HF_HUB_CACHE_MOUNT:$HF_HUB_CACHE \
1915
-v $GITHUB_WORKSPACE:/workspace/ -w /workspace/ \
2016
-e HF_TOKEN -e HF_HUB_CACHE -e MODEL -e TP -e CONC -e MAX_MODEL_LEN -e PORT=$PORT \
21-
-e ISL -e OSL \
17+
-e ISL -e OSL -e PYTHONPYCACHEPREFIX=/tmp/pycache/ -e RANDOM_RANGE_RATIO -e RESULT_FILENAME \
2218
--entrypoint=/bin/bash \
2319
$IMAGE \
2420
benchmarks/"${EXP_NAME%%_*}_${PRECISION}_mi325x_docker.sh"
25-
26-
set +x
27-
while IFS= read -r line; do
28-
printf '%s\n' "$line"
29-
if [[ "$line" =~ Application\ startup\ complete ]]; then
30-
break
31-
fi
32-
done < <(docker logs -f --tail=0 $server_name 2>&1)
33-
34-
git clone https://github.com/kimbochen/bench_serving.git
35-
36-
set -x
37-
docker run --rm --network=$network_name --name=$client_name \
38-
-v $GITHUB_WORKSPACE:/workspace/ -w /workspace/ \
39-
-e HF_TOKEN -e PYTHONPYCACHEPREFIX=/tmp/pycache/ \
40-
--entrypoint=python3 \
41-
$IMAGE \
42-
bench_serving/benchmark_serving.py \
43-
--model=$MODEL --backend=vllm --base-url=http://$server_name:$PORT \
44-
--dataset-name=random \
45-
--random-input-len=$ISL --random-output-len=$OSL --random-range-ratio=$RANDOM_RANGE_RATIO \
46-
--num-prompts=$(( $CONC * 10 )) \
47-
--max-concurrency=$CONC \
48-
--request-rate=inf --ignore-eos \
49-
--save-result --percentile-metrics="ttft,tpot,itl,e2el" \
50-
--result-dir=/workspace/ --result-filename=$RESULT_FILENAME.json
51-
52-
while [ -n "$(docker ps -aq)" ]; do
53-
docker stop $server_name
54-
docker network rm $network_name
55-
sleep 5
56-
done

0 commit comments

Comments (0)