Skip to content

Commit 131b572

Browse files
committed
updating mi300
1 parent cddabf5 commit 131b572

7 files changed

Lines changed: 78 additions & 61 deletions

benchmarks/dsr1_fp8_mi300x_docker.sh

Lines changed: 26 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -24,6 +24,8 @@ fi
2424

2525
export SGLANG_USE_AITER=1
2626

27+
SERVER_LOG=$(mktemp /tmp/server-XXXXXX.log)
28+
2729
set -x
2830
python3 -m sglang.launch_server \
2931
--model-path=$MODEL --host=0.0.0.0 --port=$PORT --trust-remote-code \
@@ -33,4 +35,27 @@ python3 -m sglang.launch_server \
3335
--chunked-prefill-size=196608 \
3436
--num-continuous-decode-steps=4 \
3537
--max-prefill-tokens=196608 \
36-
--disable-radix-cache
38+
--disable-radix-cache > $SERVER_LOG 2>&1 &
39+
40+
41+
# Show logs until server is ready
42+
tail -f $SERVER_LOG &
43+
TAIL_PID=$!
44+
set +x
45+
until curl --output /dev/null --silent --fail http://0.0.0.0:$PORT/health; do
46+
sleep 5
47+
done
48+
kill $TAIL_PID
49+
50+
set -x
51+
BENCH_SERVING_DIR=$(mktemp -d /tmp/bmk-XXXXXX)
52+
git clone https://github.com/kimbochen/bench_serving.git $BENCH_SERVING_DIR
53+
python3 $BENCH_SERVING_DIR/benchmark_serving.py \
54+
--model=$MODEL --backend=vllm --base-url=http://$server_name:$PORT \
55+
--dataset-name=random \
56+
--random-input-len=$ISL --random-output-len=$OSL --random-range-ratio=$RANDOM_RANGE_RATIO \
57+
--num-prompts=$(( $CONC * 10 )) \
58+
--max-concurrency=$CONC \
59+
--request-rate=inf --ignore-eos \
60+
--save-result --percentile-metrics="ttft,tpot,itl,e2el" \
61+
--result-dir=/workspace/ --result-filename=$RESULT_FILENAME.json

benchmarks/dsr1_fp8_mi300x_slurm.sh

Lines changed: 10 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -47,17 +47,19 @@ python3 -m sglang.launch_server \
4747
--disable-radix-cache \
4848
> $SERVER_LOG 2>&1 &
4949

50+
# Show logs until server is ready
51+
tail -f $SERVER_LOG &
52+
TAIL_PID=$!
5053
set +x
51-
while IFS= read -r line; do
52-
printf '%s\n' "$line"
53-
if [[ "$line" == *"The server is fired up and ready to roll"* ]]; then
54-
break
55-
fi
56-
done < <(tail -F -n0 "$SERVER_LOG")
54+
until curl --output /dev/null --silent --fail http://0.0.0.0:$PORT/health; do
55+
sleep 5
56+
done
57+
kill $TAIL_PID
5758

5859
set -x
59-
git clone https://github.com/kimbochen/bench_serving.git
60-
python3 bench_serving/benchmark_serving.py \
60+
BENCH_SERVING_DIR=$(mktemp -d /tmp/bmk-XXXXXX)
61+
git clone https://github.com/kimbochen/bench_serving.git $BENCH_SERVING_DIR
62+
python3 $BENCH_SERVING_DIR/benchmark_serving.py \
6163
--model=$MODEL --backend=vllm \
6264
--base-url="http://0.0.0.0:$PORT" \
6365
--dataset-name=random \

benchmarks/gptoss_fp4_b200_docker.sh

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -43,6 +43,7 @@ export TORCH_CUDA_ARCH_LIST="10.0"
4343
export VLLM_FLASHINFER_ALLREDUCE_FUSION_THRESHOLDS_MB='{"2":32,"4":32,"8":8}'
4444
export PYTHONNOUSERSITE=1
4545
export VLLM_USE_FLASHINFER_MOE_MXFP4_MXFP8=1
46+
4647
SERVER_LOG=$(mktemp /tmp/server-XXXXXX.log)
4748

4849
set -x
@@ -66,7 +67,7 @@ set -x
6667
BENCH_SERVING_DIR=$(mktemp -d /tmp/bmk-XXXXXX)
6768
git clone https://github.com/kimbochen/bench_serving.git $BENCH_SERVING_DIR
6869
python3 $BENCH_SERVING_DIR/benchmark_serving.py \
69-
--model $MODEL --backend vllm --base-url http://localhost:$PORT \
70+
--model $MODEL --backend vllm --base-url http://localhost:$PORT \
7071
--dataset-name random \
7172
--random-input-len $ISL --random-output-len $OSL --random-range-ratio $RANDOM_RANGE_RATIO \
7273
--num-prompts $NUM_PROMPTS \

benchmarks/gptoss_fp4_mi300x_docker.sh

Lines changed: 25 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -24,6 +24,8 @@ export VLLM_ROCM_USE_AITER_MHA=0
2424
export VLLM_ROCM_USE_AITER_TRITON_BF16_GEMM=0
2525
export VLLM_ROCM_QUICK_REDUCE_QUANTIZATION=INT4
2626

27+
SERVER_LOG=$(mktemp /tmp/server-XXXXXX.log)
28+
2729
set -x
2830
vllm serve $MODEL --port $PORT \
2931
--tensor-parallel-size=$TP \
@@ -34,4 +36,26 @@ vllm serve $MODEL --port $PORT \
3436
--block-size=64 \
3537
--no-enable-prefix-caching \
3638
--disable-log-requests \
37-
--async-scheduling
39+
--async-scheduling > $SERVER_LOG 2>&1 &
40+
41+
# Show logs until server is ready
42+
tail -f $SERVER_LOG &
43+
TAIL_PID=$!
44+
set +x
45+
until curl --output /dev/null --silent --fail http://0.0.0.0:$PORT/health; do
46+
sleep 5
47+
done
48+
kill $TAIL_PID
49+
50+
set -x
51+
BENCH_SERVING_DIR=$(mktemp -d /tmp/bmk-XXXXXX)
52+
git clone https://github.com/kimbochen/bench_serving.git $BENCH_SERVING_DIR
53+
python3 $BENCH_SERVING_DIR/benchmark_serving.py \
54+
--model=$MODEL --backend=vllm --base-url=http://$server_name:$PORT \
55+
--dataset-name=random \
56+
--random-input-len=$ISL --random-output-len=$OSL --random-range-ratio=$RANDOM_RANGE_RATIO \
57+
--num-prompts=$(( $CONC * 10 )) \
58+
--max-concurrency=$CONC \
59+
--request-rate=inf --ignore-eos \
60+
--save-result --percentile-metrics="ttft,tpot,itl,e2el" \
61+
--result-dir=/workspace/ --result-filename=$RESULT_FILENAME.json

benchmarks/gptoss_fp4_mi300x_slurm.sh

Lines changed: 10 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -48,17 +48,18 @@ vllm serve $MODEL --port $PORT \
4848
--async-scheduling \
4949
> $SERVER_LOG 2>&1 &
5050

51+
# Show logs until server is ready
52+
tail -f $SERVER_LOG &
53+
TAIL_PID=$!
5154
set +x
52-
while IFS= read -r line; do
53-
printf '%s\n' "$line"
54-
if [[ "$line" == *"Application startup complete"* ]]; then
55-
break
56-
fi
57-
done < <(tail -F -n0 "$SERVER_LOG")
55+
until curl --output /dev/null --silent --fail http://0.0.0.0:$PORT/health; do
56+
sleep 5
57+
done
58+
kill $TAIL_PID
5859

59-
set -x
60-
git clone https://github.com/kimbochen/bench_serving.git
61-
python3 bench_serving/benchmark_serving.py \
60+
BENCH_SERVING_DIR=$(mktemp -d /tmp/bmk-XXXXXX)
61+
git clone https://github.com/kimbochen/bench_serving.git $BENCH_SERVING_DIR
62+
python3 $BENCH_SERVING_DIR/benchmark_serving.py \
6263
--model $MODEL --backend vllm \
6364
--base-url http://0.0.0.0:$PORT \
6465
--dataset-name random \

runners/launch_b200-nvd.sh

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -27,12 +27,12 @@ set -x
2727

2828
if [[ "$MODEL" == "nvidia/DeepSeek-R1-0528-FP4" || "$MODEL" == "deepseek-ai/DeepSeek-R1-0528" ]]; then
2929
if [[ "$OSL" == "8192" ]]; then
30-
NUM_PROMPTS=$(( CONC * 20 ))
30+
export NUM_PROMPTS=$(( CONC * 20 ))
3131
else
32-
NUM_PROMPTS=$(( CONC * 50 ))
32+
export NUM_PROMPTS=$(( CONC * 50 ))
3333
fi
3434
else
35-
NUM_PROMPTS=$(( CONC * 10 ))
35+
export NUM_PROMPTS=$(( CONC * 10 ))
3636
fi
3737

3838
docker run --rm --init --network host --name $server_name \

runners/launch_mi300x-amd.sh

Lines changed: 2 additions & 38 deletions
Original file line number | Diff line number | Diff line change
@@ -5,52 +5,16 @@ sudo sh -c 'echo 0 > /proc/sys/kernel/numa_balancing'
55
HF_HUB_CACHE_MOUNT="/shareddata/hf_hub_cache_$(hostname)/"
66
PORT=8888
77

8-
network_name="bmk-net"
98
server_name="bmk-server"
10-
client_name="bmk-client"
11-
12-
docker network create $network_name
139

1410
set -x
15-
docker run --rm -d --ipc=host --shm-size=16g --network=$network_name --name=$server_name \
11+
docker run --rm --ipc=host --shm-size=16g --name=$server_name \
1612
--privileged --cap-add=CAP_SYS_ADMIN --device=/dev/kfd --device=/dev/dri --device=/dev/mem \
1713
--cap-add=SYS_PTRACE --security-opt seccomp=unconfined \
1814
-v $HF_HUB_CACHE_MOUNT:$HF_HUB_CACHE \
1915
-v $GITHUB_WORKSPACE:/workspace/ -w /workspace/ \
2016
-e HF_TOKEN -e HF_HUB_CACHE -e MODEL -e TP -e CONC -e MAX_MODEL_LEN -e PORT=$PORT \
21-
-e ISL -e OSL \
17+
-e ISL -e OSL -e PYTHONPYCACHEPREFIX=/tmp/pycache/ -e RANDOM_RANGE_RATIO -e RESULT_FILENAME \
2218
--entrypoint=/bin/bash \
2319
$IMAGE \
2420
benchmarks/"${EXP_NAME%%_*}_${PRECISION}_mi300x_docker.sh"
25-
26-
set +x
27-
while IFS= read -r line; do
28-
printf '%s\n' "$line"
29-
if [[ "$line" =~ Application\ startup\ complete ]]; then
30-
break
31-
fi
32-
done < <(docker logs -f --tail=0 $server_name 2>&1)
33-
34-
git clone https://github.com/kimbochen/bench_serving.git
35-
36-
set -x
37-
docker run --rm --network=$network_name --name=$client_name \
38-
-v $GITHUB_WORKSPACE:/workspace/ -w /workspace/ \
39-
-e HF_TOKEN -e PYTHONPYCACHEPREFIX=/tmp/pycache/ \
40-
--entrypoint=python3 \
41-
$IMAGE \
42-
bench_serving/benchmark_serving.py \
43-
--model=$MODEL --backend=vllm --base-url=http://$server_name:$PORT \
44-
--dataset-name=random \
45-
--random-input-len=$ISL --random-output-len=$OSL --random-range-ratio=$RANDOM_RANGE_RATIO \
46-
--num-prompts=$(( $CONC * 10 )) \
47-
--max-concurrency=$CONC \
48-
--request-rate=inf --ignore-eos \
49-
--save-result --percentile-metrics="ttft,tpot,itl,e2el" \
50-
--result-dir=/workspace/ --result-filename=$RESULT_FILENAME.json
51-
52-
while [ -n "$(docker ps -aq)" ]; do
53-
docker stop $server_name
54-
docker network rm $network_name
55-
sleep 5
56-
done

0 commit comments

Comments
 (0)