Skip to content

Commit 6b37939

Browse files
committed
adding mi355x refactor
1 parent 7f42d70 commit 6b37939

4 files changed

Lines changed: 76 additions & 60 deletions

File tree

benchmarks/dsr1_fp8_mi355x_docker.sh

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,5 +24,36 @@ python3 -m sglang.launch_server \
2424
--mem-fraction-static 0.8 --disable-radix-cache \
2525
--num-continuous-decode-steps 4 \
2626
--max-prefill-tokens 196608 \
27-
--cuda-graph-max-bs 128
27+
--cuda-graph-max-bs 128 | tee $(mktemp /tmp/server-XXXXXX.log) &
28+
29+
set +x
30+
until curl --output /dev/null --silent --fail http://localhost:$PORT/health; do
31+
sleep 5
32+
done
33+
pkill -P $$ tee 2>/dev/null
34+
35+
if [[ "$MODEL" == "amd/DeepSeek-R1-0528-MXFP4-Preview" || "$MODEL" == "deepseek-ai/DeepSeek-R1-0528" ]]; then
36+
if [[ "$OSL" == "8192" ]]; then
37+
NUM_PROMPTS=$(( CONC * 20 ))
38+
else
39+
NUM_PROMPTS=$(( CONC * 50 ))
40+
fi
41+
else
42+
NUM_PROMPTS=$(( CONC * 10 ))
43+
fi
44+
45+
BENCH_SERVING_DIR=$(mktemp -d /tmp/bmk-XXXXXX)
46+
git clone https://github.com/kimbochen/bench_serving.git $BENCH_SERVING_DIR
47+
set -x
48+
python3 $BENCH_SERVING_DIR/benchmark_serving.py \
49+
--model=$MODEL --backend=vllm --base-url="http://localhost:$PORT" \
50+
--dataset-name=random \
51+
--random-input-len=$ISL --random-output-len=$OSL --random-range-ratio=$RANDOM_RANGE_RATIO \
52+
--num-prompts=$NUM_PROMPTS \
53+
--max-concurrency=$CONC \
54+
--request-rate=inf --ignore-eos \
55+
--save-result --percentile-metrics="ttft,tpot,itl,e2el" \
56+
--result-dir=/workspace/ --result-filename=$RESULT_FILENAME.json
57+
58+
2859

benchmarks/gptoss_fp4_h100_docker.sh

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,6 @@ vllm serve $MODEL --host=0.0.0.0 --port=$PORT \
3232
--disable-log-requests 2>&1 | tee $(mktemp /tmp/server-XXXXXX.log) &
3333

3434
# Show server logs until it is up, then stop showing
35-
VLLM_PID=$!
3635
set +x
3736
until curl --output /dev/null --silent --fail http://localhost:$PORT/health; do
3837
sleep 5

benchmarks/gptoss_fp4_mi355x_docker.sh

Lines changed: 5 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88
# TP
99
# CONC
1010
# MAX_MODEL_LEN
11+
# RANDOM_RANGE_RATIO
12+
# RESULT_FILENAME
1113

1214
cat > config.yaml << EOF
1315
compilation-config: '{"compile_sizes":[1,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32,34,36,38,40,42,44,46,48,50,52,54,56,58,60,62,64,66,68,70,72,74,76,78,80,82,84,86,88,90,92,94,96,98,100,102,104,106,108,110,112,114,116,118,120,122,124,126,128,256,512,1024,2048,8192] , "cudagraph_capture_sizes":[1,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32,34,36,38,40,42,44,46,48,50,52,54,56,58,60,62,64,66,68,70,72,74,76,78,80,82,84,86,88,90,92,94,96,98,100,102,104,106,108,110,112,114,116,118,120,122,124,126,128,136,144,152,160,168,176,184,192,200,208,216,224,232,240,248,256,264,272,280,288,296,304,312,320,328,336,344,352,360,368,376,384,392,400,408,416,424,432,440,448,456,464,472,480,488,496,504,512,520,528,536,544,552,560,568,576,584,592,600,608,616,624,632,640,648,656,664,672,680,688,696,704,712,720,728,736,744,752,760,768,776,784,792,800,808,816,824,832,840,848,856,864,872,880,888,896,904,912,920,928,936,944,952,960,968,976,984,992,1000,1008,1016,1024,2048,4096,8192] , "cudagraph_mode": "FULL_AND_PIECEWISE"}'
@@ -33,32 +35,16 @@ vllm serve $MODEL --port $PORT \
3335
--async-scheduling | tee $(mktemp /tmp/server-XXXXXX.log) &
3436

3537
# Show server logs until it is up, then stop showing
36-
VLLM_PID=$!
3738
set +x
3839
until curl --output /dev/null --silent --fail http://localhost:$PORT/health; do
3940
sleep 5
4041
done
4142
pkill -P $$ tee 2>/dev/null
4243

43-
if [[ "$MODEL" == "amd/DeepSeek-R1-0528-MXFP4-Preview" || "$MODEL" == "deepseek-ai/DeepSeek-R1-0528" ]]; then
44-
if [[ "$OSL" == "8192" ]]; then
45-
NUM_PROMPTS=$(( CONC * 20 ))
46-
else
47-
NUM_PROMPTS=$(( CONC * 50 ))
48-
fi
49-
else
50-
NUM_PROMPTS=$(( CONC * 10 ))
51-
fi
52-
53-
git clone https://github.com/kimbochen/bench_serving.git
54-
44+
BENCH_SERVING_DIR=$(mktemp -d /tmp/bmk-XXXXXX)
45+
git clone https://github.com/kimbochen/bench_serving.git $BENCH_SERVING_DIR
5546
set -x
56-
docker run --rm --network=$network_name --name=$client_name \
57-
-v $GITHUB_WORKSPACE:/workspace/ -w /workspace/ \
58-
-e HF_TOKEN -e PYTHONPYCACHEPREFIX=/tmp/pycache/ \
59-
--entrypoint=python3 \
60-
$IMAGE \
61-
bench_serving/benchmark_serving.py \
47+
python3 $BENCH_SERVING_DIR/benchmark_serving.py \
6248
--model=$MODEL --backend=vllm --base-url="http://localhost:$PORT" \
6349
--dataset-name=random \
6450
--random-input-len=$ISL --random-output-len=$OSL --random-range-ratio=$RANDOM_RANGE_RATIO \

runners/launch_mi355x-amd.sh

Lines changed: 39 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -24,61 +24,61 @@ server_name="bmk-server"
2424
# docker network create $network_name
2525

2626
set -x
27-
docker run --rm -d --ipc=host --shm-size=16g --network=host --name=$server_name \
27+
docker run --rm --ipc=host --shm-size=16g --network=host --name=$server_name \
2828
--privileged --cap-add=CAP_SYS_ADMIN --device=/dev/kfd --device=/dev/dri --device=/dev/mem \
2929
--cap-add=SYS_PTRACE --security-opt seccomp=unconfined \
3030
-v $HF_HUB_CACHE_MOUNT:$HF_HUB_CACHE \
3131
-v $GITHUB_WORKSPACE:/workspace/ -w /workspace/ \
3232
-e HF_TOKEN -e HF_HUB_CACHE -e MODEL -e TP -e CONC -e MAX_MODEL_LEN -e PORT=$PORT \
33-
-e ISL -e OSL \
33+
-e ISL -e OSL -e PYTHONPYCACHEPREFIX=/tmp/pycache/ -e RANDOM_RANGE_RATIO -e RESULT_FILENAME \
3434
--entrypoint=/bin/bash \
3535
$IMAGE \
3636
benchmarks/"${EXP_NAME%%_*}_${PRECISION}_mi355x_docker.sh"
3737

38-
set +x
39-
while IFS= read -r line; do
40-
printf '%s\n' "$line"
41-
if [[ "$line" =~ Application\ startup\ complete ]]; then
42-
break
43-
fi
44-
done < <(docker logs -f --tail=0 $server_name 2>&1)
38+
# set +x
39+
# while IFS= read -r line; do
40+
# printf '%s\n' "$line"
41+
# if [[ "$line" =~ Application\ startup\ complete ]]; then
42+
# break
43+
# fi
44+
# done < <(docker logs -f --tail=0 $server_name 2>&1)
4545

46-
if [[ "$MODEL" == "amd/DeepSeek-R1-0528-MXFP4-Preview" || "$MODEL" == "deepseek-ai/DeepSeek-R1-0528" ]]; then
47-
if [[ "$OSL" == "8192" ]]; then
48-
NUM_PROMPTS=$(( CONC * 20 ))
49-
else
50-
NUM_PROMPTS=$(( CONC * 50 ))
51-
fi
52-
else
53-
NUM_PROMPTS=$(( CONC * 10 ))
54-
fi
46+
# if [[ "$MODEL" == "amd/DeepSeek-R1-0528-MXFP4-Preview" || "$MODEL" == "deepseek-ai/DeepSeek-R1-0528" ]]; then
47+
# if [[ "$OSL" == "8192" ]]; then
48+
# NUM_PROMPTS=$(( CONC * 20 ))
49+
# else
50+
# NUM_PROMPTS=$(( CONC * 50 ))
51+
# fi
52+
# else
53+
# NUM_PROMPTS=$(( CONC * 10 ))
54+
# fi
5555

56-
git clone https://github.com/kimbochen/bench_serving.git
56+
# git clone https://github.com/kimbochen/bench_serving.git
5757

58-
set -x
59-
docker run --rm --network=$network_name --name=$client_name \
60-
-v $GITHUB_WORKSPACE:/workspace/ -w /workspace/ \
61-
-e HF_TOKEN -e PYTHONPYCACHEPREFIX=/tmp/pycache/ \
62-
--entrypoint=python3 \
63-
$IMAGE \
64-
bench_serving/benchmark_serving.py \
65-
--model=$MODEL --backend=vllm --base-url="http://$server_name:$PORT" \
66-
--dataset-name=random \
67-
--random-input-len=$ISL --random-output-len=$OSL --random-range-ratio=$RANDOM_RANGE_RATIO \
68-
--num-prompts=$NUM_PROMPTS \
69-
--max-concurrency=$CONC \
70-
--request-rate=inf --ignore-eos \
71-
--save-result --percentile-metrics="ttft,tpot,itl,e2el" \
72-
--result-dir=/workspace/ --result-filename=$RESULT_FILENAME.json
58+
# set -x
59+
# docker run --rm --network=$network_name --name=$client_name \
60+
# -v $GITHUB_WORKSPACE:/workspace/ -w /workspace/ \
61+
# -e HF_TOKEN -e PYTHONPYCACHEPREFIX=/tmp/pycache/ \
62+
# --entrypoint=python3 \
63+
# $IMAGE \
64+
# bench_serving/benchmark_serving.py \
65+
# --model=$MODEL --backend=vllm --base-url="http://$server_name:$PORT" \
66+
# --dataset-name=random \
67+
# --random-input-len=$ISL --random-output-len=$OSL --random-range-ratio=$RANDOM_RANGE_RATIO \
68+
# --num-prompts=$NUM_PROMPTS \
69+
# --max-concurrency=$CONC \
70+
# --request-rate=inf --ignore-eos \
71+
# --save-result --percentile-metrics="ttft,tpot,itl,e2el" \
72+
# --result-dir=/workspace/ --result-filename=$RESULT_FILENAME.json
7373

7474
if ls gpucore.* 1> /dev/null 2>&1; then
7575
echo "gpucore files exist. not good"
7676
rm -f gpucore.*
7777
fi
7878

7979

80-
while [ -n "$(docker ps -aq)" ]; do
81-
docker stop $server_name
82-
docker network rm $network_name
83-
sleep 5
84-
done
80+
# while [ -n "$(docker ps -aq)" ]; do
81+
# docker stop $server_name
82+
# # docker network rm $network_name
83+
# sleep 5
84+
# done

0 commit comments

Comments
 (0)