Skip to content

Commit 905589a

Browse files
committed
fixing mi300x and updating 325x
1 parent 1d92a47 commit 905589a

6 files changed

Lines changed: 130 additions & 131 deletions
Lines changed: 30 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,13 @@
11
#!/usr/bin/env bash
22

3-
# Source benchmark utilities early
4-
source "$(dirname "$0")/benchmark_lib.sh"
5-
6-
check_env_vars \
7-
MODEL \
8-
PORT \
9-
TP \
10-
CONC \
11-
ISL \
12-
OSL \
13-
RANDOM_RANGE_RATIO \
14-
RESULT_FILENAME
3+
# ========= Required Env Vars =========
4+
# HF_TOKEN
5+
# HF_HUB_CACHE
6+
# MODEL
7+
# PORT
8+
# TP
9+
# CONC
10+
# MAX_MODEL_LEN
1511

1612
# Reference
1713
# https://rocm.docs.amd.com/en/docs-7.0-docker/benchmark-docker/inference-sglang-deepseek-r1-fp8.html
@@ -32,19 +28,25 @@ python3 -m sglang.launch_server \
3228
--max-prefill-tokens 196608 \
3329
--cuda-graph-max-bs 128 > $SERVER_LOG 2>&1 &
3430

35-
SERVER_PID=$!
36-
37-
# Wait for server to be ready
38-
wait_for_server_ready --port "$PORT" --server-log "$SERVER_LOG" --server-pid "$SERVER_PID"
39-
40-
run_benchmark_serving \
41-
--model "$MODEL" \
42-
--port "$PORT" \
43-
--backend vllm \
44-
--input-len "$ISL" \
45-
--output-len "$OSL" \
46-
--random-range-ratio "$RANDOM_RANGE_RATIO" \
47-
--num-prompts $(( $CONC * 10 )) \
48-
--max-concurrency "$CONC" \
49-
--result-filename "$RESULT_FILENAME" \
50-
--result-dir /workspace/
31+
# Show logs until server is ready
32+
tail -f $SERVER_LOG &
33+
TAIL_PID=$!
34+
set +x
35+
until curl --output /dev/null --silent --fail http://0.0.0.0:$PORT/health; do
36+
sleep 5
37+
done
38+
kill $TAIL_PID
39+
40+
set -x
41+
BENCH_SERVING_DIR=$(mktemp -d /tmp/bmk-XXXXXX)
42+
git clone https://github.com/kimbochen/bench_serving.git $BENCH_SERVING_DIR
43+
python3 $BENCH_SERVING_DIR/benchmark_serving.py \
44+
--model=$MODEL --backend=vllm --base-url=http://0.0.0.0:$PORT \
45+
--dataset-name=random \
46+
--random-input-len=$ISL --random-output-len=$OSL --random-range-ratio=$RANDOM_RANGE_RATIO \
47+
--num-prompts=$(( $CONC * 10 )) \
48+
--max-concurrency=$CONC \
49+
--request-rate=inf --ignore-eos \
50+
--save-result --percentile-metrics="ttft,tpot,itl,e2el" \
51+
--result-dir=/workspace/ --result-filename=$RESULT_FILENAME.json
52+
Lines changed: 21 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,5 @@
11
#!/usr/bin/bash
22

3-
# Source benchmark utilities early
4-
source "$(dirname "$0")/benchmark_lib.sh"
5-
6-
check_env_vars \
7-
MODEL \
8-
TP \
9-
CONC \
10-
ISL \
11-
OSL \
12-
RANDOM_RANGE_RATIO \
13-
RESULT_FILENAME
14-
153
echo "JOB $SLURM_JOB_ID running on $SLURMD_NODENAME"
164

175
SERVER_LOG=$(mktemp /tmp/server-XXXXXX.log)
@@ -35,19 +23,25 @@ python3 -m sglang.launch_server \
3523
--disable-radix-cache \
3624
> $SERVER_LOG 2>&1 &
3725

38-
SERVER_PID=$!
26+
# Show logs until server is ready
27+
tail -f $SERVER_LOG &
28+
TAIL_PID=$!
29+
set +x
30+
until curl --output /dev/null --silent --fail http://0.0.0.0:$PORT/health; do
31+
sleep 5
32+
done
33+
kill $TAIL_PID
3934

40-
# Wait for server to be ready
41-
wait_for_server_ready --port "$PORT" --server-log "$SERVER_LOG" --server-pid "$SERVER_PID"
42-
43-
run_benchmark_serving \
44-
--model "$MODEL" \
45-
--port "$PORT" \
46-
--backend vllm \
47-
--input-len "$ISL" \
48-
--output-len "$OSL" \
49-
--random-range-ratio "$RANDOM_RANGE_RATIO" \
50-
--num-prompts $(( $CONC * 10 )) \
51-
--max-concurrency "$CONC" \
52-
--result-filename "$RESULT_FILENAME" \
53-
--result-dir /workspace/
35+
set -x
36+
BENCH_SERVING_DIR=$(mktemp -d /tmp/bmk-XXXXXX)
37+
git clone https://github.com/kimbochen/bench_serving.git $BENCH_SERVING_DIR
38+
python3 $BENCH_SERVING_DIR/benchmark_serving.py \
39+
--model $MODEL --backend vllm \
40+
--base-url http://0.0.0.0:$PORT \
41+
--dataset-name random \
42+
--random-input-len $ISL --random-output-len $OSL --random-range-ratio $RANDOM_RANGE_RATIO \
43+
--num-prompts $(( $CONC * 10 )) --max-concurrency $CONC \
44+
--request-rate inf --ignore-eos \
45+
--save-result --percentile-metrics 'ttft,tpot,itl,e2el' \
46+
--result-dir /workspace/ \
47+
--result-filename $RESULT_FILENAME.json

benchmarks/gptoss_fp4_mi300x_docker.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,11 +51,11 @@ set -x
5151
BENCH_SERVING_DIR=$(mktemp -d /tmp/bmk-XXXXXX)
5252
git clone https://github.com/kimbochen/bench_serving.git $BENCH_SERVING_DIR
5353
python3 $BENCH_SERVING_DIR/benchmark_serving.py \
54-
--model=$MODEL --backend=vllm --base-url=http://$server_name:$PORT \
54+
--model=$MODEL --backend=vllm --base-url=http://0.0.0.0:$PORT \
5555
--dataset-name=random \
5656
--random-input-len=$ISL --random-output-len=$OSL --random-range-ratio=$RANDOM_RANGE_RATIO \
5757
--num-prompts=$(( $CONC * 10 )) \
5858
--max-concurrency=$CONC \
5959
--request-rate=inf --ignore-eos \
6060
--save-result --percentile-metrics="ttft,tpot,itl,e2el" \
61-
--result-dir=/workspace/ --result-filename=$RESULT_FILENAME.json
61+
--result-dir=/workspace/ --result-filename=$RESULT_FILENAME.json
Lines changed: 28 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,13 @@
11
#!/usr/bin/env bash
22

3-
# Source benchmark utilities early
4-
source "$(dirname "$0")/benchmark_lib.sh"
5-
6-
check_env_vars \
7-
MODEL \
8-
PORT \
9-
TP \
10-
CONC \
11-
ISL \
12-
OSL \
13-
MAX_MODEL_LEN \
14-
RANDOM_RANGE_RATIO \
15-
RESULT_FILENAME
3+
# ========= Required Env Vars =========
4+
# HF_TOKEN
5+
# HF_HUB_CACHE
6+
# MODEL
7+
# PORT
8+
# TP
9+
# CONC
10+
# MAX_MODEL_LEN
1611

1712
# If the machine runs a MEC FW older than 177, RCCL
1813
# cannot reclaim some memory.
@@ -42,19 +37,24 @@ vllm serve $MODEL --port $PORT \
4237
--disable-log-requests \
4338
--async-scheduling > $SERVER_LOG 2>&1 &
4439

45-
SERVER_PID=$!
46-
47-
# Wait for server to be ready
48-
wait_for_server_ready --port "$PORT" --server-log "$SERVER_LOG" --server-pid "$SERVER_PID"
40+
# Show logs until server is ready
41+
tail -f $SERVER_LOG &
42+
TAIL_PID=$!
43+
set +x
44+
until curl --output /dev/null --silent --fail http://0.0.0.0:$PORT/health; do
45+
sleep 5
46+
done
47+
kill $TAIL_PID
4948

50-
run_benchmark_serving \
51-
--model "$MODEL" \
52-
--port "$PORT" \
53-
--backend vllm \
54-
--input-len "$ISL" \
55-
--output-len "$OSL" \
56-
--random-range-ratio "$RANDOM_RANGE_RATIO" \
57-
--num-prompts $(( $CONC * 10 )) \
58-
--max-concurrency "$CONC" \
59-
--result-filename "$RESULT_FILENAME" \
60-
--result-dir /workspace/
49+
set -x
50+
BENCH_SERVING_DIR=$(mktemp -d /tmp/bmk-XXXXXX)
51+
git clone https://github.com/kimbochen/bench_serving.git $BENCH_SERVING_DIR
52+
python3 $BENCH_SERVING_DIR/benchmark_serving.py \
53+
--model=$MODEL --backend=vllm --base-url=http://0.0.0.0:$PORT \
54+
--dataset-name=random \
55+
--random-input-len=$ISL --random-output-len=$OSL --random-range-ratio=$RANDOM_RANGE_RATIO \
56+
--num-prompts=$(( $CONC * 10 )) \
57+
--max-concurrency=$CONC \
58+
--request-rate=inf --ignore-eos \
59+
--save-result --percentile-metrics="ttft,tpot,itl,e2el" \
60+
--result-dir=/workspace/ --result-filename=$RESULT_FILENAME.json

benchmarks/gptoss_fp4_mi325x_slurm.sh

Lines changed: 33 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,18 @@
11
#!/usr/bin/bash
22

3-
# Source benchmark utilities early
4-
source "$(dirname "$0")/benchmark_lib.sh"
3+
# === Required Env Vars ===
4+
# HF_TOKEN
5+
# HF_HUB_CACHE
6+
# IMAGE
7+
# MODEL
8+
# ISL
9+
# OSL
10+
# MAX_MODEL_LEN
11+
# RANDOM_RANGE_RATIO
12+
# TP
13+
# CONC
14+
# RESULT_FILENAME
515

6-
check_env_vars \
7-
MODEL \
8-
TP \
9-
CONC \
10-
ISL \
11-
OSL \
12-
MAX_MODEL_LEN \
13-
RANDOM_RANGE_RATIO \
14-
RESULT_FILENAME
1516

1617
echo "JOB $SLURM_JOB_ID running on $SLURMD_NODENAME"
1718

@@ -47,19 +48,25 @@ vllm serve $MODEL --port $PORT \
4748
--async-scheduling \
4849
> $SERVER_LOG 2>&1 &
4950

50-
SERVER_PID=$!
51+
# Show logs until server is ready
52+
tail -f $SERVER_LOG &
53+
TAIL_PID=$!
54+
set +x
55+
until curl --output /dev/null --silent --fail http://0.0.0.0:$PORT/health; do
56+
sleep 5
57+
done
58+
kill $TAIL_PID
5159

52-
# Wait for server to be ready
53-
wait_for_server_ready --port "$PORT" --server-log "$SERVER_LOG" --server-pid "$SERVER_PID"
54-
55-
run_benchmark_serving \
56-
--model "$MODEL" \
57-
--port "$PORT" \
58-
--backend vllm \
59-
--input-len "$ISL" \
60-
--output-len "$OSL" \
61-
--random-range-ratio "$RANDOM_RANGE_RATIO" \
62-
--num-prompts $(( $CONC * 10 )) \
63-
--max-concurrency "$CONC" \
64-
--result-filename "$RESULT_FILENAME" \
65-
--result-dir /workspace/
60+
set -x
61+
BENCH_SERVING_DIR=$(mktemp -d /tmp/bmk-XXXXXX)
62+
git clone https://github.com/kimbochen/bench_serving.git $BENCH_SERVING_DIR
63+
python3 $BENCH_SERVING_DIR/benchmark_serving.py \
64+
--model $MODEL --backend vllm \
65+
--base-url http://0.0.0.0:$PORT \
66+
--dataset-name random \
67+
--random-input-len $ISL --random-output-len $OSL --random-range-ratio $RANDOM_RANGE_RATIO \
68+
--num-prompts $(( $CONC * 10 )) --max-concurrency $CONC \
69+
--request-rate inf --ignore-eos \
70+
--save-result --percentile-metrics 'ttft,tpot,itl,e2el' \
71+
--result-dir /workspace/ \
72+
--result-filename $RESULT_FILENAME.json

runners/launch_mi325x-amd.sh

Lines changed: 16 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,20 @@
1-
#!/usr/bin/env bash
1+
#!/usr/bin/bash
22

3-
export HF_HUB_CACHE_MOUNT="/nfsdata/sa/hf_hub_cache-${USER: -1}/"
4-
export PORT_OFFSET=${USER: -1}
3+
sudo sh -c 'echo 0 > /proc/sys/kernel/numa_balancing'
54

6-
PARTITION="compute"
7-
SQUASH_FILE="/nfsdata/sa/squash/$(echo "$IMAGE" | sed 's/[\/:@#]/_/g').sqsh"
5+
HF_HUB_CACHE_MOUNT="/home/kimbosemianalysis/hf_hub_cache/"
6+
PORT=8888
87

9-
set -x
10-
salloc --partition=$PARTITION --gres=gpu:$TP --cpus-per-task=256 --time=180 --no-shell
11-
JOB_ID=$(squeue -u $USER -h -o %A | head -n1)
12-
13-
srun --jobid=$JOB_ID bash -c "sudo enroot import -o $SQUASH_FILE docker://$IMAGE"
14-
srun --jobid=$JOB_ID \
15-
--container-image=$SQUASH_FILE \
16-
--container-mounts=$GITHUB_WORKSPACE:/workspace/,$HF_HUB_CACHE_MOUNT:$HF_HUB_CACHE \
17-
--container-mount-home \
18-
--container-writable \
19-
--container-remap-root \
20-
--container-workdir=/workspace/ \
21-
--no-container-entrypoint --export=ALL \
22-
bash benchmarks/${EXP_NAME%%_*}_${PRECISION}_mi325x_slurm.sh
8+
server_name="bmk-server"
239

24-
scancel $JOB_ID
10+
set -x
11+
docker run --rm --ipc=host --shm-size=16g --network=host --name=$server_name \
12+
--privileged --cap-add=CAP_SYS_ADMIN --device=/dev/kfd --device=/dev/dri --device=/dev/mem \
13+
--cap-add=SYS_PTRACE --security-opt seccomp=unconfined \
14+
-v $HF_HUB_CACHE_MOUNT:$HF_HUB_CACHE \
15+
-v $GITHUB_WORKSPACE:/workspace/ -w /workspace/ \
16+
-e HF_TOKEN -e HF_HUB_CACHE -e MODEL -e TP -e CONC -e MAX_MODEL_LEN -e PORT=$PORT \
17+
-e ISL -e OSL -e PYTHONPYCACHEPREFIX=/tmp/pycache/ -e RANDOM_RANGE_RATIO -e RESULT_FILENAME \
18+
--entrypoint=/bin/bash \
19+
$IMAGE \
20+
benchmarks/"${EXP_NAME%%_*}_${PRECISION}_mi325x_docker.sh"

0 commit comments

Comments
 (0)