Skip to content

Commit 3194358

Browse files
committed
add launch_b200-dgxc.sh
1 parent d22bf9c commit 3194358

1 file changed

Lines changed: 94 additions & 0 deletions

File tree

runners/launch_b200-dgxc.sh

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
#!/usr/bin/bash
2+
3+
# Host directory holding the Hugging Face hub cache; it is bind-mounted into
# the server container at $HF_HUB_CACHE.
HF_HUB_CACHE_MOUNT="/raid/hf_hub_cache/"

# Benchmark script names carry a "_trt" suffix only when FRAMEWORK is "trt".
# A plain if/else avoids the `A && B || C` pitfall and the extra subshell.
if [[ "$FRAMEWORK" == "trt" ]]; then
  FRAMEWORK_SUFFIX='_trt'
else
  FRAMEWORK_SUFFIX=''
fi

# Port the server is told to listen on (passed to the server container as
# PORT) and that the benchmark client connects to on localhost.
PORT=8888

# Last path component of the model identifier (e.g. "org/name" -> "name").
# NOTE(review): MODEL_NAME is not referenced anywhere else in the visible
# script — confirm whether it is still needed.
MODEL_NAME=$(basename "$MODEL")

# Container names for the inference server and the benchmark client.
server_name="bmk-server"
client_name="bmk-client"
12+
13+
# Print the current GPU state for the job log.
nvidia-smi

# Refuse to start unless the GPUs are idle: any digit in the list of compute
# process PIDs means something is still running on a GPU.
active_gpu_pids=$(nvidia-smi --query-compute-apps=pid --format=csv,noheader)
if [[ "$active_gpu_pids" =~ [0-9] ]]; then
  echo "[ERROR] GPU busy from previous run"
  nvidia-smi
  exit 1
fi
19+
20+
set -x
# Use --init flag to run an init process (PID 1) inside container for better signal handling and zombie process cleanup
# Ref: https://www.paolomainardi.com/posts/docker-run-init/

# NCCL_GRAPH_REGISTER tries to automatically enable user buffer registration with CUDA Graphs.
# Disabling it can reduce perf but will improve CI stability. i.e. we won't see vLLM/Sglang crashes.
# Ref: https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/env.html#nccl-graph-register

# Replace the first '#' in IMAGE with '/' to obtain the image reference
# (same result as `sed 's/#/\//'`, without spawning a pipeline).
server_image=${IMAGE/\#//}

# Benchmark entry script, relative to /workspace/ inside the container:
# <EXP_NAME up to its first '_'>_<PRECISION>_b200<FRAMEWORK_SUFFIX>_docker.sh
server_script="benchmarks/${EXP_NAME%%_*}_${PRECISION}_b200${FRAMEWORK_SUFFIX}_docker.sh"

# Start the server detached. Every expansion is quoted so that paths or names
# containing whitespace or glob characters reach docker as single arguments.
# `-e NAME` (without a value) forwards the variable from this shell's
# environment, so HF_TOKEN's value never shows up in the `set -x` trace.
if ! docker run --rm -d --init --network host --name "$server_name" \
  --runtime nvidia --gpus all --ipc host --privileged --shm-size=16g --ulimit memlock=-1 --ulimit stack=67108864 \
  -v "$HF_HUB_CACHE_MOUNT:$HF_HUB_CACHE" \
  -v "$GITHUB_WORKSPACE:/workspace/" -w /workspace/ \
  -e HF_TOKEN -e HF_HUB_CACHE -e MODEL -e TP -e CONC -e MAX_MODEL_LEN -e ISL -e OSL -e "PORT=$PORT" -e EP_SIZE \
  -e NCCL_GRAPH_REGISTER=0 \
  -e TORCH_CUDA_ARCH_LIST="10.0" -e CUDA_DEVICE_ORDER=PCI_BUS_ID -e CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7" \
  --entrypoint=/bin/bash \
  "$server_image" \
  "$server_script"; then
  # Without a server there is nothing to benchmark; stop here instead of
  # running the client against a port nobody listens on.
  echo "[ERROR] Failed to start server container $server_name" >&2
  exit 1
fi
39+
40+
set +x
# Follow the server container's log (stdout and stderr), echoing every line,
# until it reports "Application startup complete". If the log stream ends
# first (e.g. the container exited or could not be found), the server never
# became ready.
server_ready=false
while IFS= read -r line; do
  printf '%s\n' "$line"
  if [[ "$line" =~ Application\ startup\ complete ]]; then
    server_ready=true
    break
  fi
done < <(docker logs -f --tail=0 "$server_name" 2>&1)

if [[ "$server_ready" != true ]]; then
  echo "[ERROR] Server container $server_name stopped logging before startup completed" >&2
  # Best-effort cleanup so a half-started server does not keep the GPUs busy.
  docker rm -f "$server_name" >/dev/null 2>&1 || true
  exit 1
fi
47+
48+
# Fetch the benchmark client into the current directory unless a previous run
# in this workspace already left the script in place (git refuses to clone
# into an existing non-empty directory).
# NOTE(review): the client container runs bench_serving/benchmark_serving.py
# from /workspace/, so this assumes the current directory is
# $GITHUB_WORKSPACE — confirm.
if [[ ! -f bench_serving/benchmark_serving.py ]]; then
  git clone https://github.com/kimbochen/bench_serving.git
fi
49+
50+
51+
# Number of benchmark prompts = CONC times a per-model factor:
#   the two DeepSeek-R1-0528 variants: 20 when OSL is 8192, otherwise 50
#   every other model:                 10
case "$MODEL" in
  "nvidia/DeepSeek-R1-0528-FP4" | "deepseek-ai/DeepSeek-R1-0528")
    case "$OSL" in
      "8192") prompts_per_conc=20 ;;
      *) prompts_per_conc=50 ;;
    esac
    ;;
  *)
    prompts_per_conc=10
    ;;
esac
NUM_PROMPTS=$(( CONC * prompts_per_conc ))
60+
61+
set -x
# Replace the first '#' in IMAGE with '/' to obtain the image reference
# (same result as `sed 's/#/\//'`, without spawning a pipeline).
client_image=${IMAGE/\#//}

# Run the benchmark client in the foreground against the server on
# localhost:$PORT. Arguments to docker are quoted so they are passed as single
# words; the command string after -lc is expanded by this shell and then
# interpreted by bash inside the container.
# The JSON result is written to /workspace/, i.e. $GITHUB_WORKSPACE on the host.
docker run --rm --network host --name "$client_name" \
  -v "$GITHUB_WORKSPACE:/workspace/" -w /workspace/ \
  -e HF_TOKEN -e PYTHONPYCACHEPREFIX=/tmp/pycache/ \
  --entrypoint=/bin/bash \
  "$client_image" \
  -lc "pip install -q datasets pandas && \
python3 bench_serving/benchmark_serving.py \
--model $MODEL --backend vllm --base-url http://localhost:$PORT \
--dataset-name random \
--random-input-len $ISL --random-output-len $OSL --random-range-ratio $RANDOM_RANGE_RATIO \
--num-prompts $NUM_PROMPTS \
--max-concurrency $CONC \
--request-rate inf --ignore-eos \
--save-result --percentile-metrics 'ttft,tpot,itl,e2el' \
--result-dir /workspace/ --result-filename $RESULT_FILENAME.json"
77+
78+
# Shut the server container down. Each step is best-effort (failures are
# ignored) so teardown always runs to the end.
shutdown_server() {
  # Graceful stop with a 90 second grace period.
  docker stop -t 90 "$server_name" || true
  # Block until the container has really exited.
  docker wait "$server_name" >/dev/null 2>&1 || true
  # Force-remove anything that is still lingering.
  docker rm -f "$server_name" >/dev/null 2>&1 || true
}

# Warn (without failing) if compute processes are still attached to a GPU.
warn_if_gpu_busy() {
  local lingering_pids
  lingering_pids=$(nvidia-smi --query-compute-apps=pid --format=csv,noheader)
  if [[ "$lingering_pids" =~ [0-9] ]]; then
    echo "[WARN] After stop, GPU still busy:"
    nvidia-smi
    # Last resort if driver allows and GPUs appear idle otherwise:
    #nvidia-smi --gpu-reset -i 0,1,2,3,4,5,6,7 2>/dev/null || true
  fi
}

shutdown_server

# Give GPU processes a moment to fully terminate before checking.
sleep 2
warn_if_gpu_busy

# Final GPU state for the job log.
nvidia-smi

0 commit comments

Comments
 (0)