Skip to content

Commit 970f27a

Browse files
committed
fixing b200
1 parent e2b15d9 commit 970f27a

4 files changed

Lines changed: 11 additions & 39 deletions

File tree

benchmarks/dsr1_fp4_b200_docker.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ python3 $BENCH_SERVING_DIR/benchmark_serving.py \
4343
--model $MODEL --backend vllm --base-url http://localhost:$PORT \
4444
--dataset-name random \
4545
--random-input-len $ISL --random-output-len $OSL --random-range-ratio $RANDOM_RANGE_RATIO \
46-
--num-prompts $(( $CONC * 10 )) \
46+
--num-prompts $NUM_PROMPTS \
4747
--max-concurrency $CONC \
4848
--request-rate inf --ignore-eos \
4949
--save-result --percentile-metrics 'ttft,tpot,itl,e2el' \

benchmarks/dsr1_fp8_b200_docker.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ python3 $BENCH_SERVING_DIR/benchmark_serving.py \
5454
--model $MODEL --backend vllm --base-url http://localhost:$PORT \
5555
--dataset-name random \
5656
--random-input-len $ISL --random-output-len $OSL --random-range-ratio $RANDOM_RANGE_RATIO \
57-
--num-prompts $(( $CONC * 10 )) \
57+
--num-prompts $NUM_PROMPTS \
5858
--max-concurrency $CONC \
5959
--request-rate inf --ignore-eos \
6060
--save-result --percentile-metrics 'ttft,tpot,itl,e2el' \

benchmarks/gptoss_fp4_b200_docker.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ python3 $BENCH_SERVING_DIR/benchmark_serving.py \
6969
--model $MODEL --backend vllm --base-url http://localhost:$PORT \
7070
--dataset-name random \
7171
--random-input-len $ISL --random-output-len $OSL --random-range-ratio $RANDOM_RANGE_RATIO \
72-
--num-prompts $(( $CONC * 10 )) \
72+
--num-prompts $NUM_PROMPTS \
7373
--max-concurrency $CONC \
7474
--request-rate inf --ignore-eos \
7575
--save-result --percentile-metrics 'ttft,tpot,itl,e2el' \

runners/launch_b200-nvd.sh

Lines changed: 8 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -25,29 +25,6 @@ set -x
2525
# Disabling it can reduce perf but will improve CI stability. i.e. we won't see vLLM/Sglang crashes.
2626
# Ref: https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/env.html#nccl-graph-register
2727

28-
29-
docker run --rm -d --init --network host --name $server_name \
30-
--runtime nvidia --gpus all --ipc host --privileged --shm-size=16g --ulimit memlock=-1 --ulimit stack=67108864 \
31-
-v $HF_HUB_CACHE_MOUNT:$HF_HUB_CACHE \
32-
-v $GITHUB_WORKSPACE:/workspace/ -w /workspace/ \
33-
-e HF_TOKEN -e HF_HUB_CACHE -e MODEL -e TP -e CONC -e MAX_MODEL_LEN -e ISL -e OSL -e PORT=$PORT -e EP_SIZE \
34-
-e NCCL_GRAPH_REGISTER=0 \
35-
-e TORCH_CUDA_ARCH_LIST="10.0" -e CUDA_DEVICE_ORDER=PCI_BUS_ID -e CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7" \
36-
--entrypoint=/bin/bash \
37-
$(echo "$IMAGE" | sed 's/#/\//') \
38-
benchmarks/"${EXP_NAME%%_*}_${PRECISION}_b200${FRAMEWORK_SUFFIX}_docker.sh"
39-
40-
set +x
41-
while IFS= read -r line; do
42-
printf '%s\n' "$line"
43-
if [[ "$line" =~ Application\ startup\ complete ]]; then
44-
break
45-
fi
46-
done < <(docker logs -f --tail=0 $server_name 2>&1)
47-
48-
git clone https://github.com/kimbochen/bench_serving.git
49-
50-
5128
if [[ "$MODEL" == "nvidia/DeepSeek-R1-0528-FP4" || "$MODEL" == "deepseek-ai/DeepSeek-R1-0528" ]]; then
5229
if [[ "$OSL" == "8192" ]]; then
5330
NUM_PROMPTS=$(( CONC * 20 ))
@@ -58,22 +35,17 @@ else
5835
NUM_PROMPTS=$(( CONC * 10 ))
5936
fi
6037

61-
set -x
62-
docker run --rm --network host --name $client_name \
38+
docker run --rm --init --network host --name $server_name \
39+
--runtime nvidia --gpus all --ipc host --privileged --shm-size=16g --ulimit memlock=-1 --ulimit stack=67108864 \
40+
-v $HF_HUB_CACHE_MOUNT:$HF_HUB_CACHE \
6341
-v $GITHUB_WORKSPACE:/workspace/ -w /workspace/ \
64-
-e HF_TOKEN -e PYTHONPYCACHEPREFIX=/tmp/pycache/ \
42+
-e HF_TOKEN -e HF_HUB_CACHE -e MODEL -e TP -e CONC -e MAX_MODEL_LEN -e ISL -e OSL -e PORT=$PORT -e EP_SIZE \
43+
-e NCCL_GRAPH_REGISTER=0 \
44+
-e TORCH_CUDA_ARCH_LIST="10.0" -e CUDA_DEVICE_ORDER=PCI_BUS_ID -e CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7" \
45+
-e PYTHONPYCACHEPREFIX=/tmp/pycache/ -e RESULT_FILENAME -e RANDOM_RANGE_RATIO -e NUM_PROMPTS \
6546
--entrypoint=/bin/bash \
6647
$(echo "$IMAGE" | sed 's/#/\//') \
67-
-lc "pip install -q datasets pandas && \
68-
python3 bench_serving/benchmark_serving.py \
69-
--model $MODEL --backend vllm --base-url http://localhost:$PORT \
70-
--dataset-name random \
71-
--random-input-len $ISL --random-output-len $OSL --random-range-ratio $RANDOM_RANGE_RATIO \
72-
--num-prompts $NUM_PROMPTS \
73-
--max-concurrency $CONC \
74-
--request-rate inf --ignore-eos \
75-
--save-result --percentile-metrics 'ttft,tpot,itl,e2el' \
76-
--result-dir /workspace/ --result-filename $RESULT_FILENAME.json"
48+
benchmarks/"${EXP_NAME%%_*}_${PRECISION}_b200${FRAMEWORK_SUFFIX}_docker.sh"
7749

7850
# Try graceful first
7951
docker stop -t 90 "$server_name" || true

0 commit comments

Comments
 (0)