Skip to content

Commit 06a4ea7

Browse files
committed
test(agentic): enable blocking CUDA offload diagnostics
1 parent cb21694 commit 06a4ea7

2 files changed

Lines changed: 9 additions & 4 deletions

File tree

.github/configs/nvidia-master.yaml

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -9420,11 +9420,13 @@ dsv4-fp4-b300-vllm-agentic:
9420 9420
agentic-coding:
9421 9421
- duration: 1800
9422 9422
search-space:
9423-
- { tp: 4, offloading: none, conc-list: [1, 4, 8, 16, 32] }
9424-
- { tp: 8, offloading: none, conc-list: [1, 4, 8, 16, 32, 40, 48, 52, 64, 72] }
9425-
- { tp: 4, ep: 4, dp-attn: true, offloading: none, conc-list: [8, 16, 32, 64, 128] }
9423+
# TEMPORARY: run only native CPU-offload scenarios while diagnosing
9424+
# asynchronous CUDA failures.
9425+
# - { tp: 4, offloading: none, conc-list: [1, 4, 8, 16, 32] }
9426+
# - { tp: 8, offloading: none, conc-list: [1, 4, 8, 16, 32, 40, 48, 52, 64, 72] }
9427+
# - { tp: 4, ep: 4, dp-attn: true, offloading: none, conc-list: [8, 16, 32, 64, 128] }
9426 9428
- { tp: 4, ep: 4, dp-attn: true, offloading: cpu, conc-list: [32, 48, 64, 96, 128, 192, 256] }
9427-
- { tp: 8, ep: 8, dp-attn: true, offloading: none, conc-list: [52, 64, 72, 84, 100, 128, 196, 256, 512] }
9429+
# - { tp: 8, ep: 8, dp-attn: true, offloading: none, conc-list: [52, 64, 72, 84, 100, 128, 196, 256, 512] }
9428 9430

9429 9431
gptoss-fp4-b200-vllm-agentic:
9430 9432
image: vllm/vllm-openai:v0.22.0

benchmarks/single_node/agentic/dsv4_fp4_b300_vllm.sh

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -144,6 +144,9 @@ echo "Starting vllm server..."
144 144
export TORCH_CUDA_ARCH_LIST="10.0"
145 145
export PYTHONNOUSERSITE=1
146 146
export VLLM_FLOAT32_MATMUL_PRECISION=high
147+
# Temporary diagnostic: surface asynchronous CUDA failures at the operation
148+
# that caused them instead of at a later synchronization point.
149+
export CUDA_LAUNCH_BLOCKING=1
147 150

148 151
vllm serve "$MODEL_PATH" --served-model-name "$MODEL" \
149 152
--host 0.0.0.0 \

0 commit comments

Comments
 (0)