fix: switch gb200 profile cudagraph_mode from FULL_AND_PIECEWISE to FULL

Oseltamivir · Oseltamivir · commit 3e18d9c1c0c0 · 2026-05-28T14:46:00.000-07:00
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/deepseek-v4/8k1k/agg-gb200-flash-profile-dep16-conc16-mtp3.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/deepseek-v4/8k1k/agg-gb200-flash-profile-dep16-conc16-mtp3.yaml
@@ -74,7 +74,7 @@ backend:
       no-enable-prefix-caching: true
       no-enable-flashinfer-autotune: true
       block-size: 256
-      compilation-config: '{"cudagraph_mode":"FULL_AND_PIECEWISE","mode":3}'
+      compilation-config: '{"cudagraph_mode":"FULL","mode":3}'
       gpu-memory-utilization: 0.9
       stream-interval: 50
       no-disable-hybrid-kv-cache-manager: true