Skip to content

Commit 97576fa

Browse files
committed
test(agentic): run B300 CPU offload in eager mode
1 parent 4bd54ce commit 97576fa

1 file changed

Lines changed: 3 additions & 5 deletions

File tree

benchmarks/single_node/agentic/dsv4_fp4_b300_vllm.sh

Lines changed: 3 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -114,13 +114,11 @@ case "$OFFLOADING" in
114114
PER_ENGINE_GB=$TOTAL_CPU_DRAM_GB
115115
fi
116116
PER_ENGINE_BYTES=$((PER_ENGINE_GB * 1024 * 1024 * 1024))
117-
# Use --kv-transfer-config JSON to also pass lazy_offload=true. Eager
118-
# mode (default) hits an AssertionError in
119-
# vllm/v1/core/kv_cache_utils.py:269 popleft_n at low/mid CONC; lazy
120-
# mode defers the store path and clears low/mid CONC at 80-100%.
117+
# Temporarily run eager mode to isolate whether lazy offloading is
118+
# required to reproduce the SimpleCPUOffloadConnector CUDA failures.
121119
# See SimpleCPUOffloadConnector PR #37160 for the lazy_offload knob.
122120
export VLLM_USE_SIMPLE_KV_OFFLOAD=1
123-
OFFLOAD_ARGS="--kv-transfer-config {\"kv_connector\":\"SimpleCPUOffloadConnector\",\"kv_role\":\"kv_both\",\"kv_connector_extra_config\":{\"cpu_bytes_to_use\":$PER_ENGINE_BYTES,\"lazy_offload\":true}}"
121+
OFFLOAD_ARGS="--kv-transfer-config {\"kv_connector\":\"SimpleCPUOffloadConnector\",\"kv_role\":\"kv_both\",\"kv_connector_extra_config\":{\"cpu_bytes_to_use\":$PER_ENGINE_BYTES,\"lazy_offload\":false}}"
124122
;;
125123
*)
126124
echo "Error: unsupported OFFLOADING value '$OFFLOADING' (expected one of: none, cpu)" >&2

0 commit comments

Comments
 (0)