File tree Expand file tree Collapse file tree
benchmarks/single_node/agentic Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -99,6 +99,8 @@ case "$OFFLOADING" in
9999 # RSS + page cache. Eager mode (the shortcut form default) is
100100 # intentional here per user request — Kimi FP4 on B200 has cleared
101101 # the full eager sweep before.
102+ # (srok), internal node limitation
103+ # TOTAL_CPU_DRAM_GB=2500
102104 TOTAL_CPU_DRAM_GB=2500
103105 export VLLM_USE_SIMPLE_KV_OFFLOAD=1
104106 OFFLOAD_ARGS=(
@@ -119,6 +121,8 @@ case "$OFFLOADING" in
119121 # --kv-offloading-size through vLLM's integrated LMCache convenience
120122 # path, which divides the value by TP and then hits a large single-shot
121123 # cudaHostAlloc in LMCache 0.4.5's single-process local CPU backend.
124+ # (srok), internal node limitation
125+ # TOTAL_CPU_DRAM_GB=2500
122126 TOTAL_CPU_DRAM_GB=2500
123127 LMCACHE_HOST="${LMCACHE_HOST:-127.0.0.1}"
124128 LMCACHE_PORT="${LMCACHE_PORT:-5555}"
You can’t perform that action at this time.
0 commit comments