We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 2441f1f commit 461bbe7Copy full SHA for 461bbe7
1 file changed
benchmarks/single_node/agentic/kimik2.5_fp4_b200.sh
@@ -101,7 +101,7 @@ case "$OFFLOADING" in
101
# the full eager sweep before.
102
#(srok), internal node limitation
103
#TOTAL_CPU_DRAM_GB=2500
104
- TOTAL_CPU_DRAM_GB=2500
+ TOTAL_CPU_DRAM_GB=1500
105
export VLLM_USE_SIMPLE_KV_OFFLOAD=1
106
OFFLOAD_ARGS=(
107
--kv_offloading_backend native
@@ -123,7 +123,7 @@ case "$OFFLOADING" in
123
# cudaHostAlloc in LMCache 0.4.5's single-process local CPU backend.
124
125
126
127
LMCACHE_HOST="${LMCACHE_HOST:-127.0.0.1}"
128
LMCACHE_PORT="${LMCACHE_PORT:-5555}"
129
LMCACHE_HTTP_PORT="${LMCACHE_HTTP_PORT:-8080}"
0 commit comments