File tree Expand file tree Collapse file tree
benchmarks/single_node/agentic Expand file tree Collapse file tree Original file line number Diff line number Diff line change 501501 LMCACHE_CONNECT_HOST="${LMCACHE_CONNECT_HOST:-tcp://$LMCACHE_HOST}"
# LMCache server tunables. Each assignment keeps a non-empty value already
# present in the environment and otherwise falls back to the default on the
# right-hand side. TOTAL_CPU_DRAM_GB and TP are referenced here but defined
# outside this excerpt.
LMCACHE_L1_SIZE_GB="${LMCACHE_L1_SIZE_GB:-$TOTAL_CPU_DRAM_GB}"
LMCACHE_L1_INIT_SIZE_GB="${LMCACHE_L1_INIT_SIZE_GB:-20}"
# LMCache read locks are leases on chunks that lookup has promised
# vLLM can retrieve. The default 300s TTL is too short for this
# long-context agentic queue: TP8/conc32 can spend >300s between
# lookup and retrieve while GPU KV is saturated, which leaves the
# object present in L1 but no longer readable. Keep the 2.5 TB pool
# size unchanged and only extend the lookup-to-retrieve lease.
LMCACHE_L1_READ_TTL_SECONDS="${LMCACHE_L1_READ_TTL_SECONDS:-3600}"
LMCACHE_CHUNK_SIZE="${LMCACHE_CHUNK_SIZE:-256}"
LMCACHE_MAX_WORKERS="${LMCACHE_MAX_WORKERS:-$TP}"
# Export a default hash seed of 0 unless the caller already set one
# (presumably so Python hashing is reproducible across the launched
# processes -- confirm against the rest of the script).
export PYTHONHASHSEED="${PYTHONHASHSEED:-0}"
514521 --http-port "$LMCACHE_HTTP_PORT"
515522 --l1-size-gb "$LMCACHE_L1_SIZE_GB"
516523 --l1-init-size-gb "$LMCACHE_L1_INIT_SIZE_GB"
524+ --l1-read-ttl-seconds "$LMCACHE_L1_READ_TTL_SECONDS"
517525 --chunk-size "$LMCACHE_CHUNK_SIZE"
518526 --max-workers "$LMCACHE_MAX_WORKERS"
519527 --eviction-policy LRU
You can’t perform that action at this time.
0 commit comments