Skip to content

Commit b0231de

Browse files
sufubao and hiworldwzj authored
fix(linear-att): fix latent prefix-cache ref/buffer leaks (#1348)
Co-authored-by: wzj <wzjhelloworld@qq.com>
1 parent 3a15cb0 commit b0231de

2 files changed

Lines changed: 17 additions & 1 deletion

File tree

lightllm/server/router/dynamic_prompt/linear_att_radix_cache.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -163,7 +163,10 @@ def _discard_node(self, node: LinearAttPagedTreeNode):
163163
return
164164

165165
def _add_node(self, node: LinearAttPagedTreeNode):
166-
if node.is_leaf():
166+
# root 永远不参与回收:当树为空时 root 自身也满足 is_leaf(),若加入 _evict_tree_set,
167+
# 会与 _evict 中 "node is not self.root_node" 的断言相矛盾(当前仅靠 root 的 ref_counter>=1
168+
# 和回收水位 guard 掩盖)。这里显式排除,使数据结构与回收逻辑的意图一致。
169+
if node.is_leaf() and node is not self.root_node:
167170
self._evict_tree_set.add(node)
168171
if node.small_page_buffer_idx is not None:
169172
self._evict_tree_set_for_linear_att.add(node)

lightllm/server/router/model_infer/infer_batch.py

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -226,6 +226,19 @@ def _linear_att_free_req(self, free_token_index: List, req: "InferReq"):
226226

227227
if shared_kv_len <= req.cur_kv_len:
228228
free_token_index.append(self.req_manager.req_to_token_indexs[req.req_idx][shared_kv_len : req.cur_kv_len])
229+
# 该分支不会把 prefill 阶段累积的 big page id 插入 radix cache(典型为 pause/abort
230+
# 在 prefill 跨过 big page 边界后、到达末尾前触发),需在此显式释放,避免泄漏。
231+
232+
# 释放本请求 prefill 阶段在 big page 边界上申请、但尚未插入 radix cache 的 big page
233+
# state buffer。仅当请求未走 insert 分支(小页/大页插入)就被释放时才会有残留,典型场景:
234+
# big page 模式下请求在 prefill 跨过 big page 边界后、到达末尾前被 pause / abort。
235+
# 若不释放,会泄漏 big page state slot,并触发 free_a_req_mem 中 dict 为空的断言。
236+
if req.linear_att_len_to_big_page_id:
237+
self.radix_cache.linear_att_big_page_buffers.free_state_cache(
238+
list(req.linear_att_len_to_big_page_id.values())
239+
)
240+
req.linear_att_len_to_big_page_id.clear()
241+
229242
req.cur_kv_len = shared_kv_len
230243
assert req.tail_linear_att_small_page_buffer_id is None
231244
if req.shared_kv_node is not None:

0 commit comments

Comments
 (0)