Skip to content

Commit 01b8eb5

Browse files
kevincheng2 and claude committed
[KVCache][Engine][BugFix] fix cache evict metadata direction and resource manager v1 bugs
## Motivation

修复 cache_manager 和 resource_manager_v1 中的多个 bug。

## Modifications

- `cache_manager.py`: 修复 `gpu_free_block_list` 返回实际空闲块列表而非 range,调整日志顺序(先打印日志再计算 matched_device/host_ids)
- `common_engine.py`: 修正 typo(Unexcepted → Unexpected)
- `request.py`: 修正 `cache_evict_metadata` 中 src/dst 类型方向错误(DEVICE→HOST 驱逐方向)
- `resource_manager_v1.py`: PD 分离 prefill 节点跳过 prefix cache update_cache_blocks;在 prefill 节点分配后调用 update_cache_blocks

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 82a5b80 commit 01b8eb5

4 files changed

Lines changed: 13 additions & 6 deletions

File tree

fastdeploy/cache_manager/v1/cache_manager.py

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -431,7 +431,7 @@ def gpu_free_block_list(self) -> List[int]:
431431
with PrefixCacheManager.gpu_free_block_list.
432432
"""
433433
# Return list representation of available blocks
434-
return list(range(self._device_pool.available_blocks()))
434+
return list(self._device_pool._free_blocks)
435435

436436
@property
437437
def available_gpu_resource(self) -> float:
@@ -536,13 +536,14 @@ def match_prefix(
536536
if not (self._storage_scheduler and skip_storage):
537537
self._radix_tree.increment_ref_nodes(matched_nodes)
538538

539-
matched_device_ids = [n.block_id for n in result.device_nodes]
540-
matched_host_ids = [n.block_id for n in result.host_nodes]
541539
logger.info(
542540
f"match_prefix for request_id: {request.request_id} total_hashes: {len(block_hashes)}, "
543541
f"total_matched: {result.total_matched_blocks} (device_blocks={result.matched_device_nums}, "
544542
f"host_blocks={result.matched_host_nums}, storage_hashes={result.matched_storage_nums})"
545543
)
544+
545+
matched_device_ids = [n.block_id for n in result.device_nodes]
546+
matched_host_ids = [n.block_id for n in result.host_nodes]
546547
logger.debug(
547548
f"[match_prefix] request_id={request.request_id} "
548549
f"matched_device_block_ids={matched_device_ids} "

fastdeploy/engine/common_engine.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2091,7 +2091,7 @@ def _zmq_send_generated_tokens(self):
20912091
if batch_data:
20922092
self.send_response_server.send_response(None, batch_data, worker_pid=wpid)
20932093
except Exception as e:
2094-
self.llm_logger.error(f"Unexcepted error happend: {e}, {traceback.format_exc()!s}")
2094+
self.llm_logger.error(f"Unexpected error happend: {e}, {traceback.format_exc()!s}")
20952095

20962096
def _decode_process_splitwise_requests(self):
20972097
"""

fastdeploy/engine/request.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -664,8 +664,8 @@ def append_evict_metadata(self, metadata: List[CacheSwapMetadata]):
664664
self.cache_evict_metadata = CacheSwapMetadata(
665665
src_block_ids=meta.src_block_ids,
666666
dst_block_ids=meta.dst_block_ids,
667-
src_type=CacheLevel.HOST,
668-
dst_type=CacheLevel.DEVICE,
667+
src_type=CacheLevel.DEVICE,
668+
dst_type=CacheLevel.HOST,
669669
hash_values=meta.hash_values,
670670
)
671671

fastdeploy/engine/sched/resource_manager_v1.py

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1071,6 +1071,7 @@ def _allocate_decode_and_extend():
10711071
if (
10721072
self.config.cache_config.enable_prefix_caching
10731073
and self.config.scheduler_config.splitwise_role != "decode"
1074+
and self.config.scheduler_config.splitwise_role != "prefill"
10741075
and not self.enable_cache_manager_v1
10751076
):
10761077
self.cache_manager.update_cache_blocks(
@@ -1844,6 +1845,11 @@ def preallocate_resource_in_p(self, request: Request):
18441845
self.stop_flags[request.idx] = False
18451846
self.requests[request.request_id] = request
18461847
self.req_dict[request.request_id] = allocated_position
1848+
1849+
self.cache_manager.update_cache_blocks(
1850+
request, self.config.cache_config.block_size, request.need_prefill_tokens
1851+
)
1852+
18471853
return True
18481854
else:
18491855
self._free_blocks(request)

0 commit comments

Comments
 (0)