Skip to content

Commit 0a5d4b6

Browse files
zccjjj and kevincheng2 authored
[bugfix] AS block leaks (#7895)
Co-authored-by: kevin <chengyf112@gmail.com>
1 parent e7a02e2 commit 0a5d4b6

1 file changed

Lines changed: 10 additions & 2 deletions

File tree

fastdeploy/engine/sched/resource_manager_v1.py

Lines changed: 10 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1063,9 +1063,12 @@ def _allocate_decode_and_extend():
10631063
self.cache_manager.num_cpu_blocks > 0
10641064
or self.config.cache_config.kvcache_storage_backend
10651065
):
1066-
if not self.cache_manager.can_allocate_gpu_blocks(
1066+
can_schedule_block_num_threshold = self._get_can_schedule_prefill_threshold_block(
10671067
(request.need_prefill_tokens + self.config.cache_config.block_size - 1)
10681068
// self.config.cache_config.block_size
1069+
)
1070+
if not self.cache_manager.can_allocate_gpu_blocks(
1071+
can_schedule_block_num_threshold
10691072
): # to prevent block allocation for matching in hierarchical cache and cause dead lock
10701073
break
10711074
success = self.get_prefix_cached_blocks(request)
@@ -1124,6 +1127,7 @@ def _allocate_decode_and_extend():
11241127
self.req_dict[request.request_id] = allocated_position
11251128
llm_logger.debug(f"req_id:{request.request_id} allocate pos end")
11261129
else:
1130+
# Warning: _free_blocks before update_cache_blocks may cause storage blocks leak
11271131
if self.config.cache_config.enable_prefix_caching:
11281132
self._free_blocks(request)
11291133
break
@@ -1139,9 +1143,12 @@ def _allocate_decode_and_extend():
11391143
self.cache_manager.num_cpu_blocks > 0
11401144
or self.config.cache_config.kvcache_storage_backend
11411145
):
1142-
if not self.cache_manager.can_allocate_gpu_blocks(
1146+
can_schedule_block_num_threshold = self._get_can_schedule_prefill_threshold_block(
11431147
(request.need_prefill_tokens + self.config.cache_config.block_size - 1)
11441148
// self.config.cache_config.block_size
1149+
)
1150+
if not self.cache_manager.can_allocate_gpu_blocks(
1151+
can_schedule_block_num_threshold
11451152
): # to prevent block allocation for matching in hierarchical cache and cause dead lock
11461153
break
11471154
success = self.get_prefix_cached_blocks(request)
@@ -1186,6 +1193,7 @@ def _allocate_decode_and_extend():
11861193
)
11871194
request.status = RequestStatus.RUNNING_PREFILL
11881195
else:
1196+
# Warning: _free_blocks before update_cache_blocks may cause storage blocks leak
11891197
if self.config.cache_config.enable_prefix_caching:
11901198
self._free_blocks(request)
11911199
break

0 commit comments

Comments
 (0)