Skip to content

Commit 3735546

Browse files
committed
[bugfix] AS block leaks
1 parent 3ec5011 commit 3735546

1 file changed

Lines changed: 10 additions & 2 deletions

File tree

fastdeploy/engine/sched/resource_manager_v1.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1070,9 +1070,12 @@ def _allocate_decode_and_extend():
10701070
self.cache_manager.num_cpu_blocks > 0
10711071
or self.config.cache_config.kvcache_storage_backend
10721072
):
1073-
if not self.cache_manager.can_allocate_gpu_blocks(
1073+
can_schedule_block_num_threshold = self._get_can_schedule_prefill_threshold_block(
10741074
(request.need_prefill_tokens + self.config.cache_config.block_size - 1)
10751075
// self.config.cache_config.block_size
1076+
)
1077+
if not self.cache_manager.can_allocate_gpu_blocks(
1078+
can_schedule_block_num_threshold
10761079
): # avoid a deadlock caused by allocating blocks for matching in the hierarchical cache
10771080
break
10781081
success = self.get_prefix_cached_blocks(request)
@@ -1134,6 +1137,7 @@ def _allocate_decode_and_extend():
11341137
self.req_dict[request.request_id] = allocated_position
11351138
llm_logger.debug(f"req_id:{request.request_id} allocate pos end")
11361139
else:
1140+
# Warning: calling _free_blocks before update_cache_blocks may leak storage blocks
11371141
if self.config.cache_config.enable_prefix_caching:
11381142
self._free_blocks(request)
11391143
break
@@ -1150,9 +1154,12 @@ def _allocate_decode_and_extend():
11501154
self.cache_manager.num_cpu_blocks > 0
11511155
or self.config.cache_config.kvcache_storage_backend
11521156
):
1153-
if not self.cache_manager.can_allocate_gpu_blocks(
1157+
can_schedule_block_num_threshold = self._get_can_schedule_prefill_threshold_block(
11541158
(request.need_prefill_tokens + self.config.cache_config.block_size - 1)
11551159
// self.config.cache_config.block_size
1160+
)
1161+
if not self.cache_manager.can_allocate_gpu_blocks(
1162+
can_schedule_block_num_threshold
11561163
): # avoid a deadlock caused by allocating blocks for matching in the hierarchical cache
11571164
break
11581165
success = self.get_prefix_cached_blocks(request)
@@ -1196,6 +1203,7 @@ def _allocate_decode_and_extend():
11961203
)
11971204
request.status = RequestStatus.RUNNING_PREFILL
11981205
else:
1206+
# Warning: calling _free_blocks before update_cache_blocks may leak storage blocks
11991207
if self.config.cache_config.enable_prefix_caching:
12001208
self._free_blocks(request)
12011209
break

0 commit comments

Comments
 (0)