Skip to content

Commit 84fd8c2

Browse files
committed
bugfix AS block leaks
1 parent 3ec5011 commit 84fd8c2

1 file changed

Lines changed: 9 additions & 2 deletions

File tree

fastdeploy/engine/sched/resource_manager_v1.py

Lines changed: 9 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1070,9 +1070,12 @@ def _allocate_decode_and_extend():
10701070
self.cache_manager.num_cpu_blocks > 0
10711071
or self.config.cache_config.kvcache_storage_backend
10721072
):
1073-
if not self.cache_manager.can_allocate_gpu_blocks(
1073+
can_schedule_block_num_threshold = self._get_can_schedule_prefill_threshold_block(
10741074
(request.need_prefill_tokens + self.config.cache_config.block_size - 1)
10751075
// self.config.cache_config.block_size
1076+
)
1077+
if not self.cache_manager.can_allocate_gpu_blocks(
1078+
can_schedule_block_num_threshold
10761079
): # to prevent block allocation for matching in hierarchical cache and cause dead lock
10771080
break
10781081
success = self.get_prefix_cached_blocks(request)
@@ -1150,9 +1153,12 @@ def _allocate_decode_and_extend():
11501153
self.cache_manager.num_cpu_blocks > 0
11511154
or self.config.cache_config.kvcache_storage_backend
11521155
):
1153-
if not self.cache_manager.can_allocate_gpu_blocks(
1156+
can_schedule_block_num_threshold = self._get_can_schedule_prefill_threshold_block(
11541157
(request.need_prefill_tokens + self.config.cache_config.block_size - 1)
11551158
// self.config.cache_config.block_size
1159+
)
1160+
if not self.cache_manager.can_allocate_gpu_blocks(
1161+
can_schedule_block_num_threshold
11561162
): # to prevent block allocation for matching in hierarchical cache and cause dead lock
11571163
break
11581164
success = self.get_prefix_cached_blocks(request)
@@ -1196,6 +1202,7 @@ def _allocate_decode_and_extend():
11961202
)
11971203
request.status = RequestStatus.RUNNING_PREFILL
11981204
else:
1205+
# Warning: _free_blocks before update_cache_blocks may cause a storage block leak
11991206
if self.config.cache_config.enable_prefix_caching:
12001207
self._free_blocks(request)
12011208
break

0 commit comments

Comments
 (0)