Skip to content

Commit a502dda

Browse files
authored
[BugFix] fix multi-step mtp bug (PaddlePaddle#6754)
1 parent b05a6c4 commit a502dda

2 files changed

Lines changed: 4 additions & 6 deletions

File tree

fastdeploy/engine/sched/resource_manager_v1.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,8 @@ def get_new_block_nums(self, request: Request, num_new_tokens: int):
229 229

230 230
if self.config.speculative_config.method is not None:
231 231
block_num = min(block_num + 1, self.config.cache_config.max_block_num_per_seq)
232+
else:
233+
block_num = min(block_num, self.config.cache_config.max_block_num_per_seq)
232 234
return block_num
233 235

234 236
def _prepare_prefill_task(self, request, new_token_num):
@@ -926,7 +928,7 @@ def _allocate_decode_and_extend():
926 928
)
927 929
# Allocate blocks to prefill
928 930
if self.cache_manager.can_allocate_gpu_blocks(can_schedule_block_num_threshold):
929-
if not request.get("skip_allocate", False):
931+
if num_new_block > 0:
930 932
extra_gpu_block_ids = self.cache_manager.allocate_gpu_blocks(
931 933
num_new_block, request.request_id
932 934
)
@@ -985,7 +987,7 @@ def _allocate_decode_and_extend():
985 987
)
986 988
# Allocate blocks to prefill
987 989
if self.cache_manager.can_allocate_gpu_blocks(can_schedule_block_num_threshold):
988-
if not request.get("skip_allocate", False):
990+
if num_new_block > 0:
989 991
extra_gpu_block_ids = self.cache_manager.allocate_gpu_blocks(
990 992
num_new_block, request.request_id
991 993
)
@@ -1166,19 +1168,16 @@ def get_prefix_cached_blocks(self, request: Request):
1166 1168

1167 1169
request.cache_info = [matched_block_num, no_cache_block_num]
1168 1170
request.block_tables = common_block_ids
1169-
request.skip_allocate = False
1170 1171
request.num_cached_tokens = matched_token_num
1171 1172
if self.config.cache_config.disable_chunked_mm_input:
1172 1173
if matched_token_num == request.need_prefill_tokens:
1173 1174
matched_token_num = matched_token_num - self.config.cache_config.block_size
1174-
request.skip_allocate = True
1175 1175
request.num_computed_tokens = self.revert_chunked_mm_input(
1176 1176
request.multimodal_inputs, matched_token_num
1177 1177
)
1178 1178
else:
1179 1179
if matched_token_num == request.need_prefill_tokens:
1180 1180
request.num_computed_tokens = matched_token_num - self.config.cache_config.block_size
1181-
request.skip_allocate = True
1182 1181
else:
1183 1182
request.num_computed_tokens = matched_token_num
1184 1183

tests/v1/test_resource_manager_v1.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -541,7 +541,6 @@ def test_get_prefix_cached_blocks_with_revert(self):
541 541
manager.cache_manager.get_required_block_num.return_value = 0
542 542
success = manager.get_prefix_cached_blocks(request)
543 543
self.assertTrue(success)
544-
self.assertTrue(request.skip_allocate)
545 544
self.assertEqual(request.num_cached_tokens, 8)
546 545
self.assertEqual(request.metrics.gpu_cache_token_num, 4)
547 546
self.assertEqual(request.metrics.cpu_cache_token_num, 0)

0 commit comments

Comments
 (0)