Skip to content

Commit ac24fcc

Browse files
authored
[Cherry-Pick][BugFix] Fix MTP reset bugs in RL (#7957) (#7958)
1 parent: fefbcff · commit: ac24fcc

1 file changed

Lines changed: 4 additions & 0 deletions

File tree

fastdeploy/worker/input_batch.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -989,6 +989,10 @@ def reset_model_inputs(self) -> None:
989989
# NOTE(fix): These tensors are dynamically resized during runtime inference.
990990
# They must be recreated at their full initial size to avoid out-of-bounds (OOB) access during CUDAGraph replay.
991991
max_num_seqs = self.scheduler_config.max_num_seqs
992+
if self.enable_mm and self.model_config.mm_max_tokens_per_item is None:
993+
self.max_chunk_tokens = self.model_config.max_model_len
994+
else:
995+
self.max_chunk_tokens = self.fd_config.get_max_chunk_tokens(self.model_config.mm_max_tokens_per_item)
992996
self.ids_remove_padding = paddle.full([max_num_seqs * self.max_chunk_tokens], 0, dtype="int64")
993997
self.batch_id_per_token = paddle.full([max_num_seqs * self.max_chunk_tokens, 1], 0, dtype="int32")
994998
self.cu_seqlens_q = paddle.full([max_num_seqs + 1], 0, dtype="int32")

0 commit comments

Comments (0)