We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 8c4f5a6, commit 507e464 (Copy full SHA for 507e464)
1 file changed
fastdeploy/worker/input_batch.py
@@ -189,9 +189,8 @@ def init_share_inputs(self):
189
self.cu_seqlens_k = paddle.full([max_num_seqs + 1], 0, dtype="int32")
190
191
# Initialize addressing buffers
192
- _max_batched_tokens = self.scheduler_config.max_num_batched_tokens
193
- self.position_ids_buffer = paddle.zeros([_max_batched_tokens], dtype=paddle.int32)
194
- self.slot_mapping_buffer = paddle.zeros([_max_batched_tokens], dtype=paddle.int64)
+ self.position_ids_buffer = paddle.zeros([self.max_chunk_tokens], dtype=paddle.int32)
+ self.slot_mapping_buffer = paddle.zeros([self.max_chunk_tokens], dtype=paddle.int64)
195
196
# Declare AttentionBackend buffers
197
self.decoder_batch_ids = None
0 commit comments