We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 507e464, commit 956b543 (Copy full SHA for 956b543)
1 file changed
fastdeploy/worker/gpu_model_runner.py
@@ -1337,7 +1337,9 @@ def _compute_position_ids_and_slot_mapping(self) -> None:
1337
)
1338
block_size = self.cache_config.block_size
1339
block_idx = position_ids // block_size # [num_tokens]
1340
- assert self.forward_meta.batch_id_per_token.shape == block_idx.shape
+ assert (
1341
+ self.forward_meta.batch_id_per_token.shape == block_idx.shape
1342
+ ), f"batch_id_per_token.shape:{self.forward_meta.batch_id_per_token.shape} != block_idx.shape:{block_idx.shape}"
1343
block_ids = self.forward_meta.block_tables[self.forward_meta.batch_id_per_token, block_idx] # [num_tokens]
1344
block_offset = position_ids % block_size # [num_tokens]
1345
slot_mapping = self.share_inputs["slot_mapping_buffer"][:current_total_tokens]
0 commit comments