Refine code: add a descriptive failure message to the batch_id_per_token shape assertion

gongshaotian · gongshaotian · commit 956b5438e1bc · 2026-05-20T17:32:06.000+08:00
diff --git a/fastdeploy/worker/gpu_model_runner.py b/fastdeploy/worker/gpu_model_runner.py
@@ -1337,7 +1337,9 @@ def _compute_position_ids_and_slot_mapping(self) -> None:
         )
         block_size = self.cache_config.block_size
         block_idx = position_ids // block_size  # [num_tokens]
-        assert self.forward_meta.batch_id_per_token.shape == block_idx.shape
+        assert (
+            self.forward_meta.batch_id_per_token.shape == block_idx.shape
+        ), f"batch_id_per_token.shape:{self.forward_meta.batch_id_per_token.shape} != block_idx.shape:{block_idx.shape}"
         block_ids = self.forward_meta.block_tables[self.forward_meta.batch_id_per_token, block_idx]  # [num_tokens]
         block_offset = position_ids % block_size  # [num_tokens]
         slot_mapping = self.share_inputs["slot_mapping_buffer"][:current_total_tokens]