Address CodeRabbit review comments: bounds-check chunked dst block slicing and respect an explicit transceiver_runtime

athena-nv · athena-nv · commit 849febfa9033 · 2026-06-24T16:21:35.000Z
Signed-off-by: Athena Cai <athenac@nvidia.com>
diff --git a/tensorrt_llm/_torch/disaggregation/native/transfer.py b/tensorrt_llm/_torch/disaggregation/native/transfer.py
@@ -732,10 +732,14 @@ def _build_kv_write_meta(self, task: KVSendTask, req_info: RecvReqInfo) -> Write
                 # When sender uses chunking, the receiver sends all dst
                 # blocks in a single RecvReqInfo.  Slice dst to match
                 # this task's src chunk position.
-                if chunk_offset > 0 or len(src_block_ids) < len(full_dst_block_ids):
-                    dst_block_ids = full_dst_block_ids[
-                        chunk_offset : chunk_offset + len(src_block_ids)
-                    ]
+                if chunk_offset > 0 or not task._slice.is_last_slice:
+                    chunk_end = chunk_offset + len(src_block_ids)
+                    if chunk_end > full_dst_block_ids.size:
+                        raise ValueError(
+                            f"dst chunk range out of bounds: offset={chunk_offset}, "
+                            f"len={len(src_block_ids)}, dst_blocks={full_dst_block_ids.size}"
+                        )
+                    dst_block_ids = full_dst_block_ids[chunk_offset:chunk_end]
                 else:
                     dst_block_ids = full_dst_block_ids
 
@@ -747,10 +751,10 @@ def _build_kv_write_meta(self, task: KVSendTask, req_info: RecvReqInfo) -> Write
                         f"src={src_block_ids.size}, dst={dst_block_ids.size}"
                     )
                     dst_block_ids = dst_block_ids[:-1]
-                elif block_diff > 1:
+                elif block_diff != 0:
                     raise ValueError(
                         f"src/dst block count mismatch: {src_block_ids.size} vs "
-                        f"{dst_block_ids.size} (expected diff <= 1)"
+                        f"{dst_block_ids.size} (expected 0 <= diff <= 1)"
                     )
                 tpb = extractor.page_table.tokens_per_block
                 token_range = task._slice.token_range
diff --git a/tensorrt_llm/_torch/pyexecutor/kv_cache_transceiver.py b/tensorrt_llm/_torch/pyexecutor/kv_cache_transceiver.py
@@ -74,8 +74,9 @@ def create_kv_cache_transceiver(
     # since the C++ transceiver does not support chunked transfer.
     # Only applies to NIXL/DEFAULT backends (the Python transceiver
     # does not support UCX, MPI, or MOONCAKE).
-    use_python = cache_transceiver_config.transceiver_runtime == "PYTHON"
-    if (not use_python
+    runtime = cache_transceiver_config.transceiver_runtime
+    use_python = runtime == "PYTHON"
+    if (runtime is None
             and cache_transceiver_config.chunk_size_blocks is not None):
         if cache_transceiver_config.backend in (None, "DEFAULT", "NIXL"):
             # Use warning (not info) so users notice the transceiver swap and
@@ -95,6 +96,12 @@ def create_kv_cache_transceiver(
                 f"transceiver, which does not support chunked transfer. "
                 f"chunk_size_blocks will be ignored. Use NIXL backend to "
                 f"enable chunked transfer.")
+    elif (runtime == "CPP"
+          and cache_transceiver_config.chunk_size_blocks is not None):
+        logger.warning(
+            "chunk_size_blocks is set but transceiver_runtime='CPP' "
+            "explicitly disables Python auto-selection; "
+            "chunk_size_blocks will be ignored.")
 
     # Warn when chunk_size_blocks is below the recommended floor.  The Pydantic
     # field is PositiveInt (>=1), but values below ~16 push the per-chunk RDMA