Skip to content

Commit 849febf

Browse files
committed
address coderabbit comments
Signed-off-by: Athena Cai <athenac@nvidia.com>
1 parent 7746bbc commit 849febf

2 files changed

Lines changed: 19 additions & 8 deletions

File tree

tensorrt_llm/_torch/disaggregation/native/transfer.py

Lines changed: 10 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -732,10 +732,14 @@ def _build_kv_write_meta(self, task: KVSendTask, req_info: RecvReqInfo) -> Write
732732
# When sender uses chunking, the receiver sends all dst
733733
# blocks in a single RecvReqInfo. Slice dst to match
734734
# this task's src chunk position.
735-
if chunk_offset > 0 or len(src_block_ids) < len(full_dst_block_ids):
736-
dst_block_ids = full_dst_block_ids[
737-
chunk_offset : chunk_offset + len(src_block_ids)
738-
]
735+
if chunk_offset > 0 or not task._slice.is_last_slice:
736+
chunk_end = chunk_offset + len(src_block_ids)
737+
if chunk_end > full_dst_block_ids.size:
738+
raise ValueError(
739+
f"dst chunk range out of bounds: offset={chunk_offset}, "
740+
f"len={len(src_block_ids)}, dst_blocks={full_dst_block_ids.size}"
741+
)
742+
dst_block_ids = full_dst_block_ids[chunk_offset:chunk_end]
739743
else:
740744
dst_block_ids = full_dst_block_ids
741745

@@ -747,10 +751,10 @@ def _build_kv_write_meta(self, task: KVSendTask, req_info: RecvReqInfo) -> Write
747751
f"src={src_block_ids.size}, dst={dst_block_ids.size}"
748752
)
749753
dst_block_ids = dst_block_ids[:-1]
750-
elif block_diff > 1:
754+
elif block_diff != 0:
751755
raise ValueError(
752756
f"src/dst block count mismatch: {src_block_ids.size} vs "
753-
f"{dst_block_ids.size} (expected diff <= 1)"
757+
f"{dst_block_ids.size} (expected 0 <= diff <= 1)"
754758
)
755759
tpb = extractor.page_table.tokens_per_block
756760
token_range = task._slice.token_range

tensorrt_llm/_torch/pyexecutor/kv_cache_transceiver.py

Lines changed: 9 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -74,8 +74,9 @@ def create_kv_cache_transceiver(
7474
# since the C++ transceiver does not support chunked transfer.
7575
# Only applies to NIXL/DEFAULT backends (the Python transceiver
7676
# does not support UCX, MPI, or MOONCAKE).
77-
use_python = cache_transceiver_config.transceiver_runtime == "PYTHON"
78-
if (not use_python
77+
runtime = cache_transceiver_config.transceiver_runtime
78+
use_python = runtime == "PYTHON"
79+
if (runtime is None
7980
and cache_transceiver_config.chunk_size_blocks is not None):
8081
if cache_transceiver_config.backend in (None, "DEFAULT", "NIXL"):
8182
# Use warning (not info) so users notice the transceiver swap and
@@ -95,6 +96,12 @@ def create_kv_cache_transceiver(
9596
f"transceiver, which does not support chunked transfer. "
9697
f"chunk_size_blocks will be ignored. Use NIXL backend to "
9798
f"enable chunked transfer.")
99+
elif (runtime == "CPP"
100+
and cache_transceiver_config.chunk_size_blocks is not None):
101+
logger.warning(
102+
"chunk_size_blocks is set but transceiver_runtime='CPP' "
103+
"explicitly disables Python auto-selection; "
104+
"chunk_size_blocks will be ignored.")
98105

99106
# Warn when chunk_size_blocks is below the recommended floor. The Pydantic
100107
# field is PositiveInt (>=1), but values below ~16 push the per-chunk RDMA

0 commit comments

Comments
 (0)