NVIDIA
diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
Lines changed: 1 addition & 0 deletions
diff --git a/tensorrt_llm/_torch/attention_backend/interface.py b/tensorrt_llm/_torch/attention_backend/interface.py
Lines changed: 2 additions & 1 deletion
diff --git a/tensorrt_llm/_torch/disaggregation/resource/cache_reuse.py b/tensorrt_llm/_torch/disaggregation/resource/cache_reuse.py
Lines changed: 2 additions & 1 deletion
diff --git a/tensorrt_llm/_torch/disaggregation/resource/kv_extractor.py b/tensorrt_llm/_torch/disaggregation/resource/kv_extractor.py
Lines changed: 1 addition & 1 deletion
diff --git a/tensorrt_llm/_torch/pyexecutor/_util.py b/tensorrt_llm/_torch/pyexecutor/_util.py
Lines changed: 3 additions & 3 deletions
@@ -240,6 +240,7 @@ docs/source/performance/perf-benchmarking.md @NVIDIA/trtllm-bench-reviewers
 /cpp/tensorrt_llm/batch_manager/allocateKvCache.cpp @NVIDIA/trt-llm-kv-cache-manager-devs
 /cpp/tests/unit_tests/batch_manager/kvCacheManagerTest.cpp @NVIDIA/trt-llm-kv-cache-manager-devs
 /cpp/tests/unit_tests/batch_manager/kvCacheUtilsTest.cpp @NVIDIA/trt-llm-kv-cache-manager-devs
+/tensorrt_llm/_torch/pyexecutor/kv_cache_manager_v2.py @NVIDIA/trt-llm-kv-cache-manager-devs
 /tensorrt_llm/_torch/pyexecutor/resource_manager.py @NVIDIA/trt-llm-kv-cache-manager-devs
 /cpp/tensorrt_llm/nanobind/batch_manager/kvCacheManager.h @NVIDIA/trt-llm-kv-cache-manager-devs
 /cpp/tensorrt_llm/nanobind/batch_manager/kvCacheManager.cpp @NVIDIA/trt-llm-kv-cache-manager-devs
 
@@ -24,8 +24,9 @@
 
 from ..memory_buffer_utils import Buffers
 from ..metadata import KVCacheParams
+from ..pyexecutor.kv_cache_manager_v2 import KVCacheManagerV2
 from ..pyexecutor.mamba_cache_manager import BaseMambaCacheManager
-from ..pyexecutor.resource_manager import KVCacheManager, KVCacheManagerV2
+from ..pyexecutor.resource_manager import KVCacheManager
 from ..utils import get_model_extra_attrs
 
 try:
 
@@ -19,8 +19,9 @@
 
 import numpy as np
 
+from tensorrt_llm._torch.pyexecutor.kv_cache_manager_v2 import KVCacheManagerV2
 from tensorrt_llm._torch.pyexecutor.llm_request import LlmRequest
-from tensorrt_llm._torch.pyexecutor.resource_manager import KVCacheManager, KVCacheManagerV2
+from tensorrt_llm._torch.pyexecutor.resource_manager import KVCacheManager
 
 from .page import AttentionLayerGroup
 from .utils import get_global_layer_ids
 
@@ -296,7 +296,7 @@ def _build_page_table_v2(manager) -> KVCachePageTable:
     """
     from collections import defaultdict
 
-    from tensorrt_llm._torch.pyexecutor.resource_manager import Role
+    from tensorrt_llm._torch.pyexecutor.kv_cache_manager_v2 import Role
     from tensorrt_llm.runtime.kv_cache_manager_v2 import CacheTier
 
     _ROLE_STR_TO_ENUM: dict[str, DataRole] = {
 
@@ -37,6 +37,7 @@
 from .connectors.kv_cache_connector import KvCacheConnectorManager
 from .dwdp import DwdpManager
 from .guided_decoder import GuidedDecoder
+from .kv_cache_manager_v2 import KVCacheManagerV2
 from .kv_cache_transceiver import AttentionTypeCpp, create_kv_cache_transceiver
 from .llm_request import ExecutorResponse
 from .mamba_cache_manager import (BaseMambaCacheManager,
@@ -46,9 +47,8 @@
                                   use_py_mamba_cache_manager)
 from .model_engine import PyTorchModelEngine
 from .py_executor import PyExecutor
-from .resource_manager import (KVCacheManager, KVCacheManagerV2,
-                               PeftCacheManager, ResourceManager,
-                               ResourceManagerType)
+from .resource_manager import (KVCacheManager, PeftCacheManager,
+                               ResourceManager, ResourceManagerType)
 from .sampler import (EarlyStopSampler, EarlyStopWithMMResult, TorchSampler,
                       TRTLLMSampler)
 from .scheduler import (BindCapacityScheduler, BindMicroBatchScheduler,