Skip to content

Commit 85d5e6e

Browse files
authored
[None][refactor] Move KV cache manager V2 to separate file (#14680)
Signed-off-by: Jiagan Cheng <jiaganc@nvidia.com>
1 parent c323881 commit 85d5e6e

20 files changed

Lines changed: 1870 additions & 1726 deletions

.github/CODEOWNERS

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,7 @@ docs/source/performance/perf-benchmarking.md @NVIDIA/trtllm-bench-reviewers
240240
/cpp/tensorrt_llm/batch_manager/allocateKvCache.cpp @NVIDIA/trt-llm-kv-cache-manager-devs
241241
/cpp/tests/unit_tests/batch_manager/kvCacheManagerTest.cpp @NVIDIA/trt-llm-kv-cache-manager-devs
242242
/cpp/tests/unit_tests/batch_manager/kvCacheUtilsTest.cpp @NVIDIA/trt-llm-kv-cache-manager-devs
243+
/tensorrt_llm/_torch/pyexecutor/kv_cache_manager_v2.py @NVIDIA/trt-llm-kv-cache-manager-devs
243244
/tensorrt_llm/_torch/pyexecutor/resource_manager.py @NVIDIA/trt-llm-kv-cache-manager-devs
244245
/cpp/tensorrt_llm/nanobind/batch_manager/kvCacheManager.h @NVIDIA/trt-llm-kv-cache-manager-devs
245246
/cpp/tensorrt_llm/nanobind/batch_manager/kvCacheManager.cpp @NVIDIA/trt-llm-kv-cache-manager-devs

tensorrt_llm/_torch/attention_backend/interface.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,9 @@
2424

2525
from ..memory_buffer_utils import Buffers
2626
from ..metadata import KVCacheParams
27+
from ..pyexecutor.kv_cache_manager_v2 import KVCacheManagerV2
2728
from ..pyexecutor.mamba_cache_manager import BaseMambaCacheManager
28-
from ..pyexecutor.resource_manager import KVCacheManager, KVCacheManagerV2
29+
from ..pyexecutor.resource_manager import KVCacheManager
2930
from ..utils import get_model_extra_attrs
3031

3132
try:

tensorrt_llm/_torch/disaggregation/resource/cache_reuse.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,9 @@
1919

2020
import numpy as np
2121

22+
from tensorrt_llm._torch.pyexecutor.kv_cache_manager_v2 import KVCacheManagerV2
2223
from tensorrt_llm._torch.pyexecutor.llm_request import LlmRequest
23-
from tensorrt_llm._torch.pyexecutor.resource_manager import KVCacheManager, KVCacheManagerV2
24+
from tensorrt_llm._torch.pyexecutor.resource_manager import KVCacheManager
2425

2526
from .page import AttentionLayerGroup
2627
from .utils import get_global_layer_ids

tensorrt_llm/_torch/disaggregation/resource/kv_extractor.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -296,7 +296,7 @@ def _build_page_table_v2(manager) -> KVCachePageTable:
296296
"""
297297
from collections import defaultdict
298298

299-
from tensorrt_llm._torch.pyexecutor.resource_manager import Role
299+
from tensorrt_llm._torch.pyexecutor.kv_cache_manager_v2 import Role
300300
from tensorrt_llm.runtime.kv_cache_manager_v2 import CacheTier
301301

302302
_ROLE_STR_TO_ENUM: dict[str, DataRole] = {

tensorrt_llm/_torch/pyexecutor/_util.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
from .connectors.kv_cache_connector import KvCacheConnectorManager
3838
from .dwdp import DwdpManager
3939
from .guided_decoder import GuidedDecoder
40+
from .kv_cache_manager_v2 import KVCacheManagerV2
4041
from .kv_cache_transceiver import AttentionTypeCpp, create_kv_cache_transceiver
4142
from .llm_request import ExecutorResponse
4243
from .mamba_cache_manager import (BaseMambaCacheManager,
@@ -46,9 +47,8 @@
4647
use_py_mamba_cache_manager)
4748
from .model_engine import PyTorchModelEngine
4849
from .py_executor import PyExecutor
49-
from .resource_manager import (KVCacheManager, KVCacheManagerV2,
50-
PeftCacheManager, ResourceManager,
51-
ResourceManagerType)
50+
from .resource_manager import (KVCacheManager, PeftCacheManager,
51+
ResourceManager, ResourceManagerType)
5252
from .sampler import (EarlyStopSampler, EarlyStopWithMMResult, TorchSampler,
5353
TRTLLMSampler)
5454
from .scheduler import (BindCapacityScheduler, BindMicroBatchScheduler,

0 commit comments

Comments
 (0)