Skip to content

Commit 78e34a7

Browse files
authored
nixl pd support qwen3.5 (#1340)
1 parent da9dfb8 commit 78e34a7

15 files changed

Lines changed: 840 additions & 38 deletions

File tree

lightllm/common/kv_cache_mem_manager/deepseek2_mem_manager.py

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -56,7 +56,10 @@ def write_mem_to_page_kv_move_buffer(
5656
dp_index: int,
5757
mem_managers: List["MemoryManager"],
5858
dp_world_size: int,
59+
page_kind: str = "kv",
60+
req_idx: int = None,
5961
):
62+
assert page_kind == "kv", f"{type(self).__name__} does not support page_kind={page_kind}"
6063
cur_page = self.kv_move_buffer[page_index]
6164
pin_mem_indexes = self._buffer_mem_indexes_tensors[page_index][0 : len(mem_indexes)]
6265
pin_mem_indexes.numpy()[:] = mem_indexes
@@ -77,7 +80,10 @@ def read_page_kv_move_buffer_to_mem(
7780
dp_index: int,
7881
mem_managers: List["MemoryManager"],
7982
dp_world_size: int,
83+
page_kind: str = "kv",
84+
req_idx: int = None,
8085
):
86+
assert page_kind == "kv", f"{type(self).__name__} does not support page_kind={page_kind}"
8187
cur_page = self.kv_move_buffer[page_index]
8288
pin_mem_indexes = self._buffer_mem_indexes_tensors[page_index][0 : len(mem_indexes)]
8389
pin_mem_indexes.numpy()[:] = mem_indexes

lightllm/common/kv_cache_mem_manager/mem_manager.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -102,9 +102,6 @@ def alloc_kv_move_buffer(self, max_req_total_len):
102102
return
103103

104104
def alloc_paged_kv_move_buffer(self, page_num, page_size) -> torch.Tensor:
105-
if isinstance(self, MemoryManager) and type(self) is not MemoryManager:
106-
raise NotImplementedError("subclass need reimpl this method")
107-
108105
num_kv_head = get_num_key_value_heads(get_env_start_args().model_dir)
109106
self.kv_move_buffer = torch.empty(
110107
(page_num, page_size, self.layer_num, 2 * num_kv_head, self.head_dim), dtype=self.dtype, device="cuda"
@@ -121,7 +118,10 @@ def write_mem_to_page_kv_move_buffer(
121118
dp_index: int,
122119
mem_managers: List["MemoryManager"],
123120
dp_world_size: int,
121+
page_kind: str = "kv",
122+
req_idx: int = None,
124123
):
124+
assert page_kind == "kv", f"{type(self).__name__} does not support page_kind={page_kind}"
125125
cur_page = self.kv_move_buffer[page_index]
126126
pin_mem_indexes = self._buffer_mem_indexes_tensors[page_index][0 : len(mem_indexes)]
127127
pin_mem_indexes.numpy()[:] = mem_indexes
@@ -150,7 +150,10 @@ def read_page_kv_move_buffer_to_mem(
150150
dp_index: int,
151151
mem_managers: List["MemoryManager"],
152152
dp_world_size: int,
153+
page_kind: str = "kv",
154+
req_idx: int = None,
153155
):
156+
assert page_kind == "kv", f"{type(self).__name__} does not support page_kind={page_kind}"
154157
cur_page = self.kv_move_buffer[page_index]
155158
pin_mem_indexes = self._buffer_mem_indexes_tensors[page_index][0 : len(mem_indexes)]
156159
pin_mem_indexes.numpy()[:] = mem_indexes

0 commit comments

Comments
 (0)