Add custom_meta parameter to clear() for all TransferQueueKVStorageClient implementations

dpj135 · dpj135 · commit acd768603b11 · 2026-01-30T14:15:36.000+08:00
Signed-off-by: dpj135 <958208521@qq.com>
diff --git a/transfer_queue/storage/clients/base.py b/transfer_queue/storage/clients/base.py
@@ -44,7 +44,7 @@ def put(self, keys: list[str], values: list[Any]) -> Optional[list[Any]]:
         raise NotImplementedError("Subclasses must implement put")
 
     @abstractmethod
-    def get(self, keys: list[str], shapes=None, dtypes=None, custom_meta=None) -> list[Any]:
+    def get(self, keys: list[str], shapes=None, dtypes=None, custom_meta: Optional[list[Any]] = None) -> list[Any]:
         """
         Retrieve values from the storage backend by key.
         Args:
@@ -65,6 +65,6 @@ def get(self, keys: list[str], shapes=None, dtypes=None, custom_meta=None) -> li
         raise NotImplementedError("Subclasses must implement get")
 
     @abstractmethod
-    def clear(self, keys: list[str]) -> None:
+    def clear(self, keys: list[str], custom_meta: Optional[list[Any]] = None) -> None:
         """Clear key-value pairs in the storage backend."""
         raise NotImplementedError("Subclasses must implement clear")
diff --git a/transfer_queue/storage/clients/mooncake_client.py b/transfer_queue/storage/clients/mooncake_client.py
@@ -139,7 +139,7 @@ def get(self, keys: list[str], shapes=None, dtypes=None, custom_meta=None) -> li
             keys (List[str]): Keys to fetch.
             shapes (List[List[int]]): Expected tensor shapes (use [] for scalars).
             dtypes (List[Optional[torch.dtype]]): Expected dtypes; use None for non-tensor data.
-            custom_meta (List[str], optional): Device type (npu/cpu) for each key
+            custom_meta (List[Any], optional): Per-key custom metadata; accepted to match the base client interface.
 
         Returns:
             List[Any]: Retrieved values in the same order as input keys.
@@ -216,11 +216,12 @@ def _batch_get_bytes(self, keys: list[str]) -> list[bytes]:
             results.extend(batch_results)
         return results
 
-    def clear(self, keys: list[str]):
+    def clear(self, keys: list[str], custom_meta=None):
         """Deletes multiple keys from MooncakeStore.
 
         Args:
             keys (List[str]): List of keys to remove.
+            custom_meta (List[Any], optional): Per-key custom metadata; accepted to match the base client interface.
         """
         for key in keys:
             ret = self._store.remove(key)
diff --git a/transfer_queue/storage/clients/ray_storage_client.py b/transfer_queue/storage/clients/ray_storage_client.py
@@ -106,10 +106,11 @@ def get(self, keys: list[str], shapes=None, dtypes=None, custom_meta=None) -> li
             raise RuntimeError(f"Failed to retrieve value for key '{keys}': {e}") from e
         return values
 
-    def clear(self, keys: list[str]):
+    def clear(self, keys: list[str], custom_meta=None):
         """
         Delete entries from storage by keys.
         Args:
             keys (list): List of keys to delete
+            custom_meta (List[Any], optional): Per-key custom metadata; accepted to match the base client interface, unused here.
         """
         ray.get(self.storage_actor.clear_obj_ref.remote(keys))
diff --git a/transfer_queue/storage/clients/yuanrong_client.py b/transfer_queue/storage/clients/yuanrong_client.py
@@ -219,8 +219,8 @@ def supports_clear(self, custom_meta: str) -> bool:
 
     def clear(self, keys: list[str]):
         for i in range(0, len(keys), self.GET_CLEAR_KEYS_LIMIT):
-            batch = keys[i : i + self.GET_CLEAR_KEYS_LIMIT]
-            self._ds_client.delete(batch)
+            batch_keys = keys[i : i + self.GET_CLEAR_KEYS_LIMIT]
+            self._ds_client.delete(batch_keys)
 
     @staticmethod
     def calc_packed_size(items: list[memoryview]) -> int:
@@ -342,7 +342,7 @@ def __init__(self, config: dict[str, Any]):
         if not self._strategies:
             raise RuntimeError("No storage strategy available for YuanrongStorageClient")
 
-    def put(self, keys: list[str], values: list[Any]) -> Optional[list[Any]]:
+    def put(self, keys: list[str], values: list[Any]) -> list[str]:
         """Stores multiple key-value pairs to remote storage.
 
         Automatically routes NPU tensors to high-performance tensor storage,
@@ -353,15 +353,17 @@ def put(self, keys: list[str], values: list[Any]) -> Optional[list[Any]]:
             values (List[Any]): List of values to store (tensors, scalars, dicts, etc.).
 
         Returns:
-            List[Any]: custom metadata of YuanrongStorageCilent in the same order as input keys.
+            List[str]: custom metadata of YuanrongStorageClient in the same order as input keys.
         """
         if not isinstance(keys, list) or not isinstance(values, list):
             raise ValueError("keys and values must be lists")
         if len(keys) != len(values):
             raise ValueError("Number of keys must match number of values")
 
         routed_indexes = self._route_to_strategies(values, lambda strategy_, item_: strategy_.supports_put(item_))
-        custom_metas = [None] * len(keys)
+        custom_metas: list[str] = [""] * len(keys)
+
+        # TODO(dpj): run the per-strategy puts in parallel
         for strategy, indexes in routed_indexes.items():
             if not indexes:
                 continue
@@ -382,7 +384,7 @@ def get(self, keys: list[str], shapes=None, dtypes=None, custom_meta=None) -> li
             keys (List[str]): Keys to fetch.
             shapes (List[List[int]]): Expected tensor shapes (use [] for scalars).
             dtypes (List[Optional[torch.dtype]]): Expected dtypes; use None for non-tensor data.
-            custom_meta (List[str], optional): Device type (npu/cpu) for each key
+            custom_meta (List[str]): StorageStrategy type for each key
 
         Returns:
             List[Any]: Retrieved values in the same order as input keys.
@@ -414,13 +416,29 @@ def get(self, keys: list[str], shapes=None, dtypes=None, custom_meta=None) -> li
 
         return results
 
-    def clear(self, keys: list[str]):
+    def clear(self, keys: list[str], custom_meta=None):
         """Deletes multiple keys from remote storage.
 
         Args:
             keys (List[str]): List of keys to remove.
+            custom_meta (List[str]): StorageStrategy type for each key
         """
-        pass
+        if not isinstance(keys, list):
+            raise ValueError("keys must be a list")
+        if not isinstance(custom_meta, list):
+            raise ValueError("custom_meta must be a list if provided")
+        if len(custom_meta) != len(keys):
+            raise ValueError("custom_meta length must match keys")
+
+        routed_indexes = self._route_to_strategies(
+            custom_meta, lambda strategy_, item_: strategy_.supports_clear(item_)
+        )
+        # TODO(dpj): run the per-strategy clears in parallel
+        for strategy, indexes in routed_indexes.items():
+            if not indexes:
+                continue
+            strategy_keys = [keys[i] for i in indexes]
+            strategy.clear(strategy_keys)
 
     def _route_to_strategies(
         self,
diff --git a/transfer_queue/storage/managers/base.py b/transfer_queue/storage/managers/base.py
@@ -553,6 +553,8 @@ async def put_data(self, data: TensorDict, metadata: BatchMeta) -> None:
         keys = self._generate_keys(data.keys(), metadata.global_indexes)
         values = self._generate_values(data)
         loop = asyncio.get_event_loop()
+
+        # put <keys, values> to storage backends
         custom_meta = await loop.run_in_executor(None, self.storage_client.put, keys, values)
 
         per_field_dtypes: dict[int, dict[str, Any]] = {}
@@ -628,4 +630,5 @@ async def clear_data(self, metadata: BatchMeta) -> None:
             logger.warning("Attempted to clear data, but metadata contains no fields.")
             return
         keys = self._generate_keys(metadata.field_names, metadata.global_indexes)
-        self.storage_client.clear(keys=keys)
+        _, _, custom_meta = self._get_shape_type_custom_meta_list(metadata)
+        self.storage_client.clear(keys=keys, custom_meta=custom_meta)