apecloud
diff --git a/aperag/db/repositories/lightrag.py b/aperag/db/repositories/lightrag.py
Lines changed: 20 additions & 0 deletions
diff --git a/aperag/graph/lightrag/kg/neo4j_sync_impl.py b/aperag/graph/lightrag/kg/neo4j_sync_impl.py
Lines changed: 1 addition & 1 deletion
diff --git a/aperag/graph/lightrag/kg/postgres_sync_impl.py b/aperag/graph/lightrag/kg/postgres_sync_impl.py
Lines changed: 64 additions & 2 deletions
diff --git a/aperag/graph/lightrag/kg/qdrant_impl.py b/aperag/graph/lightrag/kg/qdrant_impl.py
Lines changed: 16 additions & 8 deletions
@@ -878,3 +878,23 @@ def _query(session):
             return [row[0] for row in result.fetchall()]
 
         return self._execute_query(_query)
+
+    def query_lightrag_vdb_entity_all(self, workspace: str):
+        """Fetch all LightRAG VDB entity rows whose workspace matches; the query builds an {id: model} dict."""
+
+        def _query(session):
+            stmt = select(LightRAGVDBEntityModel).where(LightRAGVDBEntityModel.workspace == workspace)
+            result = session.execute(stmt)
+            return {entity.id: entity for entity in result.scalars().all()}  # keyed by each row's id
+
+        return self._execute_query(_query)
+
+    def query_lightrag_vdb_relation_all(self, workspace: str):
+        """Fetch all LightRAG VDB relation rows whose workspace matches; the query builds an {id: model} dict."""
+
+        def _query(session):
+            stmt = select(LightRAGVDBRelationModel).where(LightRAGVDBRelationModel.workspace == workspace)
+            result = session.execute(stmt)
+            return {relation.id: relation for relation in result.scalars().all()}  # keyed by each row's id
+
+        return self._execute_query(_query)
@@ -82,7 +82,7 @@ async def initialize(self):
         # Prepare database in thread to avoid blocking
         self._DATABASE = await asyncio.to_thread(Neo4jSyncConnectionManager.prepare_database, self.workspace)
 
-        logger.info(f"Neo4JSyncStorage initialized for workspace '{self.workspace}', database '{self._DATABASE}'")
+        logger.debug(f"Neo4JSyncStorage initialized for workspace '{self.workspace}', database '{self._DATABASE}'")
 
     async def finalize(self):
         """Clean up resources."""
 
@@ -53,7 +53,7 @@ class PGOpsSyncKVStorage(BaseKVStorage):
 
     async def initialize(self):
         """Initialize storage."""
-        logger.info(f"PGOpsSyncKVStorage initialized for workspace '{self.workspace}'")
+        logger.debug(f"PGOpsSyncKVStorage initialized for workspace '{self.workspace}'")
 
     async def finalize(self):
         """Clean up resources."""
@@ -240,12 +240,74 @@ class PGOpsSyncVectorStorage(BaseVectorStorage):
 
     async def initialize(self):
         """Initialize storage."""
-        logger.info(f"PGOpsSyncVectorStorage initialized for workspace '{self.workspace}'")
+        logger.debug(f"PGOpsSyncVectorStorage initialized for workspace '{self.workspace}'")
 
     async def finalize(self):
         """Clean up resources."""
         logger.debug(f"PGOpsSyncVectorStorage finalized for workspace '{self.workspace}'")
 
+    async def get_all(self) -> dict[str, Any]:
+        """Load all rows for this namespace and workspace as {id: field dict}; unknown namespaces yield {}."""
+
+        def _sync_get_all():
+            # Imported inside the function to avoid circular imports
+            from aperag.db.ops import db_ops
+            from aperag.graph.lightrag.namespace import NameSpace, is_namespace
+
+            # Pick the db_ops query and the output field mapping that match this storage's namespace
+            if is_namespace(self.namespace, NameSpace.VECTOR_STORE_CHUNKS):
+                models = db_ops.query_lightrag_doc_chunks_all(self.workspace)
+                return {
+                    chunk_id: {
+                        "id": chunk_id,
+                        "tokens": model.tokens,
+                        "content": model.content or "",
+                        "chunk_order_index": model.chunk_order_index,
+                        "full_doc_id": model.full_doc_id,
+                        "content_vector": model.content_vector,
+                        "file_path": model.file_path,
+                        "created_at": int(model.create_time.timestamp()) if model.create_time else None,
+                    }
+                    for chunk_id, model in models.items()
+                }
+            elif is_namespace(self.namespace, NameSpace.VECTOR_STORE_ENTITIES):
+                models = db_ops.query_lightrag_vdb_entity_all(self.workspace)
+                return {
+                    entity_id: {
+                        "id": entity_id,
+                        "entity_name": model.entity_name,
+                        "content": model.content or "",
+                        "content_vector": model.content_vector,
+                        "chunk_ids": model.chunk_ids or [],
+                        "file_path": model.file_path,
+                        "created_at": int(model.create_time.timestamp()) if model.create_time else None,
+                    }
+                    for entity_id, model in models.items()
+                }
+            elif is_namespace(self.namespace, NameSpace.VECTOR_STORE_RELATIONSHIPS):
+                models = db_ops.query_lightrag_vdb_relation_all(self.workspace)
+                return {
+                    relation_id: {
+                        "id": relation_id,
+                        "source_id": model.source_id,
+                        "target_id": model.target_id,
+                        "content": model.content or "",
+                        "content_vector": model.content_vector,
+                        "chunk_ids": model.chunk_ids or [],
+                        "file_path": model.file_path,
+                        "created_at": int(model.create_time.timestamp()) if model.create_time else None,
+                        # Same values again under the src_id/tgt_id keys, in case callers read those names
+                        "src_id": model.source_id,
+                        "tgt_id": model.target_id,
+                    }
+                    for relation_id, model in models.items()
+                }
+            else:
+                logger.error(f"Unknown namespace for get_all: {self.namespace}")
+                return {}  # logged above; an empty dict is returned instead of raising
+
+        return await asyncio.to_thread(_sync_get_all)  # run the synchronous query in a worker thread
+
     def _prepare_vector_data(self, item: dict[str, Any], current_time: datetime.datetime) -> dict[str, Any]:
         """Prepare vector data based on namespace."""
         from aperag.graph.lightrag.namespace import NameSpace, is_namespace
 
@@ -45,19 +45,27 @@
 from ..utils import logger
 
 
-def compute_mdhash_id_for_qdrant(content: str, prefix: str = "", style: str = "simple") -> str:
+def compute_mdhash_id_for_qdrant(content: str, prefix: str = "", workspace: str = "", style: str = "simple") -> str:
     """
-    Generate a UUID based on the content and support multiple formats.
+    Generate a UUID based on the content with workspace isolation and support multiple formats.
 
     :param content: The content used to generate the UUID.
+    :param prefix: The prefix to add to the hash
+    :param workspace: The workspace identifier for data isolation
     :param style: The format of the UUID, optional values are "simple", "hyphenated", "urn".
     :return: A UUID that meets the requirements of Qdrant.
     """
     if not content:
         raise ValueError("Content must not be empty.")
 
+    # Combine content with workspace to ensure isolation
+    if workspace:
+        hash_input = f"{workspace}::{content}"
+    else:
+        hash_input = content
+
     # Use the hash value of the content to create a UUID.
-    hashed_content = hashlib.sha256((prefix + content).encode("utf-8")).digest()
+    hashed_content = hashlib.sha256((prefix + hash_input).encode("utf-8")).digest()
     generated_uuid = uuid.UUID(bytes=hashed_content[:16], version=4)
 
     # Return the UUID according to the specified format.
@@ -122,7 +130,7 @@ async def upsert(self, data: dict[str, dict[str, Any]]) -> None:
         for i, d in enumerate(list_data):
             list_points.append(
                 models.PointStruct(
-                    id=compute_mdhash_id_for_qdrant(d["id"]),
+                    id=compute_mdhash_id_for_qdrant(d["id"], workspace=self.workspace),
                     vector=embeddings[i],
                     payload=d,
                 )
@@ -160,7 +168,7 @@ async def delete(self, ids: List[str]) -> None:
         """
         try:
             # Convert regular ids to Qdrant compatible ids
-            qdrant_ids = [compute_mdhash_id_for_qdrant(id) for id in ids]
+            qdrant_ids = [compute_mdhash_id_for_qdrant(id, workspace=self.workspace) for id in ids]
             # Delete points from the collection
             self._client.delete(
                 collection_name=self._collection_name,
@@ -181,7 +189,7 @@ async def delete_entity(self, entity_name: str) -> None:
         """
         try:
             # Generate the entity ID
-            entity_id = compute_mdhash_id_for_qdrant(entity_name, prefix="ent-")
+            entity_id = compute_mdhash_id_for_qdrant(entity_name, prefix="ent-", workspace=self.workspace)
             logger.debug(f"Attempting to delete entity {entity_name} with ID {entity_id}")
 
             # Delete the entity point from the collection
@@ -246,7 +254,7 @@ async def get_by_id(self, id: str) -> dict[str, Any] | None:
         """
         try:
             # Convert to Qdrant compatible ID
-            qdrant_id = compute_mdhash_id_for_qdrant(id)
+            qdrant_id = compute_mdhash_id_for_qdrant(id, workspace=self.workspace)
 
             # Retrieve the point by ID
             result = self._client.retrieve(
@@ -282,7 +290,7 @@ async def get_by_ids(self, ids: list[str]) -> list[dict[str, Any]]:
 
         try:
             # Convert to Qdrant compatible IDs
-            qdrant_ids = [compute_mdhash_id_for_qdrant(id) for id in ids]
+            qdrant_ids = [compute_mdhash_id_for_qdrant(id, workspace=self.workspace) for id in ids]
 
             # Retrieve the points by IDs
             results = self._client.retrieve(