apecloud
diff --git a/‎aperag/api/components/schemas/document.yaml‎
Lines changed: 38 additions & 0 deletions b/‎aperag/api/components/schemas/document.yaml‎
Lines changed: 38 additions & 0 deletions
diff --git a/‎aperag/api/paths/collections.yaml‎
Lines changed: 42 additions & 1 deletion b/‎aperag/api/paths/collections.yaml‎
Lines changed: 42 additions & 1 deletion
diff --git a/‎aperag/views/main.py‎
Lines changed: 140 additions & 0 deletions b/‎aperag/views/main.py‎
Lines changed: 140 additions & 0 deletions
diff --git a/‎frontend/src/api/api.ts‎
Lines changed: 1 addition & 0 deletions b/‎frontend/src/api/api.ts‎
Lines changed: 1 addition & 0 deletions
@@ -82,3 +82,41 @@ documentUpdate:
       type: string
     source:
       type: string
+
+# Schemas for the document vector index details response
+documentVectorIndexDetails:
+  # Payload returned by GET /collections/{collection_id}/documents/{document_id}/vector-index.
+  type: object
+  properties:
+    document_id:
+      type: string
+      description: Document ID
+    collection_id:
+      type: string
+      description: Collection ID
+    # The handler in this change sets vector_count to the length of `vectors`.
+    vector_count:
+      type: integer
+      description: Total number of vectors
+    vectors:
+      type: array
+      items:
+        $ref: '#/vectorIndexItem'
+      description: List of vectors
+
+vectorIndexItem:
+  # One entry of documentVectorIndexDetails.vectors.
+  # NOTE(review): the handler in this change emits null for created_at,
+  # chunk_order_index and tokens when a vector's payload cannot be loaded,
+  # but these properties are not declared nullable here — confirm and align.
+  type: object
+  properties:
+    id:
+      type: string
+      description: Vector ID
+    # NOTE(review): the timestamp unit (seconds vs. milliseconds) is not stated — confirm.
+    created_at:
+      type: integer
+      description: Creation timestamp
+    content:
+      type: string
+      description: Text content of the vector chunk
+    chunk_order_index:
+      type: integer
+      description: Order index of the chunk in the document
+    tokens:
+      type: integer
+      description: Number of tokens in the chunk
@@ -378,4 +378,45 @@ search_detail:
         content:
           application/json:
             schema:
-              $ref: '../components/schemas/common.yaml#/failResponse'
+              $ref: '../components/schemas/common.yaml#/failResponse'
+
+# Vector index details for a single document
+  /collections/{collection_id}/documents/{document_id}/vector-index:
+    get:
+      tags:
+        - collections
+      summary: Get document vector index details
+      description: Get all vectors for a specific document from the vector database
+      operationId: getDocumentVectorIndex
+      parameters:
+        - name: collection_id
+          in: path
+          required: true
+          description: Collection ID
+          schema:
+            type: string
+        - name: document_id
+          in: path
+          required: true
+          description: Document ID
+          schema:
+            type: string
+      responses:
+        '200':
+          description: Document vector index details
+          content:
+            application/json:
+              schema:
+                $ref: '../components/schemas/document.yaml#/documentVectorIndexDetails'
+        # Error responses reuse the shared failResponse schema, matching the
+        # other failure responses declared in this file.
+        '404':
+          description: Document not found
+          content:
+            application/json:
+              schema:
+                $ref: '../components/schemas/common.yaml#/failResponse'
+        '500':
+          description: Internal server error
+          content:
+            application/json:
+              schema:
+                $ref: '../components/schemas/common.yaml#/failResponse'
@@ -42,6 +42,9 @@
 # Import authentication dependencies
 from aperag.views.auth import UserManager, authenticate_websocket_user, current_user, get_user_manager
 
+from aperag.utils.utils import generate_vector_db_collection_name
+from config.vector_db import get_vector_db_connector
+
 logger = logging.getLogger(__name__)
 
 router = APIRouter()
@@ -147,6 +150,143 @@ async def delete_document_view(
     return await document_service.delete_document(str(user.id), collection_id, document_id)
 
 
+@router.get("/collections/{collection_id}/documents/{document_id}/vector-index")
+async def get_document_vector_index_view(
+    request: Request,
+    collection_id: str,
+    document_id: str,
+    user: User = Depends(current_user),
+):
+    """Get document vector index details"""
+    
+    try:
+        # Get collection and document
+        from aperag.db.ops import async_db_ops
+        
+        collection = await async_db_ops.query_collection_by_id(collection_id=collection_id)
+        if not collection:
+            raise HTTPException(status_code=404, detail="Collection not found")
+        
+        document = await async_db_ops.query_document_by_id(document_id=document_id)
+        if not document or document.collection_id != collection_id:
+            raise HTTPException(status_code=404, detail="Document not found")
+        
+        # Get vector index data from DocumentIndex
+        from aperag.db.models import DocumentIndex, DocumentIndexType
+        from aperag.config import get_async_session
+        from sqlalchemy import and_, select
+        
+        async with get_async_session() as session:
+            stmt = select(DocumentIndex).where(
+                and_(
+                    DocumentIndex.document_id == document_id,
+                    DocumentIndex.index_type == DocumentIndexType.VECTOR
+                )
+            )
+            result = await session.execute(stmt)
+            doc_index = result.scalar_one_or_none()
+            
+            if not doc_index or not doc_index.index_data:
+                return {
+                    "document_id": document_id,
+                    "collection_id": collection_id,
+                    "vector_count": 0,
+                    "vectors": []
+                }
+            
+            # Parse vector IDs from index data
+            import json
+            index_data = json.loads(doc_index.index_data)
+            ctx_ids = index_data.get("ctx", [])
+            
+            if not ctx_ids:
+                return {
+                    "document_id": document_id,
+                    "collection_id": collection_id,
+                    "vector_count": 0,
+                    "vectors": []
+                }
+            
+            # Get vector details from vector database
+            vector_store_adaptor = get_vector_db_connector(
+                collection=generate_vector_db_collection_name(collection_id=collection.id)
+            )
+            
+            # Get vector data by IDs
+            vector_details = []
+            try:
+                # Use the connector's get_by_ids method if available
+                if hasattr(vector_store_adaptor.connector, 'get_by_ids'):
+                    vector_data_list = await vector_store_adaptor.connector.get_by_ids(ctx_ids)
+                    
+                    for vector_data in vector_data_list:
+                        if vector_data:
+                            vector_details.append({
+                                "id": vector_data.get("id", ""),
+                                "created_at": vector_data.get("created_at"),
+                                "content": vector_data.get("content", ""),
+                                "chunk_order_index": vector_data.get("chunk_order_index"),
+                                "tokens": vector_data.get("tokens")
+                            })
+                else:
+                    # Fallback: get vectors one by one
+                    for ctx_id in ctx_ids:
+                        try:
+                            if hasattr(vector_store_adaptor.connector, 'get_by_id'):
+                                vector_data = await vector_store_adaptor.connector.get_by_id(ctx_id)
+                                if vector_data:
+                                    vector_details.append({
+                                        "id": vector_data.get("id", ctx_id),
+                                        "created_at": vector_data.get("created_at"),
+                                        "content": vector_data.get("content", ""),
+                                        "chunk_order_index": vector_data.get("chunk_order_index"),
+                                        "tokens": vector_data.get("tokens")
+                                    })
+                        except Exception as e:
+                            logger.warning(f"Failed to get vector data for {ctx_id}: {e}")
+                            # Add a minimal record for missing vectors
+                            vector_details.append({
+                                "id": ctx_id,
+                                "created_at": None,
+                                "content": "Vector data not available",
+                                "chunk_order_index": None,
+                                "tokens": None
+                            })
+            
+            except Exception as e:
+                logger.error(f"Failed to get vector details: {e}")
+                # Return basic info if we can't get details
+                vector_details = [
+                    {
+                        "id": ctx_id,
+                        "created_at": None,
+                        "content": "Vector data not available",
+                        "chunk_order_index": None,
+                        "tokens": None
+                    }
+                    for ctx_id in ctx_ids
+                ]
+            
+            # Sort by created_at if available, otherwise by chunk_order_index
+            vector_details.sort(key=lambda x: (
+                x.get("created_at") or 0,
+                x.get("chunk_order_index") or 0
+            ))
+            
+            return {
+                "document_id": document_id,
+                "collection_id": collection_id,
+                "vector_count": len(vector_details),
+                "vectors": vector_details
+            }
+    
+    except HTTPException:
+        raise
+    except Exception as e:
+        logger.error(f"Error getting document vector index: {e}")
+        raise HTTPException(status_code=500, detail="Internal server error")
+
+
 @router.delete("/collections/{collection_id}/documents")
 @audit(resource_type="document", api_name="DeleteDocuments")
 async def delete_documents_view(
 
@@ -17,4 +17,5 @@
 export * from './apis/audit-api';
 export * from './apis/default-api';
 export * from './apis/llmapi';
+import { DocumentVectorIndexDetails } from './models';