Skip to content

Commit 92768de

Browse files
committed
feat: enhance vector index details functionality
- Added new API endpoint to retrieve vector index details for documents. - Updated document schema to include vector index details. - Integrated vector index details into the frontend with a new drawer component for displaying vector information. - Added translations for vector index details in English and Chinese.
1 parent 359215a commit 92768de

9 files changed

Lines changed: 496 additions & 2 deletions

File tree

aperag/api/components/schemas/document.yaml

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,3 +82,41 @@ documentUpdate:
8282
type: string
8383
source:
8484
type: string
85+
86+
# Schemas for the document vector index details response
87+
documentVectorIndexDetails:
88+
type: object
89+
properties:
90+
document_id:
91+
type: string
92+
description: Document ID
93+
collection_id:
94+
type: string
95+
description: Collection ID
96+
vector_count:
97+
type: integer
98+
description: Total number of vectors
99+
vectors:
100+
type: array
101+
items:
102+
$ref: '#/vectorIndexItem'
103+
description: List of vectors
104+
105+
vectorIndexItem:
106+
type: object
107+
properties:
108+
id:
109+
type: string
110+
description: Vector ID
111+
created_at:
112+
type: integer
113+
description: Creation timestamp
114+
content:
115+
type: string
116+
description: Text content of the vector chunk
117+
chunk_order_index:
118+
type: integer
119+
description: Order index of the chunk in the document
120+
tokens:
121+
type: integer
122+
description: Number of tokens in the chunk

aperag/api/paths/collections.yaml

Lines changed: 42 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -378,4 +378,45 @@ search_detail:
378378
content:
379379
application/json:
380380
schema:
381-
$ref: '../components/schemas/common.yaml#/failResponse'
381+
$ref: '../components/schemas/common.yaml#/failResponse'
382+
383+
# Vector index details for a single document in a collection
384+
/collections/{collection_id}/documents/{document_id}/vector-index:
385+
get:
386+
tags:
387+
- collections
388+
summary: Get document vector index details
389+
description: Get all vectors for a specific document from the vector database
390+
operationId: getDocumentVectorIndex
391+
parameters:
392+
- name: collection_id
393+
in: path
394+
required: true
395+
description: Collection ID
396+
schema:
397+
type: string
398+
- name: document_id
399+
in: path
400+
required: true
401+
description: Document ID
402+
schema:
403+
type: string
404+
responses:
405+
'200':
406+
description: Document vector index details
407+
content:
408+
application/json:
409+
schema:
410+
$ref: '../components/schemas/document.yaml#/documentVectorIndexDetails'
411+
'404':
412+
description: Document not found
413+
content:
414+
application/json:
415+
schema:
416+
$ref: '../components/schemas/error.yaml#/error'
417+
'500':
418+
description: Internal server error
419+
content:
420+
application/json:
421+
schema:
422+
$ref: '../components/schemas/error.yaml#/error'

aperag/views/main.py

Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,9 @@
4242
# Import authentication dependencies
4343
from aperag.views.auth import UserManager, authenticate_websocket_user, current_user, get_user_manager
4444

45+
from aperag.utils.utils import generate_vector_db_collection_name
46+
from config.vector_db import get_vector_db_connector
47+
4548
logger = logging.getLogger(__name__)
4649

4750
router = APIRouter()
@@ -147,6 +150,143 @@ async def delete_document_view(
147150
return await document_service.delete_document(str(user.id), collection_id, document_id)
148151

149152

153+
def _empty_vector_index_response(document_id: str, collection_id: str) -> dict:
    """Build the response for a document that has no indexed vectors."""
    return {
        "document_id": document_id,
        "collection_id": collection_id,
        "vector_count": 0,
        "vectors": [],
    }


def _placeholder_vector_record(ctx_id: str) -> dict:
    """Build a minimal record for a vector whose stored data could not be read."""
    return {
        "id": ctx_id,
        "created_at": None,
        "content": "Vector data not available",
        "chunk_order_index": None,
        "tokens": None,
    }


def _vector_record(vector_data, default_id: str = "") -> dict:
    """Project a vector-store payload onto the fields exposed by the API.

    ``vector_data`` only needs to support ``.get``; ``default_id`` is used when
    the payload carries no ``id`` of its own.
    """
    return {
        "id": vector_data.get("id", default_id),
        "created_at": vector_data.get("created_at"),
        "content": vector_data.get("content", ""),
        "chunk_order_index": vector_data.get("chunk_order_index"),
        "tokens": vector_data.get("tokens"),
    }


async def _fetch_vector_records(connector, ctx_ids: list) -> list:
    """Read the records for ``ctx_ids`` through whichever lookup the connector offers.

    Prefers a bulk ``get_by_ids`` call and falls back to per-ID ``get_by_id``
    calls. Per-ID failures are logged and replaced by a placeholder record so
    that one unreadable vector does not hide the others.
    """
    if hasattr(connector, "get_by_ids"):
        payloads = await connector.get_by_ids(ctx_ids)
        return [_vector_record(payload) for payload in payloads if payload]

    if not hasattr(connector, "get_by_id"):
        # Without any lookup method the vectors still exist according to the
        # index, so report them as placeholders instead of claiming there are none.
        logger.warning("Vector store connector supports neither get_by_ids nor get_by_id")
        return [_placeholder_vector_record(ctx_id) for ctx_id in ctx_ids]

    records = []
    for ctx_id in ctx_ids:
        try:
            payload = await connector.get_by_id(ctx_id)
            if payload:
                records.append(_vector_record(payload, ctx_id))
        except Exception as e:
            # Deliberate best-effort: keep going and mark this vector as unavailable.
            logger.warning(f"Failed to get vector data for {ctx_id}: {e}")
            records.append(_placeholder_vector_record(ctx_id))
    return records


@router.get("/collections/{collection_id}/documents/{document_id}/vector-index")
async def get_document_vector_index_view(
    request: Request,
    collection_id: str,
    document_id: str,
    user: User = Depends(current_user),
) -> dict:
    """Get document vector index details.

    Looks up the document's VECTOR ``DocumentIndex`` row, reads the vector IDs
    stored under the ``"ctx"`` key of its JSON ``index_data``, and fetches the
    matching records from the vector database.

    Args:
        request: Incoming request (unused by the handler itself).
        collection_id: ID of the collection that owns the document.
        document_id: ID of the document whose vectors are listed.
        user: Authenticated user resolved by ``current_user``.

    Returns:
        A dict with ``document_id``, ``collection_id``, ``vector_count`` and
        ``vectors``. ``vectors`` is sorted by ``created_at`` and then
        ``chunk_order_index`` (missing values sort as 0). If the vector store
        cannot be read, each indexed ID is returned as a placeholder record.

    Raises:
        HTTPException: 404 when the collection or document is not found (or the
            document belongs to another collection); 500 on any other error.
    """
    try:
        # Imported locally, as in the original implementation.
        from aperag.db.ops import async_db_ops

        # NOTE(review): `user` is authenticated but never compared with the
        # collection's owner here, whereas delete_document_view passes
        # str(user.id) to the service layer -- confirm whether an ownership
        # check is required for this endpoint.
        collection = await async_db_ops.query_collection_by_id(collection_id=collection_id)
        if not collection:
            raise HTTPException(status_code=404, detail="Collection not found")

        document = await async_db_ops.query_document_by_id(document_id=document_id)
        if not document or document.collection_id != collection_id:
            raise HTTPException(status_code=404, detail="Document not found")

        # Get vector index data from DocumentIndex
        from aperag.db.models import DocumentIndex, DocumentIndexType
        from aperag.config import get_async_session
        from sqlalchemy import and_, select

        async with get_async_session() as session:
            stmt = select(DocumentIndex).where(
                and_(
                    DocumentIndex.document_id == document_id,
                    DocumentIndex.index_type == DocumentIndexType.VECTOR,
                )
            )
            result = await session.execute(stmt)
            doc_index = result.scalar_one_or_none()

        if not doc_index or not doc_index.index_data:
            return _empty_vector_index_response(document_id, collection_id)

        # Parse vector IDs from index data
        import json

        index_data = json.loads(doc_index.index_data)
        ctx_ids = index_data.get("ctx", [])
        if not ctx_ids:
            return _empty_vector_index_response(document_id, collection_id)

        # Get vector details from vector database
        vector_store_adaptor = get_vector_db_connector(
            collection=generate_vector_db_collection_name(collection_id=collection.id)
        )

        try:
            vector_details = await _fetch_vector_records(vector_store_adaptor.connector, ctx_ids)
        except Exception as e:
            # logger.exception keeps the traceback that logger.error would drop.
            logger.exception(f"Failed to get vector details: {e}")
            # Return basic info if we can't get details
            vector_details = [_placeholder_vector_record(ctx_id) for ctx_id in ctx_ids]

        # Sort by created_at first, then chunk_order_index; None sorts as 0.
        vector_details.sort(
            key=lambda record: (
                record.get("created_at") or 0,
                record.get("chunk_order_index") or 0,
            )
        )

        return {
            "document_id": document_id,
            "collection_id": collection_id,
            "vector_count": len(vector_details),
            "vectors": vector_details,
        }

    except HTTPException:
        # Let the 404s raised above pass through unchanged.
        raise
    except Exception as e:
        logger.exception(f"Error getting document vector index: {e}")
        raise HTTPException(status_code=500, detail="Internal server error") from e
288+
289+
150290
@router.delete("/collections/{collection_id}/documents")
151291
@audit(resource_type="document", api_name="DeleteDocuments")
152292
async def delete_documents_view(

frontend/src/api/api.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,4 +17,5 @@
1717
export * from './apis/audit-api';
1818
export * from './apis/default-api';
1919
export * from './apis/llmapi';
20+
import { DocumentVectorIndexDetails } from './models';
2021

0 commit comments

Comments
 (0)