|
42 | 42 | # Import authentication dependencies |
43 | 43 | from aperag.views.auth import UserManager, authenticate_websocket_user, current_user, get_user_manager |
44 | 44 |
|
| 45 | +from aperag.utils.utils import generate_vector_db_collection_name |
| 46 | +from config.vector_db import get_vector_db_connector |
| 47 | + |
45 | 48 | logger = logging.getLogger(__name__) |
46 | 49 |
|
47 | 50 | router = APIRouter() |
@@ -147,6 +150,143 @@ async def delete_document_view( |
147 | 150 | return await document_service.delete_document(str(user.id), collection_id, document_id) |
148 | 151 |
|
149 | 152 |
|
def _format_vector_record(vector_data, fallback_id=""):
    """Project a raw vector payload onto the fields returned by the API.

    Args:
        vector_data: Mapping returned by the vector connector for one vector.
        fallback_id: Value used for ``id`` when the payload carries none.

    Returns:
        A dict with the keys ``id``, ``created_at``, ``content``,
        ``chunk_order_index`` and ``tokens``.
    """
    return {
        "id": vector_data.get("id", fallback_id),
        "created_at": vector_data.get("created_at"),
        "content": vector_data.get("content", ""),
        "chunk_order_index": vector_data.get("chunk_order_index"),
        "tokens": vector_data.get("tokens"),
    }


def _unavailable_vector_record(ctx_id):
    """Build the placeholder record used when a vector's data cannot be read."""
    return {
        "id": ctx_id,
        "created_at": None,
        "content": "Vector data not available",
        "chunk_order_index": None,
        "tokens": None,
    }


def _vector_sort_key(record):
    """Return a sort key ordering by ``created_at``, then ``chunk_order_index``.

    Missing (falsy) values sort first. The leading boolean of each pair keeps
    a missing value from ever being compared against a real one, so records
    with e.g. string timestamps can be sorted together with placeholder
    records whose fields are ``None`` without raising ``TypeError``.
    """
    created_at = record.get("created_at")
    chunk_order_index = record.get("chunk_order_index")
    return (
        bool(created_at),
        created_at or 0,
        bool(chunk_order_index),
        chunk_order_index or 0,
    )


def _vector_index_response(document_id, collection_id, vectors):
    """Assemble the response payload for the vector-index endpoint."""
    return {
        "document_id": document_id,
        "collection_id": collection_id,
        "vector_count": len(vectors),
        "vectors": vectors,
    }


async def _fetch_vector_details(connector, ctx_ids):
    """Load the records for ``ctx_ids`` from the vector connector.

    Uses the connector's bulk ``get_by_ids`` when it exists, otherwise falls
    back to one ``get_by_id`` call per ID. A per-ID failure in the fallback
    path is logged and replaced by a placeholder record. If the connector
    offers neither method, every ID gets a placeholder record so the caller
    still sees which vectors are indexed.
    """
    if hasattr(connector, "get_by_ids"):
        vector_data_list = await connector.get_by_ids(ctx_ids)
        # Entries the store could not resolve come back falsy and are skipped.
        return [_format_vector_record(item) for item in vector_data_list if item]

    # Checked once up front: the answer cannot change between iterations.
    if not hasattr(connector, "get_by_id"):
        logger.warning("Vector connector supports neither get_by_ids nor get_by_id")
        return [_unavailable_vector_record(ctx_id) for ctx_id in ctx_ids]

    vector_details = []
    for ctx_id in ctx_ids:
        try:
            vector_data = await connector.get_by_id(ctx_id)
            if vector_data:
                vector_details.append(_format_vector_record(vector_data, ctx_id))
        except Exception as e:
            logger.warning(f"Failed to get vector data for {ctx_id}: {e}")
            # Add a minimal record for missing vectors
            vector_details.append(_unavailable_vector_record(ctx_id))
    return vector_details


@router.get("/collections/{collection_id}/documents/{document_id}/vector-index")
async def get_document_vector_index_view(
    request: Request,
    collection_id: str,
    document_id: str,
    user: User = Depends(current_user),
):
    """Get document vector index details.

    Looks up the document's VECTOR ``DocumentIndex`` row, reads the vector IDs
    stored under the ``"ctx"`` key of its JSON ``index_data``, and fetches the
    matching records from the collection's vector store.

    Args:
        request: Incoming request (not used by this handler).
        collection_id: ID of the collection taken from the URL path.
        document_id: ID of the document taken from the URL path.
        user: Authenticated user resolved by the ``current_user`` dependency.

    Returns:
        A dict with ``document_id``, ``collection_id``, ``vector_count`` and
        ``vectors``. ``vectors`` is empty when the document has no vector
        index data; when the vector store cannot be read, it holds one
        placeholder record per indexed ID.

    Raises:
        HTTPException: 404 if the collection does not exist, or if the
            document does not exist or belongs to another collection;
            500 for any other unexpected failure.
    """
    # NOTE(review): `user` is resolved but never used to check that the
    # caller may access this collection - confirm whether an ownership check
    # is required here.
    try:
        # Imports are kept function-local exactly as in the original handler.
        import json

        from sqlalchemy import and_, select

        from aperag.config import get_async_session
        from aperag.db.models import DocumentIndex, DocumentIndexType
        from aperag.db.ops import async_db_ops

        # Get collection and document
        collection = await async_db_ops.query_collection_by_id(collection_id=collection_id)
        if not collection:
            raise HTTPException(status_code=404, detail="Collection not found")

        document = await async_db_ops.query_document_by_id(document_id=document_id)
        if not document or document.collection_id != collection_id:
            raise HTTPException(status_code=404, detail="Document not found")

        # Get vector index data from DocumentIndex
        async with get_async_session() as session:
            stmt = select(DocumentIndex).where(
                and_(
                    DocumentIndex.document_id == document_id,
                    DocumentIndex.index_type == DocumentIndexType.VECTOR,
                )
            )
            result = await session.execute(stmt)
            doc_index = result.scalar_one_or_none()
            # Read the column while the session is still open so the value
            # does not depend on the ORM object staying usable afterwards.
            raw_index_data = doc_index.index_data if doc_index else None

        if not raw_index_data:
            return _vector_index_response(document_id, collection_id, [])

        # Parse vector IDs from index data
        ctx_ids = json.loads(raw_index_data).get("ctx", [])
        if not ctx_ids:
            return _vector_index_response(document_id, collection_id, [])

        # Get vector details from vector database
        vector_store_adaptor = get_vector_db_connector(
            collection=generate_vector_db_collection_name(collection_id=collection.id)
        )

        try:
            vector_details = await _fetch_vector_details(vector_store_adaptor.connector, ctx_ids)
        except Exception as e:
            logger.error(f"Failed to get vector details: {e}")
            # Return basic info if we can't get details
            vector_details = [_unavailable_vector_record(ctx_id) for ctx_id in ctx_ids]

        # Sort by created_at if available, otherwise by chunk_order_index
        vector_details.sort(key=_vector_sort_key)

        return _vector_index_response(document_id, collection_id, vector_details)

    except HTTPException:
        raise
    except Exception as e:
        # logger.exception keeps the traceback, which the plain error log lost.
        logger.exception("Error getting document vector index: %s", e)
        raise HTTPException(status_code=500, detail="Internal server error")
| 288 | + |
| 289 | + |
150 | 290 | @router.delete("/collections/{collection_id}/documents") |
151 | 291 | @audit(resource_type="document", api_name="DeleteDocuments") |
152 | 292 | async def delete_documents_view( |
|
0 commit comments