|
20 | 20 | from ..lib.age_client import AGEClient |
21 | 21 | from ..lib.garage import get_source_storage, get_image_storage |
22 | 22 | from ..lib.similarity_calculator import cosine_similarity |
23 | | -from ..dependencies.auth import get_current_active_user |
| 23 | +from ..dependencies.auth import get_current_active_user, CurrentUser, require_permission |
24 | 24 | from ..models.auth import UserInDB |
25 | 25 |
|
26 | 26 | router = APIRouter(prefix="/documents", tags=["documents"]) |
@@ -107,6 +107,14 @@ class DocumentListResponse(BaseModel): |
107 | 107 | offset: int |
108 | 108 |
|
109 | 109 |
|
class DocumentDeleteResponse(BaseModel):
    """Summary returned by the document deletion endpoint.

    Reports which document was removed and how many graph nodes were
    deleted as part of the cascade.
    """

    # Identifier of the document the delete request targeted.
    document_id: str
    # Set to True by delete_document when the cascade finishes.
    deleted: bool
    # Count reported by the query that deletes the document's Source nodes.
    sources_deleted: int
    # Count reported by the orphaned-Concept cleanup query.
    orphaned_concepts_deleted: int
| 116 | + |
| 117 | + |
110 | 118 | class DocumentConceptItem(BaseModel): |
111 | 119 | """Concept extracted from a document.""" |
112 | 120 | concept_id: str |
@@ -999,3 +1007,171 @@ async def get_document_concepts_bulk( |
999 | 1007 | raise HTTPException(status_code=500, detail=f"Failed to get bulk concepts: {str(e)}") |
1000 | 1008 | finally: |
1001 | 1009 | client.close() |
| 1010 | + |
| 1011 | + |
| 1012 | +# ============================================================================ |
| 1013 | +# Document Deletion |
| 1014 | +# ============================================================================ |
| 1015 | + |
def _run_pooled_delete(client: AGEClient, sql: str, params: tuple, description: str) -> None:
    """Run one SQL DELETE on a pooled connection, logging instead of raising.

    The connection is always handed back to ``client.pool``, whether or not
    the statement succeeded. ``description`` only feeds the warning message.
    """
    conn = None
    try:
        conn = client.pool.getconn()
        with conn.cursor() as cur:
            cur.execute(sql, params)
        conn.commit()
    except Exception as e:
        # Best-effort: relational cleanup must not fail the whole deletion.
        logger.warning(f"Failed to delete {description}: {e}")
    finally:
        if conn is not None:
            client.pool.putconn(conn)


def _delete_garage_objects(source_ids: list, storage_keys: list) -> None:
    """Best-effort removal of a document's Garage objects; never raises.

    Image cleanup and source-document cleanup are independent, so a failure
    to initialise one storage backend does not skip the other.
    """
    if storage_keys:
        try:
            image_storage = get_image_storage()
        except Exception as e:
            logger.warning(f"Failed to initialize Garage for cleanup: {e}")
        else:
            for key in storage_keys:
                try:
                    image_storage.delete(key)
                except Exception as e:
                    logger.warning(f"Failed to delete Garage image {key}: {e}")

    if source_ids:
        try:
            source_storage = get_source_storage()
        except Exception as e:
            logger.warning(f"Failed to initialize source storage: {e}")
        else:
            for sid in source_ids:
                try:
                    source_storage.delete(sid)
                except Exception as e:
                    logger.warning(f"Failed to delete source doc {sid} from Garage: {e}")


@router.delete("/{document_id}", response_model=DocumentDeleteResponse)
async def delete_document(
    document_id: str,
    current_user: CurrentUser,
    _: None = Depends(require_permission("sources", "delete")),
):
    """Delete a document and cascade-remove orphaned concepts.

    Deletes all data associated with a single document, in this order:
    - Instance nodes linked to the document's Source nodes
    - Source nodes (chunks) linked via DocumentMeta
    - source_embeddings records for those sources
    - The DocumentMeta node itself
    - Orphaned Concept nodes (concepts with no remaining sources); this
      query is not scoped to the document, so it removes every orphaned
      Concept in the graph
    - Job records matching the document's filename and ontology
    - Garage storage objects (source documents and images)

    Garage objects are removed last, after the graph deletes have succeeded,
    so a failed graph operation does not leave a document whose stored files
    are already gone. Embedding, job and Garage cleanup are best-effort:
    failures are logged as warnings and do not fail the request.

    Args:
        document_id: Identifier of the DocumentMeta node to delete.
        current_user: Authenticated user (resolved by dependency injection).

    Returns:
        DocumentDeleteResponse with the number of Source nodes and orphaned
        Concept nodes that were deleted.

    Raises:
        HTTPException: 404 if no DocumentMeta node has this ``document_id``;
            500 if a graph operation fails.

    Authorization: Requires sources:delete permission.
    """
    client = AGEClient()
    try:
        # Find the DocumentMeta node
        doc_meta = client._execute_cypher("""
            MATCH (d:DocumentMeta {document_id: $document_id})
            RETURN d.document_id as document_id,
                   d.filename as filename,
                   d.ontology as ontology
        """, params={"document_id": document_id}, fetch_one=True)

        if not doc_meta:
            raise HTTPException(status_code=404, detail=f"Document '{document_id}' not found")

        ontology_name = doc_meta.get("ontology")
        # `or` (not a .get default) so a present-but-null filename also falls back.
        filename = doc_meta.get("filename") or document_id
        logger.info(f"Deleting document '{filename}' (id={document_id}) from ontology '{ontology_name}'")

        # Capture source ids and storage keys now; they are unreachable once
        # the Source nodes are deleted.
        source_rows = client._execute_cypher("""
            MATCH (d:DocumentMeta {document_id: $document_id})-[:HAS_SOURCE]->(s:Source)
            RETURN s.source_id as source_id, s.storage_key as storage_key
        """, params={"document_id": document_id}) or []

        source_ids = [row["source_id"] for row in source_rows if row.get("source_id")]
        storage_keys = [row["storage_key"] for row in source_rows if row.get("storage_key")]

        # Delete Instance nodes linked to this document's sources
        client._execute_cypher("""
            MATCH (d:DocumentMeta {document_id: $document_id})-[:HAS_SOURCE]->(s:Source)
            MATCH (i:Instance)-[:FROM_SOURCE]->(s)
            DETACH DELETE i
        """, params={"document_id": document_id})

        # Delete Source nodes
        result = client._execute_cypher("""
            MATCH (d:DocumentMeta {document_id: $document_id})-[:HAS_SOURCE]->(s:Source)
            DETACH DELETE s
            RETURN count(s) as deleted_count
        """, params={"document_id": document_id}, fetch_one=True)

        sources_deleted = result["deleted_count"] if result else 0

        # Delete source_embeddings rows for the removed sources
        if source_ids:
            _run_pooled_delete(
                client,
                """
                DELETE FROM kg_api.source_embeddings
                WHERE source_id = ANY(%s)
                """,
                (source_ids,),
                "source embeddings",
            )

        # Delete the DocumentMeta node
        client._execute_cypher("""
            MATCH (d:DocumentMeta {document_id: $document_id})
            DETACH DELETE d
        """, params={"document_id": document_id})

        # Clean up orphaned concepts (concepts with no remaining sources)
        orphaned_result = client._execute_cypher("""
            MATCH (c:Concept)
            WHERE NOT EXISTS { MATCH (c)-[:APPEARS]->(:Source) }
            DETACH DELETE c
            RETURN count(c) as orphaned_count
        """, fetch_one=True)

        orphaned_count = orphaned_result["orphaned_count"] if orphaned_result else 0

        # Delete job records for this document
        _run_pooled_delete(
            client,
            """
            DELETE FROM kg_api.jobs
            WHERE source_filename = %s AND ontology = %s
            """,
            (filename, ontology_name),
            "job records",
        )

        # Storage objects go last: the graph no longer references them, and
        # an earlier failure leaves both graph and storage intact for a retry.
        _delete_garage_objects(source_ids, storage_keys)

        logger.info(
            f"Deleted document '{filename}': "
            f"{sources_deleted} sources, {orphaned_count} orphaned concepts"
        )

        # Refresh graph epoch so caches (FUSE, etc.) detect the change
        client.refresh_epoch()

        return DocumentDeleteResponse(
            document_id=document_id,
            deleted=True,
            sources_deleted=sources_deleted,
            orphaned_concepts_deleted=orphaned_count,
        )

    except HTTPException:
        # Pass the 404 through untouched rather than wrapping it as a 500.
        raise
    except Exception as e:
        logger.error(f"Failed to delete document: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Failed to delete document: {str(e)}") from e
    finally:
        client.close()
0 commit comments