Skip to content

Commit 6a22262

Browse files
committed
feat: add document dedup by name in CloudBackend
1 parent bac2792 commit 6a22262

File tree

1 file changed

+9
-0
lines changed

1 file changed

+9
-0
lines changed

pageindex/backend/cloud.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from __future__ import annotations
77
import json
88
import logging
9+
import os
910
import re
1011
import time
1112
import urllib.parse
@@ -125,6 +126,14 @@ def delete_collection(self, name: str) -> None:
125126
# ── Document management ───────────────────────────────────────────────
126127

127128
def add_document(self, collection: str, file_path: str) -> str:
129+
file_name = os.path.basename(file_path)
130+
131+
# Dedup: if the collection already has a document with this file name, return its doc_id instead of adding it again
132+
existing_docs = self.list_documents(collection)
133+
for doc in existing_docs:
134+
if doc.get("doc_name") == file_name:
135+
return doc["doc_id"]
136+
128137
folder_id = self._get_folder_id(collection)
129138
data = {"if_retrieval": "true"}
130139
if folder_id:

0 commit comments

Comments
 (0)