Skip to content

Commit 43a5e4a

Browse files
authored
fix: increase batch delete size and optimize document loading with async (#440)
1 parent 4aeb9c4 commit 43a5e4a

File tree

3 files changed

+4
-3
lines changed

3 files changed

+4
-3
lines changed

runtime/datamate-python/app/module/rag/infra/vectorstore/store.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626

2727
logger = logging.getLogger(__name__)
2828

29-
BATCH_DELETE_SIZE = 100
29+
BATCH_DELETE_SIZE = 500
3030

3131

3232
def _delete_chunks_by_rag_file_id_batched(client, collection_name: str, rag_file_id: str) -> int:

runtime/datamate-python/app/module/rag/service/common/batch_processor.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,8 @@ async def _store_single_batch(
8080
documents, doc_ids = chunks_to_documents(batch_chunks, ids=ids)
8181

8282
try:
83-
vectorstore.add_documents(documents=documents, ids=doc_ids)
83+
# 使用异步方法避免阻塞事件循环
84+
await vectorstore.aadd_documents(documents=documents, ids=doc_ids)
8485
logger.info("批次 %d-%d 存储成功", batch_start + 1, batch_end)
8586
except Exception as e:
8687
logger.error(

runtime/datamate-python/app/module/rag/service/file_processor.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,7 @@ async def _process_single_graph_file(
153153
await self._mark_failed(db, file_repo, str(rag_file.id), "文件不存在")
154154
return
155155

156-
documents = load_documents(file_path)
156+
documents = await asyncio.to_thread(load_documents, file_path)
157157
if not documents:
158158
await self._mark_failed(db, file_repo, str(rag_file.id), "文件解析失败,未生成文档")
159159
return

0 commit comments

Comments (0)