Skip to content

Commit b55cc8e

Browse files
authored
fix: batch FAISS embedding to avoid 429 rate limit exhaustion (#289)
Split document embedding into batches of 100 chunks, with a 1s delay between batches, so that a 429 retries only the failing batch (~1 API call) rather than restarting FAISS.from_documents from scratch (~87 calls). Also raise the retry backoff from min 10s / max 120s to min 60s / max 600s to give the quota time to reset before the next attempt. Signed-off-by: Jack Luar <jluar@precisioninno.com>
1 parent 8a1258a commit b55cc8e

1 file changed

Lines changed: 11 additions & 3 deletions

File tree

backend/src/vectorstores/faiss.py

Lines changed: 11 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
11
import os
2+
import time
23
import logging
34
from typing import Optional, Union
45
from dotenv import load_dotenv
@@ -74,13 +75,13 @@ def faiss_db(self) -> Optional[FAISS]:
7475

7576
@retry(
7677
stop=stop_after_attempt(5),
77-
wait=wait_exponential(multiplier=2, min=10, max=120),
78+
wait=wait_exponential(multiplier=2, min=60, max=600),
7879
retry=retry_if_exception(
7980
lambda e: "RESOURCE_EXHAUSTED" in str(e) or "429" in str(e)
8081
),
8182
reraise=True,
8283
)
83-
def _add_to_db(self, documents: list[Document]) -> None:
84+
def _embed_and_add(self, documents: list[Document]) -> None:
8485
if self._faiss_db is None:
8586
self._faiss_db = FAISS.from_documents(
8687
documents=documents,
@@ -90,6 +91,13 @@ def _add_to_db(self, documents: list[Document]) -> None:
9091
else:
9192
self._faiss_db.add_documents(documents)
9293

94+
def _add_to_db(self, documents: list[Document], batch_size: int = 100) -> None:
95+
for i in range(0, len(documents), batch_size):
96+
batch = documents[i : i + batch_size]
97+
self._embed_and_add(batch)
98+
if i + batch_size < len(documents):
99+
time.sleep(1)
100+
93101
def add_md_docs(
94102
self, folder_paths: list[str], chunk_size: int = 500, return_docs: bool = False
95103
) -> Optional[list[Document]]:
@@ -229,7 +237,7 @@ def get_documents(self) -> list[Document]:
229237

230238
@retry(
231239
stop=stop_after_attempt(5),
232-
wait=wait_exponential(multiplier=2, min=10, max=120),
240+
wait=wait_exponential(multiplier=2, min=60, max=600),
233241
retry=retry_if_exception(
234242
lambda e: "RESOURCE_EXHAUSTED" in str(e) or "429" in str(e)
235243
),

0 commit comments

Comments
 (0)