Skip to content

Commit bffb994

Browse files
committed
ci: pre-build FAISS indices in Docker image to fix ~60 min graph init
Graph initialisation during the docker-eval CI job was taking ~60 minutes because all 6 FAISS vector indices were rebuilt from scratch at container startup on every run (HuggingFace CPU inference over the full corpus, or Google Gemini embedding API exhausting its quota and retrying with 60 s minimum backoff).

Root cause:
- HybridRetrieverChain.create_hybrid_retriever() takes the slow embed_docs() path when faiss_db/<name> does not exist on disk.
- The faiss_data named volume is empty on every CI run (docker compose down --volumes is called between jobs), so the indices are never reused.

Fix:
- Add backend/scripts/build_faiss.py: runs RetrieverTools.initialize() with EMBEDDINGS_TYPE=HF, FAST_MODE=true, and contextual_rerank=False at Docker build time, saving all 6 FAISS indices into the image layer.
- Add a RUN step in the Dockerfile that calls the script after the dataset is downloaded. Docker layer caching means the step is skipped on re-runs where neither source nor data changed.
- Set ENV EMBEDDINGS_TYPE=HF / HF_EMBEDDINGS=thenlper/gte-large as container defaults so runtime matches the pre-built indices (override in .env or via docker run -e if a different model is needed).
- Add contextual_rerank: bool = True param to RetrieverTools.initialize() so the build script can skip loading the cross-encoder model, keeping the Docker build dependency-light.

On first CI run with an empty faiss_data volume, Docker copies the pre-built indices from the image into the volume automatically, so the container finds faiss_db/<name> at startup and takes the load_db() path instead. Graph init drops from ~60 min to a few seconds.

Note: ensure backend/.env (or ci-secret.yaml) sets EMBEDDINGS_TYPE=HF to match the pre-built indices; using a different model at runtime causes a vector dimension mismatch.

Signed-off-by: Jack Luar <jluar@precisioninno.com>
1 parent 052d03a commit bffb994

3 files changed

Lines changed: 76 additions & 6 deletions

File tree

backend/Dockerfile

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,23 @@ RUN git clone https://huggingface.co/datasets/The-OpenROAD-Project/ORAssistant_R
2727
mv ORAssistant_RAG_Dataset/* data/ && \
2828
rm -rf ORAssistant_RAG_Dataset
2929

30+
# Build FAISS indices into the image using HuggingFace embeddings (no API quota).
31+
# At container startup HybridRetrieverChain detects faiss_db/ and loads from disk,
32+
# reducing graph init from ~60 min to a few seconds.
33+
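# Override BUILD_HF_MODEL to build with a different HuggingFace model; build_faiss.py only accepts EMBEDDINGS_TYPE=HF, and the runtime HF_EMBEDDINGS must be set to the same model.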
34+
ARG BUILD_EMBEDDINGS_TYPE=HF
35+
ARG BUILD_HF_MODEL=thenlper/gte-large
36+
RUN PYTHONPATH=/ORAssistant-backend \
37+
EMBEDDINGS_TYPE=${BUILD_EMBEDDINGS_TYPE} \
38+
HF_EMBEDDINGS=${BUILD_HF_MODEL} \
39+
FAST_MODE=true \
40+
uv run python scripts/build_faiss.py
41+
42+
# Set HF as the default embedding backend so runtime matches the pre-built indices.
43+
# Override at container runtime (docker run -e / docker-compose environment) if needed; a model that differs from the pre-built indices causes a vector dimension mismatch.
44+
ENV EMBEDDINGS_TYPE=HF
45+
ENV HF_EMBEDDINGS=thenlper/gte-large
46+
3047
EXPOSE 8000
3148

3249
CMD ["uv", "run", "uvicorn", "src.api.main:app", "--host", "0.0.0.0", "--port", "8000", "--reload"]

backend/scripts/build_faiss.py

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
"""
2+
Pre-build FAISS indices for all retriever chains at Docker image build time.
3+
4+
Run from /ORAssistant-backend (the WORKDIR in the Dockerfile) so that data/ is
5+
reachable via relative paths. Only HuggingFace embeddings are supported, so no
6+
external API key or network quota is required during docker build.
7+
8+
The saved indices land in faiss_db/ inside the image. At container startup,
9+
HybridRetrieverChain.create_hybrid_retriever() detects existing directories and
10+
takes the load_db() path instead of re-embedding, dropping init time from ~60 min
11+
to a few seconds.
12+
13+
EMBEDDINGS_TYPE and HF_EMBEDDINGS are read from the environment so the Dockerfile
14+
ARG values propagate through cleanly.
15+
"""
16+
17+
import os
18+
import sys
19+
import logging
20+
21+
logging.basicConfig(level="INFO", format="%(levelname)s %(message)s")
22+
23+
if not os.path.isdir("data"):
24+
sys.exit("ERROR: run from backend directory — data/ not found")
25+
26+
embeddings_type = os.environ.get("EMBEDDINGS_TYPE", "HF")
27+
hf_embeddings = os.environ.get("HF_EMBEDDINGS", "thenlper/gte-large")
28+
fast_mode = os.environ.get("FAST_MODE", "true").lower() == "true"
29+
30+
if embeddings_type != "HF":
31+
sys.exit(
32+
f"ERROR: build_faiss.py only supports EMBEDDINGS_TYPE=HF, got '{embeddings_type}'"
33+
)
34+
35+
embeddings_config = {"type": embeddings_type, "name": hf_embeddings}
36+
37+
logging.info("Pre-building FAISS indices")
38+
logging.info(" model : %s", hf_embeddings)
39+
logging.info(" fast_mode : %s", fast_mode)
40+
41+
from src.agents.retriever_tools import RetrieverTools
42+
43+
tools = RetrieverTools()
44+
tools.initialize(
45+
embeddings_config=embeddings_config,
46+
reranking_model_name="",
47+
use_cuda=False,
48+
fast_mode=fast_mode,
49+
contextual_rerank=False,
50+
)
51+
52+
logging.info("FAISS pre-build complete")

backend/src/agents/retriever_tools.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ def initialize(
4545
reranking_model_name: str,
4646
use_cuda: bool = False,
4747
fast_mode: bool = False,
48+
contextual_rerank: bool = True,
4849
) -> None:
4950
markdown_docs_map = {
5051
"general": [
@@ -97,7 +98,7 @@ def initialize(
9798
else markdown_docs_map["general"],
9899
other_docs_path=[] if fast_mode else ["./data/pdf"],
99100
weights=[0.6, 0.2, 0.2],
100-
contextual_rerank=True,
101+
contextual_rerank=contextual_rerank,
101102
search_k=search_k,
102103
chunk_size=chunk_size,
103104
)
@@ -112,7 +113,7 @@ def initialize(
112113
if fast_mode
113114
else markdown_docs_map["install"],
114115
weights=[0.6, 0.2, 0.2],
115-
contextual_rerank=True,
116+
contextual_rerank=contextual_rerank,
116117
search_k=search_k,
117118
chunk_size=chunk_size,
118119
)
@@ -128,7 +129,7 @@ def initialize(
128129
else markdown_docs_map["commands"],
129130
other_docs_path=[] if fast_mode else ["./data/pdf"],
130131
weights=[0.6, 0.2, 0.2],
131-
contextual_rerank=True,
132+
contextual_rerank=contextual_rerank,
132133
search_k=search_k,
133134
chunk_size=chunk_size,
134135
)
@@ -143,7 +144,7 @@ def initialize(
143144
if fast_mode
144145
else ["./data/html/yosys_docs"],
145146
weights=[0.6, 0.2, 0.2],
146-
contextual_rerank=True,
147+
contextual_rerank=contextual_rerank,
147148
search_k=search_k,
148149
chunk_size=chunk_size,
149150
)
@@ -158,7 +159,7 @@ def initialize(
158159
if fast_mode
159160
else ["./data/html/klayout_docs"],
160161
weights=[0.6, 0.2, 0.2],
161-
contextual_rerank=True,
162+
contextual_rerank=contextual_rerank,
162163
search_k=search_k,
163164
chunk_size=chunk_size,
164165
)
@@ -173,7 +174,7 @@ def initialize(
173174
if fast_mode
174175
else markdown_docs_map["errinfo"],
175176
weights=[0.6, 0.2, 0.2],
176-
contextual_rerank=True,
177+
contextual_rerank=contextual_rerank,
177178
search_k=search_k,
178179
chunk_size=chunk_size,
179180
)

0 commit comments

Comments
 (0)