Skip to content

Commit 2f9227a

Browse files
fix(backend): resolve five backend bugs reported in #259
Each fix ships with a regression test (RED->GREEN): - tools/process_pdf.py: return [] when a PDF raises PdfStreamError instead of falling through to an UnboundLocalError on the undefined `documents`. - vectorstores/faiss.py: honor the FAISS_DB_PATH environment variable in get_db_path() (previously the configured value was ignored). - agents/retriever_typing.py: add the missing `context_list` field to the AgentState schema so LangGraph propagates retrieved context downstream. - api/routers/helpers.py: derive the Gemini model from GOOGLE_GEMINI via a small mapping (default gemini-2.0-flash) instead of hardcoding it. - prompts/prompt_templates.py: fix the "avaiable" typo in summarise_prompt_template ("Sorry, it's not available..."). ruff format/check and mypy (strict) pass. Closes #259 Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent ab50525 commit 2f9227a

10 files changed

Lines changed: 102 additions & 13 deletions

File tree

backend/src/agents/retriever_typing.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
class AgentState(TypedDict):
77
messages: Annotated[list[AnyMessage], add_messages]
88
context: Annotated[list[AnyMessage], add_messages]
9+
context_list: Annotated[list[str], add_messages]
910
tools: list[str]
1011
sources: Annotated[list[str], add_messages]
1112
urls: Annotated[list[str], add_messages]

backend/src/api/routers/helpers.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,22 @@
1111
if not GOOGLE_API_KEY:
1212
raise RuntimeError("GOOGLE_API_KEY is not set")
1313

14-
model = "gemini-2.0-flash"
14+
_GEMINI_MODEL_MAP = {
15+
"2.0_flash": "gemini-2.0-flash",
16+
"2.5_flash": "gemini-2.5-flash",
17+
"2.5_pro": "gemini-2.5-pro",
18+
}
19+
20+
21+
def _resolve_gemini_model(gemini_version: str | None) -> str:
22+
"""Map a GOOGLE_GEMINI version string to a Gemini model name.
23+
24+
Falls back to ``gemini-2.0-flash`` when the value is unset or unknown.
25+
"""
26+
return _GEMINI_MODEL_MAP.get(gemini_version or "", "gemini-2.0-flash")
27+
28+
29+
model = _resolve_gemini_model(os.getenv("GOOGLE_GEMINI"))
1530
client = OpenAI(
1631
base_url="https://generativelanguage.googleapis.com/v1beta/openai/",
1732
api_key=GOOGLE_API_KEY,

backend/src/prompts/prompt_templates.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
You must not ask the user to refer to the context in any part of your answer.
1212
You must not ask the user to refer to a link that is not a part of your answer.
1313
14-
If there is nothing in the context relevant to the question, simply say "Sorry its not avaiable in my knowledge base."
14+
If there is nothing in the context relevant to the question, simply say "Sorry, it's not available in my knowledge base."
1515
Do not try to make up an answer.
1616
Anything between the following `context` html blocks is retrieved from a knowledge bank, not part of the conversation with the user.
1717

backend/src/tools/process_pdf.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ def process_pdf_docs(file_path: str) -> list[Document]:
3131
documents = loader.load_and_split(text_splitter=text_splitter)
3232
except PdfStreamError:
3333
logging.error(f"Error processing PDF: {file_path} is corrupted or incomplete.")
34+
return []
3435

3536
for doc in documents:
3637
try:

backend/src/vectorstores/faiss.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -214,6 +214,9 @@ def add_documents(
214214
return None
215215

216216
def get_db_path(self) -> str:
217+
env_path = os.getenv("FAISS_DB_PATH")
218+
if env_path:
219+
return os.path.abspath(env_path)
217220
cur_path = os.path.abspath(__file__)
218221
path = os.path.join(cur_path, "../../../", "faiss_db")
219222
path = os.path.abspath(path) # Ensure proper parent directory

backend/tests/test_api_helpers.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,22 @@ def test_constants_defined(self):
8181
assert model == "gemini-2.0-flash"
8282
# GOOGLE_API_KEY should be set or raise error during module import
8383

84+
def test_resolve_gemini_model_maps_known_versions(self):
85+
"""_resolve_gemini_model maps GOOGLE_GEMINI values to model names (issue #259)."""
86+
from src.api.routers.helpers import _resolve_gemini_model
87+
88+
assert _resolve_gemini_model("2.0_flash") == "gemini-2.0-flash"
89+
assert _resolve_gemini_model("2.5_flash") == "gemini-2.5-flash"
90+
assert _resolve_gemini_model("2.5_pro") == "gemini-2.5-pro"
91+
92+
def test_resolve_gemini_model_defaults_for_unknown_or_unset(self):
93+
"""_resolve_gemini_model falls back to gemini-2.0-flash for unset/unknown values."""
94+
from src.api.routers.helpers import _resolve_gemini_model
95+
96+
assert _resolve_gemini_model(None) == "gemini-2.0-flash"
97+
assert _resolve_gemini_model("") == "gemini-2.0-flash"
98+
assert _resolve_gemini_model("nonexistent") == "gemini-2.0-flash"
99+
84100
def test_router_configuration(self):
85101
"""Test that router is properly configured."""
86102
from src.api.routers.helpers import router

backend/tests/test_faiss_vectorstore.py

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -184,18 +184,35 @@ def test_add_md_docs_invalid_folder_paths(self):
184184
db.add_md_docs(folder_paths="not_a_list")
185185

186186
def test_get_db_path(self):
187-
"""Test get_db_path returns correct path."""
187+
"""get_db_path falls back to the default ./faiss_db when FAISS_DB_PATH is unset."""
188188
with patch("src.vectorstores.faiss.HuggingFaceEmbeddings") as mock_hf:
189189
mock_hf.return_value = Mock()
190190

191191
db = FAISSVectorDatabase(
192192
embeddings_type="HF", embeddings_model_name="test-model"
193193
)
194194

195-
path = db.get_db_path()
195+
with patch.dict(os.environ, {}, clear=False):
196+
os.environ.pop("FAISS_DB_PATH", None)
197+
path = db.get_db_path()
198+
196199
assert path.endswith("faiss_db")
197200
assert os.path.isabs(path)
198201

202+
def test_get_db_path_respects_env_var(self):
203+
"""get_db_path honors the FAISS_DB_PATH environment variable (issue #259)."""
204+
with patch("src.vectorstores.faiss.HuggingFaceEmbeddings") as mock_hf:
205+
mock_hf.return_value = Mock()
206+
207+
db = FAISSVectorDatabase(
208+
embeddings_type="HF", embeddings_model_name="test-model"
209+
)
210+
211+
with patch.dict(os.environ, {"FAISS_DB_PATH": "custom_dir/faiss_index"}):
212+
path = db.get_db_path()
213+
214+
assert path == os.path.abspath("custom_dir/faiss_index")
215+
199216
def test_save_db_without_documents_raises_error(self):
200217
"""Test save_db raises error when no documents in database."""
201218
with patch("src.vectorstores.faiss.HuggingFaceEmbeddings") as mock_hf:

backend/tests/test_process_pdf.py

Lines changed: 21 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -77,15 +77,27 @@ def test_process_pdf_docs_multiple_pages(self, mock_file, mock_loader):
7777
assert all(doc.metadata["url"] == "https://example1.com" for doc in result)
7878
assert all(doc.metadata["source"] == "doc1.pdf" for doc in result)
7979

80-
# Note: Commented out due to bug in process_pdf_docs function
81-
# The function doesn't properly handle PdfStreamError - it logs but then
82-
# tries to use undefined 'documents' variable
83-
# @patch('src.tools.process_pdf.logging')
84-
# @patch('src.tools.process_pdf.PyPDFLoader')
85-
# @patch('builtins.open', new_callable=mock_open, read_data='{"corrupted.pdf": "https://example.com"}')
86-
# def test_process_pdf_docs_corrupted_file(self, mock_file, mock_loader, mock_logging):
87-
# """Test PDF processing with corrupted file."""
88-
# pass
80+
@patch("src.tools.process_pdf.PyPDFLoader")
81+
@patch(
82+
"builtins.open",
83+
new_callable=mock_open,
84+
read_data='{"corrupted.pdf": "https://example.com"}',
85+
)
86+
def test_process_pdf_docs_corrupted_file_returns_empty(
87+
self, mock_file, mock_loader
88+
):
89+
"""A corrupted PDF (PdfStreamError) should return [] instead of crashing."""
90+
from pypdf.errors import PdfStreamError
91+
92+
mock_loader_instance = Mock()
93+
mock_loader_instance.load_and_split.side_effect = PdfStreamError(
94+
"corrupted stream"
95+
)
96+
mock_loader.return_value = mock_loader_instance
97+
98+
result = process_pdf_docs("./corrupted.pdf")
99+
100+
assert result == []
89101

90102
@patch("src.tools.process_pdf.logging")
91103
@patch("src.tools.process_pdf.PyPDFLoader")

backend/tests/test_prompt_templates.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,3 +48,13 @@ def test_suggested_questions_prompt_template_is_string(self):
4848
assert isinstance(suggested_questions_prompt_template, str)
4949
assert suggested_questions_prompt_template != ""
5050
assert suggested_questions_prompt_template is not None
51+
52+
def test_summarise_prompt_template_has_no_typo(self):
53+
"""summarise_prompt_template should use correct spelling (issue #259)."""
54+
from src.prompts.prompt_templates import summarise_prompt_template
55+
56+
assert "avaiable" not in summarise_prompt_template
57+
assert (
58+
"Sorry, it's not available in my knowledge base."
59+
in summarise_prompt_template
60+
)
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
from src.agents.retriever_typing import AgentState
2+
3+
4+
class TestAgentState:
5+
"""Test suite for the AgentState TypedDict."""
6+
7+
def test_agent_state_includes_context_list(self):
8+
"""AgentState must declare context_list so LangGraph propagates it (issue #259).
9+
10+
ToolNode.get_node returns a ``context_list`` key, but LangGraph drops any
11+
state key that is not declared on the graph's state schema. Without this
12+
annotation the retrieved context list is silently lost downstream.
13+
"""
14+
assert "context_list" in AgentState.__annotations__

0 commit comments

Comments
 (0)