Skip to content

Commit c2ef99c

Browse files
committed
chore: merge quick task worktree (worktree-agent-a8ad88bb)
2 parents 75bdd80 + 8357f38 commit c2ef99c

6 files changed

Lines changed: 126 additions & 12 deletions

File tree

.planning/quick/260416-u2r-fix-review-findings-4-important-i-1-get-/260416-u2r-SUMMARY.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -153,3 +153,7 @@ Verified by direct inspection:
153153
- `grep -rn detected_python_source src tests` returns zero matches.
154154
- `pyproject.toml` shows `beautifulsoup4` and `markdownify` only under `[project.optional-dependencies].build`.
155155
- Runtime simulation confirmed `_ensure_build_deps()` raises actionable `ImportError` with the install hint when both deps are missing.
156+
157+
## Round 3 Ratifications
158+
159+
**I-3 (case-insensitive symbol lookup) — `COLLATE NOCASE` vs `normalized_name`.** Round 3 review flagged that the Task 9 fix in `retrieval/ranker.py::lookup_symbols_exact` uses `WHERE qualified_name = ? COLLATE NOCASE` rather than querying the already-populated `normalized_name` column. The two approaches are correctness-equivalent: `normalized_name` stores the lower-cased form of `qualified_name`, so `WHERE normalized_name = LOWER(?)` would return the same result set. Neither column has an index covering this exact-match scan today (`symbols` only indexes `(doc_set_id, qualified_name)`-style composites), so neither approach pays a planned-performance penalty over the other in v0.1.0 — the symbol fast-path is already gated on the query being a short dotted name, which keeps the scan row-count tiny in practice. Adding `CREATE INDEX idx_symbols_normalized ON symbols(normalized_name)` plus switching the query to `LOWER(?)` is a clean v1.1 change once real symbol-lookup query volume is measured; the schema already reserves the column, so there's no migration cost. **Disposition:** ratified as-is for v0.1.0, deferred to v1.1.

src/mcp_server_python_docs/ingestion/publish.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -353,6 +353,9 @@ def publish_index(
353353
("failed", "\n".join(messages), run_id),
354354
)
355355
conn.commit()
356+
# Round 3: finalize WAL here too so the failed build_db leaves no
357+
# -wal/-shm sidecars even though we're not swapping it in.
358+
finalize_for_swap(conn)
356359
logger.error("Smoke tests failed — not publishing")
357360
return False
358361

src/mcp_server_python_docs/services/content.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ def get_docs(
5252
with contextlib.closing(
5353
self._db.execute(
5454
"""
55-
SELECT d.id, d.title, d.slug
55+
SELECT d.id, d.title, d.slug, d.content_text
5656
FROM documents d
5757
JOIN doc_sets ds ON d.doc_set_id = ds.id
5858
WHERE d.slug = ? AND ds.version = ?
@@ -110,7 +110,10 @@ def get_docs(
110110
section_rows = cursor.fetchall()
111111

112112
if not section_rows:
113-
full_text = ""
113+
# I-1 (Round 3): fall back to the document-level content_text when
114+
# no sections exist (e.g. symbol-only builds). Keeps the empty-string
115+
# behavior only when content_text itself is NULL/empty.
116+
full_text = doc_row["content_text"] or ""
114117
else:
115118
parts = []
116119
for row in section_rows:

src/mcp_server_python_docs/services/search.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -76,8 +76,14 @@ def search(
7676
expanded = expand_synonyms(query, self._synonyms)
7777
self._last_synonym_expanded = expanded != original_tokens
7878

79-
# Classify query for routing (RETR-04)
80-
query_type = classify_query(query, self._symbol_exists)
79+
# Classify query for routing (RETR-04).
80+
# M-5 (Round 3): only classify when the result will actually be consumed —
81+
# the symbol fast-path below only triggers for kind in ("auto", "symbol"),
82+
# so skip the DB round-trip entirely for "section"/"example"/"page".
83+
if kind in ("auto", "symbol"):
84+
query_type = classify_query(query, self._symbol_exists)
85+
else:
86+
query_type = "fts"
8187

8288
# Symbol fast-path: skip FTS5 entirely
8389
if kind == "symbol" or (kind == "auto" and query_type == "symbol"):

tests/test_publish.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -332,6 +332,56 @@ def test_publish_index_second_build_replaces_cleanly(self, tmp_path, monkeypatch
332332
assert not wal_sidecars, f"WAL sidecar leaked after 2nd build: {entries}"
333333
assert not shm_sidecars, f"SHM sidecar leaked after 2nd build: {entries}"
334334

335+
def test_publish_index_leaves_no_wal_sidecars_on_smoke_failure(
336+
self, tmp_path, monkeypatch
337+
):
338+
"""Round 3: smoke-test failure path must also finalize WAL.
339+
340+
Forces run_smoke_tests to return (False, [...]) and asserts publish_index
341+
returns False AND leaves no -wal/-shm sidecars in the cache dir.
342+
"""
343+
from mcp_server_python_docs.ingestion import publish as publish_mod
344+
345+
target_index = tmp_path / "index.db"
346+
monkeypatch.setattr(
347+
"mcp_server_python_docs.storage.db.get_cache_dir",
348+
lambda: tmp_path,
349+
)
350+
monkeypatch.setattr(
351+
"mcp_server_python_docs.storage.db.get_index_path",
352+
lambda: target_index,
353+
)
354+
monkeypatch.setattr(publish_mod, "get_index_path", lambda: target_index)
355+
356+
# Force smoke test failure regardless of DB contents.
357+
monkeypatch.setattr(
358+
publish_mod,
359+
"run_smoke_tests",
360+
lambda *args, **kwargs: (False, ["FAIL: forced for test"]),
361+
)
362+
363+
build_db = tmp_path / "build-smoke-fail.db"
364+
self._seed_passing_build(build_db)
365+
366+
assert publish_mod.publish_index(build_db, "3.13") is False
367+
368+
# No WAL/SHM sidecars anywhere in tmp_path.
369+
wal_sidecars = [
370+
p.name for p in tmp_path.iterdir() if p.name.endswith("-wal")
371+
]
372+
shm_sidecars = [
373+
p.name for p in tmp_path.iterdir() if p.name.endswith("-shm")
374+
]
375+
assert not wal_sidecars, (
376+
f"WAL sidecar leaked on failure path: {wal_sidecars}"
377+
)
378+
assert not shm_sidecars, (
379+
f"SHM sidecar leaked on failure path: {shm_sidecars}"
380+
)
381+
382+
# index.db must not have been created (atomic_swap was not reached).
383+
assert not target_index.exists()
384+
335385

336386
class TestReadOnlyConnection:
337387
def test_can_query_existing_db(self, tmp_path):

tests/test_services.py

Lines changed: 56 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,50 @@ def test_search_synonym_expansion_tracking(self, populated_with_content):
119119
svc.search("http", kind="section")
120120
assert svc._last_synonym_expanded is True
121121

122+
def test_search_does_not_classify_for_non_symbol_kinds(
123+
self, populated_with_content, monkeypatch
124+
):
125+
"""M-5 (Round 3): for kind in ('section','example','page') the service
126+
must not invoke classify_query / _symbol_exists at all — the DB
127+
round-trip is pure waste when the result will never be consumed."""
128+
from unittest.mock import MagicMock
129+
130+
svc = SearchService(populated_with_content, {})
131+
mock_symbol_exists = MagicMock(return_value=True)
132+
monkeypatch.setattr(svc, "_symbol_exists", mock_symbol_exists)
133+
134+
svc.search(query="socket", kind="section", max_results=5)
135+
mock_symbol_exists.assert_not_called()
136+
137+
svc.search(query="socket", kind="example", max_results=5)
138+
mock_symbol_exists.assert_not_called()
139+
140+
svc.search(query="socket", kind="page", max_results=5)
141+
mock_symbol_exists.assert_not_called()
142+
143+
def test_search_classifies_for_auto_and_symbol_kinds(
144+
self, populated_with_content, monkeypatch
145+
):
146+
"""M-5 (Round 3) positive control: for kind in ('auto','symbol') the
147+
service still invokes classify_query / _symbol_exists — the gate must
148+
not regress the fast-path routing."""
149+
from unittest.mock import MagicMock
150+
151+
svc = SearchService(populated_with_content, {})
152+
mock_symbol_exists = MagicMock(return_value=False)
153+
monkeypatch.setattr(svc, "_symbol_exists", mock_symbol_exists)
154+
155+
# 'socket' is a lowercase identifier that matches _MODULE_PATTERN and
156+
# passes the length>=2 short-circuit, so classify_query WILL call
157+
# symbol_exists_fn. Dotted queries take the dot branch without calling
158+
# the fn, so we use a single-word identifier instead.
159+
svc.search(query="socket", kind="auto", max_results=5)
160+
assert mock_symbol_exists.called
161+
162+
mock_symbol_exists.reset_mock()
163+
svc.search(query="socket", kind="symbol", max_results=5)
164+
assert mock_symbol_exists.called
165+
122166

123167
# === ContentService Tests ===
124168

@@ -181,13 +225,16 @@ def test_get_docs_default_version(self, populated_with_content):
181225
result = svc.get_docs(slug="library/asyncio-task.html")
182226
assert result.version == "3.13"
183227

184-
def test_get_docs_returns_empty_content_for_symbols_only_doc(self, populated_db):
185-
"""I-1: a document row with no sections returns empty content, not a raise.
228+
def test_get_docs_falls_back_to_document_content_text_when_no_sections(
229+
self, populated_db
230+
):
231+
"""I-1 (Round 3): when a document has zero sections, get_docs must fall
232+
back to the documents.content_text column rather than returning empty.
186233
187234
Scenario: symbol-only builds (or pathological ingestion) can end up with a
188235
documents row whose sections table has no matching rows. The service must
189-
return a structured GetDocsResult with content='', char_count=0,
190-
truncated=False — not raise PageNotFoundError.
236+
return a structured GetDocsResult whose content is the document-level
237+
content_text, not the empty string.
191238
"""
192239
db = populated_db
193240
row = db.execute("SELECT id FROM doc_sets LIMIT 1").fetchone()
@@ -196,19 +243,20 @@ def test_get_docs_returns_empty_content_for_symbols_only_doc(self, populated_db)
196243
"UPDATE doc_sets SET built_at = '2026-04-16T00:00:00' WHERE id = ?",
197244
(doc_set_id,),
198245
)
199-
# Seed a document with empty content and ZERO sections.
246+
# Seed a document with 'hello world' content and ZERO sections.
200247
db.execute(
201248
"INSERT INTO documents (doc_set_id, uri, slug, title, content_text, char_count) "
202-
"VALUES (?, 'library/empty.html', 'library/empty.html', 'Empty Page', '', 0)",
249+
"VALUES (?, 'library/empty.html', 'library/empty.html', 'Empty Page', "
250+
"'hello world', 11)",
203251
(doc_set_id,),
204252
)
205253
db.commit()
206254

207255
svc = ContentService(db)
208256
result = svc.get_docs(slug="library/empty.html")
209257
assert isinstance(result, GetDocsResult)
210-
assert result.content == ""
211-
assert result.char_count == 0
258+
assert result.content == "hello world"
259+
assert result.char_count == 11
212260
assert result.truncated is False
213261
assert result.next_start_index is None
214262
assert result.anchor is None

0 commit comments

Comments
 (0)