chore: merge quick task worktree (worktree-agent-a8ad88bb)

ayhammouda · ayhammouda · commit c2ef99cd33a0 · 2026-04-16T22:31:45.000+02:00
diff --git a/.planning/quick/260416-u2r-fix-review-findings-4-important-i-1-get-/260416-u2r-SUMMARY.md b/.planning/quick/260416-u2r-fix-review-findings-4-important-i-1-get-/260416-u2r-SUMMARY.md
@@ -153,3 +153,7 @@ Verified by direct inspection:
 - `grep -rn detected_python_source src tests` returns zero matches.
 - `pyproject.toml` shows `beautifulsoup4` and `markdownify` only under `[project.optional-dependencies].build`.
 - Runtime simulation confirmed `_ensure_build_deps()` raises actionable `ImportError` with the install hint when both deps are missing.
+
+## Round 3 Ratifications
+
**I-3 (case-insensitive symbol lookup) — `COLLATE NOCASE` vs `normalized_name`.** Round 3 review flagged that the Task 9 fix in `retrieval/ranker.py::lookup_symbols_exact` uses `WHERE qualified_name = ? COLLATE NOCASE` rather than querying the already-populated `normalized_name` column. The two approaches are correctness-equivalent for ASCII names (SQLite's built-in `NOCASE` collation folds only ASCII letters): `normalized_name` stores the lower-cased form of `qualified_name`, so `WHERE normalized_name = LOWER(?)` would return the same result set. Neither column has an index covering this exact-match scan today (`symbols` only indexes `(doc_set_id, qualified_name)`-style composites), so neither approach incurs a query-plan performance penalty relative to the other in v0.1.0 — the symbol fast-path is already gated on the query being a short dotted name, which keeps the scan row-count tiny in practice. Adding `CREATE INDEX idx_symbols_normalized ON symbols(normalized_name)` plus switching the query to `LOWER(?)` is a clean v1.1 change once real symbol-lookup query volume is measured; the schema already reserves the column, so there's no column-migration cost. **Disposition:** ratified as-is for v0.1.0, deferred to v1.1.
diff --git a/src/mcp_server_python_docs/ingestion/publish.py b/src/mcp_server_python_docs/ingestion/publish.py
@@ -353,6 +353,9 @@ def publish_index(
                 ("failed", "\n".join(messages), run_id),
             )
             conn.commit()
+            # Round 3: finalize WAL here too so the failed build_db leaves no
+            # -wal/-shm sidecars even though we're not swapping it in.
+            finalize_for_swap(conn)
             logger.error("Smoke tests failed — not publishing")
             return False
 
diff --git a/src/mcp_server_python_docs/services/content.py b/src/mcp_server_python_docs/services/content.py
@@ -52,7 +52,7 @@ def get_docs(
         with contextlib.closing(
             self._db.execute(
                 """
-                SELECT d.id, d.title, d.slug
+                SELECT d.id, d.title, d.slug, d.content_text
                 FROM documents d
                 JOIN doc_sets ds ON d.doc_set_id = ds.id
                 WHERE d.slug = ? AND ds.version = ?
@@ -110,7 +110,10 @@ def get_docs(
                 section_rows = cursor.fetchall()
 
             if not section_rows:
-                full_text = ""
+                # I-1 (Round 3): fall back to the document-level content_text when
+                # no sections exist (e.g. symbol-only builds). Keeps the empty-string
+                # behavior only when content_text itself is NULL/empty.
+                full_text = doc_row["content_text"] or ""
             else:
                 parts = []
                 for row in section_rows:
diff --git a/src/mcp_server_python_docs/services/search.py b/src/mcp_server_python_docs/services/search.py
@@ -76,8 +76,14 @@ def search(
         expanded = expand_synonyms(query, self._synonyms)
         self._last_synonym_expanded = expanded != original_tokens
 
-        # Classify query for routing (RETR-04)
-        query_type = classify_query(query, self._symbol_exists)
+        # Classify query for routing (RETR-04).
+        # M-5 (Round 3): only classify when the result will actually be consumed —
+        # the symbol fast-path below only triggers for kind in ("auto", "symbol"),
+        # so skip the DB round-trip entirely for "section"/"example"/"page".
+        if kind in ("auto", "symbol"):
+            query_type = classify_query(query, self._symbol_exists)
+        else:
+            query_type = "fts"
 
         # Symbol fast-path: skip FTS5 entirely
         if kind == "symbol" or (kind == "auto" and query_type == "symbol"):
diff --git a/tests/test_publish.py b/tests/test_publish.py
@@ -332,6 +332,56 @@ def test_publish_index_second_build_replaces_cleanly(self, tmp_path, monkeypatch
         assert not wal_sidecars, f"WAL sidecar leaked after 2nd build: {entries}"
         assert not shm_sidecars, f"SHM sidecar leaked after 2nd build: {entries}"
 
+    def test_publish_index_leaves_no_wal_sidecars_on_smoke_failure(
+        self, tmp_path, monkeypatch
+    ):
+        """Round 3: smoke-test failure path must also finalize WAL.
+
+        Forces run_smoke_tests to return (False, [...]) and asserts publish_index
+        returns False AND leaves no -wal/-shm sidecars in the cache dir.
+        """
+        from mcp_server_python_docs.ingestion import publish as publish_mod
+
+        target_index = tmp_path / "index.db"
+        monkeypatch.setattr(
+            "mcp_server_python_docs.storage.db.get_cache_dir",
+            lambda: tmp_path,
+        )
+        monkeypatch.setattr(
+            "mcp_server_python_docs.storage.db.get_index_path",
+            lambda: target_index,
+        )
+        monkeypatch.setattr(publish_mod, "get_index_path", lambda: target_index)
+
+        # Force smoke test failure regardless of DB contents.
+        monkeypatch.setattr(
+            publish_mod,
+            "run_smoke_tests",
+            lambda *args, **kwargs: (False, ["FAIL: forced for test"]),
+        )
+
+        build_db = tmp_path / "build-smoke-fail.db"
+        self._seed_passing_build(build_db)
+
+        assert publish_mod.publish_index(build_db, "3.13") is False
+
+        # No WAL/SHM sidecars anywhere in tmp_path.
+        wal_sidecars = [
+            p.name for p in tmp_path.iterdir() if p.name.endswith("-wal")
+        ]
+        shm_sidecars = [
+            p.name for p in tmp_path.iterdir() if p.name.endswith("-shm")
+        ]
+        assert not wal_sidecars, (
+            f"WAL sidecar leaked on failure path: {wal_sidecars}"
+        )
+        assert not shm_sidecars, (
+            f"SHM sidecar leaked on failure path: {shm_sidecars}"
+        )
+
+        # index.db must not have been created (atomic_swap was not reached).
+        assert not target_index.exists()
+
 
 class TestReadOnlyConnection:
     def test_can_query_existing_db(self, tmp_path):
diff --git a/tests/test_services.py b/tests/test_services.py
@@ -119,6 +119,50 @@ def test_search_synonym_expansion_tracking(self, populated_with_content):
         svc.search("http", kind="section")
         assert svc._last_synonym_expanded is True
 
+    def test_search_does_not_classify_for_non_symbol_kinds(
+        self, populated_with_content, monkeypatch
+    ):
+        """M-5 (Round 3): for kind in ('section','example','page') the service
+        must not invoke classify_query / _symbol_exists at all — the DB
+        round-trip is pure waste when the result will never be consumed."""
+        from unittest.mock import MagicMock
+
+        svc = SearchService(populated_with_content, {})
+        mock_symbol_exists = MagicMock(return_value=True)
+        monkeypatch.setattr(svc, "_symbol_exists", mock_symbol_exists)
+
+        svc.search(query="socket", kind="section", max_results=5)
+        mock_symbol_exists.assert_not_called()
+
+        svc.search(query="socket", kind="example", max_results=5)
+        mock_symbol_exists.assert_not_called()
+
+        svc.search(query="socket", kind="page", max_results=5)
+        mock_symbol_exists.assert_not_called()
+
+    def test_search_classifies_for_auto_and_symbol_kinds(
+        self, populated_with_content, monkeypatch
+    ):
+        """M-5 (Round 3) positive control: for kind in ('auto','symbol') the
+        service still invokes classify_query / _symbol_exists — the gate must
+        not regress the fast-path routing."""
+        from unittest.mock import MagicMock
+
+        svc = SearchService(populated_with_content, {})
+        mock_symbol_exists = MagicMock(return_value=False)
+        monkeypatch.setattr(svc, "_symbol_exists", mock_symbol_exists)
+
+        # 'socket' is a lowercase identifier that matches _MODULE_PATTERN and
+        # passes the length>=2 short-circuit, so classify_query WILL call
+        # symbol_exists_fn. Dotted queries take the dot branch without calling
+        # the fn, so we use a single-word identifier instead.
+        svc.search(query="socket", kind="auto", max_results=5)
+        assert mock_symbol_exists.called
+
+        mock_symbol_exists.reset_mock()
+        svc.search(query="socket", kind="symbol", max_results=5)
+        assert mock_symbol_exists.called
+
 
 # === ContentService Tests ===
 
@@ -181,13 +225,16 @@ def test_get_docs_default_version(self, populated_with_content):
         result = svc.get_docs(slug="library/asyncio-task.html")
         assert result.version == "3.13"
 
-    def test_get_docs_returns_empty_content_for_symbols_only_doc(self, populated_db):
-        """I-1: a document row with no sections returns empty content, not a raise.
+    def test_get_docs_falls_back_to_document_content_text_when_no_sections(
+        self, populated_db
+    ):
+        """I-1 (Round 3): when a document has zero sections, get_docs must fall
+        back to the documents.content_text column rather than returning empty.
 
         Scenario: symbol-only builds (or pathological ingestion) can end up with a
         documents row whose sections table has no matching rows. The service must
-        return a structured GetDocsResult with content='', char_count=0,
-        truncated=False — not raise PageNotFoundError.
+        return a structured GetDocsResult whose content is the document-level
+        content_text, not the empty string.
         """
         db = populated_db
         row = db.execute("SELECT id FROM doc_sets LIMIT 1").fetchone()
@@ -196,19 +243,20 @@ def test_get_docs_returns_empty_content_for_symbols_only_doc(self, populated_db)
             "UPDATE doc_sets SET built_at = '2026-04-16T00:00:00' WHERE id = ?",
             (doc_set_id,),
         )
-        # Seed a document with empty content and ZERO sections.
+        # Seed a document with 'hello world' content and ZERO sections.
         db.execute(
             "INSERT INTO documents (doc_set_id, uri, slug, title, content_text, char_count) "
-            "VALUES (?, 'library/empty.html', 'library/empty.html', 'Empty Page', '', 0)",
+            "VALUES (?, 'library/empty.html', 'library/empty.html', 'Empty Page', "
+            "'hello world', 11)",
             (doc_set_id,),
         )
         db.commit()
 
         svc = ContentService(db)
         result = svc.get_docs(slug="library/empty.html")
         assert isinstance(result, GetDocsResult)
-        assert result.content == ""
-        assert result.char_count == 0
+        assert result.content == "hello world"
+        assert result.char_count == 11
         assert result.truncated is False
         assert result.next_start_index is None
         assert result.anchor is None

Original file line number	Diff line number	Diff line change
`@@ -353,6 +353,9 @@ def publish_index(`
`353`	`353`	`("failed", "\n".join(messages), run_id),`
`354`	`354`	`)`
`355`	`355`	`conn.commit()`
	`356`	`+ # Round 3: finalize WAL here too so the failed build_db leaves no`
	`357`	`+ # -wal/-shm sidecars even though we're not swapping it in.`
	`358`	`+ finalize_for_swap(conn)`
`356`	`359`	`logger.error("Smoke tests failed — not publishing")`
`357`	`360`	`return False`
`358`	`361`