Skip to content

Commit 9206c09

Browse files
ayhammouda and claude committed
fix: persist symbol-only build mode so validate-corpus respects it
publish_index now records build_mode=symbol_only in ingestion_runs.notes when require_content=False. validate-corpus reads this flag to auto-detect symbol-only indexes instead of always enforcing full-content checks.

Also fixes a read-only SQLite regression (PRAGMA journal_mode = WAL was being issued on read-only connections), adds symbol-only smoke-test support via a require_content parameter, and consolidates connection helpers via get_readonly_connection.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 4ebb60e commit 9206c09

5 files changed

Lines changed: 213 additions & 61 deletions

File tree

src/mcp_server_python_docs/__main__.py

Lines changed: 21 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -81,7 +81,7 @@ def serve() -> None:
8181
@click.option(
8282
"--skip-content",
8383
is_flag=True,
84-
help="Skip Sphinx JSON build, only ingest objects.inv symbols",
84+
help="Skip Sphinx JSON build and publish a symbol-only index (search_docs only).",
8585
)
8686
def build_index(versions: str, skip_content: bool) -> None:
8787
"""Build the documentation index from objects.inv and Sphinx JSON."""
@@ -239,7 +239,8 @@ def build_index(versions: str, skip_content: bool) -> None:
239239
)
240240
logger.warning(
241241
"Version %s has SYMBOLS ONLY (sphinx-build failed). "
242-
"search_docs will work but get_docs will return empty pages.",
242+
"search_docs will work but get_docs will fail until content "
243+
"ingestion succeeds.",
243244
version,
244245
)
245246
any_version_succeeded = True
@@ -301,7 +302,7 @@ def build_index(versions: str, skip_content: bool) -> None:
301302

302303
# Publish: smoke test + atomic swap (PUBL-01 through PUBL-05)
303304
versions_str = ",".join(version_list)
304-
success = publish_index(build_db_path, versions_str)
305+
success = publish_index(build_db_path, versions_str, require_content=not skip_content)
305306
if not success:
306307
logger.error("Publishing failed — smoke tests did not pass")
307308
raise SystemExit(1)
@@ -323,7 +324,7 @@ def validate_corpus(db_path: str | None) -> None:
323324
from pathlib import Path
324325

325326
from mcp_server_python_docs.ingestion.publish import run_smoke_tests
326-
from mcp_server_python_docs.storage.db import get_index_path
327+
from mcp_server_python_docs.storage.db import get_index_path, get_readonly_connection
327328

328329
if db_path is not None:
329330
target = Path(db_path)
@@ -337,7 +338,22 @@ def validate_corpus(db_path: str | None) -> None:
337338

338339
logger.info("Validating corpus at %s", target)
339340

340-
passed, messages = run_smoke_tests(target)
341+
# Auto-detect symbol-only builds from the last published ingestion run
342+
require_content = True
343+
try:
344+
ro_conn = get_readonly_connection(target)
345+
row = ro_conn.execute(
346+
"SELECT notes FROM ingestion_runs "
347+
"WHERE status = 'published' ORDER BY id DESC LIMIT 1"
348+
).fetchone()
349+
ro_conn.close()
350+
if row and row[0] and "build_mode=symbol_only" in row[0]:
351+
require_content = False
352+
logger.info("Detected symbol-only build — skipping content checks")
353+
except Exception:
354+
pass # If we can't read the metadata, default to full validation
355+
356+
passed, messages = run_smoke_tests(target, require_content=require_content)
341357

342358
for msg in messages:
343359
if msg.startswith("OK:"):

src/mcp_server_python_docs/ingestion/publish.py

Lines changed: 75 additions & 46 deletions
Original file line number | Diff line number | Diff line change
@@ -90,19 +90,23 @@ def record_ingestion_run(
9090
return cursor.lastrowid # type: ignore[return-value]
9191

9292

93-
def run_smoke_tests(db_path: Path) -> tuple[bool, list[str]]:
93+
def run_smoke_tests(
94+
db_path: Path,
95+
*,
96+
require_content: bool = True,
97+
) -> tuple[bool, list[str]]:
9498
"""Run smoke tests against a newly built database (PUBL-03).
9599
96100
Validates that the index has sufficient data to be useful:
97101
- doc_sets table has at least 1 row
98-
- documents table has at least 10 rows
99-
- sections table has at least 50 rows
100102
- symbols table has at least 1000 rows
101-
- Spot-check: an asyncio-related document exists
102-
- FTS5 check: sections_fts is searchable
103+
- For content builds: documents/sections are populated and sections_fts is searchable
104+
- For symbol-only builds: symbols_fts is searchable
103105
104106
Args:
105107
db_path: Path to the database to test.
108+
require_content: When True, enforce document/section checks suitable for
109+
full-content builds. When False, validate a symbol-only build.
106110
107111
Returns:
108112
Tuple of (passed, messages). ``passed`` is True only if ALL
@@ -125,22 +129,6 @@ def run_smoke_tests(db_path: Path) -> tuple[bool, list[str]]:
125129
messages.append(f"FAIL: doc_sets: {count} rows (need >= 1)")
126130
passed = False
127131

128-
# Check documents
129-
count = conn.execute("SELECT COUNT(*) FROM documents").fetchone()[0]
130-
if count >= 10:
131-
messages.append(f"OK: documents: {count} rows")
132-
else:
133-
messages.append(f"FAIL: documents: {count} rows (need >= 10)")
134-
passed = False
135-
136-
# Check sections
137-
count = conn.execute("SELECT COUNT(*) FROM sections").fetchone()[0]
138-
if count >= 50:
139-
messages.append(f"OK: sections: {count} rows")
140-
else:
141-
messages.append(f"FAIL: sections: {count} rows (need >= 50)")
142-
passed = False
143-
144132
# Check symbols
145133
count = conn.execute("SELECT COUNT(*) FROM symbols").fetchone()[0]
146134
if count >= 1000:
@@ -149,31 +137,64 @@ def run_smoke_tests(db_path: Path) -> tuple[bool, list[str]]:
149137
messages.append(f"FAIL: symbols: {count} rows (need >= 1000)")
150138
passed = False
151139

152-
# Spot-check: asyncio document exists
153-
row = conn.execute(
154-
"SELECT 1 FROM documents WHERE slug LIKE '%asyncio%' LIMIT 1"
155-
).fetchone()
156-
if row:
157-
messages.append("OK: spot-check: asyncio document found")
158-
else:
159-
messages.append("FAIL: spot-check: no asyncio document found")
160-
passed = False
140+
if require_content:
141+
# Check documents
142+
count = conn.execute("SELECT COUNT(*) FROM documents").fetchone()[0]
143+
if count >= 10:
144+
messages.append(f"OK: documents: {count} rows")
145+
else:
146+
messages.append(f"FAIL: documents: {count} rows (need >= 10)")
147+
passed = False
161148

162-
# FTS5 check: sections_fts is searchable
163-
try:
149+
# Check sections
150+
count = conn.execute("SELECT COUNT(*) FROM sections").fetchone()[0]
151+
if count >= 50:
152+
messages.append(f"OK: sections: {count} rows")
153+
else:
154+
messages.append(f"FAIL: sections: {count} rows (need >= 50)")
155+
passed = False
156+
157+
# Spot-check: asyncio document exists
164158
row = conn.execute(
165-
"SELECT 1 FROM sections_fts WHERE sections_fts MATCH '\"asyncio\"' LIMIT 1"
159+
"SELECT 1 FROM documents WHERE slug LIKE '%asyncio%' LIMIT 1"
166160
).fetchone()
167161
if row:
168-
messages.append("OK: fts5: sections_fts searchable")
162+
messages.append("OK: spot-check: asyncio document found")
169163
else:
170-
messages.append(
171-
"WARN: fts5: sections_fts has no asyncio matches"
172-
" (may be OK for partial builds)"
173-
)
174-
except sqlite3.OperationalError as e:
175-
messages.append(f"FAIL: fts5: sections_fts query failed: {e}")
176-
passed = False
164+
messages.append("FAIL: spot-check: no asyncio document found")
165+
passed = False
166+
167+
# FTS5 check: sections_fts is searchable
168+
try:
169+
row = conn.execute(
170+
'SELECT 1 FROM sections_fts WHERE sections_fts MATCH \'"asyncio"\' LIMIT 1'
171+
).fetchone()
172+
if row:
173+
messages.append("OK: fts5: sections_fts searchable")
174+
else:
175+
messages.append(
176+
"WARN: fts5: sections_fts has no asyncio matches"
177+
" (may be OK for partial builds)"
178+
)
179+
except sqlite3.OperationalError as e:
180+
messages.append(f"FAIL: fts5: sections_fts query failed: {e}")
181+
passed = False
182+
else:
183+
messages.append("OK: content checks skipped for symbol-only build")
184+
try:
185+
row = conn.execute(
186+
'SELECT 1 FROM symbols_fts WHERE symbols_fts MATCH \'"asyncio"\' LIMIT 1'
187+
).fetchone()
188+
if row:
189+
messages.append("OK: fts5: symbols_fts searchable")
190+
else:
191+
messages.append(
192+
"WARN: fts5: symbols_fts has no asyncio matches"
193+
" (unexpected for stdlib builds)"
194+
)
195+
except sqlite3.OperationalError as e:
196+
messages.append(f"FAIL: fts5: symbols_fts query failed: {e}")
197+
passed = False
177198

178199
except Exception as e:
179200
messages.append(f"FAIL: Unexpected error during smoke tests: {e}")
@@ -259,7 +280,12 @@ def print_restart_message() -> None:
259280
)
260281

261282

262-
def publish_index(build_db_path: Path, version: str) -> bool:
283+
def publish_index(
284+
build_db_path: Path,
285+
version: str,
286+
*,
287+
require_content: bool = True,
288+
) -> bool:
263289
"""Orchestrate the full publish pipeline.
264290
265291
1. Compute SHA256 of the build artifact
@@ -271,6 +297,7 @@ def publish_index(build_db_path: Path, version: str) -> bool:
271297
Args:
272298
build_db_path: Path to the build artifact database.
273299
version: Version string for the ingestion run record.
300+
require_content: Whether publish validation should require content tables.
274301
275302
Returns:
276303
True if publishing succeeded, False if smoke tests failed.
@@ -282,6 +309,7 @@ def publish_index(build_db_path: Path, version: str) -> bool:
282309
# Record ingestion run
283310
from mcp_server_python_docs.storage.db import get_readwrite_connection
284311

312+
build_notes = "build_mode=symbol_only" if not require_content else None
285313
conn = get_readwrite_connection(build_db_path)
286314
try:
287315
run_id = record_ingestion_run(
@@ -290,12 +318,13 @@ def publish_index(build_db_path: Path, version: str) -> bool:
290318
version=version,
291319
status="smoke_testing",
292320
artifact_hash=artifact_hash,
321+
notes=build_notes,
293322
)
294323
finally:
295324
conn.close()
296325

297326
# Run smoke tests (PUBL-03)
298-
passed, messages = run_smoke_tests(build_db_path)
327+
passed, messages = run_smoke_tests(build_db_path, require_content=require_content)
299328
for msg in messages:
300329
logger.info("Smoke test: %s", msg)
301330

@@ -314,13 +343,13 @@ def publish_index(build_db_path: Path, version: str) -> bool:
314343
logger.error("Smoke tests failed — not publishing")
315344
return False
316345

317-
# Update run status to published
346+
# Update run status to published (preserve build_mode note)
318347
conn = get_readwrite_connection(build_db_path)
319348
try:
320349
conn.execute(
321-
"UPDATE ingestion_runs SET status = ?, "
350+
"UPDATE ingestion_runs SET status = ?, notes = ?, "
322351
"finished_at = CURRENT_TIMESTAMP WHERE id = ?",
323-
("published", run_id),
352+
("published", build_notes, run_id),
324353
)
325354
conn.commit()
326355
finally:

src/mcp_server_python_docs/server.py

Lines changed: 2 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -27,6 +27,7 @@
2727
from mcp_server_python_docs.services.content import ContentService
2828
from mcp_server_python_docs.services.search import SearchService
2929
from mcp_server_python_docs.services.version import VersionService
30+
from mcp_server_python_docs.storage.db import get_readonly_connection
3031

3132
logger = logging.getLogger(__name__)
3233

@@ -72,11 +73,7 @@ async def app_lifespan(server: FastMCP) -> AsyncIterator[AppContext]:
7273
logger.info("Loaded %d synonym entries", len(synonyms))
7374

7475
# Open read-only connection (STOR-06, STOR-07)
75-
db = sqlite3.connect(f"file:{index_path}?mode=ro", uri=True, check_same_thread=False)
76-
db.execute("PRAGMA journal_mode = WAL")
77-
db.execute("PRAGMA synchronous = NORMAL")
78-
db.execute("PRAGMA foreign_keys = ON")
79-
db.row_factory = sqlite3.Row
76+
db = get_readonly_connection(index_path)
8077

8178
# Check FTS5 (STOR-08)
8279
_assert_fts5(db)

src/mcp_server_python_docs/storage/db.py

Lines changed: 15 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -28,21 +28,31 @@ def get_index_path() -> Path:
2828
return get_cache_dir() / "index.db"
2929

3030

31-
def _set_pragmas(conn: sqlite3.Connection) -> None:
32-
"""Set required PRAGMAs on a connection (STOR-07)."""
33-
conn.execute("PRAGMA journal_mode = WAL")
31+
def _set_common_pragmas(conn: sqlite3.Connection) -> None:
32+
"""Set PRAGMAs that are safe for both read-only and read-write connections."""
3433
conn.execute("PRAGMA synchronous = NORMAL")
3534
conn.execute("PRAGMA foreign_keys = ON")
3635

3736

37+
def _set_readonly_pragmas(conn: sqlite3.Connection) -> None:
38+
"""Set PRAGMAs for read-only serving connections (STOR-07)."""
39+
_set_common_pragmas(conn)
40+
41+
42+
def _set_readwrite_pragmas(conn: sqlite3.Connection) -> None:
43+
"""Set PRAGMAs for read-write ingestion connections (STOR-07)."""
44+
conn.execute("PRAGMA journal_mode = WAL")
45+
_set_common_pragmas(conn)
46+
47+
3848
def get_readonly_connection(path: str | Path) -> sqlite3.Connection:
3949
"""Open a read-only connection for serving (STOR-06).
4050
4151
Uses SQLite URI mode with ?mode=ro to prevent accidental writes.
4252
"""
4353
path = Path(path)
4454
conn = sqlite3.connect(f"file:{path}?mode=ro", uri=True, check_same_thread=False)
45-
_set_pragmas(conn)
55+
_set_readonly_pragmas(conn)
4656
conn.row_factory = sqlite3.Row
4757
return conn
4858

@@ -56,7 +66,7 @@ def get_readwrite_connection(path: str | Path) -> sqlite3.Connection:
5666
path = Path(path)
5767
path.parent.mkdir(parents=True, exist_ok=True)
5868
conn = sqlite3.connect(str(path))
59-
_set_pragmas(conn)
69+
_set_readwrite_pragmas(conn)
6070
conn.row_factory = sqlite3.Row
6171
return conn
6272

0 commit comments

Comments
 (0)