Skip to content

Commit e2a8754

Browse files
ayhammoudaclaude
andcommitted
fix: apply benchmark-informed deep code review findings (2 critical, 6 warning, 5 info)
Benchmarked against top Python MCP server projects (PrefectHQ/fastmcp, awslabs/mcp, Anthropic reference servers) and official MCP specification. Critical: - CR-01: Fix synonym substring matching false positives (word-boundary regex) - CR-02: Catch all exceptions in tool handlers as ToolError (MCP spec compliance) Warning: - WR-01: Add max_length=500 to query/slug string inputs - WR-02: Move version validation before DB writes in ingest_inventory - WR-03: Add check_same_thread=False for concurrent async dispatch safety - WR-05: Use inspect.signature for robust version extraction in observability - WR-06: Clearer logging for partial (symbols-only) ingestion Info: - IN-01: Replace f-string logging with lazy %-style formatting - IN-02: Add idempotentHint=True to ToolAnnotations - IN-03: Add py.typed PEP 561 marker file - IN-04: Fix inaccurate services/__init__.py docstring - IN-05: Make AppContext service fields non-optional (enforces runtime invariant) Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 6a7439d commit e2a8754

12 files changed

Lines changed: 79 additions & 40 deletions

File tree

src/mcp_server_python_docs/__main__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -238,8 +238,8 @@ def build_index(versions: str, skip_content: bool) -> None:
238238
result.stderr[-2000:] if result.stderr else "(no output)",
239239
)
240240
logger.warning(
241-
"Symbols were ingested for %s but sections/examples "
242-
"will be missing. Smoke tests may fail.",
241+
"Version %s has SYMBOLS ONLY (sphinx-build failed). "
242+
"search_docs will work but get_docs will return empty pages.",
243243
version,
244244
)
245245
any_version_succeeded = True

src/mcp_server_python_docs/app_context.py

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,10 @@
99
import sqlite3
1010
from dataclasses import dataclass, field
1111
from pathlib import Path
12-
from typing import TYPE_CHECKING
1312

14-
if TYPE_CHECKING:
15-
from mcp_server_python_docs.services.content import ContentService
16-
from mcp_server_python_docs.services.search import SearchService
17-
from mcp_server_python_docs.services.version import VersionService
13+
from mcp_server_python_docs.services.content import ContentService
14+
from mcp_server_python_docs.services.search import SearchService
15+
from mcp_server_python_docs.services.version import VersionService
1816

1917

2018
@dataclass
@@ -23,7 +21,7 @@ class AppContext:
2321

2422
db: sqlite3.Connection
2523
index_path: Path
24+
search_service: SearchService
25+
content_service: ContentService
26+
version_service: VersionService
2627
synonyms: dict[str, list[str]] = field(default_factory=dict)
27-
search_service: SearchService | None = None
28-
content_service: ContentService | None = None
29-
version_service: VersionService | None = None

src/mcp_server_python_docs/ingestion/inventory.py

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,14 @@ def ingest_inventory(
8686
5. Handle duplicates via priority ordering (INGR-I-05)
8787
6. Populate symbols_fts in same transaction (INGR-I-06)
8888
"""
89+
# Validate version format first, before any DB writes (WR-02)
90+
import re
91+
92+
if not re.match(r"^\d+\.\d+$", version):
93+
from mcp_server_python_docs.errors import IngestionError
94+
95+
raise IngestionError(f"Invalid version format: {version!r}")
96+
8997
bootstrap_schema(conn)
9098

9199
# Upsert doc_set for this version
@@ -111,19 +119,11 @@ def ingest_inventory(
111119
# Clear existing symbols for this doc_set (re-ingestion support)
112120
conn.execute("DELETE FROM symbols WHERE doc_set_id = ?", (doc_set_id,))
113121

114-
# Validate version format before URL construction (WR-04)
115-
import re
116-
117-
if not re.match(r"^\d+\.\d+$", version):
118-
from mcp_server_python_docs.errors import IngestionError
119-
120-
raise IngestionError(f"Invalid version format: {version!r}")
121-
122122
# Download and parse objects.inv (INGR-I-01)
123123
url = f"https://docs.python.org/{version}/objects.inv"
124-
logger.info(f"Downloading {url}...")
124+
logger.info("Downloading %s...", url)
125125
inv = soi.Inventory(url=url) # type: ignore[call-arg] # sphobjinv lacks type stubs
126-
logger.info(f"Downloaded {len(inv.objects)} inventory objects")
126+
logger.info("Downloaded %d inventory objects", len(inv.objects))
127127

128128
# Filter to Python domain objects and collect by qualified_name
129129
# for duplicate resolution (INGR-I-05)
@@ -174,5 +174,5 @@ def ingest_inventory(
174174
conn.execute("INSERT INTO symbols_fts(symbols_fts) VALUES('rebuild')")
175175
conn.commit()
176176

177-
logger.info(f"Ingested {count} symbols for Python {version}")
177+
logger.info("Ingested %d symbols for Python %s", count, version)
178178
return count

src/mcp_server_python_docs/models.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,8 @@ class SearchDocsInput(BaseModel):
1616
"""Input parameters for search_docs tool."""
1717

1818
query: str = Field(
19-
description="Search query - Python symbol (asyncio.TaskGroup) or concept (parse json)"
19+
max_length=500,
20+
description="Search query - Python symbol (asyncio.TaskGroup) or concept (parse json)",
2021
)
2122
version: str | None = Field(
2223
default=None,
@@ -69,7 +70,10 @@ class SearchDocsResult(BaseModel):
6970
class GetDocsInput(BaseModel):
7071
"""Input parameters for get_docs tool."""
7172

72-
slug: str = Field(description="Page slug (e.g. 'library/asyncio-task.html')")
73+
slug: str = Field(
74+
max_length=500,
75+
description="Page slug (e.g. 'library/asyncio-task.html')",
76+
)
7377
version: str | None = Field(
7478
default=None,
7579
description="Python version. Defaults to latest.",

src/mcp_server_python_docs/py.typed

Whitespace-only changes.

src/mcp_server_python_docs/retrieval/query.py

Lines changed: 30 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -86,20 +86,35 @@ def classify_query(
8686
return "fts"
8787

8888

89+
def _build_concept_patterns(
90+
synonyms: dict[str, list[str]],
91+
) -> dict[str, re.Pattern[str]]:
92+
"""Pre-compile word-boundary regex patterns for multi-word synonym concepts."""
93+
patterns: dict[str, re.Pattern[str]] = {}
94+
for concept in synonyms:
95+
if " " in concept:
96+
patterns[concept] = re.compile(r"\b" + re.escape(concept) + r"\b")
97+
return patterns
98+
99+
89100
def expand_synonyms(
90101
query: str,
91102
synonyms: dict[str, list[str]],
103+
*,
104+
_concept_patterns: dict[str, re.Pattern[str]] | None = None,
92105
) -> set[str]:
93106
"""Expand query using synonym table for concept search (RETR-05).
94107
95-
Checks each token and the full query against the synonym table.
96-
Returns a set of terms including the original tokens plus any
97-
expansion values.
108+
Checks each token and multi-word phrases against the synonym table.
109+
Single-word concepts use exact token matching. Multi-word concepts
110+
use word-boundary regex to avoid substring false positives (CR-01).
98111
99112
Args:
100113
query: User search query.
101114
synonyms: Mapping of concept -> list of expansion terms.
102115
Loaded from synonyms.yaml at startup.
116+
_concept_patterns: Pre-compiled regex patterns for multi-word
117+
concepts. Built lazily on first call if not provided.
103118
104119
Returns:
105120
Set of terms (original + expansions). Empty set if query
@@ -112,15 +127,20 @@ def expand_synonyms(
112127
tokens = query.lower().split()
113128
expanded = set(tokens)
114129

115-
# Check individual tokens against synonym keys
130+
# Single-word concepts: exact token match (no substring false positives)
116131
for token in tokens:
117132
if token in synonyms:
118133
expanded.update(synonyms[token])
119134

120-
# Check multi-word concepts (e.g., "http requests", "file io")
135+
# Multi-word concepts: word-boundary regex match (CR-01 fix)
136+
if _concept_patterns is None:
137+
_concept_patterns = _build_concept_patterns(synonyms)
121138
query_lower = query.lower()
122139
for concept, expansions in synonyms.items():
123-
if concept in query_lower:
140+
if " " not in concept:
141+
continue
142+
pattern = _concept_patterns.get(concept)
143+
if pattern and pattern.search(query_lower):
124144
expanded.update(expansions)
125145

126146
return expanded
@@ -129,6 +149,8 @@ def expand_synonyms(
129149
def build_match_expression(
130150
query: str,
131151
synonyms: dict[str, list[str]],
152+
*,
153+
_concept_patterns: dict[str, re.Pattern[str]] | None = None,
132154
) -> str:
133155
"""Build a complete FTS5 MATCH expression with synonym expansion.
134156
@@ -138,6 +160,7 @@ def build_match_expression(
138160
Args:
139161
query: User search query.
140162
synonyms: Synonym mapping for expansion.
163+
_concept_patterns: Pre-compiled regex patterns for multi-word concepts.
141164
142165
Returns:
143166
FTS5-safe MATCH expression string.
@@ -146,7 +169,7 @@ def build_match_expression(
146169
if not query:
147170
return '""'
148171

149-
expanded = expand_synonyms(query, synonyms)
172+
expanded = expand_synonyms(query, synonyms, _concept_patterns=_concept_patterns)
150173
original_tokens = set(query.lower().split())
151174

152175
# If expansion added new terms, OR-join all terms

src/mcp_server_python_docs/server.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ async def app_lifespan(server: FastMCP) -> AsyncIterator[AppContext]:
7272
logger.info("Loaded %d synonym entries", len(synonyms))
7373

7474
# Open read-only connection (STOR-06, STOR-07)
75-
db = sqlite3.connect(f"file:{index_path}?mode=ro", uri=True)
75+
db = sqlite3.connect(f"file:{index_path}?mode=ro", uri=True, check_same_thread=False)
7676
db.execute("PRAGMA journal_mode = WAL")
7777
db.execute("PRAGMA synchronous = NORMAL")
7878
db.execute("PRAGMA foreign_keys = ON")
@@ -113,6 +113,7 @@ async def app_lifespan(server: FastMCP) -> AsyncIterator[AppContext]:
113113
_TOOL_ANNOTATIONS = ToolAnnotations(
114114
readOnlyHint=True,
115115
destructiveHint=False,
116+
idempotentHint=True,
116117
openWorldHint=False,
117118
)
118119

@@ -141,6 +142,9 @@ def search_docs(
141142
return app_ctx.search_service.search(query, version, kind, max_results)
142143
except DocsServerError as e:
143144
raise ToolError(str(e))
145+
except Exception as e:
146+
logger.exception("Unexpected error in search_docs")
147+
raise ToolError(f"Internal error: {type(e).__name__}")
144148

145149
@mcp.tool(annotations=_TOOL_ANNOTATIONS)
146150
def get_docs(
@@ -160,6 +164,9 @@ def get_docs(
160164
)
161165
except DocsServerError as e:
162166
raise ToolError(str(e))
167+
except Exception as e:
168+
logger.exception("Unexpected error in get_docs")
169+
raise ToolError(f"Internal error: {type(e).__name__}")
163170

164171
@mcp.tool(annotations=_TOOL_ANNOTATIONS)
165172
def list_versions(
@@ -171,6 +178,9 @@ def list_versions(
171178
return app_ctx.version_service.list_versions()
172179
except DocsServerError as e:
173180
raise ToolError(str(e))
181+
except Exception as e:
182+
logger.exception("Unexpected error in list_versions")
183+
raise ToolError(f"Internal error: {type(e).__name__}")
174184

175185
# SRVR-07: _meta hint for get_docs tool.
176186
# FastMCP 1.27 does not expose a public API for setting _meta on tool

src/mcp_server_python_docs/services/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,6 @@
22
33
Services sit between FastMCP tool handlers and the retrieval/storage layers.
44
Dependency rule: server -> services -> retrieval/storage.
5-
No service touches SQL directly (except through storage/retrieval functions).
5+
Services receive sqlite3.Connection via constructor and execute queries directly.
66
No service imports MCP types.
77
"""

src/mcp_server_python_docs/services/observability.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
from __future__ import annotations
1111

1212
import functools
13+
import inspect
1314
import sys
1415
import time
1516
from collections.abc import Callable
@@ -66,13 +67,14 @@ def wrapper(self: Any, *args: Any, **kwargs: Any) -> Any:
6667
"latency_ms": round(elapsed_ms, 1),
6768
}
6869

69-
# Extract version from kwargs or positional args
70-
version_val = kwargs.get("version")
71-
if version_val is None and args:
72-
# For search: (query, version, kind, max_results)
73-
# For get_docs: (slug, version, anchor, ...)
74-
if len(args) >= 2:
75-
version_val = args[1]
70+
# Extract version by name via inspect (WR-05: avoids fragile positional indexing)
71+
try:
72+
sig = inspect.signature(fn)
73+
bound = sig.bind(self, *args, **kwargs)
74+
bound.apply_defaults()
75+
version_val = bound.arguments.get("version")
76+
except (TypeError, ValueError):
77+
version_val = kwargs.get("version")
7678
fields["version"] = version_val or "default"
7779

7880
# Extract result-specific fields

src/mcp_server_python_docs/storage/db.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ def get_readonly_connection(path: str | Path) -> sqlite3.Connection:
4141
Uses SQLite URI mode with ?mode=ro to prevent accidental writes.
4242
"""
4343
path = Path(path)
44-
conn = sqlite3.connect(f"file:{path}?mode=ro", uri=True)
44+
conn = sqlite3.connect(f"file:{path}?mode=ro", uri=True, check_same_thread=False)
4545
_set_pragmas(conn)
4646
conn.row_factory = sqlite3.Row
4747
return conn

0 commit comments

Comments
 (0)