Skip to content

Commit 928fc18

Browse files
ayhammoudaclaude
andcommitted
fix: address PR #9 review feedback
- package_docs: catch UnicodeDecodeError on non-UTF-8 PyPI responses so they surface as the controlled empty-result path instead of leaking a tool-level exception - ranker/SymbolHit: return None for page-only symbol anchors so get_docs() retrieves the whole page; SymbolHit.anchor is now str | None - persistent_cache: serialize execute()/commit()/stats updates with threading.Lock — per the Python sqlite3 docs, check_same_thread=False alone does not make a connection safe for concurrent writes; without serialization concurrent put() raised SystemError under load Minor follow-ups picked up along the way: _empty_result helper in package_docs, drop unused PackageDocsInput model, use storage.db.get_cache_dir/get_index_path helpers from server.py, import _NO_ANCHOR_KEY constant in smoke test. Tests: UTF-8 decode path, page-only anchor=None contract, concurrent put() across 20 threads. 261/261 pass. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent c2571ac commit 928fc18

11 files changed

Lines changed: 212 additions & 106 deletions

src/mcp_server_python_docs/models.py

Lines changed: 4 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,10 @@ class SymbolHit(BaseModel):
4848
score: float = Field(default=0.0, description="Relevance score")
4949
version: str = Field(description="Python version this hit belongs to")
5050
slug: str = Field(default="", description="Page slug for get_docs follow-up")
51-
anchor: str = Field(default="", description="Section anchor for get_docs follow-up")
51+
anchor: str | None = Field(
52+
default=None,
53+
description="Section anchor for get_docs follow-up; None for page-level hits",
54+
)
5255

5356

5457
class SearchDocsResult(BaseModel):
@@ -165,16 +168,6 @@ class DetectPythonVersionResult(BaseModel):
165168
# --- lookup_package_docs models ---
166169

167170

168-
class PackageDocsInput(BaseModel):
169-
"""Input parameters for lookup_package_docs tool."""
170-
171-
package: str = Field(
172-
min_length=1,
173-
max_length=214,
174-
description="PyPI package/project name (e.g. 'requests').",
175-
)
176-
177-
178171
class PackageDocsSource(BaseModel):
179172
"""A package-declared documentation or project source URL."""
180173

src/mcp_server_python_docs/retrieval/ranker.py

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,12 @@ def _document_candidates(uri: str) -> tuple[str, ...]:
3737
def _resolve_symbol_location(
3838
conn: sqlite3.Connection,
3939
row: sqlite3.Row,
40-
) -> tuple[str, str]:
41-
"""Resolve a symbol row to a get_docs-compatible slug and anchor."""
40+
) -> tuple[str, str | None]:
41+
"""Resolve a symbol row to a get_docs-compatible slug and anchor.
42+
43+
Returns ``(slug, None)`` when the hit is page-level. ``get_docs()`` treats
44+
any non-None anchor as a section lookup, so blank anchors must never leak.
45+
"""
4246
uri = str(row["uri"])
4347
fallback_slug = _page_uri(uri)
4448
fallback_anchor = str(row["anchor"] or "")
@@ -56,7 +60,7 @@ def _resolve_symbol_location(
5660
(section_id,),
5761
).fetchone()
5862
if section_row is not None:
59-
return section_row["slug"], section_row["anchor"] or ""
63+
return section_row["slug"], (section_row["anchor"] or None)
6064

6165
doc_row = None
6266
document_id = row["document_id"]
@@ -81,22 +85,22 @@ def _resolve_symbol_location(
8185
break
8286

8387
if doc_row is None:
84-
return fallback_slug, fallback_anchor
88+
return fallback_slug, (fallback_anchor or None)
8589

8690
if fallback_anchor:
8791
section_row = conn.execute(
8892
"""
89-
SELECT anchor
93+
SELECT 1
9094
FROM sections
9195
WHERE document_id = ? AND anchor = ?
9296
LIMIT 1
9397
""",
9498
(doc_row["id"], fallback_anchor),
9599
).fetchone()
96100
if section_row is not None:
97-
return doc_row["slug"], section_row["anchor"] or ""
101+
return doc_row["slug"], fallback_anchor
98102

99-
return doc_row["slug"], ""
103+
return doc_row["slug"], None
100104

101105

102106
def _normalize_scores(hits: list[SymbolHit]) -> list[SymbolHit]:
@@ -182,7 +186,7 @@ def search_sections(
182186
score=row["score"],
183187
version=row["version"],
184188
slug=row["slug"],
185-
anchor=row["anchor"],
189+
anchor=row["anchor"] or None,
186190
)
187191
for row in rows
188192
]
@@ -300,7 +304,7 @@ def search_examples(
300304
score=row["score"],
301305
version=row["version"],
302306
slug=row["slug"],
303-
anchor=row["anchor"] or "",
307+
anchor=row["anchor"] or None,
304308
)
305309
for row in rows
306310
]

src/mcp_server_python_docs/server.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,8 @@
1414
import traceback
1515
from collections.abc import AsyncIterator
1616
from contextlib import asynccontextmanager
17-
from pathlib import Path
1817
from typing import Annotated, Literal
1918

20-
import platformdirs
2119
import yaml
2220
from mcp.server.fastmcp import Context, FastMCP
2321
from mcp.server.fastmcp.exceptions import ToolError
@@ -39,7 +37,11 @@
3937
from mcp_server_python_docs.services.persistent_cache import PersistentDocsCache
4038
from mcp_server_python_docs.services.search import SearchService
4139
from mcp_server_python_docs.services.version import VersionService
42-
from mcp_server_python_docs.storage.db import get_readonly_connection
40+
from mcp_server_python_docs.storage.db import (
41+
get_cache_dir,
42+
get_index_path,
43+
get_readonly_connection,
44+
)
4345

4446
logger = logging.getLogger(__name__)
4547

@@ -67,8 +69,8 @@ async def app_lifespan(server: FastMCP) -> AsyncIterator[AppContext]:
6769
constructs service instances, and fails fast on missing index or
6870
unavailable FTS5.
6971
"""
70-
cache_dir = Path(platformdirs.user_cache_dir("mcp-python-docs"))
71-
index_path = cache_dir / "index.db"
72+
cache_dir = get_cache_dir()
73+
index_path = get_index_path()
7274

7375
# Fail fast on missing index (SRVR-10)
7476
if not index_path.exists():

src/mcp_server_python_docs/services/package_docs.py

Lines changed: 18 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,16 @@ def _read_limited(response: _HTTPResponse) -> bytes | None:
7171
return data
7272

7373

74+
def _empty_result(package: str, metadata_source: str, note: str) -> PackageDocsResult:
75+
return PackageDocsResult(
76+
package=package,
77+
version="",
78+
metadata_source=metadata_source,
79+
sources=[],
80+
note=note,
81+
)
82+
83+
7484
class PackageDocsService:
7585
"""Return package-declared docs/homepage/source URLs from PyPI metadata only."""
7686

@@ -86,32 +96,20 @@ def lookup(self, package: str) -> PackageDocsResult:
8696
with self._fetcher(metadata_source, self._timeout) as response:
8797
data = _read_limited(response)
8898
if data is None:
89-
return PackageDocsResult(
90-
package=package,
91-
version="",
92-
metadata_source=metadata_source,
93-
sources=[],
94-
note="PyPI metadata exceeded size limit.",
99+
return _empty_result(
100+
package, metadata_source, "PyPI metadata exceeded size limit."
95101
)
96102
payload = json.loads(data.decode("utf-8"))
97103
except HTTPError as e:
98104
note = (
99105
"Package not found on PyPI." if e.code == 404 else f"PyPI returned HTTP {e.code}."
100106
)
101-
return PackageDocsResult(
102-
package=package,
103-
version="",
104-
metadata_source=metadata_source,
105-
sources=[],
106-
note=note,
107-
)
108-
except (URLError, TimeoutError, json.JSONDecodeError) as e:
109-
return PackageDocsResult(
110-
package=package,
111-
version="",
112-
metadata_source=metadata_source,
113-
sources=[],
114-
note=f"Unable to retrieve PyPI metadata: {type(e).__name__}.",
107+
return _empty_result(package, metadata_source, note)
108+
except (URLError, TimeoutError, json.JSONDecodeError, UnicodeDecodeError) as e:
109+
return _empty_result(
110+
package,
111+
metadata_source,
112+
f"Unable to retrieve PyPI metadata: {type(e).__name__}.",
115113
)
116114

117115
info = payload.get("info") if isinstance(payload, dict) else {}

src/mcp_server_python_docs/services/persistent_cache.py

Lines changed: 68 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
import logging
66
import sqlite3
7+
import threading
78
from pathlib import Path
89
from typing import NamedTuple
910

@@ -28,10 +29,16 @@ def __init__(self, cache_path: Path, index_path: Path) -> None:
2829
self._cache_path = Path(cache_path)
2930
self._fingerprint = self._fingerprint_index(Path(index_path))
3031
self._hits = self._misses = self._writes = 0
32+
# ``check_same_thread=False`` lets multiple threads share the connection,
33+
# but per the Python sqlite3 docs writes must still be serialized by the
34+
# application — this lock guards every execute()/commit() and the stats
35+
# counters they update.
36+
self._lock = threading.Lock()
3137
self._conn: sqlite3.Connection | None = None
3238
try:
3339
self._cache_path.parent.mkdir(parents=True, exist_ok=True)
3440
self._conn = sqlite3.connect(str(self._cache_path), check_same_thread=False)
41+
self._conn.execute("PRAGMA journal_mode = WAL")
3542
self._conn.execute("PRAGMA synchronous = NORMAL")
3643
self._conn.execute(
3744
"CREATE TABLE IF NOT EXISTS retrieved_docs_cache ("
@@ -40,6 +47,10 @@ def __init__(self, cache_path: Path, index_path: Path) -> None:
4047
"result_json TEXT NOT NULL, created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP, "
4148
"PRIMARY KEY (index_fingerprint, version, slug, anchor, max_chars, start_index))"
4249
)
50+
self._conn.execute(
51+
"DELETE FROM retrieved_docs_cache WHERE index_fingerprint != ?",
52+
(self._fingerprint,),
53+
)
4354
self._conn.commit()
4455
except (OSError, sqlite3.Error) as e:
4556
if self._conn is not None:
@@ -67,62 +78,67 @@ def get(
6778
self, *, version: str, slug: str, anchor: str | None, max_chars: int, start_index: int
6879
) -> GetDocsResult | None:
6980
if self._conn is None:
70-
self._misses += 1
71-
return None
72-
try:
73-
row = self._conn.execute(
74-
"SELECT result_json FROM retrieved_docs_cache WHERE index_fingerprint = ? "
75-
"AND version = ? AND slug = ? AND anchor = ? AND max_chars = ? AND start_index = ?",
76-
(
77-
self._fingerprint,
78-
version,
79-
slug,
80-
self._anchor_key(anchor),
81-
max_chars,
82-
start_index,
83-
),
84-
).fetchone()
85-
except sqlite3.Error as e:
86-
self._misses += 1
87-
logger.warning("Persistent docs cache read skipped: %s", e)
81+
with self._lock:
82+
self._misses += 1
8883
return None
89-
if row is None:
90-
self._misses += 1
91-
return None
92-
try:
93-
result = GetDocsResult.model_validate_json(row[0])
94-
except (ValidationError, ValueError) as e:
95-
self._misses += 1
96-
logger.warning("Persistent docs cache entry ignored: %s", e)
97-
return None
98-
self._hits += 1
99-
return result
84+
with self._lock:
85+
try:
86+
row = self._conn.execute(
87+
"SELECT result_json FROM retrieved_docs_cache WHERE index_fingerprint = ? "
88+
"AND version = ? AND slug = ? AND anchor = ? AND max_chars = ? "
89+
"AND start_index = ?",
90+
(
91+
self._fingerprint,
92+
version,
93+
slug,
94+
self._anchor_key(anchor),
95+
max_chars,
96+
start_index,
97+
),
98+
).fetchone()
99+
except sqlite3.Error as e:
100+
self._misses += 1
101+
logger.warning("Persistent docs cache read skipped: %s", e)
102+
return None
103+
if row is None:
104+
self._misses += 1
105+
return None
106+
try:
107+
result = GetDocsResult.model_validate_json(row[0])
108+
except (ValidationError, ValueError) as e:
109+
self._misses += 1
110+
logger.warning("Persistent docs cache entry ignored: %s", e)
111+
return None
112+
self._hits += 1
113+
return result
100114

101115
def put(self, *, result: GetDocsResult, max_chars: int, start_index: int) -> None:
102116
if self._conn is None:
103117
return
104-
try:
105-
self._conn.execute(
106-
"INSERT OR REPLACE INTO retrieved_docs_cache "
107-
"(index_fingerprint, version, slug, anchor, max_chars, start_index, result_json) "
108-
"VALUES (?, ?, ?, ?, ?, ?, ?)",
109-
(
110-
self._fingerprint,
111-
result.version,
112-
result.slug,
113-
self._anchor_key(result.anchor),
114-
max_chars,
115-
start_index,
116-
result.model_dump_json(),
117-
),
118-
)
119-
self._conn.commit()
120-
except (sqlite3.Error, ValueError) as e:
121-
logger.warning("Persistent docs cache write skipped: %s", e)
122-
return
123-
self._writes += 1
118+
with self._lock:
119+
try:
120+
self._conn.execute(
121+
"INSERT OR REPLACE INTO retrieved_docs_cache "
122+
"(index_fingerprint, version, slug, anchor, max_chars, start_index, "
123+
"result_json) VALUES (?, ?, ?, ?, ?, ?, ?)",
124+
(
125+
self._fingerprint,
126+
result.version,
127+
result.slug,
128+
self._anchor_key(result.anchor),
129+
max_chars,
130+
start_index,
131+
result.model_dump_json(),
132+
),
133+
)
134+
self._conn.commit()
135+
except (sqlite3.Error, ValueError) as e:
136+
logger.warning("Persistent docs cache write skipped: %s", e)
137+
return
138+
self._writes += 1
124139

125140
def close(self) -> None:
126-
if self._conn is not None:
127-
self._conn.close()
128-
self._conn = None
141+
with self._lock:
142+
if self._conn is not None:
143+
self._conn.close()
144+
self._conn = None

tests/fixtures/schema-search_docs-output.json

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -42,10 +42,17 @@
4242
"type": "string"
4343
},
4444
"anchor": {
45-
"default": "",
46-
"description": "Section anchor for get_docs follow-up",
47-
"title": "Anchor",
48-
"type": "string"
45+
"anyOf": [
46+
{
47+
"type": "string"
48+
},
49+
{
50+
"type": "null"
51+
}
52+
],
53+
"default": null,
54+
"description": "Section anchor for get_docs follow-up; None for page-level hits",
55+
"title": "Anchor"
4956
}
5057
},
5158
"required": [

tests/test_mcp_get_docs_cache_smoke.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import sys
77
from pathlib import Path
88

9+
from mcp_server_python_docs.services.persistent_cache import _NO_ANCHOR_KEY
910
from mcp_server_python_docs.storage.db import bootstrap_schema, get_readwrite_connection
1011
from tests.test_stdio_smoke import (
1112
_assert_protocol_on_stdout_only,
@@ -150,7 +151,7 @@ def test_get_docs_cache_restart_and_corrupt_cache_fallback(tmp_path: Path):
150151
assert (version, slug, anchor, max_chars, start_index) == (
151152
"3.13",
152153
"library/json.html",
153-
"\x00mcp-python-docs:no-anchor\x00",
154+
_NO_ANCHOR_KEY,
154155
8000,
155156
0,
156157
)

tests/test_package_docs.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,3 +128,10 @@ def invalid_json(url: str, timeout: float):
128128
json_result = PackageDocsService(fetcher=invalid_json).lookup("demo")
129129
assert json_result.sources == []
130130
assert json_result.note == "Unable to retrieve PyPI metadata: JSONDecodeError."
131+
132+
def invalid_utf8(url: str, timeout: float):
133+
return _Resp(b"\xff\xfe\xfd")
134+
135+
utf8_result = PackageDocsService(fetcher=invalid_utf8).lookup("demo")
136+
assert utf8_result.sources == []
137+
assert utf8_result.note == "Unable to retrieve PyPI metadata: UnicodeDecodeError."

0 commit comments

Comments
 (0)