Skip to content

Commit 5d8e8bc

Browse files
fix: address PR 9 review cache and PyPI errors
1 parent 15b2bc5 commit 5d8e8bc

6 files changed

Lines changed: 234 additions & 52 deletions

File tree

src/mcp_server_python_docs/server.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,10 @@
33
Thin server layer — delegates all tool logic to services.
44
Dependency rule: server -> services -> retrieval/storage.
55
"""
6+
67
from __future__ import annotations
78

9+
import asyncio
810
import importlib.resources
911
import logging
1012
import sqlite3
@@ -256,17 +258,15 @@ def get_docs(
256258
if version is None and app_ctx.detected_python_version:
257259
version = app_ctx.detected_python_version
258260
try:
259-
return app_ctx.content_service.get_docs(
260-
slug, version, anchor, max_chars, start_index
261-
)
261+
return app_ctx.content_service.get_docs(slug, version, anchor, max_chars, start_index)
262262
except DocsServerError as e:
263263
raise ToolError(str(e))
264264
except Exception as e:
265265
logger.exception("Unexpected error in get_docs")
266266
raise ToolError(f"Internal error: {type(e).__name__}")
267267

268268
@mcp.tool(annotations=_PYPI_TOOL_ANNOTATIONS)
269-
def lookup_package_docs(
269+
async def lookup_package_docs(
270270
package: PackageParam,
271271
ctx: Context = None, # type: ignore[assignment]
272272
) -> PackageDocsResult:
@@ -277,7 +277,7 @@ def lookup_package_docs(
277277
"""
278278
app_ctx: AppContext = ctx.request_context.lifespan_context
279279
try:
280-
return app_ctx.package_docs_service.lookup(package)
280+
return await asyncio.to_thread(app_ctx.package_docs_service.lookup, package)
281281
except Exception as e:
282282
logger.exception("Unexpected error in lookup_package_docs")
283283
raise ToolError(f"Internal error: {type(e).__name__}")

src/mcp_server_python_docs/services/package_docs.py

Lines changed: 28 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
33
Source: https://docs.pypi.org/api/json/ documents GET /pypi/<project>/json.
44
"""
5+
56
from __future__ import annotations
67

78
import json
@@ -16,8 +17,14 @@
1617
from mcp_server_python_docs.services.observability import log_tool_call
1718

1819
_ALLOWED = {
19-
"documentation", "docs", "homepage", "home page", "source", "source code",
20-
"repository", "repo", "bug tracker", "issues", "changelog", "release notes",
20+
"documentation",
21+
"docs",
22+
"homepage",
23+
"home page",
24+
"source",
25+
"source code",
26+
"repository",
27+
"repo",
2128
}
2229
_BLOCKED = ("mirror", "community", "unofficial", "tutorial", "example")
2330
_PYPI_METADATA_MAX_BYTES = 5 * 1024 * 1024
@@ -89,30 +96,39 @@ def lookup(self, package: str) -> PackageDocsResult:
8996
)
9097
payload = json.loads(data.decode("utf-8"))
9198
except HTTPError as e:
92-
if e.code == 404:
93-
return PackageDocsResult(
94-
package=package, version="", metadata_source=metadata_source,
95-
sources=[], note="Package not found on PyPI.",
96-
)
97-
raise
99+
note = (
100+
"Package not found on PyPI." if e.code == 404 else f"PyPI returned HTTP {e.code}."
101+
)
102+
return PackageDocsResult(
103+
package=package,
104+
version="",
105+
metadata_source=metadata_source,
106+
sources=[],
107+
note=note,
108+
)
98109
except (URLError, TimeoutError, json.JSONDecodeError) as e:
99110
return PackageDocsResult(
100-
package=package, version="", metadata_source=metadata_source,
101-
sources=[], note=f"Unable to retrieve PyPI metadata: {type(e).__name__}.",
111+
package=package,
112+
version="",
113+
metadata_source=metadata_source,
114+
sources=[],
115+
note=f"Unable to retrieve PyPI metadata: {type(e).__name__}.",
102116
)
103117

104118
info = payload.get("info") if isinstance(payload, dict) else {}
105119
info = info if isinstance(info, dict) else {}
106120
sources = [
107-
s for s in (
121+
s
122+
for s in (
108123
_source(
109124
"PyPI project",
110125
info.get("project_url") or f"https://pypi.org/project/{project}/",
111126
"pypi",
112127
),
113128
_source("Documentation", info.get("docs_url"), "docs"),
114129
_source("Homepage", info.get("home_page"), "homepage"),
115-
) if s is not None
130+
)
131+
if s is not None
116132
]
117133
skipped: list[str] = []
118134
project_urls = info.get("project_urls")
Lines changed: 80 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,19 @@
11
"""SQLite-backed cache for completed get_docs results across MCP restarts."""
2+
23
from __future__ import annotations
34

5+
import logging
46
import sqlite3
57
from pathlib import Path
68
from typing import NamedTuple
79

10+
from pydantic import ValidationError
11+
812
from mcp_server_python_docs.models import GetDocsResult
913

14+
logger = logging.getLogger(__name__)
15+
_NO_ANCHOR_KEY = "\x00mcp-python-docs:no-anchor\x00"
16+
1017

1118
class CacheStats(NamedTuple):
1219
hits: int = 0
@@ -21,17 +28,24 @@ def __init__(self, cache_path: Path, index_path: Path) -> None:
2128
self._cache_path = Path(cache_path)
2229
self._fingerprint = self._fingerprint_index(Path(index_path))
2330
self._hits = self._misses = self._writes = 0
24-
self._cache_path.parent.mkdir(parents=True, exist_ok=True)
25-
self._conn = sqlite3.connect(str(self._cache_path), check_same_thread=False)
26-
self._conn.execute("PRAGMA synchronous = NORMAL")
27-
self._conn.execute(
28-
"CREATE TABLE IF NOT EXISTS retrieved_docs_cache ("
29-
"index_fingerprint TEXT NOT NULL, version TEXT NOT NULL, slug TEXT NOT NULL, "
30-
"anchor TEXT NOT NULL, max_chars INTEGER NOT NULL, start_index INTEGER NOT NULL, "
31-
"result_json TEXT NOT NULL, created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP, "
32-
"PRIMARY KEY (index_fingerprint, version, slug, anchor, max_chars, start_index))"
33-
)
34-
self._conn.commit()
31+
self._conn: sqlite3.Connection | None = None
32+
try:
33+
self._cache_path.parent.mkdir(parents=True, exist_ok=True)
34+
self._conn = sqlite3.connect(str(self._cache_path), check_same_thread=False)
35+
self._conn.execute("PRAGMA synchronous = NORMAL")
36+
self._conn.execute(
37+
"CREATE TABLE IF NOT EXISTS retrieved_docs_cache ("
38+
"index_fingerprint TEXT NOT NULL, version TEXT NOT NULL, slug TEXT NOT NULL, "
39+
"anchor TEXT NOT NULL, max_chars INTEGER NOT NULL, start_index INTEGER NOT NULL, "
40+
"result_json TEXT NOT NULL, created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP, "
41+
"PRIMARY KEY (index_fingerprint, version, slug, anchor, max_chars, start_index))"
42+
)
43+
self._conn.commit()
44+
except (OSError, sqlite3.Error) as e:
45+
if self._conn is not None:
46+
self._conn.close()
47+
self._conn = None
48+
logger.warning("Persistent docs cache disabled: %s", e)
3549

3650
@property
3751
def cache_path(self) -> Path:
@@ -42,40 +56,73 @@ def _fingerprint_index(index_path: Path) -> str:
4256
stat = index_path.stat()
4357
return f"{index_path.resolve()}:{stat.st_size}:{stat.st_mtime_ns}"
4458

59+
@staticmethod
60+
def _anchor_key(anchor: str | None) -> str:
61+
return _NO_ANCHOR_KEY if anchor is None else anchor
62+
4563
def stats(self) -> CacheStats:
4664
return CacheStats(self._hits, self._misses, self._writes)
4765

4866
def get(
4967
self, *, version: str, slug: str, anchor: str | None, max_chars: int, start_index: int
5068
) -> GetDocsResult | None:
51-
row = self._conn.execute(
52-
"SELECT result_json FROM retrieved_docs_cache WHERE index_fingerprint = ? "
53-
"AND version = ? AND slug = ? AND anchor = ? AND max_chars = ? AND start_index = ?",
54-
(self._fingerprint, version, slug, anchor or "", max_chars, start_index),
55-
).fetchone()
69+
if self._conn is None:
70+
self._misses += 1
71+
return None
72+
try:
73+
row = self._conn.execute(
74+
"SELECT result_json FROM retrieved_docs_cache WHERE index_fingerprint = ? "
75+
"AND version = ? AND slug = ? AND anchor = ? AND max_chars = ? AND start_index = ?",
76+
(
77+
self._fingerprint,
78+
version,
79+
slug,
80+
self._anchor_key(anchor),
81+
max_chars,
82+
start_index,
83+
),
84+
).fetchone()
85+
except sqlite3.Error as e:
86+
self._misses += 1
87+
logger.warning("Persistent docs cache read skipped: %s", e)
88+
return None
5689
if row is None:
5790
self._misses += 1
5891
return None
92+
try:
93+
result = GetDocsResult.model_validate_json(row[0])
94+
except (ValidationError, ValueError) as e:
95+
self._misses += 1
96+
logger.warning("Persistent docs cache entry ignored: %s", e)
97+
return None
5998
self._hits += 1
60-
return GetDocsResult.model_validate_json(row[0])
99+
return result
61100

62101
def put(self, *, result: GetDocsResult, max_chars: int, start_index: int) -> None:
63-
self._conn.execute(
64-
"INSERT OR REPLACE INTO retrieved_docs_cache "
65-
"(index_fingerprint, version, slug, anchor, max_chars, start_index, result_json) "
66-
"VALUES (?, ?, ?, ?, ?, ?, ?)",
67-
(
68-
self._fingerprint,
69-
result.version,
70-
result.slug,
71-
result.anchor or "",
72-
max_chars,
73-
start_index,
74-
result.model_dump_json(),
75-
),
76-
)
77-
self._conn.commit()
102+
if self._conn is None:
103+
return
104+
try:
105+
self._conn.execute(
106+
"INSERT OR REPLACE INTO retrieved_docs_cache "
107+
"(index_fingerprint, version, slug, anchor, max_chars, start_index, result_json) "
108+
"VALUES (?, ?, ?, ?, ?, ?, ?)",
109+
(
110+
self._fingerprint,
111+
result.version,
112+
result.slug,
113+
self._anchor_key(result.anchor),
114+
max_chars,
115+
start_index,
116+
result.model_dump_json(),
117+
),
118+
)
119+
self._conn.commit()
120+
except (sqlite3.Error, ValueError) as e:
121+
logger.warning("Persistent docs cache write skipped: %s", e)
122+
return
78123
self._writes += 1
79124

80125
def close(self) -> None:
81-
self._conn.close()
126+
if self._conn is not None:
127+
self._conn.close()
128+
self._conn = None

tests/conftest.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -150,8 +150,8 @@ def populated_db(test_db):
150150
"os.path -- Common pathname manipulations",
151151
[
152152
("os.path.join", "os.path.join", 2, 1,
153-
"Join one or more path components intelligently. If a component is an absolute path, "
154-
"all previous components are thrown away."),
153+
"Join one or more path components intelligently. If a component is an "
154+
"absolute path, all previous components are thrown away."),
155155
],
156156
),
157157
]

tests/test_package_docs.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,17 @@ def missing(url: str, timeout: float):
7676
assert tool.annotations.openWorldHint is True
7777

7878

79+
def test_package_docs_reports_non_404_pypi_http_errors():
80+
def rate_limited(url: str, timeout: float):
81+
raise HTTPError(url, 429, "Too Many Requests", hdrs=None, fp=None)
82+
83+
result = PackageDocsService(fetcher=rate_limited).lookup("busy-package")
84+
85+
assert result.sources == []
86+
assert result.metadata_source == "https://pypi.org/pypi/busy-package/json"
87+
assert result.note == "PyPI returned HTTP 429."
88+
89+
7990
def test_package_docs_rejects_oversized_pypi_metadata_without_unbounded_read():
8091
class LargeResp:
8192
requested_size: int | None = None

0 commit comments

Comments
 (0)