Skip to content

Commit 61e0994

Browse files
feat: cache docs and expose PyPI package docs lookup
1 parent 8f6736c commit 61e0994

10 files changed

Lines changed: 505 additions & 8 deletions

File tree

README.md

Lines changed: 37 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -164,12 +164,13 @@ Contributor commands and validation steps live in
164164

165165
## Tools
166166

167-
The server currently exposes four MCP tools:
167+
The server currently exposes five MCP tools:
168168

169169
| Tool | Description |
170170
|------|-------------|
171171
| `search_docs` | Search Python stdlib docs by query. Supports symbol lookup (`asyncio.TaskGroup`), module search (`json`), and free-text search. Returns ranked hits with BM25 scoring and snippet excerpts. |
172-
| `get_docs` | Retrieve a specific documentation page or section by slug and optional anchor. Returns markdown content with budget-enforced truncation and pagination. |
172+
| `get_docs` | Retrieve a specific documentation page or section by slug and optional anchor. Returns markdown content with budget-enforced truncation and pagination. Retrieved results are cached on disk by Python docs version and request identity. |
173+
| `lookup_package_docs` | Look up official PyPI package metadata and return package-declared documentation/homepage/source URLs. This is a controlled PyPI metadata lookup, not generic web search. |
173174
| `list_versions` | List all indexed Python versions with metadata. |
174175
| `detect_python_version` | Detect the user's local Python version and report whether it matches an indexed documentation version. |
175176

@@ -184,10 +185,43 @@ Use this server when you need:
184185

185186
Use a generic fetcher or broader docs MCP when you need:
186187

187-
- third-party package docs outside the Python stdlib
188+
- arbitrary third-party package content beyond package-declared PyPI metadata
188189
- arbitrary web pages
189190
- mixed-source research across many frameworks
190191

192+
## Retrieved docs cache
193+
194+
`get_docs` responses are cached across MCP client/server restarts in the
195+
platform cache directory:
196+
197+
```text
198+
<platform cache dir>/mcp-python-docs/retrieved-docs-cache.sqlite3
199+
```
200+
201+
The cache stores completed `get_docs` results, including page/section content,
202+
for the resolved Python docs version plus request identity (`slug`, optional
203+
`anchor`, `max_chars`, and `start_index`). Cache misses fall back to the normal
204+
local index retrieval path and then write the retrieved result.
205+
206+
Cache entries are also scoped to a fingerprint of the local `index.db` file
207+
(path, size, and modification timestamp). If you rebuild or replace the local
208+
docs index, older entries are ignored automatically instead of being returned
209+
for the new index generation. Deleting `retrieved-docs-cache.sqlite3` is safe;
210+
it only removes cached retrieval results, not the docs index.
211+
212+
## PyPI package docs lookup
213+
214+
`lookup_package_docs` queries the official PyPI JSON API documented at
215+
`https://docs.pypi.org/api/json/` (`GET /pypi/<project>/json`) and returns only
216+
sources declared in that package's PyPI metadata: the PyPI project URL,
217+
`docs_url`, `home_page`, and allowlisted `project_urls` labels such as
218+
Documentation, Homepage, Source, Repository, Issues, Changelog, and Release
219+
Notes.
220+
221+
The tool makes the trust boundary explicit with
222+
`trust_boundary="pypi-declared-metadata"`. It does not crawl pages, perform web
223+
search, or silently fall back to unofficial community mirrors.
224+
191225
## Diagnostics
192226

193227
Check the local environment:

src/mcp_server_python_docs/app_context.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,8 @@
1111
from pathlib import Path
1212

1313
from mcp_server_python_docs.services.content import ContentService
14+
from mcp_server_python_docs.services.package_docs import PackageDocsService
15+
from mcp_server_python_docs.services.persistent_cache import PersistentDocsCache
1416
from mcp_server_python_docs.services.search import SearchService
1517
from mcp_server_python_docs.services.version import VersionService
1618

@@ -24,6 +26,8 @@ class AppContext:
2426
search_service: SearchService
2527
content_service: ContentService
2628
version_service: VersionService
29+
package_docs_service: PackageDocsService = field(default_factory=PackageDocsService)
30+
persistent_docs_cache: PersistentDocsCache | None = None
2731
synonyms: dict[str, list[str]] = field(default_factory=dict)
2832
detected_python_version: str | None = None
2933
detected_python_source: str | None = None

src/mcp_server_python_docs/models.py

Lines changed: 43 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -160,3 +160,46 @@ class DetectPythonVersionResult(BaseModel):
160160
is_default: bool = Field(
161161
description="Whether this detected version is being used as the default for get_docs"
162162
)
163+
164+
165+
# --- lookup_package_docs models ---
166+
167+
168+
class PackageDocsInput(BaseModel):
    """Input parameters for lookup_package_docs tool."""

    # Only the length of the name is constrained at this layer
    # (1 to 214 characters); no character-set check is applied here.
    package: str = Field(
        description="PyPI package/project name (e.g. 'requests').",
        min_length=1,
        max_length=214,
    )
176+
177+
178+
class PackageDocsSource(BaseModel):
    """A package-declared documentation or project source URL."""

    # All four fields are required strings; none carries a default.
    label: str = Field(
        description="Label from PyPI metadata or a normalized core metadata field",
    )
    url: str = Field(
        description="HTTP(S) URL declared by the package on PyPI",
    )
    kind: str = Field(
        description="Source category, such as docs, homepage, source, or pypi",
    )
    declared_by: str = Field(
        description="Where this source declaration came from",
    )
185+
186+
187+
class PackageDocsResult(BaseModel):
    """Output from lookup_package_docs tool."""

    # Required fields: the caller must always supply these.
    package: str = Field(
        description="Canonical package name returned by PyPI when available",
    )
    version: str = Field(
        description="Latest version reported by PyPI metadata",
    )

    # Optional: defaults to an empty string.
    summary: str = Field(
        description="Package summary from PyPI metadata",
        default="",
    )

    # Required: the lookup URL the metadata was read from.
    metadata_source: str = Field(
        description="Official PyPI JSON API URL used for lookup",
    )

    # Fixed marker by default so consumers can see the scope of the result.
    trust_boundary: str = Field(
        description="Indicates results are limited to PyPI/project-declared metadata",
        default="pypi-declared-metadata",
    )

    # default_factory gives every instance its own list.
    sources: list[PackageDocsSource] = Field(
        description="Package-declared PyPI, documentation, homepage, and source URLs",
        default_factory=list,
    )

    # None when there is nothing to report.
    note: str | None = Field(
        description="Controlled-scope note, for example skipped labels or not-found details",
        default=None,
    )

src/mcp_server_python_docs/server.py

Lines changed: 44 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -29,9 +29,12 @@
2929
DetectPythonVersionResult,
3030
GetDocsResult,
3131
ListVersionsResult,
32+
PackageDocsResult,
3233
SearchDocsResult,
3334
)
3435
from mcp_server_python_docs.services.content import ContentService
36+
from mcp_server_python_docs.services.package_docs import PackageDocsService
37+
from mcp_server_python_docs.services.persistent_cache import PersistentDocsCache
3538
from mcp_server_python_docs.services.search import SearchService
3639
from mcp_server_python_docs.services.version import VersionService
3740
from mcp_server_python_docs.storage.db import get_readonly_connection
@@ -86,15 +89,21 @@ async def app_lifespan(server: FastMCP) -> AsyncIterator[AppContext]:
8689

8790
# Open read-only connection (STOR-06, STOR-07)
8891
db = get_readonly_connection(index_path)
92+
persistent_docs_cache: PersistentDocsCache | None = None
8993

9094
try:
9195
# Check FTS5 (STOR-08)
9296
_assert_fts5(db)
9397

9498
# Construct service instances (Phase 5 — service layer wiring)
99+
persistent_docs_cache = PersistentDocsCache(
100+
cache_path=cache_dir / "retrieved-docs-cache.sqlite3",
101+
index_path=index_path,
102+
)
95103
search_svc = SearchService(db, synonyms)
96-
content_svc = ContentService(db)
104+
content_svc = ContentService(db, persistent_cache=persistent_docs_cache)
97105
version_svc = VersionService(db)
106+
package_docs_svc = PackageDocsService()
98107

99108
# Detect user's Python version and match to indexed versions
100109
detected_ver, detected_src = detect_python_version()
@@ -119,6 +128,8 @@ async def app_lifespan(server: FastMCP) -> AsyncIterator[AppContext]:
119128
search_service=search_svc,
120129
content_service=content_svc,
121130
version_service=version_svc,
131+
package_docs_service=package_docs_svc,
132+
persistent_docs_cache=persistent_docs_cache,
122133
detected_python_version=matched,
123134
detected_python_source=detected_src,
124135
)
@@ -133,6 +144,11 @@ async def app_lifespan(server: FastMCP) -> AsyncIterator[AppContext]:
133144
pass
134145
raise
135146
finally:
147+
if persistent_docs_cache is not None:
148+
try:
149+
persistent_docs_cache.close()
150+
except Exception:
151+
pass
136152
db.close()
137153

138154

@@ -143,6 +159,12 @@ async def app_lifespan(server: FastMCP) -> AsyncIterator[AppContext]:
143159
idempotentHint=True,
144160
openWorldHint=False,
145161
)
162+
# Annotations for the PyPI-backed tool. The hints still describe a read-only,
# non-destructive, idempotent call, but openWorldHint is True because
# lookup_package_docs queries PyPI's JSON API rather than only local data.
_PYPI_TOOL_ANNOTATIONS = ToolAnnotations(
    openWorldHint=True,
    readOnlyHint=True,
    idempotentHint=True,
    destructiveHint=False,
)
146168

147169
SearchQueryParam = Annotated[
148170
str,
@@ -184,6 +206,10 @@ async def app_lifespan(server: FastMCP) -> AsyncIterator[AppContext]:
184206
int,
185207
Field(ge=0, description="Start position for pagination"),
186208
]
209+
# Tool-parameter type for a PyPI project name; uses the same 1-214 character
# bounds as PackageDocsInput.package in models.py.
PackageParam = Annotated[
    str,
    Field(
        description="PyPI package/project name",
        min_length=1,
        max_length=214,
    ),
]
187213

188214

189215
def create_server() -> FastMCP:
@@ -239,6 +265,23 @@ def get_docs(
239265
logger.exception("Unexpected error in get_docs")
240266
raise ToolError(f"Internal error: {type(e).__name__}")
241267

268+
@mcp.tool(annotations=_PYPI_TOOL_ANNOTATIONS)
def lookup_package_docs(
    package: PackageParam,
    ctx: Context = None,  # type: ignore[assignment]
) -> PackageDocsResult:
    """Look up package-declared docs/homepage/source URLs via official PyPI metadata.

    This is not generic web search: it only queries PyPI's JSON API and
    returns official PyPI metadata plus package-declared project URLs.
    """
    # Resolved outside the try block, so a missing context is not reported
    # as a tool-level "Internal error".
    lifespan_ctx: AppContext = ctx.request_context.lifespan_context
    try:
        docs_service = lifespan_ctx.package_docs_service
        lookup_result = docs_service.lookup(package)
    except Exception as err:
        # Log the full traceback, but surface only the exception class name
        # to the client.
        logger.exception("Unexpected error in lookup_package_docs")
        error_kind = type(err).__name__
        raise ToolError(f"Internal error: {error_kind}")
    return lookup_result
284+
242285
@mcp.tool(annotations=_TOOL_ANNOTATIONS)
243286
def list_versions(
244287
ctx: Context = None, # type: ignore[assignment]

src/mcp_server_python_docs/services/content.py

Lines changed: 26 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,7 @@
1313
from mcp_server_python_docs.retrieval.budget import apply_budget
1414
from mcp_server_python_docs.services.cache import create_section_cache
1515
from mcp_server_python_docs.services.observability import log_tool_call
16+
from mcp_server_python_docs.services.persistent_cache import PersistentDocsCache
1617
from mcp_server_python_docs.services.version_resolution import resolve_version_strict
1718

1819

@@ -23,9 +24,14 @@ class ContentService:
2324
When omitted, returns the full page with truncation/pagination.
2425
"""
2526

26-
def __init__(self, db: sqlite3.Connection) -> None:
27+
def __init__(
    self,
    db: sqlite3.Connection,
    persistent_cache: PersistentDocsCache | None = None,
) -> None:
    """Bind the service to an index connection and an optional on-disk cache.

    Args:
        db: SQLite connection used for document queries and passed to
            ``create_section_cache``.
        persistent_cache: Cache that ``get_docs`` checks before querying
            ``db`` and writes to after building a result. ``None`` skips
            both steps.
    """
    # Stored as given; get_docs tests this for None before every use.
    self._persistent_cache = persistent_cache
    self._db = db
    self._get_section = create_section_cache(db)
2935

3036
def _resolve_version(self, version: str | None) -> str:
3137
"""Resolve version to a concrete version string using shared resolution logic.
@@ -47,6 +53,17 @@ def get_docs(
4753
"""Retrieve documentation content by slug, optionally narrowed to a section by anchor."""
4854
resolved_version = self._resolve_version(version)
4955

56+
if self._persistent_cache is not None:
57+
cached = self._persistent_cache.get(
58+
version=resolved_version,
59+
slug=slug,
60+
anchor=anchor,
61+
max_chars=max_chars,
62+
start_index=start_index,
63+
)
64+
if cached is not None:
65+
return cached
66+
5067
# Find the document
5168
doc_row = self._db.execute(
5269
"""
@@ -119,7 +136,7 @@ def get_docs(
119136
full_text, max_chars, start_index
120137
)
121138

122-
return GetDocsResult(
139+
result = GetDocsResult(
123140
content=truncated_text,
124141
slug=slug,
125142
title=title,
@@ -129,3 +146,10 @@ def get_docs(
129146
truncated=is_truncated,
130147
next_start_index=next_idx,
131148
)
149+
if self._persistent_cache is not None:
150+
self._persistent_cache.put(
151+
result=result,
152+
max_chars=max_chars,
153+
start_index=start_index,
154+
)
155+
return result

0 commit comments

Comments
 (0)