Skip to content

Commit d339d27

Browse files
fix: restore stdio transport stdout
1 parent f4bd817 commit d339d27

3 files changed

Lines changed: 262 additions & 25 deletions

File tree

src/mcp_server_python_docs/__main__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,10 @@ def serve() -> None:
9898
# print to stdout during MCP communication.
9999
os.dup2(saved_stdout_fd, 1)
100100
os.close(saved_stdout_fd)
101+
# FastMCP writes JSON-RPC frames through Python's stdout object.
102+
# Restore it as well as fd 1, otherwise protocol frames are emitted
103+
# on stderr because module import redirected sys.stdout there.
104+
sys.stdout = sys.__stdout__
101105

102106
try:
103107
mcp_server.run(transport="stdio")
Lines changed: 207 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,207 @@
1+
"""MCP subprocess smoke tests for get_docs and persistent cache behavior."""
2+
from __future__ import annotations
3+
4+
import sqlite3
5+
import subprocess
6+
import sys
7+
from pathlib import Path
8+
9+
from mcp_server_python_docs.storage.db import bootstrap_schema, get_readwrite_connection
10+
from tests.test_stdio_smoke import (
11+
_assert_protocol_on_stdout_only,
12+
_find_response,
13+
_isolated_cache_env,
14+
_make_notification,
15+
_make_request,
16+
)
17+
18+
19+
def _create_contentful_json_index(cache_dir: Path) -> Path:
    """Create a deterministic contentful docs index for subprocess smoke tests.

    Writes ``index.db`` under *cache_dir* containing one default doc set
    (Python 3.13), the ``json.dumps`` symbol, the ``library/json.html``
    document, and two sections of that document (``top`` and ``json.dumps``).
    The FTS tables are rebuilt after the rows are committed.

    Args:
        cache_dir: Directory that will hold the index; created if missing.

    Returns:
        Path of the populated ``index.db`` file.
    """
    page_title = "json — JSON encoder and decoder"
    page_text = "The json module exposes APIs for encoding and decoding JSON data."
    dumps_text = "Serialize obj to a JSON formatted str using a conversion table."

    cache_dir.mkdir(parents=True, exist_ok=True)
    db_path = cache_dir / "index.db"
    conn = get_readwrite_connection(db_path)
    try:
        bootstrap_schema(conn)
        conn.execute(
            "INSERT INTO doc_sets (source, version, language, label, is_default, base_url) "
            "VALUES ('python-docs', '3.13', 'en', 'Python 3.13', 1, "
            "'https://docs.python.org/3.13/')"
        )
        doc_set_id = conn.execute("SELECT last_insert_rowid()").fetchone()[0]
        conn.execute(
            "INSERT INTO symbols (doc_set_id, qualified_name, normalized_name, "
            "module, symbol_type, uri, anchor) "
            "VALUES (?, 'json.dumps', 'json_dumps', 'json', 'function', "
            "'library/json.html#json.dumps', 'json.dumps')",
            (doc_set_id,),
        )
        # char_count is derived from the text so the two can never disagree
        # (the previously hard-coded 64/62 were each one short).
        conn.execute(
            "INSERT INTO documents (doc_set_id, uri, slug, title, content_text, char_count) "
            "VALUES (?, 'library/json.html', 'library/json.html', ?, ?, ?)",
            (doc_set_id, page_title, page_text, len(page_text)),
        )
        doc_id = conn.execute("SELECT last_insert_rowid()").fetchone()[0]
        conn.executemany(
            "INSERT INTO sections (document_id, uri, anchor, heading, level, ordinal, "
            "content_text, char_count) VALUES (?, ?, ?, ?, ?, ?, ?, ?)",
            [
                (
                    doc_id,
                    "library/json.html#top",
                    "top",
                    page_title,
                    1,
                    1,
                    page_text,
                    len(page_text),
                ),
                (
                    doc_id,
                    "library/json.html#json.dumps",
                    "json.dumps",
                    "json.dumps",
                    2,
                    2,
                    dumps_text,
                    len(dumps_text),
                ),
            ],
        )
        conn.commit()
        # Rebuild the full-text indexes from the rows inserted above.
        conn.execute("INSERT INTO symbols_fts(symbols_fts) VALUES('rebuild')")
        conn.execute("INSERT INTO sections_fts(sections_fts) VALUES('rebuild')")
        conn.execute("INSERT INTO examples_fts(examples_fts) VALUES('rebuild')")
        conn.commit()
    finally:
        # Always release the database handle, even if a statement fails, so a
        # failing fixture does not leave the file locked for later tests.
        conn.close()
    return db_path
79+
80+
81+
def _run_server(stdin_data: bytes, env: dict[str, str]) -> subprocess.CompletedProcess:
    """Run ``mcp_server_python_docs serve`` once and capture both output streams.

    Args:
        stdin_data: Bytes fed to the child's stdin.
        env: Complete environment for the child process.

    Returns:
        The completed process with ``stdout`` and ``stderr`` captured as bytes.

    Raises:
        subprocess.TimeoutExpired: If the child runs longer than 15 seconds.
    """
    command = [sys.executable, "-m", "mcp_server_python_docs", "serve"]
    completed = subprocess.run(
        command,
        input=stdin_data,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        timeout=15,
        env=env,
    )
    return completed
89+
90+
91+
def _initialized_tool_call(name: str, arguments: dict, req_id: int = 2) -> bytes:
    """Build the stdin payload for one tool call preceded by MCP initialization.

    The payload is three messages in order: an ``initialize`` request with
    id 1, a ``notifications/initialized`` notification, and a ``tools/call``
    request for *name* with *arguments* using *req_id*.
    """
    client_hello = {
        "protocolVersion": "2024-11-05",
        "capabilities": {},
        "clientInfo": {"name": "test", "version": "0.1"},
    }
    initialize = _make_request("initialize", client_hello, req_id=1)
    initialized = _make_notification("notifications/initialized")
    call_params = {"name": name, "arguments": arguments}
    tool_call = _make_request("tools/call", call_params, req_id=req_id)
    return initialize + initialized + tool_call
105+
106+
107+
def _tool_structured_content(result: subprocess.CompletedProcess, req_id: int = 2) -> dict:
    """Return ``structuredContent`` from the successful tools/call reply *req_id*.

    Also checks that protocol frames appeared only on stdout, that a reply
    with the requested id exists, and that the reply is not flagged as an
    error.
    """
    frames = _assert_protocol_on_stdout_only(result)
    reply = _find_response(frames, req_id)
    assert reply is not None, f"Missing tools/call response: {frames}"
    assert "result" in reply, reply
    payload = reply["result"]
    assert payload.get("isError") is not True, reply
    return payload["structuredContent"]
114+
115+
116+
def _tool_error_text(result: subprocess.CompletedProcess, req_id: int = 2) -> str:
    """Return the joined text of a tools/call reply that reports a tool error.

    Checks that protocol frames appeared only on stdout, that a reply with
    *req_id* exists, and that its result is flagged ``isError``.

    Args:
        result: Completed server subprocess with captured stdout/stderr.
        req_id: JSON-RPC id of the tools/call request to look up.

    Returns:
        The ``text`` fields of the reply's ``content`` items joined with
        newlines; items without ``text`` contribute an empty line.
    """
    responses = _assert_protocol_on_stdout_only(result)
    response = _find_response(responses, req_id)
    assert response is not None, f"Missing tools/call response: {responses}"
    # A reply without "result" should fail with the full response in the
    # message rather than a bare KeyError, mirroring _tool_structured_content.
    assert "result" in response, response
    assert response["result"].get("isError") is True, response
    return "\n".join(item.get("text", "") for item in response["result"].get("content", []))
122+
123+
124+
def test_get_docs_cache_restart_and_corrupt_cache_fallback(tmp_path: Path):
    """Exercise get_docs through real MCP stdio with isolated contentful cache.

    Steps, each using a freshly spawned server process:
      1. Fetch the full page and check exactly one row lands in the
         retrieved-docs cache file.
      2. Fetch the same page again after the restart and expect an identical
         result.
      3. Fetch a single section by anchor.
      4. Request an empty anchor and expect a tool error.
      5. Overwrite the cache file with garbage and expect the page to still
         be served unchanged.
    """
    env, cache_dir = _isolated_cache_env(tmp_path)
    _create_contentful_json_index(cache_dir)
    cache_path = cache_dir / "retrieved-docs-cache.sqlite3"
    page_args = {"slug": "library/json.html", "version": "3.13"}

    def run_get_docs(arguments: dict) -> subprocess.CompletedProcess:
        # One server process per call, so each request runs after a restart.
        return _run_server(_initialized_tool_call("get_docs", arguments), env)

    full_page = _tool_structured_content(run_get_docs(dict(page_args)))
    assert full_page["slug"] == "library/json.html"
    assert full_page["anchor"] is None
    assert "json module" in full_page["content"]

    # sqlite3's connection context manager only commits/rolls back; it does
    # not close. Close explicitly so no handle is left open on the cache file
    # that is overwritten further down.
    conn = sqlite3.connect(cache_path)
    try:
        rows = conn.execute(
            "SELECT version, slug, anchor, max_chars, start_index, length(result_json) "
            "FROM retrieved_docs_cache"
        ).fetchall()
    finally:
        conn.close()
    assert len(rows) == 1
    version, slug, anchor, max_chars, start_index, result_json_length = rows[0]
    assert (version, slug, anchor, max_chars, start_index) == (
        "3.13",
        "library/json.html",
        "\x00mcp-python-docs:no-anchor\x00",
        8000,
        0,
    )
    assert result_json_length > 0

    restarted_page = _tool_structured_content(run_get_docs(dict(page_args)))
    assert restarted_page == full_page

    section = _tool_structured_content(
        run_get_docs({**page_args, "anchor": "json.dumps"})
    )
    assert section["anchor"] == "json.dumps"
    assert "Serialize obj" in section["content"]

    empty_anchor_error = _tool_error_text(run_get_docs({**page_args, "anchor": ""}))
    assert "Section '' not found" in empty_anchor_error

    # A cache file that is not a SQLite database must not break get_docs.
    cache_path.write_bytes(b"not a sqlite database")
    after_corrupt_cache = _tool_structured_content(run_get_docs(dict(page_args)))
    assert after_corrupt_cache == full_page

tests/test_stdio_smoke.py

Lines changed: 51 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
import os
1717
import subprocess
1818
import sys
19+
import time
1920
from pathlib import Path
2021

2122
import pytest
@@ -131,6 +132,36 @@ def _read_responses(stdout_data: bytes) -> list[dict]:
131132
return responses
132133

133134

135+
def _jsonrpc_frames(stream_data: bytes) -> list[dict]:
136+
"""Return JSON-RPC objects parsed from a stream, ignoring log lines."""
137+
frames: list[dict] = []
138+
for line in stream_data.split(b"\n"):
139+
line = line.strip()
140+
if not line:
141+
continue
142+
try:
143+
parsed = json.loads(line)
144+
except json.JSONDecodeError:
145+
continue
146+
if isinstance(parsed, dict) and parsed.get("jsonrpc") == "2.0":
147+
frames.append(parsed)
148+
return frames
149+
150+
151+
def _assert_protocol_on_stdout_only(result: subprocess.CompletedProcess) -> list[dict]:
    """Check that JSON-RPC traffic went to stdout and none leaked to stderr.

    Fails when stdout yields no responses, when any stdout entry carries a
    ``_raw`` key (presumably set by ``_read_responses`` for lines that are not
    JSON; confirm there), when any stdout entry is not tagged
    ``"jsonrpc": "2.0"``, or when stderr contains a JSON-RPC frame.

    Returns:
        The responses parsed from stdout.
    """
    stdout_frames = _read_responses(result.stdout)
    assert stdout_frames, f"No JSON-RPC responses on stdout; stderr was: {result.stderr!r}"

    for frame in stdout_frames:
        assert "_raw" not in frame, f"Non-JSON stdout pollution: {frame.get('_raw')}"
        assert frame.get("jsonrpc") == "2.0", f"Missing JSON-RPC frame on stdout: {frame}"

    leaked = _jsonrpc_frames(result.stderr)
    assert leaked == [], f"JSON-RPC frames leaked to stderr: {leaked}"
    return stdout_frames
163+
164+
134165
def _find_response(responses: list[dict], req_id: int) -> dict | None:
135166
"""Find a JSON-RPC response matching the given request id."""
136167
for resp in responses:
@@ -152,14 +183,23 @@ def _setup_test_env(self, tmp_path):
152183
def _run_server_with_input(
    self, stdin_data: bytes, timeout: int = 15,
) -> subprocess.CompletedProcess:
    """Run the server subprocess with line-paced stdin and return the result.

    Each line of *stdin_data* is written and flushed separately, pausing
    0.3 s after the first line and 0.05 s after every later one
    (presumably to let the server handle ``initialize`` before the next
    message arrives; confirm). Stdin is then closed and both output streams
    are collected.

    Args:
        stdin_data: Newline-delimited messages to feed to the server.
        timeout: Seconds to wait for the server to exit after stdin closes.

    Returns:
        A ``CompletedProcess`` carrying the exit code, stdout and stderr.

    Raises:
        subprocess.TimeoutExpired: If the server does not exit in time; the
            child is killed and reaped before the exception propagates.
    """
    proc = subprocess.Popen(
        [sys.executable, "-m", "mcp_server_python_docs", "serve"],
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        env=self.env,
    )
    assert proc.stdin is not None
    try:
        for index, line in enumerate(stdin_data.splitlines(keepends=True)):
            proc.stdin.write(line)
            proc.stdin.flush()
            time.sleep(0.3 if index == 0 else 0.05)
    except BrokenPipeError:
        # The server exited before reading everything. Keep going so the
        # caller's assertions can report its stdout/stderr, matching what
        # subprocess.run(input=...) does in this situation.
        pass
    finally:
        try:
            proc.stdin.close()
        except BrokenPipeError:
            pass
        # communicate() must not touch the already-closed pipe.
        proc.stdin = None
    try:
        stdout, stderr = proc.communicate(timeout=timeout)
    except subprocess.TimeoutExpired:
        # communicate() does not stop the child on timeout; kill and reap it
        # so a hung server is not left running after the test fails.
        proc.kill()
        proc.communicate()
        raise
    return subprocess.CompletedProcess(proc.args, proc.returncode, stdout, stderr)
163203

164204
def test_server_lists_tools_no_stdout_pollution(self):
165205
"""Server returns tool list and stdout has no non-JSON-RPC bytes."""
@@ -175,16 +215,11 @@ def test_server_lists_tools_no_stdout_pollution(self):
175215

176216
result = self._run_server_with_input(stdin_data)
177217

178-
responses = _read_responses(result.stdout)
179-
180-
# Every line on stdout must be valid JSON-RPC
181-
for resp in responses:
182-
assert "_raw" not in resp, f"Non-JSON stdout pollution: {resp.get('_raw')}"
218+
responses = _assert_protocol_on_stdout_only(result)
183219

184220
# Find the tools/list response
185221
tools_resp = _find_response(responses, 2)
186-
if tools_resp is None:
187-
pytest.skip("Server exited before returning tools/list response")
222+
assert tools_resp is not None, f"Server exited before tools/list response: {responses}"
188223
assert "result" in tools_resp, f"tools/list error: {tools_resp}"
189224
tool_names = [t["name"] for t in tools_resp["result"].get("tools", [])]
190225
assert "search_docs" in tool_names
@@ -209,16 +244,11 @@ def test_search_docs_round_trip(self):
209244
)
210245

211246
result = self._run_server_with_input(stdin_data)
212-
responses = _read_responses(result.stdout)
213-
214-
# No stdout pollution
215-
for resp in responses:
216-
assert "_raw" not in resp, f"Stdout pollution: {resp.get('_raw')}"
247+
responses = _assert_protocol_on_stdout_only(result)
217248

218249
# Find the tools/call response
219250
call_resp = _find_response(responses, 2)
220-
if call_resp is None:
221-
pytest.skip("Server exited before returning search_docs response")
251+
assert call_resp is not None, f"Server exited before search_docs response: {responses}"
222252
assert "result" in call_resp, f"tools/call error: {call_resp}"
223253
content = call_resp["result"].get("content", [])
224254
assert len(content) >= 1, "search_docs returned no content"
@@ -239,14 +269,10 @@ def test_list_versions_round_trip(self):
239269
)
240270

241271
result = self._run_server_with_input(stdin_data)
242-
responses = _read_responses(result.stdout)
243-
244-
for resp in responses:
245-
assert "_raw" not in resp, f"Stdout pollution: {resp.get('_raw')}"
272+
responses = _assert_protocol_on_stdout_only(result)
246273

247274
call_resp = _find_response(responses, 2)
248-
if call_resp is None:
249-
pytest.skip("Server exited before returning list_versions response")
275+
assert call_resp is not None, f"Server exited before list_versions response: {responses}"
250276
assert "result" in call_resp, f"tools/call error: {call_resp}"
251277

252278
def test_all_stdout_is_valid_jsonrpc(self):

0 commit comments

Comments
 (0)