|
| 1 | +"""MCP subprocess smoke tests for get_docs and persistent cache behavior.""" |
| 2 | +from __future__ import annotations |
| 3 | + |
| 4 | +import sqlite3 |
| 5 | +import subprocess |
| 6 | +import sys |
| 7 | +from pathlib import Path |
| 8 | + |
| 9 | +from mcp_server_python_docs.storage.db import bootstrap_schema, get_readwrite_connection |
| 10 | +from tests.test_stdio_smoke import ( |
| 11 | + _assert_protocol_on_stdout_only, |
| 12 | + _find_response, |
| 13 | + _isolated_cache_env, |
| 14 | + _make_notification, |
| 15 | + _make_request, |
| 16 | +) |
| 17 | + |
| 18 | + |
def _create_contentful_json_index(cache_dir: Path) -> Path:
    """Create a deterministic contentful docs index for subprocess smoke tests.

    Writes ``index.db`` inside *cache_dir* (creating the directory if needed)
    containing one doc set (Python 3.13), one symbol (``json.dumps``), one
    document (``library/json.html``) and two sections, then issues the
    ``'rebuild'`` command on the ``symbols_fts``, ``sections_fts`` and
    ``examples_fts`` tables.

    Args:
        cache_dir: Directory that will hold the index database.

    Returns:
        Path to the created ``index.db`` file.
    """
    page_title = "json — JSON encoder and decoder"
    page_text = "The json module exposes APIs for encoding and decoding JSON data."
    dumps_text = "Serialize obj to a JSON formatted str using a conversion table."

    cache_dir.mkdir(parents=True, exist_ok=True)
    db_path = cache_dir / "index.db"
    conn = get_readwrite_connection(db_path)
    try:
        bootstrap_schema(conn)
        conn.execute(
            "INSERT INTO doc_sets (source, version, language, label, is_default, base_url) "
            "VALUES ('python-docs', '3.13', 'en', 'Python 3.13', 1, "
            "'https://docs.python.org/3.13/')"
        )
        doc_set_id = conn.execute("SELECT last_insert_rowid()").fetchone()[0]
        conn.execute(
            "INSERT INTO symbols (doc_set_id, qualified_name, normalized_name, "
            "module, symbol_type, uri, anchor) "
            "VALUES (?, 'json.dumps', 'json_dumps', 'json', 'function', "
            "'library/json.html#json.dumps', 'json.dumps')",
            (doc_set_id,),
        )
        # char_count is derived from the text itself so the fixture cannot
        # drift out of sync with the content it describes.
        conn.execute(
            "INSERT INTO documents (doc_set_id, uri, slug, title, content_text, char_count) "
            "VALUES (?, 'library/json.html', 'library/json.html', ?, ?, ?)",
            (doc_set_id, page_title, page_text, len(page_text)),
        )
        doc_id = conn.execute("SELECT last_insert_rowid()").fetchone()[0]
        conn.executemany(
            "INSERT INTO sections (document_id, uri, anchor, heading, level, ordinal, "
            "content_text, char_count) VALUES (?, ?, ?, ?, ?, ?, ?, ?)",
            [
                (
                    doc_id,
                    "library/json.html#top",
                    "top",
                    page_title,
                    1,
                    1,
                    page_text,
                    len(page_text),
                ),
                (
                    doc_id,
                    "library/json.html#json.dumps",
                    "json.dumps",
                    "json.dumps",
                    2,
                    2,
                    dumps_text,
                    len(dumps_text),
                ),
            ],
        )
        conn.commit()
        # Rebuild the full-text tables only after the content rows are committed.
        conn.execute("INSERT INTO symbols_fts(symbols_fts) VALUES('rebuild')")
        conn.execute("INSERT INTO sections_fts(sections_fts) VALUES('rebuild')")
        conn.execute("INSERT INTO examples_fts(examples_fts) VALUES('rebuild')")
        conn.commit()
    finally:
        # Release the database handle even when a statement above fails, so a
        # broken fixture does not also leak an open connection.
        conn.close()
    return db_path
| 79 | + |
| 80 | + |
def _run_server(stdin_data: bytes, env: dict[str, str]) -> subprocess.CompletedProcess:
    """Run the ``serve`` command in a child interpreter and return the finished process.

    *stdin_data* is fed to the child's stdin, stdout and stderr are captured,
    and the child runs with *env* as its environment under a 15 second timeout.
    """
    command = [sys.executable, "-m", "mcp_server_python_docs", "serve"]
    completed = subprocess.run(
        command,
        env=env,
        input=stdin_data,
        timeout=15,
        capture_output=True,
    )
    return completed
| 89 | + |
| 90 | + |
def _initialized_tool_call(name: str, arguments: dict, req_id: int = 2) -> bytes:
    """Build the stdin payload for an MCP handshake followed by one ``tools/call``.

    The payload is an ``initialize`` request (id 1), the
    ``notifications/initialized`` notification, and a ``tools/call`` request
    for tool *name* with *arguments*, sent under id *req_id*.
    """
    handshake_params = {
        "protocolVersion": "2024-11-05",
        "capabilities": {},
        "clientInfo": {"name": "test", "version": "0.1"},
    }
    call_params = {"name": name, "arguments": arguments}

    initialize = _make_request("initialize", handshake_params, req_id=1)
    initialized = _make_notification("notifications/initialized")
    tool_call = _make_request("tools/call", call_params, req_id=req_id)
    return initialize + initialized + tool_call
| 105 | + |
| 106 | + |
def _tool_structured_content(result: subprocess.CompletedProcess, req_id: int = 2) -> dict:
    """Return ``structuredContent`` from the successful tool response with id *req_id*.

    Fails the test when the response is missing, has no ``result`` member,
    or is flagged with ``isError``.
    """
    messages = _assert_protocol_on_stdout_only(result)
    reply = _find_response(messages, req_id)
    assert reply is not None, f"Missing tools/call response: {messages}"
    assert "result" in reply, reply
    payload = reply["result"]
    assert payload.get("isError") is not True, reply
    return payload["structuredContent"]
| 114 | + |
| 115 | + |
def _tool_error_text(result: subprocess.CompletedProcess, req_id: int = 2) -> str:
    """Return the joined text of a tool response that is flagged as an error.

    Args:
        result: Finished server process whose stdout holds the responses.
        req_id: Id of the ``tools/call`` request to look up.

    Returns:
        The ``text`` of every item in the result's ``content`` list, joined
        with newlines; items without ``text`` contribute an empty string.
    """
    responses = _assert_protocol_on_stdout_only(result)
    response = _find_response(responses, req_id)
    assert response is not None, f"Missing tools/call response: {responses}"
    # Mirror _tool_structured_content: a reply with no "result" member should
    # fail with the response shown rather than with a bare KeyError.
    assert "result" in response, response
    assert response["result"].get("isError") is True, response
    return "\n".join(item.get("text", "") for item in response["result"].get("content", []))
| 122 | + |
| 123 | + |
def test_get_docs_cache_restart_and_corrupt_cache_fallback(tmp_path: Path):
    """Exercise get_docs through real MCP stdio with isolated contentful cache.

    Every call spawns a new server subprocess. The test checks, in order:
    a full-page fetch, the single row it leaves in the retrieved-docs cache,
    an identical answer from a second process, a section fetch by anchor,
    the error for an empty anchor, and an unchanged full-page answer after
    the cache file has been overwritten with non-SQLite bytes.
    """
    env, cache_dir = _isolated_cache_env(tmp_path)
    _create_contentful_json_index(cache_dir)
    cache_path = cache_dir / "retrieved-docs-cache.sqlite3"

    def run_get_docs(**extra: str) -> subprocess.CompletedProcess:
        """Call get_docs for the json page in a new server process."""
        arguments = {"slug": "library/json.html", "version": "3.13", **extra}
        return _run_server(_initialized_tool_call("get_docs", arguments), env)

    full_page = _tool_structured_content(run_get_docs())
    assert full_page["slug"] == "library/json.html"
    assert full_page["anchor"] is None
    assert "json module" in full_page["content"]

    # sqlite3's connection context manager only commits or rolls back; it does
    # not close. Close explicitly so no handle is still open on the cache file
    # when it is overwritten further down.
    conn = sqlite3.connect(cache_path)
    try:
        rows = conn.execute(
            "SELECT version, slug, anchor, max_chars, start_index, length(result_json) "
            "FROM retrieved_docs_cache"
        ).fetchall()
    finally:
        conn.close()
    assert len(rows) == 1
    version, slug, anchor, max_chars, start_index, result_json_length = rows[0]
    assert (version, slug, anchor, max_chars, start_index) == (
        "3.13",
        "library/json.html",
        "\x00mcp-python-docs:no-anchor\x00",
        8000,
        0,
    )
    assert result_json_length > 0

    # A second process must return exactly what the first one did.
    restarted_page = _tool_structured_content(run_get_docs())
    assert restarted_page == full_page

    section = _tool_structured_content(run_get_docs(anchor="json.dumps"))
    assert section["anchor"] == "json.dumps"
    assert "Serialize obj" in section["content"]

    empty_anchor_error = _tool_error_text(run_get_docs(anchor=""))
    assert "Section '' not found" in empty_anchor_error

    # Replace the cache with bytes that are not a SQLite database; the server
    # is expected to still answer with the same page.
    cache_path.write_bytes(b"not a sqlite database")
    after_corrupt_cache = _tool_structured_content(run_get_docs())
    assert after_corrupt_cache == full_page
0 commit comments