Skip to content

Commit db0df8d

Browse files
authored
Merge pull request #127 from dbwls99706/claude/project-review-analysis-jghi99
perf + seo: 15x faster lookup SDK, sync all AI discovery surfaces to the real 11-tool MCP registry
2 parents baf0968 + b43559b commit db0df8d

8 files changed

Lines changed: 234 additions & 29 deletions

File tree

AGENTS.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,8 @@ https://deadends.dev/country/{cc}/ for each country's entries.
4141

4242
MCP tools: lookup_error, get_error_detail, search_errors, batch_lookup,
4343
get_error_chain, list_error_domains, list_errors_by_domain,
44-
get_domain_stats, report_outcome.
44+
get_domain_stats, list_errors_by_country, get_country_summary,
45+
report_outcome.
4546

4647
MCP config:
4748
```json

CLAUDE.md

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ generator/
6262
validate.py # Validation: schema, business rules, HTML, cross-refs, staleness
6363
6464
mcp/
65-
server.py # MCP server (JSON-RPC over stdio) - 8 tools for AI agents
65+
server.py # MCP server (JSON-RPC over stdio) - 11 tools for AI agents
6666
6767
api/
6868
mcp.py # Vercel serverless MCP endpoint
@@ -209,7 +209,8 @@ Optional overrides for `generator/build_site.py` (defaults work out of the box):
209209

210210
## MCP Server
211211

212-
The MCP server exposes 8 read-only tools over stdio (JSON-RPC):
212+
The MCP server exposes 11 tools over stdio (JSON-RPC). All are read-only
213+
except `report_outcome`, which appends feedback to `data/outcomes/`:
213214

214215
1. `lookup_error` - Match error message against regex patterns
215216
2. `get_error_detail` - Full canon by ID
@@ -219,6 +220,9 @@ The MCP server exposes 8 read-only tools over stdio (JSON-RPC):
219220
6. `batch_lookup` - Look up multiple errors at once (max 10)
220221
7. `get_domain_stats` - Domain statistics and confidence levels
221222
8. `get_error_chain` - Traverse error transition graph
223+
9. `list_errors_by_country` - Country-scoped dead ends (ISO alpha-2 code)
224+
10. `get_country_summary` - Country-level coverage summary
225+
11. `report_outcome` - Record whether a workaround worked (write)
222226

223227
Configuration via environment variables:
224228
- `DEADENDS_PREFERRED_DOMAINS` - Comma-separated domain boost list

generator/build_site.py

Lines changed: 92 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,25 @@
3939
# Previously, non-tech domains were noindexed to preserve crawl budget.
4040
# Removed: all domains are now indexed to maximise Google coverage.
4141

42+
# Canonical MCP tool list - the single source of truth for every AI
43+
# discovery surface this builder emits (llms.txt, ai-plugin.json,
44+
# agent.json, mcp.json, server-card.json, CLAUDE.md, .cursorrules,
45+
# homepage ai-summary). Must match mcp/server.py TOOLS; enforced by
46+
# tests/test_build.py::TestMcpToolNames::test_mcp_tool_names_match_server.
47+
MCP_TOOL_NAMES = [
48+
"lookup_error",
49+
"get_error_detail",
50+
"list_error_domains",
51+
"search_errors",
52+
"list_errors_by_domain",
53+
"batch_lookup",
54+
"get_domain_stats",
55+
"list_errors_by_country",
56+
"get_country_summary",
57+
"get_error_chain",
58+
"report_outcome",
59+
]
60+
4261

4362
def load_canons(data_dir: Path) -> list[dict]:
4463
"""Load all ErrorCanon JSON files from the data directory."""
@@ -970,6 +989,7 @@ def build_index_page(canons: list[dict], jinja_env: Environment) -> None:
970989
mrr=mrr,
971990
precision_at_3=precision_at_3,
972991
demo_errors=demo_errors,
992+
mcp_tools=MCP_TOOL_NAMES,
973993
google_verification=GOOGLE_VERIFICATION,
974994
bing_verification=BING_VERIFICATION,
975995
)
@@ -3119,10 +3139,8 @@ def build_llms_txt(canons: list[dict]) -> None:
31193139
"python -m mcp.server # stdio mode",
31203140
"```",
31213141
"",
3122-
"Tools: `lookup_error`, `get_error_detail`, `search_errors`, "
3123-
"`batch_lookup`, `get_error_chain`, `list_error_domains`, "
3124-
"`list_errors_by_domain`, `get_domain_stats`, "
3125-
"`list_errors_by_country`, `get_country_summary`",
3142+
f"Tools ({len(MCP_TOOL_NAMES)}): "
3143+
+ ", ".join(f"`{t}`" for t in MCP_TOOL_NAMES),
31263144
"",
31273145
"### Option 2: REST API",
31283146
"",
@@ -3139,6 +3157,9 @@ def build_llms_txt(canons: list[dict]) -> None:
31393157
"",
31403158
f"- [Complete Database]({BASE_URL}/llms-full.txt): "
31413159
"All errors in plaintext (load into context window)",
3160+
f"- Per-domain slices: `{BASE_URL}/llms-full-{{domain}}.txt` "
3161+
"(bounded size - load only the domain you need, "
3162+
"e.g. `llms-full-python.txt`, `llms-full-docker.txt`)",
31423163
"",
31433164
"## How to Use",
31443165
"",
@@ -3876,7 +3897,8 @@ def build_well_known(canons: list[dict]) -> None:
38763897
"(leads_to, preceded_by, frequently_confused_with). "
38773898
"Alt: GET /llms.txt for text summary, "
38783899
"GET /api/v1/errors.ndjson for streaming, "
3879-
"or use MCP server (8 tools). No auth required."
3900+
f"or use MCP server ({len(MCP_TOOL_NAMES)} tools). "
3901+
"No auth required."
38803902
),
38813903
"auth": {"type": "none"},
38823904
"api": {
@@ -4019,6 +4041,43 @@ def build_well_known(canons: list[dict]) -> None:
40194041
"inputModes": ["text"],
40204042
"outputModes": ["text"],
40214043
},
4044+
{
4045+
"id": "list-errors-by-country",
4046+
"name": "List Errors By Country",
4047+
"description": (
4048+
"List country-scoped dead ends by ISO alpha-2 code: "
4049+
"visa, banking, legal, cultural, medical, food-safety, "
4050+
"emergency - jurisdiction knowledge generic LLM "
4051+
"training data gets wrong."
4052+
),
4053+
"tags": ["country", "visa", "legal", "travel", "jurisdiction"],
4054+
"examples": ["kr", "jp", "us", "de", "th"],
4055+
"inputModes": ["text"],
4056+
"outputModes": ["text"],
4057+
},
4058+
{
4059+
"id": "get-country-summary",
4060+
"name": "Country Coverage Summary",
4061+
"description": (
4062+
"Country-level summary: total entries, domain "
4063+
"breakdown, average fix rate, latest update. Use to "
4064+
"assess coverage before relying on country data."
4065+
),
4066+
"tags": ["country", "stats", "coverage"],
4067+
"inputModes": ["text"],
4068+
"outputModes": ["text"],
4069+
},
4070+
{
4071+
"id": "report-outcome",
4072+
"name": "Report Workaround Outcome",
4073+
"description": (
4074+
"Report whether a workaround worked or failed. "
4075+
"Feedback improves fix_success_rate for future agents."
4076+
),
4077+
"tags": ["feedback", "outcomes", "write"],
4078+
"inputModes": ["text"],
4079+
"outputModes": ["text"],
4080+
},
40224081
],
40234082
"authentication": {"schemes": ["none"]},
40244083
"documentationUrl": f"{BASE_URL}/api/v1/openapi.json",
@@ -4056,12 +4115,7 @@ def build_well_known(canons: list[dict]) -> None:
40564115
"args": ["-m", "mcp.server"],
40574116
"transport": "stdio",
40584117
},
4059-
"tools": [
4060-
"lookup_error", "get_error_detail", "search_errors",
4061-
"batch_lookup", "get_error_chain", "list_error_domains",
4062-
"list_errors_by_domain", "get_domain_stats",
4063-
"list_errors_by_country", "get_country_summary",
4064-
],
4118+
"tools": list(MCP_TOOL_NAMES),
40654119
"domains": domains,
40664120
"homepage": BASE_URL,
40674121
"repository": "https://github.com/dbwls99706/deadends.dev",
@@ -4083,7 +4137,7 @@ def build_well_known(canons: list[dict]) -> None:
40834137
f"workarounds, error chains. {len(canons)} error entries across "
40844138
f"{len(domains)} domains."
40854139
),
4086-
"version": "1.5.0",
4140+
"version": "1.6.0",
40874141
"homepage": BASE_URL,
40884142
"repository": "https://github.com/dbwls99706/deadends.dev",
40894143
"license": "MIT",
@@ -4158,6 +4212,13 @@ def build_well_known(canons: list[dict]) -> None:
41584212
"average fix rate, latest update."
41594213
),
41604214
},
4215+
{
4216+
"name": "report_outcome",
4217+
"description": (
4218+
"Report whether a workaround worked or failed - feedback "
4219+
"improves fix_success_rate for future agents."
4220+
),
4221+
},
41614222
],
41624223
"domains": domains,
41634224
}
@@ -4441,10 +4502,13 @@ def build_ai_config_files(canons: list[dict]) -> None:
44414502
| `/api/v1/index.json` | Complete error index |
44424503
| `/api/v1/stats.json` | Dataset quality metrics |
44434504
| `/api/v1/errors.ndjson` | Streaming format |
4505+
| `/api/v1/countries.json` | Country index (country-scoped dead ends) |
4506+
| `/api/v1/country/{{cc}}.json` | Per-country aggregate (ISO alpha-2) |
44444507
| `/llms.txt` | LLM-optimized summary |
44454508
| `/llms-full.txt` | Complete plaintext dump |
4509+
| `/llms-full-{{domain}}.txt` | Per-domain plaintext slice |
44464510
4447-
## MCP Server (8 tools)
4511+
## MCP Server ({len(MCP_TOOL_NAMES)} tools)
44484512
44494513
```json
44504514
{{
@@ -4458,9 +4522,7 @@ def build_ai_config_files(canons: list[dict]) -> None:
44584522
}}
44594523
```
44604524
4461-
Tools: `lookup_error`, `get_error_detail`, `search_errors`,
4462-
`batch_lookup`, `get_error_chain`, `list_error_domains`,
4463-
`list_errors_by_domain`, `get_domain_stats`
4525+
Tools: {", ".join(f"`{t}`" for t in MCP_TOOL_NAMES)}
44644526
"""
44654527
(SITE_DIR / "CLAUDE.md").write_text(claude_md, encoding="utf-8")
44664528

@@ -4488,9 +4550,11 @@ def build_ai_config_files(canons: list[dict]) -> None:
44884550
→ dead_ends: "pip install X" fails 85% when the issue is a venv mismatch
44894551
→ workaround: "python -m pip install X" in the correct venv works 90%
44904552
4491-
MCP server available with 8 tools: lookup_error, get_error_detail,
4492-
search_errors, batch_lookup, get_error_chain, list_error_domains,
4493-
list_errors_by_domain, get_domain_stats
4553+
Country-specific dead ends (visa, banking, legal, cultural, medical,
4554+
emergency) are also covered: {BASE_URL}/api/v1/country/{{cc}}.json
4555+
(ISO alpha-2 code, e.g. kr, jp, us).
4556+
4557+
MCP server available with {len(MCP_TOOL_NAMES)} tools: {", ".join(MCP_TOOL_NAMES)}
44944558
44954559
Full API docs: {BASE_URL}/api/v1/openapi.json
44964560
"""
@@ -4525,9 +4589,18 @@ def build_ai_config_files(canons: list[dict]) -> None:
45254589
| Full error data | `/api/v1/{{id}}.json` |
45264590
| All errors | `/api/v1/index.json` |
45274591
| By domain | `/api/v1/stats.json` |
4592+
| By country (visa/legal/etc.) | `/api/v1/country/{{cc}}.json` |
45284593
| Stream all | `/api/v1/errors.ndjson` |
45294594
| LLM summary | `/llms.txt` |
45304595
| Full dump | `/llms-full.txt` |
4596+
| Per-domain dump | `/llms-full-{{domain}}.txt` |
4597+
4598+
## MCP Server ({len(MCP_TOOL_NAMES)} tools)
4599+
4600+
`python -m mcp.server` (stdio) or HTTPS endpoint at
4601+
https://deadends-dev.vercel.app/api/mcp
4602+
4603+
Tools: {", ".join(f"`{t}`" for t in MCP_TOOL_NAMES)}
45314604
"""
45324605
(SITE_DIR / "AGENTS.md").write_text(agents_md, encoding="utf-8")
45334606

generator/lookup.py

Lines changed: 35 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,12 @@
3030

3131
_CANONS_CACHE: list[dict] | None = None
3232

33+
# Compiled regex cache, index-aligned with the canons list it was built from.
34+
# The stdlib re module caches at most 512 patterns internally, so with 2000+
35+
# canons every lookup would otherwise recompile the full pattern set.
36+
_REGEX_CACHE: list["re.Pattern | None"] = []
37+
_REGEX_CACHE_SOURCE: list[dict] | None = None
38+
3339

3440
def _load_canons() -> list[dict]:
3541
"""Load all canon data (cached after first call)."""
@@ -45,6 +51,32 @@ def _load_canons() -> list[dict]:
4551
return canons
4652

4753

54+
def _get_compiled_regexes(canons: list[dict]) -> list["re.Pattern | None"]:
    """Return compiled regexes index-aligned with ``canons``, compiling once.

    Each canon's ``canon["error"]["regex"]`` is compiled with
    ``re.IGNORECASE``. The result is stored in the module-level cache and
    reused while the same list object, with the same length, is passed in.

    Args:
        canons: The canon list to compile patterns for.

    Returns:
        A list the same length as ``canons``. An entry is ``None`` when the
        canon has a missing or invalid regex (or is not a mapping at all); a
        warning is written to stderr once, at compile time, rather than on
        every lookup.
    """
    global _REGEX_CACHE, _REGEX_CACHE_SOURCE
    # Identity alone is not sufficient: if the same list object was grown or
    # shrunk in place, the cached patterns would no longer line up with it and
    # a zip() over both would silently drop or misalign entries.
    if _REGEX_CACHE_SOURCE is canons and len(_REGEX_CACHE) == len(canons):
        return _REGEX_CACHE

    compiled: list[re.Pattern | None] = []
    for canon in canons:
        try:
            compiled.append(re.compile(canon["error"]["regex"], re.IGNORECASE))
        except (re.error, KeyError, TypeError) as e:
            # TypeError also covers a canon that is not a dict, so the id
            # lookup must not assume ``.get`` exists; otherwise the handler
            # itself would raise and abort the whole lookup.
            canon_id = canon.get("id", "?") if isinstance(canon, dict) else "?"
            print(
                f"[lookup] skipping invalid regex in canon "
                f"{canon_id}: {e}",
                file=sys.stderr,
            )
            compiled.append(None)
    _REGEX_CACHE = compiled
    _REGEX_CACHE_SOURCE = canons
    return compiled
78+
79+
4880
def _compute_freshness(canon: dict) -> str:
4981
"""Compute freshness status based on last_confirmed date.
5082
@@ -143,13 +175,11 @@ def lookup_all(error_message: str) -> list[dict]:
143175
extracted = _extract_error_lines(error_message)
144176

145177
canons = _load_canons()
178+
patterns = _get_compiled_regexes(canons)
146179
matches = []
147180

148-
for canon in canons:
149-
try:
150-
pattern = re.compile(canon["error"]["regex"], re.IGNORECASE)
151-
except re.error as e:
152-
print(f"[lookup] skipping canon with invalid regex: {e}", file=sys.stderr)
181+
for canon, pattern in zip(canons, patterns):
182+
if pattern is None:
153183
continue
154184

155185
try:

generator/templates/index.html

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,9 @@
137137
NDJSON_STREAM={{ base_url }}/api/v1/errors.ndjson
138138
STATS_API={{ base_url }}/api/v1/stats.json
139139
FEED={{ base_url }}/feed.xml
140-
MCP_SERVER=python -m mcp.server (8 tools: lookup_error, get_error_detail, search_errors, batch_lookup, get_error_chain, list_error_domains, list_errors_by_domain, get_domain_stats)</pre>
140+
COUNTRY_INDEX={{ base_url }}/api/v1/countries.json
141+
COUNTRY_PATTERN={{ base_url }}/api/v1/country/{cc}.json
142+
MCP_SERVER=python -m mcp.server ({{ mcp_tools|length }} tools: {{ mcp_tools | join(', ') }})</pre>
141143

142144
<header>
143145
<h1>deadends.dev</h1>

scripts/collect_github_signals.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ def score_item(item: dict, labels: list[str]) -> tuple[int, list[str]]:
6565
if (item.get("comments") or 0) >= 2:
6666
score += 1
6767
reasons.append("has_multiple_comments")
68-
lower_labels = [l.lower() for l in labels if l]
68+
lower_labels = [label.lower() for label in labels if label]
6969
if any(k in lower_labels for k in ["bug", "fix", "regression", "confirmed"]):
7070
score += 1
7171
reasons.append("quality_labels")

tests/test_build.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -460,3 +460,21 @@ def test_non_string_returns_empty(self):
460460
def test_valid_signature_returns_list(self):
461461
result = _generate_variations("ModuleNotFoundError", "Module.*", "python")
462462
assert isinstance(result, list)
463+
464+
465+
class TestMcpToolNames:
    """Guard against drift between advertised and registered MCP tools.

    MCP_TOOL_NAMES feeds every AI discovery surface the site builder emits
    (llms.txt, ai-plugin.json, agent.json, mcp.json, server-card.json,
    CLAUDE.md, .cursorrules, homepage ai-summary), so it has to stay equal
    to the tool registry exposed by the server.
    """

    def test_mcp_tool_names_match_server(self):
        # Imported lazily so that collecting this module does not require
        # the server package to be importable.
        from generator.build_site import MCP_TOOL_NAMES
        from mcp.server import TOOLS

        drift_message = (
            "generator/build_site.py MCP_TOOL_NAMES is out of sync with "
            "mcp/server.py TOOLS - update MCP_TOOL_NAMES so AI discovery "
            "files advertise the real tool set."
        )

        # Collect the registered names in registry order; the comparison
        # below is order-sensitive.
        registered = []
        for tool in TOOLS:
            registered.append(tool["name"])

        assert MCP_TOOL_NAMES == registered, drift_message

0 commit comments

Comments
 (0)