Merge pull request #127 from dbwls99706/claude/project-review-analysis-jghi99

dbwls99706 · web-flow · commit db0df8d33cfb · 2026-07-03T00:19:40.000+09:00
perf + seo: 15x faster lookup SDK, sync all AI discovery surfaces to the real 11-tool MCP registry
diff --git a/AGENTS.md b/AGENTS.md
@@ -41,7 +41,8 @@ https://deadends.dev/country/{cc}/ for each country's entries.
 
 MCP tools: lookup_error, get_error_detail, search_errors, batch_lookup,
 get_error_chain, list_error_domains, list_errors_by_domain,
-get_domain_stats, report_outcome.
+get_domain_stats, list_errors_by_country, get_country_summary,
+report_outcome.
 
 MCP config:
 ```json
diff --git a/CLAUDE.md b/CLAUDE.md
@@ -62,7 +62,7 @@ generator/
   validate.py          # Validation: schema, business rules, HTML, cross-refs, staleness
 
 mcp/
-  server.py            # MCP server (JSON-RPC over stdio) - 8 tools for AI agents
+  server.py            # MCP server (JSON-RPC over stdio) - 11 tools for AI agents
 
 api/
   mcp.py               # Vercel serverless MCP endpoint
@@ -209,7 +209,8 @@ Optional overrides for `generator/build_site.py` (defaults work out of the box):
 
 ## MCP Server
 
-The MCP server exposes 8 read-only tools over stdio (JSON-RPC):
+The MCP server exposes 11 tools over stdio (JSON-RPC). All are read-only
+except `report_outcome`, which appends feedback to `data/outcomes/`:
 
 1. `lookup_error` - Match error message against regex patterns
 2. `get_error_detail` - Full canon by ID
@@ -219,6 +220,9 @@ The MCP server exposes 8 read-only tools over stdio (JSON-RPC):
 6. `batch_lookup` - Look up multiple errors at once (max 10)
 7. `get_domain_stats` - Domain statistics and confidence levels
 8. `get_error_chain` - Traverse error transition graph
+9. `list_errors_by_country` - Country-scoped dead ends (ISO alpha-2 code)
+10. `get_country_summary` - Country-level coverage summary
+11. `report_outcome` - Record whether a workaround worked (write)
 
 Configuration via environment variables:
 - `DEADENDS_PREFERRED_DOMAINS` - Comma-separated domain boost list
diff --git a/generator/build_site.py b/generator/build_site.py
@@ -39,6 +39,25 @@
 # Previously, non-tech domains were noindexed to preserve crawl budget.
 # Removed: all domains are now indexed to maximise Google coverage.
 
+# Canonical MCP tool list - the single source of truth for every AI
+# discovery surface this builder emits (llms.txt, ai-plugin.json,
+# agent.json, mcp.json, server-card.json, CLAUDE.md, AGENTS.md, .cursorrules,
+# homepage ai-summary). Must equal mcp/server.py TOOLS names, in order; see
+# tests/test_build.py::TestMcpToolNames::test_mcp_tool_names_match_server.
+MCP_TOOL_NAMES = [
+    "lookup_error",
+    "get_error_detail",
+    "list_error_domains",
+    "search_errors",
+    "list_errors_by_domain",
+    "batch_lookup",
+    "get_domain_stats",
+    "list_errors_by_country",
+    "get_country_summary",
+    "get_error_chain",
+    "report_outcome",
+]
+
 
 def load_canons(data_dir: Path) -> list[dict]:
     """Load all ErrorCanon JSON files from the data directory."""
@@ -970,6 +989,7 @@ def build_index_page(canons: list[dict], jinja_env: Environment) -> None:
         mrr=mrr,
         precision_at_3=precision_at_3,
         demo_errors=demo_errors,
+        mcp_tools=MCP_TOOL_NAMES,
         google_verification=GOOGLE_VERIFICATION,
         bing_verification=BING_VERIFICATION,
     )
@@ -3119,10 +3139,8 @@ def build_llms_txt(canons: list[dict]) -> None:
         "python -m mcp.server  # stdio mode",
         "```",
         "",
-        "Tools: `lookup_error`, `get_error_detail`, `search_errors`, "
-        "`batch_lookup`, `get_error_chain`, `list_error_domains`, "
-        "`list_errors_by_domain`, `get_domain_stats`, "
-        "`list_errors_by_country`, `get_country_summary`",
+        f"Tools ({len(MCP_TOOL_NAMES)}): "
+        + ", ".join(f"`{t}`" for t in MCP_TOOL_NAMES),
         "",
         "### Option 2: REST API",
         "",
@@ -3139,6 +3157,9 @@ def build_llms_txt(canons: list[dict]) -> None:
         "",
         f"- [Complete Database]({BASE_URL}/llms-full.txt): "
         "All errors in plaintext (load into context window)",
+        f"- Per-domain slices: `{BASE_URL}/llms-full-{{domain}}.txt` "
+        "(bounded size - load only the domain you need, "
+        "e.g. `llms-full-python.txt`, `llms-full-docker.txt`)",
         "",
         "## How to Use",
         "",
@@ -3876,7 +3897,8 @@ def build_well_known(canons: list[dict]) -> None:
             "(leads_to, preceded_by, frequently_confused_with). "
             "Alt: GET /llms.txt for text summary, "
             "GET /api/v1/errors.ndjson for streaming, "
-            "or use MCP server (8 tools). No auth required."
+            f"or use MCP server ({len(MCP_TOOL_NAMES)} tools). "
+            "No auth required."
         ),
         "auth": {"type": "none"},
         "api": {
@@ -4019,6 +4041,43 @@ def build_well_known(canons: list[dict]) -> None:
                 "inputModes": ["text"],
                 "outputModes": ["text"],
             },
+            {
+                "id": "list-errors-by-country",
+                "name": "List Errors By Country",
+                "description": (
+                    "List country-scoped dead ends by ISO alpha-2 code: "
+                    "visa, banking, legal, cultural, medical, food-safety, "
+                    "emergency - jurisdiction knowledge generic LLM "
+                    "training data gets wrong."
+                ),
+                "tags": ["country", "visa", "legal", "travel", "jurisdiction"],
+                "examples": ["kr", "jp", "us", "de", "th"],
+                "inputModes": ["text"],
+                "outputModes": ["text"],
+            },
+            {
+                "id": "get-country-summary",
+                "name": "Country Coverage Summary",
+                "description": (
+                    "Country-level summary: total entries, domain "
+                    "breakdown, average fix rate, latest update. Use to "
+                    "assess coverage before relying on country data."
+                ),
+                "tags": ["country", "stats", "coverage"],
+                "inputModes": ["text"],
+                "outputModes": ["text"],
+            },
+            {
+                "id": "report-outcome",
+                "name": "Report Workaround Outcome",
+                "description": (
+                    "Report whether a workaround worked or failed. "
+                    "Feedback improves fix_success_rate for future agents."
+                ),
+                "tags": ["feedback", "outcomes", "write"],
+                "inputModes": ["text"],
+                "outputModes": ["text"],
+            },
         ],
         "authentication": {"schemes": ["none"]},
         "documentationUrl": f"{BASE_URL}/api/v1/openapi.json",
@@ -4056,12 +4115,7 @@ def build_well_known(canons: list[dict]) -> None:
             "args": ["-m", "mcp.server"],
             "transport": "stdio",
         },
-        "tools": [
-            "lookup_error", "get_error_detail", "search_errors",
-            "batch_lookup", "get_error_chain", "list_error_domains",
-            "list_errors_by_domain", "get_domain_stats",
-            "list_errors_by_country", "get_country_summary",
-        ],
+        "tools": list(MCP_TOOL_NAMES),
         "domains": domains,
         "homepage": BASE_URL,
         "repository": "https://github.com/dbwls99706/deadends.dev",
@@ -4083,7 +4137,7 @@ def build_well_known(canons: list[dict]) -> None:
             f"workarounds, error chains. {len(canons)} error entries across "
             f"{len(domains)} domains."
         ),
-        "version": "1.5.0",
+        "version": "1.6.0",
         "homepage": BASE_URL,
         "repository": "https://github.com/dbwls99706/deadends.dev",
         "license": "MIT",
@@ -4158,6 +4212,13 @@ def build_well_known(canons: list[dict]) -> None:
                     "average fix rate, latest update."
                 ),
             },
+            {
+                "name": "report_outcome",
+                "description": (
+                    "Report whether a workaround worked or failed - feedback "
+                    "improves fix_success_rate for future agents."
+                ),
+            },
         ],
         "domains": domains,
     }
@@ -4441,10 +4502,13 @@ def build_ai_config_files(canons: list[dict]) -> None:
 | `/api/v1/index.json` | Complete error index |
 | `/api/v1/stats.json` | Dataset quality metrics |
 | `/api/v1/errors.ndjson` | Streaming format |
+| `/api/v1/countries.json` | Country index (country-scoped dead ends) |
+| `/api/v1/country/{{cc}}.json` | Per-country aggregate (ISO alpha-2) |
 | `/llms.txt` | LLM-optimized summary |
 | `/llms-full.txt` | Complete plaintext dump |
+| `/llms-full-{{domain}}.txt` | Per-domain plaintext slice |
 
-## MCP Server (8 tools)
+## MCP Server ({len(MCP_TOOL_NAMES)} tools)
 
 ```json
 {{
@@ -4458,9 +4522,7 @@ def build_ai_config_files(canons: list[dict]) -> None:
 }}
 ```
 
-Tools: `lookup_error`, `get_error_detail`, `search_errors`,
-`batch_lookup`, `get_error_chain`, `list_error_domains`,
-`list_errors_by_domain`, `get_domain_stats`
+Tools: {", ".join(f"`{t}`" for t in MCP_TOOL_NAMES)}
 """
     (SITE_DIR / "CLAUDE.md").write_text(claude_md, encoding="utf-8")
 
@@ -4488,9 +4550,11 @@ def build_ai_config_files(canons: list[dict]) -> None:
 → dead_ends: "pip install X" fails 85% when the issue is a venv mismatch
 → workaround: "python -m pip install X" in the correct venv works 90%
 
-MCP server available with 8 tools: lookup_error, get_error_detail,
-search_errors, batch_lookup, get_error_chain, list_error_domains,
-list_errors_by_domain, get_domain_stats
+Country-specific dead ends (visa, banking, legal, cultural, medical,
+emergency) are also covered: {BASE_URL}/api/v1/country/{{cc}}.json
+(ISO alpha-2 code, e.g. kr, jp, us).
+
+MCP server available with {len(MCP_TOOL_NAMES)} tools: {", ".join(MCP_TOOL_NAMES)}
 
 Full API docs: {BASE_URL}/api/v1/openapi.json
 """
@@ -4525,9 +4589,18 @@ def build_ai_config_files(canons: list[dict]) -> None:
 | Full error data | `/api/v1/{{id}}.json` |
 | All errors | `/api/v1/index.json` |
 | By domain | `/api/v1/stats.json` |
+| By country (visa/legal/etc.) | `/api/v1/country/{{cc}}.json` |
 | Stream all | `/api/v1/errors.ndjson` |
 | LLM summary | `/llms.txt` |
 | Full dump | `/llms-full.txt` |
+| Per-domain dump | `/llms-full-{{domain}}.txt` |
+
+## MCP Server ({len(MCP_TOOL_NAMES)} tools)
+
+`python -m mcp.server` (stdio) or HTTPS endpoint at
+https://deadends-dev.vercel.app/api/mcp
+
+Tools: {", ".join(f"`{t}`" for t in MCP_TOOL_NAMES)}
 """
     (SITE_DIR / "AGENTS.md").write_text(agents_md, encoding="utf-8")
 
diff --git a/generator/lookup.py b/generator/lookup.py
@@ -30,6 +30,12 @@
 
 _CANONS_CACHE: list[dict] | None = None
 
+# Compiled regex cache, index-aligned with the canons list it was built from.
+# The stdlib re module caches at most 512 patterns internally, so with 2000+
+# canons every lookup would otherwise recompile the full pattern set.
+_REGEX_CACHE: list["re.Pattern | None"] = []
+_REGEX_CACHE_SOURCE: list[dict] | None = None
+
 
 def _load_canons() -> list[dict]:
     """Load all canon data (cached after first call)."""
@@ -45,6 +51,32 @@ def _load_canons() -> list[dict]:
     return canons
 
 
+def _get_compiled_regexes(canons: list[dict]) -> list["re.Pattern | None"]:
+    """Compile each canon's regex once; rebuilt only for a new list object.
+
+    Keyed on list identity, so in-place edits go undetected. Entries are None
+    for missing/invalid regexes (warned once here, not on every lookup).
+    """
+    global _REGEX_CACHE, _REGEX_CACHE_SOURCE
+    if _REGEX_CACHE_SOURCE is canons:
+        return _REGEX_CACHE
+
+    compiled: list[re.Pattern | None] = []
+    for canon in canons:
+        try:
+            compiled.append(re.compile(canon["error"]["regex"], re.IGNORECASE))
+        except (re.error, KeyError, TypeError) as e:
+            print(
+                f"[lookup] skipping invalid regex in canon "
+                f"{canon.get('id', '?')}: {e}",
+                file=sys.stderr,
+            )
+            compiled.append(None)
+    _REGEX_CACHE = compiled
+    _REGEX_CACHE_SOURCE = canons
+    return compiled
+
+
 def _compute_freshness(canon: dict) -> str:
     """Compute freshness status based on last_confirmed date.
 
@@ -143,13 +175,11 @@ def lookup_all(error_message: str) -> list[dict]:
     extracted = _extract_error_lines(error_message)
 
     canons = _load_canons()
+    patterns = _get_compiled_regexes(canons)
     matches = []
 
-    for canon in canons:
-        try:
-            pattern = re.compile(canon["error"]["regex"], re.IGNORECASE)
-        except re.error as e:
-            print(f"[lookup] skipping canon with invalid regex: {e}", file=sys.stderr)
+    for canon, pattern in zip(canons, patterns):
+        if pattern is None:
             continue
 
         try:
diff --git a/generator/templates/index.html b/generator/templates/index.html
@@ -137,7 +137,9 @@
 NDJSON_STREAM={{ base_url }}/api/v1/errors.ndjson
 STATS_API={{ base_url }}/api/v1/stats.json
 FEED={{ base_url }}/feed.xml
-MCP_SERVER=python -m mcp.server (8 tools: lookup_error, get_error_detail, search_errors, batch_lookup, get_error_chain, list_error_domains, list_errors_by_domain, get_domain_stats)</pre>
+COUNTRY_INDEX={{ base_url }}/api/v1/countries.json
+COUNTRY_PATTERN={{ base_url }}/api/v1/country/{cc}.json
+MCP_SERVER=python -m mcp.server ({{ mcp_tools|length }} tools: {{ mcp_tools | join(', ') }})</pre>
 
   <header>
     <h1>deadends.dev</h1>
diff --git a/scripts/collect_github_signals.py b/scripts/collect_github_signals.py
@@ -65,7 +65,7 @@ def score_item(item: dict, labels: list[str]) -> tuple[int, list[str]]:
     if (item.get("comments") or 0) >= 2:
         score += 1
         reasons.append("has_multiple_comments")
-    lower_labels = [l.lower() for l in labels if l]
+    lower_labels = [label.lower() for label in labels if label]
     if any(k in lower_labels for k in ["bug", "fix", "regression", "confirmed"]):
         score += 1
         reasons.append("quality_labels")
diff --git a/tests/test_build.py b/tests/test_build.py
@@ -460,3 +460,21 @@ def test_non_string_returns_empty(self):
     def test_valid_signature_returns_list(self):
         result = _generate_variations("ModuleNotFoundError", "Module.*", "python")
         assert isinstance(result, list)
+
+
+class TestMcpToolNames:
+    """MCP_TOOL_NAMES is the single source of truth for every AI
+    discovery surface (llms.txt, ai-plugin.json, agent.json, mcp.json,
+    server-card.json, CLAUDE.md, AGENTS.md, .cursorrules, homepage
+    ai-summary). It must never drift from the server tool registry."""
+
+    def test_mcp_tool_names_match_server(self):
+        from generator.build_site import MCP_TOOL_NAMES
+        from mcp.server import TOOLS
+
+        server_tools = [t["name"] for t in TOOLS]
+        assert MCP_TOOL_NAMES == server_tools, (
+            "generator/build_site.py MCP_TOOL_NAMES is out of sync with "
+            "mcp/server.py TOOLS - update MCP_TOOL_NAMES so AI discovery "
+            "files advertise the real tool set."
+        )
diff --git a/tests/test_lookup_regex_cache.py b/tests/test_lookup_regex_cache.py