Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,8 @@ https://deadends.dev/country/{cc}/ for each country's entries.

MCP tools: lookup_error, get_error_detail, search_errors, batch_lookup,
get_error_chain, list_error_domains, list_errors_by_domain,
get_domain_stats, report_outcome.
get_domain_stats, list_errors_by_country, get_country_summary,
report_outcome.

MCP config:
```json
Expand Down
8 changes: 6 additions & 2 deletions CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ generator/
validate.py # Validation: schema, business rules, HTML, cross-refs, staleness

mcp/
server.py # MCP server (JSON-RPC over stdio) - 8 tools for AI agents
server.py # MCP server (JSON-RPC over stdio) - 11 tools for AI agents

api/
mcp.py # Vercel serverless MCP endpoint
Expand Down Expand Up @@ -209,7 +209,8 @@ Optional overrides for `generator/build_site.py` (defaults work out of the box):

## MCP Server

The MCP server exposes 8 read-only tools over stdio (JSON-RPC):
The MCP server exposes 11 tools over stdio (JSON-RPC). All are read-only
except `report_outcome`, which appends feedback to `data/outcomes/`:

1. `lookup_error` - Match error message against regex patterns
2. `get_error_detail` - Full canon by ID
Expand All @@ -219,6 +220,9 @@ The MCP server exposes 8 read-only tools over stdio (JSON-RPC):
6. `batch_lookup` - Look up multiple errors at once (max 10)
7. `get_domain_stats` - Domain statistics and confidence levels
8. `get_error_chain` - Traverse error transition graph
9. `list_errors_by_country` - Country-scoped dead ends (ISO alpha-2 code)
10. `get_country_summary` - Country-level coverage summary
11. `report_outcome` - Record whether a workaround worked (write)

Configuration via environment variables:
- `DEADENDS_PREFERRED_DOMAINS` - Comma-separated domain boost list
Expand Down
111 changes: 92 additions & 19 deletions generator/build_site.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,25 @@
# Previously, non-tech domains were noindexed to preserve crawl budget.
# Removed: all domains are now indexed to maximise Google coverage.

# Canonical MCP tool list - the single source of truth for every AI
# discovery surface this builder emits (llms.txt, ai-plugin.json,
# agent.json, mcp.json, server-card.json, CLAUDE.md, AGENTS.md,
# .cursorrules, homepage ai-summary).
#
# Must match mcp/server.py TOOLS name-for-name AND in the same order: the
# guard test compares the two lists with ==, so a pure reordering fails it
# too. Enforced by
# tests/test_build.py::TestMcpToolNames::test_mcp_tool_names_match_server.
MCP_TOOL_NAMES = [
"lookup_error",
"get_error_detail",
"list_error_domains",
"search_errors",
"list_errors_by_domain",
"batch_lookup",
"get_domain_stats",
"list_errors_by_country",
"get_country_summary",
"get_error_chain",
"report_outcome",
]


def load_canons(data_dir: Path) -> list[dict]:
"""Load all ErrorCanon JSON files from the data directory."""
Expand Down Expand Up @@ -970,6 +989,7 @@ def build_index_page(canons: list[dict], jinja_env: Environment) -> None:
mrr=mrr,
precision_at_3=precision_at_3,
demo_errors=demo_errors,
mcp_tools=MCP_TOOL_NAMES,
google_verification=GOOGLE_VERIFICATION,
bing_verification=BING_VERIFICATION,
)
Expand Down Expand Up @@ -3119,10 +3139,8 @@ def build_llms_txt(canons: list[dict]) -> None:
"python -m mcp.server # stdio mode",
"```",
"",
"Tools: `lookup_error`, `get_error_detail`, `search_errors`, "
"`batch_lookup`, `get_error_chain`, `list_error_domains`, "
"`list_errors_by_domain`, `get_domain_stats`, "
"`list_errors_by_country`, `get_country_summary`",
f"Tools ({len(MCP_TOOL_NAMES)}): "
+ ", ".join(f"`{t}`" for t in MCP_TOOL_NAMES),
"",
"### Option 2: REST API",
"",
Expand All @@ -3139,6 +3157,9 @@ def build_llms_txt(canons: list[dict]) -> None:
"",
f"- [Complete Database]({BASE_URL}/llms-full.txt): "
"All errors in plaintext (load into context window)",
f"- Per-domain slices: `{BASE_URL}/llms-full-{{domain}}.txt` "
"(bounded size - load only the domain you need, "
"e.g. `llms-full-python.txt`, `llms-full-docker.txt`)",
"",
"## How to Use",
"",
Expand Down Expand Up @@ -3876,7 +3897,8 @@ def build_well_known(canons: list[dict]) -> None:
"(leads_to, preceded_by, frequently_confused_with). "
"Alt: GET /llms.txt for text summary, "
"GET /api/v1/errors.ndjson for streaming, "
"or use MCP server (8 tools). No auth required."
f"or use MCP server ({len(MCP_TOOL_NAMES)} tools). "
"No auth required."
),
"auth": {"type": "none"},
"api": {
Expand Down Expand Up @@ -4019,6 +4041,43 @@ def build_well_known(canons: list[dict]) -> None:
"inputModes": ["text"],
"outputModes": ["text"],
},
{
"id": "list-errors-by-country",
"name": "List Errors By Country",
"description": (
"List country-scoped dead ends by ISO alpha-2 code: "
"visa, banking, legal, cultural, medical, food-safety, "
"emergency - jurisdiction knowledge generic LLM "
"training data gets wrong."
),
"tags": ["country", "visa", "legal", "travel", "jurisdiction"],
"examples": ["kr", "jp", "us", "de", "th"],
"inputModes": ["text"],
"outputModes": ["text"],
},
{
"id": "get-country-summary",
"name": "Country Coverage Summary",
"description": (
"Country-level summary: total entries, domain "
"breakdown, average fix rate, latest update. Use to "
"assess coverage before relying on country data."
),
"tags": ["country", "stats", "coverage"],
"inputModes": ["text"],
"outputModes": ["text"],
},
{
"id": "report-outcome",
"name": "Report Workaround Outcome",
"description": (
"Report whether a workaround worked or failed. "
"Feedback improves fix_success_rate for future agents."
),
"tags": ["feedback", "outcomes", "write"],
"inputModes": ["text"],
"outputModes": ["text"],
},
],
"authentication": {"schemes": ["none"]},
"documentationUrl": f"{BASE_URL}/api/v1/openapi.json",
Expand Down Expand Up @@ -4056,12 +4115,7 @@ def build_well_known(canons: list[dict]) -> None:
"args": ["-m", "mcp.server"],
"transport": "stdio",
},
"tools": [
"lookup_error", "get_error_detail", "search_errors",
"batch_lookup", "get_error_chain", "list_error_domains",
"list_errors_by_domain", "get_domain_stats",
"list_errors_by_country", "get_country_summary",
],
"tools": list(MCP_TOOL_NAMES),
"domains": domains,
"homepage": BASE_URL,
"repository": "https://github.com/dbwls99706/deadends.dev",
Expand All @@ -4083,7 +4137,7 @@ def build_well_known(canons: list[dict]) -> None:
f"workarounds, error chains. {len(canons)} error entries across "
f"{len(domains)} domains."
),
"version": "1.5.0",
"version": "1.6.0",
"homepage": BASE_URL,
"repository": "https://github.com/dbwls99706/deadends.dev",
"license": "MIT",
Expand Down Expand Up @@ -4158,6 +4212,13 @@ def build_well_known(canons: list[dict]) -> None:
"average fix rate, latest update."
),
},
{
"name": "report_outcome",
"description": (
"Report whether a workaround worked or failed - feedback "
"improves fix_success_rate for future agents."
),
},
],
"domains": domains,
}
Expand Down Expand Up @@ -4441,10 +4502,13 @@ def build_ai_config_files(canons: list[dict]) -> None:
| `/api/v1/index.json` | Complete error index |
| `/api/v1/stats.json` | Dataset quality metrics |
| `/api/v1/errors.ndjson` | Streaming format |
| `/api/v1/countries.json` | Country index (country-scoped dead ends) |
| `/api/v1/country/{{cc}}.json` | Per-country aggregate (ISO alpha-2) |
| `/llms.txt` | LLM-optimized summary |
| `/llms-full.txt` | Complete plaintext dump |
| `/llms-full-{{domain}}.txt` | Per-domain plaintext slice |

## MCP Server (8 tools)
## MCP Server ({len(MCP_TOOL_NAMES)} tools)

```json
{{
Expand All @@ -4458,9 +4522,7 @@ def build_ai_config_files(canons: list[dict]) -> None:
}}
```

Tools: `lookup_error`, `get_error_detail`, `search_errors`,
`batch_lookup`, `get_error_chain`, `list_error_domains`,
`list_errors_by_domain`, `get_domain_stats`
Tools: {", ".join(f"`{t}`" for t in MCP_TOOL_NAMES)}
"""
(SITE_DIR / "CLAUDE.md").write_text(claude_md, encoding="utf-8")

Expand Down Expand Up @@ -4488,9 +4550,11 @@ def build_ai_config_files(canons: list[dict]) -> None:
→ dead_ends: "pip install X" fails 85% when the issue is a venv mismatch
→ workaround: "python -m pip install X" in the correct venv works 90%

MCP server available with 8 tools: lookup_error, get_error_detail,
search_errors, batch_lookup, get_error_chain, list_error_domains,
list_errors_by_domain, get_domain_stats
Country-specific dead ends (visa, banking, legal, cultural, medical,
emergency) are also covered: {BASE_URL}/api/v1/country/{{cc}}.json
(ISO alpha-2 code, e.g. kr, jp, us).

MCP server available with {len(MCP_TOOL_NAMES)} tools: {", ".join(MCP_TOOL_NAMES)}

Full API docs: {BASE_URL}/api/v1/openapi.json
"""
Expand Down Expand Up @@ -4525,9 +4589,18 @@ def build_ai_config_files(canons: list[dict]) -> None:
| Full error data | `/api/v1/{{id}}.json` |
| All errors | `/api/v1/index.json` |
| By domain | `/api/v1/stats.json` |
| By country (visa/legal/etc.) | `/api/v1/country/{{cc}}.json` |
| Stream all | `/api/v1/errors.ndjson` |
| LLM summary | `/llms.txt` |
| Full dump | `/llms-full.txt` |
| Per-domain dump | `/llms-full-{{domain}}.txt` |

## MCP Server ({len(MCP_TOOL_NAMES)} tools)

`python -m mcp.server` (stdio) or HTTPS endpoint at
https://deadends-dev.vercel.app/api/mcp

Tools: {", ".join(f"`{t}`" for t in MCP_TOOL_NAMES)}
"""
(SITE_DIR / "AGENTS.md").write_text(agents_md, encoding="utf-8")

Expand Down
40 changes: 35 additions & 5 deletions generator/lookup.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,12 @@

_CANONS_CACHE: list[dict] | None = None

# Compiled regex cache, index-aligned with the canons list it was built from.
# The stdlib re module caches at most 512 patterns internally, so with 2000+
# canons every lookup would otherwise recompile the full pattern set.
#
# _REGEX_CACHE holds one entry per canon (None where the regex is missing or
# invalid). _REGEX_CACHE_SOURCE remembers the exact list object the cache was
# built from; _get_compiled_regexes() compares it by identity (`is`) to decide
# whether the cache can be reused.
_REGEX_CACHE: list["re.Pattern | None"] = []
_REGEX_CACHE_SOURCE: list[dict] | None = None


def _load_canons() -> list[dict]:
"""Load all canon data (cached after first call)."""
Expand All @@ -45,6 +51,32 @@ def _load_canons() -> list[dict]:
return canons


def _get_compiled_regexes(canons: list[dict]) -> list["re.Pattern | None"]:
    """Return compiled regexes index-aligned with ``canons``, compiling once.

    The result is memoised in the module-level ``_REGEX_CACHE`` and reused
    for as long as the same list object is passed in *and* its length still
    matches the cache. The length check matters because callers ``zip`` the
    canons with the returned patterns: if the list were grown or shrunk in
    place, an identity-only check would hand back a stale cache and the zip
    would silently drop or misalign entries.

    Args:
        canons: Canon dicts; each is expected to carry its pattern at
            ``canon["error"]["regex"]``.

    Returns:
        A list with exactly ``len(canons)`` entries. Entry ``i`` is the
        pattern for ``canons[i]`` compiled with ``re.IGNORECASE``, or
        ``None`` when that canon's regex is missing or invalid (a warning
        is printed to stderr once, at compile time, instead of on every
        lookup).
    """
    global _REGEX_CACHE, _REGEX_CACHE_SOURCE
    # Same list object and same size -> the cached patterns are still aligned.
    if _REGEX_CACHE_SOURCE is canons and len(_REGEX_CACHE) == len(canons):
        return _REGEX_CACHE

    compiled: list[re.Pattern | None] = []
    for canon in canons:
        try:
            compiled.append(re.compile(canon["error"]["regex"], re.IGNORECASE))
        except (re.error, KeyError, TypeError) as e:
            # A malformed entry may not even be a dict; never let the warning
            # itself raise while we are handling the original error.
            canon_id = canon.get("id", "?") if isinstance(canon, dict) else "?"
            print(
                f"[lookup] skipping invalid regex in canon "
                f"{canon_id}: {e}",
                file=sys.stderr,
            )
            compiled.append(None)
    _REGEX_CACHE = compiled
    _REGEX_CACHE_SOURCE = canons
    return compiled


def _compute_freshness(canon: dict) -> str:
"""Compute freshness status based on last_confirmed date.

Expand Down Expand Up @@ -143,13 +175,11 @@ def lookup_all(error_message: str) -> list[dict]:
extracted = _extract_error_lines(error_message)

canons = _load_canons()
patterns = _get_compiled_regexes(canons)
matches = []

for canon in canons:
try:
pattern = re.compile(canon["error"]["regex"], re.IGNORECASE)
except re.error as e:
print(f"[lookup] skipping canon with invalid regex: {e}", file=sys.stderr)
for canon, pattern in zip(canons, patterns):
if pattern is None:
continue

try:
Expand Down
4 changes: 3 additions & 1 deletion generator/templates/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,9 @@
NDJSON_STREAM={{ base_url }}/api/v1/errors.ndjson
STATS_API={{ base_url }}/api/v1/stats.json
FEED={{ base_url }}/feed.xml
MCP_SERVER=python -m mcp.server (8 tools: lookup_error, get_error_detail, search_errors, batch_lookup, get_error_chain, list_error_domains, list_errors_by_domain, get_domain_stats)</pre>
COUNTRY_INDEX={{ base_url }}/api/v1/countries.json
COUNTRY_PATTERN={{ base_url }}/api/v1/country/{cc}.json
MCP_SERVER=python -m mcp.server ({{ mcp_tools|length }} tools: {{ mcp_tools | join(', ') }})</pre>

<header>
<h1>deadends.dev</h1>
Expand Down
2 changes: 1 addition & 1 deletion scripts/collect_github_signals.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ def score_item(item: dict, labels: list[str]) -> tuple[int, list[str]]:
if (item.get("comments") or 0) >= 2:
score += 1
reasons.append("has_multiple_comments")
lower_labels = [l.lower() for l in labels if l]
lower_labels = [label.lower() for label in labels if label]
if any(k in lower_labels for k in ["bug", "fix", "regression", "confirmed"]):
score += 1
reasons.append("quality_labels")
Expand Down
18 changes: 18 additions & 0 deletions tests/test_build.py
Original file line number Diff line number Diff line change
Expand Up @@ -460,3 +460,21 @@ def test_non_string_returns_empty(self):
def test_valid_signature_returns_list(self):
result = _generate_variations("ModuleNotFoundError", "Module.*", "python")
assert isinstance(result, list)


class TestMcpToolNames:
    """Guard against drift between the builder's tool list and the server.

    MCP_TOOL_NAMES feeds every AI discovery surface the site builder emits
    (llms.txt, ai-plugin.json, agent.json, mcp.json, server-card.json,
    CLAUDE.md, .cursorrules, homepage ai-summary), so it has to mirror the
    tool registry the MCP server actually exposes.
    """

    def test_mcp_tool_names_match_server(self):
        # Imported lazily so that collecting this module does not require
        # the generator and server packages to import cleanly.
        from generator.build_site import MCP_TOOL_NAMES
        from mcp.server import TOOLS

        drift_message = (
            "generator/build_site.py MCP_TOOL_NAMES is out of sync with "
            "mcp/server.py TOOLS - update MCP_TOOL_NAMES so AI discovery "
            "files advertise the real tool set."
        )

        # Collect the registered names in declaration order; the comparison
        # below is order-sensitive.
        registered = []
        for tool in TOOLS:
            registered.append(tool["name"])

        assert MCP_TOOL_NAMES == registered, drift_message
Loading
Loading