|
| 1 | +"""Handler: wiki_purge — remove wiki pages that fail the current classifier. |
| 2 | +
|
| 3 | +Re-evaluates every authored wiki page against the current classifier rules |
| 4 | +and deletes the ones that would no longer be admitted. Memories in the |
| 5 | +PostgreSQL/SQLite store are left untouched — only the markdown files in |
| 6 | +~/.claude/methodology/wiki/ are removed. |
| 7 | +
|
| 8 | +Use this after tightening classifier rules, after a backfill that |
| 9 | +polluted the wiki, or whenever the wiki has drifted away from curated |
| 10 | +knowledge toward session audit artefacts. |
| 11 | +""" |
| 12 | + |
| 13 | +from __future__ import annotations |
| 14 | + |
| 15 | +from pathlib import Path |
| 16 | +from typing import Any |
| 17 | + |
| 18 | +from mcp_server.core.wiki_classifier import classify_memory |
| 19 | +from mcp_server.infrastructure.config import WIKI_ROOT |
| 20 | +from mcp_server.shared.yaml_parser import parse_yaml_frontmatter |
| 21 | + |
| 22 | +# ── Schema ───────────────────────────────────────────────────────────── |
| 23 | + |
# Single source of truth for the authored page-kind directory names. Both the
# tool schema's ``enum`` and the ``_PAGE_DIRS`` scan filter are derived from
# this tuple so the two can never drift apart.
_PAGE_KINDS: tuple[str, ...] = (
    "adr",
    "conventions",
    "guides",
    "journal",
    "lessons",
    "notes",
    "reference",
    "specs",
)

# Tool description and JSON input schema exposed for this handler.
schema = {
    "description": (
        "Re-evaluate every authored wiki page against the current classifier "
        "rules and delete the ones that no longer pass the admission gate. "
        "Memories remain in the store (still available via recall); only the "
        "wiki markdown files are removed. Use this after a backfill that "
        "polluted the wiki with session artefacts (file access, URL access, "
        "stage reports, code reviews), or after tightening classifier rules. "
        "Returns keep/purge counts plus the list of purged relative paths. "
        "Always runs a dry-run by default — pass apply=true to actually delete."
    ),
    "inputSchema": {
        "type": "object",
        "required": [],
        "properties": {
            "apply": {
                "type": "boolean",
                "description": (
                    "If true, actually delete the files. If false (default), "
                    "only report what would be purged."
                ),
                "default": False,
            },
            "kind": {
                "type": "string",
                "description": (
                    "Restrict the purge to a single page-kind directory. "
                    "Omit to scan all page kinds."
                ),
                # Same values, same order, as the page-kind tuple above.
                "enum": list(_PAGE_KINDS),
                "examples": ["notes", "lessons"],
            },
        },
    },
}

# Directories that hold authored page-kind content. Anything else under the
# wiki root (_kinds, _rules, _views, _bibliography, _triggers, .generated)
# is deliberately left alone.
_PAGE_DIRS: frozenset[str] = frozenset(_PAGE_KINDS)
| 84 | + |
| 85 | + |
| 86 | +def _parse_tags(raw: Any) -> list[str]: |
| 87 | + """Extract a list of tag strings from frontmatter value (list or CSV).""" |
| 88 | + if isinstance(raw, list): |
| 89 | + return [str(t) for t in raw] |
| 90 | + if not isinstance(raw, str): |
| 91 | + return [] |
| 92 | + stripped = raw.strip().strip("[]") |
| 93 | + return [t.strip().strip("'\"") for t in stripped.split(",") if t.strip()] |
| 94 | + |
| 95 | + |
def _evaluate_page(md_path: Path) -> tuple[str | None, list[str]]:
    """Run the classifier over one wiki page.

    The page is read as UTF-8 (undecodable bytes are ignored), its
    frontmatter is split off, and the remaining body, minus a leading
    ``# `` heading line, is handed to ``classify_memory`` together with the
    page's tags. When the body is empty the frontmatter ``title`` is
    classified instead.

    Returns:
        A ``(decision, tags)`` pair, where ``decision`` is whatever
        ``classify_memory`` returned (``None`` is treated by the caller as
        "not admitted") and ``tags`` are the parsed frontmatter tags.
    """
    raw_text = md_path.read_text(encoding="utf-8", errors="ignore")
    parsed = parse_yaml_frontmatter(raw_text)
    page_tags = _parse_tags(parsed.meta.get("tags"))

    body_lines = (parsed.body or "").strip().splitlines()
    # Drop the first line when it is a level-one markdown heading so only the
    # text below it is classified.
    if body_lines and body_lines[0].startswith("# "):
        del body_lines[0]

    content = "\n".join(body_lines).strip()
    if not content:
        # Nothing but a heading (or nothing at all): fall back to the title.
        content = str(parsed.meta.get("title", ""))

    return classify_memory(content, page_tags), page_tags
| 107 | + |
| 108 | + |
def _as_flag(value: Any) -> bool:
    """Interpret a loosely-typed tool argument as a boolean flag.

    Strings are compared against an explicit allow-list so that values such
    as ``"false"`` or ``"0"`` (truthy under plain ``bool()``) do not switch
    on a destructive operation. Non-string values use normal truthiness.
    """
    if isinstance(value, str):
        return value.strip().lower() in {"1", "true", "yes", "on"}
    return bool(value)


async def handler(args: dict[str, Any] | None = None) -> dict[str, Any]:
    """Purge wiki pages that no longer pass the classifier.

    Args:
        args: Optional tool arguments.
            ``apply``: when true, files are actually deleted; otherwise the
            call is a dry run that only reports what would be purged.
            ``kind``: restrict the scan to one page-kind directory; must be
            one of ``_PAGE_DIRS``. Omitted or empty means all page kinds.

    Returns:
        On success, a dict with ``applied``, ``scanned``, ``kept``,
        ``purged``, ``purged_paths`` (relative to the wiki root),
        ``errors`` (``"<path>: <message>"`` strings) and ``root``.
        If the wiki root is missing or ``kind`` is not a known page kind,
        a dict with a single ``error`` key is returned instead and nothing
        is touched.
    """
    args = args or {}
    apply = _as_flag(args.get("apply", False))
    kind_filter = args.get("kind")

    root = Path(WIKI_ROOT).expanduser()
    if not root.exists():
        return {"error": f"wiki root does not exist: {root}"}

    # The schema advertises an enum, but nothing guarantees the caller honoured
    # it. Refuse anything outside the authored page kinds so this tool can
    # never be pointed at _rules, _views, .generated, etc.
    if kind_filter and (
        not isinstance(kind_filter, str) or kind_filter not in _PAGE_DIRS
    ):
        return {
            "error": (
                f"unknown page kind: {kind_filter!r}; "
                f"expected one of {sorted(_PAGE_DIRS)}"
            )
        }

    target_dirs = {kind_filter} if kind_filter else _PAGE_DIRS
    kept: list[str] = []
    purged: list[str] = []
    errors: list[str] = []

    # Materialise (and sort) the walk up front: files are deleted inside the
    # loop, and the sorted order keeps the report deterministic.
    for md in sorted(root.rglob("*.md")):
        rel = md.relative_to(root)
        if rel.parts[0] not in target_dirs:
            continue
        try:
            decision, _tags = _evaluate_page(md)
            if decision is not None:
                kept.append(str(rel))
                continue
            if apply:
                md.unlink()
            # Record only after a successful unlink so a failed delete is
            # reported under ``errors`` and not also counted as purged.
            purged.append(str(rel))
        except (OSError, ValueError) as exc:
            errors.append(f"{rel}: {exc}")

    # Clean up empty directories after an apply so the tree stays tidy. Only
    # the scanned page-kind trees are considered; everything else under the
    # wiki root is left exactly as it was.
    if apply and purged:
        for name in sorted(target_dirs):
            kind_dir = root / name
            if not kind_dir.is_dir():
                continue
            candidates = [p for p in kind_dir.rglob("*") if p.is_dir()]
            candidates.append(kind_dir)
            # Deepest first, so a parent emptied by removing its children is
            # itself removable later in the same pass.
            candidates.sort(key=lambda p: len(p.parts), reverse=True)
            for dir_path in candidates:
                if dir_path.name.startswith("_"):
                    continue
                try:
                    # rmdir() refuses non-empty directories, which is exactly
                    # the filter wanted here.
                    dir_path.rmdir()
                except OSError:
                    # Best effort: non-empty, already gone, or not removable.
                    pass

    return {
        "applied": apply,
        "scanned": len(kept) + len(purged),
        "kept": len(kept),
        "purged": len(purged),
        "purged_paths": purged,
        "errors": errors,
        "root": str(root),
    }
0 commit comments