feat(roadmap): add gen-roadmap --check-github ground-truth validation (#800)

Dumbris · claude · web-flow · commit b2003bac03fa · 2026-07-02T18:50:52.000+03:00
The 2026-07-02 audit found roadmap.yaml chronically drifts from reality: tasks stayed `todo`/`in_review` while their PRs merged, an epic claimed `in_review` with no PR anywhere, and spec: links can point at nonexistent dirs producing false progress badges. `--check` only validates ROADMAP.md freshness against roadmap.yaml — nothing validated roadmap.yaml itself. Add a `--check-github` mode that cross-checks roadmap.yaml against ground truth: - PR status: MERGED but not done → ERROR; CLOSED-unmerged but in_progress/ in_review → ERROR; OPEN but done → ERROR; OPEN but todo → WARN; dangling ref → ERROR. Handles "#786" and full /pull/ URLs and lists; caches per PR. - spec: links must resolve to a real specs/ dir (ERROR); a spec shared by two distinct epics → WARN (badge double-count). - status sanity: in_review with no pr: → WARN (any item); in_progress with no pr: and no children → WARN (leaf only, umbrella epics delegate PRs); done epic with a non-done child → WARN. - exit 0 (no errors) / 1 (any error, or --strict with warnings) / 2 (gh missing or unauthenticated — offline spec/status checks still run). `--check` is untouched. Wire a non-blocking (continue-on-error) advisory step into .github/workflows/roadmap.yml. Docs updated in the generator template and roadmap.yaml header; ROADMAP.md regenerated. Co-authored-by: Claude Fable 5 <noreply@anthropic.com>
diff --git a/.github/workflows/roadmap.yml b/.github/workflows/roadmap.yml
@@ -24,6 +24,7 @@ on:
 
 permissions:
   contents: read
+  pull-requests: read
 
 jobs:
   roadmap-check:
@@ -39,3 +40,12 @@ jobs:
         run: pip install pyyaml
       - name: Verify ROADMAP.md is up to date
         run: python3 scripts/gen-roadmap.py --check
+      # Non-blocking: report roadmap.yaml drift vs live GitHub PR state, dangling
+      # spec: links, and status sanity. `gh` is preinstalled on GitHub runners;
+      # GH_TOKEN authenticates it. continue-on-error so drift is surfaced in the
+      # log without failing the build (roadmap.yaml is hand-maintained).
+      - name: Cross-check roadmap.yaml vs GitHub (advisory)
+        continue-on-error: true
+        env:
+          GH_TOKEN: ${{ github.token }}
+        run: python3 scripts/gen-roadmap.py --check-github
diff --git a/ROADMAP.md b/ROADMAP.md
@@ -13,7 +13,10 @@ The roadmap models cross-spec **epics → tasks** with a dependency DAG, executi
 ```bash
 python3 scripts/gen-roadmap.py     # writes ROADMAP.md
 scripts/gen-roadmap                # convenience wrapper (same thing)
-python3 scripts/gen-roadmap.py --check   # CI canary: fail if stale
+python3 scripts/gen-roadmap.py --check          # CI canary: fail if ROADMAP.md is stale
+python3 scripts/gen-roadmap.py --check-github   # cross-check statuses vs live GitHub PR state,
+                                                # spec links, and status sanity (add --strict
+                                                # to fail on warnings; needs an authenticated gh)
 ```
 
 ## roadmap.yaml schema (short form)
diff --git a/roadmap.yaml b/roadmap.yaml
@@ -12,6 +12,11 @@
 #     python3 scripts/gen-roadmap.py        # writes ROADMAP.md
 #     # or: scripts/gen-roadmap              # same thing (wrapper)
 #
+# Validate this file against ground truth (does not write ROADMAP.md):
+#     python3 scripts/gen-roadmap.py --check-github   # PR state vs status,
+#         # dangling spec: links, status sanity. Add --strict to fail on
+#         # warnings. Needs an authenticated `gh`; exits 2 if gh is missing.
+#
 # ── Schema ──────────────────────────────────────────────────────────────────
 # version:  schema version (int).
 # epics:    list of epic objects. Each epic:
diff --git a/scripts/gen-roadmap.py b/scripts/gen-roadmap.py
@@ -17,19 +17,28 @@
 is fully generated and safe to overwrite on every run.
 
 Usage:
-    python3 scripts/gen-roadmap.py [--check]
+    python3 scripts/gen-roadmap.py [--check | --check-github [--strict]]
 
-    --check   Exit non-zero if ROADMAP.md is out of date (does not write).
-              Useful as a CI canary.
+    --check         Exit non-zero if ROADMAP.md is out of date (does not write).
+                    Useful as a CI canary.
+    --check-github  Cross-check roadmap.yaml against ground truth (does not write):
+                    live GitHub PR state (via `gh`), spec: links resolving to
+                    real specs/ dirs, and status sanity. Reports ERROR/WARN and
+                    exits 1 on any ERROR, 0 otherwise; 2 if `gh` is unavailable.
+    --strict        With --check-github, promote warnings to errors for the exit
+                    code (report is unchanged).
 
 Pure stdlib + PyYAML (already used by scripts/check-settings-parity.py).
 Idempotent: running twice with no source change produces identical output.
 """
 from __future__ import annotations
 
 import argparse
+import json
 import os
 import re
+import shutil
+import subprocess
 import sys
 
 try:
@@ -43,6 +52,11 @@
 ROADMAP_MD = os.path.join(REPO_ROOT, "ROADMAP.md")
 SPECS_DIR = os.path.join(REPO_ROOT, "specs")
 
+# GitHub repo `gh` queries target for --check-github.
+REPO_SLUG = "smart-mcp-proxy/mcpproxy-go"
+# A PR ref inside a pr: field, either "#786" or ".../pull/786".
+PR_NUM_RE = re.compile(r"(?:#|/pull/)(\d+)")
+
 # A checkbox line: "- [ ] ...", "- [x] ...", "- [X] ..." (matches specs/README.md).
 CHECKBOX_RE = re.compile(r"^- \[([ xX])\]")
 
@@ -244,7 +258,10 @@ def render(data: dict) -> str:
     out.append("```bash")
     out.append("python3 scripts/gen-roadmap.py     # writes ROADMAP.md")
     out.append("scripts/gen-roadmap                # convenience wrapper (same thing)")
-    out.append("python3 scripts/gen-roadmap.py --check   # CI canary: fail if stale")
+    out.append("python3 scripts/gen-roadmap.py --check          # CI canary: fail if ROADMAP.md is stale")
+    out.append("python3 scripts/gen-roadmap.py --check-github   # cross-check statuses vs live GitHub PR state,")
+    out.append("                                                # spec links, and status sanity (add --strict")
+    out.append("                                                # to fail on warnings; needs an authenticated gh)")
     out.append("```")
     out.append("")
     out.append("## roadmap.yaml schema (short form)")
@@ -281,15 +298,247 @@ def render(data: dict) -> str:
     return "\n".join(out)
 
 
+# ── GitHub / ground-truth cross-check (--check-github) ──────────────────────
+class Finding:
+    """One report line: an ERROR or WARN against a roadmap item."""
+    __slots__ = ("level", "ref", "reason")
+
+    def __init__(self, level: str, ref: str, reason: str):
+        self.level = level  # "ERROR" | "WARN"
+        self.ref = ref
+        self.reason = reason
+
+
+def iter_items(data: dict):
+    """Yield metadata for every epic and task, in file order.
+
+    Each dict: item (raw), id, kind ('epic'|'task'), epic_id (owning epic),
+    has_children (bool). A task's owning epic id lets us attribute a task's
+    spec: link back to its epic for double-count detection.
+    """
+    for epic in data.get("epics", []):
+        children = epic.get("tasks") or []
+        yield {"item": epic, "id": epic["id"], "kind": "epic",
+               "epic_id": epic["id"], "has_children": bool(children)}
+        for t in children:
+            yield {"item": t, "id": t["id"], "kind": "task",
+                   "epic_id": epic["id"], "has_children": False}
+
+
+def ref_label(meta: dict) -> str:
+    if meta["kind"] == "epic":
+        return f"{meta['id']} (epic)"
+    return f"{meta['id']} (task · epic {meta['epic_id']})"
+
+
+def parse_pr_refs(pr) -> list[int]:
+    """Extract PR numbers from a pr: field ("#786", full URL, or a list)."""
+    if not pr:
+        return []
+    refs = pr if isinstance(pr, list) else [pr]
+    nums: list[int] = []
+    for r in refs:
+        for m in PR_NUM_RE.finditer(str(r)):
+            n = int(m.group(1))
+            if n not in nums:
+                nums.append(n)
+    return nums
+
+
+def gh_available() -> tuple[bool, str]:
+    """(ok, reason). ok=False means skip the live PR cross-check (exit 2)."""
+    if not shutil.which("gh"):
+        return False, "`gh` CLI not found on PATH"
+    try:
+        r = subprocess.run(["gh", "auth", "status"],
+                           capture_output=True, text=True)
+    except OSError as e:  # pragma: no cover
+        return False, f"could not execute `gh`: {e}"
+    if r.returncode != 0:
+        return False, "`gh` is not authenticated (`gh auth status` failed)"
+    return True, ""
+
+
+def gh_pr_state(number: int, repo: str, cache: dict) -> dict:
+    """Return {'state','mergedAt'} for a PR, or {'error': msg}. Cached per number."""
+    if number in cache:
+        return cache[number]
+    r = subprocess.run(
+        ["gh", "pr", "view", str(number), "--repo", repo,
+         "--json", "state,mergedAt"],
+        capture_output=True, text=True)
+    if r.returncode != 0:
+        cache[number] = {"error": (r.stderr.strip().splitlines() or ["not found"])[-1]}
+    else:
+        try:
+            cache[number] = json.loads(r.stdout)
+        except json.JSONDecodeError:
+            cache[number] = {"error": "unparseable `gh` JSON output"}
+    return cache[number]
+
+
+def check_pr_status(items: list[dict], repo: str, cache: dict) -> list[Finding]:
+    """Cross-check every pr: link against live GitHub state.
+
+    MERGED but not done → ERROR; CLOSED (unmerged) but in_progress/in_review →
+    ERROR; OPEN but done → ERROR; OPEN but todo → WARN; unresolvable ref → ERROR.
+    """
+    out: list[Finding] = []
+    for meta in items:
+        status = meta["item"].get("status", "todo")
+        for num in parse_pr_refs(meta["item"].get("pr")):
+            st = gh_pr_state(num, repo, cache)
+            if "error" in st:
+                out.append(Finding("ERROR", ref_label(meta),
+                    f"PR #{num} could not be resolved on GitHub "
+                    f"({st['error']}) — dangling pr: link."))
+                continue
+            state = st.get("state")               # OPEN | CLOSED | MERGED
+            if state == "MERGED":
+                if status != "done":
+                    out.append(Finding("ERROR", ref_label(meta),
+                        f"PR #{num} is MERGED but status is '{status}' "
+                        f"(expected 'done')."))
+            elif state == "CLOSED":
+                if status in ("in_progress", "in_review"):
+                    out.append(Finding("ERROR", ref_label(meta),
+                        f"PR #{num} is CLOSED (unmerged) but status is "
+                        f"'{status}'."))
+            elif state == "OPEN":
+                if status == "done":
+                    out.append(Finding("ERROR", ref_label(meta),
+                        f"PR #{num} is OPEN but status is 'done'."))
+                elif status == "todo":
+                    out.append(Finding("WARN", ref_label(meta),
+                        f"PR #{num} is OPEN (work started) but status is "
+                        f"still 'todo'."))
+    return out
+
+
+def check_spec_links(items: list[dict]) -> list[Finding]:
+    """Every spec: must resolve to a real specs/<NNN> dir (ERROR if not).
+    A spec shared by two different epics double-counts its badge (WARN)."""
+    out: list[Finding] = []
+    spec_to_epics: dict[str, set] = {}
+    for meta in items:
+        spec = meta["item"].get("spec")
+        if not spec:
+            continue
+        if not os.path.isdir(os.path.join(REPO_ROOT, spec)):
+            out.append(Finding("ERROR", ref_label(meta),
+                f"spec: '{spec}' does not resolve to a directory under specs/."))
+        # Attribute to the owning epic so an epic sharing a spec with its OWN
+        # child task is not flagged — only genuinely distinct epics are.
+        spec_to_epics.setdefault(spec, set()).add(meta["epic_id"])
+    for spec, epics in sorted(spec_to_epics.items()):
+        if len(epics) > 1:
+            out.append(Finding("WARN", f"spec {spec}",
+                f"shared by {len(epics)} distinct epics "
+                f"({', '.join(sorted(epics))}) — the Epics-table progress "
+                f"badge double-counts this spec."))
+    return out
+
+
+def check_status_sanity(items: list[dict]) -> list[Finding]:
+    """Reviews/in-flight work should have PR evidence; done epics should have
+    all children done.
+
+    in_review with no pr: → WARN for any item (an in-review claim with no PR
+    anywhere is exactly the drift this audit found). in_progress with no pr: →
+    WARN only for leaf items, since an umbrella epic legitimately delegates its
+    PRs to child tasks.
+    """
+    out: list[Finding] = []
+    for meta in items:
+        item = meta["item"]
+        status = item.get("status", "todo")
+        has_pr = bool(parse_pr_refs(item.get("pr")))
+        if not has_pr:
+            if status == "in_review":
+                out.append(Finding("WARN", ref_label(meta),
+                    "status 'in_review' but no pr: link — an in-review item "
+                    "should link its PR as evidence."))
+            elif status == "in_progress" and not meta["has_children"]:
+                out.append(Finding("WARN", ref_label(meta),
+                    "status 'in_progress' but no pr: link and no child tasks "
+                    "— nothing links the in-flight work."))
+        if meta["kind"] == "epic" and status == "done":
+            for t in item.get("tasks") or []:
+                if t.get("status") != "done":
+                    out.append(Finding("WARN", ref_label(meta),
+                        f"epic is 'done' but child task '{t['id']}' is "
+                        f"'{t.get('status', 'todo')}'."))
+    return out
+
+
+def print_report(findings: list[Finding], strict: bool) -> int:
+    errors = [f for f in findings if f.level == "ERROR"]
+    warnings = [f for f in findings if f.level == "WARN"]
+
+    print(f"roadmap.yaml ground-truth cross-check (repo {REPO_SLUG})\n")
+
+    def emit(group: list[Finding], head: str):
+        print(f"{head} ({len(group)}):")
+        if not group:
+            print("  none")
+        for f in group:
+            print(f"  [{f.level:<5}] {f.ref}")
+            print(f"          {f.reason}")
+        print()
+
+    emit(errors, "ERRORS")
+    emit(warnings, "WARNINGS")
+
+    print(f"Summary: {len(errors)} error(s), {len(warnings)} warning(s).")
+    if strict and warnings:
+        print("(--strict: warnings count as errors for the exit code.)")
+    if errors or (strict and warnings):
+        return 1
+    return 0
+
+
+def check_github(data: dict, strict: bool) -> int:
+    items = list(iter_items(data))
+    findings: list[Finding] = []
+
+    ok, reason = gh_available()
+    if ok:
+        cache: dict = {}
+        findings += check_pr_status(items, REPO_SLUG, cache)
+
+    # spec + status checks are offline and always run.
+    findings += check_spec_links(items)
+    findings += check_status_sanity(items)
+
+    if not ok:
+        print_report(findings, strict)
+        sys.stderr.write(
+            f"\nerror: PR cross-check skipped — {reason}. "
+            "Install and authenticate `gh` (`gh auth login`) to validate PR "
+            "state; offline spec/status checks above still ran.\n")
+        return 2
+
+    return print_report(findings, strict)
+
+
 def main() -> int:
     ap = argparse.ArgumentParser(description="Generate ROADMAP.md from roadmap.yaml")
     ap.add_argument("--check", action="store_true",
                     help="exit non-zero if ROADMAP.md is stale (do not write)")
+    ap.add_argument("--check-github", action="store_true",
+                    help="cross-check roadmap.yaml vs live GitHub PR state, "
+                         "spec links, and status sanity (does not write)")
+    ap.add_argument("--strict", action="store_true",
+                    help="with --check-github, promote warnings to errors "
+                         "for the exit code")
     args = ap.parse_args()
 
     with open(ROADMAP_YAML, encoding="utf-8") as fh:
         data = yaml.safe_load(fh)
 
+    if args.check_github:
+        return check_github(data, args.strict)
+
     rendered = render(data)
 
     if args.check: