|
| 1 | +#!/usr/bin/env python3 |
| 2 | +""" |
| 3 | +validate_branch_existence.py (EDUENG-614) |
| 4 | +
|
| 5 | +For every row in src/current/_data/versions.csv, verifies that the listed |
| 6 | +crdb_branch_name exists as a branch in cockroachdb/generated-diagrams. |
| 7 | +
|
| 8 | +Also flags entries where versions.csv still points to an older branch even |
| 9 | +though the "natural" release-X.Y branch for that version now exists |
| 10 | +(e.g. v26.2 pointing to release-26.1 after release-26.2 is created). |
| 11 | +
|
| 12 | +Usage: |
| 13 | + python .github/scripts/validate_branch_existence.py |
| 14 | +
|
| 15 | + # Run built-in unit tests (no network required): |
| 16 | + python .github/scripts/validate_branch_existence.py --self-test |
| 17 | +
|
| 18 | +Exit codes: |
| 19 | + 0 all checks passed |
| 20 | + 1 one or more issues found |
| 21 | + 2 fatal error (versions.csv not found) |
| 22 | +
|
| 23 | +Environment: |
| 24 | + GITHUB_TOKEN Optional. Raises API rate limit from 60 to 5000 req/hr. |
| 25 | + GITHUB_ACTIONS Set automatically in CI. Enables pr-comment.md output. |
| 26 | +""" |
| 27 | + |
| 28 | +import contextlib |
| 29 | +import csv |
| 30 | +import io |
| 31 | +import json |
| 32 | +import os |
| 33 | +import re |
| 34 | +import sys |
| 35 | +import urllib.error |
| 36 | +import urllib.parse |
| 37 | +import urllib.request |
| 38 | +from pathlib import Path |
| 39 | + |
# "owner/name" slug of the repository whose branches are validated.
GENERATED_DIAGRAMS_REPO = "cockroachdb/generated-diagrams"
# Root URL that every REST API path is appended to.
GITHUB_API_BASE = "https://api.github.com"
# Location of versions.csv, relative to the repository root.
VERSIONS_CSV = Path("src", "current", "_data", "versions.csv")
| 43 | + |
| 44 | +# --------------------------------------------------------------------------- |
| 45 | +# HTTP |
| 46 | +# --------------------------------------------------------------------------- |
| 47 | + |
def _api_get(path: str) -> dict | None:
    """GET ``{GITHUB_API_BASE}/{path}`` and return the decoded JSON body.

    The request carries the GitHub JSON media type and API-version headers,
    plus a bearer token when the GITHUB_TOKEN environment variable is set.

    Returns:
        The parsed JSON response, or None when the server answers 404.

    Raises:
        urllib.error.HTTPError: for any HTTP error status other than 404.
        OSError: for network-level failures (connection errors, timeouts).
        ValueError: when the response body cannot be decoded as JSON.
    """
    url = f"{GITHUB_API_BASE}/{path}"
    headers = {
        "Accept": "application/vnd.github+json",
        "X-GitHub-Api-Version": "2022-11-28",
    }
    token = os.environ.get("GITHUB_TOKEN")
    if token:
        headers["Authorization"] = f"Bearer {token}"
    req = urllib.request.Request(url, headers=headers)
    try:
        with urllib.request.urlopen(req, timeout=15) as resp:
            return json.loads(resp.read().decode())
    except urllib.error.HTTPError as exc:
        # Only 404 means "resource does not exist".
        if exc.code == 404:
            return None
        raise
    except (OSError, ValueError) as exc:
        # A failed or garbled request must not be reported as None: callers
        # read None as "not found", which would turn a transient outage into
        # a false "missing" result. Log the URL for context and re-raise.
        print(f" Error: request to {url} failed: {exc}", file=sys.stderr)
        raise
| 66 | + |
| 67 | + |
| 68 | +# --------------------------------------------------------------------------- |
| 69 | +# Core logic |
| 70 | +# --------------------------------------------------------------------------- |
| 71 | + |
# Memo of branch name -> lookup result, so each name is looked up at most once.
_cache: dict[str, bool] = {}


def branch_exists(branch: str) -> bool:
    """Report whether *branch* exists in GENERATED_DIAGRAMS_REPO.

    The answer is memoized in ``_cache``; only the first query for a given
    name calls ``_api_get``. A None result from ``_api_get`` is recorded as
    "does not exist".
    """
    try:
        return _cache[branch]
    except KeyError:
        pass

    # Percent-encode everything (including "/") so the name is one path segment.
    quoted_name = urllib.parse.quote(branch, safe="")
    payload = _api_get(f"repos/{GENERATED_DIAGRAMS_REPO}/branches/{quoted_name}")
    found = payload is not None
    _cache[branch] = found
    return found
| 81 | + |
| 82 | + |
def load_versions_csv() -> list[dict]:
    """Read VERSIONS_CSV and return its rows as dicts keyed by column header.

    The file is decoded as UTF-8 regardless of the platform default. The
    "utf-8-sig" codec also drops a leading byte-order mark, which would
    otherwise become part of the first column name and make that column
    unreadable by its real name.

    Exits the process with status 2 when the file does not exist.
    """
    try:
        # Open directly instead of checking existence first: one less race,
        # and the missing-file case is still reported the same way.
        with open(VERSIONS_CSV, newline="", encoding="utf-8-sig") as f:
            return list(csv.DictReader(f))
    except FileNotFoundError:
        print(f"Error: {VERSIONS_CSV} not found. Run from the repo root.", file=sys.stderr)
        sys.exit(2)
| 89 | + |
| 90 | + |
def run_checks(rows: list[dict], _exists_fn=None) -> list[dict]:
    """Check each versions.csv row for branch existence and staleness.

    Two checks run per row, using the "major_version" and
    "crdb_branch_name" columns:

    (a) The listed branch must exist. Each distinct branch is probed and
        printed once; a missing branch is reported against the first row
        that lists it, and that row skips check (b).
    (b) If the listed branch differs from the version's own
        ``release-X.Y`` branch and that branch exists, the row is stale.
        Each such expected branch is reported at most once.

    Rows whose branch is empty or "N/A" are skipped entirely.

    Args:
        rows: Row dicts, e.g. as produced by csv.DictReader. Missing or
            None cells are treated as empty strings.
        _exists_fn: Callable taking a branch name and returning a bool.
            Injectable for unit tests; defaults to branch_exists. It is
            called at most once per distinct branch name.

    Returns:
        A list of failure dicts with keys "type" ("branch_missing" or
        "branch_mismatch"), "version", "branch", "message", and, for
        mismatches only, "expected".
    """
    if _exists_fn is None:
        _exists_fn = branch_exists

    # Local memo so that a branch probed for one check can be reused by the
    # other without a second lookup, whatever _exists_fn does internally.
    probed: dict[str, bool] = {}

    def exists(name: str) -> bool:
        if name not in probed:
            probed[name] = _exists_fn(name)
        return probed[name]

    failures: list[dict] = []
    listed: set[str] = set()   # branches already handled by check (a)
    flagged: set[str] = set()  # expected branches already reported by (b)

    for row in rows:
        # DictReader fills short rows with None, so .get(key, "") is not enough.
        version = (row.get("major_version") or "").strip()
        branch = (row.get("crdb_branch_name") or "").strip()
        if not branch or branch == "N/A":
            continue

        # (a) Does the listed branch exist?
        if branch not in listed:
            listed.add(branch)
            print(f" {version:8s} → {branch} ...", end=" ", flush=True)
            if exists(branch):
                print("OK")
            else:
                print("MISSING")
                failures.append({
                    "type": "branch_missing",
                    "version": version,
                    "branch": branch,
                    "message": (
                        f"{version}: crdb_branch_name={branch!r} does not exist "
                        f"in cockroachdb/generated-diagrams."
                    ),
                })
                continue

        # (b) Is the version still pointing to an older branch?
        #     e.g. v26.2 → release-26.1 when release-26.2 now exists.
        if not version:
            # Without a version there is no expected branch name to derive.
            continue
        # removeprefix drops exactly one leading "v"; lstrip would strip a
        # whole run of "v" characters.
        expected = f"release-{version.removeprefix('v')}"
        if branch == expected or expected in flagged:
            continue
        # This check must not depend on whether `expected` was already seen
        # as another row's listed branch; that would hide real mismatches.
        if exists(expected):
            flagged.add(expected)
            failures.append({
                "type": "branch_mismatch",
                "version": version,
                "branch": branch,
                "expected": expected,
                "message": (
                    f"{version}: crdb_branch_name={branch!r} but {expected!r} "
                    f"now exists in cockroachdb/generated-diagrams. "
                    f"Update versions.csv to use {expected!r}."
                ),
            })

    return failures
| 146 | + |
| 147 | + |
| 148 | +# --------------------------------------------------------------------------- |
| 149 | +# Output |
| 150 | +# --------------------------------------------------------------------------- |
| 151 | + |
def format_comment(failures: list[dict]) -> str:
    """Render the check results as a Markdown comment body.

    An empty *failures* list yields the "Passed" message. Otherwise the
    result is a "Failed" header followed by one bullet per failure, built
    from its "type" and "message" keys: mismatches get a warning icon and
    every other type gets a cross.
    """
    if failures:
        icon_for = {"branch_mismatch": ":warning:"}
        header = [
            "## Branch Existence Check: Failed",
            "",
            f"Found **{len(failures)}** issue(s) in `versions.csv`:",
            "",
            "> **Context**: [EDUENG-614](https://cockroachlabs.atlassian.net/browse/EDUENG-614)",
            "",
        ]
        bullets = [
            f"- {icon_for.get(item['type'], ':x:')} {item['message']}"
            for item in failures
        ]
        return "\n".join(header + bullets)

    return (
        "## Branch Existence Check: Passed\n\n"
        "All `crdb_branch_name` entries in `versions.csv` exist in "
        "`cockroachdb/generated-diagrams`."
    )
| 173 | + |
| 174 | + |
| 175 | +# --------------------------------------------------------------------------- |
| 176 | +# Self-tests (no network required) |
| 177 | +# --------------------------------------------------------------------------- |
| 178 | + |
def _run_self_tests() -> None:
    """Exercise run_checks with stubbed existence functions, then exit 0.

    No network access is needed: every case injects its own ``_exists_fn``.
    Output from run_checks is captured and discarded. A failing assertion
    propagates as AssertionError instead of reaching the final exit.
    """

    def _silent_check(rows, exists_fn):
        sink = io.StringIO()
        with contextlib.redirect_stdout(sink):
            return run_checks(rows, _exists_fn=exists_fn)

    def _forbidden(_branch):
        # Used where run_checks must not query any branch at all.
        raise AssertionError("unexpected call")

    # branch_missing: listed branch does not exist
    result = _silent_check(
        [{"major_version": "v26.1", "crdb_branch_name": "release-26.1"}],
        lambda b: False,
    )
    assert len(result) == 1, result
    assert result[0]["type"] == "branch_missing", result

    # all OK: branch exists and matches expected
    result = _silent_check(
        [{"major_version": "v26.1", "crdb_branch_name": "release-26.1"}],
        lambda b: True,
    )
    assert result == [], result

    # branch_mismatch: listed branch exists but a newer canonical branch also exists
    existing = {"release-26.1", "release-26.2"}
    result = _silent_check(
        [{"major_version": "v26.2", "crdb_branch_name": "release-26.1"}],
        lambda b: b in existing,
    )
    assert len(result) == 1, result
    assert result[0]["type"] == "branch_mismatch", result
    assert result[0]["expected"] == "release-26.2", result

    # N/A entries are skipped entirely
    result = _silent_check(
        [{"major_version": "v24.1", "crdb_branch_name": "N/A"}],
        _forbidden,
    )
    assert result == [], result

    # empty branch field is skipped
    result = _silent_check(
        [{"major_version": "v25.1", "crdb_branch_name": ""}],
        _forbidden,
    )
    assert result == [], result

    print("All self-tests passed.")
    sys.exit(0)
| 217 | + |
| 218 | + |
| 219 | +# --------------------------------------------------------------------------- |
| 220 | +# Entry point |
| 221 | +# --------------------------------------------------------------------------- |
| 222 | + |
def main() -> None:
    """Run the branch checks against versions.csv and exit with a status code.

    With ``--self-test`` on the command line, only the built-in unit tests
    run. Otherwise versions.csv is loaded, every row is checked, and, when
    the GITHUB_ACTIONS environment variable is set, the Markdown report is
    appended to the file named by GITHUB_STEP_SUMMARY (if set) and written
    to pr-comment.md.

    Exit codes:
        0  all checks passed
        1  one or more issues found
        2  fatal error (versions.csv not found, or the branch lookup failed)
    """
    if "--self-test" in sys.argv:
        _run_self_tests()  # exits the process itself

    rows = load_versions_csv()
    print(f"Checking {len(rows)} versions.csv entries against cockroachdb/generated-diagrams...\n")
    try:
        failures = run_checks(rows)
    except (OSError, ValueError) as exc:
        # urllib reports HTTP and network failures as OSError subclasses;
        # ValueError covers an undecodable response body should one propagate.
        # Without this, an uncaught error ends the run with status 1, which
        # this script defines as "issues found" rather than a fatal error.
        print(f"\nError: could not complete the branch lookup: {exc}", file=sys.stderr)
        sys.exit(2)

    comment = format_comment(failures)
    if os.environ.get("GITHUB_ACTIONS"):
        summary = os.environ.get("GITHUB_STEP_SUMMARY")
        if summary:
            # Append so anything already written to the summary is preserved.
            with open(summary, "a", encoding="utf-8") as fh:
                fh.write(comment + "\n")
        Path("pr-comment.md").write_text(comment, encoding="utf-8")

    if failures:
        print("\n--- Issues ---", file=sys.stderr)
        for f in failures:
            print(f" [{f['type']}] {f['message']}", file=sys.stderr)
        print(f"\nTotal: {len(failures)} issue(s).", file=sys.stderr)
        sys.exit(1)
    else:
        print("\nAll branch existence checks passed.")
        sys.exit(0)


if __name__ == "__main__":
    main()
0 commit comments