|
| 1 | +#!/usr/bin/env python3 |
| 2 | +""" |
| 3 | +Audit & migrate pinned GitHub Actions off deprecated Node runtimes. |
| 4 | +
|
| 5 | +GitHub is deprecating the node16/node20 action runtimes in favour of node24. |
| 6 | +This repo pins third-party actions to full commit SHAs, so a deprecation means |
| 7 | +finding every pin whose action.yml still declares `using: node16|node20` and |
| 8 | +repinning it to the newest release that runs on node24. |
| 9 | +
|
| 10 | +What it does |
| 11 | +------------ |
| 12 | +1. Scans `.github/` for `uses: owner/repo[/subdir]@<40-hex-sha>` pins |
| 13 | + (workflows and composite actions). |
| 14 | +2. For each unique pin, reads `action.yml`/`action.yaml` *at that exact SHA* |
| 15 | + from raw.githubusercontent.com and extracts `runs.using`. |
| 16 | + - node24 / composite / docker -> already fine, left untouched |
| 17 | + - node16 / node20 -> AFFECTED |
| 18 | + - pinned commit unreachable (404, i.e. a stale/force-moved tag) -> AFFECTED |
| 19 | +3. For each affected action, lists tags with `git ls-remote` and finds the |
| 20 | + newest non-prerelease semver tag whose action.yml does not declare a |
| 21 | + deprecated Node runtime; repins every occurrence to that tag's commit SHA |
| 22 | + and rewrites the trailing `# vX.Y.Z` comment. |
| 23 | +
|
| 24 | +Data sources are `git ls-remote` (read-only) and raw.githubusercontent.com. |
| 25 | +Neither counts against the GitHub REST API rate limit, so no token is needed. |
| 26 | +
|
| 27 | +Usage |
| 28 | +----- |
| 29 | + python3 bump-actions-to-node24.py # dry run (default), prints plan |
| 30 | + python3 bump-actions-to-node24.py --apply # rewrite the workflow files |
| 31 | + python3 bump-actions-to-node24.py --report out.md # also write a markdown report |
| 32 | + python3 bump-actions-to-node24.py --root .github # scan root (default: .github) |
| 33 | +""" |
| 34 | +from __future__ import annotations |
| 35 | + |
| 36 | +import argparse |
| 37 | +import dataclasses |
| 38 | +import os |
| 39 | +import re |
| 40 | +import subprocess |
| 41 | +import sys |
| 42 | +import urllib.request |
| 43 | +import urllib.error |
| 44 | +from collections import defaultdict |
| 45 | + |
| 46 | +RAW = "https://raw.githubusercontent.com" |
| 47 | +USES_RE = re.compile( |
| 48 | + r"^(?P<prefix>\s*(?:-\s*)?uses:\s*)" |
| 49 | + r"(?P<repo>[A-Za-z0-9_.-]+/[A-Za-z0-9_./-]+?)" |
| 50 | + r"@(?P<sha>[0-9a-fA-F]{40})" |
| 51 | + r"(?P<rest>\s*(?:#.*)?)$" |
| 52 | +) |
| 53 | +USING_RE = re.compile(r"""^\s*using:\s*['"]?([A-Za-z0-9]+)['"]?""") |
| 54 | +SEMVER_RE = re.compile(r"^v?(\d+)\.(\d+)\.(\d+)$") # strict X.Y.Z, no prerelease |
| 55 | +DEPRECATED = {"node16", "node20", "node12", "node10"} |
| 56 | + |
| 57 | +_using_cache: dict[tuple, str | None] = {} |
| 58 | +_tags_cache: dict[str, dict[str, str]] = {} |
| 59 | + |
| 60 | + |
def fetch(url: str) -> tuple[int, str]:
    """Fetch *url* with an HTTP GET and return ``(status, body)``.

    Returns:
        ``(status, text)`` on a successful response, with the body decoded as
        UTF-8 (undecodable bytes replaced).
        ``(code, "")`` when the server answers with an HTTP error status.
        ``(0, "")`` for any other failure (malformed URL, DNS, timeout, ...);
        the error is printed to stderr so it is not mistaken for a 404.
    """
    try:
        # Building the Request inside the try means a malformed URL is reported
        # like any other fetch failure instead of raising ValueError.
        req = urllib.request.Request(url, headers={"User-Agent": "node24-audit"})
        with urllib.request.urlopen(req, timeout=30) as r:
            return r.status, r.read().decode("utf-8", "replace")
    except urllib.error.HTTPError as e:
        return e.code, ""
    except Exception as e:  # network hiccup -> surface, do not silently misclassify
        print(f" ! fetch error {url}: {e}", file=sys.stderr)
        return 0, ""
| 71 | + |
| 72 | + |
def split_repo(repo: str) -> tuple[str, str, str]:
    """Split ``owner/name[/sub...]`` into ``(owner, name, subdir)``.

    ``subdir`` is the remaining path joined with "/", or "" when the reference
    has no sub-directory component.
    """
    parts = repo.split("/")
    return parts[0], parts[1], "/".join(parts[2:])
| 77 | + |
| 78 | + |
def runtime_at(repo: str, ref: str) -> str | None:
    """Return the ``using:`` runtime declared by the action at *ref*.

    Tries ``action.yml`` then ``action.yaml`` under the action's sub-directory
    (if any) and returns the lower-cased value of the first ``using:`` line.

    Returns:
        The runtime string (e.g. ``"node20"``), ``"<no-using-key>"`` when a
        manifest was fetched but has no ``using:`` line, or ``None`` when
        neither manifest could be fetched.

    Results are memoised per ``(repo, ref)``. A ``None`` caused by a transport
    failure (``fetch`` status 0) is not memoised, so a transient network error
    is not remembered as a missing manifest for the rest of the run.
    """
    key = (repo, ref)
    if key in _using_cache:
        return _using_cache[key]
    owner, name, sub = split_repo(repo)
    sub = f"{sub}/" if sub else ""
    result: str | None = None
    transport_error = False
    for fname in ("action.yml", "action.yaml"):
        status, body = fetch(f"{RAW}/{owner}/{name}/{ref}/{sub}{fname}")
        if status == 0:
            # fetch() could not reach the server; the answer is unknown.
            transport_error = True
            continue
        if status != 200:
            continue
        matches = (USING_RE.match(line) for line in body.splitlines())
        result = next(
            (m.group(1).lower() for m in matches if m), "<no-using-key>"
        )
        break
    # Cache only definitive answers: a value, or None with no transport error.
    if result is not None or not transport_error:
        _using_cache[key] = result
    return result
| 100 | + |
| 101 | + |
def _parse_ls_remote_tags(output: str) -> dict[str, str]:
    """Turn ``git ls-remote --tags`` output into a tag -> SHA mapping."""
    direct: dict[str, str] = {}
    peeled: dict[str, str] = {}
    for line in output.splitlines():
        sha, sep, ref = line.partition("\t")
        if not sep or not ref.startswith("refs/tags/"):
            continue
        tag = ref[len("refs/tags/"):]
        if tag.endswith("^{}"):
            # "<tag>^{}" is the peeled entry: the commit an annotated tag points to.
            peeled[tag[:-3]] = sha
        else:
            direct[tag] = sha
    return {t: peeled.get(t, s) for t, s in direct.items()}


def tag_shas(repo: str) -> dict[str, str]:
    """tag -> commit SHA, preferring the peeled (^{}) commit for annotated tags.

    Runs ``git ls-remote --tags`` against ``https://github.com/<repo>`` and
    memoises successful results per repo.

    If git cannot be started, times out, or exits non-zero, the failure is
    printed to stderr and an empty mapping is returned *without* being cached,
    so a later call can retry. Credential prompting is disabled so a missing
    or private repository fails fast instead of waiting for input.
    """
    if repo in _tags_cache:
        return _tags_cache[repo]
    try:
        proc = subprocess.run(
            ["git", "ls-remote", "--tags", f"https://github.com/{repo}"],
            capture_output=True, text=True, timeout=60,
            env={**os.environ, "GIT_TERMINAL_PROMPT": "0"},
        )
    except (OSError, subprocess.TimeoutExpired) as e:
        print(f" ! git ls-remote failed for {repo}: {e}", file=sys.stderr)
        return {}
    if proc.returncode != 0:
        print(
            f" ! git ls-remote failed for {repo} "
            f"(exit {proc.returncode}): {proc.stderr.strip()}",
            file=sys.stderr,
        )
        return {}
    result = _parse_ls_remote_tags(proc.stdout)
    _tags_cache[repo] = result
    return result
| 126 | + |
| 127 | + |
def latest_node24(repo: str) -> tuple[str, str] | None:
    """Newest non-prerelease semver tag whose action.yml is NOT node<24.

    Tags are listed for ``owner/name`` (tags live on the repo, not the subdir)
    and filtered to strict ``X.Y.Z`` names. Candidates are checked from the
    highest version down; the first one whose runtime is known, is not in
    ``DEPRECATED`` and is not ``"<no-using-key>"`` wins.

    Returns:
        ``(tag, sha)`` for the selected tag, or ``None`` if no tag qualifies.
    """
    owner, name, _sub = split_repo(repo)
    tags = tag_shas(f"{owner}/{name}")  # tags live on the repo, not the subdir
    candidates = []
    for tag in tags:
        m = SEMVER_RE.match(tag)
        if m:
            # Numeric tuple first so the sort is by version, not by string.
            candidates.append((tuple(int(x) for x in m.groups()), tag))
    for _, tag in sorted(candidates, reverse=True):
        # The manifest is looked up with the full *repo* (including any subdir).
        using = runtime_at(repo, f"refs/tags/{tag}")
        if using and using not in DEPRECATED and using != "<no-using-key>":
            return tag, tags[tag]
    return None
| 142 | + |
| 143 | + |
@dataclasses.dataclass
class Pin:
    """One unique ``repo@sha`` action reference and where it is used."""

    # Action reference as written after `uses:` (owner/name[/subdir]).
    repo: str
    # Pinned 40-hex commit SHA.
    sha: str
    # Paths of the files that contain this pin.
    files: set[str] = dataclasses.field(default_factory=set)
    # Total number of matching lines across all files.
    count: int = 0
| 150 | + |
| 151 | + |
def scan(root: str) -> dict[tuple, Pin]:
    """Collect every SHA-pinned ``uses:`` reference under *root*.

    Walks *root* recursively, reads each ``.yml``/``.yaml`` file as UTF-8 and
    matches every line against ``USES_RE``.

    Returns:
        A mapping ``(repo, lower-cased sha) -> Pin``; each ``Pin`` records the
        files containing the reference and the number of matching lines.
    """
    pins: dict[tuple, Pin] = {}
    for dirpath, _, filenames in os.walk(root):
        for fn in filenames:
            if not fn.endswith((".yml", ".yaml")):
                continue
            path = os.path.join(dirpath, fn)
            with open(path, encoding="utf-8") as fh:
                for line in fh:
                    m = USES_RE.match(line.rstrip("\n"))
                    if not m:
                        continue
                    repo, sha = m.group("repo"), m.group("sha").lower()
                    # skip reusable-workflow refs (owner/repo/.github/workflows/x.yml@sha)
                    if repo.endswith((".yml", ".yaml")):
                        continue
                    key = (repo, sha)
                    p = pins.setdefault(key, Pin(repo, sha))
                    p.files.add(path)
                    p.count += 1
    return pins
| 173 | + |
| 174 | + |
def _repin_text(text: str, replacements: list) -> tuple[str, int]:
    """Apply ``(repo, oldsha, newsha, tag)`` replacements to *text*.

    Returns the rewritten text and the number of substitutions made.
    """
    total = 0
    for repo, oldsha, newsha, tag in replacements:
        pat = re.compile(
            r"(?P<prefix>(?:-[ \t]*)?uses:[ \t]*)" + re.escape(repo) +
            # [^\r\n] keeps a CRLF line ending out of the replaced comment.
            r"@" + re.escape(oldsha) + r"(?:[ \t]*#[^\r\n]*)?",
            re.IGNORECASE,
        )
        text, n = pat.subn(
            lambda m: f"{m.group('prefix')}{repo}@{newsha} # {tag}", text
        )
        total += n
    return text, total


def _write_report(path: str, root: str, total: int,
                  affected: list, ok: list, blocked: list) -> None:
    """Write the markdown audit report to *path*."""
    with open(path, "w", encoding="utf-8") as fh:
        fh.write("# Pinned-action Node runtime audit (target: node24)\n\n")
        fh.write(f"Scanned `{root}` — {total} unique SHA-pinned refs.\n\n")
        fh.write("## Affected — repin to latest node24\n\n")
        fh.write("| Action | Current | → Target tag | Target SHA | Uses |\n")
        fh.write("|---|---|---|---|---|\n")
        for pin, cur, tag, sha in affected:
            fh.write(f"| `{pin.repo}` | {cur} | `{tag}` | `{sha}` | {pin.count} |\n")
        fh.write("\n## Already OK (untouched)\n\n")
        fh.write("| Action@sha | Runtime | Uses |\n|---|---|---|\n")
        for pin, using in ok:
            fh.write(f"| `{pin.repo}@{pin.sha[:12]}` | {using} | {pin.count} |\n")
        if blocked:
            fh.write("\n## Blocked — no node24 release yet\n\n")
            for pin, cur in blocked:
                fh.write(f"- `{pin.repo}@{pin.sha[:12]}` ({cur}, {pin.count}x)\n")


def main() -> int:
    """Command-line entry point: audit pins, print the plan, optionally rewrite.

    Returns:
        ``2`` when ``--root`` is not a directory, otherwise ``0``.
    """
    ap = argparse.ArgumentParser(
        description=__doc__,
        # Keep the module docstring's layout instead of reflowing it.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    ap.add_argument("--root", default=".github", help="directory to scan (default: .github)")
    ap.add_argument("--apply", action="store_true", help="rewrite files (default: dry run)")
    ap.add_argument("--report", help="write a markdown report to this path")
    args = ap.parse_args()

    if not os.path.isdir(args.root):
        print(f"error: {args.root} not found (run from repo root)", file=sys.stderr)
        return 2

    pins = scan(args.root)
    print(f"Scanned {args.root}: {len(pins)} unique SHA-pinned action refs\n")

    affected = []  # (Pin, current_using, target_tag, target_sha)
    ok = []        # (Pin, current_using)
    blocked = []   # (Pin, current_using) -- no node24 release available

    for key in sorted(pins):
        pin = pins[key]
        using = runtime_at(pin.repo, pin.sha)
        is_stale = using is None
        if not is_stale and using not in DEPRECATED:
            ok.append((pin, using))
            continue
        target = latest_node24(pin.repo)
        cur = using if using else "stale/unreachable"
        if target is None:
            blocked.append((pin, cur))
            continue
        tag, sha = target
        if sha.lower() == pin.sha.lower():
            ok.append((pin, f"{cur} (already latest)"))
            continue
        affected.append((pin, cur, tag, sha))

    # ---- plan output ----
    print(f"AFFECTED (node<24 / stale) -> repin to latest node24: {len(affected)}")
    for pin, cur, tag, sha in affected:
        print(f" {pin.repo:<46} {cur:<18} -> {tag:<10} {sha} ({pin.count}x)")
    print(f"\nOK (node24 / composite / docker, untouched): {len(ok)}")
    for pin, using in ok:
        print(f" {pin.repo}@{pin.sha[:12]} {using} ({pin.count}x)")
    if blocked:
        print(f"\nBLOCKED (no node24 release exists yet -- manual review): {len(blocked)}")
        for pin, cur in blocked:
            print(f" {pin.repo}@{pin.sha[:12]} {cur} ({pin.count}x)")

    # ---- rewrite ----
    edits = 0
    if affected:
        remap = defaultdict(list)  # file -> list of (repo, oldsha, newsha, newtag)
        for pin, _cur, tag, sha in affected:
            for f in pin.files:
                remap[f].append((pin.repo, pin.sha, sha, tag))
        for f in sorted(remap):
            # newline="" disables newline translation so CRLF files keep
            # their line endings when written back.
            with open(f, encoding="utf-8", newline="") as fh:
                text = fh.read()
            new, n = _repin_text(text, remap[f])
            edits += n  # counted in dry runs too, so the plan shows its size
            if new != text and args.apply:
                with open(f, "w", encoding="utf-8", newline="") as fh:
                    fh.write(new)
        print(f"\n{'APPLIED' if args.apply else 'DRY RUN'}: {edits} line(s) "
              f"across {len(remap)} file(s)" + ("" if args.apply else " — re-run with --apply"))

    if args.report:
        _write_report(args.report, args.root, len(pins), affected, ok, blocked)
        print(f"\nReport written to {args.report}")

    return 0
| 272 | + |
| 273 | + |
# Run the audit when executed as a script; main()'s return value is the exit status.
if __name__ == "__main__":
    sys.exit(main())
0 commit comments