|
| 1 | +#!/usr/bin/env python3 |
| 2 | +"""Audit GitHub Actions pin freshness against the upstream registries. |
| 3 | +
|
| 4 | +`check_action_pins.py` validates pin **shape** — does the @ref match the |
| 5 | +policy bucket. This script validates **freshness** — does the @ref still |
| 6 | +resolve to something upstream, and does the trailing `# vN.M.P` comment |
| 7 | +on a SHA pin still match the tag's current SHA? |
| 8 | +
|
| 9 | +Filed as #136 after PR #121 surfaced `astral-sh/setup-uv@v5` going |
| 10 | +silently dead — the tag stopped resolving to anything in March 2026, |
| 11 | +producing 0-jobs / 0-seconds CI failures. The shape gate doesn't catch |
| 12 | +that class; this freshness gate does. |
| 13 | +
|
| 14 | +Behaviour: |
| 15 | +
|
| 16 | +- Walks every workflow + composite-action file via the same |
| 17 | + `parse_workflow` machinery as `check_action_pins.py`. |
| 18 | +- For each tag pin (`@v8`, `@v8.0.0`): GET |
| 19 | + `https://api.github.com/repos/<action>/git/refs/tags/<ref>`. A 404 means |
| 20 | + the tag no longer exists upstream — emit `::warning::` (or `::error::` |
| 21 | + under strict mode). |
| 22 | +- For each SHA pin (`@<40-hex>` + trailing `# vN.M.P` comment): GET |
| 23 | + `/repos/<action>/git/refs/tags/<comment-version>` to fetch the tag's |
| 24 | + current SHA. If the tag exists and resolves to a different SHA than |
| 25 | + the pin, the upstream re-tagged — warn (potential supply-chain shift). |
| 26 | + If the tag's SHA is a tag object (annotated tag), dereference one |
| 27 | + level via `git/tags/<sha>` to get the commit SHA before comparing. |
| 28 | +- API failures (network, 4xx other than 404, 5xx) downgrade to |
| 29 | + `::warning::` — the gate's job is to surface drift, not be a |
| 30 | + transient-network tripwire. |
| 31 | +
|
| 32 | +Default: warn-not-fail (`exit 0` even on findings, with annotations). |
| 33 | +With `PIN_FRESHNESS_STRICT=1`, findings escalate to errors (`exit 1`), |
| 34 | +matching the `ASPIRATIONAL_STRICT=1` toggle pattern from #153. |
| 35 | +
|
| 36 | +Exit codes: |
| 37 | + 0 — every pin resolves cleanly OR strict mode is off and findings |
| 38 | + are surfaced as warnings only |
| 39 | + 1 — strict mode is on and one or more pins failed freshness checks |
| 40 | + 2 — script-level error (workflows dir missing, parse failure, no |
| 41 | + `GITHUB_TOKEN` set so we can't query the API) |
| 42 | +
|
| 43 | +Usage (from repo root, in CI with token): |
| 44 | +
|
| 45 | + GITHUB_TOKEN=... python .github/scripts/check_pin_freshness.py |
| 46 | +""" |
| 47 | + |
| 48 | +from __future__ import annotations |
| 49 | + |
| 50 | +import importlib.util |
| 51 | +import json |
| 52 | +import os |
| 53 | +import sys |
| 54 | +import urllib.error |
| 55 | +import urllib.request |
| 56 | +from pathlib import Path |
| 57 | +from typing import TYPE_CHECKING |
| 58 | + |
| 59 | +if TYPE_CHECKING: |
| 60 | + from types import ModuleType |
| 61 | + |
# Reuse `parse_workflow`, `_collect_yaml_files`, `_VERSION_COMMENT_RE`,
# `_SHA_RE`, etc. from check_action_pins.py rather than duplicate them.
# Importlib-based load mirrors the test pattern used elsewhere in the
# repo so this script stays standalone (no setup.py wiring needed).
#
# Directory holding this script; the sibling `check_action_pins.py` is
# located relative to it by `_load_pin_module`.
_SCRIPT_DIR = Path(__file__).parent
| 67 | + |
| 68 | + |
def _load_pin_module() -> ModuleType:
    """Import the sibling `check_action_pins.py` by file path.

    The module is registered in `sys.modules` under the name
    ``check_action_pins`` and then executed.

    Returns:
        The fully executed module object.

    Raises:
        RuntimeError: if importlib cannot produce a spec (or a loader)
            for the file.
    """
    source_file = _SCRIPT_DIR / "check_action_pins.py"
    module_spec = importlib.util.spec_from_file_location(
        "check_action_pins", source_file
    )
    loader = None if module_spec is None else module_spec.loader
    if module_spec is None or loader is None:
        msg = "could not load check_action_pins.py"
        raise RuntimeError(msg)

    loaded = importlib.util.module_from_spec(module_spec)
    # The module must be visible in sys.modules *before* its body runs:
    # `@dataclass` looks up `sys.modules[cls.__module__]` while it
    # processes a class, so the ActionRef dataclass would fail with an
    # AttributeError if this registration came after exec_module.
    sys.modules[module_spec.name] = loaded
    loader.exec_module(loaded)
    return loaded
| 83 | + |
| 84 | + |
# Shape-audit module, loaded once at import time; its helpers are reached
# below as `_pins.<name>`.
_pins = _load_pin_module()
# Root of the GitHub REST API; every request URL in this script is built
# from it.
_API_BASE = "https://api.github.com"
| 87 | + |
| 88 | + |
def _fetch_json(url: str, token: str) -> dict[str, object] | None:
    """GET a GitHub API URL and return the parsed JSON object, or None.

    Every failure mode collapses to None so the caller decides how to
    surface it — this keeps the gate from being a transient-CI tripwire:

    - HTTP errors (404, other 4xx, 5xx) and connection/timeout errors,
      including ones raised while the body is being read;
    - protocol-level errors from `http.client` (truncated or malformed
      responses);
    - a body that is not valid UTF-8 or not valid JSON;
    - a JSON payload that is not an object (e.g. a list).

    Args:
        url: Absolute URL to request.
        token: Token sent as an `Authorization: Bearer` header.

    Returns:
        The decoded JSON object as a dict, or None on any failure.
    """
    # Function-scope import: only the except clause below needs it.
    import http.client

    req = urllib.request.Request(  # noqa: S310 — fixed api.github.com host
        url,
        headers={
            "Authorization": f"Bearer {token}",
            "Accept": "application/vnd.github+json",
            "X-GitHub-Api-Version": "2022-11-28",
        },
    )
    try:
        with urllib.request.urlopen(req, timeout=10) as response:  # noqa: S310
            payload = json.loads(response.read().decode("utf-8"))
    # Parenthesised tuple: the bare `except A, B:` form only parses on
    # Python 3.14+. OSError is the common base of URLError, HTTPError and
    # TimeoutError, and also covers socket errors raised mid-read.
    except (
        OSError,
        http.client.HTTPException,
        json.JSONDecodeError,
        UnicodeDecodeError,
    ):
        return None
    return payload if isinstance(payload, dict) else None
| 110 | + |
| 111 | + |
def _resolve_tag_sha(action: str, tag: str, token: str) -> str | None:
    """Return the commit SHA the tag points at, or None on missing/error.

    Lightweight tags resolve in one GET: `/git/refs/tags/<tag>` returns a
    ref whose `object.sha` is the commit directly. Annotated tags need a
    further GET of `/git/tags/<obj>` to reach the object the tag wraps;
    that step is repeated (bounded) so a tag that points at another tag
    is still followed down to its commit.

    Args:
        action: The `uses:` target. Only its leading `owner/repo` part is
            used for the API path, so `owner/repo/sub/path` (a
            sub-directory action or reusable workflow) queries the
            repository that hosts it.
        tag: Tag name to look up.
        token: GitHub token forwarded to `_fetch_json`.

    Returns:
        The commit SHA, or None when the ref is missing, any request
        fails, the response is malformed, or the tag does not end at a
        commit.
    """
    # The REST API addresses repositories as owner/repo; drop any action
    # sub-path that follows.
    repo = "/".join(action.split("/")[:2])
    ref = _fetch_json(f"{_API_BASE}/repos/{repo}/git/refs/tags/{tag}", token)
    if ref is None:
        return None

    # Bounded so a malformed or cyclic chain of tag objects cannot loop
    # forever; real chains are one or two levels deep.
    max_tag_hops = 5
    obj = ref.get("object")
    for _ in range(max_tag_hops + 1):
        if not isinstance(obj, dict):
            return None
        obj_sha = obj.get("sha")
        if not isinstance(obj_sha, str):
            return None
        obj_type = obj.get("type")
        if obj_type == "commit":
            return obj_sha
        if obj_type != "tag":
            # Tag resolves to a tree/blob — not something a pin can match.
            return None
        # Annotated tag — dereference to the object it points at.
        annotated = _fetch_json(f"{_API_BASE}/repos/{repo}/git/tags/{obj_sha}", token)
        if annotated is None:
            return None
        obj = annotated.get("object")
    return None
| 143 | + |
| 144 | + |
def _check_tag_pin(ref: object, token: str) -> str | None:
    """Verify that a tag-pinned action's tag still resolves upstream.

    Args:
        ref: Parsed action reference; its `pin` and `action` attributes
            are read.
        token: GitHub token forwarded to `_resolve_tag_sha`.

    Returns:
        Warning text when the tag cannot be resolved (the lookup does not
        distinguish a 404 from any other API failure), otherwise None.
    """
    pinned_tag = ref.pin  # type: ignore[attr-defined]
    action_name = ref.action  # type: ignore[attr-defined]
    if _resolve_tag_sha(action_name, pinned_tag, token) is not None:
        return None
    return (
        f"{action_name}@{pinned_tag} — upstream tag no longer resolves "
        "(404 or API failure). If 404, the tag was deleted/renamed; "
        "bump to a current tag or SHA pin."
    )
| 156 | + |
| 157 | + |
def _check_sha_pin(ref: object, token: str) -> str | None:
    """Verify that a SHA pin still matches the tag named in its comment.

    Args:
        ref: Parsed action reference; its `comment`, `action` and `pin`
            attributes are read.
        token: GitHub token forwarded to `_resolve_tag_sha`.

    Returns:
        None when there is nothing to check (no comment, or no version
        found in it) or when the tag still resolves to the pinned SHA;
        otherwise warning text describing the stale or re-tagged pin.
    """
    comment = ref.comment  # type: ignore[attr-defined]
    if not comment:
        return None  # shape audit owns the missing-comment case
    version_match = _pins._VERSION_COMMENT_RE.search(comment)
    if version_match is None:
        return None

    claimed_tag = version_match.group(0)
    current_sha = _resolve_tag_sha(ref.action, claimed_tag, token)  # type: ignore[attr-defined]
    pinned_sha = ref.pin  # type: ignore[attr-defined]
    # Shared prefix of both messages: action, abbreviated pin, claimed tag.
    label = f"{ref.action}@{pinned_sha[:8]}… (commented `{claimed_tag}`) "  # type: ignore[attr-defined]

    if current_sha is None:
        return label + "— upstream tag no longer resolves; comment may be stale."
    # Hex SHAs are compared case-insensitively.
    if current_sha.lower() == pinned_sha.lower():
        return None
    return (
        label
        + "— upstream tag has been re-tagged to "
        + f"{current_sha[:8]}…; pin no longer matches the documented tag."
    )
| 179 | + |
| 180 | + |
def main() -> int:
    """Run the pin-freshness audit and return the process exit code.

    Collects every action reference from the workflow / composite-action
    files, checks each pinned reference against the GitHub API, prints
    one workflow annotation per finding plus a summary line, and appends
    `findings_count=<n>` to the file named by `GITHUB_OUTPUT` when that
    variable is set.

    Identical references (same action, pin and comment) are resolved only
    once; the verdict is reused for every other occurrence so repeated
    pins across workflows do not multiply API calls.

    Returns:
        0 — no findings, or findings with strict mode off;
        1 — findings while `PIN_FRESHNESS_STRICT=1`;
        2 — `GITHUB_TOKEN` is unset/empty or no workflow files were found.
    """
    token = os.environ.get("GITHUB_TOKEN", "")
    if not token:
        print(
            "::error::GITHUB_TOKEN required for pin-freshness audit "
            "(API rate limit + private-repo access)."
        )
        return 2

    yml_files = _pins._collect_yaml_files()
    if not yml_files:
        print("::error::no workflow / composite-action files found")
        return 2

    refs = []
    for path in yml_files:
        refs.extend(_pins.parse_workflow(path))

    strict = os.environ.get("PIN_FRESHNESS_STRICT", "") == "1"
    # Verdict cache: (action, pin, comment) -> problem text or None.
    verdicts: dict[tuple[object, object, object], str | None] = {}
    findings: list[tuple[object, str]] = []
    for ref in refs:
        if not ref.pin:
            continue  # shape audit catches missing-@
        key = (ref.action, ref.pin, getattr(ref, "comment", None))
        if key not in verdicts:
            if _pins._SHA_RE.match(ref.pin):
                verdicts[key] = _check_sha_pin(ref, token)
            else:
                verdicts[key] = _check_tag_pin(ref, token)
        problem = verdicts[key]
        if problem is not None:
            # Each occurrence is still reported at its own file/line.
            findings.append((ref, problem))

    severity = "error" if strict else "warning"
    for ref, problem in findings:
        print(f"::{severity} file={ref.file},line={ref.line}::{problem}")  # type: ignore[attr-defined]

    summary = (
        f"Pin-freshness audit: {len(refs)} pins checked across "
        f"{len(yml_files)} files; {len(findings)} finding(s)"
    )
    # Surface the finding count as a workflow output so the calling
    # workflow can decide whether to open a tracking issue. Skipped when
    # GITHUB_OUTPUT isn't set (local runs / tests).
    output_path = os.environ.get("GITHUB_OUTPUT", "")
    if output_path:
        with Path(output_path).open("a", encoding="utf-8") as fh:
            fh.write(f"findings_count={len(findings)}\n")
    if findings:
        suffix = " (strict — failing)" if strict else " (warn-only)"
        print(summary + suffix + ".")
        return 1 if strict else 0
    print(summary + ".")
    return 0
| 232 | + |
| 233 | + |
# Script entry point: run the audit and use main()'s return value as the
# process exit status.
if __name__ == "__main__":
    sys.exit(main())
0 commit comments