|
5 | 5 | branches: [main] |
6 | 6 | paths: |
7 | 7 | - 'site/**' |
8 | | - - 'README.md' |
9 | | - - 'CONTRIBUTING.md' |
10 | | - - 'docs/**' |
11 | 8 | - '.github/workflows/generate-docs.yml' |
12 | 9 | workflow_dispatch: |
13 | 10 |
|
@@ -44,12 +41,79 @@ jobs: |
44 | 41 | - name: Validate (no external resources) |
45 | 42 | run: | |
46 | 43 | set -euo pipefail |
47 | | - # fail the build if any HTML file references third-party origins, which would |
48 | | - # break GDPR compliance and the "no external requests" guarantee |
49 | | - if grep -RInE 'https?://(cdn\.|fonts\.googleapis\.com|fonts\.gstatic\.com|www\.google-analytics\.com|googletagmanager\.com)' _site --include='*.html'; then |
50 | | - echo "ERROR: external resource reference found — landing page must be self-contained." >&2 |
51 | | - exit 1 |
52 | | - fi |
| 44 | + python - <<'PY' |
| 45 | + from html.parser import HTMLParser |
| 46 | + from pathlib import Path |
| 47 | +
|
# (tag, attribute) pairs whose value makes the browser fetch a URL while
# rendering the page. Any external URL found in one of these attributes is
# reported as a violation of the "no external requests" guarantee.
RESOURCE_ATTRIBUTES = {
    ("script", "src"),
    ("img", "src"),
    ("img", "srcset"),
    ("iframe", "src"),
    ("audio", "src"),
    ("video", "src"),
    # The poster image is fetched as soon as the <video> element is rendered.
    ("video", "poster"),
    ("track", "src"),
    ("source", "src"),
    ("source", "srcset"),
    ("embed", "src"),
    ("object", "data"),
    # <input type="image" src="..."> loads its button image from `src`.
    ("input", "src"),
}
| 61 | +
|
# `rel` values for which a <link href="..."> causes the browser to fetch (or
# connect to) the target, as opposed to purely informational relations.
LINK_RESOURCE_RELS = {
    # Styles and application metadata.
    "stylesheet",
    "manifest",
    # Icons. "shortcut icon" contains a space, so it can only match the whole
    # normalised rel string, never a single whitespace-separated token.
    "icon",
    "shortcut icon",
    "apple-touch-icon",
    "mask-icon",
    # Resource hints.
    "preload",
    "modulepreload",
    "prefetch",
    "dns-prefetch",
    "preconnect",
}
| 75 | +
|
def external_values(raw_value: str):
    """Yield every external URL found in an attribute value.

    The value is treated as a comma-separated candidate list so that
    ``srcset``-style attributes (``"a.png 1x, b.png 2x"``) are handled; a
    plain single-URL attribute is simply a list of one candidate.

    A candidate is external when it starts with ``http://``, ``https://`` or
    the protocol-relative ``//``. The comparison is case-insensitive because
    URL schemes are, and backslashes are treated as slashes because browsers
    resolve ``\\\\host/x`` like ``//host/x`` on http(s) pages.

    Args:
        raw_value: The raw attribute value as delivered by the HTML parser.

    Yields:
        Each external URL exactly as written in the source, without any
        trailing srcset descriptor.
    """
    for candidate in raw_value.split(","):
        # Split on any whitespace (srcset entries are often separated by
        # newlines/tabs); an empty or blank candidate has no URL at all.
        parts = candidate.split(None, 1)
        if not parts:
            continue
        value = parts[0]
        normalized = value.lower().replace("\\", "/")
        if normalized.startswith(("http://", "https://", "//")):
            yield value
| 81 | +
|
class ResourceParser(HTMLParser):
    """Collect external resource references from one HTML document.

    Feed the document text with ``feed()``; afterwards ``violations`` holds
    one ``(tag, attribute, url)`` tuple per external URL found in a
    resource-loading attribute.
    """

    def __init__(self, path: Path):
        """Create a parser for the document at *path* (kept for reporting)."""
        super().__init__()
        self.path = path
        self.violations = []

    def handle_starttag(self, tag, attrs):
        """Record external URLs in resource-loading attributes of *tag*.

        Args:
            tag: Lower-cased tag name supplied by ``HTMLParser``.
            attrs: List of ``(name, value)`` pairs; ``value`` is ``None`` for
                attributes written without a value.
        """
        attr_map = dict(attrs)
        # A valueless attribute (`<link rel href=...>`) is reported with a
        # value of None, so `.get("rel", "")` alone would still return None
        # and crash on `.lower()`.
        rel_value = (attr_map.get("rel") or "").lower().strip()
        rel_tokens = set(rel_value.split())
        # Match either an individual rel token or the whole normalised string
        # (the latter covers multi-word entries such as "shortcut icon").
        is_resource_link = tag == "link" and bool(
            rel_tokens & LINK_RESOURCE_RELS or rel_value in LINK_RESOURCE_RELS
        )

        for name, value in attrs:
            if not value:
                continue

            if (tag, name) in RESOURCE_ATTRIBUTES:
                for external in external_values(value):
                    self.violations.append((tag, name, external))

            if is_resource_link and name == "href":
                for external in external_values(value):
                    self.violations.append((tag, name, external))
| 105 | +
|
# Scan every generated HTML page under _site and gather, per page, the
# external resource references reported by ResourceParser.
violations = []
for html_file in Path("_site").rglob("*.html"):
    scanner = ResourceParser(html_file)
    scanner.feed(html_file.read_text(encoding="utf-8"))
    for found in scanner.violations:
        violations.append((html_file, *found))

# Print each offending reference, then fail the step with a non-zero exit
# status if anything was found.
for path, tag, attr, value in violations:
    print(f"{path}: external resource via <{tag} {attr}=\"{value}\">", flush=True)
if violations:
    raise SystemExit("ERROR: external resource reference found — landing page must be self-contained.")
| 116 | + PY |
53 | 117 |
|
54 | 118 | - name: Upload Pages artifact |
55 | 119 | uses: actions/upload-pages-artifact@v3 |
|
0 commit comments