diff --git a/.agents/context/schema-spec.md b/.agents/context/schema-spec.md index 23872f2f..417a151b 100644 --- a/.agents/context/schema-spec.md +++ b/.agents/context/schema-spec.md @@ -167,6 +167,11 @@ Meta-schema encodes this via `allOf` with 4 `if/then` branches keyed on `type`. } ``` +## Hosting + +- **Resolved:** GitHub Pages on the `opendecree/decree` repo itself, with custom domain `schemas.opendecree.dev` via a Cloudflare CNAME (DNS-only, gray cloud). The `.github/workflows/deploy-pages.yml` workflow builds an explicit `_site/` artifact from `schemas/v*/...` and uploads only that to Pages — no other repo content is exposed. CORS comes for free (`access-control-allow-origin: *` on GitHub Pages static files); content-type is `application/json` (the issue's acceptance criteria treat this as an acceptable fallback for `application/schema+json`). +- **One-time setup steps** are documented in [`docs/development/schemas-hosting-runbook.md`](../../docs/development/schemas-hosting-runbook.md). May extract to a dedicated `opendecree/schemas` repo later if decree's Pages slot is needed for something else (docs landing, API reference). The public URL stays stable across that move thanks to the CNAME. + ## CI - **Primary tool:** `check-jsonschema` (Python, wraps `jsonschema` library, excellent error messages, built-in YAML support, default Draft 2020-12) @@ -213,7 +218,6 @@ Meta-schema encodes this via `allOf` with 4 `if/then` branches keyed on `type`. ## Open questions -- **Hosting target for `schemas.opendecree.dev`** — dedicated GitHub Pages repo? Cloudflare redirect to raw GitHub content? Needs DNS + CORS setup. - **Bundling tool** — hand-rolled Python script vs off-the-shelf (e.g. `json-dereference-cli`). Go with off-the-shelf if one exists and is maintained. - **Does the CLI emit `$schema`/`$id` on export?** — `decree schema export` should probably inject `$schema` by default, make `$id` opt-in. 
- **Post-v1.0.0 URL migration** — when the spec promotes to 1.0.0, keep `/v0.1.0/` live forever or redirect? Preserve forever matches OpenAPI's dated-URL practice. diff --git a/.github/workflows/deploy-pages.yml b/.github/workflows/deploy-pages.yml new file mode 100644 index 00000000..b1249d60 --- /dev/null +++ b/.github/workflows/deploy-pages.yml @@ -0,0 +1,132 @@ +# Deploy meta-schemas to GitHub Pages at schemas.opendecree.dev. +# +# Publishes JSON Schema 2020-12 documents under schemas/v*/ to +# https://schemas.opendecree.dev/schema/v*/decree-{schema,config}.json so +# editors (via schemastore.org), CI linters, and other tooling can fetch +# the meta-schemas by URL. Closes #125. +# +# Allowlist publishing model: +# The job builds _site/ from scratch, copies only the JSON files, generates +# index.html and robots.txt, runs an audit step that fails on unrecognized +# files, and uploads exactly _site/ as the Pages artifact. The rest of the +# repo is not exposed — Pages serves only what's in the artifact. +# +# Triggers: +# - push to main when schemas/**, scripts/generate-schema-index.py, or this workflow changes +# - workflow_dispatch (manual republish for debugging or post-DNS fixups) +# +# Bootstrap requirement: +# Custom domain schemas.opendecree.dev must be configured in the repo's +# Settings → Pages with Source: GitHub Actions. Cloudflare CNAME +# schemas → opendecree.github.io must be in place. See +# docs/development/schemas-hosting-runbook.md for one-time setup. 
+ +name: Deploy Pages + +on: + push: + branches: [main] + paths: + - 'schemas/**' + - 'scripts/generate-schema-index.py' + - '.github/workflows/deploy-pages.yml' + workflow_dispatch: + +permissions: + contents: read + pages: write + id-token: write + +concurrency: + group: pages + cancel-in-progress: false + +jobs: + deploy: + name: Deploy meta-schemas + runs-on: ubuntu-latest + timeout-minutes: 5 + environment: + name: github-pages + url: ${{ steps.deployment.outputs.page_url }} + steps: + - name: Checkout + uses: actions/checkout@v6 + with: + persist-credentials: false + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.x" + + - name: Install jsonschema + PyYAML + run: pip install --no-cache-dir 'jsonschema>=4.21' 'PyYAML>=6' + + # Reuses the script that ci.yml already runs on PRs — same checks, + # second pass on main before publish. + - name: Validate meta-schemas + run: python3 scripts/validate-meta-schemas.py + + # Belt-and-braces: publish-time check that the URL inside the + # artifact matches the URL it will be served at. Catches the + # rare case where a rename slips through without updating $id. 
+ - name: Validate $id matches publish URL + run: | + set -euo pipefail + fail=0 + for f in schemas/v*/decree-schema.json schemas/v*/decree-config.json; do + ver=$(basename "$(dirname "$f")") + base=$(basename "$f") + expected="https://schemas.opendecree.dev/schema/${ver}/${base}" + actual=$(python3 -c 'import json,sys; print(json.load(open(sys.argv[1])).get("$id",""))' "$f") + if [ "$actual" != "$expected" ]; then + echo "::error file=$f::\$id mismatch — expected $expected, got $actual" + fail=1 + fi + done + exit $fail + + - name: Build _site/ + run: | + set -euo pipefail + mkdir -p _site/schema + for d in schemas/v*/; do + ver=$(basename "$d") + mkdir -p "_site/schema/${ver}" + cp "${d}decree-schema.json" "_site/schema/${ver}/" + cp "${d}decree-config.json" "_site/schema/${ver}/" + done + python3 scripts/generate-schema-index.py _site + + # Allowlist guard: refuse to upload anything that isn't on the + # explicit list of expected files. A workflow bug that copies + # extra paths into _site/ (e.g. .git, .github, internal/) gets + # caught here, not at the Pages edge. + - name: Audit _site/ contents + run: | + set -euo pipefail + unexpected=$(find _site -type f \ + ! -path '_site/index.html' \ + ! -path '_site/robots.txt' \ + ! 
-regex '_site/schema/v[0-9]+\.[0-9]+\.[0-9]+/decree-\(schema\|config\)\.json' \ + -print) + if [ -n "$unexpected" ]; then + echo "::error::Unexpected files in _site/ — refusing to publish:" + echo "$unexpected" + exit 1 + fi + echo "Files to be published:" + find _site -type f | sort + + - name: Setup Pages + uses: actions/configure-pages@v5 + + - name: Upload artifact + uses: actions/upload-pages-artifact@v3 + with: + path: _site + + - name: Deploy + id: deployment + uses: actions/deploy-pages@v4 diff --git a/docs/development/schemas-hosting-runbook.md b/docs/development/schemas-hosting-runbook.md new file mode 100644 index 00000000..bb32e277 --- /dev/null +++ b/docs/development/schemas-hosting-runbook.md @@ -0,0 +1,96 @@ +# Meta-schema hosting runbook + +Operations guide for `https://schemas.opendecree.dev/` — the Pages site that serves the JSON Schema 2020-12 meta-schemas under `schemas/v*/`. + +## Architecture + +The decree repo is the publish target. `.github/workflows/deploy-pages.yml` builds a `_site/` directory containing only the JSON files (plus `index.html` and `robots.txt`) and uploads it as the Pages artifact. The rest of the repo is not exposed. + +DNS routes `schemas.opendecree.dev` → `opendecree.github.io` via a Cloudflare CNAME (DNS-only). GitHub Pages provisions the Let's Encrypt cert. + +## One-time setup + +### 1. Cloudflare DNS + +In the `opendecree.dev` zone: + +| Field | Value | +|-------|-------| +| Type | `CNAME` | +| Name | `schemas` | +| Target | `opendecree.github.io` | +| Proxy status | **DNS only** (gray cloud) | +| TTL | Auto | + +Why gray cloud and not orange (proxied): with the proxy on, Cloudflare terminates TLS at its edge and GitHub can't validate domain ownership for Let's Encrypt. Workarounds exist (Cloudflare's "Full" mode + advanced certs) but add complexity for no current benefit. We can switch to proxied later if response-header tuning or caching becomes useful. 
+ +`.dev` is on the Chromium HSTS preload list, so HTTPS is mandatory at first request — there's no period where the domain is reachable over plain HTTP. + +### 2. GitHub Pages + +In the decree repo: **Settings → Pages**. + +1. Source: **GitHub Actions** (not "Deploy from a branch"). +2. Custom domain: `schemas.opendecree.dev`. Save. +3. If GitHub asks for a TXT verification record, add it on the apex via the Cloudflare DNS panel and re-verify. +4. Wait for the cert to provision. Typically 15–60 minutes, sometimes longer. Once complete, tick **Enforce HTTPS**. +5. Verify before announcing the URL anywhere: + + ```sh + curl -I https://schemas.opendecree.dev/ + ``` + + Expect `HTTP/2 200` and a valid cert. If the cert is still pending, retry; do not link the URL externally yet. + +## Publishing a new version + +1. Author meta-schema YAML under `schemas/vX.Y.Z/decree-{schema,config}.yaml`. The `$id` field of each file must equal its full publish URL, `https://schemas.opendecree.dev/schema/vX.Y.Z/decree-{schema,config}.json` (version directory plus file name) — the deploy workflow asserts this. +2. Run `python3 scripts/yaml-to-json.py schemas/vX.Y.Z/decree-schema.yaml schemas/vX.Y.Z/decree-schema.json` (and the same for `decree-config`). Both YAML and JSON copies are committed. +3. `make validate-meta-schemas` to confirm canonical files validate and known-invalid fixtures don't. +4. Open a PR. CI runs `Meta-schemas check` on every PR; it fails loud on validation regressions. +5. After merge to main, `Deploy Pages` triggers automatically (path-filtered on `schemas/**`, `scripts/generate-schema-index.py`, and the workflow file itself). It republishes the entire `_site/` artifact — adds the new version, regenerates the index. +6. Verify: + + ```sh + curl -s https://schemas.opendecree.dev/schema/vX.Y.Z/decree-schema.json | jq -r '.["$id"]' + ``` + + The output must equal the requested URL. + +## Immutability policy + +Once a version is announced (linked from external systems, schemastore.org catalog, READMEs, etc.), **do not edit `schemas/vX.Y.Z/` in place**. Bugs require a new SemVer dir. 
Fix-ups are allowed up until the version is referenced externally; after that, third parties may have cached the file and may not refetch. + +The workflow does not enforce immutability — it allows overwrites because pre-release fix-ups are common. Discipline is on the author. + +## Manual republish + +If a Pages run fails (transient action error, cert wasn't ready, etc.), trigger a republish without commit: + +```sh +gh workflow run deploy-pages.yml --repo opendecree/decree +``` + +This dispatches the workflow on the current `main` HEAD. The same validation steps run; if any fail, no deploy. + +## Troubleshooting + +- **Cert pending past 60 minutes.** Re-check the CNAME (gray cloud, target `opendecree.github.io`). Toggle the custom domain off and back on in Settings → Pages to nudge re-provisioning. +- **`curl -I https://schemas.opendecree.dev/` returns 5xx or connection refused.** Custom domain not yet active. Wait for cert provisioning. Don't announce the URL until this passes. +- **Workflow fails with "Pages site not found".** Custom domain hasn't been saved in Settings yet, or the Pages source is still set to a branch. Set Source = GitHub Actions and re-dispatch. +- **`$id` mismatch error from the deploy workflow.** A file under `schemas/v*/` has a `$id` that doesn't match its publish URL — usually a stale rename. Fix the YAML, regenerate the JSON, push. +- **Audit step refuses unexpected files in `_site/`.** A workflow step copied something unintended. Inspect the run log for the file list; typical cause is a `cp -r` that pulled in too much. +- **Schemastore.org caches a 404.** If schemastore fetched the URL during the cert-pending window, their crawler may cache the failure. Re-trigger their fetch by editing the catalog entry's `description` or filing a quick "please re-fetch" issue on their tracker. 
+- **Browser HSTS pinning issues.** Once a browser has loaded `schemas.opendecree.dev` over HTTPS, it caches any HSTS policy the server sends for that policy's `max-age` (not for the cert lifetime). Don't worry — `.dev` is preloaded, so HTTPS-only is already the default state. + +## Future migration + +If decree's Pages slot is ever needed for something else (project docs landing, API reference site), extract this configuration to a dedicated `opendecree/schemas` repo: + +1. Create the repo with the same `_site/` build pipeline. +2. Move `schemas/v*/` to the new repo. +3. Leave the Cloudflare CNAME as it is (target stays `opendecree.github.io`; once the custom domain is claimed by the new repo, that repo becomes the source). +4. Disable Pages on the decree repo, enable on the new repo with the same custom domain. +5. The public URL stays stable — the CNAME continues to resolve, and consumers see no change. + +The current single-repo design is cheap to extract; this is documented for future reference, not a planned move. diff --git a/scripts/generate-schema-index.py b/scripts/generate-schema-index.py new file mode 100755 index 00000000..6e660732 --- /dev/null +++ b/scripts/generate-schema-index.py @@ -0,0 +1,94 @@ +#!/usr/bin/env python3 +"""generate-schema-index: build the Pages root index.html. + +Walks SITE_DIR/schema/v*/ to discover published versions and writes a static +HTML page at SITE_DIR/index.html that lists each version's published files +with download links. Also writes SITE_DIR/robots.txt to keep crawlers off +the JSON (we don't need SEO for raw schema docs). + +Usage: + generate-schema-index.py SITE_DIR + +Vanilla — stdlib only. Called from .github/workflows/deploy-pages.yml. +""" + +from __future__ import annotations + +import html +import re +import sys +from pathlib import Path + +VERSION_RE = re.compile(r"^v(\d+)\.(\d+)\.(\d+)$") + +PAGE_TEMPLATE = """ + + + + OpenDecree Meta-Schemas + + + + +

OpenDecree Meta-Schemas

+

JSON Schema 2020-12 documents that describe the OpenDecree configuration format. Source of truth: opendecree/decree.

+ + + +{rows} + +
VersionFile
+
Apache 2.0 · OpenDecree is alpha — all artifacts subject to change.
def main() -> int:
    """Generate ``index.html`` and ``robots.txt`` for the Pages site.

    Reads the site directory from ``sys.argv[1]``, scans its ``schema/``
    subdirectory for version directories whose names match ``VERSION_RE``
    (``vMAJOR.MINOR.PATCH``), and renders one table row per published file,
    newest version first.

    Returns:
        0 on success, 1 if ``<site>/schema`` is not a directory, 2 on a
        wrong number of command-line arguments.
    """
    if len(sys.argv) != 2:
        print(f"usage: {sys.argv[0]} SITE_DIR", file=sys.stderr)
        return 2

    site = Path(sys.argv[1])
    schema_dir = site / "schema"
    if not schema_dir.is_dir():
        print(f"error: {schema_dir} not found", file=sys.stderr)
        return 1

    # (numeric semver key, directory name, sorted file names) per version.
    versions: list[tuple[tuple[int, int, int], str, list[str]]] = []
    for entry in schema_dir.iterdir():
        if not entry.is_dir():
            continue
        match = VERSION_RE.match(entry.name)
        if match is None:
            # Not a vX.Y.Z directory; leave it out of the index.
            continue
        major, minor, patch = (int(part) for part in match.groups())
        files = sorted(p.name for p in entry.iterdir() if p.is_file())
        versions.append(((major, minor, patch), entry.name, files))
    # Numeric comparison so v0.10.0 sorts above v0.2.0; newest first.
    versions.sort(reverse=True)

    rows: list[str] = []
    for _, ver, files in versions:
        safe_ver = html.escape(ver)
        for fname in files:
            safe_name = html.escape(fname)
            # Relative link: index.html sits at the site root, next to schema/.
            rows.append(
                f'      <tr><td>{safe_ver}</td>'
                f'<td><a href="schema/{safe_ver}/{safe_name}">{safe_name}</a></td></tr>'
            )

    body = "\n".join(rows) or (
        '      <tr><td colspan="2">No versions published yet.</td></tr>'
    )
    # Explicit UTF-8: the page contains non-ASCII text and must not depend
    # on the build machine's locale encoding.
    (site / "index.html").write_text(
        PAGE_TEMPLATE.format(rows=body), encoding="utf-8"
    )
    (site / "robots.txt").write_text(
        "User-agent: *\nDisallow: /\n", encoding="utf-8"
    )
    print(f"Wrote {site / 'index.html'} ({len(versions)} versions)")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())