Skip to content

Commit 87bca06

Browse files
authored
Merge branch 'master' into greens-relations-defs
2 parents c3a32e9 + fbfd7f5 commit 87bca06

2,451 files changed

Lines changed: 54098 additions & 21489 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.devcontainer/devcontainer.json

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
11
{
22
"name": "Mathlib4 dev container",
3+
"remoteUser": "vscode",
34

45
"build": {
56
"dockerfile": "Dockerfile"
67
},
78

89
"onCreateCommand": "lake exe cache get!",
10+
"postCreateCommand": "gh auth status || gh auth login",
911

1012
"hostRequirements": {
1113
"cpus": 4,
@@ -16,5 +18,14 @@
1618
"vscode": {
1719
"extensions": ["leanprover.lean4"]
1820
}
19-
}
21+
},
22+
23+
"features": {
24+
// install `gh`
25+
"ghcr.io/devcontainers/features/github-cli:1": {}
26+
},
27+
28+
"mounts": [
29+
"source=${localEnv:HOME}/.gitconfig,target=/home/vscode/.gitconfig,type=bind,consistency=cached"
30+
]
2031
}
Lines changed: 170 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,170 @@
1+
# Single source of truth mapping (repo, branch) → (upload container,
2+
# read fallback chain) for Mathlib's multi-container cache.
3+
#
4+
# Called by build, upload_cache, and post_steps in build_template.yml so
5+
# trust classification is decided in exactly one place. Lean-side cache
6+
# logic stays branch-agnostic; this composite is the seam where CI-only
7+
# trust policy lives.
8+
9+
name: Cache trust dispatch
10+
description: Compute the cache container target and read fallback for this job.
11+
12+
inputs:
13+
repo:
14+
description: GitHub repo full name (`owner/name`).
15+
required: true
16+
branch:
17+
description: Branch name (`github.head_ref || github.ref_name`).
18+
required: true
19+
head-sha:
20+
description: |
21+
Head commit SHA for the ref being built. Used as the per-commit cache
22+
namespace (`MATHLIB_CACHE_REPO_SCOPE`) for fork-trust uploads, so a
23+
closed/hidden PR's poisoned artifacts cannot be served to a later
24+
honest PR from the same fork.
25+
required: true
26+
27+
# Outputs are mirrored to $GITHUB_ENV inside the step, which is what
28+
# downstream `cache get` / `cache put-staged` calls actually read. We
29+
# also expose them as action outputs for callers that need them in
30+
# `with:` blocks (e.g. constructing further `if:` conditions).
31+
outputs:
32+
primary:
33+
description: Container name for uploads (master, forks, nightly-testing, pr-toolchain-tests).
34+
value: ${{ steps.dispatch.outputs.primary }}
35+
read-chain:
36+
description: |
37+
Comma-separated read fallback chain for `MATHLIB_CACHE_FROM`. Empty
38+
when the job should use the cache tool's repo-level default.
39+
value: ${{ steps.dispatch.outputs.read-chain }}
40+
repo-scope:
41+
description: |
42+
Per-commit namespace suffix for `MATHLIB_CACHE_REPO_SCOPE`. Set to
43+
the head SHA when uploading to fork-trust containers; empty for
44+
master / nightly / pr-toolchain-tests uploads where scoping isn't
45+
applied.
46+
value: ${{ steps.dispatch.outputs.repo-scope }}
47+
48+
runs:
49+
using: composite
50+
steps:
51+
- name: Compute trust dispatch
52+
id: dispatch
53+
shell: bash
54+
run: |
55+
REPO="${{ inputs.repo }}"
56+
BRANCH="${{ inputs.branch }}"
57+
HEAD_SHA="${{ inputs.head-sha }}"
58+
PRIMARY=""
59+
READ_CHAIN=""
60+
REPO_SCOPE=""
61+
62+
# Security note: this dispatch is NOT the trust boundary for writes.
63+
# The real enforcement is the OIDC bearer token minted in upload_cache:
64+
# the token is scoped to a specific container, so a malicious actor
65+
# rewriting this case to `--container=master` from a fork build would
66+
# be 403'd by Azure regardless. The dispatch exists so the workflow
67+
# does the right thing in the honest case; defence in depth is RBAC.
68+
69+
# Privileged containers (master, nightly-testing, pr-toolchain-tests) are
70+
# writable only by a repo's own native CI, whose OIDC token is RBAC-scoped
71+
# to match. A cross-repo pull request is built by build_fork.yml with
72+
# fork-trust credentials whatever repo the PR head lives on (e.g. a `bump/*`
73+
# branch on the mathlib4-nightly-testing fork), so it can only write
74+
# `forks`. `GITHUB_REPOSITORY` is the repo the workflow runs as (the base
75+
# repo for `pull_request_target`), i.e. the one whose credentials this job
76+
# holds; when it differs from `$REPO`, the build is a fork and targets
77+
# `forks` regardless of the head repo's own trust class.
78+
if [ "$REPO" != "$GITHUB_REPOSITORY" ]; then
79+
PRIMARY="forks"
80+
else
81+
case "$REPO" in
82+
"leanprover-community/mathlib4")
83+
case "$BRANCH" in
84+
"master"|"staging")
85+
# Master / staging are the only writers that feed `master`
86+
# (`staging` is bors's merge candidate, which fast-forwards to
87+
# `master`). Read `master` only, not the default [master,
88+
# legacy]: files the read chain serves are skipped at stage
89+
# time, so keeping `legacy` would leave legacy-only files out of
90+
# `master` for good. Reading `master` alone turns them into
91+
# misses that get rebuilt and uploaded, so `master` fills itself
92+
# into a standalone cache. (Only PRIMARY=master does this; other
93+
# runs write to `forks` and keep the wider chain.)
94+
PRIMARY="master"
95+
READ_CHAIN="master"
96+
;;
97+
*)
98+
# `bors trying`, `ci-dev/*`, maintainer dev branches on the
99+
# canonical repo: trust level is fork-equivalent (the OIDC
100+
# token's RBAC scopes them to `forks`). Reads must widen
101+
# past the default [master, legacy] so the post-build
102+
# verification finds the just-uploaded fork-trust artifacts.
103+
PRIMARY="forks"
104+
READ_CHAIN="master,forks,legacy"
105+
;;
106+
esac
107+
;;
108+
"leanprover-community/mathlib4-nightly-testing")
109+
case "$BRANCH" in
110+
"nightly-testing"|"nightly-testing-green"|"staging"|bump/*)
111+
# Trusted nightly refs use the default [nightly-testing, legacy].
112+
# It excludes `pr-toolchain-tests` so an upload from a
113+
# `lean-pr-testing-*` branch never reaches a trusted-nightly
114+
# consumer.
115+
PRIMARY="nightly-testing"
116+
;;
117+
*)
118+
# `lean-pr-testing-*`, `batteries-pr-testing-*`, etc.:
119+
# least-trusted (can build with arbitrary toolchains). Widen
120+
# reads to recover this branch's own previously-uploaded
121+
# artifacts; trusted-nightly stays preferred where hash
122+
# spaces happen to align.
123+
PRIMARY="pr-toolchain-tests"
124+
READ_CHAIN="pr-toolchain-tests,nightly-testing,legacy"
125+
;;
126+
esac
127+
;;
128+
*)
129+
# Foreign fork. The cache tool's default chain for a fork repo
130+
# is [master, forks, legacy] (master-first): master supplies the
131+
# bulk of unchanged upstream deps, forks supplies PR-specific
132+
# files. No widening needed, so MATHLIB_CACHE_FROM stays unset.
133+
PRIMARY="forks"
134+
;;
135+
esac
136+
fi
137+
138+
# Per-commit cache namespace, only for fork-trust uploads. Closes the
139+
# within-fork temporal replay attack: each commit's CI run gets its
140+
# own /f/{repo}/{sha}/... namespace, so artifacts from a closed/
141+
# hidden PR cannot be served to a later honest build on the same
142+
# fork. Master / nightly / pr-toolchain-tests uploads stay un-scoped:
143+
# master has a single writer (no replay risk), and the per-toolchain
144+
# hash partitioning isolates nightly and toolchain-test classes via
145+
# their root-hash inputs.
146+
if [ "$PRIMARY" = "forks" ]; then
147+
REPO_SCOPE="$HEAD_SHA"
148+
fi
149+
150+
echo "primary=$PRIMARY" >> "$GITHUB_OUTPUT"
151+
echo "read-chain=$READ_CHAIN" >> "$GITHUB_OUTPUT"
152+
echo "repo-scope=$REPO_SCOPE" >> "$GITHUB_OUTPUT"
153+
echo "MATHLIB_CACHE_PRIMARY=$PRIMARY" >> "$GITHUB_ENV"
154+
if [ -n "$READ_CHAIN" ]; then
155+
echo "MATHLIB_CACHE_FROM=$READ_CHAIN" >> "$GITHUB_ENV"
156+
fi
157+
if [ -n "$REPO_SCOPE" ]; then
158+
echo "MATHLIB_CACHE_REPO_SCOPE=$REPO_SCOPE" >> "$GITHUB_ENV"
159+
fi
160+
# Visible in CI logs so a glance at any cache-touching step shows
161+
# what trust class the job is operating under.
162+
SCOPE_NOTE=""
163+
if [ -n "$REPO_SCOPE" ]; then
164+
SCOPE_NOTE=", MATHLIB_CACHE_REPO_SCOPE=$REPO_SCOPE"
165+
fi
166+
if [ -n "$READ_CHAIN" ]; then
167+
echo "cache-trust-dispatch: REPO=$REPO BRANCH=$BRANCH → container=$PRIMARY, MATHLIB_CACHE_FROM=$READ_CHAIN$SCOPE_NOTE"
168+
else
169+
echo "cache-trust-dispatch: REPO=$REPO BRANCH=$BRANCH → container=$PRIMARY, MATHLIB_CACHE_FROM=<default>$SCOPE_NOTE"
170+
fi
Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
# Get this commit's oleans, in two phases:
2+
# 1. Warm the cache from the `cache-snapshot` GitHub artifact (canonical repo only).
3+
# 2. Fetch this commit's oleans from the remote cache with the trusted master-built binary.
4+
# The fetch is HEAD-scoped (reads only this commit's own cache scope). The warm is fail-safe
5+
# (any failure → just the remote fetch) and its source is hardcoded, so nothing can redirect
6+
# the download off the trusted master pipeline.
7+
name: Get cache
8+
description: Get this commit's oleans into the local cache.
9+
inputs:
10+
working_directory:
11+
description: The lake project to fetch the cache for (e.g. the checked-out PR branch).
12+
required: true
13+
cache_bin:
14+
description: Path to the trusted `cache` binary, relative to `working_directory`.
15+
required: true
16+
runs:
17+
using: composite
18+
steps:
19+
# 1. Warm cache from the GitHub artifact. Resolve which snapshot to use: the one built
20+
# at this commit's merge-base with master (its unchanged files hash identically
21+
# there), else the newest still-retained one at-or-before it, else the latest (a
22+
# merge-base older than retention, or any failure, lands here). Canonical repo only.
23+
- name: Resolve cache snapshot
24+
id: resolve
25+
if: ${{ github.repository == 'leanprover-community/mathlib4' }}
26+
shell: bash
27+
env:
28+
GH_TOKEN: ${{ github.token }}
29+
HEAD_SHA: ${{ github.event.pull_request.head.sha || github.sha }}
30+
run: |
31+
set -uo pipefail
32+
runs="repos/leanprover-community/mathlib4/actions/workflows/build.yml/runs?branch=master&event=push&status=success"
33+
34+
# Each helper prints a matching successful master-push run-id, or empty.
35+
run_at() { gh api "${runs}&head_sha=$1" --jq '.workflow_runs[0].id // empty' 2>/dev/null || true; }
36+
latest_run() { gh api "${runs}&per_page=1" --jq '.workflow_runs[0].id // empty' 2>/dev/null || true; }
37+
# newest run created at-or-before date $1, but not older than cutoff $2
38+
newest_before() {
39+
gh api "${runs}&per_page=100" 2>/dev/null | jq -r --arg d "$1" --arg c "$2" \
40+
'[.workflow_runs[] | select(.created_at <= $d and ($c == "" or .created_at >= $c))][0].id // empty' \
41+
2>/dev/null || true
42+
}
43+
# True if run-id $1 actually carries a `cache-snapshot` artifact. Older runs
44+
# predate the feature and artifacts expire, so a successful run is not enough.
45+
# Filters server-side by name, but re-checks in jq in case `name` is ignored.
46+
has_snapshot() {
47+
[[ -n "$(gh api "repos/leanprover-community/mathlib4/actions/runs/$1/artifacts?name=cache-snapshot&per_page=100" \
48+
--jq '.artifacts[] | select(.name == "cache-snapshot") | .id' 2>/dev/null | head -1 || true)" ]]
49+
}
50+
51+
# This PR's merge-base with master (+ its commit date), and the cutoff below which
52+
# snapshots have expired (retention ~14d).
53+
mb_info=$(gh api "repos/leanprover-community/mathlib4/compare/master...${HEAD_SHA}" \
54+
--jq '.merge_base_commit | "\(.sha) \(.commit.committer.date)"' 2>/dev/null || true)
55+
read -r mb mb_date <<< "${mb_info}"
56+
cutoff=$(date -u -d '13 days ago' +%Y-%m-%dT%H:%M:%SZ 2>/dev/null || true)
57+
58+
# Prefer the merge-base's snapshot (while still retained), then the newest one
59+
# before it, then the latest of all.
60+
run_id=""
61+
if [[ -n "${mb}" && ( -z "${cutoff}" || "${mb_date}" > "${cutoff}" ) ]]; then
62+
run_id=$(run_at "${mb}")
63+
[[ -z "${run_id}" ]] && run_id=$(newest_before "${mb_date}" "${cutoff}")
64+
fi
65+
[[ -z "${run_id}" ]] && run_id=$(latest_run)
66+
67+
# The resolved run may carry no `cache-snapshot` artifact: an older merge-base
68+
# predating the feature, or one whose artifact already expired (older-than-today
69+
# runs often won't have one). Rather than let the download step hard-error on a
70+
# missing artifact, confirm it's present; if not, fall back to the latest master
71+
# run, and warm only if that one has it.
72+
if [[ -n "${run_id}" ]] && ! has_snapshot "${run_id}"; then
73+
echo "Run ${run_id} has no cache-snapshot artifact; falling back to latest master run."
74+
run_id=$(latest_run)
75+
if [[ -n "${run_id}" ]] && ! has_snapshot "${run_id}"; then
76+
run_id=""
77+
fi
78+
fi
79+
80+
echo "Resolved cache-snapshot run_id: '${run_id}' (merge-base: ${mb:-unknown})"
81+
echo "run_id=${run_id}" >> "$GITHUB_OUTPUT"
82+
83+
- name: Warm cache from GitHub artifact
84+
if: ${{ steps.resolve.outputs.run_id != '' }}
85+
continue-on-error: true # fail-safe: fall back to the remote fetch (step 2)
86+
uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
87+
with:
88+
name: cache-snapshot
89+
path: /home/lean/.cache/mathlib
90+
repository: leanprover-community/mathlib4
91+
run-id: ${{ steps.resolve.outputs.run_id }}
92+
github-token: ${{ github.token }}
93+
94+
# 2. Fetch this commit's oleans from the remote cache with the trusted `cache` binary
95+
# (outside landrun). Runs on every repo; the warm above just gives the canonical
96+
# repo a local head start. HEAD-scoped: reads only this commit's own cache scope.
97+
- name: Fetch cache from remote
98+
shell: bash
99+
env:
100+
WORKDIR: ${{ inputs.working_directory }}
101+
CACHE_BIN: ${{ inputs.cache_bin }}
102+
CACHE_REPO: ${{ github.event.pull_request.head.repo.full_name || github.repository }}
103+
run: |
104+
set -eo pipefail
105+
cd "${WORKDIR}"
106+
rm -rf .lake/build/lib/lean/Mathlib
107+
log="${RUNNER_TEMP:-/tmp}/cache-get.log"
108+
# --repo so fork PRs also read their own repo-namespaced cache (master is read flat,
109+
# so this still gets the master bulk); for in-repo runs it resolves to the same repo.
110+
"${CACHE_BIN}" --repo="${CACHE_REPO}" get 2>&1 | tee "${log}"
111+
# Warmth = how much the snapshot covered HEAD: files already cached locally
112+
# (just decompressed) vs downloaded from Azure. Parsed best-effort from the log.
113+
warm=$(grep -oE 'Decompressing [0-9]+ already-cached' "${log}" | grep -oE '[0-9]+' | head -1 || true)
114+
cold=$(grep -oE 'Attempting to download [0-9]+' "${log}" | grep -oE '[0-9]+' | head -1 || true)
115+
echo "Cache warmth: ${warm:-0} already-cached (warm) / ${cold:-0} downloaded from Azure (cold)"

.github/actions/get-mathlib-ci/action.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ inputs:
1010
# Default pinned commit used by workflows unless they explicitly override.
1111
# Update this ref as needed to pick up changes to mathlib-ci scripts
1212
# This is also updated automatically by .github/workflows/update_dependencies.yml
13-
default: 194b03113712fdf55c2c29651f161893f0305fe8
13+
default: 5aee9d4ce5a39050c72b4aa46015a824b0c189ac
1414
path:
1515
description: Checkout destination path.
1616
required: false

0 commit comments

Comments
 (0)