diff --git a/.github/workflows/retry-cla-assistant.yml b/.github/workflows/retry-cla-assistant.yml
new file mode 100644
index 00000000000..df5718b3090
--- /dev/null
+++ b/.github/workflows/retry-cla-assistant.yml
@@ -0,0 +1,87 @@
+name: Retry CLA Assistant
+
+# CLA Assistant publishes `license/cla` as a commit status, not a check run.
+# If its webhook handler misses a PR update, GitHub branch protection can wait
+# forever even after every real CI check has passed. This workflow nudges CLA
+# Assistant only when that status is the sole remaining non-green signal.
+#
+# SECURITY: This workflow uses pull_request_target so it can inspect PR status
+# for forks. It checks out trusted default-branch code only; it must never check
+# out, build, or execute code from the PR head.
+
+on:
+  pull_request_target:
+    types: [opened, reopened, synchronize, ready_for_review]
+  workflow_run:
+    types: [completed]
+  schedule:
+    - cron: "7,22,37,52 * * * *"
+  workflow_dispatch:
+    inputs:
+      pr_number:
+        description: "Pull request number to check"
+        required: true
+        type: number
+
+permissions:
+  actions: read
+  checks: read
+  contents: read
+  pull-requests: read
+  statuses: read
+
+jobs:
+  retry-cla:
+    name: Retry CLA Assistant if it is the only blocker
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Check out trusted base code
+        uses: actions/checkout@v4
+        with:
+          ref: ${{ github.event.repository.default_branch }}
+
+      - uses: dsherret/rust-toolchain-file@v1
+
+      - name: Collect pull requests to check
+        id: prs
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          pr_numbers="${RUNNER_TEMP}/cla-pr-numbers"
+
+          case "${GITHUB_EVENT_NAME}" in
+            pull_request_target)
+              jq -r '.pull_request.number' "${GITHUB_EVENT_PATH}" > "${pr_numbers}"
+              ;;
+            workflow_run)
+              if jq -e '.workflow_run.name == "Retry CLA Assistant"' "${GITHUB_EVENT_PATH}" > /dev/null; then
+                : > "${pr_numbers}"
+              else
+                jq -r '.workflow_run.pull_requests[].number' "${GITHUB_EVENT_PATH}" > "${pr_numbers}"
+              fi
+              ;;
+            schedule)
+              gh api --paginate "repos/${GITHUB_REPOSITORY}/pulls?state=open&base=master&per_page=100" --jq '.[].number' > "${pr_numbers}"
+              ;;
+            workflow_dispatch)
+              jq -r '.inputs.pr_number' "${GITHUB_EVENT_PATH}" > "${pr_numbers}"
+              ;;
+            *)
+              echo "unsupported event ${GITHUB_EVENT_NAME}" >&2
+              exit 1
+              ;;
+          esac
+
+          sort -n -u "${pr_numbers}" -o "${pr_numbers}"
+          echo "path=${pr_numbers}" >> "${GITHUB_OUTPUT}"
+
+      - name: Recheck CLA Assistant
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          while read -r pr_number; do
+            if [ -n "${pr_number}" ]; then
+              cargo ci retry-cla-assistant --pr-number "${pr_number}"
+            fi
+          done < "${{ steps.prs.outputs.path }}"
diff --git a/Cargo.lock b/Cargo.lock
index 3d23f530b1e..389abf7ca16 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -900,6 +900,7 @@ dependencies = [
   "log",
   "regex",
   "reqwest 0.12.24",
+ "serde",
   "serde_json",
   "spacetimedb-guard",
   "tempfile",
diff --git a/tools/ci/Cargo.toml b/tools/ci/Cargo.toml
index 972a0bf9d61..deb9b7f4ee7 100644
--- a/tools/ci/Cargo.toml
+++ b/tools/ci/Cargo.toml
@@ -9,7 +9,8 @@ anyhow.workspace = true
 chrono = { workspace = true, features=["clock"] }
 clap.workspace = true
 regex.workspace = true
-reqwest = { workspace = true, features = ["blocking"] }
+reqwest = { workspace = true, features = ["blocking", "json"] }
+serde.workspace = true
 serde_json.workspace = true
 duct.workspace = true
 tempfile.workspace = true
diff --git a/tools/ci/src/main.rs b/tools/ci/src/main.rs
index eb27190cbb0..2276be04c9f 100644
--- a/tools/ci/src/main.rs
+++ b/tools/ci/src/main.rs
@@ -14,6 +14,7 @@ const README_PATH: &str = "tools/ci/README.md";
 
 mod ci_docs;
 mod keynote_bench;
+mod retry_cla_assistant;
 mod smoketest;
 mod util;
 
@@ -367,6 +368,8 @@ enum CiCmd {
     VersionUpgradeCheck,
     /// Builds the docs site.
     Docs,
+    /// Retries CLA Assistant if `license/cla` is the only remaining PR blocker.
+    RetryClaAssistant(retry_cla_assistant::RetryClaAssistantArgs),
 }
 
 fn run_all_clap_subcommands(skips: &[String]) -> Result<()> {
@@ -772,6 +775,10 @@ fn main() -> Result<()> {
             run_docs_build()?;
         }
 
+        Some(CiCmd::RetryClaAssistant(args)) => {
+            retry_cla_assistant::run(args)?;
+        }
+
         None => run_all_clap_subcommands(&cli.skip)?,
     }
 
diff --git a/tools/ci/src/retry_cla_assistant.rs b/tools/ci/src/retry_cla_assistant.rs
new file mode 100644
index 00000000000..a768cbc1123
--- /dev/null
+++ b/tools/ci/src/retry_cla_assistant.rs
@@ -0,0 +1,333 @@
+//! Re-triggers CLA Assistant for a pull request when `license/cla` is the only non-green signal left.
+
+use std::collections::BTreeMap;
+use std::env;
+use std::time::Duration;
+
+use anyhow::{anyhow, bail, Context, Result};
+use chrono::{DateTime, Utc};
+use clap::Args;
+use reqwest::blocking::Client;
+use reqwest::header::{HeaderMap, HeaderValue, ACCEPT, AUTHORIZATION, USER_AGENT};
+use serde::de::DeserializeOwned;
+use serde::Deserialize;
+
+/// Commit-status context that CLA Assistant reports under.
+const CLA_CONTEXT: &str = "license/cla";
+/// Pull requests whose head commit is younger than this are skipped.
+const MIN_HEAD_AGE: Duration = Duration::from_secs(10 * 60);
+/// Number of times the commit status is re-read after a recheck request.
+const POLL_ATTEMPTS: usize = 6;
+/// Pause before each status re-read.
+const POLL_DELAY: Duration = Duration::from_secs(30);
+/// Page size requested from paginated GitHub endpoints; 100 is the largest value GitHub accepts.
+const PAGE_SIZE: usize = 100;
+
+#[derive(Args)]
+pub(crate) struct RetryClaAssistantArgs {
+    /// Pull request number to check.
+    #[arg(long)]
+    pub(crate) pr_number: u64,
+
+    /// Repository in `owner/name` form. Defaults to GITHUB_REPOSITORY.
+    #[arg(long)]
+    pub(crate) repo: Option<String>,
+}
+
+/// Entry point for `cargo ci retry-cla-assistant`.
+///
+/// The repository comes from `--repo` or, failing that, `GITHUB_REPOSITORY`; the API token comes from
+/// `GITHUB_TOKEN`. Returns an error when either is missing or malformed, or when a request fails.
+pub(crate) fn run(args: RetryClaAssistantArgs) -> Result<()> {
+    let repo = args
+        .repo
+        .or_else(|| env::var("GITHUB_REPOSITORY").ok())
+        .context("repo is required via --repo or GITHUB_REPOSITORY")?;
+    let (owner, repo_name) = repo
+        .split_once('/')
+        .ok_or_else(|| anyhow!("repo must be in owner/name form, got {repo:?}"))?;
+    let token = env::var("GITHUB_TOKEN").context("GITHUB_TOKEN is required")?;
+    let client = GithubClient::new(token)?;
+
+    retry_for_pr(&client, owner, repo_name, args.pr_number)
+}
+
+/// Asks CLA Assistant to recheck one pull request, but only when nothing else is blocking it.
+///
+/// The PR is skipped (with an explanatory line on stdout) when it is not open, is a draft, does not target
+/// `master`, has a head commit younger than [`MIN_HEAD_AGE`], already has `license/cla=success`, has no check
+/// runs, or has any other non-green check run or commit status. Otherwise a recheck is requested and the
+/// status is polled; a poll that never sees `success` emits a workflow warning rather than an error.
+fn retry_for_pr(client: &GithubClient, owner: &str, repo: &str, pr_number: u64) -> Result<()> {
+    println!("Inspecting PR #{pr_number}");
+
+    let pr: PullRequest = client.github_get(&format!("/repos/{owner}/{repo}/pulls/{pr_number}"))?;
+    if pr.state != "open" {
+        println!("PR #{pr_number} is {}; skipping.", pr.state);
+        return Ok(());
+    }
+    if pr.draft {
+        println!("PR #{pr_number} is draft; skipping.");
+        return Ok(());
+    }
+    if pr.base.ref_name != "master" {
+        println!("PR #{pr_number} targets {}, not master; skipping.", pr.base.ref_name);
+        return Ok(());
+    }
+
+    let sha = pr.head.sha;
+    let commit: CommitResponse = client.github_get(&format!("/repos/{owner}/{repo}/commits/{sha}"))?;
+    let committed_at = commit
+        .commit
+        .committer
+        .date
+        .or(commit.commit.author.date)
+        .context("commit payload did not contain an author or committer date")?;
+    let committed_at = DateTime::parse_from_rfc3339(&committed_at)
+        .context("commit date was not RFC3339")?
+        .with_timezone(&Utc);
+    // A commit dated in the future yields a negative duration; treat it as age zero.
+    let head_age = Utc::now()
+        .signed_duration_since(committed_at)
+        .to_std()
+        .unwrap_or_default();
+    if head_age < MIN_HEAD_AGE {
+        println!("PR #{pr_number} head is too new ({}s); skipping.", head_age.as_secs());
+        return Ok(());
+    }
+
+    let check_runs = client.list_check_runs(owner, repo, &sha)?;
+    let statuses = client.list_statuses(owner, repo, &sha)?;
+
+    let latest_statuses = latest_status_by_context(statuses);
+    if latest_statuses
+        .get(CLA_CONTEXT)
+        .is_some_and(|status| status.state == "success")
+    {
+        println!("PR #{pr_number} already has {CLA_CONTEXT}=success.");
+        return Ok(());
+    }
+
+    if check_runs.is_empty() {
+        println!("PR #{pr_number} has no check runs yet; skipping.");
+        return Ok(());
+    }
+
+    let blocking_check_runs: Vec<_> = check_runs.iter().filter(|run| !check_run_is_green(run)).collect();
+    if !blocking_check_runs.is_empty() {
+        println!("PR #{pr_number} still has non-green check runs:");
+        for run in blocking_check_runs {
+            println!(
+                "- {}: status={}, conclusion={}",
+                run.name,
+                run.status,
+                run.conclusion.as_deref().unwrap_or("none")
+            );
+        }
+        return Ok(());
+    }
+
+    let blocking_statuses: Vec<_> = latest_statuses
+        .values()
+        .filter(|status| status.context != CLA_CONTEXT)
+        .filter(|status| status.state != "success")
+        .collect();
+    if !blocking_statuses.is_empty() {
+        println!("PR #{pr_number} still has non-green commit statuses:");
+        for status in blocking_statuses {
+            println!("- {}: {}", status.context, status.state);
+        }
+        return Ok(());
+    }
+
+    if let Some(cla_status) = latest_statuses.get(CLA_CONTEXT) {
+        if !matches!(cla_status.state.as_str(), "pending" | "failure" | "error") {
+            println!(
+                "PR #{pr_number} has unexpected {CLA_CONTEXT} state {}; skipping.",
+                cla_status.state
+            );
+            return Ok(());
+        }
+    }
+
+    let reason = latest_statuses.get(CLA_CONTEXT).map_or_else(
+        || format!("{CLA_CONTEXT} is missing"),
+        |status| format!("{CLA_CONTEXT} is {}", status.state),
+    );
+    println!("Retrying CLA Assistant for PR #{pr_number}: {reason}");
+    client.recheck_cla(owner, repo, pr_number)?;
+
+    for attempt in 1..=POLL_ATTEMPTS {
+        std::thread::sleep(POLL_DELAY);
+        let statuses = latest_status_by_context(client.list_statuses(owner, repo, &sha)?);
+        let cla_state = statuses
+            .get(CLA_CONTEXT)
+            .map(|status| status.state.as_str())
+            .unwrap_or("missing");
+        println!("Poll {attempt}/{POLL_ATTEMPTS}: {CLA_CONTEXT}={cla_state}");
+        if cla_state == "success" {
+            println!("CLA Assistant posted {CLA_CONTEXT}=success for PR #{pr_number}.");
+            return Ok(());
+        }
+    }
+
+    println!("::warning::CLA Assistant did not post {CLA_CONTEXT}=success for PR #{pr_number} after retry.");
+    Ok(())
+}
+
+/// Returns true for a completed check run that concluded `success`, `skipped`, or `neutral`.
+fn check_run_is_green(run: &CheckRun) -> bool {
+    run.status == "completed" && matches!(run.conclusion.as_deref(), Some("success" | "skipped" | "neutral"))
+}
+
+/// Indexes statuses by context, keeping the first entry seen for each context.
+fn latest_status_by_context(statuses: Vec<CommitStatus>) -> BTreeMap<String, CommitStatus> {
+    // GitHub returns combined statuses newest-first, so keep the first context.
+    let mut result = BTreeMap::new();
+    for status in statuses {
+        result.entry(status.context.clone()).or_insert(status);
+    }
+    result
+}
+
+/// HTTP access to the GitHub REST API and to CLA Assistant.
+///
+/// Two clients are kept on purpose: default headers are attached to every request a client sends, so the
+/// client that carries the GitHub token must never be pointed at a host other than `api.github.com`.
+struct GithubClient {
+    /// Sends `Authorization: Bearer <token>`; used only for `api.github.com`.
+    github: Client,
+    /// Carries no credentials; used for `cla-assistant.io`.
+    anonymous: Client,
+}
+
+impl GithubClient {
+    /// Builds both HTTP clients. Fails if `token` is not a valid header value.
+    fn new(token: String) -> Result<Self> {
+        let mut common_headers = HeaderMap::new();
+        common_headers.insert(USER_AGENT, HeaderValue::from_static("clockworklabs-ci"));
+
+        let mut authorization =
+            HeaderValue::from_str(&format!("Bearer {token}")).context("invalid GitHub token header")?;
+        // Keep the token out of `Debug` output of the header value.
+        authorization.set_sensitive(true);
+
+        let mut github_headers = common_headers.clone();
+        github_headers.insert(ACCEPT, HeaderValue::from_static("application/vnd.github+json"));
+        github_headers.insert(AUTHORIZATION, authorization);
+
+        Ok(Self {
+            github: Client::builder().default_headers(github_headers).build()?,
+            anonymous: Client::builder().default_headers(common_headers).build()?,
+        })
+    }
+
+    /// GETs `path` from the GitHub API and deserializes the JSON body. Non-2xx responses are errors.
+    fn github_get<T: DeserializeOwned>(&self, path: &str) -> Result<T> {
+        let url = format!("https://api.github.com{path}");
+        let response = self.github.get(&url).send()?;
+        if !response.status().is_success() {
+            bail!("GET {url} failed with HTTP {}", response.status());
+        }
+        Ok(response.json()?)
+    }
+
+    /// Walks every page of a paginated GitHub endpoint, using `items` to pull the list out of each page.
+    ///
+    /// `path` must not already contain a query string. Paging stops at the first page that is not full.
+    fn github_get_all<R: DeserializeOwned, T>(&self, path: &str, items: impl Fn(R) -> Vec<T>) -> Result<Vec<T>> {
+        let mut all = Vec::new();
+        for page in 1.. {
+            let response: R = self.github_get(&format!("{path}?per_page={PAGE_SIZE}&page={page}"))?;
+            let batch = items(response);
+            let fetched = batch.len();
+            all.extend(batch);
+            if fetched < PAGE_SIZE {
+                break;
+            }
+        }
+        Ok(all)
+    }
+
+    /// Lists all check runs for `sha`. Without paging GitHub returns only the first 30.
+    fn list_check_runs(&self, owner: &str, repo: &str, sha: &str) -> Result<Vec<CheckRun>> {
+        let path = format!("/repos/{owner}/{repo}/commits/{sha}/check-runs");
+        self.github_get_all(&path, |response: CheckRunsResponse| response.check_runs)
+    }
+
+    /// Lists all statuses from the combined status of `sha`. Without paging GitHub returns only the first 30.
+    fn list_statuses(&self, owner: &str, repo: &str, sha: &str) -> Result<Vec<CommitStatus>> {
+        let path = format!("/repos/{owner}/{repo}/commits/{sha}/status");
+        self.github_get_all(&path, |response: CombinedStatusResponse| response.statuses)
+    }
+
+    /// Asks CLA Assistant to re-evaluate a pull request. Non-2xx responses are errors.
+    fn recheck_cla(&self, owner: &str, repo: &str, pr_number: u64) -> Result<()> {
+        let url = format!("https://cla-assistant.io/check/{owner}/{repo}?pullRequest={pr_number}");
+        // Third-party host: go through the credential-free client so the GitHub token is not disclosed.
+        let response = self
+            .anonymous
+            .get(&url)
+            .header(ACCEPT, HeaderValue::from_static("text/plain, */*"))
+            .send()?;
+        println!("CLA Assistant recheck response: HTTP {}", response.status());
+        if !response.status().is_success() {
+            bail!("CLA Assistant recheck failed with HTTP {}", response.status());
+        }
+        Ok(())
+    }
+}
+
+#[derive(Deserialize)]
+struct PullRequest {
+    state: String,
+    draft: bool,
+    head: PullRequestRef,
+    base: PullRequestRef,
+}
+
+#[derive(Deserialize)]
+struct PullRequestRef {
+    sha: String,
+    #[serde(rename = "ref")]
+    ref_name: String,
+}
+
+#[derive(Deserialize)]
+struct CommitResponse {
+    commit: Commit,
+}
+
+#[derive(Deserialize)]
+struct Commit {
+    author: CommitPerson,
+    committer: CommitPerson,
+}
+
+#[derive(Deserialize)]
+struct CommitPerson {
+    date: Option<String>,
+}
+
+#[derive(Deserialize)]
+struct CheckRunsResponse {
+    check_runs: Vec<CheckRun>,
+}
+
+#[derive(Deserialize)]
+struct CheckRun {
+    name: String,
+    status: String,
+    conclusion: Option<String>,
+}
+
+#[derive(Deserialize)]
+struct CombinedStatusResponse {
+    statuses: Vec<CommitStatus>,
+}
+
+#[derive(Clone, Deserialize)]
+struct CommitStatus {
+    context: String,
+    state: String,
+}