Skip to content

Commit cfd03b9

Browse files
committed
Auto-rerun failed daily workflow jobs
1 parent 72bef87 commit cfd03b9

7 files changed

Lines changed: 68 additions & 18 deletions

.github/scripts/rerun-flaky-pr-jobs.py renamed to .github/scripts/rerun-failed-workflow-jobs.py

Lines changed: 26 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,5 @@
11
#!/usr/bin/env python3
2-
"""Rerun up to two failed jobs for the pull request CI run that triggered this workflow.
3-
4-
Reads the triggering workflow run id from the WORKFLOW_RUN_ID environment variable,
5-
ignores the synthetic required-status-check job, and reruns eligible failed jobs
6-
up to two times.
7-
"""
2+
"""Rerun failed jobs for the workflow run that triggered this workflow."""
83

94
from __future__ import annotations
105

@@ -13,43 +8,51 @@
138
import subprocess
149
import urllib.parse
1510

16-
MAX_FAILED_JOBS_PER_WORKFLOW_RUN = 5
17-
MAX_RERUN_ATTEMPTS = 2
11+
DEFAULT_IGNORED_JOB_SUFFIXES = ("required-status-check",)
12+
DEFAULT_MAX_FAILED_JOBS_PER_WORKFLOW_RUN = 5
13+
DEFAULT_MAX_RERUN_ATTEMPTS = 2
1814

1915

2016
def main() -> None:
2117
owner, repo = os.environ["GITHUB_REPOSITORY"].split("/", 1)
2218
run_id = os.environ["WORKFLOW_RUN_ID"]
2319

2420
run = gh_get(f"/repos/{owner}/{repo}/actions/runs/{run_id}")
25-
pr_number = resolve_pr_number(owner, repo, run)
26-
pr_label = f"PR #{pr_number}" if pr_number is not None else "PR unknown"
27-
label = f"{pr_label}, run {run['id']}, attempt {run['run_attempt']}"
21+
label = build_label(owner, repo, run)
2822

2923
if run["status"] != "completed" or run.get("conclusion") != "failure":
3024
print(f"Skipped {label}: status={run['status']}, conclusion={run.get('conclusion')}.")
3125
return
3226

27+
max_rerun_attempts = int(os.getenv("MAX_RERUN_ATTEMPTS", DEFAULT_MAX_RERUN_ATTEMPTS))
3328
rerun_attempts = run["run_attempt"] - 1
34-
if rerun_attempts >= MAX_RERUN_ATTEMPTS:
29+
if rerun_attempts >= max_rerun_attempts:
3530
print(f"Skipped {label}: already rerun {rerun_attempts} times.")
3631
return
3732

33+
ignored_job_suffixes = tuple(
34+
suffix.strip()
35+
for suffix in os.getenv("IGNORED_JOB_SUFFIXES", ",".join(DEFAULT_IGNORED_JOB_SUFFIXES)).split(",")
36+
if suffix.strip()
37+
)
3838
failed_real_jobs = [
3939
job
4040
for job in list_jobs_for_run(owner, repo, run["id"])
4141
if job.get("conclusion") == "failure"
42-
and not job["name"].endswith("required-status-check")
42+
and not any(job["name"].endswith(suffix) for suffix in ignored_job_suffixes)
4343
]
4444

4545
if not failed_real_jobs:
46-
print(f"Skipped {label}: only synthetic jobs failed.")
46+
print(f"Skipped {label}: only ignored jobs failed.")
4747
return
4848

49-
if len(failed_real_jobs) > MAX_FAILED_JOBS_PER_WORKFLOW_RUN:
49+
max_failed_jobs = int(
50+
os.getenv("MAX_FAILED_JOBS_PER_WORKFLOW_RUN", DEFAULT_MAX_FAILED_JOBS_PER_WORKFLOW_RUN)
51+
)
52+
if len(failed_real_jobs) > max_failed_jobs:
5053
print(
5154
f"Skipped {label}: {len(failed_real_jobs)} failed jobs"
52-
f" exceeded limit {MAX_FAILED_JOBS_PER_WORKFLOW_RUN}."
55+
f" exceeded limit {max_failed_jobs}."
5356
)
5457
return
5558

@@ -61,6 +64,12 @@ def main() -> None:
6164
print(f"::notice::{label}: reran failed jobs {job_list}.")
6265

6366

67+
def build_label(owner: str, repo: str, run: dict) -> str:
68+
pr_number = resolve_pr_number(owner, repo, run)
69+
pr_label = f"PR #{pr_number}, " if pr_number is not None else ""
70+
return f"{pr_label}{run['name']} #{run['run_number']}, run {run['id']}, attempt {run['run_attempt']}"
71+
72+
6473
def list_jobs_for_run(owner: str, repo: str, run_id: int) -> list[dict]:
6574
jobs: list[dict] = []
6675
page = 1
@@ -101,4 +110,4 @@ def resolve_pr_number(owner: str, repo: str, run: dict) -> int | None:
101110

102111

103112
if __name__ == "__main__":
104-
main()
113+
main()

.github/workflows/build-daily-no-build-cache.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,3 +45,4 @@ jobs:
4545
needs.common.result == 'success' &&
4646
needs.test-latest-deps.result == 'success'
4747
}}
48+
failure-notification-after-attempts: 2

.github/workflows/build-daily.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,3 +86,4 @@ jobs:
8686
needs.lint.result == 'success' &&
8787
needs.publish-snapshots.result == 'success'
8888
}}
89+
failure-notification-after-attempts: 2

.github/workflows/native-tests-daily.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,3 +25,4 @@ jobs:
2525
uses: ./.github/workflows/reusable-workflow-notification.yml
2626
with:
2727
success: ${{ needs.graalvm-native-tests.result == 'success' }}
28+
failure-notification-after-attempts: 2
[New workflow file, presumably under .github/workflows/ — its path was not captured in this view]

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
name: Rerun failed daily jobs
2+
3+
on:
4+
workflow_run:
5+
workflows:
6+
- "Build (daily)"
7+
- "Build (daily --no-build-cache)"
8+
- "Daily GraalVM native tests"
9+
types:
10+
- completed
11+
12+
concurrency:
13+
group: rerun-failed-daily-jobs
14+
cancel-in-progress: false
15+
16+
permissions:
17+
actions: write
18+
contents: read
19+
20+
jobs:
21+
rerun-failed-jobs:
22+
runs-on: ubuntu-latest
23+
timeout-minutes: 10
24+
if: github.event.workflow_run.conclusion == 'failure'
25+
steps:
26+
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
27+
28+
- name: Rerun eligible failed jobs
29+
env:
30+
GH_TOKEN: ${{ github.token }}
31+
WORKFLOW_RUN_ID: ${{ github.event.workflow_run.id }}
32+
run: python .github/scripts/rerun-failed-workflow-jobs.py

.github/workflows/rerun-flaky-pr-jobs.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,4 +27,4 @@ jobs:
2727
env:
2828
GH_TOKEN: ${{ github.token }}
2929
WORKFLOW_RUN_ID: ${{ github.event.workflow_run.id }}
30-
run: python .github/scripts/rerun-flaky-pr-jobs.py
30+
run: python .github/scripts/rerun-failed-workflow-jobs.py

.github/workflows/reusable-workflow-notification.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,11 @@ on:
1515
type: string
1616
required: false
1717
description: "Custom title for the issue (defaults to 'Workflow failed: $GITHUB_WORKFLOW')"
18+
failure-notification-after-attempts:
19+
type: number
20+
required: false
21+
default: 0
22+
description: "Only notify on failure after this many workflow run attempts"
1823

1924
permissions:
2025
contents: read
@@ -25,6 +30,7 @@ jobs:
2530
contents: read
2631
issues: write
2732
runs-on: ubuntu-latest
33+
if: ${{ inputs.success || github.run_attempt > inputs.failure-notification-after-attempts }}
2834
steps:
2935
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
3036

0 commit comments

Comments
 (0)