Skip to content

Commit 18af518

Browse files
authored
Merge queue: make dev checks required + add .asf.yaml validation (apache#21239)
Related to apache#6880. Follow-up on apache#17538. Bringing back the merge queues 🤞🏻
Last time it was painful because:
- there was no `.asf.yaml` validation
- if something went wrong, we weren't able to force-merge without the infra team

This PR solves the first problem. As for the second one, I have the permissions to bypass, so I should be able to quickly revert if something ever goes bad. <img width="830" height="151" alt="image" src="https://github.com/user-attachments/assets/62fb2bb5-44d8-4aae-a006-188d96728140" /> Check result: https://github.com/apache/datafusion/actions/runs/23715604583/job/69082077684?pr=21239 I also checked that CI will keep working by [merging](apache/datafusion-sandbox#197) this into our sandbox and then opening and merging [a dummy PR](apache/datafusion-sandbox#204). For now, bringing just the basic checks (in dev.yml). Will do rust.yml separately if everything goes smoothly after this one is merged.
1 parent e5358b7 commit 18af518

File tree

3 files changed

+159
-0
lines changed

3 files changed

+159
-0
lines changed

.asf.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,12 @@ github:
5151
main:
5252
required_pull_request_reviews:
5353
required_approving_review_count: 1
54+
required_status_checks:
55+
contexts:
56+
- "Check License Header"
57+
- "Use prettier to check formatting of documents"
58+
- "Validate required_status_checks in .asf.yaml"
59+
- "Spell Check with Typos"
5460
# needs to be updated as part of the release process
5561
# .asf.yaml doesn't support wildcard branch protection rules, only exact branch names
5662
# https://github.com/apache/infrastructure-asfyaml?tab=readme-ov-file#branch-protection

.github/workflows/dev.yml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,14 @@ jobs:
5151
# if you encounter error, see instructions inside the script
5252
run: ci/scripts/doc_prettier_check.sh
5353

54+
asf-yaml-check:
55+
name: Validate required_status_checks in .asf.yaml
56+
runs-on: ubuntu-latest
57+
steps:
58+
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
59+
- run: pip install pyyaml
60+
- run: python3 ci/scripts/check_asf_yaml_status_checks.py
61+
5462
typos:
5563
name: Spell Check with Typos
5664
runs-on: ubuntu-latest
Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,145 @@
1+
#!/usr/bin/env python3
2+
# Licensed to the Apache Software Foundation (ASF) under one
3+
# or more contributor license agreements. See the NOTICE file
4+
# distributed with this work for additional information
5+
# regarding copyright ownership. The ASF licenses this file
6+
# to you under the Apache License, Version 2.0 (the
7+
# "License"); you may not use this file except in compliance
8+
# with the License. You may obtain a copy of the License at
9+
#
10+
# http://www.apache.org/licenses/LICENSE-2.0
11+
#
12+
# Unless required by applicable law or agreed to in writing,
13+
# software distributed under the License is distributed on an
14+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
# KIND, either express or implied. See the License for the
16+
# specific language governing permissions and limitations
17+
# under the License.
18+
19+
"""
20+
Validate that every entry in .asf.yaml required_status_checks
21+
matches an actual GitHub Actions job name, and that the workflow
22+
is not filtered by paths/paths-ignore (which would prevent the
23+
check from running on some PRs, blocking merges).
24+
25+
A typo or stale entry in required_status_checks will block all
26+
merges for the project, so this check catches that early.
27+
"""
28+
29+
import glob
30+
import os
31+
import sys
32+
33+
import yaml
34+
35+
36+
def get_required_checks(asf_yaml_path):
    """Extract all required_status_checks contexts from .asf.yaml.

    Args:
        asf_yaml_path: Path to the ``.asf.yaml`` file to read.

    Returns:
        A dict mapping each required status-check context (a string) to the
        list of protected branch names that require it. The dict is empty
        when the file configures no required status checks.
    """
    with open(asf_yaml_path, encoding="utf-8") as f:
        config = yaml.safe_load(f)

    # A YAML key that is present but has no value (e.g. a bare `main:` or
    # `contexts:`) loads as None, not as an empty mapping/list, so a plain
    # `.get(key, {})` default is not enough. Normalise None at every level
    # instead of crashing with AttributeError/TypeError.
    github = (config or {}).get("github") or {}
    branches = github.get("protected_branches") or {}

    checks = {}  # context -> list of branches requiring it
    for branch, settings in branches.items():
        status_checks = (settings or {}).get("required_status_checks") or {}
        for ctx in status_checks.get("contexts") or []:
            checks.setdefault(ctx, []).append(branch)

    return checks
51+
52+
53+
def get_workflow_jobs(workflows_dir):
    """Collect all jobs with their metadata from GitHub Actions workflow files.

    Args:
        workflows_dir: Directory containing the workflow files
            (``*.yml`` and ``*.yaml``).

    Returns:
        A dict mapping job identifier (name or key) to a list of
        (workflow_file, has_path_filters) tuples, where ``workflow_file`` is
        the file's basename and ``has_path_filters`` is True when the
        workflow's ``pull_request`` trigger sets ``paths`` or
        ``paths-ignore``. A job with an explicit ``name`` is registered under
        both its name and its key.
    """
    # GitHub Actions accepts both extensions, so scan both; otherwise a
    # required check defined in a `.yaml` workflow would look like it is missing.
    workflow_files = sorted(
        glob.glob(os.path.join(workflows_dir, "*.yml"))
        + glob.glob(os.path.join(workflows_dir, "*.yaml"))
    )

    jobs = {}  # identifier -> [(workflow_file, has_path_filters)]
    for workflow_file in workflow_files:
        with open(workflow_file, encoding="utf-8") as f:
            workflow = yaml.safe_load(f)

        # Skip empty files, non-mapping documents, and workflows whose
        # `jobs:` key is missing, null, or not a mapping.
        if not isinstance(workflow, dict):
            continue
        workflow_jobs = workflow.get("jobs")
        if not isinstance(workflow_jobs, dict):
            continue

        # Check if pull_request trigger has path filters
        on = workflow.get(True, workflow.get("on", {}))  # yaml parses `on:` as True
        pr_trigger = on.get("pull_request", {}) if isinstance(on, dict) else {}
        has_path_filters = bool(
            isinstance(pr_trigger, dict)
            and (pr_trigger.get("paths") or pr_trigger.get("paths-ignore"))
        )

        basename = os.path.basename(workflow_file)
        for job_key, job_config in workflow_jobs.items():
            if not isinstance(job_config, dict):
                continue
            job_name = job_config.get("name", job_key)
            info = (basename, has_path_filters)
            jobs.setdefault(job_name, []).append(info)
            # Also register the raw key so either spelling can be matched.
            if job_key != job_name:
                jobs.setdefault(job_key, []).append(info)

    return jobs
86+
87+
88+
def main():
    """Validate .asf.yaml required_status_checks against the workflow jobs.

    Locates the repository root three directory levels above this script,
    loads the required status checks and the workflow jobs, and reports every
    required check that either matches no job or is provided only by
    workflows whose pull_request trigger uses paths/paths-ignore filters.

    Prints a summary to stdout. Exits with status 1 when any problem is
    found; returns normally otherwise.
    """
    script_path = os.path.abspath(__file__)
    script_dir = os.path.dirname(script_path)
    repo_root = os.path.dirname(os.path.dirname(script_dir))

    required = get_required_checks(os.path.join(repo_root, ".asf.yaml"))
    if not required:
        print("No required_status_checks found in .asf.yaml — nothing to validate.")
        return

    jobs = get_workflow_jobs(os.path.join(repo_root, ".github", "workflows"))

    problems = []
    for context in sorted(required):
        branch_list = ", ".join(sorted(required[context]))
        prefix = f' - "{context}" (branch: {branch_list}): '

        if context not in jobs:
            problems.append(prefix + "not found in any GitHub Actions workflow")
            continue

        providers = jobs[context]
        with_filters = [wf for wf, filtered in providers if filtered]
        # The check only blocks merges when EVERY workflow providing it is
        # path-filtered; a single unfiltered provider keeps it running.
        if with_filters and len(with_filters) == len(providers):
            names = ", ".join(with_filters)
            problems.append(
                prefix
                + f"workflow {names} uses paths/paths-ignore filters on "
                + "pull_request, so this check won't run for some PRs "
                + "and will block merging"
            )

    if not problems:
        print(
            f"OK: All {len(required)} required_status_checks "
            "match existing GitHub Actions jobs."
        )
        return

    print("ERROR: Problems found with required_status_checks in .asf.yaml:\n")
    for problem in problems:
        print(problem)
    print()
    print("Available job names across all workflows:")
    for job_name in sorted(jobs):
        print(f" - {job_name}")
    sys.exit(1)
142+
143+
144+
if __name__ == "__main__":
145+
main()

0 commit comments

Comments
 (0)