Skip to content

Commit bc1a996

Browse files
authored
Merge pull request #661 from PolicyEngine/codex/cheaper-pr-ci
Reduce unnecessary Modal CI runs on PRs
2 parents 14fb1f0 + e202b93 commit bc1a996

4 files changed

Lines changed: 106 additions & 6 deletions

File tree

.github/workflows/pr_code_changes.yaml

Lines changed: 90 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,18 @@ on:
66
branches:
77
- main
88
paths:
9+
- pyproject.toml
10+
- uv.lock
11+
- modal_app/**
912
- policyengine_us_data/**
1013
- tests/**
1114
- .github/workflows/**
1215
- Makefile
1316

17+
concurrency:
18+
group: pr-code-changes-${{ github.event.pull_request.number }}
19+
cancel-in-progress: true
20+
1421
jobs:
1522
check-fork:
1623
runs-on: ubuntu-latest
@@ -30,6 +37,86 @@ jobs:
3037
fi
3138
echo "✅ PR is from the correct repository"
3239
40+
decide-test-scope:
41+
name: Decide PR test scope
42+
runs-on: ubuntu-latest
43+
needs: check-fork
44+
outputs:
45+
full_suite: ${{ steps.decide.outputs.full_suite }}
46+
reason: ${{ steps.decide.outputs.reason }}
47+
steps:
48+
- uses: actions/checkout@v4
49+
with:
50+
fetch-depth: 0
51+
52+
- id: decide
53+
env:
54+
BASE_SHA: ${{ github.event.pull_request.base.sha }}
55+
HEAD_SHA: ${{ github.event.pull_request.head.sha }}
56+
PR_LABELS_JSON: ${{ toJson(github.event.pull_request.labels.*.name) }}
57+
run: |
58+
python - <<'PY'
59+
import fnmatch
60+
import json
61+
import os
62+
import subprocess
63+
64+
labels = set(json.loads(os.environ["PR_LABELS_JSON"]))
65+
changed_files = subprocess.check_output(
66+
[
67+
"git",
68+
"diff",
69+
"--name-only",
70+
os.environ["BASE_SHA"],
71+
os.environ["HEAD_SHA"],
72+
],
73+
text=True,
74+
).splitlines()
75+
76+
full_suite_label = "full-data-ci"
77+
critical_patterns = [
78+
"modal_app/**",
79+
"policyengine_us_data/calibration/**",
80+
"policyengine_us_data/datasets/**",
81+
"policyengine_us_data/db/**",
82+
"policyengine_us_data/storage/download_private_prerequisites.py",
83+
"policyengine_us_data/utils/loss.py",
84+
"policyengine_us_data/utils/mortgage_interest.py",
85+
"policyengine_us_data/utils/soi.py",
86+
"policyengine_us_data/utils/uprating.py",
87+
]
88+
89+
matched_files = [
90+
path
91+
for path in changed_files
92+
if any(fnmatch.fnmatch(path, pattern) for pattern in critical_patterns)
93+
]
94+
95+
if full_suite_label in labels:
96+
full_suite = True
97+
reason = f"label:{full_suite_label}"
98+
elif matched_files:
99+
full_suite = True
100+
reason = f"critical-path:{matched_files[0]}"
101+
else:
102+
full_suite = False
103+
reason = "basic-pytest-only"
104+
105+
with open(os.environ["GITHUB_OUTPUT"], "a", encoding="utf-8") as output:
106+
output.write(f"full_suite={'true' if full_suite else 'false'}\n")
107+
output.write(f"reason={reason}\n")
108+
109+
summary = [
110+
"### PR test scope",
111+
f"- full suite: `{'true' if full_suite else 'false'}`",
112+
f"- reason: `{reason}`",
113+
]
114+
if matched_files:
115+
summary.append(f"- first matching file: `{matched_files[0]}`")
116+
with open(os.environ["GITHUB_STEP_SUMMARY"], "a", encoding="utf-8") as out:
117+
out.write("\n".join(summary) + "\n")
118+
PY
119+
33120
check-lock-freshness:
34121
name: Check uv.lock freshness
35122
runs-on: ubuntu-latest
@@ -81,10 +168,10 @@ jobs:
81168
run: python -c "from policyengine_core.data import Dataset; print('Core import OK')"
82169

83170
Test:
84-
needs: [check-fork, Lint]
171+
needs: [check-fork, Lint, decide-test-scope]
85172
uses: ./.github/workflows/reusable_test.yaml
86173
with:
87-
full_suite: true
174+
full_suite: ${{ needs.decide-test-scope.outputs.full_suite == 'true' }}
88175
upload_data: false
89176
deploy_docs: false
90-
secrets: inherit
177+
secrets: inherit

changelog.d/cheaper-pr-ci.fixed.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Reduce unnecessary PR CI spend by canceling superseded runs and limiting
2+
the Modal-backed full data build to labeled or high-risk data-pipeline changes.

policyengine_us_data/tests/test_calibration/test_unified_calibration.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -459,7 +459,7 @@ def test_county_var_uses_county_values(self):
459459
person_hh_idx = np.array([0, 1, 2, 3])
460460

461461
builder = UnifiedMatrixBuilder.__new__(UnifiedMatrixBuilder)
462-
hh_vars, _ = builder._assemble_clone_values(
462+
hh_vars, _, _ = builder._assemble_clone_values(
463463
state_values,
464464
clone_states,
465465
person_hh_idx,
@@ -499,7 +499,7 @@ def test_non_county_var_uses_state_values(self):
499499
person_hh_idx = np.array([0, 1, 2, 3])
500500

501501
builder = UnifiedMatrixBuilder.__new__(UnifiedMatrixBuilder)
502-
hh_vars, _ = builder._assemble_clone_values(
502+
hh_vars, _, _ = builder._assemble_clone_values(
503503
state_values,
504504
clone_states,
505505
person_hh_idx,

policyengine_us_data/tests/test_calibration/test_unified_matrix_builder.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -212,7 +212,11 @@ def test_reform_targets_preserved(self):
212212
def test_inactive_targets_are_excluded(self):
213213
b = self._make_builder(time_period=2024)
214214
df = b._query_targets({"stratum_ids": [1], "variables": ["aca_ptc"]})
215-
baseline_rows = df[(df["variable"] == "aca_ptc") & (df["reform_id"] == 0)]
215+
baseline_rows = df[
216+
(df["variable"] == "aca_ptc")
217+
& (df["reform_id"] == 0)
218+
& (df["stratum_id"] == 1)
219+
]
216220
self.assertEqual(len(baseline_rows), 1)
217221
self.assertEqual(int(baseline_rows.iloc[0]["period"]), 2022)
218222
self.assertEqual(float(baseline_rows.iloc[0]["value"]), 10000.0)
@@ -526,6 +530,7 @@ def test_return_structure_no_takeup(self, mock_msim_cls, mock_gcv):
526530
sim=None,
527531
target_vars={"snap"},
528532
constraint_vars={"income"},
533+
reform_vars=set(),
529534
geography=geo,
530535
rerandomize_takeup=False,
531536
)
@@ -560,6 +565,7 @@ def test_fresh_sim_per_state(self, mock_msim_cls, mock_gcv):
560565
sim=None,
561566
target_vars={"snap"},
562567
constraint_vars=set(),
568+
reform_vars=set(),
563569
geography=geo,
564570
rerandomize_takeup=False,
565571
)
@@ -582,6 +588,7 @@ def test_state_fips_set_correctly(self, mock_msim_cls, mock_gcv):
582588
sim=None,
583589
target_vars={"snap"},
584590
constraint_vars=set(),
591+
reform_vars=set(),
585592
geography=geo,
586593
rerandomize_takeup=False,
587594
)
@@ -617,6 +624,7 @@ def test_takeup_vars_forced_true(self, mock_msim_cls, mock_gcv):
617624
sim=None,
618625
target_vars={"snap"},
619626
constraint_vars=set(),
627+
reform_vars=set(),
620628
geography=geo,
621629
rerandomize_takeup=True,
622630
)
@@ -664,6 +672,7 @@ def test_count_vars_skipped(self, mock_msim_cls, mock_gcv):
664672
sim=None,
665673
target_vars={"snap", "snap_count"},
666674
constraint_vars=set(),
675+
reform_vars=set(),
667676
geography=geo,
668677
rerandomize_takeup=False,
669678
)
@@ -914,6 +923,7 @@ def test_workers_gt1_creates_pool(self, mock_msim_cls, mock_gcv, mock_pool_cls):
914923
sim=None,
915924
target_vars={"snap"},
916925
constraint_vars=set(),
926+
reform_vars=set(),
917927
geography=geo,
918928
rerandomize_takeup=False,
919929
workers=2,
@@ -939,6 +949,7 @@ def test_workers_1_skips_pool(self, mock_msim_cls, mock_gcv):
939949
sim=None,
940950
target_vars={"snap"},
941951
constraint_vars=set(),
952+
reform_vars=set(),
942953
geography=geo,
943954
rerandomize_takeup=False,
944955
workers=1,

0 commit comments

Comments
 (0)