Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
93 changes: 90 additions & 3 deletions .github/workflows/pr_code_changes.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,18 @@ on:
branches:
- main
paths:
- pyproject.toml
- uv.lock
- modal_app/**
- policyengine_us_data/**
- tests/**
- .github/workflows/**
- Makefile

concurrency:
group: pr-code-changes-${{ github.event.pull_request.number }}
cancel-in-progress: true

jobs:
check-fork:
runs-on: ubuntu-latest
Expand All @@ -30,6 +37,86 @@ jobs:
fi
echo "✅ PR is from the correct repository"

decide-test-scope:
name: Decide PR test scope
runs-on: ubuntu-latest
needs: check-fork
outputs:
full_suite: ${{ steps.decide.outputs.full_suite }}
reason: ${{ steps.decide.outputs.reason }}
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0

- id: decide
env:
BASE_SHA: ${{ github.event.pull_request.base.sha }}
HEAD_SHA: ${{ github.event.pull_request.head.sha }}
PR_LABELS_JSON: ${{ toJson(github.event.pull_request.labels.*.name) }}
run: |
python - <<'PY'
import fnmatch
import json
import os
import subprocess

labels = set(json.loads(os.environ["PR_LABELS_JSON"]))
changed_files = subprocess.check_output(
[
"git",
"diff",
"--name-only",
os.environ["BASE_SHA"],
os.environ["HEAD_SHA"],
],
text=True,
).splitlines()

full_suite_label = "full-data-ci"
critical_patterns = [
"modal_app/**",
"policyengine_us_data/calibration/**",
"policyengine_us_data/datasets/**",
"policyengine_us_data/db/**",
"policyengine_us_data/storage/download_private_prerequisites.py",
"policyengine_us_data/utils/loss.py",
"policyengine_us_data/utils/mortgage_interest.py",
"policyengine_us_data/utils/soi.py",
"policyengine_us_data/utils/uprating.py",
]

matched_files = [
path
for path in changed_files
if any(fnmatch.fnmatch(path, pattern) for pattern in critical_patterns)
]

if full_suite_label in labels:
full_suite = True
reason = f"label:{full_suite_label}"
elif matched_files:
full_suite = True
reason = f"critical-path:{matched_files[0]}"
else:
full_suite = False
reason = "basic-pytest-only"

with open(os.environ["GITHUB_OUTPUT"], "a", encoding="utf-8") as output:
output.write(f"full_suite={'true' if full_suite else 'false'}\n")
output.write(f"reason={reason}\n")

summary = [
"### PR test scope",
f"- full suite: `{'true' if full_suite else 'false'}`",
f"- reason: `{reason}`",
]
if matched_files:
summary.append(f"- first matching file: `{matched_files[0]}`")
with open(os.environ["GITHUB_STEP_SUMMARY"], "a", encoding="utf-8") as out:
out.write("\n".join(summary) + "\n")
PY

check-lock-freshness:
name: Check uv.lock freshness
runs-on: ubuntu-latest
Expand Down Expand Up @@ -81,10 +168,10 @@ jobs:
run: python -c "from policyengine_core.data import Dataset; print('Core import OK')"

Test:
needs: [check-fork, Lint]
needs: [check-fork, Lint, decide-test-scope]
uses: ./.github/workflows/reusable_test.yaml
with:
full_suite: true
full_suite: ${{ needs.decide-test-scope.outputs.full_suite == 'true' }}
upload_data: false
deploy_docs: false
secrets: inherit
secrets: inherit
2 changes: 2 additions & 0 deletions changelog.d/cheaper-pr-ci.fixed.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Reduce unnecessary PR CI spend by canceling superseded runs and limiting
the Modal-backed full data build to labeled or high-risk data-pipeline changes.
Original file line number Diff line number Diff line change
Expand Up @@ -459,7 +459,7 @@ def test_county_var_uses_county_values(self):
person_hh_idx = np.array([0, 1, 2, 3])

builder = UnifiedMatrixBuilder.__new__(UnifiedMatrixBuilder)
hh_vars, _ = builder._assemble_clone_values(
hh_vars, _, _ = builder._assemble_clone_values(
state_values,
clone_states,
person_hh_idx,
Expand Down Expand Up @@ -499,7 +499,7 @@ def test_non_county_var_uses_state_values(self):
person_hh_idx = np.array([0, 1, 2, 3])

builder = UnifiedMatrixBuilder.__new__(UnifiedMatrixBuilder)
hh_vars, _ = builder._assemble_clone_values(
hh_vars, _, _ = builder._assemble_clone_values(
state_values,
clone_states,
person_hh_idx,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,11 @@ def test_reform_targets_preserved(self):
def test_inactive_targets_are_excluded(self):
b = self._make_builder(time_period=2024)
df = b._query_targets({"stratum_ids": [1], "variables": ["aca_ptc"]})
baseline_rows = df[(df["variable"] == "aca_ptc") & (df["reform_id"] == 0)]
baseline_rows = df[
(df["variable"] == "aca_ptc")
& (df["reform_id"] == 0)
& (df["stratum_id"] == 1)
]
self.assertEqual(len(baseline_rows), 1)
self.assertEqual(int(baseline_rows.iloc[0]["period"]), 2022)
self.assertEqual(float(baseline_rows.iloc[0]["value"]), 10000.0)
Expand Down Expand Up @@ -526,6 +530,7 @@ def test_return_structure_no_takeup(self, mock_msim_cls, mock_gcv):
sim=None,
target_vars={"snap"},
constraint_vars={"income"},
reform_vars=set(),
geography=geo,
rerandomize_takeup=False,
)
Expand Down Expand Up @@ -560,6 +565,7 @@ def test_fresh_sim_per_state(self, mock_msim_cls, mock_gcv):
sim=None,
target_vars={"snap"},
constraint_vars=set(),
reform_vars=set(),
geography=geo,
rerandomize_takeup=False,
)
Expand All @@ -582,6 +588,7 @@ def test_state_fips_set_correctly(self, mock_msim_cls, mock_gcv):
sim=None,
target_vars={"snap"},
constraint_vars=set(),
reform_vars=set(),
geography=geo,
rerandomize_takeup=False,
)
Expand Down Expand Up @@ -617,6 +624,7 @@ def test_takeup_vars_forced_true(self, mock_msim_cls, mock_gcv):
sim=None,
target_vars={"snap"},
constraint_vars=set(),
reform_vars=set(),
geography=geo,
rerandomize_takeup=True,
)
Expand Down Expand Up @@ -664,6 +672,7 @@ def test_count_vars_skipped(self, mock_msim_cls, mock_gcv):
sim=None,
target_vars={"snap", "snap_count"},
constraint_vars=set(),
reform_vars=set(),
geography=geo,
rerandomize_takeup=False,
)
Expand Down Expand Up @@ -914,6 +923,7 @@ def test_workers_gt1_creates_pool(self, mock_msim_cls, mock_gcv, mock_pool_cls):
sim=None,
target_vars={"snap"},
constraint_vars=set(),
reform_vars=set(),
geography=geo,
rerandomize_takeup=False,
workers=2,
Expand All @@ -939,6 +949,7 @@ def test_workers_1_skips_pool(self, mock_msim_cls, mock_gcv):
sim=None,
target_vars={"snap"},
constraint_vars=set(),
reform_vars=set(),
geography=geo,
rerandomize_takeup=False,
workers=1,
Expand Down
Loading