diff --git a/.github/workflows/pr_code_changes.yaml b/.github/workflows/pr_code_changes.yaml index cf3356941..bc10cc6f1 100644 --- a/.github/workflows/pr_code_changes.yaml +++ b/.github/workflows/pr_code_changes.yaml @@ -6,11 +6,18 @@ on: branches: - main paths: + - pyproject.toml + - uv.lock + - modal_app/** - policyengine_us_data/** - tests/** - .github/workflows/** - Makefile +concurrency: + group: pr-code-changes-${{ github.event.pull_request.number }} + cancel-in-progress: true + jobs: check-fork: runs-on: ubuntu-latest @@ -30,6 +37,86 @@ jobs: fi echo "✅ PR is from the correct repository" + decide-test-scope: + name: Decide PR test scope + runs-on: ubuntu-latest + needs: check-fork + outputs: + full_suite: ${{ steps.decide.outputs.full_suite }} + reason: ${{ steps.decide.outputs.reason }} + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - id: decide + env: + BASE_SHA: ${{ github.event.pull_request.base.sha }} + HEAD_SHA: ${{ github.event.pull_request.head.sha }} + PR_LABELS_JSON: ${{ toJson(github.event.pull_request.labels.*.name) }} + run: | + python - <<'PY' + import fnmatch + import json + import os + import subprocess + + labels = set(json.loads(os.environ["PR_LABELS_JSON"])) + changed_files = subprocess.check_output( + [ + "git", + "diff", + "--name-only", + os.environ["BASE_SHA"], + os.environ["HEAD_SHA"], + ], + text=True, + ).splitlines() + + full_suite_label = "full-data-ci" + critical_patterns = [ + "modal_app/**", + "policyengine_us_data/calibration/**", + "policyengine_us_data/datasets/**", + "policyengine_us_data/db/**", + "policyengine_us_data/storage/download_private_prerequisites.py", + "policyengine_us_data/utils/loss.py", + "policyengine_us_data/utils/mortgage_interest.py", + "policyengine_us_data/utils/soi.py", + "policyengine_us_data/utils/uprating.py", + ] + + matched_files = [ + path + for path in changed_files + if any(fnmatch.fnmatch(path, pattern) for pattern in critical_patterns) + ] + + if full_suite_label in labels: + full_suite = True + reason = f"label:{full_suite_label}" + elif matched_files: + full_suite = True + reason = f"critical-path:{matched_files[0]}" + else: + full_suite = False + reason = "basic-pytest-only" + + with open(os.environ["GITHUB_OUTPUT"], "a", encoding="utf-8") as output: + output.write(f"full_suite={'true' if full_suite else 'false'}\n") + output.write(f"reason={reason}\n") + + summary = [ + "### PR test scope", + f"- full suite: `{'true' if full_suite else 'false'}`", + f"- reason: `{reason}`", + ] + if matched_files: + summary.append(f"- first matching file: `{matched_files[0]}`") + with open(os.environ["GITHUB_STEP_SUMMARY"], "a", encoding="utf-8") as out: + out.write("\n".join(summary) + "\n") + PY + check-lock-freshness: name: Check uv.lock freshness runs-on: ubuntu-latest @@ -81,10 +168,10 @@ jobs: run: python -c "from policyengine_core.data import Dataset; print('Core import OK')" Test: - needs: [check-fork, Lint] + needs: [check-fork, Lint, decide-test-scope] uses: ./.github/workflows/reusable_test.yaml with: - full_suite: true + full_suite: ${{ needs.decide-test-scope.outputs.full_suite == 'true' }} upload_data: false deploy_docs: false - secrets: inherit \ No newline at end of file + secrets: inherit diff --git a/changelog.d/cheaper-pr-ci.fixed.md b/changelog.d/cheaper-pr-ci.fixed.md new file mode 100644 index 000000000..c39419d48 --- /dev/null +++ b/changelog.d/cheaper-pr-ci.fixed.md @@ -0,0 +1,2 @@ +Reduce unnecessary PR CI spend by canceling superseded runs and limiting +the Modal-backed full data build to labeled or high-risk data-pipeline changes. diff --git a/policyengine_us_data/tests/test_calibration/test_unified_calibration.py b/policyengine_us_data/tests/test_calibration/test_unified_calibration.py index f92d02db0..abb8f0a40 100644 --- a/policyengine_us_data/tests/test_calibration/test_unified_calibration.py +++ b/policyengine_us_data/tests/test_calibration/test_unified_calibration.py @@ -459,7 +459,7 @@ def test_county_var_uses_county_values(self): person_hh_idx = np.array([0, 1, 2, 3]) builder = UnifiedMatrixBuilder.__new__(UnifiedMatrixBuilder) - hh_vars, _ = builder._assemble_clone_values( + hh_vars, _, _ = builder._assemble_clone_values( state_values, clone_states, person_hh_idx, @@ -499,7 +499,7 @@ def test_non_county_var_uses_state_values(self): person_hh_idx = np.array([0, 1, 2, 3]) builder = UnifiedMatrixBuilder.__new__(UnifiedMatrixBuilder) - hh_vars, _ = builder._assemble_clone_values( + hh_vars, _, _ = builder._assemble_clone_values( state_values, clone_states, person_hh_idx, diff --git a/policyengine_us_data/tests/test_calibration/test_unified_matrix_builder.py b/policyengine_us_data/tests/test_calibration/test_unified_matrix_builder.py index da0f49882..b7e964543 100644 --- a/policyengine_us_data/tests/test_calibration/test_unified_matrix_builder.py +++ b/policyengine_us_data/tests/test_calibration/test_unified_matrix_builder.py @@ -212,7 +212,11 @@ def test_reform_targets_preserved(self): def test_inactive_targets_are_excluded(self): b = self._make_builder(time_period=2024) df = b._query_targets({"stratum_ids": [1], "variables": ["aca_ptc"]}) - baseline_rows = df[(df["variable"] == "aca_ptc") & (df["reform_id"] == 0)] + baseline_rows = df[ + (df["variable"] == "aca_ptc") + & (df["reform_id"] == 0) + & (df["stratum_id"] == 1) + ] self.assertEqual(len(baseline_rows), 1) self.assertEqual(int(baseline_rows.iloc[0]["period"]), 2022) self.assertEqual(float(baseline_rows.iloc[0]["value"]), 10000.0) @@ -526,6 +530,7 @@ def test_return_structure_no_takeup(self, mock_msim_cls, mock_gcv): sim=None, target_vars={"snap"}, constraint_vars={"income"}, + reform_vars=set(), geography=geo, rerandomize_takeup=False, ) @@ -560,6 +565,7 @@ def test_fresh_sim_per_state(self, mock_msim_cls, mock_gcv): sim=None, target_vars={"snap"}, constraint_vars=set(), + reform_vars=set(), geography=geo, rerandomize_takeup=False, ) @@ -582,6 +588,7 @@ def test_state_fips_set_correctly(self, mock_msim_cls, mock_gcv): sim=None, target_vars={"snap"}, constraint_vars=set(), + reform_vars=set(), geography=geo, rerandomize_takeup=False, ) @@ -617,6 +624,7 @@ def test_takeup_vars_forced_true(self, mock_msim_cls, mock_gcv): sim=None, target_vars={"snap"}, constraint_vars=set(), + reform_vars=set(), geography=geo, rerandomize_takeup=True, ) @@ -664,6 +672,7 @@ def test_count_vars_skipped(self, mock_msim_cls, mock_gcv): sim=None, target_vars={"snap", "snap_count"}, constraint_vars=set(), + reform_vars=set(), geography=geo, rerandomize_takeup=False, ) @@ -914,6 +923,7 @@ def test_workers_gt1_creates_pool(self, mock_msim_cls, mock_gcv, mock_pool_cls): sim=None, target_vars={"snap"}, constraint_vars=set(), + reform_vars=set(), geography=geo, rerandomize_takeup=False, workers=2, @@ -939,6 +949,7 @@ def test_workers_1_skips_pool(self, mock_msim_cls, mock_gcv): sim=None, target_vars={"snap"}, constraint_vars=set(), + reform_vars=set(), geography=geo, rerandomize_takeup=False, workers=1,