Skip to content

Commit 32be41e

Browse files
anth-volk and claude committed
Run integration tests inside Modal with phase-based matrix
Tests must run where the data lives. The previous push.yaml ran pytest on the GH runner where H5 files don't exist, causing tests to silently skip via conftest logic. Fix: run tests inside the same Modal container that built the data.

data_build.py changes:
- SCRIPT_TESTS mapping: which integration tests go with which build
- --run-tests flag: runs pytest after build in the same container
- --test flag: runs standalone integration tests on Modal with all checkpointed data restored
- run_integration_test() function for tests not tied to a build step

push.yaml changes:
- Phase-based matrix jobs instead of sequential steps
- Phase 1 (uprating, acs, irs_puf): parallel, independent
- Phase 2 (cps, puf): parallel, needs phase1
- Phase 3 (extended_cps): needs phase2
- Phase 4 (enhanced_cps, stratified_cps): parallel, needs phase3
- Phase 5 (source_imputed, small_enhanced): parallel, needs phase4
- Each matrix entry: one modal run --script X --run-tests call
- Remaining tests (census_cps, database_build): parallel after phase4
- Image cache no longer busted between steps (clean runner per job)
- Exit codes propagate from Modal to GH Actions naturally

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 29a3a96 commit 32be41e

2 files changed

Lines changed: 244 additions & 139 deletions

File tree

.github/workflows/push.yaml

Lines changed: 143 additions & 138 deletions
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,11 @@ jobs:
1515
- name: Check formatting
1616
run: ruff format --check .
1717

18-
# ── Per-dataset build and test on Modal ─────────────────────
19-
build-and-test:
18+
# ── Download prerequisites ──────────────────────────────────
19+
download-prerequisites:
2020
runs-on: ubuntu-latest
2121
needs: lint
2222
if: github.event.head_commit.message != 'Update package version'
23-
timeout-minutes: 240
2423
env:
2524
MODAL_TOKEN_ID: ${{ secrets.MODAL_TOKEN_ID }}
2625
MODAL_TOKEN_SECRET: ${{ secrets.MODAL_TOKEN_SECRET }}
@@ -30,167 +29,173 @@ jobs:
3029
- uses: actions/setup-python@v5
3130
with:
3231
python-version: "3.13"
33-
- uses: astral-sh/setup-uv@v5
3432
- name: Install Modal CLI
3533
run: pip install modal
36-
- name: Install package
37-
run: uv sync --dev
38-
39-
- name: Initialize summary
40-
run: |
41-
echo "## Data Build & Integration Tests" >> $GITHUB_STEP_SUMMARY
42-
echo "" >> $GITHUB_STEP_SUMMARY
43-
echo "| Step | Status | Duration |" >> $GITHUB_STEP_SUMMARY
44-
echo "|------|--------|----------|" >> $GITHUB_STEP_SUMMARY
45-
46-
# ── Phase 1: Download prerequisites ───────────────────
47-
- name: "Build: download prerequisites"
48-
run: |
49-
START=$(date +%s)
50-
modal run modal_app/data_build.py --script download_prerequisites \
51-
--branch=${{ github.ref_name }}
52-
ELAPSED=$(( $(date +%s) - START ))
53-
echo "| download_prerequisites | :white_check_mark: | ${ELAPSED}s |" >> $GITHUB_STEP_SUMMARY
54-
55-
# ── Phase 1: Independent datasets (sequential) ────────
56-
- name: "Build: uprating"
57-
run: |
58-
START=$(date +%s)
59-
modal run modal_app/data_build.py --script uprating \
60-
--branch=${{ github.ref_name }}
61-
ELAPSED=$(( $(date +%s) - START ))
62-
echo "| uprating | :white_check_mark: | ${ELAPSED}s |" >> $GITHUB_STEP_SUMMARY
63-
64-
- name: "Build: acs"
65-
run: |
66-
START=$(date +%s)
67-
modal run modal_app/data_build.py --script acs \
68-
--branch=${{ github.ref_name }}
69-
ELAPSED=$(( $(date +%s) - START ))
70-
echo "| acs | :white_check_mark: | ${ELAPSED}s |" >> $GITHUB_STEP_SUMMARY
71-
72-
- name: "Test: acs"
73-
run: |
74-
uv run pytest policyengine_us_data/tests/integration/test_acs.py -v
75-
echo "| test_acs | :white_check_mark: | - |" >> $GITHUB_STEP_SUMMARY
76-
77-
- name: "Build: irs_puf"
34+
- name: Download prerequisites on Modal
7835
run: |
79-
START=$(date +%s)
80-
modal run modal_app/data_build.py --script irs_puf \
36+
modal run modal_app/data_build.py \
37+
--script download_prerequisites \
8138
--branch=${{ github.ref_name }}
82-
ELAPSED=$(( $(date +%s) - START ))
83-
echo "| irs_puf | :white_check_mark: | ${ELAPSED}s |" >> $GITHUB_STEP_SUMMARY
8439
85-
# ── Phase 2: CPS and PUF (depend on Phase 1) ─────────
86-
- name: "Build: cps"
40+
# ── Phase 1: Independent datasets (parallel) ───────────────
41+
phase1:
42+
needs: download-prerequisites
43+
runs-on: ubuntu-latest
44+
strategy:
45+
fail-fast: true
46+
matrix:
47+
dataset: [uprating, acs, irs_puf]
48+
env:
49+
MODAL_TOKEN_ID: ${{ secrets.MODAL_TOKEN_ID }}
50+
MODAL_TOKEN_SECRET: ${{ secrets.MODAL_TOKEN_SECRET }}
51+
HUGGING_FACE_TOKEN: ${{ secrets.HUGGING_FACE_TOKEN }}
52+
steps:
53+
- uses: actions/checkout@v4
54+
- uses: actions/setup-python@v5
55+
with:
56+
python-version: "3.13"
57+
- name: Install Modal CLI
58+
run: pip install modal
59+
- name: "Build + test: ${{ matrix.dataset }}"
8760
run: |
88-
START=$(date +%s)
89-
modal run modal_app/data_build.py --script cps \
61+
modal run modal_app/data_build.py \
62+
--script ${{ matrix.dataset }} \
63+
--run-tests \
9064
--branch=${{ github.ref_name }}
91-
ELAPSED=$(( $(date +%s) - START ))
92-
echo "| cps | :white_check_mark: | ${ELAPSED}s |" >> $GITHUB_STEP_SUMMARY
93-
94-
- name: "Test: cps"
95-
run: |
96-
uv run pytest policyengine_us_data/tests/integration/test_cps.py -v
97-
echo "| test_cps | :white_check_mark: | - |" >> $GITHUB_STEP_SUMMARY
9865
99-
- name: "Build: puf"
66+
# ── Phase 2: CPS + PUF (depend on Phase 1) ─────────────────
67+
phase2:
68+
needs: phase1
69+
runs-on: ubuntu-latest
70+
strategy:
71+
fail-fast: true
72+
matrix:
73+
dataset: [cps, puf]
74+
env:
75+
MODAL_TOKEN_ID: ${{ secrets.MODAL_TOKEN_ID }}
76+
MODAL_TOKEN_SECRET: ${{ secrets.MODAL_TOKEN_SECRET }}
77+
HUGGING_FACE_TOKEN: ${{ secrets.HUGGING_FACE_TOKEN }}
78+
steps:
79+
- uses: actions/checkout@v4
80+
- uses: actions/setup-python@v5
81+
with:
82+
python-version: "3.13"
83+
- name: Install Modal CLI
84+
run: pip install modal
85+
- name: "Build + test: ${{ matrix.dataset }}"
10086
run: |
101-
START=$(date +%s)
102-
modal run modal_app/data_build.py --script puf \
87+
modal run modal_app/data_build.py \
88+
--script ${{ matrix.dataset }} \
89+
--run-tests \
10390
--branch=${{ github.ref_name }}
104-
ELAPSED=$(( $(date +%s) - START ))
105-
echo "| puf | :white_check_mark: | ${ELAPSED}s |" >> $GITHUB_STEP_SUMMARY
10691
107-
# ── Phase 3: Extended CPS (depends on CPS + PUF) ─────
108-
- name: "Build: extended_cps"
92+
# ── Phase 3: Extended CPS (depends on Phase 2) ─────────────
93+
phase3:
94+
needs: phase2
95+
runs-on: ubuntu-latest
96+
env:
97+
MODAL_TOKEN_ID: ${{ secrets.MODAL_TOKEN_ID }}
98+
MODAL_TOKEN_SECRET: ${{ secrets.MODAL_TOKEN_SECRET }}
99+
HUGGING_FACE_TOKEN: ${{ secrets.HUGGING_FACE_TOKEN }}
100+
steps:
101+
- uses: actions/checkout@v4
102+
- uses: actions/setup-python@v5
103+
with:
104+
python-version: "3.13"
105+
- name: Install Modal CLI
106+
run: pip install modal
107+
- name: "Build + test: extended_cps"
109108
run: |
110-
START=$(date +%s)
111-
modal run modal_app/data_build.py --script extended_cps \
109+
modal run modal_app/data_build.py \
110+
--script extended_cps \
111+
--run-tests \
112112
--branch=${{ github.ref_name }}
113-
ELAPSED=$(( $(date +%s) - START ))
114-
echo "| extended_cps | :white_check_mark: | ${ELAPSED}s |" >> $GITHUB_STEP_SUMMARY
115113
116-
- name: "Test: extended_cps"
117-
run: |
118-
uv run pytest policyengine_us_data/tests/integration/test_extended_cps.py -v
119-
echo "| test_extended_cps | :white_check_mark: | - |" >> $GITHUB_STEP_SUMMARY
120-
121-
# ── Phase 4: Enhanced + Stratified CPS ────────────────
122-
- name: "Build: enhanced_cps"
114+
# ── Phase 4: Enhanced + Stratified CPS (depend on Phase 3) ─
115+
phase4:
116+
needs: phase3
117+
runs-on: ubuntu-latest
118+
strategy:
119+
fail-fast: true
120+
matrix:
121+
dataset: [enhanced_cps, stratified_cps]
122+
env:
123+
MODAL_TOKEN_ID: ${{ secrets.MODAL_TOKEN_ID }}
124+
MODAL_TOKEN_SECRET: ${{ secrets.MODAL_TOKEN_SECRET }}
125+
HUGGING_FACE_TOKEN: ${{ secrets.HUGGING_FACE_TOKEN }}
126+
steps:
127+
- uses: actions/checkout@v4
128+
- uses: actions/setup-python@v5
129+
with:
130+
python-version: "3.13"
131+
- name: Install Modal CLI
132+
run: pip install modal
133+
- name: "Build + test: ${{ matrix.dataset }}"
123134
run: |
124-
START=$(date +%s)
125-
modal run modal_app/data_build.py --script enhanced_cps \
135+
modal run modal_app/data_build.py \
136+
--script ${{ matrix.dataset }} \
137+
--run-tests \
126138
--branch=${{ github.ref_name }}
127-
ELAPSED=$(( $(date +%s) - START ))
128-
echo "| enhanced_cps | :white_check_mark: | ${ELAPSED}s |" >> $GITHUB_STEP_SUMMARY
129139
130-
- name: "Test: enhanced_cps"
131-
run: |
132-
uv run pytest policyengine_us_data/tests/integration/test_enhanced_cps.py -v
133-
echo "| test_enhanced_cps | :white_check_mark: | - |" >> $GITHUB_STEP_SUMMARY
134-
135-
- name: "Build: stratified_cps"
140+
# ── Phase 5: Source imputed + Small enhanced (depend on 4) ──
141+
phase5:
142+
needs: phase4
143+
runs-on: ubuntu-latest
144+
strategy:
145+
fail-fast: true
146+
matrix:
147+
dataset: [source_imputed_cps, small_enhanced_cps]
148+
env:
149+
MODAL_TOKEN_ID: ${{ secrets.MODAL_TOKEN_ID }}
150+
MODAL_TOKEN_SECRET: ${{ secrets.MODAL_TOKEN_SECRET }}
151+
HUGGING_FACE_TOKEN: ${{ secrets.HUGGING_FACE_TOKEN }}
152+
steps:
153+
- uses: actions/checkout@v4
154+
- uses: actions/setup-python@v5
155+
with:
156+
python-version: "3.13"
157+
- name: Install Modal CLI
158+
run: pip install modal
159+
- name: "Build + test: ${{ matrix.dataset }}"
136160
run: |
137-
START=$(date +%s)
138-
modal run modal_app/data_build.py --script stratified_cps \
161+
modal run modal_app/data_build.py \
162+
--script ${{ matrix.dataset }} \
163+
--run-tests \
139164
--branch=${{ github.ref_name }}
140-
ELAPSED=$(( $(date +%s) - START ))
141-
echo "| stratified_cps | :white_check_mark: | ${ELAPSED}s |" >> $GITHUB_STEP_SUMMARY
142165
143-
# ── Phase 5: Source imputed + Small enhanced CPS ──────
144-
- name: "Build: source_imputed_cps"
166+
# ── Remaining integration tests (depend on Phase 4) ─────────
167+
remaining-tests:
168+
needs: phase4
169+
runs-on: ubuntu-latest
170+
strategy:
171+
fail-fast: false
172+
matrix:
173+
test:
174+
- policyengine_us_data/tests/integration/test_census_cps.py
175+
- policyengine_us_data/tests/integration/test_database_build.py
176+
env:
177+
MODAL_TOKEN_ID: ${{ secrets.MODAL_TOKEN_ID }}
178+
MODAL_TOKEN_SECRET: ${{ secrets.MODAL_TOKEN_SECRET }}
179+
HUGGING_FACE_TOKEN: ${{ secrets.HUGGING_FACE_TOKEN }}
180+
steps:
181+
- uses: actions/checkout@v4
182+
- uses: actions/setup-python@v5
183+
with:
184+
python-version: "3.13"
185+
- name: Install Modal CLI
186+
run: pip install modal
187+
- name: "Test: ${{ matrix.test }}"
145188
run: |
146-
START=$(date +%s)
147-
modal run modal_app/data_build.py --script source_imputed_cps \
189+
modal run modal_app/data_build.py \
190+
--script download_prerequisites \
148191
--branch=${{ github.ref_name }}
149-
ELAPSED=$(( $(date +%s) - START ))
150-
echo "| source_imputed_cps | :white_check_mark: | ${ELAPSED}s |" >> $GITHUB_STEP_SUMMARY
151-
152-
- name: "Test: source_imputed_cps"
153-
run: |
154-
uv run pytest policyengine_us_data/tests/integration/test_source_imputed_cps_masking.py policyengine_us_data/tests/integration/test_source_imputed_cps_consistency.py -v
155-
echo "| test_source_imputed_cps | :white_check_mark: | - |" >> $GITHUB_STEP_SUMMARY
156-
157-
- name: "Build: small_enhanced_cps"
158-
run: |
159-
START=$(date +%s)
160-
modal run modal_app/data_build.py --script small_enhanced_cps \
192+
modal run modal_app/data_build.py \
193+
--test ${{ matrix.test }} \
161194
--branch=${{ github.ref_name }}
162-
ELAPSED=$(( $(date +%s) - START ))
163-
echo "| small_enhanced_cps | :white_check_mark: | ${ELAPSED}s |" >> $GITHUB_STEP_SUMMARY
164-
165-
- name: "Test: small_enhanced_cps"
166-
run: |
167-
uv run pytest policyengine_us_data/tests/integration/test_small_enhanced_cps.py -v
168-
echo "| test_small_enhanced_cps | :white_check_mark: | - |" >> $GITHUB_STEP_SUMMARY
169-
170-
# ── Remaining integration tests ───────────────────────
171-
- name: "Test: sparse_enhanced_cps"
172-
run: |
173-
uv run pytest policyengine_us_data/tests/integration/test_sparse_enhanced_cps.py -v
174-
echo "| test_sparse_enhanced_cps | :white_check_mark: | - |" >> $GITHUB_STEP_SUMMARY
175-
176-
- name: "Test: sipp_assets"
177-
run: |
178-
uv run pytest policyengine_us_data/tests/integration/test_sipp_assets.py -v
179-
echo "| test_sipp_assets | :white_check_mark: | - |" >> $GITHUB_STEP_SUMMARY
180-
181-
- name: "Test: census_cps"
182-
run: |
183-
uv run pytest policyengine_us_data/tests/integration/test_census_cps.py -v
184-
echo "| test_census_cps | :white_check_mark: | - |" >> $GITHUB_STEP_SUMMARY
185-
186-
- name: "Test: database_build"
187-
run: |
188-
uv run pytest policyengine_us_data/tests/integration/test_database_build.py -v
189-
echo "| test_database_build | :white_check_mark: | - |" >> $GITHUB_STEP_SUMMARY
190195
191196
# ── Manual approval gate ────────────────────────────────────
192197
approval-gate:
193-
needs: build-and-test
198+
needs: [phase5, remaining-tests]
194199
runs-on: ubuntu-latest
195200
environment: pipeline-approval
196201
steps:

0 commit comments

Comments
 (0)