Skip to content

Commit 02eb9fa

Browse files
anth-volkclaude
andcommitted
[TEST] Add push workflow for dry-run testing
DO NOT MERGE - test-only PR to validate push.yaml workflow. Contains deliberate lint failure to prevent accidental merge. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 22f922e commit 02eb9fa

4 files changed

Lines changed: 336 additions & 14 deletions

File tree

.github/workflows/push.yaml

Lines changed: 238 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,238 @@
1+
# Dry-run copy of the push workflow: runs for pull requests targeting main.
name: '[TEST] Push workflow dry run'

on:
  pull_request:
    branches:
      - main
6+
7+
jobs:
8+
# ── Lint ────────────────────────────────────────────────────
9+
lint:
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v4
    - name: Install ruff
      # The requirement must be quoted: unquoted, the shell parses `>=0.9.0`
      # as a redirection to a file named "=0.9.0" and pip installs ruff with
      # no version constraint at all.
      run: pip install "ruff>=0.9.0"
    - name: Check formatting
      run: ruff format --check .
17+
18+
# ── Per-dataset build and test on Modal ─────────────────────
19+
build-and-test:
  runs-on: ubuntu-latest
  needs: lint
  # NOTE(review): `head_commit` is a field of the push event payload. Under a
  # pull_request trigger it is empty, so this condition is always true there.
  if: github.event.head_commit.message != 'Update package version'
  timeout-minutes: 240
  env:
    MODAL_TOKEN_ID: ${{ secrets.MODAL_TOKEN_ID }}
    MODAL_TOKEN_SECRET: ${{ secrets.MODAL_TOKEN_SECRET }}
    HUGGING_FACE_TOKEN: ${{ secrets.HUGGING_FACE_TOKEN }}
    # Branch handed to every `modal run` below.
    # - On pull_request events `github.ref_name` is "<pr-number>/merge", not a
    #   branch, so prefer `github.head_ref` (the PR source branch) and fall
    #   back to `ref_name` for push events, where `head_ref` is empty.
    # - Passing it through the environment instead of splicing the expression
    #   into the script keeps branch names out of the shell's parser.
    BRANCH: ${{ github.head_ref || github.ref_name }}
  steps:
    - uses: actions/checkout@v4
    - uses: actions/setup-python@v5
      with:
        python-version: "3.13"
    - uses: astral-sh/setup-uv@v5
    - name: Install Modal CLI
      run: pip install modal
    - name: Install package
      run: uv sync --dev

    # Each later step appends one row to this table only after it succeeds.
    - name: Initialize summary
      run: |
        echo "## Data Build & Integration Tests" >> "$GITHUB_STEP_SUMMARY"
        echo "" >> "$GITHUB_STEP_SUMMARY"
        echo "| Step | Status | Duration |" >> "$GITHUB_STEP_SUMMARY"
        echo "|------|--------|----------|" >> "$GITHUB_STEP_SUMMARY"

    # ── Phase 1: Download prerequisites ───────────────────
    - name: "Build: download prerequisites"
      run: |
        START=$(date +%s)
        modal run modal_app/data_build.py --script download_prerequisites \
          --branch="$BRANCH"
        ELAPSED=$(( $(date +%s) - START ))
        echo "| download_prerequisites | :white_check_mark: | ${ELAPSED}s |" >> "$GITHUB_STEP_SUMMARY"

    # ── Phase 1: Independent datasets (sequential) ────────
    - name: "Build: uprating"
      run: |
        START=$(date +%s)
        modal run modal_app/data_build.py --script uprating \
          --branch="$BRANCH"
        ELAPSED=$(( $(date +%s) - START ))
        echo "| uprating | :white_check_mark: | ${ELAPSED}s |" >> "$GITHUB_STEP_SUMMARY"

    - name: "Build: acs"
      run: |
        START=$(date +%s)
        modal run modal_app/data_build.py --script acs \
          --branch="$BRANCH"
        ELAPSED=$(( $(date +%s) - START ))
        echo "| acs | :white_check_mark: | ${ELAPSED}s |" >> "$GITHUB_STEP_SUMMARY"

    - name: "Test: acs"
      run: |
        uv run pytest policyengine_us_data/tests/integration/test_acs.py -v
        echo "| test_acs | :white_check_mark: | - |" >> "$GITHUB_STEP_SUMMARY"

    - name: "Build: irs_puf"
      run: |
        START=$(date +%s)
        modal run modal_app/data_build.py --script irs_puf \
          --branch="$BRANCH"
        ELAPSED=$(( $(date +%s) - START ))
        echo "| irs_puf | :white_check_mark: | ${ELAPSED}s |" >> "$GITHUB_STEP_SUMMARY"

    # ── Phase 2: CPS and PUF (depend on Phase 1) ─────────
    - name: "Build: cps"
      run: |
        START=$(date +%s)
        modal run modal_app/data_build.py --script cps \
          --branch="$BRANCH"
        ELAPSED=$(( $(date +%s) - START ))
        echo "| cps | :white_check_mark: | ${ELAPSED}s |" >> "$GITHUB_STEP_SUMMARY"

    - name: "Test: cps"
      run: |
        uv run pytest policyengine_us_data/tests/integration/test_cps.py -v
        echo "| test_cps | :white_check_mark: | - |" >> "$GITHUB_STEP_SUMMARY"

    - name: "Build: puf"
      run: |
        START=$(date +%s)
        modal run modal_app/data_build.py --script puf \
          --branch="$BRANCH"
        ELAPSED=$(( $(date +%s) - START ))
        echo "| puf | :white_check_mark: | ${ELAPSED}s |" >> "$GITHUB_STEP_SUMMARY"

    # ── Phase 3: Extended CPS (depends on CPS + PUF) ─────
    - name: "Build: extended_cps"
      run: |
        START=$(date +%s)
        modal run modal_app/data_build.py --script extended_cps \
          --branch="$BRANCH"
        ELAPSED=$(( $(date +%s) - START ))
        echo "| extended_cps | :white_check_mark: | ${ELAPSED}s |" >> "$GITHUB_STEP_SUMMARY"

    - name: "Test: extended_cps"
      run: |
        uv run pytest policyengine_us_data/tests/integration/test_extended_cps.py -v
        echo "| test_extended_cps | :white_check_mark: | - |" >> "$GITHUB_STEP_SUMMARY"

    # ── Phase 4: Enhanced + Stratified CPS ────────────────
    - name: "Build: enhanced_cps"
      run: |
        START=$(date +%s)
        modal run modal_app/data_build.py --script enhanced_cps \
          --branch="$BRANCH"
        ELAPSED=$(( $(date +%s) - START ))
        echo "| enhanced_cps | :white_check_mark: | ${ELAPSED}s |" >> "$GITHUB_STEP_SUMMARY"

    - name: "Test: enhanced_cps"
      run: |
        uv run pytest policyengine_us_data/tests/integration/test_enhanced_cps.py -v
        echo "| test_enhanced_cps | :white_check_mark: | - |" >> "$GITHUB_STEP_SUMMARY"

    - name: "Build: stratified_cps"
      run: |
        START=$(date +%s)
        modal run modal_app/data_build.py --script stratified_cps \
          --branch="$BRANCH"
        ELAPSED=$(( $(date +%s) - START ))
        echo "| stratified_cps | :white_check_mark: | ${ELAPSED}s |" >> "$GITHUB_STEP_SUMMARY"

    # ── Phase 5: Source imputed + Small enhanced CPS ──────
    - name: "Build: source_imputed_cps"
      run: |
        START=$(date +%s)
        modal run modal_app/data_build.py --script source_imputed_cps \
          --branch="$BRANCH"
        ELAPSED=$(( $(date +%s) - START ))
        echo "| source_imputed_cps | :white_check_mark: | ${ELAPSED}s |" >> "$GITHUB_STEP_SUMMARY"

    - name: "Test: source_imputed_cps"
      run: |
        uv run pytest policyengine_us_data/tests/integration/test_source_imputed_cps_masking.py policyengine_us_data/tests/integration/test_source_imputed_cps_consistency.py -v
        echo "| test_source_imputed_cps | :white_check_mark: | - |" >> "$GITHUB_STEP_SUMMARY"

    - name: "Build: small_enhanced_cps"
      run: |
        START=$(date +%s)
        modal run modal_app/data_build.py --script small_enhanced_cps \
          --branch="$BRANCH"
        ELAPSED=$(( $(date +%s) - START ))
        echo "| small_enhanced_cps | :white_check_mark: | ${ELAPSED}s |" >> "$GITHUB_STEP_SUMMARY"

    - name: "Test: small_enhanced_cps"
      run: |
        uv run pytest policyengine_us_data/tests/integration/test_small_enhanced_cps.py -v
        echo "| test_small_enhanced_cps | :white_check_mark: | - |" >> "$GITHUB_STEP_SUMMARY"

    # ── Remaining integration tests ───────────────────────
    - name: "Test: sparse_enhanced_cps"
      run: |
        uv run pytest policyengine_us_data/tests/integration/test_sparse_enhanced_cps.py -v
        echo "| test_sparse_enhanced_cps | :white_check_mark: | - |" >> "$GITHUB_STEP_SUMMARY"

    - name: "Test: sipp_assets"
      run: |
        uv run pytest policyengine_us_data/tests/integration/test_sipp_assets.py -v
        echo "| test_sipp_assets | :white_check_mark: | - |" >> "$GITHUB_STEP_SUMMARY"

    - name: "Test: census_cps"
      run: |
        uv run pytest policyengine_us_data/tests/integration/test_census_cps.py -v
        echo "| test_census_cps | :white_check_mark: | - |" >> "$GITHUB_STEP_SUMMARY"

    - name: "Test: database_build"
      run: |
        uv run pytest policyengine_us_data/tests/integration/test_database_build.py -v
        echo "| test_database_build | :white_check_mark: | - |" >> "$GITHUB_STEP_SUMMARY"
190+
191+
# ── Manual approval gate ────────────────────────────────────
192+
approval-gate:
  # Manual gate: presumably relies on protection rules (e.g. required
  # reviewers) on the `pipeline-approval` environment; those live in the
  # repository settings and are not visible in this file.
  runs-on: ubuntu-latest
  needs:
    - build-and-test
  environment: pipeline-approval
  steps:
    - name: Confirm approval
      run: echo "Pipeline approved. Dispatching H5 build."
198+
199+
# ── Dispatch pipeline ───────────────────────────────────────
200+
trigger-pipeline:
  needs: approval-gate
  runs-on: ubuntu-latest
  # Creating a workflow_dispatch event through the REST API requires the
  # `actions: write` permission on GITHUB_TOKEN. Declare it explicitly so the
  # job does not fail with 403 when the repository default is read-only.
  permissions:
    actions: write
  steps:
    - name: Trigger pipeline workflow
      uses: actions/github-script@v7
      with:
        github-token: ${{ secrets.GITHUB_TOKEN }}
        # Dispatches pipeline.yaml on main with scope=all, regardless of the
        # ref that triggered this workflow.
        script: |
          await github.rest.actions.createWorkflowDispatch({
            owner: context.repo.owner,
            repo: context.repo.repo,
            workflow_id: 'pipeline.yaml',
            ref: 'main',
            inputs: { scope: 'all' }
          })
          console.log('Pipeline dispatched with scope=all')
217+
218+
# ── PyPI publish (version bump commits only) ────────────────
219+
publish:
  needs:
    - lint
  runs-on: ubuntu-latest
  # Runs only for the version-bump commit.
  # NOTE(review): `head_commit` is a push-payload field; under a pull_request
  # trigger it is empty, so this job would be skipped there. Confirm intended.
  if: github.event.head_commit.message == 'Update package version'
  steps:
    - name: Check out repository
      uses: actions/checkout@v4
    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        python-version: '3.13'
    - name: Set up uv
      uses: astral-sh/setup-uv@v5
    - name: Install package
      run: uv sync --dev
    - name: Build package
      run: uv run python -m build
    - name: Publish to PyPI
      uses: pypa/gh-action-pypi-publish@release/v1
      with:
        user: __token__
        password: ${{ secrets.PYPI }}
        skip-existing: true
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Test-only PR for push workflow dry run. Do not merge.

modal_app/data_build.py

Lines changed: 96 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -79,13 +79,25 @@
7979

8080
# Test modules to run individually for checkpoint tracking
8181
# Evaluates to:
#   policyengine_us_data/tests/unit/
#   policyengine_us_data/tests/integration/
TEST_MODULES = [
    f"policyengine_us_data/tests/{suite}/" for suite in ("unit", "integration")
]
8885

86+
# Short names for --script mode (maps to SCRIPT_OUTPUTS keys)
87+
# Insertion order is preserved and is the order in which valid names are
# listed in the "Unknown script" error raised by run_single_script.
SCRIPT_SHORT_NAMES = {
    "download_prerequisites": (
        "policyengine_us_data/storage/download_private_prerequisites.py"
    ),
    "uprating": ("policyengine_us_data/utils/uprating.py"),
    "acs": ("policyengine_us_data/datasets/acs/acs.py"),
    "irs_puf": ("policyengine_us_data/datasets/puf/irs_puf.py"),
    "cps": ("policyengine_us_data/datasets/cps/cps.py"),
    "puf": ("policyengine_us_data/datasets/puf/puf.py"),
    "extended_cps": ("policyengine_us_data/datasets/cps/extended_cps.py"),
    "enhanced_cps": ("policyengine_us_data/datasets/cps/enhanced_cps.py"),
    "stratified_cps": (
        "policyengine_us_data/calibration/create_stratified_cps.py"
    ),
    "source_imputed_cps": (
        "policyengine_us_data/calibration/create_source_imputed_cps.py"
    ),
    "small_enhanced_cps": (
        "policyengine_us_data/datasets/cps/small_enhanced_cps.py"
    ),
}
100+
89101

90102
def setup_gcp_credentials():
91103
"""Write GCP credentials JSON to a temp file for google.auth.default()."""
@@ -654,6 +666,68 @@ def build_datasets(
654666
return "Data build completed successfully"
655667

656668

669+
@app.function(
    image=image,
    secrets=[hf_secret, gcp_secret],
    volumes={
        VOLUME_MOUNT: checkpoint_volume,
        PIPELINE_MOUNT: pipeline_volume,
    },
    memory=32768,
    cpu=8.0,
    timeout=14400,
    nonpreemptible=True,
)
def run_single_script(
    script_name: str,
    branch: str = "main",
) -> str:
    """Run a single dataset build script with checkpointing.

    Before the script runs, the checkpointed outputs of every *other* entry
    in SCRIPT_OUTPUTS are restored for ``branch`` (all of them, not only the
    script's actual dependencies).

    Args:
        script_name: Short name (a key of SCRIPT_SHORT_NAMES, e.g. 'cps') or
            the full path to the script.
        branch: Git branch for checkpoint scoping.

    Returns:
        Status message of the form ``"Completed <script_name>"``.

    Raises:
        ValueError: If the resolved script path has no SCRIPT_OUTPUTS entry
            and is not the download-prerequisites script.
    """
    setup_gcp_credentials()
    os.chdir("/root/policyengine-us-data")

    # Resolve short name to full path; unknown names pass through unchanged
    # so callers may also supply the full path directly.
    script_path = SCRIPT_SHORT_NAMES.get(script_name, script_name)

    # Handle download_prerequisites specially (no SCRIPT_OUTPUTS entry).
    # Compare the resolved path rather than the raw argument so that the
    # full-path spelling is accepted too instead of raising "Unknown script".
    if script_path == SCRIPT_SHORT_NAMES["download_prerequisites"]:
        run_script(script_path)
        # NOTE(review): assumes the downloaded files land on the checkpoint
        # volume so later invocations can see them -- confirm.
        checkpoint_volume.commit()
        return f"Completed {script_name}"

    output_files = SCRIPT_OUTPUTS.get(script_path)
    if output_files is None:
        raise ValueError(
            f"Unknown script: {script_name}. "
            f"Valid names: {', '.join(SCRIPT_SHORT_NAMES.keys())}"
        )

    # Restore any existing checkpoints for dependencies
    for dep_path, dep_outputs in SCRIPT_OUTPUTS.items():
        if dep_path == script_path:
            continue
        # An entry may be a single output path or a list of them.
        if isinstance(dep_outputs, str):
            dep_outputs = [dep_outputs]
        for dep_output in dep_outputs:
            restore_from_checkpoint(branch, dep_output)

    run_script_with_checkpoint(
        script_path,
        output_files,
        branch,
        checkpoint_volume,
    )
    return f"Completed {script_name}"
729+
730+
657731
@app.local_entrypoint()
658732
def main(
659733
upload: bool = False,
@@ -662,13 +736,21 @@ def main(
662736
clear_checkpoints: bool = False,
663737
skip_tests: bool = False,
664738
skip_enhanced_cps: bool = False,
739+
script: str = "",
665740
):
666-
result = build_datasets.remote(
667-
upload=upload,
668-
branch=branch,
669-
sequential=sequential,
670-
clear_checkpoints=clear_checkpoints,
671-
skip_tests=skip_tests,
672-
skip_enhanced_cps=skip_enhanced_cps,
673-
)
674-
print(result)
741+
if script:
742+
result = run_single_script.remote(
743+
script_name=script,
744+
branch=branch,
745+
)
746+
print(result)
747+
else:
748+
result = build_datasets.remote(
749+
upload=upload,
750+
branch=branch,
751+
sequential=sequential,
752+
clear_checkpoints=clear_checkpoints,
753+
skip_tests=skip_tests,
754+
skip_enhanced_cps=skip_enhanced_cps,
755+
)
756+
print(result)

policyengine_us_data/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
from .datasets import *
22
from .geography import ZIP_CODE_DATASET
3+
DO_NOT_MERGE_THIS_PR = True # deliberate lint failure

0 commit comments

Comments
 (0)