PolicyEngine
diff --git a/.github/workflows/push.yaml b/.github/workflows/push.yaml
Lines changed: 143 additions & 138 deletions
@@ -15,12 +15,11 @@ jobs:
       - name: Check formatting
         run: ruff format --check .
 
-  # ── Per-dataset build and test on Modal ─────────────────────
-  build-and-test:
+  # ── Download prerequisites ──────────────────────────────────
+  download-prerequisites:
     runs-on: ubuntu-latest
     needs: lint
     if: github.event.head_commit.message != 'Update package version'
-    timeout-minutes: 240
     env:
       MODAL_TOKEN_ID: ${{ secrets.MODAL_TOKEN_ID }}
       MODAL_TOKEN_SECRET: ${{ secrets.MODAL_TOKEN_SECRET }}
@@ -30,167 +29,173 @@ jobs:
       - uses: actions/setup-python@v5
         with:
           python-version: "3.13"
-      - uses: astral-sh/setup-uv@v5
       - name: Install Modal CLI
         run: pip install modal
-      - name: Install package
-        run: uv sync --dev
-
-      - name: Initialize summary
-        run: |
-          echo "## Data Build & Integration Tests" >> $GITHUB_STEP_SUMMARY
-          echo "" >> $GITHUB_STEP_SUMMARY
-          echo "| Step | Status | Duration |" >> $GITHUB_STEP_SUMMARY
-          echo "|------|--------|----------|" >> $GITHUB_STEP_SUMMARY
-
-      # ── Phase 1: Download prerequisites ───────────────────
-      - name: "Build: download prerequisites"
-        run: |
-          START=$(date +%s)
-          modal run modal_app/data_build.py --script download_prerequisites \
-            --branch=${{ github.ref_name }}
-          ELAPSED=$(( $(date +%s) - START ))
-          echo "| download_prerequisites | :white_check_mark: | ${ELAPSED}s |" >> $GITHUB_STEP_SUMMARY
-
-      # ── Phase 1: Independent datasets (sequential) ────────
-      - name: "Build: uprating"
-        run: |
-          START=$(date +%s)
-          modal run modal_app/data_build.py --script uprating \
-            --branch=${{ github.ref_name }}
-          ELAPSED=$(( $(date +%s) - START ))
-          echo "| uprating | :white_check_mark: | ${ELAPSED}s |" >> $GITHUB_STEP_SUMMARY
-
-      - name: "Build: acs"
-        run: |
-          START=$(date +%s)
-          modal run modal_app/data_build.py --script acs \
-            --branch=${{ github.ref_name }}
-          ELAPSED=$(( $(date +%s) - START ))
-          echo "| acs | :white_check_mark: | ${ELAPSED}s |" >> $GITHUB_STEP_SUMMARY
-
-      - name: "Test: acs"
-        run: |
-          uv run pytest policyengine_us_data/tests/integration/test_acs.py -v
-          echo "| test_acs | :white_check_mark: | - |" >> $GITHUB_STEP_SUMMARY
-
-      - name: "Build: irs_puf"
+      - name: Download prerequisites on Modal
         run: |
-          START=$(date +%s)
-          modal run modal_app/data_build.py --script irs_puf \
-            --branch=${{ github.ref_name }}
-          ELAPSED=$(( $(date +%s) - START ))
-          echo "| irs_puf | :white_check_mark: | ${ELAPSED}s |" >> $GITHUB_STEP_SUMMARY
+          modal run modal_app/data_build.py \
+            --script download_prerequisites \
+            --branch="$GITHUB_REF_NAME"  # runner-set env var; keeps the ref name out of the script text
 
-      # ── Phase 2: CPS and PUF (depend on Phase 1) ─────────
-      - name: "Build: cps"
+  # ── Phase 1: independent datasets, one matrix job per dataset ──
+  phase1:
+    needs: download-prerequisites
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: true  # a failing dataset cancels the other matrix jobs
+      matrix:
+        dataset: [uprating, acs, irs_puf]
+    env:
+      MODAL_TOKEN_ID: ${{ secrets.MODAL_TOKEN_ID }}
+      MODAL_TOKEN_SECRET: ${{ secrets.MODAL_TOKEN_SECRET }}
+      HUGGING_FACE_TOKEN: ${{ secrets.HUGGING_FACE_TOKEN }}
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.13"
+      - name: Install Modal CLI
+        run: pip install modal
+      - name: "Build + test: ${{ matrix.dataset }}"
         run: |
-          START=$(date +%s)
-          modal run modal_app/data_build.py --script cps \
-            --branch=${{ github.ref_name }}
+          modal run modal_app/data_build.py \
+            --script ${{ matrix.dataset }} \
+            --run-tests \
+            --branch="$GITHUB_REF_NAME"  # runner-set env var; keeps the ref name out of the script text
-          ELAPSED=$(( $(date +%s) - START ))
-          echo "| cps | :white_check_mark: | ${ELAPSED}s |" >> $GITHUB_STEP_SUMMARY
-
-      - name: "Test: cps"
-        run: |
-          uv run pytest policyengine_us_data/tests/integration/test_cps.py -v
-          echo "| test_cps | :white_check_mark: | - |" >> $GITHUB_STEP_SUMMARY
 
-      - name: "Build: puf"
+  # ── Phase 2: CPS + PUF, one matrix job each (needs Phase 1) ──
+  phase2:
+    needs: phase1
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: true  # a failing dataset cancels the other matrix job
+      matrix:
+        dataset: [cps, puf]
+    env:
+      MODAL_TOKEN_ID: ${{ secrets.MODAL_TOKEN_ID }}
+      MODAL_TOKEN_SECRET: ${{ secrets.MODAL_TOKEN_SECRET }}
+      HUGGING_FACE_TOKEN: ${{ secrets.HUGGING_FACE_TOKEN }}
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.13"
+      - name: Install Modal CLI
+        run: pip install modal
+      - name: "Build + test: ${{ matrix.dataset }}"
         run: |
-          START=$(date +%s)
-          modal run modal_app/data_build.py --script puf \
-            --branch=${{ github.ref_name }}
+          modal run modal_app/data_build.py \
+            --script ${{ matrix.dataset }} \
+            --run-tests \
+            --branch="$GITHUB_REF_NAME"  # runner-set env var; keeps the ref name out of the script text
-          ELAPSED=$(( $(date +%s) - START ))
-          echo "| puf | :white_check_mark: | ${ELAPSED}s |" >> $GITHUB_STEP_SUMMARY
 
-      # ── Phase 3: Extended CPS (depends on CPS + PUF) ─────
-      - name: "Build: extended_cps"
+  # ── Phase 3: Extended CPS, single job (needs Phase 2) ──────
+  phase3:
+    needs: phase2
+    runs-on: ubuntu-latest
+    env:
+      MODAL_TOKEN_ID: ${{ secrets.MODAL_TOKEN_ID }}
+      MODAL_TOKEN_SECRET: ${{ secrets.MODAL_TOKEN_SECRET }}
+      HUGGING_FACE_TOKEN: ${{ secrets.HUGGING_FACE_TOKEN }}
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.13"
+      - name: Install Modal CLI
+        run: pip install modal
+      - name: "Build + test: extended_cps"
         run: |
-          START=$(date +%s)
-          modal run modal_app/data_build.py --script extended_cps \
-            --branch=${{ github.ref_name }}
+          modal run modal_app/data_build.py \
+            --script extended_cps \
+            --run-tests \
+            --branch="$GITHUB_REF_NAME"  # runner-set env var; keeps the ref name out of the script text
-          ELAPSED=$(( $(date +%s) - START ))
-          echo "| extended_cps | :white_check_mark: | ${ELAPSED}s |" >> $GITHUB_STEP_SUMMARY
 
-      - name: "Test: extended_cps"
-        run: |
-          uv run pytest policyengine_us_data/tests/integration/test_extended_cps.py -v
-          echo "| test_extended_cps | :white_check_mark: | - |" >> $GITHUB_STEP_SUMMARY
-
-      # ── Phase 4: Enhanced + Stratified CPS ────────────────
-      - name: "Build: enhanced_cps"
+  # ── Phase 4: Enhanced + Stratified CPS (needs Phase 3) ─────
+  phase4:
+    needs: phase3
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: true  # a failing dataset cancels the other matrix job
+      matrix:
+        dataset: [enhanced_cps, stratified_cps]
+    env:
+      MODAL_TOKEN_ID: ${{ secrets.MODAL_TOKEN_ID }}
+      MODAL_TOKEN_SECRET: ${{ secrets.MODAL_TOKEN_SECRET }}
+      HUGGING_FACE_TOKEN: ${{ secrets.HUGGING_FACE_TOKEN }}
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.13"
+      - name: Install Modal CLI
+        run: pip install modal
+      - name: "Build + test: ${{ matrix.dataset }}"
         run: |
-          START=$(date +%s)
-          modal run modal_app/data_build.py --script enhanced_cps \
-            --branch=${{ github.ref_name }}
+          modal run modal_app/data_build.py \
+            --script ${{ matrix.dataset }} \
+            --run-tests \
+            --branch="$GITHUB_REF_NAME"  # runner-set env var; keeps the ref name out of the script text
-          ELAPSED=$(( $(date +%s) - START ))
-          echo "| enhanced_cps | :white_check_mark: | ${ELAPSED}s |" >> $GITHUB_STEP_SUMMARY
 
-      - name: "Test: enhanced_cps"
-        run: |
-          uv run pytest policyengine_us_data/tests/integration/test_enhanced_cps.py -v
-          echo "| test_enhanced_cps | :white_check_mark: | - |" >> $GITHUB_STEP_SUMMARY
-
-      - name: "Build: stratified_cps"
+  # ── Phase 5: Source imputed + Small enhanced CPS (needs Phase 4) ──
+  phase5:
+    needs: phase4
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: true  # a failing dataset cancels the other matrix job
+      matrix:
+        dataset: [source_imputed_cps, small_enhanced_cps]
+    env:
+      MODAL_TOKEN_ID: ${{ secrets.MODAL_TOKEN_ID }}
+      MODAL_TOKEN_SECRET: ${{ secrets.MODAL_TOKEN_SECRET }}
+      HUGGING_FACE_TOKEN: ${{ secrets.HUGGING_FACE_TOKEN }}
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.13"
+      - name: Install Modal CLI
+        run: pip install modal
+      - name: "Build + test: ${{ matrix.dataset }}"
         run: |
-          START=$(date +%s)
-          modal run modal_app/data_build.py --script stratified_cps \
-            --branch=${{ github.ref_name }}
+          modal run modal_app/data_build.py \
+            --script ${{ matrix.dataset }} \
+            --run-tests \
+            --branch="$GITHUB_REF_NAME"  # runner-set env var; keeps the ref name out of the script text
-          ELAPSED=$(( $(date +%s) - START ))
-          echo "| stratified_cps | :white_check_mark: | ${ELAPSED}s |" >> $GITHUB_STEP_SUMMARY
 
-      # ── Phase 5: Source imputed + Small enhanced CPS ──────
-      - name: "Build: source_imputed_cps"
+  # ── Remaining integration tests, one job per file (needs Phase 4) ──
+  remaining-tests:
+    needs: phase4
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false  # let the other test file finish even if one fails
+      matrix:
+        test:
+          - policyengine_us_data/tests/integration/test_census_cps.py
+          - policyengine_us_data/tests/integration/test_database_build.py
+    env:
+      MODAL_TOKEN_ID: ${{ secrets.MODAL_TOKEN_ID }}
+      MODAL_TOKEN_SECRET: ${{ secrets.MODAL_TOKEN_SECRET }}
+      HUGGING_FACE_TOKEN: ${{ secrets.HUGGING_FACE_TOKEN }}
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.13"
+      - name: Install Modal CLI
+        run: pip install modal
+      - name: "Test: ${{ matrix.test }}"
         run: |
-          START=$(date +%s)
-          modal run modal_app/data_build.py --script source_imputed_cps \
-            --branch=${{ github.ref_name }}
+          modal run modal_app/data_build.py \
+            --script download_prerequisites \
+            --branch="$GITHUB_REF_NAME"  # runner-set env var; keeps the ref name out of the script text
-          ELAPSED=$(( $(date +%s) - START ))
-          echo "| source_imputed_cps | :white_check_mark: | ${ELAPSED}s |" >> $GITHUB_STEP_SUMMARY
-
-      - name: "Test: source_imputed_cps"
-        run: |
-          uv run pytest policyengine_us_data/tests/integration/test_source_imputed_cps_masking.py policyengine_us_data/tests/integration/test_source_imputed_cps_consistency.py -v
-          echo "| test_source_imputed_cps | :white_check_mark: | - |" >> $GITHUB_STEP_SUMMARY
-
-      - name: "Build: small_enhanced_cps"
-        run: |
-          START=$(date +%s)
-          modal run modal_app/data_build.py --script small_enhanced_cps \
-            --branch=${{ github.ref_name }}
+          modal run modal_app/data_build.py \
+            --test ${{ matrix.test }} \
+            --branch="$GITHUB_REF_NAME"  # same ref, again read from the runner env
-          ELAPSED=$(( $(date +%s) - START ))
-          echo "| small_enhanced_cps | :white_check_mark: | ${ELAPSED}s |" >> $GITHUB_STEP_SUMMARY
-
-      - name: "Test: small_enhanced_cps"
-        run: |
-          uv run pytest policyengine_us_data/tests/integration/test_small_enhanced_cps.py -v
-          echo "| test_small_enhanced_cps | :white_check_mark: | - |" >> $GITHUB_STEP_SUMMARY
-
-      # ── Remaining integration tests ───────────────────────
-      - name: "Test: sparse_enhanced_cps"
-        run: |
-          uv run pytest policyengine_us_data/tests/integration/test_sparse_enhanced_cps.py -v
-          echo "| test_sparse_enhanced_cps | :white_check_mark: | - |" >> $GITHUB_STEP_SUMMARY
-
-      - name: "Test: sipp_assets"
-        run: |
-          uv run pytest policyengine_us_data/tests/integration/test_sipp_assets.py -v
-          echo "| test_sipp_assets | :white_check_mark: | - |" >> $GITHUB_STEP_SUMMARY
-
-      - name: "Test: census_cps"
-        run: |
-          uv run pytest policyengine_us_data/tests/integration/test_census_cps.py -v
-          echo "| test_census_cps | :white_check_mark: | - |" >> $GITHUB_STEP_SUMMARY
-
-      - name: "Test: database_build"
-        run: |
-          uv run pytest policyengine_us_data/tests/integration/test_database_build.py -v
-          echo "| test_database_build | :white_check_mark: | - |" >> $GITHUB_STEP_SUMMARY
 
   # ── Manual approval gate ────────────────────────────────────
   approval-gate:
-    needs: build-and-test
+    needs: [phase5, remaining-tests]
     runs-on: ubuntu-latest
     environment: pipeline-approval
     steps: