PolicyEngine · baogorek · Mar 25, 2026 · Mar 16, 2026 · Mar 16, 2026 · Mar 16, 2026
diff --git a/.github/workflows/pipeline.yaml b/.github/workflows/pipeline.yaml
@@ -0,0 +1,70 @@
+name: Run Pipeline
+
+on:
+  push:
+    branches: [main]
+  workflow_dispatch:
+    inputs:
+      gpu:
+        description: "GPU type for regional calibration"
+        default: "T4"
+        type: string
+      epochs:
+        description: "Epochs for regional calibration"
+        default: "1000"
+        type: string
+      national_epochs:
+        description: "Epochs for national calibration"
+        default: "4000"
+        type: string
+      num_workers:
+        description: "Number of parallel H5 workers"
+        default: "8"
+        type: string
+      skip_national:
+        description: "Skip national calibration/H5"
+        default: false
+        type: boolean
+
+jobs:
+  pipeline:
+    runs-on: ubuntu-latest
+    # Presumably only has to cover the launch: the run is started with --detach.
+    timeout-minutes: 10
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.13"
+
+      - name: Install Modal
+        run: pip install modal
+
+      - name: Launch pipeline on Modal
+        env:
+          MODAL_TOKEN_ID: ${{ secrets.MODAL_TOKEN_ID }}
+          MODAL_TOKEN_SECRET: ${{ secrets.MODAL_TOKEN_SECRET }}
+          # Dispatch inputs reach the script as environment variables, never
+          # as text spliced into it, so their contents cannot be run as shell.
+          EVENT_NAME: ${{ github.event_name }}
+          GPU: ${{ inputs.gpu }}
+          EPOCHS: ${{ inputs.epochs }}
+          NATIONAL_EPOCHS: ${{ inputs.national_epochs }}
+          NUM_WORKERS: ${{ inputs.num_workers }}
+          SKIP_NATIONAL: ${{ inputs.skip_national }}
+        shell: bash
+        run: |
+          # Push events pass no overrides, so modal_app/pipeline.py falls back
+          # to its own defaults; only manual dispatches add the flags below.
+          args=(--action run --branch main)
+          if [ "$EVENT_NAME" = "workflow_dispatch" ]; then
+            args+=(--gpu "$GPU")
+            args+=(--epochs "$EPOCHS")
+            args+=(--national-epochs "$NATIONAL_EPOCHS")
+            args+=(--num-workers "$NUM_WORKERS")
+            if [ "$SKIP_NATIONAL" = "true" ]; then
+              args+=(--skip-national)
+            fi
+          fi
+          modal run --detach modal_app/pipeline.py::main "${args[@]}"
diff --git a/Makefile b/Makefile
@@ -1,11 +1,12 @@
-.PHONY: all format test install download upload docker documentation data validate-data calibrate calibrate-build publish-local-area upload-calibration upload-dataset upload-database push-to-modal build-matrices calibrate-modal calibrate-modal-national calibrate-both stage-h5s stage-national-h5 stage-all-h5s pipeline validate-staging validate-staging-full upload-validation check-staging check-sanity clean build paper clean-paper presentations database database-refresh promote-database promote-dataset promote build-h5s validate-local
+.PHONY: all format test install download upload docker documentation data validate-data calibrate calibrate-build publish-local-area upload-calibration upload-dataset upload-database push-to-modal build-data-modal build-matrices calibrate-modal calibrate-modal-national calibrate-both stage-h5s stage-national-h5 stage-all-h5s pipeline validate-staging validate-staging-full upload-validation check-staging check-sanity clean build paper clean-paper presentations database database-refresh promote-database promote-dataset promote build-h5s validate-local
 
-GPU ?= A100-80GB
-EPOCHS ?= 200
+GPU ?= T4
+EPOCHS ?= 1000
 NATIONAL_GPU ?= T4
-NATIONAL_EPOCHS ?= 200
+NATIONAL_EPOCHS ?= 4000
 BRANCH ?= $(shell git rev-parse --abbrev-ref HEAD)
 NUM_WORKERS ?= 8
+N_CLONES ?= 430
 VERSION ?=
 
 HF_CLONE_DIR ?= $(HOME)/huggingface/policyengine-us-data
@@ -87,9 +88,11 @@ promote-database:
 	@echo "Copied DB and raw_inputs to HF clone. Now cd to HF repo, commit, and push."
 
 promote-dataset:
-	cp policyengine_us_data/storage/source_imputed_stratified_extended_cps_2024.h5 \
-		$(HF_CLONE_DIR)/calibration/source_imputed_stratified_extended_cps.h5
-	@echo "Copied dataset to HF clone. Now cd to HF repo, commit, and push."
+	python -c "from policyengine_us_data.utils.huggingface import upload; \
+		upload('policyengine_us_data/storage/source_imputed_stratified_extended_cps_2024.h5', \
+		'policyengine/policyengine-us-data', \
+		'calibration/source_imputed_stratified_extended_cps.h5')"
+	@echo "Dataset promoted to HF."
 
 data: download
 	python policyengine_us_data/utils/uprating.py
@@ -155,68 +158,66 @@ upload-database:
 	@echo "Database uploaded to HF."
 
 push-to-modal:
-	modal volume put local-area-staging \
+	modal volume put pipeline-artifacts \
 		policyengine_us_data/storage/calibration/calibration_weights.npy \
-		calibration_inputs/calibration/calibration_weights.npy --force
-	modal volume put local-area-staging \
-		policyengine_us_data/storage/calibration/stacked_blocks.npy \
-		calibration_inputs/calibration/stacked_blocks.npy --force
-	modal volume put local-area-staging \
-		policyengine_us_data/storage/calibration/stacked_takeup.npz \
-		calibration_inputs/calibration/stacked_takeup.npz --force
-	modal volume put local-area-staging \
+		artifacts/calibration_weights.npy --force
+	modal volume put pipeline-artifacts \
 		policyengine_us_data/storage/calibration/policy_data.db \
-		calibration_inputs/calibration/policy_data.db --force
-	modal volume put local-area-staging \
-		policyengine_us_data/storage/calibration/geo_labels.json \
-		calibration_inputs/calibration/geo_labels.json --force
-	modal volume put local-area-staging \
+		artifacts/policy_data.db --force
+	modal volume put pipeline-artifacts \
 		policyengine_us_data/storage/source_imputed_stratified_extended_cps_2024.h5 \
-		calibration_inputs/calibration/source_imputed_stratified_extended_cps.h5 --force
-	@echo "All calibration inputs pushed to Modal volume."
+		artifacts/source_imputed_stratified_extended_cps.h5 --force
+	@echo "All pipeline artifacts pushed to Modal volume."
 
 build-matrices:
-	modal run modal_app/remote_calibration_runner.py::build_package \
-		--branch $(BRANCH)
+	modal run --detach modal_app/remote_calibration_runner.py::build_package \
+		--branch $(BRANCH) --county-level --n-clones $(N_CLONES)
 
 calibrate-modal:
-	modal run modal_app/remote_calibration_runner.py::main \
+	modal run --detach modal_app/remote_calibration_runner.py::main \
 		--branch $(BRANCH) --gpu $(GPU) --epochs $(EPOCHS) \
+		--beta 0.65 --lambda-l0 1e-7 --lambda-l2 1e-8 --log-freq 500 \
+		--target-config policyengine_us_data/calibration/target_config.yaml \
 		--push-results
 
 calibrate-modal-national:
-	modal run modal_app/remote_calibration_runner.py::main \
+	modal run --detach modal_app/remote_calibration_runner.py::main \
 		--branch $(BRANCH) --gpu $(NATIONAL_GPU) \
 		--epochs $(NATIONAL_EPOCHS) \
+		--beta 0.65 --lambda-l0 1e-4 --lambda-l2 1e-12 --log-freq 500 \
+		--target-config policyengine_us_data/calibration/target_config.yaml \
 		--push-results --national
 
 calibrate-both:
 	$(MAKE) calibrate-modal & $(MAKE) calibrate-modal-national & wait
 
 stage-h5s:
-	modal run modal_app/local_area.py::main \
-		--branch $(BRANCH) --num-workers $(NUM_WORKERS) \
-		$(if $(SKIP_DOWNLOAD),--skip-download)
+	modal run --detach modal_app/local_area.py::main \
+		--branch $(BRANCH) --num-workers $(NUM_WORKERS) --n-clones $(N_CLONES)
 
 stage-national-h5:
-	modal run modal_app/local_area.py::main_national \
-		--branch $(BRANCH)
+	modal run --detach modal_app/local_area.py::main_national \
+		--branch $(BRANCH) --n-clones $(N_CLONES)
 
 stage-all-h5s:
 	$(MAKE) stage-h5s & $(MAKE) stage-national-h5 & wait
 
 promote:
+	@echo "This will run the full Modal promote pipeline (local_area.py::main_promote)."
+	@printf 'Are you sure? [y/N] ' && read confirm && [ "$$confirm" = "y" ] || (echo "Aborted."; exit 1)
 	$(eval VERSION := $(or $(VERSION),$(shell python -c "import tomllib; print(tomllib.load(open('pyproject.toml','rb'))['project']['version'])")))
-	modal run modal_app/local_area.py::main_promote \
+	modal run --detach modal_app/local_area.py::main_promote \
 		--branch $(BRANCH) --version $(VERSION)
 
 validate-staging:
 	python -m policyengine_us_data.calibration.validate_staging \
-		--area-type states --output validation_results.csv
+		--area-type states --output validation_results.csv \
+		$(if $(RUN_ID),--run-id $(RUN_ID))
 
 validate-staging-full:
 	python -m policyengine_us_data.calibration.validate_staging \
-		--area-type states,districts --output validation_results.csv
+		--area-type states,districts --output validation_results.csv \
+		$(if $(RUN_ID),--run-id $(RUN_ID))
 
 upload-validation:
 	python -c "from policyengine_us_data.utils.huggingface import upload; \
@@ -225,18 +226,23 @@ upload-validation:
 		'calibration/logs/validation_results.csv')"
 
 check-staging:
-	python -m policyengine_us_data.calibration.check_staging_sums
+	python -m policyengine_us_data.calibration.check_staging_sums \
+		$(if $(RUN_ID),--run-id $(RUN_ID))
 
 check-sanity:
 	python -m policyengine_us_data.calibration.validate_staging \
-		--sanity-only --area-type states --areas NC
-
-pipeline: data upload-dataset build-matrices calibrate-both stage-all-h5s
-	@echo ""
-	@echo "========================================"
-	@echo "Pipeline complete. H5s are in HF staging."
-	@echo "Run 'Promote Local Area H5 Files' workflow in GitHub to publish."
-	@echo "========================================"
+		--sanity-only --area-type states --areas NC \
+		$(if $(RUN_ID),--run-id $(RUN_ID))
+
+build-data-modal:
+	modal run --detach modal_app/data_build.py::main --branch $(BRANCH) --upload --skip-tests
+
+pipeline:
+	modal run --detach modal_app/pipeline.py::main \
+		--action run --branch $(BRANCH) --gpu $(GPU) \
+		--epochs $(EPOCHS) --national-gpu $(NATIONAL_GPU) \
+		--national-epochs $(NATIONAL_EPOCHS) \
+		--num-workers $(NUM_WORKERS) --n-clones $(N_CLONES)
 
 clean:
 	rm -f policyengine_us_data/storage/*.h5

diff --git a/docs/local_area_calibration_setup.ipynb b/docs/local_area_calibration_setup.ipynb
@@ -9,7 +9,7 @@
     "\n",
     "This notebook demonstrates the clone-based calibration pipeline: how raw CPS records become a calibration matrix and, ultimately, CD-level stacked datasets.\n",
     "\n",
-    "The paradigm shift from the old approach: instead of replicating every household into every congressional district, we **clone** each record N times and assign each clone a **random census block** drawn from a population-weighted distribution. Each clone inherits a state, CD, and block \u2014 and gets re-simulated under the rules of its assigned state.\n",
+    "The paradigm shift from the old approach: instead of replicating every household into every congressional district, we **clone** each record N times and assign each clone a **random census block** drawn from a population-weighted distribution. Each clone inherits a state, CD, and block — and gets re-simulated under the rules of its assigned state.\n",
     "\n",
     "We follow one household (`record_idx=8629`, household_id 128694, SNAP \\$18,396) through the entire pipeline:\n",
     "1. Clone and assign geography\n",
@@ -19,7 +19,7 @@
     "5. Build the calibration matrix\n",
     "6. Create stacked datasets from calibrated weights\n",
     "\n",
-    "**Companion notebook:** [calibration_internals.ipynb](calibration_internals.ipynb) covers the *finished* matrix \u2014 row/column anatomy, target groups, sparsity. This notebook covers the *process* that creates it and what happens after (stacked datasets).\n",
+    "**Companion notebook:** [calibration_internals.ipynb](calibration_internals.ipynb) covers the *finished* matrix — row/column anatomy, target groups, sparsity. This notebook covers the *process* that creates it and what happens after (stacked datasets).\n",
     "\n",
     "**Requirements:** `policy_data.db`, `block_cd_distributions.csv.gz`, and the stratified CPS h5 file in `STORAGE_FOLDER`."
    ]
@@ -56,7 +56,6 @@
     "from policyengine_us_data.storage import STORAGE_FOLDER\n",
     "from policyengine_us_data.calibration.clone_and_assign import (\n",
     "    assign_random_geography,\n",
-    "    GeographyAssignment,\n",
     "    load_global_block_distribution,\n",
     ")\n",
     "from policyengine_us_data.calibration.unified_matrix_builder import (\n",
@@ -303,13 +302,13 @@
    "id": "cell-9",
    "metadata": {},
    "source": [
-    "## Section 3: Inside `_simulate_clone` \u2014 State-Swap\n",
+    "## Section 3: Inside `_simulate_clone` — State-Swap\n",
     "\n",
     "For each clone, `_simulate_clone` does four things:\n",
     "1. Creates a **fresh** `Microsimulation` from the base dataset\n",
     "2. Overwrites `state_fips` with the clone's assigned states\n",
     "3. Optionally calls a `sim_modifier` (e.g., takeup re-randomization)\n",
-    "4. **Clears cached formulas** via `get_calculated_variables` \u2014 preserving survey inputs and IDs while forcing recalculation of state-dependent variables like SNAP\n",
+    "4. **Clears cached formulas** via `get_calculated_variables` — preserving survey inputs and IDs while forcing recalculation of state-dependent variables like SNAP\n",
     "\n",
     "Let's reproduce this manually for clone 0."
    ]
@@ -476,7 +475,7 @@
     "\n",
     "When assembling the calibration matrix, each target row only \"sees\" columns (clones) whose geography matches the target's geography. This is implemented via `state_to_cols` and `cd_to_cols` dictionaries built from the `GeographyAssignment`.\n",
     "\n",
-    "This is step 3 of `build_matrix` \u2014 reproduced here for transparency."
+    "This is step 3 of `build_matrix` — reproduced here for transparency."
    ]
   },
   {
@@ -585,7 +584,7 @@
    "source": [
     "## Section 5: Takeup Re-randomization\n",
     "\n",
-    "The base CPS has fixed takeup decisions (e.g., \"this household takes up SNAP\"). But when we clone a household into different census blocks, each block should have independently drawn takeup \u2014 otherwise every clone of a SNAP-participating household would still participate, regardless of geography.\n",
+    "The base CPS has fixed takeup decisions (e.g., \"this household takes up SNAP\"). But when we clone a household into different census blocks, each block should have independently drawn takeup — otherwise every clone of a SNAP-participating household would still participate, regardless of geography.\n",
     "\n",
     "`rerandomize_takeup` solves this: for each census block, it uses `seeded_rng(variable_name, salt=block_geoid)` to draw new takeup booleans. The seed is deterministic per (variable, block) pair, so results are reproducible."
    ]
@@ -763,7 +762,7 @@
    "id": "cell-22",
    "metadata": {},
    "source": [
-    "In the full pipeline, `rerandomize_takeup` is passed to `build_matrix` as a `sim_modifier` callback. For each clone, after `state_fips` is set but before formula caches are cleared, the callback draws new takeup booleans per census block. This means the same household in block A might take up SNAP while in block B it doesn't \u2014 matching the statistical reality that takeup varies by geography."
+    "In the full pipeline, `rerandomize_takeup` is passed to `build_matrix` as a `sim_modifier` callback. For each clone, after `state_fips` is set but before formula caches are cleared, the callback draws new takeup booleans per census block. This means the same household in block A might take up SNAP while in block B it doesn't — matching the statistical reality that takeup varies by geography."
    ]
   },
   {
@@ -871,9 +870,9 @@
    "source": [
     "## Section 7: From Weights to Datasets\n",
     "\n",
-    "`create_sparse_cd_stacked_dataset` takes calibrated weights and builds an h5 file with only the non-zero-weight households, reindexed per CD. Internally it does its own state-swap simulation \u2014 loading the base dataset, assigning `state_fips` for the target CD's state, and recalculating benefits from scratch. This means SNAP values in the output reflect the destination state's rules (e.g., a $70 SNAP household from ME may get $0 under AK rules).\n",
+    "`create_sparse_cd_stacked_dataset` takes calibrated weights and builds an h5 file with only the non-zero-weight households, reindexed per CD. Internally it does its own state-swap simulation — loading the base dataset, assigning `state_fips` for the target CD's state, and recalculating benefits from scratch. This means SNAP values in the output reflect the destination state's rules (e.g., a \\$70 SNAP household from ME may get \\$0 under AK rules).\n",
     "\n",
-    "**Format gap:** The calibration produces weights in clone layout `(n_records * n_clones,)` where each clone maps to one specific CD via the `GeographyAssignment`. The stacked dataset builder expects CD layout `(n_cds * n_households,)` where every CD has a weight slot for every household. Converting between these \u2014 accumulating clone weights into their assigned CDs \u2014 is a separate step not yet implemented. The demo below constructs artificial CD-layout weights directly to show how the builder works."
+    "**Format gap:** The calibration produces weights in clone layout `(n_records * n_clones,)` where each clone maps to one specific CD via the `GeographyAssignment`. The stacked dataset builder expects CD layout `(n_cds * n_households,)` where every CD has a weight slot for every household. Converting between these — accumulating clone weights into their assigned CDs — is a separate step not yet implemented. The demo below constructs artificial CD-layout weights directly to show how the builder works."
    ]
   },
   {
@@ -1012,9 +1011,9 @@
       "\n",
       "Overflow check:\n",
       "  Max person ID after reindexing: 5,025,365\n",
-      "  Max person ID \u00d7 100: 502,536,500\n",
+      "  Max person ID × 100: 502,536,500\n",
       "  int32 max: 2,147,483,647\n",
-      "  \u2713 No overflow risk!\n",
+      "  ✓ No overflow risk!\n",
       "\n",
       "Creating Dataset from combined DataFrame...\n",
       "Building simulation from Dataset...\n",
@@ -1134,12 +1133,12 @@
     "\n",
     "The clone-based calibration pipeline has six stages:\n",
     "\n",
-    "1. **Clone + assign geography** \u2014 `assign_random_geography()` creates N copies of each CPS record, each with a population-weighted random census block.\n",
-    "2. **Simulate** \u2014 `_simulate_clone()` sets each clone's `state_fips` and recalculates state-dependent benefits.\n",
-    "3. **Geographic masking** \u2014 `state_to_cols` / `cd_to_cols` restrict each target row to geographically relevant columns.\n",
-    "4. **Re-randomize takeup** \u2014 `rerandomize_takeup()` draws new takeup per census block, breaking the fixed-takeup assumption.\n",
-    "5. **Build matrix** \u2014 `UnifiedMatrixBuilder.build_matrix()` assembles the sparse CSR matrix from all clones.\n",
-    "6. **Stacked datasets** \u2014 `create_sparse_cd_stacked_dataset()` converts calibrated weights into CD-level h5 files.\n",
+    "1. **Clone + assign geography** — `assign_random_geography()` creates N copies of each CPS record, each with a population-weighted random census block.\n",
+    "2. **Simulate** — `_simulate_clone()` sets each clone's `state_fips` and recalculates state-dependent benefits.\n",
+    "3. **Geographic masking** — `state_to_cols` / `cd_to_cols` restrict each target row to geographically relevant columns.\n",
+    "4. **Re-randomize takeup** — `rerandomize_takeup()` draws new takeup per census block, breaking the fixed-takeup assumption.\n",
+    "5. **Build matrix** — `UnifiedMatrixBuilder.build_matrix()` assembles the sparse CSR matrix from all clones.\n",
+    "6. **Stacked datasets** — `create_sparse_cd_stacked_dataset()` converts calibrated weights into CD-level h5 files.\n",
     "\n",
     "For matrix diagnostics (row/column anatomy, target groups, sparsity analysis), see [calibration_internals.ipynb](calibration_internals.ipynb)."
    ]