PolicyEngine
diff --git a/modal_app/local_area.py b/modal_app/local_area.py
Lines changed: 30 additions & 18 deletions
diff --git a/modal_app/remote_calibration_runner.py b/modal_app/remote_calibration_runner.py
Lines changed: 112 additions & 1 deletion
diff --git a/modal_app/worker_script.py b/modal_app/worker_script.py
Lines changed: 29 additions & 3 deletions
@@ -154,23 +154,32 @@ def build_areas_worker(
 
     work_items_json = json.dumps(work_items)
 
+    worker_cmd = [
+        "uv",
+        "run",
+        "python",
+        "modal_app/worker_script.py",
+        "--work-items",
+        work_items_json,
+        "--weights-path",
+        calibration_inputs["weights"],
+        "--dataset-path",
+        calibration_inputs["dataset"],
+        "--db-path",
+        calibration_inputs["database"],
+        "--output-dir",
+        str(output_dir),
+    ]
+    if "blocks" in calibration_inputs:
+        worker_cmd.extend(
+            [
+                "--calibration-blocks",
+                calibration_inputs["blocks"],
+            ]
+        )
+
     result = subprocess.run(
-        [
-            "uv",
-            "run",
-            "python",
-            "modal_app/worker_script.py",
-            "--work-items",
-            work_items_json,
-            "--weights-path",
-            calibration_inputs["weights"],
-            "--dataset-path",
-            calibration_inputs["dataset"],
-            "--db-path",
-            calibration_inputs["database"],
-            "--output-dir",
-            str(output_dir),
-        ],
+        worker_cmd,
         capture_output=True,
         text=True,
         env=os.environ.copy(),
@@ -474,11 +483,15 @@ def coordinate_publish(
     staging_volume.commit()
     print("Calibration inputs downloaded")
 
+    blocks_path = calibration_dir / "calibration" / "stacked_blocks.npy"
     calibration_inputs = {
         "weights": str(weights_path),
         "dataset": str(dataset_path),
         "database": str(db_path),
     }
+    if blocks_path.exists():
+        calibration_inputs["blocks"] = str(blocks_path)
+        print(f"Calibration blocks found: {blocks_path}")
 
     result = subprocess.run(
         [
@@ -582,8 +595,7 @@ def coordinate_publish(
             for err in all_errors[:5]:
                 err_msg = err.get("error", "Unknown")[:100]
                 print(
-                    f"  - {err.get('item', err.get('worker'))}: "
-                    f"{err_msg}"
+                    f"  - {err.get('item', err.get('worker'))}: " f"{err_msg}"
                 )
             if len(all_errors) > 5:
                 print(f"  ... and {len(all_errors) - 5} more")
 
@@ -72,11 +72,17 @@ def _collect_outputs(cal_lines):
     output_path = None
     log_path = None
     cal_log_path = None
+    config_path = None
+    blocks_path = None
     for line in cal_lines:
         if "OUTPUT_PATH:" in line:
             output_path = line.split("OUTPUT_PATH:")[1].strip()
+        elif "CONFIG_PATH:" in line:
+            config_path = line.split("CONFIG_PATH:")[1].strip()
         elif "CAL_LOG_PATH:" in line:
             cal_log_path = line.split("CAL_LOG_PATH:")[1].strip()
+        elif "BLOCKS_PATH:" in line:
+            blocks_path = line.split("BLOCKS_PATH:")[1].strip()
         elif "LOG_PATH:" in line:
             log_path = line.split("LOG_PATH:")[1].strip()
 
@@ -93,13 +99,94 @@ def _collect_outputs(cal_lines):
         with open(cal_log_path, "rb") as f:
             cal_log_bytes = f.read()
 
+    config_bytes = None
+    if config_path:
+        with open(config_path, "rb") as f:
+            config_bytes = f.read()
+
+    blocks_bytes = None
+    if blocks_path and os.path.exists(blocks_path):
+        with open(blocks_path, "rb") as f:
+            blocks_bytes = f.read()
+
     return {
         "weights": weights_bytes,
         "log": log_bytes,
         "cal_log": cal_log_bytes,
+        "config": config_bytes,
+        "blocks": blocks_bytes,
     }
 
 
+def _upload_logs_to_hf(log_files: dict):
+    """Push local calibration log files to HuggingFace in one commit.
+
+    Entries whose local file exists are staged under ``calibration/logs/``
+    in the ``policyengine/policyengine-us-data`` model repo. Entries whose
+    local file is missing are reported and left out; if nothing remains,
+    no commit is created.
+
+    Args:
+        log_files: dict mapping the file name to use on HuggingFace to
+            the local path to read,
+            e.g. {"calibration_log.csv": "calibration_log.csv"}
+    """
+    from huggingface_hub import CommitOperationAdd, HfApi
+
+    repo_id = "policyengine/policyengine-us-data"
+    hf_token = os.environ.get("HUGGING_FACE_TOKEN")
+    client = HfApi()
+
+    # Single pass: stage what is on disk, report what is not.
+    staged = []
+    for remote_name, source_path in log_files.items():
+        if os.path.exists(source_path):
+            addition = CommitOperationAdd(
+                path_in_repo=f"calibration/logs/{remote_name}",
+                path_or_fileobj=source_path,
+            )
+            staged.append(addition)
+        else:
+            print(f"Skipping {source_path} (not found)", flush=True)
+
+    if len(staged) == 0:
+        print("No log files to upload.", flush=True)
+        return
+
+    message = f"Upload {len(staged)} calibration log file(s)"
+    client.create_commit(
+        repo_id=repo_id,
+        repo_type="model",
+        operations=staged,
+        commit_message=message,
+        token=hf_token,
+    )
+    committed_paths = [entry.path_in_repo for entry in staged]
+    print(f"Uploaded to HuggingFace: {committed_paths}", flush=True)
+
+
+def _upload_calibration_artifact(local_path: str, hf_name: str):
+    """Commit a single local file to ``calibration/<hf_name>`` on HuggingFace.
+
+    Prints a notice and returns without committing when ``local_path``
+    does not exist.
+
+    Args:
+        local_path: path of the local file to upload.
+        hf_name: file name to use under ``calibration/`` in the
+            ``policyengine/policyengine-us-data`` model repo.
+    """
+    from huggingface_hub import CommitOperationAdd, HfApi
+
+    if os.path.exists(local_path):
+        addition = CommitOperationAdd(
+            path_in_repo=f"calibration/{hf_name}",
+            path_or_fileobj=local_path,
+        )
+        HfApi().create_commit(
+            repo_id="policyengine/policyengine-us-data",
+            repo_type="model",
+            operations=[addition],
+            commit_message=f"Upload calibration artifact: {hf_name}",
+            token=os.environ.get("HUGGING_FACE_TOKEN"),
+        )
+        print(
+            f"Uploaded {local_path} to calibration/{hf_name}",
+            flush=True,
+        )
+    else:
+        print(f"Skipping {local_path} (not found)", flush=True)
+
+
 def _fit_weights_impl(
     branch: str,
     epochs: int,
@@ -631,6 +718,7 @@ def main(
     package_volume: bool = False,
     county_level: bool = False,
     workers: int = 1,
+    upload_logs: bool = False,
 ):
     if gpu not in GPU_FUNCTIONS:
         raise ValueError(
@@ -706,8 +794,31 @@ def main(
             f.write(result["log"])
         print(f"Diagnostics log saved to: {log_output}")
 
+    cal_log_output = "calibration_log.csv"
     if result.get("cal_log"):
-        cal_log_output = "calibration_log.csv"
         with open(cal_log_output, "wb") as f:
             f.write(result["cal_log"])
         print(f"Calibration log saved to: {cal_log_output}")
+
+    config_output = "unified_run_config.json"
+    if result.get("config"):
+        with open(config_output, "wb") as f:
+            f.write(result["config"])
+        print(f"Run config saved to: {config_output}")
+
+    blocks_output = "stacked_blocks.npy"
+    if result.get("blocks"):
+        with open(blocks_output, "wb") as f:
+            f.write(result["blocks"])
+        print(f"Stacked blocks saved to: {blocks_output}")
+
+    if upload_logs:
+        log_files = {
+            "calibration_log.csv": cal_log_output,
+            "unified_diagnostics.csv": log_output,
+            "unified_run_config.json": config_output,
+        }
+        _upload_logs_to_hf(log_files)
+
+        if result.get("blocks"):
+            _upload_calibration_artifact(blocks_output, "stacked_blocks.npy")
@@ -20,6 +20,12 @@ def main():
     parser.add_argument("--dataset-path", required=True)
     parser.add_argument("--db-path", required=True)
     parser.add_argument("--output-dir", required=True)
+    parser.add_argument(
+        "--calibration-blocks",
+        type=str,
+        default=None,
+        help="Path to stacked_blocks.npy from calibration",
+    )
     args = parser.parse_args()
 
     work_items = json.loads(args.work_items)
@@ -28,6 +34,19 @@ def main():
     db_path = Path(args.db_path)
     output_dir = Path(args.output_dir)
 
+    calibration_blocks = None
+    if args.calibration_blocks:
+        calibration_blocks = np.load(args.calibration_blocks)
+
+    rerandomize_takeup = True
+    from policyengine_us_data.utils.takeup import (
+        TAKEUP_AFFECTED_TARGETS,
+    )
+
+    takeup_filter = [
+        info["takeup_var"] for info in TAKEUP_AFFECTED_TARGETS.values()
+    ]
+
     original_stdout = sys.stdout
     sys.stdout = sys.stderr
 
@@ -63,6 +82,9 @@ def main():
                     cds_to_calibrate=cds_to_calibrate,
                     dataset_path=dataset_path,
                     output_dir=output_dir,
+                    rerandomize_takeup=rerandomize_takeup,
+                    calibration_blocks=calibration_blocks,
+                    takeup_filter=takeup_filter,
                 )
             elif item_type == "district":
                 state_code, dist_num = item_id.split("-")
@@ -72,9 +94,7 @@ def main():
                         state_fips = fips
                         break
                 if state_fips is None:
-                    raise ValueError(
-                        f"Unknown state in district: {item_id}"
-                    )
+                    raise ValueError(f"Unknown state in district: {item_id}")
 
                 candidate = f"{state_fips}{int(dist_num):02d}"
                 if candidate in cds_to_calibrate:
@@ -100,6 +120,9 @@ def main():
                     cds_to_calibrate=cds_to_calibrate,
                     dataset_path=dataset_path,
                     output_dir=output_dir,
+                    rerandomize_takeup=rerandomize_takeup,
+                    calibration_blocks=calibration_blocks,
+                    takeup_filter=takeup_filter,
                 )
             elif item_type == "city":
                 path = build_city_h5(
@@ -108,6 +131,9 @@ def main():
                     cds_to_calibrate=cds_to_calibrate,
                     dataset_path=dataset_path,
                     output_dir=output_dir,
+                    rerandomize_takeup=rerandomize_takeup,
+                    calibration_blocks=calibration_blocks,
+                    takeup_filter=takeup_filter,
                 )
             else:
                 raise ValueError(f"Unknown item type: {item_type}")