Update PolicyEngine US dependency (#999)

MaxGhenis · web-flow · commit 47dc766ab75c · 2026-05-17T23:19:42.000-04:00
diff --git a/.github/workflows/long_run_projection.yaml b/.github/workflows/long_run_projection.yaml
@@ -269,6 +269,6 @@ jobs:
             echo "- Tax assumption: \`${TAX_ASSUMPTION}\`"
             echo "- HF staging upload: \`${UPLOAD_TO_HF_STAGING}\`"
             if [ "${UPLOAD_TO_HF_STAGING}" = "true" ]; then
-              echo "- HF staging prefix: \`staging/${CHECKED_OUT_SHA}/${RUN_ID}/long_term/\`"
+              echo "- HF staging prefix: \`staging/${CHECKED_OUT_SHA}-${RUN_ID}/long_term/\`"
             fi
           } >> "$GITHUB_STEP_SUMMARY"
diff --git a/changelog.d/999.fixed.md b/changelog.d/999.fixed.md
@@ -0,0 +1,2 @@
+Update the pinned PolicyEngine US dependency and allow long-run production builds
+to mix pre- and post-support-augmentation years.
diff --git a/policyengine_us_data/datasets/cps/long_term/README.md b/policyengine_us_data/datasets/cps/long_term/README.md
@@ -74,7 +74,7 @@ python run_long_term_production.py \
 - `.github/workflows/long_run_projection.yaml` is `workflow_dispatch` only. It does not run on pull requests, normal merges, or the standard `push.yaml` publication path.
 - The workflow calls `run_long_term_production.py`, which wraps the parallel runner, writes `long_run_production_manifest.json`, and preserves per-year logs with the run metadata.
 - The default year set builds the 10-year budget window plus 5-year sampled points through `2100`; override `years` for full annual builds or narrower diagnostics.
-- Hugging Face upload is disabled by default. Set `upload_to_hf_staging=true` only for a candidate run that should publish generated H5s and metadata under `staging/{source_sha}/{run_id}/long_term/`.
+- Hugging Face upload is disabled by default. Set `upload_to_hf_staging=true` only for a candidate run that should publish generated H5s and metadata under `staging/{source_sha}-{run_id}/long_term/`.
 - Late-year support augmentation remains an explicit input. The workflow exposes the donor-backed controls, but it does not silently enable experimental support profiles.
 
 **Named profiles:**
diff --git a/policyengine_us_data/datasets/cps/long_term/run_household_projection.py b/policyengine_us_data/datasets/cps/long_term/run_household_projection.py
@@ -691,12 +691,24 @@ def _support_augmentation_metadata(
             "Support-augmentation non-target income sanitization is only "
             "supported with donor-backed-composite-v1."
         )
-    if START_YEAR < SUPPORT_AUGMENTATION_START_YEAR:
-        raise ValueError(
-            "Support augmentation is only supported for late-year runs. "
-            f"Received START_YEAR={START_YEAR}, requires >= "
+    if END_YEAR < SUPPORT_AUGMENTATION_START_YEAR:
+        print(
+            "Support augmentation requested but disabled for this pre-activation "
+            f"run ({START_YEAR}-{END_YEAR}); activation starts in "
             f"{SUPPORT_AUGMENTATION_START_YEAR}."
         )
+        SUPPORT_AUGMENTATION_PROFILE = None
+    elif (
+        START_YEAR < SUPPORT_AUGMENTATION_START_YEAR
+        and not SUPPORT_AUGMENTATION_ALIGN_TO_RUN_YEAR
+    ):
+        raise ValueError(
+            "Static support augmentation cannot span both pre-activation and "
+            "late-year runs in a single process. Use the parallel production "
+            "runner, split the run, or pass --support-augmentation-align-to-run-year. "
+            f"Received START_YEAR={START_YEAR}, END_YEAR={END_YEAR}, activation "
+            f"starts in {SUPPORT_AUGMENTATION_START_YEAR}."
+        )
 
 legacy_flags_used = any([USE_GREG, USE_SS, USE_PAYROLL, USE_H6_REFORM, USE_TOB])
 if PROFILE_NAME and legacy_flags_used:
diff --git a/policyengine_us_data/datasets/cps/long_term/run_household_projection_parallel.py b/policyengine_us_data/datasets/cps/long_term/run_household_projection_parallel.py
@@ -16,6 +16,22 @@
 
 SCRIPT_DIR = Path(__file__).resolve().parent
 RUNNER_PATH = SCRIPT_DIR / "run_household_projection.py"
+DEFAULT_SUPPORT_AUGMENTATION_START_YEAR = 2075
+SUPPORT_AUGMENTATION_VALUE_FLAGS = {
+    "--support-augmentation-profile",
+    "--support-augmentation-target-year",
+    "--support-augmentation-start-year",
+    "--support-augmentation-top-n-targets",
+    "--support-augmentation-donors-per-target",
+    "--support-augmentation-max-distance",
+    "--support-augmentation-clone-weight-scale",
+    "--support-augmentation-blueprint-base-weight-scale",
+}
+SUPPORT_AUGMENTATION_BOOLEAN_FLAGS = {
+    "--support-augmentation-align-to-run-year",
+    "--support-augmentation-sanitize-worker-non-target-income",
+    "--support-augmentation-sanitize-clone-non-target-income",
+}
 
 
 def parse_years(spec: str) -> list[int]:
@@ -81,6 +97,53 @@ def validate_forwarded_args(forwarded_args: list[str]) -> None:
             )
 
 
+def _option_value(args: list[str], flag: str) -> str | None:
+    if flag not in args:
+        return None
+    index = args.index(flag)
+    if index + 1 >= len(args):
+        raise ValueError(f"{flag} requires a value")
+    return args[index + 1]
+
+
+def _has_support_augmentation_profile(args: list[str]) -> bool:
+    return "--support-augmentation-profile" in args
+
+
+def _support_augmentation_start_year(args: list[str]) -> int:
+    raw_value = _option_value(args, "--support-augmentation-start-year")
+    if raw_value is None:
+        return DEFAULT_SUPPORT_AUGMENTATION_START_YEAR
+    return int(raw_value)
+
+
+def _strip_support_augmentation_args(args: list[str]) -> list[str]:
+    stripped: list[str] = []
+    index = 0
+    while index < len(args):
+        arg = args[index]
+        if arg in SUPPORT_AUGMENTATION_VALUE_FLAGS:
+            if index + 1 >= len(args):
+                raise ValueError(f"{arg} requires a value")
+            index += 2
+            continue
+        if arg in SUPPORT_AUGMENTATION_BOOLEAN_FLAGS:
+            index += 1
+            continue
+        stripped.append(arg)
+        index += 1
+    return stripped
+
+
+def forwarded_args_for_year(year: int, forwarded_args: list[str]) -> list[str]:
+    """Return runner args with late-year support disabled before activation."""
+    if not _has_support_augmentation_profile(forwarded_args):
+        return list(forwarded_args)
+    if year >= _support_augmentation_start_year(forwarded_args):
+        return list(forwarded_args)
+    return _strip_support_augmentation_args(forwarded_args)
+
+
 def year_output_dir(root: Path, year: int) -> Path:
     return root / ".parallel_tmp" / str(year)
 
@@ -123,7 +186,7 @@ def run_year(
         "--output-dir",
         str(output_dir),
         "--save-h5",
-        *forwarded_args,
+        *forwarded_args_for_year(year, forwarded_args),
     ]
 
     with log_path.open("w", encoding="utf-8") as log_file:
@@ -168,6 +231,44 @@ def _json_clone(value):
     return json.loads(json.dumps(value))
 
 
+def _normalize_support_augmentation_contract(value):
+    if value is None:
+        return None
+    normalized = _json_clone(value)
+    if normalized.get("target_year_strategy") == "run_year":
+        normalized.pop("target_year", None)
+    normalized.pop("report_file", None)
+    normalized.pop("report_summary", None)
+    return normalized
+
+
+def _support_augmentation_activation_start(value) -> int | None:
+    if not isinstance(value, dict):
+        return None
+    raw_value = value.get("activation_start_year")
+    if raw_value is None:
+        return None
+    return int(raw_value)
+
+
+def support_augmentation_contracts_compatible(left, right, *, year: int) -> bool:
+    if _normalize_support_augmentation_contract(
+        left
+    ) == _normalize_support_augmentation_contract(right):
+        return True
+    if left is None and right is not None:
+        activation_year = _support_augmentation_activation_start(right)
+        return activation_year is not None and year >= activation_year
+    if left is not None and right is None:
+        activation_year = _support_augmentation_activation_start(left)
+        return activation_year is not None and year < activation_year
+    return False
+
+
+def merge_support_augmentation_contract(left, right):
+    return _json_clone(left if left is not None else right)
+
+
 def manifest_contract(manifest: dict) -> dict:
     tax_assumption = _json_clone(manifest.get("tax_assumption"))
     if isinstance(tax_assumption, dict):
@@ -209,6 +310,22 @@ def merge_outputs(
             manifest_seed = temp_contract
         else:
             for key, value in manifest_seed.items():
+                if key == "support_augmentation":
+                    support_augmentation = temp_contract.get(key)
+                    if not support_augmentation_contracts_compatible(
+                        value,
+                        support_augmentation,
+                        year=year,
+                    ):
+                        raise ValueError(
+                            f"Temp manifest mismatch for {key} in year {year}: "
+                            f"{support_augmentation} != {value}"
+                        )
+                    manifest_seed[key] = merge_support_augmentation_contract(
+                        value,
+                        support_augmentation,
+                    )
+                    continue
                 if temp_contract.get(key) != value:
                     raise ValueError(
                         f"Temp manifest mismatch for {key} in year {year}: "
diff --git a/pyproject.toml b/pyproject.toml
@@ -22,7 +22,7 @@ classifiers = [
     "Programming Language :: Python :: 3.14",
 ]
 dependencies = [
-    "policyengine-us==1.693.2",
+    "policyengine-us==1.693.4",
     # policyengine-core 3.26.1 is the current 3.26.x runtime and includes the fix for
     # PolicyEngine/policyengine-core#482 (user-set ETERNITY inputs lost
     # after _invalidate_all_caches) and is required by policyengine-us 1.682.1+.
diff --git a/tests/unit/test_long_term_calibration_contract.py b/tests/unit/test_long_term_calibration_contract.py
diff --git a/uv.lock b/uv.lock

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
|  |  | `@@ -0,0 +1,2 @@` |
|  | `1` | `+Update the pinned PolicyEngine US dependency and allow long-run production builds` |
|  | `2` | `+to mix pre- and post-support-augmentation years.` |
| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
|  |  | `@@ -22,7 +22,7 @@ classifiers = [` |
| `22` | `22` | `"Programming Language :: Python :: 3.14",` |
| `23` | `23` | `]` |
| `24` | `24` | `dependencies = [` |
| `25` |  | `- "policyengine-us==1.693.2",` |
|  | `25` | `+ "policyengine-us==1.693.4",` |
| `26` | `26` | `# policyengine-core 3.26.1 is the current 3.26.x runtime and includes the fix for` |
| `27` | `27` | `# PolicyEngine/policyengine-core#482 (user-set ETERNITY inputs lost` |
| `28` | `28` | `# after _invalidate_all_caches) and is required by policyengine-us 1.682.1+.` |