Skip to content

Commit 47dc766

Browse files
authored
Update PolicyEngine US dependency (#999)
1 parent ff11dd6 commit 47dc766

8 files changed

Lines changed: 356 additions & 12 deletions

File tree

.github/workflows/long_run_projection.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -269,6 +269,6 @@ jobs:
269269
echo "- Tax assumption: \`${TAX_ASSUMPTION}\`"
270270
echo "- HF staging upload: \`${UPLOAD_TO_HF_STAGING}\`"
271271
if [ "${UPLOAD_TO_HF_STAGING}" = "true" ]; then
272-
echo "- HF staging prefix: \`staging/${CHECKED_OUT_SHA}/${RUN_ID}/long_term/\`"
272+
echo "- HF staging prefix: \`staging/${CHECKED_OUT_SHA}-${RUN_ID}/long_term/\`"
273273
fi
274274
} >> "$GITHUB_STEP_SUMMARY"

changelog.d/999.fixed.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Update the pinned PolicyEngine US dependency and allow long-run production builds
2+
to mix pre- and post-support-augmentation years.

policyengine_us_data/datasets/cps/long_term/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ python run_long_term_production.py \
7474
- `.github/workflows/long_run_projection.yaml` is `workflow_dispatch` only. It does not run on pull requests, normal merges, or the standard `push.yaml` publication path.
7575
- The workflow calls `run_long_term_production.py`, which wraps the parallel runner, writes `long_run_production_manifest.json`, and preserves per-year logs with the run metadata.
7676
- The default year set builds the 10-year budget window plus 5-year sampled points through `2100`; override `years` for full annual builds or narrower diagnostics.
77-
- Hugging Face upload is disabled by default. Set `upload_to_hf_staging=true` only for a candidate run that should publish generated H5s and metadata under `staging/{source_sha}/{run_id}/long_term/`.
77+
- Hugging Face upload is disabled by default. Set `upload_to_hf_staging=true` only for a candidate run that should publish generated H5s and metadata under `staging/{source_sha}-{run_id}/long_term/`.
7878
- Late-year support augmentation remains an explicit input. The workflow exposes the donor-backed controls, but it does not silently enable experimental support profiles.
7979

8080
**Named profiles:**

policyengine_us_data/datasets/cps/long_term/run_household_projection.py

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -691,12 +691,24 @@ def _support_augmentation_metadata(
691691
"Support-augmentation non-target income sanitization is only "
692692
"supported with donor-backed-composite-v1."
693693
)
694-
if START_YEAR < SUPPORT_AUGMENTATION_START_YEAR:
695-
raise ValueError(
696-
"Support augmentation is only supported for late-year runs. "
697-
f"Received START_YEAR={START_YEAR}, requires >= "
694+
if END_YEAR < SUPPORT_AUGMENTATION_START_YEAR:
695+
print(
696+
"Support augmentation requested but disabled for this pre-activation "
697+
f"run ({START_YEAR}-{END_YEAR}); activation starts in "
698698
f"{SUPPORT_AUGMENTATION_START_YEAR}."
699699
)
700+
SUPPORT_AUGMENTATION_PROFILE = None
701+
elif (
702+
START_YEAR < SUPPORT_AUGMENTATION_START_YEAR
703+
and not SUPPORT_AUGMENTATION_ALIGN_TO_RUN_YEAR
704+
):
705+
raise ValueError(
706+
"Static support augmentation cannot span both pre-activation and "
707+
"late-year runs in a single process. Use the parallel production "
708+
"runner, split the run, or pass --support-augmentation-align-to-run-year. "
709+
f"Received START_YEAR={START_YEAR}, END_YEAR={END_YEAR}, activation "
710+
f"starts in {SUPPORT_AUGMENTATION_START_YEAR}."
711+
)
700712

701713
legacy_flags_used = any([USE_GREG, USE_SS, USE_PAYROLL, USE_H6_REFORM, USE_TOB])
702714
if PROFILE_NAME and legacy_flags_used:

policyengine_us_data/datasets/cps/long_term/run_household_projection_parallel.py

Lines changed: 118 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,22 @@
1616

1717
SCRIPT_DIR = Path(__file__).resolve().parent
1818
RUNNER_PATH = SCRIPT_DIR / "run_household_projection.py"
19+
DEFAULT_SUPPORT_AUGMENTATION_START_YEAR = 2075
20+
SUPPORT_AUGMENTATION_VALUE_FLAGS = {
21+
"--support-augmentation-profile",
22+
"--support-augmentation-target-year",
23+
"--support-augmentation-start-year",
24+
"--support-augmentation-top-n-targets",
25+
"--support-augmentation-donors-per-target",
26+
"--support-augmentation-max-distance",
27+
"--support-augmentation-clone-weight-scale",
28+
"--support-augmentation-blueprint-base-weight-scale",
29+
}
30+
SUPPORT_AUGMENTATION_BOOLEAN_FLAGS = {
31+
"--support-augmentation-align-to-run-year",
32+
"--support-augmentation-sanitize-worker-non-target-income",
33+
"--support-augmentation-sanitize-clone-non-target-income",
34+
}
1935

2036

2137
def parse_years(spec: str) -> list[int]:
@@ -81,6 +97,53 @@ def validate_forwarded_args(forwarded_args: list[str]) -> None:
8197
)
8298

8399

100+
def _option_value(args: list[str], flag: str) -> str | None:
101+
if flag not in args:
102+
return None
103+
index = args.index(flag)
104+
if index + 1 >= len(args):
105+
raise ValueError(f"{flag} requires a value")
106+
return args[index + 1]
107+
108+
109+
def _has_support_augmentation_profile(args: list[str]) -> bool:
110+
return "--support-augmentation-profile" in args
111+
112+
113+
def _support_augmentation_start_year(args: list[str]) -> int:
    """Return the support-augmentation activation year encoded in ``args``.

    Falls back to ``DEFAULT_SUPPORT_AUGMENTATION_START_YEAR`` when
    ``--support-augmentation-start-year`` is not present.

    Raises:
        ValueError: If the flag is present without a value, or its value is
            not an integer.
    """
    flag = "--support-augmentation-start-year"
    raw_value = _option_value(args, flag)
    if raw_value is None:
        return DEFAULT_SUPPORT_AUGMENTATION_START_YEAR
    try:
        return int(raw_value)
    except ValueError as err:
        # Name the offending flag; the bare int() message does not.
        raise ValueError(
            f"{flag} requires an integer year, got {raw_value!r}"
        ) from err
118+
119+
120+
def _strip_support_augmentation_args(args: list[str]) -> list[str]:
    """Return a copy of ``args`` with every support-augmentation option removed.

    Value flags are dropped together with their value, whether written as
    ``--flag VALUE`` or ``--flag=VALUE``; boolean flags are dropped on their
    own. All other tokens are kept in their original order.

    Raises:
        ValueError: If a value flag is the final token and has no value.
    """
    stripped: list[str] = []
    index = 0
    while index < len(args):
        arg = args[index]
        if arg in SUPPORT_AUGMENTATION_VALUE_FLAGS:
            if index + 1 >= len(args):
                raise ValueError(f"{arg} requires a value")
            # Skip the flag and the value token that follows it.
            index += 2
            continue
        name, separator, _ = arg.partition("=")
        if separator and name in SUPPORT_AUGMENTATION_VALUE_FLAGS:
            # Inline "--flag=VALUE": flag and value share a single token.
            index += 1
            continue
        if arg in SUPPORT_AUGMENTATION_BOOLEAN_FLAGS:
            index += 1
            continue
        stripped.append(arg)
        index += 1
    return stripped
136+
137+
138+
def forwarded_args_for_year(year: int, forwarded_args: list[str]) -> list[str]:
    """Return the runner arguments to use for a single ``year``.

    When a support-augmentation profile is requested but ``year`` falls before
    the activation year, all support-augmentation options are stripped.
    Otherwise a shallow copy of ``forwarded_args`` is returned unchanged.
    """
    # The start year is only parsed when a profile is actually present.
    augmentation_inactive = _has_support_augmentation_profile(
        forwarded_args
    ) and year < _support_augmentation_start_year(forwarded_args)
    if augmentation_inactive:
        return _strip_support_augmentation_args(forwarded_args)
    return list(forwarded_args)
145+
146+
84147
def year_output_dir(root: Path, year: int) -> Path:
    """Return the per-year scratch directory ``<root>/.parallel_tmp/<year>``."""
    return root.joinpath(".parallel_tmp", str(year))
86149

@@ -123,7 +186,7 @@ def run_year(
123186
"--output-dir",
124187
str(output_dir),
125188
"--save-h5",
126-
*forwarded_args,
189+
*forwarded_args_for_year(year, forwarded_args),
127190
]
128191

129192
with log_path.open("w", encoding="utf-8") as log_file:
@@ -168,6 +231,44 @@ def _json_clone(value):
168231
return json.loads(json.dumps(value))
169232

170233

234+
def _normalize_support_augmentation_contract(value):
    """Return a comparison-ready copy of a support-augmentation contract.

    ``None`` is passed through. Otherwise the contract is cloned and the
    ``report_file`` and ``report_summary`` entries are removed; ``target_year``
    is also removed when ``target_year_strategy`` is ``"run_year"``. The input
    is never mutated.
    """
    if value is None:
        return None
    contract = _json_clone(value)
    if contract.get("target_year_strategy") == "run_year":
        # NOTE(review): presumably target_year differs per year under this
        # strategy, so it is excluded from comparison — confirm.
        contract.pop("target_year", None)
    for dropped_key in ("report_file", "report_summary"):
        contract.pop(dropped_key, None)
    return contract
243+
244+
245+
def _support_augmentation_activation_start(value) -> int | None:
246+
if not isinstance(value, dict):
247+
return None
248+
raw_value = value.get("activation_start_year")
249+
if raw_value is None:
250+
return None
251+
return int(raw_value)
252+
253+
254+
def support_augmentation_contracts_compatible(left, right, *, year: int) -> bool:
    """Return whether two support-augmentation contracts may share a manifest.

    Contracts are compatible when their normalized forms are equal. If exactly
    one side is ``None``, the other side's ``activation_start_year`` decides:
    a missing ``left`` requires ``year`` to be at or after activation, and a
    missing ``right`` requires ``year`` to be before activation. If no
    activation year can be read, the contracts are incompatible.
    """
    normalized_left = _normalize_support_augmentation_contract(left)
    normalized_right = _normalize_support_augmentation_contract(right)
    if normalized_left == normalized_right:
        return True
    if left is not None and right is not None:
        # Both present but different after normalization.
        return False
    # Exactly one side is None here; two Nones would have compared equal above.
    if left is None:
        activation_year = _support_augmentation_activation_start(right)
        return activation_year is not None and year >= activation_year
    activation_year = _support_augmentation_activation_start(left)
    return activation_year is not None and year < activation_year
266+
267+
268+
def merge_support_augmentation_contract(left, right):
    """Return a clone of ``left``, or of ``right`` when ``left`` is ``None``."""
    chosen = right if left is None else left
    return _json_clone(chosen)
270+
271+
171272
def manifest_contract(manifest: dict) -> dict:
172273
tax_assumption = _json_clone(manifest.get("tax_assumption"))
173274
if isinstance(tax_assumption, dict):
@@ -209,6 +310,22 @@ def merge_outputs(
209310
manifest_seed = temp_contract
210311
else:
211312
for key, value in manifest_seed.items():
313+
if key == "support_augmentation":
314+
support_augmentation = temp_contract.get(key)
315+
if not support_augmentation_contracts_compatible(
316+
value,
317+
support_augmentation,
318+
year=year,
319+
):
320+
raise ValueError(
321+
f"Temp manifest mismatch for {key} in year {year}: "
322+
f"{support_augmentation} != {value}"
323+
)
324+
manifest_seed[key] = merge_support_augmentation_contract(
325+
value,
326+
support_augmentation,
327+
)
328+
continue
212329
if temp_contract.get(key) != value:
213330
raise ValueError(
214331
f"Temp manifest mismatch for {key} in year {year}: "

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ classifiers = [
2222
"Programming Language :: Python :: 3.14",
2323
]
2424
dependencies = [
25-
"policyengine-us==1.693.2",
25+
"policyengine-us==1.693.4",
2626
# policyengine-core 3.26.1 is the current 3.26.x runtime and includes the fix for
2727
# PolicyEngine/policyengine-core#482 (user-set ETERNITY inputs lost
2828
# after _invalidate_all_caches) and is required by policyengine-us 1.682.1+.

0 commit comments

Comments
 (0)