From 1359aaf3322f4c5efb01bfff4300f89d5de57b63 Mon Sep 17 00:00:00 2001 From: Max Ghenis Date: Fri, 22 May 2026 06:11:13 -0400 Subject: [PATCH 1/6] Normalize SSI calibration targets for payment timing --- changelog.d/1109.changed | 1 + .../db/etl_national_targets.py | 13 +++- policyengine_us_data/utils/loss.py | 47 ++++++++------ policyengine_us_data/utils/ssi_targets.py | 61 +++++++++++++++++++ tests/unit/calibration/test_loss_targets.py | 50 ++++++++++++++- tests/unit/calibration/test_target_config.py | 1 + tests/unit/test_etl_national_targets.py | 57 ++++++++++++++++- 7 files changed, 206 insertions(+), 24 deletions(-) create mode 100644 changelog.d/1109.changed diff --git a/changelog.d/1109.changed b/changelog.d/1109.changed new file mode 100644 index 000000000..7f440c9bd --- /dev/null +++ b/changelog.d/1109.changed @@ -0,0 +1 @@ +Normalized SSI amount calibration targets for federal fiscal-year payment timing. diff --git a/policyengine_us_data/db/etl_national_targets.py b/policyengine_us_data/db/etl_national_targets.py index 9626a41cd..4bdcab0fa 100644 --- a/policyengine_us_data/db/etl_national_targets.py +++ b/policyengine_us_data/db/etl_national_targets.py @@ -27,10 +27,13 @@ get_geographic_strata, ) from policyengine_us_data.utils.ssi_targets import ( + SSI_PAYMENT_TARGET_SOURCE, SSI_RECIPIENT_TARGET_NOTES, SSI_RECIPIENT_TARGET_SOURCE, SSI_RECIPIENT_TARGET_YEAR, SSI_RECIPIENT_TARGETS_2024, + get_ssi_payment_target_notes, + normalize_ssi_payment_target, ) from policyengine_us_data.utils.target_variables import ( target_variable_components, @@ -767,12 +770,18 @@ def extract_national_targets(year: int = DEFAULT_YEAR): value = tax_benefit_system.parameters( time_period ).calibration.gov.cbo._children[param_name] + source = "CBO Budget Projections" + notes = f"CBO projection for {variable_name}" + if variable_name == "ssi": + value = normalize_ssi_payment_target(value, time_period) + source = SSI_PAYMENT_TARGET_SOURCE + notes = get_ssi_payment_target_notes(time_period) cbo_targets.append( { "variable": variable_name, "value": float(value), - "source": "CBO Budget Projections", - "notes": f"CBO projection for {variable_name}", + "source": source, + "notes": notes, "year": time_period, } ) diff --git a/policyengine_us_data/utils/loss.py b/policyengine_us_data/utils/loss.py index 149661778..05df36cbe 100644 --- a/policyengine_us_data/utils/loss.py +++ b/policyengine_us_data/utils/loss.py @@ -26,7 +26,10 @@ ) from policyengine_core.reforms import Reform from policyengine_us_data.utils.soi import pe_to_soi, get_soi, get_tracked_soi_row -from policyengine_us_data.utils.ssi_targets import SSI_RECIPIENT_TARGETS_2024 +from policyengine_us_data.utils.ssi_targets import ( + SSI_RECIPIENT_TARGETS_2024, + normalize_ssi_payment_target, +) from policyengine_us_data.utils.target_variables import ( target_variable_components, ) @@ -93,6 +96,18 @@ ), ] +CBO_PROGRAMS = [ + "income_tax_positive", + "snap", + "social_security", + "ssi", + "unemployment_compensation", +] + +CBO_PARAM_NAME_MAP = { + "income_tax_positive": "income_tax", +} + HARD_CODED_TOTALS = { MEDICARE_PART_B_PREMIUM_VARIABLE: ( get_beneficiary_paid_medicare_part_b_premiums_target(2024) @@ -233,6 +248,16 @@ def _add_ssi_recipient_targets(loss_matrix, targets_array, sim, time_period): return targets_array, loss_matrix +def _cbo_program_target_value(sim, variable_name: str, time_period): + param_name = CBO_PARAM_NAME_MAP.get(variable_name, variable_name) + value = sim.tax_benefit_system.parameters( + time_period + ).calibration.gov.cbo._children[param_name] + if variable_name == "ssi": + return normalize_ssi_payment_target(value, time_period) + return value + + ACA_SPENDING_TARGETS = { 2024: 98e9, } @@ -1316,30 +1341,12 @@ def build_loss_matrix(dataset: type, time_period): # refundable credit payments in excess of liability are classified as # outlays, not negative receipts. See: https://www.cbo.gov/publication/43767 - CBO_PROGRAMS = [ - "income_tax_positive", - "snap", - "social_security", - "ssi", - "unemployment_compensation", - ] - - # Mapping from variable name to CBO parameter name (when different) - CBO_PARAM_NAME_MAP = { - "income_tax_positive": "income_tax", - } - for variable_name in CBO_PROGRAMS: label = f"nation/cbo/{variable_name}" loss_matrix[label] = sim.calculate(variable_name, map_to="household").values if any(loss_matrix[label].isna()): raise ValueError(f"Missing values for {label}") - param_name = CBO_PARAM_NAME_MAP.get(variable_name, variable_name) - targets_array.append( - sim.tax_benefit_system.parameters( - time_period - ).calibration.gov.cbo._children[param_name] - ) + targets_array.append(_cbo_program_target_value(sim, variable_name, time_period)) targets_array, loss_matrix = _add_ssi_recipient_targets( loss_matrix, diff --git a/policyengine_us_data/utils/ssi_targets.py b/policyengine_us_data/utils/ssi_targets.py index 578d7c9e8..f5a6b1de9 100644 --- a/policyengine_us_data/utils/ssi_targets.py +++ b/policyengine_us_data/utils/ssi_targets.py @@ -1,5 +1,66 @@ """Shared SSI calibration targets.""" +from datetime import date, timedelta + + +SSI_CBO_TARGET_SOURCE = ( + "https://www.cbo.gov/system/files/2026-02/51313-2026-02-ssi.xlsx" +) +SSI_PAYMENT_TIMING_SOURCE = ( + "https://www.ssa.gov/budget/assets/materials/2026/2026BST.pdf" +) +SSI_PAYMENT_TARGET_SOURCE = f"{SSI_CBO_TARGET_SOURCE}; {SSI_PAYMENT_TIMING_SOURCE}" + + +def _as_fiscal_year(year) -> int: + return int(str(year)[:4]) + + +def _is_first_day_federal_holiday(day: date) -> bool: + is_new_years_day = day.month == 1 and day.day == 1 + is_labor_day = day.month == 9 and day.weekday() == 0 and 1 <= day.day <= 7 + return is_new_years_day or is_labor_day + + +def _ssi_payment_date(year: int, month: int) -> date: + day = date(year, month, 1) + while day.weekday() >= 5 or _is_first_day_federal_holiday(day): + day -= timedelta(days=1) + return day + + +def get_ssi_fiscal_year_payment_count(year) -> int: + """Return SSI monthly benefit payments counted in the federal fiscal year.""" + fiscal_year = _as_fiscal_year(year) + start = date(fiscal_year - 1, 10, 1) + end = date(fiscal_year, 9, 30) + payment_count = 0 + + for calendar_year in (fiscal_year - 1, fiscal_year): + for month in range(1, 13): + payment_day = _ssi_payment_date(calendar_year, month) + if start <= payment_day <= end: + payment_count += 1 + + return payment_count + + +def normalize_ssi_payment_target(value, year) -> float: + """Convert fiscal-year SSI outlays to a 12-payment-equivalent target.""" + payment_count = get_ssi_fiscal_year_payment_count(year) + return float(value) * 12 / payment_count + + +def get_ssi_payment_target_notes(year) -> str: + payment_count = get_ssi_fiscal_year_payment_count(year) + return ( + "CBO SSI total outlays normalized to a 12-payment-equivalent " + "annual target for PolicyEngine's annual SSI variable; " + f"FY{_as_fiscal_year(year)} has {payment_count} monthly SSI " + "payments under federal budget timing" + ) + + SSI_RECIPIENT_TARGET_YEAR = 2024 SSI_RECIPIENT_TARGET_SOURCE = ( "https://www.ssa.gov/policy/docs/statcomps/ssi_monthly/2024-12/table01.html" diff --git a/tests/unit/calibration/test_loss_targets.py b/tests/unit/calibration/test_loss_targets.py index b799d48ff..c0a7f82df 100644 --- a/tests/unit/calibration/test_loss_targets.py +++ b/tests/unit/calibration/test_loss_targets.py @@ -28,6 +28,7 @@ _add_real_estate_tax_targets, _add_ssi_recipient_targets, _add_transfer_balance_targets, + _cbo_program_target_value, _get_medicaid_national_targets, _get_aca_national_targets, _load_aca_spending_and_enrollment_targets, @@ -39,7 +40,11 @@ get_target_loss_weights, ) from policyengine_us_data.db import etl_national_targets -from policyengine_us_data.utils.ssi_targets import SSI_RECIPIENT_TARGETS_2024 +from policyengine_us_data.utils.ssi_targets import ( + SSI_RECIPIENT_TARGETS_2024, + get_ssi_fiscal_year_payment_count, + normalize_ssi_payment_target, +) def test_legacy_loss_targets_include_aggregate_qbi_deduction(): @@ -228,6 +233,27 @@ def map_result(self, values, source_entity, target_entity, how=None): return np.asarray(values, dtype=np.float32) +class _FakeCBOProgramTargetSimulation: + def __init__(self): + self.tax_benefit_system = SimpleNamespace( + parameters=lambda period: SimpleNamespace( + calibration=SimpleNamespace( + gov=SimpleNamespace( + cbo=SimpleNamespace( + _children={ + "income_tax": 2_000.0, + "snap": 1_000.0, + "social_security": 3_000.0, + "ssi": 57_000_000_000.0, + "unemployment_compensation": 4_000.0, + } + ) + ) + ) + ) + ) + + class _FakeCapitalGainsSimulation: def __init__(self): self.calculate_calls = [] @@ -354,6 +380,28 @@ def test_add_ssi_recipient_targets_adds_total_and_age_counts(): ) +def test_ssi_payment_targets_normalize_fiscal_year_payment_timing(): + assert get_ssi_fiscal_year_payment_count(2024) == 11 + assert get_ssi_fiscal_year_payment_count(2025) == 12 + assert get_ssi_fiscal_year_payment_count(2028) == 13 + + assert normalize_ssi_payment_target(57_000_000_000, 2024) == pytest.approx( + 57_000_000_000 * 12 / 11 + ) + assert normalize_ssi_payment_target(75_400_000_000, 2028) == pytest.approx( + 75_400_000_000 * 12 / 13 + ) + + +def test_legacy_cbo_ssi_target_uses_12_payment_equivalent(): + sim = _FakeCBOProgramTargetSimulation() + + assert _cbo_program_target_value(sim, "ssi", 2024) == pytest.approx( + 57_000_000_000 * 12 / 11 + ) + assert _cbo_program_target_value(sim, "snap", 2024) == 1_000.0 + + def test_add_ctc_targets(monkeypatch): monkeypatch.setattr( "policyengine_us_data.utils.loss.get_national_geography_soi_target", diff --git a/tests/unit/calibration/test_target_config.py b/tests/unit/calibration/test_target_config.py index 7e94ac602..fe7b3055e 100644 --- a/tests/unit/calibration/test_target_config.py +++ b/tests/unit/calibration/test_target_config.py @@ -546,6 +546,7 @@ def test_training_config_includes_ssi_recipient_count_targets(self): ) include_rules = config["include"] + assert {"variable": "ssi", "geo_level": "national"} in include_rules assert { "variable": "person_count", "geo_level": "national", diff --git a/tests/unit/test_etl_national_targets.py b/tests/unit/test_etl_national_targets.py index c2474cd0d..69b18d47b 100644 --- a/tests/unit/test_etl_national_targets.py +++ b/tests/unit/test_etl_national_targets.py @@ -1,6 +1,7 @@ import inspect import pandas as pd +import pytest from sqlalchemy import text from sqlmodel import Session, select @@ -19,7 +20,10 @@ load_national_targets, load_state_acs_rent_targets, ) -from policyengine_us_data.utils.ssi_targets import SSI_RECIPIENT_TARGETS_2024 +from policyengine_us_data.utils.ssi_targets import ( + SSI_PAYMENT_TARGET_SOURCE, + SSI_RECIPIENT_TARGETS_2024, +) def test_national_targets_do_not_extract_treasury_eitc(): @@ -439,6 +443,57 @@ def test_extract_national_targets_includes_ssi_count_targets(): } +def test_extract_national_targets_normalizes_ssi_cbo_amount_target(monkeypatch): + class FakeIncomeBySource: + _children = { + target["parameter"]: 0 + for target in etl_national_targets.CBO_INCOME_BY_SOURCE_TARGETS + } + + class FakeCBO: + income_by_source = FakeIncomeBySource() + _children = { + "income_tax": 0, + "snap": 0, + "social_security": 0, + "ssi": 57_000_000_000, + "unemployment_compensation": 0, + } + + class FakeSOI: + _children = {"long_term_capital_gains": 0} + + class FakeGov: + cbo = FakeCBO() + irs = type("FakeIRS", (), {"soi": FakeSOI()})() + + class FakeCalibration: + gov = FakeGov() + + class FakeParameters: + def __call__(self, year): + return self + + calibration = FakeCalibration() + + class FakeTaxBenefitSystem: + parameters = FakeParameters() + + monkeypatch.setattr( + "policyengine_us.CountryTaxBenefitSystem", + FakeTaxBenefitSystem, + ) + + raw_targets = extract_national_targets(year=2024) + ssi_target = next( + target for target in raw_targets["cbo_targets"] if target["variable"] == "ssi" + ) + + assert ssi_target["value"] == pytest.approx(57_000_000_000 * 12 / 11) + assert ssi_target["source"] == SSI_PAYMENT_TARGET_SOURCE + assert "12-payment-equivalent" in ssi_target["notes"] + + def test_load_national_targets_uses_medicaid_enrolled_for_enrollment_counts( tmp_path, monkeypatch ): From edd17ef9dd9afd5982d0d816dd280aabbdd3ad4e Mon Sep 17 00:00:00 2001 From: Max Ghenis Date: Fri, 22 May 2026 13:16:17 -0400 Subject: [PATCH 2/6] Use SSI fiscal year outlays calibration target --- changelog.d/1109.changed | 2 +- .../calibration/check_staging_sums.py | 4 +- .../calibration/compare_calibration_runs.py | 2 +- .../calibration/sanity_checks.py | 2 +- .../calibration/target_config.yaml | 2 +- .../calibration/validate_national_h5.py | 4 +- .../db/etl_national_targets.py | 17 +++--- policyengine_us_data/utils/loss.py | 17 ++---- policyengine_us_data/utils/ssi_targets.py | 56 ------------------- pyproject.toml | 2 +- .../calibration/test_check_staging_sums.py | 5 ++ .../test_compare_calibration_runs.py | 6 ++ tests/unit/calibration/test_loss_targets.py | 22 ++------ tests/unit/calibration/test_target_config.py | 5 +- .../calibration/test_validate_national_h5.py | 4 ++ tests/unit/test_etl_national_targets.py | 15 ++--- uv.lock | 8 +-- 17 files changed, 59 insertions(+), 114 deletions(-) diff --git a/changelog.d/1109.changed b/changelog.d/1109.changed index 7f440c9bd..c523bdc11 100644 --- a/changelog.d/1109.changed +++ b/changelog.d/1109.changed @@ -1 +1 @@ -Normalized SSI amount calibration targets for federal fiscal-year payment timing. +Targeted SSI federal fiscal-year outlays directly in calibration. diff --git a/policyengine_us_data/calibration/check_staging_sums.py b/policyengine_us_data/calibration/check_staging_sums.py index a88841ad6..00534ae4d 100644 --- a/policyengine_us_data/calibration/check_staging_sums.py +++ b/policyengine_us_data/calibration/check_staging_sums.py @@ -34,7 +34,7 @@ "taxable_interest_income", "social_security", "snap", - "ssi", + "ssi_federal_fiscal_year_outlays", "income_tax_before_credits", "eitc", "non_refundable_ctc", @@ -59,7 +59,7 @@ def get_reference_summary(reference_year: int = 2024) -> str: reference_year, ) return ( - " SNAP ~$110B, SSI ~$60B, Social Security ~$1.2T\n" + " SNAP ~$110B, SSI federal fiscal-year outlays ~$60B, Social Security ~$1.2T\n" f" EITC ~$60B, refundable CTC ~${refundable_ctc_target['amount'] / 1e9:.1f}B " f"(IRS SOI {refundable_ctc_target['source_year']}), " f"non-refundable CTC ~${non_refundable_ctc_target['amount'] / 1e9:.1f}B " diff --git a/policyengine_us_data/calibration/compare_calibration_runs.py b/policyengine_us_data/calibration/compare_calibration_runs.py index f986cfd4f..58eabf090 100644 --- a/policyengine_us_data/calibration/compare_calibration_runs.py +++ b/policyengine_us_data/calibration/compare_calibration_runs.py @@ -39,7 +39,7 @@ "taxable_interest_income", "social_security", "snap", - "ssi", + "ssi_federal_fiscal_year_outlays", "income_tax_before_credits", "ctc", "eitc", diff --git a/policyengine_us_data/calibration/sanity_checks.py b/policyengine_us_data/calibration/sanity_checks.py index 9906ce3ef..ac35fc7f9 100644 --- a/policyengine_us_data/calibration/sanity_checks.py +++ b/policyengine_us_data/calibration/sanity_checks.py @@ -30,7 +30,7 @@ "employment_income", "adjusted_gross_income", "snap", - "ssi", + "ssi_federal_fiscal_year_outlays", "eitc", "social_security", "income_tax_before_credits", diff --git a/policyengine_us_data/calibration/target_config.yaml b/policyengine_us_data/calibration/target_config.yaml index aae559ac8..51c282129 100644 --- a/policyengine_us_data/calibration/target_config.yaml +++ b/policyengine_us_data/calibration/target_config.yaml @@ -205,7 +205,7 @@ include: geo_level: national - variable: social_security_survivors geo_level: national - - variable: ssi + - variable: ssi_federal_fiscal_year_outlays geo_level: national - variable: person_count geo_level: national diff --git a/policyengine_us_data/calibration/validate_national_h5.py b/policyengine_us_data/calibration/validate_national_h5.py index f80b2a8e7..48f9e677f 100644 --- a/policyengine_us_data/calibration/validate_national_h5.py +++ b/policyengine_us_data/calibration/validate_national_h5.py @@ -35,7 +35,7 @@ "taxable_interest_income", "social_security", "snap", - "ssi", + "ssi_federal_fiscal_year_outlays", "income_tax_before_credits", "ctc", "eitc", @@ -56,7 +56,7 @@ "employment_income": (10_000_000_000_000, "~$10T"), "social_security": (1_200_000_000_000, "~$1.2T"), "snap": (110_000_000_000, "~$110B"), - "ssi": (60_000_000_000, "~$60B"), + "ssi_federal_fiscal_year_outlays": (60_000_000_000, "~$60B"), "eitc": (67_000_000_000, "~$67B"), "income_tax_before_credits": (4_000_000_000_000, "~$4T"), } diff --git a/policyengine_us_data/db/etl_national_targets.py b/policyengine_us_data/db/etl_national_targets.py index 4bdcab0fa..e2b2685bb 100644 --- a/policyengine_us_data/db/etl_national_targets.py +++ b/policyengine_us_data/db/etl_national_targets.py @@ -27,13 +27,11 @@ get_geographic_strata, ) from policyengine_us_data.utils.ssi_targets import ( - SSI_PAYMENT_TARGET_SOURCE, + SSI_CBO_TARGET_SOURCE, SSI_RECIPIENT_TARGET_NOTES, SSI_RECIPIENT_TARGET_SOURCE, SSI_RECIPIENT_TARGET_YEAR, SSI_RECIPIENT_TARGETS_2024, - get_ssi_payment_target_notes, - normalize_ssi_payment_target, ) from policyengine_us_data.utils.target_variables import ( target_variable_components, @@ -754,13 +752,14 @@ def extract_national_targets(year: int = DEFAULT_YEAR): "income_tax_positive", "snap", "social_security", - "ssi", + "ssi_federal_fiscal_year_outlays", "unemployment_compensation", ] # Mapping from target variable to CBO parameter name (when different) cbo_param_name_map = { "income_tax_positive": "income_tax", # CBO param is income_tax + "ssi_federal_fiscal_year_outlays": "ssi", } cbo_targets = [] @@ -772,10 +771,12 @@ def extract_national_targets(year: int = DEFAULT_YEAR): ).calibration.gov.cbo._children[param_name] source = "CBO Budget Projections" notes = f"CBO projection for {variable_name}" - if variable_name == "ssi": - value = normalize_ssi_payment_target(value, time_period) - source = SSI_PAYMENT_TARGET_SOURCE - notes = get_ssi_payment_target_notes(time_period) + if variable_name == "ssi_federal_fiscal_year_outlays": + source = SSI_CBO_TARGET_SOURCE + notes = ( + "CBO SSI federal fiscal-year outlays matched to " + "policyengine-us ssi_federal_fiscal_year_outlays" + ) cbo_targets.append( { "variable": variable_name, diff --git a/policyengine_us_data/utils/loss.py b/policyengine_us_data/utils/loss.py index 05df36cbe..ba31e063a 100644 --- a/policyengine_us_data/utils/loss.py +++ b/policyengine_us_data/utils/loss.py @@ -26,10 +26,7 @@ ) from policyengine_core.reforms import Reform from policyengine_us_data.utils.soi import pe_to_soi, get_soi, get_tracked_soi_row -from policyengine_us_data.utils.ssi_targets import ( - SSI_RECIPIENT_TARGETS_2024, - normalize_ssi_payment_target, -) +from policyengine_us_data.utils.ssi_targets import SSI_RECIPIENT_TARGETS_2024 from policyengine_us_data.utils.target_variables import ( target_variable_components, ) @@ -100,12 +97,13 @@ "income_tax_positive", "snap", "social_security", - "ssi", + "ssi_federal_fiscal_year_outlays", "unemployment_compensation", ] CBO_PARAM_NAME_MAP = { "income_tax_positive": "income_tax", + "ssi_federal_fiscal_year_outlays": "ssi", } HARD_CODED_TOTALS = { @@ -250,12 +248,9 @@ def _add_ssi_recipient_targets(loss_matrix, targets_array, sim, time_period): def _cbo_program_target_value(sim, variable_name: str, time_period): param_name = CBO_PARAM_NAME_MAP.get(variable_name, variable_name) - value = sim.tax_benefit_system.parameters( - time_period - ).calibration.gov.cbo._children[param_name] - if variable_name == "ssi": - return normalize_ssi_payment_target(value, time_period) - return value + return sim.tax_benefit_system.parameters(time_period).calibration.gov.cbo._children[ + param_name + ] ACA_SPENDING_TARGETS = { diff --git a/policyengine_us_data/utils/ssi_targets.py b/policyengine_us_data/utils/ssi_targets.py index f5a6b1de9..3fc5f7f85 100644 --- a/policyengine_us_data/utils/ssi_targets.py +++ b/policyengine_us_data/utils/ssi_targets.py @@ -1,64 +1,8 @@ """Shared SSI calibration targets.""" -from datetime import date, timedelta - - SSI_CBO_TARGET_SOURCE = ( "https://www.cbo.gov/system/files/2026-02/51313-2026-02-ssi.xlsx" ) -SSI_PAYMENT_TIMING_SOURCE = ( - "https://www.ssa.gov/budget/assets/materials/2026/2026BST.pdf" -) -SSI_PAYMENT_TARGET_SOURCE = f"{SSI_CBO_TARGET_SOURCE}; {SSI_PAYMENT_TIMING_SOURCE}" - - -def _as_fiscal_year(year) -> int: - return int(str(year)[:4]) - - -def _is_first_day_federal_holiday(day: date) -> bool: - is_new_years_day = day.month == 1 and day.day == 1 - is_labor_day = day.month == 9 and day.weekday() == 0 and 1 <= day.day <= 7 - return is_new_years_day or is_labor_day - - -def _ssi_payment_date(year: int, month: int) -> date: - day = date(year, month, 1) - while day.weekday() >= 5 or _is_first_day_federal_holiday(day): - day -= timedelta(days=1) - return day - - -def get_ssi_fiscal_year_payment_count(year) -> int: - """Return SSI monthly benefit payments counted in the federal fiscal year.""" - fiscal_year = _as_fiscal_year(year) - start = date(fiscal_year - 1, 10, 1) - end = date(fiscal_year, 9, 30) - payment_count = 0 - - for calendar_year in (fiscal_year - 1, fiscal_year): - for month in range(1, 13): - payment_day = _ssi_payment_date(calendar_year, month) - if start <= payment_day <= end: - payment_count += 1 - - return payment_count - - -def normalize_ssi_payment_target(value, year) -> float: - """Convert fiscal-year SSI outlays to a 12-payment-equivalent target.""" - payment_count = get_ssi_fiscal_year_payment_count(year) - return float(value) * 12 / payment_count - - -def get_ssi_payment_target_notes(year) -> str: - payment_count = get_ssi_fiscal_year_payment_count(year) - return ( - "CBO SSI total outlays normalized to a 12-payment-equivalent " - "annual target for PolicyEngine's annual SSI variable; " - f"FY{_as_fiscal_year(year)} has {payment_count} monthly SSI " - "payments under federal budget timing" - ) SSI_RECIPIENT_TARGET_YEAR = 2024 diff --git a/pyproject.toml b/pyproject.toml index e6c05b530..155a8966d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,7 +22,7 @@ classifiers = [ "Programming Language :: Python :: 3.14", ] dependencies = [ - "policyengine-us==1.703.1", + "policyengine-us==1.704.0", # policyengine-core 3.26.1 is the current 3.26.x runtime and includes the fix for # PolicyEngine/policyengine-core#482 (user-set ETERNITY inputs lost # after _invalidate_all_caches) and is required by policyengine-us 1.682.1+. diff --git a/tests/unit/calibration/test_check_staging_sums.py b/tests/unit/calibration/test_check_staging_sums.py index 0daf2755e..d273756c4 100644 --- a/tests/unit/calibration/test_check_staging_sums.py +++ b/tests/unit/calibration/test_check_staging_sums.py @@ -29,3 +29,8 @@ def test_reference_summary_uses_irs_ctc_component_targets(monkeypatch): def test_staging_sums_use_total_self_employment_income(): assert "total_self_employment_income" in VARIABLES assert "self_employment_income" not in VARIABLES + + +def test_staging_sums_use_ssi_fiscal_year_outlays(): + assert "ssi_federal_fiscal_year_outlays" in VARIABLES + assert "ssi" not in VARIABLES diff --git a/tests/unit/calibration/test_compare_calibration_runs.py b/tests/unit/calibration/test_compare_calibration_runs.py index 02a0031b3..965fe67f8 100644 --- a/tests/unit/calibration/test_compare_calibration_runs.py +++ b/tests/unit/calibration/test_compare_calibration_runs.py @@ -3,6 +3,7 @@ import pandas as pd from policyengine_us_data.calibration.compare_calibration_runs import ( + DEFAULT_VARIABLES, RunComparisonPaths, build_h5_comparison_rows, build_markdown_report, @@ -41,6 +42,11 @@ def test_parse_variables_preserves_requested_order(): assert parse_variables("snap, eitc, ctc") == ["snap", "eitc", "ctc"] +def test_default_h5_comparison_uses_ssi_fiscal_year_outlays(): + assert "ssi_federal_fiscal_year_outlays" in DEFAULT_VARIABLES + assert "ssi" not in DEFAULT_VARIABLES + + def test_summarize_diagnostics_uses_achievable_target_tail(): diagnostics = pd.DataFrame( { diff --git a/tests/unit/calibration/test_loss_targets.py b/tests/unit/calibration/test_loss_targets.py index c0a7f82df..cb74dadaa 100644 --- a/tests/unit/calibration/test_loss_targets.py +++ b/tests/unit/calibration/test_loss_targets.py @@ -42,8 +42,6 @@ from policyengine_us_data.db import etl_national_targets from policyengine_us_data.utils.ssi_targets import ( SSI_RECIPIENT_TARGETS_2024, - get_ssi_fiscal_year_payment_count, - normalize_ssi_payment_target, ) @@ -380,24 +378,12 @@ def test_add_ssi_recipient_targets_adds_total_and_age_counts(): ) -def test_ssi_payment_targets_normalize_fiscal_year_payment_timing(): - assert get_ssi_fiscal_year_payment_count(2024) == 11 - assert get_ssi_fiscal_year_payment_count(2025) == 12 - assert get_ssi_fiscal_year_payment_count(2028) == 13 - - assert normalize_ssi_payment_target(57_000_000_000, 2024) == pytest.approx( - 57_000_000_000 * 12 / 11 - ) - assert normalize_ssi_payment_target(75_400_000_000, 2028) == pytest.approx( - 75_400_000_000 * 12 / 13 - ) - - -def test_legacy_cbo_ssi_target_uses_12_payment_equivalent(): +def test_legacy_cbo_ssi_target_uses_fiscal_year_outlays_variable(): sim = _FakeCBOProgramTargetSimulation() - assert _cbo_program_target_value(sim, "ssi", 2024) == pytest.approx( - 57_000_000_000 * 12 / 11 + assert ( + _cbo_program_target_value(sim, "ssi_federal_fiscal_year_outlays", 2024) + == 57_000_000_000 ) assert _cbo_program_target_value(sim, "snap", 2024) == 1_000.0 diff --git a/tests/unit/calibration/test_target_config.py b/tests/unit/calibration/test_target_config.py index fe7b3055e..60f862a90 100644 --- a/tests/unit/calibration/test_target_config.py +++ b/tests/unit/calibration/test_target_config.py @@ -546,7 +546,10 @@ def test_training_config_includes_ssi_recipient_count_targets(self): ) include_rules = config["include"] - assert {"variable": "ssi", "geo_level": "national"} in include_rules + assert { + "variable": "ssi_federal_fiscal_year_outlays", + "geo_level": "national", + } in include_rules assert { "variable": "person_count", "geo_level": "national", diff --git a/tests/unit/calibration/test_validate_national_h5.py b/tests/unit/calibration/test_validate_national_h5.py index 42d9ab118..d35336e8b 100644 --- a/tests/unit/calibration/test_validate_national_h5.py +++ b/tests/unit/calibration/test_validate_national_h5.py @@ -4,6 +4,7 @@ import pytest from policyengine_us_data.calibration.validate_national_h5 import ( + REFERENCES, VARIABLES, build_advance_ctc_agi_share_comparison, build_advance_ctc_filing_status_share_comparison, @@ -47,6 +48,9 @@ def test_reference_values_use_irs_ctc_component_targets(monkeypatch): 63_622_000.0, "IRS Pub. 4801 2022 63.6M", ) + assert "ssi_federal_fiscal_year_outlays" in REFERENCES + assert "ssi_federal_fiscal_year_outlays" in VARIABLES + assert "ssi" not in VARIABLES def test_ctc_diagnostic_outputs_format_all_sections(monkeypatch): diff --git a/tests/unit/test_etl_national_targets.py b/tests/unit/test_etl_national_targets.py index 69b18d47b..f9d76b7ba 100644 --- a/tests/unit/test_etl_national_targets.py +++ b/tests/unit/test_etl_national_targets.py @@ -1,7 +1,6 @@ import inspect import pandas as pd -import pytest from sqlalchemy import text from sqlmodel import Session, select @@ -21,7 +20,7 @@ load_state_acs_rent_targets, ) from policyengine_us_data.utils.ssi_targets import ( - SSI_PAYMENT_TARGET_SOURCE, + SSI_CBO_TARGET_SOURCE, SSI_RECIPIENT_TARGETS_2024, ) @@ -443,7 +442,7 @@ def test_extract_national_targets_includes_ssi_count_targets(): } -def test_extract_national_targets_normalizes_ssi_cbo_amount_target(monkeypatch): +def test_extract_national_targets_uses_ssi_fiscal_year_outlays_target(monkeypatch): class FakeIncomeBySource: _children = { target["parameter"]: 0 @@ -486,12 +485,14 @@ class FakeTaxBenefitSystem: raw_targets = extract_national_targets(year=2024) ssi_target = next( - target for target in raw_targets["cbo_targets"] if target["variable"] == "ssi" + target + for target in raw_targets["cbo_targets"] + if target["variable"] == "ssi_federal_fiscal_year_outlays" ) - assert ssi_target["value"] == pytest.approx(57_000_000_000 * 12 / 11) - assert ssi_target["source"] == SSI_PAYMENT_TARGET_SOURCE - assert "12-payment-equivalent" in ssi_target["notes"] + assert ssi_target["value"] == 57_000_000_000 + assert ssi_target["source"] == SSI_CBO_TARGET_SOURCE + assert "federal fiscal-year outlays" in ssi_target["notes"] def test_load_national_targets_uses_medicaid_enrolled_for_enrollment_counts( diff --git a/uv.lock b/uv.lock index f96ba5ab0..b8d4d1c59 100644 --- a/uv.lock +++ b/uv.lock @@ -2122,7 +2122,7 @@ wheels = [ [[package]] name = "policyengine-us" -version = "1.703.1" +version = "1.704.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "microdf-python" }, @@ -2132,9 +2132,9 @@ dependencies = [ { name = "tables" }, { name = "tqdm" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/2f/d7/6268c87ecb05e3aa1edaee9dc79467da8c96c69dc5b6139754bbf9e1970d/policyengine_us-1.703.1.tar.gz", hash = "sha256:951cf922550849890a73442282cc1e013852b270c3b3b4e24aca5ae29e6e811d", size = 9886740, upload-time = "2026-05-21T22:17:47.309Z" } +sdist = { url = "https://files.pythonhosted.org/packages/c4/62/0e7ee37fda0c31f5cbc9916fcbbd7923996255e8f9600e2f8f62c59d0232/policyengine_us-1.704.0.tar.gz", hash = "sha256:480b05a88ed50767886806a615c77539e918eff12f36fceb12cea40e0bacf481", size = 9899961, upload-time = "2026-05-22T16:59:42.447Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/8f/91/dc40a435fb0af3cdf62fa476b87674a2fb4cfd221137f2c5a98ce194d96a/policyengine_us-1.703.1-py3-none-any.whl", hash = "sha256:39445e07e7616d5c4da006a0836cf8c2b326f6f6dec1c8b633bb835cf8682f35", size = 10680928, upload-time = "2026-05-21T22:17:43.642Z" }, + { url = "https://files.pythonhosted.org/packages/06/ec/f159036d78092955f09b0d0bd1c06b0be4930480134a4b9ae5460a08efa8/policyengine_us-1.704.0-py3-none-any.whl", hash = "sha256:1e3a3757804a01060f002229fcc85912677a256132112dd8f828de8944673943", size = 10705262, upload-time = "2026-05-22T16:59:39.587Z" }, ] [[package]] @@ -2204,7 +2204,7 @@ requires-dist = [ { name = "pandas", specifier = ">=2.3.1" }, { name = "pip-system-certs", specifier = ">=3.0" }, { name = "policyengine-core", specifier = ">=3.26.1,<3.27" }, - { name = "policyengine-us", specifier = "==1.703.1" }, + { name = "policyengine-us", specifier = "==1.704.0" }, { name = "requests", specifier = ">=2.25.0" }, { name = "samplics", marker = "extra == 'calibration'" }, { name = "scipy", specifier = ">=1.15.3" }, From c4c4fcb860fd56ac1b61cce6ba9a3a21093a2486 Mon Sep 17 00:00:00 2001 From: Max Ghenis Date: Fri, 22 May 2026 13:59:22 -0400 Subject: [PATCH 3/6] Handle legacy SSI target replacement --- .../calibration/sanity_checks.py | 72 ++++++++++++++----- .../db/etl_national_targets.py | 35 +++++++++ .../test_hourly_wage_income_consistency.py | 22 ++++++ tests/unit/test_etl_national_targets.py | 58 +++++++++++++++ 4 files changed, 168 insertions(+), 19 deletions(-) diff --git a/policyengine_us_data/calibration/sanity_checks.py b/policyengine_us_data/calibration/sanity_checks.py index ac35fc7f9..003c159a1 100644 --- a/policyengine_us_data/calibration/sanity_checks.py +++ b/policyengine_us_data/calibration/sanity_checks.py @@ -30,12 +30,16 @@ "employment_income", "adjusted_gross_income", "snap", - "ssi_federal_fiscal_year_outlays", + "ssi", "eitc", "social_security", "income_tax_before_credits", ] +COMPUTED_KEY_MONETARY_VARS = [ + "ssi_federal_fiscal_year_outlays", +] + TAKEUP_VARS = [ "takes_up_snap_if_eligible", "takes_up_ssi_if_eligible", @@ -375,6 +379,27 @@ def _get_person_weights(f, period, person_count, household_weights): except KeyError: return None + def _append_finite_check(var: str, vals) -> None: + vals = np.asarray(vals) + n_nan = int(np.isnan(vals).sum()) + n_inf = int(np.isinf(vals).sum()) + if n_nan > 0 or n_inf > 0: + results.append( + { + "check": f"no_nan_inf_{var}", + "status": "FAIL", + "detail": f"{n_nan} NaN, {n_inf} Inf", + } + ) + else: + results.append( + { + "check": f"no_nan_inf_{var}", + "status": "PASS", + "detail": "", + } + ) + with h5py.File(h5_path, "r") as f: # 1. Weight non-negativity w_key = f"household_weight/{period}" @@ -440,24 +465,7 @@ def _get_person_weights(f, period, person_count, household_weights): vals = _get(f, f"{var}/{period}") if vals is None: continue - n_nan = int(np.isnan(vals).sum()) - n_inf = int(np.isinf(vals).sum()) - if n_nan > 0 or n_inf > 0: - results.append( - { - "check": f"no_nan_inf_{var}", - "status": "FAIL", - "detail": f"{n_nan} NaN, {n_inf} Inf", - } - ) - else: - results.append( - { - "check": f"no_nan_inf_{var}", - "status": "PASS", - "detail": "", - } - ) + _append_finite_check(var, vals) # 4. Person-to-household mapping person_hh_arr = _get(f, f"person_household_id/{period}") @@ -650,9 +658,35 @@ def _get_person_weights(f, period, person_count, household_weights): ) ) + for var, vals in _computed_key_monetary_values(h5_path, period).items(): + _append_finite_check(var, vals) + return results +def _computed_key_monetary_values(h5_path: str, period: int) -> dict[str, np.ndarray]: + try: + from policyengine_us import Microsimulation + + sim = Microsimulation(dataset=h5_path) + except Exception as error: + logger.info("Skipping computed monetary sanity checks: %s", error) + return {} + + values = {} + for var in COMPUTED_KEY_MONETARY_VARS: + try: + result = sim.calculate(var, period) + values[var] = np.asarray( + result.values if hasattr(result, "values") else result + ) + except Exception as error: + logger.info( + "Skipping computed monetary sanity check for %s: %s", var, error + ) + return values + + def main(): import argparse diff --git a/policyengine_us_data/db/etl_national_targets.py b/policyengine_us_data/db/etl_national_targets.py index e2b2685bb..bdca9248f 100644 --- a/policyengine_us_data/db/etl_national_targets.py +++ b/policyengine_us_data/db/etl_national_targets.py @@ -151,6 +151,33 @@ def _register_target_variable(session: Session, variable: str) -> None: session.flush() +def _deactivate_replaced_national_target( + session: Session, + *, + stratum_id: int, + old_variable: str, + new_variable: str, + period: int, +) -> None: + old_targets = session.exec( + select(Target).where( + Target.stratum_id == stratum_id, + Target.variable == old_variable, + Target.period == period, + Target.reform_id == 0, + Target.active, + ) + ).all() + for target in old_targets: + target.active = False + replacement_note = ( + f"Deactivated because {new_variable} replaced this target concept." + ) + target.notes = ( + f"{target.notes} | {replacement_note}" if target.notes else replacement_note + ) + + WIC_NATIONAL_ANNUAL_SUMMARY_SOURCE = ( "https://www.fns.usda.gov/sites/default/files/resource-files/wisummary-4.xlsx" ) @@ -922,6 +949,14 @@ def load_national_targets( for _, target_data in direct_targets_df.iterrows(): target_year = target_data["year"] _register_target_variable(session, target_data["variable"]) + if target_data["variable"] == "ssi_federal_fiscal_year_outlays": + _deactivate_replaced_national_target( + session, + stratum_id=us_stratum.stratum_id, + old_variable="ssi", + new_variable="ssi_federal_fiscal_year_outlays", + period=target_year, + ) # Check if target already exists existing_target = session.exec( select(Target).where( diff --git a/tests/unit/calibration/test_hourly_wage_income_consistency.py b/tests/unit/calibration/test_hourly_wage_income_consistency.py index cf189ad93..235a9ecf3 100644 --- a/tests/unit/calibration/test_hourly_wage_income_consistency.py +++ b/tests/unit/calibration/test_hourly_wage_income_consistency.py @@ -60,3 +60,25 @@ def test_run_sanity_checks_adds_hourly_wage_income_consistency(tmp_path): assert by_check["hourly_wage_income_consistency"]["status"] == "WARN" assert by_check["hourly_wage_income_consistency_overtime"]["status"] == "WARN" + + +def test_run_sanity_checks_keeps_raw_ssi_and_checks_computed_outlays( + tmp_path, monkeypatch +): + h5_path = tmp_path / "sample.h5" + with h5py.File(h5_path, "w") as h5: + _write_period_dataset(h5, "household_weight", [1.0, 1.0]) + _write_period_dataset(h5, "ssi", [100.0, 0.0]) + + monkeypatch.setattr( + "policyengine_us_data.calibration.sanity_checks._computed_key_monetary_values", + lambda h5_path, period: { + "ssi_federal_fiscal_year_outlays": np.array([100.0, np.inf]) + }, + ) + + diagnostics = run_sanity_checks(str(h5_path), period=2024) + by_check = {diagnostic["check"]: diagnostic for diagnostic in diagnostics} + + assert by_check["no_nan_inf_ssi"]["status"] == "PASS" + assert by_check["no_nan_inf_ssi_federal_fiscal_year_outlays"]["status"] == "FAIL" diff --git a/tests/unit/test_etl_national_targets.py b/tests/unit/test_etl_national_targets.py index f9d76b7ba..b6243bc89 100644 --- a/tests/unit/test_etl_national_targets.py +++ b/tests/unit/test_etl_national_targets.py @@ -495,6 +495,64 @@ class FakeTaxBenefitSystem: assert "federal fiscal-year outlays" in ssi_target["notes"] +def test_load_national_targets_deactivates_legacy_ssi_dollar_target( + tmp_path, monkeypatch +): + calibration_dir = tmp_path / "calibration" + calibration_dir.mkdir() + db_uri = f"sqlite:///{calibration_dir / 'policy_data.db'}" + engine = create_database(db_uri) + + with Session(engine) as session: + national = _make_stratum(session, notes="United States") + session.add( + Target( + stratum_id=national.stratum_id, + variable="ssi", + period=2024, + value=57_000_000_000, + active=True, + notes="legacy SSI dollar target", + ) + ) + session.commit() + + monkeypatch.setattr( + "policyengine_us_data.db.etl_national_targets.STORAGE_FOLDER", + tmp_path, + ) + + load_national_targets( + direct_targets_df=pd.DataFrame( + [ + { + "variable": "ssi_federal_fiscal_year_outlays", + "value": 57_000_000_000, + "source": SSI_CBO_TARGET_SOURCE, + "notes": "CBO SSI federal fiscal-year outlays", + "year": 2024, + } + ] + ), + tax_filer_df=pd.DataFrame(), + tax_expenditure_df=pd.DataFrame(), + conditional_targets=[], + ) + + with Session(engine) as session: + legacy_target = session.exec( + select(Target).where(Target.variable == "ssi") + ).one() + new_target = session.exec( + select(Target).where(Target.variable == "ssi_federal_fiscal_year_outlays") + ).one() + + assert legacy_target.active is False + assert "replaced this target concept" in legacy_target.notes + assert new_target.active is True + assert new_target.value == 57_000_000_000 + + def test_load_national_targets_uses_medicaid_enrolled_for_enrollment_counts( tmp_path, monkeypatch ): From 04b481f4b3f3a83f3f69271102e5a376a9c8ee29 Mon Sep 17 00:00:00 2001 From: Max Ghenis Date: Fri, 22 May 2026 14:04:02 -0400 Subject: [PATCH 4/6] Bump PolicyEngine US dependency --- pyproject.toml | 2 +- uv.lock | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 155a8966d..0d82e9d54 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,7 +22,7 @@ classifiers = [ "Programming Language :: Python :: 3.14", ] dependencies = [ - "policyengine-us==1.704.0", + "policyengine-us==1.705.0", # policyengine-core 3.26.1 is the current 3.26.x runtime and includes the fix for # PolicyEngine/policyengine-core#482 (user-set ETERNITY inputs lost # after _invalidate_all_caches) and is required by policyengine-us 1.682.1+. diff --git a/uv.lock b/uv.lock index b8d4d1c59..a62197887 100644 --- a/uv.lock +++ b/uv.lock @@ -2122,7 +2122,7 @@ wheels = [ [[package]] name = "policyengine-us" -version = "1.704.0" +version = "1.705.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "microdf-python" }, @@ -2132,9 +2132,9 @@ dependencies = [ { name = "tables" }, { name = "tqdm" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/c4/62/0e7ee37fda0c31f5cbc9916fcbbd7923996255e8f9600e2f8f62c59d0232/policyengine_us-1.704.0.tar.gz", hash = "sha256:480b05a88ed50767886806a615c77539e918eff12f36fceb12cea40e0bacf481", size = 9899961, upload-time = "2026-05-22T16:59:42.447Z" } +sdist = { url = "https://files.pythonhosted.org/packages/6d/65/619cc5a8cb4ef81a2e03acb799b3f34f77c31d8a817d8d2ec88aafa18a87/policyengine_us-1.705.0.tar.gz", hash = "sha256:e8dbc18dbb5640302658b863364989b9991091bf3034e0537641851959e54c1c", size = 9909097, upload-time = "2026-05-22T17:45:53.24Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/06/ec/f159036d78092955f09b0d0bd1c06b0be4930480134a4b9ae5460a08efa8/policyengine_us-1.704.0-py3-none-any.whl", hash = "sha256:1e3a3757804a01060f002229fcc85912677a256132112dd8f828de8944673943", size = 10705262, upload-time = "2026-05-22T16:59:39.587Z" }, + { url = "https://files.pythonhosted.org/packages/43/87/b4764a317cc09f314960c426c76b15d4b808e4127f7d233bced7f5ac5221/policyengine_us-1.705.0-py3-none-any.whl", hash = "sha256:6021486e605a2a539326a16d7723367c99082aad6430e05fa390a2a95a5edf88", size = 10732752, upload-time = "2026-05-22T17:45:50.373Z" }, ] [[package]] @@ -2204,7 +2204,7 @@ requires-dist = [ { name = "pandas", specifier = ">=2.3.1" }, { name = "pip-system-certs", specifier = ">=3.0" }, { name = "policyengine-core", specifier = ">=3.26.1,<3.27" }, - { name = "policyengine-us", specifier = "==1.704.0" }, + { name = "policyengine-us", specifier = "==1.705.0" }, { name = "requests", specifier = ">=2.25.0" }, { name = "samplics", marker = "extra == 'calibration'" }, { name = "scipy", specifier = ">=1.15.3" }, From 9c7fccc88359a1f99bfdbcdf46e4e28c1c079285 Mon Sep 17 00:00:00 2001 From: Max Ghenis Date: Fri, 22 May 2026 14:46:49 -0400 Subject: [PATCH 5/6] Bump PolicyEngine US for payroll wage fix --- pyproject.toml | 2 +- uv.lock | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 0d82e9d54..8fc6f9078 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,7 +22,7 @@ classifiers = [ "Programming Language :: Python :: 3.14", ] dependencies = [ - "policyengine-us==1.705.0", + "policyengine-us==1.705.1", # policyengine-core 3.26.1 is the current 3.26.x runtime and includes the fix for # PolicyEngine/policyengine-core#482 (user-set ETERNITY inputs lost # after _invalidate_all_caches) and is required by policyengine-us 1.682.1+. diff --git a/uv.lock b/uv.lock index a62197887..b8a1c0cf9 100644 --- a/uv.lock +++ b/uv.lock @@ -2122,7 +2122,7 @@ wheels = [ [[package]] name = "policyengine-us" -version = "1.705.0" +version = "1.705.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "microdf-python" }, @@ -2132,9 +2132,9 @@ dependencies = [ { name = "tables" }, { name = "tqdm" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/6d/65/619cc5a8cb4ef81a2e03acb799b3f34f77c31d8a817d8d2ec88aafa18a87/policyengine_us-1.705.0.tar.gz", hash = "sha256:e8dbc18dbb5640302658b863364989b9991091bf3034e0537641851959e54c1c", size = 9909097, upload-time = "2026-05-22T17:45:53.24Z" } +sdist = { url = "https://files.pythonhosted.org/packages/12/a1/1f5fac9080680f490fc8c0222e1206585fa573928c6e5a76dc11b772e3cc/policyengine_us-1.705.1.tar.gz", hash = "sha256:4467ff3c74b468593a38a65854c037a5650552abb5cb0fb3aab248d47a5b1f99", size = 9910341, upload-time = "2026-05-22T18:34:58.827Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/43/87/b4764a317cc09f314960c426c76b15d4b808e4127f7d233bced7f5ac5221/policyengine_us-1.705.0-py3-none-any.whl", hash = "sha256:6021486e605a2a539326a16d7723367c99082aad6430e05fa390a2a95a5edf88", size = 10732752, upload-time = "2026-05-22T17:45:50.373Z" }, + { url = "https://files.pythonhosted.org/packages/87/1d/71653e73a243ffb6e74463403b2a198cdea9ce9fa2dab8038d99ad70991b/policyengine_us-1.705.1-py3-none-any.whl", hash = "sha256:9db5121748d62961cb4867f18dab03da1be9abc986676e12e147e19afc6fd6aa", size = 10735178, upload-time = "2026-05-22T18:34:55.376Z" }, ] [[package]] @@ -2204,7 +2204,7 @@ requires-dist = [ { name = "pandas", specifier = ">=2.3.1" }, { name = "pip-system-certs", specifier = ">=3.0" }, { name = "policyengine-core", specifier = ">=3.26.1,<3.27" }, - { name = "policyengine-us", specifier = "==1.705.0" }, + { name = "policyengine-us", specifier = "==1.705.1" }, { name = "requests", specifier = ">=2.25.0" }, { name = "samplics", marker = "extra == 'calibration'" }, { name = "scipy", specifier = ">=1.15.3" }, From 387b6c73908962d7c7d3e24f2a70b9843e99fa14 Mon Sep 17 00:00:00 2001 From: Max Ghenis Date: Fri, 22 May 2026 14:59:10 -0400 Subject: [PATCH 6/6] Scale SSI fiscal year target for single-year data --- changelog.d/1109.changed | 2 +- .../calibration/check_staging_sums.py | 4 +- .../calibration/compare_calibration_runs.py | 2 +- .../calibration/validate_national_h5.py | 4 +- .../db/etl_national_targets.py | 12 +-- policyengine_us_data/utils/loss.py | 14 ++- policyengine_us_data/utils/ssi_targets.py | 89 +++++++++++++++++++ .../calibration/test_check_staging_sums.py | 6 +- .../test_compare_calibration_runs.py | 6 +- tests/unit/calibration/test_loss_targets.py | 28 ++++-- .../calibration/test_validate_national_h5.py | 6 +- tests/unit/test_etl_national_targets.py | 14 +-- 12 files changed, 150 insertions(+), 37 deletions(-) diff --git a/changelog.d/1109.changed b/changelog.d/1109.changed index c523bdc11..e98b94154 100644 --- a/changelog.d/1109.changed +++ b/changelog.d/1109.changed @@ -1 +1 @@ -Targeted SSI federal fiscal-year outlays directly in calibration. +Targeted single-year-compatible SSI federal fiscal-year outlays in calibration. diff --git a/policyengine_us_data/calibration/check_staging_sums.py b/policyengine_us_data/calibration/check_staging_sums.py index 00534ae4d..a88841ad6 100644 --- a/policyengine_us_data/calibration/check_staging_sums.py +++ b/policyengine_us_data/calibration/check_staging_sums.py @@ -34,7 +34,7 @@ "taxable_interest_income", "social_security", "snap", - "ssi_federal_fiscal_year_outlays", + "ssi", "income_tax_before_credits", "eitc", "non_refundable_ctc", @@ -59,7 +59,7 @@ def get_reference_summary(reference_year: int = 2024) -> str: reference_year, ) return ( - " SNAP ~$110B, SSI federal fiscal-year outlays ~$60B, Social Security ~$1.2T\n" + " SNAP ~$110B, SSI ~$60B, Social Security ~$1.2T\n" f" EITC ~$60B, refundable CTC ~${refundable_ctc_target['amount'] / 1e9:.1f}B " f"(IRS SOI {refundable_ctc_target['source_year']}), " f"non-refundable CTC ~${non_refundable_ctc_target['amount'] / 1e9:.1f}B " diff --git a/policyengine_us_data/calibration/compare_calibration_runs.py b/policyengine_us_data/calibration/compare_calibration_runs.py index 58eabf090..f986cfd4f 100644 --- a/policyengine_us_data/calibration/compare_calibration_runs.py +++ b/policyengine_us_data/calibration/compare_calibration_runs.py @@ -39,7 +39,7 @@ "taxable_interest_income", "social_security", "snap", - "ssi_federal_fiscal_year_outlays", + "ssi", "income_tax_before_credits", "ctc", "eitc", diff --git a/policyengine_us_data/calibration/validate_national_h5.py b/policyengine_us_data/calibration/validate_national_h5.py index 48f9e677f..f80b2a8e7 100644 --- a/policyengine_us_data/calibration/validate_national_h5.py +++ b/policyengine_us_data/calibration/validate_national_h5.py @@ -35,7 +35,7 @@ "taxable_interest_income", "social_security", "snap", - "ssi_federal_fiscal_year_outlays", + "ssi", "income_tax_before_credits", "ctc", "eitc", @@ -56,7 +56,7 @@ "employment_income": (10_000_000_000_000, "~$10T"), "social_security": (1_200_000_000_000, "~$1.2T"), "snap": (110_000_000_000, "~$110B"), - "ssi_federal_fiscal_year_outlays": (60_000_000_000, "~$60B"), + "ssi": (60_000_000_000, "~$60B"), "eitc": (67_000_000_000, "~$67B"), "income_tax_before_credits": (4_000_000_000_000, "~$4T"), } diff --git a/policyengine_us_data/db/etl_national_targets.py b/policyengine_us_data/db/etl_national_targets.py index bdca9248f..56a6d0e92 100644 --- a/policyengine_us_data/db/etl_national_targets.py +++ b/policyengine_us_data/db/etl_national_targets.py @@ -27,11 +27,13 @@ get_geographic_strata, ) from policyengine_us_data.utils.ssi_targets import ( - SSI_CBO_TARGET_SOURCE, + SSI_PAYMENT_TARGET_SOURCE, SSI_RECIPIENT_TARGET_NOTES, SSI_RECIPIENT_TARGET_SOURCE, SSI_RECIPIENT_TARGET_YEAR, SSI_RECIPIENT_TARGETS_2024, + get_ssi_payment_target_notes, + scale_ssi_fiscal_year_target_for_single_year_data, ) from policyengine_us_data.utils.target_variables import ( target_variable_components, @@ -799,11 +801,11 @@ def extract_national_targets(year: int = DEFAULT_YEAR): source = "CBO Budget Projections" notes = f"CBO projection for {variable_name}" if variable_name == "ssi_federal_fiscal_year_outlays": - source = SSI_CBO_TARGET_SOURCE - notes = ( - "CBO SSI federal fiscal-year outlays matched to " - "policyengine-us ssi_federal_fiscal_year_outlays" + value = scale_ssi_fiscal_year_target_for_single_year_data( + value, time_period ) + source = SSI_PAYMENT_TARGET_SOURCE + notes = get_ssi_payment_target_notes(time_period) cbo_targets.append( { "variable": variable_name, diff --git a/policyengine_us_data/utils/loss.py b/policyengine_us_data/utils/loss.py index ba31e063a..d088ac516 100644 --- a/policyengine_us_data/utils/loss.py +++ b/policyengine_us_data/utils/loss.py @@ -26,7 +26,10 @@ ) from policyengine_core.reforms import Reform from policyengine_us_data.utils.soi import pe_to_soi, get_soi, get_tracked_soi_row -from policyengine_us_data.utils.ssi_targets import SSI_RECIPIENT_TARGETS_2024 +from policyengine_us_data.utils.ssi_targets import ( + SSI_RECIPIENT_TARGETS_2024, + scale_ssi_fiscal_year_target_for_single_year_data, +) from policyengine_us_data.utils.target_variables import ( target_variable_components, ) @@ -248,9 +251,12 @@ def _add_ssi_recipient_targets(loss_matrix, targets_array, sim, time_period): def _cbo_program_target_value(sim, variable_name: str, time_period): param_name = CBO_PARAM_NAME_MAP.get(variable_name, variable_name) - return sim.tax_benefit_system.parameters(time_period).calibration.gov.cbo._children[ - param_name - ] + value = sim.tax_benefit_system.parameters( + time_period + ).calibration.gov.cbo._children[param_name] + if variable_name == "ssi_federal_fiscal_year_outlays": + return scale_ssi_fiscal_year_target_for_single_year_data(value, time_period) + return value ACA_SPENDING_TARGETS = { diff --git a/policyengine_us_data/utils/ssi_targets.py b/policyengine_us_data/utils/ssi_targets.py index 3fc5f7f85..ec87c6e0b 100644 --- a/policyengine_us_data/utils/ssi_targets.py +++ b/policyengine_us_data/utils/ssi_targets.py @@ -1,8 +1,97 @@ """Shared SSI calibration targets.""" +from datetime import date, timedelta + SSI_CBO_TARGET_SOURCE = ( "https://www.cbo.gov/system/files/2026-02/51313-2026-02-ssi.xlsx" ) +SSI_PAYMENT_TIMING_SOURCE = "https://www.ssa.gov/oact/ssir/SSI24/IV_C_Payments.html" +SSI_PAYMENT_RULE_SOURCE = "https://www.ssa.gov/OP_Home/cfr20/416/416-0502.htm" +SSI_PAYMENT_TARGET_SOURCE = ( + f"{SSI_CBO_TARGET_SOURCE}; {SSI_PAYMENT_TIMING_SOURCE}; {SSI_PAYMENT_RULE_SOURCE}" +) + + +def _as_fiscal_year(year) -> int: + return int(str(year)[:4]) + + +def _is_new_years_day_observed(day: date) -> bool: + new_years_day = date(day.year, 1, 1) + next_new_years_day = date(day.year + 1, 1, 1) + return ( + day == new_years_day + or (new_years_day.weekday() == 6 and day == date(day.year, 1, 2)) + or (next_new_years_day.weekday() == 5 and day == date(day.year, 12, 31)) + ) + + +def _is_labor_day(day: date) -> bool: + return day.month == 9 and day.weekday() == 0 and day.day <= 7 + + +def _is_federal_holiday_affecting_ssi_payment(day: date) -> bool: + return _is_new_years_day_observed(day) or _is_labor_day(day) + + +def _ssi_payment_date(year: int, month: int) -> date: + payment_date = date(year, month, 1) + while payment_date.weekday() >= 5 or _is_federal_holiday_affecting_ssi_payment( + payment_date + ): + payment_date -= timedelta(days=1) + return payment_date + + +def _ssi_fiscal_year_benefit_months(year) -> list[date]: + fiscal_year = _as_fiscal_year(year) + fiscal_year_start = date(fiscal_year - 1, 10, 1) + fiscal_year_end = date(fiscal_year, 9, 30) + + benefit_months = [] + for calendar_year in (fiscal_year - 1, fiscal_year): + for month in range(1, 13): + payment_day = _ssi_payment_date(calendar_year, month) + if fiscal_year_start <= payment_day <= fiscal_year_end: + benefit_months.append(date(calendar_year, month, 1)) + return benefit_months + + +def get_ssi_fiscal_year_payment_count(year) -> int: + """Return SSI benefit months with payment dates in the federal fiscal year.""" + return len(_ssi_fiscal_year_benefit_months(year)) + + +def get_ssi_single_year_available_payment_count(year) -> int: + """Return fiscal-year SSI benefit months available from a single-year H5.""" + fiscal_year = _as_fiscal_year(year) + return sum( + benefit_month.year == fiscal_year + for benefit_month in _ssi_fiscal_year_benefit_months(year) + ) + + +def scale_ssi_fiscal_year_target_for_single_year_data(value, year) -> float: + """Scale full fiscal-year SSI outlays to months computable from one H5 year.""" + return ( + float(value) + * get_ssi_single_year_available_payment_count(year) + / get_ssi_fiscal_year_payment_count(year) + ) + + +def get_ssi_payment_target_notes(year) -> str: + fiscal_year = _as_fiscal_year(year) + available_count = get_ssi_single_year_available_payment_count(year) + payment_count = get_ssi_fiscal_year_payment_count(year) + return ( + "CBO SSI federal fiscal-year outlays scaled to the benefit months " + "computable from a single-year PolicyEngine-US-data H5 using " + "policyengine-us ssi_federal_fiscal_year_outlays; " + f"FY{fiscal_year} has {payment_count} SSI benefit months paid in the " + f"federal fiscal year, of which {available_count} are benefit months " + f"in calendar year {fiscal_year}" + ) SSI_RECIPIENT_TARGET_YEAR = 2024 diff --git a/tests/unit/calibration/test_check_staging_sums.py b/tests/unit/calibration/test_check_staging_sums.py index d273756c4..d27b8d0e8 100644 --- a/tests/unit/calibration/test_check_staging_sums.py +++ b/tests/unit/calibration/test_check_staging_sums.py @@ -31,6 +31,6 @@ def test_staging_sums_use_total_self_employment_income(): assert "self_employment_income" not in VARIABLES -def test_staging_sums_use_ssi_fiscal_year_outlays(): - assert "ssi_federal_fiscal_year_outlays" in VARIABLES - assert "ssi" not in VARIABLES +def test_staging_sums_use_raw_ssi(): + assert "ssi" in VARIABLES + assert "ssi_federal_fiscal_year_outlays" not in VARIABLES diff --git a/tests/unit/calibration/test_compare_calibration_runs.py b/tests/unit/calibration/test_compare_calibration_runs.py index 965fe67f8..8c3cdd688 100644 --- a/tests/unit/calibration/test_compare_calibration_runs.py +++ b/tests/unit/calibration/test_compare_calibration_runs.py @@ -42,9 +42,9 @@ def test_parse_variables_preserves_requested_order(): assert parse_variables("snap, eitc, ctc") == ["snap", "eitc", "ctc"] -def test_default_h5_comparison_uses_ssi_fiscal_year_outlays(): - assert "ssi_federal_fiscal_year_outlays" in DEFAULT_VARIABLES - assert "ssi" not in DEFAULT_VARIABLES +def test_default_h5_comparison_uses_raw_ssi(): + assert "ssi" in DEFAULT_VARIABLES + assert "ssi_federal_fiscal_year_outlays" not in DEFAULT_VARIABLES def test_summarize_diagnostics_uses_achievable_target_tail(): diff --git a/tests/unit/calibration/test_loss_targets.py b/tests/unit/calibration/test_loss_targets.py index cb74dadaa..3e7ee8baf 100644 --- a/tests/unit/calibration/test_loss_targets.py +++ b/tests/unit/calibration/test_loss_targets.py @@ -42,6 +42,9 @@ from policyengine_us_data.db import etl_national_targets from policyengine_us_data.utils.ssi_targets import ( SSI_RECIPIENT_TARGETS_2024, + get_ssi_fiscal_year_payment_count, + get_ssi_single_year_available_payment_count, + scale_ssi_fiscal_year_target_for_single_year_data, ) @@ -378,13 +381,28 @@ def test_add_ssi_recipient_targets_adds_total_and_age_counts(): ) -def test_legacy_cbo_ssi_target_uses_fiscal_year_outlays_variable(): +def test_ssi_payment_targets_scale_to_single_year_fiscal_year_coverage(): + assert get_ssi_fiscal_year_payment_count(2024) == 11 + assert get_ssi_single_year_available_payment_count(2024) == 9 + assert get_ssi_fiscal_year_payment_count(2025) == 12 + assert get_ssi_single_year_available_payment_count(2025) == 9 + assert get_ssi_fiscal_year_payment_count(2028) == 13 + assert get_ssi_single_year_available_payment_count(2028) == 10 + + assert scale_ssi_fiscal_year_target_for_single_year_data( + 57_000_000_000, 2024 + ) == pytest.approx(57_000_000_000 * 9 / 11) + assert scale_ssi_fiscal_year_target_for_single_year_data( + 75_400_000_000, 2028 + ) == pytest.approx(75_400_000_000 * 10 / 13) + + +def test_legacy_cbo_ssi_target_uses_single_year_fiscal_year_coverage(): sim = _FakeCBOProgramTargetSimulation() - assert ( - _cbo_program_target_value(sim, "ssi_federal_fiscal_year_outlays", 2024) - == 57_000_000_000 - ) + assert _cbo_program_target_value( + sim, "ssi_federal_fiscal_year_outlays", 2024 + ) == pytest.approx(57_000_000_000 * 9 / 11) assert _cbo_program_target_value(sim, "snap", 2024) == 1_000.0 diff --git a/tests/unit/calibration/test_validate_national_h5.py b/tests/unit/calibration/test_validate_national_h5.py index d35336e8b..2c74d03be 100644 --- a/tests/unit/calibration/test_validate_national_h5.py +++ b/tests/unit/calibration/test_validate_national_h5.py @@ -4,7 +4,6 @@ import pytest from policyengine_us_data.calibration.validate_national_h5 import ( - REFERENCES, VARIABLES, build_advance_ctc_agi_share_comparison, build_advance_ctc_filing_status_share_comparison, @@ -48,9 +47,8 @@ def test_reference_values_use_irs_ctc_component_targets(monkeypatch): 63_622_000.0, "IRS Pub. 4801 2022 63.6M", ) - assert "ssi_federal_fiscal_year_outlays" in REFERENCES - assert "ssi_federal_fiscal_year_outlays" in VARIABLES - assert "ssi" not in VARIABLES + assert "ssi" in VARIABLES + assert "ssi_federal_fiscal_year_outlays" not in VARIABLES def test_ctc_diagnostic_outputs_format_all_sections(monkeypatch): diff --git a/tests/unit/test_etl_national_targets.py b/tests/unit/test_etl_national_targets.py index b6243bc89..a72d21d34 100644 --- a/tests/unit/test_etl_national_targets.py +++ b/tests/unit/test_etl_national_targets.py @@ -20,7 +20,7 @@ load_state_acs_rent_targets, ) from policyengine_us_data.utils.ssi_targets import ( - SSI_CBO_TARGET_SOURCE, + SSI_PAYMENT_TARGET_SOURCE, SSI_RECIPIENT_TARGETS_2024, ) @@ -490,9 +490,9 @@ class FakeTaxBenefitSystem: if target["variable"] == "ssi_federal_fiscal_year_outlays" ) - assert ssi_target["value"] == 57_000_000_000 - assert ssi_target["source"] == SSI_CBO_TARGET_SOURCE - assert "federal fiscal-year outlays" in ssi_target["notes"] + assert ssi_target["value"] == 57_000_000_000 * 9 / 11 + assert ssi_target["source"] == SSI_PAYMENT_TARGET_SOURCE + assert "single-year PolicyEngine-US-data H5" in ssi_target["notes"] def test_load_national_targets_deactivates_legacy_ssi_dollar_target( @@ -527,8 +527,8 @@ def test_load_national_targets_deactivates_legacy_ssi_dollar_target( [ { "variable": "ssi_federal_fiscal_year_outlays", - "value": 57_000_000_000, - "source": SSI_CBO_TARGET_SOURCE, + "value": 57_000_000_000 * 9 / 11, + "source": SSI_PAYMENT_TARGET_SOURCE, "notes": "CBO SSI federal fiscal-year outlays", "year": 2024, } @@ -550,7 +550,7 @@ def test_load_national_targets_deactivates_legacy_ssi_dollar_target( assert legacy_target.active is False assert "replaced this target concept" in legacy_target.notes assert new_target.active is True - assert new_target.value == 57_000_000_000 + assert new_target.value == 57_000_000_000 * 9 / 11 def test_load_national_targets_uses_medicaid_enrolled_for_enrollment_counts(