diff --git a/changelog.d/1125.changed.md b/changelog.d/1125.changed.md new file mode 100644 index 000000000..b1b5f446d --- /dev/null +++ b/changelog.d/1125.changed.md @@ -0,0 +1 @@ +Write retirement contribution source data to desired pre-limit variables and target the plain PolicyEngine-US contribution outputs after statutory caps. diff --git a/policyengine_us_data/calibration/puf_impute.py b/policyengine_us_data/calibration/puf_impute.py index f8dd58fa9..cda23d446 100644 --- a/policyengine_us_data/calibration/puf_impute.py +++ b/policyengine_us_data/calibration/puf_impute.py @@ -160,11 +160,11 @@ ] CPS_RETIREMENT_VARIABLES = [ - "traditional_401k_contributions", - "roth_401k_contributions", - "traditional_ira_contributions", - "roth_ira_contributions", - "self_employed_pension_contributions", + "traditional_401k_contributions_desired", + "roth_401k_contributions_desired", + "traditional_ira_contributions_desired", + "roth_ira_contributions_desired", + "self_employed_pension_contributions_desired", ] RETIREMENT_DEMOGRAPHIC_PREDICTORS = [ @@ -845,18 +845,9 @@ def _impute_retirement_contributions( n_persons = len(data["person_id"][time_period]) return {var: np.zeros(n_persons) for var in CPS_RETIREMENT_VARIABLES} - # Extract results and apply constraints - limits = _get_retirement_limits(time_period) - age = X_test["age"].values - catch_up_eligible = age >= 50 - limit_401k = limits["401k"] + catch_up_eligible * limits["401k_catch_up"] - limit_ira = limits["ira"] + catch_up_eligible * limits["ira_catch_up"] + # Extract results and apply data-domain constraints. Statutory limits + # are applied by PolicyEngine-US plain contribution variables. 
se_income = X_test["self_employment_income"].values - se_pension_cap = np.minimum( - se_income * limits["se_pension_rate"], - limits["se_pension_dollar_limit"], - ) - emp_income = X_test["employment_income"].values result = {} @@ -866,19 +857,12 @@ def _impute_retirement_contributions( # Non-negativity vals = np.maximum(vals, 0) - # Cap 401k at year-specific limit + # Zero out employment-based plans for records with no employment income. if "401k" in var: - vals = np.minimum(vals, limit_401k) - # Zero out for records with no employment income vals = np.where(emp_income > 0, vals, 0) - # Cap IRA at year-specific limit - if "ira" in var: - vals = np.minimum(vals, limit_ira) - - # Cap SE pension at min(25% of SE income, dollar limit) - if var == "self_employed_pension_contributions": - vals = np.minimum(vals, se_pension_cap) + # Zero out self-employed plans for records with no self-employment income. + if var == "self_employed_pension_contributions_desired": vals = np.where(se_income > 0, vals, 0) result[var] = vals @@ -886,11 +870,11 @@ def _impute_retirement_contributions( logger.info( "Imputed retirement contributions for PUF: " "401k mean=$%.0f, IRA mean=$%.0f, SE pension mean=$%.0f", - result["traditional_401k_contributions"].mean() - + result["roth_401k_contributions"].mean(), - result["traditional_ira_contributions"].mean() - + result["roth_ira_contributions"].mean(), - result["self_employed_pension_contributions"].mean(), + result["traditional_401k_contributions_desired"].mean() + + result["roth_401k_contributions_desired"].mean(), + result["traditional_ira_contributions_desired"].mean() + + result["roth_ira_contributions_desired"].mean(), + result["self_employed_pension_contributions_desired"].mean(), ) return result diff --git a/policyengine_us_data/calibration/target_config.yaml b/policyengine_us_data/calibration/target_config.yaml index aae559ac8..69c413d66 100644 --- a/policyengine_us_data/calibration/target_config.yaml +++ 
b/policyengine_us_data/calibration/target_config.yaml @@ -240,7 +240,7 @@ include: geo_level: national - variable: roth_ira_contributions geo_level: national - - variable: self_employed_pension_contribution_ald + - variable: self_employed_pension_contributions geo_level: national # === NATIONAL — IRS SOI domain-constrained dollar targets (restored: |rel_err| < 15%) === diff --git a/policyengine_us_data/datasets/cps/cps.py b/policyengine_us_data/datasets/cps/cps.py index 2d6577a5b..033708bb1 100644 --- a/policyengine_us_data/datasets/cps/cps.py +++ b/policyengine_us_data/datasets/cps/cps.py @@ -1512,40 +1512,25 @@ def add_personal_income_variables(cps: h5py.File, person: DataFrame, year: int): # nearly all of RETCB_VAL and left IRA contributions at $0. # # The proportional approach uses BEA/FRED and IRS SOI shares to - # split contributions into DC (401k) and IRA pools, then splits - # each pool into traditional/Roth using administrative fractions. - # See imputation_parameters.yaml for sources. - from policyengine_us_data.utils.retirement_limits import ( - get_retirement_limits, - ) - - limits = get_retirement_limits(year) - LIMIT_401K = limits["401k"] - LIMIT_401K_CATCH_UP = limits["401k_catch_up"] - LIMIT_IRA = limits["ira"] - LIMIT_IRA_CATCH_UP = limits["ira_catch_up"] - CATCH_UP_AGE = 50 - catch_up_eligible = person.A_AGE >= CATCH_UP_AGE - limit_401k = LIMIT_401K + catch_up_eligible * LIMIT_401K_CATCH_UP - limit_ira = LIMIT_IRA + catch_up_eligible * LIMIT_IRA_CATCH_UP - + # split contributions into self-employed pension, DC (401k), and + # IRA pools, then splits the DC and IRA pools into traditional/Roth + # using administrative fractions. See imputation_parameters.yaml + # for sources. retirement_contributions = person.RETCB_VAL has_wages = person.WSAL_VAL > 0 has_se = person.SEMP_VAL > 0 has_earned_income = has_wages | has_se - # 1) Self-employed pension: cap at min(25% of SE income, dollar - # limit) so dual-income filers keep a remainder for 401(k)/IRA. 
- se_rate = p["se_pension_contribution_rate"] - se_dollar_cap = p["se_pension_contribution_dollar_limit"][year] - se_pension_cap = np.minimum(person.SEMP_VAL * se_rate, se_dollar_cap) - cps["self_employed_pension_contributions"] = np.where( + # 1) Self-employed pension: allocate a share without applying statutory + # limits. PolicyEngine-US applies those limits. + se_share = p["se_pension_share_of_retirement_contributions"] + cps["self_employed_pension_contributions_desired"] = np.where( has_se, - np.minimum(retirement_contributions, se_pension_cap), + retirement_contributions * se_share, 0, ) remaining = np.maximum( - retirement_contributions - cps["self_employed_pension_contributions"], + retirement_contributions - cps["self_employed_pension_contributions_desired"], 0, ) @@ -1561,17 +1546,15 @@ def add_personal_income_variables(cps: h5py.File, person: DataFrame, year: int): # earned income (including SE-only filers). ira_pool = np.where(has_earned_income, remaining - dc_pool, 0) - # DC pool: split into traditional/Roth 401(k), cap at combined - # 401(k) limit. - dc_capped = np.minimum(dc_pool, limit_401k) - cps["traditional_401k_contributions"] = dc_capped * (1 - roth_dc_share) - cps["roth_401k_contributions"] = dc_capped * roth_dc_share - - # IRA pool: split into traditional/Roth IRA, cap at combined - # IRA limit. - ira_capped = np.minimum(ira_pool, limit_ira) - cps["traditional_ira_contributions"] = ira_capped * trad_ira_share - cps["roth_ira_contributions"] = ira_capped * (1 - trad_ira_share) + # DC pool: split into desired traditional/Roth 401(k) contributions. + # The statutory elective deferral limit is applied in policyengine-us. + cps["traditional_401k_contributions_desired"] = dc_pool * (1 - roth_dc_share) + cps["roth_401k_contributions_desired"] = dc_pool * roth_dc_share + + # IRA pool: split into desired traditional/Roth IRA contributions. + # The statutory IRA limit is applied in policyengine-us. 
+ cps["traditional_ira_contributions_desired"] = ira_pool * trad_ira_share + cps["roth_ira_contributions_desired"] = ira_pool * (1 - trad_ira_share) # Allocate capital gains into long-term and short-term based on aggregate split. cps["long_term_capital_gains"] = person.CAP_VAL * (p["long_term_capgain_fraction"]) cps["short_term_capital_gains"] = person.CAP_VAL * ( diff --git a/policyengine_us_data/datasets/cps/extended_cps.py b/policyengine_us_data/datasets/cps/extended_cps.py index e317d21a9..43fc3d5db 100644 --- a/policyengine_us_data/datasets/cps/extended_cps.py +++ b/policyengine_us_data/datasets/cps/extended_cps.py @@ -56,10 +56,6 @@ from policyengine_us_data.utils.dataset_validation import ( assert_no_computed_policyengine_us_variables_exported, ) -from policyengine_us_data.utils.retirement_limits import ( - get_retirement_limits, - get_se_pension_limits, -) from policyengine_us_data.utils.randomness import seeded_rng logger = logging.getLogger(__name__) @@ -154,11 +150,11 @@ def _supports_structural_mortgage_inputs() -> bool: "taxable_sep_distributions", "tax_exempt_sep_distributions", # Retirement contributions - "traditional_401k_contributions", - "roth_401k_contributions", - "traditional_ira_contributions", - "roth_ira_contributions", - "self_employed_pension_contributions", + "traditional_401k_contributions_desired", + "roth_401k_contributions_desired", + "traditional_ira_contributions_desired", + "roth_ira_contributions_desired", + "self_employed_pension_contributions_desired", # Social Security sub-components "social_security_retirement", "social_security_disability", @@ -753,50 +749,34 @@ def _impute_cps_only_variables( def apply_retirement_constraints(predictions, X_test, time_period): - """Enforce IRS contribution limits on retirement variable predictions. + """Clean retirement contribution predictions for data-domain eligibility. Args: predictions: DataFrame of QRF predictions for retirement contribution variables. 
X_test: DataFrame with at least ``age``, ``employment_income``, and ``self_employment_income``. - time_period: Tax year (int) for IRS limit look-up. + time_period: Tax year (int), accepted for API compatibility. Returns: - DataFrame with constrained values (same columns). + DataFrame with cleaned values (same columns). """ - limits = get_retirement_limits(time_period) - se_limits = get_se_pension_limits(time_period) - - age = X_test["age"].values - catch_up = age >= 50 emp_income = X_test["employment_income"].values se_income = X_test["self_employment_income"].values - limit_401k = limits["401k"] + catch_up * limits["401k_catch_up"] - limit_ira = limits["ira"] + catch_up * limits["ira_catch_up"] - se_pension_cap = np.minimum( - se_income * se_limits["se_pension_rate"], - se_limits["se_pension_dollar_limit"], - ) - - # Explicit mapping: variable -> (cap array, zero_mask or None). + # Explicit mapping: variable -> zero_mask or None. Statutory limits + # are applied by PolicyEngine-US plain contribution variables. 
_CONSTRAINT_MAP = { - "traditional_401k_contributions": (limit_401k, emp_income == 0), - "roth_401k_contributions": (limit_401k, emp_income == 0), - "traditional_ira_contributions": (limit_ira, None), - "roth_ira_contributions": (limit_ira, None), - "self_employed_pension_contributions": ( - se_pension_cap, - se_income == 0, - ), + "traditional_401k_contributions_desired": emp_income == 0, + "roth_401k_contributions_desired": emp_income == 0, + "traditional_ira_contributions_desired": None, + "roth_ira_contributions_desired": None, + "self_employed_pension_contributions_desired": se_income == 0, } result = predictions.clip(lower=0) for var in result.columns: - cap, zero_mask = _CONSTRAINT_MAP.get(var, (None, None)) - if cap is not None: - result[var] = np.minimum(result[var].values, cap) + zero_mask = _CONSTRAINT_MAP.get(var) if zero_mask is not None: result.loc[zero_mask, var] = 0 @@ -836,11 +816,11 @@ def reconcile_ss_subcomponents(predictions, total_ss): _RETIREMENT_VARS = { - "traditional_401k_contributions", - "roth_401k_contributions", - "traditional_ira_contributions", - "roth_ira_contributions", - "self_employed_pension_contributions", + "traditional_401k_contributions_desired", + "roth_401k_contributions_desired", + "traditional_ira_contributions_desired", + "roth_ira_contributions_desired", + "self_employed_pension_contributions_desired", } _SS_SUBCOMPONENT_VARS = { diff --git a/policyengine_us_data/datasets/cps/imputation_parameters.yaml b/policyengine_us_data/datasets/cps/imputation_parameters.yaml index f71fa4d14..71cbf67fc 100644 --- a/policyengine_us_data/datasets/cps/imputation_parameters.yaml +++ b/policyengine_us_data/datasets/cps/imputation_parameters.yaml @@ -21,7 +21,15 @@ long_term_capgain_fraction: 0.880 # Used to split CPS RETCB_VAL (a single bundled total) into # account-type-specific variables. # -# DC vs IRA share of non-SE retirement contributions. +# Self-employed pension share of retirement contributions. 
+# Self-employed pension: $30.13B (IRS SOI Publication 1304, Table 1.4, +# TY 2023, "Payments to a Keogh plan") +# Combined employee DC + IRA + self-employed pension: $655.53B +# Share: $30.13B / $655.53B = 4.6% +# https://www.irs.gov/statistics/soi-tax-stats-individual-statistical-tables-by-size-of-adjusted-gross-income +se_pension_share_of_retirement_contributions: 0.046 + +# DC vs IRA share of remaining non-SE retirement contributions. # Employee DC: $567.9B (BEA/FRED Y351RC1A027NBEA minus W351RC0A144NBEA) # Total IRA: $57.5B (IRS SOI Tables 5 & 6, TY 2022) # Combined: $625.4B @@ -46,8 +54,9 @@ roth_share_of_dc_contributions: 0.15 # https://www.irs.gov/statistics/soi-tax-stats-accumulation-and-distribution-of-individual-retirement-arrangements traditional_share_of_ira_contributions: 0.392 -# SE pension contribution cap. -# SEP-IRA / Solo 401(k) contributions are capped at the lesser of +# SE pension statutory parameters retained for retirement-limit utilities. +# These are not used to reduce desired source contribution data. +# SEP-IRA / Solo 401(k) contributions are limited to the lesser of # a percentage of net SE earnings and a dollar limit. 
# The 25% rate is technically ~20% for sole proprietors after the # deduction-for-half-of-SE-tax adjustment, but 25% is the standard diff --git a/policyengine_us_data/datasets/puf/puf.py b/policyengine_us_data/datasets/puf/puf.py index ff390420e..1aa43f2d0 100644 --- a/policyengine_us_data/datasets/puf/puf.py +++ b/policyengine_us_data/datasets/puf/puf.py @@ -692,7 +692,7 @@ def preprocess_puf(puf: pd.DataFrame) -> pd.DataFrame: puf["taxable_ira_distributions"] = puf.E01400 puf["tax_exempt_interest_income"] = puf.E00400 puf["tax_exempt_pension_income"] = puf.E01500 - puf.E01700 - puf["traditional_ira_contributions"] = puf.E03150 + puf["traditional_ira_contributions_desired"] = puf.E03150 puf["unrecaptured_section_1250_gain"] = puf.E24515 puf["foreign_tax_credit"] = puf.E07300 @@ -835,7 +835,7 @@ def preprocess_puf(puf: pd.DataFrame) -> pd.DataFrame: "taxable_ira_distributions", "tax_exempt_interest_income", "tax_exempt_pension_income", - "traditional_ira_contributions", + "traditional_ira_contributions_desired", "unrecaptured_section_1250_gain", "foreign_tax_credit", "amt_foreign_tax_credit", diff --git a/policyengine_us_data/db/etl_national_targets.py b/policyengine_us_data/db/etl_national_targets.py index d521928f9..f3af8e657 100644 --- a/policyengine_us_data/db/etl_national_targets.py +++ b/policyengine_us_data/db/etl_national_targets.py @@ -659,15 +659,15 @@ def extract_national_targets(year: int = DEFAULT_YEAR): "year": 2024, }, { - "variable": "self_employed_pension_contribution_ald", + "variable": "self_employed_pension_contributions", "value": RETIREMENT_CONTRIBUTION_TARGETS[ - "self_employed_pension_contribution_ald" + "self_employed_pension_contributions" ]["value"], "source": RETIREMENT_CONTRIBUTION_TARGETS[ - "self_employed_pension_contribution_ald" + "self_employed_pension_contributions" ]["source"], "notes": RETIREMENT_CONTRIBUTION_TARGETS[ - "self_employed_pension_contribution_ald" + "self_employed_pension_contributions" ]["notes"], "year": 2024, }, 
diff --git a/policyengine_us_data/storage/calibration_targets/soi_metadata.py b/policyengine_us_data/storage/calibration_targets/soi_metadata.py index 5a5b6acf3..1c61bf1a7 100644 --- a/policyengine_us_data/storage/calibration_targets/soi_metadata.py +++ b/policyengine_us_data/storage/calibration_targets/soi_metadata.py @@ -14,7 +14,7 @@ ), "source_year": 2023, }, - "self_employed_pension_contribution_ald": { + "self_employed_pension_contributions": { "value": 30.130848e9, "source": "https://www.irs.gov/statistics/soi-tax-stats-individual-statistical-tables-by-size-of-adjusted-gross-income", "notes": ( diff --git a/policyengine_us_data/utils/loss.py b/policyengine_us_data/utils/loss.py index 32112eee0..84c620bd3 100644 --- a/policyengine_us_data/utils/loss.py +++ b/policyengine_us_data/utils/loss.py @@ -137,9 +137,9 @@ # # traditional_ira_contributions: IRS SOI Publication 1304, Table 1.4 # (TY 2023), "IRA payments" deduction — $13.77B (col DU, row - # "All returns, total"). This is the actual above-the-line - # deduction claimed on returns. The variable flows directly into - # the ALD with no deductibility logic in policyengine-us, so the + # "All returns, total"). This is the above-the-line deduction + # claimed on returns. The variable flows directly into the ALD + # with no deductibility logic in policyengine-us, so the # target must match the deduction, not total contributions. 
# https://www.irs.gov/statistics/soi-tax-stats-individual-statistical-tables-by-size-of-adjusted-gross-income "traditional_ira_contributions": RETIREMENT_CONTRIBUTION_TARGETS[ @@ -159,15 +159,15 @@ # https://corporate.vanguard.com/content/dam/corp/research/pdf/how_america_saves_report_2024.pdf "traditional_401k_contributions": 482.7e9, "roth_401k_contributions": 85.2e9, - # self_employed_pension_contribution_ald: IRS SOI Publication + # self_employed_pension_contributions: IRS SOI Publication # 1304, Table 1.4 (TY 2023), "Payments to a Keogh plan" — # $30.13B (col DM, row "All returns, total"). Includes # SEP-IRAs, SIMPLE-IRAs, and traditional Keogh/HR-10 plans. - # Targeting the ALD (not the input) because policyengine-us - # applies a min(contributions, SE_income) cap. + # Targeting the contribution output because policyengine-us applies + # statutory limits before the ALD formula. # https://www.irs.gov/statistics/soi-tax-stats-individual-statistical-tables-by-size-of-adjusted-gross-income - "self_employed_pension_contribution_ald": RETIREMENT_CONTRIBUTION_TARGETS[ - "self_employed_pension_contribution_ald" + "self_employed_pension_contributions": RETIREMENT_CONTRIBUTION_TARGETS[ + "self_employed_pension_contributions" ]["value"], # roth_ira_contributions: IRS SOI IRA Accumulation Tables 5 & 6 # (TY 2022, latest published). 
Total Roth IRA contributions = diff --git a/policyengine_us_data/utils/national_target_parity.py b/policyengine_us_data/utils/national_target_parity.py index d2494642f..00f0655d8 100644 --- a/policyengine_us_data/utils/national_target_parity.py +++ b/policyengine_us_data/utils/national_target_parity.py @@ -61,7 +61,7 @@ "traditional_ira_contributions", "traditional_401k_contributions", "roth_401k_contributions", - "self_employed_pension_contribution_ald", + "self_employed_pension_contributions", "roth_ira_contributions", } diff --git a/pyproject.toml b/pyproject.toml index fe45ef323..6302df0c3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,9 +23,9 @@ classifiers = [ ] dependencies = [ # Temporary GitHub pin: policyengine-us 1.706.14 is blocked from PyPI by - # the project-size limit, but us-data needs the merged FLSA overtime - # constants and data-backed Medicaid cost input before the next PyPI - # release is available. + # the project-size limit, but us-data needs the merged desired retirement + # contribution variables, FLSA overtime constants, and data-backed + # Medicaid cost input before the next PyPI release is available. 
"policyengine-us @ git+https://github.com/PolicyEngine/policyengine-us.git@1da04a64dcdce26834b063d68daa835765a5d8ed", # policyengine-core 3.26.1 is the current 3.26.x runtime and includes the fix for # PolicyEngine/policyengine-core#482 (user-set ETERNITY inputs lost diff --git a/tests/unit/calibration/test_calibration_puf_impute.py b/tests/unit/calibration/test_calibration_puf_impute.py index b29914ca3..6f2f60561 100644 --- a/tests/unit/calibration/test_calibration_puf_impute.py +++ b/tests/unit/calibration/test_calibration_puf_impute.py @@ -384,13 +384,13 @@ def test_indices_sorted(self): assert np.all(idx[1:] >= idx[:-1]) -def test_retirement_imputation_caps_se_pension_using_sstb_income(monkeypatch): +def test_retirement_imputation_uses_sstb_income_for_se_eligibility(monkeypatch): class FakeMicrosimulation: def __init__(self, dataset): self.dataset = dataset def calculate_dataframe(self, columns): - if "self_employed_pension_contributions" in columns: + if "self_employed_pension_contributions_desired" in columns: return pd.DataFrame( { "age": [40, 55], @@ -406,11 +406,11 @@ def calculate_dataframe(self, columns): "qualified_dividend_income": [0.0, 0.0], "taxable_pension_income": [0.0, 0.0], "social_security": [0.0, 0.0], - "traditional_401k_contributions": [0.0, 0.0], - "roth_401k_contributions": [0.0, 0.0], - "traditional_ira_contributions": [0.0, 0.0], - "roth_ira_contributions": [0.0, 0.0], - "self_employed_pension_contributions": [0.0, 0.0], + "traditional_401k_contributions_desired": [0.0, 0.0], + "roth_401k_contributions_desired": [0.0, 0.0], + "traditional_ira_contributions_desired": [0.0, 0.0], + "roth_ira_contributions_desired": [0.0, 0.0], + "self_employed_pension_contributions_desired": [0.0, 0.0], } ) return pd.DataFrame( @@ -446,11 +446,11 @@ def fit_predict( ) return pd.DataFrame( { - "traditional_401k_contributions": [0.0, 0.0], - "roth_401k_contributions": [0.0, 0.0], - "traditional_ira_contributions": [0.0, 0.0], - "roth_ira_contributions": 
[0.0, 0.0], - "self_employed_pension_contributions": [50_000.0, 50_000.0], + "traditional_401k_contributions_desired": [0.0, 0.0], + "roth_401k_contributions_desired": [0.0, 0.0], + "traditional_ira_contributions_desired": [0.0, 0.0], + "roth_ira_contributions_desired": [0.0, 0.0], + "self_employed_pension_contributions_desired": [50_000.0, 50_000.0], } ) @@ -473,8 +473,8 @@ def fit_predict( ) np.testing.assert_array_equal( - result["self_employed_pension_contributions"], - np.array([25.0, 25.0]), + result["self_employed_pension_contributions_desired"], + np.array([50_000.0, 50_000.0]), ) diff --git a/tests/unit/calibration/test_retirement_imputation.py b/tests/unit/calibration/test_retirement_imputation.py index 5b635c792..29ec3103d 100644 --- a/tests/unit/calibration/test_retirement_imputation.py +++ b/tests/unit/calibration/test_retirement_imputation.py @@ -89,11 +89,11 @@ def _make_cps_df(n, rng): "taxable_pension_income": rng.uniform(0, 20_000, n), "social_security": rng.uniform(0, 15_000, n), # Targets - "traditional_401k_contributions": rng.uniform(0, 5000, n), - "roth_401k_contributions": rng.uniform(0, 3000, n), - "traditional_ira_contributions": rng.uniform(0, 2000, n), - "roth_ira_contributions": rng.uniform(0, 2000, n), - "self_employed_pension_contributions": rng.uniform(0, 10_000, n), + "traditional_401k_contributions_desired": rng.uniform(0, 5000, n), + "roth_401k_contributions_desired": rng.uniform(0, 3000, n), + "traditional_ira_contributions_desired": rng.uniform(0, 2000, n), + "roth_ira_contributions_desired": rng.uniform(0, 2000, n), + "self_employed_pension_contributions_desired": rng.uniform(0, 10_000, n), } ) @@ -142,11 +142,11 @@ def test_five_retirement_variables(self): def test_retirement_variable_names(self): expected = { - "traditional_401k_contributions", - "roth_401k_contributions", - "traditional_ira_contributions", - "roth_ira_contributions", - "self_employed_pension_contributions", + "traditional_401k_contributions_desired", + 
"roth_401k_contributions_desired", + "traditional_ira_contributions_desired", + "roth_ira_contributions_desired", + "self_employed_pension_contributions_desired", } assert set(CPS_RETIREMENT_VARIABLES) == expected @@ -315,27 +315,28 @@ def test_nonnegative_output(self): for var in CPS_RETIREMENT_VARIABLES: assert np.all(result[var] >= 0), f"{var} has negative values" - def test_401k_capped(self): + def test_401k_preserves_desired_amounts(self): result = self._call_with_mocks(self._uniform_preds(50_000.0)) - lim = _get_retirement_limits(self.time_period) - max_401k = lim["401k"] + lim["401k_catch_up"] + pos_wage = self.puf_imputations["employment_income"] > 0 for var in ( - "traditional_401k_contributions", - "roth_401k_contributions", + "traditional_401k_contributions_desired", + "roth_401k_contributions_desired", ): - assert np.all(result[var] <= max_401k), f"{var} exceeds 401k limit" + assert np.all(result[var][pos_wage] == 50_000.0), ( + f"{var} should preserve desired amounts for records with wages" + ) - def test_ira_capped(self): + def test_ira_preserves_desired_amounts(self): result = self._call_with_mocks(self._uniform_preds(50_000.0)) - lim = _get_retirement_limits(self.time_period) - max_ira = lim["ira"] + lim["ira_catch_up"] for var in ( - "traditional_ira_contributions", - "roth_ira_contributions", + "traditional_ira_contributions_desired", + "roth_ira_contributions_desired", ): - assert np.all(result[var] <= max_ira), f"{var} exceeds IRA limit" + assert np.all(result[var] == 50_000.0), ( + f"{var} should preserve desired amounts" + ) def test_401k_zero_when_no_wages(self): result = self._call_with_mocks(self._uniform_preds(5_000.0)) @@ -343,8 +344,8 @@ def test_401k_zero_when_no_wages(self): assert zero_wage.sum() == 10 for var in ( - "traditional_401k_contributions", - "roth_401k_contributions", + "traditional_401k_contributions_desired", + "roth_401k_contributions_desired", ): assert np.all(result[var][zero_wage] == 0), ( f"{var} should be 0 when 
employment_income is 0" @@ -354,10 +355,12 @@ def test_se_pension_zero_when_no_se_income(self): result = self._call_with_mocks(self._uniform_preds(5_000.0)) zero_se = self.puf_imputations["self_employment_income"] == 0 assert zero_se.sum() == 20 - assert np.all(result["self_employed_pension_contributions"][zero_se] == 0) + assert np.all( + result["self_employed_pension_contributions_desired"][zero_se] == 0 + ) - def test_catch_up_age_threshold(self): - """Records age >= 50 get higher caps than younger.""" + def test_401k_desired_does_not_apply_age_threshold(self): + """401(k) desired inputs do not apply age-based statutory limits here.""" self.cps_df["age"] = np.concatenate([np.full(25, 30.0), np.full(25, 55.0)]) # All have positive income self.puf_imputations["employment_income"] = np.full(self.n, 100_000.0).astype( @@ -369,26 +372,24 @@ def test_catch_up_age_threshold(self): result = self._call_with_mocks(self._uniform_preds(val)) - young_401k = result["traditional_401k_contributions"][:25] - old_401k = result["traditional_401k_contributions"][25:] + young_401k = result["traditional_401k_contributions_desired"][:25] + old_401k = result["traditional_401k_contributions_desired"][25:] - # Young capped at base limit - assert np.all(young_401k == lim["401k"]) - # Old get full value (within catch-up limit) + assert np.all(young_401k == val) assert np.all(old_401k == val) - def test_ira_catch_up_threshold(self): - """IRA catch-up also works for age >= 50.""" + def test_ira_desired_does_not_apply_age_threshold(self): + """IRA desired inputs do not apply age-based statutory limits here.""" self.cps_df["age"] = np.concatenate([np.full(25, 30.0), np.full(25, 55.0)]) lim = _get_retirement_limits(self.time_period) val = float(lim["ira"]) + 500 # 7500 result = self._call_with_mocks(self._uniform_preds(val)) - young_ira = result["traditional_ira_contributions"][:25] - old_ira = result["traditional_ira_contributions"][25:] + young_ira = 
result["traditional_ira_contributions_desired"][:25] + old_ira = result["traditional_ira_contributions_desired"][25:] - assert np.all(young_ira == lim["ira"]) + assert np.all(young_ira == val) assert np.all(old_ira == val) def test_401k_nonzero_for_positive_wages(self): @@ -397,31 +398,24 @@ def test_401k_nonzero_for_positive_wages(self): result = self._call_with_mocks(self._uniform_preds(5_000.0)) pos_wage = self.puf_imputations["employment_income"] > 0 for var in ( - "traditional_401k_contributions", - "roth_401k_contributions", + "traditional_401k_contributions_desired", + "roth_401k_contributions_desired", ): assert np.all(result[var][pos_wage] > 0) def test_se_pension_nonzero_for_positive_se(self): result = self._call_with_mocks(self._uniform_preds(5_000.0)) pos_se = self.puf_imputations["self_employment_income"] > 0 - assert np.all(result["self_employed_pension_contributions"][pos_se] > 0) + assert np.all(result["self_employed_pension_contributions_desired"][pos_se] > 0) - def test_se_pension_capped_at_rate_times_income(self): - """SE pension should not exceed 25% of SE income.""" - # Predict a large value that would exceed the SE cap + def test_se_pension_preserves_desired_amounts(self): + """SE pension desired inputs preserve source amounts when SE income exists.""" result = self._call_with_mocks(self._uniform_preds(50_000.0)) - lim = _get_retirement_limits(self.time_period) se_income = self.puf_imputations["self_employment_income"] - se_cap = np.minimum( - se_income * lim["se_pension_rate"], - lim["se_pension_dollar_limit"], - ) pos_se = se_income > 0 assert np.all( - result["self_employed_pension_contributions"][pos_se] - <= se_cap[pos_se] + 0.01 - ), "SE pension exceeds 25%-of-income cap" + result["self_employed_pension_contributions_desired"][pos_se] == 50_000.0 + ) def test_qrf_failure_returns_zeros(self): """When QRF fit/predict throws, should return all zeros.""" diff --git a/tests/unit/datasets/test_cps_income_variables.py 
b/tests/unit/datasets/test_cps_income_variables.py index 835117bde..d9bc8690e 100644 --- a/tests/unit/datasets/test_cps_income_variables.py +++ b/tests/unit/datasets/test_cps_income_variables.py @@ -103,6 +103,21 @@ def test_add_personal_income_variables_maps_spm_income_leaves(): np.testing.assert_array_equal(cps["survivor_benefits"], [30.0, 31.0, 32.0]) +def test_retirement_contributions_write_desired_without_se_rate_cap(): + person = _minimal_person_income_frame() + person["SEMP_VAL"] = [100.0, 0.0] + person["WSAL_VAL"] = [0.0, 100_000.0] + person["RETCB_VAL"] = [100_000.0, 100_000.0] + cps = {} + + add_personal_income_variables(cps, person, 2024) + + assert cps["self_employed_pension_contributions_desired"][0] > 100 * 0.25 + assert cps["self_employed_pension_contributions_desired"][1] == 0 + assert cps["traditional_ira_contributions_desired"][0] > 0 + assert cps["traditional_401k_contributions_desired"][1] > 0 + + def test_derive_flsa_overtime_premium_uses_wage_share_and_exemption_screen(): premium = derive_flsa_overtime_premium( time_period=2024, diff --git a/tests/unit/test_extended_cps.py b/tests/unit/test_extended_cps.py index f97c3a761..0f2fbd85e 100644 --- a/tests/unit/test_extended_cps.py +++ b/tests/unit/test_extended_cps.py @@ -4,7 +4,7 @@ 1. Sequential QRF preserves covariance between imputed variables 2. CPS-only imputation uses PUF-imputed income (not CPS originals) 3. Variable lists don't overlap (no double-imputation) -4. Post-processing constraints enforce IRS caps and SS normalization +4. 
Post-processing constraints clean retirement inputs and normalize SS """ from contextlib import contextmanager @@ -137,23 +137,40 @@ def test_stage2_uses_esi_coverage_predictor(self): def test_cps_only_vars_mostly_exist_in_tbs(self): """Most CPS-only variables should exist in policyengine-us.""" + from importlib.metadata import version + + from packaging.version import Version from policyengine_us import CountryTaxBenefitSystem tbs = CountryTaxBenefitSystem() - valid = [v for v in CPS_ONLY_IMPUTED_VARIABLES if v in tbs.variables] - assert len(valid) >= len(CPS_ONLY_IMPUTED_VARIABLES) * 0.9, ( - f"Only {len(valid)}/{len(CPS_ONLY_IMPUTED_VARIABLES)} " + pending_policyengine_us_release = set() + if Version(version("policyengine-us")) < Version("1.706.3"): + pending_policyengine_us_release = { + "traditional_401k_contributions_desired", + "roth_401k_contributions_desired", + "traditional_ira_contributions_desired", + "roth_ira_contributions_desired", + "self_employed_pension_contributions_desired", + } + checked_variables = [ + v + for v in CPS_ONLY_IMPUTED_VARIABLES + if v in tbs.variables or v not in pending_policyengine_us_release + ] + valid = [v for v in checked_variables if v in tbs.variables] + assert len(valid) >= len(checked_variables) * 0.9, ( + f"Only {len(valid)}/{len(checked_variables)} " f"CPS-only vars exist in tax-benefit system" ) def test_retirement_contributions_in_cps_only(self): """All 5 retirement contribution vars should be in CPS_ONLY.""" expected = { - "traditional_401k_contributions", - "roth_401k_contributions", - "traditional_ira_contributions", - "roth_ira_contributions", - "self_employed_pension_contributions", + "traditional_401k_contributions_desired", + "roth_401k_contributions_desired", + "traditional_ira_contributions_desired", + "roth_ira_contributions_desired", + "self_employed_pension_contributions_desired", } missing = expected - set(CPS_ONLY_IMPUTED_VARIABLES) assert missing == set(), ( @@ -1052,17 +1069,35 @@ def 
test_zeroes_esi_premiums_for_non_policyholder_clone_records(self): class TestRetirementConstraints: - """Post-processing retirement constraints enforce IRS caps.""" + """Post-processing retirement constraints clean retirement predictions.""" @pytest.fixture def sample_predictions(self): return pd.DataFrame( { - "traditional_401k_contributions": [25000, -500, 5000, 10000, 3000], - "roth_401k_contributions": [30000, 2000, 0, 50000, 1000], - "traditional_ira_contributions": [8000, -100, 3000, 15000, 500], - "roth_ira_contributions": [10000, 1000, 0, 20000, 200], - "self_employed_pension_contributions": [80000, -200, 5000, 0, 100000], + "traditional_401k_contributions_desired": [ + 25000, + -500, + 5000, + 10000, + 3000, + ], + "roth_401k_contributions_desired": [30000, 2000, 0, 50000, 1000], + "traditional_ira_contributions_desired": [ + 8000, + -100, + 3000, + 15000, + 500, + ], + "roth_ira_contributions_desired": [10000, 1000, 0, 20000, 200], + "self_employed_pension_contributions_desired": [ + 80000, + -200, + 5000, + 0, + 100000, + ], } ) @@ -1081,44 +1116,53 @@ def test_non_negativity(self, sample_predictions, sample_features): for var in result.columns: assert (result[var] >= 0).all(), f"{var} has negative values" - def test_401k_capped_at_limit(self, sample_predictions, sample_features): + def test_401k_preserves_desired_amounts_above_limit( + self, sample_predictions, sample_features + ): result = apply_retirement_constraints(sample_predictions, sample_features, 2024) - from policyengine_us_data.utils.retirement_limits import get_retirement_limits - - limits = get_retirement_limits(2024) - age = sample_features["age"].values - catch_up = age >= 50 - cap = limits["401k"] + catch_up * limits["401k_catch_up"] - for var in ["traditional_401k_contributions", "roth_401k_contributions"]: - assert (result[var].values <= cap).all(), f"{var} exceeds 401k cap" + np.testing.assert_allclose( + result["traditional_401k_contributions_desired"].to_numpy(), + np.array([25000, 
0, 0, 10000, 3000]), + ) + np.testing.assert_allclose( + result["roth_401k_contributions_desired"].to_numpy(), + np.array([30000, 2000, 0, 50000, 1000]), + ) - def test_ira_capped_at_limit(self, sample_predictions, sample_features): + def test_ira_preserves_desired_amounts_above_limit( + self, sample_predictions, sample_features + ): result = apply_retirement_constraints(sample_predictions, sample_features, 2024) - from policyengine_us_data.utils.retirement_limits import get_retirement_limits - - limits = get_retirement_limits(2024) - age = sample_features["age"].values - catch_up = age >= 50 - cap = limits["ira"] + catch_up * limits["ira_catch_up"] - for var in ["traditional_ira_contributions", "roth_ira_contributions"]: - assert (result[var].values <= cap).all(), f"{var} exceeds IRA cap" + np.testing.assert_allclose( + result["traditional_ira_contributions_desired"].to_numpy(), + np.array([8000, 0, 3000, 15000, 500]), + ) + np.testing.assert_allclose( + result["roth_ira_contributions_desired"].to_numpy(), + np.array([10000, 1000, 0, 20000, 200]), + ) def test_401k_zeroed_without_employment_income( self, sample_predictions, sample_features ): result = apply_retirement_constraints(sample_predictions, sample_features, 2024) no_emp = sample_features["employment_income"] == 0 - for var in ["traditional_401k_contributions", "roth_401k_contributions"]: + for var in [ + "traditional_401k_contributions_desired", + "roth_401k_contributions_desired", + ]: assert (result[var].values[no_emp] == 0).all(), ( f"{var} should be zero without employment income" ) - def test_se_pension_capped(self, sample_predictions, sample_features): + def test_se_pension_preserves_desired_amounts( + self, sample_predictions, sample_features + ): result = apply_retirement_constraints(sample_predictions, sample_features, 2024) - se_income = sample_features["self_employment_income"].values - se_vals = result["self_employed_pension_contributions"].values - rate_cap = se_income * 0.25 - assert 
(se_vals <= rate_cap + 1).all(), "SE pension exceeds 25% of SE income" + np.testing.assert_allclose( + result["self_employed_pension_contributions_desired"].to_numpy(), + np.array([0, 0, 5000, 0, 100000]), + ) def test_se_pension_zeroed_without_se_income( self, sample_predictions, sample_features @@ -1126,7 +1170,7 @@ def test_se_pension_zeroed_without_se_income( result = apply_retirement_constraints(sample_predictions, sample_features, 2024) no_se = sample_features["self_employment_income"] == 0 assert ( - result["self_employed_pension_contributions"].values[no_se] == 0 + result["self_employed_pension_contributions_desired"].values[no_se] == 0 ).all(), "SE pension should be zero without SE income" diff --git a/tests/unit/test_income_target_mappings.py b/tests/unit/test_income_target_mappings.py index 17217f6e6..b8031d531 100644 --- a/tests/unit/test_income_target_mappings.py +++ b/tests/unit/test_income_target_mappings.py @@ -1,9 +1,27 @@ +from importlib.metadata import version + +from packaging.version import Version +from policyengine_us import CountryTaxBenefitSystem + import policyengine_us_data.db.etl_national_targets as etl_national_targets import policyengine_us_data.utils.loss as loss from policyengine_us_data.calibration.unified_calibration import load_target_config TARGET_CONFIG_PATH = "policyengine_us_data/calibration/target_config.yaml" +RETIREMENT_VARIABLE_RELEASE = Version("1.706.4") +REQUIRED_RETIREMENT_POLICYENGINE_US_VARIABLES = { + "traditional_401k_contributions_desired", + "roth_401k_contributions_desired", + "traditional_ira_contributions_desired", + "roth_ira_contributions_desired", + "self_employed_pension_contributions_desired", + "traditional_401k_contributions", + "roth_401k_contributions", + "traditional_ira_contributions", + "roth_ira_contributions", + "self_employed_pension_contributions", +} def _target_config_include_entries(): @@ -83,3 +101,48 @@ def test_bea_nipa_direct_sum_targets_are_in_default_target_config(): } assert 
expected_entries <= include_entries + + +def test_retirement_calibration_targets_use_contribution_outputs(): + include_entries = _target_config_include_entries() + expected_entries = { + ("traditional_401k_contributions", "national", None), + ("roth_401k_contributions", "national", None), + ("traditional_ira_contributions", "national", None), + ("roth_ira_contributions", "national", None), + ("self_employed_pension_contributions", "national", None), + } + + assert expected_entries <= include_entries + assert expected_entries <= { + (variable, "national", None) for variable in loss.HARD_CODED_TOTALS + } + + direct_sum_targets = { + target["variable"] + for target in etl_national_targets.extract_national_targets(year=2024)[ + "direct_sum_targets" + ] + } + assert { + "traditional_401k_contributions", + "roth_401k_contributions", + "traditional_ira_contributions", + "roth_ira_contributions", + "self_employed_pension_contributions", + } <= direct_sum_targets + + +def test_retirement_policyengine_us_variables_exist_after_release(): + tbs = CountryTaxBenefitSystem() + missing = REQUIRED_RETIREMENT_POLICYENGINE_US_VARIABLES - set(tbs.variables) + installed_version = Version(version("policyengine-us")) + + if installed_version < RETIREMENT_VARIABLE_RELEASE: + assert missing, ( + "Remove the temporary retirement variable release gate after " + "policyengine-us is bumped." + ) + return + + assert missing == set()