Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/1125.changed.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Write retirement contribution source data to the pre-limit `*_contributions_desired` input variables, and point calibration targets at the plain PolicyEngine-US contribution outputs, which are computed after statutory caps are applied.
46 changes: 15 additions & 31 deletions policyengine_us_data/calibration/puf_impute.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,11 +160,11 @@
]

CPS_RETIREMENT_VARIABLES = [
"traditional_401k_contributions",
"roth_401k_contributions",
"traditional_ira_contributions",
"roth_ira_contributions",
"self_employed_pension_contributions",
"traditional_401k_contributions_desired",
"roth_401k_contributions_desired",
"traditional_ira_contributions_desired",
"roth_ira_contributions_desired",
"self_employed_pension_contributions_desired",
]

RETIREMENT_DEMOGRAPHIC_PREDICTORS = [
Expand Down Expand Up @@ -845,18 +845,9 @@ def _impute_retirement_contributions(
n_persons = len(data["person_id"][time_period])
return {var: np.zeros(n_persons) for var in CPS_RETIREMENT_VARIABLES}

# Extract results and apply constraints
limits = _get_retirement_limits(time_period)
age = X_test["age"].values
catch_up_eligible = age >= 50
limit_401k = limits["401k"] + catch_up_eligible * limits["401k_catch_up"]
limit_ira = limits["ira"] + catch_up_eligible * limits["ira_catch_up"]
# Extract results and apply data-domain constraints. Statutory limits
# are applied by PolicyEngine-US plain contribution variables.
se_income = X_test["self_employment_income"].values
se_pension_cap = np.minimum(
se_income * limits["se_pension_rate"],
limits["se_pension_dollar_limit"],
)

emp_income = X_test["employment_income"].values

result = {}
Expand All @@ -866,31 +857,24 @@ def _impute_retirement_contributions(
# Non-negativity
vals = np.maximum(vals, 0)

# Cap 401k at year-specific limit
# Zero out employment-based plans for records with no employment income.
if "401k" in var:
vals = np.minimum(vals, limit_401k)
# Zero out for records with no employment income
vals = np.where(emp_income > 0, vals, 0)

# Cap IRA at year-specific limit
if "ira" in var:
vals = np.minimum(vals, limit_ira)

# Cap SE pension at min(25% of SE income, dollar limit)
if var == "self_employed_pension_contributions":
vals = np.minimum(vals, se_pension_cap)
# Zero out self-employed plans for records with no self-employment income.
if var == "self_employed_pension_contributions_desired":
vals = np.where(se_income > 0, vals, 0)

result[var] = vals

logger.info(
"Imputed retirement contributions for PUF: "
"401k mean=$%.0f, IRA mean=$%.0f, SE pension mean=$%.0f",
result["traditional_401k_contributions"].mean()
+ result["roth_401k_contributions"].mean(),
result["traditional_ira_contributions"].mean()
+ result["roth_ira_contributions"].mean(),
result["self_employed_pension_contributions"].mean(),
result["traditional_401k_contributions_desired"].mean()
+ result["roth_401k_contributions_desired"].mean(),
result["traditional_ira_contributions_desired"].mean()
+ result["roth_ira_contributions_desired"].mean(),
result["self_employed_pension_contributions_desired"].mean(),
)

return result
Expand Down
2 changes: 1 addition & 1 deletion policyengine_us_data/calibration/target_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,7 @@ include:
geo_level: national
- variable: roth_ira_contributions
geo_level: national
- variable: self_employed_pension_contribution_ald
- variable: self_employed_pension_contributions
geo_level: national

# === NATIONAL — IRS SOI domain-constrained dollar targets (restored: |rel_err| < 15%) ===
Expand Down
55 changes: 19 additions & 36 deletions policyengine_us_data/datasets/cps/cps.py
Original file line number Diff line number Diff line change
Expand Up @@ -1512,40 +1512,25 @@ def add_personal_income_variables(cps: h5py.File, person: DataFrame, year: int):
# nearly all of RETCB_VAL and left IRA contributions at $0.
#
# The proportional approach uses BEA/FRED and IRS SOI shares to
# split contributions into DC (401k) and IRA pools, then splits
# each pool into traditional/Roth using administrative fractions.
# See imputation_parameters.yaml for sources.
from policyengine_us_data.utils.retirement_limits import (
get_retirement_limits,
)

limits = get_retirement_limits(year)
LIMIT_401K = limits["401k"]
LIMIT_401K_CATCH_UP = limits["401k_catch_up"]
LIMIT_IRA = limits["ira"]
LIMIT_IRA_CATCH_UP = limits["ira_catch_up"]
CATCH_UP_AGE = 50
catch_up_eligible = person.A_AGE >= CATCH_UP_AGE
limit_401k = LIMIT_401K + catch_up_eligible * LIMIT_401K_CATCH_UP
limit_ira = LIMIT_IRA + catch_up_eligible * LIMIT_IRA_CATCH_UP

# split contributions into self-employed pension, DC (401k), and
# IRA pools, then splits the DC and IRA pools into traditional/Roth
# using administrative fractions. See imputation_parameters.yaml
# for sources.
retirement_contributions = person.RETCB_VAL
has_wages = person.WSAL_VAL > 0
has_se = person.SEMP_VAL > 0
has_earned_income = has_wages | has_se

# 1) Self-employed pension: cap at min(25% of SE income, dollar
# limit) so dual-income filers keep a remainder for 401(k)/IRA.
se_rate = p["se_pension_contribution_rate"]
se_dollar_cap = p["se_pension_contribution_dollar_limit"][year]
se_pension_cap = np.minimum(person.SEMP_VAL * se_rate, se_dollar_cap)
cps["self_employed_pension_contributions"] = np.where(
# 1) Self-employed pension: allocate a share without applying statutory
# limits. PolicyEngine-US applies those limits.
se_share = p["se_pension_share_of_retirement_contributions"]
cps["self_employed_pension_contributions_desired"] = np.where(
has_se,
np.minimum(retirement_contributions, se_pension_cap),
retirement_contributions * se_share,
0,
)
remaining = np.maximum(
retirement_contributions - cps["self_employed_pension_contributions"],
retirement_contributions - cps["self_employed_pension_contributions_desired"],
0,
)

Expand All @@ -1561,17 +1546,15 @@ def add_personal_income_variables(cps: h5py.File, person: DataFrame, year: int):
# earned income (including SE-only filers).
ira_pool = np.where(has_earned_income, remaining - dc_pool, 0)

# DC pool: split into traditional/Roth 401(k), cap at combined
# 401(k) limit.
dc_capped = np.minimum(dc_pool, limit_401k)
cps["traditional_401k_contributions"] = dc_capped * (1 - roth_dc_share)
cps["roth_401k_contributions"] = dc_capped * roth_dc_share

# IRA pool: split into traditional/Roth IRA, cap at combined
# IRA limit.
ira_capped = np.minimum(ira_pool, limit_ira)
cps["traditional_ira_contributions"] = ira_capped * trad_ira_share
cps["roth_ira_contributions"] = ira_capped * (1 - trad_ira_share)
# DC pool: split into desired traditional/Roth 401(k) contributions.
# The statutory elective deferral limit is applied in policyengine-us.
cps["traditional_401k_contributions_desired"] = dc_pool * (1 - roth_dc_share)
cps["roth_401k_contributions_desired"] = dc_pool * roth_dc_share

# IRA pool: split into desired traditional/Roth IRA contributions.
# The statutory IRA limit is applied in policyengine-us.
cps["traditional_ira_contributions_desired"] = ira_pool * trad_ira_share
cps["roth_ira_contributions_desired"] = ira_pool * (1 - trad_ira_share)
# Allocate capital gains into long-term and short-term based on aggregate split.
cps["long_term_capital_gains"] = person.CAP_VAL * (p["long_term_capgain_fraction"])
cps["short_term_capital_gains"] = person.CAP_VAL * (
Expand Down
62 changes: 21 additions & 41 deletions policyengine_us_data/datasets/cps/extended_cps.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,10 +56,6 @@
from policyengine_us_data.utils.dataset_validation import (
assert_no_computed_policyengine_us_variables_exported,
)
from policyengine_us_data.utils.retirement_limits import (
get_retirement_limits,
get_se_pension_limits,
)
from policyengine_us_data.utils.randomness import seeded_rng

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -154,11 +150,11 @@ def _supports_structural_mortgage_inputs() -> bool:
"taxable_sep_distributions",
"tax_exempt_sep_distributions",
# Retirement contributions
"traditional_401k_contributions",
"roth_401k_contributions",
"traditional_ira_contributions",
"roth_ira_contributions",
"self_employed_pension_contributions",
"traditional_401k_contributions_desired",
"roth_401k_contributions_desired",
"traditional_ira_contributions_desired",
"roth_ira_contributions_desired",
"self_employed_pension_contributions_desired",
# Social Security sub-components
"social_security_retirement",
"social_security_disability",
Expand Down Expand Up @@ -753,50 +749,34 @@ def _impute_cps_only_variables(


def apply_retirement_constraints(predictions, X_test, time_period):
"""Enforce IRS contribution limits on retirement variable predictions.
"""Clean retirement contribution predictions for data-domain eligibility.

Args:
predictions: DataFrame of QRF predictions for retirement
contribution variables.
X_test: DataFrame with at least ``age``,
``employment_income``, and ``self_employment_income``.
time_period: Tax year (int) for IRS limit look-up.
time_period: Tax year (int), accepted for API compatibility.

Returns:
DataFrame with constrained values (same columns).
DataFrame with cleaned values (same columns).
"""
limits = get_retirement_limits(time_period)
se_limits = get_se_pension_limits(time_period)

age = X_test["age"].values
catch_up = age >= 50
emp_income = X_test["employment_income"].values
se_income = X_test["self_employment_income"].values

limit_401k = limits["401k"] + catch_up * limits["401k_catch_up"]
limit_ira = limits["ira"] + catch_up * limits["ira_catch_up"]
se_pension_cap = np.minimum(
se_income * se_limits["se_pension_rate"],
se_limits["se_pension_dollar_limit"],
)

# Explicit mapping: variable -> (cap array, zero_mask or None).
# Explicit mapping: variable -> zero_mask or None. Statutory limits
# are applied by PolicyEngine-US plain contribution variables.
_CONSTRAINT_MAP = {
"traditional_401k_contributions": (limit_401k, emp_income == 0),
"roth_401k_contributions": (limit_401k, emp_income == 0),
"traditional_ira_contributions": (limit_ira, None),
"roth_ira_contributions": (limit_ira, None),
"self_employed_pension_contributions": (
se_pension_cap,
se_income == 0,
),
"traditional_401k_contributions_desired": emp_income == 0,
"roth_401k_contributions_desired": emp_income == 0,
"traditional_ira_contributions_desired": None,
"roth_ira_contributions_desired": None,
"self_employed_pension_contributions_desired": se_income == 0,
}

result = predictions.clip(lower=0)
for var in result.columns:
cap, zero_mask = _CONSTRAINT_MAP.get(var, (None, None))
if cap is not None:
result[var] = np.minimum(result[var].values, cap)
zero_mask = _CONSTRAINT_MAP.get(var)
if zero_mask is not None:
result.loc[zero_mask, var] = 0

Expand Down Expand Up @@ -836,11 +816,11 @@ def reconcile_ss_subcomponents(predictions, total_ss):


_RETIREMENT_VARS = {
"traditional_401k_contributions",
"roth_401k_contributions",
"traditional_ira_contributions",
"roth_ira_contributions",
"self_employed_pension_contributions",
"traditional_401k_contributions_desired",
"roth_401k_contributions_desired",
"traditional_ira_contributions_desired",
"roth_ira_contributions_desired",
"self_employed_pension_contributions_desired",
}

_SS_SUBCOMPONENT_VARS = {
Expand Down
15 changes: 12 additions & 3 deletions policyengine_us_data/datasets/cps/imputation_parameters.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,15 @@ long_term_capgain_fraction: 0.880
# Used to split CPS RETCB_VAL (a single bundled total) into
# account-type-specific variables.
#
# DC vs IRA share of non-SE retirement contributions.
# Self-employed pension share of retirement contributions.
# Self-employed pension: $30.13B (IRS SOI Publication 1304, Table 1.4,
# TY 2023, "Payments to a Keogh plan")
# Combined employee DC + IRA + self-employed pension: $655.53B
# Share: $30.13B / $655.53B = 4.6%
# https://www.irs.gov/statistics/soi-tax-stats-individual-statistical-tables-by-size-of-adjusted-gross-income
se_pension_share_of_retirement_contributions: 0.046

# DC vs IRA share of remaining non-SE retirement contributions.
# Employee DC: $567.9B (BEA/FRED Y351RC1A027NBEA minus W351RC0A144NBEA)
# Total IRA: $57.5B (IRS SOI Tables 5 & 6, TY 2022)
# Combined: $625.4B
Expand All @@ -46,8 +54,9 @@ roth_share_of_dc_contributions: 0.15
# https://www.irs.gov/statistics/soi-tax-stats-accumulation-and-distribution-of-individual-retirement-arrangements
traditional_share_of_ira_contributions: 0.392

# SE pension contribution cap.
# SEP-IRA / Solo 401(k) contributions are capped at the lesser of
# SE pension statutory parameters retained for retirement-limit utilities.
# These are not used to reduce desired source contribution data.
# SEP-IRA / Solo 401(k) contributions are limited to the lesser of
# a percentage of net SE earnings and a dollar limit.
# The 25% rate is technically ~20% for sole proprietors after the
# deduction-for-half-of-SE-tax adjustment, but 25% is the standard
Expand Down
4 changes: 2 additions & 2 deletions policyengine_us_data/datasets/puf/puf.py
Original file line number Diff line number Diff line change
Expand Up @@ -692,7 +692,7 @@ def preprocess_puf(puf: pd.DataFrame) -> pd.DataFrame:
puf["taxable_ira_distributions"] = puf.E01400
puf["tax_exempt_interest_income"] = puf.E00400
puf["tax_exempt_pension_income"] = puf.E01500 - puf.E01700
puf["traditional_ira_contributions"] = puf.E03150
puf["traditional_ira_contributions_desired"] = puf.E03150
puf["unrecaptured_section_1250_gain"] = puf.E24515

puf["foreign_tax_credit"] = puf.E07300
Expand Down Expand Up @@ -835,7 +835,7 @@ def preprocess_puf(puf: pd.DataFrame) -> pd.DataFrame:
"taxable_ira_distributions",
"tax_exempt_interest_income",
"tax_exempt_pension_income",
"traditional_ira_contributions",
"traditional_ira_contributions_desired",
"unrecaptured_section_1250_gain",
"foreign_tax_credit",
"amt_foreign_tax_credit",
Expand Down
8 changes: 4 additions & 4 deletions policyengine_us_data/db/etl_national_targets.py
Original file line number Diff line number Diff line change
Expand Up @@ -659,15 +659,15 @@ def extract_national_targets(year: int = DEFAULT_YEAR):
"year": 2024,
},
{
"variable": "self_employed_pension_contribution_ald",
"variable": "self_employed_pension_contributions",
"value": RETIREMENT_CONTRIBUTION_TARGETS[
"self_employed_pension_contribution_ald"
"self_employed_pension_contributions"
]["value"],
"source": RETIREMENT_CONTRIBUTION_TARGETS[
"self_employed_pension_contribution_ald"
"self_employed_pension_contributions"
]["source"],
"notes": RETIREMENT_CONTRIBUTION_TARGETS[
"self_employed_pension_contribution_ald"
"self_employed_pension_contributions"
]["notes"],
"year": 2024,
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
),
"source_year": 2023,
},
"self_employed_pension_contribution_ald": {
"self_employed_pension_contributions": {
"value": 30.130848e9,
"source": "https://www.irs.gov/statistics/soi-tax-stats-individual-statistical-tables-by-size-of-adjusted-gross-income",
"notes": (
Expand Down
16 changes: 8 additions & 8 deletions policyengine_us_data/utils/loss.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,9 +137,9 @@
#
# traditional_ira_contributions: IRS SOI Publication 1304, Table 1.4
# (TY 2023), "IRA payments" deduction — $13.77B (col DU, row
# "All returns, total"). This is the actual above-the-line
# deduction claimed on returns. The variable flows directly into
# the ALD with no deductibility logic in policyengine-us, so the
# "All returns, total"). This is the above-the-line deduction
# claimed on returns. The variable flows directly into the ALD
# with no deductibility logic in policyengine-us, so the
# target must match the deduction, not total contributions.
# https://www.irs.gov/statistics/soi-tax-stats-individual-statistical-tables-by-size-of-adjusted-gross-income
"traditional_ira_contributions": RETIREMENT_CONTRIBUTION_TARGETS[
Expand All @@ -159,15 +159,15 @@
# https://corporate.vanguard.com/content/dam/corp/research/pdf/how_america_saves_report_2024.pdf
"traditional_401k_contributions": 482.7e9,
"roth_401k_contributions": 85.2e9,
# self_employed_pension_contribution_ald: IRS SOI Publication
# self_employed_pension_contributions: IRS SOI Publication
# 1304, Table 1.4 (TY 2023), "Payments to a Keogh plan" —
# $30.13B (col DM, row "All returns, total"). Includes
# SEP-IRAs, SIMPLE-IRAs, and traditional Keogh/HR-10 plans.
# Targeting the ALD (not the input) because policyengine-us
# applies a min(contributions, SE_income) cap.
# Targeting the contribution output because policyengine-us applies
# statutory limits before the ALD formula.
# https://www.irs.gov/statistics/soi-tax-stats-individual-statistical-tables-by-size-of-adjusted-gross-income
"self_employed_pension_contribution_ald": RETIREMENT_CONTRIBUTION_TARGETS[
"self_employed_pension_contribution_ald"
"self_employed_pension_contributions": RETIREMENT_CONTRIBUTION_TARGETS[
"self_employed_pension_contributions"
]["value"],
# roth_ira_contributions: IRS SOI IRA Accumulation Tables 5 & 6
# (TY 2022, latest published). Total Roth IRA contributions =
Expand Down
2 changes: 1 addition & 1 deletion policyengine_us_data/utils/national_target_parity.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@
"traditional_ira_contributions",
"traditional_401k_contributions",
"roth_401k_contributions",
"self_employed_pension_contribution_ald",
"self_employed_pension_contributions",
"roth_ira_contributions",
}

Expand Down
Loading