Skip to content

Commit 7c76aad

Browse files
authored
Merge pull request #1125 from PolicyEngine/codex/ira-contributions-desired
Write desired retirement contribution inputs
2 parents 9980b41 + 1cee973 commit 7c76aad

17 files changed

Lines changed: 309 additions & 236 deletions

changelog.d/1125.changed.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Write retirement contribution source data to the `*_desired` (pre-limit) input variables, and calibrate against the plain PolicyEngine-US contribution outputs, which reflect statutory caps.

policyengine_us_data/calibration/puf_impute.py

Lines changed: 15 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -160,11 +160,11 @@
160160
]
161161

162162
CPS_RETIREMENT_VARIABLES = [
163-
"traditional_401k_contributions",
164-
"roth_401k_contributions",
165-
"traditional_ira_contributions",
166-
"roth_ira_contributions",
167-
"self_employed_pension_contributions",
163+
"traditional_401k_contributions_desired",
164+
"roth_401k_contributions_desired",
165+
"traditional_ira_contributions_desired",
166+
"roth_ira_contributions_desired",
167+
"self_employed_pension_contributions_desired",
168168
]
169169

170170
RETIREMENT_DEMOGRAPHIC_PREDICTORS = [
@@ -845,18 +845,9 @@ def _impute_retirement_contributions(
845845
n_persons = len(data["person_id"][time_period])
846846
return {var: np.zeros(n_persons) for var in CPS_RETIREMENT_VARIABLES}
847847

848-
# Extract results and apply constraints
849-
limits = _get_retirement_limits(time_period)
850-
age = X_test["age"].values
851-
catch_up_eligible = age >= 50
852-
limit_401k = limits["401k"] + catch_up_eligible * limits["401k_catch_up"]
853-
limit_ira = limits["ira"] + catch_up_eligible * limits["ira_catch_up"]
848+
# Extract results and apply data-domain constraints. Statutory limits
849+
# are applied by PolicyEngine-US plain contribution variables.
854850
se_income = X_test["self_employment_income"].values
855-
se_pension_cap = np.minimum(
856-
se_income * limits["se_pension_rate"],
857-
limits["se_pension_dollar_limit"],
858-
)
859-
860851
emp_income = X_test["employment_income"].values
861852

862853
result = {}
@@ -866,31 +857,24 @@ def _impute_retirement_contributions(
866857
# Non-negativity
867858
vals = np.maximum(vals, 0)
868859

869-
# Cap 401k at year-specific limit
860+
# Zero out employment-based plans for records with no employment income.
870861
if "401k" in var:
871-
vals = np.minimum(vals, limit_401k)
872-
# Zero out for records with no employment income
873862
vals = np.where(emp_income > 0, vals, 0)
874863

875-
# Cap IRA at year-specific limit
876-
if "ira" in var:
877-
vals = np.minimum(vals, limit_ira)
878-
879-
# Cap SE pension at min(25% of SE income, dollar limit)
880-
if var == "self_employed_pension_contributions":
881-
vals = np.minimum(vals, se_pension_cap)
864+
# Zero out self-employed plans for records with no self-employment income.
865+
if var == "self_employed_pension_contributions_desired":
882866
vals = np.where(se_income > 0, vals, 0)
883867

884868
result[var] = vals
885869

886870
logger.info(
887871
"Imputed retirement contributions for PUF: "
888872
"401k mean=$%.0f, IRA mean=$%.0f, SE pension mean=$%.0f",
889-
result["traditional_401k_contributions"].mean()
890-
+ result["roth_401k_contributions"].mean(),
891-
result["traditional_ira_contributions"].mean()
892-
+ result["roth_ira_contributions"].mean(),
893-
result["self_employed_pension_contributions"].mean(),
873+
result["traditional_401k_contributions_desired"].mean()
874+
+ result["roth_401k_contributions_desired"].mean(),
875+
result["traditional_ira_contributions_desired"].mean()
876+
+ result["roth_ira_contributions_desired"].mean(),
877+
result["self_employed_pension_contributions_desired"].mean(),
894878
)
895879

896880
return result

policyengine_us_data/calibration/target_config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -240,7 +240,7 @@ include:
240240
geo_level: national
241241
- variable: roth_ira_contributions
242242
geo_level: national
243-
- variable: self_employed_pension_contribution_ald
243+
- variable: self_employed_pension_contributions
244244
geo_level: national
245245

246246
# === NATIONAL — IRS SOI domain-constrained dollar targets (restored: |rel_err| < 15%) ===

policyengine_us_data/datasets/cps/cps.py

Lines changed: 19 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1512,40 +1512,25 @@ def add_personal_income_variables(cps: h5py.File, person: DataFrame, year: int):
15121512
# nearly all of RETCB_VAL and left IRA contributions at $0.
15131513
#
15141514
# The proportional approach uses BEA/FRED and IRS SOI shares to
1515-
# split contributions into DC (401k) and IRA pools, then splits
1516-
# each pool into traditional/Roth using administrative fractions.
1517-
# See imputation_parameters.yaml for sources.
1518-
from policyengine_us_data.utils.retirement_limits import (
1519-
get_retirement_limits,
1520-
)
1521-
1522-
limits = get_retirement_limits(year)
1523-
LIMIT_401K = limits["401k"]
1524-
LIMIT_401K_CATCH_UP = limits["401k_catch_up"]
1525-
LIMIT_IRA = limits["ira"]
1526-
LIMIT_IRA_CATCH_UP = limits["ira_catch_up"]
1527-
CATCH_UP_AGE = 50
1528-
catch_up_eligible = person.A_AGE >= CATCH_UP_AGE
1529-
limit_401k = LIMIT_401K + catch_up_eligible * LIMIT_401K_CATCH_UP
1530-
limit_ira = LIMIT_IRA + catch_up_eligible * LIMIT_IRA_CATCH_UP
1531-
1515+
# split contributions into self-employed pension, DC (401k), and
1516+
# IRA pools, then splits the DC and IRA pools into traditional/Roth using
1517+
# administrative fractions. See imputation_parameters.yaml for
1518+
# sources.
15321519
retirement_contributions = person.RETCB_VAL
15331520
has_wages = person.WSAL_VAL > 0
15341521
has_se = person.SEMP_VAL > 0
15351522
has_earned_income = has_wages | has_se
15361523

1537-
# 1) Self-employed pension: cap at min(25% of SE income, dollar
1538-
# limit) so dual-income filers keep a remainder for 401(k)/IRA.
1539-
se_rate = p["se_pension_contribution_rate"]
1540-
se_dollar_cap = p["se_pension_contribution_dollar_limit"][year]
1541-
se_pension_cap = np.minimum(person.SEMP_VAL * se_rate, se_dollar_cap)
1542-
cps["self_employed_pension_contributions"] = np.where(
1524+
# 1) Self-employed pension: allocate a share without applying statutory
1525+
# limits. PolicyEngine-US applies those limits.
1526+
se_share = p["se_pension_share_of_retirement_contributions"]
1527+
cps["self_employed_pension_contributions_desired"] = np.where(
15431528
has_se,
1544-
np.minimum(retirement_contributions, se_pension_cap),
1529+
retirement_contributions * se_share,
15451530
0,
15461531
)
15471532
remaining = np.maximum(
1548-
retirement_contributions - cps["self_employed_pension_contributions"],
1533+
retirement_contributions - cps["self_employed_pension_contributions_desired"],
15491534
0,
15501535
)
15511536

@@ -1561,17 +1546,15 @@ def add_personal_income_variables(cps: h5py.File, person: DataFrame, year: int):
15611546
# earned income (including SE-only filers).
15621547
ira_pool = np.where(has_earned_income, remaining - dc_pool, 0)
15631548

1564-
# DC pool: split into traditional/Roth 401(k), cap at combined
1565-
# 401(k) limit.
1566-
dc_capped = np.minimum(dc_pool, limit_401k)
1567-
cps["traditional_401k_contributions"] = dc_capped * (1 - roth_dc_share)
1568-
cps["roth_401k_contributions"] = dc_capped * roth_dc_share
1569-
1570-
# IRA pool: split into traditional/Roth IRA, cap at combined
1571-
# IRA limit.
1572-
ira_capped = np.minimum(ira_pool, limit_ira)
1573-
cps["traditional_ira_contributions"] = ira_capped * trad_ira_share
1574-
cps["roth_ira_contributions"] = ira_capped * (1 - trad_ira_share)
1549+
# DC pool: split into desired traditional/Roth 401(k) contributions.
1550+
# The statutory elective deferral limit is applied in policyengine-us.
1551+
cps["traditional_401k_contributions_desired"] = dc_pool * (1 - roth_dc_share)
1552+
cps["roth_401k_contributions_desired"] = dc_pool * roth_dc_share
1553+
1554+
# IRA pool: split into desired traditional/Roth IRA contributions.
1555+
# The statutory IRA limit is applied in policyengine-us.
1556+
cps["traditional_ira_contributions_desired"] = ira_pool * trad_ira_share
1557+
cps["roth_ira_contributions_desired"] = ira_pool * (1 - trad_ira_share)
15751558
# Allocate capital gains into long-term and short-term based on aggregate split.
15761559
cps["long_term_capital_gains"] = person.CAP_VAL * (p["long_term_capgain_fraction"])
15771560
cps["short_term_capital_gains"] = person.CAP_VAL * (

policyengine_us_data/datasets/cps/extended_cps.py

Lines changed: 21 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -56,10 +56,6 @@
5656
from policyengine_us_data.utils.dataset_validation import (
5757
assert_no_computed_policyengine_us_variables_exported,
5858
)
59-
from policyengine_us_data.utils.retirement_limits import (
60-
get_retirement_limits,
61-
get_se_pension_limits,
62-
)
6359
from policyengine_us_data.utils.randomness import seeded_rng
6460

6561
logger = logging.getLogger(__name__)
@@ -154,11 +150,11 @@ def _supports_structural_mortgage_inputs() -> bool:
154150
"taxable_sep_distributions",
155151
"tax_exempt_sep_distributions",
156152
# Retirement contributions
157-
"traditional_401k_contributions",
158-
"roth_401k_contributions",
159-
"traditional_ira_contributions",
160-
"roth_ira_contributions",
161-
"self_employed_pension_contributions",
153+
"traditional_401k_contributions_desired",
154+
"roth_401k_contributions_desired",
155+
"traditional_ira_contributions_desired",
156+
"roth_ira_contributions_desired",
157+
"self_employed_pension_contributions_desired",
162158
# Social Security sub-components
163159
"social_security_retirement",
164160
"social_security_disability",
@@ -753,50 +749,34 @@ def _impute_cps_only_variables(
753749

754750

755751
def apply_retirement_constraints(predictions, X_test, time_period):
756-
"""Enforce IRS contribution limits on retirement variable predictions.
752+
"""Clean retirement contribution predictions for data-domain eligibility.
757753
758754
Args:
759755
predictions: DataFrame of QRF predictions for retirement
760756
contribution variables.
761757
X_test: DataFrame with at least
762758
``employment_income`` and ``self_employment_income``.
763-
time_period: Tax year (int) for IRS limit look-up.
759+
time_period: Tax year (int), accepted for API compatibility.
764760
765761
Returns:
766-
DataFrame with constrained values (same columns).
762+
DataFrame with cleaned values (same columns).
767763
"""
768-
limits = get_retirement_limits(time_period)
769-
se_limits = get_se_pension_limits(time_period)
770-
771-
age = X_test["age"].values
772-
catch_up = age >= 50
773764
emp_income = X_test["employment_income"].values
774765
se_income = X_test["self_employment_income"].values
775766

776-
limit_401k = limits["401k"] + catch_up * limits["401k_catch_up"]
777-
limit_ira = limits["ira"] + catch_up * limits["ira_catch_up"]
778-
se_pension_cap = np.minimum(
779-
se_income * se_limits["se_pension_rate"],
780-
se_limits["se_pension_dollar_limit"],
781-
)
782-
783-
# Explicit mapping: variable -> (cap array, zero_mask or None).
767+
# Explicit mapping: variable -> zero_mask or None. Statutory limits
768+
# are applied by PolicyEngine-US plain contribution variables.
784769
_CONSTRAINT_MAP = {
785-
"traditional_401k_contributions": (limit_401k, emp_income == 0),
786-
"roth_401k_contributions": (limit_401k, emp_income == 0),
787-
"traditional_ira_contributions": (limit_ira, None),
788-
"roth_ira_contributions": (limit_ira, None),
789-
"self_employed_pension_contributions": (
790-
se_pension_cap,
791-
se_income == 0,
792-
),
770+
"traditional_401k_contributions_desired": emp_income == 0,
771+
"roth_401k_contributions_desired": emp_income == 0,
772+
"traditional_ira_contributions_desired": None,
773+
"roth_ira_contributions_desired": None,
774+
"self_employed_pension_contributions_desired": se_income == 0,
793775
}
794776

795777
result = predictions.clip(lower=0)
796778
for var in result.columns:
797-
cap, zero_mask = _CONSTRAINT_MAP.get(var, (None, None))
798-
if cap is not None:
799-
result[var] = np.minimum(result[var].values, cap)
779+
zero_mask = _CONSTRAINT_MAP.get(var)
800780
if zero_mask is not None:
801781
result.loc[zero_mask, var] = 0
802782

@@ -836,11 +816,11 @@ def reconcile_ss_subcomponents(predictions, total_ss):
836816

837817

838818
_RETIREMENT_VARS = {
839-
"traditional_401k_contributions",
840-
"roth_401k_contributions",
841-
"traditional_ira_contributions",
842-
"roth_ira_contributions",
843-
"self_employed_pension_contributions",
819+
"traditional_401k_contributions_desired",
820+
"roth_401k_contributions_desired",
821+
"traditional_ira_contributions_desired",
822+
"roth_ira_contributions_desired",
823+
"self_employed_pension_contributions_desired",
844824
}
845825

846826
_SS_SUBCOMPONENT_VARS = {

policyengine_us_data/datasets/cps/imputation_parameters.yaml

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,15 @@ long_term_capgain_fraction: 0.880
2121
# Used to split CPS RETCB_VAL (a single bundled total) into
2222
# account-type-specific variables.
2323
#
24-
# DC vs IRA share of non-SE retirement contributions.
24+
# Self-employed pension share of retirement contributions.
25+
# Self-employed pension: $30.13B (IRS SOI Publication 1304, Table 1.4,
26+
# TY 2023, "Payments to a Keogh plan")
27+
# Combined employee DC + IRA + self-employed pension: $655.53B
28+
# Share: $30.13B / $655.53B = 4.6%
29+
# https://www.irs.gov/statistics/soi-tax-stats-individual-statistical-tables-by-size-of-adjusted-gross-income
30+
se_pension_share_of_retirement_contributions: 0.046
31+
32+
# DC vs IRA share of remaining non-SE retirement contributions.
2533
# Employee DC: $567.9B (BEA/FRED Y351RC1A027NBEA minus W351RC0A144NBEA)
2634
# Total IRA: $57.5B (IRS SOI Tables 5 & 6, TY 2022)
2735
# Combined: $625.4B
@@ -46,8 +54,9 @@ roth_share_of_dc_contributions: 0.15
4654
# https://www.irs.gov/statistics/soi-tax-stats-accumulation-and-distribution-of-individual-retirement-arrangements
4755
traditional_share_of_ira_contributions: 0.392
4856

49-
# SE pension contribution cap.
50-
# SEP-IRA / Solo 401(k) contributions are capped at the lesser of
57+
# SE pension statutory parameters retained for retirement-limit utilities.
58+
# These are not used to reduce desired source contribution data.
59+
# SEP-IRA / Solo 401(k) contributions are limited to the lesser of
5160
# a percentage of net SE earnings and a dollar limit.
5261
# The 25% rate is technically ~20% for sole proprietors after the
5362
# deduction-for-half-of-SE-tax adjustment, but 25% is the standard

policyengine_us_data/datasets/puf/puf.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -692,7 +692,7 @@ def preprocess_puf(puf: pd.DataFrame) -> pd.DataFrame:
692692
puf["taxable_ira_distributions"] = puf.E01400
693693
puf["tax_exempt_interest_income"] = puf.E00400
694694
puf["tax_exempt_pension_income"] = puf.E01500 - puf.E01700
695-
puf["traditional_ira_contributions"] = puf.E03150
695+
puf["traditional_ira_contributions_desired"] = puf.E03150
696696
puf["unrecaptured_section_1250_gain"] = puf.E24515
697697

698698
puf["foreign_tax_credit"] = puf.E07300
@@ -835,7 +835,7 @@ def preprocess_puf(puf: pd.DataFrame) -> pd.DataFrame:
835835
"taxable_ira_distributions",
836836
"tax_exempt_interest_income",
837837
"tax_exempt_pension_income",
838-
"traditional_ira_contributions",
838+
"traditional_ira_contributions_desired",
839839
"unrecaptured_section_1250_gain",
840840
"foreign_tax_credit",
841841
"amt_foreign_tax_credit",

policyengine_us_data/db/etl_national_targets.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -659,15 +659,15 @@ def extract_national_targets(year: int = DEFAULT_YEAR):
659659
"year": 2024,
660660
},
661661
{
662-
"variable": "self_employed_pension_contribution_ald",
662+
"variable": "self_employed_pension_contributions",
663663
"value": RETIREMENT_CONTRIBUTION_TARGETS[
664-
"self_employed_pension_contribution_ald"
664+
"self_employed_pension_contributions"
665665
]["value"],
666666
"source": RETIREMENT_CONTRIBUTION_TARGETS[
667-
"self_employed_pension_contribution_ald"
667+
"self_employed_pension_contributions"
668668
]["source"],
669669
"notes": RETIREMENT_CONTRIBUTION_TARGETS[
670-
"self_employed_pension_contribution_ald"
670+
"self_employed_pension_contributions"
671671
]["notes"],
672672
"year": 2024,
673673
},

policyengine_us_data/storage/calibration_targets/soi_metadata.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
),
1515
"source_year": 2023,
1616
},
17-
"self_employed_pension_contribution_ald": {
17+
"self_employed_pension_contributions": {
1818
"value": 30.130848e9,
1919
"source": "https://www.irs.gov/statistics/soi-tax-stats-individual-statistical-tables-by-size-of-adjusted-gross-income",
2020
"notes": (

policyengine_us_data/utils/loss.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -137,9 +137,9 @@
137137
#
138138
# traditional_ira_contributions: IRS SOI Publication 1304, Table 1.4
139139
# (TY 2023), "IRA payments" deduction — $13.77B (col DU, row
140-
# "All returns, total"). This is the actual above-the-line
141-
# deduction claimed on returns. The variable flows directly into
142-
# the ALD with no deductibility logic in policyengine-us, so the
140+
# "All returns, total"). This is the above-the-line deduction
141+
# claimed on returns. The variable flows directly into the ALD
142+
# with no deductibility logic in policyengine-us, so the
143143
# target must match the deduction, not total contributions.
144144
# https://www.irs.gov/statistics/soi-tax-stats-individual-statistical-tables-by-size-of-adjusted-gross-income
145145
"traditional_ira_contributions": RETIREMENT_CONTRIBUTION_TARGETS[
@@ -159,15 +159,15 @@
159159
# https://corporate.vanguard.com/content/dam/corp/research/pdf/how_america_saves_report_2024.pdf
160160
"traditional_401k_contributions": 482.7e9,
161161
"roth_401k_contributions": 85.2e9,
162-
# self_employed_pension_contribution_ald: IRS SOI Publication
162+
# self_employed_pension_contributions: IRS SOI Publication
163163
# 1304, Table 1.4 (TY 2023), "Payments to a Keogh plan" —
164164
# $30.13B (col DM, row "All returns, total"). Includes
165165
# SEP-IRAs, SIMPLE-IRAs, and traditional Keogh/HR-10 plans.
166-
# Targeting the ALD (not the input) because policyengine-us
167-
# applies a min(contributions, SE_income) cap.
166+
# Targeting the contribution output because policyengine-us applies
167+
# statutory limits before the ALD formula.
168168
# https://www.irs.gov/statistics/soi-tax-stats-individual-statistical-tables-by-size-of-adjusted-gross-income
169-
"self_employed_pension_contribution_ald": RETIREMENT_CONTRIBUTION_TARGETS[
170-
"self_employed_pension_contribution_ald"
169+
"self_employed_pension_contributions": RETIREMENT_CONTRIBUTION_TARGETS[
170+
"self_employed_pension_contributions"
171171
]["value"],
172172
# roth_ira_contributions: IRS SOI IRA Accumulation Tables 5 & 6
173173
# (TY 2022, latest published). Total Roth IRA contributions =

0 commit comments

Comments
 (0)