Skip to content

Commit c1f08e4

Browse files
MaxGhenis and claude committed
Write 401(k) deferrals to the new *_desired inputs
PolicyEngine-US PR #8391 makes traditional_401k_contributions and roth_401k_contributions formula variables that proportionally cap the combined elective deferrals at the IRC §402(g) limit. The formulas read the new pre-cap inputs traditional_401k_contributions_desired and roth_401k_contributions_desired. This change updates the CPS imputation and PUF clone pipelines to write the desired inputs instead of the canonical names, so the §402(g) cap takes effect in microsimulation. Calibration targets continue to point at the canonical names (post-cap formula output), so the loss matrix and database targets are unchanged. IRA contribution variables are not renamed yet — those will move when PolicyEngine/policyengine-us#8388 lands.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 104ff16 commit c1f08e4

7 files changed

Lines changed: 47 additions & 34 deletions

File tree

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Write 401(k) elective deferrals to `traditional_401k_contributions_desired` and `roth_401k_contributions_desired` so PolicyEngine-US can apply the §402(g) cap to the canonical `traditional_401k_contributions` and `roth_401k_contributions` variables.

policyengine_us_data/calibration/puf_impute.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -160,8 +160,8 @@
160160
]
161161

162162
CPS_RETIREMENT_VARIABLES = [
163-
"traditional_401k_contributions",
164-
"roth_401k_contributions",
163+
"traditional_401k_contributions_desired",
164+
"roth_401k_contributions_desired",
165165
"traditional_ira_contributions",
166166
"roth_ira_contributions",
167167
"self_employed_pension_contributions",
@@ -886,8 +886,8 @@ def _impute_retirement_contributions(
886886
logger.info(
887887
"Imputed retirement contributions for PUF: "
888888
"401k mean=$%.0f, IRA mean=$%.0f, SE pension mean=$%.0f",
889-
result["traditional_401k_contributions"].mean()
890-
+ result["roth_401k_contributions"].mean(),
889+
result["traditional_401k_contributions_desired"].mean()
890+
+ result["roth_401k_contributions_desired"].mean(),
891891
result["traditional_ira_contributions"].mean()
892892
+ result["roth_ira_contributions"].mean(),
893893
result["self_employed_pension_contributions"].mean(),

policyengine_us_data/datasets/cps/cps.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1410,8 +1410,8 @@ def add_personal_income_variables(cps: h5py.File, person: DataFrame, year: int):
14101410
# DC pool: split into traditional/Roth 401(k), cap at combined
14111411
# 401(k) limit.
14121412
dc_capped = np.minimum(dc_pool, limit_401k)
1413-
cps["traditional_401k_contributions"] = dc_capped * (1 - roth_dc_share)
1414-
cps["roth_401k_contributions"] = dc_capped * roth_dc_share
1413+
cps["traditional_401k_contributions_desired"] = dc_capped * (1 - roth_dc_share)
1414+
cps["roth_401k_contributions_desired"] = dc_capped * roth_dc_share
14151415

14161416
# IRA pool: split into traditional/Roth IRA, cap at combined
14171417
# IRA limit.

policyengine_us_data/datasets/cps/extended_cps.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -159,8 +159,8 @@ def _supports_structural_mortgage_inputs() -> bool:
159159
"taxable_sep_distributions",
160160
"tax_exempt_sep_distributions",
161161
# Retirement contributions
162-
"traditional_401k_contributions",
163-
"roth_401k_contributions",
162+
"traditional_401k_contributions_desired",
163+
"roth_401k_contributions_desired",
164164
"traditional_ira_contributions",
165165
"roth_ira_contributions",
166166
"self_employed_pension_contributions",
@@ -587,8 +587,8 @@ def apply_retirement_constraints(predictions, X_test, time_period):
587587

588588
# Explicit mapping: variable -> (cap array, zero_mask or None).
589589
_CONSTRAINT_MAP = {
590-
"traditional_401k_contributions": (limit_401k, emp_income == 0),
591-
"roth_401k_contributions": (limit_401k, emp_income == 0),
590+
"traditional_401k_contributions_desired": (limit_401k, emp_income == 0),
591+
"roth_401k_contributions_desired": (limit_401k, emp_income == 0),
592592
"traditional_ira_contributions": (limit_ira, None),
593593
"roth_ira_contributions": (limit_ira, None),
594594
"self_employed_pension_contributions": (
@@ -641,8 +641,8 @@ def reconcile_ss_subcomponents(predictions, total_ss):
641641

642642

643643
_RETIREMENT_VARS = {
644-
"traditional_401k_contributions",
645-
"roth_401k_contributions",
644+
"traditional_401k_contributions_desired",
645+
"roth_401k_contributions_desired",
646646
"traditional_ira_contributions",
647647
"roth_ira_contributions",
648648
"self_employed_pension_contributions",

tests/unit/calibration/test_calibration_puf_impute.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -406,8 +406,8 @@ def calculate_dataframe(self, columns):
406406
"qualified_dividend_income": [0.0, 0.0],
407407
"taxable_pension_income": [0.0, 0.0],
408408
"social_security": [0.0, 0.0],
409-
"traditional_401k_contributions": [0.0, 0.0],
410-
"roth_401k_contributions": [0.0, 0.0],
409+
"traditional_401k_contributions_desired": [0.0, 0.0],
410+
"roth_401k_contributions_desired": [0.0, 0.0],
411411
"traditional_ira_contributions": [0.0, 0.0],
412412
"roth_ira_contributions": [0.0, 0.0],
413413
"self_employed_pension_contributions": [0.0, 0.0],
@@ -446,8 +446,8 @@ def fit_predict(
446446
)
447447
return pd.DataFrame(
448448
{
449-
"traditional_401k_contributions": [0.0, 0.0],
450-
"roth_401k_contributions": [0.0, 0.0],
449+
"traditional_401k_contributions_desired": [0.0, 0.0],
450+
"roth_401k_contributions_desired": [0.0, 0.0],
451451
"traditional_ira_contributions": [0.0, 0.0],
452452
"roth_ira_contributions": [0.0, 0.0],
453453
"self_employed_pension_contributions": [50_000.0, 50_000.0],

tests/unit/calibration/test_retirement_imputation.py

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -89,8 +89,8 @@ def _make_cps_df(n, rng):
8989
"taxable_pension_income": rng.uniform(0, 20_000, n),
9090
"social_security": rng.uniform(0, 15_000, n),
9191
# Targets
92-
"traditional_401k_contributions": rng.uniform(0, 5000, n),
93-
"roth_401k_contributions": rng.uniform(0, 3000, n),
92+
"traditional_401k_contributions_desired": rng.uniform(0, 5000, n),
93+
"roth_401k_contributions_desired": rng.uniform(0, 3000, n),
9494
"traditional_ira_contributions": rng.uniform(0, 2000, n),
9595
"roth_ira_contributions": rng.uniform(0, 2000, n),
9696
"self_employed_pension_contributions": rng.uniform(0, 10_000, n),
@@ -142,8 +142,8 @@ def test_five_retirement_variables(self):
142142

143143
def test_retirement_variable_names(self):
144144
expected = {
145-
"traditional_401k_contributions",
146-
"roth_401k_contributions",
145+
"traditional_401k_contributions_desired",
146+
"roth_401k_contributions_desired",
147147
"traditional_ira_contributions",
148148
"roth_ira_contributions",
149149
"self_employed_pension_contributions",
@@ -321,8 +321,8 @@ def test_401k_capped(self):
321321
max_401k = lim["401k"] + lim["401k_catch_up"]
322322

323323
for var in (
324-
"traditional_401k_contributions",
325-
"roth_401k_contributions",
324+
"traditional_401k_contributions_desired",
325+
"roth_401k_contributions_desired",
326326
):
327327
assert np.all(result[var] <= max_401k), f"{var} exceeds 401k limit"
328328

@@ -343,8 +343,8 @@ def test_401k_zero_when_no_wages(self):
343343
assert zero_wage.sum() == 10
344344

345345
for var in (
346-
"traditional_401k_contributions",
347-
"roth_401k_contributions",
346+
"traditional_401k_contributions_desired",
347+
"roth_401k_contributions_desired",
348348
):
349349
assert np.all(result[var][zero_wage] == 0), (
350350
f"{var} should be 0 when employment_income is 0"
@@ -369,8 +369,8 @@ def test_catch_up_age_threshold(self):
369369

370370
result = self._call_with_mocks(self._uniform_preds(val))
371371

372-
young_401k = result["traditional_401k_contributions"][:25]
373-
old_401k = result["traditional_401k_contributions"][25:]
372+
young_401k = result["traditional_401k_contributions_desired"][:25]
373+
old_401k = result["traditional_401k_contributions_desired"][25:]
374374

375375
# Young capped at base limit
376376
assert np.all(young_401k == lim["401k"])
@@ -397,8 +397,8 @@ def test_401k_nonzero_for_positive_wages(self):
397397
result = self._call_with_mocks(self._uniform_preds(5_000.0))
398398
pos_wage = self.puf_imputations["employment_income"] > 0
399399
for var in (
400-
"traditional_401k_contributions",
401-
"roth_401k_contributions",
400+
"traditional_401k_contributions_desired",
401+
"roth_401k_contributions_desired",
402402
):
403403
assert np.all(result[var][pos_wage] > 0)
404404

tests/unit/test_extended_cps.py

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -143,8 +143,8 @@ def test_cps_only_vars_mostly_exist_in_tbs(self):
143143
def test_retirement_contributions_in_cps_only(self):
144144
"""All 5 retirement contribution vars should be in CPS_ONLY."""
145145
expected = {
146-
"traditional_401k_contributions",
147-
"roth_401k_contributions",
146+
"traditional_401k_contributions_desired",
147+
"roth_401k_contributions_desired",
148148
"traditional_ira_contributions",
149149
"roth_ira_contributions",
150150
"self_employed_pension_contributions",
@@ -912,8 +912,14 @@ class TestRetirementConstraints:
912912
def sample_predictions(self):
913913
return pd.DataFrame(
914914
{
915-
"traditional_401k_contributions": [25000, -500, 5000, 10000, 3000],
916-
"roth_401k_contributions": [30000, 2000, 0, 50000, 1000],
915+
"traditional_401k_contributions_desired": [
916+
25000,
917+
-500,
918+
5000,
919+
10000,
920+
3000,
921+
],
922+
"roth_401k_contributions_desired": [30000, 2000, 0, 50000, 1000],
917923
"traditional_ira_contributions": [8000, -100, 3000, 15000, 500],
918924
"roth_ira_contributions": [10000, 1000, 0, 20000, 200],
919925
"self_employed_pension_contributions": [80000, -200, 5000, 0, 100000],
@@ -943,7 +949,10 @@ def test_401k_capped_at_limit(self, sample_predictions, sample_features):
943949
age = sample_features["age"].values
944950
catch_up = age >= 50
945951
cap = limits["401k"] + catch_up * limits["401k_catch_up"]
946-
for var in ["traditional_401k_contributions", "roth_401k_contributions"]:
952+
for var in [
953+
"traditional_401k_contributions_desired",
954+
"roth_401k_contributions_desired",
955+
]:
947956
assert (result[var].values <= cap).all(), f"{var} exceeds 401k cap"
948957

949958
def test_ira_capped_at_limit(self, sample_predictions, sample_features):
@@ -962,7 +971,10 @@ def test_401k_zeroed_without_employment_income(
962971
):
963972
result = apply_retirement_constraints(sample_predictions, sample_features, 2024)
964973
no_emp = sample_features["employment_income"] == 0
965-
for var in ["traditional_401k_contributions", "roth_401k_contributions"]:
974+
for var in [
975+
"traditional_401k_contributions_desired",
976+
"roth_401k_contributions_desired",
977+
]:
966978
assert (result[var].values[no_emp] == 0).all(), (
967979
f"{var} should be zero without employment income"
968980
)

0 commit comments

Comments
 (0)