Skip to content

Commit 35817a5

Browse files
committed
Add structural mortgage interest data support
1 parent d0d35c9 commit 35817a5

5 files changed

Lines changed: 1027 additions & 4 deletions

File tree

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Convert imputed deductible mortgage interest into structural mortgage balance, interest, and origination-year inputs when the installed `policyengine-us` supports federal mortgage interest deduction (MID) cap modeling, while preserving total current-law interest deductions via residual investment interest inputs.

policyengine_us_data/datasets/cps/extended_cps.py

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,14 @@
66
import pandas as pd
77
from policyengine_core.data import Dataset
88

9-
from policyengine_us_data.datasets.cps.cps import * # noqa: F403
10-
from policyengine_us_data.datasets.puf import * # noqa: F403
9+
from policyengine_us_data.datasets.cps.cps import CPS, CPS_2024, CPS_2024_Full
10+
from policyengine_us_data.datasets.puf import PUF, PUF_2024
1111
from policyengine_us_data.storage import STORAGE_FOLDER
12+
from policyengine_us_data.utils.mortgage_interest import (
13+
convert_mortgage_interest_to_structural_inputs,
14+
impute_tax_unit_mortgage_balance_hints,
15+
supports_structural_mortgage_inputs,
16+
)
1217
from policyengine_us_data.utils.retirement_limits import (
1318
get_retirement_limits,
1419
get_se_pension_limits,
@@ -445,6 +450,14 @@ def generate(self):
445450
)
446451

447452
new_data = self._rename_imputed_to_inputs(new_data)
453+
new_data = impute_tax_unit_mortgage_balance_hints(
454+
new_data,
455+
self.time_period,
456+
)
457+
new_data = convert_mortgage_interest_to_structural_inputs(
458+
new_data,
459+
self.time_period,
460+
)
448461
new_data = self._drop_formula_variables(new_data)
449462
self.save_dataset(new_data)
450463

@@ -472,11 +485,17 @@ def _rename_imputed_to_inputs(cls, data):
472485
# due to entity shape mismatch.
473486
_KEEP_FORMULA_VARS = {
474487
"person_id",
475-
"interest_deduction",
476488
"self_employed_pension_contribution_ald",
477489
"self_employed_health_insurance_ald",
478490
}
479491

492+
@classmethod
def _keep_formula_vars(cls):
    """Return the formula variables that must not be dropped from the dataset.

    Starts from a copy of ``_KEEP_FORMULA_VARS`` so the class-level set is
    never mutated. ``interest_deduction`` is retained only when the
    installed model does not support structural mortgage inputs.
    """
    retained = set(cls._KEEP_FORMULA_VARS)
    if supports_structural_mortgage_inputs():
        return retained
    return retained | {"interest_deduction"}
498+
480499
# QRF imputes formula-level variables (e.g. taxable_pension_income)
481500
# but we must store them under leaf input names so
482501
# _drop_formula_variables doesn't discard them. The engine then
@@ -526,7 +545,7 @@ def _drop_formula_variables(cls, data):
526545
if (hasattr(var, "formulas") and len(var.formulas) > 0)
527546
or getattr(var, "adds", None)
528547
or getattr(var, "subtracts", None)
529-
} - cls._KEEP_FORMULA_VARS
548+
} - cls._keep_formula_vars()
530549
dropped = sorted(set(data.keys()) & formula_vars)
531550
if dropped:
532551
logger.info(

policyengine_us_data/datasets/puf/puf.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,9 @@
1212
from policyengine_us_data.datasets.puf.disaggregate_puf import (
1313
disaggregate_aggregate_records,
1414
)
15+
from policyengine_us_data.utils.mortgage_interest import (
16+
convert_mortgage_interest_to_structural_inputs,
17+
)
1518
from policyengine_us_data.utils.uprating import (
1619
create_policyengine_uprating_factors_table,
1720
)
@@ -643,6 +646,14 @@ def generate(self):
643646
self.holder[key] = np.array(self.holder[key]).astype(float)
644647
assert not np.isnan(self.holder[key]).any(), f"{key} has NaNs."
645648

649+
holder_tp = {variable: {self.time_period: values} for variable, values in self.holder.items()}
650+
holder_tp = convert_mortgage_interest_to_structural_inputs(
651+
holder_tp,
652+
self.time_period,
653+
)
654+
self.holder = {
655+
variable: values[self.time_period] for variable, values in holder_tp.items()
656+
}
646657
self.save_dataset(self.holder)
647658

648659
def add_tax_unit(self, row, tax_unit_id):
Lines changed: 222 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,222 @@
1+
import numpy as np
2+
import pandas as pd
3+
import pytest
4+
5+
from policyengine_us_data.utils.mortgage_interest import (
6+
convert_mortgage_interest_to_structural_inputs,
7+
impute_tax_unit_mortgage_balance_hints,
8+
supports_structural_mortgage_inputs,
9+
)
10+
11+
12+
def _base_dataset_dict(
    deductible_mortgage_interest,
    interest_deduction,
    time_period=2024,
):
    """Build a minimal two-person, one-tax-unit dataset dict for the tests.

    Every variable maps ``{time_period: numpy array}``. The two people share
    a single tax unit, marital unit, SPM unit, family and household, and the
    tax unit files jointly.

    Args:
        deductible_mortgage_interest: Values stored under
            ``deductible_mortgage_interest`` (cast to ``float32``); the
            callers in this file pass one value per person.
        interest_deduction: Values stored under ``interest_deduction``
            (cast to ``float32``); the callers in this file pass one value
            for the single tax unit.
        time_period: Period key used for every variable. Defaults to 2024
            so existing two-argument callers are unaffected.

    Returns:
        dict: Variable name -> ``{time_period: np.ndarray}``.
    """
    return {
        "person_id": {time_period: np.array([1, 2])},
        "tax_unit_id": {time_period: np.array([1])},
        "marital_unit_id": {time_period: np.array([1])},
        "spm_unit_id": {time_period: np.array([1])},
        "family_id": {time_period: np.array([1])},
        "household_id": {time_period: np.array([1])},
        "person_tax_unit_id": {time_period: np.array([1, 1])},
        "person_marital_unit_id": {time_period: np.array([1, 1])},
        "person_spm_unit_id": {time_period: np.array([1, 1])},
        "person_family_id": {time_period: np.array([1, 1])},
        "person_household_id": {time_period: np.array([1, 1])},
        "is_tax_unit_head": {time_period: np.array([True, False])},
        "is_tax_unit_spouse": {time_period: np.array([False, True])},
        "age": {time_period: np.array([55, 53])},
        "filing_status": {time_period: np.array([b"JOINT"])},
        "deductible_mortgage_interest": {
            time_period: np.array(deductible_mortgage_interest, dtype=np.float32)
        },
        "interest_deduction": {
            time_period: np.array(interest_deduction, dtype=np.float32)
        },
    }
37+
38+
39+
def _current_law_cap(filing_status: "bytes | str", origination_year: int) -> float:
    """Return the mortgage balance cap the tests treat as current law.

    Args:
        filing_status: Filing status name, either as bytes (how the dataset
            arrays in this file store it) or as ``str``. Any value containing
            ``SEPARATE`` gets the halved cap.
        origination_year: Year the mortgage was originated.

    Returns:
        float: 1,000,000 (500,000 if separate) for originations in 2017 or
        earlier; 750,000 (375,000 if separate) otherwise.
    """
    # Pick the marker matching the input type: `bytes in str` raises TypeError.
    marker = b"SEPARATE" if isinstance(filing_status, bytes) else "SEPARATE"
    is_separate = marker in filing_status
    if origination_year <= 2017:
        return 500_000.0 if is_separate else 1_000_000.0
    return 375_000.0 if is_separate else 750_000.0
44+
45+
46+
@pytest.mark.skipif(
    not supports_structural_mortgage_inputs(),
    reason="Installed policyengine-us does not yet expose structural MID inputs.",
)
def test_structural_mortgage_conversion_preserves_current_law_interest_deduction():
    """Conversion must keep the capped mortgage interest and total deduction.

    Starts from 6,000 of deductible mortgage interest and a 7,000 interest
    deduction, then checks that the structural inputs reproduce 6,000 of
    deductible interest under the cap and that the remaining 1,000 is
    carried as investment interest expense.
    """
    year = 2024
    converted = convert_mortgage_interest_to_structural_inputs(
        _base_dataset_dict(
            deductible_mortgage_interest=[6_000.0, 0.0],
            interest_deduction=[7_000.0],
        ),
        year,
    )

    # The legacy variables are replaced by the structural inputs.
    for legacy_name in ("deductible_mortgage_interest", "interest_deduction"):
        assert legacy_name not in converted

    balance = converted["first_home_mortgage_balance"][year][0]
    assert balance > 0
    total_interest = converted["first_home_mortgage_interest"][year][0]
    assert total_interest >= 6_000
    origination_year = converted["first_home_mortgage_origination_year"][year][0]
    assert origination_year > 0
    residual_interest = converted["investment_interest_expense"][year].sum()
    assert residual_interest == pytest.approx(1_000.0)

    cap = _current_law_cap(
        converted["filing_status"][year][0],
        int(origination_year),
    )
    # Share of interest that stays deductible once the balance cap applies.
    if balance > 0:
        deductible_share = min(1.0, cap / balance)
    else:
        deductible_share = 0.0

    assert total_interest * deductible_share == pytest.approx(6_000.0)
    assert converted["home_mortgage_interest"][year].sum() == pytest.approx(
        total_interest
    )
    assert (total_interest * deductible_share + residual_interest) == pytest.approx(
        7_000.0
    )
81+
82+
83+
@pytest.mark.skipif(
    not supports_structural_mortgage_inputs(),
    reason="Installed policyengine-us does not yet expose structural MID inputs.",
)
def test_structural_mortgage_conversion_preserves_non_mortgage_interest():
    """With no mortgage interest, the whole deduction becomes investment interest.

    A 2,500 interest deduction with zero deductible mortgage interest must
    produce zero mortgage balance and interest, and 2,500 of investment
    interest expense.
    """
    year = 2024
    converted = convert_mortgage_interest_to_structural_inputs(
        _base_dataset_dict(
            deductible_mortgage_interest=[0.0, 0.0],
            interest_deduction=[2_500.0],
        ),
        year,
    )

    for mortgage_variable in (
        "first_home_mortgage_balance",
        "first_home_mortgage_interest",
    ):
        assert converted[mortgage_variable][year][0] == 0
    assert converted["home_mortgage_interest"][year].sum() == 0

    residual_interest = converted["investment_interest_expense"][year].sum()
    assert residual_interest == pytest.approx(2_500.0)
100+
101+
102+
@pytest.mark.skipif(
    not supports_structural_mortgage_inputs(),
    reason="Installed policyengine-us does not yet expose structural MID inputs.",
)
def test_structural_mortgage_conversion_keeps_balance_hints_for_non_itemizers():
    """Balance hints must survive conversion when no interest is deducted.

    With zero mortgage interest and zero interest deduction, the imputed
    balance hints become the first- and second-home balances, while every
    interest amount stays at zero.
    """
    year = 2024
    data = _base_dataset_dict(
        deductible_mortgage_interest=[0.0, 0.0],
        interest_deduction=[0.0],
    )
    first_hint = np.array([250_000.0], dtype=np.float32)
    data["imputed_first_home_mortgage_balance_hint"] = {year: first_hint}
    second_hint = np.array([25_000.0], dtype=np.float32)
    data["imputed_second_home_mortgage_balance_hint"] = {year: second_hint}

    converted = convert_mortgage_interest_to_structural_inputs(data, year)

    # Balances come straight from the hints.
    first_balance = converted["first_home_mortgage_balance"][year][0]
    assert first_balance == pytest.approx(250_000.0)
    second_balance = converted["second_home_mortgage_balance"][year][0]
    assert second_balance == pytest.approx(25_000.0)

    # No interest is generated for either home.
    assert converted["first_home_mortgage_interest"][year][0] == 0
    assert converted["second_home_mortgage_interest"][year][0] == 0

    # Origination years are still populated.
    assert converted["first_home_mortgage_origination_year"][year][0] > 0
    assert converted["second_home_mortgage_origination_year"][year][0] >= 2018

    for interest_total in ("home_mortgage_interest", "investment_interest_expense"):
        assert converted[interest_total][year].sum() == 0
132+
133+
134+
@pytest.mark.skipif(
    not supports_structural_mortgage_inputs(),
    reason="Installed policyengine-us does not yet expose structural MID inputs.",
)
def test_scf_balance_hint_imputation_zeroes_non_mortgaged_owner(monkeypatch):
    """Owners without a mortgage must receive zero balance hints.

    The QRF model and the SCF loader are both stubbed, so the imputation runs
    on a two-record fixture. The stub's predictions are proportional to
    ``mortgage_owner_status``; the expected 200,000 / 20,000 for the first
    unit presumably corresponds to a status of 2 for mortgaged owners
    (confirm against ``impute_tax_unit_mortgage_balance_hints``).
    """
    import microimpute.models.qrf as qrf_module
    import policyengine_us_data.datasets.scf.scf as scf_module

    class DummyQRF:
        """Stand-in for QRF that scales ``mortgage_owner_status``."""

        def fit(self, *args, **kwargs):
            return self

        def predict(self, X_test):
            owner_status = X_test["mortgage_owner_status"]
            return pd.DataFrame(
                {
                    "imputed_first_home_mortgage_balance_hint": owner_status
                    * 100_000,
                    "imputed_second_home_mortgage_balance_hint": owner_status
                    * 10_000,
                }
            )

    def fake_scf_records(self):
        # Fresh arrays on every call, matching a real load.
        return {
            "age": np.array([45, 55]),
            "is_female": np.array([0, 1]),
            "cps_race": np.array([1, 2]),
            "is_married": np.array([1, 0]),
            "own_children_in_household": np.array([1, 0]),
            "employment_income": np.array([80_000, 40_000]),
            "interest_dividend_income": np.array([2_000, 1_000]),
            "social_security_pension_income": np.array([0, 5_000]),
            "nh_mort": np.array([250_000, 0]),
            "heloc": np.array([25_000, 0]),
            "houses": np.array([500_000, 350_000]),
            "wgt": np.array([1, 1]),
        }

    monkeypatch.setattr(qrf_module, "QRF", DummyQRF)
    monkeypatch.setattr(scf_module.SCF_2022, "load_dataset", fake_scf_records)

    year = 2024

    def at_year(values):
        # Wrap raw values in the {period: array} layout used by the dataset.
        return {year: np.array(values)}

    data = {
        "person_id": at_year([1, 2]),
        "tax_unit_id": at_year([1, 2]),
        "marital_unit_id": at_year([1, 2]),
        "spm_unit_id": at_year([1, 2]),
        "family_id": at_year([1, 2]),
        "household_id": at_year([1, 2]),
        "person_tax_unit_id": at_year([1, 2]),
        "person_marital_unit_id": at_year([1, 2]),
        "person_spm_unit_id": at_year([1, 2]),
        "person_family_id": at_year([1, 2]),
        "person_household_id": at_year([1, 2]),
        "is_tax_unit_head": at_year([True, True]),
        "is_tax_unit_spouse": at_year([False, False]),
        "age": at_year([45, 55]),
        "is_male": at_year([1, 0]),
        "cps_race": at_year([1, 2]),
        "employment_income": at_year([80_000, 40_000]),
        "taxable_interest_income": at_year([1_000, 500]),
        "tax_exempt_interest_income": at_year([0, 0]),
        "qualified_dividend_income": at_year([500, 250]),
        "non_qualified_dividend_income": at_year([0, 0]),
        "social_security_retirement": at_year([0, 5_000]),
        "taxable_private_pension_income": at_year([0, 0]),
        "tax_exempt_private_pension_income": at_year([0, 0]),
        "tenure_type": at_year([b"OWNED_WITH_MORTGAGE", b"OWNED_WITH_MORTGAGE"]),
        "spm_unit_tenure_type": at_year(
            [b"OWNER_WITH_MORTGAGE", b"OWNER_WITHOUT_MORTGAGE"]
        ),
    }

    imputed = impute_tax_unit_mortgage_balance_hints(data, year)

    expected_hints = {
        "imputed_first_home_mortgage_balance_hint": [200_000.0, 0.0],
        "imputed_second_home_mortgage_balance_hint": [20_000.0, 0.0],
    }
    for hint_name, expected in expected_hints.items():
        assert imputed[hint_name][year].tolist() == expected

0 commit comments

Comments
 (0)