Skip to content

Commit 14fb1f0

Browse files
authored
Merge pull request #633 from PolicyEngine/codex/mid-data-support
Add structural mortgage interest data support
2 parents 94abbf9 + 22ab8f1 commit 14fb1f0

9 files changed

Lines changed: 1264 additions & 5 deletions

File tree

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Convert imputed deductible mortgage interest into structural mortgage balance, interest, and origination-year inputs when the installed `policyengine-us` supports federal mortgage interest deduction (MID) cap modeling, while preserving total current-law interest deductions via residual investment interest inputs.

policyengine_us_data/datasets/cps/extended_cps.py

Lines changed: 30 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,16 +6,27 @@
66
import pandas as pd
77
from policyengine_core.data import Dataset
88

9-
from policyengine_us_data.datasets.cps.cps import * # noqa: F403
10-
from policyengine_us_data.datasets.puf import * # noqa: F403
9+
from policyengine_us_data.datasets.cps.cps import CPS, CPS_2024, CPS_2024_Full
10+
from policyengine_us_data.datasets.puf import PUF, PUF_2024
1111
from policyengine_us_data.storage import STORAGE_FOLDER
12+
from policyengine_us_data.utils.mortgage_interest import (
13+
STRUCTURAL_MORTGAGE_VARIABLES,
14+
convert_mortgage_interest_to_structural_inputs,
15+
impute_tax_unit_mortgage_balance_hints,
16+
)
17+
from policyengine_us_data.utils.policyengine import has_policyengine_us_variables
1218
from policyengine_us_data.utils.retirement_limits import (
1319
get_retirement_limits,
1420
get_se_pension_limits,
1521
)
1622

1723
logger = logging.getLogger(__name__)
1824

25+
26+
def _supports_structural_mortgage_inputs() -> bool:
27+
return has_policyengine_us_variables(*STRUCTURAL_MORTGAGE_VARIABLES)
28+
29+
1930
# CPS-only variables that should be QRF-imputed for the PUF clone half
2031
# instead of naively duplicated from the CPS donor. These are
2132
# income-correlated variables that exist only in the CPS; demographics,
@@ -445,6 +456,15 @@ def generate(self):
445456
)
446457

447458
new_data = self._rename_imputed_to_inputs(new_data)
459+
if _supports_structural_mortgage_inputs():
460+
new_data = impute_tax_unit_mortgage_balance_hints(
461+
new_data,
462+
self.time_period,
463+
)
464+
new_data = convert_mortgage_interest_to_structural_inputs(
465+
new_data,
466+
self.time_period,
467+
)
448468
new_data = self._drop_formula_variables(new_data)
449469
self.save_dataset(new_data)
450470

@@ -472,11 +492,17 @@ def _rename_imputed_to_inputs(cls, data):
472492
# due to entity shape mismatch.
473493
_KEEP_FORMULA_VARS = {
474494
"person_id",
475-
"interest_deduction",
476495
"self_employed_pension_contribution_ald",
477496
"self_employed_health_insurance_ald",
478497
}
479498

499+
@classmethod
500+
def _keep_formula_vars(cls):
501+
keep = set(cls._KEEP_FORMULA_VARS)
502+
if not _supports_structural_mortgage_inputs():
503+
keep.add("interest_deduction")
504+
return keep
505+
480506
# QRF imputes formula-level variables (e.g. taxable_pension_income)
481507
# but we must store them under leaf input names so
482508
# _drop_formula_variables doesn't discard them. The engine then
@@ -526,7 +552,7 @@ def _drop_formula_variables(cls, data):
526552
if (hasattr(var, "formulas") and len(var.formulas) > 0)
527553
or getattr(var, "adds", None)
528554
or getattr(var, "subtracts", None)
529-
} - cls._KEEP_FORMULA_VARS
555+
} - cls._keep_formula_vars()
530556
dropped = sorted(set(data.keys()) & formula_vars)
531557
if dropped:
532558
logger.info(

policyengine_us_data/datasets/puf/puf.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,11 @@
1212
from policyengine_us_data.datasets.puf.disaggregate_puf import (
1313
disaggregate_aggregate_records,
1414
)
15+
from policyengine_us_data.utils.mortgage_interest import (
16+
STRUCTURAL_MORTGAGE_VARIABLES,
17+
convert_mortgage_interest_to_structural_inputs,
18+
)
19+
from policyengine_us_data.utils.policyengine import has_policyengine_us_variables
1520
from policyengine_us_data.utils.uprating import (
1621
create_policyengine_uprating_factors_table,
1722
)
@@ -643,6 +648,18 @@ def generate(self):
643648
self.holder[key] = np.array(self.holder[key]).astype(float)
644649
assert not np.isnan(self.holder[key]).any(), f"{key} has NaNs."
645650

651+
holder_tp = {
652+
variable: {self.time_period: values}
653+
for variable, values in self.holder.items()
654+
}
655+
if has_policyengine_us_variables(*STRUCTURAL_MORTGAGE_VARIABLES):
656+
holder_tp = convert_mortgage_interest_to_structural_inputs(
657+
holder_tp,
658+
self.time_period,
659+
)
660+
self.holder = {
661+
variable: values[self.time_period] for variable, values in holder_tp.items()
662+
}
646663
self.save_dataset(self.holder)
647664

648665
def add_tax_unit(self, row, tax_unit_id):

0 commit comments

Comments
 (0)