|
| 1 | +import numpy as np |
| 2 | +import pandas as pd |
| 3 | +import pytest |
| 4 | + |
| 5 | +from policyengine_us_data.utils.mortgage_interest import ( |
| 6 | + convert_mortgage_interest_to_structural_inputs, |
| 7 | + impute_tax_unit_mortgage_balance_hints, |
| 8 | + supports_structural_mortgage_inputs, |
| 9 | +) |
| 10 | + |
| 11 | + |
def _base_dataset_dict(deductible_mortgage_interest, interest_deduction):
    """Build a minimal dataset dict for the mortgage-interest conversion tests.

    The result maps each variable name to ``{2024: numpy array}``. It
    describes two people (ids 1 and 2) who both belong to group 1 of every
    entity; person 1 is flagged as tax-unit head and person 2 as spouse, and
    the single filing status is ``b"JOINT"``.

    Args:
        deductible_mortgage_interest: Values stored under
            ``"deductible_mortgage_interest"`` as a float32 array.
        interest_deduction: Values stored under ``"interest_deduction"`` as a
            float32 array.

    Returns:
        dict: ``{variable_name: {2024: np.ndarray}}``.
    """
    time_period = 2024

    def at_period(values, dtype=None):
        # Every entry gets its own freshly allocated array so no two
        # variables share storage.
        return {time_period: np.array(values, dtype=dtype)}

    dataset = {}

    # Entity identifiers: two people, one group of every other entity.
    dataset["person_id"] = at_period([1, 2])
    dataset["tax_unit_id"] = at_period([1])
    dataset["marital_unit_id"] = at_period([1])
    dataset["spm_unit_id"] = at_period([1])
    dataset["family_id"] = at_period([1])
    dataset["household_id"] = at_period([1])

    # Person -> group membership: both people point at group 1.
    dataset["person_tax_unit_id"] = at_period([1, 1])
    dataset["person_marital_unit_id"] = at_period([1, 1])
    dataset["person_spm_unit_id"] = at_period([1, 1])
    dataset["person_family_id"] = at_period([1, 1])
    dataset["person_household_id"] = at_period([1, 1])

    # Roles and demographics.
    dataset["is_tax_unit_head"] = at_period([True, False])
    dataset["is_tax_unit_spouse"] = at_period([False, True])
    dataset["age"] = at_period([55, 53])
    dataset["filing_status"] = at_period([b"JOINT"])

    # Caller-supplied interest inputs, always stored as float32.
    dataset["deductible_mortgage_interest"] = at_period(
        deductible_mortgage_interest, dtype=np.float32
    )
    dataset["interest_deduction"] = at_period(
        interest_deduction, dtype=np.float32
    )
    return dataset
| 37 | + |
| 38 | + |
def _current_law_cap(filing_status: "bytes | str", origination_year: int) -> float:
    """Return the mortgage-balance cap the tests use for a filing status.

    A filing status containing ``SEPARATE`` gets half the cap of any other
    status, and loans originated in 2017 or earlier get the higher pair of
    caps.

    Args:
        filing_status: Filing-status label, either raw bytes (as stored in
            the test dataset, e.g. ``b"JOINT"``) or a decoded string.
        origination_year: Calendar year in which the mortgage originated.

    Returns:
        float: 1,000,000 (500,000 if separate) for ``origination_year <=
        2017``; otherwise 750,000 (375,000 if separate).
    """
    # Pick a needle of the same type as the label: mixing bytes and str in
    # an ``in`` test raises TypeError.
    if isinstance(filing_status, bytes):
        is_separate = b"SEPARATE" in filing_status
    else:
        is_separate = "SEPARATE" in filing_status

    if origination_year <= 2017:
        return 500_000.0 if is_separate else 1_000_000.0
    return 375_000.0 if is_separate else 750_000.0
| 44 | + |
| 45 | + |
@pytest.mark.skipif(
    not supports_structural_mortgage_inputs(),
    reason="Installed policyengine-us does not yet expose structural MID inputs.",
)
def test_structural_mortgage_conversion_preserves_current_law_interest_deduction():
    """Converted structural inputs reproduce the original deduction amounts.

    Starts from 6,000 of deductible mortgage interest and a 7,000 interest
    deduction, converts, and checks that the cap-limited share of the
    first-home interest is 6,000 and that adding the investment interest
    expense gives 7,000.
    """
    year = 2024
    source = _base_dataset_dict(
        deductible_mortgage_interest=[6_000.0, 0.0],
        interest_deduction=[7_000.0],
    )
    result = convert_mortgage_interest_to_structural_inputs(source, year)

    # The legacy inputs must be gone after conversion.
    assert "deductible_mortgage_interest" not in result
    assert "interest_deduction" not in result

    # Structural first-home inputs are populated.
    assert result["first_home_mortgage_balance"][year][0] > 0
    assert result["first_home_mortgage_interest"][year][0] >= 6_000
    assert result["first_home_mortgage_origination_year"][year][0] > 0

    # The 1,000 gap between the two inputs lands in investment interest.
    assert result["investment_interest_expense"][year].sum() == pytest.approx(
        1_000.0
    )

    origination_year = int(result["first_home_mortgage_origination_year"][year][0])
    cap = _current_law_cap(result["filing_status"][year][0], origination_year)
    balance = result["first_home_mortgage_balance"][year][0]
    total_interest = result["first_home_mortgage_interest"][year][0]

    # Only the share of the balance under the cap is treated as deductible.
    if balance > 0:
        deductible_share = min(1.0, cap / balance)
    else:
        deductible_share = 0.0

    assert total_interest * deductible_share == pytest.approx(6_000.0)
    assert result["home_mortgage_interest"][year].sum() == pytest.approx(
        total_interest
    )

    reconstructed_deduction = (
        total_interest * deductible_share
        + result["investment_interest_expense"][year].sum()
    )
    assert reconstructed_deduction == pytest.approx(7_000.0)
| 81 | + |
| 82 | + |
@pytest.mark.skipif(
    not supports_structural_mortgage_inputs(),
    reason="Installed policyengine-us does not yet expose structural MID inputs.",
)
def test_structural_mortgage_conversion_preserves_non_mortgage_interest():
    """With no mortgage interest, the whole deduction becomes investment interest.

    Converts a dataset with zero deductible mortgage interest and a 2,500
    interest deduction, then checks that the first-home balance and interest
    and the home mortgage interest are zero while the investment interest
    expense sums to 2,500.
    """
    year = 2024
    result = convert_mortgage_interest_to_structural_inputs(
        _base_dataset_dict(
            deductible_mortgage_interest=[0.0, 0.0],
            interest_deduction=[2_500.0],
        ),
        year,
    )

    # Nothing mortgage-related should be produced.
    assert result["first_home_mortgage_balance"][year][0] == 0
    assert result["first_home_mortgage_interest"][year][0] == 0
    assert result["home_mortgage_interest"][year].sum() == 0

    # The full deduction is carried by investment interest expense.
    investment_interest = result["investment_interest_expense"][year].sum()
    assert investment_interest == pytest.approx(2_500.0)
| 100 | + |
| 101 | + |
@pytest.mark.skipif(
    not supports_structural_mortgage_inputs(),
    reason="Installed policyengine-us does not yet expose structural MID inputs.",
)
def test_structural_mortgage_conversion_keeps_balance_hints_for_non_itemizers():
    """Balance hints become balances even when no interest is reported.

    The input has zero mortgage interest and a zero interest deduction but
    carries imputed balance hints of 250,000 (first home) and 25,000 (second
    home). After conversion the balances must match the hints, the interest
    outputs must be zero, and origination years must be populated.
    """
    year = 2024
    source = _base_dataset_dict(
        deductible_mortgage_interest=[0.0, 0.0],
        interest_deduction=[0.0],
    )
    first_home_hint = np.array([250_000.0], dtype=np.float32)
    second_home_hint = np.array([25_000.0], dtype=np.float32)
    source["imputed_first_home_mortgage_balance_hint"] = {year: first_home_hint}
    source["imputed_second_home_mortgage_balance_hint"] = {year: second_home_hint}

    result = convert_mortgage_interest_to_structural_inputs(source, year)

    # Hints pass through as the structural balances.
    assert result["first_home_mortgage_balance"][year][0] == pytest.approx(
        250_000.0
    )
    assert result["second_home_mortgage_balance"][year][0] == pytest.approx(
        25_000.0
    )

    # No interest was reported, so none is produced.
    assert result["first_home_mortgage_interest"][year][0] == 0
    assert result["second_home_mortgage_interest"][year][0] == 0

    # Origination years are still assigned for both loans.
    assert result["first_home_mortgage_origination_year"][year][0] > 0
    assert result["second_home_mortgage_origination_year"][year][0] >= 2018

    assert result["home_mortgage_interest"][year].sum() == 0
    assert result["investment_interest_expense"][year].sum() == 0
| 132 | + |
| 133 | + |
@pytest.mark.skipif(
    not supports_structural_mortgage_inputs(),
    reason="Installed policyengine-us does not yet expose structural MID inputs.",
)
def test_scf_balance_hint_imputation_zeroes_non_mortgaged_owner(monkeypatch):
    """An owner without a mortgage gets zero imputed balance hints.

    The QRF model and ``SCF_2022.load_dataset`` are replaced with stubs so the
    imputation runs without real survey data. The stub model predicts hints
    proportional to the ``mortgage_owner_status`` predictor. The second tax
    unit's SPM tenure is ``OWNER_WITHOUT_MORTGAGE`` and both of its hints are
    expected to be 0.
    """
    import microimpute.models.qrf as qrf_module
    import policyengine_us_data.datasets.scf.scf as scf_module

    class DummyQRF:
        def fit(self, *args, **kwargs):
            return self

        def predict(self, X_test):
            # Deterministic stand-in: scale the status predictor.
            # NOTE(review): the expected 200,000 / 20,000 below imply the
            # status is 2 for the mortgaged owner -- confirm against the
            # imputation code.
            status = X_test["mortgage_owner_status"]
            return pd.DataFrame(
                {
                    "imputed_first_home_mortgage_balance_hint": status * 100_000,
                    "imputed_second_home_mortgage_balance_hint": status * 10_000,
                }
            )

    def fake_load_dataset(self):
        # Two-record SCF stand-in; a fresh dict of arrays on every call.
        scf_columns = {
            "age": [45, 55],
            "is_female": [0, 1],
            "cps_race": [1, 2],
            "is_married": [1, 0],
            "own_children_in_household": [1, 0],
            "employment_income": [80_000, 40_000],
            "interest_dividend_income": [2_000, 1_000],
            "social_security_pension_income": [0, 5_000],
            "nh_mort": [250_000, 0],
            "heloc": [25_000, 0],
            "houses": [500_000, 350_000],
            "wgt": [1, 1],
        }
        return {name: np.array(values) for name, values in scf_columns.items()}

    monkeypatch.setattr(qrf_module, "QRF", DummyQRF)
    monkeypatch.setattr(scf_module.SCF_2022, "load_dataset", fake_load_dataset)

    year = 2024
    # Two single-person units: person i belongs to group i of every entity.
    cps_columns = {
        "person_id": [1, 2],
        "tax_unit_id": [1, 2],
        "marital_unit_id": [1, 2],
        "spm_unit_id": [1, 2],
        "family_id": [1, 2],
        "household_id": [1, 2],
        "person_tax_unit_id": [1, 2],
        "person_marital_unit_id": [1, 2],
        "person_spm_unit_id": [1, 2],
        "person_family_id": [1, 2],
        "person_household_id": [1, 2],
        "is_tax_unit_head": [True, True],
        "is_tax_unit_spouse": [False, False],
        "age": [45, 55],
        "is_male": [1, 0],
        "cps_race": [1, 2],
        "employment_income": [80_000, 40_000],
        "taxable_interest_income": [1_000, 500],
        "tax_exempt_interest_income": [0, 0],
        "qualified_dividend_income": [500, 250],
        "non_qualified_dividend_income": [0, 0],
        "social_security_retirement": [0, 5_000],
        "taxable_private_pension_income": [0, 0],
        "tax_exempt_private_pension_income": [0, 0],
        "tenure_type": [b"OWNED_WITH_MORTGAGE", b"OWNED_WITH_MORTGAGE"],
        "spm_unit_tenure_type": [
            b"OWNER_WITH_MORTGAGE",
            b"OWNER_WITHOUT_MORTGAGE",
        ],
    }
    data = {
        name: {year: np.array(values)} for name, values in cps_columns.items()
    }

    imputed = impute_tax_unit_mortgage_balance_hints(data, year)

    first_home_hints = imputed["imputed_first_home_mortgage_balance_hint"][year]
    second_home_hints = imputed["imputed_second_home_mortgage_balance_hint"][year]
    assert first_home_hints.tolist() == [
        200_000.0,
        0.0,
    ]
    assert second_home_hints.tolist() == [
        20_000.0,
        0.0,
    ]
0 commit comments