|
6 | 6 | import pandas as pd |
7 | 7 | from policyengine_core.data import Dataset |
8 | 8 |
|
9 | | -from policyengine_us_data.datasets.cps.cps import * # noqa: F403 |
10 | | -from policyengine_us_data.datasets.puf import * # noqa: F403 |
| 9 | +from policyengine_us_data.datasets.cps.cps import CPS, CPS_2024, CPS_2024_Full |
| 10 | +from policyengine_us_data.datasets.puf import PUF, PUF_2024 |
11 | 11 | from policyengine_us_data.storage import STORAGE_FOLDER |
| 12 | +from policyengine_us_data.utils.mortgage_interest import ( |
| 13 | + STRUCTURAL_MORTGAGE_VARIABLES, |
| 14 | + convert_mortgage_interest_to_structural_inputs, |
| 15 | + impute_tax_unit_mortgage_balance_hints, |
| 16 | +) |
| 17 | +from policyengine_us_data.utils.policyengine import has_policyengine_us_variables |
12 | 18 | from policyengine_us_data.utils.retirement_limits import ( |
13 | 19 | get_retirement_limits, |
14 | 20 | get_se_pension_limits, |
15 | 21 | ) |
16 | 22 |
|
17 | 23 | logger = logging.getLogger(__name__) |
18 | 24 |
|
| 25 | + |
def _supports_structural_mortgage_inputs() -> bool:
    """Report whether the structural mortgage variables are available.

    Passes every name in ``STRUCTURAL_MORTGAGE_VARIABLES`` to
    ``has_policyengine_us_variables`` and returns its answer unchanged.
    """
    required_variables = tuple(STRUCTURAL_MORTGAGE_VARIABLES)
    return has_policyengine_us_variables(*required_variables)
| 28 | + |
| 29 | + |
19 | 30 | # CPS-only variables that should be QRF-imputed for the PUF clone half |
20 | 31 | # instead of naively duplicated from the CPS donor. These are |
21 | 32 | # income-correlated variables that exist only in the CPS; demographics, |
@@ -445,6 +456,15 @@ def generate(self): |
445 | 456 | ) |
446 | 457 |
|
447 | 458 | new_data = self._rename_imputed_to_inputs(new_data) |
| 459 | + if _supports_structural_mortgage_inputs(): |
| 460 | + new_data = impute_tax_unit_mortgage_balance_hints( |
| 461 | + new_data, |
| 462 | + self.time_period, |
| 463 | + ) |
| 464 | + new_data = convert_mortgage_interest_to_structural_inputs( |
| 465 | + new_data, |
| 466 | + self.time_period, |
| 467 | + ) |
448 | 468 | new_data = self._drop_formula_variables(new_data) |
449 | 469 | self.save_dataset(new_data) |
450 | 470 |
|
@@ -472,11 +492,17 @@ def _rename_imputed_to_inputs(cls, data): |
472 | 492 | # due to entity shape mismatch. |
473 | 493 | _KEEP_FORMULA_VARS = { |
474 | 494 | "person_id", |
475 | | - "interest_deduction", |
476 | 495 | "self_employed_pension_contribution_ald", |
477 | 496 | "self_employed_health_insurance_ald", |
478 | 497 | } |
479 | 498 |
|
@classmethod
def _keep_formula_vars(cls):
    """Return the formula variable names that must not be dropped.

    The result is always a fresh set copied from ``_KEEP_FORMULA_VARS``,
    so callers can mutate it without touching the class attribute.
    ``"interest_deduction"` is added only when
    ``_supports_structural_mortgage_inputs()`` reports False.
    """
    preserved = {*cls._KEEP_FORMULA_VARS}
    if _supports_structural_mortgage_inputs():
        return preserved
    # Without structural mortgage inputs, interest_deduction stays in
    # the kept set.
    return preserved | {"interest_deduction"}
| 505 | + |
480 | 506 | # QRF imputes formula-level variables (e.g. taxable_pension_income) |
481 | 507 | # but we must store them under leaf input names so |
482 | 508 | # _drop_formula_variables doesn't discard them. The engine then |
@@ -526,7 +552,7 @@ def _drop_formula_variables(cls, data): |
526 | 552 | if (hasattr(var, "formulas") and len(var.formulas) > 0) |
527 | 553 | or getattr(var, "adds", None) |
528 | 554 | or getattr(var, "subtracts", None) |
529 | | - } - cls._KEEP_FORMULA_VARS |
| 555 | + } - cls._keep_formula_vars() |
530 | 556 | dropped = sorted(set(data.keys()) & formula_vars) |
531 | 557 | if dropped: |
532 | 558 | logger.info( |
|
0 commit comments