Skip to content

Commit 25792eb

Browse files
authored
Reassign housing assistance take-up after geography (#1000)
1 parent 78b18c9 commit 25792eb

3 files changed

Lines changed: 254 additions & 13 deletions

File tree

changelog.d/1000.fixed

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Recompute housing assistance take-up after county geography is assigned.

policyengine_us_data/datasets/cps/extended_cps.py

Lines changed: 128 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,9 @@
1212
CPS_2024_Full,
1313
ESI_POLICYHOLDER_VARIABLE,
1414
_open_dataset_read_only,
15+
load_take_up_rate,
1516
)
17+
from policyengine_us_data.datasets.cps.takeup import prioritize_reported_recipients
1618
from policyengine_us_data.datasets.org import (
1719
ORG_IMPUTED_VARIABLES,
1820
apply_org_domain_constraints,
@@ -38,6 +40,7 @@
3840
get_retirement_limits,
3941
get_se_pension_limits,
4042
)
43+
from policyengine_us_data.utils.randomness import seeded_rng
4144

4245
logger = logging.getLogger(__name__)
4346

@@ -686,7 +689,7 @@ def reconcile_ss_subcomponents(predictions, total_ss):
686689
"rent",
687690
"spm_unit_capped_work_childcare_expenses",
688691
}
689-
_MIN_MODELED_HOUSING_SHARE_OF_BENCHMARK = 0.01
692+
_MIN_MODELED_HOUSING_SHARE_OF_BENCHMARK = 0.60
690693

691694

692695
class _InMemoryTimePeriodDataset(Dataset):
@@ -707,7 +710,11 @@ def load_dataset(self):
707710
return self._data
708711

709712

710-
def _load_raw_spm_capped_housing_subsidy(cps_dataset, time_period: int):
713+
def _load_raw_spm_capped_housing_subsidy(
714+
cps_dataset,
715+
time_period: int,
716+
target_spm_unit_ids=None,
717+
):
711718
"""Load Census SPM capped housing subsidy for validation only."""
712719

713720
raw_cps = getattr(cps_dataset, "raw_cps", None)
@@ -719,6 +726,23 @@ def _load_raw_spm_capped_housing_subsidy(cps_dataset, time_period: int):
719726
if "SPM_CAPHOUSESUB" not in spm_unit.columns:
720727
return None
721728
values = np.asarray(spm_unit["SPM_CAPHOUSESUB"], dtype=float)
729+
if target_spm_unit_ids is not None:
730+
if "SPM_ID" in spm_unit.columns:
731+
raw_spm_unit_ids = np.asarray(spm_unit["SPM_ID"])
732+
else:
733+
raw_spm_unit_ids = np.asarray(spm_unit.index)
734+
raw_index = pd.Index(raw_spm_unit_ids.astype(str))
735+
target_index = pd.Index(np.asarray(target_spm_unit_ids).astype(str))
736+
aligned = pd.Series(values, index=raw_index).reindex(target_index)
737+
if aligned.isna().any():
738+
missing_count = int(aligned.isna().sum())
739+
logger.warning(
740+
"Skipping raw SPM capped housing subsidy validation benchmark "
741+
"because %d CPS SPM unit IDs are absent from raw ASEC.",
742+
missing_count,
743+
)
744+
return None
745+
values = aligned.to_numpy(dtype=float)
722746

723747
return {time_period: values}
724748

@@ -881,22 +905,16 @@ def generate(self):
881905
data_dict = {}
882906
for var in data:
883907
data_dict[var] = {self.time_period: data[var][...]}
908+
spm_unit_ids = data_dict.get("spm_unit_id", {}).get(self.time_period)
884909
raw_spm_capped_housing_subsidy = _load_raw_spm_capped_housing_subsidy(
885910
self.cps,
886911
self.time_period,
912+
target_spm_unit_ids=spm_unit_ids,
887913
)
888914
if raw_spm_capped_housing_subsidy is not None:
889-
source_values = raw_spm_capped_housing_subsidy[self.time_period]
890-
spm_unit_ids = data_dict.get("spm_unit_id", {}).get(self.time_period)
891-
if spm_unit_ids is not None and len(source_values) == len(spm_unit_ids):
892-
data_dict["spm_unit_capped_housing_subsidy"] = (
893-
raw_spm_capped_housing_subsidy
894-
)
895-
else:
896-
logger.warning(
897-
"Skipping raw SPM capped housing subsidy validation benchmark "
898-
"due to SPM unit length mismatch"
899-
)
915+
data_dict["spm_unit_capped_housing_subsidy"] = (
916+
raw_spm_capped_housing_subsidy
917+
)
900918

901919
state_fips = data_dict["state_fips"][self.time_period]
902920
county_fips = data_dict.get("county_fips", {}).get(self.time_period)
@@ -953,6 +971,10 @@ def generate(self):
953971
new_data = self._impute_aotc_eligibility_inputs(new_data, self.time_period)
954972
new_data = self._impute_llc_eligibility_inputs(new_data, self.time_period)
955973
new_data = self._rename_imputed_to_inputs(new_data)
974+
new_data = self._reassign_housing_assistance_takeup_with_geography(
975+
new_data,
976+
self.time_period,
977+
)
956978
new_data = self._validate_housing_assistance_microsimulation(
957979
new_data,
958980
self.time_period,
@@ -1418,6 +1440,99 @@ def _validate_housing_assistance_microsimulation(
14181440
)
14191441
return data
14201442

1443+
@classmethod
def _reassign_housing_assistance_takeup_with_geography(
    cls,
    data,
    time_period,
    microsimulation_cls=None,
    take_up_rate=None,
    draws=None,
):
    """Recompute housing-assistance take-up after county assignment.

    CPS add_takeup runs before the ExtendedCPS geography assignment, so
    HUD income-limit eligibility can only anchor on reported recipients at
    that point. Reassign here, after county_fips is present and after PUF
    clone income variables have been spliced in, so reported recipients are
    preserved while non-reported take-up is drawn from the full HUD-eligible
    pool.

    Args:
        data: Mapping of variable name to ``{time_period: values}``. It is
            updated in place with the recomputed
            ``takes_up_housing_assistance_if_eligible`` values.
        time_period: Period key used to read the inputs and to run the
            simulation.
        microsimulation_cls: Simulation class to instantiate. Defaults to
            ``policyengine_us.Microsimulation``, imported lazily.
        take_up_rate: Rate passed to ``prioritize_reported_recipients``.
            Defaults to ``load_take_up_rate("housing_assistance",
            time_period)``.
        draws: One random draw per unit. Defaults to draws from
            ``seeded_rng("takes_up_housing_assistance_if_eligible")``.

    Returns:
        The same ``data`` mapping. It is returned untouched when
        ``county_fips`` has no entry for ``time_period`` or when neither
        ``receives_housing_assistance`` nor
        ``takes_up_housing_assistance_if_eligible`` is present.

    Raises:
        ValueError: If ``receives_housing_assistance``, the simulated
            ``spm_unit_weight``, or ``draws`` does not have one entry per
            unit of the simulated eligibility array.
    """
    if "county_fips" not in data or time_period not in data["county_fips"]:
        return data

    receives = data.get("receives_housing_assistance", {}).get(time_period)
    existing_takeup = data.get("takes_up_housing_assistance_if_eligible", {}).get(
        time_period
    )
    if receives is None and existing_takeup is None:
        return data

    if microsimulation_cls is None:
        from policyengine_us import Microsimulation

        microsimulation_cls = Microsimulation

    # Strip stored formula outputs so the simulation does not receive them
    # as inputs.
    simulation_inputs = {
        variable: values
        for variable, values in data.items()
        if variable not in _HOUSING_ASSISTANCE_FORMULA_OUTPUTS
    }
    simulation = microsimulation_cls(
        dataset=_InMemoryTimePeriodDataset(simulation_inputs, time_period)
    )
    eligible = simulation.calculate(
        "is_eligible_for_housing_assistance",
        time_period,
    )
    # Results may be wrapper objects exposing ``.values`` or plain arrays.
    eligible = np.asarray(getattr(eligible, "values", eligible), dtype=bool)
    spm_unit_weight = simulation.calculate(
        "spm_unit_weight",
        time_period,
        use_weights=False,
    )
    weights = np.asarray(
        getattr(spm_unit_weight, "values", spm_unit_weight),
        dtype=float,
    )

    if receives is None:
        receives = np.zeros_like(eligible, dtype=bool)
    else:
        receives = np.asarray(receives, dtype=bool)

    if len(receives) != len(eligible):
        raise ValueError(
            "receives_housing_assistance length does not match HUD "
            "eligibility length when reassigning housing assistance "
            f"take-up: got {len(receives)}, expected {len(eligible)}."
        )
    # Fail early with a clear message instead of an opaque boolean-index
    # IndexError inside the assignment loop below.
    if weights.shape[:1] != eligible.shape[:1]:
        raise ValueError(
            "spm_unit_weight length does not match HUD eligibility length "
            "when reassigning housing assistance take-up: got "
            f"{weights.shape[:1]}, expected {eligible.shape[:1]}."
        )

    if draws is None:
        rng = seeded_rng("takes_up_housing_assistance_if_eligible")
        draws = rng.random(len(receives))
    if take_up_rate is None:
        take_up_rate = load_take_up_rate("housing_assistance", time_period)

    draws = np.asarray(draws)
    if draws.shape[:1] != receives.shape[:1]:
        raise ValueError(
            "draws length does not match receives_housing_assistance length "
            "when reassigning housing assistance take-up: got "
            f"{draws.shape[:1]}, expected {receives.shape[:1]}."
        )

    reassigned_takeup = np.zeros_like(receives, dtype=bool)
    # Positive-weight units and the rest are assigned independently.
    # NOTE(review): presumably this keeps zero-weight clone records from
    # counting toward the take-up assignment of weighted records — confirm.
    # The second group is the complement (not ``weights <= 0``) so that a
    # NaN weight still lands in a group and its reported receipt is kept.
    positive_weight = weights > 0
    for assignment_group in (positive_weight, ~positive_weight):
        if not assignment_group.any():
            continue
        reassigned_takeup[assignment_group] = prioritize_reported_recipients(
            receives[assignment_group],
            take_up_rate,
            draws[assignment_group],
            eligible_mask=eligible[assignment_group],
        )

    data["takes_up_housing_assistance_if_eligible"] = {
        time_period: reassigned_takeup
    }
    return data
1535+
14211536
@classmethod
14221537
def _drop_housing_assistance_formula_outputs(cls, data):
14231538
"""Remove housing assistance formula outputs after validation."""

tests/unit/test_extended_cps.py

Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
4. Post-processing constraints enforce IRS caps and SS normalization
88
"""
99

10+
from contextlib import contextmanager
11+
1012
import numpy as np
1113
import pandas as pd
1214
import pytest
@@ -24,6 +26,7 @@
2426
CPS_STAGE2_DEMOGRAPHIC_PREDICTORS,
2527
CPS_STAGE2_INCOME_PREDICTORS,
2628
ExtendedCPS,
29+
_load_raw_spm_capped_housing_subsidy,
2730
_apply_post_processing,
2831
_build_clone_test_frame,
2932
_derive_overtime_occupation_inputs,
@@ -58,6 +61,10 @@ def calculate(self, variable, period, **kwargs):
5861
return type(self).outputs[variable]
5962

6063

64+
class _FakeCPSDataset:
65+
raw_cps = object()
66+
67+
6168
class TestVariableListConsistency:
6269
"""Variable lists should not overlap — no variable should be
6370
imputed by two different mechanisms."""
@@ -66,6 +73,34 @@ def test_no_overlap_imputed_and_cps_only(self):
6673
overlap = set(IMPUTED_VARIABLES) & set(CPS_ONLY_IMPUTED_VARIABLES)
6774
assert overlap == set(), f"Variables in both IMPUTED and CPS_ONLY: {overlap}"
6875

76+
def test_load_raw_spm_capped_housing_subsidy_aligns_to_spm_unit_ids(
    self, monkeypatch
):
    """Benchmark values come back in the order of the requested SPM IDs."""
    subsidy_by_spm_id = {10: 100.0, 20: 200.0, 30: 300.0}
    raw_spm_unit = pd.DataFrame(
        {
            "SPM_ID": list(subsidy_by_spm_id),
            "SPM_CAPHOUSESUB": list(subsidy_by_spm_id.values()),
        }
    )

    @contextmanager
    def fake_open_dataset_read_only(dataset_source):
        # Replacement for the module-level opener: serves only the
        # in-memory spm_unit table regardless of the source passed in.
        yield {"spm_unit": raw_spm_unit}

    monkeypatch.setattr(
        extended_cps_module,
        "_open_dataset_read_only",
        fake_open_dataset_read_only,
    )

    # Request a reordered subset of the raw IDs.
    requested_ids = np.array([30, 10])
    benchmark = _load_raw_spm_capped_housing_subsidy(
        _FakeCPSDataset,
        2024,
        target_spm_unit_ids=requested_ids,
    )

    assert benchmark[2024].tolist() == [300.0, 100.0]
103+
69104
def test_no_overlap_overridden_and_cps_only(self):
70105
overlap = set(OVERRIDDEN_IMPUTED_VARIABLES) & set(CPS_ONLY_IMPUTED_VARIABLES)
71106
assert overlap == set(), f"Variables in both OVERRIDDEN and CPS_ONLY: {overlap}"
@@ -367,6 +402,96 @@ def test_housing_assistance_validation_rejects_tiny_reported_match(self):
367402
microsimulation_cls=_FakeHousingMicrosimulation,
368403
)
369404

405+
def test_housing_assistance_validation_rejects_half_reported_match(self):
    """A simulated capped subsidy of 59 against a stored 100 is rejected."""
    period = 2024
    stored_inputs = {
        "receives_housing_assistance": {period: np.array([True])},
        "takes_up_housing_assistance_if_eligible": {period: np.array([True])},
        "spm_unit_capped_housing_subsidy": {period: np.array([100.0])},
    }
    # Canned simulation results returned by the fake microsimulation.
    _FakeHousingMicrosimulation.outputs = {
        "housing_assistance": np.array([100.0]),
        "spm_unit_capped_housing_subsidy": np.array([59.0]),
        "spm_unit_weight": np.array([1.0]),
    }

    with pytest.raises(RuntimeError, match="implausibly small"):
        ExtendedCPS._validate_housing_assistance_microsimulation(
            stored_inputs,
            period,
            microsimulation_cls=_FakeHousingMicrosimulation,
        )
423+
424+
def test_reassign_housing_assistance_takeup_uses_geographic_eligibility(self):
    """Take-up is redrawn from simulated eligibility, not stored outputs."""
    period = 2024
    reported = [True, False, False, False]
    stale_formula_output = 99_000.0
    data = {
        "county_fips": {period: np.array([1001, 1003, 1005, 1007])},
        "receives_housing_assistance": {period: np.array(reported)},
        "takes_up_housing_assistance_if_eligible": {period: np.array(reported)},
        # Stored formula outputs that must not reach the simulation.
        "housing_assistance": {period: np.full(4, stale_formula_output)},
        "spm_unit_capped_housing_subsidy": {
            period: np.full(4, stale_formula_output)
        },
    }
    _FakeHousingMicrosimulation.outputs = {
        "is_eligible_for_housing_assistance": np.array([True, True, True, False]),
        "spm_unit_weight": np.ones(4),
    }

    result = ExtendedCPS._reassign_housing_assistance_takeup_with_geography(
        data,
        period,
        microsimulation_cls=_FakeHousingMicrosimulation,
        take_up_rate=0.75,
        draws=np.array([0.0, 0.9, 0.0, 0.0]),
    )

    reassigned = result["takes_up_housing_assistance_if_eligible"][period]
    assert reassigned.tolist() == [
        True,
        False,
        True,
        False,
    ]
    seen_inputs = _FakeHousingMicrosimulation.seen_data
    assert "housing_assistance" not in seen_inputs
    assert "spm_unit_capped_housing_subsidy" not in seen_inputs
460+
461+
def test_reassign_housing_assistance_takeup_separates_zero_weight_clones(self):
    """Weighted and zero-weight units each get their own take-up draw."""
    period = 2024
    unit_count = 6
    # First three units carry weight; the last three are zero-weight.
    reported = [True, False, False, True, False, False]
    data = {
        "county_fips": {period: np.arange(unit_count)},
        "receives_housing_assistance": {period: np.array(reported)},
        "takes_up_housing_assistance_if_eligible": {period: np.array(reported)},
    }
    _FakeHousingMicrosimulation.outputs = {
        "is_eligible_for_housing_assistance": np.ones(unit_count, dtype=bool),
        "spm_unit_weight": np.array([1.0, 1.0, 1.0, 0.0, 0.0, 0.0]),
    }

    result = ExtendedCPS._reassign_housing_assistance_takeup_with_geography(
        data,
        period,
        microsimulation_cls=_FakeHousingMicrosimulation,
        take_up_rate=2 / 3,
        draws=np.array([0.0, 0.0, 0.9, 0.0, 0.0, 0.9]),
    )

    reassigned = result["takes_up_housing_assistance_if_eligible"][period]
    # The same pattern is expected in both halves of the array.
    assert reassigned.tolist() == [
        True,
        True,
        False,
        True,
        True,
        False,
    ]
494+
370495
def test_drop_housing_assistance_formula_outputs_after_validation(self):
371496
data = {
372497
"housing_assistance": {2024: np.array([1_000.0])},

0 commit comments

Comments
 (0)