1212 CPS_2024_Full ,
1313 ESI_POLICYHOLDER_VARIABLE ,
1414 _open_dataset_read_only ,
15+ load_take_up_rate ,
1516)
17+ from policyengine_us_data .datasets .cps .takeup import prioritize_reported_recipients
1618from policyengine_us_data .datasets .org import (
1719 ORG_IMPUTED_VARIABLES ,
1820 apply_org_domain_constraints ,
3840 get_retirement_limits ,
3941 get_se_pension_limits ,
4042)
43+ from policyengine_us_data .utils .randomness import seeded_rng
4144
4245logger = logging .getLogger (__name__ )
4346
@@ -686,7 +689,7 @@ def reconcile_ss_subcomponents(predictions, total_ss):
686689 "rent" ,
687690 "spm_unit_capped_work_childcare_expenses" ,
688691}
689- _MIN_MODELED_HOUSING_SHARE_OF_BENCHMARK = 0.01
692+ _MIN_MODELED_HOUSING_SHARE_OF_BENCHMARK = 0.60
690693
691694
692695class _InMemoryTimePeriodDataset (Dataset ):
@@ -707,7 +710,11 @@ def load_dataset(self):
707710 return self ._data
708711
709712
710- def _load_raw_spm_capped_housing_subsidy (cps_dataset , time_period : int ):
713+ def _load_raw_spm_capped_housing_subsidy (
714+ cps_dataset ,
715+ time_period : int ,
716+ target_spm_unit_ids = None ,
717+ ):
711718 """Load Census SPM capped housing subsidy for validation only."""
712719
713720 raw_cps = getattr (cps_dataset , "raw_cps" , None )
@@ -719,6 +726,23 @@ def _load_raw_spm_capped_housing_subsidy(cps_dataset, time_period: int):
719726 if "SPM_CAPHOUSESUB" not in spm_unit .columns :
720727 return None
721728 values = np .asarray (spm_unit ["SPM_CAPHOUSESUB" ], dtype = float )
729+ if target_spm_unit_ids is not None :
730+ if "SPM_ID" in spm_unit .columns :
731+ raw_spm_unit_ids = np .asarray (spm_unit ["SPM_ID" ])
732+ else :
733+ raw_spm_unit_ids = np .asarray (spm_unit .index )
734+ raw_index = pd .Index (raw_spm_unit_ids .astype (str ))
735+ target_index = pd .Index (np .asarray (target_spm_unit_ids ).astype (str ))
736+ aligned = pd .Series (values , index = raw_index ).reindex (target_index )
737+ if aligned .isna ().any ():
738+ missing_count = int (aligned .isna ().sum ())
739+ logger .warning (
740+ "Skipping raw SPM capped housing subsidy validation benchmark "
741+ "because %d CPS SPM unit IDs are absent from raw ASEC." ,
742+ missing_count ,
743+ )
744+ return None
745+ values = aligned .to_numpy (dtype = float )
722746
723747 return {time_period : values }
724748
@@ -881,22 +905,16 @@ def generate(self):
881905 data_dict = {}
882906 for var in data :
883907 data_dict [var ] = {self .time_period : data [var ][...]}
908+ spm_unit_ids = data_dict .get ("spm_unit_id" , {}).get (self .time_period )
884909 raw_spm_capped_housing_subsidy = _load_raw_spm_capped_housing_subsidy (
885910 self .cps ,
886911 self .time_period ,
912+ target_spm_unit_ids = spm_unit_ids ,
887913 )
888914 if raw_spm_capped_housing_subsidy is not None :
889- source_values = raw_spm_capped_housing_subsidy [self .time_period ]
890- spm_unit_ids = data_dict .get ("spm_unit_id" , {}).get (self .time_period )
891- if spm_unit_ids is not None and len (source_values ) == len (spm_unit_ids ):
892- data_dict ["spm_unit_capped_housing_subsidy" ] = (
893- raw_spm_capped_housing_subsidy
894- )
895- else :
896- logger .warning (
897- "Skipping raw SPM capped housing subsidy validation benchmark "
898- "due to SPM unit length mismatch"
899- )
915+ data_dict ["spm_unit_capped_housing_subsidy" ] = (
916+ raw_spm_capped_housing_subsidy
917+ )
900918
901919 state_fips = data_dict ["state_fips" ][self .time_period ]
902920 county_fips = data_dict .get ("county_fips" , {}).get (self .time_period )
@@ -953,6 +971,10 @@ def generate(self):
953971 new_data = self ._impute_aotc_eligibility_inputs (new_data , self .time_period )
954972 new_data = self ._impute_llc_eligibility_inputs (new_data , self .time_period )
955973 new_data = self ._rename_imputed_to_inputs (new_data )
974+ new_data = self ._reassign_housing_assistance_takeup_with_geography (
975+ new_data ,
976+ self .time_period ,
977+ )
956978 new_data = self ._validate_housing_assistance_microsimulation (
957979 new_data ,
958980 self .time_period ,
@@ -1418,6 +1440,99 @@ def _validate_housing_assistance_microsimulation(
14181440 )
14191441 return data
14201442
@classmethod
def _reassign_housing_assistance_takeup_with_geography(
    cls,
    data,
    time_period,
    microsimulation_cls=None,
    take_up_rate=None,
    draws=None,
):
    """Recompute housing-assistance take-up after county assignment.

    CPS add_takeup runs before the ExtendedCPS geography assignment, so
    HUD income-limit eligibility can only anchor on reported recipients at
    that point. Reassign here, after county_fips is present and after PUF
    clone income variables have been spliced in, so reported recipients are
    preserved while non-reported take-up is drawn from the full HUD-eligible
    pool.

    Args:
        data: Mapping of variable name to ``{time_period: values}``. It is
            modified in place: ``takes_up_housing_assistance_if_eligible``
            is replaced with the reassigned values.
        time_period: Period key used to read and write ``data`` and passed
            to the simulation's ``calculate`` calls.
        microsimulation_cls: Class instantiated with a ``dataset=`` keyword
            to compute eligibility and weights. Defaults to
            ``policyengine_us.Microsimulation`` (imported lazily).
        take_up_rate: Rate forwarded to ``prioritize_reported_recipients``.
            Defaults to ``load_take_up_rate("housing_assistance",
            time_period)``.
        draws: One random draw per unit. Defaults to draws from
            ``seeded_rng("takes_up_housing_assistance_if_eligible")``.

    Returns:
        The same ``data`` object. It is returned untouched when
        ``county_fips`` has no entry for ``time_period`` or when neither
        ``receives_housing_assistance`` nor
        ``takes_up_housing_assistance_if_eligible`` is present for it.

    Raises:
        ValueError: If ``receives_housing_assistance``, the computed
            weights, or ``draws`` do not have one entry per eligibility
            value.
    """

    def _require_length(label, values, expected):
        # Fail with a readable message instead of NumPy's generic
        # boolean-index IndexError further down.
        if len(values) != expected:
            raise ValueError(
                f"{label} length does not match the number of units when "
                "reassigning housing assistance take-up: "
                f"got {len(values)}, expected {expected}."
            )

    if "county_fips" not in data or time_period not in data["county_fips"]:
        return data

    receives = data.get("receives_housing_assistance", {}).get(time_period)
    existing_takeup = data.get(
        "takes_up_housing_assistance_if_eligible", {}
    ).get(time_period)
    if receives is None and existing_takeup is None:
        return data

    if receives is not None:
        receives = np.asarray(receives, dtype=bool)
    if draws is not None:
        draws = np.asarray(draws)
        if receives is not None:
            # Cheap consistency check before building the simulation.
            _require_length("draws", draws, len(receives))

    if microsimulation_cls is None:
        from policyengine_us import Microsimulation

        microsimulation_cls = Microsimulation

    # Drop formula outputs so the simulation recomputes them from inputs.
    simulation_inputs = {
        variable: values
        for variable, values in data.items()
        if variable not in _HOUSING_ASSISTANCE_FORMULA_OUTPUTS
    }
    simulation = microsimulation_cls(
        dataset=_InMemoryTimePeriodDataset(simulation_inputs, time_period)
    )
    eligible = simulation.calculate(
        "is_eligible_for_housing_assistance",
        time_period,
    )
    # calculate() may return a wrapper exposing .values or a plain array.
    eligible = np.asarray(getattr(eligible, "values", eligible), dtype=bool)
    spm_unit_weight = simulation.calculate(
        "spm_unit_weight",
        time_period,
        use_weights=False,
    )
    weights = np.asarray(
        getattr(spm_unit_weight, "values", spm_unit_weight),
        dtype=float,
    )

    if receives is None:
        receives = np.zeros_like(eligible, dtype=bool)

    if len(receives) != len(eligible):
        raise ValueError(
            "receives_housing_assistance length does not match HUD "
            "eligibility length when reassigning housing assistance "
            f"take-up: got {len(receives)}, expected {len(eligible)}."
        )
    _require_length("spm_unit_weight", weights, len(eligible))

    if draws is None:
        rng = seeded_rng("takes_up_housing_assistance_if_eligible")
        draws = rng.random(len(receives))
    else:
        _require_length("draws", draws, len(receives))
    if take_up_rate is None:
        take_up_rate = load_take_up_rate("housing_assistance", time_period)

    reassigned_takeup = np.zeros_like(receives, dtype=bool)
    # Assign positive-weight units separately from all other units. Using
    # the complement (rather than ``weights <= 0``) keeps the partition
    # exhaustive: a NaN weight compares False both ways and would otherwise
    # be left out of every group.
    has_positive_weight = weights > 0
    for assignment_group in (has_positive_weight, ~has_positive_weight):
        if not assignment_group.any():
            continue
        reassigned_takeup[assignment_group] = prioritize_reported_recipients(
            receives[assignment_group],
            take_up_rate,
            draws[assignment_group],
            eligible_mask=eligible[assignment_group],
        )

    data["takes_up_housing_assistance_if_eligible"] = {
        time_period: reassigned_takeup
    }
    return data
1535+
14211536 @classmethod
14221537 def _drop_housing_assistance_formula_outputs (cls , data ):
14231538 """Remove housing assistance formula outputs after validation."""
0 commit comments