Skip to content

Commit 71468a4

Browse files
authored
Populate SPM subsidies and drop resource aggregates
1 parent 2ca687d commit 71468a4

23 files changed

Lines changed: 129 additions & 115 deletions
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Populate the SPM housing and energy subsidy concepts directly from CPS ASEC SPM fields, and keep Census SPM resource aggregates out of generated pipeline datasets.

policyengine_us_data/calibration/formulaic_inputs.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
"""Formula outputs that must not be persisted as dataset leaf inputs."""
1+
"""SPM formula/output aggregates that must not be persisted as leaf inputs."""
22

33
FORMULAIC_SPM_INPUTS_TO_DROP = frozenset(
44
{
@@ -9,11 +9,13 @@
99
"spm_unit_is_in_deep_spm_poverty",
1010
"spm_unit_spm_threshold",
1111
"spm_unit_geographic_adjustment",
12+
"spm_unit_total_income_reported",
13+
"spm_unit_net_income_reported",
1214
}
1315
)
1416

1517

1618
def drop_formulaic_spm_inputs(variable_names: set[str]) -> None:
17-
"""Remove SPM formula outputs from a mutable variable-name set."""
19+
"""Remove SPM formula/output aggregates from a mutable variable-name set."""
1820

1921
variable_names.difference_update(FORMULAIC_SPM_INPUTS_TO_DROP)

policyengine_us_data/datasets/cps/census_cps.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -354,6 +354,9 @@ class CensusCPS_2018(CensusCPS):
354354
"PTOTVAL",
355355
"OI_OFF",
356356
"OI_VAL",
357+
"ED_VAL",
358+
"FIN_VAL",
359+
"SRVS_VAL",
357360
"CSP_VAL",
358361
"PAW_VAL",
359362
"SSI_VAL",

policyengine_us_data/datasets/cps/cps.py

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -443,9 +443,7 @@ def add_rent(self, cps: h5py.File, person: DataFrame, household: DataFrame):
443443
cps["rent"][mask] = imputed_values["rent"]
444444
# Assume zero housing assistance, so pre-subsidy rent equals rent.
445445
cps["pre_subsidy_rent"] = cps["rent"]
446-
cps["housing_assistance"] = np.zeros_like(
447-
cps["spm_unit_capped_housing_subsidy_data"]
448-
)
446+
cps["housing_assistance"] = np.zeros_like(cps["spm_unit_capped_housing_subsidy"])
449447
cps["real_estate_taxes"] = np.zeros(len(cps["age"]), dtype=float)
450448
cps["real_estate_taxes"][mask] = imputed_values["real_estate_taxes"]
451449

@@ -1276,9 +1274,19 @@ def add_personal_income_variables(cps: h5py.File, person: DataFrame, year: int):
12761274
cps["tax_exempt_ira_distributions"] = cps["roth_ira_distributions"]
12771275
# Other income (OI_VAL) is a catch-all for all other income sources.
12781276
# The code for alimony income is 20.
1279-
cps["alimony_income"] = (person.OI_OFF == 20) * person.OI_VAL
1277+
alimony_income = person.OI_OFF == 20
1278+
cps["alimony_income"] = alimony_income * person.OI_VAL
12801279
# The code for strike benefits is 12.
1281-
cps["strike_benefits"] = (person.OI_OFF == 12) * person.OI_VAL
1280+
strike_benefits = person.OI_OFF == 12
1281+
cps["strike_benefits"] = strike_benefits * person.OI_VAL
1282+
cps["miscellaneous_income"] = np.where(
1283+
alimony_income | strike_benefits,
1284+
0,
1285+
person.OI_VAL,
1286+
)
1287+
cps["educational_assistance"] = person.ED_VAL
1288+
cps["financial_assistance"] = person.FIN_VAL
1289+
cps["survivor_benefits"] = person.SRVS_VAL
12821290
cps["child_support_received"] = person.CSP_VAL
12831291
# CPS SSI receipt anchors SSI take-up and disability alignment inside
12841292
# add_takeup; it is dropped before the dataset is saved.
@@ -1414,12 +1422,10 @@ def add_personal_income_variables(cps: h5py.File, person: DataFrame, year: int):
14141422
)
14151423
def add_spm_variables(self, cps: h5py.File, spm_unit: DataFrame) -> None:
14161424
SPM_RENAMES = dict(
1417-
spm_unit_total_income_reported="SPM_TOTVAL",
14181425
snap_reported="SPM_SNAPSUB",
1419-
spm_unit_capped_housing_subsidy_data="SPM_CAPHOUSESUB",
1420-
spm_unit_energy_subsidy_data="SPM_ENGVAL",
1426+
spm_unit_capped_housing_subsidy="SPM_CAPHOUSESUB",
1427+
spm_unit_energy_subsidy="SPM_ENGVAL",
14211428
spm_unit_capped_work_childcare_expenses="SPM_CAPWKCCXPNS",
1422-
spm_unit_net_income_reported="SPM_RESOURCES",
14231429
spm_unit_pre_subsidy_childcare_expenses="SPM_CHILDCAREXPNS",
14241430
)
14251431

policyengine_us_data/datasets/cps/enhanced_cps.py

Lines changed: 2 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,6 @@ def compute_clone_diagnostics_summary(
8383
person_is_puf_clone,
8484
person_weight,
8585
person_in_poverty,
86-
person_reported_in_poverty,
8786
spm_unit_is_puf_clone,
8887
spm_unit_weight,
8988
spm_unit_capped_work_childcare_expenses,
@@ -96,7 +95,6 @@ def compute_clone_diagnostics_summary(
9695
person_is_puf_clone = np.asarray(person_is_puf_clone, dtype=bool)
9796
person_weight = np.asarray(person_weight, dtype=np.float64)
9897
person_in_poverty = np.asarray(person_in_poverty, dtype=bool)
99-
person_reported_in_poverty = np.asarray(person_reported_in_poverty, dtype=bool)
10098
spm_unit_is_puf_clone = np.asarray(spm_unit_is_puf_clone, dtype=bool)
10199
spm_unit_weight = np.asarray(spm_unit_weight, dtype=np.float64)
102100
capped_childcare = np.asarray(
@@ -108,7 +106,6 @@ def compute_clone_diagnostics_summary(
108106
spm_unit_taxes = np.asarray(spm_unit_taxes, dtype=np.float64)
109107
spm_unit_market_income = np.asarray(spm_unit_market_income, dtype=np.float64)
110108

111-
poor_modeled_only = person_in_poverty & ~person_reported_in_poverty
112109
clone_spm_weight = spm_unit_weight[spm_unit_is_puf_clone].sum()
113110

114111
return {
@@ -118,18 +115,10 @@ def compute_clone_diagnostics_summary(
118115
"clone_person_weight_share_pct": _weighted_share(
119116
person_is_puf_clone, person_weight
120117
),
121-
"clone_poor_modeled_only_person_weight_share_pct": _weighted_share(
122-
person_is_puf_clone & poor_modeled_only,
118+
"clone_poor_person_weight_share_pct": _weighted_share(
119+
person_is_puf_clone & person_in_poverty,
123120
person_weight,
124121
),
125-
"poor_modeled_only_within_clone_person_weight_share_pct": (
126-
0.0
127-
if person_weight[person_is_puf_clone].sum() <= 0
128-
else _weighted_share(
129-
poor_modeled_only[person_is_puf_clone],
130-
person_weight[person_is_puf_clone],
131-
)
132-
),
133122
"clone_childcare_exceeds_pre_subsidy_share_pct": (
134123
0.0
135124
if clone_spm_weight <= 0
@@ -269,12 +258,6 @@ def build_clone_diagnostics_for_simulation(
269258
weight inputs back from disk.
270259
"""
271260

272-
person_reported_in_poverty = _to_numpy(
273-
sim.calculate("spm_unit_net_income_reported", period=period, map_to="person")
274-
) < _to_numpy(
275-
sim.calculate("spm_unit_spm_threshold", period=period, map_to="person")
276-
)
277-
278261
return compute_clone_diagnostics_summary(
279262
household_is_puf_clone=_load_saved_period_array(
280263
dataset_path, "household_is_puf_clone", period
@@ -287,7 +270,6 @@ def build_clone_diagnostics_for_simulation(
287270
sim.calculate("household_weight", period=period, map_to="person")
288271
),
289272
person_in_poverty=_to_numpy(sim.calculate("person_in_poverty", period=period)),
290-
person_reported_in_poverty=person_reported_in_poverty,
291273
spm_unit_is_puf_clone=_load_saved_period_array(
292274
dataset_path, "spm_unit_is_puf_clone", period
293275
),

policyengine_us_data/datasets/cps/extended_cps.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -167,14 +167,15 @@ def _supports_structural_mortgage_inputs() -> bool:
167167
"child_support_received",
168168
"veterans_benefits",
169169
"workers_compensation",
170+
"educational_assistance",
171+
"financial_assistance",
172+
"survivor_benefits",
170173
"disability_benefits",
171174
"strike_benefits",
172175
"receives_wic",
173176
# SPM variables
174-
"spm_unit_total_income_reported",
175-
"spm_unit_capped_housing_subsidy_data",
176-
"spm_unit_energy_subsidy_data",
177-
"spm_unit_net_income_reported",
177+
"spm_unit_capped_housing_subsidy",
178+
"spm_unit_energy_subsidy",
178179
"spm_unit_pre_subsidy_childcare_expenses",
179180
# Medical expenses
180181
"employer_sponsored_insurance_premiums",

policyengine_us_data/db/etl_national_targets.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -476,15 +476,15 @@ def extract_national_targets(year: int = DEFAULT_YEAR):
476476
"year": 2024,
477477
},
478478
{
479-
"constraint_variable": "spm_unit_energy_subsidy_data",
479+
"constraint_variable": "spm_unit_energy_subsidy",
480480
"target_variable": "household_count",
481481
"household_count": 5_939_605,
482482
"source": "https://liheappm.acf.gov/sites/default/files/private/congress/profiles/2023/FY2023AllStates%28National%29Profile-508Compliant.pdf",
483483
"notes": "LIHEAP total households served by state programs",
484484
"year": 2023,
485485
},
486486
{
487-
"constraint_variable": "spm_unit_energy_subsidy_data",
487+
"constraint_variable": "spm_unit_energy_subsidy",
488488
"target_variable": "household_count",
489489
"household_count": 5_876_646,
490490
"source": "https://liheappm.acf.gov/sites/default/files/private/congress/profiles/2024/FY2024_AllStates%28National%29_Profile.pdf",
@@ -903,7 +903,7 @@ def load_national_targets(
903903
stratum_notes = "National ACA Premium Tax Credit Recipients"
904904
constraint_operation = ">"
905905
constraint_value = "0"
906-
elif constraint_var == "spm_unit_energy_subsidy_data":
906+
elif constraint_var == "spm_unit_energy_subsidy":
907907
stratum_notes = "National LIHEAP Recipient Households"
908908
constraint_operation = ">"
909909
constraint_value = "0"

policyengine_us_data/storage/upload_completed_datasets.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,8 @@ class MicrosimulationAggregateCheck:
9999
"free_school_meals_reported",
100100
"reduced_price_school_meals_reported",
101101
"spm_unit_wic_reported",
102+
"spm_unit_total_income_reported",
103+
"spm_unit_net_income_reported",
102104
"spm_unit_broadband_subsidy",
103105
"spm_unit_broadband_subsidy_reported",
104106
"spm_unit_payroll_tax_reported",
@@ -185,8 +187,7 @@ class MicrosimulationAggregateCheck:
185187
CLONE_DIAGNOSTICS_METRICS = {
186188
"clone_household_weight_share_pct",
187189
"clone_person_weight_share_pct",
188-
"clone_poor_modeled_only_person_weight_share_pct",
189-
"poor_modeled_only_within_clone_person_weight_share_pct",
190+
"clone_poor_person_weight_share_pct",
190191
"clone_childcare_exceeds_pre_subsidy_share_pct",
191192
"clone_childcare_above_5000_share_pct",
192193
"clone_taxes_exceed_market_income_share_pct",

policyengine_us_data/utils/national_target_parity.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -482,9 +482,9 @@ def classify_national_target(
482482
target_name,
483483
index.match(
484484
variable="household_count",
485-
domain_variable="spm_unit_energy_subsidy_data",
485+
domain_variable="spm_unit_energy_subsidy",
486486
period=period,
487-
constraints=[_constraint("spm_unit_energy_subsidy_data", ">", 0)],
487+
constraints=[_constraint("spm_unit_energy_subsidy", ">", 0)],
488488
),
489489
reason="structured_liheap_target",
490490
)

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ classifiers = [
2222
"Programming Language :: Python :: 3.14",
2323
]
2424
dependencies = [
25-
"policyengine-us==1.691.12",
25+
"policyengine-us @ git+https://github.com/PolicyEngine/policyengine-us@4588f756668f12cac43e847a73e6a1f38b0b296d",
2626
# policyengine-core 3.26.1 is the current 3.26.x runtime and includes the fix for
2727
# PolicyEngine/policyengine-core#482 (user-set ETERNITY inputs lost
2828
# after _invalidate_all_caches) and is required by policyengine-us 1.682.1+.

0 commit comments

Comments
 (0)