Skip to content

Commit 9b459b8

Browse files
authored
Merge pull request #1095 from PolicyEngine/codex/ssi-disability-status-imputation
Impute pre-SGA SSI disability criteria from SIPP
2 parents d06e225 + 045d6fd commit 9b459b8

13 files changed

Lines changed: 700 additions & 6 deletions

File tree

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Impute pre-SGA-screen SSI disability criteria status from SIPP for enhanced CPS datasets.

policyengine_us_data/calibration/source_impute.py

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,12 @@
3838
from policyengine_us_data.datasets.sipp.sipp import (
3939
ASSET_JOB_EARNINGS_COLUMNS,
4040
ASSET_PREDICTORS,
41+
SSI_DISABILITY_MODEL_VARIABLE,
4142
VEHICLE_MODEL_PREDICTORS,
4243
build_vehicle_training_frame,
44+
get_ssi_disability_model,
45+
predict_ssi_disability_criteria,
46+
preserve_under_65_ssi_disability_criteria,
4347
)
4448

4549
from policyengine_us_data.datasets.org import (
@@ -81,6 +85,7 @@
8185
"bank_account_assets",
8286
"stock_assets",
8387
"bond_assets",
88+
SSI_DISABILITY_MODEL_VARIABLE,
8489
"household_vehicles_owned",
8590
"household_vehicles_value",
8691
]
@@ -806,6 +811,83 @@ def _impute_sipp(
806811

807812
logger.info("SIPP asset imputation complete")
808813

814+
cps_ssi_df = _build_cps_receiver(
815+
data,
816+
time_period,
817+
dataset_path,
818+
[
819+
"employment_income",
820+
"interest_income",
821+
"dividend_income",
822+
"rental_income",
823+
"age",
824+
"is_male",
825+
"is_disabled",
826+
"social_security_disability",
827+
"disability_benefits",
828+
],
829+
)
830+
if "is_male" in cps_ssi_df.columns:
831+
cps_ssi_df["is_female"] = (~cps_ssi_df["is_male"].astype(bool)).astype(
832+
np.float32
833+
)
834+
else:
835+
cps_ssi_df["is_female"] = 0.0
836+
if "is_married" in data:
837+
cps_ssi_df["is_married"] = data["is_married"][time_period].astype(
838+
np.float32
839+
)
840+
else:
841+
cps_ssi_df["is_married"] = 0.0
842+
cps_ssi_df["count_under_18"] = (
843+
cps_tip_df["count_under_18"]
844+
if "count_under_18" in cps_tip_df.columns
845+
else 0.0
846+
)
847+
for var in asset_vars:
848+
cps_ssi_df[var] = data[var][time_period].astype(np.float32)
849+
for var in [
850+
"interest_income",
851+
"dividend_income",
852+
"rental_income",
853+
"is_disabled",
854+
"social_security_disability",
855+
]:
856+
if var not in cps_ssi_df.columns:
857+
cps_ssi_df[var] = data.get(var, {}).get(
858+
time_period, np.zeros(len(cps_ssi_df))
859+
)
860+
if "disability_benefits" in cps_ssi_df.columns:
861+
disability_benefits = cps_ssi_df["disability_benefits"]
862+
else:
863+
disability_benefits = data.get("disability_benefits", {}).get(
864+
time_period, np.zeros(len(cps_ssi_df))
865+
)
866+
cps_ssi_df["has_disability_income"] = (
867+
np.asarray(disability_benefits).astype(float) > 0
868+
)
869+
870+
ssi_disability_model = get_ssi_disability_model(time_period=time_period)
871+
meets_ssi_disability_criteria = predict_ssi_disability_criteria(
872+
ssi_disability_model,
873+
cps_ssi_df,
874+
)
875+
existing_meets_ssi_disability_criteria = data.get(
876+
SSI_DISABILITY_MODEL_VARIABLE, {}
877+
).get(time_period)
878+
ssi_reported = data.get("ssi_reported", {}).get(time_period)
879+
meets_ssi_disability_criteria = preserve_under_65_ssi_disability_criteria(
880+
meets_ssi_disability_criteria,
881+
age=data["age"][time_period],
882+
ssi_reported=ssi_reported,
883+
existing_meets_ssi_disability_criteria=existing_meets_ssi_disability_criteria,
884+
)
885+
data[SSI_DISABILITY_MODEL_VARIABLE] = {
886+
time_period: meets_ssi_disability_criteria
887+
}
888+
889+
logger.info("SIPP SSI disability criteria imputation complete")
890+
809891
vehicle_train = build_vehicle_training_frame()
810892
vehicle_train = vehicle_train.loc[
811893
rng.choice(

policyengine_us_data/datasets/cps/cps.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2680,6 +2680,40 @@ def add_tips(self, cps: h5py.File):
26802680
cps["stock_assets"] = asset_predictions.stock_assets.values
26812681
cps["bond_assets"] = asset_predictions.bond_assets.values
26822682

2683+
from policyengine_us_data.datasets.sipp import (
2684+
SSI_DISABILITY_MODEL_VARIABLE,
2685+
get_ssi_disability_model,
2686+
predict_ssi_disability_criteria,
2687+
preserve_under_65_ssi_disability_criteria,
2688+
)
2689+
2690+
n_persons = len(cps)
2691+
for variable in [
2692+
"is_disabled",
2693+
"social_security_disability",
2694+
]:
2695+
cps[variable] = np.asarray(
2696+
existing_data.get(variable, np.zeros(n_persons)),
2697+
)
2698+
disability_benefits = np.asarray(
2699+
existing_data.get("disability_benefits", np.zeros(n_persons)),
2700+
)
2701+
cps["has_disability_income"] = disability_benefits > 0
2702+
ssi_disability_model = get_ssi_disability_model()
2703+
meets_ssi_disability_criteria = predict_ssi_disability_criteria(
2704+
ssi_disability_model,
2705+
cps,
2706+
)
2707+
meets_ssi_disability_criteria = preserve_under_65_ssi_disability_criteria(
2708+
meets_ssi_disability_criteria,
2709+
age=existing_data.get("age", np.full(n_persons, 65)),
2710+
ssi_reported=existing_data.get("ssi_reported"),
2711+
existing_meets_ssi_disability_criteria=existing_data.get(
2712+
SSI_DISABILITY_MODEL_VARIABLE
2713+
),
2714+
)
2715+
cps[SSI_DISABILITY_MODEL_VARIABLE] = meets_ssi_disability_criteria
2716+
26832717
from policyengine_us_data.datasets.sipp import get_vehicle_model
26842718

26852719
vehicle_model = get_vehicle_model()
@@ -2717,6 +2751,7 @@ def add_tips(self, cps: h5py.File):
27172751
"is_under_18",
27182752
"is_under_6",
27192753
"is_household_head",
2754+
"has_disability_income",
27202755
"household_size",
27212756
"retirement_income",
27222757
"non_ssi_income",

policyengine_us_data/datasets/cps/extended_cps.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,7 @@ def _supports_structural_mortgage_inputs() -> bool:
178178
"financial_assistance",
179179
"survivor_benefits",
180180
"disability_benefits",
181+
"meets_ssi_disability_criteria",
181182
"strike_benefits",
182183
"receives_wic",
183184
# SPM variables

policyengine_us_data/datasets/sipp/__init__.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,16 @@
55
get_tip_model,
66
train_asset_model,
77
get_asset_model,
8+
SSI_DISABILITY_MODEL_PREDICTORS,
9+
SSI_DISABILITY_MODEL_VARIABLE,
10+
apply_ssi_disability_signal_screen,
11+
build_ssi_disability_training_frame,
12+
coerce_ssi_disability_predictions,
13+
predict_ssi_disability_criteria,
14+
preserve_under_65_ssi_disability_criteria,
15+
prepare_ssi_disability_receiver,
16+
train_ssi_disability_model,
17+
get_ssi_disability_model,
818
build_vehicle_training_frame,
919
train_vehicle_model,
1020
get_vehicle_model,
@@ -17,6 +27,16 @@
1727
"get_tip_model",
1828
"train_asset_model",
1929
"get_asset_model",
30+
"SSI_DISABILITY_MODEL_PREDICTORS",
31+
"SSI_DISABILITY_MODEL_VARIABLE",
32+
"apply_ssi_disability_signal_screen",
33+
"build_ssi_disability_training_frame",
34+
"coerce_ssi_disability_predictions",
35+
"predict_ssi_disability_criteria",
36+
"preserve_under_65_ssi_disability_criteria",
37+
"prepare_ssi_disability_receiver",
38+
"train_ssi_disability_model",
39+
"get_ssi_disability_model",
2040
"build_vehicle_training_frame",
2141
"train_vehicle_model",
2242
"get_vehicle_model",

0 commit comments

Comments
 (0)