|
38 | 38 | from policyengine_us_data.datasets.sipp.sipp import ( |
39 | 39 | ASSET_JOB_EARNINGS_COLUMNS, |
40 | 40 | ASSET_PREDICTORS, |
| 41 | + SSI_DISABILITY_MODEL_VARIABLE, |
41 | 42 | VEHICLE_MODEL_PREDICTORS, |
42 | 43 | build_vehicle_training_frame, |
| 44 | + get_ssi_disability_model, |
| 45 | + predict_ssi_disability_criteria, |
| 46 | + preserve_under_65_ssi_disability_criteria, |
43 | 47 | ) |
44 | 48 |
|
45 | 49 | from policyengine_us_data.datasets.org import ( |
|
81 | 85 | "bank_account_assets", |
82 | 86 | "stock_assets", |
83 | 87 | "bond_assets", |
| 88 | + SSI_DISABILITY_MODEL_VARIABLE, |
84 | 89 | "household_vehicles_owned", |
85 | 90 | "household_vehicles_value", |
86 | 91 | ] |
@@ -806,6 +811,83 @@ def _impute_sipp( |
806 | 811 |
|
807 | 812 | logger.info("SIPP asset imputation complete") |
808 | 813 |
|
| 814 | + cps_ssi_df = _build_cps_receiver( |
| 815 | + data, |
| 816 | + time_period, |
| 817 | + dataset_path, |
| 818 | + [ |
| 819 | + "employment_income", |
| 820 | + "interest_income", |
| 821 | + "dividend_income", |
| 822 | + "rental_income", |
| 823 | + "age", |
| 824 | + "is_male", |
| 825 | + "is_disabled", |
| 826 | + "social_security_disability", |
| 827 | + "disability_benefits", |
| 828 | + ], |
| 829 | + ) |
| 830 | + if "is_male" in cps_ssi_df.columns: |
| 831 | + cps_ssi_df["is_female"] = (~cps_ssi_df["is_male"].astype(bool)).astype( |
| 832 | + np.float32 |
| 833 | + ) |
| 834 | + else: |
| 835 | + cps_ssi_df["is_female"] = 0.0 |
| 836 | + if "is_married" in data: |
| 837 | + cps_ssi_df["is_married"] = data["is_married"][time_period].astype( |
| 838 | + np.float32 |
| 839 | + ) |
| 840 | + else: |
| 841 | + cps_ssi_df["is_married"] = 0.0 |
| 842 | + cps_ssi_df["count_under_18"] = ( |
| 843 | + cps_tip_df["count_under_18"] |
| 844 | + if "count_under_18" in cps_tip_df.columns |
| 845 | + else 0.0 |
| 846 | + ) |
| 847 | + for var in asset_vars: |
| 848 | + cps_ssi_df[var] = data[var][time_period].astype(np.float32) |
| 849 | + for var in [ |
| 850 | + "interest_income", |
| 851 | + "dividend_income", |
| 852 | + "rental_income", |
| 853 | + "is_disabled", |
| 854 | + "social_security_disability", |
| 855 | + ]: |
| 856 | + if var not in cps_ssi_df.columns: |
| 857 | + cps_ssi_df[var] = data.get(var, {}).get( |
| 858 | + time_period, np.zeros(len(cps_ssi_df)) |
| 859 | + ) |
| 860 | + if "disability_benefits" in cps_ssi_df.columns: |
| 861 | + disability_benefits = cps_ssi_df["disability_benefits"] |
| 862 | + else: |
| 863 | + disability_benefits = data.get("disability_benefits", {}).get( |
| 864 | + time_period, np.zeros(len(cps_ssi_df)) |
| 865 | + ) |
| 866 | + cps_ssi_df["has_disability_income"] = ( |
| 867 | + np.asarray(disability_benefits).astype(float) > 0 |
| 868 | + ) |
| 869 | + |
| 870 | + ssi_disability_model = get_ssi_disability_model(time_period=time_period) |
| 871 | + meets_ssi_disability_criteria = predict_ssi_disability_criteria( |
| 872 | + ssi_disability_model, |
| 873 | + cps_ssi_df, |
| 874 | + ) |
| 875 | + existing_meets_ssi_disability_criteria = data.get( |
| 876 | + SSI_DISABILITY_MODEL_VARIABLE, {} |
| 877 | + ).get(time_period) |
| 878 | + ssi_reported = data.get("ssi_reported", {}).get(time_period) |
| 879 | + meets_ssi_disability_criteria = preserve_under_65_ssi_disability_criteria( |
| 880 | + meets_ssi_disability_criteria, |
| 881 | + age=data["age"][time_period], |
| 882 | + ssi_reported=ssi_reported, |
| 883 | + existing_meets_ssi_disability_criteria=existing_meets_ssi_disability_criteria, |
| 884 | + ) |
| 885 | + data[SSI_DISABILITY_MODEL_VARIABLE] = { |
| 886 | + time_period: meets_ssi_disability_criteria |
| 887 | + } |
| 888 | + |
| 889 | + logger.info("SIPP SSI disability criteria imputation complete") |
| 890 | + |
809 | 891 | vehicle_train = build_vehicle_training_frame() |
810 | 892 | vehicle_train = vehicle_train.loc[ |
811 | 893 | rng.choice( |
|
0 commit comments