5151 SSI_DISABILITY_EXPORT_VARIABLES ,
5252 VEHICLE_MODEL_PREDICTORS ,
5353 build_vehicle_training_frame ,
54+ ensure_sipp_file ,
5455 get_ssi_disability_model ,
5556 predict_ssi_disability_criteria ,
5657 preserve_under_65_ssi_disability_criteria ,
@@ -663,16 +664,26 @@ def _impute_sipp(
663664 Returns:
664665 Updated data dict.
665666 """
666- from huggingface_hub import hf_hub_download
667- from policyengine_us_data .storage import STORAGE_FOLDER
668-
669- hf_hub_download (
670- repo_id = "PolicyEngine/policyengine-us-data" ,
671- filename = "pu2023_slim.csv" ,
672- repo_type = "model" ,
673- local_dir = STORAGE_FOLDER ,
667+ tip_cols = (
668+ [
669+ "SSUID" ,
670+ "MONTHCODE" ,
671+ "WPFINWGT" ,
672+ "TAGE" ,
673+ "TPTOTINC" ,
674+ ]
675+ + SIPP_JOB_OCCUPATION_COLUMNS
676+ + SIPP_TIP_AMOUNT_COLUMNS
677+ + [
678+ SIPP_TIP_AMOUNT_TO_ALLOCATION_COLUMN [column ]
679+ for column in SIPP_TIP_AMOUNT_COLUMNS
680+ ]
681+ )
682+ sipp_df = pd .read_csv (
683+ ensure_sipp_file (),
684+ delimiter = "|" ,
685+ usecols = tip_cols ,
674686 )
675- sipp_df = pd .read_csv (STORAGE_FOLDER / "pu2023_slim.csv" )
676687
677688 tip_amount_columns = [
678689 column for column in SIPP_TIP_AMOUNT_COLUMNS if column in sipp_df
@@ -788,12 +799,6 @@ def _impute_sipp(
788799
789800 # Asset imputation
790801 try :
791- hf_hub_download (
792- repo_id = "PolicyEngine/policyengine-us-data" ,
793- filename = "pu2023.csv" ,
794- repo_type = "model" ,
795- local_dir = STORAGE_FOLDER ,
796- )
797802 asset_cols = (
798803 [
799804 "SSUID" ,
@@ -817,7 +822,7 @@ def _impute_sipp(
817822 + SIPP_ASSET_ALLOCATION_COLUMNS
818823 )
819824 asset_df = pd .read_csv (
820- STORAGE_FOLDER / "pu2023.csv" ,
825+ ensure_sipp_file () ,
821826 delimiter = "|" ,
822827 usecols = asset_cols ,
823828 )
0 commit comments