2828
2929import numpy as np
3030import pandas as pd
31+ from policyengine_us_data .datasets .cps .tipped_occupation import (
32+ derive_any_treasury_tipped_occupation_code ,
33+ derive_is_tipped_occupation ,
34+ )
3135
3236from policyengine_us_data .datasets .org import (
3337 ORG_BOOL_VARIABLES ,
8084 "age" ,
8185 "count_under_18" ,
8286 "count_under_6" ,
87+ "is_tipped_occupation" ,
8388]
8489
8590SIPP_ASSETS_PREDICTORS = [
112117 "NONE" : 0 ,
113118}
114119
# Per-job occupation columns TJB1_OCC ... TJB7_OCC. This list is used to select
# columns from the SIPP frame when deriving the Treasury tipped-occupation
# code, so every name must match the frame's column label exactly (no
# embedded whitespace).
SIPP_JOB_OCCUPATION_COLUMNS = [f"TJB{i}_OCC" for i in range(1, 8)]
121+
115122
116123def _encode_tenure_type (df : pd .DataFrame ) -> pd .DataFrame :
117124 """Convert tenure_type enum strings to numeric codes."""
@@ -384,6 +391,12 @@ def _impute_sipp(
384391 sipp_df ["age" ] = sipp_df .TAGE
385392 sipp_df ["household_weight" ] = sipp_df .WPFINWGT
386393 sipp_df ["household_id" ] = sipp_df .SSUID
394+ sipp_df ["treasury_tipped_occupation_code" ] = (
395+ derive_any_treasury_tipped_occupation_code (sipp_df [SIPP_JOB_OCCUPATION_COLUMNS ])
396+ )
397+ sipp_df ["is_tipped_occupation" ] = derive_is_tipped_occupation (
398+ sipp_df ["treasury_tipped_occupation_code" ]
399+ )
387400
388401 sipp_df ["is_under_18" ] = sipp_df .TAGE < 18
389402 sipp_df ["is_under_6" ] = sipp_df .TAGE < 6
@@ -401,6 +414,7 @@ def _impute_sipp(
401414 "count_under_18" ,
402415 "count_under_6" ,
403416 "age" ,
417+ "is_tipped_occupation" ,
404418 "household_weight" ,
405419 ]
406420 tip_train = sipp_df [tip_cols ].dropna ()
@@ -431,6 +445,12 @@ def _impute_sipp(
431445 else :
432446 cps_tip_df ["count_under_18" ] = 0.0
433447 cps_tip_df ["count_under_6" ] = 0.0
448+ if "treasury_tipped_occupation_code" in data :
449+ cps_tip_df ["is_tipped_occupation" ] = derive_is_tipped_occupation (
450+ data ["treasury_tipped_occupation_code" ][time_period ]
451+ ).astype (np .float32 )
452+ else :
453+ cps_tip_df ["is_tipped_occupation" ] = 0.0
434454
435455 qrf = QRF ()
436456 logger .info (
0 commit comments