|
11 | 11 | ) |
12 | 12 | from policyengine_us_data.utils.source_quality import ( |
13 | 13 | filter_observed_source_rows, |
| 14 | + require_columns_present, |
14 | 15 | sipp_allocation_flag_for, |
15 | 16 | target_observed_source_masks, |
16 | 17 | ) |
17 | 18 |
|
18 | 19 |
|
19 | 20 | SIPP_JOB_OCCUPATION_COLUMNS = [f"TJB{i}_OCC" for i in range(1, 8)] |
20 | 21 | SIPP_TIP_AMOUNT_COLUMNS = [f"TJB{i}_TXAMT" for i in range(1, 8)] |
21 | | -SIPP_TIP_ALLOCATION_COLUMNS = [ |
22 | | - sipp_allocation_flag_for(column) for column in SIPP_TIP_AMOUNT_COLUMNS |
23 | | -] |
| 22 | +SIPP_TIP_AMOUNT_TO_ALLOCATION_COLUMN = { |
| 23 | + column: sipp_allocation_flag_for(column) for column in SIPP_TIP_AMOUNT_COLUMNS |
| 24 | +} |
| 25 | +SIPP_TIP_ALLOCATION_COLUMNS = list(SIPP_TIP_AMOUNT_TO_ALLOCATION_COLUMN.values()) |
24 | 26 | TIP_MODEL_PREDICTORS = [ |
25 | 27 | "employment_income", |
26 | 28 | "age", |
@@ -124,6 +126,14 @@ def train_tip_model(): |
124 | 126 | # AJB*_TXAMT Census allocation flags (small ints 0/1/2 indicating |
125 | 127 | # imputation status) and added them to the dollar totals. |
126 | 128 | tip_amount_columns = [column for column in SIPP_TIP_AMOUNT_COLUMNS if column in df] |
| 129 | + tip_allocation_columns = [ |
| 130 | + SIPP_TIP_AMOUNT_TO_ALLOCATION_COLUMN[column] for column in tip_amount_columns |
| 131 | + ] |
| 132 | + require_columns_present( |
| 133 | + df.columns, |
| 134 | + tip_allocation_columns, |
| 135 | + source_name="SIPP tip donor file", |
| 136 | + ) |
127 | 137 | df["tip_income"] = df[tip_amount_columns].fillna(0).sum(axis=1) * 12 |
128 | 138 | df["employment_income"] = df.TPTOTINC * 12 |
129 | 139 | df["is_under_18"] = (df.TAGE < 18) & (df.MONTHCODE == 12) |
@@ -159,7 +169,7 @@ def train_tip_model(): |
159 | 169 | df, |
160 | 170 | targets=["tip_income"], |
161 | 171 | target_source_columns={"tip_income": tip_amount_columns}, |
162 | | - target_allocation_flag_columns={"tip_income": SIPP_TIP_ALLOCATION_COLUMNS}, |
| 172 | + target_allocation_flag_columns={"tip_income": tip_allocation_columns}, |
163 | 173 | require_nonmissing_source=False, |
164 | 174 | ) |
165 | 175 |
|
|
0 commit comments