|
1 | 1 | """ |
2 | 2 | Salary sacrifice imputation for pension contributions. |
3 | 3 |
|
4 | | -This module imputes salary sacrifice pension amounts using QRF trained on |
5 | | -FRS respondents who were asked the SALSAC question. The model predicts |
6 | | -the continuous amount (pension_contributions_via_salary_sacrifice), with |
7 | | -non-participants naturally having 0. |
| 4 | +Two-stage imputation: |
8 | 5 |
|
9 | | -Training data (FRS 2023-24): |
10 | | -- SALSAC='1' (Yes): ~224 jobs with reported SPNAMT amounts |
11 | | -- SALSAC='2' (No): ~3,803 jobs with SPNAMT=0 |
| 6 | +1. QRF trained on FRS respondents who were asked SALSAC (~224 yes, |
| 7 | + ~3,803 no). Predicts SS amounts for ~13,265 jobs where SALSAC was |
| 8 | + not asked. |
12 | 9 |
|
13 | | -Imputation candidates: |
14 | | -- SALSAC=' ' (skip/not asked): ~13,265 jobs |
| 10 | +2. Headcount-targeted imputation: converts a fraction of pension |
| 11 | + contributors without SS into below-cap (≤£2,000) SS users, moving |
| 12 | + employee pension contributions to salary sacrifice. Targets the |
| 13 | + OBR/ASHE estimate of ~4.3mn below-cap SS users. |
15 | 14 |
|
16 | | -Targeting to HMRC totals (~24bn SS contributions) happens via weight |
17 | | -calibration, not in this imputation step. |
| 15 | +Calibration to exact monetary totals (~£24bn SS contributions) and to |
| 16 | +final headcounts happens via weight optimisation in a subsequent step. |
18 | 17 | """ |
19 | 18 |
|
20 | 19 | import pandas as pd |
@@ -124,13 +123,10 @@ def impute_salary_sacrifice( |
124 | 123 | """ |
125 | 124 | Impute salary sacrifice pension amounts for FRS non-respondents. |
126 | 125 |
|
127 | | - For respondents not asked about salary sacrifice (SALSAC=' '), uses |
128 | | - a QRF model trained on those who were asked to predict the SS pension |
129 | | - contribution amount directly. The model naturally predicts 0 for |
130 | | - non-participants and positive amounts for likely participants. |
131 | | -
|
132 | | - Note: This imputation does NOT target any specific total. Targeting |
133 | | - to HMRC figures happens via weight calibration in a subsequent step. |
| 126 | + Stage 1: QRF predicts SS amounts for respondents not asked SALSAC. |
| 127 | + Stage 2: Converts a fraction of pension contributors to below-cap |
| 128 | + SS users, targeting ~4.3mn (OBR/ASHE). Moves employee pension |
| 129 | + contributions to salary sacrifice to keep total pension consistent. |
134 | 130 |
|
135 | 131 | Args: |
136 | 132 | dataset: PolicyEngine UK dataset with salary_sacrifice_asked |
@@ -183,7 +179,46 @@ def impute_salary_sacrifice( |
183 | 179 | imputed_ss, # Use imputed for non-respondents |
184 | 180 | ) |
185 | 181 |
|
186 | | - # Update dataset |
| 182 | + # Stage 2: Headcount-targeted imputation for below-cap SS users. |
| 183 | + # ASHE data shows many more SS users than the FRS captures due to |
| 184 | + # self-reporting bias in auto-enrolment. Impute additional SS users |
| 185 | + # from pension contributors to create enough records for calibration |
| 186 | + # to hit OBR headcount targets (7.7mn total, 4.3mn below £2,000). |
| 187 | + person_weight = sim.calculate("person_weight").values |
| 188 | + employee_pension = dataset.person[ |
| 189 | + "employee_pension_contributions" |
| 190 | + ].values.copy() |
| 191 | + has_ss = final_ss > 0 |
| 192 | + below_cap_ss = has_ss & (final_ss <= 2000) |
| 193 | + |
| 194 | + # Donor pool: employed pension contributors not already SS users |
| 195 | + is_donor = (employee_pension > 0) & ~has_ss & (employment_income > 0) |
| 196 | + |
| 197 | + # Target ~4.3mn below-cap SS users (OBR/ASHE estimate) |
| 198 | + TARGET_BELOW_CAP = 4_300_000 |
| 199 | + current_below_cap = (person_weight * below_cap_ss).sum() |
| 200 | + shortfall = max(0, TARGET_BELOW_CAP - current_below_cap) |
| 201 | + |
| 202 | + if shortfall > 0: |
| 203 | + donor_weighted = (person_weight * is_donor).sum() |
| 204 | + if donor_weighted > 0: |
| 205 | + imputation_rate = min(0.8, shortfall / donor_weighted) |
| 206 | + rng = np.random.default_rng(seed=2024) |
| 207 | + newly_imputed = is_donor & ( |
| 208 | + rng.random(len(final_ss)) < imputation_rate |
| 209 | + ) |
| 210 | + |
| 211 | + # Move up to £2,000 of employee pension to SS |
| 212 | + ss_new = np.minimum(employee_pension, 2000.0) |
| 213 | + final_ss = np.where(newly_imputed, ss_new, final_ss) |
| 214 | + |
| 215 | + # Reduce employee pension correspondingly |
| 216 | + dataset.person["employee_pension_contributions"] = np.where( |
| 217 | + newly_imputed, |
| 218 | + employee_pension - ss_new, |
| 219 | + employee_pension, |
| 220 | + ) |
| 221 | + |
187 | 222 | dataset.person["pension_contributions_via_salary_sacrifice"] = final_ss |
188 | 223 |
|
189 | 224 | return dataset |
0 commit comments