@@ -93,20 +93,6 @@ def save_salary_sacrifice_model():
9393 }
9494 )
9595
96- n_participants = (
97- train_df ["pension_contributions_via_salary_sacrifice" ] > 0
98- ).sum ()
99- print (f"Training salary sacrifice model on { len (train_df )} observations" )
100- print (
101- f" With SS contributions: { n_participants } "
102- f"({ n_participants / len (train_df ):.1%} )"
103- )
104- mean_amount = train_df .loc [
105- train_df ["pension_contributions_via_salary_sacrifice" ] > 0 ,
106- "pension_contributions_via_salary_sacrifice" ,
107- ].mean ()
108- print (f" Mean SS amount (participants): £{ mean_amount :,.0f} " )
109-
11096 # Train QRF model
11197 model = QRF ()
11298 model .fit (train_df [PREDICTORS ], train_df [IMPUTATIONS ])
@@ -166,17 +152,10 @@ def impute_salary_sacrifice(
166152
167153 # Get indicator for who was asked
168154 if "salary_sacrifice_asked" not in dataset .person .columns :
169- print (
170- "Warning: salary_sacrifice_asked not in dataset, "
171- "skipping imputation"
172- )
173155 return dataset
174156
175157 ss_asked = dataset .person .salary_sacrifice_asked .values
176158
177- # Identify imputation candidates: those not asked about SS
178- not_asked = ss_asked == 0
179-
180159 # Create prediction DataFrame for all records
181160 pred_df = pd .DataFrame (
182161 {
@@ -208,17 +187,4 @@ def impute_salary_sacrifice(
208187 # Update dataset
209188 dataset .person ["pension_contributions_via_salary_sacrifice" ] = final_ss
210189
211- # Report results (no targeting - just descriptive)
212- weights = sim .calculate ("person_weight" ).values
213- is_employee = employment_income > 0
214- total_ss = (final_ss * weights ).sum ()
215- participation_rate = ((final_ss > 0 ) * weights * is_employee ).sum () / (
216- weights * is_employee
217- ).sum ()
218-
219- print ("Salary sacrifice imputation results (pre-calibration):" )
220- print (f" Total SS contributions: £{ total_ss / 1e9 :.1f} bn" )
221- print (f" Employee participation rate: { participation_rate :.1%} " )
222- print (" (Final totals depend on subsequent weight calibration)" )
223-
224190 return dataset
0 commit comments