Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions changelog_entry.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
- bump: minor
changes:
added:
- Calibrate salary sacrifice population to HMRC/ASHE headcount targets (7.7mn
total, 3.3mn above 2k cap, 4.3mn below 2k cap). Two-stage imputation in
salary_sacrifice.py converts pension contributors to below-cap SS users, and
loss.py gains three new headcount calibration targets.
73 changes: 54 additions & 19 deletions policyengine_uk_data/datasets/imputations/salary_sacrifice.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,19 @@
"""
Salary sacrifice imputation for pension contributions.

This module imputes salary sacrifice pension amounts using QRF trained on
FRS respondents who were asked the SALSAC question. The model predicts
the continuous amount (pension_contributions_via_salary_sacrifice), with
non-participants naturally having 0.
Two-stage imputation:

Training data (FRS 2023-24):
- SALSAC='1' (Yes): ~224 jobs with reported SPNAMT amounts
- SALSAC='2' (No): ~3,803 jobs with SPNAMT=0
1. QRF trained on FRS respondents who were asked SALSAC (~224 yes,
~3,803 no). Predicts SS amounts for ~13,265 jobs where SALSAC was
not asked.

Imputation candidates:
- SALSAC=' ' (skip/not asked): ~13,265 jobs
2. Headcount-targeted imputation: converts a fraction of pension
contributors without SS into below-cap (≤£2,000) SS users, moving
employee pension contributions to salary sacrifice. Targets the
OBR/ASHE estimate of ~4.3mn below-cap SS users.

Targeting to HMRC totals (~24bn SS contributions) happens via weight
calibration, not in this imputation step.
Exact monetary totals (~£24bn SS contributions) and final headcount
calibration happen via weight optimisation in a subsequent step.
"""

import pandas as pd
Expand Down Expand Up @@ -124,13 +123,10 @@ def impute_salary_sacrifice(
"""
Impute salary sacrifice pension amounts for FRS non-respondents.

For respondents not asked about salary sacrifice (SALSAC=' '), uses
a QRF model trained on those who were asked to predict the SS pension
contribution amount directly. The model naturally predicts 0 for
non-participants and positive amounts for likely participants.

Note: This imputation does NOT target any specific total. Targeting
to HMRC figures happens via weight calibration in a subsequent step.
Stage 1: QRF predicts SS amounts for respondents not asked SALSAC.
Stage 2: Converts a fraction of pension contributors to below-cap
SS users, targeting ~4.3mn (OBR/ASHE). Moves employee pension
contributions to salary sacrifice to keep total pension consistent.

Args:
dataset: PolicyEngine UK dataset with salary_sacrifice_asked
Expand Down Expand Up @@ -183,7 +179,46 @@ def impute_salary_sacrifice(
imputed_ss, # Use imputed for non-respondents
)

# Update dataset
# Stage 2: Headcount-targeted imputation for below-cap SS users.
# ASHE data shows many more SS users than the FRS captures due to
# self-reporting bias in auto-enrolment. Impute additional SS users
# from pension contributors to create enough records for calibration
# to hit OBR headcount targets (7.7mn total, 4.3mn below £2,000).
person_weight = sim.calculate("person_weight").values
employee_pension = dataset.person[
"employee_pension_contributions"
].values.copy()
has_ss = final_ss > 0
below_cap_ss = has_ss & (final_ss <= 2000)

# Donor pool: employed pension contributors not already SS users
is_donor = (employee_pension > 0) & ~has_ss & (employment_income > 0)

# Target ~4.3mn below-cap SS users (HMRC/ASHE estimate)
TARGET_BELOW_CAP = 4_300_000
current_below_cap = (person_weight * below_cap_ss).sum()
shortfall = max(0, TARGET_BELOW_CAP - current_below_cap)

if shortfall > 0:
donor_weighted = (person_weight * is_donor).sum()
if donor_weighted > 0:
imputation_rate = min(0.8, shortfall / donor_weighted)
rng = np.random.default_rng(seed=2024)
newly_imputed = is_donor & (
rng.random(len(final_ss)) < imputation_rate
)

# Move up to £2,000 of employee pension to SS
ss_new = np.minimum(employee_pension, 2000.0)
final_ss = np.where(newly_imputed, ss_new, final_ss)

# Reduce employee pension correspondingly
dataset.person["employee_pension_contributions"] = np.where(
newly_imputed,
employee_pension - ss_new,
employee_pension,
)

dataset.person["pension_contributions_via_salary_sacrifice"] = final_ss

return dataset
81 changes: 81 additions & 0 deletions policyengine_uk_data/tests/test_salary_sacrifice_headcount.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
"""Test salary sacrifice headcount calibration targets.

Source: HMRC, "Salary sacrifice reform for pension contributions"
https://www.gov.uk/government/publications/salary-sacrifice-reform-for-pension-contributions-effective-from-6-april-2029
7.7mn total SS users (3.3mn above 2k cap, 4.3mn below 2k cap)
"""

import pytest

# Maximum allowed relative deviation of a weighted headcount from its target:
# each test below asserts abs(actual / TARGET - 1) < TOLERANCE.
TOLERANCE = 0.15  # 15% relative tolerance


@pytest.mark.xfail(
    reason="Will pass after recalibration with new headcount targets"
)
def test_salary_sacrifice_total_users(baseline):
    """Weighted count of all salary sacrifice users should be near 7.7mn.

    A person counts as a user when their 2025 salary sacrifice pension
    contribution is strictly positive. The weighted headcount must lie
    within TOLERANCE (relative) of the target.
    """
    TARGET = 7_700_000

    contributions = baseline.calculate(
        "pension_contributions_via_salary_sacrifice",
        map_to="person",
        period=2025,
    ).values
    weights = baseline.calculate(
        "person_weight", map_to="person", period=2025
    ).values

    # Boolean mask times weights gives each user's weight and 0 otherwise.
    is_user = contributions > 0
    total_users = (weights * is_user).sum()

    relative_error = abs(total_users / TARGET - 1)
    failure_message = (
        f"Expected ~{TARGET/1e6:.1f}mn SS users, "
        f"got {total_users/1e6:.1f}mn ({total_users/TARGET*100:.0f}% of target)"
    )
    assert relative_error < TOLERANCE, failure_message


@pytest.mark.xfail(
    reason="Will pass after recalibration with new headcount targets"
)
def test_salary_sacrifice_below_cap_users(baseline):
    """Weighted count of below-cap salary sacrifice users should be near 4.3mn.

    Below-cap means a strictly positive 2025 salary sacrifice pension
    contribution of at most 2,000. The weighted headcount must lie within
    TOLERANCE (relative) of the target.
    """
    TARGET = 4_300_000

    contributions = baseline.calculate(
        "pension_contributions_via_salary_sacrifice",
        map_to="person",
        period=2025,
    ).values
    weights = baseline.calculate(
        "person_weight", map_to="person", period=2025
    ).values

    # Users only (amount > 0), and no more than the 2,000 cap.
    contributes = contributions > 0
    within_cap = contributions <= 2000
    total_below_cap = (weights * (contributes & within_cap)).sum()

    relative_error = abs(total_below_cap / TARGET - 1)
    failure_message = (
        f"Expected ~{TARGET/1e6:.1f}mn below-cap SS users, "
        f"got {total_below_cap/1e6:.1f}mn ({total_below_cap/TARGET*100:.0f}% of target)"
    )
    assert relative_error < TOLERANCE, failure_message


@pytest.mark.xfail(
    reason="Will pass after recalibration with new headcount targets"
)
def test_salary_sacrifice_above_cap_users(baseline):
    """Weighted count of above-cap salary sacrifice users should be near 3.3mn.

    Above-cap means a 2025 salary sacrifice pension contribution strictly
    greater than 2,000. The weighted headcount must lie within TOLERANCE
    (relative) of the target.
    """
    TARGET = 3_300_000

    contributions = baseline.calculate(
        "pension_contributions_via_salary_sacrifice",
        map_to="person",
        period=2025,
    ).values
    weights = baseline.calculate(
        "person_weight", map_to="person", period=2025
    ).values

    # Mask of people whose contribution exceeds the 2,000 cap.
    exceeds_cap = contributions > 2000
    total_above_cap = (weights * exceeds_cap).sum()

    relative_error = abs(total_above_cap / TARGET - 1)
    failure_message = (
        f"Expected ~{TARGET/1e6:.1f}mn above-cap SS users, "
        f"got {total_above_cap/1e6:.1f}mn ({total_above_cap/TARGET*100:.0f}% of target)"
    )
    assert relative_error < TOLERANCE, failure_message
1 change: 0 additions & 1 deletion policyengine_uk_data/utils/huggingface.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from huggingface_hub import hf_hub_download, login, HfApi
import os
import pkg_resources


def download(
Expand Down
35 changes: 35 additions & 0 deletions policyengine_uk_data/utils/loss.py
Original file line number Diff line number Diff line change
Expand Up @@ -473,6 +473,41 @@ def pe_count(*variables):
target_names.append("hmrc/salary_sacrifice_contributions")
target_values.append(SS_CONTRIBUTIONS_2024 * uprating_factor)

# Salary sacrifice headcount targets
# Source: HMRC, "Salary sacrifice reform for pension contributions"
# https://www.gov.uk/government/publications/salary-sacrifice-reform-for-pension-contributions-effective-from-6-april-2029
# 7.7mn total SS users (3.3mn above £2k cap, 4.3mn below £2k cap)
ss_has_contributions = ss_contributions > 0
ss_below_cap = ss_has_contributions & (ss_contributions <= 2000)
ss_above_cap = ss_has_contributions & (ss_contributions > 2000)

df["obr/salary_sacrifice_users_total"] = household_from_person(
ss_has_contributions
)
df["obr/salary_sacrifice_users_below_cap"] = household_from_person(
ss_below_cap
)
df["obr/salary_sacrifice_users_above_cap"] = household_from_person(
ss_above_cap
)

# HMRC/ASHE 2024 baseline headcounts
SS_TOTAL_USERS_2024 = 7_700_000
SS_BELOW_CAP_USERS_2024 = 4_300_000
SS_ABOVE_CAP_USERS_2024 = 3_300_000
# OBR (5 Feb 2026, para 1.7): SS population grows 0.9% faster than
# total employee numbers. Assuming ~1.5% annual employment growth, this
# gives ~2.4% growth per year (the 1.024 factor below).
ss_headcount_factor = 1.024 ** max(0, int(time_period) - 2024)

target_names.append("obr/salary_sacrifice_users_total")
target_values.append(SS_TOTAL_USERS_2024 * ss_headcount_factor)

target_names.append("obr/salary_sacrifice_users_below_cap")
target_values.append(SS_BELOW_CAP_USERS_2024 * ss_headcount_factor)

target_names.append("obr/salary_sacrifice_users_above_cap")
target_values.append(SS_ABOVE_CAP_USERS_2024 * ss_headcount_factor)

# Add two-child limit targets.
child_is_affected = (
sim.map_result(
Expand Down