PolicyEngine · vahid-ahmadi · Feb 16, 2026 · Feb 12, 2026 · Feb 12, 2026 · Feb 12, 2026
diff --git a/changelog_entry.yaml b/changelog_entry.yaml
@@ -0,0 +1,7 @@
+- bump: minor
+  changes:
+    added:
+    - Calibrate salary sacrifice population to HMRC/ASHE headcount targets (7.7mn
+      total, 3.3mn above the £2k cap, 4.3mn below it). A two-stage imputation in
+      salary_sacrifice.py converts pension contributors to below-cap SS users, and
+      loss.py gains three new headcount calibration targets.
diff --git a/policyengine_uk_data/datasets/imputations/salary_sacrifice.py b/policyengine_uk_data/datasets/imputations/salary_sacrifice.py
@@ -1,20 +1,19 @@
 """
 Salary sacrifice imputation for pension contributions.
 
-This module imputes salary sacrifice pension amounts using QRF trained on
-FRS respondents who were asked the SALSAC question. The model predicts
-the continuous amount (pension_contributions_via_salary_sacrifice), with
-non-participants naturally having 0.
+Two-stage imputation:
 
-Training data (FRS 2023-24):
-- SALSAC='1' (Yes): ~224 jobs with reported SPNAMT amounts
-- SALSAC='2' (No): ~3,803 jobs with SPNAMT=0
+1. QRF trained on FRS respondents who were asked SALSAC (~224 yes,
+   ~3,803 no). Predicts SS amounts for ~13,265 jobs where SALSAC was
+   not asked.
 
-Imputation candidates:
-- SALSAC=' ' (skip/not asked): ~13,265 jobs
+2. Headcount-targeted imputation: converts a fraction of pension
+   contributors without SS into below-cap (≤£2,000) SS users, moving
+   employee pension contributions to salary sacrifice. Targets the
+   OBR/ASHE estimate of ~4.3mn below-cap SS users.
 
-Targeting to HMRC totals (~24bn SS contributions) happens via weight
-calibration, not in this imputation step.
+Exact monetary totals (~£24bn SS contributions) and final headcount
+calibration happen via weight optimisation in a subsequent step.
 """
 
 import pandas as pd
@@ -124,13 +123,10 @@ def impute_salary_sacrifice(
     """
     Impute salary sacrifice pension amounts for FRS non-respondents.
 
-    For respondents not asked about salary sacrifice (SALSAC=' '), uses
-    a QRF model trained on those who were asked to predict the SS pension
-    contribution amount directly. The model naturally predicts 0 for
-    non-participants and positive amounts for likely participants.
-
-    Note: This imputation does NOT target any specific total. Targeting
-    to HMRC figures happens via weight calibration in a subsequent step.
+    Stage 1: QRF predicts SS amounts for respondents not asked SALSAC.
+    Stage 2: Converts a fraction of pension contributors to below-cap
+    SS users, targeting ~4.3mn (OBR/ASHE). Moves employee pension
+    contributions to salary sacrifice to keep total pension consistent.
 
     Args:
         dataset: PolicyEngine UK dataset with salary_sacrifice_asked
@@ -183,7 +179,46 @@ def impute_salary_sacrifice(
         imputed_ss,  # Use imputed for non-respondents
     )
 
-    # Update dataset
+    # Stage 2: Headcount-targeted imputation for below-cap SS users.
+    # ASHE data shows many more SS users than the FRS captures due to
+    # self-reporting bias in auto-enrolment. Impute additional SS users
+    # from pension contributors to create enough records for calibration
+    # to hit OBR headcount targets (7.7mn total, 4.3mn below £2,000).
+    person_weight = sim.calculate("person_weight").values
+    employee_pension = dataset.person[
+        "employee_pension_contributions"
+    ].values.copy()
+    has_ss = final_ss > 0
+    below_cap_ss = has_ss & (final_ss <= 2000)
+
+    # Donor pool: employed pension contributors not already SS users
+    is_donor = (employee_pension > 0) & ~has_ss & (employment_income > 0)
+
+    # Target ~4.3mn below-cap SS users (HMRC/ASHE estimate)
+    TARGET_BELOW_CAP = 4_300_000
+    current_below_cap = (person_weight * below_cap_ss).sum()
+    shortfall = max(0, TARGET_BELOW_CAP - current_below_cap)
+
+    if shortfall > 0:
+        donor_weighted = (person_weight * is_donor).sum()
+        if donor_weighted > 0:
+            imputation_rate = min(0.8, shortfall / donor_weighted)
+            rng = np.random.default_rng(seed=2024)
+            newly_imputed = is_donor & (
+                rng.random(len(final_ss)) < imputation_rate
+            )
+
+            # Move up to £2,000 of employee pension to SS
+            ss_new = np.minimum(employee_pension, 2000.0)
+            final_ss = np.where(newly_imputed, ss_new, final_ss)
+
+            # Reduce employee pension correspondingly
+            dataset.person["employee_pension_contributions"] = np.where(
+                newly_imputed,
+                employee_pension - ss_new,
+                employee_pension,
+            )
+
     dataset.person["pension_contributions_via_salary_sacrifice"] = final_ss
 
     return dataset
diff --git a/policyengine_uk_data/tests/test_salary_sacrifice_headcount.py b/policyengine_uk_data/tests/test_salary_sacrifice_headcount.py
@@ -0,0 +1,81 @@
+"""Test salary sacrifice headcount calibration targets.
+
+Source: HMRC, "Salary sacrifice reform for pension contributions"
+https://www.gov.uk/government/publications/salary-sacrifice-reform-for-pension-contributions-effective-from-6-april-2029
+7.7mn total SS users (3.3mn above 2k cap, 4.3mn below 2k cap)
+"""
+
+import pytest
+
+TOLERANCE = 0.15  # 15% relative tolerance
+
+
+@pytest.mark.xfail(
+    reason="Will pass after recalibration with new headcount targets"
+)
+def test_salary_sacrifice_total_users(baseline):
+    """Weighted count of all SS users is within TOLERANCE of 7.7mn."""
+    TARGET = 7_700_000
+    contributions = baseline.calculate(
+        "pension_contributions_via_salary_sacrifice",
+        map_to="person",
+        period=2025,
+    ).values
+    weights = baseline.calculate(
+        "person_weight", map_to="person", period=2025
+    ).values
+
+    # Anyone with a positive SS contribution counts as a user.
+    total_users = ((contributions > 0) * weights).sum()
+    assert abs(total_users / TARGET - 1) < TOLERANCE, (
+        f"Expected ~{TARGET/1e6:.1f}mn SS users, "
+        f"got {total_users/1e6:.1f}mn ({total_users/TARGET*100:.0f}% of target)"
+    )
+
+
+@pytest.mark.xfail(
+    reason="Will pass after recalibration with new headcount targets"
+)
+def test_salary_sacrifice_below_cap_users(baseline):
+    """Weighted count of below-cap (<=2k) SS users is within TOLERANCE of 4.3mn."""
+    TARGET = 4_300_000
+    contributions = baseline.calculate(
+        "pension_contributions_via_salary_sacrifice",
+        map_to="person",
+        period=2025,
+    ).values
+    weights = baseline.calculate(
+        "person_weight", map_to="person", period=2025
+    ).values
+
+    # Below-cap users contribute something, but no more than 2,000.
+    in_band = (0 < contributions) & (contributions <= 2000)
+    total_below_cap = (in_band * weights).sum()
+    assert abs(total_below_cap / TARGET - 1) < TOLERANCE, (
+        f"Expected ~{TARGET/1e6:.1f}mn below-cap SS users, "
+        f"got {total_below_cap/1e6:.1f}mn ({total_below_cap/TARGET*100:.0f}% of target)"
+    )
+
+
+@pytest.mark.xfail(
+    reason="Will pass after recalibration with new headcount targets"
+)
+def test_salary_sacrifice_above_cap_users(baseline):
+    """Weighted count of above-cap (>2k) SS users is within TOLERANCE of 3.3mn."""
+    TARGET = 3_300_000
+    contributions = baseline.calculate(
+        "pension_contributions_via_salary_sacrifice",
+        map_to="person",
+        period=2025,
+    ).values
+    weights = baseline.calculate(
+        "person_weight", map_to="person", period=2025
+    ).values
+
+    # Above-cap users contribute strictly more than 2,000.
+    exceeds_cap = contributions > 2000
+    total_above_cap = (exceeds_cap * weights).sum()
+    assert abs(total_above_cap / TARGET - 1) < TOLERANCE, (
+        f"Expected ~{TARGET/1e6:.1f}mn above-cap SS users, "
+        f"got {total_above_cap/1e6:.1f}mn ({total_above_cap/TARGET*100:.0f}% of target)"
+    )
diff --git a/policyengine_uk_data/utils/huggingface.py b/policyengine_uk_data/utils/huggingface.py
@@ -1,6 +1,5 @@
 from huggingface_hub import hf_hub_download, login, HfApi
 import os
-import pkg_resources
 
 
 def download(

diff --git a/policyengine_uk_data/utils/loss.py b/policyengine_uk_data/utils/loss.py
@@ -473,6 +473,41 @@ def pe_count(*variables):
     target_names.append("hmrc/salary_sacrifice_contributions")
     target_values.append(SS_CONTRIBUTIONS_2024 * uprating_factor)
 
+    # Salary sacrifice headcount targets
+    # Source: HMRC, "Salary sacrifice reform for pension contributions"
+    # https://www.gov.uk/government/publications/salary-sacrifice-reform-for-pension-contributions-effective-from-6-april-2029
+    # 7.7mn total SS users (3.3mn above + 4.3mn below £2k cap = 7.6mn; presumably rounding)
+    ss_has_contributions = ss_contributions > 0
+    ss_below_cap = ss_has_contributions & (ss_contributions <= 2000)
+    ss_above_cap = ss_has_contributions & (ss_contributions > 2000)
+
+    df["obr/salary_sacrifice_users_total"] = household_from_person(
+        ss_has_contributions
+    )
+    df["obr/salary_sacrifice_users_below_cap"] = household_from_person(
+        ss_below_cap
+    )
+    df["obr/salary_sacrifice_users_above_cap"] = household_from_person(
+        ss_above_cap
+    )
+
+    # HMRC/ASHE 2024 baseline headcounts
+    SS_TOTAL_USERS_2024 = 7_700_000
+    SS_BELOW_CAP_USERS_2024 = 4_300_000
+    SS_ABOVE_CAP_USERS_2024 = 3_300_000
+    # OBR (5 Feb 2026, para 1.7): SS population grows 0.9pp faster than
+    # total employee numbers. Added to ~1.5% employment growth: ~2.4%/year.
+    ss_headcount_factor = 1.024 ** max(0, int(time_period) - 2024)
+
+    target_names.append("obr/salary_sacrifice_users_total")
+    target_values.append(SS_TOTAL_USERS_2024 * ss_headcount_factor)
+
+    target_names.append("obr/salary_sacrifice_users_below_cap")
+    target_values.append(SS_BELOW_CAP_USERS_2024 * ss_headcount_factor)
+
+    target_names.append("obr/salary_sacrifice_users_above_cap")
+    target_values.append(SS_ABOVE_CAP_USERS_2024 * ss_headcount_factor)
+
     # Add two-child limit targets.
     child_is_affected = (
         sim.map_result(