From ba7d8a2020262e001c1eb28424579289cd3e39df Mon Sep 17 00:00:00 2001
From: Max Ghenis <mghenis@gmail.com>
Date: Fri, 17 Apr 2026 13:44:19 -0400
Subject: [PATCH 1/2] Anchor UC/PC/CB takeup flags to FRS-reported receipt
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

FRS respondents who report positive receipt of a benefit are by
construction take-up=True. The prior code assigned `would_claim_uc`,
`would_claim_pc`, and `would_claim_child_benefit` by pure random draw
against the aggregate takeup rate, ignoring that information — which
meant a respondent reporting UC receipt could be randomly assigned
`would_claim_uc = False`, producing calibration noise.

Ports `assign_takeup_with_reported_anchors` from
`policyengine-us-data/utils/takeup.py`, pared down to the single-group
case (UK doesn't need the US's state-keyed grouping). Reporters are
forced to True; non-reporters are filled probabilistically to hit the
aggregate target rate across the full population, so the overall
(unweighted) takeup share still matches the target unless reporters
alone already exceed it, in which case no non-reporters are added.

Applied to the three benefit-unit-level flags where FRS has a matching
reported column (`universal_credit_reported`, `pension_credit_reported`,
`child_benefit_reported`). Other takeup flags (TFC, childcare schemes,
SCP) have no FRS-reported counterpart and keep pure-random behaviour.

5 unit tests cover the new helper: pure-random fallback, reporters
always True, overall rate close to target, handling when reporters
already exceed target, and mask-length validation.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .../reported-takeup-anchors.changed.md        |  1 +
 policyengine_uk_data/datasets/frs.py          | 37 ++++++++---
 .../tests/test_reported_takeup_anchors.py     | 65 +++++++++++++++++++
 policyengine_uk_data/utils/takeup.py          | 59 +++++++++++++++++
 4 files changed, 154 insertions(+), 8 deletions(-)
 create mode 100644 changelog.d/reported-takeup-anchors.changed.md
 create mode 100644 policyengine_uk_data/tests/test_reported_takeup_anchors.py
 create mode 100644 policyengine_uk_data/utils/takeup.py

diff --git a/changelog.d/reported-takeup-anchors.changed.md b/changelog.d/reported-takeup-anchors.changed.md
new file mode 100644
index 000000000..0c8597a06
--- /dev/null
+++ b/changelog.d/reported-takeup-anchors.changed.md
@@ -0,0 +1 @@
+Anchor stochastic takeup assignment for Universal Credit, Pension Credit, and Child Benefit to the FRS-reported receipt columns, matching the `policyengine-us-data` pattern. Respondents who report positive receipt in the FRS benefits table now receive `would_claim_* = True` with certainty, and non-reporters are filled probabilistically to hit the aggregate target rate. Removes a source of calibration noise where respondents who clearly took up a benefit could be randomly assigned `would_claim = False`.
diff --git a/policyengine_uk_data/datasets/frs.py b/policyengine_uk_data/datasets/frs.py
index fc6aeaf71..170f11b51 100644
--- a/policyengine_uk_data/datasets/frs.py
+++ b/policyengine_uk_data/datasets/frs.py
@@ -1217,24 +1217,45 @@ def determine_education_level(fted_val, typeed2_val, age_val):
     scp_under_6_rate = load_take_up_rate("scp_under_6", year)
     scp_6_plus_rate = load_take_up_rate("scp_6_plus", year)
 
-    # Generate take-up decisions by comparing random draws to take-up rates
+    # Generate take-up decisions by comparing random draws to take-up rates,
+    # anchored to reported receipts where the FRS captures them. Respondents
+    # who report positive receipt of a benefit are assigned takeup=True with
+    # certainty; the remaining non-reporters are filled probabilistically to
+    # hit the aggregate target rate. See policyengine_uk_data/utils/takeup.py.
+    from policyengine_uk_data.utils.takeup import (
+        assign_takeup_with_reported_anchors,
+    )
+
+    def _reported_benunit_mask(person_column: str) -> np.ndarray:
+        reporter_benunits = set(
+            pe_person.loc[pe_person[person_column] > 0, "person_benunit_id"].values
+        )
+        return pe_benunit["benunit_id"].isin(reporter_benunits).values
+
     # Person-level
     pe_person["would_claim_marriage_allowance"] = (
         generator.random(len(pe_person)) < marriage_allowance_rate
     )
 
-    # Benefit unit-level
-    pe_benunit["would_claim_child_benefit"] = (
-        generator.random(len(pe_benunit)) < child_benefit_rate
+    # Benefit unit-level — anchor on any person in the benefit unit having
+    # a positive value in the corresponding FRS `*_reported` column.
+    pe_benunit["would_claim_child_benefit"] = assign_takeup_with_reported_anchors(
+        generator.random(len(pe_benunit)),
+        child_benefit_rate,
+        reported_mask=_reported_benunit_mask("child_benefit_reported"),
     )
     pe_benunit["child_benefit_opts_out"] = (
         generator.random(len(pe_benunit)) < child_benefit_opts_out_rate
     )
-    pe_benunit["would_claim_pc"] = (
-        generator.random(len(pe_benunit)) < pension_credit_rate
+    pe_benunit["would_claim_pc"] = assign_takeup_with_reported_anchors(
+        generator.random(len(pe_benunit)),
+        pension_credit_rate,
+        reported_mask=_reported_benunit_mask("pension_credit_reported"),
     )
-    pe_benunit["would_claim_uc"] = (
-        generator.random(len(pe_benunit)) < universal_credit_rate
+    pe_benunit["would_claim_uc"] = assign_takeup_with_reported_anchors(
+        generator.random(len(pe_benunit)),
+        universal_credit_rate,
+        reported_mask=_reported_benunit_mask("universal_credit_reported"),
     )
     pe_benunit["would_claim_tfc"] = generator.random(len(pe_benunit)) < tfc_rate
     pe_benunit["would_claim_extended_childcare"] = (
diff --git a/policyengine_uk_data/tests/test_reported_takeup_anchors.py b/policyengine_uk_data/tests/test_reported_takeup_anchors.py
new file mode 100644
index 000000000..49ce6293d
--- /dev/null
+++ b/policyengine_uk_data/tests/test_reported_takeup_anchors.py
@@ -0,0 +1,65 @@
+"""Unit tests for reported-anchor takeup logic."""
+
+from __future__ import annotations
+
+import numpy as np
+
+from policyengine_uk_data.utils.takeup import assign_takeup_with_reported_anchors
+
+
+def test_no_reported_mask_falls_back_to_draws_less_than_rate():
+    rng = np.random.default_rng(0)
+    draws = rng.random(1000)
+    result = assign_takeup_with_reported_anchors(draws, 0.3)
+    # Expected share close to rate
+    assert abs(result.mean() - 0.3) < 0.05
+    # Identical to plain draws < rate
+    assert (result == (draws < 0.3)).all()
+
+
+def test_reported_anchor_forces_true_for_reporters():
+    rng = np.random.default_rng(1)
+    draws = rng.random(1000)
+    reported_mask = np.zeros(1000, dtype=bool)
+    reported_mask[:100] = True
+    result = assign_takeup_with_reported_anchors(
+        draws, 0.3, reported_mask=reported_mask
+    )
+    # Every reporter is True
+    assert result[:100].all()
+
+
+def test_reported_anchor_hits_target_rate():
+    rng = np.random.default_rng(2)
+    draws = rng.random(10000)
+    reported_mask = np.zeros(10000, dtype=bool)
+    reported_mask[:1000] = True  # 10% reporters
+    result = assign_takeup_with_reported_anchors(
+        draws, 0.3, reported_mask=reported_mask
+    )
+    # Overall rate should be close to 30%
+    assert abs(result.mean() - 0.3) < 0.02
+
+
+def test_reported_anchor_when_reporters_exceed_target():
+    rng = np.random.default_rng(3)
+    draws = rng.random(1000)
+    reported_mask = np.zeros(1000, dtype=bool)
+    reported_mask[:500] = True  # 50% reporters
+    # Target 30% but reporters already at 50% — everyone reporting stays in.
+    result = assign_takeup_with_reported_anchors(
+        draws, 0.3, reported_mask=reported_mask
+    )
+    assert result[:500].all()
+    assert not result[500:].any()
+
+
+def test_reported_mask_length_validation():
+    draws = np.random.default_rng(4).random(100)
+    reported_mask = np.zeros(50, dtype=bool)
+    try:
+        assign_takeup_with_reported_anchors(draws, 0.3, reported_mask=reported_mask)
+    except ValueError as exc:
+        assert "must align" in str(exc)
+    else:
+        raise AssertionError("expected ValueError for misaligned reported_mask")
diff --git a/policyengine_uk_data/utils/takeup.py b/policyengine_uk_data/utils/takeup.py
new file mode 100644
index 000000000..ab5f6241c
--- /dev/null
+++ b/policyengine_uk_data/utils/takeup.py
@@ -0,0 +1,59 @@
+"""Shared take-up draw logic with reported-recipient anchoring.
+
+Ported from ``policyengine_us_data/utils/takeup.py``. The core idea: when a
+survey respondent reports receiving a benefit, they are by construction a
+taker-up; they should be assigned takeup=True with certainty, and the
+remaining non-reporters should be filled at random so that the aggregate
+takeup rate across the full population hits the target. Pure random draws
+(the previous UK pattern) ignore this information and add calibration noise.
+"""
+
+from __future__ import annotations
+
+from typing import Optional
+
+import numpy as np
+
+
+def assign_takeup_with_reported_anchors(
+    draws: np.ndarray,
+    rate: float,
+    reported_mask: Optional[np.ndarray] = None,
+) -> np.ndarray:
+    """Apply the SSI/SNAP-style reported-first takeup pattern.
+
+    Reported recipients are always assigned ``takeup=True``. Non-reporters
+    are then filled probabilistically towards the count implied by ``rate``
+    over all entities; none are added if reporters already reach it.
+
+    Args:
+        draws: Uniform draws in [0, 1), one per entity.
+        rate: Target aggregate takeup rate in [0, 1].
+        reported_mask: Boolean array, same length as ``draws``. ``True``
+            where the survey reports a positive benefit amount. If ``None``,
+            the function falls back to a plain ``draws < rate`` fill.
+
+    Returns:
+        Boolean array of the same length as ``draws``, ``True`` for entities
+        that take up.
+    """
+    draws = np.asarray(draws, dtype=np.float64)
+    rate = float(rate)
+
+    if reported_mask is None:
+        return draws < rate
+
+    reported_mask = np.asarray(reported_mask, dtype=bool)
+    if len(reported_mask) != len(draws):
+        raise ValueError("reported_mask and draws must align")
+
+    result = reported_mask.copy()
+    target_count = int(rate * len(draws))
+    remaining_needed = max(0, target_count - int(reported_mask.sum()))
+    non_reporters = ~reported_mask
+    if not non_reporters.any() or remaining_needed == 0:
+        return result
+
+    adjusted_rate = remaining_needed / int(non_reporters.sum())
+    result |= non_reporters & (draws < adjusted_rate)
+    return result

From a8c99ae200705333ddf717f90c2f104b8236e525 Mon Sep 17 00:00:00 2001
From: Max Ghenis <mghenis@gmail.com>
Date: Fri, 17 Apr 2026 20:35:34 -0400
Subject: [PATCH 2/2] Refresh uv.lock version after merge

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 uv.lock | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/uv.lock b/uv.lock
index 3f83c8fc8..fa99c1613 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1366,7 +1366,7 @@ wheels = [
 
 [[package]]
 name = "policyengine-uk-data"
-version = "1.50.1"
+version = "1.52.0"
 source = { editable = "." }
 dependencies = [
     { name = "google-auth" },