From ba7d8a2020262e001c1eb28424579289cd3e39df Mon Sep 17 00:00:00 2001 From: Max Ghenis Date: Fri, 17 Apr 2026 13:44:19 -0400 Subject: [PATCH 1/2] Anchor UC/PC/CB takeup flags to FRS-reported receipt MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit FRS respondents who report positive receipt of a benefit are, by construction, taking it up (takeup=True). The prior code assigned `would_claim_uc`, `would_claim_pc`, and `would_claim_child_benefit` by pure random draw against the aggregate takeup rate, ignoring that information — which meant a respondent reporting UC receipt could be randomly assigned `would_claim_uc = False`, producing calibration noise. Ports `assign_takeup_with_reported_anchors` from `policyengine_us_data/utils/takeup.py` (in the `policyengine-us-data` repository), pared down to the single-group case (UK doesn't need the US's state-keyed grouping). Reporters are forced to True; non-reporters are filled probabilistically to hit the aggregate target rate across the full population, so the overall takeup share still matches the target in expectation unless reporters alone already exceed it, in which case no non-reporters are added and the share equals the reporter share. Applied to the three benefit-unit-level flags where FRS has a matching reported column (`universal_credit_reported`, `pension_credit_reported`, `child_benefit_reported`). Other takeup flags (TFC, childcare schemes, SCP) have no FRS-reported counterpart and keep pure-random behaviour. Five unit tests cover the new helper: pure-random fallback, reporters always True, overall rate close to target, handling when reporters already exceed target, and mask-length validation. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../reported-takeup-anchors.changed.md | 1 + policyengine_uk_data/datasets/frs.py | 37 ++++++++--- .../tests/test_reported_takeup_anchors.py | 65 +++++++++++++++++++ policyengine_uk_data/utils/takeup.py | 59 +++++++++++++++++ 4 files changed, 154 insertions(+), 8 deletions(-) create mode 100644 changelog.d/reported-takeup-anchors.changed.md create mode 100644 policyengine_uk_data/tests/test_reported_takeup_anchors.py create mode 100644 policyengine_uk_data/utils/takeup.py diff --git a/changelog.d/reported-takeup-anchors.changed.md b/changelog.d/reported-takeup-anchors.changed.md new file mode 100644 index 000000000..0c8597a06 --- /dev/null +++ b/changelog.d/reported-takeup-anchors.changed.md @@ -0,0 +1 @@ +Anchor stochastic takeup assignment for Universal Credit, Pension Credit, and Child Benefit to the FRS-reported receipt columns, matching the `policyengine-us-data` pattern. Respondents who report positive receipt in the FRS benefits table now receive `would_claim_* = True` with certainty, and non-reporters are filled probabilistically to hit the aggregate target rate. Removes a source of calibration noise where respondents who clearly took up a benefit could be randomly assigned `would_claim = False`. diff --git a/policyengine_uk_data/datasets/frs.py b/policyengine_uk_data/datasets/frs.py index fc6aeaf71..170f11b51 100644 --- a/policyengine_uk_data/datasets/frs.py +++ b/policyengine_uk_data/datasets/frs.py @@ -1217,24 +1217,45 @@ def determine_education_level(fted_val, typeed2_val, age_val): scp_under_6_rate = load_take_up_rate("scp_under_6", year) scp_6_plus_rate = load_take_up_rate("scp_6_plus", year) - # Generate take-up decisions by comparing random draws to take-up rates + # Generate take-up decisions by comparing random draws to take-up rates, + # anchored to reported receipts where the FRS captures them. 
Respondents + # who report positive receipt of a benefit are assigned takeup=True with + # certainty; the remaining non-reporters are filled probabilistically to + # hit the aggregate target rate. See policyengine_uk_data/utils/takeup.py. + from policyengine_uk_data.utils.takeup import ( + assign_takeup_with_reported_anchors, + ) + + def _reported_benunit_mask(person_column: str) -> np.ndarray: + reporter_benunits = set( + pe_person.loc[pe_person[person_column] > 0, "person_benunit_id"].values + ) + return pe_benunit["benunit_id"].isin(reporter_benunits).values + # Person-level pe_person["would_claim_marriage_allowance"] = ( generator.random(len(pe_person)) < marriage_allowance_rate ) - # Benefit unit-level - pe_benunit["would_claim_child_benefit"] = ( - generator.random(len(pe_benunit)) < child_benefit_rate + # Benefit unit-level — anchor on any adult in the benefit unit having + # reported positive receipt in the FRS benefits table. + pe_benunit["would_claim_child_benefit"] = assign_takeup_with_reported_anchors( + generator.random(len(pe_benunit)), + child_benefit_rate, + reported_mask=_reported_benunit_mask("child_benefit_reported"), ) pe_benunit["child_benefit_opts_out"] = ( generator.random(len(pe_benunit)) < child_benefit_opts_out_rate ) - pe_benunit["would_claim_pc"] = ( - generator.random(len(pe_benunit)) < pension_credit_rate + pe_benunit["would_claim_pc"] = assign_takeup_with_reported_anchors( + generator.random(len(pe_benunit)), + pension_credit_rate, + reported_mask=_reported_benunit_mask("pension_credit_reported"), ) - pe_benunit["would_claim_uc"] = ( - generator.random(len(pe_benunit)) < universal_credit_rate + pe_benunit["would_claim_uc"] = assign_takeup_with_reported_anchors( + generator.random(len(pe_benunit)), + universal_credit_rate, + reported_mask=_reported_benunit_mask("universal_credit_reported"), ) pe_benunit["would_claim_tfc"] = generator.random(len(pe_benunit)) < tfc_rate pe_benunit["would_claim_extended_childcare"] = ( diff --git 
a/policyengine_uk_data/tests/test_reported_takeup_anchors.py b/policyengine_uk_data/tests/test_reported_takeup_anchors.py new file mode 100644 index 000000000..49ce6293d --- /dev/null +++ b/policyengine_uk_data/tests/test_reported_takeup_anchors.py @@ -0,0 +1,65 @@ +"""Unit tests for reported-anchor takeup logic.""" + +from __future__ import annotations + +import numpy as np + +from policyengine_uk_data.utils.takeup import assign_takeup_with_reported_anchors + + +def test_no_reported_mask_falls_back_to_draws_less_than_rate(): + rng = np.random.default_rng(0) + draws = rng.random(1000) + result = assign_takeup_with_reported_anchors(draws, 0.3) + # Expected share close to rate + assert abs(result.mean() - 0.3) < 0.05 + # Identical to plain draws < rate + assert (result == (draws < 0.3)).all() + + +def test_reported_anchor_forces_true_for_reporters(): + rng = np.random.default_rng(1) + draws = rng.random(1000) + reported_mask = np.zeros(1000, dtype=bool) + reported_mask[:100] = True + result = assign_takeup_with_reported_anchors( + draws, 0.3, reported_mask=reported_mask + ) + # Every reporter is True + assert result[:100].all() + + +def test_reported_anchor_hits_target_rate(): + rng = np.random.default_rng(2) + draws = rng.random(10000) + reported_mask = np.zeros(10000, dtype=bool) + reported_mask[:1000] = True # 10% reporters + result = assign_takeup_with_reported_anchors( + draws, 0.3, reported_mask=reported_mask + ) + # Overall rate should be close to 30% + assert abs(result.mean() - 0.3) < 0.02 + + +def test_reported_anchor_when_reporters_exceed_target(): + rng = np.random.default_rng(3) + draws = rng.random(1000) + reported_mask = np.zeros(1000, dtype=bool) + reported_mask[:500] = True # 50% reporters + # Target 30% but reporters already at 50% — everyone reporting stays in. 
+ result = assign_takeup_with_reported_anchors( + draws, 0.3, reported_mask=reported_mask + ) + assert result[:500].all() + assert not result[500:].any() + + +def test_reported_mask_length_validation(): + draws = np.random.default_rng(4).random(100) + reported_mask = np.zeros(50, dtype=bool) + try: + assign_takeup_with_reported_anchors(draws, 0.3, reported_mask=reported_mask) + except ValueError as exc: + assert "must align" in str(exc) + else: + raise AssertionError("expected ValueError for misaligned reported_mask") diff --git a/policyengine_uk_data/utils/takeup.py b/policyengine_uk_data/utils/takeup.py new file mode 100644 index 000000000..ab5f6241c --- /dev/null +++ b/policyengine_uk_data/utils/takeup.py @@ -0,0 +1,59 @@ +"""Shared take-up draw logic with reported-recipient anchoring. + +Ported from ``policyengine_us_data/utils/takeup.py``. The core idea: when a +survey respondent reports receiving a benefit, they are by construction a +taker-up; they should be assigned takeup=True with certainty, and the +remaining random fill should hit the target aggregate takeup rate across the +non-reporting eligibles. Pure random draws (the previous UK pattern) ignore +this information and produce noisier calibration. +""" + +from __future__ import annotations + +from typing import Optional + +import numpy as np + + +def assign_takeup_with_reported_anchors( + draws: np.ndarray, + rate: float, + reported_mask: Optional[np.ndarray] = None, +) -> np.ndarray: + """Apply the SSI/SNAP-style reported-first takeup pattern. + + Reported recipients are always assigned ``takeup=True``. Remaining + non-reporters are filled probabilistically to reach the target count + implied by ``rate`` across the full population. + + Args: + draws: Uniform draws in [0, 1), one per entity. + rate: Target aggregate takeup rate in [0, 1]. + reported_mask: Boolean array, same length as ``draws``. ``True`` + where the survey reports a positive benefit amount. 
If ``None``, + the function falls back to a plain ``draws < rate`` fill. + + Returns: + Boolean array of the same length as ``draws``, ``True`` for entities + that take up. + """ + draws = np.asarray(draws, dtype=np.float64) + rate = float(rate) + + if reported_mask is None: + return draws < rate + + reported_mask = np.asarray(reported_mask, dtype=bool) + if len(reported_mask) != len(draws): + raise ValueError("reported_mask and draws must align") + + result = reported_mask.copy() + target_count = int(rate * len(draws)) + remaining_needed = max(0, target_count - int(reported_mask.sum())) + non_reporters = ~reported_mask + if not non_reporters.any() or remaining_needed == 0: + return result + + adjusted_rate = remaining_needed / int(non_reporters.sum()) + result |= non_reporters & (draws < adjusted_rate) + return result From a8c99ae200705333ddf717f90c2f104b8236e525 Mon Sep 17 00:00:00 2001 From: Max Ghenis Date: Fri, 17 Apr 2026 20:35:34 -0400 Subject: [PATCH 2/2] Refresh uv.lock version after merge Co-Authored-By: Claude Opus 4.7 (1M context) --- uv.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/uv.lock b/uv.lock index 3f83c8fc8..fa99c1613 100644 --- a/uv.lock +++ b/uv.lock @@ -1366,7 +1366,7 @@ wheels = [ [[package]] name = "policyengine-uk-data" -version = "1.50.1" +version = "1.52.0" source = { editable = "." } dependencies = [ { name = "google-auth" },