Skip to content

Commit 5056ce7

Browse files
MaxGhenis and claude authored
Anchor UC/PC/CB takeup flags to FRS-reported receipt (#359)
* Anchor UC/PC/CB takeup flags to FRS-reported receipt FRS respondents who report positive receipt of a benefit are by construction take-up=True. The prior code assigned `would_claim_uc`, `would_claim_pc`, and `would_claim_child_benefit` by pure random draw against the aggregate takeup rate, ignoring that information — which meant a respondent reporting UC receipt could be randomly assigned `would_claim_uc = False`, producing calibration noise. Ports `assign_takeup_with_reported_anchors` from `policyengine-us-data/utils/takeup.py`, pared down to the single-group case (UK doesn't need the US's state-keyed grouping). Reporters are forced to True; non-reporters are filled probabilistically to hit the aggregate target rate across the full population, so the overall takeup share still matches the target. Applied to the three benefit-unit-level flags where FRS has a matching reported column (`universal_credit_reported`, `pension_credit_reported`, `child_benefit_reported`). Other takeup flags (TFC, childcare schemes, SCP) have no FRS-reported counterpart and keep pure-random behaviour. 5 unit tests cover the new helper: pure-random fallback, reporters always True, overall rate close to target, handling when reporters already exceed target, and mask-length validation. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> * Refresh uv.lock version after merge Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent e132907 commit 5056ce7

4 files changed

Lines changed: 154 additions & 8 deletions

File tree

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Anchor stochastic takeup assignment for Universal Credit, Pension Credit, and Child Benefit to the FRS-reported receipt columns, matching the `policyengine-us-data` pattern. Respondents who report positive receipt in the FRS benefits table now receive `would_claim_* = True` with certainty, and non-reporters are filled probabilistically to hit the aggregate target rate. Removes a source of calibration noise where respondents who clearly took up a benefit could be randomly assigned `would_claim = False`.

policyengine_uk_data/datasets/frs.py

Lines changed: 29 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1217,24 +1217,45 @@ def determine_education_level(fted_val, typeed2_val, age_val):
12171217
scp_under_6_rate = load_take_up_rate("scp_under_6", year)
12181218
scp_6_plus_rate = load_take_up_rate("scp_6_plus", year)
12191219

1220-
# Generate take-up decisions by comparing random draws to take-up rates
1220+
# Generate take-up decisions by comparing random draws to take-up rates,
1221+
# anchored to reported receipts where the FRS captures them. Respondents
1222+
# who report positive receipt of a benefit are assigned takeup=True with
1223+
# certainty; the remaining non-reporters are filled probabilistically to
1224+
# hit the aggregate target rate. See policyengine_uk_data/utils/takeup.py.
1225+
from policyengine_uk_data.utils.takeup import (
1226+
assign_takeup_with_reported_anchors,
1227+
)
1228+
1229+
def _reported_benunit_mask(person_column: str) -> np.ndarray:
    """Flag benefit units containing at least one person with positive receipt.

    Args:
        person_column: Name of the ``pe_person`` column holding the reported
            amount; rows with a value strictly greater than zero count as
            reporters.

    Returns:
        Boolean array with one entry per row of ``pe_benunit``, ``True`` where
        the row's ``benunit_id`` matches the ``person_benunit_id`` of any
        reporting person.
    """
    has_positive_receipt = pe_person[person_column] > 0
    reporting_ids = set(
        pe_person.loc[has_positive_receipt, "person_benunit_id"].values
    )
    in_reporting_benunit = pe_benunit["benunit_id"].isin(reporting_ids)
    return in_reporting_benunit.values
1234+
12211235
# Person-level
12221236
pe_person["would_claim_marriage_allowance"] = (
12231237
generator.random(len(pe_person)) < marriage_allowance_rate
12241238
)
12251239

1226-
# Benefit unit-level
1227-
pe_benunit["would_claim_child_benefit"] = (
1228-
generator.random(len(pe_benunit)) < child_benefit_rate
1240+
# Benefit unit-level — anchor on any adult in the benefit unit having
1241+
# reported positive receipt in the FRS benefits table.
1242+
pe_benunit["would_claim_child_benefit"] = assign_takeup_with_reported_anchors(
1243+
generator.random(len(pe_benunit)),
1244+
child_benefit_rate,
1245+
reported_mask=_reported_benunit_mask("child_benefit_reported"),
12291246
)
12301247
pe_benunit["child_benefit_opts_out"] = (
12311248
generator.random(len(pe_benunit)) < child_benefit_opts_out_rate
12321249
)
1233-
pe_benunit["would_claim_pc"] = (
1234-
generator.random(len(pe_benunit)) < pension_credit_rate
1250+
pe_benunit["would_claim_pc"] = assign_takeup_with_reported_anchors(
1251+
generator.random(len(pe_benunit)),
1252+
pension_credit_rate,
1253+
reported_mask=_reported_benunit_mask("pension_credit_reported"),
12351254
)
1236-
pe_benunit["would_claim_uc"] = (
1237-
generator.random(len(pe_benunit)) < universal_credit_rate
1255+
pe_benunit["would_claim_uc"] = assign_takeup_with_reported_anchors(
1256+
generator.random(len(pe_benunit)),
1257+
universal_credit_rate,
1258+
reported_mask=_reported_benunit_mask("universal_credit_reported"),
12381259
)
12391260
pe_benunit["would_claim_tfc"] = generator.random(len(pe_benunit)) < tfc_rate
12401261
pe_benunit["would_claim_extended_childcare"] = (
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
"""Unit tests for reported-anchor takeup logic."""
2+
3+
from __future__ import annotations
4+
5+
import numpy as np
6+
7+
from policyengine_uk_data.utils.takeup import assign_takeup_with_reported_anchors
8+
9+
10+
def test_no_reported_mask_falls_back_to_draws_less_than_rate():
    """Without a mask the helper reduces to a plain ``draws < rate`` fill."""
    target_rate = 0.3
    uniform_draws = np.random.default_rng(0).random(1000)
    takeup = assign_takeup_with_reported_anchors(uniform_draws, target_rate)
    # The realised share sits near the requested rate.
    share = takeup.mean()
    assert abs(share - 0.3) < 0.05
    # Element-for-element the result equals the naive comparison.
    naive = uniform_draws < 0.3
    assert (takeup == naive).all()
18+
19+
20+
def test_reported_anchor_forces_true_for_reporters():
    """Entities flagged as reporters must always come out as taking up."""
    n_entities, n_reporters = 1000, 100
    uniform_draws = np.random.default_rng(1).random(n_entities)
    is_reporter = np.zeros(n_entities, dtype=bool)
    is_reporter[:n_reporters] = True
    takeup = assign_takeup_with_reported_anchors(
        uniform_draws,
        0.3,
        reported_mask=is_reporter,
    )
    # The first 100 entities are the reporters; none may be False.
    assert takeup[:n_reporters].all()
30+
31+
32+
def test_reported_anchor_hits_target_rate():
    """With 10% reporters the overall take-up share still lands near 30%."""
    n_entities = 10000
    uniform_draws = np.random.default_rng(2).random(n_entities)
    is_reporter = np.zeros(n_entities, dtype=bool)
    is_reporter[:1000] = True  # one in ten entities reports receipt
    takeup = assign_takeup_with_reported_anchors(
        uniform_draws,
        0.3,
        reported_mask=is_reporter,
    )
    overall_share = takeup.mean()
    assert abs(overall_share - 0.3) < 0.02
42+
43+
44+
def test_reported_anchor_when_reporters_exceed_target():
    """Reporters above the target all stay in; nobody else is added."""
    n_entities, n_reporters = 1000, 500
    uniform_draws = np.random.default_rng(3).random(n_entities)
    is_reporter = np.zeros(n_entities, dtype=bool)
    is_reporter[:n_reporters] = True  # half the population reports receipt
    # The 30% target is already exceeded by the 50% who report, so the
    # result should be exactly the reporter mask.
    takeup = assign_takeup_with_reported_anchors(
        uniform_draws,
        0.3,
        reported_mask=is_reporter,
    )
    reporters_out, others_out = takeup[:n_reporters], takeup[n_reporters:]
    assert reporters_out.all()
    assert not others_out.any()
55+
56+
57+
def test_reported_mask_length_validation():
    """A mask whose length differs from the draws raises ``ValueError``."""
    short_mask = np.zeros(50, dtype=bool)
    uniform_draws = np.random.default_rng(4).random(100)
    caught = None
    try:
        assign_takeup_with_reported_anchors(
            uniform_draws, 0.3, reported_mask=short_mask
        )
    except ValueError as exc:
        caught = exc
    if caught is None:
        raise AssertionError("expected ValueError for misaligned reported_mask")
    assert "must align" in str(caught)
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
"""Shared take-up draw logic with reported-recipient anchoring.
2+
3+
Ported from ``policyengine_us_data/utils/takeup.py``. The core idea: when a
4+
survey respondent reports receiving a benefit, they are by construction a
5+
taker-up; they should be assigned takeup=True with certainty, and the
6+
remaining random fill should hit the target aggregate takeup rate across the
7+
non-reporting eligibles. Pure random draws (the previous UK pattern) ignore
8+
this information and produce noisier calibration.
9+
"""
10+
11+
from __future__ import annotations
12+
13+
from typing import Optional
14+
15+
import numpy as np
16+
17+
18+
def assign_takeup_with_reported_anchors(
    draws: np.ndarray,
    rate: float,
    reported_mask: Optional[np.ndarray] = None,
) -> np.ndarray:
    """Assign take-up flags, forcing reported recipients to ``True``.

    Reported recipients are always assigned ``takeup=True``. Non-reporters
    are then filled probabilistically so that the expected number of takers
    across the *full* population reaches ``floor(rate * n)``. If reporters
    alone already meet or exceed that count, no non-reporter is added.

    Args:
        draws: Uniform draws in [0, 1), one per entity.
        rate: Target aggregate takeup rate in [0, 1].
        reported_mask: Boolean array with the same shape as ``draws``.
            ``True`` where the survey reports a positive benefit amount. If
            ``None``, the function falls back to a plain ``draws < rate``
            fill.

    Returns:
        Boolean array of the same shape as ``draws``, ``True`` for entities
        that take up.

    Raises:
        ValueError: If ``reported_mask`` does not have the same shape as
            ``draws``.
    """
    draws = np.asarray(draws, dtype=np.float64)
    rate = float(rate)

    if reported_mask is None:
        return draws < rate

    reported_mask = np.asarray(reported_mask, dtype=bool)
    # Compare full shapes, not just len(): an (N, 1) mask against (N,) draws
    # has a matching len() but would broadcast to (N, N) further down.
    if reported_mask.shape != draws.shape:
        raise ValueError("reported_mask and draws must align")

    n_entities = draws.size
    result = reported_mask.copy()

    # Floor the target count, but absorb floating-point representation error
    # first: 0.29 * 100 evaluates to 28.999999999999996, which a bare int()
    # would truncate to 28 rather than the intended 29.
    target_count = int(np.floor(rate * n_entities + 1e-9))
    reporter_count = int(reported_mask.sum())
    remaining_needed = max(0, target_count - reporter_count)
    non_reporter_count = n_entities - reporter_count
    if non_reporter_count == 0 or remaining_needed == 0:
        return result

    # Spread the shortfall over non-reporters only, so that the expected
    # total (reporters + filled non-reporters) equals the target count.
    adjusted_rate = remaining_needed / non_reporter_count
    result |= ~reported_mask & (draws < adjusted_rate)
    return result

0 commit comments

Comments
 (0)