Skip to content

Commit 6b1f80e

Browse files
authored
Split transfer PIP component categories (#419)
1 parent 46392fd commit 6b1f80e

4 files changed

Lines changed: 97 additions & 7 deletions

File tree

policyengine_uk_data/datasets/enhanced_cps.py

Lines changed: 72 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,8 @@
4141
NEW_STATE_PENSION_2025 = 224.96 * 52
4242
DIVIDEND_YIELD_FOR_WEALTH_IMPUTATION = 0.03
4343
RENTAL_YIELD_FOR_WEALTH_IMPUTATION = 0.04
44+
PIP_BOTH_COMPONENT_SHARE = 0.67
45+
PIP_DAILY_LIVING_ONLY_SHARE = 0.22
4446

4547
REGION_SHARES = (
4648
("NORTH_EAST", 0.04),
@@ -215,12 +217,25 @@ def _capital_gains_amount(person_inputs: dict, exchange_rate: float) -> float:
215217
)
216218

217219

218-
def _pip_category(person: dict) -> str:
220+
def _deterministic_fraction(identifier: int, salt: int) -> float:
221+
value = (int(identifier) + salt * 0x9E3779B97F4A7C15) & 0xFFFFFFFFFFFFFFFF
222+
value = (value ^ (value >> 30)) * 0xBF58476D1CE4E5B9
223+
value &= 0xFFFFFFFFFFFFFFFF
224+
value = (value ^ (value >> 27)) * 0x94D049BB133111EB
225+
value &= 0xFFFFFFFFFFFFFFFF
226+
value ^= value >> 31
227+
return value / 2**64
228+
229+
230+
def _pip_recipient(person: dict) -> bool:
219231
inputs = person.get("inputs", {})
220232
disabled = bool(inputs.get("is_disabled", False))
221233
age = int(person.get("age", 0))
222-
if not disabled or age < 16:
223-
return "NONE"
234+
return disabled and age >= 16
235+
236+
237+
def _pip_enhanced_probability(person: dict) -> float:
238+
inputs = person.get("inputs", {})
224239

225240
severe_signal = (
226241
float(inputs.get("disability_benefits", 0.0)) > 0
@@ -232,7 +247,54 @@ def _pip_category(person: dict) -> str:
232247
+ float(inputs.get("self_employment_income", 0.0))
233248
< 12_000
234249
)
235-
return "ENHANCED" if severe_signal or low_earnings else "STANDARD"
250+
if severe_signal and low_earnings:
251+
return 0.75
252+
if severe_signal:
253+
return 0.65
254+
if low_earnings:
255+
return 0.55
256+
return 0.35
257+
258+
259+
def _pip_component_categories(person: dict, person_id: int) -> tuple[str, str]:
    """Assign transfer-side PIP daily living and mobility award categories.

    The transfer source carries only broad disability signals rather than UK
    PIP assessment outcomes. Those signals decide who is in the recipient
    pool; the components are then split with deterministic per-person draws
    so that daily living and mobility are not identical by construction.

    Returns ``(daily_living, mobility)``, each one of ``"NONE"``,
    ``"STANDARD"`` or ``"ENHANCED"``.
    """
    if not _pip_recipient(person):
        return "NONE", "NONE"

    # One draw partitions recipients into three bands: both components,
    # daily living only, and (the remainder) mobility only.
    both_cutoff = PIP_BOTH_COMPONENT_SHARE
    daily_living_cutoff = PIP_BOTH_COMPONENT_SHARE + PIP_DAILY_LIVING_ONLY_SHARE
    band_draw = _deterministic_fraction(person_id, salt=17)
    gets_daily_living = band_draw < daily_living_cutoff
    gets_mobility = band_draw < both_cutoff or band_draw >= daily_living_cutoff

    enhanced_share = _pip_enhanced_probability(person)

    def award_level(awarded: bool, salt: int) -> str:
        # Each component passes its own salt, so the two rate draws are
        # taken separately rather than reusing a single value.
        if not awarded:
            return "NONE"
        if _deterministic_fraction(person_id, salt=salt) < enhanced_share:
            return "ENHANCED"
        return "STANDARD"

    return award_level(gets_daily_living, 31), award_level(gets_mobility, 43)
236298

237299

238300
def _household_cash_income(people: list[dict], exchange_rate: float) -> float:
@@ -591,7 +653,10 @@ def _build_base_dataset(
591653
for person_index, person in enumerate(people, start=1):
592654
inputs = person.get("inputs", {})
593655
person_id = household_id * 10 + person_index
594-
pip_category = _pip_category(person)
656+
pip_dl_category, pip_m_category = _pip_component_categories(
657+
person,
658+
person_id,
659+
)
595660

596661
person_rows.append(
597662
{
@@ -670,8 +735,8 @@ def _build_base_dataset(
670735
if bool(inputs.get("is_blind", False))
671736
else 0.0,
672737
"is_disabled_for_benefits": bool(inputs.get("is_disabled", False)),
673-
"pip_dl_category": pip_category,
674-
"pip_m_category": pip_category,
738+
"pip_dl_category": pip_dl_category,
739+
"pip_m_category": pip_m_category,
675740
"hours_worked": float(
676741
inputs.get(
677742
"weekly_hours_worked",
4.09 KB
Binary file not shown.
157 KB
Binary file not shown.

policyengine_uk_data/tests/test_policybench_transfer.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,22 @@ def _subset_source(tmp_path: Path, rows: int) -> Path:
3535
return subset_path
3636

3737

38+
def _assert_pip_components_are_split(person: pd.DataFrame) -> None:
39+
pip = person[["pip_dl_category", "pip_m_category"]]
40+
receives_daily_living = pip.pip_dl_category != "NONE"
41+
receives_mobility = pip.pip_m_category != "NONE"
42+
receives_both = receives_daily_living & receives_mobility
43+
44+
assert receives_both.any()
45+
assert (receives_daily_living & ~receives_mobility).any()
46+
assert (~receives_daily_living & receives_mobility).any()
47+
assert (pip.pip_dl_category != pip.pip_m_category).any()
48+
assert (
49+
pip.loc[receives_both, "pip_dl_category"]
50+
!= pip.loc[receives_both, "pip_m_category"]
51+
).any()
52+
53+
3854
def test_policybench_transfer_dataset_validates(tmp_path: Path):
3955
dataset = create_enhanced_cps(
4056
source_file_path=_subset_source(tmp_path, 10),
@@ -98,6 +114,15 @@ def test_checked_in_enhanced_cps_h5_uses_pip_categories():
98114
assert "pip_m_reported" not in dataset.person.columns
99115
assert "pip_dl_category" in dataset.person.columns
100116
assert "pip_m_category" in dataset.person.columns
117+
_assert_pip_components_are_split(dataset.person)
118+
119+
120+
def test_policybench_transfer_splits_pip_components(tmp_path: Path):
    """Build an uncalibrated dataset from a 1,000-row subset and check the PIP split."""
    source_path = _subset_source(tmp_path, 1_000)
    dataset = create_enhanced_cps(source_file_path=source_path, calibrate=False)
    _assert_pip_components_are_split(dataset.person)
101126

102127

103128
def test_policybench_transfer_runs_uk_microsimulation(tmp_path: Path):

0 commit comments

Comments
 (0)