Skip to content

Commit c766af1

Browse files
committed
Map disability benefit categories in data
1 parent 604621c commit c766af1

13 files changed

Lines changed: 449 additions & 75 deletions
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Map reported disability benefit amounts to category inputs in the data pipeline.

policyengine_uk_data/datasets/create_datasets.py

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,9 @@ def main():
3030
assert_local_build_environment()
3131

3232
from policyengine_uk.data import UKSingleYearDataset
33+
from policyengine_uk_data.datasets.disability_benefits import (
34+
strip_internal_disability_reported_amounts,
35+
)
3336
from policyengine_uk_data.datasets.frs import create_frs
3437
from policyengine_uk_data.storage import STORAGE_FOLDER
3538
from policyengine_uk_data.utils.progress import (
@@ -79,8 +82,11 @@ def main():
7982
frs = create_frs(
8083
raw_frs_folder=STORAGE_FOLDER / "frs_2023_24",
8184
year=2023,
85+
include_internal_disability_reported_amounts=True,
86+
)
87+
strip_internal_disability_reported_amounts(frs).save(
88+
STORAGE_FOLDER / "frs_2023_24.h5"
8289
)
83-
frs.save(STORAGE_FOLDER / "frs_2023_24.h5")
8490
update_dataset("Create base FRS dataset", "completed")
8591

8692
# Import imputation functions
@@ -212,7 +218,9 @@ def main():
212218
update_dataset("Downrate to 2023", "completed")
213219

214220
update_dataset("Save final dataset", "processing")
215-
frs_calibrated.save(STORAGE_FOLDER / "enhanced_frs_2023_24.h5")
221+
strip_internal_disability_reported_amounts(frs_calibrated).save(
222+
STORAGE_FOLDER / "enhanced_frs_2023_24.h5"
223+
)
216224
update_dataset("Save final dataset", "completed")
217225

218226
# Create tiny (n=1000 households) versions for testing
@@ -225,7 +233,10 @@ def main():
225233
tiny_frs = subsample_dataset(frs_base, TINY_SIZE)
226234
tiny_frs.save(STORAGE_FOLDER / "frs_2023_24_tiny.h5")
227235

228-
tiny_enhanced = subsample_dataset(frs_calibrated, TINY_SIZE)
236+
tiny_enhanced = subsample_dataset(
237+
strip_internal_disability_reported_amounts(frs_calibrated),
238+
TINY_SIZE,
239+
)
229240
tiny_enhanced.save(STORAGE_FOLDER / "enhanced_frs_2023_24_tiny.h5")
230241
update_dataset("Create tiny datasets", "completed")
231242

Lines changed: 199 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,199 @@
1+
"""Dataset-side disability benefit category mapping.
2+
3+
PolicyEngine UK models PIP, DLA, and Attendance Allowance from category
4+
inputs. The FRS observes reported amounts, so the data pipeline keeps those
5+
amounts as internal build intermediates and converts them to model inputs
6+
before datasets are published.
7+
"""
8+
9+
from __future__ import annotations
10+
11+
from functools import lru_cache
12+
13+
import numpy as np
14+
import pandas as pd
15+
from policyengine_uk import CountryTaxBenefitSystem
16+
from policyengine_uk.data import UKSingleYearDataset
17+
from policyengine_uk.model_api import WEEKS_IN_YEAR as MODEL_WEEKS_IN_YEAR
18+
19+
20+
# Reported-amount columns treated as internal build intermediates; these are
# exactly the columns removed by drop_internal_disability_reported_amounts.
DISABILITY_REPORTED_AMOUNT_COLUMNS = (
    "attendance_allowance_reported",
    "dla_sc_reported",
    "dla_m_reported",
    "pip_m_reported",
    "pip_dl_reported",
)

# Category columns written by
# add_disability_benefit_categories_from_reported_amounts, listed in the same
# order as the reported-amount columns they are derived from.
DISABILITY_CATEGORY_COLUMNS = (
    "aa_category",
    "dla_sc_category",
    "dla_m_category",
    "pip_m_category",
    "pip_dl_category",
)

# Fractional tolerance applied to each weekly rate when assigning a category:
# an amount qualifies once it reaches rate * (1 - SAFETY_MARGIN).
SAFETY_MARGIN = 0.1

# Weeks per year (365.25 / 7) used to annualise weekly rates when computing
# the disability flag thresholds.
SURVEY_REPORTED_AMOUNT_WEEKS_IN_YEAR = 365.25 / 7
38+
39+
40+
@lru_cache(maxsize=None)
def _dwp_parameters(year: int):
    """Return the ``gov.dwp`` parameter node for ``year``.

    The result is memoised per ``year``, so a tax-benefit system is only
    constructed the first time a given year is requested.
    """
    # Category assignment belongs to data creation, so it reads the
    # current-law parameters and aligns to the same-year benefit amounts
    # (the removed PE-UK formulas used baseline rates instead).
    system = CountryTaxBenefitSystem()
    parameters_for_year = system.parameters(year)
    return parameters_for_year.gov.dwp
46+
47+
48+
def _reported_amount(person: pd.DataFrame, column: str) -> pd.Series:
49+
if column not in person.columns:
50+
return pd.Series(0.0, index=person.index)
51+
return pd.to_numeric(person[column], errors="coerce").fillna(0.0)
52+
53+
54+
def _category_from_reported_amount(
    reported_amount: pd.Series,
    thresholds: tuple[tuple[str, float], ...],
) -> np.ndarray:
    """Assign each reported amount the highest category whose rate it reaches.

    Args:
        reported_amount: Annual reported amounts. Non-numeric and missing
            entries are treated as 0.
        thresholds: ``(category_name, weekly_rate)`` pairs. They may be given
            in any order; they are applied from the lowest rate upwards.

    Returns:
        Object array, one entry per input row, holding the category name or
        ``"NONE"`` where no threshold is reached.
    """
    annual_amount = pd.to_numeric(reported_amount, errors="coerce").fillna(0)
    weekly_amount = annual_amount.to_numpy(dtype=float) / MODEL_WEEKS_IN_YEAR
    category = np.full(len(weekly_amount), "NONE", dtype=object)
    # Later assignments overwrite earlier ones, so thresholds must be applied
    # in ascending rate order for the highest qualifying category to win.
    # sorted() is stable, so equal rates keep the caller's relative order.
    ascending = sorted(thresholds, key=lambda pair: float(pair[1]))
    for category_name, weekly_rate in ascending:
        # Accept amounts slightly below the statutory rate.
        cutoff = float(weekly_rate) * (1 - SAFETY_MARGIN)
        category[weekly_amount >= cutoff] = category_name
    return category
66+
67+
68+
def add_disability_benefit_categories_from_reported_amounts(
    person: pd.DataFrame,
    year: int,
    *,
    inplace: bool = False,
) -> pd.DataFrame:
    """Derive disability benefit category columns from reported amounts.

    Args:
        person: Person-level table. Only the reported-amount columns that are
            present are converted; absent ones are skipped.
        year: Year whose DWP parameter values supply the weekly rates.
        inplace: When true, write the category columns onto ``person``
            itself; otherwise work on a copy.

    Returns:
        The table carrying the category columns (``person`` itself when
        ``inplace`` is true).
    """
    frame = person if inplace else person.copy()
    rates = _dwp_parameters(int(year))

    # reported column -> (category column, (category name, weekly rate) bands)
    bands_by_reported_column = {
        "attendance_allowance_reported": (
            "aa_category",
            (
                ("LOWER", rates.attendance_allowance.lower),
                ("HIGHER", rates.attendance_allowance.higher),
            ),
        ),
        "dla_sc_reported": (
            "dla_sc_category",
            (
                ("LOWER", rates.dla.self_care.lower),
                ("MIDDLE", rates.dla.self_care.middle),
                ("HIGHER", rates.dla.self_care.higher),
            ),
        ),
        "dla_m_reported": (
            "dla_m_category",
            (
                ("LOWER", rates.dla.mobility.lower),
                ("HIGHER", rates.dla.mobility.higher),
            ),
        ),
        "pip_m_reported": (
            "pip_m_category",
            (
                ("STANDARD", rates.pip.mobility.standard),
                ("ENHANCED", rates.pip.mobility.enhanced),
            ),
        ),
        "pip_dl_reported": (
            "pip_dl_category",
            (
                ("STANDARD", rates.pip.daily_living.standard),
                ("ENHANCED", rates.pip.daily_living.enhanced),
            ),
        ),
    }

    for source_column, (target_column, bands) in bands_by_reported_column.items():
        if source_column not in frame.columns:
            continue
        frame[target_column] = _category_from_reported_amount(
            frame[source_column],
            bands,
        )

    return frame
132+
133+
134+
def add_disability_benefit_flags_from_reported_amounts(
    person: pd.DataFrame,
    year: int,
    *,
    inplace: bool = False,
) -> pd.DataFrame:
    """Set the three disability flag columns from reported benefit amounts.

    Writes ``is_disabled_for_benefits``, ``is_enhanced_disabled_for_benefits``
    and ``is_severely_disabled_for_benefits``. Reported-amount columns that
    are missing from ``person`` count as zero.

    Args:
        person: Person-level table.
        year: Year whose DWP parameter values supply the weekly rates.
        inplace: When true, write the flags onto ``person`` itself;
            otherwise work on a copy.

    Returns:
        The table carrying the flag columns (``person`` itself when
        ``inplace`` is true).
    """
    frame = person if inplace else person.copy()
    rates = _dwp_parameters(int(year))

    dla_self_care = _reported_amount(frame, "dla_sc_reported")
    dla_mobility = _reported_amount(frame, "dla_m_reported")
    pip_mobility = _reported_amount(frame, "pip_m_reported")
    pip_daily_living = _reported_amount(frame, "pip_dl_reported")
    armed_forces_compensation = _reported_amount(frame, "afcs_reported")

    # Any positive DLA or PIP amount marks the person as disabled.
    total_dla_pip = dla_self_care + dla_mobility + pip_mobility + pip_daily_living
    frame["is_disabled_for_benefits"] = total_dla_pip > 0

    # Annualised top rates, lowered by one week's worth of GBP 1 as a
    # tolerance.
    weeks = SURVEY_REPORTED_AMOUNT_WEEKS_IN_YEAR
    tolerance = 1 * weeks
    dla_self_care_cutoff = rates.dla.self_care.higher * weeks - tolerance
    pip_daily_living_cutoff = rates.pip.daily_living.enhanced * weeks - tolerance

    # The enhanced flag uses a strict comparison; the severe flag below uses
    # an inclusive one.
    frame["is_enhanced_disabled_for_benefits"] = (
        dla_self_care > dla_self_care_cutoff
    )

    meets_dla_top_rate = dla_self_care >= dla_self_care_cutoff
    meets_pip_top_rate = pip_daily_living >= pip_daily_living_cutoff
    has_afcs = armed_forces_compensation > 0
    frame["is_severely_disabled_for_benefits"] = (
        meets_dla_top_rate | meets_pip_top_rate | has_afcs
    )

    return frame
170+
171+
172+
def drop_internal_disability_reported_amounts(
    person: pd.DataFrame,
    *,
    inplace: bool = False,
) -> pd.DataFrame:
    """Remove the internal disability reported-amount columns.

    Columns named in ``DISABILITY_REPORTED_AMOUNT_COLUMNS`` are dropped;
    any that are not present are ignored.

    Args:
        person: Person-level table.
        inplace: When true, drop the columns from ``person`` itself and
            return it; otherwise return a new table.

    Returns:
        The table without the reported-amount columns.
    """
    internal_columns = list(DISABILITY_REPORTED_AMOUNT_COLUMNS)
    if not inplace:
        return person.drop(columns=internal_columns, errors="ignore")
    person.drop(columns=internal_columns, errors="ignore", inplace=True)
    return person
190+
191+
192+
def strip_internal_disability_reported_amounts(
    dataset: UKSingleYearDataset,
) -> UKSingleYearDataset:
    """Return a copy of ``dataset`` without the internal reported amounts.

    The person table of the copy has the disability reported-amount
    intermediates removed.
    """
    stripped = dataset.copy()
    stripped.person = drop_internal_disability_reported_amounts(stripped.person)
    return stripped

policyengine_uk_data/datasets/enhanced_cps.py

Lines changed: 5 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -35,26 +35,13 @@
3535
"yearly-average-currency-exchange-rates"
3636
)
3737

38-
# 2025/26 reported-benefit mapping assumptions used only to populate UK input
39-
# leaves from U.S. source records. PolicyEngine UK applies its own parameters
40-
# when calculating derived tax and benefit outputs.
38+
# 2025/26 benefit mapping assumptions used only to populate UK input leaves from
39+
# U.S. source records. PolicyEngine UK applies its own parameters when
40+
# calculating derived tax and benefit outputs.
4141
NEW_STATE_PENSION_2025 = 224.96 * 52
4242
DIVIDEND_YIELD_FOR_WEALTH_IMPUTATION = 0.03
4343
RENTAL_YIELD_FOR_WEALTH_IMPUTATION = 0.04
4444

45-
PIP_2025_WEEKLY_RATES = {
46-
"daily_living": {
47-
"NONE": 0.0,
48-
"STANDARD": 73.89,
49-
"ENHANCED": 110.40,
50-
},
51-
"mobility": {
52-
"NONE": 0.0,
53-
"STANDARD": 29.19,
54-
"ENHANCED": 77.04,
55-
},
56-
}
57-
5845
REGION_SHARES = (
5946
("NORTH_EAST", 0.04),
6047
("NORTH_WEST", 0.11),
@@ -248,11 +235,6 @@ def _pip_category(person: dict) -> str:
248235
return "ENHANCED" if severe_signal or low_earnings else "STANDARD"
249236

250237

251-
def _pip_reported_amount(category: str, component: str) -> float:
252-
weekly = PIP_2025_WEEKLY_RATES[component][category]
253-
return round(weekly * 52, 2)
254-
255-
256238
def _household_cash_income(people: list[dict], exchange_rate: float) -> float:
257239
total = 0.0
258240
for person in people:
@@ -688,14 +670,8 @@ def _build_base_dataset(
688670
if bool(inputs.get("is_blind", False))
689671
else 0.0,
690672
"is_disabled_for_benefits": bool(inputs.get("is_disabled", False)),
691-
"pip_dl_reported": _pip_reported_amount(
692-
pip_category,
693-
"daily_living",
694-
),
695-
"pip_m_reported": _pip_reported_amount(
696-
pip_category,
697-
"mobility",
698-
),
673+
"pip_dl_category": pip_category,
674+
"pip_m_category": pip_category,
699675
"hours_worked": float(
700676
inputs.get(
701677
"weekly_hours_worked",

policyengine_uk_data/datasets/frs.py

Lines changed: 22 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,11 @@
1717
from policyengine_uk.variables.household.income.employment_status import (
1818
EmploymentStatus,
1919
)
20+
from policyengine_uk_data.datasets.disability_benefits import (
21+
add_disability_benefit_categories_from_reported_amounts,
22+
add_disability_benefit_flags_from_reported_amounts,
23+
drop_internal_disability_reported_amounts,
24+
)
2025
from policyengine_uk_data.utils.datasets import (
2126
sum_to_entity,
2227
categorical,
@@ -475,6 +480,7 @@ def split_reported_education_grants(
475480
def create_frs(
476481
raw_frs_folder: str,
477482
year: int,
483+
include_internal_disability_reported_amounts: bool = False,
478484
) -> UKSingleYearDataset:
479485
"""
480486
Process raw FRS data into PolicyEngine UK dataset format.
@@ -487,6 +493,9 @@ def create_frs(
487493
Args:
488494
raw_frs_folder: Path to folder containing raw FRS .tab files.
489495
year: Survey year for the dataset.
496+
include_internal_disability_reported_amounts: Keep raw disability
497+
benefit amount intermediates for downstream imputation. Public
498+
saved datasets should leave this as ``False``.
490499
491500
Returns:
492501
UKSingleYearDataset with processed FRS data ready for policy simulation.
@@ -1010,6 +1019,12 @@ def determine_education_level(fted_val, typeed2_val, age_val):
10101019
* WEEKS_IN_YEAR
10111020
)
10121021

1022+
pe_person = add_disability_benefit_categories_from_reported_amounts(
1023+
pe_person,
1024+
year,
1025+
inplace=True,
1026+
)
1027+
10131028
pe_person["jsa_contrib_reported"] = (
10141029
sum_to_entity(
10151030
benefits.benamt * (benefits.var2.isin((1, 3))) * (benefits.benefit == 14),
@@ -1266,35 +1281,10 @@ def determine_education_level(fted_val, typeed2_val, age_val):
12661281

12671282
pe_household["brma"] = brmas
12681283

1269-
parameters = sim.tax_benefit_system.parameters
1270-
benefit = parameters(year).gov.dwp
1271-
1272-
pe_person["is_disabled_for_benefits"] = (
1273-
pe_person.dla_sc_reported
1274-
+ pe_person.dla_m_reported
1275-
+ pe_person.pip_m_reported
1276-
+ pe_person.pip_dl_reported
1277-
) > 0
1278-
1279-
THRESHOLD_SAFETY_GAP = 1 * WEEKS_IN_YEAR
1280-
1281-
pe_person["is_enhanced_disabled_for_benefits"] = (
1282-
pe_person.dla_sc_reported
1283-
> benefit.dla.self_care.higher * WEEKS_IN_YEAR - THRESHOLD_SAFETY_GAP
1284-
)
1285-
1286-
# Child Tax Credit Regulations 2002 s. 8
1287-
paragraph_3 = (
1288-
pe_person.dla_sc_reported
1289-
>= benefit.dla.self_care.higher * WEEKS_IN_YEAR - THRESHOLD_SAFETY_GAP
1290-
)
1291-
paragraph_4 = (
1292-
pe_person.pip_dl_reported
1293-
>= benefit.pip.daily_living.enhanced * WEEKS_IN_YEAR - THRESHOLD_SAFETY_GAP
1294-
)
1295-
paragraph_5 = pe_person.afcs_reported > 0
1296-
pe_person["is_severely_disabled_for_benefits"] = (
1297-
paragraph_3 | paragraph_4 | paragraph_5
1284+
pe_person = add_disability_benefit_flags_from_reported_amounts(
1285+
pe_person,
1286+
year,
1287+
inplace=True,
12981288
)
12991289

13001290
# Dataset-side claimant-state approximations for future legacy ESA/JSA
@@ -1460,6 +1450,9 @@ def _reported_benunit_mask(person_column: str) -> np.ndarray:
14601450
np.random.random(len(pe_household)) < PROPERTY_PURCHASE_RATE
14611451
)
14621452

1453+
if not include_internal_disability_reported_amounts:
1454+
pe_person = drop_internal_disability_reported_amounts(pe_person)
1455+
14631456
dataset = UKSingleYearDataset(
14641457
person=pe_person,
14651458
benunit=pe_benunit,

0 commit comments

Comments
 (0)