Skip to content

Commit ba3af5b

Browse files
authored
Split reported education grants (#342)
* Split reported education grants * Address education grant review risks
1 parent e9b8c19 commit ba3af5b

6 files changed

Lines changed: 249 additions & 8 deletions

File tree

changelog.d/341.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Split specific student-finance grant capacity out of the generic FRS education-grants residual and seed Disabled Students' Allowance expenses where reported grants plausibly identify DSA (#341).

policyengine_uk_data/datasets/frs.py

Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,20 @@
3535
EmploymentStatus.LONG_TERM_DISABLED.name,
3636
EmploymentStatus.SHORT_TERM_DISABLED.name,
3737
)
38+
# Grants that stay formula-driven: split_reported_education_grants reads their
# simulated values only as allocation capacity and never writes them back as
# person columns.
FORMULA_MODELED_EDUCATION_GRANT_VARIABLES = (
    "childcare_grant",
    "parents_learning_allowance",
    "adult_dependants_grant",
)
# Person-level column that receives the DSA share of reported education grants.
DISABLED_STUDENTS_ALLOWANCE_EXPENSE_INPUT = (
    "disabled_students_allowance_eligible_expenses"
)
# Years earlier than this are given zero DSA capacity, so nothing is seeded.
DISABLED_STUDENTS_ALLOWANCE_FIRST_MODELED_YEAR = 2025
# Simulation variables that must all be true for a person to have DSA capacity.
# The first entry is also calculated to size the all-zero result returned for
# years before the first modelled year.
DISABLED_STUDENTS_ALLOWANCE_ELIGIBILITY_VARIABLES = (
    "maintenance_loan_in_england_system",
    "disabled_students_allowance_course_eligible",
    "disabled_students_allowance_has_qualifying_condition",
)
3852

3953

4054
@lru_cache(maxsize=None)
@@ -214,6 +228,114 @@ def attach_legacy_benefit_proxies_from_frs_person(
214228
)
215229

216230

231+
def _as_non_negative_array(values) -> np.ndarray:
    """Return ``values`` as a float array with NaNs and negatives set to zero."""
    values = np.asarray(values, dtype=float)
    return np.maximum(np.nan_to_num(values, nan=0.0), 0.0)


def allocate_reported_education_grants(
    reported_grants, grant_capacities: dict[str, np.ndarray]
) -> dict[str, np.ndarray]:
    """Split aggregate FRS education grants across modelled grant capacity.

    The FRS reports several direct education grants in one aggregate field. When
    several modelled grants are plausible for the same person, allocate the
    reported amount proportionally to each grant's modelled capacity and keep any
    excess in the generic ``education_grants`` residual.

    Args:
        reported_grants: Array-like of reported aggregate grant amounts. NaNs
            and negative values are treated as zero.
        grant_capacities: Mapping from grant name to an array-like of modelled
            capacity with the same shape as ``reported_grants``. NaNs and
            negative values are treated as zero.

    Returns:
        A dict with one float array per key of ``grant_capacities`` holding the
        amount allocated to that grant (never more than its capacity), plus an
        ``"education_grants"`` array holding the unallocated remainder.

    Raises:
        ValueError: If a capacity array's shape differs from the shape of
            ``reported_grants``.
    """

    reported_grants = _as_non_negative_array(reported_grants)
    capacities = {
        variable: _as_non_negative_array(capacity)
        for variable, capacity in grant_capacities.items()
    }
    total_capacity = np.zeros_like(reported_grants, dtype=float)
    for variable, capacity in capacities.items():
        if capacity.shape != reported_grants.shape:
            raise ValueError(
                f"{variable} capacity has shape {capacity.shape}, "
                f"expected {reported_grants.shape}."
            )
        total_capacity += capacity

    # Share of each grant's capacity that the reported amount can fill. People
    # with no capacity at all keep a fraction of zero (no division by zero).
    allocation_fraction = np.divide(
        reported_grants,
        total_capacity,
        out=np.zeros_like(reported_grants, dtype=float),
        where=total_capacity > 0,
    )
    allocation_fraction = np.minimum(allocation_fraction, 1)

    allocations = {
        variable: capacity * allocation_fraction
        for variable, capacity in capacities.items()
    }

    # Take the residual straight from the totals rather than from the summed
    # allocations: ``capacity * (reported / total)`` can round just below the
    # reported amount (e.g. 49 * (1 / 49) < 1), which would leave a spurious
    # positive residual for someone whose grant fits entirely within capacity.
    allocations["education_grants"] = np.maximum(reported_grants - total_capacity, 0)
    return allocations
278+
279+
280+
def calculate_disabled_students_allowance_reported_grant_capacity(
    sim, year: int, maximum: float
) -> np.ndarray:
    """Return per-person DSA capacity for absorbing reported education grants.

    Years before ``DISABLED_STUDENTS_ALLOWANCE_FIRST_MODELED_YEAR`` get an
    all-zero array. Otherwise a person's capacity is ``maximum`` when every
    variable in ``DISABLED_STUDENTS_ALLOWANCE_ELIGIBILITY_VARIABLES`` is true
    and ``disabled_students_allowance_receives_equivalent_support`` is false,
    and zero in every other case.
    """
    if year < DISABLED_STUDENTS_ALLOWANCE_FIRST_MODELED_YEAR:
        # The first eligibility variable is calculated only to size the result.
        shape_probe = np.asarray(
            sim.calculate(DISABLED_STUDENTS_ALLOWANCE_ELIGIBILITY_VARIABLES[0], year)
        )
        return np.zeros_like(shape_probe, dtype=float)

    eligibility_flags = [
        np.asarray(sim.calculate(name, year), dtype=bool)
        for name in DISABLED_STUDENTS_ALLOWANCE_ELIGIBILITY_VARIABLES
    ]
    qualifies = eligibility_flags[0]
    for flag in eligibility_flags[1:]:
        qualifies = qualifies & flag

    has_equivalent_support = np.asarray(
        sim.calculate("disabled_students_allowance_receives_equivalent_support", year),
        dtype=bool,
    )
    return np.where(qualifies & ~has_equivalent_support, float(maximum), 0.0)
304+
305+
306+
def split_reported_education_grants(
    pe_person: pd.DataFrame, sim, year: int, dsa_maximum: float
) -> pd.DataFrame:
    """Carve modelled grants out of the reported ``education_grants`` column.

    Childcare Grant, PLA, and ADG stay formula-driven because they are
    calibration targets: their simulated amounts act purely as capacity, so the
    same reported FRS money is not counted again in the generic residual, and
    no column is written for them. DSA has no modelled amount signal, so its
    share of the reported grant is written to the DSA eligible-expenses column.

    ``pe_person`` is updated in place and also returned.
    """
    dsa_column = DISABLED_STUDENTS_ALLOWANCE_EXPENSE_INPUT

    capacity_by_grant = {}
    for grant_name in FORMULA_MODELED_EDUCATION_GRANT_VARIABLES:
        capacity_by_grant[grant_name] = sim.calculate(grant_name, year)
    capacity_by_grant[dsa_column] = (
        calculate_disabled_students_allowance_reported_grant_capacity(
            sim, year, dsa_maximum
        )
    )

    split = allocate_reported_education_grants(
        pe_person["education_grants"], capacity_by_grant
    )

    # Only the residual and the DSA share are stored on the person table.
    pe_person["education_grants"] = split["education_grants"]
    pe_person[dsa_column] = split[dsa_column]
    return pe_person
337+
338+
217339
def create_frs(
218340
raw_frs_folder: str,
219341
year: int,
@@ -1006,6 +1128,21 @@ def determine_education_level(fted_val, typeed2_val, age_val):
10061128
pe_person, person, sim, year
10071129
)
10081130

1131+
if (pe_person["education_grants"] > 0).any():
1132+
student_support_dataset = UKSingleYearDataset(
1133+
person=pe_person,
1134+
benunit=pe_benunit,
1135+
household=pe_household,
1136+
fiscal_year=year,
1137+
)
1138+
student_support_sim = Microsimulation(dataset=student_support_dataset)
1139+
dsa_maximum = student_support_sim.tax_benefit_system.parameters(
1140+
year
1141+
).gov.dfe.disabled_students_allowance.maximum
1142+
pe_person = split_reported_education_grants(
1143+
pe_person, student_support_sim, year, dsa_maximum
1144+
)
1145+
10091146
# Generate stochastic take-up decisions
10101147
# All randomness is generated here in the data package using take-up rates
10111148
# stored in YAML parameter files. This keeps the country package purely
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
import numpy as np
2+
import pandas as pd
3+
4+
from policyengine_uk_data.datasets.frs import (
5+
allocate_reported_education_grants,
6+
split_reported_education_grants,
7+
)
8+
9+
10+
def test_allocate_reported_education_grants_splits_by_capacity():
    """Grants are shared pro rata, capped at capacity, with the rest left over."""
    reported = np.array([50, 300, 1_000, 100])
    capacities = {
        "grant_a": np.array([100, 100, 100, 0]),
        "grant_b": np.array([100, 0, 100, 0]),
    }

    allocations = allocate_reported_education_grants(
        reported_grants=reported,
        grant_capacities=capacities,
    )

    expected = {
        "grant_a": [25, 100, 100, 0],
        "grant_b": [25, 0, 100, 0],
        "education_grants": [0, 200, 800, 100],
    }
    for key, values in expected.items():
        np.testing.assert_allclose(allocations[key], values)
22+
23+
24+
class FakeStudentSupportSim:
    """Simulation stand-in that serves canned values keyed by variable name."""

    def __init__(self, values):
        self.values = values

    def calculate(self, variable, year):
        """Return the canned value for ``variable``; ``year`` is ignored."""
        _ = year  # accepted so callers can pass (variable, year); not used
        canned = self.values
        return canned[variable]
31+
32+
33+
def test_split_reported_education_grants_updates_residual_and_dsa_expenses():
    """Only the residual and the DSA expense column are written to the frame."""
    formula_grants = {
        "childcare_grant": np.array([300, 0, 0]),
        "parents_learning_allowance": np.array([600, 400, 0]),
        "adult_dependants_grant": np.array([0, 600, 0]),
    }
    # Only the third person satisfies every DSA eligibility variable.
    dsa_eligibility = {
        name: np.array([False, False, True])
        for name in (
            "maintenance_loan_in_england_system",
            "disabled_students_allowance_course_eligible",
            "disabled_students_allowance_has_qualifying_condition",
        )
    }
    equivalent_support = {
        "disabled_students_allowance_receives_equivalent_support": np.array(
            [False, False, False]
        ),
    }
    sim = FakeStudentSupportSim(
        {**formula_grants, **dsa_eligibility, **equivalent_support}
    )
    pe_person = pd.DataFrame({"education_grants": [900, 1_200, 100]})

    result = split_reported_education_grants(pe_person, sim, 2025, dsa_maximum=500)

    for formula_grant in (
        "childcare_grant",
        "parents_learning_allowance",
        "adult_dependants_grant",
    ):
        assert formula_grant not in result.columns
    np.testing.assert_allclose(
        result["disabled_students_allowance_eligible_expenses"], [0, 0, 100]
    )
    np.testing.assert_allclose(result["education_grants"], [0, 200, 0])
62+
63+
64+
def test_split_reported_education_grants_does_not_seed_dsa_before_model_year():
    """For 2024 the DSA column stays zero and the reported grant stays generic."""
    canned = {
        grant: np.array([0])
        for grant in (
            "childcare_grant",
            "parents_learning_allowance",
            "adult_dependants_grant",
        )
    }
    canned["maintenance_loan_in_england_system"] = np.array([True])
    sim = FakeStudentSupportSim(canned)
    pe_person = pd.DataFrame({"education_grants": [100]})

    result = split_reported_education_grants(pe_person, sim, 2024, dsa_maximum=500)

    dsa_expenses = result["disabled_students_allowance_eligible_expenses"]
    np.testing.assert_allclose(dsa_expenses, [0])
    np.testing.assert_allclose(result["education_grants"], [100])

policyengine_uk_data/tests/test_legacy_benefit_proxies.py

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,17 @@ def __init__(self, dataset):
232232
"FakeGov",
233233
(),
234234
{
235+
"dfe": type(
236+
"FakeDfe",
237+
(),
238+
{
239+
"disabled_students_allowance": type(
240+
"FakeDsa",
241+
(),
242+
{"maximum": 27_783},
243+
)()
244+
},
245+
)(),
235246
"dwp": type(
236247
"FakeDwp",
237248
(),
@@ -259,7 +270,7 @@ def __init__(self, dataset):
259270
},
260271
)(),
261272
},
262-
)()
273+
)(),
263274
},
264275
)()
265276
},
@@ -274,6 +285,16 @@ def calculate(self, variable, year=None):
274285
return np.array([100])
275286
if variable == "state_pension_age":
276287
return pd.Series([66])
288+
if variable in (
289+
"childcare_grant",
290+
"parents_learning_allowance",
291+
"adult_dependants_grant",
292+
"disabled_students_allowance_receives_equivalent_support",
293+
"maintenance_loan_in_england_system",
294+
"disabled_students_allowance_course_eligible",
295+
"disabled_students_allowance_has_qualifying_condition",
296+
):
297+
return np.zeros(len(self.dataset.person))
277298
raise KeyError(variable)
278299

279300

@@ -365,7 +386,7 @@ def fake_read_csv(path, *args, **kwargs):
365386
"pareamt": 0,
366387
"allpay3": 0,
367388
"allpay4": 0,
368-
"grtdir1": 0,
389+
"grtdir1": 100,
369390
"grtdir2": 0,
370391
}
371392
]
@@ -445,3 +466,5 @@ def fake_read_csv(path, *args, **kwargs):
445466
"esa_health_condition_proxy",
446467
"esa_support_group_proxy",
447468
}.issubset(dataset.person.columns)
469+
assert dataset.person["education_grants"].iloc[0] == 100
470+
assert dataset.person["disabled_students_allowance_eligible_expenses"].iloc[0] == 0

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ dependencies = [
2121
"policyengine",
2222
"google-cloud-storage",
2323
"google-auth",
24-
"policyengine-uk>=2.85.0",
24+
"policyengine-uk>=2.86.0",
2525
"microcalibrate>=0.18.0",
2626
"microimpute>=1.0.1",
2727
"ruff>=0.9.0",

uv.lock

Lines changed: 5 additions & 5 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)