Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/341.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Split specific student-finance grant capacity out of the generic FRS education-grants residual and seed Disabled Students' Allowance expenses where reported grants plausibly identify DSA (#341).
137 changes: 137 additions & 0 deletions policyengine_uk_data/datasets/frs.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,20 @@
EmploymentStatus.LONG_TERM_DISABLED.name,
EmploymentStatus.SHORT_TERM_DISABLED.name,
)
# Grants whose amounts come from model formulas. Their simulated values are
# read as allocation capacity when splitting the reported FRS grant aggregate.
FORMULA_MODELED_EDUCATION_GRANT_VARIABLES = (
    "childcare_grant",
    "parents_learning_allowance",
    "adult_dependants_grant",
)
# Person-level column that receives the DSA share of reported grants.
DISABLED_STUDENTS_ALLOWANCE_EXPENSE_INPUT = (
    "disabled_students_allowance_eligible_expenses"
)
# Years earlier than this are given zero DSA capacity.
DISABLED_STUDENTS_ALLOWANCE_FIRST_MODELED_YEAR = 2025
# Simulation variables that must all be true for a person to have DSA
# capacity. The first entry is also used to size the all-zero result that is
# returned for years before the first modelled year.
DISABLED_STUDENTS_ALLOWANCE_ELIGIBILITY_VARIABLES = (
    "maintenance_loan_in_england_system",
    "disabled_students_allowance_course_eligible",
    "disabled_students_allowance_has_qualifying_condition",
)


@lru_cache(maxsize=None)
Expand Down Expand Up @@ -214,6 +228,114 @@ def attach_legacy_benefit_proxies_from_frs_person(
)


def _as_non_negative_array(values) -> np.ndarray:
    """Return ``values`` as a float array with NaNs and negatives zeroed.

    Args:
        values: Array-like of numbers; anything ``np.asarray`` accepts with
            ``dtype=float``.

    Returns:
        A float array of the same shape in which NaN entries are replaced
        by 0 and every entry is floored at 0.
    """
    values = np.asarray(values, dtype=float)
    # Replace NaN before flooring, because ``np.maximum`` would propagate it.
    return np.maximum(np.nan_to_num(values, nan=0.0), 0.0)


def allocate_reported_education_grants(
    reported_grants, grant_capacities: dict[str, np.ndarray]
) -> dict[str, np.ndarray]:
    """Split aggregate FRS education grants across modelled grant capacity.

    The FRS reports several direct education grants in one aggregate field. When
    several modelled grants are plausible for the same person, allocate the
    reported amount proportionally to each grant's modelled capacity and keep any
    excess in the generic ``education_grants`` residual.

    Args:
        reported_grants: Array-like of reported aggregate grant amounts, one
            entry per record. NaN and negative values are treated as zero.
        grant_capacities: Mapping from grant name to an array-like capacity
            with the same shape as ``reported_grants``. NaN and negative
            capacities are treated as zero.

    Returns:
        A dict holding one allocation array per key of ``grant_capacities``
        plus an ``"education_grants"`` entry with the unallocated remainder.

    Raises:
        ValueError: If a capacity's shape differs from ``reported_grants``,
            or if ``grant_capacities`` uses the reserved key
            ``"education_grants"``.
    """

    if "education_grants" in grant_capacities:
        # The residual is stored under this key, so a capacity with the same
        # name would be overwritten and its share would vanish from the output.
        raise ValueError(
            "education_grants is reserved for the residual and cannot be "
            "used as a grant capacity name."
        )

    reported_grants = _as_non_negative_array(reported_grants)
    capacities = {
        variable: _as_non_negative_array(capacity)
        for variable, capacity in grant_capacities.items()
    }
    total_capacity = np.zeros_like(reported_grants, dtype=float)
    for variable, capacity in capacities.items():
        if capacity.shape != reported_grants.shape:
            raise ValueError(
                f"{variable} capacity has shape {capacity.shape}, "
                f"expected {reported_grants.shape}."
            )
        total_capacity += capacity

    # Share of each capacity that the reported amount can fund. Records with
    # no capacity keep a zero share (no division is attempted for them).
    allocation_fraction = np.divide(
        reported_grants,
        total_capacity,
        out=np.zeros_like(reported_grants, dtype=float),
        where=total_capacity > 0,
    )
    # Never allocate more than a grant's modelled capacity.
    allocation_fraction = np.minimum(allocation_fraction, 1)

    allocations = {}
    allocated_total = np.zeros_like(reported_grants, dtype=float)
    for variable, capacity in capacities.items():
        allocation = capacity * allocation_fraction
        allocations[variable] = allocation
        allocated_total += allocation

    # Floor at zero so floating-point rounding cannot yield a negative residual.
    allocations["education_grants"] = np.maximum(reported_grants - allocated_total, 0)
    return allocations


def calculate_disabled_students_allowance_reported_grant_capacity(
    sim, year: int, maximum: float
) -> np.ndarray:
    """Return per-record DSA capacity for absorbing reported education grants.

    Args:
        sim: Object exposing ``calculate(variable, year)``.
        year: Year passed to every ``sim.calculate`` call.
        maximum: Capacity assigned to each qualifying record.

    Returns:
        A float array. For years before
        ``DISABLED_STUDENTS_ALLOWANCE_FIRST_MODELED_YEAR`` it is all zeros,
        shaped like the first eligibility variable. Otherwise it holds
        ``maximum`` where every eligibility variable is true and
        ``disabled_students_allowance_receives_equivalent_support`` is false,
        and 0 elsewhere.
    """
    if year < DISABLED_STUDENTS_ALLOWANCE_FIRST_MODELED_YEAR:
        # Only the first eligibility variable is evaluated here, purely to
        # obtain an array of the right shape.
        return np.zeros_like(
            np.asarray(
                sim.calculate(
                    DISABLED_STUDENTS_ALLOWANCE_ELIGIBILITY_VARIABLES[0], year
                )
            ),
            dtype=float,
        )

    # Logical AND across all eligibility variables.
    eligible = None
    for variable in DISABLED_STUDENTS_ALLOWANCE_ELIGIBILITY_VARIABLES:
        variable_eligible = np.asarray(sim.calculate(variable, year), dtype=bool)
        eligible = (
            variable_eligible if eligible is None else eligible & variable_eligible
        )
    equivalent_support = np.asarray(
        sim.calculate("disabled_students_allowance_receives_equivalent_support", year),
        dtype=bool,
    )
    return np.where(eligible & ~equivalent_support, float(maximum), 0.0)


def split_reported_education_grants(
    pe_person: pd.DataFrame, sim, year: int, dsa_maximum: float
) -> pd.DataFrame:
    """Move specific modelled grants out of the generic education-grant residual.

    PLA, ADG, and Childcare Grant remain formula-driven because they are
    calibration targets. Their modelled capacity is only used to avoid also
    counting the same reported FRS grant amount in the generic residual.
    DSA lacks a modelled amount signal, so its allocation seeds eligible
    expenses directly where the DSA parameter is available.

    Args:
        pe_person: Person table with an ``education_grants`` column. It is
            modified in place.
        sim: Object exposing ``calculate(variable, year)``.
        year: Year passed to every ``sim.calculate`` call.
        dsa_maximum: Capacity given to each record that qualifies for DSA.

    Returns:
        The same ``pe_person`` object, with ``education_grants`` replaced by
        the residual and the DSA eligible-expenses column set to the DSA
        allocation. The formula-driven grants are not written as columns.
    """

    grant_capacities = {
        variable: sim.calculate(variable, year)
        for variable in FORMULA_MODELED_EDUCATION_GRANT_VARIABLES
    }
    grant_capacities[DISABLED_STUDENTS_ALLOWANCE_EXPENSE_INPUT] = (
        calculate_disabled_students_allowance_reported_grant_capacity(
            sim, year, dsa_maximum
        )
    )
    allocations = allocate_reported_education_grants(
        pe_person["education_grants"], grant_capacities
    )

    # Only the residual and the DSA allocation are stored; the formula-driven
    # allocations exist solely to reduce the residual.
    pe_person["education_grants"] = allocations["education_grants"]
    pe_person[DISABLED_STUDENTS_ALLOWANCE_EXPENSE_INPUT] = allocations[
        DISABLED_STUDENTS_ALLOWANCE_EXPENSE_INPUT
    ]

    return pe_person


def create_frs(
raw_frs_folder: str,
year: int,
Expand Down Expand Up @@ -1006,6 +1128,21 @@ def determine_education_level(fted_val, typeed2_val, age_val):
pe_person, person, sim, year
)

if (pe_person["education_grants"] > 0).any():
student_support_dataset = UKSingleYearDataset(
person=pe_person,
benunit=pe_benunit,
household=pe_household,
fiscal_year=year,
)
student_support_sim = Microsimulation(dataset=student_support_dataset)
dsa_maximum = student_support_sim.tax_benefit_system.parameters(
year
).gov.dfe.disabled_students_allowance.maximum
pe_person = split_reported_education_grants(
pe_person, student_support_sim, year, dsa_maximum
)

# Generate stochastic take-up decisions
# All randomness is generated here in the data package using take-up rates
# stored in YAML parameter files. This keeps the country package purely
Expand Down
80 changes: 80 additions & 0 deletions policyengine_uk_data/tests/test_education_grants_split.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
import numpy as np
import pandas as pd

from policyengine_uk_data.datasets.frs import (
allocate_reported_education_grants,
split_reported_education_grants,
)


def test_allocate_reported_education_grants_splits_by_capacity():
    """Check proportional allocation, the capacity cap, and the residual."""
    reported = np.array([50, 300, 1_000, 100])
    allocations = allocate_reported_education_grants(
        reported_grants=reported,
        grant_capacities={
            "grant_a": np.array([100, 100, 100, 0]),
            "grant_b": np.array([100, 0, 100, 0]),
        },
    )

    np.testing.assert_allclose(allocations["grant_a"], [25, 100, 100, 0])
    np.testing.assert_allclose(allocations["grant_b"], [25, 0, 100, 0])
    np.testing.assert_allclose(allocations["education_grants"], [0, 200, 800, 100])
    # Allocations plus residual must add back up to the reported amounts.
    np.testing.assert_allclose(sum(allocations.values()), reported)


class FakeStudentSupportSim:
    """Simulation stand-in that returns canned values keyed by variable name."""

    def __init__(self, values):
        # Mapping from variable name to the value ``calculate`` should return.
        self.values = values

    def calculate(self, variable, year):
        """Return the canned value for ``variable``; ``year`` is ignored.

        Raises:
            KeyError: If ``variable`` has no canned value.
        """
        del year
        return self.values[variable]


def test_split_reported_education_grants_updates_residual_and_dsa_expenses():
    """Check the residual and DSA column after splitting reported grants.

    Person 0 has formula capacity equal to the reported amount, person 1 has
    200 more reported than capacity, and person 2 qualifies only for DSA.
    """
    pe_person = pd.DataFrame({"education_grants": [900, 1_200, 100]})
    sim = FakeStudentSupportSim(
        {
            "childcare_grant": np.array([300, 0, 0]),
            "parents_learning_allowance": np.array([600, 400, 0]),
            "adult_dependants_grant": np.array([0, 600, 0]),
            "maintenance_loan_in_england_system": np.array([False, False, True]),
            "disabled_students_allowance_course_eligible": np.array(
                [False, False, True]
            ),
            "disabled_students_allowance_has_qualifying_condition": np.array(
                [False, False, True]
            ),
            "disabled_students_allowance_receives_equivalent_support": np.array(
                [False, False, False]
            ),
        }
    )

    result = split_reported_education_grants(pe_person, sim, 2025, dsa_maximum=500)

    # Formula-driven grants must not be written back as input columns.
    assert "childcare_grant" not in result.columns
    assert "parents_learning_allowance" not in result.columns
    assert "adult_dependants_grant" not in result.columns
    # Person 2 reports 100 against a DSA capacity of 500, so all 100 is seeded.
    np.testing.assert_allclose(
        result["disabled_students_allowance_eligible_expenses"], [0, 0, 100]
    )
    np.testing.assert_allclose(result["education_grants"], [0, 200, 0])


def test_split_reported_education_grants_does_not_seed_dsa_before_model_year():
    """Check that a year before 2025 leaves the whole amount in the residual."""
    pe_person = pd.DataFrame({"education_grants": [100]})
    # Only the first eligibility variable is supplied: before the first
    # modelled year the other DSA variables are never requested.
    sim = FakeStudentSupportSim(
        {
            "childcare_grant": np.array([0]),
            "parents_learning_allowance": np.array([0]),
            "adult_dependants_grant": np.array([0]),
            "maintenance_loan_in_england_system": np.array([True]),
        }
    )

    result = split_reported_education_grants(pe_person, sim, 2024, dsa_maximum=500)

    np.testing.assert_allclose(
        result["disabled_students_allowance_eligible_expenses"], [0]
    )
    np.testing.assert_allclose(result["education_grants"], [100])
27 changes: 25 additions & 2 deletions policyengine_uk_data/tests/test_legacy_benefit_proxies.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,17 @@ def __init__(self, dataset):
"FakeGov",
(),
{
"dfe": type(
"FakeDfe",
(),
{
"disabled_students_allowance": type(
"FakeDsa",
(),
{"maximum": 27_783},
)()
},
)(),
"dwp": type(
"FakeDwp",
(),
Expand Down Expand Up @@ -259,7 +270,7 @@ def __init__(self, dataset):
},
)(),
},
)()
)(),
},
)()
},
Expand All @@ -274,6 +285,16 @@ def calculate(self, variable, year=None):
return np.array([100])
if variable == "state_pension_age":
return pd.Series([66])
if variable in (
"childcare_grant",
"parents_learning_allowance",
"adult_dependants_grant",
"disabled_students_allowance_receives_equivalent_support",
"maintenance_loan_in_england_system",
"disabled_students_allowance_course_eligible",
"disabled_students_allowance_has_qualifying_condition",
):
return np.zeros(len(self.dataset.person))
raise KeyError(variable)


Expand Down Expand Up @@ -365,7 +386,7 @@ def fake_read_csv(path, *args, **kwargs):
"pareamt": 0,
"allpay3": 0,
"allpay4": 0,
"grtdir1": 0,
"grtdir1": 100,
"grtdir2": 0,
}
]
Expand Down Expand Up @@ -445,3 +466,5 @@ def fake_read_csv(path, *args, **kwargs):
"esa_health_condition_proxy",
"esa_support_group_proxy",
}.issubset(dataset.person.columns)
assert dataset.person["education_grants"].iloc[0] == 100
assert dataset.person["disabled_students_allowance_eligible_expenses"].iloc[0] == 0
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ dependencies = [
"policyengine",
"google-cloud-storage",
"google-auth",
"policyengine-uk>=2.85.0",
"policyengine-uk>=2.86.0",
"microcalibrate>=0.18.0",
"microimpute>=1.0.1",
"ruff>=0.9.0",
Expand Down
10 changes: 5 additions & 5 deletions uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading