Split reported education grants (#342)

MaxGhenis · web-flow · commit ba3af5bf90d4 · 2026-04-15T09:19:01.000-04:00
* Split reported education grants

* Address education grant review risks
diff --git a/changelog.d/341.md b/changelog.d/341.md
@@ -0,0 +1 @@
+Split specific student-finance grant capacity out of the generic FRS education-grants residual and seed Disabled Students' Allowance expenses where reported grants plausibly identify DSA (#341).
diff --git a/policyengine_uk_data/datasets/frs.py b/policyengine_uk_data/datasets/frs.py
@@ -35,6 +35,20 @@
     EmploymentStatus.LONG_TERM_DISABLED.name,
     EmploymentStatus.SHORT_TERM_DISABLED.name,
 )
+FORMULA_MODELED_EDUCATION_GRANT_VARIABLES = (  # Grant variables read via sim.calculate and used as allocation capacities.
+    "childcare_grant",
+    "parents_learning_allowance",
+    "adult_dependants_grant",
+)
+DISABLED_STUDENTS_ALLOWANCE_EXPENSE_INPUT = (  # Person column that receives the DSA share of reported grants.
+    "disabled_students_allowance_eligible_expenses"
+)
+DISABLED_STUDENTS_ALLOWANCE_FIRST_MODELED_YEAR = 2025  # Earlier years get zero DSA capacity.
+DISABLED_STUDENTS_ALLOWANCE_ELIGIBILITY_VARIABLES = (  # Flags that must all be true for non-zero DSA capacity.
+    "maintenance_loan_in_england_system",
+    "disabled_students_allowance_course_eligible",
+    "disabled_students_allowance_has_qualifying_condition",
+)
 
 
 @lru_cache(maxsize=None)
@@ -214,6 +228,114 @@ def attach_legacy_benefit_proxies_from_frs_person(
     )
 
 
+def _as_non_negative_array(values) -> np.ndarray:  # Coerce input to a float array with NaN and negative entries replaced by 0.
+    values = np.asarray(values, dtype=float)
+    return np.maximum(np.nan_to_num(values, nan=0.0), 0.0)  # nan_to_num also turns +/-inf into large finite values before the clip at 0.
+
+
+def allocate_reported_education_grants(  # Returns one allocation array per capacity key plus an "education_grants" residual.
+    reported_grants, grant_capacities: dict[str, np.ndarray]
+) -> dict[str, np.ndarray]:
+    """Split aggregate FRS education grants across modelled grant capacity.
+
+    The FRS reports several direct education grants in one aggregate field. When
+    several modelled grants are plausible for the same person, allocate the
+    reported amount proportionally to each grant's modelled capacity and keep any
+    excess in the generic ``education_grants`` residual.
+    """
+
+    reported_grants = _as_non_negative_array(reported_grants)  # NaN and negative reports count as 0.
+    capacities = {
+        variable: _as_non_negative_array(capacity)
+        for variable, capacity in grant_capacities.items()
+    }
+    total_capacity = np.zeros_like(reported_grants, dtype=float)
+    for variable, capacity in capacities.items():
+        if capacity.shape != reported_grants.shape:  # Shapes must match exactly; mismatches raise instead of broadcasting.
+            raise ValueError(
+                f"{variable} capacity has shape {capacity.shape}, "
+                f"expected {reported_grants.shape}."
+            )
+        total_capacity += capacity
+
+    allocation_fraction = np.divide(  # Share of total capacity that the reported amount can fill.
+        reported_grants,
+        total_capacity,
+        out=np.zeros_like(reported_grants, dtype=float),  # Entries with no capacity keep a zero fraction.
+        where=total_capacity > 0,
+    )
+    allocation_fraction = np.minimum(allocation_fraction, 1)  # Never allocate more than a grant's capacity.
+
+    allocations = {}
+    allocated_total = np.zeros_like(reported_grants, dtype=float)
+    for variable, capacity in capacities.items():
+        allocation = capacity * allocation_fraction
+        allocations[variable] = allocation
+        allocated_total += allocation
+
+    allocations["education_grants"] = np.maximum(reported_grants - allocated_total, 0)  # Unallocated remainder; overwrites any capacity key named "education_grants".
+    return allocations
+
+
+def calculate_disabled_students_allowance_reported_grant_capacity(  # Per-entry DSA capacity: `maximum` where eligible, otherwise 0.
+    sim, year: int, maximum: float
+) -> np.ndarray:
+    if year < DISABLED_STUDENTS_ALLOWANCE_FIRST_MODELED_YEAR:  # Before the first modelled year every entry gets zero capacity.
+        return np.zeros_like(
+            np.asarray(
+                sim.calculate(  # Calculated only to obtain an array of the right shape.
+                    DISABLED_STUDENTS_ALLOWANCE_ELIGIBILITY_VARIABLES[0], year
+                )
+            ),
+            dtype=float,
+        )
+
+    eligible = None
+    for variable in DISABLED_STUDENTS_ALLOWANCE_ELIGIBILITY_VARIABLES:
+        variable_eligible = np.asarray(sim.calculate(variable, year), dtype=bool)
+        eligible = (  # Running AND across all eligibility flags.
+            variable_eligible if eligible is None else eligible & variable_eligible
+        )
+    equivalent_support = np.asarray(
+        sim.calculate("disabled_students_allowance_receives_equivalent_support", year),
+        dtype=bool,
+    )
+    return np.where(eligible & ~equivalent_support, float(maximum), 0.0)  # Entries flagged as receiving equivalent support get zero capacity.
+
+
+def split_reported_education_grants(  # Mutates pe_person in place and returns the same frame.
+    pe_person: pd.DataFrame, sim, year: int, dsa_maximum: float
+) -> pd.DataFrame:
+    """Move specific modelled grants out of the generic education-grant residual.
+
+    PLA, ADG, and Childcare Grant remain formula-driven because they are
+    calibration targets. Their modelled capacity is only used to avoid also
+    counting the same reported FRS grant amount in the generic residual.
+    DSA lacks a modelled amount signal, so its allocation seeds eligible
+    expenses directly where the DSA parameter is available.
+    """
+
+    grant_capacities = {  # Simulated amounts of the formula-driven grants serve as capacities.
+        variable: sim.calculate(variable, year)
+        for variable in FORMULA_MODELED_EDUCATION_GRANT_VARIABLES
+    }
+    grant_capacities[DISABLED_STUDENTS_ALLOWANCE_EXPENSE_INPUT] = (  # DSA capacity is keyed by the expense column it will seed.
+        calculate_disabled_students_allowance_reported_grant_capacity(
+            sim, year, dsa_maximum
+        )
+    )
+    allocations = allocate_reported_education_grants(
+        pe_person["education_grants"], grant_capacities
+    )
+
+    pe_person["education_grants"] = allocations["education_grants"]  # Replace the reported total with the unallocated residual.
+    pe_person[DISABLED_STUDENTS_ALLOWANCE_EXPENSE_INPUT] = allocations[  # Only the DSA share is written back; the other allocations are discarded.
+        DISABLED_STUDENTS_ALLOWANCE_EXPENSE_INPUT
+    ]
+
+    return pe_person
+
+
 def create_frs(
     raw_frs_folder: str,
     year: int,
@@ -1006,6 +1128,21 @@ def determine_education_level(fted_val, typeed2_val, age_val):
         pe_person, person, sim, year
     )
 
+    if (pe_person["education_grants"] > 0).any():
+        student_support_dataset = UKSingleYearDataset(
+            person=pe_person,
+            benunit=pe_benunit,
+            household=pe_household,
+            fiscal_year=year,
+        )
+        student_support_sim = Microsimulation(dataset=student_support_dataset)
+        dsa_maximum = student_support_sim.tax_benefit_system.parameters(
+            year
+        ).gov.dfe.disabled_students_allowance.maximum
+        pe_person = split_reported_education_grants(
+            pe_person, student_support_sim, year, dsa_maximum
+        )
+
     # Generate stochastic take-up decisions
     # All randomness is generated here in the data package using take-up rates
     # stored in YAML parameter files. This keeps the country package purely
diff --git a/policyengine_uk_data/tests/test_education_grants_split.py b/policyengine_uk_data/tests/test_education_grants_split.py
@@ -0,0 +1,80 @@
+import numpy as np
+import pandas as pd
+
+from policyengine_uk_data.datasets.frs import (
+    allocate_reported_education_grants,
+    split_reported_education_grants,
+)
+
+
+def test_allocate_reported_education_grants_splits_by_capacity():  # Covers proportional split, capping at capacity, and zero-capacity passthrough.
+    allocations = allocate_reported_education_grants(
+        reported_grants=np.array([50, 300, 1_000, 100]),  # Cases: under capacity; one grant over capacity; both over capacity; no capacity.
+        grant_capacities={
+            "grant_a": np.array([100, 100, 100, 0]),
+            "grant_b": np.array([100, 0, 100, 0]),
+        },
+    )
+
+    np.testing.assert_allclose(allocations["grant_a"], [25, 100, 100, 0])
+    np.testing.assert_allclose(allocations["grant_b"], [25, 0, 100, 0])
+    np.testing.assert_allclose(allocations["education_grants"], [0, 200, 800, 100])  # Residual is the reported amount minus both allocations.
+
+
+class FakeStudentSupportSim:  # Test double exposing only calculate(variable, year), backed by a dict.
+    def __init__(self, values):
+        self.values = values
+
+    def calculate(self, variable, year):
+        del year  # The stub returns the same values for any year.
+        return self.values[variable]  # Variables that were not stubbed raise KeyError.
+
+
+def test_split_reported_education_grants_updates_residual_and_dsa_expenses():  # Residual and DSA expenses are written; formula grants are not.
+    pe_person = pd.DataFrame({"education_grants": [900, 1_200, 100]})  # Rows: fully absorbed by formula grants; 200 over their capacity; DSA-eligible only.
+    sim = FakeStudentSupportSim(
+        {
+            "childcare_grant": np.array([300, 0, 0]),
+            "parents_learning_allowance": np.array([600, 400, 0]),
+            "adult_dependants_grant": np.array([0, 600, 0]),
+            "maintenance_loan_in_england_system": np.array([False, False, True]),
+            "disabled_students_allowance_course_eligible": np.array(
+                [False, False, True]
+            ),
+            "disabled_students_allowance_has_qualifying_condition": np.array(
+                [False, False, True]
+            ),
+            "disabled_students_allowance_receives_equivalent_support": np.array(
+                [False, False, False]
+            ),
+        }
+    )
+
+    result = split_reported_education_grants(pe_person, sim, 2025, dsa_maximum=500)
+
+    assert "childcare_grant" not in result.columns  # Formula-driven grants must not be written as columns.
+    assert "parents_learning_allowance" not in result.columns
+    assert "adult_dependants_grant" not in result.columns
+    np.testing.assert_allclose(
+        result["disabled_students_allowance_eligible_expenses"], [0, 0, 100]  # 100 reported against a 500 cap seeds 100 of expenses.
+    )
+    np.testing.assert_allclose(result["education_grants"], [0, 200, 0])
+
+
+def test_split_reported_education_grants_does_not_seed_dsa_before_model_year():  # A pre-2025 year leaves the reported amount in the residual.
+    pe_person = pd.DataFrame({"education_grants": [100]})
+    sim = FakeStudentSupportSim(
+        {
+            "childcare_grant": np.array([0]),
+            "parents_learning_allowance": np.array([0]),
+            "adult_dependants_grant": np.array([0]),
+            "maintenance_loan_in_england_system": np.array([True]),  # The only DSA variable read before the first modelled year.
+        }
+    )
+
+    result = split_reported_education_grants(pe_person, sim, 2024, dsa_maximum=500)  # 2024 precedes the first modelled DSA year.
+
+    np.testing.assert_allclose(
+        result["disabled_students_allowance_eligible_expenses"], [0]
+    )
+    np.testing.assert_allclose(result["education_grants"], [100])
diff --git a/policyengine_uk_data/tests/test_legacy_benefit_proxies.py b/policyengine_uk_data/tests/test_legacy_benefit_proxies.py
@@ -232,6 +232,17 @@ def __init__(self, dataset):
                             "FakeGov",
                             (),
                             {
+                                "dfe": type(
+                                    "FakeDfe",
+                                    (),
+                                    {
+                                        "disabled_students_allowance": type(
+                                            "FakeDsa",
+                                            (),
+                                            {"maximum": 27_783},
+                                        )()
+                                    },
+                                )(),
                                 "dwp": type(
                                     "FakeDwp",
                                     (),
@@ -259,7 +270,7 @@ def __init__(self, dataset):
                                             },
                                         )(),
                                     },
-                                )()
+                                )(),
                             },
                         )()
                     },
@@ -274,6 +285,16 @@ def calculate(self, variable, year=None):
             return np.array([100])
         if variable == "state_pension_age":
             return pd.Series([66])
+        if variable in (
+            "childcare_grant",
+            "parents_learning_allowance",
+            "adult_dependants_grant",
+            "disabled_students_allowance_receives_equivalent_support",
+            "maintenance_loan_in_england_system",
+            "disabled_students_allowance_course_eligible",
+            "disabled_students_allowance_has_qualifying_condition",
+        ):
+            return np.zeros(len(self.dataset.person))
         raise KeyError(variable)
 
 
@@ -365,7 +386,7 @@ def fake_read_csv(path, *args, **kwargs):
                 "pareamt": 0,
                 "allpay3": 0,
                 "allpay4": 0,
-                "grtdir1": 0,
+                "grtdir1": 100,
                 "grtdir2": 0,
             }
         ]
@@ -445,3 +466,5 @@ def fake_read_csv(path, *args, **kwargs):
         "esa_health_condition_proxy",
         "esa_support_group_proxy",
     }.issubset(dataset.person.columns)
+    assert dataset.person["education_grants"].iloc[0] == 100
+    assert dataset.person["disabled_students_allowance_eligible_expenses"].iloc[0] == 0
diff --git a/pyproject.toml b/pyproject.toml
@@ -21,7 +21,7 @@ dependencies = [
     "policyengine",
     "google-cloud-storage",
     "google-auth",
-    "policyengine-uk>=2.85.0",
+    "policyengine-uk>=2.86.0",
     "microcalibrate>=0.18.0",
     "microimpute>=1.0.1",
     "ruff>=0.9.0",
diff --git a/uv.lock b/uv.lock

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	`+Split specific student-finance grant capacity out of the generic FRS education-grants residual and seed Disabled Students' Allowance expenses where reported grants plausibly identify DSA (#341).`