Skip to content

Commit 24466a5

Browse files
committed
Add maintenance loan targets
1 parent 57f8330 commit 24466a5

4 files changed

Lines changed: 233 additions & 4 deletions

File tree

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Add Student Loans Company maintenance-loan recipient-count and spend targets for England full-time undergraduates.

policyengine_uk_data/targets/sources/slc.py

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,19 @@
55
Three target types are exposed:
66
- `above_threshold`: borrowers liable to repay and earning above threshold
77
- `liable`: all borrowers liable to repay, including below-threshold holders
8+
- `maintenance_loan`: full-time undergraduate England maintenance-loan
9+
recipient counts and total amount paid
810
911
Source: Explore Education Statistics — Student loan forecasts for England,
1012
Table 6a: Forecast number of student borrowers liable to repay and number
1113
earning above repayment threshold, by product. We use the "Higher education
1214
total" row which sums HE full-time, HE part-time, and Advanced Learner loans.
1315
Academic year 20XX-YY maps to calendar year 20XX+1 (e.g., 2024-25 → 2025).
1416
17+
Maintenance-loan targets come from Student support for higher education in
18+
England 2025, Table 3A: Maintenance Loans paid to full-time undergraduate
19+
students. Academic year 20XX/YY maps to calendar year 20XX+1.
20+
1521
Data permalink:
1622
https://explore-education-statistics.service.gov.uk/data-tables/permalink/6ff75517-7124-487c-cb4e-08de6eccf22d
1723
"""
@@ -21,6 +27,7 @@
2127
import re
2228
from functools import lru_cache
2329

30+
import pandas as pd
2431
import requests
2532

2633
from policyengine_uk_data.targets.schema import Target, Unit
@@ -30,6 +37,10 @@
3037
f"https://explore-education-statistics.service.gov.uk"
3138
f"/data-tables/permalink/{_PERMALINK_ID}"
3239
)
40+
# Workbook read for the maintenance-loan targets ("Table 3A" sheet); also
# used as the reference URL attached to those targets.
_MAINTENANCE_LOAN_URL = "/".join(
    (
        "https://assets.publishing.service.gov.uk/media",
        "691d9e662c6b98ecdbc5003f",
        "slcsp052025.xlsx",
    )
)
3344
_TESTING_DATA = {
3445
"plan_2": {
3546
"above_threshold": {
@@ -68,6 +79,36 @@
6879
},
6980
},
7081
}
82+
# Checked-in snapshot of the maintenance-loan series, returned instead of a
# live download when TESTING=1. Keys are calendar years (academic year
# 20XX/YY is stored under 20XX+1). Values use the same units the live parser
# produces: whole recipients and whole pounds.
_MAINTENANCE_LOAN_TESTING_DATA = {
    # Number of students paid a maintenance loan.
    "recipients": {
        2014: 972_830,
        2015: 963_084,
        2016: 986_323,
        2017: 1_013_354,
        2018: 1_028_438,
        2019: 1_044_973,
        2020: 1_055_702,
        2021: 1_117_591,
        2022: 1_145_289,
        2023: 1_151_607,
        2024: 1_154_427,
        2025: 1_159_761,
    },
    # Total maintenance-loan amount paid, in pounds.
    "amount_paid": {
        2014: 3_783_626_551,
        2015: 3_784_628_482,
        2016: 3_996_708_360,
        2017: 4_870_158_274,
        2018: 5_746_431_691,
        2019: 6_555_506_426,
        2020: 7_113_141_652,
        2021: 7_914_340_039,
        2022: 8_332_837_845,
        2023: 8_594_103_415,
        2024: 8_881_701_387,
        2025: 8_591_659_718,
    },
}
71112

72113

73114
def get_snapshot_data() -> dict:
@@ -80,6 +121,13 @@ def get_snapshot_data() -> dict:
80121
}
81122

82123

124+
def get_maintenance_loan_snapshot_data() -> dict:
    """Return a copy of the checked-in maintenance-loan snapshot.

    Each per-series mapping is copied, so callers can mutate the result
    without altering the module-level snapshot.
    """
    snapshot = {}
    for series_name, by_year in _MAINTENANCE_LOAN_TESTING_DATA.items():
        snapshot[series_name] = by_year.copy()
    return snapshot
129+
130+
83131
@lru_cache(maxsize=1)
84132
def _fetch_slc_data() -> dict:
85133
"""Fetch and parse SLC Table 6a data from Explore Education Statistics.
@@ -166,9 +214,62 @@ def parse_values(row, start_index, years):
166214
}
167215

168216

217+
def _row_contains_text(df: pd.DataFrame, row_index: int, text: str) -> bool:
    """Return whether any non-missing cell in a row equals ``text``.

    Cells are converted with ``str`` and stripped of surrounding whitespace
    before an exact (case-sensitive) comparison; ``row_index`` is positional.
    """
    for cell in df.iloc[row_index].dropna():
        if str(cell).strip() == text:
            return True
    return False
220+
221+
222+
def _find_row(df: pd.DataFrame, text: str, start: int = 0) -> int:
    """Return the position of the first row at or after ``start`` holding ``text``.

    A row matches when ``_row_contains_text`` reports a match for it.

    Raises:
        ValueError: If no row from ``start`` to the end of ``df`` matches.
    """
    matching_rows = (
        candidate
        for candidate in range(start, len(df))
        if _row_contains_text(df, candidate, text)
    )
    found = next(matching_rows, None)
    if found is None:
        raise ValueError(f"Could not find row containing {text!r}")
    return found
227+
228+
229+
def _maintenance_loan_year_columns(df: pd.DataFrame, row_index: int) -> dict[int, int]:
    """Map column positions in a year-header row to calendar years.

    A cell counts as a year header when, after stripping whitespace, it looks
    like ``20XX/YY``; it is mapped to calendar year ``20XX + 1``.
    """
    columns = {}
    # Enumerate positions (not labels) because the results are used with iloc.
    for position, value in enumerate(df.iloc[row_index]):
        if not isinstance(value, str):
            continue
        label = value.strip()
        if re.fullmatch(r"\d{4}/\d{2}", label):
            columns[position] = int(label[:4]) + 1
    return columns


def _maintenance_loan_totals(
    df: pd.DataFrame, total_row: int, year_columns: dict[int, int], scale: int
) -> dict[int, int]:
    """Read one totals row, scaling each non-missing cell to a whole number."""
    totals = {}
    for position, year in year_columns.items():
        cell = df.iloc[total_row, position]
        if pd.notna(cell):
            totals[year] = int(round(float(cell) * scale))
    return totals


@lru_cache(maxsize=1)
def _fetch_maintenance_loan_data() -> dict:
    """Fetch full-time England maintenance-loan recipient counts and spend.

    With ``TESTING=1`` in the environment the checked-in snapshot is returned
    and no download happens. Otherwise the "Table 3A" sheet of the workbook is
    parsed: the "Grand total" row below each section header is read, with
    counts (published in thousands) converted to persons and amounts
    (published in £m) converted to pounds.

    Returns:
        ``{"recipients": {year: count}, "amount_paid": {year: pounds}}`` keyed
        by calendar year (academic year 20XX/YY maps to 20XX+1).

    Raises:
        ValueError: If a section header, a "Grand total" row, or the year
            header columns cannot be located.

    Note:
        The result is cached and the same dict object is handed to every
        caller, so callers should treat it as read-only.
    """
    if os.environ.get("TESTING", "0") == "1":
        return get_maintenance_loan_snapshot_data()

    df = pd.read_excel(_MAINTENANCE_LOAN_URL, sheet_name="Table 3A", header=None)

    count_header_row = _find_row(df, "Number of students paid (000s) [27]")
    count_year_row = count_header_row + 1
    count_total_row = _find_row(df, "Grand total", start=count_year_row + 1)

    amount_header_row = _find_row(df, "Amount paid (£m)")
    amount_year_row = amount_header_row + 1
    amount_total_row = _find_row(df, "Grand total", start=amount_year_row + 1)

    count_columns = _maintenance_loan_year_columns(df, count_year_row)
    if not count_columns:
        raise ValueError("Could not find maintenance-loan year columns")

    # Prefer the amount section's own year header so a differently laid-out
    # section is read from the right columns; fall back to the count section's
    # layout when the amount section has no recognisable year header.
    amount_columns = (
        _maintenance_loan_year_columns(df, amount_year_row) or count_columns
    )

    return {
        "recipients": _maintenance_loan_totals(
            df, count_total_row, count_columns, 1_000
        ),
        "amount_paid": _maintenance_loan_totals(
            df, amount_total_row, amount_columns, 1_000_000
        ),
    }
267+
268+
169269
def get_targets() -> list[Target]:
170270
"""Generate SLC calibration targets by fetching live data."""
171271
slc_data = _fetch_slc_data()
272+
maintenance_loan_data = _fetch_maintenance_loan_data()
172273

173274
targets = []
174275

@@ -189,4 +290,26 @@ def get_targets() -> list[Target]:
189290
)
190291
)
191292

293+
targets.extend(
294+
[
295+
Target(
296+
name="slc/maintenance_loan_recipients",
297+
variable="maintenance_loan",
298+
source="slc",
299+
unit=Unit.COUNT,
300+
is_count=True,
301+
values=maintenance_loan_data["recipients"],
302+
reference_url=_MAINTENANCE_LOAN_URL,
303+
),
304+
Target(
305+
name="slc/maintenance_loan_spend",
306+
variable="maintenance_loan",
307+
source="slc",
308+
unit=Unit.GBP,
309+
values=maintenance_loan_data["amount_paid"],
310+
reference_url=_MAINTENANCE_LOAN_URL,
311+
),
312+
]
313+
)
314+
192315
return targets

policyengine_uk_data/tests/test_student_loan_targets.py

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,16 @@ def test_slc_targets_registered():
1818
assert "slc/student_loan_repayment/england" in targets
1919
assert "slc/student_loan_repayment/scotland" in targets
2020
assert "slc/student_loan_repayment/england/plan_2" in targets
21+
assert "slc/maintenance_loan_recipients" in targets
22+
assert "slc/maintenance_loan_spend" in targets
23+
24+
25+
def test_policyengine_uk_release_exposes_maintenance_loan_variable():
    """The locked policyengine-uk release must define ``maintenance_loan``."""
    from policyengine_uk import CountryTaxBenefitSystem

    variables = CountryTaxBenefitSystem().variables
    assert "maintenance_loan" in variables
2131

2232

2333
def test_slc_snapshot_values_match_higher_education_total_rows():
@@ -97,6 +107,28 @@ def test_slc_england_plan_repayments_sum_to_england_total():
97107
assert england_plans == england_total
98108

99109

110+
def test_slc_maintenance_loan_targets_match_official_2025_values():
    """Registered maintenance-loan targets should carry the 2024/25 figures."""
    from policyengine_uk_data.targets.registry import get_all_targets

    # Calendar year 2025 corresponds to academic year 2024/25.
    expected_2025 = {
        "slc/maintenance_loan_recipients": 1_159_761,
        "slc/maintenance_loan_spend": 8_591_659_718,
    }

    by_name = {target.name: target for target in get_all_targets()}

    for target_name, expected_value in expected_2025.items():
        assert by_name[target_name].values[2025] == expected_value
118+
119+
120+
def test_slc_maintenance_loan_snapshot_matches_known_series_points():
    """Spot-check the checked-in snapshot against known series values."""
    from policyengine_uk_data.targets.sources import slc

    snapshot = slc.get_maintenance_loan_snapshot_data()

    known_points = (
        ("recipients", 2014, 972_830),
        ("recipients", 2024, 1_154_427),
        ("amount_paid", 2017, 4_870_158_274),
        ("amount_paid", 2025, 8_591_659_718),
    )
    for series, year, expected in known_points:
        assert snapshot[series][year] == expected
130+
131+
100132
def test_slc_testing_mode_uses_snapshot_without_network(monkeypatch):
101133
"""Dataset-build CI should not depend on a live SLC endpoint."""
102134
from policyengine_uk_data.targets.sources import slc
@@ -113,6 +145,24 @@ def fail_network(*args, **kwargs):
113145
slc._fetch_slc_data.cache_clear()
114146

115147

148+
def test_slc_maintenance_loan_testing_mode_uses_snapshot_without_network(monkeypatch):
    """Maintenance-loan targets should also avoid network in TESTING mode."""
    from policyengine_uk_data.targets.sources import slc

    def fail_excel(*args, **kwargs):
        raise AssertionError("network should not be used in TESTING mode")

    # Start from an empty cache so the fetch body actually runs under TESTING=1.
    slc._fetch_maintenance_loan_data.cache_clear()
    monkeypatch.setenv("TESTING", "1")
    monkeypatch.setattr(slc.pd, "read_excel", fail_excel)

    try:
        assert (
            slc._fetch_maintenance_loan_data()
            == slc.get_maintenance_loan_snapshot_data()
        )
    finally:
        # Always drop the cached value, even on failure, so it cannot leak
        # into later tests.
        slc._fetch_maintenance_loan_data.cache_clear()
164+
165+
116166
def test_slc_parser_uses_higher_education_total_rows(monkeypatch):
117167
"""Parser should read HE-total rows, not the first matching above-threshold row."""
118168
from policyengine_uk_data.targets.sources import slc
@@ -218,6 +268,43 @@ def raise_for_status():
218268
slc._fetch_slc_data.cache_clear()
219269

220270

271+
def test_slc_maintenance_loan_parser_uses_grand_total_rows(monkeypatch):
    """Maintenance-loan parser should read the grand-total rows from Table 3A."""
    from policyengine_uk_data.targets.sources import slc

    # Minimal stand-in for the sheet: a count section followed by an amount
    # section, each with a header row, a year row and a "Grand total" row.
    table = np.full((24, 16), np.nan, dtype=object)
    table[6, 4] = "Number of students paid (000s) [27]"
    table[7, 4] = "2013/14"
    table[7, 5] = "2024/25"
    table[12, 1] = "Grand total"
    table[12, 4] = 972.830
    table[12, 5] = 1159.761
    table[15, 4] = "Amount paid (£m)"
    table[16, 4] = "2013/14"
    table[16, 5] = "2024/25"
    table[21, 1] = "Grand total"
    table[21, 4] = 3783.626551
    table[21, 5] = 8591.659718

    # Build the frame through the module's own pandas handle before patching.
    fake_sheet = slc.pd.DataFrame(table)

    slc._fetch_maintenance_loan_data.cache_clear()
    monkeypatch.delenv("TESTING", raising=False)
    monkeypatch.setattr(slc.pd, "read_excel", lambda *args, **kwargs: fake_sheet)

    try:
        data = slc._fetch_maintenance_loan_data()
        assert data["recipients"][2014] == 972_830
        assert data["recipients"][2025] == 1_159_761
        assert data["amount_paid"][2014] == 3_783_626_551
        assert data["amount_paid"][2025] == 8_591_659_718
    finally:
        # The cached result comes from the fake sheet; always drop it so it
        # cannot leak into later tests.
        slc._fetch_maintenance_loan_data.cache_clear()
306+
307+
221308
def test_student_loan_target_compute_distinguishes_liable_from_repaying():
222309
"""Above-threshold counts should require repayments, while liable counts should not."""
223310
from policyengine_uk_data.targets.compute.other import (

uv.lock

Lines changed: 22 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)