Three target types are exposed:
- `above_threshold`: borrowers liable to repay and earning above threshold
- `liable`: all borrowers liable to repay, including below-threshold holders
- `maintenance_loan`: full-time undergraduate England maintenance-loan
  recipient counts and total amount paid
810
Source: Explore Education Statistics — Student loan forecasts for England,
Table 6a: Forecast number of student borrowers liable to repay and number
earning above repayment threshold, by product. We use the "Higher education
total" row, which sums HE full-time, HE part-time, and Advanced Learner loans.
Academic year 20XX-YY maps to calendar year 20XX+1 (e.g., 2024-25 → 2025).
1416
Maintenance-loan targets come from Student support for higher education in
England 2025, Table 3A: Maintenance Loans paid to full-time undergraduate
students. Academic year 20XX/YY maps to calendar year 20XX+1
(e.g., 2024/25 → 2025).
20+
Data permalink (Table 6a):
https://explore-education-statistics.service.gov.uk/data-tables/permalink/6ff75517-7124-487c-cb4e-08de6eccf22d
1723"""
2127import re
2228from functools import lru_cache
2329
30+ import pandas as pd
2431import requests
2532
2633from policyengine_uk_data .targets .schema import Target , Unit
3037 f"https://explore-education-statistics.service.gov.uk"
3138 f"/data-tables/permalink/{ _PERMALINK_ID } "
3239)
40+ _MAINTENANCE_LOAN_URL = (
41+ "https://assets.publishing.service.gov.uk/media/"
42+ "691d9e662c6b98ecdbc5003f/slcsp052025.xlsx"
43+ )
3344_TESTING_DATA = {
3445 "plan_2" : {
3546 "above_threshold" : {
6879 },
6980 },
7081}
82+ _MAINTENANCE_LOAN_TESTING_DATA = {
83+ "recipients" : {
84+ 2014 : 972_830 ,
85+ 2015 : 963_084 ,
86+ 2016 : 986_323 ,
87+ 2017 : 1_013_354 ,
88+ 2018 : 1_028_438 ,
89+ 2019 : 1_044_973 ,
90+ 2020 : 1_055_702 ,
91+ 2021 : 1_117_591 ,
92+ 2022 : 1_145_289 ,
93+ 2023 : 1_151_607 ,
94+ 2024 : 1_154_427 ,
95+ 2025 : 1_159_761 ,
96+ },
97+ "amount_paid" : {
98+ 2014 : 3_783_626_551 ,
99+ 2015 : 3_784_628_482 ,
100+ 2016 : 3_996_708_360 ,
101+ 2017 : 4_870_158_274 ,
102+ 2018 : 5_746_431_691 ,
103+ 2019 : 6_555_506_426 ,
104+ 2020 : 7_113_141_652 ,
105+ 2021 : 7_914_340_039 ,
106+ 2022 : 8_332_837_845 ,
107+ 2023 : 8_594_103_415 ,
108+ 2024 : 8_881_701_387 ,
109+ 2025 : 8_591_659_718 ,
110+ },
111+ }
71112
72113
73114def get_snapshot_data () -> dict :
@@ -80,6 +121,13 @@ def get_snapshot_data() -> dict:
80121 }
81122
82123
def get_maintenance_loan_snapshot_data() -> dict:
    """Return a copy of the checked-in maintenance-loan snapshot.

    Each per-series mapping is copied, so adding or removing entries in the
    result does not alter the module-level snapshot.
    """
    snapshot = {}
    for series_name, yearly_values in _MAINTENANCE_LOAN_TESTING_DATA.items():
        snapshot[series_name] = yearly_values.copy()
    return snapshot
129+
130+
83131@lru_cache (maxsize = 1 )
84132def _fetch_slc_data () -> dict :
85133 """Fetch and parse SLC Table 6a data from Explore Education Statistics.
@@ -166,9 +214,62 @@ def parse_values(row, start_index, years):
166214 }
167215
168216
217+ def _row_contains_text (df : pd .DataFrame , row_index : int , text : str ) -> bool :
218+ row = df .iloc [row_index ].dropna ()
219+ return any (str (value ).strip () == text for value in row )
220+
221+
def _find_row(df: pd.DataFrame, text: str, start: int = 0) -> int:
    """Return the position of the first row at or after ``start`` containing ``text``.

    A row matches when ``_row_contains_text`` is true for it.

    Raises:
        ValueError: If no row from ``start`` to the end of ``df`` matches.
    """
    matching_rows = (
        candidate
        for candidate in range(start, len(df))
        if _row_contains_text(df, candidate, text)
    )
    first_match = next(matching_rows, None)
    if first_match is None:
        raise ValueError(f"Could not find row containing {text!r}")
    return first_match
227+
228+
@lru_cache(maxsize=1)
def _fetch_maintenance_loan_data() -> dict:
    """Fetch full-time England maintenance-loan recipient counts and spend.

    When the ``TESTING`` environment variable is ``"1"`` the checked-in
    snapshot is returned and nothing is downloaded. Otherwise the "Table 3A"
    sheet of the workbook at ``_MAINTENANCE_LOAN_URL`` is read and the
    "Grand total" rows of the count and amount sections are extracted.

    Returns:
        A dict with two keys, ``"recipients"`` and ``"amount_paid"``, each
        mapping calendar year (academic-year start + 1) to an ``int``.
        Sheet counts (headed "000s") are scaled by 1,000 and sheet amounts
        (headed "£m") by 1,000,000. The result is cached, so every caller
        receives the same dict object and should treat it as read-only.

    Raises:
        ValueError: If a required header / "Grand total" row or the
            academic-year header cells cannot be located.
    """
    if os.environ.get("TESTING", "0") == "1":
        return get_maintenance_loan_snapshot_data()

    df = pd.read_excel(_MAINTENANCE_LOAN_URL, sheet_name="Table 3A", header=None)

    # In each section the year labels sit on the row directly below the
    # section header, and the totals row is searched for after that.
    count_header_row = _find_row(df, "Number of students paid (000s) [27]")
    count_year_row = count_header_row + 1
    count_total_row = _find_row(df, "Grand total", start=count_year_row + 1)

    amount_header_row = _find_row(df, "Amount paid (£m)")
    amount_year_row = amount_header_row + 1
    amount_total_row = _find_row(df, "Grand total", start=amount_year_row + 1)

    # Map column *position* -> calendar year. Positions (not labels) are
    # collected because the lookups below go through positional `.iloc`.
    # Labels are stripped before matching, mirroring `_row_contains_text`,
    # so a padded cell such as "2024/25 " is not silently skipped.
    year_columns = {}
    for position, label in enumerate(df.iloc[count_year_row].tolist()):
        if not isinstance(label, str):
            continue
        label = label.strip()
        if re.fullmatch(r"\d{4}/\d{2}", label):
            year_columns[position] = int(label[:4]) + 1

    if not year_columns:
        raise ValueError("Could not find maintenance-loan year columns")

    # The amount section is read with the same column positions as the
    # count section (assumes both sections share one column layout).
    recipients = {}
    amount_paid = {}
    for position, year in year_columns.items():
        count_value = df.iloc[count_total_row, position]
        amount_value = df.iloc[amount_total_row, position]
        if pd.notna(count_value):
            recipients[year] = int(round(float(count_value) * 1_000))
        if pd.notna(amount_value):
            amount_paid[year] = int(round(float(amount_value) * 1_000_000))

    return {
        "recipients": recipients,
        "amount_paid": amount_paid,
    }
267+
268+
169269def get_targets () -> list [Target ]:
170270 """Generate SLC calibration targets by fetching live data."""
171271 slc_data = _fetch_slc_data ()
272+ maintenance_loan_data = _fetch_maintenance_loan_data ()
172273
173274 targets = []
174275
@@ -189,4 +290,26 @@ def get_targets() -> list[Target]:
189290 )
190291 )
191292
293+ targets .extend (
294+ [
295+ Target (
296+ name = "slc/maintenance_loan_recipients" ,
297+ variable = "maintenance_loan" ,
298+ source = "slc" ,
299+ unit = Unit .COUNT ,
300+ is_count = True ,
301+ values = maintenance_loan_data ["recipients" ],
302+ reference_url = _MAINTENANCE_LOAN_URL ,
303+ ),
304+ Target (
305+ name = "slc/maintenance_loan_spend" ,
306+ variable = "maintenance_loan" ,
307+ source = "slc" ,
308+ unit = Unit .GBP ,
309+ values = maintenance_loan_data ["amount_paid" ],
310+ reference_url = _MAINTENANCE_LOAN_URL ,
311+ ),
312+ ]
313+ )
314+
192315 return targets
0 commit comments