@@ -24,6 +24,28 @@ def _get_positive_int_env(name: str, default: int) -> int:
2424 return value
2525
2626
27+ def _needs_base_year_materialization (frs_release ) -> bool :
28+ return frs_release .calibration_year != frs_release .base_year
29+
30+
31+ def _needs_calibration_year_materialization (frs_release ) -> bool :
32+ return frs_release .calibration_year != frs_release .base_year
33+
34+
def _materialize_calibration_year_dataset(dataset, frs_release, uprate_dataset):
    """Return ``dataset`` expressed in the release's calibration year.

    Args:
        dataset: Dataset object to convert; passed through to
            ``uprate_dataset`` untouched.
        frs_release: Release descriptor exposing ``calibration_year`` and
            ``base_year`` attributes.
        uprate_dataset: Callable invoked as ``uprate_dataset(dataset, year)``
            that produces the dataset for the requested year.

    Returns:
        The result of ``uprate_dataset`` for the calibration year, or the
        input ``dataset`` itself when no calibration-year materialization
        is needed.
    """
    if _needs_calibration_year_materialization(frs_release):
        target_year = frs_release.calibration_year
        return uprate_dataset(dataset, target_year)

    # Calibration year already equals the base year: nothing to convert.
    return dataset
40+
41+
def _materialize_base_year_dataset(dataset, frs_release, uprate_dataset):
    """Return ``dataset`` expressed in the release's base year.

    Args:
        dataset: Dataset object to convert; passed through to
            ``uprate_dataset`` untouched.
        frs_release: Release descriptor exposing ``calibration_year`` and
            ``base_year`` attributes.
        uprate_dataset: Callable invoked as ``uprate_dataset(dataset, year)``
            that produces the dataset for the requested year.

    Returns:
        The result of ``uprate_dataset`` for the base year, or the input
        ``dataset`` itself when no base-year materialization is needed.
    """
    if _needs_base_year_materialization(frs_release):
        target_year = frs_release.base_year
        return uprate_dataset(dataset, target_year)

    # Base year already equals the calibration year: hand the input back.
    return dataset
47+
48+
2749def main ():
2850 """Create enhanced FRS dataset with rich progress tracking."""
2951 try :
@@ -34,6 +56,7 @@ def main():
3456 strip_internal_disability_reported_amounts ,
3557 )
3658 from policyengine_uk_data .datasets .frs import create_frs
59+ from policyengine_uk_data .datasets .frs_release import CURRENT_FRS_RELEASE
3760 from policyengine_uk_data .storage import STORAGE_FOLDER
3861 from policyengine_uk_data .utils .progress import (
3962 ProcessingProgress ,
@@ -50,6 +73,19 @@ def main():
5073 "PE_UK_DATA_OA_CLONES" ,
5174 2 if is_testing else 10 ,
5275 )
76+ frs_release = CURRENT_FRS_RELEASE
77+ align_to_base_year = frs_release .base_year != frs_release .survey_year
78+ align_step = f"Align to { frs_release .base_year } base year"
79+ materialize_calibration_year = _needs_calibration_year_materialization (
80+ frs_release
81+ )
82+ materialize_calibration_step = (
83+ f"Materialize { frs_release .calibration_year } calibration-year dataset"
84+ )
85+ materialize_base_year = _needs_base_year_materialization (frs_release )
86+ materialize_step = (
87+ f"Materialize calibrated { frs_release .base_year } base-year dataset"
88+ )
5389
5490 progress_tracker = ProcessingProgress ()
5591
@@ -65,14 +101,27 @@ def main():
65101 "Impute salary sacrifice" ,
66102 "Impute student loan plan" ,
67103 "Clone and assign OA geography" ,
68- "Uprate to 2025" ,
69104 "Calibrate constituency weights" ,
70105 "Calibrate local authority weights" ,
71- "Downrate to 2023" ,
72106 "Calibrate fuel litres" ,
73107 "Save final dataset" ,
74108 "Create tiny datasets" ,
75109 ]
110+ if align_to_base_year :
111+ steps .insert (
112+ steps .index ("Calibrate constituency weights" ),
113+ align_step ,
114+ )
115+ if materialize_calibration_year :
116+ steps .insert (
117+ steps .index ("Calibrate constituency weights" ),
118+ materialize_calibration_step ,
119+ )
120+ if materialize_base_year :
121+ steps .insert (
122+ steps .index ("Calibrate fuel litres" ),
123+ materialize_step ,
124+ )
76125
77126 with progress_tracker .track_dataset_creation (steps ) as (
78127 update_dataset ,
@@ -81,12 +130,12 @@ def main():
81130 # Create base FRS dataset
82131 update_dataset ("Create base FRS dataset" , "processing" )
83132 frs = create_frs (
84- raw_frs_folder = STORAGE_FOLDER / "frs_2023_24" ,
85- year = 2023 ,
133+ raw_frs_folder = STORAGE_FOLDER / frs_release . name ,
134+ year = frs_release . survey_year ,
86135 include_internal_disability_reported_amounts = True ,
87136 )
88137 strip_internal_disability_reported_amounts (frs ).save (
89- STORAGE_FOLDER / "frs_2023_24.h5"
138+ STORAGE_FOLDER / frs_release . base_dataset_file
90139 )
91140 update_dataset ("Create base FRS dataset" , "completed" )
92141
@@ -136,7 +185,10 @@ def main():
136185 update_dataset ("Impute salary sacrifice" , "completed" )
137186
138187 update_dataset ("Impute student loan plan" , "processing" )
139- frs = impute_student_loan_plan (frs , year = 2025 )
188+ frs = impute_student_loan_plan (
189+ frs ,
190+ year = frs_release .calibration_year ,
191+ )
140192 update_dataset ("Impute student loan plan" , "completed" )
141193
142194 # Clone households and assign OA geography
@@ -148,10 +200,19 @@ def main():
148200 frs = clone_and_assign (frs , n_clones = oa_clones )
149201 update_dataset ("Clone and assign OA geography" , "completed" )
150202
151- # Uprate dataset
152- update_dataset ("Uprate to 2025" , "processing" )
153- frs = uprate_dataset (frs , 2025 )
154- update_dataset ("Uprate to 2025" , "completed" )
203+ if align_to_base_year :
204+ update_dataset (align_step , "processing" )
205+ frs = uprate_dataset (frs , frs_release .base_year )
206+ update_dataset (align_step , "completed" )
207+
208+ if materialize_calibration_year :
209+ update_dataset (materialize_calibration_step , "processing" )
210+ frs = _materialize_calibration_year_dataset (
211+ frs ,
212+ frs_release ,
213+ uprate_dataset ,
214+ )
215+ update_dataset (materialize_calibration_step , "completed" )
155216
156217 # Calibrate constituency weights with nested progress
157218
@@ -179,12 +240,14 @@ def main():
179240 national_matrix_fn = create_national_target_matrix ,
180241 area_count = 650 ,
181242 weight_file = "parliamentary_constituency_weights.h5" ,
243+ dataset_key = str (frs_release .calibration_year ),
182244 excluded_training_targets = [],
183245 log_csv = "constituency_calibration_log.csv" ,
184246 verbose = True , # Enable nested progress display
185247 area_name = "Constituency" ,
186248 get_performance = get_performance ,
187249 nested_progress = nested_progress , # Pass the nested progress manager
250+ time_period = frs_release .calibration_year ,
188251 )
189252 update_dataset ("Calibrate constituency weights" , "completed" )
190253
@@ -204,19 +267,26 @@ def main():
204267 national_matrix_fn = create_national_target_matrix ,
205268 area_count = 360 ,
206269 weight_file = "local_authority_weights.h5" ,
270+ dataset_key = str (frs_release .calibration_year ),
207271 excluded_training_targets = [],
208272 log_csv = "la_calibration_log.csv" ,
209273 verbose = True , # Enable nested progress display
210274 area_name = "Local Authority" ,
211275 get_performance = get_la_performance ,
212276 nested_progress = nested_progress , # Pass the nested progress manager
277+ time_period = frs_release .calibration_year ,
213278 )
214279 update_dataset ("Calibrate local authority weights" , "completed" )
215280
216- # Downrate and save
217- update_dataset ("Downrate to 2023" , "processing" )
218- frs_calibrated = uprate_dataset (frs_calibrated_constituencies , 2023 )
219- update_dataset ("Downrate to 2023" , "completed" )
281+ frs_calibrated = frs_calibrated_constituencies
282+ if materialize_base_year :
283+ update_dataset (materialize_step , "processing" )
284+ frs_calibrated = _materialize_base_year_dataset (
285+ frs_calibrated ,
286+ frs_release ,
287+ uprate_dataset ,
288+ )
289+ update_dataset (materialize_step , "completed" )
220290
221291 update_dataset ("Calibrate fuel litres" , "processing" )
222292 from policyengine_uk_data .datasets .imputations .consumption import (
@@ -228,7 +298,7 @@ def main():
228298
229299 update_dataset ("Save final dataset" , "processing" )
230300 strip_internal_disability_reported_amounts (frs_calibrated ).save (
231- STORAGE_FOLDER / "enhanced_frs_2023_24.h5"
301+ STORAGE_FOLDER / frs_release . enhanced_dataset_file
232302 )
233303 update_dataset ("Save final dataset" , "completed" )
234304
@@ -237,26 +307,26 @@ def main():
237307 TINY_SIZE = 1_000
238308
239309 frs_base = UKSingleYearDataset (
240- file_path = str (STORAGE_FOLDER / "frs_2023_24.h5" )
310+ file_path = str (STORAGE_FOLDER / frs_release . base_dataset_file )
241311 )
242312 tiny_frs = subsample_dataset (frs_base , TINY_SIZE )
243- tiny_frs .save (STORAGE_FOLDER / "frs_2023_24_tiny.h5" )
313+ tiny_frs .save (STORAGE_FOLDER / frs_release . tiny_base_dataset_file )
244314
245315 tiny_enhanced = subsample_dataset (
246316 strip_internal_disability_reported_amounts (frs_calibrated ),
247317 TINY_SIZE ,
248318 )
249- tiny_enhanced .save (STORAGE_FOLDER / "enhanced_frs_2023_24_tiny.h5" )
319+ tiny_enhanced .save (STORAGE_FOLDER / frs_release . tiny_enhanced_dataset_file )
250320 update_dataset ("Create tiny datasets" , "completed" )
251321
252322 # Display success message
253323 display_success_panel (
254324 "Dataset creation completed successfully" ,
255325 details = {
256- "base_dataset" : "frs_2023_24.h5" ,
257- "enhanced_dataset" : "enhanced_frs_2023_24.h5" ,
258- "tiny_base_dataset" : "frs_2023_24_tiny.h5" ,
259- "tiny_enhanced_dataset" : "enhanced_frs_2023_24_tiny.h5" ,
326+ "base_dataset" : frs_release . base_dataset_file ,
327+ "enhanced_dataset" : frs_release . enhanced_dataset_file ,
328+ "tiny_base_dataset" : frs_release . tiny_base_dataset_file ,
329+ "tiny_enhanced_dataset" : frs_release . tiny_enhanced_dataset_file ,
260330 "imputations_applied" : "consumption, wealth, VAT, services, income, capital_gains, salary_sacrifice, student_loan_plan" ,
261331 "calibration" : "national, LA and constituency targets" ,
262332 },
0 commit comments