From 14a7b6922357b6eb9afe3f5edaa1f401a9256580 Mon Sep 17 00:00:00 2001
From: Nikhil Woodruff
Date: Mon, 21 Jul 2025 14:34:33 +0100
Subject: [PATCH] Create and upload datasets for the new multi-year format

Fixes #168
---
 Makefile                                      |  1 +
 changelog_entry.yaml                          |  4 ++
 .../storage/upload_completed_datasets.py      |  2 +
 .../utils/create_multi_year_dataset.py        | 53 +++++++++++++++++++
 4 files changed, 60 insertions(+)
 create mode 100644 policyengine_uk_data/utils/create_multi_year_dataset.py

diff --git a/Makefile b/Makefile
index 5ad0a0156..a7f50b1a3 100644
--- a/Makefile
+++ b/Makefile
@@ -36,6 +36,7 @@ data:
 	python policyengine_uk_data/datasets/frs/enhanced_frs.py
 	python policyengine_uk_data/datasets/frs/local_areas/constituencies/calibrate.py
 	python policyengine_uk_data/datasets/frs/local_areas/local_authorities/calibrate.py
+	python policyengine_uk_data/utils/create_multi_year_dataset.py
 
 efrs:
 	python policyengine_uk_data/datasets/frs/enhanced_frs.py
diff --git a/changelog_entry.yaml b/changelog_entry.yaml
index e69de29bb..914a40e49 100644
--- a/changelog_entry.yaml
+++ b/changelog_entry.yaml
@@ -0,0 +1,4 @@
+- bump: minor
+  changes:
+    added:
+    - New multi-year dataset format for FRS and Enhanced FRS.
diff --git a/policyengine_uk_data/storage/upload_completed_datasets.py b/policyengine_uk_data/storage/upload_completed_datasets.py
index 058b51450..db3cf7e63 100644
--- a/policyengine_uk_data/storage/upload_completed_datasets.py
+++ b/policyengine_uk_data/storage/upload_completed_datasets.py
@@ -7,6 +7,8 @@ def upload_datasets():
     dataset_files = [
         FRS_2023_24.file_path,
         EnhancedFRS_2023_24.file_path,
+        STORAGE_FOLDER / "frs_2023_29.h5",
+        STORAGE_FOLDER / "enhanced_frs_2023_29.h5",
         STORAGE_FOLDER / "parliamentary_constituency_weights.h5",
         STORAGE_FOLDER / "local_authority_weights.h5",
     ]
diff --git a/policyengine_uk_data/utils/create_multi_year_dataset.py b/policyengine_uk_data/utils/create_multi_year_dataset.py
new file mode 100644
index 000000000..9b98a694f
--- /dev/null
+++ b/policyengine_uk_data/utils/create_multi_year_dataset.py
@@ -0,0 +1,75 @@
+from policyengine_uk.data import UKMultiYearDataset, UKSingleYearDataset
+from policyengine_uk.data.economic_assumptions import apply_uprating
+from policyengine_uk import Microsimulation
+from policyengine_uk_data.storage import STORAGE_FOLDER
+from policyengine_core.data import Dataset
+
+
+def convert_legacy_to_multi_year_dataset(
+    file_path: str,
+    new_file_path: str,
+    start_year: int = 2023,
+    end_year: int = 2029,
+) -> UKMultiYearDataset:
+    """
+    Convert a legacy single year dataset to a multi-year dataset.
+
+    The legacy file is loaded into a ``Microsimulation`` and extracted as a
+    single-year dataset for ``start_year``. Every later year up to and
+    including ``end_year`` is seeded as a copy of the previous year with its
+    ``time_period`` relabelled, and ``apply_uprating`` is then run over the
+    combined multi-year dataset before it is saved.
+
+    Args:
+        file_path: Path of the legacy single-year dataset file to read.
+        new_file_path: Path the multi-year dataset is saved to.
+        start_year: First year of the output; the legacy data is extracted
+            for this fiscal year.
+        end_year: Last year of the output (inclusive). If it is not greater
+            than ``start_year``, only ``start_year`` is included.
+
+    Returns:
+        The uprated multi-year dataset that was saved to ``new_file_path``.
+    """
+    sim = Microsimulation(dataset=Dataset.from_file(file_path))
+
+    dataset = UKSingleYearDataset.from_simulation(sim, fiscal_year=start_year)
+    dataset.time_period = str(start_year)
+
+    datasets = [dataset]
+
+    for year in range(start_year + 1, end_year + 1):
+        dataset = dataset.copy()
+        dataset.time_period = str(year)
+        # Each yearly copy must be collected, otherwise the output would
+        # contain the start year only.
+        datasets.append(dataset)
+
+    multi_year_dataset = UKMultiYearDataset(
+        datasets=datasets,
+    )
+    multi_year_dataset = apply_uprating(multi_year_dataset)
+
+    multi_year_dataset.save(new_file_path)
+    return multi_year_dataset
+
+
+if __name__ == "__main__":
+    # Legacy inputs and their multi-year outputs, paired by position.
+    file_paths = [
+        STORAGE_FOLDER / "frs_2023_24.h5",
+        STORAGE_FOLDER / "enhanced_frs_2023_24.h5",
+    ]
+    out_file_paths = [
+        STORAGE_FOLDER / "frs_2023_29.h5",
+        STORAGE_FOLDER / "enhanced_frs_2023_29.h5",
+    ]
+
+    for file_path, new_file_path in zip(file_paths, out_file_paths):
+        convert_legacy_to_multi_year_dataset(
+            file_path=str(file_path),
+            new_file_path=str(new_file_path),
+            start_year=2023,
+            end_year=2029,
+        )
+        print(f"Converted {file_path} to {new_file_path}")