From 045de34b5624d0267c3f781016fcb11873df56af Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Mon, 21 Jul 2025 09:50:36 +0100 Subject: [PATCH 01/11] Rename dataset --- policyengine_uk/data/__init__.py | 2 +- policyengine_uk/data/dataset_schema.py | 56 ++++++++++++++++++-- policyengine_uk/data/economic_assumptions.py | 4 +- policyengine_uk/system.py | 6 +-- 4 files changed, 59 insertions(+), 9 deletions(-) diff --git a/policyengine_uk/data/__init__.py b/policyengine_uk/data/__init__.py index 4603c15be..47b1b096e 100644 --- a/policyengine_uk/data/__init__.py +++ b/policyengine_uk/data/__init__.py @@ -1 +1 @@ -from policyengine_uk.data.dataset_schema import UKDataset +from policyengine_uk.data.dataset_schema import UKMultiYearDataset, UKSingleYearDataset diff --git a/policyengine_uk/data/dataset_schema.py b/policyengine_uk/data/dataset_schema.py index 683913aea..457c0b701 100644 --- a/policyengine_uk/data/dataset_schema.py +++ b/policyengine_uk/data/dataset_schema.py @@ -8,7 +8,7 @@ import h5py -class UKDataset: +class UKSingleYearDataset: person: pd.DataFrame benunit: pd.DataFrame household: pd.DataFrame @@ -80,7 +80,7 @@ def load(self): return data def copy(self): - return UKDataset( + return UKSingleYearDataset( person=self.person.copy(), benunit=self.benunit.copy(), household=self.household.copy(), @@ -110,9 +110,59 @@ def from_simulation( input_variables, period=fiscal_year ) - return UKDataset( + return UKSingleYearDataset( person=entity_dfs["person"], benunit=entity_dfs["benunit"], household=entity_dfs["household"], fiscal_year=fiscal_year, ) + + +class UKMultiYearDataset: + def __init__(self, file_path: str = None, datasets: list[UKSingleYearDataset] | None = None): + if datasets is None: + self.datasets = {} + for dataset in datasets: + if not isinstance(dataset, UKSingleYearDataset): + raise TypeError( + "All items in datasets must be of type UKSingleYearDataset." + ) + year = int(dataset.time_period[:4]) + self.datasets[year] = dataset + + if file_path is not None: + UKSingleYearDataset.validate_file_path(file_path) + with pd.HDFStore(file_path) as f: + self.datasets = {} + for year in f.keys(): + if year.startswith("/person/"): + fiscal_year = int(year.split("/")[2]) + person_df = f[year] + benunit_df = f[f"/benunit/{fiscal_year}"] + household_df = f[f"/household/{fiscal_year}"] + self.datasets[fiscal_year] = UKSingleYearDataset( + person=person_df, + benunit=benunit_df, + household=household_df, + fiscal_year=fiscal_year, + ) + + + def get_year(self, fiscal_year: int) -> UKSingleYearDataset: + if fiscal_year in self.datasets: + return self.datasets[fiscal_year] + else: + raise ValueError(f"No dataset found for year {fiscal_year}.") + + def __getattr__(self, fiscal_year: int): + return self.get_year(fiscal_year) + + def save(self, file_path: str): + with pd.HDFStore(file_path) as f: + for i, dataset in enumerate(self.datasets): + year = dataset.time_period[:4] + f.put(f"person/{year}", dataset.person, format="table", data_columns=True) + f.put(f"benunit/{year}", dataset.benunit, format="table", data_columns=True) + f.put(f"household/{year}", dataset.household, format="table", data_columns=True) + f.put(f"time_period/{year}", pd.Series([dataset.time_period]), format="table", data_columns=True) + \ No newline at end of file diff --git a/policyengine_uk/data/economic_assumptions.py b/policyengine_uk/data/economic_assumptions.py index 64b345709..2f15ae21a 100644 --- a/policyengine_uk/data/economic_assumptions.py +++ b/policyengine_uk/data/economic_assumptions.py @@ -1,6 +1,6 @@ import pandas as pd from pathlib import Path -from policyengine_uk.data.dataset_schema import UKDataset +from policyengine_uk.data.dataset_schema import UKSingleYearDataset START_YEAR = 2022 END_YEAR = 2029 @@ -150,7 +150,7 @@ def convert_yoy_growth_to_index( def apply_growth_factors( - dataset: UKDataset, + dataset: UKSingleYearDataset, growth_factors: pd.DataFrame, start_year: int, end_year: int, diff --git a/policyengine_uk/system.py b/policyengine_uk/system.py index f50040a02..b6ed92790 100644 --- a/policyengine_uk/system.py +++ b/policyengine_uk/system.py @@ -6,7 +6,7 @@ Simulation as CoreSimulation, Microsimulation as CoreMicrosimulation, ) -from policyengine_uk.data.dataset_schema import UKDataset +from policyengine_uk.data.dataset_schema import UKSingleYearDataset from policyengine_core.tools.hugging_face import download_huggingface_dataset import pandas as pd @@ -183,8 +183,8 @@ def __init__(self, *args, dataset=ENHANCED_FRS, **kwargs): if Path(dataset_file_path).exists(): if dataset_file_path.endswith(".h5"): try: - UKDataset.validate_file_path(dataset_file_path) - dataset = UKDataset(file_path=dataset_file_path) + UKSingleYearDataset.validate_file_path(dataset_file_path) + dataset = UKSingleYearDataset(file_path=dataset_file_path) except: dataset = Dataset.from_file(dataset_file_path) From bbfbca386d0489eca89ee42bcabb07ed25cdd652 Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Mon, 21 Jul 2025 11:15:17 +0100 Subject: [PATCH 02/11] Add multi-year dataset (working in sim load) --- policyengine_uk/data/__init__.py | 5 +- policyengine_uk/data/dataset_schema.py | 86 +++++++++++++++++++++----- policyengine_uk/system.py | 26 ++++++-- 3 files changed, 98 insertions(+), 19 deletions(-) diff --git a/policyengine_uk/data/__init__.py b/policyengine_uk/data/__init__.py index 47b1b096e..905fb1306 100644 --- a/policyengine_uk/data/__init__.py +++ b/policyengine_uk/data/__init__.py @@ -1 +1,4 @@ -from policyengine_uk.data.dataset_schema import UKMultiYearDataset, UKSingleYearDataset +from policyengine_uk.data.dataset_schema import ( + UKMultiYearDataset, + UKSingleYearDataset, +) diff --git a/policyengine_uk/data/dataset_schema.py b/policyengine_uk/data/dataset_schema.py index 457c0b701..837c1a2b6 100644 --- a/policyengine_uk/data/dataset_schema.py +++ b/policyengine_uk/data/dataset_schema.py @@ -119,8 +119,12 @@ def from_simulation( class UKMultiYearDataset: - def __init__(self, file_path: str = None, datasets: list[UKSingleYearDataset] | None = None): - if datasets is None: + def __init__( + self, + file_path: str = None, + datasets: list[UKSingleYearDataset] | None = None, + ): + if datasets is not None: self.datasets = {} for dataset in datasets: if not isinstance(dataset, UKSingleYearDataset): @@ -129,7 +133,7 @@ def __init__(self, file_path: str = None, datasets: list[UKSingleYearDataset] | ) year = int(dataset.time_period[:4]) self.datasets[year] = dataset - + if file_path is not None: UKSingleYearDataset.validate_file_path(file_path) with pd.HDFStore(file_path) as f: @@ -146,23 +150,77 @@ def __init__(self, file_path: str = None, datasets: list[UKSingleYearDataset] | household=household_df, fiscal_year=fiscal_year, ) - + + self.data_format = "time_period_arrays" + self.time_period = list(sorted(self.datasets.keys()))[0] def get_year(self, fiscal_year: int) -> UKSingleYearDataset: if fiscal_year in self.datasets: return self.datasets[fiscal_year] else: raise ValueError(f"No dataset found for year {fiscal_year}.") - - def __getattr__(self, fiscal_year: int): + + def __getitem__(self, fiscal_year: int): return self.get_year(fiscal_year) - + def save(self, file_path: str): with pd.HDFStore(file_path) as f: - for i, dataset in enumerate(self.datasets): - year = dataset.time_period[:4] - f.put(f"person/{year}", dataset.person, format="table", data_columns=True) - f.put(f"benunit/{year}", dataset.benunit, format="table", data_columns=True) - f.put(f"household/{year}", dataset.household, format="table", data_columns=True) - f.put(f"time_period/{year}", pd.Series([dataset.time_period]), format="table", data_columns=True) - \ No newline at end of file + for year, dataset in self.datasets.items(): + f.put( + f"person/{year}", + dataset.person, + format="table", + data_columns=True, + ) + f.put( + f"benunit/{year}", + dataset.benunit, + format="table", + data_columns=True, + ) + f.put( + f"household/{year}", + dataset.household, + format="table", + data_columns=True, + ) + f.put( + f"time_period/{year}", + pd.Series([year]), + format="table", + data_columns=True, + ) + + def copy(self): + new_datasets = { + year: dataset.copy() for year, dataset in self.datasets.items() + } + return UKMultiYearDataset(datasets=list(new_datasets.values())) + + @staticmethod + def validate_file_path(file_path: str): + if not file_path.endswith(".h5"): + raise ValueError( + "File path must end with '.h5' for UKMultiYearDataset." + ) + if not Path(file_path).exists(): + raise FileNotFoundError(f"File not found: {file_path}") + + # Check if the file contains datasets for multiple years + with h5py.File(file_path, "r") as f: + if not any(key.startswith("/person/") for key in f.keys()): + raise ValueError("No person dataset found in the file.") + if not any(key.startswith("/benunit/") for key in f.keys()): + raise ValueError("No benunit dataset found in the file.") + if not any(key.startswith("/household/") for key in f.keys()): + raise ValueError("No household dataset found in the file.") + + def load(self): + data = {} + for year, dataset in self.datasets.items(): + for df in (dataset.person, dataset.benunit, dataset.household): + for col in df.columns: + if col not in data: + data[col] = {} + data[col][year] = df[col].values + return data diff --git a/policyengine_uk/system.py b/policyengine_uk/system.py index b6ed92790..01385a546 100644 --- a/policyengine_uk/system.py +++ b/policyengine_uk/system.py @@ -6,7 +6,10 @@ Simulation as CoreSimulation, Microsimulation as CoreMicrosimulation, ) -from policyengine_uk.data.dataset_schema import UKSingleYearDataset +from policyengine_uk.data.dataset_schema import ( + UKSingleYearDataset, + UKMultiYearDataset, +) from policyengine_core.tools.hugging_face import download_huggingface_dataset import pandas as pd @@ -183,10 +186,25 @@ def __init__(self, *args, dataset=ENHANCED_FRS, **kwargs): if Path(dataset_file_path).exists(): if dataset_file_path.endswith(".h5"): try: - UKSingleYearDataset.validate_file_path(dataset_file_path) - dataset = UKSingleYearDataset(file_path=dataset_file_path) + UKSingleYearDataset.validate_file_path( + dataset_file_path + ) + dataset = UKSingleYearDataset( + file_path=dataset_file_path + ) + except: + pass + + try: + UKMultiYearDataset.validate_file_path( + dataset_file_path + ) + dataset = UKMultiYearDataset( + file_path=dataset_file_path + ) except: - dataset = Dataset.from_file(dataset_file_path) + pass + dataset = Dataset.from_file(dataset_file_path) super().__init__(*args, dataset=dataset, **kwargs) From 93f8c69b209f2a965e54bc4a87199302d46571f1 Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Mon, 21 Jul 2025 11:57:15 +0100 Subject: [PATCH 03/11] Add core uprating functionality --- policyengine_uk/__init__.py | 4 - policyengine_uk/data/economic_assumptions.py | 202 ++++--------------- policyengine_uk/data/uprating_indices.yaml | 87 ++++++++ 3 files changed, 123 insertions(+), 170 deletions(-) create mode 100644 policyengine_uk/data/uprating_indices.yaml diff --git a/policyengine_uk/__init__.py b/policyengine_uk/__init__.py index 352fca387..9686788cb 100644 --- a/policyengine_uk/__init__.py +++ b/policyengine_uk/__init__.py @@ -13,9 +13,5 @@ from pathlib import Path import os from policyengine_core.taxbenefitsystems import TaxBenefitSystem -from policyengine_uk.data.economic_assumptions import ( - BASELINE_GROWFACTORS, - apply_growth_factors, -) REPO = Path(__file__).parent diff --git a/policyengine_uk/data/economic_assumptions.py b/policyengine_uk/data/economic_assumptions.py index 2f15ae21a..e4b38a01a 100644 --- a/policyengine_uk/data/economic_assumptions.py +++ b/policyengine_uk/data/economic_assumptions.py @@ -1,181 +1,51 @@ -import pandas as pd +from policyengine_uk.data import UKMultiYearDataset, UKSingleYearDataset +from policyengine_uk.system import system +import yaml +from policyengine_core.parameters import ParameterNode from pathlib import Path -from policyengine_uk.data.dataset_schema import UKSingleYearDataset -START_YEAR = 2022 -END_YEAR = 2029 +def apply_uprating( + dataset: UKMultiYearDataset, +): + # Apply uprating to the dataset. -def create_policyengine_uprating_factors_table(print_diff=True): - from policyengine_uk.system import system - - df = pd.DataFrame() - - variable_names = [] - years = [] - yoy_values = [] - - parameter_by_variable = {} - - for variable in system.variables.values(): - if variable.uprating is not None: - parameter = system.parameters.get_child( - variable.uprating.replace("indices", "yoy_growth") - ) - parameter_by_variable[variable.name] = parameter.name - for year in range(START_YEAR, END_YEAR + 1): - variable_names.append(variable.name) - years.append(str(year)) - yoy_values.append(round(parameter(year), 3)) - - df["Variable"] = variable_names - df["Year"] = years - df["Value"] = yoy_values - - # Convert to there is a column for each year - df = df.pivot(index="Variable", columns="Year", values="Value") - df = df.sort_values("Variable") - - file_path = Path(__file__).parent / "uprating_growth_factors.csv" - - # Read old CSV if it exists - old_df = None - if file_path.exists(): - old_df = pd.read_csv(file_path, index_col=0) - # Ensure all columns are strings in old_df - old_df.columns = old_df.columns.astype(str) - - # Prepare new dataframe - df["Parameter"] = df.index.map(parameter_by_variable) - df = df[ - ["Parameter"] + [str(year) for year in range(START_YEAR, END_YEAR + 1)] - ] - - # Print diff if old CSV existed and print_diff is True - if old_df is not None and print_diff: - print_csv_diff(old_df, df) - # Save new CSV - df.to_csv(file_path) - - return pd.read_csv(file_path) - - -def print_csv_diff(old_df, new_df): - """Print differences between old and new dataframes.""" - print("\n" + "=" * 80) - print("CSV diff report") - print("=" * 80) - - # Check for new rows - new_rows = set(new_df.index) - set(old_df.index) - if new_rows: - print(f"\n✅ New rows added ({len(new_rows)}):") - for row in sorted(new_rows): - print(f" - {row}") - - # Check for deleted rows - deleted_rows = set(old_df.index) - set(new_df.index) - if deleted_rows: - print(f"\n❌ Rows deleted ({len(deleted_rows)}):") - for row in sorted(deleted_rows): - print(f" - {row}") - - # Check for changed values - common_rows = set(old_df.index) & set(new_df.index) - common_cols = set(old_df.columns) & set(new_df.columns) - - changes = [] - for row in common_rows: - for col in common_cols: - old_val = old_df.loc[row, col] - new_val = new_df.loc[row, col] - - # Handle NaN values - if pd.isna(old_val) and pd.isna(new_val): - continue - elif pd.isna(old_val) or pd.isna(new_val): - changes.append((row, col, old_val, new_val)) - elif old_val != new_val: - changes.append((row, col, old_val, new_val)) - - if changes: - print(f"\n🔄 Value changes ({len(changes)}):") - print( - f"{'Variable':<30} {'Column':<15} {'Old value':<15} {'New value':<15}" - ) - print("-" * 75) - for row, col, old_val, new_val in sorted(changes): - old_str = str(old_val) if not pd.isna(old_val) else "NaN" - new_str = str(new_val) if not pd.isna(new_val) else "NaN" - print(f"{row:<30} {str(col):<15} {old_str:<15} {new_str:<15}") - - # Check for new columns - new_cols = set(new_df.columns) - set(old_df.columns) - if new_cols: - print(f"\n✅ New columns added ({len(new_cols)}):") - for col in sorted(new_cols): - print(f" - {col}") - - # Check for deleted columns - deleted_cols = set(old_df.columns) - set(new_df.columns) - if deleted_cols: - print(f"\n❌ Columns deleted ({len(deleted_cols)}):") - for col in sorted(deleted_cols): - print(f" - {col}") - - if not (new_rows or deleted_rows or changes or new_cols or deleted_cols): - print("\n✨ No changes detected - CSV is identical!") - - print("\n" + "=" * 80 + "\n") + if not isinstance(dataset, UKMultiYearDataset): + raise TypeError("dataset must be of type UKMultiYearDataset.") + for year, single_year_dataset in dataset.datasets.items(): + apply_single_year_uprating(single_year_dataset, system.parameters) -def convert_yoy_growth_to_index( - growth_factors: pd.DataFrame, -): - """ - Convert year-on-year growth factors to an index. - """ - growth_factors = growth_factors.copy() - # Get the first year column (skip 'Variable' and 'Parameter' columns) - year_columns = [ - col - for col in growth_factors.columns - if col not in ["Variable", "Parameter"] - ] - index = growth_factors[year_columns[0]] * 0 + 1 - for year in year_columns: - index *= 1 + growth_factors[year] - growth_factors[year] = index - return growth_factors - -def apply_growth_factors( +def apply_single_year_uprating( dataset: UKSingleYearDataset, - growth_factors: pd.DataFrame, - start_year: int, - end_year: int, + parameters: ParameterNode, ): - start_year = str(start_year) - end_year = str(end_year) - dataset = dataset.copy() - growth_factors_indices = convert_yoy_growth_to_index(growth_factors) - for i in range(len(growth_factors)): - variable = growth_factors["Variable"].values[i] - start_index = growth_factors_indices[start_year].values[i] - end_index = growth_factors_indices[end_year].values[i] + # Apply uprating to a single year dataset. - for table in dataset.tables: - if variable in table.columns: - table[variable] *= end_index / start_index + if not isinstance(dataset, UKSingleYearDataset): + raise TypeError("dataset must be of type UKSingleYearDataset.") - return dataset + with open(Path(__file__).parent / "uprating_indices.yaml", "r") as f: + uprating = yaml.safe_load(f) + for index_name, variables in uprating.items(): + index_rel_change = parameters.get_child(index_name)( + dataset.time_period + ) + for variable in variables: + for df in dataset.tables: + if variable in df.columns: + df[variable] *= 1 + index_rel_change + dataset.validate() -BASELINE_GROWFACTORS = create_policyengine_uprating_factors_table( - print_diff=False -) +def reset_uprating( + dataset: UKMultiYearDataset, +): + # Remove all uprating from the dataset. -if __name__ == "__main__": - # Print diff when running as script - create_policyengine_uprating_factors_table(print_diff=True) + first_year = min(dataset.datasets.keys()) + for year in dataset.datasets: + if year != first_year: + dataset.datasets[year] = dataset.datasets[first_year].copy() diff --git a/policyengine_uk/data/uprating_indices.yaml b/policyengine_uk/data/uprating_indices.yaml new file mode 100644 index 000000000..10028bf62 --- /dev/null +++ b/policyengine_uk/data/uprating_indices.yaml @@ -0,0 +1,87 @@ +gov.economic_assumptions.yoy_growth.obr.average_earnings: +- employee_pension_contributions +- employer_pension_contributions +- employment_income +- employment_income_before_lsr +- personal_pension_contributions +- student_loan_repayments +gov.economic_assumptions.yoy_growth.obr.consumer_price_index: +- afcs_reported +- alcohol_and_tobacco_consumption +- attendance_allowance_reported +- bsp_reported +- carers_allowance_reported +- child_benefit_reported +- child_tax_credit_reported +- childcare_expenses +- clothing_and_footwear_consumption +- communication_consumption +- diesel_spending +- dla_m_reported +- dla_sc_reported +- domestic_energy_consumption +- education_consumption +- esa_contrib_reported +- esa_income_reported +- food_and_non_alcoholic_beverages_consumption +- free_school_fruit_veg +- free_school_meals +- free_school_milk +- health_consumption +- household_furnishings_consumption +- housing_benefit_reported +- housing_water_and_electricity_consumption +- iidb_reported +- incapacity_benefit_reported +- income_support_reported +- jsa_contrib_reported +- jsa_income_reported +- maintenance_expenses +- maternity_allowance_reported +- miscellaneous_consumption +- pension_credit_reported +- petrol_spending +- pip_dl_reported +- pip_m_reported +- recreation_consumption +- restaurants_and_hotels_consumption +- sda_reported +- state_pension +- state_pension_reported +- statutory_maternity_pay +- statutory_paternity_pay +- statutory_sick_pay +- transport_consumption +- universal_credit_reported +- winter_fuel_allowance_reported +- working_tax_credit_reported +gov.economic_assumptions.yoy_growth.obr.mortgage_interest: +- mortgage_interest_repayment +gov.economic_assumptions.yoy_growth.obr.per_capita.gdp: +- capital_gains +- capital_gains_before_response +- corporate_wealth +- dividend_income +- gross_financial_wealth +- lump_sum_income +- main_residence_value +- maintenance_income +- miscellaneous_income +- mortgage_capital_repayment +- net_financial_wealth +- non_residential_property_value +- other_investment_income +- other_residential_property_value +- owned_land +- pension_income +- private_transfer_income +- property_income +- savings +- savings_interest_income +- sublet_income +gov.economic_assumptions.yoy_growth.obr.per_capita.mixed_income: +- self_employment_income +gov.economic_assumptions.yoy_growth.obr.private_pension_index: +- private_pension_income +gov.economic_assumptions.yoy_growth.ons.population: +- household_weight From 76a615ccf06ba96cc98feefccb0adb8dd1b0fae1 Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Mon, 21 Jul 2025 11:58:10 +0100 Subject: [PATCH 04/11] Adjust tests --- Makefile | 1 - .../data/uprating_growth_factors.csv | 83 ------------------- 2 files changed, 84 deletions(-) delete mode 100644 policyengine_uk/data/uprating_growth_factors.csv diff --git a/Makefile b/Makefile index 33ddf37ab..1a253c854 100644 --- a/Makefile +++ b/Makefile @@ -16,7 +16,6 @@ test: pytest policyengine_uk/tests/ --cov=policyengine_uk --cov-report=xml --maxfail=0 -v update-tests: - python policyengine_uk/data/economic_assumptions.py python policyengine_uk/tests/microsimulation/update_reform_impacts.py documentation: diff --git a/policyengine_uk/data/uprating_growth_factors.csv b/policyengine_uk/data/uprating_growth_factors.csv deleted file mode 100644 index f0f6400db..000000000 --- a/policyengine_uk/data/uprating_growth_factors.csv +++ /dev/null @@ -1,83 +0,0 @@ -Variable,Parameter,2022,2023,2024,2025,2026,2027,2028,2029 -afcs_reported,gov.economic_assumptions.yoy_growth.obr.consumer_price_index,0.1,0.057,0.023,0.032,0.019,0.02,0.02,0.02 -alcohol_and_tobacco_consumption,gov.economic_assumptions.yoy_growth.obr.consumer_price_index,0.1,0.057,0.023,0.032,0.019,0.02,0.02,0.02 -attendance_allowance_reported,gov.economic_assumptions.yoy_growth.obr.consumer_price_index,0.1,0.057,0.023,0.032,0.019,0.02,0.02,0.02 -bsp_reported,gov.economic_assumptions.yoy_growth.obr.consumer_price_index,0.1,0.057,0.023,0.032,0.019,0.02,0.02,0.02 -capital_gains,gov.economic_assumptions.yoy_growth.obr.per_capita.gdp,0.092,0.05,0.038,0.028,0.028,0.031,0.033,0.033 -capital_gains_before_response,gov.economic_assumptions.yoy_growth.obr.per_capita.gdp,0.092,0.05,0.038,0.028,0.028,0.031,0.033,0.033 -carers_allowance_reported,gov.economic_assumptions.yoy_growth.obr.consumer_price_index,0.1,0.057,0.023,0.032,0.019,0.02,0.02,0.02 -child_benefit_reported,gov.economic_assumptions.yoy_growth.obr.consumer_price_index,0.1,0.057,0.023,0.032,0.019,0.02,0.02,0.02 -child_tax_credit_reported,gov.economic_assumptions.yoy_growth.obr.consumer_price_index,0.1,0.057,0.023,0.032,0.019,0.02,0.02,0.02 -childcare_expenses,gov.economic_assumptions.yoy_growth.obr.consumer_price_index,0.1,0.057,0.023,0.032,0.019,0.02,0.02,0.02 -clothing_and_footwear_consumption,gov.economic_assumptions.yoy_growth.obr.consumer_price_index,0.1,0.057,0.023,0.032,0.019,0.02,0.02,0.02 -communication_consumption,gov.economic_assumptions.yoy_growth.obr.consumer_price_index,0.1,0.057,0.023,0.032,0.019,0.02,0.02,0.02 -corporate_wealth,gov.economic_assumptions.yoy_growth.obr.per_capita.gdp,0.092,0.05,0.038,0.028,0.028,0.031,0.033,0.033 -council_tax,gov.economic_assumptions.yoy_growth.obr.council_tax,0.053,0.056,0.064,0.046,0.045,0.046,0.045,0.045 -diesel_spending,gov.economic_assumptions.yoy_growth.obr.consumer_price_index,0.1,0.057,0.023,0.032,0.019,0.02,0.02,0.02 -dividend_income,gov.economic_assumptions.yoy_growth.obr.per_capita.gdp,0.092,0.05,0.038,0.028,0.028,0.031,0.033,0.033 -dla_m_reported,gov.economic_assumptions.yoy_growth.obr.consumer_price_index,0.1,0.057,0.023,0.032,0.019,0.02,0.02,0.02 -dla_sc_reported,gov.economic_assumptions.yoy_growth.obr.consumer_price_index,0.1,0.057,0.023,0.032,0.019,0.02,0.02,0.02 -domestic_energy_consumption,gov.economic_assumptions.yoy_growth.obr.consumer_price_index,0.1,0.057,0.023,0.032,0.019,0.02,0.02,0.02 -education_consumption,gov.economic_assumptions.yoy_growth.obr.consumer_price_index,0.1,0.057,0.023,0.032,0.019,0.02,0.02,0.02 -employee_pension_contributions,gov.economic_assumptions.yoy_growth.obr.average_earnings,0.064,0.069,0.047,0.037,0.022,0.021,0.023,0.025 -employer_pension_contributions,gov.economic_assumptions.yoy_growth.obr.average_earnings,0.064,0.069,0.047,0.037,0.022,0.021,0.023,0.025 -employment_income,gov.economic_assumptions.yoy_growth.obr.average_earnings,0.064,0.069,0.047,0.037,0.022,0.021,0.023,0.025 -employment_income_before_lsr,gov.economic_assumptions.yoy_growth.obr.average_earnings,0.064,0.069,0.047,0.037,0.022,0.021,0.023,0.025 -esa_contrib_reported,gov.economic_assumptions.yoy_growth.obr.consumer_price_index,0.1,0.057,0.023,0.032,0.019,0.02,0.02,0.02 -esa_income_reported,gov.economic_assumptions.yoy_growth.obr.consumer_price_index,0.1,0.057,0.023,0.032,0.019,0.02,0.02,0.02 -food_and_non_alcoholic_beverages_consumption,gov.economic_assumptions.yoy_growth.obr.consumer_price_index,0.1,0.057,0.023,0.032,0.019,0.02,0.02,0.02 -free_school_fruit_veg,gov.economic_assumptions.yoy_growth.obr.consumer_price_index,0.1,0.057,0.023,0.032,0.019,0.02,0.02,0.02 -free_school_meals,gov.economic_assumptions.yoy_growth.obr.consumer_price_index,0.1,0.057,0.023,0.032,0.019,0.02,0.02,0.02 -free_school_milk,gov.economic_assumptions.yoy_growth.obr.consumer_price_index,0.1,0.057,0.023,0.032,0.019,0.02,0.02,0.02 -gross_financial_wealth,gov.economic_assumptions.yoy_growth.obr.per_capita.gdp,0.092,0.05,0.038,0.028,0.028,0.031,0.033,0.033 -health_consumption,gov.economic_assumptions.yoy_growth.obr.consumer_price_index,0.1,0.057,0.023,0.032,0.019,0.02,0.02,0.02 -household_furnishings_consumption,gov.economic_assumptions.yoy_growth.obr.consumer_price_index,0.1,0.057,0.023,0.032,0.019,0.02,0.02,0.02 -household_weight,gov.economic_assumptions.yoy_growth.ons.population,0.003,0.014,0.01,0.011,0.007,0.008,0.004,0.005 -housing_benefit_reported,gov.economic_assumptions.yoy_growth.obr.consumer_price_index,0.1,0.057,0.023,0.032,0.019,0.02,0.02,0.02 -housing_water_and_electricity_consumption,gov.economic_assumptions.yoy_growth.obr.consumer_price_index,0.1,0.057,0.023,0.032,0.019,0.02,0.02,0.02 -iidb_reported,gov.economic_assumptions.yoy_growth.obr.consumer_price_index,0.1,0.057,0.023,0.032,0.019,0.02,0.02,0.02 -incapacity_benefit_reported,gov.economic_assumptions.yoy_growth.obr.consumer_price_index,0.1,0.057,0.023,0.032,0.019,0.02,0.02,0.02 -income_support_reported,gov.economic_assumptions.yoy_growth.obr.consumer_price_index,0.1,0.057,0.023,0.032,0.019,0.02,0.02,0.02 -jsa_contrib_reported,gov.economic_assumptions.yoy_growth.obr.consumer_price_index,0.1,0.057,0.023,0.032,0.019,0.02,0.02,0.02 -jsa_income_reported,gov.economic_assumptions.yoy_growth.obr.consumer_price_index,0.1,0.057,0.023,0.032,0.019,0.02,0.02,0.02 -lump_sum_income,gov.economic_assumptions.yoy_growth.obr.per_capita.gdp,0.092,0.05,0.038,0.028,0.028,0.031,0.033,0.033 -main_residence_value,gov.economic_assumptions.yoy_growth.obr.per_capita.gdp,0.092,0.05,0.038,0.028,0.028,0.031,0.033,0.033 -maintenance_expenses,gov.economic_assumptions.yoy_growth.obr.consumer_price_index,0.1,0.057,0.023,0.032,0.019,0.02,0.02,0.02 -maintenance_income,gov.economic_assumptions.yoy_growth.obr.per_capita.gdp,0.092,0.05,0.038,0.028,0.028,0.031,0.033,0.033 -maternity_allowance_reported,gov.economic_assumptions.yoy_growth.obr.consumer_price_index,0.1,0.057,0.023,0.032,0.019,0.02,0.02,0.02 -miscellaneous_consumption,gov.economic_assumptions.yoy_growth.obr.consumer_price_index,0.1,0.057,0.023,0.032,0.019,0.02,0.02,0.02 -miscellaneous_income,gov.economic_assumptions.yoy_growth.obr.per_capita.gdp,0.092,0.05,0.038,0.028,0.028,0.031,0.033,0.033 -mortgage_capital_repayment,gov.economic_assumptions.yoy_growth.obr.per_capita.gdp,0.092,0.05,0.038,0.028,0.028,0.031,0.033,0.033 -mortgage_interest_repayment,gov.economic_assumptions.yoy_growth.obr.mortgage_interest,0.262,0.485,0.221,0.136,0.126,0.082,0.042,0.047 -net_financial_wealth,gov.economic_assumptions.yoy_growth.obr.per_capita.gdp,0.092,0.05,0.038,0.028,0.028,0.031,0.033,0.033 -non_residential_property_value,gov.economic_assumptions.yoy_growth.obr.per_capita.gdp,0.092,0.05,0.038,0.028,0.028,0.031,0.033,0.033 -other_investment_income,gov.economic_assumptions.yoy_growth.obr.per_capita.gdp,0.092,0.05,0.038,0.028,0.028,0.031,0.033,0.033 -other_residential_property_value,gov.economic_assumptions.yoy_growth.obr.per_capita.gdp,0.092,0.05,0.038,0.028,0.028,0.031,0.033,0.033 -owned_land,gov.economic_assumptions.yoy_growth.obr.per_capita.gdp,0.092,0.05,0.038,0.028,0.028,0.031,0.033,0.033 -pension_credit_reported,gov.economic_assumptions.yoy_growth.obr.consumer_price_index,0.1,0.057,0.023,0.032,0.019,0.02,0.02,0.02 -pension_income,gov.economic_assumptions.yoy_growth.obr.per_capita.gdp,0.092,0.05,0.038,0.028,0.028,0.031,0.033,0.033 -personal_pension_contributions,gov.economic_assumptions.yoy_growth.obr.average_earnings,0.064,0.069,0.047,0.037,0.022,0.021,0.023,0.025 -petrol_spending,gov.economic_assumptions.yoy_growth.obr.consumer_price_index,0.1,0.057,0.023,0.032,0.019,0.02,0.02,0.02 -pip_dl_reported,gov.economic_assumptions.yoy_growth.obr.consumer_price_index,0.1,0.057,0.023,0.032,0.019,0.02,0.02,0.02 -pip_m_reported,gov.economic_assumptions.yoy_growth.obr.consumer_price_index,0.1,0.057,0.023,0.032,0.019,0.02,0.02,0.02 -private_pension_income,gov.economic_assumptions.yoy_growth.obr.private_pension_index,0.05,0.05,0.05,0.047,0.037,0.022,0.021,0.023 -private_transfer_income,gov.economic_assumptions.yoy_growth.obr.per_capita.gdp,0.092,0.05,0.038,0.028,0.028,0.031,0.033,0.033 -property_income,gov.economic_assumptions.yoy_growth.obr.per_capita.gdp,0.092,0.05,0.038,0.028,0.028,0.031,0.033,0.033 -recreation_consumption,gov.economic_assumptions.yoy_growth.obr.consumer_price_index,0.1,0.057,0.023,0.032,0.019,0.02,0.02,0.02 -rent,gov.economic_assumptions.yoy_growth.obr.rent,0.04,0.063,0.074,0.057,0.036,0.027,0.023,0.024 -restaurants_and_hotels_consumption,gov.economic_assumptions.yoy_growth.obr.consumer_price_index,0.1,0.057,0.023,0.032,0.019,0.02,0.02,0.02 -savings,gov.economic_assumptions.yoy_growth.obr.per_capita.gdp,0.092,0.05,0.038,0.028,0.028,0.031,0.033,0.033 -savings_interest_income,gov.economic_assumptions.yoy_growth.obr.per_capita.gdp,0.092,0.05,0.038,0.028,0.028,0.031,0.033,0.033 -sda_reported,gov.economic_assumptions.yoy_growth.obr.consumer_price_index,0.1,0.057,0.023,0.032,0.019,0.02,0.02,0.02 -self_employment_income,gov.economic_assumptions.yoy_growth.obr.per_capita.mixed_income,0.063,0.024,0.048,0.047,0.031,0.031,0.036,0.038 -state_pension,gov.economic_assumptions.yoy_growth.obr.consumer_price_index,0.1,0.057,0.023,0.032,0.019,0.02,0.02,0.02 -state_pension_reported,gov.economic_assumptions.yoy_growth.obr.consumer_price_index,0.1,0.057,0.023,0.032,0.019,0.02,0.02,0.02 -statutory_maternity_pay,gov.economic_assumptions.yoy_growth.obr.consumer_price_index,0.1,0.057,0.023,0.032,0.019,0.02,0.02,0.02 -statutory_paternity_pay,gov.economic_assumptions.yoy_growth.obr.consumer_price_index,0.1,0.057,0.023,0.032,0.019,0.02,0.02,0.02 -statutory_sick_pay,gov.economic_assumptions.yoy_growth.obr.consumer_price_index,0.1,0.057,0.023,0.032,0.019,0.02,0.02,0.02 -student_loan_repayments,gov.economic_assumptions.yoy_growth.obr.average_earnings,0.064,0.069,0.047,0.037,0.022,0.021,0.023,0.025 -sublet_income,gov.economic_assumptions.yoy_growth.obr.per_capita.gdp,0.092,0.05,0.038,0.028,0.028,0.031,0.033,0.033 -transport_consumption,gov.economic_assumptions.yoy_growth.obr.consumer_price_index,0.1,0.057,0.023,0.032,0.019,0.02,0.02,0.02 -universal_credit_reported,gov.economic_assumptions.yoy_growth.obr.consumer_price_index,0.1,0.057,0.023,0.032,0.019,0.02,0.02,0.02 -winter_fuel_allowance_reported,gov.economic_assumptions.yoy_growth.obr.consumer_price_index,0.1,0.057,0.023,0.032,0.019,0.02,0.02,0.02 -working_tax_credit_reported,gov.economic_assumptions.yoy_growth.obr.consumer_price_index,0.1,0.057,0.023,0.032,0.019,0.02,0.02,0.02 From 2e06533c40fcad9da4c1f6346b77357a7ff12fb9 Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Mon, 21 Jul 2025 13:55:35 +0100 Subject: [PATCH 05/11] Add rent uprating --- policyengine_uk/data/dataset_schema.py | 5 + policyengine_uk/data/economic_assumptions.py | 151 +++++++++++++- .../gov/economic_assumptions/yoy_growth.yaml | 192 +++++++++++++++++- 3 files changed, 338 insertions(+), 10 deletions(-) diff --git a/policyengine_uk/data/dataset_schema.py b/policyengine_uk/data/dataset_schema.py index 837c1a2b6..a6cbd8cdf 100644 --- a/policyengine_uk/data/dataset_schema.py +++ b/policyengine_uk/data/dataset_schema.py @@ -61,6 +61,7 @@ def __init__( self.data_format = "arrays" self.tables = (self.person, self.benunit, self.household) + self.table_names = ("person", "benunit", "household") def save(self, file_path: str): with pd.HDFStore(file_path) as f: @@ -84,6 +85,7 @@ def copy(self): person=self.person.copy(), benunit=self.benunit.copy(), household=self.household.copy(), + fiscal_year=self.time_period, ) def validate(self): @@ -164,6 +166,9 @@ def __getitem__(self, fiscal_year: int): return self.get_year(fiscal_year) def save(self, file_path: str): + Path(file_path).unlink( + missing_ok=True + ) # Remove existing file if it exists with pd.HDFStore(file_path) as f: for year, dataset in self.datasets.items(): f.put( diff --git a/policyengine_uk/data/economic_assumptions.py b/policyengine_uk/data/economic_assumptions.py index e4b38a01a..c5b4beee9 100644 --- a/policyengine_uk/data/economic_assumptions.py +++ b/policyengine_uk/data/economic_assumptions.py @@ -3,41 +3,174 @@ import yaml from policyengine_core.parameters import ParameterNode from pathlib import Path +import numpy as np def apply_uprating( dataset: UKMultiYearDataset, ): # Apply uprating to the dataset. + dataset = dataset.copy() if not isinstance(dataset, UKMultiYearDataset): raise TypeError("dataset must be of type UKMultiYearDataset.") - for year, single_year_dataset in dataset.datasets.items(): - apply_single_year_uprating(single_year_dataset, system.parameters) + for year in dataset.datasets.keys(): + if year == min(dataset.datasets.keys()): + continue # Don't uprate the first year + current_year = dataset.datasets[year] + prev_year = dataset.datasets[year - 1] + apply_single_year_uprating(current_year, prev_year, system.parameters) + + return dataset def apply_single_year_uprating( - dataset: UKSingleYearDataset, + current_year: UKSingleYearDataset, + previous_year: UKSingleYearDataset, parameters: ParameterNode, ): # Apply uprating to a single year dataset. - if not isinstance(dataset, UKSingleYearDataset): - raise TypeError("dataset must be of type UKSingleYearDataset.") + # First, apply standard variable-YoY growth based uprating. with open(Path(__file__).parent / "uprating_indices.yaml", "r") as f: uprating = yaml.safe_load(f) for index_name, variables in uprating.items(): index_rel_change = parameters.get_child(index_name)( - dataset.time_period + current_year.time_period ) for variable in variables: - for df in dataset.tables: + for table_name, df in zip( + current_year.table_names, current_year.tables + ): if variable in df.columns: - df[variable] *= 1 + index_rel_change + prev_year_value = getattr(previous_year, table_name)[ + variable + ] + current_year_value = prev_year_value * index_rel_change + getattr(current_year, table_name)[ + variable + ] = current_year_value + + # Next, apply custom uprating. + + # Council Tax is uprated by OBR forecasts/outturns by country. + + current_year = uprate_council_tax(current_year, previous_year, parameters) + + # Rent is uprated by OBR forecasts/outturns by region. + + current_year = uprate_rent(current_year, previous_year, parameters) + + current_year.validate() + + return current_year + + +def uprate_council_tax( + current_year: UKSingleYearDataset, + previous_year: UKSingleYearDataset, + parameters: ParameterNode, +): + # Uprate council tax for a single year dataset. + + council_tax = ( + parameters.gov.economic_assumptions.yoy_growth.obr.council_tax + ) + region = current_year.household["region"] + country = np.select( + [ + region == "WALES", + region == "SCOTLAND", + region == "NORTHERN IRELAND", + ], + [ + "WALES", + "SCOTLAND", + "NORTHERN IRELAND", + ], + default="ENGLAND", + ) + growth_rates = np.select( + [ + country == "ENGLAND", + country == "WALES", + country == "SCOTLAND", + ], + [ + council_tax.england(current_year.time_period), + council_tax.wales(current_year.time_period), + council_tax.scotland(current_year.time_period), + ], + default=0, + ) + + current_year.household["council_tax"] = previous_year.household[ + "council_tax" + ] * (1 + growth_rates) + return current_year + + +def uprate_rent( + current_year: UKSingleYearDataset, + previous_year: UKSingleYearDataset, + parameters: ParameterNode, +): + # Uprate rent for a single year dataset. + is_private_rented = ( + current_year.household["tenure_type"] == "RENT_PRIVATELY" + ) + region = current_year.household["region"] + prev_rent = previous_year.household["rent"] + growth = parameters.gov.economic_assumptions.yoy_growth + year = int(current_year.time_period) + social_rent_growth = growth.obr.social_rent(year) + + if year < 2022: + raise ValueError( + "Rent uprating is not supported for years before 2022." + ) + elif year < 2025: + # We have regional growth rates for private rent. + regional_growth_rate = growth.ons.private_rental_prices(year)[ + region.values + ] + current_year.household["rent"] = np.where( + is_private_rented, + prev_rent * (1 + regional_growth_rate), + prev_rent * (1 + social_rent_growth), + ) + elif year >= 2025: + # Back out private rent growth from the aggregate + # from latest English Housing Survey data + PRIVATE_RENTAL_HOUSEHOLDS = 0.188 + SOCIAL_RENTAL_HOUSEHOLDS = 0.164 + + total_rental_households = ( + PRIVATE_RENTAL_HOUSEHOLDS + SOCIAL_RENTAL_HOUSEHOLDS + ) + + private_weight = PRIVATE_RENTAL_HOUSEHOLDS / total_rental_households + social_weight = SOCIAL_RENTAL_HOUSEHOLDS / total_rental_households + + aggregate_growth = growth.obr.rent(year) + private_rent_growth = ( + aggregate_growth - social_weight * social_rent_growth + ) / private_weight + print( + f"Backed out private rent growth: {private_rent_growth:.1%} in {year}" + ) + print(f"OBR aggregate rent growth: {aggregate_growth:.1%} in {year}") + print(f"Social rent growth: {social_rent_growth:.1%} in {year}") + + current_year.household["rent"] = np.where( + is_private_rented, + prev_rent * (1 + private_rent_growth), + prev_rent * (1 + social_rent_growth), + ) - dataset.validate() + return current_year def reset_uprating( diff --git a/policyengine_uk/parameters/gov/economic_assumptions/yoy_growth.yaml b/policyengine_uk/parameters/gov/economic_assumptions/yoy_growth.yaml index 1e89fd51a..4c278655e 100644 --- a/policyengine_uk/parameters/gov/economic_assumptions/yoy_growth.yaml +++ b/policyengine_uk/parameters/gov/economic_assumptions/yoy_growth.yaml @@ -257,7 +257,7 @@ obr: description: Rent year-on-year growth (CPI+1%, one year lagged). values: 2022-01-01: 0.050 - 2023-01-01: 0.110 + 2023-01-01: 0.070 2024-01-01: 0.067 2025-01-01: 0.033 2026-01-01: 0.042 @@ -270,6 +270,25 @@ obr: reference: - title: OBR EFO March 2025 href: https://obr.uk/efo/economic-and-fiscal-outlook-march-2025/ + rent: + description: Rent year-on-year growth, private and social (ONS series D7GQ). + + values: + 2022-01-01: 0.041 + 2023-01-01: 0.063 + 2024-01-01: 0.074 + 2025-01-01: 0.057 + 2026-01-01: 0.036 + 2027-01-01: 0.027 + 2028-01-01: 0.023 + 2029-01-01: 0.024 + + metadata: + unit: /1 + label: rent growth + reference: + - title: OBR EFO March 2025 + href: https://obr.uk/efo/economic-and-fiscal-outlook-march-2025/ ons: population: @@ -290,6 +309,177 @@ ons: reference: - title: ONS Population Projections href: https://www.ons.gov.uk/ + private_rental_prices: + UNITED_KINGDOM: + description: Private rental prices year-on-year growth in United Kingdom. + values: + 2022-01-01: 0.050398 + 2023-01-01: 0.080953 + 2024-01-01: 0.086137 + metadata: + unit: /1 + label: united kingdom private rental prices growth + reference: + - title: ONS Index of Private Housing Rental Prices, UK monthly estimates + href: https://www.ons.gov.uk/economy/inflationandpriceindices/datasets/indexofprivatehousingrentalpricesreferencetables + ENGLAND: + description: Private rental prices year-on-year growth in England. + values: + 2022-01-01: 0.048965 + 2023-01-01: 0.078981 + 2024-01-01: 0.08694 + metadata: + unit: /1 + label: england private rental prices growth + reference: + - title: ONS Index of Private Housing Rental Prices, UK monthly estimates + href: https://www.ons.gov.uk/economy/inflationandpriceindices/datasets/indexofprivatehousingrentalpricesreferencetables + WALES: + description: Private rental prices year-on-year growth in Wales. + values: + 2022-01-01: 0.049352 + 2023-01-01: 0.090369 + 2024-01-01: 0.082778 + metadata: + unit: /1 + label: wales private rental prices growth + reference: + - title: ONS Index of Private Housing Rental Prices, UK monthly estimates + href: https://www.ons.gov.uk/economy/inflationandpriceindices/datasets/indexofprivatehousingrentalpricesreferencetables + SCOTLAND: + description: Private rental prices year-on-year growth in Scotland. + values: + 2022-01-01: 0.062296 + 2023-01-01: 0.107356 + 2024-01-01: 0.073967 + metadata: + unit: /1 + label: scotland private rental prices growth + reference: + - title: ONS Index of Private Housing Rental Prices, UK monthly estimates + href: https://www.ons.gov.uk/economy/inflationandpriceindices/datasets/indexofprivatehousingrentalpricesreferencetables + NORTHERN_IRELAND: + description: Private rental prices year-on-year growth in Northern Ireland. + values: + 2022-01-01: 0.08965 + 2023-01-01: 0.089197 + 2024-01-01: 0.090822 + metadata: + unit: /1 + label: northern ireland private rental prices growth + reference: + - title: ONS Index of Private Housing Rental Prices, UK monthly estimates + href: https://www.ons.gov.uk/economy/inflationandpriceindices/datasets/indexofprivatehousingrentalpricesreferencetables + NORTH_EAST: + description: Private rental prices year-on-year growth in North East. + values: + 2022-01-01: 0.042495 + 2023-01-01: 0.055804 + 2024-01-01: 0.073116 + metadata: + unit: /1 + label: north east private rental prices growth + reference: + - title: ONS Index of Private Housing Rental Prices, UK monthly estimates + href: https://www.ons.gov.uk/economy/inflationandpriceindices/datasets/indexofprivatehousingrentalpricesreferencetables + NORTH_WEST: + description: Private rental prices year-on-year growth in North West. + values: + 2022-01-01: 0.062822 + 2023-01-01: 0.07947 + 2024-01-01: 0.094498 + metadata: + unit: /1 + label: north west private rental prices growth + reference: + - title: ONS Index of Private Housing Rental Prices, UK monthly estimates + href: https://www.ons.gov.uk/economy/inflationandpriceindices/datasets/indexofprivatehousingrentalpricesreferencetables + YORKSHIRE: + description: + Private rental prices year-on-year growth in Yorkshire and The + Humber. + values: + 2022-01-01: 0.05625 + 2023-01-01: 0.072295 + 2024-01-01: 0.063506 + metadata: + unit: /1 + label: yorkshire and the humber private rental prices growth + reference: + - title: ONS Index of Private Housing Rental Prices, UK monthly estimates + href: https://www.ons.gov.uk/economy/inflationandpriceindices/datasets/indexofprivatehousingrentalpricesreferencetables + EAST_MIDLANDS: + description: Private rental prices year-on-year growth in East Midlands. + values: + 2022-01-01: 0.053405 + 2023-01-01: 0.067085 + 2024-01-01: 0.087371 + metadata: + unit: /1 + label: east midlands private rental prices growth + reference: + - title: ONS Index of Private Housing Rental Prices, UK monthly estimates + href: https://www.ons.gov.uk/economy/inflationandpriceindices/datasets/indexofprivatehousingrentalpricesreferencetables + WEST_MIDLANDS: + description: Private rental prices year-on-year growth in West Midlands. + values: + 2022-01-01: 0.043415 + 2023-01-01: 0.077504 + 2024-01-01: 0.084456 + metadata: + unit: /1 + label: west midlands private rental prices growth + reference: + - title: ONS Index of Private Housing Rental Prices, UK monthly estimates + href: https://www.ons.gov.uk/economy/inflationandpriceindices/datasets/indexofprivatehousingrentalpricesreferencetables + EAST_OF_ENGLAND: + description: Private rental prices year-on-year growth in East of England. + values: + 2022-01-01: 0.044759 + 2023-01-01: 0.058668 + 2024-01-01: 0.077271 + metadata: + unit: /1 + label: east of england private rental prices growth + reference: + - title: ONS Index of Private Housing Rental Prices, UK monthly estimates + href: https://www.ons.gov.uk/economy/inflationandpriceindices/datasets/indexofprivatehousingrentalpricesreferencetables + LONDON: + description: Private rental prices year-on-year growth in London. + values: + 2022-01-01: 0.042829 + 2023-01-01: 0.095307 + 2024-01-01: 0.103031 + metadata: + unit: /1 + label: london private rental prices growth + reference: + - title: ONS Index of Private Housing Rental Prices, UK monthly estimates + href: https://www.ons.gov.uk/economy/inflationandpriceindices/datasets/indexofprivatehousingrentalpricesreferencetables + SOUTH_EAST: + description: Private rental prices year-on-year growth in South East. + values: + 2022-01-01: 0.050307 + 2023-01-01: 0.071938 + 2024-01-01: 0.077617 + metadata: + unit: /1 + label: south east private rental prices growth + reference: + - title: ONS Index of Private Housing Rental Prices, UK monthly estimates + href: https://www.ons.gov.uk/economy/inflationandpriceindices/datasets/indexofprivatehousingrentalpricesreferencetables + SOUTH_WEST: + description: Private rental prices year-on-year growth in South West. + values: + 2022-01-01: 0.059055 + 2023-01-01: 0.069351 + 2024-01-01: 0.06671 + metadata: + unit: /1 + label: south west private rental prices growth + reference: + - title: ONS Index of Private Housing Rental Prices, UK monthly estimates + href: https://www.ons.gov.uk/economy/inflationandpriceindices/datasets/indexofprivatehousingrentalpricesreferencetables ofwat: water_bills: description: Water and sewerage bills year-on-year growth. From 8500ba83252df51f79fb7518a5e27e02ed69b5fa Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Mon, 21 Jul 2025 14:13:02 +0100 Subject: [PATCH 06/11] Downgrade microdf --- pyproject.toml | 2 +- uv.lock | 31 ++++++++++++++++++++++++++----- 2 files changed, 27 insertions(+), 6 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index e41ba3529..42310ceb2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,7 +21,7 @@ classifiers = [ requires-python = ">=3.10" dependencies = [ "policyengine-core>=3.6.4", - "microdf-python", + "microdf-python==0.4.6", ] [project.urls] diff --git a/uv.lock b/uv.lock index 977d7299b..ab769d1be 100644 --- a/uv.lock +++ b/uv.lock @@ -381,6 +381,11 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/3c/38/bbe2e63902847cf79036ecc75550d0698af31c91c7575352eb25190d0fb3/coverage-7.9.2-py3-none-any.whl", hash = "sha256:e425cd5b00f6fc0ed7cdbd766c70be8baab4b7839e4d4fe5fac48581dd968ea4", size = 204005, upload-time = "2025-07-03T10:54:13.491Z" }, ] +[package.optional-dependencies] +toml = [ + { name = "tomli", marker = "python_full_version <= '3.11'" }, +] + [[package]] name = "datetime" version = "5.5" @@ -987,15 +992,15 @@ wheels = [ [[package]] name = "microdf-python" -version = "0.4.4" +version = "0.4.6" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "numpy" }, { name = "pandas" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/3a/d4/d73c7d2505e7247f761b42c84893474c8c0cbd860b0272bf019a74fbb19e/microdf_python-0.4.4.tar.gz", hash = "sha256:01ffb4f1e1f1fbceff5e9e448971ed39905916d52fbe1d87c48179c7278fb7f2", size = 22177, upload-time = "2025-07-09T17:40:10.739Z" } +sdist = { url = "https://files.pythonhosted.org/packages/0d/40/8c015dd57b0bcb96f3551a4ddac536963f23e8024e433df1faa8a1ddce1e/microdf_python-0.4.6.tar.gz", hash = "sha256:8066feb06a9f52a8b8f1de59009601539fdf8a344f6854e290fb9e2aebd3984c", size = 30630, upload-time = "2025-07-18T11:45:02.867Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/bc/51/bd8752149339e023d7f2a0a42ae3612fb38045bc873eaa465da132e0cc3c/microdf_python-0.4.4-py3-none-any.whl", hash = "sha256:435e9e9852612a81de1bdcffdc606c25dcd3fa2ea43d9fb52a85b2887bbca0ec", size = 26793, upload-time = "2025-07-09T17:40:09.641Z" }, + { url = "https://files.pythonhosted.org/packages/46/7a/aca1b60540514d77cad374085150e5c61ba4d6ad5d9f80c5c4d728f37688/microdf_python-0.4.6-py3-none-any.whl", hash = "sha256:63b11174347b62e781da7e1a1358b0ca0c6e8c7595e3f503307b341a0d42876b", size = 37916, upload-time = "2025-07-18T11:45:01.705Z" }, ] [[package]] @@ -1441,7 +1446,7 @@ wheels = [ [[package]] name = "policyengine-uk" -version = "2.35.1" +version = "2.39.3" source = { editable = "." } dependencies = [ { name = "microdf-python" }, @@ -1456,6 +1461,7 @@ dev = [ { name = "jupyter-book" }, { name = "linecheck" }, { name = "pytest" }, + { name = "pytest-cov" }, { name = "setuptools" }, { name = "snowballstemmer" }, { name = "sphinx-argparse" }, @@ -1478,9 +1484,10 @@ requires-dist = [ { name = "furo", marker = "extra == 'dev'", specifier = "<2023" }, { name = "jupyter-book", marker = "extra == 'dev'" }, { name = "linecheck", marker = "extra == 'dev'" }, - { name = "microdf-python" }, + { name = "microdf-python", specifier = "==0.4.6" }, { name = "policyengine-core", specifier = ">=3.6.4" }, { name = "pytest", marker = "extra == 'dev'" }, + { name = "pytest-cov", marker = "extra == 'dev'" }, { name = "setuptools", marker = "extra == 'dev'" }, { name = "snowballstemmer", marker = "extra == 'dev'", specifier = ">=2,<3" }, { name = "sphinx-argparse", marker = "extra == 'dev'", specifier = ">=0.3.2,<1" }, @@ -1629,6 +1636,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/29/16/c8a903f4c4dffe7a12843191437d7cd8e32751d5de349d45d3fe69544e87/pytest-8.4.1-py3-none-any.whl", hash = "sha256:539c70ba6fcead8e78eebbf1115e8b589e7565830d7d006a8723f19ac8a0afb7", size = 365474, upload-time = "2025-06-18T05:48:03.955Z" }, ] +[[package]] +name = "pytest-cov" +version = "6.2.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "coverage", extra = ["toml"] }, + { name = "pluggy" }, + { name = "pytest" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/18/99/668cade231f434aaa59bbfbf49469068d2ddd945000621d3d165d2e7dd7b/pytest_cov-6.2.1.tar.gz", hash = "sha256:25cc6cc0a5358204b8108ecedc51a9b57b34cc6b8c967cc2c01a4e00d8a67da2", size = 69432, upload-time = "2025-06-12T10:47:47.684Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/bc/16/4ea354101abb1287856baa4af2732be351c7bee728065aed451b678153fd/pytest_cov-6.2.1-py3-none-any.whl", hash = "sha256:f5bc4c23f42f1cdd23c70b1dab1bbaef4fc505ba950d53e0081d0730dd7e86d5", size = 24644, upload-time = "2025-06-12T10:47:45.932Z" }, +] + [[package]] name = "python-dateutil" version = "2.9.0.post0" From 271cef493fefefdfe07ee61276d22c1eb73dacc8 Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Mon, 21 Jul 2025 14:15:54 +0100 Subject: [PATCH 07/11] 0.4.5 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 42310ceb2..c38001126 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,7 +21,7 @@ classifiers = [ requires-python = ">=3.10" dependencies = [ "policyengine-core>=3.6.4", - "microdf-python==0.4.6", + "microdf-python==0.4.5", ] [project.urls] From 1d180af79ec1b1abb58143321d86ce379ab89128 Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Mon, 21 Jul 2025 14:16:36 +0100 Subject: [PATCH 08/11] 0.4.4? --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index c38001126..c48375307 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,7 +21,7 @@ classifiers = [ requires-python = ">=3.10" dependencies = [ "policyengine-core>=3.6.4", - "microdf-python==0.4.5", + "microdf-python==0.4.4", ] [project.urls] From 71cbcdec43ed03eb2528170489bcdc766390b319 Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Mon, 21 Jul 2025 14:22:52 +0100 Subject: [PATCH 09/11] Add versioning --- changelog_entry.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/changelog_entry.yaml b/changelog_entry.yaml index e69de29bb..ee819cb21 100644 --- a/changelog_entry.yaml +++ b/changelog_entry.yaml @@ -0,0 +1,5 @@ +- bump: minor + changes: + added: + - UKMultiYearDataset class to handle multiple fiscal years. + - Uprating of datasets using the `uprate` method. From 15e09772c175ff70276fcc46a0c64665b1c1ee8e Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Mon, 21 Jul 2025 16:23:56 +0100 Subject: [PATCH 10/11] UKMultiYearDataset fails when downloading from HuggingFace Fixes #1256 --- changelog_entry.yaml | 7 +++---- policyengine_uk/data/dataset_schema.py | 11 +++++------ policyengine_uk/system.py | 8 ++++++-- 3 files changed, 14 insertions(+), 12 deletions(-) diff --git a/changelog_entry.yaml b/changelog_entry.yaml index ee819cb21..ae4337a7b 100644 --- a/changelog_entry.yaml +++ b/changelog_entry.yaml @@ -1,5 +1,4 @@ -- bump: minor +- bump: patch changes: - added: - - UKMultiYearDataset class to handle multiple fiscal years. - - Uprating of datasets using the `uprate` method. + fixed: + - Bug in handling downloads of UKMultiYearDataset from HuggingFace. \ No newline at end of file diff --git a/policyengine_uk/data/dataset_schema.py b/policyengine_uk/data/dataset_schema.py index a6cbd8cdf..9f6a04f2b 100644 --- a/policyengine_uk/data/dataset_schema.py +++ b/policyengine_uk/data/dataset_schema.py @@ -213,12 +213,11 @@ def validate_file_path(file_path: str): # Check if the file contains datasets for multiple years with h5py.File(file_path, "r") as f: - if not any(key.startswith("/person/") for key in f.keys()): - raise ValueError("No person dataset found in the file.") - if not any(key.startswith("/benunit/") for key in f.keys()): - raise ValueError("No benunit dataset found in the file.") - if not any(key.startswith("/household/") for key in f.keys()): - raise ValueError("No household dataset found in the file.") + for required_dataset in ["person", "benunit", "household"]: + if not any(f"{required_dataset}" in key for key in f.keys()): + raise ValueError( + f"Dataset '{required_dataset}' not found in the file: {file_path}" + ) def load(self): data = {} diff --git a/policyengine_uk/system.py b/policyengine_uk/system.py index 01385a546..ae2d9bcf8 100644 --- a/policyengine_uk/system.py +++ b/policyengine_uk/system.py @@ -202,9 +202,13 @@ def __init__(self, *args, dataset=ENHANCED_FRS, **kwargs): dataset = UKMultiYearDataset( file_path=dataset_file_path ) - except: + except Exception as e: pass - dataset = Dataset.from_file(dataset_file_path) + + if not isinstance( + dataset, (UKSingleYearDataset, UKMultiYearDataset) + ): + dataset = Dataset.from_file(dataset_file_path) super().__init__(*args, dataset=dataset, **kwargs) From f5cbfe4d66ce1fdd0c6b224a0ff85d17db43efa3 Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Mon, 21 Jul 2025 16:32:20 +0100 Subject: [PATCH 11/11] Update tests --- .../tests/microsimulation/reforms_config.yaml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/policyengine_uk/tests/microsimulation/reforms_config.yaml b/policyengine_uk/tests/microsimulation/reforms_config.yaml index b6be6b317..71ce38fb9 100644 --- a/policyengine_uk/tests/microsimulation/reforms_config.yaml +++ b/policyengine_uk/tests/microsimulation/reforms_config.yaml @@ -1,10 +1,10 @@ reforms: - name: Raise basic rate by 1pp - expected_impact: 7.6 + expected_impact: 7.5 parameters: gov.hmrc.income_tax.rates.uk[0].rate: 0.21 - name: Raise higher rate by 1pp - expected_impact: 4.8 + expected_impact: 4.7 parameters: gov.hmrc.income_tax.rates.uk[1].rate: 0.42 - name: Raise personal allowance by ~800GBP/year @@ -16,7 +16,7 @@ reforms: parameters: gov.hmrc.child_benefit.amount.additional: 25 - name: Reduce Universal Credit taper rate to 20% - expected_impact: -36.5 + expected_impact: -36.4 parameters: gov.dwp.universal_credit.means_test.reduction_rate: 0.2 - name: Raise Class 1 main employee NICs rate to 10% @@ -24,10 +24,10 @@ reforms: parameters: gov.hmrc.national_insurance.class_1.rates.employee.main: 0.1 - name: Raise VAT standard rate by 2pp - expected_impact: 18.8 + expected_impact: 18.9 parameters: gov.hmrc.vat.standard_rate: 0.22 - name: Raise additional rate by 3pp - expected_impact: 5.5 + expected_impact: 5.3 parameters: gov.hmrc.income_tax.rates.uk[2].rate: 0.48