From 0d4e697e15c619ea582cce9d8e84a807eb5379e9 Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Wed, 9 Jul 2025 10:32:23 +0100 Subject: [PATCH] Add dataset schema, remove State variables and add HBAI net income --- policyengine_uk/data/dataset_schema.py | 58 +++++++++++++++++++ policyengine_uk/entities.py | 16 +----- .../variables/gov/hmrc/expected_sdlt.py | 4 +- .../gov/revenue_scotland/expected_lbtt.py | 6 +- .../variables/gov/wra/expected_ltt.py | 6 +- .../consumption/property_sale_rate.py | 13 ----- .../household/demographic/person_state_id.py | 9 --- .../demographic/person_state_role.py | 8 --- .../household/demographic/state_id.py | 9 --- .../household/demographic/state_weight.py | 9 --- .../income/hbai_household_net_income.py | 13 +++++ 11 files changed, 82 insertions(+), 69 deletions(-) create mode 100644 policyengine_uk/data/dataset_schema.py delete mode 100644 policyengine_uk/variables/household/consumption/property_sale_rate.py delete mode 100644 policyengine_uk/variables/household/demographic/person_state_id.py delete mode 100644 policyengine_uk/variables/household/demographic/person_state_role.py delete mode 100644 policyengine_uk/variables/household/demographic/state_id.py delete mode 100644 policyengine_uk/variables/household/demographic/state_weight.py diff --git a/policyengine_uk/data/dataset_schema.py b/policyengine_uk/data/dataset_schema.py new file mode 100644 index 000000000..d2236833f --- /dev/null +++ b/policyengine_uk/data/dataset_schema.py @@ -0,0 +1,58 @@ +import pandas as pd + + +class UKDataset: + person: pd.DataFrame + benunit: pd.DataFrame + household: pd.DataFrame + + def __init__( + self, + file_path: str = None, + person: pd.DataFrame = None, + benunit: pd.DataFrame = None, + household: pd.DataFrame = None, + fiscal_year: int = 2025, + ): + if file_path is not None: + with pd.HDFStore(file_path) as f: + self.person = f["person"] + self.benunit = f["benunit"] + self.household = f["household"] + else: + if person is None or benunit
is None or household is None: + raise ValueError( + "Must provide either a file path or all three DataFrames (person, benunit, household)." + ) + self.person = person + self.benunit = benunit + self.household = household + + self.data_format = "time_period_arrays" + self.time_period = fiscal_year + + def save(self, file_path: str): + with pd.HDFStore(file_path) as f: + f.put("person", self.person, format="table", data_columns=True) + f.put("benunit", self.benunit, format="table", data_columns=True) + f.put( + "household", self.household, format="table", data_columns=True + ) + + def load(self): + data = {} + for df in (self.person, self.benunit, self.household): + for col in df.columns: + data[col] = { + self.time_period: df[col].values, + } + + return data + + def copy(self): + return UKDataset( + person=self.person.copy(), + benunit=self.benunit.copy(), + household=self.household.copy(), + fiscal_year=self.time_period, + ) diff --git a/policyengine_uk/entities.py b/policyengine_uk/entities.py index f87dbb621..d83ee53b2 100644 --- a/policyengine_uk/entities.py +++ b/policyengine_uk/entities.py @@ -3,20 +3,6 @@ # This file defines the entities needed by our legislation.
from policyengine_core.entities import build_entity -State = build_entity( - key="state", - plural="states", - label="State", - roles=[ - { - "key": "member", - "plural": "members", - "label": "Member", - "doc": "A person who is a citizen of a country.", - } - ], -) - Household = build_entity( key="household", plural="households", @@ -57,4 +43,4 @@ is_person=True, ) -entities = [State, Household, BenUnit, Person] +entities = [Household, BenUnit, Person] diff --git a/policyengine_uk/variables/gov/hmrc/expected_sdlt.py b/policyengine_uk/variables/gov/hmrc/expected_sdlt.py index afaf80876..f15450335 100644 --- a/policyengine_uk/variables/gov/hmrc/expected_sdlt.py +++ b/policyengine_uk/variables/gov/hmrc/expected_sdlt.py @@ -13,6 +13,6 @@ def formula(household, period, parameters): if parameters(period).gov.hmrc.stamp_duty.abolish: return 0 return ( - household.state("property_sale_rate", period) - * household("stamp_duty_land_tax", period) + household("stamp_duty_land_tax", period) + * parameters(period).gov.hmrc.stamp_duty.property_sale_rate ) + household("corporate_sdlt", period) diff --git a/policyengine_uk/variables/gov/revenue_scotland/expected_lbtt.py b/policyengine_uk/variables/gov/revenue_scotland/expected_lbtt.py index bdb9d43f3..91d55ec85 100644 --- a/policyengine_uk/variables/gov/revenue_scotland/expected_lbtt.py +++ b/policyengine_uk/variables/gov/revenue_scotland/expected_lbtt.py @@ -9,7 +9,9 @@ class expected_lbtt(Variable): value_type = float unit = GBP - def formula(household, period): - property_sale_rate = household.state("property_sale_rate", period) + def formula(household, period, parameters): + property_sale_rate = parameters( + period + ).gov.hmrc.stamp_duty.property_sale_rate lbtt = household("land_and_buildings_transaction_tax", period) return property_sale_rate * lbtt diff --git a/policyengine_uk/variables/gov/wra/expected_ltt.py b/policyengine_uk/variables/gov/wra/expected_ltt.py index 68d192594..ddf0c6baf 100644 ---
a/policyengine_uk/variables/gov/wra/expected_ltt.py +++ b/policyengine_uk/variables/gov/wra/expected_ltt.py @@ -9,7 +9,9 @@ class expected_ltt(Variable): value_type = float unit = GBP - def formula(household, period): - property_sale_rate = household.state("property_sale_rate", period) + def formula(household, period, parameters): + property_sale_rate = parameters( + period + ).gov.hmrc.stamp_duty.property_sale_rate land_transaction_tax = household("land_transaction_tax", period) return property_sale_rate * land_transaction_tax diff --git a/policyengine_uk/variables/household/consumption/property_sale_rate.py b/policyengine_uk/variables/household/consumption/property_sale_rate.py deleted file mode 100644 index c54c068e0..000000000 --- a/policyengine_uk/variables/household/consumption/property_sale_rate.py +++ /dev/null @@ -1,13 +0,0 @@ -from policyengine_uk.model_api import * - - -class property_sale_rate(Variable): - label = "Residential property sale rate" - documentation = "The percentage of residential property value owned by households sold in the year" - entity = State - definition_period = YEAR - value_type = float - unit = "/1" - - def formula(household, period, parameters): - return parameters(period).gov.hmrc.stamp_duty.property_sale_rate diff --git a/policyengine_uk/variables/household/demographic/person_state_id.py b/policyengine_uk/variables/household/demographic/person_state_id.py deleted file mode 100644 index d1c5720bd..000000000 --- a/policyengine_uk/variables/household/demographic/person_state_id.py +++ /dev/null @@ -1,9 +0,0 @@ -from policyengine_uk.model_api import * - - -class person_state_id(Variable): - label = "State ID" - documentation = "Identity of the state" - entity = Person - definition_period = YEAR - value_type = int diff --git a/policyengine_uk/variables/household/demographic/person_state_role.py b/policyengine_uk/variables/household/demographic/person_state_role.py deleted file mode 100644 index 8b87c798a..000000000 --- 
a/policyengine_uk/variables/household/demographic/person_state_role.py +++ /dev/null @@ -1,8 +0,0 @@ -from policyengine_uk.model_api import * - - -class person_state_role(Variable): - label = "State role" - entity = Person - definition_period = YEAR - value_type = str diff --git a/policyengine_uk/variables/household/demographic/state_id.py b/policyengine_uk/variables/household/demographic/state_id.py deleted file mode 100644 index 6871ab3f7..000000000 --- a/policyengine_uk/variables/household/demographic/state_id.py +++ /dev/null @@ -1,9 +0,0 @@ -from policyengine_uk.model_api import * - - -class state_id(Variable): - label = "State ID" - documentation = "Identity of the state" - entity = State - definition_period = YEAR - value_type = int diff --git a/policyengine_uk/variables/household/demographic/state_weight.py b/policyengine_uk/variables/household/demographic/state_weight.py deleted file mode 100644 index 35fd65341..000000000 --- a/policyengine_uk/variables/household/demographic/state_weight.py +++ /dev/null @@ -1,9 +0,0 @@ -from policyengine_uk.model_api import * - - -class state_weight(Variable): - label = "State weight" - documentation = "Weight value" - entity = State - definition_period = YEAR - value_type = float diff --git a/policyengine_uk/variables/household/income/hbai_household_net_income.py b/policyengine_uk/variables/household/income/hbai_household_net_income.py index 5ddb728b5..569c6efb3 100644 --- a/policyengine_uk/variables/household/income/hbai_household_net_income.py +++ b/policyengine_uk/variables/household/income/hbai_household_net_income.py @@ -52,3 +52,16 @@ class hbai_household_net_income(Variable): "income_tax", "national_insurance", ] + + +class real_hbai_household_net_income(Variable): + label = "real household net income (HBAI definition)" + entity = Household + definition_period = YEAR + value_type = float + unit = GBP + + def formula(household, period, parameters): + return household("hbai_household_net_income", period) * 
household( + "inflation_adjustment", period + )