Skip to content

Commit 048f389

Browse files
Merge pull request #211 from PolicyEngine/refactor-income-imputation
Refactor income imputation and remove winter fuel allowance from loss calculations
2 parents 32d4e8c + 9b2eebe commit 048f389

3 files changed

Lines changed: 44 additions & 7 deletions

File tree

changelog_entry.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
- bump: patch
2+
changes:
3+
changed:
4+
- Refactored income imputation to selectively impute only dividend income on the main dataset.
5+
- Removed winter fuel allowance from loss calculations.

policyengine_uk_data/datasets/imputations/income.py

Lines changed: 37 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,31 @@ def create_income_model(overwrite_existing: bool = False):
140140
return save_imputation_models()
141141

142142

143+
def impute_over_incomes(
    dataset: UKSingleYearDataset, model, output_variables: list[str]
) -> UKSingleYearDataset:
    """
    Impute specified income components using trained model.

    Args:
        dataset: PolicyEngine UK dataset to augment with income data.
        model: Trained imputation model; its ``predict`` method is called
            with a dataframe of the age, gender and region variables.
        output_variables: List of income components to impute. Each name
            must be a column of the model's prediction output.

    Returns:
        A copy of the dataset (made with ``dataset.copy()``) whose person
        table holds the imputed income components.
    """
    # Work on a copy so the assignments below target the copy rather than
    # the object passed in by the caller.
    dataset = dataset.copy()
    input_df = Microsimulation(dataset=dataset).calculate_dataframe(
        ["age", "gender", "region"]
    )
    output_df = model.predict(input_df)

    # Only the requested columns are written; missing predictions are
    # stored as zero.
    for column in output_variables:
        dataset.person[column] = output_df[column].fillna(0).values

    return dataset
166+
167+
143168
def impute_income(dataset: UKSingleYearDataset) -> UKSingleYearDataset:
144169
"""
145170
Impute detailed income components using trained model.
@@ -161,16 +186,23 @@ def impute_income(dataset: UKSingleYearDataset) -> UKSingleYearDataset:
161186
zero_weight_copy = subsample_dataset(zero_weight_copy, 10_000)
162187

163188
model = create_income_model()
164-
sim = Microsimulation(dataset=zero_weight_copy)
165189

166-
input_df = sim.calculate_dataframe(["age", "gender", "region"])
190+
# Impute the full variable set on the zero-weight copy, but only dividends on the original dataset
167191

168-
output_df = model.predict(input_df)
192+
zero_weight_copy = impute_over_incomes(
193+
zero_weight_copy,
194+
model,
195+
IMPUTATIONS,
196+
)
169197

170-
for column in output_df.columns:
171-
zero_weight_copy.person[column] = output_df[column].fillna(0).values
198+
dataset = impute_over_incomes(
199+
dataset,
200+
model,
201+
["dividend_income"],
202+
)
172203

173204
zero_weight_copy.validate()
205+
dataset.validate()
174206

175207
data = stack_datasets(
176208
dataset,

policyengine_uk_data/utils/loss.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,7 @@ def pe_count(*variables):
122122
on_uc * ~unemployed
123123
)
124124

125-
df["obr/winter_fuel_allowance_count"] = pe_count("winter_fuel_allowance")
125+
# df["obr/winter_fuel_allowance_count"] = pe_count("winter_fuel_allowance")
126126
df["obr/capital_gains_tax"] = pe("capital_gains_tax")
127127
df["obr/child_benefit"] = pe("child_benefit")
128128

@@ -152,7 +152,7 @@ def pe_count(*variables):
152152
)
153153

154154
df["obr/vat"] = pe("vat")
155-
df["obr/winter_fuel_allowance"] = pe("winter_fuel_allowance")
155+
# df["obr/winter_fuel_allowance"] = pe("winter_fuel_allowance")
156156

157157
# Not strictly from the OBR but from the 2024 Independent Schools Council census. OBR will be using that.
158158
df["obr/private_school_students"] = pe("attends_private_school")

0 commit comments

Comments
 (0)