Skip to content

Commit d72c484

Browse files
Merge pull request #248 from PolicyEngine/la-calibration
Calibrate local authority microdata to ONS experimental data
2 parents 4cdbbb2 + 10354e8 commit d72c484

15 files changed

Lines changed: 713 additions & 57 deletions

.github/workflows/pull_request.yaml

Lines changed: 10 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -51,10 +51,17 @@ jobs:
5151
HUGGING_FACE_TOKEN: ${{ secrets.HUGGING_FACE_TOKEN }}
5252
- name: Build datasets
5353
run: make data
54-
- name: Save calibration log
54+
- name: Save calibration log (constituencies)
5555
uses: actions/upload-artifact@v4
5656
with:
57-
name: calibration_log.csv
58-
path: calibration_log.csv
57+
name: constituency_calibration_log.csv
58+
path: constituency_calibration_log.csv
59+
60+
- name: Save calibration log (local authorities)
61+
uses: actions/upload-artifact@v4
62+
with:
63+
name: la_calibration_log.csv
64+
path: la_calibration_log.csv
65+
5966
- name: Run tests
6067
run: make test

.github/workflows/push.yaml

Lines changed: 9 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -58,11 +58,17 @@ jobs:
5858
HUGGING_FACE_TOKEN: ${{ secrets.HUGGING_FACE_TOKEN }}
5959
- name: Build datasets
6060
run: make data
61-
- name: Save calibration log
61+
- name: Save calibration log (constituencies)
6262
uses: actions/upload-artifact@v4
6363
with:
64-
name: calibration_log.csv
65-
path: calibration_log.csv
64+
name: constituency_calibration_log.csv
65+
path: constituency_calibration_log.csv
66+
67+
- name: Save calibration log (local authorities)
68+
uses: actions/upload-artifact@v4
69+
with:
70+
name: la_calibration_log.csv
71+
path: la_calibration_log.csv
6672
- name: Run tests
6773
run: make test
6874
- name: Upload data

changelog_entry.yaml

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,4 @@
1+
- bump: minor
2+
changes:
3+
added:
4+
- Housing cost calibration to LAs.

policyengine_uk_data/datasets/create_datasets.py

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -128,28 +128,33 @@ def main():
128128
area_count=650,
129129
weight_file="parliamentary_constituency_weights.h5",
130130
excluded_training_targets=[],
131-
log_csv="calibration_log.csv",
131+
log_csv="constituency_calibration_log.csv",
132132
verbose=True, # Enable nested progress display
133133
area_name="Constituency",
134134
get_performance=get_performance,
135135
nested_progress=nested_progress, # Pass the nested progress manager
136136
)
137137

138138
from policyengine_uk_data.datasets.local_areas.local_authorities.calibrate import (
139+
get_performance as get_la_performance,
140+
)
141+
from policyengine_uk_data.datasets.local_areas.local_authorities.loss import (
139142
create_local_authority_target_matrix,
140143
)
141144

142145
# Run calibration with verbose progress
143146
frs_calibrated_las = calibrate_local_areas(
144147
dataset=frs,
148+
epochs=512,
145149
matrix_fn=create_local_authority_target_matrix,
146150
national_matrix_fn=create_national_target_matrix,
147151
area_count=360,
148152
weight_file="local_authority_weights.h5",
149153
excluded_training_targets=[],
150-
log_csv="calibration_log.csv",
154+
log_csv="la_calibration_log.csv",
151155
verbose=True, # Enable nested progress display
152156
area_name="Local Authority",
157+
get_performance=get_la_performance,
153158
nested_progress=nested_progress, # Pass the nested progress manager
154159
)
155160

policyengine_uk_data/datasets/imputations/salary_sacrifice.py

Lines changed: 0 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -93,20 +93,6 @@ def save_salary_sacrifice_model():
9393
}
9494
)
9595

96-
n_participants = (
97-
train_df["pension_contributions_via_salary_sacrifice"] > 0
98-
).sum()
99-
print(f"Training salary sacrifice model on {len(train_df)} observations")
100-
print(
101-
f" With SS contributions: {n_participants} "
102-
f"({n_participants / len(train_df):.1%})"
103-
)
104-
mean_amount = train_df.loc[
105-
train_df["pension_contributions_via_salary_sacrifice"] > 0,
106-
"pension_contributions_via_salary_sacrifice",
107-
].mean()
108-
print(f" Mean SS amount (participants): £{mean_amount:,.0f}")
109-
11096
# Train QRF model
11197
model = QRF()
11298
model.fit(train_df[PREDICTORS], train_df[IMPUTATIONS])
@@ -166,17 +152,10 @@ def impute_salary_sacrifice(
166152

167153
# Get indicator for who was asked
168154
if "salary_sacrifice_asked" not in dataset.person.columns:
169-
print(
170-
"Warning: salary_sacrifice_asked not in dataset, "
171-
"skipping imputation"
172-
)
173155
return dataset
174156

175157
ss_asked = dataset.person.salary_sacrifice_asked.values
176158

177-
# Identify imputation candidates: those not asked about SS
178-
not_asked = ss_asked == 0
179-
180159
# Create prediction DataFrame for all records
181160
pred_df = pd.DataFrame(
182161
{
@@ -208,17 +187,4 @@ def impute_salary_sacrifice(
208187
# Update dataset
209188
dataset.person["pension_contributions_via_salary_sacrifice"] = final_ss
210189

211-
# Report results (no targeting - just descriptive)
212-
weights = sim.calculate("person_weight").values
213-
is_employee = employment_income > 0
214-
total_ss = (final_ss * weights).sum()
215-
participation_rate = ((final_ss > 0) * weights * is_employee).sum() / (
216-
weights * is_employee
217-
).sum()
218-
219-
print("Salary sacrifice imputation results (pre-calibration):")
220-
print(f" Total SS contributions: £{total_ss / 1e9:.1f}bn")
221-
print(f" Employee participation rate: {participation_rate:.1%}")
222-
print(" (Final totals depend on subsequent weight calibration)")
223-
224190
return dataset

policyengine_uk_data/datasets/imputations/student_loans.py

Lines changed: 0 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -75,17 +75,4 @@ def impute_student_loan_plan(
7575
# Store as the plan type
7676
dataset.person["student_loan_plan"] = plan
7777

78-
# Report imputation results
79-
weights = sim.calculate("person_weight").values
80-
total_with_loan = (has_student_loan * weights).sum()
81-
plan_1_count = (plan_1_mask * weights).sum()
82-
plan_2_count = (plan_2_mask * weights).sum()
83-
plan_5_count = (plan_5_mask * weights).sum()
84-
85-
print("Student loan plan imputation results:")
86-
print(f" Total with student loan: {total_with_loan / 1e6:.2f}m")
87-
print(f" Plan 1 (pre-2012): {plan_1_count / 1e6:.2f}m")
88-
print(f" Plan 2 (2012-2023): {plan_2_count / 1e6:.2f}m")
89-
print(f" Plan 5 (2023+): {plan_5_count / 1e6:.2f}m")
90-
9178
return dataset

policyengine_uk_data/datasets/local_areas/constituencies/calibrate.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@
1111
def calibrate(
1212
dataset: UKSingleYearDataset,
1313
excluded_training_targets=[],
14-
log_csv="calibration_log.csv",
14+
log_csv="constituency_calibration_log.csv",
1515
verbose: bool = False,
1616
):
1717
return calibrate_local_areas(

policyengine_uk_data/datasets/local_areas/local_authorities/calibrate.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@
1111
def calibrate(
1212
dataset: UKSingleYearDataset,
1313
excluded_training_targets=[],
14-
log_csv="calibration_log.csv",
14+
log_csv="la_calibration_log.csv",
1515
verbose: bool = False,
1616
):
1717
return calibrate_local_areas(

0 commit comments

Comments
 (0)