Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 10 additions & 3 deletions .github/workflows/pull_request.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -51,10 +51,17 @@ jobs:
HUGGING_FACE_TOKEN: ${{ secrets.HUGGING_FACE_TOKEN }}
- name: Build datasets
run: make data
- name: Save calibration log
- name: Save calibration log (constituencies)
uses: actions/upload-artifact@v4
with:
name: calibration_log.csv
path: calibration_log.csv
name: constituency_calibration_log.csv
path: constituency_calibration_log.csv

- name: Save calibration log (local authorities)
uses: actions/upload-artifact@v4
with:
name: la_calibration_log.csv
path: la_calibration_log.csv

- name: Run tests
run: make test
12 changes: 9 additions & 3 deletions .github/workflows/push.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -58,11 +58,17 @@ jobs:
HUGGING_FACE_TOKEN: ${{ secrets.HUGGING_FACE_TOKEN }}
- name: Build datasets
run: make data
- name: Save calibration log
- name: Save calibration log (constituencies)
uses: actions/upload-artifact@v4
with:
name: calibration_log.csv
path: calibration_log.csv
name: constituency_calibration_log.csv
path: constituency_calibration_log.csv

- name: Save calibration log (local authorities)
uses: actions/upload-artifact@v4
with:
name: la_calibration_log.csv
path: la_calibration_log.csv
- name: Run tests
run: make test
- name: Upload data
Expand Down
4 changes: 4 additions & 0 deletions changelog_entry.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
- bump: minor
changes:
added:
- Housing cost calibration to LAs.
9 changes: 7 additions & 2 deletions policyengine_uk_data/datasets/create_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,28 +128,33 @@ def main():
area_count=650,
weight_file="parliamentary_constituency_weights.h5",
excluded_training_targets=[],
-log_csv="calibration_log.csv",
+log_csv="constituency_calibration_log.csv",
verbose=True, # Enable nested progress display
area_name="Constituency",
get_performance=get_performance,
nested_progress=nested_progress, # Pass the nested progress manager
)

from policyengine_uk_data.datasets.local_areas.local_authorities.calibrate import (
get_performance as get_la_performance,
)
from policyengine_uk_data.datasets.local_areas.local_authorities.loss import (
create_local_authority_target_matrix,
)

# Run calibration with verbose progress
frs_calibrated_las = calibrate_local_areas(
dataset=frs,
epochs=512,
matrix_fn=create_local_authority_target_matrix,
national_matrix_fn=create_national_target_matrix,
area_count=360,
weight_file="local_authority_weights.h5",
excluded_training_targets=[],
-log_csv="calibration_log.csv",
+log_csv="la_calibration_log.csv",
verbose=True, # Enable nested progress display
area_name="Local Authority",
get_performance=get_la_performance,
nested_progress=nested_progress, # Pass the nested progress manager
)

Expand Down
34 changes: 0 additions & 34 deletions policyengine_uk_data/datasets/imputations/salary_sacrifice.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,20 +93,6 @@ def save_salary_sacrifice_model():
}
)

n_participants = (
train_df["pension_contributions_via_salary_sacrifice"] > 0
).sum()
print(f"Training salary sacrifice model on {len(train_df)} observations")
print(
f" With SS contributions: {n_participants} "
f"({n_participants / len(train_df):.1%})"
)
mean_amount = train_df.loc[
train_df["pension_contributions_via_salary_sacrifice"] > 0,
"pension_contributions_via_salary_sacrifice",
].mean()
print(f" Mean SS amount (participants): £{mean_amount:,.0f}")

# Train QRF model
model = QRF()
model.fit(train_df[PREDICTORS], train_df[IMPUTATIONS])
Expand Down Expand Up @@ -166,17 +152,10 @@ def impute_salary_sacrifice(

# Get indicator for who was asked
if "salary_sacrifice_asked" not in dataset.person.columns:
print(
"Warning: salary_sacrifice_asked not in dataset, "
"skipping imputation"
)
return dataset

ss_asked = dataset.person.salary_sacrifice_asked.values

# Identify imputation candidates: those not asked about SS
not_asked = ss_asked == 0

# Create prediction DataFrame for all records
pred_df = pd.DataFrame(
{
Expand Down Expand Up @@ -208,17 +187,4 @@ def impute_salary_sacrifice(
# Update dataset
dataset.person["pension_contributions_via_salary_sacrifice"] = final_ss

# Report results (no targeting - just descriptive)
weights = sim.calculate("person_weight").values
is_employee = employment_income > 0
total_ss = (final_ss * weights).sum()
participation_rate = ((final_ss > 0) * weights * is_employee).sum() / (
weights * is_employee
).sum()

print("Salary sacrifice imputation results (pre-calibration):")
print(f" Total SS contributions: £{total_ss / 1e9:.1f}bn")
print(f" Employee participation rate: {participation_rate:.1%}")
print(" (Final totals depend on subsequent weight calibration)")

return dataset
13 changes: 0 additions & 13 deletions policyengine_uk_data/datasets/imputations/student_loans.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,17 +75,4 @@ def impute_student_loan_plan(
# Store as the plan type
dataset.person["student_loan_plan"] = plan

# Report imputation results
weights = sim.calculate("person_weight").values
total_with_loan = (has_student_loan * weights).sum()
plan_1_count = (plan_1_mask * weights).sum()
plan_2_count = (plan_2_mask * weights).sum()
plan_5_count = (plan_5_mask * weights).sum()

print("Student loan plan imputation results:")
print(f" Total with student loan: {total_with_loan / 1e6:.2f}m")
print(f" Plan 1 (pre-2012): {plan_1_count / 1e6:.2f}m")
print(f" Plan 2 (2012-2023): {plan_2_count / 1e6:.2f}m")
print(f" Plan 5 (2023+): {plan_5_count / 1e6:.2f}m")

return dataset
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
def calibrate(
dataset: UKSingleYearDataset,
excluded_training_targets=[],
-log_csv="calibration_log.csv",
+log_csv="constituency_calibration_log.csv",
verbose: bool = False,
):
return calibrate_local_areas(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
def calibrate(
dataset: UKSingleYearDataset,
excluded_training_targets=[],
-log_csv="calibration_log.csv",
+log_csv="la_calibration_log.csv",
verbose: bool = False,
):
return calibrate_local_areas(
Expand Down
Loading