Skip to content

Commit ebbce12

Browse files
Merge pull request #212 from PolicyEngine/re-add-uc
Re-add UC calibration
2 parents 50c9d5f + 4c743da commit ebbce12

10 files changed

Lines changed: 224 additions & 3 deletions

File tree

changelog_entry.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
- bump: minor
2+
changes:
3+
added:
4+
- Universal Credit calibration at national level by award amount and family type, and at constituency level in total.

policyengine_uk_data/datasets/local_areas/constituencies/loss.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
mapping_matrix,
1717
)
1818
from policyengine_uk.data import UKSingleYearDataset
19+
from policyengine_uk_data.utils.uc_data import uc_pc_households
1920

2021
FOLDER = Path(__file__).parent
2122

@@ -125,6 +126,14 @@ def create_constituency_target_matrix(
125126
employment_incomes.employment_income_lower_bound.sort_values().unique()
126127
) + [np.inf]
127128

129+
# UC household count by constituency
130+
y["uc_households"] = uc_pc_households.household_count.values
131+
matrix["uc_households"] = sim.map_result(
132+
(sim.calculate("universal_credit").values > 0).astype(int),
133+
"benunit",
134+
"household",
135+
)
136+
128137
for lower_bound, upper_bound in zip(bounds[:-1], bounds[1:]):
129138
continue
130139
if (
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# Universal Credit data sources
2+
3+
## National payment distribution
4+
5+
Source: Stat-Xplore (DWP)
6+
- Rows: Monthly award amount bands + Households on Universal Credit
7+
- Columns: Family type
8+
- File: `uc_national_payment_dist.xlsx`
9+
10+
## Parliamentary constituency households
11+
12+
### Great Britain data
13+
14+
Source: Stat-Xplore (DWP)
15+
- Rows: Westminster Parliamentary Constituency 2024 + Households on Universal Credit
16+
- File: `uc_pc_households.xlsx`
17+
18+
### Northern Ireland data
19+
20+
Source: Department for Communities Northern Ireland
21+
- URL: https://www.communities-ni.gov.uk/publications/universal-credit-statistics-may-2025
22+
- File: `dfc-ni-uc-stats-supp-tables-may-2025.ods`
23+
- Sheet: 5b
24+
- Data: Household counts by Westminster Parliamentary Constituency 2024
25+
26+
The NI data is combined with the GB data to produce a complete UK-wide parliamentary constituency table.
27+
28+
## Data processing notes
29+
30+
- The "Unknown" constituency category is excluded from the constituency data
31+
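- Constituency household counts are scaled so that they sum to the national total from the payment distribution data (households in a payment band, i.e. excluding the "No payment" row and suppressed cells), as the two sources have different totals due to timing and methodology differences.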
Binary file not shown.
19.3 KB
Binary file not shown.
33.8 KB
Binary file not shown.

policyengine_uk_data/tests/microsimulation/reforms_config.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ reforms:
44
parameters:
55
gov.hmrc.income_tax.rates.uk[0].rate: 0.21
66
- name: Raise higher rate by 1pp
7-
expected_impact: 5.5
7+
expected_impact: 5.4
88
parameters:
99
gov.hmrc.income_tax.rates.uk[1].rate: 0.42
1010
- name: Raise personal allowance by ~800GBP/year
@@ -16,15 +16,15 @@ reforms:
1616
parameters:
1717
gov.hmrc.child_benefit.amount.additional: 25
1818
- name: Reduce Universal Credit taper rate to 20%
19-
expected_impact: -34.4
19+
expected_impact: -30.7
2020
parameters:
2121
gov.dwp.universal_credit.means_test.reduction_rate: 0.2
2222
- name: Raise Class 1 main employee NICs rate to 10%
2323
expected_impact: 12.4
2424
parameters:
2525
gov.hmrc.national_insurance.class_1.rates.employee.main: 0.1
2626
- name: Raise VAT standard rate by 2pp
27-
expected_impact: 18.7
27+
expected_impact: 19.3
2828
parameters:
2929
gov.hmrc.vat.standard_rate: 0.22
3030
- name: Raise additional rate by 3pp

policyengine_uk_data/utils/loss.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from policyengine_uk_data.storage import STORAGE_FOLDER
1212
from policyengine_uk_data.utils import uprate_values
1313
from policyengine_uk.data import UKSingleYearDataset
14+
from policyengine_uk_data.utils.uc_data import uc_national_payment_dist
1415

1516
tax_benefit = pd.read_csv(STORAGE_FOLDER / "tax_benefit.csv")
1617
tax_benefit["name"] = tax_benefit["name"].apply(lambda x: f"obr/{x}")
@@ -382,6 +383,26 @@ def pe_count(*variables):
382383
60 * 52 * 115_000
383384
) # same source as above, multiply avg cap amount by total capped population
384385

386+
# UC national payment distribution
387+
388+
uc_payment_dist = uc_national_payment_dist
389+
uc_payments = sim.calculate("universal_credit", map_to="benunit").values
390+
uc_family_type = sim.calculate("family_type", map_to="benunit").values
391+
392+
for i, row in uc_payment_dist.iterrows():
393+
lower = row.uc_annual_payment_min
394+
upper = row.uc_annual_payment_max
395+
family_type = row.family_type
396+
in_band = (
397+
(uc_payments >= lower)
398+
& (uc_payments < upper)
399+
& (uc_family_type == family_type)
400+
)
401+
name = f"dwp/uc_payment_dist/{family_type}_annual_payment_{lower:_.0f}_to_{upper:_.0f}"
402+
df[name] = household_from_family(in_band)
403+
target_names.append(name)
404+
target_values.append(row.household_count)
405+
385406
combined_targets = pd.concat(
386407
[
387408
targets,
Lines changed: 153 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,153 @@
1+
import pandas as pd
2+
from pathlib import Path
3+
4+
5+
def _parse_uc_national_payment_dist() -> pd.DataFrame:
    """Parse the UC national payment distribution workbook into long format.

    Reads ``uc_national_payment_dist.xlsx`` from the sibling ``storage``
    folder and turns the (award band x family type) grid into one row per
    non-empty cell.

    Returns:
        A DataFrame with the columns ``uc_annual_payment_min``,
        ``uc_annual_payment_max``, ``family_type`` and ``household_count``.
        The payment bounds are the monthly band bounds multiplied by 12.
        A band with only a lower bound (e.g. ``"£2,500.01 or over"``) gets an
        upper bound of ``inf``; a label that cannot be parsed gets NaN bounds.
        Family-type labels without an entry in the mapping become NaN.
    """
    storage_path = Path(__file__).parent.parent / "storage"
    file_path = storage_path / "uc_national_payment_dist.xlsx"

    # header=None keeps the raw sheet grid so cells are addressed by position.
    df = pd.read_excel(file_path, header=None)

    # Row 7, columns 3-6 hold the four family-type labels.
    family_types = df.iloc[7, 3:7].tolist()

    # Workbook labels -> constant names used for the family_type column.
    family_type_mapping = {
        "Single, no children": "SINGLE",
        "Single, with children": "LONE_PARENT",
        "Couple, no children": "COUPLE_NO_CHILDREN",
        "Couple, with children": "COUPLE_WITH_CHILDREN",
    }

    open_ended_suffixes = (" or over", " and over", " or more", " and above")
    unparsed = (float("nan"), float("nan"))

    def parse_band(band):
        """Parse a band like '£100.01 to £200.00' into annual (min, max)."""
        # str() so that a non-text cell is treated as unparseable rather
        # than raising AttributeError on .replace().
        text = str(band).replace("£", "").replace(",", "").strip()
        parts = text.split(" to ")
        if len(parts) == 2:
            return float(parts[0]) * 12, float(parts[1]) * 12
        # A band with no upper limit would otherwise end up with NaN bounds,
        # which no payment can ever fall into; give it an infinite upper
        # bound instead.
        lowered = text.lower()
        for suffix in open_ended_suffixes:
            if lowered.endswith(suffix):
                try:
                    return float(text[: -len(suffix)]) * 12, float("inf")
                except ValueError:
                    return unparsed
        return unparsed

    data_rows = []

    # Data rows start at row 9.
    for idx in range(9, len(df)):
        award_band = df.iloc[idx, 1]  # Monthly award amount band

        # Skip rows that are not an award band.
        if pd.isna(award_band) or award_band in ["No payment", "Total"]:
            continue

        # Parsed lazily, only once a usable count is found, so text rows
        # without counts (e.g. notes under the table) are never parsed.
        bounds = None

        for col_idx, family_type in enumerate(family_types, start=3):
            household_count = df.iloc[idx, col_idx]

            # Skip missing, ".." (suppressed), or zero values.
            if (
                pd.isna(household_count)
                or household_count == ".."
                or household_count == 0
            ):
                continue

            if bounds is None:
                bounds = parse_band(award_band)

            data_rows.append(
                {
                    "uc_annual_payment_min": bounds[0],
                    "uc_annual_payment_max": bounds[1],
                    "family_type": family_type,
                    "household_count": int(household_count),
                }
            )

    # Explicit columns keep the schema intact even when no rows were found.
    result_df = pd.DataFrame(
        data_rows,
        columns=[
            "uc_annual_payment_min",
            "uc_annual_payment_max",
            "family_type",
            "household_count",
        ],
    )

    # Map family types to constant names.
    result_df["family_type"] = result_df["family_type"].map(
        family_type_mapping
    )

    return result_df
81+
82+
83+
def _parse_uc_pc_households() -> pd.DataFrame:
    """Parse UC household counts by parliamentary constituency (GB + NI).

    Reads the Great Britain workbook ``uc_pc_households.xlsx`` and sheet
    ``5b`` of the Northern Ireland workbook
    ``dfc-ni-uc-stats-supp-tables-may-2025.ods`` from the sibling ``storage``
    folder, stacks GB rows followed by NI rows, and rescales the counts so
    that they sum (before rounding) to the household total of
    ``_parse_uc_national_payment_dist()``.

    Returns:
        A DataFrame with the columns ``constituency_name`` and
        ``household_count`` (integer, after scaling and rounding).

    Raises:
        IndexError: If the NI sheet has no row labelled "May 2025".
    """
    storage_path = Path(__file__).parent.parent / "storage"

    # Parse GB data
    gb_file_path = storage_path / "uc_pc_households.xlsx"
    df_gb = pd.read_excel(gb_file_path, header=None)

    gb_data_rows = []

    # Data rows start at row 8.
    for idx in range(8, len(df_gb)):
        constituency = df_gb.iloc[idx, 1]  # Column 1: constituency name
        household_count = df_gb.iloc[idx, 3]  # Column 3: household count

        # Skip if empty, invalid, Total row, or Unknown
        if (
            pd.isna(constituency)
            or pd.isna(household_count)
            or constituency in ["Total", "Unknown"]
        ):
            continue

        gb_data_rows.append(
            {
                "constituency_name": constituency,
                "household_count": int(household_count),
            }
        )

    # Parse NI data
    ni_file_path = storage_path / "dfc-ni-uc-stats-supp-tables-may-2025.ods"
    df_ni = pd.read_excel(
        ni_file_path, sheet_name="5b", engine="odf", header=None
    )

    # Constituency names sit in row 2, columns 1-18.
    ni_constituencies = df_ni.iloc[2, 1:19].tolist()

    # Locate the May 2025 row by its label in column 0; fail with a clear
    # message instead of an anonymous out-of-bounds error from .iloc[0].
    ni_period = "May 2025"
    period_rows = df_ni[df_ni[0] == ni_period]
    if period_rows.empty:
        raise IndexError(
            f"No '{ni_period}' row found in sheet 5b of {ni_file_path.name}"
        )
    may_2025_row = period_rows.iloc[0]

    # Only missing cells are skipped, matching the GB branch: a zero count is
    # still a valid constituency row, and dropping it would change the number
    # of rows that consumers of this table receive.
    ni_data_rows = [
        {
            "constituency_name": constituency_name,
            "household_count": int(may_2025_row[col_idx]),
        }
        for col_idx, constituency_name in enumerate(ni_constituencies, start=1)
        if pd.notna(may_2025_row[col_idx])
    ]

    # Combine GB and NI data (GB rows first, then NI)
    result_df = pd.DataFrame(gb_data_rows + ni_data_rows)

    # Scale constituency counts to match national total
    national_total = _parse_uc_national_payment_dist()["household_count"].sum()
    constituency_total = result_df["household_count"].sum()
    scaling_factor = national_total / constituency_total

    result_df["household_count"] = (
        (result_df["household_count"] * scaling_factor).round().astype(int)
    )

    return result_df
149+
150+
151+
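# Module-level dataframes for easy import. They are parsed eagerly, so importing this module reads the source spreadsheets from the storage folder.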
152+
uc_national_payment_dist = _parse_uc_national_payment_dist()
153+
uc_pc_households = _parse_uc_pc_households()

pyproject.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,9 @@ dependencies = [
2626
"microimpute>=1.0.1",
2727
"black>=25.1.0",
2828
"rich>=13.0.0",
29+
"odfpy",
30+
"pandas",
31+
"openpyxl",
2932
]
3033

3134
[project.optional-dependencies]

0 commit comments

Comments
 (0)