Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions changelog_entry.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
- bump: minor
changes:
added:
- Universal Credit calibration at national level by award amount and family type, and at constituency level in total.
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
mapping_matrix,
)
from policyengine_uk.data import UKSingleYearDataset
from policyengine_uk_data.utils.uc_data import uc_pc_households

FOLDER = Path(__file__).parent

Expand Down Expand Up @@ -125,6 +126,14 @@ def create_constituency_target_matrix(
employment_incomes.employment_income_lower_bound.sort_values().unique()
) + [np.inf]

# UC household count by constituency
y["uc_households"] = uc_pc_households.household_count.values
matrix["uc_households"] = sim.map_result(
(sim.calculate("universal_credit").values > 0).astype(int),
"benunit",
"household",
)

for lower_bound, upper_bound in zip(bounds[:-1], bounds[1:]):
continue
if (
Expand Down
31 changes: 31 additions & 0 deletions policyengine_uk_data/storage/UC_DATA_SOURCES.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Universal Credit data sources

## National payment distribution

Source: Stat-Xplore (DWP)
- Rows: Monthly award amount bands + Households on Universal Credit
- Columns: Family type
- File: `uc_national_payment_dist.xlsx`

## Parliamentary constituency households

### Great Britain data

Source: Stat-Xplore (DWP)
- Rows: Westminster Parliamentary Constituency 2024 + Households on Universal Credit
- File: `uc_pc_households.xlsx`

### Northern Ireland data

Source: Department for Communities Northern Ireland
- URL: https://www.communities-ni.gov.uk/publications/universal-credit-statistics-may-2025
- File: `dfc-ni-uc-stats-supp-tables-may-2025.ods`
- Sheet: 5b
- Data: Household counts by Westminster Parliamentary Constituency 2024

The NI data is combined with the GB data to produce a complete UK-wide parliamentary constituency table.

## Data processing notes

- The "Unknown" constituency category is excluded from the constituency data
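- Constituency household counts are scaled to match the national total from the payment distribution data (the sum of household counts across all award bands and family types, excluding the "No payment" row and suppressed cells), as the two sources have different totals due to timing and methodology differences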
Binary file not shown.
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ reforms:
parameters:
gov.hmrc.income_tax.rates.uk[0].rate: 0.21
- name: Raise higher rate by 1pp
expected_impact: 5.5
expected_impact: 5.4
parameters:
gov.hmrc.income_tax.rates.uk[1].rate: 0.42
- name: Raise personal allowance by ~800GBP/year
Expand All @@ -16,15 +16,15 @@ reforms:
parameters:
gov.hmrc.child_benefit.amount.additional: 25
- name: Reduce Universal Credit taper rate to 20%
expected_impact: -34.4
expected_impact: -30.7
parameters:
gov.dwp.universal_credit.means_test.reduction_rate: 0.2
- name: Raise Class 1 main employee NICs rate to 10%
expected_impact: 12.4
parameters:
gov.hmrc.national_insurance.class_1.rates.employee.main: 0.1
- name: Raise VAT standard rate by 2pp
expected_impact: 18.7
expected_impact: 19.3
parameters:
gov.hmrc.vat.standard_rate: 0.22
- name: Raise additional rate by 3pp
Expand Down
21 changes: 21 additions & 0 deletions policyengine_uk_data/utils/loss.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from policyengine_uk_data.storage import STORAGE_FOLDER
from policyengine_uk_data.utils import uprate_values
from policyengine_uk.data import UKSingleYearDataset
from policyengine_uk_data.utils.uc_data import uc_national_payment_dist

tax_benefit = pd.read_csv(STORAGE_FOLDER / "tax_benefit.csv")
tax_benefit["name"] = tax_benefit["name"].apply(lambda x: f"obr/{x}")
Expand Down Expand Up @@ -382,6 +383,26 @@ def pe_count(*variables):
60 * 52 * 115_000
) # same source as above, multiply avg cap amount by total capped population

# UC national payment distribution

uc_payment_dist = uc_national_payment_dist
uc_payments = sim.calculate("universal_credit", map_to="benunit").values
uc_family_type = sim.calculate("family_type", map_to="benunit").values

for i, row in uc_payment_dist.iterrows():
lower = row.uc_annual_payment_min
upper = row.uc_annual_payment_max
family_type = row.family_type
in_band = (
(uc_payments >= lower)
& (uc_payments < upper)
& (uc_family_type == family_type)
)
name = f"dwp/uc_payment_dist/{family_type}_annual_payment_{lower:_.0f}_to_{upper:_.0f}"
df[name] = household_from_family(in_band)
target_names.append(name)
target_values.append(row.household_count)

combined_targets = pd.concat(
[
targets,
Expand Down
153 changes: 153 additions & 0 deletions policyengine_uk_data/utils/uc_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
import pandas as pd
from pathlib import Path


def _parse_uc_national_payment_dist(raw_sheet=None):
    """Parse the UC national payment distribution into long format.

    The sheet is read positionally: row index 7 holds the four family-type
    headers in columns 3-6, data rows start at row index 9, and column 1
    holds the monthly award band label (e.g. ``"£100.01 to £200.00"``).

    Args:
        raw_sheet: Optional header-less ``DataFrame`` with the layout
            described above. When ``None`` (the default) the bundled
            ``uc_national_payment_dist.xlsx`` workbook is read from the
            package ``storage`` folder.

    Returns:
        A ``DataFrame`` with one row per (award band, family type) cell
        that has a positive count, and the columns
        ``uc_annual_payment_min``, ``uc_annual_payment_max`` (monthly band
        bounds multiplied by 12), ``family_type`` and ``household_count``.
        Family types missing from the mapping below come out as NaN.
    """
    if raw_sheet is None:
        storage_path = Path(__file__).parent.parent / "storage"
        raw_sheet = pd.read_excel(
            storage_path / "uc_national_payment_dist.xlsx", header=None
        )
    df = raw_sheet

    # Suffixes that mark an open-ended top band, e.g. "£2,500.01 and over".
    open_ended_suffixes = (" and over", " or over", " and above", " or more")

    def parse_band(band):
        """Return annual ``(min, max)`` for a band label, or ``None``.

        ``None`` is returned for anything that is not a parseable band
        (blank cells, "No payment", "Total", footnotes), so the caller can
        drop the row instead of emitting NaN bounds.
        """
        if not isinstance(band, str):
            return None
        cleaned = band.replace("£", "").replace(",", "").strip()
        try:
            if " to " in cleaned:
                low, high = cleaned.split(" to ")
                return float(low) * 12, float(high) * 12
            lowered = cleaned.lower()
            for suffix in open_ended_suffixes:
                if lowered.endswith(suffix):
                    low = cleaned[: -len(suffix)]
                    return float(low) * 12, float("inf")
        except ValueError:
            # Wrong number of parts or a non-numeric bound.
            return None
        return None

    # Family-type headers live in row index 7, columns 3-6.
    family_types = df.iloc[7, 3:7].tolist()

    columns = [
        "uc_annual_payment_min",
        "uc_annual_payment_max",
        "family_type",
        "household_count",
    ]

    records = []
    for idx in range(9, len(df)):
        # Column 1 is the monthly award band; non-band rows parse to None.
        bounds = parse_band(df.iloc[idx, 1])
        if bounds is None:
            continue
        annual_min, annual_max = bounds

        for col_idx, family_type in enumerate(family_types, start=3):
            household_count = df.iloc[idx, col_idx]

            # Skip missing, ".." (suppressed), or zero values
            if (
                pd.isna(household_count)
                or household_count == ".."
                or household_count == 0
            ):
                continue

            records.append(
                {
                    "uc_annual_payment_min": annual_min,
                    "uc_annual_payment_max": annual_max,
                    "family_type": family_type,
                    "household_count": int(household_count),
                }
            )

    # Passing ``columns`` keeps the schema stable even when nothing parsed.
    result_df = pd.DataFrame(records, columns=columns)

    # Map the sheet's family-type headers to constant names.
    family_type_mapping = {
        "Single, no children": "SINGLE",
        "Single, with children": "LONE_PARENT",
        "Couple, no children": "COUPLE_NO_CHILDREN",
        "Couple, with children": "COUPLE_WITH_CHILDREN",
    }
    result_df["family_type"] = result_df["family_type"].map(
        family_type_mapping
    )

    return result_df


def _parse_uc_pc_households():
    """Parse UC households by parliamentary constituency (GB + NI).

    Reads the GB workbook ``uc_pc_households.xlsx`` and sheet ``5b`` of the
    NI workbook ``dfc-ni-uc-stats-supp-tables-may-2025.ods`` from the
    package ``storage`` folder, concatenates them (GB rows first), and
    rescales the counts so that they sum to the total household count of
    the national payment distribution.

    Returns:
        A ``DataFrame`` with columns ``constituency_name`` and
        ``household_count`` (scaled and rounded to ``int``).

    Raises:
        ValueError: If the NI sheet has no row labelled ``"May 2025"`` in
            its first column.
    """
    storage_path = Path(__file__).parent.parent / "storage"

    def is_usable_count(cell):
        """True unless the cell is blank or the ".." suppression marker."""
        return not (pd.isna(cell) or cell == "..")

    # --- Great Britain -------------------------------------------------
    df_gb = pd.read_excel(storage_path / "uc_pc_households.xlsx", header=None)

    gb_data_rows = []
    for idx in range(8, len(df_gb)):
        constituency = df_gb.iloc[idx, 1]  # Column 1: constituency name
        household_count = df_gb.iloc[idx, 3]  # Column 3: household count

        # Skip blank rows, the Total/Unknown rows, and suppressed counts
        # (the latter would otherwise crash int()).
        if (
            pd.isna(constituency)
            or constituency in ("Total", "Unknown")
            or not is_usable_count(household_count)
        ):
            continue

        gb_data_rows.append(
            {
                "constituency_name": constituency,
                "household_count": int(household_count),
            }
        )

    # --- Northern Ireland ----------------------------------------------
    df_ni = pd.read_excel(
        storage_path / "dfc-ni-uc-stats-supp-tables-may-2025.ods",
        sheet_name="5b",
        engine="odf",
        header=None,
    )

    # Constituency names sit in row index 2, columns 1-18.
    ni_constituencies = df_ni.iloc[2, 1:19].tolist()

    # Locate the May 2025 row via the label in the first column.
    may_2025_rows = df_ni[df_ni[0] == "May 2025"]
    if may_2025_rows.empty:
        raise ValueError(
            'No "May 2025" row found in sheet "5b" of '
            "dfc-ni-uc-stats-supp-tables-may-2025.ods"
        )
    may_2025_row = may_2025_rows.iloc[0]

    ni_data_rows = []
    for col_idx, constituency_name in enumerate(ni_constituencies, start=1):
        household_count = may_2025_row[col_idx]

        if is_usable_count(household_count) and household_count != 0:
            ni_data_rows.append(
                {
                    "constituency_name": constituency_name,
                    "household_count": int(household_count),
                }
            )

    # Combine GB and NI data
    result_df = pd.DataFrame(gb_data_rows + ni_data_rows)

    # Scale constituency counts to match the national total, taken from
    # the payment distribution table.
    national_total = _parse_uc_national_payment_dist()["household_count"].sum()
    constituency_total = result_df["household_count"].sum()
    scaling_factor = national_total / constituency_total

    result_df["household_count"] = (
        (result_df["household_count"] * scaling_factor).round().astype(int)
    )

    return result_df


# Module-level dataframes for easy import.
# Both parsers run once, when this module is first imported, and read their
# workbooks from the package "storage" folder at that point.
uc_national_payment_dist = _parse_uc_national_payment_dist()
uc_pc_households = _parse_uc_pc_households()
3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@ dependencies = [
"microimpute>=1.0.1",
"black>=25.1.0",
"rich>=13.0.0",
"odfpy",
"pandas",
"openpyxl",
]

[project.optional-dependencies]
Expand Down