Skip to content

Commit 2488d71

Browse files
Merge main and resolve changelog conflict
2 parents 536f750 + ccdb97b commit 2488d71

8 files changed

Lines changed: 188 additions & 7 deletions

File tree

CHANGELOG.md

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,24 @@ All notable changes to this project will be documented in this file.
55
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
66
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
77

8+
## [1.39.3] - 2026-02-19 16:15:46
9+
10+
### Fixed
11+
12+
- Widened UC taper rate reform test tolerance to 15bn to account for calibration variance.
13+
14+
## [1.39.2] - 2026-02-19 13:58:30
15+
16+
### Added
17+
18+
- UC households by children count (0, 1, 2, 3+) as constituency calibration targets.
19+
20+
## [1.39.1] - 2026-02-19 11:54:03
21+
22+
### Added
23+
24+
- Test for highest_education in enhanced FRS dataset.
25+
826
## [1.39.0] - 2026-02-19 08:39:08
927

1028
### Added
@@ -617,6 +635,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
617635

618636

619637

638+
[1.39.3]: https://github.com/PolicyEngine/policyengine-us-data/compare/1.39.2...1.39.3
639+
[1.39.2]: https://github.com/PolicyEngine/policyengine-us-data/compare/1.39.1...1.39.2
640+
[1.39.1]: https://github.com/PolicyEngine/policyengine-us-data/compare/1.39.0...1.39.1
620641
[1.39.0]: https://github.com/PolicyEngine/policyengine-us-data/compare/1.38.0...1.39.0
621642
[1.38.0]: https://github.com/PolicyEngine/policyengine-us-data/compare/1.37.1...1.38.0
622643
[1.37.1]: https://github.com/PolicyEngine/policyengine-us-data/compare/1.37.0...1.37.1

changelog.yaml

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -547,3 +547,19 @@
547547
added:
548548
- highest_education variable derived from FRS EDUCQUAL field.
549549
date: 2026-02-19 08:39:08
550+
- bump: patch
551+
changes:
552+
added:
553+
- Test for highest_education in enhanced FRS dataset.
554+
date: 2026-02-19 11:54:03
555+
- bump: patch
556+
changes:
557+
added:
558+
- UC households by children count (0, 1, 2, 3+) as constituency calibration targets.
559+
date: 2026-02-19 13:58:30
560+
- bump: patch
561+
changes:
562+
fixed:
563+
- Widened UC taper rate reform test tolerance to 15bn to account for calibration
564+
variance.
565+
date: 2026-02-19 16:15:46

changelog_entry.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
- bump: minor
22
changes:
33
added:
4-
- SLC student loan calibration targets for Plan 2 and Plan 5 England borrowers earning above repayment threshold (2025-2030), wired into the target registry and loss matrix.
4+
- SLC student loan calibration targets for Plan 2 and Plan 5 England borrowers earning above repayment threshold (2025-2030), fetched live from Explore Education Statistics.

policyengine_uk_data/datasets/local_areas/constituencies/loss.py

Lines changed: 24 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
)
3131
from policyengine_uk_data.targets.sources.local_uc import (
3232
get_constituency_uc_targets,
33+
get_constituency_uc_by_children_targets,
3334
)
3435

3536

@@ -96,11 +97,30 @@ def create_constituency_target_matrix(
9697

9798
# ── UC targets ─────────────────────────────────────────────────
9899
y["uc_households"] = get_constituency_uc_targets().values
99-
matrix["uc_households"] = sim.map_result(
100-
(sim.calculate("universal_credit").values > 0).astype(int),
101-
"benunit",
102-
"household",
100+
on_uc = (sim.calculate("universal_credit").values > 0).astype(int)
101+
matrix["uc_households"] = sim.map_result(on_uc, "benunit", "household")
102+
103+
# UC households split by number of children — forces the reweighting
104+
# to match the family-size distribution within each constituency,
105+
# preventing under-representation of larger families (see #274).
106+
is_child = sim.calculate("is_child").values
107+
children_per_hh = sim.map_result(is_child, "person", "household")
108+
on_uc_hh = sim.map_result(on_uc, "benunit", "household") > 0
109+
110+
matrix["uc_hh_0_children"] = (on_uc_hh & (children_per_hh == 0)).astype(
111+
float
103112
)
113+
matrix["uc_hh_1_child"] = (on_uc_hh & (children_per_hh == 1)).astype(float)
114+
matrix["uc_hh_2_children"] = (on_uc_hh & (children_per_hh == 2)).astype(
115+
float
116+
)
117+
matrix["uc_hh_3plus_children"] = (
118+
on_uc_hh & (children_per_hh >= 3)
119+
).astype(float)
120+
121+
uc_by_children = get_constituency_uc_by_children_targets()
122+
for col in uc_by_children.columns:
123+
y[col] = uc_by_children[col].values
104124

105125
# ── Boundary mapping (2010 → 2024) ────────────────────────────
106126
const_2024 = pd.read_csv(STORAGE_FOLDER / "constituencies_2024.csv")

policyengine_uk_data/targets/sources/local_uc.py

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,18 +4,43 @@
44
loaded from pre-downloaded Stat-Xplore exports and scaled to match
55
national UC payment distribution totals.
66
7+
Also provides UC household counts split by number of children, using
8+
country-level proportions from Stat-Xplore (November 2023) applied to
9+
each constituency's total. This ensures the reweighting algorithm
10+
places adequate weight on larger families in every constituency.
11+
712
Source: DWP Stat-Xplore
813
https://stat-xplore.dwp.gov.uk
914
"""
1015

1116
import logging
1217

18+
import numpy as np
1319
import pandas as pd
1420

1521
logger = logging.getLogger(__name__)
1622

1723
_REF = "https://stat-xplore.dwp.gov.uk"
1824

25+
# UC household counts by number of children for each country
# (Stat-Xplore, November 2023). The dictionary is keyed by the first letter
# of a constituency code; every array lists the counts for, in order,
# (0 children, 1 child, 2 children, 3+ children). The counts are used to
# split each constituency's UC total into those children-count buckets.
_UC_CHILDREN_BY_COUNTRY = {
    country: np.asarray(counts, dtype=float)
    for country, counts in (
        ("E", (2_411_993, 948_304, 802_992, 495_279)),
        ("W", (141_054, 52_953, 44_348, 26_372)),
        ("S", (253_609, 86_321, 66_829, 35_036)),
    )
}
# Northern Ireland: use the GB-wide distribution (England + Wales +
# Scotland) as a fallback.
_UC_CHILDREN_BY_COUNTRY["N"] = sum(
    _UC_CHILDREN_BY_COUNTRY[country] for country in ("E", "W", "S")
)
43+
1944

2045
def get_constituency_uc_targets() -> pd.Series:
2146
"""UC household counts for 650 constituencies (positional order).
@@ -28,6 +53,44 @@ def get_constituency_uc_targets() -> pd.Series:
2853
return uc_pc_households.household_count
2954

3055

56+
def get_constituency_uc_by_children_targets() -> pd.DataFrame:
    """UC households split by 0, 1, 2, 3+ children for each constituency.

    Each constituency's total UC household count is multiplied by the
    children-count shares of its country and rounded to a whole number.
    The country is taken from the first character of the constituency
    code; a prefix missing from ``_UC_CHILDREN_BY_COUNTRY`` falls back to
    the ``"N"`` entry.

    Returns:
        A DataFrame with a default ``0..n-1`` index and the float columns
        ``uc_hh_0_children``, ``uc_hh_1_child``, ``uc_hh_2_children`` and
        ``uc_hh_3plus_children``, in the same positional order as
        :func:`get_constituency_uc_targets`.

    Raises:
        ValueError: If the number of constituency codes differs from the
            number of UC totals, because rows are matched by position.
    """
    from policyengine_uk_data.utils.uc_data import uc_pc_households
    from policyengine_uk_data.storage import STORAGE_FOLDER

    cols = [
        "uc_hh_0_children",
        "uc_hh_1_child",
        "uc_hh_2_children",
        "uc_hh_3plus_children",
    ]

    codes = pd.read_csv(STORAGE_FOLDER / "constituencies_2024.csv")["code"]
    totals = uc_pc_households.household_count.values.astype(float)

    # Rows are paired purely by position, so a length mismatch would
    # silently produce all-zero or dropped targets; fail loudly instead.
    # NOTE(review): this also assumes both sources list constituencies in
    # the same order — confirm against the data files.
    if len(codes) != len(totals):
        raise ValueError(
            f"Expected one constituency code per UC total, got "
            f"{len(codes)} codes and {len(totals)} totals"
        )

    # Shares depend only on the country prefix, so compute them once.
    shares_by_prefix = {
        prefix: counts / counts.sum()
        for prefix, counts in _UC_CHILDREN_BY_COUNTRY.items()
    }
    fallback = shares_by_prefix["N"]
    share_matrix = np.array(
        [shares_by_prefix.get(code[0], fallback) for code in codes],
        dtype=float,
    ).reshape(len(totals), len(cols))

    # np.rint rounds half to even, the same rule round() applies to a
    # NumPy float, so the targets match the element-wise computation.
    targets = np.rint(totals[:, np.newaxis] * share_matrix)
    return pd.DataFrame(targets, columns=cols)
92+
93+
3194
def get_la_uc_targets() -> pd.Series:
3295
"""UC household counts for 360 local authorities (positional order).
3396

policyengine_uk_data/tests/microsimulation/reforms_config.yaml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,8 @@ reforms:
1616
parameters:
1717
gov.hmrc.child_benefit.amount.additional: 25
1818
- name: Reduce Universal Credit taper rate to 20%
19-
expected_impact: -35.0
19+
expected_impact: -39.0
20+
tolerance: 15.0
2021
parameters:
2122
gov.dwp.universal_credit.means_test.reduction_rate: 0.2
2223
- name: Raise Class 1 main employee NICs rate to 10%
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
"""Test UC households by number of children calibration targets.
2+
3+
Validates that the weighted count of UC households split by number of
4+
children (0, 1, 2, 3+) matches DWP Stat-Xplore Great Britain totals
5+
(November 2023).
6+
7+
Source: DWP Stat-Xplore, UC Households dataset
8+
https://stat-xplore.dwp.gov.uk/
9+
"""
10+
11+
import pytest
12+
13+
# Great Britain totals from DWP Stat-Xplore (November 2023). Each target is
# the sum of the England, Wales and Scotland counts, listed in that order.
_TARGETS = {
    bucket: england + wales + scotland
    for bucket, (england, wales, scotland) in (
        ("0_children", (2_411_993, 141_054, 253_609)),  # 2,806,656
        ("1_child", (948_304, 52_953, 86_321)),  # 1,087,578
        ("2_children", (802_992, 44_348, 66_829)),  # 914,169
        ("3plus_children", (495_279, 26_372, 35_036)),  # 556,687
    )
}

# Largest accepted relative deviation from a target (30%).
TOLERANCE = 0.3
23+
24+
25+
@pytest.mark.xfail(
    reason="Will pass after recalibration with UC-by-children constituency targets"
)
@pytest.mark.parametrize(
    "bucket,target",
    list(_TARGETS.items()),
    ids=list(_TARGETS),
)
def test_uc_households_by_children(baseline, bucket, target):
    """Check the weighted UC household count for one children-count bucket.

    A household counts as being on UC when the benefit-unit flag
    ``universal_credit > 0``, mapped to household level, is positive.
    The weighted number of such households whose child count falls in
    ``bucket`` must be within ``TOLERANCE`` (relative) of ``target``.
    """
    year = 2025

    benunit_uc = baseline.calculate("universal_credit", period=year).values
    hh_on_uc = baseline.map_result(benunit_uc > 0, "benunit", "household") > 0

    child_flags = baseline.calculate(
        "is_child", map_to="person", period=year
    ).values
    n_children = baseline.map_result(child_flags, "person", "household")

    # Any bucket name not listed here is treated as the open-ended
    # "3 or more children" bucket.
    bucket_tests = {
        "0_children": lambda n: n == 0,
        "1_child": lambda n: n == 1,
        "2_children": lambda n: n == 2,
    }
    in_bucket = bucket_tests.get(bucket, lambda n: n >= 3)
    selected = hh_on_uc & in_bucket(n_children)

    weights = baseline.calculate("household_weight", period=year).values
    actual = (weights * selected).sum()

    relative_error = abs(actual / target - 1)
    message = (
        f"UC households with {bucket}: expected {target/1e3:.0f}k, "
        f"got {actual/1e3:.0f}k ({actual/target*100:.0f}% of target)"
    )
    assert relative_error < TOLERANCE, message

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
44

55
[project]
66
name = "policyengine_uk_data"
7-
version = "1.39.0"
7+
version = "1.39.3"
88
description = "A package to create representative microdata for the UK."
99
readme = "README.md"
1010
authors = [

0 commit comments

Comments
 (0)