1+ """Constituency-level calibration target matrix.
2+
3+ Constructs the (matrix, y, country_mask) triple for calibrating
4+ household weights across 650 parliamentary constituencies. Target
5+ data is loaded from source modules in the targets system.
6+
7+ Sources:
8+ - Age: ONS mid-year population estimates
9+ - Income: HMRC SPI table 3.15
10+ - UC: DWP Stat-Xplore
11+ """
12+
113from policyengine_uk import Microsimulation
214import pandas as pd
315import numpy as np
4- from pathlib import Path
516
6- from policyengine_uk_data .utils .loss import (
7- create_target_matrix as create_national_target_matrix ,
8- )
17+ from policyengine_uk .data import UKSingleYearDataset
918from policyengine_uk_data .storage import STORAGE_FOLDER
1019from policyengine_uk_data .datasets .local_areas .constituencies .boundary_changes .mapping_matrix import (
1120 mapping_matrix ,
1221)
13- from policyengine_uk .data import UKSingleYearDataset
14- from policyengine_uk_data .utils .uc_data import uc_pc_households
15-
16- FOLDER = Path (__file__ ).parent
22+ from policyengine_uk_data .targets .sources .local_age import (
23+ get_constituency_age_targets ,
24+ get_uk_total_population ,
25+ )
26+ from policyengine_uk_data .targets .sources .local_income import (
27+ get_constituency_income_targets ,
28+ get_national_income_projections ,
29+ INCOME_VARIABLES ,
30+ )
31+ from policyengine_uk_data .targets .sources .local_uc import (
32+ get_constituency_uc_targets ,
33+ )
1734
1835
1936def create_constituency_target_matrix (
@@ -23,26 +40,18 @@ def create_constituency_target_matrix(
2340):
2441 if time_period is None :
2542 time_period = dataset .time_period
26- ages = pd .read_csv (FOLDER / "targets" / "age.csv" )
27- national_demographics = pd .read_csv (STORAGE_FOLDER / "demographics.csv" )
28- incomes = pd .read_csv (FOLDER / "targets" / "spi_by_constituency.csv" )
2943
3044 sim = Microsimulation (dataset = dataset , reform = reform )
3145 sim .default_calculation_period = dataset .time_period
3246
33- national_incomes = pd .read_csv (STORAGE_FOLDER / "incomes_projection.csv" )
34- national_incomes = national_incomes [
35- national_incomes .year
36- == max (national_incomes .year .min (), int (dataset .time_period ))
37- ]
38-
3947 matrix = pd .DataFrame ()
4048 y = pd .DataFrame ()
4149
42- INCOME_VARIABLES = [
43- "self_employment_income" ,
44- "employment_income" ,
45- ]
50+ # ── Income targets ─────────────────────────────────────────────
51+ incomes = get_constituency_income_targets ()
52+ national_incomes = get_national_income_projections (
53+ int (dataset .time_period )
54+ )
4655
4756 for income_variable in INCOME_VARIABLES :
4857 income_values = sim .calculate (income_variable ).values
@@ -56,84 +65,50 @@ def create_constituency_target_matrix(
5665 (national_incomes .total_income_lower_bound == 12_570 )
5766 & (national_incomes .total_income_upper_bound == np .inf )
5867 ][income_variable + "_amount" ].iloc [0 ]
59- national_consistency_adjustment_factor = (
60- national_target / local_target_sum
61- )
62- y [f"hmrc/{ income_variable } /amount" ] = (
63- local_targets * national_consistency_adjustment_factor
64- )
68+ adjustment = national_target / local_target_sum
69+ y [f"hmrc/{ income_variable } /amount" ] = local_targets * adjustment
70+
6571 matrix [f"hmrc/{ income_variable } /count" ] = sim .map_result (
6672 (income_values != 0 ) * in_spi_frame , "person" , "household"
6773 )
68- local_targets = incomes [f"{ income_variable } _count" ].values
69- local_target_sum = local_targets .sum ()
70- national_target = national_incomes [
71- (national_incomes .total_income_lower_bound == 12_570 )
72- & (national_incomes .total_income_upper_bound == np .inf )
73- ][income_variable + "_count" ].iloc [0 ]
7474 y [f"hmrc/{ income_variable } /count" ] = (
75- incomes [f"{ income_variable } _count" ].values
76- * national_consistency_adjustment_factor
75+ incomes [f"{ income_variable } _count" ].values * adjustment
7776 )
7877
79- uk_total_population = (
80- national_demographics [national_demographics .name == "uk_population" ][
81- str (time_period )
82- ].values [0 ]
83- * 1e6
84- )
78+ # ── Age targets ────────────────────────────────────────────────
79+ age_targets = get_constituency_age_targets ()
80+ uk_total_population = get_uk_total_population (int (time_period ))
8581
8682 age = sim .calculate ("age" ).values
8783 targets_total_pop = 0
88- for lower_age in range (0 , 80 , 10 ):
89- upper_age = lower_age + 10
90-
91- in_age_band = (age >= lower_age ) & (age < upper_age )
92-
93- age_str = f"{ lower_age } _{ upper_age } "
94- matrix [f"age/{ age_str } " ] = sim .map_result (
95- in_age_band , "person" , "household"
96- )
97-
98- age_count = ages [
99- [str (age ) for age in range (lower_age , upper_age )]
100- ].sum (axis = 1 )
101-
102- age_str = f"{ lower_age } _{ upper_age } "
103- y [f"age/{ age_str } " ] = age_count .values
104- targets_total_pop += age_count .values .sum ()
105-
106- # Adjust for consistency
107- for lower_age in range (0 , 80 , 10 ):
108- upper_age = lower_age + 10
109-
110- in_age_band = (age >= lower_age ) & (age < upper_age )
111-
112- age_str = f"{ lower_age } _{ upper_age } "
113- y [f"age/{ age_str } " ] *= uk_total_population / targets_total_pop * 0.9
114-
115- # UC household count by constituency
116- y ["uc_households" ] = uc_pc_households .household_count .values
84+ age_cols = [c for c in age_targets .columns if c .startswith ("age/" )]
85+ for col in age_cols :
86+ lower , upper = col .removeprefix ("age/" ).split ("_" )
87+ lower , upper = int (lower ), int (upper )
88+ in_band = (age >= lower ) & (age < upper )
89+ matrix [col ] = sim .map_result (in_band , "person" , "household" )
90+ y [col ] = age_targets [col ].values
91+ targets_total_pop += age_targets [col ].values .sum ()
92+
93+ # National consistency adjustment
94+ for col in age_cols :
95+ y [col ] *= uk_total_population / targets_total_pop * 0.9
96+
97+ # ── UC targets ─────────────────────────────────────────────────
98+ y ["uc_households" ] = get_constituency_uc_targets ().values
11799 matrix ["uc_households" ] = sim .map_result (
118100 (sim .calculate ("universal_credit" ).values > 0 ).astype (int ),
119101 "benunit" ,
120102 "household" ,
121103 )
122104
105+ # ── Boundary mapping (2010 → 2024) ────────────────────────────
123106 const_2024 = pd .read_csv (STORAGE_FOLDER / "constituencies_2024.csv" )
124- const_2010 = pd .read_csv (STORAGE_FOLDER / "constituencies_2010.csv" )
125-
126- y_2010 = y .copy ()
127- y_2010 ["name" ] = const_2010 ["name" ].values
128107
129108 y_columns = list (y .columns )
130- y_values = mapping_matrix @ y .values # Transform to 2024 constituencies
131-
109+ y_values = mapping_matrix @ y .values
132110 y = pd .DataFrame (y_values , columns = y_columns )
133111
134- y_2024 = y .copy ()
135- y_2024 ["name" ] = const_2024 ["name" ].values
136-
137112 country_mask = create_country_mask (
138113 household_countries = sim .calculate ("country" ).values ,
139114 codes = const_2024 .code ,
@@ -144,21 +119,16 @@ def create_constituency_target_matrix(
def create_country_mask(
    household_countries: np.ndarray, codes: pd.Series
) -> np.ndarray:
    """Build the area-by-household country mask.

    The result ``r`` has one row per entry of ``codes`` and one column per
    entry of ``household_countries``; ``r[i, j]`` is 1.0 when household ``j``
    is in the same country as area ``i`` and 0.0 otherwise.

    Args:
        household_countries: Country label of each household, compared
            against the upper-case names "ENGLAND", "WALES", "SCOTLAND"
            and "NORTHERN_IRELAND".
        codes: Area codes; the first character of each code selects the
            country ("E", "W", "S" or "N").

    Returns:
        A float array of shape ``(len(codes), len(household_countries))``.
        Rows whose code starts with any other character are all zeros.
    """
    country_by_prefix = {
        "E": "ENGLAND",
        "W": "WALES",
        "S": "SCOTLAND",
        "N": "NORTHERN_IRELAND",
    }
    # Resolve each area's country positionally, so the result does not
    # depend on the index labels carried by ``codes``.
    area_countries = np.array(
        [country_by_prefix.get(code[0]) for code in codes], dtype=object
    )

    r = np.zeros((len(codes), len(household_countries)))
    # The household comparison depends only on the country, not on the
    # individual area, so evaluate it once per country and broadcast the
    # resulting row into every area belonging to that country.
    for country in country_by_prefix.values():
        rows = area_countries == country
        if not rows.any():
            continue
        r[rows] = household_countries == country
    return r
0 commit comments