Skip to content

Commit c231a88

Browse files
Merge pull request #120 from PolicyEngine/nikhilwoodruff/issue119
Enforce consistency between constituency targets
2 parents: deac366 + a5846c3 · commit c231a88

3 files changed

Lines changed: 78 additions & 19 deletions

File tree

changelog_entry.yaml

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,4 @@
1+
- bump: minor
2+
changes:
3+
fixed:
4+
- Inconsistent local area targets removed.

policyengine_uk_data/datasets/frs/local_areas/constituencies/calibrate.py

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -46,6 +46,8 @@ def calibrate(
4646
# Weights - 650 x 100180
4747
original_weights = np.log(
4848
sim.calculate("household_weight", 2025).values / COUNT_CONSTITUENCIES
49+
+ np.random.random(len(sim.calculate("household_weight", 2025).values))
50+
* 0.01
4951
)
5052
weights = torch.tensor(
5153
np.ones((COUNT_CONSTITUENCIES, len(original_weights)))
@@ -123,7 +125,7 @@ def dropout_weights(weights, p):
123125
masked_weights[mask] = mean
124126
return masked_weights
125127

126-
optimizer = torch.optim.Adam([weights], lr=0.15)
128+
optimizer = torch.optim.Adam([weights], lr=1e-1)
127129

128130
desc = range(128) if os.environ.get("DATA_LITE") else range(epochs)
129131
final_weights = (torch.exp(weights) * r).detach().numpy()
@@ -133,10 +135,8 @@ def dropout_weights(weights, p):
133135
optimizer.zero_grad()
134136
weights_ = torch.exp(dropout_weights(weights, 0.05)) * r
135137
l = loss(weights_)
136-
l.backward()
137-
optimizer.step()
138-
c_close = pct_close(weights_, constituency=True, national=False)
139-
n_close = pct_close(weights_, constituency=False, national=True)
138+
c_close = pct_close(weights_, constituency=True, national=False, t=0.1)
139+
n_close = pct_close(weights_, constituency=False, national=True, t=0.1)
140140
if epoch % 1 == 0:
141141
if dropout_targets:
142142
validation_loss = loss(weights_, validation=True)
@@ -181,6 +181,8 @@ def dropout_weights(weights, p):
181181
f.create_dataset(
182182
"household_weight/2025", data=final_weights.sum(axis=0)
183183
)
184+
l.backward()
185+
optimizer.step()
184186

185187
return final_weights
186188

policyengine_uk_data/datasets/frs/local_areas/constituencies/loss.py

Lines changed: 67 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -40,6 +40,7 @@ def create_constituency_target_matrix(
4040
INCOME_VARIABLES = [
4141
"total_income",
4242
"self_employment_income",
43+
"employment_income",
4344
]
4445

4546
for income_variable in INCOME_VARIABLES:
@@ -81,31 +82,84 @@ def create_constituency_target_matrix(
8182
employment_incomes.employment_income_lower_bound.sort_values().unique()
8283
) + [np.inf]
8384

85+
employment_incomes_all = (
86+
employment_incomes.groupby("code")[
87+
["employment_income_count", "employment_income_amount"]
88+
]
89+
.sum()
90+
.reset_index()
91+
)
92+
93+
hmrc_all_count_target = incomes["employment_income_count"].values
94+
ons_all_count_target = employment_incomes_all[
95+
"employment_income_count"
96+
].values
97+
count_scaling_factors = hmrc_all_count_target / ons_all_count_target
98+
99+
hmrc_all_amount_target = incomes["employment_income_amount"].values
100+
ons_all_amount_target = employment_incomes_all[
101+
"employment_income_amount"
102+
].values
103+
amount_scaling_factors = hmrc_all_amount_target / ons_all_amount_target
104+
84105
for lower_bound, upper_bound in zip(bounds[:-1], bounds[1:]):
85-
if lower_bound < 12_570 or upper_bound > 70_000:
106+
if (
107+
lower_bound <= 15_000
108+
): # Skip some targets with very small sample sizes
109+
continue
110+
if upper_bound >= 200_000:
111+
continue
112+
count_target = (
113+
employment_incomes[
114+
(
115+
employment_incomes.employment_income_lower_bound
116+
== lower_bound
117+
)
118+
& (
119+
employment_incomes.employment_income_upper_bound
120+
== upper_bound
121+
)
122+
].employment_income_count.values
123+
* count_scaling_factors
124+
)
125+
126+
amount_target = (
127+
employment_incomes[
128+
(
129+
employment_incomes.employment_income_lower_bound
130+
== lower_bound
131+
)
132+
& (
133+
employment_incomes.employment_income_upper_bound
134+
== upper_bound
135+
)
136+
].employment_income_amount.values
137+
* amount_scaling_factors
138+
)
139+
140+
if count_target.mean() < 200:
141+
print(
142+
f"Skipping employment income band {lower_bound} to {upper_bound} due to low count target mean: {count_target.mean()}"
143+
)
144+
continue
145+
146+
if amount_target.mean() < 200 * 30e3:
147+
print(
148+
f"Skipping employment income band {lower_bound} to {upper_bound} due to low amount target mean: {amount_target.mean()}"
149+
)
86150
continue
151+
87152
in_bound = (
88153
(employment_income >= lower_bound)
89154
& (employment_income < upper_bound)
90155
& (employment_income != 0)
91156
& (age >= 16)
92157
)
93158
band_str = f"{lower_bound}_{upper_bound}"
94-
matrix[f"hmrc/employment_income/count/{band_str}"] = sim.map_result(
95-
in_bound, "person", "household"
96-
)
97-
y[f"hmrc/employment_income/count/{band_str}"] = employment_incomes[
98-
(employment_incomes.employment_income_lower_bound == lower_bound)
99-
& (employment_incomes.employment_income_upper_bound == upper_bound)
100-
].employment_income_count.values
101-
102159
matrix[f"hmrc/employment_income/amount/{band_str}"] = sim.map_result(
103160
employment_income * in_bound, "person", "household"
104161
)
105-
y[f"hmrc/employment_income/amount/{band_str}"] = employment_incomes[
106-
(employment_incomes.employment_income_lower_bound == lower_bound)
107-
& (employment_incomes.employment_income_upper_bound == upper_bound)
108-
].employment_income_amount.values
162+
y[f"hmrc/employment_income/amount/{band_str}"] = amount_target
109163

110164
if uprate:
111165
y = uprate_targets(y, time_period)
@@ -128,7 +182,6 @@ def create_constituency_target_matrix(
128182
household_countries=sim.calculate("country").values,
129183
codes=const_2024.code,
130184
)
131-
132185
return matrix, y, country_mask
133186

134187

0 commit comments

Comments
 (0)