Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions changelog_entry.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
- bump: minor
changes:
fixed:
- Inconsistent local area targets removed.
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@ def calibrate(
# Weights - 650 x 100180
original_weights = np.log(
sim.calculate("household_weight", 2025).values / COUNT_CONSTITUENCIES
+ np.random.random(len(sim.calculate("household_weight", 2025).values))
* 0.01
)
weights = torch.tensor(
np.ones((COUNT_CONSTITUENCIES, len(original_weights)))
Expand Down Expand Up @@ -123,7 +125,7 @@ def dropout_weights(weights, p):
masked_weights[mask] = mean
return masked_weights

optimizer = torch.optim.Adam([weights], lr=0.15)
optimizer = torch.optim.Adam([weights], lr=1e-1)

desc = range(128) if os.environ.get("DATA_LITE") else range(epochs)
final_weights = (torch.exp(weights) * r).detach().numpy()
Expand All @@ -133,10 +135,8 @@ def dropout_weights(weights, p):
optimizer.zero_grad()
weights_ = torch.exp(dropout_weights(weights, 0.05)) * r
l = loss(weights_)
l.backward()
optimizer.step()
c_close = pct_close(weights_, constituency=True, national=False)
n_close = pct_close(weights_, constituency=False, national=True)
c_close = pct_close(weights_, constituency=True, national=False, t=0.1)
n_close = pct_close(weights_, constituency=False, national=True, t=0.1)
if epoch % 1 == 0:
if dropout_targets:
validation_loss = loss(weights_, validation=True)
Expand Down Expand Up @@ -181,6 +181,8 @@ def dropout_weights(weights, p):
f.create_dataset(
"household_weight/2025", data=final_weights.sum(axis=0)
)
l.backward()
optimizer.step()

return final_weights

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ def create_constituency_target_matrix(
INCOME_VARIABLES = [
"total_income",
"self_employment_income",
"employment_income",
]

for income_variable in INCOME_VARIABLES:
Expand Down Expand Up @@ -81,31 +82,84 @@ def create_constituency_target_matrix(
employment_incomes.employment_income_lower_bound.sort_values().unique()
) + [np.inf]

employment_incomes_all = (
employment_incomes.groupby("code")[
["employment_income_count", "employment_income_amount"]
]
.sum()
.reset_index()
)

hmrc_all_count_target = incomes["employment_income_count"].values
ons_all_count_target = employment_incomes_all[
"employment_income_count"
].values
count_scaling_factors = hmrc_all_count_target / ons_all_count_target

hmrc_all_amount_target = incomes["employment_income_amount"].values
ons_all_amount_target = employment_incomes_all[
"employment_income_amount"
].values
amount_scaling_factors = hmrc_all_amount_target / ons_all_amount_target

for lower_bound, upper_bound in zip(bounds[:-1], bounds[1:]):
if lower_bound < 12_570 or upper_bound > 70_000:
if (
lower_bound <= 15_000
): # Skip some targets with very small sample sizes
continue
if upper_bound >= 200_000:
continue
count_target = (
employment_incomes[
(
employment_incomes.employment_income_lower_bound
== lower_bound
)
& (
employment_incomes.employment_income_upper_bound
== upper_bound
)
].employment_income_count.values
* count_scaling_factors
)

amount_target = (
employment_incomes[
(
employment_incomes.employment_income_lower_bound
== lower_bound
)
& (
employment_incomes.employment_income_upper_bound
== upper_bound
)
].employment_income_amount.values
* amount_scaling_factors
)

if count_target.mean() < 200:
print(
f"Skipping employment income band {lower_bound} to {upper_bound} due to low count target mean: {count_target.mean()}"
)
continue

if amount_target.mean() < 200 * 30e3:
print(
f"Skipping employment income band {lower_bound} to {upper_bound} due to low amount target mean: {amount_target.mean()}"
)
continue

in_bound = (
(employment_income >= lower_bound)
& (employment_income < upper_bound)
& (employment_income != 0)
& (age >= 16)
)
band_str = f"{lower_bound}_{upper_bound}"
matrix[f"hmrc/employment_income/count/{band_str}"] = sim.map_result(
in_bound, "person", "household"
)
y[f"hmrc/employment_income/count/{band_str}"] = employment_incomes[
(employment_incomes.employment_income_lower_bound == lower_bound)
& (employment_incomes.employment_income_upper_bound == upper_bound)
].employment_income_count.values

matrix[f"hmrc/employment_income/amount/{band_str}"] = sim.map_result(
employment_income * in_bound, "person", "household"
)
y[f"hmrc/employment_income/amount/{band_str}"] = employment_incomes[
(employment_incomes.employment_income_lower_bound == lower_bound)
& (employment_incomes.employment_income_upper_bound == upper_bound)
].employment_income_amount.values
y[f"hmrc/employment_income/amount/{band_str}"] = amount_target

if uprate:
y = uprate_targets(y, time_period)
Expand All @@ -128,7 +182,6 @@ def create_constituency_target_matrix(
household_countries=sim.calculate("country").values,
codes=const_2024.code,
)

return matrix, y, country_mask


Expand Down
Loading