Skip to content

Commit a06f239

Browse files
Merge pull request #129 from PolicyEngine/fixes
Add tests for calibration improvements
2 parents 486a30d + f8d7260 commit a06f239

5 files changed

Lines changed: 90 additions & 21 deletions

File tree

.github/workflows/push.yaml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -68,11 +68,11 @@ jobs:
6868
- name: Build Jupyter Book
6969
run: make documentation
7070
- name: Deploy documentation
71-
uses: JamesIves/github-pages-deploy-action@releases/v4
71+
uses: JamesIves/github-pages-deploy-action@releases/v3
7272
with:
73-
token: ${{ secrets.GITHUB_TOKEN }}
74-
branch: gh-pages
75-
folder: docs/_build/html
73+
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
74+
BRANCH: gh-pages # The branch the action should deploy to.
75+
FOLDER: docs/_build/html
7676
- name: Publish a git tag
7777
run: ".github/publish-git-tag.sh || true"
7878
- name: Remove .whl files

changelog_entry.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
- bump: patch
2+
changes:
3+
fixed:
4+
- Documentation publishes.
5+
- Local authority calibration consistent with constituency calibration.
6+
- Domestic rates are nonzero.

policyengine_uk_data/datasets/frs/frs.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,15 @@ def generate(self):
8686
for variable in frs:
8787
frs[variable] = {self.dwp_frs.time_period: np.array(frs[variable])}
8888

89+
# Domestic rates must be stored under 2025 as well as the survey time period
90+
domestic_rates = np.array(
91+
frs["domestic_rates"][self.dwp_frs.time_period]
92+
)
93+
frs["domestic_rates"] = {
94+
self.dwp_frs.time_period: domestic_rates,
95+
"2025": domestic_rates,
96+
}
97+
8998
self.save_dataset(frs)
9099

91100
impute_brmas(self, frs)
@@ -414,7 +423,7 @@ def add_household_variables(frs: h5py.File, household: DataFrame, year: int):
414423
],
415424
)
416425
* 52
417-
)
426+
).astype(float)
418427

419428

420429
def add_market_income(

policyengine_uk_data/datasets/frs/local_areas/local_authorities/calibrate.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,9 @@ def calibrate():
3232

3333
# Weights - 360 x 100180 (local authorities x households)
3434
original_weights = np.log(
35-
(sim.calculate("household_weight", 2025).values + 1e-3)
36-
/ count_local_authority
35+
sim.calculate("household_weight", 2025).values / count_local_authority
36+
+ np.random.random(len(sim.calculate("household_weight", 2025).values))
37+
* 0.01
3738
)
3839
weights = torch.tensor(
3940
np.ones((count_local_authority, len(original_weights)))
@@ -93,7 +94,7 @@ def dropout_weights(weights, p):
9394
masked_weights[mask] = mean
9495
return masked_weights
9596

96-
optimizer = torch.optim.Adam([weights], lr=0.15)
97+
optimizer = torch.optim.Adam([weights], lr=1e-1)
9798

9899
desc = range(32) if os.environ.get("DATA_LITE") else range(128)
99100

policyengine_uk_data/datasets/frs/local_areas/local_authorities/loss.py

Lines changed: 66 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -75,31 +75,84 @@ def create_local_authority_target_matrix(
7575
employment_incomes.employment_income_lower_bound.sort_values().unique()
7676
) + [np.inf]
7777

78+
employment_incomes_all = (
79+
employment_incomes.groupby("code")[
80+
["employment_income_count", "employment_income_amount"]
81+
]
82+
.sum()
83+
.reset_index()
84+
)
85+
86+
hmrc_all_count_target = incomes["employment_income_count"].values
87+
ons_all_count_target = employment_incomes_all[
88+
"employment_income_count"
89+
].values
90+
count_scaling_factors = hmrc_all_count_target / ons_all_count_target
91+
92+
hmrc_all_amount_target = incomes["employment_income_amount"].values
93+
ons_all_amount_target = employment_incomes_all[
94+
"employment_income_amount"
95+
].values
96+
amount_scaling_factors = hmrc_all_amount_target / ons_all_amount_target
97+
7898
for lower_bound, upper_bound in zip(bounds[:-1], bounds[1:]):
79-
if lower_bound >= 70_000 or lower_bound < 12_570:
99+
if (
100+
lower_bound <= 15_000
101+
): # Skip some targets with very small sample sizes
80102
continue
103+
if upper_bound >= 200_000:
104+
continue
105+
count_target = (
106+
employment_incomes[
107+
(
108+
employment_incomes.employment_income_lower_bound
109+
== lower_bound
110+
)
111+
& (
112+
employment_incomes.employment_income_upper_bound
113+
== upper_bound
114+
)
115+
].employment_income_count.values
116+
* count_scaling_factors
117+
)
118+
119+
amount_target = (
120+
employment_incomes[
121+
(
122+
employment_incomes.employment_income_lower_bound
123+
== lower_bound
124+
)
125+
& (
126+
employment_incomes.employment_income_upper_bound
127+
== upper_bound
128+
)
129+
].employment_income_amount.values
130+
* amount_scaling_factors
131+
)
132+
133+
if count_target.mean() < 200:
134+
print(
135+
f"Skipping employment income band {lower_bound} to {upper_bound} due to low count target mean: {count_target.mean()}"
136+
)
137+
continue
138+
139+
if amount_target.mean() < 200 * 30e3:
140+
print(
141+
f"Skipping employment income band {lower_bound} to {upper_bound} due to low amount target mean: {amount_target.mean()}"
142+
)
143+
continue
144+
81145
in_bound = (
82146
(employment_income >= lower_bound)
83147
& (employment_income < upper_bound)
84148
& (employment_income != 0)
85149
& (age >= 16)
86150
)
87151
band_str = f"{lower_bound}_{upper_bound}"
88-
matrix[f"hmrc/employment_income/count/{band_str}"] = sim.map_result(
89-
in_bound, "person", "household"
90-
)
91-
y[f"hmrc/employment_income/count/{band_str}"] = employment_incomes[
92-
(employment_incomes.employment_income_lower_bound == lower_bound)
93-
& (employment_incomes.employment_income_upper_bound == upper_bound)
94-
].employment_income_count.values
95-
96152
matrix[f"hmrc/employment_income/amount/{band_str}"] = sim.map_result(
97153
employment_income * in_bound, "person", "household"
98154
)
99-
y[f"hmrc/employment_income/amount/{band_str}"] = employment_incomes[
100-
(employment_incomes.employment_income_lower_bound == lower_bound)
101-
& (employment_incomes.employment_income_upper_bound == upper_bound)
102-
].employment_income_amount.values
155+
y[f"hmrc/employment_income/amount/{band_str}"] = amount_target
103156

104157
if uprate:
105158
y = uprate_targets(y, time_period)

0 commit comments

Comments
 (0)