Skip to content

Commit 33f379b

Browse files
Merge pull request #163 from PolicyEngine/nikhilwoodruff/issue162
Make weights consistent and add 2023 FRS
2 parents 61c9533 + 7480e86 commit 33f379b

18 files changed

Lines changed: 474 additions & 46 deletions

File tree

.github/workflows/pull_request.yaml

Lines changed: 0 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -60,15 +60,3 @@ jobs:
6060
path: calibration_log.csv
6161
- name: Run tests
6262
run: pytest
63-
- name: Test documentation builds
64-
run: make documentation
65-
66-
- name: Check documentation build
67-
run: |
68-
for notebook in $(find docs/_build/jupyter_execute -name "*.ipynb"); do
69-
if grep -q '"output_type": "error"' "$notebook"; then
70-
echo "Error found in $notebook"
71-
cat "$notebook"
72-
exit 1
73-
fi
74-
done

.github/workflows/push.yaml

Lines changed: 0 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -68,16 +68,6 @@ jobs:
6868
run: make upload
6969
env:
7070
HUGGING_FACE_TOKEN: ${{ secrets.HUGGING_FACE_TOKEN }}
71-
- name: Test documentation builds
72-
run: make documentation
73-
- name: Build Jupyter Book
74-
run: make documentation
75-
- name: Deploy documentation
76-
uses: JamesIves/github-pages-deploy-action@releases/v3
77-
with:
78-
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
79-
BRANCH: gh-pages # The branch the action should deploy to.
80-
FOLDER: docs/_build/html
8171
- name: Publish a git tag
8272
run: ".github/publish-git-tag.sh || true"
8373
- name: Remove .whl files

changelog_entry.yaml

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,4 @@
1+
- bump: patch
2+
changes:
3+
fixed:
4+
- Added calibrated weights from 2022.

policyengine_uk_data/datasets/frs/dwp_frs.py

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -108,6 +108,15 @@ class DWP_FRS_2022_23(DWP_FRS):
108108
time_period = 2022
109109

110110

111+
class DWP_FRS_2023_24(DWP_FRS):
112+
folder = STORAGE_FOLDER / "frs_2023_24"
113+
name = "dwp_frs_2023_24"
114+
label = "DWP FRS (2023-24)"
115+
file_path = STORAGE_FOLDER / "dwp_frs_2023_24.h5"
116+
time_period = 2023
117+
118+
111119
if __name__ == "__main__":
112120
DWP_FRS_2020_21().generate()
113121
DWP_FRS_2022_23().generate()
122+
DWP_FRS_2023_24().generate()

policyengine_uk_data/datasets/frs/enhanced_frs.py

Lines changed: 14 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,10 @@
11
from policyengine_core.data import Dataset
22
from policyengine_uk_data.utils.imputations import *
33
from policyengine_uk_data.storage import STORAGE_FOLDER
4-
from policyengine_uk_data.datasets.frs.extended_frs import ExtendedFRS_2022_23
4+
from policyengine_uk_data.datasets.frs.extended_frs import (
5+
ExtendedFRS_2022_23,
6+
ExtendedFRS_2023_24,
7+
)
58
from policyengine_uk_data.datasets.frs.frs import FRS_2022_23
69
from policyengine_uk_data.utils.loss import create_target_matrix
710

@@ -156,6 +159,14 @@ class EnhancedFRS_2022_23(EnhancedFRS):
156159
end_year = 2028
157160

158161

162+
class EnhancedFRS_2023_24(EnhancedFRS):
163+
name = "enhanced_frs_2023_24"
164+
label = "Enhanced FRS (2023-24)"
165+
file_path = STORAGE_FOLDER / "enhanced_frs_2023_24.h5"
166+
data_format = Dataset.TIME_PERIOD_ARRAYS
167+
input_frs = ExtendedFRS_2023_24
168+
time_period = 2023
169+
170+
159171
if __name__ == "__main__":
160-
ReweightedFRS_2022_23().generate()
161-
EnhancedFRS_2022_23().generate()
172+
EnhancedFRS_2023_24().generate()

policyengine_uk_data/datasets/frs/extended_frs.py

Lines changed: 11 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22
from policyengine_uk_data.utils.imputations import *
33
from policyengine_uk_data.storage import STORAGE_FOLDER
44
from typing import Type
5-
from policyengine_uk_data.datasets.frs.frs import FRS_2022_23
5+
from policyengine_uk_data.datasets.frs.frs import FRS_2022_23, FRS_2023_24
66
from tqdm import tqdm
77

88

@@ -103,6 +103,15 @@ class ExtendedFRS_2022_23(ExtendedFRS):
103103
time_period = 2022
104104

105105

106+
class ExtendedFRS_2023_24(ExtendedFRS):
107+
name = "extended_frs_2023_24"
108+
label = "Extended FRS (2023-24)"
109+
file_path = STORAGE_FOLDER / "extended_frs_2023_24.h5"
110+
data_format = Dataset.TIME_PERIOD_ARRAYS
111+
input_frs = FRS_2023_24
112+
time_period = 2023
113+
114+
106115
def create_public_services_inputs(sim) -> pd.DataFrame:
107116
variables = [
108117
"age",
@@ -184,4 +193,4 @@ def add_public_services(data: dict, simulation, time_period: int):
184193

185194

186195
if __name__ == "__main__":
187-
ExtendedFRS_2022_23().generate()
196+
ExtendedFRS_2023_24().generate()

policyengine_uk_data/datasets/frs/frs.py

Lines changed: 10 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -190,6 +190,14 @@ class FRS_2022_23(FRS):
190190
time_period = 2022
191191

192192

193+
class FRS_2023_24(FRS):
194+
dwp_frs = DWP_FRS_2023_24
195+
name = "frs_2023_24"
196+
label = "FRS (2023-24)"
197+
file_path = STORAGE_FOLDER / "frs_2023_24.h5"
198+
time_period = 2023
199+
200+
193201
def add_id_variables(frs: h5py.File, person: DataFrame, household: DataFrame):
194202
"""Adds ID variables and weights.
195203
@@ -874,7 +882,7 @@ def add_expenses(
874882

875883

876884
def add_benunit_variables(frs: h5py.File, benunit: DataFrame):
877-
frs["benunit_rent"] = np.maximum(benunit.BURENT.fillna(0) * 52, 0)
885+
pass
878886

879887

880888
def impute_brmas(dataset, frs):
@@ -931,3 +939,4 @@ def impute_brmas(dataset, frs):
931939
if __name__ == "__main__":
932940
FRS_2020_21().generate()
933941
FRS_2022_23().generate()
942+
FRS_2023_24().generate()

policyengine_uk_data/datasets/frs/local_areas/constituencies/calibrate.py

Lines changed: 9 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,7 @@
2020
)
2121
from pathlib import Path
2222
from policyengine_uk_data.storage import STORAGE_FOLDER
23-
from policyengine_uk_data.datasets import EnhancedFRS_2022_23
23+
from policyengine_uk_data.datasets import EnhancedFRS_2023_24
2424

2525
FOLDER = Path(__file__).parent
2626

@@ -32,14 +32,14 @@ def calibrate(
3232
overwrite_efrs=True,
3333
):
3434
matrix_, y_, country_mask = create_constituency_target_matrix(
35-
EnhancedFRS_2022_23, 2025
35+
EnhancedFRS_2023_24, 2025
3636
)
3737

3838
m_national_, y_national_ = create_national_target_matrix(
39-
EnhancedFRS_2022_23, 2025
39+
EnhancedFRS_2023_24, 2025
4040
)
4141

42-
sim = Microsimulation(dataset=EnhancedFRS_2022_23)
42+
sim = Microsimulation(dataset=EnhancedFRS_2023_24)
4343

4444
COUNT_CONSTITUENCIES = 650
4545

@@ -181,12 +181,13 @@ def dropout_weights(weights, p):
181181

182182
if overwrite_efrs:
183183
with h5py.File(
184-
STORAGE_FOLDER / "enhanced_frs_2022_23.h5", "r+"
184+
STORAGE_FOLDER / "enhanced_frs_2023_24.h5", "r+"
185185
) as f:
186-
if "household_weight/2025" in f:
187-
del f["household_weight/2025"]
186+
if "household_weight/2023" in f:
187+
del f["household_weight/2023"]
188188
f.create_dataset(
189-
"household_weight/2025", data=final_weights.sum(axis=0)
189+
"household_weight/2023",
190+
data=final_weights.sum(axis=0) / 1.021,
190191
)
191192
l.backward()
192193
optimizer.step()

policyengine_uk_data/datasets/frs/local_areas/local_authorities/calibrate.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -12,21 +12,21 @@
1212
create_local_authority_target_matrix,
1313
create_national_target_matrix,
1414
)
15-
from policyengine_uk_data.datasets import EnhancedFRS_2022_23
15+
from policyengine_uk_data.datasets import EnhancedFRS_2023_24
1616

1717
DEVICE = "cpu"
1818

1919

2020
def calibrate():
2121
matrix, y, r = create_local_authority_target_matrix(
22-
EnhancedFRS_2022_23, 2025
22+
EnhancedFRS_2023_24, 2025
2323
)
2424

2525
m_national, y_national = create_national_target_matrix(
26-
EnhancedFRS_2022_23, 2025
26+
EnhancedFRS_2023_24, 2025
2727
)
2828

29-
sim = Microsimulation(dataset=EnhancedFRS_2022_23)
29+
sim = Microsimulation(dataset=EnhancedFRS_2023_24)
3030

3131
count_local_authority = 360
3232

policyengine_uk_data/storage/download_private_prerequisites.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,7 @@ def extract_zipped_folder(folder):
1414
FILES = [
1515
"frs_2020_21.zip",
1616
"frs_2022_23.zip",
17+
"frs_2023_24.zip",
1718
"lcfs_2021_22.zip",
1819
"was_2006_20.zip",
1920
"etb_1977_21.zip",

0 commit comments

Comments
 (0)