Skip to content

Commit 466288b

Browse files
Format
1 parent d942f94 commit 466288b

3 files changed

Lines changed: 72 additions & 46 deletions

File tree

policyengine_uk_data/datasets/local_areas/constituencies/loss.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -129,7 +129,9 @@ def create_constituency_target_matrix(
129129
# UC household count by constituency
130130
y["uc_households"] = uc_pc_households.household_count.values
131131
matrix["uc_households"] = sim.map_result(
132-
(sim.calculate("universal_credit").values > 0).astype(int), "benunit", "household"
132+
(sim.calculate("universal_credit").values > 0).astype(int),
133+
"benunit",
134+
"household",
133135
)
134136

135137
for lower_bound, upper_bound in zip(bounds[:-1], bounds[1:]):

policyengine_uk_data/datasets/spi.py

Lines changed: 16 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -22,25 +22,22 @@ def create_spi(
2222
household["household_weight"] = df.FACT
2323
person["dividend_income"] = df.DIVIDENDS
2424
person["gift_aid"] = df.GIFTAID
25-
household["region"] = (
26-
df.GORCODE.map(
27-
{
28-
1: "NORTH_EAST",
29-
2: "NORTH_WEST",
30-
3: "YORKSHIRE",
31-
4: "EAST_MIDLANDS",
32-
5: "WEST_MIDLANDS",
33-
6: "EAST_OF_ENGLAND",
34-
7: "LONDON",
35-
8: "SOUTH_EAST",
36-
9: "SOUTH_WEST",
37-
10: "WALES",
38-
11: "SCOTLAND",
39-
12: "NORTHERN_IRELAND",
40-
}
41-
)
42-
.fillna("SOUTH_EAST")
43-
)
25+
household["region"] = df.GORCODE.map(
26+
{
27+
1: "NORTH_EAST",
28+
2: "NORTH_WEST",
29+
3: "YORKSHIRE",
30+
4: "EAST_MIDLANDS",
31+
5: "WEST_MIDLANDS",
32+
6: "EAST_OF_ENGLAND",
33+
7: "LONDON",
34+
8: "SOUTH_EAST",
35+
9: "SOUTH_WEST",
36+
10: "WALES",
37+
11: "SCOTLAND",
38+
12: "NORTHERN_IRELAND",
39+
}
40+
).fillna("SOUTH_EAST")
4441
household["rent"] = 0
4542
household["tenure_type"] = "OWNED_OUTRIGHT"
4643
household["council_tax"] = 0

policyengine_uk_data/utils/uc_data.py

Lines changed: 53 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -27,14 +27,20 @@ def _parse_uc_national_payment_dist():
2727
household_count = df.iloc[idx, col_idx]
2828

2929
# Skip missing, ".." (suppressed), or zero values
30-
if pd.isna(household_count) or household_count == ".." or household_count == 0:
30+
if (
31+
pd.isna(household_count)
32+
or household_count == ".."
33+
or household_count == 0
34+
):
3135
continue
3236

33-
data_rows.append({
34-
"monthly_award_band": award_band,
35-
"family_type": family_type,
36-
"household_count": int(household_count)
37-
})
37+
data_rows.append(
38+
{
39+
"monthly_award_band": award_band,
40+
"family_type": family_type,
41+
"household_count": int(household_count),
42+
}
43+
)
3844

3945
result_df = pd.DataFrame(data_rows)
4046

@@ -46,21 +52,30 @@ def parse_band(band):
4652
return float(parts[0]) * 12, float(parts[1]) * 12
4753
return None, None
4854

49-
result_df[["uc_annual_payment_min", "uc_annual_payment_max"]] = result_df["monthly_award_band"].apply(
50-
lambda x: pd.Series(parse_band(x))
51-
)
55+
result_df[["uc_annual_payment_min", "uc_annual_payment_max"]] = result_df[
56+
"monthly_award_band"
57+
].apply(lambda x: pd.Series(parse_band(x)))
5258

5359
# Map family types to constant names
5460
family_type_mapping = {
5561
"Single, no children": "SINGLE",
5662
"Single, with children": "LONE_PARENT",
5763
"Couple, no children": "COUPLE_NO_CHILDREN",
58-
"Couple, with children": "COUPLE_WITH_CHILDREN"
64+
"Couple, with children": "COUPLE_WITH_CHILDREN",
5965
}
60-
result_df["family_type"] = result_df["family_type"].map(family_type_mapping)
66+
result_df["family_type"] = result_df["family_type"].map(
67+
family_type_mapping
68+
)
6169

6270
# Reorder columns and drop monthly band
63-
result_df = result_df[["uc_annual_payment_min", "uc_annual_payment_max", "family_type", "household_count"]]
71+
result_df = result_df[
72+
[
73+
"uc_annual_payment_min",
74+
"uc_annual_payment_max",
75+
"family_type",
76+
"household_count",
77+
]
78+
]
6479

6580
return result_df
6681

@@ -80,43 +95,55 @@ def _parse_uc_pc_households():
8095
household_count = df_gb.iloc[idx, 3] # Column 3: household count
8196

8297
# Skip if empty, invalid, Total row, or Unknown
83-
if pd.isna(constituency) or pd.isna(household_count) or constituency in ["Total", "Unknown"]:
98+
if (
99+
pd.isna(constituency)
100+
or pd.isna(household_count)
101+
or constituency in ["Total", "Unknown"]
102+
):
84103
continue
85104

86-
gb_data_rows.append({
87-
"constituency_name": constituency,
88-
"household_count": int(household_count)
89-
})
105+
gb_data_rows.append(
106+
{
107+
"constituency_name": constituency,
108+
"household_count": int(household_count),
109+
}
110+
)
90111

91112
# Parse NI data
92113
ni_file_path = storage_path / "dfc-ni-uc-stats-supp-tables-may-2025.ods"
93-
df_ni = pd.read_excel(ni_file_path, sheet_name='5b', engine='odf', header=None)
114+
df_ni = pd.read_excel(
115+
ni_file_path, sheet_name="5b", engine="odf", header=None
116+
)
94117

95118
# Get constituency names from row 2, columns 1-18
96119
ni_constituencies = df_ni.iloc[2, 1:19].tolist()
97120

98121
# Find May 2025 row
99-
may_2025_row = df_ni[df_ni[0] == 'May 2025'].iloc[0]
122+
may_2025_row = df_ni[df_ni[0] == "May 2025"].iloc[0]
100123

101124
ni_data_rows = []
102125
for col_idx, constituency_name in enumerate(ni_constituencies, start=1):
103126
household_count = may_2025_row[col_idx]
104127

105128
if pd.notna(household_count) and household_count != 0:
106-
ni_data_rows.append({
107-
"constituency_name": constituency_name,
108-
"household_count": int(household_count)
109-
})
129+
ni_data_rows.append(
130+
{
131+
"constituency_name": constituency_name,
132+
"household_count": int(household_count),
133+
}
134+
)
110135

111136
# Combine GB and NI data
112137
result_df = pd.DataFrame(gb_data_rows + ni_data_rows)
113138

114139
# Scale constituency counts to match national total
115-
national_total = _parse_uc_national_payment_dist()['household_count'].sum()
116-
constituency_total = result_df['household_count'].sum()
140+
national_total = _parse_uc_national_payment_dist()["household_count"].sum()
141+
constituency_total = result_df["household_count"].sum()
117142
scaling_factor = national_total / constituency_total
118143

119-
result_df['household_count'] = (result_df['household_count'] * scaling_factor).round().astype(int)
144+
result_df["household_count"] = (
145+
(result_df["household_count"] * scaling_factor).round().astype(int)
146+
)
120147

121148
return result_df
122149

0 commit comments

Comments
 (0)