Format

nikhilwoodruff · nikhilwoodruff · commit 466288bd4a60 · 2025-10-20T11:30:22.000+01:00
diff --git a/policyengine_uk_data/datasets/local_areas/constituencies/loss.py b/policyengine_uk_data/datasets/local_areas/constituencies/loss.py
@@ -129,7 +129,9 @@ def create_constituency_target_matrix(
     # UC household count by constituency
     y["uc_households"] = uc_pc_households.household_count.values
     matrix["uc_households"] = sim.map_result(
-        (sim.calculate("universal_credit").values > 0).astype(int), "benunit", "household"
+        (sim.calculate("universal_credit").values > 0).astype(int),
+        "benunit",
+        "household",
     )
 
     for lower_bound, upper_bound in zip(bounds[:-1], bounds[1:]):
diff --git a/policyengine_uk_data/datasets/spi.py b/policyengine_uk_data/datasets/spi.py
@@ -22,25 +22,22 @@ def create_spi(
     household["household_weight"] = df.FACT
     person["dividend_income"] = df.DIVIDENDS
     person["gift_aid"] = df.GIFTAID
-    household["region"] = (
-        df.GORCODE.map(
-            {
-                1: "NORTH_EAST",
-                2: "NORTH_WEST",
-                3: "YORKSHIRE",
-                4: "EAST_MIDLANDS",
-                5: "WEST_MIDLANDS",
-                6: "EAST_OF_ENGLAND",
-                7: "LONDON",
-                8: "SOUTH_EAST",
-                9: "SOUTH_WEST",
-                10: "WALES",
-                11: "SCOTLAND",
-                12: "NORTHERN_IRELAND",
-            }
-        )
-        .fillna("SOUTH_EAST")
-    )
+    household["region"] = df.GORCODE.map(
+        {
+            1: "NORTH_EAST",
+            2: "NORTH_WEST",
+            3: "YORKSHIRE",
+            4: "EAST_MIDLANDS",
+            5: "WEST_MIDLANDS",
+            6: "EAST_OF_ENGLAND",
+            7: "LONDON",
+            8: "SOUTH_EAST",
+            9: "SOUTH_WEST",
+            10: "WALES",
+            11: "SCOTLAND",
+            12: "NORTHERN_IRELAND",
+        }
+    ).fillna("SOUTH_EAST")
     household["rent"] = 0
     household["tenure_type"] = "OWNED_OUTRIGHT"
     household["council_tax"] = 0
diff --git a/policyengine_uk_data/utils/uc_data.py b/policyengine_uk_data/utils/uc_data.py
@@ -27,14 +27,20 @@ def _parse_uc_national_payment_dist():
             household_count = df.iloc[idx, col_idx]
 
             # Skip missing, ".." (suppressed), or zero values
-            if pd.isna(household_count) or household_count == ".." or household_count == 0:
+            if (
+                pd.isna(household_count)
+                or household_count == ".."
+                or household_count == 0
+            ):
                 continue
 
-            data_rows.append({
-                "monthly_award_band": award_band,
-                "family_type": family_type,
-                "household_count": int(household_count)
-            })
+            data_rows.append(
+                {
+                    "monthly_award_band": award_band,
+                    "family_type": family_type,
+                    "household_count": int(household_count),
+                }
+            )
 
     result_df = pd.DataFrame(data_rows)
 
@@ -46,21 +52,30 @@ def parse_band(band):
             return float(parts[0]) * 12, float(parts[1]) * 12
         return None, None
 
-    result_df[["uc_annual_payment_min", "uc_annual_payment_max"]] = result_df["monthly_award_band"].apply(
-        lambda x: pd.Series(parse_band(x))
-    )
+    result_df[["uc_annual_payment_min", "uc_annual_payment_max"]] = result_df[
+        "monthly_award_band"
+    ].apply(lambda x: pd.Series(parse_band(x)))
 
     # Map family types to constant names
     family_type_mapping = {
         "Single, no children": "SINGLE",
         "Single, with children": "LONE_PARENT",
         "Couple, no children": "COUPLE_NO_CHILDREN",
-        "Couple, with children": "COUPLE_WITH_CHILDREN"
+        "Couple, with children": "COUPLE_WITH_CHILDREN",
     }
-    result_df["family_type"] = result_df["family_type"].map(family_type_mapping)
+    result_df["family_type"] = result_df["family_type"].map(
+        family_type_mapping
+    )
 
     # Reorder columns and drop monthly band
-    result_df = result_df[["uc_annual_payment_min", "uc_annual_payment_max", "family_type", "household_count"]]
+    result_df = result_df[
+        [
+            "uc_annual_payment_min",
+            "uc_annual_payment_max",
+            "family_type",
+            "household_count",
+        ]
+    ]
 
     return result_df
 
@@ -80,43 +95,55 @@ def _parse_uc_pc_households():
         household_count = df_gb.iloc[idx, 3]  # Column 3: household count
 
         # Skip if empty, invalid, Total row, or Unknown
-        if pd.isna(constituency) or pd.isna(household_count) or constituency in ["Total", "Unknown"]:
+        if (
+            pd.isna(constituency)
+            or pd.isna(household_count)
+            or constituency in ["Total", "Unknown"]
+        ):
             continue
 
-        gb_data_rows.append({
-            "constituency_name": constituency,
-            "household_count": int(household_count)
-        })
+        gb_data_rows.append(
+            {
+                "constituency_name": constituency,
+                "household_count": int(household_count),
+            }
+        )
 
     # Parse NI data
     ni_file_path = storage_path / "dfc-ni-uc-stats-supp-tables-may-2025.ods"
-    df_ni = pd.read_excel(ni_file_path, sheet_name='5b', engine='odf', header=None)
+    df_ni = pd.read_excel(
+        ni_file_path, sheet_name="5b", engine="odf", header=None
+    )
 
     # Get constituency names from row 2, columns 1-18
     ni_constituencies = df_ni.iloc[2, 1:19].tolist()
 
     # Find May 2025 row
-    may_2025_row = df_ni[df_ni[0] == 'May 2025'].iloc[0]
+    may_2025_row = df_ni[df_ni[0] == "May 2025"].iloc[0]
 
     ni_data_rows = []
     for col_idx, constituency_name in enumerate(ni_constituencies, start=1):
         household_count = may_2025_row[col_idx]
 
         if pd.notna(household_count) and household_count != 0:
-            ni_data_rows.append({
-                "constituency_name": constituency_name,
-                "household_count": int(household_count)
-            })
+            ni_data_rows.append(
+                {
+                    "constituency_name": constituency_name,
+                    "household_count": int(household_count),
+                }
+            )
 
     # Combine GB and NI data
     result_df = pd.DataFrame(gb_data_rows + ni_data_rows)
 
     # Scale constituency counts to match national total
-    national_total = _parse_uc_national_payment_dist()['household_count'].sum()
-    constituency_total = result_df['household_count'].sum()
+    national_total = _parse_uc_national_payment_dist()["household_count"].sum()
+    constituency_total = result_df["household_count"].sum()
     scaling_factor = national_total / constituency_total
 
-    result_df['household_count'] = (result_df['household_count'] * scaling_factor).round().astype(int)
+    result_df["household_count"] = (
+        (result_df["household_count"] * scaling_factor).round().astype(int)
+    )
 
     return result_df