Format OBR importer for black CI

MaxGhenis · MaxGhenis · commit e08bc72589b7 · 2026-04-07T10:13:00.000-04:00
diff --git a/policyengine_uk/tests/test_import_obr_forecasts.py b/policyengine_uk/tests/test_import_obr_forecasts.py
@@ -52,9 +52,19 @@ def make_test_xlsx() -> bytes:
 
     sheet_16 = make_sheet(
         {
-            3: [make_inline_cell("Q3", "Average weekly earnings growth (per cent)")],
-            97: [make_inline_cell("B97", "2025"), make_number_cell("Q97", 5.17)],
-            98: [make_inline_cell("B98", "2026"), make_number_cell("Q98", 3.33)],
+            3: [
+                make_inline_cell(
+                    "Q3", "Average weekly earnings growth (per cent)"
+                )
+            ],
+            97: [
+                make_inline_cell("B97", "2025"),
+                make_number_cell("Q97", 5.17),
+            ],
+            98: [
+                make_inline_cell("B98", "2026"),
+                make_number_cell("Q98", 3.33),
+            ],
         }
     )
     sheet_17 = make_sheet(
@@ -94,11 +104,18 @@ def make_test_xlsx() -> bytes:
         {
             3: [
                 make_inline_cell(
-                    "D3", "House price index (per cent change on a year earlier)"
+                    "D3",
+                    "House price index (per cent change on a year earlier)",
                 )
             ],
-            97: [make_inline_cell("B97", "2025"), make_number_cell("D97", 2.80)],
-            98: [make_inline_cell("B98", "2026"), make_number_cell("D98", 2.40)],
+            97: [
+                make_inline_cell("B97", "2025"),
+                make_number_cell("D97", 2.80),
+            ],
+            98: [
+                make_inline_cell("B98", "2026"),
+                make_number_cell("D98", 2.40),
+            ],
         }
     )
 
@@ -125,7 +142,9 @@ def test_extract_annual_series_from_xlsx():
 
 
 def test_release_inference_helpers():
-    assert infer_release("Economy_Detailed_forecast_tables_November_2025.xlsx") == (
+    assert infer_release(
+        "Economy_Detailed_forecast_tables_November_2025.xlsx"
+    ) == (
         "November",
         2025,
     )
@@ -137,8 +156,7 @@ def test_release_inference_helpers():
 
 def test_update_yoy_growth_yaml_updates_forecast_window_only(tmp_path):
     yaml_path = tmp_path / "yoy_growth.yaml"
-    yaml_path.write_text(
-        """obr:
+    yaml_path.write_text("""obr:
   rpi:
     values:
       2024-01-01: 0.0300
@@ -209,8 +227,7 @@ def test_update_yoy_growth_yaml_updates_forecast_window_only(tmp_path):
       reference:
         - title: Old
           href: https://example.com/old
-"""
-    )
+""")
 
     update_yoy_growth_yaml(
         yaml_path=yaml_path,
@@ -229,17 +246,19 @@ def test_update_yoy_growth_yaml_updates_forecast_window_only(tmp_path):
     assert "2025-01-01: 0.0280" in content
     assert "2026-01-01: 0.0240" in content
     assert (
-        "OBR EFO March 2026 (detailed forecast tables, economy, Table 1.16)" in content
+        "OBR EFO March 2026 (detailed forecast tables, economy, Table 1.16)"
+        in content
+    )
+    assert (
+        "https://obr.uk/efo/economic-and-fiscal-outlook-march-2026/" in content
     )
-    assert "https://obr.uk/efo/economic-and-fiscal-outlook-march-2026/" in content
 
 
 def test_update_yoy_growth_yaml_keeps_existing_values_when_obr_has_blank_years(
     tmp_path,
 ):
     yaml_path = tmp_path / "yoy_growth.yaml"
-    yaml_path.write_text(
-        """obr:
+    yaml_path.write_text("""obr:
   mortgage_interest:
     values:
       2025-01-01: 0.0000
@@ -303,8 +322,7 @@ def test_update_yoy_growth_yaml_keeps_existing_values_when_obr_has_blank_years(
       reference:
         - title: OBR EFO November 2025 (detailed forecast tables, economy, Table 1.7)
           href: https://obr.uk/efo/economic-and-fiscal-outlook-november-2025/
-"""
-    )
+""")
 
     update_yoy_growth_yaml(
         yaml_path=yaml_path,
diff --git a/policyengine_uk/utils/import_obr_forecasts.py b/policyengine_uk/utils/import_obr_forecasts.py
@@ -126,7 +126,9 @@ def read_url_bytes(url: str) -> bytes:
         return response.read()
 
 
-def load_source_bytes(url: str | None, file_path: str | None) -> tuple[str, bytes]:
+def load_source_bytes(
+    url: str | None, file_path: str | None
+) -> tuple[str, bytes]:
     if bool(url) == bool(file_path):
         raise ValueError("Pass exactly one of --url or --file")
 
@@ -155,10 +157,13 @@ def extract_economy_workbook_bytes(
         candidates = [
             name
             for name in archive.namelist()
-            if name.lower().endswith(".xlsx") and "economy" in Path(name).name.lower()
+            if name.lower().endswith(".xlsx")
+            and "economy" in Path(name).name.lower()
         ]
         if not candidates:
-            raise ValueError("Could not find an economy workbook in the source")
+            raise ValueError(
+                "Could not find an economy workbook in the source"
+            )
         candidates.sort()
         workbook_name = candidates[0]
         return Path(workbook_name).name, archive.read(workbook_name)
@@ -171,7 +176,9 @@ def _load_shared_strings(archive: ZipFile) -> list[str]:
     root = ET.fromstring(archive.read("xl/sharedStrings.xml"))
     values: list[str] = []
     for item in root.findall("main:si", WORKBOOK_NS):
-        parts = [node.text or "" for node in item.iterfind(".//main:t", WORKBOOK_NS)]
+        parts = [
+            node.text or "" for node in item.iterfind(".//main:t", WORKBOOK_NS)
+        ]
         values.append("".join(parts))
     return values
 
@@ -208,7 +215,9 @@ def _cell_value(cell: ET.Element, shared_strings: list[str]) -> str | None:
     return None
 
 
-def read_sheet_rows(xlsx_bytes: bytes, sheet_name: str) -> list[dict[str, str | None]]:
+def read_sheet_rows(
+    xlsx_bytes: bytes, sheet_name: str
+) -> list[dict[str, str | None]]:
     with ZipFile(BytesIO(xlsx_bytes)) as archive:
         shared_strings = _load_shared_strings(archive)
         sheet_path = _sheet_paths(archive)[sheet_name]
@@ -225,26 +234,38 @@ def read_sheet_rows(xlsx_bytes: bytes, sheet_name: str) -> list[dict[str, str |
     return rows
 
 
-def find_series_column(rows: list[dict[str, str | None]], spec: SeriesSpec) -> str:
+def find_series_column(
+    rows: list[dict[str, str | None]], spec: SeriesSpec
+) -> str:
     headers: dict[str, str] = {}
     for row in rows[:4]:
         for column, value in row.items():
             label = normalise_label(value)
-            if label and (column not in headers or is_generic_header(headers[column])):
+            if label and (
+                column not in headers or is_generic_header(headers[column])
+            ):
                 headers[column] = label
 
     header = headers.get(spec.column, "")
     if spec.mode == "exact" and header in spec.needles:
         return spec.column
-    if spec.mode == "contains" and any(needle in header for needle in spec.needles):
+    if spec.mode == "contains" and any(
+        needle in header for needle in spec.needles
+    ):
         return spec.column
-    if spec.mode == "contains_all" and all(needle in header for needle in spec.needles):
+    if spec.mode == "contains_all" and all(
+        needle in header for needle in spec.needles
+    ):
         return spec.column
 
-    raise ValueError(f"Could not find a column for {spec.key} in sheet {spec.sheet}")
+    raise ValueError(
+        f"Could not find a column for {spec.key} in sheet {spec.sheet}"
+    )
 
 
-def extract_annual_series_from_xlsx(xlsx_bytes: bytes) -> dict[str, dict[int, float]]:
+def extract_annual_series_from_xlsx(
+    xlsx_bytes: bytes,
+) -> dict[str, dict[int, float]]:
     rows_by_sheet = {
         sheet: read_sheet_rows(xlsx_bytes, sheet)
         for sheet in {spec.sheet for spec in SERIES_SPECS}
@@ -307,10 +328,14 @@ def replace_year_value(section: str, year: int, value: float) -> str:
 
 
 def replace_first_reference(section: str, title: str, href: str) -> str:
-    title_pattern = re.compile(r"(^        - title:\s*).*$", flags=re.MULTILINE)
+    title_pattern = re.compile(
+        r"(^        - title:\s*).*$", flags=re.MULTILINE
+    )
     href_pattern = re.compile(r"(^          href:\s*).*$", flags=re.MULTILINE)
 
-    updated, title_count = title_pattern.subn(rf"\g<1>{title}", section, count=1)
+    updated, title_count = title_pattern.subn(
+        rf"\g<1>{title}", section, count=1
+    )
     if title_count == 0:
         raise ValueError("Could not find reference title in section")
 
@@ -320,7 +345,9 @@ def replace_first_reference(section: str, title: str, href: str) -> str:
     return updated
 
 
-def replace_series_section(content: str, series_key: str, updated_section: str) -> str:
+def replace_series_section(
+    content: str, series_key: str, updated_section: str
+) -> str:
     pattern = re.compile(
         rf"(^  {series_key}:\n.*?)(?=^  [a-z_]+:|\Z)",
         flags=re.MULTILINE | re.DOTALL,
@@ -356,7 +383,9 @@ def update_yoy_growth_yaml(
 
         available_years = [
             target_year
-            for target_year in range(forecast_start_year, forecast_end_year + 1)
+            for target_year in range(
+                forecast_start_year, forecast_end_year + 1
+            )
             if target_year in series_values[spec.key]
         ]
         if not available_years:
@@ -458,12 +487,14 @@ def main(argv: list[str] | None = None) -> int:
         month = args.release_month.capitalize()
         year = args.release_year
     elif args.release_month or args.release_year:
-        raise ValueError("Pass both --release-month and --release-year together")
+        raise ValueError(
+            "Pass both --release-month and --release-year together"
+        )
     else:
         month, year = infer_release(f"{source_name} {workbook_name}")
 
-    forecast_start_year = args.forecast_start_year or infer_forecast_start_year(
-        month, year
+    forecast_start_year = (
+        args.forecast_start_year or infer_forecast_start_year(month, year)
     )
     print_summary(series_values, forecast_start_year, args.forecast_years)