Health-Informatics-UoN · AndyRae · Feb 21, 2026 · Feb 21, 2026 · Feb 21, 2026 · Feb 21, 2026
@@ -17,5 +17,5 @@ jobs:
         uses: actions/checkout@v4
 
       - name: Run Ruff
-        uses: astral-sh/ruff-action@39f75e526a505e26a302f8796977b50c13720edf # v3.2.1
+        uses: astral-sh/ruff-action@4919ec5cf1f49eff0871dbcea0da843445b837e6 # v3.6.1
 
@@ -0,0 +1,7 @@
+{
+  "recommendations": [
+    "astral-sh.ty",
+    "ms-python.python",
+    "charliermarsh.ruff"
+  ]
+}
@@ -42,7 +42,12 @@ CI runs the full test suite; ensure `uv run pytest` passes locally before openin
 
 ## Commits and PRs
 
-- Use **Angular-style semantic commit messages** (e.g. `feat: add X`, `fix: handle Y`, `docs: update Z`). CI checks this.
+- Use **Angular-style semantic PR titles** (e.g. `feat: add X`, `fix: handle Y`, `docs: update Z`). CI checks this.
 - Open a PR against the default branch. A code owner will review and approve before merge.
 
+## Releases
+
+Releases are created automatically by `semantic-release` based on PR titles, so make sure each PR title follows the semantic format described above.
+
 Thank you for contributing.
+
@@ -5,12 +5,10 @@
 with hardcoded test inputs.
 """
 
-from typing import Any, Dict
-
 from nuh_helper import shift_excel_dates
 
 
-def main():
+def main() -> None:
     """
     Main entry point with hardcoded inputs for dev/testing.
     """
@@ -22,7 +20,7 @@ def main():
     # Currently based on test.xlsx structure:
     # - 'patients' sheet: patient_id, gender, dob (date column)
     # - 'labs' sheet: patient_id, test_date (date column), result
-    sheet_configs: Dict[str, Dict[str, Any]] = {
+    sheet_configs: dict[str, dict[str, str | list[str] | int]] = {
         "patients": {
             "patient_id_col": "patient_id",
             "date_columns": ["dob", "date_of_diagnosis"],

@@ -8,16 +8,16 @@
 import random
 from datetime import date, datetime
 from pathlib import Path
-from typing import Any, Dict, List, Optional, Tuple, cast
+from typing import Any, cast
 
 import pandas as pd
 
 
 def generate_shift_mappings(
-    patient_ids: List[str],
+    patient_ids: list[str],
     min_shift_days: int = -15,
     max_shift_days: int = 15,
-    seed: Optional[int] = None,
+    seed: int | None = None,
 ) -> pd.DataFrame:
     """
     Generate random shift mappings for patient IDs.
@@ -55,14 +55,17 @@ def load_shift_mappings(csv_path: str) -> pd.DataFrame:
     return df
 
 
-def _parse_date_value(value: Any) -> Optional[pd.Timestamp]:
+def _parse_date_value(
+    value: str | float | int | datetime | date | pd.Timestamp | None,
+) -> pd.Timestamp | None:
     """Parse a value into a pandas Timestamp if possible."""
     if value is None or (isinstance(value, float) and pd.isna(value)):
         return None
 
     # Already datetime-like
     if isinstance(value, (pd.Timestamp, datetime, date)):
-        return pd.to_datetime(value, errors="coerce")
+        result = pd.to_datetime(value, errors="coerce")
+        return result if pd.notna(result) else None
 
     if isinstance(value, str):
         v = value.strip()
@@ -79,16 +82,14 @@ def _parse_date_value(value: Any) -> Optional[pd.Timestamp]:
                 pass
 
         # Fallback: let pandas try with dayfirst to handle ambiguous strings
-        parsed = pd.to_datetime(
-            v, errors="coerce", dayfirst=True, infer_datetime_format=True
-        )
+        parsed = pd.to_datetime(v, errors="coerce", dayfirst=True)
         return parsed if pd.notna(parsed) else None
 
     # Anything else: no parse
     return None
 
 
-def _normalize_patient_id(value: Any) -> Optional[str]:
+def _normalize_patient_id(value: str | float | int | None) -> str | None:
     """Normalize patient IDs by stripping whitespace and converting to string."""
     if value is None or (isinstance(value, float) and pd.isna(value)):
         return None
@@ -104,15 +105,15 @@ def _normalize_patient_id(value: Any) -> Optional[str]:
 def apply_date_shifts(
     df: pd.DataFrame,
     patient_id_col: str,
-    date_columns: List[str],
+    date_columns: list[str],
     shift_mappings: pd.DataFrame,
-    date_format: Optional[str] = None,
+    date_format: str | None = None,
 ) -> pd.DataFrame:
     """
     Apply date shifts to specified columns in a DataFrame.
 
     Args:
-        df: DataFrame containing patient data.
+        df: pd.DataFrame containing patient data.
         patient_id_col: Name of the column containing patient IDs.
         date_columns: List of column names containing dates to shift.
         shift_mappings: DataFrame with 'patient_id' and 'shift_days' columns.
@@ -128,22 +129,28 @@ def apply_date_shifts(
     # Normalize patient IDs in the working DataFrame to align with mapping keys
     df[patient_id_col] = df[patient_id_col].apply(_normalize_patient_id)
 
-    shift_dict = dict(zip(shift_mappings["patient_id"], shift_mappings["shift_days"]))
+    shift_dict = dict(
+        zip(
+            shift_mappings["patient_id"],
+            shift_mappings["shift_days"],
+            strict=True,
+        )
+    )
 
     for date_col in date_columns:
         if date_col not in df.columns:
             continue
 
-        # Parse flexible date strings (handles YYYY-DD-MM and placeholders like "Unknown")
+        # Parse flexible date strings (handles YYYY-DD-MM, placeholders like "Unknown")
         df[date_col] = df[date_col].apply(_parse_date_value)
 
         # Apply shifts
         df[date_col] = df.apply(
             lambda row: (
-                row[date_col]
+                row[date_col]  # noqa: B023
                 + pd.Timedelta(days=shift_dict.get(row[patient_id_col], 0))
-                if row[date_col] is not None and row[patient_id_col] in shift_dict
-                else row[date_col]
+                if row[date_col] is not None and row[patient_id_col] in shift_dict  # noqa: B023
+                else row[date_col]  # noqa: B023
             ),
             axis=1,
         )
@@ -163,15 +170,15 @@ def shift_excel_dates(
     output_file: str,
     patient_sheet: str,
     patient_id_col: str,
-    sheet_configs: Dict[str, Dict[str, Any]],
+    sheet_configs: dict[str, dict[str, Any]],
     min_shift_days: int = -15,
     max_shift_days: int = 15,
-    linking_table_path: Optional[str] = None,
-    linking_table_output: Optional[str] = None,
-    seed: Optional[int] = None,
+    linking_table_path: str | None = None,
+    linking_table_output: str | None = None,
+    seed: int | None = None,
     patient_header_row: int = 0,
-    patient_skip_rows: Optional[List[int]] = None,
-    date_format: Optional[str] = None,
+    patient_skip_rows: list[int] | None = None,
+    date_format: str | None = None,
 ) -> None:
     """
     Shift dates in an Excel file for patient IDs consistently across sheets.
@@ -198,13 +205,13 @@ def shift_excel_dates(
         date_format: Optional Excel date format string (e.g., 'YYYY-MM-DD', 'yyyy-mm-dd').
                      If None, Excel's default date format is used.
                      Common formats: 'YYYY-MM-DD', 'MM/DD/YYYY', 'DD-MM-YYYY', etc.
-    """
+    """  # noqa: E501
 
     def _read_sheet_with_structure(
         excel_file: pd.ExcelFile,
         sheet_name: str,
         header_row: int = 0,
-    ) -> Tuple[pd.DataFrame, pd.DataFrame, List[List[Any]]]:
+    ) -> tuple[pd.DataFrame, pd.DataFrame, list[list[Any]]]:
         """
         Read a sheet preserving description rows and structure.
 
@@ -219,7 +226,7 @@ def _read_sheet_with_structure(
 
         if header_row == 0:
             # No description rows, header is first row
-            description_rows: List[List[Any]] = []
+            description_rows: list[list[Any]] = []
             description_df = pd.DataFrame()
             # Use first row as header
             data_df = pd.read_excel(excel_file, sheet_name=sheet_name, header=0)
@@ -239,10 +246,10 @@ def _write_sheet_with_structure(
         writer: pd.ExcelWriter,
         sheet_name: str,
         data_df: pd.DataFrame,
-        description_rows: List[List[Any]],
+        description_rows: list[list[Any]],
         header_row: int,
-        date_columns: Optional[List[str]] = None,
-        date_format: Optional[str] = None,
+        date_columns: list[str] | None = None,
+        date_format: str | None = None,
     ) -> None:
         """
         Write a sheet preserving description rows and structure.
@@ -372,13 +379,13 @@ def _write_sheet_with_structure(
             header_row = default_header_row
 
             # Track date columns for formatting
-            sheet_date_columns: Optional[List[str]] = None
+            sheet_date_columns: list[str] | None = None
 
             # Check if this sheet needs date shifting
             if sheet_name in sheet_configs:
                 config = sheet_configs[cast(str, sheet_name)]
                 sheet_patient_id_col: str = cast(str, config["patient_id_col"])
-                date_columns: List[str] = cast(List[str], config["date_columns"])
+                date_columns: list[str] = cast(list[str], config["date_columns"])
                 header_row = cast(int, config.get("header_row", header_row))
                 sheet_date_columns = date_columns
 
@@ -392,7 +399,7 @@ def _write_sheet_with_structure(
             if sheet_name in sheet_configs:
                 if sheet_patient_id_col not in df.columns:
                     raise ValueError(
-                        f"Patient ID column '{sheet_patient_id_col}' not found in sheet '{sheet_name}'"
+                        f"Patient ID column '{sheet_patient_id_col}' not found in sheet '{sheet_name}'"  # noqa: E501
                     )
 
                 df = apply_date_shifts(

@@ -1,8 +1,9 @@
 import csv
-from pathlib import Path
 from collections import Counter, defaultdict
-from openpyxl import Workbook
 from datetime import datetime
+from pathlib import Path
+
+from openpyxl import Workbook
 
 SCAN_REPORT_FILE_NAME = "ScanReport.xlsx"
 
@@ -25,7 +26,7 @@
 ]
 
 
-def index_table_names(table_names):
+def index_table_names(table_names: list[str]) -> dict[str, str]:
     indexed = {}
     counts = defaultdict(int)
 
@@ -36,13 +37,15 @@ def index_table_names(table_names):
     return indexed
 
 
-def read_csv_header(csv_path):
+def read_csv_header(csv_path: str) -> list[str]:
     with open(csv_path, newline="", encoding="utf-8") as f:
         reader = csv.reader(f)
         return next(reader)
 
 
-def scan_csv_values(csv_path, min_cell_count):
+def scan_csv_values(
+    csv_path: str, min_cell_count: int
+) -> tuple[dict[str, list[tuple[str, int]]], int]:
     counters = defaultdict(Counter)
     row_count = 0
 
@@ -64,20 +67,25 @@ def scan_csv_values(csv_path, min_cell_count):
 
 
 def generate_scan_report(
-    csv_files, output_path=SCAN_REPORT_FILE_NAME, min_cell_count=1
-):
+    csv_files: list[str],
+    output_path: str = SCAN_REPORT_FILE_NAME,
+    min_cell_count: int = 1,
+) -> str:
     tables = []
 
     for csv_file in csv_files:
         csv_file = Path(csv_file)
-        header = read_csv_header(csv_file)
-        tables.append({"name": csv_file.name, "path": csv_file, "fields": header})
+        header = read_csv_header(csv_file.as_posix())
+        tables.append(
+            {"name": csv_file.name, "path": csv_file.as_posix(), "fields": header}
+        )
 
     tables.sort(key=lambda t: t["name"])
     indexed_names = index_table_names([t["name"] for t in tables])
 
     wb = Workbook()
-    wb.remove(wb.active)
+    if wb.active:
+        wb.remove(wb.active)
 
     # FIELD_OVERVIEW
     field_sheet = wb.create_sheet("Field Overview")

@@ -1,7 +1,7 @@
 [project]
 name = "nuh-helper"
 dynamic = ["version"]
-description = "Helper functions for enabling studies"
+description = "Helper library for enabling studies"
 readme = "README.md"
 requires-python = ">=3.13"
 
@@ -34,3 +34,13 @@ dev = [
 
 [tool.ruff.lint.pydocstyle]
 convention = "google"
+
+[tool.ruff.lint]
+select = ["E", "F", "ANN", "B", "UP", "I", "SIM", "C4"]
+
+[tool.coverage.run]
+branch = true
+
+[tool.pytest.ini_options]
+addopts = "-ra"
+testpaths = ["tests"]