load_inspection_annotations() is completely non-functional (#86)

ATATC · web-flow · commit 80038dfccb5f · 2025-11-04T20:05:45.000-05:00
* Refactored `InspectionAnnotations` save/load methods to streamline CSV handling. (#81)
* Enhanced `load_inspection_annotations` to support the `background` parameter and improved row parsing with `literal_eval()`. (#81)
* Refactored row parsing in `load_inspection_annotations()` with `literal_eval()`. (#81)
* Enhanced `inspect()` with progress tracking using `rich.progress` and added a `console` parameter for customizable output. (#81)
* Added a `console` parameter to `Progress` instances in `training.py` for consistent output handling. (#81)
* Enhanced the progress display in `inspect()` by adding a `SpinnerColumn` for improved visual feedback with `rich.progress`. (#81)
* Refactored `InspectionAnnotations` save/load methods to use JSON format instead of CSV for correct serialization. Added the `_lists_to_tuples` helper for parsing JSON objects. (#81)
* Updated the `save` and `load_inspection_annotations` methods in `InspectionAnnotations` to include `background` in the serialization and deserialization logic. (#81)
diff --git a/mipcandy/data/inspection.py b/mipcandy/data/inspection.py
@@ -1,10 +1,12 @@
-from dataclasses import dataclass
+from dataclasses import dataclass, asdict
+from json import dump, load
 from os import PathLike
-from typing import Sequence, override, Callable, Self
+from typing import Sequence, override, Callable, Self, Any
 
 import numpy as np
 import torch
-from pandas import DataFrame
+from rich.console import Console
+from rich.progress import Progress, SpinnerColumn
 from torch import nn
 
 from mipcandy.data.dataset import SupervisedDataset
@@ -37,6 +39,9 @@ def center_of_foreground(self) -> tuple[int, int] | tuple[int, int, int]:
              round((self.foreground_bbox[3] + self.foreground_bbox[2]) * .5))
         return r if len(self.shape) == 2 else r + (round((self.foreground_bbox[5] + self.foreground_bbox[4]) * .5),)
 
+    def to_dict(self) -> dict[str, tuple[int, ...]]:
+        return asdict(self)  # field-name -> value mapping of this annotation; dataclasses.asdict copies values recursively
+
 
 class InspectionAnnotations(HasDevice, Sequence[InspectionAnnotation]):
     def __init__(self, dataset: SupervisedDataset, background: int, *annotations: InspectionAnnotation,
@@ -65,10 +70,8 @@ def __len__(self) -> int:
         return len(self._annotations)
 
     def save(self, path: str | PathLike[str]) -> None:
-        r = []
-        for annotation in self._annotations:
-            r.append({"foreground_bbox": annotation.foreground_bbox, "ids": annotation.ids})
-        DataFrame(r).to_csv(path, index=False)
+        with open(path, "w") as f:  # writes one JSON object: {"background": ..., "annotations": [...]}
+            dump({"background": self._background, "annotations": [a.to_dict() for a in self._annotations]}, f)
 
     def _get_shapes(self, get_shape: Callable[[InspectionAnnotation], tuple[int, ...]]) -> tuple[
         tuple[int, ...] | None, tuple[int, ...], tuple[int, ...]]:
@@ -208,27 +211,31 @@ def crop_roi(self, i: int, *, percentile: float = .95) -> tuple[torch.Tensor, to
         return crop(image.unsqueeze(0), roi).squeeze(0), crop(label.unsqueeze(0), roi).squeeze(0)
 
 
-def load_inspection_annotations(path: str | PathLike[str]) -> InspectionAnnotations:
-    df = DataFrame.from_csv(path)
-    return InspectionAnnotations(*(
-        InspectionAnnotation(
-            tuple(row["shape"]), format_bbox(row["foreground_bbox"]), tuple(row["ids"])
-        ) for _, row in df.iterrows()
+def _lists_to_tuples(pairs: Sequence[tuple[str, Any]]) -> dict[str, Any]:  # used as a json `object_pairs_hook`
+    return {k: tuple(v) if isinstance(v, list) else v for k, v in pairs}  # only direct list values are converted
+
+
+def load_inspection_annotations(path: str | PathLike[str], dataset: SupervisedDataset) -> InspectionAnnotations:
+    with open(path) as f:  # expects a JSON object with "background" and "annotations" keys
+        obj = load(f, object_pairs_hook=_lists_to_tuples)  # arrays stored as object values come back as tuples
+    return InspectionAnnotations(dataset, obj["background"], *(
+        InspectionAnnotation(**row) for row in obj["annotations"]  # each row's keys become constructor keywords
     ))
 
 
-def inspect(dataset: SupervisedDataset, *, background: int = 0) -> InspectionAnnotations:
+def inspect(dataset: SupervisedDataset, *, background: int = 0, console: Console = Console()) -> InspectionAnnotations:
     r = []
-    for _, label in dataset:
-        indices = (label != background).nonzero()
-        mins = indices.min(dim=0)[0].tolist()
-        maxs = indices.max(dim=0)[0].tolist()
-        bbox = (mins[1], maxs[1], mins[2], maxs[2])
-        r.append(InspectionAnnotation(
-            label.shape[1:],
-            bbox if label.ndim == 3 else bbox + (mins[3], maxs[3]),
-            tuple(label.unique())
-        ))
+    with Progress(*Progress.get_default_columns(), SpinnerColumn(), console=console) as progress:
+        task = progress.add_task("Inspecting dataset...", total=len(dataset))  # one progress step per dataset item
+        for _, label in dataset:
+            progress.update(task, advance=1, description=f"Inspecting dataset {tuple(label.shape)}")
+            indices = (label != background).nonzero()  # index rows of every element that differs from `background`
+            mins = indices.min(dim=0)[0].tolist()  # smallest index along each axis of `label`
+            maxs = indices.max(dim=0)[0].tolist()  # largest index along each axis of `label`
+            bbox = (mins[1], maxs[1], mins[2], maxs[2])  # axis 0 is skipped (presumably channels - TODO confirm)
+            r.append(InspectionAnnotation(
+                tuple(label.shape[1:]), bbox if label.ndim == 3 else bbox + (mins[3], maxs[3]), tuple(label.unique().tolist())
+            ))  # plain tuples of Python numbers (not torch.Size / 0-d tensors) keep the annotation JSON-serializable
     return InspectionAnnotations(dataset, background, *r, device=dataset.device())
 
 
diff --git a/mipcandy/training.py b/mipcandy/training.py
@@ -306,7 +306,7 @@ def train_epoch(self, epoch: int, toolbox: TrainerToolbox) -> None:
         toolbox.model.train()
         if toolbox.ema:
             toolbox.ema.train()
-        with Progress(*Progress.get_default_columns(), SpinnerColumn()) as progress:
+        with Progress(*Progress.get_default_columns(), SpinnerColumn(), console=self._console) as progress:
             epoch_prog = progress.add_task(f"Epoch {epoch}", total=len(self._dataloader))
             for images, labels in self._dataloader:
                 images, labels = images.to(self._device), labels.to(self._device)
@@ -439,7 +439,7 @@ def validate(self, toolbox: TrainerToolbox) -> tuple[float, dict[str, list[float
         worst_score = float("+inf")
         metrics = {}
         num_cases = len(self._validation_dataloader)
-        with Progress(*Progress.get_default_columns(), SpinnerColumn()) as progress:
+        with Progress(*Progress.get_default_columns(), SpinnerColumn(), console=self._console) as progress:
             val_prog = progress.add_task(f"Validating", total=num_cases)
             for image, label in self._validation_dataloader:
                 image, label = image.to(self._device), label.to(self._device)