
Commit a59936a
Donglai Wei and claude committed
Encode TTA pass count in prediction cache filename, remove tuner affinity crop
- Rename _tta_prediction.h5 → _tta_x{N}_prediction.h5, where N is the number of TTA passes (e.g. _tta_x1_prediction.h5 when TTA is disabled)
- Add compute_tta_passes(), tta_cache_suffix(), is_tta_cache_suffix() to connectomics.training.lightning.utils
- Remove _maybe_crop_affinity_array from the tuner — cached predictions are already fully cropped by the test pipeline; add shape validation that errors on mismatch instead of silently hiding it
- Replace all hardcoded "_tta_prediction.h5" strings with the dynamic suffix
- Fallback cache lookup uses a _tta_x*_prediction.h5 glob pattern

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent: 8994fea

8 files changed: 119 additions & 96 deletions


connectomics/config/schema/stages.py

Lines changed: 1 addition & 1 deletion
@@ -55,7 +55,7 @@ class TuneOutputConfig:

     output_dir: Optional[str] = None
     output_pred: Optional[str] = None
-    cache_suffix: str = "_tta_prediction.h5"
+    cache_suffix: str = "_tta_x1_prediction.h5"
     save_all_trials: bool = False
     save_best_segmentation: bool = True
     save_study: bool = True

connectomics/decoding/tuning/optuna_tuner.py

Lines changed: 22 additions & 68 deletions
@@ -37,13 +37,8 @@
 except ImportError:
     OPTUNA_AVAILABLE = False

-from connectomics.data.process.affinity import (
-    affinity_deepem_crop_enabled,
-    compute_affinity_crop_pad,
-    crop_spatial_by_pad,
-    resolve_affinity_channel_groups_from_cfg,
-)
 from connectomics.metrics.metrics_seg import adapted_rand
+from connectomics.training.lightning.utils import tta_cache_suffix

 from ..registry import get_decoder
 from ..utils import remove_small_instances

@@ -53,26 +48,6 @@
 __all__ = ["OptunaDecodingTuner", "run_tuning", "load_and_apply_best_params"]


-def _maybe_crop_affinity_array(
-    data: np.ndarray,
-    *,
-    reference_spatial_shape: tuple[int, ...],
-    crop_pad: tuple[tuple[int, int], ...],
-) -> np.ndarray:
-    if not crop_pad:
-        return data
-    expected_cropped_shape = tuple(
-        int(reference_spatial_shape[axis]) - crop_pad[axis][0] - crop_pad[axis][1]
-        for axis in range(len(crop_pad))
-    )
-    data_spatial_shape = tuple(int(v) for v in data.shape[-len(crop_pad) :])
-    if data_spatial_shape == expected_cropped_shape:
-        return data
-    if data_spatial_shape != reference_spatial_shape:
-        return data
-    return crop_spatial_by_pad(data, crop_pad, item_name="tuning array")
-
-
 def _expand_tuning_paths(path_or_pattern: Any, *, field_name: str) -> list[str]:
     """Expand string/list path inputs used by the tuning loader."""
     import glob

@@ -119,9 +94,12 @@ def _temporary_tuning_inference_overrides(*cfg_objects: Any):
     """Force the pre-Optuna inference pass to cache raw predictions only."""
     inference_cfgs = []
     seen_inference_cfgs: set[int] = set()
+    primary_cfg = None
     for cfg_obj in cfg_objects:
         if cfg_obj is None:
             continue
+        if primary_cfg is None:
+            primary_cfg = cfg_obj
         inference_cfg = getattr(cfg_obj, "inference", None)
         if inference_cfg is None or id(inference_cfg) in seen_inference_cfgs:
             continue

@@ -131,6 +109,8 @@ def _temporary_tuning_inference_overrides(*cfg_objects: Any):
     if not inference_cfgs:
         raise ValueError("Missing runtime cfg.inference configuration required for tuning")

+    suffix = tta_cache_suffix(primary_cfg) if primary_cfg is not None else "_tta_x1_prediction.h5"
+
     backups = []
     for inference_cfg in inference_cfgs:
         save_prediction_cfg = getattr(inference_cfg, "save_prediction", None)

@@ -156,13 +136,13 @@ def _temporary_tuning_inference_overrides(*cfg_objects: Any):
         )

         save_prediction_cfg.enabled = True
-        save_prediction_cfg.cache_suffix = "_tta_prediction.h5"
+        save_prediction_cfg.cache_suffix = suffix
         inference_cfg.decoding = None
         if evaluation_cfg is not None:
             evaluation_cfg.enabled = False

     try:
-        yield "_tta_prediction.h5"
+        yield suffix
     finally:
         for backup in backups:
             inference_cfg = backup["inference_cfg"]

@@ -1164,7 +1144,7 @@ def run_tuning(model, trainer, cfg, checkpoint_path=None):
     logger.info("[1/4] Running inference on tuning dataset...")

     tune_data = cfg.data
-    cache_suffix = "_tta_prediction.h5"
+    cache_suffix = tta_cache_suffix(cfg)

     output_pred_dir = cfg.inference.save_prediction.output_path
     predictions_dir = Path(output_pred_dir)

@@ -1294,45 +1274,19 @@ def run_tuning(model, trainer, cfg, checkpoint_path=None):
            f"Mismatch: {len(all_predictions)} prediction files vs " f"{len(all_masks)} mask files"
        )

-    if affinity_deepem_crop_enabled(cfg):
-        groups = resolve_affinity_channel_groups_from_cfg(cfg)
-        all_offsets = []
-        for _, offsets in groups:
-            all_offsets.extend(offsets)
-        crop_pad = compute_affinity_crop_pad(all_offsets)
-        if crop_pad and any(before or after for before, after in crop_pad):
-            cropped_predictions = []
-            cropped_labels = []
-            cropped_masks = [] if all_masks is not None else None
-            for idx, pred in enumerate(all_predictions):
-                reference_spatial_shape = tuple(
-                    int(v) for v in all_labels[idx].shape[-len(crop_pad) :]
-                )
-                cropped_predictions.append(
-                    _maybe_crop_affinity_array(
-                        np.asarray(pred),
-                        reference_spatial_shape=reference_spatial_shape,
-                        crop_pad=crop_pad,
-                    )
-                )
-                cropped_labels.append(
-                    _maybe_crop_affinity_array(
-                        np.asarray(all_labels[idx]),
-                        reference_spatial_shape=reference_spatial_shape,
-                        crop_pad=crop_pad,
-                    )
-                )
-                if cropped_masks is not None:
-                    cropped_masks.append(
-                        _maybe_crop_affinity_array(
-                            np.asarray(all_masks[idx]),
-                            reference_spatial_shape=reference_spatial_shape,
-                            crop_pad=crop_pad,
-                        )
-                    )
-            all_predictions = cropped_predictions
-            all_labels = cropped_labels
-            all_masks = cropped_masks
+    # Validate that prediction and label spatial shapes match.
+    # Cached TTA prediction files are saved after crop_pad + affinity_crop
+    # in the test pipeline, so they should already align with the label volume.
+    for idx, pred in enumerate(all_predictions):
+        pred_spatial = tuple(int(v) for v in pred.shape[-3:])
+        label_spatial = tuple(int(v) for v in all_labels[idx].shape[-3:])
+        if pred_spatial != label_spatial:
+            raise ValueError(
+                f"Prediction/label spatial shape mismatch for volume {idx}: "
+                f"prediction {pred_spatial} vs label {label_spatial}. "
+                f"Cached predictions may be stale — regenerate TTA predictions "
+                f"by re-running inference with the real model checkpoint."
+            )

     # Step 4: Create tuner and run optimization (per-volume evaluation)
     logger.info("[4/5] Creating Optuna tuner...")

connectomics/training/lightning/__init__.py

Lines changed: 6 additions & 0 deletions
@@ -17,10 +17,13 @@
 from .runtime import cleanup_run_directory, modify_checkpoint_state, setup_run_directory
 from .trainer import create_trainer
 from .utils import (
+    compute_tta_passes,
     extract_best_score_from_checkpoint,
+    is_tta_cache_suffix,
     parse_args,
     setup_config,
     setup_seed_everything,
+    tta_cache_suffix,
 )

 __all__ = [

@@ -41,4 +44,7 @@
     "parse_args",
     "setup_config",
     "extract_best_score_from_checkpoint",
+    "compute_tta_passes",
+    "tta_cache_suffix",
+    "is_tta_cache_suffix",
 ]

connectomics/training/lightning/model.py

Lines changed: 7 additions & 6 deletions
@@ -44,6 +44,7 @@
 from ...models import build_model
 from ...models.loss import create_loss, get_loss_metadata_for_module
 from ..debugging import DebugManager
+from .utils import is_tta_cache_suffix, tta_cache_suffix

 # Import training/inference components
 from ..loss import LossOrchestrator, build_loss_weighter, infer_num_loss_tasks_from_config

@@ -445,7 +446,7 @@ def _load_cached_predictions(
                     f"{len(filenames)} filenames; decoding will use the explicit file only."
                 )
                 # Treat explicit file as intermediate prediction so decoding still runs.
-                return pred, True, "_tta_prediction.h5"
+                return pred, True, tta_cache_suffix(self.cfg)
             except Exception as e:
                 logger.warning(
                     f"Failed to load explicit inference.tta_result_path file {pred_file}: {e}. "

@@ -467,11 +468,11 @@

         for filename in filenames:
             pred_file = output_dir / f"{filename}{cache_suffix}"
-            if not pred_file.exists() and mode == "test" and cache_suffix != "_tta_prediction.h5":
-                tta_pred_file = output_dir / f"{filename}_tta_prediction.h5"
-                if tta_pred_file.exists():
-                    pred_file = tta_pred_file
-                    loaded_suffix = "_tta_prediction.h5"
+            if not pred_file.exists() and mode == "test" and not is_tta_cache_suffix(cache_suffix):
+                tta_matches = sorted(output_dir.glob(f"{filename}_tta_x*_prediction.h5"))
+                if tta_matches:
+                    pred_file = tta_matches[-1]
+                    loaded_suffix = pred_file.name[len(filename):]

             if pred_file.exists():
                 try:
connectomics/training/lightning/utils.py

Lines changed: 58 additions & 0 deletions
@@ -346,10 +346,68 @@ def setup_seed_everything():
     return seed_everything


+def compute_tta_passes(cfg: Config, spatial_dims: int = 3) -> int:
+    """Return the total number of TTA inference passes from config.
+
+    This determines the multiplier in the cached prediction filename
+    (e.g. ``_tta_x16_prediction.h5``). When TTA is disabled the count is 1.
+    """
+    inference_cfg = getattr(cfg, "inference", None)
+    if inference_cfg is None:
+        return 1
+    tta_cfg = getattr(inference_cfg, "test_time_augmentation", None)
+    if tta_cfg is None or not bool(getattr(tta_cfg, "enabled", False)):
+        return 1
+
+    flip_axes_cfg = getattr(tta_cfg, "flip_axes", None)
+    rotation90_axes_cfg = getattr(tta_cfg, "rotation90_axes", None)
+
+    def _cfg_len(value):
+        if value is None or isinstance(value, str):
+            return 0
+        try:
+            return len(value)
+        except TypeError:
+            return 0
+
+    if flip_axes_cfg == "all" or flip_axes_cfg == []:
+        flip_variants = 2 ** spatial_dims if spatial_dims > 0 else 1
+    elif flip_axes_cfg is None:
+        flip_variants = 1
+    else:
+        flip_variants = 1 + _cfg_len(flip_axes_cfg)
+
+    if rotation90_axes_cfg == "all":
+        rotation_planes = 3 if spatial_dims == 3 else 1 if spatial_dims == 2 else 0
+    elif rotation90_axes_cfg is None:
+        rotation_planes = 0
+    else:
+        rotation_planes = _cfg_len(rotation90_axes_cfg)
+
+    passes_per_flip = 1 if rotation_planes == 0 else rotation_planes * 4
+    return flip_variants * passes_per_flip
+
+
+def tta_cache_suffix(cfg: Config, spatial_dims: int = 3) -> str:
+    """Return the TTA prediction cache suffix, e.g. ``_tta_x1_prediction.h5``."""
+    n = compute_tta_passes(cfg, spatial_dims=spatial_dims)
+    return f"_tta_x{n}_prediction.h5"
+
+
+def is_tta_cache_suffix(suffix: str | None) -> bool:
+    """Return True for any TTA intermediate prediction suffix (``_tta_x*_prediction.h5``)."""
+    if not suffix:
+        return False
+    return suffix.startswith("_tta_x") and suffix.endswith("_prediction.h5")
+
+
 __all__ = [
     "parse_args",
     "setup_config",
     "expand_file_paths",
     "extract_best_score_from_checkpoint",
     "setup_seed_everything",
+    "compute_tta_passes",
+    "tta_cache_suffix",
+    "is_tta_cache_suffix",
 ]
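To make the pass arithmetic concrete, a small worked example using throwaway SimpleNamespace stand-ins for the real Config objects (the import path is the one this commit adds to):

from types import SimpleNamespace

from connectomics.training.lightning.utils import compute_tta_passes, tta_cache_suffix

# Hypothetical TTA settings: one flip axis plus 90-degree rotations in all planes.
tta = SimpleNamespace(enabled=True, flip_axes=["z"], rotation90_axes="all")
cfg = SimpleNamespace(inference=SimpleNamespace(test_time_augmentation=tta))

# flip_axes=["z"]        -> flip_variants = 1 + 1 = 2 (identity + one flip)
# rotation90_axes="all"  -> 3 planes in 3D, 4 rotations each = 12 passes per flip
print(compute_tta_passes(cfg))  # 2 * 12 = 24
print(tta_cache_suffix(cfg))    # _tta_x24_prediction.h5

# TTA disabled collapses to a single pass.
cfg_off = SimpleNamespace(inference=SimpleNamespace(test_time_augmentation=None))
print(tta_cache_suffix(cfg_off))  # _tta_x1_prediction.h5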

scripts/main.py

Lines changed: 19 additions & 15 deletions
@@ -74,11 +74,13 @@
     cleanup_run_directory,
     create_datamodule,
     create_trainer,
+    is_tta_cache_suffix,
     modify_checkpoint_state,
     parse_args,
     setup_config,
     setup_run_directory,
     setup_seed_everything,
+    tta_cache_suffix,
 )

 # Setup seed_everything helper

@@ -211,20 +213,21 @@ def _resolve_cached_prediction_files(
         pred_file = output_dir / f"{filename}{cache_suffix}"
         current_suffix = cache_suffix

-        if not pred_file.exists() and cache_suffix != "_tta_prediction.h5":
-            tta_pred_file = output_dir / f"{filename}_tta_prediction.h5"
-            if tta_pred_file.exists():
-                pred_file = tta_pred_file
-                current_suffix = "_tta_prediction.h5"
+        # Fallback: search for any _tta_x*_prediction.h5 if exact suffix not found.
+        if not pred_file.exists() and not is_tta_cache_suffix(cache_suffix):
+            tta_matches = sorted(output_dir.glob(f"{filename}_tta_x*_prediction.h5"))
+            if tta_matches:
+                pred_file = tta_matches[-1]  # latest / highest augmentation count
+                current_suffix = pred_file.name[len(filename):]

         if not pred_file.exists():
             return False, None, []

         if not _is_valid_hdf5_prediction_file(pred_file):
             return False, None, []

-        if current_suffix == "_tta_prediction.h5":
-            loaded_suffix = "_tta_prediction.h5"
+        if is_tta_cache_suffix(current_suffix):
+            loaded_suffix = current_suffix
         resolved_files.append(pred_file)

     return True, loaded_suffix, resolved_files

@@ -272,7 +275,7 @@ def _has_tta_prediction_file(cfg: Config) -> bool:


 def _has_cached_predictions_in_output_dir(cfg: Config, mode: str) -> bool:
-    """Return True if all expected _tta_prediction.h5 files exist in the output directory."""
+    """Return True if all expected TTA prediction files exist in the output directory."""
     save_pred_cfg = getattr(cfg.inference, "save_prediction", None)
     if save_pred_cfg is None:
         return False

@@ -285,9 +288,10 @@ def _has_cached_predictions_in_output_dir(cfg: Config, mode: str) -> bool:
     if not test_image_paths:
         return False

+    suffix = tta_cache_suffix(cfg)
     output_path = Path(output_dir)
     for image_path in test_image_paths:
-        pred_file = output_path / f"{Path(image_path).stem}_tta_prediction.h5"
+        pred_file = output_path / f"{Path(image_path).stem}{suffix}"
         if not pred_file.exists():
             return False
         if not _is_valid_hdf5_prediction_file(pred_file):

@@ -309,7 +313,7 @@ def preflight_test_cache_hit(cfg: Config, datamodule) -> tuple[bool, str | None,

     # If explicit intermediate prediction exists, skip TTA inference and ckpt restore.
     if pred_file.exists() and _is_valid_hdf5_prediction_file(pred_file):
-        return True, "_tta_prediction.h5", 1
+        return True, tta_cache_suffix(cfg), 1

     print(
         " WARNING: inference.tta_result_path file missing or unreadable "

@@ -691,7 +695,7 @@ def try_cache_only_test_execution(
         )
         return False

-    if loaded_suffix != "_tta_prediction.h5":
+    if not is_tta_cache_suffix(loaded_suffix):
         if _is_test_evaluation_enabled(cfg):
             print(
                 " [OK]Loaded final predictions from disk, skipping "

@@ -766,7 +770,7 @@ def _configure_checkpoint_output_paths(args, cfg: Config) -> tuple[Path | None,

     save_pred_cfg = cfg.inference.save_prediction
     save_pred_cfg.output_path = str(output_base / results_folder_name)
-    save_pred_cfg.cache_suffix = "_tta_prediction.h5"
+    save_pred_cfg.cache_suffix = tta_cache_suffix(cfg)

     if args.mode == "tune-test":
         print(f"Test output: {save_pred_cfg.output_path}")

@@ -812,7 +816,7 @@
     ckpt_path: str | None,
 ) -> tuple[bool, None]:
     """Print cache-hit status and return whether the test loop can be skipped."""
-    if cached_suffix == "_tta_prediction.h5":
+    if is_tta_cache_suffix(cached_suffix):
         print(" [OK]Loaded intermediate predictions from disk, skipping inference")
     else:
         print(

@@ -828,7 +832,7 @@

     should_skip_test_loop = (
         args.mode == "test"
-        and cached_suffix != "_tta_prediction.h5"
+        and not is_tta_cache_suffix(cached_suffix)
         and not _is_test_evaluation_enabled(cfg)
     )
     if should_skip_test_loop:

@@ -876,7 +880,7 @@ def main():

     # Tuning expects cached intermediate predictions by default.
     if args.mode in ["tune", "tune-test"]:
-        cfg.inference.save_prediction.cache_suffix = "_tta_prediction.h5"
+        cfg.inference.save_prediction.cache_suffix = tta_cache_suffix(cfg)

     # Run preflight checks for training mode
     if args.mode == "train":
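End to end, the cache probe in _has_cached_predictions_in_output_dir now builds its expected filenames as {stem}{suffix}; a quick illustration with hypothetical paths:

from pathlib import Path

suffix = "_tta_x8_prediction.h5"  # what tta_cache_suffix(cfg) would return for 8 passes
for image_path in ["data/volume_A.h5", "data/volume_B.h5"]:
    pred_file = Path("outputs/results") / f"{Path(image_path).stem}{suffix}"
    print(pred_file)
# outputs/results/volume_A_tta_x8_prediction.h5
# outputs/results/volume_B_tta_x8_prediction.h5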
