Skip to content

Commit afa5ca0

Browse files
Donglai Wei and claude
committed
Fix ABISS batch mode, remove silent fallback, skip model build on cache hit
- ABISS batch merge-threshold: restore proper single-invocation mode (C++ binary already supports argv[8..N] batch thresholds natively); improve error message
- Remove silent _fallback_decode_connected_components that masked ABISS errors and produced garbage segmentations (0.918 vs 0.07 adapted_rand)
- Skip expensive model build + checkpoint load when cached _tta_prediction.h5 files exist for all test/tune volumes (nn.Identity lightweight module)
- Add _skip_inference guard in test_pipeline to error early if a cache miss occurs with a dummy model instead of running inference
- Fix on_test_end → on_test_epoch_end for Lightning self.log() compatibility

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 43a8722 commit afa5ca0

5 files changed

Lines changed: 81 additions & 55 deletions

File tree

connectomics/decoding/decoders/abiss.py

Lines changed: 8 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -153,31 +153,6 @@ def _load_output(output_h5: Path, output_npy: Path, output_dataset: str) -> np.n
153153
return cast2dtype(seg)
154154

155155

156-
def _command_uses_run_abiss_single(cmd: str | Sequence[str]) -> bool:
157-
if isinstance(cmd, str):
158-
try:
159-
tokens = shlex.split(cmd)
160-
except ValueError:
161-
return "run_abiss_single.py" in cmd
162-
return any("run_abiss_single.py" in token for token in tokens)
163-
return any("run_abiss_single.py" in str(token) for token in cmd)
164-
165-
166-
def _fallback_decode_connected_components(pred: np.ndarray) -> np.ndarray:
167-
"""Lightweight fallback when ABISS executable dependencies are unavailable."""
168-
from scipy import ndimage as ndi
169-
170-
if pred.ndim == 4:
171-
if pred.shape[0] == 1:
172-
foreground = pred[0] > 0.5
173-
else:
174-
foreground = np.max(pred, axis=0) > 0.5
175-
else:
176-
foreground = pred > 0.5
177-
178-
labeled, _ = ndi.label(foreground.astype(np.uint8, copy=False))
179-
return cast2dtype(labeled.astype(np.uint64, copy=False))
180-
181156

182157
def decode_abiss(
183158
predictions: np.ndarray,
@@ -305,22 +280,14 @@ def decode_abiss(
305280
if env:
306281
proc_env.update({str(k): str(v) for k, v in env.items()})
307282

308-
try:
309-
subprocess.run(
310-
cmd,
311-
shell=use_shell,
312-
env=proc_env,
313-
cwd=str(workspace_path),
314-
check=check,
315-
timeout=timeout_sec,
316-
)
317-
except subprocess.CalledProcessError:
318-
if _command_uses_run_abiss_single(cmd):
319-
if batch_mt:
320-
seg = _fallback_decode_connected_components(pred)
321-
return {round(mt, 10): seg for mt in batch_mt}
322-
return _fallback_decode_connected_components(pred)
323-
raise
283+
subprocess.run(
284+
cmd,
285+
shell=use_shell,
286+
env=proc_env,
287+
cwd=str(workspace_path),
288+
check=check,
289+
timeout=timeout_sec,
290+
)
324291

325292
# Batch mode: read multiple output files written by run_abiss_single.
326293
if batch_mt:

connectomics/training/lightning/model.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -767,7 +767,7 @@ def on_test_start(self):
767767
"and letting MONAI move window batches to the configured sw_device."
768768
)
769769

770-
def on_test_end(self) -> None:
770+
def on_test_epoch_end(self) -> None:
771771
"""Log aggregated test metrics after all ranks finish their assigned volumes."""
772772
log_test_epoch_metrics(self)
773773

connectomics/training/lightning/test_pipeline.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -921,6 +921,16 @@ def run_test_step(module, batch: Dict[str, torch.Tensor], batch_idx: int) -> STE
921921
return torch.tensor(0.0, device=module.device)
922922

923923
logger.info("No cached predictions found, running inference")
924+
925+
# If the model is a lightweight dummy (e.g. nn.Identity), inference would
926+
# produce garbage. Error out early instead of crashing later in TTA.
927+
if getattr(module, "_skip_inference", False):
928+
raise RuntimeError(
929+
"Cached predictions expected but not found for this volume. "
930+
"Cannot run inference with a lightweight (dummy) model. "
931+
"Re-run with the real model checkpoint to generate predictions first."
932+
)
933+
924934
_log_volume_header(volume_name, "INFERENCE PLAN")
925935
logger.info(f"Input shape: {tuple(images.shape)}")
926936
logger.info(f"Input device: {images.device}")

scripts/main.py

Lines changed: 57 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -260,6 +260,41 @@ def _resolve_tta_result_path_override(cfg: Config) -> str:
260260
return ""
261261

262262

263+
def _has_tta_prediction_file(cfg: Config) -> bool:
264+
"""Return True if an explicit tta_result_path exists and is a valid HDF5 file."""
265+
tta_path = _resolve_tta_result_path_override(cfg)
266+
if not tta_path:
267+
return False
268+
pred_file = Path(tta_path).expanduser()
269+
if not pred_file.is_absolute():
270+
pred_file = Path.cwd() / pred_file
271+
return pred_file.exists() and _is_valid_hdf5_prediction_file(pred_file)
272+
273+
274+
def _has_cached_predictions_in_output_dir(cfg: Config, mode: str) -> bool:
275+
"""Return True if all expected _tta_prediction.h5 files exist in the output directory."""
276+
save_pred_cfg = getattr(cfg.inference, "save_prediction", None)
277+
if save_pred_cfg is None:
278+
return False
279+
output_dir = getattr(save_pred_cfg, "output_path", None)
280+
if not output_dir:
281+
return False
282+
283+
# Resolve test/tune image paths to derive expected prediction filenames.
284+
test_image_paths = resolve_test_image_paths(cfg)
285+
if not test_image_paths:
286+
return False
287+
288+
output_path = Path(output_dir)
289+
for image_path in test_image_paths:
290+
pred_file = output_path / f"{Path(image_path).stem}_tta_prediction.h5"
291+
if not pred_file.exists():
292+
return False
293+
if not _is_valid_hdf5_prediction_file(pred_file):
294+
return False
295+
return True
296+
297+
263298
def preflight_test_cache_hit(cfg: Config, datamodule) -> tuple[bool, str | None, int]:
264299
"""Check if test outputs already exist so inference (and ckpt restore) can be skipped."""
265300
save_pred_cfg = getattr(cfg.inference, "save_prediction", None)
@@ -868,23 +903,35 @@ def main():
868903
if try_cache_only_test_execution(cfg, args.mode, args.shard_id, args.num_shards):
869904
return
870905

871-
# Create model
872-
print(f"Creating model: {cfg.model.arch.type}")
873-
model = ConnectomicsModule(cfg)
874-
875-
# Count parameters
876-
num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
877-
print(f" Model parameters: {num_params:,}")
906+
# Check for cached intermediate predictions early so we can skip both the
907+
# expensive model build and checkpoint restore for test/tune modes.
908+
tta_cached = args.mode in ("test", "tune", "tune-test") and (
909+
_has_tta_prediction_file(cfg)
910+
or _has_cached_predictions_in_output_dir(cfg, mode=args.mode)
911+
)
878912

879-
# Don't use checkpoint path if external weights were loaded (already in model state)
880-
# External weights are loaded during config setup via model.external_weights_path
881-
if args.external_prefix:
913+
# Create model
914+
if tta_cached:
915+
print(
916+
f" Cached intermediate predictions found; "
917+
f"creating lightweight module (skipping {cfg.model.arch.type} build)."
918+
)
919+
model = ConnectomicsModule(cfg, model=torch.nn.Identity())
920+
model._skip_inference = True
921+
ckpt_path = None
922+
elif args.external_prefix:
923+
print(f"Creating model: {cfg.model.arch.type}")
924+
model = ConnectomicsModule(cfg)
882925
print(
883926
" WARNING: External weights loaded - checkpoint path will not "
884927
"be used for training/testing"
885928
)
886929
ckpt_path = None
887930
else:
931+
print(f"Creating model: {cfg.model.arch.type}")
932+
model = ConnectomicsModule(cfg)
933+
num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
934+
print(f" Model parameters: {num_params:,}")
888935
ckpt_path = modify_checkpoint_state(
889936
args.checkpoint,
890937
run_dir,

scripts/run_abiss_single.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -304,9 +304,10 @@ def _run_abiss_ws(
304304
_ABISS_TAG,
305305
]
306306

307-
# Batch mode: multiple merge thresholds in one run (argv[8..N]).
307+
# Batch mode: pass multiple merge thresholds as argv[8..N].
308308
# The C++ binary computes watershed + region graph once, then
309-
# repeats the merge step for each threshold.
309+
# deep-copies and repeats the merge step for each threshold,
310+
# writing indexed output files (seg_{TAG}_{i}.data).
310311
use_batch = ws_merge_thresholds is not None and len(ws_merge_thresholds) > 1
311312
if use_batch:
312313
for mt in ws_merge_thresholds:
@@ -322,7 +323,8 @@ def _run_abiss_ws(
322323
seg_file = ws_dir / f"seg_{_ABISS_TAG}_{i}.data"
323324
if not seg_file.exists():
324325
raise FileNotFoundError(
325-
f"ABISS watershed did not produce expected output: {seg_file}"
326+
f"ABISS batch mode did not produce expected output: {seg_file}. "
327+
f"Ensure the ws binary at {ws_binary} supports multi-threshold mode."
326328
)
327329
seg_xyz = _read_segmentation_xyz(seg_file, output_xyz_shape, halo=1)
328330
results[round(mt, 10)] = np.transpose(seg_xyz, (2, 1, 0))

0 commit comments

Comments
 (0)