Skip to content

Commit 318d41e

Browse files
Donglai Wei and claude committed
Optimize SDT: batch skeletonization, precomputed volume caching, new defaults
- Batch kimimaro.skeletonize() with parallel=0 replaces N serial per-instance calls
- Auto-precompute full-volume SDT and cache to disk (train-labels_sdt.h5)
- Precomputed SDT flows through spatial transforms (crop/flip/rotate) as "sdt" key
- MultiTaskLabelTransformd uses precomputed SDT when available, skipping per-crop computation
- Default smooth=False (saves ~20% overhead), relabel=False (global SDT)
- Use cc3d for connected components instead of skimage.measure.label

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent a8097d2 commit 318d41e

6 files changed

Lines changed: 235 additions & 56 deletions

File tree

.claude/benchmark/SNEMI.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -142,9 +142,9 @@ Files changed:
142142
- `connectomics/models/loss/metadata.py`: added metadata with `spatial_weight_arg="weight"`
143143
- `connectomics/training/loss/plan.py`: defaults `pos_weight` to `1.0` for this loss (prevents orchestrator from double-weighting; the loss handles class balancing internally)
144144
- `tutorials/bases/loss_profiles.yaml`: new `loss_per_channel` profile (single entry)
145-
- `tutorials/bases/pipeline_profiles.yaml`: `affinity-12` pipeline now uses `loss_per_channel` profile
145+
- `tutorials/bases/pipeline_profiles.yaml`: `aff12` pipeline now uses `loss_per_channel` profile
146146

147-
Config (via `affinity-12` pipeline profile → `loss_per_channel` loss profile):
147+
Config (via `aff12` pipeline profile → `loss_per_channel` loss profile):
148148
```yaml
149149
# loss_profiles.yaml
150150
loss_per_channel:
@@ -273,7 +273,7 @@ default:
273273
down_factors: [[1,2,2], [1,2,2], [1,2,2], [1,2,2]]
274274
input_size: [18, 160, 160]
275275
output_size: [18, 160, 160]
276-
# Loss handled by affinity-12 pipeline profile → loss_per_channel → PerChannelBCEWithLogitsLoss
276+
# Loss handled by aff12 pipeline profile → loss_per_channel → PerChannelBCEWithLogitsLoss
277277
```
278278

279279
### Phase 3: Match Augmentation ✅
@@ -290,7 +290,7 @@ All three items implemented and tested (32/32 augmentation tests pass).
290290
3. **Contrast/brightness ±50%** — `contrast_range=[0.5, 1.5]`, `shift_intensity_offset=0.2` (matches DeepEM `MixedGrayscale2D`).
291291

292292
All settings live in the `aug_em_neuron` profile (`tutorials/bases/augmentation_profiles.yaml`),
293-
which is applied automatically via the `affinity-12` pipeline profile in `tutorials/bases/pipeline_profiles.yaml`.
293+
which is applied automatically via the `aff12` pipeline profile in `tutorials/bases/pipeline_profiles.yaml`.
294294
No inline augmentation overrides are needed in `neuron_snemi.yaml`.
295295

296296
### Phase 4: Inference Improvements

connectomics/data/augment/build.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,18 @@
5151
)
5252

5353

54+
def _has_precomputed_sdt(cfg: Config) -> bool:
    """Check if the label transform includes skeleton_aware_edt (precomputed SDT)."""
    targets = getattr(cfg.data.label_transform, "targets", None)
    if not targets:
        return False

    def _entry_name(entry):
        # Target entries may be plain dicts or config objects; read "name" either way.
        return entry.get("name") if isinstance(entry, dict) else getattr(entry, "name", None)

    return any(_entry_name(entry) == "skeleton_aware_edt" for entry in targets)
64+
65+
5466
def _strict_binarize_mask(mask, threshold: float = 0.0):
5567
"""Binarize mask with strict greater-than semantics (mask > threshold)."""
5668
if torch.is_tensor(mask):
@@ -101,6 +113,9 @@ def build_train_transforms(
101113
keys = ["image", "label"]
102114
if cfg.data.train.mask is not None:
103115
keys.append("mask")
116+
# Include precomputed SDT key if present (auto-detected from label_transform).
117+
if _has_precomputed_sdt(cfg):
118+
keys.append("sdt")
104119

105120
transforms = []
106121

connectomics/data/dataset/data_dicts.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ def create_data_dicts_from_paths(
1313
image_paths: List[str],
1414
label_paths: Optional[List[str]] = None,
1515
mask_paths: Optional[List[str]] = None,
16+
extra_paths: Optional[Dict[str, List[str]]] = None,
1617
) -> List[Dict[str, object]]:
1718
"""
1819
Create MONAI-style data dictionaries from file paths.
@@ -21,6 +22,8 @@ def create_data_dicts_from_paths(
2122
image_paths: List of image file paths
2223
label_paths: Optional list of label file paths
2324
mask_paths: Optional list of mask file paths
25+
extra_paths: Optional dict of additional keys to include, e.g.
26+
``{"sdt": ["/path/to/sdt1.h5", "/path/to/sdt2.h5"]}``
2427
2528
Returns:
2629
List of dictionaries with 'image', 'label', and/or 'mask' keys
@@ -36,6 +39,10 @@ def create_data_dicts_from_paths(
3639
if mask_paths is not None:
3740
data_dict["mask"] = mask_paths[i]
3841

42+
if extra_paths is not None:
43+
for key, paths in extra_paths.items():
44+
data_dict[key] = paths[i]
45+
3946
data_dicts.append(data_dict)
4047

4148
return data_dicts

connectomics/data/process/distance.py

Lines changed: 142 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@
1313
remove_small_holes,
1414
)
1515

16+
import cc3d
17+
1618
from .bbox_processor import BBoxInstanceProcessor, BBoxProcessorConfig
1719
from .quantize import energy_quantize
1820

@@ -21,6 +23,7 @@
2123
"edt_instance",
2224
"distance_transform",
2325
"skeleton_aware_distance_transform",
26+
"precompute_sdt_volume",
2427
"smooth_edge",
2528
"signed_distance_transform",
2629
]
@@ -283,11 +286,11 @@ def signed_distance_transform(
283286
def skeleton_aware_distance_transform(
284287
label: np.ndarray,
285288
bg_value: float = -1.0,
286-
relabel: bool = True,
289+
relabel: bool = False,
287290
padding: bool = False,
288291
resolution: Tuple[float] = (1.0, 1.0, 1.0),
289292
alpha: float = 0.8,
290-
smooth: bool = True,
293+
smooth: bool = False,
291294
smooth_skeleton_only: bool = True,
292295
):
293296
"""Skeleton-based distance transform (SDT).
@@ -296,8 +299,9 @@ def skeleton_aware_distance_transform(
296299
Distance Transform." International Conference on Medical Image Computing and
297300
Computer-Assisted Intervention. Cham: Springer Nature Switzerland, 2023.
298301
299-
Refactored to use BBoxInstanceProcessor for cleaner code and consistency.
300-
Uses kimimaro for fast skeletonization (10-100x faster than scikit-image).
302+
Uses batch kimimaro skeletonization: all instances are skeletonized in a single
303+
call with automatic parallelism, then per-instance EDT is computed via
304+
BBoxInstanceProcessor.
301305
302306
Args:
303307
label: Instance segmentation (H, W) or (D, H, W)
@@ -306,7 +310,8 @@ def skeleton_aware_distance_transform(
306310
padding: Whether to pad before computing distance
307311
resolution: Voxel resolution for anisotropic data (z, y, x)
308312
alpha: Skeleton influence exponent (higher = stronger skeleton influence)
309-
smooth: Whether to smooth edges before skeletonization
313+
smooth: Whether to smooth edges before skeletonization (default False;
314+
adds ~20% overhead with marginal quality impact when using kimimaro)
310315
smooth_skeleton_only: Only smooth skeleton mask (not entire object)
311316
312317
Returns:
@@ -318,24 +323,33 @@ def skeleton_aware_distance_transform(
318323
if np.sum(label > 0) == 0:
319324
return np.full(label.shape, bg_value, dtype=np.float32)
320325

321-
# Configure bbox processor
326+
# 1. Relabel outside processor so we can batch-skeletonize.
327+
if relabel:
328+
label = cc3d.connected_components(label, connectivity=6)
329+
330+
# 2. Batch skeletonize all instances in one call (parallel across instances).
331+
skeleton_vertices = _batch_skeletonize(label, resolution)
332+
333+
# 3. Per-instance EDT using BBoxProcessor (skeletons already computed).
334+
# Padding coordinate offset: if padding is enabled, the processor pads the
335+
# label internally, shifting coordinates by pad_size. We account for this
336+
# when translating skeleton vertices to bbox-local coordinates.
337+
pad_offset = 2 if padding else 0
338+
322339
config = BBoxProcessorConfig(
323340
bg_value=bg_value,
324-
relabel=relabel,
341+
relabel=False, # already relabeled above
325342
padding=padding,
326343
pad_size=2,
327344
bbox_relax=2,
328345
combine_mode="max",
329346
)
330347

331-
# Define per-instance skeleton EDT computation
332348
def compute_skeleton_edt(
333349
label_crop: np.ndarray, instance_id: int, bbox: Tuple[slice, ...], context: Dict
334350
) -> Optional[np.ndarray]:
335351
"""Compute skeleton-aware EDT for a single instance within bbox."""
336-
# Extract and clean mask
337352
temp2 = remove_small_holes(label_crop == instance_id, 16, connectivity=1)
338-
339353
if not temp2.any():
340354
return None
341355

@@ -351,10 +365,15 @@ def compute_skeleton_edt(
351365
binary = binary_smooth.astype(bool)
352366
temp2 = binary
353367

354-
# Skeletonize using kimimaro
355-
skeleton_mask = _skeletonize_instance(label_crop, instance_id, context["resolution"])
368+
# Look up pre-computed skeleton and translate to bbox-local coordinates.
369+
skeleton_mask = _skeleton_vertices_to_mask(
370+
context["skeleton_vertices"].get(instance_id),
371+
label_crop.shape,
372+
bbox,
373+
context["pad_offset"],
374+
)
356375

357-
# Fallback to regular EDT if skeletonization fails
376+
# Fallback to regular EDT if skeletonization failed for this instance.
358377
if skeleton_mask is None or not skeleton_mask.any():
359378
boundary_edt = distance_transform_edt(temp2, context["resolution"])
360379
edt_max = boundary_edt.max()
@@ -367,70 +386,143 @@ def compute_skeleton_edt(
367386
skeleton_edt = distance_transform_edt(~skeleton_mask, context["resolution"])
368387
boundary_edt = distance_transform_edt(temp2, context["resolution"])
369388

370-
# Normalized energy
371389
energy = boundary_edt / (skeleton_edt + boundary_edt + eps)
372390
energy = energy ** context["alpha"]
373391

374392
return energy * temp2.astype(np.float32)
375393

376-
# Process all instances
377394
processor = BBoxInstanceProcessor(config)
378395
return processor.process(
379396
label,
380397
compute_skeleton_edt,
398+
skeleton_vertices=skeleton_vertices,
399+
pad_offset=pad_offset,
381400
resolution=resolution,
382401
alpha=alpha,
383402
smooth=smooth,
384403
smooth_skeleton_only=smooth_skeleton_only,
385404
)
386405

387406

388-
def _batch_skeletonize(
    label: np.ndarray, resolution: Tuple[float, ...]
) -> Dict[int, np.ndarray]:
    """Skeletonize every instance with a single kimimaro call.

    Returns:
        Dict mapping instance_id -> (N, ndim) integer array of skeleton vertex
        coordinates in the input label's coordinate system.
    """
    try:
        skeletons = kimimaro.skeletonize(
            label.astype(np.uint32),
            anisotropy=resolution,
            fix_branching=False,
            fix_borders=False,
            dust_threshold=5,
            parallel=0,  # auto-detect cores
            progress=False,
        )
    except Exception:
        # Degrade gracefully on any kimimaro failure; callers fall back to plain EDT.
        return {}

    return {
        inst_id: skel.vertices.astype(int)
        for inst_id, skel in skeletons.items()
        if len(skel.vertices) > 0
    }
def _skeleton_vertices_to_mask(
437+
vertices: Optional[np.ndarray],
438+
crop_shape: Tuple[int, ...],
439+
bbox: Tuple[slice, ...],
440+
pad_offset: int,
441+
) -> Optional[np.ndarray]:
442+
"""Convert skeleton vertices (full-volume coords) to a binary mask in bbox-local coords.
443+
444+
Args:
445+
vertices: (N, ndim) vertex coordinates in the original (unpadded) label space,
446+
or None if this instance had no skeleton.
447+
crop_shape: Shape of the bbox crop.
448+
bbox: Tuple of slices defining the bbox in the (possibly padded) label.
449+
pad_offset: Coordinate offset added by padding (0 if no padding).
450+
"""
451+
if vertices is None or len(vertices) == 0:
452+
return None
453+
454+
# Translate: original-label coords → padded-label coords → bbox-local coords.
455+
bbox_origin = np.array([s.start for s in bbox])
456+
local_verts = vertices + pad_offset - bbox_origin
457+
458+
# Filter to valid range.
459+
valid = np.all((local_verts >= 0) & (local_verts < np.array(crop_shape)), axis=1)
460+
local_verts = local_verts[valid]
461+
462+
if len(local_verts) == 0:
463+
return None
464+
465+
mask = np.zeros(crop_shape, dtype=bool)
466+
if len(crop_shape) == 3:
467+
mask[local_verts[:, 0], local_verts[:, 1], local_verts[:, 2]] = True
468+
else:
469+
mask[local_verts[:, 0], local_verts[:, 1]] = True
470+
return mask
471+
472+
473+
def precompute_sdt_volume(
    label_path: str,
    output_path: str,
    resolution: Tuple[float, ...] = (1.0, 1.0, 1.0),
    alpha: float = 0.8,
    bg_value: float = -1.0,
) -> str:
    """Precompute skeleton-aware distance transform on a full label volume.

    Computes the SDT once on the entire volume and saves to HDF5.
    Subsequent training runs load the precomputed result, avoiding
    the expensive per-crop skeletonization.

    Args:
        label_path: Path to the instance segmentation label volume.
        output_path: Path to save the precomputed SDT (HDF5).
        resolution: Voxel resolution (z, y, x) for anisotropic data.
        alpha: Skeleton influence exponent.
        bg_value: Background value for non-instance regions.

    Returns:
        The output_path (for chaining).
    """
    import logging
    import time

    from ..io.io import read_volume, save_volume

    logger = logging.getLogger(__name__)
    # Fix: separate the two paths in the log message (was "{label_path}{output_path}").
    logger.info(f"Precomputing SDT: {label_path} -> {output_path}")

    label = read_volume(label_path)
    # Count foreground instances only: counting nonzero unique values avoids the
    # off-by-one of "len(unique) - 1" when the volume has no background-0 voxels.
    n_instances = int(np.count_nonzero(np.unique(label)))
    logger.info(f"  Label shape: {label.shape}, unique instances: {n_instances}")

    t0 = time.time()
    sdt = skeleton_aware_distance_transform(
        label, resolution=resolution, alpha=alpha, bg_value=bg_value
    )
    elapsed = time.time() - t0
    logger.info(f"  SDT computed in {elapsed:.1f}s, range: [{sdt.min():.3f}, {sdt.max():.3f}]")

    save_volume(output_path, sdt)
    logger.info(f"  Saved to {output_path}")

    return output_path
518+
519+
520+
def sdt_path_for_label(label_path: str) -> str:
    """Derive the SDT cache path from a label file path.

    Example: ``datasets/SNEMI/train-labels.tif`` → ``datasets/SNEMI/train-labels_sdt.h5``
    """
    import os

    root, _ext = os.path.splitext(label_path)
    return f"{root}_sdt.h5"

0 commit comments

Comments
 (0)