Skip to content

Commit 5a0b06f

Browse files
Donglai Wei and claude
committed
Add label_aux support: precomputed skeleton/SDT with cached dataset integration
- Add label_aux and label_aux_type fields to DataInputConfig ("skeleton" precomputes skeleton volume, "sdt" full SDT, "none" per-crop) - Auto-precompute runs before build_train_transforms so keys are correct - CachedVolumeDataset loads/caches/crops label_aux alongside image/label/mask - MultiTaskLabelTransformd auto-detects skeleton vs SDT from data values - skeleton_aware_edt_from_skeleton_vol: compute EDT per crop from precomputed skeleton - kimimaro_config: derive TEASAR params from label statistics and resolution - BBoxProcessor: add ThreadPoolExecutor parallelism for per-instance EDT - precompute_skeleton_volume: rasterize kimimaro skeletons to label-like volume Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 318d41e commit 5a0b06f

9 files changed

Lines changed: 393 additions & 92 deletions

File tree

connectomics/config/schema/data.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -214,6 +214,8 @@ class DataInputConfig:
214214
# Using Any to support both str and List[str] (OmegaConf doesn't support Union of containers)
215215
image: Any = None # str, List[str], or None
216216
label: Any = None # str, List[str], or None
217+
label_aux: Any = None # str, List[str], or None (auto-derived from label if null)
218+
label_aux_type: str = "skeleton" # "skeleton", "sdt", or "none"
217219
mask: Any = None # str, List[str], or None (Valid region mask)
218220

219221
# Paths - JSON/filename-based datasets

connectomics/data/augment/build.py

Lines changed: 2 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -51,18 +51,6 @@
5151
)
5252

5353

54-
def _has_precomputed_sdt(cfg: Config) -> bool:
55-
"""Check if the label transform includes skeleton_aware_edt (precomputed SDT)."""
56-
targets = getattr(cfg.data.label_transform, "targets", None)
57-
if not targets:
58-
return False
59-
for t in targets:
60-
name = t.get("name") if isinstance(t, dict) else getattr(t, "name", None)
61-
if name == "skeleton_aware_edt":
62-
return True
63-
return False
64-
65-
6654
def _strict_binarize_mask(mask, threshold: float = 0.0):
6755
"""Binarize mask with strict greater-than semantics (mask > threshold)."""
6856
if torch.is_tensor(mask):
@@ -111,11 +99,10 @@ def build_train_transforms(
11199
"""
112100
if keys is None:
113101
keys = ["image", "label"]
102+
if cfg.data.train.label_aux is not None:
103+
keys.append("label_aux")
114104
if cfg.data.train.mask is not None:
115105
keys.append("mask")
116-
# Include precomputed SDT key if present (auto-detected from label_transform).
117-
if _has_precomputed_sdt(cfg):
118-
keys.append("sdt")
119106

120107
transforms = []
121108

connectomics/data/dataset/data_dicts.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -12,21 +12,22 @@
1212
def create_data_dicts_from_paths(
1313
image_paths: List[str],
1414
label_paths: Optional[List[str]] = None,
15+
label_aux_paths: Optional[List[str]] = None,
1516
mask_paths: Optional[List[str]] = None,
16-
extra_paths: Optional[Dict[str, List[str]]] = None,
1717
) -> List[Dict[str, object]]:
1818
"""
1919
Create MONAI-style data dictionaries from file paths.
2020
2121
Args:
2222
image_paths: List of image file paths
2323
label_paths: Optional list of label file paths
24+
label_aux_paths: Optional list of auxiliary label file paths
25+
(e.g. precomputed SDT volumes)
2426
mask_paths: Optional list of mask file paths
25-
extra_paths: Optional dict of additional keys to include, e.g.
26-
``{"sdt": ["/path/to/sdt1.h5", "/path/to/sdt2.h5"]}``
2727
2828
Returns:
29-
List of dictionaries with 'image', 'label', and/or 'mask' keys
29+
List of dictionaries with 'image', 'label', 'label_aux',
30+
and/or 'mask' keys
3031
"""
3132
data_dicts: List[Dict[str, object]] = []
3233

@@ -36,13 +37,12 @@ def create_data_dicts_from_paths(
3637
if label_paths is not None:
3738
data_dict["label"] = label_paths[i]
3839

40+
if label_aux_paths is not None:
41+
data_dict["label_aux"] = label_aux_paths[i]
42+
3943
if mask_paths is not None:
4044
data_dict["mask"] = mask_paths[i]
4145

42-
if extra_paths is not None:
43-
for key, paths in extra_paths.items():
44-
data_dict[key] = paths[i]
45-
4646
data_dicts.append(data_dict)
4747

4848
return data_dicts

connectomics/data/dataset/dataset_volume_cached.py

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,7 @@ def __init__(
102102
self,
103103
image_paths: List[str],
104104
label_paths: Optional[List[str]] = None,
105+
label_aux_paths: Optional[List[str]] = None,
105106
mask_paths: Optional[List[str]] = None,
106107
patch_size: Tuple[int, ...] = (112, 112, 112),
107108
iter_num: int = 500,
@@ -130,40 +131,48 @@ def __init__(
130131
self.sample_nonzero_mask = sample_nonzero_mask
131132

132133
label_paths = label_paths or [None] * len(image_paths)
134+
label_aux_paths = label_aux_paths or [None] * len(image_paths)
133135
mask_paths = mask_paths or [None] * len(image_paths)
134136

135137
# Load all volumes into memory
136138
logger.info("Loading %d volumes into memory...", len(image_paths))
137139
self.cached_images: List[np.ndarray] = []
138140
self.cached_labels: List[Optional[np.ndarray]] = []
141+
self.cached_label_aux: List[Optional[np.ndarray]] = []
139142
self.cached_masks: List[Optional[np.ndarray]] = []
140143

141-
for i, (img_path, lbl_path, msk_path) in enumerate(
142-
zip(image_paths, label_paths, mask_paths)
144+
for i, (img_path, lbl_path, aux_path, msk_path) in enumerate(
145+
zip(image_paths, label_paths, label_aux_paths, mask_paths)
143146
):
144147
img = self._load_volume(img_path)
145148
lbl = self._load_volume(lbl_path) if lbl_path else None
149+
aux = self._load_volume(aux_path) if aux_path else None
146150
msk = self._load_volume(msk_path) if msk_path else None
147151

148152
# Apply one-time preprocessing before caching
149153
if pre_cache_transforms is not None:
150154
sample = {"image": img}
151155
if lbl is not None:
152156
sample["label"] = lbl
157+
if aux is not None:
158+
sample["label_aux"] = aux
153159
if msk is not None:
154160
sample["mask"] = msk
155161
sample = pre_cache_transforms(sample)
156162
img = sample["image"]
157163
lbl = sample.get("label")
164+
aux = sample.get("label_aux")
158165
msk = sample.get("mask")
159166

160167
# Pad and ensure minimum size
161168
img = self._prepare_volume(img)
162169
lbl = self._prepare_volume(lbl) if lbl is not None else None
170+
aux = self._prepare_volume(aux) if aux is not None else None
163171
msk = self._prepare_volume(msk) if msk is not None else None
164172

165173
self.cached_images.append(img)
166174
self.cached_labels.append(lbl)
175+
self.cached_label_aux.append(aux)
167176
self.cached_masks.append(msk)
168177
logger.info("Volume %d/%d: %s", i + 1, len(image_paths), img.shape)
169178

@@ -210,6 +219,7 @@ def __init__(
210219
def _crop_volumes(self, vol_idx: int, pos: Tuple[int, ...]) -> Dict[str, Any]:
211220
image = self.cached_images[vol_idx]
212221
label = self.cached_labels[vol_idx]
222+
label_aux = self.cached_label_aux[vol_idx]
213223
mask = self.cached_masks[vol_idx]
214224

215225
image_crop = crop_volume(image, self.patch_size, pos, pad_mode="reflect")
@@ -218,13 +228,23 @@ def _crop_volumes(self, vol_idx: int, pos: Tuple[int, ...]) -> Dict[str, Any]:
218228
if label is not None
219229
else None
220230
)
231+
label_aux_crop = (
232+
crop_volume(label_aux, self.patch_size, pos, pad_mode="constant")
233+
if label_aux is not None
234+
else None
235+
)
221236
mask_crop = (
222237
crop_volume(mask, self.patch_size, pos, pad_mode="constant")
223238
if mask is not None
224239
else None
225240
)
226241

227-
return {"image": image_crop, "label": label_crop, "mask": mask_crop}
242+
return {
243+
"image": image_crop,
244+
"label": label_crop,
245+
"label_aux": label_aux_crop,
246+
"mask": mask_crop,
247+
}
228248

229249
def _has_labels(self, vol_idx: int) -> bool:
230250
return self.cached_labels[vol_idx] is not None

connectomics/data/process/bbox_processor.py

Lines changed: 36 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ def process(
7171
self,
7272
label: np.ndarray,
7373
instance_fn: Callable[[np.ndarray, int, Tuple[slice, ...], Dict], Optional[np.ndarray]],
74+
num_workers: int = 0,
7475
**kwargs,
7576
) -> np.ndarray:
7677
"""
@@ -90,6 +91,10 @@ def process(
9091
Returns:
9192
- result_crop: Same shape as label_crop, or None to skip
9293
94+
num_workers: Number of threads for parallel instance processing.
95+
0 = sequential (default). Scipy EDT releases the GIL, so
96+
threads give real parallelism for the numeric heavy lifting.
97+
9398
**kwargs: Additional arguments passed to instance_fn
9499
95100
Returns:
@@ -113,30 +118,40 @@ def process(
113118
distance = self._apply_bg_value(distance)
114119
return self._postprocess(distance, was_padded)
115120

116-
# 5. Process each instance within its bounding box
117-
for i in range(bbox_array.shape[0]):
121+
# 5. Prepare per-instance work items
122+
n = bbox_array.shape[0]
123+
work_items = []
124+
for i in range(n):
118125
instance_id = int(bbox_array[i, 0])
119126
bbox = self._extract_bbox(bbox_array[i], label_shape, label.ndim)
120-
121-
# Extract instance crop
122127
label_crop = label[bbox]
123-
124-
# Call user-provided instance processing function
125-
try:
126-
result_crop = instance_fn(label_crop, instance_id, bbox, kwargs)
127-
except Exception as e:
128-
# Skip instance on error
129-
print(f"Warning: Failed to process instance {instance_id}: {e}")
130-
continue
131-
132-
# Skip if function returned None or empty result
133-
if result_crop is None or not np.any(result_crop):
134-
continue
135-
136-
# Aggregate result back to full volume
137-
self._aggregate_result(distance, bbox, result_crop)
138-
139-
# 6. Postprocessing
128+
work_items.append((label_crop, instance_id, bbox))
129+
130+
# 6. Process instances (parallel or sequential)
131+
if num_workers > 0:
132+
from concurrent.futures import ThreadPoolExecutor
133+
134+
def _run(item):
135+
label_crop, instance_id, bbox = item
136+
try:
137+
return bbox, instance_fn(label_crop, instance_id, bbox, kwargs)
138+
except Exception:
139+
return bbox, None
140+
141+
with ThreadPoolExecutor(max_workers=num_workers) as pool:
142+
for bbox, result_crop in pool.map(_run, work_items):
143+
if result_crop is not None and np.any(result_crop):
144+
self._aggregate_result(distance, bbox, result_crop)
145+
else:
146+
for label_crop, instance_id, bbox in work_items:
147+
try:
148+
result_crop = instance_fn(label_crop, instance_id, bbox, kwargs)
149+
except Exception:
150+
continue
151+
if result_crop is not None and np.any(result_crop):
152+
self._aggregate_result(distance, bbox, result_crop)
153+
154+
# 7. Postprocessing
140155
distance = self._apply_bg_value(distance)
141156
return self._postprocess(distance, was_padded)
142157

0 commit comments

Comments (0)