Add per_component parameter to DiceMetric and DiceHelper classes (#8774)

VijayVignesh1 · pre-commit-ci[bot] · web-flow · commit 65beb58bc102 · 2026-05-01T14:49:49.000+01:00
Fixes #8733 ### Description This PR adds support for connected component-based Dice metric calculation to the existing DiceMetric and DiceHelper classes. ### Changes * Added per_component: bool = False to both DiceMetric and DiceHelper constructors (and to compute_dice) * Implemented compute_cc_dice method that calculates Dice scores for each connected component individually * Voronoi regions: Added compute_voronoi_regions_fast function for efficient connected component assignment without external cc3d dependency * Added input shape validation requiring 4D or 5D binary segmentation, (B, 2, H, W) or (B, 2, D, H, W), with 2 channels (background + foreground) when per_component=True * Updated first_ch calculation to properly exclude background channel when using per-component mode ### Reference * https://arxiv.org/abs/2410.18684 * https://github.com/alexanderjaus/CC-Metrics ### Types of changes  - [x] Non-breaking change (fix or new feature that would not break existing functionality). - [ ] Breaking change (fix or new feature that would cause existing functionality to change). - [x] New tests added to cover the changes. - [x] Integration tests passed locally by running `./runtests.sh -f -u --net --coverage`. - [x] Quick tests passed locally by running `./runtests.sh --quick --unittests --disttests`. - [x] In-line docstrings updated. - [x] Documentation updated, tested `make html` command in the `docs/` folder. --------- Signed-off-by: Vijay Vignesh Prasad Rao <vijayvigneshp02@gmail.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
diff --git a/monai/metrics/meandice.py b/monai/metrics/meandice.py
@@ -13,11 +13,17 @@
 
 import torch
 
-from monai.metrics.utils import do_metric_reduction
+from monai.metrics.utils import compute_voronoi_regions_fast, do_metric_reduction
 from monai.utils import MetricReduction, deprecated_arg
+from monai.utils.module import optional_import
 
 from .metric import CumulativeIterationMetric
 
+scipy_ndimage, has_scipy_ndimage = optional_import("scipy.ndimage")
+cupy, has_cupy = optional_import("cupy")
+cupy_ndimage, has_cupy_ndimage = optional_import("cupyx.scipy.ndimage")
+
+
 __all__ = ["DiceMetric", "compute_dice", "DiceHelper"]
 
 
@@ -41,6 +47,18 @@ class DiceMetric(CumulativeIterationMetric):
     image size they can get overwhelmed by the signal from the background. This assumes the shape of both prediction
     and ground truth is BCHW[D].
 
+    The `per_component=True` approach computes the Dice metric on a per-connected component basis in the ground truth segmentation,
+    ensuring equal weighting for each component regardless of its size. This method eliminates biases in traditional metrics,
+    providing a more balanced evaluation, particularly in scenarios where object size does not correlate with clinical relevance.
+    This provides a more granular evaluation of segmentation quality, especially useful when dealing with fragmented or
+    disconnected objects in the foreground.
+    Note:
+    - The input prediction (`y_pred`) and ground truth (`y`) must both have 2 channels (foreground/background),
+    with binary segmentation (0 for background, 1 for foreground). That is, this assumes the shape of both prediction
+    and ground truth is B2HW[D].
+    - This method cannot be used with multiclass segmentation.
+    For more information, refer to the original paper: https://arxiv.org/abs/2410.18684
+
     The typical execution steps of this metric class follows :py:class:`monai.metrics.metric.Cumulative`.
 
     Further information can be found in the official
@@ -95,6 +113,9 @@ class DiceMetric(CumulativeIterationMetric):
             If `True`, use "label_{index}" as the key corresponding to C channels; if ``include_background`` is True,
             the index begins at "0", otherwise at "1". It can also take a list of label names.
             The outcome will then be returned as a dictionary.
+        per_component: whether to compute the Dice metric per connected component. If `True`, the metric will be
+            computed for each connected component in the ground truth, and then averaged. This requires binary
+            segmentations with 2 channels (background + foreground) as input. This is a more fine-grained computation.
 
     """
 
@@ -106,6 +127,7 @@ def __init__(
         ignore_empty: bool = True,
         num_classes: int | None = None,
         return_with_label: bool | list[str] = False,
+        per_component: bool = False,
     ) -> None:
         super().__init__()
         self.include_background = include_background
@@ -114,13 +136,15 @@ def __init__(
         self.ignore_empty = ignore_empty
         self.num_classes = num_classes
         self.return_with_label = return_with_label
+        self.per_component = per_component
         self.dice_helper = DiceHelper(
             include_background=self.include_background,
             reduction=MetricReduction.NONE,
             get_not_nans=False,
             apply_argmax=False,
             ignore_empty=self.ignore_empty,
             num_classes=self.num_classes,
+            per_component=self.per_component,
         )
 
     def _compute_tensor(self, y_pred: torch.Tensor, y: torch.Tensor) -> torch.Tensor:  # type: ignore[override]
@@ -175,6 +199,7 @@ def compute_dice(
     include_background: bool = True,
     ignore_empty: bool = True,
     num_classes: int | None = None,
+    per_component: bool = False,
 ) -> torch.Tensor:
     """
     Computes Dice score metric for a batch of predictions. This performs the same computation as
@@ -192,6 +217,9 @@ def compute_dice(
         num_classes: number of input channels (always including the background). When this is ``None``,
             ``y_pred.shape[1]`` will be used. This option is useful when both ``y_pred`` and ``y`` are
             single-channel class indices and the number of classes is not automatically inferred from data.
+        per_component: whether to compute the Dice metric per connected component. If `True`, the metric will be
+            computed for each connected component in the ground truth, and then averaged. This requires binary
+            segmentations with 2 channels (background + foreground) as input. This is a more fine-grained computation.
 
     Returns:
         Dice scores per batch and per class, (shape: [batch_size, num_classes]).
@@ -204,6 +232,7 @@ def compute_dice(
         apply_argmax=False,
         ignore_empty=ignore_empty,
         num_classes=num_classes,
+        per_component=per_component,
     )(y_pred=y_pred, y=y)
 
 
@@ -246,6 +275,9 @@ class DiceHelper:
         num_classes: number of input channels (always including the background). When this is ``None``,
             ``y_pred.shape[1]`` will be used. This option is useful when both ``y_pred`` and ``y`` are
             single-channel class indices and the number of classes is not automatically inferred from data.
+        per_component: whether to compute the Dice metric per connected component. If `True`, the metric will be
+            computed for each connected component in the ground truth, and then averaged. This requires binary
+            segmentations with 2 channels (background + foreground) as input. This is a more fine-grained computation.
     """
 
     @deprecated_arg("softmax", "1.5", "1.7", "Use `apply_argmax` instead.", new_name="apply_argmax")
@@ -262,6 +294,7 @@ def __init__(
         num_classes: int | None = None,
         sigmoid: bool | None = None,
         softmax: bool | None = None,
+        per_component: bool = False,
     ) -> None:
         # handling deprecated arguments
         if sigmoid is not None:
@@ -277,6 +310,50 @@ def __init__(
         self.activate = activate
         self.ignore_empty = ignore_empty
         self.num_classes = num_classes
+        self.per_component = per_component
+
+    def compute_cc_dice(self, y_pred: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
+        """
+        Dice averaged over the connected components of the ground truth, for a single batch item.
+
+        Every voxel is assigned to its nearest ground-truth component (Voronoi regions); one Dice score is
+        computed inside each region and the scores are averaged, so every component has equal weight.
+
+        Args:
+            y_pred (torch.Tensor): Predictions with shape (1, 2, D, H, W) or (1, 2, H, W).
+            y (torch.Tensor): Ground truth with shape (1, 2, D, H, W) or (1, 2, H, W).
+
+        Returns:
+            torch.Tensor: 1-element tensor with the mean Dice over connected components. With an empty ground
+            truth it holds NaN if ``ignore_empty`` is set, else 1.0 for an empty prediction and 0.0 otherwise.
+        """
+        same_rank = y_pred.ndim == y.ndim
+        # same rank: collapse the channel dim into index maps via argmax; otherwise use the inputs as given
+        pred_map = torch.argmax(y_pred, dim=1) if same_rank else y_pred
+        gt_map = torch.argmax(y, dim=1) if same_rank else y
+
+        if gt_map[0].sum() == 0:
+            if self.ignore_empty:
+                empty_score = float("nan")
+            else:
+                empty_score = 1.0 if pred_map.sum() == 0 else 0.0
+            scores = torch.tensor(empty_score, device=gt_map.device)
+        else:
+            regions = compute_voronoi_regions_fast(gt_map[0])
+            if regions.device != gt_map.device:
+                regions = regions.to(gt_map.device)
+            region_ids, region_idx = torch.unique(regions.view(-1), return_inverse=True)
+            # 2-bit confusion code per voxel (0=tn, 1=fp, 2=fn, 3=tp), offset by 4 * region index so that a
+            # single bincount produces one 4-bin histogram per region
+            confusion = (gt_map.view(-1) << 1) | pred_map.view(-1)
+            flat_bins = (region_idx << 2) | confusion
+            hist = torch.bincount(flat_bins, minlength=region_ids.numel() * 4).reshape(-1, 4)
+            fp, fn, tp = hist[:, 1], hist[:, 2], hist[:, 3]
+            denom = 2 * tp + fp + fn
+            # a region with denom == 0 (no foreground in either map) is scored 1.0
+            one = torch.tensor(1.0, device=denom.device)
+            scores = torch.nan_to_num(torch.where(denom > 0, (2 * tp).float() / denom.float(), one))
+        return torch.stack([scores.reshape(-1, 1).mean()])
 
     def compute_channel(self, y_pred: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
         """
@@ -305,6 +382,9 @@ def __call__(self, y_pred: torch.Tensor, y: torch.Tensor) -> torch.Tensor | tupl
             y_pred: input predictions with shape (batch_size, num_classes or 1, spatial_dims...).
                 the number of channels is inferred from ``y_pred.shape[1]`` when ``num_classes is None``.
             y: ground truth with shape (batch_size, num_classes or 1, spatial_dims...).
+
+        Raises:
+            ValueError: when the shapes of `y_pred` and `y` are not compatible for the per-component computation.
         """
         _apply_argmax, _threshold = self.apply_argmax, self.threshold
         if self.num_classes is None:
@@ -322,15 +402,31 @@ def __call__(self, y_pred: torch.Tensor, y: torch.Tensor) -> torch.Tensor | tupl
                 y_pred = torch.sigmoid(y_pred)
             y_pred = y_pred > 0.5
 
-        first_ch = 0 if self.include_background else 1
+        if self.per_component:
+            if y_pred.ndim not in (4, 5) or y.ndim not in (4, 5) or y_pred.shape[1] != 2 or y.shape[1] != 2:
+                same_rank = y_pred.ndim == y.ndim and y_pred.ndim in (4, 5)
+                binary_channels = y_pred.shape[1] == 2 and y.shape[1] == 2
+                same_shape = y_pred.shape == y.shape
+                if not (same_rank and binary_channels and same_shape):
+                    raise ValueError(
+                        "per_component requires matching 4D/5D binary tensors "
+                        "(B, 2, H, W) or (B, 2, D, H, W). "
+                        f"Got y_pred={tuple(y_pred.shape)}, y={tuple(y.shape)}."
+                    )
+
+        first_ch = 0 if self.include_background and not self.per_component else 1
         data = []
         for b in range(y_pred.shape[0]):
+            if self.per_component:
+                data.append(self.compute_cc_dice(y_pred=y_pred[b].unsqueeze(0), y=y[b].unsqueeze(0)).reshape(-1))
+                continue
             c_list = []
             for c in range(first_ch, n_pred_ch) if n_pred_ch > 1 else [1]:
                 x_pred = (y_pred[b, 0] == c) if (y_pred.shape[1] == 1) else y_pred[b, c].bool()
                 x = (y[b, 0] == c) if (y.shape[1] == 1) else y[b, c]
                 c_list.append(self.compute_channel(x_pred, x))
             data.append(torch.stack(c_list))
+
         data = torch.stack(data, dim=0).contiguous()  # type: ignore
 
         f, not_nans = do_metric_reduction(data, self.reduction)  # type: ignore
diff --git a/monai/metrics/utils.py b/monai/metrics/utils.py
@@ -39,6 +39,10 @@
 distance_transform_edt, _ = optional_import("scipy.ndimage", name="distance_transform_edt")
 distance_transform_cdt, _ = optional_import("scipy.ndimage", name="distance_transform_cdt")
 
+scipy_ndimage, has_scipy_ndimage = optional_import("scipy.ndimage")
+cupy, has_cupy = optional_import("cupy")
+cupy_ndimage, has_cupy_ndimage = optional_import("cupyx.scipy.ndimage")
+
 __all__ = [
     "ignore_background",
     "do_metric_reduction",
@@ -462,6 +466,59 @@ def prepare_spacing(
     )
 
 
+def compute_voronoi_regions_fast(labels: np.ndarray | torch.Tensor) -> torch.Tensor:
+    """
+    Assign every voxel to its nearest connected component (Voronoi regions) using a single EDT, without cc3d.
+    Runs on GPU via `cupyx.scipy.ndimage` for CUDA tensors when cupy is available, otherwise on CPU via scipy.
+
+    Args:
+        labels (np.ndarray | torch.Tensor): Label map where values > 0 are seeds; every dim is treated as spatial.
+
+    Raises:
+        RuntimeError: when the CPU path is taken and `scipy.ndimage` is not available.
+
+    Returns:
+        torch.Tensor: Voronoi region IDs (int32) on CPU, with the same shape as `labels`; all zeros if no seeds.
+    """
+    if isinstance(labels, torch.Tensor) and labels.is_cuda and has_cupy and has_cupy_ndimage:
+        xp = cupy
+        nd_distance_transform_edt = cupy_ndimage.distance_transform_edt
+        nd_generate_binary_structure = cupy_ndimage.generate_binary_structure
+        nd_label = cupy_ndimage.label
+        x = cupy.asarray(labels.detach())
+    else:
+        # check first: the placeholder returned by a failed `optional_import` raises on attribute access,
+        # so reading `scipy_ndimage.*` before this check would pre-empt the RuntimeError
+        if not has_scipy_ndimage:
+            raise RuntimeError("scipy.ndimage is required for per_component Dice computation.")
+
+        xp = np
+        nd_distance_transform_edt = scipy_ndimage.distance_transform_edt
+        nd_generate_binary_structure = scipy_ndimage.generate_binary_structure
+        nd_label = scipy_ndimage.label
+        if isinstance(labels, torch.Tensor):
+            warnings.warn(
+                "Voronoi computation is running on CPU. "
+                "To accelerate, move the input tensor to GPU and ensure 'cupy' with 'cupyx.scipy.ndimage' is installed."
+            )
+            x = labels.detach().cpu().numpy()  # detach: `.numpy()` fails on tensors that require grad
+        else:
+            x = np.asarray(labels)
+    rank = conn_rank = x.ndim
+    structure = nd_generate_binary_structure(rank=rank, connectivity=conn_rank)
+    cc, num = nd_label(x > 0, structure=structure)
+    if num == 0:
+        # build from the shape only: `torch.from_numpy` cannot take a cupy array
+        return torch.zeros(tuple(x.shape), dtype=torch.int32)
+    edt_input = xp.ones(cc.shape, dtype=xp.uint8)
+    edt_input[cc > 0] = 0
+    indices = nd_distance_transform_edt(edt_input, sampling=None, return_distances=False, return_indices=True)
+    voronoi = cc[tuple(indices)]
+    if xp is cupy:
+        return torch.as_tensor(cupy.asnumpy(voronoi), dtype=torch.int32)
+    return torch.as_tensor(voronoi, dtype=torch.int32)
+
+
 ENCODING_KERNEL = {2: [[8, 4], [2, 1]], 3: [[[128, 64], [32, 16]], [[8, 4], [2, 1]]]}
 
 
diff --git a/tests/metrics/test_compute_meandice.py b/tests/metrics/test_compute_meandice.py
@@ -18,6 +18,10 @@
 from parameterized import parameterized
 
 from monai.metrics import DiceHelper, DiceMetric, compute_dice
+from monai.utils.module import optional_import
+
+_, has_ndimage = optional_import("scipy.ndimage")
+_, has_cupy_ndimage = optional_import("cupyx.scipy.ndimage")
 
 _device = "cuda:0" if torch.cuda.is_available() else "cpu"
 # keep background
@@ -250,6 +254,42 @@
     {"label_1": 0.4000, "label_2": 0.6667},
 ]
 
+# per_component DiceMetric, 3D input: item 0 has two 5x5x5 cubes, each predicted cube is off by 1 voxel per axis
+y = torch.zeros((5, 2, 64, 64, 64), device=_device)
+y_hat = torch.zeros((5, 2, 64, 64, 64), device=_device)
+
+y[0, 1, 20:25, 20:25, 20:25] = 1
+y[0, 1, 40:45, 40:45, 40:45] = 1
+y[0, 0] = 1 - y[0, 1]  # background channel is the complement of the foreground channel
+
+y_hat[0, 1, 21:26, 21:26, 21:26] = 1
+y_hat[0, 1, 41:46, 39:44, 41:46] = 1
+y_hat[0, 0] = 1 - y_hat[0, 1]  # background channel is the complement of the foreground channel
+
+TEST_CASE_16 = [
+    {"per_component": True, "ignore_empty": False},
+    {"y": y, "y_pred": y_hat},
+    [[0.5120], [1.0], [1.0], [1.0], [1.0]],  # item 0: 2*64/(125+125) per cube; items 1-4 are empty in both
+]
+
+# per_component DiceMetric, 2D input: item 0 has two 5x5 squares, each predicted square is off by 1 pixel per axis
+y = torch.zeros((5, 2, 64, 64), device=_device)
+y_hat = torch.zeros((5, 2, 64, 64), device=_device)
+
+y[0, 1, 20:25, 20:25] = 1
+y[0, 1, 40:45, 40:45] = 1
+y[0, 0] = 1 - y[0, 1]  # background channel is the complement of the foreground channel
+
+y_hat[0, 1, 21:26, 21:26] = 1
+y_hat[0, 1, 41:46, 39:44] = 1
+y_hat[0, 0] = 1 - y_hat[0, 1]  # background channel is the complement of the foreground channel
+
+TEST_CASE_17 = [
+    {"per_component": True, "ignore_empty": False},
+    {"y": y, "y_pred": y_hat},
+    [[0.6400], [1.0], [1.0], [1.0], [1.0]],  # item 0: 2*16/(25+25) per square; items 1-4 are empty in both
+]
+
 
 class TestComputeMeanDice(unittest.TestCase):
 
@@ -301,6 +341,24 @@ def test_nans_class(self, params, input_data, expected_value):
         else:
             np.testing.assert_allclose(result.cpu().numpy(), expected_value, atol=1e-4)
 
+    # per-component (connected-component) DiceMetric tests
+    @parameterized.expand([TEST_CASE_16, TEST_CASE_17])
+    @unittest.skipUnless(has_ndimage, "Requires scipy.ndimage.")
+    def test_cc_dice_value_nogpu(self, params, input_data, expected_value):
+        """Check per-component Dice values; inputs are moved to CPU when cupyx.scipy.ndimage is missing."""
+        metric = DiceMetric(**params)
+        if has_cupy_ndimage:
+            batch = input_data
+        else:
+            batch = {key: input_data[key].cpu() for key in ("y", "y_pred")}
+        metric(**batch)
+        np.testing.assert_allclose(metric.aggregate(reduction="none").cpu().numpy(), expected_value, atol=1e-4)
+
+    @unittest.skipUnless(has_ndimage, "Requires scipy.ndimage.")
+    def test_channel_dimensions(self):
+        three_channel = torch.ones([3, 3, 144, 144])  # per_component only accepts 2-channel inputs
+        self.assertRaises(ValueError, DiceMetric(per_component=True), three_channel, three_channel)
+
 
 if __name__ == "__main__":
     unittest.main()