Project-MONAI
diff --git a/‎monai/data/image_reader.py‎
Lines changed: 2 additions & 0 deletions b/‎monai/data/image_reader.py‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎monai/data/test_time_augmentation.py‎
Lines changed: 27 additions & 15 deletions b/‎monai/data/test_time_augmentation.py‎
Lines changed: 27 additions & 15 deletions
diff --git a/‎monai/data/utils.py‎
Lines changed: 3 additions & 2 deletions b/‎monai/data/utils.py‎
Lines changed: 3 additions & 2 deletions
diff --git a/‎monai/inferers/utils.py‎
Lines changed: 23 additions & 3 deletions b/‎monai/inferers/utils.py‎
Lines changed: 23 additions & 3 deletions
diff --git a/‎monai/losses/unified_focal_loss.py‎
Lines changed: 6 additions & 5 deletions b/‎monai/losses/unified_focal_loss.py‎
Lines changed: 6 additions & 5 deletions
diff --git a/‎monai/metrics/panoptic_quality.py‎
Lines changed: 44 additions & 7 deletions b/‎monai/metrics/panoptic_quality.py‎
Lines changed: 44 additions & 7 deletions
diff --git a/‎monai/transforms/croppad/functional.py‎
Lines changed: 13 additions & 9 deletions b/‎monai/transforms/croppad/functional.py‎
Lines changed: 13 additions & 9 deletions
diff --git a/‎monai/transforms/inverse.py‎
Lines changed: 4 additions & 1 deletion b/‎monai/transforms/inverse.py‎
Lines changed: 4 additions & 1 deletion
diff --git a/‎monai/transforms/spatial/array.py‎
Lines changed: 7 additions & 0 deletions b/‎monai/transforms/spatial/array.py‎
Lines changed: 7 additions & 0 deletions
@@ -1113,6 +1113,8 @@ def get_data(self, img) -> tuple[np.ndarray, dict]:
 
         for i, filename in zip(ensure_tuple(img), self.filenames):
             header = self._get_meta_dict(i)
+            if MetaKeys.PIXDIM in header:
+                header[MetaKeys.ORIGINAL_PIXDIM] = np.array(header[MetaKeys.PIXDIM], copy=True)
             header[MetaKeys.AFFINE] = self._get_affine(i)
             header[MetaKeys.ORIGINAL_AFFINE] = self._get_affine(i)
             header["as_closest_canonical"] = self.as_closest_canonical
 
@@ -16,7 +16,6 @@
 from copy import deepcopy
 from typing import TYPE_CHECKING, Any
 
-import numpy as np
 import torch
 
 from monai.config.type_definitions import NdarrayOrTensor
@@ -68,7 +67,7 @@ class TestTimeAugmentation:
     Args:
         transform: transform (or composed) to be applied to each realization. At least one transform must be of type
         `RandomizableTrait` (i.e. `Randomizable`, `RandomizableTransform`, or `RandomizableTrait`).
-            . All random transforms must be of type `InvertibleTransform`.
+        When `apply_inverse_to_pred` is True, all random transforms must be of type `InvertibleTransform`.
         batch_size: number of realizations to infer at once.
         num_workers: how many subprocesses to use for data.
         inferrer_fn: function to use to perform inference.
@@ -92,6 +91,11 @@ class TestTimeAugmentation:
             will return the full data. Dimensions will be same size as when passing a single image through
             `inferrer_fn`, with a dimension appended equal in size to `num_examples` (N), i.e., `[N,C,H,W,[D]]`.
         progress: whether to display a progress bar.
+        apply_inverse_to_pred: whether to apply inverse transformations to the predictions.
+            If the model's prediction is spatial (e.g. segmentation), this should be `True` to map the predictions
+            back to the original spatial reference.
+            If the prediction is non-spatial (e.g. classification label or score), this should be `False` to
+            aggregate the raw predictions directly. Defaults to `True`.
 
     Example:
         .. code-block:: python
@@ -125,6 +129,7 @@ def __init__(
         post_func: Callable = _identity,
         return_full_data: bool = False,
         progress: bool = True,
+        apply_inverse_to_pred: bool = True,
     ) -> None:
         self.transform = transform
         self.batch_size = batch_size
@@ -134,6 +139,7 @@ def __init__(
         self.image_key = image_key
         self.return_full_data = return_full_data
         self.progress = progress
+        self.apply_inverse_to_pred = apply_inverse_to_pred
         self._pred_key = CommonKeys.PRED
         self.inverter = Invertd(
             keys=self._pred_key,
@@ -152,20 +158,23 @@ def __init__(
 
     def _check_transforms(self):
         """Should be at least 1 random transform, and all random transforms should be invertible."""
-        ts = [self.transform] if not isinstance(self.transform, Compose) else self.transform.transforms
-        randoms = np.array([isinstance(t, Randomizable) for t in ts])
-        invertibles = np.array([isinstance(t, InvertibleTransform) for t in ts])
-        # check at least 1 random
-        if sum(randoms) == 0:
+        transforms = [self.transform] if not isinstance(self.transform, Compose) else self.transform.transforms
+        warns = []
+        randoms = []
+
+        for idx, t in enumerate(transforms):
+            if isinstance(t, Randomizable):
+                randoms.append(t)
+                if self.apply_inverse_to_pred and not isinstance(t, InvertibleTransform):
+                    warns.append(f"Transform #{idx} (type {type(t).__name__}) is random but not invertible.")
+
+        if len(randoms) == 0:
+            warns.append("TTA usually requires at least one `Randomizable` transform in the given transform sequence.")
+
+        if len(warns) > 0:
             warnings.warn(
-                "TTA usually has at least a `Randomizable` transform or `Compose` contains `Randomizable` transforms."
+                "TTA has encountered issues with the given transforms:\n  " + "\n  ".join(warns), stacklevel=2
             )
-        # check that whenever randoms is True, invertibles is also true
-        for r, i in zip(randoms, invertibles):
-            if r and not i:
-                warnings.warn(
-                    f"Not all applied random transform(s) are invertible. Problematic transform: {type(r).__name__}"
-                )
 
     def __call__(
         self, data: dict[str, Any], num_examples: int = 10
@@ -199,7 +208,10 @@ def __call__(
         for b in tqdm(dl) if has_tqdm and self.progress else dl:
             # do model forward pass
             b[self._pred_key] = self.inferrer_fn(b[self.image_key].to(self.device))
-            outs.extend([self.inverter(PadListDataCollate.inverse(i))[self._pred_key] for i in decollate_batch(b)])
+            if self.apply_inverse_to_pred:
+                outs.extend([self.inverter(PadListDataCollate.inverse(i))[self._pred_key] for i in decollate_batch(b)])
+            else:
+                outs.extend([i[self._pred_key] for i in decollate_batch(b)])
 
         output: NdarrayOrTensor = stack(outs, 0)
 
 
@@ -597,11 +597,12 @@ def decollate_batch(batch, detach: bool = True, pad=True, fill_value=None):
         type(batch).__module__ == "numpy" and not isinstance(batch, Iterable)
     ):
         return batch
+    # if scalar tensor/array, return the item itself.
+    if getattr(batch, "ndim", -1) == 0 and hasattr(batch, "item"):
+        return batch.item() if detach else batch
     if isinstance(batch, torch.Tensor):
         if detach:
             batch = batch.detach()
-        if batch.ndim == 0:
-            return batch.item() if detach else batch
         out_list = torch.unbind(batch, dim=0)
         # if of type MetaObj, decollate the metadata
         if isinstance(batch, MetaObj):
 
@@ -76,7 +76,8 @@ def sliding_window_inference(
 
     Args:
         inputs: input image to be processed (assuming NCHW[D])
-        roi_size: the spatial window size for inferences.
+        roi_size: the spatial window size for inferences. This must be a single value or a sequence with one
+            value per spatial dimension (e.g. 2 values for 2D, 3 values for 3D).
             When its components have None or non-positives, the corresponding inputs dimension will be used.
             if the components of the `roi_size` are non-positive values, the transform will use the
             corresponding components of img size. For example, `roi_size=(32, -1)` will be adapted
@@ -131,11 +132,30 @@ def sliding_window_inference(
         kwargs: optional keyword args to be passed to ``predictor``.
 
     Note:
-        - input must be channel-first and have a batch dim, supports N-D sliding window.
+        - Inputs must be channel-first and have a batch dim (NCHW / NCDHW).
+        - If your data is NHWC/NDHWC, please apply `EnsureChannelFirst` / `EnsureChannelFirstd` upstream.
+
+    Raises:
+        ValueError: When the input dimensions do not match the expected dimensions based on ``roi_size``.
 
     """
-    buffered = buffer_steps is not None and buffer_steps > 0
     num_spatial_dims = len(inputs.shape) - 2
+
+    # Only perform strict shape validation if roi_size is a sequence (explicit dimensions).
+    # If roi_size is an integer, it is broadcast to all dimensions, so we cannot
+    # infer the expected dimensionality to enforce a strict check here.
+    if isinstance(roi_size, Sequence):
+        roi_dims = len(roi_size)
+        if num_spatial_dims != roi_dims:
+            raise ValueError(
+                f"Inputs must have {roi_dims + 2} dimensions for {roi_dims}D roi_size "
+                f"(Batch, Channel, {', '.join(['Spatial'] * roi_dims)}), "
+                f"but got inputs shape {inputs.shape}.\n"
+                "If you have channel-last data (e.g. B, D, H, W, C), please use "
+                "monai.transforms.EnsureChannelFirst or EnsureChannelFirstd upstream."
+            )
+    # -----------------------------------------------------------------
+    buffered = buffer_steps is not None and buffer_steps > 0
     if buffered:
         if buffer_dim < -num_spatial_dims or buffer_dim > num_spatial_dims:
             raise ValueError(f"buffer_dim must be in [{-num_spatial_dims}, {num_spatial_dims}], got {buffer_dim}.")
 
@@ -44,7 +44,7 @@ def __init__(
         Args:
             to_onehot_y: whether to convert `y` into the one-hot format. Defaults to False.
             delta : weight of the background. Defaults to 0.7.
-            gamma : value of the exponent gamma in the definition of the Focal loss  . Defaults to 0.75.
+            gamma : value of the exponent gamma in the definition of the Focal loss. Defaults to 0.75.
             epsilon : it defines a very small number each time. similarly smooth value. Defaults to 1e-7.
         """
         super().__init__(reduction=LossReduction(reduction).value)
@@ -108,7 +108,7 @@ def __init__(
         Args:
             to_onehot_y : whether to convert `y` into the one-hot format. Defaults to False.
             delta : weight of the background. Defaults to 0.7.
-            gamma : value of the exponent gamma in the definition of the Focal loss  . Defaults to 0.75.
+            gamma : value of the exponent gamma in the definition of the Focal loss. Defaults to 2.
             epsilon : it defines a very small number each time. similarly smooth value. Defaults to 1e-7.
         """
         super().__init__(reduction=LossReduction(reduction).value)
@@ -167,10 +167,11 @@ def __init__(
         Args:
             to_onehot_y : whether to convert `y` into the one-hot format. Defaults to False.
             num_classes : number of classes, it only supports 2 now. Defaults to 2.
+            weight : weight for each loss function. Defaults to 0.5.
+            gamma : value of the exponent gamma in the definition of the Focal loss. Defaults to 0.5.
             delta : weight of the background. Defaults to 0.7.
-            gamma : value of the exponent gamma in the definition of the Focal loss. Defaults to 0.75.
-            epsilon : it defines a very small number each time. similarly smooth value. Defaults to 1e-7.
-            weight : weight for each loss function, if it's none it's 0.5. Defaults to None.
+
+
 
         Example:
             >>> import torch
 
@@ -21,7 +21,7 @@
 
 linear_sum_assignment, _ = optional_import("scipy.optimize", name="linear_sum_assignment")
 
-__all__ = ["PanopticQualityMetric", "compute_panoptic_quality"]
+__all__ = ["PanopticQualityMetric", "compute_panoptic_quality", "compute_mean_iou"]
 
 
 class PanopticQualityMetric(CumulativeIterationMetric):
@@ -55,6 +55,8 @@ class PanopticQualityMetric(CumulativeIterationMetric):
             If set `match_iou_threshold` < 0.5, this function uses Munkres assignment to find the
             maximal amount of unique pairing.
         smooth_numerator: a small constant added to the numerator to avoid zero.
+        return_confusion_matrix: if True, returns raw confusion matrix values (tp, fp, fn, iou_sum)
+            instead of computed metrics. Default is False.
 
     """
 
@@ -65,19 +67,22 @@ def __init__(
         reduction: MetricReduction | str = MetricReduction.MEAN_BATCH,
         match_iou_threshold: float = 0.5,
         smooth_numerator: float = 1e-6,
+        return_confusion_matrix: bool = False,
     ) -> None:
         super().__init__()
         self.num_classes = num_classes
         self.reduction = reduction
         self.match_iou_threshold = match_iou_threshold
         self.smooth_numerator = smooth_numerator
         self.metric_name = ensure_tuple(metric_name)
+        self.return_confusion_matrix = return_confusion_matrix
 
     def _compute_tensor(self, y_pred: torch.Tensor, y: torch.Tensor) -> torch.Tensor:  # type: ignore[override]
         """
         Args:
-            y_pred: Predictions. It must be in the form of B2HW and have integer type. The first channel and the
-                second channel represent the instance predictions and classification predictions respectively.
+            y_pred: Predictions. It must be in the form of B2HW (2D) or B2HWD (3D) and have integer type.
+                The first channel and the second channel represent the instance predictions and classification
+                predictions respectively.
             y: ground truth. It must have the same shape as `y_pred` and have integer type. The first channel and the
                 second channel represent the instance labels and classification labels respectively.
                 Values in the second channel of `y_pred` and `y` should be in the range of 0 to `self.num_classes`,
@@ -86,7 +91,7 @@ def _compute_tensor(self, y_pred: torch.Tensor, y: torch.Tensor) -> torch.Tensor
         Raises:
             ValueError: when `y_pred` and `y` have different shapes.
             ValueError: when `y_pred` and `y` have != 2 channels.
-            ValueError: when `y_pred` and `y` have != 4 dimensions.
+            ValueError: when `y_pred` and `y` have != 4 or 5 dimensions.
 
         """
         if y_pred.shape != y.shape:
@@ -98,8 +103,10 @@ def _compute_tensor(self, y_pred: torch.Tensor, y: torch.Tensor) -> torch.Tensor
             )
 
         dims = y_pred.ndimension()
-        if dims != 4:
-            raise ValueError(f"y_pred should have 4 dimensions (batch, 2, h, w), got {dims}.")
+        if dims not in (4, 5):
+            raise ValueError(
+                f"y_pred should have 4 dimensions (batch, 2, h, w) or 5 dimensions (batch, 2, h, w, d), got {dims}."
+            )
 
         batch_size = y_pred.shape[0]
 
@@ -131,13 +138,22 @@ def aggregate(self, reduction: MetricReduction | str | None = None) -> torch.Ten
                 available reduction modes: {``"none"``, ``"mean"``, ``"sum"``, ``"mean_batch"``, ``"sum_batch"``,
                 ``"mean_channel"``, ``"sum_channel"``}, default to `self.reduction`. if "none", will not do reduction.
 
+        Returns:
+            If `return_confusion_matrix` is True, returns the confusion matrix values [tp, fp, fn, iou_sum]
+            after the requested reduction. Otherwise, returns the computed metric(s) based on `metric_name`.
+
         """
         data = self.get_buffer()
         if not isinstance(data, torch.Tensor):
             raise ValueError("the data to aggregate must be PyTorch Tensor.")
 
         # do metric reduction
         f, _ = do_metric_reduction(data, reduction or self.reduction)
+
+        if self.return_confusion_matrix:
+            # Return raw confusion matrix values
+            return f
+
         tp, fp, fn, iou_sum = f[..., 0], f[..., 1], f[..., 2], f[..., 3]
         results = []
         for metric_name in self.metric_name:
@@ -169,7 +185,7 @@ def compute_panoptic_quality(
     calculate PQ, and returning them directly enables further calculation over all images.
 
     Args:
-        pred: input data to compute, it must be in the form of HW and have integer type.
+        pred: input data to compute, it must be in the form of HW (2D) or HWD (3D) and have integer type.
         gt: ground truth. It must have the same shape as `pred` and have integer type.
         metric_name: output metric. The value can be "pq", "sq" or "rq".
         remap: whether to remap `pred` and `gt` to ensure contiguous ordering of instance id.
@@ -294,3 +310,24 @@ def _check_panoptic_metric_name(metric_name: str) -> str:
     if metric_name in ["recognition_quality", "rq"]:
         return "rq"
     raise ValueError(f"metric name: {metric_name} is wrong, please use 'pq', 'sq' or 'rq'.")
+
+
+def compute_mean_iou(confusion_matrix: torch.Tensor, smooth_numerator: float = 1e-6) -> torch.Tensor:
+    """Compute mean IoU from confusion matrix values.
+
+    Args:
+        confusion_matrix: tensor with shape (..., 4) where the last dimension contains
+            [tp, fp, fn, iou_sum] as returned by `compute_panoptic_quality` with `output_confusion_matrix=True`.
+        smooth_numerator: a small constant added to the denominator (`tp`) to avoid division by zero.
+
+    Returns:
+        Mean IoU computed as iou_sum / (tp + smooth_numerator).
+
+    """
+    if confusion_matrix.shape[-1] != 4:
+        raise ValueError(
+            f"confusion_matrix should have shape (..., 4) with [tp, fp, fn, iou_sum], "
+            f"got shape {confusion_matrix.shape}."
+        )
+    tp, iou_sum = confusion_matrix[..., 0], confusion_matrix[..., 3]
+    return iou_sum / (tp + smooth_numerator)
@@ -91,23 +91,27 @@ def pad_nd(
             https://pytorch.org/docs/stable/generated/torch.nn.functional.pad.html
         kwargs: other arguments for the `np.pad` or `torch.pad` function.
             note that `np.pad` treats channel dimension as the first dimension.
+    Raises:
+        ValueError: If `value` is provided when `mode` is not ``"constant"``.
     """
+    if mode != "constant" and "value" in kwargs:
+        raise ValueError("'value' argument is only valid when mode='constant'")
     if mode in {"linear_ramp", "maximum", "mean", "median", "minimum", "symmetric", "empty"}:
         return _np_pad(img, pad_width=to_pad, mode=mode, **kwargs)
     try:
         _pad = _np_pad
-        if mode in {"constant", "reflect", "edge", "replicate", "wrap", "circular"} and img.dtype not in {
-            torch.int16,
-            torch.int64,
-            torch.bool,
-            torch.uint8,
-        }:
+        if mode in {"constant", "reflect", "edge", "replicate", "wrap", "circular"}:
+            # Try PyTorch pad for these modes; fall back to NumPy on error.
             _pad = _pt_pad
         return _pad(img, pad_width=to_pad, mode=mode, **kwargs)
+    except NotImplementedError:
+        # PyTorch does not support this combination, fall back to NumPy
+        return _np_pad(img, pad_width=to_pad, mode=mode, **kwargs)
     except (ValueError, TypeError, RuntimeError) as err:
-        if isinstance(err, NotImplementedError) or any(
-            k in str(err) for k in ("supported", "unexpected keyword", "implemented", "value")
-        ):
+        # PyTorch may raise generic errors for unsupported modes/dtypes or kwargs.
+        # Since there are no stable exception types for these cases, we fall back
+        # to NumPy by matching known error message patterns.
+        if any(k in str(err) for k in ("supported", "unexpected keyword", "implemented", "value")):
             return _np_pad(img, pad_width=to_pad, mode=mode, **kwargs)
         raise ValueError(
             f"{img.shape} {to_pad} {mode} {kwargs} {img.dtype} {img.device if isinstance(img, torch.Tensor) else None}"
 
@@ -22,7 +22,7 @@
 from monai import transforms
 from monai.data.meta_obj import MetaObj, get_track_meta
 from monai.data.meta_tensor import MetaTensor
-from monai.data.utils import to_affine_nd
+from monai.data.utils import affine_to_spacing, to_affine_nd
 from monai.transforms.traits import InvertibleTrait
 from monai.transforms.transform import Transform
 from monai.utils import (
@@ -224,6 +224,9 @@ def track_transform_meta(
                 else:
                     raise
             out_obj.meta[MetaKeys.AFFINE] = convert_to_tensor(affine, device=torch.device("cpu"), dtype=torch.float64)
+            if MetaKeys.PIXDIM in out_obj.meta:
+                spacing = affine_to_spacing(out_obj.meta[MetaKeys.AFFINE])
+                out_obj.meta[MetaKeys.PIXDIM][1 : 1 + len(spacing)] = spacing
 
         if not (get_track_meta() and transform_info and transform_info.get(TraceKeys.TRACING)):
             if isinstance(data, Mapping):
 
@@ -2436,6 +2436,13 @@ def __init__(
             - :py:class:`RandAffineGrid` for the random affine parameters configurations.
             - :py:class:`Affine` for the affine transformation parameters configurations.
 
+        Note:
+            MONAI affine transforms use a 'backward mapping' (image-to-grid) logic, which can be counter-intuitive:
+
+            - Translation: A positive value shifts the image in the negative direction.
+            - Scaling: Positive scale_range values decrease the image size; values in [-1, 0) increase it.
+            - Rotation: The direction (CW/CCW) may vary depending on the axis.
+
         """
         RandomizableTransform.__init__(self, prob)
         LazyTransform.__init__(self, lazy=lazy)