Project-MONAI
diff --git a/docs/source/handlers.rst b/docs/source/handlers.rst
Lines changed: 6 additions & 0 deletions
diff --git a/docs/source/metrics.rst b/docs/source/metrics.rst
Lines changed: 9 additions & 0 deletions
diff --git a/monai/apps/detection/transforms/dictionary.py b/monai/apps/detection/transforms/dictionary.py
Lines changed: 5 additions & 2 deletions
diff --git a/monai/apps/nnunet/nnunetv2_runner.py b/monai/apps/nnunet/nnunetv2_runner.py
Lines changed: 77 additions & 34 deletions
diff --git a/monai/data/test_time_augmentation.py b/monai/data/test_time_augmentation.py
Lines changed: 27 additions & 15 deletions
diff --git a/monai/handlers/__init__.py b/monai/handlers/__init__.py
Lines changed: 1 addition & 0 deletions
@@ -83,6 +83,12 @@ Panoptic Quality metrics handler
     :members:
 
 
+Calibration Error metrics handler
+---------------------------------
+.. autoclass:: CalibrationError
+    :members:
+
+
 Mean squared error metrics handler
 ----------------------------------
 .. autoclass:: MeanSquaredError
 
@@ -185,6 +185,15 @@ Metrics
 .. autoclass:: MetricsReloadedCategorical
     :members:
 
+`Calibration Error`
+-------------------
+.. autofunction:: calibration_binning
+
+.. autoclass:: CalibrationReduction
+    :members:
+
+.. autoclass:: CalibrationErrorMetric
+    :members:
 
 
 Utilities
 
@@ -1137,11 +1137,14 @@ def generate_fg_center_boxes_np(self, boxes: NdarrayOrTensor, image_size: Sequen
                 extended_boxes[:, axis] = boxes_start[:, axis] - self.spatial_size[axis] // 2 + 1
                 extended_boxes[:, axis + spatial_dims] = boxes_stop[:, axis] + self.spatial_size[axis] // 2 - 1
             else:
+                # the cropper will extend an additional pixel to the left side when the size is even
+                radius_left = self.spatial_size[axis] // 2
+                radius_right = self.spatial_size[axis] - radius_left - 1  # we subtract 1 for the center voxel
                 # extended box start
-                extended_boxes[:, axis] = boxes_stop[:, axis] - self.spatial_size[axis] // 2 - 1
+                extended_boxes[:, axis] = boxes_stop[:, axis] - radius_right
                 extended_boxes[:, axis] = np.minimum(extended_boxes[:, axis], boxes_start[:, axis])
                 # extended box stop
-                extended_boxes[:, axis + spatial_dims] = extended_boxes[:, axis] + self.spatial_size[axis] // 2
+                extended_boxes[:, axis + spatial_dims] = boxes_start[:, axis] + radius_left
                 extended_boxes[:, axis + spatial_dims] = np.maximum(
                     extended_boxes[:, axis + spatial_dims], boxes_stop[:, axis]
                 )
 
@@ -14,6 +14,7 @@
 
 import glob
 import os
+import shlex
 import subprocess
 from typing import Any
 
@@ -486,16 +487,16 @@ def plan_and_process(
         if not no_pp:
             self.preprocess(c, n_proc, overwrite_plans_name, verbose)
 
-    def train_single_model(self, config: Any, fold: int, gpu_id: tuple | list | int = 0, **kwargs: Any) -> None:
+    def train_single_model(self, config: Any, fold: int, gpu_id: tuple | list | int | str = 0, **kwargs: Any) -> None:
         """
         Run the training on a single GPU with one specified configuration provided.
-        Note: this will override the environment variable `CUDA_VISIBLE_DEVICES`.
+        Note: if CUDA_VISIBLE_DEVICES is already set and gpu_id resolves to 0, the existing value is preserved;
+        otherwise it is set to gpu_id.
 
         Args:
             config: configuration that should be trained. Examples: "2d", "3d_fullres", "3d_lowres".
             fold: fold of the 5-fold cross-validation. Should be an int between 0 and 4.
-            gpu_id: an integer to select the device to use, or a tuple/list of GPU device indices used for multi-GPU
-                training (e.g., (0,1)). Default: 0.
+            gpu_id: an int, MIG UUID (str), or tuple/list of GPU indices for multi-GPU training (e.g., (0,1)). Default: 0.
             kwargs: this optional parameter allows you to specify additional arguments in
                 ``nnunetv2.run.run_training.run_training_entry``.
 
@@ -525,35 +526,70 @@ def train_single_model(self, config: Any, fold: int, gpu_id: tuple | list | int
             kwargs.pop("npz")
             logger.warning("please specify the `export_validation_probabilities` in the __init__ of `nnUNetV2Runner`.")
 
-        cmd = self.train_single_model_command(config, fold, gpu_id, kwargs)
-        run_cmd(cmd, shell=True)
+        cmd, env = self.train_single_model_command(config, fold, gpu_id, kwargs)
+        run_cmd(cmd, env=env)
 
-    def train_single_model_command(self, config, fold, gpu_id, kwargs):
-        if isinstance(gpu_id, (tuple, list)):
+    def train_single_model_command(
+        self, config: str, fold: int, gpu_id: int | str | tuple | list, kwargs: dict[str, Any]
+    ) -> tuple[list[str], dict[str, str]]:
+        """
+        Build the argument list (argv) and environment for training a single nnU-Net model.
+
+        Args:
+            config: Configuration name (e.g., "3d_fullres").
+            fold: Cross-validation fold index (0-4).
+            gpu_id: Device selector—int, str (MIG UUID), or tuple/list for multi-GPU.
+            kwargs: Additional CLI arguments forwarded to nnUNetv2_train.
+
+        Returns:
+            Tuple of (cmd, env) where cmd is a list[str] of argv entries and env is a dict[str, str]
+            passed to the subprocess.
+
+        Raises:
+            ValueError: If gpu_id is an empty tuple or list.
+        """
+        env = os.environ.copy()
+        device_setting: str = "0"
+        num_gpus = 1
+        if isinstance(gpu_id, str):
+            device_setting = gpu_id
+            num_gpus = 1
+        elif isinstance(gpu_id, (tuple, list)):
+            if len(gpu_id) == 0:
+                raise ValueError("gpu_id tuple/list cannot be empty")
             if len(gpu_id) > 1:
-                gpu_ids_str = ""
-                for _i in range(len(gpu_id)):
-                    gpu_ids_str += f"{gpu_id[_i]},"
-                device_setting = f"CUDA_VISIBLE_DEVICES={gpu_ids_str[:-1]}"
-            else:
-                device_setting = f"CUDA_VISIBLE_DEVICES={gpu_id[0]}"
+                device_setting = ",".join(str(x) for x in gpu_id)
+                num_gpus = len(gpu_id)
+            elif len(gpu_id) == 1:
+                device_setting = str(gpu_id[0])
+                num_gpus = 1
         else:
-            device_setting = f"CUDA_VISIBLE_DEVICES={gpu_id}"
-        num_gpus = 1 if isinstance(gpu_id, int) or len(gpu_id) == 1 else len(gpu_id)
-
-        cmd = (
-            f"{device_setting} nnUNetv2_train "
-            + f"{self.dataset_name_or_id} {config} {fold} "
-            + f"-tr {self.trainer_class_name} -num_gpus {num_gpus}"
-        )
+            device_setting = str(gpu_id)
+            num_gpus = 1
+        env_cuda = env.get("CUDA_VISIBLE_DEVICES")
+        if env_cuda is not None and device_setting == "0":
+            logger.info(f"Using existing environment variable CUDA_VISIBLE_DEVICES='{env_cuda}'")
+        else:
+            env["CUDA_VISIBLE_DEVICES"] = device_setting
+
+        cmd = [
+            "nnUNetv2_train",
+            f"{self.dataset_name_or_id}",
+            f"{config}",
+            f"{fold}",
+            "-tr",
+            f"{self.trainer_class_name}",
+            "-num_gpus",
+            f"{num_gpus}",
+        ]
         if self.export_validation_probabilities:
-            cmd += " --npz"
+            cmd.append("--npz")
         for _key, _value in kwargs.items():
             if _key == "p" or _key == "pretrained_weights":
-                cmd += f" -{_key} {_value}"
+                cmd.extend([f"-{_key}", f"{_value}"])
             else:
-                cmd += f" --{_key} {_value}"
-        return cmd
+                cmd.extend([f"--{_key}", f"{_value}"])
+        return cmd, env
 
     def train(
         self,
@@ -637,8 +673,8 @@ def train_parallel_cmd(
                 if _config in ensure_tuple(configs):
                     for _i in range(self.num_folds):
                         the_device = gpu_id_for_all[_index % n_devices]  # type: ignore
-                        cmd = self.train_single_model_command(_config, _i, the_device, kwargs)
-                        all_cmds[-1][the_device].append(cmd)
+                        cmd, env = self.train_single_model_command(_config, _i, the_device, kwargs)
+                        all_cmds[-1][the_device].append((cmd, env))
                         _index += 1
         return all_cmds
 
@@ -666,19 +702,21 @@ def train_parallel(
             for gpu_id, gpu_cmd in cmds.items():
                 if not gpu_cmd:
                     continue
+                cmds_for_log = [shlex.join(cmd) for cmd, _ in gpu_cmd]
                 logger.info(
                     f"training - stage {s + 1}:\n"
-                    f"for gpu {gpu_id}, commands: {gpu_cmd}\n"
+                    f"for gpu {gpu_id}, commands: {cmds_for_log}\n"
                     f"log '.txt' inside '{os.path.join(self.nnunet_results, self.dataset_name)}'"
                 )
         for stage in all_cmds:
             processes = []
             for device_id in stage:
                 if not stage[device_id]:
                     continue
-                cmd_str = "; ".join(stage[device_id])
+                cmd_str = "; ".join(shlex.join(cmd) for cmd, _ in stage[device_id])
+                env = stage[device_id][0][1]
                 logger.info(f"Current running command on GPU device {device_id}:\n{cmd_str}\n")
-                processes.append(subprocess.Popen(cmd_str, shell=True, stdout=subprocess.DEVNULL))
+                processes.append(subprocess.Popen(cmd_str, shell=True, env=env, stdout=subprocess.DEVNULL))
             # finish this stage first
             for p in processes:
                 p.wait()
@@ -779,7 +817,7 @@ def predict(
         part_id: int = 0,
         num_processes_preprocessing: int = -1,
         num_processes_segmentation_export: int = -1,
-        gpu_id: int = 0,
+        gpu_id: int | str = 0,
     ) -> None:
         """
         Use this to run inference with nnU-Net. This function is used when you want to manually specify a folder containing
@@ -813,9 +851,14 @@ def predict(
             num_processes_preprocessing: out-of-RAM issues.
             num_processes_segmentation_export: Number of processes used for segmentation export.
                 More is not always better. Beware of out-of-RAM issues.
-            gpu_id: which GPU to use for prediction.
+            gpu_id: GPU device index (int) or MIG UUID (str) for prediction.
+                If CUDA_VISIBLE_DEVICES is already set and gpu_id is 0, the existing
+                environment variable is preserved.
         """
-        os.environ["CUDA_VISIBLE_DEVICES"] = f"{gpu_id}"
+        if "CUDA_VISIBLE_DEVICES" in os.environ and gpu_id in {0, "0"}:
+            logger.info(f"Predict: Using existing CUDA_VISIBLE_DEVICES={os.environ['CUDA_VISIBLE_DEVICES']}")
+        else:
+            os.environ["CUDA_VISIBLE_DEVICES"] = f"{gpu_id}"
 
         from nnunetv2.inference.predict_from_raw_data import nnUNetPredictor
 
 
@@ -16,7 +16,6 @@
 from copy import deepcopy
 from typing import TYPE_CHECKING, Any
 
-import numpy as np
 import torch
 
 from monai.config.type_definitions import NdarrayOrTensor
@@ -68,7 +67,7 @@ class TestTimeAugmentation:
     Args:
         transform: transform (or composed) to be applied to each realization. At least one transform must be of type
         `RandomizableTrait` (i.e. `Randomizable`, `RandomizableTransform`, or `RandomizableTrait`).
-            . All random transforms must be of type `InvertibleTransform`.
+        When `apply_inverse_to_pred` is True, all random transforms must be of type `InvertibleTransform`.
         batch_size: number of realizations to infer at once.
         num_workers: how many subprocesses to use for data.
         inferrer_fn: function to use to perform inference.
@@ -92,6 +91,11 @@ class TestTimeAugmentation:
             will return the full data. Dimensions will be same size as when passing a single image through
             `inferrer_fn`, with a dimension appended equal in size to `num_examples` (N), i.e., `[N,C,H,W,[D]]`.
         progress: whether to display a progress bar.
+        apply_inverse_to_pred: whether to apply inverse transformations to the predictions.
+            If the model's prediction is spatial (e.g. segmentation), this should be `True` to map the predictions
+            back to the original spatial reference.
+            If the prediction is non-spatial (e.g. classification label or score), this should be `False` to
+            aggregate the raw predictions directly. Defaults to `True`.
 
     Example:
         .. code-block:: python
@@ -125,6 +129,7 @@ def __init__(
         post_func: Callable = _identity,
         return_full_data: bool = False,
         progress: bool = True,
+        apply_inverse_to_pred: bool = True,
     ) -> None:
         self.transform = transform
         self.batch_size = batch_size
@@ -134,6 +139,7 @@ def __init__(
         self.image_key = image_key
         self.return_full_data = return_full_data
         self.progress = progress
+        self.apply_inverse_to_pred = apply_inverse_to_pred
         self._pred_key = CommonKeys.PRED
         self.inverter = Invertd(
             keys=self._pred_key,
@@ -152,20 +158,23 @@ def __init__(
 
     def _check_transforms(self):
         """Should be at least 1 random transform, and all random transforms should be invertible."""
-        ts = [self.transform] if not isinstance(self.transform, Compose) else self.transform.transforms
-        randoms = np.array([isinstance(t, Randomizable) for t in ts])
-        invertibles = np.array([isinstance(t, InvertibleTransform) for t in ts])
-        # check at least 1 random
-        if sum(randoms) == 0:
+        transforms = [self.transform] if not isinstance(self.transform, Compose) else self.transform.transforms
+        warns = []
+        randoms = []
+
+        for idx, t in enumerate(transforms):
+            if isinstance(t, Randomizable):
+                randoms.append(t)
+                if self.apply_inverse_to_pred and not isinstance(t, InvertibleTransform):
+                    warns.append(f"Transform #{idx} (type {type(t).__name__}) is random but not invertible.")
+
+        if len(randoms) == 0:
+            warns.append("TTA usually requires at least one `Randomizable` transform in the given transform sequence.")
+
+        if len(warns) > 0:
             warnings.warn(
-                "TTA usually has at least a `Randomizable` transform or `Compose` contains `Randomizable` transforms."
+                "TTA has encountered issues with the given transforms:\n  " + "\n  ".join(warns), stacklevel=2
             )
-        # check that whenever randoms is True, invertibles is also true
-        for r, i in zip(randoms, invertibles):
-            if r and not i:
-                warnings.warn(
-                    f"Not all applied random transform(s) are invertible. Problematic transform: {type(r).__name__}"
-                )
 
     def __call__(
         self, data: dict[str, Any], num_examples: int = 10
@@ -199,7 +208,10 @@ def __call__(
         for b in tqdm(dl) if has_tqdm and self.progress else dl:
             # do model forward pass
             b[self._pred_key] = self.inferrer_fn(b[self.image_key].to(self.device))
-            outs.extend([self.inverter(PadListDataCollate.inverse(i))[self._pred_key] for i in decollate_batch(b)])
+            if self.apply_inverse_to_pred:
+                outs.extend([self.inverter(PadListDataCollate.inverse(i))[self._pred_key] for i in decollate_batch(b)])
+            else:
+                outs.extend([i[self._pred_key] for i in decollate_batch(b)])
 
         output: NdarrayOrTensor = stack(outs, 0)
 
 
@@ -12,6 +12,7 @@
 from __future__ import annotations
 
 from .average_precision import AveragePrecision
+from .calibration import CalibrationError
 from .checkpoint_loader import CheckpointLoader
 from .checkpoint_saver import CheckpointSaver
 from .classification_saver import ClassificationSaver