Project-MONAI
diff --git a/.github/workflows/pythonapp.yml
Lines changed: 1 addition & 1 deletion b/.github/workflows/pythonapp.yml
Lines changed: 1 addition & 1 deletion
diff --git a/MANIFEST.in
Lines changed: 2 additions & 0 deletions b/MANIFEST.in
Lines changed: 2 additions & 0 deletions
diff --git a/docs/source/losses.rst
Lines changed: 5 additions & 0 deletions b/docs/source/losses.rst
Lines changed: 5 additions & 0 deletions
diff --git a/monai/apps/auto3dseg/auto_runner.py
Lines changed: 1 addition & 1 deletion b/monai/apps/auto3dseg/auto_runner.py
Lines changed: 1 addition & 1 deletion
diff --git a/monai/apps/reconstruction/transforms/dictionary.py
Lines changed: 34 additions & 3 deletions b/monai/apps/reconstruction/transforms/dictionary.py
Lines changed: 34 additions & 3 deletions
diff --git a/monai/auto3dseg/analyzer.py
Lines changed: 22 additions & 8 deletions b/monai/auto3dseg/analyzer.py
Lines changed: 22 additions & 8 deletions
diff --git a/monai/data/image_writer.py
Lines changed: 1 addition & 1 deletion b/monai/data/image_writer.py
Lines changed: 1 addition & 1 deletion
diff --git a/monai/data/utils.py
Lines changed: 1 addition & 1 deletion b/monai/data/utils.py
Lines changed: 1 addition & 1 deletion
diff --git a/monai/engines/trainer.py
Lines changed: 37 additions & 6 deletions b/monai/engines/trainer.py
Lines changed: 37 additions & 6 deletions
diff --git a/monai/engines/utils.py
Lines changed: 2 additions & 2 deletions b/monai/engines/utils.py
Lines changed: 2 additions & 2 deletions
@@ -82,7 +82,7 @@ jobs:
         find  /opt/hostedtoolcache/* -maxdepth 0 ! -name 'Python' -exec rm -rf {} \;
     - name: Install the dependencies
       run: |
-        python -m pip install --user --upgrade pip wheel
+        python -m pip install --user --upgrade pip wheel pybind11
         python -m pip install torch==2.5.1 torchvision==0.20.1
         cat "requirements-dev.txt"
         python -m pip install --no-build-isolation -r requirements-dev.txt
 
@@ -3,3 +3,5 @@ include monai/_version.py
 
 include README.md
 include LICENSE
+
+prune tests
@@ -98,6 +98,11 @@ Segmentation Losses
 .. autoclass:: NACLLoss
     :members:
 
+`MCCLoss`
+~~~~~~~~~
+.. autoclass:: MCCLoss
+    :members:
+
 Registration Losses
 -------------------
 
 
@@ -229,7 +229,7 @@ def __init__(
             input = os.path.join(os.path.abspath(work_dir), "input.yaml")
             logger.info(f"Input config is not provided, using the default {input}")
 
-        self.data_src_cfg = dict()
+        self.data_src_cfg = {}
         if isinstance(input, dict):
             self.data_src_cfg = input
         elif isinstance(input, str) and os.path.isfile(input):
 
@@ -20,6 +20,7 @@
 from monai.apps.reconstruction.transforms.array import EquispacedKspaceMask, RandomKspaceMask
 from monai.config import DtypeLike, KeysCollection
 from monai.config.type_definitions import NdarrayOrTensor
+from monai.data.meta_tensor import MetaTensor
 from monai.transforms import InvertibleTransform
 from monai.transforms.croppad.array import SpatialCrop
 from monai.transforms.intensity.array import NormalizeIntensity
@@ -33,15 +34,36 @@ class ExtractDataKeyFromMetaKeyd(MapTransform):
     Moves keys from meta to data. It is useful when a dataset of paired samples
     is loaded and certain keys should be moved from meta to data.
 
+    This transform supports two modes:
+
+    1. When ``meta_key`` references a metadata dictionary in the data (e.g., when
+       ``image_only=False`` was used with ``LoadImaged``), the requested keys are
+       extracted directly from that dictionary.
+
+    2. When ``meta_key`` references a ``MetaTensor`` in the data (e.g., when
+       ``image_only=True`` was used with ``LoadImaged``), the requested keys are
+       extracted from its ``.meta`` attribute.
+
     Args:
         keys: keys to be transferred from meta to data
-        meta_key: the meta key where all the meta-data is stored
+        meta_key: the key in the data dictionary where the metadata source is
+            stored. This can be either a metadata dictionary or a ``MetaTensor``.
         allow_missing_keys: don't raise exception if key is missing
 
     Example:
         When the fastMRI dataset is loaded, "kspace" is stored in the data dictionary,
         but the ground-truth image with the key "reconstruction_rss" is stored in the meta data.
         In this case, ExtractDataKeyFromMetaKeyd moves "reconstruction_rss" to data.
+
+        When ``LoadImaged`` is used with ``image_only=True`` (the default), the loaded
+        data is a ``MetaTensor`` with metadata accessible via ``.meta``. In this case,
+        set ``meta_key`` to the key of the ``MetaTensor`` itself::
+
+            li = LoadImaged(keys="image")  # image_only=True by default
+            dat = li({"image": "image.nii"})
+            e = ExtractDataKeyFromMetaKeyd("filename_or_obj", meta_key="image")
+            dat = e(dat)
+            assert dat["image"].meta["filename_or_obj"] == dat["filename_or_obj"]
     """
 
     def __init__(self, keys: KeysCollection, meta_key: str, allow_missing_keys: bool = False) -> None:
@@ -58,9 +80,18 @@ def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> dict[Hashable, T
             the new data dictionary
         """
         d = dict(data)
+        meta_obj = d[self.meta_key]
+
+        # If meta_key references a MetaTensor, extract from its .meta attribute;
+        # otherwise treat it as a metadata dictionary directly.
+        if isinstance(meta_obj, MetaTensor):
+            meta_dict: dict = meta_obj.meta
+        else:
+            meta_dict = dict(meta_obj)
+
         for key in self.keys:
-            if key in d[self.meta_key]:
-                d[key] = d[self.meta_key][key]  # type: ignore
+            if key in meta_dict:
+                d[key] = meta_dict[key]  # type: ignore
             elif not self.allow_missing_keys:
                 raise KeyError(
                     f"Key `{key}` of transform `{self.__class__.__name__}` was missing in the meta data"
 
@@ -15,7 +15,7 @@
 from abc import ABC, abstractmethod
 from collections.abc import Hashable, Mapping
 from copy import deepcopy
-from typing import Any
+from typing import Any, cast
 
 import numpy as np
 import torch
@@ -468,21 +468,35 @@ def __call__(self, data: Mapping[Hashable, MetaTensor]) -> dict[Hashable, MetaTe
         """
         d: dict[Hashable, MetaTensor] = dict(data)
         start = time.time()
-        if isinstance(d[self.image_key], (torch.Tensor, MetaTensor)) and d[self.image_key].device.type == "cuda":
-            using_cuda = True
-        else:
-            using_cuda = False
+        image_tensor = d[self.image_key]
+        label_tensor = d[self.label_key]
+        # Check if either tensor is on CUDA to determine if we should move both to CUDA for processing
+        using_cuda = any(
+            isinstance(t, (torch.Tensor, MetaTensor)) and t.device.type == "cuda" for t in (image_tensor, label_tensor)
+        )
         restore_grad_state = torch.is_grad_enabled()
         torch.set_grad_enabled(False)
 
-        ndas: list[MetaTensor] = [d[self.image_key][i] for i in range(d[self.image_key].shape[0])]  # type: ignore
-        ndas_label: MetaTensor = d[self.label_key].astype(torch.int16)  # (H,W,D)
+        if isinstance(image_tensor, (MetaTensor, torch.Tensor)) and isinstance(
+            label_tensor, (MetaTensor, torch.Tensor)
+        ):
+            if label_tensor.device != image_tensor.device:
+                if using_cuda:
+                    # Move both tensors to CUDA when mixing devices
+                    cuda_device = image_tensor.device if image_tensor.device.type == "cuda" else label_tensor.device
+                    image_tensor = cast(MetaTensor, image_tensor.to(cuda_device))
+                    label_tensor = cast(MetaTensor, label_tensor.to(cuda_device))
+                else:
+                    label_tensor = cast(MetaTensor, label_tensor.to(image_tensor.device))
+
+        ndas: list[MetaTensor] = [image_tensor[i] for i in range(image_tensor.shape[0])]  # type: ignore
+        ndas_label: MetaTensor = label_tensor.astype(torch.int16)  # (H,W,D)
 
         if ndas_label.shape != ndas[0].shape:
             raise ValueError(f"Label shape {ndas_label.shape} is different from image shape {ndas[0].shape}")
 
         nda_foregrounds: list[torch.Tensor] = [get_foreground_label(nda, ndas_label) for nda in ndas]
-        nda_foregrounds = [nda if nda.numel() > 0 else torch.Tensor([0]) for nda in nda_foregrounds]
+        nda_foregrounds = [nda if nda.numel() > 0 else MetaTensor([0.0]) for nda in nda_foregrounds]
 
         unique_label = unique(ndas_label)
         if isinstance(ndas_label, (MetaTensor, torch.Tensor)):
 
@@ -324,7 +324,7 @@ def convert_to_channel_last(
                 data = data[..., 0, :]
         # if desired, remove trailing singleton dimensions
         while squeeze_end_dims and data.shape[-1] == 1:
-            data = np.squeeze(data, -1)
+            data = data.squeeze(-1)
         if contiguous:
             data = ascontiguousarray(data)
         return data
 
@@ -881,7 +881,7 @@ def compute_shape_offset(
             Default is False, using option 1 to compute the shape and offset.
 
     """
-    shape = np.array(spatial_shape, copy=True, dtype=float)
+    shape = np.array(tuple(spatial_shape), copy=True, dtype=float)
     sr = len(shape)
     in_affine_ = convert_data_type(to_affine_nd(sr, in_affine), np.ndarray)[0]
     out_affine_ = convert_data_type(to_affine_nd(sr, out_affine), np.ndarray)[0]
 
@@ -131,6 +131,12 @@ class SupervisedTrainer(Trainer):
             `torch.Tensor` before forward pass,  then converted back afterward with copied meta information.
         compile_kwargs: dict of the args for `torch.compile()` API, for more details:
             https://pytorch.org/docs/stable/generated/torch.compile.html#torch-compile.
+        accumulation_steps: number of mini-batches over which to accumulate gradients before
+            calling ``optimizer.step()``, effectively simulating a larger batch size on
+            memory-constrained hardware. Must be a positive integer. Default: 1 (no accumulation).
+            When ``epoch_length`` is known and not divisible by ``accumulation_steps``, a flush
+            (optimizer step) is performed at the end of each epoch so no gradients are silently
+            discarded. The loss stored in ``engine.state.output`` is always the **unscaled** value.
     """
 
     def __init__(
@@ -160,7 +166,10 @@ def __init__(
         amp_kwargs: dict | None = None,
         compile: bool = False,
         compile_kwargs: dict | None = None,
+        accumulation_steps: int = 1,
     ) -> None:
+        if accumulation_steps < 1:
+            raise ValueError(f"`accumulation_steps` must be a positive integer, got {accumulation_steps!r}.")
         super().__init__(
             device=device,
             max_epochs=max_epochs,
@@ -190,6 +199,7 @@ def __init__(
         self.loss_function = loss_function
         self.inferer = SimpleInferer() if inferer is None else inferer
         self.optim_set_to_none = optim_set_to_none
+        self.accumulation_steps = accumulation_steps
 
     def _iteration(self, engine: SupervisedTrainer, batchdata: dict[str, torch.Tensor]) -> dict:
         """
@@ -245,21 +255,42 @@ def _compute_pred_loss():
             engine.state.output[Keys.LOSS] = engine.loss_function(engine.state.output[Keys.PRED], targets).mean()
             engine.fire_event(IterationEvents.LOSS_COMPLETED)
 
+        # Determine gradient accumulation state
+        acc = engine.accumulation_steps
+        if acc > 1:
+            epoch_length = engine.state.epoch_length
+            if epoch_length is not None:
+                local_iter = (engine.state.iteration - 1) % epoch_length  # 0-indexed within epoch
+                should_zero_grad = local_iter % acc == 0
+                should_step = (local_iter + 1) % acc == 0 or (local_iter + 1) == epoch_length
+            else:
+                local_iter = engine.state.iteration - 1  # 0-indexed global
+                should_zero_grad = local_iter % acc == 0
+                should_step = (local_iter + 1) % acc == 0
+        else:
+            should_zero_grad = True
+            should_step = True
+
         engine.network.train()
-        engine.optimizer.zero_grad(set_to_none=engine.optim_set_to_none)
+        if should_zero_grad:
+            engine.optimizer.zero_grad(set_to_none=engine.optim_set_to_none)
 
         if engine.amp and engine.scaler is not None:
             with torch.autocast("cuda", **engine.amp_kwargs):
                 _compute_pred_loss()
-            engine.scaler.scale(engine.state.output[Keys.LOSS]).backward()
+            loss = engine.state.output[Keys.LOSS]
+            engine.scaler.scale(loss / acc if acc > 1 else loss).backward()
             engine.fire_event(IterationEvents.BACKWARD_COMPLETED)
-            engine.scaler.step(engine.optimizer)
-            engine.scaler.update()
+            if should_step:
+                engine.scaler.step(engine.optimizer)
+                engine.scaler.update()
         else:
             _compute_pred_loss()
-            engine.state.output[Keys.LOSS].backward()
+            loss = engine.state.output[Keys.LOSS]
+            (loss / acc if acc > 1 else loss).backward()
             engine.fire_event(IterationEvents.BACKWARD_COMPLETED)
-            engine.optimizer.step()
+            if should_step:
+                engine.optimizer.step()
         # copy back meta info
         if self.compile:
             if inputs_meta is not None:
 
@@ -219,8 +219,8 @@ def __call__(
         `kwargs` supports other args for `Tensor.to()` API.
         """
         image, label = default_prepare_batch(batchdata, device, non_blocking, **kwargs)
-        args_ = list()
-        kwargs_ = dict()
+        args_ = []
+        kwargs_ = {}
 
         def _get_data(key: str) -> torch.Tensor:
             data = batchdata[key]