
Commit 4caba01

Merge branch 'dev' into fix/lncc-register-buffer-kernel
2 parents 878ad32 + 7f6b7e5

26 files changed: 600 additions & 122 deletions

MANIFEST.in

Lines changed: 2 additions & 0 deletions

@@ -3,3 +3,5 @@ include monai/_version.py
 
 include README.md
 include LICENSE
+
+prune tests
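
Note: `prune tests` is a standard setuptools MANIFEST.in command; it excludes the tests directory and everything under it from generated source distributions.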

monai/auto3dseg/analyzer.py

Lines changed: 33 additions & 34 deletions

@@ -216,50 +216,58 @@ def __init__(self, image_key: str, stats_name: str = DataStatsKeys.IMAGE_STATS)
         super().__init__(stats_name, report_format)
         self.update_ops(ImageStatsKeys.INTENSITY, SampleOperations())
 
+    @torch.no_grad()
     def __call__(self, data):
-        # Input Validation Addition
-        if not isinstance(data, dict):
-            raise TypeError(f"Input data must be a dict, but got {type(data).__name__}.")
-        if self.image_key not in data:
-            raise KeyError(f"Key '{self.image_key}' not found in input data.")
-        image = data[self.image_key]
-        if not isinstance(image, (np.ndarray, torch.Tensor, MetaTensor)):
-            raise TypeError(
-                f"Value for '{self.image_key}' must be a numpy array, torch.Tensor, or MetaTensor, "
-                f"but got {type(image).__name__}."
-            )
-        if image.ndim < 3:
-            raise ValueError(
-                f"Image data under '{self.image_key}' must have at least 3 dimensions, but got shape {image.shape}."
-            )
-        # --- End of validation ---
         """
-        Callable to execute the pre-defined functions
+        Callable to execute the pre-defined functions.
 
         Returns:
             A dictionary. The dict has the key in self.report_format. The value of
             ImageStatsKeys.INTENSITY is in a list format. Each element of the value list
             has stats pre-defined by SampleOperations (max, min, ....).
 
         Raises:
-            RuntimeError if the stats report generated is not consistent with the pre-
+            KeyError: if ``self.image_key`` is not present in the input data.
+            TypeError: if the input data is not a dictionary, or if the image value is
+                not a numpy array, torch.Tensor, or MetaTensor.
+            ValueError: if the image has fewer than 3 dimensions, or if pre-computed
+                ``nda_croppeds`` is not a list/tuple with one entry per image channel.
+            RuntimeError: if the stats report generated is not consistent with the pre-
                 defined report_format.
 
         Note:
            The stats operation uses numpy and torch to compute max, min, and other
            functions. If the input has nan/inf, the stats results will be nan/inf.
 
        """
+        if not isinstance(data, dict):
+            raise TypeError(f"Input data must be a dict, but got {type(data).__name__}.")
+        if self.image_key not in data:
+            raise KeyError(f"Key '{self.image_key}' not found in input data.")
+        image = data[self.image_key]
+        if not isinstance(image, (np.ndarray, torch.Tensor, MetaTensor)):
+            raise TypeError(
+                f"Value for '{self.image_key}' must be a numpy array, torch.Tensor, or MetaTensor, "
+                f"but got {type(image).__name__}."
+            )
+        if image.ndim < 3:
+            raise ValueError(
+                f"Image data under '{self.image_key}' must have at least 3 dimensions, but got shape {image.shape}."
+            )
+
         d = dict(data)
         start = time.time()
-        restore_grad_state = torch.is_grad_enabled()
-        torch.set_grad_enabled(False)
-
         ndas = [d[self.image_key][i] for i in range(d[self.image_key].shape[0])]
-        if "nda_croppeds" not in d:
+        if "nda_croppeds" in d:
+            nda_croppeds = d["nda_croppeds"]
+            if not isinstance(nda_croppeds, (list, tuple)) or len(nda_croppeds) != len(ndas):
+                raise ValueError(
+                    "Pre-computed 'nda_croppeds' must be a list or tuple with one entry per image channel "
+                    f"(expected {len(ndas)})."
+                )
+        else:
             nda_croppeds = [get_foreground_image(nda) for nda in ndas]
 
-        # perform calculation
         report = deepcopy(self.get_report_format())
 
         report[ImageStatsKeys.SHAPE] = [list(nda.shape) for nda in ndas]
@@ -284,7 +292,6 @@ def __call__(self, data):
 
         d[self.stats_name] = report
 
-        torch.set_grad_enabled(restore_grad_state)
         logger.debug(f"Get image stats spent {time.time() - start}")
         return d
 
@@ -321,6 +328,7 @@ def __init__(self, image_key: str, label_key: str, stats_name: str = DataStatsKe
         super().__init__(stats_name, report_format)
         self.update_ops(ImageStatsKeys.INTENSITY, SampleOperations())
 
+    @torch.no_grad()
     def __call__(self, data: Mapping) -> dict:
         """
         Callable to execute the pre-defined functions
@@ -341,9 +349,6 @@ def __call__(self, data: Mapping) -> dict:
 
         d = dict(data)
         start = time.time()
-        restore_grad_state = torch.is_grad_enabled()
-        torch.set_grad_enabled(False)
-
         ndas = [d[self.image_key][i] for i in range(d[self.image_key].shape[0])]
         ndas_label = d[self.label_key]  # (H,W,D)
 
@@ -353,7 +358,6 @@ def __call__(self, data: Mapping) -> dict:
         nda_foregrounds = [get_foreground_label(nda, ndas_label) for nda in ndas]
         nda_foregrounds = [nda if nda.numel() > 0 else MetaTensor([0.0]) for nda in nda_foregrounds]
 
-        # perform calculation
         report = deepcopy(self.get_report_format())
 
         report[ImageStatsKeys.INTENSITY] = [
@@ -365,7 +369,6 @@ def __call__(self, data: Mapping) -> dict:
 
         d[self.stats_name] = report
 
-        torch.set_grad_enabled(restore_grad_state)
         logger.debug(f"Get foreground image stats spent {time.time() - start}")
         return d
 
@@ -418,6 +421,7 @@ def __init__(
         id_seq = ID_SEP_KEY.join([LabelStatsKeys.LABEL, "0", LabelStatsKeys.IMAGE_INTST])
         self.update_ops_nested_label(id_seq, SampleOperations())
 
+    @torch.no_grad()
     def __call__(self, data: Mapping[Hashable, MetaTensor]) -> dict[Hashable, MetaTensor | dict]:
         """
         Callable to execute the pre-defined functions.
@@ -470,19 +474,15 @@ def __call__(self, data: Mapping[Hashable, MetaTensor]) -> dict[Hashable, MetaTe
         start = time.time()
         image_tensor = d[self.image_key]
         label_tensor = d[self.label_key]
-        # Check if either tensor is on CUDA to determine if we should move both to CUDA for processing
         using_cuda = any(
             isinstance(t, (torch.Tensor, MetaTensor)) and t.device.type == "cuda" for t in (image_tensor, label_tensor)
         )
-        restore_grad_state = torch.is_grad_enabled()
-        torch.set_grad_enabled(False)
 
         if isinstance(image_tensor, (MetaTensor, torch.Tensor)) and isinstance(
             label_tensor, (MetaTensor, torch.Tensor)
         ):
             if label_tensor.device != image_tensor.device:
                 if using_cuda:
-                    # Move both tensors to CUDA when mixing devices
                     cuda_device = image_tensor.device if image_tensor.device.type == "cuda" else label_tensor.device
                     image_tensor = cast(MetaTensor, image_tensor.to(cuda_device))
                     label_tensor = cast(MetaTensor, label_tensor.to(cuda_device))
@@ -548,7 +548,6 @@ def __call__(self, data: Mapping[Hashable, MetaTensor]) -> dict[Hashable, MetaTe
 
         d[self.stats_name] = report  # type: ignore[assignment]
 
-        torch.set_grad_enabled(restore_grad_state)
         logger.debug(f"Get label stats spent {time.time() - start}")
         return d  # type: ignore[return-value]
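
Note: the recurring change in this file swaps the manual save/restore of the autograd state for the `@torch.no_grad()` decorator, which also restores the previous state if the wrapped call raises partway through. A minimal sketch of the equivalence (the class and the stats computation here are illustrative, not the commit's code):

import torch

class Stats:
    @torch.no_grad()  # autograd disabled on entry, previous state restored on exit
    def __call__(self, x: torch.Tensor) -> torch.Tensor:
        return x.max() - x.min()

    def call_manual(self, x: torch.Tensor) -> torch.Tensor:
        # the pattern removed by this commit; the state is not restored
        # if the body raises before set_grad_enabled runs again
        restore_grad_state = torch.is_grad_enabled()
        torch.set_grad_enabled(False)
        out = x.max() - x.min()
        torch.set_grad_enabled(restore_grad_state)
        return out

x = torch.rand(2, 3, requires_grad=True)
assert not Stats()(x).requires_grad and torch.is_grad_enabled()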

monai/losses/image_dissimilarity.py

Lines changed: 6 additions & 6 deletions

@@ -15,7 +15,7 @@
 from torch.nn import functional as F
 from torch.nn.modules.loss import _Loss
 
-from monai.networks.layers import gaussian_1d, separable_filtering
+from monai.networks.layers import separable_filtering
 from monai.utils import LossReduction
 from monai.utils.module import look_up_option
 
@@ -34,11 +34,11 @@ def make_triangular_kernel(kernel_size: int) -> torch.Tensor:
 
 
 def make_gaussian_kernel(kernel_size: int) -> torch.Tensor:
-    sigma = torch.tensor(kernel_size / 3.0)
-    kernel = gaussian_1d(sigma=sigma, truncated=kernel_size // 2, approx="sampled", normalize=False) * (
-        2.5066282 * sigma
-    )
-    return kernel[:kernel_size]
+    sigma = kernel_size / 3.0
+    half = kernel_size // 2
+    x = torch.arange(-half, half + 1, dtype=torch.float)
+    kernel = torch.exp(-0.5 / (sigma * sigma) * x**2)
+    return kernel
 
 
 kernel_dict = {
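
Note: the rewritten `make_gaussian_kernel` samples an unnormalized Gaussian with `sigma = kernel_size / 3` directly rather than rescaling the output of `gaussian_1d`, which lets that import be dropped. A standalone sketch of what the new code produces (assuming an odd `kernel_size`, for which the returned length `2 * (kernel_size // 2) + 1` equals `kernel_size`):

import torch

def make_gaussian_kernel(kernel_size: int) -> torch.Tensor:
    sigma = kernel_size / 3.0
    half = kernel_size // 2
    x = torch.arange(-half, half + 1, dtype=torch.float)
    return torch.exp(-0.5 / (sigma * sigma) * x**2)

k = make_gaussian_kernel(7)
assert k.shape == (7,)
assert torch.allclose(k, k.flip(0)) and k[3] == 1.0  # symmetric, peak of 1 at the center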

monai/losses/spectral_loss.py

Lines changed: 2 additions & 2 deletions

@@ -55,8 +55,8 @@ def __init__(
         self.fft_norm = fft_norm
 
     def forward(self, input: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
-        input_amplitude = self._get_fft_amplitude(target)
-        target_amplitude = self._get_fft_amplitude(input)
+        input_amplitude = self._get_fft_amplitude(input)
+        target_amplitude = self._get_fft_amplitude(target)
 
         # Compute distance between amplitude of frequency components
         # See Section 3.3 from https://arxiv.org/abs/2005.00341
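
Note: the two amplitudes were computed from the wrong tensors; the fix restores the pairing of names to inputs. For a distance that is symmetric in its arguments the value is unchanged, but the corrected naming keeps the code aligned with the paper's formulation. A rough standalone sketch of the amplitude step being paired here (this helper is an assumption standing in for the class's internal `_get_fft_amplitude`, using an FFT over the spatial axes with the configured norm):

import torch

def fft_amplitude(x: torch.Tensor, fft_norm: str = "ortho") -> torch.Tensor:
    # amplitude (modulus) of the spatial frequency components
    return torch.abs(torch.fft.fftn(x, dim=(-2, -1), norm=fft_norm))

input = torch.rand(1, 1, 8, 8)
target = torch.rand(1, 1, 8, 8)
# after the fix, each name holds the spectrum of the matching tensor
loss = torch.nn.functional.l1_loss(fft_amplitude(input), fft_amplitude(target))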

monai/losses/ssim_loss.py

Lines changed: 3 additions & 3 deletions

@@ -111,17 +111,17 @@ def forward(self, input: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
             # 2D data
             x = torch.ones([1,1,10,10])/2
             y = torch.ones([1,1,10,10])/2
-            print(1-SSIMLoss(spatial_dims=2)(x,y))
+            print(SSIMLoss(spatial_dims=2)(x,y))
 
             # pseudo-3D data
             x = torch.ones([1,5,10,10])/2  # 5 could represent number of slices
             y = torch.ones([1,5,10,10])/2
-            print(1-SSIMLoss(spatial_dims=2)(x,y))
+            print(SSIMLoss(spatial_dims=2)(x,y))
 
             # 3D data
             x = torch.ones([1,1,10,10,10])/2
             y = torch.ones([1,1,10,10,10])/2
-            print(1-SSIMLoss(spatial_dims=3)(x,y))
+            print(SSIMLoss(spatial_dims=3)(x,y))
         """
         ssim_value = self.ssim_metric._compute_tensor(input, target).view(-1, 1)
         loss: torch.Tensor = 1 - ssim_value
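
Note: `SSIMLoss.forward` already returns `1 - ssim_value` (last line of the hunk above), so the old docstring examples that printed `1-SSIMLoss(...)(x,y)` were actually showing the SSIM value, not the loss. Usage after the fix:

import torch
from monai.losses.ssim_loss import SSIMLoss

x = torch.ones([1, 1, 10, 10]) / 2
y = torch.ones([1, 1, 10, 10]) / 2
print(SSIMLoss(spatial_dims=2)(x, y))  # close to 0: identical inputs give SSIM close to 1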

monai/networks/layers/filtering.py

Lines changed: 32 additions & 17 deletions

@@ -221,7 +221,8 @@ def __init__(self, spatial_sigma, color_sigma):
             self.len_spatial_sigma = 3
         else:
             raise ValueError(
-                f"len(spatial_sigma) {spatial_sigma} must match number of spatial dims {self.ken_spatial_sigma}."
+                f"Length of `spatial_sigma` must match number of spatial dims (1, 2 or 3) "
+                f"or be a single float value ({spatial_sigma=})."
             )
 
         # Register sigmas as trainable parameters.
@@ -231,6 +232,10 @@ def __init__(self, spatial_sigma, color_sigma):
         self.sigma_color = torch.nn.Parameter(torch.tensor(color_sigma))
 
     def forward(self, input_tensor):
+        if len(input_tensor.shape) < 3:
+            raise ValueError(
+                f"Input must have at least 3 dimensions (batch, channel, *spatial_dims), got {len(input_tensor.shape)}"
+            )
         if input_tensor.shape[1] != 1:
             raise ValueError(
                 f"Currently channel dimensions >1 ({input_tensor.shape[1]}) are not supported. "
@@ -239,24 +244,27 @@ def forward(self, input_tensor):
             )
 
         len_input = len(input_tensor.shape)
+        spatial_dims = len_input - 2
 
         # C++ extension so far only supports 5-dim inputs.
-        if len_input == 3:
+        if spatial_dims == 1:
             input_tensor = input_tensor.unsqueeze(3).unsqueeze(4)
-        elif len_input == 4:
+        elif spatial_dims == 2:
             input_tensor = input_tensor.unsqueeze(4)
 
-        if self.len_spatial_sigma != len_input:
-            raise ValueError(f"Spatial dimension ({len_input}) must match initialized len(spatial_sigma).")
+        if self.len_spatial_sigma != spatial_dims:
+            raise ValueError(
+                f"Number of spatial dimensions ({spatial_dims}) must match initialized `len(spatial_sigma)`."
+            )
 
         prediction = TrainableBilateralFilterFunction.apply(
             input_tensor, self.sigma_x, self.sigma_y, self.sigma_z, self.sigma_color
         )
 
         # Make sure to return tensor of the same shape as the input.
-        if len_input == 3:
+        if spatial_dims == 1:
             prediction = prediction.squeeze(4).squeeze(3)
-        elif len_input == 4:
+        elif spatial_dims == 2:
             prediction = prediction.squeeze(4)
 
         return prediction
@@ -389,7 +397,8 @@ def __init__(self, spatial_sigma, color_sigma):
             self.len_spatial_sigma = 3
         else:
             raise ValueError(
-                f"len(spatial_sigma) {spatial_sigma} must match number of spatial dims {self.ken_spatial_sigma}."
+                f"Length of `spatial_sigma` must match number of spatial dims (1, 2 or 3) "
+                f"or be a single float value ({spatial_sigma=})."
             )
 
         # Register sigmas as trainable parameters.
@@ -399,39 +408,45 @@ def __init__(self, spatial_sigma, color_sigma):
         self.sigma_color = torch.nn.Parameter(torch.tensor(color_sigma))
 
     def forward(self, input_tensor, guidance_tensor):
+        if len(input_tensor.shape) < 3:
+            raise ValueError(
+                f"Input must have at least 3 dimensions (batch, channel, *spatial_dims), got {len(input_tensor.shape)}"
+            )
         if input_tensor.shape[1] != 1:
             raise ValueError(
-                f"Currently channel dimensions >1 ({input_tensor.shape[1]}) are not supported. "
+                f"Currently channel dimensions > 1 ({input_tensor.shape[1]}) are not supported. "
                 "Please use multiple parallel filter layers if you want "
                 "to filter multiple channels."
             )
         if input_tensor.shape != guidance_tensor.shape:
             raise ValueError(
-                "Shape of input image must equal shape of guidance image."
-                f"Got {input_tensor.shape} and {guidance_tensor.shape}."
+                f"Shape of input image must equal shape of guidance image, got {input_tensor.shape} and {guidance_tensor.shape}."
             )
 
         len_input = len(input_tensor.shape)
+        spatial_dims = len_input - 2
 
         # C++ extension so far only supports 5-dim inputs.
-        if len_input == 3:
+        if spatial_dims == 1:
             input_tensor = input_tensor.unsqueeze(3).unsqueeze(4)
             guidance_tensor = guidance_tensor.unsqueeze(3).unsqueeze(4)
-        elif len_input == 4:
+        elif spatial_dims == 2:
             input_tensor = input_tensor.unsqueeze(4)
             guidance_tensor = guidance_tensor.unsqueeze(4)
 
-        if self.len_spatial_sigma != len_input:
-            raise ValueError(f"Spatial dimension ({len_input}) must match initialized len(spatial_sigma).")
+        if self.len_spatial_sigma != spatial_dims:
+            raise ValueError(
+                f"Number of spatial dimensions ({spatial_dims}) must match initialized `len(spatial_sigma)`."
            )
 
         prediction = TrainableJointBilateralFilterFunction.apply(
             input_tensor, guidance_tensor, self.sigma_x, self.sigma_y, self.sigma_z, self.sigma_color
         )
 
         # Make sure to return tensor of the same shape as the input.
-        if len_input == 3:
+        if spatial_dims == 1:
             prediction = prediction.squeeze(4).squeeze(3)
-        elif len_input == 4:
+        elif spatial_dims == 2:
             prediction = prediction.squeeze(4)
 
         return prediction
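
Note: both filters now derive the reshaping from the number of spatial dimensions (`len(shape) - 2`, i.e. rank minus batch and channel) instead of the raw tensor rank, and the new guard rejects sub-3-D inputs before `shape[1]` is ever read. A small sketch of the pad-to-5-D/squeeze-back round trip the C++ extension requires (standalone; an identity stand-in replaces the actual filter function):

import torch

def filter_nd(input_tensor: torch.Tensor) -> torch.Tensor:
    if len(input_tensor.shape) < 3:
        raise ValueError("expected (batch, channel, *spatial_dims)")
    spatial_dims = len(input_tensor.shape) - 2
    # the C++ extension only accepts 5-dim (B, C, X, Y, Z) inputs
    if spatial_dims == 1:
        input_tensor = input_tensor.unsqueeze(3).unsqueeze(4)
    elif spatial_dims == 2:
        input_tensor = input_tensor.unsqueeze(4)
    prediction = input_tensor  # identity stand-in for the bilateral filter call
    # return a tensor of the same shape as the original input
    if spatial_dims == 1:
        prediction = prediction.squeeze(4).squeeze(3)
    elif spatial_dims == 2:
        prediction = prediction.squeeze(4)
    return prediction

assert filter_nd(torch.rand(2, 1, 16)).shape == (2, 1, 16)          # 1-D
assert filter_nd(torch.rand(2, 1, 16, 16)).shape == (2, 1, 16, 16)  # 2-D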

monai/networks/nets/autoencoderkl.py

Lines changed: 34 additions & 7 deletions

@@ -680,6 +680,7 @@ def load_old_state_dict(self, old_state_dict: dict, verbose=False) -> None:
 
         Args:
             old_state_dict: state dict from the old AutoencoderKL model.
+            verbose: if True, print diagnostic information about key mismatches.
         """
 
         new_state_dict = self.state_dict()
@@ -715,13 +716,39 @@ def load_old_state_dict(self, old_state_dict: dict, verbose=False) -> None:
             new_state_dict[f"{block}.attn.to_k.bias"] = old_state_dict.pop(f"{block}.to_k.bias")
             new_state_dict[f"{block}.attn.to_v.bias"] = old_state_dict.pop(f"{block}.to_v.bias")
 
-            # old version did not have a projection so set these to the identity
-            new_state_dict[f"{block}.attn.out_proj.weight"] = torch.eye(
-                new_state_dict[f"{block}.attn.out_proj.weight"].shape[0]
-            )
-            new_state_dict[f"{block}.attn.out_proj.bias"] = torch.zeros(
-                new_state_dict[f"{block}.attn.out_proj.bias"].shape
-            )
+            out_w = f"{block}.attn.out_proj.weight"
+            out_b = f"{block}.attn.out_proj.bias"
+            proj_w = f"{block}.proj_attn.weight"
+            proj_b = f"{block}.proj_attn.bias"
+
+            if out_w in new_state_dict:
+                if proj_w in old_state_dict:
+                    new_state_dict[out_w] = old_state_dict.pop(proj_w)
+                    if proj_b in old_state_dict:
+                        new_state_dict[out_b] = old_state_dict.pop(proj_b)
+                    else:
+                        new_state_dict[out_b] = torch.zeros(
+                            new_state_dict[out_b].shape,
+                            dtype=new_state_dict[out_b].dtype,
+                            device=new_state_dict[out_b].device,
+                        )
+                else:
+                    # No legacy proj_attn - initialize out_proj to identity/zero
+                    new_state_dict[out_w] = torch.eye(
+                        new_state_dict[out_w].shape[0],
+                        dtype=new_state_dict[out_w].dtype,
+                        device=new_state_dict[out_w].device,
+                    )
+                    new_state_dict[out_b] = torch.zeros(
+                        new_state_dict[out_b].shape,
+                        dtype=new_state_dict[out_b].dtype,
+                        device=new_state_dict[out_b].device,
+                    )
+            elif proj_w in old_state_dict:
+                # new model has no out_proj at all - discard the legacy keys so they
+                # don't surface as "unexpected keys" during load_state_dict
+                old_state_dict.pop(proj_w)
+                old_state_dict.pop(proj_b, None)
 
         # fix the upsample conv blocks which were renamed postconv
         for k in new_state_dict:
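
Note: the rewritten block reuses legacy `proj_attn` weights for the new attention `out_proj` when both exist, falls back to identity/zero initialization (now created with the target tensor's dtype and device) when the checkpoint predates the projection, and drops orphaned `proj_attn` keys so `load_state_dict` does not flag them as unexpected. A toy sketch of that mapping on plain dicts (simplified; the missing-`proj_b` fallback of the real hunk is omitted):

import torch

def migrate_attn_block(old_sd: dict, new_sd: dict, block: str) -> None:
    out_w, out_b = f"{block}.attn.out_proj.weight", f"{block}.attn.out_proj.bias"
    proj_w, proj_b = f"{block}.proj_attn.weight", f"{block}.proj_attn.bias"
    if out_w in new_sd:
        if proj_w in old_sd:
            # reuse the legacy projection weights
            new_sd[out_w] = old_sd.pop(proj_w)
            if proj_b in old_sd:
                new_sd[out_b] = old_sd.pop(proj_b)
        else:
            # no legacy projection: identity weight, zero bias, matching dtype/device
            ref_w = new_sd[out_w]
            new_sd[out_w] = torch.eye(ref_w.shape[0], dtype=ref_w.dtype, device=ref_w.device)
            new_sd[out_b] = torch.zeros_like(new_sd[out_b])
    elif proj_w in old_sd:
        # the new model has no out_proj; drop the orphaned legacy keys
        old_sd.pop(proj_w)
        old_sd.pop(proj_b, None)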
