
Commit 4caba01

Merge branch 'dev' into fix/lncc-register-buffer-kernel
2 parents 878ad32 + 7f6b7e5

26 files changed: 600 additions & 122 deletions

MANIFEST.in

Lines changed: 2 additions & 0 deletions

@@ -3,3 +3,5 @@ include monai/_version.py
 
 include README.md
 include LICENSE
+
+prune tests
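
Note: `prune tests` is a standard setuptools MANIFEST.in command; it excludes the tests directory and everything under it from generated source distributions.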

monai/auto3dseg/analyzer.py

Lines changed: 33 additions & 34 deletions

@@ -216,50 +216,58 @@ def __init__(self, image_key: str, stats_name: str = DataStatsKeys.IMAGE_STATS)
         super().__init__(stats_name, report_format)
         self.update_ops(ImageStatsKeys.INTENSITY, SampleOperations())
 
+    @torch.no_grad()
     def __call__(self, data):
-        # Input Validation Addition
-        if not isinstance(data, dict):
-            raise TypeError(f"Input data must be a dict, but got {type(data).__name__}.")
-        if self.image_key not in data:
-            raise KeyError(f"Key '{self.image_key}' not found in input data.")
-        image = data[self.image_key]
-        if not isinstance(image, (np.ndarray, torch.Tensor, MetaTensor)):
-            raise TypeError(
-                f"Value for '{self.image_key}' must be a numpy array, torch.Tensor, or MetaTensor, "
-                f"but got {type(image).__name__}."
-            )
-        if image.ndim < 3:
-            raise ValueError(
-                f"Image data under '{self.image_key}' must have at least 3 dimensions, but got shape {image.shape}."
-            )
-        # --- End of validation ---
         """
-        Callable to execute the pre-defined functions
+        Callable to execute the pre-defined functions.
 
         Returns:
             A dictionary. The dict has the key in self.report_format. The value of
             ImageStatsKeys.INTENSITY is in a list format. Each element of the value list
             has stats pre-defined by SampleOperations (max, min, ....).
 
         Raises:
-            RuntimeError if the stats report generated is not consistent with the pre-
+            KeyError: if ``self.image_key`` is not present in the input data.
+            TypeError: if the input data is not a dictionary, or if the image value is
+                not a numpy array, torch.Tensor, or MetaTensor.
+            ValueError: if the image has fewer than 3 dimensions, or if pre-computed
+                ``nda_croppeds`` is not a list/tuple with one entry per image channel.
+            RuntimeError: if the stats report generated is not consistent with the pre-
                 defined report_format.
 
         Note:
            The stats operation uses numpy and torch to compute max, min, and other
            functions. If the input has nan/inf, the stats results will be nan/inf.
 
        """
+        if not isinstance(data, dict):
+            raise TypeError(f"Input data must be a dict, but got {type(data).__name__}.")
+        if self.image_key not in data:
+            raise KeyError(f"Key '{self.image_key}' not found in input data.")
+        image = data[self.image_key]
+        if not isinstance(image, (np.ndarray, torch.Tensor, MetaTensor)):
+            raise TypeError(
+                f"Value for '{self.image_key}' must be a numpy array, torch.Tensor, or MetaTensor, "
+                f"but got {type(image).__name__}."
+            )
+        if image.ndim < 3:
+            raise ValueError(
+                f"Image data under '{self.image_key}' must have at least 3 dimensions, but got shape {image.shape}."
+            )
+
         d = dict(data)
         start = time.time()
-        restore_grad_state = torch.is_grad_enabled()
-        torch.set_grad_enabled(False)
-
         ndas = [d[self.image_key][i] for i in range(d[self.image_key].shape[0])]
-        if "nda_croppeds" not in d:
+        if "nda_croppeds" in d:
+            nda_croppeds = d["nda_croppeds"]
+            if not isinstance(nda_croppeds, (list, tuple)) or len(nda_croppeds) != len(ndas):
+                raise ValueError(
+                    "Pre-computed 'nda_croppeds' must be a list or tuple with one entry per image channel "
+                    f"(expected {len(ndas)})."
+                )
+        else:
             nda_croppeds = [get_foreground_image(nda) for nda in ndas]
 
-        # perform calculation
         report = deepcopy(self.get_report_format())
 
         report[ImageStatsKeys.SHAPE] = [list(nda.shape) for nda in ndas]
@@ -284,7 +292,6 @@ def __call__(self, data):
 
         d[self.stats_name] = report
 
-        torch.set_grad_enabled(restore_grad_state)
         logger.debug(f"Get image stats spent {time.time() - start}")
         return d
 
@@ -321,6 +328,7 @@ def __init__(self, image_key: str, label_key: str, stats_name: str = DataStatsKe
         super().__init__(stats_name, report_format)
         self.update_ops(ImageStatsKeys.INTENSITY, SampleOperations())
 
+    @torch.no_grad()
     def __call__(self, data: Mapping) -> dict:
         """
         Callable to execute the pre-defined functions
@@ -341,9 +349,6 @@ def __call__(self, data: Mapping) -> dict:
 
         d = dict(data)
         start = time.time()
-        restore_grad_state = torch.is_grad_enabled()
-        torch.set_grad_enabled(False)
-
         ndas = [d[self.image_key][i] for i in range(d[self.image_key].shape[0])]
         ndas_label = d[self.label_key]  # (H,W,D)
 
@@ -353,7 +358,6 @@ def __call__(self, data: Mapping) -> dict:
         nda_foregrounds = [get_foreground_label(nda, ndas_label) for nda in ndas]
         nda_foregrounds = [nda if nda.numel() > 0 else MetaTensor([0.0]) for nda in nda_foregrounds]
 
-        # perform calculation
         report = deepcopy(self.get_report_format())
 
         report[ImageStatsKeys.INTENSITY] = [
@@ -365,7 +369,6 @@ def __call__(self, data: Mapping) -> dict:
 
         d[self.stats_name] = report
 
-        torch.set_grad_enabled(restore_grad_state)
         logger.debug(f"Get foreground image stats spent {time.time() - start}")
         return d
 
@@ -418,6 +421,7 @@ def __init__(
         id_seq = ID_SEP_KEY.join([LabelStatsKeys.LABEL, "0", LabelStatsKeys.IMAGE_INTST])
         self.update_ops_nested_label(id_seq, SampleOperations())
 
+    @torch.no_grad()
     def __call__(self, data: Mapping[Hashable, MetaTensor]) -> dict[Hashable, MetaTensor | dict]:
         """
         Callable to execute the pre-defined functions.
@@ -470,19 +474,15 @@ def __call__(self, data: Mapping[Hashable, MetaTensor]) -> dict[Hashable, MetaTe
         start = time.time()
         image_tensor = d[self.image_key]
         label_tensor = d[self.label_key]
-        # Check if either tensor is on CUDA to determine if we should move both to CUDA for processing
         using_cuda = any(
             isinstance(t, (torch.Tensor, MetaTensor)) and t.device.type == "cuda" for t in (image_tensor, label_tensor)
         )
-        restore_grad_state = torch.is_grad_enabled()
-        torch.set_grad_enabled(False)
 
         if isinstance(image_tensor, (MetaTensor, torch.Tensor)) and isinstance(
             label_tensor, (MetaTensor, torch.Tensor)
         ):
             if label_tensor.device != image_tensor.device:
                 if using_cuda:
-                    # Move both tensors to CUDA when mixing devices
                     cuda_device = image_tensor.device if image_tensor.device.type == "cuda" else label_tensor.device
                     image_tensor = cast(MetaTensor, image_tensor.to(cuda_device))
                     label_tensor = cast(MetaTensor, label_tensor.to(cuda_device))
@@ -548,7 +548,6 @@ def __call__(self, data: Mapping[Hashable, MetaTensor]) -> dict[Hashable, MetaTe
 
         d[self.stats_name] = report  # type: ignore[assignment]
 
-        torch.set_grad_enabled(restore_grad_state)
         logger.debug(f"Get label stats spent {time.time() - start}")
         return d  # type: ignore[return-value]
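
Note: the recurring change in this file swaps the manual save/restore of the autograd state for the `@torch.no_grad()` decorator, which also restores the previous state if the wrapped call raises partway through. A minimal sketch of the equivalence (the class and the stats computation here are illustrative, not the commit's code):

import torch

class Stats:
    @torch.no_grad()  # autograd disabled on entry, previous state restored on exit
    def __call__(self, x: torch.Tensor) -> torch.Tensor:
        return x.max() - x.min()

    def call_manual(self, x: torch.Tensor) -> torch.Tensor:
        # the pattern removed by this commit; the state is not restored
        # if the body raises before set_grad_enabled runs again
        restore_grad_state = torch.is_grad_enabled()
        torch.set_grad_enabled(False)
        out = x.max() - x.min()
        torch.set_grad_enabled(restore_grad_state)
        return out

x = torch.rand(2, 3, requires_grad=True)
assert not Stats()(x).requires_grad and torch.is_grad_enabled()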

monai/losses/image_dissimilarity.py

Lines changed: 6 additions & 6 deletions

@@ -15,7 +15,7 @@
 from torch.nn import functional as F
 from torch.nn.modules.loss import _Loss
 
-from monai.networks.layers import gaussian_1d, separable_filtering
+from monai.networks.layers import separable_filtering
 from monai.utils import LossReduction
 from monai.utils.module import look_up_option
 
@@ -34,11 +34,11 @@ def make_triangular_kernel(kernel_size: int) -> torch.Tensor:
 
 
 def make_gaussian_kernel(kernel_size: int) -> torch.Tensor:
-    sigma = torch.tensor(kernel_size / 3.0)
-    kernel = gaussian_1d(sigma=sigma, truncated=kernel_size // 2, approx="sampled", normalize=False) * (
-        2.5066282 * sigma
-    )
-    return kernel[:kernel_size]
+    sigma = kernel_size / 3.0
+    half = kernel_size // 2
+    x = torch.arange(-half, half + 1, dtype=torch.float)
+    kernel = torch.exp(-0.5 / (sigma * sigma) * x**2)
+    return kernel
 
 
 kernel_dict = {
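
Note: the rewritten `make_gaussian_kernel` samples an unnormalized Gaussian with `sigma = kernel_size / 3` directly rather than rescaling the output of `gaussian_1d`, which lets that import be dropped. A standalone sketch of what the new code produces (assuming an odd `kernel_size`, for which the returned length `2 * (kernel_size // 2) + 1` equals `kernel_size`):

import torch

def make_gaussian_kernel(kernel_size: int) -> torch.Tensor:
    sigma = kernel_size / 3.0
    half = kernel_size // 2
    x = torch.arange(-half, half + 1, dtype=torch.float)
    return torch.exp(-0.5 / (sigma * sigma) * x**2)

k = make_gaussian_kernel(7)
assert k.shape == (7,)
assert torch.allclose(k, k.flip(0)) and k[3] == 1.0  # symmetric, peak of 1 at the center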

monai/losses/spectral_loss.py

Lines changed: 2 additions & 2 deletions

@@ -55,8 +55,8 @@ def __init__(
         self.fft_norm = fft_norm
 
     def forward(self, input: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
-        input_amplitude = self._get_fft_amplitude(target)
-        target_amplitude = self._get_fft_amplitude(input)
+        input_amplitude = self._get_fft_amplitude(input)
+        target_amplitude = self._get_fft_amplitude(target)
 
         # Compute distance between amplitude of frequency components
         # See Section 3.3 from https://arxiv.org/abs/2005.00341
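
Note: the two amplitudes were computed from the wrong tensors; the fix restores the pairing of names to inputs. For a distance that is symmetric in its arguments the value is unchanged, but the corrected naming keeps the code aligned with the paper's formulation. A rough standalone sketch of the amplitude step being paired here (this helper is an assumption standing in for the class's internal `_get_fft_amplitude`, using an FFT over the spatial axes with the configured norm):

import torch

def fft_amplitude(x: torch.Tensor, fft_norm: str = "ortho") -> torch.Tensor:
    # amplitude (modulus) of the spatial frequency components
    return torch.abs(torch.fft.fftn(x, dim=(-2, -1), norm=fft_norm))

input = torch.rand(1, 1, 8, 8)
target = torch.rand(1, 1, 8, 8)
# after the fix, each name holds the spectrum of the matching tensor
loss = torch.nn.functional.l1_loss(fft_amplitude(input), fft_amplitude(target))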

monai/losses/ssim_loss.py

Lines changed: 3 additions & 3 deletions

@@ -111,17 +111,17 @@ def forward(self, input: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
             # 2D data
             x = torch.ones([1,1,10,10])/2
             y = torch.ones([1,1,10,10])/2
-            print(1-SSIMLoss(spatial_dims=2)(x,y))
+            print(SSIMLoss(spatial_dims=2)(x,y))
 
             # pseudo-3D data
             x = torch.ones([1,5,10,10])/2  # 5 could represent number of slices
             y = torch.ones([1,5,10,10])/2
-            print(1-SSIMLoss(spatial_dims=2)(x,y))
+            print(SSIMLoss(spatial_dims=2)(x,y))
 
             # 3D data
             x = torch.ones([1,1,10,10,10])/2
             y = torch.ones([1,1,10,10,10])/2
-            print(1-SSIMLoss(spatial_dims=3)(x,y))
+            print(SSIMLoss(spatial_dims=3)(x,y))
         """
         ssim_value = self.ssim_metric._compute_tensor(input, target).view(-1, 1)
         loss: torch.Tensor = 1 - ssim_value
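
Note: `SSIMLoss.forward` already returns `1 - ssim_value` (last line of the hunk above), so the old docstring examples that printed `1-SSIMLoss(...)(x,y)` were actually showing the SSIM value, not the loss. Usage after the fix:

import torch
from monai.losses.ssim_loss import SSIMLoss

x = torch.ones([1, 1, 10, 10]) / 2
y = torch.ones([1, 1, 10, 10]) / 2
print(SSIMLoss(spatial_dims=2)(x, y))  # close to 0: identical inputs give SSIM close to 1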

monai/networks/layers/filtering.py

Lines changed: 32 additions & 17 deletions

@@ -221,7 +221,8 @@ def __init__(self, spatial_sigma, color_sigma):
             self.len_spatial_sigma = 3
         else:
             raise ValueError(
-                f"len(spatial_sigma) {spatial_sigma} must match number of spatial dims {self.ken_spatial_sigma}."
+                f"Length of `spatial_sigma` must match number of spatial dims (1, 2 or 3) "
+                f"or be a single float value ({spatial_sigma=})."
             )
 
         # Register sigmas as trainable parameters.
@@ -231,6 +232,10 @@ def __init__(self, spatial_sigma, color_sigma):
         self.sigma_color = torch.nn.Parameter(torch.tensor(color_sigma))
 
     def forward(self, input_tensor):
+        if len(input_tensor.shape) < 3:
+            raise ValueError(
+                f"Input must have at least 3 dimensions (batch, channel, *spatial_dims), got {len(input_tensor.shape)}"
+            )
         if input_tensor.shape[1] != 1:
             raise ValueError(
                 f"Currently channel dimensions >1 ({input_tensor.shape[1]}) are not supported. "
@@ -239,24 +244,27 @@ def forward(self, input_tensor):
             )
 
         len_input = len(input_tensor.shape)
+        spatial_dims = len_input - 2
 
         # C++ extension so far only supports 5-dim inputs.
-        if len_input == 3:
+        if spatial_dims == 1:
             input_tensor = input_tensor.unsqueeze(3).unsqueeze(4)
-        elif len_input == 4:
+        elif spatial_dims == 2:
             input_tensor = input_tensor.unsqueeze(4)
 
-        if self.len_spatial_sigma != len_input:
-            raise ValueError(f"Spatial dimension ({len_input}) must match initialized len(spatial_sigma).")
+        if self.len_spatial_sigma != spatial_dims:
+            raise ValueError(
+                f"Number of spatial dimensions ({spatial_dims}) must match initialized `len(spatial_sigma)`."
+            )
 
         prediction = TrainableBilateralFilterFunction.apply(
             input_tensor, self.sigma_x, self.sigma_y, self.sigma_z, self.sigma_color
         )
 
         # Make sure to return tensor of the same shape as the input.
-        if len_input == 3:
+        if spatial_dims == 1:
             prediction = prediction.squeeze(4).squeeze(3)
-        elif len_input == 4:
+        elif spatial_dims == 2:
             prediction = prediction.squeeze(4)
 
         return prediction
@@ -389,7 +397,8 @@ def __init__(self, spatial_sigma, color_sigma):
             self.len_spatial_sigma = 3
         else:
             raise ValueError(
-                f"len(spatial_sigma) {spatial_sigma} must match number of spatial dims {self.ken_spatial_sigma}."
+                f"Length of `spatial_sigma` must match number of spatial dims (1, 2 or 3) "
+                f"or be a single float value ({spatial_sigma=})."
             )
 
         # Register sigmas as trainable parameters.
@@ -399,39 +408,45 @@ def __init__(self, spatial_sigma, color_sigma):
         self.sigma_color = torch.nn.Parameter(torch.tensor(color_sigma))
 
     def forward(self, input_tensor, guidance_tensor):
+        if len(input_tensor.shape) < 3:
+            raise ValueError(
+                f"Input must have at least 3 dimensions (batch, channel, *spatial_dims), got {len(input_tensor.shape)}"
+            )
         if input_tensor.shape[1] != 1:
             raise ValueError(
-                f"Currently channel dimensions >1 ({input_tensor.shape[1]}) are not supported. "
+                f"Currently channel dimensions > 1 ({input_tensor.shape[1]}) are not supported. "
                 "Please use multiple parallel filter layers if you want "
                 "to filter multiple channels."
             )
         if input_tensor.shape != guidance_tensor.shape:
             raise ValueError(
-                "Shape of input image must equal shape of guidance image."
-                f"Got {input_tensor.shape} and {guidance_tensor.shape}."
+                f"Shape of input image must equal shape of guidance image, got {input_tensor.shape} and {guidance_tensor.shape}."
             )
 
         len_input = len(input_tensor.shape)
+        spatial_dims = len_input - 2
 
         # C++ extension so far only supports 5-dim inputs.
-        if len_input == 3:
+        if spatial_dims == 1:
             input_tensor = input_tensor.unsqueeze(3).unsqueeze(4)
             guidance_tensor = guidance_tensor.unsqueeze(3).unsqueeze(4)
-        elif len_input == 4:
+        elif spatial_dims == 2:
             input_tensor = input_tensor.unsqueeze(4)
             guidance_tensor = guidance_tensor.unsqueeze(4)
 
-        if self.len_spatial_sigma != len_input:
-            raise ValueError(f"Spatial dimension ({len_input}) must match initialized len(spatial_sigma).")
+        if self.len_spatial_sigma != spatial_dims:
+            raise ValueError(
+                f"Number of spatial dimensions ({spatial_dims}) must match initialized `len(spatial_sigma)`."
            )
 
         prediction = TrainableJointBilateralFilterFunction.apply(
             input_tensor, guidance_tensor, self.sigma_x, self.sigma_y, self.sigma_z, self.sigma_color
         )
 
         # Make sure to return tensor of the same shape as the input.
-        if len_input == 3:
+        if spatial_dims == 1:
             prediction = prediction.squeeze(4).squeeze(3)
-        elif len_input == 4:
+        elif spatial_dims == 2:
             prediction = prediction.squeeze(4)
 
         return prediction
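
Note: both filters now derive the reshaping from the number of spatial dimensions (`len(shape) - 2`, i.e. rank minus batch and channel) instead of the raw tensor rank, and the new guard rejects sub-3-D inputs before `shape[1]` is ever read. A small sketch of the pad-to-5-D/squeeze-back round trip the C++ extension requires (standalone; an identity stand-in replaces the actual filter function):

import torch

def filter_nd(input_tensor: torch.Tensor) -> torch.Tensor:
    if len(input_tensor.shape) < 3:
        raise ValueError("expected (batch, channel, *spatial_dims)")
    spatial_dims = len(input_tensor.shape) - 2
    # the C++ extension only accepts 5-dim (B, C, X, Y, Z) inputs
    if spatial_dims == 1:
        input_tensor = input_tensor.unsqueeze(3).unsqueeze(4)
    elif spatial_dims == 2:
        input_tensor = input_tensor.unsqueeze(4)
    prediction = input_tensor  # identity stand-in for the bilateral filter call
    # return a tensor of the same shape as the original input
    if spatial_dims == 1:
        prediction = prediction.squeeze(4).squeeze(3)
    elif spatial_dims == 2:
        prediction = prediction.squeeze(4)
    return prediction

assert filter_nd(torch.rand(2, 1, 16)).shape == (2, 1, 16)          # 1-D
assert filter_nd(torch.rand(2, 1, 16, 16)).shape == (2, 1, 16, 16)  # 2-D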

monai/networks/nets/autoencoderkl.py

Lines changed: 34 additions & 7 deletions

@@ -680,6 +680,7 @@ def load_old_state_dict(self, old_state_dict: dict, verbose=False) -> None:
 
         Args:
             old_state_dict: state dict from the old AutoencoderKL model.
+            verbose: if True, print diagnostic information about key mismatches.
         """
 
         new_state_dict = self.state_dict()
@@ -715,13 +716,39 @@ def load_old_state_dict(self, old_state_dict: dict, verbose=False) -> None:
             new_state_dict[f"{block}.attn.to_k.bias"] = old_state_dict.pop(f"{block}.to_k.bias")
             new_state_dict[f"{block}.attn.to_v.bias"] = old_state_dict.pop(f"{block}.to_v.bias")
 
-            # old version did not have a projection so set these to the identity
-            new_state_dict[f"{block}.attn.out_proj.weight"] = torch.eye(
-                new_state_dict[f"{block}.attn.out_proj.weight"].shape[0]
-            )
-            new_state_dict[f"{block}.attn.out_proj.bias"] = torch.zeros(
-                new_state_dict[f"{block}.attn.out_proj.bias"].shape
-            )
+            out_w = f"{block}.attn.out_proj.weight"
+            out_b = f"{block}.attn.out_proj.bias"
+            proj_w = f"{block}.proj_attn.weight"
+            proj_b = f"{block}.proj_attn.bias"
+
+            if out_w in new_state_dict:
+                if proj_w in old_state_dict:
+                    new_state_dict[out_w] = old_state_dict.pop(proj_w)
+                    if proj_b in old_state_dict:
+                        new_state_dict[out_b] = old_state_dict.pop(proj_b)
+                    else:
+                        new_state_dict[out_b] = torch.zeros(
+                            new_state_dict[out_b].shape,
+                            dtype=new_state_dict[out_b].dtype,
+                            device=new_state_dict[out_b].device,
+                        )
+                else:
+                    # No legacy proj_attn - initialize out_proj to identity/zero
+                    new_state_dict[out_w] = torch.eye(
+                        new_state_dict[out_w].shape[0],
+                        dtype=new_state_dict[out_w].dtype,
+                        device=new_state_dict[out_w].device,
+                    )
+                    new_state_dict[out_b] = torch.zeros(
+                        new_state_dict[out_b].shape,
+                        dtype=new_state_dict[out_b].dtype,
+                        device=new_state_dict[out_b].device,
+                    )
+            elif proj_w in old_state_dict:
+                # new model has no out_proj at all - discard the legacy keys so they
+                # don't surface as "unexpected keys" during load_state_dict
+                old_state_dict.pop(proj_w)
+                old_state_dict.pop(proj_b, None)
 
         # fix the upsample conv blocks which were renamed postconv
         for k in new_state_dict:
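
Note: the rewritten block reuses legacy `proj_attn` weights for the new attention `out_proj` when both exist, falls back to identity/zero initialization (now created with the target tensor's dtype and device) when the checkpoint predates the projection, and drops orphaned `proj_attn` keys so `load_state_dict` does not flag them as unexpected. A toy sketch of that mapping on plain dicts (simplified; the missing-`proj_b` fallback of the real hunk is omitted):

import torch

def migrate_attn_block(old_sd: dict, new_sd: dict, block: str) -> None:
    out_w, out_b = f"{block}.attn.out_proj.weight", f"{block}.attn.out_proj.bias"
    proj_w, proj_b = f"{block}.proj_attn.weight", f"{block}.proj_attn.bias"
    if out_w in new_sd:
        if proj_w in old_sd:
            # reuse the legacy projection weights
            new_sd[out_w] = old_sd.pop(proj_w)
            if proj_b in old_sd:
                new_sd[out_b] = old_sd.pop(proj_b)
        else:
            # no legacy projection: identity weight, zero bias, matching dtype/device
            ref_w = new_sd[out_w]
            new_sd[out_w] = torch.eye(ref_w.shape[0], dtype=ref_w.dtype, device=ref_w.device)
            new_sd[out_b] = torch.zeros_like(new_sd[out_b])
    elif proj_w in old_sd:
        # the new model has no out_proj; drop the orphaned legacy keys
        old_sd.pop(proj_w)
        old_sd.pop(proj_b, None)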
