Improve DIM performance using padding in FFT (#92)

MatthewFilipovich · web-flow · commit f2472b166f95 · 2026-04-01T11:49:01.000+01:00
diff --git a/src/torchoptics/functional/functional.py b/src/torchoptics/functional/functional.py
@@ -42,7 +42,7 @@ def calculate_std(intensity: Tensor, meshgrid: tuple[Tensor, Tensor]) -> Tensor:
     )
 
 
-def conv2d_fft(input: Tensor, weight: Tensor) -> Tensor:
+def conv2d_fft(input: Tensor, weight: Tensor, fft_padding: int = 0) -> Tensor:
     """Perform a 2D convolution using Fast Fourier Transforms (FFT).
 
     Unlike the :func:`torch.nn.functional.conv2d` function, which performs cross-correlation,
@@ -51,14 +51,18 @@ def conv2d_fft(input: Tensor, weight: Tensor) -> Tensor:
     Args:
         input (torch.Tensor): Input tensor to be convolved of shape :math:`(..., iH, iW)`.
         weight (torch.Tensor): Filters of shape :math:`(..., kH, kW)`.
+        fft_padding (int): Non-negative number of extra zeros appended to the input in each spatial
+            dimension before the FFT. Does not affect the output size, but can improve FFT
+            performance when the padded size has favorable prime factors. Default: ``0``.
 
     Returns:
         torch.Tensor: Convolved output tensor of shape :math:`(..., oH, oW)`.
 
     """
-    input_fr = fft2(input)
-    output_size = (input_fr.size(-2) - weight.size(-2) + 1, input_fr.size(-1) - weight.size(-1) + 1)
-    weight_fr = fft2(weight.flip(-1, -2).conj(), s=(input_fr.size(-2), input_fr.size(-1)))
+    output_size = (input.size(-2) - weight.size(-2) + 1, input.size(-1) - weight.size(-1) + 1)
+    fft_size = (input.size(-2) + fft_padding, input.size(-1) + fft_padding)
+    input_fr = fft2(input, s=fft_size)
+    weight_fr = fft2(weight.flip(-1, -2).conj(), s=fft_size)
     output_fr = input_fr * weight_fr.conj()
     return ifft2(output_fr)[..., : output_size[0], : output_size[1]]
 
diff --git a/src/torchoptics/propagation/direct_integration_method.py b/src/torchoptics/propagation/direct_integration_method.py
@@ -28,7 +28,8 @@ def dim_propagation(field: Field, propagation_plane: PlanarGrid, propagation_met
     """
     x, y = calculate_meshgrid(field, propagation_plane)
     impulse_response = calculate_impulse_response(field, propagation_plane, x, y, propagation_method)
-    propagated_data = conv2d_fft(impulse_response, field.data)
+    # fft_padding=1: impulse response has size N+M-1, so the FFT runs at size N+M, which can be faster
+    propagated_data = conv2d_fft(impulse_response, field.data, fft_padding=1)
     return field.copy(data=propagated_data, z=propagation_plane.z, offset=propagation_plane.offset)
 
 
diff --git a/tests/functional/test_conv2d.py b/tests/functional/test_conv2d.py
@@ -9,3 +9,29 @@ def test_conv2d_fft():
     conv2d_output = torch.nn.functional.conv2d(input, weight.flip(-1, -2))
     conv2d_fft_output = conv2d_fft(input, weight)
     assert torch.allclose(conv2d_output, conv2d_fft_output, atol=1e-5)
+
+
+def test_conv2d_fft_large_kernel():
+    input = torch.randn(1, 1, 64, 96, dtype=torch.complex128)
+    weight = torch.randn(1, 1, 17, 33, dtype=torch.complex128)
+    conv2d_output = torch.nn.functional.conv2d(input, weight.flip(-1, -2))
+    conv2d_fft_output = conv2d_fft(input, weight)
+    assert torch.allclose(conv2d_output, conv2d_fft_output, atol=1e-8)
+
+
+def test_conv2d_fft_with_padding_matches_conv2d():
+    # Ensure fft_padding does not change numerical result compared to conv2d
+    input = torch.randn(2, 1, 30, 45, dtype=torch.complex64)
+    weight = torch.randn(1, 1, 5, 7, dtype=torch.complex64)
+    expected = torch.nn.functional.conv2d(input, weight.flip(-1, -2))
+
+    # no padding
+    out0 = conv2d_fft(input, weight, fft_padding=0)
+    # small padding
+    out8 = conv2d_fft(input, weight, fft_padding=8)
+    # larger padding
+    out32 = conv2d_fft(input, weight, fft_padding=32)
+
+    assert torch.allclose(expected, out0, atol=1e-5)
+    assert torch.allclose(expected, out8, atol=1e-5)
+    assert torch.allclose(expected, out32, atol=1e-5)