From 2f7371897a64fcd151a93fe6805598b681334205 Mon Sep 17 00:00:00 2001 From: mrava87 Date: Wed, 11 Jun 2025 22:21:04 +0100 Subject: [PATCH 1/5] feat: add cuda backend to Kirchhoff --- pylops/waveeqprocessing/_kirchhoff_cuda.py | 488 +++++++++++++++++++++ pylops/waveeqprocessing/kirchhoff.py | 46 +- 2 files changed, 522 insertions(+), 12 deletions(-) create mode 100644 pylops/waveeqprocessing/_kirchhoff_cuda.py diff --git a/pylops/waveeqprocessing/_kirchhoff_cuda.py b/pylops/waveeqprocessing/_kirchhoff_cuda.py new file mode 100644 index 000000000..4a73347e1 --- /dev/null +++ b/pylops/waveeqprocessing/_kirchhoff_cuda.py @@ -0,0 +1,488 @@ +from math import cos, fabs + +import numpy as np +from numba import cuda + +from pylops.utils.backend import to_cupy + + +class _KirchhoffCudaHelper: + """A helper class to perform Kirchhoff demigration/migration using Numba CUDA. + + This class provides methods to compute the forward and adjoint operations for the + Kirchhoff operator, utilizing GPU acceleration through Numba's CUDA capabilities. + + Parameters + ---------- + ns : :obj:`int` + Number of sources. + nr : :obj:`int` + Number of receivers. + nt : :obj:`int` + Number of time samples. + ni : :obj:`int` + Number of image points. + dynamic : :obj:`bool`, optional + Flag indicating whether to use dynamic computation + (default is ``False``). + + """ + + def __init__(self, ns, nr, nt, ni, dynamic=False): + self.ns, self.nr, self.nt, self.ni = ns, nr, nt, ni + self.dynamic = dynamic + self._grid_setup() + + def _grid_setup(self): + """Set up CUDA grid and block dimensions. + + This method configures the number of blocks and threads per block for + CUDA kernels, depending on the number of sources and receivers. + """ + # use warp size as number of threads per block + current_device = cuda.get_current_device() + warp = current_device.WARP_SIZE // 2 + self.num_threads_per_blocks = (warp, warp) + # configure number of blocks + self.num_blocks = ( + (self.ns + self.num_threads_per_blocks[0] - 1) + // self.num_threads_per_blocks[0], + (self.nr + self.num_threads_per_blocks[1] - 1) + // self.num_threads_per_blocks[1], + ) + + @staticmethod + @cuda.jit + def _travsrcrec_kirch_matvec_cuda(x, y, ns, nr, nt, ni, dt, trav_srcs, trav_recs): + isrc, irec = cuda.grid(2) + if ns > isrc and nr > irec: + for ii in range(ni): + travisrc = trav_srcs[ii, isrc] + travirec = trav_recs[ii, irec] + trav = travisrc + travirec + itravii = int(trav / dt) + travdii = trav / dt - itravii + if 0 <= itravii < nt - 1: + ind1 = (isrc * nr + irec, itravii) + val1 = x[ii] * (1 - travdii) + ind2 = (isrc * nr + irec, itravii + 1) + val2 = x[ii] * travdii + cuda.atomic.add(y, ind1, val1) + cuda.atomic.add(y, ind2, val2) + + @staticmethod + @cuda.jit + def _travsrcrec_kirch_rmatvec_cuda(x, y, ns, nr, nt, ni, dt, trav_srcs, trav_recs): + isrc, irec = cuda.grid(2) + if ns > isrc and nr > irec: + for ii in range(ni): + travisrc = trav_srcs[ii, isrc] + travirec = trav_recs[ii, irec] + trav = travisrc + travirec + itravii = int(trav / dt) + travdii = trav / dt - itravii + if 0 <= itravii < nt - 1: + vii = ( + x[isrc * nr + irec, itravii] * (1 - travdii) + + x[isrc * nr + irec, itravii + 1] * travdii + ) + cuda.atomic.add(y, ii, vii) + + @staticmethod + @cuda.jit + def _ampsrcrec_kirch_matvec_cuda( + x, + y, + ns, + nr, + nt, + ni, + dt, + vel, + trav_srcs, + trav_recs, + amp_srcs, + amp_recs, + aperturemin, + aperturemax, + aperturetap, + nz, + six, + rix, + angleaperturemin, + angleaperturemax, + angles_srcs, + angles_recs, + ): + daperture = aperturemax - aperturemin + dangleaperture = angleaperturemax - angleaperturemin + + isrc, irec = cuda.grid(2) + if ns > isrc and nr > irec: + for ii in range(ni): + sixisrcrec = six[isrc * nr + irec] + rixisrcrec = rix[isrc * nr + irec] + travisrc = trav_srcs[ii, isrc] + travirec = trav_recs[ii, irec] + trav = travisrc + travirec + itravii = int(trav / dt) + travdii = trav / dt - itravii + ampisrc = amp_srcs[ii, isrc] + ampirec = amp_recs[ii, irec] + # extract source and receiver angle at given image point + angle_src = angles_srcs[ii, isrc] + angle_rec = angles_recs[ii, irec] + abs_angle_src = fabs(angle_src) + abs_angle_rec = fabs(angle_rec) + # compute cosine of half opening angle and total amplitude scaling + cosangle = cos((angle_src - angle_rec) / 2.0) + damp = 2.0 * cosangle * ampisrc * ampirec / vel[ii] + # identify z-index of image point + iz = ii % nz + # angle apertures checks + aptap = 1.0 + if ( + abs_angle_src < angleaperturemax + and abs_angle_rec < angleaperturemax + ): + if abs_angle_src >= angleaperturemin: + # extract source angle aperture taper value + aptap = ( + aptap + * aperturetap[ + int( + 20 + * (abs_angle_src - angleaperturemin) + // dangleaperture + ) + ] + ) + if abs_angle_rec >= angleaperturemin: + # extract receiver angle aperture taper value + aptap = ( + aptap + * aperturetap[ + int( + 20 + * (abs_angle_rec - angleaperturemin) + // dangleaperture + ) + ] + ) + + # aperture check + aperture = abs(sixisrcrec - rixisrcrec) / (iz + 1) + if aperture < aperturemax: + if aperture >= aperturemin: + # extract aperture taper value + aptap = ( + aptap + * aperturetap[ + int(20 * ((aperture - aperturemin) // daperture)) + ] + ) + # time limit check + if 0 <= itravii < nt - 1: + ind1 = (isrc * nr + irec, itravii) + val1 = x[ii] * (1 - travdii) * damp * aptap + ind2 = (isrc * nr + irec, itravii + 1) + val2 = x[ii] * travdii * damp * aptap + cuda.atomic.add(y, ind1, val1) + cuda.atomic.add(y, ind2, val2) + + @staticmethod + @cuda.jit + def _ampsrcrec_kirch_rmatvec_cuda( + x, + y, + ns, + nr, + nt, + ni, + dt, + vel, + trav_srcs, + trav_recs, + amp_srcs, + amp_recs, + aperturemin, + aperturemax, + aperturetap, + nz, + six, + rix, + angleaperturemin, + angleaperturemax, + angles_srcs, + angles_recs, + ): + daperture = aperturemax - aperturemin + dangleaperture = angleaperturemax - angleaperturemin + + isrc, irec = cuda.grid(2) + if ns > isrc and nr > irec: + for ii in range(ni): + sixisrcrec = six[isrc * nr + irec] + rixisrcrec = rix[isrc * nr + irec] + travisrc = trav_srcs[ii, isrc] + travirec = trav_recs[ii, irec] + trav = travisrc + travirec + itravii = int(trav / dt) + travdii = trav / dt - itravii + ampisrc = amp_srcs[ii, isrc] + ampirec = amp_recs[ii, irec] + # extract source and receiver angle at given image point + angle_src = angles_srcs[ii, isrc] + angle_rec = angles_recs[ii, irec] + abs_angle_src = fabs(angle_src) + abs_angle_rec = fabs(angle_rec) + # compute cosine of half opening angle and total amplitude scaling + cosangle = cos((angle_src - angle_rec) / 2.0) + damp = 2.0 * cosangle * ampisrc * ampirec / vel[ii] + # identify z-index of image point + iz = ii % nz + # angle apertures checks + aptap = 1.0 + if ( + abs_angle_src < angleaperturemax + and abs_angle_rec < angleaperturemax + ): + if abs_angle_src >= angleaperturemin: + # extract source angle aperture taper value + aptap = ( + aptap + * aperturetap[ + int( + 20 + * (abs_angle_src - angleaperturemin) + // dangleaperture + ) + ] + ) + if abs_angle_rec >= angleaperturemin: + # extract receiver angle aperture taper value + aptap = ( + aptap + * aperturetap[ + int( + 20 + * (abs_angle_rec - angleaperturemin) + // dangleaperture + ) + ] + ) + + # aperture check + aperture = abs(sixisrcrec - rixisrcrec) / (iz + 1) + if aperture < aperturemax: + if aperture >= aperturemin: + # extract aperture taper value + aptap = ( + aptap + * aperturetap[ + int(20 * ((aperture - aperturemin) // daperture)) + ] + ) + # time limit check + if 0 <= itravii < nt - 1: + ind1 = ii + val1 = ( + ( + x[isrc * nr + irec, itravii] * (1 - travdii) + + x[isrc * nr + irec, itravii + 1] * travdii + ) + * damp + * aptap + ) + cuda.atomic.add(y, ind1, val1) + + def _call_kinematic(self, opt, *inputs): + """Kinematic-only computations using CUDA. + + This method handles data preparation and execution of CUDA kernels + for both forward and adjoint operations of kinematic-only operator. + + Parameters + ---------- + opt : :obj:`str` + Operation type, either '_matvec' for forward or '_rmatvec' + for adjoint. + *inputs : :obj:`list` + List of input parameters required by the kernels. + + Returns + ------- + y_d : :obj:`cupy.ndarray` + Output data. + + """ + x_d = inputs[0] + y_d = inputs[1] + ns_d = np.int32(inputs[2]) + nr_d = np.int32(inputs[3]) + nt_d = np.int32(inputs[4]) + ni_d = np.int32(inputs[5]) + dt_d = np.float32(inputs[6]) + trav_srcs_d = to_cupy(inputs[7]) + trav_recs_d = to_cupy(inputs[8]) + + if opt == "_matvec": + self._travsrcrec_kirch_matvec_cuda[ + self.num_blocks, self.num_threads_per_blocks + ](x_d, y_d, ns_d, nr_d, nt_d, ni_d, dt_d, trav_srcs_d, trav_recs_d) + elif opt == "_rmatvec": + self._travsrcrec_kirch_rmatvec_cuda[ + self.num_blocks, self.num_threads_per_blocks + ](x_d, y_d, ns_d, nr_d, nt_d, ni_d, dt_d, trav_srcs_d, trav_recs_d) + cuda.synchronize() + + return y_d + + def _call_dynamic(self, opt, *inputs): + """Synamic computations using CUDA. + + This method handles data preparation and execution of CUDA kernels + for both forward and adjoint operations of dynamic operator. + + Parameters + ---------- + opt : :obj:`str` + Operation type, either '_matvec' for forward or '_rmatvec' + for adjoint. + *inputs : :obj:`list` + List of input parameters required by the kernels. + + Returns + ------- + y_d : :obj:`cupy.ndarray` + Output data. + + """ + x_d = inputs[0] + y_d = inputs[1] + ns_d = np.int32(inputs[2]) + nr_d = np.int32(inputs[3]) + nt_d = np.int32(inputs[4]) + ni_d = np.int32(inputs[5]) + dt_d = np.float32(inputs[6]) + vel_d = to_cupy(inputs[7]) + trav_srcs_d = to_cupy(inputs[8]) + trav_recs_d = to_cupy(inputs[9]) + amp_srcs_d = to_cupy(inputs[10]) + amp_recs_d = to_cupy(inputs[11]) + aperturemin_d = np.float32(inputs[12]) + aperturemax_d = np.float32(inputs[13]) + aperturetap_d = to_cupy(inputs[14]) + nz_d = np.int32(inputs[15]) + six_d = to_cupy(inputs[16]) + rix_d = to_cupy(inputs[17]) + angleaperturemin_d = np.float32(inputs[18]) + angleaperturemax_d = np.float32(inputs[19]) + angles_srcs_d = to_cupy(inputs[20]) + angles_recs_d = to_cupy(inputs[21]) + + if opt == "_matvec": + self._ampsrcrec_kirch_matvec_cuda[ + self.num_blocks, self.num_threads_per_blocks + ]( + x_d, + y_d, + ns_d, + nr_d, + nt_d, + ni_d, + dt_d, + vel_d, + trav_srcs_d, + trav_recs_d, + amp_srcs_d, + amp_recs_d, + aperturemin_d, + aperturemax_d, + aperturetap_d, + nz_d, + six_d, + rix_d, + angleaperturemin_d, + angleaperturemax_d, + angles_srcs_d, + angles_recs_d, + ) + elif opt == "_rmatvec": + self._ampsrcrec_kirch_rmatvec_cuda[ + self.num_blocks, self.num_threads_per_blocks + ]( + x_d, + y_d, + ns_d, + nr_d, + nt_d, + ni_d, + dt_d, + vel_d, + trav_srcs_d, + trav_recs_d, + amp_srcs_d, + amp_recs_d, + aperturemin_d, + aperturemax_d, + aperturetap_d, + nz_d, + six_d, + rix_d, + angleaperturemin_d, + angleaperturemax_d, + angles_srcs_d, + angles_recs_d, + ) + cuda.synchronize() + + return y_d + + def _matvec_cuda(self, *inputs): + """Forward with dispatching to appropriate CUDA kernels. + + This method selects the appropriate kernel to execute based on the + computation flags, and performs the forward operation. + + Parameters + ---------- + *inputs : :obj:`list` + List of input parameters required by the kernels. + + Returns + ------- + y_d : :obj:`cupy.ndarray` + Output (seismic data) of the forward operator. + + """ + if self.dynamic: + y_d = self._call_dynamic("_matvec", *inputs) + else: + y_d = self._call_kinematic("_matvec", *inputs) + + return y_d + + def _rmatvec_cuda(self, *inputs): + """Adjoint with dispatching to appropriate CUDA kernels. + + This method selects the appropriate kernel to execute based on the + computation flags, and performs the adjoint operation. + + Parameters + ---------- + *inputs : :obj:`list` + List of input parameters required by the kernels. + + Returns + ------- + y_d : :obj:`cupy.ndarray` + Output data (image) of the adjoint operator. + + """ + if self.dynamic: + y_d = self._call_dynamic("_rmatvec", *inputs) + else: + y_d = self._call_kinematic("_rmatvec", *inputs) + + return y_d diff --git a/pylops/waveeqprocessing/kirchhoff.py b/pylops/waveeqprocessing/kirchhoff.py index bdd5be878..9eed7dd6b 100644 --- a/pylops/waveeqprocessing/kirchhoff.py +++ b/pylops/waveeqprocessing/kirchhoff.py @@ -12,6 +12,7 @@ from pylops.signalprocessing import Convolve1D from pylops.utils import deps from pylops.utils._internal import _value_or_sized_to_array +from pylops.utils.backend import get_array_module from pylops.utils.decorators import reshaped from pylops.utils.tapers import taper from pylops.utils.typing import DTypeLike, NDArray @@ -25,6 +26,8 @@ if jit_message is None: from numba import jit, prange + from ._kirchhoff_cuda import _KirchhoffCudaHelper + # detect whether to use parallel or not numba_threads = int(os.getenv("NUMBA_NUM_THREADS", "1")) parallel = True if numba_threads != 1 else False @@ -82,8 +85,8 @@ class Kirchhoff(LinearOperator): :math:`\lbrack (n_y) n_x n_z \times n_s n_r \rbrack` or pair of traveltime tables of size :math:`\lbrack (n_y) n_x n_z \times n_s \rbrack` and :math:`\lbrack (n_y) n_x n_z \times n_r \rbrack` (to be provided if ``mode='byot'``). Note that the latter approach is recommended as less memory demanding - than the former. Moreover, only ``mode='dynamic'`` is only possible when traveltimes are provided in - the latter form. + than the former. Moreover, ``mode='dynamic'`` and ``engine='cuda'`` are only possible when traveltimes are + provided in the latter form. amp : :obj:`numpy.ndarray`, optional .. versionadded:: 2.0.0 @@ -108,7 +111,8 @@ class Kirchhoff(LinearOperator): Deprecated, will be removed in v3.0.0. Simply kept for back-compatibility with previous implementation, but effectively not affecting the behaviour of the operator. engine : :obj:`str`, optional - Engine used for computations (``numpy`` or ``numba``). + Engine used for computations (``numpy``, ``numba`` or ``cuda``). Note that the ``cuda`` engine + currently supports only ``dynamic=False``. dtype : :obj:`str`, optional Type of elements in input array. name : :obj:`str`, optional @@ -127,7 +131,10 @@ class Kirchhoff(LinearOperator): Raises ------ NotImplementedError - If ``mode`` is neither ``analytic``, ``eikonal``, or ``byot`` + If ``mode`` is neither ``analytic``, ``eikonal``, or ``byot``. + + NotImplementedError + If ``dynamic=True`` and ``engine="cuda"`` Notes ----- @@ -857,7 +864,7 @@ def _ampsrcrec_kirch_matvec( ] ) - # identify x-index of image point + # identify z-index of image point iz = ii % nz # aperture check aperture = abs(sixisrcrec - rixisrcrec) / (iz + 1) @@ -917,7 +924,7 @@ def _ampsrcrec_kirch_rmatvec( velii = vel[ii] angle_srcsii = angles_srcs[ii] angle_recsii = angles_recs[ii] - # identify x-index of image point + # identify z-index of image point iz = ii % nz for isrc in range(ns): trav_srcii = trav_srcsii[isrc] @@ -995,10 +1002,9 @@ def _ampsrcrec_kirch_rmatvec( return y def _register_multiplications(self, engine: str) -> None: - if engine not in ["numpy", "numba"]: - raise KeyError("engine must be numpy or numba") + if engine not in ["numpy", "numba", "cuda"]: + raise KeyError("engine must be numpy or numba or cuda") if engine == "numba" and jit_message is None: - # numba numba_opts = dict( nopython=True, nogil=True, parallel=parallel ) # fastmath=True, @@ -1011,7 +1017,21 @@ def _register_multiplications(self, engine: str) -> None: elif not self.travsrcrec: self._kirch_matvec = jit(**numba_opts)(self._trav_kirch_matvec) self._kirch_rmatvec = jit(**numba_opts)(self._trav_kirch_rmatvec) - + elif engine == "cuda": + if self.dynamic and self.travsrcrec: + self.cuda_helper = _KirchhoffCudaHelper( + self.ns, self.nr, self.nt, self.ni, True + ) + elif self.travsrcrec: + self.cuda_helper = _KirchhoffCudaHelper( + self.ns, self.nr, self.nt, self.ni, False + ) + elif not self.travsrcrec: + raise NotImplementedError( + "cuda not implemented for traveltimes " "provided in one table" + ) + self._kirch_matvec = self.cuda_helper._matvec_cuda + self._kirch_rmatvec = self.cuda_helper._rmatvec_cuda else: if engine == "numba" and jit_message is not None: logging.warning(jit_message) @@ -1027,7 +1047,8 @@ def _register_multiplications(self, engine: str) -> None: @reshaped def _matvec(self, x: NDArray) -> NDArray: - y = np.zeros((self.nsnr, self.nt), dtype=self.dtype) + ncp = get_array_module(x) + y = ncp.zeros((self.nsnr, self.nt), dtype=self.dtype) if self.dynamic and self.travsrcrec: inputs = ( x.ravel(), @@ -1074,9 +1095,10 @@ def _matvec(self, x: NDArray) -> NDArray: @reshaped def _rmatvec(self, x: NDArray) -> NDArray: + ncp = get_array_module(x) x = self.cop._rmatvec(x.ravel()) x = x.reshape(self.nsnr, self.nt) - y = np.zeros(self.ni, dtype=self.dtype) + y = ncp.zeros(self.ni, dtype=self.dtype) if self.dynamic and self.travsrcrec: inputs = ( x, From b944dcb92d1831f7ba3180cede83c2f471ab44e3 Mon Sep 17 00:00:00 2001 From: mrava87 Date: Wed, 11 Jun 2025 23:11:28 +0100 Subject: [PATCH 2/5] minor: fix error message --- pylops/waveeqprocessing/kirchhoff.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pylops/waveeqprocessing/kirchhoff.py b/pylops/waveeqprocessing/kirchhoff.py index 9eed7dd6b..3f8e0a10f 100644 --- a/pylops/waveeqprocessing/kirchhoff.py +++ b/pylops/waveeqprocessing/kirchhoff.py @@ -1028,7 +1028,8 @@ def _register_multiplications(self, engine: str) -> None: ) elif not self.travsrcrec: raise NotImplementedError( - "cuda not implemented for traveltimes " "provided in one table" + "engine='cuda' not implemented for traveltimes " + "provided in one table" ) self._kirch_matvec = self.cuda_helper._matvec_cuda self._kirch_rmatvec = self.cuda_helper._rmatvec_cuda From b7cdbaba7b591e001cd6b4374f68b7ab6a9f85bb Mon Sep 17 00:00:00 2001 From: mrava87 Date: Wed, 18 Jun 2025 23:12:24 +0100 Subject: [PATCH 3/5] test: added cuda tests for Kirchhoff --- pytests/test_kirchhoff.py | 128 ++++++++++++++++++++++++-------------- 1 file changed, 82 insertions(+), 46 deletions(-) diff --git a/pytests/test_kirchhoff.py b/pytests/test_kirchhoff.py index 50de034bd..4a37008b6 100644 --- a/pytests/test_kirchhoff.py +++ b/pytests/test_kirchhoff.py @@ -1,8 +1,19 @@ import os import numpy as np + +if int(os.environ.get("TEST_CUPY_PYLOPS", 0)): + import cupy as np + from cupy.testing import assert_array_almost_equal + + backend = "cuda" +else: + import numpy as np + from numpy.testing import assert_array_almost_equal + + backend = "numpy" +import numpy as npp import pytest -from numpy.testing import assert_array_almost_equal from pylops.utils import dottest from pylops.utils.wavelets import ricker @@ -34,22 +45,22 @@ skfmm_enabled = False v0 = 500 -y = np.arange(PAR["ny"]) * PAR["dy"] -x = np.arange(PAR["nx"]) * PAR["dx"] -z = np.arange(PAR["nz"]) * PAR["dz"] -t = np.arange(PAR["nt"]) * PAR["dt"] - -sy = np.linspace(y.min(), y.max(), PAR["nsy"]) -sx = np.linspace(x.min(), x.max(), PAR["nsx"]) -syy, sxx = np.meshgrid(sy, sx, indexing="ij") -s2d = np.vstack((sx, 2 * np.ones(PAR["nsx"]))) -s3d = np.vstack((syy.ravel(), sxx.ravel(), 2 * np.ones(PAR["nsx"] * PAR["nsy"]))) - -ry = np.linspace(y.min(), y.max(), PAR["nry"]) -rx = np.linspace(x.min(), x.max(), PAR["nrx"]) -ryy, rxx = np.meshgrid(ry, rx, indexing="ij") -r2d = np.vstack((rx, 2 * np.ones(PAR["nrx"]))) -r3d = np.vstack((ryy.ravel(), rxx.ravel(), 2 * np.ones(PAR["nrx"] * PAR["nry"]))) +y = npp.arange(PAR["ny"]) * PAR["dy"] +x = npp.arange(PAR["nx"]) * PAR["dx"] +z = npp.arange(PAR["nz"]) * PAR["dz"] +t = npp.arange(PAR["nt"]) * PAR["dt"] + +sy = npp.linspace(y.min(), y.max(), PAR["nsy"]) +sx = npp.linspace(x.min(), x.max(), PAR["nsx"]) +syy, sxx = npp.meshgrid(sy, sx, indexing="ij") +s2d = npp.vstack((sx, 2 * npp.ones(PAR["nsx"]))) +s3d = npp.vstack((syy.ravel(), sxx.ravel(), 2 * npp.ones(PAR["nsx"] * PAR["nsy"]))) + +ry = npp.linspace(y.min(), y.max(), PAR["nry"]) +rx = npp.linspace(x.min(), x.max(), PAR["nrx"]) +ryy, rxx = npp.meshgrid(ry, rx, indexing="ij") +r2d = npp.vstack((rx, 2 * npp.ones(PAR["nrx"]))) +r3d = npp.vstack((ryy.ravel(), rxx.ravel(), 2 * npp.ones(PAR["nrx"] * PAR["nry"]))) wav, _, wavc = ricker(t[:21], f0=40) @@ -157,7 +168,7 @@ def test_traveltime_table(): trav_ana, trav_srcs_ana, trav_recs_ana, - dist_ana, + _, _, _, ) = Kirchhoff._traveltime_table(z, x, s2d, r2d, v0, mode="analytic") @@ -166,7 +177,7 @@ def test_traveltime_table(): trav_eik, trav_srcs_eik, trav_recs_eik, - dist_eik, + _, _, _, ) = Kirchhoff._traveltime_table( @@ -181,20 +192,13 @@ def test_traveltime_table(): ( trav_srcs_ana, trav_recs_ana, - dist_srcs_ana, - dist_recs_ana, _, _, - ) = Kirchhoff._traveltime_table(z, x, s3d, r3d, v0, y=y, mode="analytic") - - ( - trav_srcs_eik, - trav_recs_eik, - dist_srcs_eik, - dist_recs_eik, _, _, - ) = Kirchhoff._traveltime_table( + ) = Kirchhoff._traveltime_table(z, x, s3d, r3d, v0, y=y, mode="analytic") + + (trav_srcs_eik, trav_recs_eik, _, _, _, _,) = Kirchhoff._traveltime_table( z, x, s3d, @@ -206,12 +210,8 @@ def test_traveltime_table(): assert_array_almost_equal(trav_srcs_ana, trav_srcs_eik, decimal=2) assert_array_almost_equal(trav_recs_ana, trav_recs_eik, decimal=2) - assert_array_almost_equal(trav_ana, trav_eik, decimal=2) -@pytest.mark.skipif( - int(os.environ.get("TEST_CUPY_PYLOPS", 0)) == 1, reason="Not CuPy enabled" -) @pytest.mark.parametrize("par", [(par1), (par2), (par3), (par1d), (par2d), (par3d)]) def test_kirchhoff2d(par): """Dot-test for Kirchhoff operator""" @@ -226,8 +226,6 @@ def test_kirchhoff2d(par): ) + trav_recs.reshape(PAR["nx"] * PAR["nz"], 1, PAR["nrx"]) trav = trav.reshape(PAR["nx"] * PAR["nz"], PAR["nsx"] * PAR["nrx"]) amp = None - # amp = 1 / (dist + 1e-2 * dist.max()) - else: trav = None amp = None @@ -240,19 +238,31 @@ def test_kirchhoff2d(par): s2d, r2d, vel if par["mode"] == "eikonal" else v0, - wav, + np.asarray(wav), wavc, y=None, trav=trav, amp=amp, mode=par["mode"], + engine=backend, + ) + if par["mode"] == "byot": + Dop.trav = np.asarray(Dop.trav) + else: + Dop.trav_srcs = np.asarray(Dop.trav_srcs) + Dop.trav_recs = np.asarray(Dop.trav_recs) + if par["mode"] == "dynamic": + Dop.amp_srcs = np.asarray(Dop.amp_srcs) + Dop.amp_recs = np.asarray(Dop.amp_recs) + + assert dottest( + Dop, + PAR["nsx"] * PAR["nrx"] * PAR["nt"], + PAR["nz"] * PAR["nx"], + backend=backend, ) - assert dottest(Dop, PAR["nsx"] * PAR["nrx"] * PAR["nt"], PAR["nz"] * PAR["nx"]) -@pytest.mark.skipif( - int(os.environ.get("TEST_CUPY_PYLOPS", 0)) == 1, reason="Not CuPy enabled" -) @pytest.mark.parametrize("par", [(par1), (par2), (par3)]) def test_kirchhoff3d(par): """Dot-test for Kirchhoff operator""" @@ -287,17 +297,22 @@ def test_kirchhoff3d(par): y=y, trav=trav, mode=par["mode"], + engine=backend, ) + if par["mode"] == "byot": + Dop.trav = np.asarray(Dop.trav) + else: + Dop.trav_srcs = np.asarray(Dop.trav_srcs) + Dop.trav_recs = np.asarray(Dop.trav_recs) + assert dottest( Dop, PAR["nsx"] * PAR["nrx"] * PAR["nsy"] * PAR["nry"] * PAR["nt"], PAR["nz"] * PAR["nx"] * PAR["ny"], + backend=backend, ) -@pytest.mark.skipif( - int(os.environ.get("TEST_CUPY_PYLOPS", 0)) == 1, reason="Not CuPy enabled" -) @pytest.mark.parametrize( "par", [ @@ -323,7 +338,13 @@ def test_kirchhoff2d_trav_vs_travsrcrec(par): mode=par["mode"], dynamic=par["dynamic"], angleaperture=None, + engine=backend, ) + Dop.trav_srcs = np.asarray(Dop.trav_srcs) + Dop.trav_recs = np.asarray(Dop.trav_recs) + if par["dynamic"]: + Dop.amp_srcs = np.asarray(Dop.amp_srcs) + Dop.amp_recs = np.asarray(Dop.amp_recs) # old behaviour trav = Dop.trav_srcs.reshape( @@ -348,7 +369,13 @@ def test_kirchhoff2d_trav_vs_travsrcrec(par): mode=par["mode"], dynamic=par["dynamic"], angleaperture=None, + engine=backend, ) + D1op.trav_srcs = np.asarray(D1op.trav_srcs) + D1op.trav_recs = np.asarray(D1op.trav_recs) + if par["dynamic"]: + D1op.amp_srcs = np.asarray(D1op.amp_srcs) + D1op.amp_recs = np.asarray(D1op.amp_recs) # forward xx = np.random.normal(0, 1, PAR["nx"] * PAR["nz"]) @@ -359,9 +386,6 @@ def test_kirchhoff2d_trav_vs_travsrcrec(par): assert_array_almost_equal(Dop.H @ yy, D1op.H @ yy, decimal=2) -@pytest.mark.skipif( - int(os.environ.get("TEST_CUPY_PYLOPS", 0)) == 1, reason="Not CuPy enabled" -) @pytest.mark.parametrize( "par", [ @@ -384,7 +408,13 @@ def test_kirchhoff3d_trav_vs_travsrcrec(par): wavc, y=y, mode=par["mode"], + engine=backend, ) + Dop.trav_srcs = np.asarray(Dop.trav_srcs) + Dop.trav_recs = np.asarray(Dop.trav_recs) + if par["dynamic"]: + Dop.amp_srcs = np.asarray(Dop.amp_srcs) + Dop.amp_recs = np.asarray(Dop.amp_recs) # old behaviour trav = Dop.trav_srcs.reshape( @@ -409,7 +439,13 @@ def test_kirchhoff3d_trav_vs_travsrcrec(par): y=y, trav=trav, mode=par["mode"], + engine=backend, ) + D1op.trav_srcs = np.asarray(D1op.trav_srcs) + D1op.trav_recs = np.asarray(D1op.trav_recs) + if par["dynamic"]: + D1op.amp_srcs = np.asarray(D1op.amp_srcs) + D1op.amp_recs = np.asarray(D1op.amp_recs) # forward xx = np.random.normal(0, 1, PAR["ny"] * PAR["nx"] * PAR["nz"]) From b66656777ddd987633940d60dca5a00267f6c6ae Mon Sep 17 00:00:00 2001 From: mrava87 Date: Wed, 18 Jun 2025 23:29:08 +0100 Subject: [PATCH 4/5] test: fix test_kirchhoff for cuda engine --- pytests/test_kirchhoff.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/pytests/test_kirchhoff.py b/pytests/test_kirchhoff.py index 4a37008b6..da672b0dd 100644 --- a/pytests/test_kirchhoff.py +++ b/pytests/test_kirchhoff.py @@ -6,7 +6,7 @@ import cupy as np from cupy.testing import assert_array_almost_equal - backend = "cuda" + backend = "cupy" else: import numpy as np from numpy.testing import assert_array_almost_equal @@ -215,6 +215,8 @@ def test_traveltime_table(): @pytest.mark.parametrize("par", [(par1), (par2), (par3), (par1d), (par2d), (par3d)]) def test_kirchhoff2d(par): """Dot-test for Kirchhoff operator""" + if backend == "cupy" and par["mode"] == "byot": + pytest.skip("cuda engine not available for single trav table") vel = v0 * np.ones((PAR["nx"], PAR["nz"])) if par["mode"] == "byot": @@ -244,7 +246,7 @@ def test_kirchhoff2d(par): trav=trav, amp=amp, mode=par["mode"], - engine=backend, + engine="numpy" if backend == "numpy" else "cuda", ) if par["mode"] == "byot": Dop.trav = np.asarray(Dop.trav) @@ -266,6 +268,8 @@ def test_kirchhoff2d(par): @pytest.mark.parametrize("par", [(par1), (par2), (par3)]) def test_kirchhoff3d(par): """Dot-test for Kirchhoff operator""" + if backend == "cupy" and par["mode"] == "byot": + pytest.skip("cuda engine not available for single trav table") vel = v0 * np.ones((PAR["ny"], PAR["nx"], PAR["nz"])) if par["mode"] == "byot": @@ -297,7 +301,7 @@ def test_kirchhoff3d(par): y=y, trav=trav, mode=par["mode"], - engine=backend, + engine="numpy" if backend == "numpy" else "cuda", ) if par["mode"] == "byot": Dop.trav = np.asarray(Dop.trav) @@ -338,7 +342,7 @@ def test_kirchhoff2d_trav_vs_travsrcrec(par): mode=par["mode"], dynamic=par["dynamic"], angleaperture=None, - engine=backend, + engine="numpy" if backend == "numpy" else "cuda", ) Dop.trav_srcs = np.asarray(Dop.trav_srcs) Dop.trav_recs = np.asarray(Dop.trav_recs) @@ -369,7 +373,7 @@ def test_kirchhoff2d_trav_vs_travsrcrec(par): mode=par["mode"], dynamic=par["dynamic"], angleaperture=None, - engine=backend, + engine="numpy" if backend == "numpy" else "cuda", ) D1op.trav_srcs = np.asarray(D1op.trav_srcs) D1op.trav_recs = np.asarray(D1op.trav_recs) @@ -408,7 +412,7 @@ def test_kirchhoff3d_trav_vs_travsrcrec(par): wavc, y=y, mode=par["mode"], - engine=backend, + engine="numpy" if backend == "numpy" else "cuda", ) Dop.trav_srcs = np.asarray(Dop.trav_srcs) Dop.trav_recs = np.asarray(Dop.trav_recs) @@ -439,7 +443,7 @@ def test_kirchhoff3d_trav_vs_travsrcrec(par): y=y, trav=trav, mode=par["mode"], - engine=backend, + engine="numpy" if backend == "numpy" else "cuda", ) D1op.trav_srcs = np.asarray(D1op.trav_srcs) D1op.trav_recs = np.asarray(D1op.trav_recs) From 768f9c55f1fe411d730818123d480f50cc639369 Mon Sep 17 00:00:00 2001 From: mrava87 Date: Thu, 19 Jun 2025 22:34:24 +0100 Subject: [PATCH 5/5] doc: minor updates to docstring removing mention of dynamic not implemented with cuda --- pylops/waveeqprocessing/_kirchhoff_cuda.py | 11 ++++++----- pylops/waveeqprocessing/kirchhoff.py | 5 ++--- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/pylops/waveeqprocessing/_kirchhoff_cuda.py b/pylops/waveeqprocessing/_kirchhoff_cuda.py index 4a73347e1..f987960f5 100644 --- a/pylops/waveeqprocessing/_kirchhoff_cuda.py +++ b/pylops/waveeqprocessing/_kirchhoff_cuda.py @@ -7,10 +7,10 @@ class _KirchhoffCudaHelper: - """A helper class to perform Kirchhoff demigration/migration using Numba CUDA. + """Helper class for Kirchhoff operator using Numba CUDA. - This class provides methods to compute the forward and adjoint operations for the - Kirchhoff operator, utilizing GPU acceleration through Numba's CUDA capabilities. + This class provides methods to compute the forward and adjoint operations of the + Kirchhoff operator utilizing GPU acceleration through Numba's CUDA capabilities. Parameters ---------- @@ -39,7 +39,8 @@ def _grid_setup(self): This method configures the number of blocks and threads per block for CUDA kernels, depending on the number of sources and receivers. """ - # use warp size as number of threads per block + # use half of warp size as number of threads per block + # although this will be ideally lifted up to the user current_device = cuda.get_current_device() warp = current_device.WARP_SIZE // 2 self.num_threads_per_blocks = (warp, warp) @@ -339,7 +340,7 @@ def _call_kinematic(self, opt, *inputs): return y_d def _call_dynamic(self, opt, *inputs): - """Synamic computations using CUDA. + """Dynamic computations using CUDA. This method handles data preparation and execution of CUDA kernels for both forward and adjoint operations of dynamic operator. diff --git a/pylops/waveeqprocessing/kirchhoff.py b/pylops/waveeqprocessing/kirchhoff.py index 3f8e0a10f..a3aa9a22b 100644 --- a/pylops/waveeqprocessing/kirchhoff.py +++ b/pylops/waveeqprocessing/kirchhoff.py @@ -111,8 +111,7 @@ class Kirchhoff(LinearOperator): Deprecated, will be removed in v3.0.0. Simply kept for back-compatibility with previous implementation, but effectively not affecting the behaviour of the operator. engine : :obj:`str`, optional - Engine used for computations (``numpy``, ``numba`` or ``cuda``). Note that the ``cuda`` engine - currently supports only ``dynamic=False``. + Engine used for computations (``numpy``, ``numba`` or ``cuda``). dtype : :obj:`str`, optional Type of elements in input array. name : :obj:`str`, optional @@ -134,7 +133,7 @@ class Kirchhoff(LinearOperator): If ``mode`` is neither ``analytic``, ``eikonal``, or ``byot``. NotImplementedError - If ``dynamic=True`` and ``engine="cuda"`` + If ``engine="cuda"`` and ``trav`` is provided as a single table Notes -----