Skip to content

Commit d83f010

Browse files
committed
Add CUDA resampling kernels for end-to-end GPU reproject (#1045)
Native CUDA nearest, bilinear, and cubic (Catmull-Rom) resampling kernels replace cupyx.scipy.ndimage.map_coordinates on the CUDA projection path. When that path produces on-device coordinates, the entire pipeline now stays on the GPU with no CPU round trip; coordinates computed by the CPU fallback still go through the existing map_coordinates-based resampler. Full reproject pipeline benchmark (2048x2048, bilinear, 4326->UTM): GPU end-to-end 78 ms vs. CPU Numba 161 ms, a 2.1x speedup.
1 parent 39d534a commit d83f010

2 files changed

Lines changed: 435 additions & 9 deletions

File tree

xrspatial/reproject/__init__.py

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,12 @@
1919
_compute_output_grid,
2020
_make_output_coords,
2121
)
22-
from ._interpolate import _resample_cupy, _resample_numpy, _validate_resampling
22+
from ._interpolate import (
23+
_resample_cupy,
24+
_resample_cupy_native,
25+
_resample_numpy,
26+
_validate_resampling,
27+
)
2328
from ._merge import _merge_arrays_cupy, _merge_arrays_numpy, _validate_strategy
2429
from ._transform import ApproximateTransform
2530

@@ -258,9 +263,8 @@ def _reproject_chunk_cupy(
258263
r_max = int(cp.ceil(cp.nanmax(src_row_px)).get()) + 3
259264
c_min = int(cp.floor(cp.nanmin(src_col_px)).get()) - 2
260265
c_max = int(cp.ceil(cp.nanmax(src_col_px)).get()) + 3
261-
# Convert to numpy for downstream resampling
262-
src_row_px = cp.asnumpy(src_row_px)
263-
src_col_px = cp.asnumpy(src_col_px)
266+
# Keep coordinates as CuPy arrays for native CUDA resampling
267+
_use_native_cuda = True
264268
else:
265269
# CPU fallback (Numba JIT or pyproj)
266270
src_y, src_x = _transform_coords(
@@ -283,6 +287,7 @@ def _reproject_chunk_cupy(
283287
r_max = int(np.ceil(np.nanmax(src_row_px))) + 3
284288
c_min = int(np.floor(np.nanmin(src_col_px))) - 2
285289
c_max = int(np.ceil(np.nanmax(src_col_px))) + 3
290+
_use_native_cuda = False
286291

287292
if r_min >= src_h or r_max <= 0 or c_min >= src_w or c_max <= 0:
288293
return cp.full(chunk_shape, nodata, dtype=cp.float64)
@@ -299,14 +304,21 @@ def _reproject_chunk_cupy(
299304
window = cp.asarray(window)
300305
window = window.astype(cp.float64)
301306

302-
# Convert sentinel nodata to NaN
307+
# Adjust coordinates relative to window (stays on GPU if CuPy)
308+
local_row = src_row_px - r_min_clip
309+
local_col = src_col_px - c_min_clip
310+
311+
if _use_native_cuda:
312+
# Coordinates are already CuPy arrays -- use native CUDA kernels
313+
# (nodata->NaN conversion is handled inside _resample_cupy_native)
314+
return _resample_cupy_native(window, local_row, local_col,
315+
resampling=resampling, nodata=nodata)
316+
317+
# CPU coordinates -- convert sentinel nodata to NaN before map_coordinates
303318
if not np.isnan(nodata):
304319
window = window.copy()
305320
window[window == nodata] = cp.nan
306321

307-
local_row = src_row_px - r_min_clip
308-
local_col = src_col_px - c_min_clip
309-
310322
return _resample_cupy(window, local_row, local_col,
311323
resampling=resampling, nodata=nodata)
312324

0 commit comments

Comments
 (0)