Skip to content

Commit 593e559

Browse files
authored
Add GPU (CuPy) backends for proximity, allocation, direction (#909)
* Fixes #901: add GPU (CuPy) backends for proximity, allocation, direction Add CUDA brute-force nearest-target kernel with device functions for Euclidean, Manhattan, and great-circle distance metrics. Each thread processes one pixel, scanning all targets to find the nearest. Supports proximity (distance), allocation (target value), and direction modes. Adds _process_cupy() and _process_dask_cupy() host functions with dispatch wired into _process(). Tests parametrized over cupy backend. * Fix OOM in dask+cupy proximity: use map_overlap for bounded, KDTree for unbounded The previous _process_dask_cupy called .compute() which would materialise the entire raster into GPU memory — OOM on large datasets. Bounded max_distance: use da.map_overlap with per-chunk CUDA kernel, so only one chunk + overlap padding is on GPU at a time. Unbounded max_distance: convert dask+cupy to dask+numpy and use the existing KDTree path (CPU-based O(N log T) beats brute-force O(NT), and KDTree is inherently a CPU data structure). Also adds 'dask+cupy' backend to all 9 proximity test parametrize lists.
1 parent cffcc4e commit 593e559

File tree

3 files changed

+286
-52
lines changed

3 files changed

+286
-52
lines changed

README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -201,10 +201,10 @@ In the GIS world, rasters are used for representing continuous phenomena (e.g. e
201201

202202
| Name | Description | NumPy xr.DataArray | Dask xr.DataArray | CuPy GPU xr.DataArray | Dask GPU xr.DataArray |
203203
|:----------:|:------------|:----------------------:|:--------------------:|:-------------------:|:------:|
204-
| [Allocation](xrspatial/proximity.py) | Assigns each cell to the identity of the nearest source feature | ✅️ || | |
204+
| [Allocation](xrspatial/proximity.py) | Assigns each cell to the identity of the nearest source feature | ✅️ || ✅️ | ✅️ |
205205
| [Cost Distance](xrspatial/cost_distance.py) | Computes minimum accumulated cost to the nearest source through a friction surface | ✅️ || 🔄 | 🔄 |
206-
| [Direction](xrspatial/proximity.py) | Computes the direction from each cell to the nearest source feature | ✅️ || | |
207-
| [Proximity](xrspatial/proximity.py) | Computes the distance from each cell to the nearest source feature | ✅️ || | |
206+
| [Direction](xrspatial/proximity.py) | Computes the direction from each cell to the nearest source feature | ✅️ || ✅️ | ✅️ |
207+
| [Proximity](xrspatial/proximity.py) | Computes the distance from each cell to the nearest source feature | ✅️ || ✅️ | ✅️ |
208208

209209
--------
210210

xrspatial/proximity.py

Lines changed: 267 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,23 @@
1212
except ImportError:
1313
cKDTree = None
1414

15+
import math as _math
16+
1517
import numpy as np
1618
import xarray as xr
17-
from numba import prange
19+
from numba import cuda, prange
20+
21+
try:
22+
import cupy
23+
except ImportError:
24+
class cupy(object):
25+
ndarray = False
1826

1927
from xrspatial.pathfinding import _available_memory_bytes
20-
from xrspatial.utils import get_dataarray_resolution, ngjit
28+
from xrspatial.utils import (
29+
cuda_args, get_dataarray_resolution, has_cuda_and_cupy,
30+
is_cupy_array, is_dask_cupy, ngjit,
31+
)
2132
from xrspatial.dataset_support import supports_dataset
2233

2334
EUCLIDEAN = 0
@@ -281,6 +292,182 @@ def _vectorized_calc_direction(x1, x2, y1, y2):
281292
return result.astype(np.float32)
282293

283294

295+
# =====================================================================
296+
# GPU (CuPy / CUDA) backend
297+
# =====================================================================
298+
299+
@cuda.jit(device=True)
def _gpu_euclidean_distance(x1, x2, y1, y2):
    """Device function: straight-line (L2) distance between two points."""
    dist_x = x1 - x2
    dist_y = y1 - y2
    return _math.sqrt(dist_x * dist_x + dist_y * dist_y)
304+
305+
306+
@cuda.jit(device=True)
def _gpu_manhattan_distance(x1, x2, y1, y2):
    """Device function: taxicab (L1) distance between two points."""
    dist_x = x1 - x2
    dist_y = y1 - y2
    return abs(dist_x) + abs(dist_y)
309+
310+
311+
@cuda.jit(device=True)
def _gpu_great_circle_distance(x1, x2, y1, y2):
    """Device function: haversine great-circle distance in meters.

    Inputs are lon/lat pairs in degrees; the sphere radius used is
    6378137.0 m (matches the constant in the original implementation).
    """
    if x1 == x2 and y1 == y2:
        # Identical points: short-circuit to avoid any rounding in the
        # haversine formula producing a tiny non-zero result.
        return 0.0
    # Degrees -> radians; the literal is pi / 180.
    deg2rad = 0.017453292519943295
    phi1 = y1 * deg2rad
    lam1 = x1 * deg2rad
    phi2 = y2 * deg2rad
    lam2 = x2 * deg2rad
    sin_half_dphi = _math.sin((phi2 - phi1) / 2.0)
    sin_half_dlam = _math.sin((lam2 - lam1) / 2.0)
    # Haversine term.
    a = (sin_half_dphi * sin_half_dphi
         + _math.cos(phi1) * _math.cos(phi2)
         * sin_half_dlam * sin_half_dlam)
    return 6378137.0 * 2.0 * _math.asin(_math.sqrt(a))
325+
326+
327+
@cuda.jit(device=True)
def _gpu_distance(x1, x2, y1, y2, metric):
    """Device function: dispatch to the distance routine for ``metric``.

    Any metric value other than EUCLIDEAN or GREAT_CIRCLE falls through
    to Manhattan, mirroring the CPU dispatch.
    """
    if metric == GREAT_CIRCLE:
        return _gpu_great_circle_distance(x1, x2, y1, y2)
    if metric == EUCLIDEAN:
        return _gpu_euclidean_distance(x1, x2, y1, y2)
    return _gpu_manhattan_distance(x1, x2, y1, y2)
335+
336+
337+
@cuda.jit(device=True)
def _gpu_calc_direction(x1, x2, y1, y2):
    """Device function: direction in degrees from (x1, y1) to (x2, y2).

    Returns 0.0 for coincident points. The angle convention mirrors the
    CPU ``_calc_direction`` helper — presumably compass-style degrees;
    confirm against the host implementation.
    """
    if x1 == x2 and y1 == y2:
        return 0.0
    run = x2 - x1
    rise = y2 - y1
    # atan2 angle of the y-reflected vector, converted to degrees
    # (57.29578 is 180 / pi to the precision used by the CPU path).
    angle = _math.atan2(-rise, run) * 57.29578
    if angle > 90.0:
        # Keep the exact expression (360 - a + 90) so floating-point
        # results match the original branch bit-for-bit.
        return 360.0 - angle + 90.0
    # Covers both angle < 0 and 0 <= angle <= 90, which the original
    # handled in two branches with the same formula.
    return 90.0 - angle
351+
352+
353+
@cuda.jit
def _proximity_cuda_kernel(target_xs, target_ys, target_vals, n_targets,
                           y_coords, x_coords, max_distance,
                           distance_metric, process_mode, out):
    """Brute-force nearest-target CUDA kernel: one thread per pixel.

    Each thread scans all ``n_targets`` targets with ``_gpu_distance``,
    keeps the nearest, and — if it lies within ``max_distance`` — writes
    the quantity selected by ``process_mode`` (distance, target value,
    or direction) into ``out``. Pixels with no target in range keep the
    value ``out`` was pre-filled with by the caller.
    """
    row, col = cuda.grid(2)
    # Guard threads launched past the raster edge.
    if row >= out.shape[0] or col >= out.shape[1]:
        return

    pixel_x = x_coords[col]
    pixel_y = y_coords[row]

    # Linear scan for the minimum distance; sentinel larger than any
    # realistic distance value.
    nearest_dist = 1.0e38
    nearest_idx = -1
    for t in range(n_targets):
        dist = _gpu_distance(pixel_x, target_xs[t], pixel_y, target_ys[t],
                             distance_metric)
        if dist < nearest_dist:
            nearest_dist = dist
            nearest_idx = t

    # No target found, or nearest is out of range: leave pixel untouched.
    if nearest_idx < 0 or nearest_dist > max_distance:
        return
    if process_mode == PROXIMITY:
        out[row, col] = nearest_dist
    elif process_mode == ALLOCATION:
        out[row, col] = target_vals[nearest_idx]
    else:
        # DIRECTION mode.
        out[row, col] = _gpu_calc_direction(
            pixel_x, target_xs[nearest_idx], pixel_y, target_ys[nearest_idx])
381+
382+
383+
def _process_cupy(raster_data, x_coords, y_coords, target_values,
                  max_distance, distance_metric, process_mode):
    """Single-GPU proximity/allocation/direction for a CuPy raster.

    Builds the target set on-device, gathers target world coordinates
    and values, then launches ``_proximity_cuda_kernel`` over the whole
    raster. Returns a float32 CuPy array in which pixels with no target
    within ``max_distance`` are NaN.
    """
    import cupy as cp

    # Boolean target mask, computed entirely on the GPU.
    if len(target_values) == 0:
        # No explicit target list: every finite, non-zero pixel counts.
        is_target = cp.isfinite(raster_data) & (raster_data != 0)
    else:
        is_target = cp.isin(raster_data, cp.asarray(target_values))
        is_target &= cp.isfinite(raster_data)

    rows, cols = cp.where(is_target)
    total_targets = int(rows.shape[0])
    if total_targets == 0:
        # Nothing to measure against — all-NaN result.
        return cp.full(raster_data.shape, cp.nan, dtype=cp.float32)

    # World coordinates and raster values of every target pixel.
    y_dev = cp.asarray(y_coords, dtype=cp.float64)
    x_dev = cp.asarray(x_coords, dtype=cp.float64)
    tgt_ys = y_dev[rows]
    tgt_xs = x_dev[cols]
    tgt_vals = raster_data[rows, cols].astype(cp.float32)

    # Pre-fill with NaN; the kernel only writes in-range pixels.
    result = cp.full(raster_data.shape, cp.nan, dtype=cp.float32)

    grid_dim, block_dim = cuda_args(raster_data.shape)
    _proximity_cuda_kernel[grid_dim, block_dim](
        tgt_xs, tgt_ys, tgt_vals, total_targets,
        y_dev, x_dev,
        np.float64(max_distance),
        np.int32(distance_metric),
        np.int32(process_mode),
        result,
    )
    return result
422+
423+
424+
def _process_dask_cupy(raster, x_coords, y_coords, target_values,
                       max_distance, distance_metric, process_mode):
    """Dask+CuPy bounded proximity via map_overlap with per-chunk GPU kernel.

    Each chunk (plus overlap padding of ``max_distance / cellsize`` pixels)
    is processed on GPU independently. Only valid for finite max_distance
    where the padding guarantees all relevant targets are visible within
    each overlapped chunk.
    """
    import cupy as cp

    # Overlap depth in pixels: max_distance expressed in cells, rounded
    # to nearest integer via the +0.5 trick (assumes positive cellsize
    # magnitudes from get_dataarray_resolution).
    cellsize_x, cellsize_y = get_dataarray_resolution(raster)
    pad_y = int(max_distance / abs(cellsize_y) + 0.5)
    pad_x = int(max_distance / abs(cellsize_x) + 0.5)

    # Build 2D coordinate grids as dask+cupy arrays matching raster chunks.
    # Each chunk is small (chunk_h x chunk_w x 8 bytes); the full grid is
    # never materialised.
    x_cp = cp.asarray(x_coords, dtype=cp.float64)
    y_cp = cp.asarray(y_coords, dtype=cp.float64)
    x_da = da.from_array(x_cp, chunks=(x_cp.shape[0],))
    y_da = da.from_array(y_cp, chunks=(y_cp.shape[0],))
    xs = da.tile(x_da, (raster.shape[0], 1)).rechunk(raster.data.chunks)
    ys = da.repeat(y_da, raster.shape[1]).reshape(
        raster.shape).rechunk(raster.data.chunks)

    # Capture closure vars for the chunk function
    tv = target_values
    md = max_distance
    dm = distance_metric
    pm = process_mode

    def _chunk_func(data_chunk, xs_chunk, ys_chunk):
        # Use middle row/col to avoid NaN from boundary padding
        # NOTE(review): this recovers 1-D coordinates by slicing the 2-D
        # grids; for chunks at the raster edge the boundary=np.nan padding
        # still leaves NaN coordinate entries in the padded columns/rows of
        # these slices — presumably harmless because padded pixels are NaN
        # (never targets) and map_overlap trims the padded region, but
        # worth confirming with an edge-chunk test.
        x_1d = xs_chunk[xs_chunk.shape[0] // 2, :]
        y_1d = ys_chunk[:, ys_chunk.shape[1] // 2]
        return _process_cupy(data_chunk, x_1d, y_1d, tv, md, dm, pm)

    # depth=(pad_y, pad_x) pads each chunk so every target within
    # max_distance of a chunk pixel is visible to that chunk's kernel
    # launch; meta tells dask the output is a float32 CuPy array.
    return da.map_overlap(
        _chunk_func,
        raster.data, xs, ys,
        depth=(pad_y, pad_x),
        boundary=np.nan,
        meta=cp.array((), dtype=cp.float32),
    )
469+
470+
284471
@ngjit
285472
def _process_proximity_line(
286473
source_line,
@@ -1062,47 +1249,89 @@ def _process_dask(raster, xs, ys):
10621249
ys = np.repeat(y_coords, raster.shape[1]).reshape(raster.shape)
10631250
result = _process_numpy(raster.data, xs, ys)
10641251

1065-
elif da is not None and isinstance(raster.data, da.Array):
1066-
use_kdtree = (
1067-
cKDTree is not None
1068-
and distance_metric in (EUCLIDEAN, MANHATTAN)
1069-
and max_distance >= max_possible_distance
1252+
elif has_cuda_and_cupy() and is_cupy_array(raster.data):
1253+
result = _process_cupy(
1254+
raster.data, x_coords, y_coords,
1255+
target_values, max_distance, distance_metric, process_mode,
10701256
)
1071-
if use_kdtree:
1072-
result = _process_dask_kdtree(
1257+
1258+
elif da is not None and isinstance(raster.data, da.Array):
1259+
if (has_cuda_and_cupy() and is_dask_cupy(raster)
1260+
and max_distance < max_possible_distance):
1261+
# Bounded dask+cupy: out-of-core GPU via map_overlap
1262+
result = _process_dask_cupy(
10731263
raster, x_coords, y_coords,
1074-
target_values, max_distance, distance_metric,
1075-
process_mode,
1264+
target_values, max_distance, distance_metric, process_mode,
10761265
)
10771266
else:
1078-
# Memory guard: unbounded distance on large rasters can OOM
1079-
if max_distance >= max_possible_distance:
1080-
H, W = raster.shape
1081-
required = H * W * 4 * 3 # raster + xs + ys, float32
1082-
avail = _available_memory_bytes()
1083-
if required > 0.8 * avail:
1084-
if cKDTree is None:
1085-
raise MemoryError(
1086-
"Raster too large for single-chunk processing "
1087-
"and scipy is not installed for memory-safe "
1088-
"KDTree path. Install scipy or set a finite "
1089-
"max_distance."
1090-
)
1091-
else: # must be GREAT_CIRCLE
1092-
raise MemoryError(
1093-
"GREAT_CIRCLE with unbounded max_distance on "
1094-
"this raster would exceed available memory. "
1095-
"Set a finite max_distance."
1096-
)
1267+
# dask+numpy path (or unbounded dask+cupy → convert first)
1268+
was_dask_cupy = has_cuda_and_cupy() and is_dask_cupy(raster)
1269+
if was_dask_cupy:
1270+
import cupy as cp
1271+
# Unbounded: convert to dask+numpy for KDTree/line-sweep
1272+
# (KDTree is CPU-only; O(N log T) beats brute-force O(NT))
1273+
original_chunks = raster.data.chunks
1274+
raster = raster.copy(
1275+
data=raster.data.map_blocks(
1276+
lambda x: x.get(), dtype=raster.dtype,
1277+
meta=np.array(()),
1278+
)
1279+
)
10971280

1098-
# Existing path: build 2D coordinate arrays as dask arrays
1099-
x_coords_da = da.from_array(x_coords, chunks=x_coords.shape[0])
1100-
y_coords_da = da.from_array(y_coords, chunks=y_coords.shape[0])
1101-
xs = da.tile(x_coords_da, (raster.shape[0], 1))
1102-
ys = da.repeat(y_coords_da, raster.shape[1]).reshape(raster.shape)
1103-
xs = xs.rechunk(raster.chunks)
1104-
ys = ys.rechunk(raster.chunks)
1105-
result = _process_dask(raster, xs, ys)
1281+
use_kdtree = (
1282+
cKDTree is not None
1283+
and distance_metric in (EUCLIDEAN, MANHATTAN)
1284+
and max_distance >= max_possible_distance
1285+
)
1286+
if use_kdtree:
1287+
result = _process_dask_kdtree(
1288+
raster, x_coords, y_coords,
1289+
target_values, max_distance, distance_metric,
1290+
process_mode,
1291+
)
1292+
else:
1293+
# Memory guard: unbounded distance on large rasters can OOM
1294+
if max_distance >= max_possible_distance:
1295+
H, W = raster.shape
1296+
required = H * W * 4 * 3 # raster + xs + ys, float32
1297+
avail = _available_memory_bytes()
1298+
if required > 0.8 * avail:
1299+
if cKDTree is None:
1300+
raise MemoryError(
1301+
"Raster too large for single-chunk processing "
1302+
"and scipy is not installed for memory-safe "
1303+
"KDTree path. Install scipy or set a finite "
1304+
"max_distance."
1305+
)
1306+
else: # must be GREAT_CIRCLE
1307+
raise MemoryError(
1308+
"GREAT_CIRCLE with unbounded max_distance on "
1309+
"this raster would exceed available memory. "
1310+
"Set a finite max_distance."
1311+
)
1312+
1313+
# Existing path: build 2D coordinate arrays as dask arrays
1314+
x_coords_da = da.from_array(x_coords, chunks=x_coords.shape[0])
1315+
y_coords_da = da.from_array(y_coords, chunks=y_coords.shape[0])
1316+
xs = da.tile(x_coords_da, (raster.shape[0], 1))
1317+
ys = da.repeat(y_coords_da, raster.shape[1]).reshape(
1318+
raster.shape)
1319+
xs = xs.rechunk(raster.chunks)
1320+
ys = ys.rechunk(raster.chunks)
1321+
result = _process_dask(raster, xs, ys)
1322+
1323+
# Convert result back to dask+cupy if input was dask+cupy
1324+
if was_dask_cupy:
1325+
result = result.map_blocks(
1326+
cp.asarray, dtype=result.dtype,
1327+
meta=cp.array((), dtype=result.dtype),
1328+
)
1329+
1330+
else:
1331+
raise TypeError(
1332+
f"Unsupported array type {type(raster.data).__name__} "
1333+
f"for proximity/allocation/direction"
1334+
)
11061335

11071336
return result
11081337

0 commit comments

Comments
 (0)