Skip to content

Commit 39b2fdc

Browse files
committed
Fixes #901: add GPU (CuPy) backends for proximity, allocation, direction
Add CUDA brute-force nearest-target kernel with device functions for Euclidean, Manhattan, and great-circle distance metrics. Each thread processes one pixel, scanning all targets to find the nearest. Supports proximity (distance), allocation (target value), and direction modes. Adds _process_cupy() and _process_dask_cupy() host functions with dispatch wired into _process(). Tests parametrized over cupy backend.
1 parent cffcc4e commit 39b2fdc

File tree

3 files changed

+225
-52
lines changed

3 files changed

+225
-52
lines changed

README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -201,10 +201,10 @@ In the GIS world, rasters are used for representing continuous phenomena (e.g. e
201201

202202
| Name | Description | NumPy xr.DataArray | Dask xr.DataArray | CuPy GPU xr.DataArray | Dask GPU xr.DataArray |
203203
|:----------:|:------------|:----------------------:|:--------------------:|:-------------------:|:------:|
204-
| [Allocation](xrspatial/proximity.py) | Assigns each cell to the identity of the nearest source feature | ✅️ || | |
204+
| [Allocation](xrspatial/proximity.py) | Assigns each cell to the identity of the nearest source feature | ✅️ | ✅️ | ✅️ | ✅️ |
205205
| [Cost Distance](xrspatial/cost_distance.py) | Computes minimum accumulated cost to the nearest source through a friction surface | ✅️ || 🔄 | 🔄 |
206-
| [Direction](xrspatial/proximity.py) | Computes the direction from each cell to the nearest source feature | ✅️ || | |
207-
| [Proximity](xrspatial/proximity.py) | Computes the distance from each cell to the nearest source feature | ✅️ || | |
206+
| [Direction](xrspatial/proximity.py) | Computes the direction from each cell to the nearest source feature | ✅️ | ✅️ | ✅️ | ✅️ |
207+
| [Proximity](xrspatial/proximity.py) | Computes the distance from each cell to the nearest source feature | ✅️ | ✅️ | ✅️ | ✅️ |
208208

209209
--------
210210

xrspatial/proximity.py

Lines changed: 206 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,23 @@
1212
except ImportError:
1313
cKDTree = None
1414

15+
import math as _math
16+
1517
import numpy as np
1618
import xarray as xr
17-
from numba import prange
19+
from numba import cuda, prange
20+
21+
try:
22+
import cupy
23+
except ImportError:
24+
class cupy(object):
25+
ndarray = False
1826

1927
from xrspatial.pathfinding import _available_memory_bytes
20-
from xrspatial.utils import get_dataarray_resolution, ngjit
28+
from xrspatial.utils import (
29+
cuda_args, get_dataarray_resolution, has_cuda_and_cupy,
30+
is_cupy_array, is_dask_cupy, ngjit,
31+
)
2132
from xrspatial.dataset_support import supports_dataset
2233

2334
EUCLIDEAN = 0
@@ -281,6 +292,144 @@ def _vectorized_calc_direction(x1, x2, y1, y2):
281292
return result.astype(np.float32)
282293

283294

295+
# =====================================================================
296+
# GPU (CuPy / CUDA) backend
297+
# =====================================================================
298+
299+
@cuda.jit(device=True)
def _gpu_euclidean_distance(x1, x2, y1, y2):
    """Return the straight-line distance between (x1, y1) and (x2, y2).

    CUDA device function; callable only from other device code.
    """
    delta_x = x1 - x2
    delta_y = y1 - y2
    squared_length = delta_x * delta_x + delta_y * delta_y
    return _math.sqrt(squared_length)
304+
305+
306+
@cuda.jit(device=True)
def _gpu_manhattan_distance(x1, x2, y1, y2):
    """Return the taxicab (L1) distance between (x1, y1) and (x2, y2).

    CUDA device function; callable only from other device code.
    """
    horizontal = abs(x1 - x2)
    vertical = abs(y1 - y2)
    return horizontal + vertical
309+
310+
311+
@cuda.jit(device=True)
def _gpu_great_circle_distance(x1, x2, y1, y2):
    """Return the haversine distance between two points given in degrees.

    ``x`` values are used as longitudes and ``y`` values as latitudes.
    The result is scaled by a sphere radius of 6378137.0 (presumably the
    WGS84 equatorial radius in metres -- confirm against the CPU
    implementation elsewhere in this module).

    CUDA device function; callable only from other device code.
    """
    # 0.017453292519943295 == pi / 180 (degrees -> radians).
    lat1 = y1 * 0.017453292519943295
    lon1 = x1 * 0.017453292519943295
    lat2 = y2 * 0.017453292519943295
    lon2 = x2 * 0.017453292519943295
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = (_math.sin(dlat / 2.0) ** 2
         + _math.cos(lat1) * _math.cos(lat2)
         * _math.sin(dlon / 2.0) ** 2)
    # Floating-point rounding can leave ``a`` a hair above 1.0 for
    # (near-)antipodal points.  asin(sqrt(a)) would then be NaN, and a
    # NaN distance never compares as "nearer", so the target would be
    # silently dropped.  Cap at 1.0; NaN inputs still propagate because
    # the comparison below is False for NaN.
    if a > 1.0:
        a = 1.0
    return 6378137.0 * 2.0 * _math.asin(_math.sqrt(a))
323+
324+
325+
@cuda.jit(device=True)
def _gpu_distance(x1, x2, y1, y2, metric):
    """Dispatch to the device distance function selected by ``metric``.

    ``EUCLIDEAN`` and ``GREAT_CIRCLE`` are matched explicitly; every other
    value falls through to the Manhattan distance.
    """
    if metric == EUCLIDEAN:
        return _gpu_euclidean_distance(x1, x2, y1, y2)
    if metric == GREAT_CIRCLE:
        return _gpu_great_circle_distance(x1, x2, y1, y2)
    return _gpu_manhattan_distance(x1, x2, y1, y2)
333+
334+
335+
@cuda.jit(device=True)
def _gpu_calc_direction(x1, x2, y1, y2):
    """Return the bearing in degrees from (x1, y1) towards (x2, y2).

    Coincident points yield 0.0.  Otherwise the atan2 angle is remapped
    so that the result lies in (0, 360].

    CUDA device function; callable only from other device code.
    """
    if x1 == x2 and y1 == y2:
        return 0.0

    delta_x = x2 - x1
    delta_y = y2 - y1
    # 57.29578 is a truncated 180 / pi (radians -> degrees).
    angle = _math.atan2(-delta_y, delta_x) * 57.29578

    # Angles above 90 wrap around the full circle; everything else
    # (negative angles and 0..90) is simply reflected about 90.
    if angle > 90.0:
        return 360.0 - angle + 90.0
    return 90.0 - angle
349+
350+
351+
@cuda.jit
def _proximity_cuda_kernel(target_xs, target_ys, target_vals, n_targets,
                           y_coords, x_coords, max_distance,
                           distance_metric, process_mode, out):
    """Brute-force nearest-target search, one thread per output pixel.

    Each thread scans the first ``n_targets`` entries of ``target_xs`` /
    ``target_ys`` and keeps the closest one under ``distance_metric``
    (the earliest target wins ties).  If that target lies within
    ``max_distance`` the thread writes, depending on ``process_mode``:

    * ``PROXIMITY``  -- the distance to the target,
    * ``ALLOCATION`` -- the matching entry of ``target_vals``,
    * anything else  -- the direction from the pixel to the target.

    Pixels with no qualifying target are left untouched, so ``out`` keeps
    whatever the caller pre-filled it with.
    """
    row, col = cuda.grid(2)
    # Threads that fall outside the raster have nothing to do.
    if row >= out.shape[0] or col >= out.shape[1]:
        return

    cell_x = x_coords[col]
    cell_y = y_coords[row]

    nearest = -1
    nearest_dist = 1.0e38  # sentinel larger than any expected distance

    for t in range(n_targets):
        dist = _gpu_distance(
            cell_x, target_xs[t], cell_y, target_ys[t], distance_metric)
        if dist < nearest_dist:
            nearest_dist = dist
            nearest = t

    if nearest < 0:
        return
    # Written as a negated "<=" so a NaN max_distance rejects the pixel.
    if not (nearest_dist <= max_distance):
        return

    if process_mode == PROXIMITY:
        out[row, col] = nearest_dist
    elif process_mode == ALLOCATION:
        out[row, col] = target_vals[nearest]
    else:
        out[row, col] = _gpu_calc_direction(
            cell_x, target_xs[nearest], cell_y, target_ys[nearest])
379+
380+
381+
def _process_cupy(raster_data, x_coords, y_coords, target_values,
                  max_distance, distance_metric, process_mode):
    """Run the brute-force nearest-target CUDA kernel on a CuPy raster.

    Target pixels are the finite cells whose value is in
    ``target_values``; when ``target_values`` is empty, every finite
    non-zero cell is a target.  Returns a float32 CuPy array shaped like
    ``raster_data``; cells with no target within ``max_distance`` (or all
    cells, if the raster has no targets) are NaN.
    """
    import cupy as cp

    # Locate the target pixels on the device.
    finite = cp.isfinite(raster_data)
    if len(target_values) == 0:
        is_target = finite & (raster_data != 0)
    else:
        is_target = cp.isin(raster_data, cp.asarray(target_values)) & finite

    rows, cols = cp.where(is_target)
    n_targets = int(rows.shape[0])

    # NaN-filled output: the kernel only overwrites pixels it can resolve.
    result = cp.full(raster_data.shape, cp.nan, dtype=cp.float32)
    if n_targets == 0:
        return result

    # World coordinates of every pixel axis, and of each target pixel.
    ys_device = cp.asarray(y_coords, dtype=cp.float64)
    xs_device = cp.asarray(x_coords, dtype=cp.float64)
    target_ys = ys_device[rows]
    target_xs = xs_device[cols]
    target_vals = raster_data[rows, cols].astype(cp.float32)

    griddim, blockdim = cuda_args(raster_data.shape)
    launch = _proximity_cuda_kernel[griddim, blockdim]
    launch(
        target_xs, target_ys, target_vals, n_targets,
        ys_device, xs_device,
        np.float64(max_distance),
        np.int32(distance_metric),
        np.int32(process_mode),
        result,
    )

    return result
420+
421+
422+
def _process_dask_cupy(raster, x_coords, y_coords, target_values,
                       max_distance, distance_metric, process_mode):
    """Dask+CuPy backend: materialise the raster, then run the GPU kernel.

    ``raster.data.compute()`` collapses every chunk into one in-memory
    array (presumably a CuPy array for a Dask+CuPy raster), so the whole
    raster has to fit on a single device.  The result from
    ``_process_cupy`` is wrapped back into a Dask array that reuses the
    input's chunk layout.
    """
    # No direct CuPy calls are needed here; _process_cupy does its own
    # import, so the previously unused local ``import cupy`` is dropped.
    device_data = raster.data.compute()
    result = _process_cupy(device_data, x_coords, y_coords, target_values,
                           max_distance, distance_metric, process_mode)
    return da.from_array(result, chunks=raster.data.chunks)
431+
432+
284433
@ngjit
285434
def _process_proximity_line(
286435
source_line,
@@ -1062,47 +1211,66 @@ def _process_dask(raster, xs, ys):
10621211
ys = np.repeat(y_coords, raster.shape[1]).reshape(raster.shape)
10631212
result = _process_numpy(raster.data, xs, ys)
10641213

1065-
elif da is not None and isinstance(raster.data, da.Array):
1066-
use_kdtree = (
1067-
cKDTree is not None
1068-
and distance_metric in (EUCLIDEAN, MANHATTAN)
1069-
and max_distance >= max_possible_distance
1214+
elif has_cuda_and_cupy() and is_cupy_array(raster.data):
1215+
result = _process_cupy(
1216+
raster.data, x_coords, y_coords,
1217+
target_values, max_distance, distance_metric, process_mode,
10701218
)
1071-
if use_kdtree:
1072-
result = _process_dask_kdtree(
1219+
1220+
elif da is not None and isinstance(raster.data, da.Array):
1221+
if has_cuda_and_cupy() and is_dask_cupy(raster):
1222+
result = _process_dask_cupy(
10731223
raster, x_coords, y_coords,
1074-
target_values, max_distance, distance_metric,
1075-
process_mode,
1224+
target_values, max_distance, distance_metric, process_mode,
10761225
)
10771226
else:
1078-
# Memory guard: unbounded distance on large rasters can OOM
1079-
if max_distance >= max_possible_distance:
1080-
H, W = raster.shape
1081-
required = H * W * 4 * 3 # raster + xs + ys, float32
1082-
avail = _available_memory_bytes()
1083-
if required > 0.8 * avail:
1084-
if cKDTree is None:
1085-
raise MemoryError(
1086-
"Raster too large for single-chunk processing "
1087-
"and scipy is not installed for memory-safe "
1088-
"KDTree path. Install scipy or set a finite "
1089-
"max_distance."
1090-
)
1091-
else: # must be GREAT_CIRCLE
1092-
raise MemoryError(
1093-
"GREAT_CIRCLE with unbounded max_distance on "
1094-
"this raster would exceed available memory. "
1095-
"Set a finite max_distance."
1096-
)
1227+
use_kdtree = (
1228+
cKDTree is not None
1229+
and distance_metric in (EUCLIDEAN, MANHATTAN)
1230+
and max_distance >= max_possible_distance
1231+
)
1232+
if use_kdtree:
1233+
result = _process_dask_kdtree(
1234+
raster, x_coords, y_coords,
1235+
target_values, max_distance, distance_metric,
1236+
process_mode,
1237+
)
1238+
else:
1239+
# Memory guard: unbounded distance on large rasters can OOM
1240+
if max_distance >= max_possible_distance:
1241+
H, W = raster.shape
1242+
required = H * W * 4 * 3 # raster + xs + ys, float32
1243+
avail = _available_memory_bytes()
1244+
if required > 0.8 * avail:
1245+
if cKDTree is None:
1246+
raise MemoryError(
1247+
"Raster too large for single-chunk processing "
1248+
"and scipy is not installed for memory-safe "
1249+
"KDTree path. Install scipy or set a finite "
1250+
"max_distance."
1251+
)
1252+
else: # must be GREAT_CIRCLE
1253+
raise MemoryError(
1254+
"GREAT_CIRCLE with unbounded max_distance on "
1255+
"this raster would exceed available memory. "
1256+
"Set a finite max_distance."
1257+
)
10971258

1098-
# Existing path: build 2D coordinate arrays as dask arrays
1099-
x_coords_da = da.from_array(x_coords, chunks=x_coords.shape[0])
1100-
y_coords_da = da.from_array(y_coords, chunks=y_coords.shape[0])
1101-
xs = da.tile(x_coords_da, (raster.shape[0], 1))
1102-
ys = da.repeat(y_coords_da, raster.shape[1]).reshape(raster.shape)
1103-
xs = xs.rechunk(raster.chunks)
1104-
ys = ys.rechunk(raster.chunks)
1105-
result = _process_dask(raster, xs, ys)
1259+
# Existing path: build 2D coordinate arrays as dask arrays
1260+
x_coords_da = da.from_array(x_coords, chunks=x_coords.shape[0])
1261+
y_coords_da = da.from_array(y_coords, chunks=y_coords.shape[0])
1262+
xs = da.tile(x_coords_da, (raster.shape[0], 1))
1263+
ys = da.repeat(y_coords_da, raster.shape[1]).reshape(
1264+
raster.shape)
1265+
xs = xs.rechunk(raster.chunks)
1266+
ys = ys.rechunk(raster.chunks)
1267+
result = _process_dask(raster, xs, ys)
1268+
1269+
else:
1270+
raise TypeError(
1271+
f"Unsupported array type {type(raster.data).__name__} "
1272+
f"for proximity/allocation/direction"
1273+
)
11061274

11071275
return result
11081276

xrspatial/tests/test_proximity.py

Lines changed: 16 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,9 @@
1111

1212
from xrspatial import allocation, direction, euclidean_distance, great_circle_distance, proximity
1313
from xrspatial.proximity import _calc_direction
14-
from xrspatial.tests.general_checks import general_output_checks, create_test_raster
14+
from xrspatial.tests.general_checks import (
15+
general_output_checks, create_test_raster, has_cuda_and_cupy,
16+
)
1517

1618

1719
def test_great_circle_distance():
@@ -38,8 +40,11 @@ def test_raster(backend):
3840
raster = xr.DataArray(data, dims=['lat', 'lon'])
3941
raster['lon'] = _lon
4042
raster['lat'] = _lat
43+
if has_cuda_and_cupy() and 'cupy' in backend:
44+
import cupy
45+
raster.data = cupy.asarray(data)
4146
if 'dask' in backend and da is not None:
42-
raster.data = da.from_array(data, chunks=(4, 3))
47+
raster.data = da.from_array(raster.data, chunks=(4, 3))
4348
return raster
4449

4550

@@ -167,43 +172,43 @@ def qgis_proximity_distance_target_values():
167172
return target_values, qgis_result
168173

169174

170-
@pytest.mark.parametrize("backend", ['numpy', 'dask+numpy'])
175+
@pytest.mark.parametrize("backend", ['numpy', 'dask+numpy', 'cupy'])
def test_default_proximity(test_raster, result_default_proximity):
    """Proximity with default arguments matches the reference raster."""
    computed = proximity(test_raster, x='lon', y='lat')
    general_output_checks(
        test_raster, computed, result_default_proximity, verify_dtype=True,
    )
174179

175180

176-
@pytest.mark.parametrize("backend", ['numpy', 'dask+numpy'])
181+
@pytest.mark.parametrize("backend", ['numpy', 'dask+numpy', 'cupy'])
def test_target_proximity(test_raster, result_target_proximity):
    """Proximity restricted to explicit target values matches the reference."""
    wanted_values, reference = result_target_proximity
    computed = proximity(
        test_raster, x='lon', y='lat', target_values=wanted_values,
    )
    general_output_checks(test_raster, computed, reference, verify_dtype=True)
181186

182187

183-
@pytest.mark.parametrize("backend", ['numpy', 'dask+numpy'])
188+
@pytest.mark.parametrize("backend", ['numpy', 'dask+numpy', 'cupy'])
def test_manhattan_proximity(test_raster, result_manhattan_proximity):
    """Proximity under the MANHATTAN metric matches the reference raster."""
    computed = proximity(
        test_raster, x='lon', y='lat', distance_metric='MANHATTAN',
    )
    general_output_checks(
        test_raster, computed, result_manhattan_proximity, verify_dtype=True,
    )
189194

190195

191-
@pytest.mark.parametrize("backend", ['numpy', 'dask+numpy'])
196+
@pytest.mark.parametrize("backend", ['numpy', 'dask+numpy', 'cupy'])
def test_great_circle_proximity(test_raster, result_great_circle_proximity):
    """Proximity under the GREAT_CIRCLE metric matches the reference raster."""
    computed = proximity(
        test_raster, x='lon', y='lat', distance_metric='GREAT_CIRCLE',
    )
    general_output_checks(
        test_raster, computed, result_great_circle_proximity,
        verify_dtype=True,
    )
197202

198203

199-
@pytest.mark.parametrize("backend", ['numpy', 'dask+numpy'])
204+
@pytest.mark.parametrize("backend", ['numpy', 'dask+numpy', 'cupy'])
def test_max_distance_proximity(test_raster, result_max_distance_proximity):
    """Proximity capped by ``max_distance`` matches the reference raster."""
    distance_cap, reference = result_max_distance_proximity
    computed = proximity(
        test_raster, x='lon', y='lat', max_distance=distance_cap,
    )
    general_output_checks(test_raster, computed, reference, verify_dtype=True)
204209

205210

206-
@pytest.mark.parametrize("backend", ['numpy', 'dask+numpy'])
211+
@pytest.mark.parametrize("backend", ['numpy', 'dask+numpy', 'cupy'])
207212
def test_default_allocation(test_raster, result_default_allocation):
208213
allocation_agg = allocation(test_raster, x='lon', y='lat')
209214
general_output_checks(test_raster, allocation_agg, result_default_allocation, verify_dtype=True)
@@ -224,7 +229,7 @@ def test_default_allocation_against_proximity(test_raster, result_default_proxim
224229
np.testing.assert_allclose(result_default_proximity[y, x], d)
225230

226231

227-
@pytest.mark.parametrize("backend", ['numpy', 'dask+numpy'])
232+
@pytest.mark.parametrize("backend", ['numpy', 'dask+numpy', 'cupy'])
228233
def test_max_distance_allocation(test_raster, result_max_distance_allocation):
229234
max_distance, expected_result = result_max_distance_allocation
230235
max_distance_alloc = allocation(test_raster, x='lon', y='lat', max_distance=max_distance)
@@ -247,7 +252,7 @@ def test_calc_direction():
247252
assert (abs(output-expected_output) <= tolerance).all()
248253

249254

250-
@pytest.mark.parametrize("backend", ['numpy', 'dask+numpy'])
255+
@pytest.mark.parametrize("backend", ['numpy', 'dask+numpy', 'cupy'])
251256
def test_default_direction(test_raster, result_default_direction):
252257
direction_agg = direction(test_raster, x='lon', y='lat')
253258
general_output_checks(test_raster, direction_agg, result_default_direction)
@@ -267,7 +272,7 @@ def test_default_direction_against_allocation(test_raster, result_default_alloca
267272
np.testing.assert_allclose(direction_agg.data[y, x], d)
268273

269274

270-
@pytest.mark.parametrize("backend", ['numpy', 'dask+numpy'])
275+
@pytest.mark.parametrize("backend", ['numpy', 'dask+numpy', 'cupy'])
271276
def test_max_distance_direction(test_raster, result_max_distance_direction):
272277
max_distance, expected_result = result_max_distance_direction
273278
max_distance_direction = direction(test_raster, x='lon', y='lat', max_distance=max_distance)

0 commit comments

Comments
 (0)