|
12 | 12 | except ImportError: |
13 | 13 | cKDTree = None |
14 | 14 |
|
| 15 | +import math as _math |
| 16 | + |
15 | 17 | import numpy as np |
16 | 18 | import xarray as xr |
17 | | -from numba import prange |
| 19 | +from numba import cuda, prange |
| 20 | + |
| 21 | +try: |
| 22 | + import cupy |
| 23 | +except ImportError: |
| 24 | + class cupy(object): |
| 25 | + ndarray = False |
18 | 26 |
|
19 | 27 | from xrspatial.pathfinding import _available_memory_bytes |
20 | | -from xrspatial.utils import get_dataarray_resolution, ngjit |
| 28 | +from xrspatial.utils import ( |
| 29 | + cuda_args, get_dataarray_resolution, has_cuda_and_cupy, |
| 30 | + is_cupy_array, is_dask_cupy, ngjit, |
| 31 | +) |
21 | 32 | from xrspatial.dataset_support import supports_dataset |
22 | 33 |
|
23 | 34 | EUCLIDEAN = 0 |
@@ -281,6 +292,144 @@ def _vectorized_calc_direction(x1, x2, y1, y2): |
281 | 292 | return result.astype(np.float32) |
282 | 293 |
|
283 | 294 |
|
| 295 | +# ===================================================================== |
| 296 | +# GPU (CuPy / CUDA) backend |
| 297 | +# ===================================================================== |
| 298 | + |
| 299 | +@cuda.jit(device=True) |
| 300 | +def _gpu_euclidean_distance(x1, x2, y1, y2): |
| 301 | + dx = x1 - x2 |
| 302 | + dy = y1 - y2 |
| 303 | + return _math.sqrt(dx * dx + dy * dy) |
| 304 | + |
| 305 | + |
| 306 | +@cuda.jit(device=True) |
| 307 | +def _gpu_manhattan_distance(x1, x2, y1, y2): |
| 308 | + return abs(x1 - x2) + abs(y1 - y2) |
| 309 | + |
| 310 | + |
| 311 | +@cuda.jit(device=True) |
| 312 | +def _gpu_great_circle_distance(x1, x2, y1, y2): |
| 313 | + lat1 = y1 * 0.017453292519943295 |
| 314 | + lon1 = x1 * 0.017453292519943295 |
| 315 | + lat2 = y2 * 0.017453292519943295 |
| 316 | + lon2 = x2 * 0.017453292519943295 |
| 317 | + dlon = lon2 - lon1 |
| 318 | + dlat = lat2 - lat1 |
| 319 | + a = (_math.sin(dlat / 2.0) ** 2 |
| 320 | + + _math.cos(lat1) * _math.cos(lat2) |
| 321 | + * _math.sin(dlon / 2.0) ** 2) |
| 322 | + return 6378137.0 * 2.0 * _math.asin(_math.sqrt(a)) |
| 323 | + |
| 324 | + |
| 325 | +@cuda.jit(device=True) |
| 326 | +def _gpu_distance(x1, x2, y1, y2, metric): |
| 327 | + if metric == EUCLIDEAN: |
| 328 | + return _gpu_euclidean_distance(x1, x2, y1, y2) |
| 329 | + elif metric == GREAT_CIRCLE: |
| 330 | + return _gpu_great_circle_distance(x1, x2, y1, y2) |
| 331 | + else: |
| 332 | + return _gpu_manhattan_distance(x1, x2, y1, y2) |
| 333 | + |
| 334 | + |
| 335 | +@cuda.jit(device=True) |
| 336 | +def _gpu_calc_direction(x1, x2, y1, y2): |
| 337 | + if x1 == x2 and y1 == y2: |
| 338 | + return 0.0 |
| 339 | + dx = x2 - x1 |
| 340 | + dy = y2 - y1 |
| 341 | + d = _math.atan2(-dy, dx) * 57.29578 |
| 342 | + if d < 0.0: |
| 343 | + d = 90.0 - d |
| 344 | + elif d > 90.0: |
| 345 | + d = 360.0 - d + 90.0 |
| 346 | + else: |
| 347 | + d = 90.0 - d |
| 348 | + return d |
| 349 | + |
| 350 | + |
| 351 | +@cuda.jit |
| 352 | +def _proximity_cuda_kernel(target_xs, target_ys, target_vals, n_targets, |
| 353 | + y_coords, x_coords, max_distance, |
| 354 | + distance_metric, process_mode, out): |
| 355 | + iy, ix = cuda.grid(2) |
| 356 | + if iy >= out.shape[0] or ix >= out.shape[1]: |
| 357 | + return |
| 358 | + |
| 359 | + px = x_coords[ix] |
| 360 | + py = y_coords[iy] |
| 361 | + |
| 362 | + best_dist = 1.0e38 |
| 363 | + best_idx = -1 |
| 364 | + |
| 365 | + for k in range(n_targets): |
| 366 | + d = _gpu_distance(px, target_xs[k], py, target_ys[k], distance_metric) |
| 367 | + if d < best_dist: |
| 368 | + best_dist = d |
| 369 | + best_idx = k |
| 370 | + |
| 371 | + if best_idx >= 0 and best_dist <= max_distance: |
| 372 | + if process_mode == PROXIMITY: |
| 373 | + out[iy, ix] = best_dist |
| 374 | + elif process_mode == ALLOCATION: |
| 375 | + out[iy, ix] = target_vals[best_idx] |
| 376 | + else: |
| 377 | + out[iy, ix] = _gpu_calc_direction( |
| 378 | + px, target_xs[best_idx], py, target_ys[best_idx]) |
| 379 | + |
| 380 | + |
| 381 | +def _process_cupy(raster_data, x_coords, y_coords, target_values, |
| 382 | + max_distance, distance_metric, process_mode): |
| 383 | + """GPU proximity using CUDA brute-force nearest-target kernel.""" |
| 384 | + import cupy as cp |
| 385 | + |
| 386 | + # Find target pixels on GPU |
| 387 | + if len(target_values) == 0: |
| 388 | + mask = cp.isfinite(raster_data) & (raster_data != 0) |
| 389 | + else: |
| 390 | + mask = cp.isin(raster_data, cp.asarray(target_values)) |
| 391 | + mask &= cp.isfinite(raster_data) |
| 392 | + |
| 393 | + target_rows, target_cols = cp.where(mask) |
| 394 | + n_targets = int(target_rows.shape[0]) |
| 395 | + |
| 396 | + if n_targets == 0: |
| 397 | + return cp.full(raster_data.shape, cp.nan, dtype=cp.float32) |
| 398 | + |
| 399 | + # Collect target world-coordinates and values |
| 400 | + y_dev = cp.asarray(y_coords, dtype=cp.float64) |
| 401 | + x_dev = cp.asarray(x_coords, dtype=cp.float64) |
| 402 | + target_ys = y_dev[target_rows] |
| 403 | + target_xs = x_dev[target_cols] |
| 404 | + target_vals = raster_data[target_rows, target_cols].astype(cp.float32) |
| 405 | + |
| 406 | + # Pre-fill output with NaN (pixels with no target within range stay NaN) |
| 407 | + out = cp.full(raster_data.shape, cp.nan, dtype=cp.float32) |
| 408 | + |
| 409 | + griddim, blockdim = cuda_args(raster_data.shape) |
| 410 | + _proximity_cuda_kernel[griddim, blockdim]( |
| 411 | + target_xs, target_ys, target_vals, n_targets, |
| 412 | + y_dev, x_dev, |
| 413 | + np.float64(max_distance), |
| 414 | + np.int32(distance_metric), |
| 415 | + np.int32(process_mode), |
| 416 | + out, |
| 417 | + ) |
| 418 | + |
| 419 | + return out |
| 420 | + |
| 421 | + |
| 422 | +def _process_dask_cupy(raster, x_coords, y_coords, target_values, |
| 423 | + max_distance, distance_metric, process_mode): |
| 424 | + """Dask+CuPy backend: compute to cupy, run GPU kernel.""" |
| 425 | + import cupy as cp |
| 426 | + |
| 427 | + cp_data = raster.data.compute() |
| 428 | + result = _process_cupy(cp_data, x_coords, y_coords, target_values, |
| 429 | + max_distance, distance_metric, process_mode) |
| 430 | + return da.from_array(result, chunks=raster.data.chunks) |
| 431 | + |
| 432 | + |
284 | 433 | @ngjit |
285 | 434 | def _process_proximity_line( |
286 | 435 | source_line, |
@@ -1062,47 +1211,66 @@ def _process_dask(raster, xs, ys): |
1062 | 1211 | ys = np.repeat(y_coords, raster.shape[1]).reshape(raster.shape) |
1063 | 1212 | result = _process_numpy(raster.data, xs, ys) |
1064 | 1213 |
|
1065 | | - elif da is not None and isinstance(raster.data, da.Array): |
1066 | | - use_kdtree = ( |
1067 | | - cKDTree is not None |
1068 | | - and distance_metric in (EUCLIDEAN, MANHATTAN) |
1069 | | - and max_distance >= max_possible_distance |
| 1214 | + elif has_cuda_and_cupy() and is_cupy_array(raster.data): |
| 1215 | + result = _process_cupy( |
| 1216 | + raster.data, x_coords, y_coords, |
| 1217 | + target_values, max_distance, distance_metric, process_mode, |
1070 | 1218 | ) |
1071 | | - if use_kdtree: |
1072 | | - result = _process_dask_kdtree( |
| 1219 | + |
| 1220 | + elif da is not None and isinstance(raster.data, da.Array): |
| 1221 | + if has_cuda_and_cupy() and is_dask_cupy(raster): |
| 1222 | + result = _process_dask_cupy( |
1073 | 1223 | raster, x_coords, y_coords, |
1074 | | - target_values, max_distance, distance_metric, |
1075 | | - process_mode, |
| 1224 | + target_values, max_distance, distance_metric, process_mode, |
1076 | 1225 | ) |
1077 | 1226 | else: |
1078 | | - # Memory guard: unbounded distance on large rasters can OOM |
1079 | | - if max_distance >= max_possible_distance: |
1080 | | - H, W = raster.shape |
1081 | | - required = H * W * 4 * 3 # raster + xs + ys, float32 |
1082 | | - avail = _available_memory_bytes() |
1083 | | - if required > 0.8 * avail: |
1084 | | - if cKDTree is None: |
1085 | | - raise MemoryError( |
1086 | | - "Raster too large for single-chunk processing " |
1087 | | - "and scipy is not installed for memory-safe " |
1088 | | - "KDTree path. Install scipy or set a finite " |
1089 | | - "max_distance." |
1090 | | - ) |
1091 | | - else: # must be GREAT_CIRCLE |
1092 | | - raise MemoryError( |
1093 | | - "GREAT_CIRCLE with unbounded max_distance on " |
1094 | | - "this raster would exceed available memory. " |
1095 | | - "Set a finite max_distance." |
1096 | | - ) |
| 1227 | + use_kdtree = ( |
| 1228 | + cKDTree is not None |
| 1229 | + and distance_metric in (EUCLIDEAN, MANHATTAN) |
| 1230 | + and max_distance >= max_possible_distance |
| 1231 | + ) |
| 1232 | + if use_kdtree: |
| 1233 | + result = _process_dask_kdtree( |
| 1234 | + raster, x_coords, y_coords, |
| 1235 | + target_values, max_distance, distance_metric, |
| 1236 | + process_mode, |
| 1237 | + ) |
| 1238 | + else: |
| 1239 | + # Memory guard: unbounded distance on large rasters can OOM |
| 1240 | + if max_distance >= max_possible_distance: |
| 1241 | + H, W = raster.shape |
| 1242 | + required = H * W * 4 * 3 # raster + xs + ys, float32 |
| 1243 | + avail = _available_memory_bytes() |
| 1244 | + if required > 0.8 * avail: |
| 1245 | + if cKDTree is None: |
| 1246 | + raise MemoryError( |
| 1247 | + "Raster too large for single-chunk processing " |
| 1248 | + "and scipy is not installed for memory-safe " |
| 1249 | + "KDTree path. Install scipy or set a finite " |
| 1250 | + "max_distance." |
| 1251 | + ) |
| 1252 | + else: # must be GREAT_CIRCLE |
| 1253 | + raise MemoryError( |
| 1254 | + "GREAT_CIRCLE with unbounded max_distance on " |
| 1255 | + "this raster would exceed available memory. " |
| 1256 | + "Set a finite max_distance." |
| 1257 | + ) |
1097 | 1258 |
|
1098 | | - # Existing path: build 2D coordinate arrays as dask arrays |
1099 | | - x_coords_da = da.from_array(x_coords, chunks=x_coords.shape[0]) |
1100 | | - y_coords_da = da.from_array(y_coords, chunks=y_coords.shape[0]) |
1101 | | - xs = da.tile(x_coords_da, (raster.shape[0], 1)) |
1102 | | - ys = da.repeat(y_coords_da, raster.shape[1]).reshape(raster.shape) |
1103 | | - xs = xs.rechunk(raster.chunks) |
1104 | | - ys = ys.rechunk(raster.chunks) |
1105 | | - result = _process_dask(raster, xs, ys) |
| 1259 | + # Existing path: build 2D coordinate arrays as dask arrays |
| 1260 | + x_coords_da = da.from_array(x_coords, chunks=x_coords.shape[0]) |
| 1261 | + y_coords_da = da.from_array(y_coords, chunks=y_coords.shape[0]) |
| 1262 | + xs = da.tile(x_coords_da, (raster.shape[0], 1)) |
| 1263 | + ys = da.repeat(y_coords_da, raster.shape[1]).reshape( |
| 1264 | + raster.shape) |
| 1265 | + xs = xs.rechunk(raster.chunks) |
| 1266 | + ys = ys.rechunk(raster.chunks) |
| 1267 | + result = _process_dask(raster, xs, ys) |
| 1268 | + |
| 1269 | + else: |
| 1270 | + raise TypeError( |
| 1271 | + f"Unsupported array type {type(raster.data).__name__} " |
| 1272 | + f"for proximity/allocation/direction" |
| 1273 | + ) |
1106 | 1274 |
|
1107 | 1275 | return result |
1108 | 1276 |
|
|
0 commit comments