|
12 | 12 | except ImportError: |
13 | 13 | cKDTree = None |
14 | 14 |
|
| 15 | +import math as _math |
| 16 | + |
15 | 17 | import numpy as np |
16 | 18 | import xarray as xr |
17 | | -from numba import prange |
| 19 | +from numba import cuda, prange |
| 20 | + |
| 21 | +try: |
| 22 | + import cupy |
| 23 | +except ImportError: |
| 24 | + class cupy(object): |
| 25 | + ndarray = False |
18 | 26 |
|
19 | 27 | from xrspatial.pathfinding import _available_memory_bytes |
20 | | -from xrspatial.utils import get_dataarray_resolution, ngjit |
| 28 | +from xrspatial.utils import ( |
| 29 | + cuda_args, get_dataarray_resolution, has_cuda_and_cupy, |
| 30 | + is_cupy_array, is_dask_cupy, ngjit, |
| 31 | +) |
21 | 32 | from xrspatial.dataset_support import supports_dataset |
22 | 33 |
|
23 | 34 | EUCLIDEAN = 0 |
@@ -281,6 +292,182 @@ def _vectorized_calc_direction(x1, x2, y1, y2): |
281 | 292 | return result.astype(np.float32) |
282 | 293 |
|
283 | 294 |
|
| 295 | +# ===================================================================== |
| 296 | +# GPU (CuPy / CUDA) backend |
| 297 | +# ===================================================================== |
| 298 | + |
@cuda.jit(device=True)
def _gpu_euclidean_distance(x1, x2, y1, y2):
    """Straight-line (L2) distance between two points, on device."""
    run = x2 - x1
    rise = y2 - y1
    return _math.sqrt(run * run + rise * rise)
| 304 | + |
| 305 | + |
@cuda.jit(device=True)
def _gpu_manhattan_distance(x1, x2, y1, y2):
    """Taxicab (L1) distance between two points, on device."""
    return abs(x2 - x1) + abs(y2 - y1)
| 309 | + |
| 310 | + |
@cuda.jit(device=True)
def _gpu_great_circle_distance(x1, x2, y1, y2):
    """Haversine great-circle distance (meters) between two lon/lat points.

    Inputs are in degrees (x = longitude, y = latitude). Uses 6378137.0 m
    (the WGS84 semi-major axis) as the sphere radius.
    """
    if x1 == x2 and y1 == y2:
        return 0.0
    # 0.017453292519943295 == pi / 180 (degrees -> radians)
    lat1 = y1 * 0.017453292519943295
    lon1 = x1 * 0.017453292519943295
    lat2 = y2 * 0.017453292519943295
    lon2 = x2 * 0.017453292519943295
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = (_math.sin(dlat / 2.0) ** 2
         + _math.cos(lat1) * _math.cos(lat2)
         * _math.sin(dlon / 2.0) ** 2)
    # Clamp against floating-point round-off: `a` can drift just outside
    # [0, 1] (e.g. near-antipodal points), and asin(sqrt(a)) would then
    # return NaN instead of a distance.
    if a > 1.0:
        a = 1.0
    elif a < 0.0:
        a = 0.0
    return 6378137.0 * 2.0 * _math.asin(_math.sqrt(a))
| 325 | + |
| 326 | + |
@cuda.jit(device=True)
def _gpu_distance(x1, x2, y1, y2, metric):
    """Dispatch to the device distance function selected by ``metric``."""
    if metric == GREAT_CIRCLE:
        return _gpu_great_circle_distance(x1, x2, y1, y2)
    if metric == EUCLIDEAN:
        return _gpu_euclidean_distance(x1, x2, y1, y2)
    # Any other metric value falls through to Manhattan, as before.
    return _gpu_manhattan_distance(x1, x2, y1, y2)
| 335 | + |
| 336 | + |
@cuda.jit(device=True)
def _gpu_calc_direction(x1, x2, y1, y2):
    """Direction in degrees from point 1 toward point 2, on device.

    Returns 0.0 when the points coincide. The atan2 angle (in degrees,
    57.29578 ~= 180/pi) is remapped into a single [0, 360)-style range;
    the original ``d < 0`` and ``else`` branches computed the identical
    ``90 - d``, so they are merged here without changing any result.
    """
    if x1 == x2 and y1 == y2:
        return 0.0
    d = _math.atan2(-(y2 - y1), x2 - x1) * 57.29578
    if d > 90.0:
        return 360.0 - d + 90.0
    return 90.0 - d
| 351 | + |
| 352 | + |
@cuda.jit
def _proximity_cuda_kernel(target_xs, target_ys, target_vals, n_targets,
                           y_coords, x_coords, max_distance,
                           distance_metric, process_mode, out):
    # Brute-force nearest-target search: one CUDA thread per output pixel,
    # each thread scanning all targets (O(pixels * targets) total work).
    iy, ix = cuda.grid(2)
    # Guard threads launched past the raster edge by the grid rounding.
    if iy >= out.shape[0] or ix >= out.shape[1]:
        return

    # World coordinates of this pixel.
    px = x_coords[ix]
    py = y_coords[iy]

    # Sentinel: larger than any real distance, still finite in float32.
    best_dist = 1.0e38
    best_idx = -1

    for k in range(n_targets):
        d = _gpu_distance(px, target_xs[k], py, target_ys[k], distance_metric)
        if d < best_dist:
            best_dist = d
            best_idx = k

    # When no target lies within max_distance, `out` is left untouched
    # (the caller pre-fills it with NaN).
    if best_idx >= 0 and best_dist <= max_distance:
        if process_mode == PROXIMITY:
            out[iy, ix] = best_dist
        elif process_mode == ALLOCATION:
            out[iy, ix] = target_vals[best_idx]
        else:
            # Any other mode falls through to direction-to-nearest-target.
            out[iy, ix] = _gpu_calc_direction(
                px, target_xs[best_idx], py, target_ys[best_idx])
| 381 | + |
| 382 | + |
def _process_cupy(raster_data, x_coords, y_coords, target_values,
                  max_distance, distance_metric, process_mode):
    """GPU proximity using CUDA brute-force nearest-target kernel.

    Parameters
    ----------
    raster_data : cupy.ndarray
        2D source raster resident on the GPU.
    x_coords, y_coords : array-like
        1D world coordinates of the raster columns / rows.
    target_values : sequence
        Pixel values treated as targets; empty means every finite,
        non-zero pixel is a target.
    max_distance : float
        Pixels with no target within this distance remain NaN.
    distance_metric : int
        EUCLIDEAN / GREAT_CIRCLE / MANHATTAN selector passed to the kernel.
    process_mode : int
        PROXIMITY, ALLOCATION, or direction-mode selector.

    Returns
    -------
    cupy.ndarray
        float32 result with the same shape as ``raster_data``.
    """
    import cupy as cp

    # Find target pixels on GPU
    if len(target_values) == 0:
        mask = cp.isfinite(raster_data) & (raster_data != 0)
    else:
        mask = cp.isin(raster_data, cp.asarray(target_values))
        mask &= cp.isfinite(raster_data)

    target_rows, target_cols = cp.where(mask)
    n_targets = int(target_rows.shape[0])

    # No targets at all: every pixel is "unreachable" -> all-NaN result.
    if n_targets == 0:
        return cp.full(raster_data.shape, cp.nan, dtype=cp.float32)

    # Collect target world-coordinates and values
    y_dev = cp.asarray(y_coords, dtype=cp.float64)
    x_dev = cp.asarray(x_coords, dtype=cp.float64)
    target_ys = y_dev[target_rows]
    target_xs = x_dev[target_cols]
    target_vals = raster_data[target_rows, target_cols].astype(cp.float32)

    # Pre-fill output with NaN (pixels with no target within range stay NaN)
    out = cp.full(raster_data.shape, cp.nan, dtype=cp.float32)

    griddim, blockdim = cuda_args(raster_data.shape)
    _proximity_cuda_kernel[griddim, blockdim](
        target_xs, target_ys, target_vals, n_targets,
        y_dev, x_dev,
        np.float64(max_distance),
        np.int32(distance_metric),
        np.int32(process_mode),
        out,
    )

    return out
| 422 | + |
| 423 | + |
def _process_dask_cupy(raster, x_coords, y_coords, target_values,
                       max_distance, distance_metric, process_mode):
    """Dask+CuPy bounded proximity via map_overlap with per-chunk GPU kernel.

    Each chunk (plus overlap padding of ``max_distance / cellsize`` pixels)
    is processed on GPU independently. Only valid for finite max_distance
    where the padding guarantees all relevant targets are visible within
    each overlapped chunk.

    Returns a dask array of float32 chunks (cupy-backed, per ``meta``).
    """
    import cupy as cp

    # Overlap depth: enough pixels, per axis, to see any target within
    # max_distance of a chunk edge (+0.5 rounds to nearest).
    cellsize_x, cellsize_y = get_dataarray_resolution(raster)
    pad_y = int(max_distance / abs(cellsize_y) + 0.5)
    pad_x = int(max_distance / abs(cellsize_x) + 0.5)

    # Build 2D coordinate grids as dask+cupy arrays matching raster chunks.
    # Each chunk is small (chunk_h x chunk_w x 8 bytes); the full grid is
    # never materialised.
    x_cp = cp.asarray(x_coords, dtype=cp.float64)
    y_cp = cp.asarray(y_coords, dtype=cp.float64)
    x_da = da.from_array(x_cp, chunks=(x_cp.shape[0],))
    y_da = da.from_array(y_cp, chunks=(y_cp.shape[0],))
    xs = da.tile(x_da, (raster.shape[0], 1)).rechunk(raster.data.chunks)
    ys = da.repeat(y_da, raster.shape[1]).reshape(
        raster.shape).rechunk(raster.data.chunks)

    # Capture closure vars for the chunk function
    tv = target_values
    md = max_distance
    dm = distance_metric
    pm = process_mode

    def _chunk_func(data_chunk, xs_chunk, ys_chunk):
        # Use middle row/col to avoid NaN from boundary padding
        # (the middle index always lands inside the un-padded interior
        # for any chunk height/width >= 1).
        x_1d = xs_chunk[xs_chunk.shape[0] // 2, :]
        y_1d = ys_chunk[:, ys_chunk.shape[1] // 2]
        return _process_cupy(data_chunk, x_1d, y_1d, tv, md, dm, pm)

    # NOTE(review): depth=(pad_y, pad_x) applies per axis to all three input
    # arrays; boundary=np.nan pads edges, and padded NaN pixels/coords are
    # excluded by the finite-mask inside _process_cupy — confirm against
    # dask.array.map_overlap semantics for multi-array inputs.
    return da.map_overlap(
        _chunk_func,
        raster.data, xs, ys,
        depth=(pad_y, pad_x),
        boundary=np.nan,
        meta=cp.array((), dtype=cp.float32),
    )
| 469 | + |
| 470 | + |
284 | 471 | @ngjit |
285 | 472 | def _process_proximity_line( |
286 | 473 | source_line, |
@@ -1062,47 +1249,89 @@ def _process_dask(raster, xs, ys): |
1062 | 1249 | ys = np.repeat(y_coords, raster.shape[1]).reshape(raster.shape) |
1063 | 1250 | result = _process_numpy(raster.data, xs, ys) |
1064 | 1251 |
|
1065 | | - elif da is not None and isinstance(raster.data, da.Array): |
1066 | | - use_kdtree = ( |
1067 | | - cKDTree is not None |
1068 | | - and distance_metric in (EUCLIDEAN, MANHATTAN) |
1069 | | - and max_distance >= max_possible_distance |
| 1252 | + elif has_cuda_and_cupy() and is_cupy_array(raster.data): |
| 1253 | + result = _process_cupy( |
| 1254 | + raster.data, x_coords, y_coords, |
| 1255 | + target_values, max_distance, distance_metric, process_mode, |
1070 | 1256 | ) |
1071 | | - if use_kdtree: |
1072 | | - result = _process_dask_kdtree( |
| 1257 | + |
| 1258 | + elif da is not None and isinstance(raster.data, da.Array): |
| 1259 | + if (has_cuda_and_cupy() and is_dask_cupy(raster) |
| 1260 | + and max_distance < max_possible_distance): |
| 1261 | + # Bounded dask+cupy: out-of-core GPU via map_overlap |
| 1262 | + result = _process_dask_cupy( |
1073 | 1263 | raster, x_coords, y_coords, |
1074 | | - target_values, max_distance, distance_metric, |
1075 | | - process_mode, |
| 1264 | + target_values, max_distance, distance_metric, process_mode, |
1076 | 1265 | ) |
1077 | 1266 | else: |
1078 | | - # Memory guard: unbounded distance on large rasters can OOM |
1079 | | - if max_distance >= max_possible_distance: |
1080 | | - H, W = raster.shape |
1081 | | - required = H * W * 4 * 3 # raster + xs + ys, float32 |
1082 | | - avail = _available_memory_bytes() |
1083 | | - if required > 0.8 * avail: |
1084 | | - if cKDTree is None: |
1085 | | - raise MemoryError( |
1086 | | - "Raster too large for single-chunk processing " |
1087 | | - "and scipy is not installed for memory-safe " |
1088 | | - "KDTree path. Install scipy or set a finite " |
1089 | | - "max_distance." |
1090 | | - ) |
1091 | | - else: # must be GREAT_CIRCLE |
1092 | | - raise MemoryError( |
1093 | | - "GREAT_CIRCLE with unbounded max_distance on " |
1094 | | - "this raster would exceed available memory. " |
1095 | | - "Set a finite max_distance." |
1096 | | - ) |
| 1267 | + # dask+numpy path (or unbounded dask+cupy → convert first) |
| 1268 | + was_dask_cupy = has_cuda_and_cupy() and is_dask_cupy(raster) |
| 1269 | + if was_dask_cupy: |
| 1270 | + import cupy as cp |
| 1271 | + # Unbounded: convert to dask+numpy for KDTree/line-sweep |
| 1272 | + # (KDTree is CPU-only; O(N log T) beats brute-force O(NT)) |
| 1273 | + original_chunks = raster.data.chunks |
| 1274 | + raster = raster.copy( |
| 1275 | + data=raster.data.map_blocks( |
| 1276 | + lambda x: x.get(), dtype=raster.dtype, |
| 1277 | + meta=np.array(()), |
| 1278 | + ) |
| 1279 | + ) |
1097 | 1280 |
|
1098 | | - # Existing path: build 2D coordinate arrays as dask arrays |
1099 | | - x_coords_da = da.from_array(x_coords, chunks=x_coords.shape[0]) |
1100 | | - y_coords_da = da.from_array(y_coords, chunks=y_coords.shape[0]) |
1101 | | - xs = da.tile(x_coords_da, (raster.shape[0], 1)) |
1102 | | - ys = da.repeat(y_coords_da, raster.shape[1]).reshape(raster.shape) |
1103 | | - xs = xs.rechunk(raster.chunks) |
1104 | | - ys = ys.rechunk(raster.chunks) |
1105 | | - result = _process_dask(raster, xs, ys) |
| 1281 | + use_kdtree = ( |
| 1282 | + cKDTree is not None |
| 1283 | + and distance_metric in (EUCLIDEAN, MANHATTAN) |
| 1284 | + and max_distance >= max_possible_distance |
| 1285 | + ) |
| 1286 | + if use_kdtree: |
| 1287 | + result = _process_dask_kdtree( |
| 1288 | + raster, x_coords, y_coords, |
| 1289 | + target_values, max_distance, distance_metric, |
| 1290 | + process_mode, |
| 1291 | + ) |
| 1292 | + else: |
| 1293 | + # Memory guard: unbounded distance on large rasters can OOM |
| 1294 | + if max_distance >= max_possible_distance: |
| 1295 | + H, W = raster.shape |
| 1296 | + required = H * W * 4 * 3 # raster + xs + ys, float32 |
| 1297 | + avail = _available_memory_bytes() |
| 1298 | + if required > 0.8 * avail: |
| 1299 | + if cKDTree is None: |
| 1300 | + raise MemoryError( |
| 1301 | + "Raster too large for single-chunk processing " |
| 1302 | + "and scipy is not installed for memory-safe " |
| 1303 | + "KDTree path. Install scipy or set a finite " |
| 1304 | + "max_distance." |
| 1305 | + ) |
| 1306 | + else: # must be GREAT_CIRCLE |
| 1307 | + raise MemoryError( |
| 1308 | + "GREAT_CIRCLE with unbounded max_distance on " |
| 1309 | + "this raster would exceed available memory. " |
| 1310 | + "Set a finite max_distance." |
| 1311 | + ) |
| 1312 | + |
| 1313 | + # Existing path: build 2D coordinate arrays as dask arrays |
| 1314 | + x_coords_da = da.from_array(x_coords, chunks=x_coords.shape[0]) |
| 1315 | + y_coords_da = da.from_array(y_coords, chunks=y_coords.shape[0]) |
| 1316 | + xs = da.tile(x_coords_da, (raster.shape[0], 1)) |
| 1317 | + ys = da.repeat(y_coords_da, raster.shape[1]).reshape( |
| 1318 | + raster.shape) |
| 1319 | + xs = xs.rechunk(raster.chunks) |
| 1320 | + ys = ys.rechunk(raster.chunks) |
| 1321 | + result = _process_dask(raster, xs, ys) |
| 1322 | + |
| 1323 | + # Convert result back to dask+cupy if input was dask+cupy |
| 1324 | + if was_dask_cupy: |
| 1325 | + result = result.map_blocks( |
| 1326 | + cp.asarray, dtype=result.dtype, |
| 1327 | + meta=cp.array((), dtype=result.dtype), |
| 1328 | + ) |
| 1329 | + |
| 1330 | + else: |
| 1331 | + raise TypeError( |
| 1332 | + f"Unsupported array type {type(raster.data).__name__} " |
| 1333 | + f"for proximity/allocation/direction" |
| 1334 | + ) |
1106 | 1335 |
|
1107 | 1336 | return result |
1108 | 1337 |
|
|
0 commit comments