Skip to content

Commit a27cb7c

Browse files
committed
fixed proximity intermediate large numpy array problem and proximity output coords are now lazy if input is dask array
1 parent a94c9d7 commit a27cb7c

1 file changed

Lines changed: 24 additions & 10 deletions

File tree

xrspatial/proximity.py

Lines changed: 24 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -420,16 +420,22 @@ def _process(
420420

421421
target_values = np.asarray(target_values)
422422

423-
# x-y coordinates of each pixel.
424-
# flatten the coords of input raster and reshape to 2d
425-
xs = np.tile(raster[x].data, raster.shape[0]).reshape(raster.shape)
426-
ys = np.repeat(raster[y].data, raster.shape[1]).reshape(raster.shape)
427-
428423
if max_distance is None:
429424
max_distance = np.inf
430425

426+
# Get 1D coordinate arrays (these are small, just the axis coordinates)
427+
x_coords = raster[x].data
428+
y_coords = raster[y].data
429+
430+
# Ensure 1D coords are numpy arrays for max_possible_distance calculation
431+
if da is not None and isinstance(x_coords, da.Array):
432+
x_coords = x_coords.compute()
433+
if da is not None and isinstance(y_coords, da.Array):
434+
y_coords = y_coords.compute()
435+
436+
# Compute max_possible_distance using coordinate endpoints directly
431437
max_possible_distance = _distance(
432-
xs[0][0], xs[-1][-1], ys[0][0], ys[-1][-1], distance_metric
438+
x_coords[0], x_coords[-1], y_coords[0], y_coords[-1], distance_metric
433439
)
434440

435441
@ngjit
@@ -620,13 +626,21 @@ def _process_dask(raster, xs, ys):
620626
return out
621627

622628
if isinstance(raster.data, np.ndarray):
623-
# numpy case
629+
# numpy case - create full coordinate arrays as numpy
630+
xs = np.tile(x_coords, raster.shape[0]).reshape(raster.shape)
631+
ys = np.repeat(y_coords, raster.shape[1]).reshape(raster.shape)
624632
result = _process_numpy(raster.data, xs, ys)
625633

626634
elif da is not None and isinstance(raster.data, da.Array):
627-
# dask + numpy case
628-
xs = da.from_array(xs, chunks=(raster.chunks))
629-
ys = da.from_array(ys, chunks=(raster.chunks))
635+
# dask case - create coordinate arrays as dask arrays directly
636+
# This avoids materializing the full arrays in memory
637+
# Convert 1D coords to dask arrays first
638+
x_coords_da = da.from_array(x_coords, chunks=x_coords.shape[0])
639+
y_coords_da = da.from_array(y_coords, chunks=y_coords.shape[0])
640+
xs = da.tile(x_coords_da, (raster.shape[0], 1))
641+
ys = da.repeat(y_coords_da, raster.shape[1]).reshape(raster.shape)
642+
xs = xs.rechunk(raster.chunks)
643+
ys = ys.rechunk(raster.chunks)
630644
result = _process_dask(raster, xs, ys)
631645

632646
return result

0 commit comments

Comments
 (0)