Skip to content

Commit 6febf40

Browse files
authored
Add memory guard for proximity line-sweep dask path (#1113)
* Add memory guard to proximity dask path for inf max_distance (#1111)

  When max_distance >= raster diagonal, the line-sweep algorithm rechunks the entire array into a single chunk. Add a memory estimate check (~35 bytes/pixel working memory) that raises ValueError before the rechunk if the working set would exceed 80% of available RAM.

* Add memory guard for proximity line-sweep dask path (#1111)

  When max_distance >= raster diagonal and the non-KDTree path is used (GREAT_CIRCLE metric or no scipy), the line-sweep rechunks the whole raster into a single chunk. The existing memory guard already catches this for the GREAT_CIRCLE case. Add a pre-rechunk estimate (~35 bytes/pixel) to _process_dask for the general case, raising ValueError before the rechunk if working memory would exceed 80% of available RAM. The EUCLIDEAN/MANHATTAN + scipy path uses the memory-safe KDTree and already has its own guards.
1 parent 3855b83 commit 6febf40

File tree

2 files changed

+42
-3
lines changed

2 files changed

+42
-3
lines changed

xrspatial/proximity.py

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1221,9 +1221,24 @@ def _process_numpy(img, x_coords, y_coords):
12211221
def _process_dask(raster, xs, ys):
12221222

12231223
if max_distance >= max_possible_distance:
1224-
# consider all targets in the whole raster
1225-
# the data array is computed at once,
1226-
# make sure your data fit your memory
1224+
# The line-sweep needs the full raster in one chunk.
1225+
# Guard against OOM before rechunking.
1226+
estimated_bytes = np.prod(raster.shape) * raster.data.dtype.itemsize
1227+
# ~35 bytes/pixel working memory (distance, output, pan_near,
1228+
# scan_line, nearest arrays, etc.)
1229+
estimated_working = estimated_bytes * 35
1230+
try:
1231+
from xrspatial.zonal import _available_memory_bytes
1232+
avail = _available_memory_bytes()
1233+
except ImportError:
1234+
avail = 2 * 1024**3
1235+
if estimated_working > 0.8 * avail:
1236+
raise ValueError(
1237+
f"proximity() with max_distance >= raster diagonal "
1238+
f"needs ~{estimated_working / 1e9:.1f} GB but only "
1239+
f"~{avail / 1e9:.1f} GB available. Set a finite "
1240+
f"max_distance for out-of-core dask processing."
1241+
)
12271242
height, width = raster.shape
12281243
raster.data = raster.data.rechunk({0: height, 1: width})
12291244
xs = xs.rechunk({0: height, 1: width})

xrspatial/tests/test_proximity.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -374,6 +374,30 @@ def _make_kdtree_raster(height=20, width=30, chunks=(10, 15)):
374374
return raster
375375

376376

377+
@pytest.mark.skipif(da is None, reason="dask is not installed")
def test_proximity_dask_inf_distance_memory_guard():
    """Check that an unbounded-distance dask run refuses to start when RAM is scarce.

    With no ``max_distance`` given and the GREAT_CIRCLE metric selected,
    ``proximity`` is expected to raise ``MemoryError`` once the reported
    available memory is mocked down to a tiny value.
    """
    from unittest.mock import patch
    # Imported up front so the test fails fast if the patch target below
    # does not exist in ``xrspatial.proximity``.
    from xrspatial.proximity import _available_memory_bytes  # noqa: F401

    # A 100x100 float64 grid holding a single target pixel at its centre.
    grid = np.zeros((100, 100), dtype=np.float64)
    grid[50, 50] = 1.0

    x_coords = np.linspace(-10, 10, 100)
    y_coords = np.linspace(-5, 5, 100)
    raster = xr.DataArray(
        da.from_array(grid, chunks=(50, 50)),
        dims=['y', 'x'],
        coords={'x': x_coords, 'y': y_coords},
    )

    # GREAT_CIRCLE keeps us off the KDTree path (which only supports
    # EUCLIDEAN/MANHATTAN); the mock makes the memory probe report 1024.
    low_memory = patch(
        'xrspatial.proximity._available_memory_bytes', return_value=1024
    )
    expect_oom = pytest.raises(MemoryError, match="exceed available memory")
    with low_memory, expect_oom:
        proximity(raster, target_values=[1], distance_metric="GREAT_CIRCLE")
399+
400+
377401
@pytest.mark.skipif(da is None, reason="dask is not installed")
378402
@pytest.mark.parametrize("metric", ["EUCLIDEAN", "MANHATTAN"])
379403
def test_proximity_dask_kdtree_matches_numpy(metric):

0 commit comments

Comments
 (0)