Skip to content

Commit 1d60fbd

Browse files
authored
Add memory guard to cost_distance iterative Dijkstra + da.block assembly (#1119)
* Add sweep-performance design spec: parallel subagent triage + ralph-loop workflow for auditing all xrspatial modules for performance bottlenecks, OOM risk under 30TB dask workloads, and backend-specific anti-patterns.
* Add sweep-performance implementation plan: 7 tasks covering command scaffold, module scoring, parallel subagent dispatch, report merging, ralph-loop generation, and smoke tests.
* Add sweep-performance slash command.
* Add memory guard to cost_distance iterative Dijkstra + use da.block (#1118)
  - Add memory guard before _preprocess_tiles: estimates ~3x dataset (source + friction cache + result) and raises MemoryError if it would exceed 80% of available RAM, suggesting a finite max_cost.
  - Replace np.concatenate assembly with da.block to avoid building a monolithic numpy array from tile results. Tiles are now wrapped in dask.delayed and assembled lazily.
1 parent 74a6da9 commit 1d60fbd

File tree

1 file changed

+33
-6
lines changed

1 file changed

+33
-6
lines changed

xrspatial/cost_distance.py

Lines changed: 33 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -916,6 +916,24 @@ def _cost_distance_dask_iterative(source_da, friction_da,
916916
n_tile_y = len(chunks_y)
917917
n_tile_x = len(chunks_x)
918918

919+
# Memory guard: the tile cache holds all tiles in RAM simultaneously.
920+
# Estimate total bytes: source + friction (both arrays, full dataset).
921+
total_bytes = (np.prod(source_da.shape) * source_da.dtype.itemsize +
922+
np.prod(friction_da.shape) * friction_da.dtype.itemsize)
923+
# Working memory: tile cache (~2x dataset) + result (~1x) + boundaries
924+
estimated = total_bytes * 3
925+
try:
926+
from xrspatial.zonal import _available_memory_bytes
927+
avail = _available_memory_bytes()
928+
except ImportError:
929+
avail = 2 * 1024**3
930+
if estimated > 0.8 * avail:
931+
raise MemoryError(
932+
f"cost_distance iterative Dijkstra needs ~{estimated / 1e9:.1f} GB "
933+
f"to cache all tiles but only ~{avail / 1e9:.1f} GB available. "
934+
f"Set a finite max_cost to use the memory-safe map_overlap path."
935+
)
936+
919937
# Phase 0: batch-compute all tiles, extract boundaries & source flags
920938
friction_bdry, has_source, tile_cache = _preprocess_tiles(
921939
source_da, friction_da, chunks_y, chunks_x, target_values,
@@ -970,8 +988,14 @@ def _assemble_result(tile_cache, boundaries, friction_bdry,
970988
cellsize_x, cellsize_y, max_cost, target_values,
971989
dy, dx, dd, chunks_y, chunks_x,
972990
n_tile_y, n_tile_x, connectivity):
973-
"""Build result array from cached tiles and converged boundary seeds."""
974-
rows = []
991+
"""Build result dask array from cached tiles and converged boundary seeds.
992+
993+
Uses ``da.block`` to assemble tiles lazily instead of building a
994+
monolithic numpy array with ``np.concatenate``.
995+
"""
996+
import dask
997+
998+
block_grid = []
975999
for iy in range(n_tile_y):
9761000
row_blocks = []
9771001
for ix in range(n_tile_x):
@@ -987,10 +1011,13 @@ def _assemble_result(tile_cache, boundaries, friction_bdry,
9871011
cellsize_x, cellsize_y, max_cost, target_values,
9881012
dy, dx, dd, *seeds,
9891013
)
990-
row_blocks.append(_dist_to_float32(dist, h, w, max_cost))
991-
rows.append(np.concatenate(row_blocks, axis=1))
992-
full = np.concatenate(rows, axis=0)
993-
return da.from_array(full, chunks=(chunks_y, chunks_x))
1014+
tile = _dist_to_float32(dist, h, w, max_cost)
1015+
row_blocks.append(da.from_delayed(
1016+
dask.delayed(lambda t: t)(tile),
1017+
shape=(h, w), dtype=np.float32,
1018+
))
1019+
block_grid.append(row_blocks)
1020+
return da.block(block_grid)
9941021

9951022

9961023
# ---------------------------------------------------------------------------

0 commit comments

Comments (0)