Skip to content

Commit d3d8438

Browse files
committed
Fix dask double-compute and GPU temporary allocation issues
- perlin.py: persist the noise array before normalization so it is not recomputed from scratch: before min/ptp in the dask+numpy path and before min/max in the dask+cupy path.
- terrain.py: persist warped coordinates before the octave loop so each iteration doesn't rebuild the warp subgraph; persist worley noise before min/max so the blend doesn't recompute it.
- terrain.py: pre-allocate scaled_x/scaled_y GPU buffers and use cupy.multiply(out=) instead of allocating temporaries per iteration; also allocate the tmp buffer once, before the warp-octave loop, instead of on every iteration.
1 parent 00cc6c9 commit d3d8438

File tree

2 files changed

+26
-5
lines changed

2 files changed

+26
-5
lines changed

xrspatial/perlin.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,8 @@ def _perlin_dask_numpy(data: da.Array,
120120
_func = partial(_perlin, p)
121121
data = da.map_blocks(_func, x, y, meta=np.array((), dtype=np.float32))
122122

123+
# persist so min/ptp don't recompute the noise from scratch
124+
(data,) = dask.persist(data)
123125
min_val, ptp_val = dask.compute(da.min(data), da.ptp(data))
124126
data = (data - min_val) / ptp_val
125127
return data
@@ -267,6 +269,8 @@ def _chunk_perlin(block, block_info=None):
267269
data = da.map_blocks(_chunk_perlin, data, dtype=cupy.float32,
268270
meta=cupy.array((), dtype=cupy.float32))
269271

272+
# persist so min/max don't recompute the noise from scratch
273+
(data,) = dask.persist(data)
270274
min_val, max_val = dask.compute(da.min(data), da.max(data))
271275
data = (data - min_val) / (max_val - min_val)
272276
return data

xrspatial/terrain.py

Lines changed: 22 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,9 @@ def _terrain_dask_numpy(data, seed, x_range_scaled, y_range_scaled, zfactor,
178178
warp_y /= warp_norm
179179
x = x + warp_x * warp_strength
180180
y = y + warp_y * warp_strength
181+
# persist warped coords so the octave loop doesn't rebuild the
182+
# warp subgraph on every iteration
183+
(x, y) = dask.persist(x, y)
181184

182185
# --- octave noise loop ---
183186
norm = sum(persistence ** i for i in range(octaves))
@@ -222,6 +225,8 @@ def _terrain_dask_numpy(data, seed, x_range_scaled, y_range_scaled, zfactor,
222225
w_noise = da.map_blocks(
223226
_wfunc, x, y, meta=np.array((), dtype=np.float32)
224227
)
228+
# persist so min/max don't recompute worley (and warped coords)
229+
(w_noise,) = dask.persist(w_noise)
225230
w_min, w_max = dask.compute(da.min(w_noise), da.max(w_noise))
226231
w_ptp = w_max - w_min
227232
if w_ptp > 0:
@@ -264,24 +269,32 @@ def _terrain_gpu(height_map, seed, x_range=(0, 1), y_range=(0, 1),
264269
y_arr, x_arr = cupy.meshgrid(liny, linx, indexing='ij')
265270

266271
# --- domain warping ---
272+
# pre-allocate reusable buffers for scaled coordinates (GPU)
273+
if use_xy_kernel:
274+
scaled_x = cupy.empty_like(x_arr)
275+
scaled_y = cupy.empty_like(y_arr)
276+
267277
if warp_strength > 0:
268278
warp_x = cupy.zeros((h, w), dtype=cupy.float32)
269279
warp_y = cupy.zeros((h, w), dtype=cupy.float32)
280+
tmp = cupy.empty_like(noise)
270281

271282
for wi in range(warp_octaves):
272283
w_amp = persistence ** wi
273284
w_freq = lacunarity ** wi
274285
p_wx = cupy.asarray(_make_perm_table(seed + 100 + wi))
275286
p_wy = cupy.asarray(_make_perm_table(seed + 200 + wi))
276287

277-
tmp = cupy.empty_like(noise)
288+
cupy.multiply(x_arr, w_freq, out=scaled_x)
289+
cupy.multiply(y_arr, w_freq, out=scaled_y)
290+
278291
_perlin_gpu_xy[griddim, blockdim](
279-
p_wx, x_arr * w_freq, y_arr * w_freq, 1.0, tmp
292+
p_wx, scaled_x, scaled_y, 1.0, tmp
280293
)
281294
warp_x += tmp * w_amp
282295

283296
_perlin_gpu_xy[griddim, blockdim](
284-
p_wy, x_arr * w_freq, y_arr * w_freq, 1.0, tmp
297+
p_wy, scaled_x, scaled_y, 1.0, tmp
285298
)
286299
warp_y += tmp * w_amp
287300

@@ -302,8 +315,10 @@ def _terrain_gpu(height_map, seed, x_range=(0, 1), y_range=(0, 1),
302315
p = cupy.asarray(_make_perm_table(seed + i))
303316

304317
if use_xy_kernel:
318+
cupy.multiply(x_arr, freq, out=scaled_x)
319+
cupy.multiply(y_arr, freq, out=scaled_y)
305320
_perlin_gpu_xy[griddim, blockdim](
306-
p, x_arr * freq, y_arr * freq, 1.0, noise
321+
p, scaled_x, scaled_y, 1.0, noise
307322
)
308323
else:
309324
_perlin_gpu[griddim, blockdim](
@@ -323,8 +338,10 @@ def _terrain_gpu(height_map, seed, x_range=(0, 1), y_range=(0, 1),
323338
p = cupy.asarray(_make_perm_table(seed + i))
324339

325340
if use_xy_kernel:
341+
cupy.multiply(x_arr, freq, out=scaled_x)
342+
cupy.multiply(y_arr, freq, out=scaled_y)
326343
_perlin_gpu_xy[griddim, blockdim](
327-
p, x_arr * freq, y_arr * freq, amp, noise
344+
p, scaled_x, scaled_y, amp, noise
328345
)
329346
else:
330347
_perlin_gpu[griddim, blockdim](

0 commit comments

Comments
 (0)