@@ -17,7 +17,12 @@ class cupy(object):
1717except ImportError :
1818 da = None
1919
20- from xrspatial .utils import has_cuda_and_cupy , is_cupy_array , is_dask_cupy
20+ from xrspatial .utils import (
21+ ArrayTypeFunctionMapping ,
22+ has_cuda_and_cupy ,
23+ is_cupy_array ,
24+ is_dask_cupy ,
25+ )
2126
2227# Default erosion parameters
2328_DEFAULT_PARAMS = dict (
@@ -157,6 +162,199 @@ def _erode_cpu(heightmap, random_pos, boy, box, bw,
157162 return heightmap
158163
159164
165+ # ---------------------------------------------------------------------------
166+ # GPU (CuPy / CUDA) implementation
167+ # ---------------------------------------------------------------------------
168+
169+ if cuda is not None :
170+
171+ @cuda .jit
172+ def _erode_gpu_kernel (
173+ heightmap , random_pos , boy , box , bw ,
174+ inertia , capacity , deposition , erosion_rate ,
175+ evaporation , gravity , min_slope , max_lifetime ,
176+ ):
177+ """One CUDA thread per particle.
178+
179+ Particles within a single launch are independent. Conflicts at
180+ shared heightmap cells are resolved via ``cuda.atomic.add``.
181+ """
182+ tid = cuda .grid (1 )
183+ n_iterations = random_pos .shape [0 ]
184+ if tid >= n_iterations :
185+ return
186+
187+ height = heightmap .shape [0 ]
188+ width = heightmap .shape [1 ]
189+ n_brush = bw .shape [0 ]
190+
191+ pos_x = random_pos [tid , 0 ] * (width - 3 ) + 1
192+ pos_y = random_pos [tid , 1 ] * (height - 3 ) + 1
193+ dir_x = 0.0
194+ dir_y = 0.0
195+ speed = 1.0
196+ water = 1.0
197+ sediment = 0.0
198+
199+ for step in range (max_lifetime ):
200+ node_x = int (pos_x )
201+ node_y = int (pos_y )
202+
203+ if node_x < 1 or node_x >= width - 2 :
204+ return
205+ if node_y < 1 or node_y >= height - 2 :
206+ return
207+
208+ fx = pos_x - node_x
209+ fy = pos_y - node_y
210+
211+ h00 = heightmap [node_y , node_x ]
212+ h10 = heightmap [node_y , node_x + 1 ]
213+ h01 = heightmap [node_y + 1 , node_x ]
214+ h11 = heightmap [node_y + 1 , node_x + 1 ]
215+
216+ grad_x = (h10 - h00 ) * (1 - fy ) + (h11 - h01 ) * fy
217+ grad_y = (h01 - h00 ) * (1 - fx ) + (h11 - h10 ) * fx
218+
219+ dir_x = dir_x * inertia - grad_x * (1 - inertia )
220+ dir_y = dir_y * inertia - grad_y * (1 - inertia )
221+
222+ dir_len = (dir_x * dir_x + dir_y * dir_y ) ** 0.5
223+ if dir_len < 1e-10 :
224+ return
225+ dir_x /= dir_len
226+ dir_y /= dir_len
227+
228+ new_x = pos_x + dir_x
229+ new_y = pos_y + dir_y
230+
231+ if new_x < 1 or new_x >= width - 2 :
232+ return
233+ if new_y < 1 or new_y >= height - 2 :
234+ return
235+
236+ h_old = (h00 * (1 - fx ) * (1 - fy ) + h10 * fx * (1 - fy ) +
237+ h01 * (1 - fx ) * fy + h11 * fx * fy )
238+
239+ new_node_x = int (new_x )
240+ new_node_y = int (new_y )
241+ new_fx = new_x - new_node_x
242+ new_fy = new_y - new_node_y
243+ h_new = (heightmap [new_node_y , new_node_x ] * (1 - new_fx ) * (1 - new_fy ) +
244+ heightmap [new_node_y , new_node_x + 1 ] * new_fx * (1 - new_fy ) +
245+ heightmap [new_node_y + 1 , new_node_x ] * (1 - new_fx ) * new_fy +
246+ heightmap [new_node_y + 1 , new_node_x + 1 ] * new_fx * new_fy )
247+
248+ h_diff = h_new - h_old
249+
250+ neg_h_diff = - h_diff
251+ if neg_h_diff < min_slope :
252+ neg_h_diff = min_slope
253+ sed_capacity = neg_h_diff * speed * water * capacity
254+
255+ if sediment > sed_capacity or h_diff > 0 :
256+ if h_diff > 0 :
257+ amount = h_diff if h_diff < sediment else sediment
258+ else :
259+ amount = (sediment - sed_capacity ) * deposition
260+
261+ sediment -= amount
262+
263+ cuda .atomic .add (heightmap , (node_y , node_x ),
264+ amount * (1 - fx ) * (1 - fy ))
265+ cuda .atomic .add (heightmap , (node_y , node_x + 1 ),
266+ amount * fx * (1 - fy ))
267+ cuda .atomic .add (heightmap , (node_y + 1 , node_x ),
268+ amount * (1 - fx ) * fy )
269+ cuda .atomic .add (heightmap , (node_y + 1 , node_x + 1 ),
270+ amount * fx * fy )
271+ else :
272+ neg_h = - h_diff
273+ sc_minus_sed = (sed_capacity - sediment ) * erosion_rate
274+ amount = sc_minus_sed if sc_minus_sed < neg_h else neg_h
275+
276+ for k in range (n_brush ):
277+ ey = node_y + boy [k ]
278+ ex = node_x + box [k ]
279+ if 0 <= ey < height and 0 <= ex < width :
280+ cuda .atomic .add (heightmap , (ey , ex ),
281+ - (amount * bw [k ]))
282+
283+ sediment += amount
284+
285+ speed_sq = speed * speed + h_diff * gravity
286+ if speed_sq > 0 :
287+ speed = speed_sq ** 0.5
288+ else :
289+ speed = 0.0
290+ water *= (1 - evaporation )
291+
292+ pos_x = new_x
293+ pos_y = new_y
294+
295+
296+ def _erode_cupy (data , random_pos_np , boy , box , bw , params ):
297+ """Run erosion on a CuPy array using the CUDA kernel."""
298+ hm = data .astype (cupy .float64 )
299+
300+ # Transfer brush and random positions to device
301+ random_pos_d = cupy .asarray (random_pos_np )
302+ boy_d = cupy .asarray (boy )
303+ box_d = cupy .asarray (box )
304+ bw_d = cupy .asarray (bw )
305+
306+ n_particles = random_pos_np .shape [0 ]
307+ threads_per_block = 256
308+ blocks = (n_particles + threads_per_block - 1 ) // threads_per_block
309+
310+ _erode_gpu_kernel [blocks , threads_per_block ](
311+ hm , random_pos_d , boy_d , box_d , bw_d ,
312+ params ['inertia' ], params ['capacity' ], params ['deposition' ],
313+ params ['erosion' ], params ['evaporation' ], params ['gravity' ],
314+ params ['min_slope' ], int (params ['max_lifetime' ]),
315+ )
316+ cuda .synchronize ()
317+
318+ return hm .astype (cupy .float32 )
319+
320+
321+ def _erode_numpy (data , random_pos , boy , box , bw , params ):
322+ """Run erosion on a NumPy array using the CPU kernel."""
323+ hm = data .astype (np .float64 ).copy ()
324+
325+ hm = _erode_cpu (
326+ hm , random_pos , boy , box , bw ,
327+ params ['inertia' ], params ['capacity' ], params ['deposition' ],
328+ params ['erosion' ], params ['evaporation' ], params ['gravity' ],
329+ params ['min_slope' ], int (params ['max_lifetime' ]),
330+ )
331+
332+ return hm .astype (np .float32 )
333+
334+
335+ def _erode_dask_numpy (data , random_pos , boy , box , bw , params ):
336+ """Run erosion on a dask+numpy array.
337+
338+ Erosion is a global operation (particles traverse the full grid),
339+ so we materialize to numpy, run the CPU kernel, then re-wrap.
340+ """
341+ np_data = data .compute ()
342+ result = _erode_numpy (np_data , random_pos , boy , box , bw , params )
343+ return da .from_array (result , chunks = data .chunksize )
344+
345+
346+ def _erode_dask_cupy (data , random_pos , boy , box , bw , params ):
347+ """Run erosion on a dask+cupy array.
348+
349+ Materializes to a single CuPy array, runs the GPU kernel, then
350+ re-wraps as dask.
351+ """
352+ cp_data = data .compute () # CuPy ndarray
353+ result = _erode_cupy (cp_data , random_pos , boy , box , bw , params )
354+ return da .from_array (result , chunks = data .chunksize ,
355+ meta = cupy .array ((), dtype = cupy .float32 ))
356+
357+
160358def erode (agg , iterations = 50000 , seed = 42 , params = None ):
161359 """Apply particle-based hydraulic erosion to a terrain DataArray.
162360
@@ -185,49 +383,19 @@ def erode(agg, iterations=50000, seed=42, params=None):
185383 if params is not None :
186384 p .update (params )
187385
188- data = agg .data
189- is_dask = da is not None and isinstance (data , da .Array )
190- is_gpu = False
191-
192- if is_dask :
193- if is_dask_cupy (agg ):
194- is_gpu = True
195- data = data .compute () # cupy ndarray
196- else :
197- data = data .compute () # numpy ndarray
198-
199- if has_cuda_and_cupy () and is_cupy_array (data ):
200- is_gpu = True
201-
202- # work on a copy
203- if is_gpu :
204- hm = cupy .asnumpy (data ).astype (np .float64 )
205- else :
206- hm = data .astype (np .float64 ).copy ()
207-
208- # precompute brush and random positions outside JIT
386+ # Precompute brush and random positions on the host
209387 boy , box , bw = _build_brush (int (p ['radius' ]))
210388 rng = np .random .RandomState (seed )
211389 random_pos = rng .random ((iterations , 2 ))
212390
213- hm = _erode_cpu (
214- hm , random_pos , boy , box , bw ,
215- p [ 'inertia' ], p [ 'capacity' ], p [ 'deposition' ], p [ 'erosion' ] ,
216- p [ 'evaporation' ], p [ 'gravity' ], p [ 'min_slope' ] ,
217- int ( p [ 'max_lifetime' ] ),
391+ mapper = ArrayTypeFunctionMapping (
392+ numpy_func = lambda d : _erode_numpy ( d , random_pos , boy , box , bw , p ) ,
393+ cupy_func = lambda d : _erode_cupy ( d , random_pos , boy , box , bw , p ) ,
394+ dask_func = lambda d : _erode_dask_numpy ( d , random_pos , boy , box , bw , p ) ,
395+ dask_cupy_func = lambda d : _erode_dask_cupy ( d , random_pos , boy , box , bw , p ),
218396 )
219397
220- result_data = hm .astype (np .float32 )
221- if is_gpu :
222- result_data = cupy .asarray (result_data )
223-
224- if is_dask :
225- if is_gpu :
226- result_data = da .from_array (result_data ,
227- chunks = agg .data .chunksize ,
228- meta = cupy .array ((), dtype = cupy .float32 ))
229- else :
230- result_data = da .from_array (result_data , chunks = agg .data .chunksize )
398+ result_data = mapper (agg )(agg .data )
231399
232400 return xr .DataArray (result_data , dims = agg .dims , coords = agg .coords ,
233401 attrs = agg .attrs , name = agg .name )
0 commit comments