@@ -81,6 +81,9 @@ def _label_connected(data, valid, neighborhood):
8181 replacing the previous approach of calling ``scipy.ndimage.label``
8282 once per unique raster value.
8383
84+ Uses int32 indices internally, so the raster must have fewer than
85+ ~2.1 billion pixels (roughly 46 000 x 46 000).
86+
8487 Returns
8588 -------
8689 region_map : ndarray of int32 (2D)
@@ -208,7 +211,6 @@ def _sieve_numpy(data, threshold, neighborhood, skip_values):
208211 valid = ~ np .isnan (result ) if is_float else np .ones (result .shape , dtype = bool )
209212 skip_set = set (skip_values ) if skip_values is not None else set ()
210213
211- converged = False
212214 for _ in range (_MAX_ITERATIONS ):
213215 region_map , region_val , uid = _label_connected (
214216 result , valid , neighborhood
@@ -225,8 +227,7 @@ def _sieve_numpy(data, threshold, neighborhood, skip_values):
225227 and region_val [rid ] not in skip_set
226228 ]
227229 if not small_ids :
228- converged = True
229- break
230+ return result , True
230231
231232 adjacency = _build_adjacency (region_map , neighborhood )
232233
@@ -262,18 +263,9 @@ def _sieve_numpy(data, threshold, neighborhood, skip_values):
262263 merged_any = True
263264
264265 if not merged_any :
265- converged = True
266- break
266+ return result , True
267267
268- if not converged :
269- warnings .warn (
270- f"sieve() did not converge after { _MAX_ITERATIONS } iterations. "
271- f"The result may still contain regions smaller than "
272- f"threshold={ threshold } ." ,
273- stacklevel = 3 ,
274- )
275-
276- return result
268+ return result , False
277269
278270
279271# ---------------------------------------------------------------------------
@@ -285,8 +277,10 @@ def _sieve_cupy(data, threshold, neighborhood, skip_values):
285277 """CuPy backend: transfer to CPU, sieve, transfer back."""
286278 import cupy as cp
287279
288- np_result = _sieve_numpy (data .get (), threshold , neighborhood , skip_values )
289- return cp .asarray (np_result )
280+ np_result , converged = _sieve_numpy (
281+ data .get (), threshold , neighborhood , skip_values
282+ )
283+ return cp .asarray (np_result ), converged
290284
291285
292286# ---------------------------------------------------------------------------
@@ -326,8 +320,10 @@ def _sieve_dask(data, threshold, neighborhood, skip_values):
326320 )
327321
328322 np_data = data .compute ()
329- result = _sieve_numpy (np_data , threshold , neighborhood , skip_values )
330- return da .from_array (result , chunks = data .chunks )
323+ result , converged = _sieve_numpy (
324+ np_data , threshold , neighborhood , skip_values
325+ )
326+ return da .from_array (result , chunks = data .chunks ), converged
331327
332328
333329def _sieve_dask_cupy (data , threshold , neighborhood , skip_values ):
@@ -349,8 +345,10 @@ def _sieve_dask_cupy(data, threshold, neighborhood, skip_values):
349345 pass
350346
351347 cp_data = data .compute ()
352- result = _sieve_cupy (cp_data , threshold , neighborhood , skip_values )
353- return da .from_array (result , chunks = data .chunks )
348+ result , converged = _sieve_cupy (
349+ cp_data , threshold , neighborhood , skip_values
350+ )
351+ return da .from_array (result , chunks = data .chunks ), converged
354352
355353
356354# ---------------------------------------------------------------------------
@@ -444,21 +442,35 @@ def sieve(
444442 data = raster .data
445443
446444 if isinstance (data , np .ndarray ):
447- out = _sieve_numpy (data , threshold , neighborhood , skip_values )
445+ out , converged = _sieve_numpy (
446+ data , threshold , neighborhood , skip_values
447+ )
448448 elif has_cuda_and_cupy () and is_cupy_array (data ):
449- out = _sieve_cupy (data , threshold , neighborhood , skip_values )
449+ out , converged = _sieve_cupy (
450+ data , threshold , neighborhood , skip_values
451+ )
450452 elif da is not None and isinstance (data , da .Array ):
451453 if is_dask_cupy (raster ):
452- out = _sieve_dask_cupy (
454+ out , converged = _sieve_dask_cupy (
453455 data , threshold , neighborhood , skip_values
454456 )
455457 else :
456- out = _sieve_dask (data , threshold , neighborhood , skip_values )
458+ out , converged = _sieve_dask (
459+ data , threshold , neighborhood , skip_values
460+ )
457461 else :
458462 raise TypeError (
459463 f"Unsupported array type { type (data ).__name__ } for sieve()"
460464 )
461465
466+ if not converged :
467+ warnings .warn (
468+ f"sieve() did not converge after { _MAX_ITERATIONS } iterations. "
469+ f"The result may still contain regions smaller than "
470+ f"threshold={ threshold } ." ,
471+ stacklevel = 2 ,
472+ )
473+
462474 return DataArray (
463475 out ,
464476 name = name ,
0 commit comments