Skip to content

Commit 2455aad

Browse files
committed
Address code review: fix warning stacklevel, clean up test (#1162)
Move the convergence warning from _sieve_numpy into sieve() so stacklevel=2 always points at the caller regardless of backend. Return (result, converged) tuple from all backend functions. Remove unused import in test_sieve_convergence_warning. Add int32 limit note to _label_connected docstring.
1 parent a2c9019 commit 2455aad

File tree

2 files changed

+36
-26
lines changed

2 files changed

+36
-26
lines changed

xrspatial/sieve.py

Lines changed: 36 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,9 @@ def _label_connected(data, valid, neighborhood):
8181
replacing the previous approach of calling ``scipy.ndimage.label``
8282
once per unique raster value.
8383
84+
Uses int32 indices internally, so the raster must have fewer than
85+
~2.1 billion pixels (roughly 46 000 x 46 000).
86+
8487
Returns
8588
-------
8689
region_map : ndarray of int32 (2D)
@@ -208,7 +211,6 @@ def _sieve_numpy(data, threshold, neighborhood, skip_values):
208211
valid = ~np.isnan(result) if is_float else np.ones(result.shape, dtype=bool)
209212
skip_set = set(skip_values) if skip_values is not None else set()
210213

211-
converged = False
212214
for _ in range(_MAX_ITERATIONS):
213215
region_map, region_val, uid = _label_connected(
214216
result, valid, neighborhood
@@ -225,8 +227,7 @@ def _sieve_numpy(data, threshold, neighborhood, skip_values):
225227
and region_val[rid] not in skip_set
226228
]
227229
if not small_ids:
228-
converged = True
229-
break
230+
return result, True
230231

231232
adjacency = _build_adjacency(region_map, neighborhood)
232233

@@ -262,18 +263,9 @@ def _sieve_numpy(data, threshold, neighborhood, skip_values):
262263
merged_any = True
263264

264265
if not merged_any:
265-
converged = True
266-
break
266+
return result, True
267267

268-
if not converged:
269-
warnings.warn(
270-
f"sieve() did not converge after {_MAX_ITERATIONS} iterations. "
271-
f"The result may still contain regions smaller than "
272-
f"threshold={threshold}.",
273-
stacklevel=3,
274-
)
275-
276-
return result
268+
return result, False
277269

278270

279271
# ---------------------------------------------------------------------------
@@ -285,8 +277,10 @@ def _sieve_cupy(data, threshold, neighborhood, skip_values):
285277
"""CuPy backend: transfer to CPU, sieve, transfer back."""
286278
import cupy as cp
287279

288-
np_result = _sieve_numpy(data.get(), threshold, neighborhood, skip_values)
289-
return cp.asarray(np_result)
280+
np_result, converged = _sieve_numpy(
281+
data.get(), threshold, neighborhood, skip_values
282+
)
283+
return cp.asarray(np_result), converged
290284

291285

292286
# ---------------------------------------------------------------------------
@@ -326,8 +320,10 @@ def _sieve_dask(data, threshold, neighborhood, skip_values):
326320
)
327321

328322
np_data = data.compute()
329-
result = _sieve_numpy(np_data, threshold, neighborhood, skip_values)
330-
return da.from_array(result, chunks=data.chunks)
323+
result, converged = _sieve_numpy(
324+
np_data, threshold, neighborhood, skip_values
325+
)
326+
return da.from_array(result, chunks=data.chunks), converged
331327

332328

333329
def _sieve_dask_cupy(data, threshold, neighborhood, skip_values):
@@ -349,8 +345,10 @@ def _sieve_dask_cupy(data, threshold, neighborhood, skip_values):
349345
pass
350346

351347
cp_data = data.compute()
352-
result = _sieve_cupy(cp_data, threshold, neighborhood, skip_values)
353-
return da.from_array(result, chunks=data.chunks)
348+
result, converged = _sieve_cupy(
349+
cp_data, threshold, neighborhood, skip_values
350+
)
351+
return da.from_array(result, chunks=data.chunks), converged
354352

355353

356354
# ---------------------------------------------------------------------------
@@ -444,21 +442,35 @@ def sieve(
444442
data = raster.data
445443

446444
if isinstance(data, np.ndarray):
447-
out = _sieve_numpy(data, threshold, neighborhood, skip_values)
445+
out, converged = _sieve_numpy(
446+
data, threshold, neighborhood, skip_values
447+
)
448448
elif has_cuda_and_cupy() and is_cupy_array(data):
449-
out = _sieve_cupy(data, threshold, neighborhood, skip_values)
449+
out, converged = _sieve_cupy(
450+
data, threshold, neighborhood, skip_values
451+
)
450452
elif da is not None and isinstance(data, da.Array):
451453
if is_dask_cupy(raster):
452-
out = _sieve_dask_cupy(
454+
out, converged = _sieve_dask_cupy(
453455
data, threshold, neighborhood, skip_values
454456
)
455457
else:
456-
out = _sieve_dask(data, threshold, neighborhood, skip_values)
458+
out, converged = _sieve_dask(
459+
data, threshold, neighborhood, skip_values
460+
)
457461
else:
458462
raise TypeError(
459463
f"Unsupported array type {type(data).__name__} for sieve()"
460464
)
461465

466+
if not converged:
467+
warnings.warn(
468+
f"sieve() did not converge after {_MAX_ITERATIONS} iterations. "
469+
f"The result may still contain regions smaller than "
470+
f"threshold={threshold}.",
471+
stacklevel=2,
472+
)
473+
462474
return DataArray(
463475
out,
464476
name=name,

xrspatial/tests/test_sieve.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -433,8 +433,6 @@ def test_sieve_convergence_warning():
433433
"""Should warn when the iteration limit is reached."""
434434
from unittest.mock import patch
435435

436-
from xrspatial.sieve import _MAX_ITERATIONS
437-
438436
# Create a raster where merging is artificially stalled by
439437
# patching _MAX_ITERATIONS to 0 so the loop never runs.
440438
arr = np.array(

0 commit comments

Comments
 (0)