Skip to content

Commit 5e7702a

Browse files
committed
Promote polygonize out of experimental, add GPU CCL, dispatcher, GeoJSON
- Move polygonize.py from xrspatial/experimental/ to xrspatial/ and update all imports (tests, benchmarks, README, __init__, accessor) - Replace isinstance dispatch chain with ArrayTypeFunctionMapping - Add GPU-accelerated CCL via cupyx.scipy.ndimage.label with vectorized region assignment and raster-scan renumbering for _scan compatibility - Refactor _scan to accept pre-computed regions array - Add GeoJSON FeatureCollection return_type ("geojson") - Add CCL vs tracing phase benchmarks (PolygonizeCCL, PolygonizeTracing) - Add end-to-end CuPy benchmark (PolygonizeCuPy)
1 parent 2e7dae6 commit 5e7702a

7 files changed

Lines changed: 251 additions & 29 deletions

File tree

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -212,7 +212,7 @@ In the GIS world, rasters are used for representing continuous phenomena (e.g. e
212212

213213
| Name | Description | NumPy xr.DataArray | Dask xr.DataArray | CuPy GPU xr.DataArray | Dask GPU xr.DataArray |
214214
|:-----|:------------|:------------------:|:-----------------:|:---------------------:|:---------------------:|
215-
| [Polygonize](xrspatial/experimental/polygonize.py) | Converts contiguous regions of equal value into vector polygons | ✅️ | ✅️ | ✅️ | ✅️ |
215+
| [Polygonize](xrspatial/polygonize.py) | Converts contiguous regions of equal value into vector polygons | ✅️ | ✅️ | ✅️ | ✅️ |
216216

217217
--------
218218

benchmarks/benchmarks/polygonize.py

Lines changed: 83 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,17 @@
11
import numpy as np
22
import xarray as xr
33

4-
from xrspatial.experimental import polygonize
4+
from xrspatial import polygonize
5+
from xrspatial.polygonize import (
6+
_calculate_regions, _scan,
7+
)
8+
9+
try:
10+
import cupy
11+
from xrspatial.polygonize import _calculate_regions_cupy
12+
_has_cupy = True
13+
except ImportError:
14+
_has_cupy = False
515

616

717
class Polygonize:
@@ -40,3 +50,75 @@ def time_polygonize(self, nx, ret):
4050
gpd.GeoDataFrame({"DN": values, "geometry": shapes})
4151
else:
4252
polygonize(self.raster, mask=self.mask, return_type=ret)
53+
54+
55+
class PolygonizeCuPy:
56+
params = ([100, 300, 1000],)
57+
param_names = ("nx",)
58+
59+
def setup(self, nx):
60+
if not _has_cupy:
61+
raise NotImplementedError("CuPy not available")
62+
ny = nx // 2
63+
rng = np.random.default_rng(9461713)
64+
raster = rng.integers(low=0, high=4, size=(ny, nx), dtype=np.int32)
65+
mask = rng.uniform(0, 1, size=(ny, nx)) < 0.9
66+
self.raster = xr.DataArray(cupy.asarray(raster))
67+
self.mask = xr.DataArray(cupy.asarray(mask))
68+
69+
def time_polygonize(self, nx):
70+
polygonize(self.raster, mask=self.mask)
71+
72+
73+
class PolygonizeCCL:
74+
"""Benchmark connected-component labeling phase."""
75+
params = ([100, 300, 1000], ["numpy", "cupy"])
76+
param_names = ("nx", "backend")
77+
78+
def setup(self, nx, backend):
79+
if backend == "cupy" and not _has_cupy:
80+
raise NotImplementedError("CuPy not available")
81+
ny = nx // 2
82+
rng = np.random.default_rng(9461713)
83+
raster = rng.integers(low=0, high=4, size=(ny, nx), dtype=np.int32)
84+
mask = (rng.uniform(0, 1, size=(ny, nx)) < 0.9)
85+
if backend == "cupy":
86+
self.data = cupy.asarray(raster)
87+
self.mask = cupy.asarray(mask)
88+
else:
89+
self.data = raster
90+
self.mask = mask
91+
self.ny, self.nx = ny, nx
92+
self.backend = backend
93+
94+
def time_calculate_regions(self, nx, backend):
95+
if backend == "cupy":
96+
_calculate_regions_cupy(self.data, self.mask, False)
97+
cupy.cuda.Device().synchronize()
98+
else:
99+
_calculate_regions(
100+
self.data.ravel(), self.mask.ravel(), False, self.nx, self.ny)
101+
102+
103+
class PolygonizeTracing:
104+
"""Benchmark boundary tracing phase (always CPU)."""
105+
params = ([100, 300, 1000],)
106+
param_names = ("nx",)
107+
108+
def setup(self, nx):
109+
ny = nx // 2
110+
rng = np.random.default_rng(9461713)
111+
raster = rng.integers(low=0, high=4, size=(ny, nx), dtype=np.int32)
112+
mask = (rng.uniform(0, 1, size=(ny, nx)) < 0.9)
113+
values_flat = raster.ravel()
114+
mask_flat = mask.ravel()
115+
self.regions = _calculate_regions(
116+
values_flat, mask_flat, False, nx, ny)
117+
self.values = values_flat
118+
self.mask = mask_flat
119+
self.nx = nx
120+
self.ny = ny
121+
122+
def time_scan(self, nx):
123+
_scan(self.regions, self.values, self.mask, False, None,
124+
self.nx, self.ny)

xrspatial/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
from xrspatial.proximity import proximity # noqa
3434
from xrspatial.slope import slope # noqa
3535
from xrspatial.terrain import generate_terrain # noqa
36+
from xrspatial.polygonize import polygonize # noqa
3637
from xrspatial.viewshed import viewshed # noqa
3738
from xrspatial.zonal import apply as zonal_apply # noqa
3839
from xrspatial.zonal import crop # noqa

xrspatial/accessor.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,12 @@ def mahalanobis(self, other_bands, **kwargs):
145145
from .mahalanobis import mahalanobis
146146
return mahalanobis([self._obj] + list(other_bands), **kwargs)
147147

148+
# ---- Raster to vector ----
149+
150+
def polygonize(self, **kwargs):
151+
from .polygonize import polygonize
152+
return polygonize(self._obj, **kwargs)
153+
148154
# ---- Multispectral (self = NIR band) ----
149155

150156
def ndvi(self, red_agg, **kwargs):

xrspatial/experimental/__init__.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1 @@
11
from .min_observable_height import min_observable_height # noqa
2-
from .polygonize import polygonize # noqa
Lines changed: 129 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@
4545
except ImportError:
4646
cupy = None
4747

48-
from ..utils import is_cupy_array, ngjit
48+
from .utils import ArrayTypeFunctionMapping, is_cupy_array, ngjit
4949

5050
_regions_dtype = np.uint32
5151
_visited_dtype = np.uint8
@@ -417,15 +417,14 @@ def _transform_points(
417417

418418
@ngjit
419419
def _scan(
420+
regions: np.ndarray, # _regions_dtype, shape (nx*ny,)
420421
values: np.ndarray, # shape (nx*ny,)
421422
mask: Optional[np.ndarray], # shape (nx*ny,)
422423
connectivity_8: bool,
423424
transform: Optional[np.ndarray], # shape (6,)
424425
nx: int,
425426
ny: int,
426427
) -> Tuple[List[Union[int, float]], List[List[np.ndarray]]]:
427-
regions = _calculate_regions(values, mask, connectivity_8, nx, ny)
428-
429428
# Visited flags used to denote where boundaries have already been
430429
# followed and hence are not future start positions.
431430
visited = np.zeros_like(values, dtype=_visited_dtype)
@@ -503,6 +502,23 @@ def _to_spatialpandas(
503502
return df
504503

505504

505+
def _to_geojson(
506+
column: List[Union[int, float]],
507+
polygon_points: List[List[np.ndarray]],
508+
column_name: str,
509+
):
510+
"""Convert to GeoJSON FeatureCollection dict."""
511+
features = []
512+
for value, rings in zip(column, polygon_points):
513+
coords = [ring.tolist() for ring in rings]
514+
features.append({
515+
"type": "Feature",
516+
"properties": {column_name: value},
517+
"geometry": {"type": "Polygon", "coordinates": coords},
518+
})
519+
return {"type": "FeatureCollection", "features": features}
520+
521+
506522
def _polygonize_numpy(
507523
values: np.ndarray,
508524
mask: Optional[np.ndarray],
@@ -523,12 +539,12 @@ def _polygonize_numpy(
523539
mask = np.zeros_like(values, dtype=bool)
524540
mask[:, 0] = True
525541

526-
values = values.ravel()
527-
if mask is not None:
528-
mask = mask.ravel()
542+
values_flat = values.ravel()
543+
mask_flat = mask.ravel() if mask is not None else None
529544

545+
regions = _calculate_regions(values_flat, mask_flat, connectivity_8, nx, ny)
530546
column, polygon_points = _scan(
531-
values, mask, connectivity_8, transform, nx, ny)
547+
regions, values_flat, mask_flat, connectivity_8, transform, nx, ny)
532548

533549
return column, polygon_points
534550

@@ -537,6 +553,100 @@ def _polygonize_numpy(
537553
_DIR_ANGLE = {(1, 0): 0, (0, 1): 1, (-1, 0): 2, (0, -1): 3}
538554

539555

556+
def _calculate_regions_cupy(data, mask_data, connectivity_8):
557+
"""CuPy GPU backend for connected-component labeling.
558+
559+
Uses cupyx.scipy.ndimage.label per unique value to produce a regions
560+
array compatible with _scan. Returns a cupy uint32 2D array.
561+
"""
562+
import cupy as cp
563+
from cupyx.scipy.ndimage import label as cp_label
564+
565+
if connectivity_8:
566+
structure = cp.ones((3, 3), dtype=cp.int32)
567+
else:
568+
structure = cp.array([[0, 1, 0], [1, 1, 1], [0, 1, 0]], dtype=cp.int32)
569+
570+
regions = cp.zeros(data.shape, dtype=cp.uint32)
571+
572+
# Build valid mask (unmask + handle float NaN).
573+
is_float = cp.issubdtype(data.dtype, cp.floating)
574+
if mask_data is not None:
575+
valid = cp.asarray(mask_data, dtype=bool)
576+
if is_float:
577+
valid &= ~cp.isnan(data)
578+
else:
579+
valid = ~cp.isnan(data) if is_float else None
580+
581+
unique_vals = data[valid] if valid is not None else data.ravel()
582+
unique_vals = cp.unique(unique_vals)
583+
584+
uid = 1
585+
for v in unique_vals:
586+
bin_mask = (data == v)
587+
if valid is not None:
588+
bin_mask &= valid
589+
labeled, n_features = cp_label(bin_mask, structure=structure)
590+
if n_features == 0:
591+
continue
592+
# Vectorized assignment: offset labeled region IDs by (uid - 1) so
593+
# label 1 → uid, label 2 → uid+1, etc. Single kernel, no Python loop.
594+
where = labeled > 0
595+
regions[where] = (labeled[where].astype(cp.uint32) +
596+
cp.uint32(uid - 1))
597+
uid += n_features
598+
599+
return regions
600+
601+
602+
@ngjit
603+
def _renumber_regions(regions, nx, ny):
604+
"""Renumber regions so IDs are sequential in raster-scan order.
605+
606+
_scan expects region 1 to have the smallest ij, region 2 the next, etc.
607+
GPU CCL numbers regions per-value, not spatially. This pass assigns
608+
new sequential IDs in the order regions are first encountered.
609+
"""
610+
n = nx * ny
611+
max_old = 0
612+
for i in range(n):
613+
if regions[i] > max_old:
614+
max_old = regions[i]
615+
616+
# Map from old region ID to new sequential ID.
617+
remap = np.zeros(max_old + 1, dtype=_regions_dtype)
618+
new_id = _regions_dtype(0)
619+
for ij in range(n):
620+
old = regions[ij]
621+
if old == 0:
622+
continue
623+
if remap[old] == 0:
624+
new_id += 1
625+
remap[old] = new_id
626+
regions[ij] = remap[old]
627+
628+
return regions
629+
630+
631+
def _polygonize_cupy(data, mask_data, connectivity_8, transform):
632+
"""Hybrid GPU/CPU: GPU CCL, CPU boundary tracing."""
633+
np_data = cupy.asnumpy(data)
634+
np_mask = cupy.asnumpy(mask_data) if mask_data is not None else None
635+
ny, nx = np_data.shape
636+
if nx == 1:
637+
# Edge case: fall back to full numpy path (pads array).
638+
return _polygonize_numpy(np_data, np_mask, connectivity_8, transform)
639+
regions_gpu = _calculate_regions_cupy(data, mask_data, connectivity_8)
640+
regions = cupy.asnumpy(regions_gpu).ravel()
641+
# Renumber into raster-scan order for _scan compatibility.
642+
regions = _renumber_regions(regions, nx, ny)
643+
column, polygon_points = _scan(
644+
regions, np_data.ravel(),
645+
np_mask.ravel() if np_mask is not None else None,
646+
connectivity_8, transform, nx, ny)
647+
return column, polygon_points
648+
649+
540650
def _to_numpy(arr):
541651
"""Convert array to numpy, handling cupy arrays."""
542652
if cupy is not None and isinstance(arr, cupy.ndarray):
@@ -921,8 +1031,8 @@ def polygonize(
9211031
9221032
return_type: str, default="numpy"
9231033
Format of returned data. Allowed values are "numpy", "spatialpandas",
924-
"geopandas" and "awkward". Only "numpy" is always available, the
925-
others require optional dependencies.
1034+
"geopandas", "awkward" and "geojson". "numpy" and "geojson" are
1035+
always available, the others require optional dependencies.
9261036
9271037
Returns
9281038
-------
@@ -970,22 +1080,14 @@ def polygonize(
9701080
raise ValueError(
9711081
f"Incorrect transform length of {len(transform)} instead of 6")
9721082

973-
if isinstance(raster.data, np.ndarray):
974-
column, polygon_points = _polygonize_numpy(
975-
raster.data, mask_data, connectivity_8, transform)
976-
elif cupy is not None and is_cupy_array(raster.data):
977-
# Hybrid GPU/CPU: transfer to CPU for boundary tracing.
978-
np_data = cupy.asnumpy(raster.data)
979-
np_mask = cupy.asnumpy(mask_data) if mask_data is not None else None
980-
column, polygon_points = _polygonize_numpy(
981-
np_data, np_mask, connectivity_8, transform)
982-
elif da is not None and isinstance(raster.data, da.Array):
983-
# Handles both dask+numpy and dask+cupy (chunks converted in
984-
# _polygonize_chunk).
985-
column, polygon_points = _polygonize_dask(
986-
raster.data, mask_data, connectivity_8, transform)
987-
else:
988-
raise TypeError(f"Unsupported array type: {type(raster.data)}")
1083+
mapper = ArrayTypeFunctionMapping(
1084+
numpy_func=_polygonize_numpy,
1085+
cupy_func=_polygonize_cupy,
1086+
dask_func=_polygonize_dask,
1087+
dask_cupy_func=_polygonize_dask,
1088+
)
1089+
column, polygon_points = mapper(raster)(
1090+
raster.data, mask_data, connectivity_8, transform)
9891091

9901092
# Convert to requested return_type.
9911093
if return_type == "numpy":
@@ -996,5 +1098,7 @@ def polygonize(
9961098
return _to_geopandas(column, polygon_points, column_name)
9971099
elif return_type == "spatialpandas":
9981100
return _to_spatialpandas(column, polygon_points, column_name)
1101+
elif return_type == "geojson":
1102+
return _to_geojson(column, polygon_points, column_name)
9991103
else:
10001104
raise ValueError(f"Invalid return_type '{return_type}'")

xrspatial/tests/test_polygonize.py

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
sp = None
2727

2828

29-
from ..experimental.polygonize import polygonize
29+
from ..polygonize import polygonize
3030
from .general_checks import cuda_and_cupy_available, dask_array_available
3131

3232

@@ -204,6 +204,36 @@ def test_polygonize_invalid_return_type(raster_3x3):
204204
polygonize(raster, return_type=return_type)
205205

206206

207+
@pytest.mark.parametrize(
208+
"dtype",
209+
[np.int32, np.int64, np.float32, np.float64])
210+
@pytest.mark.parametrize("connectivity", [4, 8])
211+
def test_polygonize_geojson(raster_3x3, connectivity):
212+
raster = xr.DataArray(raster_3x3)
213+
fc = polygonize(
214+
raster, return_type="geojson", connectivity=connectivity)
215+
assert isinstance(fc, dict)
216+
assert fc["type"] == "FeatureCollection"
217+
features = fc["features"]
218+
assert len(features) == 3
219+
220+
values = [f["properties"]["DN"] for f in features]
221+
assert_allclose(values, [0, 1, 4])
222+
223+
for f in features:
224+
assert f["type"] == "Feature"
225+
assert f["geometry"]["type"] == "Polygon"
226+
coords = f["geometry"]["coordinates"]
227+
assert isinstance(coords, list)
228+
assert len(coords) >= 1
229+
for ring in coords:
230+
# Ring is a list of [x, y] pairs.
231+
assert isinstance(ring, list)
232+
assert len(ring) >= 4
233+
# Ring is closed.
234+
assert ring[0] == ring[-1]
235+
236+
207237
@pytest.mark.parametrize("transform", [
208238
(1, 0, 0, 0, 1, 0),
209239
(1.2, -0.3, 0.2, 1.4, 0.7, 0.1)])

0 commit comments

Comments
 (0)