Skip to content

Commit a8833e3

Browse files
committed
Improve polygonize performance: single-pass tracing, JIT merge helpers, batch shapely (#1008)
Replace the two-pass _follow with a single-pass implementation using a dynamically-grown buffer. This eliminates retracing every polygon boundary a second time, which was the dominant cost for rasters with many small regions. Add @ngjit to _point_in_ring, _simplify_ring, and _signed_ring_area so the dask chunk-merge path runs compiled instead of interpreted. Use shapely.polygons() batch constructor for hole-free polygons in _to_geopandas (shapely 2.0+, with fallback for older versions).
1 parent 0d1bdfb commit a8833e3

File tree

1 file changed

+141
-104
lines changed

1 file changed

+141
-104
lines changed

xrspatial/polygonize.py

Lines changed: 141 additions & 104 deletions
Original file line numberDiff line numberDiff line change
@@ -107,10 +107,10 @@ def _min_and_max(value0, value1):
107107
# facing W, and if hole is False the start is on the S edge of the pixel
108108
# facing E.
109109
#
110-
# There are two passes. First pass determines the number of points and
111-
# allocates a numpy array big enough to take the points, the second pass saves
112-
# the points. This is better than a single pass that repeatedly reallocates
113-
# space for the increasing number of points.
110+
# Single pass with a dynamically-grown buffer (doubling strategy). Buffer
111+
# starts at 64 points and grows as needed. Amortized cost is O(1) per point
112+
# from O(log n) reallocations, which is cheaper than retracing the boundary
113+
# a second time.
114114
#
115115
# Returns the region ID and the 2D array of boundary points. The last
116116
# boundary point is the same as the first.
@@ -125,98 +125,104 @@ def _follow(
125125
) -> Tuple[int, np.ndarray]:
126126
region = regions[ij]
127127
n = nx*ny
128-
points = None # Declare before loop for numba
129128

130-
for pass_ in range(2):
131-
prev_forward = 0 # Start with an invalid direction.
132-
if hole:
133-
forward = -1 # Facing W along N edge.
134-
left = -nx
135-
else:
136-
forward = 1 # Facing E along S edge.
137-
left = nx
138-
139-
start_forward = forward
140-
start_ij = ij
141-
npoints = 0
142-
143-
while True:
144-
if pass_ == 1:
145-
if forward == 1 and not hole:
146-
# Mark pixel so that it will not be considered a future
147-
# non-hole starting pixel.
148-
visited[ij] |= 1
149-
elif forward == -1 and ij+nx < n:
150-
# Mark pixel so that is will not be considered a future
151-
# hole starting pixel.
152-
visited[ij+nx] |= 2
153-
154-
if prev_forward != forward:
155-
if pass_ == 1:
156-
# Add point.
157-
i = ij % nx
158-
j = ij // nx
159-
if forward == -1:
160-
i += 1
161-
j += 1
162-
elif forward == nx:
163-
i += 1
164-
elif forward == -nx:
165-
j += 1
166-
points[2*npoints] = i
167-
points[2*npoints+1] = j
168-
npoints += 1
129+
# Dynamic buffer for point coordinates (x, y pairs stored flat).
130+
buf_cap = 64
131+
points = np.empty(2 * buf_cap)
132+
npoints = 0
133+
134+
prev_forward = 0 # Start with an invalid direction.
135+
if hole:
136+
forward = -1 # Facing W along N edge.
137+
left = -nx
138+
else:
139+
forward = 1 # Facing E along S edge.
140+
left = nx
141+
142+
start_forward = forward
143+
start_ij = ij
144+
145+
while True:
146+
if forward == 1 and not hole:
147+
# Mark pixel so that it will not be considered a future
148+
# non-hole starting pixel.
149+
visited[ij] |= 1
150+
elif forward == -1 and ij+nx < n:
151+
# Mark pixel so that it will not be considered a future
152+
# hole starting pixel.
153+
visited[ij+nx] |= 2
154+
155+
if prev_forward != forward:
156+
# Grow buffer if needed.
157+
if npoints >= buf_cap:
158+
old_points = points
159+
buf_cap *= 2
160+
points = np.empty(2 * buf_cap)
161+
points[:2*npoints] = old_points[:2*npoints]
162+
163+
# Add point.
164+
i = ij % nx
165+
j = ij // nx
166+
if forward == -1:
167+
i += 1
168+
j += 1
169+
elif forward == nx:
170+
i += 1
171+
elif forward == -nx:
172+
j += 1
173+
points[2*npoints] = i
174+
points[2*npoints+1] = j
175+
npoints += 1
176+
177+
prev_forward = forward
178+
ijnext = ij + forward
179+
ijnext_right = ijnext - left
180+
181+
# Determine direction of turn.
182+
if abs(forward) == 1: # Facing E (forward == 1) or W (forward -1)
183+
if _diff_row(ij, ijnext, nx):
184+
turn = Turn.Left
185+
elif (not _outside_domain(ijnext_right, n) and
186+
regions[ijnext_right] == region):
187+
turn = Turn.Right
188+
elif regions[ijnext] == region:
189+
turn = Turn.Straight
190+
else:
191+
turn = Turn.Left
192+
else: # Facing N (forward == nx) or S (forward == -nx)
193+
if _outside_domain(ijnext, n):
194+
turn = Turn.Left
195+
elif (not _diff_row(ijnext, ijnext_right, nx) and
196+
regions[ijnext_right] == region):
197+
turn = Turn.Right
198+
elif regions[ijnext] == region:
199+
turn = Turn.Straight
200+
else:
201+
turn = Turn.Left
169202

203+
# Apply turn.
204+
if turn == Turn.Straight:
205+
ij = ijnext
206+
elif turn == Turn.Left:
170207
prev_forward = forward
171-
ijnext = ij + forward
172-
ijnext_right = ijnext - left
173-
174-
# Determine direction of turn.
175-
if abs(forward) == 1: # Facing E (forward == 1) or W (forward -1)
176-
if _diff_row(ij, ijnext, nx):
177-
turn = Turn.Left
178-
elif (not _outside_domain(ijnext_right, n) and
179-
regions[ijnext_right] == region):
180-
turn = Turn.Right
181-
elif regions[ijnext] == region:
182-
turn = Turn.Straight
183-
else:
184-
turn = Turn.Left
185-
else: # Facing N (forward == nx) or S (forward == -nx)
186-
if _outside_domain(ijnext, n):
187-
turn = Turn.Left
188-
elif (not _diff_row(ijnext, ijnext_right, nx) and
189-
regions[ijnext_right] == region):
190-
turn = Turn.Right
191-
elif regions[ijnext] == region:
192-
turn = Turn.Straight
193-
else:
194-
turn = Turn.Left
195-
196-
# Apply turn.
197-
if turn == Turn.Straight:
198-
ij = ijnext
199-
elif turn == Turn.Left:
200-
prev_forward = forward
201-
forward = left
202-
left = -prev_forward
203-
else: # Turn.Right
204-
prev_forward = forward
205-
forward = -left
206-
left = prev_forward
207-
ij = ijnext_right
208-
209-
# Finished boundary when returned to start.
210-
if ij == start_ij and forward == start_forward:
211-
break
208+
forward = left
209+
left = -prev_forward
210+
else: # Turn.Right
211+
prev_forward = forward
212+
forward = -left
213+
left = prev_forward
214+
ij = ijnext_right
212215

213-
if pass_ == 0:
214-
# End of first pass.
215-
points = np.empty(2*(npoints+1)) # Note extra point at end.
216+
# Finished boundary when returned to start.
217+
if ij == start_ij and forward == start_forward:
218+
break
216219

217-
points = points.reshape((-1, 2))
218-
points[-1] = points[0] # End point the same as start point.
219-
return region, points
220+
# Trim buffer to actual size and add closing point.
221+
result = np.empty(2*(npoints+1))
222+
result[:2*npoints] = points[:2*npoints]
223+
result = result.reshape((-1, 2))
224+
result[-1] = result[0] # End point the same as start point.
225+
return region, result
220226

221227

222228
# Generator of numba-compatible comparison functions for values.
@@ -474,11 +480,34 @@ def _to_geopandas(
474480
column_name: str,
475481
):
476482
import geopandas as gpd
483+
import shapely
477484
from shapely.geometry import Polygon
478485

479-
# Convert list of point arrays to shapely Polygons.
480-
polygons = list(map(
481-
lambda points: Polygon(points[0], points[1:]), polygon_points))
486+
if hasattr(shapely, 'polygons'):
487+
# Shapely 2.0+: batch-construct hole-free polygons via
488+
# linearrings -> polygons pipeline (both are C-level batch ops).
489+
no_holes = [i for i, pts in enumerate(polygon_points)
490+
if len(pts) == 1]
491+
492+
if len(no_holes) == len(polygon_points):
493+
# All hole-free: batch create LinearRings then Polygons.
494+
rings = [shapely.linearrings(pts[0])
495+
for pts in polygon_points]
496+
polygons = list(shapely.polygons(rings))
497+
else:
498+
polygons = [None] * len(polygon_points)
499+
if no_holes:
500+
rings = [shapely.linearrings(polygon_points[i][0])
501+
for i in no_holes]
502+
batch = shapely.polygons(rings)
503+
for idx, poly in zip(no_holes, batch):
504+
polygons[idx] = poly
505+
for i, pts in enumerate(polygon_points):
506+
if polygons[i] is None:
507+
polygons[i] = Polygon(pts[0], pts[1:])
508+
else:
509+
# Shapely < 2.0 fallback.
510+
polygons = [Polygon(pts[0], pts[1:]) for pts in polygon_points]
482511

483512
df = gpd.GeoDataFrame({column_name: column, "geometry": polygons})
484513
return df
@@ -823,32 +852,40 @@ def _trace_rings(edge_set):
823852
return rings
824853

825854

855+
@ngjit
826856
def _simplify_ring(ring):
827857
"""Remove collinear (redundant) vertices from a closed ring."""
828858
n = len(ring) - 1 # exclude closing point
829859
if n <= 3:
830860
return ring
831-
keep = []
861+
# Single pass into pre-allocated output.
862+
out = np.empty((n + 1, 2), dtype=np.float64)
863+
count = 0
832864
for i in range(n):
833-
prev = ring[(i - 1) % n]
834-
curr = ring[i]
835-
nxt = ring[(i + 1) % n]
836-
if not ((prev[0] == curr[0] == nxt[0]) or
837-
(prev[1] == curr[1] == nxt[1])):
838-
keep.append(curr)
839-
if len(keep) < n:
840-
keep.append(keep[0])
841-
return np.array(keep, dtype=np.float64)
865+
pi = (i - 1) % n
866+
ni = (i + 1) % n
867+
if not ((ring[pi, 0] == ring[i, 0] == ring[ni, 0]) or
868+
(ring[pi, 1] == ring[i, 1] == ring[ni, 1])):
869+
out[count, 0] = ring[i, 0]
870+
out[count, 1] = ring[i, 1]
871+
count += 1
872+
if count < n:
873+
out[count, 0] = out[0, 0]
874+
out[count, 1] = out[0, 1]
875+
count += 1
876+
return out[:count].copy()
842877
return ring
843878

844879

880+
@ngjit
845881
def _signed_ring_area(ring):
846882
"""Shoelace formula for signed area of a closed ring."""
847883
x = ring[:, 0]
848884
y = ring[:, 1]
849885
return 0.5 * (np.dot(x[:-1], y[1:]) - np.dot(x[1:], y[:-1]))
850886

851887

888+
@ngjit
852889
def _point_in_ring(px, py, ring):
853890
"""Ray-casting point-in-polygon test."""
854891
n = len(ring) - 1

0 commit comments

Comments
 (0)