Skip to content

Commit 917e95d

Browse files
committed
Vectorize point/line extraction, clip lines to bounds, CSR GPU scanline
Point and line extraction now use shapely 2.0 vectorized ops (get_parts/get_coordinates) instead of Python loops. Line segments are clipped to raster bounds via Liang-Barsky before Bresenham, cutting iteration from ~20K to ~100 steps for far-off-screen lines. GPU scanline kernel reads active edges from a precomputed CSR structure instead of binary-searching + scanning dead edges.
1 parent fed736c commit 917e95d

File tree

1 file changed

+276
-41
lines changed

1 file changed

+276
-41
lines changed

xrspatial/rasterize.py

Lines changed: 276 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
"""
1212
from __future__ import annotations
1313

14-
from typing import Optional, Sequence, Tuple, Union
14+
from typing import Optional, Tuple, Union
1515

1616
import numpy as np
1717
import xarray as xr
@@ -347,6 +347,49 @@ def _sort_edges(edge_arrays):
347347

348348
def _extract_points(geometries, values, bounds, height, width):
349349
"""Parse Point/MultiPoint geometries into pixel coordinate arrays."""
350+
if not geometries:
351+
return (np.empty(0, np.int32), np.empty(0, np.int32),
352+
np.empty(0, np.float64))
353+
if _HAS_SHAPELY2:
354+
return _extract_points_vectorized(
355+
geometries, values, bounds, height, width)
356+
return _extract_points_loop(
357+
geometries, values, bounds, height, width)
358+
359+
360+
def _extract_points_vectorized(geometries, values, bounds, height, width):
361+
"""Vectorized point extraction using shapely 2.0 array ops."""
362+
import shapely
363+
364+
xmin, ymin, xmax, ymax = bounds
365+
px = (xmax - xmin) / width
366+
py = (ymax - ymin) / height
367+
368+
geom_arr = np.array(geometries, dtype=object)
369+
val_arr = np.array(values, dtype=np.float64)
370+
371+
# Explode MultiPoints to individual Points
372+
parts, part_idx = shapely.get_parts(geom_arr, return_index=True)
373+
part_vals = val_arr[part_idx]
374+
375+
if len(parts) == 0:
376+
return (np.empty(0, np.int32), np.empty(0, np.int32),
377+
np.empty(0, np.float64))
378+
379+
# Extract coordinates with index back to each point
380+
coords, coord_idx = shapely.get_coordinates(
381+
parts, return_index=True)
382+
pt_vals = part_vals[coord_idx]
383+
384+
cols = np.floor((coords[:, 0] - xmin) / px).astype(np.int32)
385+
rows = np.floor((ymax - coords[:, 1]) / py).astype(np.int32)
386+
387+
valid = (rows >= 0) & (rows < height) & (cols >= 0) & (cols < width)
388+
return (rows[valid], cols[valid], pt_vals[valid])
389+
390+
391+
def _extract_points_loop(geometries, values, bounds, height, width):
392+
"""Loop-based point extraction (shapely < 2.0 fallback)."""
350393
xmin, ymin, xmax, ymax = bounds
351394
px = (xmax - xmin) / width
352395
py = (ymax - ymin) / height
@@ -382,8 +425,154 @@ def _extract_points(geometries, values, bounds, height, width):
382425
# Line segment extraction (always on host)
383426
# ---------------------------------------------------------------------------
384427

428+
_EMPTY_LINES = (np.empty(0, np.int32), np.empty(0, np.int32),
429+
np.empty(0, np.int32), np.empty(0, np.int32),
430+
np.empty(0, np.float64))
431+
432+
385433
def _extract_line_segments(geometries, values, bounds, height, width):
386-
"""Parse LineString/MultiLineString geometries into pixel-space segments."""
434+
"""Parse LineString/MultiLineString geometries into pixel-space segments.
435+
436+
Segments are clipped to the raster extent before conversion to pixel
437+
coordinates, so Bresenham never iterates over out-of-bounds pixels.
438+
"""
439+
if not geometries:
440+
return _EMPTY_LINES
441+
if _HAS_SHAPELY2:
442+
return _extract_lines_vectorized(
443+
geometries, values, bounds, height, width)
444+
return _extract_lines_loop(
445+
geometries, values, bounds, height, width)
446+
447+
448+
def _liang_barsky_clip(x0, y0, x1, y1, xmin, ymin, xmax, ymax):
449+
"""Liang-Barsky line clipping. Returns clipped (x0,y0,x1,y1) or None."""
450+
dx = x1 - x0
451+
dy = y1 - y0
452+
p = np.array([-dx, dx, -dy, dy])
453+
q = np.array([x0 - xmin, xmax - x0, y0 - ymin, ymax - y0])
454+
455+
t0, t1 = 0.0, 1.0
456+
for i in range(4):
457+
if p[i] == 0.0:
458+
if q[i] < 0.0:
459+
return None
460+
elif p[i] < 0.0:
461+
t = q[i] / p[i]
462+
if t > t1:
463+
return None
464+
if t > t0:
465+
t0 = t
466+
else:
467+
t = q[i] / p[i]
468+
if t < t0:
469+
return None
470+
if t < t1:
471+
t1 = t
472+
473+
cx0 = x0 + t0 * dx
474+
cy0 = y0 + t0 * dy
475+
cx1 = x0 + t1 * dx
476+
cy1 = y0 + t1 * dy
477+
return cx0, cy0, cx1, cy1
478+
479+
480+
def _extract_lines_vectorized(geometries, values, bounds, height, width):
481+
"""Vectorized line extraction with Liang-Barsky clipping."""
482+
import shapely
483+
484+
xmin, ymin, xmax, ymax = bounds
485+
px = (xmax - xmin) / width
486+
py = (ymax - ymin) / height
487+
488+
geom_arr = np.array(geometries, dtype=object)
489+
val_arr = np.array(values, dtype=np.float64)
490+
491+
# Explode MultiLineStrings to individual LineStrings
492+
parts, part_idx = shapely.get_parts(geom_arr, return_index=True)
493+
part_vals = val_arr[part_idx]
494+
495+
if len(parts) == 0:
496+
return _EMPTY_LINES
497+
498+
# Get all vertex coordinates with line membership
499+
coords, coord_line_idx = shapely.get_coordinates(
500+
parts, return_index=True)
501+
n_coords = len(coords)
502+
if n_coords < 2:
503+
return _EMPTY_LINES
504+
505+
# Mark last coordinate of each line (don't form cross-line segments)
506+
is_last = np.zeros(n_coords, dtype=bool)
507+
changes = np.nonzero(np.diff(coord_line_idx))[0]
508+
is_last[changes] = True
509+
is_last[-1] = True
510+
511+
# Segments: from each non-last coordinate to its successor
512+
start_idx = np.nonzero(~is_last)[0]
513+
end_idx = start_idx + 1
514+
seg_vals = part_vals[coord_line_idx[start_idx]]
515+
516+
# World-space segment endpoints
517+
x0 = coords[start_idx, 0]
518+
y0 = coords[start_idx, 1]
519+
x1 = coords[end_idx, 0]
520+
y1 = coords[end_idx, 1]
521+
522+
# Vectorized Liang-Barsky clip to raster bounds
523+
dx = x1 - x0
524+
dy = y1 - y0
525+
526+
# p and q arrays: shape (4, n_segments)
527+
p = np.array([-dx, dx, -dy, dy])
528+
q = np.array([x0 - xmin, xmax - x0, y0 - ymin, ymax - y0])
529+
530+
t0 = np.zeros(len(x0))
531+
t1 = np.ones(len(x0))
532+
valid = np.ones(len(x0), dtype=bool)
533+
534+
for i in range(4):
535+
parallel = p[i] == 0.0
536+
outside = parallel & (q[i] < 0.0)
537+
valid &= ~outside
538+
539+
neg = (~parallel) & (p[i] < 0.0)
540+
pos = (~parallel) & (p[i] > 0.0)
541+
542+
with np.errstate(divide='ignore', invalid='ignore'):
543+
t_neg = np.where(neg, q[i] / p[i], 0.0)
544+
t_pos = np.where(pos, q[i] / p[i], 1.0)
545+
546+
t0 = np.where(neg, np.maximum(t0, t_neg), t0)
547+
t1 = np.where(pos, np.minimum(t1, t_pos), t1)
548+
549+
valid &= (t0 <= t1)
550+
551+
# Apply clipping
552+
cx0 = x0 + t0 * dx
553+
cy0 = y0 + t0 * dy
554+
cx1 = x0 + t1 * dx
555+
cy1 = y0 + t1 * dy
556+
557+
# Convert to pixel space and floor to int32
558+
r0 = np.floor((ymax - cy0) / py).astype(np.int32)
559+
c0 = np.floor((cx0 - xmin) / px).astype(np.int32)
560+
r1 = np.floor((ymax - cy1) / py).astype(np.int32)
561+
c1 = np.floor((cx1 - xmin) / px).astype(np.int32)
562+
563+
# Clamp edge cases (clipping guarantees in-bounds but float rounding
564+
# at exact boundaries can produce height or width)
565+
np.clip(r0, 0, height - 1, out=r0)
566+
np.clip(c0, 0, width - 1, out=c0)
567+
np.clip(r1, 0, height - 1, out=r1)
568+
np.clip(c1, 0, width - 1, out=c1)
569+
570+
v = valid
571+
return (r0[v], c0[v], r1[v], c1[v], seg_vals[v])
572+
573+
574+
def _extract_lines_loop(geometries, values, bounds, height, width):
575+
"""Loop-based line extraction with Liang-Barsky clipping (fallback)."""
387576
xmin, ymin, xmax, ymax = bounds
388577
px = (xmax - xmin) / width
389578
py = (ymax - ymin) / height
@@ -401,19 +590,26 @@ def _extract_line_segments(geometries, values, bounds, height, width):
401590
continue
402591
for line in lines:
403592
coords = np.asarray(line.coords)
404-
rows = (ymax - coords[:, 1]) / py
405-
cols = (coords[:, 0] - xmin) / px
406593
for i in range(len(coords) - 1):
407-
all_r0.append(np.int32(int(np.floor(rows[i]))))
408-
all_c0.append(np.int32(int(np.floor(cols[i]))))
409-
all_r1.append(np.int32(int(np.floor(rows[i + 1]))))
410-
all_c1.append(np.int32(int(np.floor(cols[i + 1]))))
594+
clipped = _liang_barsky_clip(
595+
coords[i, 0], coords[i, 1],
596+
coords[i + 1, 0], coords[i + 1, 1],
597+
xmin, ymin, xmax, ymax)
598+
if clipped is None:
599+
continue
600+
cx0, cy0, cx1, cy1 = clipped
601+
r0 = min(max(int(np.floor((ymax - cy0) / py)), 0), height - 1)
602+
c0 = min(max(int(np.floor((cx0 - xmin) / px)), 0), width - 1)
603+
r1 = min(max(int(np.floor((ymax - cy1) / py)), 0), height - 1)
604+
c1 = min(max(int(np.floor((cx1 - xmin) / px)), 0), width - 1)
605+
all_r0.append(np.int32(r0))
606+
all_c0.append(np.int32(c0))
607+
all_r1.append(np.int32(r1))
608+
all_c1.append(np.int32(c1))
411609
all_vals.append(np.float64(val))
412610

413611
if not all_r0:
414-
return (np.empty(0, np.int32), np.empty(0, np.int32),
415-
np.empty(0, np.int32), np.empty(0, np.int32),
416-
np.empty(0, np.float64))
612+
return _EMPTY_LINES
417613
return (np.array(all_r0, np.int32), np.array(all_c0, np.int32),
418614
np.array(all_r1, np.int32), np.array(all_c1, np.int32),
419615
np.array(all_vals, np.float64))
@@ -649,26 +845,22 @@ def _merge_pixel_gpu(out, written, r, c, val, mode):
649845
out[r, c] = out[r, c] + 1.0
650846

651847
@cuda.jit
652-
def _scanline_fill_gpu(out, written, edge_y_min, edge_y_max,
653-
edge_x_at_ymin, edge_inv_slope, edge_value,
654-
edge_geom_id, n_edges, width, mode):
655-
"""CUDA kernel: one thread per raster row."""
848+
def _scanline_fill_gpu(out, written, edge_y_min, edge_x_at_ymin,
849+
edge_inv_slope, edge_value, edge_geom_id,
850+
row_ptr, col_idx, width, mode):
851+
"""CUDA kernel: one thread per raster row, CSR-indexed active edges.
852+
853+
Instead of binary-searching the sorted edge table and scanning
854+
through dead edges, each thread reads its active edge list
855+
directly from the precomputed CSR structure (row_ptr, col_idx).
856+
"""
656857
row = cuda.grid(1)
657858
if row >= out.shape[0]:
658859
return
659860

660-
lo, hi = 0, n_edges
661-
while lo < hi:
662-
mid = (lo + hi) // 2
663-
if edge_y_min[mid] <= row:
664-
lo = mid + 1
665-
else:
666-
hi = mid
667-
668-
count = 0
669-
for e in range(hi):
670-
if edge_y_max[e] >= row:
671-
count += 1
861+
start = row_ptr[row]
862+
end = row_ptr[row + 1]
863+
count = end - start
672864

673865
if count == 0:
674866
return
@@ -681,18 +873,16 @@ def _scanline_fill_gpu(out, written, edge_y_min, edge_y_max,
681873
vs = cuda.local.array(512, dtype=np.float64)
682874
gs = cuda.local.array(512, dtype=np.int32)
683875

684-
idx = 0
685-
for e in range(hi):
686-
if idx >= MAX_ISECT:
876+
actual = 0
877+
for k in range(start, end):
878+
if actual >= MAX_ISECT:
687879
break
688-
if edge_y_max[e] >= row:
689-
xs[idx] = (edge_x_at_ymin[e]
690-
+ (row - edge_y_min[e]) * edge_inv_slope[e])
691-
vs[idx] = edge_value[e]
692-
gs[idx] = edge_geom_id[e]
693-
idx += 1
694-
695-
actual = idx
880+
e = col_idx[k]
881+
xs[actual] = (edge_x_at_ymin[e]
882+
+ (row - edge_y_min[e]) * edge_inv_slope[e])
883+
vs[actual] = edge_value[e]
884+
gs[actual] = edge_geom_id[e]
885+
actual += 1
696886

697887
# Insertion sort by (geom_id, x)
698888
for i in range(1, actual):
@@ -794,6 +984,46 @@ def _burn_lines_gpu(out, written, r0_arr, c0_arr, r1_arr, c1_arr, vals,
794984
return _gpu_kernels
795985

796986

987+
@ngjit
988+
def _build_row_csr_numba(edge_y_min, edge_y_max, height):
989+
"""Numba-accelerated CSR builder for GPU scanline precomputation."""
990+
n_edges = len(edge_y_min)
991+
992+
# Pass 1: count active edges per row
993+
counts = np.zeros(height, dtype=np.int32)
994+
for e in range(n_edges):
995+
y_lo = edge_y_min[e]
996+
y_hi = edge_y_max[e]
997+
if y_hi >= height:
998+
y_hi = height - 1
999+
for r in range(y_lo, y_hi + 1):
1000+
counts[r] += 1
1001+
1002+
# Build row_ptr (prefix sum)
1003+
row_ptr = np.empty(height + 1, dtype=np.int32)
1004+
row_ptr[0] = 0
1005+
for r in range(height):
1006+
row_ptr[r + 1] = row_ptr[r] + counts[r]
1007+
1008+
# Pass 2: fill col_idx
1009+
total = row_ptr[height]
1010+
col_idx = np.empty(total, dtype=np.int32)
1011+
offsets = np.empty(height, dtype=np.int32)
1012+
for r in range(height):
1013+
offsets[r] = row_ptr[r]
1014+
1015+
for e in range(n_edges):
1016+
y_lo = edge_y_min[e]
1017+
y_hi = edge_y_max[e]
1018+
if y_hi >= height:
1019+
y_hi = height - 1
1020+
for r in range(y_lo, y_hi + 1):
1021+
col_idx[offsets[r]] = e
1022+
offsets[r] += 1
1023+
1024+
return row_ptr, col_idx
1025+
1026+
7971027
def _run_cupy(geometries, values, bounds, height, width, fill, dtype,
7981028
all_touched, merge_mode):
7991029
"""CuPy backend for rasterize."""
@@ -816,18 +1046,23 @@ def _run_cupy(geometries, values, bounds, height, width, fill, dtype,
8161046
edge_value, edge_geom_id = edge_arrays
8171047

8181048
if len(edge_y_min) > 0:
1049+
# Build CSR structure on CPU, then transfer to GPU
1050+
row_ptr, col_idx = _build_row_csr_numba(
1051+
edge_y_min, edge_y_max, height)
1052+
8191053
d_y_min = cupy.asarray(edge_y_min)
820-
d_y_max = cupy.asarray(edge_y_max)
8211054
d_x_at_ymin = cupy.asarray(edge_x_at_ymin)
8221055
d_inv_slope = cupy.asarray(edge_inv_slope)
8231056
d_value = cupy.asarray(edge_value)
8241057
d_geom_id = cupy.asarray(edge_geom_id)
1058+
d_row_ptr = cupy.asarray(row_ptr)
1059+
d_col_idx = cupy.asarray(col_idx)
8251060

8261061
tpb = 256
8271062
blocks = (height + tpb - 1) // tpb
8281063
kernels['scanline_fill'][blocks, tpb](
829-
out, written, d_y_min, d_y_max, d_x_at_ymin, d_inv_slope,
830-
d_value, d_geom_id, len(edge_y_min), width, merge_mode)
1064+
out, written, d_y_min, d_x_at_ymin, d_inv_slope,
1065+
d_value, d_geom_id, d_row_ptr, d_col_idx, width, merge_mode)
8311066

8321067
# 2. Lines
8331068
r0, c0, r1, c1, lvals = _extract_line_segments(

0 commit comments

Comments
 (0)