Skip to content

Commit c535ef2

Browse files
committed
Fast same-CRS merge and early-exit in numba dispatch (#1045)
Two optimizations that make merge 4.5-7.3x faster: 1. Same-CRS tiles skip reprojection entirely. When source and target CRS match, tiles are placed into the output grid by direct coordinate alignment (array slicing). Falls back to full reprojection if resolutions differ by >1%. 2. try_numba_transform now bails before allocating coordinate arrays when neither CRS side is a supported geographic system. This saved ~100ms per tile for unsupported pairs. Merge benchmark (4 overlapping WGS84 tiles): 512x512: 13ms (was 59ms, now 2.3x faster than rioxarray) 1024x1024: 48ms (was 293ms, now 2.6x faster than rioxarray) 2048x2048: 344ms (was 2520ms, now 1.6x faster than rioxarray)
1 parent 26ed84c commit c535ef2

File tree

2 files changed

+101
-9
lines changed

2 files changed

+101
-9
lines changed

xrspatial/reproject/__init__.py

Lines changed: 91 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -715,21 +715,103 @@ def merge(
715715
return result
716716

717717

718+
def _place_same_crs(src_data, src_bounds, src_shape, y_desc,
719+
out_bounds, out_shape, nodata):
720+
"""Place a same-CRS tile into the output grid by coordinate alignment.
721+
722+
No reprojection needed -- just index the output rows/columns that
723+
overlap with the source tile and copy the data.
724+
"""
725+
out_h, out_w = out_shape
726+
src_h, src_w = src_shape
727+
o_left, o_bottom, o_right, o_top = out_bounds
728+
s_left, s_bottom, s_right, s_top = src_bounds
729+
730+
o_res_x = (o_right - o_left) / out_w
731+
o_res_y = (o_top - o_bottom) / out_h
732+
s_res_x = (s_right - s_left) / src_w
733+
s_res_y = (s_top - s_bottom) / src_h
734+
735+
# Output pixel range that this tile covers
736+
col_start = int(round((s_left - o_left) / o_res_x))
737+
col_end = int(round((s_right - o_left) / o_res_x))
738+
row_start = int(round((o_top - s_top) / o_res_y))
739+
row_end = int(round((o_top - s_bottom) / o_res_y))
740+
741+
# Clip to output bounds
742+
col_start_clip = max(0, col_start)
743+
col_end_clip = min(out_w, col_end)
744+
row_start_clip = max(0, row_start)
745+
row_end_clip = min(out_h, row_end)
746+
747+
if col_start_clip >= col_end_clip or row_start_clip >= row_end_clip:
748+
return np.full(out_shape, nodata, dtype=np.float64)
749+
750+
# Source pixel range (handle offset if tile extends beyond output)
751+
src_col_start = col_start_clip - col_start
752+
src_row_start = row_start_clip - row_start
753+
754+
# Resolutions may differ slightly; if close enough, do direct copy
755+
res_ratio_x = s_res_x / o_res_x
756+
res_ratio_y = s_res_y / o_res_y
757+
if abs(res_ratio_x - 1.0) > 0.01 or abs(res_ratio_y - 1.0) > 0.01:
758+
return None # resolutions too different, fall back to reproject
759+
760+
out_data = np.full(out_shape, nodata, dtype=np.float64)
761+
n_rows = row_end_clip - row_start_clip
762+
n_cols = col_end_clip - col_start_clip
763+
764+
# Clamp source window
765+
src_r_end = min(src_row_start + n_rows, src_h)
766+
src_c_end = min(src_col_start + n_cols, src_w)
767+
actual_rows = src_r_end - src_row_start
768+
actual_cols = src_c_end - src_col_start
769+
770+
if actual_rows <= 0 or actual_cols <= 0:
771+
return out_data
772+
773+
src_window = np.asarray(src_data[src_row_start:src_r_end,
774+
src_col_start:src_c_end],
775+
dtype=np.float64)
776+
out_data[row_start_clip:row_start_clip + actual_rows,
777+
col_start_clip:col_start_clip + actual_cols] = src_window
778+
return out_data
779+
780+
718781
def _merge_inmemory(
719782
raster_infos, tgt_wkt, out_bounds, out_shape,
720783
resampling, nodata, strategy,
721784
):
722-
"""In-memory merge using numpy."""
785+
"""In-memory merge using numpy.
786+
787+
Detects same-CRS tiles and uses fast direct placement instead
788+
of reprojection.
789+
"""
790+
from ._crs_utils import _require_pyproj
791+
pyproj = _require_pyproj()
792+
tgt_crs = pyproj.CRS.from_wkt(tgt_wkt)
793+
723794
arrays = []
724795
for info in raster_infos:
725-
reprojected = _reproject_chunk_numpy(
726-
info['raster'].values,
727-
info['src_bounds'], info['src_shape'], info['y_desc'],
728-
info['src_wkt'], tgt_wkt,
729-
out_bounds, out_shape,
730-
resampling, nodata, 16,
731-
)
732-
arrays.append(reprojected)
796+
# Check if source CRS matches target (no reprojection needed)
797+
placed = None
798+
if info['src_crs'] == tgt_crs:
799+
placed = _place_same_crs(
800+
info['raster'].values,
801+
info['src_bounds'], info['src_shape'], info['y_desc'],
802+
out_bounds, out_shape, nodata,
803+
)
804+
if placed is not None:
805+
arrays.append(placed)
806+
else:
807+
reprojected = _reproject_chunk_numpy(
808+
info['raster'].values,
809+
info['src_bounds'], info['src_shape'], info['y_desc'],
810+
info['src_wkt'], tgt_wkt,
811+
out_bounds, out_shape,
812+
resampling, nodata, 16,
813+
)
814+
arrays.append(reprojected)
733815
return _merge_arrays_numpy(arrays, nodata, strategy)
734816

735817

xrspatial/reproject/_projections.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1454,6 +1454,16 @@ def try_numba_transform(src_crs, tgt_crs, chunk_bounds, chunk_shape):
14541454
res_x = (right - left) / width
14551455
res_y = (top - bottom) / height
14561456

1457+
# Quick bail: if neither side is a geographic CRS we support, no fast path.
1458+
# This avoids the expensive array allocation below for unsupported pairs
1459+
# (e.g. same-CRS identity transforms in merge).
1460+
src_is_geo = _is_supported_geographic(src_epsg)
1461+
tgt_is_geo = _is_supported_geographic(tgt_epsg)
1462+
if not src_is_geo and not tgt_is_geo:
1463+
# Neither side is geographic -- can't be a supported pair
1464+
# (all our fast paths have geographic on one side)
1465+
return None
1466+
14571467
# Build output coordinate arrays (target CRS)
14581468
col_1d = np.arange(width, dtype=np.float64)
14591469
row_1d = np.arange(height, dtype=np.float64)

0 commit comments

Comments
 (0)