Skip to content

Commit a4b7507

Browse files
authored
Fix CuPy uint8 overflow and CUDA cubic NaN fallback (#1054) (#1064)
Two remaining gaps from issue #1054: 1. CuPy resampling paths lacked integer clipping, so cubic ringing on uint8 data could silently wrap (e.g. 260 -> 4). Both _resample_cupy_native and _resample_cupy now clip results before casting, matching the NumPy path. 2. The CUDA cubic kernel wrote nodata when any of the 16 neighbors was NaN. It now falls back to bilinear with weight renormalization, matching the CPU Numba JIT behavior.
1 parent 784168f commit a4b7507

File tree

2 files changed

+210
-6
lines changed

2 files changed

+210
-6
lines changed

xrspatial/reproject/_interpolate.py

Lines changed: 60 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -649,10 +649,48 @@ def _resample_cubic_cuda(src, row_coords, col_coords, out, nodata):
649649
rv3 += sv * wc3
650650
val += rv3 * wr3
651651

652-
if has_nan:
653-
out[i, j] = nodata
654-
else:
652+
if not has_nan:
655653
out[i, j] = val
654+
else:
655+
# Fall back to bilinear with weight renormalization
656+
r1b = r0 + 1
657+
c1b = c0 + 1
658+
dr = r - r0
659+
dc = c - c0
660+
661+
w00 = (1.0 - dr) * (1.0 - dc)
662+
w01 = (1.0 - dr) * dc
663+
w10 = dr * (1.0 - dc)
664+
w11 = dr * dc
665+
666+
accum = 0.0
667+
wsum = 0.0
668+
669+
if 0 <= r0 < sh and 0 <= c0 < sw:
670+
v = src[r0, c0]
671+
if v == v:
672+
accum += w00 * v
673+
wsum += w00
674+
if 0 <= r0 < sh and 0 <= c1b < sw:
675+
v = src[r0, c1b]
676+
if v == v:
677+
accum += w01 * v
678+
wsum += w01
679+
if 0 <= r1b < sh and 0 <= c0 < sw:
680+
v = src[r1b, c0]
681+
if v == v:
682+
accum += w10 * v
683+
wsum += w10
684+
if 0 <= r1b < sh and 0 <= c1b < sw:
685+
v = src[r1b, c1b]
686+
if v == v:
687+
accum += w11 * v
688+
wsum += w11
689+
690+
if wsum > 1e-10:
691+
out[i, j] = accum / wsum
692+
else:
693+
out[i, j] = nodata
656694

657695

658696
# ---------------------------------------------------------------------------
@@ -722,19 +760,32 @@ def _resample_cupy_native(source_window, src_row_coords, src_col_coords,
722760
work, rc, cc, out, nd
723761
)
724762
if is_integer:
725-
out = cp.round(out).astype(source_window.dtype)
763+
info = cp.iinfo(source_window.dtype)
764+
out = cp.clip(cp.round(out), info.min, info.max).astype(
765+
source_window.dtype
766+
)
726767
return out
727768

728769
if order == 1:
729770
_resample_bilinear_cuda[blocks_per_grid, threads_per_block](
730771
work, rc, cc, out, nd
731772
)
773+
if is_integer:
774+
info = cp.iinfo(source_window.dtype)
775+
out = cp.clip(cp.round(out), info.min, info.max).astype(
776+
source_window.dtype
777+
)
732778
return out
733779

734780
# Cubic
735781
_resample_cubic_cuda[blocks_per_grid, threads_per_block](
736782
work, rc, cc, out, nd
737783
)
784+
if is_integer:
785+
info = cp.iinfo(source_window.dtype)
786+
out = cp.clip(cp.round(out), info.min, info.max).astype(
787+
source_window.dtype
788+
)
738789
return out
739790

740791

@@ -797,7 +848,10 @@ def _resample_cupy(source_window, src_row_coords, src_col_coords,
797848

798849
result[oob] = nodata
799850

800-
if is_integer and resampling == 'nearest':
801-
result = cp.round(result).astype(source_window.dtype)
851+
if is_integer:
852+
info = cp.iinfo(source_window.dtype)
853+
result = cp.clip(cp.round(result), info.min, info.max).astype(
854+
source_window.dtype
855+
)
802856

803857
return result

xrspatial/tests/test_reproject.py

Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1009,3 +1009,153 @@ def test_merge_single_tile(self):
10091009
)
10101010
r = merge([t])
10111011
assert np.any(np.isfinite(r.values))
1012+
1013+
1014+
# ---------------------------------------------------------------------------
1015+
# CuPy resampler unit tests (integer clipping + cubic NaN fallback)
1016+
# ---------------------------------------------------------------------------
1017+
1018+
@pytest.mark.skipif(not HAS_CUPY, reason="CuPy not installed")
1019+
class TestCuPyResamplerClipping:
1020+
"""Verify uint8 overflow protection in CuPy resampling paths."""
1021+
1022+
def _sharp_edge_inputs(self):
1023+
"""Build a uint8 source with a sharp 0->255 edge and coordinate grids
1024+
that place sample points right at the transition (where cubic ringing
1025+
produces out-of-range values)."""
1026+
src = np.zeros((16, 16), dtype=np.float64)
1027+
src[:, 8:] = 255.0
1028+
1029+
# Sample at half-pixel offsets across the edge
1030+
rows, cols = np.meshgrid(
1031+
np.linspace(2, 13, 24), np.linspace(6.5, 9.5, 24), indexing='ij'
1032+
)
1033+
return src, rows.astype(np.float64), cols.astype(np.float64)
1034+
1035+
def test_cupy_native_nearest_uint8_clamp(self):
1036+
from xrspatial.reproject._interpolate import _resample_cupy_native
1037+
src, rows, cols = self._sharp_edge_inputs()
1038+
src_gpu = cp.asarray(np.zeros((16, 16), dtype=np.uint8))
1039+
src_gpu[:, 8:] = 255
1040+
result = _resample_cupy_native(src_gpu, rows, cols,
1041+
resampling='nearest', nodata=np.nan)
1042+
assert result.dtype == np.uint8
1043+
vals = cp.asnumpy(result)
1044+
assert np.all((vals == 0) | (vals == 255) | np.isnan(vals.astype(float)))
1045+
1046+
def test_cupy_native_bilinear_uint8_clamp(self):
1047+
from xrspatial.reproject._interpolate import _resample_cupy_native
1048+
src_gpu = cp.zeros((16, 16), dtype=np.uint8)
1049+
src_gpu[:, 8:] = 255
1050+
_, rows, cols = self._sharp_edge_inputs()
1051+
result = _resample_cupy_native(src_gpu, rows, cols,
1052+
resampling='bilinear', nodata=np.nan)
1053+
assert result.dtype == np.uint8
1054+
vals = cp.asnumpy(result)
1055+
assert np.all(vals <= 255)
1056+
assert np.all(vals >= 0)
1057+
1058+
def test_cupy_native_cubic_uint8_clamp(self):
1059+
from xrspatial.reproject._interpolate import _resample_cupy_native
1060+
src_gpu = cp.zeros((16, 16), dtype=np.uint8)
1061+
src_gpu[:, 8:] = 255
1062+
_, rows, cols = self._sharp_edge_inputs()
1063+
result = _resample_cupy_native(src_gpu, rows, cols,
1064+
resampling='cubic', nodata=np.nan)
1065+
assert result.dtype == np.uint8
1066+
vals = cp.asnumpy(result)
1067+
assert np.all(vals <= 255)
1068+
assert np.all(vals >= 0)
1069+
1070+
def test_cupy_map_coords_bilinear_uint8_clamp(self):
1071+
from xrspatial.reproject._interpolate import _resample_cupy
1072+
src_gpu = cp.zeros((16, 16), dtype=np.uint8)
1073+
src_gpu[:, 8:] = 255
1074+
_, rows, cols = self._sharp_edge_inputs()
1075+
result = _resample_cupy(src_gpu, rows, cols,
1076+
resampling='bilinear', nodata=np.nan)
1077+
assert result.dtype == np.uint8
1078+
vals = cp.asnumpy(result)
1079+
assert np.all(vals <= 255)
1080+
assert np.all(vals >= 0)
1081+
1082+
def test_cupy_map_coords_cubic_uint8_clamp(self):
1083+
from xrspatial.reproject._interpolate import _resample_cupy
1084+
src_gpu = cp.zeros((16, 16), dtype=np.uint8)
1085+
src_gpu[:, 8:] = 255
1086+
_, rows, cols = self._sharp_edge_inputs()
1087+
result = _resample_cupy(src_gpu, rows, cols,
1088+
resampling='cubic', nodata=np.nan)
1089+
assert result.dtype == np.uint8
1090+
vals = cp.asnumpy(result)
1091+
assert np.all(vals <= 255)
1092+
assert np.all(vals >= 0)
1093+
1094+
1095+
@pytest.mark.skipif(not HAS_CUPY, reason="CuPy not installed")
1096+
class TestCudaCubicNanFallback:
1097+
"""Verify _resample_cubic_cuda falls back to bilinear near NaN instead
1098+
of writing nodata."""
1099+
1100+
def test_cubic_nan_fallback_produces_valid_values(self):
1101+
"""Cubic with a few NaN neighbors should interpolate from valid
1102+
neighbors (bilinear fallback), not produce nodata everywhere."""
1103+
from xrspatial.reproject._interpolate import _resample_cupy_native
1104+
1105+
# 16x16 source with value 100.0, a few NaN pixels scattered
1106+
src = np.full((16, 16), 100.0, dtype=np.float64)
1107+
src[5, 5] = np.nan
1108+
src[10, 10] = np.nan
1109+
1110+
src_gpu = cp.asarray(src)
1111+
1112+
# Sample at points near (but not on) NaN pixels
1113+
rows = np.array([[5.3, 6.0, 10.3, 8.0]], dtype=np.float64)
1114+
cols = np.array([[5.3, 6.0, 10.3, 8.0]], dtype=np.float64)
1115+
1116+
result = _resample_cupy_native(src_gpu, rows, cols,
1117+
resampling='cubic', nodata=np.nan)
1118+
vals = cp.asnumpy(result).ravel()
1119+
1120+
# Points near NaN should get valid interpolated values (bilinear
1121+
# fallback), not NaN. Point (6.0, 6.0) and (8.0, 8.0) are far
1122+
# enough from any NaN that cubic should succeed directly.
1123+
assert np.isfinite(vals[1]), "point far from NaN should be finite"
1124+
assert np.isfinite(vals[3]), "point far from NaN should be finite"
1125+
# Points adjacent to NaN should also be finite via bilinear fallback
1126+
assert np.isfinite(vals[0]), "bilinear fallback should produce finite value near NaN"
1127+
assert np.isfinite(vals[2]), "bilinear fallback should produce finite value near NaN"
1128+
1129+
def test_cubic_nan_fallback_matches_cpu(self):
1130+
"""CUDA cubic NaN fallback should produce values close to the CPU
1131+
Numba JIT version."""
1132+
from xrspatial.reproject._interpolate import (
1133+
_resample_cupy_native,
1134+
_resample_numpy,
1135+
)
1136+
1137+
src = np.full((16, 16), 50.0, dtype=np.float64)
1138+
src[4, 4] = np.nan
1139+
src[7, 12] = np.nan
1140+
1141+
# Sample grid covering the whole raster
1142+
rows, cols = np.meshgrid(
1143+
np.linspace(1, 14, 12), np.linspace(1, 14, 12), indexing='ij'
1144+
)
1145+
rows = rows.astype(np.float64)
1146+
cols = cols.astype(np.float64)
1147+
1148+
cpu_result = _resample_numpy(src, rows, cols,
1149+
resampling='cubic', nodata=np.nan)
1150+
gpu_result = _resample_cupy_native(
1151+
cp.asarray(src), rows, cols,
1152+
resampling='cubic', nodata=np.nan
1153+
)
1154+
gpu_np = cp.asnumpy(gpu_result)
1155+
1156+
# Both should have the same NaN pattern
1157+
np.testing.assert_array_equal(np.isnan(cpu_result), np.isnan(gpu_np))
1158+
# Finite values should match closely
1159+
finite = np.isfinite(cpu_result)
1160+
np.testing.assert_allclose(cpu_result[finite], gpu_np[finite],
1161+
rtol=1e-10)

0 commit comments

Comments
 (0)