Skip to content

Commit 7527e56

Browse files
committed
geotiff: refuse VRT non-nearest resample algorithms (#1751)
ComplexSource parses <ResampleAlg> from VRT XML but read_vrt always calls _resample_nearest in the placement step, so a VRT declaring Bilinear / Cubic / CubicSpline / Lanczos / Average / Mode silently received nearest-sampled pixels mislabelled as the requested algorithm. Raise NotImplementedError at the resample call site when src.resample_alg is not nearest (case-insensitive: '', None, 'Nearest', 'NearestNeighbour', 'NearestNeighbor', 'NEAR' all pass). The check sits in the ``needs_resample`` branch rather than at parse time so a ComplexSource declaring Bilinear with matching SrcRect/DstRect sizes is still accepted -- no kernel runs, so no mislabelled output. Implementing the higher-quality kernels is out of scope here; the goal is to stop the silent wrong-numbers behaviour. Closes #1751.
1 parent e7b9cde commit 7527e56

2 files changed

Lines changed: 187 additions & 7 deletions

File tree

xrspatial/geotiff/_vrt.py

Lines changed: 56 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -150,9 +150,11 @@ class _Source:
150150
offset: float | None = None
151151
# GDAL ``<ComplexSource><ResampleAlg>`` values are ``NearestNeighbour``
152152
# (default), ``Bilinear``, ``Cubic``, ``CubicSpline``, ``Lanczos``,
153-
# ``Average``, ``Mode``. We parse the value so we can advertise it,
154-
# but only nearest-neighbour is implemented in the placement step
155-
# (issue #1694). Higher-quality resamplers are tracked for follow-up.
153+
# ``Average``, ``Mode``. Only nearest-neighbour is implemented in
154+
# the placement step (issue #1694); the resample site raises
155+
# ``NotImplementedError`` for any other declared algorithm rather
156+
# than silently substituting nearest (issue #1751). Higher-quality
157+
# resamplers are tracked for follow-up.
156158
resample_alg: str | None = None
157159

158160

@@ -353,10 +355,12 @@ def parse_vrt(xml_str: str, vrt_dir: str = '.') -> VRTDataset:
353355
scale = float(offset_str)
354356
if scale_str:
355357
offset = float(scale_str)
356-
# ``<ResampleAlg>`` is informational for the placement
357-
# step: read_vrt currently always nearest-neighbours, so
358-
# we only record the value for later honouring. See
359-
# issue #1694.
358+
# ``<ResampleAlg>`` records the requested resampler for
359+
# the placement step. ``read_vrt`` only implements
360+
# nearest-neighbour today, so the resample site refuses
361+
# the read for any other declared algorithm rather than
362+
# returning silently-mislabelled pixels. See issues
363+
# #1694 and #1751.
360364
resample_alg = _text(src_elem, 'ResampleAlg')
361365

362366
sources.append(_Source(
@@ -388,6 +392,44 @@ def parse_vrt(xml_str: str, vrt_dir: str = '.') -> VRTDataset:
388392
)
389393

390394

395+
# GDAL ``<ResampleAlg>`` values that are equivalent to nearest-neighbour
396+
# (or that explicitly request it). Comparison is case-insensitive and
397+
# done on the stripped element text. Empty / missing text is also
398+
# treated as nearest because the GDAL default for a SimpleSource with
399+
# no ``ResampleAlg`` child is nearest. See issue #1751.
400+
_NEAREST_RESAMPLE_ALGS = frozenset({
401+
'', 'nearest', 'nearestneighbour', 'nearestneighbor', 'near',
402+
})
403+
404+
405+
def _check_resample_alg_supported(resample_alg: str | None,
406+
filename: str) -> None:
407+
"""Raise ``NotImplementedError`` for unsupported VRT resample algorithms.
408+
409+
``ComplexSource`` may declare ``<ResampleAlg>`` values such as
410+
``Bilinear``, ``Cubic``, ``CubicSpline``, ``Lanczos``, ``Average``,
411+
or ``Mode``. ``read_vrt`` only implements nearest-neighbour
412+
placement, so a VRT that asks for any of the higher-quality
413+
resamplers would silently return nearest-sampled pixels mislabelled
414+
as the requested algorithm. Refuse the read instead of returning
415+
quietly wrong numbers. See issue #1751.
416+
"""
417+
if resample_alg is None:
418+
return
419+
norm = resample_alg.strip().lower()
420+
if norm in _NEAREST_RESAMPLE_ALGS:
421+
return
422+
raise NotImplementedError(
423+
f"VRT ComplexSource for '{filename}' requests "
424+
f"<ResampleAlg>{resample_alg}</ResampleAlg>, but read_vrt only "
425+
f"implements nearest-neighbour resampling. Returning "
426+
f"nearest-sampled pixels would silently mislabel them as "
427+
f"'{resample_alg}'. Re-export the VRT with "
428+
f"<ResampleAlg>Nearest</ResampleAlg> or matching SrcRect/DstRect "
429+
f"sizes if nearest-neighbour is acceptable. See issue #1751."
430+
)
431+
432+
391433
def _resample_nearest(src_arr: np.ndarray,
392434
out_h: int, out_w: int) -> np.ndarray:
393435
"""Nearest-neighbour resample ``src_arr`` to ``(out_h, out_w)``.
@@ -796,6 +838,13 @@ def read_vrt(vrt_path: str, *, window=None,
796838
src_arr = src_arr.astype(np.float64) + src.offset
797839

798840
if needs_resample:
841+
# Refuse VRTs that request a non-nearest resample
842+
# algorithm. ``_resample_nearest`` below is the only
843+
# implemented kernel; silently substituting it for
844+
# ``Bilinear`` / ``Cubic`` / ``Average`` / ``Mode``
845+
# would return wrong numbers under the user's
846+
# configured algorithm name. See issue #1751.
847+
_check_resample_alg_supported(src.resample_alg, src.filename)
799848
# Resample the full source rect to the destination rect
800849
# shape, then clip to the window subregion. Doing the
801850
# resample on the full rect keeps the nearest-neighbour
Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
"""Regression tests for issue #1751.
2+
3+
``read_vrt`` parses ``<ResampleAlg>`` from a ``ComplexSource`` but always
4+
calls ``_resample_nearest`` during placement, regardless of the parsed
5+
algorithm. A VRT declaring ``Bilinear`` / ``Cubic`` / ``Average`` /
6+
``Mode`` therefore receives nearest-sampled pixels mislabelled as the
7+
requested algorithm -- silently wrong analytics.
8+
9+
The fix raises ``NotImplementedError`` at the resample call site when
10+
the source declares an unsupported algorithm and the SrcRect/DstRect
11+
sizes actually differ (a 1:1 placement is nearest-equivalent and stays
12+
permissive). Nearest, NearestNeighbour, NEAR, empty, and an absent
13+
``<ResampleAlg>`` element are all accepted.
14+
"""
15+
from __future__ import annotations
16+
17+
import numpy as np
18+
import pytest
19+
20+
from xrspatial.geotiff._vrt import read_vrt
21+
from xrspatial.geotiff._writer import write
22+
23+
24+
def _write_src(tmp_path) -> str:
25+
"""Write a 4x4 uint16 source TIFF and return its path."""
26+
src = np.arange(16, dtype=np.uint16).reshape(4, 4)
27+
src_path = str(tmp_path / 'src.tif')
28+
write(src, src_path, compression='none', tiled=False)
29+
return src_path
30+
31+
32+
def _write_vrt(tmp_path, xml: str, name: str = 'test.vrt') -> str:
33+
p = str(tmp_path / name)
34+
with open(p, 'w') as f:
35+
f.write(xml)
36+
return p
37+
38+
39+
def _vrt_xml(src_path: str, *, alg_elem: str,
40+
dst_x: int = 2, dst_y: int = 2) -> str:
41+
"""Render a VRT XML with a 4x4 SrcRect and configurable DstRect+Alg.
42+
43+
``alg_elem`` is the raw ``<ResampleAlg>...</ResampleAlg>`` element
44+
to splice into the ``<ComplexSource>``, or the empty string to
45+
omit it entirely.
46+
"""
47+
return f"""<VRTDataset rasterXSize="{dst_x}" rasterYSize="{dst_y}">
48+
<GeoTransform>0.0, 2.0, 0.0, 0.0, 0.0, -2.0</GeoTransform>
49+
<VRTRasterBand dataType="UInt16" band="1">
50+
<ComplexSource>
51+
<SourceFilename relativeToVRT="0">{src_path}</SourceFilename>
52+
<SourceBand>1</SourceBand>
53+
<SrcRect xOff="0" yOff="0" xSize="4" ySize="4"/>
54+
<DstRect xOff="0" yOff="0" xSize="{dst_x}" ySize="{dst_y}"/>
55+
{alg_elem}
56+
</ComplexSource>
57+
</VRTRasterBand>
58+
</VRTDataset>"""
59+
60+
61+
@pytest.mark.parametrize('alg', ['Bilinear', 'Cubic', 'CubicSpline',
62+
'Lanczos', 'Average', 'Mode'])
63+
def test_unsupported_resample_alg_raises(tmp_path, alg):
64+
"""A ComplexSource declaring any non-nearest algorithm with a size
65+
change must raise ``NotImplementedError`` rather than return
66+
silently nearest-sampled pixels."""
67+
src_path = _write_src(tmp_path)
68+
xml = _vrt_xml(src_path,
69+
alg_elem=f'<ResampleAlg>{alg}</ResampleAlg>')
70+
vrt_path = _write_vrt(tmp_path, xml, f'{alg.lower()}.vrt')
71+
72+
with pytest.raises(NotImplementedError) as excinfo:
73+
read_vrt(vrt_path)
74+
msg = str(excinfo.value)
75+
assert alg in msg
76+
assert '1751' in msg
77+
78+
79+
def test_unsupported_resample_alg_case_insensitive(tmp_path):
80+
"""Algorithm names are matched case-insensitively: ``bilinear``
81+
(lowercase) is the same unsupported request as ``Bilinear``."""
82+
src_path = _write_src(tmp_path)
83+
xml = _vrt_xml(src_path,
84+
alg_elem='<ResampleAlg>bilinear</ResampleAlg>')
85+
vrt_path = _write_vrt(tmp_path, xml, 'lower.vrt')
86+
87+
with pytest.raises(NotImplementedError, match='bilinear'):
88+
read_vrt(vrt_path)
89+
90+
91+
@pytest.mark.parametrize('alg', ['Nearest', 'NearestNeighbour',
92+
'NearestNeighbor', 'NEAR', 'nearest',
93+
'NEAREST', ''])
94+
def test_nearest_variants_accepted(tmp_path, alg):
95+
"""Nearest (and its case / spelling variants, plus empty text) is
96+
the implemented algorithm and must round-trip without raising."""
97+
src_path = _write_src(tmp_path)
98+
xml = _vrt_xml(src_path,
99+
alg_elem=f'<ResampleAlg>{alg}</ResampleAlg>')
100+
vrt_path = _write_vrt(tmp_path, xml, f'near_{alg or "empty"}.vrt')
101+
102+
arr, _ = read_vrt(vrt_path)
103+
assert arr.shape == (2, 2)
104+
105+
106+
def test_missing_resample_alg_accepted(tmp_path):
107+
"""Absent ``<ResampleAlg>`` (GDAL's nearest default) must still
108+
round-trip without raising."""
109+
src_path = _write_src(tmp_path)
110+
xml = _vrt_xml(src_path, alg_elem='')
111+
vrt_path = _write_vrt(tmp_path, xml, 'absent.vrt')
112+
113+
arr, _ = read_vrt(vrt_path)
114+
assert arr.shape == (2, 2)
115+
116+
117+
def test_bilinear_at_same_size_does_not_raise(tmp_path):
118+
"""A ``Bilinear`` declaration with matching SrcRect/DstRect sizes
119+
is nearest-equivalent (no resample step runs) so the read is
120+
accepted. This pins down the resample-site placement of the
121+
check -- a parse-time check would have rejected this case too."""
122+
src_path = _write_src(tmp_path)
123+
# SrcRect 4x4, DstRect 4x4: needs_resample is False, no kernel
124+
# call, no silent wrongness.
125+
xml = _vrt_xml(src_path,
126+
alg_elem='<ResampleAlg>Bilinear</ResampleAlg>',
127+
dst_x=4, dst_y=4)
128+
vrt_path = _write_vrt(tmp_path, xml, 'bilinear_1to1.vrt')
129+
130+
arr, _ = read_vrt(vrt_path)
131+
assert arr.shape == (4, 4)

0 commit comments

Comments
 (0)