Skip to content

Commit 9736c96

Browse files
authored
Validate read_vrt window against VRT extent (#1697) (#1698)
* Validate read_vrt window against VRT extent (#1697) The local TIFF path (#1634) and HTTP COG path (#1669) reject out-of-bounds, zero-size, and inverted windows up front with a ValueError. The VRT path silently clamped the window to the parsed extent instead, which returned a smaller array than open_geotiff's caller-built coord arrays and surfaced as a CoordinateValidationError deep inside xarray. Replace the clamp with the same validator the other two paths use, swapping "source extent" to "VRT extent" since the dimensions come from the parsed VRT rather than a single source file. Fixes #1697 * Reword stale read_vrt window-validation comment (PR #1698 review) The block comment above the window check still referenced "the clamp below", but the clamp logic was removed in #1697. Reword to describe the prior behavior (per-source reads silently clamped out-of-bounds windows) and why we now validate up front, keeping the issue reference so future readers can trace history.
1 parent 33a8b7d commit 9736c96

2 files changed

Lines changed: 243 additions & 4 deletions

File tree

xrspatial/geotiff/_vrt.py

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -410,12 +410,24 @@ def read_vrt(vrt_path: str, *, window=None,
410410
f"band index {band} out of range for VRT with "
411411
f"{len(vrt.bands)} band(s)")
412412

413+
# Validate ``window`` against the VRT's parsed extent before any
414+
# source reads. Prior to #1697, per-source reads silently clamped
415+
# out-of-bounds windows to the source extent and returned a smaller
416+
# array, which then mismatched caller-built coord arrays in
417+
# ``open_geotiff`` and surfaced as an opaque
418+
# ``CoordinateValidationError`` far from the real cause. We now
419+
# reject invalid windows up front. Mirrors the local-path validator
420+
# in ``read_to_array`` (#1634) and the HTTP path validator in
421+
# ``_read_cog_http`` (#1669) so all backends agree on the contract.
422+
# See issue #1697.
413423
if window is not None:
414424
r0, c0, r1, c1 = window
415-
r0 = max(0, r0)
416-
c0 = max(0, c0)
417-
r1 = min(vrt.height, r1)
418-
c1 = min(vrt.width, c1)
425+
if (r0 < 0 or c0 < 0
426+
or r1 > vrt.height or c1 > vrt.width
427+
or r0 >= r1 or c0 >= c1):
428+
raise ValueError(
429+
f"window={window} is outside the VRT extent "
430+
f"({vrt.height}x{vrt.width}) or has non-positive size.")
419431
else:
420432
r0, c0, r1, c1 = 0, 0, vrt.height, vrt.width
421433

Lines changed: 227 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,227 @@
1+
"""Regression tests for issue #1697.
2+
3+
``read_vrt(path, window=...)`` silently clamped invalid window
4+
coordinates instead of raising ``ValueError``. The local TIFF path
5+
(#1634) and the HTTP COG path (#1669) both reject the same bad
6+
windows up front. VRT was missed when #1634 landed.
7+
8+
The hidden failure mode is the same one #1634 fixed: ``open_geotiff``
9+
builds the y/x coord arrays from the caller-supplied window indices,
10+
so a clamped read returns a smaller array than the coord arrays and
11+
xarray raises a ``CoordinateValidationError`` deep inside its
12+
constructor instead of a clear xrspatial-level error.
13+
14+
These tests pin the VRT path to the same contract as the local and
15+
HTTP paths: out-of-bounds, zero-size, or inverted windows raise
16+
``ValueError`` with a message of the same shape (one "extent" word
17+
swap from ``source extent`` to ``VRT extent``).
18+
"""
19+
from __future__ import annotations
20+
21+
import os
22+
import uuid
23+
24+
import numpy as np
25+
import pytest
26+
import xarray as xr
27+
28+
from xrspatial.geotiff import to_geotiff
29+
from xrspatial.geotiff._reader import read_to_array
30+
from xrspatial.geotiff._vrt import read_vrt
31+
32+
33+
def _unique_dir(tmp_path, label: str) -> str:
34+
"""Return a sub-path under ``tmp_path`` with a uuid suffix so
35+
parallel test workers cannot collide on the same name."""
36+
d = tmp_path / f"vrt_1697_{label}_{uuid.uuid4().hex[:8]}"
37+
d.mkdir()
38+
return str(d)
39+
40+
41+
def _write_tif(path: str, size: int = 4) -> None:
42+
"""Write a ``size``x``size`` float32 GeoTIFF the VRT can wrap."""
43+
arr = np.arange(size * size, dtype=np.float32).reshape(size, size)
44+
y = np.linspace(float(size) - 0.5, 0.5, size)
45+
x = np.linspace(0.5, float(size) - 0.5, size)
46+
da = xr.DataArray(
47+
arr, dims=['y', 'x'],
48+
coords={'y': y, 'x': x},
49+
attrs={'crs': 4326},
50+
)
51+
to_geotiff(da, path, compression='none')
52+
53+
54+
def _write_vrt(vrt_path: str, source_filename: str, size: int = 4) -> None:
55+
"""Write a single-band VRT of dimension ``size``x``size`` pointing
56+
at ``source_filename`` (relative to the VRT directory)."""
57+
xml = (
58+
f'<VRTDataset rasterXSize="{size}" rasterYSize="{size}">\n'
59+
' <GeoTransform>0, 1, 0, 0, 0, -1</GeoTransform>\n'
60+
' <VRTRasterBand dataType="Float32" band="1">\n'
61+
' <SimpleSource>\n'
62+
f' <SourceFilename relativeToVRT="1">{source_filename}'
63+
'</SourceFilename>\n'
64+
' <SourceBand>1</SourceBand>\n'
65+
f' <SrcRect xOff="0" yOff="0" xSize="{size}" ySize="{size}"/>\n'
66+
f' <DstRect xOff="0" yOff="0" xSize="{size}" ySize="{size}"/>\n'
67+
' </SimpleSource>\n'
68+
' </VRTRasterBand>\n'
69+
'</VRTDataset>\n'
70+
)
71+
with open(vrt_path, 'w') as f:
72+
f.write(xml)
73+
74+
75+
@pytest.fixture
76+
def vrt_4x4(tmp_path):
77+
"""Return a path to a 4x4 single-band VRT wrapping a 4x4 TIFF."""
78+
d = _unique_dir(tmp_path, "fixture")
79+
tif = os.path.join(d, 'data.tif')
80+
_write_tif(tif, size=4)
81+
vrt = os.path.join(d, 'mosaic.vrt')
82+
_write_vrt(vrt, 'data.tif', size=4)
83+
return vrt
84+
85+
86+
# ---------------------------------------------------------------------------
87+
# Negative starts
88+
# ---------------------------------------------------------------------------
89+
90+
91+
def test_negative_r0_raises_value_error(vrt_4x4):
92+
"""``r0 < 0`` raises ValueError instead of being clamped to 0."""
93+
with pytest.raises(ValueError, match='outside the VRT extent'):
94+
read_vrt(vrt_4x4, window=(-1, 0, 2, 2))
95+
96+
97+
def test_negative_c0_raises_value_error(vrt_4x4):
98+
"""``c0 < 0`` raises ValueError instead of being clamped to 0."""
99+
with pytest.raises(ValueError, match='outside the VRT extent'):
100+
read_vrt(vrt_4x4, window=(0, -1, 2, 2))
101+
102+
103+
# ---------------------------------------------------------------------------
104+
# Past-edge stops
105+
# ---------------------------------------------------------------------------
106+
107+
108+
def test_r1_past_bottom_edge_raises_value_error(vrt_4x4):
109+
"""``r1 > vrt.height`` raises instead of being clamped to vrt.height."""
110+
with pytest.raises(ValueError, match='outside the VRT extent'):
111+
read_vrt(vrt_4x4, window=(0, 0, 5, 4))
112+
113+
114+
def test_c1_past_right_edge_raises_value_error(vrt_4x4):
115+
"""``c1 > vrt.width`` raises instead of being clamped to vrt.width."""
116+
with pytest.raises(ValueError, match='outside the VRT extent'):
117+
read_vrt(vrt_4x4, window=(0, 0, 4, 5))
118+
119+
120+
def test_window_past_both_edges_raises_value_error(vrt_4x4):
121+
"""Windows past both right and bottom edges raise the same error."""
122+
with pytest.raises(ValueError, match='outside the VRT extent'):
123+
read_vrt(vrt_4x4, window=(0, 0, 10, 10))
124+
125+
126+
# ---------------------------------------------------------------------------
127+
# Zero-size and inverted windows
128+
# ---------------------------------------------------------------------------
129+
130+
131+
def test_zero_size_row_window_raises_value_error(vrt_4x4):
132+
"""``r0 == r1`` produces a zero-height window and must raise."""
133+
with pytest.raises(ValueError, match='non-positive size'):
134+
read_vrt(vrt_4x4, window=(2, 0, 2, 4))
135+
136+
137+
def test_zero_size_col_window_raises_value_error(vrt_4x4):
138+
"""``c0 == c1`` produces a zero-width window and must raise."""
139+
with pytest.raises(ValueError, match='non-positive size'):
140+
read_vrt(vrt_4x4, window=(0, 2, 4, 2))
141+
142+
143+
def test_fully_zero_size_window_raises_value_error(vrt_4x4):
144+
"""``r0 == r1 and c0 == c1`` raises (current code returned a (0, 0) array)."""
145+
with pytest.raises(ValueError, match='non-positive size'):
146+
read_vrt(vrt_4x4, window=(2, 2, 2, 2))
147+
148+
149+
def test_inverted_row_window_raises_value_error(vrt_4x4):
150+
"""``r0 > r1`` is degenerate and must raise."""
151+
with pytest.raises(ValueError, match='non-positive size'):
152+
read_vrt(vrt_4x4, window=(3, 0, 1, 4))
153+
154+
155+
def test_inverted_col_window_raises_value_error(vrt_4x4):
156+
"""``c0 > c1`` is degenerate and must raise."""
157+
with pytest.raises(ValueError, match='non-positive size'):
158+
read_vrt(vrt_4x4, window=(0, 3, 4, 1))
159+
160+
161+
# ---------------------------------------------------------------------------
162+
# Valid in-bounds windows still work (don't over-reject)
163+
# ---------------------------------------------------------------------------
164+
165+
166+
def test_full_extent_window_still_works(vrt_4x4):
167+
"""A window covering the full VRT extent still reads the full array."""
168+
arr, _ = read_vrt(vrt_4x4, window=(0, 0, 4, 4))
169+
assert arr.shape == (4, 4)
170+
171+
172+
def test_interior_window_still_works(vrt_4x4):
173+
"""An interior window returns the requested subset shape."""
174+
arr, _ = read_vrt(vrt_4x4, window=(1, 1, 3, 3))
175+
assert arr.shape == (2, 2)
176+
177+
178+
def test_edge_aligned_window_still_works(vrt_4x4):
179+
"""A window that touches but does not exceed the edges is accepted."""
180+
arr, _ = read_vrt(vrt_4x4, window=(2, 2, 4, 4))
181+
assert arr.shape == (2, 2)
182+
183+
184+
def test_none_window_still_returns_full_array(vrt_4x4):
185+
"""``window=None`` still returns the full VRT extent."""
186+
arr, _ = read_vrt(vrt_4x4)
187+
assert arr.shape == (4, 4)
188+
189+
190+
# ---------------------------------------------------------------------------
191+
# Cross-path parity: VRT and local-TIFF reject the same bad window
192+
# ---------------------------------------------------------------------------
193+
194+
195+
def test_vrt_and_local_paths_share_window_validation(tmp_path):
196+
"""Same bad window rejected on both VRT and local-TIFF paths with the
197+
same error class and message shape (one word swap is fine)."""
198+
d = _unique_dir(tmp_path, "parity")
199+
tif = os.path.join(d, 'data.tif')
200+
_write_tif(tif, size=4)
201+
vrt = os.path.join(d, 'mosaic.vrt')
202+
_write_vrt(vrt, 'data.tif', size=4)
203+
204+
bad_window = (-1, 0, 2, 2)
205+
206+
with pytest.raises(ValueError) as vrt_exc:
207+
read_vrt(vrt, window=bad_window)
208+
with pytest.raises(ValueError) as local_exc:
209+
read_to_array(tif, window=bad_window)
210+
211+
vrt_msg = str(vrt_exc.value)
212+
local_msg = str(local_exc.value)
213+
214+
# Both messages must echo the offending window and the source dims.
215+
assert 'window=' in vrt_msg
216+
assert 'window=' in local_msg
217+
assert '4x4' in vrt_msg
218+
assert '4x4' in local_msg
219+
# And both must signal "out of bounds" via the same wording template,
220+
# with a single word swap on the extent label.
221+
assert 'extent' in vrt_msg
222+
assert 'extent' in local_msg
223+
assert 'non-positive size' in vrt_msg
224+
assert 'non-positive size' in local_msg
225+
# The VRT path says "VRT extent"; the local path says "source extent".
226+
assert 'VRT extent' in vrt_msg
227+
assert 'source extent' in local_msg

0 commit comments

Comments
 (0)