Skip to content

Commit 1a22486

Browse files
committed
geotiff: validate VRT writer source compatibility (#1733)
write_vrt previously read metadata from every source but used only the first source's pixel size, sample format + bps (dtype), band count, and CRS to write the VRT. A mismatched source would silently produce a VRT that misplaced pixels or mis-typed data downstream. Enforce the docstring contract: every source must agree with the first on pixel size (within 1e-6 relative tolerance), sample format + bps, band count, and CRS WKT (when both sides set it). Mismatches raise ValueError naming the offending source and field. Document nodata precedence: caller arg wins over the first source's per-band nodata. Adds regression coverage in test_vrt_writer_source_compat_1733.py.
1 parent 77e6e4e commit 1a22486

2 files changed

Lines changed: 187 additions & 5 deletions

File tree

xrspatial/geotiff/_vrt.py

Lines changed: 69 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -783,7 +783,10 @@ def write_vrt(vrt_path: str, source_files: list[str], *,
783783
"""Generate a VRT file that mosaics multiple GeoTIFF tiles.
784784
785785
Each source file is placed in the virtual raster based on its
786-
geo transform. Files must share the same CRS and pixel size.
786+
geo transform. All sources must share the same pixel size, dtype
787+
(sample format + bits-per-sample), band count, and CRS. Mismatches
788+
raise ``ValueError`` rather than producing a misplaced or mis-typed
789+
mosaic.
787790
788791
Parameters
789792
----------
@@ -796,10 +799,12 @@ def write_vrt(vrt_path: str, source_files: list[str], *,
796799
crs_wkt : str or None
797800
CRS as WKT string. If None, taken from the first source.
798801
nodata : float, int, or None
799-
NoData value. If None, taken from the first source. Integer
800-
sentinels (e.g. ``65535`` for uint16, ``-9999`` for int32) are
801-
accepted so the surface lines up with the ``nodata`` kwarg on
802-
``to_geotiff`` and ``write_geotiff_gpu``.
802+
NoData value applied to every band of the mosaic. Caller-supplied
803+
value takes precedence; when ``None``, the first source's
804+
per-band nodata is used. Integer sentinels (e.g. ``65535`` for
805+
uint16, ``-9999`` for int32) are accepted so the surface lines up
806+
with the ``nodata`` kwarg on ``to_geotiff`` and
807+
``write_geotiff_gpu``.
803808
804809
Returns
805810
-------
@@ -849,6 +854,65 @@ def write_vrt(vrt_path: str, source_files: list[str], *,
849854
res_x = first['transform'].pixel_width
850855
res_y = first['transform'].pixel_height
851856

857+
# Enforce the docstring contract: every source must agree with the
858+
# first on pixel size, sample format + bits-per-sample (i.e. dtype),
859+
# band count, and CRS WKT (when both sides set it). Without this,
860+
# build_vrt would silently produce a syntactically valid VRT that
861+
# misplaces or mis-types data downstream (issue #1733).
862+
#
863+
# Pixel size is compared with a small relative tolerance: TIFFs
864+
# written by different tools occasionally round the GeoTransform
865+
# slightly, and the existing mosaic-extent math rounds the final
866+
# raster size anyway. Sample format + bps must match exactly because
867+
# the VRT dtype is taken from ``first`` and applied to every band.
868+
_PIXEL_SIZE_RTOL = 1e-6
869+
870+
def _pixel_size_mismatch(a: float, b: float) -> bool:
871+
# Compare magnitudes; pixel_height is negative for the common
872+
# north-up case so a direct equality test would mis-flag a
873+
# source with the opposite sign.
874+
if a == b:
875+
return False
876+
denom = max(abs(a), abs(b))
877+
if denom == 0.0:
878+
return abs(a - b) > 0.0
879+
return abs(a - b) / denom > _PIXEL_SIZE_RTOL
880+
881+
first_crs = first.get('crs_wkt')
882+
for m in sources_meta[1:]:
883+
t = m['transform']
884+
if _pixel_size_mismatch(t.pixel_width, res_x) \
885+
or _pixel_size_mismatch(t.pixel_height, res_y):
886+
raise ValueError(
887+
f"VRT source {m['path']!r} has pixel size "
888+
f"({t.pixel_width}, {t.pixel_height}) which does not "
889+
f"match the first source ({res_x}, {res_y}). All sources "
890+
f"in a VRT must share the same pixel size."
891+
)
892+
if m['sample_format'] != first['sample_format'] \
893+
or m['bps'] != first['bps']:
894+
raise ValueError(
895+
f"VRT source {m['path']!r} has sample_format="
896+
f"{m['sample_format']}, bps={m['bps']} which does not "
897+
f"match the first source (sample_format="
898+
f"{first['sample_format']}, bps={first['bps']}). All "
899+
f"sources in a VRT must share the same dtype."
900+
)
901+
if m['bands'] != first['bands']:
902+
raise ValueError(
903+
f"VRT source {m['path']!r} has {m['bands']} band(s) "
904+
f"which does not match the first source "
905+
f"({first['bands']} band(s)). All sources in a VRT "
906+
f"must share the same band count."
907+
)
908+
m_crs = m.get('crs_wkt')
909+
if first_crs and m_crs and m_crs != first_crs:
910+
raise ValueError(
911+
f"VRT source {m['path']!r} has CRS WKT that does not "
912+
f"match the first source. All sources in a VRT must "
913+
f"share the same CRS."
914+
)
915+
852916
# Compute the bounding box of all sources
853917
all_x0, all_y0, all_x1, all_y1 = [], [], [], []
854918
for m in sources_meta:
Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
"""Regression tests for issue #1733.
2+
3+
``write_vrt`` previously trusted the first source for resolution,
4+
sample format + bps (dtype), band count, and CRS. A mismatched source
5+
would silently produce a VRT that placed pixels incorrectly or
6+
re-interpreted bytes as the wrong dtype downstream.
7+
8+
These tests assert that ``write_vrt`` now rejects mismatched sources
9+
with a clear ``ValueError`` covering each of those properties, and
10+
still accepts sources that match within a small float tolerance on
11+
pixel size.
12+
"""
13+
from __future__ import annotations
14+
15+
import os
16+
import uuid
17+
18+
import numpy as np
19+
import pytest
20+
import xarray as xr
21+
22+
from xrspatial.geotiff import to_geotiff
23+
from xrspatial.geotiff._vrt import write_vrt
24+
25+
26+
def _unique_dir(tmp_path, label: str) -> str:
27+
d = tmp_path / f"vrt_1733_{label}_{uuid.uuid4().hex[:8]}"
28+
d.mkdir()
29+
return str(d)
30+
31+
32+
def _write_tif(path: str, *, h: int, w: int, dtype, bands: int = 1,
33+
px: float = 1.0, py: float = -1.0,
34+
origin_x: float = 0.0, origin_y: float = 100.0,
35+
crs: int | None = 4326) -> None:
36+
if bands == 1:
37+
arr = np.arange(h * w, dtype=dtype).reshape(h, w)
38+
dims = ['y', 'x']
39+
else:
40+
arr = np.arange(h * w * bands, dtype=dtype).reshape(h, w, bands)
41+
dims = ['y', 'x', 'band']
42+
y = origin_y + (np.arange(h) + 0.5) * py
43+
x = origin_x + (np.arange(w) + 0.5) * px
44+
coords = {'y': y, 'x': x}
45+
attrs = {}
46+
if crs is not None:
47+
attrs['crs'] = crs
48+
da = xr.DataArray(arr, dims=dims, coords=coords, attrs=attrs)
49+
to_geotiff(da, path, compression='none')
50+
51+
52+
def test_mismatched_pixel_size_raises(tmp_path):
53+
d = _unique_dir(tmp_path, "px")
54+
a = os.path.join(d, "a.tif")
55+
b = os.path.join(d, "b.tif")
56+
_write_tif(a, h=4, w=4, dtype=np.float32, px=1.0, py=-1.0)
57+
# Place b adjacent so the geometry would otherwise work, but the
58+
# pixel size disagrees.
59+
_write_tif(b, h=4, w=4, dtype=np.float32, px=2.0, py=-2.0,
60+
origin_x=4.0)
61+
vrt = os.path.join(d, "out.vrt")
62+
with pytest.raises(ValueError, match="pixel size"):
63+
write_vrt(vrt, [a, b])
64+
65+
66+
def test_mismatched_dtype_raises(tmp_path):
67+
d = _unique_dir(tmp_path, "dtype")
68+
a = os.path.join(d, "a.tif")
69+
b = os.path.join(d, "b.tif")
70+
_write_tif(a, h=4, w=4, dtype=np.float32)
71+
_write_tif(b, h=4, w=4, dtype=np.int16, origin_x=4.0)
72+
vrt = os.path.join(d, "out.vrt")
73+
with pytest.raises(ValueError, match="dtype|sample_format|bps"):
74+
write_vrt(vrt, [a, b])
75+
76+
77+
def test_mismatched_band_count_raises(tmp_path):
78+
d = _unique_dir(tmp_path, "bands")
79+
a = os.path.join(d, "a.tif")
80+
b = os.path.join(d, "b.tif")
81+
_write_tif(a, h=4, w=4, dtype=np.float32, bands=1)
82+
_write_tif(b, h=4, w=4, dtype=np.float32, bands=3, origin_x=4.0)
83+
vrt = os.path.join(d, "out.vrt")
84+
with pytest.raises(ValueError, match="band count"):
85+
write_vrt(vrt, [a, b])
86+
87+
88+
def test_compatible_sources_succeed(tmp_path):
89+
d = _unique_dir(tmp_path, "ok")
90+
a = os.path.join(d, "a.tif")
91+
b = os.path.join(d, "b.tif")
92+
_write_tif(a, h=4, w=4, dtype=np.float32)
93+
_write_tif(b, h=4, w=4, dtype=np.float32, origin_x=4.0)
94+
vrt = os.path.join(d, "out.vrt")
95+
write_vrt(vrt, [a, b])
96+
assert os.path.exists(vrt)
97+
98+
99+
def test_pixel_size_within_tolerance_accepted(tmp_path):
100+
d = _unique_dir(tmp_path, "tol")
101+
a = os.path.join(d, "a.tif")
102+
b = os.path.join(d, "b.tif")
103+
_write_tif(a, h=4, w=4, dtype=np.float32, px=1.0, py=-1.0)
104+
# Drift well below the 1e-6 relative tolerance.
105+
_write_tif(b, h=4, w=4, dtype=np.float32,
106+
px=1.0 + 1e-10, py=-1.0, origin_x=4.0)
107+
vrt = os.path.join(d, "out.vrt")
108+
write_vrt(vrt, [a, b])
109+
assert os.path.exists(vrt)
110+
111+
112+
def test_single_source_still_works(tmp_path):
113+
d = _unique_dir(tmp_path, "one")
114+
a = os.path.join(d, "a.tif")
115+
_write_tif(a, h=4, w=4, dtype=np.float32)
116+
vrt = os.path.join(d, "out.vrt")
117+
write_vrt(vrt, [a])
118+
assert os.path.exists(vrt)

0 commit comments

Comments
 (0)