Skip to content

Commit b90fa88

Browse files
committed
Fix read_vrt dropping SimpleSource <NODATA>0</NODATA> (#1655)
The per-source nodata fallback used `src.nodata or nodata`, and Python treats `0.0` as falsy. A SimpleSource that declared `<NODATA>0</NODATA>` silently picked up the band-level `<NoDataValue>` (or `None` when none was set), so pixels equal to 0.0 in the source file survived as valid data instead of being masked to NaN. The in-code comment described the behaviour as "backward compatibility" but the result is silent data corruption on VRTs that mosaic sources using 0 as a sentinel (a common remote-sensing convention). Switch to an explicit `is not None` check so a legitimate zero sentinel survives the fallback. Add `test_vrt_source_nodata_zero_1655.py` covering five cases: source NODATA=0 with no band fallback, integer XML literal, non-zero unchanged, band-level NoDataValue=0 still honoured when no per-source NODATA is set, and per-source precedence over a different band-level sentinel.
1 parent f0a09c0 commit b90fa88

3 files changed

Lines changed: 178 additions & 5 deletions

File tree

.claude/sweep-accuracy-state.csv

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

xrspatial/geotiff/_vrt.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -364,10 +364,12 @@ def read_vrt(vrt_path: str, *, window=None,
364364
# source with a fractional nodata (e.g. -9999.25) would
365365
# previously miss the mask because ``np.float32(-9999.25)``
366366
# rounds to the nearest float32 and then compares unequal
367-
# to the float64 pixel value. ``src.nodata or nodata`` is
368-
# kept for backward compatibility but intentionally treats
369-
# ``0.0`` as unset (a long-standing quirk of this reader).
370-
src_nodata = src.nodata or nodata
367+
# to the float64 pixel value. Use an explicit ``is not None``
368+
# check so a legitimate ``<NODATA>0</NODATA>`` survives the
369+
# fallback: the earlier ``src.nodata or nodata`` shortcut treated
370+
# ``0.0`` as falsy and silently replaced it with the band-level
371+
# sentinel (issue #1655).
372+
src_nodata = src.nodata if src.nodata is not None else nodata
371373
if src_nodata is not None and src_arr.dtype.kind == 'f':
372374
src_arr = src_arr.copy()
373375
sentinel = src_arr.dtype.type(src_nodata)
Lines changed: 171 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,171 @@
1+
"""Regression tests for issue #1655.
2+
3+
``read_vrt`` used to evaluate the per-source NODATA fallback as
4+
``src.nodata or nodata``. Python treats ``0.0`` as falsy, so a
5+
SimpleSource that declared ``<NODATA>0</NODATA>`` was silently replaced
6+
with the band-level sentinel (or ``None`` when the band had none of its
7+
own). Pixels equal to 0.0 in the source file survived as valid data and
8+
biased every downstream NaN-aware aggregation.
9+
10+
The fix changes the fallback to an explicit ``is not None`` check so a
11+
legitimate zero sentinel survives.
12+
"""
13+
from __future__ import annotations
14+
15+
import numpy as np
16+
import pytest
17+
18+
from xrspatial.geotiff._geotags import GeoTransform
19+
from xrspatial.geotiff._vrt import read_vrt
20+
from xrspatial.geotiff._writer import write
21+
22+
23+
def _write_source(tmp_path, arr, name='src_1655.tif'):
24+
"""Write a small float32 GeoTIFF without a GDAL_NODATA tag."""
25+
p = str(tmp_path / name)
26+
write(
27+
arr, p,
28+
geo_transform=GeoTransform(
29+
origin_x=0.0, origin_y=0.0,
30+
pixel_width=1.0, pixel_height=-1.0,
31+
),
32+
crs_epsg=4326,
33+
compression='none',
34+
tiled=False,
35+
)
36+
return p
37+
38+
39+
def _vrt_with_source_nodata(tmp_path, src_path, nodata_xml,
40+
include_band_nodata=False,
41+
width=4, height=3,
42+
band_nodata='0.0'):
43+
"""Write a single-band Float32 VRT with the supplied ``<NODATA>``
44+
on its SimpleSource. ``include_band_nodata`` controls whether a
45+
``<NoDataValue>`` is emitted on the band as well.
46+
"""
47+
band_nd_elem = (
48+
f'<NoDataValue>{band_nodata}</NoDataValue>'
49+
if include_band_nodata else '')
50+
vrt_xml = (
51+
f'<VRTDataset rasterXSize="{width}" rasterYSize="{height}">\n'
52+
f' <SRS>EPSG:4326</SRS>\n'
53+
f' <GeoTransform>0.0, 1.0, 0.0, 0.0, 0.0, -1.0</GeoTransform>\n'
54+
f' <VRTRasterBand dataType="Float32" band="1">\n'
55+
f' {band_nd_elem}\n'
56+
f' <SimpleSource>\n'
57+
f' <SourceFilename relativeToVRT="0">{src_path}</SourceFilename>\n'
58+
f' <SourceBand>1</SourceBand>\n'
59+
f' <SrcRect xOff="0" yOff="0" '
60+
f'xSize="{width}" ySize="{height}"/>\n'
61+
f' <DstRect xOff="0" yOff="0" '
62+
f'xSize="{width}" ySize="{height}"/>\n'
63+
f' <NODATA>{nodata_xml}</NODATA>\n'
64+
f' </SimpleSource>\n'
65+
f' </VRTRasterBand>\n'
66+
f'</VRTDataset>\n'
67+
)
68+
vrt_path = str(tmp_path / 'src_zero_1655.vrt')
69+
with open(vrt_path, 'w') as f:
70+
f.write(vrt_xml)
71+
return vrt_path
72+
73+
74+
class TestVRTSourceNodataZero:
75+
"""SimpleSource ``<NODATA>0</NODATA>`` must mask zeros to NaN."""
76+
77+
def test_source_nodata_zero_no_band_nodata(self, tmp_path):
78+
"""SimpleSource NODATA=0 with no band-level fallback masks zeros."""
79+
arr = np.array(
80+
[[1.0, 0.0, 3.0, 0.0],
81+
[4.0, 0.0, 6.0, 7.0],
82+
[0.0, 8.0, 9.0, 10.0]],
83+
dtype=np.float32,
84+
)
85+
src = _write_source(tmp_path, arr)
86+
vrt = _vrt_with_source_nodata(tmp_path, src, '0.0')
87+
88+
result, _ = read_vrt(vrt)
89+
assert int(np.isnan(result).sum()) == 4
90+
91+
def test_source_nodata_zero_integer_xml(self, tmp_path):
92+
"""``<NODATA>0</NODATA>`` (integer literal) also masks zeros."""
93+
arr = np.array(
94+
[[1.0, 0.0, 3.0]],
95+
dtype=np.float32,
96+
)
97+
src = _write_source(tmp_path, arr, name='int_xml.tif')
98+
vrt = _vrt_with_source_nodata(
99+
tmp_path, src, '0', width=3, height=1)
100+
101+
result, _ = read_vrt(vrt)
102+
assert int(np.isnan(result).sum()) == 1
103+
assert np.isnan(result[0, 1])
104+
105+
def test_source_nodata_nonzero_unchanged(self, tmp_path):
106+
"""SimpleSource NODATA != 0 keeps masking behaviour."""
107+
arr = np.array(
108+
[[1.0, 0.0, 3.0, 0.0]],
109+
dtype=np.float32,
110+
)
111+
src = _write_source(tmp_path, arr, name='nonzero.tif')
112+
vrt = _vrt_with_source_nodata(
113+
tmp_path, src, '1.0', width=4, height=1)
114+
115+
result, _ = read_vrt(vrt)
116+
# Only the literal 1.0 at [0, 0] should be masked.
117+
assert int(np.isnan(result).sum()) == 1
118+
assert np.isnan(result[0, 0])
119+
120+
def test_band_nodata_zero_still_honoured(self, tmp_path):
121+
"""Band-level ``<NoDataValue>0</NoDataValue>`` keeps working."""
122+
arr = np.array(
123+
[[1.0, 0.0, 3.0]],
124+
dtype=np.float32,
125+
)
126+
src = _write_source(tmp_path, arr, name='band_zero.tif')
127+
# Build a VRT where only the band carries nodata=0 (no NODATA
128+
# on the SimpleSource).
129+
vrt_xml = (
130+
f'<VRTDataset rasterXSize="3" rasterYSize="1">\n'
131+
f' <SRS>EPSG:4326</SRS>\n'
132+
f' <GeoTransform>0.0, 1.0, 0.0, 0.0, 0.0, -1.0</GeoTransform>\n'
133+
f' <VRTRasterBand dataType="Float32" band="1">\n'
134+
f' <NoDataValue>0.0</NoDataValue>\n'
135+
f' <SimpleSource>\n'
136+
f' <SourceFilename relativeToVRT="0">{src}</SourceFilename>\n'
137+
f' <SourceBand>1</SourceBand>\n'
138+
f' <SrcRect xOff="0" yOff="0" xSize="3" ySize="1"/>\n'
139+
f' <DstRect xOff="0" yOff="0" xSize="3" ySize="1"/>\n'
140+
f' </SimpleSource>\n'
141+
f' </VRTRasterBand>\n'
142+
f'</VRTDataset>\n'
143+
)
144+
vrt = str(tmp_path / 'band_zero_1655.vrt')
145+
with open(vrt, 'w') as f:
146+
f.write(vrt_xml)
147+
148+
result, _ = read_vrt(vrt)
149+
assert int(np.isnan(result).sum()) == 1
150+
assert np.isnan(result[0, 1])
151+
152+
def test_source_nodata_zero_overrides_band(self, tmp_path):
153+
"""SimpleSource NODATA=0 takes precedence over band NoDataValue=99."""
154+
arr = np.array(
155+
[[1.0, 0.0, 99.0]],
156+
dtype=np.float32,
157+
)
158+
src = _write_source(tmp_path, arr, name='override.tif')
159+
vrt = _vrt_with_source_nodata(
160+
tmp_path, src, '0.0',
161+
include_band_nodata=True, band_nodata='99.0',
162+
width=3, height=1)
163+
164+
result, _ = read_vrt(vrt)
165+
# The SimpleSource sentinel (0.0) wins over the band sentinel
166+
# (99.0), so only the 0.0 cell becomes NaN. The 99.0 cell stays
167+
# because the masking is per-source, applied at read time, and
168+
# the band-level fallback never fires when src.nodata is set.
169+
assert int(np.isnan(result).sum()) == 1
170+
assert np.isnan(result[0, 1])
171+
assert result[0, 2] == pytest.approx(99.0)

0 commit comments

Comments
 (0)