Skip to content

Commit 008fcb1

Browse files
committed
Fix read_vrt(band=N) using band 0's nodata sentinel (#1598)
read_vrt sourced attrs['nodata'] and the integer-with-nodata float64 promotion mask from vrt.bands[0].nodata regardless of which band was selected. A multi-band VRT with per-band <NoDataValue> tags would silently return band N's pixels with band 0's sentinel reported in attrs and band N's actual sentinel left as literal integers in the array (no NaN, no float64 promotion). Use vrt.bands[band].nodata when a band is selected, fall back to band 0 when no band kwarg was given (preserves the prior multi-band "first band wins" contract). Tests cover band=0 sanity, the broken band=1 case, and the multi-band fallback.
1 parent c5e873d commit 008fcb1

3 files changed

Lines changed: 122 additions & 4 deletions

File tree

.claude/sweep-metadata-state.csv

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
module,last_inspected,issue,severity_max,categories_found,notes
2-
geotiff,2026-05-11,1580;1582,HIGH,1;3;4;5,"write_geotiff_gpu silently mis-ordered (band, y, x) DataArrays (#1580). to_geotiff ignored rioxarray nodatavals and CF _FillValue attrs (#1582). Both fixed in same branch."
2+
geotiff,2026-05-11,1598,MEDIUM,4,"read_vrt(band=N) used vrt.bands[0].nodata for attrs and integer-promotion mask, mis-masking band N reads with per-band sentinels (#1598)."
33
reproject,2026-05-10,1572;1573,HIGH,1;3;4,geoid_height_raster dropped input attrs and used dims[-2:] for 3D inputs (#1572). reproject/merge ignored nodatavals (rasterio convention) when rioxarray absent (#1573). Fixed in same branch.

xrspatial/geotiff/__init__.py

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2732,11 +2732,22 @@ def read_vrt(source: str, *, dtype=None, window=None,
27322732
attrs['crs_wkt'] = vrt.crs_wkt
27332733
if vrt.raster_type == 'point':
27342734
attrs['raster_type'] = 'point'
2735+
# When a specific band is selected, source its nodata from that
2736+
# band's <NoDataValue> instead of band 0's. Otherwise multi-band
2737+
# VRTs with per-band sentinels would mis-mask the read: attrs would
2738+
# advertise band 0's sentinel, the integer-promotion block below
2739+
# would mask against band 0's sentinel, and band N's actual nodata
2740+
# pixels would survive as literal integers. See issue #1598.
27352741
nodata = None
27362742
if vrt.bands:
2737-
nodata = vrt.bands[0].nodata
2738-
if nodata is not None:
2739-
attrs['nodata'] = nodata
2743+
band_idx_for_nodata = band if band is not None else 0
2744+
# ``_vrt.read_vrt`` already validates ``band`` is in range; the
2745+
# extra guard keeps a clearer message if a future refactor
2746+
# widens the public range without updating the internal reader.
2747+
if 0 <= band_idx_for_nodata < len(vrt.bands):
2748+
nodata = vrt.bands[band_idx_for_nodata].nodata
2749+
if nodata is not None:
2750+
attrs['nodata'] = nodata
27402751

27412752
# Mirror the integer-with-nodata promotion that open_geotiff /
27422753
# read_geotiff_dask / read_geotiff_gpu apply post-decode. The VRT
Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
"""Regression tests for issue #1598.
2+
3+
``read_vrt(path, band=N)`` used to always source the nodata sentinel
4+
from ``vrt.bands[0]`` rather than the requested band, so a multi-band
5+
VRT with per-band ``<NoDataValue>`` would mis-mask reads of any band
6+
other than band 0:
7+
8+
* ``attrs['nodata']`` advertised band 0's sentinel (wrong).
9+
* The integer-to-float64 promotion ran against band 0's sentinel, so
10+
band N's actual nodata pixels stayed as literal integers.
11+
* The returned dtype was integer when it should have been float64.
12+
13+
The fix uses ``vrt.bands[band].nodata`` when a band is selected.
14+
"""
15+
from __future__ import annotations
16+
17+
import numpy as np
18+
19+
from xrspatial.geotiff import read_vrt
20+
from xrspatial.geotiff._writer import write
21+
22+
23+
def _write_two_band_per_band_nodata_vrt(tmp_path):
24+
"""Two single-band uint16 sources, each with a distinct nodata
25+
sentinel, exposed as bands 1 and 2 of a hand-rolled VRT.
26+
"""
27+
band0 = np.array([[1, 2], [3, 65535]], dtype=np.uint16)
28+
band1 = np.array([[7, 8], [9, 65000]], dtype=np.uint16)
29+
p0 = str(tmp_path / 'vrt_band0_1598.tif')
30+
p1 = str(tmp_path / 'vrt_band1_1598.tif')
31+
write(band0, p0, nodata=65535, compression='none', tiled=False)
32+
write(band1, p1, nodata=65000, compression='none', tiled=False)
33+
34+
vrt_path = str(tmp_path / 'two_band_per_band_nodata_1598.vrt')
35+
vrt_xml = f"""<VRTDataset rasterXSize="2" rasterYSize="2">
36+
<GeoTransform>0.0, 1.0, 0.0, 0.0, 0.0, -1.0</GeoTransform>
37+
<VRTRasterBand dataType="UInt16" band="1">
38+
<NoDataValue>65535</NoDataValue>
39+
<SimpleSource>
40+
<SourceFilename relativeToVRT="0">{p0}</SourceFilename>
41+
<SourceBand>1</SourceBand>
42+
<SrcRect xOff="0" yOff="0" xSize="2" ySize="2"/>
43+
<DstRect xOff="0" yOff="0" xSize="2" ySize="2"/>
44+
</SimpleSource>
45+
</VRTRasterBand>
46+
<VRTRasterBand dataType="UInt16" band="2">
47+
<NoDataValue>65000</NoDataValue>
48+
<SimpleSource>
49+
<SourceFilename relativeToVRT="0">{p1}</SourceFilename>
50+
<SourceBand>1</SourceBand>
51+
<SrcRect xOff="0" yOff="0" xSize="2" ySize="2"/>
52+
<DstRect xOff="0" yOff="0" xSize="2" ySize="2"/>
53+
</SimpleSource>
54+
</VRTRasterBand>
55+
</VRTDataset>"""
56+
with open(vrt_path, 'w') as f:
57+
f.write(vrt_xml)
58+
return vrt_path
59+
60+
61+
def test_read_vrt_band0_uses_band0_nodata(tmp_path):
62+
"""Sanity check the band-0 selection still works after the fix.
63+
64+
Confirms the refactor did not flip the index.
65+
"""
66+
vrt_path = _write_two_band_per_band_nodata_vrt(tmp_path)
67+
r = read_vrt(vrt_path, band=0)
68+
assert r.dtype == np.float64
69+
assert r.attrs.get('nodata') == 65535.0
70+
assert np.isnan(r.values[1, 1])
71+
assert r.values[0, 0] == 1
72+
73+
74+
def test_read_vrt_band1_uses_band1_nodata(tmp_path):
75+
"""The previously-broken case: band=1 must use band 1's sentinel.
76+
77+
Before the fix this returned dtype=uint16 with values=[[7,8],
78+
[9,65000]] and attrs['nodata']=65535.
79+
"""
80+
vrt_path = _write_two_band_per_band_nodata_vrt(tmp_path)
81+
r = read_vrt(vrt_path, band=1)
82+
assert r.dtype == np.float64, (
83+
"band=1 read kept uint16 dtype; per-band nodata regression."
84+
)
85+
assert r.attrs.get('nodata') == 65000.0, (
86+
f"attrs['nodata'] was {r.attrs.get('nodata')}, "
87+
f"expected 65000 from band 1's <NoDataValue>."
88+
)
89+
assert np.isnan(r.values[1, 1]), (
90+
"band 1's sentinel pixel was not NaN-masked; "
91+
"promotion ran against the wrong sentinel."
92+
)
93+
assert r.values[0, 0] == 7
94+
assert r.values[1, 0] == 9
95+
96+
97+
def test_read_vrt_no_band_keeps_band0_nodata_attr(tmp_path):
98+
"""Unselected reads still surface band 0's sentinel.
99+
100+
Multi-band VRTs with mixed sentinels return all bands stacked, and
101+
the canonical attr cannot encode per-band values; advertising
102+
band 0's sentinel matches the prior behavior and the documented
103+
"first band wins" contract for multi-band reads.
104+
"""
105+
vrt_path = _write_two_band_per_band_nodata_vrt(tmp_path)
106+
r = read_vrt(vrt_path)
107+
assert r.attrs.get('nodata') == 65535.0

0 commit comments

Comments
 (0)