Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .claude/sweep-metadata-state.csv
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
module,last_inspected,issue,severity_max,categories_found,notes
geotiff,2026-05-11,1580;1582,HIGH,1;3;4;5,"write_geotiff_gpu silently mis-ordered (band, y, x) DataArrays (#1580). to_geotiff ignored rioxarray nodatavals and CF _FillValue attrs (#1582). Both fixed in same branch."
geotiff,2026-05-11,1598,MEDIUM,4,"read_vrt(band=N) used vrt.bands[0].nodata for attrs and integer-promotion mask, mis-masking band N reads with per-band sentinels (#1598)."
reproject,2026-05-10,1572;1573,HIGH,1;3;4,geoid_height_raster dropped input attrs and used dims[-2:] for 3D inputs (#1572). reproject/merge ignored nodatavals (rasterio convention) when rioxarray absent (#1573). Fixed in same branch.
17 changes: 14 additions & 3 deletions xrspatial/geotiff/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2732,11 +2732,22 @@ def read_vrt(source: str, *, dtype=None, window=None,
attrs['crs_wkt'] = vrt.crs_wkt
if vrt.raster_type == 'point':
attrs['raster_type'] = 'point'
# When a specific band is selected, source its nodata from that
# band's <NoDataValue> instead of band 0's. Otherwise multi-band
# VRTs with per-band sentinels would mis-mask the read: attrs would
# advertise band 0's sentinel, the integer-promotion block below
# would mask against band 0's sentinel, and band N's actual nodata
# pixels would survive as literal integers. See issue #1598.
nodata = None
if vrt.bands:
nodata = vrt.bands[0].nodata
if nodata is not None:
attrs['nodata'] = nodata
band_idx_for_nodata = band if band is not None else 0
# ``_vrt.read_vrt`` already validates ``band`` is in range; the
# extra guard keeps a clearer message if a future refactor
# widens the public range without updating the internal reader.
if 0 <= band_idx_for_nodata < len(vrt.bands):
nodata = vrt.bands[band_idx_for_nodata].nodata
if nodata is not None:
attrs['nodata'] = nodata

# Mirror the integer-with-nodata promotion that open_geotiff /
# read_geotiff_dask / read_geotiff_gpu apply post-decode. The VRT
Expand Down
107 changes: 107 additions & 0 deletions xrspatial/geotiff/tests/test_vrt_band_nodata_1598.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
"""Regression tests for issue #1598.

``read_vrt(path, band=N)`` used to always source the nodata sentinel
from ``vrt.bands[0]`` rather than the requested band, so a multi-band
VRT with per-band ``<NoDataValue>`` would mis-mask reads of any band
other than band 0:

* ``attrs['nodata']`` advertised band 0's sentinel (wrong).
* The integer-to-float64 promotion ran against band 0's sentinel, so
band N's actual nodata pixels stayed as literal integers.
* The returned dtype was integer when it should have been float64.

The fix uses ``vrt.bands[band].nodata`` when a band is selected.
"""
from __future__ import annotations

import numpy as np

from xrspatial.geotiff import read_vrt
from xrspatial.geotiff._writer import write


def _write_two_band_per_band_nodata_vrt(tmp_path):
"""Two single-band uint16 sources, each with a distinct nodata
sentinel, exposed as bands 1 and 2 of a hand-rolled VRT.
"""
band0 = np.array([[1, 2], [3, 65535]], dtype=np.uint16)
band1 = np.array([[7, 8], [9, 65000]], dtype=np.uint16)
p0 = str(tmp_path / 'vrt_band0_1598.tif')
p1 = str(tmp_path / 'vrt_band1_1598.tif')
write(band0, p0, nodata=65535, compression='none', tiled=False)
write(band1, p1, nodata=65000, compression='none', tiled=False)

vrt_path = str(tmp_path / 'two_band_per_band_nodata_1598.vrt')
vrt_xml = f"""<VRTDataset rasterXSize="2" rasterYSize="2">
<GeoTransform>0.0, 1.0, 0.0, 0.0, 0.0, -1.0</GeoTransform>
<VRTRasterBand dataType="UInt16" band="1">
<NoDataValue>65535</NoDataValue>
<SimpleSource>
<SourceFilename relativeToVRT="0">{p0}</SourceFilename>
<SourceBand>1</SourceBand>
<SrcRect xOff="0" yOff="0" xSize="2" ySize="2"/>
<DstRect xOff="0" yOff="0" xSize="2" ySize="2"/>
</SimpleSource>
</VRTRasterBand>
<VRTRasterBand dataType="UInt16" band="2">
<NoDataValue>65000</NoDataValue>
<SimpleSource>
<SourceFilename relativeToVRT="0">{p1}</SourceFilename>
<SourceBand>1</SourceBand>
<SrcRect xOff="0" yOff="0" xSize="2" ySize="2"/>
<DstRect xOff="0" yOff="0" xSize="2" ySize="2"/>
</SimpleSource>
</VRTRasterBand>
</VRTDataset>"""
with open(vrt_path, 'w') as f:
f.write(vrt_xml)
return vrt_path


def test_read_vrt_band0_uses_band0_nodata(tmp_path):
"""Sanity check the band-0 selection still works after the fix.

Confirms the refactor did not flip the index.
"""
vrt_path = _write_two_band_per_band_nodata_vrt(tmp_path)
r = read_vrt(vrt_path, band=0)
assert r.dtype == np.float64
assert r.attrs.get('nodata') == 65535.0
assert np.isnan(r.values[1, 1])
assert r.values[0, 0] == 1


def test_read_vrt_band1_uses_band1_nodata(tmp_path):
"""The previously-broken case: band=1 must use band 1's sentinel.

Before the fix this returned dtype=uint16 with values=[[7,8],
[9,65000]] and attrs['nodata']=65535.
"""
vrt_path = _write_two_band_per_band_nodata_vrt(tmp_path)
r = read_vrt(vrt_path, band=1)
assert r.dtype == np.float64, (
"band=1 read kept uint16 dtype; per-band nodata regression."
)
assert r.attrs.get('nodata') == 65000.0, (
f"attrs['nodata'] was {r.attrs.get('nodata')}, "
f"expected 65000 from band 1's <NoDataValue>."
)
assert np.isnan(r.values[1, 1]), (
"band 1's sentinel pixel was not NaN-masked; "
"promotion ran against the wrong sentinel."
)
assert r.values[0, 0] == 7
assert r.values[1, 0] == 9


def test_read_vrt_no_band_keeps_band0_nodata_attr(tmp_path):
"""Unselected reads still surface band 0's sentinel.

Multi-band VRTs with mixed sentinels return all bands stacked, and
the canonical attr cannot encode per-band values; advertising
band 0's sentinel matches the prior behavior and the documented
"first band wins" contract for multi-band reads.
"""
vrt_path = _write_two_band_per_band_nodata_vrt(tmp_path)
r = read_vrt(vrt_path)
assert r.attrs.get('nodata') == 65535.0
Loading