|
| 1 | +"""Fail-closed coverage for ``MixedBandMetadataError`` (issue #1987 PR 5). |
| 2 | +
|
| 3 | +A VRT can mosaic source bands that declare disagreeing per-band |
| 4 | +``<NoDataValue>`` sentinels. The legacy reader picked band 0's sentinel |
| 5 | +for the whole mosaic, which let band N's valid pixels collide with |
| 6 | +band 0's sentinel after the flatten-to-scalar step. The fail-closed |
| 7 | +default refuses the ambiguity; ``band_nodata='first'`` keeps the legacy |
| 8 | +behaviour explicitly. |
| 9 | +
|
| 10 | +The validator check itself, the ``band_nodata=`` kwarg on |
| 11 | +``read_vrt``, and the per-VRT-read context plumbing landed in |
| 12 | +the #2031 bundle. This PR registers the check and forwards |
| 13 | +``band_nodata`` through ``open_geotiff`` and ``read_geotiff_dask``. |
| 14 | +The tests below pin the four entry-point contracts: |
| 15 | +
|
| 16 | +* default ``read_vrt`` raises on disagreeing per-band sentinels |
| 17 | +* default ``read_vrt(chunks=...)`` raises before any task fires |
| 18 | +* ``band_nodata='first'`` opts back into the legacy reader |
| 19 | +* ``band_nodata`` raises on non-VRT sources for ``open_geotiff`` and |
| 20 | + ``read_geotiff_dask`` |
| 21 | +""" |
| 22 | +from __future__ import annotations |
| 23 | + |
| 24 | +import os |
| 25 | + |
| 26 | +import numpy as np |
| 27 | +import pytest |
| 28 | + |
| 29 | +from xrspatial.geotiff import ( |
| 30 | + GeoTIFFAmbiguousMetadataError, |
| 31 | + MixedBandMetadataError, |
| 32 | + open_geotiff, |
| 33 | + read_geotiff_dask, |
| 34 | + read_vrt, |
| 35 | + to_geotiff, |
| 36 | +) |
| 37 | +from xrspatial.geotiff._writer import write |
| 38 | + |
| 39 | + |
| 40 | +def _write_mixed_band_vrt(tmp_path, *, sentinel_a=65535, sentinel_b=65000): |
| 41 | + """Two uint16 sources with distinct sentinels, mosaiced into one VRT. |
| 42 | +
|
| 43 | + ``[1, 1]`` is the sentinel pixel in each source so the legacy |
| 44 | + reader's flatten-to-first-band step would mis-mask band 1. |
| 45 | + """ |
| 46 | + band0 = np.array([[1, 2], [3, sentinel_a]], dtype=np.uint16) |
| 47 | + band1 = np.array([[7, 8], [9, sentinel_b]], dtype=np.uint16) |
| 48 | + p0 = str(tmp_path / 'mixed_band_1987_a.tif') |
| 49 | + p1 = str(tmp_path / 'mixed_band_1987_b.tif') |
| 50 | + write(band0, p0, nodata=sentinel_a, compression='none', tiled=False) |
| 51 | + write(band1, p1, nodata=sentinel_b, compression='none', tiled=False) |
| 52 | + vrt_path = str(tmp_path / 'mixed_band_1987.vrt') |
| 53 | + vrt_xml = f"""<VRTDataset rasterXSize="2" rasterYSize="2"> |
| 54 | + <GeoTransform>0.0, 1.0, 0.0, 0.0, 0.0, -1.0</GeoTransform> |
| 55 | + <VRTRasterBand dataType="UInt16" band="1"> |
| 56 | + <NoDataValue>{sentinel_a}</NoDataValue> |
| 57 | + <SimpleSource> |
| 58 | + <SourceFilename relativeToVRT="0">{p0}</SourceFilename> |
| 59 | + <SourceBand>1</SourceBand> |
| 60 | + <SrcRect xOff="0" yOff="0" xSize="2" ySize="2"/> |
| 61 | + <DstRect xOff="0" yOff="0" xSize="2" ySize="2"/> |
| 62 | + </SimpleSource> |
| 63 | + </VRTRasterBand> |
| 64 | + <VRTRasterBand dataType="UInt16" band="2"> |
| 65 | + <NoDataValue>{sentinel_b}</NoDataValue> |
| 66 | + <SimpleSource> |
| 67 | + <SourceFilename relativeToVRT="0">{p1}</SourceFilename> |
| 68 | + <SourceBand>1</SourceBand> |
| 69 | + <SrcRect xOff="0" yOff="0" xSize="2" ySize="2"/> |
| 70 | + <DstRect xOff="0" yOff="0" xSize="2" ySize="2"/> |
| 71 | + </SimpleSource> |
| 72 | + </VRTRasterBand> |
| 73 | +</VRTDataset>""" |
| 74 | + with open(vrt_path, 'w') as f: |
| 75 | + f.write(vrt_xml) |
| 76 | + return vrt_path |
| 77 | + |
| 78 | + |
| 79 | +def _write_shared_sentinel_vrt(tmp_path, *, sentinel=65535): |
| 80 | + """Two uint16 sources mosaiced with the *same* per-band sentinel. |
| 81 | +
|
| 82 | + The fail-closed check must not trigger on a VRT whose per-band |
| 83 | + nodata agrees; the check is about ambiguity, not about per-band |
| 84 | + declarations. |
| 85 | + """ |
| 86 | + band0 = np.array([[1, 2], [3, sentinel]], dtype=np.uint16) |
| 87 | + band1 = np.array([[7, 8], [9, sentinel]], dtype=np.uint16) |
| 88 | + p0 = str(tmp_path / 'shared_band_1987_a.tif') |
| 89 | + p1 = str(tmp_path / 'shared_band_1987_b.tif') |
| 90 | + write(band0, p0, nodata=sentinel, compression='none', tiled=False) |
| 91 | + write(band1, p1, nodata=sentinel, compression='none', tiled=False) |
| 92 | + vrt_path = str(tmp_path / 'shared_band_1987.vrt') |
| 93 | + vrt_xml = f"""<VRTDataset rasterXSize="2" rasterYSize="2"> |
| 94 | + <GeoTransform>0.0, 1.0, 0.0, 0.0, 0.0, -1.0</GeoTransform> |
| 95 | + <VRTRasterBand dataType="UInt16" band="1"> |
| 96 | + <NoDataValue>{sentinel}</NoDataValue> |
| 97 | + <SimpleSource> |
| 98 | + <SourceFilename relativeToVRT="0">{p0}</SourceFilename> |
| 99 | + <SourceBand>1</SourceBand> |
| 100 | + <SrcRect xOff="0" yOff="0" xSize="2" ySize="2"/> |
| 101 | + <DstRect xOff="0" yOff="0" xSize="2" ySize="2"/> |
| 102 | + </SimpleSource> |
| 103 | + </VRTRasterBand> |
| 104 | + <VRTRasterBand dataType="UInt16" band="2"> |
| 105 | + <NoDataValue>{sentinel}</NoDataValue> |
| 106 | + <SimpleSource> |
| 107 | + <SourceFilename relativeToVRT="0">{p1}</SourceFilename> |
| 108 | + <SourceBand>1</SourceBand> |
| 109 | + <SrcRect xOff="0" yOff="0" xSize="2" ySize="2"/> |
| 110 | + <DstRect xOff="0" yOff="0" xSize="2" ySize="2"/> |
| 111 | + </SimpleSource> |
| 112 | + </VRTRasterBand> |
| 113 | +</VRTDataset>""" |
| 114 | + with open(vrt_path, 'w') as f: |
| 115 | + f.write(vrt_xml) |
| 116 | + return vrt_path |
| 117 | + |
| 118 | + |
| 119 | +def _write_one_band_no_sentinel_vrt(tmp_path, *, sentinel=65535): |
| 120 | + """One band declares a sentinel, the other does not. |
| 121 | +
|
| 122 | + ``[b.nodata for b in vrt.bands]`` is ``[sentinel, None]``. The |
| 123 | + check considers ``None`` entries "no declaration" rather than a |
| 124 | + distinct sentinel, so only one concrete value is present and the |
| 125 | + check must accept. |
| 126 | + """ |
| 127 | + band0 = np.array([[1, 2], [3, sentinel]], dtype=np.uint16) |
| 128 | + band1 = np.array([[7, 8], [9, 99]], dtype=np.uint16) |
| 129 | + p0 = str(tmp_path / 'one_band_sentinel_a.tif') |
| 130 | + p1 = str(tmp_path / 'one_band_sentinel_b.tif') |
| 131 | + write(band0, p0, nodata=sentinel, compression='none', tiled=False) |
| 132 | + write(band1, p1, nodata=None, compression='none', tiled=False) |
| 133 | + vrt_path = str(tmp_path / 'one_band_sentinel.vrt') |
| 134 | + vrt_xml = f"""<VRTDataset rasterXSize="2" rasterYSize="2"> |
| 135 | + <GeoTransform>0.0, 1.0, 0.0, 0.0, 0.0, -1.0</GeoTransform> |
| 136 | + <VRTRasterBand dataType="UInt16" band="1"> |
| 137 | + <NoDataValue>{sentinel}</NoDataValue> |
| 138 | + <SimpleSource> |
| 139 | + <SourceFilename relativeToVRT="0">{p0}</SourceFilename> |
| 140 | + <SourceBand>1</SourceBand> |
| 141 | + <SrcRect xOff="0" yOff="0" xSize="2" ySize="2"/> |
| 142 | + <DstRect xOff="0" yOff="0" xSize="2" ySize="2"/> |
| 143 | + </SimpleSource> |
| 144 | + </VRTRasterBand> |
| 145 | + <VRTRasterBand dataType="UInt16" band="2"> |
| 146 | + <SimpleSource> |
| 147 | + <SourceFilename relativeToVRT="0">{p1}</SourceFilename> |
| 148 | + <SourceBand>1</SourceBand> |
| 149 | + <SrcRect xOff="0" yOff="0" xSize="2" ySize="2"/> |
| 150 | + <DstRect xOff="0" yOff="0" xSize="2" ySize="2"/> |
| 151 | + </SimpleSource> |
| 152 | + </VRTRasterBand> |
| 153 | +</VRTDataset>""" |
| 154 | + with open(vrt_path, 'w') as f: |
| 155 | + f.write(vrt_xml) |
| 156 | + return vrt_path |
| 157 | + |
| 158 | + |
| 159 | +def test_read_vrt_rejects_mixed_per_band_nodata(tmp_path): |
| 160 | + """Default ``read_vrt`` raises on disagreeing per-band sentinels.""" |
| 161 | + vrt_path = _write_mixed_band_vrt(tmp_path) |
| 162 | + with pytest.raises(MixedBandMetadataError) as exc_info: |
| 163 | + read_vrt(vrt_path) |
| 164 | + msg = str(exc_info.value) |
| 165 | + assert "65535" in msg and "65000" in msg |
| 166 | + assert "band_nodata='first'" in msg |
| 167 | + assert "#1987" in msg |
| 168 | + |
| 169 | + |
| 170 | +def test_read_vrt_chunked_rejects_mixed_per_band_nodata(tmp_path): |
| 171 | + """The chunked dispatch validates before scheduling any task.""" |
| 172 | + vrt_path = _write_mixed_band_vrt(tmp_path) |
| 173 | + with pytest.raises(MixedBandMetadataError): |
| 174 | + read_vrt(vrt_path, chunks=1) |
| 175 | + |
| 176 | + |
| 177 | +def test_read_vrt_band_nodata_first_opts_back_in(tmp_path): |
| 178 | + """``band_nodata='first'`` keeps the legacy flatten-to-first path.""" |
| 179 | + vrt_path = _write_mixed_band_vrt(tmp_path) |
| 180 | + r = read_vrt(vrt_path, band_nodata='first') |
| 181 | + # Legacy semantics: band 0's sentinel becomes ``attrs['nodata']``. |
| 182 | + assert r.attrs.get('nodata') == 65535.0 |
| 183 | + |
| 184 | + |
| 185 | +def test_read_vrt_shared_sentinel_accepts(tmp_path): |
| 186 | + """A VRT whose bands agree on a single sentinel is not ambiguous.""" |
| 187 | + vrt_path = _write_shared_sentinel_vrt(tmp_path) |
| 188 | + r = read_vrt(vrt_path) |
| 189 | + assert r.attrs.get('nodata') == 65535.0 |
| 190 | + |
| 191 | + |
| 192 | +def test_read_vrt_only_one_band_declares_sentinel_accepts(tmp_path): |
| 193 | + """``None`` per-band entries count as "no declaration", not a value. |
| 194 | +
|
| 195 | + A VRT with one declared sentinel and one undeclared band has only |
| 196 | + one concrete sentinel so the fail-closed check must accept. |
| 197 | + """ |
| 198 | + vrt_path = _write_one_band_no_sentinel_vrt(tmp_path) |
| 199 | + # Should not raise. |
| 200 | + read_vrt(vrt_path) |
| 201 | + |
| 202 | + |
| 203 | +def test_open_geotiff_propagates_mixed_band_rejection(tmp_path): |
| 204 | + """``open_geotiff`` on a ``.vrt`` source routes through ``read_vrt``.""" |
| 205 | + vrt_path = _write_mixed_band_vrt(tmp_path) |
| 206 | + with pytest.raises(MixedBandMetadataError): |
| 207 | + open_geotiff(vrt_path) |
| 208 | + |
| 209 | + |
| 210 | +def test_open_geotiff_band_nodata_first_passes_through(tmp_path): |
| 211 | + """``open_geotiff(band_nodata='first')`` forwards to ``read_vrt``.""" |
| 212 | + vrt_path = _write_mixed_band_vrt(tmp_path) |
| 213 | + r = open_geotiff(vrt_path, band_nodata='first') |
| 214 | + assert r.attrs.get('nodata') == 65535.0 |
| 215 | + |
| 216 | + |
| 217 | +def test_read_geotiff_dask_band_nodata_first_passes_through(tmp_path): |
| 218 | + """``read_geotiff_dask`` on a ``.vrt`` source forwards the kwarg.""" |
| 219 | + vrt_path = _write_mixed_band_vrt(tmp_path) |
| 220 | + r = read_geotiff_dask(vrt_path, chunks=1, band_nodata='first') |
| 221 | + assert r.attrs.get('nodata') == 65535.0 |
| 222 | + |
| 223 | + |
| 224 | +def test_open_geotiff_band_nodata_rejected_on_non_vrt_source(tmp_path): |
| 225 | + """``band_nodata`` is VRT-only; reject up front on plain GeoTIFFs. |
| 226 | +
|
| 227 | + Same dispatch-kwarg pattern as ``missing_sources``, ``on_gpu_failure``, |
| 228 | + and ``max_cloud_bytes``: silently dropping a backend-specific kwarg |
| 229 | + is the bug class #1561 / #1605 / #1685 / #1795 / #1810 / #1974 closed |
| 230 | + on the rest of the surface. |
| 231 | + """ |
| 232 | + arr = np.zeros((2, 2), dtype=np.uint16) |
| 233 | + arr_da = _wrap_2d(arr) |
| 234 | + p = tmp_path / 'plain.tif' |
| 235 | + to_geotiff(arr_da, str(p), compression='none', tiled=False) |
| 236 | + with pytest.raises(ValueError, match="band_nodata only applies to VRT"): |
| 237 | + open_geotiff(str(p), band_nodata='first') |
| 238 | + |
| 239 | + |
| 240 | +def test_read_geotiff_dask_band_nodata_rejected_on_non_vrt_source(tmp_path): |
| 241 | + """``read_geotiff_dask`` matches ``open_geotiff``'s VRT-only guard.""" |
| 242 | + arr = np.zeros((2, 2), dtype=np.uint16) |
| 243 | + arr_da = _wrap_2d(arr) |
| 244 | + p = tmp_path / 'plain.tif' |
| 245 | + to_geotiff(arr_da, str(p), compression='none', tiled=False) |
| 246 | + with pytest.raises(ValueError, match="band_nodata only applies to VRT"): |
| 247 | + read_geotiff_dask(str(p), chunks=1, band_nodata='first') |
| 248 | + |
| 249 | + |
| 250 | +def test_mixed_band_metadata_error_subclasses_base(tmp_path): |
| 251 | + """``MixedBandMetadataError`` is catchable via the base class.""" |
| 252 | + vrt_path = _write_mixed_band_vrt(tmp_path) |
| 253 | + with pytest.raises(GeoTIFFAmbiguousMetadataError): |
| 254 | + read_vrt(vrt_path) |
| 255 | + |
| 256 | + |
| 257 | +def _wrap_2d(arr): |
| 258 | + """Wrap a 2D numpy array as a minimal DataArray for ``to_geotiff``.""" |
| 259 | + import xarray as xr |
| 260 | + h, w = arr.shape |
| 261 | + return xr.DataArray( |
| 262 | + arr, |
| 263 | + dims=('y', 'x'), |
| 264 | + coords={ |
| 265 | + 'y': np.arange(h, dtype=np.float64), |
| 266 | + 'x': np.arange(w, dtype=np.float64), |
| 267 | + }, |
| 268 | + attrs={'crs': 4326, |
| 269 | + 'transform': (1.0, 0.0, 0.0, 0.0, -1.0, 0.0)}, |
| 270 | + ) |
0 commit comments