|
| 1 | +"""Regression tests for issue #1611. |
| 2 | +
|
| 3 | +``read_vrt(path)`` on a multi-band integer VRT with per-band |
| 4 | +``<NoDataValue>`` tags used to only mask band 0's sentinel. Bands 1 |
| 5 | +and up retained their integer sentinels as literal finite values in |
| 6 | +the returned float64 array, breaking the convention that |
| 7 | +``attrs['nodata']`` is present iff sentinel pixels are already NaN. |
| 8 | +
|
| 9 | +The float-VRT path masks per-band correctly in ``_vrt._read_data`` |
| 10 | +(lines 296-297, 347-351). For integer rasters the masking moved to |
| 11 | +``__init__.py:read_vrt`` and used ``vrt.bands[0].nodata`` for every |
| 12 | +band. The fix walks ``vrt.bands`` when ``band is None`` and masks |
| 13 | +each ``arr[..., i]`` slice against its own ``<NoDataValue>``. |
| 14 | +
|
| 15 | +This file mirrors test_vrt_band_nodata_1598.py for the multi-band |
| 16 | +``band=None`` path. PR #1602 fixed ``band=N`` single-band selection. |
| 17 | +""" |
| 18 | +from __future__ import annotations |
| 19 | + |
| 20 | +import numpy as np |
| 21 | +import pytest |
| 22 | + |
| 23 | +from xrspatial.geotiff import read_vrt |
| 24 | +from xrspatial.geotiff._writer import write |
| 25 | + |
| 26 | + |
| 27 | +def _write_two_band_per_band_nodata_vrt(tmp_path, *, dtype_str="UInt16", |
| 28 | + np_dtype=np.uint16, |
| 29 | + band0_sentinel=65535, |
| 30 | + band1_sentinel=65000, |
| 31 | + band0_other=(1, 2, 3), |
| 32 | + band1_other=(7, 8, 9)): |
| 33 | + """Two single-band integer sources, each with a distinct nodata |
| 34 | + sentinel, exposed as bands 1 and 2 of a hand-rolled VRT. |
| 35 | +
|
| 36 | + Used to be band 0's sentinel was the only one masked. Now every |
| 37 | + band gets its own sentinel. |
| 38 | + """ |
| 39 | + b0_arr = np.array([[band0_other[0], band0_other[1]], |
| 40 | + [band0_other[2], band0_sentinel]], dtype=np_dtype) |
| 41 | + b1_arr = np.array([[band1_other[0], band1_other[1]], |
| 42 | + [band1_other[2], band1_sentinel]], dtype=np_dtype) |
| 43 | + p0 = str(tmp_path / 'vrt_b0_1611.tif') |
| 44 | + p1 = str(tmp_path / 'vrt_b1_1611.tif') |
| 45 | + write(b0_arr, p0, nodata=band0_sentinel, compression='none', |
| 46 | + tiled=False) |
| 47 | + write(b1_arr, p1, nodata=band1_sentinel, compression='none', |
| 48 | + tiled=False) |
| 49 | + |
| 50 | + vrt_path = str(tmp_path / 'two_band_per_band_nodata_1611.vrt') |
| 51 | + vrt_xml = f"""<VRTDataset rasterXSize="2" rasterYSize="2"> |
| 52 | + <GeoTransform>0.0, 1.0, 0.0, 0.0, 0.0, -1.0</GeoTransform> |
| 53 | + <VRTRasterBand dataType="{dtype_str}" band="1"> |
| 54 | + <NoDataValue>{band0_sentinel}</NoDataValue> |
| 55 | + <SimpleSource> |
| 56 | + <SourceFilename relativeToVRT="0">{p0}</SourceFilename> |
| 57 | + <SourceBand>1</SourceBand> |
| 58 | + <SrcRect xOff="0" yOff="0" xSize="2" ySize="2"/> |
| 59 | + <DstRect xOff="0" yOff="0" xSize="2" ySize="2"/> |
| 60 | + </SimpleSource> |
| 61 | + </VRTRasterBand> |
| 62 | + <VRTRasterBand dataType="{dtype_str}" band="2"> |
| 63 | + <NoDataValue>{band1_sentinel}</NoDataValue> |
| 64 | + <SimpleSource> |
| 65 | + <SourceFilename relativeToVRT="0">{p1}</SourceFilename> |
| 66 | + <SourceBand>1</SourceBand> |
| 67 | + <SrcRect xOff="0" yOff="0" xSize="2" ySize="2"/> |
| 68 | + <DstRect xOff="0" yOff="0" xSize="2" ySize="2"/> |
| 69 | + </SimpleSource> |
| 70 | + </VRTRasterBand> |
| 71 | +</VRTDataset>""" |
| 72 | + with open(vrt_path, 'w') as f: |
| 73 | + f.write(vrt_xml) |
| 74 | + return vrt_path |
| 75 | + |
| 76 | + |
| 77 | +def test_multiband_uint16_per_band_sentinel_each_masked(tmp_path): |
| 78 | + """The previously-broken case: every band's sentinel must be NaN. |
| 79 | +
|
| 80 | + Before the fix this returned dtype=float64 with band 0's (1,1) cell |
| 81 | + as NaN but band 1's (1,1) cell as the literal 65000.0. |
| 82 | + """ |
| 83 | + vrt_path = _write_two_band_per_band_nodata_vrt(tmp_path) |
| 84 | + r = read_vrt(vrt_path) |
| 85 | + assert r.shape == (2, 2, 2) |
| 86 | + assert r.dtype == np.float64, ( |
| 87 | + f"expected float64 promotion, got {r.dtype}" |
| 88 | + ) |
| 89 | + assert np.isnan(r.values[1, 1, 0]), ( |
| 90 | + "band 0's sentinel pixel was not NaN-masked." |
| 91 | + ) |
| 92 | + assert np.isnan(r.values[1, 1, 1]), ( |
| 93 | + "band 1's sentinel pixel was not NaN-masked; " |
| 94 | + "the regression from issue #1611 has returned." |
| 95 | + ) |
| 96 | + # Non-sentinel pixels survive unchanged |
| 97 | + assert r.values[0, 0, 0] == 1 |
| 98 | + assert r.values[0, 0, 1] == 7 |
| 99 | + assert r.values[1, 0, 0] == 3 |
| 100 | + assert r.values[1, 0, 1] == 9 |
| 101 | + |
| 102 | + |
| 103 | +def test_multiband_int32_negative_per_band_sentinel(tmp_path): |
| 104 | + """Negative sentinels in a signed integer VRT also mask per-band. |
| 105 | +
|
| 106 | + The original bug was dtype-independent: any integer dtype with |
| 107 | + per-band <NoDataValue> would have hit it. Cover int32 + negative |
| 108 | + sentinels to make sure the helper handles signed types and the |
| 109 | + range guard accepts negatives. |
| 110 | + """ |
| 111 | + vrt_path = _write_two_band_per_band_nodata_vrt( |
| 112 | + tmp_path, dtype_str="Int32", np_dtype=np.int32, |
| 113 | + band0_sentinel=-9999, band1_sentinel=-7777, |
| 114 | + band0_other=(10, 20, 30), band1_other=(40, 50, 60)) |
| 115 | + r = read_vrt(vrt_path) |
| 116 | + assert r.dtype == np.float64 |
| 117 | + assert np.isnan(r.values[1, 1, 0]) |
| 118 | + assert np.isnan(r.values[1, 1, 1]) |
| 119 | + assert r.values[0, 0, 0] == 10 |
| 120 | + assert r.values[0, 0, 1] == 40 |
| 121 | + |
| 122 | + |
| 123 | +def test_multiband_only_one_band_has_sentinel_present(tmp_path): |
| 124 | + """If only one band's sentinel actually appears in the data, only |
| 125 | + that band should change. The non-hitting band stays the same float64 |
| 126 | + value (no spurious NaN introduced). |
| 127 | +
|
| 128 | + Force band 1's sentinel never to appear by writing 99 instead. |
| 129 | + """ |
| 130 | + vrt_path = _write_two_band_per_band_nodata_vrt( |
| 131 | + tmp_path, |
| 132 | + band0_sentinel=65535, band1_sentinel=65000, |
| 133 | + band1_other=(7, 8, 9)) |
| 134 | + # Overwrite the band 1 source so the sentinel value 65000 is NOT |
| 135 | + # present in band 1's actual data. |
| 136 | + b1_no_sentinel = np.array([[7, 8], [9, 99]], dtype=np.uint16) |
| 137 | + import os |
| 138 | + p1 = os.path.join(os.path.dirname(vrt_path), 'vrt_b1_1611.tif') |
| 139 | + write(b1_no_sentinel, p1, nodata=65000, compression='none', |
| 140 | + tiled=False) |
| 141 | + |
| 142 | + r = read_vrt(vrt_path) |
| 143 | + assert r.dtype == np.float64, ( |
| 144 | + "Even when only band 0 has a present sentinel, the array still " |
| 145 | + "needs promotion so band 0's NaN can be expressed." |
| 146 | + ) |
| 147 | + assert np.isnan(r.values[1, 1, 0]) |
| 148 | + assert r.values[1, 1, 1] == 99.0 # band 1 sentinel absent, no NaN |
| 149 | + |
| 150 | + |
| 151 | +def test_multiband_no_sentinel_present_anywhere_keeps_int_dtype(tmp_path): |
| 152 | + """When no band actually contains its declared sentinel, skip |
| 153 | + promotion entirely. Avoids a needless float64 cast on integer data. |
| 154 | + """ |
| 155 | + vrt_path = _write_two_band_per_band_nodata_vrt( |
| 156 | + tmp_path, |
| 157 | + band0_sentinel=65535, band1_sentinel=65000, |
| 158 | + band0_other=(1, 2, 3), band1_other=(7, 8, 9)) |
| 159 | + # Replace BOTH source files so neither contains its sentinel |
| 160 | + import os |
| 161 | + b0 = np.array([[1, 2], [3, 4]], dtype=np.uint16) |
| 162 | + b1 = np.array([[7, 8], [9, 10]], dtype=np.uint16) |
| 163 | + p0 = os.path.join(os.path.dirname(vrt_path), 'vrt_b0_1611.tif') |
| 164 | + p1 = os.path.join(os.path.dirname(vrt_path), 'vrt_b1_1611.tif') |
| 165 | + write(b0, p0, nodata=65535, compression='none', tiled=False) |
| 166 | + write(b1, p1, nodata=65000, compression='none', tiled=False) |
| 167 | + |
| 168 | + r = read_vrt(vrt_path) |
| 169 | + # Sentinels not present -> integer dtype preserved (matches the |
| 170 | + # eager open_geotiff fast-path which also skips promotion when the |
| 171 | + # mask is empty). |
| 172 | + assert r.dtype == np.uint16 |
| 173 | + assert r.values[1, 1, 0] == 4 |
| 174 | + assert r.values[1, 1, 1] == 10 |
| 175 | + |
| 176 | + |
| 177 | +def test_multiband_per_band_out_of_range_sentinel_is_no_op(tmp_path): |
| 178 | + """A sentinel out of the integer dtype's range should be a no-op |
| 179 | + for that band rather than raising. Mirrors PR #1583's behaviour |
| 180 | + (#1581): the helper ``_int_nodata_in_range`` gates the cast. |
| 181 | + """ |
| 182 | + # uint16 cannot represent -9999; the helper should skip band 1. |
| 183 | + vrt_path = _write_two_band_per_band_nodata_vrt( |
| 184 | + tmp_path, |
| 185 | + dtype_str="UInt16", np_dtype=np.uint16, |
| 186 | + band0_sentinel=65535, band1_sentinel=10, # placeholder; rewrite below |
| 187 | + band0_other=(1, 2, 3), band1_other=(7, 8, 9)) |
| 188 | + |
| 189 | + # Hand-rewrite the VRT so band 1's NoDataValue is the out-of-range -9999. |
| 190 | + with open(vrt_path) as f: |
| 191 | + xml = f.read() |
| 192 | + xml = xml.replace("<NoDataValue>10</NoDataValue>", |
| 193 | + "<NoDataValue>-9999</NoDataValue>") |
| 194 | + with open(vrt_path, 'w') as f: |
| 195 | + f.write(xml) |
| 196 | + |
| 197 | + # Should not raise and should still mask band 0. |
| 198 | + r = read_vrt(vrt_path) |
| 199 | + assert np.isnan(r.values[1, 1, 0]) # band 0 sentinel still masked |
| 200 | + # Band 1's sentinel (-9999) is out of uint16 range; the value 10 |
| 201 | + # in band1[1,1] survives unchanged. |
| 202 | + assert r.values[1, 1, 1] == 10.0 or r.values[1, 1, 1] == 10 |
| 203 | + |
| 204 | + |
| 205 | +def test_multiband_band_kwarg_still_per_band_post_pr1602(tmp_path): |
| 206 | + """Non-regression check that PR #1602's band=N path still works. |
| 207 | +
|
| 208 | + The fix here only changes the ``band is None`` branch; ``band=N`` |
| 209 | + must still route through the single-band masking with its own |
| 210 | + sentinel. |
| 211 | + """ |
| 212 | + vrt_path = _write_two_band_per_band_nodata_vrt(tmp_path) |
| 213 | + r0 = read_vrt(vrt_path, band=0) |
| 214 | + r1 = read_vrt(vrt_path, band=1) |
| 215 | + assert r0.dtype == np.float64 |
| 216 | + assert r1.dtype == np.float64 |
| 217 | + assert r0.attrs.get('nodata') == 65535.0 |
| 218 | + assert r1.attrs.get('nodata') == 65000.0 |
| 219 | + assert np.isnan(r0.values[1, 1]) |
| 220 | + assert np.isnan(r1.values[1, 1]) |
| 221 | + |
| 222 | + |
| 223 | +def test_multiband_attrs_nodata_still_band0(tmp_path): |
| 224 | + """``attrs['nodata']`` for band=None reads is documented as band |
| 225 | + 0's sentinel (the canonical attr cannot encode per-band values). |
| 226 | + The pixel-level fix must not change that contract. |
| 227 | + """ |
| 228 | + vrt_path = _write_two_band_per_band_nodata_vrt(tmp_path) |
| 229 | + r = read_vrt(vrt_path) |
| 230 | + assert r.attrs.get('nodata') == 65535.0 |
0 commit comments