|
"""Regression tests for issue #1616.

A VRT whose ``<VRTRasterBand dataType="Float32">`` (or Float64) is fed
by an integer source GeoTIFF with an in-range ``GDAL_NODATA`` sentinel
used to leak the sentinel value through as a literal float in the
returned array. ``attrs['nodata']`` advertised the sentinel but the
pixels at the sentinel positions still held the integer value cast to
float (e.g. ``65535.0`` rather than ``np.nan``). NaN-aware downstream
code therefore saw the sentinel as valid data.

The fix masks integer source arrays before they're placed into a float
``result`` buffer in ``_vrt.read_vrt`` so a float-dtype VRT result lands
with NaN at the sentinel pixels, matching what ``open_geotiff`` returns
for a single-file integer raster with the same sentinel.
"""
| 16 | +from __future__ import annotations |
| 17 | + |
| 18 | +import numpy as np |
| 19 | + |
| 20 | +from xrspatial.geotiff import read_vrt |
| 21 | +from xrspatial.geotiff._writer import write |
| 22 | + |
| 23 | + |
def _write_uint16_with_sentinel(tmp_path, sentinel=65535, filename='b0.tif'):
    """Write a 2x2 uint16 raster whose ``[1, 1]`` pixel is ``sentinel``.

    The array ``[[1, 2], [3, sentinel]]`` is handed to ``write`` with
    ``nodata=sentinel``, no compression and strip (non-tiled) layout.

    Returns the output path (``tmp_path / filename``) as a ``str``.
    """
    rows = [[1, 2], [3, sentinel]]
    pixels = np.array(rows, dtype=np.uint16)
    out_path = str(tmp_path / filename)
    write(pixels, out_path, nodata=sentinel, compression='none', tiled=False)
    return out_path
| 29 | + |
| 30 | + |
def _write_int16_with_sentinel(tmp_path, sentinel=-1, filename='b0.tif'):
    """Write a 2x2 int16 raster whose ``[1, 1]`` pixel is ``sentinel``.

    The array ``[[1, 2], [3, sentinel]]`` is handed to ``write`` with
    ``nodata=sentinel``, no compression and strip (non-tiled) layout.

    Returns the output path (``tmp_path / filename``) as a ``str``.
    """
    rows = [[1, 2], [3, sentinel]]
    pixels = np.array(rows, dtype=np.int16)
    out_path = str(tmp_path / filename)
    write(pixels, out_path, nodata=sentinel, compression='none', tiled=False)
    return out_path
| 36 | + |
| 37 | + |
def _build_vrt(tmp_path, source_path, vrt_dtype, nodata_value,
               filename='mismatch.vrt'):
    """Hand-roll a VRT with the requested dataType / NoDataValue pair.

    The VRT describes a single 2x2 band backed by one ``SimpleSource``
    that maps band 1 of ``source_path`` onto the full 2x2 window.

    Parameters
    ----------
    tmp_path : path-like supporting ``/``
        Directory the VRT is written into.
    source_path : str or path-like
        Source raster referenced by ``<SourceFilename>``. It is
        XML-escaped so paths containing ``&``, ``<`` or ``>`` still
        produce a well-formed document.
    vrt_dtype : str
        Value for the band's ``dataType`` attribute (e.g. ``'Float32'``).
    nodata_value : int or float
        Value written into ``<NoDataValue>``.
    filename : str, optional
        Name of the VRT file inside ``tmp_path``.

    Returns
    -------
    str
        Path of the written VRT file.
    """
    from xml.sax.saxutils import escape

    # Raw interpolation would emit malformed XML for paths containing
    # markup characters (temporary directories can contain ``&``).
    source_xml = escape(str(source_path))
    vrt_xml = f"""<VRTDataset rasterXSize="2" rasterYSize="2">
  <GeoTransform>0.0, 1.0, 0.0, 0.0, 0.0, -1.0</GeoTransform>
  <VRTRasterBand dataType="{vrt_dtype}" band="1">
    <NoDataValue>{nodata_value}</NoDataValue>
    <SimpleSource>
      <SourceFilename relativeToVRT="0">{source_xml}</SourceFilename>
      <SourceBand>1</SourceBand>
      <SrcRect xOff="0" yOff="0" xSize="2" ySize="2"/>
      <DstRect xOff="0" yOff="0" xSize="2" ySize="2"/>
    </SimpleSource>
  </VRTRasterBand>
</VRTDataset>"""
    p = str(tmp_path / filename)
    # The document carries no XML declaration, so parsers assume UTF-8;
    # write it as UTF-8 instead of relying on the locale default.
    with open(p, 'w', encoding='utf-8') as f:
        f.write(vrt_xml)
    return p
| 57 | + |
| 58 | + |
def test_float32_vrt_uint16_source_masks_in_range_sentinel(tmp_path):
    """Float32 VRT over a uint16 source: the in-range sentinel reads as NaN.

    Before the fix this returned dtype=float32 with values[1, 1] == 65535.0
    while ``attrs['nodata']`` advertised the sentinel.
    """
    tif_path = _write_uint16_with_sentinel(tmp_path)
    vrt_path = _build_vrt(tmp_path, tif_path, 'Float32', 65535)
    result = read_vrt(vrt_path)

    assert result.dtype == np.float32, (
        f"Float32-declared VRT should return float32, got {result.dtype}")

    # The sentinel was written at [1, 1] by the helper above.
    sentinel_px = result.values[1, 1]
    assert np.isnan(sentinel_px), (
        "Sentinel pixel (uint16 65535 -> float32) should be NaN-masked; "
        f"got values[1, 1]={sentinel_px}")

    # The declared sentinel is still advertised, and valid data is intact.
    assert result.attrs.get('nodata') == 65535.0
    assert result.values[0, 0] == 1.0
| 75 | + |
| 76 | + |
def test_float64_vrt_int16_source_masks_negative_sentinel(tmp_path):
    """Float64 VRT over an int16 source: the negative sentinel reads as NaN."""
    tif_path = _write_int16_with_sentinel(tmp_path, sentinel=-1)
    vrt_path = _build_vrt(tmp_path, tif_path, 'Float64', -1)
    result = read_vrt(vrt_path)

    assert result.dtype == np.float64

    # The sentinel was written at [1, 1] by the helper above.
    sentinel_px = result.values[1, 1]
    assert np.isnan(sentinel_px), (
        "Sentinel pixel (-1) should be NaN-masked; "
        f"got values[1, 1]={sentinel_px}")
    assert result.attrs.get('nodata') == -1.0
| 87 | + |
| 88 | + |
def test_float32_vrt_out_of_range_sentinel_is_noop(tmp_path):
    """A sentinel outside the source dtype's range masks nothing.

    With a uint16 source and ``NoDataValue=-9999`` the read must not
    raise ``OverflowError`` from a ``uint16(-9999)`` cast. The pixel
    data comes back as-is and ``attrs['nodata']`` still surfaces the
    declared sentinel so callers can mask in user code or write through.
    """
    clean = np.array([[1, 2], [3, 4]], dtype=np.uint16)
    tif_path = str(tmp_path / 'b0_no_nodata.tif')
    # No ``nodata`` argument: the source file itself carries no GDAL_NODATA.
    write(clean, tif_path, compression='none', tiled=False)

    vrt_path = _build_vrt(tmp_path, tif_path, 'Float32', -9999)
    result = read_vrt(vrt_path)

    assert result.dtype == np.float32
    # No pixel can equal the out-of-range sentinel, so no NaN may appear.
    nan_mask = np.isnan(result.values)
    assert not nan_mask.any()
    assert result.attrs.get('nodata') == -9999.0
| 105 | + |
| 106 | + |
def test_float32_vrt_uint16_source_no_sentinel_pixels(tmp_path):
    """A declared sentinel that matches no pixel leaves the data untouched.

    Float32 VRT over a uint16 source whose pixels never equal the
    sentinel: the result must be a clean float array with no NaNs.
    This exercises the early-out path inside the mask branch -- an
    unmatched sentinel must not perturb the data or cause an extra copy
    that would surface as a different dtype.
    """
    clean = np.array([[1, 2], [3, 4]], dtype=np.uint16)
    tif_path = str(tmp_path / 'b0_clean.tif')
    write(clean, tif_path, compression='none', tiled=False)

    vrt_path = _build_vrt(tmp_path, tif_path, 'Float32', 65535)
    result = read_vrt(vrt_path)

    assert result.dtype == np.float32
    nan_mask = np.isnan(result.values)
    assert not nan_mask.any()
    # Values must equal a plain cast of the source pixels.
    expected = clean.astype(np.float32)
    np.testing.assert_array_equal(result.values, expected)
| 123 | + |
| 124 | + |
def test_float_vrt_int_source_dask_path_masks_sentinel(tmp_path):
    """Reading with ``chunks=...`` also yields NaN at the sentinel pixel.

    The dask reader chunks the eager result after decode, so the bug
    propagates if the eager path leaks the sentinel.
    """
    tif_path = _write_uint16_with_sentinel(tmp_path)
    vrt_path = _build_vrt(tmp_path, tif_path, 'Float32', 65535)
    chunked = read_vrt(vrt_path, chunks=2)

    # The chunked path keeps the float32 dtype declared by the VRT.
    assert chunked.dtype == np.float32

    materialized = chunked.values
    assert np.isnan(materialized[1, 1])
| 137 | + |
| 138 | + |
def test_float_vrt_int_source_round_trip_nodata_attr(tmp_path):
    """``attrs['nodata']`` keeps the original sentinel after masking.

    Even though masking promotes the sentinel pixels to NaN, the
    attribute still carries the declared value so a downstream write
    can restore the literal sentinel byte pattern.
    """
    tif_path = _write_uint16_with_sentinel(tmp_path)
    vrt_path = _build_vrt(tmp_path, tif_path, 'Float32', 65535)
    result = read_vrt(vrt_path)

    advertised = result.attrs.get('nodata')
    assert advertised == 65535.0
| 148 | + |
| 149 | + |
def test_float_vrt_int_source_with_band_select(tmp_path):
    """The band=N selection path also masks integer sentinels for a
    float-declared VRT. The per-band ``NoDataValue`` from the VRT XML
    must reach the source-side masking step, not just ``attrs['nodata']``.

    Two uint16 sources are written with different sentinels (65535 and
    65000), both at pixel ``[1, 1]``, and exposed as bands 1 and 2 of a
    hand-rolled Float32 VRT. Each band is then read on its own.
    """
    from xml.sax.saxutils import escape

    src_a = _write_uint16_with_sentinel(tmp_path, sentinel=65535,
                                        filename='ba.tif')
    src_b = _write_uint16_with_sentinel(tmp_path, sentinel=65000,
                                        filename='bb.tif')
    # Escape the paths so a temp directory containing ``&``, ``<`` or
    # ``>`` still yields a well-formed XML document.
    src_a_xml = escape(str(src_a))
    src_b_xml = escape(str(src_b))
    vrt_xml = f"""<VRTDataset rasterXSize="2" rasterYSize="2">
  <GeoTransform>0.0, 1.0, 0.0, 0.0, 0.0, -1.0</GeoTransform>
  <VRTRasterBand dataType="Float32" band="1">
    <NoDataValue>65535</NoDataValue>
    <SimpleSource>
      <SourceFilename relativeToVRT="0">{src_a_xml}</SourceFilename>
      <SourceBand>1</SourceBand>
      <SrcRect xOff="0" yOff="0" xSize="2" ySize="2"/>
      <DstRect xOff="0" yOff="0" xSize="2" ySize="2"/>
    </SimpleSource>
  </VRTRasterBand>
  <VRTRasterBand dataType="Float32" band="2">
    <NoDataValue>65000</NoDataValue>
    <SimpleSource>
      <SourceFilename relativeToVRT="0">{src_b_xml}</SourceFilename>
      <SourceBand>1</SourceBand>
      <SrcRect xOff="0" yOff="0" xSize="2" ySize="2"/>
      <DstRect xOff="0" yOff="0" xSize="2" ySize="2"/>
    </SimpleSource>
  </VRTRasterBand>
</VRTDataset>"""
    vrt_path = str(tmp_path / 'mb.vrt')
    # No XML declaration is emitted, so parsers assume UTF-8; write the
    # file as UTF-8 instead of relying on the locale default.
    with open(vrt_path, 'w', encoding='utf-8') as f:
        f.write(vrt_xml)

    # band 0 -> 65535 sentinel masked
    r0 = read_vrt(vrt_path, band=0)
    assert r0.dtype == np.float32
    assert np.isnan(r0.values[1, 1])
    assert r0.attrs.get('nodata') == 65535.0

    # band 1 -> 65000 sentinel masked, not 65535
    r1 = read_vrt(vrt_path, band=1)
    assert r1.dtype == np.float32
    # The second source has its sentinel at the same [1, 1] cell.
    assert np.isnan(r1.values[1, 1])
    assert r1.attrs.get('nodata') == 65000.0
0 commit comments