|
| 1 | +"""Streaming dask-write coverage for codecs not in the original #1084 matrix. |
| 2 | +
|
| 3 | +The original streaming-write test suite (``test_streaming_write.py``) |
| 4 | +covers ``none / deflate / lzw / zstd`` round-trips and the predictor flag. |
| 5 | +The LERC, LZ4, and packbits codecs route through the same |
| 6 | +``_compress_block`` helper as the eager writer but had no |
| 7 | +dask-streaming-write coverage before today. This file pins: |
| 8 | +
|
| 9 | +* Dask streaming write + LERC (lossless and lossy ``max_z_error``) |
| 10 | + produces identical output to the eager writer. |
| 11 | +* Dask streaming write + LZ4 round-trips a float32 raster. |
| 12 | +* Dask streaming write + packbits round-trips a uint8 raster (the only |
| 13 | + dtype packbits supports in this writer). |
| 14 | +* COG output with ``overview_resampling='cubic'`` round-trips through |
| 15 | + scipy.ndimage.zoom (the only overview method that takes a separate |
| 16 | + code path in ``_block_reduce_2d``). |
| 17 | +
|
| 18 | +Coverage-gap sweep 2026-05-11: closes the dask-streaming codec gap |
| 19 | +remaining after PR #1565. |
| 20 | +""" |
| 21 | +from __future__ import annotations |
| 22 | + |
| 23 | +import numpy as np |
| 24 | +import pytest |
| 25 | +import xarray as xr |
| 26 | + |
| 27 | +pytest.importorskip("dask") |
| 28 | + |
| 29 | +from xrspatial.geotiff import open_geotiff, to_geotiff # noqa: E402 |
| 30 | +from xrspatial.geotiff._compression import ( # noqa: E402 |
| 31 | + LERC_AVAILABLE, |
| 32 | + LZ4_AVAILABLE, |
| 33 | +) |
| 34 | + |
| 35 | + |
| 36 | +# --------------------------------------------------------------------------- |
| 37 | +# Fixtures |
| 38 | +# --------------------------------------------------------------------------- |
| 39 | + |
| 40 | +@pytest.fixture |
| 41 | +def float_raster(): |
| 42 | + """200x200 float32 raster shaped to exceed a single chunk.""" |
| 43 | + rng = np.random.default_rng(20260511) |
| 44 | + arr = (rng.standard_normal((200, 200)) * 50.0).astype(np.float32) |
| 45 | + return xr.DataArray(arr, dims=['y', 'x']) |
| 46 | + |
| 47 | + |
| 48 | +@pytest.fixture |
| 49 | +def dask_float_raster(float_raster): |
| 50 | + return float_raster.chunk({'y': 64, 'x': 64}) |
| 51 | + |
| 52 | + |
| 53 | +@pytest.fixture |
| 54 | +def uint8_raster(): |
| 55 | + """200x200 uint8 raster. |
| 56 | +
|
| 57 | + packbits compresses runs of equal bytes, so we keep modest entropy |
| 58 | + rather than a uniform random fill -- the test still pins round-trip, |
| 59 | + not compression ratio. |
| 60 | + """ |
| 61 | + rng = np.random.default_rng(20260511) |
| 62 | + arr = (rng.integers(0, 16, size=(200, 200), dtype=np.uint8)) |
| 63 | + return xr.DataArray(arr, dims=['y', 'x']) |
| 64 | + |
| 65 | + |
| 66 | +@pytest.fixture |
| 67 | +def dask_uint8_raster(uint8_raster): |
| 68 | + return uint8_raster.chunk({'y': 64, 'x': 64}) |
| 69 | + |
| 70 | + |
| 71 | +# --------------------------------------------------------------------------- |
| 72 | +# LERC: lossless + lossy parity with eager writer |
| 73 | +# --------------------------------------------------------------------------- |
| 74 | + |
| 75 | +@pytest.mark.skipif(not LERC_AVAILABLE, reason="lerc not installed") |
| 76 | +class TestStreamingLerc: |
| 77 | + def test_lossless_round_trip(self, float_raster, dask_float_raster, |
| 78 | + tmp_path): |
| 79 | + """Dask + LERC (max_z_error=0) round-trips exactly.""" |
| 80 | + path = str(tmp_path / 'stream_lerc_lossless.tif') |
| 81 | + to_geotiff(dask_float_raster, path, compression='lerc') |
| 82 | + result = open_geotiff(path) |
| 83 | + # LERC with max_z_error=0 is lossless for float32 sources. |
| 84 | + np.testing.assert_array_equal(result.values, float_raster.values) |
| 85 | + |
| 86 | + def test_lossy_respects_max_z_error(self, float_raster, dask_float_raster, |
| 87 | + tmp_path): |
| 88 | + """Dask + LERC with non-zero max_z_error keeps every pixel within bound.""" |
| 89 | + max_z = 0.1 |
| 90 | + path = str(tmp_path / 'stream_lerc_lossy.tif') |
| 91 | + to_geotiff(dask_float_raster, path, |
| 92 | + compression='lerc', max_z_error=max_z) |
| 93 | + result = open_geotiff(path) |
| 94 | + max_diff = float(np.abs(result.values - float_raster.values).max()) |
| 95 | + assert max_diff <= max_z + 1e-7, ( |
| 96 | + f"LERC lossy stream write exceeded max_z_error budget: " |
| 97 | + f"{max_diff} > {max_z}") |
| 98 | + |
| 99 | + def test_streaming_matches_eager(self, float_raster, dask_float_raster, |
| 100 | + tmp_path): |
| 101 | + """Pixel-identical output between eager and streaming LERC writers. |
| 102 | +
|
| 103 | + This is the parity guarantee: both paths feed the same |
| 104 | + ``_compress_block`` with the same ``max_z_error``, so the |
| 105 | + encoded byte stream and decoded pixels should match exactly. |
| 106 | + """ |
| 107 | + eager_path = str(tmp_path / 'eager_lerc.tif') |
| 108 | + stream_path = str(tmp_path / 'stream_lerc.tif') |
| 109 | + to_geotiff(float_raster, eager_path, |
| 110 | + compression='lerc', max_z_error=0.05) |
| 111 | + to_geotiff(dask_float_raster, stream_path, |
| 112 | + compression='lerc', max_z_error=0.05) |
| 113 | + eager = open_geotiff(eager_path).values |
| 114 | + stream = open_geotiff(stream_path).values |
| 115 | + np.testing.assert_array_equal(eager, stream) |
| 116 | + |
| 117 | + |
| 118 | +# --------------------------------------------------------------------------- |
| 119 | +# LZ4: round-trip parity |
| 120 | +# --------------------------------------------------------------------------- |
| 121 | + |
| 122 | +@pytest.mark.skipif(not LZ4_AVAILABLE, reason="lz4 not installed") |
| 123 | +class TestStreamingLz4: |
| 124 | + def test_round_trip(self, float_raster, dask_float_raster, tmp_path): |
| 125 | + path = str(tmp_path / 'stream_lz4.tif') |
| 126 | + to_geotiff(dask_float_raster, path, compression='lz4') |
| 127 | + result = open_geotiff(path) |
| 128 | + np.testing.assert_array_equal(result.values, float_raster.values) |
| 129 | + |
| 130 | + def test_streaming_matches_eager(self, float_raster, dask_float_raster, |
| 131 | + tmp_path): |
| 132 | + eager_path = str(tmp_path / 'eager_lz4.tif') |
| 133 | + stream_path = str(tmp_path / 'stream_lz4.tif') |
| 134 | + to_geotiff(float_raster, eager_path, compression='lz4') |
| 135 | + to_geotiff(dask_float_raster, stream_path, compression='lz4') |
| 136 | + eager = open_geotiff(eager_path).values |
| 137 | + stream = open_geotiff(stream_path).values |
| 138 | + np.testing.assert_array_equal(eager, stream) |
| 139 | + |
| 140 | + |
| 141 | +# --------------------------------------------------------------------------- |
| 142 | +# packbits: round-trip on uint8 (the only supported dtype) |
| 143 | +# --------------------------------------------------------------------------- |
| 144 | + |
| 145 | +class TestStreamingPackbits: |
| 146 | + def test_round_trip_uint8(self, uint8_raster, dask_uint8_raster, tmp_path): |
| 147 | + path = str(tmp_path / 'stream_packbits.tif') |
| 148 | + to_geotiff(dask_uint8_raster, path, compression='packbits') |
| 149 | + result = open_geotiff(path) |
| 150 | + np.testing.assert_array_equal(result.values, uint8_raster.values) |
| 151 | + |
| 152 | + def test_streaming_matches_eager(self, uint8_raster, dask_uint8_raster, |
| 153 | + tmp_path): |
| 154 | + eager_path = str(tmp_path / 'eager_packbits.tif') |
| 155 | + stream_path = str(tmp_path / 'stream_packbits.tif') |
| 156 | + to_geotiff(uint8_raster, eager_path, compression='packbits') |
| 157 | + to_geotiff(dask_uint8_raster, stream_path, compression='packbits') |
| 158 | + eager = open_geotiff(eager_path).values |
| 159 | + stream = open_geotiff(stream_path).values |
| 160 | + np.testing.assert_array_equal(eager, stream) |
| 161 | + |
| 162 | + |
| 163 | +# --------------------------------------------------------------------------- |
| 164 | +# Cubic overview resampling (scipy code path) |
| 165 | +# --------------------------------------------------------------------------- |
| 166 | + |
| 167 | +class TestCubicOverview: |
| 168 | + """Pin the scipy-based cubic resampler in ``_block_reduce_2d``. |
| 169 | +
|
| 170 | + The other overview methods (mean / nearest / min / max / median / mode) |
| 171 | + have direct test coverage in ``test_cog.py``. The ``cubic`` branch |
| 172 | + only ran in production code; this test ensures the scipy import, |
| 173 | + zoom call, and dtype-preservation cast all work end-to-end. |
| 174 | + """ |
| 175 | + |
| 176 | + def test_cubic_overview_round_trip(self, tmp_path): |
| 177 | + pytest.importorskip("scipy") |
| 178 | + arr = np.arange(256, dtype=np.float32).reshape(16, 16) |
| 179 | + path = str(tmp_path / 'cubic_overview.tif') |
| 180 | + to_geotiff(arr, path, |
| 181 | + compression='deflate', |
| 182 | + tile_size=8, |
| 183 | + tiled=True, |
| 184 | + cog=True, |
| 185 | + overview_levels=[1], |
| 186 | + overview_resampling='cubic') |
| 187 | + |
| 188 | + # Full-resolution data is preserved exactly. |
| 189 | + full = open_geotiff(path) |
| 190 | + np.testing.assert_array_equal(full.values, arr) |
| 191 | + |
| 192 | + # Overview is half-size and same dtype (the cubic branch ends in |
| 193 | + # ``.astype(arr2d.dtype)``). |
| 194 | + ov = open_geotiff(path, overview_level=1) |
| 195 | + assert ov.shape == (8, 8) |
| 196 | + assert ov.dtype == arr.dtype |
| 197 | + |
| 198 | + # Cubic resampling values lie within the source range (no |
| 199 | + # ringing escape on a monotonic ramp). |
| 200 | + assert float(ov.values.min()) >= float(arr.min()) - 1.0 |
| 201 | + assert float(ov.values.max()) <= float(arr.max()) + 1.0 |
| 202 | + |
| 203 | + def test_cubic_distinct_from_mean(self, tmp_path): |
| 204 | + """Cubic and mean produce different overview pixels on a non-linear ramp. |
| 205 | +
|
| 206 | + Sanity-check that the cubic branch actually engages (the test |
| 207 | + would still pass on a perfectly linear ramp because both |
| 208 | + methods reduce to the same value, so use a quadratic). |
| 209 | + """ |
| 210 | + pytest.importorskip("scipy") |
| 211 | + x = np.arange(16, dtype=np.float32) |
| 212 | + y = x[:, None] |
| 213 | + arr = (x * x + y * y * 0.5).astype(np.float32) |
| 214 | + |
| 215 | + cubic_path = str(tmp_path / 'cubic_q.tif') |
| 216 | + mean_path = str(tmp_path / 'mean_q.tif') |
| 217 | + to_geotiff(arr, cubic_path, compression='deflate', tile_size=8, |
| 218 | + tiled=True, cog=True, overview_levels=[1], |
| 219 | + overview_resampling='cubic') |
| 220 | + to_geotiff(arr, mean_path, compression='deflate', tile_size=8, |
| 221 | + tiled=True, cog=True, overview_levels=[1], |
| 222 | + overview_resampling='mean') |
| 223 | + |
| 224 | + cubic_ov = open_geotiff(cubic_path, overview_level=1).values |
| 225 | + mean_ov = open_geotiff(mean_path, overview_level=1).values |
| 226 | + assert not np.array_equal(cubic_ov, mean_ov), ( |
| 227 | + "cubic and mean overview should differ on a non-linear ramp") |
0 commit comments