|
| 1 | +"""Parameter coverage for ``compression_level=`` with ``compression='lz4'``. |
| 2 | +
|
| 3 | +The level-validation map in ``xrspatial.geotiff.__init__`` advertises a |
| 4 | +``(0, 16)`` valid range for ``lz4``, but only the ``deflate`` ``(1, 9)`` |
| 5 | +and ``zstd`` ``(1, 22)`` ranges had direct round-trip + boundary-error |
| 6 | +tests under ``test_compression_level.py``. ``lz4`` shares the same |
| 7 | +range-validation call site (the dispatcher's eager numpy path, the dask |
| 8 | +streaming path, and ``_write_vrt_tiled`` all share ``_LEVEL_RANGES``), |
| 9 | +so a regression that drops ``lz4`` from the table -- or shifts the |
| 10 | +range bounds -- would only surface against user code. |
| 11 | +
|
| 12 | +This module pins: |
| 13 | +
|
| 14 | +* Round-trip integrity at the boundary levels ``0`` and ``16``. |
| 15 | +* Round-trip integrity at the documented default (``compression_level=None``) |
| 16 | + via the public ``to_geotiff`` API. The default uses the ``lz4_compress`` |
| 17 | + signature default (``level=0``), so the no-arg path must still produce a |
| 18 | + decodable file. |
| 19 | +* ValueError on out-of-range levels (``-1`` and ``17``) across both the |
| 20 | + eager (numpy) path and the dask streaming path. |
| 21 | +* Tile-row segmentation for dask-streaming inputs: a low-level lz4 file |
| 22 | + and a high-level lz4 file built from the same input both decode to |
| 23 | + the original values bit-exact (lz4 is lossless across its level range). |
| 24 | +
|
| 25 | +Cat 4 MEDIUM: parameter coverage gap on numeric parameter with multiple |
| 26 | +values where only the default was tested. |
| 27 | +""" |
| 28 | +from __future__ import annotations |
| 29 | + |
| 30 | +import importlib.util |
| 31 | +import os |
| 32 | + |
| 33 | +import numpy as np |
| 34 | +import pytest |
| 35 | +import xarray as xr |
| 36 | + |
| 37 | +from xrspatial.geotiff import open_geotiff, to_geotiff |
| 38 | + |
| 39 | + |
# Probe the optional dependencies without importing them: ``find_spec``
# returns ``None`` when a top-level package cannot be located.
_HAS_LZ4, _HAS_DASK = (
    importlib.util.find_spec(pkg) is not None for pkg in ("lz4", "dask")
)

# Module-wide marker: every test in this file is skipped without lz4.
pytestmark = pytest.mark.skipif(not _HAS_LZ4, reason="lz4 package required")
| 44 | + |
| 45 | + |
def _make_da(seed: int = 0, shape: tuple = (64, 64)) -> xr.DataArray:
    """Build a seeded float32 ``DataArray`` of standard-normal samples.

    The same ``seed`` and ``shape`` always yield the same values, so a
    test can compare a file read back from disk against the source array.
    The result has dims ``("y", "x")`` and no coordinates or attributes.
    """
    samples = np.random.default_rng(seed).standard_normal(shape)
    return xr.DataArray(samples.astype(np.float32), dims=["y", "x"])
| 51 | + |
| 52 | + |
def _make_compressible(shape: tuple = (128, 128)) -> xr.DataArray:
    """Build a float32 ``DataArray`` meant to compress well.

    The values are a diagonal ramp (row index + column index) with
    standard-normal noise scaled by 0.01 added on top. The generator is
    seeded with a fixed 42, so repeated calls return identical data.
    """
    rows, cols = np.mgrid[0: shape[0], 0: shape[1]]
    ramp = (rows + cols).astype(np.float32)
    jitter = (
        np.random.default_rng(42).standard_normal(shape).astype(np.float32)
    )
    return xr.DataArray(ramp + jitter * 0.01, dims=["y", "x"])
| 61 | + |
| 62 | + |
| 63 | +# --------------------------------------------------------------------------- |
| 64 | +# Round-trip integrity across the documented level range |
| 65 | +# --------------------------------------------------------------------------- |
| 66 | + |
| 67 | + |
class TestLZ4LevelRoundTrip:
    """Write-then-read checks for ``compression='lz4'``, both with an
    explicit ``compression_level`` and with the level left unset."""

    @pytest.mark.parametrize("level", [0, 1, 9, 16])
    def test_lz4_level_round_trip(self, level, tmp_path):
        """A file written at ``level`` reads back with identical pixels."""
        source = _make_da(seed=level)
        target = str(tmp_path / f"lz4_level_{level}.tif")

        to_geotiff(
            source, target, compression="lz4", compression_level=level,
        )
        decoded = open_geotiff(target)

        # Exact comparison on purpose: a lossless codec must not perturb
        # a single value, so no tolerance is allowed here.
        np.testing.assert_array_equal(decoded.values, source.values)

    def test_lz4_default_level_round_trip(self, tmp_path):
        """Omitting ``compression_level`` still yields a decodable file
        whose pixels match the source exactly."""
        source = _make_da(seed=99)
        target = str(tmp_path / "lz4_default.tif")

        to_geotiff(source, target, compression="lz4")
        decoded = open_geotiff(target)

        np.testing.assert_array_equal(decoded.values, source.values)
| 92 | + |
| 93 | + |
| 94 | +# --------------------------------------------------------------------------- |
| 95 | +# Higher level should not produce a larger file on compressible input |
| 96 | +# --------------------------------------------------------------------------- |
| 97 | + |
| 98 | + |
class TestLZ4LevelSizeEffect:
    """On a compressible input, the file written at the top of the lz4
    level range must not be larger than the one written at the bottom."""

    def test_lz4_higher_level_not_larger(self, tmp_path):
        """Compare on-disk sizes of level-0 and level-16 outputs built
        from the same array."""
        payload = _make_compressible()
        targets = {
            0: str(tmp_path / "lz4_lo.tif"),
            16: str(tmp_path / "lz4_hi.tif"),
        }
        for level, target in targets.items():
            to_geotiff(
                payload, target, compression="lz4", compression_level=level,
            )

        size_lo = os.path.getsize(targets[0])
        size_hi = os.path.getsize(targets[16])

        # Equality is acceptable: the contract checked here is only that
        # the higher level is "no worse", not strictly smaller.
        assert size_hi <= size_lo, (
            f"Expected level-16 file ({size_hi}) <= level-0 file ({size_lo})")
| 116 | + |
| 117 | + |
| 118 | +# --------------------------------------------------------------------------- |
| 119 | +# Out-of-range level rejection (eager path) |
| 120 | +# --------------------------------------------------------------------------- |
| 121 | + |
| 122 | + |
class TestLZ4LevelOutOfRange:
    """Rejection of ``compression_level`` values outside ``0..16`` for
    lz4 when writing an in-memory (numpy-backed) array."""

    @pytest.mark.parametrize("level", [-1, -10, 17, 100])
    def test_lz4_out_of_range_level_raises_eager(self, level, tmp_path):
        """Levels below 0 or above 16 raise ``ValueError`` whose message
        mentions ``compression_level``."""
        source = _make_da()
        target = str(tmp_path / "lz4_bad.tif")

        with pytest.raises(ValueError, match="compression_level"):
            to_geotiff(
                source, target, compression="lz4", compression_level=level,
            )

    def test_lz4_out_of_range_message_includes_range(self, tmp_path):
        """The error text names the codec and spells out the accepted
        ``0-16`` band so callers can see the bound."""
        source = _make_da()
        target = str(tmp_path / "lz4_bad.tif")

        with pytest.raises(ValueError, match=r"lz4.*\(valid:\s*0-16\)"):
            to_geotiff(
                source, target, compression="lz4", compression_level=999,
            )
| 146 | + |
| 147 | + |
| 148 | +# --------------------------------------------------------------------------- |
| 149 | +# Dask streaming path level handling |
| 150 | +# --------------------------------------------------------------------------- |
| 151 | + |
| 152 | + |
@pytest.mark.skipif(not _HAS_DASK, reason="dask package required")
class TestLZ4LevelDaskStreaming:
    """lz4 level handling when the input ``DataArray`` is dask-backed.

    Exercises both the accept and the reject outcome for
    ``compression_level`` with a chunked input written at
    ``tile_size=16``.
    """

    def _make_dask_da(self, shape=(64, 64), chunks=(16, 16)):
        """Return ``(dask_backed_dataarray, source_ndarray)``.

        The ndarray holds seeded (seed 7) float32 standard-normal values;
        the ``DataArray`` wraps the same data split into ``chunks``.
        """
        # Imported lazily so collecting this module does not need dask.
        import dask.array as dask_array

        source = (
            np.random.default_rng(7).standard_normal(shape).astype(np.float32)
        )
        chunked = dask_array.from_array(source, chunks=chunks)
        return xr.DataArray(chunked, dims=["y", "x"]), source

    @pytest.mark.parametrize("level", [0, 1, 8, 16])
    def test_lz4_dask_streaming_level_round_trip(self, level, tmp_path):
        """A dask-backed write at ``level`` reads back bit-exact."""
        lazy_da, expected = self._make_dask_da()
        target = str(tmp_path / f"lz4_dask_level_{level}.tif")

        to_geotiff(
            lazy_da,
            target,
            compression="lz4",
            compression_level=level,
            tile_size=16,
        )

        np.testing.assert_array_equal(open_geotiff(target).values, expected)

    @pytest.mark.parametrize("level", [-1, 17, 50])
    def test_lz4_dask_streaming_out_of_range_raises(self, level, tmp_path):
        """Out-of-range levels raise ``ValueError`` for dask input too."""
        lazy_da, _unused = self._make_dask_da()
        target = str(tmp_path / "lz4_dask_bad.tif")

        with pytest.raises(ValueError, match="compression_level"):
            to_geotiff(
                lazy_da,
                target,
                compression="lz4",
                compression_level=level,
                tile_size=16,
            )
0 commit comments