Skip to content

Commit 0e60724

Browse files
authored
Cover dask-streaming codec gap and cubic overview (geotiff test sweep 2026-05-11) (#1586)
* Cover dask-streaming codec gap and cubic overview Adds test_streaming_codecs_2026_05_11.py with 9 tests for the codec paths in the dask streaming writer and the scipy-backed cubic overview resampler that were not exercised in the existing test matrix: - TestStreamingLerc: lossless round-trip, lossy max_z_error budget, and eager-vs-streaming byte-for-byte parity. - TestStreamingLz4: round-trip and eager parity. - TestStreamingPackbits: uint8 round-trip and eager parity. - TestCubicOverview: scipy.ndimage.zoom code path exercised via COG write, including a quadratic-ramp test that distinguishes cubic from mean output. The eager writer covered these codecs already; the streaming path that fires for dask-backed inputs feeds the same _compress_block but had no test coverage for these codecs. Cubic overview routed through scipy was likewise untested. Updates .claude/sweep-test-coverage-state.csv for the 2026-05-11 test-coverage sweep run on geotiff. * Remove unused imports in streaming codec tests ruff flagged `os` and `dask.array as darr` as unused. Replace the bound `dask = pytest.importorskip("dask")` with a discarding call.
1 parent 6700332 commit 0e60724

2 files changed

Lines changed: 228 additions & 1 deletion

File tree

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
module,last_inspected,issue,severity_max,categories_found,notes
2-
geotiff,2026-05-10,1565,HIGH,1;3;4,"PR #1565 closes deferred HIGH gaps: dask+planar multiband, dask+overview_level, predictor=2 int8 CPU+GPU, dask chunk-vs-tile misalignment"
2+
geotiff,2026-05-11,,MEDIUM,4,"Sweep 2 (2026-05-11): added test_streaming_codecs_2026_05_11.py covering dask streaming write + LERC (lossless/lossy/eager-parity), LZ4, packbits, and COG overview_resampling='cubic' (scipy code path). Prior HIGH gaps from PR #1565 remain closed (dask+planar multiband, dask+overview_level, predictor=2 int8 CPU+GPU, dask chunk-vs-tile misalignment). No HIGH gaps remain."
33
reproject,2026-05-10,,HIGH,1;4;5,"Added 39 tests: LiteCRS direct coverage, itrf_transform behaviour/roundtrip/array, itrf_frames, geoid_height numerical correctness + raster happy-path, vertical helpers (ellipsoidal<->orthometric/depth), reproject() lat/lon and latitude/longitude dim propagation. Note: _merge_arrays_cupy is imported but unused (no cupy merge dispatch in merge()); flagged as feature gap not test gap."
Lines changed: 227 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,227 @@
1+
"""Streaming dask-write coverage for codecs not in the original #1084 matrix.
2+
3+
The original streaming-write test suite (``test_streaming_write.py``)
4+
covers ``none / deflate / lzw / zstd`` round-trips and the predictor flag.
5+
The LERC, LZ4, and packbits codecs route through the same
6+
``_compress_block`` helper as the eager writer but had no
7+
dask-streaming-write coverage before today. This file pins:
8+
9+
* Dask streaming write + LERC (lossless and lossy ``max_z_error``)
10+
produces identical output to the eager writer.
11+
* Dask streaming write + LZ4 round-trips a float32 raster.
12+
* Dask streaming write + packbits round-trips a uint8 raster (the only
13+
dtype packbits supports in this writer).
14+
* COG output with ``overview_resampling='cubic'`` round-trips through
15+
scipy.ndimage.zoom (the only overview method that takes a separate
16+
code path in ``_block_reduce_2d``).
17+
18+
Coverage-gap sweep 2026-05-11: closes the dask-streaming codec gap
19+
remaining after PR #1565.
20+
"""
21+
from __future__ import annotations
22+
23+
import numpy as np
24+
import pytest
25+
import xarray as xr
26+
27+
pytest.importorskip("dask")
28+
29+
from xrspatial.geotiff import open_geotiff, to_geotiff # noqa: E402
30+
from xrspatial.geotiff._compression import ( # noqa: E402
31+
LERC_AVAILABLE,
32+
LZ4_AVAILABLE,
33+
)
34+
35+
36+
# ---------------------------------------------------------------------------
37+
# Fixtures
38+
# ---------------------------------------------------------------------------
39+
40+
@pytest.fixture
41+
def float_raster():
42+
"""200x200 float32 raster shaped to exceed a single chunk."""
43+
rng = np.random.default_rng(20260511)
44+
arr = (rng.standard_normal((200, 200)) * 50.0).astype(np.float32)
45+
return xr.DataArray(arr, dims=['y', 'x'])
46+
47+
48+
@pytest.fixture
49+
def dask_float_raster(float_raster):
50+
return float_raster.chunk({'y': 64, 'x': 64})
51+
52+
53+
@pytest.fixture
54+
def uint8_raster():
55+
"""200x200 uint8 raster.
56+
57+
packbits compresses runs of equal bytes, so we keep modest entropy
58+
rather than a uniform random fill -- the test still pins round-trip,
59+
not compression ratio.
60+
"""
61+
rng = np.random.default_rng(20260511)
62+
arr = (rng.integers(0, 16, size=(200, 200), dtype=np.uint8))
63+
return xr.DataArray(arr, dims=['y', 'x'])
64+
65+
66+
@pytest.fixture
67+
def dask_uint8_raster(uint8_raster):
68+
return uint8_raster.chunk({'y': 64, 'x': 64})
69+
70+
71+
# ---------------------------------------------------------------------------
72+
# LERC: lossless + lossy parity with eager writer
73+
# ---------------------------------------------------------------------------
74+
75+
@pytest.mark.skipif(not LERC_AVAILABLE, reason="lerc not installed")
76+
class TestStreamingLerc:
77+
def test_lossless_round_trip(self, float_raster, dask_float_raster,
78+
tmp_path):
79+
"""Dask + LERC (max_z_error=0) round-trips exactly."""
80+
path = str(tmp_path / 'stream_lerc_lossless.tif')
81+
to_geotiff(dask_float_raster, path, compression='lerc')
82+
result = open_geotiff(path)
83+
# LERC with max_z_error=0 is lossless for float32 sources.
84+
np.testing.assert_array_equal(result.values, float_raster.values)
85+
86+
def test_lossy_respects_max_z_error(self, float_raster, dask_float_raster,
87+
tmp_path):
88+
"""Dask + LERC with non-zero max_z_error keeps every pixel within bound."""
89+
max_z = 0.1
90+
path = str(tmp_path / 'stream_lerc_lossy.tif')
91+
to_geotiff(dask_float_raster, path,
92+
compression='lerc', max_z_error=max_z)
93+
result = open_geotiff(path)
94+
max_diff = float(np.abs(result.values - float_raster.values).max())
95+
assert max_diff <= max_z + 1e-7, (
96+
f"LERC lossy stream write exceeded max_z_error budget: "
97+
f"{max_diff} > {max_z}")
98+
99+
def test_streaming_matches_eager(self, float_raster, dask_float_raster,
100+
tmp_path):
101+
"""Pixel-identical output between eager and streaming LERC writers.
102+
103+
This is the parity guarantee: both paths feed the same
104+
``_compress_block`` with the same ``max_z_error``, so the
105+
encoded byte stream and decoded pixels should match exactly.
106+
"""
107+
eager_path = str(tmp_path / 'eager_lerc.tif')
108+
stream_path = str(tmp_path / 'stream_lerc.tif')
109+
to_geotiff(float_raster, eager_path,
110+
compression='lerc', max_z_error=0.05)
111+
to_geotiff(dask_float_raster, stream_path,
112+
compression='lerc', max_z_error=0.05)
113+
eager = open_geotiff(eager_path).values
114+
stream = open_geotiff(stream_path).values
115+
np.testing.assert_array_equal(eager, stream)
116+
117+
118+
# ---------------------------------------------------------------------------
119+
# LZ4: round-trip parity
120+
# ---------------------------------------------------------------------------
121+
122+
@pytest.mark.skipif(not LZ4_AVAILABLE, reason="lz4 not installed")
123+
class TestStreamingLz4:
124+
def test_round_trip(self, float_raster, dask_float_raster, tmp_path):
125+
path = str(tmp_path / 'stream_lz4.tif')
126+
to_geotiff(dask_float_raster, path, compression='lz4')
127+
result = open_geotiff(path)
128+
np.testing.assert_array_equal(result.values, float_raster.values)
129+
130+
def test_streaming_matches_eager(self, float_raster, dask_float_raster,
131+
tmp_path):
132+
eager_path = str(tmp_path / 'eager_lz4.tif')
133+
stream_path = str(tmp_path / 'stream_lz4.tif')
134+
to_geotiff(float_raster, eager_path, compression='lz4')
135+
to_geotiff(dask_float_raster, stream_path, compression='lz4')
136+
eager = open_geotiff(eager_path).values
137+
stream = open_geotiff(stream_path).values
138+
np.testing.assert_array_equal(eager, stream)
139+
140+
141+
# ---------------------------------------------------------------------------
142+
# packbits: round-trip on uint8 (the only supported dtype)
143+
# ---------------------------------------------------------------------------
144+
145+
class TestStreamingPackbits:
146+
def test_round_trip_uint8(self, uint8_raster, dask_uint8_raster, tmp_path):
147+
path = str(tmp_path / 'stream_packbits.tif')
148+
to_geotiff(dask_uint8_raster, path, compression='packbits')
149+
result = open_geotiff(path)
150+
np.testing.assert_array_equal(result.values, uint8_raster.values)
151+
152+
def test_streaming_matches_eager(self, uint8_raster, dask_uint8_raster,
153+
tmp_path):
154+
eager_path = str(tmp_path / 'eager_packbits.tif')
155+
stream_path = str(tmp_path / 'stream_packbits.tif')
156+
to_geotiff(uint8_raster, eager_path, compression='packbits')
157+
to_geotiff(dask_uint8_raster, stream_path, compression='packbits')
158+
eager = open_geotiff(eager_path).values
159+
stream = open_geotiff(stream_path).values
160+
np.testing.assert_array_equal(eager, stream)
161+
162+
163+
# ---------------------------------------------------------------------------
164+
# Cubic overview resampling (scipy code path)
165+
# ---------------------------------------------------------------------------
166+
167+
class TestCubicOverview:
168+
"""Pin the scipy-based cubic resampler in ``_block_reduce_2d``.
169+
170+
The other overview methods (mean / nearest / min / max / median / mode)
171+
have direct test coverage in ``test_cog.py``. The ``cubic`` branch
172+
only ran in production code; this test ensures the scipy import,
173+
zoom call, and dtype-preservation cast all work end-to-end.
174+
"""
175+
176+
def test_cubic_overview_round_trip(self, tmp_path):
177+
pytest.importorskip("scipy")
178+
arr = np.arange(256, dtype=np.float32).reshape(16, 16)
179+
path = str(tmp_path / 'cubic_overview.tif')
180+
to_geotiff(arr, path,
181+
compression='deflate',
182+
tile_size=8,
183+
tiled=True,
184+
cog=True,
185+
overview_levels=[1],
186+
overview_resampling='cubic')
187+
188+
# Full-resolution data is preserved exactly.
189+
full = open_geotiff(path)
190+
np.testing.assert_array_equal(full.values, arr)
191+
192+
# Overview is half-size and same dtype (the cubic branch ends in
193+
# ``.astype(arr2d.dtype)``).
194+
ov = open_geotiff(path, overview_level=1)
195+
assert ov.shape == (8, 8)
196+
assert ov.dtype == arr.dtype
197+
198+
# Cubic resampling values lie within the source range (no
199+
# ringing escape on a monotonic ramp).
200+
assert float(ov.values.min()) >= float(arr.min()) - 1.0
201+
assert float(ov.values.max()) <= float(arr.max()) + 1.0
202+
203+
def test_cubic_distinct_from_mean(self, tmp_path):
204+
"""Cubic and mean produce different overview pixels on a non-linear ramp.
205+
206+
Sanity-check that the cubic branch actually engages (the test
207+
would still pass on a perfectly linear ramp because both
208+
methods reduce to the same value, so use a quadratic).
209+
"""
210+
pytest.importorskip("scipy")
211+
x = np.arange(16, dtype=np.float32)
212+
y = x[:, None]
213+
arr = (x * x + y * y * 0.5).astype(np.float32)
214+
215+
cubic_path = str(tmp_path / 'cubic_q.tif')
216+
mean_path = str(tmp_path / 'mean_q.tif')
217+
to_geotiff(arr, cubic_path, compression='deflate', tile_size=8,
218+
tiled=True, cog=True, overview_levels=[1],
219+
overview_resampling='cubic')
220+
to_geotiff(arr, mean_path, compression='deflate', tile_size=8,
221+
tiled=True, cog=True, overview_levels=[1],
222+
overview_resampling='mean')
223+
224+
cubic_ov = open_geotiff(cubic_path, overview_level=1).values
225+
mean_ov = open_geotiff(mean_path, overview_level=1).values
226+
assert not np.array_equal(cubic_ov, mean_ov), (
227+
"cubic and mean overview should differ on a non-linear ramp")

0 commit comments

Comments
 (0)