|
| 1 | +"""Regression test for issue #1736. |
| 2 | +
|
| 3 | +The uncompressed tiled branch of ``xrspatial.geotiff._writer._write_tiled`` |
| 4 | +previously allocated a contiguous ``bytearray`` plus a memoryview |
| 5 | +``(n_tiles * tw * th * bytes_per_sample * samples)`` bytes long at the |
| 6 | +top of the loop and never read either back. Tile bytes were still |
| 7 | +built via ``tile_arr.tobytes()`` and appended to a list. The dead |
| 8 | +buffer roughly doubled peak memory for an uncompressed write. |
| 9 | +
|
| 10 | +The fix is a pure deletion. The tests below cover both behaviours |
| 11 | +worth pinning: |
| 12 | +
|
| 13 | +* round-trip fidelity (writing an uncompressed tiled GeoTIFF must |
| 14 | + still read back identically with no holes between tiles); and |
| 15 | +* peak-memory shape, by snapshotting ``tracemalloc`` peak across a |
| 16 | + direct ``_write_tiled`` call. The current implementation lands at |
| 17 | + roughly ``1.06x`` the raw raster size; the dead bytearray pushed it |
| 18 | + to ``~2.07x``. The threshold below (``1.5x``) catches any |
| 19 | + reintroduction of that allocation with comfortable headroom for |
| 20 | + unrelated implementation changes. |
| 21 | +""" |
| 22 | +from __future__ import annotations |
| 23 | + |
| 24 | +import os |
| 25 | +import tracemalloc |
| 26 | +import uuid |
| 27 | + |
| 28 | +import numpy as np |
| 29 | +import xarray as xr |
| 30 | + |
| 31 | +from xrspatial.geotiff import to_geotiff, open_geotiff |
| 32 | +from xrspatial.geotiff._compression import COMPRESSION_NONE |
| 33 | +from xrspatial.geotiff._writer import _write_tiled |
| 34 | + |
| 35 | + |
| 36 | +# Peak ``tracemalloc`` size, in multiples of the input raster size, that |
| 37 | +# the uncompressed branch of ``_write_tiled`` must stay under. The dead |
| 38 | +# bytearray drove peak to ~2.07x; the current implementation sits at |
| 39 | +# ~1.06-1.12x across the cases below. 1.5x leaves room for unrelated |
| 40 | +# refactors while still firmly catching the regression. |
| 41 | +_PEAK_RATIO_LIMIT = 1.5 |
| 42 | + |
| 43 | + |
| 44 | +def test_uncompressed_tiled_round_trip_exact(tmp_path): |
| 45 | + rng = np.random.RandomState(20260512) |
| 46 | + h, w = 96, 144 |
| 47 | + data = rng.randint(0, 200, size=(h, w)).astype(np.uint8) |
| 48 | + da = xr.DataArray(data, dims=['y', 'x']) |
| 49 | + |
| 50 | + p = str(tmp_path / f"tmp_1736_uncomp_{uuid.uuid4().hex[:8]}.tif") |
| 51 | + to_geotiff(da, p, tiled=True, tile_size=32, compression='none') |
| 52 | + assert os.path.exists(p) |
| 53 | + |
| 54 | + out = open_geotiff(p) |
| 55 | + np.testing.assert_array_equal(out.data, data) |
| 56 | + assert out.shape == (h, w) |
| 57 | + |
| 58 | + |
| 59 | +def test_uncompressed_tiled_round_trip_partial_edge_tiles(tmp_path): |
| 60 | + """Tile size that does not divide width/height exercises the |
| 61 | + zero-padded edge-tile branch inside the loop.""" |
| 62 | + rng = np.random.RandomState(20260513) |
| 63 | + h, w = 50, 70 # 32 does not divide either; edges pad |
| 64 | + data = rng.randint(0, 60000, size=(h, w)).astype(np.uint16) |
| 65 | + da = xr.DataArray(data, dims=['y', 'x']) |
| 66 | + |
| 67 | + p = str(tmp_path / f"tmp_1736_edge_{uuid.uuid4().hex[:8]}.tif") |
| 68 | + to_geotiff(da, p, tiled=True, tile_size=32, compression='none') |
| 69 | + |
| 70 | + out = open_geotiff(p) |
| 71 | + np.testing.assert_array_equal(out.data, data) |
| 72 | + |
| 73 | + |
| 74 | +def test_uncompressed_tiled_round_trip_multiband(tmp_path): |
| 75 | + rng = np.random.RandomState(20260514) |
| 76 | + h, w, b = 48, 80, 3 |
| 77 | + data = rng.randint(0, 200, size=(h, w, b)).astype(np.uint8) |
| 78 | + da = xr.DataArray(data, dims=['y', 'x', 'band']) |
| 79 | + |
| 80 | + p = str(tmp_path / f"tmp_1736_multi_{uuid.uuid4().hex[:8]}.tif") |
| 81 | + to_geotiff(da, p, tiled=True, tile_size=16, compression='none') |
| 82 | + |
| 83 | + out = open_geotiff(p) |
| 84 | + np.testing.assert_array_equal(out.data, data) |
| 85 | + |
| 86 | + |
| 87 | +def _peak_ratio_for_write_tiled(data: np.ndarray, tile_size: int) -> float: |
| 88 | + """Return ``tracemalloc`` peak / ``data.nbytes`` for one |
| 89 | + ``_write_tiled`` call against the uncompressed branch. |
| 90 | +
|
| 91 | + Allocations made before this call are excluded from peak by the |
| 92 | + ``reset_peak`` step, so the ratio reflects what ``_write_tiled`` |
| 93 | + itself adds. |
| 94 | + """ |
| 95 | + tracemalloc.start() |
| 96 | + try: |
| 97 | + tracemalloc.reset_peak() |
| 98 | + _write_tiled(data, COMPRESSION_NONE, 1, tile_size=tile_size) |
| 99 | + _current, peak = tracemalloc.get_traced_memory() |
| 100 | + finally: |
| 101 | + tracemalloc.stop() |
| 102 | + return peak / data.nbytes |
| 103 | + |
| 104 | + |
| 105 | +def test_uncompressed_tiled_peak_memory_single_band(): |
| 106 | + """Peak memory for the uncompressed branch should stay below |
| 107 | + ``_PEAK_RATIO_LIMIT * raster_bytes``. Reintroducing the dead |
| 108 | + ``bytearray(n_tiles * tile_bytes)`` would push the ratio to ~2x |
| 109 | + and fail this check.""" |
| 110 | + h, w = 1024, 1024 # 1 MB raw, exact tile divisor -> no edge padding |
| 111 | + data = np.random.RandomState(20260512).randint( |
| 112 | + 0, 255, size=(h, w), dtype=np.uint8, |
| 113 | + ) |
| 114 | + ratio = _peak_ratio_for_write_tiled(data, tile_size=256) |
| 115 | + assert ratio < _PEAK_RATIO_LIMIT, ( |
| 116 | + f"_write_tiled peak memory {ratio:.2f}x raster exceeds the " |
| 117 | + f"{_PEAK_RATIO_LIMIT}x cap; the dead bytearray from #1736 may " |
| 118 | + f"have been reintroduced." |
| 119 | + ) |
| 120 | + |
| 121 | + |
| 122 | +def test_uncompressed_tiled_peak_memory_multiband(): |
| 123 | + """3-band variant of the peak-memory check. ``samples == 3`` |
| 124 | + triples the would-be dead buffer, so this case is even more |
| 125 | + sensitive to a regression.""" |
| 126 | + h, w = 1024, 1024 |
| 127 | + data = np.random.RandomState(20260513).randint( |
| 128 | + 0, 255, size=(h, w, 3), dtype=np.uint8, |
| 129 | + ) |
| 130 | + ratio = _peak_ratio_for_write_tiled(data, tile_size=256) |
| 131 | + assert ratio < _PEAK_RATIO_LIMIT, ( |
| 132 | + f"_write_tiled peak memory {ratio:.2f}x raster exceeds the " |
| 133 | + f"{_PEAK_RATIO_LIMIT}x cap; the dead bytearray from #1736 may " |
| 134 | + f"have been reintroduced." |
| 135 | + ) |
0 commit comments