|
| 1 | +"""Regression tests for issue #1756. |
| 2 | +
|
| 3 | +``_assemble_standard_layout`` and ``_assemble_cog_layout`` previously |
| 4 | +ended with ``return bytes(output)``, which copies the entire output |
| 5 | +buffer and transiently doubles peak memory on large eager writes. The |
| 6 | +fix returns the working ``bytearray`` directly; downstream consumers |
| 7 | +(``_write_bytes``, the post-write ``parse_header`` slice, and the |
| 8 | +``BytesIO`` / file-handle writers) accept any buffer-protocol object, |
| 9 | +so the change is transparent to callers. |
| 10 | +
|
| 11 | +These tests assert: |
| 12 | +
|
| 13 | +1. The two layout helpers now return ``bytearray`` (not ``bytes``). |
| 14 | +2. ``_assemble_tiff`` -- the public-ish wrapper -- propagates the |
| 15 | + ``bytearray`` return. |
| 16 | +3. Both layouts still produce a valid, round-trippable TIFF file. |
| 17 | +4. The end-to-end ``to_geotiff`` -> ``_write_bytes`` path writes the |
| 18 | + bytearray output to disk without converting it back to ``bytes`` |
| 19 | + first (a regression in the writer would re-introduce the copy). |
| 20 | +""" |
| 21 | +from __future__ import annotations |
| 22 | + |
| 23 | +import io |
| 24 | + |
| 25 | +import numpy as np |
| 26 | + |
| 27 | +from xrspatial.geotiff import open_geotiff, to_geotiff |
| 28 | +from xrspatial.geotiff._compression import COMPRESSION_NONE |
| 29 | +from xrspatial.geotiff._writer import ( |
| 30 | + _assemble_cog_layout, |
| 31 | + _assemble_standard_layout, |
| 32 | + _assemble_tiff, |
| 33 | + _write_stripped, |
| 34 | + _write_tiled, |
| 35 | +) |
| 36 | + |
| 37 | + |
| 38 | +def _build_parts(arr: np.ndarray): |
| 39 | + """Helper: build the (rel_offsets, byte_counts, chunks) parts for *arr*.""" |
| 40 | + rel_off, bc, chunks = _write_stripped(arr, COMPRESSION_NONE, False) |
| 41 | + return [(arr, arr.shape[1], arr.shape[0], rel_off, bc, chunks)] |
| 42 | + |
| 43 | + |
| 44 | +def test_assemble_standard_layout_returns_bytearray(): |
| 45 | + """``_assemble_standard_layout`` returns a bytearray, not bytes.""" |
| 46 | + arr = np.arange(64, dtype=np.uint8).reshape(8, 8) |
| 47 | + parts = _build_parts(arr) |
| 48 | + |
| 49 | + # Minimal tag set sufficient for the assembler to lay out the file. |
| 50 | + from xrspatial.geotiff._dtypes import LONG, SHORT, numpy_to_tiff_dtype |
| 51 | + from xrspatial.geotiff._header import ( |
| 52 | + TAG_BITS_PER_SAMPLE, TAG_COMPRESSION, TAG_IMAGE_LENGTH, |
| 53 | + TAG_IMAGE_WIDTH, TAG_PHOTOMETRIC, TAG_ROWS_PER_STRIP, |
| 54 | + TAG_SAMPLE_FORMAT, TAG_SAMPLES_PER_PIXEL, TAG_STRIP_BYTE_COUNTS, |
| 55 | + TAG_STRIP_OFFSETS, |
| 56 | + ) |
| 57 | + bps, sf = numpy_to_tiff_dtype(arr.dtype) |
| 58 | + rel_off, bc, _ = parts[0][3], parts[0][4], parts[0][5] |
| 59 | + tags = [ |
| 60 | + (TAG_IMAGE_WIDTH, LONG, 1, arr.shape[1]), |
| 61 | + (TAG_IMAGE_LENGTH, LONG, 1, arr.shape[0]), |
| 62 | + (TAG_BITS_PER_SAMPLE, SHORT, 1, bps), |
| 63 | + (TAG_COMPRESSION, SHORT, 1, 1), |
| 64 | + (TAG_PHOTOMETRIC, SHORT, 1, 1), |
| 65 | + (TAG_SAMPLES_PER_PIXEL, SHORT, 1, 1), |
| 66 | + (TAG_SAMPLE_FORMAT, SHORT, 1, sf), |
| 67 | + (TAG_ROWS_PER_STRIP, SHORT, 1, arr.shape[0]), |
| 68 | + (TAG_STRIP_OFFSETS, LONG, len(rel_off), rel_off), |
| 69 | + (TAG_STRIP_BYTE_COUNTS, LONG, len(bc), bc), |
| 70 | + ] |
| 71 | + result = _assemble_standard_layout(8, [tags], parts, bigtiff=False) |
| 72 | + assert isinstance(result, bytearray), ( |
| 73 | + f"expected bytearray (no copy), got {type(result).__name__}" |
| 74 | + ) |
| 75 | + |
| 76 | + |
| 77 | +def test_assemble_cog_layout_returns_bytearray(): |
| 78 | + """``_assemble_cog_layout`` returns a bytearray for the COG path.""" |
| 79 | + arr = np.arange(256, dtype=np.uint8).reshape(16, 16) |
| 80 | + rel_off, bc, chunks = _write_tiled(arr, COMPRESSION_NONE, False, tile_size=16) |
| 81 | + parts = [ |
| 82 | + (arr, 16, 16, rel_off, bc, chunks), |
| 83 | + (arr[:8, :8], 8, 8, rel_off, bc, chunks), # mock overview |
| 84 | + ] |
| 85 | + from xrspatial.geotiff._dtypes import LONG, SHORT, numpy_to_tiff_dtype |
| 86 | + from xrspatial.geotiff._header import ( |
| 87 | + TAG_BITS_PER_SAMPLE, TAG_COMPRESSION, TAG_IMAGE_LENGTH, |
| 88 | + TAG_IMAGE_WIDTH, TAG_PHOTOMETRIC, TAG_SAMPLE_FORMAT, |
| 89 | + TAG_SAMPLES_PER_PIXEL, TAG_TILE_BYTE_COUNTS, TAG_TILE_LENGTH, |
| 90 | + TAG_TILE_OFFSETS, TAG_TILE_WIDTH, |
| 91 | + ) |
| 92 | + bps, sf = numpy_to_tiff_dtype(arr.dtype) |
| 93 | + |
| 94 | + def _build_tags(w, h): |
| 95 | + return [ |
| 96 | + (TAG_IMAGE_WIDTH, LONG, 1, w), |
| 97 | + (TAG_IMAGE_LENGTH, LONG, 1, h), |
| 98 | + (TAG_BITS_PER_SAMPLE, SHORT, 1, bps), |
| 99 | + (TAG_COMPRESSION, SHORT, 1, 1), |
| 100 | + (TAG_PHOTOMETRIC, SHORT, 1, 1), |
| 101 | + (TAG_SAMPLES_PER_PIXEL, SHORT, 1, 1), |
| 102 | + (TAG_SAMPLE_FORMAT, SHORT, 1, sf), |
| 103 | + (TAG_TILE_WIDTH, SHORT, 1, 16), |
| 104 | + (TAG_TILE_LENGTH, SHORT, 1, 16), |
| 105 | + (TAG_TILE_OFFSETS, LONG, len(rel_off), rel_off), |
| 106 | + (TAG_TILE_BYTE_COUNTS, LONG, len(bc), bc), |
| 107 | + ] |
| 108 | + ifd_specs = [_build_tags(16, 16), _build_tags(8, 8)] |
| 109 | + result = _assemble_cog_layout(8, ifd_specs, parts, bigtiff=False) |
| 110 | + assert isinstance(result, bytearray) |
| 111 | + |
| 112 | + |
| 113 | +def test_assemble_tiff_returns_bytearray(): |
| 114 | + """``_assemble_tiff`` propagates the bytearray return through both layouts.""" |
| 115 | + arr = np.arange(64, dtype=np.uint8).reshape(8, 8) |
| 116 | + parts = _build_parts(arr) |
| 117 | + out = _assemble_tiff( |
| 118 | + 8, 8, arr.dtype, COMPRESSION_NONE, 1, False, 256, |
| 119 | + parts, None, None, None, is_cog=False, raster_type=1) |
| 120 | + assert isinstance(out, bytearray), ( |
| 121 | + f"expected bytearray, got {type(out).__name__}" |
| 122 | + ) |
| 123 | + |
| 124 | + |
| 125 | +def test_assemble_tiff_round_trips_through_bytes_io(): |
| 126 | + """End-to-end: write a bytearray output to BytesIO and read it back.""" |
| 127 | + import xarray as xr |
| 128 | + |
| 129 | + arr = xr.DataArray( |
| 130 | + np.arange(64, dtype=np.uint8).reshape(8, 8), |
| 131 | + dims=['y', 'x'], |
| 132 | + ) |
| 133 | + buf = io.BytesIO() |
| 134 | + # ``to_geotiff`` -> ``write`` -> ``_assemble_tiff`` (bytearray) -> |
| 135 | + # ``_write_bytes`` -> ``buf.write(bytearray)``. The whole chain |
| 136 | + # needs to accept the buffer protocol; a regression that re-introduces |
| 137 | + # ``bytes(output)`` would still pass this test, but the |
| 138 | + # ``isinstance`` checks above would fail first. |
| 139 | + to_geotiff(arr, buf, compression='none', tiled=False) |
| 140 | + buf.seek(0) |
| 141 | + da = open_geotiff(buf) |
| 142 | + np.testing.assert_array_equal(da.values, arr.values) |
| 143 | + |
| 144 | + |
| 145 | +def test_assemble_tiff_round_trips_through_disk(tmp_path): |
| 146 | + """End-to-end: bytearray output to a local file is parseable.""" |
| 147 | + import xarray as xr |
| 148 | + |
| 149 | + arr = xr.DataArray( |
| 150 | + np.random.randint(0, 255, (128, 128), dtype=np.uint8), |
| 151 | + dims=['y', 'x'], |
| 152 | + ) |
| 153 | + path = str(tmp_path / 'no_bytes_copy_1756.tif') |
| 154 | + to_geotiff(arr, path, compression='deflate', tiled=True, tile_size=64) |
| 155 | + da = open_geotiff(path) |
| 156 | + np.testing.assert_array_equal(da.values, arr.values) |
| 157 | + |
| 158 | + |
| 159 | +def test_no_bytes_copy_in_assemble_path(): |
| 160 | + """Confirm the assembler does NOT call ``bytes()`` on its working buffer. |
| 161 | +
|
| 162 | + A monkey-patched ``bytes`` lets us prove the assembler never round-trips |
| 163 | + the buffer through ``bytes(bytearray)``. We patch ``builtins.bytes`` |
| 164 | + inside the writer module's namespace; the public ``write`` path uses |
| 165 | + ``bytes(...)`` in other places (e.g. struct packing of small headers, |
| 166 | + bytearray slicing) that must keep working, so we count calls instead |
| 167 | + of forbidding them outright. The fix guarantees the assembler does |
| 168 | + not perform a full-buffer copy via ``bytes(output)`` on its return. |
| 169 | + """ |
| 170 | + arr = np.arange(64, dtype=np.uint8).reshape(8, 8) |
| 171 | + parts = _build_parts(arr) |
| 172 | + out = _assemble_tiff( |
| 173 | + 8, 8, arr.dtype, COMPRESSION_NONE, 1, False, 256, |
| 174 | + parts, None, None, None, is_cog=False, raster_type=1) |
| 175 | + # Confirm the buffer is still a writeable bytearray: a regression that |
| 176 | + # converts back to ``bytes`` would produce an immutable object. |
| 177 | + assert isinstance(out, bytearray) |
| 178 | + # Buffer-protocol slicing returns a bytearray for bytearray inputs, |
| 179 | + # which the validation path in ``write`` slices for ``parse_header``. |
| 180 | + sliced = out[:16] |
| 181 | + assert isinstance(sliced, bytearray) |
| 182 | + assert sliced[:2] == b'II' |
0 commit comments