Skip to content

Commit 3ce1681

Browse files
committed
geotiff: drop dead bytearray allocation in uncompressed tiled writer (#1736)
The uncompressed branch of _compress_tiles allocated total_buf = bytearray(n_tiles * tile_bytes) mv = memoryview(total_buf) at the top of the loop and never read either back. Tiles were still built via tile_arr.tobytes() and appended to a list. The buffer was roughly the size of the full uncompressed raster, so a marginal write would OOM while holding both the dead buffer and the tile list. Remove the dead allocation. Existing tile-build logic stays the same; the round-trip is byte-for-byte unchanged. Adds regression coverage in test_writer_uncompressed_tiled_no_dead_alloc_1736.py for the exact divisor, edge-tile padding, and multi-band paths.
1 parent 77e6e4e commit 3ce1681

2 files changed

Lines changed: 74 additions & 5 deletions

File tree

xrspatial/geotiff/_writer.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -652,11 +652,14 @@ def _write_tiled(data: np.ndarray, compression: int, predictor: int,
652652
n_tiles = tiles_across * tiles_down
653653

654654
if compression == COMPRESSION_NONE:
655-
# Uncompressed: pre-allocate a contiguous buffer for all tiles
656-
# and copy tile data directly, avoiding per-tile Python overhead.
657-
tile_bytes = tw * th * bytes_per_sample * samples
658-
total_buf = bytearray(n_tiles * tile_bytes)
659-
mv = memoryview(total_buf)
655+
# Uncompressed: build tiles one at a time. An earlier version
656+
# pre-allocated a contiguous ``bytearray(n_tiles * tile_bytes)``
657+
# buffer here on the theory that we'd copy each tile into it
658+
# directly, but the loop below ended up calling ``tobytes()``
659+
# per tile anyway and never read the buffer. That left a dead
660+
# allocation roughly the size of the full uncompressed raster
661+
# alongside the actual tile list, doubling peak memory and
662+
# turning OOM-marginal writes into OOM-failing ones (#1736).
660663
tiles = []
661664
rel_offsets = []
662665
byte_counts = []
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
"""Regression test for issue #1736.
2+
3+
The uncompressed tiled-write branch of ``_compress_tiles`` previously
4+
allocated a contiguous ``bytearray`` plus a memoryview ``(n_tiles *
5+
tw * th * bytes_per_sample * samples)`` bytes long at the top of the
6+
loop and never read either back. Tile bytes were still built via
7+
``tile_arr.tobytes()`` and appended to a list. The dead buffer roughly
8+
doubled peak memory for an uncompressed write.
9+
10+
The fix is a pure deletion. This test pins the round-trip so a future
11+
refactor that re-introduces a real contiguous buffer keeps the same
12+
external behaviour: writing an uncompressed tiled GeoTIFF must still
13+
read back identically with no holes between tiles.
14+
"""
15+
from __future__ import annotations
16+
17+
import os
18+
import uuid
19+
20+
import numpy as np
21+
import xarray as xr
22+
23+
from xrspatial.geotiff import to_geotiff, open_geotiff
24+
25+
26+
def test_uncompressed_tiled_round_trip_exact(tmp_path):
27+
rng = np.random.RandomState(20260512)
28+
h, w = 96, 144
29+
data = rng.randint(0, 200, size=(h, w)).astype(np.uint8)
30+
da = xr.DataArray(data, dims=['y', 'x'])
31+
32+
p = str(tmp_path / f"tmp_1736_uncomp_{uuid.uuid4().hex[:8]}.tif")
33+
to_geotiff(da, p, tiled=True, tile_size=32, compression='none')
34+
assert os.path.exists(p)
35+
36+
out = open_geotiff(p)
37+
np.testing.assert_array_equal(out.data, data)
38+
assert out.shape == (h, w)
39+
40+
41+
def test_uncompressed_tiled_round_trip_partial_edge_tiles(tmp_path):
42+
"""Tile size that does not divide width/height exercises the
43+
zero-padded edge-tile branch inside the loop."""
44+
rng = np.random.RandomState(20260513)
45+
h, w = 50, 70 # 32 does not divide either; edges pad
46+
data = rng.randint(0, 60000, size=(h, w)).astype(np.uint16)
47+
da = xr.DataArray(data, dims=['y', 'x'])
48+
49+
p = str(tmp_path / f"tmp_1736_edge_{uuid.uuid4().hex[:8]}.tif")
50+
to_geotiff(da, p, tiled=True, tile_size=32, compression='none')
51+
52+
out = open_geotiff(p)
53+
np.testing.assert_array_equal(out.data, data)
54+
55+
56+
def test_uncompressed_tiled_round_trip_multiband(tmp_path):
57+
rng = np.random.RandomState(20260514)
58+
h, w, b = 48, 80, 3
59+
data = rng.randint(0, 200, size=(h, w, b)).astype(np.uint8)
60+
da = xr.DataArray(data, dims=['y', 'x', 'band'])
61+
62+
p = str(tmp_path / f"tmp_1736_multi_{uuid.uuid4().hex[:8]}.tif")
63+
to_geotiff(da, p, tiled=True, tile_size=16, compression='none')
64+
65+
out = open_geotiff(p)
66+
np.testing.assert_array_equal(out.data, data)

0 commit comments

Comments
 (0)