|
| 1 | +"""Local-file tile/strip byte-count cap (issue #1664). |
| 2 | +
|
| 3 | +Before #1664, ``XRSPATIAL_COG_MAX_TILE_BYTES`` only fired in the HTTP |
| 4 | +fetch path. A crafted local TIFF with a huge ``TileByteCounts`` / |
| 5 | +``StripByteCounts`` could still feed an enormous slice into the |
| 6 | +decompressor, which can balloon into gigabytes of decoded output even |
| 7 | +when the underlying mmap slice is bounded by the file size. |
| 8 | +
|
| 9 | +These tests fabricate small COGs / strip-TIFFs, rewrite their byte |
| 10 | +counts to oversized values, and check that the cap raises before the |
| 11 | +decoder runs. |
| 12 | +""" |
| 13 | +from __future__ import annotations |
| 14 | + |
| 15 | +import struct |
| 16 | + |
| 17 | +import numpy as np |
| 18 | +import pytest |
| 19 | +import xarray as xr |
| 20 | + |
| 21 | +from xrspatial.geotiff import open_geotiff, to_geotiff |
| 22 | +from xrspatial.geotiff import _reader as _reader_mod |
| 23 | + |
| 24 | + |
| 25 | +# --------------------------------------------------------------------------- |
| 26 | +# Helpers -- patch in-place IFD entries for tile / strip byte counts |
| 27 | +# --------------------------------------------------------------------------- |
| 28 | + |
| 29 | + |
def _patch_byte_counts(data: bytearray, tag: int, value: int) -> None:
    """Overwrite every value stored under *tag* in the first IFD, in place.

    Intended for the byte-count tags (325=TileByteCounts,
    279=StripByteCounts).  The IFD is walked as classic-TIFF 12-byte
    entries: 2-byte tag, 2-byte field type, 4-byte count, then a 4-byte
    slot that holds either the values themselves (when they fit in four
    bytes) or an offset to them.

    Parameters
    ----------
    data
        Whole-file contents; mutated in place.
    tag
        Numeric TIFF tag whose values are rewritten.
    value
        Replacement written to every element.  For SHORT fields the value
        is clipped to 0xFFFF because that is the widest a SHORT can hold.

    Raises
    ------
    AssertionError
        If *tag* is absent from the first IFD, or if its field type is
        neither SHORT (3) nor LONG (4).  Failing loudly here keeps a
        caller from silently testing an unforged file.
    """
    from xrspatial.geotiff._header import parse_header

    header = parse_header(bytes(data))
    bo = header.byte_order
    ifd_offset = header.first_ifd_offset
    num_entries = struct.unpack_from(f'{bo}H', data, ifd_offset)[0]
    first_entry = ifd_offset + 2  # entries start right after the count

    for i in range(num_entries):
        eo = first_entry + i * 12
        cur_tag = struct.unpack_from(f'{bo}H', data, eo)[0]
        if cur_tag != tag:
            continue

        type_id = struct.unpack_from(f'{bo}H', data, eo + 2)[0]
        count = struct.unpack_from(f'{bo}I', data, eo + 4)[0]

        if type_id == 4:  # LONG
            code, width, stored = 'I', 4, value
        elif type_id == 3:  # SHORT
            code, width, stored = 'H', 2, min(value, 0xFFFF)
        else:
            # Previously this case fell through and returned without
            # touching the file, leaving the caller with an unforged TIFF.
            raise AssertionError(
                f"tag {tag} has unsupported field type {type_id}; "
                "only SHORT (3) and LONG (4) can be patched")

        if count * width <= 4:
            # Values are packed inline in the entry's value/offset slot.
            base = eo + 8
        else:
            # The slot holds a file offset to the out-of-line array.
            base = struct.unpack_from(f'{bo}I', data, eo + 8)[0]

        for k in range(count):
            struct.pack_into(f'{bo}{code}', data, base + k * width, stored)
        return

    raise AssertionError(f"tag {tag} not found in IFD")
| 69 | + |
| 70 | + |
def _build_forged_tiled_cog(tmp_path, byte_count_value: int) -> str:
    """Write a tiled, deflate-compressed GeoTIFF and forge its tile byte counts.

    A 64x64 float32 ramp is written with ``tile_size=32``; the file is then
    read back, passed through ``_patch_byte_counts`` for tag 325 with
    *byte_count_value*, and written out again.  Returns the file path as a
    string.
    """
    out_path = str(tmp_path / "forged_local_tiles_1664.tif")
    pixels = np.arange(64 * 64, dtype=np.float32).reshape(64, 64)
    to_geotiff(
        xr.DataArray(pixels, dims=['y', 'x']),
        out_path,
        tile_size=32,
        compression='deflate',
    )

    # Round-trip the bytes through memory so the IFD can be edited in place.
    with open(out_path, 'rb') as src:
        raw = bytearray(src.read())
    _patch_byte_counts(raw, 325, byte_count_value)  # 325 = TileByteCounts
    with open(out_path, 'wb') as dst:
        dst.write(raw)

    return out_path
| 83 | + |
| 84 | + |
def _build_forged_stripped_tif(tmp_path, byte_count_value: int) -> str:
    """Write a strip-layout, deflate-compressed TIFF and forge its strip byte counts.

    A 64x64 float32 ramp is written with ``tiled=False``; the file is then
    read back, passed through ``_patch_byte_counts`` for tag 279 with
    *byte_count_value*, and written out again.  Returns the file path as a
    string.
    """
    out_path = str(tmp_path / "forged_local_strips_1664.tif")
    pixels = np.arange(64 * 64, dtype=np.float32).reshape(64, 64)
    # tiled=False selects the strip layout; deflate keeps a decompressor on
    # the read path so an oversized declared length is meaningful.
    to_geotiff(
        xr.DataArray(pixels, dims=['y', 'x']),
        out_path,
        tiled=False,
        compression='deflate',
    )

    # Round-trip the bytes through memory so the IFD can be edited in place.
    with open(out_path, 'rb') as src:
        raw = bytearray(src.read())
    _patch_byte_counts(raw, 279, byte_count_value)  # 279 = StripByteCounts
    with open(out_path, 'wb') as dst:
        dst.write(raw)

    return out_path
| 99 | + |
| 100 | + |
| 101 | +# --------------------------------------------------------------------------- |
| 102 | +# Tiled local reads |
| 103 | +# --------------------------------------------------------------------------- |
| 104 | + |
| 105 | + |
class TestLocalTileByteCap:
    """Tiled local reads checked against ``XRSPATIAL_COG_MAX_TILE_BYTES``."""

    def test_huge_tile_byte_count_rejected(self, tmp_path, monkeypatch):
        """A forged 100 MB TileByteCount is rejected under a 1 MB cap."""
        forged_path = _build_forged_tiled_cog(tmp_path, 100 * 1024 * 1024)
        monkeypatch.setenv('XRSPATIAL_COG_MAX_TILE_BYTES', str(1024 * 1024))

        with pytest.raises(ValueError, match="TileByteCount"):
            open_geotiff(forged_path)

    def test_error_message_names_value_and_cap(self, tmp_path, monkeypatch):
        """The error text carries the forged size, the cap, and a reason."""
        forged_path = _build_forged_tiled_cog(tmp_path, 50 * 1024 * 1024)
        monkeypatch.setenv('XRSPATIAL_COG_MAX_TILE_BYTES', str(1024))

        with pytest.raises(ValueError) as caught:
            open_geotiff(forged_path)

        text = str(caught.value)
        # Accept either thousands-separated or plain rendering of the
        # forged value (52,428,800) and of the cap (1,024).
        assert any(s in text for s in ("52,428,800", "52428800"))
        assert any(s in text for s in ("1,024", "1024"))
        assert "denial-of-service" in text.lower() or "malformed" in text

    def test_normal_local_cog_under_default_cap(self, tmp_path):
        """An unmodified local file round-trips with no cap override set."""
        expected = np.arange(64 * 64, dtype=np.float32).reshape(64, 64)
        out_path = str(tmp_path / "normal_local_1664.tif")
        to_geotiff(
            xr.DataArray(expected, dims=['y', 'x']),
            out_path,
            tile_size=32,
            compression='deflate',
        )

        loaded = open_geotiff(out_path)
        np.testing.assert_array_equal(loaded.values, expected)

    def test_env_override_lifts_cap(self, tmp_path, monkeypatch):
        """Raising the cap via the environment lets a 50 MB count through."""
        # 50 MB is declared while the cap is set to 64 MB, so the cap
        # check itself must not be what rejects the file.
        forged_path = _build_forged_tiled_cog(tmp_path, 50 * 1024 * 1024)
        monkeypatch.setenv(
            'XRSPATIAL_COG_MAX_TILE_BYTES', str(64 * 1024 * 1024))

        # A ValueError from elsewhere in the read is tolerated; only the
        # cap message is forbidden.  Other exception types still propagate.
        try:
            open_geotiff(forged_path)
        except ValueError as err:
            assert "exceeds the per-tile safety cap" not in str(err)
| 154 | + |
| 155 | + |
| 156 | +# --------------------------------------------------------------------------- |
| 157 | +# Strip-organized local reads |
| 158 | +# --------------------------------------------------------------------------- |
| 159 | + |
| 160 | + |
class TestLocalStripByteCap:
    """Strip-layout local reads checked against ``XRSPATIAL_COG_MAX_TILE_BYTES``."""

    def test_huge_strip_byte_count_rejected(self, tmp_path, monkeypatch):
        """A forged 100 MB StripByteCount is rejected under a 1 MB cap."""
        forged_path = _build_forged_stripped_tif(tmp_path, 100 * 1024 * 1024)
        monkeypatch.setenv('XRSPATIAL_COG_MAX_TILE_BYTES', str(1024 * 1024))

        with pytest.raises(ValueError, match="StripByteCount"):
            open_geotiff(forged_path)

    def test_strip_error_message_mentions_strip(self, tmp_path, monkeypatch):
        """The error text refers to strips and to the safety cap."""
        forged_path = _build_forged_stripped_tif(tmp_path, 50 * 1024 * 1024)
        monkeypatch.setenv('XRSPATIAL_COG_MAX_TILE_BYTES', str(2048))

        with pytest.raises(ValueError) as caught:
            open_geotiff(forged_path)

        lowered = str(caught.value).lower()
        assert "strip" in lowered
        assert "safety cap" in lowered
| 178 | + |
| 179 | + |
| 180 | +# --------------------------------------------------------------------------- |
| 181 | +# Cap helper directly |
| 182 | +# --------------------------------------------------------------------------- |
| 183 | + |
| 184 | + |
def test_max_tile_bytes_env_negative_falls_back(monkeypatch):
    """A negative env value is expected to resolve to 1, not the default."""
    monkeypatch.setenv('XRSPATIAL_COG_MAX_TILE_BYTES', '-5')
    # The helper is expected to floor the parsed value at 1.
    resolved = _reader_mod._max_tile_bytes_from_env()
    assert resolved == 1
| 190 | + |
| 191 | + |
def test_max_tile_bytes_env_garbage_falls_back(monkeypatch):
    """A non-numeric env value is expected to resolve to the default cap."""
    monkeypatch.setenv('XRSPATIAL_COG_MAX_TILE_BYTES', 'not-a-number')
    resolved = _reader_mod._max_tile_bytes_from_env()
    assert resolved == _reader_mod.MAX_TILE_BYTES_DEFAULT
0 commit comments