Skip to content

Commit 62d8709

Browse files
committed
Reject TIFFs whose declared tile grid exceeds TileOffsets length (#1219)
A TIFF can declare image dimensions that imply more tiles than its TileOffsets tag supplies. The GPU _assemble_tiles_kernel reads tile_out_offsets[tile_idx] where tile_idx is computed from the output pixel position, so threads whose pixel maps past the supplied count perform out-of-bounds device reads. The CPU _read_tiles loop silently skips those tiles with `if tile_idx >= len(offsets): continue` and returns a zero-padded raster. Add validate_tile_layout(ifd) in _header.py and call it from _read_tiles, _read_cog_http, and read_geotiff_gpu. Raises ValueError with the mismatch count before any decode work runs.
1 parent 82204f7 commit 62d8709

File tree

5 files changed

+226
-3
lines changed

5 files changed

+226
-3
lines changed

.claude/sweep-security-state.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,8 @@
1010
"last_inspected": "2026-04-17",
1111
"issue": 1215,
1212
"severity_max": "HIGH",
13-
"categories_found": [1, 4]
13+
"categories_found": [1, 4],
14+
"followup_issues": [1219]
1415
}
1516
}
1617
}

xrspatial/geotiff/__init__.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1068,7 +1068,7 @@ def read_geotiff_gpu(source: str, *,
10681068
"Install it with: pip install cupy-cuda12x")
10691069

10701070
from ._reader import _FileSource, _check_dimensions, MAX_PIXELS_DEFAULT
1071-
from ._header import parse_header, parse_all_ifds
1071+
from ._header import parse_header, parse_all_ifds, validate_tile_layout
10721072
from ._dtypes import tiff_dtype_to_numpy
10731073
from ._geotags import extract_geo_info
10741074
from ._gpu_decode import gpu_decode_tiles
@@ -1135,6 +1135,11 @@ def read_geotiff_gpu(source: str, *,
11351135
# A single tile's decoded bytes must also fit under the pixel budget.
11361136
_check_dimensions(tw, th, samples, max_pixels)
11371137

1138+
# Reject malformed TIFFs whose declared tile grid exceeds the
1139+
# supplied TileOffsets length. The GPU tile-assembly kernel would
1140+
# read OOB otherwise. See issue #1219.
1141+
validate_tile_layout(ifd)
1142+
11381143
finally:
11391144
src.close()
11401145

xrspatial/geotiff/_header.py

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
"""TIFF/BigTIFF header and IFD parsing."""
22
from __future__ import annotations
33

4+
import math
45
import struct
56
from dataclasses import dataclass, field
67
from typing import Any
@@ -208,6 +209,68 @@ def nodata_str(self) -> str | None:
208209
return str(v).rstrip('\x00')
209210

210211

212+
def validate_tile_layout(ifd: IFD) -> None:
    """Validate that a tiled IFD's tile tables cover the declared tile grid.

    A well-formed tiled TIFF must supply at least `tiles_across * tiles_down`
    TileOffsets and TileByteCounts entries (times samples_per_pixel for
    planar config 2). An adversarial or malformed file can declare larger
    image dimensions than its offsets array covers, which causes
    out-of-bounds reads in downstream decoders (notably the GPU
    tile-assembly kernel).

    Parameters
    ----------
    ifd : IFD
        Parsed IFD. Non-tiled IFDs are accepted and ignored.

    Raises
    ------
    ValueError
        If TileOffsets or TileByteCounts is missing, if the tile or image
        width/height is not positive, or if the declared grid needs more
        entries than TileOffsets or TileByteCounts provides.
    """
    if not ifd.is_tiled:
        return

    offsets = ifd.tile_offsets
    byte_counts = ifd.tile_byte_counts
    if offsets is None or byte_counts is None:
        raise ValueError("Tiled TIFF is missing TileOffsets or TileByteCounts")

    tw = ifd.tile_width
    th = ifd.tile_height
    if tw <= 0 or th <= 0:
        raise ValueError(
            f"Invalid tile dimensions: tile_width={tw}, tile_height={th}")

    width = ifd.width
    height = ifd.height
    if width <= 0 or height <= 0:
        raise ValueError(
            f"Invalid image dimensions: width={width}, height={height}")

    # Exact integer ceiling division. Going through float true division
    # (math.ceil(width / tw)) loses precision once a dimension exceeds 2**53
    # and would under-count the tiles the file must supply.
    tiles_across = (width + tw - 1) // tw
    tiles_down = (height + th - 1) // th
    planar = ifd.planar_config
    samples = ifd.samples_per_pixel
    # Planar config 2 stores each sample plane as its own set of tiles.
    bands = samples if (planar == 2 and samples > 1) else 1
    expected = tiles_across * tiles_down * bands

    if len(offsets) < expected:
        raise ValueError(
            f"Malformed TIFF: declared tile grid requires {expected} tile "
            f"offsets ({tiles_across} x {tiles_down}"
            f"{f' x {bands} bands' if bands > 1 else ''}), "
            f"but TileOffsets has only {len(offsets)} entries"
        )
    if len(byte_counts) < expected:
        raise ValueError(
            f"Malformed TIFF: declared tile grid requires {expected} tile "
            f"byte counts, but TileByteCounts has only {len(byte_counts)} "
            f"entries"
        )
272+
273+
211274
def parse_header(data: bytes | memoryview) -> TIFFHeader:
212275
"""Parse a TIFF/BigTIFF file header.
213276

xrspatial/geotiff/_reader.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
)
1818
from ._dtypes import SUB_BYTE_BPS, tiff_dtype_to_numpy
1919
from ._geotags import GeoInfo, GeoTransform, extract_geo_info
20-
from ._header import IFD, TIFFHeader, parse_all_ifds, parse_header
20+
from ._header import IFD, TIFFHeader, parse_all_ifds, parse_header, validate_tile_layout
2121

2222
# ---------------------------------------------------------------------------
2323
# Allocation guard: reject TIFF dimensions that would exhaust memory
@@ -501,6 +501,11 @@ def _read_tiles(data: bytes, ifd: IFD, header: TIFFHeader,
501501

502502
_check_dimensions(out_w, out_h, samples, max_pixels)
503503

504+
# Reject malformed TIFFs whose declared tile grid exceeds the number of
505+
# supplied TileOffsets entries. Silent skipping in the CPU loop below
506+
# would mask the problem, and the GPU path reads OOB. See issue #1219.
507+
validate_tile_layout(ifd)
508+
504509
_alloc = np.zeros if window is not None else np.empty
505510
if samples > 1:
506511
result = _alloc((out_h, out_w, samples), dtype=dtype)
@@ -664,6 +669,10 @@ def _read_cog_http(url: str, overview_level: int | None = None,
664669
# A single tile's decoded bytes must also fit under the pixel budget.
665670
_check_dimensions(tw, th, samples, max_pixels)
666671

672+
# Reject malformed TIFFs whose declared tile grid exceeds the supplied
673+
# TileOffsets length. See issue #1219.
674+
validate_tile_layout(ifd)
675+
667676
if samples > 1:
668677
result = np.empty((height, width, samples), dtype=dtype)
669678
else:

xrspatial/geotiff/tests/test_security.py

Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -372,3 +372,148 @@ def test_absolute_path_also_canonicalized(self, tmp_path):
372372

373373
assert ".." not in source_path
374374
assert source_path == os.path.realpath("/tmp/../tmp/test.tif")
375+
376+
377+
# ---------------------------------------------------------------------------
378+
# Tile layout validation (issue #1219)
379+
#
380+
# An adversarial TIFF can declare image dimensions that imply more tiles
381+
# than its TileOffsets tag supplies. The CPU path silently skipped the
382+
# missing tiles (zero-padded output) and the GPU tile-assembly kernel
383+
# read past the end of the decompression-offsets array on device.
384+
# ---------------------------------------------------------------------------
385+
386+
def _make_short_offsets_tiff(
    width: int,
    height: int,
    tile_size: int,
    declared_offset_count: int,
    dtype: np.dtype = np.dtype('float32'),
) -> bytes:
    """Return tiled-TIFF bytes whose TileOffsets count is under-declared.

    A regular tiled TIFF is generated with ``make_minimal_tiff`` and the
    count field of its TileOffsets (tag 324) entry in the first IFD is then
    overwritten with ``declared_offset_count``, which must be smaller than
    the number of tiles implied by the image and tile dimensions.
    """
    import math

    buf = bytearray(make_minimal_tiff(
        width, height, dtype, tiled=True, tile_size=tile_size))

    # Parse the untouched file so the header fields and real tile table
    # can be inspected before patching.
    header = parse_header(bytes(buf))
    first_ifd = parse_all_ifds(bytes(buf), header)[0]

    real_offsets = first_ifd.tile_offsets
    assert real_offsets is not None
    true_count = math.ceil(width / tile_size) * math.ceil(height / tile_size)
    assert len(real_offsets) == true_count
    assert declared_offset_count < true_count

    # The code below assumes classic (non-BigTIFF) 12-byte IFD entries:
    # tag (2 bytes), type (2 bytes), count (4 bytes), value/offset (4 bytes).
    # A 2-byte entry count precedes the entries.
    order = header.byte_order
    ifd_start = header.first_ifd_offset
    (entry_total,) = struct.unpack_from(f'{order}H', buf, ifd_start)
    first_entry = ifd_start + 2
    for pos in range(first_entry, first_entry + entry_total * 12, 12):
        (tag_id,) = struct.unpack_from(f'{order}H', buf, pos)
        if tag_id != 324:  # not TileOffsets
            continue
        # The count field sits 4 bytes into the entry.
        struct.pack_into(f'{order}I', buf, pos + 4, declared_offset_count)
        return bytes(buf)

    raise AssertionError("TileOffsets tag not found in IFD")
438+
439+
440+
class TestTileLayoutValidation:
    """Regression coverage for issue #1219 (tile grid vs. TileOffsets count)."""

    @staticmethod
    def _header_and_first_ifd(raw):
        """Parse *raw* TIFF bytes and return ``(header, first IFD)``."""
        hdr = parse_header(raw)
        return hdr, parse_all_ifds(raw, hdr)[0]

    def test_validate_tile_layout_rejects_short_offsets(self):
        """A grid larger than the TileOffsets count makes the validator raise."""
        from xrspatial.geotiff._header import validate_tile_layout

        # 16x16 pixels in 4x4 tiles needs 16 offsets; the file declares 4.
        raw = _make_short_offsets_tiff(
            width=16, height=16, tile_size=4, declared_offset_count=4)
        _, first = self._header_and_first_ifd(raw)

        with pytest.raises(ValueError, match="Malformed TIFF.*tile offsets"):
            validate_tile_layout(first)

    def test_validate_tile_layout_accepts_well_formed(self):
        """A regular tiled TIFF passes validation without raising."""
        from xrspatial.geotiff._header import validate_tile_layout

        raw = make_minimal_tiff(
            8, 8, np.dtype('float32'), tiled=True, tile_size=4)
        _, first = self._header_and_first_ifd(raw)

        # Passing means no exception.
        validate_tile_layout(first)

    def test_validate_tile_layout_ignores_stripped(self):
        """Stripped (non-tiled) TIFFs are outside the validator's scope."""
        from xrspatial.geotiff._header import validate_tile_layout

        raw = make_minimal_tiff(4, 4, np.dtype('float32'))
        _, first = self._header_and_first_ifd(raw)

        # Passing means no exception: the file is stripped, not tiled.
        validate_tile_layout(first)

    def test_read_tiles_rejects_short_offsets(self):
        """_read_tiles raises the malformed-TIFF error rather than
        silently zero-padding the tiles that have no offset."""
        raw = _make_short_offsets_tiff(
            width=16, height=16, tile_size=4, declared_offset_count=4)
        hdr, first = self._header_and_first_ifd(raw)
        pixel_dtype = tiff_dtype_to_numpy(
            first.bits_per_sample, first.sample_format)

        with pytest.raises(ValueError, match="Malformed TIFF"):
            _read_tiles(raw, first, hdr, pixel_dtype)

    def test_read_to_array_rejects_short_offsets(self, tmp_path):
        """End-to-end: reading a short-offsets file from disk raises a
        clear ValueError (not a silent zero output or CUDA crash)."""
        raw = _make_short_offsets_tiff(
            width=16, height=16, tile_size=4, declared_offset_count=4)
        target = tmp_path / "malformed_1219.tif"
        target.write_bytes(raw)

        with pytest.raises(ValueError, match="Malformed TIFF"):
            read_to_array(str(target))

    def test_boundary_exact_count_ok(self, tmp_path):
        """A file with exactly as many offsets as tiles round-trips intact."""
        # 8x8 pixels in 4x4 tiles is exactly 4 tiles.
        expected = np.arange(64, dtype=np.float32).reshape(8, 8)
        raw = make_minimal_tiff(
            8, 8, np.dtype('float32'),
            pixel_data=expected, tiled=True, tile_size=4)
        target = tmp_path / "exact_1219.tif"
        target.write_bytes(raw)

        arr, _ = read_to_array(str(target))
        np.testing.assert_array_equal(arr, expected)

0 commit comments

Comments
 (0)