Skip to content

Commit ed1e40f

Browse files
committed
Handle big-endian pixel data correctly on read
Pixel data read from big-endian TIFFs (byte order marker 'MM') is now byte-swapped to native order after decompression. Previously, the reader called .view(dtype) with a native-order dtype, which produced garbage values for multi-byte types (uint16, int32, float32, float64). Fix: _decode_strip_or_tile takes a new byte_order argument, views the decompressed buffer with dtype.newbyteorder(byte_order) (the file's byte order), and then calls .astype(dtype) to convert to the native dtype when a swap is needed. Single-byte types (uint8) need no swap. The COG HTTP reader path gets the same fix. Also fixed the test conftest: make_minimal_tiff(big_endian=True) now actually writes multi-byte pixel data in big-endian order. 7 new tests, all using big-endian TIFFs: float32, uint16, int32, float64, uint8 (no swap), windowed read, and the public API.
1 parent a7df688 commit ed1e40f

File tree

3 files changed

+138
-8
lines changed

3 files changed

+138
-8
lines changed

xrspatial/geotiff/_reader.py

Lines changed: 29 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -194,9 +194,17 @@ def _packed_byte_count(pixel_count: int, bps: int) -> int:
194194

195195

196196
def _decode_strip_or_tile(data_slice, compression, width, height, samples,
197-
bps, bytes_per_sample, is_sub_byte, dtype, pred):
197+
bps, bytes_per_sample, is_sub_byte, dtype, pred,
198+
byte_order='<'):
198199
"""Decompress, apply predictor, unpack sub-byte, and reshape a strip/tile.
199200
201+
Parameters
202+
----------
203+
byte_order : str
204+
'<' for little-endian, '>' for big-endian. When the file byte
205+
order differs from the system's native order, pixel data is
206+
byte-swapped after decompression.
207+
200208
Returns an array shaped (height, width) or (height, width, samples).
201209
"""
202210
pixel_count = width * height * samples
@@ -217,13 +225,21 @@ def _decode_strip_or_tile(data_slice, compression, width, height, samples,
217225
if is_sub_byte:
218226
pixels = unpack_bits(chunk, bps, pixel_count)
219227
else:
220-
pixels = chunk.view(dtype)
228+
# Use the file's byte order for the view, then convert to native
229+
file_dtype = dtype.newbyteorder(byte_order)
230+
pixels = chunk.view(file_dtype)
231+
if file_dtype.byteorder not in ('=', '|', _NATIVE_ORDER):
232+
pixels = pixels.astype(dtype)
221233

222234
if samples > 1:
223235
return pixels.reshape(height, width, samples)
224236
return pixels.reshape(height, width)
225237

226238

239+
import sys as _sys
240+
_NATIVE_ORDER = '<' if _sys.byteorder == 'little' else '>'
241+
242+
227243
# ---------------------------------------------------------------------------
228244
# Strip reader
229245
# ---------------------------------------------------------------------------
@@ -305,7 +321,8 @@ def _read_strips(data: bytes, ifd: IFD, header: TIFFHeader,
305321
strip_data = data[offsets[global_idx]:offsets[global_idx] + byte_counts[global_idx]]
306322
strip_pixels = _decode_strip_or_tile(
307323
strip_data, compression, width, strip_rows, 1,
308-
bps, bytes_per_sample, is_sub_byte, dtype, pred)
324+
bps, bytes_per_sample, is_sub_byte, dtype, pred,
325+
byte_order=header.byte_order)
309326

310327
src_r0 = max(r0 - strip_row, 0)
311328
src_r1 = min(r1 - strip_row, strip_rows)
@@ -326,7 +343,8 @@ def _read_strips(data: bytes, ifd: IFD, header: TIFFHeader,
326343
strip_data = data[offsets[strip_idx]:offsets[strip_idx] + byte_counts[strip_idx]]
327344
strip_pixels = _decode_strip_or_tile(
328345
strip_data, compression, width, strip_rows, samples,
329-
bps, bytes_per_sample, is_sub_byte, dtype, pred)
346+
bps, bytes_per_sample, is_sub_byte, dtype, pred,
347+
byte_order=header.byte_order)
330348

331349
src_r0 = max(r0 - strip_row, 0)
332350
src_r1 = min(r1 - strip_row, strip_rows)
@@ -424,7 +442,8 @@ def _read_tiles(data: bytes, ifd: IFD, header: TIFFHeader,
424442
tile_data = data[offsets[tile_idx]:offsets[tile_idx] + byte_counts[tile_idx]]
425443
tile_pixels = _decode_strip_or_tile(
426444
tile_data, compression, tw, th, tile_samples,
427-
bps, bytes_per_sample, is_sub_byte, dtype, pred)
445+
bps, bytes_per_sample, is_sub_byte, dtype, pred,
446+
byte_order=header.byte_order)
428447

429448
tile_r0 = tr * th
430449
tile_c0 = tc * tw
@@ -552,10 +571,13 @@ def _read_cog_http(url: str, overview_level: int | None = None,
552571
chunk = chunk.copy()
553572
chunk = _apply_predictor(chunk, pred, tw, th, bytes_per_sample * samples)
554573

574+
file_dtype = dtype.newbyteorder(header.byte_order)
555575
if samples > 1:
556-
tile_pixels = chunk.view(dtype).reshape(th, tw, samples)
576+
tile_pixels = chunk.view(file_dtype).reshape(th, tw, samples)
557577
else:
558-
tile_pixels = chunk.view(dtype).reshape(th, tw)
578+
tile_pixels = chunk.view(file_dtype).reshape(th, tw)
579+
if file_dtype.byteorder not in ('=', '|', _NATIVE_ORDER):
580+
tile_pixels = tile_pixels.astype(dtype)
559581

560582
# Place tile
561583
y0 = tr * th

xrspatial/geotiff/tests/conftest.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,10 @@ def make_minimal_tiff(
6363
pixel_bytes = b''.join(tile_blobs)
6464
tile_byte_counts = [len(b) for b in tile_blobs]
6565
else:
66-
pixel_bytes = pixel_data.tobytes()
66+
if big_endian and pixel_data.dtype.itemsize > 1:
67+
pixel_bytes = pixel_data.astype(pixel_data.dtype.newbyteorder('>')).tobytes()
68+
else:
69+
pixel_bytes = pixel_data.tobytes()
6770

6871
# --- Collect tags as (tag_id, type_id, value_bytes) ---
6972
# value_bytes is the serialized value; if len <= 4 it's inline, else overflow.

xrspatial/geotiff/tests/test_features.py

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -421,6 +421,111 @@ def test_no_crs_no_wkt(self, tmp_path):
421421
# Arbitrary tag preservation
422422
# -----------------------------------------------------------------------
423423

424+
# -----------------------------------------------------------------------
425+
# Big-endian pixel data
426+
# -----------------------------------------------------------------------
427+
428+
class TestBigEndian:
429+
430+
def test_float32_big_endian(self, tmp_path):
431+
"""Read a big-endian float32 TIFF."""
432+
from .conftest import make_minimal_tiff
433+
expected = np.arange(16, dtype=np.float32).reshape(4, 4)
434+
tiff_data = make_minimal_tiff(4, 4, np.dtype('float32'),
435+
pixel_data=expected, big_endian=True)
436+
path = str(tmp_path / 'be_f32.tif')
437+
with open(path, 'wb') as f:
438+
f.write(tiff_data)
439+
440+
result, _ = read_to_array(path)
441+
assert result.dtype == np.float32
442+
np.testing.assert_array_equal(result, expected)
443+
444+
def test_uint16_big_endian(self, tmp_path):
445+
"""Read a big-endian uint16 TIFF."""
446+
from .conftest import make_minimal_tiff
447+
expected = np.arange(20, dtype=np.uint16).reshape(4, 5) * 1000
448+
tiff_data = make_minimal_tiff(5, 4, np.dtype('uint16'),
449+
pixel_data=expected, big_endian=True)
450+
path = str(tmp_path / 'be_u16.tif')
451+
with open(path, 'wb') as f:
452+
f.write(tiff_data)
453+
454+
result, _ = read_to_array(path)
455+
assert result.dtype == np.uint16
456+
np.testing.assert_array_equal(result, expected)
457+
458+
def test_int32_big_endian(self, tmp_path):
459+
"""Read a big-endian int32 TIFF."""
460+
from .conftest import make_minimal_tiff
461+
expected = np.arange(16, dtype=np.int32).reshape(4, 4) - 8
462+
tiff_data = make_minimal_tiff(4, 4, np.dtype('int32'),
463+
pixel_data=expected, big_endian=True)
464+
path = str(tmp_path / 'be_i32.tif')
465+
with open(path, 'wb') as f:
466+
f.write(tiff_data)
467+
468+
result, _ = read_to_array(path)
469+
assert result.dtype == np.int32
470+
np.testing.assert_array_equal(result, expected)
471+
472+
def test_float64_big_endian(self, tmp_path):
473+
"""Read a big-endian float64 TIFF."""
474+
from .conftest import make_minimal_tiff
475+
expected = np.linspace(-1.0, 1.0, 16, dtype=np.float64).reshape(4, 4)
476+
tiff_data = make_minimal_tiff(4, 4, np.dtype('float64'),
477+
pixel_data=expected, big_endian=True)
478+
path = str(tmp_path / 'be_f64.tif')
479+
with open(path, 'wb') as f:
480+
f.write(tiff_data)
481+
482+
result, _ = read_to_array(path)
483+
assert result.dtype == np.float64
484+
np.testing.assert_array_almost_equal(result, expected)
485+
486+
def test_uint8_big_endian_no_swap_needed(self, tmp_path):
487+
"""uint8 big-endian needs no byte swap (single byte per sample)."""
488+
from .conftest import make_minimal_tiff
489+
expected = np.arange(16, dtype=np.uint8).reshape(4, 4)
490+
tiff_data = make_minimal_tiff(4, 4, np.dtype('uint8'),
491+
pixel_data=expected, big_endian=True)
492+
path = str(tmp_path / 'be_u8.tif')
493+
with open(path, 'wb') as f:
494+
f.write(tiff_data)
495+
496+
result, _ = read_to_array(path)
497+
np.testing.assert_array_equal(result, expected)
498+
499+
def test_big_endian_windowed(self, tmp_path):
500+
"""Windowed read of a big-endian TIFF."""
501+
from .conftest import make_minimal_tiff
502+
expected = np.arange(64, dtype=np.float32).reshape(8, 8)
503+
tiff_data = make_minimal_tiff(8, 8, np.dtype('float32'),
504+
pixel_data=expected, big_endian=True)
505+
path = str(tmp_path / 'be_window.tif')
506+
with open(path, 'wb') as f:
507+
f.write(tiff_data)
508+
509+
result, _ = read_to_array(path, window=(2, 3, 6, 7))
510+
np.testing.assert_array_equal(result, expected[2:6, 3:7])
511+
512+
def test_big_endian_via_public_api(self, tmp_path):
513+
"""read_geotiff handles big-endian files."""
514+
from .conftest import make_minimal_tiff
515+
expected = np.arange(16, dtype=np.float32).reshape(4, 4)
516+
tiff_data = make_minimal_tiff(
517+
4, 4, np.dtype('float32'), pixel_data=expected,
518+
big_endian=True,
519+
geo_transform=(-120.0, 45.0, 0.001, -0.001), epsg=4326)
520+
path = str(tmp_path / 'be_api.tif')
521+
with open(path, 'wb') as f:
522+
f.write(tiff_data)
523+
524+
da = read_geotiff(path)
525+
assert da.attrs['crs'] == 4326
526+
np.testing.assert_array_equal(da.values, expected)
527+
528+
424529
class TestExtraTags:
425530

426531
def _make_tiff_with_extra_tags(self, tmp_path):

0 commit comments

Comments
 (0)