Skip to content

Commit a7df688

Browse files
committed
Fix BigTIFF auto-detection and add bigtiff= parameter
The auto-detection now estimates the total file size (header + IFDs + overflow + pixel data) instead of only checking the compressed pixel data size, and compares it against UINT32_MAX (4,294,967,295) rather than a hardcoded 3.9 GB threshold.

Also adds a bigtiff= parameter to write() and write_geotiff():
- bigtiff=None (default): auto-detect based on the estimated file size
- bigtiff=True: force BigTIFF even for small files
- bigtiff=False: force classic TIFF (the user's responsibility if the file exceeds 4 GB)

Adds 3 new tests: forcing BigTIFF via the public API, verifying a small file stays classic, and forcing classic TIFF via bigtiff=False.
1 parent cc77511 commit a7df688

File tree

3 files changed

+57
-6
lines changed

3 files changed

+57
-6
lines changed

xrspatial/geotiff/__init__.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -248,7 +248,8 @@ def write_geotiff(data: xr.DataArray | np.ndarray, path: str, *,
248248
predictor: bool = False,
249249
cog: bool = False,
250250
overview_levels: list[int] | None = None,
251-
overview_resampling: str = 'mean') -> None:
251+
overview_resampling: str = 'mean',
252+
bigtiff: bool | None = None) -> None:
252253
"""Write data as a GeoTIFF or Cloud Optimized GeoTIFF.
253254
254255
Parameters
@@ -349,6 +350,7 @@ def write_geotiff(data: xr.DataArray | np.ndarray, path: str, *,
349350
resolution_unit=res_unit,
350351
gdal_metadata_xml=gdal_meta_xml,
351352
extra_tags=extra_tags_list,
353+
bigtiff=bigtiff,
352354
)
353355

354356

xrspatial/geotiff/_writer.py

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -417,7 +417,8 @@ def _assemble_tiff(width: int, height: int, dtype: np.dtype,
417417
extra_tags: list | None = None,
418418
x_resolution: float | None = None,
419419
y_resolution: float | None = None,
420-
resolution_unit: int | None = None) -> bytes:
420+
resolution_unit: int | None = None,
421+
force_bigtiff: bool | None = None) -> bytes:
421422
"""Assemble a complete TIFF file.
422423
423424
Parameters
@@ -535,9 +536,22 @@ def _assemble_tiff(width: int, height: int, dtype: np.dtype,
535536
ifd_specs.append(tags)
536537

537538
# --- Determine if BigTIFF is needed ---
538-
total_data = sum(sum(len(c) for c in chunks)
539-
for _, _, _, _, _, chunks in pixel_data_parts)
540-
bigtiff = total_data > 3_900_000_000 # ~4GB threshold with margin
539+
# Classic TIFF uses 32-bit offsets (max ~4.29 GB). Estimate total file
540+
# size including headers, IFDs, overflow data, and all pixel data.
541+
# Switch to BigTIFF if any offset could exceed 2^32.
542+
total_pixel_data = sum(sum(len(c) for c in chunks)
543+
for _, _, _, _, _, chunks in pixel_data_parts)
544+
# Conservative overhead estimate: header + IFDs + overflow + geo tags
545+
num_levels = len(ifd_specs)
546+
max_tags_per_ifd = max(len(tags) for tags in ifd_specs) if ifd_specs else 20
547+
ifd_overhead = num_levels * (2 + 12 * max_tags_per_ifd + 4 + 1024) # ~1KB overflow per IFD
548+
estimated_file_size = 8 + ifd_overhead + total_pixel_data
549+
550+
UINT32_MAX = 0xFFFFFFFF # 4,294,967,295
551+
if force_bigtiff is not None:
552+
bigtiff = force_bigtiff
553+
else:
554+
bigtiff = estimated_file_size > UINT32_MAX
541555

542556
header_size = 16 if bigtiff else 8
543557

@@ -721,7 +735,8 @@ def write(data: np.ndarray, path: str, *,
721735
y_resolution: float | None = None,
722736
resolution_unit: int | None = None,
723737
gdal_metadata_xml: str | None = None,
724-
extra_tags: list | None = None) -> None:
738+
extra_tags: list | None = None,
739+
bigtiff: bool | None = None) -> None:
725740
"""Write a numpy array as a GeoTIFF or COG.
726741
727742
Parameters
@@ -794,6 +809,7 @@ def write(data: np.ndarray, path: str, *,
794809
extra_tags=extra_tags,
795810
x_resolution=x_resolution, y_resolution=y_resolution,
796811
resolution_unit=resolution_unit,
812+
force_bigtiff=bigtiff,
797813
)
798814

799815
# Write to a temp file then atomically rename, so concurrent writes to

xrspatial/geotiff/tests/test_features.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -957,6 +957,39 @@ def test_bigtiff_read_write_round_trip(self, tmp_path):
957957
result, _ = read_to_array(path)
958958
np.testing.assert_array_equal(result, arr)
959959

960+
def test_force_bigtiff_via_public_api(self, tmp_path):
961+
"""bigtiff=True on write_geotiff forces BigTIFF even for small files."""
962+
arr = np.arange(16, dtype=np.float32).reshape(4, 4)
963+
path = str(tmp_path / 'forced_bigtiff.tif')
964+
write_geotiff(arr, path, compression='none', bigtiff=True)
965+
966+
with open(path, 'rb') as f:
967+
header = parse_header(f.read(16))
968+
assert header.is_bigtiff
969+
970+
result = read_geotiff(path)
971+
np.testing.assert_array_equal(result.values, arr)
972+
973+
def test_small_file_stays_classic(self, tmp_path):
974+
"""Small files default to classic TIFF (bigtiff=None auto-detects)."""
975+
arr = np.arange(16, dtype=np.float32).reshape(4, 4)
976+
path = str(tmp_path / 'classic.tif')
977+
write_geotiff(arr, path, compression='none')
978+
979+
with open(path, 'rb') as f:
980+
header = parse_header(f.read(16))
981+
assert not header.is_bigtiff
982+
983+
def test_force_bigtiff_false_stays_classic(self, tmp_path):
984+
"""bigtiff=False forces classic TIFF."""
985+
arr = np.arange(16, dtype=np.float32).reshape(4, 4)
986+
path = str(tmp_path / 'forced_classic.tif')
987+
write_geotiff(arr, path, compression='none', bigtiff=False)
988+
989+
with open(path, 'rb') as f:
990+
header = parse_header(f.read(16))
991+
assert not header.is_bigtiff
992+
960993

961994
# -----------------------------------------------------------------------
962995
# Sub-byte bit depths (1-bit, 4-bit, 12-bit)

0 commit comments

Comments
 (0)