diff --git a/xrspatial/geotiff/__init__.py b/xrspatial/geotiff/__init__.py index feffe706..6be5202a 100644 --- a/xrspatial/geotiff/__init__.py +++ b/xrspatial/geotiff/__init__.py @@ -1239,6 +1239,21 @@ def to_geotiff(data: xr.DataArray | np.ndarray, path = _coerce_path(path) + # Reject non-positive tile_size up front. The tiled writer computes + # the tile grid as ``math.ceil(width / tile_size)``; tile_size=0 hits + # ZeroDivisionError deep inside the writer, and negative values + # produce a nonsensical tile grid. tiled=False ignores tile_size, so + # only validate when tiled output is actually requested. + if tiled: + if not isinstance(tile_size, (int, np.integer)) or isinstance( + tile_size, bool): + raise ValueError( + f"tile_size must be a positive int, got " + f"{tile_size!r} (type {type(tile_size).__name__}).") + if tile_size <= 0: + raise ValueError( + f"tile_size must be a positive int, got tile_size={tile_size}.") + # Up-front validation: catch bad compression names before they reach # any of the deeper write paths (streaming, GPU, VRT, COG) where the # error surfaces from _compression_tag with a less obvious traceback. @@ -1908,6 +1923,33 @@ def read_geotiff_dask(source: str, *, dtype=None, chunks: int | tuple = 512, source = _coerce_path(source) + # Reject non-positive chunk sizes up front. ``chunks=0`` and negative + # values otherwise propagate into dask chunk math (``range(0, N, 0)`` + # ValueError, or empty chunk grids) with no indication that ``chunks`` + # was the problem. ``chunks`` may be an int or a (row, col) tuple. + if isinstance(chunks, int) and not isinstance(chunks, bool): + if chunks <= 0: + raise ValueError( + f"chunks must be a positive int or (row, col) tuple of " + f"positive ints, got chunks={chunks}.") + elif isinstance(chunks, tuple): + if len(chunks) != 2: + raise ValueError( + f"chunks tuple must have length 2 (row, col), got " + f"chunks={chunks!r} with length {len(chunks)}.") + for _v in chunks: + if (not isinstance(_v, (int, np.integer)) + or isinstance(_v, bool) + or _v <= 0): + raise ValueError( + f"chunks must be a positive int or (row, col) tuple " + f"of positive ints, got chunks={chunks!r}.") + else: + raise ValueError( + f"chunks must be a positive int or (row, col) tuple of " + f"positive ints, got chunks={chunks!r} " + f"(type {type(chunks).__name__}).") + # ``open_geotiff`` already routes ``.vrt`` to ``read_vrt`` before # reaching here, so this branch is only hit when ``read_geotiff_dask`` # is called directly with a VRT path. Keep it as a defensive fallback diff --git a/xrspatial/geotiff/tests/test_size_param_validation_1752.py b/xrspatial/geotiff/tests/test_size_param_validation_1752.py new file mode 100644 index 00000000..d9c94b92 --- /dev/null +++ b/xrspatial/geotiff/tests/test_size_param_validation_1752.py @@ -0,0 +1,120 @@ +"""Regression tests for issue #1752. + +Two public geotiff entry points used to accept size parameters without +checking they were positive: + +* ``to_geotiff(..., tiled=True, tile_size=0)`` reached the tiled writer + where ``math.ceil(width / tile_size)`` raised ``ZeroDivisionError``, + with a traceback that did not name ``tile_size`` as the bad input. +* ``read_geotiff_dask(chunks=0)`` (or ``chunks=(0, N)``) propagated zero + into dask's chunk math and surfaced as a confusing ``range()`` / + empty-chunks error. + +Both entry points now validate the size arguments up front and raise +``ValueError`` naming the parameter and the invalid value. +""" +from __future__ import annotations + +import os + +import numpy as np +import pytest +import xarray as xr + +from xrspatial.geotiff import read_geotiff_dask, to_geotiff + + +def _make_raster(tmp_path: str) -> str: + arr = np.arange(100, dtype=np.float32).reshape(10, 10) + da = xr.DataArray( + arr, dims=['y', 'x'], + coords={'y': np.arange(10), 'x': np.arange(10)}, + attrs={'transform': (1.0, 0.0, 0.0, 0.0, -1.0, 10.0)}, + ) + path = os.path.join(tmp_path, 'raster.tif') + to_geotiff(da, path) + return path + + +# -- to_geotiff tile_size --------------------------------------------------- + + +def test_to_geotiff_tile_size_zero_raises(tmp_path): + arr = np.arange(100, dtype=np.float32).reshape(10, 10) + da = xr.DataArray(arr, dims=['y', 'x']) + out = os.path.join(str(tmp_path), 'out.tif') + with pytest.raises(ValueError, match='tile_size'): + to_geotiff(da, out, tiled=True, tile_size=0) + + +def test_to_geotiff_tile_size_negative_raises(tmp_path): + arr = np.arange(100, dtype=np.float32).reshape(10, 10) + da = xr.DataArray(arr, dims=['y', 'x']) + out = os.path.join(str(tmp_path), 'out.tif') + with pytest.raises(ValueError, match='tile_size'): + to_geotiff(da, out, tiled=True, tile_size=-1) + + +def test_to_geotiff_tile_size_non_int_raises(tmp_path): + arr = np.arange(100, dtype=np.float32).reshape(10, 10) + da = xr.DataArray(arr, dims=['y', 'x']) + out = os.path.join(str(tmp_path), 'out.tif') + with pytest.raises(ValueError, match='tile_size'): + to_geotiff(da, out, tiled=True, tile_size=256.0) + + +def test_to_geotiff_tile_size_one_still_writes(tmp_path): + # tile_size=1 is silly but technically valid TIFF; do not reject it. + arr = np.arange(16, dtype=np.float32).reshape(4, 4) + da = xr.DataArray(arr, dims=['y', 'x']) + out = os.path.join(str(tmp_path), 'out.tif') + to_geotiff(da, out, tiled=True, tile_size=1) + assert os.path.exists(out) + + +# -- read_geotiff_dask chunks ---------------------------------------------- + + +def test_read_geotiff_dask_chunks_zero_raises(tmp_path): + path = _make_raster(str(tmp_path)) + with pytest.raises(ValueError, match='chunks'): + read_geotiff_dask(path, chunks=0) + + +def test_read_geotiff_dask_chunks_negative_raises(tmp_path): + path = _make_raster(str(tmp_path)) + with pytest.raises(ValueError, match='chunks'): + read_geotiff_dask(path, chunks=-1) + + +def test_read_geotiff_dask_chunks_tuple_zero_row_raises(tmp_path): + path = _make_raster(str(tmp_path)) + with pytest.raises(ValueError, match='chunks'): + read_geotiff_dask(path, chunks=(0, 256)) + + +def test_read_geotiff_dask_chunks_tuple_negative_col_raises(tmp_path): + path = _make_raster(str(tmp_path)) + with pytest.raises(ValueError, match='chunks'): + read_geotiff_dask(path, chunks=(256, -1)) + + +def test_read_geotiff_dask_chunks_tuple_wrong_length_raises(tmp_path): + path = _make_raster(str(tmp_path)) + with pytest.raises(ValueError, match='chunks'): + read_geotiff_dask(path, chunks=(64, 64, 64)) + + +def test_read_geotiff_dask_positive_int_chunks_works(tmp_path): + path = _make_raster(str(tmp_path)) + arr = read_geotiff_dask(path, chunks=256) + assert arr.shape == (10, 10) + # Materialise to confirm the lazy graph is well-formed. + np.asarray(arr) + + +def test_read_geotiff_dask_positive_tuple_chunks_works(tmp_path): + path = _make_raster(str(tmp_path)) + arr = read_geotiff_dask(path, chunks=(4, 8)) + assert arr.shape == (10, 10) + np.asarray(arr)