Skip to content

Commit 96ea30c

Browse files
committed
geotiff: validate tile_size and chunks size args (#1752)
`to_geotiff(..., tiled=True, tile_size=0)` reached the tiled writer where `math.ceil(width / tile_size)` raised `ZeroDivisionError` without naming `tile_size`. `read_geotiff_dask(chunks=0)` (or `chunks=(0, N)`) propagated zero into dask's chunk math and surfaced as a confusing `range()` / empty-chunks error. Add up-front checks at both entry points that reject non-positive ints, non-int values, and malformed `chunks` tuples with a `ValueError` that names the parameter and the bad value. Style matches the existing `window=` validator in `read_geotiff_dask`. Closes #1752.
1 parent e7b9cde commit 96ea30c

2 files changed

Lines changed: 162 additions & 0 deletions

File tree

xrspatial/geotiff/__init__.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1239,6 +1239,21 @@ def to_geotiff(data: xr.DataArray | np.ndarray,
12391239

12401240
path = _coerce_path(path)
12411241

1242+
# Reject non-positive tile_size up front. The tiled writer computes
1243+
# the tile grid as ``math.ceil(width / tile_size)``; tile_size=0 hits
1244+
# ZeroDivisionError deep inside the writer, and negative values
1245+
# produce a nonsensical tile grid. tiled=False ignores tile_size, so
1246+
# only validate when tiled output is actually requested.
1247+
if tiled:
1248+
if not isinstance(tile_size, (int, np.integer)) or isinstance(
1249+
tile_size, bool):
1250+
raise ValueError(
1251+
f"tile_size must be a positive int, got "
1252+
f"{tile_size!r} (type {type(tile_size).__name__}).")
1253+
if tile_size <= 0:
1254+
raise ValueError(
1255+
f"tile_size must be a positive int, got tile_size={tile_size}.")
1256+
12421257
# Up-front validation: catch bad compression names before they reach
12431258
# any of the deeper write paths (streaming, GPU, VRT, COG) where the
12441259
# error surfaces from _compression_tag with a less obvious traceback.
@@ -1908,6 +1923,33 @@ def read_geotiff_dask(source: str, *, dtype=None, chunks: int | tuple = 512,
19081923

19091924
source = _coerce_path(source)
19101925

1926+
# Reject non-positive chunk sizes up front. ``chunks=0`` and negative
1927+
# values otherwise propagate into dask chunk math (``range(0, N, 0)``
1928+
# ValueError, or empty chunk grids) with no indication that ``chunks``
1929+
# was the problem. ``chunks`` may be an int or a (row, col) tuple.
1930+
if isinstance(chunks, int) and not isinstance(chunks, bool):
1931+
if chunks <= 0:
1932+
raise ValueError(
1933+
f"chunks must be a positive int or (row, col) tuple of "
1934+
f"positive ints, got chunks={chunks}.")
1935+
elif isinstance(chunks, tuple):
1936+
if len(chunks) != 2:
1937+
raise ValueError(
1938+
f"chunks tuple must have length 2 (row, col), got "
1939+
f"chunks={chunks!r} with length {len(chunks)}.")
1940+
for _v in chunks:
1941+
if (not isinstance(_v, (int, np.integer))
1942+
or isinstance(_v, bool)
1943+
or _v <= 0):
1944+
raise ValueError(
1945+
f"chunks must be a positive int or (row, col) tuple "
1946+
f"of positive ints, got chunks={chunks!r}.")
1947+
else:
1948+
raise ValueError(
1949+
f"chunks must be a positive int or (row, col) tuple of "
1950+
f"positive ints, got chunks={chunks!r} "
1951+
f"(type {type(chunks).__name__}).")
1952+
19111953
# ``open_geotiff`` already routes ``.vrt`` to ``read_vrt`` before
19121954
# reaching here, so this branch is only hit when ``read_geotiff_dask``
19131955
# is called directly with a VRT path. Keep it as a defensive fallback
Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
"""Regression tests for issue #1752.
2+
3+
Two public geotiff entry points used to accept size parameters without
4+
checking they were positive:
5+
6+
* ``to_geotiff(..., tiled=True, tile_size=0)`` reached the tiled writer
7+
where ``math.ceil(width / tile_size)`` raised ``ZeroDivisionError``,
8+
with a traceback that did not name ``tile_size`` as the bad input.
9+
* ``read_geotiff_dask(chunks=0)`` (or ``chunks=(0, N)``) propagated zero
10+
into dask's chunk math and surfaced as a confusing ``range()`` /
11+
empty-chunks error.
12+
13+
Both entry points now validate the size arguments up front and raise
14+
``ValueError`` naming the parameter and the invalid value.
15+
"""
16+
from __future__ import annotations
17+
18+
import os
19+
20+
import numpy as np
21+
import pytest
22+
import xarray as xr
23+
24+
from xrspatial.geotiff import read_geotiff_dask, to_geotiff
25+
26+
27+
def _make_raster(tmp_path: str) -> str:
28+
arr = np.arange(100, dtype=np.float32).reshape(10, 10)
29+
da = xr.DataArray(
30+
arr, dims=['y', 'x'],
31+
coords={'y': np.arange(10), 'x': np.arange(10)},
32+
attrs={'transform': (1.0, 0.0, 0.0, 0.0, -1.0, 10.0)},
33+
)
34+
path = os.path.join(tmp_path, 'raster.tif')
35+
to_geotiff(da, path)
36+
return path
37+
38+
39+
# -- to_geotiff tile_size ---------------------------------------------------
40+
41+
42+
def test_to_geotiff_tile_size_zero_raises(tmp_path):
43+
arr = np.arange(100, dtype=np.float32).reshape(10, 10)
44+
da = xr.DataArray(arr, dims=['y', 'x'])
45+
out = os.path.join(str(tmp_path), 'out.tif')
46+
with pytest.raises(ValueError, match='tile_size'):
47+
to_geotiff(da, out, tiled=True, tile_size=0)
48+
49+
50+
def test_to_geotiff_tile_size_negative_raises(tmp_path):
51+
arr = np.arange(100, dtype=np.float32).reshape(10, 10)
52+
da = xr.DataArray(arr, dims=['y', 'x'])
53+
out = os.path.join(str(tmp_path), 'out.tif')
54+
with pytest.raises(ValueError, match='tile_size'):
55+
to_geotiff(da, out, tiled=True, tile_size=-1)
56+
57+
58+
def test_to_geotiff_tile_size_non_int_raises(tmp_path):
59+
arr = np.arange(100, dtype=np.float32).reshape(10, 10)
60+
da = xr.DataArray(arr, dims=['y', 'x'])
61+
out = os.path.join(str(tmp_path), 'out.tif')
62+
with pytest.raises(ValueError, match='tile_size'):
63+
to_geotiff(da, out, tiled=True, tile_size=256.0)
64+
65+
66+
def test_to_geotiff_tile_size_one_still_writes(tmp_path):
67+
# tile_size=1 is silly but technically valid TIFF; do not reject it.
68+
arr = np.arange(16, dtype=np.float32).reshape(4, 4)
69+
da = xr.DataArray(arr, dims=['y', 'x'])
70+
out = os.path.join(str(tmp_path), 'out.tif')
71+
to_geotiff(da, out, tiled=True, tile_size=1)
72+
assert os.path.exists(out)
73+
74+
75+
# -- read_geotiff_dask chunks ----------------------------------------------
76+
77+
78+
def test_read_geotiff_dask_chunks_zero_raises(tmp_path):
79+
path = _make_raster(str(tmp_path))
80+
with pytest.raises(ValueError, match='chunks'):
81+
read_geotiff_dask(path, chunks=0)
82+
83+
84+
def test_read_geotiff_dask_chunks_negative_raises(tmp_path):
85+
path = _make_raster(str(tmp_path))
86+
with pytest.raises(ValueError, match='chunks'):
87+
read_geotiff_dask(path, chunks=-1)
88+
89+
90+
def test_read_geotiff_dask_chunks_tuple_zero_row_raises(tmp_path):
91+
path = _make_raster(str(tmp_path))
92+
with pytest.raises(ValueError, match='chunks'):
93+
read_geotiff_dask(path, chunks=(0, 256))
94+
95+
96+
def test_read_geotiff_dask_chunks_tuple_negative_col_raises(tmp_path):
97+
path = _make_raster(str(tmp_path))
98+
with pytest.raises(ValueError, match='chunks'):
99+
read_geotiff_dask(path, chunks=(256, -1))
100+
101+
102+
def test_read_geotiff_dask_chunks_tuple_wrong_length_raises(tmp_path):
103+
path = _make_raster(str(tmp_path))
104+
with pytest.raises(ValueError, match='chunks'):
105+
read_geotiff_dask(path, chunks=(64, 64, 64))
106+
107+
108+
def test_read_geotiff_dask_positive_int_chunks_works(tmp_path):
109+
path = _make_raster(str(tmp_path))
110+
arr = read_geotiff_dask(path, chunks=256)
111+
assert arr.shape == (10, 10)
112+
# Materialise to confirm the lazy graph is well-formed.
113+
np.asarray(arr)
114+
115+
116+
def test_read_geotiff_dask_positive_tuple_chunks_works(tmp_path):
117+
path = _make_raster(str(tmp_path))
118+
arr = read_geotiff_dask(path, chunks=(4, 8))
119+
assert arr.shape == (10, 10)
120+
np.asarray(arr)

0 commit comments

Comments
 (0)