Skip to content

Commit bad091d

Browse files
committed
Address review: add multiband tests, BigTIFF comment, fail-fast URI check (#1084)
- Add 3D band-last and band-first streaming write tests
- Add forced bigtiff=True round-trip test
- Add cloud URI rejection test
- Note the uint32 offset limitation for BigTIFF files > 4 GB
- Move fsspec URI check to top of write_streaming for fail-fast
1 parent c13cd20 commit bad091d

File tree

2 files changed

+64
-7
lines changed

2 files changed

+64
-7
lines changed

xrspatial/geotiff/_writer.py

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1043,6 +1043,12 @@ def write_streaming(dask_data, path: str, *,
10431043
import os
10441044
import tempfile
10451045

1046+
# Fail fast for unsupported destinations
1047+
if _is_fsspec_uri(path):
1048+
raise NotImplementedError(
1049+
"Streaming dask write to cloud storage is not yet supported. "
1050+
"Use .compute() first or write to a .vrt file.")
1051+
10461052
height, width = dask_data.shape[:2]
10471053
samples = dask_data.shape[2] if dask_data.ndim == 3 else 1
10481054
dtype = dask_data.dtype
@@ -1125,7 +1131,10 @@ def write_streaming(dask_data, path: str, *,
11251131
if resolution_unit is not None:
11261132
tags.append((TAG_RESOLUTION_UNIT, SHORT, 1, resolution_unit))
11271133

1128-
# Layout tags with placeholder offsets / byte-counts
1134+
# Layout tags with placeholder offsets / byte-counts.
1135+
# NOTE: offsets use TIFF type LONG (uint32). For BigTIFF files
1136+
# exceeding 4 GB these would need LONG8 -- same limitation as the
1137+
# eager writer.
11291138
placeholder = [0] * n_entries
11301139
if tiled:
11311140
tags.append((TAG_TILE_WIDTH, SHORT, 1, tile_size))
@@ -1184,12 +1193,6 @@ def write_streaming(dask_data, path: str, *,
11841193
bigtiff=use_bigtiff)
11851194
pixel_data_start = overflow_base + len(placeholder_overflow)
11861195

1187-
# Cloud storage not supported for streaming (needs seek)
1188-
if _is_fsspec_uri(path):
1189-
raise NotImplementedError(
1190-
"Streaming dask write to cloud storage is not yet supported. "
1191-
"Use .compute() first or write to a .vrt file.")
1192-
11931196
dir_name = os.path.dirname(os.path.abspath(path))
11941197
os.makedirs(dir_name, exist_ok=True)
11951198
fd, tmp_path = tempfile.mkstemp(dir=dir_name, suffix='.tif.tmp')

xrspatial/geotiff/tests/test_streaming_write.py

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,60 @@ def test_float64(self, tmp_path):
196196
np.testing.assert_array_almost_equal(result.values, arr)
197197

198198

199+
# -- Multiband ----------------------------------------------------------------
200+
201+
class TestStreamingMultiband:
202+
def test_3d_band_last(self, tmp_path):
203+
"""3D array with (y, x, band) layout."""
204+
arr = np.random.default_rng(1084).random(
205+
(100, 100, 3), dtype=np.float32)
206+
da = xr.DataArray(arr, dims=['y', 'x', 'band'])
207+
dask_da = da.chunk({'y': 50, 'x': 50})
208+
209+
path = str(tmp_path / 'band_last_1084.tif')
210+
to_geotiff(dask_da, path)
211+
result = open_geotiff(path)
212+
np.testing.assert_array_almost_equal(result.values, arr, decimal=5)
213+
214+
def test_3d_band_first(self, tmp_path):
215+
"""Band-first (band, y, x) DataArray gets transposed automatically."""
216+
arr = np.random.default_rng(1084).random(
217+
(3, 100, 100), dtype=np.float32)
218+
da = xr.DataArray(arr, dims=['band', 'y', 'x'])
219+
dask_da = da.chunk({'y': 50, 'x': 50})
220+
221+
path = str(tmp_path / 'band_first_1084.tif')
222+
to_geotiff(dask_da, path)
223+
result = open_geotiff(path)
224+
# Result is (y, x, band), so compare transposed
225+
np.testing.assert_array_almost_equal(
226+
result.values, np.moveaxis(arr, 0, -1), decimal=5)
227+
228+
229+
# -- BigTIFF and error cases --------------------------------------------------
230+
231+
class TestStreamingBigTiffAndErrors:
232+
def test_forced_bigtiff(self, tmp_path):
233+
"""bigtiff=True on a small array should produce a valid BigTIFF."""
234+
arr = np.arange(64, dtype=np.float32).reshape(8, 8)
235+
da = xr.DataArray(arr, dims=['y', 'x'])
236+
dask_da = da.chunk({'y': 4, 'x': 4})
237+
238+
path = str(tmp_path / 'bigtiff_1084.tif')
239+
to_geotiff(dask_da, path, bigtiff=True)
240+
result = open_geotiff(path)
241+
np.testing.assert_array_equal(result.values, arr)
242+
243+
def test_cloud_uri_raises(self, tmp_path):
244+
"""Streaming to cloud storage should raise NotImplementedError."""
245+
arr = np.ones((10, 10), dtype=np.float32)
246+
da = xr.DataArray(arr, dims=['y', 'x'])
247+
dask_da = da.chunk({'y': 5, 'x': 5})
248+
249+
with pytest.raises(NotImplementedError, match='cloud'):
250+
to_geotiff(dask_da, 's3://bucket/file.tif')
251+
252+
199253
# -- COG fallback to eager path -----------------------------------------------
200254

201255
class TestCogFallback:

0 commit comments

Comments
 (0)