Skip to content

Commit deef216

Browse files
authored
Fix geotiff metadata propagation gaps (#1580, #1582) (#1585)
* Fix geotiff metadata propagation gaps (#1580, #1582) Two metadata-propagation defects flagged by the geotiff sweep: * #1580 -- write_geotiff_gpu treated arr.shape[:2] as (height, width) regardless of dim names, so a rioxarray-style (band, y, x) CuPy DataArray was written with the band axis stored as image width. Mirror the CPU writer's moveaxis remap when dims[0] is band/bands/ channel so both backends produce the same file for the same input. * #1582 -- to_geotiff (and the GPU writer it dispatches to) only consulted attrs['nodata'] when extracting the NoData sentinel, silently dropping rioxarray's attrs['nodatavals'] and CF-style attrs['_FillValue']. Add a _resolve_nodata_attr helper that walks those aliases in priority order (nodata -> nodatavals -> _FillValue) so a rioxarray-sourced DataArray round-trips with its sentinel intact. xrspatial's own nodata key still wins when both are set. Tests cover band-first dim ordering across all three accepted band dim names, byte-for-byte CPU/GPU parity, alias resolution priority, NaN-in-nodatavals (no GDAL_NODATA tag emitted), and the GPU writer honouring the same aliases. * Clarify _resolve_nodata_attr docstring on nodatavals selection Docstring said "Uses the first band's value" but the implementation returns the first usable entry, skipping None / non-numeric / NaN slots. Reword so callers don't assume only index 0 is consulted.
1 parent aa67395 commit deef216

4 files changed

Lines changed: 424 additions & 2 deletions

File tree

.claude/sweep-metadata-state.csv

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
module,last_inspected,issue,severity_max,categories_found,notes
2+
geotiff,2026-05-11,1580;1582,HIGH,1;3;4;5,"write_geotiff_gpu silently mis-ordered (band, y, x) DataArrays (#1580). to_geotiff ignored rioxarray nodatavals and CF _FillValue attrs (#1582). Both fixed in same branch."
23
reproject,2026-05-10,1572;1573,HIGH,1;3;4,geoid_height_raster dropped input attrs and used dims[-2:] for 3D inputs (#1572). reproject/merge ignored nodatavals (rasterio convention) when rioxarray absent (#1573). Fixed in same branch.

xrspatial/geotiff/__init__.py

Lines changed: 65 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -666,6 +666,61 @@ def _apply_nodata_mask_gpu(arr_gpu, nodata):
666666
_TIFF_SHORT = 3
667667

668668

669+
def _resolve_nodata_attr(attrs: dict):
670+
"""Resolve a NoData sentinel from DataArray attrs.
671+
672+
xrspatial's own readers always emit ``attrs['nodata']`` (a scalar),
673+
so that key is checked first for a clean intra-library round-trip.
674+
Falls back to two ecosystem conventions on miss:
675+
676+
* ``attrs['nodatavals']`` -- rioxarray's per-band tuple. Returns
677+
the first entry that is not None, not non-numeric, and not NaN.
678+
In practice this is band 0 for almost every real file; the skip
679+
logic only matters when band 0 is missing a sentinel (NaN /
680+
None) while a later band declares one. Bands with mixed concrete
681+
sentinels are uncommon and would need an explicit ``nodata=``
682+
argument anyway.
683+
* ``attrs['_FillValue']`` -- CF-style xarray pipelines.
684+
685+
Returns ``None`` when none of the keys carry a usable value. NaN
686+
entries in ``nodatavals`` are skipped rather than treated as a
687+
sentinel (NaN means "the float NaN is the sentinel", which is
688+
already the default and doesn't need a GDAL_NODATA tag).
689+
"""
690+
nodata = attrs.get('nodata')
691+
if nodata is not None:
692+
return nodata
693+
694+
vals = attrs.get('nodatavals')
695+
if vals is not None:
696+
try:
697+
seq = list(vals)
698+
except TypeError:
699+
seq = [vals]
700+
for v in seq:
701+
if v is None:
702+
continue
703+
try:
704+
fv = float(v)
705+
except (TypeError, ValueError):
706+
continue
707+
if np.isnan(fv):
708+
continue
709+
return v
710+
711+
fill = attrs.get('_FillValue')
712+
if fill is not None:
713+
try:
714+
ffv = float(fill)
715+
except (TypeError, ValueError):
716+
return fill # non-numeric -- pass through verbatim
717+
if np.isnan(ffv):
718+
return None
719+
return fill
720+
721+
return None
722+
723+
669724
def _merge_friendly_extra_tags(extra_tags_list, attrs: dict) -> list | None:
670725
"""Combine ``attrs['extra_tags']`` with friendly tag attrs.
671726
@@ -967,7 +1022,7 @@ def to_geotiff(data: xr.DataArray | np.ndarray, path, *,
9671022
if epsg is None and wkt_fallback is None:
9681023
wkt_fallback = wkt
9691024
if nodata is None:
970-
nodata = data.attrs.get('nodata')
1025+
nodata = _resolve_nodata_attr(data.attrs)
9711026
if data.attrs.get('raster_type') == 'point':
9721027
raster_type = RASTER_PIXEL_IS_POINT
9731028
gdal_meta_xml = data.attrs.get('gdal_metadata_xml')
@@ -2263,6 +2318,14 @@ def write_geotiff_gpu(data, path: str, *,
22632318
else:
22642319
arr = cupy.asarray(np.asarray(arr)) # numpy -> GPU
22652320

2321+
# Handle band-first dimension order (band, y, x) -> (y, x, band).
2322+
# rioxarray and CF-style multi-band rasters land here; without
2323+
# this remap the writer treats arr.shape[2] as the band axis and
2324+
# produces a transposed file (issue #1580). The CPU writer does
2325+
# the same remap at the matching step in to_geotiff().
2326+
if arr.ndim == 3 and data.dims[0] in ('band', 'bands', 'channel'):
2327+
arr = cupy.ascontiguousarray(cupy.moveaxis(arr, 0, -1))
2328+
22662329
# Prefer attrs['transform'] over the coord-derived transform: it
22672330
# is bit-stable across round-trips, while _coords_to_transform
22682331
# can drift on fractional pixel sizes (the same reasoning the
@@ -2290,7 +2353,7 @@ def write_geotiff_gpu(data, path: str, *,
22902353
if epsg is None and wkt_fallback is None:
22912354
wkt_fallback = wkt
22922355
if nodata is None:
2293-
nodata = data.attrs.get('nodata')
2356+
nodata = _resolve_nodata_attr(data.attrs)
22942357
if data.attrs.get('raster_type') == 'point':
22952358
raster_type = RASTER_PIXEL_IS_POINT
22962359
# Mirror the CPU writer's pass-through of GDAL metadata, the
Lines changed: 169 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,169 @@
1+
"""Regression test for issue #1580.
2+
3+
``write_geotiff_gpu`` used to assume the CuPy-backed DataArray was
4+
``(y, x[, band])`` and computed ``height, width = arr.shape[:2]``
5+
unconditionally. A rioxarray-style ``(band, y, x)`` 3D DataArray --
6+
auto-dispatched into the GPU writer from ``to_geotiff`` -- ended up
7+
written with the band axis stored as image width and the actual width
8+
stored as samples-per-pixel.
9+
10+
The CPU eager path in ``to_geotiff`` already handles this with a
11+
``np.moveaxis(arr, 0, -1)`` when ``data.dims[0] in ('band', 'bands',
12+
'channel')``; the GPU writer now mirrors that remap so both backends
13+
produce the same file for the same input.
14+
"""
15+
from __future__ import annotations
16+
17+
import importlib.util
18+
19+
import numpy as np
20+
import pytest
21+
import xarray as xr
22+
23+
from xrspatial.geotiff import open_geotiff, to_geotiff, write_geotiff_gpu
24+
25+
26+
def _gpu_available() -> bool:
27+
if importlib.util.find_spec("cupy") is None:
28+
return False
29+
try:
30+
import cupy
31+
return bool(cupy.cuda.is_available())
32+
except Exception:
33+
return False
34+
35+
36+
_HAS_GPU = _gpu_available()
37+
_gpu_only = pytest.mark.skipif(not _HAS_GPU, reason="cupy + CUDA required")
38+
39+
40+
@_gpu_only
41+
@pytest.mark.parametrize("band_dim_name", ["band", "bands", "channel"])
42+
def test_band_first_layout_written_correctly_via_write_geotiff_gpu(
43+
tmp_path, band_dim_name):
44+
"""Direct call to write_geotiff_gpu with a (band, y, x) CuPy
45+
DataArray must produce the same file dimensions as the CPU writer
46+
would (height=y, width=x, samples=band).
47+
"""
48+
import cupy
49+
rng = np.random.default_rng(seed=1580)
50+
arr_bhw = rng.integers(0, 255, (3, 16, 32), dtype=np.uint8)
51+
da = xr.DataArray(
52+
cupy.asarray(arr_bhw),
53+
dims=[band_dim_name, "y", "x"],
54+
coords={
55+
band_dim_name: np.arange(3),
56+
"y": np.arange(16, dtype=np.float64),
57+
"x": np.arange(32, dtype=np.float64),
58+
},
59+
attrs={"crs": 4326},
60+
)
61+
62+
out = str(tmp_path / f"band_first_1580_{band_dim_name}.tif")
63+
write_geotiff_gpu(da, out, compression="none")
64+
65+
rd = open_geotiff(out)
66+
assert rd.sizes["y"] == 16, (
67+
f"expected height=16, got {rd.sizes}; the writer is treating the "
68+
f"band axis as height"
69+
)
70+
assert rd.sizes["x"] == 32, f"expected width=32, got {rd.sizes}"
71+
assert rd.sizes["band"] == 3, (
72+
f"expected 3 bands, got {rd.sizes}; the writer is treating the "
73+
f"width as samples-per-pixel"
74+
)
75+
76+
# Pixel values should match the source after the (band, y, x) ->
77+
# (y, x, band) remap.
78+
np.testing.assert_array_equal(rd.values, np.moveaxis(arr_bhw, 0, -1))
79+
80+
81+
@_gpu_only
82+
def test_band_first_layout_via_to_geotiff_auto_dispatch(tmp_path):
83+
"""The user-facing path: pass a CuPy (band, y, x) DataArray to
84+
``to_geotiff`` and let auto-detection pick the GPU writer.
85+
"""
86+
import cupy
87+
rng = np.random.default_rng(seed=1580 + 1)
88+
arr_bhw = rng.integers(0, 255, (2, 8, 12), dtype=np.uint8)
89+
da = xr.DataArray(
90+
cupy.asarray(arr_bhw),
91+
dims=["band", "y", "x"],
92+
coords={
93+
"band": np.arange(2),
94+
"y": np.arange(8, dtype=np.float64),
95+
"x": np.arange(12, dtype=np.float64),
96+
},
97+
attrs={"crs": 4326},
98+
)
99+
100+
out = str(tmp_path / "band_first_1580_autodispatch.tif")
101+
to_geotiff(da, out, compression="none")
102+
103+
rd = open_geotiff(out)
104+
assert rd.sizes == {"y": 8, "x": 12, "band": 2}, (
105+
f"auto-dispatched GPU write produced wrong dims: {rd.sizes}"
106+
)
107+
np.testing.assert_array_equal(rd.values, np.moveaxis(arr_bhw, 0, -1))
108+
109+
110+
@_gpu_only
111+
def test_yxbands_layout_unchanged(tmp_path):
112+
"""Regression guard: the original (y, x, band) layout must still
113+
write correctly after the band-first remap was added.
114+
"""
115+
import cupy
116+
rng = np.random.default_rng(seed=1580 + 2)
117+
arr_yxb = rng.integers(0, 255, (8, 12, 2), dtype=np.uint8)
118+
da = xr.DataArray(
119+
cupy.asarray(arr_yxb),
120+
dims=["y", "x", "band"],
121+
coords={
122+
"y": np.arange(8, dtype=np.float64),
123+
"x": np.arange(12, dtype=np.float64),
124+
"band": np.arange(2),
125+
},
126+
attrs={"crs": 4326},
127+
)
128+
129+
out = str(tmp_path / "yxb_1580.tif")
130+
write_geotiff_gpu(da, out, compression="none")
131+
132+
rd = open_geotiff(out)
133+
assert rd.sizes == {"y": 8, "x": 12, "band": 2}
134+
np.testing.assert_array_equal(rd.values, arr_yxb)
135+
136+
137+
@_gpu_only
138+
def test_gpu_band_first_matches_cpu_byte_for_byte_on_pixel_values(tmp_path):
139+
"""Cross-backend parity: GPU and CPU writers must emit the same
140+
pixel values for the same (band, y, x) input.
141+
"""
142+
import cupy
143+
rng = np.random.default_rng(seed=1580 + 3)
144+
arr_bhw = rng.integers(0, 255, (3, 24, 40), dtype=np.uint8)
145+
da_cpu = xr.DataArray(
146+
arr_bhw,
147+
dims=["band", "y", "x"],
148+
coords={"band": np.arange(3),
149+
"y": np.arange(24, dtype=np.float64),
150+
"x": np.arange(40, dtype=np.float64)},
151+
attrs={"crs": 4326},
152+
)
153+
da_gpu = xr.DataArray(
154+
cupy.asarray(arr_bhw),
155+
dims=["band", "y", "x"],
156+
coords={"band": np.arange(3),
157+
"y": np.arange(24, dtype=np.float64),
158+
"x": np.arange(40, dtype=np.float64)},
159+
attrs={"crs": 4326},
160+
)
161+
162+
cpu_path = str(tmp_path / "band_first_1580_cpu.tif")
163+
gpu_path = str(tmp_path / "band_first_1580_gpu.tif")
164+
to_geotiff(da_cpu, cpu_path, compression="none", gpu=False)
165+
write_geotiff_gpu(da_gpu, gpu_path, compression="none")
166+
167+
cpu_rd = open_geotiff(cpu_path).values
168+
gpu_rd = open_geotiff(gpu_path).values
169+
np.testing.assert_array_equal(cpu_rd, gpu_rd)

0 commit comments

Comments
 (0)