Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .claude/sweep-metadata-state.csv
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@ module,last_inspected,issue,severity_max,categories_found,notes
geotiff,2026-05-11,1598,MEDIUM,4,"read_vrt(band=N) used vrt.bands[0].nodata for attrs and integer-promotion mask, mis-masking band N reads with per-band sentinels (#1598)."
geotiff,2026-05-11,1597,HIGH,4;5,read_geotiff_dask dropped nodata mask when sentinel only hit non-first chunks (per-chunk dtype divergence; #1597).
reproject,2026-05-10,1572;1573,HIGH,1;3;4,geoid_height_raster dropped input attrs and used dims[-2:] for 3D inputs (#1572). reproject/merge ignored nodatavals (rasterio convention) when rioxarray absent (#1573). Fixed in same branch.
geotiff,2026-05-11,1606,MEDIUM,1;5,_write_vrt_tiled dropped nodatavals/_FillValue aliases plus gdal_metadata/extra_tags/x_resolution/y_resolution/resolution_unit/raster_type; backend-inconsistent with to_geotiff(.tif) and write_geotiff_gpu (#1606).
74 changes: 69 additions & 5 deletions xrspatial/geotiff/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1197,8 +1197,22 @@ def to_geotiff(data: xr.DataArray | np.ndarray, path, *,
def _write_single_tile(chunk_data, path, geo_transform, epsg, wkt,
nodata, compression, compression_level,
tile_size, predictor, bigtiff,
max_z_error: float = 0.0):
"""Write a single tile GeoTIFF. Used by _write_vrt_tiled."""
max_z_error: float = 0.0,
raster_type: int = RASTER_PIXEL_IS_AREA,
x_resolution=None,
y_resolution=None,
resolution_unit=None,
gdal_metadata_xml=None,
extra_tags=None):
"""Write a single tile GeoTIFF. Used by _write_vrt_tiled.

Forwards the same rich-tag set that ``to_geotiff`` passes through to
``write`` (raster_type, x/y resolution, GDAL metadata, extra tags) so
every per-tile file under a VRT carries the same metadata it would
have received from a single-file ``to_geotiff(..., out.tif)`` write.
Without this, ``to_geotiff(da, "out.vrt")`` silently drops everything
except the per-tile geo_transform / crs / nodata. See issue #1606.
"""
if hasattr(chunk_data, 'compute'):
chunk_data = chunk_data.compute()
if hasattr(chunk_data, 'get'):
Expand Down Expand Up @@ -1234,6 +1248,12 @@ def _write_single_tile(chunk_data, path, geo_transform, epsg, wkt,
tile_size=tile_size,
predictor=predictor,
compression_level=compression_level,
raster_type=raster_type,
x_resolution=x_resolution,
y_resolution=y_resolution,
resolution_unit=resolution_unit,
gdal_metadata_xml=gdal_metadata_xml,
extra_tags=extra_tags,
bigtiff=bigtiff,
max_z_error=max_z_error)

Expand Down Expand Up @@ -1282,6 +1302,12 @@ def _write_vrt_tiled(data, vrt_path, *, crs=None, nodata=None,
wkt_fallback = crs

geo_transform = None
raster_type = RASTER_PIXEL_IS_AREA
x_res = None
y_res = None
res_unit = None
gdal_meta_xml = None
extra_tags_list = None

if isinstance(data, xr.DataArray):
raw = data.data
Expand All @@ -1300,10 +1326,36 @@ def _write_vrt_tiled(data, vrt_path, *, crs=None, nodata=None,
if epsg is None and wkt_fallback is None:
wkt_fallback = wkt
if nodata is None:
nodata = data.attrs.get('nodata')
# Use the same alias-aware resolver that to_geotiff /
# write_geotiff_gpu apply so a rioxarray-style DataArray
# (``attrs['nodatavals']``) or a CF-style one
# (``attrs['_FillValue']``) round-trips through ``.vrt``
# the same way it does through ``.tif``. Before this fix
# the VRT path used ``attrs.get('nodata')`` directly and
# silently dropped both aliases (issue #1606).
nodata = _resolve_nodata_attr(data.attrs)
geo_transform = _transform_from_attr(data.attrs.get('transform'))
if geo_transform is None:
geo_transform = _coords_to_transform(data)
# Pull the same rich-tag set that to_geotiff forwards to
# ``write`` so per-tile files under the VRT carry it too.
if data.attrs.get('raster_type') == 'point':
raster_type = RASTER_PIXEL_IS_POINT
gdal_meta_xml = data.attrs.get('gdal_metadata_xml')
if gdal_meta_xml is None:
gdal_meta_dict = data.attrs.get('gdal_metadata')
if isinstance(gdal_meta_dict, dict):
from ._geotags import _build_gdal_metadata_xml
gdal_meta_xml = _build_gdal_metadata_xml(gdal_meta_dict)
extra_tags_list = data.attrs.get('extra_tags')
extra_tags_list = _merge_friendly_extra_tags(
extra_tags_list, data.attrs)
x_res = data.attrs.get('x_resolution')
Comment thread
brendancol marked this conversation as resolved.
Outdated
y_res = data.attrs.get('y_resolution')
unit_str = data.attrs.get('resolution_unit')
if unit_str is not None:
_unit_ids = {'none': 1, 'inch': 2, 'centimeter': 3}
res_unit = _unit_ids.get(str(unit_str), None)
Comment thread
brendancol marked this conversation as resolved.
Outdated
else:
raw = data

Expand Down Expand Up @@ -1380,7 +1432,13 @@ def _write_vrt_tiled(data, vrt_path, *, crs=None, nodata=None,
task = dask.delayed(_write_single_tile)(
chunk_data, tile_path, tile_gt, epsg, wkt_fallback,
nodata, compression, compression_level,
tile_size, predictor, bigtiff, max_z_error)
tile_size, predictor, bigtiff, max_z_error,
raster_type=raster_type,
x_resolution=x_res,
y_resolution=y_res,
resolution_unit=res_unit,
gdal_metadata_xml=gdal_meta_xml,
extra_tags=extra_tags_list)
delayed_tasks.append(task)
else:
# Numpy: slice and write directly
Expand All @@ -1389,7 +1447,13 @@ def _write_vrt_tiled(data, vrt_path, *, crs=None, nodata=None,
_write_single_tile(
chunk_data, tile_path, tile_gt, epsg, wkt_fallback,
nodata, compression, compression_level,
tile_size, predictor, bigtiff, max_z_error)
tile_size, predictor, bigtiff, max_z_error,
raster_type=raster_type,
x_resolution=x_res,
y_resolution=y_res,
resolution_unit=res_unit,
gdal_metadata_xml=gdal_meta_xml,
extra_tags=extra_tags_list)

col_offset += chunk_w
row_offset += chunk_h
Expand Down
148 changes: 148 additions & 0 deletions xrspatial/geotiff/tests/test_vrt_tiled_metadata_1606.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
"""Regression tests for issue #1606.

``to_geotiff(da, 'out.vrt')`` (which dispatches to ``_write_vrt_tiled``)
used to drop a chunk of metadata that ``to_geotiff(da, 'out.tif')``
preserved:

* ``attrs['nodatavals']`` / ``attrs['_FillValue']`` -- the VRT path
read ``attrs['nodata']`` directly instead of going through
``_resolve_nodata_attr``.
* ``attrs['gdal_metadata']`` / ``attrs['gdal_metadata_xml']``
* ``attrs['extra_tags']`` and the friendly tag attrs folded in by
``_merge_friendly_extra_tags``
* ``attrs['x_resolution']`` / ``attrs['y_resolution']`` / ``['resolution_unit']``
Comment thread
brendancol marked this conversation as resolved.
Outdated
* ``attrs['raster_type']``

Each tile under the VRT now carries the same rich tag set the
equivalent single-file ``.tif`` write would emit.
"""
import glob
import os

import numpy as np
import pytest
import xarray as xr

from xrspatial.geotiff import open_geotiff, to_geotiff


def _make_rioxarray_style(arr=None):
"""DataArray that looks like rioxarray output: nodata only via aliases."""
if arr is None:
arr = np.arange(64, dtype=np.float32).reshape(8, 8)
arr[0, 0] = -9999.0
return xr.DataArray(
arr,
dims=('y', 'x'),
coords={'y': np.arange(arr.shape[0], dtype=np.float64),
'x': np.arange(arr.shape[1], dtype=np.float64)},
attrs={
# No bare 'nodata' key -- forces _resolve_nodata_attr to walk
# the alias chain.
'nodatavals': (-9999.0,),
'_FillValue': -9999.0,
'crs': 4326,
'gdal_metadata': {'AREA_OR_POINT': 'Area', 'foo': 'bar'},
'x_resolution': 96,
'y_resolution': 96,
'resolution_unit': 'inch',
'raster_type': 'point',
},
)


def _first_tile_path(vrt_path):
tiles_dir = vrt_path[:-len('.vrt')] + '_tiles'
tiles = sorted(glob.glob(os.path.join(tiles_dir, '*.tif')))
assert tiles, f'no per-tile .tif files under {tiles_dir}'
return tiles[0]


class TestVrtTiledMetadataParity:
def test_nodatavals_alias_propagates_to_tiles(self, tmp_path):
da = _make_rioxarray_style()
vrt = str(tmp_path / 'nodatavals.vrt')
to_geotiff(da, vrt, tile_size=4)
tile_da = open_geotiff(_first_tile_path(vrt))
# Before the fix this was None: _write_vrt_tiled read
# attrs['nodata'] directly and ignored the nodatavals alias.
assert tile_da.attrs.get('nodata') == -9999.0

def test_fill_value_alias_propagates_to_tiles(self, tmp_path):
arr = np.arange(64, dtype=np.float32).reshape(8, 8)
arr[0, 0] = -9999.0
da = xr.DataArray(
arr, dims=('y', 'x'),
coords={'y': np.arange(8.0), 'x': np.arange(8.0)},
attrs={'_FillValue': -9999.0, 'crs': 4326},
)
vrt = str(tmp_path / 'fillvalue.vrt')
to_geotiff(da, vrt, tile_size=4)
tile_da = open_geotiff(_first_tile_path(vrt))
assert tile_da.attrs.get('nodata') == -9999.0

def test_gdal_metadata_propagates_to_tiles(self, tmp_path):
da = _make_rioxarray_style()
vrt = str(tmp_path / 'gdal_meta.vrt')
to_geotiff(da, vrt, tile_size=4)
tile_da = open_geotiff(_first_tile_path(vrt))
gm = tile_da.attrs.get('gdal_metadata')
assert gm == {'AREA_OR_POINT': 'Area', 'foo': 'bar'}

def test_resolution_tags_propagate_to_tiles(self, tmp_path):
da = _make_rioxarray_style()
vrt = str(tmp_path / 'resolution.vrt')
to_geotiff(da, vrt, tile_size=4)
tile_da = open_geotiff(_first_tile_path(vrt))
assert tile_da.attrs.get('x_resolution') == 96.0
assert tile_da.attrs.get('y_resolution') == 96.0
assert tile_da.attrs.get('resolution_unit') == 'inch'

def test_raster_type_point_propagates_to_tiles(self, tmp_path):
da = _make_rioxarray_style()
vrt = str(tmp_path / 'point.vrt')
to_geotiff(da, vrt, tile_size=4)
tile_da = open_geotiff(_first_tile_path(vrt))
assert tile_da.attrs.get('raster_type') == 'point'

def test_tif_vs_vrt_tile_metadata_parity(self, tmp_path):
"""Same DataArray, two destinations -- per-tile metadata matches."""
da = _make_rioxarray_style()
tif_path = str(tmp_path / 'parity.tif')
vrt_path = str(tmp_path / 'parity.vrt')
to_geotiff(da, tif_path, tile_size=4)
to_geotiff(da, vrt_path, tile_size=4)

tif_da = open_geotiff(tif_path)
tile_da = open_geotiff(_first_tile_path(vrt_path))

keys = ('nodata', 'gdal_metadata', 'raster_type',
'x_resolution', 'y_resolution', 'resolution_unit')
for k in keys:
assert tif_da.attrs.get(k) == tile_da.attrs.get(k), (
f'{k} mismatch: tif={tif_da.attrs.get(k)!r}, '
f'vrt-tile={tile_da.attrs.get(k)!r}')


class TestVrtTiledMetadataDask:
def test_nodatavals_alias_dask(self, tmp_path):
pytest.importorskip('dask.array')
import dask.array as dska
arr = np.arange(64, dtype=np.float32).reshape(8, 8)
arr[0, 0] = -9999.0
da_np = xr.DataArray(
arr, dims=('y', 'x'),
coords={'y': np.arange(8.0), 'x': np.arange(8.0)},
attrs={'nodatavals': (-9999.0,), 'crs': 4326,
'gdal_metadata': {'k': 'v'}},
)
# Dask-back the data so _write_vrt_tiled takes the dask branch.
da = xr.DataArray(
dska.from_array(arr, chunks=4),
dims=da_np.dims, coords=da_np.coords, attrs=da_np.attrs,
)
vrt = str(tmp_path / 'dask.vrt')
to_geotiff(da, vrt, tile_size=4)
tile_da = open_geotiff(_first_tile_path(vrt))
assert tile_da.attrs.get('nodata') == -9999.0
assert tile_da.attrs.get('gdal_metadata') == {'k': 'v'}
Loading