Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .claude/sweep-metadata-state.csv
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@ module,last_inspected,issue,severity_max,categories_found,notes
geotiff,2026-05-11,1598,MEDIUM,4,"read_vrt(band=N) used vrt.bands[0].nodata for attrs and integer-promotion mask, mis-masking band N reads with per-band sentinels (#1598)."
geotiff,2026-05-11,1597,HIGH,4;5,read_geotiff_dask dropped nodata mask when sentinel only hit non-first chunks (per-chunk dtype divergence; #1597).
reproject,2026-05-10,1572;1573,HIGH,1;3;4,geoid_height_raster dropped input attrs and used dims[-2:] for 3D inputs (#1572). reproject/merge ignored nodatavals (rasterio convention) when rioxarray absent (#1573). Fixed in same branch.
geotiff,2026-05-11,1606,MEDIUM,1;5,_write_vrt_tiled dropped nodatavals/_FillValue aliases plus gdal_metadata/extra_tags/x_resolution/y_resolution/resolution_unit/raster_type; backend-inconsistent with to_geotiff(.tif) and write_geotiff_gpu (#1606).
175 changes: 132 additions & 43 deletions xrspatial/geotiff/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -778,6 +778,61 @@ def _merge_friendly_extra_tags(extra_tags_list, attrs: dict) -> list | None:
return existing or None


# String identifiers (used in xrspatial attrs) -> TIFF ResolutionUnit tag ids.
_RESOLUTION_UNIT_IDS = {'none': 1, 'inch': 2, 'centimeter': 3}


def _extract_rich_tags(attrs: dict) -> dict:
"""Extract the rich-tag set forwarded by the writers to ``write(...)``.

Centralises the bookkeeping shared by :func:`to_geotiff`,
:func:`_write_vrt_tiled`, and :func:`write_geotiff_gpu`:

* ``raster_type`` -- mapped from ``attrs['raster_type']`` ('point'
becomes :data:`RASTER_PIXEL_IS_POINT`; everything else stays
:data:`RASTER_PIXEL_IS_AREA`).
* ``gdal_metadata_xml`` -- prefers ``attrs['gdal_metadata_xml']``;
falls back to building XML from ``attrs['gdal_metadata']`` when
it is a dict.
* ``extra_tags`` -- ``attrs['extra_tags']`` folded with the friendly
tag attrs (image_description / extra_samples / colormap) via
:func:`_merge_friendly_extra_tags`.
* ``x_resolution`` / ``y_resolution`` -- pass-through.
* ``resolution_unit`` -- string label mapped to the integer tag id.

Returns a kwargs dict ready to splat into ``write(...)``: every key
matches the corresponding parameter name on
:func:`xrspatial.geotiff._writer.write`.
"""
raster_type = (RASTER_PIXEL_IS_POINT
if attrs.get('raster_type') == 'point'
else RASTER_PIXEL_IS_AREA)

gdal_meta_xml = attrs.get('gdal_metadata_xml')
if gdal_meta_xml is None:
gdal_meta_dict = attrs.get('gdal_metadata')
if isinstance(gdal_meta_dict, dict):
from ._geotags import _build_gdal_metadata_xml
gdal_meta_xml = _build_gdal_metadata_xml(gdal_meta_dict)

extra_tags_list = _merge_friendly_extra_tags(
attrs.get('extra_tags'), attrs)

res_unit = None
unit_str = attrs.get('resolution_unit')
if unit_str is not None:
res_unit = _RESOLUTION_UNIT_IDS.get(str(unit_str), None)

return {
'raster_type': raster_type,
'gdal_metadata_xml': gdal_meta_xml,
'extra_tags': extra_tags_list,
'x_resolution': attrs.get('x_resolution'),
'y_resolution': attrs.get('y_resolution'),
'resolution_unit': res_unit,
}


def to_geotiff(data: xr.DataArray | np.ndarray, path, *,
crs: int | str | None = None,
nodata=None,
Expand Down Expand Up @@ -1050,27 +1105,17 @@ def to_geotiff(data: xr.DataArray | np.ndarray, path, *,
wkt_fallback = wkt
if nodata is None:
nodata = _resolve_nodata_attr(data.attrs)
if data.attrs.get('raster_type') == 'point':
raster_type = RASTER_PIXEL_IS_POINT
gdal_meta_xml = data.attrs.get('gdal_metadata_xml')
if gdal_meta_xml is None:
gdal_meta_dict = data.attrs.get('gdal_metadata')
if isinstance(gdal_meta_dict, dict):
from ._geotags import _build_gdal_metadata_xml
gdal_meta_xml = _build_gdal_metadata_xml(gdal_meta_dict)
extra_tags_list = data.attrs.get('extra_tags')
# Fold friendly attrs into extra_tags so a user-edited
# attrs['image_description'] / ['extra_samples'] / ['colormap']
# actually reaches the file. Existing entries with the same tag id
# win, which keeps verbatim round-trips byte-stable.
extra_tags_list = _merge_friendly_extra_tags(
extra_tags_list, data.attrs)
x_res = data.attrs.get('x_resolution')
y_res = data.attrs.get('y_resolution')
unit_str = data.attrs.get('resolution_unit')
if unit_str is not None:
_unit_ids = {'none': 1, 'inch': 2, 'centimeter': 3}
res_unit = _unit_ids.get(str(unit_str), None)
# Pull raster_type, gdal_metadata_xml, extra_tags (folded with
# the friendly image_description / extra_samples / colormap
# attrs), x/y_resolution, and resolution_unit via the shared
# helper so all three writers stay in lockstep.
_rich = _extract_rich_tags(data.attrs)
raster_type = _rich['raster_type']
gdal_meta_xml = _rich['gdal_metadata_xml']
extra_tags_list = _rich['extra_tags']
x_res = _rich['x_resolution']
y_res = _rich['y_resolution']
res_unit = _rich['resolution_unit']

# Dask-backed: stream tiles to avoid materialising the full array.
# COG requires overviews from the full array, so it falls through
Expand Down Expand Up @@ -1197,8 +1242,22 @@ def to_geotiff(data: xr.DataArray | np.ndarray, path, *,
def _write_single_tile(chunk_data, path, geo_transform, epsg, wkt,
nodata, compression, compression_level,
tile_size, predictor, bigtiff,
max_z_error: float = 0.0):
"""Write a single tile GeoTIFF. Used by _write_vrt_tiled."""
max_z_error: float = 0.0,
raster_type: int = RASTER_PIXEL_IS_AREA,
x_resolution=None,
y_resolution=None,
resolution_unit=None,
gdal_metadata_xml=None,
extra_tags=None):
"""Write a single tile GeoTIFF. Used by _write_vrt_tiled.

Forwards the same rich-tag set that ``to_geotiff`` passes through to
``write`` (raster_type, x/y resolution, GDAL metadata, extra tags) so
every per-tile file under a VRT carries the same metadata it would
have received from a single-file ``to_geotiff(..., out.tif)`` write.
Without this, ``to_geotiff(da, "out.vrt")`` silently drops everything
except the per-tile geo_transform / crs / nodata. See issue #1606.
"""
if hasattr(chunk_data, 'compute'):
chunk_data = chunk_data.compute()
if hasattr(chunk_data, 'get'):
Expand Down Expand Up @@ -1234,6 +1293,12 @@ def _write_single_tile(chunk_data, path, geo_transform, epsg, wkt,
tile_size=tile_size,
predictor=predictor,
compression_level=compression_level,
raster_type=raster_type,
x_resolution=x_resolution,
y_resolution=y_resolution,
resolution_unit=resolution_unit,
gdal_metadata_xml=gdal_metadata_xml,
extra_tags=extra_tags,
bigtiff=bigtiff,
max_z_error=max_z_error)

Expand Down Expand Up @@ -1282,6 +1347,12 @@ def _write_vrt_tiled(data, vrt_path, *, crs=None, nodata=None,
wkt_fallback = crs

geo_transform = None
raster_type = RASTER_PIXEL_IS_AREA
x_res = None
y_res = None
res_unit = None
gdal_meta_xml = None
extra_tags_list = None

if isinstance(data, xr.DataArray):
raw = data.data
Expand All @@ -1300,10 +1371,26 @@ def _write_vrt_tiled(data, vrt_path, *, crs=None, nodata=None,
if epsg is None and wkt_fallback is None:
wkt_fallback = wkt
if nodata is None:
nodata = data.attrs.get('nodata')
# Use the same alias-aware resolver that to_geotiff /
# write_geotiff_gpu apply so a rioxarray-style DataArray
# (``attrs['nodatavals']``) or a CF-style one
# (``attrs['_FillValue']``) round-trips through ``.vrt``
# the same way it does through ``.tif``. Before this fix
# the VRT path used ``attrs.get('nodata')`` directly and
# silently dropped both aliases (issue #1606).
nodata = _resolve_nodata_attr(data.attrs)
geo_transform = _transform_from_attr(data.attrs.get('transform'))
if geo_transform is None:
geo_transform = _coords_to_transform(data)
# Pull the same rich-tag set that to_geotiff forwards to
# ``write`` so per-tile files under the VRT carry it too.
_rich = _extract_rich_tags(data.attrs)
raster_type = _rich['raster_type']
gdal_meta_xml = _rich['gdal_metadata_xml']
extra_tags_list = _rich['extra_tags']
x_res = _rich['x_resolution']
y_res = _rich['y_resolution']
res_unit = _rich['resolution_unit']
else:
raw = data

Expand Down Expand Up @@ -1380,7 +1467,13 @@ def _write_vrt_tiled(data, vrt_path, *, crs=None, nodata=None,
task = dask.delayed(_write_single_tile)(
chunk_data, tile_path, tile_gt, epsg, wkt_fallback,
nodata, compression, compression_level,
tile_size, predictor, bigtiff, max_z_error)
tile_size, predictor, bigtiff, max_z_error,
raster_type=raster_type,
x_resolution=x_res,
y_resolution=y_res,
resolution_unit=res_unit,
gdal_metadata_xml=gdal_meta_xml,
extra_tags=extra_tags_list)
delayed_tasks.append(task)
else:
# Numpy: slice and write directly
Expand All @@ -1389,7 +1482,13 @@ def _write_vrt_tiled(data, vrt_path, *, crs=None, nodata=None,
_write_single_tile(
chunk_data, tile_path, tile_gt, epsg, wkt_fallback,
nodata, compression, compression_level,
tile_size, predictor, bigtiff, max_z_error)
tile_size, predictor, bigtiff, max_z_error,
raster_type=raster_type,
x_resolution=x_res,
y_resolution=y_res,
resolution_unit=res_unit,
gdal_metadata_xml=gdal_meta_xml,
extra_tags=extra_tags_list)

col_offset += chunk_w
row_offset += chunk_h
Expand Down Expand Up @@ -2554,28 +2653,18 @@ def write_geotiff_gpu(data, path: str, *,
wkt_fallback = wkt
if nodata is None:
nodata = _resolve_nodata_attr(data.attrs)
if data.attrs.get('raster_type') == 'point':
raster_type = RASTER_PIXEL_IS_POINT
# Mirror the CPU writer's pass-through of GDAL metadata, the
# extra_tags list, the friendly image_description / extra_samples
# / colormap synthesis, and the resolution tags. Without these,
# a GPU write -> CPU read round-trip silently drops every rich
# tag (#1563).
gdal_meta_xml = data.attrs.get('gdal_metadata_xml')
if gdal_meta_xml is None:
gdal_meta_dict = data.attrs.get('gdal_metadata')
if isinstance(gdal_meta_dict, dict):
from ._geotags import _build_gdal_metadata_xml
gdal_meta_xml = _build_gdal_metadata_xml(gdal_meta_dict)
extra_tags_list = data.attrs.get('extra_tags')
extra_tags_list = _merge_friendly_extra_tags(
extra_tags_list, data.attrs)
x_res = data.attrs.get('x_resolution')
y_res = data.attrs.get('y_resolution')
unit_str = data.attrs.get('resolution_unit')
if unit_str is not None:
_unit_ids = {'none': 1, 'inch': 2, 'centimeter': 3}
res_unit = _unit_ids.get(str(unit_str), None)
_rich = _extract_rich_tags(data.attrs)
raster_type = _rich['raster_type']
gdal_meta_xml = _rich['gdal_metadata_xml']
extra_tags_list = _rich['extra_tags']
x_res = _rich['x_resolution']
y_res = _rich['y_resolution']
res_unit = _rich['resolution_unit']
else:
if hasattr(data, 'compute'):
data = data.compute() # Dask -> CuPy or numpy
Expand Down
Loading
Loading