Skip to content

Commit 1ce5959

Browse files
authored
Fix _write_vrt_tiled metadata drop vs to_geotiff (#1606) (#1609)
1 parent 243bc37 commit 1ce5959

3 files changed

Lines changed: 360 additions & 43 deletions

File tree

.claude/sweep-metadata-state.csv

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,4 @@ module,last_inspected,issue,severity_max,categories_found,notes
22
geotiff,2026-05-11,1598,MEDIUM,4,"read_vrt(band=N) used vrt.bands[0].nodata for attrs and integer-promotion mask, mis-masking band N reads with per-band sentinels (#1598)."
33
geotiff,2026-05-11,1597,HIGH,4;5,read_geotiff_dask dropped nodata mask when sentinel only hit non-first chunks (per-chunk dtype divergence; #1597).
44
reproject,2026-05-10,1572;1573,HIGH,1;3;4,geoid_height_raster dropped input attrs and used dims[-2:] for 3D inputs (#1572). reproject/merge ignored nodatavals (rasterio convention) when rioxarray absent (#1573). Fixed in same branch.
5+
geotiff,2026-05-11,1606,MEDIUM,1;5,_write_vrt_tiled dropped nodatavals/_FillValue aliases plus gdal_metadata/extra_tags/x_resolution/y_resolution/resolution_unit/raster_type; backend-inconsistent with to_geotiff(.tif) and write_geotiff_gpu (#1606).

xrspatial/geotiff/__init__.py

Lines changed: 132 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -779,6 +779,61 @@ def _merge_friendly_extra_tags(extra_tags_list, attrs: dict) -> list | None:
779779
return existing or None
780780

781781

782+
# String identifiers (used in xrspatial attrs) -> TIFF ResolutionUnit tag ids.
783+
_RESOLUTION_UNIT_IDS = {'none': 1, 'inch': 2, 'centimeter': 3}
784+
785+
786+
def _extract_rich_tags(attrs: dict) -> dict:
787+
"""Extract the rich-tag set forwarded by the writers to ``write(...)``.
788+
789+
Centralises the bookkeeping shared by :func:`to_geotiff`,
790+
:func:`_write_vrt_tiled`, and :func:`write_geotiff_gpu`:
791+
792+
* ``raster_type`` -- mapped from ``attrs['raster_type']`` ('point'
793+
becomes :data:`RASTER_PIXEL_IS_POINT`; everything else stays
794+
:data:`RASTER_PIXEL_IS_AREA`).
795+
* ``gdal_metadata_xml`` -- prefers ``attrs['gdal_metadata_xml']``;
796+
falls back to building XML from ``attrs['gdal_metadata']`` when
797+
it is a dict.
798+
* ``extra_tags`` -- ``attrs['extra_tags']`` folded with the friendly
799+
tag attrs (image_description / extra_samples / colormap) via
800+
:func:`_merge_friendly_extra_tags`.
801+
* ``x_resolution`` / ``y_resolution`` -- pass-through.
802+
* ``resolution_unit`` -- string label mapped to the integer tag id.
803+
804+
Returns a kwargs dict ready to splat into ``write(...)``: every key
805+
matches the corresponding parameter name on
806+
:func:`xrspatial.geotiff._writer.write`.
807+
"""
808+
raster_type = (RASTER_PIXEL_IS_POINT
809+
if attrs.get('raster_type') == 'point'
810+
else RASTER_PIXEL_IS_AREA)
811+
812+
gdal_meta_xml = attrs.get('gdal_metadata_xml')
813+
if gdal_meta_xml is None:
814+
gdal_meta_dict = attrs.get('gdal_metadata')
815+
if isinstance(gdal_meta_dict, dict):
816+
from ._geotags import _build_gdal_metadata_xml
817+
gdal_meta_xml = _build_gdal_metadata_xml(gdal_meta_dict)
818+
819+
extra_tags_list = _merge_friendly_extra_tags(
820+
attrs.get('extra_tags'), attrs)
821+
822+
res_unit = None
823+
unit_str = attrs.get('resolution_unit')
824+
if unit_str is not None:
825+
res_unit = _RESOLUTION_UNIT_IDS.get(str(unit_str), None)
826+
827+
return {
828+
'raster_type': raster_type,
829+
'gdal_metadata_xml': gdal_meta_xml,
830+
'extra_tags': extra_tags_list,
831+
'x_resolution': attrs.get('x_resolution'),
832+
'y_resolution': attrs.get('y_resolution'),
833+
'resolution_unit': res_unit,
834+
}
835+
836+
782837
def to_geotiff(data: xr.DataArray | np.ndarray, path, *,
783838
crs: int | str | None = None,
784839
nodata=None,
@@ -1051,27 +1106,17 @@ def to_geotiff(data: xr.DataArray | np.ndarray, path, *,
10511106
wkt_fallback = wkt
10521107
if nodata is None:
10531108
nodata = _resolve_nodata_attr(data.attrs)
1054-
if data.attrs.get('raster_type') == 'point':
1055-
raster_type = RASTER_PIXEL_IS_POINT
1056-
gdal_meta_xml = data.attrs.get('gdal_metadata_xml')
1057-
if gdal_meta_xml is None:
1058-
gdal_meta_dict = data.attrs.get('gdal_metadata')
1059-
if isinstance(gdal_meta_dict, dict):
1060-
from ._geotags import _build_gdal_metadata_xml
1061-
gdal_meta_xml = _build_gdal_metadata_xml(gdal_meta_dict)
1062-
extra_tags_list = data.attrs.get('extra_tags')
1063-
# Fold friendly attrs into extra_tags so a user-edited
1064-
# attrs['image_description'] / ['extra_samples'] / ['colormap']
1065-
# actually reaches the file. Existing entries with the same tag id
1066-
# win, which keeps verbatim round-trips byte-stable.
1067-
extra_tags_list = _merge_friendly_extra_tags(
1068-
extra_tags_list, data.attrs)
1069-
x_res = data.attrs.get('x_resolution')
1070-
y_res = data.attrs.get('y_resolution')
1071-
unit_str = data.attrs.get('resolution_unit')
1072-
if unit_str is not None:
1073-
_unit_ids = {'none': 1, 'inch': 2, 'centimeter': 3}
1074-
res_unit = _unit_ids.get(str(unit_str), None)
1109+
# Pull raster_type, gdal_metadata_xml, extra_tags (folded with
1110+
# the friendly image_description / extra_samples / colormap
1111+
# attrs), x/y_resolution, and resolution_unit via the shared
1112+
# helper so all three writers stay in lockstep.
1113+
_rich = _extract_rich_tags(data.attrs)
1114+
raster_type = _rich['raster_type']
1115+
gdal_meta_xml = _rich['gdal_metadata_xml']
1116+
extra_tags_list = _rich['extra_tags']
1117+
x_res = _rich['x_resolution']
1118+
y_res = _rich['y_resolution']
1119+
res_unit = _rich['resolution_unit']
10751120

10761121
# Dask-backed: stream tiles to avoid materialising the full array.
10771122
# COG requires overviews from the full array, so it falls through
@@ -1198,8 +1243,22 @@ def to_geotiff(data: xr.DataArray | np.ndarray, path, *,
11981243
def _write_single_tile(chunk_data, path, geo_transform, epsg, wkt,
11991244
nodata, compression, compression_level,
12001245
tile_size, predictor, bigtiff,
1201-
max_z_error: float = 0.0):
1202-
"""Write a single tile GeoTIFF. Used by _write_vrt_tiled."""
1246+
max_z_error: float = 0.0,
1247+
raster_type: int = RASTER_PIXEL_IS_AREA,
1248+
x_resolution=None,
1249+
y_resolution=None,
1250+
resolution_unit=None,
1251+
gdal_metadata_xml=None,
1252+
extra_tags=None):
1253+
"""Write a single tile GeoTIFF. Used by _write_vrt_tiled.
1254+
1255+
Forwards the same rich-tag set that ``to_geotiff`` passes through to
1256+
``write`` (raster_type, x/y resolution, GDAL metadata, extra tags) so
1257+
every per-tile file under a VRT carries the same metadata it would
1258+
have received from a single-file ``to_geotiff(..., out.tif)`` write.
1259+
Without this, ``to_geotiff(da, "out.vrt")`` silently drops everything
1260+
except the per-tile geo_transform / crs / nodata. See issue #1606.
1261+
"""
12031262
if hasattr(chunk_data, 'compute'):
12041263
chunk_data = chunk_data.compute()
12051264
if hasattr(chunk_data, 'get'):
@@ -1235,6 +1294,12 @@ def _write_single_tile(chunk_data, path, geo_transform, epsg, wkt,
12351294
tile_size=tile_size,
12361295
predictor=predictor,
12371296
compression_level=compression_level,
1297+
raster_type=raster_type,
1298+
x_resolution=x_resolution,
1299+
y_resolution=y_resolution,
1300+
resolution_unit=resolution_unit,
1301+
gdal_metadata_xml=gdal_metadata_xml,
1302+
extra_tags=extra_tags,
12381303
bigtiff=bigtiff,
12391304
max_z_error=max_z_error)
12401305

@@ -1283,6 +1348,12 @@ def _write_vrt_tiled(data, vrt_path, *, crs=None, nodata=None,
12831348
wkt_fallback = crs
12841349

12851350
geo_transform = None
1351+
raster_type = RASTER_PIXEL_IS_AREA
1352+
x_res = None
1353+
y_res = None
1354+
res_unit = None
1355+
gdal_meta_xml = None
1356+
extra_tags_list = None
12861357

12871358
if isinstance(data, xr.DataArray):
12881359
raw = data.data
@@ -1301,10 +1372,26 @@ def _write_vrt_tiled(data, vrt_path, *, crs=None, nodata=None,
13011372
if epsg is None and wkt_fallback is None:
13021373
wkt_fallback = wkt
13031374
if nodata is None:
1304-
nodata = data.attrs.get('nodata')
1375+
# Use the same alias-aware resolver that to_geotiff /
1376+
# write_geotiff_gpu apply so a rioxarray-style DataArray
1377+
# (``attrs['nodatavals']``) or a CF-style one
1378+
# (``attrs['_FillValue']``) round-trips through ``.vrt``
1379+
# the same way it does through ``.tif``. Before this fix
1380+
# the VRT path used ``attrs.get('nodata')`` directly and
1381+
# silently dropped both aliases (issue #1606).
1382+
nodata = _resolve_nodata_attr(data.attrs)
13051383
geo_transform = _transform_from_attr(data.attrs.get('transform'))
13061384
if geo_transform is None:
13071385
geo_transform = _coords_to_transform(data)
1386+
# Pull the same rich-tag set that to_geotiff forwards to
1387+
# ``write`` so per-tile files under the VRT carry it too.
1388+
_rich = _extract_rich_tags(data.attrs)
1389+
raster_type = _rich['raster_type']
1390+
gdal_meta_xml = _rich['gdal_metadata_xml']
1391+
extra_tags_list = _rich['extra_tags']
1392+
x_res = _rich['x_resolution']
1393+
y_res = _rich['y_resolution']
1394+
res_unit = _rich['resolution_unit']
13081395
else:
13091396
raw = data
13101397

@@ -1381,7 +1468,13 @@ def _write_vrt_tiled(data, vrt_path, *, crs=None, nodata=None,
13811468
task = dask.delayed(_write_single_tile)(
13821469
chunk_data, tile_path, tile_gt, epsg, wkt_fallback,
13831470
nodata, compression, compression_level,
1384-
tile_size, predictor, bigtiff, max_z_error)
1471+
tile_size, predictor, bigtiff, max_z_error,
1472+
raster_type=raster_type,
1473+
x_resolution=x_res,
1474+
y_resolution=y_res,
1475+
resolution_unit=res_unit,
1476+
gdal_metadata_xml=gdal_meta_xml,
1477+
extra_tags=extra_tags_list)
13851478
delayed_tasks.append(task)
13861479
else:
13871480
# Numpy: slice and write directly
@@ -1390,7 +1483,13 @@ def _write_vrt_tiled(data, vrt_path, *, crs=None, nodata=None,
13901483
_write_single_tile(
13911484
chunk_data, tile_path, tile_gt, epsg, wkt_fallback,
13921485
nodata, compression, compression_level,
1393-
tile_size, predictor, bigtiff, max_z_error)
1486+
tile_size, predictor, bigtiff, max_z_error,
1487+
raster_type=raster_type,
1488+
x_resolution=x_res,
1489+
y_resolution=y_res,
1490+
resolution_unit=res_unit,
1491+
gdal_metadata_xml=gdal_meta_xml,
1492+
extra_tags=extra_tags_list)
13941493

13951494
col_offset += chunk_w
13961495
row_offset += chunk_h
@@ -2702,28 +2801,18 @@ def write_geotiff_gpu(data, path: str, *,
27022801
wkt_fallback = wkt
27032802
if nodata is None:
27042803
nodata = _resolve_nodata_attr(data.attrs)
2705-
if data.attrs.get('raster_type') == 'point':
2706-
raster_type = RASTER_PIXEL_IS_POINT
27072804
# Mirror the CPU writer's pass-through of GDAL metadata, the
27082805
# extra_tags list, the friendly image_description / extra_samples
27092806
# / colormap synthesis, and the resolution tags. Without these,
27102807
# a GPU write -> CPU read round-trip silently drops every rich
27112808
# tag (#1563).
2712-
gdal_meta_xml = data.attrs.get('gdal_metadata_xml')
2713-
if gdal_meta_xml is None:
2714-
gdal_meta_dict = data.attrs.get('gdal_metadata')
2715-
if isinstance(gdal_meta_dict, dict):
2716-
from ._geotags import _build_gdal_metadata_xml
2717-
gdal_meta_xml = _build_gdal_metadata_xml(gdal_meta_dict)
2718-
extra_tags_list = data.attrs.get('extra_tags')
2719-
extra_tags_list = _merge_friendly_extra_tags(
2720-
extra_tags_list, data.attrs)
2721-
x_res = data.attrs.get('x_resolution')
2722-
y_res = data.attrs.get('y_resolution')
2723-
unit_str = data.attrs.get('resolution_unit')
2724-
if unit_str is not None:
2725-
_unit_ids = {'none': 1, 'inch': 2, 'centimeter': 3}
2726-
res_unit = _unit_ids.get(str(unit_str), None)
2809+
_rich = _extract_rich_tags(data.attrs)
2810+
raster_type = _rich['raster_type']
2811+
gdal_meta_xml = _rich['gdal_metadata_xml']
2812+
extra_tags_list = _rich['extra_tags']
2813+
x_res = _rich['x_resolution']
2814+
y_res = _rich['y_resolution']
2815+
res_unit = _rich['resolution_unit']
27272816
else:
27282817
if hasattr(data, 'compute'):
27292818
data = data.compute() # Dask -> CuPy or numpy

0 commit comments

Comments
 (0)