Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .claude/sweep-metadata-state.csv
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
module,last_inspected,issue,severity_max,categories_found,notes
geotiff,2026-05-12,1710,MEDIUM,2,"open_geotiff/read_geotiff_dask/read_geotiff_gpu windowed reads of non-georef TIFFs produced float64 half-pixel-shifted coords while full reads produced int64 [0,1,2,...] coords. Affected every backend the same way; not a backend parity bug, a windowed-vs-full inconsistency. _populate_attrs_from_geo_info also fabricated an identity transform attr on non-georef files. Fixed by threading has_georef through all windowed coord paths and through the transform attr emitter (#1710)."
geotiff,2026-05-12,1739,HIGH,1;4,"COG overview reads dropped attrs['nodata'] from level 0, so the writer-baked sentinel survived as raw data in the overview pixels (silent numerical corruption). extract_geo_info_with_overview_inheritance was inheriting CRS-side fields only; extended to per-IFD pass-through tags (nodata, gdal_metadata*, resolution*, colormap, extra_tags, image_description, extra_samples). All four backends affected (numpy/dask/cupy/dask+cupy). Fixed in #1739."
reproject,2026-05-10,1572;1573,HIGH,1;3;4,geoid_height_raster dropped input attrs and used dims[-2:] for 3D inputs (#1572). reproject/merge ignored nodatavals (rasterio convention) when rioxarray absent (#1573). Fixed in same branch.
94 changes: 77 additions & 17 deletions xrspatial/geotiff/_geotags.py
Original file line number Diff line number Diff line change
Expand Up @@ -703,24 +703,39 @@ def extract_geo_info_with_overview_inheritance(
"""Extract geo metadata, inheriting from level 0 when the IFD lacks it.

Wraps :func:`extract_geo_info` for overview reads. GDAL-style COG
writers (including this package's :func:`to_geotiff`) put the
GeoKeyDirectory, ModelPixelScale and ModelTiepoint only on the
level-0 IFD. Calling ``extract_geo_info`` directly on an overview
IFD therefore returns a default :class:`GeoTransform` with
``has_georef=False`` and no CRS, so overview reads silently lose
their georeferencing.
writers (including this package's :func:`to_geotiff`) put a handful
of tags only on the level-0 IFD:

When ``ifd`` is a reduced-resolution overview (NewSubfileType bit 0
set) that lacks its own georef, we re-run ``extract_geo_info`` on
the first full-resolution IFD (NewSubfileType bit 0 clear, bit 2
clear) and rescale the pixel size by ``width_full / width_overview``
so coords cover the same extent as level 0.
* GeoKeyDirectory, ModelPixelScale, ModelTiepoint (georef)
* GDAL_NODATA, GDAL_METADATA (per-IFD pass-through tags)
* XResolution, YResolution, ResolutionUnit (resolution tags)
* ColorMap, ImageDescription, ExtraSamples (extra-tag pass-through)

Calling ``extract_geo_info`` directly on an overview IFD therefore
returns a default :class:`GeoTransform` with ``has_georef=False``,
no CRS, and a ``nodata=None`` field, so overview reads silently
lose their georeferencing and their nodata sentinel.

If the overview IFD already carries its own geokeys (some writers do
replicate them), this returns its own ``extract_geo_info`` output
unchanged. If no full-resolution sibling exists or the parent's geo
info is also missing, the overview's own (possibly empty) info is
returned -- callers get the same fallback behaviour they used to.
When ``ifd`` is a reduced-resolution overview (NewSubfileType bit 0
set), we re-run ``extract_geo_info`` on the first full-resolution
IFD (NewSubfileType bit 0 clear, bit 2 clear). Per-IFD pass-through
tags (nodata, GDAL metadata, resolution, colormap, extra tags,
image description, extra samples) are inherited when the overview
lacks its own value, regardless of whether the overview has its own
georef. The transform and CRS-side fields are additionally
inherited when the overview lacks its own georef, with the pixel
size rescaled by ``width_full / width_overview`` so coords cover
the same extent as level 0.

If the overview IFD already carries its own value for a given
field, that value wins -- inheritance is per-field and only fills
in missing entries. If no full-resolution sibling exists, the
overview's own (possibly empty) info is returned -- callers get the
same fallback behaviour they used to.

Inheriting nodata + the rich-tag set fixes #1739 (silent numerical
corruption when reading COG overview pixels because attrs['nodata']
was lost). The georef inheritance is the original fix from #1640.

Parameters
----------
Expand All @@ -744,7 +759,7 @@ def extract_geo_info_with_overview_inheritance(
# page IFDs (bit 1) are filtered out by ``select_overview_ifd``
# before reaching here, so we never inherit a mask's geo info.
is_overview = bool(ifd.subfile_type & 1)
if not is_overview or info.has_georef:
if not is_overview:
return info

# Find the level-0 IFD: NewSubfileType has bit 0 clear (not an
Expand All @@ -763,6 +778,51 @@ def extract_geo_info_with_overview_inheritance(
return info

base_info = extract_geo_info(base_ifd, data, byte_order)

# Inherit the per-IFD metadata that the COG writer emits only on the
# level-0 IFD: GDAL_NODATA, GDAL_METADATA, x/y resolution, colormap,
# extra tags, image description, extra samples. Without this block
# an overview read silently drops attrs['nodata'] (so the sentinel
# pixels the writer baked into the overview survive as ordinary data
# and poison downstream stats) and attrs['gdal_metadata'] (user
# metadata loss). See issue #1739.
#
# Each field is inherited only when the overview lacks its own
# value, so an overview IFD that does re-declare any of these keeps
# its own copy. Mirrors the gate the CRS-side inheritance applies
# below: prefer the overview's own value when present.
if info.nodata is None and base_info.nodata is not None:
info.nodata = base_info.nodata
if (info.gdal_metadata is None
and base_info.gdal_metadata is not None):
info.gdal_metadata = base_info.gdal_metadata
if (info.gdal_metadata_xml is None
and base_info.gdal_metadata_xml is not None):
info.gdal_metadata_xml = base_info.gdal_metadata_xml
if info.x_resolution is None and base_info.x_resolution is not None:
info.x_resolution = base_info.x_resolution
if info.y_resolution is None and base_info.y_resolution is not None:
info.y_resolution = base_info.y_resolution
if (info.resolution_unit is None
and base_info.resolution_unit is not None):
info.resolution_unit = base_info.resolution_unit
if info.colormap is None and base_info.colormap is not None:
info.colormap = base_info.colormap
if info.extra_tags is None and base_info.extra_tags is not None:
info.extra_tags = base_info.extra_tags
if (info.image_description is None
and base_info.image_description is not None):
info.image_description = base_info.image_description
if (info.extra_samples is None
and base_info.extra_samples is not None):
info.extra_samples = base_info.extra_samples

# If the overview already has its own georef, the rest of the
# inheritance (transform + CRS-side fields) is unnecessary -- return
# now with just the per-IFD-tag inheritance applied above.
if info.has_georef:
return info

if not base_info.has_georef:
return info

Expand Down
6 changes: 5 additions & 1 deletion xrspatial/geotiff/tests/test_cog_overview_nodata_1613.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,11 +86,15 @@ def test_cpu_cog_overview_mean_partial_block(tmp_path):

ov = open_geotiff(p, overview_level=1)
# Top-left 2x2 was all-NaN -> reduces to NaN -> rewritten to -9999
# on disk, then read back as NaN once the overview-nodata
# inheritance fix (#1739) restores attrs['nodata'] and re-masks
# the sentinel.
# Top-right 2x2 [3,4,7,8] -> mean 5.5
# Bottom-left [10,20,10,20] -> 15
# Bottom-right [30,40,30,40] -> 35
data = np.asarray(ov.data)
assert data[0, 0] == -9999.0
assert ov.attrs.get('nodata') == -9999.0
assert np.isnan(data[0, 0])
np.testing.assert_allclose(data[0, 1], 5.5)
np.testing.assert_allclose(data[1, 0], 15.0)
np.testing.assert_allclose(data[1, 1], 35.0)
Expand Down
Loading
Loading