diff --git a/.claude/sweep-metadata-state.csv b/.claude/sweep-metadata-state.csv
index fab813b0..80e68447 100644
--- a/.claude/sweep-metadata-state.csv
+++ b/.claude/sweep-metadata-state.csv
@@ -1,3 +1,4 @@
module,last_inspected,issue,severity_max,categories_found,notes
geotiff,2026-05-12,1710,MEDIUM,2,"open_geotiff/read_geotiff_dask/read_geotiff_gpu windowed reads of non-georef TIFFs produced float64 half-pixel-shifted coords while full reads produced int64 [0,1,2,...] coords. Affected every backend the same way; not a backend parity bug, a windowed-vs-full inconsistency. _populate_attrs_from_geo_info also fabricated an identity transform attr on non-georef files. Fixed by threading has_georef through all windowed coord paths and through the transform attr emitter (#1710)."
+geotiff,2026-05-12,1739,HIGH,1;4,"COG overview reads dropped attrs['nodata'] from level 0, so the writer-baked sentinel survived as raw data in the overview pixels (silent numerical corruption). extract_geo_info_with_overview_inheritance was inheriting CRS-side fields only; extended to per-IFD pass-through tags (nodata, gdal_metadata*, resolution*, colormap, extra_tags, image_description, extra_samples). All four backends affected (numpy/dask/cupy/dask+cupy). Fixed in #1739."
reproject,2026-05-10,1572;1573,HIGH,1;3;4,geoid_height_raster dropped input attrs and used dims[-2:] for 3D inputs (#1572). reproject/merge ignored nodatavals (rasterio convention) when rioxarray absent (#1573). Fixed in same branch.
diff --git a/xrspatial/geotiff/_geotags.py b/xrspatial/geotiff/_geotags.py
index 972817e1..b096dc32 100644
--- a/xrspatial/geotiff/_geotags.py
+++ b/xrspatial/geotiff/_geotags.py
@@ -703,24 +703,39 @@ def extract_geo_info_with_overview_inheritance(
"""Extract geo metadata, inheriting from level 0 when the IFD lacks it.
Wraps :func:`extract_geo_info` for overview reads. GDAL-style COG
- writers (including this package's :func:`to_geotiff`) put the
- GeoKeyDirectory, ModelPixelScale and ModelTiepoint only on the
- level-0 IFD. Calling ``extract_geo_info`` directly on an overview
- IFD therefore returns a default :class:`GeoTransform` with
- ``has_georef=False`` and no CRS, so overview reads silently lose
- their georeferencing.
+ writers (including this package's :func:`to_geotiff`) put a handful
+ of tags only on the level-0 IFD:
- When ``ifd`` is a reduced-resolution overview (NewSubfileType bit 0
- set) that lacks its own georef, we re-run ``extract_geo_info`` on
- the first full-resolution IFD (NewSubfileType bit 0 clear, bit 2
- clear) and rescale the pixel size by ``width_full / width_overview``
- so coords cover the same extent as level 0.
+ * GeoKeyDirectory, ModelPixelScale, ModelTiepoint (georef)
+ * GDAL_NODATA, GDAL_METADATA (per-IFD pass-through tags)
+ * XResolution, YResolution, ResolutionUnit (resolution tags)
+ * ColorMap, ImageDescription, ExtraSamples (extra-tag pass-through)
+
+ Calling ``extract_geo_info`` directly on an overview IFD therefore
+ returns a default :class:`GeoTransform` with ``has_georef=False``,
+ no CRS, and a ``nodata=None`` field, so overview reads silently
+ lose their georeferencing and their nodata sentinel.
- If the overview IFD already carries its own geokeys (some writers do
- replicate them), this returns its own ``extract_geo_info`` output
- unchanged. If no full-resolution sibling exists or the parent's geo
- info is also missing, the overview's own (possibly empty) info is
- returned -- callers get the same fallback behaviour they used to.
+ When ``ifd`` is a reduced-resolution overview (NewSubfileType bit 0
+ set), we re-run ``extract_geo_info`` on the first full-resolution
+ IFD (NewSubfileType bit 0 clear, bit 2 clear). Per-IFD pass-through
+ tags (nodata, GDAL metadata, resolution, colormap, extra tags,
+ image description, extra samples) are inherited when the overview
+ lacks its own value, regardless of whether the overview has its own
+ georef. The transform and CRS-side fields are additionally
+ inherited when the overview lacks its own georef, with the pixel
+ size rescaled by ``width_full / width_overview`` so coords cover
+ the same extent as level 0.
+
+ If the overview IFD already carries its own value for a given
+ field, that value wins -- inheritance is per-field and only fills
+ in missing entries. If no full-resolution sibling exists, the
+ overview's own (possibly empty) info is returned -- callers get the
+ same fallback behaviour they used to.
+
+ Inheriting nodata + the rich-tag set fixes #1739 (silent numerical
+ corruption when reading COG overview pixels because attrs['nodata']
+ was lost). The georef inheritance is the original fix from #1640.
Parameters
----------
@@ -744,7 +759,7 @@ def extract_geo_info_with_overview_inheritance(
# page IFDs (bit 1) are filtered out by ``select_overview_ifd``
# before reaching here, so we never inherit a mask's geo info.
is_overview = bool(ifd.subfile_type & 1)
- if not is_overview or info.has_georef:
+ if not is_overview:
return info
# Find the level-0 IFD: NewSubfileType has bit 0 clear (not an
@@ -763,6 +778,51 @@ def extract_geo_info_with_overview_inheritance(
return info
base_info = extract_geo_info(base_ifd, data, byte_order)
+
+ # Inherit the per-IFD metadata that the COG writer emits only on the
+ # level-0 IFD: GDAL_NODATA, GDAL_METADATA, x/y resolution, colormap,
+ # extra tags, image description, extra samples. Without this block
+ # an overview read silently drops attrs['nodata'] (so the sentinel
+ # pixels the writer baked into the overview survive as ordinary data
+ # and poison downstream stats) and attrs['gdal_metadata'] (user
+ # metadata loss). See issue #1739.
+ #
+ # Each field is inherited only when the overview lacks its own
+ # value, so an overview IFD that does re-declare any of these keeps
+ # its own copy. Mirrors the gate the CRS-side inheritance applies
+ # below: prefer the overview's own value when present.
+ if info.nodata is None and base_info.nodata is not None:
+ info.nodata = base_info.nodata
+ if (info.gdal_metadata is None
+ and base_info.gdal_metadata is not None):
+ info.gdal_metadata = base_info.gdal_metadata
+ if (info.gdal_metadata_xml is None
+ and base_info.gdal_metadata_xml is not None):
+ info.gdal_metadata_xml = base_info.gdal_metadata_xml
+ if info.x_resolution is None and base_info.x_resolution is not None:
+ info.x_resolution = base_info.x_resolution
+ if info.y_resolution is None and base_info.y_resolution is not None:
+ info.y_resolution = base_info.y_resolution
+ if (info.resolution_unit is None
+ and base_info.resolution_unit is not None):
+ info.resolution_unit = base_info.resolution_unit
+ if info.colormap is None and base_info.colormap is not None:
+ info.colormap = base_info.colormap
+ if info.extra_tags is None and base_info.extra_tags is not None:
+ info.extra_tags = base_info.extra_tags
+ if (info.image_description is None
+ and base_info.image_description is not None):
+ info.image_description = base_info.image_description
+ if (info.extra_samples is None
+ and base_info.extra_samples is not None):
+ info.extra_samples = base_info.extra_samples
+
+ # If the overview already has its own georef, the rest of the
+ # inheritance (transform + CRS-side fields) is unnecessary -- return
+ # now with just the per-IFD-tag inheritance applied above.
+ if info.has_georef:
+ return info
+
if not base_info.has_georef:
return info
diff --git a/xrspatial/geotiff/tests/test_cog_overview_nodata_1613.py b/xrspatial/geotiff/tests/test_cog_overview_nodata_1613.py
index 9c77b5c7..f93875fa 100644
--- a/xrspatial/geotiff/tests/test_cog_overview_nodata_1613.py
+++ b/xrspatial/geotiff/tests/test_cog_overview_nodata_1613.py
@@ -86,11 +86,15 @@ def test_cpu_cog_overview_mean_partial_block(tmp_path):
ov = open_geotiff(p, overview_level=1)
# Top-left 2x2 was all-NaN -> reduces to NaN -> rewritten to -9999
+ # on disk, then read back as NaN once the overview-nodata
+ # inheritance fix (#1739) restores attrs['nodata'] and re-masks
+ # the sentinel.
# Top-right 2x2 [3,4,7,8] -> mean 5.5
# Bottom-left [10,20,10,20] -> 15
# Bottom-right [30,40,30,40] -> 35
data = np.asarray(ov.data)
- assert data[0, 0] == -9999.0
+ assert ov.attrs.get('nodata') == -9999.0
+ assert np.isnan(data[0, 0])
np.testing.assert_allclose(data[0, 1], 5.5)
np.testing.assert_allclose(data[1, 0], 15.0)
np.testing.assert_allclose(data[1, 1], 35.0)
diff --git a/xrspatial/geotiff/tests/test_overview_nodata_inheritance_1739.py b/xrspatial/geotiff/tests/test_overview_nodata_inheritance_1739.py
new file mode 100644
index 00000000..97b8e702
--- /dev/null
+++ b/xrspatial/geotiff/tests/test_overview_nodata_inheritance_1739.py
@@ -0,0 +1,326 @@
+"""Regression tests for issue #1739.
+
+Overview IFDs in COGs typically carry no GDAL_NODATA tag (and no
+GDAL_METADATA, XResolution, YResolution, ResolutionUnit, ColorMap,
+ImageDescription, or ExtraSamples either) -- the writer puts those
+tags only on the level-0 IFD. Before the fix, ``open_geotiff(path,
+overview_level=N)`` for ``N >= 1`` returned a DataArray whose
+``attrs['nodata']`` was None even though the level-0 IFD declared
+a sentinel, and the overview's on-disk pixels still carried that
+sentinel (the writer rewrites NaN to sentinel before reducing).
+The result was silent numerical corruption: sentinel pixels survived
+into stats / threshold / plot pipelines.
+
+The fix wires the per-IFD pass-through tags into
+``extract_geo_info_with_overview_inheritance`` so an overview without
+its own copy inherits from level 0, mirroring the original georef
+inheritance from #1640.
+
+These tests cover all four backends (numpy, dask+numpy, cupy,
+dask+cupy) and assert that the nodata sentinel + GDAL metadata + the
+resolution tags survive the level 0 -> overview transition, and that
+sentinel pixels come back as NaN on every backend.
+"""
+from __future__ import annotations
+
+import importlib.util
+
+import numpy as np
+import pytest
+import xarray as xr
+
+from xrspatial.geotiff import open_geotiff, to_geotiff
+from xrspatial.geotiff._writer import write
+from xrspatial.geotiff._geotags import GeoTransform
+
+
+def _gpu_available() -> bool:
+ if importlib.util.find_spec("cupy") is None:
+ return False
+ try:
+ import cupy
+ return bool(cupy.cuda.is_available())
+ except Exception:
+ return False
+
+
+_HAS_GPU = _gpu_available()
+_gpu_only = pytest.mark.skipif(not _HAS_GPU, reason="cupy + CUDA required")
+
+
+_BACKENDS = [
+ pytest.param({}, id="numpy"),
+ pytest.param({"chunks": 16}, id="dask+numpy"),
+ pytest.param({"gpu": True}, id="cupy",
+ marks=pytest.mark.skipif(
+ not _HAS_GPU, reason="cupy + CUDA required")),
+ pytest.param({"gpu": True, "chunks": 16}, id="dask+cupy",
+ marks=pytest.mark.skipif(
+ not _HAS_GPU, reason="cupy + CUDA required")),
+]
+
+
+def _materialise(da: xr.DataArray) -> np.ndarray:
+ """Return a numpy view of the data regardless of backend."""
+ raw = da.data
+ if hasattr(raw, 'compute'):
+ raw = raw.compute()
+ if hasattr(raw, 'get'):
+ raw = raw.get()
+ return np.asarray(raw)
+
+
+def _make_cog_with_nodata(path: str, sentinel: float = -9999.0) -> None:
+ """Write a 64x64 COG with two overview levels + a sentinel column.
+
+ The first 16x16 block is the sentinel; the rest is 100.0. Using
+ nearest-neighbour overview resampling preserves exactly 64 sentinel
+ pixels in level 1 and 16 in level 2, so the test can assert counts
+ without floating-point fudge factors.
+ """
+ arr = np.full((64, 64), 100.0, dtype=np.float32)
+ arr[0:16, 0:16] = sentinel
+ da = xr.DataArray(
+ arr, dims=['y', 'x'],
+ coords={'y': np.arange(64, dtype=np.float64),
+ 'x': np.arange(64, dtype=np.float64)},
+ attrs={'crs': 4326},
+ )
+ to_geotiff(da, path, cog=True, tile_size=16,
+ overview_levels=[2, 4], nodata=sentinel,
+ overview_resampling='nearest')
+
+
+@pytest.mark.parametrize("backend_kwargs", _BACKENDS)
+def test_overview_inherits_nodata_attr(tmp_path, backend_kwargs):
+ """attrs['nodata'] is set on every overview level, not just level 0."""
+ path = str(tmp_path / "overview_nodata_inherit_1739.tif")
+ _make_cog_with_nodata(path)
+
+ for lvl in (0, 1, 2):
+ da = open_geotiff(path, overview_level=lvl, **backend_kwargs)
+ assert da.attrs.get('nodata') == -9999.0, (
+ f"backend={backend_kwargs}, overview_level={lvl}: expected "
+ f"nodata=-9999.0, got {da.attrs.get('nodata')!r}"
+ )
+
+
+@pytest.mark.parametrize("backend_kwargs", _BACKENDS)
+def test_overview_sentinel_pixels_masked_to_nan(tmp_path, backend_kwargs):
+ """Pixels at the sentinel value come back as NaN on every overview level.
+
+ Without the inheritance fix, an overview read returned a DataArray
+ with literal -9999.0 pixels and no nodata attr, silently poisoning
+ downstream stats. With the fix, the reader inherits the sentinel
+ and applies the same NaN-mask substitution it applies at level 0.
+ """
+ path = str(tmp_path / "overview_nodata_mask_1739.tif")
+ _make_cog_with_nodata(path)
+
+ expected_nan_counts = {0: 256, 1: 64, 2: 16}
+ for lvl, expected in expected_nan_counts.items():
+ da = open_geotiff(path, overview_level=lvl, **backend_kwargs)
+ vals = _materialise(da)
+ actual_nan = int(np.isnan(vals).sum())
+ sentinel_remaining = int((vals == -9999.0).sum())
+
+ assert sentinel_remaining == 0, (
+ f"backend={backend_kwargs}, overview_level={lvl}: "
+ f"{sentinel_remaining} sentinel pixels survived as raw "
+ f"values; expected all masked to NaN"
+ )
+ assert actual_nan == expected, (
+ f"backend={backend_kwargs}, overview_level={lvl}: expected "
+ f"{expected} NaN pixels, got {actual_nan}"
+ )
+
+
+@pytest.mark.parametrize("backend_kwargs", _BACKENDS)
+def test_overview_nanmean_matches_pre_sentinel_value(tmp_path, backend_kwargs):
+ """nanmean on every overview level equals the non-sentinel value.
+
+ With the fix, sentinel pixels are NaN-masked, so np.nanmean returns
+ 100.0 (the value of every other pixel). Without it, the sentinel
+ survives as -9999.0 and the mean is far below 100.0.
+ """
+ path = str(tmp_path / "overview_nodata_mean_1739.tif")
+ _make_cog_with_nodata(path)
+
+ for lvl in (0, 1, 2):
+ da = open_geotiff(path, overview_level=lvl, **backend_kwargs)
+ vals = _materialise(da)
+ assert np.nanmean(vals) == pytest.approx(100.0), (
+ f"backend={backend_kwargs}, overview_level={lvl}: nanmean="
+ f"{np.nanmean(vals)} (expected 100.0); sentinel pixels "
+ "are leaking into the average."
+ )
+
+
+@pytest.mark.parametrize("backend_kwargs", _BACKENDS)
+def test_overview_inherits_gdal_metadata(tmp_path, backend_kwargs):
+ """attrs['gdal_metadata'] and ['gdal_metadata_xml'] come from level 0.
+
+ The COG writer emits the GDAL_METADATA tag only on the level-0 IFD.
+ Without the inheritance fix, overview reads dropped the dict, so a
+ user-supplied scaling factor or band-stats payload only appeared at
+ level 0 -- silently inconsistent across overview reads.
+ """
+ arr = np.full((64, 64), 100.0, dtype=np.float32)
+ gt = GeoTransform(origin_x=0.0, origin_y=0.0,
+ pixel_width=1.0, pixel_height=-1.0)
+ path = str(tmp_path / "overview_gdal_md_inherit_1739.tif")
+ write(arr, path, geo_transform=gt, crs_epsg=4326,
+ cog=True, tile_size=16, overview_levels=[2, 4],
+ gdal_metadata_xml=(
+ '\n'
+ ' - 1.5
\n'
+ '\n'),
+ compression='none')
+
+ for lvl in (0, 1, 2):
+ da = open_geotiff(path, overview_level=lvl, **backend_kwargs)
+ assert da.attrs.get('gdal_metadata') is not None, (
+ f"backend={backend_kwargs}, overview_level={lvl}: "
+ f"gdal_metadata attr is missing")
+ assert da.attrs['gdal_metadata'].get('MY_SCALING') == '1.5', (
+ f"backend={backend_kwargs}, overview_level={lvl}: "
+ f"expected gdal_metadata['MY_SCALING']='1.5', got "
+ f"{da.attrs['gdal_metadata']}"
+ )
+ assert da.attrs.get('gdal_metadata_xml') is not None, (
+ f"backend={backend_kwargs}, overview_level={lvl}: "
+ f"gdal_metadata_xml attr is missing")
+
+
+@pytest.mark.parametrize("backend_kwargs", _BACKENDS)
+def test_overview_inherits_resolution_tags(tmp_path, backend_kwargs):
+ """XResolution / YResolution / ResolutionUnit propagate to overviews.
+
+ Same pattern as gdal_metadata: the writer puts these tags only on
+ the level-0 IFD, so an overview read used to come back with the
+ resolution tags missing. The inheritance fix surfaces them on every
+ level.
+ """
+ arr = np.full((64, 64), 100.0, dtype=np.float32)
+ gt = GeoTransform(origin_x=0.0, origin_y=0.0,
+ pixel_width=1.0, pixel_height=-1.0)
+ path = str(tmp_path / "overview_res_inherit_1739.tif")
+ write(arr, path, geo_transform=gt, crs_epsg=4326,
+ cog=True, tile_size=16, overview_levels=[2, 4],
+ x_resolution=300.0, y_resolution=300.0, resolution_unit=2,
+ compression='none')
+
+ for lvl in (0, 1, 2):
+ da = open_geotiff(path, overview_level=lvl, **backend_kwargs)
+ assert da.attrs.get('x_resolution') == 300.0, (
+ f"backend={backend_kwargs}, overview_level={lvl}: "
+ f"x_resolution={da.attrs.get('x_resolution')!r}")
+ assert da.attrs.get('y_resolution') == 300.0, (
+ f"backend={backend_kwargs}, overview_level={lvl}: "
+ f"y_resolution={da.attrs.get('y_resolution')!r}")
+ assert da.attrs.get('resolution_unit') == 'inch', (
+ f"backend={backend_kwargs}, overview_level={lvl}: "
+ f"resolution_unit={da.attrs.get('resolution_unit')!r}")
+
+
+def test_attrs_keysets_consistent_across_overview_levels(tmp_path):
+ """The set of attrs keys is identical at every overview level.
+
+ Strong contract that catches future regressions where one of the
+ inherited fields gets dropped without removing the entry from the
+ main code path (e.g. a refactor that splits the inheritance block
+ into helpers and forgets one field).
+ """
+ arr = np.full((64, 64), 100.0, dtype=np.float32)
+ arr[0:16, 0:16] = -9999.0
+ gt = GeoTransform(origin_x=0.0, origin_y=0.0,
+ pixel_width=1.0, pixel_height=-1.0)
+ path = str(tmp_path / "overview_attr_keysets_1739.tif")
+ write(arr, path, geo_transform=gt, crs_epsg=4326, nodata=-9999.0,
+ cog=True, tile_size=16, overview_levels=[2, 4],
+ x_resolution=300, y_resolution=300, resolution_unit=2,
+ gdal_metadata_xml=(
+ '\n - V
\n'
+ '\n'))
+
+ level_keysets = {
+ lvl: set(open_geotiff(path, overview_level=lvl).attrs.keys())
+ for lvl in (0, 1, 2)
+ }
+ base = level_keysets[0]
+ for lvl in (1, 2):
+ diff = base ^ level_keysets[lvl]
+ assert not diff, (
+ f"overview_level={lvl} attrs keyset differs from level 0:\n"
+ f" level 0: {sorted(base)}\n"
+ f" level {lvl}: {sorted(level_keysets[lvl])}\n"
+ f" symmetric diff: {sorted(diff)}"
+ )
+
+
+def test_overview_with_own_nodata_keeps_own_value(tmp_path):
+ """Overview IFDs that re-declare GDAL_NODATA keep their own value.
+
+ The inheritance is per-field and only fills in missing entries, so
+ an overview that does carry its own tag (rare but valid -- some
+ writers do this) is not stomped on by the parent's value.
+
+ This is exercised by reading the same COG with overview level 0
+ twice: once directly and once after manipulating the level-0
+ nodata. Since the writer always emits the same nodata on level 0,
+ hand-building a TIFF here would be overkill; the simpler contract
+ test in test_attrs_keysets_consistent_across_* already pins the
+ typical writer path. This test pins the "overview already has its
+ own value" branch by simulating it directly against
+ ``extract_geo_info_with_overview_inheritance``.
+ """
+ from xrspatial.geotiff._geotags import (
+ extract_geo_info_with_overview_inheritance,
+ GeoInfo, GeoTransform as _GT,
+ )
+ # Drive the helper with fake IFD objects + a fake base IFD to
+ # avoid having to hand-pack a TIFF with re-declared overview
+ # nodata. The helper only inspects ``ifd.subfile_type`` /
+ # ``.width`` / ``.height`` and forwards the rest to
+ # ``extract_geo_info``; we monkeypatch that to return controlled
+ # GeoInfo instances.
+ import xrspatial.geotiff._geotags as _gt_mod
+
+ class _StubIFD:
+ def __init__(self, subfile_type, width, height):
+ self.subfile_type = subfile_type
+ self.width = width
+ self.height = height
+
+ base_ifd = _StubIFD(0, 64, 64)
+ ov_ifd = _StubIFD(1, 32, 32) # overview (NewSubfileType bit 0)
+
+ # Overview already has its own nodata (-5555); base has -9999.
+ # Test: inheritance leaves the overview's -5555 untouched.
+ def fake_extract(ifd, data, byte_order):
+ if ifd is ov_ifd:
+ gi = GeoInfo()
+ gi.nodata = -5555.0
+ gi.has_georef = False
+ return gi
+ if ifd is base_ifd:
+ gi = GeoInfo()
+ gi.nodata = -9999.0
+ gi.transform = _GT(0.0, 0.0, 1.0, -1.0)
+ gi.has_georef = True
+ gi.crs_epsg = 4326
+ return gi
+ return GeoInfo()
+
+ orig = _gt_mod.extract_geo_info
+ _gt_mod.extract_geo_info = fake_extract
+ try:
+ out = extract_geo_info_with_overview_inheritance(
+ ov_ifd, [base_ifd, ov_ifd], b'', '<')
+ finally:
+ _gt_mod.extract_geo_info = orig
+
+ assert out.nodata == -5555.0, (
+ f"overview's own nodata=-5555 was overwritten by base's -9999; "
+ f"got {out.nodata}"
+ )