Skip to content

Commit 03d7380

Browse files
authored
Inherit georef from level-0 IFD when reading overview IFDs (#1640) (#1641)
* Inherit georef from level-0 IFD when reading overview IFDs (#1640) GDAL-style COG writers, including this package's own to_geotiff, put the GeoKeyDirectory, ModelPixelScale, and ModelTiepoint only on the level-0 IFD. Before this fix, open_geotiff(path, overview_level=N) with N >= 1 returned a DataArray whose CRS, transform attr, and y/x coords were silently replaced with defaults (unit transform, integer pixel indices, no crs). Downstream spatial ops then computed pixel sizes from the wrong coords and produced silently-wrong answers. Add extract_geo_info_with_overview_inheritance in _geotags.py. When the selected IFD is a reduced-resolution overview (NewSubfileType bit 0) and its own extract_geo_info reports has_georef=False, the helper re-runs extract_geo_info on the first full-resolution IFD and rescales the pixel size by width_full / width_overview (and the same for height) before copying CRS-side metadata across. Overviews that already carry their own valid georef (some writers replicate it) are left untouched. Wire the helper into every read entry point: - read_to_array (eager numpy and dask chunk workers) - _read_cog_http (HTTP COGs) - _read_geo_info (dask metadata probe) - read_geotiff_gpu (cupy and dask+cupy paths) Add xrspatial/geotiff/tests/test_overview_geo_inheritance_1640.py covering all four backends (numpy, dask+numpy, cupy, dask+cupy) plus two edge cases: an overview that carries its own geokeys keeps them, and a file with no full-res sibling falls back without raising. * Update sweep-metadata state for geotiff (#1640)
1 parent 977968f commit 03d7380

5 files changed

Lines changed: 449 additions & 7 deletions

File tree

.claude/sweep-metadata-state.csv

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
module,last_inspected,issue,severity_max,categories_found,notes
2-
geotiff,2026-05-11,1632,HIGH,1;5,extract_geo_info dropped user-defined CRS WKT: GeoKey *CSTypeGeoKey=32767 files emitted attrs[crs_name] only (no crs/crs_wkt) so to_geotiff silently lost the projection on read->write. Fixed by promoting WKT-looking citation to crs_wkt across all four read backends (#1632).
2+
geotiff,2026-05-11,1640,HIGH,1;2;4,"open_geotiff(overview_level>=1) silently dropped CRS, transform attr, and georeferenced y/x coords because GDAL-style COGs (including to_geotiff output) write GeoKeys only on the level-0 IFD. Fixed by inheriting and rescaling georef from the level-0 IFD across all four read backends (#1640)."
33
reproject,2026-05-10,1572;1573,HIGH,1;3;4,geoid_height_raster dropped input attrs and used dims[-2:] for 3D inputs (#1572). reproject/merge ignored nodatavals (rasterio convention) when rioxarray absent (#1573). Fixed in same branch.

xrspatial/geotiff/__init__.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -241,7 +241,7 @@ def _read_geo_info(source, *, overview_level: int | None = None):
241241
Overview IFD index (0 = full resolution).
242242
"""
243243
from ._dtypes import resolve_bits_per_sample, tiff_dtype_to_numpy
244-
from ._geotags import extract_geo_info
244+
from ._geotags import extract_geo_info_with_overview_inheritance
245245
from ._header import parse_all_ifds, parse_header, select_overview_ifd
246246
from ._reader import _coerce_path, _is_file_like
247247

@@ -275,7 +275,10 @@ def _read_geo_info(source, *, overview_level: int | None = None):
275275
if not ifds:
276276
raise ValueError("No IFDs found in TIFF file")
277277
ifd = select_overview_ifd(ifds, overview_level)
278-
geo_info = extract_geo_info(ifd, data, header.byte_order)
278+
# Inherit georef from the level-0 IFD when the overview itself
279+
# has no geokeys (issue #1640). Pass-through for level 0.
280+
geo_info = extract_geo_info_with_overview_inheritance(
281+
ifd, ifds, data, header.byte_order)
279282
bps = resolve_bits_per_sample(ifd.bits_per_sample)
280283
file_dtype = tiff_dtype_to_numpy(bps, ifd.sample_format)
281284
n_bands = ifd.samples_per_pixel if ifd.samples_per_pixel > 1 else 0
@@ -2264,7 +2267,7 @@ def read_geotiff_gpu(source: str, *,
22642267
parse_header, parse_all_ifds, select_overview_ifd, validate_tile_layout,
22652268
)
22662269
from ._dtypes import resolve_bits_per_sample, tiff_dtype_to_numpy
2267-
from ._geotags import extract_geo_info
2270+
from ._geotags import extract_geo_info_with_overview_inheritance
22682271
from ._gpu_decode import gpu_decode_tiles
22692272

22702273
source = _coerce_path(source)
@@ -2301,7 +2304,10 @@ def read_geotiff_gpu(source: str, *,
23012304

23022305
bps = resolve_bits_per_sample(ifd.bits_per_sample)
23032306
file_dtype = tiff_dtype_to_numpy(bps, ifd.sample_format)
2304-
geo_info = extract_geo_info(ifd, data, header.byte_order)
2307+
# Inherit georef from the level-0 IFD when the overview itself
2308+
# has no geokeys (issue #1640); pass-through for level 0.
2309+
geo_info = extract_geo_info_with_overview_inheritance(
2310+
ifd, ifds, data, header.byte_order)
23052311
# Capture the Orientation tag (274) once so the post-decode flip
23062312
# below picks it up for both the stripped fallback and the tiled
23072313
# GPU pipelines. CPU read_to_array applies the array remap +

xrspatial/geotiff/_geotags.py

Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -672,6 +672,130 @@ def extract_geo_info(ifd: IFD, data: bytes | memoryview,
672672
)
673673

674674

675+
def extract_geo_info_with_overview_inheritance(
676+
ifd: IFD,
677+
ifds: list,
678+
data: bytes | memoryview,
679+
byte_order: str,
680+
) -> GeoInfo:
681+
"""Extract geo metadata, inheriting from level 0 when the IFD lacks it.
682+
683+
Wraps :func:`extract_geo_info` for overview reads. GDAL-style COG
684+
writers (including this package's :func:`to_geotiff`) put the
685+
GeoKeyDirectory, ModelPixelScale and ModelTiepoint only on the
686+
level-0 IFD. Calling ``extract_geo_info`` directly on an overview
687+
IFD therefore returns a default :class:`GeoTransform` with
688+
``has_georef=False`` and no CRS, so overview reads silently lose
689+
their georeferencing.
690+
691+
When ``ifd`` is a reduced-resolution overview (NewSubfileType bit 0
692+
set) that lacks its own georef, we re-run ``extract_geo_info`` on
693+
the first full-resolution IFD (NewSubfileType bit 0 clear, bit 2
694+
clear) and rescale the pixel size by ``width_full / width_overview``
695+
so coords cover the same extent as level 0.
696+
697+
If the overview IFD already carries its own geokeys (some writers do
698+
replicate them), this returns its own ``extract_geo_info`` output
699+
unchanged. If no full-resolution sibling exists or the parent's geo
700+
info is also missing, the overview's own (possibly empty) info is
701+
returned -- callers get the same fallback behaviour they used to.
702+
703+
Parameters
704+
----------
705+
ifd : IFD
706+
The IFD selected by ``select_overview_ifd``.
707+
ifds : list of IFD
708+
All IFDs parsed from the file. Used to locate the level-0
709+
sibling for georef inheritance.
710+
data : bytes or memoryview
711+
Full file bytes (forwarded to ``extract_geo_info``).
712+
byte_order : str
713+
``'<'`` or ``'>'`` (forwarded to ``extract_geo_info``).
714+
715+
Returns
716+
-------
717+
GeoInfo
718+
"""
719+
info = extract_geo_info(ifd, data, byte_order)
720+
721+
# Overview IFDs have NewSubfileType bit 0 set; mask IFDs (bit 2) and
722+
# page IFDs (bit 1) are filtered out by ``select_overview_ifd``
723+
# before reaching here, so we never inherit a mask's geo info.
724+
is_overview = bool(ifd.subfile_type & 1)
725+
if not is_overview or info.has_georef:
726+
return info
727+
728+
# Find the level-0 IFD: NewSubfileType has bit 0 clear (not an
729+
# overview) and bit 2 clear (not a transparency mask). This is the
730+
# same criterion ``select_overview_ifd``'s filter uses to identify
731+
# the full-resolution pyramid root.
732+
base_ifd = None
733+
for cand in ifds:
734+
if cand is ifd:
735+
continue
736+
st = cand.subfile_type
737+
if (st & 1) == 0 and (st & 4) == 0:
738+
base_ifd = cand
739+
break
740+
if base_ifd is None:
741+
return info
742+
743+
base_info = extract_geo_info(base_ifd, data, byte_order)
744+
if not base_info.has_georef:
745+
return info
746+
747+
# Rescale the pixel size by the integer reduction factor. Width and
748+
# height ratios should match for power-of-two overview pyramids;
749+
# average them so a slightly off-by-one edge size still produces a
750+
# sensible transform.
751+
base_w = base_ifd.width
752+
base_h = base_ifd.height
753+
ov_w = ifd.width
754+
ov_h = ifd.height
755+
if base_w <= 0 or base_h <= 0 or ov_w <= 0 or ov_h <= 0:
756+
return info
757+
scale_x = base_w / ov_w
758+
scale_y = base_h / ov_h
759+
760+
base_t = base_info.transform
761+
info.transform = GeoTransform(
762+
origin_x=base_t.origin_x,
763+
origin_y=base_t.origin_y,
764+
pixel_width=base_t.pixel_width * scale_x,
765+
pixel_height=base_t.pixel_height * scale_y,
766+
)
767+
info.has_georef = True
768+
769+
# Inherit CRS-side metadata from the parent. The overview IFD may
770+
# carry its own raster_type (rare) -- prefer the overview's value
771+
# when it differs from the default, otherwise inherit.
772+
info.crs_epsg = base_info.crs_epsg
773+
info.crs_wkt = base_info.crs_wkt
774+
info.crs_name = base_info.crs_name
775+
info.geog_citation = base_info.geog_citation
776+
info.datum_code = base_info.datum_code
777+
info.angular_units = base_info.angular_units
778+
info.angular_units_code = base_info.angular_units_code
779+
info.linear_units = base_info.linear_units
780+
info.linear_units_code = base_info.linear_units_code
781+
info.semi_major_axis = base_info.semi_major_axis
782+
info.inv_flattening = base_info.inv_flattening
783+
info.projection_code = base_info.projection_code
784+
info.vertical_epsg = base_info.vertical_epsg
785+
info.vertical_citation = base_info.vertical_citation
786+
info.vertical_datum = base_info.vertical_datum
787+
info.vertical_units = base_info.vertical_units
788+
info.vertical_units_code = base_info.vertical_units_code
789+
info.model_type = base_info.model_type
790+
# Keep ``raster_type`` from the overview unless it was the default
791+
# AREA, in which case prefer the parent's setting (PixelIsPoint is
792+
# almost never re-declared on overview IFDs).
793+
if info.raster_type == RASTER_PIXEL_IS_AREA:
794+
info.raster_type = base_info.raster_type
795+
796+
return info
797+
798+
675799
def _model_type_from_wkt(wkt: str) -> int:
676800
"""Guess ModelType from a WKT string prefix."""
677801
upper = wkt.strip().upper()

xrspatial/geotiff/_reader.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
GeoTransform,
2727
RASTER_PIXEL_IS_POINT,
2828
extract_geo_info,
29+
extract_geo_info_with_overview_inheritance,
2930
)
3031
from ._header import (
3132
IFD,
@@ -1236,7 +1237,13 @@ def _parse_cog_http_meta(
12361237
raise ValueError("No IFDs found in COG")
12371238

12381239
ifd = select_overview_ifd(ifds, overview_level)
1239-
geo_info = extract_geo_info(ifd, header_bytes, header.byte_order)
1240+
# When the requested IFD is an overview that lacks its own geokeys
1241+
# (the common case for COG writers, including this package's
1242+
# ``to_geotiff``), inherit and rescale the georef from the level-0
1243+
# IFD so overview reads do not silently lose CRS / transform.
1244+
# See issue #1640.
1245+
geo_info = extract_geo_info_with_overview_inheritance(
1246+
ifd, ifds, header_bytes, header.byte_order)
12401247
return header, ifd, geo_info, header_bytes
12411248

12421249

@@ -1586,7 +1593,12 @@ def read_to_array(source, *, window=None, overview_level: int | None = None,
15861593

15871594
bps = resolve_bits_per_sample(ifd.bits_per_sample)
15881595
dtype = tiff_dtype_to_numpy(bps, ifd.sample_format)
1589-
geo_info = extract_geo_info(ifd, data, header.byte_order)
1596+
# Inherit georef from level 0 when an overview IFD lacks its own
1597+
# geokeys (issue #1640). For overview_level=0 (or None) this is a
1598+
# no-op: the helper short-circuits when the IFD is not a
1599+
# NewSubfileType=overview entry.
1600+
geo_info = extract_geo_info_with_overview_inheritance(
1601+
ifd, ifds, data, header.byte_order)
15901602

15911603
# Orientation tag (274): values 2-8 mean the stored pixel order
15921604
# differs from display order. We need to remap the array post

0 commit comments

Comments
 (0)