Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .claude/sweep-metadata-state.csv
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
module,last_inspected,issue,severity_max,categories_found,notes
geotiff,2026-05-11,1632,HIGH,1;5,extract_geo_info dropped user-defined CRS WKT: GeoKey *CSTypeGeoKey=32767 files emitted attrs[crs_name] only (no crs/crs_wkt) so to_geotiff silently lost the projection on read->write. Fixed by promoting WKT-looking citation to crs_wkt across all four read backends (#1632).
geotiff,2026-05-11,1640,HIGH,1;2;4,"open_geotiff(overview_level>=1) silently dropped CRS, transform attr, and georeferenced y/x coords because GDAL-style COGs (including to_geotiff output) write GeoKeys only on the level-0 IFD. Fixed by inheriting and rescaling georef from the level-0 IFD across all four read backends (#1640)."
reproject,2026-05-10,1572;1573,HIGH,1;3;4,geoid_height_raster dropped input attrs and used dims[-2:] for 3D inputs (#1572). reproject/merge ignored nodatavals (rasterio convention) when rioxarray absent (#1573). Fixed in same branch.
14 changes: 10 additions & 4 deletions xrspatial/geotiff/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,7 @@ def _read_geo_info(source, *, overview_level: int | None = None):
Overview IFD index (0 = full resolution).
"""
from ._dtypes import resolve_bits_per_sample, tiff_dtype_to_numpy
from ._geotags import extract_geo_info
from ._geotags import extract_geo_info_with_overview_inheritance
from ._header import parse_all_ifds, parse_header, select_overview_ifd
from ._reader import _coerce_path, _is_file_like

Expand Down Expand Up @@ -275,7 +275,10 @@ def _read_geo_info(source, *, overview_level: int | None = None):
if not ifds:
raise ValueError("No IFDs found in TIFF file")
ifd = select_overview_ifd(ifds, overview_level)
geo_info = extract_geo_info(ifd, data, header.byte_order)
# Inherit georef from the level-0 IFD when the overview itself
# has no geokeys (issue #1640). Pass-through for level 0.
geo_info = extract_geo_info_with_overview_inheritance(
ifd, ifds, data, header.byte_order)
bps = resolve_bits_per_sample(ifd.bits_per_sample)
file_dtype = tiff_dtype_to_numpy(bps, ifd.sample_format)
n_bands = ifd.samples_per_pixel if ifd.samples_per_pixel > 1 else 0
Expand Down Expand Up @@ -2260,7 +2263,7 @@ def read_geotiff_gpu(source: str, *,
parse_header, parse_all_ifds, select_overview_ifd, validate_tile_layout,
)
from ._dtypes import resolve_bits_per_sample, tiff_dtype_to_numpy
from ._geotags import extract_geo_info
from ._geotags import extract_geo_info_with_overview_inheritance
from ._gpu_decode import gpu_decode_tiles

source = _coerce_path(source)
Expand Down Expand Up @@ -2297,7 +2300,10 @@ def read_geotiff_gpu(source: str, *,

bps = resolve_bits_per_sample(ifd.bits_per_sample)
file_dtype = tiff_dtype_to_numpy(bps, ifd.sample_format)
geo_info = extract_geo_info(ifd, data, header.byte_order)
# Inherit georef from the level-0 IFD when the overview itself
# has no geokeys (issue #1640); pass-through for level 0.
geo_info = extract_geo_info_with_overview_inheritance(
ifd, ifds, data, header.byte_order)
# Capture the Orientation tag (274) once so the post-decode flip
# below picks it up for both the stripped fallback and the tiled
# GPU pipelines. CPU read_to_array applies the array remap +
Expand Down
124 changes: 124 additions & 0 deletions xrspatial/geotiff/_geotags.py
Original file line number Diff line number Diff line change
Expand Up @@ -672,6 +672,130 @@ def extract_geo_info(ifd: IFD, data: bytes | memoryview,
)


def extract_geo_info_with_overview_inheritance(
ifd: IFD,
ifds: list,
data: bytes | memoryview,
byte_order: str,
) -> GeoInfo:
"""Extract geo metadata, inheriting from level 0 when the IFD lacks it.

Wraps :func:`extract_geo_info` for overview reads. GDAL-style COG
writers (including this package's :func:`to_geotiff`) put the
GeoKeyDirectory, ModelPixelScale and ModelTiepoint only on the
level-0 IFD. Calling ``extract_geo_info`` directly on an overview
IFD therefore returns a default :class:`GeoTransform` with
``has_georef=False`` and no CRS, so overview reads silently lose
their georeferencing.

When ``ifd`` is a reduced-resolution overview (NewSubfileType bit 0
set) that lacks its own georef, we re-run ``extract_geo_info`` on
the first full-resolution IFD (NewSubfileType bit 0 clear, bit 2
clear) and rescale the pixel size by ``width_full / width_overview``
so coords cover the same extent as level 0.

If the overview IFD already carries its own geokeys (some writers do
replicate them), this returns its own ``extract_geo_info`` output
unchanged. If no full-resolution sibling exists or the parent's geo
info is also missing, the overview's own (possibly empty) info is
returned -- callers get the same fallback behaviour they used to.

Parameters
----------
ifd : IFD
The IFD selected by ``select_overview_ifd``.
ifds : list of IFD
All IFDs parsed from the file. Used to locate the level-0
sibling for georef inheritance.
data : bytes or memoryview
Full file bytes (forwarded to ``extract_geo_info``).
byte_order : str
``'<'`` or ``'>'`` (forwarded to ``extract_geo_info``).

Returns
-------
GeoInfo
"""
info = extract_geo_info(ifd, data, byte_order)

# Overview IFDs have NewSubfileType bit 0 set; mask IFDs (bit 2) and
# page IFDs (bit 1) are filtered out by ``select_overview_ifd``
# before reaching here, so we never inherit a mask's geo info.
is_overview = bool(ifd.subfile_type & 1)
if not is_overview or info.has_georef:
return info

# Find the level-0 IFD: NewSubfileType has bit 0 clear (not an
# overview) and bit 2 clear (not a transparency mask). This is the
# same criterion ``select_overview_ifd``'s filter uses to identify
# the full-resolution pyramid root.
base_ifd = None
for cand in ifds:
if cand is ifd:
continue
st = cand.subfile_type
if (st & 1) == 0 and (st & 4) == 0:
base_ifd = cand
break
if base_ifd is None:
return info

base_info = extract_geo_info(base_ifd, data, byte_order)
if not base_info.has_georef:
return info

# Rescale the pixel size by the integer reduction factor. Width and
# height ratios should match for power-of-two overview pyramids;
# average them so a slightly off-by-one edge size still produces a
# sensible transform.
base_w = base_ifd.width
base_h = base_ifd.height
ov_w = ifd.width
ov_h = ifd.height
if base_w <= 0 or base_h <= 0 or ov_w <= 0 or ov_h <= 0:
return info
scale_x = base_w / ov_w
scale_y = base_h / ov_h

base_t = base_info.transform
info.transform = GeoTransform(
origin_x=base_t.origin_x,
origin_y=base_t.origin_y,
pixel_width=base_t.pixel_width * scale_x,
pixel_height=base_t.pixel_height * scale_y,
)
info.has_georef = True

# Inherit CRS-side metadata from the parent. The overview IFD may
# carry its own raster_type (rare) -- prefer the overview's value
# when it differs from the default, otherwise inherit.
info.crs_epsg = base_info.crs_epsg
info.crs_wkt = base_info.crs_wkt
info.crs_name = base_info.crs_name
info.geog_citation = base_info.geog_citation
info.datum_code = base_info.datum_code
info.angular_units = base_info.angular_units
info.angular_units_code = base_info.angular_units_code
info.linear_units = base_info.linear_units
info.linear_units_code = base_info.linear_units_code
info.semi_major_axis = base_info.semi_major_axis
info.inv_flattening = base_info.inv_flattening
info.projection_code = base_info.projection_code
info.vertical_epsg = base_info.vertical_epsg
info.vertical_citation = base_info.vertical_citation
info.vertical_datum = base_info.vertical_datum
info.vertical_units = base_info.vertical_units
info.vertical_units_code = base_info.vertical_units_code
info.model_type = base_info.model_type
# Keep ``raster_type`` from the overview unless it was the default
# AREA, in which case prefer the parent's setting (PixelIsPoint is
# almost never re-declared on overview IFDs).
if info.raster_type == RASTER_PIXEL_IS_AREA:
info.raster_type = base_info.raster_type

return info


def _model_type_from_wkt(wkt: str) -> int:
"""Guess ModelType from a WKT string prefix."""
upper = wkt.strip().upper()
Expand Down
16 changes: 14 additions & 2 deletions xrspatial/geotiff/_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
GeoTransform,
RASTER_PIXEL_IS_POINT,
extract_geo_info,
extract_geo_info_with_overview_inheritance,
)
from ._header import (
IFD,
Expand Down Expand Up @@ -1236,7 +1237,13 @@ def _parse_cog_http_meta(
raise ValueError("No IFDs found in COG")

ifd = select_overview_ifd(ifds, overview_level)
geo_info = extract_geo_info(ifd, header_bytes, header.byte_order)
# When the requested IFD is an overview that lacks its own geokeys
# (the common case for COG writers, including this package's
# ``to_geotiff``), inherit and rescale the georef from the level-0
# IFD so overview reads do not silently lose CRS / transform.
# See issue #1640.
geo_info = extract_geo_info_with_overview_inheritance(
ifd, ifds, header_bytes, header.byte_order)
return header, ifd, geo_info, header_bytes


Expand Down Expand Up @@ -1586,7 +1593,12 @@ def read_to_array(source, *, window=None, overview_level: int | None = None,

bps = resolve_bits_per_sample(ifd.bits_per_sample)
dtype = tiff_dtype_to_numpy(bps, ifd.sample_format)
geo_info = extract_geo_info(ifd, data, header.byte_order)
# Inherit georef from level 0 when an overview IFD lacks its own
# geokeys (issue #1640). For overview_level=0 (or None) this is a
# no-op: the helper short-circuits when the IFD is not a
# NewSubfileType=overview entry.
geo_info = extract_geo_info_with_overview_inheritance(
ifd, ifds, data, header.byte_order)

# Orientation tag (274): values 2-8 mean the stored pixel order
# differs from display order. We need to remap the array post
Expand Down
Loading
Loading