Skip to content

Commit a5c28f8

Browse files
authored
Fix windowed read coords of non-georef TIFFs (#1710) (#1722)
* Fix windowed reads of non-georef TIFFs producing half-pixel-shifted coords (#1710) A TIFF with no GeoTIFF tags used to get different `y`/`x` coords depending on whether `window=` was passed. Full reads took the `has_georef=False` shortcut in `_geo_to_coords` and returned integer pixel coords with `int64` dtype. Windowed reads skipped that shortcut and synthesised float64 coords like `[-0.5, -1.5, ...]` from the default unit `GeoTransform`, because the `PixelIsArea` half-pixel shift was applied to a transform that was never real. This patch threads `has_georef` through all three windowed-read paths (`open_geotiff` eager, `read_geotiff_dask`, `_gpu_apply_window_band`) and through `_populate_attrs_from_geo_info`. Non-georef files now get integer pixel coords on both full and windowed reads, and do not emit a fabricated `attrs['transform']` identity tuple. Georef files are unaffected: they still get float64 coords with the half-pixel shift and a real transform attr. Adds regression coverage in `xrspatial/geotiff/tests/test_no_georef_windowed_coords_1710.py` covering numpy, dask+numpy, cupy, and dask+cupy backends. Closes #1710. * Update sweep-metadata state CSV for geotiff (#1710) * Tighten has_georef comment per Copilot feedback on #1722 Clarify that the gate is "no transform tags present" (ModelTransformation/ModelPixelScale/ModelTiepoint/GeoKeys), not "files lack any GeoTIFF tags".
1 parent 1ab1d0e commit a5c28f8

3 files changed

Lines changed: 241 additions & 26 deletions

File tree

.claude/sweep-metadata-state.csv

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
module,last_inspected,issue,severity_max,categories_found,notes
2-
geotiff,2026-05-12,1657,HIGH,1;5,NewSubfileType (254) and SubIFDs (330) leaked through attrs['extra_tags'] across all four backends. Round-trip read overview -> write produced primary IFD wrongly marked as overview (NewSubfileType=1) and SubIFDs with stale byte offsets that crashed readers. Fixed by adding both tags to _MANAGED_TAGS and to _DANGEROUS_EXTRA_TAG_IDS writer-side filter (#1657).
2+
geotiff,2026-05-12,1710,MEDIUM,2,"open_geotiff/read_geotiff_dask/read_geotiff_gpu windowed reads of non-georef TIFFs produced float64 half-pixel-shifted coords while full reads produced int64 [0,1,2,...] coords. Affected every backend the same way; not a backend parity bug, a windowed-vs-full inconsistency. _populate_attrs_from_geo_info also fabricated an identity transform attr on non-georef files. Fixed by threading has_georef through all windowed coord paths and through the transform attr emitter (#1710)."
33
reproject,2026-05-10,1572;1573,HIGH,1;3;4,geoid_height_raster dropped input attrs and used dims[-2:] for 3D inputs (#1572). reproject/merge ignored nodatavals (rasterio convention) when rioxarray absent (#1573). Fixed in same branch.

xrspatial/geotiff/__init__.py

Lines changed: 49 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -436,7 +436,16 @@ def _populate_attrs_from_geo_info(attrs: dict, geo_info, *, window=None) -> None
436436
attrs['raster_type'] = 'point'
437437

438438
src_t = geo_info.transform
439-
if src_t is not None:
439+
# Skip the transform attr for files where no GeoTIFF transform tags
440+
# (ModelTransformation, ModelPixelScale, or ModelTiepoint) are
441+
# present, signalled by ``has_georef=False``. GeoKeys / CRS metadata
442+
# can still be present in that case. The default unit
443+
# ``GeoTransform`` is a struct placeholder, not real georef --
444+
# emitting it leaks an identity transform into attrs and confuses
445+
# downstream code that expects ``'transform' in attrs`` to mean
446+
# "this raster has a georef transform" (#1710).
447+
has_georef = getattr(geo_info, 'has_georef', True)
448+
if src_t is not None and has_georef:
440449
if window is not None:
441450
r0, c0, _r1, _c1 = window
442451
origin_x_w = float(src_t.origin_x) + c0 * float(src_t.pixel_width)
@@ -695,15 +704,22 @@ def open_geotiff(source, *, dtype=None,
695704
if window is not None:
696705
# Adjust coordinates for windowed read. ``read_to_array`` rejected
697706
# out-of-bounds windows above, so ``r0/c0/r1/c1`` are guaranteed
698-
# in-range here (no clamp needed).
707+
# in-range here (no clamp needed). For files with no GeoTIFF
708+
# tags (``has_georef=False``), the default unit transform is
709+
# not real, so fall back to integer pixel coords matching the
710+
# ``_geo_to_coords`` shortcut taken on full reads. See #1710.
699711
r0, c0, r1, c1 = window
700-
t = geo_info.transform
701-
if geo_info.raster_type == RASTER_PIXEL_IS_POINT:
702-
full_x = np.arange(c0, c1, dtype=np.float64) * t.pixel_width + t.origin_x
703-
full_y = np.arange(r0, r1, dtype=np.float64) * t.pixel_height + t.origin_y
712+
if not getattr(geo_info, 'has_georef', True):
713+
full_x = np.arange(c0, c1, dtype=np.int64)
714+
full_y = np.arange(r0, r1, dtype=np.int64)
704715
else:
705-
full_x = np.arange(c0, c1, dtype=np.float64) * t.pixel_width + t.origin_x + t.pixel_width * 0.5
706-
full_y = np.arange(r0, r1, dtype=np.float64) * t.pixel_height + t.origin_y + t.pixel_height * 0.5
716+
t = geo_info.transform
717+
if geo_info.raster_type == RASTER_PIXEL_IS_POINT:
718+
full_x = np.arange(c0, c1, dtype=np.float64) * t.pixel_width + t.origin_x
719+
full_y = np.arange(r0, r1, dtype=np.float64) * t.pixel_height + t.origin_y
720+
else:
721+
full_x = np.arange(c0, c1, dtype=np.float64) * t.pixel_width + t.origin_x + t.pixel_width * 0.5
722+
full_y = np.arange(r0, r1, dtype=np.float64) * t.pixel_height + t.origin_y + t.pixel_height * 0.5
707723
coords = {'y': full_y, 'x': full_x}
708724

709725
if name is None:
@@ -1844,20 +1860,26 @@ def read_geotiff_dask(source: str, *, dtype=None, chunks: int | tuple = 512,
18441860
raise ValueError(
18451861
f"window={window} is outside the source extent "
18461862
f"({full_h}x{full_w}) or has non-positive size.")
1847-
# Mirror the eager-path windowed coord computation in open_geotiff.
1848-
t = geo_info.transform
1849-
if geo_info.raster_type == RASTER_PIXEL_IS_POINT:
1850-
win_x = (np.arange(win_c0, win_c1, dtype=np.float64)
1851-
* t.pixel_width + t.origin_x)
1852-
win_y = (np.arange(win_r0, win_r1, dtype=np.float64)
1853-
* t.pixel_height + t.origin_y)
1863+
# Mirror the eager-path windowed coord computation in open_geotiff,
1864+
# including the ``has_georef=False`` shortcut to integer pixel
1865+
# coords for non-georef files (#1710).
1866+
if not getattr(geo_info, 'has_georef', True):
1867+
win_x = np.arange(win_c0, win_c1, dtype=np.int64)
1868+
win_y = np.arange(win_r0, win_r1, dtype=np.int64)
18541869
else:
1855-
win_x = (np.arange(win_c0, win_c1, dtype=np.float64)
1856-
* t.pixel_width + t.origin_x
1857-
+ t.pixel_width * 0.5)
1858-
win_y = (np.arange(win_r0, win_r1, dtype=np.float64)
1859-
* t.pixel_height + t.origin_y
1860-
+ t.pixel_height * 0.5)
1870+
t = geo_info.transform
1871+
if geo_info.raster_type == RASTER_PIXEL_IS_POINT:
1872+
win_x = (np.arange(win_c0, win_c1, dtype=np.float64)
1873+
* t.pixel_width + t.origin_x)
1874+
win_y = (np.arange(win_r0, win_r1, dtype=np.float64)
1875+
* t.pixel_height + t.origin_y)
1876+
else:
1877+
win_x = (np.arange(win_c0, win_c1, dtype=np.float64)
1878+
* t.pixel_width + t.origin_x
1879+
+ t.pixel_width * 0.5)
1880+
win_y = (np.arange(win_r0, win_r1, dtype=np.float64)
1881+
* t.pixel_height + t.origin_y
1882+
+ t.pixel_height * 0.5)
18611883
coords = {'y': win_y, 'x': win_x}
18621884
full_h = win_r1 - win_r0
18631885
full_w = win_c1 - win_c0
@@ -2275,12 +2297,14 @@ def _gpu_apply_window_band(arr_gpu, geo_info, *, window, band):
22752297
out_w = c1 - c0
22762298
# Mirror the eager-numpy windowed coord computation in
22772299
# open_geotiff so the GPU-windowed coords carry the same
2278-
# absolute pixel-center values as the CPU path.
2300+
# absolute pixel-center values as the CPU path. For files
2301+
# with no GeoTIFF tags (``has_georef=False``), fall back to
2302+
# integer pixel coords matching ``_geo_to_coords`` (#1710).
22792303
t = geo_info.transform
2280-
if t is None:
2304+
if t is None or not getattr(geo_info, 'has_georef', True):
22812305
coords = {
2282-
'y': np.arange(out_h, dtype=np.int64),
2283-
'x': np.arange(out_w, dtype=np.int64),
2306+
'y': np.arange(r0, r1, dtype=np.int64),
2307+
'x': np.arange(c0, c1, dtype=np.int64),
22842308
}
22852309
else:
22862310
if geo_info.raster_type == RASTER_PIXEL_IS_POINT:
Lines changed: 191 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,191 @@
1+
"""Regression tests for issue #1710.
2+
3+
A TIFF with no GeoTIFF tags (no ModelPixelScale / ModelTiepoint /
4+
GeoKeyDirectory) used to produce different `y`/`x` coordinates between
5+
full reads and windowed reads. Full reads emitted integer pixel coords
6+
``[0, 1, 2, ...]`` via the ``has_georef=False`` shortcut in
7+
``_geo_to_coords``; windowed reads bypassed that shortcut and synthesised
8+
float64 coords like ``[-0.5, -1.5, ...]`` from the default unit
9+
``GeoTransform``.
10+
11+
The fix routes every windowed-read path through the same shortcut, and
12+
suppresses the fabricated ``attrs['transform']`` for non-georef files.
13+
"""
14+
from __future__ import annotations
15+
16+
import importlib.util
17+
18+
import numpy as np
19+
import pytest
20+
21+
from xrspatial.geotiff import open_geotiff
22+
from xrspatial.geotiff._writer import write
23+
24+
25+
def _gpu_available() -> bool:
26+
if importlib.util.find_spec("cupy") is None:
27+
return False
28+
try:
29+
import cupy
30+
return bool(cupy.cuda.is_available())
31+
except Exception:
32+
return False
33+
34+
35+
_HAS_GPU = _gpu_available()
36+
_gpu_only = pytest.mark.skipif(not _HAS_GPU, reason="cupy + CUDA required")
37+
38+
39+
@pytest.fixture
40+
def no_georef_path_1710(tmp_path):
41+
"""8x8 float32 TIFF with NO GeoTIFF tags (no ModelPixelScale etc.)."""
42+
arr = np.arange(64, dtype=np.float32).reshape(8, 8)
43+
path = str(tmp_path / "no_georef_1710.tif")
44+
write(arr, path, compression='none', tiled=False)
45+
return path
46+
47+
48+
class TestEagerWindowedCoords:
49+
50+
def test_full_read_integer_coords(self, no_georef_path_1710):
51+
da = open_geotiff(no_georef_path_1710)
52+
assert da.y.dtype == np.int64
53+
assert da.x.dtype == np.int64
54+
np.testing.assert_array_equal(da.y.values, np.arange(8))
55+
np.testing.assert_array_equal(da.x.values, np.arange(8))
56+
57+
def test_windowed_read_integer_coords(self, no_georef_path_1710):
58+
da = open_geotiff(no_georef_path_1710, window=(0, 0, 4, 4))
59+
assert da.y.dtype == np.int64
60+
assert da.x.dtype == np.int64
61+
np.testing.assert_array_equal(da.y.values, np.arange(4))
62+
np.testing.assert_array_equal(da.x.values, np.arange(4))
63+
64+
def test_offset_window_integer_coords(self, no_georef_path_1710):
65+
"""Windowed read at (2, 3) origin should give coords [2,3,4,5] / [3,4,5,6]."""
66+
da = open_geotiff(no_georef_path_1710, window=(2, 3, 6, 7))
67+
assert da.y.dtype == np.int64
68+
assert da.x.dtype == np.int64
69+
np.testing.assert_array_equal(da.y.values, np.arange(2, 6))
70+
np.testing.assert_array_equal(da.x.values, np.arange(3, 7))
71+
72+
def test_no_transform_attr_emitted(self, no_georef_path_1710):
73+
"""Non-georef files should not advertise a fabricated transform."""
74+
da = open_geotiff(no_georef_path_1710)
75+
assert 'transform' not in da.attrs, (
76+
f"non-georef file should not carry a fabricated transform; "
77+
f"got attrs={dict(da.attrs)}"
78+
)
79+
# Same for windowed read
80+
da_w = open_geotiff(no_georef_path_1710, window=(0, 0, 4, 4))
81+
assert 'transform' not in da_w.attrs
82+
83+
84+
class TestDaskWindowedCoords:
85+
86+
def test_full_read_integer_coords(self, no_georef_path_1710):
87+
da = open_geotiff(no_georef_path_1710, chunks=4)
88+
assert da.y.dtype == np.int64
89+
np.testing.assert_array_equal(da.y.values, np.arange(8))
90+
91+
def test_windowed_read_integer_coords(self, no_georef_path_1710):
92+
da = open_geotiff(no_georef_path_1710, chunks=4, window=(0, 0, 4, 4))
93+
assert da.y.dtype == np.int64
94+
np.testing.assert_array_equal(da.y.values, np.arange(4))
95+
96+
def test_offset_window_integer_coords(self, no_georef_path_1710):
97+
da = open_geotiff(no_georef_path_1710, chunks=4, window=(2, 3, 6, 7))
98+
assert da.y.dtype == np.int64
99+
np.testing.assert_array_equal(da.y.values, np.arange(2, 6))
100+
np.testing.assert_array_equal(da.x.values, np.arange(3, 7))
101+
102+
103+
@_gpu_only
104+
class TestGpuWindowedCoords:
105+
106+
def test_full_read_integer_coords(self, no_georef_path_1710):
107+
da = open_geotiff(no_georef_path_1710, gpu=True)
108+
assert da.y.dtype == np.int64
109+
np.testing.assert_array_equal(da.y.values, np.arange(8))
110+
111+
def test_windowed_read_integer_coords(self, no_georef_path_1710):
112+
da = open_geotiff(no_georef_path_1710, gpu=True, window=(0, 0, 4, 4))
113+
assert da.y.dtype == np.int64
114+
np.testing.assert_array_equal(da.y.values, np.arange(4))
115+
116+
def test_dask_cupy_windowed_integer_coords(self, no_georef_path_1710):
117+
da = open_geotiff(no_georef_path_1710, gpu=True, chunks=4,
118+
window=(0, 0, 4, 4))
119+
assert da.y.dtype == np.int64
120+
np.testing.assert_array_equal(da.y.values, np.arange(4))
121+
122+
123+
class TestBackendParity:
124+
"""Full read and windowed read must agree on coord dtype and values
125+
across every backend pair."""
126+
127+
def test_dtype_parity_full(self, no_georef_path_1710):
128+
np_da = open_geotiff(no_georef_path_1710)
129+
dk_da = open_geotiff(no_georef_path_1710, chunks=4)
130+
assert np_da.y.dtype == dk_da.y.dtype == np.int64
131+
if _HAS_GPU:
132+
gpu_da = open_geotiff(no_georef_path_1710, gpu=True)
133+
assert gpu_da.y.dtype == np.int64
134+
135+
def test_dtype_parity_windowed(self, no_georef_path_1710):
136+
win = (0, 0, 4, 4)
137+
np_da = open_geotiff(no_georef_path_1710, window=win)
138+
dk_da = open_geotiff(no_georef_path_1710, chunks=4, window=win)
139+
assert np_da.y.dtype == dk_da.y.dtype == np.int64
140+
if _HAS_GPU:
141+
gpu_da = open_geotiff(no_georef_path_1710, gpu=True, window=win)
142+
assert gpu_da.y.dtype == np.int64
143+
144+
def test_full_and_windowed_first_n_match(self, no_georef_path_1710):
145+
"""Coords for a window starting at (0, 0) should equal the first N
146+
coords of the full read."""
147+
win = (0, 0, 4, 4)
148+
full = open_geotiff(no_georef_path_1710)
149+
win_da = open_geotiff(no_georef_path_1710, window=win)
150+
np.testing.assert_array_equal(full.y.values[:4], win_da.y.values)
151+
np.testing.assert_array_equal(full.x.values[:4], win_da.x.values)
152+
153+
154+
class TestGeorefStillWorks:
155+
"""The fix must not regress georeferenced reads.
156+
157+
Files with real GeoTIFF tags still get float64 coords with the
158+
half-pixel shift for PixelIsArea.
159+
"""
160+
161+
def test_georef_full_read_keeps_float64(self, tmp_path):
162+
from xrspatial.geotiff._geotags import GeoTransform
163+
arr = np.arange(64, dtype=np.float32).reshape(8, 8)
164+
gt = GeoTransform(
165+
origin_x=500000.0, origin_y=4000000.0,
166+
pixel_width=30.0, pixel_height=-30.0,
167+
)
168+
path = str(tmp_path / "georef_1710.tif")
169+
write(arr, path, geo_transform=gt, crs_epsg=32610,
170+
compression='none', tiled=False)
171+
da = open_geotiff(path)
172+
assert da.y.dtype == np.float64
173+
assert da.x.dtype == np.float64
174+
assert da.x.values[0] == pytest.approx(500015.0)
175+
# transform attr SHOULD be present for georef files
176+
assert 'transform' in da.attrs
177+
178+
def test_georef_windowed_read_keeps_float64(self, tmp_path):
179+
from xrspatial.geotiff._geotags import GeoTransform
180+
arr = np.arange(64, dtype=np.float32).reshape(8, 8)
181+
gt = GeoTransform(
182+
origin_x=500000.0, origin_y=4000000.0,
183+
pixel_width=30.0, pixel_height=-30.0,
184+
)
185+
path = str(tmp_path / "georef_win_1710.tif")
186+
write(arr, path, geo_transform=gt, crs_epsg=32610,
187+
compression='none', tiled=False)
188+
da = open_geotiff(path, window=(0, 0, 4, 4))
189+
assert da.y.dtype == np.float64
190+
assert da.x.dtype == np.float64
191+
assert 'transform' in da.attrs

0 commit comments

Comments
 (0)