Skip to content

Commit d670d93

Browse files
committed
Apply TIFF Orientation tag on HTTP COG full reads (#1717)
The HTTP path in `_read_cog_http` returned the raw decoded buffer for full reads, skipping the `_apply_orientation` remap that the local path runs in `read_to_array`. Opening the same file locally vs over HTTP produced different pixel orders and transforms for any Orientation tag value other than 1. Extract the orientation + geo_info update into `_apply_orientation_with_geo` and call it from both paths. The existing rejection of windowed reads against non-default orientation is kept unchanged on both paths. Tests cover orientations 1-8 via local + HTTP round-trip against the same in-memory loopback server used by `test_cog_http_concurrent.py`, plus a regression check that windowed HTTP reads still raise on non-default orientation.
1 parent 1624d13 commit d670d93

2 files changed

Lines changed: 242 additions & 80 deletions

File tree

xrspatial/geotiff/_reader.py

Lines changed: 73 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -1662,6 +1662,14 @@ def _read_cog_http(url: str, overview_level: int | None = None,
16621662
if arr.ndim == 3 and ifd.samples_per_pixel > 1 and band is not None:
16631663
arr = arr[:, :, band]
16641664

1665+
# Apply Orientation tag (274) so HTTP reads return the same pixel
1666+
# order and transform as the local-file path. Only the full-read
1667+
# branch reaches here; the windowed-read branch is rejected above
1668+
# for non-default orientation. See issue #1717.
1669+
if ifd.orientation != 1:
1670+
arr, geo_info = _apply_orientation_with_geo(
1671+
arr, geo_info, ifd.orientation)
1672+
16651673
return arr, geo_info
16661674

16671675

@@ -1948,6 +1956,69 @@ def _apply_orientation(arr: np.ndarray, orientation: int) -> np.ndarray:
19481956
)
19491957

19501958

1959+
def _apply_orientation_with_geo(
    arr: np.ndarray, geo_info: GeoInfo, orientation: int,
) -> tuple[np.ndarray, GeoInfo]:
    """Apply Orientation tag to ``arr`` and update ``geo_info`` to match.

    Shared helper used by the local-file and HTTP COG paths so both
    return the same pixel order and transform for a given file. See
    issue #1717 for the HTTP-path parity break this consolidates.

    ``arr`` is the decoded buffer in file layout (axis 0 = rows,
    axis 1 = columns). ``orientation`` is the value of TIFF tag 274;
    ``1`` is a no-op. For georeferenced files ``geo_info.transform`` is
    reassigned on the object passed in, and that same object is returned
    alongside the remapped array.

    Emits a ``UserWarning`` for orientations 5-8 when the file is
    georeferenced and carries a CRS, because the axis swap applied to
    the transform is only shape-correct, not geometrically exact.
    """
    if orientation == 1:
        return arr, geo_info

    # Use the *file* dimensions (before orientation) for the transform
    # math below. After ``_apply_orientation`` the array shape may swap
    # (orientations 5-8), so capture them now.
    file_h = arr.shape[0]
    file_w = arr.shape[1]
    arr = _apply_orientation(arr, orientation)

    t = geo_info.transform
    # Only georeferenced files have a meaningful transform to flip.
    # Plain TIFFs with an Orientation tag but no GeoTIFF tags get their
    # pixel buffer remapped above and keep their default transform.
    if not geo_info.has_georef:
        return arr, geo_info

    if orientation in (2, 3, 4):
        # Pure mirror flips: the shape is unchanged, but the displayed
        # origin moves to the opposite edge along each flipped axis and
        # the matching pixel scale changes sign.
        #
        # PixelIsPoint tiepoints sit at pixel centers, so the opposite
        # edge pixel is ``(N - 1) * step`` away; PixelIsArea tiepoints
        # sit at pixel edges, so the opposite edge is ``N * step`` away.
        if geo_info.raster_type == RASTER_PIXEL_IS_POINT:
            x_shift = file_w - 1
            y_shift = file_h - 1
        else:
            x_shift = file_w
            y_shift = file_h
        new_origin_x = t.origin_x
        new_origin_y = t.origin_y
        new_px_w = t.pixel_width
        new_px_h = t.pixel_height
        if orientation in (2, 3):  # x flipped
            new_origin_x = t.origin_x + x_shift * t.pixel_width
            new_px_w = -t.pixel_width
        if orientation in (3, 4):  # y flipped
            new_origin_y = t.origin_y + y_shift * t.pixel_height
            new_px_h = -t.pixel_height
        geo_info.transform = GeoTransform(
            origin_x=new_origin_x,
            origin_y=new_origin_y,
            pixel_width=new_px_w,
            pixel_height=new_px_h,
        )
    elif orientation in (5, 6, 7, 8):
        # Rows and columns swap, so the pixel sizes swap axes to keep
        # coord array lengths in step with the new shape. Signs are
        # preserved rather than coerced to north-up. The origin is not
        # adjusted, so the result is inexact for rotations + flips;
        # warn below so the user knows to verify.
        geo_info.transform = GeoTransform(
            origin_x=t.origin_x,
            origin_y=t.origin_y,
            pixel_width=t.pixel_height,
            pixel_height=t.pixel_width,
        )
        if (geo_info.crs_epsg is not None
                or geo_info.crs_wkt is not None):
            import warnings
            # stacklevel=3: this helper adds one frame between the
            # ``warn`` call and the reader entry point, so level 3 keeps
            # the warning attributed to the reader's caller, as the
            # inline code (stacklevel=2) did before the extraction.
            warnings.warn(
                f"Orientation {orientation} swaps spatial axes on "
                f"a georeferenced file; the returned coords are "
                f"shape-correct but the geographic transform may "
                f"need manual adjustment.",
                stacklevel=3,
            )
    return arr, geo_info
2020+
2021+
19512022
def read_to_array(source, *, window=None, overview_level: int | None = None,
19522023
band: int | None = None,
19532024
max_pixels: int = MAX_PIXELS_DEFAULT,
@@ -2076,86 +2147,8 @@ def read_to_array(source, *, window=None, overview_level: int | None = None,
20762147
arr = arr[:, :, band]
20772148

20782149
if orientation != 1:
2079-
# Use the *file* dimensions (before orientation) for the
2080-
# transform-flip math below. After ``_apply_orientation`` the
2081-
# array shape may swap (orientations 5-8), so capture them now.
2082-
file_h = arr.shape[0]
2083-
file_w = arr.shape[1]
2084-
arr = _apply_orientation(arr, orientation)
2085-
# The pixel buffer was just remapped; the transform that maps
2086-
# display pixels back to geographic coordinates needs the
2087-
# matching remap or the y/x coords still describe the file's
2088-
# original layout.
2089-
#
2090-
# Orientations 2-4 are pure mirror flips: the array shape stays
2091-
# the same, but the displayed origin moves to the opposite
2092-
# edge along whichever axes were flipped. Update origin and
2093-
# sign of the affected pixel scale so xarray coords land on
2094-
# the right geographic positions.
2095-
#
2096-
# Orientations 5-8 swap rows and columns. Pixel sizes swap
2097-
# axes so coord array lengths match the new shape. Signs are
2098-
# preserved rather than coerced to north-up since some
2099-
# legitimate files use a non-standard sign convention
2100-
# (south-up, west-up). For 6/7/8 (rotations + flips, not a
2101-
# pure transpose) the swap is geometrically inexact for
2102-
# georef'd files: a strict implementation would also adjust
2103-
# origin and re-sign per axis. Those files are vanishingly
2104-
# rare in practice (TIFF Orientation 5-8 with a meaningful
2105-
# ModelTransformation); warn so the user knows to verify.
2106-
t = geo_info.transform
2107-
# Only georeferenced files have a meaningful transform to flip.
2108-
# Plain TIFFs with an Orientation tag but no GeoTIFF tags get
2109-
# their pixel buffer remapped above; their default transform
2110-
# is left untouched and the downstream consumer falls back to
2111-
# integer pixel coords.
2112-
if not geo_info.has_georef:
2113-
pass
2114-
elif orientation in (2, 3, 4):
2115-
# PixelIsPoint tiepoints are at pixel centers, so the
2116-
# opposite-edge pixel sits ``(N-1) * step`` away. PixelIsArea
2117-
# tiepoints are at pixel edges, so the opposite edge is
2118-
# ``N * step`` away. The two cases collapse to a single
2119-
# formula below by switching the offset.
2120-
if geo_info.raster_type == RASTER_PIXEL_IS_POINT:
2121-
x_shift = file_w - 1
2122-
y_shift = file_h - 1
2123-
else:
2124-
x_shift = file_w
2125-
y_shift = file_h
2126-
new_origin_x = t.origin_x
2127-
new_origin_y = t.origin_y
2128-
new_px_w = t.pixel_width
2129-
new_px_h = t.pixel_height
2130-
if orientation in (2, 3): # x flipped
2131-
new_origin_x = t.origin_x + x_shift * t.pixel_width
2132-
new_px_w = -t.pixel_width
2133-
if orientation in (3, 4): # y flipped
2134-
new_origin_y = t.origin_y + y_shift * t.pixel_height
2135-
new_px_h = -t.pixel_height
2136-
geo_info.transform = GeoTransform(
2137-
origin_x=new_origin_x,
2138-
origin_y=new_origin_y,
2139-
pixel_width=new_px_w,
2140-
pixel_height=new_px_h,
2141-
)
2142-
elif orientation in (5, 6, 7, 8):
2143-
geo_info.transform = GeoTransform(
2144-
origin_x=t.origin_x,
2145-
origin_y=t.origin_y,
2146-
pixel_width=t.pixel_height,
2147-
pixel_height=t.pixel_width,
2148-
)
2149-
if (geo_info.crs_epsg is not None
2150-
or geo_info.crs_wkt is not None):
2151-
import warnings
2152-
warnings.warn(
2153-
f"Orientation {orientation} swaps spatial axes on "
2154-
f"a georeferenced file; the returned coords are "
2155-
f"shape-correct but the geographic transform may "
2156-
f"need manual adjustment.",
2157-
stacklevel=2,
2158-
)
2150+
arr, geo_info = _apply_orientation_with_geo(
2151+
arr, geo_info, orientation)
21592152

21602153
# MinIsWhite (photometric=0): invert single-band grayscale values
21612154
if ifd.photometric == 0 and ifd.samples_per_pixel == 1:
Lines changed: 169 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,169 @@
1+
"""HTTP COG full reads must honour TIFF Orientation tag (274).
2+
3+
Issue #1717: ``_read_cog_http`` skipped ``_apply_orientation`` on the
4+
full-read branch, so opening the same oriented file locally vs over HTTP
5+
returned different pixel orders. This is a backend parity break.
6+
7+
These tests open the same Orientation-tagged TIFF via both paths and
8+
assert the returned array and geo transform agree, for every value of
9+
the tag (1-8). The existing rejection of windowed reads + non-default
10+
orientation must still raise.
11+
"""
12+
from __future__ import annotations
13+
14+
import http.server
15+
import socketserver
16+
import threading
17+
18+
import numpy as np
19+
import pytest
20+
21+
from xrspatial.geotiff._reader import _read_cog_http, read_to_array
22+
23+
tifffile = pytest.importorskip("tifffile")
24+
25+
26+
_ORIENTATIONS = [1, 2, 3, 4, 5, 6, 7, 8]
27+
28+
29+
def _write_with_orientation(path, arr, orientation):
    """Write ``arr`` to ``path`` as a TIFF carrying Orientation tag 274."""
    # NOTE(review): tuple layout presumably follows tifffile's extratags
    # convention (code, dtype, count, value, writeonce) -- confirm.
    orientation_tag = (274, 'H', 1, orientation, True)
    tifffile.imwrite(str(path), arr, extratags=[orientation_tag])
35+
36+
37+
class _RangeHandler(http.server.BaseHTTPRequestHandler):
    """Serve a single in-memory bytes payload with HTTP Range support.

    Supported ``Range`` forms are ``bytes=START-END``, ``bytes=START-``
    and the suffix form ``bytes=-N`` (last ``N`` bytes). A missing,
    malformed or multi-range header is ignored and the whole payload is
    returned with status 200. A range that starts at or beyond the end
    of the payload gets a 416 response.
    """

    # Bytes served for every request; subclasses override this.
    payload: bytes = b''

    def do_GET(self):  # noqa: N802
        """Answer a GET with the full payload or the requested slice."""
        total = len(self.payload)
        span = self._requested_span(total)
        if span is None:
            self._reply(200, self.payload)
            return
        start, end = span
        if start >= total:
            # Nothing to serve: report the payload size without a body.
            self._reply(416, b'', content_range=f'bytes */{total}')
            return
        # Slicing clips ``end`` to the payload, so report the real last
        # byte index rather than the requested one.
        chunk = self.payload[start:end + 1]
        self._reply(
            206, chunk,
            content_range=f'bytes {start}-{start + len(chunk) - 1}/{total}',
        )

    def _requested_span(self, total):
        """Return ``(start, end)`` from the Range header, or ``None``.

        ``None`` means "serve the whole payload": the header is absent,
        uses another unit, or cannot be parsed as one byte range.
        """
        header = self.headers.get('Range')
        if not header or not header.startswith('bytes='):
            return None
        first, dash, last = header[len('bytes='):].partition('-')
        first, last = first.strip(), last.strip()
        if not dash:
            return None
        try:
            if first:
                start = int(first)
                end = int(last) if last else total - 1
            elif last:
                # Suffix range: the final ``int(last)`` bytes.
                start = max(total - int(last), 0)
                end = total - 1
            else:
                return None
        except ValueError:
            # Non-numeric bounds or a multi-range list such as "0-1,4-5".
            return None
        if start < 0 or end < start:
            return None
        return start, end

    def _reply(self, status, body, content_range=None):
        """Send ``status``, the standard headers and ``body``."""
        self.send_response(status)
        self.send_header('Content-Type', 'application/octet-stream')
        if content_range is not None:
            self.send_header('Content-Range', content_range)
        self.send_header('Content-Length', str(len(body)))
        self.end_headers()
        self.wfile.write(body)

    def log_message(self, *_args, **_kwargs):
        """Silence the default per-request stderr logging."""
        pass
68+
69+
70+
def _serve(payload: bytes):
    """Start a loopback HTTP server that serves ``payload``.

    Binds to 127.0.0.1 on an OS-assigned port and runs
    ``serve_forever`` on a daemon thread. Returns ``(httpd, port)``;
    the callers in this module call ``httpd.shutdown()`` and
    ``httpd.server_close()`` when done.
    """
    # A fresh handler subclass per call keeps each server's payload
    # separate from the others.
    handler_cls = type(
        'RangeHandler1717', (_RangeHandler,), {'payload': payload}
    )
    httpd = socketserver.TCPServer(('127.0.0.1', 0), handler_cls)
    threading.Thread(target=httpd.serve_forever, daemon=True).start()
    return httpd, httpd.server_address[1]
79+
80+
81+
@pytest.fixture
def _allow_loopback(monkeypatch):
    """Set ``XRSPATIAL_GEOTIFF_ALLOW_PRIVATE_HOSTS=1`` for one test.

    NOTE(review): presumably this lets the HTTP reader fetch from
    127.0.0.1 -- confirm against the reader's host validation.
    """
    monkeypatch.setenv('XRSPATIAL_GEOTIFF_ALLOW_PRIVATE_HOSTS', '1')
84+
85+
86+
@pytest.mark.parametrize("orientation", _ORIENTATIONS)
def test_http_full_read_matches_local_for_orientation(
    tmp_path, _allow_loopback, orientation,
):
    """A full read over HTTP must equal the local read of the same file."""
    # Seed with the tag value so every parametrized case is reproducible.
    pixels = np.random.default_rng(orientation).integers(
        0, 255, size=(12, 16), dtype=np.uint8)
    tiff_path = tmp_path / f"tmp_1717_orient_{orientation}.tif"
    _write_with_orientation(tiff_path, pixels, orientation)
    payload = tiff_path.read_bytes()

    arr_local, geo_local = read_to_array(str(tiff_path))

    httpd, port = _serve(payload)
    try:
        arr_http, geo_http = _read_cog_http(
            f'http://127.0.0.1:{port}/orient_{orientation}.tif')
    finally:
        httpd.shutdown()
        httpd.server_close()

    # Check the shape first so a mismatch reports a readable message
    # before the element-wise comparison.
    assert arr_http.shape == arr_local.shape, (
        f"orientation={orientation}: HTTP shape {arr_http.shape} != "
        f"local shape {arr_local.shape}"
    )
    np.testing.assert_array_equal(
        arr_http, arr_local,
        err_msg=f"orientation={orientation}: HTTP pixels differ from local",
    )
    assert geo_http.transform == geo_local.transform, (
        f"orientation={orientation}: transform mismatch "
        f"http={geo_http.transform} local={geo_local.transform}"
    )
121+
122+
123+
@pytest.mark.parametrize("orientation", [2, 3, 4, 5, 6, 7, 8])
def test_http_windowed_read_rejects_non_default_orientation(
    tmp_path, _allow_loopback, orientation,
):
    """A windowed HTTP read of an oriented file must raise ``ValueError``.

    Mirrors the local-path guard so the contract is uniform across
    backends. Resolving windowed-read semantics for oriented files is
    out of scope for #1717.
    """
    tiff_path = tmp_path / f"tmp_1717_window_reject_{orientation}.tif"
    _write_with_orientation(
        tiff_path, np.zeros((8, 8), dtype=np.uint8), orientation)
    payload = tiff_path.read_bytes()

    httpd, port = _serve(payload)
    url = f'http://127.0.0.1:{port}/window_{orientation}.tif'
    try:
        with pytest.raises(ValueError, match="Orientation tag"):
            _read_cog_http(url, window=(0, 0, 4, 4))
    finally:
        httpd.shutdown()
        httpd.server_close()
148+
149+
150+
def test_http_default_orientation_still_works(tmp_path, _allow_loopback):
    """Sanity: with orientation=1 the HTTP read equals local and source."""
    expected = np.arange(48, dtype=np.uint8).reshape(6, 8)
    tiff_path = tmp_path / "tmp_1717_default.tif"
    _write_with_orientation(tiff_path, expected, 1)
    payload = tiff_path.read_bytes()

    arr_local, _ = read_to_array(str(tiff_path))

    httpd, port = _serve(payload)
    try:
        arr_http, _ = _read_cog_http(f'http://127.0.0.1:{port}/default.tif')
    finally:
        httpd.shutdown()
        httpd.server_close()

    # Both backends must agree with each other and with the pixels
    # that were written to disk.
    np.testing.assert_array_equal(arr_http, arr_local)
    np.testing.assert_array_equal(arr_http, expected)

0 commit comments

Comments
 (0)