diff --git a/xrspatial/geotiff/_reader.py b/xrspatial/geotiff/_reader.py
index 0c2071b2..9ec33f80 100644
--- a/xrspatial/geotiff/_reader.py
+++ b/xrspatial/geotiff/_reader.py
@@ -1662,6 +1662,14 @@ def _read_cog_http(url: str, overview_level: int | None = None,
     if arr.ndim == 3 and ifd.samples_per_pixel > 1 and band is not None:
         arr = arr[:, :, band]
 
+    # Apply Orientation tag (274) so HTTP reads return the same pixel
+    # order and transform as the local-file path. Only the full-read
+    # branch reaches here; the windowed-read branch is rejected above
+    # for non-default orientation. See issue #1717.
+    if ifd.orientation != 1:
+        arr, geo_info = _apply_orientation_with_geo(
+            arr, geo_info, ifd.orientation)
+
     return arr, geo_info
 
 
@@ -1948,6 +1956,89 @@ def _apply_orientation(arr: np.ndarray, orientation: int) -> np.ndarray:
     )
 
 
+def _apply_orientation_with_geo(
+    arr: np.ndarray, geo_info: GeoInfo, orientation: int,
+) -> tuple[np.ndarray, GeoInfo]:
+    """Apply Orientation tag to ``arr`` and update ``geo_info`` to match.
+
+    Shared helper used by the local-file and HTTP COG paths so both
+    return the same pixel order and transform for a given file. See
+    issue #1717 for the HTTP-path parity break this consolidates.
+
+    ``geo_info`` is updated in place (its ``transform`` is replaced)
+    and is also returned alongside the reoriented array.
+    """
+    if orientation == 1:
+        return arr, geo_info
+    # Use the *file* dimensions (before orientation) for the transform
+    # math below. After ``_apply_orientation`` the array shape may swap
+    # (orientations 5-8), so capture them now.
+    file_h = arr.shape[0]
+    file_w = arr.shape[1]
+    arr = _apply_orientation(arr, orientation)
+    t = geo_info.transform
+    # Only georeferenced files have a meaningful transform to flip.
+    # Plain TIFFs with an Orientation tag but no GeoTIFF tags keep
+    # their default transform untouched.
+    if not geo_info.has_georef:
+        pass
+    elif orientation in (2, 3, 4):
+        # Pure mirror flips: the shape is unchanged, but the origin moves
+        # to the opposite edge along each flipped axis and that axis'
+        # pixel size changes sign. PixelIsPoint tiepoints are at pixel
+        # centers, so the opposite pixel is ``(N-1) * step`` away;
+        # PixelIsArea tiepoints are at pixel edges, so ``N * step``.
+        if geo_info.raster_type == RASTER_PIXEL_IS_POINT:
+            x_shift = file_w - 1
+            y_shift = file_h - 1
+        else:
+            x_shift = file_w
+            y_shift = file_h
+        new_origin_x = t.origin_x
+        new_origin_y = t.origin_y
+        new_px_w = t.pixel_width
+        new_px_h = t.pixel_height
+        if orientation in (2, 3):  # x flipped
+            new_origin_x = t.origin_x + x_shift * t.pixel_width
+            new_px_w = -t.pixel_width
+        if orientation in (3, 4):  # y flipped
+            new_origin_y = t.origin_y + y_shift * t.pixel_height
+            new_px_h = -t.pixel_height
+        geo_info.transform = GeoTransform(
+            origin_x=new_origin_x,
+            origin_y=new_origin_y,
+            pixel_width=new_px_w,
+            pixel_height=new_px_h,
+        )
+    elif orientation in (5, 6, 7, 8):
+        # Rows and columns swap, so pixel sizes swap axes to keep coord
+        # array lengths matching the new shape; signs are preserved, not
+        # coerced to north-up. For 6/7/8 this is geometrically inexact on
+        # georeferenced files (a strict version would also adjust origin
+        # and re-sign per axis), so warn the user to verify.
+        geo_info.transform = GeoTransform(
+            origin_x=t.origin_x,
+            origin_y=t.origin_y,
+            pixel_width=t.pixel_height,
+            pixel_height=t.pixel_width,
+        )
+        if (geo_info.crs_epsg is not None
+                or geo_info.crs_wkt is not None):
+            import warnings
+            # stacklevel=3 skips this helper and its direct caller
+            # (``read_to_array`` / ``_read_cog_http``) so the warning
+            # points at the code that invoked the reader, as it did
+            # when this logic was inline with ``stacklevel=2``.
+            warnings.warn(
+                f"Orientation {orientation} swaps spatial axes on "
+                f"a georeferenced file; the returned coords are "
+                f"shape-correct but the geographic transform may "
+                f"need manual adjustment.",
+                stacklevel=3,
+            )
+    return arr, geo_info
+
+
 def read_to_array(source, *, window=None, overview_level: int | None = None,
                   band: int | None = None,
                   max_pixels: int = MAX_PIXELS_DEFAULT,
@@ -2076,86 +2167,8 @@ def read_to_array(source, *, window=None, overview_level: int | None = None,
             arr = arr[:, :, band]
 
         if orientation != 1:
-            # Use the *file* dimensions (before orientation) for the
-            # transform-flip math below. After ``_apply_orientation`` the
-            # array shape may swap (orientations 5-8), so capture them now.
-            file_h = arr.shape[0]
-            file_w = arr.shape[1]
-            arr = _apply_orientation(arr, orientation)
-            # The pixel buffer was just remapped; the transform that maps
-            # display pixels back to geographic coordinates needs the
-            # matching remap or the y/x coords still describe the file's
-            # original layout.
-            #
-            # Orientations 2-4 are pure mirror flips: the array shape stays
-            # the same, but the displayed origin moves to the opposite
-            # edge along whichever axes were flipped. Update origin and
-            # sign of the affected pixel scale so xarray coords land on
-            # the right geographic positions.
-            #
-            # Orientations 5-8 swap rows and columns. Pixel sizes swap
-            # axes so coord array lengths match the new shape. Signs are
-            # preserved rather than coerced to north-up since some
-            # legitimate files use a non-standard sign convention
-            # (south-up, west-up). For 6/7/8 (rotations + flips, not a
-            # pure transpose) the swap is geometrically inexact for
-            # georef'd files: a strict implementation would also adjust
-            # origin and re-sign per axis. Those files are vanishingly
-            # rare in practice (TIFF Orientation 5-8 with a meaningful
-            # ModelTransformation); warn so the user knows to verify.
-            t = geo_info.transform
-            # Only georeferenced files have a meaningful transform to flip.
-            # Plain TIFFs with an Orientation tag but no GeoTIFF tags get
-            # their pixel buffer remapped above; their default transform
-            # is left untouched and the downstream consumer falls back to
-            # integer pixel coords.
-            if not geo_info.has_georef:
-                pass
-            elif orientation in (2, 3, 4):
-                # PixelIsPoint tiepoints are at pixel centers, so the
-                # opposite-edge pixel sits ``(N-1) * step`` away. PixelIsArea
-                # tiepoints are at pixel edges, so the opposite edge is
-                # ``N * step`` away. The two cases collapse to a single
-                # formula below by switching the offset.
-                if geo_info.raster_type == RASTER_PIXEL_IS_POINT:
-                    x_shift = file_w - 1
-                    y_shift = file_h - 1
-                else:
-                    x_shift = file_w
-                    y_shift = file_h
-                new_origin_x = t.origin_x
-                new_origin_y = t.origin_y
-                new_px_w = t.pixel_width
-                new_px_h = t.pixel_height
-                if orientation in (2, 3):  # x flipped
-                    new_origin_x = t.origin_x + x_shift * t.pixel_width
-                    new_px_w = -t.pixel_width
-                if orientation in (3, 4):  # y flipped
-                    new_origin_y = t.origin_y + y_shift * t.pixel_height
-                    new_px_h = -t.pixel_height
-                geo_info.transform = GeoTransform(
-                    origin_x=new_origin_x,
-                    origin_y=new_origin_y,
-                    pixel_width=new_px_w,
-                    pixel_height=new_px_h,
-                )
-            elif orientation in (5, 6, 7, 8):
-                geo_info.transform = GeoTransform(
-                    origin_x=t.origin_x,
-                    origin_y=t.origin_y,
-                    pixel_width=t.pixel_height,
-                    pixel_height=t.pixel_width,
-                )
-                if (geo_info.crs_epsg is not None
-                        or geo_info.crs_wkt is not None):
-                    import warnings
-                    warnings.warn(
-                        f"Orientation {orientation} swaps spatial axes on "
-                        f"a georeferenced file; the returned coords are "
-                        f"shape-correct but the geographic transform may "
-                        f"need manual adjustment.",
-                        stacklevel=2,
-                    )
+            arr, geo_info = _apply_orientation_with_geo(
+                arr, geo_info, orientation)
 
         # MinIsWhite (photometric=0): invert single-band grayscale values
         if ifd.photometric == 0 and ifd.samples_per_pixel == 1:
diff --git a/xrspatial/geotiff/tests/test_http_orientation_1717.py b/xrspatial/geotiff/tests/test_http_orientation_1717.py
new file mode 100644
index 00000000..b6c35d7a
--- /dev/null
+++ b/xrspatial/geotiff/tests/test_http_orientation_1717.py
@@ -0,0 +1,169 @@
+"""HTTP COG full reads must honour TIFF Orientation tag (274).
+
+Issue #1717: ``_read_cog_http`` skipped ``_apply_orientation`` on the
+full-read branch, so opening the same oriented file locally vs over HTTP
+returned different pixel orders. This is a backend parity break.
+
+These tests open the same Orientation-tagged TIFF via both paths and
+assert the returned array and geo transform agree, for every value of
+the tag (1-8). The existing rejection of windowed reads + non-default
+orientation must still raise.
+"""
+from __future__ import annotations
+
+import http.server
+import socketserver
+import threading
+
+import numpy as np
+import pytest
+
+from xrspatial.geotiff._reader import _read_cog_http, read_to_array
+
+tifffile = pytest.importorskip("tifffile")
+
+
+_ORIENTATIONS = [1, 2, 3, 4, 5, 6, 7, 8]
+
+
+def _write_with_orientation(path, arr, orientation):
+    tifffile.imwrite(
+        str(path),
+        arr,
+        extratags=[(274, 'H', 1, orientation, True)],
+    )
+
+
+class _RangeHandler(http.server.BaseHTTPRequestHandler):
+    """Serve a single in-memory bytes payload with HTTP Range support."""
+
+    payload: bytes = b''
+
+    def do_GET(self):  # noqa: N802
+        rng = self.headers.get('Range')
+        if rng and rng.startswith('bytes='):
+            spec = rng[len('bytes='):]
+            start_s, _, end_s = spec.partition('-')
+            start = int(start_s)
+            end = int(end_s) if end_s else len(self.payload) - 1
+            chunk = self.payload[start:end + 1]
+            self.send_response(206)
+            self.send_header('Content-Type', 'application/octet-stream')
+            self.send_header(
+                'Content-Range',
+                f'bytes {start}-{start + len(chunk) - 1}/{len(self.payload)}',
+            )
+            self.send_header('Content-Length', str(len(chunk)))
+            self.end_headers()
+            self.wfile.write(chunk)
+            return
+        self.send_response(200)
+        self.send_header('Content-Type', 'application/octet-stream')
+        self.send_header('Content-Length', str(len(self.payload)))
+        self.end_headers()
+        self.wfile.write(self.payload)
+
+    def log_message(self, *_args, **_kwargs):
+        pass
+
+
+def _serve(payload: bytes):
+    handler_cls = type(
+        'RangeHandler1717', (_RangeHandler,), {'payload': payload}
+    )
+    httpd = socketserver.TCPServer(('127.0.0.1', 0), handler_cls)
+    port = httpd.server_address[1]
+    thread = threading.Thread(target=httpd.serve_forever, daemon=True)
+    thread.start()
+    return httpd, port
+
+
+@pytest.fixture
+def _allow_loopback(monkeypatch):
+    monkeypatch.setenv('XRSPATIAL_GEOTIFF_ALLOW_PRIVATE_HOSTS', '1')
+
+
+@pytest.mark.parametrize("orientation", _ORIENTATIONS)
+def test_http_full_read_matches_local_for_orientation(
+    tmp_path, _allow_loopback, orientation,
+):
+    """Local-file vs HTTP full read must produce identical output."""
+    rng = np.random.default_rng(orientation)
+    arr = rng.integers(0, 255, size=(12, 16), dtype=np.uint8)
+    path = tmp_path / f"tmp_1717_orient_{orientation}.tif"
+    _write_with_orientation(path, arr, orientation)
+
+    with open(path, 'rb') as f:
+        payload = f.read()
+
+    arr_local, geo_local = read_to_array(str(path))
+
+    httpd, port = _serve(payload)
+    try:
+        url = f'http://127.0.0.1:{port}/orient_{orientation}.tif'
+        arr_http, geo_http = _read_cog_http(url)
+    finally:
+        httpd.shutdown()
+        httpd.server_close()
+
+    assert arr_http.shape == arr_local.shape, (
+        f"orientation={orientation}: HTTP shape {arr_http.shape} != "
+        f"local shape {arr_local.shape}"
+    )
+    np.testing.assert_array_equal(
+        arr_http, arr_local,
+        err_msg=f"orientation={orientation}: HTTP pixels differ from local",
+    )
+    assert geo_http.transform == geo_local.transform, (
+        f"orientation={orientation}: transform mismatch "
+        f"http={geo_http.transform} local={geo_local.transform}"
+    )
+
+
+@pytest.mark.parametrize("orientation", [2, 3, 4, 5, 6, 7, 8])
+def test_http_windowed_read_rejects_non_default_orientation(
+    tmp_path, _allow_loopback, orientation,
+):
+    """Windowed reads against an oriented file should still raise.
+
+    Mirrors the local-path guard so the contract is uniform across
+    backends. Resolving windowed-read semantics for oriented files is
+    out of scope for #1717.
+    """
+    arr = np.zeros((8, 8), dtype=np.uint8)
+    path = tmp_path / f"tmp_1717_window_reject_{orientation}.tif"
+    _write_with_orientation(path, arr, orientation)
+
+    with open(path, 'rb') as f:
+        payload = f.read()
+
+    httpd, port = _serve(payload)
+    try:
+        url = f'http://127.0.0.1:{port}/window_{orientation}.tif'
+        with pytest.raises(ValueError, match="Orientation tag"):
+            _read_cog_http(url, window=(0, 0, 4, 4))
+    finally:
+        httpd.shutdown()
+        httpd.server_close()
+
+
+def test_http_default_orientation_still_works(tmp_path, _allow_loopback):
+    """Sanity: orientation=1 (default) HTTP read is byte-identical to local."""
+    arr = np.arange(48, dtype=np.uint8).reshape(6, 8)
+    path = tmp_path / "tmp_1717_default.tif"
+    _write_with_orientation(path, arr, 1)
+
+    with open(path, 'rb') as f:
+        payload = f.read()
+
+    arr_local, _ = read_to_array(str(path))
+    httpd, port = _serve(payload)
+    try:
+        url = f'http://127.0.0.1:{port}/default.tif'
+        arr_http, _ = _read_cog_http(url)
+    finally:
+        httpd.shutdown()
+        httpd.server_close()
+
+    np.testing.assert_array_equal(arr_http, arr_local)
+    np.testing.assert_array_equal(arr_http, arr)