|
| 1 | +"""Regression tests for issue #1632. |
| 2 | +
|
| 3 | +Files with a user-defined CRS (no EPSG, WKT stored in the GeoTIFF |
| 4 | +citation under ``GEOKEY_*_CS_TYPE == 32767``) used to round-trip with |
| 5 | +``attrs['crs_name']`` set but ``attrs['crs_wkt']`` and ``attrs['crs']`` |
| 6 | +unset. ``to_geotiff`` only consults the latter two, so a read -> write |
| 7 | +cycle silently dropped the projection. |
| 8 | +
|
| 9 | +The fix promotes the citation to ``attrs['crs_wkt']`` whenever no EPSG |
| 10 | +is resolved and the citation parses as WKT (starts with one of the |
| 11 | +known WKT 1 / WKT 2 root keywords). ``crs_name`` stays populated for |
| 12 | +back-compat. Tests pin the contract across all four read backends and |
| 13 | +across the read -> write -> read round trip. |
| 14 | +""" |
| 15 | +from __future__ import annotations |
| 16 | + |
| 17 | +import importlib.util |
| 18 | + |
| 19 | +import numpy as np |
| 20 | +import pytest |
| 21 | +import xarray as xr |
| 22 | + |
| 23 | +tifffile = pytest.importorskip("tifffile") |
| 24 | + |
| 25 | +from xrspatial.geotiff import open_geotiff, to_geotiff |
| 26 | +from xrspatial.geotiff._geotags import _looks_like_wkt |
| 27 | + |
| 28 | + |
| 29 | +# A user-defined Lambert Conformal Conic that pyproj cannot identify |
| 30 | +# as a registered EPSG. Trimmed to keep test fixtures readable. |
| 31 | +_USER_DEFINED_WKT = ( |
| 32 | + 'PROJCS["User defined LCC",' |
| 33 | + 'GEOGCS["NAD83",' |
| 34 | + 'DATUM["North American Datum 1983",' |
| 35 | + 'SPHEROID["GRS 1980",6378137,298.257222101]],' |
| 36 | + 'PRIMEM["Greenwich",0],' |
| 37 | + 'UNIT["degree",0.0174532925199433]],' |
| 38 | + 'PROJECTION["Lambert Conformal Conic 2SP"],' |
| 39 | + 'PARAMETER["central_meridian",-100],' |
| 40 | + 'PARAMETER["latitude_of_origin",40],' |
| 41 | + 'PARAMETER["standard_parallel_1",30],' |
| 42 | + 'PARAMETER["standard_parallel_2",50],' |
| 43 | + 'UNIT["metre",1]]' |
| 44 | +) |
| 45 | + |
| 46 | + |
| 47 | +def _gpu_available() -> bool: |
| 48 | + if importlib.util.find_spec("cupy") is None: |
| 49 | + return False |
| 50 | + try: |
| 51 | + import cupy |
| 52 | + return bool(cupy.cuda.is_available()) |
| 53 | + except Exception: |
| 54 | + return False |
| 55 | + |
| 56 | + |
# Probe the GPU once at import time; the marker below reuses the result to
# skip GPU-only tests on machines without cupy + CUDA.
_HAS_GPU = _gpu_available()
_gpu_only = pytest.mark.skipif(
    not _HAS_GPU,
    reason="cupy + CUDA required",
)
| 59 | + |
| 60 | + |
def _write_user_defined_crs_tif(path):
    """Write a 4x4 float32 GeoTIFF whose only CRS information is WKT.

    Parameters
    ----------
    path
        Destination passed straight through to ``to_geotiff``.

    Returns
    -------
    xarray.DataArray
        The array that was written: all ones, with ``attrs['crs_wkt']``
        set to ``_USER_DEFINED_WKT`` and no ``attrs['crs']``.
    """
    y_coords = np.linspace(50.0, 47.0, 4)
    x_coords = np.linspace(10.0, 13.0, 4)
    raster = xr.DataArray(
        np.ones((4, 4), dtype=np.float32),
        dims=['y', 'x'],
        coords={'y': y_coords, 'x': x_coords},
        attrs={'crs_wkt': _USER_DEFINED_WKT},
    )
    to_geotiff(raster, path, compression='none')
    return raster
| 72 | + |
| 73 | + |
| 74 | +# --------------------------------------------------------------------------- |
| 75 | +# _looks_like_wkt unit tests |
| 76 | +# --------------------------------------------------------------------------- |
| 77 | + |
| 78 | + |
@pytest.mark.parametrize(
    "text",
    [
        # WKT 1 / WKT 2 root keywords, each followed by an opening bracket.
        'PROJCS["foo"]',
        'GEOGCS["foo"]',
        'PROJCRS["foo"]',
        'GEOGCRS["foo"]',
        'COMPD_CS["foo"]',
        'COMPOUNDCRS["foo"]',
        'BOUNDCRS["foo"]',
        'VERT_CS["foo"]',
        'VERTCRS["foo"]',
        'LOCAL_CS["foo"]',
        # Leading whitespace and lowercase keywords are expected to pass too.
        '  PROJCS["leading whitespace"]',
        'projcs["lowercase"]',
    ],
)
def test_looks_like_wkt_positive(text):
    """Strings that open with a known WKT root keyword are accepted."""
    assert _looks_like_wkt(text)
| 96 | + |
| 97 | + |
def test_looks_like_wkt_requires_bracket():
    """A root keyword that is not followed by ``[`` is rejected."""
    # A bare keyword is just a token, not a WKT element. Requiring the
    # bracket keeps human-readable names that happen to mention a WKT
    # keyword from being treated as WKT.
    for candidate in ("PROJCS", "PROJCS no bracket here"):
        assert not _looks_like_wkt(candidate)
| 105 | + |
| 106 | + |
@pytest.mark.parametrize(
    "text",
    [
        None,
        '',
        'NAD83 / UTM Zone 12N',  # human-readable CRS name
        'epsg:4326',  # authority-code style identifier
        'WGS 84',
        'Some random string',
        'PROJ string +proj=longlat +datum=WGS84',
        42,  # not a string at all
        b'PROJCS["bytes input"]',  # WKT-shaped, but bytes rather than str
    ],
)
def test_looks_like_wkt_negative(text):
    """Anything that is not a WKT ``str`` is rejected, whatever its type."""
    assert not _looks_like_wkt(text)
| 121 | + |
| 122 | + |
| 123 | +# --------------------------------------------------------------------------- |
| 124 | +# Read-side: backends emit crs_wkt for user-defined CRS files |
| 125 | +# --------------------------------------------------------------------------- |
| 126 | + |
| 127 | + |
def test_eager_emits_crs_wkt_for_user_defined_crs(tmp_path):
    """The default (eager) read exposes the citation WKT as
    ``attrs['crs_wkt']`` even though no EPSG code is resolved."""
    tif_path = str(tmp_path / "user_defined_crs.tif")
    _write_user_defined_crs_tif(tif_path)

    attrs = open_geotiff(tif_path).attrs

    # No EPSG code is expected for a user-defined CRS.
    assert attrs.get("crs") is None

    wkt = attrs.get("crs_wkt")
    assert wkt is not None
    assert wkt.startswith("PROJCS[")

    # Back-compat: crs_name still carries the same citation text.
    assert attrs.get("crs_name") == wkt
| 140 | + |
| 141 | + |
def test_dask_emits_crs_wkt_for_user_defined_crs(tmp_path):
    """The chunked (dask) read path populates ``attrs['crs_wkt']`` with
    PROJCS WKT for a user-defined CRS file."""
    tif_path = str(tmp_path / "user_defined_crs_dask.tif")
    _write_user_defined_crs_tif(tif_path)

    wkt = open_geotiff(tif_path, chunks=4).attrs.get("crs_wkt")
    assert wkt is not None
    assert wkt.startswith("PROJCS[")
| 150 | + |
| 151 | + |
@_gpu_only
def test_cupy_emits_crs_wkt_for_user_defined_crs(tmp_path):
    """The GPU (cupy) read path populates ``attrs['crs_wkt']`` with PROJCS
    WKT for a user-defined CRS file."""
    tif_path = str(tmp_path / "user_defined_crs_gpu.tif")
    _write_user_defined_crs_tif(tif_path)

    wkt = open_geotiff(tif_path, gpu=True).attrs.get("crs_wkt")
    assert wkt is not None
    assert wkt.startswith("PROJCS[")
| 161 | + |
| 162 | + |
@_gpu_only
def test_dask_cupy_emits_crs_wkt_for_user_defined_crs(tmp_path):
    """The chunked GPU (dask+cupy) read path populates ``attrs['crs_wkt']``
    with PROJCS WKT for a user-defined CRS file."""
    tif_path = str(tmp_path / "user_defined_crs_dask_gpu.tif")
    _write_user_defined_crs_tif(tif_path)

    wkt = open_geotiff(tif_path, gpu=True, chunks=4).attrs.get("crs_wkt")
    assert wkt is not None
    assert wkt.startswith("PROJCS[")
| 172 | + |
| 173 | + |
| 174 | +# --------------------------------------------------------------------------- |
| 175 | +# Read -> write -> read round trip: WKT survives the second write |
| 176 | +# --------------------------------------------------------------------------- |
| 177 | + |
| 178 | + |
def test_user_defined_crs_round_trips_through_to_geotiff(tmp_path):
    """Reading a user-defined CRS file and writing it back keeps the CRS.

    Before the fix, ``to_geotiff(open_geotiff(src), dst)`` wrote ``dst``
    without any GeoKey CRS entries or GeoAsciiParams tag: the reader only
    filled ``attrs['crs_name']``, which the writer never looks at.
    """
    src_path = str(tmp_path / "round_trip_src.tif")
    dst_path = str(tmp_path / "round_trip_dst.tif")
    _write_user_defined_crs_tif(src_path)

    first_read = open_geotiff(src_path)
    to_geotiff(first_read, dst_path, compression='none')

    # Re-reading the rewritten file must yield the same WKT.
    second_read = open_geotiff(dst_path)
    assert second_read.attrs.get("crs_wkt") == first_read.attrs.get("crs_wkt")

    # The raw GeoTIFF tags must exist on disk as well.
    with tifffile.TiffFile(dst_path) as tif:
        page_tags = tif.pages[0].tags
        geo_keys = page_tags.get(34735)  # GeoKeyDirectory
        geo_ascii = page_tags.get(34737)  # GeoAsciiParams

        assert geo_keys is not None
        # The directory starts with a 4-value header, so a length above 4
        # means at least one key entry was written after it.
        assert len(geo_keys.value) > 4
        assert geo_ascii is not None
        assert "PROJCS[" in geo_ascii.value
| 207 | + |
| 208 | + |
def test_epsg_crs_unchanged_by_fix(tmp_path):
    """Guard the EPSG path: a file written with ``attrs['crs'] = 4326``
    must still come back with both ``crs`` and ``crs_wkt`` populated."""
    tif_path = str(tmp_path / "epsg.tif")
    raster = xr.DataArray(
        np.ones((4, 4), dtype=np.float32),
        dims=['y', 'x'],
        coords={
            'y': np.linspace(50.0, 47.0, 4),
            'x': np.linspace(10.0, 13.0, 4),
        },
        attrs={'crs': 4326},
    )
    to_geotiff(raster, tif_path, compression='none')

    attrs = open_geotiff(tif_path).attrs
    assert attrs.get("crs") == 4326
    assert attrs.get("crs_wkt") is not None
    # For an EPSG file the WKT is expected to be the canonical EPSG:4326
    # text, while the citation is a short name such as "WGS 84". The two
    # must differ, i.e. the citation was not promoted to crs_wkt.
    assert attrs.get("crs_name") != attrs.get("crs_wkt")
| 229 | + |
| 230 | + |
def test_human_readable_crs_name_not_promoted_to_crs_wkt(tmp_path):
    """Human-readable citations must never pass the ``_looks_like_wkt``
    gate, so they stay in ``crs_name`` and are not promoted to ``crs_wkt``."""
    # Building a file whose citation is a plain name with no EPSG would
    # mean hand-assembling the GeoKey table, so the gate is exercised
    # directly through the helper instead.
    non_wkt_citations = ("NAD83 / UTM Zone 12N", "WGS 84", "", None)
    for citation in non_wkt_citations:
        assert not _looks_like_wkt(citation)
0 commit comments