|
| 1 | +"""Locking test for the canonical tier of the attrs contract. |
| 2 | +
|
| 3 | +Issue #1984, PR 4 of 7. |
| 4 | +
|
| 5 | +The attrs contract (see ``xrspatial/geotiff/_attrs.py`` and |
| 6 | +``docs/source/user_guide/attrs_contract.rst``) splits every key the |
| 7 | +read paths emit into three tiers. This file pins the *canonical* tier: |
| 8 | +keys xrspatial owns and guarantees round-trip stable through |
| 9 | +``to_geotiff`` -> ``open_geotiff``. |
| 10 | +
|
| 11 | +Sibling files cover the other tiers: |
| 12 | +
|
| 13 | +* ``test_attrs_contract_aliases_1984.py`` -- compatibility aliases. |
| 14 | +* ``test_attrs_contract_passthrough_1984.py`` -- best-effort |
| 15 | + pass-through. |
| 16 | +* ``test_attrs_contract_version_1984.py`` -- per-backend stamping of |
| 17 | + ``attrs['_xrspatial_geotiff_contract']`` (also canonical, kept in its |
| 18 | + own file because the assertion is per read path rather than |
| 19 | + per round-trip). |
| 20 | +
|
| 21 | +The canonical keys locked here: |
| 22 | +
|
| 23 | +* ``crs`` -- EPSG integer code. |
| 24 | +* ``crs_wkt`` -- horizontal CRS WKT string. |
| 25 | +* ``transform`` -- rasterio-style 6-tuple. |
| 26 | +* ``nodata`` -- declared file sentinel (GDAL_NODATA). |
| 27 | +* ``raster_type`` -- 'point' (set explicitly) or absent |
| 28 | + (= 'area', the implicit default). |
| 29 | +* ``extra_tags`` -- list of (id, type, count, value) |
| 30 | + tuples for out-of-band TIFF tags. |
| 31 | +* ``gdal_metadata`` -- dict parsed from GDAL_METADATA XML. |
| 32 | +* ``gdal_metadata_xml`` -- raw GDAL_METADATA XML string. |
| 33 | +* ``x_resolution``, ``y_resolution``, |
| 34 | + ``resolution_unit`` -- TIFF XResolution / YResolution / |
| 35 | + ResolutionUnit. |
| 36 | +* ``_xrspatial_geotiff_contract`` -- integer contract version. Stamped |
| 37 | + on every read. |
| 38 | +
|
| 39 | +The fixture below sets the writer-facing canonical keys on a synthetic |
| 40 | +DataArray (``crs_wkt``, ``transform``, ``gdal_metadata_xml`` and the |
| 41 | +contract stamp are derived on write/read rather than set in ``attrs``), |
| 42 | +round-trips it through ``to_geotiff`` -> ``open_geotiff``, and the tests |
| 43 | +below assert presence and value per key. The single-fixture shape is |
| 44 | +intentional: a future writer change that drops one canonical key fails one assertion here instead of being lost in a larger diff. |
| 45 | +
|
| 46 | +Issues #1985 (parity matrix) and #1986 (round-trip invariants) consume |
| 47 | +this assertion list. If you add a key here, update the canonical block |
| 48 | +in the contract page and the ``_attrs.py`` module docstring as well. |
| 49 | +""" |
| 50 | +from __future__ import annotations |
| 51 | + |
| 52 | +import numpy as np |
| 53 | +import pytest |
| 54 | +import xarray as xr |
| 55 | + |
| 56 | +from xrspatial.geotiff import open_geotiff, to_geotiff |
| 57 | +from xrspatial.geotiff._attrs import _ATTRS_CONTRACT_VERSION |
| 58 | + |
| 59 | + |
_CONTRACT_KEY = '_xrspatial_geotiff_contract'

# Canonical-tier keys asserted present after the shared round-trip.
# The order mirrors the contract docs (``attrs_contract.rst``) so that a
# diff in one place lines up with a diff in the other.  ``raster_type``
# is deliberately not listed: its default ('area') is encoded as
# *absence* from attrs, so it is covered by the dedicated
# ``test_raster_type_*`` tests instead.
_CANONICAL_KEYS = (
    'crs', 'crs_wkt', 'transform', 'nodata',
    'extra_tags',
    'gdal_metadata', 'gdal_metadata_xml',
    'x_resolution', 'y_resolution', 'resolution_unit',
    _CONTRACT_KEY,
)

# Values written into ``attrs`` on the synthetic DataArray and compared
# against the read-back attrs.  ``transform`` has no constant here: it
# is determined by the ``y`` / ``x`` coords of the fixture array.
_NODATA_SENTINEL = -9999.0
_X_RES = 300.0
_Y_RES = 300.0
_RES_UNIT = 'inch'
_GDAL_META = {
    'AREA_OR_POINT': 'Area',
    'TIFFTAG_SOFTWARE': 'xrspatial-1984',
}

# Extra tag under test: Software (tag id 305, TIFF type 2 = ASCII).
# Chosen because it is benign (no spatial interpretation, no security
# filter) and tifffile decodes it as-is.  The count field includes the
# trailing NUL byte, hence the ``+ 1``.
_SOFTWARE_STR = 'xrspatial-canonical-1984'
_EXTRA_TAGS = [
    (305, 2, len(_SOFTWARE_STR) + 1, _SOFTWARE_STR),
]

# No ``crs_wkt`` fixture value: the writer is driven by attrs['crs']
# (the EPSG code) and the reader emits the WKT from the PROJ database.
# Setting ``crs_wkt`` on write is allowed but not required.
| 96 | + |
| 97 | + |
def _make_canonical_da():
    """Create the synthetic DataArray carrying the canonical attrs.

    The array is 4x4 ``float32`` with ``y`` / ``x`` coords on a 10-unit
    grid (``y`` descending).

    Returns
    -------
    tuple
        ``(da, expected_transform)`` -- the DataArray and the 6-tuple
        the round-tripped ``attrs['transform']`` is expected to equal,
        so callers do not have to recompute it.
    """
    rows, cols = 4, 4
    pixels = np.arange(rows * cols, dtype=np.float32).reshape(rows, cols)

    # A non-identity transform makes the round-trip check sensitive to a
    # writer that drops the tiepoint / pixel-scale tags.  Coords are
    # pixel centres while the transform origin is the top-left corner:
    #   origin_x = 100 - 10/2    = 95
    #   origin_y = 240 - (-10)/2 = 245
    xs = np.array([100.0, 110.0, 120.0, 130.0], dtype=np.float64)
    ys = np.array([240.0, 230.0, 220.0, 210.0], dtype=np.float64)
    expected_transform = (10.0, 0.0, 95.0, 0.0, -10.0, 245.0)

    # Copy the mutable fixture values so a test mutating attrs cannot
    # leak into the module-level constants.
    canonical_attrs = {
        'crs': 4326,
        'nodata': _NODATA_SENTINEL,
        'extra_tags': list(_EXTRA_TAGS),
        'gdal_metadata': dict(_GDAL_META),
        'x_resolution': _X_RES,
        'y_resolution': _Y_RES,
        'resolution_unit': _RES_UNIT,
    }
    da = xr.DataArray(
        pixels,
        dims=('y', 'x'),
        coords={'y': ys, 'x': xs},
        attrs=canonical_attrs,
    )
    return da, expected_transform
| 128 | + |
| 129 | + |
@pytest.fixture
def canonical_roundtrip(tmp_path):
    """Write the canonical fixture to a GeoTIFF and read it back.

    Yields nothing; returns ``(read_da, expected_transform)``.  The
    fixture is function-scoped, so every test gets its own round-trip
    and a failure in one per-key assertion does not cascade into others.
    """
    source_da, expected_transform = _make_canonical_da()
    tif_path = str(tmp_path / 'attrs_contract_canonical.tif')
    to_geotiff(source_da, tif_path)
    return open_geotiff(tif_path), expected_transform
| 143 | + |
| 144 | + |
| 145 | +# --------------------------------------------------------------------------- |
| 146 | +# Single-fixture coverage: every canonical key is present on read-back. |
| 147 | +# --------------------------------------------------------------------------- |
| 148 | + |
| 149 | + |
def test_every_canonical_key_present(canonical_roundtrip):
    """Lock the set of canonical keys present after a round-trip.

    If the writer stops emitting one canonical key (GDAL_METADATA, for
    instance), this test names the missing key directly instead of
    leaving it to surface as a less obvious value-equality failure.
    """
    read_attrs = canonical_roundtrip[0].attrs
    absent = [key for key in _CANONICAL_KEYS if key not in read_attrs]
    absent.sort()
    assert not absent, (
        f"canonical attrs missing after round-trip: {absent}. "
        f"attrs keys present: {sorted(read_attrs)}"
    )
| 163 | + |
| 164 | + |
| 165 | +# --------------------------------------------------------------------------- |
| 166 | +# Per-key value assertions: each canonical key round-trips by value. |
| 167 | +# --------------------------------------------------------------------------- |
| 168 | + |
| 169 | + |
def test_crs_roundtrip(canonical_roundtrip):
    """The EPSG code written via ``attrs['crs']`` is read back unchanged."""
    read_back = canonical_roundtrip[0]
    assert read_back.attrs['crs'] == 4326
| 173 | + |
| 174 | + |
def test_crs_wkt_roundtrip(canonical_roundtrip):
    """``crs_wkt`` is emitted by the reader from the EPSG code.

    The exact WKT text depends on the PROJ version, so only two things
    are pinned: the value is a non-empty string, and it contains the
    ``WGS 84`` substring callers rely on.
    """
    read_back = canonical_roundtrip[0]
    wkt = read_back.attrs['crs_wkt']
    assert isinstance(wkt, str)
    assert wkt != ''
    assert 'WGS 84' in wkt, (
        f"crs_wkt round-trip lost the CRS identity: {wkt!r}"
    )
| 185 | + |
| 186 | + |
def test_transform_roundtrip(canonical_roundtrip):
    """The read-back transform matches the 6-tuple implied by the coords.

    Compared with ``pytest.approx`` so floating-point noise introduced
    by the write/read path does not fail the test.
    """
    read_back, expected_transform = canonical_roundtrip
    actual = tuple(read_back.attrs['transform'])
    assert actual == pytest.approx(expected_transform), (
        f"transform round-trip mismatch.\n expected: {expected_transform}\n"
        f" got : {actual}"
    )
| 194 | + |
| 195 | + |
def test_nodata_roundtrip(canonical_roundtrip):
    """The declared nodata sentinel is read back with the same value."""
    read_back = canonical_roundtrip[0]
    assert read_back.attrs['nodata'] == _NODATA_SENTINEL
| 199 | + |
| 200 | + |
def test_extra_tags_roundtrip(canonical_roundtrip):
    """The Software tag (305) passed via ``extra_tags`` survives intact.

    The writer must preserve tags it does not interpret so users can
    attach arbitrary metadata.
    """
    read_back = canonical_roundtrip[0]
    got = read_back.attrs['extra_tags']

    # Index entries by tag id.  Ordering and any reader-added entries
    # are not part of the contract for this assertion.
    by_id = {}
    for entry in got:
        by_id[entry[0]] = entry
    assert 305 in by_id, (
        f"Software tag (305) missing from read-back extra_tags: {got}"
    )

    # Unpacking also checks the entry is a 4-tuple; the count field is
    # not asserted by this test.
    tag_id, type_id, _count, value = by_id[305]
    assert tag_id == 305
    assert type_id == 2  # TIFF ASCII
    assert value == _SOFTWARE_STR
| 217 | + |
| 218 | + |
def test_gdal_metadata_roundtrip(canonical_roundtrip):
    """Every fixture entry of ``gdal_metadata`` is read back by value.

    Only the fixture's own keys are checked.  Entries the reader might
    add (e.g. ``STATISTICS_*``) are tolerated so this test does not
    trip on unrelated reader changes.
    """
    read_back = canonical_roundtrip[0]
    got = read_back.attrs['gdal_metadata']
    assert isinstance(got, dict), f"gdal_metadata is not a dict: {got!r}"
    for key, expected in _GDAL_META.items():
        actual = got.get(key)
        assert actual == expected, (
            f"gdal_metadata[{key!r}] mismatch.\n expected: {expected!r}\n"
            f" got : {actual!r}\n full read-back: {got!r}"
        )
| 231 | + |
| 232 | + |
def test_gdal_metadata_xml_roundtrip(canonical_roundtrip):
    """The raw GDAL_METADATA XML is present and carries the fixture marker.

    The writer rebuilds the XML from the ``gdal_metadata`` dict, and its
    exact formatting is writer-dependent.  Only the opening element and
    the fixture's marker substring are pinned.
    """
    read_back = canonical_roundtrip[0]
    xml = read_back.attrs['gdal_metadata_xml']
    assert isinstance(xml, str)
    assert xml.startswith('<GDALMetadata>')
    assert 'xrspatial-1984' in xml, (
        f"gdal_metadata_xml lost the fixture marker: {xml!r}"
    )
| 243 | + |
| 244 | + |
def test_resolution_group_roundtrip(canonical_roundtrip):
    """The three resolution attrs round-trip together.

    ``x_resolution`` / ``y_resolution`` / ``resolution_unit`` form one
    logical unit, so they are pinned in one test: a writer that drops
    one while keeping the others fails here.
    """
    read_attrs = canonical_roundtrip[0].attrs
    assert read_attrs['x_resolution'] == pytest.approx(_X_RES)
    assert read_attrs['y_resolution'] == pytest.approx(_Y_RES)
    assert read_attrs['resolution_unit'] == _RES_UNIT
| 253 | + |
| 254 | + |
def test_contract_version_roundtrip(canonical_roundtrip):
    """The read-back attrs carry the current contract version.

    ``_xrspatial_geotiff_contract`` is stamped on every read.  This test
    covers the canonical fixture only; per-backend coverage lives in
    ``test_attrs_contract_version_1984.py``.
    """
    read_back = canonical_roundtrip[0]
    stamped = read_back.attrs[_CONTRACT_KEY]
    assert stamped == _ATTRS_CONTRACT_VERSION
| 261 | + |
| 262 | + |
| 263 | +# --------------------------------------------------------------------------- |
| 264 | +# ``raster_type`` lives outside the shared fixture because the canonical |
| 265 | +# default ('area') is encoded as *absence* in attrs. The two branches need |
| 266 | +# different fixtures. |
| 267 | +# --------------------------------------------------------------------------- |
| 268 | + |
| 269 | + |
def test_raster_type_area_omitted_on_roundtrip(tmp_path):
    """An input without ``raster_type`` round-trips without ``raster_type``.

    RasterPixelIsArea is the implicit default and is represented by the
    *absence* of ``attrs['raster_type']``, on both the written and the
    read-back DataArray.
    """
    source_da, _ = _make_canonical_da()
    # Guard the precondition: the fixture must not set raster_type.
    assert 'raster_type' not in source_da.attrs

    tif_path = str(tmp_path / 'raster_type_area.tif')
    to_geotiff(source_da, tif_path)
    read_back = open_geotiff(tif_path)

    # The message is only evaluated on failure, when the key exists.
    assert 'raster_type' not in read_back.attrs, (
        f"area is the implicit default but the reader emitted "
        f"raster_type={read_back.attrs['raster_type']!r}"
    )
| 284 | + |
| 285 | + |
def test_raster_type_point_roundtrip(tmp_path):
    """An explicit ``raster_type='point'`` is emitted back by the reader.

    ``'point'`` is the only value the writer accepts via attrs.
    """
    source_da, _ = _make_canonical_da()
    source_da.attrs['raster_type'] = 'point'

    tif_path = str(tmp_path / 'raster_type_point.tif')
    to_geotiff(source_da, tif_path)
    read_back = open_geotiff(tif_path)

    # ``.get`` turns a missing key into a plain assertion failure
    # rather than a KeyError.
    assert read_back.attrs.get('raster_type') == 'point'
0 commit comments