diff --git a/xrspatial/geotiff/__init__.py b/xrspatial/geotiff/__init__.py index 70f35984..885bcf88 100644 --- a/xrspatial/geotiff/__init__.py +++ b/xrspatial/geotiff/__init__.py @@ -74,6 +74,12 @@ # on_gpu_failure=" (forward verbatim). _GPU_DEPRECATED_SENTINEL = object() _ON_GPU_FAILURE_SENTINEL = object() +# ``write_vrt`` needs to distinguish "user passed crs_wkt= explicitly" +# (deprecation path) from "user passed nothing" (no warning, pick CRS +# from the first source). A plain default of None does not work because +# None is itself a value a caller could supply alongside crs=. See +# issue #1715. +_CRS_WKT_DEPRECATED_SENTINEL = object() # Names of dims that ``to_geotiff`` / ``write_geotiff_gpu`` treat as the # non-spatial band axis. Used both to remap ``(band, y, x)`` inputs to @@ -153,6 +159,91 @@ def _wkt_to_epsg(wkt_or_proj: str) -> int | None: return None +def _resolve_crs_to_wkt(crs) -> str | None: + """Normalise a CRS argument to a WKT string for downstream writers. + + Mirrors ``to_geotiff`` / ``write_geotiff_gpu``'s ``crs`` kwarg semantics + so callers can pass an int EPSG code, a WKT string, or a PROJ string + interchangeably. Returns the canonical WKT string (or ``None`` if + ``crs`` is ``None``) for forwarding to ``_vrt.write_vrt``, which only + speaks WKT. + + Used by ``write_vrt`` (see issue #1715) to close the parameter-naming + drift versus the eager and GPU writer entry points. + + Parameters + ---------- + crs : int, str, or None + EPSG code (int), WKT string, or PROJ string. ``None`` returns + ``None`` (the downstream writer falls back to the first source + file's CRS). + + Returns + ------- + str or None + Canonical WKT string, or ``None`` if ``crs`` is ``None``. + + Raises + ------ + TypeError + If ``crs`` is not an int, str, or ``None``. + ValueError + If ``crs`` is an int that pyproj cannot resolve to a known CRS, + or a string that pyproj cannot parse. + ImportError + If pyproj is not installed and ``crs`` is supplied as something + other than a string. (A string is passed through verbatim so the + WKT-only path keeps working without pyproj.) + """ + if crs is None: + return None + if not isinstance(crs, (int, str)): + raise TypeError( + f"crs must be int (EPSG code), str (WKT or PROJ), or None; " + f"got {type(crs).__name__}") + if isinstance(crs, str): + # Empty string is a common "no CRS" sentinel from upstream + # GeoTIFFs; preserve the existing _vrt.write_vrt semantics (it + # falls back to the first source's CRS for empty strings too). + if not crs: + return None + # If the caller already handed us a WKT, return it untouched. + # PROJCS/GEOGCS/PROJCRS/GEOGCRS are the standard WKT root + # keywords; anything else (EPSG:NNNN, +proj=...) gets normalised + # through pyproj so the downstream XML sees a canonical WKT. + if crs.lstrip().startswith(('PROJCS', 'GEOGCS', 'PROJCRS', 'GEOGCRS', + 'COMPD_CS', 'COMPOUNDCRS')): + return crs + try: + from pyproj import CRS + except ImportError as exc: + raise ImportError( + "pyproj is required to convert non-WKT CRS strings (got " + f"{crs!r}). Pass a WKT string directly, or install pyproj." + ) from exc + try: + return CRS.from_user_input(crs).to_wkt() + except Exception as exc: + raise ValueError( + f"Could not parse crs={crs!r} as an EPSG/PROJ/WKT string: " + f"{type(exc).__name__}: {exc}" + ) from exc + # int branch: convert EPSG -> WKT via pyproj. + try: + from pyproj import CRS + except ImportError as exc: + raise ImportError( + f"pyproj is required to convert crs={crs} (EPSG int) to WKT. " + "Install pyproj, or pass crs as a WKT string." + ) from exc + try: + return CRS.from_epsg(crs).to_wkt() + except Exception as exc: + raise ValueError( + f"Could not resolve EPSG:{crs}: {type(exc).__name__}: {exc}" + ) from exc + + def _geo_to_coords(geo_info, height: int, width: int) -> dict: """Build y/x coordinate arrays from GeoInfo. @@ -3450,7 +3541,8 @@ def _sentinel_for_dtype(nodata_val, dtype): def write_vrt(vrt_path: str, source_files: list[str], *, relative: bool = True, - crs_wkt: str | None = None, + crs: int | str | None = None, + crs_wkt: str | None = _CRS_WKT_DEPRECATED_SENTINEL, nodata: float | int | None = None) -> str: """Generate a VRT file that mosaics multiple GeoTIFF tiles. @@ -3462,9 +3554,21 @@ def write_vrt(vrt_path: str, source_files: list[str], *, Paths to the source GeoTIFF files. relative : bool, optional Store source paths relative to the VRT file (default True). + crs : int, str, or None, optional + EPSG code (int), WKT string, or PROJ string. If None, the CRS + is taken from the first source GeoTIFF. Mirrors the ``crs`` + kwarg on ``to_geotiff`` and ``write_geotiff_gpu`` so the same + value can be forwarded to whichever writer the caller picked + without per-writer special-casing (issue #1715). crs_wkt : str or None, optional - CRS as a WKT string. If None, the CRS is taken from the first - source GeoTIFF. + Deprecated alias for ``crs``. Emits ``DeprecationWarning`` when + supplied (including ``crs_wkt=None``); passing both ``crs`` and + ``crs_wkt`` raises ``TypeError``. The value is forwarded through + the same ``_resolve_crs_to_wkt`` path as ``crs``, so any string + the resolver accepts (WKT root keyword, PROJ string, + ``"EPSG:NNNN"``) and ``None`` work here. The historic + ``str | None`` surface is preserved; new code should use ``crs`` + instead, which additionally accepts ``int`` EPSG codes. nodata : float, int, or None, optional NoData value. If None, taken from the first source GeoTIFF. Integer sentinels (e.g. ``65535`` for uint16, ``-9999`` for @@ -3478,13 +3582,36 @@ def write_vrt(vrt_path: str, source_files: list[str], *, """ # Explicit signature (previously ``**kwargs``) so ``inspect.signature``, # IDE autocomplete, and ``mypy --strict`` can see the accepted kwargs - # without parsing the docstring. Mirrors ``_vrt.write_vrt`` exactly; if - # that signature changes, this wrapper must be updated in lockstep. + # without parsing the docstring. Mirrors ``_vrt.write_vrt`` for the + # historic ``crs_wkt`` path; the new ``crs`` path normalises through + # ``_resolve_crs_to_wkt`` before forwarding because the internal + # writer still only speaks WKT. + crs_wkt_passed = crs_wkt is not _CRS_WKT_DEPRECATED_SENTINEL + if crs is not None and crs_wkt_passed: + # Both supplied is ambiguous regardless of whether the WKT happens + # to encode the same CRS as the int. Refuse rather than silently + # picking one. + raise TypeError( + "write_vrt: pass either 'crs' or the deprecated 'crs_wkt' " + "alias, not both.") + if crs_wkt_passed: + warnings.warn( + "write_vrt(..., crs_wkt=...) is deprecated; use crs=... " + "instead. The kwarg was renamed for parity with to_geotiff " + "and write_geotiff_gpu, which already accept 'crs' as either " + "an int EPSG code or a WKT string.", + DeprecationWarning, + stacklevel=2, + ) + crs = crs_wkt + + resolved_wkt = _resolve_crs_to_wkt(crs) + from ._vrt import write_vrt as _write_vrt_internal return _write_vrt_internal( vrt_path, source_files, relative=relative, - crs_wkt=crs_wkt, + crs_wkt=resolved_wkt, nodata=nodata, ) diff --git a/xrspatial/geotiff/tests/test_kwarg_behaviour_2026_05_12.py b/xrspatial/geotiff/tests/test_kwarg_behaviour_2026_05_12.py index 03373027..31da870b 100644 --- a/xrspatial/geotiff/tests/test_kwarg_behaviour_2026_05_12.py +++ b/xrspatial/geotiff/tests/test_kwarg_behaviour_2026_05_12.py @@ -204,9 +204,16 @@ def test_relative_false_parses_back_to_same_source(self, source_tif, tmp_path): class TestWriteVrtCrsWktBehaviour: - """``crs_wkt=`` overrides the first source's CRS WKT. Without an - override, the first source's WKT is propagated. With an override, - the override wins.""" + """``crs=`` overrides the first source's CRS. Without an override, + the first source's WKT is propagated. With an override, the + override wins. + + Pre-#1715 the kwarg was named ``crs_wkt``. The new canonical name + is ``crs`` (parity with ``to_geotiff`` / ``write_geotiff_gpu``); + the old name is still accepted with ``DeprecationWarning``. These + tests exercise the new path; the deprecated path is covered by + ``test_write_vrt_crs_1715.py``. + """ def _read_parsed(self, vrt_path, tmp_path): with open(vrt_path, 'r') as fh: @@ -221,7 +228,7 @@ def test_crs_wkt_override_wins(self, source_tif, tmp_path): 'PROJECTION["Transverse_Mercator"],UNIT["metre",1]]' ) vrt_path = str(tmp_path / 'crs_wkt_override.vrt') - write_vrt(vrt_path, [source_tif], crs_wkt=override) + write_vrt(vrt_path, [source_tif], crs=override) parsed = self._read_parsed(vrt_path, tmp_path) assert parsed.crs_wkt == override @@ -254,7 +261,7 @@ def test_crs_wkt_override_distinct_from_default(self, source_tif, tmp_path): ) # Override path vrt_override = str(tmp_path / 'override.vrt') - write_vrt(vrt_override, [source_tif], crs_wkt=override) + write_vrt(vrt_override, [source_tif], crs=override) # Default path vrt_default = str(tmp_path / 'default.vrt') write_vrt(vrt_default, [source_tif]) diff --git a/xrspatial/geotiff/tests/test_polish_1488.py b/xrspatial/geotiff/tests/test_polish_1488.py index 6501add9..8d26b9c5 100644 --- a/xrspatial/geotiff/tests/test_polish_1488.py +++ b/xrspatial/geotiff/tests/test_polish_1488.py @@ -89,8 +89,11 @@ def test_known_kwargs_accepted(self, tmp_path): a_path = str(tmp_path / 'a_c5_1488.tif') write(arr, a_path, compression='none') vrt_path = str(tmp_path / 'mosaic_c5_1488.vrt') - # All three documented kwargs should be accepted. - write_vrt(vrt_path, [a_path], relative=False, crs_wkt=None, + # All four documented kwargs should be accepted. ``crs`` is the + # canonical name as of #1715 (was ``crs_wkt`` pre-rename); pass + # ``crs=None`` instead of the deprecated alias to avoid the + # DeprecationWarning the alias now emits. + write_vrt(vrt_path, [a_path], relative=False, crs=None, nodata=-9999.0) assert os.path.exists(vrt_path) diff --git a/xrspatial/geotiff/tests/test_signature_parity_1631.py b/xrspatial/geotiff/tests/test_signature_parity_1631.py index 2f7e0378..1c5e48a9 100644 --- a/xrspatial/geotiff/tests/test_signature_parity_1631.py +++ b/xrspatial/geotiff/tests/test_signature_parity_1631.py @@ -51,21 +51,34 @@ def _gpu_available() -> bool: def test_write_vrt_signature_exposes_documented_kwargs(): - """``inspect.signature(write_vrt)`` reports the three accepted kwargs. + """``inspect.signature(write_vrt)`` reports the four accepted kwargs. Prior to #1631 the public wrapper used ``**kwargs``, so ``inspect.signature`` only saw ``vrt_path`` and ``source_files``. + Issue #1715 added ``crs`` for parity with ``to_geotiff`` / + ``write_geotiff_gpu`` while keeping the historic ``crs_wkt`` as a + deprecated alias (sentinel default so the deprecation shim can + tell "user passed nothing" from "user passed crs_wkt=None"). """ sig = inspect.signature(write_vrt) params = sig.parameters assert 'relative' in params - assert 'crs_wkt' in params + assert 'crs' in params # added in #1715 + assert 'crs_wkt' in params # deprecated alias assert 'nodata' in params - # Defaults must match _vrt.write_vrt assert params['relative'].default is True - assert params['crs_wkt'].default is None + # ``crs`` is the new canonical kwarg; default None means "pick from + # the first source", matching to_geotiff / write_geotiff_gpu. + assert params['crs'].default is None + # ``crs_wkt`` carries a sentinel default so the deprecation shim + # can distinguish "user passed nothing" (no warning) from "user + # passed crs_wkt=None" (deprecated-but-explicit, warn). The + # sentinel itself is private; check that it is NOT None so a + # future maintainer cannot accidentally drop the sentinel logic. + assert params['crs_wkt'].default is not None + assert params['crs_wkt'].default is not inspect.Parameter.empty assert params['nodata'].default is None - # No more catch-all VAR_KEYWORD + # No catch-all VAR_KEYWORD kinds = {p.kind for p in params.values()} assert inspect.Parameter.VAR_KEYWORD not in kinds @@ -88,7 +101,12 @@ def test_write_vrt_unknown_kwarg_rejected_at_public_level(tmp_path): def test_write_vrt_accepts_documented_kwargs(tmp_path): - """Each documented kwarg round-trips through the explicit signature.""" + """Each documented kwarg round-trips through the explicit signature. + + Uses the new ``crs=None`` kwarg form (issue #1715). The deprecated + ``crs_wkt`` alias is exercised separately in + ``test_write_vrt_crs_1715.py``. + """ arr = np.zeros((8, 8), dtype=np.float32) da = xr.DataArray( arr, dims=['y', 'x'], @@ -101,7 +119,7 @@ def test_write_vrt_accepts_documented_kwargs(tmp_path): vrt_path = str(tmp_path / 't.vrt') out = write_vrt( vrt_path, [tif_path], - relative=False, crs_wkt=None, nodata=-9999.0, + relative=False, crs=None, nodata=-9999.0, ) assert out == vrt_path assert os.path.exists(vrt_path) diff --git a/xrspatial/geotiff/tests/test_write_vrt_crs_1715.py b/xrspatial/geotiff/tests/test_write_vrt_crs_1715.py new file mode 100644 index 00000000..a33ebad1 --- /dev/null +++ b/xrspatial/geotiff/tests/test_write_vrt_crs_1715.py @@ -0,0 +1,229 @@ +"""Regression test for #1715: write_vrt accepts ``crs`` for parity with +``to_geotiff`` / ``write_geotiff_gpu``. + +The api-consistency sweep on 2026-05-12 flagged that ``write_vrt`` was +the only writer in ``xrspatial.geotiff`` using ``crs_wkt`` instead of +``crs``, breaking the "forward the same kwargs to whichever writer +matches the output extension" pattern. The fix adds ``crs`` as the +canonical kwarg and keeps ``crs_wkt`` as a deprecated alias. + +This module pins: + +* ``crs`` accepts ``int`` (EPSG) and ``str`` (WKT) and ``None``, + matching ``to_geotiff``/``write_geotiff_gpu``. +* The ``crs_wkt`` alias still works but emits ``DeprecationWarning``. +* Passing both ``crs`` and ``crs_wkt`` raises ``TypeError``. +* The deprecation shim does NOT warn when neither kwarg is supplied + (the no-crs path picks from the first source, unchanged from + pre-#1715 behaviour). +* Read-back round trip: ``read_vrt(written).attrs['crs'] == 4326`` + when the writer was given ``crs=4326``. +""" +from __future__ import annotations + +import os +import warnings + +import numpy as np +import pytest +import xarray as xr + +from xrspatial.geotiff import ( + read_vrt, + to_geotiff, + write_vrt, +) + + +def _build_source_tif(tmp_path, name='src.tif'): + """Create a small GeoTIFF used as the VRT's source file.""" + arr = np.arange(8 * 8, dtype=np.float32).reshape(8, 8) + da = xr.DataArray( + arr, dims=['y', 'x'], + coords={'y': np.arange(8.0, 0, -1), 'x': np.arange(8.0)}, + attrs={'crs': 4326, 'transform': (1.0, 0, 0.0, 0, -1.0, 8.0)}, + ) + p = str(tmp_path / name) + to_geotiff(da, p) + return p + + +# --- Signature pins --- + + +def test_write_vrt_accepts_crs_kwarg(): + """``crs`` is in the signature and defaults to ``None``.""" + import inspect + + sig = inspect.signature(write_vrt) + assert 'crs' in sig.parameters + assert sig.parameters['crs'].default is None + + +def test_write_vrt_crs_annotation_matches_writer_trio(): + """``crs`` is annotated ``int | str | None``, identical to + ``to_geotiff(..., crs=...)`` and ``write_geotiff_gpu(..., crs=...)``. + """ + import inspect + + sig = inspect.signature(write_vrt) + ann = str(sig.parameters['crs'].annotation) + assert ann == 'int | str | None' + + +# --- Runtime: ``crs=`` writes an EPSG-resolved WKT --- + + +def test_write_vrt_crs_epsg_int_writes_wkt_to_xml(tmp_path): + """``crs=4326`` resolves to a WKT string in the VRT's element. + + The current implementation forwards the WKT to ``_vrt.write_vrt``, + which interpolates it into the XML node. Reading the file + back with ``read_vrt`` must therefore produce + ``attrs['crs'] == 4326`` (because ``_wkt_to_epsg`` round-trips + EPSG:4326's WKT cleanly). + """ + src = _build_source_tif(tmp_path, 'epsg_int.tif') + vrt_path = str(tmp_path / 'epsg_int.vrt') + + out = write_vrt(vrt_path, [src], crs=4326) + assert out == vrt_path + assert os.path.exists(vrt_path) + + da = read_vrt(vrt_path) + assert da.attrs.get('crs') == 4326 + + +def test_write_vrt_crs_wkt_string(tmp_path): + """``crs=`` passes the WKT through verbatim.""" + src = _build_source_tif(tmp_path, 'wkt.tif') + vrt_path = str(tmp_path / 'wkt.vrt') + + # Build a WKT for EPSG:4326 directly via pyproj + from pyproj import CRS + + wkt = CRS.from_epsg(4326).to_wkt() + + out = write_vrt(vrt_path, [src], crs=wkt) + assert out == vrt_path + da = read_vrt(vrt_path) + # WKT round-trips back to EPSG:4326 via _wkt_to_epsg + assert da.attrs.get('crs') == 4326 + + +def test_write_vrt_crs_none_falls_through(tmp_path): + """``crs=None`` (the default) picks the CRS from the first source.""" + src = _build_source_tif(tmp_path, 'none.tif') + vrt_path = str(tmp_path / 'none.vrt') + + with warnings.catch_warnings(): + warnings.simplefilter('error', DeprecationWarning) + out = write_vrt(vrt_path, [src], crs=None) + assert out == vrt_path + da = read_vrt(vrt_path) + # The source TIFF was written with EPSG:4326; VRT inherits it. + assert da.attrs.get('crs') == 4326 + + +def test_write_vrt_no_crs_kwarg_no_warning(tmp_path): + """Omitting ``crs`` entirely (the most common call shape) does not + emit any warning. The deprecation shim only fires when ``crs_wkt`` + is supplied explicitly.""" + src = _build_source_tif(tmp_path, 'no_kwarg.tif') + vrt_path = str(tmp_path / 'no_kwarg.vrt') + + with warnings.catch_warnings(): + warnings.simplefilter('error', DeprecationWarning) + write_vrt(vrt_path, [src]) # neither kwarg supplied + assert os.path.exists(vrt_path) + + +# --- Deprecation shim: ``crs_wkt=`` still works but warns --- + + +def test_write_vrt_crs_wkt_deprecated_warns(tmp_path): + """Passing ``crs_wkt=`` emits ``DeprecationWarning`` but still + produces a working VRT.""" + src = _build_source_tif(tmp_path, 'depr.tif') + vrt_path = str(tmp_path / 'depr.vrt') + + from pyproj import CRS + + wkt = CRS.from_epsg(4326).to_wkt() + + with pytest.warns(DeprecationWarning, match='crs_wkt'): + out = write_vrt(vrt_path, [src], crs_wkt=wkt) + assert out == vrt_path + da = read_vrt(vrt_path) + assert da.attrs.get('crs') == 4326 + + +def test_write_vrt_crs_wkt_none_still_warns(tmp_path): + """``crs_wkt=None`` (explicit) was a documented shape in the old + signature -- it now warns because the caller is using the + deprecated kwarg name, even if the value is None.""" + src = _build_source_tif(tmp_path, 'depr_none.tif') + vrt_path = str(tmp_path / 'depr_none.vrt') + + with pytest.warns(DeprecationWarning, match='crs_wkt'): + write_vrt(vrt_path, [src], crs_wkt=None) + assert os.path.exists(vrt_path) + + +def test_write_vrt_both_crs_and_crs_wkt_rejected(tmp_path): + """Passing both raises ``TypeError`` rather than silently picking + one. The error message names both kwargs so the caller can fix + their call quickly.""" + src = _build_source_tif(tmp_path, 'both.tif') + vrt_path = str(tmp_path / 'both.vrt') + + from pyproj import CRS + + wkt = CRS.from_epsg(4326).to_wkt() + + with pytest.raises(TypeError, match='crs.*crs_wkt'): + write_vrt(vrt_path, [src], crs=4326, crs_wkt=wkt) + + +# --- Cross-writer parity: same kwarg name on all three writers --- + + +def test_writer_trio_all_accept_crs_kwarg(): + """``crs`` is the canonical kwarg on every public writer in the trio. + A caller forwarding ``crs=`` to whichever writer matches the + output extension never has to special-case the kwarg name (issue + #1715).""" + import inspect + + from xrspatial.geotiff import to_geotiff, write_geotiff_gpu, write_vrt + + for fn in (to_geotiff, write_geotiff_gpu, write_vrt): + sig = inspect.signature(fn) + assert 'crs' in sig.parameters, f"{fn.__name__} missing crs kwarg" + assert ( + str(sig.parameters['crs'].annotation) == 'int | str | None' + ), f"{fn.__name__}.crs annotation drift" + + +# --- Negative tests: bad input shapes --- + + +def test_write_vrt_crs_invalid_type_rejected(tmp_path): + """``crs=`` (or any non-int/str/None) raises ``TypeError`` from + the public wrapper rather than from deep inside the writer.""" + src = _build_source_tif(tmp_path, 'bad_type.tif') + vrt_path = str(tmp_path / 'bad_type.vrt') + + with pytest.raises(TypeError, match='crs must be'): + write_vrt(vrt_path, [src], crs=[4326]) + + +def test_write_vrt_crs_unparseable_string_rejected(tmp_path): + """``crs='not a CRS'`` raises ``ValueError`` from the public + wrapper (the WKT keyword heuristic recognises PROJCS/GEOGCS only; + everything else is sent through pyproj which will reject it).""" + src = _build_source_tif(tmp_path, 'bad_str.tif') + vrt_path = str(tmp_path / 'bad_str.vrt') + + with pytest.raises(ValueError, match='Could not parse crs'): + write_vrt(vrt_path, [src], crs='not-a-real-crs-string')