diff --git a/.claude/sweep-api-consistency-state.csv b/.claude/sweep-api-consistency-state.csv index 3ff54bea..f21c7fe2 100644 --- a/.claude/sweep-api-consistency-state.csv +++ b/.claude/sweep-api-consistency-state.csv @@ -1,3 +1,3 @@ module,last_inspected,issue,severity_max,categories_found,notes -geotiff,2026-05-11,1631,MEDIUM,3,"Filed write_vrt and write_geotiff_gpu signature/docstring drift vs to_geotiff (MEDIUM, #1631). Fix in PR (TBD): explicit write_vrt(relative, crs_wkt, nodata) signature (was **kwargs); 'cubic' added to write_geotiff_gpu overview_resampling docstring; write_geotiff_gpu(data) typed xr.DataArray|cupy.ndarray to match to_geotiff. Prior 1605/1606/1611/1612/1613/1615/1623 all CLOSED." +geotiff,2026-05-11,1644,MEDIUM,3,"Filed write_geotiff_gpu compression docstring drift vs to_geotiff (MEDIUM Cat 3, #1644). Fix on deep-sweep-api-consistency-geotiff-2026-05-11-1778545740: sync the full 9-codec list into the docstring and note GPU vs CPU encode paths; regression test test_compression_docstring_1644.py pins the codec list and exercises each CPU-fallback codec end-to-end. Other potential drifts surveyed: write_vrt returns str while to_geotiff/write_geotiff_gpu return None (LOW, intentional backward-compat); write_vrt nodata typed float|None vs int-accepting siblings (LOW, PEP 484 int->float compat); kwarg-only ordering drift across read functions (LOW, no user impact). Prior issues 1631/1637/1615/1560/1541/1562 all CLOSED." reproject,2026-05-10,1570,HIGH,2;5,"Filed cross-module attrs['vertical_crs'] type collision (string vs EPSG int) vs xrspatial.geotiff. Fixed in PR (TBD): reproject now writes EPSG int and preserves friendly token under vertical_datum. MEDIUM kwarg-order drift (transform_precision vs chunk_size) and missing type hints vs geotiff documented but not fixed (cosmetic, kwarg-only)." diff --git a/xrspatial/geotiff/__init__.py b/xrspatial/geotiff/__init__.py index 8495101b..142c7404 100644 --- a/xrspatial/geotiff/__init__.py +++ b/xrspatial/geotiff/__init__.py @@ -2723,8 +2723,35 @@ def write_geotiff_gpu(data: xr.DataArray | cupy.ndarray | np.ndarray, nodata : float, int, or None NoData value. compression : str - 'zstd' (default, fastest on GPU), 'deflate', 'jpeg', or 'none'. - JPEG uses nvJPEG when available, falling back to Pillow. + Codec name. Accepts the same set ``to_geotiff`` lists in its + own signature: ``'none'``, ``'deflate'``, ``'lzw'``, ``'jpeg'``, + ``'packbits'``, ``'zstd'``, ``'lz4'``, ``'jpeg2000'`` (alias + ``'j2k'``), or ``'lerc'``. + + Routing per codec: + + - ``'zstd'`` (default) and ``'deflate'``: nvCOMP batch + compression on the GPU -- the fastest paths and the reason to + use this entry point. + - ``'jpeg'``: nvJPEG when libnvjpeg is loadable, Pillow + otherwise. Note that ``to_geotiff`` rejects + ``compression='jpeg'`` at runtime because its CPU encoder + omits the required TIFF JPEGTables tag (347); this GPU entry + point instead emits self-contained JFIF tiles. The two + writers therefore disagree about JPEG-in-TIFF interop. Files + produced here decode through this library's own reader but + may not round-trip through GDAL, rasterio, or libtiff + readers that require the JPEGTables tag. Treat the JPEG path + as experimental and internal-reader-only until the + JPEGTables fix lands. + - ``'jpeg2000'`` and ``'j2k'``: nvJPEG2K GPU encode when + available, glymur CPU encode otherwise. The two paths are + not byte-for-byte identical (different libraries, different + default parameters); use ``to_geotiff`` if you need exact + CPU-writer parity. + - ``'lerc'``, ``'lzw'``, ``'packbits'``, and ``'lz4'``: no + nvCOMP/CUDA accelerator, so these fall through to the CPU + encoder for byte-stable parity with ``to_geotiff``. compression_level : int or None Compression effort level. Accepted for API compatibility but currently ignored -- nvCOMP does not expose level control. diff --git a/xrspatial/geotiff/tests/test_compression_docstring_1644.py b/xrspatial/geotiff/tests/test_compression_docstring_1644.py new file mode 100644 index 00000000..a8f04957 --- /dev/null +++ b/xrspatial/geotiff/tests/test_compression_docstring_1644.py @@ -0,0 +1,133 @@ +"""Regression test for #1644: ``write_geotiff_gpu`` compression docstring +parity vs ``to_geotiff``. + +The api-consistency sweep on 2026-05-11 flagged that +``write_geotiff_gpu.__doc__`` listed only four codecs (``'zstd'``, +``'deflate'``, ``'jpeg'``, ``'none'``) under the ``compression`` +parameter, while the implementation actually accepts every codec +``to_geotiff`` does. + +Routing for the additional codecs: + +* ``'lzw'``, ``'packbits'``, ``'lz4'``, ``'lerc'`` -- not nvCOMP- + accelerated and have no GPU library, so they fall through to the + CPU encoder. Byte-for-byte identical to ``to_geotiff``. +* ``'jpeg2000'`` / ``'j2k'`` -- attempts an nvJPEG2K *GPU* encode + first via ``_nvjpeg2k_batch_encode`` and falls back to the CPU + ``glymur`` encoder only when libnvjpeg2k is unavailable. The two + paths are NOT byte-stable against each other; this module pins the + acceptance contract (the codec name is accepted and a file gets + written), not output-byte parity with the CPU writer. +* ``'jpeg'`` -- accepted here even though ``to_geotiff`` rejects it + (the CPU writer omits the JPEGTables tag, so its output doesn't + round-trip through GDAL). The GPU path emits self-contained JFIF + tiles. Covered separately by + ``test_gpu_writer_compression_modes_2026_05_11.py``; this module + excludes it from the parametrized fallback list because the test + data needs to be uint8 with sensible pixel content. + +This module pins the full codec list against future drift and confirms +the underlying entry point accepts the codec names that the docstring +now advertises. +""" +from __future__ import annotations + +import importlib.util +import os + +import numpy as np +import pytest +import xarray as xr + +from xrspatial.geotiff import write_geotiff_gpu + + +def _gpu_available() -> bool: + """True when cupy imports and CUDA is initialised.""" + if importlib.util.find_spec("cupy") is None: + return False + try: + import cupy + + return bool(cupy.cuda.is_available()) + except Exception: + return False + + +_HAS_GPU = _gpu_available() +_gpu_only = pytest.mark.skipif( + not _HAS_GPU, reason="cupy + CUDA required", +) + + +# Codecs to exercise end-to-end through the GPU writer to confirm they +# accept the docstring's advertised names. Excludes ``jpeg`` because +# (a) ``to_geotiff`` rejects it at runtime and (b) the JPEG round-trip +# is covered with appropriate uint8 RGB data in +# ``test_gpu_writer_compression_modes_2026_05_11.py``; keeping it out of +# this parametrize avoids exercising the JPEG path on dtype/shape +# combinations that aren't representative. +_GPU_FALLBACK_CODECS = ( + "lzw", "packbits", "lz4", "lerc", "jpeg2000", "j2k", +) + + +def test_write_geotiff_gpu_docstring_lists_full_codec_set(): + """The ``compression`` docstring lists every codec ``to_geotiff`` accepts. + + Prior to #1644 the docstring listed only ``'zstd'``, ``'deflate'``, + ``'jpeg'``, and ``'none'``, which made the GPU writer look much + more restrictive than it actually is. The block below pins the + canonical wording. + """ + doc = write_geotiff_gpu.__doc__ + assert doc is not None, "write_geotiff_gpu lost its docstring" + block_start = doc.index("compression : str") + block_end = doc.index("compression_level", block_start) + block = doc[block_start:block_end] + # Every codec name in the canonical list must appear in the block. + # Use single-quoted form because that is how the docstring writes them. + for codec in ( + "'none'", "'deflate'", "'lzw'", "'jpeg'", "'packbits'", + "'zstd'", "'lz4'", "'jpeg2000'", "'j2k'", "'lerc'", + ): + assert codec in block, ( + f"compression docstring missing {codec}; current block:\n{block}" + ) + + +@_gpu_only +@pytest.mark.parametrize("codec", _GPU_FALLBACK_CODECS) +def test_write_geotiff_gpu_accepts_cpu_fallback_codecs(tmp_path, codec): + """Codecs without a GPU encoder still write successfully via CPU. + + Confirms the docstring's promise that the GPU writer accepts the + same codec set as ``to_geotiff``. ``jpeg`` is exercised separately + by ``test_gpu_writer_compression_modes_2026_05_11.py`` because the + test data must be uint8 with sensible content. ``jpeg2000`` / + ``j2k`` will attempt nvJPEG2K if available and fall back to + ``glymur`` otherwise; either way the encoder needs uint8/uint16 + input, so pick a uint16 source for those codecs so the encode path + is the one users actually hit, not a dtype-rejected pre-check. + """ + import cupy + + if codec in ("jpeg2000", "j2k"): + arr_cpu = np.random.RandomState(0).randint( + 0, 65535, size=(64, 64), dtype=np.uint16, + ) + else: + arr_cpu = np.random.RandomState(0).rand(64, 64).astype(np.float32) + da = xr.DataArray( + cupy.asarray(arr_cpu), dims=["y", "x"], + coords={"y": np.arange(64.0, 0, -1), "x": np.arange(64.0)}, + attrs={"crs": 4326, + "transform": (1.0, 0.0, 0.0, 0.0, -1.0, 64.0)}, + ) + path = str(tmp_path / f"out_{codec}.tif") + write_geotiff_gpu(da, path, compression=codec) + assert os.path.exists(path), ( + f"write_geotiff_gpu(compression={codec!r}) failed to write a file" + ) + # File must be non-empty so we know the encode path actually ran + assert os.path.getsize(path) > 0