Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions xrspatial/geotiff/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3474,6 +3474,16 @@ def read_vrt(source: str, *, dtype=None,
attrs['crs_wkt'] = vrt.crs_wkt
if vrt.raster_type == 'point':
attrs['raster_type'] = 'point'
# Surface skipped-source records as ``attrs['vrt_holes']`` so
# callers can detect a partial mosaic by attribute lookup. Under
# lenient mode (the default), the underlying ``_vrt.read_vrt``
# already warned per skipped source but the warning is easy to
# miss in a pipeline; the attr lets downstream code branch on
# ``"vrt_holes" in da.attrs`` instead of monitoring the warnings
# stream. Empty list is omitted so the attr only appears when
# there is actually a hole. See issue #1734.
if vrt.holes:
attrs['vrt_holes'] = list(vrt.holes)
# When a specific band is selected, source its nodata from that
# band's <NoDataValue> instead of band 0's. Otherwise multi-band
# VRTs with per-band sentinels would mis-mask the read: attrs would
Expand Down
30 changes: 29 additions & 1 deletion xrspatial/geotiff/_vrt.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,14 @@ class VRTDataset:
# coords must be emitted without the shift. Parsed from
# ``<Metadata><MDI key="AREA_OR_POINT">Point</MDI></Metadata>``.
raster_type: str = 'area' # 'area' or 'point'
# Per-load record of sources skipped by ``read_vrt`` under the
# lenient default (not strict mode). Each entry is a dict with
# ``source``, ``band`` (1-based), ``dst_rect`` (xoff, yoff,
# xsize, ysize), and ``error`` keys. Empty when no sources failed
# to read. Populated by :func:`read_vrt` so callers can detect
# holes by attribute lookup instead of parsing a UserWarning
# message (issue #1734).
holes: list[dict] = field(default_factory=list)


def _parse_rect(elem) -> _Rect:
Expand Down Expand Up @@ -656,17 +664,37 @@ def read_vrt(vrt_path: str, *, window=None,
# so partial mosaics are caught in CI. Default mode warns
# once per missing source then continues, preserving the
# historical behaviour. See issue #1662.
#
# The lenient default leaves zero-filled holes in
# integer VRTs that downstream code cannot tell from
# real data unless the band has a nodata sentinel set.
# Recording the skipped source on ``vrt.holes`` lets the
# public ``read_vrt`` surface a machine-readable
# ``attrs['vrt_holes']`` entry on the returned
# DataArray, alongside the warning. Callers that need
# to fail loudly can either inspect that attr or set
# ``XRSPATIAL_GEOTIFF_STRICT=1`` to raise here.
# See issue #1734.
import warnings
from . import _geotiff_strict_mode, GeoTIFFFallbackWarning
if _geotiff_strict_mode():
raise
warnings.warn(
f"VRT source {src.filename!r} could not be read "
f"({type(e).__name__}: {e}); skipping. The output "
f"mosaic will have a hole at this tile.",
f"mosaic will have a hole at this tile. Inspect "
f"``DataArray.attrs['vrt_holes']`` or set "
f"XRSPATIAL_GEOTIFF_STRICT=1 to raise instead.",
GeoTIFFFallbackWarning,
stacklevel=2,
)
vrt.holes.append({
'source': src.filename,
'band': src.band,
'dst_rect': (dr.x_off, dr.y_off,
dr.x_size, dr.y_size),
'error': f"{type(e).__name__}: {e}",
})
continue # skip missing/unreadable sources

# Handle source nodata. Cast the sentinel to the *source*
Expand Down
178 changes: 178 additions & 0 deletions xrspatial/geotiff/tests/test_vrt_holes_attr_1734.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
"""Regression test for issue #1734.

Under the lenient default (``XRSPATIAL_GEOTIFF_STRICT`` unset),
``read_vrt`` warns once per unreadable source and continues, producing
a mosaic with zero-filled holes for integer VRTs that downstream code
cannot distinguish from real data. The warning is easy to miss in a
pipeline that ignores ``UserWarning``s.

This module pins the new behaviour: the returned DataArray now carries
an ``attrs['vrt_holes']`` list describing each skipped source so
callers can detect a partial mosaic with a single attribute lookup.
Strict mode is unchanged and still raises.
"""
from __future__ import annotations

import warnings

import pytest

from xrspatial.geotiff import GeoTIFFFallbackWarning, read_vrt


@pytest.fixture
def clear_strict_env(monkeypatch):
monkeypatch.delenv('XRSPATIAL_GEOTIFF_STRICT', raising=False)


@pytest.fixture
def set_strict_env(monkeypatch):
monkeypatch.setenv('XRSPATIAL_GEOTIFF_STRICT', '1')


def _write_vrt_with_missing_source(vrt_path, missing_src) -> None:
"""Write a VRT with an Int32 band whose only source is missing.

Integer ``dataType`` is the failure mode issue #1734 was about: the
pre-fix lenient path zero-fills the output buffer (``fill = 0`` for
integer dtypes) and the user cannot distinguish that hole from real
zero-valued data. ``NoDataValue`` is omitted on purpose -- having
one would let downstream code mask the hole and side-step the
regression. See the module docstring.
"""
vrt_path.write_text(
'<VRTDataset rasterXSize="4" rasterYSize="4">\n'
' <SRS></SRS>\n'
' <GeoTransform>0, 1, 0, 0, 0, -1</GeoTransform>\n'
' <VRTRasterBand dataType="Int32" band="1">\n'
' <SimpleSource>\n'
f' <SourceFilename relativeToVRT="0">{missing_src}'
'</SourceFilename>\n'
' <SourceBand>1</SourceBand>\n'
' <SrcRect xOff="0" yOff="0" xSize="4" ySize="4"/>\n'
' <DstRect xOff="0" yOff="0" xSize="4" ySize="4"/>\n'
' </SimpleSource>\n'
' </VRTRasterBand>\n'
'</VRTDataset>\n'
)


def test_skipped_source_records_vrt_holes_attr(
clear_strict_env, tmp_path,
):
"""A VRT with a missing source returns a DataArray whose attrs
carry a ``vrt_holes`` entry naming the source, band, dst_rect,
and underlying error.

Uses an Int32 VRT so the hole is zero-filled (the exact failure
mode #1734 was about): without the attr there is no way to tell
the all-zeros tile from real data.
"""
import numpy as np

vrt_path = tmp_path / 'mosaic_1734_missing.vrt'
missing_src = f'{tmp_path}/does_not_exist_1734.tif'
_write_vrt_with_missing_source(vrt_path, missing_src)

with warnings.catch_warnings():
warnings.simplefilter('ignore', GeoTIFFFallbackWarning)
da = read_vrt(str(vrt_path))

# Confirm the integer-specific failure mode is in play: the hole is
# filled with zeros (not NaN), indistinguishable from real data
# without the attr.
assert np.issubdtype(da.dtype, np.integer)
assert (da.values == 0).all()

assert 'vrt_holes' in da.attrs
holes = da.attrs['vrt_holes']
assert isinstance(holes, list)
assert len(holes) == 1
h = holes[0]
assert h['source'].endswith('does_not_exist_1734.tif')
assert h['band'] == 1
assert h['dst_rect'] == (0, 0, 4, 4)
assert 'error' in h
assert h['error'] # non-empty


def test_no_holes_attr_when_all_sources_read(clear_strict_env, tmp_path):
"""A successful VRT read does not advertise an empty ``vrt_holes``
attr; the key is omitted entirely so ``"vrt_holes" in attrs`` is a
cheap completeness check."""
import numpy as np
import xarray as xr
from xrspatial.geotiff import to_geotiff

# Write a real source the VRT can reference.
src_path = tmp_path / 'src_1734.tif'
arr = np.arange(16, dtype=np.float32).reshape(4, 4)
da_src = xr.DataArray(
arr, dims=['y', 'x'],
coords={'y': np.linspace(3.5, 0.5, 4),
'x': np.linspace(0.5, 3.5, 4)},
attrs={'crs': 4326},
)
to_geotiff(da_src, str(src_path), compression='none')

vrt_path = tmp_path / 'mosaic_1734_ok.vrt'
vrt_path.write_text(
'<VRTDataset rasterXSize="4" rasterYSize="4">\n'
' <SRS></SRS>\n'
' <GeoTransform>0, 1, 0, 0, 0, -1</GeoTransform>\n'
' <VRTRasterBand dataType="Float32" band="1">\n'
' <SimpleSource>\n'
f' <SourceFilename relativeToVRT="0">{src_path}'
'</SourceFilename>\n'
' <SourceBand>1</SourceBand>\n'
' <SrcRect xOff="0" yOff="0" xSize="4" ySize="4"/>\n'
' <DstRect xOff="0" yOff="0" xSize="4" ySize="4"/>\n'
' </SimpleSource>\n'
' </VRTRasterBand>\n'
'</VRTDataset>\n'
)

with warnings.catch_warnings():
warnings.simplefilter('error', GeoTIFFFallbackWarning)
da = read_vrt(str(vrt_path))

assert 'vrt_holes' not in da.attrs


def test_strict_mode_still_raises(set_strict_env, tmp_path):
"""Strict mode is unchanged: the missing source surfaces the
underlying ``FileNotFoundError`` (an ``OSError`` subclass) from
``read_to_array`` instead of warning-and-skipping.

Asserting the concrete exception class -- not a bare ``Exception``
-- keeps the regression test honest: an unrelated bug somewhere in
the read path that happens to raise a different exception will
fail this test instead of silently satisfying it.
"""
vrt_path = tmp_path / 'mosaic_1734_strict.vrt'
missing_src = f'{tmp_path}/does_not_exist_1734_strict.tif'
_write_vrt_with_missing_source(vrt_path, missing_src)

with pytest.raises(FileNotFoundError,
match='does_not_exist_1734_strict.tif'):
read_vrt(str(vrt_path))


def test_warning_mentions_how_to_detect_holes(clear_strict_env, tmp_path):
"""The fallback warning now points callers at the attr or the
strict env var so the recovery path is discoverable from a single
captured warning."""
vrt_path = tmp_path / 'mosaic_1734_msg.vrt'
missing_src = f'{tmp_path}/does_not_exist_1734_msg.tif'
_write_vrt_with_missing_source(vrt_path, missing_src)

with warnings.catch_warnings(record=True) as w:
warnings.simplefilter('always')
read_vrt(str(vrt_path))

fallback = [
x for x in w if issubclass(x.category, GeoTIFFFallbackWarning)
]
assert fallback, "expected at least one GeoTIFFFallbackWarning"
msg = ' '.join(str(x.message) for x in fallback)
assert 'vrt_holes' in msg or 'XRSPATIAL_GEOTIFF_STRICT' in msg
Loading