-
Notifications
You must be signed in to change notification settings - Fork 94
Expand file tree
/
Copy path__init__.py
More file actions
602 lines (560 loc) · 26.4 KB
/
Copy path__init__.py
File metadata and controls
602 lines (560 loc) · 26.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
"""Lightweight GeoTIFF/COG reader and writer.
No GDAL dependency -- uses only numpy, numba, xarray, and the standard library.
Public API
----------
open_geotiff(source, ...)
Read a GeoTIFF, COG, or VRT file to an xarray.DataArray. Auto-dispatches
to the GPU, dask, or numpy backend based on the ``gpu`` and ``chunks``
kwargs.
read_geotiff_gpu(source, ...)
GPU-only read returning a CuPy-backed DataArray. ``open_geotiff(...,
gpu=True)`` calls this internally; use the explicit name when you want
the strict-mode failure semantics (``on_gpu_failure='strict'``) or want
to bypass auto-dispatch.
read_geotiff_dask(source, ...)
Dask-only read returning a windowed lazy DataArray. ``open_geotiff(...,
chunks=N)`` calls this internally.
read_vrt(source, ...)
Read a GDAL Virtual Raster Table (.vrt). ``open_geotiff`` routes ``.vrt``
paths here automatically; the explicit entry point is useful for
callers that already know they have a VRT.
to_geotiff(data, path, ...)
Write an xarray.DataArray as a GeoTIFF or COG. Auto-dispatches to GPU
when the data is CuPy-backed.
write_geotiff_gpu(data, path, ...)
GPU-only writer using nvCOMP. ``to_geotiff(..., gpu=True)`` calls this
internally.
write_vrt(path, source_files, ...)
Generate a VRT mosaic XML from a list of GeoTIFF files. ``vrt_path``
is kept as a deprecated alias for ``path``; passing both ``path`` and
``vrt_path`` raises ``TypeError`` (#1946).
"""
from __future__ import annotations
import math
import os
import warnings
from typing import TYPE_CHECKING
import numpy as np
import xarray as xr
if TYPE_CHECKING:
from typing import BinaryIO
from ._coords import (
_BAND_DIM_NAMES,
coords_from_geo_info as _coords_from_geo_info,
coords_from_pixel_geometry as _coords_from_pixel_geometry,
transform_tuple_from_pixel_geometry as _transform_tuple_from_pixel_geometry,
geo_to_coords as _geo_to_coords,
transform_tuple as _transform_tuple,
transform_from_attr as _transform_from_attr,
coords_to_transform as _coords_to_transform,
require_transform_for_georeferenced as _require_transform_for_georeferenced,
)
from ._errors import (
ConflictingCRSError,
ConflictingNodataError,
GeoTIFFAmbiguousMetadataError,
InvalidCRSCodeError,
MixedBandMetadataError,
NonUniformCoordsError,
RotatedTransformError,
UnparseableCRSError,
)
from ._geotags import GeoTransform, RASTER_PIXEL_IS_AREA, RASTER_PIXEL_IS_POINT
from ._reader import UnsafeURLError
# ``read_to_array`` is internal: it is used by ``open_geotiff`` and the
# GPU fallback below but is not in ``__all__`` or the module-level
# Public API docstring. Bind it under a leading-underscore name so it
# does not leak into ``xrspatial.geotiff``'s public namespace. Tests
# and internal callers that genuinely need it can import directly from
# ``xrspatial.geotiff._reader``. See issue #1708.
from ._attrs import (
_LEVEL_RANGES,
_VALID_COMPRESSIONS,
_extent_to_window,
_extract_rich_tags,
_populate_attrs_from_geo_info,
_resolve_nodata_attr,
)
from ._backends._gpu_helpers import _is_gpu_data
# Re-export only; called by xrspatial/geotiff/tests/test_nodata_*.py.
from ._backends._gpu_helpers import _apply_nodata_mask_gpu # noqa: F401
from ._backends.dask import read_geotiff_dask
from ._backends.gpu import read_geotiff_gpu
from ._backends.vrt import read_vrt
from ._crs import _resolve_crs_to_wkt, _wkt_to_epsg
from ._reader import (
_MAX_CLOUD_BYTES_SENTINEL,
read_to_array as _read_to_array,
)
from ._runtime import (
GeoTIFFFallbackWarning,
_CRS_WKT_DEPRECATED_SENTINEL,
_GPU_DEPRECATED_SENTINEL,
_MISSING_SOURCES_SENTINEL,
_ON_GPU_FAILURE_SENTINEL,
_geotiff_strict_mode,
_gpu_fallback_warning_message,
)
from ._validation import (
_validate_3d_writer_dims,
_validate_chunks_arg,
_validate_dtype_cast,
_validate_tile_size_arg,
)
from ._writer import write
from ._writers.eager import to_geotiff
# Re-export only; called by xrspatial/geotiff/tests/test_nodata_no_extra_copy_1553.py.
from ._writers.eager import _write_single_tile # noqa: F401
from ._writers.gpu import write_geotiff_gpu
from ._writers.vrt import write_vrt
# All names below are part of the supported public API. ``plot_geotiff``
# is intentionally omitted: it is deprecated in favour of ``da.xrs.plot()``
# and emits a ``DeprecationWarning`` when called.
__all__ = [
'ConflictingCRSError',
'ConflictingNodataError',
'GeoTIFFAmbiguousMetadataError',
'GeoTIFFFallbackWarning',
'InvalidCRSCodeError',
'MixedBandMetadataError',
'NonUniformCoordsError',
'RotatedTransformError',
'UnparseableCRSError',
'UnsafeURLError',
'open_geotiff',
'read_geotiff_gpu',
'read_geotiff_dask',
'read_vrt',
'to_geotiff',
'write_geotiff_gpu',
'write_vrt',
]
def _read_geo_info(source, *, overview_level: int | None = None):
"""Read only the geographic metadata and image dimensions from a GeoTIFF.
Returns (geo_info, height, width, dtype, n_bands) without reading pixel
data. Uses mmap for header-only access on string paths; for file-like
inputs it reads the bytes directly. O(1) memory regardless of file size
when a path is supplied.
Parameters
----------
source : str or binary file-like
Path or any object with ``read``/``seek``.
overview_level : int or None
Overview IFD index (0 = full resolution).
"""
from ._dtypes import resolve_bits_per_sample, tiff_dtype_to_numpy
from ._geotags import extract_geo_info_with_overview_inheritance
from ._header import parse_all_ifds, parse_header, select_overview_ifd
from ._reader import (
_CloudSource, _coerce_path, _is_file_like, _is_fsspec_uri,
_parse_cog_http_meta,
)
from ._validation import _validate_predictor_sample_format
source = _coerce_path(source)
if isinstance(source, str) and _is_fsspec_uri(source):
# fsspec URI (s3://, gs://, az://, memory://, ...): use the
# bounded-prefetch metadata parser instead of downloading the
# full remote object. ``_parse_cog_http_meta`` only needs
# ``read_range`` on the source, which ``_CloudSource`` provides;
# it grows a small range buffer until the IFD chain resolves
# (capped by ``MAX_HTTP_HEADER_BYTES``). Avoids the
# whole-file fetch that would otherwise happen on every
# ``open_geotiff(..., chunks=...)`` graph build for a large COG.
_src = _CloudSource(source)
try:
_header, _ifd, geo_info, _ = _parse_cog_http_meta(
_src, overview_level=overview_level)
finally:
_src.close()
bps = resolve_bits_per_sample(_ifd.bits_per_sample)
file_dtype = tiff_dtype_to_numpy(bps, _ifd.sample_format)
_validate_predictor_sample_format(_ifd.predictor, _ifd.sample_format)
n_bands = (
_ifd.samples_per_pixel if _ifd.samples_per_pixel > 1 else 0
)
# Stash photometric + samples_per_pixel so the dask graph builder
# can detect MinIsWhite and invert ``geo_info.nodata`` before
# binding it into the chunk closure (#1809).
geo_info._ifd_photometric = _ifd.photometric
geo_info._ifd_samples_per_pixel = _ifd.samples_per_pixel
return geo_info, _ifd.height, _ifd.width, file_dtype, n_bands
if _is_file_like(source):
# File-like: read its full bytes; we don't try to mmap arbitrary
# buffers because they may not back a real file descriptor.
try:
cur = source.tell()
except (OSError, AttributeError):
cur = 0
source.seek(0)
data = source.read()
try:
source.seek(cur)
except (OSError, AttributeError):
pass
close_data = False
elif isinstance(source, str):
with open(source, 'rb') as f:
import mmap
data = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)
close_data = True
else:
raise TypeError(
"source must be a str path or binary file-like, "
f"got {type(source).__name__}")
try:
header = parse_header(data)
ifds = parse_all_ifds(data, header)
if not ifds:
raise ValueError("No IFDs found in TIFF file")
ifd = select_overview_ifd(ifds, overview_level)
# Inherit georef from the level-0 IFD when the overview itself
# has no geokeys (issue #1640). Pass-through for level 0.
geo_info = extract_geo_info_with_overview_inheritance(
ifd, ifds, data, header.byte_order)
bps = resolve_bits_per_sample(ifd.bits_per_sample)
file_dtype = tiff_dtype_to_numpy(bps, ifd.sample_format)
_validate_predictor_sample_format(ifd.predictor, ifd.sample_format)
n_bands = ifd.samples_per_pixel if ifd.samples_per_pixel > 1 else 0
# Stash photometric + samples_per_pixel so the dask graph builder
# can detect MinIsWhite and invert ``geo_info.nodata`` before
# binding it into the chunk closure (#1809).
geo_info._ifd_photometric = ifd.photometric
geo_info._ifd_samples_per_pixel = ifd.samples_per_pixel
return geo_info, ifd.height, ifd.width, file_dtype, n_bands
finally:
if close_data:
data.close()
def open_geotiff(source: str | BinaryIO, *,
dtype: str | np.dtype | None = None,
window: tuple | None = None,
overview_level: int | None = None,
band: int | None = None,
name: str | None = None,
chunks: int | tuple | None = None,
gpu: bool = False,
max_pixels: int | None = None,
max_cloud_bytes=_MAX_CLOUD_BYTES_SENTINEL,
on_gpu_failure: str = _ON_GPU_FAILURE_SENTINEL,
missing_sources: str = _MISSING_SOURCES_SENTINEL,
) -> xr.DataArray:
"""Read a GeoTIFF, COG, or VRT file into an xarray.DataArray.
Automatically dispatches to the best backend:
- ``gpu=True``: GPU-accelerated read via nvCOMP (returns CuPy)
- ``chunks=N``: Dask lazy read via windowed chunks
- ``gpu=True, chunks=N``: Dask+CuPy for out-of-core GPU pipelines
- Default: NumPy eager read
VRT files are auto-detected by extension.
Parameters
----------
source : str or binary file-like
File path, HTTP URL, cloud URI (s3://, gs://, az://), or a
binary file-like object (e.g. ``io.BytesIO``) with read+seek.
VRT, dask-chunked, GPU, and remote-URL paths require a string;
in-memory file-like buffers go through the eager numpy reader.
dtype : str, numpy.dtype, or None
Cast the result to this dtype after reading. None keeps the
file's native dtype. Float-to-int casts raise ValueError to
prevent accidental data loss.
window : tuple or None
(row_start, col_start, row_stop, col_stop) for windowed reading.
overview_level : int or None
Overview level (0 = full resolution).
band : int or None
Band index (0-based). None returns all bands.
name : str or None
Name for the DataArray.
chunks : int, tuple, or None
Chunk size for Dask lazy reading.
gpu : bool
Use GPU-accelerated decompression (requires cupy + nvCOMP).
max_pixels : int or None
Maximum allowed pixel count (width * height * samples). None
uses the default (~1 billion). Raise to read legitimately
large files.
max_cloud_bytes : int or None, optional
Byte ceiling for eager reads from fsspec sources (``s3://``,
``gs://``, ``az://``, ``abfs://``, ``memory://``, ...). The
compressed object size is checked against this budget before
any bytes are downloaded. Default is 256 MiB, overridable via
the ``XRSPATIAL_GEOTIFF_MAX_CLOUD_BYTES`` env var. Pass
``None`` to skip the check entirely. The HTTP path already
reads only what it needs via range requests and is not subject
to this limit. Has no effect on local file or file-like
sources. Passing this kwarg with ``gpu=True``, ``chunks=...``,
or a ``.vrt`` source raises ``ValueError`` because those
backends do not apply the cloud-byte budget. See issue #1928
(eager path) and issue #1974 (rejection guard).
on_gpu_failure : {'auto', 'strict'}, optional
Forwarded to ``read_geotiff_gpu`` when ``gpu=True``. Controls
whether GPU decode failures fall back to CPU (``'auto'``,
default) or re-raise the original exception (``'strict'``).
Passing this kwarg with ``gpu=False`` raises ``ValueError``
because the policy only applies to the GPU pipeline. See
``read_geotiff_gpu`` for the full description.
missing_sources : {'raise', 'warn'}, optional
Forwarded to ``read_vrt`` when the source is a ``.vrt`` file.
When the caller does not pass this kwarg, the public
``read_vrt`` default applies (``'raise'`` since #1860).
``'raise'`` fails immediately on an unreadable backing source.
``'warn'`` is the opt-in lenient mode: emit
``GeoTIFFFallbackWarning``, record ``attrs['vrt_holes']``, and
return a partial mosaic. Passing this kwarg with a non-VRT
source raises ``ValueError`` because the policy only applies to
the VRT pipeline. See ``read_vrt`` for the full description.
Returns
-------
xr.DataArray
NumPy, Dask, CuPy, or Dask+CuPy backed depending on options.
Notes
-----
The CRS is stored as an int EPSG code in ``attrs['crs']`` whenever the
file's GeoKeys carry a recognized EPSG. Files whose CRS can only be
expressed as WKT keep the WKT in ``attrs['crs_wkt']`` and leave
``attrs['crs']`` unset. ``to_geotiff`` accepts either an int EPSG or a
WKT string in ``attrs['crs']`` for backward compatibility.
The file's GeoTransform is also surfaced as ``attrs['transform']``,
a rasterio-style 6-tuple
``(pixel_width, 0, origin_x, 0, pixel_height, origin_y)``. ``to_geotiff``
uses this attr verbatim when present, falling back to recomputing the
transform from the y/x coord arrays only when it is missing. The attr
is what makes write -> read -> write -> read round-trips bit-stable for
rasters with fractional pixel sizes or origins.
Integer rasters with a nodata sentinel are silently promoted to
``float64`` with NaN replacing the sentinel so downstream NaN-aware
code works uniformly. Pass ``dtype=...`` to keep the source dtype
(the cast will fail with ``ValueError`` for float-to-int because that
is lossy in a way users rarely intend; cast explicitly after read if
you need it).
"""
from ._reader import _coerce_path
source = _coerce_path(source)
# ``on_gpu_failure`` is GPU-only. Reject it up front for CPU/dask paths
# rather than silently dropping it once dispatch is decided -- callers
# otherwise have no way to learn that the policy is being ignored.
# ``gpu=False`` (the default) on a ``.vrt`` source still routes through
# ``read_vrt`` below which has no GPU-failure concept, so the same
# rejection rule applies there.
if on_gpu_failure is not _ON_GPU_FAILURE_SENTINEL and not gpu:
raise ValueError(
"on_gpu_failure only applies when gpu=True. "
"Pass gpu=True to enable the GPU pipeline, or drop "
"on_gpu_failure to keep the default CPU path.")
# ``missing_sources`` is VRT-only. Reject it up front when the source
# is not a ``.vrt`` file so callers learn the policy is being ignored
# instead of getting a silent drop -- same pattern ``on_gpu_failure``
# uses above for the GPU-only kwarg, and the same class of dispatcher
# silently-drops-backend-kwarg bug #1561 / #1605 / #1685 / #1795 fixed
# for the other VRT/GPU kwargs. See issue #1810.
missing_sources_passed = (
missing_sources is not _MISSING_SOURCES_SENTINEL)
_is_vrt_source = (
isinstance(source, str) and source.lower().endswith('.vrt'))
if missing_sources_passed and not _is_vrt_source:
raise ValueError(
"missing_sources only applies to VRT sources. "
"Pass a .vrt path to enable the VRT pipeline, or drop "
"missing_sources to keep the default GeoTIFF path.")
# ``max_cloud_bytes`` is the eager fsspec-read budget. Only
# ``_read_to_array`` on the eager non-VRT, non-GPU, non-dask branch
# consumes it; the GPU (``read_geotiff_gpu``), dask
# (``read_geotiff_dask``), and VRT (``read_vrt``) branches all ignore
# the kwarg silently. Reject it up front on those paths so callers
# learn the budget is being dropped, matching the
# ``on_gpu_failure`` / ``missing_sources`` guards above and the
# silently-drops-backend-kwarg fixes in #1561 / #1605 / #1685 / #1810.
# See issue #1974.
if max_cloud_bytes is not _MAX_CLOUD_BYTES_SENTINEL:
if _is_vrt_source:
raise ValueError(
"max_cloud_bytes is not supported for VRT sources. "
"The VRT reader does not apply the cloud-byte budget; "
"drop the kwarg, or call open_geotiff on the underlying "
".tif source.")
if gpu:
raise ValueError(
"max_cloud_bytes is not supported when gpu=True. "
"The GPU reader does not apply the cloud-byte budget; "
"drop the kwarg, or pass gpu=False to use the eager "
"CPU path.")
if chunks is not None:
raise ValueError(
"max_cloud_bytes is not supported when chunks=... (dask). "
"The dask reader does not apply the cloud-byte budget; "
"drop the kwarg, or drop chunks to use the eager path.")
# VRT files (string paths only -- VRT XML references other files on disk)
if _is_vrt_source:
# ``read_vrt`` does not accept ``overview_level`` (the VRT XML
# references its own source files; overview selection would need
# to apply to each one). Silently dropping the kwarg was the same
# class of bug issue #1561 fixed for the dask and GPU dispatchers,
# so refuse the combination up front rather than handing the
# caller a full-resolution mosaic with no warning. See issue #1685.
# ``overview_level=0`` is documented as "full resolution" (the
# default), so treat it as a no-op the same as ``None`` rather
# than rejecting a kwarg value the caller could have omitted.
if overview_level not in (None, 0):
raise ValueError(
"overview_level is not supported for VRT sources. "
"VRT references its own source files; pass overview_level "
"to open_geotiff on a .tif source, or drop the kwarg.")
# ``on_gpu_failure`` only routes through ``read_geotiff_gpu``.
# ``read_vrt`` has no analogous failure policy, so any value the
# caller supplied alongside a VRT source would be silently lost.
# The ``gpu=False`` branch is already rejected above; this catches
# the ``gpu=True, source.endswith('.vrt')`` case the earlier check
# lets through.
if on_gpu_failure is not _ON_GPU_FAILURE_SENTINEL:
raise ValueError(
"on_gpu_failure is not supported for VRT sources. "
"VRT reads do not go through the GPU decoder pipeline; "
"drop the kwarg or call read_geotiff_gpu directly on a "
".tif source.")
vrt_kwargs = {}
if missing_sources_passed:
vrt_kwargs['missing_sources'] = missing_sources
return read_vrt(source, dtype=dtype, window=window, band=band,
name=name, chunks=chunks, gpu=gpu,
max_pixels=max_pixels, **vrt_kwargs)
# File-like buffers don't support the GPU or dask code paths because
# those re-open the source by path from worker tasks or device-side
# readers. Reject early with a clear message.
if not isinstance(source, str):
if gpu:
raise ValueError(
"gpu=True is not supported for file-like sources. "
"Pass a path string instead.")
if chunks is not None:
raise ValueError(
"chunks=... (dask) is not supported for file-like sources. "
"Pass a path string instead.")
# GPU path
if gpu:
gpu_kwargs = {}
if on_gpu_failure is not _ON_GPU_FAILURE_SENTINEL:
gpu_kwargs['on_gpu_failure'] = on_gpu_failure
return read_geotiff_gpu(source, dtype=dtype,
overview_level=overview_level,
window=window, band=band,
name=name, chunks=chunks,
max_pixels=max_pixels,
**gpu_kwargs)
# Dask path (CPU)
if chunks is not None:
return read_geotiff_dask(source, dtype=dtype, chunks=chunks,
overview_level=overview_level,
window=window, band=band,
max_pixels=max_pixels, name=name)
kwargs = {}
if max_pixels is not None:
kwargs['max_pixels'] = max_pixels
if max_cloud_bytes is not _MAX_CLOUD_BYTES_SENTINEL:
kwargs['max_cloud_bytes'] = max_cloud_bytes
# ``read_to_array`` validates ``window`` against the selected IFD's
# extent and raises ``ValueError`` for out-of-bounds windows with
# the same message format as the dask path's pre-flight validator
# in :func:`read_geotiff_dask`. That keeps the two backends in sync
# on the contract without forcing a second metadata parse here. See
# issue #1634.
arr, geo_info = _read_to_array(
source, window=window,
overview_level=overview_level, band=band,
**kwargs,
)
height, width = arr.shape[:2]
coords = _geo_to_coords(geo_info, height, width)
if window is not None:
# Adjust coordinates for windowed read. ``read_to_array`` rejected
# out-of-bounds windows above, so ``r0/c0/r1/c1`` are guaranteed
# in-range here (no clamp needed). For files with no GeoTIFF
# tags (``has_georef=False``), the default unit transform is
# not real, so fall back to integer pixel coords matching the
# ``_geo_to_coords`` shortcut taken on full reads. See #1710.
coords = _coords_from_geo_info(
geo_info, height, width, window=window,
)
if name is None:
# Derive from source path. File-like buffers don't have a path,
# so leave name unset rather than fabricating one.
if isinstance(source, str):
import os
name = os.path.splitext(os.path.basename(source))[0]
attrs = {}
_populate_attrs_from_geo_info(attrs, geo_info, window=window)
# Apply nodata mask: replace nodata sentinel values with NaN.
# ``arr`` came from ``read_to_array``, which returns a freshly
# allocated buffer from ``_read_tiles`` / ``_read_strips`` (and is
# ``np.ascontiguousarray``-wrapped if the orientation tag triggered
# a remap). Nothing else holds a reference here, so the in-place
# write is safe; an extra ``arr.copy()`` would just double peak
# memory for a multi-MB raster.
nodata = geo_info.nodata
if nodata is not None:
attrs['nodata'] = nodata
# When the reader applied MinIsWhite, the sentinel-equality mask
# must compare against the inverted sentinel value (issue #1809).
# ``read_to_array`` / ``_read_cog_http`` stash that value on
# ``geo_info._mask_nodata``; fall back to the original sentinel
# on non-MinIsWhite files.
mask_nodata = getattr(geo_info, '_mask_nodata', nodata)
if arr.dtype.kind == 'f':
if mask_nodata is not None and not np.isnan(mask_nodata):
arr[arr == arr.dtype.type(mask_nodata)] = np.nan
elif arr.dtype.kind in ('u', 'i'):
# Integer arrays: convert to float to represent NaN.
# An out-of-range sentinel (e.g. uint16 file with
# GDAL_NODATA="-9999") cannot match any decoded pixel, so the
# mask would be all-False -- skip the cast that would otherwise
# raise OverflowError and leave the array unchanged. A
# non-finite sentinel ("NaN" / "Inf" GDAL_NODATA strings) also
# cannot match an integer pixel, so the ``int(nodata)`` cast
# below would raise ValueError; gate on ``np.isfinite`` first
# to mirror ``_resolve_masked_fill`` / ``_sparse_fill_value``
# in ``_reader.py`` (#1774). A fractional sentinel (e.g.
# ``GDAL_NODATA="3.5"`` on a ``uint16`` file) also cannot match
# an integer pixel; ``int(3.5)`` would truncate to 3 and
# silently mask a real pixel value, so gate on
# ``float(nodata).is_integer()`` as well (mirrors the
# ``_writer.py`` / ``_vrt.py`` pattern used for #1564 / #1616).
# attrs['nodata'] still carries the raw sentinel so a write
# round-trip preserves the tag.
if (mask_nodata is not None
and np.isfinite(mask_nodata)
and float(mask_nodata).is_integer()):
nodata_int = int(mask_nodata)
info = np.iinfo(arr.dtype)
if info.min <= nodata_int <= info.max:
mask = arr == arr.dtype.type(nodata_int)
if mask.any():
arr = arr.astype(np.float64)
arr[mask] = np.nan
if dtype is not None:
target = np.dtype(dtype)
_validate_dtype_cast(arr.dtype, target)
arr = arr.astype(target)
if arr.ndim == 3:
dims = ['y', 'x', 'band']
coords['band'] = np.arange(arr.shape[2])
else:
dims = ['y', 'x']
da = xr.DataArray(
arr,
dims=dims,
coords=coords,
name=name,
attrs=attrs,
)
return da
def plot_geotiff(da: xr.DataArray, **kwargs):
"""Plot a DataArray using its embedded colormap if present.
.. deprecated:: 0.10.0
Use ``da.xrs.plot()`` instead. ``plot_geotiff`` is a thin wrapper
kept for backward compatibility and will be removed in a future
release.
"""
warnings.warn(
"plot_geotiff is deprecated and will be removed in a future "
"release. Use ``da.xrs.plot()`` instead.",
DeprecationWarning,
stacklevel=2,
)
return da.xrs.plot(**kwargs)