@@ -308,11 +308,13 @@ def _populate_attrs_from_geo_info(attrs: dict, geo_info, *, window=None) -> None
308308 sets it next to its own nodata-masking step (the value's presence in
309309 attrs signals "this array has been NaN-masked").
310310
311- ``window`` is a ``(r0, c0, r1, c1)`` tuple for the eager windowed
312- read; when set, the emitted ``attrs['transform']`` shifts the origin
313- to the window's top-left. The dask and GPU paths do not use this --
314- their windows are per-chunk inside the graph, not on the outer
315- DataArray.
311+ ``window`` is a ``(r0, c0, r1, c1)`` tuple for windowed reads; when
312+ set, the emitted ``attrs['transform']`` shifts the origin to the
313+ window's top-left. The eager path and the dask path (since #1561,
314+ which threads ``window=`` through ``read_geotiff_dask``) both pass
315+ the outer window through this helper so the resulting DataArray
316+ advertises the windowed transform. The GPU path does not currently
317+ expose a windowed read, so it passes ``window=None``.
316318 """
317319 if geo_info .crs_epsg is not None :
318320 attrs ['crs' ] = geo_info .crs_epsg
@@ -502,7 +504,9 @@ def open_geotiff(source, *, dtype=None, window=None,
502504 # Dask path (CPU)
503505 if chunks is not None :
504506 return read_geotiff_dask (source , dtype = dtype , chunks = chunks ,
505- overview_level = overview_level , name = name )
507+ overview_level = overview_level ,
508+ window = window , band = band ,
509+ max_pixels = max_pixels , name = name )
506510
507511 kwargs = {}
508512 if max_pixels is not None :
@@ -978,15 +982,24 @@ def to_geotiff(data: xr.DataArray | np.ndarray, path, *,
978982 "max_z_error is not supported on the GPU writer "
979983 "(nvCOMP has no LERC backend). Use the CPU path "
980984 "(gpu=False) or omit max_z_error." )
985+ # Strip output is not implemented on the GPU path; reject up
986+ # front rather than silently producing a tiled file.
987+ if not tiled :
988+ raise ValueError (
989+ "tiled=False is not supported on the GPU writer. "
990+ "Pass gpu=False or omit tiled=False." )
981991 try :
982992 write_geotiff_gpu (data , path , crs = crs , nodata = nodata ,
983993 compression = compression ,
984994 compression_level = compression_level ,
995+ tiled = tiled ,
985996 tile_size = tile_size ,
986997 predictor = predictor ,
987998 cog = cog ,
988999 overview_levels = overview_levels ,
989- overview_resampling = overview_resampling )
1000+ overview_resampling = overview_resampling ,
1001+ bigtiff = bigtiff ,
1002+ streaming_buffer_bytes = streaming_buffer_bytes )
9901003 return
9911004 except (ImportError , Exception ):
9921005 pass # fall through to CPU path
@@ -1393,6 +1406,9 @@ def _write_vrt_tiled(data, vrt_path, *, crs=None, nodata=None,
13931406
13941407def read_geotiff_dask (source : str , * , dtype = None , chunks : int | tuple = 512 ,
13951408 overview_level : int | None = None ,
1409+ window : tuple | None = None ,
1410+ band : int | None = None ,
1411+ max_pixels : int | None = None ,
13961412 name : str | None = None ) -> xr .DataArray :
13971413 """Read a GeoTIFF as a dask-backed DataArray for out-of-core processing.
13981414
@@ -1409,6 +1425,21 @@ def read_geotiff_dask(source: str, *, dtype=None, chunks: int | tuple = 512,
14091425 Chunk size in pixels. Default 512.
14101426 overview_level : int or None
14111427 Overview level (0 = full resolution).
1428+ window : tuple or None
1429+ ``(row_start, col_start, row_stop, col_stop)`` to restrict
1430+ chunking to a sub-region of the file. Chunks are laid out
1431+ relative to the window origin. None reads the full raster.
1432+ band : int or None
1433+ Zero-based band index. None returns all bands (3D for
1434+ multi-band files, 2D for single-band). Selecting a single band
1435+ produces a 2D DataArray.
1436+ max_pixels : int or None
1437+ Maximum allowed pixel count (width * height * samples) for the
1438+ windowed region. None uses the reader default (~1 billion).
1439+ The cap is checked once up-front against the lazy region; each
1440+ chunk task also re-checks against ``max_pixels`` so windowed
1441+ reads stay bounded even when ``read_to_array`` is invoked
1442+ directly.
14121443 name : str or None
14131444 Name for the DataArray.
14141445
@@ -1492,14 +1523,68 @@ def read_geotiff_dask(source: str, *, dtype=None, chunks: int | tuple = 512,
14921523 else :
14931524 target_dtype = effective_dtype
14941525
1495- coords = _geo_to_coords (geo_info , full_h , full_w )
1526+ # Window clipping: restrict the lazy region to the requested
1527+ # sub-rectangle. ``read_to_array`` already accepts ``window=`` per
1528+ # chunk; we only need to remap the chunk grid so its origin moves to
1529+ # ``(win_r0, win_c0)`` and its extent shrinks to the window.
1530+ win_r0 = win_c0 = 0
1531+ if window is not None :
1532+ win_r0 , win_c0 , win_r1 , win_c1 = window
1533+ if (win_r0 < 0 or win_c0 < 0
1534+ or win_r1 > full_h or win_c1 > full_w
1535+ or win_r0 >= win_r1 or win_c0 >= win_c1 ):
1536+ raise ValueError (
1537+ f"window={ window } is outside the source extent "
1538+ f"({ full_h } x{ full_w } ) or has non-positive size." )
1539+ # Mirror the eager-path windowed coord computation in open_geotiff.
1540+ t = geo_info .transform
1541+ if geo_info .raster_type == RASTER_PIXEL_IS_POINT :
1542+ win_x = (np .arange (win_c0 , win_c1 , dtype = np .float64 )
1543+ * t .pixel_width + t .origin_x )
1544+ win_y = (np .arange (win_r0 , win_r1 , dtype = np .float64 )
1545+ * t .pixel_height + t .origin_y )
1546+ else :
1547+ win_x = (np .arange (win_c0 , win_c1 , dtype = np .float64 )
1548+ * t .pixel_width + t .origin_x
1549+ + t .pixel_width * 0.5 )
1550+ win_y = (np .arange (win_r0 , win_r1 , dtype = np .float64 )
1551+ * t .pixel_height + t .origin_y
1552+ + t .pixel_height * 0.5 )
1553+ coords = {'y' : win_y , 'x' : win_x }
1554+ full_h = win_r1 - win_r0
1555+ full_w = win_c1 - win_c0
1556+ else :
1557+ coords = _geo_to_coords (geo_info , full_h , full_w )
1558+
1559+ if band is not None :
1560+ if n_bands == 0 :
1561+ if band != 0 :
1562+ raise IndexError (
1563+ f"band={ band } requested on a single-band file." )
1564+ elif not 0 <= band < n_bands :
1565+ raise IndexError (
1566+ f"band={ band } out of range for { n_bands } -band file." )
1567+
1568+ # Up-front pixel-count guard against the windowed extent. Chunk
1569+ # tasks re-check via read_to_array's own ``max_pixels`` (which we
1570+ # forward through ``_delayed_read_window``), but catching an
1571+ # oversized request before any task is scheduled saves the caller
1572+ # from a misleading "tile size exceeds max_pixels" error in a
1573+ # chunk that happens to align with the file's tile grid.
1574+ if max_pixels is not None :
1575+ eff_bands = (1 if band is not None
1576+ else (n_bands if n_bands > 0 else 1 ))
1577+ if full_h * full_w * eff_bands > max_pixels :
1578+ raise ValueError (
1579+ f"Requested region { full_h } x{ full_w } x{ eff_bands } "
1580+ f"exceeds max_pixels={ max_pixels :,} ." )
14961581
14971582 if name is None :
14981583 import os
14991584 name = os .path .splitext (os .path .basename (source ))[0 ]
15001585
15011586 attrs = {}
1502- _populate_attrs_from_geo_info (attrs , geo_info )
1587+ _populate_attrs_from_geo_info (attrs , geo_info , window = window )
15031588 if nodata is not None :
15041589 attrs ['nodata' ] = nodata
15051590
@@ -1536,24 +1621,35 @@ def read_geotiff_dask(source: str, *, dtype=None, chunks: int | tuple = 512,
15361621
15371622 # For multi-band, each window read returns (h, w, bands); for single-band (h, w)
15381623 # read_to_array with band=0 extracts a single band, band=None returns all
1539- band_arg = None # return all bands (or 2D if single-band)
1624+ band_arg = band # None => all bands (or 2D for single-band file)
1625+
1626+ # When ``band`` is set, each chunk reads a 2D slice -- collapse the
1627+ # output dims so the returned DataArray is 2D regardless of file band
1628+ # count.
1629+ out_has_band_axis = band is None and n_bands > 0
15401630
15411631 dask_rows = []
15421632 for r0 in rows :
15431633 r1 = min (r0 + ch_h , full_h )
15441634 dask_cols = []
15451635 for c0 in cols :
15461636 c1 = min (c0 + ch_w , full_w )
1547- if n_bands > 0 :
1637+ if out_has_band_axis :
15481638 block_shape = (r1 - r0 , c1 - c0 , n_bands )
15491639 else :
15501640 block_shape = (r1 - r0 , c1 - c0 )
1641+ # Translate window-relative chunk coords back to file-relative
1642+ # coords for ``read_to_array``. ``win_r0`` / ``win_c0`` are 0
1643+ # when no window was requested.
15511644 block = da .from_delayed (
1552- _delayed_read_window (source , r0 , c0 , r1 , c1 ,
1645+ _delayed_read_window (source ,
1646+ r0 + win_r0 , c0 + win_c0 ,
1647+ r1 + win_r0 , c1 + win_c0 ,
15531648 overview_level , nodata ,
15541649 band_arg ,
15551650 target_dtype = target_dtype if dtype is not None else None ,
1556- http_meta_key = http_meta_key ),
1651+ http_meta_key = http_meta_key ,
1652+ max_pixels = max_pixels ),
15571653 shape = block_shape ,
15581654 dtype = target_dtype ,
15591655 )
@@ -1562,7 +1658,7 @@ def read_geotiff_dask(source: str, *, dtype=None, chunks: int | tuple = 512,
15621658
15631659 dask_arr = da .concatenate (dask_rows , axis = 0 )
15641660
1565- if n_bands > 0 :
1661+ if out_has_band_axis :
15661662 dims = ['y' , 'x' , 'band' ]
15671663 coords ['band' ] = np .arange (n_bands )
15681664 else :
@@ -1574,7 +1670,8 @@ def read_geotiff_dask(source: str, *, dtype=None, chunks: int | tuple = 512,
15741670
15751671
15761672def _delayed_read_window (source , r0 , c0 , r1 , c1 , overview_level , nodata ,
1577- band , * , target_dtype = None , http_meta_key = None ):
1673+ band , * , target_dtype = None , http_meta_key = None ,
1674+ max_pixels = None ):
15781675 """Dask-delayed function to read a single window.
15791676
15801677 *http_meta_key* is an optional ``Delayed[(TIFFHeader, IFD)]`` parsed
@@ -1590,21 +1687,30 @@ def _delayed_read_window(source, r0, c0, r1, c1, overview_level, nodata,
15901687 def _read (http_meta ):
15911688 if http_meta is not None and isinstance (source , str ) and \
15921689 source .startswith (('http://' , 'https://' )):
1593- from ._reader import _HTTPSource , _fetch_decode_cog_http_tiles
1690+ from ._reader import (
1691+ _HTTPSource , _fetch_decode_cog_http_tiles ,
1692+ MAX_PIXELS_DEFAULT ,
1693+ )
15941694 header , ifd = http_meta
15951695 src = _HTTPSource (source )
15961696 try :
15971697 arr = _fetch_decode_cog_http_tiles (
1598- src , header , ifd , window = (r0 , c0 , r1 , c1 ))
1698+ src , header , ifd ,
1699+ max_pixels = (max_pixels if max_pixels is not None
1700+ else MAX_PIXELS_DEFAULT ),
1701+ window = (r0 , c0 , r1 , c1 ))
15991702 finally :
16001703 src .close ()
16011704 if (arr .ndim == 3 and ifd .samples_per_pixel > 1
16021705 and band is not None ):
16031706 arr = arr [:, :, band ]
16041707 else :
1708+ _r2a_kwargs = {}
1709+ if max_pixels is not None :
1710+ _r2a_kwargs ['max_pixels' ] = max_pixels
16051711 arr , _ = read_to_array (source , window = (r0 , c0 , r1 , c1 ),
16061712 overview_level = overview_level ,
1607- band = band )
1713+ band = band , ** _r2a_kwargs )
16081714 if nodata is not None :
16091715 # ``arr`` was just decoded by ``_fetch_decode_cog_http_tiles``
16101716 # or ``read_to_array``; both return freshly-allocated buffers
@@ -2270,11 +2376,15 @@ def write_geotiff_gpu(data, path: str, *,
22702376 nodata = None ,
22712377 compression : str = 'zstd' ,
22722378 compression_level : int | None = None ,
2379+ tiled : bool = True ,
22732380 tile_size : int = 256 ,
22742381 predictor : bool | int = False ,
22752382 cog : bool = False ,
22762383 overview_levels : list [int ] | None = None ,
2277- overview_resampling : str = 'mean' ) -> None :
2384+ overview_resampling : str = 'mean' ,
2385+ bigtiff : bool | None = None ,
2386+ max_z_error : float = 0.0 ,
2387+ streaming_buffer_bytes : int | None = None ) -> None :
22782388 """Write a CuPy-backed DataArray as a GeoTIFF with GPU compression.
22792389
22802390 Tiles are extracted and compressed on the GPU via nvCOMP, then
@@ -2304,6 +2414,12 @@ def write_geotiff_gpu(data, path: str, *,
23042414 compression_level : int or None
23052415 Compression effort level. Accepted for API compatibility but
23062416 currently ignored -- nvCOMP does not expose level control.
2417+ tiled : bool
2418+ Must be True (default). The GPU writer is tiled-only because
2419+ nvCOMP batch compression operates on per-tile streams; passing
2420+ ``tiled=False`` raises ``ValueError`` rather than silently
2421+ producing a tiled file. Accepted for API parity with
2422+ ``to_geotiff``.
23072423 tile_size : int
23082424 Tile size in pixels (default 256).
23092425 predictor : bool or int
@@ -2319,7 +2435,37 @@ def write_geotiff_gpu(data, path: str, *,
23192435 overview_resampling : str
23202436 Resampling method for overviews: 'mean' (default), 'nearest',
23212437 'min', 'max', 'median', or 'mode'.
2438+ bigtiff : bool or None
2439+ Force BigTIFF (64-bit offsets). None auto-promotes when the
2440+ estimated file size would exceed the classic-TIFF 4 GB limit.
2441+ max_z_error : float
2442+ Per-pixel error budget for LERC compression. The GPU writer
2443+ does not implement LERC (nvCOMP has no LERC backend), so any
2444+ non-zero value raises ``ValueError``. Accepted at the signature
2445+ level for API parity with ``to_geotiff``.
2446+ streaming_buffer_bytes : int or None
2447+ Accepted for API parity with ``to_geotiff``. The GPU writer
2448+ materialises the entire array on device and has no streaming
2449+ concept, so this kwarg is a no-op.
23222450 """
2451+ if not tiled :
2452+ raise ValueError (
2453+ "write_geotiff_gpu requires tiled=True. nvCOMP batch "
2454+ "compression is tile-based; the strip layout is not "
2455+ "implemented on the GPU path. Use to_geotiff(..., gpu=False, "
2456+ "tiled=False) for strip output on CPU." )
2457+ if max_z_error < 0 :
2458+ raise ValueError (
2459+ f"max_z_error must be >= 0, got { max_z_error } " )
2460+ if max_z_error != 0 :
2461+ raise ValueError (
2462+ "max_z_error is not supported on the GPU writer "
2463+ "(nvCOMP has no LERC backend). Use to_geotiff(..., gpu=False) "
2464+ "or omit max_z_error." )
2465+ # streaming_buffer_bytes is intentionally a no-op on the GPU path;
2466+ # the kwarg exists for API parity with to_geotiff so callers can pass
2467+ # the same kwargs to both entry points without filtering.
2468+ del streaming_buffer_bytes
23232469 try :
23242470 import cupy
23252471 except ImportError :
@@ -2488,6 +2634,7 @@ def _gpu_compress_to_part(gpu_arr, w, h, spp):
24882634 x_resolution = x_res ,
24892635 y_resolution = y_res ,
24902636 resolution_unit = res_unit ,
2637+ force_bigtiff = bigtiff ,
24912638 )
24922639
24932640 _write_bytes (file_bytes , path )