@@ -498,6 +498,7 @@ def open_geotiff(source, *, dtype=None, window=None,
498498 if gpu :
499499 return read_geotiff_gpu (source , dtype = dtype ,
500500 overview_level = overview_level ,
501+ window = window , band = band ,
501502 name = name , chunks = chunks ,
502503 max_pixels = max_pixels )
503504
@@ -1933,9 +1934,70 @@ def _apply_orientation_geo_info(geo_info, orientation: int,
19331934 return geo_info
19341935
19351936
1937+ def _gpu_apply_window_band (arr_gpu , geo_info , * , window , band ):
1938+ """Slice a fully-decoded GPU array down to a window and/or band.
1939+
1940+ Used by ``read_geotiff_gpu`` to keep the public surface in line with
1941+ ``open_geotiff`` and ``read_geotiff_dask``: callers can pass ``window``
1942+ and ``band``, and the returned DataArray covers exactly that subset.
1943+
1944+ The current implementation slices on device after the full-image GPU
1945+ decode is complete. That preserves correctness but does no I/O
1946+ savings -- a future PR can short-circuit tile decode for partial
1947+ windows. For ``band`` selection, the savings are also post-decode
1948+ because the planar=1 (chunky) tile assembly returns all bands in a
1949+ single GPU buffer.
1950+
1951+ Returns ``(arr_gpu, coords)`` where ``coords`` is a dict with
1952+ ``y`` / ``x`` numpy arrays sized to the output array. The caller is
1953+ responsible for setting ``attrs['transform']`` to the windowed origin
1954+ via ``_populate_attrs_from_geo_info(..., window=window)`` so the array
1955+ and the transform agree.
1956+ """
1957+ if window is not None :
1958+ r0 , c0 , r1 , c1 = window
1959+ arr_gpu = arr_gpu [r0 :r1 , c0 :c1 ]
1960+ out_h = r1 - r0
1961+ out_w = c1 - c0
1962+ # Mirror the eager-numpy windowed coord computation in
1963+ # open_geotiff so the GPU-windowed coords carry the same
1964+ # absolute pixel-center values as the CPU path.
1965+ t = geo_info .transform
1966+ if t is None :
1967+ coords = {
1968+ 'y' : np .arange (out_h , dtype = np .int64 ),
1969+ 'x' : np .arange (out_w , dtype = np .int64 ),
1970+ }
1971+ else :
1972+ if geo_info .raster_type == RASTER_PIXEL_IS_POINT :
1973+ full_x = (np .arange (c0 , c1 , dtype = np .float64 )
1974+ * t .pixel_width + t .origin_x )
1975+ full_y = (np .arange (r0 , r1 , dtype = np .float64 )
1976+ * t .pixel_height + t .origin_y )
1977+ else :
1978+ full_x = (np .arange (c0 , c1 , dtype = np .float64 )
1979+ * t .pixel_width + t .origin_x
1980+ + t .pixel_width * 0.5 )
1981+ full_y = (np .arange (r0 , r1 , dtype = np .float64 )
1982+ * t .pixel_height + t .origin_y
1983+ + t .pixel_height * 0.5 )
1984+ coords = {'y' : full_y , 'x' : full_x }
1985+ else :
1986+ out_h = arr_gpu .shape [0 ]
1987+ out_w = arr_gpu .shape [1 ]
1988+ coords = _geo_to_coords (geo_info , out_h , out_w )
1989+
1990+ if band is not None and arr_gpu .ndim == 3 :
1991+ arr_gpu = arr_gpu [:, :, band ]
1992+
1993+ return arr_gpu , coords
1994+
1995+
19361996def read_geotiff_gpu (source : str , * ,
19371997 dtype = None ,
19381998 overview_level : int | None = None ,
1999+ window : tuple | None = None ,
2000+ band : int | None = None ,
19392001 name : str | None = None ,
19402002 chunks : int | tuple | None = None ,
19412003 max_pixels : int | None = None ,
@@ -1958,6 +2020,18 @@ def read_geotiff_gpu(source: str, *,
19582020 File path.
19592021 overview_level : int or None
19602022 Overview level (0 = full resolution).
2023+ window : tuple or None
2024+ ``(row_start, col_start, row_stop, col_stop)`` for windowed
2025+ reading. None reads the full raster. The GPU pipeline currently
2026+ decodes all tiles and slices on device after assembly, so the
2027+ kwarg restores API parity with ``open_geotiff`` and
2028+ ``read_geotiff_dask`` but does not yet skip I/O for partial
2029+ windows. The returned coords, ``attrs['transform']``, and
2030+ shape match the eager numpy path.
2031+ band : int or None
2032+ Zero-based band index. None returns all bands (3D output for
2033+ multi-band files, 2D for single-band). Selecting a single band
2034+ yields a 2D DataArray.
19612035 chunks : int, tuple, or None
19622036 If set, return a Dask-chunked CuPy DataArray. int for square
19632037 chunks, (row, col) tuple for rectangular.
@@ -2051,6 +2125,19 @@ def read_geotiff_gpu(source: str, *,
20512125 if max_pixels is None :
20522126 max_pixels = MAX_PIXELS_DEFAULT
20532127
2128+ # Window basic shape check happens here; full bounds-vs-file validation
2129+ # runs after the IFD parse below so we can compare against the chosen
2130+ # overview level's actual height/width. ``band`` is similarly validated
2131+ # against ``ifd.samples_per_pixel`` after the header parse.
2132+ if window is not None :
2133+ if len (window ) != 4 :
2134+ raise ValueError (
2135+ f"window must be a 4-tuple (r0, c0, r1, c1), got { window !r} " )
2136+ w_r0 , w_c0 , w_r1 , w_c1 = window
2137+ if w_r0 >= w_r1 or w_c0 >= w_c1 or w_r0 < 0 or w_c0 < 0 :
2138+ raise ValueError (
2139+ f"window={ window } has non-positive size or negative origin." )
2140+
20542141 # Parse metadata on CPU (fast, <1ms)
20552142 src = _FileSource (source )
20562143 data = src .read_all ()
@@ -2075,6 +2162,45 @@ def read_geotiff_gpu(source: str, *,
20752162 # only need to copy the post-flip geo_info back here.
20762163 orientation = ifd .orientation
20772164
2165+ # Orientation tag (274): values 2-8 mean the stored pixel order
2166+ # differs from display order. A windowed read against a non-default
2167+ # orientation has ambiguous semantics (does the window refer to
2168+ # file pixels or display pixels?), so the CPU reader
2169+ # ``_reader.read_to_array`` rejects ``window=`` for orientation != 1.
2170+ # Mirror that here so the GPU path agrees with the CPU path and
2171+ # ``read_geotiff_dask``. Use the same error wording so the failure
2172+ # message is identical across backends.
2173+ if orientation != 1 and window is not None :
2174+ raise ValueError (
2175+ f"Orientation tag (274) is { orientation } ; windowed reads "
2176+ f"(window=...) and dask-chunked reads (chunks=...) are not "
2177+ f"supported for non-default orientation. Read the full "
2178+ f"array first, then slice."
2179+ )
2180+
2181+ # Validate band against the selected IFD's sample count.
2182+ # ``samples_per_pixel`` is at least 1 for any valid TIFF; we treat
2183+ # ``band=0`` as "first band" for single-band files too so the
2184+ # behaviour mirrors ``read_geotiff_dask``.
2185+ ifd_samples = ifd .samples_per_pixel
2186+ if band is not None :
2187+ if ifd_samples <= 1 :
2188+ if band != 0 :
2189+ raise IndexError (
2190+ f"band={ band } requested on a single-band file." )
2191+ elif not 0 <= band < ifd_samples :
2192+ raise IndexError (
2193+ f"band={ band } out of range for { ifd_samples } -band file." )
2194+
2195+ # Validate window upper bounds against the selected IFD's extent.
2196+ if window is not None :
2197+ w_r0 , w_c0 , w_r1 , w_c1 = window
2198+ ifd_h , ifd_w = ifd .height , ifd .width
2199+ if w_r1 > ifd_h or w_c1 > ifd_w :
2200+ raise ValueError (
2201+ f"window={ window } is outside the source extent "
2202+ f"({ ifd_h } x{ ifd_w } )." )
2203+
20782204 if not ifd .is_tiled :
20792205 # Fall back to CPU for stripped files. read_to_array remaps
20802206 # the array but only updates geo_info.transform for orientations
@@ -2089,12 +2215,11 @@ def read_geotiff_gpu(source: str, *,
20892215 geo_info = _apply_orientation_geo_info (
20902216 geo_info , orientation ,
20912217 file_h = ifd .height , file_w = ifd .width )
2092- coords = _geo_to_coords (geo_info , arr_gpu .shape [0 ], arr_gpu .shape [1 ])
20932218 if name is None :
20942219 import os
20952220 name = os .path .splitext (os .path .basename (source ))[0 ]
20962221 attrs = {}
2097- _populate_attrs_from_geo_info (attrs , geo_info )
2222+ _populate_attrs_from_geo_info (attrs , geo_info , window = window )
20982223 # Apply nodata mask + record sentinel so the GPU read agrees
20992224 # with the CPU eager path. Without this, integer rasters keep
21002225 # the literal sentinel value and float rasters keep the
@@ -2107,6 +2232,12 @@ def read_geotiff_gpu(source: str, *,
21072232 target = np .dtype (dtype )
21082233 _validate_dtype_cast (np .dtype (str (arr_gpu .dtype )), target )
21092234 arr_gpu = arr_gpu .astype (target )
2235+ # Apply window/band slicing post-decode. The stripped CPU
2236+ # fallback already produces the full-image array; slice on the
2237+ # GPU so the result matches ``open_geotiff`` /
2238+ # ``read_geotiff_dask`` semantics.
2239+ arr_gpu , coords = _gpu_apply_window_band (
2240+ arr_gpu , geo_info , window = window , band = band )
21102241 # Multi-band stripped reads come back as (y, x, band); mirror
21112242 # the tiled branch so dims line up with ndim. Single-band stays
21122243 # 2-D ('y', 'x').
@@ -2115,8 +2246,20 @@ def read_geotiff_gpu(source: str, *,
21152246 coords ['band' ] = np .arange (arr_gpu .shape [2 ])
21162247 else :
21172248 dims = ['y' , 'x' ]
2118- return xr .DataArray (arr_gpu , dims = dims ,
2119- coords = coords , name = name , attrs = attrs )
2249+ result = xr .DataArray (arr_gpu , dims = dims ,
2250+ coords = coords , name = name , attrs = attrs )
2251+ # ``chunks`` was previously honoured only on the tiled path,
2252+ # so stripped TIFFs returned an unchunked DataArray even when
2253+ # the caller asked for a Dask+CuPy result. Mirror the tiled
2254+ # branch's chunking step so behaviour is consistent across
2255+ # layouts.
2256+ if chunks is not None :
2257+ if isinstance (chunks , int ):
2258+ chunk_dict = {'y' : chunks , 'x' : chunks }
2259+ else :
2260+ chunk_dict = {'y' : chunks [0 ], 'x' : chunks [1 ]}
2261+ result = result .chunk (chunk_dict )
2262+ return result
21202263
21212264 offsets = ifd .tile_offsets
21222265 byte_counts = ifd .tile_byte_counts
@@ -2352,16 +2495,21 @@ def _read_once():
23522495 import os
23532496 name = os .path .splitext (os .path .basename (source ))[0 ]
23542497
2355- # Use the post-orientation array shape so coords match the array.
2356- out_h = arr_gpu .shape [0 ]
2357- out_w = arr_gpu .shape [1 ]
2358- coords = _geo_to_coords (geo_info , out_h , out_w )
2359-
23602498 attrs = {}
2361- _populate_attrs_from_geo_info (attrs , geo_info )
2499+ _populate_attrs_from_geo_info (attrs , geo_info , window = window )
23622500 if nodata is not None :
23632501 attrs ['nodata' ] = nodata
23642502
2503+ # Apply window/band slicing post-decode. Coords are derived from the
2504+ # sliced array so the (y, x) labels line up with the user's requested
2505+ # subrectangle. This mirrors the ``open_geotiff`` / ``read_geotiff_dask``
2506+ # contract: ``attrs['transform']`` always carries the full-source
2507+ # GeoTransform shifted to the window origin (via
2508+ # ``_populate_attrs_from_geo_info(..., window=window)``), while
2509+ # ``coords['y']`` / ``coords['x']`` cover only the windowed cells.
2510+ arr_gpu , coords = _gpu_apply_window_band (
2511+ arr_gpu , geo_info , window = window , band = band )
2512+
23652513 if arr_gpu .ndim == 3 :
23662514 dims = ['y' , 'x' , 'band' ]
23672515 coords ['band' ] = np .arange (arr_gpu .shape [2 ])
0 commit comments