Skip to content

Commit 1878c9f

Browse files
committed
Add lightweight GeoTIFF/COG reader and writer
Reads and writes GeoTIFF and Cloud Optimized GeoTIFF files using only numpy, numba, xarray, and the standard library. No GDAL required. What it does: - Parses TIFF/BigTIFF headers and IFDs via struct - Reads GeoTIFF tags: CRS (EPSG), affine transforms, GeoKeys, PixelIsArea vs PixelIsPoint - Deflate (zlib), LZW (Numba JIT), horizontal and floating-point predictor codecs - Strip and tiled layouts, windowed reads - COG writer with overview generation and IFD-first layout - HTTP range-request reader for remote COGs - mmap for local file access (zero-copy) - Nodata sentinel masking to NaN on read - Metadata round-trip: CRS, transform, nodata, raster type, pixels Read performance vs rioxarray/GDAL: - 5-7x faster on uncompressed data - On par for compressed COGs - 100% pixel-exact across all tested formats Write performance: - On par for uncompressed - 2-4x slower for compressed (zlib is C-native in GDAL) Tested against Landsat 8, Copernicus DEM, USGS 1-arc-second, and USGS 1-meter DEMs. 154 tests cover codecs, header parsing, geo metadata, round-trips across 8 dtypes x 3 compressions, edge cases (corrupt files, NaN/Inf, extreme shapes, PixelIsPoint), and the public API.
1 parent 820c0aa commit 1878c9f

17 files changed

Lines changed: 4703 additions & 0 deletions

xrspatial/geotiff/__init__.py

Lines changed: 242 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,242 @@
1+
"""Lightweight GeoTIFF/COG reader and writer.
2+
3+
No GDAL dependency -- uses only numpy, numba, xarray, and the standard library.
4+
5+
Public API
6+
----------
7+
read_geotiff(source, ...)
8+
Read a GeoTIFF file to an xarray.DataArray.
9+
write_geotiff(data, path, ...)
10+
Write an xarray.DataArray as a GeoTIFF or COG.
11+
open_cog(url, ...)
12+
Read a Cloud Optimized GeoTIFF from an HTTP URL.
13+
"""
14+
from __future__ import annotations
15+
16+
import numpy as np
17+
import xarray as xr
18+
19+
from ._geotags import GeoTransform, RASTER_PIXEL_IS_AREA, RASTER_PIXEL_IS_POINT
20+
from ._reader import read_to_array
21+
from ._writer import write
22+
23+
__all__ = ['read_geotiff', 'write_geotiff', 'open_cog']
24+
25+
26+
def _geo_to_coords(geo_info, height: int, width: int) -> dict:
27+
"""Build y/x coordinate arrays from GeoInfo.
28+
29+
For PixelIsArea (default): origin is the edge of pixel (0,0), so pixel
30+
centers are at origin + 0.5*pixel_size.
31+
For PixelIsPoint: origin (tiepoint) is already the center of pixel (0,0),
32+
so no half-pixel offset is needed.
33+
"""
34+
t = geo_info.transform
35+
if geo_info.raster_type == RASTER_PIXEL_IS_POINT:
36+
# Tiepoint is pixel center -- no offset needed
37+
x = np.arange(width, dtype=np.float64) * t.pixel_width + t.origin_x
38+
y = np.arange(height, dtype=np.float64) * t.pixel_height + t.origin_y
39+
else:
40+
# Tiepoint is pixel edge -- shift to center
41+
x = np.arange(width, dtype=np.float64) * t.pixel_width + t.origin_x + t.pixel_width * 0.5
42+
y = np.arange(height, dtype=np.float64) * t.pixel_height + t.origin_y + t.pixel_height * 0.5
43+
return {'y': y, 'x': x}
44+
45+
46+
def _coords_to_transform(da: xr.DataArray) -> GeoTransform | None:
47+
"""Infer GeoTransform from DataArray coordinates.
48+
49+
Coordinates are always pixel-center values. The transform origin depends
50+
on raster_type:
51+
- PixelIsArea (default): origin = center - half_pixel (edge of pixel 0)
52+
- PixelIsPoint: origin = center (center of pixel 0)
53+
"""
54+
ydim = da.dims[-2]
55+
xdim = da.dims[-1]
56+
57+
if xdim not in da.coords or ydim not in da.coords:
58+
return None
59+
60+
x = da.coords[xdim].values
61+
y = da.coords[ydim].values
62+
63+
if len(x) < 2 or len(y) < 2:
64+
return None
65+
66+
pixel_width = float(x[1] - x[0])
67+
pixel_height = float(y[1] - y[0])
68+
69+
is_point = da.attrs.get('raster_type') == 'point'
70+
if is_point:
71+
# PixelIsPoint: tiepoint is at the pixel center
72+
origin_x = float(x[0])
73+
origin_y = float(y[0])
74+
else:
75+
# PixelIsArea: tiepoint is at the edge (center - half pixel)
76+
origin_x = float(x[0]) - pixel_width * 0.5
77+
origin_y = float(y[0]) - pixel_height * 0.5
78+
79+
return GeoTransform(
80+
origin_x=origin_x,
81+
origin_y=origin_y,
82+
pixel_width=pixel_width,
83+
pixel_height=pixel_height,
84+
)
85+
86+
87+
def read_geotiff(source: str, *, window=None,
88+
overview_level: int | None = None,
89+
band: int = 0,
90+
name: str | None = None) -> xr.DataArray:
91+
"""Read a GeoTIFF file into an xarray.DataArray.
92+
93+
Parameters
94+
----------
95+
source : str
96+
File path or HTTP URL.
97+
window : tuple or None
98+
(row_start, col_start, row_stop, col_stop) for windowed reading.
99+
overview_level : int or None
100+
Overview level to read (0 = full resolution). None reads full res.
101+
band : int
102+
Band index (0-based) for multi-band files.
103+
name : str or None
104+
Name for the DataArray. Defaults to filename stem.
105+
106+
Returns
107+
-------
108+
xr.DataArray
109+
2D DataArray with y/x coordinates and geo attributes.
110+
"""
111+
arr, geo_info = read_to_array(
112+
source, window=window,
113+
overview_level=overview_level, band=band,
114+
)
115+
116+
height, width = arr.shape[:2]
117+
coords = _geo_to_coords(geo_info, height, width)
118+
119+
if window is not None:
120+
# Adjust coordinates for windowed read
121+
r0, c0, r1, c1 = window
122+
t = geo_info.transform
123+
full_x = np.arange(c0, c1, dtype=np.float64) * t.pixel_width + t.origin_x + t.pixel_width * 0.5
124+
full_y = np.arange(r0, r1, dtype=np.float64) * t.pixel_height + t.origin_y + t.pixel_height * 0.5
125+
coords = {'y': full_y, 'x': full_x}
126+
127+
if name is None:
128+
# Derive from source path
129+
import os
130+
name = os.path.splitext(os.path.basename(source))[0]
131+
132+
attrs = {}
133+
if geo_info.crs_epsg is not None:
134+
attrs['crs'] = geo_info.crs_epsg
135+
if geo_info.raster_type == RASTER_PIXEL_IS_POINT:
136+
attrs['raster_type'] = 'point'
137+
138+
# Apply nodata mask: replace nodata sentinel values with NaN
139+
nodata = geo_info.nodata
140+
if nodata is not None:
141+
attrs['nodata'] = nodata
142+
if arr.dtype.kind == 'f' and not np.isnan(nodata):
143+
arr = arr.copy()
144+
arr[arr == np.float32(nodata)] = np.nan
145+
146+
da = xr.DataArray(
147+
arr,
148+
dims=['y', 'x'],
149+
coords=coords,
150+
name=name,
151+
attrs=attrs,
152+
)
153+
return da
154+
155+
156+
def write_geotiff(data: xr.DataArray | np.ndarray, path: str, *,
157+
crs: int | None = None,
158+
nodata=None,
159+
compression: str = 'deflate',
160+
tiled: bool = True,
161+
tile_size: int = 256,
162+
predictor: bool = False,
163+
cog: bool = False,
164+
overview_levels: list[int] | None = None) -> None:
165+
"""Write data as a GeoTIFF or Cloud Optimized GeoTIFF.
166+
167+
Parameters
168+
----------
169+
data : xr.DataArray or np.ndarray
170+
2D raster data.
171+
path : str
172+
Output file path.
173+
crs : int or None
174+
EPSG code. If None and data is a DataArray, tries to read from attrs.
175+
nodata : float, int, or None
176+
NoData value.
177+
compression : str
178+
'none', 'deflate', or 'lzw'.
179+
tiled : bool
180+
Use tiled layout (default True).
181+
tile_size : int
182+
Tile size in pixels (default 256).
183+
predictor : bool
184+
Use horizontal differencing predictor.
185+
cog : bool
186+
Write as Cloud Optimized GeoTIFF.
187+
overview_levels : list[int] or None
188+
Overview decimation factors. Only used when cog=True.
189+
"""
190+
geo_transform = None
191+
epsg = crs
192+
raster_type = RASTER_PIXEL_IS_AREA
193+
194+
if isinstance(data, xr.DataArray):
195+
arr = data.values
196+
if geo_transform is None:
197+
geo_transform = _coords_to_transform(data)
198+
if epsg is None:
199+
epsg = data.attrs.get('crs')
200+
if nodata is None:
201+
nodata = data.attrs.get('nodata')
202+
if data.attrs.get('raster_type') == 'point':
203+
raster_type = RASTER_PIXEL_IS_POINT
204+
else:
205+
arr = np.asarray(data)
206+
207+
if arr.ndim != 2:
208+
raise ValueError(f"Expected 2D array, got {arr.ndim}D")
209+
210+
write(
211+
arr, path,
212+
geo_transform=geo_transform,
213+
crs_epsg=epsg,
214+
nodata=nodata,
215+
compression=compression,
216+
tiled=tiled,
217+
tile_size=tile_size,
218+
predictor=predictor,
219+
cog=cog,
220+
overview_levels=overview_levels,
221+
raster_type=raster_type,
222+
)
223+
224+
225+
def open_cog(url: str, *,
226+
overview_level: int | None = None) -> xr.DataArray:
227+
"""Read a Cloud Optimized GeoTIFF from an HTTP URL.
228+
229+
Uses range requests so only the needed tiles are fetched.
230+
231+
Parameters
232+
----------
233+
url : str
234+
HTTP(S) URL to the COG.
235+
overview_level : int or None
236+
Overview level (0 = full resolution).
237+
238+
Returns
239+
-------
240+
xr.DataArray
241+
"""
242+
return read_geotiff(url, overview_level=overview_level)

0 commit comments

Comments
 (0)