Skip to content

Commit 7d25252

Browse files
authored
Fix three accuracy bugs in open_geotiff/to_geotiff (#1081) (#1082)
1. Windowed reads now respect PixelIsPoint raster type instead of unconditionally applying the half-pixel coordinate offset. 2. Custom CRS without EPSG codes are written to GeoAsciiParamsTag so they survive round-trips (previously silently dropped). 3. NaN pixels are converted back to the nodata sentinel value before writing, so the GDAL_NODATA tag matches the actual data.
1 parent b6a15e5 commit 7d25252

File tree

4 files changed

+367
-6
lines changed

4 files changed

+367
-6
lines changed

xrspatial/geotiff/__init__.py

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -213,8 +213,12 @@ def open_geotiff(source: str, *, window=None,
213213
# Adjust coordinates for windowed read
214214
r0, c0, r1, c1 = window
215215
t = geo_info.transform
216-
full_x = np.arange(c0, c1, dtype=np.float64) * t.pixel_width + t.origin_x + t.pixel_width * 0.5
217-
full_y = np.arange(r0, r1, dtype=np.float64) * t.pixel_height + t.origin_y + t.pixel_height * 0.5
216+
if geo_info.raster_type == RASTER_PIXEL_IS_POINT:
217+
full_x = np.arange(c0, c1, dtype=np.float64) * t.pixel_width + t.origin_x
218+
full_y = np.arange(r0, r1, dtype=np.float64) * t.pixel_height + t.origin_y
219+
else:
220+
full_x = np.arange(c0, c1, dtype=np.float64) * t.pixel_width + t.origin_x + t.pixel_width * 0.5
221+
full_y = np.arange(r0, r1, dtype=np.float64) * t.pixel_height + t.origin_y + t.pixel_height * 0.5
218222
coords = {'y': full_y, 'x': full_x}
219223

220224
if name is None:
@@ -402,6 +406,7 @@ def to_geotiff(data: xr.DataArray | np.ndarray, path: str, *,
402406

403407
geo_transform = None
404408
epsg = None
409+
wkt_fallback = None # WKT string when EPSG is not available
405410
raster_type = RASTER_PIXEL_IS_AREA
406411
x_res = None
407412
y_res = None
@@ -414,6 +419,8 @@ def to_geotiff(data: xr.DataArray | np.ndarray, path: str, *,
414419
epsg = crs
415420
elif isinstance(crs, str):
416421
epsg = _wkt_to_epsg(crs) # try to extract EPSG from WKT/PROJ
422+
if epsg is None:
423+
wkt_fallback = crs
417424

418425
if isinstance(data, xr.DataArray):
419426
# Handle CuPy-backed DataArrays: convert to numpy for CPU write
@@ -436,12 +443,16 @@ def to_geotiff(data: xr.DataArray | np.ndarray, path: str, *,
436443
if isinstance(crs_attr, str):
437444
# WKT string from reproject() or other source
438445
epsg = _wkt_to_epsg(crs_attr)
446+
if epsg is None and wkt_fallback is None:
447+
wkt_fallback = crs_attr
439448
elif crs_attr is not None:
440449
epsg = int(crs_attr)
441450
if epsg is None:
442451
wkt = data.attrs.get('crs_wkt')
443452
if isinstance(wkt, str):
444453
epsg = _wkt_to_epsg(wkt)
454+
if epsg is None and wkt_fallback is None:
455+
wkt_fallback = wkt
445456
if nodata is None:
446457
nodata = data.attrs.get('nodata')
447458
if data.attrs.get('raster_type') == 'point':
@@ -477,10 +488,19 @@ def to_geotiff(data: xr.DataArray | np.ndarray, path: str, *,
477488
elif arr.dtype == np.bool_:
478489
arr = arr.astype(np.uint8)
479490

491+
# Restore NaN pixels to the nodata sentinel value so the written file
492+
# has sentinel values matching the GDAL_NODATA tag.
493+
if nodata is not None and arr.dtype.kind == 'f' and not np.isnan(nodata):
494+
nan_mask = np.isnan(arr)
495+
if nan_mask.any():
496+
arr = arr.copy()
497+
arr[nan_mask] = arr.dtype.type(nodata)
498+
480499
write(
481500
arr, path,
482501
geo_transform=geo_transform,
483502
crs_epsg=epsg,
503+
crs_wkt=wkt_fallback if epsg is None else None,
484504
nodata=nodata,
485505
compression=compression,
486506
tiled=tiled,

xrspatial/geotiff/_geotags.py

Lines changed: 43 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -522,9 +522,18 @@ def extract_geo_info(ifd: IFD, data: bytes | memoryview,
522522
)
523523

524524

525+
def _model_type_from_wkt(wkt: str) -> int:
526+
"""Guess ModelType from a WKT string prefix."""
527+
upper = wkt.strip().upper()
528+
if upper.startswith(('GEOGCS', 'GEOGCRS')):
529+
return MODEL_TYPE_GEOGRAPHIC
530+
return MODEL_TYPE_PROJECTED
531+
532+
525533
def build_geo_tags(transform: GeoTransform, crs_epsg: int | None = None,
526534
nodata=None,
527-
raster_type: int = RASTER_PIXEL_IS_AREA) -> dict[int, tuple]:
535+
raster_type: int = RASTER_PIXEL_IS_AREA,
536+
crs_wkt: str | None = None) -> dict[int, tuple]:
528537
"""Build GeoTIFF IFD tag entries for writing.
529538
530539
Parameters
@@ -537,6 +546,11 @@ def build_geo_tags(transform: GeoTransform, crs_epsg: int | None = None,
537546
NoData value.
538547
raster_type : int
539548
RASTER_PIXEL_IS_AREA (1) or RASTER_PIXEL_IS_POINT (2).
549+
crs_wkt : str or None
550+
WKT or PROJ string for the CRS. Used only when *crs_epsg* is
551+
None so that custom (non-EPSG) coordinate systems survive
552+
round-trips. Stored in the GeoAsciiParamsTag and referenced
553+
from GTCitationGeoKey.
540554
541555
Returns
542556
-------
@@ -562,6 +576,10 @@ def build_geo_tags(transform: GeoTransform, crs_epsg: int | None = None,
562576
num_keys = 1 # at least RasterType
563577
key_entries = []
564578

579+
# Collect ASCII params strings (pipe-delimited in GeoAsciiParamsTag)
580+
ascii_parts = []
581+
ascii_offset = 0
582+
565583
# ModelType
566584
if crs_epsg is not None:
567585
# Guess model type from EPSG (simple heuristic)
@@ -571,6 +589,10 @@ def build_geo_tags(transform: GeoTransform, crs_epsg: int | None = None,
571589
model_type = MODEL_TYPE_PROJECTED
572590
key_entries.append((GEOKEY_MODEL_TYPE, 0, 1, model_type))
573591
num_keys += 1
592+
elif crs_wkt is not None:
593+
model_type = _model_type_from_wkt(crs_wkt)
594+
key_entries.append((GEOKEY_MODEL_TYPE, 0, 1, model_type))
595+
num_keys += 1
574596

575597
# RasterType
576598
key_entries.append((GEOKEY_RASTER_TYPE, 0, 1, raster_type))
@@ -582,6 +604,22 @@ def build_geo_tags(transform: GeoTransform, crs_epsg: int | None = None,
582604
else:
583605
key_entries.append((GEOKEY_PROJECTED_CS_TYPE, 0, 1, crs_epsg))
584606
num_keys += 1
607+
elif crs_wkt is not None:
608+
# User-defined CRS: store 32767 and write WKT to GeoAsciiParams
609+
if model_type == MODEL_TYPE_GEOGRAPHIC:
610+
key_entries.append((GEOKEY_GEOGRAPHIC_TYPE, 0, 1, 32767))
611+
else:
612+
key_entries.append((GEOKEY_PROJECTED_CS_TYPE, 0, 1, 32767))
613+
num_keys += 1
614+
# GTCitationGeoKey -> GeoAsciiParams
615+
wkt_with_pipe = crs_wkt + '|'
616+
key_entries.append((
617+
GEOKEY_CITATION, TAG_GEO_ASCII_PARAMS,
618+
len(wkt_with_pipe), ascii_offset,
619+
))
620+
ascii_parts.append(wkt_with_pipe)
621+
ascii_offset += len(wkt_with_pipe)
622+
num_keys += 1
585623

586624
num_keys = len(key_entries)
587625
header = [1, 1, 0, num_keys]
@@ -591,6 +629,10 @@ def build_geo_tags(transform: GeoTransform, crs_epsg: int | None = None,
591629

592630
tags[TAG_GEO_KEY_DIRECTORY] = tuple(flat)
593631

632+
# GeoAsciiParamsTag (34737)
633+
if ascii_parts:
634+
tags[TAG_GEO_ASCII_PARAMS] = ''.join(ascii_parts)
635+
594636
# GDAL_NODATA
595637
if nodata is not None:
596638
tags[TAG_GDAL_NODATA] = str(nodata)

xrspatial/geotiff/_writer.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
from ._geotags import (
3333
GeoTransform,
3434
build_geo_tags,
35+
TAG_GEO_ASCII_PARAMS,
3536
TAG_GEO_KEY_DIRECTORY,
3637
TAG_GDAL_NODATA,
3738
TAG_MODEL_PIXEL_SCALE,
@@ -525,6 +526,7 @@ def _assemble_tiff(width: int, height: int, dtype: np.dtype,
525526
nodata,
526527
is_cog: bool = False,
527528
raster_type: int = 1,
529+
crs_wkt: str | None = None,
528530
gdal_metadata_xml: str | None = None,
529531
extra_tags: list | None = None,
530532
x_resolution: float | None = None,
@@ -557,12 +559,14 @@ def _assemble_tiff(width: int, height: int, dtype: np.dtype,
557559
geo_tags_dict = {}
558560
if geo_transform is not None:
559561
geo_tags_dict = build_geo_tags(
560-
geo_transform, crs_epsg, nodata, raster_type=raster_type)
562+
geo_transform, crs_epsg, nodata, raster_type=raster_type,
563+
crs_wkt=crs_wkt)
561564
else:
562565
# No spatial reference -- still write CRS and nodata if provided
563-
if crs_epsg is not None or nodata is not None:
566+
if crs_epsg is not None or crs_wkt is not None or nodata is not None:
564567
geo_tags_dict = build_geo_tags(
565568
GeoTransform(), crs_epsg, nodata, raster_type=raster_type,
569+
crs_wkt=crs_wkt,
566570
)
567571
# Remove the default pixel scale / tiepoint tags since we
568572
# have no real transform -- keep only GeoKeys and NODATA.
@@ -641,6 +645,8 @@ def _assemble_tiff(width: int, height: int, dtype: np.dtype,
641645
tags.append((gtag, DOUBLE, 6, list(gval)))
642646
elif gtag == TAG_GEO_KEY_DIRECTORY:
643647
tags.append((gtag, SHORT, len(gval), list(gval)))
648+
elif gtag == TAG_GEO_ASCII_PARAMS:
649+
tags.append((gtag, ASCII, len(str(gval)) + 1, str(gval)))
644650
elif gtag == TAG_GDAL_NODATA:
645651
tags.append((gtag, ASCII, len(str(gval)) + 1, str(gval)))
646652

@@ -846,6 +852,7 @@ def _assemble_cog_layout(header_size: int,
846852
def write(data: np.ndarray, path: str, *,
847853
geo_transform: GeoTransform | None = None,
848854
crs_epsg: int | None = None,
855+
crs_wkt: str | None = None,
849856
nodata=None,
850857
compression: str = 'zstd',
851858
tiled: bool = True,
@@ -939,7 +946,7 @@ def write(data: np.ndarray, path: str, *,
939946
file_bytes = _assemble_tiff(
940947
w, h, data.dtype, comp_tag, predictor, tiled, tile_size,
941948
parts, geo_transform, crs_epsg, nodata, is_cog=cog,
942-
raster_type=raster_type,
949+
raster_type=raster_type, crs_wkt=crs_wkt,
943950
gdal_metadata_xml=gdal_metadata_xml,
944951
extra_tags=extra_tags,
945952
x_resolution=x_resolution, y_resolution=y_resolution,

0 commit comments

Comments
 (0)