Skip to content

Commit 339581f

Browse files
committed
Fix KvikIO GDS error handling and ZSTD GPU fallback
- GDS tile read: added sync + verification after each pread to catch partial reads and CUDA errors early. Catches the exception and tries to reset CUDA state before falling back.
- gpu_decode_tiles: unsupported GPU codecs (ZSTD without nvCOMP, etc.) are now decompressed on the CPU and then transferred to the GPU instead of raising ValueError. This keeps the predictor + assembly on GPU.
- Fixes cudaErrorIllegalAddress from a kvikio version mismatch (26.02 C lib vs 26.06 Python bindings) by catching the error gracefully instead of poisoning the GPU state.
1 parent 1553d03 commit 339581f

1 file changed

Lines changed: 25 additions & 6 deletions

File tree

xrspatial/geotiff/_gpu_decode.py

Lines changed: 25 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -696,12 +696,22 @@ def _try_kvikio_read_tiles(file_path, tile_offsets, tile_byte_counts, tile_bytes
696696
with kvikio.CuFile(file_path, 'r') as f:
697697
for off, bc in zip(tile_offsets, tile_byte_counts):
698698
buf = cupy.empty(bc, dtype=cupy.uint8)
699-
f.pread(buf, file_offset=off)
699+
nbytes = f.pread(buf, file_offset=off)
700+
# Verify the read completed correctly
701+
actual = nbytes.get() if hasattr(nbytes, 'get') else int(nbytes)
702+
if actual != bc:
703+
return None # partial read, fall back
700704
d_tiles.append(buf)
705+
cupy.cuda.Device().synchronize()
701706
return d_tiles
702707
except Exception:
703-
# GDS not available (no NVMe, no kernel module, etc.)
704-
# Fall back to normal CPU read path
708+
# GDS not available, version mismatch, or CUDA error
709+
# Reset CUDA error state if possible
710+
try:
711+
import cupy
712+
cupy.cuda.Device().synchronize()
713+
except Exception:
714+
pass
705715
return None
706716

707717

@@ -1182,9 +1192,18 @@ def gpu_decode_tiles(
11821192
d_decomp_offsets = cupy.asarray(decomp_offsets)
11831193

11841194
else:
1185-
raise ValueError(
1186-
f"GPU decode supports LZW (5), deflate (8), and uncompressed (1), "
1187-
f"got compression={compression}")
1195+
# Unsupported GPU codec: decompress on CPU, transfer to GPU
1196+
from ._compression import decompress as cpu_decompress
1197+
raw_host = np.empty(n_tiles * tile_bytes, dtype=np.uint8)
1198+
for i, tile in enumerate(compressed_tiles):
1199+
start = i * tile_bytes
1200+
chunk = cpu_decompress(tile, compression, tile_bytes)
1201+
raw_host[start:start + min(len(chunk), tile_bytes)] = \
1202+
chunk[:tile_bytes] if len(chunk) >= tile_bytes else \
1203+
np.pad(chunk, (0, tile_bytes - len(chunk)))
1204+
d_decomp = cupy.asarray(raw_host)
1205+
decomp_offsets = np.arange(n_tiles, dtype=np.int64) * tile_bytes
1206+
d_decomp_offsets = cupy.asarray(decomp_offsets)
11881207

11891208
# Apply predictor on GPU
11901209
if predictor == 2:

0 commit comments

Comments
 (0)