Skip to content

Commit 784168f

Browse files
authored
Add LZ4 and LERC compression codecs for geotiff (#1063)
* Add LZ4 compression support for GeoTIFF module (#1051) Add CPU LZ4 codec using python-lz4 (lz4.frame format) for TIFF compression tag 50004. GPU nvCOMP acceleration deferred to a follow-up since GDAL uses lz4-frame format which needs bridging to nvCOMP's raw LZ4 format -- the CPU fallback path handles GPU reads/writes correctly in the meantime. Changes: - _compression.py: lz4_decompress/lz4_compress functions, COMPRESSION_LZ4 constant, wired into decompress/compress dispatchers - _writer.py: 'lz4' key in _compression_tag mapping - tests/test_lz4.py: 13 tests covering codec roundtrips, write-read roundtrips (tiled, stripped, float32, predictor), public API, and error handling when lz4 is not installed * Add LERC compression for controlled-error lossy raster encoding (#1052) Adds support for LERC (Limited Error Raster Compression, TIFF tag 34887) to the geotiff module. LERC is CPU-only and wraps Esri's C library via the `lerc` Python package. The key feature is the max_z_error parameter which guarantees a maximum per-pixel encoding error (0 = lossless). Changes: - _compression.py: lerc_compress/lerc_decompress functions, COMPRESSION_LERC constant, dispatch entries in compress() and decompress() - _writer.py: LERC in compression tag map, strip writer, tile writer - _gpu_decode.py: explicit CPU fallback for LERC in both decode and encode paths (no GPU LERC library exists) - tests/test_lerc.py: codec roundtrips (float32, uint8, uint16), lossy tolerance check, write-read roundtrips (tiled, stripped, public API), availability flag tests
1 parent fdc6d81 commit 784168f

File tree

5 files changed

+485
-0
lines changed

5 files changed

+485
-0
lines changed

xrspatial/geotiff/_compression.py

Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -790,6 +790,143 @@ def jpeg2000_compress(data: bytes, width: int, height: int,
790790
os.unlink(tmp)
791791

792792

793+
# -- LERC codec (via lerc) ----------------------------------------------------
794+
795+
# Optional dependency: bind the ``lerc`` package to ``_lerc`` when it can be
# imported and record the outcome in ``LERC_AVAILABLE`` so the codec
# functions can fail with a helpful message instead of a NameError.
try:
    import lerc as _lerc
except ImportError:
    _lerc = None
    LERC_AVAILABLE = False
else:
    LERC_AVAILABLE = True
801+
802+
803+
def lerc_decompress(data: bytes, width: int = 0, height: int = 0,
                    samples: int = 1) -> bytes:
    """Decode a LERC blob and return the pixel values as raw bytes.

    Parameters
    ----------
    data : bytes
        LERC-encoded blob, handed unchanged to ``lerc.decode``.
    width, height, samples : int
        Not used by this function; the ``decompress`` dispatcher passes
        them in the same way it does for ``jpeg2000_decompress``.

    Returns
    -------
    bytes
        ``tobytes()`` of the decoded data array. The valid mask that
        ``lerc.decode`` also returns is discarded.

    Raises
    ------
    ImportError
        If the ``lerc`` package is not installed.
    RuntimeError
        If ``lerc.decode`` reports a non-zero result code.
    """
    if LERC_AVAILABLE:
        # lerc.decode returns (result_code, data_array, valid_mask, ...)
        decoded = _lerc.decode(data)
        status = decoded[0]
        if status != 0:
            raise RuntimeError(
                f"LERC decode failed with error code {status}")
        pixels = decoded[1]
        return pixels.tobytes()
    raise ImportError(
        "lerc is required to read LERC-compressed TIFFs. "
        "Install it with: pip install lerc")
816+
817+
818+
def lerc_compress(data: bytes, width: int, height: int,
                  samples: int = 1, dtype: np.dtype = np.dtype('float32'),
                  max_z_error: float = 0.0) -> bytes:
    """Encode raw pixel bytes as a LERC blob.

    Parameters
    ----------
    data : bytes
        Raw pixel buffer; reinterpreted as ``dtype`` and reshaped to
        ``(height, width)`` when ``samples == 1``, otherwise to
        ``(height, width, samples)``.
    width, height : int
        Dimensions used for the reshape.
    samples : int
        Values per pixel, forwarded to ``lerc.encode``.
    dtype : np.dtype
        Element type used to reinterpret ``data``.
    max_z_error : float
        Maximum encoding error per pixel. 0 = lossless.

    Returns
    -------
    bytes
        Contents of the output buffer returned by ``lerc.encode``.

    Raises
    ------
    ImportError
        If the ``lerc`` package is not installed.
    RuntimeError
        If ``lerc.encode`` reports a non-zero result code.
    """
    if not LERC_AVAILABLE:
        raise ImportError(
            "lerc is required to write LERC-compressed TIFFs. "
            "Install it with: pip install lerc")

    shape = (height, width) if samples == 1 else (height, width, samples)
    pixels = np.frombuffer(data, dtype=dtype).reshape(shape)

    # Positional arguments of lerc.encode:
    #   npArr, nValuesPerPixel, bHasMask, npValidMask, maxZErr, nBytesHint
    # nBytesHint=1 triggers actual encoding (0 = compute size only).
    # No validity mask is supplied.
    encoded = _lerc.encode(pixels, samples, False, None, max_z_error, 1)
    status = encoded[0]
    if status != 0:
        raise RuntimeError(f"LERC encode failed with error code {status}")

    # encoded is (error_code, nBytesWritten, ctypes_buffer)
    out_buffer = encoded[2]
    return bytes(out_buffer)
846+
847+
848+
# -- LZ4 codec (via python-lz4) -----------------------------------------------
849+
850+
# Optional dependency: bind ``lz4.frame`` to ``_lz4`` when it can be imported
# and record the outcome in ``LZ4_AVAILABLE`` so the codec functions can fail
# with a helpful message instead of a NameError.
try:
    import lz4.frame as _lz4
except ImportError:
    _lz4 = None
    LZ4_AVAILABLE = False
else:
    LZ4_AVAILABLE = True
856+
857+
858+
def lz4_decompress(data: bytes) -> bytes:
    """Decode an LZ4 frame and return the decompressed payload.

    Parameters
    ----------
    data : bytes
        LZ4 frame data, handed unchanged to ``lz4.frame.decompress``.

    Returns
    -------
    bytes
        Whatever ``lz4.frame.decompress`` returns for ``data``.

    Raises
    ------
    ImportError
        If the ``lz4`` package is not installed.
    """
    if LZ4_AVAILABLE:
        return _lz4.decompress(data)
    raise ImportError(
        "lz4 is required to read LZ4-compressed TIFFs. "
        "Install it with: pip install lz4")
865+
866+
867+
def lz4_compress(data: bytes, level: int = 0) -> bytes:
    """Encode ``data`` as an LZ4 frame.

    Parameters
    ----------
    data : bytes
        Payload to compress.
    level : int
        Forwarded to ``lz4.frame.compress`` as ``compression_level``.

    Returns
    -------
    bytes
        Whatever ``lz4.frame.compress`` returns for ``data``.

    Raises
    ------
    ImportError
        If the ``lz4`` package is not installed.
    """
    if LZ4_AVAILABLE:
        return _lz4.compress(data, compression_level=level)
    raise ImportError(
        "lz4 is required to write LZ4-compressed TIFFs. "
        "Install it with: pip install lz4")
874+
875+
876+
# NOTE: the LERC codec (LERC_AVAILABLE, _lerc, lerc_decompress, lerc_compress)
# is defined once, in the "LERC codec (via lerc)" section that precedes the
# LZ4 codec above. A second, line-for-line identical copy of that section used
# to sit here; it re-ran the optional import and rebound the same names, so
# any change made to the first copy would have been silently shadowed.
929+
793930

794931
# -- Dispatch helpers ---------------------------------------------------------
795932

@@ -800,7 +937,9 @@ def jpeg2000_compress(data: bytes, width: int, height: int,
800937
COMPRESSION_DEFLATE = 8
801938
COMPRESSION_JPEG2000 = 34712
802939
COMPRESSION_ZSTD = 50000
940+
COMPRESSION_LZ4 = 50004
803941
COMPRESSION_PACKBITS = 32773
942+
COMPRESSION_LERC = 34887
804943
COMPRESSION_ADOBE_DEFLATE = 32946
805944

806945

@@ -839,6 +978,11 @@ def decompress(data, compression: int, expected_size: int = 0,
839978
elif compression == COMPRESSION_JPEG2000:
840979
return np.frombuffer(
841980
jpeg2000_decompress(data, width, height, samples), dtype=np.uint8)
981+
elif compression == COMPRESSION_LZ4:
982+
return np.frombuffer(lz4_decompress(data), dtype=np.uint8)
983+
elif compression == COMPRESSION_LERC:
984+
return np.frombuffer(
985+
lerc_decompress(data, width, height, samples), dtype=np.uint8)
842986
else:
843987
raise ValueError(f"Unsupported compression type: {compression}")
844988

@@ -869,9 +1013,13 @@ def compress(data: bytes, compression: int, level: int = 6) -> bytes:
8691013
return packbits_compress(data)
8701014
elif compression == COMPRESSION_ZSTD:
8711015
return zstd_compress(data, level)
1016+
elif compression == COMPRESSION_LZ4:
1017+
return lz4_compress(data, level)
8721018
elif compression == COMPRESSION_JPEG:
8731019
raise ValueError("Use jpeg_compress() directly with width/height/samples")
8741020
elif compression == COMPRESSION_JPEG2000:
8751021
raise ValueError("Use jpeg2000_compress() directly with width/height/samples/dtype")
1022+
elif compression == COMPRESSION_LERC:
1023+
raise ValueError("Use lerc_compress() directly with width/height/samples/dtype")
8761024
else:
8771025
raise ValueError(f"Unsupported compression type: {compression}")

xrspatial/geotiff/_gpu_decode.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1534,6 +1534,21 @@ def gpu_decode_tiles(
15341534
decomp_offsets = np.arange(n_tiles, dtype=np.int64) * tile_bytes
15351535
d_decomp_offsets = cupy.asarray(decomp_offsets)
15361536

1537+
elif compression == 34887: # LERC
1538+
from ._compression import lerc_decompress
1539+
raw_host = np.empty(n_tiles * tile_bytes, dtype=np.uint8)
1540+
for i, tile in enumerate(compressed_tiles):
1541+
start = i * tile_bytes
1542+
chunk = np.frombuffer(
1543+
lerc_decompress(tile, tile_width, tile_height, samples),
1544+
dtype=np.uint8)
1545+
raw_host[start:start + min(len(chunk), tile_bytes)] = \
1546+
chunk[:tile_bytes] if len(chunk) >= tile_bytes else \
1547+
np.pad(chunk, (0, tile_bytes - len(chunk)))
1548+
d_decomp = cupy.asarray(raw_host)
1549+
decomp_offsets = np.arange(n_tiles, dtype=np.int64) * tile_bytes
1550+
d_decomp_offsets = cupy.asarray(decomp_offsets)
1551+
15371552
elif compression == 1: # Uncompressed
15381553
raw_host = np.empty(n_tiles * tile_bytes, dtype=np.uint8)
15391554
for i, tile in enumerate(compressed_tiles):
@@ -2273,6 +2288,19 @@ def gpu_compress_tiles(d_image, tile_width, tile_height,
22732288
samples=samples, dtype=dtype))
22742289
return result
22752290

2291+
# LERC: CPU only, no GPU library
2292+
if compression == 34887:
2293+
from ._compression import lerc_compress
2294+
cpu_buf = d_tile_buf.get()
2295+
result = []
2296+
for i in range(n_tiles):
2297+
start = i * tile_bytes
2298+
tile_data = bytes(cpu_buf[start:start + tile_bytes])
2299+
result.append(lerc_compress(
2300+
tile_data, tile_width, tile_height,
2301+
samples=samples, dtype=dtype))
2302+
return result
2303+
22762304
# Try nvCOMP batch compress
22772305
result = _nvcomp_batch_compress(d_tiles, None, tile_bytes, compression, n_tiles)
22782306

xrspatial/geotiff/_writer.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
COMPRESSION_DEFLATE,
1111
COMPRESSION_JPEG,
1212
COMPRESSION_JPEG2000,
13+
COMPRESSION_LERC,
14+
COMPRESSION_LZ4,
1315
COMPRESSION_LZW,
1416
COMPRESSION_NONE,
1517
COMPRESSION_PACKBITS,
@@ -71,8 +73,10 @@ def _compression_tag(compression_name: str) -> int:
7173
'jpeg': COMPRESSION_JPEG,
7274
'packbits': COMPRESSION_PACKBITS,
7375
'zstd': COMPRESSION_ZSTD,
76+
'lz4': COMPRESSION_LZ4,
7477
'jpeg2000': COMPRESSION_JPEG2000,
7578
'j2k': COMPRESSION_JPEG2000,
79+
'lerc': COMPRESSION_LERC,
7680
}
7781
name = compression_name.lower()
7882
if name not in _map:
@@ -332,6 +336,10 @@ def _write_stripped(data: np.ndarray, compression: int, predictor: bool,
332336
from ._compression import jpeg2000_compress
333337
compressed = jpeg2000_compress(
334338
strip_data, width, strip_rows, samples=samples, dtype=dtype)
339+
elif compression == COMPRESSION_LERC:
340+
from ._compression import lerc_compress
341+
compressed = lerc_compress(
342+
strip_data, width, strip_rows, samples=samples, dtype=dtype)
335343
else:
336344
compressed = compress(strip_data, compression)
337345

@@ -387,6 +395,10 @@ def _prepare_tile(data, tr, tc, th, tw, height, width, samples, dtype,
387395
from ._compression import jpeg2000_compress
388396
return jpeg2000_compress(
389397
tile_data, tw, th, samples=samples, dtype=dtype)
398+
if compression == COMPRESSION_LERC:
399+
from ._compression import lerc_compress
400+
return lerc_compress(
401+
tile_data, tw, th, samples=samples, dtype=dtype)
390402
return compress(tile_data, compression)
391403

392404

0 commit comments

Comments
 (0)