1717
1818//! Compression codec support for data compression and decompression.
1919
20+ use std:: fmt;
2021use std:: io:: { Read , Write } ;
2122
2223use flate2:: Compression ;
2324use flate2:: read:: GzDecoder ;
2425use flate2:: write:: GzEncoder ;
25- use serde:: { Deserialize , Serialize } ;
26+ use serde:: { Deserialize , Deserializer , Serialize , Serializer } ;
2627
2728use crate :: { Error , ErrorKind , Result } ;
2829
/// Default compression level for Zstandard (zstd).
const ZSTD_DEFAULT_LEVEL: u8 = 3;
/// Default compression level for Gzip.
const GZIP_DEFAULT_LEVEL: u8 = 6;
/// Maximum compression level for Gzip.
const GZIP_MAX_LEVEL: u8 = 9;

/// Data compression formats.
///
/// `Zstd` and `Gzip` carry the compression level to use when compressing. The level is
/// ignored when decompressing.
#[derive(Debug, PartialEq, Eq, Clone, Copy, Default)]
pub enum CompressionCodec {
    /// No compression
    #[default]
    None,
    /// LZ4 single compression frame with content size present
    Lz4,
    /// Zstandard single compression frame with content size present.
    /// Level range is 0–22, where 0 means default compression level (not no compression).
    /// Use [`CompressionCodec::zstd_default`] to construct with the default level.
    Zstd(u8),
    /// Gzip compression. Level range is 0–9, where 0 means no compression; higher values
    /// are clamped to 9 when compressing.
    /// Use [`CompressionCodec::gzip_default`] to construct with the default level.
    Gzip(u8),
    /// Snappy compression
    Snappy,
}

impl CompressionCodec {
    /// Returns a Zstd codec with the default compression level.
    pub const fn zstd_default() -> Self {
        CompressionCodec::Zstd(ZSTD_DEFAULT_LEVEL)
    }

    /// Returns a Gzip codec with the default compression level.
    pub const fn gzip_default() -> Self {
        CompressionCodec::Gzip(GZIP_DEFAULT_LEVEL)
    }

    /// Returns the codec name as used in serialization and error messages.
    ///
    /// The name identifies the codec only; the compression level carried by
    /// `Zstd`/`Gzip` is not part of it.
    pub const fn name(&self) -> &'static str {
        match self {
            CompressionCodec::None => "none",
            CompressionCodec::Lz4 => "lz4",
            CompressionCodec::Zstd(_) => "zstd",
            CompressionCodec::Gzip(_) => "gzip",
            CompressionCodec::Snappy => "snappy",
        }
    }
}
78+
// Note: serialize/deserialize do not round-trip the compression level. Iceberg configuration
// stores only the codec name (e.g. "zstd"), not the level, so deserialization always produces
// the default level. A `Zstd(5)` written to metadata will be read back as `Zstd(3)`. Some
// compression configuration (e.g. Avro metadata) has a separate level field alongside the
// codec name.
83+ impl Serialize for CompressionCodec {
84+ fn serialize < S : Serializer > ( & self , serializer : S ) -> std:: result:: Result < S :: Ok , S :: Error > {
85+ serializer. serialize_str ( self . name ( ) )
86+ }
87+ }
88+
89+ impl < ' de > Deserialize < ' de > for CompressionCodec {
90+ fn deserialize < D : Deserializer < ' de > > ( deserializer : D ) -> std:: result:: Result < Self , D :: Error > {
91+ let s = String :: deserialize ( deserializer) ?;
92+ match s. to_lowercase ( ) . as_str ( ) {
93+ "none" => Ok ( CompressionCodec :: None ) ,
94+ "lz4" => Ok ( CompressionCodec :: Lz4 ) ,
95+ "zstd" => Ok ( CompressionCodec :: zstd_default ( ) ) ,
96+ "gzip" => Ok ( CompressionCodec :: gzip_default ( ) ) ,
97+ "snappy" => Ok ( CompressionCodec :: Snappy ) ,
98+ other => Err ( serde:: de:: Error :: unknown_variant ( other, & [
99+ "none" , "lz4" , "zstd" , "gzip" , "snappy" ,
100+ ] ) ) ,
101+ }
102+ }
103+ }
104+
105+ impl fmt:: Display for CompressionCodec {
106+ fn fmt ( & self , f : & mut fmt:: Formatter < ' _ > ) -> fmt:: Result {
107+ match self {
108+ CompressionCodec :: None => write ! ( f, "None" ) ,
109+ CompressionCodec :: Lz4 => write ! ( f, "Lz4" ) ,
110+ CompressionCodec :: Zstd ( level) => write ! ( f, "Zstd(level={level})" ) ,
111+ CompressionCodec :: Gzip ( level) => write ! ( f, "Gzip(level={level})" ) ,
112+ CompressionCodec :: Snappy => write ! ( f, "Snappy" ) ,
113+ }
114+ }
42115}
43116
44117impl CompressionCodec {
@@ -49,13 +122,17 @@ impl CompressionCodec {
49122 ErrorKind :: FeatureUnsupported ,
50123 "LZ4 decompression is not supported currently" ,
51124 ) ) ,
52- CompressionCodec :: Zstd => Ok ( zstd:: stream:: decode_all ( & bytes[ ..] ) ?) ,
53- CompressionCodec :: Gzip => {
125+ CompressionCodec :: Zstd ( _ ) => Ok ( zstd:: stream:: decode_all ( & bytes[ ..] ) ?) ,
126+ CompressionCodec :: Gzip ( _ ) => {
54127 let mut decoder = GzDecoder :: new ( & bytes[ ..] ) ;
55128 let mut decompressed = Vec :: new ( ) ;
56129 decoder. read_to_end ( & mut decompressed) ?;
57130 Ok ( decompressed)
58131 }
132+ CompressionCodec :: Snappy => Err ( Error :: new (
133+ ErrorKind :: FeatureUnsupported ,
134+ "Snappy decompression is not supported currently" ,
135+ ) ) ,
59136 }
60137 }
61138
@@ -66,19 +143,24 @@ impl CompressionCodec {
66143 ErrorKind :: FeatureUnsupported ,
67144 "LZ4 compression is not supported currently" ,
68145 ) ) ,
69- CompressionCodec :: Zstd => {
146+ CompressionCodec :: Zstd ( level ) => {
70147 let writer = Vec :: < u8 > :: new ( ) ;
71- let mut encoder = zstd:: stream:: Encoder :: new ( writer, 3 ) ?;
148+ let mut encoder = zstd:: stream:: Encoder :: new ( writer, * level as i32 ) ?;
72149 encoder. include_checksum ( true ) ?;
73150 encoder. set_pledged_src_size ( Some ( bytes. len ( ) . try_into ( ) ?) ) ?;
74151 std:: io:: copy ( & mut & bytes[ ..] , & mut encoder) ?;
75152 Ok ( encoder. finish ( ) ?)
76153 }
77- CompressionCodec :: Gzip => {
78- let mut encoder = GzEncoder :: new ( Vec :: new ( ) , Compression :: default ( ) ) ;
154+ CompressionCodec :: Gzip ( level) => {
155+ let compression = Compression :: new ( ( * level) . min ( GZIP_MAX_LEVEL ) as u32 ) ;
156+ let mut encoder = GzEncoder :: new ( Vec :: new ( ) , compression) ;
79157 encoder. write_all ( & bytes) ?;
80158 Ok ( encoder. finish ( ) ?)
81159 }
160+ CompressionCodec :: Snappy => Err ( Error :: new (
161+ ErrorKind :: FeatureUnsupported ,
162+ "Snappy compression is not supported currently" ,
163+ ) ) ,
82164 }
83165 }
84166
@@ -95,8 +177,10 @@ impl CompressionCodec {
95177 pub fn suffix ( & self ) -> Result < & ' static str > {
96178 match self {
97179 CompressionCodec :: None => Ok ( "" ) ,
98- CompressionCodec :: Gzip => Ok ( ".gz" ) ,
99- codec @ ( CompressionCodec :: Lz4 | CompressionCodec :: Zstd ) => Err ( Error :: new (
180+ CompressionCodec :: Gzip ( _) => Ok ( ".gz" ) ,
181+ codec @ ( CompressionCodec :: Lz4
182+ | CompressionCodec :: Zstd ( _)
183+ | CompressionCodec :: Snappy ) => Err ( Error :: new (
100184 ErrorKind :: FeatureUnsupported ,
101185 format ! ( "suffix not defined for {codec:?}" ) ,
102186 ) ) ,
@@ -123,7 +207,10 @@ mod tests {
123207 async fn test_compression_codec_compress ( ) {
124208 let bytes_vec = [ 0_u8 ; 100 ] . to_vec ( ) ;
125209
126- let compression_codecs = [ CompressionCodec :: Zstd , CompressionCodec :: Gzip ] ;
210+ let compression_codecs = [
211+ CompressionCodec :: zstd_default ( ) ,
212+ CompressionCodec :: gzip_default ( ) ,
213+ ] ;
127214
128215 for codec in compression_codecs {
129216 let compressed = codec. compress ( bytes_vec. clone ( ) ) . unwrap ( ) ;
@@ -135,7 +222,10 @@ mod tests {
135222
136223 #[ tokio:: test]
137224 async fn test_compression_codec_unsupported ( ) {
138- let unsupported_codecs = [ ( CompressionCodec :: Lz4 , "LZ4" ) ] ;
225+ let unsupported_codecs = [
226+ ( CompressionCodec :: Lz4 , "LZ4" ) ,
227+ ( CompressionCodec :: Snappy , "Snappy" ) ,
228+ ] ;
139229 let bytes_vec = [ 0_u8 ; 100 ] . to_vec ( ) ;
140230
141231 for ( codec, name) in unsupported_codecs {
@@ -153,18 +243,34 @@ mod tests {
153243
#[test]
fn test_suffix() {
    // Codecs that define a file suffix, paired with the expected value.
    let with_suffix = [
        (CompressionCodec::None, ""),
        (CompressionCodec::gzip_default(), ".gz"),
    ];
    for (codec, expected) in with_suffix {
        assert_eq!(codec.suffix().unwrap(), expected);
    }

    // Codecs without a defined suffix must report an error.
    let without_suffix = [
        CompressionCodec::Lz4,
        CompressionCodec::zstd_default(),
        CompressionCodec::Snappy,
    ];
    for codec in without_suffix {
        assert!(codec.suffix().is_err());
    }

    // The error text names the offending codec.
    let expected_messages = [
        (CompressionCodec::Lz4, "suffix not defined for Lz4"),
        (
            CompressionCodec::zstd_default(),
            "suffix not defined for Zstd",
        ),
    ];
    for (codec, fragment) in expected_messages {
        let message = codec.suffix().unwrap_err().to_string();
        assert!(message.contains(fragment));
    }
}
259+
#[test]
fn test_display() {
    // Each codec paired with the exact text its `Display` impl is expected to produce.
    let expectations = [
        (CompressionCodec::None, "None"),
        (CompressionCodec::Lz4, "Lz4"),
        (CompressionCodec::zstd_default(), "Zstd(level=3)"),
        (CompressionCodec::Zstd(5), "Zstd(level=5)"),
        (CompressionCodec::gzip_default(), "Gzip(level=6)"),
        (CompressionCodec::Gzip(9), "Gzip(level=9)"),
        (CompressionCodec::Snappy, "Snappy"),
    ];

    for (codec, rendered) in expectations {
        assert_eq!(codec.to_string(), rendered);
    }
}
170276}
0 commit comments