@@ -21,7 +21,7 @@ use std::sync::Arc;
2121
2222use crate :: {
2323 _internal_datafusion_err, DataFusionError , Result ,
24- config:: { ParquetOptions , TableParquetOptions } ,
24+ config:: { ParquetCdcOptions , ParquetOptions , TableParquetOptions } ,
2525} ;
2626
2727use arrow:: datatypes:: Schema ;
@@ -166,6 +166,42 @@ impl TryFrom<&TableParquetOptions> for WriterPropertiesBuilder {
166166 }
167167}
168168
169+ /// Convert DataFusion's [`ParquetCdcOptions`] into parquet-rs's `Option<CdcOptions>`.
170+ ///
171+ /// parquet-rs has no `enabled` flag; CDC is on when the option is `Some`. So a
172+ /// disabled [`ParquetCdcOptions`] maps to `None`, and an enabled one to `Some`
173+ /// with the chunking parameters.
174+ impl From < & ParquetCdcOptions > for Option < parquet:: file:: properties:: CdcOptions > {
175+ fn from ( value : & ParquetCdcOptions ) -> Self {
176+ value
177+ . enabled
178+ . then_some ( parquet:: file:: properties:: CdcOptions {
179+ min_chunk_size : value. min_chunk_size ,
180+ max_chunk_size : value. max_chunk_size ,
181+ norm_level : value. norm_level ,
182+ } )
183+ }
184+ }
185+
186+ /// Convert parquet-rs's `Option<&CdcOptions>` back into DataFusion's
187+ /// [`ParquetCdcOptions`].
188+ ///
189+ /// The presence of parquet-rs options means CDC was enabled, so `Some` maps to
190+ /// `enabled: true`; `None` yields the disabled default.
191+ impl From < Option < & parquet:: file:: properties:: CdcOptions > > for ParquetCdcOptions {
192+ fn from ( value : Option < & parquet:: file:: properties:: CdcOptions > ) -> Self {
193+ match value {
194+ Some ( cdc) => ParquetCdcOptions {
195+ enabled : true ,
196+ min_chunk_size : cdc. min_chunk_size ,
197+ max_chunk_size : cdc. max_chunk_size ,
198+ norm_level : cdc. norm_level ,
199+ } ,
200+ None => ParquetCdcOptions :: default ( ) ,
201+ }
202+ }
203+ }
204+
169205impl ParquetOptions {
170206 /// Convert the global session options, [`ParquetOptions`], into a single write action's [`WriterPropertiesBuilder`].
171207 ///
@@ -249,27 +285,7 @@ impl ParquetOptions {
249285 if let Some ( encoding) = encoding {
250286 builder = builder. set_encoding ( parse_encoding_string ( encoding) ?) ;
251287 }
252- if content_defined_chunking. enabled {
253- let cdc = content_defined_chunking;
254- if cdc. min_chunk_size == 0 {
255- return Err ( DataFusionError :: Configuration (
256- "CDC min_chunk_size must be greater than 0" . to_string ( ) ,
257- ) ) ;
258- }
259- if cdc. max_chunk_size <= cdc. min_chunk_size {
260- return Err ( DataFusionError :: Configuration ( format ! (
261- "CDC max_chunk_size ({}) must be greater than min_chunk_size ({})" ,
262- cdc. max_chunk_size, cdc. min_chunk_size
263- ) ) ) ;
264- }
265- builder = builder. set_content_defined_chunking ( Some (
266- parquet:: file:: properties:: CdcOptions {
267- min_chunk_size : cdc. min_chunk_size ,
268- max_chunk_size : cdc. max_chunk_size ,
269- norm_level : cdc. norm_level ,
270- } ,
271- ) ) ;
272- }
288+ builder = builder. set_content_defined_chunking ( content_defined_chunking. into ( ) ) ;
273289
274290 Ok ( builder)
275291 }
@@ -412,7 +428,7 @@ mod tests {
412428 #[ cfg( feature = "parquet_encryption" ) ]
413429 use crate :: config:: ConfigFileEncryptionProperties ;
414430 use crate :: config:: {
415- CdcOptions , ParquetColumnOptions , ParquetEncryptionOptions , ParquetOptions ,
431+ ParquetCdcOptions , ParquetColumnOptions , ParquetEncryptionOptions , ParquetOptions ,
416432 } ;
417433 use crate :: parquet_config:: DFParquetWriterVersion ;
418434 use parquet:: basic:: Compression ;
@@ -604,15 +620,7 @@ mod tests {
604620 skip_arrow_metadata : global_options_defaults. skip_arrow_metadata ,
605621 coerce_int96 : None ,
606622 coerce_int96_tz : None ,
607- content_defined_chunking : props
608- . content_defined_chunking ( )
609- . map ( |c| CdcOptions {
610- enabled : true ,
611- min_chunk_size : c. min_chunk_size ,
612- max_chunk_size : c. max_chunk_size ,
613- norm_level : c. norm_level ,
614- } )
615- . unwrap_or_default ( ) ,
623+ content_defined_chunking : props. content_defined_chunking ( ) . into ( ) ,
616624 } ,
617625 column_specific_options,
618626 key_value_metadata,
@@ -826,7 +834,7 @@ mod tests {
826834 #[ test]
827835 fn test_cdc_enabled_with_custom_options ( ) {
828836 let mut opts = TableParquetOptions :: default ( ) ;
829- opts. global . content_defined_chunking = CdcOptions {
837+ opts. global . content_defined_chunking = ParquetCdcOptions {
830838 enabled : true ,
831839 min_chunk_size : 128 * 1024 ,
832840 max_chunk_size : 512 * 1024 ,
@@ -854,7 +862,7 @@ mod tests {
854862 fn test_cdc_params_ignored_when_disabled ( ) {
855863 // Parameters are customized but `enabled` is false, so CDC stays off.
856864 let mut opts = TableParquetOptions :: default ( ) ;
857- opts. global . content_defined_chunking = CdcOptions {
865+ opts. global . content_defined_chunking = ParquetCdcOptions {
858866 enabled : false ,
859867 min_chunk_size : 128 * 1024 ,
860868 max_chunk_size : 512 * 1024 ,
@@ -869,7 +877,7 @@ mod tests {
869877 #[ test]
870878 fn test_cdc_round_trip_through_writer_props ( ) {
871879 let mut opts = TableParquetOptions :: default ( ) ;
872- opts. global . content_defined_chunking = CdcOptions {
880+ opts. global . content_defined_chunking = ParquetCdcOptions {
873881 enabled : true ,
874882 min_chunk_size : 64 * 1024 ,
875883 max_chunk_size : 2 * 1024 * 1024 ,
@@ -887,31 +895,6 @@ mod tests {
887895 assert_eq ! ( cdc. norm_level, -1 ) ;
888896 }
889897
890- #[ test]
891- fn test_cdc_validation_zero_min_chunk_size ( ) {
892- let mut opts = TableParquetOptions :: default ( ) ;
893- opts. global . content_defined_chunking = CdcOptions {
894- enabled : true ,
895- min_chunk_size : 0 ,
896- ..CdcOptions :: default ( )
897- } ;
898- opts. arrow_schema ( & Arc :: new ( Schema :: empty ( ) ) ) ;
899- assert ! ( WriterPropertiesBuilder :: try_from( & opts) . is_err( ) ) ;
900- }
901-
902- #[ test]
903- fn test_cdc_validation_max_not_greater_than_min ( ) {
904- let mut opts = TableParquetOptions :: default ( ) ;
905- opts. global . content_defined_chunking = CdcOptions {
906- enabled : true ,
907- min_chunk_size : 512 * 1024 ,
908- max_chunk_size : 256 * 1024 ,
909- ..CdcOptions :: default ( )
910- } ;
911- opts. arrow_schema ( & Arc :: new ( Schema :: empty ( ) ) ) ;
912- assert ! ( WriterPropertiesBuilder :: try_from( & opts) . is_err( ) ) ;
913- }
914-
915898 #[ test]
916899 fn test_bloom_filter_set_ndv_only ( ) {
917900 // the TableParquetOptions::default, with only ndv set
0 commit comments