@@ -333,16 +333,16 @@ impl<'a> DFParquetMetadata<'a> {
333333 } ,
334334 ) ;
335335
336- get_col_stats (
337- logical_file_schema ,
338- & null_counts_array ,
339- & mut max_accs ,
340- & mut min_accs ,
341- & mut is_max_value_exact,
342- & mut is_min_value_exact ,
343- & column_byte_sizes ,
344- & distinct_counts_array ,
345- )
336+ let mut accumulators = StatisticsAccumulators {
337+ min_accs : & mut min_accs ,
338+ max_accs : & mut max_accs ,
339+ null_counts_array : & mut null_counts_array ,
340+ is_min_value_exact : & mut is_min_value_exact ,
341+ is_max_value_exact : & mut is_max_value_exact,
342+ column_byte_sizes : & mut column_byte_sizes ,
343+ distinct_counts_array : & mut distinct_counts_array ,
344+ } ;
345+ accumulators . build_column_statistics ( logical_file_schema )
346346 } else {
347347 // Record column sizes
348348 logical_file_schema
@@ -415,55 +415,6 @@ fn create_max_min_accs(
415415 ( max_values, min_values)
416416}
417417
418- #[ expect( clippy:: too_many_arguments) ]
419- fn get_col_stats (
420- schema : & Schema ,
421- null_counts : & [ Precision < usize > ] ,
422- max_values : & mut [ Option < MaxAccumulator > ] ,
423- min_values : & mut [ Option < MinAccumulator > ] ,
424- is_max_value_exact : & mut [ Option < bool > ] ,
425- is_min_value_exact : & mut [ Option < bool > ] ,
426- column_byte_sizes : & [ Precision < usize > ] ,
427- distinct_counts : & [ Precision < usize > ] ,
428- ) -> Vec < ColumnStatistics > {
429- ( 0 ..schema. fields ( ) . len ( ) )
430- . map ( |i| {
431- let max_value = match (
432- max_values. get_mut ( i) . unwrap ( ) ,
433- is_max_value_exact. get ( i) . unwrap ( ) ,
434- ) {
435- ( Some ( max_value) , Some ( true ) ) => {
436- max_value. evaluate ( ) . ok ( ) . map ( Precision :: Exact )
437- }
438- ( Some ( max_value) , Some ( false ) ) | ( Some ( max_value) , None ) => {
439- max_value. evaluate ( ) . ok ( ) . map ( Precision :: Inexact )
440- }
441- ( None , _) => None ,
442- } ;
443- let min_value = match (
444- min_values. get_mut ( i) . unwrap ( ) ,
445- is_min_value_exact. get ( i) . unwrap ( ) ,
446- ) {
447- ( Some ( min_value) , Some ( true ) ) => {
448- min_value. evaluate ( ) . ok ( ) . map ( Precision :: Exact )
449- }
450- ( Some ( min_value) , Some ( false ) ) | ( Some ( min_value) , None ) => {
451- min_value. evaluate ( ) . ok ( ) . map ( Precision :: Inexact )
452- }
453- ( None , _) => None ,
454- } ;
455- ColumnStatistics {
456- null_count : null_counts[ i] ,
457- max_value : max_value. unwrap_or ( Precision :: Absent ) ,
458- min_value : min_value. unwrap_or ( Precision :: Absent ) ,
459- sum_value : Precision :: Absent ,
460- distinct_count : distinct_counts[ i] ,
461- byte_size : column_byte_sizes[ i] ,
462- }
463- } )
464- . collect ( )
465- }
466-
467418/// Holds the accumulator state for collecting statistics from row groups
468419struct StatisticsAccumulators < ' a > {
469420 min_accs : & ' a mut [ Option < MinAccumulator > ] ,
@@ -475,6 +426,48 @@ struct StatisticsAccumulators<'a> {
475426 distinct_counts_array : & ' a mut [ Precision < usize > ] ,
476427}
477428
429+ impl StatisticsAccumulators < ' _ > {
430+ /// Converts the accumulated statistics into a vector of `ColumnStatistics`
431+ fn build_column_statistics ( & mut self , schema : & Schema ) -> Vec < ColumnStatistics > {
432+ ( 0 ..schema. fields ( ) . len ( ) )
433+ . map ( |i| {
434+ let max_value = match (
435+ self . max_accs . get_mut ( i) . unwrap ( ) ,
436+ self . is_max_value_exact . get ( i) . unwrap ( ) ,
437+ ) {
438+ ( Some ( max_value) , Some ( true ) ) => {
439+ max_value. evaluate ( ) . ok ( ) . map ( Precision :: Exact )
440+ }
441+ ( Some ( max_value) , Some ( false ) ) | ( Some ( max_value) , None ) => {
442+ max_value. evaluate ( ) . ok ( ) . map ( Precision :: Inexact )
443+ }
444+ ( None , _) => None ,
445+ } ;
446+ let min_value = match (
447+ self . min_accs . get_mut ( i) . unwrap ( ) ,
448+ self . is_min_value_exact . get ( i) . unwrap ( ) ,
449+ ) {
450+ ( Some ( min_value) , Some ( true ) ) => {
451+ min_value. evaluate ( ) . ok ( ) . map ( Precision :: Exact )
452+ }
453+ ( Some ( min_value) , Some ( false ) ) | ( Some ( min_value) , None ) => {
454+ min_value. evaluate ( ) . ok ( ) . map ( Precision :: Inexact )
455+ }
456+ ( None , _) => None ,
457+ } ;
458+ ColumnStatistics {
459+ null_count : self . null_counts_array [ i] ,
460+ max_value : max_value. unwrap_or ( Precision :: Absent ) ,
461+ min_value : min_value. unwrap_or ( Precision :: Absent ) ,
462+ sum_value : Precision :: Absent ,
463+ distinct_count : self . distinct_counts_array [ i] ,
464+ byte_size : self . column_byte_sizes [ i] ,
465+ }
466+ } )
467+ . collect ( )
468+ }
469+ }
470+
478471fn summarize_column_statistics (
479472 parquet_schema : & SchemaDescriptor ,
480473 logical_file_schema : & Schema ,
0 commit comments