@@ -11,30 +11,32 @@ use vortex_array::ExecutionCtx;
1111use vortex_array:: IntoArray ;
1212use vortex_array:: LEGACY_SESSION ;
1313use vortex_array:: VortexSessionExecute ;
14+ use vortex_array:: aggregate_fn:: fns:: sum:: sum;
1415use vortex_array:: arrays:: ConstantArray ;
1516use vortex_array:: arrays:: StructArray ;
17+ use vortex_array:: arrays:: struct_:: StructArrayExt ;
1618use vortex_array:: builders:: ArrayBuilder ;
1719use vortex_array:: builders:: BoolBuilder ;
1820use vortex_array:: builders:: builder_with_capacity;
1921use vortex_array:: dtype:: DType ;
2022use vortex_array:: dtype:: FieldName ;
2123use vortex_array:: dtype:: Nullability ;
24+ use vortex_array:: dtype:: PType ;
2225use vortex_array:: expr:: stats:: Precision ;
2326use vortex_array:: expr:: stats:: Stat ;
2427use vortex_array:: expr:: stats:: StatsProvider ;
2528use vortex_array:: scalar:: Scalar ;
2629use vortex_array:: scalar:: ScalarTruncation ;
2730use vortex_array:: scalar:: lower_bound;
2831use vortex_array:: scalar:: upper_bound;
32+ use vortex_array:: stats:: StatsSet ;
2933use vortex_array:: validity:: Validity ;
3034use vortex_buffer:: BufferString ;
3135use vortex_buffer:: ByteBuffer ;
32- use vortex_error:: VortexExpect ;
3336use vortex_error:: VortexResult ;
3437
3538use crate :: layouts:: zoned:: schema:: MAX_IS_TRUNCATED ;
3639use crate :: layouts:: zoned:: schema:: MIN_IS_TRUNCATED ;
37- use crate :: layouts:: zoned:: zone_map:: ZoneMap ;
3840
3941/// Accumulates write-time statistics for each logical zone.
4042pub struct StatsAccumulator {
@@ -88,14 +90,9 @@ impl StatsAccumulator {
8890 Ok ( ( ) )
8991 }
9092
91- /// Finishes the accumulator into a [`ZoneMap`].
92- ///
93- /// Returns `None` if none of the requested statistics can be computed, for example they are
94- /// not applicable to the column's data type.
95- pub fn as_stats_table ( & mut self ) -> VortexResult < Option < ZoneMap > > {
93+ pub fn as_array ( & mut self ) -> VortexResult < Option < StructArray > > {
9694 let mut names = Vec :: new ( ) ;
9795 let mut fields = Vec :: new ( ) ;
98- let mut stats = Vec :: new ( ) ;
9996
10097 for builder in self
10198 . builders
@@ -110,7 +107,6 @@ impl StatsAccumulator {
110107 continue ;
111108 }
112109
113- stats. push ( builder. stat ( ) ) ;
114110 names. extend ( values. names ) ;
115111 fields. extend ( values. arrays ) ;
116112 }
@@ -119,15 +115,49 @@ impl StatsAccumulator {
119115 return Ok ( None ) ;
120116 }
121117
122- let array = StructArray :: try_new ( names. into ( ) , fields, self . length , Validity :: NonNullable )
123- . vortex_expect ( "Failed to create zone map" ) ;
124- let stats = stats. into ( ) ;
118+ StructArray :: try_new ( names. into ( ) , fields, self . length , Validity :: NonNullable ) . map ( Some )
119+ }
125120
126- // SAFETY: `StatsAccumulator` builds the struct fields from `stats_builder_with_capacity`
127- // using the same field-ordering and truncation-column rules as `stats_table_dtype`.
128- // The `stats` list is collected in that same sorted order, so the resulting struct array
129- // matches the expected zoned stats-table dtype by construction.
130- Ok ( Some ( unsafe { ZoneMap :: new_unchecked ( array, stats) } ) )
121+ /// Returns an aggregated stats set for the table.
122+ pub fn as_stats_set (
123+ & mut self ,
124+ stats : & [ Stat ] ,
125+ ctx : & mut ExecutionCtx ,
126+ ) -> VortexResult < StatsSet > {
127+ let mut stats_set = StatsSet :: default ( ) ;
128+ let Some ( array) = self . as_array ( ) ? else {
129+ return Ok ( stats_set) ;
130+ } ;
131+
132+ for & stat in stats {
133+ let Some ( array) = array. unmasked_field_by_name_opt ( stat. name ( ) ) else {
134+ continue ;
135+ } ;
136+
137+ // Different stats need different aggregations
138+ match stat {
139+ // For stats that are associative, we can just compute them over the stat column
140+ Stat :: Min | Stat :: Max | Stat :: Sum => {
141+ if let Some ( s) = array. statistics ( ) . compute_stat ( stat, ctx) ?
142+ && let Some ( v) = s. into_value ( )
143+ {
144+ stats_set. set ( stat, Precision :: exact ( v) )
145+ }
146+ }
147+ // These stats sum up
148+ Stat :: NullCount | Stat :: NaNCount | Stat :: UncompressedSizeInBytes => {
149+ if let Some ( sum_value) = sum ( array, ctx) ?
150+ . cast ( & DType :: Primitive ( PType :: U64 , Nullability :: Nullable ) ) ?
151+ . into_value ( )
152+ {
153+ stats_set. set ( stat, Precision :: exact ( sum_value) ) ;
154+ }
155+ }
156+ // We could implement these aggregations in the future, but for now they're unused
157+ Stat :: IsConstant | Stat :: IsSorted | Stat :: IsStrictSorted => { }
158+ }
159+ }
160+ Ok ( stats_set)
131161 }
132162}
133163
@@ -385,12 +415,9 @@ mod tests {
385415 . vortex_expect ( "push_chunk should succeed for test data" ) ;
386416 acc. push_chunk ( & builder2. finish ( ) , & mut ctx)
387417 . vortex_expect ( "push_chunk should succeed for test data" ) ;
388- let stats_table = acc
389- . as_stats_table ( )
390- . unwrap ( )
391- . expect ( "Must have stats table" ) ;
418+ let stats_table = acc. as_array ( ) . unwrap ( ) . expect ( "Must have stats table" ) ;
392419 assert_eq ! (
393- stats_table. array ( ) . names( ) . as_ref( ) ,
420+ stats_table. names( ) . as_ref( ) ,
394421 & [
395422 Stat :: Max . name( ) ,
396423 MAX_IS_TRUNCATED ,
@@ -399,7 +426,6 @@ mod tests {
399426 ]
400427 ) ;
401428 let field1_bool = stats_table
402- . array ( )
403429 . unmasked_field ( 1 )
404430 . clone ( )
405431 . execute :: < BoolArray > ( & mut ctx)
@@ -409,7 +435,6 @@ mod tests {
409435 BitBuffer :: from( vec![ false , true ] )
410436 ) ;
411437 let field3_bool = stats_table
412- . array ( )
413438 . unmasked_field ( 3 )
414439 . clone ( )
415440 . execute :: < BoolArray > ( & mut ctx)
@@ -427,12 +452,9 @@ mod tests {
427452 let mut acc = StatsAccumulator :: new ( array. dtype ( ) , & [ Stat :: Max , Stat :: Min , Stat :: Sum ] , 12 ) ;
428453 acc. push_chunk ( & array, & mut ctx)
429454 . vortex_expect ( "push_chunk should succeed for test array" ) ;
430- let stats_table = acc
431- . as_stats_table ( )
432- . unwrap ( )
433- . expect ( "Must have stats table" ) ;
455+ let stats_table = acc. as_array ( ) . unwrap ( ) . expect ( "Must have stats table" ) ;
434456 assert_eq ! (
435- stats_table. array ( ) . names( ) . as_ref( ) ,
457+ stats_table. names( ) . as_ref( ) ,
436458 & [
437459 Stat :: Max . name( ) ,
438460 MAX_IS_TRUNCATED ,
@@ -442,14 +464,12 @@ mod tests {
442464 ]
443465 ) ;
444466 let field1_bool = stats_table
445- . array ( )
446467 . unmasked_field ( 1 )
447468 . clone ( )
448469 . execute :: < BoolArray > ( & mut ctx)
449470 . unwrap ( ) ;
450471 assert_eq ! ( field1_bool. to_bit_buffer( ) , BitBuffer :: from( vec![ false ] ) ) ;
451472 let field3_bool = stats_table
452- . array ( )
453473 . unmasked_field ( 3 )
454474 . clone ( )
455475 . execute :: < BoolArray > ( & mut ctx)
0 commit comments