@@ -23,6 +23,7 @@ use vortex_array::dtype::StructFields;
2323use vortex_array:: expr:: Expression ;
2424use vortex_array:: expr:: StatsCatalog ;
2525use vortex_array:: expr:: lit;
26+ use vortex_array:: expr:: pruning:: ROW_COUNT_FIELD ;
2627use vortex_array:: expr:: stats:: Stat ;
2728use vortex_array:: scalar:: Scalar ;
2829use vortex_array:: scalar_fn:: fns:: literal:: Literal ;
@@ -115,6 +116,11 @@ impl FileStatsLayoutReader {
115116/// Implements [`StatsCatalog`] to provide file-level stats to expressions during pruning evaluation.
116117impl StatsCatalog for FileStatsLayoutReader {
117118 fn stats_ref ( & self , field_path : & FieldPath , stat : Stat ) -> Option < Expression > {
119+ if field_path. parts ( ) . len ( ) == 1 && field_path. parts ( ) [ 0 ] . as_name ( ) == Some ( ROW_COUNT_FIELD )
120+ {
121+ return Some ( lit ( self . child . row_count ( ) ) ) ;
122+ }
123+
118124 // FileStats currently only holds top-level field statistics.
119125 if field_path. parts ( ) . len ( ) != 1 {
120126 return None ;
@@ -126,7 +132,8 @@ impl StatsCatalog for FileStatsLayoutReader {
126132
127133 let stat_value = field_stats. get ( stat) ?. as_exact ( ) ?;
128134 let field_dtype = self . struct_fields . field_by_index ( field_idx) ?;
129- let stat_scalar = Scalar :: try_new ( field_dtype, Some ( stat_value) ) . ok ( ) ?;
135+ let stat_dtype = stat. dtype ( & field_dtype) ?;
136+ let stat_scalar = Scalar :: try_new ( stat_dtype, Some ( stat_value) ) . ok ( ) ?;
130137
131138 Some ( lit ( stat_scalar) )
132139 }
@@ -209,12 +216,14 @@ mod tests {
209216
210217 use vortex_array:: ArrayContext ;
211218 use vortex_array:: IntoArray as _;
219+ use vortex_array:: arrays:: PrimitiveArray ;
212220 use vortex_array:: arrays:: StructArray ;
213221 use vortex_array:: dtype:: DType ;
214222 use vortex_array:: dtype:: Nullability ;
215223 use vortex_array:: dtype:: PType ;
216224 use vortex_array:: expr:: get_item;
217225 use vortex_array:: expr:: gt;
226+ use vortex_array:: expr:: is_not_null;
218227 use vortex_array:: expr:: lit;
219228 use vortex_array:: expr:: root;
220229 use vortex_array:: expr:: stats:: Precision ;
@@ -259,6 +268,18 @@ mod tests {
259268 )
260269 }
261270
271+ fn test_file_null_count_stats ( null_count : u64 ) -> FileStatistics {
272+ let mut stats = StatsSet :: default ( ) ;
273+ stats. set (
274+ Stat :: NullCount ,
275+ Precision :: exact ( ScalarValue :: from ( null_count) ) ,
276+ ) ;
277+ FileStatistics :: new (
278+ Arc :: from ( [ stats] ) ,
279+ Arc :: from ( [ DType :: Primitive ( PType :: I32 , Nullability :: Nullable ) ] ) ,
280+ )
281+ }
282+
262283 #[ test]
263284 fn pruning_when_filter_out_of_range ( ) -> VortexResult < ( ) > {
264285 block_on ( |handle| async {
@@ -337,4 +358,47 @@ mod tests {
337358 Ok ( ( ) )
338359 } )
339360 }
361+
/// Checks that an `is_not_null` filter over a column prunes every row when the file-level
/// statistics carry an exact null count equal to the number of rows.
#[test]
fn pruning_is_not_null_when_file_is_all_null() -> VortexResult<()> {
    block_on(|handle| async {
        let session = SESSION.clone().with_handle(handle);
        let array_ctx = ArrayContext::empty();
        let segments = Arc::new(TestSegments::default());
        let (ptr, eof) = SequenceId::root().split();

        // A struct with one nullable i32 column named "col": five rows, all of them null.
        let all_null_col = PrimitiveArray::from_option_iter([None::<i32>; 5]).into_array();
        let input = StructArray::from_fields([("col", all_null_col)].as_slice())?;

        // Write the struct through a table strategy built from two flat layout strategies.
        let table_strategy = TableStrategy::new(
            Arc::new(FlatLayoutStrategy::default()),
            Arc::new(FlatLayoutStrategy::default()),
        );
        let segment_sink = Arc::<TestSegments>::clone(&segments);
        let input_stream = input.into_array().to_array_stream().sequenced(ptr);
        let layout = table_strategy
            .write_stream(array_ctx, segment_sink, input_stream, eof, &session)
            .await?;

        // Wrap the written layout's reader with file statistics reporting five nulls.
        let inner_reader = layout.new_reader("".into(), segments, &SESSION)?;
        let file_stats = test_file_null_count_stats(5);
        let reader = FileStatsLayoutReader::new(inner_reader, file_stats, SESSION.clone());

        // Start with every row selected and evaluate `col IS NOT NULL` over rows 0..5.
        let filter = is_not_null(get_item("col", root()));
        let row_range = 0..5;
        let all_selected = Mask::new_true(5);
        let pruned = reader
            .pruning_evaluation(&row_range, &filter, all_selected)?
            .await?;

        // Every row is expected to be pruned away.
        assert_eq!(pruned, Mask::new_false(5));

        Ok(())
    })
}
340404}
0 commit comments