@@ -126,7 +126,10 @@ impl StatsCatalog for FileStatsLayoutReader {
126126
127127 let stat_value = field_stats. get ( stat) ?. as_exact ( ) ?;
128128 let field_dtype = self . struct_fields . field_by_index ( field_idx) ?;
129- let stat_scalar = Scalar :: try_new ( field_dtype, Some ( stat_value) ) . ok ( ) ?;
129+ // Use the stat's own dtype rather than the field dtype. For example,
130+ // NullCount is always u64 regardless of the column type.
131+ let stat_dtype = stat. dtype ( & field_dtype) ?;
132+ let stat_scalar = Scalar :: try_new ( stat_dtype, Some ( stat_value) ) . ok ( ) ?;
130133
131134 Some ( lit ( stat_scalar) )
132135 }
@@ -209,16 +212,20 @@ mod tests {
209212
210213 use vortex_array:: ArrayContext ;
211214 use vortex_array:: IntoArray as _;
215+ use vortex_array:: arrays:: PrimitiveArray ;
212216 use vortex_array:: arrays:: StructArray ;
217+ use vortex_array:: arrays:: datetime:: TemporalData ;
213218 use vortex_array:: dtype:: DType ;
214219 use vortex_array:: dtype:: Nullability ;
215220 use vortex_array:: dtype:: PType ;
216221 use vortex_array:: expr:: get_item;
217222 use vortex_array:: expr:: gt;
223+ use vortex_array:: expr:: is_null;
218224 use vortex_array:: expr:: lit;
219225 use vortex_array:: expr:: root;
220226 use vortex_array:: expr:: stats:: Precision ;
221227 use vortex_array:: expr:: stats:: Stat ;
228+ use vortex_array:: extension:: datetime:: TimeUnit ;
222229 use vortex_array:: scalar:: ScalarValue ;
223230 use vortex_array:: scalar_fn:: session:: ScalarFnSession ;
224231 use vortex_array:: session:: ArraySession ;
@@ -337,4 +344,58 @@ mod tests {
337344 Ok ( ( ) )
338345 } )
339346 }
347+
348+ /// Regression test: `IS NULL` on a nullable timestamp column must not fail with a
349+ /// dtype mismatch. The bug was that `stats_ref` used the *field* dtype (timestamp)
350+ /// for the `NullCount` stat scalar instead of the stat's own dtype (u64).
351+ #[ test]
352+ fn is_null_pruning_on_nullable_timestamp_column ( ) -> VortexResult < ( ) > {
353+ block_on ( |handle| async {
354+ let ctx = ArrayContext :: empty ( ) ;
355+ let segments = Arc :: new ( TestSegments :: default ( ) ) ;
356+ let ( ptr, eof) = SequenceId :: root ( ) . split ( ) ;
357+
358+ // Build a struct with a nullable timestamp column containing some nulls.
359+ let prim_array =
360+ PrimitiveArray :: from_option_iter ( [ Some ( 1_000_000i64 ) , None , Some ( 3_000_000 ) ] )
361+ . into_array ( ) ;
362+ let ts_data = TemporalData :: new_timestamp ( prim_array, TimeUnit :: Microseconds , None ) ;
363+ let ts_dtype = ts_data. dtype ( ) . clone ( ) ;
364+ let ts_array = ts_data. into_array ( ) ;
365+
366+ let struct_array = StructArray :: from_fields ( [ ( "deleted_at" , ts_array) ] . as_slice ( ) ) ?;
367+
368+ let strategy = TableStrategy :: new (
369+ Arc :: new ( FlatLayoutStrategy :: default ( ) ) ,
370+ Arc :: new ( FlatLayoutStrategy :: default ( ) ) ,
371+ ) ;
372+ let layout = strategy
373+ . write_stream (
374+ ctx,
375+ segments. clone ( ) ,
376+ struct_array. into_array ( ) . to_array_stream ( ) . sequenced ( ptr) ,
377+ eof,
378+ handle,
379+ )
380+ . await ?;
381+
382+ let child = layout. new_reader ( "" . into ( ) , segments, & SESSION ) ?;
383+
384+ // File-level stats: 1 null in deleted_at.
385+ let mut stats = StatsSet :: default ( ) ;
386+ stats. set ( Stat :: NullCount , Precision :: exact ( ScalarValue :: from ( 1u64 ) ) ) ;
387+ let file_stats = FileStatistics :: new ( Arc :: from ( [ stats] ) , Arc :: from ( [ ts_dtype] ) ) ;
388+
389+ let reader = FileStatsLayoutReader :: new ( child, file_stats, SESSION . clone ( ) ) ;
390+
391+ // `is_null(deleted_at)` — should NOT panic or error due to dtype mismatch.
392+ let expr = is_null ( get_item ( "deleted_at" , root ( ) ) ) ;
393+ let mask = Mask :: new_true ( 3 ) ;
394+ let result = reader. pruning_evaluation ( & ( 0 ..3 ) , & expr, mask) ?. await ?;
395+ // null_count is 1 (non-zero), so is_null is not falsified => not pruned.
396+ assert_eq ! ( result, Mask :: new_true( 3 ) ) ;
397+
398+ Ok ( ( ) )
399+ } )
400+ }
340401}
0 commit comments