@@ -2444,4 +2444,137 @@ mod tests {
24442444 ) ;
24452445 Ok ( ( ) )
24462446 }
2447+
2448+ #[ tokio:: test]
2449+ async fn test_filter_statistics_equality_int8_ndv ( ) -> Result < ( ) > {
2450+ // a: min=-100, max=100, ndv=50
2451+ let schema = Schema :: new ( vec ! [ Field :: new( "a" , DataType :: Int8 , false ) ] ) ;
2452+ let input = Arc :: new ( StatisticsExec :: new (
2453+ Statistics {
2454+ num_rows : Precision :: Inexact ( 100 ) ,
2455+ total_byte_size : Precision :: Inexact ( 100 ) ,
2456+ column_statistics : vec ! [ ColumnStatistics {
2457+ min_value: Precision :: Inexact ( ScalarValue :: Int8 ( Some ( -100 ) ) ) ,
2458+ max_value: Precision :: Inexact ( ScalarValue :: Int8 ( Some ( 100 ) ) ) ,
2459+ distinct_count: Precision :: Inexact ( 50 ) ,
2460+ ..Default :: default ( )
2461+ } ] ,
2462+ } ,
2463+ schema. clone ( ) ,
2464+ ) ) ;
2465+
2466+ let predicate = Arc :: new ( BinaryExpr :: new (
2467+ Arc :: new ( Column :: new ( "a" , 0 ) ) ,
2468+ Operator :: Eq ,
2469+ Arc :: new ( Literal :: new ( ScalarValue :: Int8 ( Some ( 42 ) ) ) ) ,
2470+ ) ) ;
2471+ let filter: Arc < dyn ExecutionPlan > =
2472+ Arc :: new ( FilterExec :: try_new ( predicate, input) ?) ;
2473+ let statistics = filter. partition_statistics ( None ) ?;
2474+ assert_eq ! (
2475+ statistics. column_statistics[ 0 ] . distinct_count,
2476+ Precision :: Exact ( 1 )
2477+ ) ;
2478+ Ok ( ( ) )
2479+ }
2480+
2481+ #[ tokio:: test]
2482+ async fn test_filter_statistics_equality_int64_ndv ( ) -> Result < ( ) > {
2483+ // a: min=0, max=1_000_000, ndv=100_000
2484+ let schema = Schema :: new ( vec ! [ Field :: new( "a" , DataType :: Int64 , false ) ] ) ;
2485+ let input = Arc :: new ( StatisticsExec :: new (
2486+ Statistics {
2487+ num_rows : Precision :: Inexact ( 100_000 ) ,
2488+ total_byte_size : Precision :: Inexact ( 800_000 ) ,
2489+ column_statistics : vec ! [ ColumnStatistics {
2490+ min_value: Precision :: Inexact ( ScalarValue :: Int64 ( Some ( 0 ) ) ) ,
2491+ max_value: Precision :: Inexact ( ScalarValue :: Int64 ( Some ( 1_000_000 ) ) ) ,
2492+ distinct_count: Precision :: Inexact ( 100_000 ) ,
2493+ ..Default :: default ( )
2494+ } ] ,
2495+ } ,
2496+ schema. clone ( ) ,
2497+ ) ) ;
2498+
2499+ let predicate = Arc :: new ( BinaryExpr :: new (
2500+ Arc :: new ( Column :: new ( "a" , 0 ) ) ,
2501+ Operator :: Eq ,
2502+ Arc :: new ( Literal :: new ( ScalarValue :: Int64 ( Some ( 42 ) ) ) ) ,
2503+ ) ) ;
2504+ let filter: Arc < dyn ExecutionPlan > =
2505+ Arc :: new ( FilterExec :: try_new ( predicate, input) ?) ;
2506+ let statistics = filter. partition_statistics ( None ) ?;
2507+ assert_eq ! (
2508+ statistics. column_statistics[ 0 ] . distinct_count,
2509+ Precision :: Exact ( 1 )
2510+ ) ;
2511+ Ok ( ( ) )
2512+ }
2513+
2514+ #[ tokio:: test]
2515+ async fn test_filter_statistics_equality_float32_ndv ( ) -> Result < ( ) > {
2516+ // a: min=0.0, max=100.0, ndv=50
2517+ let schema = Schema :: new ( vec ! [ Field :: new( "a" , DataType :: Float32 , false ) ] ) ;
2518+ let input = Arc :: new ( StatisticsExec :: new (
2519+ Statistics {
2520+ num_rows : Precision :: Inexact ( 100 ) ,
2521+ total_byte_size : Precision :: Inexact ( 400 ) ,
2522+ column_statistics : vec ! [ ColumnStatistics {
2523+ min_value: Precision :: Inexact ( ScalarValue :: Float32 ( Some ( 0.0 ) ) ) ,
2524+ max_value: Precision :: Inexact ( ScalarValue :: Float32 ( Some ( 100.0 ) ) ) ,
2525+ distinct_count: Precision :: Inexact ( 50 ) ,
2526+ ..Default :: default ( )
2527+ } ] ,
2528+ } ,
2529+ schema. clone ( ) ,
2530+ ) ) ;
2531+
2532+ let predicate = Arc :: new ( BinaryExpr :: new (
2533+ Arc :: new ( Column :: new ( "a" , 0 ) ) ,
2534+ Operator :: Eq ,
2535+ Arc :: new ( Literal :: new ( ScalarValue :: Float32 ( Some ( 3.14 ) ) ) ) ,
2536+ ) ) ;
2537+ let filter: Arc < dyn ExecutionPlan > =
2538+ Arc :: new ( FilterExec :: try_new ( predicate, input) ?) ;
2539+ let statistics = filter. partition_statistics ( None ) ?;
2540+ assert_eq ! (
2541+ statistics. column_statistics[ 0 ] . distinct_count,
2542+ Precision :: Exact ( 1 )
2543+ ) ;
2544+ Ok ( ( ) )
2545+ }
2546+
2547+ #[ tokio:: test]
2548+ async fn test_filter_statistics_equality_reversed_ndv ( ) -> Result < ( ) > {
2549+ // a: min=1, max=100, ndv=80
2550+ let schema = Schema :: new ( vec ! [ Field :: new( "a" , DataType :: Int32 , false ) ] ) ;
2551+ let input = Arc :: new ( StatisticsExec :: new (
2552+ Statistics {
2553+ num_rows : Precision :: Inexact ( 100 ) ,
2554+ total_byte_size : Precision :: Inexact ( 400 ) ,
2555+ column_statistics : vec ! [ ColumnStatistics {
2556+ min_value: Precision :: Inexact ( ScalarValue :: Int32 ( Some ( 1 ) ) ) ,
2557+ max_value: Precision :: Inexact ( ScalarValue :: Int32 ( Some ( 100 ) ) ) ,
2558+ distinct_count: Precision :: Inexact ( 80 ) ,
2559+ ..Default :: default ( )
2560+ } ] ,
2561+ } ,
2562+ schema. clone ( ) ,
2563+ ) ) ;
2564+
2565+ // 42 = a (literal on the left)
2566+ let predicate = Arc :: new ( BinaryExpr :: new (
2567+ Arc :: new ( Literal :: new ( ScalarValue :: Int32 ( Some ( 42 ) ) ) ) ,
2568+ Operator :: Eq ,
2569+ Arc :: new ( Column :: new ( "a" , 0 ) ) ,
2570+ ) ) ;
2571+ let filter: Arc < dyn ExecutionPlan > =
2572+ Arc :: new ( FilterExec :: try_new ( predicate, input) ?) ;
2573+ let statistics = filter. partition_statistics ( None ) ?;
2574+ assert_eq ! (
2575+ statistics. column_statistics[ 0 ] . distinct_count,
2576+ Precision :: Exact ( 1 )
2577+ ) ;
2578+ Ok ( ( ) )
2579+ }
24472580}
0 commit comments