File tree Expand file tree Collapse file tree
datafusion/physical-plan/src/joins Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -712,7 +712,13 @@ fn max_distinct_count(
712712 // NDV can never exceed the number of rows
713713 match num_rows {
714714 Precision :: Absent => dc,
715- _ => dc. min ( num_rows) . to_inexact ( ) ,
715+ _ => {
716+ if dc. get_value ( ) <= num_rows. get_value ( ) {
717+ dc
718+ } else {
719+ num_rows. to_inexact ( )
720+ }
721+ }
716722 }
717723 }
718724 _ => {
@@ -2974,4 +2980,76 @@ mod tests {
29742980 let result = max_distinct_count ( & num_rows, & stats) ;
29752981 assert_eq ! ( result, Exact ( 0 ) ) ;
29762982 }
2983+
#[test]
fn test_max_distinct_count_preserves_precision_when_not_capped() {
    // Each case is (num_rows, column distinct_count, expected result).
    // In every case the NDV is already within the row count, so it must be
    // returned unchanged, including its exact/inexact precision.
    let cases = [
        (Exact(10), Exact(5), Exact(5)),
        (Exact(10), Inexact(5), Inexact(5)),
        // Inexact num_rows does not affect an exact NDV that is within bounds
        (Inexact(10), Exact(5), Exact(5)),
    ];

    for (num_rows, distinct_count, expected) in cases {
        let stats = ColumnStatistics {
            distinct_count,
            ..Default::default()
        };
        assert_eq!(
            max_distinct_count(&num_rows, &stats),
            expected,
            "num_rows = {:?}, distinct_count = {:?}",
            num_rows,
            stats.distinct_count
        );
    }
}
3018+
#[test]
fn test_max_distinct_count_demotes_to_inexact_when_capped() {
    // Exact NDV > Exact num_rows is an illegal state (NDV <= num_rows is a
    // mathematical invariant), but the code handles it defensively by
    // capping and demoting to inexact.
    //
    // Each case is (num_rows, column distinct_count, expected result); the
    // NDV exceeds the row count in all of them, so the result is the row
    // count reported as inexact.
    let cases = [
        (Exact(10), Exact(15), Inexact(10)),
        (Inexact(10), Exact(15), Inexact(10)),
        (Exact(10), Inexact(15), Inexact(10)),
    ];

    for (num_rows, distinct_count, expected) in cases {
        let stats = ColumnStatistics {
            distinct_count,
            ..Default::default()
        };
        assert_eq!(
            max_distinct_count(&num_rows, &stats),
            expected,
            "num_rows = {:?}, distinct_count = {:?}",
            num_rows,
            stats.distinct_count
        );
    }
}
29773055}
You can’t perform that action at this time.
0 commit comments