Skip to content

Commit 9ff12fb

Browse files
committed
Preserve NDV precision in max_distinct_count when cap is not triggered
1 parent b7e8e16 commit 9ff12fb

1 file changed

Lines changed: 79 additions & 1 deletion

File tree

  • datafusion/physical-plan/src/joins

datafusion/physical-plan/src/joins/utils.rs

Lines changed: 79 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -713,7 +713,13 @@ fn max_distinct_count(
713713
// NDV can never exceed the number of rows
714714
match num_rows {
715715
Precision::Absent => dc,
716-
_ => dc.min(num_rows).to_inexact(),
716+
_ => {
717+
if dc.get_value() <= num_rows.get_value() {
718+
dc
719+
} else {
720+
num_rows.to_inexact()
721+
}
722+
}
717723
}
718724
}
719725
_ => {
@@ -3195,4 +3201,76 @@ mod tests {
31953201
assert_eq!(cmp_nl.compare(0, 0), Ordering::Greater);
31963202
assert_eq!(cmp_nl.compare(1, 1), Ordering::Less);
31973203
}
3204+
3205+
#[test]
// Checks that max_distinct_count returns the column's distinct_count unchanged,
// including its Exact/Inexact precision, in cases where the distinct count (5)
// does not exceed the row count (10).
3206+
fn test_max_distinct_count_preserves_precision_when_not_capped() {
// Exact NDV (5) with an exact row count (10): the expected result is Exact(5)
3207+
assert_eq!(
3208+
max_distinct_count(
3209+
&Exact(10),
3210+
&ColumnStatistics {
3211+
distinct_count: Exact(5),
3212+
..Default::default()
3213+
}
3214+
),
3215+
Exact(5)
3216+
);
// Inexact NDV (5) with an exact row count (10): the expected result is Inexact(5)
3217+
assert_eq!(
3218+
max_distinct_count(
3219+
&Exact(10),
3220+
&ColumnStatistics {
3221+
distinct_count: Inexact(5),
3222+
..Default::default()
3223+
}
3224+
),
3225+
Inexact(5)
3226+
);
3227+
// Inexact num_rows does not affect an exact NDV that is within bounds
3228+
assert_eq!(
3229+
max_distinct_count(
3230+
&Inexact(10),
3231+
&ColumnStatistics {
3232+
distinct_count: Exact(5),
3233+
..Default::default()
3234+
}
3235+
),
3236+
Exact(5)
3237+
);
3238+
}
3239+
3240+
#[test]
// Checks that when the column's distinct_count (15) exceeds the row count (10),
// max_distinct_count returns Inexact(10) for each Exact/Inexact combination
// exercised below.
3241+
fn test_max_distinct_count_demotes_to_inexact_when_capped() {
3242+
// Exact NDV > Exact num_rows is an illegal state (NDV <= num_rows is a
3243+
// mathematical invariant), but the code handles it defensively by
3244+
// capping and demoting to inexact
3245+
assert_eq!(
3246+
max_distinct_count(
3247+
&Exact(10),
3248+
&ColumnStatistics {
3249+
distinct_count: Exact(15),
3250+
..Default::default()
3251+
}
3252+
),
3253+
Inexact(10)
3254+
);
// Exact NDV (15) above an inexact row count (10): the expected result is Inexact(10)
3255+
assert_eq!(
3256+
max_distinct_count(
3257+
&Inexact(10),
3258+
&ColumnStatistics {
3259+
distinct_count: Exact(15),
3260+
..Default::default()
3261+
}
3262+
),
3263+
Inexact(10)
3264+
);
// Inexact NDV (15) above an exact row count (10): the expected result is Inexact(10)
3265+
assert_eq!(
3266+
max_distinct_count(
3267+
&Exact(10),
3268+
&ColumnStatistics {
3269+
distinct_count: Inexact(15),
3270+
..Default::default()
3271+
}
3272+
),
3273+
Inexact(10)
3274+
);
3275+
}
31983276
}

0 commit comments

Comments
 (0)