Skip to content

Commit 991084b

Browse files
Dandandanclaude
andcommitted
fix: improve inner join cardinality estimation for FK joins
When distinct count statistics are absent, the join cardinality estimator falls back to using num_rows as the distinct count estimate. Previously it used max(left_distinct, right_distinct) as the selectivity denominator, which for a dimension-fact FK join like warehouse(5) ⋈ catalog_sales(1.4M) would compute: (5 * 1.4M) / max(5, 1.4M) = 5 rows — a severe underestimate. This caused the optimizer to keep the 1.4M-row fact table as the hash join build side (since it appeared to be "5 rows"), leading to massive concat_batches allocations and 100x+ slowdowns on queries like TPC-DS Q99. Fix: when no actual distinct count stats are available, use min(left_distinct, right_distinct) instead of max. This gives (5 * 1.4M) / min(5, 1.4M) = 1.4M — the correct FK join estimate. The optimizer then correctly swaps to put the small dimension table as the build side. Also handle the edge case where selectivity is 0 (one side has no non-null values): return 0 rows instead of None. TPC-DS Q99 improvement: 10.4s → 59ms (157x faster). Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 5ba06ac commit 991084b

File tree

1 file changed

+41
-13
lines changed
  • datafusion/physical-plan/src/joins

1 file changed

+41
-13
lines changed

datafusion/physical-plan/src/joins/utils.rs

Lines changed: 41 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -610,19 +610,38 @@ fn estimate_inner_join_cardinality(
610610

611611
// The algorithm here is partly based on the non-histogram selectivity estimation
612612
// from Spark's Catalyst optimizer.
613+
//
614+
// For each join key column, estimate the number of distinct values and use
615+
// the maximum as the selectivity denominator: cardinality = (L * R) / max_distinct.
616+
//
617+
// When actual distinct_count statistics are available, use max(left, right)
618+
// as in Spark's algorithm. When falling back to num_rows (no distinct stats),
619+
// use min(left, right) instead. This handles the common FK join pattern:
620+
// e.g. warehouse(5) ⋈ catalog_sales(1.4M) should estimate ~1.4M, not 5.
621+
// Using max(5, 1.4M) = 1.4M as denominator gives (5 * 1.4M) / 1.4M = 5 (wrong).
622+
// Using min(5, 1.4M) = 5 as denominator gives (5 * 1.4M) / 5 = 1.4M (correct).
613623
let mut join_selectivity = Precision::Absent;
614624
for (left_stat, right_stat) in left_column_statistics
615625
.iter()
616626
.zip(right_column_statistics.iter())
617627
{
628+
let left_has_distinct = left_stat.distinct_count.get_value().is_some();
629+
let right_has_distinct = right_stat.distinct_count.get_value().is_some();
630+
618631
let left_max_distinct = max_distinct_count(&left_num_rows, left_stat);
619632
let right_max_distinct = max_distinct_count(&right_num_rows, right_stat);
620-
let max_distinct = left_max_distinct.max(&right_max_distinct);
621-
if max_distinct.get_value().is_some() {
622-
// Seems like there are a few implementations of this algorithm that implement
623-
// exponential decay for the selectivity (like Hive's Optiq Optimizer). Needs
624-
// further exploration.
625-
join_selectivity = max_distinct;
633+
634+
// When actual distinct counts are known, use max (Spark's algorithm).
635+
// When falling back to num_rows estimates, use min to avoid
636+
// underestimating FK joins where the large table has many duplicates.
637+
let selectivity = if left_has_distinct || right_has_distinct {
638+
left_max_distinct.max(&right_max_distinct)
639+
} else {
640+
left_max_distinct.min(&right_max_distinct)
641+
};
642+
643+
if selectivity.get_value().is_some() {
644+
join_selectivity = selectivity;
626645
}
627646
}
628647

@@ -638,6 +657,10 @@ fn estimate_inner_join_cardinality(
638657
Precision::Inexact(value) if value > 0 => {
639658
Some(Precision::Inexact((left_num_rows * right_num_rows) / value))
640659
}
660+
// Selectivity is zero (one side has no non-null values), so the join
661+
// produces no rows. Return 0 rather than None to avoid losing the estimate.
662+
Precision::Exact(0) => Some(Precision::Exact(0)),
663+
Precision::Inexact(0) => Some(Precision::Inexact(0)),
641664
// Since we don't have any information about the selectivity (which is derived
642665
// from the number of distinct rows information) we can give up here for now.
643666
// And let other passes handle this (otherwise we would need to produce an
@@ -2159,22 +2182,25 @@ mod tests {
21592182
Some(Inexact(10)),
21602183
),
21612184
// range(left) > range(right)
2185+
// Without distinct count stats, min(left_distinct, right_distinct)
2186+
// is used: min(5, 3) = 3, so (10*10)/3 = 33
21622187
(
21632188
(10, Inexact(6), Inexact(10), Absent, Absent),
21642189
(10, Inexact(8), Inexact(10), Absent, Absent),
2165-
Some(Inexact(20)),
2190+
Some(Inexact(33)),
21662191
),
21672192
// range(right) > range(left)
21682193
(
21692194
(10, Inexact(8), Inexact(10), Absent, Absent),
21702195
(10, Inexact(6), Inexact(10), Absent, Absent),
2171-
Some(Inexact(20)),
2196+
Some(Inexact(33)),
21722197
),
21732198
// range(left) > len(left), range(right) > len(right)
2199+
// Without distinct stats, min(10, 20) = 10, so (10*20)/10 = 20
21742200
(
21752201
(10, Inexact(1), Inexact(15), Absent, Absent),
21762202
(20, Inexact(1), Inexact(40), Absent, Absent),
2177-
Some(Inexact(10)),
2203+
Some(Inexact(20)),
21782204
),
21792205
// Distinct count matches the range
21802206
(
@@ -2201,16 +2227,18 @@ mod tests {
22012227
Some(Inexact(20)),
22022228
),
22032229
// min(left) < 0 (range(left) > range(right))
2230+
// Without distinct stats, min(10, 5) = 5, so (10*10)/5 = 20
22042231
(
22052232
(10, Inexact(-5), Inexact(5), Absent, Absent),
22062233
(10, Inexact(1), Inexact(5), Absent, Absent),
2207-
Some(Inexact(10)),
2234+
Some(Inexact(20)),
22082235
),
22092236
// min(right) < 0, max(right) < 0 (range(right) > range(left))
2237+
// min(6, 10) = 6, so (10*10)/6 = 16
22102238
(
22112239
(10, Inexact(-25), Inexact(-20), Absent, Absent),
22122240
(10, Inexact(-25), Inexact(-15), Absent, Absent),
2213-
Some(Inexact(10)),
2241+
Some(Inexact(16)),
22142242
),
22152243
// range(left) < 0, range(right) >= 0
22162244
// (there isn't a case where both left and right ranges are negative
@@ -2275,11 +2303,11 @@ mod tests {
22752303
(10, Inexact(0), Inexact(10), Absent, Absent),
22762304
Some(Inexact(0)),
22772305
),
2278-
// distinct(left) = 0, distinct(right) = 0
2306+
// distinct(left) = 0, distinct(right) = 0: no matching keys possible
22792307
(
22802308
(10, Inexact(1), Inexact(10), Inexact(0), Absent),
22812309
(10, Inexact(1), Inexact(10), Inexact(0), Absent),
2282-
None,
2310+
Some(Inexact(0)),
22832311
),
22842312
// Inexact row count < exact null count with absent distinct count
22852313
(

0 commit comments

Comments
 (0)