Skip to content

Commit d865806

Browse files
committed
Optimize for single-column key joins.
1 parent a3e6884 commit d865806

1 file changed

Lines changed: 25 additions & 9 deletions

File tree

  • datafusion/physical-plan/src/joins

datafusion/physical-plan/src/joins/utils.rs

Lines changed: 25 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1827,6 +1827,11 @@ fn eq_dyn_null(
18271827
/// dispatch. Wraps `arrow_ord::ord::DynComparator` closures built once per
18281828
/// batch pair, used for all row comparisons within those batches.
18291829
///
1830+
/// The first key column is stored separately so that single-column joins
1831+
/// (the common case) avoid Vec iteration entirely, and multi-column joins
1832+
/// short-circuit without entering the loop when the first column is
1833+
/// selective.
1834+
///
18301835
/// Null handling is baked into the closures at construction time:
18311836
/// - `NullEqualsNull`: `make_comparator` returns `Equal` for both-null, which
18321837
/// is the desired behavior. Closures are used as-is.
@@ -1840,19 +1845,22 @@ fn eq_dyn_null(
18401845
/// buffered head/tail equality in SMJ) should construct with
18411846
/// `NullEqualsNull`.
18421847
pub struct JoinKeyComparator {
1843-
comparators: Vec<DynComparator>,
1848+
first: DynComparator,
1849+
rest: Vec<DynComparator>,
18441850
}
18451851

18461852
impl JoinKeyComparator {
1847-
/// Build comparators for each join key column pair. The `sort_options`
1848-
/// slice must have the same length as the array slices.
1853+
/// Build comparators for each join key column pair. All three slices must have the same length and contain at least one key; an empty key list panics.
18491854
pub fn new(
18501855
left_arrays: &[ArrayRef],
18511856
right_arrays: &[ArrayRef],
18521857
sort_options: &[SortOptions],
18531858
null_equality: NullEquality,
18541859
) -> Result<Self> {
1855-
let comparators = left_arrays
1860+
debug_assert_eq!(left_arrays.len(), right_arrays.len());
1861+
debug_assert_eq!(left_arrays.len(), sort_options.len());
1862+
1863+
let mut iter = left_arrays
18561864
.iter()
18571865
.zip(right_arrays.iter())
18581866
.zip(sort_options.iter())
@@ -1877,17 +1885,22 @@ impl JoinKeyComparator {
18771885
} else {
18781886
Ok(inner)
18791887
}
1880-
})
1881-
.collect::<Result<Vec<_>>>()?;
1888+
});
18821889

1883-
Ok(Self { comparators })
1890+
let first = iter.next().expect("join must have at least one key")?;
1891+
let rest = iter.collect::<Result<Vec<_>>>()?;
1892+
Ok(Self { first, rest })
18841893
}
18851894

18861895
/// Compare row `left` (in the left arrays) with row `right` (in the right
18871896
/// arrays). Returns the lexicographic ordering across all key columns.
18881897
#[inline]
18891898
pub fn compare(&self, left: usize, right: usize) -> Ordering {
1890-
for cmp_fn in &self.comparators {
1899+
let ord = (self.first)(left, right);
1900+
if ord != Ordering::Equal || self.rest.is_empty() {
1901+
return ord;
1902+
}
1903+
for cmp_fn in &self.rest {
18911904
let ord = cmp_fn(left, right);
18921905
if ord != Ordering::Equal {
18931906
return ord;
@@ -1902,7 +1915,10 @@ impl JoinKeyComparator {
19021915
/// `false` because the override is baked into the comparators.
19031916
#[inline]
19041917
pub fn is_equal(&self, left: usize, right: usize) -> bool {
1905-
for cmp_fn in &self.comparators {
1918+
if (self.first)(left, right) != Ordering::Equal {
1919+
return false;
1920+
}
1921+
for cmp_fn in &self.rest {
19061922
if cmp_fn(left, right) != Ordering::Equal {
19071923
return false;
19081924
}

0 commit comments

Comments
 (0)