Skip to content

Commit d32d5ae

Browse files
committed
Remove branch in compare() by instantiating closures based on NullEquality at construction.
1 parent 6a0351d commit d32d5ae

1 file changed

Lines changed: 43 additions & 53 deletions

File tree

  • datafusion/physical-plan/src/joins

datafusion/physical-plan/src/joins/utils.rs

Lines changed: 43 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -1826,12 +1826,21 @@ fn eq_dyn_null(
18261826
/// Pre-built comparator for join key columns that eliminates per-row type
18271827
/// dispatch. Wraps `arrow_ord::ord::DynComparator` closures built once per
18281828
/// batch pair, used for all row comparisons within those batches.
1829+
///
1830+
/// Null handling is baked into the closures at construction time:
1831+
/// - `NullEqualsNull`: `make_comparator` returns `Equal` for both-null, which
1832+
/// is the desired behavior. Closures are used as-is.
1833+
/// - `NullEqualsNothing`: columns where both sides contain nulls get a wrapper
1834+
/// that returns `Less` for both-null. Columns where one side has no nulls
1835+
/// skip the wrapper since both-null is impossible.
1836+
///
1837+
/// Because `NullEqualsNothing` wraps comparators to return `Less` for
1838+
/// both-null, `is_equal` will return `false` for both-null rows when that
1839+
/// mode is active. Callers needing both-null == equal semantics (e.g.,
1840+
/// buffered head/tail equality in SMJ) should construct with
1841+
/// `NullEqualsNull`.
18291842
pub struct JoinKeyComparator {
18301843
comparators: Vec<DynComparator>,
1831-
/// Only populated when null_equality == NullEqualsNothing
1832-
left_nulls: Vec<Option<NullBuffer>>,
1833-
right_nulls: Vec<Option<NullBuffer>>,
1834-
null_equality: NullEquality,
18351844
}
18361845

18371846
impl JoinKeyComparator {
@@ -1847,66 +1856,50 @@ impl JoinKeyComparator {
18471856
.iter()
18481857
.zip(right_arrays.iter())
18491858
.zip(sort_options.iter())
1850-
.map(|((l, r), opts)| Ok(make_comparator(l.as_ref(), r.as_ref(), *opts)?))
1859+
.map(|((l, r), opts)| {
1860+
let inner = make_comparator(l.as_ref(), r.as_ref(), *opts)?;
1861+
if null_equality == NullEquality::NullEqualsNothing {
1862+
let ln = l.logical_nulls().filter(|n| n.null_count() > 0);
1863+
let rn = r.logical_nulls().filter(|n| n.null_count() > 0);
1864+
match (ln, rn) {
1865+
// Both sides have nulls — wrap to override both-null.
1866+
(Some(ln), Some(rn)) => Ok(Box::new(move |i, j| {
1867+
if ln.is_null(i) && rn.is_null(j) {
1868+
Ordering::Less
1869+
} else {
1870+
inner(i, j)
1871+
}
1872+
})
1873+
as DynComparator),
1874+
// One side has no nulls — both-null impossible, no wrap.
1875+
_ => Ok(inner),
1876+
}
1877+
} else {
1878+
Ok(inner)
1879+
}
1880+
})
18511881
.collect::<Result<Vec<_>>>()?;
18521882

1853-
let (left_nulls, right_nulls) =
1854-
if null_equality == NullEquality::NullEqualsNothing {
1855-
let ln = left_arrays
1856-
.iter()
1857-
.map(|a| a.logical_nulls().filter(|n| n.null_count() > 0))
1858-
.collect();
1859-
let rn = right_arrays
1860-
.iter()
1861-
.map(|a| a.logical_nulls().filter(|n| n.null_count() > 0))
1862-
.collect();
1863-
(ln, rn)
1864-
} else {
1865-
(vec![], vec![])
1866-
};
1867-
1868-
Ok(Self {
1869-
comparators,
1870-
left_nulls,
1871-
right_nulls,
1872-
null_equality,
1873-
})
1883+
Ok(Self { comparators })
18741884
}
18751885

18761886
/// Compare row `left` (in the left arrays) with row `right` (in the right
18771887
/// arrays). Returns the lexicographic ordering across all key columns.
18781888
#[inline]
18791889
pub fn compare(&self, left: usize, right: usize) -> Ordering {
1880-
if self.null_equality == NullEquality::NullEqualsNothing {
1881-
for (idx, cmp_fn) in self.comparators.iter().enumerate() {
1882-
// Override both-null: make_comparator returns Equal but
1883-
// NullEqualsNothing semantics require Less.
1884-
if let (Some(ln), Some(rn)) =
1885-
(&self.left_nulls[idx], &self.right_nulls[idx])
1886-
&& ln.is_null(left)
1887-
&& rn.is_null(right)
1888-
{
1889-
return Ordering::Less;
1890-
}
1891-
let ord = cmp_fn(left, right);
1892-
if ord != Ordering::Equal {
1893-
return ord;
1894-
}
1895-
}
1896-
} else {
1897-
for cmp_fn in &self.comparators {
1898-
let ord = cmp_fn(left, right);
1899-
if ord != Ordering::Equal {
1900-
return ord;
1901-
}
1890+
for cmp_fn in &self.comparators {
1891+
let ord = cmp_fn(left, right);
1892+
if ord != Ordering::Equal {
1893+
return ord;
19021894
}
19031895
}
19041896
Ordering::Equal
19051897
}
19061898

19071899
/// Check equality of row `left` (in the left arrays) with row `right`
1908-
/// (in the right arrays). Both-null is treated as equal regardless of
1909-
/// `null_equality` — this matches `is_join_arrays_equal` semantics.
1900+
/// (in the right arrays). Both-null is treated as equal when constructed
1901+
/// with `NullEqualsNull`. With `NullEqualsNothing`, both-null returns
1902+
/// `false` because the override is baked into the comparators.
19101903
#[inline]
19111904
pub fn is_equal(&self, left: usize, right: usize) -> bool {
19121905
for cmp_fn in &self.comparators {
@@ -3126,9 +3119,6 @@ mod tests {
31263119
assert_eq!(cmp.compare(0, 0), Ordering::Greater);
31273120
// left[3]=2 vs right[3]=2 → Equal
31283121
assert_eq!(cmp.compare(3, 3), Ordering::Equal);
3129-
3130-
// is_equal always treats both-null as equal
3131-
assert!(cmp.is_equal(1, 1));
31323122
}
31333123

31343124
#[test]

0 commit comments

Comments
 (0)