@@ -1826,12 +1826,21 @@ fn eq_dyn_null(
18261826/// Pre-built comparator for join key columns that eliminates per-row type
18271827/// dispatch. Wraps `arrow_ord::ord::DynComparator` closures built once per
18281828/// batch pair, used for all row comparisons within those batches.
1829+ ///
1830+ /// Null handling is baked into the closures at construction time:
1831+ /// - `NullEqualsNull`: `make_comparator` returns `Equal` for both-null, which
1832+ /// is the desired behavior. Closures are used as-is.
1833+ /// - `NullEqualsNothing`: columns where both sides contain nulls get a wrapper
1834+ /// that returns `Less` for both-null. Columns where one side has no nulls
1835+ /// skip the wrapper since both-null is impossible.
1836+ ///
1837+ /// Because `NullEqualsNothing` wraps comparators to return `Less` for
1838+ /// both-null, `is_equal` will return `false` for both-null rows when that
1839+ /// mode is active. Callers needing both-null == equal semantics (e.g.,
1840+ /// buffered head/tail equality in SMJ) should construct with
1841+ /// `NullEqualsNull`.
18291842pub struct JoinKeyComparator {
18301843 comparators : Vec < DynComparator > ,
1831- /// Only populated when null_equality == NullEqualsNothing
1832- left_nulls : Vec < Option < NullBuffer > > ,
1833- right_nulls : Vec < Option < NullBuffer > > ,
1834- null_equality : NullEquality ,
18351844}
18361845
18371846impl JoinKeyComparator {
@@ -1847,66 +1856,50 @@ impl JoinKeyComparator {
18471856 . iter ( )
18481857 . zip ( right_arrays. iter ( ) )
18491858 . zip ( sort_options. iter ( ) )
1850- . map ( |( ( l, r) , opts) | Ok ( make_comparator ( l. as_ref ( ) , r. as_ref ( ) , * opts) ?) )
1859+ . map ( |( ( l, r) , opts) | {
1860+ let inner = make_comparator ( l. as_ref ( ) , r. as_ref ( ) , * opts) ?;
1861+ if null_equality == NullEquality :: NullEqualsNothing {
1862+ let ln = l. logical_nulls ( ) . filter ( |n| n. null_count ( ) > 0 ) ;
1863+ let rn = r. logical_nulls ( ) . filter ( |n| n. null_count ( ) > 0 ) ;
1864+ match ( ln, rn) {
1865+ // Both sides have nulls — wrap to override both-null.
1866+ ( Some ( ln) , Some ( rn) ) => Ok ( Box :: new ( move |i, j| {
1867+ if ln. is_null ( i) && rn. is_null ( j) {
1868+ Ordering :: Less
1869+ } else {
1870+ inner ( i, j)
1871+ }
1872+ } )
1873+ as DynComparator ) ,
1874+ // One side has no nulls — both-null impossible, no wrap.
1875+ _ => Ok ( inner) ,
1876+ }
1877+ } else {
1878+ Ok ( inner)
1879+ }
1880+ } )
18511881 . collect :: < Result < Vec < _ > > > ( ) ?;
18521882
1853- let ( left_nulls, right_nulls) =
1854- if null_equality == NullEquality :: NullEqualsNothing {
1855- let ln = left_arrays
1856- . iter ( )
1857- . map ( |a| a. logical_nulls ( ) . filter ( |n| n. null_count ( ) > 0 ) )
1858- . collect ( ) ;
1859- let rn = right_arrays
1860- . iter ( )
1861- . map ( |a| a. logical_nulls ( ) . filter ( |n| n. null_count ( ) > 0 ) )
1862- . collect ( ) ;
1863- ( ln, rn)
1864- } else {
1865- ( vec ! [ ] , vec ! [ ] )
1866- } ;
1867-
1868- Ok ( Self {
1869- comparators,
1870- left_nulls,
1871- right_nulls,
1872- null_equality,
1873- } )
1883+ Ok ( Self { comparators } )
18741884 }
18751885
18761886 /// Compare row `left` (in the left arrays) with row `right` (in the right
18771887 /// arrays). Returns the lexicographic ordering across all key columns.
18781888 #[ inline]
18791889 pub fn compare ( & self , left : usize , right : usize ) -> Ordering {
1880- if self . null_equality == NullEquality :: NullEqualsNothing {
1881- for ( idx, cmp_fn) in self . comparators . iter ( ) . enumerate ( ) {
1882- // Override both-null: make_comparator returns Equal but
1883- // NullEqualsNothing semantics require Less.
1884- if let ( Some ( ln) , Some ( rn) ) =
1885- ( & self . left_nulls [ idx] , & self . right_nulls [ idx] )
1886- && ln. is_null ( left)
1887- && rn. is_null ( right)
1888- {
1889- return Ordering :: Less ;
1890- }
1891- let ord = cmp_fn ( left, right) ;
1892- if ord != Ordering :: Equal {
1893- return ord;
1894- }
1895- }
1896- } else {
1897- for cmp_fn in & self . comparators {
1898- let ord = cmp_fn ( left, right) ;
1899- if ord != Ordering :: Equal {
1900- return ord;
1901- }
1890+ for cmp_fn in & self . comparators {
1891+ let ord = cmp_fn ( left, right) ;
1892+ if ord != Ordering :: Equal {
1893+ return ord;
19021894 }
19031895 }
19041896 Ordering :: Equal
19051897 }
19061898
19071899 /// Check equality of row `left` (in the left arrays) with row `right`
1908- /// (in the right arrays). Both-null is treated as equal regardless of
1909- /// `null_equality` — this matches `is_join_arrays_equal` semantics.
1900+ /// (in the right arrays). Both-null is treated as equal when constructed
1901+ /// with `NullEqualsNull`. With `NullEqualsNothing`, both-null returns
1902+ /// `false` because the override is baked into the comparators.
19101903 #[ inline]
19111904 pub fn is_equal ( & self , left : usize , right : usize ) -> bool {
19121905 for cmp_fn in & self . comparators {
@@ -3126,9 +3119,6 @@ mod tests {
31263119 assert_eq ! ( cmp. compare( 0 , 0 ) , Ordering :: Greater ) ;
31273120 // left[3]=2 vs right[3]=2 → Equal
31283121 assert_eq ! ( cmp. compare( 3 , 3 ) , Ordering :: Equal ) ;
3129-
3130- // is_equal always treats both-null as equal
3131- assert ! ( cmp. is_equal( 1 , 1 ) ) ;
31323122 }
31333123
31343124 #[ test]
0 commit comments