Skip to content

Commit 501b223

Browse files
committed
datafusion-common: restore hash_utils leaf fast path
1 parent c022fb4 commit 501b223

1 file changed

Lines changed: 53 additions & 22 deletions

File tree

datafusion/common/src/hash_utils.rs

Lines changed: 53 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -209,29 +209,49 @@ fn hash_null(random_state: &RandomState, hashes_buffer: &'_ mut [u64], mul_col:
209209
}
210210

211211
pub trait HashValue {
212-
fn hash_one<S: BuildHasher>(&self, state: &S) -> u64;
212+
fn hash_one(&self, state: &RandomState) -> u64;
213213
/// Write this value into an existing hasher (same data as `hash_one`).
214214
fn hash_write(&self, hasher: &mut impl Hasher);
215215
}
216216

217217
impl<T: HashValue + ?Sized> HashValue for &T {
218-
fn hash_one<S: BuildHasher>(&self, state: &S) -> u64 {
218+
fn hash_one(&self, state: &RandomState) -> u64 {
219219
T::hash_one(self, state)
220220
}
221221
fn hash_write(&self, hasher: &mut impl Hasher) {
222222
T::hash_write(self, hasher)
223223
}
224224
}
225225

226+
#[cfg(not(feature = "force_hash_collisions"))]
227+
// Keep custom BuildHasher leaf hashing off the default RandomState fast path.
228+
trait BuildHasherHashValue {
229+
fn hash_one_with_hasher<S: BuildHasher>(&self, state: &S) -> u64;
230+
}
231+
232+
#[cfg(not(feature = "force_hash_collisions"))]
233+
impl<T: BuildHasherHashValue + ?Sized> BuildHasherHashValue for &T {
234+
fn hash_one_with_hasher<S: BuildHasher>(&self, state: &S) -> u64 {
235+
T::hash_one_with_hasher(self, state)
236+
}
237+
}
238+
226239
macro_rules! hash_value {
227240
($($t:ty),+) => {
228241
$(impl HashValue for $t {
229-
fn hash_one<S: BuildHasher>(&self, state: &S) -> u64 {
242+
fn hash_one(&self, state: &RandomState) -> u64 {
230243
state.hash_one(self)
231244
}
232245
fn hash_write(&self, hasher: &mut impl Hasher) {
233246
Hash::hash(self, hasher)
234247
}
248+
}
249+
250+
#[cfg(not(feature = "force_hash_collisions"))]
251+
impl BuildHasherHashValue for $t {
252+
fn hash_one_with_hasher<S: BuildHasher>(&self, state: &S) -> u64 {
253+
state.hash_one(self)
254+
}
235255
})+
236256
};
237257
}
@@ -241,12 +261,19 @@ hash_value!(bool, str, [u8], IntervalDayTime, IntervalMonthDayNano);
241261
macro_rules! hash_float_value {
242262
($(($t:ty, $i:ty)),+) => {
243263
$(impl HashValue for $t {
244-
fn hash_one<S: BuildHasher>(&self, state: &S) -> u64 {
264+
fn hash_one(&self, state: &RandomState) -> u64 {
245265
state.hash_one(<$i>::from_ne_bytes(self.to_ne_bytes()))
246266
}
247267
fn hash_write(&self, hasher: &mut impl Hasher) {
248268
hasher.write(&self.to_ne_bytes())
249269
}
270+
}
271+
272+
#[cfg(not(feature = "force_hash_collisions"))]
273+
impl BuildHasherHashValue for $t {
274+
fn hash_one_with_hasher<S: BuildHasher>(&self, state: &S) -> u64 {
275+
state.hash_one(<$i>::from_ne_bytes(self.to_ne_bytes()))
276+
}
250277
})+
251278
};
252279
}
@@ -551,7 +578,7 @@ fn hash_array_primitive_with_hasher<T, S>(
551578
hashes_buffer: &mut [u64],
552579
rehash: bool,
553580
) where
554-
T: ArrowPrimitiveType<Native: HashValue>,
581+
T: ArrowPrimitiveType<Native: BuildHasherHashValue>,
555582
S: BuildHasher,
556583
{
557584
assert_eq!(
@@ -563,23 +590,25 @@ fn hash_array_primitive_with_hasher<T, S>(
563590
if array.null_count() == 0 {
564591
if rehash {
565592
for (hash, &value) in hashes_buffer.iter_mut().zip(array.values().iter()) {
566-
*hash = combine_hashes(value.hash_one(hash_builder), *hash);
593+
*hash = combine_hashes(value.hash_one_with_hasher(hash_builder), *hash);
567594
}
568595
} else {
569596
for (hash, &value) in hashes_buffer.iter_mut().zip(array.values().iter()) {
570-
*hash = value.hash_one(hash_builder);
597+
*hash = value.hash_one_with_hasher(hash_builder);
571598
}
572599
}
573600
} else if rehash {
574601
for i in array.nulls().unwrap().valid_indices() {
575602
let value = unsafe { array.value_unchecked(i) };
576-
hashes_buffer[i] =
577-
combine_hashes(value.hash_one(hash_builder), hashes_buffer[i]);
603+
hashes_buffer[i] = combine_hashes(
604+
value.hash_one_with_hasher(hash_builder),
605+
hashes_buffer[i],
606+
);
578607
}
579608
} else {
580609
for i in array.nulls().unwrap().valid_indices() {
581610
let value = unsafe { array.value_unchecked(i) };
582-
hashes_buffer[i] = value.hash_one(hash_builder);
611+
hashes_buffer[i] = value.hash_one_with_hasher(hash_builder);
583612
}
584613
}
585614
}
@@ -592,7 +621,7 @@ fn hash_array_with_hasher<T, S>(
592621
rehash: bool,
593622
) where
594623
T: ArrayAccessor,
595-
T::Item: HashValue,
624+
T::Item: BuildHasherHashValue,
596625
S: BuildHasher,
597626
{
598627
assert_eq!(
@@ -605,24 +634,26 @@ fn hash_array_with_hasher<T, S>(
605634
if rehash {
606635
for (i, hash) in hashes_buffer.iter_mut().enumerate() {
607636
let value = unsafe { array.value_unchecked(i) };
608-
*hash = combine_hashes(value.hash_one(hash_builder), *hash);
637+
*hash = combine_hashes(value.hash_one_with_hasher(hash_builder), *hash);
609638
}
610639
} else {
611640
for (i, hash) in hashes_buffer.iter_mut().enumerate() {
612641
let value = unsafe { array.value_unchecked(i) };
613-
*hash = value.hash_one(hash_builder);
642+
*hash = value.hash_one_with_hasher(hash_builder);
614643
}
615644
}
616645
} else if rehash {
617646
for i in array.nulls().unwrap().valid_indices() {
618647
let value = unsafe { array.value_unchecked(i) };
619-
hashes_buffer[i] =
620-
combine_hashes(value.hash_one(hash_builder), hashes_buffer[i]);
648+
hashes_buffer[i] = combine_hashes(
649+
value.hash_one_with_hasher(hash_builder),
650+
hashes_buffer[i],
651+
);
621652
}
622653
} else {
623654
for i in array.nulls().unwrap().valid_indices() {
624655
let value = unsafe { array.value_unchecked(i) };
625-
hashes_buffer[i] = value.hash_one(hash_builder);
656+
hashes_buffer[i] = value.hash_one_with_hasher(hash_builder);
626657
}
627658
}
628659
}
@@ -664,17 +695,17 @@ fn hash_string_view_array_inner_with_hasher<
664695
let view_len = v as u32;
665696
if !HAS_BUFFERS || view_len <= 12 {
666697
if REHASH {
667-
*hash = combine_hashes(v.hash_one(hash_builder), *hash);
698+
*hash = combine_hashes(v.hash_one_with_hasher(hash_builder), *hash);
668699
} else {
669-
*hash = v.hash_one(hash_builder);
700+
*hash = v.hash_one_with_hasher(hash_builder);
670701
}
671702
continue;
672703
}
673704
let value = view_bytes(view_len, v);
674705
if REHASH {
675-
*hash = combine_hashes(value.hash_one(hash_builder), *hash);
706+
*hash = combine_hashes(value.hash_one_with_hasher(hash_builder), *hash);
676707
} else {
677-
*hash = value.hash_one(hash_builder);
708+
*hash = value.hash_one_with_hasher(hash_builder);
678709
}
679710
}
680711
}
@@ -693,12 +724,12 @@ fn hash_generic_byte_view_array_with_hasher<T: ByteViewType, S: BuildHasher>(
693724
) {
694725
(false, false, false) => {
695726
for (hash, &view) in hashes_buffer.iter_mut().zip(array.views().iter()) {
696-
*hash = view.hash_one(hash_builder);
727+
*hash = view.hash_one_with_hasher(hash_builder);
697728
}
698729
}
699730
(false, false, true) => {
700731
for (hash, &view) in hashes_buffer.iter_mut().zip(array.views().iter()) {
701-
*hash = combine_hashes(view.hash_one(hash_builder), *hash);
732+
*hash = combine_hashes(view.hash_one_with_hasher(hash_builder), *hash);
702733
}
703734
}
704735
(false, true, false) => {

0 commit comments

Comments
 (0)