Skip to content

Commit 16c82bb

Browse files
author
B Vadlamani
committed
add_bool_buffers_membership_test
1 parent 698dcbb commit 16c82bb

2 files changed

Lines changed: 30 additions & 18 deletions

File tree

datafusion/physical-plan/src/joins/hash_join/partitioned_hash_eval.rs

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ use std::{fmt::Display, hash::Hash, sync::Arc};
2121

2222
use arrow::{
2323
array::{Array, ArrayRef, BooleanArray, Int32Array, UInt32Array, UInt64Array},
24+
buffer::BooleanBuffer,
2425
datatypes::{DataType, Schema},
2526
record_batch::RecordBatch,
2627
};
@@ -351,18 +352,26 @@ impl PhysicalExpr for HashTableLookupExpr {
351352
.as_any()
352353
.downcast_ref::<Int32Array>()
353354
.expect("Expected Int32Array");
354-
arr.iter()
355-
.map(|v| v.map(|val| bitmap.contains(val as u32)))
356-
.collect()
355+
let buffer = BooleanBuffer::collect_bool(arr.len(), |i| {
356+
if arr.is_null(i) {
357+
return false;
358+
}
359+
bitmap.contains(arr.value(i) as u32)
360+
});
361+
BooleanArray::new(buffer, None)
357362
}
358363
DataType::UInt32 => {
359364
let arr = key_col
360365
.as_any()
361366
.downcast_ref::<UInt32Array>()
362367
.expect("Expected UInt32Array");
363-
arr.iter()
364-
.map(|v| v.map(|val| bitmap.contains(val)))
365-
.collect()
368+
let buffer = BooleanBuffer::collect_bool(arr.len(), |i| {
369+
if arr.is_null(i) {
370+
return false;
371+
}
372+
bitmap.contains(arr.value(i))
373+
});
374+
BooleanArray::new(buffer, None)
366375
}
367376
other => {
368377
return internal_err!(

datafusion/physical-plan/src/joins/hash_join/stream.rs

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ use crate::{
4747
};
4848

4949
use arrow::array::{Array, ArrayRef, BooleanArray, Int32Array, UInt32Array, UInt64Array};
50+
use arrow::buffer::BooleanBuffer;
5051
use arrow::compute::filter_record_batch;
5152
use arrow::datatypes::{Schema, SchemaRef};
5253
use arrow::record_batch::{RecordBatch, RecordBatchOptions};
@@ -731,21 +732,23 @@ impl HashJoinStream {
731732
let mask: BooleanArray = match key_col.data_type() {
732733
DataType::Int32 => {
733734
let arr = key_col.as_any().downcast_ref::<Int32Array>().unwrap();
734-
arr.iter()
735-
.map(|v| match v {
736-
Some(v) => bitmap.contains(v as u32) == is_semi,
737-
None => !is_semi,
738-
})
739-
.collect()
735+
let buffer = BooleanBuffer::collect_bool(arr.len(), |i| {
736+
if arr.is_null(i) {
737+
return !is_semi;
738+
}
739+
bitmap.contains(arr.value(i) as u32) == is_semi
740+
});
741+
BooleanArray::new(buffer, None)
740742
}
741743
DataType::UInt32 => {
742744
let arr = key_col.as_any().downcast_ref::<UInt32Array>().unwrap();
743-
arr.iter()
744-
.map(|v| match v {
745-
Some(v) => bitmap.contains(v) == is_semi,
746-
None => !is_semi,
747-
})
748-
.collect()
745+
let buffer = BooleanBuffer::collect_bool(arr.len(), |i| {
746+
if arr.is_null(i) {
747+
return !is_semi;
748+
}
749+
bitmap.contains(arr.value(i)) == is_semi
750+
});
751+
BooleanArray::new(buffer, None)
749752
}
750753
_ => {
751754
return internal_err!("unsupported data type for roaring bitmap");

0 commit comments

Comments
 (0)