Skip to content

Commit 18a219c

Browse files
authored
perf: reuse mask in truncate_list_nulls and avoid counting all true bits (#22158)
## Which issue does this PR close?

N/A

## Rationale for this change

1. Instead of counting all set bits just to check whether at least one bit is set, we can use the existing helpers on `BooleanArray` that answer that question directly.
2. Avoid unnecessary `BooleanBuffer` bitwise operations by reusing the mask.

## What changes are included in this PR?

Reuse the mask, and use the `has_false` helper to check whether at least one value is false.

## Are these changes tested?

Yes, by existing tests.

## Are there any user-facing changes?

No

------

Cc @gstvg, @comphead
1 parent 63ef9c3 commit 18a219c

1 file changed

Lines changed: 8 additions & 6 deletions

File tree

  • datafusion/common/src/utils

datafusion/common/src/utils/mod.rs

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -31,12 +31,12 @@ use arrow::array::{
3131
cast::AsArray,
3232
};
3333
use arrow::array::{
34-
ArrowPrimitiveType, Datum, GenericListArray, Int32Array, Int64Array,
34+
ArrowPrimitiveType, BooleanArray, Datum, GenericListArray, Int32Array, Int64Array,
3535
MutableArrayData, PrimitiveArray, make_array,
3636
};
3737
use arrow::array::{LargeListViewArray, ListViewArray};
3838
use arrow::buffer::{OffsetBuffer, ScalarBuffer};
39-
use arrow::compute::kernels::cmp::neq;
39+
use arrow::compute::kernels::cmp::eq;
4040
use arrow::compute::kernels::length::length;
4141
use arrow::compute::{SortColumn, SortOptions, partition};
4242
use arrow::datatypes::{
@@ -1129,6 +1129,7 @@ pub fn remove_list_null_values(array: &ArrayRef) -> Result<ArrayRef> {
11291129
}
11301130
}
11311131

1132+
/// Create a new list array where all the nulls point to empty lists
11321133
fn truncate_list_nulls<O: OffsetSizeTrait>(
11331134
list: &GenericListArray<O>,
11341135
) -> Result<GenericListArray<O>> {
@@ -1142,17 +1143,18 @@ fn truncate_list_nulls<O: OffsetSizeTrait>(
11421143
&Int64Array::new_scalar(0)
11431144
};
11441145

1145-
let not_empty = neq(&lengths, zero)?;
1146-
let null_and_non_empty = &!nulls.inner() & not_empty.values();
1146+
let (mut valid_or_empty, _nulls) = eq(&lengths, zero)?.into_parts();
1147+
valid_or_empty |= nulls.inner();
1148+
let valid_or_empty = BooleanArray::from(valid_or_empty);
11471149

1148-
if null_and_non_empty.count_set_bits() > 0 {
1150+
if valid_or_empty.has_false() {
11491151
let array_data = list.values().to_data();
11501152
let offsets = list.offsets();
11511153
let capacity = offsets[offsets.len() - 1] - offsets[0];
11521154
let mut mutable_array_data =
11531155
MutableArrayData::new(vec![&array_data], false, capacity.as_usize());
11541156

1155-
let valid_or_empty = nulls.inner() | &!not_empty.values();
1157+
let (valid_or_empty, _nulls) = valid_or_empty.into_parts();
11561158

11571159
for (start, end) in valid_or_empty.set_slices() {
11581160
mutable_array_data.extend(

0 commit comments

Comments
 (0)