Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions vortex-array/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -187,3 +187,7 @@ harness = false
[[bench]]
name = "listview_rebuild"
harness = false

[[bench]]
name = "listview_builder_extend"
harness = false
97 changes: 97 additions & 0 deletions vortex-array/benches/listview_builder_extend.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

#![expect(clippy::cast_possible_truncation)]
#![expect(clippy::cast_possible_wrap)]

use std::sync::Arc;

use divan::Bencher;
use vortex_array::IntoArray;
use vortex_array::arrays::ListViewArray;
use vortex_array::arrays::PrimitiveArray;
use vortex_array::builders::ArrayBuilder;
use vortex_array::builders::ListViewBuilder;
use vortex_array::dtype::DType;
use vortex_array::dtype::Nullability::NonNullable;
use vortex_array::dtype::Nullability::Nullable;
use vortex_array::dtype::PType::I32;
use vortex_array::validity::Validity;
use vortex_buffer::Buffer;

fn main() {
divan::main();
}

const ZCTL_ARGS: &[(usize, usize)] = &[
// num_lists, list_size
(1_000, 8),
(1_000, 64),
(10_000, 8),
];

const NON_ZCTL_ARGS: &[(usize, usize)] = &[
// num_lists, list_size
(1_000, 8),
(1_000, 32),
(10_000, 8),
];

fn make_listview(
num_lists: usize,
list_size: usize,
step: usize,
with_nulls: bool,
) -> ListViewArray {
let element_count = step * num_lists + list_size;
let elements = PrimitiveArray::from_iter(0..element_count as i32).into_array();
let offsets: Buffer<u32> = (0..num_lists).map(|i| (i * step) as u32).collect();
let sizes: Buffer<u16> = std::iter::repeat_n(list_size as u16, num_lists).collect();
let validity = if with_nulls {
Validity::from_iter((0..num_lists).map(|i| i % 5 != 0))
} else {
Validity::NonNullable
};

ListViewArray::new(elements, offsets.into_array(), sizes.into_array(), validity)
}

#[divan::bench(args = ZCTL_ARGS)]
fn extend_from_array_zctl(bencher: Bencher, (num_lists, list_size): (usize, usize)) {
let source = make_listview(num_lists, list_size, list_size, false);
debug_assert!(source.is_zero_copy_to_list());
let source = source.into_array();

bencher.with_inputs(|| &source).bench_refs(|source| {
let mut builder = ListViewBuilder::<u32, u32>::with_capacity(
Arc::new(DType::Primitive(I32, NonNullable)),
NonNullable,
num_lists * list_size,
num_lists,
);
builder.extend_from_array(source);
divan::black_box(builder.finish_into_listview())
});
}

#[divan::bench(args = NON_ZCTL_ARGS)]
fn extend_from_array_non_zctl_overlapping(
bencher: Bencher,
(num_lists, list_size): (usize, usize),
) {
// `step = 1` creates heavily overlapping lists, which forces the non-ZCTL extend path.
let source = make_listview(num_lists, list_size, 1, true);
debug_assert!(!source.is_zero_copy_to_list());
let source = source.into_array();

bencher.with_inputs(|| &source).bench_refs(|source| {
let mut builder = ListViewBuilder::<u32, u8>::with_capacity(
Arc::new(DType::Primitive(I32, NonNullable)),
Nullable,
num_lists * list_size,
num_lists,
);
builder.extend_from_array(source);
divan::black_box(builder.finish_into_listview())
});
}
75 changes: 51 additions & 24 deletions vortex-array/src/builders/listview.rs
Original file line number Diff line number Diff line change
Expand Up @@ -296,33 +296,15 @@ impl<O: IntegerPType, S: IntegerPType> ArrayBuilder for ListViewBuilder<O, S> {
return;
}

// If we do not have the guarantee that the array is zero-copyable to a list, then we have
// to manually append each scalar.
if !listview.is_zero_copy_to_list() {
for i in 0..listview.len() {
let list = listview
.scalar_at(i)
.vortex_expect("scalar_at failed in extend_from_array_unchecked");

self.append_scalar(&list)
.vortex_expect("was unable to extend the `ListViewBuilder`")
}

return;
}

// Otherwise, after removing any leading and trailing elements, we can simply bulk append
// the entire array.
// Normalize to an exact zero-copy-to-list layout and then bulk append. This avoids the
// very expensive scalar_at-per-list path for overlapping / out-of-order list views.
let listview = listview
.rebuild(ListViewRebuildMode::MakeExact)
.vortex_expect("ListViewArray::rebuild(MakeExact) failed in extend_from_array");
debug_assert!(listview.is_zero_copy_to_list());

self.nulls.append_validity_mask(
array
.validity_mask()
.vortex_expect("validity_mask in extend_from_array_unchecked"),
);
self.nulls
.append_validity_mask(listview.listview_validity_mask());

// Bulk append the new elements (which should have no gaps or overlaps).
let old_elements_len = self.elements_builder.len();
Expand Down Expand Up @@ -429,11 +411,13 @@ fn adjust_and_extend_offsets<'a, O: IntegerPType, A: IntegerPType>(
mod tests {
use std::sync::Arc;

use vortex_buffer::buffer;
use vortex_error::VortexExpect;

use super::ListViewBuilder;
use crate::IntoArray;
use crate::arrays::ListArray;
use crate::arrays::ListViewArray;
use crate::arrays::listview::ListViewArrayExt;
use crate::assert_arrays_eq;
use crate::builders::ArrayBuilder;
Expand All @@ -443,6 +427,7 @@ mod tests {
use crate::dtype::Nullability::Nullable;
use crate::dtype::PType::I32;
use crate::scalar::Scalar;
use crate::validity::Validity;

#[test]
fn test_empty() {
Expand Down Expand Up @@ -696,6 +681,50 @@ mod tests {
);
}

#[test]
fn test_extend_from_array_overlapping_listview() {
let dtype: Arc<DType> = Arc::new(I32.into());

// Non-ZCTL source:
// - List 0: [10, 20]
// - List 1: null (size is intentionally non-zero in source metadata)
// - List 2: [10]
let source = unsafe {
ListViewArray::new_unchecked(
buffer![10i32, 20, 30].into_array(),
buffer![0u32, 1, 0].into_array(),
buffer![2u8, 2, 1].into_array(),
Validity::from_iter([true, false, true]),
)
};
assert!(!source.is_zero_copy_to_list());

let mut builder =
ListViewBuilder::<u32, u8>::with_capacity(Arc::clone(&dtype), Nullable, 0, 0);
builder.extend_from_array(&source.into_array());

let listview = builder.finish_into_listview();
assert_eq!(listview.len(), 3);
assert!(listview.is_zero_copy_to_list());

assert_arrays_eq!(
listview.list_elements_at(0).unwrap(),
PrimitiveArray::from_iter([10i32, 20])
);
assert!(
!listview
.validity()
.vortex_expect("listview validity should be derivable")
.is_valid(1)
.unwrap()
);
assert_eq!(listview.list_elements_at(1).unwrap().len(), 0);
assert_arrays_eq!(
listview.list_elements_at(2).unwrap(),
PrimitiveArray::from_iter([10i32])
);
}

#[test]
fn test_error_append_null_to_non_nullable() {
let dtype: Arc<DType> = Arc::new(I32.into());
Expand All @@ -719,8 +748,6 @@ mod tests {

#[test]
fn test_append_array_as_list() {
use vortex_buffer::buffer;

let dtype: Arc<DType> = Arc::new(I32.into());
let mut builder =
ListViewBuilder::<u32, u32>::with_capacity(Arc::clone(&dtype), NonNullable, 20, 10);
Expand Down
Loading