diff --git a/vortex-array/Cargo.toml b/vortex-array/Cargo.toml index e324c70aadb..f9adbeb99db 100644 --- a/vortex-array/Cargo.toml +++ b/vortex-array/Cargo.toml @@ -187,3 +187,7 @@ harness = false [[bench]] name = "listview_rebuild" harness = false + +[[bench]] +name = "listview_builder_extend" +harness = false diff --git a/vortex-array/benches/listview_builder_extend.rs b/vortex-array/benches/listview_builder_extend.rs new file mode 100644 index 00000000000..75487564ea8 --- /dev/null +++ b/vortex-array/benches/listview_builder_extend.rs @@ -0,0 +1,97 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +#![expect(clippy::cast_possible_truncation)] +#![expect(clippy::cast_possible_wrap)] + +use std::sync::Arc; + +use divan::Bencher; +use vortex_array::IntoArray; +use vortex_array::arrays::ListViewArray; +use vortex_array::arrays::PrimitiveArray; +use vortex_array::builders::ArrayBuilder; +use vortex_array::builders::ListViewBuilder; +use vortex_array::dtype::DType; +use vortex_array::dtype::Nullability::NonNullable; +use vortex_array::dtype::Nullability::Nullable; +use vortex_array::dtype::PType::I32; +use vortex_array::validity::Validity; +use vortex_buffer::Buffer; + +fn main() { + divan::main(); +} + +const ZCTL_ARGS: &[(usize, usize)] = &[ + // num_lists, list_size + (1_000, 8), + (1_000, 64), + (10_000, 8), +]; + +const NON_ZCTL_ARGS: &[(usize, usize)] = &[ + // num_lists, list_size + (1_000, 8), + (1_000, 32), + (10_000, 8), +]; + +fn make_listview( + num_lists: usize, + list_size: usize, + step: usize, + with_nulls: bool, +) -> ListViewArray { + let element_count = step * num_lists + list_size; + let elements = PrimitiveArray::from_iter(0..element_count as i32).into_array(); + let offsets: Buffer = (0..num_lists).map(|i| (i * step) as u32).collect(); + let sizes: Buffer = std::iter::repeat_n(list_size as u16, num_lists).collect(); + let validity = if with_nulls { + Validity::from_iter((0..num_lists).map(|i| i % 5 != 0)) + } else { + Validity::NonNullable + }; + + ListViewArray::new(elements, offsets.into_array(), sizes.into_array(), validity) +} + +#[divan::bench(args = ZCTL_ARGS)] +fn extend_from_array_zctl(bencher: Bencher, (num_lists, list_size): (usize, usize)) { + let source = make_listview(num_lists, list_size, list_size, false); + debug_assert!(source.is_zero_copy_to_list()); + let source = source.into_array(); + + bencher.with_inputs(|| &source).bench_refs(|source| { + let mut builder = ListViewBuilder::::with_capacity( + Arc::new(DType::Primitive(I32, NonNullable)), + NonNullable, + num_lists * list_size, + num_lists, + ); + builder.extend_from_array(source); + divan::black_box(builder.finish_into_listview()) + }); +} + +#[divan::bench(args = NON_ZCTL_ARGS)] +fn extend_from_array_non_zctl_overlapping( + bencher: Bencher, + (num_lists, list_size): (usize, usize), +) { + // `step = 1` creates heavily overlapping lists, which forces the non-ZCTL extend path. + let source = make_listview(num_lists, list_size, 1, true); + debug_assert!(!source.is_zero_copy_to_list()); + let source = source.into_array(); + + bencher.with_inputs(|| &source).bench_refs(|source| { + let mut builder = ListViewBuilder::::with_capacity( + Arc::new(DType::Primitive(I32, NonNullable)), + Nullable, + num_lists * list_size, + num_lists, + ); + builder.extend_from_array(source); + divan::black_box(builder.finish_into_listview()) + }); +} diff --git a/vortex-array/src/builders/listview.rs b/vortex-array/src/builders/listview.rs index b339dc430f7..a694bbbf035 100644 --- a/vortex-array/src/builders/listview.rs +++ b/vortex-array/src/builders/listview.rs @@ -296,33 +296,15 @@ impl ArrayBuilder for ListViewBuilder { return; } - // If we do not have the guarantee that the array is zero-copyable to a list, then we have - // to manually append each scalar. - if !listview.is_zero_copy_to_list() { - for i in 0..listview.len() { - let list = listview - .scalar_at(i) - .vortex_expect("scalar_at failed in extend_from_array_unchecked"); - - self.append_scalar(&list) - .vortex_expect("was unable to extend the `ListViewBuilder`") - } - - return; - } - - // Otherwise, after removing any leading and trailing elements, we can simply bulk append - // the entire array. + // Normalize to an exact zero-copy-to-list layout and then bulk append. This avoids the + // very expensive scalar_at-per-list path for overlapping / out-of-order list views. let listview = listview .rebuild(ListViewRebuildMode::MakeExact) .vortex_expect("ListViewArray::rebuild(MakeExact) failed in extend_from_array"); debug_assert!(listview.is_zero_copy_to_list()); - self.nulls.append_validity_mask( - array - .validity_mask() - .vortex_expect("validity_mask in extend_from_array_unchecked"), - ); + self.nulls + .append_validity_mask(listview.listview_validity_mask()); // Bulk append the new elements (which should have no gaps or overlaps). let old_elements_len = self.elements_builder.len(); @@ -429,11 +411,13 @@ fn adjust_and_extend_offsets<'a, O: IntegerPType, A: IntegerPType>( mod tests { use std::sync::Arc; + use vortex_buffer::buffer; use vortex_error::VortexExpect; use super::ListViewBuilder; use crate::IntoArray; use crate::arrays::ListArray; + use crate::arrays::ListViewArray; use crate::arrays::listview::ListViewArrayExt; use crate::assert_arrays_eq; use crate::builders::ArrayBuilder; @@ -443,6 +427,7 @@ mod tests { use crate::dtype::Nullability::Nullable; use crate::dtype::PType::I32; use crate::scalar::Scalar; + use crate::validity::Validity; #[test] fn test_empty() { @@ -696,6 +681,50 @@ mod tests { ); } + #[test] + fn test_extend_from_array_overlapping_listview() { + let dtype: Arc = Arc::new(I32.into()); + + // Non-ZCTL source: + // - List 0: [10, 20] + // - List 1: null (size is intentionally non-zero in source metadata) + // - List 2: [10] + let source = unsafe { + ListViewArray::new_unchecked( + buffer![10i32, 20, 30].into_array(), + buffer![0u32, 1, 0].into_array(), + buffer![2u8, 2, 1].into_array(), + Validity::from_iter([true, false, true]), + ) + }; + assert!(!source.is_zero_copy_to_list()); + + let mut builder = + ListViewBuilder::::with_capacity(Arc::clone(&dtype), Nullable, 0, 0); + builder.extend_from_array(&source.into_array()); + + let listview = builder.finish_into_listview(); + assert_eq!(listview.len(), 3); + assert!(listview.is_zero_copy_to_list()); + + assert_arrays_eq!( + listview.list_elements_at(0).unwrap(), + PrimitiveArray::from_iter([10i32, 20]) + ); + assert!( + !listview + .validity() + .vortex_expect("listview validity should be derivable") + .is_valid(1) + .unwrap() + ); + assert_eq!(listview.list_elements_at(1).unwrap().len(), 0); + assert_arrays_eq!( + listview.list_elements_at(2).unwrap(), + PrimitiveArray::from_iter([10i32]) + ); + } + #[test] fn test_error_append_null_to_non_nullable() { let dtype: Arc = Arc::new(I32.into()); @@ -719,8 +748,6 @@ mod tests { #[test] fn test_append_array_as_list() { - use vortex_buffer::buffer; - let dtype: Arc = Arc::new(I32.into()); let mut builder = ListViewBuilder::::with_capacity(Arc::clone(&dtype), NonNullable, 20, 10);