Skip to content

Commit c4ad4e2

Browse files
authored
Remove ListViewBuilder scalar_at (#7438)
| Benchmark | Before (median) | After (median) | Speedup | |---|---:|---:|---:| | `extend_from_array_non_zctl_overlapping (1000, 8)` | 902.2 µs | 63.66 µs | 14.17x | | `extend_from_array_non_zctl_overlapping (1000, 32)` | 2.446 ms | 76.54 µs | 31.96x | | `extend_from_array_non_zctl_overlapping (10000, 8)` | 9.821 ms | 561.5 µs | 17.49x | | `extend_from_array_zctl (1000, 8)` | 1.057 ms | 13.95 µs | 75.77x | | `extend_from_array_zctl (1000, 64)` | 5.416 ms | 38.29 µs | 141.45x | | `extend_from_array_zctl (10000, 8)` | 10.52 ms | 111.3 µs | 94.52x | Signed-off-by: Nicholas Gates <nick@nickgates.com>
1 parent f747201 commit c4ad4e2

3 files changed

Lines changed: 152 additions & 24 deletions

File tree

vortex-array/Cargo.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -187,3 +187,7 @@ harness = false
187187
[[bench]]
188188
name = "listview_rebuild"
189189
harness = false
190+
191+
[[bench]]
192+
name = "listview_builder_extend"
193+
harness = false
Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
#![expect(clippy::cast_possible_truncation)]
5+
#![expect(clippy::cast_possible_wrap)]
6+
7+
use std::sync::Arc;
8+
9+
use divan::Bencher;
10+
use vortex_array::IntoArray;
11+
use vortex_array::arrays::ListViewArray;
12+
use vortex_array::arrays::PrimitiveArray;
13+
use vortex_array::builders::ArrayBuilder;
14+
use vortex_array::builders::ListViewBuilder;
15+
use vortex_array::dtype::DType;
16+
use vortex_array::dtype::Nullability::NonNullable;
17+
use vortex_array::dtype::Nullability::Nullable;
18+
use vortex_array::dtype::PType::I32;
19+
use vortex_array::validity::Validity;
20+
use vortex_buffer::Buffer;
21+
22+
fn main() {
23+
divan::main();
24+
}
25+
26+
const ZCTL_ARGS: &[(usize, usize)] = &[
27+
// num_lists, list_size
28+
(1_000, 8),
29+
(1_000, 64),
30+
(10_000, 8),
31+
];
32+
33+
const NON_ZCTL_ARGS: &[(usize, usize)] = &[
34+
// num_lists, list_size
35+
(1_000, 8),
36+
(1_000, 32),
37+
(10_000, 8),
38+
];
39+
40+
fn make_listview(
41+
num_lists: usize,
42+
list_size: usize,
43+
step: usize,
44+
with_nulls: bool,
45+
) -> ListViewArray {
46+
let element_count = step * num_lists + list_size;
47+
let elements = PrimitiveArray::from_iter(0..element_count as i32).into_array();
48+
let offsets: Buffer<u32> = (0..num_lists).map(|i| (i * step) as u32).collect();
49+
let sizes: Buffer<u16> = std::iter::repeat_n(list_size as u16, num_lists).collect();
50+
let validity = if with_nulls {
51+
Validity::from_iter((0..num_lists).map(|i| i % 5 != 0))
52+
} else {
53+
Validity::NonNullable
54+
};
55+
56+
ListViewArray::new(elements, offsets.into_array(), sizes.into_array(), validity)
57+
}
58+
59+
#[divan::bench(args = ZCTL_ARGS)]
60+
fn extend_from_array_zctl(bencher: Bencher, (num_lists, list_size): (usize, usize)) {
61+
let source = make_listview(num_lists, list_size, list_size, false);
62+
debug_assert!(source.is_zero_copy_to_list());
63+
let source = source.into_array();
64+
65+
bencher.with_inputs(|| &source).bench_refs(|source| {
66+
let mut builder = ListViewBuilder::<u32, u32>::with_capacity(
67+
Arc::new(DType::Primitive(I32, NonNullable)),
68+
NonNullable,
69+
num_lists * list_size,
70+
num_lists,
71+
);
72+
builder.extend_from_array(source);
73+
divan::black_box(builder.finish_into_listview())
74+
});
75+
}
76+
77+
#[divan::bench(args = NON_ZCTL_ARGS)]
78+
fn extend_from_array_non_zctl_overlapping(
79+
bencher: Bencher,
80+
(num_lists, list_size): (usize, usize),
81+
) {
82+
// `step = 1` creates heavily overlapping lists, which forces the non-ZCTL extend path.
83+
let source = make_listview(num_lists, list_size, 1, true);
84+
debug_assert!(!source.is_zero_copy_to_list());
85+
let source = source.into_array();
86+
87+
bencher.with_inputs(|| &source).bench_refs(|source| {
88+
let mut builder = ListViewBuilder::<u32, u8>::with_capacity(
89+
Arc::new(DType::Primitive(I32, NonNullable)),
90+
Nullable,
91+
num_lists * list_size,
92+
num_lists,
93+
);
94+
builder.extend_from_array(source);
95+
divan::black_box(builder.finish_into_listview())
96+
});
97+
}

vortex-array/src/builders/listview.rs

Lines changed: 51 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -296,33 +296,15 @@ impl<O: IntegerPType, S: IntegerPType> ArrayBuilder for ListViewBuilder<O, S> {
296296
return;
297297
}
298298

299-
// If we do not have the guarantee that the array is zero-copyable to a list, then we have
300-
// to manually append each scalar.
301-
if !listview.is_zero_copy_to_list() {
302-
for i in 0..listview.len() {
303-
let list = listview
304-
.scalar_at(i)
305-
.vortex_expect("scalar_at failed in extend_from_array_unchecked");
306-
307-
self.append_scalar(&list)
308-
.vortex_expect("was unable to extend the `ListViewBuilder`")
309-
}
310-
311-
return;
312-
}
313-
314-
// Otherwise, after removing any leading and trailing elements, we can simply bulk append
315-
// the entire array.
299+
// Normalize to an exact zero-copy-to-list layout and then bulk append. This avoids the
300+
// very expensive scalar_at-per-list path for overlapping / out-of-order list views.
316301
let listview = listview
317302
.rebuild(ListViewRebuildMode::MakeExact)
318303
.vortex_expect("ListViewArray::rebuild(MakeExact) failed in extend_from_array");
319304
debug_assert!(listview.is_zero_copy_to_list());
320305

321-
self.nulls.append_validity_mask(
322-
array
323-
.validity_mask()
324-
.vortex_expect("validity_mask in extend_from_array_unchecked"),
325-
);
306+
self.nulls
307+
.append_validity_mask(listview.listview_validity_mask());
326308

327309
// Bulk append the new elements (which should have no gaps or overlaps).
328310
let old_elements_len = self.elements_builder.len();
@@ -429,11 +411,13 @@ fn adjust_and_extend_offsets<'a, O: IntegerPType, A: IntegerPType>(
429411
mod tests {
430412
use std::sync::Arc;
431413

414+
use vortex_buffer::buffer;
432415
use vortex_error::VortexExpect;
433416

434417
use super::ListViewBuilder;
435418
use crate::IntoArray;
436419
use crate::arrays::ListArray;
420+
use crate::arrays::ListViewArray;
437421
use crate::arrays::listview::ListViewArrayExt;
438422
use crate::assert_arrays_eq;
439423
use crate::builders::ArrayBuilder;
@@ -443,6 +427,7 @@ mod tests {
443427
use crate::dtype::Nullability::Nullable;
444428
use crate::dtype::PType::I32;
445429
use crate::scalar::Scalar;
430+
use crate::validity::Validity;
446431

447432
#[test]
448433
fn test_empty() {
@@ -696,6 +681,50 @@ mod tests {
696681
);
697682
}
698683

684+
#[test]
685+
fn test_extend_from_array_overlapping_listview() {
686+
let dtype: Arc<DType> = Arc::new(I32.into());
687+
688+
// Non-ZCTL source:
689+
// - List 0: [10, 20]
690+
// - List 1: null (size is intentionally non-zero in source metadata)
691+
// - List 2: [10]
692+
let source = unsafe {
693+
ListViewArray::new_unchecked(
694+
buffer![10i32, 20, 30].into_array(),
695+
buffer![0u32, 1, 0].into_array(),
696+
buffer![2u8, 2, 1].into_array(),
697+
Validity::from_iter([true, false, true]),
698+
)
699+
};
700+
assert!(!source.is_zero_copy_to_list());
701+
702+
let mut builder =
703+
ListViewBuilder::<u32, u8>::with_capacity(Arc::clone(&dtype), Nullable, 0, 0);
704+
builder.extend_from_array(&source.into_array());
705+
706+
let listview = builder.finish_into_listview();
707+
assert_eq!(listview.len(), 3);
708+
assert!(listview.is_zero_copy_to_list());
709+
710+
assert_arrays_eq!(
711+
listview.list_elements_at(0).unwrap(),
712+
PrimitiveArray::from_iter([10i32, 20])
713+
);
714+
assert!(
715+
!listview
716+
.validity()
717+
.vortex_expect("listview validity should be derivable")
718+
.is_valid(1)
719+
.unwrap()
720+
);
721+
assert_eq!(listview.list_elements_at(1).unwrap().len(), 0);
722+
assert_arrays_eq!(
723+
listview.list_elements_at(2).unwrap(),
724+
PrimitiveArray::from_iter([10i32])
725+
);
726+
}
727+
699728
#[test]
700729
fn test_error_append_null_to_non_nullable() {
701730
let dtype: Arc<DType> = Arc::new(I32.into());
@@ -719,8 +748,6 @@ mod tests {
719748

720749
#[test]
721750
fn test_append_array_as_list() {
722-
use vortex_buffer::buffer;
723-
724751
let dtype: Arc<DType> = Arc::new(I32.into());
725752
let mut builder =
726753
ListViewBuilder::<u32, u32>::with_capacity(Arc::clone(&dtype), NonNullable, 20, 10);

0 commit comments

Comments
 (0)