|
| 1 | +// SPDX-License-Identifier: Apache-2.0 |
| 2 | +// SPDX-FileCopyrightText: Copyright the Vortex contributors |
| 3 | + |
| 4 | +//! Benchmarks for the `list_length` scalar function over `List` and `ListView` inputs. |
| 5 | +//! |
| 6 | +//! `list_length` reads only the offsets/sizes (never the elements), so its cost scales with the |
| 7 | +//! number of lists. |
| 8 | +
|
| 9 | +#![expect(clippy::unwrap_used)] |
| 10 | +#![expect(clippy::cast_possible_truncation)] |
| 11 | + |
| 12 | +use std::sync::LazyLock; |
| 13 | + |
| 14 | +use divan::Bencher; |
| 15 | +use rand::RngExt; |
| 16 | +use rand::SeedableRng; |
| 17 | +use rand::distr::Uniform; |
| 18 | +use rand::rngs::StdRng; |
| 19 | +use vortex_array::ArrayRef; |
| 20 | +use vortex_array::Canonical; |
| 21 | +use vortex_array::IntoArray; |
| 22 | +use vortex_array::VortexSessionExecute; |
| 23 | +use vortex_array::arrays::BoolArray; |
| 24 | +use vortex_array::arrays::ListArray; |
| 25 | +use vortex_array::arrays::ListViewArray; |
| 26 | +use vortex_array::arrays::PrimitiveArray; |
| 27 | +use vortex_array::expr::list_length; |
| 28 | +use vortex_array::expr::root; |
| 29 | +use vortex_array::validity::Validity; |
| 30 | +use vortex_buffer::Buffer; |
| 31 | +use vortex_session::VortexSession; |
| 32 | + |
/// Benchmark binary entry point; delegates to `divan::main`.
fn main() {
    divan::main();
}
| 36 | + |
/// Session shared by every benchmark in this file, lazily initialised from
/// `vortex_array::array_session` on first access.
static SESSION: LazyLock<VortexSession> = LazyLock::new(vortex_array::array_session);

/// Multiplied by the list count in `random_lists` to get the total number of elements.
const BASE_LIST_SIZE: usize = 8;

/// List count used by the `*_small` benchmarks.
const SMALL: usize = 100;
/// List count used by the `*_medium` benchmarks.
const MEDIUM: usize = 10_000;
/// List count used by the `*_large` benchmarks.
const LARGE: usize = 1_000_000;
| 44 | + |
| 45 | +/// A uniformly-random partition of `num_lists * LIST_SIZE` elements into `num_lists` lists, |
| 46 | +/// plus a validity mask with ~1/8 of lists null at random positions. |
| 47 | +fn random_lists(num_lists: usize) -> (Vec<i32>, Validity) { |
| 48 | + let mut rng = StdRng::seed_from_u64(num_lists as u64); |
| 49 | + let total = (num_lists * BASE_LIST_SIZE) as i32; |
| 50 | + |
| 51 | + let cut_dist = Uniform::new_inclusive(0i32, total).unwrap(); |
| 52 | + let mut cuts: Vec<i32> = (0..num_lists - 1).map(|_| rng.sample(cut_dist)).collect(); |
| 53 | + cuts.sort_unstable(); |
| 54 | + let mut sizes = Vec::with_capacity(num_lists); |
| 55 | + let mut prev = 0i32; |
| 56 | + for cut in cuts { |
| 57 | + sizes.push(cut - prev); |
| 58 | + prev = cut; |
| 59 | + } |
| 60 | + sizes.push(total - prev); |
| 61 | + |
| 62 | + let null_dist = Uniform::new(0u32, 8).unwrap(); |
| 63 | + let valid = (0..num_lists).map(|_| rng.sample(null_dist) != 0); |
| 64 | + ( |
| 65 | + sizes, |
| 66 | + Validity::Array(BoolArray::from_iter(valid).into_array()), |
| 67 | + ) |
| 68 | +} |
| 69 | + |
| 70 | +/// A canonical `List<i32>` of `num_lists` variable-length lists, ~1/8 of them null. |
| 71 | +fn make_list(num_lists: usize) -> ArrayRef { |
| 72 | + let (sizes, validity) = random_lists(num_lists); |
| 73 | + let total: i32 = sizes.iter().sum(); |
| 74 | + let elements = PrimitiveArray::from_iter(0..total).into_array(); |
| 75 | + let offsets: Buffer<i32> = std::iter::once(0) |
| 76 | + .chain(sizes.iter().scan(0i32, |acc, &s| { |
| 77 | + *acc += s; |
| 78 | + Some(*acc) |
| 79 | + })) |
| 80 | + .collect(); |
| 81 | + ListArray::try_new(elements, offsets.into_array(), validity) |
| 82 | + .unwrap() |
| 83 | + .into_array() |
| 84 | +} |
| 85 | + |
| 86 | +/// A gapless `ListView<i32>` of `num_lists` variable-length lists, ~1/8 of them null. |
| 87 | +fn make_listview(num_lists: usize) -> ArrayRef { |
| 88 | + let (sizes, validity) = random_lists(num_lists); |
| 89 | + let total: i32 = sizes.iter().sum(); |
| 90 | + let elements = PrimitiveArray::from_iter(0..total).into_array(); |
| 91 | + let offsets: Buffer<i32> = sizes |
| 92 | + .iter() |
| 93 | + .scan(0i32, |acc, &s| { |
| 94 | + let start = *acc; |
| 95 | + *acc += s; |
| 96 | + Some(start) |
| 97 | + }) |
| 98 | + .collect(); |
| 99 | + let sizes: Buffer<i32> = sizes.into_iter().collect(); |
| 100 | + ListViewArray::new(elements, offsets.into_array(), sizes.into_array(), validity).into_array() |
| 101 | +} |
| 102 | + |
| 103 | +/// Apply `list_length(root())` and materialize the result. |
| 104 | +fn run(bencher: Bencher, array: ArrayRef) { |
| 105 | + let expr = list_length(root()); |
| 106 | + bencher |
| 107 | + .with_inputs(|| (&array, SESSION.create_execution_ctx())) |
| 108 | + .bench_refs(|(array, ctx)| { |
| 109 | + array |
| 110 | + .clone() |
| 111 | + .apply(&expr) |
| 112 | + .unwrap() |
| 113 | + .execute::<Canonical>(ctx) |
| 114 | + .unwrap() |
| 115 | + }); |
| 116 | +} |
| 117 | + |
| 118 | +#[divan::bench] |
| 119 | +fn list_length_small(bencher: Bencher) { |
| 120 | + run(bencher, make_list(SMALL)); |
| 121 | +} |
| 122 | + |
| 123 | +#[divan::bench] |
| 124 | +fn list_length_medium(bencher: Bencher) { |
| 125 | + run(bencher, make_list(MEDIUM)); |
| 126 | +} |
| 127 | + |
| 128 | +#[divan::bench] |
| 129 | +fn list_length_large(bencher: Bencher) { |
| 130 | + run(bencher, make_list(LARGE)); |
| 131 | +} |
| 132 | + |
| 133 | +#[divan::bench] |
| 134 | +fn listview_length_small(bencher: Bencher) { |
| 135 | + run(bencher, make_listview(SMALL)); |
| 136 | +} |
| 137 | + |
| 138 | +#[divan::bench] |
| 139 | +fn listview_length_medium(bencher: Bencher) { |
| 140 | + run(bencher, make_listview(MEDIUM)); |
| 141 | +} |
| 142 | + |
| 143 | +#[divan::bench] |
| 144 | +fn listview_length_large(bencher: Bencher) { |
| 145 | + run(bencher, make_listview(LARGE)); |
| 146 | +} |
0 commit comments