Skip to content

Commit 97b18fb

Browse files
authored
Add benchmarks for take on a FilterArray (#7420)
Before we make changes to this code we should make sure we know what we are improving --------- Signed-off-by: Robert Kruszewski <github@robertk.io>
1 parent 5330d32 commit 97b18fb

2 files changed

Lines changed: 144 additions & 0 deletions

File tree

vortex-array/Cargo.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,10 @@ harness = false
176176
name = "take_fsl"
177177
harness = false
178178

179+
[[bench]]
180+
name = "take_filter"
181+
harness = false
182+
179183
[[bench]]
180184
name = "filter_bool"
181185
harness = false
Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
//! Benchmarks for taking from a lazy [`FilterArray`].
5+
//!
6+
//! Parameterized over:
7+
//! - Number of indices to take
8+
//! - Number of rows retained by the filter
9+
//! - Filter mask layout (single contiguous slice vs random positions)
10+
//! - Take index layout (sequential vs random ranks)
11+
12+
#![expect(clippy::unwrap_used)]
13+
#![expect(clippy::cast_possible_truncation)]
14+
15+
use divan::Bencher;
16+
use rand::RngExt;
17+
use rand::SeedableRng;
18+
use rand::rngs::StdRng;
19+
use rand::seq::SliceRandom;
20+
use vortex_array::ArrayRef;
21+
use vortex_array::IntoArray;
22+
use vortex_array::LEGACY_SESSION;
23+
use vortex_array::RecursiveCanonical;
24+
use vortex_array::VortexSessionExecute;
25+
use vortex_array::arrays::PrimitiveArray;
26+
use vortex_buffer::Buffer;
27+
use vortex_mask::Mask;
28+
29+
fn main() {
30+
divan::main();
31+
}
32+
33+
/// Length of the unfiltered source array; also the length passed to every mask constructor.
const ARRAY_LEN: usize = 100_000;
34+
/// Numbers of rows retained by the filter, supplied to each benchmark as const generic values.
const FILTERED_LENS: &[usize] = &[10_000, 50_000, 90_000];
35+
/// Numbers of take indices, supplied to each benchmark as runtime arguments.
const NUM_INDICES: &[usize] = &[1_000, 10_000];
36+
/// Seed for the RNG that shuffles positions in `random_mask`.
const MASK_SEED: u64 = 42;
37+
/// Seed for the RNG that draws take indices in `random_indices`.
const INDEX_SEED: u64 = 43;
38+
39+
fn primitive_array() -> ArrayRef {
40+
PrimitiveArray::from_iter(0..ARRAY_LEN as u32).into_array()
41+
}
42+
43+
fn slice_mask(filtered_len: usize) -> Mask {
44+
let start = (ARRAY_LEN - filtered_len) / 2;
45+
Mask::from_slices(ARRAY_LEN, vec![(start, start + filtered_len)])
46+
}
47+
48+
fn random_mask(filtered_len: usize) -> Mask {
49+
let mut indices: Vec<usize> = (0..ARRAY_LEN).collect();
50+
indices.shuffle(&mut StdRng::seed_from_u64(MASK_SEED));
51+
indices.truncate(filtered_len);
52+
indices.sort_unstable();
53+
Mask::from_indices(ARRAY_LEN, indices)
54+
}
55+
56+
fn sequential_indices(num_indices: usize) -> ArrayRef {
57+
Buffer::from_iter(0..num_indices as u64).into_array()
58+
}
59+
60+
fn random_indices(num_indices: usize, filtered_len: usize) -> ArrayRef {
61+
let mut rng = StdRng::seed_from_u64(INDEX_SEED);
62+
Buffer::from_iter((0..num_indices).map(|_| rng.random_range(0..filtered_len as u64)))
63+
.into_array()
64+
}
65+
66+
#[divan::bench(args = NUM_INDICES, consts = FILTERED_LENS)]
67+
fn take_filter_slice_mask_sequential_indices<const FILTERED_LEN: usize>(
68+
bencher: Bencher,
69+
num_indices: usize,
70+
) {
71+
let array = primitive_array().filter(slice_mask(FILTERED_LEN)).unwrap();
72+
let indices = sequential_indices(num_indices);
73+
74+
bencher
75+
.with_inputs(|| (&array, &indices, LEGACY_SESSION.create_execution_ctx()))
76+
.bench_refs(|(array, indices, ctx)| {
77+
array
78+
.take(indices.clone())
79+
.unwrap()
80+
.execute::<RecursiveCanonical>(ctx)
81+
.unwrap()
82+
});
83+
}
84+
85+
#[divan::bench(args = NUM_INDICES, consts = FILTERED_LENS)]
86+
fn take_filter_slice_mask_random_indices<const FILTERED_LEN: usize>(
87+
bencher: Bencher,
88+
num_indices: usize,
89+
) {
90+
let array = primitive_array().filter(slice_mask(FILTERED_LEN)).unwrap();
91+
let indices = random_indices(num_indices, FILTERED_LEN);
92+
93+
bencher
94+
.with_inputs(|| (&array, &indices, LEGACY_SESSION.create_execution_ctx()))
95+
.bench_refs(|(array, indices, ctx)| {
96+
array
97+
.take(indices.clone())
98+
.unwrap()
99+
.execute::<RecursiveCanonical>(ctx)
100+
.unwrap()
101+
});
102+
}
103+
104+
#[divan::bench(args = NUM_INDICES, consts = FILTERED_LENS)]
105+
fn take_filter_random_mask_sequential_indices<const FILTERED_LEN: usize>(
106+
bencher: Bencher,
107+
num_indices: usize,
108+
) {
109+
let array = primitive_array().filter(random_mask(FILTERED_LEN)).unwrap();
110+
let indices = sequential_indices(num_indices);
111+
112+
bencher
113+
.with_inputs(|| (&array, &indices, LEGACY_SESSION.create_execution_ctx()))
114+
.bench_refs(|(array, indices, ctx)| {
115+
array
116+
.take(indices.clone())
117+
.unwrap()
118+
.execute::<RecursiveCanonical>(ctx)
119+
.unwrap()
120+
});
121+
}
122+
123+
#[divan::bench(args = NUM_INDICES, consts = FILTERED_LENS)]
124+
fn take_filter_random_mask_random_indices<const FILTERED_LEN: usize>(
125+
bencher: Bencher,
126+
num_indices: usize,
127+
) {
128+
let array = primitive_array().filter(random_mask(FILTERED_LEN)).unwrap();
129+
let indices = random_indices(num_indices, FILTERED_LEN);
130+
131+
bencher
132+
.with_inputs(|| (&array, &indices, LEGACY_SESSION.create_execution_ctx()))
133+
.bench_refs(|(array, indices, ctx)| {
134+
array
135+
.take(indices.clone())
136+
.unwrap()
137+
.execute::<RecursiveCanonical>(ctx)
138+
.unwrap()
139+
});
140+
}

0 commit comments

Comments
 (0)