Skip to content

Commit 993bcd6

Browse files
committed
runend shit
Signed-off-by: Adam Gutglick <adam@spiraldb.com>
1 parent e22b3c3 commit 993bcd6

8 files changed

Lines changed: 669 additions & 52 deletions

File tree

encodings/runend/Cargo.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,3 +60,7 @@ harness = false
6060
[[bench]]
6161
name = "run_end_take"
6262
harness = false
63+
64+
# Benchmark target for run-end filtering. `harness = false` opts out of the default libtest
# bench harness; presumably because the bench source provides its own `main` — confirm.
[[bench]]
65+
name = "run_end_filter"
66+
harness = false
Lines changed: 235 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,235 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
#![allow(clippy::cast_possible_truncation, clippy::unwrap_used)]
5+
6+
use std::fmt;
7+
8+
use divan::Bencher;
9+
use rand::SeedableRng;
10+
use rand::rngs::StdRng;
11+
use rand::seq::SliceRandom;
12+
use vortex_array::ArrayRef;
13+
use vortex_array::IntoArray;
14+
use vortex_array::LEGACY_SESSION;
15+
use vortex_array::VortexSessionExecute;
16+
use vortex_array::arrays::PrimitiveArray;
17+
use vortex_buffer::Buffer;
18+
use vortex_mask::Mask;
19+
use vortex_runend::_benchmarking::RunEndFilterMode;
20+
use vortex_runend::_benchmarking::override_run_end_filter_mode;
21+
use vortex_runend::RunEnd;
22+
23+
fn main() {
24+
divan::main();
25+
}
26+
27+
/// Logical length of every benchmarked array and mask (2^20 elements).
const LEN: usize = 1 << 20;
/// Number of selected positions each mask shape aims for (2^15, i.e. `LEN / 32`).
const TRUE_COUNT: usize = 1 << 15;
/// Number of contiguous ranges produced by `few_long_slices_mask`.
const LONG_SLICE_COUNT: usize = 8;
/// Length of each range produced by `many_short_slices_mask`.
const SHORT_SLICE_LEN: usize = 8;
/// Upper bound on the number of clusters produced by `clustered_few_runs_mask`.
const CLUSTER_COUNT: usize = 8;
32+
33+
/// Layout of the selected positions in a benchmark filter mask.
#[derive(Clone, Copy, Debug)]
enum MaskShape {
    /// Positions sampled at random from the whole array (see `random_mask`).
    Random,
    /// A handful of long, evenly spaced contiguous ranges (see `few_long_slices_mask`).
    FewLongSlices,
    /// Many short, evenly spaced contiguous ranges (see `many_short_slices_mask`).
    ManyShortSlices,
    /// Whole runs grouped into a small number of clusters (see `clustered_few_runs_mask`).
    ClusteredFewRuns,
}

impl MaskShape {
    /// Returns the snake_case label that `Display` prints for this shape.
    const fn label(self) -> &'static str {
        match self {
            Self::Random => "random",
            Self::FewLongSlices => "few_long_slices",
            Self::ManyShortSlices => "many_short_slices",
            Self::ClusteredFewRuns => "clustered_few_runs",
        }
    }
}

impl fmt::Display for MaskShape {
    /// Writes the shape's label.
    ///
    /// Goes through `Formatter::pad` rather than `write!` so that width, fill and alignment
    /// flags (e.g. `{:>20}`) are honoured; with no flags the output is the bare label.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.pad(self.label())
    }
}
51+
52+
#[derive(Clone, Copy, Debug)]
53+
struct BenchArgs {
54+
run_length: usize,
55+
mask_shape: MaskShape,
56+
}
57+
58+
impl fmt::Display for BenchArgs {
59+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
60+
write!(f, "{}_runs_{}", self.mask_shape, self.run_length)
61+
}
62+
}
63+
64+
const BENCH_ARGS: &[BenchArgs] = &[
65+
BenchArgs {
66+
run_length: 16,
67+
mask_shape: MaskShape::Random,
68+
},
69+
BenchArgs {
70+
run_length: 256,
71+
mask_shape: MaskShape::Random,
72+
},
73+
BenchArgs {
74+
run_length: 4096,
75+
mask_shape: MaskShape::Random,
76+
},
77+
BenchArgs {
78+
run_length: 16,
79+
mask_shape: MaskShape::FewLongSlices,
80+
},
81+
BenchArgs {
82+
run_length: 256,
83+
mask_shape: MaskShape::FewLongSlices,
84+
},
85+
BenchArgs {
86+
run_length: 4096,
87+
mask_shape: MaskShape::FewLongSlices,
88+
},
89+
BenchArgs {
90+
run_length: 16,
91+
mask_shape: MaskShape::ManyShortSlices,
92+
},
93+
BenchArgs {
94+
run_length: 256,
95+
mask_shape: MaskShape::ManyShortSlices,
96+
},
97+
BenchArgs {
98+
run_length: 4096,
99+
mask_shape: MaskShape::ManyShortSlices,
100+
},
101+
BenchArgs {
102+
run_length: 16,
103+
mask_shape: MaskShape::ClusteredFewRuns,
104+
},
105+
BenchArgs {
106+
run_length: 256,
107+
mask_shape: MaskShape::ClusteredFewRuns,
108+
},
109+
BenchArgs {
110+
run_length: 4096,
111+
mask_shape: MaskShape::ClusteredFewRuns,
112+
},
113+
];
114+
115+
#[divan::bench(args = BENCH_ARGS)]
116+
fn filter_auto(bencher: Bencher, args: BenchArgs) {
117+
filter_with_mode(bencher, args, RunEndFilterMode::Auto);
118+
}
119+
120+
#[divan::bench(args = BENCH_ARGS)]
121+
fn filter_force_take(bencher: Bencher, args: BenchArgs) {
122+
filter_with_mode(bencher, args, RunEndFilterMode::Take);
123+
}
124+
125+
#[divan::bench(args = BENCH_ARGS)]
126+
fn filter_force_encoded(bencher: Bencher, args: BenchArgs) {
127+
filter_with_mode(bencher, args, RunEndFilterMode::Encoded);
128+
}
129+
130+
fn filter_with_mode(bencher: Bencher, args: BenchArgs, filter_mode: RunEndFilterMode) {
131+
let array = run_end_fixture(args.run_length);
132+
let mask = mask_fixture(args.mask_shape, args.run_length);
133+
134+
bencher
135+
.with_inputs(|| {
136+
(
137+
array.clone(),
138+
mask.clone(),
139+
LEGACY_SESSION.create_execution_ctx(),
140+
)
141+
})
142+
.bench_refs(|(array, mask, ctx)| {
143+
let _filter_mode_guard = override_run_end_filter_mode(filter_mode);
144+
let result = array
145+
.filter(mask.clone())
146+
.unwrap()
147+
.execute::<ArrayRef>(ctx)
148+
.unwrap();
149+
divan::black_box(result);
150+
});
151+
}
152+
153+
fn run_end_fixture(run_length: usize) -> ArrayRef {
154+
let run_count = LEN.div_ceil(run_length);
155+
let ends = (0..run_count)
156+
.map(|run_idx| ((run_idx + 1) * run_length).min(LEN) as u32)
157+
.collect::<Buffer<_>>()
158+
.into_array();
159+
let values =
160+
PrimitiveArray::from_iter((0..run_count).map(|run_idx| run_idx as i32)).into_array();
161+
162+
RunEnd::new(ends, values).into_array()
163+
}
164+
165+
fn mask_fixture(mask_shape: MaskShape, run_length: usize) -> Mask {
166+
match mask_shape {
167+
MaskShape::Random => random_mask(run_length),
168+
MaskShape::FewLongSlices => few_long_slices_mask(run_length),
169+
MaskShape::ManyShortSlices => many_short_slices_mask(run_length),
170+
MaskShape::ClusteredFewRuns => clustered_few_runs_mask(run_length),
171+
}
172+
}
173+
174+
fn random_mask(run_length: usize) -> Mask {
175+
let mut rng = StdRng::seed_from_u64(run_length as u64);
176+
let mut indices = (0..LEN).collect::<Vec<_>>();
177+
indices.shuffle(&mut rng);
178+
indices.truncate(TRUE_COUNT);
179+
indices.sort_unstable();
180+
181+
Mask::from_indices(LEN, indices)
182+
}
183+
184+
fn few_long_slices_mask(run_length: usize) -> Mask {
185+
let slice_len = TRUE_COUNT / LONG_SLICE_COUNT;
186+
let spacing = LEN / LONG_SLICE_COUNT;
187+
let misalignment = (run_length / 2).min(slice_len / 2);
188+
let slices = (0..LONG_SLICE_COUNT)
189+
.map(|slice_idx| {
190+
let start = slice_idx * spacing + misalignment;
191+
(start, start + slice_len)
192+
})
193+
.collect();
194+
195+
Mask::from_slices(LEN, slices)
196+
}
197+
198+
fn many_short_slices_mask(run_length: usize) -> Mask {
199+
let slice_count = TRUE_COUNT / SHORT_SLICE_LEN;
200+
let spacing = LEN / slice_count;
201+
let misalignment = (run_length / 4).min(spacing - SHORT_SLICE_LEN);
202+
let slices = (0..slice_count)
203+
.map(|slice_idx| {
204+
let start = slice_idx * spacing + misalignment;
205+
(start, start + SHORT_SLICE_LEN)
206+
})
207+
.collect();
208+
209+
Mask::from_slices(LEN, slices)
210+
}
211+
212+
fn clustered_few_runs_mask(run_length: usize) -> Mask {
213+
let run_count = LEN.div_ceil(run_length);
214+
let runs_to_keep = TRUE_COUNT.div_ceil(run_length);
215+
let cluster_count = runs_to_keep.min(CLUSTER_COUNT);
216+
let base_cluster_runs = runs_to_keep / cluster_count;
217+
let extra_cluster_runs = runs_to_keep % cluster_count;
218+
let spacing = run_count / cluster_count;
219+
220+
let mut next_start_run = 0usize;
221+
let slices = (0..cluster_count)
222+
.map(|cluster_idx| {
223+
let cluster_runs = base_cluster_runs + usize::from(cluster_idx < extra_cluster_runs);
224+
let start_run = next_start_run;
225+
let end_run = (start_run + cluster_runs).min(run_count);
226+
next_start_run += spacing;
227+
228+
let start = start_run * run_length;
229+
let end = (end_run * run_length).min(LEN);
230+
(start, end)
231+
})
232+
.collect();
233+
234+
Mask::from_slices(LEN, slices)
235+
}

0 commit comments

Comments
 (0)