Skip to content

Commit 7b47788

Browse files
bench: bit-packed compare-constant baseline (#8012)
## Summary

- Adds a divan benchmark `bitpack_compare` in `vortex-fastlanes` that measures `Operator::Eq` / `Operator::Lt` comparisons of a `BitPackedData` array against an out-of-range constant, versus an explicit "decompress, then Arrow compare" baseline that materialises the unpacked `PrimitiveArray` first.
- The constant is chosen as `1 << BW`, just past the packable range, so a future compare-constant kernel can recognise it and short-circuit. Today both arms decompress; this PR establishes a baseline for that follow-up to land against.
- Grid sized for fast runs: `len ∈ {1024, 65536}`, `bit_width ∈ {4, 16}`, Eq + Lt.

The follow-up optimization (out-of-range fast path on `BitPacked`, plus the `bitpack_constant` analytical encoder) is in #PR2-PLACEHOLDER, stacked on this branch. Splitting the bench out lets the speedup PR show concrete numbers against this measured baseline.

## Test plan

- [x] `cargo check -p vortex-fastlanes --benches`
- [ ] `cargo bench -p vortex-fastlanes --bench bitpack_compare` records the slow baseline numbers prior to the follow-up landing

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Signed-off-by: Joe Isaacs <joe.isaacs@live.co.uk>
1 parent faf7e42 commit 7b47788

2 files changed

Lines changed: 112 additions & 0 deletions

File tree

encodings/fastlanes/Cargo.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,3 +59,7 @@ required-features = ["_test-harness"]
5959
name = "bit_transpose"
6060
harness = false
6161
required-features = ["_test-harness"]
62+
63+
[[bench]]
64+
name = "bitpack_compare"
65+
harness = false
Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
//! Benchmarks comparing an already-packed `BitPackedArray` against a constant that lies
//! outside the packable range `[0, 2^bit_width - 1]`. The `fast_*` benches issue the compare
//! directly on the packed array (the case an out-of-range fast path can short-circuit); the
//! `baseline_*` benches explicitly decompress first, then compare.
//!
//! Sized to finish quickly. Run with `cargo bench -p vortex-fastlanes --bench bitpack_compare`.
9+
10+
#![expect(clippy::unwrap_used)]
11+
#![expect(clippy::cast_possible_truncation)]
12+
13+
use divan::Bencher;
14+
use divan::counter::ItemsCount;
15+
use vortex_array::ArrayRef;
16+
use vortex_array::ExecutionCtx;
17+
use vortex_array::IntoArray;
18+
use vortex_array::LEGACY_SESSION;
19+
use vortex_array::VortexSessionExecute;
20+
use vortex_array::arrays::BoolArray;
21+
use vortex_array::arrays::ConstantArray;
22+
use vortex_array::arrays::PrimitiveArray;
23+
use vortex_array::builtins::ArrayBuiltins;
24+
use vortex_array::scalar_fn::fns::operators::Operator;
25+
use vortex_array::validity::Validity;
26+
use vortex_buffer::BufferMut;
27+
use vortex_fastlanes::BitPackedData;
28+
29+
fn main() {
30+
divan::main();
31+
}
32+
33+
/// Array lengths (in elements) each benchmark is run at.
const LENS: &[usize] = &[1024, 65_536];
/// Bit widths each benchmark is instantiated with (the `BW` const generic).
const BIT_WIDTHS: &[u8] = &[4, 16];
35+
36+
/// Build a packed array of varied in-range values, plus an out-of-range constant RHS for
37+
/// the fast-path benches.
38+
fn build_inputs<const BW: u8>(len: usize) -> (ArrayRef, ArrayRef, ExecutionCtx) {
39+
let mut ctx = LEGACY_SESSION.create_execution_ctx();
40+
let buf: BufferMut<u32> = (0..len).map(|i| (i as u32) % (1 << BW)).collect();
41+
let array = BitPackedData::encode(
42+
&PrimitiveArray::new(buf.freeze(), Validity::NonNullable).into_array(),
43+
BW,
44+
&mut ctx,
45+
)
46+
.unwrap()
47+
.into_array();
48+
// 1 << BW is just past the packable range, so the out-of-range fast path fires.
49+
let constant = 1u32 << BW;
50+
let rhs = ConstantArray::new(constant, len).into_array();
51+
(array, rhs, ctx)
52+
}
53+
54+
#[divan::bench(args = LENS, consts = BIT_WIDTHS)]
55+
fn fast_eq_out_of_range<const BW: u8>(bencher: Bencher, len: usize) {
56+
let (array, rhs, mut ctx) = build_inputs::<BW>(len);
57+
bencher.counter(ItemsCount::new(len)).bench_local(|| {
58+
array
59+
.clone()
60+
.binary(rhs.clone(), Operator::Eq)
61+
.unwrap()
62+
.execute::<BoolArray>(&mut ctx)
63+
.unwrap()
64+
});
65+
}
66+
67+
#[divan::bench(args = LENS, consts = BIT_WIDTHS)]
68+
fn baseline_eq<const BW: u8>(bencher: Bencher, len: usize) {
69+
let (array, rhs, mut ctx) = build_inputs::<BW>(len);
70+
bencher.counter(ItemsCount::new(len)).bench_local(|| {
71+
// What the fallback would do: materialize the unpacked primitive, then run Arrow
72+
// compare on it.
73+
let primitive = array.clone().execute::<PrimitiveArray>(&mut ctx).unwrap();
74+
primitive
75+
.into_array()
76+
.binary(rhs.clone(), Operator::Eq)
77+
.unwrap()
78+
.execute::<BoolArray>(&mut ctx)
79+
.unwrap()
80+
});
81+
}
82+
83+
#[divan::bench(args = LENS, consts = BIT_WIDTHS)]
84+
fn fast_lt_out_of_range<const BW: u8>(bencher: Bencher, len: usize) {
85+
let (array, rhs, mut ctx) = build_inputs::<BW>(len);
86+
bencher.counter(ItemsCount::new(len)).bench_local(|| {
87+
array
88+
.clone()
89+
.binary(rhs.clone(), Operator::Lt)
90+
.unwrap()
91+
.execute::<BoolArray>(&mut ctx)
92+
.unwrap()
93+
});
94+
}
95+
96+
#[divan::bench(args = LENS, consts = BIT_WIDTHS)]
97+
fn baseline_lt<const BW: u8>(bencher: Bencher, len: usize) {
98+
let (array, rhs, mut ctx) = build_inputs::<BW>(len);
99+
bencher.counter(ItemsCount::new(len)).bench_local(|| {
100+
let primitive = array.clone().execute::<PrimitiveArray>(&mut ctx).unwrap();
101+
primitive
102+
.into_array()
103+
.binary(rhs.clone(), Operator::Lt)
104+
.unwrap()
105+
.execute::<BoolArray>(&mut ctx)
106+
.unwrap()
107+
});
108+
}

0 commit comments

Comments
 (0)