Skip to content

Commit dba7935

Browse files
Add bit-packed cast benchmark (#8058)
## Summary

- Add a `cast_bitpacked` divan benchmark for widening bit-packed `u16` arrays to `u32`.
- Compare the public `array.cast(u32).execute()` path against an explicit canonicalize-then-cast baseline.
- Cover single and chunked inputs with and without patches.

## Checks

- `cargo +nightly fmt --all`
- `cargo bench -p vortex-fastlanes --features _test-harness --bench cast_bitpacked --no-run`

---------

Signed-off-by: Joe Isaacs <joe.isaacs@live.co.uk>
1 parent 012d0ec commit dba7935

2 files changed

Lines changed: 113 additions & 0 deletions

File tree

encodings/fastlanes/Cargo.toml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,3 +63,8 @@ required-features = ["_test-harness"]
6363
[[bench]]
6464
name = "bitpack_compare"
6565
harness = false
66+
67+
[[bench]]
68+
name = "cast_bitpacked"
69+
harness = false
70+
required-features = ["_test-harness"]
Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
//! Benchmarks the cost of widening a bit-packed narrow integer column to a wider integer type on
5+
//! decompression (e.g. `u16 -> u32`).
6+
//!
7+
//! Two strategies are compared:
8+
//!
9+
//! - `cast_execute`: the real public path, `array.cast(u32).execute()`.
10+
//! - `canonicalize_then_cast`: explicitly canonicalizes to a full-length `u16` `PrimitiveArray` and
11+
//! then casts that to `u32`.
12+
13+
#![expect(clippy::unwrap_used)]
14+
15+
use std::sync::LazyLock;
16+
17+
use divan::Bencher;
18+
use rand::RngExt;
19+
use rand::SeedableRng;
20+
use rand::prelude::StdRng;
21+
use vortex_array::ArrayRef;
22+
use vortex_array::IntoArray;
23+
use vortex_array::VortexSessionExecute;
24+
use vortex_array::arrays::ChunkedArray;
25+
use vortex_array::arrays::PrimitiveArray;
26+
use vortex_array::builtins::ArrayBuiltins;
27+
use vortex_array::dtype::DType;
28+
use vortex_array::dtype::Nullability;
29+
use vortex_array::dtype::PType;
30+
use vortex_array::session::ArraySession;
31+
use vortex_array::validity::Validity;
32+
use vortex_buffer::BufferMut;
33+
use vortex_error::VortexExpect;
34+
use vortex_fastlanes::BitPackedArray;
35+
use vortex_fastlanes::BitPackedData;
36+
use vortex_session::VortexSession;
37+
38+
fn main() {
39+
divan::main();
40+
}
41+
42+
static SESSION: LazyLock<VortexSession> =
43+
LazyLock::new(|| VortexSession::empty().with::<ArraySession>());
44+
45+
const U32: DType = DType::Primitive(PType::U32, Nullability::NonNullable);
46+
47+
// (chunk_len, chunk_count, fraction_patched)
48+
const ARGS: &[(usize, usize, f64)] = &[
49+
(65_536, 1, 0.00),
50+
(65_536, 1, 0.01),
51+
(65_536, 4, 0.00),
52+
(65_536, 4, 0.01),
53+
(262_144, 1, 0.00),
54+
(262_144, 1, 0.01),
55+
];
56+
57+
/// Build a single bit-packed `u16` chunk. Most values fit in `bit_width` bits; `fraction_patched`
58+
/// of them are large enough to require patches.
59+
fn make_chunk(rng: &mut StdRng, len: usize, fraction_patched: f64) -> BitPackedArray {
60+
let bit_width = 9u8;
61+
let cap = 1u16 << bit_width;
62+
let values = (0..len)
63+
.map(|_| {
64+
if rng.random_bool(fraction_patched) {
65+
rng.random_range(cap..u16::MAX)
66+
} else {
67+
rng.random_range(0..cap)
68+
}
69+
})
70+
.collect::<BufferMut<u16>>();
71+
let array = PrimitiveArray::new(values, Validity::NonNullable);
72+
BitPackedData::encode(
73+
&array.into_array(),
74+
bit_width,
75+
&mut SESSION.create_execution_ctx(),
76+
)
77+
.vortex_expect("encode")
78+
}
79+
80+
fn make_chunks(len: usize, count: usize, fraction_patched: f64) -> Vec<BitPackedArray> {
81+
let mut rng = StdRng::seed_from_u64(0);
82+
(0..count)
83+
.map(|_| make_chunk(&mut rng, len, fraction_patched))
84+
.collect()
85+
}
86+
87+
fn single(chunks: &[BitPackedArray]) -> ArrayRef {
88+
if chunks.len() == 1 {
89+
chunks[0].clone().into_array()
90+
} else {
91+
ChunkedArray::from_iter(chunks.iter().map(|c| c.clone().into_array())).into_array()
92+
}
93+
}
94+
95+
#[divan::bench(args = ARGS)]
96+
fn cast_execute(bencher: Bencher, (chunk_len, chunk_count, frac): (usize, usize, f64)) {
97+
let chunks = make_chunks(chunk_len, chunk_count, frac);
98+
bencher
99+
.with_inputs(|| (single(&chunks), SESSION.create_execution_ctx()))
100+
.bench_refs(|(array, ctx)| {
101+
array
102+
.clone()
103+
.cast(U32)
104+
.unwrap()
105+
.execute::<PrimitiveArray>(ctx)
106+
.unwrap()
107+
});
108+
}

0 commit comments

Comments
 (0)