Skip to content

Commit 9a56591

Browse files
committed
Add bit-packed cast benchmark
Signed-off-by: Joe Isaacs <joe.isaacs@live.co.uk>
1 parent 012d0ec commit 9a56591

2 files changed

Lines changed: 133 additions & 0 deletions

File tree

encodings/fastlanes/Cargo.toml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,3 +63,8 @@ required-features = ["_test-harness"]
6363
[[bench]]
6464
name = "bitpack_compare"
6565
harness = false
66+
67+
[[bench]]
68+
name = "cast_bitpacked"
69+
harness = false
70+
required-features = ["_test-harness"]
Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
//! Benchmarks the cost of widening a bit-packed narrow integer column to a wider integer type on
5+
//! decompression (e.g. `u16 -> u32`).
6+
//!
7+
//! Two strategies are compared:
8+
//!
9+
//! - `cast_execute`: the real public path, `array.cast(u32).execute()`.
10+
//! - `canonicalize_then_cast`: explicitly canonicalizes to a full-length `u16` `PrimitiveArray` and
11+
//! then casts that to `u32`.
12+
13+
#![expect(clippy::unwrap_used)]
14+
15+
use std::sync::LazyLock;
16+
17+
use divan::Bencher;
18+
use rand::RngExt;
19+
use rand::SeedableRng;
20+
use rand::prelude::StdRng;
21+
use vortex_array::ArrayRef;
22+
use vortex_array::IntoArray;
23+
use vortex_array::VortexSessionExecute;
24+
use vortex_array::arrays::ChunkedArray;
25+
use vortex_array::arrays::PrimitiveArray;
26+
use vortex_array::builtins::ArrayBuiltins;
27+
use vortex_array::dtype::DType;
28+
use vortex_array::dtype::Nullability;
29+
use vortex_array::dtype::PType;
30+
use vortex_array::session::ArraySession;
31+
use vortex_array::validity::Validity;
32+
use vortex_buffer::BufferMut;
33+
use vortex_error::VortexExpect;
34+
use vortex_fastlanes::BitPackedArray;
35+
use vortex_fastlanes::BitPackedData;
36+
use vortex_session::VortexSession;
37+
38+
fn main() {
39+
divan::main();
40+
}
41+
42+
/// Session shared by every benchmark in this file.
///
/// Built lazily on first access: starts from an empty `VortexSession` and adds `ArraySession`
/// through `with`. All execution contexts used below are created from it.
static SESSION: LazyLock<VortexSession> =
    LazyLock::new(|| VortexSession::empty().with::<ArraySession>());
44+
45+
/// Target dtype of the widening cast under test: non-nullable `u32`.
const U32: DType = DType::Primitive(PType::U32, Nullability::NonNullable);
46+
47+
/// Benchmark parameter grid, one tuple per case: `(chunk_len, chunk_count, fraction_patched)`.
///
/// - `chunk_len`: number of values in each bit-packed chunk.
/// - `chunk_count`: number of chunks built for the input (`single` wraps more than one chunk in a
///   `ChunkedArray`).
/// - `fraction_patched`: per-value probability of drawing a value too wide for the packed width.
const ARGS: &[(usize, usize, f64)] = &[
    // 65_536 values per chunk, single chunk.
    (1 << 16, 1, 0.00),
    (1 << 16, 1, 0.01),
    // 65_536 values per chunk, sixteen chunks.
    (1 << 16, 16, 0.00),
    (1 << 16, 16, 0.01),
    // 1_048_576 values in a single chunk.
    (1 << 20, 1, 0.00),
    (1 << 20, 1, 0.01),
];
56+
57+
/// Build a single bit-packed `u16` chunk. Most values fit in `bit_width` bits; `fraction_patched`
58+
/// of them are large enough to require patches.
59+
fn make_chunk(rng: &mut StdRng, len: usize, fraction_patched: f64) -> BitPackedArray {
60+
let bit_width = 9u8;
61+
let cap = 1u16 << bit_width;
62+
let values = (0..len)
63+
.map(|_| {
64+
if rng.random_bool(fraction_patched) {
65+
rng.random_range(cap..u16::MAX)
66+
} else {
67+
rng.random_range(0..cap)
68+
}
69+
})
70+
.collect::<BufferMut<u16>>();
71+
let array = PrimitiveArray::new(values, Validity::NonNullable);
72+
BitPackedData::encode(
73+
&array.into_array(),
74+
bit_width,
75+
&mut SESSION.create_execution_ctx(),
76+
)
77+
.vortex_expect("encode")
78+
}
79+
80+
fn make_chunks(len: usize, count: usize, fraction_patched: f64) -> Vec<BitPackedArray> {
81+
let mut rng = StdRng::seed_from_u64(0);
82+
(0..count)
83+
.map(|_| make_chunk(&mut rng, len, fraction_patched))
84+
.collect()
85+
}
86+
87+
fn single(chunks: &[BitPackedArray]) -> ArrayRef {
88+
if chunks.len() == 1 {
89+
chunks[0].clone().into_array()
90+
} else {
91+
ChunkedArray::from_iter(chunks.iter().map(|c| c.clone().into_array())).into_array()
92+
}
93+
}
94+
95+
/// The real public path: `array.cast(u32).execute()`.
96+
#[cfg(not(codspeed))]
97+
#[divan::bench(args = ARGS)]
98+
fn cast_execute(bencher: Bencher, (chunk_len, chunk_count, frac): (usize, usize, f64)) {
99+
let chunks = make_chunks(chunk_len, chunk_count, frac);
100+
bencher
101+
.with_inputs(|| (single(&chunks), SESSION.create_execution_ctx()))
102+
.bench_refs(|(array, ctx)| {
103+
array
104+
.clone()
105+
.cast(U32)
106+
.unwrap()
107+
.execute::<PrimitiveArray>(ctx)
108+
.unwrap()
109+
});
110+
}
111+
112+
/// Baseline: canonicalize to a full-length `u16` array, then cast that primitive array to `u32`.
113+
#[cfg(not(codspeed))]
114+
#[divan::bench(args = ARGS)]
115+
fn canonicalize_then_cast(bencher: Bencher, (chunk_len, chunk_count, frac): (usize, usize, f64)) {
116+
let chunks = make_chunks(chunk_len, chunk_count, frac);
117+
bencher
118+
.with_inputs(|| (single(&chunks), SESSION.create_execution_ctx()))
119+
.bench_refs(|(array, ctx)| {
120+
let canonical = array.clone().execute::<PrimitiveArray>(ctx).unwrap();
121+
canonical
122+
.into_array()
123+
.cast(U32)
124+
.unwrap()
125+
.execute::<PrimitiveArray>(ctx)
126+
.unwrap()
127+
});
128+
}

0 commit comments

Comments
 (0)