Skip to content

Commit 11249fb

Browse files
committed
Add pcodec 1k-page-size benchmark
Adds a divan benchmark that compares pcodec with the default page size against pcodec configured with a 1024-value page size on f64 and i64 data. The bench reports the compression ratio of each variant on startup and times compression and per-element scalar_at access for both types, plus full decompression for f64, so we can quantify the size/random-access tradeoff of smaller pco pages.

Signed-off-by: Claude <noreply@anthropic.com>
1 parent da19bca commit 11249fb

2 files changed

Lines changed: 265 additions & 0 deletions

File tree

vortex/Cargo.toml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,3 +102,8 @@ test = false
102102
[[bench]]
103103
name = "pipeline"
104104
harness = false
105+
106+
[[bench]]
107+
name = "pcodec_page_size"
108+
harness = false
109+
test = false

vortex/benches/pcodec_page_size.rs

Lines changed: 260 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,260 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
//! Compare pcodec with the default page size against pcodec with a 1024-value
5+
//! ("1k") page size on `f64` and `i64` data.
6+
//!
7+
//! Pcodec lets you tune the page size independently of the chunk size. Smaller
8+
//! pages add per-page overhead (so the compression ratio gets worse) but let
9+
//! random access decode a much smaller window, which should help `scalar_at`.
10+
//! These benches measure both effects.
11+
12+
#![expect(clippy::unwrap_used)]
13+
#![expect(clippy::cast_precision_loss)]
14+
15+
use std::sync::LazyLock;
16+
17+
use divan::Bencher;
18+
#[cfg(not(codspeed))]
19+
use divan::counter::BytesCount;
20+
use mimalloc::MiMalloc;
21+
use rand::RngExt;
22+
use rand::SeedableRng;
23+
use rand::rngs::StdRng;
24+
use vortex::array::IntoArray;
25+
use vortex::array::LEGACY_SESSION;
26+
use vortex::array::VortexSessionExecute;
27+
use vortex::array::arrays::PrimitiveArray;
28+
use vortex::array::session::ArraySession;
29+
use vortex::encodings::pco::Pco;
30+
use vortex::encodings::pco::PcoArray;
31+
use vortex_session::VortexSession;
32+
33+
#[global_allocator]
34+
static GLOBAL: MiMalloc = MiMalloc;
35+
36+
static SESSION: LazyLock<VortexSession> =
37+
LazyLock::new(|| VortexSession::empty().with::<ArraySession>());
38+
39+
const NUM_VALUES: usize = 100_000;
40+
const NUM_ACCESSES: usize = 1000;
41+
42+
/// `0` tells `Pco::from_primitive` to fall back to the chunk-default page size
43+
/// (`pco::DEFAULT_MAX_PAGE_N`, currently 262144 values).
44+
const DEFAULT_PAGE: usize = 0;
45+
const PAGE_1K: usize = 1024;
46+
const COMPRESSION_LEVEL: usize = 3;
47+
48+
/// Build the deterministic `(f64, i64)` input arrays shared by every bench.
///
/// Both arrays hold `NUM_VALUES` elements and draw their noise from a single
/// `StdRng` seeded with `0`: the `f64` array is generated first, then the
/// `i64` array continues from the same generator state.
fn setup_arrays() -> (PrimitiveArray, PrimitiveArray) {
    let mut rng = StdRng::seed_from_u64(0);

    // A noisy ramp: large dynamic range with smooth structure - the kind of
    // data where pcodec's bin/offset scheme actually does work, so the page
    // overhead is visible in the compression ratio.
    let float_ramp = (0..NUM_VALUES).map(|idx| {
        let jitter: f64 = rng.random_range(-0.5..0.5);
        idx as f64 * 0.001 + jitter
    });
    let f64_array = PrimitiveArray::from_iter(float_ramp);

    // Integer ramp with a uniform offset drawn from -50..50 on every element.
    let int_ramp = (0..NUM_VALUES).map(|idx| {
        let jitter: i64 = rng.random_range(-50..50);
        idx as i64 + jitter
    });
    let i64_array = PrimitiveArray::from_iter(int_ramp);

    (f64_array, i64_array)
}
61+
62+
fn compress(parray: &PrimitiveArray, values_per_page: usize) -> PcoArray {
63+
let mut ctx = SESSION.create_execution_ctx();
64+
Pco::from_primitive(
65+
parray.as_view(),
66+
COMPRESSION_LEVEL,
67+
values_per_page,
68+
&mut ctx,
69+
)
70+
.unwrap()
71+
}
72+
73+
fn print_compression_ratios() {
74+
let (f64_array, i64_array) = setup_arrays();
75+
let f64_uncompressed = f64_array.nbytes();
76+
let i64_uncompressed = i64_array.nbytes();
77+
78+
let f64_default = compress(&f64_array, DEFAULT_PAGE).nbytes();
79+
let f64_1k = compress(&f64_array, PAGE_1K).nbytes();
80+
let i64_default = compress(&i64_array, DEFAULT_PAGE).nbytes();
81+
let i64_1k = compress(&i64_array, PAGE_1K).nbytes();
82+
83+
eprintln!();
84+
eprintln!(
85+
"pcodec page-size compression ratio ({NUM_VALUES} values, level {COMPRESSION_LEVEL}):"
86+
);
87+
eprintln!(" f64 uncompressed = {:>10} bytes", f64_uncompressed);
88+
eprintln!(
89+
" f64 default page = {:>10} bytes ({:.3}x, {:.2} bits/value)",
90+
f64_default,
91+
f64_uncompressed as f64 / f64_default as f64,
92+
(f64_default as f64 * 8.0) / NUM_VALUES as f64,
93+
);
94+
eprintln!(
95+
" f64 1k page = {:>10} bytes ({:.3}x, {:.2} bits/value)",
96+
f64_1k,
97+
f64_uncompressed as f64 / f64_1k as f64,
98+
(f64_1k as f64 * 8.0) / NUM_VALUES as f64,
99+
);
100+
eprintln!(" i64 uncompressed = {:>10} bytes", i64_uncompressed);
101+
eprintln!(
102+
" i64 default page = {:>10} bytes ({:.3}x, {:.2} bits/value)",
103+
i64_default,
104+
i64_uncompressed as f64 / i64_default as f64,
105+
(i64_default as f64 * 8.0) / NUM_VALUES as f64,
106+
);
107+
eprintln!(
108+
" i64 1k page = {:>10} bytes ({:.3}x, {:.2} bits/value)",
109+
i64_1k,
110+
i64_uncompressed as f64 / i64_1k as f64,
111+
(i64_1k as f64 * 8.0) / NUM_VALUES as f64,
112+
);
113+
eprintln!();
114+
}
115+
116+
fn with_byte_counter<'a, 'b>(bencher: Bencher<'a, 'b>, bytes: u64) -> Bencher<'a, 'b> {
117+
#[cfg(not(codspeed))]
118+
return bencher.counter(BytesCount::new(bytes));
119+
#[cfg(codspeed)]
120+
{
121+
_ = bytes;
122+
return bencher;
123+
}
124+
}
125+
126+
fn random_indices() -> Vec<usize> {
127+
let mut rng = StdRng::seed_from_u64(1);
128+
(0..NUM_ACCESSES)
129+
.map(|_| rng.random_range(0..NUM_VALUES))
130+
.collect()
131+
}
132+
133+
/// Benchmark entry point: print the compression-ratio report once, then hand
/// control to divan to run the registered benches.
fn main() {
    // The report is printed before any timing starts.
    print_compression_ratios();
    divan::main()
}
137+
138+
// --- compression ---
139+
140+
#[divan::bench(name = "pcodec_compress_f64_default_page")]
141+
fn bench_compress_f64_default(bencher: Bencher) {
142+
let (f64_array, _) = setup_arrays();
143+
with_byte_counter(bencher, (NUM_VALUES * 8) as u64)
144+
.with_inputs(|| (&f64_array, SESSION.create_execution_ctx()))
145+
.bench_refs(|(a, ctx)| {
146+
Pco::from_primitive(a.as_view(), COMPRESSION_LEVEL, DEFAULT_PAGE, ctx).unwrap()
147+
});
148+
}
149+
150+
#[divan::bench(name = "pcodec_compress_f64_1k_page")]
151+
fn bench_compress_f64_1k(bencher: Bencher) {
152+
let (f64_array, _) = setup_arrays();
153+
with_byte_counter(bencher, (NUM_VALUES * 8) as u64)
154+
.with_inputs(|| (&f64_array, SESSION.create_execution_ctx()))
155+
.bench_refs(|(a, ctx)| {
156+
Pco::from_primitive(a.as_view(), COMPRESSION_LEVEL, PAGE_1K, ctx).unwrap()
157+
});
158+
}
159+
160+
#[divan::bench(name = "pcodec_compress_i64_default_page")]
161+
fn bench_compress_i64_default(bencher: Bencher) {
162+
let (_, i64_array) = setup_arrays();
163+
with_byte_counter(bencher, (NUM_VALUES * 8) as u64)
164+
.with_inputs(|| (&i64_array, SESSION.create_execution_ctx()))
165+
.bench_refs(|(a, ctx)| {
166+
Pco::from_primitive(a.as_view(), COMPRESSION_LEVEL, DEFAULT_PAGE, ctx).unwrap()
167+
});
168+
}
169+
170+
#[divan::bench(name = "pcodec_compress_i64_1k_page")]
171+
fn bench_compress_i64_1k(bencher: Bencher) {
172+
let (_, i64_array) = setup_arrays();
173+
with_byte_counter(bencher, (NUM_VALUES * 8) as u64)
174+
.with_inputs(|| (&i64_array, SESSION.create_execution_ctx()))
175+
.bench_refs(|(a, ctx)| {
176+
Pco::from_primitive(a.as_view(), COMPRESSION_LEVEL, PAGE_1K, ctx).unwrap()
177+
});
178+
}
179+
180+
// --- decompression (full) ---
181+
182+
#[divan::bench(name = "pcodec_decompress_f64_default_page")]
183+
fn bench_decompress_f64_default(bencher: Bencher) {
184+
let (f64_array, _) = setup_arrays();
185+
let compressed = compress(&f64_array, DEFAULT_PAGE).into_array();
186+
with_byte_counter(bencher, (NUM_VALUES * 8) as u64)
187+
.with_inputs(|| (&compressed, SESSION.create_execution_ctx()))
188+
.bench_refs(|(a, ctx)| (**a).clone().execute::<PrimitiveArray>(ctx).unwrap());
189+
}
190+
191+
#[divan::bench(name = "pcodec_decompress_f64_1k_page")]
192+
fn bench_decompress_f64_1k(bencher: Bencher) {
193+
let (f64_array, _) = setup_arrays();
194+
let compressed = compress(&f64_array, PAGE_1K).into_array();
195+
with_byte_counter(bencher, (NUM_VALUES * 8) as u64)
196+
.with_inputs(|| (&compressed, SESSION.create_execution_ctx()))
197+
.bench_refs(|(a, ctx)| (**a).clone().execute::<PrimitiveArray>(ctx).unwrap());
198+
}
199+
200+
// --- scalar_at ---
201+
202+
#[divan::bench(name = "pcodec_scalar_at_f64_default_page")]
203+
fn bench_scalar_at_f64_default(bencher: Bencher) {
204+
let (f64_array, _) = setup_arrays();
205+
let compressed = compress(&f64_array, DEFAULT_PAGE).into_array();
206+
let indices = random_indices();
207+
bencher
208+
.with_inputs(|| (&compressed, &indices))
209+
.bench_refs(|(array, indices)| {
210+
let mut ctx = LEGACY_SESSION.create_execution_ctx();
211+
for &idx in indices.iter() {
212+
divan::black_box(array.execute_scalar(idx, &mut ctx).unwrap());
213+
}
214+
});
215+
}
216+
217+
#[divan::bench(name = "pcodec_scalar_at_f64_1k_page")]
218+
fn bench_scalar_at_f64_1k(bencher: Bencher) {
219+
let (f64_array, _) = setup_arrays();
220+
let compressed = compress(&f64_array, PAGE_1K).into_array();
221+
let indices = random_indices();
222+
bencher
223+
.with_inputs(|| (&compressed, &indices))
224+
.bench_refs(|(array, indices)| {
225+
let mut ctx = LEGACY_SESSION.create_execution_ctx();
226+
for &idx in indices.iter() {
227+
divan::black_box(array.execute_scalar(idx, &mut ctx).unwrap());
228+
}
229+
});
230+
}
231+
232+
#[divan::bench(name = "pcodec_scalar_at_i64_default_page")]
233+
fn bench_scalar_at_i64_default(bencher: Bencher) {
234+
let (_, i64_array) = setup_arrays();
235+
let compressed = compress(&i64_array, DEFAULT_PAGE).into_array();
236+
let indices = random_indices();
237+
bencher
238+
.with_inputs(|| (&compressed, &indices))
239+
.bench_refs(|(array, indices)| {
240+
let mut ctx = LEGACY_SESSION.create_execution_ctx();
241+
for &idx in indices.iter() {
242+
divan::black_box(array.execute_scalar(idx, &mut ctx).unwrap());
243+
}
244+
});
245+
}
246+
247+
#[divan::bench(name = "pcodec_scalar_at_i64_1k_page")]
248+
fn bench_scalar_at_i64_1k(bencher: Bencher) {
249+
let (_, i64_array) = setup_arrays();
250+
let compressed = compress(&i64_array, PAGE_1K).into_array();
251+
let indices = random_indices();
252+
bencher
253+
.with_inputs(|| (&compressed, &indices))
254+
.bench_refs(|(array, indices)| {
255+
let mut ctx = LEGACY_SESSION.create_execution_ctx();
256+
for &idx in indices.iter() {
257+
divan::black_box(array.execute_scalar(idx, &mut ctx).unwrap());
258+
}
259+
});
260+
}

0 commit comments

Comments
 (0)