Skip to content

Commit b97b7e4

Browse files
committed
vortex-row: benchmark FSST row-encoding (unpack-then-convert vs phases)
FSST is not order-preserving, so row keys must be the decompressed bytes; the only strategy today is decompress to a canonical VarBinView then row-encode it. This bench measures that path and its two phases (decompress-only, and row-encode of an already-decompressed column) on compressible multi-block strings, to quantify the opportunity for a future fused FSST row-encode kernel: the phases are additive (decompress ~46%, row-encode ~54%), and the row-encode phase re-reads/re-writes the decompressed bytes a fused kernel could emit once. Signed-off-by: Joe Isaacs <joe.isaacs@live.co.uk>
1 parent 2fc07fa commit b97b7e4

3 files changed

Lines changed: 126 additions & 0 deletions

File tree

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

vortex-row/Cargo.toml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,12 @@ mimalloc = { workspace = true }
3535
rand = { workspace = true }
3636
rstest = { workspace = true }
3737
vortex-array = { workspace = true, features = ["_test-harness"] }
38+
vortex-fsst = { workspace = true }
3839

3940
[[bench]]
4041
name = "row_encode"
4142
harness = false
43+
44+
[[bench]]
45+
name = "fsst_row_encode"
46+
harness = false
Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
#![expect(clippy::unwrap_used)]
5+
6+
//! Row-encoding an FSST-compressed string column: the only realizable strategy is
7+
//! "unpack then convert" (decompress FSST to a canonical `VarBinView`, then row-encode it),
8+
//! because FSST is **not order-preserving** — its 1-byte codes are assigned by compression
9+
//! gain, not by value, so the compressed bytes cannot be compared lexicographically. A
10+
//! hypothetical "direct" kernel could only *fuse* decompression with row-key emission; it
11+
//! still has to expand every symbol.
12+
//!
13+
//! These benchmarks measure the full path and its two phases so the fusion opportunity is
14+
//! quantifiable:
15+
//! * `fsst_unpack_then_convert` — decompress + row-encode (the status quo).
16+
//! * `fsst_decompress_only` — decompress alone (the irreducible floor: a direct kernel
17+
//!   must still produce these bytes).
18+
//! * `plain_row_encode_only` — row-encode an already-decompressed `VarBinView` (the part
19+
//!   a fused kernel would overlap with decompression; the intermediate buffer + views it
20+
//!   reads are written by the decompression step, and those writes are what fusion removes).
21+
22+
use divan::counter::BytesCount;
23+
use mimalloc::MiMalloc;
24+
use rand::RngExt;
25+
use rand::SeedableRng;
26+
use rand::rngs::StdRng;
27+
use vortex_array::ArrayRef;
28+
use vortex_array::Canonical;
29+
use vortex_array::IntoArray;
30+
use vortex_array::LEGACY_SESSION;
31+
use vortex_array::VortexSessionExecute;
32+
use vortex_array::arrays::VarBinArray;
33+
use vortex_array::dtype::DType;
34+
use vortex_array::dtype::Nullability;
35+
use vortex_fsst::fsst_compress;
36+
use vortex_fsst::fsst_train_compressor;
37+
use vortex_row::RowEncoder;
38+
39+
// Route every heap allocation made by this benchmark binary through mimalloc.
#[global_allocator]
40+
static GLOBAL: MiMalloc = MiMalloc;
41+
42+
/// Number of strings generated for the benchmark column.
const N: usize = 100_000;
43+
/// Nominal string length in bytes; each string's actual length is drawn from 50%..=150% of
/// this value.
const AVG_LEN: usize = 64;
44+
/// Alphabet size: bytes are drawn from the `UNIQUE_CHARS` consecutive values starting at `b'a'`.
const UNIQUE_CHARS: u8 = 8;
45+
46+
/// Generate compressible, multi-block (>32 byte) strings over a small alphabet so FSST finds
47+
/// a strong symbol table — the regime where a direct kernel would matter most.
48+
fn generate_strings() -> (VarBinArray, u64) {
49+
let mut rng = StdRng::seed_from_u64(0);
50+
let mut strings = Vec::with_capacity(N);
51+
let mut total_bytes: u64 = 0;
52+
for _ in 0..N {
53+
let len = AVG_LEN * rng.random_range(50..=150) / 100;
54+
total_bytes += len as u64;
55+
let s = (0..len)
56+
.map(|_| rng.random_range(b'a'..(b'a' + UNIQUE_CHARS)) as char)
57+
.collect::<String>()
58+
.into_bytes();
59+
strings.push(Some(s.into_boxed_slice()));
60+
}
61+
let arr = VarBinArray::from_iter(strings, DType::Binary(Nullability::NonNullable));
62+
(arr, total_bytes)
63+
}
64+
65+
/// Generate the input strings, train an FSST compressor on them, and compress the column.
///
/// Returns the FSST-encoded array and the total uncompressed byte count of the input, so
/// callers can report throughput in terms of decompressed bytes.
fn build_fsst() -> (ArrayRef, u64) {
    let (strings, total_bytes) = generate_strings();
    let compressor = fsst_train_compressor(&strings);

    // Capture length and dtype up front: `strings` is moved into `fsst_compress` below.
    let (len, dtype) = (strings.len(), strings.dtype().clone());

    let mut ctx = LEGACY_SESSION.create_execution_ctx();
    let compressed = fsst_compress(strings, len, &dtype, &compressor, &mut ctx);
    (compressed.into_array(), total_bytes)
}
74+
75+
fn decompress(fsst: &ArrayRef) -> ArrayRef {
76+
let mut ctx = LEGACY_SESSION.create_execution_ctx();
77+
fsst.clone()
78+
.execute::<Canonical>(&mut ctx)
79+
.unwrap()
80+
.into_array()
81+
}
82+
83+
fn main() {
84+
divan::main();
85+
}
86+
87+
/// Status quo: decompress FSST to a canonical `VarBinView`, then row-encode it.
88+
#[divan::bench]
89+
fn fsst_unpack_then_convert(bencher: divan::Bencher) {
90+
let (fsst, total_bytes) = build_fsst();
91+
let encoder = RowEncoder::default();
92+
bencher.counter(BytesCount::new(total_bytes)).bench_local(|| {
93+
let mut ctx = LEGACY_SESSION.create_execution_ctx();
94+
let decoded = fsst.clone().execute::<Canonical>(&mut ctx).unwrap().into_array();
95+
encoder.encode(&[decoded], &mut ctx).unwrap()
96+
});
97+
}
98+
99+
/// Irreducible floor: FSST decompression alone (a direct kernel must still produce these
100+
/// bytes, since the sort key *is* the decompressed bytes).
101+
#[divan::bench]
102+
fn fsst_decompress_only(bencher: divan::Bencher) {
103+
let (fsst, total_bytes) = build_fsst();
104+
bencher
105+
.counter(BytesCount::new(total_bytes))
106+
.bench_local(|| decompress(&fsst));
107+
}
108+
109+
/// Row-encode an already-decompressed `VarBinView`. The writes into the decompressed buffer +
110+
/// views that precede this step are what a fused direct kernel would eliminate.
111+
#[divan::bench]
112+
fn plain_row_encode_only(bencher: divan::Bencher) {
113+
let (fsst, total_bytes) = build_fsst();
114+
let decoded = decompress(&fsst);
115+
let encoder = RowEncoder::default();
116+
bencher.counter(BytesCount::new(total_bytes)).bench_local(|| {
117+
let mut ctx = LEGACY_SESSION.create_execution_ctx();
118+
encoder.encode(std::slice::from_ref(&decoded), &mut ctx).unwrap()
119+
});
120+
}

0 commit comments

Comments
 (0)