Skip to content

Commit 8c48a53

Browse files
authored
Pluggable Compressor (#7018)
## Summary: Extensible and Pluggable Compressor

Tracking Issue: #7216

You can see a lot of the details in the tracking issue.

This is a major step in supporting extension types as a first-class feature in Vortex. The entire compressor has been rewritten; see the tracking issue for full design details and motivation.

The new `vortex-compressor` crate extracts the encoding-agnostic compression framework from `vortex-btrblocks`, inverting the dependency graph so that encoding crates can implement a single `Scheme` trait and register themselves with the compressor. Additionally, `vortex-btrblocks` remains the "batteries-included" default compressor and depends on `vortex-compressor`.

---

The compression benchmark results are posted in a comment on #7018.

For reviewers: I would just look at the whole `vortex-compressor` and `vortex-btrblocks` crates instead of the git diff, since basically everything has changed.

## Changes

- [x] Extract `vortex-compressor` crate with unified `Scheme` trait
- [x] Migrate `vortex-btrblocks` to depend on `vortex-compressor`
- [ ] Verify what APIs we want to preserve from `vortex-btrblocks` (re-exports)
- [ ] Figure out why the `RunEndScheme` exclusion in `rle.rs` is broken and re-enable it

## Testing

Existing tests pass, so that's a good sign. I added a few new tests that check the newer parts of the compressor as well.

---------

Signed-off-by: Connor Tsui <connor.tsui20@gmail.com>
1 parent 5e93e8e commit 8c48a53

52 files changed

Lines changed: 6165 additions & 4339 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

Cargo.lock

Lines changed: 19 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ members = [
1212
"vortex-proto",
1313
"vortex-array",
1414
"vortex-tensor",
15+
"vortex-compressor",
1516
"vortex-btrblocks",
1617
"vortex-layout",
1718
"vortex-scan",
@@ -264,6 +265,7 @@ vortex-array = { version = "0.1.0", path = "./vortex-array", default-features =
264265
vortex-btrblocks = { version = "0.1.0", path = "./vortex-btrblocks", default-features = false }
265266
vortex-buffer = { version = "0.1.0", path = "./vortex-buffer", default-features = false }
266267
vortex-bytebool = { version = "0.1.0", path = "./encodings/bytebool", default-features = false }
268+
vortex-compressor = { version = "0.1.0", path = "./vortex-compressor", default-features = false }
267269
vortex-datafusion = { version = "0.1.0", path = "./vortex-datafusion", default-features = false }
268270
vortex-datetime-parts = { version = "0.1.0", path = "./encodings/datetime-parts", default-features = false }
269271
vortex-decimal-byte-parts = { version = "0.1.0", path = "encodings/decimal-byte-parts", default-features = false }

fuzz/src/array/mod.rs

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -61,9 +61,10 @@ use vortex_array::search_sorted::SearchSorted;
6161
use vortex_array::search_sorted::SearchSortedSide;
6262
use vortex_btrblocks::BtrBlocksCompressor;
6363
use vortex_btrblocks::BtrBlocksCompressorBuilder;
64-
use vortex_btrblocks::FloatCode;
65-
use vortex_btrblocks::IntCode;
66-
use vortex_btrblocks::StringCode;
64+
use vortex_btrblocks::SchemeExt;
65+
use vortex_btrblocks::schemes::float;
66+
use vortex_btrblocks::schemes::integer;
67+
use vortex_btrblocks::schemes::string;
6768
use vortex_error::VortexExpect;
6869
use vortex_error::vortex_panic;
6970
use vortex_mask::Mask;
@@ -546,9 +547,11 @@ pub fn compress_array(array: &ArrayRef, strategy: CompressorStrategy) -> ArrayRe
546547
.compress(array)
547548
.vortex_expect("BtrBlocksCompressor compress should succeed in fuzz test"),
548549
CompressorStrategy::Compact => BtrBlocksCompressorBuilder::default()
549-
.include_string([StringCode::Zstd])
550-
.include_int([IntCode::Pco])
551-
.include_float([FloatCode::Pco])
550+
.include([
551+
string::ZstdScheme.id(),
552+
integer::PcoScheme.id(),
553+
float::PcoScheme.id(),
554+
])
552555
.build()
553556
.compress(array)
554557
.vortex_expect("Compact compress should succeed in fuzz test"),

vortex-array/public-api.lock

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24250,6 +24250,14 @@ pub fn vortex_array::ExecutionCtx::new(session: vortex_session::VortexSession) -
2425024250

2425124251
pub fn vortex_array::ExecutionCtx::session(&self) -> &vortex_session::VortexSession
2425224252

24253+
impl core::clone::Clone for vortex_array::ExecutionCtx
24254+
24255+
pub fn vortex_array::ExecutionCtx::clone(&self) -> vortex_array::ExecutionCtx
24256+
24257+
impl core::fmt::Debug for vortex_array::ExecutionCtx
24258+
24259+
pub fn vortex_array::ExecutionCtx::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result
24260+
2425324261
impl core::fmt::Display for vortex_array::ExecutionCtx
2425424262

2425524263
pub fn vortex_array::ExecutionCtx::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result

vortex-array/src/executor.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -190,6 +190,7 @@ impl dyn DynArray + '_ {
190190
///
191191
/// Accumulates a trace of execution steps. Individual steps are logged at TRACE level for
192192
/// real-time following, and the full trace is dumped at DEBUG level when the context is dropped.
193+
#[derive(Debug, Clone)]
193194
pub struct ExecutionCtx {
194195
id: usize,
195196
session: VortexSession,

vortex-btrblocks/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@ rust-version = { workspace = true }
1414
version = { workspace = true }
1515

1616
[dependencies]
17-
enum-iterator = { workspace = true }
1817
getrandom_v03 = { workspace = true }
1918
itertools = { workspace = true }
2019
num-traits = { workspace = true }
@@ -25,6 +24,7 @@ tracing = { workspace = true }
2524
vortex-alp = { workspace = true }
2625
vortex-array = { workspace = true }
2726
vortex-buffer = { workspace = true }
27+
vortex-compressor = { workspace = true }
2828
vortex-datetime-parts = { workspace = true }
2929
vortex-decimal-byte-parts = { workspace = true }
3030
vortex-error = { workspace = true }

vortex-btrblocks/benches/dict_encode.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@ use vortex_array::arrays::BoolArray;
99
use vortex_array::arrays::PrimitiveArray;
1010
use vortex_array::builders::dict::dict_encode;
1111
use vortex_array::validity::Validity;
12-
use vortex_btrblocks::CompressorStats;
1312
use vortex_btrblocks::IntegerStats;
1413
use vortex_btrblocks::integer_dictionary_encode;
1514
use vortex_buffer::BufferMut;

vortex-btrblocks/benches/stats_calc.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@ mod benchmarks {
1010
use divan::Bencher;
1111
use vortex_array::arrays::PrimitiveArray;
1212
use vortex_array::validity::Validity;
13-
use vortex_btrblocks::CompressorStats;
1413
use vortex_btrblocks::GenerateStatsOptions;
1514
use vortex_btrblocks::IntegerStats;
1615
use vortex_buffer::Buffer;

0 commit comments

Comments
 (0)