Skip to content

Commit 968400d

Browse files
committed
Add compressor for constant nonnullable and all valid bool arrays
Signed-off-by: Robert Kruszewski <github@robertk.io>
1 parent d0ed3fc commit 968400d

8 files changed

Lines changed: 545 additions & 1 deletion

File tree

vortex-btrblocks/public-api.lock

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,53 @@
11
pub mod vortex_btrblocks
22

3+
pub enum vortex_btrblocks::BoolCode
4+
5+
pub vortex_btrblocks::BoolCode::Constant
6+
7+
pub vortex_btrblocks::BoolCode::Uncompressed
8+
9+
impl core::clone::Clone for vortex_btrblocks::BoolCode
10+
11+
pub fn vortex_btrblocks::BoolCode::clone(&self) -> vortex_btrblocks::BoolCode
12+
13+
impl core::cmp::Eq for vortex_btrblocks::BoolCode
14+
15+
impl core::cmp::Ord for vortex_btrblocks::BoolCode
16+
17+
pub fn vortex_btrblocks::BoolCode::cmp(&self, other: &vortex_btrblocks::BoolCode) -> core::cmp::Ordering
18+
19+
impl core::cmp::PartialEq for vortex_btrblocks::BoolCode
20+
21+
pub fn vortex_btrblocks::BoolCode::eq(&self, other: &vortex_btrblocks::BoolCode) -> bool
22+
23+
impl core::cmp::PartialOrd for vortex_btrblocks::BoolCode
24+
25+
pub fn vortex_btrblocks::BoolCode::partial_cmp(&self, other: &vortex_btrblocks::BoolCode) -> core::option::Option<core::cmp::Ordering>
26+
27+
impl core::fmt::Debug for vortex_btrblocks::BoolCode
28+
29+
pub fn vortex_btrblocks::BoolCode::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result
30+
31+
impl core::hash::Hash for vortex_btrblocks::BoolCode
32+
33+
pub fn vortex_btrblocks::BoolCode::hash<__H: core::hash::Hasher>(&self, state: &mut __H)
34+
35+
impl core::marker::Copy for vortex_btrblocks::BoolCode
36+
37+
impl core::marker::StructuralPartialEq for vortex_btrblocks::BoolCode
38+
39+
impl enum_iterator::Sequence for vortex_btrblocks::BoolCode
40+
41+
pub const vortex_btrblocks::BoolCode::CARDINALITY: usize
42+
43+
pub fn vortex_btrblocks::BoolCode::first() -> core::option::Option<Self>
44+
45+
pub fn vortex_btrblocks::BoolCode::last() -> core::option::Option<Self>
46+
47+
pub fn vortex_btrblocks::BoolCode::next(&self) -> core::option::Option<Self>
48+
49+
pub fn vortex_btrblocks::BoolCode::previous(&self) -> core::option::Option<Self>
50+
351
pub enum vortex_btrblocks::FloatCode
452

553
pub vortex_btrblocks::FloatCode::Alp
@@ -188,6 +236,8 @@ pub fn vortex_btrblocks::StringCode::previous(&self) -> core::option::Option<Sel
188236

189237
pub struct vortex_btrblocks::BtrBlocksCompressor
190238

239+
pub vortex_btrblocks::BtrBlocksCompressor::bool_schemes: alloc::vec::Vec<&'static dyn vortex_btrblocks::compressor::bool::BoolScheme>
240+
191241
pub vortex_btrblocks::BtrBlocksCompressor::float_schemes: alloc::vec::Vec<&'static dyn vortex_btrblocks::compressor::float::FloatScheme>
192242

193243
pub vortex_btrblocks::BtrBlocksCompressor::int_schemes: alloc::vec::Vec<&'static dyn vortex_btrblocks::compressor::integer::IntegerScheme>
@@ -208,6 +258,8 @@ pub fn vortex_btrblocks::BtrBlocksCompressor::default() -> Self
208258

209259
impl vortex_btrblocks::CanonicalCompressor for vortex_btrblocks::BtrBlocksCompressor
210260

261+
pub fn vortex_btrblocks::BtrBlocksCompressor::bool_schemes(&self) -> &[&'static dyn vortex_btrblocks::compressor::bool::BoolScheme]
262+
211263
pub fn vortex_btrblocks::BtrBlocksCompressor::compress_canonical(&self, array: vortex_array::canonical::Canonical, ctx: vortex_btrblocks::ctx::CompressorContext, excludes: vortex_btrblocks::ctx::Excludes<'_>) -> vortex_error::VortexResult<vortex_array::array::ArrayRef>
212264

213265
pub fn vortex_btrblocks::BtrBlocksCompressor::float_schemes(&self) -> &[&'static dyn vortex_btrblocks::compressor::float::FloatScheme]
@@ -224,12 +276,16 @@ pub fn vortex_btrblocks::BtrBlocksCompressorBuilder::build(self) -> vortex_btrbl
224276

225277
pub fn vortex_btrblocks::BtrBlocksCompressorBuilder::empty() -> Self
226278

279+
pub fn vortex_btrblocks::BtrBlocksCompressorBuilder::exclude_bool(self, codes: impl core::iter::traits::collect::IntoIterator<Item = vortex_btrblocks::BoolCode>) -> Self
280+
227281
pub fn vortex_btrblocks::BtrBlocksCompressorBuilder::exclude_float(self, codes: impl core::iter::traits::collect::IntoIterator<Item = vortex_btrblocks::FloatCode>) -> Self
228282

229283
pub fn vortex_btrblocks::BtrBlocksCompressorBuilder::exclude_int(self, codes: impl core::iter::traits::collect::IntoIterator<Item = vortex_btrblocks::IntCode>) -> Self
230284

231285
pub fn vortex_btrblocks::BtrBlocksCompressorBuilder::exclude_string(self, codes: impl core::iter::traits::collect::IntoIterator<Item = vortex_btrblocks::StringCode>) -> Self
232286

287+
pub fn vortex_btrblocks::BtrBlocksCompressorBuilder::include_bool(self, codes: impl core::iter::traits::collect::IntoIterator<Item = vortex_btrblocks::BoolCode>) -> Self
288+
233289
pub fn vortex_btrblocks::BtrBlocksCompressorBuilder::include_float(self, codes: impl core::iter::traits::collect::IntoIterator<Item = vortex_btrblocks::FloatCode>) -> Self
234290

235291
pub fn vortex_btrblocks::BtrBlocksCompressorBuilder::include_int(self, codes: impl core::iter::traits::collect::IntoIterator<Item = vortex_btrblocks::IntCode>) -> Self
@@ -282,6 +338,8 @@ pub fn vortex_btrblocks::IntegerStats::source(&self) -> &vortex_array::arrays::p
282338

283339
pub trait vortex_btrblocks::CanonicalCompressor
284340

341+
pub fn vortex_btrblocks::CanonicalCompressor::bool_schemes(&self) -> &[&'static dyn vortex_btrblocks::compressor::bool::BoolScheme]
342+
285343
pub fn vortex_btrblocks::CanonicalCompressor::compress_canonical(&self, array: vortex_array::canonical::Canonical, ctx: vortex_btrblocks::ctx::CompressorContext, excludes: vortex_btrblocks::ctx::Excludes<'_>) -> vortex_error::VortexResult<vortex_array::array::ArrayRef>
286344

287345
pub fn vortex_btrblocks::CanonicalCompressor::float_schemes(&self) -> &[&'static dyn vortex_btrblocks::compressor::float::FloatScheme]
@@ -292,6 +350,8 @@ pub fn vortex_btrblocks::CanonicalCompressor::string_schemes(&self) -> &[&'stati
292350

293351
impl vortex_btrblocks::CanonicalCompressor for vortex_btrblocks::BtrBlocksCompressor
294352

353+
pub fn vortex_btrblocks::BtrBlocksCompressor::bool_schemes(&self) -> &[&'static dyn vortex_btrblocks::compressor::bool::BoolScheme]
354+
295355
pub fn vortex_btrblocks::BtrBlocksCompressor::compress_canonical(&self, array: vortex_array::canonical::Canonical, ctx: vortex_btrblocks::ctx::CompressorContext, excludes: vortex_btrblocks::ctx::Excludes<'_>) -> vortex_error::VortexResult<vortex_array::array::ArrayRef>
296356

297357
pub fn vortex_btrblocks::BtrBlocksCompressor::float_schemes(&self) -> &[&'static dyn vortex_btrblocks::compressor::float::FloatScheme]

vortex-btrblocks/src/builder.rs

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,13 @@
66
use itertools::Itertools;
77
use vortex_utils::aliases::hash_set::HashSet;
88

9+
use crate::BoolCode;
910
use crate::BtrBlocksCompressor;
1011
use crate::FloatCode;
1112
use crate::IntCode;
1213
use crate::StringCode;
14+
use crate::compressor::bool::ALL_BOOL_SCHEMES;
15+
use crate::compressor::bool::BoolScheme;
1316
use crate::compressor::float::ALL_FLOAT_SCHEMES;
1417
use crate::compressor::float::FloatScheme;
1518
use crate::compressor::integer::ALL_INT_SCHEMES;
@@ -43,6 +46,7 @@ use crate::compressor::string::StringScheme;
4346
/// ```
4447
#[derive(Debug, Clone)]
4548
pub struct BtrBlocksCompressorBuilder {
49+
bool_schemes: HashSet<&'static dyn BoolScheme>,
4650
int_schemes: HashSet<&'static dyn IntegerScheme>,
4751
float_schemes: HashSet<&'static dyn FloatScheme>,
4852
string_schemes: HashSet<&'static dyn StringScheme>,
@@ -51,6 +55,7 @@ pub struct BtrBlocksCompressorBuilder {
5155
impl Default for BtrBlocksCompressorBuilder {
5256
fn default() -> Self {
5357
Self {
58+
bool_schemes: ALL_BOOL_SCHEMES.iter().copied().collect(),
5459
int_schemes: ALL_INT_SCHEMES
5560
.iter()
5661
.copied()
@@ -74,12 +79,20 @@ impl BtrBlocksCompressorBuilder {
7479
/// Create a new builder with no encodings enabled.
7580
pub fn empty() -> Self {
7681
Self {
82+
bool_schemes: Default::default(),
7783
int_schemes: Default::default(),
7884
float_schemes: Default::default(),
7985
string_schemes: Default::default(),
8086
}
8187
}
8288

89+
/// Excludes the specified bool compression schemes.
90+
pub fn exclude_bool(mut self, codes: impl IntoIterator<Item = BoolCode>) -> Self {
91+
let codes: HashSet<_> = codes.into_iter().collect();
92+
self.bool_schemes.retain(|s| !codes.contains(&s.code()));
93+
self
94+
}
95+
8396
/// Excludes the specified integer compression schemes.
8497
pub fn exclude_int(mut self, codes: impl IntoIterator<Item = IntCode>) -> Self {
8598
let codes: HashSet<_> = codes.into_iter().collect();
@@ -101,6 +114,17 @@ impl BtrBlocksCompressorBuilder {
101114
self
102115
}
103116

117+
/// Includes the specified bool compression schemes.
118+
pub fn include_bool(mut self, codes: impl IntoIterator<Item = BoolCode>) -> Self {
119+
let codes: HashSet<_> = codes.into_iter().collect();
120+
for scheme in ALL_BOOL_SCHEMES {
121+
if codes.contains(&scheme.code()) {
122+
self.bool_schemes.insert(*scheme);
123+
}
124+
}
125+
self
126+
}
127+
104128
/// Includes the specified integer compression schemes.
105129
pub fn include_int(mut self, codes: impl IntoIterator<Item = IntCode>) -> Self {
106130
let codes: HashSet<_> = codes.into_iter().collect();
@@ -138,6 +162,11 @@ impl BtrBlocksCompressorBuilder {
138162
pub fn build(self) -> BtrBlocksCompressor {
139163
// Note we should apply the schemes in the same order, in case they conflict.
140164
BtrBlocksCompressor {
165+
bool_schemes: self
166+
.bool_schemes
167+
.into_iter()
168+
.sorted_by_key(|s| s.code())
169+
.collect_vec(),
141170
int_schemes: self
142171
.int_schemes
143172
.into_iter()

vortex-btrblocks/src/canonical_compressor.rs

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ use vortex_array::vtable::ValidityHelper;
2727
use vortex_error::VortexResult;
2828
use vortex_error::vortex_bail;
2929

30+
use crate::BoolCompressor;
3031
use crate::BtrBlocksCompressorBuilder;
3132
use crate::CompressorContext;
3233
use crate::CompressorExt;
@@ -35,6 +36,7 @@ use crate::FloatCompressor;
3536
use crate::IntCode;
3637
use crate::IntCompressor;
3738
use crate::StringCompressor;
39+
use crate::compressor::bool::BoolScheme;
3840
use crate::compressor::decimal::compress_decimal;
3941
use crate::compressor::float::FloatScheme;
4042
use crate::compressor::integer::IntegerScheme;
@@ -54,6 +56,9 @@ pub trait CanonicalCompressor {
5456
excludes: Excludes,
5557
) -> VortexResult<ArrayRef>;
5658

59+
/// Returns the enabled bool compression schemes.
60+
fn bool_schemes(&self) -> &[&'static dyn BoolScheme];
61+
5762
/// Returns the enabled integer compression schemes.
5863
fn int_schemes(&self) -> &[&'static dyn IntegerScheme];
5964

@@ -93,6 +98,9 @@ pub trait CanonicalCompressor {
9398
/// ```
9499
#[derive(Clone)]
95100
pub struct BtrBlocksCompressor {
101+
/// Enabled bool compression schemes.
102+
pub bool_schemes: Vec<&'static dyn BoolScheme>,
103+
96104
/// Enabled integer compression schemes.
97105
pub int_schemes: Vec<&'static dyn IntegerScheme>,
98106

@@ -127,6 +135,12 @@ impl BtrBlocksCompressor {
127135
self.compress_canonical(compact, CompressorContext::default(), Excludes::none())
128136
}
129137

138+
pub(crate) fn bool_compressor(&self) -> BoolCompressor<'_> {
139+
BoolCompressor {
140+
btr_blocks_compressor: self,
141+
}
142+
}
143+
130144
pub(crate) fn integer_compressor(&self) -> IntCompressor<'_> {
131145
IntCompressor {
132146
btr_blocks_compressor: self,
@@ -215,7 +229,10 @@ impl CanonicalCompressor for BtrBlocksCompressor {
215229
) -> VortexResult<ArrayRef> {
216230
match array {
217231
Canonical::Null(null_array) => Ok(null_array.into_array()),
218-
Canonical::Bool(bool_array) => Ok(bool_array.into_array()),
232+
Canonical::Bool(bool_array) => {
233+
self.bool_compressor()
234+
.compress(self, &bool_array, ctx, excludes.bool)
235+
}
219236
Canonical::Primitive(primitive) => {
220237
if primitive.ptype().is_int() {
221238
self.integer_compressor()
@@ -304,6 +321,10 @@ impl CanonicalCompressor for BtrBlocksCompressor {
304321
}
305322
}
306323

324+
fn bool_schemes(&self) -> &[&'static dyn BoolScheme] {
325+
&self.bool_schemes
326+
}
327+
307328
fn int_schemes(&self) -> &[&'static dyn IntegerScheme] {
308329
&self.int_schemes
309330
}

0 commit comments

Comments
 (0)