-
Notifications
You must be signed in to change notification settings - Fork 150
Expand file tree
/
Copy pathbuilder.rs
More file actions
153 lines (140 loc) · 5.43 KB
/
builder.rs
File metadata and controls
153 lines (140 loc) · 5.43 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors
//! Builder for configuring `BtrBlocksCompressor` instances.
use vortex_utils::aliases::hash_set::HashSet;
use crate::BtrBlocksCompressor;
use crate::CascadingCompressor;
use crate::Scheme;
use crate::SchemeExt;
use crate::SchemeId;
use crate::schemes::bool;
use crate::schemes::decimal;
use crate::schemes::float;
use crate::schemes::integer;
use crate::schemes::rle;
use crate::schemes::string;
use crate::schemes::temporal;
/// All available compression schemes.
///
/// The entries are grouped into bool, integer, float, string, decimal and temporal sections.
///
/// This list is order-sensitive: the builder preserves this order when constructing
/// the final scheme list, so that tie-breaking is deterministic. Schemes registered on the
/// builder afterwards are appended behind these entries.
pub const ALL_SCHEMES: &[&dyn Scheme] = &[
////////////////////////////////////////////////////////////////////////////////////////////////
// Bool schemes.
////////////////////////////////////////////////////////////////////////////////////////////////
&bool::BoolConstantScheme,
////////////////////////////////////////////////////////////////////////////////////////////////
// Integer schemes.
////////////////////////////////////////////////////////////////////////////////////////////////
&integer::IntConstantScheme,
// NOTE: FoR must precede BitPacking to avoid unnecessary patches.
&integer::FoRScheme,
// NOTE: ZigZag should precede BitPacking because we don't want negative numbers.
&integer::ZigZagScheme,
&integer::BitPackingScheme,
&integer::SparseScheme,
&integer::IntDictScheme,
&integer::RunEndScheme,
&integer::SequenceScheme,
&rle::RLE_INTEGER_SCHEME,
////////////////////////////////////////////////////////////////////////////////////////////////
// Float schemes.
////////////////////////////////////////////////////////////////////////////////////////////////
&float::FloatConstantScheme,
&float::ALPScheme,
&float::ALPRDScheme,
&float::FloatDictScheme,
&float::NullDominatedSparseScheme,
&rle::RLE_FLOAT_SCHEME,
////////////////////////////////////////////////////////////////////////////////////////////////
// String schemes.
////////////////////////////////////////////////////////////////////////////////////////////////
&string::StringDictScheme,
&string::FSSTScheme,
&string::StringConstantScheme,
&string::NullDominatedSparseScheme,
////////////////////////////////////////////////////////////////////////////////////////////////
// Decimal schemes.
////////////////////////////////////////////////////////////////////////////////////////////////
&decimal::DecimalScheme,
////////////////////////////////////////////////////////////////////////////////////////////////
// Temporal schemes.
////////////////////////////////////////////////////////////////////////////////////////////////
&temporal::TemporalScheme,
];
/// Builder for creating configured [`BtrBlocksCompressor`] instances.
///
/// By default, all schemes in [`ALL_SCHEMES`] are enabled. Feature-gated schemes (Pco, Zstd)
/// are not in `ALL_SCHEMES` and must be added explicitly via
/// [`with_new_scheme`](BtrBlocksCompressorBuilder::with_new_scheme) or
/// [`with_compact`](BtrBlocksCompressorBuilder::with_compact).
///
/// Every configuration method consumes the builder and returns the updated one, so calls are
/// meant to be chained and finished with [`build`](BtrBlocksCompressorBuilder::build). The type
/// is marked `#[must_use]` so that a builder whose return value is dropped is reported by the
/// compiler instead of silently losing the configuration.
///
/// # Examples
///
/// ```rust
/// use vortex_btrblocks::{BtrBlocksCompressorBuilder, Scheme, SchemeExt};
/// use vortex_btrblocks::schemes::integer::IntDictScheme;
///
/// // Default compressor with all schemes in ALL_SCHEMES.
/// let compressor = BtrBlocksCompressorBuilder::default().build();
///
/// // Exclude specific schemes.
/// let compressor = BtrBlocksCompressorBuilder::default()
///     .exclude([IntDictScheme.id()])
///     .build();
/// ```
#[must_use = "builder methods return the updated builder; call `build()` to obtain a compressor"]
#[derive(Debug, Clone)]
pub struct BtrBlocksCompressorBuilder {
    /// Enabled schemes, in the order in which they are handed to the compressor.
    schemes: Vec<&'static dyn Scheme>,
}
impl Default for BtrBlocksCompressorBuilder {
fn default() -> Self {
Self {
schemes: ALL_SCHEMES.to_vec(),
}
}
}
impl BtrBlocksCompressorBuilder {
    /// Registers an additional compression scheme that is not part of [`ALL_SCHEMES`].
    ///
    /// Encoding crates that live outside of `vortex-btrblocks` can use this to plug their own
    /// schemes into the compressor. The scheme is appended behind the schemes already present.
    ///
    /// # Panics
    ///
    /// Panics if the builder already holds a scheme with the same [`SchemeId`].
    pub fn with_new_scheme(mut self, scheme: &'static dyn Scheme) -> Self {
        let is_duplicate = self
            .schemes
            .iter()
            .any(|existing| existing.id() == scheme.id());
        assert!(
            !is_duplicate,
            "scheme {:?} is already present in the builder",
            scheme.id(),
        );

        self.schemes.push(scheme);
        self
    }

    /// Adds the compact encoding schemes (Zstd for strings, Pco for numerics).
    ///
    /// These give better compression ratios than the default set, especially for
    /// floating-point heavy datasets. Requires the `zstd` feature. When the `pco` feature is
    /// enabled as well, the Pco schemes for integers and floats are registered too.
    ///
    /// # Panics
    ///
    /// Panics if any of the compact schemes is already present.
    #[cfg(feature = "zstd")]
    pub fn with_compact(self) -> Self {
        // Each registration performs a linear duplicate check, which is cheap because the
        // number of schemes is small.
        let compact = self.with_new_scheme(&string::ZstdScheme);

        #[cfg(feature = "pco")]
        let compact = {
            let with_integer_pco = compact.with_new_scheme(&integer::PcoScheme);
            with_integer_pco.with_new_scheme(&float::PcoScheme)
        };

        compact
    }

    /// Removes every scheme whose [`SchemeId`] appears in `ids`.
    ///
    /// The relative order of the remaining schemes is left untouched. Ids that match no
    /// scheme in the builder have no effect.
    pub fn exclude(mut self, ids: impl IntoIterator<Item = SchemeId>) -> Self {
        // Collect once so that each scheme needs only a single set lookup.
        let excluded = ids.into_iter().collect::<HashSet<_>>();
        self.schemes.retain(|scheme| {
            let id = scheme.id();
            !excluded.contains(&id)
        });
        self
    }

    /// Consumes the builder and produces the configured [`BtrBlocksCompressor`].
    pub fn build(self) -> BtrBlocksCompressor {
        let cascading = CascadingCompressor::new(self.schemes);
        BtrBlocksCompressor(cascading)
    }
}