Skip to content

Commit 777edad

Browse files
committed
add ArrayAndStats
Signed-off-by: Connor Tsui <connor.tsui20@gmail.com> clean up Signed-off-by: Connor Tsui <connor.tsui20@gmail.com>
1 parent e65db97 commit 777edad

17 files changed

Lines changed: 362 additions & 499 deletions

File tree

vortex-btrblocks/benches/dict_encode.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@ use vortex_array::arrays::BoolArray;
99
use vortex_array::arrays::PrimitiveArray;
1010
use vortex_array::builders::dict::dict_encode;
1111
use vortex_array::validity::Validity;
12-
use vortex_btrblocks::CompressorStats;
1312
use vortex_btrblocks::IntegerStats;
1413
use vortex_btrblocks::integer_dictionary_encode;
1514
use vortex_buffer::BufferMut;

vortex-btrblocks/benches/stats_calc.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@ mod benchmarks {
1010
use divan::Bencher;
1111
use vortex_array::arrays::PrimitiveArray;
1212
use vortex_array::validity::Validity;
13-
use vortex_btrblocks::CompressorStats;
1413
use vortex_btrblocks::GenerateStatsOptions;
1514
use vortex_btrblocks::IntegerStats;
1615
use vortex_buffer::Buffer;

vortex-btrblocks/src/builder.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,10 @@ pub const ALL_SCHEMES: &[&dyn Scheme] = &[
1717
// Integer schemes.
1818
&crate::compressor::integer::UncompressedScheme as &dyn Scheme,
1919
&crate::compressor::integer::ConstantScheme,
20+
// NOTE: FOR (frame-of-reference) must precede BitPacking to avoid unnecessary patches.
2021
&crate::compressor::integer::FORScheme,
21-
&crate::compressor::integer::ZigZagScheme,
2222
&crate::compressor::integer::BitPackingScheme,
23+
&crate::compressor::integer::ZigZagScheme,
2324
&crate::compressor::integer::SparseScheme,
2425
&crate::compressor::integer::DictScheme,
2526
&crate::compressor::integer::RunEndScheme,

vortex-btrblocks/src/canonical_compressor.rs

Lines changed: 17 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@ use vortex_array::arrays::ExtensionArray;
1717
use vortex_array::arrays::FixedSizeListArray;
1818
use vortex_array::arrays::ListArray;
1919
use vortex_array::arrays::ListViewArray;
20-
use vortex_array::arrays::Primitive;
2120
use vortex_array::arrays::StructArray;
2221
use vortex_array::arrays::TemporalArray;
2322
use vortex_array::arrays::listview::list_from_list_view;
@@ -28,18 +27,14 @@ use vortex_array::scalar::Scalar;
2827
use vortex_array::vtable::ValidityHelper;
2928
use vortex_error::VortexResult;
3029

30+
use crate::ArrayAndStats;
3131
use crate::BtrBlocksCompressorBuilder;
3232
use crate::CompressorContext;
33-
use crate::CompressorStats;
3433
use crate::GenerateStatsOptions;
3534
use crate::Scheme;
3635
use crate::SchemeId;
37-
use crate::StatsCache;
3836
use crate::compressor::decimal::compress_decimal;
39-
use crate::compressor::float::FloatStats;
4037
use crate::compressor::integer::DictScheme as IntDictScheme;
41-
use crate::compressor::integer::IntegerStats;
42-
use crate::compressor::string::StringStats;
4338
use crate::compressor::temporal::compress_temporal;
4439

4540
/// The main compressor type implementing BtrBlocks-inspired compression.
@@ -215,69 +210,41 @@ impl BtrBlocksCompressor {
215210
}
216211

217212
let before_nbytes = array.nbytes();
218-
let needs_distinct = eligible.iter().any(|s| s.needs_distinct_values());
219-
let mut cache = StatsCache::new();
220-
221-
// Pre-populate the stats cache with the right `count_distinct_values` setting.
222-
// This matches the old `gen_stats` behavior where distinct values were only computed
223-
// when Dict was in the scheme list.
224-
if let Some(prim) = array.as_opt::<Primitive>() {
225-
let prim = prim.to_primitive();
226-
if prim.ptype().is_int() {
227-
cache.get_or_insert_with::<IntegerStats>(|| {
228-
IntegerStats::generate_opts(
229-
&prim,
230-
GenerateStatsOptions {
231-
count_distinct_values: needs_distinct,
232-
},
233-
)
234-
});
235-
} else {
236-
cache.get_or_insert_with::<FloatStats>(|| {
237-
FloatStats::generate_opts(
238-
&prim,
239-
GenerateStatsOptions {
240-
count_distinct_values: needs_distinct,
241-
},
242-
)
243-
});
244-
}
245-
} else if array.as_opt::<vortex_array::arrays::VarBinView>().is_some() {
246-
cache.get_or_insert_with::<StringStats>(|| {
247-
StringStats::generate_opts(
248-
&array.to_varbinview(),
249-
GenerateStatsOptions {
250-
count_distinct_values: needs_distinct,
251-
},
252-
)
213+
let merged_opts = eligible
214+
.iter()
215+
.fold(GenerateStatsOptions::default(), |acc, s| {
216+
acc.merge(s.stats_options())
253217
});
254-
}
255218

256-
if let Some(winner) = self.choose_scheme(&eligible, &array, ctx, &mut cache, excludes)? {
257-
let compressed = winner.compress(self, &array, ctx, &mut cache, excludes)?;
219+
let mut ctx = ctx;
220+
ctx.stats_options = merged_opts;
221+
222+
let mut data = ArrayAndStats::new(array, merged_opts);
223+
224+
if let Some(winner) = self.choose_scheme(&eligible, &mut data, ctx, excludes)? {
225+
let compressed = winner.compress(self, &mut data, ctx, excludes)?;
258226
if compressed.nbytes() < before_nbytes {
259227
return Ok(compressed);
260228
}
261229
}
262230

263231
// No scheme improved on the original.
264-
Ok(array)
232+
Ok(data.into_array())
265233
}
266234

267235
/// Evaluates each candidate scheme and returns the one with the best compression ratio
268236
/// (must be > 1.0).
269237
fn choose_scheme(
270238
&self,
271239
schemes: &[&'static dyn Scheme],
272-
array: &ArrayRef,
240+
data: &mut ArrayAndStats,
273241
ctx: CompressorContext,
274-
cache: &mut StatsCache,
275242
excludes: &[SchemeId],
276243
) -> VortexResult<Option<&'static dyn Scheme>> {
277244
let mut best: Option<(&'static dyn Scheme, f64)> = None;
278245

279246
for &scheme in schemes {
280-
let ratio = self.evaluate_scheme(scheme, array, ctx, cache, excludes)?;
247+
let ratio = self.evaluate_scheme(scheme, data, ctx, excludes)?;
281248
if is_valid_ratio(ratio) && ratio > 1.0 && best.is_none_or(|(_, r)| ratio > r) {
282249
best = Some((scheme, ratio));
283250
}
@@ -290,12 +257,11 @@ impl BtrBlocksCompressor {
290257
fn evaluate_scheme(
291258
&self,
292259
scheme: &'static dyn Scheme,
293-
array: &ArrayRef,
260+
data: &mut ArrayAndStats,
294261
ctx: CompressorContext,
295-
cache: &mut StatsCache,
296262
excludes: &[SchemeId],
297263
) -> VortexResult<f64> {
298-
let ratio = scheme.expected_compression_ratio(self, array, ctx, cache, excludes)?;
264+
let ratio = scheme.expected_compression_ratio(self, data, ctx, excludes)?;
299265

300266
tracing::debug!(
301267
scheme = %scheme.id(),

vortex-btrblocks/src/compressor/float/dictionary.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,6 @@ mod tests {
112112
use vortex_buffer::buffer;
113113

114114
use super::super::FloatStats;
115-
use crate::CompressorStats;
116115
use crate::compressor::float::dictionary::dictionary_encode;
117116

118117
#[test]

0 commit comments

Comments
 (0)