@@ -17,7 +17,6 @@ use vortex_array::arrays::ExtensionArray;
1717use vortex_array:: arrays:: FixedSizeListArray ;
1818use vortex_array:: arrays:: ListArray ;
1919use vortex_array:: arrays:: ListViewArray ;
20- use vortex_array:: arrays:: Primitive ;
2120use vortex_array:: arrays:: StructArray ;
2221use vortex_array:: arrays:: TemporalArray ;
2322use vortex_array:: arrays:: listview:: list_from_list_view;
@@ -28,18 +27,14 @@ use vortex_array::scalar::Scalar;
2827use vortex_array:: vtable:: ValidityHelper ;
2928use vortex_error:: VortexResult ;
3029
30+ use crate :: ArrayAndStats ;
3131use crate :: BtrBlocksCompressorBuilder ;
3232use crate :: CompressorContext ;
33- use crate :: CompressorStats ;
3433use crate :: GenerateStatsOptions ;
3534use crate :: Scheme ;
3635use crate :: SchemeId ;
37- use crate :: StatsCache ;
3836use crate :: compressor:: decimal:: compress_decimal;
39- use crate :: compressor:: float:: FloatStats ;
4037use crate :: compressor:: integer:: DictScheme as IntDictScheme ;
41- use crate :: compressor:: integer:: IntegerStats ;
42- use crate :: compressor:: string:: StringStats ;
4338use crate :: compressor:: temporal:: compress_temporal;
4439
4540/// The main compressor type implementing BtrBlocks-inspired compression.
@@ -215,69 +210,41 @@ impl BtrBlocksCompressor {
215210 }
216211
217212 let before_nbytes = array. nbytes ( ) ;
218- let needs_distinct = eligible. iter ( ) . any ( |s| s. needs_distinct_values ( ) ) ;
219- let mut cache = StatsCache :: new ( ) ;
220-
221- // Pre-populate the stats cache with the right `count_distinct_values` setting.
222- // This matches the old `gen_stats` behavior where distinct values were only computed
223- // when Dict was in the scheme list.
224- if let Some ( prim) = array. as_opt :: < Primitive > ( ) {
225- let prim = prim. to_primitive ( ) ;
226- if prim. ptype ( ) . is_int ( ) {
227- cache. get_or_insert_with :: < IntegerStats > ( || {
228- IntegerStats :: generate_opts (
229- & prim,
230- GenerateStatsOptions {
231- count_distinct_values : needs_distinct,
232- } ,
233- )
234- } ) ;
235- } else {
236- cache. get_or_insert_with :: < FloatStats > ( || {
237- FloatStats :: generate_opts (
238- & prim,
239- GenerateStatsOptions {
240- count_distinct_values : needs_distinct,
241- } ,
242- )
243- } ) ;
244- }
245- } else if array. as_opt :: < vortex_array:: arrays:: VarBinView > ( ) . is_some ( ) {
246- cache. get_or_insert_with :: < StringStats > ( || {
247- StringStats :: generate_opts (
248- & array. to_varbinview ( ) ,
249- GenerateStatsOptions {
250- count_distinct_values : needs_distinct,
251- } ,
252- )
213+ let merged_opts = eligible
214+ . iter ( )
215+ . fold ( GenerateStatsOptions :: default ( ) , |acc, s| {
216+ acc. merge ( s. stats_options ( ) )
253217 } ) ;
254- }
255218
256- if let Some ( winner) = self . choose_scheme ( & eligible, & array, ctx, & mut cache, excludes) ? {
257- let compressed = winner. compress ( self , & array, ctx, & mut cache, excludes) ?;
219+ let mut ctx = ctx;
220+ ctx. stats_options = merged_opts;
221+
222+ let mut data = ArrayAndStats :: new ( array, merged_opts) ;
223+
224+ if let Some ( winner) = self . choose_scheme ( & eligible, & mut data, ctx, excludes) ? {
225+ let compressed = winner. compress ( self , & mut data, ctx, excludes) ?;
258226 if compressed. nbytes ( ) < before_nbytes {
259227 return Ok ( compressed) ;
260228 }
261229 }
262230
263231 // No scheme improved on the original.
264- Ok ( array )
232+ Ok ( data . into_array ( ) )
265233 }
266234
267235 /// Evaluates each candidate scheme and returns the one with the best compression ratio
268236 /// (must be > 1.0).
269237 fn choose_scheme (
270238 & self ,
271239 schemes : & [ & ' static dyn Scheme ] ,
272- array : & ArrayRef ,
240+ data : & mut ArrayAndStats ,
273241 ctx : CompressorContext ,
274- cache : & mut StatsCache ,
275242 excludes : & [ SchemeId ] ,
276243 ) -> VortexResult < Option < & ' static dyn Scheme > > {
277244 let mut best: Option < ( & ' static dyn Scheme , f64 ) > = None ;
278245
279246 for & scheme in schemes {
280- let ratio = self . evaluate_scheme ( scheme, array , ctx, cache , excludes) ?;
247+ let ratio = self . evaluate_scheme ( scheme, data , ctx, excludes) ?;
281248 if is_valid_ratio ( ratio) && ratio > 1.0 && best. is_none_or ( |( _, r) | ratio > r) {
282249 best = Some ( ( scheme, ratio) ) ;
283250 }
@@ -290,12 +257,11 @@ impl BtrBlocksCompressor {
290257 fn evaluate_scheme (
291258 & self ,
292259 scheme : & ' static dyn Scheme ,
293- array : & ArrayRef ,
260+ data : & mut ArrayAndStats ,
294261 ctx : CompressorContext ,
295- cache : & mut StatsCache ,
296262 excludes : & [ SchemeId ] ,
297263 ) -> VortexResult < f64 > {
298- let ratio = scheme. expected_compression_ratio ( self , array , ctx, cache , excludes) ?;
264+ let ratio = scheme. expected_compression_ratio ( self , data , ctx, excludes) ?;
299265
300266 tracing:: debug!(
301267 scheme = %scheme. id( ) ,
0 commit comments