@@ -17,6 +17,8 @@ use vortex_array::scalar::Scalar;
1717use vortex_compressor:: builtins:: FloatDictScheme ;
1818use vortex_compressor:: builtins:: StringDictScheme ;
1919use vortex_compressor:: estimate:: CompressionEstimate ;
20+ use vortex_compressor:: estimate:: DeferredEstimate ;
21+ use vortex_compressor:: estimate:: EstimateVerdict ;
2022use vortex_compressor:: scheme:: AncestorExclusion ;
2123use vortex_compressor:: scheme:: ChildSelection ;
2224use vortex_compressor:: scheme:: DescendantExclusion ;
@@ -134,21 +136,21 @@ impl Scheme for FoRScheme {
134136 // FoR only subtracts the min. Without further compression (e.g. BitPacking), the output is
135137 // the same size.
136138 if ctx. finished_cascading ( ) {
137- return CompressionEstimate :: Skip ;
139+ return CompressionEstimate :: Verdict ( EstimateVerdict :: Skip ) ;
138140 }
139141
140142 let stats = data. integer_stats ( ) ;
141143
142144 // Only apply when the min is not already zero.
143145 if stats. erased ( ) . min_is_zero ( ) {
144- return CompressionEstimate :: Skip ;
146+ return CompressionEstimate :: Verdict ( EstimateVerdict :: Skip ) ;
145147 }
146148
147149 // Difference between max and min.
148150 let for_bitwidth = match stats. erased ( ) . max_minus_min ( ) . checked_ilog2 ( ) {
149151 Some ( l) => l + 1 ,
150152 // If max-min == 0, then we should be compressing this as a constant array.
151- None => return CompressionEstimate :: Skip ,
153+ None => return CompressionEstimate :: Verdict ( EstimateVerdict :: Skip ) ,
152154 } ;
153155
154156 // If BitPacking can be applied (only non-negative values) and FoR doesn't reduce bit width
@@ -162,7 +164,7 @@ impl Scheme for FoRScheme {
162164 {
163165 let bitpack_bitwidth = max_log + 1 ;
164166 if for_bitwidth >= bitpack_bitwidth {
165- return CompressionEstimate :: Skip ;
167+ return CompressionEstimate :: Verdict ( EstimateVerdict :: Skip ) ;
166168 }
167169 }
168170
@@ -173,7 +175,9 @@ impl Scheme for FoRScheme {
173175 . try_into ( )
174176 . vortex_expect ( "bit width must fit in u32" ) ;
175177
176- CompressionEstimate :: Ratio ( full_width as f64 / for_bitwidth as f64 )
178+ CompressionEstimate :: Verdict ( EstimateVerdict :: Ratio (
179+ full_width as f64 / for_bitwidth as f64 ,
180+ ) )
177181 }
178182
179183 fn compress (
@@ -265,17 +269,17 @@ impl Scheme for ZigZagScheme {
265269 // ZigZag only transforms negative values to positive. Without further compression,
266270 // the output is the same size.
267271 if ctx. finished_cascading ( ) {
268- return CompressionEstimate :: Skip ;
272+ return CompressionEstimate :: Verdict ( EstimateVerdict :: Skip ) ;
269273 }
270274
271275 let stats = data. integer_stats ( ) ;
272276
273277 // ZigZag is only useful when there are negative values.
274278 if !stats. erased ( ) . min_is_negative ( ) {
275- return CompressionEstimate :: Skip ;
279+ return CompressionEstimate :: Verdict ( EstimateVerdict :: Skip ) ;
276280 }
277281
278- CompressionEstimate :: Sample
282+ CompressionEstimate :: Deferred ( DeferredEstimate :: Sample )
279283 }
280284
281285 fn compress (
@@ -314,10 +318,10 @@ impl Scheme for BitPackingScheme {
314318
315319 // BitPacking only works for non-negative values.
316320 if stats. erased ( ) . min_is_negative ( ) {
317- return CompressionEstimate :: Skip ;
321+ return CompressionEstimate :: Verdict ( EstimateVerdict :: Skip ) ;
318322 }
319323
320- CompressionEstimate :: Sample
324+ CompressionEstimate :: Deferred ( DeferredEstimate :: Sample )
321325 }
322326
323327 fn compress (
@@ -443,12 +447,12 @@ impl Scheme for SparseScheme {
443447
444448 // All-null arrays should be compressed as constant instead anyways.
445449 if value_count == 0 {
446- return CompressionEstimate :: Skip ;
450+ return CompressionEstimate :: Verdict ( EstimateVerdict :: Skip ) ;
447451 }
448452
449453 // If the majority (90%) of values is null, this will compress well.
450454 if stats. null_count ( ) as f64 / len > 0.9 {
451- return CompressionEstimate :: Ratio ( len / value_count as f64 ) ;
455+ return CompressionEstimate :: Verdict ( EstimateVerdict :: Ratio ( len / value_count as f64 ) ) ;
452456 }
453457
454458 let ( _, most_frequent_count) = stats
@@ -460,18 +464,20 @@ impl Scheme for SparseScheme {
460464
461465 // If the most frequent value is the only value, we should compress as constant instead.
462466 if most_frequent_count == value_count {
463- return CompressionEstimate :: Skip ;
467+ return CompressionEstimate :: Verdict ( EstimateVerdict :: Skip ) ;
464468 }
465469 debug_assert ! ( value_count > most_frequent_count) ;
466470
467471 // See if the most frequent value accounts for >= 90% of the set values.
468472 let freq = most_frequent_count as f64 / value_count as f64 ;
469473 if freq < 0.9 {
470- return CompressionEstimate :: Skip ;
474+ return CompressionEstimate :: Verdict ( EstimateVerdict :: Skip ) ;
471475 }
472476
473477 // We only store the positions of the non-top values.
474- CompressionEstimate :: Ratio ( value_count as f64 / ( value_count - most_frequent_count) as f64 )
478+ CompressionEstimate :: Verdict ( EstimateVerdict :: Ratio (
479+ value_count as f64 / ( value_count - most_frequent_count) as f64 ,
480+ ) )
475481 }
476482
477483 fn compress (
@@ -603,10 +609,10 @@ impl Scheme for RunEndScheme {
603609 ) -> CompressionEstimate {
604610 // If the run length is below the threshold, drop it.
605611 if data. integer_stats ( ) . average_run_length ( ) < RUN_END_THRESHOLD {
606- return CompressionEstimate :: Skip ;
612+ return CompressionEstimate :: Verdict ( EstimateVerdict :: Skip ) ;
607613 }
608614
609- CompressionEstimate :: Sample
615+ CompressionEstimate :: Deferred ( DeferredEstimate :: Sample )
610616 }
611617
612618 fn compress (
@@ -668,14 +674,14 @@ impl Scheme for SequenceScheme {
668674 // It is pointless checking if a sample is a sequence since it will not correspond to the
669675 // entire array.
670676 if ctx. is_sample ( ) {
671- return CompressionEstimate :: Skip ;
677+ return CompressionEstimate :: Verdict ( EstimateVerdict :: Skip ) ;
672678 }
673679
674680 let stats = data. integer_stats ( ) ;
675681
676682 // `SequenceArray` does not support nulls.
677683 if stats. null_count ( ) > 0 {
678- return CompressionEstimate :: Skip ;
684+ return CompressionEstimate :: Verdict ( EstimateVerdict :: Skip ) ;
679685 }
680686
681687 // If the distinct_values_count was computed, and not all values are unique, then this
@@ -684,23 +690,25 @@ impl Scheme for SequenceScheme {
684690 . distinct_count ( )
685691 . is_some_and ( |count| count as usize != data. array_len ( ) )
686692 {
687- return CompressionEstimate :: Skip ;
693+ return CompressionEstimate :: Verdict ( EstimateVerdict :: Skip ) ;
688694 }
689695
690696 // TODO(connor): Why do we sequence encode the whole thing and then throw it away? And then
691697 // why do we divide the ratio by 2???
692698
693- CompressionEstimate :: Estimate ( Box :: new ( |_compressor, data, _ctx| {
694- let Some ( encoded) = sequence_encode ( data. array_as_primitive ( ) ) ? else {
695- // If we are unable to sequence encode this array, make sure we skip.
696- return Ok ( CompressionEstimate :: Skip ) ;
697- } ;
698-
699- // TODO(connor): This doesn't really make sense?
700- // Since two values are required to store base and multiplier the compression ratio is
701- // divided by 2.
702- Ok ( CompressionEstimate :: Ratio ( encoded. len ( ) as f64 / 2.0 ) )
703- } ) )
699+ CompressionEstimate :: Deferred ( DeferredEstimate :: Callback ( Box :: new (
700+ |_compressor, data, _ctx| {
701+ let Some ( encoded) = sequence_encode ( data. array_as_primitive ( ) ) ? else {
702+ // If we are unable to sequence encode this array, make sure we skip.
703+ return Ok ( EstimateVerdict :: Skip ) ;
704+ } ;
705+
706+ // TODO(connor): This doesn't really make sense?
707+ // Since two values are required to store base and multiplier the compression ratio is
708+ // divided by 2.
709+ Ok ( EstimateVerdict :: Ratio ( encoded. len ( ) as f64 / 2.0 ) )
710+ } ,
711+ ) ) )
704712 }
705713
706714 fn compress (
@@ -738,10 +746,10 @@ impl Scheme for PcoScheme {
738746
739747 // Pco does not support I8 or U8.
740748 if matches ! ( data. array_as_primitive( ) . ptype( ) , PType :: I8 | PType :: U8 ) {
741- return CompressionEstimate :: Skip ;
749+ return CompressionEstimate :: Verdict ( EstimateVerdict :: Skip ) ;
742750 }
743751
744- CompressionEstimate :: Sample
752+ CompressionEstimate :: Deferred ( DeferredEstimate :: Sample )
745753 }
746754
747755 fn compress (
@@ -865,14 +873,14 @@ impl Scheme for IntRLEScheme {
865873 ) -> CompressionEstimate {
866874 // RLE is only useful when we cascade it with another encoding.
867875 if ctx. finished_cascading ( ) {
868- return CompressionEstimate :: Skip ;
876+ return CompressionEstimate :: Verdict ( EstimateVerdict :: Skip ) ;
869877 }
870878
871879 if data. integer_stats ( ) . average_run_length ( ) < RUN_LENGTH_THRESHOLD {
872- return CompressionEstimate :: Skip ;
880+ return CompressionEstimate :: Verdict ( EstimateVerdict :: Skip ) ;
873881 }
874882
875- CompressionEstimate :: Sample
883+ CompressionEstimate :: Deferred ( DeferredEstimate :: Sample )
876884 }
877885
878886 fn compress (
0 commit comments