@@ -11,6 +11,7 @@ use vortex_array::Canonical;
1111use vortex_array:: IntoArray ;
1212use vortex_array:: ToCanonical ;
1313use vortex_array:: dtype:: PType ;
14+ use vortex_compressor:: estimate:: CompressionEstimate ;
1415use vortex_compressor:: scheme:: ChildSelection ;
1516use vortex_compressor:: scheme:: DescendantExclusion ;
1617use vortex_error:: VortexResult ;
@@ -24,7 +25,6 @@ use crate::CompressorContext;
2425use crate :: Scheme ;
2526use crate :: SchemeExt ;
2627use crate :: compress_patches;
27- use crate :: estimate_compression_ratio_with_sampling;
2828
2929/// ALP (Adaptive Lossless floating-Point) encoding.
3030#[ derive( Debug , Copy , Clone , PartialEq , Eq ) ]
@@ -69,22 +69,21 @@ impl Scheme for ALPScheme {
6969
7070 fn expected_compression_ratio (
7171 & self ,
72- compressor : & CascadingCompressor ,
7372 data : & mut ArrayAndStats ,
7473 ctx : CompressorContext ,
75- ) -> VortexResult < f64 > {
74+ ) -> CompressionEstimate {
7675 // ALP encodes floats as integers. Without integer compression afterward, the encoded ints
7776 // are the same size.
7877 if ctx. finished_cascading ( ) {
79- return Ok ( 0.0 ) ;
78+ return CompressionEstimate :: Skip ;
8079 }
8180
8281 // We don't support ALP for f16.
83- if data. float_stats ( ) . source ( ) . ptype ( ) == PType :: F16 {
84- return Ok ( 0.0 ) ;
82+ if data. array_as_primitive ( ) . ptype ( ) == PType :: F16 {
83+ return CompressionEstimate :: Skip ;
8584 }
8685
87- estimate_compression_ratio_with_sampling ( self , compressor , data . array ( ) , ctx )
86+ CompressionEstimate :: Sample
8887 }
8988
9089 fn compress (
@@ -93,9 +92,7 @@ impl Scheme for ALPScheme {
9392 data : & mut ArrayAndStats ,
9493 ctx : CompressorContext ,
9594 ) -> VortexResult < ArrayRef > {
96- let stats = data. float_stats ( ) ;
97-
98- let alp_encoded = alp_encode ( stats. source ( ) , None ) ?;
95+ let alp_encoded = alp_encode ( & data. array_as_primitive ( ) , None ) ?;
9996
10097 // Compress the ALP ints.
10198 let compressed_alp_ints =
@@ -120,15 +117,15 @@ impl Scheme for ALPRDScheme {
120117
121118 fn expected_compression_ratio (
122119 & self ,
123- compressor : & CascadingCompressor ,
124120 data : & mut ArrayAndStats ,
125- ctx : CompressorContext ,
126- ) -> VortexResult < f64 > {
127- if data. float_stats ( ) . source ( ) . ptype ( ) == PType :: F16 {
128- return Ok ( 0.0 ) ;
121+ _ctx : CompressorContext ,
122+ ) -> CompressionEstimate {
123+ // We don't support ALPRD for f16.
124+ if data. array_as_primitive ( ) . ptype ( ) == PType :: F16 {
125+ return CompressionEstimate :: Skip ;
129126 }
130127
131- estimate_compression_ratio_with_sampling ( self , compressor , data . array ( ) , ctx )
128+ CompressionEstimate :: Sample
132129 }
133130
134131 fn compress (
@@ -137,15 +134,15 @@ impl Scheme for ALPRDScheme {
137134 data : & mut ArrayAndStats ,
138135 _ctx : CompressorContext ,
139136 ) -> VortexResult < ArrayRef > {
140- let stats = data. float_stats ( ) ;
137+ let primitive_array = data. array_as_primitive ( ) ;
141138
142- let encoder = match stats . source ( ) . ptype ( ) {
143- PType :: F32 => RDEncoder :: new ( stats . source ( ) . as_slice :: < f32 > ( ) ) ,
144- PType :: F64 => RDEncoder :: new ( stats . source ( ) . as_slice :: < f64 > ( ) ) ,
139+ let encoder = match primitive_array . ptype ( ) {
140+ PType :: F32 => RDEncoder :: new ( primitive_array . as_slice :: < f32 > ( ) ) ,
141+ PType :: F64 => RDEncoder :: new ( primitive_array . as_slice :: < f64 > ( ) ) ,
145142 ptype => vortex_panic ! ( "cannot ALPRD compress ptype {ptype}" ) ,
146143 } ;
147144
148- let alp_rd = encoder. encode ( stats . source ( ) ) ;
145+ let alp_rd = encoder. encode ( & primitive_array ) ;
149146 let dtype = alp_rd. dtype ( ) . clone ( ) ;
150147 let right_bit_width = alp_rd. right_bit_width ( ) ;
151148 let mut alp_rd_data = alp_rd. into_data ( ) ;
@@ -193,24 +190,25 @@ impl Scheme for NullDominatedSparseScheme {
193190
194191 fn expected_compression_ratio (
195192 & self ,
196- _compressor : & CascadingCompressor ,
197193 data : & mut ArrayAndStats ,
198194 _ctx : CompressorContext ,
199- ) -> VortexResult < f64 > {
195+ ) -> CompressionEstimate {
196+ let len = data. array_len ( ) as f64 ;
200197 let stats = data. float_stats ( ) ;
198+ let value_count = stats. value_count ( ) ;
201199
202- if stats . value_count ( ) == 0 {
203- // All nulls should use ConstantScheme instead of this.
204- return Ok ( 0.0 ) ;
200+ // All-null arrays should be compressed as constant instead anyways.
201+ if value_count == 0 {
202+ return CompressionEstimate :: Skip ;
205203 }
206204
207205 // If the majority (90%) of values is null, this will compress well.
208- if stats. null_count ( ) as f64 / stats . source ( ) . len ( ) as f64 > 0.9 {
209- return Ok ( stats . source ( ) . len ( ) as f64 / stats . value_count ( ) as f64 ) ;
206+ if stats. null_count ( ) as f64 / len > 0.9 {
207+ return CompressionEstimate :: Ratio ( len / value_count as f64 ) ;
210208 }
211209
212210 // Otherwise we don't go this route.
213- Ok ( 0.0 )
211+ CompressionEstimate :: Skip
214212 }
215213
216214 fn compress (
@@ -219,10 +217,8 @@ impl Scheme for NullDominatedSparseScheme {
219217 data : & mut ArrayAndStats ,
220218 ctx : CompressorContext ,
221219 ) -> VortexResult < ArrayRef > {
222- let stats = data. float_stats ( ) ;
223-
224220 // We pass None as we only run this pathway for NULL-dominated float arrays.
225- let sparse_encoded = Sparse :: encode ( & stats . source ( ) . clone ( ) . into_array ( ) , None ) ?;
221+ let sparse_encoded = Sparse :: encode ( data . array ( ) , None ) ?;
226222
227223 if let Some ( sparse) = sparse_encoded. as_opt :: < Sparse > ( ) {
228224 let indices = sparse. patches ( ) . indices ( ) . to_primitive ( ) . narrow ( ) ?;
@@ -252,17 +248,26 @@ impl Scheme for PcoScheme {
252248 is_float_primitive ( canonical)
253249 }
254250
251+ fn expected_compression_ratio (
252+ & self ,
253+ _data : & mut ArrayAndStats ,
254+ _ctx : CompressorContext ,
255+ ) -> CompressionEstimate {
256+ CompressionEstimate :: Sample
257+ }
258+
255259 fn compress (
256260 & self ,
257261 _compressor : & CascadingCompressor ,
258262 data : & mut ArrayAndStats ,
259263 _ctx : CompressorContext ,
260264 ) -> VortexResult < ArrayRef > {
261- let stats = data. float_stats ( ) ;
262- Ok (
263- vortex_pco:: Pco :: from_primitive ( stats. source ( ) , pco:: DEFAULT_COMPRESSION_LEVEL , 8192 ) ?
264- . into_array ( ) ,
265- )
265+ Ok ( vortex_pco:: Pco :: from_primitive (
266+ & data. array_as_primitive ( ) ,
267+ pco:: DEFAULT_COMPRESSION_LEVEL ,
268+ 8192 ,
269+ ) ?
270+ . into_array ( ) )
266271 }
267272}
268273
@@ -408,7 +413,8 @@ mod scheme_selection_tests {
408413 let array = PrimitiveArray :: new ( Buffer :: copy_from ( & values) , Validity :: NonNullable ) ;
409414 let btr = BtrBlocksCompressor :: default ( ) ;
410415 let compressed = btr. compress ( & array. into_array ( ) ) ?;
411- assert ! ( compressed. is:: <Dict >( ) ) ;
416+ assert ! ( compressed. is:: <ALP >( ) ) ;
417+ assert ! ( compressed. children( ) [ 0 ] . is:: <Dict >( ) ) ;
412418 Ok ( ( ) )
413419 }
414420
0 commit comments