@@ -15,6 +15,7 @@ use vortex_array::IntoArray;
1515use vortex_array:: ToCanonical ;
1616use vortex_array:: arrays:: primitive:: PrimitiveArrayExt ;
1717use vortex_array:: dtype:: PType ;
18+ use vortex_compressor:: estimate:: CompressionEstimate ;
1819use vortex_compressor:: scheme:: ChildSelection ;
1920use vortex_compressor:: scheme:: DescendantExclusion ;
2021use vortex_error:: VortexResult ;
@@ -28,7 +29,6 @@ use crate::CompressorContext;
2829use crate :: Scheme ;
2930use crate :: SchemeExt ;
3031use crate :: compress_patches;
31- use crate :: estimate_compression_ratio_with_sampling;
3232
3333/// ALP (Adaptive Lossless floating-Point) encoding.
3434#[ derive( Debug , Copy , Clone , PartialEq , Eq ) ]
@@ -73,22 +73,21 @@ impl Scheme for ALPScheme {
7373
7474 fn expected_compression_ratio (
7575 & self ,
76- compressor : & CascadingCompressor ,
7776 data : & mut ArrayAndStats ,
7877 ctx : CompressorContext ,
79- ) -> VortexResult < f64 > {
78+ ) -> CompressionEstimate {
8079 // ALP encodes floats as integers. Without integer compression afterward, the encoded ints
8180 // are the same size.
8281 if ctx. finished_cascading ( ) {
83- return Ok ( 0.0 ) ;
82+ return CompressionEstimate :: Skip ;
8483 }
8584
8685 // We don't support ALP for f16.
87- if data. float_stats ( ) . source ( ) . ptype ( ) == PType :: F16 {
88- return Ok ( 0.0 ) ;
86+ if data. array_as_primitive ( ) . ptype ( ) == PType :: F16 {
87+ return CompressionEstimate :: Skip ;
8988 }
9089
91- estimate_compression_ratio_with_sampling ( self , compressor , data . array ( ) , ctx )
90+ CompressionEstimate :: Sample
9291 }
9392
9493 fn compress (
@@ -97,9 +96,7 @@ impl Scheme for ALPScheme {
9796 data : & mut ArrayAndStats ,
9897 ctx : CompressorContext ,
9998 ) -> VortexResult < ArrayRef > {
100- let stats = data. float_stats ( ) ;
101-
102- let alp_encoded = alp_encode ( stats. source ( ) , None ) ?;
99+ let alp_encoded = alp_encode ( & data. array_as_primitive ( ) , None ) ?;
103100
104101 // Compress the ALP ints.
105102 let compressed_alp_ints =
@@ -124,15 +121,15 @@ impl Scheme for ALPRDScheme {
124121
125122 fn expected_compression_ratio (
126123 & self ,
127- compressor : & CascadingCompressor ,
128124 data : & mut ArrayAndStats ,
129- ctx : CompressorContext ,
130- ) -> VortexResult < f64 > {
131- if data. float_stats ( ) . source ( ) . ptype ( ) == PType :: F16 {
132- return Ok ( 0.0 ) ;
125+ _ctx : CompressorContext ,
126+ ) -> CompressionEstimate {
127+ // We don't support ALPRD for f16.
128+ if data. array_as_primitive ( ) . ptype ( ) == PType :: F16 {
129+ return CompressionEstimate :: Skip ;
133130 }
134131
135- estimate_compression_ratio_with_sampling ( self , compressor , data . array ( ) , ctx )
132+ CompressionEstimate :: Sample
136133 }
137134
138135 fn compress (
@@ -141,15 +138,15 @@ impl Scheme for ALPRDScheme {
141138 data : & mut ArrayAndStats ,
142139 _ctx : CompressorContext ,
143140 ) -> VortexResult < ArrayRef > {
144- let stats = data. float_stats ( ) ;
141+ let primitive_array = data. array_as_primitive ( ) ;
145142
146- let encoder = match stats . source ( ) . ptype ( ) {
147- PType :: F32 => RDEncoder :: new ( stats . source ( ) . as_slice :: < f32 > ( ) ) ,
148- PType :: F64 => RDEncoder :: new ( stats . source ( ) . as_slice :: < f64 > ( ) ) ,
143+ let encoder = match primitive_array . ptype ( ) {
144+ PType :: F32 => RDEncoder :: new ( primitive_array . as_slice :: < f32 > ( ) ) ,
145+ PType :: F64 => RDEncoder :: new ( primitive_array . as_slice :: < f64 > ( ) ) ,
149146 ptype => vortex_panic ! ( "cannot ALPRD compress ptype {ptype}" ) ,
150147 } ;
151148
152- let alp_rd = encoder. encode ( stats . source ( ) ) ;
149+ let alp_rd = encoder. encode ( & primitive_array ) ;
153150 let dtype = alp_rd. dtype ( ) . clone ( ) ;
154151 let right_bit_width = alp_rd. right_bit_width ( ) ;
155152 let mut parts = ALPRDArrayOwnedExt :: into_data_parts ( alp_rd) ;
@@ -191,24 +188,25 @@ impl Scheme for NullDominatedSparseScheme {
191188
192189 fn expected_compression_ratio (
193190 & self ,
194- _compressor : & CascadingCompressor ,
195191 data : & mut ArrayAndStats ,
196192 _ctx : CompressorContext ,
197- ) -> VortexResult < f64 > {
193+ ) -> CompressionEstimate {
194+ let len = data. array_len ( ) as f64 ;
198195 let stats = data. float_stats ( ) ;
196+ let value_count = stats. value_count ( ) ;
199197
200- if stats . value_count ( ) == 0 {
201- // All nulls should use ConstantScheme instead of this.
202- return Ok ( 0.0 ) ;
198+ // All-null arrays should be compressed as constant instead anyways.
199+ if value_count == 0 {
200+ return CompressionEstimate :: Skip ;
203201 }
204202
205203 // If the majority (90%) of values is null, this will compress well.
206- if stats. null_count ( ) as f64 / stats . source ( ) . len ( ) as f64 > 0.9 {
207- return Ok ( stats . source ( ) . len ( ) as f64 / stats . value_count ( ) as f64 ) ;
204+ if stats. null_count ( ) as f64 / len > 0.9 {
205+ return CompressionEstimate :: Ratio ( len / value_count as f64 ) ;
208206 }
209207
210208 // Otherwise we don't go this route.
211- Ok ( 0.0 )
209+ CompressionEstimate :: Skip
212210 }
213211
214212 fn compress (
@@ -217,10 +215,8 @@ impl Scheme for NullDominatedSparseScheme {
217215 data : & mut ArrayAndStats ,
218216 ctx : CompressorContext ,
219217 ) -> VortexResult < ArrayRef > {
220- let stats = data. float_stats ( ) ;
221-
222218 // We pass None as we only run this pathway for NULL-dominated float arrays.
223- let sparse_encoded = Sparse :: encode ( & stats . source ( ) . clone ( ) . into_array ( ) , None ) ?;
219+ let sparse_encoded = Sparse :: encode ( data . array ( ) , None ) ?;
224220
225221 if let Some ( sparse) = sparse_encoded. as_opt :: < Sparse > ( ) {
226222 let indices = sparse. patches ( ) . indices ( ) . to_primitive ( ) . narrow ( ) ?;
@@ -250,17 +246,26 @@ impl Scheme for PcoScheme {
250246 is_float_primitive ( canonical)
251247 }
252248
249+ fn expected_compression_ratio (
250+ & self ,
251+ _data : & mut ArrayAndStats ,
252+ _ctx : CompressorContext ,
253+ ) -> CompressionEstimate {
254+ CompressionEstimate :: Sample
255+ }
256+
253257 fn compress (
254258 & self ,
255259 _compressor : & CascadingCompressor ,
256260 data : & mut ArrayAndStats ,
257261 _ctx : CompressorContext ,
258262 ) -> VortexResult < ArrayRef > {
259- let stats = data. float_stats ( ) ;
260- Ok (
261- vortex_pco:: Pco :: from_primitive ( stats. source ( ) , pco:: DEFAULT_COMPRESSION_LEVEL , 8192 ) ?
262- . into_array ( ) ,
263- )
263+ Ok ( vortex_pco:: Pco :: from_primitive (
264+ & data. array_as_primitive ( ) ,
265+ pco:: DEFAULT_COMPRESSION_LEVEL ,
266+ 8192 ,
267+ ) ?
268+ . into_array ( ) )
264269 }
265270}
266271
@@ -406,7 +411,8 @@ mod scheme_selection_tests {
406411 let array = PrimitiveArray :: new ( Buffer :: copy_from ( & values) , Validity :: NonNullable ) ;
407412 let btr = BtrBlocksCompressor :: default ( ) ;
408413 let compressed = btr. compress ( & array. into_array ( ) ) ?;
409- assert ! ( compressed. is:: <Dict >( ) ) ;
414+ assert ! ( compressed. is:: <ALP >( ) ) ;
415+ assert ! ( compressed. children( ) [ 0 ] . is:: <Dict >( ) ) ;
410416 Ok ( ( ) )
411417 }
412418
0 commit comments