1717
1818use std:: sync:: Arc ;
1919
20- use arrow:: array:: {
21- ArrayRef , Int8Array , Int16Array , Int64Array , StringArray , StringViewArray ,
22- UInt8Array , UInt16Array ,
23- } ;
20+ use arrow:: array:: { ArrayRef , Int64Array , StringArray , StringViewArray } ;
2421use arrow:: datatypes:: { DataType , Field , Schema } ;
2522use criterion:: { Criterion , criterion_group, criterion_main} ;
2623use datafusion_expr:: function:: AccumulatorArgs ;
@@ -59,38 +56,6 @@ fn create_i64_array(n_distinct: usize) -> Int64Array {
5956 . collect ( )
6057}
6158
62- fn create_u8_array ( n_distinct : usize ) -> UInt8Array {
63- let mut rng = StdRng :: seed_from_u64 ( 42 ) ;
64- let max_val = n_distinct. min ( 256 ) as u8 ;
65- ( 0 ..BATCH_SIZE )
66- . map ( |_| Some ( rng. random_range ( 0 ..max_val) ) )
67- . collect ( )
68- }
69-
70- fn create_i8_array ( n_distinct : usize ) -> Int8Array {
71- let mut rng = StdRng :: seed_from_u64 ( 42 ) ;
72- let max_val = ( n_distinct. min ( 256 ) / 2 ) as i8 ;
73- ( 0 ..BATCH_SIZE )
74- . map ( |_| Some ( rng. random_range ( -max_val..max_val) ) )
75- . collect ( )
76- }
77-
78- fn create_u16_array ( n_distinct : usize ) -> UInt16Array {
79- let mut rng = StdRng :: seed_from_u64 ( 42 ) ;
80- let max_val = n_distinct. min ( 65536 ) as u16 ;
81- ( 0 ..BATCH_SIZE )
82- . map ( |_| Some ( rng. random_range ( 0 ..max_val) ) )
83- . collect ( )
84- }
85-
86- fn create_i16_array ( n_distinct : usize ) -> Int16Array {
87- let mut rng = StdRng :: seed_from_u64 ( 42 ) ;
88- let max_val = ( n_distinct. min ( 65536 ) / 2 ) as i16 ;
89- ( 0 ..BATCH_SIZE )
90- . map ( |_| Some ( rng. random_range ( -max_val..max_val) ) )
91- . collect ( )
92- }
93-
9459/// Creates a pool of `n_distinct` random strings of the given length.
9560fn create_string_pool ( n_distinct : usize , string_length : usize ) -> Vec < String > {
9661 let mut rng = StdRng :: seed_from_u64 ( 42 ) ;
@@ -123,7 +88,7 @@ fn approx_distinct_benchmark(c: &mut Criterion) {
12388 for pct in [ 80 , 99 ] {
12489 let n_distinct = BATCH_SIZE * pct / 100 ;
12590
126- // Int64
91+ // --- Int64 benchmarks ---
12792 let values = Arc :: new ( create_i64_array ( n_distinct) ) as ArrayRef ;
12893 c. bench_function ( & format ! ( "approx_distinct i64 {pct}% distinct" ) , |b| {
12994 b. iter ( || {
@@ -139,7 +104,7 @@ fn approx_distinct_benchmark(c: &mut Criterion) {
139104 {
140105 let string_pool = create_string_pool ( n_distinct, str_len) ;
141106
142- // Utf8
107+ // --- Utf8 benchmarks ---
143108 let values = Arc :: new ( create_string_array ( & string_pool) ) as ArrayRef ;
144109 c. bench_function (
145110 & format ! ( "approx_distinct utf8 {label} {pct}% distinct" ) ,
@@ -153,7 +118,7 @@ fn approx_distinct_benchmark(c: &mut Criterion) {
153118 } ,
154119 ) ;
155120
156- // Utf8View
121+ // --- Utf8View benchmarks ---
157122 let values = Arc :: new ( create_string_view_array ( & string_pool) ) as ArrayRef ;
158123 c. bench_function (
159124 & format ! ( "approx_distinct utf8view {label} {pct}% distinct" ) ,
@@ -168,52 +133,6 @@ fn approx_distinct_benchmark(c: &mut Criterion) {
168133 ) ;
169134 }
170135 }
171-
172- // Small integer types
173-
174- // UInt8
175- let values = Arc :: new ( create_u8_array ( 200 ) ) as ArrayRef ;
176- c. bench_function ( "approx_distinct u8 bitmap" , |b| {
177- b. iter ( || {
178- let mut accumulator = prepare_accumulator ( DataType :: UInt8 ) ;
179- accumulator
180- . update_batch ( std:: slice:: from_ref ( & values) )
181- . unwrap ( )
182- } )
183- } ) ;
184-
185- // Int8
186- let values = Arc :: new ( create_i8_array ( 200 ) ) as ArrayRef ;
187- c. bench_function ( "approx_distinct i8 bitmap" , |b| {
188- b. iter ( || {
189- let mut accumulator = prepare_accumulator ( DataType :: Int8 ) ;
190- accumulator
191- . update_batch ( std:: slice:: from_ref ( & values) )
192- . unwrap ( )
193- } )
194- } ) ;
195-
196- // UInt16
197- let values = Arc :: new ( create_u16_array ( 50000 ) ) as ArrayRef ;
198- c. bench_function ( "approx_distinct u16 bitmap" , |b| {
199- b. iter ( || {
200- let mut accumulator = prepare_accumulator ( DataType :: UInt16 ) ;
201- accumulator
202- . update_batch ( std:: slice:: from_ref ( & values) )
203- . unwrap ( )
204- } )
205- } ) ;
206-
207- // Int16
208- let values = Arc :: new ( create_i16_array ( 50000 ) ) as ArrayRef ;
209- c. bench_function ( "approx_distinct i16 bitmap" , |b| {
210- b. iter ( || {
211- let mut accumulator = prepare_accumulator ( DataType :: Int16 ) ;
212- accumulator
213- . update_batch ( std:: slice:: from_ref ( & values) )
214- . unwrap ( )
215- } )
216- } ) ;
217136}
218137
219138criterion_group ! ( benches, approx_distinct_benchmark) ;
0 commit comments