Skip to content

Commit 3db92e3

Browse files
committed
create_benches_small_int
1 parent f8c01a1 commit 3db92e3

File tree

1 file changed

+82
-1
lines changed

1 file changed

+82
-1
lines changed

datafusion/functions-aggregate/benches/approx_distinct.rs

Lines changed: 82 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,10 @@
1717

1818
use std::sync::Arc;
1919

20-
use arrow::array::{ArrayRef, Int64Array, StringArray, StringViewArray};
20+
use arrow::array::{
21+
ArrayRef, Int8Array, Int16Array, Int64Array, StringArray, StringViewArray,
22+
UInt8Array, UInt16Array,
23+
};
2124
use arrow::datatypes::{DataType, Field, Schema};
2225
use criterion::{Criterion, criterion_group, criterion_main};
2326
use datafusion_expr::function::AccumulatorArgs;
@@ -56,6 +59,38 @@ fn create_i64_array(n_distinct: usize) -> Int64Array {
5659
.collect()
5760
}
5861

62+
fn create_u8_array(n_distinct: usize) -> UInt8Array {
63+
let mut rng = StdRng::seed_from_u64(42);
64+
let max_val = n_distinct.min(256) as u8;
65+
(0..BATCH_SIZE)
66+
.map(|_| Some(rng.random_range(0..max_val)))
67+
.collect()
68+
}
69+
70+
fn create_i8_array(n_distinct: usize) -> Int8Array {
71+
let mut rng = StdRng::seed_from_u64(42);
72+
let max_val = (n_distinct.min(256) / 2) as i8;
73+
(0..BATCH_SIZE)
74+
.map(|_| Some(rng.random_range(-max_val..max_val)))
75+
.collect()
76+
}
77+
78+
fn create_u16_array(n_distinct: usize) -> UInt16Array {
79+
let mut rng = StdRng::seed_from_u64(42);
80+
let max_val = n_distinct.min(65536) as u16;
81+
(0..BATCH_SIZE)
82+
.map(|_| Some(rng.random_range(0..max_val)))
83+
.collect()
84+
}
85+
86+
fn create_i16_array(n_distinct: usize) -> Int16Array {
87+
let mut rng = StdRng::seed_from_u64(42);
88+
let max_val = (n_distinct.min(65536) / 2) as i16;
89+
(0..BATCH_SIZE)
90+
.map(|_| Some(rng.random_range(-max_val..max_val)))
91+
.collect()
92+
}
93+
5994
/// Creates a pool of `n_distinct` random strings of the given length.
6095
fn create_string_pool(n_distinct: usize, string_length: usize) -> Vec<String> {
6196
let mut rng = StdRng::seed_from_u64(42);
@@ -133,6 +168,52 @@ fn approx_distinct_benchmark(c: &mut Criterion) {
133168
);
134169
}
135170
}
171+
172+
// Small integer types
173+
174+
// UInt8
175+
let values = Arc::new(create_u8_array(200)) as ArrayRef;
176+
c.bench_function("approx_distinct u8 bitmap", |b| {
177+
b.iter(|| {
178+
let mut accumulator = prepare_accumulator(DataType::UInt8);
179+
accumulator
180+
.update_batch(std::slice::from_ref(&values))
181+
.unwrap()
182+
})
183+
});
184+
185+
// Int8
186+
let values = Arc::new(create_i8_array(200)) as ArrayRef;
187+
c.bench_function("approx_distinct i8 bitmap", |b| {
188+
b.iter(|| {
189+
let mut accumulator = prepare_accumulator(DataType::Int8);
190+
accumulator
191+
.update_batch(std::slice::from_ref(&values))
192+
.unwrap()
193+
})
194+
});
195+
196+
// UInt16
197+
let values = Arc::new(create_u16_array(50000)) as ArrayRef;
198+
c.bench_function("approx_distinct u16 bitmap", |b| {
199+
b.iter(|| {
200+
let mut accumulator = prepare_accumulator(DataType::UInt16);
201+
accumulator
202+
.update_batch(std::slice::from_ref(&values))
203+
.unwrap()
204+
})
205+
});
206+
207+
// Int16
208+
let values = Arc::new(create_i16_array(50000)) as ArrayRef;
209+
c.bench_function("approx_distinct i16 bitmap", |b| {
210+
b.iter(|| {
211+
let mut accumulator = prepare_accumulator(DataType::Int16);
212+
accumulator
213+
.update_batch(std::slice::from_ref(&values))
214+
.unwrap()
215+
})
216+
});
136217
}
137218

138219
criterion_group!(benches, approx_distinct_benchmark);

0 commit comments

Comments
 (0)