|
17 | 17 |
|
18 | 18 | use std::sync::Arc; |
19 | 19 |
|
20 | | -use arrow::array::{ArrayRef, Int64Array, StringArray, StringViewArray}; |
| 20 | +use arrow::array::{ |
| 21 | + ArrayRef, Int8Array, Int16Array, Int64Array, StringArray, StringViewArray, |
| 22 | + UInt8Array, UInt16Array, |
| 23 | +}; |
21 | 24 | use arrow::datatypes::{DataType, Field, Schema}; |
22 | 25 | use criterion::{Criterion, criterion_group, criterion_main}; |
23 | 26 | use datafusion_expr::function::AccumulatorArgs; |
@@ -56,6 +59,38 @@ fn create_i64_array(n_distinct: usize) -> Int64Array { |
56 | 59 | .collect() |
57 | 60 | } |
58 | 61 |
|
| 62 | +fn create_u8_array(n_distinct: usize) -> UInt8Array { |
| 63 | + let mut rng = StdRng::seed_from_u64(42); |
| 64 | + let max_val = n_distinct.min(256) as u8; |
| 65 | + (0..BATCH_SIZE) |
| 66 | + .map(|_| Some(rng.random_range(0..max_val))) |
| 67 | + .collect() |
| 68 | +} |
| 69 | + |
| 70 | +fn create_i8_array(n_distinct: usize) -> Int8Array { |
| 71 | + let mut rng = StdRng::seed_from_u64(42); |
| 72 | + let max_val = (n_distinct.min(256) / 2) as i8; |
| 73 | + (0..BATCH_SIZE) |
| 74 | + .map(|_| Some(rng.random_range(-max_val..max_val))) |
| 75 | + .collect() |
| 76 | +} |
| 77 | + |
| 78 | +fn create_u16_array(n_distinct: usize) -> UInt16Array { |
| 79 | + let mut rng = StdRng::seed_from_u64(42); |
| 80 | + let max_val = n_distinct.min(65536) as u16; |
| 81 | + (0..BATCH_SIZE) |
| 82 | + .map(|_| Some(rng.random_range(0..max_val))) |
| 83 | + .collect() |
| 84 | +} |
| 85 | + |
| 86 | +fn create_i16_array(n_distinct: usize) -> Int16Array { |
| 87 | + let mut rng = StdRng::seed_from_u64(42); |
| 88 | + let max_val = (n_distinct.min(65536) / 2) as i16; |
| 89 | + (0..BATCH_SIZE) |
| 90 | + .map(|_| Some(rng.random_range(-max_val..max_val))) |
| 91 | + .collect() |
| 92 | +} |
| 93 | + |
59 | 94 | /// Creates a pool of `n_distinct` random strings of the given length. |
60 | 95 | fn create_string_pool(n_distinct: usize, string_length: usize) -> Vec<String> { |
61 | 96 | let mut rng = StdRng::seed_from_u64(42); |
@@ -133,6 +168,52 @@ fn approx_distinct_benchmark(c: &mut Criterion) { |
133 | 168 | ); |
134 | 169 | } |
135 | 170 | } |
| 171 | + |
| 172 | + // Small integer types |
| 173 | + |
| 174 | + // UInt8 |
| 175 | + let values = Arc::new(create_u8_array(200)) as ArrayRef; |
| 176 | + c.bench_function("approx_distinct u8 bitmap", |b| { |
| 177 | + b.iter(|| { |
| 178 | + let mut accumulator = prepare_accumulator(DataType::UInt8); |
| 179 | + accumulator |
| 180 | + .update_batch(std::slice::from_ref(&values)) |
| 181 | + .unwrap() |
| 182 | + }) |
| 183 | + }); |
| 184 | + |
| 185 | + // Int8 |
| 186 | + let values = Arc::new(create_i8_array(200)) as ArrayRef; |
| 187 | + c.bench_function("approx_distinct i8 bitmap", |b| { |
| 188 | + b.iter(|| { |
| 189 | + let mut accumulator = prepare_accumulator(DataType::Int8); |
| 190 | + accumulator |
| 191 | + .update_batch(std::slice::from_ref(&values)) |
| 192 | + .unwrap() |
| 193 | + }) |
| 194 | + }); |
| 195 | + |
| 196 | + // UInt16 |
| 197 | + let values = Arc::new(create_u16_array(50000)) as ArrayRef; |
| 198 | + c.bench_function("approx_distinct u16 bitmap", |b| { |
| 199 | + b.iter(|| { |
| 200 | + let mut accumulator = prepare_accumulator(DataType::UInt16); |
| 201 | + accumulator |
| 202 | + .update_batch(std::slice::from_ref(&values)) |
| 203 | + .unwrap() |
| 204 | + }) |
| 205 | + }); |
| 206 | + |
| 207 | + // Int16 |
| 208 | + let values = Arc::new(create_i16_array(50000)) as ArrayRef; |
| 209 | + c.bench_function("approx_distinct i16 bitmap", |b| { |
| 210 | + b.iter(|| { |
| 211 | + let mut accumulator = prepare_accumulator(DataType::Int16); |
| 212 | + accumulator |
| 213 | + .update_batch(std::slice::from_ref(&values)) |
| 214 | + .unwrap() |
| 215 | + }) |
| 216 | + }); |
136 | 217 | } |
137 | 218 |
|
138 | 219 | criterion_group!(benches, approx_distinct_benchmark); |
|
0 commit comments