|
1 | 1 | //! Bench to compare and optimize time performance of inserting a prefix-free encoded list of hashes. |
2 | | -use criterion::{black_box, criterion_group, criterion_main, Criterion}; |
| 2 | +use criterion::{black_box, criterion_group, criterion_main, BatchSize, BenchmarkId, Criterion}; |
3 | 3 | use hyperloglog_rs::prelude::*; |
4 | 4 | use twox_hash::XxHash64; |
5 | 5 |
|
@@ -72,6 +72,73 @@ fn bench_hyperloglog_insert(c: &mut Criterion) { |
72 | 72 | group.finish(); |
73 | 73 | } |
74 | 74 |
|
75 | | -criterion_group!(benches, bench_hyperloglog_insert); |
| 75 | +type HLL14 = |
| 76 | + HyperLogLog<Precision14, Bits6, <Precision14 as PackedRegister<Bits6>>::Array, XxHash64>; |
| 77 | + |
| 78 | +/// Builds a counter holding `count` distinct elements, asserting it is still in hash-list |
| 79 | +/// mode (`count` must stay below the conversion threshold, ~34k at Precision14/Bits6). |
| 80 | +fn hash_list_of(count: u64, seed: u64) -> HLL14 { |
| 81 | + let mut hll = HLL14::default(); |
| 82 | + for value in iter_random_values::<u64>(count, None, Some(seed)) { |
| 83 | + hll.insert(&value); |
| 84 | + } |
| 85 | + assert!( |
| 86 | + hll.is_hash_list(), |
| 87 | + "a counter of {count} elements must still be a hash list" |
| 88 | + ); |
| 89 | + hll |
| 90 | +} |
| 91 | + |
| 92 | +/// Compares the per-insert cost of the hash-list mode (sorted, gap-encoded, O(n) per insert) |
| 93 | +/// against the fully-fledged HyperLogLog mode (O(1) register update), inserting the same batch |
| 94 | +/// of new elements into bases of increasing size. The HyperLogLog base is obtained by flipping |
| 95 | +/// a hash-list counter with `convert_hash_list_to_hyperloglog`, so both modes hold the same |
| 96 | +/// elements. The clone in the setup closure is not timed (`iter_batched`). |
| 97 | +fn bench_insert_modes(c: &mut Criterion) { |
| 98 | + let mut group = c.benchmark_group("insert_mode_p14_bits6"); |
| 99 | + |
| 100 | + // 256 fresh elements, disjoint from the bases, inserted on each measured iteration. |
| 101 | + let batch: Vec<u64> = iter_random_values::<u64>(256, None, Some(0x00BA_7C00)).collect(); |
| 102 | + |
| 103 | + for &base_size in &[256_u64, 4_096, 16_384] { |
| 104 | + let hash_list_base = hash_list_of(base_size, 0x00BA_5E00); |
| 105 | + let mut hll_base = hash_list_base.clone(); |
| 106 | + hll_base.convert_hash_list_to_hyperloglog().unwrap(); |
| 107 | + assert!(!hll_base.is_hash_list()); |
| 108 | + |
| 109 | + group.bench_with_input( |
| 110 | + BenchmarkId::new("hash_list", base_size), |
| 111 | + &base_size, |
| 112 | + |b, _| { |
| 113 | + b.iter_batched( |
| 114 | + || hash_list_base.clone(), |
| 115 | + |mut hll| { |
| 116 | + for value in &batch { |
| 117 | + hll.insert(black_box(value)); |
| 118 | + } |
| 119 | + black_box(hll.is_hash_list()) |
| 120 | + }, |
| 121 | + BatchSize::SmallInput, |
| 122 | + ); |
| 123 | + }, |
| 124 | + ); |
| 125 | + |
| 126 | + group.bench_with_input(BenchmarkId::new("hll", base_size), &base_size, |b, _| { |
| 127 | + b.iter_batched( |
| 128 | + || hll_base.clone(), |
| 129 | + |mut hll| { |
| 130 | + for value in &batch { |
| 131 | + hll.insert(black_box(value)); |
| 132 | + } |
| 133 | + }, |
| 134 | + BatchSize::SmallInput, |
| 135 | + ); |
| 136 | + }); |
| 137 | + } |
| 138 | + |
| 139 | + group.finish(); |
| 140 | +} |
| 141 | + |
| 142 | +criterion_group!(benches, bench_hyperloglog_insert, bench_insert_modes); |
76 | 143 |
|
77 | 144 | criterion_main!(benches); |
0 commit comments