Skip to content

Commit 0ba9f81

Browse files
Add benchmark comparing hash-list vs HyperLogLog per-insert cost
1 parent 1215c2f commit 0ba9f81

1 file changed

Lines changed: 69 additions & 2 deletions

File tree

benches/hyperloglog_insert.rs

Lines changed: 69 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
//! Bench to compare and optimize time performance of inserting a prefix-free encoded list of hashes.
2-
use criterion::{black_box, criterion_group, criterion_main, Criterion};
2+
use criterion::{black_box, criterion_group, criterion_main, BatchSize, BenchmarkId, Criterion};
33
use hyperloglog_rs::prelude::*;
44
use twox_hash::XxHash64;
55

@@ -72,6 +72,73 @@ fn bench_hyperloglog_insert(c: &mut Criterion) {
7272
group.finish();
7373
}
7474

75-
criterion_group!(benches, bench_hyperloglog_insert);
75+
/// Counter type used by `hash_list_of`: a `HyperLogLog` at `Precision14` with `Bits6`
/// registers, stored in the packed array that `Precision14` associates with `Bits6`, and
/// hashing with `XxHash64`.
type HLL14 =
76+
HyperLogLog<Precision14, Bits6, <Precision14 as PackedRegister<Bits6>>::Array, XxHash64>;
77+
78+
/// Builds a counter holding `count` distinct elements, asserting it is still in hash-list
79+
/// mode (`count` must stay below the conversion threshold, ~34k at Precision14/Bits6).
80+
fn hash_list_of(count: u64, seed: u64) -> HLL14 {
81+
let mut hll = HLL14::default();
82+
for value in iter_random_values::<u64>(count, None, Some(seed)) {
83+
hll.insert(&value);
84+
}
85+
assert!(
86+
hll.is_hash_list(),
87+
"a counter of {count} elements must still be a hash list"
88+
);
89+
hll
90+
}
91+
92+
/// Compares the per-insert cost of the hash-list mode (sorted, gap-encoded, O(n) per insert)
93+
/// against the fully-fledged HyperLogLog mode (O(1) register update), inserting the same batch
94+
/// of new elements into bases of increasing size. The HyperLogLog base is obtained by flipping
95+
/// a hash-list counter with `convert_hash_list_to_hyperloglog`, so both modes hold the same
96+
/// elements. The clone in the setup closure is not timed (`iter_batched`).
97+
fn bench_insert_modes(c: &mut Criterion) {
98+
let mut group = c.benchmark_group("insert_mode_p14_bits6");
99+
100+
// 256 fresh elements, disjoint from the bases, inserted on each measured iteration.
101+
let batch: Vec<u64> = iter_random_values::<u64>(256, None, Some(0x00BA_7C00)).collect();
102+
103+
for &base_size in &[256_u64, 4_096, 16_384] {
104+
let hash_list_base = hash_list_of(base_size, 0x00BA_5E00);
105+
let mut hll_base = hash_list_base.clone();
106+
hll_base.convert_hash_list_to_hyperloglog().unwrap();
107+
assert!(!hll_base.is_hash_list());
108+
109+
group.bench_with_input(
110+
BenchmarkId::new("hash_list", base_size),
111+
&base_size,
112+
|b, _| {
113+
b.iter_batched(
114+
|| hash_list_base.clone(),
115+
|mut hll| {
116+
for value in &batch {
117+
hll.insert(black_box(value));
118+
}
119+
black_box(hll.is_hash_list())
120+
},
121+
BatchSize::SmallInput,
122+
);
123+
},
124+
);
125+
126+
group.bench_with_input(BenchmarkId::new("hll", base_size), &base_size, |b, _| {
127+
b.iter_batched(
128+
|| hll_base.clone(),
129+
|mut hll| {
130+
for value in &batch {
131+
hll.insert(black_box(value));
132+
}
133+
},
134+
BatchSize::SmallInput,
135+
);
136+
});
137+
}
138+
139+
group.finish();
140+
}
141+
142+
criterion_group!(benches, bench_hyperloglog_insert, bench_insert_modes);
76143

77144
criterion_main!(benches);

0 commit comments

Comments
 (0)