Skip to content

Commit 0efe8e0

Browse files
committed
more benches
1 parent 540b308 commit 0efe8e0

3 files changed

Lines changed: 179 additions & 13 deletions

File tree

crates/hash-sorted-map/benchmarks/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,3 +21,4 @@ ahash = "0.8"
2121
hashbrown = "0.15"
2222
foldhash = "0.1"
2323
fnv = "1"
24+
itertools = "0.14"

crates/hash-sorted-map/benchmarks/performance.rs

Lines changed: 152 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,8 @@ use std::hash::BuildHasher;
22

33
use criterion::{criterion_group, criterion_main, BatchSize, Criterion};
44
use hash_sorted_map::HashSortedMap;
5-
use hash_sorted_map_benchmarks::{random_trigram_hashes, IdentityBuildHasher};
5+
use hash_sorted_map_benchmarks::{folded_multiply, random_trigram_hashes, IdentityBuildHasher};
6+
use rand::RngExt;
67

78
fn trigrams() -> Vec<u32> {
89
random_trigram_hashes(1000)
@@ -428,13 +429,162 @@ fn bench_sort(c: &mut Criterion) {
428429
group.finish();
429430
}
430431

432+
fn bench_merge_sort(c: &mut Criterion) {
433+
const NUM_MAPS: usize = 100;
434+
const KEYS_PER_MAP: usize = 100_000;
435+
436+
// Pre-generate 100 key vectors with random u32 values scrambled via folded_multiply.
437+
let maps_data: Vec<Vec<u32>> = (0..NUM_MAPS)
438+
.map(|_| {
439+
let mut rng = rand::rng();
440+
(0..KEYS_PER_MAP)
441+
.map(|_| folded_multiply(rng.random_range(0..1_000_000u32) as u64, 0x243f6a8885a308d3) as u32)
442+
.collect()
443+
})
444+
.collect();
445+
446+
let hasher = IdentityBuildHasher::default();
447+
let mut group = c.benchmark_group("merge_100_maps_sorted");
448+
group.sample_size(10);
449+
450+
// ── 1. HashSortedMap: merge all, then sort_by_hash ──────────────
451+
group.bench_function("HashSortedMap merge + sort_by_hash", |b| {
452+
b.iter(|| {
453+
let mut map: HashSortedMap<u32, u32, _> =
454+
HashSortedMap::with_hasher(IdentityBuildHasher::default());
455+
for keys in &maps_data {
456+
for &key in keys {
457+
*map.entry(key).or_default() += 1u32;
458+
}
459+
}
460+
map.sort_by_hash()
461+
});
462+
});
463+
464+
// ── 2. K-way merge over pre-sorted vectors ──────────────────────
465+
group.bench_function("k-way merge sorted vecs", |b| {
466+
use itertools::Itertools;
467+
468+
b.iter(|| {
469+
// Phase 1: build per-map sorted (hash, key, count) vectors.
470+
let sorted_vecs: Vec<Vec<(u64, u32, u32)>> = maps_data
471+
.iter()
472+
.map(|keys| {
473+
let mut counts = std::collections::HashMap::<u32, u32, IdentityBuildHasher>::with_hasher(IdentityBuildHasher::default());
474+
for &key in keys {
475+
*counts.entry(key).or_default() += 1;
476+
}
477+
let mut vec: Vec<(u64, u32, u32)> = counts
478+
.into_iter()
479+
.map(|(k, v)| (hasher.hash_one(k), k, v))
480+
.collect();
481+
vec.sort_unstable_by_key(|&(h, _, _)| h);
482+
vec
483+
})
484+
.collect();
485+
486+
// Phase 2: k-merge + group_by to aggregate counts.
487+
let result: Vec<(u32, u32)> = sorted_vecs
488+
.into_iter()
489+
.map(|v| v.into_iter())
490+
.kmerge_by(|a, b| a.0 <= b.0)
491+
.chunk_by(|&(_, key, _)| key)
492+
.into_iter()
493+
.map(|(key, group)| (key, group.map(|(_, _, c)| c).sum()))
494+
.collect();
495+
result
496+
});
497+
});
498+
499+
// ── 3. hashbrown HashMap merge, then sort into Vec ──────────────
500+
group.bench_function("hashbrown merge + Vec sort", |b| {
501+
b.iter(|| {
502+
let mut map =
503+
hashbrown::HashMap::<u32, u32, IdentityBuildHasher>::with_hasher(IdentityBuildHasher::default());
504+
for keys in &maps_data {
505+
for &key in keys {
506+
*map.entry(key).or_default() += 1;
507+
}
508+
}
509+
let mut vec: Vec<(u32, u32)> = map.into_iter().collect();
510+
vec.sort_unstable_by_key(|&(key, _)| hasher.hash_one(key));
511+
vec
512+
});
513+
});
514+
515+
// ── 4. hashbrown HashMap merge only (no sort) ───────────────────
516+
group.bench_function("hashbrown merge", |b| {
517+
b.iter(|| {
518+
let mut map =
519+
hashbrown::HashMap::<u32, u32, IdentityBuildHasher>::with_hasher(IdentityBuildHasher::default());
520+
for keys in &maps_data {
521+
for &key in keys {
522+
*map.entry(key).or_default() += 1;
523+
}
524+
}
525+
map
526+
});
527+
});
528+
529+
// ── 5. HashSortedMap merge only (no sort) ───────────────────────
530+
group.bench_function("HashSortedMap merge", |b| {
531+
b.iter(|| {
532+
let mut map: HashSortedMap<u32, u32, _> =
533+
HashSortedMap::with_hasher(IdentityBuildHasher::default());
534+
for keys in &maps_data {
535+
for &key in keys {
536+
*map.entry(key).or_default() += 1u32;
537+
}
538+
}
539+
map
540+
});
541+
});
542+
543+
// ── 6. hashbrown presized merge only ────────────────────────────
544+
group.bench_function("hashbrown merge presized", |b| {
545+
b.iter(|| {
546+
let mut map =
547+
hashbrown::HashMap::<u32, u32, IdentityBuildHasher>::with_capacity_and_hasher(
548+
1_000_000,
549+
IdentityBuildHasher::default(),
550+
);
551+
for keys in &maps_data {
552+
for &key in keys {
553+
*map.entry(key).or_default() += 1;
554+
}
555+
}
556+
map
557+
});
558+
});
559+
560+
// ── 7. HashSortedMap presized merge only ─────────────────────────
561+
group.bench_function("HashSortedMap merge presized", |b| {
562+
b.iter(|| {
563+
let mut map: HashSortedMap<u32, u32, _> =
564+
HashSortedMap::with_capacity_and_hasher(
565+
1_000_000,
566+
IdentityBuildHasher::default(),
567+
);
568+
for keys in &maps_data {
569+
for &key in keys {
570+
*map.entry(key).or_default() += 1u32;
571+
}
572+
}
573+
map
574+
});
575+
});
576+
577+
group.finish();
578+
}
579+
431580
criterion_group!(
432581
benches,
433582
bench_insert,
434583
bench_reinsert,
435584
bench_grow,
436585
bench_count,
437586
bench_iter,
438-
bench_sort
587+
bench_sort,
588+
bench_merge_sort
439589
);
440590
criterion_main!(benches);

crates/hash-sorted-map/src/hash_sorted_map.rs

Lines changed: 26 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -660,12 +660,9 @@ impl<'a, K: Hash + Eq, V, S: BuildHasher> VacantEntry<'a, K, V, S> {
660660
/// Cold path: the chain was full, the table is at capacity, and we need to
661661
/// grow before inserting. Re-walks via the slow path after grow.
662662
///
663-
/// After `grow()` doubles `num_primary` (`n_bits += 1`), our key's new
664-
/// primary group can have at most ~half the old chain's keys, so hitting
665-
/// `NeedsOverflow` again would require `GROUP_SIZE` keys to all collide on
666-
/// one extra bit of hash — essentially impossible for any reasonable hash.
667-
/// (`insert_for_grow` relies on the same assumption to skip its own
668-
/// capacity check.)
663+
/// With clustered hash functions (e.g. identity hashing), the new primary
664+
/// group may still be full after grow, so we handle `NeedsOverflow` by
665+
/// allocating an overflow group.
669666
#[cold]
670667
#[inline(never)]
671668
fn insert_after_grow<K: Hash + Eq, V, S: BuildHasher>(
@@ -675,9 +672,9 @@ fn insert_after_grow<K: Hash + Eq, V, S: BuildHasher>(
675672
value: V,
676673
) -> &mut V {
677674
map.grow();
675+
let tag = tag(hash);
678676
match map.find_or_insertion_slot(hash, &key) {
679677
FindResult::Vacant(Insertion::Empty { group, slot }) => {
680-
let tag = tag(hash);
681678
// SAFETY: `group` points into `map.container.groups` and is valid for `'a`.
682679
unsafe {
683680
let g = &mut *group;
@@ -688,10 +685,28 @@ fn insert_after_grow<K: Hash + Eq, V, S: BuildHasher>(
688685
g.values[slot].assume_init_mut()
689686
}
690687
}
691-
// After grow, the new primary group for `key` cannot be full (see
692-
// function docs), and the key wasn't in the table before grow.
693-
FindResult::Vacant(Insertion::NeedsOverflow { .. }) | FindResult::Found(_) => {
694-
unreachable!("post-grow walk must hit an empty slot")
688+
FindResult::Vacant(Insertion::NeedsOverflow { tail }) => {
689+
// Primary group chain is full even after grow (possible with
690+
// clustered identity hashes). Allocate an overflow group.
691+
debug_assert!(
692+
(map.container.num_groups as usize) < map.container.groups.len(),
693+
"overflow pool exhausted right after grow"
694+
);
695+
let new_gi = map.container.num_groups as usize;
696+
map.container.num_groups += 1;
697+
unsafe {
698+
(*tail).overflow = new_gi as u32;
699+
}
700+
let slot = slot_hint(hash);
701+
let group = &mut map.container.groups[new_gi];
702+
group.ctrl[slot] = tag;
703+
group.keys[slot] = MaybeUninit::new(key);
704+
group.values[slot] = MaybeUninit::new(value);
705+
map.container.len += 1;
706+
unsafe { group.values[slot].assume_init_mut() }
707+
}
708+
FindResult::Found(_) => {
709+
unreachable!("key was not in the table before grow")
695710
}
696711
}
697712
}

0 commit comments

Comments
 (0)