@@ -2,7 +2,8 @@ use std::hash::BuildHasher;
22
33use criterion:: { criterion_group, criterion_main, BatchSize , Criterion } ;
44use hash_sorted_map:: HashSortedMap ;
5- use hash_sorted_map_benchmarks:: { random_trigram_hashes, IdentityBuildHasher } ;
5+ use hash_sorted_map_benchmarks:: { folded_multiply, random_trigram_hashes, IdentityBuildHasher } ;
6+ use rand:: RngExt ;
67
78fn trigrams ( ) -> Vec < u32 > {
89 random_trigram_hashes ( 1000 )
@@ -428,13 +429,162 @@ fn bench_sort(c: &mut Criterion) {
428429 group. finish ( ) ;
429430}
430431
432+ fn bench_merge_sort ( c : & mut Criterion ) {
433+ const NUM_MAPS : usize = 100 ;
434+ const KEYS_PER_MAP : usize = 100_000 ;
435+
436+ // Pre-generate 100 key vectors with random u32 values scrambled via folded_multiply.
437+ let maps_data: Vec < Vec < u32 > > = ( 0 ..NUM_MAPS )
438+ . map ( |_| {
439+ let mut rng = rand:: rng ( ) ;
440+ ( 0 ..KEYS_PER_MAP )
441+ . map ( |_| folded_multiply ( rng. random_range ( 0 ..1_000_000u32 ) as u64 , 0x243f6a8885a308d3 ) as u32 )
442+ . collect ( )
443+ } )
444+ . collect ( ) ;
445+
446+ let hasher = IdentityBuildHasher :: default ( ) ;
447+ let mut group = c. benchmark_group ( "merge_100_maps_sorted" ) ;
448+ group. sample_size ( 10 ) ;
449+
450+ // ── 1. HashSortedMap: merge all, then sort_by_hash ──────────────
451+ group. bench_function ( "HashSortedMap merge + sort_by_hash" , |b| {
452+ b. iter ( || {
453+ let mut map: HashSortedMap < u32 , u32 , _ > =
454+ HashSortedMap :: with_hasher ( IdentityBuildHasher :: default ( ) ) ;
455+ for keys in & maps_data {
456+ for & key in keys {
457+ * map. entry ( key) . or_default ( ) += 1u32 ;
458+ }
459+ }
460+ map. sort_by_hash ( )
461+ } ) ;
462+ } ) ;
463+
464+ // ── 2. K-way merge over pre-sorted vectors ──────────────────────
465+ group. bench_function ( "k-way merge sorted vecs" , |b| {
466+ use itertools:: Itertools ;
467+
468+ b. iter ( || {
469+ // Phase 1: build per-map sorted (hash, key, count) vectors.
470+ let sorted_vecs: Vec < Vec < ( u64 , u32 , u32 ) > > = maps_data
471+ . iter ( )
472+ . map ( |keys| {
473+ let mut counts = std:: collections:: HashMap :: < u32 , u32 , IdentityBuildHasher > :: with_hasher ( IdentityBuildHasher :: default ( ) ) ;
474+ for & key in keys {
475+ * counts. entry ( key) . or_default ( ) += 1 ;
476+ }
477+ let mut vec: Vec < ( u64 , u32 , u32 ) > = counts
478+ . into_iter ( )
479+ . map ( |( k, v) | ( hasher. hash_one ( k) , k, v) )
480+ . collect ( ) ;
481+ vec. sort_unstable_by_key ( |& ( h, _, _) | h) ;
482+ vec
483+ } )
484+ . collect ( ) ;
485+
486+ // Phase 2: k-merge + group_by to aggregate counts.
487+ let result: Vec < ( u32 , u32 ) > = sorted_vecs
488+ . into_iter ( )
489+ . map ( |v| v. into_iter ( ) )
490+ . kmerge_by ( |a, b| a. 0 <= b. 0 )
491+ . chunk_by ( |& ( _, key, _) | key)
492+ . into_iter ( )
493+ . map ( |( key, group) | ( key, group. map ( |( _, _, c) | c) . sum ( ) ) )
494+ . collect ( ) ;
495+ result
496+ } ) ;
497+ } ) ;
498+
499+ // ── 3. hashbrown HashMap merge, then sort into Vec ──────────────
500+ group. bench_function ( "hashbrown merge + Vec sort" , |b| {
501+ b. iter ( || {
502+ let mut map =
503+ hashbrown:: HashMap :: < u32 , u32 , IdentityBuildHasher > :: with_hasher ( IdentityBuildHasher :: default ( ) ) ;
504+ for keys in & maps_data {
505+ for & key in keys {
506+ * map. entry ( key) . or_default ( ) += 1 ;
507+ }
508+ }
509+ let mut vec: Vec < ( u32 , u32 ) > = map. into_iter ( ) . collect ( ) ;
510+ vec. sort_unstable_by_key ( |& ( key, _) | hasher. hash_one ( key) ) ;
511+ vec
512+ } ) ;
513+ } ) ;
514+
515+ // ── 4. hashbrown HashMap merge only (no sort) ───────────────────
516+ group. bench_function ( "hashbrown merge" , |b| {
517+ b. iter ( || {
518+ let mut map =
519+ hashbrown:: HashMap :: < u32 , u32 , IdentityBuildHasher > :: with_hasher ( IdentityBuildHasher :: default ( ) ) ;
520+ for keys in & maps_data {
521+ for & key in keys {
522+ * map. entry ( key) . or_default ( ) += 1 ;
523+ }
524+ }
525+ map
526+ } ) ;
527+ } ) ;
528+
529+ // ── 5. HashSortedMap merge only (no sort) ───────────────────────
530+ group. bench_function ( "HashSortedMap merge" , |b| {
531+ b. iter ( || {
532+ let mut map: HashSortedMap < u32 , u32 , _ > =
533+ HashSortedMap :: with_hasher ( IdentityBuildHasher :: default ( ) ) ;
534+ for keys in & maps_data {
535+ for & key in keys {
536+ * map. entry ( key) . or_default ( ) += 1u32 ;
537+ }
538+ }
539+ map
540+ } ) ;
541+ } ) ;
542+
543+ // ── 6. hashbrown presized merge only ────────────────────────────
544+ group. bench_function ( "hashbrown merge presized" , |b| {
545+ b. iter ( || {
546+ let mut map =
547+ hashbrown:: HashMap :: < u32 , u32 , IdentityBuildHasher > :: with_capacity_and_hasher (
548+ 1_000_000 ,
549+ IdentityBuildHasher :: default ( ) ,
550+ ) ;
551+ for keys in & maps_data {
552+ for & key in keys {
553+ * map. entry ( key) . or_default ( ) += 1 ;
554+ }
555+ }
556+ map
557+ } ) ;
558+ } ) ;
559+
560+ // ── 7. HashSortedMap presized merge only ─────────────────────────
561+ group. bench_function ( "HashSortedMap merge presized" , |b| {
562+ b. iter ( || {
563+ let mut map: HashSortedMap < u32 , u32 , _ > =
564+ HashSortedMap :: with_capacity_and_hasher (
565+ 1_000_000 ,
566+ IdentityBuildHasher :: default ( ) ,
567+ ) ;
568+ for keys in & maps_data {
569+ for & key in keys {
570+ * map. entry ( key) . or_default ( ) += 1u32 ;
571+ }
572+ }
573+ map
574+ } ) ;
575+ } ) ;
576+
577+ group. finish ( ) ;
578+ }
579+
431580criterion_group ! (
432581 benches,
433582 bench_insert,
434583 bench_reinsert,
435584 bench_grow,
436585 bench_count,
437586 bench_iter,
438- bench_sort
587+ bench_sort,
588+ bench_merge_sort
439589) ;
440590criterion_main ! ( benches) ;
0 commit comments