Replace key with hasher traits

aneubeck · aneubeck · commit 8480ea3ed645 · 2025-08-13T15:55:20.000+02:00
diff --git a/crates/consistent-hashing/README.md b/crates/consistent-hashing/README.md
@@ -67,7 +67,7 @@ For small `k` neither optimization is probably improving the actual performance
 
 The next section proves the correctness of this algorithm.
 
-## N-Choose-R replication
+## N-Choose-K replication
 
 We define the consistent `n-choose-k` replication as follows:
 
@@ -87,13 +87,12 @@ Properties 2, 3, and 4 can be proven via induction as follows.
 
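-`k = 1`: We expect that `consistent_hash` returns a single uniformly distributed node index which is consistent in `n`, i.e. changes the hash value with probability `1/(n+1)`, when `n` increments by one. In our implementation, we use an `O(1)` implementation of the jump-hash algorithm. For `k=1`, `consistent_choose_k(key, 1, n)` becomes a single function call to `consistent_choose_max(key, 1, n)` which in turn calls `consistent_hash(key, 0, n)`. I.e. `consistent_choose_k` inherits the all the desired properties from `consistent_hash` for `k=1` and all `n>=1`.
+`k = 1`: We expect that `consistent_hash` returns a single uniformly distributed node index which is consistent in `n`, i.e. changes the hash value with probability `1/(n+1)`, when `n` increments by one. In our implementation, we use an `O(1)` implementation of the jump-hash algorithm. For `k=1`, `consistent_choose_k(key, 1, n)` becomes a single function call to `consistent_choose_max(key, 1, n)` which in turn calls `consistent_hash(key, 0, n)`. I.e. `consistent_choose_k` inherits all the desired properties from `consistent_hash` for `k=1` and all `n>=1`.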
 
-`k -> k+1`: `M(k+1, n+1) = M(k+1, n)` iff `M(k, n+1) < n` and `consistent_hash(_, k, n+1-k) < n - k`. The probability for this is `(n+1-k)/(n+1)` for the former by induction and `(n-k)/(n+1-k)` by the assumption that `consistent_hash` is a proper consistent hash function. Since both these probabilities are assumed to be independent, the probability that our initial value changes is `1 - (n+1-k)/(n+1) * (n-k)/(n+1-k) = 1 - (n-k)/(n+1) = (k+1)/(n+1)` proving property 4.
+`k → k+1`: `M(k+1, n+1) = M(k+1, n)` iff `M(k, n+1) < n` and `consistent_hash(_, k, n+1-k) < n - k`. The probability for this is `(n+1-k)/(n+1)` for the former by induction and `(n-k)/(n+1-k)` by the assumption that `consistent_hash` is a proper consistent hash function. Since both these probabilities are assumed to be independent, the probability that our initial value changes is `1 - (n+1-k)/(n+1) * (n-k)/(n+1-k) = 1 - (n-k)/(n+1) = (k+1)/(n+1)` proving property 4.
 
 Property 3 is trivially satisfied if `S(k+1, n+1) = S(k+1, n)`. So, we focus on the case where `S(k+1, n+1) != S(k+1, n)`, which implies that `n ∈ S(k+1, n+1)` as largest element.
 We know that `S(k+1, n) = {m} ∪ S(k, m)` for some `m` by definition and `S(k, n) = S(k, u) ∖ {v} ∪ {w}` by induction for some `u`, `v`, and `w`. Thus far we have `S(k+1, n+1) = {n} ∪ S(k, n) = {n} ∪ S(k, u) ∖ {v} ∪ {w}`.
 
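-If `u = m`, then `S(k+1, n) = {m} ∪ S(k, m) ∖ {v} ∪ {w}` and `S(k+1, n+1) = {n} ∪ S(k, n) = {n} ∪ S(k, m) ∖ {v} ∪ {w}` and the two differ exaclty in the elemetns `m` and `n` proving property 3.
+If `u = m`, then `S(k+1, n) = {m} ∪ S(k, m) ∖ {v} ∪ {w}` and `S(k+1, n+1) = {n} ∪ S(k, n) = {n} ∪ S(k, m) ∖ {v} ∪ {w}` and the two differ exactly in the elements `m` and `n` proving property 3.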
 
 If `u ≠ m`, then `consistent_hash(_, k, n) = m`, since that's the only way how the largest values in `S(k+1, n)` and `S(k, n)` can differ. In this case, `m ∉ S(k+1, n+1)`, since `n` (and not `m`) is the largest element of `S(k+1, n+1)`. Furthermore, `S(k, n) = S(k, m)`, since `consistent_hash(_, i, n) < m` for all `i < k` (otherwise there is a contradiction).
-Putting it together leads to `S(k+1, n+1) = {n} ∪ S(k, m)` and `S(k+1, n) = {m} ∪ S(k, m)` which differ exactly in the elements `n` and `m` which concludes the proof.
-
+Putting it together leads to `S(k+1, n+1) = {n} ∪ S(k, m)` and `S(k+1, n) = {m} ∪ S(k, m)` which differ exactly in the elements `n` and `m` which concludes the proof.
diff --git a/crates/consistent-hashing/src/lib.rs b/crates/consistent-hashing/src/lib.rs
@@ -1,21 +1,76 @@
-use std::hash::{DefaultHasher, Hash, Hasher};
+use std::hash::{Hash, Hasher};
+
+/// A trait which behaves like a pseudo-random number generator.
+/// It is used to generate consistent hashes within one bucket.
+/// Note: the hasher must have been seeded with the key during construction.
+pub trait HashSequence {
+    fn next(&mut self) -> u64;
+}
+
+/// A trait for building a special bit mask and sequences of hashes for different bit positions.
+/// Note: the hasher must have been seeded with the key during construction.
+pub trait HashSeqBuilder {
+    type Seq: HashSequence;
+
+    fn bit_mask(&self) -> u64;
+    /// Return a HashSequence instance which is seeded with the given bit position
+    /// and the seed of this builder.
+    fn hash_seq(&self, bit: u64) -> Self::Seq;
+}
+
+/// A trait for building multiple independent hash builders.
+/// Note: the hasher must have been seeded with the key during construction.
+pub trait ManySeqBuilder {
+    type Builder: HashSeqBuilder;
+
+    /// Returns the i-th independent hash builder.
+    fn seq_builder(&self, i: usize) -> Self::Builder;
+}
+
+impl<H: Hasher> HashSequence for H {
+    fn next(&mut self) -> u64 {
+        54387634019u64.hash(self);
+        self.finish()
+    }
+}
+
+impl<H: Hasher + Clone> HashSeqBuilder for H {
+    type Seq = H;
+
+    fn bit_mask(&self) -> u64 {
+        self.finish()
+    }
+
+    fn hash_seq(&self, bit: u64) -> Self::Seq {
+        let mut hasher = self.clone();
+        bit.hash(&mut hasher);
+        hasher
+    }
+}
+
+impl<H: Hasher + Clone> ManySeqBuilder for H {
+    type Builder = H;
+
+    fn seq_builder(&self, i: usize) -> Self::Builder {
+        let mut hasher = self.clone();
+        i.hash(&mut hasher);
+        hasher
+    }
+}
 
 /// One building block for the consistent hashing algorithm is a consistent
 /// hash iterator which enumerates all the hashes for a specific bucket.
 /// A bucket covers the range `(1<<bit)..(2<<bit)`.
 #[derive(Default)]
-struct BucketIterator {
-    hasher: DefaultHasher,
+struct BucketIterator<H: HashSequence> {
+    hasher: H,
     n: usize,
     is_first: bool,
-    bit: u64,
+    bit: u64,  // A bitmask with a single bit set.
 }
 
-impl BucketIterator {
-    fn new(key: u64, n: usize, bit: u64) -> Self {
-        let mut hasher = DefaultHasher::new();
-        key.hash(&mut hasher);
-        bit.hash(&mut hasher);
+impl<H: HashSequence> BucketIterator<H> {
+    fn new(n: usize, bit: u64, hasher: H) -> Self {
         Self {
             hasher,
             n,
@@ -25,24 +80,23 @@ impl BucketIterator {
     }
 }
 
-impl Iterator for BucketIterator {
+impl<H: HashSequence> Iterator for BucketIterator<H> {
     type Item = usize;
 
     fn next(&mut self) -> Option<Self::Item> {
         if self.bit == 0 {
             return None;
         }
         if self.is_first {
-            let res = self.hasher.finish() % self.bit + self.bit;
+            let res = (self.hasher.next() & (self.bit - 1)) + self.bit;
+            self.is_first = false;
             if res < self.n as u64 {
                 self.n = res as usize;
                 return Some(self.n);
             }
-            self.is_first = false;
         }
         loop {
-            478392.hash(&mut self.hasher);
-            let res = self.hasher.finish() % (self.bit * 2);
+            let res = self.hasher.next() & (self.bit * 2 - 1);
             if res & self.bit == 0 {
                 return None;
             }
@@ -56,77 +110,70 @@ impl Iterator for BucketIterator {
 
 /// An iterator which enumerates all the consistent hashes for a given key
 /// from largest to smallest in the range `0..n`.
-pub struct ConsistentHashRevIterator {
+pub struct ConsistentHashRevIterator<H: HashSeqBuilder> {
+    builder: H,
     bits: u64,
-    key: u64,
     n: usize,
-    inner: BucketIterator,
+    inner: Option<BucketIterator<H::Seq>>,
 }
 
-impl ConsistentHashRevIterator {
-    pub fn new(key: u64, n: usize) -> Self {
-        let mut hasher = DefaultHasher::new();
-        key.hash(&mut hasher);
-        let bits = hasher.finish() % n.next_power_of_two() as u64;
-        let inner = BucketIterator::default();
+impl<H: HashSeqBuilder> ConsistentHashRevIterator<H> {
+    pub fn new(n: usize, builder: H) -> Self {
         Self {
-            bits,
-            key,
+            bits: builder.bit_mask() & (n.next_power_of_two() as u64 - 1),
+            builder,
             n,
-            inner,
+            inner: None,
         }
     }
 }
 
-impl Iterator for ConsistentHashRevIterator {
+impl<H: HashSeqBuilder> Iterator for ConsistentHashRevIterator<H> {
     type Item = usize;
 
     fn next(&mut self) -> Option<Self::Item> {
         if self.n == 0 {
             return None;
         }
-        if let Some(res) = self.inner.next() {
+        if let Some(res) = self.inner.as_mut().and_then(|inner| inner.next()) {
             return Some(res);
         }
         while self.bits > 0 {
             let bit = 1 << self.bits.ilog2();
             self.bits ^= bit;
-            self.inner = BucketIterator::new(self.key, self.n, bit);
-            if let Some(res) = self.inner.next() {
+            let seq = self.builder.hash_seq(bit);
+            let mut iter = BucketIterator::new(self.n, bit, seq);
+            if let Some(res) = iter.next() {
+                self.inner = Some(iter);
                 return Some(res);
             }
         }
         self.n = 0;
-        Some(self.n)
+        Some(0)
     }
 }
 
 /// Same as `ConsistentHashRevIterator`, but iterates from smallest to largest
 /// for the range `n..`.
-pub struct ConsistentHashIterator {
+pub struct ConsistentHashIterator<H: HashSeqBuilder> {
     bits: u64,
-    key: u64,
     n: usize,
+    builder: H,
     stack: Vec<usize>,
 }
 
-impl ConsistentHashIterator {
-    pub fn new(key: u64, n: usize) -> Self {
-        let mut hasher = DefaultHasher::new();
-        key.hash(&mut hasher);
-        let mut bits = hasher.finish() as u64;
-        bits &= !((n + 2).next_power_of_two() as u64 / 2 - 1);
-        let stack = if n == 0 { vec![0] } else { vec![] };
+impl<H: HashSeqBuilder> ConsistentHashIterator<H> {
+    pub fn new(n: usize, builder: H) -> Self {
         Self {
-            bits,
-            key,
+            bits: builder.bit_mask() & !((n + 2).next_power_of_two() as u64 / 2 - 1),
+            stack: if n == 0 { vec![0] } else { vec![] },
+            builder,
             n,
-            stack,
         }
     }
 }
 
-impl Iterator for ConsistentHashIterator {
+impl<H: HashSeqBuilder> Iterator for ConsistentHashIterator<H> {
     type Item = usize;
 
     fn next(&mut self) -> Option<Self::Item> {
@@ -136,7 +183,7 @@ impl Iterator for ConsistentHashIterator {
         while self.bits > 0 {
             let bit = self.bits & !(self.bits - 1);
             self.bits &= self.bits - 1;
-            let inner = BucketIterator::new(self.key, bit as usize * 2, bit);
+            let inner = BucketIterator::new(bit as usize * 2, bit, self.builder.hash_seq(bit));
             self.stack = inner.take_while(|x| *x >= self.n).collect();
             if let Some(res) = self.stack.pop() {
                 return Some(res);
@@ -148,22 +195,22 @@ impl Iterator for ConsistentHashIterator {
 
 /// Wrapper around `ConsistentHashIterator` and `ConsistentHashRevIterator` to compute
 /// the next or previous consistent hash for a given key for a given number of nodes `n`.
-pub struct ConsistentHasher {
-    key: u64,
+pub struct ConsistentHasher<H: HashSeqBuilder> {
+    builder: H,
 }
 
-impl ConsistentHasher {
-    pub fn new(key: u64) -> Self {
-        Self { key }
+impl<H: HashSeqBuilder + Clone> ConsistentHasher<H> {
+    pub fn new(builder: H) -> Self {
+        Self { builder }
     }
 
     pub fn prev(&self, n: usize) -> Option<usize> {
-        let mut sampler = ConsistentHashRevIterator::new(self.key, n);
+        let mut sampler = ConsistentHashRevIterator::new(n, self.builder.clone());
         sampler.next()
     }
 
     pub fn next(&self, n: usize) -> Option<usize> {
-        let mut sampler = ConsistentHashIterator::new(self.key, n);
+        let mut sampler = ConsistentHashIterator::new(n, self.builder.clone());
         sampler.next()
     }
 }
@@ -174,21 +221,21 @@ impl ConsistentHasher {
 /// I.e. on average exactly `1/(n+1)` (resp. `1/(k+1)`) many hashes will change
 /// when `n` (resp. `k`) increases by one. Additionally, the returned `k` tuple
-/// is guaranteed to be uniformely chosen from all possible `n-choose-k` tuples.
+/// is guaranteed to be uniformly chosen from all possible `n-choose-k` tuples.
-pub struct ConsistentChooseKHasher {
-    key: u64,
+pub struct ConsistentChooseKHasher<H: ManySeqBuilder> {
+    builder: H,
     k: usize,
 }
 
-impl ConsistentChooseKHasher {
-    pub fn new(key: u64, k: usize) -> Self {
-        Self { key, k }
+impl<H: ManySeqBuilder> ConsistentChooseKHasher<H> {
+    pub fn new(builder: H, k: usize) -> Self {
+        Self { builder, k }
     }
 
     // TODO: Implement this as an iterator!
     pub fn prev(&self, mut n: usize) -> Vec<usize> {
         let mut samples = Vec::with_capacity(self.k);
         let mut samplers: Vec<_> = (0..self.k)
-            .map(|i| ConsistentHashRevIterator::new(self.key + 43987492 * i as u64, n - i).peekable())
+            .map(|i| ConsistentHashRevIterator::new(n - i, self.builder.seq_builder(i)).peekable())
             .collect();
         for i in (0..self.k).rev() {
             let mut max = 0;
@@ -211,25 +258,33 @@ impl ConsistentChooseKHasher {
 mod tests {
     use super::*;
 
+    fn hasher_for_key(key: u64) -> std::hash::DefaultHasher {
+        let mut hasher = std::hash::DefaultHasher::default(); // no longer imported via `super::*`
+        key.hash(&mut hasher);
+        hasher
+    }
+
     #[test]
     fn test_uniform_1() {
         for k in 0..100 {
-            let sampler = ConsistentHasher::new(k);
+            let hasher = hasher_for_key(k);
+            let sampler = ConsistentHasher::new(hasher.clone());
             for n in 0..1000 {
                 assert!(sampler.prev(n + 1) <= sampler.prev(n + 2));
                 let next = sampler.next(n).unwrap();
                 assert_eq!(next, sampler.prev(next + 1).unwrap());
             }
-            let mut iter_rev: Vec<_> = ConsistentHashIterator::new(k, 0)
+            let mut iter_rev: Vec<_> = ConsistentHashIterator::new(0, hasher.clone())
                 .take_while(|x| *x < 1000)
                 .collect();
             iter_rev.reverse();
-            let iter: Vec<_> = ConsistentHashRevIterator::new(k, 1000).collect();
+            let iter: Vec<_> = ConsistentHashRevIterator::new(1000, hasher).collect();
             assert_eq!(iter, iter_rev);
         }
         let mut stats = vec![0; 13];
         for i in 0..100000 {
-            let sampler = ConsistentHasher::new(i);
+            let hasher = hasher_for_key(i);
+            let sampler = ConsistentHasher::new(hasher);
             let x = sampler.prev(stats.len()).unwrap();
             stats[x] += 1;
         }
@@ -240,7 +295,8 @@ mod tests {
     fn test_uniform_k() {
         const K: usize = 3;
         for k in 0..100 {
-            let sampler = ConsistentChooseKHasher::new(k, K);
+            let hasher = hasher_for_key(k);
+            let sampler = ConsistentChooseKHasher::new(hasher, K);
             for n in K..1000 {
                 let samples = sampler.prev(n + 1);
                 assert!(samples.len() == K);
@@ -263,7 +319,8 @@ mod tests {
         }
         let mut stats = vec![0; 8];
         for i in 0..32 {
-            let sampler = ConsistentChooseKHasher::new(i + 32783, 2);
+            let hasher = hasher_for_key(i + 32783);
+            let sampler = ConsistentChooseKHasher::new(hasher, 2);
             let samples = sampler.prev(stats.len());
             for s in samples {
                 stats[s] += 1;
@@ -274,8 +331,9 @@ mod tests {
         for k in 1..10 {
             for n in k + 1..20 {
                 for key in 0..1000 {
-                    let sampler1 = ConsistentChooseKHasher::new(key, k);
-                    let sampler2 = ConsistentChooseKHasher::new(key, k + 1);
+                    let hasher = hasher_for_key(key);
+                    let sampler1 = ConsistentChooseKHasher::new(hasher.clone(), k);
+                    let sampler2 = ConsistentChooseKHasher::new(hasher, k + 1);
                     let set1 = sampler1.prev(n);
                     let set2 = sampler2.prev(n);
                     assert_eq!(set1.len(), k);