Skip to content

Commit 8480ea3

Browse files
committed
Replace key with hasher traits
1 parent 90259e9 commit 8480ea3

2 files changed

Lines changed: 127 additions & 70 deletions

File tree

crates/consistent-hashing/README.md

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ For small `k` neither optimization is probably improving the actual performance
6767

6868
The next section proves the correctness of this algorithm.
6969

70-
## N-Choose-R replication
70+
## N-Choose-K replication
7171

7272
We define the consistent `n-choose-k` replication as follows:
7373

@@ -87,13 +87,12 @@ Properties 2, 3, and 4 can be proven via induction as follows.
8787

8888
`k = 1`: We expect that `consistent_hash` returns a single uniformly distributed node index which is consistent in `n`, i.e. changes the hash value with probability `1/(n+1)`, when `n` increments by one. In our implementation, we use an `O(1)` implementation of the jump-hash algorithm. For `k=1`, `consistent_choose_k(key, 1, n)` becomes a single function call to `consistent_choose_max(key, 1, n)` which in turn calls `consistent_hash(key, 0, n)`. I.e. `consistent_choose_k` inherits all the desired properties from `consistent_hash` for `k=1` and all `n>=1`.
8989

90-
`k -> k+1`: `M(k+1, n+1) = M(k+1, n)` iff `M(k, n+1) < n` and `consistent_hash(_, k, n+1-k) < n - k`. The probability for this is `(n+1-k)/(n+1)` for the former by induction and `(n-k)/(n+1-k)` by the assumption that `consistent_hash` is a proper consistent hash function. Since both these probabilities are assumed to be independent, the probability that our initial value changes is `1 - (n+1-k)/(n+1) * (n-k)/(n+1-k) = 1 - (n-k)/(n+1) = (k+1)/(n+1)` proving property 4.
90+
`k → k+1`: `M(k+1, n+1) = M(k+1, n)` iff `M(k, n+1) < n` and `consistent_hash(_, k, n+1-k) < n - k`. The probability for this is `(n+1-k)/(n+1)` for the former by induction and `(n-k)/(n+1-k)` for the latter by the assumption that `consistent_hash` is a proper consistent hash function. Since both these probabilities are assumed to be independent, the probability that our initial value changes is `1 - (n+1-k)/(n+1) * (n-k)/(n+1-k) = 1 - (n-k)/(n+1) = (k+1)/(n+1)` proving property 4.
9191

9292
Property 3 is trivially satisfied if `S(k+1, n+1) = S(k+1, n)`. So, we focus on the case where `S(k+1, n+1) != S(k+1, n)`, which implies that `n ∈ S(k+1, n+1)` as the largest element.
9393
We know that `S(k+1, n) = {m} ∪ S(k, m)` for some `m` by definition and `S(k, n) = S(k, u) ∖ {v} ∪ {w}` by induction for some `u`, `v`, and `w`. Thus far we have `S(k+1, n+1) = {n} ∪ S(k, n) = {n} ∪ S(k, u) ∖ {v} ∪ {w}`.
9494

9595
If `u = m`, then `S(k+1, n) = {m} ∪ S(k, m) ∖ {v} ∪ {w}` and `S(k+1, n+1) = {n} ∪ S(k, n) = {n} ∪ S(k, m) ∖ {v} ∪ {w}` and the two differ exactly in the elements `m` and `n` proving property 3.
9696

9797
If `u ≠ m`, then `consistent_hash(_, k, n) = m`, since that's the only way the largest values in `S(k+1, n)` and `S(k, n)` can differ. In this case, `m ∉ S(k+1, n+1)`, since `n` (and not `m`) is the largest element of `S(k+1, n+1)`. Furthermore, `S(k, n) = S(k, m)`, since `consistent_hash(_, i, n) < m` for all `i < k` (otherwise there is a contradiction).
98-
Putting it together leads to `S(k+1, n+1) = {n} ∪ S(k, m)` and `S(k+1, n) = {m} ∪ S(k, m)` which differ exactly in the elements `n` and `m` which concludes the proof.
99-
98+
Putting it together leads to `S(k+1, n+1) = {n} ∪ S(k, m)` and `S(k+1, n) = {m} ∪ S(k, m)` which differ exactly in the elements `n` and `m` which concludes the proof.

crates/consistent-hashing/src/lib.rs

Lines changed: 124 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,76 @@
1-
use std::hash::{DefaultHasher, Hash, Hasher};
1+
use std::hash::{Hash, Hasher};
2+
3+
/// A trait which behaves like a pseudo-random number generator.
4+
/// It is used to generate consistent hashes within one bucket.
5+
/// Note: the hasher must have been seeded with the key during construction.
6+
pub trait HashSequence {
7+
fn next(&mut self) -> u64;
8+
}
9+
10+
/// A trait for building a special bit mask and sequences of hashes for different bit positions.
11+
/// Note: the hasher must have been seeded with the key during construction.
12+
pub trait HashSeqBuilder {
13+
type Seq: HashSequence;
14+
15+
fn bit_mask(&self) -> u64;
16+
/// Return a HashSequence instance which is seeded with the given bit position
17+
/// and the seed of this builder.
18+
fn hash_seq(&self, bit: u64) -> Self::Seq;
19+
}
20+
21+
/// A trait for building multiple independent hash builders
22+
/// Note: the hasher must have been seeded with the key during construction.
23+
pub trait ManySeqBuilder {
24+
type Builder: HashSeqBuilder;
25+
26+
/// Returns the i-th independent hash builder.
27+
fn seq_builder(&self, i: usize) -> Self::Builder;
28+
}
29+
30+
impl<H: Hasher> HashSequence for H {
31+
fn next(&mut self) -> u64 {
32+
54387634019u64.hash(self);
33+
self.finish()
34+
}
35+
}
36+
37+
impl<H: Hasher + Clone> HashSeqBuilder for H {
38+
type Seq = H;
39+
40+
fn bit_mask(&self) -> u64 {
41+
self.finish()
42+
}
43+
44+
fn hash_seq(&self, bit: u64) -> Self::Seq {
45+
let mut hasher = self.clone();
46+
bit.hash(&mut hasher);
47+
hasher
48+
}
49+
}
50+
51+
impl<H: Hasher + Clone> ManySeqBuilder for H {
52+
type Builder = H;
53+
54+
fn seq_builder(&self, i: usize) -> Self::Builder {
55+
let mut hasher = self.clone();
56+
i.hash(&mut hasher);
57+
hasher
58+
}
59+
}
260

361
/// One building block for the consistent hashing algorithm is a consistent
462
/// hash iterator which enumerates all the hashes for a specific bucket.
563
/// A bucket covers the range `(1<<bit)..(2<<bit)`.
664
#[derive(Default)]
7-
struct BucketIterator {
8-
hasher: DefaultHasher,
65+
struct BucketIterator<H: HashSequence> {
66+
hasher: H,
967
n: usize,
1068
is_first: bool,
11-
bit: u64,
69+
bit: u64, // A bitmask with a single bit set.
1270
}
1371

14-
impl BucketIterator {
15-
fn new(key: u64, n: usize, bit: u64) -> Self {
16-
let mut hasher = DefaultHasher::new();
17-
key.hash(&mut hasher);
18-
bit.hash(&mut hasher);
72+
impl<H: HashSequence> BucketIterator<H> {
73+
fn new(n: usize, bit: u64, hasher: H) -> Self {
1974
Self {
2075
hasher,
2176
n,
@@ -25,24 +80,23 @@ impl BucketIterator {
2580
}
2681
}
2782

28-
impl Iterator for BucketIterator {
83+
impl<H: HashSequence> Iterator for BucketIterator<H> {
2984
type Item = usize;
3085

3186
fn next(&mut self) -> Option<Self::Item> {
3287
if self.bit == 0 {
3388
return None;
3489
}
3590
if self.is_first {
36-
let res = self.hasher.finish() % self.bit + self.bit;
91+
let res = (self.hasher.next() & (self.bit - 1)) + self.bit;
92+
self.is_first = false;
3793
if res < self.n as u64 {
3894
self.n = res as usize;
3995
return Some(self.n);
4096
}
41-
self.is_first = false;
4297
}
4398
loop {
44-
478392.hash(&mut self.hasher);
45-
let res = self.hasher.finish() % (self.bit * 2);
99+
let res = self.hasher.next() & (self.bit * 2 - 1);
46100
if res & self.bit == 0 {
47101
return None;
48102
}
@@ -56,77 +110,70 @@ impl Iterator for BucketIterator {
56110

57111
/// An iterator which enumerates all the consistent hashes for a given key
58112
/// from largest to smallest in the range `0..n`.
59-
pub struct ConsistentHashRevIterator {
113+
pub struct ConsistentHashRevIterator<H: HashSeqBuilder> {
114+
builder: H,
60115
bits: u64,
61-
key: u64,
62116
n: usize,
63-
inner: BucketIterator,
117+
inner: Option<BucketIterator<H::Seq>>,
64118
}
65119

66-
impl ConsistentHashRevIterator {
67-
pub fn new(key: u64, n: usize) -> Self {
68-
let mut hasher = DefaultHasher::new();
69-
key.hash(&mut hasher);
70-
let bits = hasher.finish() % n.next_power_of_two() as u64;
71-
let inner = BucketIterator::default();
120+
impl<H: HashSeqBuilder> ConsistentHashRevIterator<H> {
121+
pub fn new(n: usize, builder: H) -> Self {
72122
Self {
73-
bits,
74-
key,
123+
bits: builder.bit_mask() & (n.next_power_of_two() as u64 - 1),
124+
builder,
75125
n,
76-
inner,
126+
inner: None,
77127
}
78128
}
79129
}
80130

81-
impl Iterator for ConsistentHashRevIterator {
131+
impl<H: HashSeqBuilder> Iterator for ConsistentHashRevIterator<H> {
82132
type Item = usize;
83133

84134
fn next(&mut self) -> Option<Self::Item> {
85135
if self.n == 0 {
86136
return None;
87137
}
88-
if let Some(res) = self.inner.next() {
138+
if let Some(res) = self.inner.as_mut().and_then(|inner| inner.next()) {
89139
return Some(res);
90140
}
91141
while self.bits > 0 {
92142
let bit = 1 << self.bits.ilog2();
93143
self.bits ^= bit;
94-
self.inner = BucketIterator::new(self.key, self.n, bit);
95-
if let Some(res) = self.inner.next() {
144+
let seq = self.builder.hash_seq(bit);
145+
let mut iter = BucketIterator::new(self.n, bit, seq);
146+
if let Some(res) = iter.next() {
147+
self.inner = Some(iter);
96148
return Some(res);
97149
}
98150
}
99151
self.n = 0;
100-
Some(self.n)
152+
Some(0)
101153
}
102154
}
103155

104156
/// Same as `ConsistentHashRevIterator`, but iterates from smallest to largest
105157
/// for the range `n..`.
106-
pub struct ConsistentHashIterator {
158+
pub struct ConsistentHashIterator<H: HashSeqBuilder> {
107159
bits: u64,
108-
key: u64,
109160
n: usize,
161+
builder: H,
110162
stack: Vec<usize>,
111163
}
112164

113-
impl ConsistentHashIterator {
114-
pub fn new(key: u64, n: usize) -> Self {
115-
let mut hasher = DefaultHasher::new();
116-
key.hash(&mut hasher);
117-
let mut bits = hasher.finish() as u64;
118-
bits &= !((n + 2).next_power_of_two() as u64 / 2 - 1);
119-
let stack = if n == 0 { vec![0] } else { vec![] };
165+
impl<H: HashSeqBuilder> ConsistentHashIterator<H> {
166+
pub fn new(n: usize, builder: H) -> Self {
120167
Self {
121-
bits,
122-
key,
168+
bits: builder.bit_mask() & !((n + 2).next_power_of_two() as u64 / 2 - 1),
169+
stack: if n == 0 { vec![0] } else { vec![] },
170+
builder,
123171
n,
124-
stack,
125172
}
126173
}
127174
}
128175

129-
impl Iterator for ConsistentHashIterator {
176+
impl<H: HashSeqBuilder> Iterator for ConsistentHashIterator<H> {
130177
type Item = usize;
131178

132179
fn next(&mut self) -> Option<Self::Item> {
@@ -136,7 +183,7 @@ impl Iterator for ConsistentHashIterator {
136183
while self.bits > 0 {
137184
let bit = self.bits & !(self.bits - 1);
138185
self.bits &= self.bits - 1;
139-
let inner = BucketIterator::new(self.key, bit as usize * 2, bit);
186+
let inner = BucketIterator::new(bit as usize * 2, bit, self.builder.hash_seq(bit));
140187
self.stack = inner.take_while(|x| *x >= self.n).collect();
141188
if let Some(res) = self.stack.pop() {
142189
return Some(res);
@@ -148,22 +195,22 @@ impl Iterator for ConsistentHashIterator {
148195

149196
/// Wrapper around `ConsistentHashIterator` and `ConsistentHashRevIterator` to compute
150197
/// the next or previous consistent hash for a given key for a given number of nodes `n`.
151-
pub struct ConsistentHasher {
152-
key: u64,
198+
pub struct ConsistentHasher<H: HashSeqBuilder> {
199+
builder: H,
153200
}
154201

155-
impl ConsistentHasher {
156-
pub fn new(key: u64) -> Self {
157-
Self { key }
202+
impl<H: HashSeqBuilder + Clone> ConsistentHasher<H> {
203+
pub fn new(builder: H) -> Self {
204+
Self { builder }
158205
}
159206

160207
pub fn prev(&self, n: usize) -> Option<usize> {
161-
let mut sampler = ConsistentHashRevIterator::new(self.key, n);
208+
let mut sampler = ConsistentHashRevIterator::new(n, self.builder.clone());
162209
sampler.next()
163210
}
164211

165212
pub fn next(&self, n: usize) -> Option<usize> {
166-
let mut sampler = ConsistentHashIterator::new(self.key, n);
213+
let mut sampler = ConsistentHashIterator::new(n, self.builder.clone());
167214
sampler.next()
168215
}
169216
}
@@ -174,21 +221,21 @@ impl ConsistentHasher {
174221
/// I.e. on average exactly `1/(n+1)` (resp. `1/(k+1)`) many hashes will change
175222
/// when `n` (resp. `k`) increases by one. Additionally, the returned `k` tuple
176223
/// is guaranteed to be uniformly chosen from all possible `n-choose-k` tuples.
177-
pub struct ConsistentChooseKHasher {
178-
key: u64,
224+
pub struct ConsistentChooseKHasher<H: ManySeqBuilder> {
225+
builder: H,
179226
k: usize,
180227
}
181228

182-
impl ConsistentChooseKHasher {
183-
pub fn new(key: u64, k: usize) -> Self {
184-
Self { key, k }
229+
impl<H: ManySeqBuilder> ConsistentChooseKHasher<H> {
230+
pub fn new(builder: H, k: usize) -> Self {
231+
Self { builder, k }
185232
}
186233

187234
// TODO: Implement this as an iterator!
188235
pub fn prev(&self, mut n: usize) -> Vec<usize> {
189236
let mut samples = Vec::with_capacity(self.k);
190237
let mut samplers: Vec<_> = (0..self.k)
191-
.map(|i| ConsistentHashRevIterator::new(self.key + 43987492 * i as u64, n - i).peekable())
238+
.map(|i| ConsistentHashRevIterator::new(n - i, self.builder.seq_builder(i)).peekable())
192239
.collect();
193240
for i in (0..self.k).rev() {
194241
let mut max = 0;
@@ -211,25 +258,33 @@ impl ConsistentChooseKHasher {
211258
mod tests {
212259
use super::*;
213260

261+
fn hasher_for_key(key: u64) -> DefaultHasher {
262+
let mut hasher = DefaultHasher::default();
263+
key.hash(&mut hasher);
264+
hasher
265+
}
266+
214267
#[test]
215268
fn test_uniform_1() {
216269
for k in 0..100 {
217-
let sampler = ConsistentHasher::new(k);
270+
let hasher = hasher_for_key(k);
271+
let sampler = ConsistentHasher::new(hasher.clone());
218272
for n in 0..1000 {
219273
assert!(sampler.prev(n + 1) <= sampler.prev(n + 2));
220274
let next = sampler.next(n).unwrap();
221275
assert_eq!(next, sampler.prev(next + 1).unwrap());
222276
}
223-
let mut iter_rev: Vec<_> = ConsistentHashIterator::new(k, 0)
277+
let mut iter_rev: Vec<_> = ConsistentHashIterator::new(0, hasher.clone())
224278
.take_while(|x| *x < 1000)
225279
.collect();
226280
iter_rev.reverse();
227-
let iter: Vec<_> = ConsistentHashRevIterator::new(k, 1000).collect();
281+
let iter: Vec<_> = ConsistentHashRevIterator::new(1000, hasher).collect();
228282
assert_eq!(iter, iter_rev);
229283
}
230284
let mut stats = vec![0; 13];
231285
for i in 0..100000 {
232-
let sampler = ConsistentHasher::new(i);
286+
let hasher = hasher_for_key(i);
287+
let sampler = ConsistentHasher::new(hasher);
233288
let x = sampler.prev(stats.len()).unwrap();
234289
stats[x] += 1;
235290
}
@@ -240,7 +295,8 @@ mod tests {
240295
fn test_uniform_k() {
241296
const K: usize = 3;
242297
for k in 0..100 {
243-
let sampler = ConsistentChooseKHasher::new(k, K);
298+
let hasher = hasher_for_key(k);
299+
let sampler = ConsistentChooseKHasher::new(hasher, K);
244300
for n in K..1000 {
245301
let samples = sampler.prev(n + 1);
246302
assert!(samples.len() == K);
@@ -263,7 +319,8 @@ mod tests {
263319
}
264320
let mut stats = vec![0; 8];
265321
for i in 0..32 {
266-
let sampler = ConsistentChooseKHasher::new(i + 32783, 2);
322+
let hasher = hasher_for_key(i + 32783);
323+
let sampler = ConsistentChooseKHasher::new(hasher, 2);
267324
let samples = sampler.prev(stats.len());
268325
for s in samples {
269326
stats[s] += 1;
@@ -274,8 +331,9 @@ mod tests {
274331
for k in 1..10 {
275332
for n in k + 1..20 {
276333
for key in 0..1000 {
277-
let sampler1 = ConsistentChooseKHasher::new(key, k);
278-
let sampler2 = ConsistentChooseKHasher::new(key, k + 1);
334+
let hasher = hasher_for_key(key);
335+
let sampler1 = ConsistentChooseKHasher::new(hasher.clone(), k);
336+
let sampler2 = ConsistentChooseKHasher::new(hasher, k + 1);
279337
let set1 = sampler1.prev(n);
280338
let set2 = sampler2.prev(n);
281339
assert_eq!(set1.len(), k);

0 commit comments

Comments
 (0)