even faster combine

srijs · srijs · commit 8df0677c0b4c · 2025-06-30T01:14:03.000+02:00
diff --git a/benches/bench.rs b/benches/bench.rs
@@ -35,8 +35,7 @@ fn bench_megabyte_specialized(b: &mut Bencher) {
     )
 }
 
-fn bench_combine(b: &mut Bencher) {
-    let (i1, l1, i2, l2) = rand::thread_rng().gen();
+fn bench_combine_inner(b: &mut Bencher, i1: u32, l1: u64, i2: u32, l2: u64) {
     let h1 = Hasher::new_with_initial_len(i1, l1);
     let h2 = Hasher::new_with_initial_len(i2, l2);
 
@@ -47,6 +46,21 @@ fn bench_combine(b: &mut Bencher) {
     })
 }
 
+fn bench_combine_16(b: &mut Bencher) {
+    let (i1, l1, i2, l2): (u32, u64, u32, u16) = rand::thread_rng().gen();
+    bench_combine_inner(b, i1, l1, i2, u64::from(l2))
+}
+
+fn bench_combine_32(b: &mut Bencher) {
+    let (i1, l1, i2, l2): (u32, u64, u32, u32) = rand::thread_rng().gen();
+    bench_combine_inner(b, i1, l1, i2, u64::from(l2))
+}
+
+fn bench_combine_64(b: &mut Bencher) {
+    let (i1, l1, i2, l2): (u32, u64, u32, u64) = rand::thread_rng().gen();
+    bench_combine_inner(b, i1, l1, i2, l2)
+}
+
 bencher::benchmark_group!(
     bench_baseline,
     bench_kilobyte_baseline,
@@ -57,5 +71,10 @@ bencher::benchmark_group!(
     bench_kilobyte_specialized,
     bench_megabyte_specialized
 );
-bencher::benchmark_group!(bench_combine_group, bench_combine);
-bencher::benchmark_main!(bench_baseline, bench_specialized, bench_combine_group);
+bencher::benchmark_group!(
+    bench_combine,
+    bench_combine_16,
+    bench_combine_32,
+    bench_combine_64
+);
+bencher::benchmark_main!(bench_baseline, bench_specialized, bench_combine);
diff --git a/src/combine.rs b/src/combine.rs
@@ -1,72 +1,57 @@
-const GF2_DIM: usize = 32;
+const POLY: u32 = 0xedb88320;
 
-fn gf2_matrix_times(mat: &[u32; GF2_DIM], vec: u32) -> u32 {
-    let mut sum = 0;
+static X2N_TABLE: [u32; 32] = [
+    0x00800000, 0x00008000, 0xedb88320, 0xb1e6b092, 0xa06a2517, 0xed627dae, 0x88d14467, 0xd7bbfe6a,
+    0xec447f11, 0x8e7ea170, 0x6427800e, 0x4d47bae0, 0x09fe548f, 0x83852d0f, 0x30362f1a, 0x7b5a9cc3,
+    0x31fec169, 0x9fec022a, 0x6c8dedc4, 0x15d6874d, 0x5fde7a4e, 0xbad90e37, 0x2e4e5eef, 0x4eaba214,
+    0xa8a472c0, 0x429a969e, 0x148d302a, 0xc40ba6d0, 0xc4e22c3c, 0x40000000, 0x20000000, 0x08000000,
+];
 
-    for (i, m) in mat.iter().enumerate() {
-        if vec >> i & 1 == 1 {
-            sum ^= *m;
-        }
-    }
-
-    sum
-}
+// Returns a(x) * b(x) mod p(x), where p(x) is the reflected CRC-32 polynomial `POLY`.
+// The zero check is omitted for speed, so `a` must be non-zero: `a == 0` would loop forever.
+fn multiply(a: u32, mut b: u32) -> u32 {
+    let mut m = 1u32 << 31;
+    let mut p = 0u32;
 
-fn gf2_matrix_square(square: &mut [u32; GF2_DIM], mat: &[u32; GF2_DIM]) {
-    for n in 0..GF2_DIM {
-        square[n] = gf2_matrix_times(mat, mat[n]);
-    }
-}
-
-pub(crate) fn combine(mut crc1: u32, crc2: u32, mut len2: u64) -> u32 {
-    let mut even = [0u32; GF2_DIM]; // even-power-of-two zeros operator
-    let mut odd = [0u32; GF2_DIM]; // odd-power-of-two zeros operator
-
-    // degenerate case (also disallow negative lengths)
-    if len2 == 0 {
-        return crc1;
-    }
-
-    // put operator for one zero bit in odd
-    odd[0] = 0xedb88320; // CRC-32 polynomial
-    for (i, r) in odd[1..].iter_mut().enumerate() {
-        *r = 1 << i;
-    }
-
-    // put operator for two zero bits in even
-    gf2_matrix_square(&mut even, &odd);
-
-    // put operator for four zero bits in odd
-    gf2_matrix_square(&mut odd, &even);
-
-    // apply len2 zeros to crc1 (first square will put the operator for one
-    // zero byte, eight zero bits, in even)
     loop {
-        // apply zeros operator for this bit of len2
-        gf2_matrix_square(&mut even, &odd);
-        if len2 & 1 == 1 {
-            crc1 = gf2_matrix_times(&even, crc1);
+        if (a & m) != 0 {
+            p ^= b;
+            if (a & (m - 1)) == 0 {
+                break;
+            }
         }
-        len2 >>= 1;
-
-        // if no more bits set, then done
-        if len2 == 0 {
-            break;
+        m >>= 1;
+        if b & 1 != 0 {
+            b = (b >> 1) ^ POLY;
+        } else {
+            b >>= 1;
         }
+    }
 
-        // another iteration of the loop with odd and even swapped
-        gf2_matrix_square(&mut odd, &even);
-        if len2 & 1 == 1 {
-            crc1 = gf2_matrix_times(&odd, crc1);
-        }
-        len2 >>= 1;
+    p
+}
+
+pub(crate) fn combine(crc1: u32, crc2: u32, len2: u64) -> u32 {
+    let mut p = 1u32 << 31; // x^0 == 1
+    let n = 64 - len2.leading_zeros();
 
-        // if no more bits set, then done
-        if len2 == 0 {
-            break;
+    for i in 0..n {
+        if (len2 >> i & 1) != 0 {
+            p = multiply(X2N_TABLE[(i & 0x1F) as usize], p);
         }
     }
 
-    // return combined crc
-    crc1 ^ crc2
+    multiply(p, crc1) ^ crc2
+}
+
+#[test]
+fn golden() {
+    assert_eq!(
+        combine(0xB8AD0532, 0x804754D9, 0x19B77C403D9D90EE),
+        940758956
+    );
+    assert_eq!(
+        combine(0xF310DC54, 0x8B65DF79, 0x2F0327F1309076FF),
+        3454617599
+    );
 }