diff --git a/Cargo.toml b/Cargo.toml index c1a797f91..be3336c1f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,7 +16,7 @@ license = "MIT" [dependencies] cfg-if = { version = "0.1", default-features = false } -static_assertions = { version = "1.0", default-features = false } +static_assertions = { version = "1.1", default-features = false } rand = { version = ">= 0.3.10, < 0.8", optional = true } serde = { version = "1.0", features = ["derive"], optional = true} digest = { version = "0.9", default-features = false, optional = true } diff --git a/comparison/src/c_xxhash.rs b/comparison/src/c_xxhash.rs index 0fa9284c7..12f6eefd1 100644 --- a/comparison/src/c_xxhash.rs +++ b/comparison/src/c_xxhash.rs @@ -16,17 +16,17 @@ mod ffi { } extern "C" { - pub fn XXH32(input: *const c_void, length: size_t, seed: u32) -> XXH32_hash_t; - pub fn XXH64(input: *const c_void, length: size_t, seed: u64) -> XXH64_hash_t; + pub fn XXH32(input: *const c_void, length: size_t, seed: XXH32_hash_t) -> XXH32_hash_t; + pub fn XXH64(input: *const c_void, length: size_t, seed: XXH64_hash_t) -> XXH64_hash_t; pub fn XXH3_64bits_withSeed( - data: *const ::std::os::raw::c_void, - len: usize, - seed: ::std::os::raw::c_ulonglong, + data: *const c_void, + len: size_t, + seed: XXH64_hash_t, ) -> XXH64_hash_t; pub fn XXH3_128bits_withSeed( - data: *const ::std::os::raw::c_void, - len: usize, - seed: ::std::os::raw::c_ulonglong, + data: *const c_void, + len: size_t, + seed: XXH64_hash_t, ) -> XXH128_hash_t; } } diff --git a/comparison/src/lib.rs b/comparison/src/lib.rs index 4a0022ba3..8743598d4 100644 --- a/comparison/src/lib.rs +++ b/comparison/src/lib.rs @@ -3,7 +3,7 @@ use proptest::{collection::vec as propvec, prelude::*}; use std::hash::Hasher; #[cfg(test)] -use twox_hash::{XxHash32, XxHash64}; +use twox_hash::{xxh3, HasherExt, XxHash32, XxHash64}; pub mod c_xxhash; @@ -12,6 +12,12 @@ pub fn hash_once(mut hasher: impl Hasher, data: &[u8]) -> u64 { hasher.finish() } +#[cfg(test)] +pub fn hash_once_ext(mut hasher: impl HasherExt, data: &[u8]) -> u128 { + hasher.write(&data); + hasher.finish_ext() +} + #[cfg(test)] fn hash_by_chunks(mut hasher: impl Hasher, mut data: &[u8], chunk_sizes: &[usize]) -> u64 { for &chunk_size in chunk_sizes { @@ -23,6 +29,17 @@ fn hash_by_chunks(mut hasher: impl Hasher, mut data: &[u8], chunk_sizes: &[usize hasher.finish() } +#[cfg(test)] +fn hash_by_chunks_ext(mut hasher: impl HasherExt, mut data: &[u8], chunk_sizes: &[usize]) -> u128 { + for &chunk_size in chunk_sizes { + let (this_chunk, remaining) = data.split_at(chunk_size); + hasher.write(this_chunk); + data = remaining; + } + + hasher.finish_ext() +} + prop_compose! { fn data_and_offset () @@ -81,6 +98,41 @@ proptest! { prop_assert_eq!(our_result, their_result as u64); } + + #[test] + fn same_results_as_c_for_xxh3_64_bit(seed: u64, data: Vec) { + let our_result = hash_once(xxh3::Hash64::with_seed(seed), &data); + let their_result = c_xxhash::xxh3_hash64(&data, seed); + + prop_assert_eq!(our_result, their_result); + } + + #[test] + fn same_results_as_c_with_offset_for_xxh3_64_bit(seed: u64, (data, offset) in data_and_offset()) { + let data = &data[offset..]; + let our_result = hash_once(xxh3::Hash64::with_seed(seed), data); + let their_result = c_xxhash::xxh3_hash64(data, seed); + + prop_assert_eq!(our_result, their_result); + } + + #[test] + fn same_results_as_c_for_xxh3_128_bit(seed: u64, data: Vec) { + let our_result = hash_once_ext(xxh3::Hash128::with_seed(seed), &data); + let their_result = c_xxhash::xxh3_hash128(&data, seed); + + prop_assert_eq!(our_result, their_result); + } + + #[test] + fn same_results_as_c_with_offset_for_xxh3_128_bit(seed: u64, (data, offset) in data_and_offset()) { + let data = &data[offset..]; + let our_result = hash_once_ext(xxh3::Hash128::with_seed(seed), data); + let their_result = c_xxhash::xxh3_hash128(data, seed); + + prop_assert_eq!(our_result, their_result); + } + } proptest! { @@ -101,4 +153,21 @@ proptest! { prop_assert_eq!(chunked_result, monolithic_result); } + + #[test] + fn same_results_with_many_chunks_as_one_for_xxh3_64_bit(seed: u64, (data, chunk_sizes) in data_and_chunk_sizes()) { + let chunked_result = hash_by_chunks(xxh3::Hash64::with_seed(seed), &data, &chunk_sizes); + let monolithic_result = hash_once(xxh3::Hash64::with_seed(seed), &data); + + prop_assert_eq!(chunked_result, monolithic_result); + } + + #[test] + fn same_results_with_many_chunks_as_one_for_xxh3_128_bit(seed: u64, (data, chunk_sizes) in data_and_chunk_sizes()) { + let chunked_result = hash_by_chunks_ext(xxh3::Hash128::with_seed(seed), &data, &chunk_sizes); + let monolithic_result = hash_once_ext(xxh3::Hash128::with_seed(seed), &data); + + prop_assert_eq!(chunked_result, monolithic_result); + } + } diff --git a/comparison/xxHash b/comparison/xxHash index d7f47bc3b..94e5f23e7 160000 --- a/comparison/xxHash +++ b/comparison/xxHash @@ -1 +1 @@ -Subproject commit d7f47bc3bf1ca767b82eda6ada557ba02dc36e83 +Subproject commit 94e5f23e736f2bb67ebdf90727353e65344f9fc0 diff --git a/src/lib.rs b/src/lib.rs index 3b6e19751..4ea6a7d1a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -55,7 +55,7 @@ mod digest_support; pub use crate::sixty_four::XxHash64; pub use crate::thirty_two::XxHash32; -pub use crate::xxh3::{Hash128 as Xxh3Hash128, Hash64 as Xxh3Hash64}; +pub use crate::xxh3::{Hash128 as Xxh3Hash128, Hash64 as Xxh3Hash64, HasherExt}; /// A backwards compatibility type alias. Consider directly using /// `XxHash64` instead. diff --git a/src/xxh3.rs b/src/xxh3.rs index 22dedac78..5e2839011 100644 --- a/src/xxh3.rs +++ b/src/xxh3.rs @@ -1,24 +1,60 @@ -//! The in-progress XXH3 algorithm. +//! The XXH3 algorithm. //! -//! Please read [the notes in original implementation][warning] to -//! learn about when to use these algorithms. Specifically, the -//! version of code this crate reproduces says: +//! XXH3 is a new hash algorithm featuring: +//! - Improved speed for both small and large inputs +//! - True 64-bit and 128-bit outputs +//! - SIMD acceleration +//! - Improved 32-bit viability //! -//! > The algorithm is currently in development, meaning its return -//! values might still change in future versions. However, the API -//! is stable, and can be used in production, typically for -//! generation of ephemeral hashes (produced and consumed in same -//! session). +//! Speed analysis methodology is explained here: //! -//! [warning]: https://github.com/Cyan4973/xxHash#new-hash-algorithms +//! +//! +//! In general, expect XXH3 to run about ~2x faster on large inputs and >3x +//! faster on small ones compared to XXH64, though exact differences depend on +//! the platform. +//! +//! The algorithm is portable: Like XXH32 and XXH64, it generates the same hash +//! on all platforms. +//! +//! It benefits greatly from SIMD and 64-bit arithmetic, but does not require it. +//! +//! Almost all 32-bit and 64-bit targets that can run XXH32 smoothly can run +//! XXH3 at competitive speeds, even if XXH64 runs slowly. Further details are +//! explained in the implementation. +//! +//! Optimized implementations are provided for AVX512, AVX2, SSE2, NEON, POWER8, +//! ZVector and scalar targets. This can be controlled with the XXH_VECTOR macro. +//! +//! XXH3 offers 2 variants, _64bits and _128bits. +//! When only 64 bits are needed, prefer calling the _64bits variant, as it +//! reduces the amount of mixing, resulting in faster speed on small inputs. +//! +//! It's also generally simpler to manipulate a scalar return type than a struct. +//! +//! The 128-bit version adds additional strength, but it is slightly slower. +//! +//! The XXH3 algorithm is still in development. +//! The results it produces may still change in future versions. +//! +//! Results produced by v0.7.x are not comparable with results from v0.7.y. +//! However, the API is completely stable, and it can safely be used for +//! ephemeral data (local sessions). +//! +//! Avoid storing values in long-term storage until the algorithm is finalized. +//! XXH3's return values will be officially finalized upon reaching v0.8.0. +//! +//! After which, return values of XXH3 and XXH128 will no longer change in +//! future versions. +//! +//! The API supports one-shot hashing, streaming mode, and custom secrets. use alloc::vec::Vec; -use core::convert::TryInto; use core::hash::Hasher; use core::mem; use core::ops::{Deref, DerefMut}; -use core::slice; +use core::ptr; #[cfg(target_arch = "x86")] use core::arch::x86::*; @@ -40,77 +76,112 @@ use crate::thirty_two::{PRIME_1 as PRIME32_1, PRIME_2 as PRIME32_2, PRIME_3 as P #[cfg(feature = "std")] pub use crate::std_support::xxh3::{RandomHashBuilder128, RandomHashBuilder64}; +/// Default 64-bit variant, using default secret and default seed of 0. +/// +/// It's the fastest variant. #[inline(always)] pub fn hash64(data: &[u8]) -> u64 { - hash64_with_seed(data, 0) + hash64_internal(data, 0, &SECRET, || { + hash_long_64bits_internal(data, &SECRET) + }) } +/// This variant generates a custom secret on the fly based on default secret altered using the `seed` value. +/// +/// While this operation is decently fast, note that it's not completely free. +/// Note: seed==0 produces the same results as XXH3_64bits(). #[inline(always)] pub fn hash64_with_seed(data: &[u8], seed: u64) -> u64 { - let len = data.len(); + hash64_internal(data, seed, &SECRET, || { + if seed == 0 { + hash_long_64bits_internal(data, &SECRET) + } else { + let secret = Secret::with_seed(seed); - if len <= 16 { - hash_len_0to16_64bits(data, len, &SECRET, seed) - } else if len <= 128 { - hash_len_17to128_64bits(data, len, &SECRET, seed) - } else if len <= MIDSIZE_MAX { - hash_len_129to240_64bits(data, len, &SECRET, seed) - } else { - hash_long_64bits_with_seed(data, len, seed) - } + hash_long_64bits_internal(data, &secret) + } + }) } +/// Default 64-bit variant, using a secret and default seed of 0. +/// +/// It's possible to provide any blob of bytes as a "secret" to generate the hash. +/// This makes it more difficult for an external actor to prepare an intentional collision. +/// The main condition is that secretSize *must* be large enough (>= Secret::SIZE_MIN). +/// However, the quality of produced hash values depends on secret's entropy. +/// Technically, the secret must look like a bunch of random bytes. +/// Avoid "trivial" or structured data such as repeated sequences or a text document. +/// Whenever unsure about the "randomness" of the blob of bytes, +/// consider relabelling it as a "custom seed" instead, +/// and employ "XXH3_generateSecret()" (see below) +/// to generate a high entropy secret derived from the custom seed. #[inline(always)] pub fn hash64_with_secret(data: &[u8], secret: &[u8]) -> u64 { - debug_assert!(secret.len() >= SECRET_SIZE_MIN); + hash64_internal(data, 0, secret, || hash_long_64bits_internal(data, secret)) +} + +#[inline(always)] +fn hash64_internal(data: &[u8], seed: u64, secret: &[u8], hash_long_64bits: F) -> u64 +where + F: FnOnce() -> u64, +{ + debug_assert!(secret.len() >= Secret::SIZE_MIN); let len = data.len(); if len <= 16 { - hash_len_0to16_64bits(data, len, secret, 0) + hash_len_0to16_64bits(data, len, secret, seed) } else if len <= 128 { - hash_len_17to128_64bits(data, len, secret, 0) + hash_len_17to128_64bits(data, len, secret, seed) } else if len <= MIDSIZE_MAX { - hash_len_129to240_64bits(data, len, secret, 0) + hash_len_129to240_64bits(data, len, secret, seed) } else { - hash_long_64bits_with_secret(data, len, secret) + hash_long_64bits() } } #[inline(always)] pub fn hash128(data: &[u8]) -> u128 { - hash128_with_seed(data, 0) + hash128_internal(data, 0, &SECRET, || { + hash_long_128bits_internal(data, &SECRET) + }) } #[inline(always)] pub fn hash128_with_seed(data: &[u8], seed: u64) -> u128 { - let len = data.len(); + hash128_internal(data, seed, &SECRET, || { + if seed == 0 { + hash_long_128bits_internal(data, &SECRET) + } else { + let secret = Secret::with_seed(seed); - if len <= 16 { - hash_len_0to16_128bits(data, len, &SECRET, seed) - } else if len <= 128 { - hash_len_17to128_128bits(data, len, &SECRET, seed) - } else if len <= MIDSIZE_MAX { - hash_len_129to240_128bits(data, len, &SECRET, seed) - } else { - hash_long_128bits_with_seed(data, len, seed) - } + hash_long_128bits_internal(data, &secret) + } + }) } #[inline(always)] pub fn hash128_with_secret(data: &[u8], secret: &[u8]) -> u128 { - debug_assert!(secret.len() >= SECRET_SIZE_MIN); + hash128_internal(data, 0, secret, || hash_long_128bits_internal(data, secret)) +} + +#[inline(always)] +fn hash128_internal(data: &[u8], seed: u64, secret: &[u8], hash_long_128bits: F) -> u128 +where + F: FnOnce() -> u128, +{ + debug_assert!(secret.len() >= Secret::SIZE_MIN); let len = data.len(); if len <= 16 { - hash_len_0to16_128bits(data, len, secret, 0) + hash_len_0to16_128bits(data, len, secret, seed) } else if len <= 128 { - hash_len_17to128_128bits(data, len, secret, 0) + hash_len_17to128_128bits(data, len, secret, seed) } else if len <= MIDSIZE_MAX { - hash_len_129to240_128bits(data, len, secret, 0) + hash_len_129to240_128bits(data, len, secret, seed) } else { - hash_long_128bits_with_secret(data, len, secret) + hash_long_128bits() } } @@ -137,7 +208,7 @@ impl Hasher for Hash64 { #[inline(always)] fn write(&mut self, bytes: &[u8]) { - self.0.update(bytes, AccWidth::Acc64Bits) + self.0.update(bytes) } } @@ -164,7 +235,7 @@ impl Hasher for Hash128 { #[inline(always)] fn write(&mut self, bytes: &[u8]) { - self.0.update(bytes, AccWidth::Acc128Bits) + self.0.update(bytes) } } @@ -183,9 +254,6 @@ impl HasherExt for Hash128 { * XXH3 default settings * ========================================== */ -const SECRET_DEFAULT_SIZE: usize = 192; -const SECRET_SIZE_MIN: usize = 136; - const SECRET: Secret = Secret([ 0xb8, 0xfe, 0x6c, 0x39, 0x23, 0xa4, 0x4b, 0xbe, 0x7c, 0x01, 0x81, 0x2c, 0xf7, 0x21, 0xad, 0x1c, 0xde, 0xd4, 0x6d, 0xe9, 0x83, 0x90, 0x97, 0xdb, 0x72, 0x40, 0xa4, 0xa4, 0xb7, 0xb3, 0x67, 0x1f, @@ -203,23 +271,27 @@ const SECRET: Secret = Secret([ #[repr(align(64))] #[derive(Clone)] -struct Secret([u8; SECRET_DEFAULT_SIZE]); +struct Secret([u8; Secret::DEFAULT_SIZE]); -const_assert_eq!(mem::size_of::() % 16, 0); +impl Deref for Secret { + type Target = [u8]; -impl Default for Secret { #[inline(always)] - fn default() -> Self { - SECRET + fn deref(&self) -> &Self::Target { + &self.0[..] } } -impl Deref for Secret { - type Target = [u8]; +const_assert!(Secret::DEFAULT_SIZE >= Secret::SIZE_MIN); +const_assert_eq!(mem::size_of::() % mem::size_of::(), 0); + +impl Secret { + pub const SIZE_MIN: usize = 136; + pub const DEFAULT_SIZE: usize = 192; #[inline(always)] - fn deref(&self) -> &Self::Target { - &self.0[..] + pub fn with_seed(seed: u64) -> Self { + Secret(unsafe { init_custom_secret(seed) }) } } @@ -256,8 +328,8 @@ cfg_if! { where E: serde::de::Error, { - if v.len() == SECRET_DEFAULT_SIZE { - let mut secret = [0; SECRET_DEFAULT_SIZE]; + if v.len() == Secret::DEFAULT_SIZE { + let mut secret = [0; Secret::DEFAULT_SIZE]; secret.copy_from_slice(v); @@ -270,43 +342,21 @@ cfg_if! { } } -impl Secret { - #[inline(always)] - pub fn with_seed(seed: u64) -> Self { - let mut secret = [0; SECRET_DEFAULT_SIZE]; - - for off in (0..SECRET_DEFAULT_SIZE).step_by(16) { - secret[off..].write_u64_le(SECRET[off..].read_u64_le().wrapping_add(seed)); - secret[off + 8..].write_u64_le(SECRET[off + 8..].read_u64_le().wrapping_sub(seed)); - } +#[cfg_attr(target_feature = "avx2", repr(align(32)))] +#[cfg_attr( + all(not(target_feature = "avx2"), target_feature = "sse2"), + repr(align(16)) +)] +#[cfg_attr(feature = "serialize", derive(Deserialize, Serialize))] +#[derive(Clone)] +struct Acc([u64; ACC_NB]); - Secret(secret) - } -} +const_assert_eq!(Acc::SIZE, 64); -cfg_if! { - if #[cfg(target_feature = "avx2")] { - #[repr(align(32))] - #[cfg_attr(feature = "serialize", derive(Deserialize, Serialize))] - #[derive(Clone)] - struct Acc([u64; ACC_NB]); - } else if #[cfg(target_feature = "sse2")] { - #[repr(align(16))] - #[cfg_attr(feature = "serialize", derive(Deserialize, Serialize))] - #[derive(Clone)] - struct Acc([u64; ACC_NB]); - } else { - #[repr(align(8))] - #[cfg_attr(feature = "serialize", derive(Deserialize, Serialize))] - #[derive(Clone)] - struct Acc([u64; ACC_NB]); - } +impl Acc { + pub const SIZE: usize = mem::size_of::(); } -const ACC_SIZE: usize = mem::size_of::(); - -const_assert_eq!(ACC_SIZE, 64); - impl Default for Acc { #[inline(always)] fn default() -> Self { @@ -339,41 +389,61 @@ impl DerefMut for Acc { } } -trait Buf { - fn read_u32_le(&self) -> u32; +trait ReadU32 { + fn read_le32(&self) -> u32; +} - fn read_u64_le(&self) -> u64; +trait ReadU64 { + fn read_le64(&self) -> u64; } -trait BufMut { - fn write_u32_le(&mut self, n: u32); +trait WriteU32 { + fn write_le32(&mut self, n: u32); +} - fn write_u64_le(&mut self, n: u64); +trait WriteU64 { + fn write_le64(&mut self, n: u64); } -impl Buf for [u8] { +impl ReadU32 for *const u32 { #[inline(always)] - fn read_u32_le(&self) -> u32 { - let buf = &self[..mem::size_of::()]; - u32::from_le_bytes(buf.try_into().unwrap()) + fn read_le32(&self) -> u32 { + u32::from_le(unsafe { self.read() }) } +} +impl ReadU64 for *const u64 { #[inline(always)] - fn read_u64_le(&self) -> u64 { - let buf = &self[..mem::size_of::()]; - u64::from_le_bytes(buf.try_into().unwrap()) + fn read_le64(&self) -> u64 { + u64::from_le(unsafe { self.read() }) } } -impl BufMut for [u8] { +impl ReadU32 for [u8] { #[inline(always)] - fn write_u32_le(&mut self, n: u32) { - self[..mem::size_of::()].copy_from_slice(&n.to_le_bytes()[..]); + fn read_le32(&self) -> u32 { + u32::from_le(unsafe { self.as_ptr().cast::().read() }) } +} + +impl ReadU64 for [u8] { + #[inline(always)] + fn read_le64(&self) -> u64 { + u64::from_le(unsafe { self.as_ptr().cast::().read() }) + } +} +impl WriteU32 for [u8] { #[inline(always)] - fn write_u64_le(&mut self, n: u64) { - self[..mem::size_of::()].copy_from_slice(&n.to_le_bytes()[..]); + fn write_le32(&mut self, n: u32) { + unsafe { self.as_mut_ptr().cast::().write(u32::to_le(n)) } + } +} + +impl WriteU64 for [u8] { + #[inline(always)] + fn write_le64(&mut self, n: u64) { + unsafe { self.as_mut_ptr().cast::().write(u64::to_le(n)) } } } @@ -382,124 +452,135 @@ impl BufMut for [u8] { * ========================================== */ #[inline(always)] -fn hash_len_0to16_64bits(data: &[u8], len: usize, key: &[u8], seed: u64) -> u64 { +fn hash_len_0to16_64bits(data: &[u8], len: usize, secret: &[u8], seed: u64) -> u64 { debug_assert!(len <= 16); if len > 8 { - hash_len_9to16_64bits(data, len, key, seed) + hash_len_9to16_64bits(data, len, secret, seed) } else if len >= 4 { - hash_len_4to8_64bits(data, len, key, seed) + hash_len_4to8_64bits(data, len, secret, seed) } else if len > 0 { - hash_len_1to3_64bits(data, len, key, seed) + hash_len_1to3_64bits(data, len, secret, seed) } else { - 0 + xxh64_avalanche(seed ^ (secret[56..].read_le64() ^ secret[64..].read_le64())) } } #[inline(always)] -fn hash_len_9to16_64bits(data: &[u8], len: usize, key: &[u8], seed: u64) -> u64 { +fn hash_len_9to16_64bits(input: &[u8], len: usize, secret: &[u8], seed: u64) -> u64 { debug_assert!((9..=16).contains(&len)); - let ll1 = data.read_u64_le() ^ key.read_u64_le().wrapping_add(seed); - let ll2 = data[len - 8..].read_u64_le() ^ key[8..].read_u64_le().wrapping_sub(seed); + let bitflip1 = (secret[24..].read_le64() ^ secret[32..].read_le64()).wrapping_add(seed); + let bitflip2 = (secret[40..].read_le64() ^ secret[48..].read_le64()).wrapping_sub(seed); + let input_lo = input.read_le64() ^ bitflip1; + let input_hi = input[len - 8..].read_le64() ^ bitflip2; let acc = (len as u64) - .wrapping_add(ll1) - .wrapping_add(ll2) - .wrapping_add(mul128_fold64(ll1, ll2)); + .wrapping_add(input_lo.swap_bytes()) + .wrapping_add(input_hi) + .wrapping_add(mul128_fold64(input_lo, input_hi)); avalanche(acc) } #[inline(always)] -fn hash_len_4to8_64bits(data: &[u8], len: usize, key: &[u8], seed: u64) -> u64 { +fn hash_len_4to8_64bits(input: &[u8], len: usize, secret: &[u8], mut seed: u64) -> u64 { debug_assert!((4..=8).contains(&len)); - let in1 = u64::from(data.read_u32_le()); - let in2 = u64::from(data[len - 4..].read_u32_le()); - let in64 = in1.wrapping_add(in2 << 32); - let keyed = in64 ^ key.read_u64_le().wrapping_add(seed); - let mix64 = - (len as u64).wrapping_add((keyed ^ (keyed >> 51)).wrapping_mul(u64::from(PRIME32_1))); + seed ^= u64::from((seed as u32).swap_bytes()) << 32; + + let input1 = u64::from(input.read_le32()); + let input2 = u64::from(input[len - 4..].read_le32()); + let bitflip = (secret[8..].read_le64() ^ secret[16..].read_le64()).wrapping_sub(seed); + let input64 = input2.wrapping_add(input1 << 32); + let keyed = input64 ^ bitflip; - avalanche((mix64 ^ (mix64 >> 47)).wrapping_mul(PRIME64_2)) + rrmxmx(keyed, len) } #[inline(always)] -fn hash_len_1to3_64bits(data: &[u8], len: usize, key: &[u8], seed: u64) -> u64 { +fn hash_len_1to3_64bits(input: &[u8], len: usize, secret: &[u8], seed: u64) -> u64 { debug_assert!((1..=3).contains(&len)); - let c1 = u32::from(data[0]); - let c2 = u32::from(data[len >> 1]); - let c3 = u32::from(data[len - 1]); - let combined = c1 + (c2 << 8) + (c3 << 16) + ((len as u32) << 24); - let keyed = u64::from(combined) ^ u64::from(key.read_u32_le()).wrapping_add(seed); - let mixed = keyed.wrapping_mul(PRIME64_1); + /* + * len = 1: combined = { input[0], 0x01, input[0], input[0] } + * len = 2: combined = { input[1], 0x02, input[0], input[1] } + * len = 3: combined = { input[2], 0x03, input[0], input[1] } + */ + let c1 = u32::from(input[0]); + let c2 = u32::from(input[len >> 1]); + let c3 = u32::from(input[len - 1]); + let combined = (c1 << 16) | (c2 << 24) | c3 | ((len as u32) << 8); + let bitflip = u64::from(secret.read_le32() ^ secret[4..].read_le32()).wrapping_add(seed); + let keyed = u64::from(combined) ^ bitflip; - avalanche(mixed) + xxh64_avalanche(keyed) } +/// For mid range keys, XXH3 uses a Mum-hash variant. #[inline(always)] fn hash_len_17to128_64bits(data: &[u8], len: usize, secret: &[u8], seed: u64) -> u64 { debug_assert!((17..=128).contains(&len)); - debug_assert!(secret.len() >= SECRET_SIZE_MIN); + debug_assert!(secret.len() >= Secret::SIZE_MIN); - let mut acc = PRIME64_1.wrapping_mul(len as u64); + let mut acc = (len as u64).wrapping_mul(PRIME64_1); if len > 32 { if len > 64 { if len > 96 { acc = acc - .wrapping_add(mix_16bytes(&data[48..], &secret[96..], seed)) - .wrapping_add(mix_16bytes(&data[len - 64..], &secret[112..], seed)); + .wrapping_add(mix16bytes(&data[48..], &secret[96..], seed)) + .wrapping_add(mix16bytes(&data[len - 64..], &secret[112..], seed)); } acc = acc - .wrapping_add(mix_16bytes(&data[32..], &secret[64..], seed)) - .wrapping_add(mix_16bytes(&data[len - 48..], &secret[80..], seed)); + .wrapping_add(mix16bytes(&data[32..], &secret[64..], seed)) + .wrapping_add(mix16bytes(&data[len - 48..], &secret[80..], seed)); } acc = acc - .wrapping_add(mix_16bytes(&data[16..], &secret[32..], seed)) - .wrapping_add(mix_16bytes(&data[len - 32..], &secret[48..], seed)); + .wrapping_add(mix16bytes(&data[16..], &secret[32..], seed)) + .wrapping_add(mix16bytes(&data[len - 32..], &secret[48..], seed)); } acc = acc - .wrapping_add(mix_16bytes(data, &secret[..], seed)) - .wrapping_add(mix_16bytes(&data[len - 16..], &secret[16..], seed)); + .wrapping_add(mix16bytes(data, &secret, seed)) + .wrapping_add(mix16bytes(&data[len - 16..], &secret[16..], seed)); avalanche(acc) } const MIDSIZE_MAX: usize = 240; -const MIDSIZE_STARTOFFSET: usize = 3; -const MIDSIZE_LASTOFFSET: usize = 17; +const MIDSIZE_START_OFFSET: usize = 3; +const MIDSIZE_LAST_OFFSET: usize = 17; #[inline(always)] fn hash_len_129to240_64bits(data: &[u8], len: usize, secret: &[u8], seed: u64) -> u64 { debug_assert!((129..=MIDSIZE_MAX).contains(&len)); - debug_assert!(secret.len() >= SECRET_SIZE_MIN); + debug_assert!(secret.len() >= Secret::SIZE_MIN); let acc = (len as u64).wrapping_mul(PRIME64_1); let acc = (0..8).fold(acc, |acc, i| { - acc.wrapping_add(mix_16bytes(&data[16 * i..], &secret[16 * i..], seed)) + acc.wrapping_add(mix16bytes(&data[16 * i..], &secret[16 * i..], seed)) }); let acc = avalanche(acc); - let nb_rounds = len / 16; - debug_assert!(nb_rounds >= 8); + let rounds = len / 16; + debug_assert!(rounds >= 8); - let acc = (8..nb_rounds).fold(acc, |acc, i| { - acc.wrapping_add(mix_16bytes( + let acc = (8..rounds).fold(acc, |acc, i| { + acc.wrapping_add(mix16bytes( &data[16 * i..], - &secret[16 * (i - 8) + MIDSIZE_STARTOFFSET..], + &secret[16 * (i - 8) + MIDSIZE_START_OFFSET..], seed, )) }); - avalanche(acc.wrapping_add(mix_16bytes( + let acc = acc.wrapping_add(mix16bytes( &data[len - 16..], - &secret[SECRET_SIZE_MIN - MIDSIZE_LASTOFFSET..], + &secret[Secret::SIZE_MIN - MIDSIZE_LAST_OFFSET..], seed, - ))) + )); + + avalanche(acc) } /* ========================================== @@ -510,170 +591,173 @@ const STRIPE_LEN: usize = 64; const SECRET_CONSUME_RATE: usize = 8; // nb of secret bytes consumed at each accumulation const SECRET_MERGEACCS_START: usize = 11; // do not align on 8, so that secret is different from accumulator const SECRET_LASTACC_START: usize = 7; // do not align on 8, so that secret is different from scrambler -const ACC_NB: usize = STRIPE_LEN / mem::size_of::(); - -#[derive(Debug, Clone, Copy, PartialEq)] -pub(crate) enum AccWidth { - Acc64Bits, - Acc128Bits, -} - -#[inline(always)] -fn hash_long_64bits_with_default_secret(data: &[u8], len: usize) -> u64 { - hash_long_internal(data, len, &SECRET) -} - -#[inline(always)] -fn hash_long_64bits_with_secret(data: &[u8], len: usize, secret: &[u8]) -> u64 { - hash_long_internal(data, len, secret) -} - -/// Generate a custom key, based on alteration of default kSecret with the seed, -/// and then use this key for long mode hashing. -/// -/// This operation is decently fast but nonetheless costs a little bit of time. -/// Try to avoid it whenever possible (typically when `seed.is_none()`). -#[inline(always)] -fn hash_long_64bits_with_seed(data: &[u8], len: usize, seed: u64) -> u64 { - if seed == 0 { - hash_long_64bits_with_default_secret(data, len) - } else { - let secret = Secret::with_seed(seed); - hash_long_internal(data, len, &secret) - } -} +const ACC_NB: usize = STRIPE_LEN / mem::size_of::(); #[inline(always)] -fn hash_long_internal(data: &[u8], len: usize, secret: &[u8]) -> u64 { +fn hash_long_64bits_internal(input: &[u8], secret: &[u8]) -> u64 { + let len = input.len(); let mut acc = Acc::default(); - hash_long_internal_loop(&mut acc, data, len, secret, AccWidth::Acc64Bits); + hash_long_internal_loop(&mut acc, input, len, secret); + + debug_assert!(secret.len() >= mem::size_of::() + SECRET_MERGEACCS_START); merge_accs( &acc, &secret[SECRET_MERGEACCS_START..], - (len as u64).wrapping_mul(PRIME64_1), + PRIME64_1.wrapping_mul(len as u64), ) } #[inline(always)] -fn hash_long_internal_loop( - acc: &mut [u64], - data: &[u8], - len: usize, - secret: &[u8], - acc_width: AccWidth, -) { - let secret_len = secret.len(); - let nb_rounds = (secret_len - STRIPE_LEN) / SECRET_CONSUME_RATE; - let block_len = STRIPE_LEN * nb_rounds; - - debug_assert!(secret_len >= SECRET_SIZE_MIN); +fn hash_long_internal_loop(acc: &mut [u64], input: &[u8], len: usize, secret: &[u8]) { + let secret_size = secret.len(); + let nb_stripes_per_block = (secret_size - STRIPE_LEN) / SECRET_CONSUME_RATE; + let block_len = STRIPE_LEN * nb_stripes_per_block; + let nb_blocks = (len - 1) / block_len; - let mut chunks = data.chunks_exact(block_len); + debug_assert!(secret_size >= Secret::SIZE_MIN); - for chunk in &mut chunks { - accumulate(acc, chunk, secret, nb_rounds, acc_width); + for i in 0..nb_blocks { + accumulate(acc, &input[i * block_len..], secret, nb_stripes_per_block); unsafe { - scramble_acc(acc, &secret[secret_len - STRIPE_LEN..]); + scramble_acc(acc, &secret[secret_size - STRIPE_LEN..]); } } /* last partial block */ debug_assert!(len > STRIPE_LEN); - let nb_stripes = (len % block_len) / STRIPE_LEN; + let block_size = block_len * nb_blocks; + let nb_stripes = (len - 1 - block_size) / STRIPE_LEN; - debug_assert!(nb_stripes < (secret_len / SECRET_CONSUME_RATE)); + debug_assert!(nb_stripes < (secret_size / SECRET_CONSUME_RATE)); - accumulate(acc, chunks.remainder(), secret, nb_stripes, acc_width); + accumulate(acc, &input[block_size..], secret, nb_stripes); - /* last stripe */ - if (len & (STRIPE_LEN - 1)) != 0 { - unsafe { - accumulate512( - acc, - &data[len - STRIPE_LEN..], - &secret[secret_len - STRIPE_LEN - SECRET_LASTACC_START..], - acc_width, - ); - } + // last stripe + unsafe { + accumulate512( + acc, + &input[len - STRIPE_LEN..], + &secret[secret_size - STRIPE_LEN - SECRET_LASTACC_START..], + ); } } +const PREFETCH_DIST: isize = 384; + #[inline(always)] -fn accumulate(acc: &mut [u64], data: &[u8], secret: &[u8], nb_stripes: usize, acc_width: AccWidth) { - for n in 0..nb_stripes { +fn accumulate(acc: &mut [u64], input: &[u8], secret: &[u8], nb_stripes: usize) { + for (chunk, secret) in input + .chunks(STRIPE_LEN) + .zip(secret.chunks(SECRET_CONSUME_RATE)) + .take(nb_stripes) + { unsafe { - accumulate512( - acc, - &data[n * STRIPE_LEN..], - &secret[n * SECRET_CONSUME_RATE..], - acc_width, - ); + prefetch(chunk.as_ptr().offset(PREFETCH_DIST).cast()); + accumulate512(acc, chunk, secret); } } } #[inline(always)] -const fn _mm_shuffle(z: u32, y: u32, x: u32, w: u32) -> i32 { - ((z << 6) | (y << 4) | (x << 2) | w) as i32 +unsafe fn prefetch(p: *const i8) { + _mm_prefetch(p, _MM_HINT_T0); +} + +#[inline(always)] +const fn _mm_shuffle(z: i32, y: i32, x: i32, w: i32) -> i32 { + (z << 6) | (y << 4) | (x << 2) | w } #[cfg(target_feature = "avx2")] mod avx2 { use super::*; - #[target_feature(enable = "avx2")] - pub(crate) unsafe fn accumulate512( - acc: &mut [u64], - data: &[u8], - keys: &[u8], - acc_width: AccWidth, - ) { - let xacc = acc.as_mut_ptr() as *mut __m256i; - let xdata = data.as_ptr() as *const __m256i; - let xkey = keys.as_ptr() as *const __m256i; + const_assert!((mem::align_of::() % mem::size_of::<__m256i>()) == 0); + + pub unsafe fn accumulate512(acc: &mut [u64], input: &[u8], secret: &[u8]) { + let xacc = acc.as_mut_ptr().cast::<__m256i>(); + let xinput = input.as_ptr().cast::<__m256i>(); + let xsecret = secret.as_ptr().cast::<__m256i>(); for i in 0..STRIPE_LEN / mem::size_of::<__m256i>() { - let d = _mm256_loadu_si256(xdata.add(i)); - let k = _mm256_loadu_si256(xkey.add(i)); - let dk = _mm256_xor_si256(d, k); // uint32 dk[8] = {d0+k0, d1+k1, d2+k2, d3+k3, ...} - let mul = _mm256_mul_epu32(dk, _mm256_shuffle_epi32(dk, 0x31)); // uint64 res[4] = {dk0*dk1, dk2*dk3, ...} - - xacc.add(i).write(if acc_width == AccWidth::Acc128Bits { - let dswap = _mm256_shuffle_epi32(d, _mm_shuffle(1, 0, 3, 2)); - let add = _mm256_add_epi64(xacc.add(i).read(), dswap); - _mm256_add_epi64(mul, add) - } else { - let add = _mm256_add_epi64(xacc.add(i).read(), d); - _mm256_add_epi64(mul, add) - }) + // data_vec = xinput[i]; + let data_vec = _mm256_loadu_si256(xinput.add(i)); + + // key_vec = xsecret[i]; + let key_vec = _mm256_loadu_si256(xsecret.add(i)); + + // data_key = data_vec ^ key_vec; + let data_key = _mm256_xor_si256(data_vec, key_vec); + + // data_key_lo = data_key >> 32; + let data_key_lo = _mm256_shuffle_epi32(data_key, _mm_shuffle(0, 3, 0, 1)); + + // product = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); + let product = _mm256_mul_epu32(data_key, data_key_lo); + + // xacc[i] += swap(data_vec); + let data_swap = _mm256_shuffle_epi32(data_vec, _mm_shuffle(1, 0, 3, 2)); + let sum = _mm256_add_epi64(xacc.add(i).read(), data_swap); + + // xacc[i] += product; + xacc.add(i).write(_mm256_add_epi64(product, sum)); } } - #[target_feature(enable = "avx2")] - pub unsafe fn scramble_acc(acc: &mut [u64], key: &[u8]) { - let xacc = acc.as_mut_ptr() as *mut __m256i; - let xkey = key.as_ptr() as *const __m256i; + pub unsafe fn scramble_acc(acc: &mut [u64], secret: &[u8]) { + let xacc = acc.as_mut_ptr().cast::<__m256i>(); + let xsecret = secret.as_ptr().cast::<__m256i>(); let prime32 = _mm256_set1_epi32(PRIME32_1 as i32); for i in 0..STRIPE_LEN / mem::size_of::<__m256i>() { - let data = xacc.add(i).read(); - let shifted = _mm256_srli_epi64(data, 47); - let data = _mm256_xor_si256(data, shifted); - - let k = _mm256_loadu_si256(xkey.add(i)); - let dk = _mm256_xor_si256(data, k); /* U32 dk[4] = {d0+k0, d1+k1, d2+k2, d3+k3} */ - let dk1 = _mm256_mul_epu32(dk, prime32); + // xacc[i] ^= (xacc[i] >> 47) + let acc_vec = xacc.add(i).read(); + let shifted = _mm256_srli_epi64(acc_vec, 47); + let data_vec = _mm256_xor_si256(acc_vec, shifted); + + // xacc[i] ^= xsecret; + let key_vec = _mm256_loadu_si256(xsecret.add(i)); + let data_key = _mm256_xor_si256(data_vec, key_vec); + + // xacc[i] *= XXH_PRIME32_1; + let data_key_hi = _mm256_shuffle_epi32(data_key, _mm_shuffle(0, 3, 0, 1)); + let prod_lo = _mm256_mul_epu32(data_key, prime32); + let prod_hi = _mm256_mul_epu32(data_key_hi, prime32); + xacc.add(i) + .write(_mm256_add_epi64(prod_lo, _mm256_slli_epi64(prod_hi, 32))); + } + } - let d2 = _mm256_shuffle_epi32(dk, 0x31); - let dk2 = _mm256_mul_epu32(d2, prime32); - let dk2h = _mm256_slli_epi64(dk2, 32); + const_assert_eq!(Secret::DEFAULT_SIZE % mem::size_of::<__m256i>(), 0); + const_assert_eq!(Secret::DEFAULT_SIZE / mem::size_of::<__m256i>(), 6); - xacc.add(i).write(_mm256_add_epi64(dk1, dk2h)); - } + #[target_feature(enable = "avx2")] + pub unsafe fn init_custom_secret(seed: u64) -> [u8; Secret::DEFAULT_SIZE] { + let mut secret = mem::MaybeUninit::<[u8; Secret::DEFAULT_SIZE]>::zeroed(); + + let seed64 = seed as i64; + let seed = _mm256_set_epi64x(-seed64, seed64, -seed64, seed64); + let src = SECRET.as_ptr().cast::<__m256i>(); + let dest = secret.as_mut_ptr().cast::<__m256i>(); + + dest.offset(0) + .write(_mm256_add_epi64(_mm256_load_si256(src.offset(0)), seed)); + dest.offset(1) + .write(_mm256_add_epi64(_mm256_load_si256(src.offset(1)), seed)); + dest.offset(2) + .write(_mm256_add_epi64(_mm256_load_si256(src.offset(2)), seed)); + dest.offset(3) + .write(_mm256_add_epi64(_mm256_load_si256(src.offset(3)), seed)); + dest.offset(4) + .write(_mm256_add_epi64(_mm256_load_si256(src.offset(4)), seed)); + dest.offset(5) + .write(_mm256_add_epi64(_mm256_load_si256(src.offset(5)), seed)); + + secret.assume_init() } } @@ -681,116 +765,151 @@ mod avx2 { mod sse2 { use super::*; - #[target_feature(enable = "sse2")] - #[allow(clippy::cast_ptr_alignment)] - pub(crate) unsafe fn accumulate512( - acc: &mut [u64], - data: &[u8], - keys: &[u8], - acc_width: AccWidth, - ) { - let xacc = acc.as_mut_ptr() as *mut __m128i; - let xdata = data.as_ptr() as *const __m128i; - let xkey = keys.as_ptr() as *const __m128i; + const_assert_eq!(mem::align_of::() % mem::size_of::<__m128i>(), 0); + + pub unsafe fn accumulate512(acc: &mut [u64], input: &[u8], secret: &[u8]) { + let xacc = acc.as_mut_ptr().cast::<__m128i>(); + let xinput = input.as_ptr().cast::<__m128i>(); + let xsecret = secret.as_ptr().cast::<__m128i>(); for i in 0..STRIPE_LEN / mem::size_of::<__m128i>() { - let d = _mm_loadu_si128(xdata.add(i)); - let k = _mm_loadu_si128(xkey.add(i)); - let dk = _mm_xor_si128(d, k); // uint32 dk[4] = {d0+k0, d1+k1, d2+k2, d3+k3} */ - let mul = _mm_mul_epu32(dk, _mm_shuffle_epi32(dk, 0x31)); // uint64 res[4] = {dk0*dk1, dk2*dk3, ...} */ - xacc.add(i).write(if acc_width == AccWidth::Acc128Bits { - let dswap = _mm_shuffle_epi32(d, _mm_shuffle(1, 0, 3, 2)); - let add = _mm_add_epi64(xacc.add(i).read(), dswap); - _mm_add_epi64(mul, add) - } else { - let add = _mm_add_epi64(xacc.add(i).read(), d); - _mm_add_epi64(mul, add) - }) + // data_vec = xinput[i]; + let data_vec = _mm_loadu_si128(xinput.add(i)); + + // key_vec = xsecret[i]; + let key_vec = _mm_loadu_si128(xsecret.add(i)); + + // data_key = data_vec ^ key_vec; + let data_key = _mm_xor_si128(data_vec, key_vec); + + // data_key_lo = data_key >> 32; + let data_key_lo = _mm_shuffle_epi32(data_key, _mm_shuffle(0, 3, 0, 1)); + + // product = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); + let product = _mm_mul_epu32(data_key, data_key_lo); + + // xacc[i] += swap(data_vec); + let data_swap = _mm_shuffle_epi32(data_vec, _mm_shuffle(1, 0, 3, 2)); + let sum = _mm_add_epi64(xacc.add(i).read(), data_swap); + + // xacc[i] += product; + xacc.add(i).write(_mm_add_epi64(product, sum)); } } - #[target_feature(enable = "sse2")] - #[allow(clippy::cast_ptr_alignment)] - pub unsafe fn scramble_acc(acc: &mut [u64], key: &[u8]) { - let xacc = acc.as_mut_ptr() as *mut __m128i; - let xkey = key.as_ptr() as *const __m128i; + pub unsafe fn scramble_acc(acc: &mut [u64], secret: &[u8]) { + let xacc = acc.as_mut_ptr().cast::<__m128i>(); + let xsecret = secret.as_ptr().cast::<__m128i>(); let prime32 = _mm_set1_epi32(PRIME32_1 as i32); for i in 0..STRIPE_LEN / mem::size_of::<__m128i>() { - let data = xacc.add(i).read(); - let shifted = _mm_srli_epi64(data, 47); - let data = _mm_xor_si128(data, shifted); + // xacc[i] ^= (xacc[i] >> 47) + let acc_vec = xacc.add(i).read(); + let shifted = _mm_srli_epi64(acc_vec, 47); + let data_vec = _mm_xor_si128(acc_vec, shifted); + + // xacc[i] ^= xsecret[i]; + let key_vec = _mm_loadu_si128(xsecret.add(i)); + let data_key = _mm_xor_si128(data_vec, key_vec); + + // xacc[i] *= XXH_PRIME32_1; + let data_key_hi = _mm_shuffle_epi32(data_key, _mm_shuffle(0, 3, 0, 1)); + let prod_lo = _mm_mul_epu32(data_key, prime32); + let prod_hi = _mm_mul_epu32(data_key_hi, prime32); + xacc.add(i) + .write(_mm_add_epi64(prod_lo, _mm_slli_epi32(prod_hi, 32))); + } + } - let k = _mm_loadu_si128(xkey.add(i)); - let dk = _mm_xor_si128(data, k); + const_assert_eq!(Secret::DEFAULT_SIZE % mem::size_of::<__m128i>(), 0); - let dk1 = _mm_mul_epu32(dk, prime32); + pub unsafe fn init_custom_secret(seed: u64) -> [u8; Secret::DEFAULT_SIZE] { + let mut secret = mem::MaybeUninit::<[u8; Secret::DEFAULT_SIZE]>::zeroed(); - let d2 = _mm_shuffle_epi32(dk, 0x31); - let dk2 = _mm_mul_epu32(d2, prime32); - let dk2h = _mm_slli_epi64(dk2, 32); + let seed64 = seed as i64; + let seed = _mm_set_epi64x(-seed64, seed64); - xacc.add(i).write(_mm_add_epi64(dk1, dk2h)); + let rounds = Secret::DEFAULT_SIZE / mem::size_of::<__m128i>(); + let src = SECRET.as_ptr().cast::(); + let dest = secret.as_mut_ptr().cast::<__m128i>(); + + for i in 0..rounds { + dest.add(i).write(_mm_add_epi64( + _mm_castps_si128(_mm_load_ps(src.add(i * 4))), + seed, + )) } + + secret.assume_init() } } -#[cfg(not(any(target_feature = "avx2", target_feature = "sse2")))] +/// scalar variants - universal +#[cfg(not(any(target_feature = "sse2", target_feature = "avx2")))] mod generic { use super::*; + const_assert_eq!(mem::align_of::() % mem::size_of::(), 0); + #[inline(always)] - pub(crate) unsafe fn accumulate512( - acc: &mut [u64], - data: &[u8], - key: &[u8], - acc_width: AccWidth, - ) { - for i in (0..ACC_NB).step_by(2) { - let in1 = data[8 * i..].read_u64_le(); - let in2 = data[8 * (i + 1)..].read_u64_le(); - let key1 = key[8 * i..].read_u64_le(); - let key2 = key[8 * (i + 1)..].read_u64_le(); - let data_key1 = key1 ^ in1; - let data_key2 = key2 ^ in2; - acc[i] = acc[i].wrapping_add(mul32_to64(data_key1, data_key1 >> 32)); - acc[i + 1] = acc[i].wrapping_add(mul32_to64(data_key2, data_key2 >> 32)); - - if acc_width == AccWidth::Acc128Bits { - acc[i] = acc[i].wrapping_add(in2); - acc[i + 1] = acc[i + 1].wrapping_add(in1); - } else { - acc[i] = acc[i].wrapping_add(in1); - acc[i + 1] = acc[i + 1].wrapping_add(in2); - } + pub unsafe fn accumulate512(acc: &mut [u64], data: &[u8], secret: &[u8]) { + let xinput = data.as_ptr().cast::(); + let xsecret = secret.as_ptr().cast::(); + + for i in 0..ACC_NB { + let data_val = xinput.add(i).read_le64(); + let data_key = data_val ^ xsecret.add(i).read_le64(); + acc[i ^ 1] = acc[i ^ 1].wrapping_add(data_val); // swap adjacent lanes + acc[i] = acc[i].wrapping_add(mul32_to64(data_key as u32, (data_key >> 32) as u32)); } } #[inline(always)] - fn mul32_to64(a: u64, b: u64) -> u64 { - (a & 0xFFFFFFFF).wrapping_mul(b & 0xFFFFFFFF) + fn mul32_to64(a: u32, b: u32) -> u64 { + u64::from(a).wrapping_mul(u64::from(b)) } #[inline(always)] - pub unsafe fn scramble_acc(acc: &mut [u64], key: &[u8]) { + pub unsafe fn scramble_acc(acc: &mut [u64], secret: &[u8]) { + let xsecret = secret.as_ptr().cast::(); + for i in 0..ACC_NB { - let key64 = key[8 * i..].read_u64_le(); + let key64 = xsecret.add(i).read_le64(); let mut acc64 = acc[i]; - acc64 ^= acc64 >> 47; + acc64 = xorshift64(acc64, 47); acc64 ^= key64; acc64 = acc64.wrapping_mul(u64::from(PRIME32_1)); acc[i] = acc64; } } + + const_assert_eq!(Secret::DEFAULT_SIZE % mem::size_of::(), 0); + + pub unsafe fn init_custom_secret(seed: u64) -> [u8; Secret::DEFAULT_SIZE] { + let mut secret = mem::MaybeUninit::<[u8; Secret::DEFAULT_SIZE]>::zeroed(); + + let rounds = Secret::DEFAULT_SIZE / mem::size_of::(); + let src = SECRET.as_ptr().cast::(); + let dest = secret.as_mut_ptr().cast::(); + + for i in 0..rounds { + let lo = src.add(i * 2).read_le64().wrapping_add(seed); + let hi = src.add(i * 2 + 1).read_le64().wrapping_sub(seed); + dest.add(i * 2).write(lo); + dest.add(i * 2 + 1).write(hi); + } + + secret.assume_init() + } } cfg_if! { if #[cfg(target_feature = "avx2")] { - use avx2::{accumulate512, scramble_acc}; + use avx2::{accumulate512, scramble_acc, init_custom_secret}; } else if #[cfg(target_feature = "sse2")] { - use sse2::{accumulate512, scramble_acc}; + use sse2::{accumulate512, scramble_acc, init_custom_secret}; } else { - use generic::{accumulate512, scramble_acc}; + use generic::{accumulate512, scramble_acc, init_custom_secret}; } } @@ -808,36 +927,97 @@ fn merge_accs(acc: &[u64], secret: &[u8], start: u64) -> u64 { #[inline(always)] fn mix2accs(acc: &[u64], secret: &[u8]) -> u64 { mul128_fold64( - acc[0] ^ secret.read_u64_le(), - acc[1] ^ secret[8..].read_u64_le(), + acc[0] ^ secret.read_le64(), + acc[1] ^ secret[8..].read_le64(), ) } #[inline(always)] -fn mix_16bytes(data: &[u8], key: &[u8], seed: u64) -> u64 { - let ll1 = data.read_u64_le(); - let ll2 = data[8..].read_u64_le(); +fn mix16bytes(input: &[u8], secret: &[u8], seed: u64) -> u64 { + let input_lo = input.read_le64(); + let input_hi = input[8..].read_le64(); mul128_fold64( - ll1 ^ key.read_u64_le().wrapping_add(seed), - ll2 ^ key[8..].read_u64_le().wrapping_sub(seed), + input_lo ^ secret.read_le64().wrapping_add(seed), + input_hi ^ secret[8..].read_le64().wrapping_sub(seed), ) } +#[inline(always)] +fn mix32bytes( + acc: (u64, u64), + input_1: &[u8], + input_2: &[u8], + secret: &[u8], + seed: u64, +) -> (u64, u64) { + let (mut low64, mut high64) = acc; + + low64 = low64.wrapping_add(mix16bytes(input_1, secret, seed)); + low64 ^= input_2.read_le64().wrapping_add(input_2[8..].read_le64()); + high64 = high64.wrapping_add(mix16bytes(input_2, &secret[16..], seed)); + high64 ^= input_1.read_le64().wrapping_add(input_1[8..].read_le64()); + + (low64, high64) +} + #[inline(always)] fn mul128_fold64(ll1: u64, ll2: u64) -> u64 { - let lll = u128::from(ll1).wrapping_mul(u128::from(ll2)); + let product = u128::from(ll1).wrapping_mul(u128::from(ll2)); - (lll as u64) ^ ((lll >> 64) as u64) + (product as u64) ^ ((product >> 64) as u64) } +/// Calculates a 32-bit to 64-bit long multiply. +#[inline(always)] +fn mult32to64(lhs: u32, rhs: u32) -> u64 { + u64::from(lhs).wrapping_mul(u64::from(rhs)) +} + +/// Calculates a 64->128-bit long multiply. +#[inline(always)] +fn mult64to128(lhs: u64, rhs: u64) -> (u64, u64) { + let product = u128::from(lhs).wrapping_mul(u128::from(rhs)); + + (product as u64, (product >> 64) as u64) +} + +#[inline(always)] +fn xorshift64(v64: u64, shift: usize) -> u64 { + v64 ^ (v64 >> shift) +} + +#[inline(always)] +fn xxh64_avalanche(mut hash: u64) -> u64 { + hash ^= hash >> 33; + hash = hash.wrapping_mul(PRIME64_2); + hash ^= hash >> 29; + hash = hash.wrapping_mul(PRIME64_3); + hash ^= hash >> 32; + hash +} + +/// This is a fast avalanche stage, +/// suitable when input bits are already partially mixed #[inline(always)] fn avalanche(mut h64: u64) -> u64 { h64 ^= h64 >> 37; - h64 = h64.wrapping_mul(PRIME64_3); + h64 = h64.wrapping_mul(0x165667919E3779F9); h64 ^ (h64 >> 32) } +/// This is a stronger avalanche, +/// inspired by Pelle Evensen's rrmxmx +/// preferable when input has not been previously mixed +#[inline(always)] +fn rrmxmx(mut h64: u64, len: usize) -> u64 { + h64 ^= h64.rotate_left(49) ^ h64.rotate_left(24); + h64 = h64.wrapping_mul(0x9FB21C651E98DF25); + h64 ^= (h64 >> 35) + len as u64; + h64 = h64.wrapping_mul(0x9FB21C651E98DF25); + h64 ^ (h64 >> 28) +} + /* === XXH3 streaming === */ const INTERNAL_BUFFER_SIZE: usize = 256; @@ -851,8 +1031,11 @@ const_assert_eq!(INTERNAL_BUFFER_SIZE % STRIPE_LEN, 0); #[derive(Clone)] struct State { acc: Acc, + buffer: [u8; INTERNAL_BUFFER_SIZE], + buffered_size: usize, + secret_limit: usize, + nb_stripes_per_block: usize, secret: With, - buf: Vec, seed: u64, total_len: usize, nb_stripes_so_far: usize, @@ -861,7 +1044,7 @@ struct State { #[cfg_attr(feature = "serialize", derive(Deserialize, Serialize))] #[derive(Clone)] enum With { - Default(Secret), + DefaultSecret, Custom(Secret), Ref(Vec), } @@ -871,7 +1054,8 @@ impl Deref for With { fn deref(&self) -> &Self::Target { match self { - With::Default(secret) | With::Custom(secret) => &secret.0[..], + With::DefaultSecret => &SECRET, + With::Custom(secret) => &secret.0, With::Ref(secret) => secret, } } @@ -879,16 +1063,22 @@ impl Deref for With { impl Default for State { fn default() -> Self { - Self::new(0, With::Default(Secret::default())) + Self::new(0, With::DefaultSecret) } } impl State { fn new(seed: u64, secret: With) -> Self { + let secret_limit = secret.len() - STRIPE_LEN; + let nb_stripes_per_block = secret_limit / SECRET_CONSUME_RATE; + State { acc: Acc::default(), + buffer: [0; INTERNAL_BUFFER_SIZE], + buffered_size: 0, secret, - buf: Vec::with_capacity(INTERNAL_BUFFER_SIZE), + secret_limit, + nb_stripes_per_block, seed, total_len: 0, nb_stripes_so_far: 0, @@ -902,124 +1092,148 @@ impl State { fn with_secret>>(secret: S) -> State { let secret = secret.into(); - debug_assert!(secret.len() >= SECRET_SIZE_MIN); + debug_assert!(secret.len() >= Secret::SIZE_MIN); Self::new(0, With::Ref(secret)) } #[inline(always)] - fn secret_limit(&self) -> usize { - self.secret.len() - STRIPE_LEN + fn is_empty(&self) -> bool { + self.buffered_size == 0 } #[inline(always)] - fn nb_stripes_per_block(&self) -> usize { - self.secret_limit() / SECRET_CONSUME_RATE + fn clear(&mut self) { + self.buffered_size = 0 } #[inline(always)] - fn update(&mut self, mut input: &[u8], acc_width: AccWidth) { - let len = input.len(); + fn extend_from_slice(&mut self, data: &[u8]) { + debug_assert!(self.buffered_size + data.len() <= self.buffer.len()); - if len == 0 { + let buf = &mut self.buffer[self.buffered_size..]; + let len = data.len(); + buf[..len].copy_from_slice(data); + self.buffered_size += len; + } + + #[inline(always)] + fn update(&mut self, mut input: &[u8]) { + if input.is_empty() { return; } - self.total_len += len; + self.total_len += input.len(); - if self.buf.len() + len <= self.buf.capacity() { - self.buf.extend_from_slice(input); + let free_size = INTERNAL_BUFFER_SIZE - self.buffered_size; + + if input.len() <= free_size { + // fill in tmp buffer + self.extend_from_slice(input); return; } - let nb_stripes_per_block = self.nb_stripes_per_block(); - let secret_limit = self.secret_limit(); + // total input is now > XXH3_INTERNALBUFFER_SIZE - if !self.buf.is_empty() { - // some data within internal buffer: fill then consume it - let (load, rest) = input.split_at(self.buf.capacity() - self.buf.len()); - self.buf.extend_from_slice(load); + // Internal buffer is partially filled (always, except at beginning) Complete it, then consume it. + if !self.is_empty() { + let (load, rest) = input.split_at(free_size); + self.extend_from_slice(load); input = rest; self.nb_stripes_so_far = consume_stripes( &mut self.acc, self.nb_stripes_so_far, - nb_stripes_per_block, - &self.buf, + self.nb_stripes_per_block, + &self.buffer, INTERNAL_BUFFER_STRIPES, &self.secret, - secret_limit, - acc_width, + self.secret_limit, ); - self.buf.clear(); + self.clear(); } - // consume input by full buffer quantities - let mut chunks = input.chunks_exact(INTERNAL_BUFFER_SIZE); + // Consume input by a multiple of internal buffer size + if input.len() > INTERNAL_BUFFER_SIZE { + let mut chunks = input.chunks_exact(INTERNAL_BUFFER_SIZE); + + for chunk in &mut chunks { + self.nb_stripes_so_far = consume_stripes( + &mut self.acc, + self.nb_stripes_so_far, + self.nb_stripes_per_block, + chunk, + INTERNAL_BUFFER_STRIPES, + &self.secret, + self.secret_limit, + ); + } - for chunk in &mut chunks { - self.nb_stripes_so_far = consume_stripes( - &mut self.acc, - self.nb_stripes_so_far, - nb_stripes_per_block, - chunk, - INTERNAL_BUFFER_STRIPES, - &self.secret, - secret_limit, - acc_width, - ); + input = chunks.remainder(); + + // for last partial stripe + unsafe { + ptr::copy_nonoverlapping( + input.as_ptr().add(INTERNAL_BUFFER_SIZE).sub(STRIPE_LEN), + self.buffer.as_mut_ptr().add(self.buffer.len() - STRIPE_LEN), + STRIPE_LEN, + ); + } } - // some remaining input data : buffer it - self.buf.extend_from_slice(chunks.remainder()) + // Some remaining input (always) : buffer it + self.extend_from_slice(input) } #[inline(always)] - fn digest_long(&self, acc_width: AccWidth) -> Acc { + fn digest_long(&self) -> Acc { + // Digest on a local copy. + // This way, the state remains unaltered, and it can continue ingesting more input afterwards. let mut acc = self.acc.clone(); - let secret_limit = self.secret_limit(); - if self.buf.len() >= STRIPE_LEN { - // digest locally, state remains unaltered, and can continue ingesting more data afterwards - let total_nb_stripes = self.buf.len() / STRIPE_LEN; + if self.buffered_size >= STRIPE_LEN { + let nb_stripes = (self.buffered_size - 1) / STRIPE_LEN; let _nb_stripes_so_far = consume_stripes( &mut acc, self.nb_stripes_so_far, - self.nb_stripes_per_block(), - &self.buf, - total_nb_stripes, + self.nb_stripes_per_block, + &self.buffer, + nb_stripes, &self.secret, - secret_limit, - acc_width, + self.secret_limit, ); - if (self.buf.len() % STRIPE_LEN) != 0 { - unsafe { - accumulate512( - &mut acc, - &self.buf[self.buf.len() - STRIPE_LEN..], - &self.secret[secret_limit - SECRET_LASTACC_START..], - acc_width, - ); - } + + // last stripe + unsafe { + accumulate512( + &mut acc, + &self.buffer[self.buffer.len() - STRIPE_LEN..], + &self.secret[self.secret_limit - SECRET_LASTACC_START..], + ); } - } else if !self.buf.is_empty() { + } else { // one last stripe let mut last_stripe = [0u8; STRIPE_LEN]; - let catchup_size = STRIPE_LEN - self.buf.len(); + let catchup_size = STRIPE_LEN - self.buffered_size; - last_stripe[..catchup_size].copy_from_slice(unsafe { - slice::from_raw_parts( - self.buf.as_ptr().add(self.buf.capacity() - catchup_size), + unsafe { + ptr::copy_nonoverlapping( + self.buffer + .as_ptr() + .add(self.buffer.len()) + .sub(catchup_size), + last_stripe.as_mut_ptr(), catchup_size, - ) - }); - last_stripe[catchup_size..].copy_from_slice(&self.buf); + ); + ptr::copy_nonoverlapping( + self.buffer.as_ptr(), + last_stripe.as_mut_ptr().add(catchup_size), + self.buffered_size, + ); - unsafe { accumulate512( &mut acc, - &last_stripe[..], - &self.secret[secret_limit - SECRET_LASTACC_START..], - acc_width, + &last_stripe, + &self.secret[self.secret_limit - SECRET_LASTACC_START..], ); } } @@ -1030,7 +1244,7 @@ impl State { #[inline(always)] fn digest64(&self) -> u64 { if self.total_len > MIDSIZE_MAX { - let acc = self.digest_long(AccWidth::Acc64Bits); + let acc = self.digest_long(); merge_accs( &acc, @@ -1038,89 +1252,89 @@ impl State { (self.total_len as u64).wrapping_mul(PRIME64_1), ) } else if self.seed != 0 { - hash64_with_seed(&self.buf, self.seed) + hash64_with_seed(&self.buffer[..self.total_len], self.seed) } else { - hash64_with_secret(&self.buf, &self.secret[..self.secret_limit() + STRIPE_LEN]) + hash64_with_secret( + &self.buffer[..self.total_len], + &self.secret[..self.secret_limit + STRIPE_LEN], + ) } } #[inline(always)] fn digest128(&self) -> u128 { - let secret_limit = self.secret_limit(); - if self.total_len > MIDSIZE_MAX { - let acc = self.digest_long(AccWidth::Acc128Bits); - - debug_assert!(secret_limit + STRIPE_LEN >= ACC_SIZE + SECRET_MERGEACCS_START); + let acc = self.digest_long(); - let total_len = self.total_len as u64; + debug_assert!(self.secret_limit + STRIPE_LEN >= Acc::SIZE + SECRET_MERGEACCS_START); let low64 = merge_accs( &acc, &self.secret[SECRET_MERGEACCS_START..], - total_len.wrapping_mul(PRIME64_1), + (self.total_len as u64).wrapping_mul(PRIME64_1), ); let high64 = merge_accs( &acc, - &self.secret[secret_limit + STRIPE_LEN - ACC_SIZE - SECRET_MERGEACCS_START..], - !total_len.wrapping_mul(PRIME64_2), + &self.secret[self.secret_limit + STRIPE_LEN - Acc::SIZE - SECRET_MERGEACCS_START..], + !(self.total_len as u64).wrapping_mul(PRIME64_2), ); u128::from(low64) + (u128::from(high64) << 64) } else if self.seed != 0 { - hash128_with_seed(&self.buf, self.seed) + hash128_with_seed(&self.buffer[..self.total_len], self.seed) } else { - hash128_with_secret(&self.buf, &self.secret[..secret_limit + STRIPE_LEN]) + hash128_with_secret( + &self.buffer[..self.total_len], + &self.secret[..self.secret_limit + STRIPE_LEN], + ) } } } #[inline(always)] -#[allow(clippy::too_many_arguments)] fn consume_stripes( acc: &mut [u64], nb_stripes_so_far: usize, nb_stripes_per_block: usize, - data: &[u8], - total_stripes: usize, + input: &[u8], + nb_stripes: usize, secret: &[u8], secret_limit: usize, - acc_width: AccWidth, ) -> usize { + debug_assert!(nb_stripes <= nb_stripes_per_block); debug_assert!(nb_stripes_so_far < nb_stripes_per_block); - if nb_stripes_per_block - nb_stripes_so_far <= total_stripes { - let nb_stripes = nb_stripes_per_block - nb_stripes_so_far; + if nb_stripes_per_block - nb_stripes_so_far <= nb_stripes { + // need a scrambling operation + let nb_stripes_to_end_of_block = nb_stripes_per_block - nb_stripes_so_far; + let nb_stripes_after_block = nb_stripes - nb_stripes_to_end_of_block; accumulate( acc, - data, + input, &secret[nb_stripes_so_far * SECRET_CONSUME_RATE..], - nb_stripes, - acc_width, + nb_stripes_to_end_of_block, ); unsafe { scramble_acc(acc, &secret[secret_limit..]); } accumulate( acc, - &data[nb_stripes * STRIPE_LEN..], + &input[nb_stripes_to_end_of_block * STRIPE_LEN..], secret, - total_stripes - nb_stripes, - acc_width, + nb_stripes_after_block, ); - total_stripes - nb_stripes + nb_stripes_after_block } else { accumulate( acc, - data, + input, &secret[nb_stripes_so_far * SECRET_CONSUME_RATE..], - total_stripes, - acc_width, + nb_stripes, ); - nb_stripes_so_far + total_stripes + nb_stripes_so_far + nb_stripes } } @@ -1139,189 +1353,217 @@ fn hash_len_0to16_128bits(data: &[u8], len: usize, secret: &[u8], seed: u64) -> } else if len > 0 { hash_len_1to3_128bits(data, len, secret, seed) } else { - 0 + let bitflipl = secret[64..].read_le64() ^ secret[72..].read_le64(); + let bitfliph = secret[80..].read_le64() ^ secret[88..].read_le64(); + + let low64 = xxh64_avalanche(seed ^ bitflipl); + let high64 = xxh64_avalanche(seed ^ bitfliph); + + u128::from(low64) + (u128::from(high64) << 64) } } #[inline(always)] -fn hash_len_1to3_128bits(data: &[u8], len: usize, key: &[u8], seed: u64) -> u128 { +fn hash_len_1to3_128bits(input: &[u8], len: usize, secret: &[u8], seed: u64) -> u128 { debug_assert!((1..=3).contains(&len)); - let c1 = u32::from(data[0]); - let c2 = u32::from(data[len >> 1]); - let c3 = u32::from(data[len - 1]); - let combinedl = c1 + (c2 << 8) + (c3 << 16) + ((len as u32) << 24); - let combinedh = combinedl.swap_bytes(); - let keyedl = u64::from(combinedl) ^ u64::from(key.read_u32_le()).wrapping_add(seed); - let keyedh = u64::from(combinedh) ^ u64::from(key[4..].read_u32_le()).wrapping_sub(seed); - let mixedl = keyedl.wrapping_mul(PRIME64_1); - let mixedh = keyedh.wrapping_mul(PRIME64_2); - - u128::from(avalanche(mixedl)) + (u128::from(avalanche(mixedh)) << 64) + // len = 1: combinedl = { input[0], 0x01, input[0], input[0] } + // len = 2: combinedl = { input[1], 0x02, input[0], input[1] } + // len = 3: combinedl = { input[2], 0x03, input[0], input[1] } + + let c1 = u32::from(input[0]); + let c2 = u32::from(input[len >> 1]); + let c3 = u32::from(input[len - 1]); + let combinedl = (c1 << 16) | (c2 << 24) | c3 | ((len as u32) << 8); + let combinedh = combinedl.swap_bytes().rotate_left(13); + let bitflipl = u64::from(secret.read_le32() ^ secret[4..].read_le32()).wrapping_add(seed); + let bitfliph = u64::from(secret[8..].read_le32() ^ secret[12..].read_le32()).wrapping_sub(seed); + let keyed_lo = u64::from(combinedl) ^ bitflipl; + let keyed_hi = u64::from(combinedh) ^ bitfliph; + let low64 = xxh64_avalanche(keyed_lo); + let high64 = xxh64_avalanche(keyed_hi); + u128::from(low64) + (u128::from(high64) << 64) } #[inline(always)] -fn hash_len_4to8_128bits(data: &[u8], len: usize, key: &[u8], seed: u64) -> u128 { +fn hash_len_4to8_128bits(input: &[u8], len: usize, secret: &[u8], mut seed: u64) -> u128 { debug_assert!((4..=8).contains(&len)); - let in1 = u64::from(data.read_u32_le()); - let in2 = u64::from(data[len - 4..].read_u32_le()); - let in64l = in1.wrapping_add(in2 << 32); - let in64h = in64l.swap_bytes(); - let keyedl = in64l ^ key.read_u64_le().wrapping_add(seed); - let keyedh = in64h ^ key[8..].read_u64_le().wrapping_sub(seed); - let mix64l1 = - (len as u64).wrapping_add((keyedl ^ (keyedl >> 51)).wrapping_mul(u64::from(PRIME32_1))); - let mix64l2 = (mix64l1 ^ (mix64l1 >> 47)).wrapping_mul(PRIME64_2); - let mix64h1 = (keyedh ^ (keyedh >> 47)) - .wrapping_mul(PRIME64_1) - .wrapping_sub(len as u64); - let mix64h2 = (mix64h1 ^ (mix64h1 >> 43)).wrapping_mul(PRIME64_4); + seed ^= u64::from((seed as u32).swap_bytes()) << 32; + + let input_lo = input.read_le32(); + let input_hi = input[input.len() - 4..].read_le32(); + let input_64 = u64::from(input_lo) + (u64::from(input_hi) << 32); + let bitflip = (secret[16..].read_le64() ^ secret[24..].read_le64()).wrapping_add(seed); + let keyed = input_64 ^ bitflip; + + // Shift len to the left to ensure it is even, this avoids even multiplies. + let (mut low64, mut high64) = mult64to128(keyed, PRIME64_1.wrapping_add((len << 2) as u64)); + high64 = high64.wrapping_add(low64 << 1); + low64 ^= high64 >> 3; - u128::from(avalanche(mix64l2)) + (u128::from(avalanche(mix64h2)) << 64) + low64 = xorshift64(low64, 35); + low64 = low64.wrapping_mul(0x9FB21C651E98DF25); + low64 = xorshift64(low64, 28); + high64 = avalanche(high64); + + u128::from(low64) + (u128::from(high64) << 64) } #[inline(always)] -fn hash_len_9to16_128bits(data: &[u8], len: usize, key: &[u8], seed: u64) -> u128 { +fn hash_len_9to16_128bits(input: &[u8], len: usize, secret: &[u8], seed: u64) -> u128 { debug_assert!((9..=16).contains(&len)); + let bitflipl = (secret[32..].read_le64() ^ secret[40..].read_le64()).wrapping_sub(seed); + let bitfliph = (secret[48..].read_le64() ^ secret[56..].read_le64()).wrapping_add(seed); + let input_lo = input.read_le64(); + let mut input_hi = input[len - 8..].read_le64(); + let (mut low64, mut high64) = mult64to128(input_lo ^ input_hi ^ bitflipl, PRIME64_1); + + /* + * Put len in the middle of m128 to ensure that the length gets mixed to + * both the low and high bits in the 128x64 multiply below. + */ + low64 = low64.wrapping_add(((len - 1) << 54) as u64); + input_hi ^= bitfliph; + + /* + * Add the high 32 bits of input_hi to the high 32 bits of m128, then + * add the long product of the low 32 bits of input_hi and XXH_PRIME32_2 to + * the high 64 bits of m128. + * + * The best approach to this operation is different on 32-bit and 64-bit. + */ + if cfg!(target_pointer_width = "32") { + /* + * 32-bit optimized version, which is more readable. + * + * On 32-bit, it removes an ADC and delays a dependency between the two + * halves of m128.high64, but it generates an extra mask on 64-bit. + */ + high64 = high64 + .wrapping_add(input_hi % 0xFFFFFFFF00000000) + .wrapping_add(mult32to64(input_hi as u32, PRIME32_2)); + } else { + /* + * 64-bit optimized (albeit more confusing) version. + * + * Uses some properties of addition and multiplication to remove the mask: + * + * Let: + * a = input_hi.lo = (input_hi & 0x00000000FFFFFFFF) + * b = input_hi.hi = (input_hi & 0xFFFFFFFF00000000) + * c = XXH_PRIME32_2 + * + * a + (b * c) + * Inverse Property: x + y - x == y + * a + (b * (1 + c - 1)) + * Distributive Property: x * (y + z) == (x * y) + (x * z) + * a + (b * 1) + (b * (c - 1)) + * Identity Property: x * 1 == x + * a + b + (b * (c - 1)) + * + * Substitute a, b, and c: + * input_hi.hi + input_hi.lo + ((xxh_u64)input_hi.lo * (XXH_PRIME32_2 - 1)) + * + * Since input_hi.hi + input_hi.lo == input_hi, we get this: + * input_hi + ((xxh_u64)input_hi.lo * (XXH_PRIME32_2 - 1)) + */ + high64 = high64 + .wrapping_add(input_hi) + .wrapping_add(mult32to64(input_hi as u32, PRIME32_2 - 1)); + } - let ll1 = data.read_u64_le() ^ key.read_u64_le().wrapping_add(seed); - let ll2 = data[len - 8..].read_u64_le() ^ key[8..].read_u64_le().wrapping_sub(seed); - let inlow = ll1 ^ ll2; - - let m128 = u128::from(inlow).wrapping_mul(u128::from(PRIME64_1)); - let high64 = ((m128 >> 64) as u64).wrapping_add(ll2.wrapping_mul(PRIME64_1)); - let low64 = (m128 as u64) ^ (high64 >> 32); + // m128 ^= XXH_swap64(m128 >> 64); + low64 ^= high64.swap_bytes(); - let h128 = u128::from(low64).wrapping_mul(u128::from(PRIME64_2)); - let high64 = ((h128 >> 64) as u64).wrapping_add(high64.wrapping_mul(PRIME64_2)); - let low64 = h128 as u64; + // 128x64 multiply: h128 = m128 * XXH_PRIME64_2; + let (lo64, hi64) = mult64to128(low64, PRIME64_2); + let hi64 = hi64.wrapping_add(high64.wrapping_mul(PRIME64_2)); - u128::from(avalanche(low64)) + (u128::from(avalanche(high64)) << 64) + u128::from(avalanche(lo64)) + (u128::from(avalanche(hi64)) << 64) } #[inline(always)] -fn hash_len_17to128_128bits(data: &[u8], len: usize, secret: &[u8], seed: u64) -> u128 { +fn hash_len_17to128_128bits(input: &[u8], len: usize, secret: &[u8], seed: u64) -> u128 { debug_assert!((17..=128).contains(&len)); - debug_assert!(secret.len() >= SECRET_SIZE_MIN); + debug_assert!(secret.len() >= Secret::SIZE_MIN); - let mut acc1 = PRIME64_1.wrapping_mul(len as u64); - let mut acc2 = 0u64; + let mut acc = (PRIME64_1.wrapping_mul(len as u64), 0); if len > 32 { if len > 64 { if len > 96 { - acc1 = acc1.wrapping_add(mix_16bytes(&data[48..], &secret[96..], seed)); - acc2 = acc2.wrapping_add(mix_16bytes(&data[len - 64..], &secret[112..], seed)); + acc = mix32bytes(acc, &input[48..], &input[len - 64..], &secret[96..], seed); } - acc1 = acc1.wrapping_add(mix_16bytes(&data[32..], &secret[64..], seed)); - acc2 = acc2.wrapping_add(mix_16bytes(&data[len - 48..], &secret[80..], seed)); + acc = mix32bytes(acc, &input[32..], &input[len - 48..], &secret[64..], seed); } - - acc1 = acc1.wrapping_add(mix_16bytes(&data[16..], &secret[32..], seed)); - acc2 = acc2.wrapping_add(mix_16bytes(&data[len - 32..], &secret[48..], seed)); + acc = mix32bytes(acc, &input[16..], &input[len - 32..], &secret[32..], seed); } + let (low64, high64) = mix32bytes(acc, input, &input[len - 16..], secret, seed); - acc1 = acc1.wrapping_add(mix_16bytes(data, &secret[..], seed)); - acc2 = acc2.wrapping_add(mix_16bytes(&data[len - 16..], &secret[16..], seed)); - - let low64 = acc1.wrapping_add(acc2); - let high64 = acc1 + let lo64 = low64.wrapping_add(high64); + let hi64 = low64 .wrapping_mul(PRIME64_1) - .wrapping_add(acc2.wrapping_mul(PRIME64_4)) + .wrapping_add(high64.wrapping_mul(PRIME64_4)) .wrapping_add((len as u64).wrapping_sub(seed).wrapping_mul(PRIME64_2)); - u128::from(avalanche(low64)) + (u128::from(0u64.wrapping_sub(avalanche(high64))) << 64) + u128::from(avalanche(lo64)) + (u128::from(0u64.wrapping_sub(avalanche(hi64))) << 64) } #[inline(always)] -fn hash_len_129to240_128bits(data: &[u8], len: usize, secret: &[u8], seed: u64) -> u128 { +fn hash_len_129to240_128bits(input: &[u8], len: usize, secret: &[u8], seed: u64) -> u128 { debug_assert!((129..=MIDSIZE_MAX).contains(&len)); - debug_assert!(secret.len() >= SECRET_SIZE_MIN); - - let acc1 = (len as u64).wrapping_mul(PRIME64_1); - let acc2 = 0u64; - - let (acc1, acc2) = (0..4).fold((acc1, acc2), |(acc1, acc2), i| { - ( - acc1.wrapping_add(mix_16bytes(&data[32 * i..], &secret[32 * i..], seed)), - acc2.wrapping_add(mix_16bytes( - &data[32 * i + 16..], - &secret[32 * i + 16..], - 0u64.wrapping_sub(seed), - )), - ) + debug_assert!(secret.len() >= Secret::SIZE_MIN); + + let acc = (PRIME64_1.wrapping_mul(len as u64), 0); + + let (low64, high64) = (0..4).fold(acc, |acc, i| { + let off = 32 * i; + mix32bytes(acc, &input[off..], &input[off + 16..], &secret[off..], seed) }); - let acc1 = avalanche(acc1); - let acc2 = avalanche(acc2); - let nb_rounds = len / 32; - debug_assert!(nb_rounds >= 4); + let acc = (avalanche(low64), avalanche(high64)); - let (acc1, acc2) = (4..nb_rounds).fold((acc1, acc2), |(acc1, acc2), i| { - ( - acc1.wrapping_add(mix_16bytes( - &data[32 * i..], - &secret[32 * (i - 4) + MIDSIZE_STARTOFFSET..], - seed, - )), - acc2.wrapping_add(mix_16bytes( - &data[32 * i + 16..], - &secret[32 * (i - 4) + 16 + MIDSIZE_STARTOFFSET..], - 0u64.wrapping_sub(seed), - )), + let rounds = len / 32; + debug_assert!(rounds >= 4); + + let acc = (4..rounds).fold(acc, |acc, i| { + let off = 32 * i; + mix32bytes( + acc, + &input[off..], + &input[off + 16..], + &secret[MIDSIZE_START_OFFSET + 32 * (i - 4)..], + seed, ) }); // last bytes - let acc1 = acc1.wrapping_add(mix_16bytes( - &data[len - 16..], - &secret[SECRET_SIZE_MIN - MIDSIZE_LASTOFFSET..], - seed, - )); - let acc2 = acc2.wrapping_add(mix_16bytes( - &data[len - 32..], - &secret[SECRET_SIZE_MIN - MIDSIZE_LASTOFFSET - 16..], + let (low64, high64) = mix32bytes( + acc, + &input[len - 16..], + &input[len - 32..], + &secret[Secret::SIZE_MIN - MIDSIZE_LAST_OFFSET - 16..], 0u64.wrapping_sub(seed), - )); + ); - let low64 = acc1.wrapping_add(acc2); - let high64 = acc1 + let lo64 = low64.wrapping_add(high64); + let hi64 = low64 .wrapping_mul(PRIME64_1) - .wrapping_add(acc2.wrapping_mul(PRIME64_4)) + .wrapping_add(high64.wrapping_mul(PRIME64_4)) .wrapping_add((len as u64).wrapping_sub(seed).wrapping_mul(PRIME64_2)); - u128::from(avalanche(low64)) + (u128::from(0u64.wrapping_sub(avalanche(high64))) << 64) -} - -#[inline] -fn hash_long_128bits_with_default_secret(data: &[u8], len: usize) -> u128 { - hash_long_128bits_internal(data, len, &SECRET) -} - -#[inline] -fn hash_long_128bits_with_secret(data: &[u8], len: usize, secret: &[u8]) -> u128 { - hash_long_128bits_internal(data, len, secret) -} - -#[inline] -fn hash_long_128bits_with_seed(data: &[u8], len: usize, seed: u64) -> u128 { - if seed == 0 { - hash_long_128bits_with_default_secret(data, len) - } else { - let secret = Secret::with_seed(seed); - - hash_long_128bits_internal(data, len, &secret) - } + u128::from(avalanche(lo64)) + (u128::from(0u64.wrapping_sub(avalanche(hi64))) << 64) } #[inline(always)] -fn hash_long_128bits_internal(data: &[u8], len: usize, secret: &[u8]) -> u128 { +fn hash_long_128bits_internal(data: &[u8], secret: &[u8]) -> u128 { let mut acc = Acc::default(); + let len = data.len(); - hash_long_internal_loop(&mut acc, data, len, secret, AccWidth::Acc128Bits); + hash_long_internal_loop(&mut acc, data, len, secret); - debug_assert!(secret.len() >= acc.len() + SECRET_MERGEACCS_START); + debug_assert!(secret.len() >= Acc::SIZE + SECRET_MERGEACCS_START); let low64 = merge_accs( &acc, @@ -1330,7 +1572,7 @@ fn hash_long_128bits_internal(data: &[u8], len: usize, secret: &[u8]) -> u128 { ); let high64 = merge_accs( &acc, - &secret[secret.len() - ACC_SIZE - SECRET_MERGEACCS_START..], + &secret[secret.len() - Acc::SIZE - SECRET_MERGEACCS_START..], !(len as u64).wrapping_mul(PRIME64_2), ); @@ -1346,16 +1588,17 @@ and near the end of the digest function */ #[cfg(test)] mod tests { use alloc::vec; + use core::cmp; use super::*; - const PRIME: u64 = 2654435761; + const PRIME32: u64 = 2654435761; const PRIME64: u64 = 11400714785074694797; - const SANITY_BUFFER_SIZE: usize = 2243; + const SANITY_BUFFER_SIZE: usize = 2367; fn sanity_buffer() -> [u8; SANITY_BUFFER_SIZE] { let mut buf = [0; SANITY_BUFFER_SIZE]; - let mut byte_gen: u64 = PRIME; + let mut byte_gen: u64 = PRIME32; for b in buf.iter_mut() { *b = (byte_gen >> 56) as u8; @@ -1370,34 +1613,32 @@ mod tests { let buf = sanity_buffer(); let test_cases = vec![ - (&[][..], 0, 0), /* zero-length hash is always 0 */ - (&[][..], PRIME64, 0), - (&buf[..1], 0, 0x7198D737CFE7F386), /* 1 - 3 */ - (&buf[..1], PRIME64, 0xB70252DB7161C2BD), /* 1 - 3 */ - (&buf[..6], 0, 0x22CBF5F3E1F6257C), /* 4 - 8 */ - (&buf[..6], PRIME64, 0x6398631C12AB94CE), /* 4 - 8 */ - (&buf[..12], 0, 0xD5361CCEEBB5A0CC), /* 9 - 16 */ - (&buf[..12], PRIME64, 0xC4C125E75A808C3D), /* 9 - 16 */ - (&buf[..24], 0, 0x46796F3F78B20F6B), /* 17 - 32 */ - (&buf[..24], PRIME64, 0x60171A7CD0A44C10), /* 17 - 32 */ - (&buf[..48], 0, 0xD8D4D3590D136E11), /* 33 - 64 */ - (&buf[..48], PRIME64, 0x05441F2AEC2A1296), /* 33 - 64 */ - (&buf[..80], 0, 0xA1DC8ADB3145B86A), /* 65 - 96 */ - (&buf[..80], PRIME64, 0xC9D55256965B7093), /* 65 - 96 */ - (&buf[..112], 0, 0xE43E5717A61D3759), /* 97 -128 */ - (&buf[..112], PRIME64, 0x5A5F89A3FECE44A5), /* 97 -128 */ - (&buf[..195], 0, 0x6F747739CBAC22A5), /* 129-240 */ - (&buf[..195], PRIME64, 0x33368E23C7F95810), /* 129-240 */ - (&buf[..403], 0, 0x4834389B15D981E8), /* one block, last stripe is overlapping */ - (&buf[..403], PRIME64, 0x85CE5DFFC7B07C87), /* one block, last stripe is overlapping */ - (&buf[..512], 0, 0x6A1B982631F059A8), /* one block, finishing at stripe boundary */ - (&buf[..512], PRIME64, 0x10086868CF0ADC99), /* one block, finishing at stripe boundary */ - (&buf[..2048], 0, 0xEFEFD4449323CDD4), /* 2 blocks, finishing at block boundary */ - (&buf[..2048], PRIME64, 0x01C85E405ECA3F6E), /* 2 blocks, finishing at block boundary */ - (&buf[..2240], 0, 0x998C0437486672C7), /* 3 blocks, finishing at stripe boundary */ - (&buf[..2240], PRIME64, 0x4ED38056B87ABC7F), /* 3 blocks, finishing at stripe boundary */ - (&buf[..2243], 0, 0xA559D20581D742D3), /* 3 blocks, last stripe is overlapping */ - (&buf[..2243], PRIME64, 0x96E051AB57F21FC8), /* 3 blocks, last stripe is overlapping */ + (&[][..], 0, 0x2D06800538D394C2), /* zero-length hash is always 0 */ + (&[][..], PRIME64, 0xA8A6B918B2F0364A), + (&buf[..1], 0, 0xC44BDFF4074EECDB), /* 1 - 3 */ + (&buf[..1], PRIME64, 0x032BE332DD766EF8), /* 1 - 3 */ + (&buf[..6], 0, 0x27B56A84CD2D7325), /* 4 - 8 */ + (&buf[..6], PRIME64, 0x84589C116AB59AB9), /* 4 - 8 */ + (&buf[..12], 0, 0xA713DAF0DFBB77E7), /* 9 - 16 */ + (&buf[..12], PRIME64, 0xE7303E1B2336DE0E), /* 9 - 16 */ + (&buf[..24], 0, 0xA3FE70BF9D3510EB), /* 17 - 32 */ + (&buf[..24], PRIME64, 0x850E80FC35BDD690), /* 17 - 32 */ + (&buf[..48], 0, 0x397DA259ECBA1F11), /* 33 - 64 */ + (&buf[..48], PRIME64, 0xADC2CBAA44ACC616), /* 33 - 64 */ + (&buf[..80], 0, 0xBCDEFBBB2C47C90A), /* 65 - 96 */ + (&buf[..80], PRIME64, 0xC6DD0CB699532E73), /* 65 - 96 */ + (&buf[..195], 0, 0xCD94217EE362EC3A), /* 129-240 */ + (&buf[..195], PRIME64, 0xBA68003D370CB3D9), /* 129-240 */ + (&buf[..403], 0, 0xCDEB804D65C6DEA4), /* one block, last stripe is overlapping */ + (&buf[..403], PRIME64, 0x6259F6ECFD6443FD), /* one block, last stripe is overlapping */ + (&buf[..512], 0, 0x617E49599013CB6B), /* one block, finishing at stripe boundary */ + (&buf[..512], PRIME64, 0x3CE457DE14C27708), /* one block, finishing at stripe boundary */ + (&buf[..2048], 0, 0xDD59E2C3A5F038E0), /* 2 nb_blocks, finishing at block boundary */ + (&buf[..2048], PRIME64, 0x66F81670669ABABC), /* 2 nb_blocks, finishing at block boundary */ + (&buf[..2240], 0, 0x6E73A90539CF2948), /* 3 nb_blocks, finishing at stripe boundary */ + (&buf[..2240], PRIME64, 0x757BA8487D1B5247), /* 3 nb_blocks, finishing at stripe boundary */ + (&buf[..2367], 0, 0xCB37AEB9E5D361ED), /* 3 nb_blocks, last stripe is overlapping */ + (&buf[..2367], PRIME64, 0xD2DB3415B942B42A), /* 3 nb_blocks, last stripe is overlapping */ ]; for (buf, seed, result) in test_cases { @@ -1407,11 +1648,12 @@ mod tests { assert_eq!( hash, result, - "hash64_with_seed(&buf[..{}], seed={}) failed, got 0x{:X}, expected 0x{:X}", + "hash64_with_seed(&buf[..{}], seed={}) failed, got 0x{:X}, expected 0x{:X}, buf: {:?}", buf.len(), seed, hash, - result + result, + buf ); } @@ -1426,11 +1668,12 @@ mod tests { assert_eq!( hash, result, - "Hash64::update(&buf[..{}]) with seed={} failed, got 0x{:X}, expected 0x{:X}", + "Hash64::update(&buf[..{}]) with seed={} failed, got 0x{:X}, expected 0x{:X}, buf: {:?}", buf.len(), seed, hash, - result + result, + buf ); } @@ -1444,11 +1687,12 @@ mod tests { assert_eq!( hash, result, - "Hash64::update(&buf[..3], &buf[3..{}]) with seed={} failed, got 0x{:X}, expected 0x{:X}", + "Hash64::update(&buf[..3], &buf[3..{}]) with seed={} failed, got 0x{:X}, expected 0x{:X}, buf: {:?}", buf.len(), seed, hash, - result + result, + buf ); } @@ -1465,11 +1709,12 @@ mod tests { assert_eq!( hash, result, - "Hash64::update(&buf[..{}].chunks(1)) with seed={} failed, got 0x{:X}, expected 0x{:X}", + "Hash64::update(&buf[..{}].chunks(1)) with seed={} failed, got 0x{:X}, expected 0x{:X}, buf: {:?}", buf.len(), seed, hash, - result + result, + buf ); } } @@ -1478,22 +1723,21 @@ mod tests { #[test] fn hash_64bits_with_secret_sanity_check() { let buf = sanity_buffer(); - let secret = &buf[7..7 + SECRET_SIZE_MIN + 11]; + let secret = &buf[7..7 + Secret::SIZE_MIN + 11]; let test_cases = vec![ - (&[][..], secret, 0), /* zero-length hash is always 0 */ - (&buf[..1], secret, 0x7F69735D618DB3F0), /* 1 - 3 */ - (&buf[..6], secret, 0xBFCC7CB1B3554DCE), /* 6 - 8 */ - (&buf[..12], secret, 0x8C50DC90AC9206FC), /* 9 - 16 */ - (&buf[..24], secret, 0x1CD2C2EE9B9A0928), /* 17 - 32 */ - (&buf[..48], secret, 0xA785256D9D65D514), /* 33 - 64 */ - (&buf[..80], secret, 0x6F3053360D21BBB7), /* 65 - 96 */ - (&buf[..112], secret, 0x560E82D25684154C), /* 97 -128 */ - (&buf[..195], secret, 0xBA5BDDBC5A767B11), /* 129-240 */ - (&buf[..403], secret, 0xFC3911BBA656DB58), /* one block, last stripe is overlapping */ - (&buf[..512], secret, 0x306137DD875741F1), /* one block, finishing at stripe boundary */ - (&buf[..2048], secret, 0x2836B83880AD3C0C), /* > one block, at least one scrambling */ - (&buf[..2243], secret, 0x3446E248A00CB44A), /* > one block, at least one scrambling, last stripe unaligned */ + (&[][..], secret, 0x3559D64878C5C66C), /* zero-length hash is always 0 */ + (&buf[..1], secret, 0x8A52451418B2DA4D), /* 1 - 3 */ + (&buf[..6], secret, 0x82C90AB0519369AD), /* 6 - 8 */ + (&buf[..12], secret, 0x14631E773B78EC57), /* 9 - 16 */ + (&buf[..24], secret, 0xCDD5542E4A9D9FE8), /* 17 - 32 */ + (&buf[..48], secret, 0x33ABD54D094B2534), /* 33 - 64 */ + (&buf[..80], secret, 0xE687BA1684965297), /* 65 - 96 */ + (&buf[..195], secret, 0xA057273F5EECFB20), /* 129-240 */ + (&buf[..403], secret, 0x14546019124D43B8), /* one block, last stripe is overlapping */ + (&buf[..512], secret, 0x7564693DD526E28D), /* one block, finishing at stripe boundary */ + (&buf[..2048], secret, 0xD32E975821D6519F), /* > one block, at least one scrambling */ + (&buf[..2367], secret, 0x293FA8E5173BB5E7), /* > one block, at least one scrambling, last stripe unaligned */ ]; for (buf, secret, result) in test_cases { @@ -1503,10 +1747,11 @@ mod tests { assert_eq!( hash, result, - "hash64_with_secret(&buf[..{}], secret) failed, got 0x{:X}, expected 0x{:X}", + "hash64_with_secret(&buf[..{}], secret) failed, got 0x{:X}, expected 0x{:X}, buf: {:?}", buf.len(), hash, - result + result, + buf ); } @@ -1521,10 +1766,11 @@ mod tests { assert_eq!( hash, result, - "Hash64::update(&buf[..{}]) with secret failed, got 0x{:X}, expected 0x{:X}", + "Hash64::update(&buf[..{}]) with secret failed, got 0x{:X}, expected 0x{:X}, buf: {:?}", buf.len(), hash, - result + result, + buf ); } @@ -1541,10 +1787,11 @@ mod tests { assert_eq!( hash, result, - "Hash64::update(&buf[..{}].chunks(1)) with secret failed, got 0x{:X}, expected 0x{:X}", + "Hash64::update(&buf[..{}].chunks(1)) with secret failed, got 0x{:X}, expected 0x{:X}, buf: {:?}", buf.len(), hash, - result + result, + buf ); } } @@ -1555,36 +1802,47 @@ mod tests { let buf = sanity_buffer(); let test_cases = vec![ - (&[][..], 0, 0u64, 0u64), /* zero-length hash is { seed, -seed } by default */ - (&[][..], PRIME, 0, 0), - (&buf[..1], 0, 0x7198D737CFE7F386, 0x3EE70EA338F3F1E8), /* 1-3 */ - (&buf[..1], PRIME, 0x8E05996EC27C0F46, 0x90DFC659A8BDCC0C), /* 1-3 */ - (&buf[..6], 0, 0x22CBF5F3E1F6257C, 0xD4E6C2B94FFC3BFA), /* 4-8 */ - (&buf[..6], PRIME, 0x97B28D3079F8541F, 0xEFC0B954298E6555), /* 4-8 */ - (&buf[..12], 0, 0x0E0CD01F05AC2F0D, 0x2B55C95951070D4B), /* 9-16 */ - (&buf[..12], PRIME, 0xA9DE561CA04CDF37, 0x609E31FDC00A43C9), /* 9-16 */ - (&buf[..24], 0, 0x46796F3F78B20F6B, 0x58FF55C3926C13FA), /* 17-32 */ - (&buf[..24], PRIME, 0x30D5C4E9EB415C55, 0x8868344B3A4645D0), /* 17-32 */ - (&buf[..48], 0, 0xD8D4D3590D136E11, 0x5527A42843020A62), /* 33-64 */ - (&buf[..48], PRIME, 0x1D8834E1A5407A1C, 0x44375B9FB060F541), /* 33-64 */ - (&buf[..81], 0, 0x4B9B448ED8DFD3DD, 0xE805A6D1A43D70E5), /* 65-96 */ - (&buf[..81], PRIME, 0xD2D6B075945617BA, 0xE58BE5736F6E7550), /* 65-96 */ - (&buf[..103], 0, 0xC5A9F97B29EFA44E, 0x254DB7BE881E125C), /* 97-128 */ - (&buf[..103], PRIME, 0xFA2086367CDB177F, 0x0AEDEA68C988B0C0), /* 97-128 */ - (&buf[..192], 0, 0xC3142FDDD9102A3F, 0x06F1747E77185F97), /* 129-240 */ - (&buf[..192], PRIME, 0xA89F07B35987540F, 0xCF1B35FB2C557F54), /* 129-240 */ - (&buf[..222], 0, 0xA61AC4EB3295F86B, 0x33FA7B7598C28A07), /* 129-240 */ - (&buf[..222], PRIME, 0x54135EB88AD8B75E, 0xBC45CE6AE50BCF53), /* 129-240 */ - (&buf[..403], 0, 0xB0C48E6D18E9D084, 0xB16FC17E992FF45D), /* one block, last stripe is overlapping */ - (&buf[..403], PRIME64, 0x0A1D320C9520871D, 0xCE11CB376EC93252), /* one block, last stripe is overlapping */ - (&buf[..512], 0, 0xA03428558AC97327, 0x4ECF51281BA406F7), /* one block, finishing at stripe boundary */ - (&buf[..512], PRIME64, 0xAF67A482D6C893F2, 0x1382D92F25B84D90), /* one block, finishing at stripe boundary */ - (&buf[..2048], 0, 0x21901B416B3B9863, 0x212AF8E6326F01E0), /* two blocks, finishing at block boundary */ - (&buf[..2048], PRIME, 0xBDBB2282577DADEC, 0xF78CDDC2C9A9A692), /* two blocks, finishing at block boundary */ - (&buf[..2240], 0, 0x00AD52FA9385B6FE, 0xC705BAD3356CE302), /* two blocks, ends at stripe boundary */ - (&buf[..2240], PRIME, 0x10FD0072EC68BFAA, 0xE1312F3458817F15), /* two blocks, ends at stripe boundary */ - (&buf[..2237], 0, 0x970C91411533862C, 0x4BBD06FF7BFF0AB1), /* two blocks, ends at stripe boundary */ - (&buf[..2237], PRIME, 0xD80282846D814431, 0x14EBB157B84D9785), /* two blocks, ends at stripe boundary */ + (&[][..], 0, 0x6001C324468D497Fu64, 0x99AA06D3014798D8u64), /* zero-length hash is { seed, -seed } by default */ + (&[][..], PRIME32, 0x5444F7869C671AB0, 0x92220AE55E14AB50), + (&buf[..1], 0, 0xC44BDFF4074EECDB, 0xA6CD5E9392000F6A), /* 1-3 */ + (&buf[..1], PRIME32, 0xB53D5557E7F76F8D, 0x89B99554BA22467C), /* 1-3 */ + (&buf[..6], 0, 0x3E7039BDDA43CFC6, 0x082AFE0B8162D12A), /* 4-8 */ + (&buf[..6], PRIME32, 0x269D8F70BE98856E, 0x5A865B5389ABD2B1), /* 4-8 */ + (&buf[..12], 0, 0x061A192713F69AD9, 0x6E3EFD8FC7802B18), /* 9-16 */ + (&buf[..12], PRIME32, 0x9BE9F9A67F3C7DFB, 0xD7E09D518A3405D3), /* 9-16 */ + (&buf[..24], 0, 0x1E7044D28B1B901D, 0x0CE966E4678D3761), /* 17-32 */ + (&buf[..24], PRIME32, 0xD7304C54EBAD40A9, 0x3162026714A6A243), /* 17-32 */ + (&buf[..48], 0, 0xF942219AED80F67B, 0xA002AC4E5478227E), /* 33-64 */ + (&buf[..48], PRIME32, 0x7BA3C3E453A1934E, 0x163ADDE36C072295), /* 33-64 */ + (&buf[..81], 0, 0x5E8BAFB9F95FB803, 0x4952F58181AB0042), /* 65-96 */ + (&buf[..81], PRIME32, 0x703FBB3D7A5F755C, 0x2724EC7ADC750FB6), /* 65-96 */ + (&buf[..222], 0, 0xF1AEBD597CEC6B3A, 0x337E09641B948717), /* 129-240 */ + (&buf[..222], PRIME32, 0xAE995BB8AF917A8D, 0x91820016621E97F1), /* 129-240 */ + (&buf[..403], 0, 0xCDEB804D65C6DEA4, 0x1B6DE21E332DD73D), /* one block, last stripe is overlapping */ + (&buf[..403], PRIME64, 0x6259F6ECFD6443FD, 0xBED311971E0BE8F2), /* one block, last stripe is overlapping */ + (&buf[..512], 0, 0x617E49599013CB6B, 0x18D2D110DCC9BCA1), /* one block, finishing at stripe boundary */ + (&buf[..512], PRIME64, 0x3CE457DE14C27708, 0x925D06B8EC5B8040), /* one block, finishing at stripe boundary */ + (&buf[..2048], 0, 0xDD59E2C3A5F038E0, 0xF736557FD47073A5), /* two nb_blocks, finishing at block boundary */ + ( + &buf[..2048], + PRIME32, + 0x230D43F30206260B, + 0x7FB03F7E7186C3EA, + ), /* two nb_blocks, finishing at block boundary */ + (&buf[..2240], 0, 0x6E73A90539CF2948, 0xCCB134FBFA7CE49D), /* two nb_blocks, ends at stripe boundary */ + ( + &buf[..2240], + PRIME32, + 0xED385111126FBA6F, + 0x50A1FE17B338995F, + ), /* two nb_blocks, ends at stripe boundary */ + (&buf[..2367], 0, 0xCB37AEB9E5D361ED, 0xE89C0F6FF369B427), /* two nb_blocks, ends at stripe boundary */ + ( + &buf[..2367], + PRIME32, + 0x6F5360AE69C2F406, + 0xD23AAE4B76C31ECB, + ), /* two nb_blocks, ends at stripe boundary */ ]; for (buf, seed, lo, hi) in test_cases { @@ -1596,11 +1854,27 @@ mod tests { assert_eq!( hash, result, - "hash128_with_seed(&buf[..{}], seed={}) failed, got 0x{:X}, expected 0x{:X}", + "hash128_with_seed(&buf[..{}], seed={}) failed, got 0x{:X}, expected 0x{:X}, buf: {:?}", buf.len(), seed, hash, - result + result, + buf + ); + } + + // check that the no-seed variant produces same result as seed==0 + if seed == 0 { + let hash = hash128(buf); + + assert_eq!( + hash, + result, + "hash128(&buf[..{}]) failed, got 0x{:X}, expected 0x{:X}, buf: {:?}", + buf.len(), + hash, + result, + buf ); } @@ -1615,11 +1889,37 @@ mod tests { assert_eq!( hash, result, - "Hash128::update(&buf[..{}]) with seed={} failed, got 0x{:X}, expected 0x{:X}", + "Hash128::update(&buf[..{}]) with seed={} failed, got 0x{:X}, expected 0x{:X}, buf: {:?}", + buf.len(), + seed, + hash, + result, + buf + ); + } + + // random ingestion + { + let mut hasher = Hash128::with_seed(seed); + let len = buf.len(); + let modulo = cmp::max(len, 2); + let mut n = 0; + while n < len { + let l = (rand::random::() % modulo).min(len - n); + hasher.write(&buf[n..n + l]); + n += l; + } + let hash = hasher.finish_ext(); + + assert_eq!( + hash, + result, + "Hash128::update(&buf[..{}]) with seed={} failed, got 0x{:X}, expected 0x{:X}, buf: {:?}", buf.len(), seed, hash, - result + result, + buf ); } @@ -1633,11 +1933,12 @@ mod tests { assert_eq!( hash, result, - "Hash64::update(&buf[..3], &buf[3..{}]) with seed={} failed, got 0x{:X}, expected 0x{:X}", + "Hash128::update(&buf[..3], &buf[3..{}]) with seed={} failed, got 0x{:X}, expected 0x{:X}, buf: {:?}", buf.len(), seed, hash, - result + result, + buf ); } @@ -1654,11 +1955,88 @@ mod tests { assert_eq!( hash, result, - "Hash64::update(&buf[..{}].chunks(1)) with seed={} failed, got 0x{:X}, expected 0x{:X}", + "Hash128::update(&buf[..{}].chunks(1)) with seed={} failed, got 0x{:X}, expected 0x{:X}, buf: {:?}", buf.len(), seed, hash, - result + result, + buf + ); + } + } + } + + #[test] + fn hash_128bits_with_secret_sanity_check() { + let buf = sanity_buffer(); + let secret = &buf[7..7 + Secret::SIZE_MIN + 11]; + + let test_cases = vec![ + ( + &[][..], + secret, + 0x005923CCEECBE8AEu64, + 0x5F70F4EA232F1D38u64, + ), /* zero-length hash is always 0 */ + (&buf[..1], secret, 0x8A52451418B2DA4D, 0x3A66AF5A9819198E), /* 1 - 3 */ + (&buf[..6], secret, 0x0B61C8ACA7D4778F, 0x376BD91B6432F36D), /* 6 - 8 */ + (&buf[..12], secret, 0xAF82F6EBA263D7D8, 0x90A3C2D839F57D0F), /* 9 - 16 */ + ]; + + for (buf, secret, lo, hi) in test_cases { + let result = u128::from(lo) + (u128::from(hi) << 64); + + { + let hash = hash128_with_secret(buf, secret); + + assert_eq!( + hash, + result, + "hash128_with_secret(&buf[..{}], secret) failed, got 0x{:X}, expected 0x{:X}, buf: {:?}", + buf.len(), + hash, + result, + buf + ); + } + + // streaming API test + + // single ingestio + { + let mut hasher = Hash128::with_secret(secret); + hasher.write(buf); + let hash = hasher.finish_ext(); + + assert_eq!( + hash, + result, + "Hash128::update(&buf[..{}]) with secret failed, got 0x{:X}, expected 0x{:X}, buf: {:?}", + buf.len(), + hash, + result, + buf + ); + } + + // byte by byte ingestion + { + let mut hasher = Hash128::with_secret(secret); + + for chunk in buf.chunks(1) { + hasher.write(chunk); + } + + let hash = hasher.finish_ext(); + + assert_eq!( + hash, + result, + "Hash128::update(&buf[..{}].chunks(1)) with secret failed, got 0x{:X}, expected 0x{:X}, buf: {:?}", + buf.len(), + hash, + result, + buf ); } }