|
| 1 | +use std::fmt::Write; |
| 2 | + |
| 3 | +// MD5 hash function implementation |
| 4 | +// Reference: https://www.ietf.org/rfc/rfc1321.txt |
| 5 | +// |
| 6 | +// MD5 produces a 128-bit (16-byte) hash value. |
| 7 | +// Note: MD5 is cryptographically broken and should NOT be used for security |
| 8 | +// purposes. It remains useful for checksums and non-security applications. |
| 9 | + |
/// Per-step left-rotation amounts (RFC 1321, §3.4), indexed by step number.
/// Each row holds the 16 steps of one round.
const S: [u32; 64] = [
    7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22, // Round 1
    5, 9, 14, 20, 5, 9, 14, 20, 5, 9, 14, 20, 5, 9, 14, 20, // Round 2
    4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23, // Round 3
    6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21, // Round 4
];

/// Additive step constants (RFC 1321, §3.4): entry `i` is the integer part of
/// `abs(sin(i + 1)) * 2^32`, with the angle in radians.
const K: [u32; 64] = [
    0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee, 0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501,
    0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be, 0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821,
    0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa, 0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8,
    0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed, 0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a,
    0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c, 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70,
    0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05, 0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665,
    0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039, 0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1,
    0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1, 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391,
];

/// Initial values of the four chaining words A, B, C, D (RFC 1321, §3.3).
const INIT_STATE: [u32; 4] = [0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476];

/// Folds one 64-byte block into the running MD5 state (RFC 1321, §3.4).
///
/// `block` must be exactly 64 bytes long; both call sites in [`md5`] only
/// pass slices produced by `chunks_exact(64)`.
fn md5_compress(state: &mut [u32; 4], block: &[u8]) {
    debug_assert_eq!(block.len(), 64);

    // Decode the block as sixteen little-endian 32-bit words.
    let mut m = [0u32; 16];
    for (word, bytes) in m.iter_mut().zip(block.chunks_exact(4)) {
        *word = u32::from_le_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]);
    }

    let [mut a, mut b, mut c, mut d] = *state;

    for i in 0..64usize {
        // Each round of 16 steps has its own mixing function `f` and its own
        // order `g` for walking the message words.
        let (f, g) = match i / 16 {
            0 => ((b & c) | (!b & d), i),
            1 => ((d & b) | (!d & c), (5 * i + 1) % 16),
            2 => (b ^ c ^ d, (3 * i + 5) % 16),
            _ => (c ^ (b | !d), (7 * i) % 16),
        };

        let rotated = a
            .wrapping_add(f)
            .wrapping_add(K[i])
            .wrapping_add(m[g])
            .rotate_left(S[i]);

        // Rotate the working registers: (a, b, c, d) <- (d, b + rotated, b, c).
        a = d;
        d = c;
        c = b;
        b = b.wrapping_add(rotated);
    }

    // All MD5 arithmetic is modulo 2^32, hence the wrapping additions.
    state[0] = state[0].wrapping_add(a);
    state[1] = state[1].wrapping_add(b);
    state[2] = state[2].wrapping_add(c);
    state[3] = state[3].wrapping_add(d);
}

/// Computes the MD5 hash of the given byte slice.
///
/// Returns a 16-byte array representing the 128-bit digest. The input is
/// never copied to the heap: full 64-byte blocks are hashed in place and only
/// the final partial block is padded, in a fixed-size stack buffer.
///
/// MD5 is cryptographically broken; use it for checksums only, never for
/// security purposes.
pub fn md5(input: &[u8]) -> [u8; 16] {
    let mut state = INIT_STATE;

    // --- Full blocks: compressed straight out of `input` ---
    let mut blocks = input.chunks_exact(64);
    for block in &mut blocks {
        md5_compress(&mut state, block);
    }

    // --- Padding (RFC 1321, §3.1-3.2) applied to the 0..=63 leftover bytes ---
    // Append 0x80, then zeros, then the message length in bits as a 64-bit
    // little-endian integer, so the padded tail is a whole number of blocks.
    let tail = blocks.remainder();
    // The length is taken modulo 2^64, as the RFC specifies.
    let bit_len = (input.len() as u64).wrapping_mul(8);

    let mut padded = [0u8; 128];
    padded[..tail.len()].copy_from_slice(tail);
    padded[tail.len()] = 0x80;
    // The 0x80 marker plus the 8 length bytes fit in the same block only when
    // the tail is at most 55 bytes; otherwise a second block is required.
    let padded_len = if tail.len() < 56 { 64 } else { 128 };
    padded[padded_len - 8..padded_len].copy_from_slice(&bit_len.to_le_bytes());

    for block in padded[..padded_len].chunks_exact(64) {
        md5_compress(&mut state, block);
    }

    // --- Digest: the four state words serialized little-endian, in order ---
    let mut digest = [0u8; 16];
    for (out, word) in digest.chunks_exact_mut(4).zip(state.iter()) {
        out.copy_from_slice(&word.to_le_bytes());
    }
    digest
}
| 93 | + |
| 94 | +/// Convenience helper: returns the MD5 digest as a lowercase hex string. |
| 95 | +pub fn md5_hex(input: &[u8]) -> String { |
| 96 | + md5(input) |
| 97 | + .iter() |
| 98 | + .fold(String::with_capacity(32), |mut s, b| { |
| 99 | + write!(s, "{b:02x}").unwrap(); |
| 100 | + s |
| 101 | + }) |
| 102 | +} |
| 103 | + |
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that the hex digest of `input` equals `expected`.
    /// `#[track_caller]` keeps failure locations pointing at the calling test.
    #[track_caller]
    fn assert_hex(input: &[u8], expected: &str) {
        assert_eq!(md5_hex(input), expected);
    }

    // Known-answer tests: inputs and digests from the RFC 1321 (Appendix A.5)
    // test suite.

    #[test]
    fn test_empty_string() {
        assert_hex(b"", "d41d8cd98f00b204e9800998ecf8427e");
    }

    #[test]
    fn test_abc() {
        assert_hex(b"abc", "900150983cd24fb0d6963f7d28e17f72");
    }

    #[test]
    fn test_rfc_message() {
        assert_hex(b"message digest", "f96b697d7cb7938d525a2f31aaf161d0");
    }

    #[test]
    fn test_alphabet() {
        assert_hex(
            b"abcdefghijklmnopqrstuvwxyz",
            "c3fcd3d76192e4007dfb496cca67e13b",
        );
    }

    #[test]
    fn test_alphanumeric() {
        assert_hex(
            b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789",
            "d174ab98d277d9f5a5611c2c9f419d9f",
        );
    }

    #[test]
    fn test_digits_repeated() {
        // 80 bytes: the only vector here that spans more than one 64-byte block.
        assert_hex(
            b"12345678901234567890123456789012345678901234567890123456789012345678901234567890",
            "57edf4a22be3c955ac49da2e2107b67a",
        );
    }

    #[test]
    fn test_single_char() {
        assert_hex(b"a", "0cc175b9c0f1b6a831c399e269772661");
    }

    // Structural sanity checks on the raw digest.

    #[test]
    fn test_returns_16_bytes() {
        let digest = md5(b"hello");
        assert_eq!(digest.len(), 16);
    }

    #[test]
    fn test_deterministic() {
        let first = md5(b"rust");
        let second = md5(b"rust");
        assert_eq!(first, second);
    }

    #[test]
    fn test_different_inputs_differ() {
        let foo = md5(b"foo");
        let bar = md5(b"bar");
        assert_ne!(foo, bar);
    }
}
0 commit comments