Skip to content

Commit 72532bd

Browse files
mvogttechclaude
andcommitted
fix: add SHA-1 portable fallback and comprehensive test coverage
SHA-NI intrinsics produce wrong results under MSVC 2022 /arch:AVX2 (ABCD state register corrupted, only E word correct). Added portable C SHA-1 (RFC 3174) with runtime validation — hashes "abc" against the FIPS 180-1 vector on first call and falls back if SHA-NI is broken. Tests expanded from 89 to 157 assertions covering sha1(), findHeader(), mask/unmask alignment offsets, SIMD boundary transitions, NT-store threshold, and base64/utf8 edge cases. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 1afec15 commit 72532bd

2 files changed

Lines changed: 548 additions & 3 deletions

File tree

src/ws_sha1_ni.c

Lines changed: 86 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,17 @@
1-
/* ws_sha1_ni.c — SHA-1 with Intel SHA-NI
1+
/* ws_sha1_ni.c — SHA-1 with Intel SHA-NI + portable C fallback
22
* Re-derived from Intel whitepaper, counting exactly 20 sha1rnds4 calls.
33
* Pattern: E0/E1 ping-pong, msg schedule runs from rounds 4-55.
4+
*
5+
* The portable fallback is used when SHA-NI is unavailable or when MSVC
6+
* generates broken code for the SHA-NI intrinsics (validated at runtime).
47
*/
58
#include <stdint.h>
69
#include <string.h>
7-
#include <immintrin.h>
10+
11+
/* ── CPU feature detection ─────────────────────────────────────────── */
812

913
#ifdef _MSC_VER
14+
#include <intrin.h>
1015
int ws_has_sha_ni(void) {
1116
int info[4];
1217
__cpuidex(info, 7, 0);
@@ -21,6 +26,58 @@ int ws_has_sha_ni(void) {
2126
}
2227
#endif
2328

29+
/* ── Portable SHA-1 (RFC 3174) ─────────────────────────────────────── */
30+
31+
static uint32_t rol32(uint32_t x, int n) { return (x << n) | (x >> (32-n)); }
32+
33+
static void sha1_portable_block(uint32_t state[5], const uint8_t data[64]) {
34+
uint32_t W[80];
35+
for (int i = 0; i < 16; i++)
36+
W[i] = ((uint32_t)data[i*4]<<24) | ((uint32_t)data[i*4+1]<<16) |
37+
((uint32_t)data[i*4+2]<<8) | data[i*4+3];
38+
for (int i = 16; i < 80; i++)
39+
W[i] = rol32(W[i-3] ^ W[i-8] ^ W[i-14] ^ W[i-16], 1);
40+
41+
uint32_t a=state[0], b=state[1], c=state[2], d=state[3], e=state[4];
42+
for (int i = 0; i < 80; i++) {
43+
uint32_t f, k;
44+
if (i < 20) { f = (b&c) ^ (~b&d); k = 0x5A827999; }
45+
else if (i < 40) { f = b^c^d; k = 0x6ED9EBA1; }
46+
else if (i < 60) { f = (b&c) ^ (b&d) ^ (c&d); k = 0x8F1BBCDC; }
47+
else { f = b^c^d; k = 0xCA62C1D6; }
48+
uint32_t t = rol32(a,5) + f + e + k + W[i];
49+
e=d; d=c; c=rol32(b,30); b=a; a=t;
50+
}
51+
state[0]+=a; state[1]+=b; state[2]+=c; state[3]+=d; state[4]+=e;
52+
}
53+
54+
static void sha1_portable(const uint8_t *msg, size_t len, uint8_t out[20]) {
55+
uint8_t padded[128];
56+
memset(padded, 0, len <= 55 ? 64 : 128);
57+
if (len) memcpy(padded, msg, len);
58+
padded[len] = 0x80;
59+
int nblocks;
60+
if (len <= 55) {
61+
nblocks = 1;
62+
uint64_t bits = (uint64_t)len << 3;
63+
for (int i = 0; i < 8; i++) padded[63-i] = (uint8_t)(bits >> (i*8));
64+
} else {
65+
nblocks = 2;
66+
uint64_t bits = (uint64_t)len << 3;
67+
for (int i = 0; i < 8; i++) padded[127-i] = (uint8_t)(bits >> (i*8));
68+
}
69+
uint32_t state[5] = {0x67452301,0xEFCDAB89,0x98BADCFE,0x10325476,0xC3D2E1F0};
70+
for (int b = 0; b < nblocks; b++) sha1_portable_block(state, padded + b*64);
71+
for (int i = 0; i < 5; i++) {
72+
out[i*4+0]=(uint8_t)(state[i]>>24); out[i*4+1]=(uint8_t)(state[i]>>16);
73+
out[i*4+2]=(uint8_t)(state[i]>>8); out[i*4+3]=(uint8_t)(state[i]);
74+
}
75+
}
76+
77+
/* ── SHA-NI accelerated path ───────────────────────────────────────── */
78+
79+
#include <immintrin.h>
80+
2481
static void sha1_ni_block(uint32_t state[5], const uint8_t data[64]) {
2582
__m128i ABCD, ABCD_SAVE, E0, E0_SAVE, E1;
2683
__m128i MSG0, MSG1, MSG2, MSG3;
@@ -89,7 +146,7 @@ static void sha1_ni_block(uint32_t state[5], const uint8_t data[64]) {
89146
state[4] = (uint32_t)_mm_extract_epi32(E0, 3);
90147
}
91148

92-
void ws_sha1_ni(const uint8_t *msg, size_t len, uint8_t out[20]) {
149+
static void sha1_ni(const uint8_t *msg, size_t len, uint8_t out[20]) {
93150
uint8_t padded[128];
94151
memset(padded, 0, len <= 55 ? 64 : 128);
95152
if (len) memcpy(padded, msg, len);
@@ -111,3 +168,29 @@ void ws_sha1_ni(const uint8_t *msg, size_t len, uint8_t out[20]) {
111168
out[i*4+2]=(uint8_t)(state[i]>>8); out[i*4+3]=(uint8_t)(state[i]);
112169
}
113170
}
171+
172+
/* ── Runtime dispatch: validate SHA-NI, fall back to portable ──────── */
173+
174+
/* SHA-1("abc") = a9993e36 4706816a ba3e2571 7850c26c 9cd0d89d */
175+
static const uint8_t sha1_abc_expected[20] = {
176+
0xa9,0x99,0x3e,0x36,0x47,0x06,0x81,0x6a,0xba,0x3e,
177+
0x25,0x71,0x78,0x50,0xc2,0x6c,0x9c,0xd0,0xd8,0x9d
178+
};
179+
180+
static int sha_ni_ok = -1; /* -1 = untested, 0 = broken, 1 = working */
181+
182+
void ws_sha1_ni(const uint8_t *msg, size_t len, uint8_t out[20]) {
183+
if (sha_ni_ok == -1) {
184+
if (ws_has_sha_ni()) {
185+
uint8_t test[20];
186+
sha1_ni((const uint8_t*)"abc", 3, test);
187+
sha_ni_ok = (memcmp(test, sha1_abc_expected, 20) == 0);
188+
} else {
189+
sha_ni_ok = 0;
190+
}
191+
}
192+
if (sha_ni_ok)
193+
sha1_ni(msg, len, out);
194+
else
195+
sha1_portable(msg, len, out);
196+
}

0 commit comments

Comments
 (0)