/* ws_sha1_ni.c — SHA-1 with Intel SHA-NI + portable C fallback
 * Re-derived from Intel whitepaper, counting exactly 20 sha1rnds4 calls.
 * Pattern: E0/E1 ping-pong, msg schedule runs from rounds 4-55.
 *
 * The portable fallback is used when SHA-NI is unavailable or when MSVC
 * generates broken code for the SHA-NI intrinsics (validated at runtime).
 */
58#include <stdint.h>
69#include <string.h>
7- #include <immintrin.h>
10+
11+ /* ── CPU feature detection ─────────────────────────────────────────── */
812
913#ifdef _MSC_VER
14+ #include <intrin.h>
1015int ws_has_sha_ni (void ) {
1116 int info [4 ];
1217 __cpuidex (info , 7 , 0 );
@@ -21,6 +26,58 @@ int ws_has_sha_ni(void) {
2126}
2227#endif
2328
29+ /* ── Portable SHA-1 (RFC 3174) ─────────────────────────────────────── */
30+
/*
 * Rotate the 32-bit word `x` left by `n` bit positions.
 *
 * The rotate count is reduced modulo 32 before shifting, so n == 0 and
 * n == 32 return `x` unchanged instead of evaluating a shift by the full
 * word width, which is undefined behaviour in C.
 */
static uint32_t rol32(uint32_t x, int n) {
    const unsigned left = (unsigned)n & 31u;
    /* Masking again maps the left == 0 case to a right shift of 0, not 32. */
    const unsigned right = (32u - left) & 31u;
    return (x << left) | (x >> right);
}
32+
/*
 * Run the SHA-1 compression function (RFC 3174) over one 64-byte block.
 *
 * state: the five 32-bit chaining words; updated in place.
 * data:  64 input bytes, read as sixteen big-endian 32-bit words.
 */
static void sha1_portable_block(uint32_t state[5], const uint8_t data[64]) {
    uint32_t sched[80];
    int t;

    /* Schedule words 0..15 come straight from the block, high byte first. */
    for (t = 0; t < 16; t++) {
        const uint8_t *p = data + 4 * t;
        sched[t] = ((uint32_t)p[0] << 24) | ((uint32_t)p[1] << 16) |
                   ((uint32_t)p[2] << 8) | (uint32_t)p[3];
    }
    /* Words 16..79 are expanded from four earlier words, rotated by one. */
    for (; t < 80; t++) {
        sched[t] = rol32(sched[t - 16] ^ sched[t - 14] ^ sched[t - 8] ^ sched[t - 3], 1);
    }

    uint32_t va = state[0];
    uint32_t vb = state[1];
    uint32_t vc = state[2];
    uint32_t vd = state[3];
    uint32_t ve = state[4];

    for (t = 0; t < 80; t++) {
        uint32_t mix;
        uint32_t konst;

        /* Each group of 20 rounds has its own boolean function and constant. */
        switch (t / 20) {
        case 0:
            mix = vd ^ (vb & (vc ^ vd)); /* choose: vb ? vc : vd */
            konst = 0x5A827999u;
            break;
        case 1:
            mix = vb ^ vc ^ vd; /* parity */
            konst = 0x6ED9EBA1u;
            break;
        case 2:
            mix = (vb & vc) | (vd & (vb | vc)); /* majority */
            konst = 0x8F1BBCDCu;
            break;
        default:
            mix = vb ^ vc ^ vd; /* parity */
            konst = 0xCA62C1D6u;
            break;
        }

        const uint32_t next = rol32(va, 5) + mix + ve + konst + sched[t];
        ve = vd;
        vd = vc;
        vc = rol32(vb, 30);
        vb = va;
        va = next;
    }

    /* Fold the working variables back into the chaining state. */
    state[0] += va;
    state[1] += vb;
    state[2] += vc;
    state[3] += vd;
    state[4] += ve;
}
53+
/*
 * Compute the SHA-1 digest of the `len` bytes at `msg` using the portable
 * block function, and store the 20-byte digest in `out` (big-endian words).
 *
 * Complete 64-byte blocks are hashed directly from `msg`; only the trailing
 * partial block (at most 63 bytes) is copied into a local buffer to receive
 * the 0x80 marker, zero fill and 64-bit bit length. The local buffer
 * therefore never holds more than 63 message bytes, so messages of any
 * length are handled without overrunning it.
 *
 * msg: input bytes; may be NULL when len is 0.
 * len: number of input bytes.
 * out: receives the 20-byte digest.
 */
static void sha1_portable(const uint8_t *msg, size_t len, uint8_t out[20]) {
    uint32_t state[5] = {0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476, 0xC3D2E1F0};

    /* Consume every complete block in place. */
    const uint8_t *cursor = msg;
    size_t remaining = len;
    while (remaining >= 64) {
        sha1_portable_block(state, cursor);
        cursor += 64;
        remaining -= 64;
    }

    /*
     * Padding needs 1 marker byte plus 8 length bytes; if the tail is longer
     * than 55 bytes those do not fit in one block and a second one is used.
     */
    uint8_t tail[128];
    const size_t tail_len = (remaining <= 55) ? 64 : 128;
    memset(tail, 0, tail_len);
    if (remaining) {
        memcpy(tail, cursor, remaining);
    }
    tail[remaining] = 0x80;

    /* Message length in bits, big-endian, in the last 8 bytes of the padding. */
    const uint64_t bits = (uint64_t)len << 3;
    for (int i = 0; i < 8; i++) {
        tail[tail_len - 1 - (size_t)i] = (uint8_t)(bits >> (i * 8));
    }

    for (size_t off = 0; off < tail_len; off += 64) {
        sha1_portable_block(state, tail + off);
    }

    /* Serialise the five state words most significant byte first. */
    for (int i = 0; i < 5; i++) {
        out[i * 4 + 0] = (uint8_t)(state[i] >> 24);
        out[i * 4 + 1] = (uint8_t)(state[i] >> 16);
        out[i * 4 + 2] = (uint8_t)(state[i] >> 8);
        out[i * 4 + 3] = (uint8_t)(state[i]);
    }
}
76+
77+ /* ── SHA-NI accelerated path ───────────────────────────────────────── */
78+
79+ #include <immintrin.h>
80+
2481static void sha1_ni_block (uint32_t state [5 ], const uint8_t data [64 ]) {
2582 __m128i ABCD , ABCD_SAVE , E0 , E0_SAVE , E1 ;
2683 __m128i MSG0 , MSG1 , MSG2 , MSG3 ;
@@ -89,7 +146,7 @@ static void sha1_ni_block(uint32_t state[5], const uint8_t data[64]) {
89146 state [4 ] = (uint32_t )_mm_extract_epi32 (E0 , 3 );
90147}
91148
92- void ws_sha1_ni (const uint8_t * msg , size_t len , uint8_t out [20 ]) {
149+ static void sha1_ni (const uint8_t * msg , size_t len , uint8_t out [20 ]) {
93150 uint8_t padded [128 ];
94151 memset (padded , 0 , len <= 55 ? 64 : 128 );
95152 if (len ) memcpy (padded , msg , len );
@@ -111,3 +168,29 @@ void ws_sha1_ni(const uint8_t *msg, size_t len, uint8_t out[20]) {
111168 out [i * 4 + 2 ]= (uint8_t )(state [i ]>>8 ); out [i * 4 + 3 ]= (uint8_t )(state [i ]);
112169 }
113170}
171+
172+ /* ── Runtime dispatch: validate SHA-NI, fall back to portable ──────── */
173+
/* Known-answer vector compared against the SHA-NI output during dispatch:
 * SHA-1("abc") = a9993e36 4706816a ba3e2571 7850c26c 9cd0d89d
 * Laid out one 32-bit digest word per row. */
static const uint8_t sha1_abc_expected[20] = {
    0xa9, 0x99, 0x3e, 0x36,
    0x47, 0x06, 0x81, 0x6a,
    0xba, 0x3e, 0x25, 0x71,
    0x78, 0x50, 0xc2, 0x6c,
    0x9c, 0xd0, 0xd8, 0x9d
};
179+
180+ static int sha_ni_ok = -1 ; /* -1 = untested, 0 = broken, 1 = working */
181+
182+ void ws_sha1_ni (const uint8_t * msg , size_t len , uint8_t out [20 ]) {
183+ if (sha_ni_ok == -1 ) {
184+ if (ws_has_sha_ni ()) {
185+ uint8_t test [20 ];
186+ sha1_ni ((const uint8_t * )"abc" , 3 , test );
187+ sha_ni_ok = (memcmp (test , sha1_abc_expected , 20 ) == 0 );
188+ } else {
189+ sha_ni_ok = 0 ;
190+ }
191+ }
192+ if (sha_ni_ok )
193+ sha1_ni (msg , len , out );
194+ else
195+ sha1_portable (msg , len , out );
196+ }
0 commit comments