Skip to content

Commit d6e7f28

Browse files
committed
SSSE3 (I386) Implementation
1 parent c72c04d commit d6e7f28

8 files changed

Lines changed: 2395 additions & 8 deletions

File tree

HashLib/src/Checksum/HlpAdler32Dispatch.pas

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ procedure Adler32_Update_Scalar(AData: PByte; ALength: UInt32; ASums: Pointer);
6464
end;
6565

6666
// =============================================================================
67-
// SIMD implementations: SSE2 (IA-32); SSE2 / SSSE3 / AVX2 (x86-64)
67+
// SIMD implementations: SSE2 / SSSE3 (IA-32); SSE2 / SSSE3 / AVX2 (x86-64)
6868
// =============================================================================
6969

7070
{$IFDEF HASHLIB_X86_SIMD}
@@ -122,6 +122,12 @@ procedure Adler32_ProcessBlocks_Sse2(AData: PByte; ANumBlocks: UInt32;
122122
{$I ..\Include\Simd\Adler32\Adler32BlocksSse2_i386.inc}
123123
end;
124124

125+
// SSSE3 Adler-32 block routine for IA-32 (guarded by HASHLIB_I386_ASM).
// This unit contributes no statements of its own: everything between the
// parameter list and the closing end comes from the two include files below.
// NOTE(review): SimdProc4Begin_i386.inc is not visible here; presumably it opens
// the asm body and moves the four parameters into registers - confirm.
procedure Adler32_ProcessBlocks_Ssse3(AData: PByte; ANumBlocks: UInt32;
126+
ASums, AConstants: Pointer);
127+
{$I ..\Include\Simd\Common\SimdProc4Begin_i386.inc}
128+
{$I ..\Include\Simd\Adler32\Adler32BlocksSsse3_i386.inc}
129+
end;
130+
125131
{$ENDIF HASHLIB_I386_ASM}
126132

127133
{$IFDEF HASHLIB_X86_64_ASM}
@@ -163,6 +169,15 @@ procedure Adler32_Update_Sse2(AData: PByte; ALength: UInt32; ASums: Pointer);
163169
Adler32_Update_Simd(AData, ALength, ASums, @Adler32_ProcessBlocks_Sse2);
164170
end;
165171

172+
{$IFDEF HASHLIB_I386_ASM}
173+
174+
// Adler-32 update entry for the IA-32 SSSE3 path. Forwards its three arguments
// unchanged to Adler32_Update_Simd and passes Adler32_ProcessBlocks_Ssse3 as the
// block routine. InitDispatch installs this when the active level is SSSE3.
procedure Adler32_Update_Ssse3(AData: PByte; ALength: UInt32; ASums: Pointer);
175+
begin
176+
Adler32_Update_Simd(AData, ALength, ASums, @Adler32_ProcessBlocks_Ssse3);
177+
end;
178+
179+
{$ENDIF HASHLIB_I386_ASM}
180+
166181
{$ENDIF HASHLIB_X86_SIMD}
167182

168183
// =============================================================================
@@ -174,7 +189,11 @@ procedure InitDispatch();
174189
Adler32_Update := @Adler32_Update_Scalar;
175190
{$IFDEF HASHLIB_I386_ASM}
176191
case TSimd.GetActiveLevel() of
177-
TSimdLevel.SSE2, TSimdLevel.SSSE3:
192+
TSimdLevel.SSSE3:
193+
begin
194+
Adler32_Update := @Adler32_Update_Ssse3;
195+
end;
196+
TSimdLevel.SSE2:
178197
begin
179198
Adler32_Update := @Adler32_Update_Sse2;
180199
end;

HashLib/src/Crypto/HlpSHA1Dispatch.pas

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@ procedure SHA1_Compress_Scalar(AState, AData: Pointer; ANumBlocks: UInt32);
102102
end;
103103

104104
// =============================================================================
105-
// SIMD implementations: SSE2 (IA-32); ShaNi, SSE2, SSSE3, AVX2 (x86-64)
105+
// SIMD implementations: SSE2 / SSSE3 (IA-32); ShaNi, SSE2, SSSE3, AVX2 (x86-64)
106106
// =============================================================================
107107

108108
{$IFDEF HASHLIB_I386_ASM}
@@ -112,6 +112,17 @@ procedure SHA1_Compress_Sse2(AState, AData: Pointer; ANumBlocks: UInt32);
112112
{$I ..\Include\Simd\SHA1\SHA1CompressSse2_i386.inc}
113113
end;
114114

115+
// SSSE3 SHA-1 compress routine for IA-32 (guarded by HASHLIB_I386_ASM).
// Takes the round-constant table as an explicit fourth argument (AConstants).
// The body comes entirely from the two include files below.
// NOTE(review): neither include is visible in this diff; register assignment and
// stack layout are presumably set up by SimdProc4Begin_i386.inc - confirm.
procedure SHA1_Compress_Ssse3(AState, AData: Pointer; ANumBlocks: UInt32;
116+
AConstants: Pointer);
117+
{$I ..\Include\Simd\Common\SimdProc4Begin_i386.inc}
118+
{$I ..\Include\Simd\SHA1\SHA1CompressSsse3_i386.inc}
119+
end;
120+
121+
// Three-argument adapter around SHA1_Compress_Ssse3: supplies @K_SHA1 as the
// AConstants argument so the routine matches the signature that InitDispatch
// assigns to SHA1_Compress for the SSSE3 level.
procedure SHA1_Compress_Ssse3_Wrap(AState, AData: Pointer; ANumBlocks: UInt32);
122+
begin
123+
SHA1_Compress_Ssse3(AState, AData, ANumBlocks, @K_SHA1);
124+
end;
125+
115126
{$ENDIF HASHLIB_I386_ASM}
116127

117128
{$IFDEF HASHLIB_X86_64_ASM}
@@ -165,7 +176,11 @@ procedure InitDispatch();
165176
SHA1_Compress := @SHA1_Compress_Scalar;
166177
{$IFDEF HASHLIB_I386_ASM}
167178
case TSimd.GetActiveLevel() of
168-
TSimdLevel.SSE2, TSimdLevel.SSSE3:
179+
TSimdLevel.SSSE3:
180+
begin
181+
SHA1_Compress := @SHA1_Compress_Ssse3_Wrap;
182+
end;
183+
TSimdLevel.SSE2:
169184
begin
170185
SHA1_Compress := @SHA1_Compress_Sse2;
171186
end;

HashLib/src/Crypto/HlpSHA2_256Dispatch.pas

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ procedure SHA256_Compress_Scalar(AState, AData: Pointer; ANumBlocks: UInt32);
9797
end;
9898

9999
// =============================================================================
100-
// SIMD implementations: SSE2 (IA-32); ShaNi, SSE2, SSSE3, AVX2 (x86-64)
100+
// SIMD implementations: SSE2 / SSSE3 (IA-32); ShaNi, SSE2, SSSE3, AVX2 (x86-64)
101101
// =============================================================================
102102

103103
{$IFDEF HASHLIB_I386_ASM}
@@ -108,6 +108,12 @@ procedure SHA256_Compress_Sse2(AState, AData: Pointer; ANumBlocks: UInt32;
108108
{$I ..\Include\Simd\SHA256\SHA256CompressSse2_i386.inc}
109109
end;
110110

111+
// SSSE3 SHA-256 compress routine for IA-32 (guarded by HASHLIB_I386_ASM).
// Takes the round-constant table as an explicit fourth argument (AConstants).
// The body comes entirely from the two include files below.
// NOTE(review): neither include is visible in this diff; register assignment and
// stack layout are presumably set up by SimdProc4Begin_i386.inc - confirm.
procedure SHA256_Compress_Ssse3(AState, AData: Pointer; ANumBlocks: UInt32;
112+
AConstants: Pointer);
113+
{$I ..\Include\Simd\Common\SimdProc4Begin_i386.inc}
114+
{$I ..\Include\Simd\SHA256\SHA256CompressSsse3_i386.inc}
115+
end;
116+
111117
{$ENDIF HASHLIB_I386_ASM}
112118

113119
{$IFDEF HASHLIB_X86_64_ASM}
@@ -160,6 +166,15 @@ procedure SHA256_Compress_Sse2_Wrap(AState, AData: Pointer; ANumBlocks: UInt32);
160166
SHA256_Compress_Sse2(AState, AData, ANumBlocks, @K256);
161167
end;
162168

169+
{$IFDEF HASHLIB_I386_ASM}
170+
171+
// Three-argument adapter around SHA256_Compress_Ssse3: supplies @K256 as the
// AConstants argument so the routine matches the signature that InitDispatch
// assigns to SHA256_Compress for the SSSE3 level.
procedure SHA256_Compress_Ssse3_Wrap(AState, AData: Pointer; ANumBlocks: UInt32);
172+
begin
173+
SHA256_Compress_Ssse3(AState, AData, ANumBlocks, @K256);
174+
end;
175+
176+
{$ENDIF HASHLIB_I386_ASM}
177+
163178
{$ENDIF HASHLIB_X86_SIMD}
164179

165180
// =============================================================================
@@ -171,7 +186,11 @@ procedure InitDispatch();
171186
SHA256_Compress := @SHA256_Compress_Scalar;
172187
{$IFDEF HASHLIB_I386_ASM}
173188
case TSimd.GetActiveLevel() of
174-
TSimdLevel.SSE2, TSimdLevel.SSSE3:
189+
TSimdLevel.SSSE3:
190+
begin
191+
SHA256_Compress := @SHA256_Compress_Ssse3_Wrap;
192+
end;
193+
TSimdLevel.SSE2:
175194
begin
176195
SHA256_Compress := @SHA256_Compress_Sse2_Wrap;
177196
end;

HashLib/src/Crypto/HlpSHA2_512Dispatch.pas

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ procedure SHA512_Compress_Scalar(AState, AData: Pointer; ANumBlocks: UInt32);
121121
end;
122122

123123
// =============================================================================
124-
// SIMD implementations: SSE2 (IA-32); SSE2 / SSSE3 / AVX2 (x86-64)
124+
// SIMD implementations: SSE2 / SSSE3 (IA-32); SSE2 / SSSE3 / AVX2 (x86-64)
125125
// =============================================================================
126126

127127
{$IFDEF HASHLIB_I386_ASM}
@@ -132,6 +132,17 @@ procedure SHA512_Compress_Sse2(AState, AData: Pointer; ANumBlocks: UInt32;
132132
{$I ..\Include\Simd\SHA512\SHA512CompressSse2_i386.inc}
133133
end;
134134

135+
// SSSE3 SHA-512 compress routine for IA-32 (guarded by HASHLIB_I386_ASM).
// Takes the round-constant table as an explicit fourth argument (AConstants).
// The body comes entirely from the two include files below.
// NOTE(review): neither include is visible in this diff; register assignment and
// stack layout are presumably set up by SimdProc4Begin_i386.inc - confirm.
procedure SHA512_Compress_Ssse3(AState, AData: Pointer; ANumBlocks: UInt32;
136+
AConstants: Pointer);
137+
{$I ..\Include\Simd\Common\SimdProc4Begin_i386.inc}
138+
{$I ..\Include\Simd\SHA512\SHA512CompressSsse3_i386.inc}
139+
end;
140+
141+
// Three-argument adapter around SHA512_Compress_Ssse3: supplies @K512 as the
// AConstants argument so the routine matches the signature that InitDispatch
// assigns to SHA512_Compress for the SSSE3 level.
procedure SHA512_Compress_Ssse3_Wrap(AState, AData: Pointer; ANumBlocks: UInt32);
142+
begin
143+
SHA512_Compress_Ssse3(AState, AData, ANumBlocks, @K512);
144+
end;
145+
135146
{$ENDIF HASHLIB_I386_ASM}
136147

137148
{$IFDEF HASHLIB_X86_64_ASM}
@@ -184,7 +195,11 @@ procedure InitDispatch();
184195
SHA512_Compress := @SHA512_Compress_Scalar;
185196
{$IFDEF HASHLIB_I386_ASM}
186197
case TSimd.GetActiveLevel() of
187-
TSimdLevel.SSE2, TSimdLevel.SSSE3:
198+
TSimdLevel.SSSE3:
199+
begin
200+
SHA512_Compress := @SHA512_Compress_Ssse3_Wrap;
201+
end;
202+
TSimdLevel.SSE2:
188203
begin
189204
SHA512_Compress := @SHA512_Compress_Sse2_Wrap;
190205
end;
Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
// SSSE3 Adler-32 block processing (IA-32).
// Consumes ANumBlocks blocks of 32 bytes each from AData and updates the two
// dwords at ASums in place. In the notes below s1 = dword [ASums] and
// s2 = dword [ASums + 4].
2+
// After SimdProc4Begin_i386: ebx = AData, esi = ANumBlocks, edi = ASums, eax = AConstants.
3+
// Constants: [weights_hi: 16B, weights_lo: 16B, ones_16: 16B] (48 bytes; same as x64 SSSE3).
4+
// No xmm8 on IA-32: psadbw uses a copy in xmm5 (first half) or xmm4 (second half), then reload weights.
5+
// Caller applies mod 65521.
// Register roles inside the loop:
//   xmm0 = byte-sum accumulator, xmm1 = weighted-sum accumulator (seeded with s2),
//   xmm2 = sum of the xmm0 values seen before each block (seeded with ANumBlocks * s1),
//   xmm3 = zero, xmm4 / xmm5 = weights, xmm6 = ones_16, xmm7 = data.
// NOTE(review): there is no check for ANumBlocks = 0 before the loop; with a zero
// count dec ecx would wrap. Presumably the caller guarantees at least one block - confirm.
6+
7+
push eax // keep AConstants on the stack; eax is reused for s1 below
8+
9+
mov ecx, esi // ecx = ANumBlocks, used as the loop counter
10+
mov eax, dword ptr [edi] // eax = s1
11+
mov esi, dword ptr [edi + 4] // esi = s2
12+
13+
mov edx, ecx
14+
imul edx, eax // edx = ANumBlocks * s1
15+
movd xmm2, edx // seed xmm2 lane 0; it is multiplied by 32 after the loop
16+
movd xmm1, esi // seed the weighted accumulator with the incoming s2
17+
pxor xmm0, xmm0 // byte-sum accumulator starts at zero
18+
pxor xmm3, xmm3 // all-zero operand for psadbw
19+
20+
{$IFDEF MSWINDOWS}
21+
sub esp, 32 // room for two 16-byte register saves
22+
movdqu oword ptr [esp], xmm6 // NOTE(review): xmm6/xmm7 are saved only under MSWINDOWS;
23+
movdqu oword ptr [esp + $10], xmm7 // confirm this is required by the 32-bit convention in use
24+
mov edx, dword ptr [esp + 32] // edx = AConstants pushed above, now 32 bytes further up
25+
{$ELSE}
26+
mov edx, dword ptr [esp] // edx = AConstants pushed above
27+
{$ENDIF}
28+
29+
movdqu xmm4, oword ptr [edx] // constants bytes 0..15 (weights_hi per the header)
30+
movdqu xmm5, oword ptr [edx + 16] // constants bytes 16..31 (weights_lo per the header)
31+
movdqu xmm6, oword ptr [edx + 32] // constants bytes 32..47 (ones_16 per the header)
// NOTE(review): xmm4 and xmm5 are both written again inside the loop before their
// first read, so the two weight loads above look redundant - confirm.
32+
33+
@adler32_ssse3_loop:
34+
paddd xmm2, xmm0 // add the byte sums accumulated before this block
35+
36+
movdqu xmm7, oword ptr [ebx] // first 16 bytes of the block (unaligned load)
37+
movdqa xmm5, xmm7 // scratch copy; this overwrites the weights held in xmm5
38+
psadbw xmm5, xmm3 // against zero this yields the byte sum of each 8-byte half
39+
paddd xmm0, xmm5
40+
movdqu xmm4, oword ptr [edx] // NOTE(review): xmm4 is not written in this half, reload looks redundant - confirm
41+
movdqu xmm5, oword ptr [edx + 16] // restore the weights that the scratch copy overwrote
42+
pmaddubsw xmm7, xmm4 // unsigned data bytes times signed weight bytes, adjacent pairs summed to words
43+
pmaddwd xmm7, xmm6 // words times xmm6, adjacent pairs summed to dwords
44+
paddd xmm1, xmm7
45+
46+
movdqu xmm7, oword ptr [ebx + 16] // second 16 bytes of the block
47+
movdqa xmm4, xmm7 // scratch copy; this overwrites the weights held in xmm4
48+
psadbw xmm4, xmm3
49+
paddd xmm0, xmm4
50+
movdqu xmm4, oword ptr [edx] // restore the weights that the scratch copy overwrote
51+
movdqu xmm5, oword ptr [edx + 16] // NOTE(review): xmm5 is unchanged since the reload in the first half, looks redundant - confirm
52+
pmaddubsw xmm7, xmm5
53+
pmaddwd xmm7, xmm6
54+
paddd xmm1, xmm7
55+
56+
add ebx, 32 // advance AData by one 32-byte block
57+
dec ecx
58+
jnz @adler32_ssse3_loop
59+
60+
pslld xmm2, 5 // multiply the prior-sum term by 32, the block size
61+
paddd xmm1, xmm2
62+
63+
pshufd xmm7, xmm0, $B1 // swap dwords within each 64-bit half
64+
paddd xmm0, xmm7
65+
pshufd xmm7, xmm0, $4E // swap the two 64-bit halves
66+
paddd xmm0, xmm7 // lane 0 now holds the sum of all four lanes
67+
movd edx, xmm0
68+
add eax, edx // s1 += total of all bytes processed
69+
70+
pshufd xmm7, xmm1, $B1 // same horizontal reduction for the weighted accumulator
71+
paddd xmm1, xmm7
72+
pshufd xmm7, xmm1, $4E
73+
paddd xmm1, xmm7
74+
movd esi, xmm1 // new s2; the incoming s2 is included through the xmm1 seed
75+
76+
mov dword ptr [edi], eax // write s1 back to ASums
77+
mov dword ptr [edi + 4], esi // write s2 back to ASums
78+
79+
{$IFDEF MSWINDOWS}
80+
movdqu xmm6, oword ptr [esp]
81+
movdqu xmm7, oword ptr [esp + $10]
82+
add esp, 32
83+
{$ENDIF}
84+
add esp, 4 // discard the AConstants value pushed at the top
85+
pop edi // presumably mirrors pushes made by SimdProc4Begin_i386.inc (not visible here) - confirm
86+
pop esi
87+
pop ebx

0 commit comments

Comments
 (0)