Skip to content

Commit 8462a2a

Browse files
committed
Fix CRC SSE2 64-bit error in incremental mode
1 parent 1d744a5 commit 8462a2a

3 files changed

Lines changed: 70 additions & 25 deletions

File tree

HashLib/src/Checksum/HlpCRCDispatch.pas

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,9 @@
99
CRC32Fast (HlpCRC32Fast.pas): PKZIP/Castagnoli only; FCurrentCRC with not/xor
1010
convention; uses CRCDispatch_UpdateReflectedCrc32 + TCRCFoldRuntimeCtx32.
1111
12-
Keep MSB SIMD kernels aligned: CRCFoldMsbSse2.inc (x64 offsets +224/+228) vs
13-
CRCFoldMsbSse2_i386.inc (+160/+164 for 32-bit TableRow pointers).
12+
TCRCFoldRuntimeCtx64 matches Ctx32 shape: FoldConstants + TableRow only.
13+
MSB fold reads CRC width from FoldConstants.CrcBits (see TGF2.GenerateFoldConstants)
14+
and derives the state mask the same way as TCRC.FCRCMask.
1415
}
1516

1617
{$I ..\Include\HashLib.inc}
@@ -25,12 +26,11 @@ interface
2526
MinSimdBytes = Int32(16);
2627

2728
type
28-
// Runtime context: PCLMUL reads first field only (offset 0).
29+
// Runtime context: PCLMUL reads first field only (offset 0). Same layout as
30+
// TCRCFoldRuntimeCtx32 apart from the TableRow pointee type (PUInt64 vs PUInt32).
2931
TCRCFoldRuntimeCtx64 = packed record
3032
FoldConstants: TCRCFoldConstants;
3133
TableRow: array [0 .. 15] of PUInt64;
32-
Width: Int32;
33-
CrcMask: UInt64;
3434
end;
3535

3636
PCRCFoldRuntimeCtx64 = ^TCRCFoldRuntimeCtx64;
@@ -94,6 +94,11 @@ function CrcTableU32(const Row: PUInt32; B: Byte): UInt32; inline;
9494
Result := PUInt32(NativeUInt(Row) + UInt64(B) * SizeOf(UInt32))^;
9595
end;
9696

97+
function CRCMaskFromWidth(AWidth: Int32): UInt64; inline;
98+
begin
99+
Result := ((UInt64(1) shl (AWidth - 1)) - 1) shl 1 or 1;
100+
end;
101+
97102
function CRC_Fold_Lsb_Scalar(AData: PByte; ALength: UInt32;
98103
AState: Pointer; AConstants: Pointer): UInt64;
99104
var
@@ -144,14 +149,18 @@ function CRC_Fold_Msb_Scalar(AData: PByte; ALength: UInt32;
144149
LTemp, LNewTemp, LTempCopy: UInt64;
145150
LPtr: PByte;
146151
LLen: UInt32;
152+
LWidth: Int32;
153+
LCrcMask: UInt64;
147154
LCrcBytes, LBIdx: Int32;
148155
LByte: Byte;
149156
begin
150157
Ctx := PCRCFoldRuntimeCtx64(AConstants);
151158
LPtr := AData;
152159
LLen := ALength;
153160
LTemp := PUInt64(AState)^;
154-
LCrcBytes := (Ctx.Width + 7) shr 3;
161+
LWidth := Int32(Ctx.FoldConstants.CrcBits);
162+
LCrcMask := CRCMaskFromWidth(LWidth);
163+
LCrcBytes := (LWidth + 7) shr 3;
155164

156165
while LLen >= 16 do
157166
begin
@@ -161,8 +170,8 @@ function CRC_Fold_Msb_Scalar(AData: PByte; ALength: UInt32;
161170
LBIdx := 0;
162171
while LBIdx < LCrcBytes do
163172
begin
164-
LByte := LPtr[LBIdx] xor Byte(LTempCopy shr (Ctx.Width - 8));
165-
LTempCopy := (LTempCopy shl 8) and Ctx.CrcMask;
173+
LByte := LPtr[LBIdx] xor Byte(LTempCopy shr (LWidth - 8));
174+
LTempCopy := (LTempCopy shl 8) and LCrcMask;
166175
LNewTemp := LNewTemp xor CrcTableU64(Ctx.TableRow[15 - LBIdx], LByte);
167176
System.Inc(LBIdx);
168177
end;
@@ -295,8 +304,6 @@ procedure CRCDispatch_InitRuntimeCtx64(const Table: THashLibMatrixUInt64Array;
295304
TGF2.GenerateFoldConstants(APoly, AWidth, AReflected, Ctx.FoldConstants);
296305
for I := 0 to 15 do
297306
Ctx.TableRow[I] := PUInt64(@Table[I][0]);
298-
Ctx.Width := AWidth;
299-
Ctx.CrcMask := ((UInt64(1) shl (AWidth - 1)) - 1) shl 1 or 1;
300307
end;
301308

302309
procedure CRCDispatch_InitRuntimeCtx32(const Table: THashLibMatrixUInt32Array;

HashLib/src/Include/Simd/CRC/CRCFoldMsbSse2_i386.inc

Lines changed: 34 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,7 @@
11
// MSB-first 64-bit CRC: 16-byte slice (IA-32; SSE2 not required — same as x64 MSB path).
2-
// x64 counterpart: CRCFoldMsbSse2_x86_64.inc (Width/CrcMask at +224/+228).
2+
// x64 counterpart: CRCFoldMsbSse2_x86_64.inc.
33
// After SimdProc4Begin_i386: ebx = AData, esi = ALength, edi = AState, eax = Ctx.
4-
// Offsets (packed TCRCFoldRuntimeCtx64, 32-bit ptrs): TableRow +96, Width +160,
5-
// CrcMask +164 (x86-64 uses +224/+228 because TableRow entries are 8 bytes).
4+
// Ctx: FoldConstants.CrcBits at +80, TableRow at +96. CrcMask is not stored in Ctx; it is computed from the width W as ((1 shl (W-1)) - 1) shl 1 or 1.
65
// Result UInt64 in edx:eax.
76

87
push ebp
@@ -23,20 +22,45 @@
2322

2423
mov dword ptr [esp + 36], esi
2524

26-
mov eax, dword ptr [ebp + 160]
25+
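// Width from CrcBits [ebp+80]; build the 64-bit mask on the stack (low dword at [esp+8], high dword at [esp+12]). esi is used as scratch and reloaded from [esp+36] afterwards.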
26+
mov eax, dword ptr [ebp + 80]
27+
mov ecx, eax
28+
dec ecx
29+
xor eax, eax
30+
xor edx, edx
31+
cmp ecx, 32
32+
jae @MsbSse2_mask_ge32
33+
mov eax, 1
34+
shl eax, cl
35+
xor edx, edx
36+
jmp @MsbSse2_mask_sub1
37+
@MsbSse2_mask_ge32:
38+
mov esi, ecx
39+
sub esi, 32
40+
xor eax, eax
41+
mov edx, 1
42+
mov ecx, esi
43+
shl edx, cl
44+
@MsbSse2_mask_sub1:
45+
sub eax, 1
46+
sbb edx, 0
47+
shld edx, eax, 1
48+
shl eax, 1
49+
or eax, 1
50+
mov dword ptr [esp + 8], eax
51+
mov dword ptr [esp + 12], edx
52+
53+
mov esi, dword ptr [esp + 36]
54+
55+
mov eax, dword ptr [ebp + 80]
2756
add eax, 7
2857
shr eax, 3
2958
mov dword ptr [esp + 16], eax
3059

31-
mov eax, dword ptr [ebp + 160]
60+
mov eax, dword ptr [ebp + 80]
3261
sub eax, 8
3362
mov dword ptr [esp + 20], eax
3463

35-
mov eax, dword ptr [ebp + 164]
36-
mov dword ptr [esp + 8], eax
37-
mov eax, dword ptr [ebp + 168]
38-
mov dword ptr [esp + 12], eax
39-
4064
mov eax, dword ptr [edi]
4165
mov edx, dword ptr [edi + 4]
4266
mov dword ptr [esp], eax

HashLib/src/Include/Simd/CRC/CRCFoldMsbSse2_x86_64.inc

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,17 @@
11
// MSB-first 64-bit CRC: 16-byte slice (slicing-by-16) using SSE2 for load only.
2-
// IA-32 counterpart: CRCFoldMsbSse2_i386.inc (Width/CrcMask at +160/+164).
2+
// IA-32 counterpart: CRCFoldMsbSse2_i386.inc.
33
// Signature (after SimdProc4Begin_x86_64.inc):
44
// function CRC_Fold_Msb_Sse2(AData: PByte; ALength: UInt32;
55
// AState: Pointer; AConstants: Pointer): UInt64;
66
// MS x64: rcx=AData, edx=ALength, r8=AState, r9=Ctx.
7-
// Offsets in PCRCFoldRuntimeCtx64 (packed): TableRow at +96, Width at +224,
8-
// CrcMask at +228 (verify with SizeOf if compiler padding differs).
7+
// rsi/rdi are callee-saved under the MS x64 ABI and must be preserved (used here as LTempCopy / LBIdx scratch).
8+
// Ctx = PCRCFoldRuntimeCtx64: FoldConstants.CrcBits at +80, TableRow at +96.
9+
// CrcMask = ((1 shl (W-1)) - 1) shl 1 or 1; W = dword [r9+80] (TCRC table path: W>=8).
910

1011
push rbx
1112
push rbp
13+
push rsi
14+
push rdi
1215
push r12
1316
push r13
1417
push r14
@@ -21,8 +24,17 @@
2124
cmp edx, 16
2225
jb @MsbSse2_done
2326

24-
mov r14d, dword ptr [r9 + 224] // Width
25-
mov r12, qword ptr [r9 + 228] // CrcMask
27+
mov r14d, dword ptr [r9 + 80] // Width from FoldConstants.CrcBits
28+
29+
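// r12 = CRCMaskFromWidth(r14d). Uses rax and rcx as scratch (cl = shift count), so AData (rcx on entry) must already have been copied to another register.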
30+
mov ecx, r14d
31+
dec ecx
32+
mov rax, 1
33+
shl rax, cl
34+
sub rax, 1
35+
shl rax, 1
36+
or rax, 1
37+
mov r12, rax
2638

2739
mov eax, r14d
2840
add eax, 7
@@ -84,5 +96,7 @@
8496
pop r14
8597
pop r13
8698
pop r12
99+
pop rdi
100+
pop rsi
87101
pop rbp
88102
pop rbx

0 commit comments

Comments
 (0)