Skip to content

Commit 1875149

Browse files
committed
Implement CRC SIMD (PCLMULQDQ)
1 parent 6ea787e commit 1875149

13 files changed

Lines changed: 771 additions & 73 deletions

File tree

HashLib.Benchmark/Delphi/PerformanceBenchmarkConsole.dpr

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@ uses
1919
HlpAdler32 in '..\..\HashLib\src\Checksum\HlpAdler32.pas',
2020
HlpAdler32Dispatch in '..\..\HashLib\src\Checksum\HlpAdler32Dispatch.pas',
2121
HlpCRC in '..\..\HashLib\src\Checksum\HlpCRC.pas',
22+
HlpCRCDispatch in '..\..\HashLib\src\Checksum\HlpCRCDispatch.pas',
23+
HlpGF2 in '..\..\HashLib\src\Checksum\HlpGF2.pas',
2224
HlpCRC16 in '..\..\HashLib\src\Checksum\HlpCRC16.pas',
2325
HlpCRC32 in '..\..\HashLib\src\Checksum\HlpCRC32.pas',
2426
HlpCRC32Fast in '..\..\HashLib\src\Checksum\HlpCRC32Fast.pas',

HashLib.Benchmark/Delphi/PerformanceBenchmarkFMX.dpr

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@ uses
1818
HlpAdler32 in '..\..\HashLib\src\Checksum\HlpAdler32.pas',
1919
HlpAdler32Dispatch in '..\..\HashLib\src\Checksum\HlpAdler32Dispatch.pas',
2020
HlpCRC in '..\..\HashLib\src\Checksum\HlpCRC.pas',
21+
HlpCRCDispatch in '..\..\HashLib\src\Checksum\HlpCRCDispatch.pas',
22+
HlpGF2 in '..\..\HashLib\src\Checksum\HlpGF2.pas',
2123
HlpCRC16 in '..\..\HashLib\src\Checksum\HlpCRC16.pas',
2224
HlpCRC32 in '..\..\HashLib\src\Checksum\HlpCRC32.pas',
2325
HlpCRC32Fast in '..\..\HashLib\src\Checksum\HlpCRC32Fast.pas',

HashLib.Tests/Delphi.Tests/HashLib.Tests.dpr

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,8 @@ uses
4040
HlpAdler32 in '..\..\HashLib\src\Checksum\HlpAdler32.pas',
4141
HlpAdler32Dispatch in '..\..\HashLib\src\Checksum\HlpAdler32Dispatch.pas',
4242
HlpCRC in '..\..\HashLib\src\Checksum\HlpCRC.pas',
43+
HlpCRCDispatch in '..\..\HashLib\src\Checksum\HlpCRCDispatch.pas',
44+
HlpGF2 in '..\..\HashLib\src\Checksum\HlpGF2.pas',
4345
HlpCRC16 in '..\..\HashLib\src\Checksum\HlpCRC16.pas',
4446
HlpCRC32 in '..\..\HashLib\src\Checksum\HlpCRC32.pas',
4547
HlpCRC32Fast in '..\..\HashLib\src\Checksum\HlpCRC32Fast.pas',

HashLib.Tests/src/CRCTests.pas

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ interface
1515
HlpICRC,
1616
HlpHashFactory,
1717
HlpIHash,
18+
HlpSimd,
1819
HlpConverters;
1920

2021
type
@@ -25,6 +26,7 @@ TTestCRCModel = class(THashLibAlgorithmTestCase)
2526
FCRC: IHash;
2627

2728
protected
29+
procedure Setup; override;
2830
procedure TearDown; override;
2931
published
3032
procedure TestCheckValue;
@@ -65,6 +67,14 @@ implementation
6567

6668
{ TTestCRCModel }
6769

70+
// Per-test fixture setup. After the inherited setup it prints a short
// diagnostic banner with the values TSimd reports for PCLMULQDQ support,
// VPCLMULQDQ support and the ordinal of the active SIMD level, so a test log
// shows which CRC code path was available when the test ran.
procedure TTestCRCModel.Setup;
begin
  inherited;

  // Only write when a console is attached. Under a GUI test runner the
  // standard Output file is not open, and an unguarded WriteLn would raise an
  // I/O error before every single test in this fixture.
  if IsConsole then
  begin
    WriteLn('PCLMULQDQ : ', TSimd.HasPCLMULQDQ());
    WriteLn('VPCLMULQDQ : ', TSimd.HasVPCLMULQDQ());
    WriteLn('SIMD Level : ', Ord(TSimd.GetActiveLevel()));
  end;
end;
77+
6878
procedure TTestCRCModel.TearDown;
6979
begin
7080
FCRC := nil;

HashLib/src/Checksum/HlpCRC.pas

Lines changed: 145 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,8 @@ interface
1616
HlpIHashInfo,
1717
HlpHashResult,
1818
HlpIHashResult,
19-
HlpICRC;
19+
HlpICRC,
20+
HlpGF2;
2021

2122
resourcestring
2223
SUnSupportedCRCType = 'UnSupported CRC Type: "%s"';
@@ -582,8 +583,10 @@ TCRC = class sealed(THash, IChecksum, ICRC, ITransformBlock)
582583
FPolynomial, FInitialValue, FOutputXor, FCheckValue, FCRCMask,
583584
FCRCHighBitMask, FHash: UInt64;
584585
FIsInputReflected, FIsOutputReflected, FIsTableGenerated: Boolean;
586+
FHasPclmulConstants: Boolean;
585587

586-
FCRCTable: THashLibUInt64Array;
588+
FCRCTable: THashLibMatrixUInt64Array;
589+
FPclmulConstants: TCRCFoldConstants;
587590

588591
const
589592
Delta = Int32(7);
@@ -648,6 +651,9 @@ TCRC = class sealed(THash, IChecksum, ICRC, ITransformBlock)
648651

649652
implementation
650653

654+
uses
655+
HlpCRCDispatch;
656+
651657
{ TCRC }
652658

653659
function TCRC.GetCheckValue: UInt64;
@@ -737,31 +743,90 @@ function TCRC.GetName: String;
737743

738744
procedure TCRC.CalculateCRCbyTable(AData: PByte; ADataLength, AIndex: Int32);
739745
var
740-
LLength, LIndex: Int32;
741-
LTemp: UInt64;
742-
LCRCTable: THashLibUInt64Array;
746+
LLength: Int32;
747+
LTemp, LQWord1, LQWord2, LNewTemp, LTempCopy: UInt64;
748+
LCRCTable: THashLibMatrixUInt64Array;
749+
LPtrData: PByte;
750+
LBIdx, LCrcBytes: Int32;
751+
LByte: Byte;
743752
begin
744753
LLength := ADataLength;
745-
LIndex := AIndex;
754+
LPtrData := AData + AIndex;
746755
LTemp := FHash;
747756
LCRCTable := FCRCTable;
748757

749-
if (IsInputReflected) then
758+
if IsInputReflected then
750759
begin
760+
// Slicing-by-16: process 16 bytes per iteration using UInt64 reads
761+
while LLength >= 16 do
762+
begin
763+
LQWord1 := PUInt64(LPtrData)^ xor LTemp;
764+
LQWord2 := PUInt64(LPtrData + 8)^;
765+
766+
LTemp := LCRCTable[15][Byte(LQWord1)]
767+
xor LCRCTable[14][Byte(LQWord1 shr 8)]
768+
xor LCRCTable[13][Byte(LQWord1 shr 16)]
769+
xor LCRCTable[12][Byte(LQWord1 shr 24)]
770+
xor LCRCTable[11][Byte(LQWord1 shr 32)]
771+
xor LCRCTable[10][Byte(LQWord1 shr 40)]
772+
xor LCRCTable[9][Byte(LQWord1 shr 48)]
773+
xor LCRCTable[8][Byte(LQWord1 shr 56)]
774+
xor LCRCTable[7][Byte(LQWord2)]
775+
xor LCRCTable[6][Byte(LQWord2 shr 8)]
776+
xor LCRCTable[5][Byte(LQWord2 shr 16)]
777+
xor LCRCTable[4][Byte(LQWord2 shr 24)]
778+
xor LCRCTable[3][Byte(LQWord2 shr 32)]
779+
xor LCRCTable[2][Byte(LQWord2 shr 40)]
780+
xor LCRCTable[1][Byte(LQWord2 shr 48)]
781+
xor LCRCTable[0][Byte(LQWord2 shr 56)];
782+
783+
System.Inc(LPtrData, 16);
784+
System.Dec(LLength, 16);
785+
end;
786+
787+
// Remaining 1..15 bytes: byte-at-a-time using row 0
751788
while LLength > 0 do
752789
begin
753-
LTemp := (LTemp shr 8) xor LCRCTable[Byte(LTemp xor AData[LIndex])];
754-
System.Inc(LIndex);
790+
LTemp := (LTemp shr 8) xor LCRCTable[0][Byte(LTemp xor LPtrData^)];
791+
System.Inc(LPtrData);
755792
System.Dec(LLength);
756793
end;
757794
end
758795
else
759796
begin
797+
// Non-reflected: slicing-by-16 with byte reads
798+
LCrcBytes := (Width + 7) shr 3;
799+
800+
while LLength >= 16 do
801+
begin
802+
LNewTemp := UInt64(0);
803+
LTempCopy := LTemp;
804+
805+
LBIdx := 0;
806+
while LBIdx < LCrcBytes do
807+
begin
808+
LByte := LPtrData[LBIdx] xor Byte(LTempCopy shr (Width - 8));
809+
LTempCopy := (LTempCopy shl 8) and FCRCMask;
810+
LNewTemp := LNewTemp xor LCRCTable[15 - LBIdx][LByte];
811+
System.Inc(LBIdx);
812+
end;
813+
while LBIdx < 16 do
814+
begin
815+
LNewTemp := LNewTemp xor LCRCTable[15 - LBIdx][LPtrData[LBIdx]];
816+
System.Inc(LBIdx);
817+
end;
818+
819+
LTemp := LNewTemp;
820+
System.Inc(LPtrData, 16);
821+
System.Dec(LLength, 16);
822+
end;
823+
824+
// Remaining 1..15 bytes: byte-at-a-time using row 0
760825
while LLength > 0 do
761826
begin
762-
LTemp := (LTemp shl 8) xor LCRCTable
763-
[Byte((LTemp shr (Width - 8)) xor AData[LIndex])];
764-
System.Inc(LIndex);
827+
LTemp := (LTemp shl 8) xor LCRCTable[0]
828+
[Byte((LTemp shr (Width - 8)) xor LPtrData^)];
829+
System.Inc(LPtrData);
765830
System.Dec(LLength);
766831
end;
767832
end;
@@ -807,6 +872,7 @@ procedure TCRC.CalculateCRCdirect(AData: PByte; ADataLength, AIndex: Int32);
807872
function TCRC.Clone(): IHash;
808873
var
809874
LHashInstance: TCRC;
875+
LIdx: Int32;
810876
begin
811877
LHashInstance := TCRC.Create(Width, Polynomial, InitialValue,
812878
IsInputReflected, IsOutputReflected, OutputXor, CheckValue,
@@ -815,7 +881,11 @@ function TCRC.Clone(): IHash;
815881
LHashInstance.FCRCHighBitMask := FCRCHighBitMask;
816882
LHashInstance.FHash := FHash;
817883
LHashInstance.FIsTableGenerated := FIsTableGenerated;
818-
LHashInstance.FCRCTable := System.Copy(FCRCTable);
884+
LHashInstance.FHasPclmulConstants := FHasPclmulConstants;
885+
LHashInstance.FPclmulConstants := FPclmulConstants;
886+
System.SetLength(LHashInstance.FCRCTable, System.Length(FCRCTable));
887+
for LIdx := 0 to System.High(FCRCTable) do
888+
LHashInstance.FCRCTable[LIdx] := System.Copy(FCRCTable[LIdx]);
819889
Result := LHashInstance;
820890
Result.BufferSize := BufferSize;
821891
end;
@@ -832,6 +902,7 @@ constructor TCRC.Create(AWidth: Int32; APolynomial, AInitial: UInt64;
832902
end;
833903

834904
FIsTableGenerated := False;
905+
FHasPclmulConstants := False;
835906

836907
inherited Create(-1, -1); // Dummy State
837908

@@ -1340,62 +1411,73 @@ class function TCRC.CreateCRCObject(AValue: TCRCStandard): ICRC;
13401411

13411412
// Builds the 16 x 256 lookup table used by the table-driven CRC routines.
//
// For every byte value b, FCRCTable[r][b] holds the CRC register obtained by
// seeding the register with b and clocking it through 8 * (r + 1) bit steps.
// The register is carried over from one row to the next, so each row is the
// previous row advanced by eight further steps.
//
// Reflected models shift right using the bit-reflected polynomial; the other
// models shift left using Polynomial as given and are masked to Width bits.
// Sets FIsTableGenerated once the table is complete.
procedure TCRC.GenerateTable;
var
  LValue, LPolyReflected: UInt64;
  LEntry, LSlice, LStep: Int32;
  LCarry: Boolean;
begin
  // Allocate the 16 rows of 256 entries each.
  System.SetLength(FCRCTable, 16);
  LSlice := 0;
  while LSlice < 16 do
  begin
    System.SetLength(FCRCTable[LSlice], 256);
    System.Inc(LSlice);
  end;

  if not IsInputReflected then
  begin
    // MSB-first variant: the seed byte sits in the top eight bits of the
    // Width-bit register.
    for LEntry := 0 to 255 do
    begin
      LValue := UInt64(LEntry) shl (Width - 8);
      for LSlice := 0 to 15 do
      begin
        for LStep := 0 to 7 do
        begin
          // Remember the outgoing top bit before shifting it away.
          LCarry := (LValue and FCRCHighBitMask) <> 0;
          LValue := LValue shl 1;
          if LCarry then
            LValue := LValue xor Polynomial;
          LValue := LValue and FCRCMask;
        end;
        FCRCTable[LSlice][LEntry] := LValue;
      end;
    end;
  end
  else
  begin
    // LSB-first variant: the seed byte sits in the low eight bits and the
    // polynomial is applied in bit-reflected form.
    LPolyReflected := Reflect(Polynomial, Width);
    for LEntry := 0 to 255 do
    begin
      LValue := UInt64(LEntry);
      for LSlice := 0 to 15 do
      begin
        for LStep := 0 to 7 do
        begin
          if (LValue and 1) <> 0 then
            LValue := (LValue shr 1) xor LPolyReflected
          else
            LValue := LValue shr 1;
        end;
        FCRCTable[LSlice][LEntry] := LValue;
      end;
    end;
  end;

  FIsTableGenerated := True;
end;
13781457

13791458
// Resets the running CRC state and lazily prepares the lookup data.
//
// Always recomputes the bit masks derived from Width and reloads FHash from
// InitialValue. For widths above Delta it additionally:
//   * builds the lookup table on first use,
//   * generates the carry-less-multiply fold constants on first use, but only
//     for reflected models whose width is within 8..32 bits,
//   * stores the initial value in reflected form for reflected models.
procedure TCRC.Initialize;
begin
  // Mask for the top bit of the register, and mask covering all Width bits.
  FCRCHighBitMask := UInt64(1) shl (Width - 1);
  FCRCMask := ((FCRCHighBitMask - 1) shl 1) or 1;
  FHash := InitialValue;

  // Narrow models are computed without a table; nothing more to prepare.
  if Width <= Delta then
    Exit;

  if not FIsTableGenerated then
    GenerateTable();

  // Fold constants are produced at most once per instance.
  if (not FHasPclmulConstants) and (Width >= 8) and (Width <= 32) and
    IsInputReflected then
  begin
    TGF2.GenerateFoldConstants(Polynomial, Width, True, FPclmulConstants);
    FHasPclmulConstants := True;
  end;

  if IsInputReflected then
    FHash := Reflect(FHash, Width);
end;
14001482

14011483
class function TCRC.Reflect(AValue: UInt64; AWidth: Int32): UInt64;
@@ -1419,32 +1501,37 @@ class function TCRC.Reflect(AValue: UInt64; AWidth: Int32): UInt64;
14191501
// Feeds ALength bytes of AData, starting at AIndex, into the running CRC.
//
// Dispatch:
//   * Width <= Delta: bit-by-bit computation (CalculateCRCdirect).
//   * Otherwise, when fold constants exist, the model is reflected, at least
//     64 bytes are supplied and a CRC_Fold_Lsb routine is assigned, the
//     largest multiple of 16 bytes is handed to CRC_Fold_Lsb and any
//     remaining bytes go through the table routine.
//   * Otherwise: table-driven computation (CalculateCRCbyTable).
//
// In DEBUG builds the index/length arguments are range-asserted.
procedure TCRC.TransformBytes(const AData: THashLibByteArray;
  AIndex, ALength: Int32);
var
  LBase: PByte;
  LFoldState: array [0 .. 1] of UInt64;
  LFoldLen, LRemainder: Int32;
begin
{$IFDEF DEBUG}
  System.Assert(AIndex >= 0);
  System.Assert(ALength >= 0);
  System.Assert(AIndex + ALength <= System.Length(AData));
{$ENDIF DEBUG}

  LBase := PByte(AData);

  // Narrow widths never use the table or the fold routine.
  if Width <= Delta then
  begin
    CalculateCRCdirect(LBase, ALength, AIndex);
    Exit;
  end;

  // Fall back to the table whenever any precondition of the fold path fails.
  if not (FHasPclmulConstants and IsInputReflected and (ALength >= 64) and
    Assigned(CRC_Fold_Lsb)) then
  begin
    CalculateCRCbyTable(LBase, ALength, AIndex);
    Exit;
  end;

  // Split the input into a 16-byte-aligned length for the fold routine and a
  // remainder of 0..15 bytes for the table routine.
  LRemainder := ALength and 15;
  LFoldLen := ALength - LRemainder;

  // The current CRC goes in the first state word; the second starts at zero.
  LFoldState[0] := FHash;
  LFoldState[1] := 0;

  FHash := CRC_Fold_Lsb(LBase + AIndex, UInt32(LFoldLen), @LFoldState[0],
    @FPclmulConstants) and FCRCMask;

  if LRemainder > 0 then
    CalculateCRCbyTable(LBase, LRemainder, AIndex + LFoldLen);
end;
14491536

14501537
function TCRC.TransformFinal: IHashResult;

0 commit comments

Comments
 (0)