Skip to content

Commit 6eda690

Browse files
committed
harden ProbeHardwareAndCache method on TArmSimdFeatures
1 parent ae1fa64 commit 6eda690

1 file changed

Lines changed: 37 additions & 28 deletions

File tree

HashLib/src/Utils/HlpArmSimdFeatures.pas

Lines changed: 37 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -359,35 +359,44 @@ class function TArmSimdFeatures.CPUHasPMULL(): Boolean;
359359
{ ========================= Probe & Override ================================= }
360360

361361
// Probes the CPU through the CPUHas* helpers and caches the outcome in
// FActiveSimdLevel and the FHas* feature flags. Takes no parameters and
// returns nothing; its only visible effect is assigning those fields.
class procedure TArmSimdFeatures.ProbeHardwareAndCache();
362+
var
363+
LHasNEON, LHasSVE, LHasSVE2, LHasAES: Boolean;
362364
begin
363-
FActiveSimdLevel := TArmSimdLevel.Scalar;
364-
FHasAES := False;
365-
FHasSHA1 := False;
366-
FHasSHA256 := False;
367-
FHasSHA512 := False;
368-
FHasSHA3 := False;
369-
FHasCRC32 := False;
370-
FHasPMULL := False;
371-
372-
if CPUHasNEON() then
373-
begin
374-
FActiveSimdLevel := TArmSimdLevel.NEON;
375-
376-
FHasAES := CPUHasAES();
377-
FHasSHA1 := CPUHasSHA1();
378-
FHasSHA256 := CPUHasSHA256();
379-
FHasSHA512 := CPUHasSHA512();
380-
FHasSHA3 := CPUHasSHA3();
381-
FHasCRC32 := CPUHasCRC32();
382-
FHasPMULL := CPUHasPMULL();
383-
384-
if CPUHasSVE() then
385-
begin
386-
FActiveSimdLevel := TArmSimdLevel.SVE;
387-
if CPUHasSVE2() then
388-
FActiveSimdLevel := TArmSimdLevel.SVE2;
389-
end;
390-
end;
365+
// Probe each SIMD tier once into locals; the active level is derived from them below
366+
LHasNEON := CPUHasNEON();
367+
LHasSVE := CPUHasSVE() and LHasNEON; // SVE operates alongside Advanced SIMD (NEON), so it is only accepted together with NEON
368+
LHasSVE2 := CPUHasSVE2() and LHasSVE; // SVE2 is a strict superset of SVE, so it is only accepted together with SVE
369+
370+
// Pick the highest tier the CPU can sustain
371+
if LHasSVE2 then
372+
FActiveSimdLevel := TArmSimdLevel.SVE2
373+
else if LHasSVE then
374+
FActiveSimdLevel := TArmSimdLevel.SVE
375+
else if LHasNEON then
376+
FActiveSimdLevel := TArmSimdLevel.NEON
377+
else
378+
FActiveSimdLevel := TArmSimdLevel.Scalar;
379+
380+
// ARMv8 crypto extensions - operate on NEON V registers, so gate on NEON.
381+
// AES and PMULL/PMULL2 are reported through the same ID register field
382+
// (ID_AA64ISAR0_EL1.AES) in the ARM Architecture Reference Manual, as FEAT_AES
383+
// and FEAT_PMULL - NOTE(review): confirm that they are always reported together.
384+
// We probe both and require agreement - if they ever disagree (buggy feature-register
385+
// emulation, partial hypervisor masking), we conservatively disable both.
386+
LHasAES := CPUHasAES() and CPUHasPMULL() and LHasNEON;
387+
FHasAES := LHasAES;
388+
FHasPMULL := LHasAES;
389+
390+
// SHA extensions - each is an independent FEAT_SHA* bit, but all require
391+
// NEON register state. SHA2 (SHA256) is architecturally a prerequisite for
392+
// SHA512 and SHA3 on ARMv8, so we chain them defensively.
393+
FHasSHA1 := CPUHasSHA1() and LHasNEON;
394+
FHasSHA256 := CPUHasSHA256() and LHasNEON;
395+
FHasSHA512 := CPUHasSHA512() and LHasNEON and FHasSHA256;
396+
FHasSHA3 := CPUHasSHA3() and LHasNEON and FHasSHA256;
397+
398+
// CRC32 uses general-purpose registers, not NEON - genuinely independent.
399+
FHasCRC32 := CPUHasCRC32();
391400
end;
392401

393402
class procedure TArmSimdFeatures.ApplyBuildOverrides();

0 commit comments

Comments
 (0)