Skip to content

Commit 913188e

Browse files
authored
Add more x86 cpu feature (#1341)
1 parent b434834 commit 913188e

1 file changed

Lines changed: 78 additions & 1 deletion

File tree

include/xsimd/config/xsimd_cpu_features_x86.hpp

Lines changed: 78 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -397,10 +397,16 @@ namespace xsimd
397397
sse4_2 = 20,
398398
/* Population count instruction (POPCNT). */
399399
popcnt = 23,
400+
/* Advanced Encryption Standard instruction set. */
401+
aes_ni = 25,
400402
/* OS has enabled XSAVE/XRSTOR for extended processor state management. */
401403
osxsave = 27,
402404
/* Advanced Vector Extensions (256-bit SIMD). */
403405
avx = 28,
406+
/* Half to single floating point conversion. */
407+
f16c = 29,
408+
/* On-chip random number generator. */
409+
rdrnd = 30,
404410
};
405411
enum class edx
406412
{
@@ -449,6 +455,10 @@ namespace xsimd
449455
avx512f = 16,
450456
/* AVX-512 Doubleword and Quadword instructions. */
451457
avx512dq = 17,
458+
/* Low-level access to the entropy-generating hardware. */
459+
rdseed = 18,
460+
/* Intel ADX: multi-precision add-carry instructions (ADCX/ADOX). */
461+
adx = 19,
452462
/* AVX-512 Integer Fused Multiply-Add instructions. */
453463
avx512ifma = 21,
454464
/* AVX-512 Prefetch instructions. */
@@ -457,23 +467,50 @@ namespace xsimd
457467
avx512er = 27,
458468
/* AVX-512 Conflict Detection instructions. */
459469
avx512cd = 28,
470+
/* SHA-1 and SHA-256 hashing instruction extensions. */
471+
sha = 29,
460472
/* AVX-512 Byte and Word instructions. */
461473
avx512bw = 30,
474+
/* AVX-512 Vector Length Extensions for xmm and ymm registers. */
475+
avx512vl = 31,
462476
};
463477
/*
 * Feature flags looked up in the ECX register of CPUID leaf 7.
 * Each enumerator is handed to all_bits_set<>() as a template argument by
 * the matching feature query; the value is presumably the bit index inside
 * the register (confirm against detail::x86_cpuid_regs).
 */
enum class ecx
464478
{
465479
/* AVX-512 Vector Bit Manipulation instructions. */
466480
avx512vbmi = 1,
467481
/* AVX-512 Vector Bit Manipulation instructions 2. */
468482
avx512vbmi2 = 6,
483+
/* Galois Field New Instructions (GFNI). */
484+
gfni = 8,
485+
/* Vector Advanced Encryption Standard instructions (VAES). */
486+
vaes = 9,
487+
/* Vector carry-less multiplication quadword instruction (VPCLMULQDQ). */
488+
vpclmulqdq = 10,
489
/* AVX-512 Vector Neural Network instructions. */
470490
avx512vnni_bw = 11,
491+
/* AVX-512 bit algorithm instructions (BITALG). */
492+
avx512_bitalg = 12,
493+
/* AVX-512 vector population count for doubleword and quadword. */
494+
avx512_vpopcntdq = 14,
495+
};
496+
/*
 * Feature flags looked up in the EDX register of CPUID leaf 7.
 * Each enumerator is handed to all_bits_set<>() as a template argument by
 * the matching feature query; the value is presumably the bit index inside
 * the register (confirm against detail::x86_cpuid_regs).
 */
enum class edx
497+
{
498+
/* AVX-512 4-register neural network instructions (word variable precision). */
499+
avx512_4vnniw = 2,
500+
/* AVX-512 4-register multiply-accumulate single precision. */
501+
avx512_4fmaps = 3,
502+
/* AVX-512 intersect pairs of packed doubleword/quadword integers. */
503+
avx512_vp2intersect = 8,
504+
/* AVX-512 16-bit (half precision) floating-point instructions. */
505+
avx512_fp16 = 23,
506+
471507
};
472508

473509
using regs_t = detail::x86_cpuid_regs<leaf, subleaf,
474510
detail::x86_reg_id<eax, 0>,
475511
detail::x86_reg_id<ebx, 1>,
476-
detail::x86_reg_id<ecx, 2>>;
512+
detail::x86_reg_id<ecx, 2>,
513+
detail::x86_reg_id<edx, 3>>;
477514
};
478515

479516
/**
@@ -497,6 +534,8 @@ namespace xsimd
497534
{
498535
/* AVX (VEX-encoded) Vector Neural Network instructions. */
499536
avxvnni = 4,
537+
/* AVX-512 BFloat16 instructions. */
538+
avx512_bf16 = 5,
500539
};
501540

502541
using regs_t = detail::x86_cpuid_regs<leaf, subleaf,
@@ -818,16 +857,30 @@ namespace xsimd
818857

819858
/** True when avx_enabled() holds and the leaf-1 ECX `avx` flag is set. */
inline bool avx() const noexcept
{
    if (!avx_enabled())
    {
        return false;
    }
    return leaf1().all_bits_set<x86_cpuid_leaf1::ecx::avx>();
}
820859

860+
/**
 * True when sse_enabled() holds and the leaf-1 ECX `avx` flag is set.
 *
 * NOTE(review): unlike avx(), this is gated on sse_enabled() rather than
 * avx_enabled(); confirm that this OS-state check is sufficient for the
 * 128-bit AVX code paths that rely on this query.
 */
inline bool avx_128() const noexcept
{
    if (!sse_enabled())
    {
        return false;
    }
    return leaf1().all_bits_set<x86_cpuid_leaf1::ecx::avx>();
}
861+
862+
/** True when sse_enabled() holds and the leaf-1 ECX `aes_ni` flag is set. */
inline bool aes_ni() const noexcept
{
    if (!sse_enabled())
    {
        return false;
    }
    return leaf1().all_bits_set<x86_cpuid_leaf1::ecx::aes_ni>();
}
863+
864+
/** True when avx_enabled() holds and the leaf-1 ECX `f16c` flag is set. */
inline bool f16c() const noexcept
{
    if (!avx_enabled())
    {
        return false;
    }
    return leaf1().all_bits_set<x86_cpuid_leaf1::ecx::f16c>();
}
865+
866+
inline bool rdrnd() const noexcept { return leaf1().all_bits_set<x86_cpuid_leaf1::ecx::rdrnd>(); }
867+
821868
inline bool bmi1() const noexcept { return leaf7().all_bits_set<x86_cpuid_leaf7::ebx::bmi1>(); }
822869

823870
/** True when avx_enabled() holds and the leaf-7 EBX `avx2` flag is set. */
inline bool avx2() const noexcept
{
    if (!avx_enabled())
    {
        return false;
    }
    return leaf7().all_bits_set<x86_cpuid_leaf7::ebx::avx2>();
}
824871

872+
/**
 * True when sse_enabled() holds and the leaf-7 EBX `avx2` flag is set.
 *
 * NOTE(review): unlike avx2(), this is gated on sse_enabled() rather than
 * avx_enabled(); confirm that this OS-state check is sufficient for the
 * 128-bit AVX2 code paths that rely on this query.
 */
inline bool avx2_128() const noexcept
{
    if (!sse_enabled())
    {
        return false;
    }
    return leaf7().all_bits_set<x86_cpuid_leaf7::ebx::avx2>();
}
873+
825874
inline bool bmi2() const noexcept { return leaf7().all_bits_set<x86_cpuid_leaf7::ebx::bmi2>(); }
826875

827876
/** True when avx512_enabled() holds and the leaf-7 EBX `avx512f` flag is set. */
inline bool avx512f() const noexcept
{
    if (!avx512_enabled())
    {
        return false;
    }
    return leaf7().all_bits_set<x86_cpuid_leaf7::ebx::avx512f>();
}
828877

829878
/** True when avx512_enabled() holds and the leaf-7 EBX `avx512dq` flag is set. */
inline bool avx512dq() const noexcept
{
    if (!avx512_enabled())
    {
        return false;
    }
    return leaf7().all_bits_set<x86_cpuid_leaf7::ebx::avx512dq>();
}
830879

880+
inline bool rdseed() const noexcept { return leaf7().all_bits_set<x86_cpuid_leaf7::ebx::rdseed>(); }
881+
882+
inline bool adx() const noexcept { return leaf7().all_bits_set<x86_cpuid_leaf7::ebx::adx>(); }
883+
831884
/** True when avx512_enabled() holds and the leaf-7 EBX `avx512ifma` flag is set. */
inline bool avx512ifma() const noexcept
{
    if (!avx512_enabled())
    {
        return false;
    }
    return leaf7().all_bits_set<x86_cpuid_leaf7::ebx::avx512ifma>();
}
832885

833886
/** True when avx512_enabled() holds and the leaf-7 EBX `avx512pf` flag is set. */
inline bool avx512pf() const noexcept
{
    if (!avx512_enabled())
    {
        return false;
    }
    return leaf7().all_bits_set<x86_cpuid_leaf7::ebx::avx512pf>();
}
@@ -836,16 +889,40 @@ namespace xsimd
836889

837890
/** True when avx512_enabled() holds and the leaf-7 EBX `avx512cd` flag is set. */
inline bool avx512cd() const noexcept
{
    if (!avx512_enabled())
    {
        return false;
    }
    return leaf7().all_bits_set<x86_cpuid_leaf7::ebx::avx512cd>();
}
838891

892+
inline bool sha() const noexcept { return leaf7().all_bits_set<x86_cpuid_leaf7::ebx::sha>(); }
893+
839894
/** True when avx512_enabled() holds and the leaf-7 EBX `avx512bw` flag is set. */
inline bool avx512bw() const noexcept
{
    if (!avx512_enabled())
    {
        return false;
    }
    return leaf7().all_bits_set<x86_cpuid_leaf7::ebx::avx512bw>();
}
840895

896+
/** True when avx512_enabled() holds and the leaf-7 EBX `avx512vl` flag is set. */
inline bool avx512vl() const noexcept
{
    if (!avx512_enabled())
    {
        return false;
    }
    return leaf7().all_bits_set<x86_cpuid_leaf7::ebx::avx512vl>();
}
897+
841898
/** True when avx512_enabled() holds and the leaf-7 ECX `avx512vbmi` flag is set. */
inline bool avx512vbmi() const noexcept
{
    if (!avx512_enabled())
    {
        return false;
    }
    return leaf7().all_bits_set<x86_cpuid_leaf7::ecx::avx512vbmi>();
}
842899

843900
/** True when avx512_enabled() holds and the leaf-7 ECX `avx512vbmi2` flag is set. */
inline bool avx512vbmi2() const noexcept
{
    if (!avx512_enabled())
    {
        return false;
    }
    return leaf7().all_bits_set<x86_cpuid_leaf7::ecx::avx512vbmi2>();
}
844901

902+
inline bool gfni() const noexcept { return avx512_enabled() && leaf7().all_bits_set<x86_cpuid_leaf7::ecx::gfni>(); }
903+
904+
inline bool vaes() const noexcept { return avx512_enabled() && leaf7().all_bits_set<x86_cpuid_leaf7::ecx::vaes>(); }
905+
906+
inline bool vpclmulqdq() const noexcept { return avx512_enabled() && leaf7().all_bits_set<x86_cpuid_leaf7::ecx::vpclmulqdq>(); }
907+
845908
/** True when avx512_enabled() holds and the leaf-7 ECX `avx512vnni_bw` flag is set. */
inline bool avx512vnni_bw() const noexcept
{
    if (!avx512_enabled())
    {
        return false;
    }
    return leaf7().all_bits_set<x86_cpuid_leaf7::ecx::avx512vnni_bw>();
}
846909

910+
/** True when avx512_enabled() holds and the leaf-7 ECX `avx512_bitalg` flag is set. */
inline bool avx512_bitalg() const noexcept
{
    if (!avx512_enabled())
    {
        return false;
    }
    return leaf7().all_bits_set<x86_cpuid_leaf7::ecx::avx512_bitalg>();
}
911+
912+
/** True when avx512_enabled() holds and the leaf-7 ECX `avx512_vpopcntdq` flag is set. */
inline bool avx512_vpopcntdq() const noexcept
{
    if (!avx512_enabled())
    {
        return false;
    }
    return leaf7().all_bits_set<x86_cpuid_leaf7::ecx::avx512_vpopcntdq>();
}
913+
914+
/** True when avx512_enabled() holds and the leaf-7 EDX `avx512_4vnniw` flag is set. */
inline bool avx512_4vnniw() const noexcept
{
    if (!avx512_enabled())
    {
        return false;
    }
    return leaf7().all_bits_set<x86_cpuid_leaf7::edx::avx512_4vnniw>();
}
915+
916+
/** True when avx512_enabled() holds and the leaf-7 EDX `avx512_4fmaps` flag is set. */
inline bool avx512_4fmaps() const noexcept
{
    if (!avx512_enabled())
    {
        return false;
    }
    return leaf7().all_bits_set<x86_cpuid_leaf7::edx::avx512_4fmaps>();
}
917+
918+
/** True when avx512_enabled() holds and the leaf-7 EDX `avx512_vp2intersect` flag is set. */
inline bool avx512_vp2intersect() const noexcept
{
    if (!avx512_enabled())
    {
        return false;
    }
    return leaf7().all_bits_set<x86_cpuid_leaf7::edx::avx512_vp2intersect>();
}
919+
920+
/** True when avx512_enabled() holds and the leaf-7 EDX `avx512_fp16` flag is set. */
inline bool avx512_fp16() const noexcept
{
    if (!avx512_enabled())
    {
        return false;
    }
    return leaf7().all_bits_set<x86_cpuid_leaf7::edx::avx512_fp16>();
}
921+
847922
/** True when avx_enabled() holds and the leaf-7 sub-leaf-1 EAX `avxvnni` flag is set. */
inline bool avxvnni() const noexcept
{
    if (!avx_enabled())
    {
        return false;
    }
    return leaf7sub1().all_bits_set<x86_cpuid_leaf7sub1::eax::avxvnni>();
}
848923

924+
/** True when avx512_enabled() holds and the leaf-7 sub-leaf-1 EAX `avx512_bf16` flag is set. */
inline bool avx512_bf16() const noexcept
{
    if (!avx512_enabled())
    {
        return false;
    }
    return leaf7sub1().all_bits_set<x86_cpuid_leaf7sub1::eax::avx512_bf16>();
}
925+
849926
/** True when avx_enabled() holds and the leaf-0x80000001 ECX `fma4` flag is set. */
inline bool fma4() const noexcept
{
    if (!avx_enabled())
    {
        return false;
    }
    return leaf80000001().all_bits_set<x86_cpuid_leaf80000001::ecx::fma4>();
}
850927
};
851928

0 commit comments

Comments
 (0)