Skip to content

Commit 6c5824a

Browse files
committed
Refactor x86 CPU features
1 parent 4d185a6 commit 6c5824a

4 files changed

Lines changed: 371 additions & 125 deletions

File tree

include/xsimd/config/xsimd_config.hpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,17 @@
3333
* @defgroup xsimd_config_macro Instruction Set Detection
3434
*/
3535

36+
/**
37+
* @ingroup xsimd_config_macro
38+
*
39+
* Set to 1 if the target is the x86 architecture family.
40+
*/
41+
#if defined(__x86_64__) || defined(__i386__) || defined(_M_AMD64) || defined(_M_IX86)
42+
#define XSIMD_TARGET_X86 1
43+
#else
44+
#define XSIMD_TARGET_X86 0
45+
#endif
46+
3647
/**
3748
* @ingroup xsimd_config_macro
3849
*

include/xsimd/config/xsimd_cpuid.hpp

Lines changed: 37 additions & 125 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,12 @@
1212
#ifndef XSIMD_CPUID_HPP
1313
#define XSIMD_CPUID_HPP
1414

15-
#include <algorithm>
1615
#include <cstring>
1716

17+
#include "../types/xsimd_all_registers.hpp"
18+
#include "../xsimd_cpu_features_x86.hpp"
19+
#include "xsimd_inline.hpp"
20+
1821
#if defined(__linux__) && (defined(__ARM_NEON) || defined(_M_ARM) || defined(__riscv_vector))
1922
#include <asm/hwcap.h>
2023
#include <sys/auxv.h>
@@ -25,13 +28,6 @@
2528

2629
#endif
2730

28-
#if defined(_MSC_VER)
29-
// Contains the definition of __cpuidex
30-
#include <intrin.h>
31-
#endif
32-
33-
#include "../types/xsimd_all_registers.hpp"
34-
3531
namespace xsimd
3632
{
3733
namespace detail
@@ -126,138 +122,54 @@ namespace xsimd
126122
#endif
127123
rvv = bool(getauxval(AT_HWCAP) & HWCAP_V);
128124
#endif
129-
130-
#elif defined(__x86_64__) || defined(__i386__) || defined(_M_AMD64) || defined(_M_IX86)
131-
132-
auto get_xcr0_low = []() noexcept
133-
{
134-
uint32_t xcr0;
135-
136-
#if defined(_MSC_VER) && _MSC_VER >= 1400
137-
138-
xcr0 = (uint32_t)_xgetbv(0);
139-
140-
#elif defined(__GNUC__)
141-
142-
__asm__(
143-
"xorl %%ecx, %%ecx\n"
144-
"xgetbv\n"
145-
: "=a"(xcr0)
146-
:
147-
#if defined(__i386__)
148-
: "ecx", "edx"
149-
#else
150-
: "rcx", "rdx"
151-
#endif
152-
);
153-
154-
#else /* _MSC_VER < 1400 */
155-
#error "_MSC_VER < 1400 is not supported"
156-
#endif /* _MSC_VER && _MSC_VER >= 1400 */
157-
return xcr0;
158-
};
159-
160-
auto get_cpuid = [](int reg[4], int level, int count = 0) noexcept
161-
{
162-
163-
#if defined(_MSC_VER)
164-
__cpuidex(reg, level, count);
165-
166-
#elif defined(__INTEL_COMPILER)
167-
__cpuid(reg, level);
168-
169-
#elif defined(__GNUC__) || defined(__clang__)
170-
171-
#if defined(__i386__) && defined(__PIC__)
172-
// %ebx may be the PIC register
173-
__asm__("xchg{l}\t{%%}ebx, %1\n\t"
174-
"cpuid\n\t"
175-
"xchg{l}\t{%%}ebx, %1\n\t"
176-
: "=a"(reg[0]), "=r"(reg[1]), "=c"(reg[2]), "=d"(reg[3])
177-
: "0"(level), "2"(count));
178-
179-
#else
180-
__asm__("cpuid\n\t"
181-
: "=a"(reg[0]), "=b"(reg[1]), "=c"(reg[2]), "=d"(reg[3])
182-
: "0"(level), "2"(count));
183-
#endif
184-
185-
#else
186-
#error "Unsupported configuration"
187125
#endif
188-
};
189-
190-
int regs1[4];
191-
192-
get_cpuid(regs1, 0x1);
193-
194-
// OS can explicitly disable the usage of SSE/AVX extensions
195-
// by setting an appropriate flag in CR0 register
196-
//
197-
// https://docs.kernel.org/admin-guide/hw-vuln/gather_data_sampling.html
126+
const auto cpuid = xsimd::x86_cpu_id::read();
127+
auto xcr0 = xsimd::x86_xcr0::make_false();
198128

199-
unsigned sse_state_os_enabled = 1;
129+
bool sse_state_os_enabled = true;
200130
// AVX and AVX512 strictly require OSXSAVE to be enabled by the OS.
201131
// If OSXSAVE is disabled (e.g., via bcdedit /set xsavedisable 1),
202132
// AVX state won't be preserved across context switches, so AVX cannot be used.
203-
unsigned avx_state_os_enabled = 0;
204-
unsigned avx512_state_os_enabled = 0;
133+
bool avx_state_os_enabled = false;
134+
bool avx512_state_os_enabled = false;
205135

206-
// OSXSAVE: A value of 1 indicates that the OS has set CR4.OSXSAVE[bit
207-
// 18] to enable XSETBV/XGETBV instructions to access XCR0 and
208-
// to support processor extended state management using
209-
// XSAVE/XRSTOR.
210-
bool osxsave = regs1[2] >> 27 & 1;
211-
if (osxsave)
136+
if (cpuid.osxsave())
212137
{
138+
xcr0 = xsimd::x86_xcr0::read();
213139

214-
uint32_t xcr0 = get_xcr0_low();
215-
216-
sse_state_os_enabled = xcr0 >> 1 & 1;
217-
avx_state_os_enabled = xcr0 >> 2 & sse_state_os_enabled;
218-
avx512_state_os_enabled = xcr0 >> 6 & avx_state_os_enabled;
140+
sse_state_os_enabled = xcr0.sse_state_os_enabled();
141+
avx_state_os_enabled = xcr0.avx_state_os_enabled();
142+
avx512_state_os_enabled = xcr0.avx512_state_os_enabled();
219143
}
220144

221-
sse2 = regs1[3] >> 26 & sse_state_os_enabled;
222-
sse3 = regs1[2] >> 0 & sse_state_os_enabled;
223-
ssse3 = regs1[2] >> 9 & sse_state_os_enabled;
224-
sse4_1 = regs1[2] >> 19 & sse_state_os_enabled;
225-
sse4_2 = regs1[2] >> 20 & sse_state_os_enabled;
226-
fma3_sse42 = regs1[2] >> 12 & sse_state_os_enabled;
227-
228-
avx = regs1[2] >> 28 & avx_state_os_enabled;
229-
fma3_avx = avx && fma3_sse42;
230-
231-
int regs8[4];
232-
get_cpuid(regs8, 0x80000001);
233-
fma4 = regs8[2] >> 16 & avx_state_os_enabled;
234-
235-
// sse4a = regs[2] >> 6 & 1;
145+
sse2 = cpuid.sse2() && sse_state_os_enabled;
146+
sse3 = cpuid.sse3() && sse_state_os_enabled;
147+
ssse3 = cpuid.ssse3() && sse_state_os_enabled;
148+
sse4_1 = cpuid.sse4_1() && sse_state_os_enabled;
149+
sse4_2 = cpuid.sse4_2() && sse_state_os_enabled;
150+
fma3_sse42 = cpuid.fma3() && sse_state_os_enabled;
236151

237-
// xop = regs[2] >> 11 & 1;
238-
239-
int regs7[4];
240-
get_cpuid(regs7, 0x7);
241-
avx2 = regs7[1] >> 5 & avx_state_os_enabled;
242-
243-
int regs7a[4];
244-
get_cpuid(regs7a, 0x7, 0x1);
245-
avxvnni = regs7a[0] >> 4 & avx_state_os_enabled;
152+
// sse4a not implemented in cpu_id yet
153+
// xop not implemented in cpu_id yet
246154

155+
avx = cpuid.avx() && avx_state_os_enabled;
156+
fma3_avx = avx && fma3_sse42;
157+
fma4 = cpuid.fma4() && avx_state_os_enabled;
158+
avx2 = cpuid.avx2() && avx_state_os_enabled;
159+
avxvnni = cpuid.avxvnni() && avx_state_os_enabled;
247160
fma3_avx2 = avx2 && fma3_sse42;
248161

249-
avx512f = regs7[1] >> 16 & avx512_state_os_enabled;
250-
avx512cd = regs7[1] >> 28 & avx512_state_os_enabled;
251-
avx512dq = regs7[1] >> 17 & avx512_state_os_enabled;
252-
avx512bw = regs7[1] >> 30 & avx512_state_os_enabled;
253-
avx512er = regs7[1] >> 27 & avx512_state_os_enabled;
254-
avx512pf = regs7[1] >> 26 & avx512_state_os_enabled;
255-
avx512ifma = regs7[1] >> 21 & avx512_state_os_enabled;
256-
avx512vbmi = regs7[2] >> 1 & avx512_state_os_enabled;
257-
avx512vbmi2 = regs7[2] >> 6 & avx512_state_os_enabled;
258-
avx512vnni_bw = regs7[2] >> 11 & avx512_state_os_enabled;
162+
avx512f = cpuid.avx512f() && avx512_state_os_enabled;
163+
avx512cd = cpuid.avx512cd() && avx512_state_os_enabled;
164+
avx512dq = cpuid.avx512dq() && avx512_state_os_enabled;
165+
avx512bw = cpuid.avx512bw() && avx512_state_os_enabled;
166+
avx512er = cpuid.avx512er() && avx512_state_os_enabled;
167+
avx512pf = cpuid.avx512pf() && avx512_state_os_enabled;
168+
avx512ifma = cpuid.avx512ifma() && avx512_state_os_enabled;
169+
avx512vbmi = cpuid.avx512vbmi() && avx512_state_os_enabled;
170+
avx512vbmi2 = cpuid.avx512vbmi2() && avx512_state_os_enabled;
171+
avx512vnni_bw = cpuid.avx512vnni_bw() && avx512_state_os_enabled;
259172
avx512vnni_vbmi2 = avx512vbmi2 && avx512vnni_bw;
260-
#endif
261173
}
262174
};
263175
} // namespace detail

include/xsimd/types/xsimd_all_registers.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99
* The full license is in the file LICENSE, distributed with this software. *
1010
****************************************************************************/
1111

12+
#include "../config/xsimd_inline.hpp"
13+
1214
#include "xsimd_fma3_sse_register.hpp"
1315
#include "xsimd_fma4_register.hpp"
1416
#include "xsimd_sse2_register.hpp"

0 commit comments

Comments
 (0)