Skip to content

Commit ad8556a

Browse files
committed
Improve x86 cpu feature code clarity
1 parent 363258b commit ad8556a

File tree

3 files changed

+143
-128
lines changed

3 files changed

+143
-128
lines changed

include/xsimd/config/xsimd_cpu_features_x86.hpp

Lines changed: 138 additions & 126 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#define XSIMD_CPU_FEATURES_X86_HPP
1414

1515
#include <array>
16+
#include <cassert>
1617
#include <cstdint>
1718

1819
#include "../utils/bits.hpp"
@@ -33,6 +34,104 @@ namespace xsimd
3334
inline xcr0_reg_t get_xcr0_low() noexcept;
3435
}
3536

37+
/**
38+
* CPU Identification (CPUID) instruction results.
39+
*
40+
* The CPUID instruction provides detailed information about the processor,
41+
* including supported instruction set extensions (SSE, AVX, AVX-512, etc.).
42+
* This utility parses CPUID leaf values to detect available CPU features.
43+
*
44+
* @see https://en.wikipedia.org/wiki/CPUID
45+
*/
46+
class x86_cpu_id
47+
{
48+
public:
49+
/** Read the CpuId registers from the CPU if on the correct architecture. */
50+
inline static x86_cpu_id read()
51+
{
52+
cpu_id_regs regs = {};
53+
// TODO(C++20): Use designated initializer
54+
regs.reg1 = detail::get_cpuid(0x1);
55+
regs.reg7 = detail::get_cpuid(0x7);
56+
regs.reg7a = detail::get_cpuid(0x7, 0x1);
57+
regs.reg8 = detail::get_cpuid(0x80000001);
58+
return x86_cpu_id(regs);
59+
}
60+
61+
/** Create a value which returns false for every feature query. */
62+
constexpr x86_cpu_id() noexcept = default;
63+
64+
constexpr bool sse2() const noexcept { return utils::all_bits_set<26>(m_regs.reg1[3]); }
65+
66+
constexpr bool sse3() const noexcept { return utils::all_bits_set<0>(m_regs.reg1[2]); }
67+
68+
constexpr bool ssse3() const noexcept { return utils::all_bits_set<9>(m_regs.reg1[2]); }
69+
70+
constexpr bool sse4_1() const noexcept { return utils::all_bits_set<19>(m_regs.reg1[2]); }
71+
72+
constexpr bool sse4_2() const noexcept { return utils::all_bits_set<20>(m_regs.reg1[2]); }
73+
74+
constexpr bool fma3() const noexcept { return utils::all_bits_set<12>(m_regs.reg1[2]); }
75+
76+
/**
77+
* Indicates whether the OS has enabled extended state management.
78+
*
79+
* When true, the OS has set bit 18 (OSXSAVE) in the CR4 control register,
80+
* enabling the XGETBV/XSETBV instructions to access XCR0 and support
81+
* processor extended state management using XSAVE/XRSTOR.
82+
*
83+
* This value is read from CPUID leaf 0x1, ECX bit 27, which reflects
84+
* the state of CR4.OSXSAVE.
85+
*/
86+
constexpr bool osxsave() const noexcept { return utils::all_bits_set<27>(m_regs.reg1[2]); }
87+
88+
constexpr bool avx() const noexcept { return utils::all_bits_set<28>(m_regs.reg1[2]); }
89+
90+
constexpr bool avx2() const noexcept { return utils::all_bits_set<5>(m_regs.reg7[1]); }
91+
92+
constexpr bool avx512f() const noexcept { return utils::all_bits_set<16>(m_regs.reg7[1]); }
93+
94+
constexpr bool avx512dq() const noexcept { return utils::all_bits_set<17>(m_regs.reg7[1]); }
95+
96+
constexpr bool avx512ifma() const noexcept { return utils::all_bits_set<21>(m_regs.reg7[1]); }
97+
98+
constexpr bool avx512pf() const noexcept { return utils::all_bits_set<26>(m_regs.reg7[1]); }
99+
100+
constexpr bool avx512er() const noexcept { return utils::all_bits_set<27>(m_regs.reg7[1]); }
101+
102+
constexpr bool avx512cd() const noexcept { return utils::all_bits_set<28>(m_regs.reg7[1]); }
103+
104+
constexpr bool avx512bw() const noexcept { return utils::all_bits_set<30>(m_regs.reg7[1]); }
105+
106+
constexpr bool avx512vbmi() const noexcept { return utils::all_bits_set<1>(m_regs.reg7[2]); }
107+
108+
constexpr bool avx512vbmi2() const noexcept { return utils::all_bits_set<6>(m_regs.reg7[2]); }
109+
110+
constexpr bool avx512vnni_bw() const noexcept { return utils::all_bits_set<11>(m_regs.reg7[2]); }
111+
112+
constexpr bool avxvnni() const noexcept { return utils::all_bits_set<4>(m_regs.reg7a[0]); }
113+
114+
constexpr bool fma4() const noexcept { return utils::all_bits_set<16>(m_regs.reg8[2]); }
115+
116+
private:
117+
struct cpu_id_regs
118+
{
119+
using reg_t = detail::cpuid_reg_t;
120+
121+
reg_t reg1 = {};
122+
reg_t reg7 = {};
123+
reg_t reg7a = {};
124+
reg_t reg8 = {};
125+
};
126+
127+
/** Store raw CPUID register values; individual feature bits are decoded on demand by the accessors. */
128+
constexpr explicit x86_cpu_id(const cpu_id_regs& regs) noexcept
129+
: m_regs(regs)
130+
{
131+
}
132+
cpu_id_regs m_regs = {};
133+
};
134+
36135
/*
37136
* Extended Control Register 0 (XCR0).
38137
*
@@ -56,13 +155,20 @@ namespace xsimd
56155
constexpr static x86_xcr0 safe_default() noexcept
57156
{
58157
reg_t low = {};
59-
low = utils::set_bit<static_cast<reg_t>(bit::sse)>(low);
158+
low = utils::make_bit_mask(static_cast<reg_t>(bit::sse));
60159
return x86_xcr0(low);
61160
}
62161

63-
/** Read the XCR0 register from the CPU if on the correct architecture. */
162+
/**
163+
* Read the XCR0 register from the CPU if on the correct architecture.
164+
*
165+
* This is only safe to call if the OSXSAVE bit (bit 18) of CR4 has been set.
166+
*
167+
* @see x86_cpu_id::osxsave
168+
*/
64169
inline static x86_xcr0 read()
65170
{
171+
assert(x86_cpu_id::read().osxsave());
66172
return x86_xcr0(detail::get_xcr0_low());
67173
}
68174

@@ -71,20 +177,20 @@ namespace xsimd
71177

72178
constexpr bool sse_enabled() const noexcept
73179
{
74-
return bit_is_set<bit::sse>(m_low);
180+
return all_bits_set<bit::sse>(m_low);
75181
}
76182

77183
constexpr bool avx_enabled() const noexcept
78184
{
79185
// Check both SSE and AVX bits even though AVX must imply SSE
80-
return bit_is_set<bit::sse, bit::avx>(m_low);
186+
return all_bits_set<bit::sse, bit::avx>(m_low);
81187
}
82188

83189
constexpr bool avx512_enabled() const noexcept
84190
{
85191
// Check all SSE, AVX, and AVX512 bits even though AVX512 must
86192
// imply AVX and SSE
87-
return bit_is_set<bit::sse, bit::avx, bit::zmm_hi256>(m_low);
193+
return all_bits_set<bit::sse, bit::avx, bit::zmm_hi256>(m_low);
88194
}
89195

90196
private:
@@ -108,14 +214,16 @@ namespace xsimd
108214
zmm_hi256 = 6,
109215
/** AVX-512 enabled and XSAVE support for upper ZMM registers. */
110216
hi16_zmm = 7,
217+
/** Saving/restoring Intel Processor Trace state via XSAVE enabled. */
218+
processor_trace = 8,
111219
/** XSAVE support for PKRU register. */
112220
pkru = 9,
113221
};
114222

115223
template <bit... Bits>
116-
static constexpr bool bit_is_set(reg_t value) noexcept
224+
static constexpr bool all_bits_set(reg_t value) noexcept
117225
{
118-
return utils::bit_is_set<static_cast<reg_t>(Bits)...>(value);
226+
return utils::all_bits_set<static_cast<reg_t>(Bits)...>(value);
119227
}
120228

121229
/** Parse a XCR0 value into individual components. */
@@ -126,122 +234,14 @@ namespace xsimd
126234
reg_t m_low = {};
127235
};
128236

129-
/**
130-
* CPU Identification (CPUID) instruction results.
131-
*
132-
* The CPUID instruction provides detailed information about the processor,
133-
* including supported instruction set extensions (SSE, AVX, AVX-512, etc.).
134-
* This utility parses CPUID leaf values to detect available CPU features.
135-
*
136-
* @see https://en.wikipedia.org/wiki/CPUID
137-
*/
138-
class x86_cpu_id
139-
{
140-
public:
141-
/**
142-
* Read the CpuId registers from the CPU if on the correct architecture.
143-
*
144-
* This is only safe to call if bit 18 of CR4.OSXSAVE has been set.
145-
*
146-
* @see cpu_id::osxsave
147-
*/
148-
inline static x86_cpu_id read()
149-
{
150-
cpu_id_regs regs = {};
151-
// TODO(C++20): Use designated initializer
152-
regs.reg1 = detail::get_cpuid(0x1);
153-
regs.reg7 = detail::get_cpuid(0x7);
154-
regs.reg7a = detail::get_cpuid(0x7, 0x1);
155-
regs.reg8 = detail::get_cpuid(0x80000001);
156-
return x86_cpu_id(regs);
157-
}
158-
159-
/** Create a value which return false to everything. */
160-
constexpr x86_cpu_id() noexcept = default;
161-
162-
constexpr bool sse2() const noexcept { return utils::bit_is_set<26>(m_regs.reg1[3]); }
163-
164-
constexpr bool sse3() const noexcept { return utils::bit_is_set<0>(m_regs.reg1[2]); }
165-
166-
constexpr bool ssse3() const noexcept { return utils::bit_is_set<9>(m_regs.reg1[2]); }
167-
168-
constexpr bool sse4_1() const noexcept { return utils::bit_is_set<19>(m_regs.reg1[2]); }
169-
170-
constexpr bool sse4_2() const noexcept { return utils::bit_is_set<20>(m_regs.reg1[2]); }
171-
172-
constexpr bool fma3() const noexcept { return utils::bit_is_set<12>(m_regs.reg1[2]); }
173-
174-
/**
175-
* Indicates whether the OS has enabled extended state management.
176-
*
177-
* When true, the OS has set bit 18 (OSXSAVE) in the CR4 control register,
178-
* enabling the XGETBV/XSETBV instructions to access XCR0 and support
179-
* processor extended state management using XSAVE/XRSTOR.
180-
*
181-
* This value is read from CPUID leaf 0x1, ECX bit 27, which reflects
182-
* the state of CR4.OSXSAVE.
183-
*/
184-
constexpr bool osxsave() const noexcept { return utils::bit_is_set<27>(m_regs.reg1[2]); }
185-
186-
constexpr bool avx() const noexcept { return utils::bit_is_set<28>(m_regs.reg1[2]); }
187-
188-
constexpr bool avx2() const noexcept { return utils::bit_is_set<5>(m_regs.reg7[1]); }
189-
190-
constexpr bool avx512f() const noexcept { return utils::bit_is_set<16>(m_regs.reg7[1]); }
191-
192-
constexpr bool avx512dq() const noexcept { return utils::bit_is_set<17>(m_regs.reg7[1]); }
193-
194-
constexpr bool avx512ifma() const noexcept { return utils::bit_is_set<21>(m_regs.reg7[1]); }
195-
196-
constexpr bool avx512pf() const noexcept { return utils::bit_is_set<26>(m_regs.reg7[1]); }
197-
198-
constexpr bool avx512er() const noexcept { return utils::bit_is_set<27>(m_regs.reg7[1]); }
199-
200-
constexpr bool avx512cd() const noexcept { return utils::bit_is_set<28>(m_regs.reg7[1]); }
201-
202-
constexpr bool avx512bw() const noexcept { return utils::bit_is_set<30>(m_regs.reg7[1]); }
203-
204-
constexpr bool avx512vbmi() const noexcept { return utils::bit_is_set<1>(m_regs.reg7[2]); }
205-
206-
constexpr bool avx512vbmi2() const noexcept { return utils::bit_is_set<6>(m_regs.reg7[2]); }
207-
208-
constexpr bool avx512vnni_bw() const noexcept { return utils::bit_is_set<11>(m_regs.reg7[2]); }
209-
210-
constexpr bool avxvnni() const noexcept { return utils::bit_is_set<4>(m_regs.reg7a[0]); }
211-
212-
constexpr bool fma4() const noexcept { return utils::bit_is_set<16>(m_regs.reg8[2]); }
213-
214-
private:
215-
struct cpu_id_regs
216-
{
217-
using reg_t = detail::cpuid_reg_t;
218-
219-
reg_t reg1 = {};
220-
reg_t reg7 = {};
221-
reg_t reg7a = {};
222-
reg_t reg8 = {};
223-
};
224-
225-
/** Parse CpuInfo register values into individual components. */
226-
constexpr explicit x86_cpu_id(const cpu_id_regs& regs) noexcept
227-
: m_regs(regs)
228-
{
229-
}
230-
cpu_id_regs m_regs = {};
231-
};
232-
233237
namespace detail
234238
{
239+
#if XSIMD_TARGET_X86
240+
235241
inline cpuid_reg_t get_cpuid(int level, int count) noexcept
236242
{
237243
cpuid_reg_t reg = {};
238-
239-
#if !XSIMD_TARGET_X86
240-
(void)level;
241-
(void)count;
242-
return {}; // All bits to zero
243-
244-
#elif defined(_MSC_VER)
244+
#if defined(_MSC_VER)
245245
__cpuidex(reg.data(), level, count);
246246

247247
#elif defined(__INTEL_COMPILER)
@@ -268,11 +268,12 @@ namespace xsimd
268268

269269
inline xcr0_reg_t get_xcr0_low() noexcept
270270
{
271-
#if !XSIMD_TARGET_X86
272-
return {}; // All bits to zero
273-
274-
#elif defined(_MSC_VER) && _MSC_VER >= 1400
271+
#if defined(_MSC_VER)
272+
#if _MSC_VER >= 1400
275273
return static_cast<xcr0_reg_t>(_xgetbv(0));
274+
#else
275+
#error "_MSC_VER < 1400 is not supported"
276+
#endif
276277

277278
#elif defined(__GNUC__)
278279
xcr0_reg_t xcr0 = {};
@@ -288,11 +289,22 @@ namespace xsimd
288289
#endif
289290
);
290291
return xcr0;
292+
#endif
293+
}
291294

292-
#else /* _MSC_VER < 1400 */
293-
#error "_MSC_VER < 1400 is not supported"
294-
#endif /* _MSC_VER && _MSC_VER >= 1400 */
295-
};
295+
#else // XSIMD_TARGET_X86
296+
297+
inline cpuid_reg_t get_cpuid(int level, int count) noexcept
298+
{
299+
return {}; // All bits to zero
300+
}
301+
302+
inline xcr0_reg_t get_xcr0_low() noexcept
303+
{
304+
return {}; // All bits to zero
305+
}
306+
307+
#endif // XSIMD_TARGET_X86
296308
}
297309
}
298310
#endif

include/xsimd/config/xsimd_cpuid.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ namespace xsimd
115115
rvv = bool(getauxval(AT_HWCAP) & HWCAP_V);
116116
#endif
117117
#endif
118-
// Safe on all platforms, we simply be false
118+
// Safe on all platforms: on non-x86 targets every feature bit reads as false.
119119
const auto cpuid = xsimd::x86_cpu_id::read();
120120
const auto xcr0 = cpuid.osxsave() ? x86_xcr0::read() : x86_xcr0::safe_default();
121121

include/xsimd/utils/bits.hpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,13 +12,16 @@
1212
#ifndef XSIMD_CPUID_UTILS_HPP
1313
#define XSIMD_CPUID_UTILS_HPP
1414

15+
#include <cassert>
1516
namespace xsimd
1617
{
1718
namespace utils
1819
{
1920
template <typename I>
2021
constexpr I make_bit_mask(I bit)
2122
{
23+
assert(bit >= 0);
24+
assert(bit < 8 * sizeof(I));
2225
return static_cast<I>(I { 1 } << bit);
2326
}
2427

@@ -30,7 +33,7 @@ namespace xsimd
3033
}
3134

3235
template <int... Bits, typename I>
33-
constexpr bool bit_is_set(I value)
36+
constexpr bool all_bits_set(I value)
3437
{
3538
constexpr I mask = make_bit_mask<I>(static_cast<I>(Bits)...);
3639
return (value & mask) == mask;

0 commit comments

Comments
 (0)