1313#define XSIMD_CPU_FEATURES_X86_HPP
1414
1515#include < array>
16+ #include < cassert>
1617#include < cstdint>
1718
1819#include " ../utils/bits.hpp"
@@ -33,6 +34,104 @@ namespace xsimd
3334 inline xcr0_reg_t get_xcr0_low () noexcept ;
3435 }
3536
37+ /* *
38+ * CPU Identification (CPUID) instruction results.
39+ *
40+ * The CPUID instruction provides detailed information about the processor,
41+ * including supported instruction set extensions (SSE, AVX, AVX-512, etc.).
42+ * This utility parses CPUID leaf values to detect available CPU features.
43+ *
44+ * @see https://en.wikipedia.org/wiki/CPUID
45+ */
46+ class x86_cpu_id
47+ {
48+ public:
49+ /* * Read the CpuId registers from the CPU if on the correct architecture. */
50+ inline static x86_cpu_id read ()
51+ {
52+ cpu_id_regs regs = {};
53+ // TODO(C++20): Use designated initializer
54+ regs.reg1 = detail::get_cpuid (0x1 );
55+ regs.reg7 = detail::get_cpuid (0x7 );
56+ regs.reg7a = detail::get_cpuid (0x7 , 0x1 );
57+ regs.reg8 = detail::get_cpuid (0x80000001 );
58+ return x86_cpu_id (regs);
59+ }
60+
61+ /* * Create a value which return false to everything. */
62+ constexpr x86_cpu_id () noexcept = default;
63+
64+ constexpr bool sse2 () const noexcept { return utils::all_bits_set<26 >(m_regs.reg1 [3 ]); }
65+
66+ constexpr bool sse3 () const noexcept { return utils::all_bits_set<0 >(m_regs.reg1 [2 ]); }
67+
68+ constexpr bool ssse3 () const noexcept { return utils::all_bits_set<9 >(m_regs.reg1 [2 ]); }
69+
70+ constexpr bool sse4_1 () const noexcept { return utils::all_bits_set<19 >(m_regs.reg1 [2 ]); }
71+
72+ constexpr bool sse4_2 () const noexcept { return utils::all_bits_set<20 >(m_regs.reg1 [2 ]); }
73+
74+ constexpr bool fma3 () const noexcept { return utils::all_bits_set<12 >(m_regs.reg1 [2 ]); }
75+
76+ /* *
77+ * Indicates whether the OS has enabled extended state management.
78+ *
79+ * When true, the OS has set bit 18 (OSXSAVE) in the CR4 control register,
80+ * enabling the XGETBV/XSETBV instructions to access XCR0 and support
81+ * processor extended state management using XSAVE/XRSTOR.
82+ *
83+ * This value is read from CPUID leaf 0x1, ECX bit 27, which reflects
84+ * the state of CR4.OSXSAVE.
85+ */
86+ constexpr bool osxsave () const noexcept { return utils::all_bits_set<27 >(m_regs.reg1 [2 ]); }
87+
88+ constexpr bool avx () const noexcept { return utils::all_bits_set<28 >(m_regs.reg1 [2 ]); }
89+
90+ constexpr bool avx2 () const noexcept { return utils::all_bits_set<5 >(m_regs.reg7 [1 ]); }
91+
92+ constexpr bool avx512f () const noexcept { return utils::all_bits_set<16 >(m_regs.reg7 [1 ]); }
93+
94+ constexpr bool avx512dq () const noexcept { return utils::all_bits_set<17 >(m_regs.reg7 [1 ]); }
95+
96+ constexpr bool avx512ifma () const noexcept { return utils::all_bits_set<21 >(m_regs.reg7 [1 ]); }
97+
98+ constexpr bool avx512pf () const noexcept { return utils::all_bits_set<26 >(m_regs.reg7 [1 ]); }
99+
100+ constexpr bool avx512er () const noexcept { return utils::all_bits_set<27 >(m_regs.reg7 [1 ]); }
101+
102+ constexpr bool avx512cd () const noexcept { return utils::all_bits_set<28 >(m_regs.reg7 [1 ]); }
103+
104+ constexpr bool avx512bw () const noexcept { return utils::all_bits_set<30 >(m_regs.reg7 [1 ]); }
105+
106+ constexpr bool avx512vbmi () const noexcept { return utils::all_bits_set<1 >(m_regs.reg7 [2 ]); }
107+
108+ constexpr bool avx512vbmi2 () const noexcept { return utils::all_bits_set<6 >(m_regs.reg7 [2 ]); }
109+
110+ constexpr bool avx512vnni_bw () const noexcept { return utils::all_bits_set<11 >(m_regs.reg7 [2 ]); }
111+
112+ constexpr bool avxvnni () const noexcept { return utils::all_bits_set<4 >(m_regs.reg7a [0 ]); }
113+
114+ constexpr bool fma4 () const noexcept { return utils::all_bits_set<16 >(m_regs.reg8 [2 ]); }
115+
116+ private:
117+ struct cpu_id_regs
118+ {
119+ using reg_t = detail::cpuid_reg_t ;
120+
121+ reg_t reg1 = {};
122+ reg_t reg7 = {};
123+ reg_t reg7a = {};
124+ reg_t reg8 = {};
125+ };
126+
127+ /* * Parse CpuInfo register values into individual components. */
128+ constexpr explicit x86_cpu_id (const cpu_id_regs& regs) noexcept
129+ : m_regs(regs)
130+ {
131+ }
132+ cpu_id_regs m_regs = {};
133+ };
134+
36135 /*
37136 * Extended Control Register 0 (XCR0).
38137 *
@@ -56,13 +155,20 @@ namespace xsimd
56155 constexpr static x86_xcr0 safe_default () noexcept
57156 {
58157 reg_t low = {};
59- low = utils::set_bit< static_cast <reg_t >(bit::sse)>(low );
158+ low = utils::make_bit_mask ( static_cast <reg_t >(bit::sse));
60159 return x86_xcr0 (low);
61160 }
62161
63- /* * Read the XCR0 register from the CPU if on the correct architecture. */
162+ /* *
163+ * Read the XCR0 register from the CPU if on the correct architecture.
164+ *
165+ * This is only safe to call if bit 18 of CR4.OSXSAVE has been set.
166+ *
167+ * @see cpu_id::osxsave
168+ */
64169 inline static x86_xcr0 read ()
65170 {
171+ assert (x86_cpu_id::read ().osxsave ());
66172 return x86_xcr0 (detail::get_xcr0_low ());
67173 }
68174
@@ -71,20 +177,20 @@ namespace xsimd
71177
72178 constexpr bool sse_enabled () const noexcept
73179 {
74- return bit_is_set <bit::sse>(m_low);
180+ return all_bits_set <bit::sse>(m_low);
75181 }
76182
77183 constexpr bool avx_enabled () const noexcept
78184 {
79185 // Check both SSE and AVX bits even though AVX must imply SSE
80- return bit_is_set <bit::sse, bit::avx>(m_low);
186+ return all_bits_set <bit::sse, bit::avx>(m_low);
81187 }
82188
83189 constexpr bool avx512_enabled () const noexcept
84190 {
85191 // Check all SSE, AVX, and AVX512 bits even though AVX512 must
86192 // imply AVX and SSE
87- return bit_is_set <bit::sse, bit::avx, bit::zmm_hi256>(m_low);
193+ return all_bits_set <bit::sse, bit::avx, bit::zmm_hi256>(m_low);
88194 }
89195
90196 private:
@@ -108,14 +214,16 @@ namespace xsimd
108214 zmm_hi256 = 6 ,
109215 /* * AVX-512 enabled and XSAVE support for upper ZMM registers. */
110216 hi16_zmm = 7 ,
217+ /* * Saving/restoring Intel Processor Trace state via XSAVE enabled.*/
218+ processor_trace = 8 ,
111219 /* * XSAVE support for PKRU register. */
112220 pkru = 9 ,
113221 };
114222
115223 template <bit... Bits>
116- static constexpr bool bit_is_set (reg_t value) noexcept
224+ static constexpr bool all_bits_set (reg_t value) noexcept
117225 {
118- return utils::bit_is_set <static_cast <reg_t >(Bits)...>(value);
226+ return utils::all_bits_set <static_cast <reg_t >(Bits)...>(value);
119227 }
120228
121229 /* * Parse a XCR0 value into individual components. */
@@ -126,122 +234,14 @@ namespace xsimd
126234 reg_t m_low = {};
127235 };
128236
129- /* *
130- * CPU Identification (CPUID) instruction results.
131- *
132- * The CPUID instruction provides detailed information about the processor,
133- * including supported instruction set extensions (SSE, AVX, AVX-512, etc.).
134- * This utility parses CPUID leaf values to detect available CPU features.
135- *
136- * @see https://en.wikipedia.org/wiki/CPUID
137- */
138- class x86_cpu_id
139- {
140- public:
141- /* *
142- * Read the CpuId registers from the CPU if on the correct architecture.
143- *
144- * This is only safe to call if bit 18 of CR4.OSXSAVE has been set.
145- *
146- * @see cpu_id::osxsave
147- */
148- inline static x86_cpu_id read ()
149- {
150- cpu_id_regs regs = {};
151- // TODO(C++20): Use designated initializer
152- regs.reg1 = detail::get_cpuid (0x1 );
153- regs.reg7 = detail::get_cpuid (0x7 );
154- regs.reg7a = detail::get_cpuid (0x7 , 0x1 );
155- regs.reg8 = detail::get_cpuid (0x80000001 );
156- return x86_cpu_id (regs);
157- }
158-
159- /* * Create a value which return false to everything. */
160- constexpr x86_cpu_id () noexcept = default;
161-
162- constexpr bool sse2 () const noexcept { return utils::bit_is_set<26 >(m_regs.reg1 [3 ]); }
163-
164- constexpr bool sse3 () const noexcept { return utils::bit_is_set<0 >(m_regs.reg1 [2 ]); }
165-
166- constexpr bool ssse3 () const noexcept { return utils::bit_is_set<9 >(m_regs.reg1 [2 ]); }
167-
168- constexpr bool sse4_1 () const noexcept { return utils::bit_is_set<19 >(m_regs.reg1 [2 ]); }
169-
170- constexpr bool sse4_2 () const noexcept { return utils::bit_is_set<20 >(m_regs.reg1 [2 ]); }
171-
172- constexpr bool fma3 () const noexcept { return utils::bit_is_set<12 >(m_regs.reg1 [2 ]); }
173-
174- /* *
175- * Indicates whether the OS has enabled extended state management.
176- *
177- * When true, the OS has set bit 18 (OSXSAVE) in the CR4 control register,
178- * enabling the XGETBV/XSETBV instructions to access XCR0 and support
179- * processor extended state management using XSAVE/XRSTOR.
180- *
181- * This value is read from CPUID leaf 0x1, ECX bit 27, which reflects
182- * the state of CR4.OSXSAVE.
183- */
184- constexpr bool osxsave () const noexcept { return utils::bit_is_set<27 >(m_regs.reg1 [2 ]); }
185-
186- constexpr bool avx () const noexcept { return utils::bit_is_set<28 >(m_regs.reg1 [2 ]); }
187-
188- constexpr bool avx2 () const noexcept { return utils::bit_is_set<5 >(m_regs.reg7 [1 ]); }
189-
190- constexpr bool avx512f () const noexcept { return utils::bit_is_set<16 >(m_regs.reg7 [1 ]); }
191-
192- constexpr bool avx512dq () const noexcept { return utils::bit_is_set<17 >(m_regs.reg7 [1 ]); }
193-
194- constexpr bool avx512ifma () const noexcept { return utils::bit_is_set<21 >(m_regs.reg7 [1 ]); }
195-
196- constexpr bool avx512pf () const noexcept { return utils::bit_is_set<26 >(m_regs.reg7 [1 ]); }
197-
198- constexpr bool avx512er () const noexcept { return utils::bit_is_set<27 >(m_regs.reg7 [1 ]); }
199-
200- constexpr bool avx512cd () const noexcept { return utils::bit_is_set<28 >(m_regs.reg7 [1 ]); }
201-
202- constexpr bool avx512bw () const noexcept { return utils::bit_is_set<30 >(m_regs.reg7 [1 ]); }
203-
204- constexpr bool avx512vbmi () const noexcept { return utils::bit_is_set<1 >(m_regs.reg7 [2 ]); }
205-
206- constexpr bool avx512vbmi2 () const noexcept { return utils::bit_is_set<6 >(m_regs.reg7 [2 ]); }
207-
208- constexpr bool avx512vnni_bw () const noexcept { return utils::bit_is_set<11 >(m_regs.reg7 [2 ]); }
209-
210- constexpr bool avxvnni () const noexcept { return utils::bit_is_set<4 >(m_regs.reg7a [0 ]); }
211-
212- constexpr bool fma4 () const noexcept { return utils::bit_is_set<16 >(m_regs.reg8 [2 ]); }
213-
214- private:
215- struct cpu_id_regs
216- {
217- using reg_t = detail::cpuid_reg_t ;
218-
219- reg_t reg1 = {};
220- reg_t reg7 = {};
221- reg_t reg7a = {};
222- reg_t reg8 = {};
223- };
224-
225- /* * Parse CpuInfo register values into individual components. */
226- constexpr explicit x86_cpu_id (const cpu_id_regs& regs) noexcept
227- : m_regs(regs)
228- {
229- }
230- cpu_id_regs m_regs = {};
231- };
232-
233237 namespace detail
234238 {
239+ #if XSIMD_TARGET_X86
240+
235241 inline cpuid_reg_t get_cpuid (int level, int count) noexcept
236242 {
237243 cpuid_reg_t reg = {};
238-
239- #if !XSIMD_TARGET_X86
240- (void )level;
241- (void )count;
242- return {}; // All bits to zero
243-
244- #elif defined(_MSC_VER)
244+ #if defined(_MSC_VER)
245245 __cpuidex (reg.data (), level, count);
246246
247247#elif defined(__INTEL_COMPILER)
@@ -268,11 +268,12 @@ namespace xsimd
268268
269269 inline xcr0_reg_t get_xcr0_low () noexcept
270270 {
271- #if !XSIMD_TARGET_X86
272- return {}; // All bits to zero
273-
274- #elif defined(_MSC_VER) && _MSC_VER >= 1400
271+ #if defined(_MSC_VER)
272+ #if _MSC_VER >= 1400
275273 return static_cast <xcr0_reg_t >(_xgetbv (0 ));
274+ #else
275+ #error "_MSC_VER < 1400 is not supported"
276+ #endif
276277
277278#elif defined(__GNUC__)
278279 xcr0_reg_t xcr0 = {};
@@ -288,11 +289,22 @@ namespace xsimd
288289#endif
289290 );
290291 return xcr0;
292+ #endif
293+ }
291294
292- #else /* _MSC_VER < 1400 */
293- #error "_MSC_VER < 1400 is not supported"
294- #endif /* _MSC_VER && _MSC_VER >= 1400 */
295- };
295+ #else // XSIMD_TARGET_X86
296+
297+ inline cpuid_reg_t get_cpuid (int level, int count) noexcept
298+ {
299+ return {}; // All bits to zero
300+ }
301+
302+ inline xcr0_reg_t get_xcr0_low () noexcept
303+ {
304+ return {}; // All bits to zero
305+ }
306+
307+ #endif // XSIMD_TARGET_X86
296308 }
297309}
298310#endif
0 commit comments