1616#include < cassert>
1717#include < cstdint>
1818#include < cstring>
19+ #if __cplusplus >= 201703L
20+ #include < string_view>
21+ #endif
1922
2023#include " ../utils/bits.hpp"
2124#include " ./xsimd_config.hpp"
@@ -52,8 +55,13 @@ namespace xsimd
5255 template <typename E>
5356 using x86_reg32_bitset = utils::uint_bitset<E, x86_reg32_t >;
5457
55- template <x86_reg32_t leaf, x86_reg32_t subleaf, typename A, typename B, typename C, typename D>
56- class x86_cpuid_regs : private x86_reg32_bitset <A>, x86_reg32_bitset<B>, x86_reg32_bitset<C>, x86_reg32_bitset<D>
58+ template <x86_reg32_t leaf_num, x86_reg32_t subleaf_num,
59+ typename A, typename B, typename C, typename D>
60+ class x86_cpuid_regs
61+ : private x86_reg32_bitset<A>,
62+ private x86_reg32_bitset<B>,
63+ private x86_reg32_bitset<C>,
64+ private x86_reg32_bitset<D>
5765 {
5866 private:
5967 using eax_bitset = x86_reg32_bitset<A>;
@@ -75,6 +83,8 @@ namespace xsimd
7583 using ebx = B;
7684 using ecx = C;
7785 using edx = D;
86+ static constexpr x86_reg32_t leaf = leaf_num;
87+ static constexpr x86_reg32_t subleaf = subleaf_num;
7888
7989 inline static x86_cpuid_regs read ()
8090 {
@@ -84,9 +94,13 @@ namespace xsimd
8494 constexpr x86_cpuid_regs () noexcept = default;
8595
8696 using eax_bitset::all_bits_set;
97+ using eax_bitset::get_range;
8798 using ebx_bitset::all_bits_set;
99+ using ebx_bitset::get_range;
88100 using ecx_bitset::all_bits_set;
101+ using ecx_bitset::get_range;
89102 using edx_bitset::all_bits_set;
103+ using edx_bitset::get_range;
90104 };
91105
92106 template <typename T>
@@ -95,8 +109,84 @@ namespace xsimd
95109 typename T::ebx,
96110 typename T::ecx,
97111 typename T::edx>;
112+
113+ template <bool extended>
114+ struct x86_cpuid_highest_func
115+ {
116+ private:
117+ using x86_reg32_t = detail::x86_reg32_t ;
118+ using manufacturer_str = std::array<char , 3 * sizeof (x86_reg32_t )>;
119+
120+ public:
121+ static constexpr x86_reg32_t leaf = extended ? 0x80000000 : 0x0 ;
122+
123+ inline static x86_cpuid_highest_func read ()
124+ {
125+ auto regs = detail::x86_cpuid (0 );
126+ x86_cpuid_highest_func out {};
127+ // Highest function parameter in EAX
128+ out.m_highest_leaf = regs[0 ];
129+
130+ // Manufacturer string in EBX, EDX, ECX (in that order)
131+ char * manuf = out.m_manufacturer_id .data ();
132+ std::memcpy (manuf + 0 * sizeof (x86_reg32_t ), ®s[1 ], sizeof (x86_reg32_t ));
133+ std::memcpy (manuf + 1 * sizeof (x86_reg32_t ), ®s[3 ], sizeof (x86_reg32_t ));
134+ std::memcpy (manuf + 2 * sizeof (x86_reg32_t ), ®s[2 ], sizeof (x86_reg32_t ));
135+
136+ return out;
137+ }
138+
139+ constexpr x86_cpuid_highest_func () noexcept = default;
140+
141+ /* *
142+ * Highest available leaf in CPUID non-extended range.
143+ *
144+ * This is the highest function parameter (EAX) that can be passed to CPUID.
145+ * This is valid in the specified range:
146+ * - if `extended` is `false`, that is below `0x80000000`,
147+ * - if `extended` is `true`, that is above `0x80000000`,
148+ */
149+ constexpr x86_reg32_t highest_leaf () const noexcept
150+ {
151+ return m_highest_leaf;
152+ }
153+
154+ /* *
155+ * The manufacturer ID string in a static array.
156+ *
157+ * This raw character array is case specific and may contain both leading
158+ * and trailing whitespaces.
159+ * It cannot be assumed to be null terminated.
160+ * This is not implemented for all manufacturer when `extended` is `true`.
161+ */
162+ constexpr manufacturer_str manufacturer_id_raw () const noexcept
163+ {
164+ return m_manufacturer_id;
165+ }
166+
167+ #if __cplusplus >= 201703L
168+ constexpr std::string_view manufacturer_id () const noexcept
169+ {
170+ return { m_manufacturer_id.data (), m_manufacturer_id.size () };
171+ }
172+ #endif
173+
174+ private:
175+ manufacturer_str m_manufacturer_id {};
176+ x86_reg32_t m_highest_leaf {};
177+ };
98178 }
99179
180+ /* *
181+ * Highest CPUID Function Parameter and Manufacturer ID (EAX=0).
182+ *
183+ * Returns the highest leaf value supported by CPUID in the standard range
184+ * (below 0x80000000), and the processor manufacturer ID string.
185+ *
186+ * @see https://en.wikipedia.org/wiki/CPUID
187+ */
188+ using x86_cpuid_leaf0 = detail::x86_cpuid_highest_func<false >;
189+
100190 struct x86_cpuid_leaf1_traits
101191 {
102192 static constexpr detail::x86_reg32_t leaf = 1 ;
@@ -153,6 +243,10 @@ namespace xsimd
153243
154244 enum class eax
155245 {
246+ /* Start bit for the encoding of the highest subleaf available. */
247+ highest_subleaf_start = 0 ,
248+ /* End bit for the encoding of the highest subleaf available. */
249+ highest_subleaf_end = 32 ,
156250 };
157251 enum class ebx
158252 {
@@ -236,6 +330,16 @@ namespace xsimd
236330 */
237331 using x86_cpuid_leaf7sub1 = detail::make_x86_cpuid_regs<x86_cpuid_leaf7sub1_traits>;
238332
333+ /* *
334+ * Highest Extended CPUID Function Parameter (EAX=0x80000000).
335+ *
336+ * Returns the highest leaf value supported by CPUID in the extended range
337+ * (at or above 0x80000000), and the processor manufacturer ID string.
338+ *
339+ * @see https://en.wikipedia.org/wiki/CPUID
340+ */
341+ using x86_cpuid_leaf80000000 = detail::x86_cpuid_highest_func<true >;
342+
239343 struct x86_cpuid_leaf80000001_traits
240344 {
241345 static constexpr detail::x86_reg32_t leaf = 0x80000001 ;
@@ -449,20 +553,24 @@ namespace xsimd
449553 private:
450554 enum class status
451555 {
452- leaf1_valid = 0 ,
453- leaf7_valid = 1 ,
454- leaf7sub1_valid = 2 ,
455- leaf80000001_valid = 3 ,
456- xcr0_valid = 4 ,
556+ leaf0_valid = 0 ,
557+ leaf1_valid = 1 ,
558+ leaf7_valid = 2 ,
559+ leaf7sub1_valid = 3 ,
560+ leaf80000000_valid = 4 ,
561+ leaf80000001_valid = 5 ,
562+ xcr0_valid = 6 ,
457563 };
458564
459565 using status_bitset = utils::uint_bitset<status, std::uint32_t >;
460566
461- mutable x86_xcr0 m_xcr0 {};
567+ mutable x86_cpuid_leaf0 m_leaf0 {};
462568 mutable x86_cpuid_leaf1 m_leaf1 {};
463569 mutable x86_cpuid_leaf7 m_leaf7 {};
464570 mutable x86_cpuid_leaf7sub1 m_leaf7sub1 {};
571+ mutable x86_cpuid_leaf80000000 m_leaf80000000 {};
465572 mutable x86_cpuid_leaf80000001 m_leaf80000001 {};
573+ mutable x86_xcr0 m_xcr0 {};
466574 mutable status_bitset m_status {};
467575
468576 inline x86_xcr0 const & xcr0 () const noexcept
@@ -475,44 +583,119 @@ namespace xsimd
475583 return m_xcr0;
476584 }
477585
478- inline x86_cpuid_leaf1 const & leaf1 () const
586+ inline x86_cpuid_leaf0 const & leaf0 () const
479587 {
480- if (!m_status.bit_is_set <status::leaf1_valid >())
588+ if (!m_status.bit_is_set <status::leaf0_valid >())
481589 {
482- m_leaf1 = x86_cpuid_leaf1 ::read ();
483- m_status.set_bit <status::leaf1_valid >();
590+ m_leaf0 = x86_cpuid_leaf0 ::read ();
591+ m_status.set_bit <status::leaf0_valid >();
484592 }
485- return m_leaf1 ;
593+ return m_leaf0 ;
486594 }
487595
488- inline x86_cpuid_leaf7 const & leaf7 () const
596+ inline x86_cpuid_leaf80000000 const & leaf80000000 () const
489597 {
490- if (!m_status.bit_is_set <status::leaf7_valid>())
598+ if (!m_status.bit_is_set <status::leaf80000000_valid>())
599+ {
600+ m_leaf80000000 = x86_cpuid_leaf80000000::read ();
601+ m_status.set_bit <status::leaf80000000_valid>();
602+ }
603+ return m_leaf80000000;
604+ }
605+
606+ /* *
607+ * Internal utility to lazily read and cache a CPUID leaf.
608+ *
609+ * @tparam status_id The status bit tracking whether this leaf has been read and cached.
610+ * @tparam L The CPUID leaf type (e.g. x86_cpuid_leaf1, x86_cpuid_leaf7).
611+ * @param leaf_cache A non-const reference to the class member that stores the leaf
612+ * value. It must be non-const because this function may write to it on first
613+ * call. It is passed explicitly (rather than accessed via `this`) to allow
614+ * factoring the caching logic across different leaf members.
615+ * @return A const reference to `leaf_cache`. The non-const input / const-ref output
616+ * asymmetry is intentional: callers must not modify the cached value, but
617+ * this function needs write access to populate it.
618+ *
619+ * On first call, checks whether the leaf number is within the range advertised as
620+ * supported by CPUID (via leaf 0 for the standard range, leaf 0x80000000 for the
621+ * extended range). If supported, reads the leaf from the CPU; otherwise leaves
622+ * `leaf_cache` at its zero-initialized default (all feature bits false). Either
623+ * way, `status_id` is set so subsequent calls return immediately.
624+ */
625+ template <status status_id, typename L>
626+ inline auto const & safe_read_leaf (L& leaf_cache) const
627+ {
628+ // Check if already initialized
629+ if (m_status.bit_is_set <status_id>())
630+ {
631+ return leaf_cache;
632+ }
633+
634+ // Limit where we need to check leaf0 or leaf 80000000.
635+ constexpr auto extended_threshold = x86_cpuid_leaf80000000::leaf;
636+
637+ // Check if it is safe to call CPUID with this value.
638+ // First we identify if the leaf is in the regular or extended range.
639+ // TODO(C++17): if constexpr
640+ if (L::leaf < extended_threshold)
491641 {
492- m_leaf7 = x86_cpuid_leaf7::read ();
493- m_status.set_bit <status::leaf7_valid>();
642+ // Check leaf0 in regular range
643+ if (L::leaf <= leaf0 ().highest_leaf ())
644+ {
645+ leaf_cache = L::read ();
646+ }
494647 }
495- return m_leaf7;
648+ else
649+ {
650+ // Check leaf80000000 in extended range
651+ if (L::leaf <= leaf80000000 ().highest_leaf ())
652+ {
653+ leaf_cache = L::read ();
654+ }
655+ }
656+
657+ // Mark as valid in all cases, including if it was not read.
658+ // In this case it will be filled with zeros (all false).
659+ m_status.set_bit <status_id>();
660+ return leaf_cache;
661+ }
662+
663+ inline x86_cpuid_leaf1 const & leaf1 () const
664+ {
665+ return safe_read_leaf<status::leaf1_valid>(m_leaf1);
666+ }
667+
668+ inline x86_cpuid_leaf7 const & leaf7 () const
669+ {
670+ return safe_read_leaf<status::leaf7_valid>(m_leaf7);
496671 }
497672
498673 inline x86_cpuid_leaf7sub1 const & leaf7sub1 () const
499674 {
500- if (!m_status.bit_is_set <status::leaf7sub1_valid>())
675+ // Check if already initialized
676+ if (m_status.bit_is_set <status::leaf7sub1_valid>())
677+ {
678+ return m_leaf7sub1;
679+ }
680+
681+ // Check if safe to call CPUID with this value as subleaf.
682+ constexpr auto start = x86_cpuid_leaf7::eax::highest_subleaf_start;
683+ constexpr auto end = x86_cpuid_leaf7::eax::highest_subleaf_end;
684+ const auto highest_subleaf7 = leaf7 ().get_range <start, end>();
685+ if (x86_cpuid_leaf7sub1::subleaf <= highest_subleaf7)
501686 {
502687 m_leaf7sub1 = x86_cpuid_leaf7sub1::read ();
503- m_status.set_bit <status::leaf7sub1_valid>();
504688 }
689+
690+ // Mark as valid in all cases, including if it was not read.
691+ // In this case it will be filled with zeros (all false).
692+ m_status.set_bit <status::leaf7sub1_valid>();
505693 return m_leaf7sub1;
506694 }
507695
508696 inline x86_cpuid_leaf80000001 const & leaf80000001 () const
509697 {
510- if (!m_status.bit_is_set <status::leaf80000001_valid>())
511- {
512- m_leaf80000001 = x86_cpuid_leaf80000001::read ();
513- m_status.set_bit <status::leaf80000001_valid>();
514- }
515- return m_leaf80000001;
698+ return safe_read_leaf<status::leaf80000001_valid>(m_leaf80000001);
516699 }
517700 };
518701
0 commit comments