Skip to content

Commit 23282a2

Browse files
authored
Safe reading of CPUID (#1277)
* Add highest func parameter * Properly check highest leaf * Properly check if subleaf is available * Add bit manip tests * Restrict bit manip to unsigned types * Document cache leaf function
1 parent e2f0536 commit 23282a2

File tree

4 files changed

+362
-27
lines changed

4 files changed

+362
-27
lines changed

include/xsimd/config/xsimd_cpu_features_x86.hpp

Lines changed: 209 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,9 @@
1616
#include <cassert>
1717
#include <cstdint>
1818
#include <cstring>
19+
#if __cplusplus >= 201703L
20+
#include <string_view>
21+
#endif
1922

2023
#include "../utils/bits.hpp"
2124
#include "./xsimd_config.hpp"
@@ -52,8 +55,13 @@ namespace xsimd
5255
template <typename E>
5356
using x86_reg32_bitset = utils::uint_bitset<E, x86_reg32_t>;
5457

55-
template <x86_reg32_t leaf, x86_reg32_t subleaf, typename A, typename B, typename C, typename D>
56-
class x86_cpuid_regs : private x86_reg32_bitset<A>, x86_reg32_bitset<B>, x86_reg32_bitset<C>, x86_reg32_bitset<D>
58+
template <x86_reg32_t leaf_num, x86_reg32_t subleaf_num,
59+
typename A, typename B, typename C, typename D>
60+
class x86_cpuid_regs
61+
: private x86_reg32_bitset<A>,
62+
private x86_reg32_bitset<B>,
63+
private x86_reg32_bitset<C>,
64+
private x86_reg32_bitset<D>
5765
{
5866
private:
5967
using eax_bitset = x86_reg32_bitset<A>;
@@ -75,6 +83,8 @@ namespace xsimd
7583
using ebx = B;
7684
using ecx = C;
7785
using edx = D;
86+
static constexpr x86_reg32_t leaf = leaf_num;
87+
static constexpr x86_reg32_t subleaf = subleaf_num;
7888

7989
inline static x86_cpuid_regs read()
8090
{
@@ -84,9 +94,13 @@ namespace xsimd
8494
constexpr x86_cpuid_regs() noexcept = default;
8595

8696
using eax_bitset::all_bits_set;
97+
using eax_bitset::get_range;
8798
using ebx_bitset::all_bits_set;
99+
using ebx_bitset::get_range;
88100
using ecx_bitset::all_bits_set;
101+
using ecx_bitset::get_range;
89102
using edx_bitset::all_bits_set;
103+
using edx_bitset::get_range;
90104
};
91105

92106
template <typename T>
@@ -95,8 +109,84 @@ namespace xsimd
95109
typename T::ebx,
96110
typename T::ecx,
97111
typename T::edx>;
112+
113+
template <bool extended>
114+
struct x86_cpuid_highest_func
115+
{
116+
private:
117+
using x86_reg32_t = detail::x86_reg32_t;
118+
using manufacturer_str = std::array<char, 3 * sizeof(x86_reg32_t)>;
119+
120+
public:
121+
static constexpr x86_reg32_t leaf = extended ? 0x80000000 : 0x0;
122+
123+
inline static x86_cpuid_highest_func read()
124+
{
125+
auto regs = detail::x86_cpuid(0);
126+
x86_cpuid_highest_func out {};
127+
// Highest function parameter in EAX
128+
out.m_highest_leaf = regs[0];
129+
130+
// Manufacturer string in EBX, EDX, ECX (in that order)
131+
char* manuf = out.m_manufacturer_id.data();
132+
std::memcpy(manuf + 0 * sizeof(x86_reg32_t), &regs[1], sizeof(x86_reg32_t));
133+
std::memcpy(manuf + 1 * sizeof(x86_reg32_t), &regs[3], sizeof(x86_reg32_t));
134+
std::memcpy(manuf + 2 * sizeof(x86_reg32_t), &regs[2], sizeof(x86_reg32_t));
135+
136+
return out;
137+
}
138+
139+
constexpr x86_cpuid_highest_func() noexcept = default;
140+
141+
/**
142+
* Highest available leaf in CPUID non-extended range.
143+
*
144+
* This is the highest function parameter (EAX) that can be passed to CPUID.
145+
* This is valid in the specified range:
146+
* - if `extended` is `false`, that is below `0x80000000`,
147+
* - if `extended` is `true`, that is above `0x80000000`,
148+
*/
149+
constexpr x86_reg32_t highest_leaf() const noexcept
150+
{
151+
return m_highest_leaf;
152+
}
153+
154+
/**
155+
* The manufacturer ID string in a static array.
156+
*
157+
* This raw character array is case specific and may contain both leading
158+
* and trailing whitespaces.
159+
* It cannot be assumed to be null terminated.
160+
* This is not implemented for all manufacturer when `extended` is `true`.
161+
*/
162+
constexpr manufacturer_str manufacturer_id_raw() const noexcept
163+
{
164+
return m_manufacturer_id;
165+
}
166+
167+
#if __cplusplus >= 201703L
168+
constexpr std::string_view manufacturer_id() const noexcept
169+
{
170+
return { m_manufacturer_id.data(), m_manufacturer_id.size() };
171+
}
172+
#endif
173+
174+
private:
175+
manufacturer_str m_manufacturer_id {};
176+
x86_reg32_t m_highest_leaf {};
177+
};
98178
}
99179

180+
/**
181+
* Highest CPUID Function Parameter and Manufacturer ID (EAX=0).
182+
*
183+
* Returns the highest leaf value supported by CPUID in the standard range
184+
* (below 0x80000000), and the processor manufacturer ID string.
185+
*
186+
* @see https://en.wikipedia.org/wiki/CPUID
187+
*/
188+
using x86_cpuid_leaf0 = detail::x86_cpuid_highest_func<false>;
189+
100190
struct x86_cpuid_leaf1_traits
101191
{
102192
static constexpr detail::x86_reg32_t leaf = 1;
@@ -153,6 +243,10 @@ namespace xsimd
153243

154244
enum class eax
155245
{
246+
/* Start bit for the encoding of the highest subleaf available. */
247+
highest_subleaf_start = 0,
248+
/* End bit (exclusive, one past bit 31) for the encoding of the highest subleaf available. */
249+
highest_subleaf_end = 32,
156250
};
157251
enum class ebx
158252
{
@@ -236,6 +330,16 @@ namespace xsimd
236330
*/
237331
using x86_cpuid_leaf7sub1 = detail::make_x86_cpuid_regs<x86_cpuid_leaf7sub1_traits>;
238332

333+
/**
334+
* Highest Extended CPUID Function Parameter (EAX=0x80000000).
335+
*
336+
* Returns the highest leaf value supported by CPUID in the extended range
337+
(at or above 0x80000000) and, on manufacturers that implement it, the manufacturer ID string.
338+
*
339+
* @see https://en.wikipedia.org/wiki/CPUID
340+
*/
341+
using x86_cpuid_leaf80000000 = detail::x86_cpuid_highest_func<true>;
342+
239343
struct x86_cpuid_leaf80000001_traits
240344
{
241345
static constexpr detail::x86_reg32_t leaf = 0x80000001;
@@ -449,20 +553,24 @@ namespace xsimd
449553
private:
450554
enum class status
451555
{
452-
leaf1_valid = 0,
453-
leaf7_valid = 1,
454-
leaf7sub1_valid = 2,
455-
leaf80000001_valid = 3,
456-
xcr0_valid = 4,
556+
leaf0_valid = 0,
557+
leaf1_valid = 1,
558+
leaf7_valid = 2,
559+
leaf7sub1_valid = 3,
560+
leaf80000000_valid = 4,
561+
leaf80000001_valid = 5,
562+
xcr0_valid = 6,
457563
};
458564

459565
using status_bitset = utils::uint_bitset<status, std::uint32_t>;
460566

461-
mutable x86_xcr0 m_xcr0 {};
567+
mutable x86_cpuid_leaf0 m_leaf0 {};
462568
mutable x86_cpuid_leaf1 m_leaf1 {};
463569
mutable x86_cpuid_leaf7 m_leaf7 {};
464570
mutable x86_cpuid_leaf7sub1 m_leaf7sub1 {};
571+
mutable x86_cpuid_leaf80000000 m_leaf80000000 {};
465572
mutable x86_cpuid_leaf80000001 m_leaf80000001 {};
573+
mutable x86_xcr0 m_xcr0 {};
466574
mutable status_bitset m_status {};
467575

468576
inline x86_xcr0 const& xcr0() const noexcept
@@ -475,44 +583,119 @@ namespace xsimd
475583
return m_xcr0;
476584
}
477585

478-
inline x86_cpuid_leaf1 const& leaf1() const
586+
inline x86_cpuid_leaf0 const& leaf0() const
479587
{
480-
if (!m_status.bit_is_set<status::leaf1_valid>())
588+
if (!m_status.bit_is_set<status::leaf0_valid>())
481589
{
482-
m_leaf1 = x86_cpuid_leaf1::read();
483-
m_status.set_bit<status::leaf1_valid>();
590+
m_leaf0 = x86_cpuid_leaf0::read();
591+
m_status.set_bit<status::leaf0_valid>();
484592
}
485-
return m_leaf1;
593+
return m_leaf0;
486594
}
487595

488-
inline x86_cpuid_leaf7 const& leaf7() const
596+
inline x86_cpuid_leaf80000000 const& leaf80000000() const
489597
{
490-
if (!m_status.bit_is_set<status::leaf7_valid>())
598+
if (!m_status.bit_is_set<status::leaf80000000_valid>())
599+
{
600+
m_leaf80000000 = x86_cpuid_leaf80000000::read();
601+
m_status.set_bit<status::leaf80000000_valid>();
602+
}
603+
return m_leaf80000000;
604+
}
605+
606+
/**
607+
* Internal utility to lazily read and cache a CPUID leaf.
608+
*
609+
* @tparam status_id The status bit tracking whether this leaf has been read and cached.
610+
* @tparam L The CPUID leaf type (e.g. x86_cpuid_leaf1, x86_cpuid_leaf7).
611+
* @param leaf_cache A non-const reference to the class member that stores the leaf
612+
* value. It must be non-const because this function may write to it on first
613+
* call. It is passed explicitly (rather than accessed via `this`) to allow
614+
* factoring the caching logic across different leaf members.
615+
* @return A const reference to `leaf_cache`. The non-const input / const-ref output
616+
* asymmetry is intentional: callers must not modify the cached value, but
617+
* this function needs write access to populate it.
618+
*
619+
* On first call, checks whether the leaf number is within the range advertised as
620+
* supported by CPUID (via leaf 0 for the standard range, leaf 0x80000000 for the
621+
* extended range). If supported, reads the leaf from the CPU; otherwise leaves
622+
* `leaf_cache` at its zero-initialized default (all feature bits false). Either
623+
* way, `status_id` is set so subsequent calls return immediately.
624+
*/
625+
template <status status_id, typename L>
626+
inline auto const& safe_read_leaf(L& leaf_cache) const
627+
{
628+
// Check if already initialized
629+
if (m_status.bit_is_set<status_id>())
630+
{
631+
return leaf_cache;
632+
}
633+
634+
// Limit where we need to check leaf0 or leaf 80000000.
635+
constexpr auto extended_threshold = x86_cpuid_leaf80000000::leaf;
636+
637+
// Check if it is safe to call CPUID with this value.
638+
// First we identify if the leaf is in the regular or extended range.
639+
// TODO(C++17): if constexpr
640+
if (L::leaf < extended_threshold)
491641
{
492-
m_leaf7 = x86_cpuid_leaf7::read();
493-
m_status.set_bit<status::leaf7_valid>();
642+
// Check leaf0 in regular range
643+
if (L::leaf <= leaf0().highest_leaf())
644+
{
645+
leaf_cache = L::read();
646+
}
494647
}
495-
return m_leaf7;
648+
else
649+
{
650+
// Check leaf80000000 in extended range
651+
if (L::leaf <= leaf80000000().highest_leaf())
652+
{
653+
leaf_cache = L::read();
654+
}
655+
}
656+
657+
// Mark as valid in all cases, including if it was not read.
658+
// In this case it will be filled with zeros (all false).
659+
m_status.set_bit<status_id>();
660+
return leaf_cache;
661+
}
662+
663+
inline x86_cpuid_leaf1 const& leaf1() const
664+
{
665+
return safe_read_leaf<status::leaf1_valid>(m_leaf1);
666+
}
667+
668+
inline x86_cpuid_leaf7 const& leaf7() const
669+
{
670+
return safe_read_leaf<status::leaf7_valid>(m_leaf7);
496671
}
497672

498673
inline x86_cpuid_leaf7sub1 const& leaf7sub1() const
499674
{
500-
if (!m_status.bit_is_set<status::leaf7sub1_valid>())
675+
// Check if already initialized
676+
if (m_status.bit_is_set<status::leaf7sub1_valid>())
677+
{
678+
return m_leaf7sub1;
679+
}
680+
681+
// Check if safe to call CPUID with this value as subleaf.
682+
constexpr auto start = x86_cpuid_leaf7::eax::highest_subleaf_start;
683+
constexpr auto end = x86_cpuid_leaf7::eax::highest_subleaf_end;
684+
const auto highest_subleaf7 = leaf7().get_range<start, end>();
685+
if (x86_cpuid_leaf7sub1::subleaf <= highest_subleaf7)
501686
{
502687
m_leaf7sub1 = x86_cpuid_leaf7sub1::read();
503-
m_status.set_bit<status::leaf7sub1_valid>();
504688
}
689+
690+
// Mark as valid in all cases, including if it was not read.
691+
// In this case it will be filled with zeros (all false).
692+
m_status.set_bit<status::leaf7sub1_valid>();
505693
return m_leaf7sub1;
506694
}
507695

508696
inline x86_cpuid_leaf80000001 const& leaf80000001() const
509697
{
510-
if (!m_status.bit_is_set<status::leaf80000001_valid>())
511-
{
512-
m_leaf80000001 = x86_cpuid_leaf80000001::read();
513-
m_status.set_bit<status::leaf80000001_valid>();
514-
}
515-
return m_leaf80000001;
698+
return safe_read_leaf<status::leaf80000001_valid>(m_leaf80000001);
516699
}
517700
};
518701

0 commit comments

Comments
 (0)