Skip to content

Commit a2ae288

Browse files
committed
Reduce x86_cpu_feature size
1 parent b5047a6 commit a2ae288

File tree

1 file changed

+138
-65
lines changed

1 file changed

+138
-65
lines changed

include/xsimd/config/xsimd_cpu_features_x86.hpp

Lines changed: 138 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#include <cassert>
1717
#include <cstdint>
1818
#include <cstring>
19+
#include <type_traits>
1920
#if __cplusplus >= 201703L
2021
#include <string_view>
2122
#endif
@@ -52,63 +53,147 @@ namespace xsimd
5253

5354
inline x86_reg32_t x86_xcr0_low() noexcept;
5455

56+
/** A strongly typed bitset for a 32-bit register. */
5557
template <typename E>
5658
using x86_reg32_bitset = utils::uint_bitset<E, x86_reg32_t>;
5759

58-
template <x86_reg32_t leaf_num, x86_reg32_t subleaf_num,
59-
typename A, typename B, typename C, typename D>
60-
class x86_cpuid_regs
61-
: private x86_reg32_bitset<A>,
62-
private x86_reg32_bitset<B>,
63-
private x86_reg32_bitset<C>,
64-
private x86_reg32_bitset<D>
60+
/** A wrapper to attach a register bitfield descriptor and its CPUID index. */
61+
template <typename E, x86_reg32_t I>
62+
struct x86_reg_id
63+
{
64+
static constexpr x86_reg32_t index = I;
65+
using bits = E;
66+
67+
static_assert(index >= 0 && index < 4, "At most 4 register in CPUID");
68+
};
69+
70+
/** Find the register id with index k. */
71+
template <x86_reg32_t K, typename... reg_ids>
72+
struct find_reg_k;
73+
74+
/** Find the register id with index k (empty / nothing found case). */
75+
template <x86_reg32_t K, typename... reg_ids>
76+
struct find_reg_k
77+
{
78+
using type = x86_reg_id<void, 0>;
79+
};
80+
81+
/** Find the register id with index k (recursive case). */
82+
template <x86_reg32_t K, typename reg_id_head, typename... reg_id_tail>
83+
struct find_reg_k<K, reg_id_head, reg_id_tail...>
84+
{
85+
using type = std::conditional_t<
86+
reg_id_head::index == K,
87+
reg_id_head,
88+
typename find_reg_k<K, reg_id_tail...>::type>;
89+
};
90+
91+
/**
92+
* A class with strongly typed bitfield for `CPUID` registers.
93+
*
94+
* The class stores a variable number of registers (up to four) from the CPUID
95+
* output. This is a space optimization to avoid storing many zeros in the
96+
* final `x86_cpu_features`.
97+
* As a result, some of the type aliases `eax`, `ebx`, `ecx`, `edx` may be `void`.
98+
*/
99+
template <x86_reg32_t leaf_num, x86_reg32_t subleaf_num, typename... reg_ids>
100+
class x86_cpuid_regs : private x86_reg32_bitset<typename reg_ids::bits>...
65101
{
66102
private:
67-
using eax_bitset = x86_reg32_bitset<A>;
68-
using ebx_bitset = x86_reg32_bitset<B>;
69-
using ecx_bitset = x86_reg32_bitset<C>;
70-
using edx_bitset = x86_reg32_bitset<D>;
103+
static_assert(sizeof...(reg_ids) <= 4, "At most 4 register in CPUID");
71104

72105
/* Parse CPUID register values into individual bit components. */
73106
constexpr explicit x86_cpuid_regs(const cpuid_reg_t& regs) noexcept
74-
: eax_bitset(regs[0])
75-
, ebx_bitset(regs[1])
76-
, ecx_bitset(regs[2])
77-
, edx_bitset(regs[3])
107+
: x86_reg32_bitset<typename reg_ids::bits>(regs[reg_ids::index])...
78108
{
79109
}
80110

81111
public:
82-
using eax = A;
83-
using ebx = B;
84-
using ecx = C;
85-
using edx = D;
86112
static constexpr x86_reg32_t leaf = leaf_num;
87113
static constexpr x86_reg32_t subleaf = subleaf_num;
88114

115+
using eax = typename find_reg_k<0, reg_ids...>::type::bits;
116+
using ebx = typename find_reg_k<1, reg_ids...>::type::bits;
117+
using ecx = typename find_reg_k<2, reg_ids...>::type::bits;
118+
using edx = typename find_reg_k<3, reg_ids...>::type::bits;
119+
89120
inline static x86_cpuid_regs read()
90121
{
91122
return x86_cpuid_regs(detail::x86_cpuid(leaf, subleaf));
92123
}
93124

94125
constexpr x86_cpuid_regs() noexcept = default;
95126

96-
using eax_bitset::all_bits_set;
97-
using eax_bitset::get_range;
98-
using ebx_bitset::all_bits_set;
99-
using ebx_bitset::get_range;
100-
using ecx_bitset::all_bits_set;
101-
using ecx_bitset::get_range;
102-
using edx_bitset::all_bits_set;
103-
using edx_bitset::get_range;
104-
};
127+
// TODO(C++17): switch to the compact pack-expanded using-declarations below, for which this class was designed.
128+
// Until then, the #else branch contains a very verbose port of the same interface.
129+
#if 0
130+
using x86_reg32_bitset<typename reg_ids::bits>::all_bits_set...;
131+
using x86_reg32_bitset<typename reg_ids::bits>::get_range...;
132+
#else
133+
134+
private:
135+
template <int N>
136+
struct m_empty_reg
137+
{
138+
enum class type {};
139+
};
140+
141+
using eax_or_empty = typename std::conditional<std::is_void<eax>::value, typename m_empty_reg<0>::type, eax>::type;
142+
using ebx_or_empty = typename std::conditional<std::is_void<ebx>::value, typename m_empty_reg<1>::type, ebx>::type;
143+
using ecx_or_empty = typename std::conditional<std::is_void<ecx>::value, typename m_empty_reg<2>::type, ecx>::type;
144+
using edx_or_empty = typename std::conditional<std::is_void<edx>::value, typename m_empty_reg<3>::type, edx>::type;
145+
146+
public:
147+
template <eax_or_empty... bits, typename T = eax, typename std::enable_if<!std::is_void<T>::value, int>::type = 0>
148+
constexpr bool all_bits_set() const noexcept
149+
{
150+
return x86_reg32_bitset<eax>::template all_bits_set<bits...>();
151+
}
152+
153+
template <eax_or_empty start, eax_or_empty end, typename T = eax, typename std::enable_if<!std::is_void<T>::value, int>::type = 0>
154+
constexpr x86_reg32_t get_range() const noexcept
155+
{
156+
return x86_reg32_bitset<eax>::template get_range<start, end>();
157+
}
158+
159+
template <ebx_or_empty... bits, typename T = ebx, typename std::enable_if<!std::is_void<T>::value, int>::type = 0>
160+
constexpr bool all_bits_set() const noexcept
161+
{
162+
return x86_reg32_bitset<ebx>::template all_bits_set<bits...>();
163+
}
164+
165+
template <ebx_or_empty start, ebx_or_empty end, typename T = ebx, typename std::enable_if<!std::is_void<T>::value, int>::type = 0>
166+
constexpr x86_reg32_t get_range() const noexcept
167+
{
168+
return x86_reg32_bitset<ebx>::template get_range<start, end>();
169+
}
170+
171+
template <ecx_or_empty... bits, typename T = ecx, typename std::enable_if<!std::is_void<T>::value, int>::type = 0>
172+
constexpr bool all_bits_set() const noexcept
173+
{
174+
return x86_reg32_bitset<ecx>::template all_bits_set<bits...>();
175+
}
176+
177+
template <ecx_or_empty start, ecx_or_empty end, typename T = ecx, typename std::enable_if<!std::is_void<T>::value, int>::type = 0>
178+
constexpr x86_reg32_t get_range() const noexcept
179+
{
180+
return x86_reg32_bitset<ecx>::template get_range<start, end>();
181+
}
182+
183+
template <edx_or_empty... bits, typename T = edx, typename std::enable_if<!std::is_void<T>::value, int>::type = 0>
184+
constexpr bool all_bits_set() const noexcept
185+
{
186+
return x86_reg32_bitset<edx>::template all_bits_set<bits...>();
187+
}
188+
189+
template <edx_or_empty start, edx_or_empty end, typename T = edx, typename std::enable_if<!std::is_void<T>::value, int>::type = 0>
190+
constexpr x86_reg32_t get_range() const noexcept
191+
{
192+
return x86_reg32_bitset<edx>::template get_range<start, end>();
193+
}
105194

106-
template <typename T>
107-
using make_x86_cpuid_regs = x86_cpuid_regs<T::leaf, T::subleaf,
108-
typename T::eax,
109-
typename T::ebx,
110-
typename T::ecx,
111-
typename T::edx>;
195+
#endif // C++17
196+
};
112197

113198
template <bool extended>
114199
struct x86_cpuid_highest_func
@@ -298,12 +383,6 @@ namespace xsimd
298383
static constexpr detail::x86_reg32_t leaf = 1;
299384
static constexpr detail::x86_reg32_t subleaf = 0;
300385

301-
enum class eax
302-
{
303-
};
304-
enum class ebx
305-
{
306-
};
307386
enum class ecx
308387
{
309388
/* Streaming SIMD Extensions 3. */
@@ -328,6 +407,10 @@ namespace xsimd
328407
/* Streaming SIMD Extensions 2. */
329408
sse2 = 26,
330409
};
410+
411+
using regs_t = detail::x86_cpuid_regs<leaf, subleaf,
412+
detail::x86_reg_id<ecx, 2>,
413+
detail::x86_reg_id<edx, 3>>;
331414
};
332415

333416
/**
@@ -340,7 +423,7 @@ namespace xsimd
340423
*
341424
* @see https://en.wikipedia.org/wiki/CPUID
342425
*/
343-
using x86_cpuid_leaf1 = detail::make_x86_cpuid_regs<x86_cpuid_leaf1_traits>;
426+
using x86_cpuid_leaf1 = typename x86_cpuid_leaf1_traits::regs_t;
344427

345428
struct x86_cpuid_leaf7_traits
346429
{
@@ -386,9 +469,11 @@ namespace xsimd
386469
/* AVX-512 Vector Neural Network instructions. */
387470
avx512vnni_bw = 11,
388471
};
389-
enum class edx
390-
{
391-
};
472+
473+
using regs_t = detail::x86_cpuid_regs<leaf, subleaf,
474+
detail::x86_reg_id<eax, 0>,
475+
detail::x86_reg_id<ebx, 1>,
476+
detail::x86_reg_id<ecx, 2>>;
392477
};
393478

394479
/**
@@ -401,7 +486,7 @@ namespace xsimd
401486
*
402487
* @see https://en.wikipedia.org/wiki/CPUID
403488
*/
404-
using x86_cpuid_leaf7 = detail::make_x86_cpuid_regs<x86_cpuid_leaf7_traits>;
489+
using x86_cpuid_leaf7 = typename x86_cpuid_leaf7_traits::regs_t;
405490

406491
struct x86_cpuid_leaf7sub1_traits
407492
{
@@ -413,15 +498,9 @@ namespace xsimd
413498
/* AVX (VEX-encoded) Vector Neural Network instructions. */
414499
avxvnni = 4,
415500
};
416-
enum class ebx
417-
{
418-
};
419-
enum class ecx
420-
{
421-
};
422-
enum class edx
423-
{
424-
};
501+
502+
using regs_t = detail::x86_cpuid_regs<leaf, subleaf,
503+
detail::x86_reg_id<eax, 0>>;
425504
};
426505

427506
/**
@@ -434,7 +513,7 @@ namespace xsimd
434513
*
435514
* @see https://en.wikipedia.org/wiki/CPUID
436515
*/
437-
using x86_cpuid_leaf7sub1 = detail::make_x86_cpuid_regs<x86_cpuid_leaf7sub1_traits>;
516+
using x86_cpuid_leaf7sub1 = typename x86_cpuid_leaf7sub1_traits::regs_t;
438517

439518
/**
440519
* Highest Extended CPUID Function Parameter (EAX=0x80000000).
@@ -451,20 +530,14 @@ namespace xsimd
451530
static constexpr detail::x86_reg32_t leaf = 0x80000001;
452531
static constexpr detail::x86_reg32_t subleaf = 0;
453532

454-
enum class eax
455-
{
456-
};
457-
enum class ebx
458-
{
459-
};
460533
enum class ecx
461534
{
462535
/* AMD Fused multiply-add with 4 operands (FMA4). */
463536
fma4 = 16,
464537
};
465-
enum class edx
466-
{
467-
};
538+
539+
using regs_t = detail::x86_cpuid_regs<leaf, subleaf,
540+
detail::x86_reg_id<ecx, 2>>;
468541
};
469542

470543
/**
@@ -477,7 +550,7 @@ namespace xsimd
477550
*
478551
* @see https://en.wikipedia.org/wiki/CPUID
479552
*/
480-
using x86_cpuid_leaf80000001 = detail::make_x86_cpuid_regs<x86_cpuid_leaf80000001_traits>;
553+
using x86_cpuid_leaf80000001 = typename x86_cpuid_leaf80000001_traits::regs_t;
481554

482555
/*
483556
* Extended Control Register 0 (XCR0).

0 commit comments

Comments
 (0)