Skip to content

Commit 05b9d84

Browse files
authored
Merge pull request #1312 from AntoinePrv/cpu-features
Add generic cpu_features
2 parents 6978612 + 6bed918 commit 05b9d84

3 files changed

Lines changed: 91 additions & 50 deletions

File tree

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
/***************************************************************************
2+
* Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and *
3+
* Martin Renou *
4+
* Copyright (c) QuantStack *
5+
* Copyright (c) Serge Guelton *
6+
* *
7+
* Distributed under the terms of the BSD 3-Clause License. *
8+
* *
9+
* The full license is in the file LICENSE, distributed with this software. *
10+
****************************************************************************/
11+
12+
#ifndef XSIMD_CPU_FEATURES_HPP
13+
#define XSIMD_CPU_FEATURES_HPP
14+
15+
#include "./xsimd_cpu_features_arm.hpp"
16+
#include "./xsimd_cpu_features_ppc.hpp"
17+
#include "./xsimd_cpu_features_riscv.hpp"
18+
#include "./xsimd_cpu_features_x86.hpp"
19+
20+
namespace xsimd
21+
{
22+
23+
/**
24+
* Cross-platform CPU feature detection class.
25+
*
26+
* All member functions are safe to call on all platforms.
27+
*
28+
* @warning This class is *not* thread safe.
29+
* Its internal lazy querying structure makes even `const` member functions prone to data races.
30+
* The structure is also generally not appropriate for directly branching (e.g. on
31+
* ``cpu_features::avx2``) because it includes a branch that the compiler cannot optimize.
32+
* The current appropriate way to use this class for dynamic dispatching is to store the
33+
* result of the function calls (e.g. @ref cpu_features) into (static) constants.
34+
* This is done in @ref xsimd::available_architectures.
35+
*
36+
* @see xsimd::dispatch
37+
* @see xsimd::available_architectures
38+
*/
39+
class cpu_features : public ppc_cpu_features,
40+
public riscv_cpu_features,
41+
public arm_cpu_features,
42+
public x86_cpu_features
43+
{
44+
};
45+
46+
}
47+
48+
#endif

include/xsimd/config/xsimd_cpuid.hpp

Lines changed: 37 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,7 @@
1313
#define XSIMD_CPUID_HPP
1414

1515
#include "../types/xsimd_all_registers.hpp"
16-
#include "./xsimd_cpu_features_arm.hpp"
17-
#include "./xsimd_cpu_features_ppc.hpp"
18-
#include "./xsimd_cpu_features_riscv.hpp"
19-
#include "./xsimd_cpu_features_x86.hpp"
16+
#include "./xsimd_cpu_features.hpp"
2017
#include "./xsimd_inline.hpp"
2118

2219
namespace xsimd
@@ -81,58 +78,54 @@ namespace xsimd
8178
wasm = 1;
8279
#endif
8380

84-
// Safe on all platforms, it will be false if non PowerPC.
85-
const auto ppc_cpu = xsimd::ppc_cpu_features();
81+
const auto cpu = xsimd::cpu_features();
8682

87-
vsx = ppc_cpu.vsx();
83+
vsx = cpu.vsx();
8884

89-
// Safe on all platforms, it will be all false if non risc-v.
90-
const auto riscv_cpu = xsimd::riscv_cpu_features();
85+
rvv128 = cpu.rvv() && (cpu.rvv_size_bytes() >= (128 / 8));
86+
rvv256 = cpu.rvv() && (cpu.rvv_size_bytes() >= (256 / 8));
87+
rvv512 = cpu.rvv() && (cpu.rvv_size_bytes() >= (512 / 8));
9188

92-
rvv128 = riscv_cpu.rvv() && (riscv_cpu.rvv_size_bytes() >= (128 / 8));
93-
rvv256 = riscv_cpu.rvv() && (riscv_cpu.rvv_size_bytes() >= (256 / 8));
94-
rvv512 = riscv_cpu.rvv() && (riscv_cpu.rvv_size_bytes() >= (512 / 8));
89+
neon = cpu.neon();
90+
neon64 = cpu.neon64();
91+
i8mm_neon64 = cpu.neon64() && cpu.i8mm();
9592

96-
// Safe on all platforms, it will be all false if non arm.
97-
const auto arm_cpu = xsimd::arm_cpu_features();
93+
// Running SVE128 on an SVE256 machine is trickier than the x86 equivalent
94+
// of running SSE code on an AVX machine and requires explicitly changing the
95+
// vector length using `prctl` (a per-thread setting).
96+
// This is something we have not tested and not integrated in xsimd so the safe
97+
// default is to assume only one valid SVE width at runtime.
98+
sve128 = cpu.sve() && (cpu.sve_size_bytes() * 8 == 128);
99+
sve256 = cpu.sve() && (cpu.sve_size_bytes() * 8 == 256);
100+
sve512 = cpu.sve() && (cpu.sve_size_bytes() * 8 == 512);
98101

99-
neon = arm_cpu.neon();
100-
neon64 = arm_cpu.neon64();
101-
i8mm_neon64 = arm_cpu.neon64() && arm_cpu.i8mm();
102-
sve128 = arm_cpu.sve() && (arm_cpu.sve_size_bytes() >= (128 / 8));
103-
sve256 = arm_cpu.sve() && (arm_cpu.sve_size_bytes() >= (256 / 8));
104-
sve512 = arm_cpu.sve() && (arm_cpu.sve_size_bytes() >= (512 / 8));
105-
106-
// Safe on all platforms, it will be all false if non x86.
107-
const auto x86_cpu = xsimd::x86_cpu_features();
108-
109-
sse2 = x86_cpu.sse2();
110-
sse3 = x86_cpu.sse3();
111-
ssse3 = x86_cpu.ssse3();
112-
sse4_1 = x86_cpu.sse4_1();
113-
sse4_2 = x86_cpu.sse4_2();
114-
fma3_sse42 = x86_cpu.fma3();
102+
sse2 = cpu.sse2();
103+
sse3 = cpu.sse3();
104+
ssse3 = cpu.ssse3();
105+
sse4_1 = cpu.sse4_1();
106+
sse4_2 = cpu.sse4_2();
107+
fma3_sse42 = cpu.fma3();
115108

116109
// sse4a not implemented in cpu_id yet
117110
// xop not implemented in cpu_id yet
118111

119-
avx = x86_cpu.avx();
112+
avx = cpu.avx();
120113
fma3_avx = avx && fma3_sse42;
121-
fma4 = x86_cpu.fma4();
122-
avx2 = x86_cpu.avx2();
123-
avxvnni = x86_cpu.avxvnni();
114+
fma4 = cpu.fma4();
115+
avx2 = cpu.avx2();
116+
avxvnni = cpu.avxvnni();
124117
fma3_avx2 = avx2 && fma3_sse42;
125118

126-
avx512f = x86_cpu.avx512f();
127-
avx512cd = x86_cpu.avx512cd();
128-
avx512dq = x86_cpu.avx512dq();
129-
avx512bw = x86_cpu.avx512bw();
130-
avx512er = x86_cpu.avx512er();
131-
avx512pf = x86_cpu.avx512pf();
132-
avx512ifma = x86_cpu.avx512ifma();
133-
avx512vbmi = x86_cpu.avx512vbmi();
134-
avx512vbmi2 = x86_cpu.avx512vbmi2();
135-
avx512vnni_bw = x86_cpu.avx512vnni_bw();
119+
avx512f = cpu.avx512f();
120+
avx512cd = cpu.avx512cd();
121+
avx512dq = cpu.avx512dq();
122+
avx512bw = cpu.avx512bw();
123+
avx512er = cpu.avx512er();
124+
avx512pf = cpu.avx512pf();
125+
avx512ifma = cpu.avx512ifma();
126+
avx512vbmi = cpu.avx512vbmi();
127+
avx512vbmi2 = cpu.avx512vbmi2();
128+
avx512vnni_bw = cpu.avx512vnni_bw();
136129
avx512vnni_vbmi2 = avx512vbmi2 && avx512vnni_bw;
137130
}
138131
};

test/test_cpu_features.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ namespace detail
5252
*/
5353
TEST_CASE("[cpu_features] x86 implication chains")
5454
{
55-
xsimd::x86_cpu_features cpu;
55+
xsimd::cpu_features cpu;
5656

5757
// SSE implication chain
5858
CHECK_IMPLICATION(cpu.sse4_2(), cpu.sse4_1());
@@ -118,7 +118,7 @@ TEST_CASE("[cpu_features] x86 manufacturer from environment")
118118

119119
TEST_CASE("[cpu_features] x86 features from environment")
120120
{
121-
xsimd::x86_cpu_features cpu;
121+
xsimd::cpu_features cpu;
122122

123123
CHECK_ENV_FEATURE("XSIMD_TEST_CPU_ASSUME_SSE2", cpu.sse2());
124124
CHECK_ENV_FEATURE("XSIMD_TEST_CPU_ASSUME_SSE3", cpu.sse3());
@@ -148,7 +148,7 @@ TEST_CASE("[cpu_features] arm implication chains")
148148

149149
TEST_CASE("[cpu_features] arm features from environment")
150150
{
151-
xsimd::arm_cpu_features cpu;
151+
xsimd::cpu_features cpu;
152152

153153
CHECK_ENV_FEATURE("XSIMD_TEST_CPU_ASSUME_NEON", cpu.neon());
154154
CHECK_ENV_FEATURE("XSIMD_TEST_CPU_ASSUME_NEON64", cpu.neon64());
@@ -158,21 +158,21 @@ TEST_CASE("[cpu_features] arm features from environment")
158158

159159
TEST_CASE("[cpu_features] risc-v implication chains")
160160
{
161-
xsimd::riscv_cpu_features cpu;
161+
xsimd::cpu_features cpu;
162162

163163
CHECK_IMPLICATION(cpu.rvv(), cpu.rvv_size_bytes() >= (128 / 8));
164164
}
165165

166166
TEST_CASE("[cpu_features] risc-v features from environment")
167167
{
168-
xsimd::riscv_cpu_features cpu;
168+
xsimd::cpu_features cpu;
169169

170170
CHECK_ENV_FEATURE("XSIMD_TEST_CPU_ASSUME_RVV", cpu.rvv());
171171
}
172172

173173
TEST_CASE("[cpu_features] ppc features from environment")
174174
{
175-
xsimd::ppc_cpu_features cpu;
175+
xsimd::cpu_features cpu;
176176

177177
CHECK_ENV_FEATURE("XSIMD_TEST_CPU_ASSUME_VSX", cpu.vsx());
178178
}

0 commit comments

Comments
 (0)