|
13 | 13 | #define XSIMD_CPUID_HPP |
14 | 14 |
|
15 | 15 | #include "../types/xsimd_all_registers.hpp" |
16 | | -#include "./xsimd_cpu_features_arm.hpp" |
17 | | -#include "./xsimd_cpu_features_ppc.hpp" |
18 | | -#include "./xsimd_cpu_features_riscv.hpp" |
19 | | -#include "./xsimd_cpu_features_x86.hpp" |
| 16 | +#include "./xsimd_cpu_features.hpp" |
20 | 17 | #include "./xsimd_inline.hpp" |
21 | 18 |
|
22 | 19 | namespace xsimd |
@@ -81,58 +78,54 @@ namespace xsimd |
81 | 78 | wasm = 1; |
82 | 79 | #endif |
83 | 80 |
|
84 | | - // Safe on all platforms, it will be false if non PowerPC. |
85 | | - const auto ppc_cpu = xsimd::ppc_cpu_features(); |
| 81 | + const auto cpu = xsimd::cpu_features(); |
86 | 82 |
|
87 | | - vsx = ppc_cpu.vsx(); |
| 83 | + vsx = cpu.vsx(); |
88 | 84 |
|
89 | | - // Safe on all platforms, it will be all false if non risc-v. |
90 | | - const auto riscv_cpu = xsimd::riscv_cpu_features(); |
| 85 | + rvv128 = cpu.rvv() && (cpu.rvv_size_bytes() >= (128 / 8)); |
| 86 | + rvv256 = cpu.rvv() && (cpu.rvv_size_bytes() >= (256 / 8)); |
| 87 | + rvv512 = cpu.rvv() && (cpu.rvv_size_bytes() >= (512 / 8)); |
91 | 88 |
|
92 | | - rvv128 = riscv_cpu.rvv() && (riscv_cpu.rvv_size_bytes() >= (128 / 8)); |
93 | | - rvv256 = riscv_cpu.rvv() && (riscv_cpu.rvv_size_bytes() >= (256 / 8)); |
94 | | - rvv512 = riscv_cpu.rvv() && (riscv_cpu.rvv_size_bytes() >= (512 / 8)); |
| 89 | + neon = cpu.neon(); |
| 90 | + neon64 = cpu.neon64(); |
| 91 | + i8mm_neon64 = cpu.neon64() && cpu.i8mm(); |
95 | 92 |
|
96 | | - // Safe on all platforms, it will be all false if non arm. |
97 | | - const auto arm_cpu = xsimd::arm_cpu_features(); |
| 93 | + // Running SVE128 on an SVE256 machine is trickier than the x86 equivalent |
| 94 | + // of running SSE code on an AVX machine: it requires explicitly changing the |
| 95 | + // vector length using `prctl` (a per-thread setting). |
| 96 | + // This has been neither tested nor integrated in xsimd, so the safe |
| 97 | + // default is to assume only one valid SVE width at runtime. |
| 98 | + sve128 = cpu.sve() && (cpu.sve_size_bytes() * 8 == 128); |
| 99 | + sve256 = cpu.sve() && (cpu.sve_size_bytes() * 8 == 256); |
| 100 | + sve512 = cpu.sve() && (cpu.sve_size_bytes() * 8 == 512); |
98 | 101 |
|
99 | | - neon = arm_cpu.neon(); |
100 | | - neon64 = arm_cpu.neon64(); |
101 | | - i8mm_neon64 = arm_cpu.neon64() && arm_cpu.i8mm(); |
102 | | - sve128 = arm_cpu.sve() && (arm_cpu.sve_size_bytes() >= (128 / 8)); |
103 | | - sve256 = arm_cpu.sve() && (arm_cpu.sve_size_bytes() >= (256 / 8)); |
104 | | - sve512 = arm_cpu.sve() && (arm_cpu.sve_size_bytes() >= (512 / 8)); |
105 | | - |
106 | | - // Safe on all platforms, it will be all false if non x86. |
107 | | - const auto x86_cpu = xsimd::x86_cpu_features(); |
108 | | - |
109 | | - sse2 = x86_cpu.sse2(); |
110 | | - sse3 = x86_cpu.sse3(); |
111 | | - ssse3 = x86_cpu.ssse3(); |
112 | | - sse4_1 = x86_cpu.sse4_1(); |
113 | | - sse4_2 = x86_cpu.sse4_2(); |
114 | | - fma3_sse42 = x86_cpu.fma3(); |
| 102 | + sse2 = cpu.sse2(); |
| 103 | + sse3 = cpu.sse3(); |
| 104 | + ssse3 = cpu.ssse3(); |
| 105 | + sse4_1 = cpu.sse4_1(); |
| 106 | + sse4_2 = cpu.sse4_2(); |
| 107 | + fma3_sse42 = cpu.fma3(); |
115 | 108 |
|
116 | 109 | // sse4a not implemented in cpu_id yet |
117 | 110 | // xop not implemented in cpu_id yet |
118 | 111 |
|
119 | | - avx = x86_cpu.avx(); |
| 112 | + avx = cpu.avx(); |
120 | 113 | fma3_avx = avx && fma3_sse42; |
121 | | - fma4 = x86_cpu.fma4(); |
122 | | - avx2 = x86_cpu.avx2(); |
123 | | - avxvnni = x86_cpu.avxvnni(); |
| 114 | + fma4 = cpu.fma4(); |
| 115 | + avx2 = cpu.avx2(); |
| 116 | + avxvnni = cpu.avxvnni(); |
124 | 117 | fma3_avx2 = avx2 && fma3_sse42; |
125 | 118 |
|
126 | | - avx512f = x86_cpu.avx512f(); |
127 | | - avx512cd = x86_cpu.avx512cd(); |
128 | | - avx512dq = x86_cpu.avx512dq(); |
129 | | - avx512bw = x86_cpu.avx512bw(); |
130 | | - avx512er = x86_cpu.avx512er(); |
131 | | - avx512pf = x86_cpu.avx512pf(); |
132 | | - avx512ifma = x86_cpu.avx512ifma(); |
133 | | - avx512vbmi = x86_cpu.avx512vbmi(); |
134 | | - avx512vbmi2 = x86_cpu.avx512vbmi2(); |
135 | | - avx512vnni_bw = x86_cpu.avx512vnni_bw(); |
| 119 | + avx512f = cpu.avx512f(); |
| 120 | + avx512cd = cpu.avx512cd(); |
| 121 | + avx512dq = cpu.avx512dq(); |
| 122 | + avx512bw = cpu.avx512bw(); |
| 123 | + avx512er = cpu.avx512er(); |
| 124 | + avx512pf = cpu.avx512pf(); |
| 125 | + avx512ifma = cpu.avx512ifma(); |
| 126 | + avx512vbmi = cpu.avx512vbmi(); |
| 127 | + avx512vbmi2 = cpu.avx512vbmi2(); |
| 128 | + avx512vnni_bw = cpu.avx512vnni_bw(); |
136 | 129 | avx512vnni_vbmi2 = avx512vbmi2 && avx512vnni_bw; |
137 | 130 | } |
138 | 131 | }; |
|
0 commit comments