Skip to content

Commit 618d2ec

Browse files
Support explicit instantiation of per-arch batch
As a side effect, centralize alignment specification.
1 parent d75423e commit 618d2ec

25 files changed

+180
-72
lines changed

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ The same computation operating on vectors and using the most performant instruct
110110
#include "xsimd/xsimd.hpp"
111111

112112
namespace xs = xsimd;
113-
using vector_type = std::vector<double, xsimd::aligned_allocator<double, XSIMD_DEFAULT_ALIGNMENT>>;
113+
using vector_type = std::vector<double, xsimd::aligned_allocator<double>>;
114114

115115
void mean(const vector_type& a, const vector_type& b, vector_type& res)
116116
{
@@ -142,7 +142,7 @@ the loop from the example becomes:
142142
#include "xsimd/stl/algorithms.hpp"
143143

144144
namespace xs = xsimd;
145-
using vector_type = std::vector<double, xsimd::aligned_allocator<double, XSIMD_DEFAULT_ALIGNMENT>>;
145+
using vector_type = std::vector<double, xsimd::aligned_allocator<double>>;
146146

147147
void mean(const vector_type& a, const vector_type& b, vector_type& res)
148148
{

benchmark/xsimd_benchmark.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ namespace xsimd
3232
using duration_type = std::chrono::duration<double, std::milli>;
3333

3434
template <class T>
35-
using bench_vector = std::vector<T, xsimd::aligned_allocator<T, XSIMD_DEFAULT_ALIGNMENT>>;
35+
using bench_vector = std::vector<T, xsimd::aligned_allocator<T>>;
3636

3737
template <class T>
3838
void init_benchmark(bench_vector<T>& lhs, bench_vector<T>& rhs, bench_vector<T>& res, size_t size)

docs/source/basic_usage.rst

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,8 @@ Here is an example that computes the mean of two sets of 4 double floating point
2121
2222
int main(int argc, char* argv[])
2323
{
24-
xs::batch<double, 4> a(1.5, 2.5, 3.5, 4.5);
25-
xs::batch<double, 4> b(2.5, 3.5, 4.5, 5.5);
24+
xs::avx::batch<double> a(1.5, 2.5, 3.5, 4.5);
25+
xs::avx::batch<double> b(2.5, 3.5, 4.5, 5.5);
2626
auto mean = (a + b) / 2;
2727
std::cout << mean << std::endl;
2828
return 0;
@@ -46,7 +46,7 @@ The same computation operating on vectors and using the most performant instruct
4646
#include "xsimd/xsimd.hpp"
4747
4848
namespace xs = xsimd;
49-
using vector_type = std::vector<double, xsimd::aligned_allocator<double, XSIMD_DEFAULT_ALIGNMENT>>;
49+
using vector_type = std::vector<double, xsimd::aligned_allocator<double>>;
5050
5151
void mean(const vector_type& a, const vector_type& b, vector_type& res)
5252
{

examples/mandelbrot.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -229,7 +229,7 @@ int main()
229229
const float y1 = 1;
230230
const int maxIters = 256;
231231

232-
std::vector<int, xsimd::aligned_allocator<int, XSIMD_DEFAULT_ALIGNMENT>> buf(width * height);
232+
std::vector<int, xsimd::aligned_allocator<int>> buf(width * height);
233233

234234
auto bencher = pico_bench::Benchmarker<milliseconds>{64, seconds{10}};
235235

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
#ifndef XSIMD_ARCH_HPP
#define XSIMD_ARCH_HPP

#include <cstddef>

// NOTE(review): the selection logic at the bottom of this header compares the
// XSIMD_X86_INSTR_SET / XSIMD_ARM_INSTR_SET macros, which are not defined
// here. Presumably the instruction-set detection header is included before
// this one -- confirm the include order at every inclusion site.

namespace xsimd
{
    // Forward declaration of the generic batch type; only named here, never
    // instantiated, so an incomplete type is sufficient.
    template <class T, std::size_t N>
    class batch;

    namespace arch
    {
        /**
         * Architecture tags.
         *
         * Each tag exposes:
         *  - batch<T>  : the xsimd::batch specialization whose element count
         *                fills one register of that architecture,
         *  - alignment : the memory alignment, in bytes, associated with the
         *                architecture.
         */

        // SSE: 128-bit registers.
        struct sse
        {
            template <class T>
            using batch = xsimd::batch<T, 128 / (8 * sizeof(T))>;
            static constexpr std::size_t alignment = 8;
        };

        // SSE2 reuses the SSE batch width but overrides the alignment.
        struct sse2 : sse
        {
            static constexpr std::size_t alignment = 16;
        };

        struct sse3 : sse2
        {
        };

        struct sse4_1 : sse3
        {
        };

        struct sse4_2 : sse4_1
        {
        };

        // AVX: 256-bit registers.
        struct avx
        {
            template <class T>
            using batch = xsimd::batch<T, 256 / (8 * sizeof(T))>;
            static constexpr std::size_t alignment = 32;
        };

        // FIXME: unsure of that one
        struct fma3 : avx
        {
        };

        // AVX2: 256-bit registers.
        struct avx2
        {
            template <class T>
            using batch = xsimd::batch<T, 256 / (8 * sizeof(T))>;
            static constexpr std::size_t alignment = 32;
        };

        // AVX-512: 512-bit registers.
        struct avx512
        {
            template <class T>
            using batch = xsimd::batch<T, 512 / (8 * sizeof(T))>;
            static constexpr std::size_t alignment = 64;
        };

        // 64-bit NEON: registers are 128 bits wide (not 512), so a batch
        // holds 128 / (8 * sizeof(T)) elements.
        struct neon64
        {
            template <class T>
            using batch = xsimd::batch<T, 128 / (8 * sizeof(T))>;
            static constexpr std::size_t alignment = 32;
        };

        // 32-bit NEON: registers are 128 bits wide (not 512), so a batch
        // holds 128 / (8 * sizeof(T)) elements.
        struct neon
        {
            template <class T>
            using batch = xsimd::batch<T, 128 / (8 * sizeof(T))>;
            static constexpr std::size_t alignment = 16;
        };

        // Scalar fallback: one element per batch.
        struct scalar
        {
            template <class T>
            using batch = xsimd::batch<T, 1>;
            static constexpr std::size_t alignment = sizeof(void*);
        };

        // Batch of T for an explicitly chosen instruction set tag.
        template <class T, class InstructionSet>
        using batch = typename InstructionSet::template batch<T>;

        // x86 tag matching the detected x86 instruction set. No alias is
        // declared when none of the versions below matches.
#if XSIMD_X86_INSTR_SET == XSIMD_X86_SSE_VERSION
        using x86 = sse;
#elif XSIMD_X86_INSTR_SET == XSIMD_X86_SSE2_VERSION
        using x86 = sse2;
#elif XSIMD_X86_INSTR_SET == XSIMD_X86_SSE3_VERSION
        using x86 = sse3;
#elif XSIMD_X86_INSTR_SET == XSIMD_X86_SSE4_1_VERSION
        using x86 = sse4_1;
#elif XSIMD_X86_INSTR_SET == XSIMD_X86_SSE4_2_VERSION
        using x86 = sse4_2;
#elif XSIMD_X86_INSTR_SET == XSIMD_X86_AVX_VERSION
        using x86 = avx;
#elif XSIMD_X86_INSTR_SET == XSIMD_X86_FMA3_VERSION
        using x86 = fma3;
#elif XSIMD_X86_INSTR_SET == XSIMD_X86_AVX2_VERSION
        using x86 = avx2;
#elif XSIMD_X86_INSTR_SET == XSIMD_X86_AVX512_VERSION
        using x86 = avx512;
#endif

        // ARM tag matching the detected ARM instruction set. No alias is
        // declared when none of the versions below matches.
#if XSIMD_ARM_INSTR_SET == XSIMD_ARM7_NEON_VERSION
        using arm = neon;
#elif XSIMD_ARM_INSTR_SET == XSIMD_ARM8_32_NEON_VERSION
        using arm = neon;
#elif XSIMD_ARM_INSTR_SET == XSIMD_ARM8_64_NEON_VERSION
        using arm = neon64;
#endif

        // Default tag: x86 if available, else ARM if available, else scalar.
#if XSIMD_X86_INSTR_SET != XSIMD_VERSION_NUMBER_NOT_AVAILABLE
        using default_ = x86;
#elif XSIMD_ARM_INSTR_SET != XSIMD_VERSION_NUMBER_NOT_AVAILABLE
        using default_ = arm;
#else
        using default_ = scalar;
#endif

    }

}

#endif

include/xsimd/config/xsimd_config.hpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,14 +12,15 @@
1212
#define XSIMD_CONFIG_HPP
1313

1414
#include "xsimd_align.hpp"
15+
#include "xsimd_arch.hpp"
1516

1617
#define XSIMD_VERSION_MAJOR 7
1718
#define XSIMD_VERSION_MINOR 5
1819
#define XSIMD_VERSION_PATCH 0
1920

2021
#ifndef XSIMD_DEFAULT_ALLOCATOR
21-
#if XSIMD_X86_INSTR_SET_AVAILABLE
22-
#define XSIMD_DEFAULT_ALLOCATOR(T) xsimd::aligned_allocator<T, XSIMD_DEFAULT_ALIGNMENT>
22+
#if XSIMD_X86_INSTR_SET != XSIMD_VERSION_NUMBER_NOT_AVAILABLE
23+
#define XSIMD_DEFAULT_ALLOCATOR(T) xsimd::aligned_allocator<T, xsimd::arch::x86::alignment>
2324
#else
2425
#define XSIMD_DEFAULT_ALLOCATOR(T) std::allocator<T>
2526
#endif

include/xsimd/memory/xsimd_aligned_allocator.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ namespace xsimd
3939
* @tparam T type of objects to allocate.
4040
* @tparam Align alignment in bytes.
4141
*/
42-
template <class T, size_t Align>
42+
template <class T, size_t Align=arch::default_::alignment>
4343
class aligned_allocator
4444
{
4545
public:

include/xsimd/memory/xsimd_alignment.hpp

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -42,13 +42,11 @@ namespace xsimd
4242
using type = unaligned_mode;
4343
};
4444

45-
#if defined(XSIMD_DEFAULT_ALIGNMENT)
4645
template <class T>
47-
struct allocator_alignment<aligned_allocator<T, XSIMD_DEFAULT_ALIGNMENT>>
46+
struct allocator_alignment<aligned_allocator<T>>
4847
{
4948
using type = aligned_mode;
5049
};
51-
#endif
5250

5351
template <class A>
5452
using allocator_alignment_t = typename allocator_alignment<A>::type;

include/xsimd/types/xsimd_fallback.hpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ namespace xsimd
4646
using value_type = T;
4747
static constexpr std::size_t size = N;
4848
using batch_type = batch<T, N>;
49-
static constexpr std::size_t align = XSIMD_DEFAULT_ALIGNMENT;
49+
static constexpr std::size_t align = arch::scalar::alignment;
5050
};
5151

5252
template <typename T, std::size_t N>
@@ -94,7 +94,7 @@ namespace xsimd
9494
using value_type = T;
9595
static constexpr std::size_t size = N;
9696
using batch_bool_type = batch_bool<T, N>;
97-
static constexpr std::size_t align = XSIMD_DEFAULT_ALIGNMENT;
97+
static constexpr std::size_t align = arch::scalar::alignment;
9898
using storage_type = std::array<T, N>;
9999
};
100100

@@ -161,7 +161,7 @@ namespace xsimd
161161

162162
template <class T, std::size_t N>
163163
struct simd_batch_traits<batch_bool<std::complex<T>, N>>
164-
: complex_batch_bool_traits<std::complex<T>, T, N, XSIMD_DEFAULT_ALIGNMENT>
164+
: complex_batch_bool_traits<std::complex<T>, T, N, arch::scalar::alignment>
165165
{
166166
};
167167

@@ -195,7 +195,7 @@ namespace xsimd
195195

196196
template <class T, std::size_t N>
197197
struct simd_batch_traits<batch<std::complex<T>, N>>
198-
: complex_batch_traits<std::complex<T>, T, N, XSIMD_DEFAULT_ALIGNMENT>
198+
: complex_batch_traits<std::complex<T>, T, N, arch::scalar::alignment>
199199
{
200200
};
201201

@@ -258,7 +258,7 @@ namespace xsimd
258258

259259
template <class T, std::size_t N, bool i3ec>
260260
struct simd_batch_traits<batch_bool<xtl::xcomplex<T, T, i3ec>, N>>
261-
: complex_batch_bool_traits<xtl::xcomplex<T, T, i3ec>, T, N, XSIMD_DEFAULT_ALIGNMENT>
261+
: complex_batch_bool_traits<xtl::xcomplex<T, T, i3ec>, T, N, arch::scalar::alignment>
262262
{
263263
};
264264

@@ -293,7 +293,7 @@ namespace xsimd
293293

294294
template <class T, std::size_t N, bool i3ec>
295295
struct simd_batch_traits<batch<xtl::xcomplex<T, T, i3ec>, N>>
296-
: complex_batch_traits<xtl::xcomplex<T, T, i3ec>, T, N, XSIMD_DEFAULT_ALIGNMENT>
296+
: complex_batch_traits<xtl::xcomplex<T, T, i3ec>, T, N, arch::scalar::alignment>
297297
{
298298
};
299299

include/xsimd/types/xsimd_neon_bool.hpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ namespace xsimd
7272
using value_type = T;
7373
static constexpr std::size_t size = 8;
7474
using batch_type = batch<T, 8>;
75-
static constexpr std::size_t align = XSIMD_DEFAULT_ALIGNMENT;
75+
static constexpr std::size_t align = arch::neon::alignment;
7676
};
7777

7878
template <class T>
@@ -120,7 +120,7 @@ namespace xsimd
120120
using value_type = T;
121121
static constexpr std::size_t size = 4;
122122
using batch_type = batch<T, 4>;
123-
static constexpr std::size_t align = XSIMD_DEFAULT_ALIGNMENT;
123+
static constexpr std::size_t align = arch::neon::alignment;
124124
};
125125

126126
template <class T>
@@ -167,7 +167,7 @@ namespace xsimd
167167
using value_type = T;
168168
static constexpr std::size_t size = 2;
169169
using batch_type = batch<T, 2>;
170-
static constexpr std::size_t align = XSIMD_DEFAULT_ALIGNMENT;
170+
static constexpr std::size_t align = arch::neon::alignment;
171171
};
172172

173173
template <class T>
@@ -214,7 +214,7 @@ namespace xsimd
214214
using value_type = T;
215215
static constexpr std::size_t size = 16;
216216
using batch_type = batch<T, 16>;
217-
static constexpr std::size_t align = XSIMD_DEFAULT_ALIGNMENT;
217+
static constexpr std::size_t align = arch::neon::alignment;
218218
};
219219

220220
template <class T>

0 commit comments

Comments
 (0)