|
1 | 1 | #pragma once |
2 | | -#include <immintrin.h> |
3 | 2 |
|
4 | 3 | //--------------------------------------------- |
5 | | -// Precision / SIMD mode selection |
| 4 | +// Precision selection (scalar only) |
6 | 5 | //--------------------------------------------- |
7 | 6 | #if defined(NEXT_FP64) |
8 | 7 |
|
|
12 | 11 |
|
13 | 12 | using real = float; |
14 | 13 |
|
15 | | -#elif defined(NEXT_SIMD64) |
16 | | - |
17 | | - using real = __m128d; // 2-wide double (SSE) |
18 | | - |
19 | | -#elif defined(NEXT_SIMD32) |
20 | | - |
21 | | - using real = __m128; // 4-wide float (SSE) |
22 | | - |
23 | | -#elif defined(NEXT_AVX512_64) |
24 | | - |
25 | | - using real = __m512d; // 8-wide double (AVX-512) |
26 | | - |
27 | | -#elif defined(NEXT_AVX512_32) |
28 | | - |
29 | | - using real = __m512; // 16-wide float (AVX-512) |
30 | | - |
31 | 14 | #else |
32 | | - #error "Define one of: NEXT_FP64, NEXT_FP32, NEXT_SIMD32, NEXT_SIMD64, NEXT_AVX512_32, NEXT_AVX512_64." |
33 | | -#endif |
34 | | - |
35 | | -//--------------------------------------------- |
36 | | -// SIMD operator overloads |
37 | | -//--------------------------------------------- |
38 | | -#if defined(NEXT_SIMD32) |
39 | | - |
40 | | -inline real operator+(real a, real b) { return _mm_add_ps(a, b); } |
41 | | -inline real operator-(real a, real b) { return _mm_sub_ps(a, b); } |
42 | | -inline real operator*(real a, real b) { return _mm_mul_ps(a, b); } |
43 | | -inline real operator/(real a, real b) { return _mm_div_ps(a, b); } |
44 | | - |
45 | | -#elif defined(NEXT_SIMD64) |
46 | | - |
47 | | -inline real operator+(real a, real b) { return _mm_add_pd(a, b); } |
48 | | -inline real operator-(real a, real b) { return _mm_sub_pd(a, b); } |
49 | | -inline real operator*(real a, real b) { return _mm_mul_pd(a, b); } |
50 | | -inline real operator/(real a, real b) { return _mm_div_pd(a, b); } |
51 | | - |
52 | | -#elif defined(NEXT_AVX512_32) |
53 | | - |
54 | | -inline real operator+(real a, real b) { return _mm512_add_ps(a, b); } |
55 | | -inline real operator-(real a, real b) { return _mm512_sub_ps(a, b); } |
56 | | -inline real operator*(real a, real b) { return _mm512_mul_ps(a, b); } |
57 | | -inline real operator/(real a, real b) { return _mm512_div_ps(a, b); } |
58 | | - |
59 | | -#elif defined(NEXT_AVX512_64) |
60 | | - |
61 | | -inline real operator+(real a, real b) { return _mm512_add_pd(a, b); } |
62 | | -inline real operator-(real a, real b) { return _mm512_sub_pd(a, b); } |
63 | | -inline real operator*(real a, real b) { return _mm512_mul_pd(a, b); } |
64 | | -inline real operator/(real a, real b) { return _mm512_div_pd(a, b); } |
65 | | - |
| 15 | + #error "Define one of: NEXT_FP32 or NEXT_FP64." |
66 | 16 | #endif |
0 commit comments