#include "../utils/xsimd_type_traits.hpp"

#include "./common/xsimd_common_bit.hpp"
#include "./common/xsimd_common_cast.hpp"
#include "./xsimd_common_fwd.hpp"
2728
// Wrap intrinsics so we can pass them as function pointers.
// - OP:  the full intrinsic name, e.g. vorrq_u8
// - VEC: the NEON vector type the wrapper accepts
// - RT:  type trait used to deduce the wrapper's return type
//
// NOTE(review): OP is parenthesized as (OP) in both the declarator and the
// call — presumably to suppress expansion when the intrinsic name is itself
// defined as a function-like macro by the toolchain; confirm against the
// original commit intent before simplifying.
#define WRAP_BINARY_IMPL(OP, VEC, RT)                           \
    namespace wrap                                              \
    {                                                           \
        XSIMD_INLINE auto(OP)(VEC a, VEC b) noexcept -> RT<VEC> \
        {                                                       \
            return (::OP)(a, b);                                \
        }                                                       \
    }

// Binary wrappers for the unsigned integer types narrower than 64 bits.
#define WRAP_BINARY_UINT_EXCLUDING_64(OP_U8, OP_U16, OP_U32, RT) \
    WRAP_BINARY_IMPL(OP_U8, uint8x16_t, RT)                      \
    WRAP_BINARY_IMPL(OP_U16, uint16x8_t, RT)                     \
    WRAP_BINARY_IMPL(OP_U32, uint32x4_t, RT)

// Binary wrappers for all integer types narrower than 64 bits.
#define WRAP_BINARY_INT_EXCLUDING_64(OP_U8, OP_I8, OP_U16, OP_I16, OP_U32, OP_I32, RT) \
    WRAP_BINARY_UINT_EXCLUDING_64(OP_U8, OP_U16, OP_U32, RT)                           \
    WRAP_BINARY_IMPL(OP_I8, int8x16_t, RT)                                             \
    WRAP_BINARY_IMPL(OP_I16, int16x8_t, RT)                                            \
    WRAP_BINARY_IMPL(OP_I32, int32x4_t, RT)

// Binary wrappers for every integer type, 64-bit lanes included.
#define WRAP_BINARY_INT(OP_U8, OP_I8, OP_U16, OP_I16, OP_U32, OP_I32, OP_U64, OP_I64, RT) \
    WRAP_BINARY_INT_EXCLUDING_64(OP_U8, OP_I8, OP_U16, OP_I16, OP_U32, OP_I32, RT)        \
    WRAP_BINARY_IMPL(OP_U64, uint64x2_t, RT)                                              \
    WRAP_BINARY_IMPL(OP_I64, int64x2_t, RT)

// Binary wrapper for the single-precision float type.
#define WRAP_BINARY_FLOAT(OP_F32, RT) \
    WRAP_BINARY_IMPL(OP_F32, float32x4_t, RT)

// Unary counterpart of WRAP_BINARY_IMPL; unary intrinsics wrapped here
// return the same vector type they take.
#define WRAP_UNARY_IMPL(OP, VEC)                     \
    namespace wrap                                   \
    {                                                \
        XSIMD_INLINE auto(OP)(VEC a) noexcept -> VEC \
        {                                            \
            return (::OP)(a);                        \
        }                                            \
    }

// Unary wrappers for all integer types narrower than 64 bits.
#define WRAP_UNARY_INT_EXCLUDING_64(OP_U8, OP_I8, OP_U16, OP_I16, OP_U32, OP_I32) \
    WRAP_UNARY_IMPL(OP_U8, uint8x16_t)                                            \
    WRAP_UNARY_IMPL(OP_I8, int8x16_t)                                             \
    WRAP_UNARY_IMPL(OP_U16, uint16x8_t)                                           \
    WRAP_UNARY_IMPL(OP_I16, int16x8_t)                                            \
    WRAP_UNARY_IMPL(OP_U32, uint32x4_t)                                           \
    WRAP_UNARY_IMPL(OP_I32, int32x4_t)

// Unary wrappers for every integer type, 64-bit lanes included.
#define WRAP_UNARY_INT(OP_U8, OP_I8, OP_U16, OP_I16, OP_U32, OP_I32, OP_U64, OP_I64) \
    WRAP_UNARY_INT_EXCLUDING_64(OP_U8, OP_I8, OP_U16, OP_I16, OP_U32, OP_I32)        \
    WRAP_UNARY_IMPL(OP_U64, uint64x2_t)                                              \
    WRAP_UNARY_IMPL(OP_I64, int64x2_t)

// Unary wrapper for the single-precision float type.
#define WRAP_UNARY_FLOAT(OP_F32) \
    WRAP_UNARY_IMPL(OP_F32, float32x4_t)
14081
14182// Dummy identity caster to ease coding
14283XSIMD_INLINE uint8x16_t vreinterpretq_u8_u8(uint8x16_t arg) noexcept { return arg; }
@@ -826,8 +767,8 @@ namespace xsimd
826767 * add *
827768 *******/
828769
829- WRAP_BINARY_INT (vaddq , detail::identity_return_type)
830- WRAP_BINARY_FLOAT (vaddq , detail::identity_return_type)
770+ WRAP_BINARY_INT (vaddq_u8, vaddq_s8, vaddq_u16, vaddq_s16, vaddq_u32, vaddq_s32, vaddq_u64, vaddq_s64 , detail::identity_return_type)
771+ WRAP_BINARY_FLOAT (vaddq_f32 , detail::identity_return_type)
831772
832773 template <class A , class T , detail::enable_neon_type_t <T> = 0 >
833774 XSIMD_INLINE batch<T, A> add (batch<T, A> const & lhs, batch<T, A> const & rhs, requires_arch<neon>) noexcept
@@ -845,7 +786,7 @@ namespace xsimd
845786 * avg *
846787 *******/
847788
848- WRAP_BINARY_UINT_EXCLUDING_64 (vhaddq , detail::identity_return_type)
789+ WRAP_BINARY_UINT_EXCLUDING_64 (vhaddq_u8, vhaddq_u16, vhaddq_u32 , detail::identity_return_type)
849790
850791 template <class A , class T , class = std::enable_if_t <(std::is_unsigned<T>::value && sizeof (T) != 8 )>>
851792 XSIMD_INLINE batch<T, A> avg (batch<T, A> const & lhs, batch<T, A> const & rhs, requires_arch<neon>) noexcept
@@ -861,7 +802,7 @@ namespace xsimd
861802 * avgr *
862803 ********/
863804
864- WRAP_BINARY_UINT_EXCLUDING_64 (vrhaddq , detail::identity_return_type)
805+ WRAP_BINARY_UINT_EXCLUDING_64 (vrhaddq_u8, vrhaddq_u16, vrhaddq_u32 , detail::identity_return_type)
865806
866807 template <class A , class T , class = std::enable_if_t <(std::is_unsigned<T>::value && sizeof (T) != 8 )>>
867808 XSIMD_INLINE batch<T, A> avgr (batch<T, A> const & lhs, batch<T, A> const & rhs, requires_arch<neon>) noexcept
@@ -877,7 +818,7 @@ namespace xsimd
877818 * sadd *
878819 ********/
879820
880- WRAP_BINARY_INT (vqaddq , detail::identity_return_type)
821+ WRAP_BINARY_INT (vqaddq_u8, vqaddq_s8, vqaddq_u16, vqaddq_s16, vqaddq_u32, vqaddq_s32, vqaddq_u64, vqaddq_s64 , detail::identity_return_type)
881822
882823 template <class A , class T , detail::enable_neon_type_t <T> = 0 >
883824 XSIMD_INLINE batch<T, A> sadd (batch<T, A> const & lhs, batch<T, A> const & rhs, requires_arch<neon>) noexcept
@@ -895,8 +836,8 @@ namespace xsimd
895836 * sub *
896837 *******/
897838
898- WRAP_BINARY_INT (vsubq , detail::identity_return_type)
899- WRAP_BINARY_FLOAT (vsubq , detail::identity_return_type)
839+ WRAP_BINARY_INT (vsubq_u8, vsubq_s8, vsubq_u16, vsubq_s16, vsubq_u32, vsubq_s32, vsubq_u64, vsubq_s64 , detail::identity_return_type)
840+ WRAP_BINARY_FLOAT (vsubq_f32 , detail::identity_return_type)
900841
901842 template <class A , class T , detail::enable_neon_type_t <T> = 0 >
902843 XSIMD_INLINE batch<T, A> sub (batch<T, A> const & lhs, batch<T, A> const & rhs, requires_arch<neon>) noexcept
@@ -914,7 +855,7 @@ namespace xsimd
914855 * ssub *
915856 ********/
916857
917- WRAP_BINARY_INT (vqsubq , detail::identity_return_type)
858+ WRAP_BINARY_INT (vqsubq_u8, vqsubq_s8, vqsubq_u16, vqsubq_s16, vqsubq_u32, vqsubq_s32, vqsubq_u64, vqsubq_s64 , detail::identity_return_type)
918859
919860 template <class A , class T , detail::enable_neon_type_t <T> = 0 >
920861 XSIMD_INLINE batch<T, A> ssub (batch<T, A> const & lhs, batch<T, A> const & rhs, requires_arch<neon>) noexcept
@@ -932,8 +873,8 @@ namespace xsimd
932873 * mul *
933874 *******/
934875
935- WRAP_BINARY_INT_EXCLUDING_64 (vmulq , detail::identity_return_type)
936- WRAP_BINARY_FLOAT (vmulq , detail::identity_return_type)
876+ WRAP_BINARY_INT_EXCLUDING_64 (vmulq_u8, vmulq_s8, vmulq_u16, vmulq_s16, vmulq_u32, vmulq_s32 , detail::identity_return_type)
877+ WRAP_BINARY_FLOAT (vmulq_f32 , detail::identity_return_type)
937878
938879 template <class A , class T , detail::exclude_int64_neon_t <T> = 0 >
939880 XSIMD_INLINE batch<T, A> mul (batch<T, A> const & lhs, batch<T, A> const & rhs, requires_arch<neon>) noexcept
@@ -985,8 +926,8 @@ namespace xsimd
985926 * eq *
986927 ******/
987928
988- WRAP_BINARY_INT_EXCLUDING_64 (vceqq , detail::comp_return_type)
989- WRAP_BINARY_FLOAT (vceqq , detail::comp_return_type)
929+ WRAP_BINARY_INT_EXCLUDING_64 (vceqq_u8, vceqq_s8, vceqq_u16, vceqq_s16, vceqq_u32, vceqq_s32 , detail::comp_return_type)
930+ WRAP_BINARY_FLOAT (vceqq_f32 , detail::comp_return_type)
990931
991932 template <class A , class T , detail::exclude_int64_neon_t <T> = 0 >
992933 XSIMD_INLINE batch_bool<T, A> eq (batch<T, A> const & lhs, batch<T, A> const & rhs, requires_arch<neon>) noexcept
@@ -1070,8 +1011,8 @@ namespace xsimd
10701011 * lt *
10711012 ******/
10721013
1073- WRAP_BINARY_INT_EXCLUDING_64 (vcltq , detail::comp_return_type)
1074- WRAP_BINARY_FLOAT (vcltq , detail::comp_return_type)
1014+ WRAP_BINARY_INT_EXCLUDING_64 (vcltq_u8, vcltq_s8, vcltq_u16, vcltq_s16, vcltq_u32, vcltq_s32 , detail::comp_return_type)
1015+ WRAP_BINARY_FLOAT (vcltq_f32 , detail::comp_return_type)
10751016
10761017 template <class A , class T , detail::exclude_int64_neon_t <T> = 0 >
10771018 XSIMD_INLINE batch_bool<T, A> lt (batch<T, A> const & lhs, batch<T, A> const & rhs, requires_arch<neon>) noexcept
@@ -1103,8 +1044,8 @@ namespace xsimd
11031044 * le *
11041045 ******/
11051046
1106- WRAP_BINARY_INT_EXCLUDING_64 (vcleq , detail::comp_return_type)
1107- WRAP_BINARY_FLOAT (vcleq , detail::comp_return_type)
1047+ WRAP_BINARY_INT_EXCLUDING_64 (vcleq_u8, vcleq_s8, vcleq_u16, vcleq_s16, vcleq_u32, vcleq_s32 , detail::comp_return_type)
1048+ WRAP_BINARY_FLOAT (vcleq_f32 , detail::comp_return_type)
11081049
11091050 template <class A , class T , detail::exclude_int64_neon_t <T> = 0 >
11101051 XSIMD_INLINE batch_bool<T, A> le (batch<T, A> const & lhs, batch<T, A> const & rhs, requires_arch<neon>) noexcept
@@ -1139,8 +1080,8 @@ namespace xsimd
11391080 }
11401081 }
11411082
1142- WRAP_BINARY_INT_EXCLUDING_64 (vcgtq , detail::comp_return_type)
1143- WRAP_BINARY_FLOAT (vcgtq , detail::comp_return_type)
1083+ WRAP_BINARY_INT_EXCLUDING_64 (vcgtq_u8, vcgtq_s8, vcgtq_u16, vcgtq_s16, vcgtq_u32, vcgtq_s32 , detail::comp_return_type)
1084+ WRAP_BINARY_FLOAT (vcgtq_f32 , detail::comp_return_type)
11441085
11451086 template <class A , class T , detail::exclude_int64_neon_t <T> = 0 >
11461087 XSIMD_INLINE batch_bool<T, A> gt (batch<T, A> const & lhs, batch<T, A> const & rhs, requires_arch<neon>) noexcept
@@ -1172,8 +1113,8 @@ namespace xsimd
11721113 * ge *
11731114 ******/
11741115
1175- WRAP_BINARY_INT_EXCLUDING_64 (vcgeq , detail::comp_return_type)
1176- WRAP_BINARY_FLOAT (vcgeq , detail::comp_return_type)
1116+ WRAP_BINARY_INT_EXCLUDING_64 (vcgeq_u8, vcgeq_s8, vcgeq_u16, vcgeq_s16, vcgeq_u32, vcgeq_s32 , detail::comp_return_type)
1117+ WRAP_BINARY_FLOAT (vcgeq_f32 , detail::comp_return_type)
11771118
11781119 template <class A , class T , detail::exclude_int64_neon_t <T> = 0 >
11791120 XSIMD_INLINE batch_bool<T, A> ge (batch<T, A> const & lhs, batch<T, A> const & rhs, requires_arch<neon>) noexcept
@@ -1207,7 +1148,7 @@ namespace xsimd
12071148 * bitwise_and *
12081149 ***************/
12091150
1210- WRAP_BINARY_INT (vandq , detail::identity_return_type)
1151+ WRAP_BINARY_INT (vandq_u8, vandq_s8, vandq_u16, vandq_s16, vandq_u32, vandq_s32, vandq_u64, vandq_s64 , detail::identity_return_type)
12111152
12121153 namespace detail
12131154 {
@@ -1247,7 +1188,7 @@ namespace xsimd
12471188 * bitwise_or *
12481189 **************/
12491190
1250- WRAP_BINARY_INT (vorrq , detail::identity_return_type)
1191+ WRAP_BINARY_INT (vorrq_u8, vorrq_s8, vorrq_u16, vorrq_s16, vorrq_u32, vorrq_s32, vorrq_u64, vorrq_s64 , detail::identity_return_type)
12511192
12521193 namespace detail
12531194 {
@@ -1287,7 +1228,7 @@ namespace xsimd
12871228 * bitwise_xor *
12881229 ***************/
12891230
1290- WRAP_BINARY_INT (veorq , detail::identity_return_type)
1231+ WRAP_BINARY_INT (veorq_u8, veorq_s8, veorq_u16, veorq_s16, veorq_u32, veorq_s32, veorq_u64, veorq_s64 , detail::identity_return_type)
12911232
12921233 namespace detail
12931234 {
@@ -1337,7 +1278,7 @@ namespace xsimd
13371278 * bitwise_not *
13381279 ***************/
13391280
1340- WRAP_UNARY_INT_EXCLUDING_64 (vmvnq )
1281+ WRAP_UNARY_INT_EXCLUDING_64 (vmvnq_u8, vmvnq_s8, vmvnq_u16, vmvnq_s16, vmvnq_u32, vmvnq_s32 )
13411282
13421283 namespace detail
13431284 {
@@ -1377,7 +1318,7 @@ namespace xsimd
13771318 * bitwise_andnot *
13781319 ******************/
13791320
1380- WRAP_BINARY_INT (vbicq , detail::identity_return_type)
1321+ WRAP_BINARY_INT (vbicq_u8, vbicq_s8, vbicq_u16, vbicq_s16, vbicq_u32, vbicq_s32, vbicq_u64, vbicq_s64 , detail::identity_return_type)
13811322
13821323 namespace detail
13831324 {
@@ -1416,8 +1357,8 @@ namespace xsimd
14161357 * min *
14171358 *******/
14181359
1419- WRAP_BINARY_INT_EXCLUDING_64 (vminq , detail::identity_return_type)
1420- WRAP_BINARY_FLOAT (vminq , detail::identity_return_type)
1360+ WRAP_BINARY_INT_EXCLUDING_64 (vminq_u8, vminq_s8, vminq_u16, vminq_s16, vminq_u32, vminq_s32 , detail::identity_return_type)
1361+ WRAP_BINARY_FLOAT (vminq_f32 , detail::identity_return_type)
14211362
14221363 template <class A , class T , detail::exclude_int64_neon_t <T> = 0 >
14231364 XSIMD_INLINE batch<T, A> min (batch<T, A> const & lhs, batch<T, A> const & rhs, requires_arch<neon>) noexcept
@@ -1440,8 +1381,8 @@ namespace xsimd
14401381 * max *
14411382 *******/
14421383
1443- WRAP_BINARY_INT_EXCLUDING_64 (vmaxq , detail::identity_return_type)
1444- WRAP_BINARY_FLOAT (vmaxq , detail::identity_return_type)
1384+ WRAP_BINARY_INT_EXCLUDING_64 (vmaxq_u8, vmaxq_s8, vmaxq_u16, vmaxq_s16, vmaxq_u32, vmaxq_s32 , detail::identity_return_type)
1385+ WRAP_BINARY_FLOAT (vmaxq_f32 , detail::identity_return_type)
14451386
14461387 template <class A , class T , detail::exclude_int64_neon_t <T> = 0 >
14471388 XSIMD_INLINE batch<T, A> max (batch<T, A> const & lhs, batch<T, A> const & rhs, requires_arch<neon>) noexcept
@@ -1470,7 +1411,7 @@ namespace xsimd
14701411 XSIMD_INLINE int16x8_t vabsq_s16 (int16x8_t a) noexcept { return ::vabsq_s16 (a); }
14711412 XSIMD_INLINE int32x4_t vabsq_s32 (int32x4_t a) noexcept { return ::vabsq_s32 (a); }
14721413 }
1473- WRAP_UNARY_FLOAT (vabsq )
1414+ WRAP_UNARY_FLOAT (vabsq_f32 )
14741415
14751416 namespace detail
14761417 {
0 commit comments