
Commit 25a8f5e

Try explicit macros
1 parent 7ab5638 commit 25a8f5e

1 file changed

Lines changed: 75 additions & 134 deletions


include/xsimd/arch/xsimd_neon.hpp

@@ -24,119 +24,60 @@
 #include "../utils/xsimd_type_traits.hpp"
 #include "./common/xsimd_common_bit.hpp"
 #include "./common/xsimd_common_cast.hpp"
+#include "./xsimd_common_fwd.hpp"
 
-// Wrap intrinsics so we can pass them as function pointers
-// - OP: intrinsics name prefix, e.g., vorrq
-// - RT: type traits to deduce intrinsics return types
-#define WRAP_BINARY_UINT_EXCLUDING_64(OP, RT) \
-    namespace wrap \
-    { \
-        XSIMD_INLINE RT<uint8x16_t> OP##_u8(uint8x16_t a, uint8x16_t b) noexcept \
-        { \
-            return ::OP##_u8(a, b); \
-        } \
-        XSIMD_INLINE RT<uint16x8_t> OP##_u16(uint16x8_t a, uint16x8_t b) noexcept \
-        { \
-            return ::OP##_u16(a, b); \
-        } \
-        XSIMD_INLINE RT<uint32x4_t> OP##_u32(uint32x4_t a, uint32x4_t b) noexcept \
-        { \
-            return ::OP##_u32(a, b); \
-        } \
-    }
-
-#define WRAP_BINARY_INT_EXCLUDING_64(OP, RT) \
-    WRAP_BINARY_UINT_EXCLUDING_64(OP, RT) \
-    namespace wrap \
-    { \
-        XSIMD_INLINE RT<int8x16_t> OP##_s8(int8x16_t a, int8x16_t b) noexcept \
-        { \
-            return ::OP##_s8(a, b); \
-        } \
-        XSIMD_INLINE RT<int16x8_t> OP##_s16(int16x8_t a, int16x8_t b) noexcept \
-        { \
-            return ::OP##_s16(a, b); \
-        } \
-        XSIMD_INLINE RT<int32x4_t> OP##_s32(int32x4_t a, int32x4_t b) noexcept \
-        { \
-            return ::OP##_s32(a, b); \
-        } \
-    }
-
-#define WRAP_BINARY_INT(OP, RT) \
-    WRAP_BINARY_INT_EXCLUDING_64(OP, RT) \
-    namespace wrap \
-    { \
-        XSIMD_INLINE RT<uint64x2_t> OP##_u64(uint64x2_t a, uint64x2_t b) noexcept \
-        { \
-            return ::OP##_u64(a, b); \
-        } \
-        XSIMD_INLINE RT<int64x2_t> OP##_s64(int64x2_t a, int64x2_t b) noexcept \
-        { \
-            return ::OP##_s64(a, b); \
-        } \
-    }
-
-#define WRAP_BINARY_FLOAT(OP, RT) \
-    namespace wrap \
-    { \
-        XSIMD_INLINE RT<float32x4_t> OP##_f32(float32x4_t a, float32x4_t b) noexcept \
-        { \
-            return ::OP##_f32(a, b); \
-        } \
-    }
-
-#define WRAP_UNARY_INT_EXCLUDING_64(OP) \
+#define WRAP_BINARY_IMPL(OP, VEC, RT) \
     namespace wrap \
     { \
-        XSIMD_INLINE uint8x16_t OP##_u8(uint8x16_t a) noexcept \
-        { \
-            return ::OP##_u8(a); \
-        } \
-        XSIMD_INLINE int8x16_t OP##_s8(int8x16_t a) noexcept \
+        XSIMD_INLINE auto(OP)(VEC a, VEC b) noexcept -> RT<VEC> \
         { \
-            return ::OP##_s8(a); \
-        } \
-        XSIMD_INLINE uint16x8_t OP##_u16(uint16x8_t a) noexcept \
-        { \
-            return ::OP##_u16(a); \
-        } \
-        XSIMD_INLINE int16x8_t OP##_s16(int16x8_t a) noexcept \
-        { \
-            return ::OP##_s16(a); \
-        } \
-        XSIMD_INLINE uint32x4_t OP##_u32(uint32x4_t a) noexcept \
-        { \
-            return ::OP##_u32(a); \
-        } \
-        XSIMD_INLINE int32x4_t OP##_s32(int32x4_t a) noexcept \
-        { \
-            return ::OP##_s32(a); \
+            return (::OP)(a, b); \
         } \
     }
 
-#define WRAP_UNARY_INT(OP) \
-    WRAP_UNARY_INT_EXCLUDING_64(OP) \
-    namespace wrap \
-    { \
-        XSIMD_INLINE uint64x2_t OP##_u64(uint64x2_t a) noexcept \
-        { \
-            return ::OP##_u64(a); \
-        } \
-        XSIMD_INLINE int64x2_t OP##_s64(int64x2_t a) noexcept \
-        { \
-            return ::OP##_s64(a); \
-        } \
+#define WRAP_BINARY_UINT_EXCLUDING_64(OP_U8, OP_U16, OP_U32, RT) \
+    WRAP_BINARY_IMPL(OP_U8, uint8x16_t, RT) \
+    WRAP_BINARY_IMPL(OP_U16, uint16x8_t, RT) \
+    WRAP_BINARY_IMPL(OP_U32, uint32x4_t, RT)
+
+#define WRAP_BINARY_INT_EXCLUDING_64(OP_U8, OP_I8, OP_U16, OP_I16, OP_U32, OP_I32, RT) \
+    WRAP_BINARY_UINT_EXCLUDING_64(OP_U8, OP_U16, OP_U32, RT) \
+    WRAP_BINARY_IMPL(OP_I8, int8x16_t, RT) \
+    WRAP_BINARY_IMPL(OP_I16, int16x8_t, RT) \
+    WRAP_BINARY_IMPL(OP_I32, int32x4_t, RT)
+
+#define WRAP_BINARY_INT(OP_U8, OP_I8, OP_U16, OP_I16, OP_U32, OP_I32, OP_U64, OP_I64, RT) \
+    WRAP_BINARY_INT_EXCLUDING_64(OP_U8, OP_I8, OP_U16, OP_I16, OP_U32, OP_I32, RT) \
+    WRAP_BINARY_IMPL(OP_U64, uint64x2_t, RT) \
+    WRAP_BINARY_IMPL(OP_I64, int64x2_t, RT)
+
+#define WRAP_BINARY_FLOAT(OP_F32, RT) \
+    WRAP_BINARY_IMPL(OP_F32, float32x4_t, RT)
+
+#define WRAP_UNARY_IMPL(OP, VEC) \
+    namespace wrap \
+    { \
+        XSIMD_INLINE auto(OP)(VEC a) noexcept -> VEC \
+        { \
+            return (::OP)(a); \
+        } \
+        } \
     }
 
-#define WRAP_UNARY_FLOAT(OP) \
-    namespace wrap \
-    { \
-        XSIMD_INLINE float32x4_t OP##_f32(float32x4_t a) noexcept \
-        { \
-            return ::OP##_f32(a); \
-        } \
-    }
+#define WRAP_UNARY_INT_EXCLUDING_64(OP_U8, OP_I8, OP_U16, OP_I16, OP_U32, OP_I32) \
+    WRAP_UNARY_IMPL(OP_U8, uint8x16_t) \
+    WRAP_UNARY_IMPL(OP_I8, int8x16_t) \
+    WRAP_UNARY_IMPL(OP_U16, uint16x8_t) \
+    WRAP_UNARY_IMPL(OP_I16, int16x8_t) \
+    WRAP_UNARY_IMPL(OP_U32, uint32x4_t) \
+    WRAP_UNARY_IMPL(OP_I32, int32x4_t)
+
+#define WRAP_UNARY_INT(OP_U8, OP_I8, OP_U16, OP_I16, OP_U32, OP_I32, OP_U64, OP_I64) \
+    WRAP_UNARY_INT_EXCLUDING_64(OP_U8, OP_I8, OP_U16, OP_I16, OP_U32, OP_I32) \
+    WRAP_UNARY_IMPL(OP_U64, uint64x2_t) \
+    WRAP_UNARY_IMPL(OP_I64, int64x2_t)
+
+#define WRAP_UNARY_FLOAT(OP_F32) \
+    WRAP_UNARY_IMPL(OP_F32, float32x4_t)
 
 // Dummy identity caster to ease coding
 XSIMD_INLINE uint8x16_t vreinterpretq_u8_u8(uint8x16_t arg) noexcept { return arg; }
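For orientation, a single invocation of the new macro expands roughly as shown below, assuming (as its uses later in this diff suggest) that detail::identity_return_type<T> is simply T. The parentheses around the wrapper name and around ::vaddq_u8 presumably keep the preprocessor from re-expanding the intrinsic name on toolchains whose arm_neon.h defines the intrinsics as function-like macros; the names no longer need to be assembled with token pasting, the caller spells them out in full.

    // WRAP_BINARY_IMPL(vaddq_u8, uint8x16_t, detail::identity_return_type) expands roughly to:
    namespace wrap
    {
        XSIMD_INLINE auto(vaddq_u8)(uint8x16_t a, uint8x16_t b) noexcept -> detail::identity_return_type<uint8x16_t>
        {
            return (::vaddq_u8)(a, b);
        }
    }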
@@ -826,8 +767,8 @@ namespace xsimd
          * add *
          *******/
 
-        WRAP_BINARY_INT(vaddq, detail::identity_return_type)
-        WRAP_BINARY_FLOAT(vaddq, detail::identity_return_type)
+        WRAP_BINARY_INT(vaddq_u8, vaddq_s8, vaddq_u16, vaddq_s16, vaddq_u32, vaddq_s32, vaddq_u64, vaddq_s64, detail::identity_return_type)
+        WRAP_BINARY_FLOAT(vaddq_f32, detail::identity_return_type)
 
         template <class A, class T, detail::enable_neon_type_t<T> = 0>
         XSIMD_INLINE batch<T, A> add(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<neon>) noexcept
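These wrappers exist so the intrinsics can be handed around as ordinary function pointers, as the comment deleted in the first hunk explained. A minimal self-contained sketch of that idea, assuming C++17; the apply_binary helper below is made up for illustration and is not xsimd's real dispatch machinery:

    #include <arm_neon.h>

    namespace wrap
    {
        // hand-expanded equivalents of what WRAP_BINARY_IMPL generates for vaddq_u8 / vaddq_s8
        inline auto(vaddq_u8)(uint8x16_t a, uint8x16_t b) noexcept -> uint8x16_t { return (::vaddq_u8)(a, b); }
        inline auto(vaddq_s8)(int8x16_t a, int8x16_t b) noexcept -> int8x16_t { return (::vaddq_s8)(a, b); }
    }

    // hypothetical helper: receives a wrapped intrinsic as a plain function pointer
    template <class V>
    V apply_binary(V (*op)(V, V) noexcept, V lhs, V rhs)
    {
        return op(lhs, rhs);
    }

    uint8x16_t add_u8(uint8x16_t a, uint8x16_t b)
    {
        return apply_binary(wrap::vaddq_u8, a, b); // the wrapper decays to a function pointer here
    }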
@@ -845,7 +786,7 @@ namespace xsimd
          * avg *
          *******/
 
-        WRAP_BINARY_UINT_EXCLUDING_64(vhaddq, detail::identity_return_type)
+        WRAP_BINARY_UINT_EXCLUDING_64(vhaddq_u8, vhaddq_u16, vhaddq_u32, detail::identity_return_type)
 
         template <class A, class T, class = std::enable_if_t<(std::is_unsigned<T>::value && sizeof(T) != 8)>>
         XSIMD_INLINE batch<T, A> avg(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<neon>) noexcept
@@ -861,7 +802,7 @@ namespace xsimd
          * avgr *
          ********/
 
-        WRAP_BINARY_UINT_EXCLUDING_64(vrhaddq, detail::identity_return_type)
+        WRAP_BINARY_UINT_EXCLUDING_64(vrhaddq_u8, vrhaddq_u16, vrhaddq_u32, detail::identity_return_type)
 
         template <class A, class T, class = std::enable_if_t<(std::is_unsigned<T>::value && sizeof(T) != 8)>>
         XSIMD_INLINE batch<T, A> avgr(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<neon>) noexcept
@@ -877,7 +818,7 @@ namespace xsimd
          * sadd *
          ********/
 
-        WRAP_BINARY_INT(vqaddq, detail::identity_return_type)
+        WRAP_BINARY_INT(vqaddq_u8, vqaddq_s8, vqaddq_u16, vqaddq_s16, vqaddq_u32, vqaddq_s32, vqaddq_u64, vqaddq_s64, detail::identity_return_type)
 
         template <class A, class T, detail::enable_neon_type_t<T> = 0>
         XSIMD_INLINE batch<T, A> sadd(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<neon>) noexcept
@@ -895,8 +836,8 @@ namespace xsimd
          * sub *
          *******/
 
-        WRAP_BINARY_INT(vsubq, detail::identity_return_type)
-        WRAP_BINARY_FLOAT(vsubq, detail::identity_return_type)
+        WRAP_BINARY_INT(vsubq_u8, vsubq_s8, vsubq_u16, vsubq_s16, vsubq_u32, vsubq_s32, vsubq_u64, vsubq_s64, detail::identity_return_type)
+        WRAP_BINARY_FLOAT(vsubq_f32, detail::identity_return_type)
 
         template <class A, class T, detail::enable_neon_type_t<T> = 0>
         XSIMD_INLINE batch<T, A> sub(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<neon>) noexcept
@@ -914,7 +855,7 @@ namespace xsimd
          * ssub *
          ********/
 
-        WRAP_BINARY_INT(vqsubq, detail::identity_return_type)
+        WRAP_BINARY_INT(vqsubq_u8, vqsubq_s8, vqsubq_u16, vqsubq_s16, vqsubq_u32, vqsubq_s32, vqsubq_u64, vqsubq_s64, detail::identity_return_type)
 
         template <class A, class T, detail::enable_neon_type_t<T> = 0>
         XSIMD_INLINE batch<T, A> ssub(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<neon>) noexcept
@@ -932,8 +873,8 @@ namespace xsimd
          * mul *
          *******/
 
-        WRAP_BINARY_INT_EXCLUDING_64(vmulq, detail::identity_return_type)
-        WRAP_BINARY_FLOAT(vmulq, detail::identity_return_type)
+        WRAP_BINARY_INT_EXCLUDING_64(vmulq_u8, vmulq_s8, vmulq_u16, vmulq_s16, vmulq_u32, vmulq_s32, detail::identity_return_type)
+        WRAP_BINARY_FLOAT(vmulq_f32, detail::identity_return_type)
 
         template <class A, class T, detail::exclude_int64_neon_t<T> = 0>
         XSIMD_INLINE batch<T, A> mul(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<neon>) noexcept
@@ -985,8 +926,8 @@ namespace xsimd
          * eq *
          ******/
 
-        WRAP_BINARY_INT_EXCLUDING_64(vceqq, detail::comp_return_type)
-        WRAP_BINARY_FLOAT(vceqq, detail::comp_return_type)
+        WRAP_BINARY_INT_EXCLUDING_64(vceqq_u8, vceqq_s8, vceqq_u16, vceqq_s16, vceqq_u32, vceqq_s32, detail::comp_return_type)
+        WRAP_BINARY_FLOAT(vceqq_f32, detail::comp_return_type)
 
         template <class A, class T, detail::exclude_int64_neon_t<T> = 0>
         XSIMD_INLINE batch_bool<T, A> eq(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<neon>) noexcept
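The comparison wrappers here and below use detail::comp_return_type rather than detail::identity_return_type because NEON comparisons produce an unsigned mask vector of the same lane width; ::vceqq_f32, for instance, yields a uint32x4_t rather than a float32x4_t. A toy trait with that shape, illustrative only and not xsimd's actual definition, could look like this:

    #include <arm_neon.h>
    #include <type_traits>

    namespace detail_sketch
    {
        // hypothetical stand-in for detail::comp_return_type: maps an operand
        // vector type to the unsigned mask type its comparison intrinsic returns
        template <class V> struct comp_result;
        template <> struct comp_result<uint32x4_t> { using type = uint32x4_t; };
        template <> struct comp_result<int32x4_t> { using type = uint32x4_t; };
        template <> struct comp_result<float32x4_t> { using type = uint32x4_t; };

        template <class V>
        using comp_return_type = typename comp_result<V>::type;
    }

    static_assert(std::is_same<detail_sketch::comp_return_type<float32x4_t>, uint32x4_t>::value,
                  "float comparisons such as vceqq_f32 produce a uint32x4_t mask");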
@@ -1070,8 +1011,8 @@ namespace xsimd
          * lt *
          ******/
 
-        WRAP_BINARY_INT_EXCLUDING_64(vcltq, detail::comp_return_type)
-        WRAP_BINARY_FLOAT(vcltq, detail::comp_return_type)
+        WRAP_BINARY_INT_EXCLUDING_64(vcltq_u8, vcltq_s8, vcltq_u16, vcltq_s16, vcltq_u32, vcltq_s32, detail::comp_return_type)
+        WRAP_BINARY_FLOAT(vcltq_f32, detail::comp_return_type)
 
         template <class A, class T, detail::exclude_int64_neon_t<T> = 0>
         XSIMD_INLINE batch_bool<T, A> lt(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<neon>) noexcept
@@ -1103,8 +1044,8 @@ namespace xsimd
          * le *
          ******/
 
-        WRAP_BINARY_INT_EXCLUDING_64(vcleq, detail::comp_return_type)
-        WRAP_BINARY_FLOAT(vcleq, detail::comp_return_type)
+        WRAP_BINARY_INT_EXCLUDING_64(vcleq_u8, vcleq_s8, vcleq_u16, vcleq_s16, vcleq_u32, vcleq_s32, detail::comp_return_type)
+        WRAP_BINARY_FLOAT(vcleq_f32, detail::comp_return_type)
 
         template <class A, class T, detail::exclude_int64_neon_t<T> = 0>
         XSIMD_INLINE batch_bool<T, A> le(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<neon>) noexcept
@@ -1139,8 +1080,8 @@ namespace xsimd
             }
         }
 
-        WRAP_BINARY_INT_EXCLUDING_64(vcgtq, detail::comp_return_type)
-        WRAP_BINARY_FLOAT(vcgtq, detail::comp_return_type)
+        WRAP_BINARY_INT_EXCLUDING_64(vcgtq_u8, vcgtq_s8, vcgtq_u16, vcgtq_s16, vcgtq_u32, vcgtq_s32, detail::comp_return_type)
+        WRAP_BINARY_FLOAT(vcgtq_f32, detail::comp_return_type)
 
         template <class A, class T, detail::exclude_int64_neon_t<T> = 0>
         XSIMD_INLINE batch_bool<T, A> gt(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<neon>) noexcept
@@ -1172,8 +1113,8 @@ namespace xsimd
          * ge *
          ******/
 
-        WRAP_BINARY_INT_EXCLUDING_64(vcgeq, detail::comp_return_type)
-        WRAP_BINARY_FLOAT(vcgeq, detail::comp_return_type)
+        WRAP_BINARY_INT_EXCLUDING_64(vcgeq_u8, vcgeq_s8, vcgeq_u16, vcgeq_s16, vcgeq_u32, vcgeq_s32, detail::comp_return_type)
+        WRAP_BINARY_FLOAT(vcgeq_f32, detail::comp_return_type)
 
         template <class A, class T, detail::exclude_int64_neon_t<T> = 0>
         XSIMD_INLINE batch_bool<T, A> ge(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<neon>) noexcept
@@ -1207,7 +1148,7 @@ namespace xsimd
          * bitwise_and *
          ***************/
 
-        WRAP_BINARY_INT(vandq, detail::identity_return_type)
+        WRAP_BINARY_INT(vandq_u8, vandq_s8, vandq_u16, vandq_s16, vandq_u32, vandq_s32, vandq_u64, vandq_s64, detail::identity_return_type)
 
         namespace detail
         {
@@ -1247,7 +1188,7 @@ namespace xsimd
          * bitwise_or *
          **************/
 
-        WRAP_BINARY_INT(vorrq, detail::identity_return_type)
+        WRAP_BINARY_INT(vorrq_u8, vorrq_s8, vorrq_u16, vorrq_s16, vorrq_u32, vorrq_s32, vorrq_u64, vorrq_s64, detail::identity_return_type)
 
         namespace detail
        {
@@ -1287,7 +1228,7 @@ namespace xsimd
          * bitwise_xor *
          ***************/
 
-        WRAP_BINARY_INT(veorq, detail::identity_return_type)
+        WRAP_BINARY_INT(veorq_u8, veorq_s8, veorq_u16, veorq_s16, veorq_u32, veorq_s32, veorq_u64, veorq_s64, detail::identity_return_type)
 
         namespace detail
        {
@@ -1337,7 +1278,7 @@ namespace xsimd
          * bitwise_not *
          ***************/
 
-        WRAP_UNARY_INT_EXCLUDING_64(vmvnq)
+        WRAP_UNARY_INT_EXCLUDING_64(vmvnq_u8, vmvnq_s8, vmvnq_u16, vmvnq_s16, vmvnq_u32, vmvnq_s32)
 
         namespace detail
        {
@@ -1377,7 +1318,7 @@ namespace xsimd
          * bitwise_andnot *
          ******************/
 
-        WRAP_BINARY_INT(vbicq, detail::identity_return_type)
+        WRAP_BINARY_INT(vbicq_u8, vbicq_s8, vbicq_u16, vbicq_s16, vbicq_u32, vbicq_s32, vbicq_u64, vbicq_s64, detail::identity_return_type)
 
         namespace detail
        {
@@ -1416,8 +1357,8 @@ namespace xsimd
          * min *
          *******/
 
-        WRAP_BINARY_INT_EXCLUDING_64(vminq, detail::identity_return_type)
-        WRAP_BINARY_FLOAT(vminq, detail::identity_return_type)
+        WRAP_BINARY_INT_EXCLUDING_64(vminq_u8, vminq_s8, vminq_u16, vminq_s16, vminq_u32, vminq_s32, detail::identity_return_type)
+        WRAP_BINARY_FLOAT(vminq_f32, detail::identity_return_type)
 
         template <class A, class T, detail::exclude_int64_neon_t<T> = 0>
         XSIMD_INLINE batch<T, A> min(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<neon>) noexcept
@@ -1440,8 +1381,8 @@ namespace xsimd
          * max *
          *******/
 
-        WRAP_BINARY_INT_EXCLUDING_64(vmaxq, detail::identity_return_type)
-        WRAP_BINARY_FLOAT(vmaxq, detail::identity_return_type)
+        WRAP_BINARY_INT_EXCLUDING_64(vmaxq_u8, vmaxq_s8, vmaxq_u16, vmaxq_s16, vmaxq_u32, vmaxq_s32, detail::identity_return_type)
+        WRAP_BINARY_FLOAT(vmaxq_f32, detail::identity_return_type)
 
         template <class A, class T, detail::exclude_int64_neon_t<T> = 0>
         XSIMD_INLINE batch<T, A> max(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<neon>) noexcept
@@ -1470,7 +1411,7 @@ namespace xsimd
             XSIMD_INLINE int16x8_t vabsq_s16(int16x8_t a) noexcept { return ::vabsq_s16(a); }
             XSIMD_INLINE int32x4_t vabsq_s32(int32x4_t a) noexcept { return ::vabsq_s32(a); }
         }
-        WRAP_UNARY_FLOAT(vabsq)
+        WRAP_UNARY_FLOAT(vabsq_f32)
 
         namespace detail
        {
