@@ -34,37 +34,10 @@ namespace xsimd
3434
3535 namespace detail
3636 {
37- template <template <class > class return_type , class ... T>
38- struct neon_dispatcher_base
39- {
40- struct unary
41- {
42- using container_type = std::tuple<return_type<T> (*)(T)...>;
43- const container_type m_func;
44-
45- template <class U >
46- return_type<U> apply (U rhs) const noexcept
47- {
48- using func_type = return_type<U> (*)(U);
49- auto func = std::get<func_type>(m_func);
50- return func (rhs);
51- }
52- };
53-
54- struct binary
55- {
56- using container_type = std::tuple<return_type<T> (*)(T, T)...>;
57- const container_type m_func;
58-
59- template <class U >
60- return_type<U> apply (U lhs, U rhs) const noexcept
61- {
62- using func_type = return_type<U> (*)(U, U);
63- auto func = std::get<func_type>(m_func);
64- return func (lhs, rhs);
65- }
66- };
67- };
37+
38+ template <class T >
39+ using enable_neon64_type_t = std::enable_if_t <std::is_integral<T>::value || std::is_same<T, float >::value || std::is_same<T, double >::value,
40+ int >;
6841
6942 /* *******************
7043 * bitwise_caster *
@@ -941,186 +914,73 @@ namespace xsimd
941914 return vsetq_lane_f64 (val, self, I);
942915 }
943916
944- /* *****************
945- * reducer macros *
946- ******************/
947-
948- // Wrap reducer intrinsics so we can pass them as function pointers
949- // - OP: intrinsics name prefix, e.g., vorrq
950-
951- #define WRAP_REDUCER_INT_EXCLUDING_64 (OP ) \
952- namespace wrap \
953- { \
954- XSIMD_INLINE uint8_t OP##_u8(uint8x16_t a) noexcept \
955- { \
956- return ::OP##_u8 (a); \
957- } \
958- XSIMD_INLINE int8_t OP##_s8(int8x16_t a) noexcept \
959- { \
960- return ::OP##_s8 (a); \
961- } \
962- XSIMD_INLINE uint16_t OP##_u16(uint16x8_t a) noexcept \
963- { \
964- return ::OP##_u16 (a); \
965- } \
966- XSIMD_INLINE int16_t OP##_s16(int16x8_t a) noexcept \
967- { \
968- return ::OP##_s16 (a); \
969- } \
970- XSIMD_INLINE uint32_t OP##_u32(uint32x4_t a) noexcept \
971- { \
972- return ::OP##_u32 (a); \
973- } \
974- XSIMD_INLINE int32_t OP##_s32(int32x4_t a) noexcept \
975- { \
976- return ::OP##_s32 (a); \
977- } \
978- }
979-
980- #define WRAP_REDUCER_INT (OP ) \
981- WRAP_REDUCER_INT_EXCLUDING_64 (OP) \
982- namespace wrap \
983- { \
984- XSIMD_INLINE uint64_t OP##_u64(uint64x2_t a) noexcept \
985- { \
986- return ::OP##_u64 (a); \
987- } \
988- XSIMD_INLINE int64_t OP##_s64(int64x2_t a) noexcept \
989- { \
990- return ::OP##_s64 (a); \
991- } \
992- }
993-
994- #define WRAP_REDUCER_FLOAT (OP ) \
995- namespace wrap \
996- { \
997- XSIMD_INLINE float OP##_f32(float32x4_t a) noexcept \
998- { \
999- return ::OP##_f32 (a); \
1000- } \
1001- XSIMD_INLINE double OP##_f64(float64x2_t a) noexcept \
1002- { \
1003- return ::OP##_f64 (a); \
1004- } \
1005- }
1006-
1007- namespace detail
1008- {
1009- template <class R >
1010- struct reducer_return_type_impl ;
1011-
1012- template <>
1013- struct reducer_return_type_impl <uint8x16_t >
1014- {
1015- using type = uint8_t ;
1016- };
1017-
1018- template <>
1019- struct reducer_return_type_impl <int8x16_t >
1020- {
1021- using type = int8_t ;
1022- };
1023-
1024- template <>
1025- struct reducer_return_type_impl <uint16x8_t >
1026- {
1027- using type = uint16_t ;
1028- };
1029-
1030- template <>
1031- struct reducer_return_type_impl <int16x8_t >
1032- {
1033- using type = int16_t ;
1034- };
1035-
1036- template <>
1037- struct reducer_return_type_impl <uint32x4_t >
1038- {
1039- using type = uint32_t ;
1040- };
1041-
1042- template <>
1043- struct reducer_return_type_impl <int32x4_t >
1044- {
1045- using type = int32_t ;
1046- };
1047-
1048- template <>
1049- struct reducer_return_type_impl <uint64x2_t >
1050- {
1051- using type = uint64_t ;
1052- };
1053-
1054- template <>
1055- struct reducer_return_type_impl <int64x2_t >
1056- {
1057- using type = int64_t ;
1058- };
1059-
1060- template <>
1061- struct reducer_return_type_impl <float32x4_t >
1062- {
1063- using type = float ;
1064- };
1065-
1066- template <>
1067- struct reducer_return_type_impl <float64x2_t >
1068- {
1069- using type = double ;
1070- };
1071-
1072- template <class R >
1073- using reducer_return_type = typename reducer_return_type_impl<R>::type;
1074-
1075- template <class ... T>
1076- struct neon_reducer_dispatcher_impl : neon_dispatcher_base<reducer_return_type, T...>
1077- {
1078- };
1079-
1080- using neon_reducer_dispatcher = neon_reducer_dispatcher_impl<uint8x16_t , int8x16_t ,
1081- uint16x8_t , int16x8_t ,
1082- uint32x4_t , int32x4_t ,
1083- uint64x2_t , int64x2_t ,
1084- float32x4_t , float64x2_t >;
1085- template <class T >
1086- using enable_neon64_type_t = std::enable_if_t <std::is_integral<T>::value || std::is_same<T, float >::value || std::is_same<T, double >::value,
1087- int >;
1088- }
1089-
1090917 /* *************
1091918 * reduce_add *
1092919 **************/
1093920
1094- WRAP_REDUCER_INT (vaddvq)
1095- WRAP_REDUCER_FLOAT (vaddvq)
921+ namespace wrap
922+ {
923+ // TODO(c++17): Make a single function with if constexpr switch
924+ template <class T , std::enable_if_t <std::is_same<T, uint8_t >::value, int > = 0 >
925+ XSIMD_INLINE uint8_t x_vaddvq (uint8x16_t a) noexcept { return vaddvq_u8 (a); }
926+ template <class T , std::enable_if_t <std::is_same<T, int8_t >::value, int > = 0 >
927+ XSIMD_INLINE int8_t x_vaddvq (int8x16_t a) noexcept { return vaddvq_s8 (a); }
928+ template <class T , std::enable_if_t <std::is_same<T, uint16_t >::value, int > = 0 >
929+ XSIMD_INLINE uint16_t x_vaddvq (uint16x8_t a) noexcept { return vaddvq_u16 (a); }
930+ template <class T , std::enable_if_t <std::is_same<T, int16_t >::value, int > = 0 >
931+ XSIMD_INLINE int16_t x_vaddvq (int16x8_t a) noexcept { return vaddvq_s16 (a); }
932+ template <class T , std::enable_if_t <std::is_same<T, uint32_t >::value, int > = 0 >
933+ XSIMD_INLINE uint32_t x_vaddvq (uint32x4_t a) noexcept { return vaddvq_u32 (a); }
934+ template <class T , std::enable_if_t <std::is_same<T, int32_t >::value, int > = 0 >
935+ XSIMD_INLINE int32_t x_vaddvq (int32x4_t a) noexcept { return vaddvq_s32 (a); }
936+ template <class T , std::enable_if_t <std::is_same<T, uint64_t >::value, int > = 0 >
937+ XSIMD_INLINE uint64_t x_vaddvq (uint64x2_t a) noexcept { return vaddvq_u64 (a); }
938+ template <class T , std::enable_if_t <std::is_same<T, int64_t >::value, int > = 0 >
939+ XSIMD_INLINE int64_t x_vaddvq (int64x2_t a) noexcept { return vaddvq_s64 (a); }
940+ template <class T , std::enable_if_t <std::is_same<T, float >::value, int > = 0 >
941+ XSIMD_INLINE float x_vaddvq (float32x4_t a) noexcept { return vaddvq_f32 (a); }
942+ template <class T , std::enable_if_t <std::is_same<T, double >::value, int > = 0 >
943+ XSIMD_INLINE double x_vaddvq (float64x2_t a) noexcept { return vaddvq_f64 (a); }
944+ }
1096945
1097946 template <class A , class T , detail::enable_neon64_type_t <T> = 0 >
1098947 XSIMD_INLINE typename batch<T, A>::value_type reduce_add (batch<T, A> const & arg, requires_arch<neon64>) noexcept
1099948 {
1100949 using register_type = typename batch<T, A>::register_type;
1101- const detail::neon_reducer_dispatcher::unary dispatcher = {
1102- std::make_tuple (wrap::vaddvq_u8, wrap::vaddvq_s8, wrap::vaddvq_u16, wrap::vaddvq_s16,
1103- wrap::vaddvq_u32, wrap::vaddvq_s32, wrap::vaddvq_u64, wrap::vaddvq_s64,
1104- wrap::vaddvq_f32, wrap::vaddvq_f64)
1105- };
1106- return dispatcher.apply (register_type (arg));
950+ return wrap::x_vaddvq<T>(register_type (arg));
1107951 }
1108952
1109953 /* *************
1110954 * reduce_max *
1111955 **************/
1112956
1113- WRAP_REDUCER_INT_EXCLUDING_64 (vmaxvq)
1114- WRAP_REDUCER_FLOAT (vmaxvq)
1115-
1116957 namespace wrap
1117958 {
1118- XSIMD_INLINE uint64_t vmaxvq_u64 (uint64x2_t a) noexcept
959+ // TODO(c++17): Make a single function with if constexpr switch
960+ template <class T , std::enable_if_t <std::is_same<T, uint8_t >::value, int > = 0 >
961+ XSIMD_INLINE uint8_t x_vmaxvq (uint8x16_t a) noexcept { return vmaxvq_u8 (a); }
962+ template <class T , std::enable_if_t <std::is_same<T, int8_t >::value, int > = 0 >
963+ XSIMD_INLINE int8_t x_vmaxvq (int8x16_t a) noexcept { return vmaxvq_s8 (a); }
964+ template <class T , std::enable_if_t <std::is_same<T, uint16_t >::value, int > = 0 >
965+ XSIMD_INLINE uint16_t x_vmaxvq (uint16x8_t a) noexcept { return vmaxvq_u16 (a); }
966+ template <class T , std::enable_if_t <std::is_same<T, int16_t >::value, int > = 0 >
967+ XSIMD_INLINE int16_t x_vmaxvq (int16x8_t a) noexcept { return vmaxvq_s16 (a); }
968+ template <class T , std::enable_if_t <std::is_same<T, uint32_t >::value, int > = 0 >
969+ XSIMD_INLINE uint32_t x_vmaxvq (uint32x4_t a) noexcept { return vmaxvq_u32 (a); }
970+ template <class T , std::enable_if_t <std::is_same<T, int32_t >::value, int > = 0 >
971+ XSIMD_INLINE int32_t x_vmaxvq (int32x4_t a) noexcept { return vmaxvq_s32 (a); }
972+ template <class T , std::enable_if_t <std::is_same<T, float >::value, int > = 0 >
973+ XSIMD_INLINE float x_vmaxvq (float32x4_t a) noexcept { return vmaxvq_f32 (a); }
974+ template <class T , std::enable_if_t <std::is_same<T, double >::value, int > = 0 >
975+ XSIMD_INLINE double x_vmaxvq (float64x2_t a) noexcept { return vmaxvq_f64 (a); }
976+
977+ template <class T , std::enable_if_t <std::is_same<T, uint64_t >::value, int > = 0 >
978+ XSIMD_INLINE uint64_t x_vmaxvq (uint64x2_t a) noexcept
1119979 {
1120980 return std::max (vdupd_laneq_u64 (a, 0 ), vdupd_laneq_u64 (a, 1 ));
1121981 }
1122-
1123- XSIMD_INLINE int64_t vmaxvq_s64 (int64x2_t a) noexcept
982+ template <class T , std::enable_if_t <std::is_same<T, int64_t >::value, int > = 0 >
983+ XSIMD_INLINE int64_t x_vmaxvq (int64x2_t a) noexcept
1124984 {
1125985 return std::max (vdupd_laneq_s64 (a, 0 ), vdupd_laneq_s64 (a, 1 ));
1126986 }
@@ -1130,29 +990,40 @@ namespace xsimd
1130990 XSIMD_INLINE typename batch<T, A>::value_type reduce_max (batch<T, A> const & arg, requires_arch<neon64>) noexcept
1131991 {
1132992 using register_type = typename batch<T, A>::register_type;
1133- const detail::neon_reducer_dispatcher::unary dispatcher = {
1134- std::make_tuple (wrap::vmaxvq_u8, wrap::vmaxvq_s8, wrap::vmaxvq_u16, wrap::vmaxvq_s16,
1135- wrap::vmaxvq_u32, wrap::vmaxvq_s32, wrap::vmaxvq_u64, wrap::vmaxvq_s64,
1136- wrap::vmaxvq_f32, wrap::vmaxvq_f64)
1137- };
1138- return dispatcher.apply (register_type (arg));
993+ return wrap::x_vmaxvq<T>(register_type (arg));
1139994 }
1140995
1141996 /* *************
1142997 * reduce_min *
1143998 **************/
1144999
1145- WRAP_REDUCER_INT_EXCLUDING_64 (vminvq)
1146- WRAP_REDUCER_FLOAT (vminvq)
1147-
11481000 namespace wrap
11491001 {
1150- XSIMD_INLINE uint64_t vminvq_u64 (uint64x2_t a) noexcept
1002+ // TODO(c++17): Make a single function with if constexpr switch
1003+ template <class T , std::enable_if_t <std::is_same<T, uint8_t >::value, int > = 0 >
1004+ XSIMD_INLINE uint8_t x_vminvq (uint8x16_t a) noexcept { return vminvq_u8 (a); }
1005+ template <class T , std::enable_if_t <std::is_same<T, int8_t >::value, int > = 0 >
1006+ XSIMD_INLINE int8_t x_vminvq (int8x16_t a) noexcept { return vminvq_s8 (a); }
1007+ template <class T , std::enable_if_t <std::is_same<T, uint16_t >::value, int > = 0 >
1008+ XSIMD_INLINE uint16_t x_vminvq (uint16x8_t a) noexcept { return vminvq_u16 (a); }
1009+ template <class T , std::enable_if_t <std::is_same<T, int16_t >::value, int > = 0 >
1010+ XSIMD_INLINE int16_t x_vminvq (int16x8_t a) noexcept { return vminvq_s16 (a); }
1011+ template <class T , std::enable_if_t <std::is_same<T, uint32_t >::value, int > = 0 >
1012+ XSIMD_INLINE uint32_t x_vminvq (uint32x4_t a) noexcept { return vminvq_u32 (a); }
1013+ template <class T , std::enable_if_t <std::is_same<T, int32_t >::value, int > = 0 >
1014+ XSIMD_INLINE int32_t x_vminvq (int32x4_t a) noexcept { return vminvq_s32 (a); }
1015+ template <class T , std::enable_if_t <std::is_same<T, float >::value, int > = 0 >
1016+ XSIMD_INLINE float x_vminvq (float32x4_t a) noexcept { return vminvq_f32 (a); }
1017+ template <class T , std::enable_if_t <std::is_same<T, double >::value, int > = 0 >
1018+ XSIMD_INLINE double x_vminvq (float64x2_t a) noexcept { return vminvq_f64 (a); }
1019+
1020+ template <class T , std::enable_if_t <std::is_same<T, uint64_t >::value, int > = 0 >
1021+ XSIMD_INLINE uint64_t x_vminvq (uint64x2_t a) noexcept
11511022 {
11521023 return std::min (vdupd_laneq_u64 (a, 0 ), vdupd_laneq_u64 (a, 1 ));
11531024 }
1154-
1155- XSIMD_INLINE int64_t vminvq_s64 (int64x2_t a) noexcept
1025+ template <class T , std::enable_if_t <std::is_same<T, int64_t >::value, int > = 0 >
1026+ XSIMD_INLINE int64_t x_vminvq (int64x2_t a) noexcept
11561027 {
11571028 return std::min (vdupd_laneq_s64 (a, 0 ), vdupd_laneq_s64 (a, 1 ));
11581029 }
@@ -1162,18 +1033,9 @@ namespace xsimd
11621033 XSIMD_INLINE typename batch<T, A>::value_type reduce_min (batch<T, A> const & arg, requires_arch<neon64>) noexcept
11631034 {
11641035 using register_type = typename batch<T, A>::register_type;
1165- const detail::neon_reducer_dispatcher::unary dispatcher = {
1166- std::make_tuple (wrap::vminvq_u8, wrap::vminvq_s8, wrap::vminvq_u16, wrap::vminvq_s16,
1167- wrap::vminvq_u32, wrap::vminvq_s32, wrap::vminvq_u64, wrap::vminvq_s64,
1168- wrap::vminvq_f32, wrap::vminvq_f64)
1169- };
1170- return dispatcher.apply (register_type (arg));
1036+ return wrap::x_vminvq<T>(register_type (arg));
11711037 }
11721038
1173- #undef WRAP_REDUCER_INT_EXCLUDING_64
1174- #undef WRAP_REDUCER_INT
1175- #undef WRAP_REDUCER_FLOAT
1176-
11771039 /* *********
11781040 * select *
11791041 **********/
0 commit comments