Skip to content

Commit 6fc9157

Browse files
committed
No dispatcher in neon64.hpp
1 parent 562f400 commit 6fc9157

2 files changed

Lines changed: 97 additions & 214 deletions

File tree

include/xsimd/arch/xsimd_neon64.hpp

Lines changed: 75 additions & 213 deletions
Original file line numberDiff line numberDiff line change
@@ -34,37 +34,10 @@ namespace xsimd
3434

3535
namespace detail
3636
{
37-
template <template <class> class return_type, class... T>
38-
struct neon_dispatcher_base
39-
{
40-
struct unary
41-
{
42-
using container_type = std::tuple<return_type<T> (*)(T)...>;
43-
const container_type m_func;
44-
45-
template <class U>
46-
return_type<U> apply(U rhs) const noexcept
47-
{
48-
using func_type = return_type<U> (*)(U);
49-
auto func = std::get<func_type>(m_func);
50-
return func(rhs);
51-
}
52-
};
53-
54-
struct binary
55-
{
56-
using container_type = std::tuple<return_type<T> (*)(T, T)...>;
57-
const container_type m_func;
58-
59-
template <class U>
60-
return_type<U> apply(U lhs, U rhs) const noexcept
61-
{
62-
using func_type = return_type<U> (*)(U, U);
63-
auto func = std::get<func_type>(m_func);
64-
return func(lhs, rhs);
65-
}
66-
};
67-
};
37+
38+
// SFINAE guard for the neon64 kernels: names `int` when T is an integral
// type, `float` or `double`, and is ill-formed (removing the overload from
// consideration) for every other T.
template <class T>
using enable_neon64_type_t = typename std::enable_if<
    std::is_same<T, double>::value
        || std::is_same<T, float>::value
        || std::is_integral<T>::value,
    int>::type;
6841

6942
/********************
7043
* bitwise_caster *
@@ -941,186 +914,73 @@ namespace xsimd
941914
return vsetq_lane_f64(val, self, I);
942915
}
943916

944-
/******************
945-
* reducer macros *
946-
******************/
947-
948-
// Wrap reducer intrinsics so we can pass them as function pointers
949-
// - OP: intrinsics name prefix, e.g., vorrq
950-
951-
#define WRAP_REDUCER_INT_EXCLUDING_64(OP) \
952-
namespace wrap \
953-
{ \
954-
XSIMD_INLINE uint8_t OP##_u8(uint8x16_t a) noexcept \
955-
{ \
956-
return ::OP##_u8(a); \
957-
} \
958-
XSIMD_INLINE int8_t OP##_s8(int8x16_t a) noexcept \
959-
{ \
960-
return ::OP##_s8(a); \
961-
} \
962-
XSIMD_INLINE uint16_t OP##_u16(uint16x8_t a) noexcept \
963-
{ \
964-
return ::OP##_u16(a); \
965-
} \
966-
XSIMD_INLINE int16_t OP##_s16(int16x8_t a) noexcept \
967-
{ \
968-
return ::OP##_s16(a); \
969-
} \
970-
XSIMD_INLINE uint32_t OP##_u32(uint32x4_t a) noexcept \
971-
{ \
972-
return ::OP##_u32(a); \
973-
} \
974-
XSIMD_INLINE int32_t OP##_s32(int32x4_t a) noexcept \
975-
{ \
976-
return ::OP##_s32(a); \
977-
} \
978-
}
979-
980-
#define WRAP_REDUCER_INT(OP) \
981-
WRAP_REDUCER_INT_EXCLUDING_64(OP) \
982-
namespace wrap \
983-
{ \
984-
XSIMD_INLINE uint64_t OP##_u64(uint64x2_t a) noexcept \
985-
{ \
986-
return ::OP##_u64(a); \
987-
} \
988-
XSIMD_INLINE int64_t OP##_s64(int64x2_t a) noexcept \
989-
{ \
990-
return ::OP##_s64(a); \
991-
} \
992-
}
993-
994-
#define WRAP_REDUCER_FLOAT(OP) \
995-
namespace wrap \
996-
{ \
997-
XSIMD_INLINE float OP##_f32(float32x4_t a) noexcept \
998-
{ \
999-
return ::OP##_f32(a); \
1000-
} \
1001-
XSIMD_INLINE double OP##_f64(float64x2_t a) noexcept \
1002-
{ \
1003-
return ::OP##_f64(a); \
1004-
} \
1005-
}
1006-
1007-
namespace detail
1008-
{
1009-
template <class R>
1010-
struct reducer_return_type_impl;
1011-
1012-
template <>
1013-
struct reducer_return_type_impl<uint8x16_t>
1014-
{
1015-
using type = uint8_t;
1016-
};
1017-
1018-
template <>
1019-
struct reducer_return_type_impl<int8x16_t>
1020-
{
1021-
using type = int8_t;
1022-
};
1023-
1024-
template <>
1025-
struct reducer_return_type_impl<uint16x8_t>
1026-
{
1027-
using type = uint16_t;
1028-
};
1029-
1030-
template <>
1031-
struct reducer_return_type_impl<int16x8_t>
1032-
{
1033-
using type = int16_t;
1034-
};
1035-
1036-
template <>
1037-
struct reducer_return_type_impl<uint32x4_t>
1038-
{
1039-
using type = uint32_t;
1040-
};
1041-
1042-
template <>
1043-
struct reducer_return_type_impl<int32x4_t>
1044-
{
1045-
using type = int32_t;
1046-
};
1047-
1048-
template <>
1049-
struct reducer_return_type_impl<uint64x2_t>
1050-
{
1051-
using type = uint64_t;
1052-
};
1053-
1054-
template <>
1055-
struct reducer_return_type_impl<int64x2_t>
1056-
{
1057-
using type = int64_t;
1058-
};
1059-
1060-
template <>
1061-
struct reducer_return_type_impl<float32x4_t>
1062-
{
1063-
using type = float;
1064-
};
1065-
1066-
template <>
1067-
struct reducer_return_type_impl<float64x2_t>
1068-
{
1069-
using type = double;
1070-
};
1071-
1072-
template <class R>
1073-
using reducer_return_type = typename reducer_return_type_impl<R>::type;
1074-
1075-
template <class... T>
1076-
struct neon_reducer_dispatcher_impl : neon_dispatcher_base<reducer_return_type, T...>
1077-
{
1078-
};
1079-
1080-
using neon_reducer_dispatcher = neon_reducer_dispatcher_impl<uint8x16_t, int8x16_t,
1081-
uint16x8_t, int16x8_t,
1082-
uint32x4_t, int32x4_t,
1083-
uint64x2_t, int64x2_t,
1084-
float32x4_t, float64x2_t>;
1085-
template <class T>
1086-
using enable_neon64_type_t = std::enable_if_t<std::is_integral<T>::value || std::is_same<T, float>::value || std::is_same<T, double>::value,
1087-
int>;
1088-
}
1089-
1090917
/**************
1091918
* reduce_add *
1092919
**************/
1093920

1094-
WRAP_REDUCER_INT(vaddvq)
1095-
WRAP_REDUCER_FLOAT(vaddvq)
921+
namespace wrap
922+
{
923+
// TODO(c++17): Make a single function with if constexpr switch
924+
template <class T, std::enable_if_t<std::is_same<T, uint8_t>::value, int> = 0>
925+
XSIMD_INLINE uint8_t x_vaddvq(uint8x16_t a) noexcept { return vaddvq_u8(a); }
926+
template <class T, std::enable_if_t<std::is_same<T, int8_t>::value, int> = 0>
927+
XSIMD_INLINE int8_t x_vaddvq(int8x16_t a) noexcept { return vaddvq_s8(a); }
928+
template <class T, std::enable_if_t<std::is_same<T, uint16_t>::value, int> = 0>
929+
XSIMD_INLINE uint16_t x_vaddvq(uint16x8_t a) noexcept { return vaddvq_u16(a); }
930+
template <class T, std::enable_if_t<std::is_same<T, int16_t>::value, int> = 0>
931+
XSIMD_INLINE int16_t x_vaddvq(int16x8_t a) noexcept { return vaddvq_s16(a); }
932+
template <class T, std::enable_if_t<std::is_same<T, uint32_t>::value, int> = 0>
933+
XSIMD_INLINE uint32_t x_vaddvq(uint32x4_t a) noexcept { return vaddvq_u32(a); }
934+
template <class T, std::enable_if_t<std::is_same<T, int32_t>::value, int> = 0>
935+
XSIMD_INLINE int32_t x_vaddvq(int32x4_t a) noexcept { return vaddvq_s32(a); }
936+
template <class T, std::enable_if_t<std::is_same<T, uint64_t>::value, int> = 0>
937+
XSIMD_INLINE uint64_t x_vaddvq(uint64x2_t a) noexcept { return vaddvq_u64(a); }
938+
template <class T, std::enable_if_t<std::is_same<T, int64_t>::value, int> = 0>
939+
XSIMD_INLINE int64_t x_vaddvq(int64x2_t a) noexcept { return vaddvq_s64(a); }
940+
template <class T, std::enable_if_t<std::is_same<T, float>::value, int> = 0>
941+
XSIMD_INLINE float x_vaddvq(float32x4_t a) noexcept { return vaddvq_f32(a); }
942+
template <class T, std::enable_if_t<std::is_same<T, double>::value, int> = 0>
943+
XSIMD_INLINE double x_vaddvq(float64x2_t a) noexcept { return vaddvq_f64(a); }
944+
}
1096945

1097946
template <class A, class T, detail::enable_neon64_type_t<T> = 0>
1098947
XSIMD_INLINE typename batch<T, A>::value_type reduce_add(batch<T, A> const& arg, requires_arch<neon64>) noexcept
1099948
{
1100949
using register_type = typename batch<T, A>::register_type;
1101-
const detail::neon_reducer_dispatcher::unary dispatcher = {
1102-
std::make_tuple(wrap::vaddvq_u8, wrap::vaddvq_s8, wrap::vaddvq_u16, wrap::vaddvq_s16,
1103-
wrap::vaddvq_u32, wrap::vaddvq_s32, wrap::vaddvq_u64, wrap::vaddvq_s64,
1104-
wrap::vaddvq_f32, wrap::vaddvq_f64)
1105-
};
1106-
return dispatcher.apply(register_type(arg));
950+
return wrap::x_vaddvq<T>(register_type(arg));
1107951
}
1108952

1109953
/**************
1110954
* reduce_max *
1111955
**************/
1112956

1113-
WRAP_REDUCER_INT_EXCLUDING_64(vmaxvq)
1114-
WRAP_REDUCER_FLOAT(vmaxvq)
1115-
1116957
namespace wrap
1117958
{
1118-
XSIMD_INLINE uint64_t vmaxvq_u64(uint64x2_t a) noexcept
959+
// TODO(c++17): Make a single function with if constexpr switch
960+
template <class T, std::enable_if_t<std::is_same<T, uint8_t>::value, int> = 0>
961+
XSIMD_INLINE uint8_t x_vmaxvq(uint8x16_t a) noexcept { return vmaxvq_u8(a); }
962+
template <class T, std::enable_if_t<std::is_same<T, int8_t>::value, int> = 0>
963+
XSIMD_INLINE int8_t x_vmaxvq(int8x16_t a) noexcept { return vmaxvq_s8(a); }
964+
template <class T, std::enable_if_t<std::is_same<T, uint16_t>::value, int> = 0>
965+
XSIMD_INLINE uint16_t x_vmaxvq(uint16x8_t a) noexcept { return vmaxvq_u16(a); }
966+
template <class T, std::enable_if_t<std::is_same<T, int16_t>::value, int> = 0>
967+
XSIMD_INLINE int16_t x_vmaxvq(int16x8_t a) noexcept { return vmaxvq_s16(a); }
968+
template <class T, std::enable_if_t<std::is_same<T, uint32_t>::value, int> = 0>
969+
XSIMD_INLINE uint32_t x_vmaxvq(uint32x4_t a) noexcept { return vmaxvq_u32(a); }
970+
template <class T, std::enable_if_t<std::is_same<T, int32_t>::value, int> = 0>
971+
XSIMD_INLINE int32_t x_vmaxvq(int32x4_t a) noexcept { return vmaxvq_s32(a); }
972+
template <class T, std::enable_if_t<std::is_same<T, float>::value, int> = 0>
973+
XSIMD_INLINE float x_vmaxvq(float32x4_t a) noexcept { return vmaxvq_f32(a); }
974+
template <class T, std::enable_if_t<std::is_same<T, double>::value, int> = 0>
975+
XSIMD_INLINE double x_vmaxvq(float64x2_t a) noexcept { return vmaxvq_f64(a); }
976+
977+
template <class T, std::enable_if_t<std::is_same<T, uint64_t>::value, int> = 0>
978+
XSIMD_INLINE uint64_t x_vmaxvq(uint64x2_t a) noexcept
1119979
{
1120980
return std::max(vdupd_laneq_u64(a, 0), vdupd_laneq_u64(a, 1));
1121981
}
1122-
1123-
XSIMD_INLINE int64_t vmaxvq_s64(int64x2_t a) noexcept
982+
template <class T, std::enable_if_t<std::is_same<T, int64_t>::value, int> = 0>
983+
XSIMD_INLINE int64_t x_vmaxvq(int64x2_t a) noexcept
1124984
{
1125985
return std::max(vdupd_laneq_s64(a, 0), vdupd_laneq_s64(a, 1));
1126986
}
@@ -1130,29 +990,40 @@ namespace xsimd
1130990
XSIMD_INLINE typename batch<T, A>::value_type reduce_max(batch<T, A> const& arg, requires_arch<neon64>) noexcept
1131991
{
1132992
using register_type = typename batch<T, A>::register_type;
1133-
const detail::neon_reducer_dispatcher::unary dispatcher = {
1134-
std::make_tuple(wrap::vmaxvq_u8, wrap::vmaxvq_s8, wrap::vmaxvq_u16, wrap::vmaxvq_s16,
1135-
wrap::vmaxvq_u32, wrap::vmaxvq_s32, wrap::vmaxvq_u64, wrap::vmaxvq_s64,
1136-
wrap::vmaxvq_f32, wrap::vmaxvq_f64)
1137-
};
1138-
return dispatcher.apply(register_type(arg));
993+
return wrap::x_vmaxvq<T>(register_type(arg));
1139994
}
1140995

1141996
/**************
1142997
* reduce_min *
1143998
**************/
1144999

1145-
WRAP_REDUCER_INT_EXCLUDING_64(vminvq)
1146-
WRAP_REDUCER_FLOAT(vminvq)
1147-
11481000
namespace wrap
11491001
{
1150-
XSIMD_INLINE uint64_t vminvq_u64(uint64x2_t a) noexcept
1002+
// TODO(c++17): Make a single function with if constexpr switch
1003+
template <class T, std::enable_if_t<std::is_same<T, uint8_t>::value, int> = 0>
1004+
XSIMD_INLINE uint8_t x_vminvq(uint8x16_t a) noexcept { return vminvq_u8(a); }
1005+
template <class T, std::enable_if_t<std::is_same<T, int8_t>::value, int> = 0>
1006+
XSIMD_INLINE int8_t x_vminvq(int8x16_t a) noexcept { return vminvq_s8(a); }
1007+
template <class T, std::enable_if_t<std::is_same<T, uint16_t>::value, int> = 0>
1008+
XSIMD_INLINE uint16_t x_vminvq(uint16x8_t a) noexcept { return vminvq_u16(a); }
1009+
template <class T, std::enable_if_t<std::is_same<T, int16_t>::value, int> = 0>
1010+
XSIMD_INLINE int16_t x_vminvq(int16x8_t a) noexcept { return vminvq_s16(a); }
1011+
template <class T, std::enable_if_t<std::is_same<T, uint32_t>::value, int> = 0>
1012+
XSIMD_INLINE uint32_t x_vminvq(uint32x4_t a) noexcept { return vminvq_u32(a); }
1013+
template <class T, std::enable_if_t<std::is_same<T, int32_t>::value, int> = 0>
1014+
XSIMD_INLINE int32_t x_vminvq(int32x4_t a) noexcept { return vminvq_s32(a); }
1015+
template <class T, std::enable_if_t<std::is_same<T, float>::value, int> = 0>
1016+
XSIMD_INLINE float x_vminvq(float32x4_t a) noexcept { return vminvq_f32(a); }
1017+
template <class T, std::enable_if_t<std::is_same<T, double>::value, int> = 0>
1018+
XSIMD_INLINE double x_vminvq(float64x2_t a) noexcept { return vminvq_f64(a); }
1019+
1020+
template <class T, std::enable_if_t<std::is_same<T, uint64_t>::value, int> = 0>
1021+
XSIMD_INLINE uint64_t x_vminvq(uint64x2_t a) noexcept
11511022
{
11521023
return std::min(vdupd_laneq_u64(a, 0), vdupd_laneq_u64(a, 1));
11531024
}
1154-
1155-
XSIMD_INLINE int64_t vminvq_s64(int64x2_t a) noexcept
1025+
template <class T, std::enable_if_t<std::is_same<T, int64_t>::value, int> = 0>
1026+
XSIMD_INLINE int64_t x_vminvq(int64x2_t a) noexcept
11561027
{
11571028
return std::min(vdupd_laneq_s64(a, 0), vdupd_laneq_s64(a, 1));
11581029
}
@@ -1162,18 +1033,9 @@ namespace xsimd
11621033
XSIMD_INLINE typename batch<T, A>::value_type reduce_min(batch<T, A> const& arg, requires_arch<neon64>) noexcept
11631034
{
11641035
using register_type = typename batch<T, A>::register_type;
1165-
const detail::neon_reducer_dispatcher::unary dispatcher = {
1166-
std::make_tuple(wrap::vminvq_u8, wrap::vminvq_s8, wrap::vminvq_u16, wrap::vminvq_s16,
1167-
wrap::vminvq_u32, wrap::vminvq_s32, wrap::vminvq_u64, wrap::vminvq_s64,
1168-
wrap::vminvq_f32, wrap::vminvq_f64)
1169-
};
1170-
return dispatcher.apply(register_type(arg));
1036+
return wrap::x_vminvq<T>(register_type(arg));
11711037
}
11721038

1173-
#undef WRAP_REDUCER_INT_EXCLUDING_64
1174-
#undef WRAP_REDUCER_INT
1175-
#undef WRAP_REDUCER_FLOAT
1176-
11771039
/**********
11781040
* select *
11791041
**********/

test/test_utils.hpp

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@
1616
#include <cmath>
1717
#include <complex>
1818
#include <limits>
19+
#include <sstream>
20+
#include <string>
1921
#include <type_traits>
2022
#include <vector>
2123

@@ -399,10 +401,29 @@ namespace detail
399401
void stringify(std::ostream* os) const override { *os << msg_; }
400402
};
401403

404+
// Integral values: std::to_string is exact for every integer width.
// (numeric_limits<T>::max_digits10 is 0 for integers, so routing them through
// "%.*g" would print a single significant digit, e.g. 12345 -> "1e+04".)
template <typename T>
std::string arithmetic_to_string_full_precision(T value, std::true_type /*is_integral*/)
{
    return std::to_string(value);
}

// Floating-point values: print max_digits10 significant digits, the count
// needed for a decimal string to round-trip back to the same T.
template <typename T>
std::string arithmetic_to_string_full_precision(T value, std::false_type /*is_integral*/)
{
    // TODO(C++17): use std::to_chars
    char buf[64];
    // Widen to long double (never narrow to double) so a long double argument
    // keeps all of its digits; float and double convert exactly.
    const int written = std::snprintf(
        buf, sizeof(buf),
        "%.*Lg",
        std::numeric_limits<T>::max_digits10,
        static_cast<long double>(value));
    if (written < 0)
    {
        // Encoding error: buf contents are unspecified, so return nothing.
        return std::string();
    }
    return std::string(buf);
}

/**
 * Formats an arithmetic value without losing information.
 *
 * @param value integral or floating-point value to format.
 * @return the exact decimal representation for integral types, or a "%g"-style
 *         representation with std::numeric_limits<T>::max_digits10 significant
 *         digits for floating-point types.
 */
template <typename T, typename std::enable_if<std::is_arithmetic<T>::value, int>::type = 0>
std::string to_string_full_precision(T value)
{
    return arithmetic_to_string_full_precision(value, std::is_integral<T>());
}
416+
417+
template <typename T, typename std::enable_if<!std::is_arithmetic<T>::value, int>::type = 0>
418+
std::string to_string_full_precision(T value)
419+
{
420+
return doctest::toString(value).c_str();
421+
}
422+
402423
template <class T>
403424
StringContextScope make_context_info(const char* name, const T& val)
404425
{
405-
return StringContextScope(std::string(name) + ":" + doctest::toString(val).c_str());
426+
return StringContextScope(std::string(name) + ":" + to_string_full_precision(val));
406427
}
407428
}
408429

0 commit comments

Comments
 (0)