@@ -34,37 +34,10 @@ namespace xsimd
3434
3535 namespace detail
3636 {
37- template <template <class > class return_type , class ... T>
38- struct neon_dispatcher_base
39- {
40- struct unary
41- {
42- using container_type = std::tuple<return_type<T> (*)(T)...>;
43- const container_type m_func;
44-
45- template <class U >
46- return_type<U> apply (U rhs) const noexcept
47- {
48- using func_type = return_type<U> (*)(U);
49- auto func = std::get<func_type>(m_func);
50- return func (rhs);
51- }
52- };
53-
54- struct binary
55- {
56- using container_type = std::tuple<return_type<T> (*)(T, T)...>;
57- const container_type m_func;
58-
59- template <class U >
60- return_type<U> apply (U lhs, U rhs) const noexcept
61- {
62- using func_type = return_type<U> (*)(U, U);
63- auto func = std::get<func_type>(m_func);
64- return func (lhs, rhs);
65- }
66- };
67- };
37+
38+ template <class T >
39+ using enable_neon64_type_t = std::enable_if_t <std::is_integral<T>::value || std::is_same<T, float >::value || std::is_same<T, double >::value,
40+ int >;
6841
6942 /* *******************
7043 * bitwise_caster *
@@ -941,186 +914,73 @@ namespace xsimd
941914 return vsetq_lane_f64 (val, self, I);
942915 }
943916
944- /* *****************
945- * reducer macros *
946- ******************/
947-
948- // Wrap reducer intrinsics so we can pass them as function pointers
949- // - OP: intrinsics name prefix, e.g., vorrq
950-
951- #define WRAP_REDUCER_INT_EXCLUDING_64 (OP ) \
952- namespace wrap \
953- { \
954- XSIMD_INLINE uint8_t OP##_u8(uint8x16_t a) noexcept \
955- { \
956- return ::OP##_u8 (a); \
957- } \
958- XSIMD_INLINE int8_t OP##_s8(int8x16_t a) noexcept \
959- { \
960- return ::OP##_s8 (a); \
961- } \
962- XSIMD_INLINE uint16_t OP##_u16(uint16x8_t a) noexcept \
963- { \
964- return ::OP##_u16 (a); \
965- } \
966- XSIMD_INLINE int16_t OP##_s16(int16x8_t a) noexcept \
967- { \
968- return ::OP##_s16 (a); \
969- } \
970- XSIMD_INLINE uint32_t OP##_u32(uint32x4_t a) noexcept \
971- { \
972- return ::OP##_u32 (a); \
973- } \
974- XSIMD_INLINE int32_t OP##_s32(int32x4_t a) noexcept \
975- { \
976- return ::OP##_s32 (a); \
977- } \
978- }
979-
980- #define WRAP_REDUCER_INT (OP ) \
981- WRAP_REDUCER_INT_EXCLUDING_64 (OP) \
982- namespace wrap \
983- { \
984- XSIMD_INLINE uint64_t OP##_u64(uint64x2_t a) noexcept \
985- { \
986- return ::OP##_u64 (a); \
987- } \
988- XSIMD_INLINE int64_t OP##_s64(int64x2_t a) noexcept \
989- { \
990- return ::OP##_s64 (a); \
991- } \
992- }
993-
994- #define WRAP_REDUCER_FLOAT (OP ) \
995- namespace wrap \
996- { \
997- XSIMD_INLINE float OP##_f32(float32x4_t a) noexcept \
998- { \
999- return ::OP##_f32 (a); \
1000- } \
1001- XSIMD_INLINE double OP##_f64(float64x2_t a) noexcept \
1002- { \
1003- return ::OP##_f64 (a); \
1004- } \
1005- }
1006-
1007- namespace detail
1008- {
1009- template <class R >
1010- struct reducer_return_type_impl ;
1011-
1012- template <>
1013- struct reducer_return_type_impl <uint8x16_t >
1014- {
1015- using type = uint8_t ;
1016- };
1017-
1018- template <>
1019- struct reducer_return_type_impl <int8x16_t >
1020- {
1021- using type = int8_t ;
1022- };
1023-
1024- template <>
1025- struct reducer_return_type_impl <uint16x8_t >
1026- {
1027- using type = uint16_t ;
1028- };
1029-
1030- template <>
1031- struct reducer_return_type_impl <int16x8_t >
1032- {
1033- using type = int16_t ;
1034- };
1035-
1036- template <>
1037- struct reducer_return_type_impl <uint32x4_t >
1038- {
1039- using type = uint32_t ;
1040- };
1041-
1042- template <>
1043- struct reducer_return_type_impl <int32x4_t >
1044- {
1045- using type = int32_t ;
1046- };
1047-
1048- template <>
1049- struct reducer_return_type_impl <uint64x2_t >
1050- {
1051- using type = uint64_t ;
1052- };
1053-
1054- template <>
1055- struct reducer_return_type_impl <int64x2_t >
1056- {
1057- using type = int64_t ;
1058- };
1059-
1060- template <>
1061- struct reducer_return_type_impl <float32x4_t >
1062- {
1063- using type = float ;
1064- };
1065-
1066- template <>
1067- struct reducer_return_type_impl <float64x2_t >
1068- {
1069- using type = double ;
1070- };
1071-
1072- template <class R >
1073- using reducer_return_type = typename reducer_return_type_impl<R>::type;
1074-
1075- template <class ... T>
1076- struct neon_reducer_dispatcher_impl : neon_dispatcher_base<reducer_return_type, T...>
1077- {
1078- };
1079-
1080- using neon_reducer_dispatcher = neon_reducer_dispatcher_impl<uint8x16_t , int8x16_t ,
1081- uint16x8_t , int16x8_t ,
1082- uint32x4_t , int32x4_t ,
1083- uint64x2_t , int64x2_t ,
1084- float32x4_t , float64x2_t >;
1085- template <class T >
1086- using enable_neon64_type_t = std::enable_if_t <std::is_integral<T>::value || std::is_same<T, float >::value || std::is_same<T, double >::value,
1087- int >;
1088- }
1089-
1090917 /* *************
1091918 * reduce_add *
1092919 **************/
1093920
1094- WRAP_REDUCER_INT (vaddvq)
1095- WRAP_REDUCER_FLOAT (vaddvq)
921+ namespace wrap
922+ {
923+ // TODO(c++17): Make a single function with if constexpr switch
924+ template <class T , std::enable_if_t <std::is_same<T, uint8_t >::value, int > = 0 >
925+ XSIMD_INLINE uint8_t x_vaddvq (uint8x16_t a) noexcept { return vaddvq_u8 (a); }
926+ template <class T , std::enable_if_t <std::is_same<T, int8_t >::value, int > = 0 >
927+ XSIMD_INLINE int8_t x_vaddvq (int8x16_t a) noexcept { return vaddvq_s8 (a); }
928+ template <class T , std::enable_if_t <std::is_same<T, uint16_t >::value, int > = 0 >
929+ XSIMD_INLINE uint16_t x_vaddvq (uint16x8_t a) noexcept { return vaddvq_u16 (a); }
930+ template <class T , std::enable_if_t <std::is_same<T, int16_t >::value, int > = 0 >
931+ XSIMD_INLINE int16_t x_vaddvq (int16x8_t a) noexcept { return vaddvq_s16 (a); }
932+ template <class T , std::enable_if_t <std::is_same<T, uint32_t >::value, int > = 0 >
933+ XSIMD_INLINE uint32_t x_vaddvq (uint32x4_t a) noexcept { return vaddvq_u32 (a); }
934+ template <class T , std::enable_if_t <std::is_same<T, int32_t >::value, int > = 0 >
935+ XSIMD_INLINE int32_t x_vaddvq (int32x4_t a) noexcept { return vaddvq_s32 (a); }
936+ template <class T , std::enable_if_t <std::is_same<T, uint64_t >::value, int > = 0 >
937+ XSIMD_INLINE uint64_t x_vaddvq (uint64x2_t a) noexcept { return vaddvq_u64 (a); }
938+ template <class T , std::enable_if_t <std::is_same<T, int64_t >::value, int > = 0 >
939+ XSIMD_INLINE int64_t x_vaddvq (int64x2_t a) noexcept { return vaddvq_s64 (a); }
940+ template <class T , std::enable_if_t <std::is_same<T, float >::value, int > = 0 >
941+ XSIMD_INLINE float x_vaddvq (float32x4_t a) noexcept { return vaddvq_f32 (a); }
942+ template <class T , std::enable_if_t <std::is_same<T, double >::value, int > = 0 >
943+ XSIMD_INLINE double x_vaddvq (float64x2_t a) noexcept { return vaddvq_f64 (a); }
944+ }
1096945
1097946 template <class A , class T , detail::enable_neon64_type_t <T> = 0 >
1098947 XSIMD_INLINE typename batch<T, A>::value_type reduce_add (batch<T, A> const & arg, requires_arch<neon64>) noexcept
1099948 {
1100949 using register_type = typename batch<T, A>::register_type;
1101- const detail::neon_reducer_dispatcher::unary dispatcher = {
1102- std::make_tuple (wrap::vaddvq_u8, wrap::vaddvq_s8, wrap::vaddvq_u16, wrap::vaddvq_s16,
1103- wrap::vaddvq_u32, wrap::vaddvq_s32, wrap::vaddvq_u64, wrap::vaddvq_s64,
1104- wrap::vaddvq_f32, wrap::vaddvq_f64)
1105- };
1106- return dispatcher.apply (register_type (arg));
950+ return wrap::x_vaddvq<T>(register_type (arg));
1107951 }
1108952
1109953 /* *************
1110954 * reduce_max *
1111955 **************/
1112956
1113- WRAP_REDUCER_INT_EXCLUDING_64 (vmaxvq)
1114- WRAP_REDUCER_FLOAT (vmaxvq)
1115-
1116957 namespace wrap
1117958 {
1118- XSIMD_INLINE uint64_t vmaxvq_u64 (uint64x2_t a) noexcept
959+ // TODO(c++17): Make a single function with if constexpr switch
960+ template <class T , std::enable_if_t <std::is_same<T, uint8_t >::value, int > = 0 >
961+ XSIMD_INLINE uint8_t x_vmaxvq (uint8x16_t a) noexcept { return vmaxvq_u8 (a); }
962+ template <class T , std::enable_if_t <std::is_same<T, int8_t >::value, int > = 0 >
963+ XSIMD_INLINE int8_t x_vmaxvq (int8x16_t a) noexcept { return vmaxvq_s8 (a); }
964+ template <class T , std::enable_if_t <std::is_same<T, uint16_t >::value, int > = 0 >
965+ XSIMD_INLINE uint16_t x_vmaxvq (uint16x8_t a) noexcept { return vmaxvq_u16 (a); }
966+ template <class T , std::enable_if_t <std::is_same<T, int16_t >::value, int > = 0 >
967+ XSIMD_INLINE int16_t x_vmaxvq (int16x8_t a) noexcept { return vmaxvq_s16 (a); }
968+ template <class T , std::enable_if_t <std::is_same<T, uint32_t >::value, int > = 0 >
969+ XSIMD_INLINE uint32_t x_vmaxvq (uint32x4_t a) noexcept { return vmaxvq_u32 (a); }
970+ template <class T , std::enable_if_t <std::is_same<T, int32_t >::value, int > = 0 >
971+ XSIMD_INLINE int32_t x_vmaxvq (int32x4_t a) noexcept { return vmaxvq_s32 (a); }
972+ template <class T , std::enable_if_t <std::is_same<T, float >::value, int > = 0 >
973+ XSIMD_INLINE float x_vmaxvq (float32x4_t a) noexcept { return vmaxvq_f32 (a); }
974+ template <class T , std::enable_if_t <std::is_same<T, double >::value, int > = 0 >
975+ XSIMD_INLINE double x_vmaxvq (float64x2_t a) noexcept { return vmaxvq_f64 (a); }
976+
977+ template <class T , std::enable_if_t <std::is_same<T, uint64_t >::value, int > = 0 >
978+ XSIMD_INLINE uint64_t x_vmaxvq (uint64x2_t a) noexcept
1119979 {
1120980 return std::max (vdupd_laneq_u64 (a, 0 ), vdupd_laneq_u64 (a, 1 ));
1121981 }
1122-
1123- XSIMD_INLINE int64_t vmaxvq_s64 (int64x2_t a) noexcept
982+ template <class T , std::enable_if_t <std::is_same<T, int64_t >::value, int > = 0 >
983+ XSIMD_INLINE int64_t x_vmaxvq (int64x2_t a) noexcept
1124984 {
1125985 return std::max (vdupd_laneq_s64 (a, 0 ), vdupd_laneq_s64 (a, 1 ));
1126986 }
@@ -1130,29 +990,40 @@ namespace xsimd
1130990 XSIMD_INLINE typename batch<T, A>::value_type reduce_max (batch<T, A> const & arg, requires_arch<neon64>) noexcept
1131991 {
1132992 using register_type = typename batch<T, A>::register_type;
1133- const detail::neon_reducer_dispatcher::unary dispatcher = {
1134- std::make_tuple (wrap::vmaxvq_u8, wrap::vmaxvq_s8, wrap::vmaxvq_u16, wrap::vmaxvq_s16,
1135- wrap::vmaxvq_u32, wrap::vmaxvq_s32, wrap::vmaxvq_u64, wrap::vmaxvq_s64,
1136- wrap::vmaxvq_f32, wrap::vmaxvq_f64)
1137- };
1138- return dispatcher.apply (register_type (arg));
993+ return wrap::x_vmaxvq<T>(register_type (arg));
1139994 }
1140995
1141996 /* *************
1142997 * reduce_min *
1143998 **************/
1144999
1145- WRAP_REDUCER_INT_EXCLUDING_64 (vminvq)
1146- WRAP_REDUCER_FLOAT (vminvq)
1147-
11481000 namespace wrap
11491001 {
1150- XSIMD_INLINE uint64_t vminvq_u64 (uint64x2_t a) noexcept
1002+ // TODO(c++17): Make a single function with if constexpr switch
1003+ template <class T , std::enable_if_t <std::is_same<T, uint8_t >::value, int > = 0 >
1004+ XSIMD_INLINE uint8_t x_vminvq (uint8x16_t a) noexcept { return vminvq_u8 (a); }
1005+ template <class T , std::enable_if_t <std::is_same<T, int8_t >::value, int > = 0 >
1006+ XSIMD_INLINE int8_t x_vminvq (int8x16_t a) noexcept { return vminvq_s8 (a); }
1007+ template <class T , std::enable_if_t <std::is_same<T, uint16_t >::value, int > = 0 >
1008+ XSIMD_INLINE uint16_t x_vminvq (uint16x8_t a) noexcept { return vminvq_u16 (a); }
1009+ template <class T , std::enable_if_t <std::is_same<T, int16_t >::value, int > = 0 >
1010+ XSIMD_INLINE int16_t x_vminvq (int16x8_t a) noexcept { return vminvq_s16 (a); }
1011+ template <class T , std::enable_if_t <std::is_same<T, uint32_t >::value, int > = 0 >
1012+ XSIMD_INLINE uint32_t x_vminvq (uint32x4_t a) noexcept { return vminvq_u32 (a); }
1013+ template <class T , std::enable_if_t <std::is_same<T, int32_t >::value, int > = 0 >
1014+ XSIMD_INLINE int32_t x_vminvq (int32x4_t a) noexcept { return vminvq_s32 (a); }
1015+ template <class T , std::enable_if_t <std::is_same<T, float >::value, int > = 0 >
1016+ XSIMD_INLINE float x_vminvq (float32x4_t a) noexcept { return vminvq_f32 (a); }
1017+ template <class T , std::enable_if_t <std::is_same<T, double >::value, int > = 0 >
1018+ XSIMD_INLINE double x_vminvq (float64x2_t a) noexcept { return vminvq_f64 (a); }
1019+
1020+ template <class T , std::enable_if_t <std::is_same<T, uint64_t >::value, int > = 0 >
1021+ XSIMD_INLINE uint64_t x_vminvq (uint64x2_t a) noexcept
11511022 {
11521023 return std::min (vdupd_laneq_u64 (a, 0 ), vdupd_laneq_u64 (a, 1 ));
11531024 }
1154-
1155- XSIMD_INLINE int64_t vminvq_s64 (int64x2_t a) noexcept
1025+ template <class T , std::enable_if_t <std::is_same<T, int64_t >::value, int > = 0 >
1026+ XSIMD_INLINE int64_t x_vminvq (int64x2_t a) noexcept
11561027 {
11571028 return std::min (vdupd_laneq_s64 (a, 0 ), vdupd_laneq_s64 (a, 1 ));
11581029 }
@@ -1162,18 +1033,9 @@ namespace xsimd
11621033 XSIMD_INLINE typename batch<T, A>::value_type reduce_min (batch<T, A> const & arg, requires_arch<neon64>) noexcept
11631034 {
11641035 using register_type = typename batch<T, A>::register_type;
1165- const detail::neon_reducer_dispatcher::unary dispatcher = {
1166- std::make_tuple (wrap::vminvq_u8, wrap::vminvq_s8, wrap::vminvq_u16, wrap::vminvq_s16,
1167- wrap::vminvq_u32, wrap::vminvq_s32, wrap::vminvq_u64, wrap::vminvq_s64,
1168- wrap::vminvq_f32, wrap::vminvq_f64)
1169- };
1170- return dispatcher.apply (register_type (arg));
1036+ return wrap::x_vminvq<T>(register_type (arg));
11711037 }
11721038
1173- #undef WRAP_REDUCER_INT_EXCLUDING_64
1174- #undef WRAP_REDUCER_INT
1175- #undef WRAP_REDUCER_FLOAT
1176-
11771039 /* *********
11781040 * select *
11791041 **********/
0 commit comments