@@ -289,22 +289,7 @@ namespace xsimd
289289 }
290290
291291#if defined(_MSC_VER) && defined(_M_ARM64)
292- // -----------------------------------------------------------------------
293- // C++14-compatible dispatch helpers for MSVC ARM64.
294- //
295- // On MSVC ARM64, all NEON types are the same underlying type (__n128),
296- // so overload resolution on NEON types does not work and the existing
297- // std::tuple-based dispatcher cannot be used. The original workaround
298- // used `if constexpr` (C++17). The helpers below replace that with
299- // std::enable_if overloads, which are valid C++14.
300- //
301- // Each helper is a function template parameterised on the *element* type
302- // T. The correct intrinsic is selected at compile time via enable_if on
303- // sizeof(T) and std::is_unsigned<T> / std::is_floating_point<T>.
304- // -----------------------------------------------------------------------
305292 namespace detail {
306-
307- // -- load (for set<integral>) --
308293 template <class T >
309294 XSIMD_INLINE typename std::enable_if<sizeof (T) == 1 && std::is_unsigned<T>::value, __n128>::type
310295 msvc_arm64_load (const T* d) noexcept { return vld1q_u8 (reinterpret_cast <const uint8_t *>(d)); }
@@ -330,7 +315,6 @@ namespace xsimd
330315 XSIMD_INLINE typename std::enable_if<sizeof (T) == 8 && std::is_signed<T>::value, __n128>::type
331316 msvc_arm64_load (const T* d) noexcept { return vld1q_s64 (reinterpret_cast <const int64_t *>(d)); }
332317
333- // -- load_u (for set<batch_bool>) -- loads from unsigned element array
334318 template <class T >
335319 XSIMD_INLINE typename std::enable_if<sizeof (T) == 1 , __n128>::type
336320 msvc_arm64_load_u (const as_unsigned_integer_t <T>* d) noexcept { return vld1q_u8 (reinterpret_cast <const uint8_t *>(d)); }
@@ -344,7 +328,6 @@ namespace xsimd
344328 XSIMD_INLINE typename std::enable_if<sizeof (T) == 8 , __n128>::type
345329 msvc_arm64_load_u (const as_unsigned_integer_t <T>* d) noexcept { return vld1q_u64 (reinterpret_cast <const uint64_t *>(d)); }
346330
347- // -- eq for batch_bool (unsigned comparison by size) --
348331 template <class T >
349332 XSIMD_INLINE typename std::enable_if<sizeof (T)==1 , __n128>::type
350333 msvc_arm64_eq_bool (__n128 a, __n128 b) noexcept { return vceqq_u8 (a,b); }
@@ -355,10 +338,8 @@ namespace xsimd
355338 XSIMD_INLINE typename std::enable_if<sizeof (T)==4 , __n128>::type
356339 msvc_arm64_eq_bool (__n128 a, __n128 b) noexcept { return vceqq_u32 (a,b); }
357340
358- } // namespace detail (MSVC ARM64 helpers)
341+ }
359342
360- // Macro to generate C++14 enable_if dispatch overloads for a full binary op
361- // (all 9 NEON element types: u8,s8,u16,s16,u32,s32,u64,s64,f32).
362343#define XSIMD_MSVC_ARM64_BINARY_FULL (fname, u8fn, s8fn, u16fn, s16fn, u32fn, s32fn, u64fn, s64fn, f32fn ) \
363344 namespace detail { \
364345 template <class T > \
@@ -390,7 +371,6 @@ namespace xsimd
390371 fname (__n128 a, __n128 b) noexcept { return s64fn (a,b); } \
391372 }
392373
393- // Macro for binary ops excluding int64 (u8,s8,u16,s16,u32,s32,f32).
394374#define XSIMD_MSVC_ARM64_BINARY_EX64 (fname, u8fn, s8fn, u16fn, s16fn, u32fn, s32fn, f32fn ) \
395375 namespace detail { \
396376 template <class T > \
@@ -416,7 +396,6 @@ namespace xsimd
416396 fname (__n128 a, __n128 b) noexcept { return f32fn (a,b); } \
417397 }
418398
419- // Macro for unsigned-only binary ops excluding int64 (u8,u16,u32).
420399#define XSIMD_MSVC_ARM64_BINARY_UINT_EX64 (fname, u8fn, u16fn, u32fn ) \
421400 namespace detail { \
422401 template <class T > \
@@ -430,7 +409,6 @@ namespace xsimd
430409 fname (__n128 a, __n128 b) noexcept { return u32fn (a,b); } \
431410 }
432411
433- // Macro for unary ops excluding int64 (u8,s8,u16,s16,u32,s32,f32).
434412#define XSIMD_MSVC_ARM64_UNARY_EX64 (fname, u8fn, s8fn, u16fn, s16fn, u32fn, s32fn, f32fn ) \
435413 namespace detail { \
436414 template <class T > \
@@ -456,7 +434,6 @@ namespace xsimd
456434 fname (__n128 a) noexcept { return f32fn (a); } \
457435 }
458436
459- // Macro for select (ternary: cond, a, b) — all 9 types.
460437#define XSIMD_MSVC_ARM64_SELECT_FULL (fname, u8fn, s8fn, u16fn, s16fn, u32fn, s32fn, u64fn, s64fn, f32fn ) \
461438 namespace detail { \
462439 template <class T > \
@@ -488,7 +465,6 @@ namespace xsimd
488465 fname (__n128 c, __n128 a, __n128 b) noexcept { return s64fn (c,a,b); } \
489466 }
490467
491- // Macro for bitwise ops on batch_bool (unsigned only, all sizes).
492468#define XSIMD_MSVC_ARM64_BINARY_UINT_ALL (fname, u8fn, u16fn, u32fn, u64fn ) \
493469 namespace detail { \
494470 template <class T > \
@@ -505,7 +481,6 @@ namespace xsimd
505481 fname##_bool(__n128 a, __n128 b) noexcept { return u64fn (a,b); } \
506482 }
507483
508- // Macro for bitwise unary ops on batch_bool (unsigned only, all sizes).
509484#define XSIMD_MSVC_ARM64_UNARY_UINT_ALL (fname, u8fn, u16fn, u32fn, u64fn ) \
510485 namespace detail { \
511486 template <class T > \
@@ -522,7 +497,6 @@ namespace xsimd
522497 fname##_bool(__n128 a) noexcept { return u64fn (a); } \
523498 }
524499
525- // Generate all dispatch helpers used by the MSVC ARM64 paths below.
526500XSIMD_MSVC_ARM64_BINARY_FULL (msvc_arm64_add, vaddq_u8, vaddq_s8, vaddq_u16, vaddq_s16, vaddq_u32, vaddq_s32, vaddq_u64, vaddq_s64, vaddq_f32)
527501XSIMD_MSVC_ARM64_BINARY_FULL(msvc_arm64_sadd, vqaddq_u8, vqaddq_s8, vqaddq_u16, vqaddq_s16, vqaddq_u32, vqaddq_s32, vqaddq_u64, vqaddq_s64, vaddq_f32)
528502XSIMD_MSVC_ARM64_BINARY_FULL(msvc_arm64_sub, vsubq_u8, vsubq_s8, vsubq_u16, vsubq_s16, vsubq_u32, vsubq_s32, vsubq_u64, vsubq_s64, vsubq_f32)
@@ -544,26 +518,22 @@ namespace detail {
544518}
545519XSIMD_MSVC_ARM64_UNARY_EX64 (msvc_arm64_abs, msvc_arm64_abs_u8, vabsq_s8, msvc_arm64_abs_u16, vabsq_s16, msvc_arm64_abs_u32, vabsq_s32, vabsq_f32)
546520
547- // bitwise ops on batch<T,A>
548521XSIMD_MSVC_ARM64_BINARY_FULL(msvc_arm64_and, vandq_u8, vandq_u8, vandq_u16, vandq_u16, vandq_u32, vandq_u32, vandq_u64, vandq_u64, vandq_u8)
549522XSIMD_MSVC_ARM64_BINARY_FULL(msvc_arm64_or, vorrq_u8, vorrq_u8, vorrq_u16, vorrq_u16, vorrq_u32, vorrq_u32, vorrq_u64, vorrq_u64, vorrq_u8)
550523XSIMD_MSVC_ARM64_BINARY_FULL(msvc_arm64_xor, veorq_u8, veorq_u8, veorq_u16, veorq_u16, veorq_u32, veorq_u32, veorq_u64, veorq_u64, veorq_u8)
551524XSIMD_MSVC_ARM64_BINARY_FULL(msvc_arm64_andn, vbicq_u8, vbicq_u8, vbicq_u16, vbicq_u16, vbicq_u32, vbicq_u32, vbicq_u64, vbicq_u64, vbicq_u8)
552- // bitwise ops on batch_bool<T,A>
525+
553526XSIMD_MSVC_ARM64_BINARY_UINT_ALL(msvc_arm64_and, vandq_u8, vandq_u16, vandq_u32, vandq_u64)
554527XSIMD_MSVC_ARM64_BINARY_UINT_ALL(msvc_arm64_or, vorrq_u8, vorrq_u16, vorrq_u32, vorrq_u64)
555528XSIMD_MSVC_ARM64_BINARY_UINT_ALL(msvc_arm64_xor, veorq_u8, veorq_u16, veorq_u32, veorq_u64)
556529XSIMD_MSVC_ARM64_BINARY_UINT_ALL(msvc_arm64_andn, vbicq_u8, vbicq_u16, vbicq_u32, vbicq_u64)
557530namespace detail {
558- // On MSVC ARM64 all NEON types are __n128, so vmvnq_u32 works for any lane width.
559531 XSIMD_INLINE __n128 msvc_arm64_not_u64_impl (__n128 a) noexcept { return vmvnq_u32 (a); }
560532}
561533XSIMD_MSVC_ARM64_UNARY_UINT_ALL (msvc_arm64_not, vmvnq_u8, vmvnq_u16, vmvnq_u32, msvc_arm64_not_u64_impl)
562534
563- // select
564535XSIMD_MSVC_ARM64_SELECT_FULL(msvc_arm64_select, vbslq_u8, vbslq_s8, vbslq_u16, vbslq_s16, vbslq_u32, vbslq_s32, vbslq_u64, vbslq_s64, vbslq_f32)
565536
566- // rotate_left (N is a compile-time constant)
567537namespace detail {
568538 template <size_t N, class T >
569539 XSIMD_INLINE typename std::enable_if<sizeof (T)==1 && std::is_unsigned<T>::value, __n128>::type
@@ -592,7 +562,7 @@ namespace detail {
592562 template <size_t N, class T >
593563 XSIMD_INLINE typename std::enable_if<sizeof (T)==8 && std::is_signed<T>::value, __n128>::type
594564 msvc_arm64_rotate_left (__n128 a) noexcept { return vextq_s64 (a, a, N % 2 ); }
595- } // namespace detail
565+ }
596566#endif
597567
598568 /* ************
0 commit comments