@@ -255,6 +255,12 @@ namespace xsimd
255255 using type = uint64x2_t ;
256256 };
257257
258+ template <>
259+ struct comp_return_type_impl <uint64x2_t >
260+ {
261+ using type = uint64x2_t ;
262+ };
263+
258264 template <>
259265 struct comp_return_type_impl <float32x4_t >
260266 {
@@ -290,43 +296,9 @@ namespace xsimd
290296
291297#if defined(_MSC_VER) && defined(_M_ARM64) && !defined(__clang__)
292298 namespace detail {
293- template <class T >
294- XSIMD_INLINE typename std::enable_if<sizeof (T) == 1 && std::is_unsigned<T>::value, __n128>::type
295- msvc_arm64_load (const T* d) noexcept { return vld1q_u8 (reinterpret_cast <const uint8_t *>(d)); }
296- template <class T >
297- XSIMD_INLINE typename std::enable_if<sizeof (T) == 1 && std::is_signed<T>::value, __n128>::type
298- msvc_arm64_load (const T* d) noexcept { return vld1q_s8 (reinterpret_cast <const int8_t *>(d)); }
299- template <class T >
300- XSIMD_INLINE typename std::enable_if<sizeof (T) == 2 && std::is_unsigned<T>::value, __n128>::type
301- msvc_arm64_load (const T* d) noexcept { return vld1q_u16 (reinterpret_cast <const uint16_t *>(d)); }
302- template <class T >
303- XSIMD_INLINE typename std::enable_if<sizeof (T) == 2 && std::is_signed<T>::value, __n128>::type
304- msvc_arm64_load (const T* d) noexcept { return vld1q_s16 (reinterpret_cast <const int16_t *>(d)); }
305- template <class T >
306- XSIMD_INLINE typename std::enable_if<sizeof (T) == 4 && std::is_unsigned<T>::value, __n128>::type
307- msvc_arm64_load (const T* d) noexcept { return vld1q_u32 (reinterpret_cast <const uint32_t *>(d)); }
308- template <class T >
309- XSIMD_INLINE typename std::enable_if<sizeof (T) == 4 && std::is_signed<T>::value && !std::is_floating_point<T>::value, __n128>::type
310- msvc_arm64_load (const T* d) noexcept { return vld1q_s32 (reinterpret_cast <const int32_t *>(d)); }
311- template <class T >
312- XSIMD_INLINE typename std::enable_if<sizeof (T) == 8 && std::is_unsigned<T>::value, __n128>::type
313- msvc_arm64_load (const T* d) noexcept { return vld1q_u64 (reinterpret_cast <const uint64_t *>(d)); }
314- template <class T >
315- XSIMD_INLINE typename std::enable_if<sizeof (T) == 8 && std::is_signed<T>::value, __n128>::type
316- msvc_arm64_load (const T* d) noexcept { return vld1q_s64 (reinterpret_cast <const int64_t *>(d)); }
317-
318- template <class T >
319- XSIMD_INLINE typename std::enable_if<sizeof (T) == 1 , __n128>::type
320- msvc_arm64_load_u (const as_unsigned_integer_t <T>* d) noexcept { return vld1q_u8 (reinterpret_cast <const uint8_t *>(d)); }
321- template <class T >
322- XSIMD_INLINE typename std::enable_if<sizeof (T) == 2 , __n128>::type
323- msvc_arm64_load_u (const as_unsigned_integer_t <T>* d) noexcept { return vld1q_u16 (reinterpret_cast <const uint16_t *>(d)); }
324- template <class T >
325- XSIMD_INLINE typename std::enable_if<sizeof (T) == 4 , __n128>::type
326- msvc_arm64_load_u (const as_unsigned_integer_t <T>* d) noexcept { return vld1q_u32 (reinterpret_cast <const uint32_t *>(d)); }
327- template <class T >
328- XSIMD_INLINE typename std::enable_if<sizeof (T) == 8 , __n128>::type
329- msvc_arm64_load_u (const as_unsigned_integer_t <T>* d) noexcept { return vld1q_u64 (reinterpret_cast <const uint64_t *>(d)); }
// The MSVC-only msvc_arm64_load / msvc_arm64_load_u helpers that used to
// live here have been replaced by detail::neon_load<T> and
// detail::neon_load_u<T>. Those helpers are defined after this MSVC ARM64
// block, outside the #if, so that every compiler can use them; nothing in
// this block uses the old helpers any more.
330302
331303 template <class T >
332304 XSIMD_INLINE typename std::enable_if<sizeof (T)==1 , __n128>::type
@@ -565,6 +537,51 @@ namespace detail {
565537}
566538#endif
567539
540+ namespace detail
541+ {
542+ // Cross-platform helpers: load a NEON register from an aligned array.
543+ // On GCC/Clang the return type is the specific NEON vector type;
544+ // on MSVC ARM64 all NEON types are __n128, so the same code works.
545+ template <class T >
546+ XSIMD_INLINE typename std::enable_if<sizeof (T) == 1 && std::is_unsigned<T>::value, uint8x16_t >::type
547+ neon_load (const T* d) noexcept { return vld1q_u8 (reinterpret_cast <const uint8_t *>(d)); }
548+ template <class T >
549+ XSIMD_INLINE typename std::enable_if<sizeof (T) == 1 && std::is_signed<T>::value, int8x16_t >::type
550+ neon_load (const T* d) noexcept { return vld1q_s8 (reinterpret_cast <const int8_t *>(d)); }
551+ template <class T >
552+ XSIMD_INLINE typename std::enable_if<sizeof (T) == 2 && std::is_unsigned<T>::value, uint16x8_t >::type
553+ neon_load (const T* d) noexcept { return vld1q_u16 (reinterpret_cast <const uint16_t *>(d)); }
554+ template <class T >
555+ XSIMD_INLINE typename std::enable_if<sizeof (T) == 2 && std::is_signed<T>::value, int16x8_t >::type
556+ neon_load (const T* d) noexcept { return vld1q_s16 (reinterpret_cast <const int16_t *>(d)); }
557+ template <class T >
558+ XSIMD_INLINE typename std::enable_if<sizeof (T) == 4 && std::is_unsigned<T>::value, uint32x4_t >::type
559+ neon_load (const T* d) noexcept { return vld1q_u32 (reinterpret_cast <const uint32_t *>(d)); }
560+ template <class T >
561+ XSIMD_INLINE typename std::enable_if<sizeof (T) == 4 && std::is_signed<T>::value && !std::is_floating_point<T>::value, int32x4_t >::type
562+ neon_load (const T* d) noexcept { return vld1q_s32 (reinterpret_cast <const int32_t *>(d)); }
563+ template <class T >
564+ XSIMD_INLINE typename std::enable_if<sizeof (T) == 8 && std::is_unsigned<T>::value, uint64x2_t >::type
565+ neon_load (const T* d) noexcept { return vld1q_u64 (reinterpret_cast <const uint64_t *>(d)); }
566+ template <class T >
567+ XSIMD_INLINE typename std::enable_if<sizeof (T) == 8 && std::is_signed<T>::value, int64x2_t >::type
568+ neon_load (const T* d) noexcept { return vld1q_s64 (reinterpret_cast <const int64_t *>(d)); }
569+
570+ // Load the unsigned-integer representation of T from an aligned array.
571+ template <class T >
572+ XSIMD_INLINE typename std::enable_if<sizeof (T) == 1 , uint8x16_t >::type
573+ neon_load_u (const as_unsigned_integer_t <T>* d) noexcept { return vld1q_u8 (reinterpret_cast <const uint8_t *>(d)); }
574+ template <class T >
575+ XSIMD_INLINE typename std::enable_if<sizeof (T) == 2 , uint16x8_t >::type
576+ neon_load_u (const as_unsigned_integer_t <T>* d) noexcept { return vld1q_u16 (reinterpret_cast <const uint16_t *>(d)); }
577+ template <class T >
578+ XSIMD_INLINE typename std::enable_if<sizeof (T) == 4 , uint32x4_t >::type
579+ neon_load_u (const as_unsigned_integer_t <T>* d) noexcept { return vld1q_u32 (reinterpret_cast <const uint32_t *>(d)); }
580+ template <class T >
581+ XSIMD_INLINE typename std::enable_if<sizeof (T) == 8 , uint64x2_t >::type
582+ neon_load_u (const as_unsigned_integer_t <T>* d) noexcept { return vld1q_u64 (reinterpret_cast <const uint64_t *>(d)); }
583+ }
584+
568585 /* ************
569586 * broadcast *
570587 *************/
@@ -630,69 +647,43 @@ namespace detail {
630647 template <class A , class T , class ... Args, detail::enable_integral_t <T> = 0 >
631648 XSIMD_INLINE batch<T, A> set (batch<T, A> const &, requires_arch<neon>, Args... args) noexcept
632649 {
633- #if defined(_MSC_VER) && defined(_M_ARM64) && !defined(__clang__)
650+ // Use load-from-array on all platforms: avoids brace-init of NEON types
651+ // (which MSVC ARM64 does not support) while remaining portable.
634652 alignas (16 ) T data[] = { static_cast <T>(args)... };
635- return detail::msvc_arm64_load<T>(data);
636- #else
637- return xsimd::types::detail::neon_vector_type<T> { args... };
638- #endif
653+ return detail::neon_load<T>(data);
639654 }
640655
641656 template <class A , class T , class ... Args, detail::enable_integral_t <T> = 0 >
642657 XSIMD_INLINE batch_bool<T, A> set (batch_bool<T, A> const &, requires_arch<neon>, Args... args) noexcept
643658 {
644- #if defined(_MSC_VER) && defined(_M_ARM64) && !defined(__clang__)
645659 using unsigned_type = as_unsigned_integer_t <T>;
646660 alignas (16 ) unsigned_type data[] = { static_cast <unsigned_type>(args ? -1LL : 0LL )... };
647- return detail::msvc_arm64_load_u<T>(data);
648- #else
649- using register_type = typename batch_bool<T, A>::register_type;
650- using unsigned_type = as_unsigned_integer_t <T>;
651- return register_type { static_cast <unsigned_type>(args ? -1LL : 0LL )... };
652- #endif
661+ return detail::neon_load_u<T>(data);
653662 }
654663
655664 template <class A >
656665 XSIMD_INLINE batch<float , A> set (batch<float , A> const &, requires_arch<neon>, float f0, float f1, float f2, float f3) noexcept
657666 {
658- #if defined(_MSC_VER) && defined(_M_ARM64) && !defined(__clang__)
659- // On MSVC ARM64, use load from array instead of brace initialization
660667 alignas (16 ) float data[] = { f0, f1, f2, f3 };
661668 return vld1q_f32 (data);
662- #else
663- return float32x4_t { f0, f1, f2, f3 };
664- #endif
665669 }
666670
667671 template <class A >
668672 XSIMD_INLINE batch<std::complex <float >, A> set (batch<std::complex <float >, A> const &, requires_arch<neon>,
669673 std::complex <float > c0, std::complex <float > c1,
670674 std::complex <float > c2, std::complex <float > c3) noexcept
671675 {
672- #if defined(_MSC_VER) && defined(_M_ARM64) && !defined(__clang__)
673- // On MSVC ARM64, use load from array instead of brace initialization
674676 alignas (16 ) float real_data[] = { c0.real (), c1.real (), c2.real (), c3.real () };
675677 alignas (16 ) float imag_data[] = { c0.imag (), c1.imag (), c2.imag (), c3.imag () };
676678 return batch<std::complex <float >, A>(vld1q_f32 (real_data), vld1q_f32 (imag_data));
677- #else
678- return batch<std::complex <float >, A>(float32x4_t { c0.real (), c1.real (), c2.real (), c3.real () },
679- float32x4_t { c0.imag (), c1.imag (), c2.imag (), c3.imag () });
680- #endif
681679 }
682680
683681 template <class A , class ... Args>
684682 XSIMD_INLINE batch_bool<float , A> set (batch_bool<float , A> const &, requires_arch<neon>, Args... args) noexcept
685683 {
686- #if defined(_MSC_VER) && defined(_M_ARM64) && !defined(__clang__)
687- // On MSVC ARM64, use load from array instead of brace initialization
688684 using unsigned_type = as_unsigned_integer_t <float >;
689685 alignas (16 ) unsigned_type data[] = { static_cast <unsigned_type>(args ? -1LL : 0LL )... };
690686 return vld1q_u32 (data);
691- #else
692- using register_type = typename batch_bool<float , A>::register_type;
693- using unsigned_type = as_unsigned_integer_t <float >;
694- return register_type { static_cast <unsigned_type>(args ? -1LL : 0LL )... };
695- #endif
696687 }
697688
698689 /* ************
0 commit comments