Skip to content

Commit 7337ff8

Browse files
committed
removed unnecessary macro
1 parent 7e3fe1a commit 7337ff8

File tree

2 files changed

+59
-69
lines changed

2 files changed

+59
-69
lines changed

include/xsimd/arch/common/xsimd_common_memory.hpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -72,8 +72,7 @@ namespace xsimd
7272
if ((bitmask >> i) & 1u)
7373
std::swap(mask_buffer[inserted++], mask_buffer[i]);
7474
// Fill remaining (don't-care) tail positions with index 0.
75-
for (size_t i = inserted; i < sizeof...(Is); ++i)
76-
mask_buffer[i] = 0;
75+
std::fill(mask_buffer + inserted, mask_buffer + sizeof...(Is), IT(0));
7776
return batch<IT, A>::load_aligned(&mask_buffer[0]);
7877
}
7978
}

include/xsimd/arch/xsimd_neon.hpp

Lines changed: 58 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -255,6 +255,12 @@ namespace xsimd
255255
using type = uint64x2_t;
256256
};
257257

258+
template <>
259+
struct comp_return_type_impl<uint64x2_t>
260+
{
261+
using type = uint64x2_t;
262+
};
263+
258264
template <>
259265
struct comp_return_type_impl<float32x4_t>
260266
{
@@ -290,43 +296,9 @@ namespace xsimd
290296

291297
#if defined(_MSC_VER) && defined(_M_ARM64) && !defined(__clang__)
292298
namespace detail {
293-
template <class T>
294-
XSIMD_INLINE typename std::enable_if<sizeof(T) == 1 && std::is_unsigned<T>::value, __n128>::type
295-
msvc_arm64_load(const T* d) noexcept { return vld1q_u8(reinterpret_cast<const uint8_t*>(d)); }
296-
template <class T>
297-
XSIMD_INLINE typename std::enable_if<sizeof(T) == 1 && std::is_signed<T>::value, __n128>::type
298-
msvc_arm64_load(const T* d) noexcept { return vld1q_s8(reinterpret_cast<const int8_t*>(d)); }
299-
template <class T>
300-
XSIMD_INLINE typename std::enable_if<sizeof(T) == 2 && std::is_unsigned<T>::value, __n128>::type
301-
msvc_arm64_load(const T* d) noexcept { return vld1q_u16(reinterpret_cast<const uint16_t*>(d)); }
302-
template <class T>
303-
XSIMD_INLINE typename std::enable_if<sizeof(T) == 2 && std::is_signed<T>::value, __n128>::type
304-
msvc_arm64_load(const T* d) noexcept { return vld1q_s16(reinterpret_cast<const int16_t*>(d)); }
305-
template <class T>
306-
XSIMD_INLINE typename std::enable_if<sizeof(T) == 4 && std::is_unsigned<T>::value, __n128>::type
307-
msvc_arm64_load(const T* d) noexcept { return vld1q_u32(reinterpret_cast<const uint32_t*>(d)); }
308-
template <class T>
309-
XSIMD_INLINE typename std::enable_if<sizeof(T) == 4 && std::is_signed<T>::value && !std::is_floating_point<T>::value, __n128>::type
310-
msvc_arm64_load(const T* d) noexcept { return vld1q_s32(reinterpret_cast<const int32_t*>(d)); }
311-
template <class T>
312-
XSIMD_INLINE typename std::enable_if<sizeof(T) == 8 && std::is_unsigned<T>::value, __n128>::type
313-
msvc_arm64_load(const T* d) noexcept { return vld1q_u64(reinterpret_cast<const uint64_t*>(d)); }
314-
template <class T>
315-
XSIMD_INLINE typename std::enable_if<sizeof(T) == 8 && std::is_signed<T>::value, __n128>::type
316-
msvc_arm64_load(const T* d) noexcept { return vld1q_s64(reinterpret_cast<const int64_t*>(d)); }
317-
318-
template <class T>
319-
XSIMD_INLINE typename std::enable_if<sizeof(T) == 1, __n128>::type
320-
msvc_arm64_load_u(const as_unsigned_integer_t<T>* d) noexcept { return vld1q_u8(reinterpret_cast<const uint8_t*>(d)); }
321-
template <class T>
322-
XSIMD_INLINE typename std::enable_if<sizeof(T) == 2, __n128>::type
323-
msvc_arm64_load_u(const as_unsigned_integer_t<T>* d) noexcept { return vld1q_u16(reinterpret_cast<const uint16_t*>(d)); }
324-
template <class T>
325-
XSIMD_INLINE typename std::enable_if<sizeof(T) == 4, __n128>::type
326-
msvc_arm64_load_u(const as_unsigned_integer_t<T>* d) noexcept { return vld1q_u32(reinterpret_cast<const uint32_t*>(d)); }
327-
template <class T>
328-
XSIMD_INLINE typename std::enable_if<sizeof(T) == 8, __n128>::type
329-
msvc_arm64_load_u(const as_unsigned_integer_t<T>* d) noexcept { return vld1q_u64(reinterpret_cast<const uint64_t*>(d)); }
299+
// msvc_arm64_load / msvc_arm64_load_u have been superseded by the
300+
// cross-platform detail::neon_load<T> / detail::neon_load_u<T> helpers
301+
// defined below (outside this block). They are no longer used here.
330302

331303
template <class T>
332304
XSIMD_INLINE typename std::enable_if<sizeof(T)==1, __n128>::type
@@ -565,6 +537,51 @@ namespace detail {
565537
}
566538
#endif
567539

540+
namespace detail
541+
{
542+
// Cross-platform helpers: load a NEON register from an aligned array.
543+
// On GCC/Clang the return type is the specific NEON vector type;
544+
// on MSVC ARM64 all NEON types are __n128, so the same code works.
545+
template <class T>
546+
XSIMD_INLINE typename std::enable_if<sizeof(T) == 1 && std::is_unsigned<T>::value, uint8x16_t>::type
547+
neon_load(const T* d) noexcept { return vld1q_u8(reinterpret_cast<const uint8_t*>(d)); }
548+
template <class T>
549+
XSIMD_INLINE typename std::enable_if<sizeof(T) == 1 && std::is_signed<T>::value, int8x16_t>::type
550+
neon_load(const T* d) noexcept { return vld1q_s8(reinterpret_cast<const int8_t*>(d)); }
551+
template <class T>
552+
XSIMD_INLINE typename std::enable_if<sizeof(T) == 2 && std::is_unsigned<T>::value, uint16x8_t>::type
553+
neon_load(const T* d) noexcept { return vld1q_u16(reinterpret_cast<const uint16_t*>(d)); }
554+
template <class T>
555+
XSIMD_INLINE typename std::enable_if<sizeof(T) == 2 && std::is_signed<T>::value, int16x8_t>::type
556+
neon_load(const T* d) noexcept { return vld1q_s16(reinterpret_cast<const int16_t*>(d)); }
557+
template <class T>
558+
XSIMD_INLINE typename std::enable_if<sizeof(T) == 4 && std::is_unsigned<T>::value, uint32x4_t>::type
559+
neon_load(const T* d) noexcept { return vld1q_u32(reinterpret_cast<const uint32_t*>(d)); }
560+
template <class T>
561+
XSIMD_INLINE typename std::enable_if<sizeof(T) == 4 && std::is_signed<T>::value && !std::is_floating_point<T>::value, int32x4_t>::type
562+
neon_load(const T* d) noexcept { return vld1q_s32(reinterpret_cast<const int32_t*>(d)); }
563+
template <class T>
564+
XSIMD_INLINE typename std::enable_if<sizeof(T) == 8 && std::is_unsigned<T>::value, uint64x2_t>::type
565+
neon_load(const T* d) noexcept { return vld1q_u64(reinterpret_cast<const uint64_t*>(d)); }
566+
template <class T>
567+
XSIMD_INLINE typename std::enable_if<sizeof(T) == 8 && std::is_signed<T>::value, int64x2_t>::type
568+
neon_load(const T* d) noexcept { return vld1q_s64(reinterpret_cast<const int64_t*>(d)); }
569+
570+
// Load the unsigned-integer representation of T from an aligned array.
571+
template <class T>
572+
XSIMD_INLINE typename std::enable_if<sizeof(T) == 1, uint8x16_t>::type
573+
neon_load_u(const as_unsigned_integer_t<T>* d) noexcept { return vld1q_u8(reinterpret_cast<const uint8_t*>(d)); }
574+
template <class T>
575+
XSIMD_INLINE typename std::enable_if<sizeof(T) == 2, uint16x8_t>::type
576+
neon_load_u(const as_unsigned_integer_t<T>* d) noexcept { return vld1q_u16(reinterpret_cast<const uint16_t*>(d)); }
577+
template <class T>
578+
XSIMD_INLINE typename std::enable_if<sizeof(T) == 4, uint32x4_t>::type
579+
neon_load_u(const as_unsigned_integer_t<T>* d) noexcept { return vld1q_u32(reinterpret_cast<const uint32_t*>(d)); }
580+
template <class T>
581+
XSIMD_INLINE typename std::enable_if<sizeof(T) == 8, uint64x2_t>::type
582+
neon_load_u(const as_unsigned_integer_t<T>* d) noexcept { return vld1q_u64(reinterpret_cast<const uint64_t*>(d)); }
583+
}
584+
568585
/*************
569586
* broadcast *
570587
*************/
@@ -630,69 +647,43 @@ namespace detail {
630647
template <class A, class T, class... Args, detail::enable_integral_t<T> = 0>
631648
XSIMD_INLINE batch<T, A> set(batch<T, A> const&, requires_arch<neon>, Args... args) noexcept
632649
{
633-
#if defined(_MSC_VER) && defined(_M_ARM64) && !defined(__clang__)
650+
// Use load-from-array on all platforms: avoids brace-init of NEON types
651+
// (which MSVC ARM64 does not support) while remaining portable.
634652
alignas(16) T data[] = { static_cast<T>(args)... };
635-
return detail::msvc_arm64_load<T>(data);
636-
#else
637-
return xsimd::types::detail::neon_vector_type<T> { args... };
638-
#endif
653+
return detail::neon_load<T>(data);
639654
}
640655

641656
template <class A, class T, class... Args, detail::enable_integral_t<T> = 0>
642657
XSIMD_INLINE batch_bool<T, A> set(batch_bool<T, A> const&, requires_arch<neon>, Args... args) noexcept
643658
{
644-
#if defined(_MSC_VER) && defined(_M_ARM64) && !defined(__clang__)
645659
using unsigned_type = as_unsigned_integer_t<T>;
646660
alignas(16) unsigned_type data[] = { static_cast<unsigned_type>(args ? -1LL : 0LL)... };
647-
return detail::msvc_arm64_load_u<T>(data);
648-
#else
649-
using register_type = typename batch_bool<T, A>::register_type;
650-
using unsigned_type = as_unsigned_integer_t<T>;
651-
return register_type { static_cast<unsigned_type>(args ? -1LL : 0LL)... };
652-
#endif
661+
return detail::neon_load_u<T>(data);
653662
}
654663

655664
template <class A>
656665
XSIMD_INLINE batch<float, A> set(batch<float, A> const&, requires_arch<neon>, float f0, float f1, float f2, float f3) noexcept
657666
{
658-
#if defined(_MSC_VER) && defined(_M_ARM64) && !defined(__clang__)
659-
// On MSVC ARM64, use load from array instead of brace initialization
660667
alignas(16) float data[] = { f0, f1, f2, f3 };
661668
return vld1q_f32(data);
662-
#else
663-
return float32x4_t { f0, f1, f2, f3 };
664-
#endif
665669
}
666670

667671
template <class A>
668672
XSIMD_INLINE batch<std::complex<float>, A> set(batch<std::complex<float>, A> const&, requires_arch<neon>,
669673
std::complex<float> c0, std::complex<float> c1,
670674
std::complex<float> c2, std::complex<float> c3) noexcept
671675
{
672-
#if defined(_MSC_VER) && defined(_M_ARM64) && !defined(__clang__)
673-
// On MSVC ARM64, use load from array instead of brace initialization
674676
alignas(16) float real_data[] = { c0.real(), c1.real(), c2.real(), c3.real() };
675677
alignas(16) float imag_data[] = { c0.imag(), c1.imag(), c2.imag(), c3.imag() };
676678
return batch<std::complex<float>, A>(vld1q_f32(real_data), vld1q_f32(imag_data));
677-
#else
678-
return batch<std::complex<float>, A>(float32x4_t { c0.real(), c1.real(), c2.real(), c3.real() },
679-
float32x4_t { c0.imag(), c1.imag(), c2.imag(), c3.imag() });
680-
#endif
681679
}
682680

683681
template <class A, class... Args>
684682
XSIMD_INLINE batch_bool<float, A> set(batch_bool<float, A> const&, requires_arch<neon>, Args... args) noexcept
685683
{
686-
#if defined(_MSC_VER) && defined(_M_ARM64) && !defined(__clang__)
687-
// On MSVC ARM64, use load from array instead of brace initialization
688684
using unsigned_type = as_unsigned_integer_t<float>;
689685
alignas(16) unsigned_type data[] = { static_cast<unsigned_type>(args ? -1LL : 0LL)... };
690686
return vld1q_u32(data);
691-
#else
692-
using register_type = typename batch_bool<float, A>::register_type;
693-
using unsigned_type = as_unsigned_integer_t<float>;
694-
return register_type { static_cast<unsigned_type>(args ? -1LL : 0LL)... };
695-
#endif
696687
}
697688

698689
/*************

0 commit comments

Comments
 (0)