Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion include/xsimd/arch/xsimd_avx2.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ namespace xsimd
template <class A, class T, bool... Values, class Mode>
XSIMD_INLINE void store_masked(T* mem, batch<T, A> const& src, batch_bool_constant<T, A, Values...> mask, Mode, requires_arch<avx2>) noexcept
{
constexpr size_t lanes_per_half = sizeof(__m128i) / sizeof(T);
constexpr size_t lanes_per_half = batch<T, A>::size / 2;

// confined to lower 128-bit half → forward to SSE
XSIMD_IF_CONSTEXPR(mask.countl_zero() >= lanes_per_half)
Expand Down
18 changes: 18 additions & 0 deletions include/xsimd/arch/xsimd_sve.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,17 @@ namespace xsimd
// Returns the predicate produced by the sve_ptrue_impl overload that matches
// the byte width of T; the overload is selected through an index<sizeof(T)> tag.
template <class T>
svbool_t sve_ptrue() noexcept
{
    using width_tag = index<sizeof(T)>;
    return sve_ptrue_impl(width_tag {});
}

// predicate loading
// Two-flag form: builds a predicate for 64-bit elements from the compile-time
// flags M0 and M1 (svdupq_b64 repeats the 128-bit pattern across the vector).
template <bool M0, bool M1>
svbool_t sve_pmask() noexcept
{
    svbool_t pattern = svdupq_b64(M0, M1);
    return pattern;
}
// Four-flag form: builds a predicate for 32-bit elements from the compile-time
// flags M0..M3 (svdupq_b32 repeats the 128-bit pattern across the vector).
template <bool M0, bool M1, bool M2, bool M3>
svbool_t sve_pmask() noexcept
{
    svbool_t pattern = svdupq_b32(M0, M1, M2, M3);
    return pattern;
}
// Eight-flag form: builds a predicate for 16-bit elements from the compile-time
// flags M0..M7 (svdupq_b16 repeats the 128-bit pattern across the vector).
template <bool M0, bool M1, bool M2, bool M3,
          bool M4, bool M5, bool M6, bool M7>
svbool_t sve_pmask() noexcept
{
    svbool_t pattern = svdupq_b16(M0, M1, M2, M3,
                                  M4, M5, M6, M7);
    return pattern;
}
// Sixteen-flag form: builds a predicate for 8-bit elements from the compile-time
// flags M0..M15 (svdupq_b8 repeats the 128-bit pattern across the vector).
template <bool M0, bool M1, bool M2, bool M3,
          bool M4, bool M5, bool M6, bool M7,
          bool M8, bool M9, bool M10, bool M11,
          bool M12, bool M13, bool M14, bool M15>
svbool_t sve_pmask() noexcept
{
    svbool_t pattern = svdupq_b8(M0, M1, M2, M3,
                                 M4, M5, M6, M7,
                                 M8, M9, M10, M11,
                                 M12, M13, M14, M15);
    return pattern;
}

// count active lanes in a predicate
// 1-byte element overload: counts the 8-bit lanes of `p` that are active,
// using `p` itself as the governing predicate.
XSIMD_INLINE uint64_t sve_pcount_impl(svbool_t p, index<1>) noexcept
{
    const uint64_t active_lanes = svcntp_b8(p, p);
    return active_lanes;
}
// 2-byte element overload: counts the 16-bit lanes of `p` that are active,
// using `p` itself as the governing predicate.
XSIMD_INLINE uint64_t sve_pcount_impl(svbool_t p, index<2>) noexcept
{
    const uint64_t active_lanes = svcntp_b16(p, p);
    return active_lanes;
}
Expand Down Expand Up @@ -95,6 +106,13 @@ namespace xsimd
return load_aligned<A>(src, convert<T>(), sve {});
}

// load_masked
// Loads a batch<T, A> from `mem` under a compile-time lane mask.
//
// The mask flags `Values...` are turned into an SVE predicate by
// detail::sve_pmask and handed to svld1, so only the selected lanes are read
// from memory (per the ACLE, svld1 zeroes the inactive lanes of the result).
//
// The mask parameter is keyed on T rather than a fixed element type, so that
// this overload is selected for every element type admitted by
// detail::sve_enable_all_t<T>, not only for float masks. Only its type is
// used (to deduce `Values...`), hence the parameter is left unnamed.
//
// `mem` is reinterpreted as detail::sve_fix_char_t<T> const* before the load.
// The `Mode` tag is accepted for interface compatibility and is not inspected.
//
// NOTE(review): detail::sve_pmask selects the predicate element width from
// the number of flags (2/4/8/16 -> b64/b32/b16/b8). That width equals
// sizeof(T) only when sizeof...(Values) * sizeof(T) == 16 bytes; confirm the
// behaviour for SVE vector lengths above 128 bits.
template <class A, class T, bool... Values, class Mode, detail::sve_enable_all_t<T> = 0>
XSIMD_INLINE batch<T, A> load_masked(T const* mem, batch_bool_constant<T, A, Values...> /*mask*/, Mode, requires_arch<sve>) noexcept
{
    return svld1(detail::sve_pmask<Values...>(), reinterpret_cast<detail::sve_fix_char_t<T> const*>(mem));
}

// load_complex
template <class A, class T, detail::sve_enable_floating_point_t<T> = 0>
XSIMD_INLINE batch<std::complex<T>, A> load_complex_aligned(std::complex<T> const* mem, convert<std::complex<T>>, requires_arch<sve>) noexcept
Expand Down
Loading