Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 56 additions & 0 deletions include/xsimd/arch/xsimd_avx512bw.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,25 @@ namespace xsimd
}
}

// decr_if
// Masked decrement for 8- and 16-bit integral lanes: lanes selected by `mask`
// receive `self - 1`, the remaining lanes are taken unchanged from `self`.
// Any other element size is handed to the avx512dq overload.
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
XSIMD_INLINE batch<T, A> decr_if(batch<T, A> const& self, batch_bool<T, A> const& mask, requires_arch<avx512bw>) noexcept
{
    XSIMD_IF_CONSTEXPR(sizeof(T) > 2)
    {
        // Neither byte- nor word-sized elements: defer to the avx512dq overload.
        return decr_if(self, mask, avx512dq {});
    }
    else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
    {
        const auto ones = _mm512_set1_epi16(1);
        // `self` serves both as pass-through source and as minuend.
        return _mm512_mask_sub_epi16(self, mask.data, self, ones);
    }
    else
    {
        const auto ones = _mm512_set1_epi8(1);
        return _mm512_mask_sub_epi8(self, mask.data, self, ones);
    }
}

// eq
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
XSIMD_INLINE batch_bool<T, A> eq(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx512bw>) noexcept
Expand All @@ -245,6 +264,43 @@ namespace xsimd
return detail::compare_int_avx512bw<A, T, _MM_CMPINT_GT>(self, other);
}

// incr_if
// Masked increment for 8- and 16-bit integral lanes: lanes selected by `mask`
// receive `self + 1`, the remaining lanes are taken unchanged from `self`.
// Any other element size is handed to the avx512dq overload.
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
XSIMD_INLINE batch<T, A> incr_if(batch<T, A> const& self, batch_bool<T, A> const& mask, requires_arch<avx512bw>) noexcept
{
    XSIMD_IF_CONSTEXPR(sizeof(T) > 2)
    {
        // Neither byte- nor word-sized elements: defer to the avx512dq overload.
        return incr_if(self, mask, avx512dq {});
    }
    else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
    {
        const auto ones = _mm512_set1_epi16(1);
        // `self` serves both as pass-through source and as first addend.
        return _mm512_mask_add_epi16(self, mask.data, self, ones);
    }
    else
    {
        const auto ones = _mm512_set1_epi8(1);
        return _mm512_mask_add_epi8(self, mask.data, self, ones);
    }
}

// insert
// Returns a copy of `self` in which lane I holds `val`, for 8- and 16-bit
// integral element types. The lane is selected with a one-hot write mask; the
// index is wrapped to the lane count (I & 63 for bytes, I & 31 for words).
// Any other element size is handed to the avx512dq overload.
template <class A, class T, size_t I, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
XSIMD_INLINE batch<T, A> insert(batch<T, A> const& self, T val, index<I> pos, requires_arch<avx512bw>) noexcept
{
    XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
    {
        return _mm512_mask_set1_epi8(self, __mmask64(1ULL << (I & 63)), val);
    }
    else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
    {
        // Unsigned literal: for I == 31 a plain `1 << 31` would shift into the
        // sign bit of a signed int before being converted to the mask type.
        return _mm512_mask_set1_epi16(self, __mmask32(1U << (I & 31)), val);
    }
    else
    {
        return insert(self, val, pos, avx512dq {});
    }
}

// le
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
XSIMD_INLINE batch_bool<T, A> le(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx512bw>) noexcept
Expand Down
75 changes: 75 additions & 0 deletions include/xsimd/arch/xsimd_avx512f.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,12 @@ namespace xsimd
using namespace types;

// fwd
// Forward declarations of overloads tagged with requires_arch<generic>.
// The avx512f decr_if / incr_if / insert kernels in this file call the
// corresponding generic overload for element sizes they do not handle
// themselves, so those names must be declared before that point.
// Masked decrement, generic overload.
template <class A, class T, class Mask>
XSIMD_INLINE batch<T, A> decr_if(batch<T, A> const& self, Mask const& mask, requires_arch<generic>) noexcept;
// Masked increment, generic overload.
template <class A, class T, class Mask>
XSIMD_INLINE batch<T, A> incr_if(batch<T, A> const& self, Mask const& mask, requires_arch<generic>) noexcept;
// Single-lane insertion at compile-time index I, generic overload.
template <class A, class T, size_t I>
XSIMD_INLINE batch<T, A> insert(batch<T, A> const& self, T val, index<I>, requires_arch<generic>) noexcept;
// Generic transpose over the range [matrix_begin, matrix_end) of uint16_t batches.
template <class A>
XSIMD_INLINE void transpose(batch<uint16_t, A>* matrix_begin, batch<uint16_t, A>* matrix_end, requires_arch<generic>) noexcept;
template <class A>
Expand Down Expand Up @@ -757,6 +763,24 @@ namespace xsimd
return _mm512_permutex2var_pd(self.real(), idx, self.imag());
}
}
// decr_if
// Masked decrement for 32- and 64-bit integral lanes: lanes selected by `mask`
// receive `self - 1`, the remaining lanes are taken unchanged from `self`.
// Any other element size is handed to the generic overload.
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
XSIMD_INLINE batch<T, A> decr_if(batch<T, A> const& self, batch_bool<T, A> const& mask, requires_arch<avx512f>) noexcept
{
    XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
    {
        const auto ones = _mm512_set1_epi64(1);
        // `self` serves both as pass-through source and as minuend.
        return _mm512_mask_sub_epi64(self, mask.data, self, ones);
    }
    else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
    {
        const auto ones = _mm512_set1_epi32(1);
        return _mm512_mask_sub_epi32(self, mask.data, self, ones);
    }
    else
    {
        // Element size without a masked subtract here: use the generic overload.
        return decr_if(self, mask, generic {});
    }
}

// div
template <class A>
Expand Down Expand Up @@ -1073,6 +1097,57 @@ namespace xsimd
return _mm512_add_pd(tmpx, tmpy);
}

// incr_if
// Masked increment for 32- and 64-bit integral lanes: lanes selected by `mask`
// receive `self + 1`, the remaining lanes are taken unchanged from `self`.
// Any other element size is handed to the generic overload.
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
XSIMD_INLINE batch<T, A> incr_if(batch<T, A> const& self, batch_bool<T, A> const& mask, requires_arch<avx512f>) noexcept
{
    XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
    {
        const auto ones = _mm512_set1_epi64(1);
        // `self` serves both as pass-through source and as first addend.
        return _mm512_mask_add_epi64(self, mask.data, self, ones);
    }
    else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
    {
        const auto ones = _mm512_set1_epi32(1);
        return _mm512_mask_add_epi32(self, mask.data, self, ones);
    }
    else
    {
        // Element size without a masked add here: use the generic overload.
        return incr_if(self, mask, generic {});
    }
}

// insert
// Returns a copy of `self` in which float lane I holds `val`.
// The value is written through the 32-bit integer masked broadcast, so the
// vector is viewed as integers for the write and viewed as floats again after.
template <class A, size_t I>
XSIMD_INLINE batch<float, A> insert(batch<float, A> const& self, float val, index<I>, requires_arch<avx512f>) noexcept
{
    // One-hot write mask for lane I (index wrapped with I & 15).
    const __mmask16 lane = __mmask16(1 << (I & 15));
    // Bit pattern of `val` carried in a 32-bit integer.
    const int32_t bits = bit_cast<int32_t>(val);
    const auto updated = _mm512_mask_set1_epi32(_mm512_castps_si512(self), lane, bits);
    return _mm512_castsi512_ps(updated);
}

// Returns a copy of `self` in which double lane I holds `val`.
// The value is written through the 64-bit integer masked broadcast, so the
// vector is viewed as integers for the write and viewed as doubles again after.
template <class A, size_t I>
XSIMD_INLINE batch<double, A> insert(batch<double, A> const& self, double val, index<I>, requires_arch<avx512f>) noexcept
{
    // One-hot write mask for lane I (index wrapped with I & 7).
    const __mmask8 lane = __mmask8(1 << (I & 7));
    // Bit pattern of `val` carried in a 64-bit integer.
    const int64_t bits = bit_cast<int64_t>(val);
    const auto updated = _mm512_mask_set1_epi64(_mm512_castpd_si512(self), lane, bits);
    return _mm512_castsi512_pd(updated);
}
// Returns a copy of `self` in which lane I holds `val`, for 32- and 64-bit
// integral element types. The lane is selected with a one-hot write mask; the
// index is wrapped to the lane count (I & 15 for 32-bit, I & 7 for 64-bit).
// Any other element size is handed to the generic overload.
template <class A, class T, size_t I, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
XSIMD_INLINE batch<T, A> insert(batch<T, A> const& self, T val, index<I> pos, requires_arch<avx512f>) noexcept
{
    XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
    {
        const __mmask8 lane = __mmask8(1 << (I & 7));
        return _mm512_mask_set1_epi64(self, lane, val);
    }
    else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
    {
        const __mmask16 lane = __mmask16(1 << (I & 15));
        return _mm512_mask_set1_epi32(self, lane, val);
    }
    else
    {
        // Element size without a masked broadcast here: use the generic overload.
        return insert(self, val, pos, generic {});
    }
}

// isnan
template <class A>
XSIMD_INLINE batch_bool<float, A> isnan(batch<float, A> const& self, requires_arch<avx512f>) noexcept
Expand Down