Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion include/xsimd/arch/common/xsimd_common_memory.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,8 @@ namespace xsimd
template <class A, size_t I, class T>
XSIMD_INLINE typename batch<std::complex<T>, A>::value_type get(batch<std::complex<T>, A> const& self, ::xsimd::index<I>, requires_arch<common>) noexcept
{
alignas(A::alignment()) T buffer[batch<std::complex<T>, A>::size];
using value_type = typename batch<std::complex<T>, A>::value_type;
alignas(A::alignment()) value_type buffer[batch<std::complex<T>, A>::size];
self.store_aligned(&buffer[0]);
return buffer[I];
}
Expand Down
55 changes: 55 additions & 0 deletions include/xsimd/arch/xsimd_avx.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -748,6 +748,61 @@ namespace xsimd
return self - batch<T, A>(mask.data);
}

// get
template <class A, size_t I>
XSIMD_INLINE float get(batch<float, A> const& self, ::xsimd::index<I>, requires_arch<avx>) noexcept
{
constexpr size_t elements_per_lane = 4;
constexpr size_t lane = I / elements_per_lane;
constexpr size_t sub_index = I % elements_per_lane;
__m128 half;
XSIMD_IF_CONSTEXPR(lane == 0)
{
half = _mm256_castps256_ps128(self);
}
else
{
half = detail::upper_half((__m256)self);
}
return kernel::get(batch<float, sse4_1>(half), ::xsimd::index<sub_index> {}, sse4_1 {});
}

template <class A, size_t I>
XSIMD_INLINE double get(batch<double, A> const& self, ::xsimd::index<I>, requires_arch<avx>) noexcept
{
constexpr size_t elements_per_lane = 2;
constexpr size_t lane = I / elements_per_lane;
constexpr size_t sub_index = I % elements_per_lane;
__m128d half;
XSIMD_IF_CONSTEXPR(lane == 0)
{
half = _mm256_castpd256_pd128(self);
}
else
{
half = detail::upper_half((__m256d)self);
}
return kernel::get(batch<double, sse4_1>(half), ::xsimd::index<sub_index> {}, sse4_1 {});
}

        // Extract the I-th scalar of a 256-bit integer batch: pick the 128-bit
        // lane containing it, then delegate to the SSE4.1 kernel.
        template <class A, size_t I, class T, class = std::enable_if_t<std::is_integral<T>::value>>
        XSIMD_INLINE T get(batch<T, A> const& self, ::xsimd::index<I>, requires_arch<avx>) noexcept
        {
            // 128-bit lane holds 16 / sizeof(T) elements.
            constexpr size_t elements_per_lane = 16 / sizeof(T);
            constexpr size_t lane = I / elements_per_lane;
            constexpr size_t sub_index = I % elements_per_lane;
            __m128i half;
            XSIMD_IF_CONSTEXPR(lane == 0)
            {
                half = _mm256_castsi256_si128(self);
            }
            else
            {
                half = detail::upper_half((__m256i)self);
            }
            return kernel::get(batch<T, sse4_1>(half), ::xsimd::index<sub_index> {}, sse4_1 {});
        }

// insert
template <class A, class T, size_t I, class = std::enable_if_t<std::is_integral<T>::value>>
XSIMD_INLINE batch<T, A> insert(batch<T, A> const& self, T val, index<I> pos, requires_arch<avx>) noexcept
Expand Down
55 changes: 55 additions & 0 deletions include/xsimd/arch/xsimd_avx512f.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1346,6 +1346,61 @@ namespace xsimd
}
}

// get
template <class A, size_t I>
XSIMD_INLINE float get(batch<float, A> const& self, ::xsimd::index<I>, requires_arch<avx512f>) noexcept
{
constexpr size_t elements_per_lane = 8;
constexpr size_t lane = I / elements_per_lane;
constexpr size_t sub_index = I % elements_per_lane;
__m256 half;
XSIMD_IF_CONSTEXPR(lane == 0)
{
half = _mm512_castps512_ps256(self);
}
else
{
half = detail::upper_half((__m512)self);
}
return kernel::get(batch<float, avx>(half), ::xsimd::index<sub_index> {}, avx {});
}

        // Extract the I-th double of a 512-bit batch: pick the 256-bit half
        // containing it, then delegate to the AVX kernel.
        template <class A, size_t I>
        XSIMD_INLINE double get(batch<double, A> const& self, ::xsimd::index<I>, requires_arch<avx512f>) noexcept
        {
            // Each 256-bit half holds 4 doubles.
            constexpr size_t elements_per_lane = 4;
            constexpr size_t lane = I / elements_per_lane;
            constexpr size_t sub_index = I % elements_per_lane;
            __m256d half;
            XSIMD_IF_CONSTEXPR(lane == 0)
            {
                half = _mm512_castpd512_pd256(self);
            }
            else
            {
                half = detail::upper_half((__m512d)self);
            }
            return kernel::get(batch<double, avx>(half), ::xsimd::index<sub_index> {}, avx {});
        }

        // Extract the I-th scalar of a 512-bit integer batch: pick the 256-bit
        // half containing it, then delegate to the AVX kernel.
        template <class A, size_t I, class T, class = std::enable_if_t<std::is_integral<T>::value>>
        XSIMD_INLINE T get(batch<T, A> const& self, ::xsimd::index<I>, requires_arch<avx512f>) noexcept
        {
            // 256-bit half holds 32 / sizeof(T) elements.
            constexpr size_t elements_per_lane = 32 / sizeof(T);
            constexpr size_t lane = I / elements_per_lane;
            constexpr size_t sub_index = I % elements_per_lane;
            __m256i half;
            XSIMD_IF_CONSTEXPR(lane == 0)
            {
                half = _mm512_castsi512_si256(self);
            }
            else
            {
                half = detail::upper_half((__m512i)self);
            }
            return kernel::get(batch<T, avx>(half), ::xsimd::index<sub_index> {}, avx {});
        }

// insert
template <class A, size_t I>
XSIMD_INLINE batch<float, A> insert(batch<float, A> const& self, float val, index<I>, requires_arch<avx512f>) noexcept
Expand Down
55 changes: 55 additions & 0 deletions include/xsimd/arch/xsimd_neon.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2742,6 +2742,61 @@ namespace xsimd
return vshrq_n_s64(x, shift);
}

// get
        // Extract lane I of a float batch; I is a compile-time immediate.
        template <class A, size_t I>
        XSIMD_INLINE float get(batch<float, A> const& self, ::xsimd::index<I>, requires_arch<neon>) noexcept
        {
            return vgetq_lane_f32(self, I);
        }

        // Extract lane I of an unsigned 8-bit batch.
        template <class A, size_t I, class T, detail::enable_sized_unsigned_t<T, 1> = 0>
        XSIMD_INLINE T get(batch<T, A> const& self, ::xsimd::index<I>, requires_arch<neon>) noexcept
        {
            return vgetq_lane_u8(self, I);
        }

        // Extract lane I of a signed 8-bit batch.
        template <class A, size_t I, class T, detail::enable_sized_signed_t<T, 1> = 0>
        XSIMD_INLINE T get(batch<T, A> const& self, ::xsimd::index<I>, requires_arch<neon>) noexcept
        {
            return vgetq_lane_s8(self, I);
        }

        // Extract lane I of an unsigned 16-bit batch.
        template <class A, size_t I, class T, detail::enable_sized_unsigned_t<T, 2> = 0>
        XSIMD_INLINE T get(batch<T, A> const& self, ::xsimd::index<I>, requires_arch<neon>) noexcept
        {
            return vgetq_lane_u16(self, I);
        }

        // Extract lane I of a signed 16-bit batch.
        template <class A, size_t I, class T, detail::enable_sized_signed_t<T, 2> = 0>
        XSIMD_INLINE T get(batch<T, A> const& self, ::xsimd::index<I>, requires_arch<neon>) noexcept
        {
            return vgetq_lane_s16(self, I);
        }

        // Extract lane I of an unsigned 32-bit batch.
        template <class A, size_t I, class T, detail::enable_sized_unsigned_t<T, 4> = 0>
        XSIMD_INLINE T get(batch<T, A> const& self, ::xsimd::index<I>, requires_arch<neon>) noexcept
        {
            return vgetq_lane_u32(self, I);
        }

        // Extract lane I of a signed 32-bit batch.
        template <class A, size_t I, class T, detail::enable_sized_signed_t<T, 4> = 0>
        XSIMD_INLINE T get(batch<T, A> const& self, ::xsimd::index<I>, requires_arch<neon>) noexcept
        {
            return vgetq_lane_s32(self, I);
        }

        // Extract lane I of an unsigned 64-bit batch.
        template <class A, size_t I, class T, detail::enable_sized_unsigned_t<T, 8> = 0>
        XSIMD_INLINE T get(batch<T, A> const& self, ::xsimd::index<I>, requires_arch<neon>) noexcept
        {
            return vgetq_lane_u64(self, I);
        }

        // Extract lane I of a signed 64-bit batch.
        template <class A, size_t I, class T, detail::enable_sized_signed_t<T, 8> = 0>
        XSIMD_INLINE T get(batch<T, A> const& self, ::xsimd::index<I>, requires_arch<neon>) noexcept
        {
            return vgetq_lane_s64(self, I);
        }

// first
template <class A>
XSIMD_INLINE float first(batch<float, A> const& self, requires_arch<neon>) noexcept
Expand Down
7 changes: 7 additions & 0 deletions include/xsimd/arch/xsimd_neon64.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,13 @@ namespace xsimd
{
using namespace types;

// get
        // Extract lane I of a double batch (f64 lanes require AArch64).
        template <class A, size_t I>
        XSIMD_INLINE double get(batch<double, A> const& self, ::xsimd::index<I>, requires_arch<neon64>) noexcept
        {
            return vgetq_lane_f64(self, I);
        }

// first
template <class A>
XSIMD_INLINE double first(batch<double, A> const& self, requires_arch<neon64>) noexcept
Expand Down
96 changes: 96 additions & 0 deletions include/xsimd/arch/xsimd_sse2.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -798,6 +798,102 @@ namespace xsimd
return _mm_castsi128_pd(_mm_cmpeq_epi32(_mm_castpd_si128(self), _mm_castpd_si128(other)));
}

// get
template <class A, size_t I>
XSIMD_INLINE float get(batch<float, A> const& self, ::xsimd::index<I>, requires_arch<sse2>) noexcept
{
XSIMD_IF_CONSTEXPR(I == 0)
{
return _mm_cvtss_f32(self);
}
else
{
return _mm_cvtss_f32(_mm_shuffle_ps(self, self, _MM_SHUFFLE(I, I, I, I)));
}
}

template <class A, size_t I>
XSIMD_INLINE double get(batch<double, A> const& self, ::xsimd::index<I>, requires_arch<sse2>) noexcept
{
XSIMD_IF_CONSTEXPR(I == 0)
{
return _mm_cvtsd_f64(self);
}
else
{
return _mm_cvtsd_f64(_mm_unpackhi_pd(self, self));
}
}

template <class A, size_t I, class T, class = std::enable_if_t<std::is_integral<T>::value>>
XSIMD_INLINE T get(batch<T, A> const& self, ::xsimd::index<I>, requires_arch<sse2>) noexcept
{
XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
{
XSIMD_IF_CONSTEXPR(I == 0)
{
return static_cast<T>(_mm_cvtsi128_si32(self) & 0xFF);
}
else
{
return static_cast<T>((_mm_cvtsi128_si32(_mm_srli_si128(self, I)) & 0xFF));
}
}
else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
{
XSIMD_IF_CONSTEXPR(I == 0)
{
return static_cast<T>(_mm_cvtsi128_si32(self) & 0xFFFF);
}
else
{
return static_cast<T>((_mm_cvtsi128_si32(_mm_srli_si128(self, I * 2)) & 0xFFFF));
}
}
else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
{
XSIMD_IF_CONSTEXPR(I == 0)
{
return static_cast<T>(_mm_cvtsi128_si32(self));
}
else
{
return static_cast<T>(_mm_cvtsi128_si32(_mm_shuffle_epi32(self, _MM_SHUFFLE(I, I, I, I))));
}
}
else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
{
#if defined(__x86_64__)
XSIMD_IF_CONSTEXPR(I == 0)
{
return static_cast<T>(_mm_cvtsi128_si64(self));
}
else
{
return static_cast<T>(_mm_cvtsi128_si64(_mm_srli_si128(self, 8)));
}
#else
__m128i shifted;
XSIMD_IF_CONSTEXPR(I == 0)
{
shifted = self;
}
else
{
shifted = _mm_srli_si128(self, 8);
}
int64_t i;
_mm_storel_epi64(reinterpret_cast<__m128i*>(&i), shifted);
return static_cast<T>(i);
#endif
}
else
{
assert(false && "unsupported arch/op combination");
return {};
}
}

// first
template <class A>
XSIMD_INLINE float first(batch<float, A> const& self, requires_arch<sse2>) noexcept
Expand Down
44 changes: 44 additions & 0 deletions include/xsimd/arch/xsimd_sse4_1.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,50 @@ namespace xsimd
return _mm_floor_pd(self);
}

// get
template <class A, size_t I, class T, class = std::enable_if_t<std::is_integral<T>::value>>
XSIMD_INLINE T get(batch<T, A> const& self, ::xsimd::index<I>, requires_arch<sse4_1>) noexcept
{
XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
{
return static_cast<T>(_mm_extract_epi8(self, I));
}
else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
{
return static_cast<T>(_mm_extract_epi16(self, I));
}
else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
{
return static_cast<T>(_mm_extract_epi32(self, I));
}
else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
{
#if defined(__x86_64__)
return static_cast<T>(_mm_extract_epi64(self, I));
#else
return get(self, ::xsimd::index<I> {}, sse2 {});
#endif
}
else
{
assert(false && "unsupported arch/op combination");
return {};
}
}

template <class A, size_t I>
XSIMD_INLINE float get(batch<float, A> const& self, ::xsimd::index<I>, requires_arch<sse4_1>) noexcept
{
XSIMD_IF_CONSTEXPR(I == 0)
{
return _mm_cvtss_f32(self);
}
else
{
return bit_cast<float>(static_cast<uint32_t>(_mm_extract_epi32(_mm_castps_si128(self), I)));
}
}

// insert
template <class A, class T, size_t I, class = std::enable_if_t<std::is_integral<T>::value>>
XSIMD_INLINE batch<T, A> insert(batch<T, A> const& self, T val, index<I> pos, requires_arch<sse4_1>) noexcept
Expand Down
Loading
Loading