Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions include/xsimd/arch/common/xsimd_common_details.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,9 @@ namespace xsimd
XSIMD_INLINE std::pair<batch<T, A>, batch<T, A>> sincos(batch<T, A> const& self) noexcept;
// Forward declaration of sqrt for batch<T, A>.
template <class T, class A>
XSIMD_INLINE batch<T, A> sqrt(batch<T, A> const& self) noexcept;
// Forward declaration of swizzle taking its indices as a compile-time pack
// (batch_constant<Vt, A, Values...>). The enable_if return type removes this
// overload from resolution unless T is an arithmetic type.
template <class T, class A, class Vt, Vt... Values>
XSIMD_INLINE typename std::enable_if<std::is_arithmetic<T>::value, batch<T, A>>::type
swizzle(batch<T, A> const& x, batch_constant<Vt, A, Values...> mask) noexcept;
// Forward declaration of tan for batch<T, A>.
template <class T, class A>
XSIMD_INLINE batch<T, A> tan(batch<T, A> const& self) noexcept;
template <class T, class A>
Expand Down
13 changes: 11 additions & 2 deletions include/xsimd/arch/common/xsimd_common_memory.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -341,7 +341,7 @@ namespace xsimd
}
};

return swizzle(self, make_batch_constant<as_unsigned_integer_t<T>, rotate_generator, A>(), A {});
return swizzle(self, make_batch_constant<as_unsigned_integer_t<T>, rotate_generator, A>());
}

template <size_t N, class A, class T>
Expand All @@ -362,7 +362,7 @@ namespace xsimd
}
};

return swizzle(self, make_batch_constant<as_unsigned_integer_t<T>, rotate_generator, A>(), A {});
return swizzle(self, make_batch_constant<as_unsigned_integer_t<T>, rotate_generator, A>());
}

template <size_t N, class A, class T>
Expand Down Expand Up @@ -611,6 +611,15 @@ namespace xsimd
return batch<T, A>::load_aligned(out_buffer);
}

// Fallback swizzle for a compile-time index pack on the `common` architecture:
// the lanes of `self` are written to a scratch array and the result is rebuilt
// by reading that array at each index of `Is`, in pack order.
template <class A, class T, class ITy, ITy... Is>
XSIMD_INLINE batch<T, A> swizzle(batch<T, A> const& self, batch_constant<ITy, A, Is...>, requires_arch<common>) noexcept
{
    using batch_type = batch<T, A>;
    // Scratch storage aligned to A::alignment() so the aligned store can target it.
    alignas(A::alignment()) T lanes[batch_type::size];
    store_aligned(&lanes[0], self);
    // The unnamed mask parameter is never read; only its index pack is used.
    return { lanes[Is]... };
}

template <class A, class T, class ITy>
XSIMD_INLINE batch<std::complex<T>, A> swizzle(batch<std::complex<T>, A> const& self, batch<ITy, A> mask, requires_arch<common>) noexcept
{
Expand Down
29 changes: 28 additions & 1 deletion include/xsimd/arch/xsimd_avx2.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -657,9 +657,35 @@ namespace xsimd

// rotate_left
// rotate_left for batch<uint8_t> on avx2, with N counted in bytes.
// _mm256_alignr_epi8 shifts each 128-bit half independently, so a copy of
// `self` with its halves exchanged supplies the bytes that must cross the
// half boundary.
template <size_t N, class A>
XSIMD_INLINE batch<uint8_t, A> rotate_left(batch<uint8_t, A> const& self, requires_arch<avx2>) noexcept
{
    // Control value 0x1 produces `self` with its two 128-bit halves swapped.
    const __m256i swapped = _mm256_permute2x128_si256(self, self, 0x1);
    if (N >= 16)
    {
        // A rotation of a whole half or more: exchange the operand roles and
        // shift only by the remainder beyond one half.
        return _mm256_alignr_epi8(self, swapped, N - 16);
    }
    return _mm256_alignr_epi8(swapped, self, N);
}
// rotate_left for batch<int8_t> on avx2: reinterprets the lanes as uint8_t,
// delegates to the uint8_t overload, and reinterprets the result back.
template <size_t N, class A>
XSIMD_INLINE batch<int8_t, A> rotate_left(batch<int8_t, A> const& self, requires_arch<avx2>) noexcept
{
    const auto as_unsigned = bitwise_cast<uint8_t>(self);
    const auto rotated = rotate_left<N, A>(as_unsigned, avx2 {});
    return bitwise_cast<int8_t>(rotated);
}
// rotate_left for batch<uint16_t> on avx2, with N counted in 16-bit elements.
// _mm256_alignr_epi8 takes its shift in bytes and works on each 128-bit half
// independently, so the element count is doubled and a copy of `self` with its
// halves exchanged supplies the elements that must cross the half boundary.
// (A leftover unconditional `return _mm256_alignr_epi8(self, self, N);` used to
// precede this logic, making it unreachable and shifting by N bytes only.)
template <size_t N, class A>
XSIMD_INLINE batch<uint16_t, A> rotate_left(batch<uint16_t, A> const& self, requires_arch<avx2>) noexcept
{
    // Control value 0x1 produces `self` with its two 128-bit halves swapped.
    auto other = _mm256_permute2x128_si256(self, self, 0x1);
    if (N < 8)
    {
        // Fewer than one half (8 elements): shift by 2 * N bytes.
        return _mm256_alignr_epi8(other, self, 2 * N);
    }
    else
    {
        // One half or more: exchange the operand roles and shift by the
        // remainder beyond one half, again expressed in bytes.
        return _mm256_alignr_epi8(self, other, 2 * (N - 8));
    }
}
template <size_t N, class A>
XSIMD_INLINE batch<int16_t, A> rotate_left(batch<int16_t, A> const& self, requires_arch<avx2>) noexcept
Expand Down Expand Up @@ -876,6 +902,7 @@ namespace xsimd
}

// swizzle (dynamic mask)

template <class A>
XSIMD_INLINE batch<float, A> swizzle(batch<float, A> const& self, batch<uint32_t, A> mask, requires_arch<avx2>) noexcept
{
Expand Down
12 changes: 0 additions & 12 deletions include/xsimd/arch/xsimd_avx512bw.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -429,18 +429,6 @@ namespace xsimd
return detail::compare_int_avx512bw<A, T, _MM_CMPINT_NE>(self, other);
}

// rotate_left
// rotate_left for batch<uint16_t> on avx512bw: forwards to _mm512_alignr_epi8
// with `self` as both operands and N as the immediate.
// NOTE(review): N is passed unscaled here, while the uint16_t overloads shown
// for xsimd_ssse3.hpp and xsimd_avx2.hpp pass 2 * N — confirm the intended unit.
// NOTE(review): the summary for this file reads "0 additions & 12 deletions",
// so this overload appears to be removed by the change — confirm before use.
template <size_t N, class A>
XSIMD_INLINE batch<uint16_t, A> rotate_left(batch<uint16_t, A> const& self, requires_arch<avx512bw>) noexcept
{
return _mm512_alignr_epi8(self, self, N);
}
// rotate_left for batch<int16_t> on avx512bw: reinterprets the lanes as
// uint16_t, delegates to the uint16_t overload, and reinterprets the result back.
// NOTE(review): the summary for this file reads "0 additions & 12 deletions",
// so this overload appears to be removed by the change — confirm before use.
template <size_t N, class A>
XSIMD_INLINE batch<int16_t, A> rotate_left(batch<int16_t, A> const& self, requires_arch<avx512bw>) noexcept
{
return bitwise_cast<int16_t>(rotate_left<N, A>(bitwise_cast<uint16_t>(self), avx512bw {}));
}

// sadd
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
XSIMD_INLINE batch<T, A> sadd(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx512bw>) noexcept
Expand Down
2 changes: 2 additions & 0 deletions include/xsimd/arch/xsimd_avx512f.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ namespace xsimd
XSIMD_INLINE batch<T, A> incr_if(batch<T, A> const& self, Mask const& mask, requires_arch<common>) noexcept;
// Forward declaration of the requires_arch<common> overload of insert; the
// target position is carried by the index<I> tag type.
template <class A, class T, size_t I>
XSIMD_INLINE batch<T, A> insert(batch<T, A> const& self, T val, index<I>, requires_arch<common>) noexcept;
// Forward declaration of the requires_arch<common> overload of swizzle that
// takes its indices as a compile-time pack (batch_constant<ITy, A, Is...>).
template <class A, class T, class ITy, ITy... Is>
XSIMD_INLINE batch<T, A> swizzle(batch<T, A> const& self, batch_constant<ITy, A, Is...>, requires_arch<common>) noexcept;
// Forward declaration of the requires_arch<common> overload of transpose for
// a [matrix_begin, matrix_end) range of batch<uint16_t, A>.
template <class A>
XSIMD_INLINE void transpose(batch<uint16_t, A>* matrix_begin, batch<uint16_t, A>* matrix_end, requires_arch<common>) noexcept;
template <class A>
Expand Down
7 changes: 4 additions & 3 deletions include/xsimd/arch/xsimd_neon.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2771,10 +2771,11 @@ namespace xsimd
XSIMD_INLINE batch<T, A> rotate_left(batch<T, A> const& a, requires_arch<neon>) noexcept
{
using register_type = typename batch<T, A>::register_type;
// Adding modulo to avoid warning.
const detail::neon_dispatcher::binary dispatcher = {
std::make_tuple(wrap::rotate_left_u8<N>, wrap::rotate_left_s8<N>, wrap::rotate_left_u16<N>, wrap::rotate_left_s16<N>,
wrap::rotate_left_u32<N>, wrap::rotate_left_s32<N>, wrap::rotate_left_u64<N>, wrap::rotate_left_s64<N>,
wrap::rotate_left_f32<N>)
std::make_tuple(wrap::rotate_left_u8<N>, wrap::rotate_left_s8<N>, wrap::rotate_left_u16<N % 8>, wrap::rotate_left_s16<N % 8>,
wrap::rotate_left_u32<N % 4>, wrap::rotate_left_s32<N % 4>, wrap::rotate_left_u64<N % 2>, wrap::rotate_left_s64<N % 2>,
wrap::rotate_left_f32<N % 4>)
};
return dispatcher.apply(register_type(a), register_type(a));
}
Expand Down
13 changes: 12 additions & 1 deletion include/xsimd/arch/xsimd_ssse3.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -107,11 +107,22 @@ namespace xsimd

// rotate_left
// rotate_left for batch<uint8_t> on ssse3, with N counted in bytes.
// Passing `self` as both operands of _mm_alignr_epi8 makes the bytes shifted
// out at one end re-enter at the other.
// (A stale batch<uint16_t> header line used to sit directly above this
// signature; the uint16_t overload is defined separately and shifts by 2 * N.)
template <size_t N, class A>
XSIMD_INLINE batch<uint8_t, A> rotate_left(batch<uint8_t, A> const& self, requires_arch<ssse3>) noexcept
{
    return _mm_alignr_epi8(self, self, N);
}
// rotate_left for batch<int8_t> on ssse3: reinterprets the lanes as uint8_t,
// delegates to the uint8_t overload, and reinterprets the result back.
template <size_t N, class A>
XSIMD_INLINE batch<int8_t, A> rotate_left(batch<int8_t, A> const& self, requires_arch<ssse3>) noexcept
{
    const auto as_unsigned = bitwise_cast<uint8_t>(self);
    const auto rotated = rotate_left<N, A>(as_unsigned, ssse3 {});
    return bitwise_cast<int8_t>(rotated);
}

// rotate_left for batch<uint16_t> on ssse3, with N counted in 16-bit elements.
// _mm_alignr_epi8 takes its shift in bytes, so the element count is scaled by
// the element size (2 bytes) before being passed as the immediate.
template <size_t N, class A>
XSIMD_INLINE batch<uint16_t, A> rotate_left(batch<uint16_t, A> const& self, requires_arch<ssse3>) noexcept
{
    return _mm_alignr_epi8(self, self, sizeof(uint16_t) * N);
}
template <size_t N, class A>
XSIMD_INLINE batch<int16_t, A> rotate_left(batch<int16_t, A> const& self, requires_arch<ssse3>) noexcept
{
return bitwise_cast<int16_t>(rotate_left<N, A>(bitwise_cast<uint16_t>(self), ssse3 {}));
Expand Down
19 changes: 18 additions & 1 deletion test/test_batch_manip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ namespace xsimd
struct init_swizzle_base
{
using swizzle_vector_type = std::array<T, N>;
swizzle_vector_type lhs_in, exped_reverse, exped_fill, exped_dup, exped_ror, exped_rol;
swizzle_vector_type lhs_in, exped_reverse, exped_fill, exped_dup, exped_ror, exped_rol, exped_rol2;

template <int... Indices>
std::vector<swizzle_vector_type> create_swizzle_vectors()
Expand All @@ -42,12 +42,14 @@ namespace xsimd
exped_dup[i] = lhs_in[2 * (i / 2)];
exped_ror[i] = lhs_in[(i - 1) % N];
exped_rol[i] = lhs_in[(i + 1) % N];
exped_rol2[i] = lhs_in[(i + N - 1) % N];
}
vects.push_back(std::move(exped_reverse));
vects.push_back(std::move(exped_fill));
vects.push_back(std::move(exped_dup));
vects.push_back(std::move(exped_ror));
vects.push_back(std::move(exped_rol));
vects.push_back(std::move(exped_rol2));

return vects;
}
Expand Down Expand Up @@ -176,6 +178,20 @@ struct swizzle_test
CHECK_BATCH_EQ(b_res, b_exped);
}

void rotate_left_inv()
{
xsimd::init_swizzle_base<value_type, size> swizzle_base;
auto swizzle_vecs = swizzle_base.create_swizzle_vectors();
auto v_lhs = swizzle_vecs[0];
auto v_exped = swizzle_vecs[6];

B b_lhs = B::load_unaligned(v_lhs.data());
B b_exped = B::load_unaligned(v_exped.data());

B b_res = xsimd::rotate_left<size - 1>(b_lhs);
CHECK_BATCH_EQ(b_res, b_exped);
}

void swizzle_reverse()
{
xsimd::init_swizzle_base<value_type, size> swizzle_base;
Expand Down Expand Up @@ -248,6 +264,7 @@ TEST_CASE_TEMPLATE("[swizzle]", B, BATCH_SWIZZLE_TYPES)
SUBCASE("rotate")
{
Test.rotate_left();
Test.rotate_left_inv();
Test.rotate_right();
}

Expand Down
24 changes: 12 additions & 12 deletions test/test_shuffle.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -561,13 +561,13 @@ struct shuffle_test
CHECK_BATCH_EQ(b_res, b_ref);
}

void swizzle()
void shuffle()
{
B b_lhs = B::load_unaligned(lhs.data());
B b_rhs = B::load_unaligned(rhs.data());

{
struct swizzle_lo_generator
struct shuffle_lo_generator
{
static constexpr size_t get(size_t index, size_t size)
{
Expand All @@ -580,13 +580,13 @@ struct shuffle_test
ref[i] = lhs[size - i - 1];
B b_ref = B::load_unaligned(ref.data());

INFO("swizzle first batch");
B b_res = xsimd::shuffle(b_lhs, b_rhs, xsimd::make_batch_constant<mask_type, swizzle_lo_generator, arch_type>());
INFO("shuffle first batch");
B b_res = xsimd::shuffle(b_lhs, b_rhs, xsimd::make_batch_constant<mask_type, shuffle_lo_generator, arch_type>());
CHECK_BATCH_EQ(b_res, b_ref);
}

{
struct swizzle_hi_generator
struct shuffle_hi_generator
{
static constexpr size_t get(size_t index, size_t size)
{
Expand All @@ -599,8 +599,8 @@ struct shuffle_test
ref[i] = rhs[size - i - 1];
B b_ref = B::load_unaligned(ref.data());

INFO("swizzle second batch");
B b_res = xsimd::shuffle(b_lhs, b_rhs, xsimd::make_batch_constant<mask_type, swizzle_hi_generator, arch_type>());
INFO("shuffle second batch");
B b_res = xsimd::shuffle(b_lhs, b_rhs, xsimd::make_batch_constant<mask_type, shuffle_hi_generator, arch_type>());
CHECK_BATCH_EQ(b_res, b_ref);
}
}
Expand Down Expand Up @@ -709,9 +709,9 @@ TEST_CASE_TEMPLATE("[shuffle]", B, BATCH_FLOAT_TYPES, xsimd::batch<uint32_t>, xs
{
Test.select();
}
SUBCASE("swizzle")
SUBCASE("shuffle")
{
Test.swizzle();
Test.shuffle();
}
SUBCASE("transpose")
{
Expand All @@ -733,12 +733,12 @@ TEST_CASE_TEMPLATE("[small integer transpose]", B, xsimd::batch<uint16_t>, xsimd
}

#if (XSIMD_WITH_SSE2 && !XSIMD_WITH_AVX)
TEST_CASE_TEMPLATE("[small integer swizzle]", B, xsimd::batch<uint16_t>, xsimd::batch<int16_t>)
TEST_CASE_TEMPLATE("[small integer shuffle]", B, xsimd::batch<uint16_t>, xsimd::batch<int16_t>)
{
shuffle_test<B> Test;
SUBCASE("swizzle")
SUBCASE("shuffle")
{
Test.swizzle();
Test.shuffle();
}
}
#endif
Expand Down
8 changes: 1 addition & 7 deletions test/test_utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -591,13 +591,7 @@ namespace xsimd
#define BATCH_TYPES BATCH_INT_TYPES, BATCH_FLOAT_TYPES
#define BATCH_MATH_TYPES xsimd::batch<int32_t>, BATCH_FLOAT_TYPES

// Extra 32/64-bit integer batch types, defined only when AVX is absent or AVX2
// is available. BATCH_SWIZZLE_TYPES below no longer references this macro; it
// is kept in case other code still uses it.
#if !XSIMD_WITH_AVX || XSIMD_WITH_AVX2
#define BATCH_SWIZZLE_TAIL , xsimd::batch<uint32_t>, xsimd::batch<int32_t>, xsimd::batch<uint64_t>, xsimd::batch<int64_t>
#else
#define BATCH_SWIZZLE_TAIL
#endif

// Batch types exercised by the swizzle tests: float, complex and all integer
// batches. This is the single definition; an earlier, conflicting definition
// built from BATCH_SWIZZLE_TAIL was dropped to avoid redefining the macro.
#define BATCH_SWIZZLE_TYPES BATCH_FLOAT_TYPES, BATCH_COMPLEX_TYPES, BATCH_INT_TYPES

/********************
* conversion utils *
Expand Down
Loading