Skip to content

Commit 6768db4

Browse files
committed
Add single shift optimization
1 parent 1d0dd83 commit 6768db4

3 files changed

Lines changed: 34 additions & 3 deletions

File tree

include/xsimd/arch/utils/shifts.hpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,19 @@ namespace xsimd
4141
return static_cast<I>((I { 1 } << bit_index) - I { 1 });
4242
}
4343

44+
template <class T, class A, T... Vs>
45+
constexpr bool all_equals(batch_constant<T, A, Vs...> c)
46+
{
47+
static_assert(sizeof...(Vs) > 0);
48+
49+
bool out = true;
50+
for (std::size_t k = 0; k < sizeof...(Vs); ++k)
51+
{
52+
out &= c.get(k) == c.get(0);
53+
}
54+
return out;
55+
}
56+
4457
template <class T, class T2, class A, T... Vs>
4558
XSIMD_INLINE batch<T, A> bitwise_lshift_as_twice_larger(
4659
batch<T, A> const& self, batch_constant<T, A, Vs...>) noexcept

include/xsimd/arch/xsimd_avx2.hpp

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -337,9 +337,14 @@ namespace xsimd
337337
// Missing implementations are dispatched to the `batch` overload in xsimd_api.
338338
template <class T, class A, T... Vs, detail::enable_sized_integral_t<T, 2> = 0>
339339
XSIMD_INLINE batch<T, A> bitwise_lshift(
340-
batch<T, A> const& self, batch_constant<T, A, Vs...>, requires_arch<avx2>) noexcept
340+
batch<T, A> const& self, batch_constant<T, A, Vs...> shifts, requires_arch<avx2>) noexcept
341341
{
342342
using uint_t = typename std::make_unsigned<T>::type;
343+
344+
XSIMD_IF_CONSTEXPR(utils::all_equals(shifts))
345+
{
346+
return bitwise_lshift<s.get(0), A>(self);
347+
}
343348
return bitwise_cast<T>(
344349
utils::bitwise_lshift_as_twice_larger<uint_t, uint32_t>(
345350
bitwise_cast<uint_t>(self),
@@ -348,9 +353,14 @@ namespace xsimd
348353

349354
template <class T, class A, T... Vs, detail::enable_sized_integral_t<T, 1> = 0>
350355
XSIMD_INLINE batch<T, A> bitwise_lshift(
351-
batch<T, A> const& self, batch_constant<T, A, Vs...>, requires_arch<avx2>) noexcept
356+
batch<T, A> const& self, batch_constant<T, A, Vs...> shifts, requires_arch<avx2>) noexcept
352357
{
353358
using uint_t = typename std::make_unsigned<T>::type;
359+
360+
XSIMD_IF_CONSTEXPR(utils::all_equals(shifts))
361+
{
362+
return bitwise_lshift<s.get(0), A>(self);
363+
}
354364
return bitwise_cast<T>(
355365
utils::bitwise_lshift_as_twice_larger<uint_t, uint16_t>(
356366
bitwise_cast<uint_t>(self),

include/xsimd/arch/xsimd_sse2.hpp

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -330,8 +330,12 @@ namespace xsimd
330330
// bitwise_lshift multiple (constant)
331331
template <class A, uint16_t... Vs>
332332
XSIMD_INLINE batch<uint16_t, A> bitwise_lshift(
333-
batch<uint16_t, A> const& self, batch_constant<uint16_t, A, Vs...>, requires_arch<sse2>) noexcept
333+
batch<uint16_t, A> const& self, batch_constant<uint16_t, A, Vs...> shifts, requires_arch<sse2>) noexcept
334334
{
335+
XSIMD_IF_CONSTEXPR(utils::all_equals(shifts))
336+
{
337+
return bitwise_lshift<s.get(0), A>(self);
338+
}
335339
constexpr auto mults = batch_constant<uint16_t, A, static_cast<uint16_t>(1u << Vs)...>();
336340
return _mm_mullo_epi16(self, mults.as_batch());
337341
}
@@ -340,6 +344,10 @@ namespace xsimd
340344
XSIMD_INLINE batch<uint8_t, A> bitwise_lshift(
341345
batch<uint8_t, A> const& self, batch_constant<uint8_t, A, Vs...> shifts, requires_arch<sse2>) noexcept
342346
{
347+
XSIMD_IF_CONSTEXPR(utils::all_equals(shifts))
348+
{
349+
return bitwise_lshift<s.get(0), A>(self);
350+
}
343351
return utils::bitwise_lshift_as_twice_larger<uint8_t, uint16_t>(self, shifts);
344352
}
345353

0 commit comments

Comments
 (0)