Skip to content

Commit ead3339

Browse files
AntoinePrv authored and serge-sans-paille committed
Add bitwise-shift batch constant api
1 parent b8f7ebe commit ead3339

File tree

7 files changed

+251
-7
lines changed

7 files changed

+251
-7
lines changed
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
/***************************************************************************
2+
* Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and *
3+
* Martin Renou *
4+
* Copyright (c) QuantStack *
5+
* Copyright (c) Serge Guelton *
6+
* Copyright (c) Marco Barbone *
7+
* *
8+
* Distributed under the terms of the BSD 3-Clause License. *
9+
* *
10+
* The full license is in the file LICENSE, distributed with this software. *
11+
****************************************************************************/
12+
13+
#ifndef XSIMD_UTILS_SHIFTS_HPP
14+
#define XSIMD_UTILS_SHIFTS_HPP
15+
16+
#include "../../config/xsimd_inline.hpp"
17+
#include "../../types/xsimd_batch.hpp"
18+
#include "../../types/xsimd_batch_constant.hpp"
19+
#include "../../types/xsimd_traits.hpp"
20+
21+
namespace xsimd
22+
{
23+
namespace kernel
24+
{
25+
namespace utils
26+
{
27+
/**
 * Index-based generator that picks one value out of every `length`
 * consecutive values of the pack `Vs`, starting at position `offset`.
 *
 * `get(i, n)` returns `Vs[length * i + offset]` converted to `K`; the second
 * argument is ignored. It is used in this file as the generator type handed
 * to `make_batch_constant`.
 */
template <typename I, I offset, I length, I... Vs>
struct select_stride
{
    template <typename K>
    static constexpr K get(K i, K)
    {
        // Materialize the pack so that it can be indexed.
        constexpr I table[] = { Vs... };
        const auto position = length * i + offset;
        return static_cast<K>(table[position]);
    }
};
37+
38+
/**
 * Build a value of type `I` whose `bit_index` least significant bits are set
 * and whose remaining bits are clear.
 *
 * @param bit_index number of low bits to set; expected to be non-negative.
 * @return the mask. When `bit_index` is greater than or equal to the bit
 *         width of `I`, every bit is set.
 *
 * The arithmetic is carried out in `unsigned long long` so that no signed
 * overflow happens (e.g. `lsb_mask<int32_t>(31)`) and no shift count ever
 * reaches the width of the shifted type.
 */
template <typename I>
constexpr I lsb_mask(I bit_index)
{
    using wide_t = unsigned long long;
    constexpr wide_t width = 8 * sizeof(I);
    static_assert(width <= 8 * sizeof(wide_t), "lsb_mask: integer type is too wide");

    const wide_t count = static_cast<wide_t>(bit_index);
    if (count >= width)
    {
        // Shifting by the full width (or more) is undefined: saturate instead.
        return static_cast<I>(~I { 0 });
    }
    // Here count < width <= 64, hence the shift below is always well defined.
    return static_cast<I>((wide_t { 1 } << count) - wide_t { 1 });
}
47+
48+
template <class T, class A, T V0, T... Vs>
49+
constexpr bool all_equals(batch_constant<T, A, V0, Vs...> c)
50+
{
51+
return (c == std::integral_constant<T, V0> {}).all();
52+
}
53+
54+
template <class T, class A, T... Vs>
55+
XSIMD_INLINE batch<T, A> bitwise_lshift_as_twice_larger(
56+
batch<T, A> const& self, batch_constant<T, A, Vs...>) noexcept
57+
{
58+
using T2 = widen_t<T>;
59+
60+
const auto self2 = bitwise_cast<T2>(self);
61+
62+
// Lower byte: shift as twice the size and mask bits flowing to higher byte.
63+
constexpr auto shifts_lo = make_batch_constant<T2, select_stride<T, 0, 2, Vs...>, A>();
64+
constexpr auto mask_lo = lsb_mask<T2>(8 * sizeof(T));
65+
const auto shifted_lo = bitwise_lshift(self2, shifts_lo);
66+
constexpr auto batch_mask_lo = make_batch_constant<T2, mask_lo, A>();
67+
const auto masked_lo = bitwise_and(shifted_lo, batch_mask_lo.as_batch());
68+
69+
// Higher byte: mask bits that would flow from lower byte and shift as twice the size.
70+
constexpr auto shifts_hi = make_batch_constant<T2, select_stride<T, 1, 2, Vs...>, A>();
71+
constexpr auto mask_hi = mask_lo << (8 * sizeof(T));
72+
constexpr auto batch_mask_hi = make_batch_constant<T2, mask_hi, A>();
73+
const auto masked_hi = bitwise_and(self2, batch_mask_hi.as_batch());
74+
const auto shifted_hi = bitwise_lshift(masked_hi, shifts_hi);
75+
76+
return bitwise_cast<T>(bitwise_or(masked_lo, shifted_hi));
77+
}
78+
}
79+
}
80+
}
81+
82+
#endif

include/xsimd/arch/xsimd_avx2.hpp

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
#include "../types/xsimd_avx2_register.hpp"
1919
#include "../types/xsimd_batch_constant.hpp"
20+
#include "./utils/shifts.hpp"
2021

2122
#include <limits>
2223

@@ -332,6 +333,29 @@ namespace xsimd
332333
}
333334
}
334335

336+
// bitwise_lshift multiple (constant) specific implementations.
337+
// Missing implementations are dispatched to the `batch` overload in xsimd_api.
338+
// The 1 byte constant implementation calls the 2 bytes constant version, the 2 bytes
339+
// constant version calls into the 4 bytes version which resolves to the dynamic one above.
340+
template <class A, class T, T... Vs,
341+
std::enable_if_t<std::is_integral<T>::value && (sizeof(T) <= 2), int> = 0>
342+
XSIMD_INLINE batch<T, A> bitwise_lshift(
343+
batch<T, A> const& self, batch_constant<T, A, Vs...> shifts, requires_arch<avx2> req) noexcept
344+
{
345+
using uint_t = typename std::make_unsigned<T>::type;
346+
347+
// AVX2 only supports 16-bit shifts with a uniform bitshift value,
348+
// otherwise emulate using 32-bit shifts.
349+
XSIMD_IF_CONSTEXPR(utils::all_equals(shifts))
350+
{
351+
return bitwise_lshift<shifts.get(0), A>(self, req);
352+
}
353+
return bitwise_cast<T>(
354+
utils::bitwise_lshift_as_twice_larger<uint_t>(
355+
bitwise_cast<uint_t>(self),
356+
batch_constant<uint_t, A, static_cast<uint_t>(Vs)...> {}));
357+
}
358+
335359
// bitwise_or
336360
template <class A, class T, class = std::enable_if_t<std::is_integral<T>::value>>
337361
XSIMD_INLINE batch<T, A> bitwise_or(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx2>) noexcept

include/xsimd/arch/xsimd_sse2.hpp

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818

1919
#include "../types/xsimd_batch_constant.hpp"
2020
#include "../types/xsimd_sse2_register.hpp"
21+
#include "./utils/shifts.hpp"
2122

2223
namespace xsimd
2324
{
@@ -326,6 +327,36 @@ namespace xsimd
326327
return bitwise_lshift<shift>(self, common {});
327328
}
328329

330+
// bitwise_lshift multiple (constant)
331+
// Missing implementations are dispatched to the `batch` overload in xsimd_api.
332+
template <class A, class T, T... Vs, detail::enable_sized_integral_t<T, 2> = 0>
333+
XSIMD_INLINE batch<T, A> bitwise_lshift(
334+
batch<T, A> const& self, batch_constant<T, A, Vs...> shifts, requires_arch<sse2> req) noexcept
335+
{
336+
XSIMD_IF_CONSTEXPR(utils::all_equals(shifts))
337+
{
338+
return bitwise_lshift<shifts.get(0), A>(self, req);
339+
}
340+
constexpr auto mults = batch_constant<T, A, static_cast<T>(1u << Vs)...>();
341+
return _mm_mullo_epi16(self, mults.as_batch());
342+
}
343+
344+
template <class A, class T, T... Vs, detail::enable_sized_integral_t<T, 1> = 0>
345+
XSIMD_INLINE batch<T, A> bitwise_lshift(
346+
batch<T, A> const& self, batch_constant<T, A, Vs...> shifts, requires_arch<sse2> req) noexcept
347+
{
348+
using uint_t = typename std::make_unsigned<T>::type;
349+
350+
XSIMD_IF_CONSTEXPR(utils::all_equals(shifts))
351+
{
352+
return bitwise_lshift<shifts.get(0), A>(self, req);
353+
}
354+
return bitwise_cast<T>(
355+
utils::bitwise_lshift_as_twice_larger<uint_t>(
356+
bitwise_cast<uint_t>(self),
357+
batch_constant<uint_t, A, static_cast<uint_t>(Vs)...> {}));
358+
}
359+
329360
// bitwise_not
330361
template <class A>
331362
XSIMD_INLINE batch<float, A> bitwise_not(batch<float, A> const& self, requires_arch<sse2>) noexcept

include/xsimd/arch/xsimd_sse4_1.hpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,15 @@ namespace xsimd
4141
return _mm_ceil_pd(self);
4242
}
4343

44+
// bitwise_lshift multiple (constant)
45+
template <class A, uint32_t... Vs>
46+
XSIMD_INLINE batch<uint32_t, A> bitwise_lshift(
47+
batch<uint32_t, A> const& self, batch_constant<uint32_t, A, Vs...>, requires_arch<sse4_1>) noexcept
48+
{
49+
constexpr auto mults = batch_constant<uint32_t, A, static_cast<uint32_t>(1u << Vs)...>();
50+
return _mm_mullo_epi32(self, mults.as_batch());
51+
}
52+
4453
// fast_cast
4554
namespace detail
4655
{

include/xsimd/types/xsimd_api.hpp

Lines changed: 47 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -353,6 +353,43 @@ namespace xsimd
353353
return kernel::bitwise_cast<A>(x, batch<T_out, A> {}, A {});
354354
}
355355

356+
namespace detail
357+
{
358+
// Detection for kernel overloads accepting ``batch_constant`` in ``bitwise_lshift``
359+
// directly (or in a parent register function).
360+
// The ``batch_constant`` overload is a rare but useful optimization.
361+
// Running the detection here is less error prone than to add a fallback to all
362+
// architectures.
363+
364+
template <class Arch, class Batch, class BatchConstant, class = void>
365+
struct has_bitwise_lshift_batch_const : std::false_type
366+
{
367+
};
368+
369+
template <class Arch, class Batch, class BatchConstant>
370+
struct has_bitwise_lshift_batch_const<
371+
Arch, Batch, BatchConstant,
372+
void_t<decltype(kernel::bitwise_lshift<Arch>(
373+
std::declval<Batch>(), std::declval<BatchConstant>(), Arch {}))>>
374+
: std::true_type
375+
{
376+
};
377+
378+
template <class Arch, class T, T... Values>
379+
XSIMD_INLINE batch<T, Arch> bitwise_lshift_batch_const(batch<T, Arch> const& x, batch_constant<T, Arch, Values...> shift, std::true_type) noexcept
380+
{
381+
// Optimized ``batch_constant`` implementation
382+
return kernel::bitwise_lshift<Arch>(x, shift, Arch {});
383+
}
384+
385+
template <class Arch, class T, T... Values>
386+
XSIMD_INLINE batch<T, Arch> bitwise_lshift_batch_const(batch<T, Arch> const& x, batch_constant<T, Arch, Values...> shift, std::false_type) noexcept
387+
{
388+
// Fallback to regular run-time implementation
389+
return kernel::bitwise_lshift<Arch>(x, shift.as_batch(), Arch {});
390+
}
391+
}
392+
356393
/**
357394
* @ingroup batch_bitwise
358395
*
@@ -367,17 +404,24 @@ namespace xsimd
367404
detail::static_check_supported_config<T, A>();
368405
return kernel::bitwise_lshift<A>(x, shift, A {});
369406
}
407+
template <size_t shift, class T, class A>
408+
XSIMD_INLINE batch<T, A> bitwise_lshift(batch<T, A> const& x) noexcept
409+
{
410+
detail::static_check_supported_config<T, A>();
411+
return kernel::bitwise_lshift<shift, A>(x, A {});
412+
}
370413
template <class T, class A>
371414
XSIMD_INLINE batch<T, A> bitwise_lshift(batch<T, A> const& x, batch<T, A> const& shift) noexcept
372415
{
373416
detail::static_check_supported_config<T, A>();
374417
return kernel::bitwise_lshift<A>(x, shift, A {});
375418
}
376-
template <size_t shift, class T, class A>
377-
XSIMD_INLINE batch<T, A> bitwise_lshift(batch<T, A> const& x) noexcept
419+
template <class T, class A, T... Values>
420+
XSIMD_INLINE batch<T, A> bitwise_lshift(batch<T, A> const& x, batch_constant<T, A, Values...> shift) noexcept
378421
{
379422
detail::static_check_supported_config<T, A>();
380-
return kernel::bitwise_lshift<shift, A>(x, A {});
423+
using has_batch_const_impl = detail::has_bitwise_lshift_batch_const<A, decltype(x), decltype(shift)>;
424+
return detail::bitwise_lshift_batch_const<A>(x, shift, has_batch_const_impl {});
381425
}
382426

383427
/**

include/xsimd/types/xsimd_traits.hpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#ifndef XSIMD_TRAITS_HPP
1313
#define XSIMD_TRAITS_HPP
1414

15+
#include <cstdint>
1516
#include <type_traits>
1617

1718
#include "xsimd_batch.hpp"
@@ -421,6 +422,21 @@ namespace xsimd
421422
using type = uint16_t;
422423
};
423424
// Signed integers: each specialization below maps a signed integer type to
// the signed integer type with twice its bit width.
template <>
425+
struct widen<int32_t>
426+
{
427+
using type = int64_t;
428+
};
429+
template <>
430+
struct widen<int16_t>
431+
{
432+
using type = int32_t;
433+
};
434+
template <>
435+
struct widen<int8_t>
436+
{
437+
using type = int16_t;
438+
};
439+
template <>
424440
struct widen<float>
425441
{
426442
using type = double;

test/test_xsimd_api.cpp

Lines changed: 42 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -351,7 +351,7 @@ struct xsimd_api_integral_types_functions
351351
{
352352
using value_type = typename scalar_type<T>::type;
353353

354-
void test_bitwise_lshift()
354+
void test_bitwise_lshift_single()
355355
{
356356
constexpr int shift = 3;
357357
value_type val0(12);
@@ -364,6 +364,35 @@ struct xsimd_api_integral_types_functions
364364
CHECK_EQ(extract(cr), r);
365365
}
366366

367+
/* Test only when T is a batch, not a scalar. */
368+
template <typename U = T>
369+
void test_bitwise_lshift_multiple(T const& vals, typename std::enable_if<!std::is_integral<U>::value, int>::type = 0)
370+
{
371+
#ifndef XSIMD_NO_SUPPORTED_ARCHITECTURE
372+
constexpr auto Max = static_cast<value_type>(std::numeric_limits<value_type>::digits);
373+
constexpr auto max_batch = xsimd::make_batch_constant<value_type, Max>();
374+
constexpr auto shifts = xsimd::make_iota_batch_constant<value_type>() % max_batch;
375+
376+
{
377+
auto shifted = xsimd::bitwise_lshift(vals, shifts.as_batch());
378+
auto shifted_cst = xsimd::bitwise_lshift(vals, shifts);
379+
380+
for (std::size_t i = 0; i < shifts.size; ++i)
381+
{
382+
const auto expected = static_cast<value_type>(vals.get(i) << shifts.get(i));
383+
CHECK_EQ(shifted.get(i), expected);
384+
CHECK_EQ(shifted_cst.get(i), expected);
385+
}
386+
}
387+
#endif
388+
}
389+
390+
/* Test multiple does not make sense when T is scalar. */
391+
template <typename U = T>
392+
void test_bitwise_lshift_multiple(T const&, typename std::enable_if<std::is_integral<U>::value, int>::type = 0)
393+
{
394+
}
395+
367396
void test_bitwise_rshift()
368397
{
369398
constexpr int shift = 3;
@@ -424,11 +453,20 @@ struct xsimd_api_integral_types_functions
424453

425454
TEST_CASE_TEMPLATE("[xsimd api | integral types functions]", B, INTEGRAL_TYPES)
426455
{
427-
xsimd_api_integral_types_functions<B> Test;
456+
using test_type = xsimd_api_integral_types_functions<B>;
457+
458+
test_type Test;
459+
460+
SUBCASE("test_bitwise_lshift_single")
461+
{
462+
Test.test_bitwise_lshift_single();
463+
}
428464

429-
SUBCASE("bitwise_lshift")
465+
SUBCASE("bitwise_lshift_multiple")
430466
{
431-
Test.test_bitwise_lshift();
467+
Test.test_bitwise_lshift_multiple({ 1 });
468+
Test.test_bitwise_lshift_multiple({ 3 });
469+
Test.test_bitwise_lshift_multiple({ 127 });
432470
}
433471

434472
SUBCASE("bitwise_rshift")

0 commit comments

Comments
 (0)