|
| 1 | +/*************************************************************************** |
| 2 | + * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and * |
| 3 | + * Martin Renou * |
| 4 | + * Copyright (c) QuantStack * |
| 5 | + * Copyright (c) Serge Guelton * |
| 6 | + * Copyright (c) Marco Barbone * |
| 7 | + * * |
| 8 | + * Distributed under the terms of the BSD 3-Clause License. * |
| 9 | + * * |
| 10 | + * The full license is in the file LICENSE, distributed with this software. * |
| 11 | + ****************************************************************************/ |
| 12 | + |
| 13 | +#ifndef XSIMD_UTILS_SHIFTS_HPP |
| 14 | +#define XSIMD_UTILS_SHIFTS_HPP |
| 15 | + |
| 16 | +#include "../../config/xsimd_inline.hpp" |
| 17 | +#include "../../types/xsimd_batch.hpp" |
| 18 | +#include "../../types/xsimd_batch_constant.hpp" |
| 19 | + |
| 20 | +namespace xsimd |
| 21 | +{ |
| 22 | + namespace kernel |
| 23 | + { |
| 24 | + namespace utils |
| 25 | + { |
// Compile-time generator that picks every `length`-th value of the pack
// `Vs...`, starting at position `offset`.
//
// `get(i, n)` returns `Vs[offset + i * length]` converted to `K`. The second
// argument is ignored; it is presumably the batch size that
// make_batch_constant passes to its generators (confirm against that API).
// No bounds check is performed on the computed position.
template <typename I, I offset, I length, I... Vs>
struct select_stride
{
    // The whole pack, materialised so it can be indexed in constant expressions.
    static constexpr I values_array[sizeof...(Vs)] = { Vs... };

    template <class K>
    static constexpr K get(K i, K /*unused*/)
    {
        return static_cast<K>(values_array[offset + i * length]);
    }
};
| 37 | + |
// Returns a value of type `I` whose `bit_index` least-significant bits are set
// and whose remaining bits are clear.
//
// The mask is built in `unsigned long long` rather than in `I`, because
// shifting `I { 1 }` by the full width of `I` (or into the sign bit of a
// signed `I`) is undefined behaviour and is rejected outright in constant
// expressions. When `bit_index` is greater than or equal to the number of bits
// in `I`, every bit of the result is set. A negative `bit_index` converts to a
// huge unsigned value and therefore also yields the all-ones mask.
//
// Kept as a single return statement so the function remains usable as a
// C++11 `constexpr` function.
template <typename I>
constexpr I lsb_mask(I bit_index)
{
    static_assert(sizeof(I) <= sizeof(unsigned long long),
                  "lsb_mask only supports integer types no wider than unsigned long long");
    return static_cast<unsigned long long>(bit_index) >= 8 * sizeof(I)
        ? static_cast<I>(~0ULL)
        // Here 0 <= bit_index < 8 * sizeof(I) <= 64, so the shift is well defined.
        : static_cast<I>((1ULL << bit_index) - 1ULL);
}
| 43 | + |
| 44 | + template <class T, class T2, class A, T... Vs> |
| 45 | + XSIMD_INLINE batch<T, A> bitwise_lshift_as_twice_larger( |
| 46 | + batch<T, A> const& self, batch_constant<T, A, Vs...>) noexcept |
| 47 | + { |
| 48 | + static_assert(sizeof(T2) == 2 * sizeof(T), "One size must be twice the other"); |
| 49 | + |
| 50 | + const auto self2 = bitwise_cast<T2>(self); |
| 51 | + |
| 52 | + // Lower byte: shift as twice the size and mask bits flowing to higher byte. |
| 53 | + constexpr auto shifts_lo = make_batch_constant<T2, select_stride<T, 0, 2, Vs...>, A>(); |
| 54 | + constexpr auto mask_lo = lsb_mask<T2>(8 * sizeof(T)); |
| 55 | + const auto shifted_lo = bitwise_lshift(self2, shifts_lo); |
| 56 | + constexpr auto batch_mask_lo = make_batch_constant<T2, mask_lo, A>(); |
| 57 | + const auto masked_lo = bitwise_and(shifted_lo, batch_mask_lo.as_batch()); |
| 58 | + |
| 59 | + // Higher byte: mask bits that would flow from lower byte and shift as twice the size. |
| 60 | + constexpr auto shifts_hi = make_batch_constant<T2, select_stride<T, 1, 2, Vs...>, A>(); |
| 61 | + constexpr auto mask_hi = mask_lo << (8 * sizeof(T)); |
| 62 | + constexpr auto batch_mask_hi = make_batch_constant<T2, mask_hi, A>(); |
| 63 | + const auto masked_hi = bitwise_and(self2, batch_mask_hi.as_batch()); |
| 64 | + const auto shifted_hi = bitwise_lshift(masked_hi, shifts_hi); |
| 65 | + |
| 66 | + return bitwise_cast<T>(bitwise_or(masked_lo, shifted_hi)); |
| 67 | + } |
| 68 | + } |
| 69 | + } |
| 70 | +} |
| 71 | + |
| 72 | +#endif |
0 commit comments