@@ -151,73 +151,6 @@ constexpr bool IsSse2 = std::is_base_of_v<xsimd::sse2, Arch>;
151151template <typename Arch>
152152constexpr bool IsAvx2 = std::is_base_of_v<xsimd::avx2, Arch>;
153153
154- // / Whether we are compiling for Neon or above in the arm64 family, i.e. ``Arch`` is or derives from ``xsimd::neon``.
155- template <typename Arch>
156- constexpr bool IsNeon = std::is_base_of_v<xsimd::neon, Arch>;
157-
158- // / Wrapper around ``xsimd::bitwise_lshift`` with optimizations for non-implemented sizes.
159- // /
160- // / Where possible, we replace the variable left shift by a variable multiplication by a power of two.
161- // /
162- // / This trick is borrowed from Daniel Lemire and Leonid Boytsov, Decoding billions of
163- // / integers per second through vectorization, Software Practice & Experience 45 (1),
164- // / 2015. http://arxiv.org/abs/1209.2137
165- // /
166- // / TODO(xsimd) Tracking in https://github.com/xtensor-stack/xsimd/pull/1220
167- // / When migrating, be sure to use the batch_constant overload, and not the batch one.
168- template <typename Arch, typename Int, Int... kShifts >
169- ARROW_FORCE_INLINE auto left_shift (const xsimd::batch<Int, Arch>& batch,
170- xsimd::batch_constant<Int, Arch, kShifts ...> shifts)
171- -> xsimd::batch<Int, Arch> {
172- constexpr bool kIsSse2 = IsSse2<Arch>;
173- constexpr bool kIsAvx2 = IsAvx2<Arch>;
174- static_assert (
175- !(kIsSse2 && kIsAvx2 ),
176- " In xsimd, an x86 arch is either part of the SSE family or of the AVX family,"
177- " not both. If this check fails, it means the assumptions made here to detect SSE "
178- " and AVX are out of date." );
179-
180- constexpr auto kMults = xsimd::make_batch_constant<Int, 1 , Arch>() << shifts;
181-
182- constexpr auto IntSize = sizeof (Int);
183-
184- // Sizes and architectures for which there is no variable left shift but there is a
185- // multiplication: multiply by the compile-time constants in kMults (1 << shift) instead.
186- if constexpr ( //
187- (kIsSse2 && (IntSize == sizeof (uint16_t ) || IntSize == sizeof (uint32_t ))) //
188- || (kIsAvx2 && (IntSize == sizeof (uint16_t ))) //
189- ) {
190- return batch * kMults ;
191- }
192-
193- // Architectures for which there is no variable left shift on uint8_t but a fallback
194- // exists for uint16_t: shift as uint16_t and mask so the two bytes do not mix.
195- if constexpr ((kIsSse2 || kIsAvx2 ) && (IntSize == sizeof (uint8_t ))) {
196- const auto batch16 = xsimd::bitwise_cast<uint16_t >(batch);
197-
198- constexpr auto kShifts0 = select_stride<uint16_t , 0 >(shifts);
199- const auto shifted0 = left_shift (batch16, kShifts0 ) & 0x00FF ; // keep only the low byte of each uint16_t
200-
201- constexpr auto kShifts1 = select_stride<uint16_t , 1 >(shifts);
202- const auto shifted1 = left_shift (batch16 & 0xFF00 , kShifts1 ); // shift only the high byte of each uint16_t
203-
204- return xsimd::bitwise_cast<Int>(shifted0 | shifted1);
205- }
206-
207- // TODO(xsimd) bug fixed in xsimd 14.1.0; the block below is only compiled for older versions.
208- // https://github.com/xtensor-stack/xsimd/pull/1266
209- #if XSIMD_VERSION_MAJOR < 14 || ((XSIMD_VERSION_MAJOR == 14) && XSIMD_VERSION_MINOR == 0)
210- if constexpr (IsNeon<Arch>) {
211- using SInt = std::make_signed_t <Int>;
212- constexpr auto signed_shifts =
213- xsimd::batch_constant<SInt, Arch, static_cast <SInt>(kShifts )...>();
214- return xsimd::kernel::bitwise_lshift (batch, signed_shifts.as_batch (), Arch{});
215- }
216- #endif
217-
218- return batch << shifts;
219- }
220-
221154// / Fallback for variable shift right.
222155// /
223156// / When we know that the relevant bits will not overflow, we can instead shift left all
@@ -243,9 +176,8 @@ ARROW_FORCE_INLINE auto right_shift_by_excess(
243176
244177 constexpr auto IntSize = sizeof (Int);
245178
246- // Architecture for which there is no variable right shift but a larger fallback exists.
247- // TODO(xsimd) Tracking for Avx2 in https://github.com/xtensor-stack/xsimd/pull/1220
248- // When migrating, be sure to use batch_constant overload, and not the batch one.
179+ // Architectures for which there is no variable right shift but a larger fallback
180+ // exists.
249181 if constexpr (kIsAvx2 && (IntSize == sizeof (uint8_t ) || IntSize == sizeof (uint16_t ))) {
250182 using twice_uint = SizedUint<2 * IntSize>;
251183
@@ -262,27 +194,17 @@ ARROW_FORCE_INLINE auto right_shift_by_excess(
262194 return xsimd::bitwise_cast<Int>(shifted0 | shifted1);
263195 }
264196
265- // These conditions are the ones matched in `left_shift`, i.e. the ones where variable
266- // shift right will not be available but a left shift (fallback) exists.
197+ // Architectures for which there is no variable right shift but a left shift exists
198+ // (possibly using the multiply trick inside of xsimd).
199+ // We use a variable left shift and fixed right shift.
267200 if constexpr (kIsSse2 && (IntSize != sizeof (uint64_t ))) {
268201 constexpr Int kMaxRShift = max_value (std::array{kShifts ...});
269202
270203 constexpr auto kLShifts =
271204 xsimd::make_batch_constant<Int, kMaxRShift , Arch>() - shifts;
272205
273- return xsimd::bitwise_rshift<kMaxRShift >(left_shift (batch, kLShifts ));
274- }
275-
276- // TODO(xsimd) bug fixed in xsimd 14.1.0
277- // https://github.com/xtensor-stack/xsimd/pull/1266
278- #if XSIMD_VERSION_MAJOR < 14 || ((XSIMD_VERSION_MAJOR == 14) && XSIMD_VERSION_MINOR == 0)
279- if constexpr (IsNeon<Arch>) {
280- using SInt = std::make_signed_t <Int>;
281- constexpr auto signed_shifts =
282- xsimd::batch_constant<SInt, Arch, static_cast <SInt>(kShifts )...>();
283- return xsimd::kernel::bitwise_rshift (batch, signed_shifts.as_batch (), Arch{});
206+ return xsimd::bitwise_rshift<kMaxRShift >(batch << kLShifts );
284207 }
285- #endif
286208
287209 return batch >> shifts;
288210}
@@ -1040,7 +962,7 @@ struct LargeKernel {
1040962
1041963 const auto high_swizzled = xsimd::swizzle (bytes, kHighSwizzles );
1042964 const auto high_words = xsimd::bitwise_cast<unpacked_type>(high_swizzled);
1043- const auto high_shifted = left_shift ( high_words, kHighLShifts ) ;
965+ const auto high_shifted = high_words << kHighLShifts ;
1044966
1045967 // We can have a single mask and apply it after OR because the shifts will ensure that
1046968 // there are zeros where the high/low values are incomplete.
0 commit comments