@@ -148,6 +148,10 @@ constexpr bool IsSse2 = std::is_base_of_v<xsimd::sse2, Arch>;
148148template <typename Arch>
149149constexpr bool IsAvx2 = std::is_base_of_v<xsimd::avx2, Arch>;
150150
151+ // / Whether we are compiling for the Neon or above in the arm64 family.
152+ template <typename Arch>
153+ constexpr bool IsNeon = std::is_base_of_v<xsimd::neon, Arch>;
154+
/// Wrapper around ``xsimd::bitwise_lshift`` with optimizations for non-implemented sizes.
152156//
153157// We replace the variable left shift by a variable multiply with a power of two.
@@ -196,6 +200,15 @@ auto left_shift(const xsimd::batch<Int, Arch>& batch,
196200 return xsimd::bitwise_cast<Int>(shifted0 | shifted1);
197201 }
198202
203+ // TODO(xsimd) bug fixed likely in xsimd>14.0.0
204+ // https://github.com/xtensor-stack/xsimd/pull/1266
205+ if constexpr (IsNeon<Arch>) {
206+ using SInt = std::make_signed_t <Int>;
207+ constexpr auto signed_shifts =
208+ xsimd::batch_constant<SInt, Arch, static_cast <SInt>(kShifts )...>();
209+ return xsimd::kernel::bitwise_lshift (batch, signed_shifts.as_batch (), Arch{});
210+ }
211+
199212 return batch << shifts;
200213}
201214
@@ -252,6 +265,15 @@ auto right_shift_by_excess(const xsimd::batch<Int, Arch>& batch,
252265 return xsimd::bitwise_rshift<kMaxRShift >(left_shift (batch, kLShifts ));
253266 }
254267
268+ // TODO(xsimd) bug fixed likely in xsimd>14.0.0
269+ // https://github.com/xtensor-stack/xsimd/pull/1266
270+ if constexpr (IsNeon<Arch>) {
271+ using SInt = std::make_signed_t <Int>;
272+ constexpr auto signed_shifts =
273+ xsimd::batch_constant<SInt, Arch, static_cast <SInt>(kShifts )...>();
274+ return xsimd::kernel::bitwise_rshift (batch, signed_shifts.as_batch (), Arch{});
275+ }
276+
255277 return batch >> shifts;
256278}
257279
0 commit comments