Skip to content

Commit ec6cf6a

Browse files
committed
Unsigned bitwise shifts are never called on neon
1 parent c3a8d37 commit ec6cf6a

File tree

2 files changed

+24
-16
lines changed

2 files changed

+24
-16
lines changed

include/xsimd/arch/xsimd_neon.hpp

Lines changed: 21 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2382,9 +2382,10 @@ namespace xsimd
23822382
}
23832383

23842384
template <class A, class T, detail::enable_sized_unsigned_t<T, 1> = 0>
2385-
XSIMD_INLINE batch<T, A> bitwise_lshift(batch<T, A> const& lhs, batch<as_signed_integer_t<T>, A> const& rhs, requires_arch<neon>) noexcept
2385+
XSIMD_INLINE batch<T, A> bitwise_lshift(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<neon>) noexcept
23862386
{
2387-
return vshlq_u8(lhs, rhs);
2387+
// vshlq_u8 takes a signed shift count: the reinterpret preserves every in-range count, and out-of-range shifts are UB anyway
2388+
return vshlq_u8(lhs, vreinterpretq_s8_u8(rhs));
23882389
}
23892390

23902391
template <class A, class T, detail::enable_sized_signed_t<T, 1> = 0>
@@ -2394,9 +2395,10 @@ namespace xsimd
23942395
}
23952396

23962397
template <class A, class T, detail::enable_sized_unsigned_t<T, 2> = 0>
2397-
XSIMD_INLINE batch<T, A> bitwise_lshift(batch<T, A> const& lhs, batch<as_signed_integer_t<T>, A> const& rhs, requires_arch<neon>) noexcept
2398+
XSIMD_INLINE batch<T, A> bitwise_lshift(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<neon>) noexcept
23982399
{
2399-
return vshlq_u16(lhs, rhs);
2400+
// vshlq_u16 takes a signed shift count: the reinterpret preserves every in-range count, and out-of-range shifts are UB anyway
2401+
return vshlq_u16(lhs, vreinterpretq_s16_u16(rhs));
24002402
}
24012403

24022404
template <class A, class T, detail::enable_sized_signed_t<T, 2> = 0>
@@ -2406,9 +2408,10 @@ namespace xsimd
24062408
}
24072409

24082410
template <class A, class T, detail::enable_sized_unsigned_t<T, 4> = 0>
2409-
XSIMD_INLINE batch<T, A> bitwise_lshift(batch<T, A> const& lhs, batch<as_signed_integer_t<T>, A> const& rhs, requires_arch<neon>) noexcept
2411+
XSIMD_INLINE batch<T, A> bitwise_lshift(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<neon>) noexcept
24102412
{
2411-
return vshlq_u32(lhs, rhs);
2413+
// vshlq_u32 takes a signed shift count: the reinterpret preserves every in-range count, and out-of-range shifts are UB anyway
2414+
return vshlq_u32(lhs, vreinterpretq_s32_u32(rhs));
24122415
}
24132416

24142417
template <class A, class T, detail::enable_sized_signed_t<T, 4> = 0>
@@ -2418,9 +2421,10 @@ namespace xsimd
24182421
}
24192422

24202423
template <class A, class T, detail::enable_sized_unsigned_t<T, 8> = 0>
2421-
XSIMD_INLINE batch<T, A> bitwise_lshift(batch<T, A> const& lhs, batch<as_signed_integer_t<T>, A> const& rhs, requires_arch<neon>) noexcept
2424+
XSIMD_INLINE batch<T, A> bitwise_lshift(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<neon>) noexcept
24222425
{
2423-
return vshlq_u64(lhs, rhs);
2426+
// vshlq_u64 takes a signed shift count: the reinterpret preserves every in-range count, and out-of-range shifts are UB anyway
2427+
return vshlq_u64(lhs, vreinterpretq_s64_u64(rhs));
24242428
}
24252429

24262430
template <class A, class T, detail::enable_sized_signed_t<T, 8> = 0>
@@ -2618,9 +2622,10 @@ namespace xsimd
26182622
}
26192623

26202624
template <class A, class T, detail::enable_sized_unsigned_t<T, 1> = 0>
2621-
XSIMD_INLINE batch<T, A> bitwise_rshift(batch<T, A> const& lhs, batch<as_signed_integer_t<T>, A> const& rhs, requires_arch<neon>) noexcept
2625+
XSIMD_INLINE batch<T, A> bitwise_rshift(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<neon>) noexcept
26222626
{
2623-
return vshlq_u8(lhs, vnegq_s8(rhs));
2627+
// Reinterpret the count as signed so it can be negated (right shift = vshlq_u8 by a negative count); in-range counts are preserved, and out-of-range shifts are UB anyway
2628+
return vshlq_u8(lhs, vnegq_s8(vreinterpretq_s8_u8(rhs)));
26242629
}
26252630

26262631
template <class A, class T, detail::enable_sized_signed_t<T, 1> = 0>
@@ -2630,9 +2635,10 @@ namespace xsimd
26302635
}
26312636

26322637
template <class A, class T, detail::enable_sized_unsigned_t<T, 2> = 0>
2633-
XSIMD_INLINE batch<T, A> bitwise_rshift(batch<T, A> const& lhs, batch<as_signed_integer_t<T>, A> const& rhs, requires_arch<neon>) noexcept
2638+
XSIMD_INLINE batch<T, A> bitwise_rshift(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<neon>) noexcept
26342639
{
2635-
return vshlq_u16(lhs, vnegq_s16(rhs));
2640+
// Reinterpret the count as signed so it can be negated (right shift = vshlq_u16 by a negative count); in-range counts are preserved, and out-of-range shifts are UB anyway
2641+
return vshlq_u16(lhs, vnegq_s16(vreinterpretq_s16_u16(rhs)));
26362642
}
26372643

26382644
template <class A, class T, detail::enable_sized_signed_t<T, 2> = 0>
@@ -2642,9 +2648,10 @@ namespace xsimd
26422648
}
26432649

26442650
template <class A, class T, detail::enable_sized_unsigned_t<T, 4> = 0>
2645-
XSIMD_INLINE batch<T, A> bitwise_rshift(batch<T, A> const& lhs, batch<as_signed_integer_t<T>, A> const& rhs, requires_arch<neon>) noexcept
2651+
XSIMD_INLINE batch<T, A> bitwise_rshift(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<neon>) noexcept
26462652
{
2647-
return vshlq_u32(lhs, vnegq_s32(rhs));
2653+
// Reinterpret the count as signed so it can be negated (right shift = vshlq_u32 by a negative count); in-range counts are preserved, and out-of-range shifts are UB anyway
2654+
return vshlq_u32(lhs, vnegq_s32(vreinterpretq_s32_u32(rhs)));
26482655
}
26492656

26502657
template <class A, class T, detail::enable_sized_signed_t<T, 4> = 0>

include/xsimd/arch/xsimd_neon64.hpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1209,9 +1209,10 @@ namespace xsimd
12091209
}
12101210

12111211
template <class A, class T, detail::enable_sized_unsigned_t<T, 8> = 0>
1212-
XSIMD_INLINE batch<T, A> bitwise_rshift(batch<T, A> const& lhs, batch<as_signed_integer_t<T>, A> const& rhs, requires_arch<neon64>) noexcept
1212+
XSIMD_INLINE batch<T, A> bitwise_rshift(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<neon64>) noexcept
12131213
{
1214-
return vshlq_u64(lhs, vnegq_s64(rhs));
1214+
// Reinterpret the count as signed so it can be negated (right shift = vshlq_u64 by a negative count); in-range counts are preserved, and out-of-range shifts are UB anyway
1215+
return vshlq_u64(lhs, vnegq_s64(vreinterpretq_s64_u64(rhs)));
12151216
}
12161217

12171218
template <class A, class T, detail::enable_sized_signed_t<T, 8> = 0>

0 commit comments

Comments
 (0)