@@ -766,28 +766,36 @@ namespace xsimd
766766 XSIMD_INLINE void store(batch_bool<T, A> b, bool* mem, requires_arch<neon>) noexcept
767767 {
768768 uint8x16_t val = vshrq_n_u8(b.data, 7);
769- vst1q_u8((uint8_t*)mem, val);
769+ alignas(A::alignment()) uint8_t buffer[batch_bool<T, A>::size];
770+ vst1q_u8(buffer, val);
771+ memcpy(mem, buffer, sizeof(buffer));
770772 }
771773
// Diff hunk for the NEON `store` overload selected by detail::enable_sized_t<T, 2>:
// writes the lanes of the boolean batch `b` into the bool array `mem`.
// The mask in b.data is narrowed to 8-bit lanes with vqmovn_u16 and each byte is then
// shifted right by 7 (presumably mask lanes are all-ones or zero, so every byte ends
// up as 1 or 0 -- confirm against the batch_bool representation).
772774 template <class T, class A, detail::enable_sized_t<T, 2> = 0>
773775 XSIMD_INLINE void store(batch_bool<T, A> b, bool* mem, requires_arch<neon>) noexcept
774776 {
775777 uint8x8_t val = vshr_n_u8(vqmovn_u16(b.data), 7);
// Removed line: stored the 8-byte vector directly through `mem` cast to uint8_t*.
776- vst1_u8((uint8_t*)mem, val);
// Added lines: store into a local buffer aligned to A::alignment(), then memcpy the
// whole buffer (sizeof(buffer) bytes) into `mem`.
// NOTE(review): vst1_u8 writes 8 bytes, so this relies on batch_bool<T, A>::size
// being at least 8 here -- presumably exactly 8 for 2-byte lanes; confirm.
// NOTE(review): the motivation for the intermediate buffer is not stated in this
// hunk (likely avoiding a vector store through a cast bool pointer) -- confirm
// against the commit message.
778+ alignas(A::alignment()) uint8_t buffer[batch_bool<T, A>::size];
779+ vst1_u8(buffer, val);
780+ memcpy(mem, buffer, sizeof(buffer));
777781 }
778782
// Diff hunk for the NEON `store` overload selected by detail::enable_sized_t<T, 4>:
// writes the lanes of the boolean batch `b` into the bool array `mem`.
// b.data is narrowed twice (vqmovn_u32, then vqmovn_u16); vcombine_u16 pads the
// intermediate 16-bit half with zeros from vdup_n_u16(0) so the second narrowing has
// a full vector to work on. Each resulting byte is shifted right by 7.
779783 template <class T, class A, detail::enable_sized_t<T, 4> = 0>
780784 XSIMD_INLINE void store(batch_bool<T, A> b, bool* mem, requires_arch<neon>) noexcept
781785 {
782786 uint8x8_t val = vshr_n_u8(vqmovn_u16(vcombine_u16(vqmovn_u32(b.data), vdup_n_u16(0))), 7);
// Removed line: stored only lane 0 of the vector, viewed as one 32-bit word, through
// `mem` cast to uint32_t*.
783- vst1_lane_u32((uint32_t*)mem, vreinterpret_u32_u8(val), 0);
// Added lines: store all 8 bytes of `val` into a local aligned 8-byte buffer, then
// memcpy only the first batch_bool<T, A>::size bytes into `mem`; the remaining bytes
// come from the zero padding above and are not copied.
// NOTE(review): presumably batch_bool<T, A>::size is 4 here, matching the 4 bytes
// the removed store wrote -- confirm.
787+ alignas(A::alignment()) uint8_t buffer[8];
788+ vst1_u8(buffer, val);
789+ memcpy(mem, buffer, batch_bool<T, A>::size);
784790 }
785791
// Diff hunk for the NEON `store` overload selected by detail::enable_sized_t<T, 8>:
// writes the lanes of the boolean batch `b` into the bool array `mem`.
// b.data is narrowed three times (vqmovn_u64, vqmovn_u32, vqmovn_u16); after each of
// the first two steps the half-width result is padded with zeros (vdup_n_u32(0),
// vdup_n_u16(0)) via vcombine so the next narrowing has a full vector to work on.
// Each resulting byte is shifted right by 7.
786792 template <class T, class A, detail::enable_sized_t<T, 8> = 0>
787793 XSIMD_INLINE void store(batch_bool<T, A> b, bool* mem, requires_arch<neon>) noexcept
788794 {
789795 uint8x8_t val = vshr_n_u8(vqmovn_u16(vcombine_u16(vqmovn_u32(vcombine_u32(vqmovn_u64(b.data), vdup_n_u32(0))), vdup_n_u16(0))), 7);
// Removed line: stored only lane 0 of the vector, viewed as one 16-bit word, through
// `mem` cast to uint16_t*.
790- vst1_lane_u16((uint16_t*)mem, vreinterpret_u16_u8(val), 0);
// Added lines: store all 8 bytes of `val` into a local aligned 8-byte buffer, then
// memcpy only the first batch_bool<T, A>::size bytes into `mem`; the remaining bytes
// come from the zero padding above and are not copied.
// NOTE(review): presumably batch_bool<T, A>::size is 2 here, matching the 2 bytes
// the removed store wrote -- confirm.
796+ alignas(A::alignment()) uint8_t buffer[8];
797+ vst1_u8(buffer, val);
798+ memcpy(mem, buffer, batch_bool<T, A>::size);
791799 }
792800
793801 template <class A>
0 commit comments