Skip to content

Commit 73421a3

Browse files
committed
Implement NEON64, use template argument deduction
1 parent 2b851b8 commit 73421a3

File tree

2 files changed

+10
-1
lines changed

2 files changed

+10
-1
lines changed

include/xsimd/arch/xsimd_neon.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3365,7 +3365,7 @@ namespace xsimd
33653365
template <class A, class T>
33663366
XSIMD_INLINE size_t count(batch_bool<T, A> const& self, requires_arch<neon>) noexcept
33673367
{
3368-
uint8x16_t popcnts = vcntq_u8(bitwise_cast<uint8_t, T, A>(bitwise_cast<T, A>(self)));
3368+
uint8x16_t popcnts = vcntq_u8(bitwise_cast<uint8_t>(bitwise_cast(self)));
33693369
uint64x2_t psum = vpaddlq_u32(vpaddlq_u16(vpaddlq_u8(popcnts)));
33703370
uint64x1_t total = vadd_u64(vget_low_u64(psum), vget_high_u64(psum));
33713371

include/xsimd/arch/xsimd_neon64.hpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -652,6 +652,15 @@ namespace xsimd
652652
return vaddvq_u32(positioned);
653653
}
654654

655+
/*********
656+
* count *
657+
*********/
658+
// Counts the lanes of `self` that are set, for the neon64 architecture.
// vcntq_u8 yields the population count of each byte of the mask, and
// vaddlvq_u8 adds those sixteen per-byte counts into a single scalar, i.e.
// the total number of set bits in the 128-bit register. Dividing by the bit
// width of T (sizeof(T) * 8) turns that bit total into a lane count.
// NOTE(review): this presumes every true lane has all of its bits set and
// every false lane has none - confirm against the batch_bool representation.
template <class A, class T>
659+
XSIMD_INLINE size_t count(batch_bool<T, A> const& self, requires_arch<neon64>) noexcept
660+
{
661+
return vaddlvq_u8(vcntq_u8(bitwise_cast<uint8_t>(bitwise_cast(self)))) / (sizeof(T) * 8);
662+
}
663+
655664
/*******
656665
* abs *
657666
*******/

0 commit comments

Comments
 (0)