Skip to content

Commit 06dac6b

Browse files
committed
Address comments
Avoid `__builtin_popcountll` on ARMv7-A, where the compiler expands it into a manual software bit-manipulation sequence instead of a single popcount instruction: <https://godbolt.org/z/vdM4f4n9G>.
1 parent fcd534f commit 06dac6b

File tree

5 files changed

+7
-31
lines changed

5 files changed

+7
-31
lines changed
File renamed without changes.

include/xsimd/arch/common/xsimd_common_logical.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
#ifndef XSIMD_COMMON_LOGICAL_HPP
1313
#define XSIMD_COMMON_LOGICAL_HPP
1414

15-
#include "../../types/xsimd_bit.hpp"
15+
#include "./xsimd_common_bit.hpp"
1616
#include "./xsimd_common_details.hpp"
1717

1818
#include <climits>

include/xsimd/arch/xsimd_common.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#define XSIMD_COMMON_HPP
1414

1515
#include "./common/xsimd_common_arithmetic.hpp"
16+
#include "./common/xsimd_common_bit.hpp"
1617
#include "./common/xsimd_common_cast.hpp"
1718
#include "./common/xsimd_common_complex.hpp"
1819
#include "./common/xsimd_common_logical.hpp"

include/xsimd/arch/xsimd_neon.hpp

Lines changed: 5 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,9 @@
1818
#include <tuple>
1919
#include <type_traits>
2020

21-
#include "../types/xsimd_bit.hpp"
2221
#include "../types/xsimd_neon_register.hpp"
2322
#include "../types/xsimd_utils.hpp"
23+
#include "./common/xsimd_common_bit.hpp"
2424
#include "./common/xsimd_common_cast.hpp"
2525

2626
// Wrap intrinsics so we can pass them as function pointers
@@ -3362,36 +3362,12 @@ namespace xsimd
33623362
/*********
33633363
* count *
33643364
*********/
3365-
template <class A, class T, detail::enable_sized_t<T, 1> = 0>
3366-
XSIMD_INLINE size_t count(batch_bool<T, A> const& self, requires_arch<neon>) noexcept
3367-
{
3368-
uint8x8_t narrowed = vshrn_n_u16(vreinterpretq_u16_u8(self), 4);
3369-
uint64_t result = vget_lane_u64(vreinterpret_u64_u8(narrowed), 0);
3370-
return xsimd::detail::popcount(result) / 4;
3371-
}
3372-
3373-
template <class A, class T, detail::enable_sized_t<T, 2> = 0>
3374-
XSIMD_INLINE size_t count(batch_bool<T, A> const& self, requires_arch<neon>) noexcept
3375-
{
3376-
uint8x8_t narrowed = vmovn_u16(self);
3377-
uint64_t result = vget_lane_u64(vreinterpret_u64_u8(narrowed), 0);
3378-
return xsimd::detail::popcount(result) / 8;
3379-
}
3380-
3381-
template <class A, class T, detail::enable_sized_t<T, 4> = 0>
3382-
XSIMD_INLINE size_t count(batch_bool<T, A> const& self, requires_arch<neon>) noexcept
3383-
{
3384-
uint16x4_t narrowed = vmovn_u32(self);
3385-
uint64_t result = vget_lane_u64(vreinterpret_u64_u16(narrowed), 0);
3386-
return xsimd::detail::popcount(result) / 16;
3387-
}
3388-
3389-
template <class A, class T, detail::enable_sized_t<T, 8> = 0>
3365+
template <class A, class T>
33903366
XSIMD_INLINE size_t count(batch_bool<T, A> const& self, requires_arch<neon>) noexcept
33913367
{
3392-
uint32x2_t narrowed = vmovn_u64(self);
3393-
uint64_t result = vget_lane_u64(vreinterpret_u64_u32(narrowed), 0);
3394-
return xsimd::detail::popcount(result) / 32;
3368+
uint8x16_t popcnts = vcntq_u8(bitwise_cast<uint8_t, T, A>(bitwise_cast<T, A>(self)));
3369+
uint64x2_t total = vpaddlq_u32(vpaddlq_u16(vpaddlq_u8(popcnts)));
3370+
return vget_lane_u64(vadd_u64(vgetq_low_u64(total), vgetq_high_u64(total)), 0) / (sizeof(T) * 8);
33953371
}
33963372

33973373
#define WRAP_MASK_OP(OP) \

include/xsimd/xsimd.hpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,6 @@ namespace xsimd
7676
#else
7777
#include "types/xsimd_batch.hpp"
7878
#include "types/xsimd_batch_constant.hpp"
79-
#include "types/xsimd_bit.hpp"
8079
#include "types/xsimd_traits.hpp"
8180

8281
// This include must come last

0 commit comments

Comments
 (0)