Skip to content

Commit 40256dc

Browse files
authored
Merge pull request #1272 from AntoinePrv/arm-detection-new
Refactor arm detection
2 parents 23282a2 + 682ed06 commit 40256dc

File tree

4 files changed

+266
-27
lines changed

4 files changed

+266
-27
lines changed

include/xsimd/config/xsimd_config.hpp

Lines changed: 40 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -357,25 +357,50 @@
357357
/**
358358
* @ingroup xsimd_config_macro
359359
*
360-
* Set to 1 if NEON64 is available at compile-time, to 0 otherwise.
360+
* Set to 1 if the target is in the ARM architecture family in 64 bits, to 0 otherwise
361361
*/
362362
#if defined(__aarch64__) || defined(_M_ARM64)
363-
#define XSIMD_WITH_NEON64 1
363+
#define XSIMD_TARGET_ARM64 1
364364
#else
365-
#define XSIMD_WITH_NEON64 0
365+
#define XSIMD_TARGET_ARM64 0
366+
#endif
367+
368+
/**
369+
* @ingroup xsimd_config_macro
370+
*
371+
* Set to 1 if the target is in the ARM architecture family, to 0 otherwise
372+
*/
373+
#if defined(__arm__) || defined(_M_ARM) || XSIMD_TARGET_ARM64
374+
#define XSIMD_TARGET_ARM 1
375+
#else
376+
#define XSIMD_TARGET_ARM 0
366377
#endif
367378

368379
/**
369380
* @ingroup xsimd_config_macro
370381
*
371382
* Set to 1 if NEON is available at compile-time, to 0 otherwise.
372383
*/
373-
#if (defined(__ARM_NEON) && __ARM_ARCH >= 7) || XSIMD_WITH_NEON64
384+
#if (defined(__ARM_NEON) && (__ARM_ARCH >= 7)) || XSIMD_TARGET_ARM64
374385
#define XSIMD_WITH_NEON 1
375386
#else
376387
#define XSIMD_WITH_NEON 0
377388
#endif
378389

390+
// Neon is always available on Arm64, though it is theoritially possible to compile
391+
// without it, such as -march=armv8-a+nosimd.
392+
// Note that MSVC may never define __ARM_NEON even when available.
393+
/**
394+
* @ingroup xsimd_config_macro
395+
*
396+
* Set to 1 if NEON64 is available at compile-time, to 0 otherwise.
397+
*/
398+
#if XSIMD_TARGET_ARM64
399+
#define XSIMD_WITH_NEON64 1
400+
#else
401+
#define XSIMD_WITH_NEON64 0
402+
#endif
403+
379404
/**
380405
* @ingroup xsimd_config_macro
381406
*
@@ -497,4 +522,15 @@
497522
#define XSIMD_NO_SUPPORTED_ARCHITECTURE
498523
#endif
499524

525+
/**
526+
* @ingroup xsimd_config_macro
527+
*
528+
* Set to 1 if the target is a linux
529+
*/
530+
#if defined(__linux__) && (!defined(__ANDROID_API__) || __ANDROID_API__ >= 18)
531+
#define XSIMD_HAVE_LINUX_GETAUXVAL 1
532+
#else
533+
#define XSIMD_HAVE_LINUX_GETAUXVAL 0
534+
#endif
535+
500536
#endif
Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
/***************************************************************************
2+
* Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and *
3+
* Martin Renou *
4+
* Copyright (c) QuantStack *
5+
* Copyright (c) Serge Guelton *
6+
* *
7+
* Distributed under the terms of the BSD 3-Clause License. *
8+
* *
9+
* The full license is in the file LICENSE, distributed with this software. *
10+
***************************************************************************/
11+
12+
#ifndef XSIMD_CPU_FEATURES_ARM_HPP
13+
#define XSIMD_CPU_FEATURES_ARM_HPP
14+
15+
#include "./xsimd_config.hpp"
16+
17+
#if XSIMD_TARGET_ARM && XSIMD_HAVE_LINUX_GETAUXVAL
18+
#include "../utils/bits.hpp"
19+
#include "./xsimd_getauxval.hpp"
20+
21+
// HWCAP_XXX masks to use on getauxval results.
22+
// Header does not exists on all architectures and masks are architecture
23+
// specific.
24+
#include <asm/hwcap.h>
25+
26+
// Port possibly missing mask. Should only be defined on Arm64.
27+
#if XSIMD_TARGET_ARM64 && !defined(HWCAP2_I8MM)
28+
#define HWCAP2_I8MM (1 << 13)
29+
#endif
30+
#endif // XSIMD_TARGET_ARM && XSIMD_HAVE_LINUX_GETAUXVAL
31+
32+
namespace xsimd
33+
{
34+
/**
35+
* An opinionated CPU feature detection utility for ARM.
36+
*
37+
* Combines compile-time knowledge with runtime detection when available.
38+
* On Linux, runtime detection uses getauxval to query the auxiliary vector.
39+
* On other platforms, only compile-time information is used.
40+
*
41+
* This is well defined on all architectures.
42+
* It will always return false on non-ARM architectures.
43+
*/
44+
class arm_cpu_features
45+
{
46+
public:
47+
arm_cpu_features() noexcept = default;
48+
49+
inline bool neon() const noexcept
50+
{
51+
#if XSIMD_TARGET_ARM && !XSIMD_TARGET_ARM64 && XSIMD_HAVE_LINUX_GETAUXVAL
52+
return hwcap().has_feature(HWCAP_NEON);
53+
#else
54+
return static_cast<bool>(XSIMD_WITH_NEON);
55+
#endif
56+
}
57+
58+
constexpr bool neon64() const noexcept
59+
{
60+
return static_cast<bool>(XSIMD_WITH_NEON64);
61+
}
62+
63+
inline bool sve() const noexcept
64+
{
65+
#if XSIMD_TARGET_ARM64 && XSIMD_HAVE_LINUX_GETAUXVAL
66+
return hwcap().has_feature(HWCAP_SVE);
67+
#else
68+
return false;
69+
#endif
70+
}
71+
72+
inline bool i8mm() const noexcept
73+
{
74+
#if XSIMD_TARGET_ARM64 && XSIMD_HAVE_LINUX_GETAUXVAL
75+
return hwcap2().has_feature(HWCAP2_I8MM);
76+
#else
77+
return false;
78+
#endif
79+
}
80+
81+
private:
82+
#if XSIMD_TARGET_ARM && XSIMD_HAVE_LINUX_GETAUXVAL
83+
enum class status
84+
{
85+
hwcap_valid = 0,
86+
hwcap2_valid = 1,
87+
};
88+
89+
using status_bitset = utils::uint_bitset<status, std::uint32_t>;
90+
91+
mutable status_bitset m_status {};
92+
93+
mutable xsimd::linux_auxval m_hwcap {};
94+
95+
inline xsimd::linux_auxval const& hwcap() const noexcept
96+
{
97+
if (!m_status.bit_is_set<status::hwcap_valid>())
98+
{
99+
m_hwcap = xsimd::linux_auxval::read(AT_HWCAP);
100+
m_status.set_bit<status::hwcap_valid>();
101+
}
102+
return m_hwcap;
103+
}
104+
105+
#if XSIMD_TARGET_ARM64
106+
mutable xsimd::linux_auxval m_hwcap2 {};
107+
108+
inline xsimd::linux_auxval const& hwcap2() const noexcept
109+
{
110+
if (!m_status.bit_is_set<status::hwcap2_valid>())
111+
{
112+
m_hwcap2 = xsimd::linux_auxval::read(AT_HWCAP2);
113+
m_status.set_bit<status::hwcap2_valid>();
114+
}
115+
return m_hwcap2;
116+
}
117+
#endif
118+
#endif // XSIMD_TARGET_ARM && XSIMD_HAVE_LINUX_GETAUXVAL
119+
};
120+
}
121+
#endif

include/xsimd/config/xsimd_cpuid.hpp

Lines changed: 14 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -13,17 +13,13 @@
1313
#define XSIMD_CPUID_HPP
1414

1515
#include "../types/xsimd_all_registers.hpp"
16+
#include "./xsimd_cpu_features_arm.hpp"
1617
#include "./xsimd_cpu_features_x86.hpp"
17-
#include "xsimd_inline.hpp"
18+
#include "./xsimd_inline.hpp"
1819

19-
#if defined(__linux__) && (defined(__ARM_NEON) || defined(_M_ARM) || defined(__riscv_vector))
20+
#if XSIMD_HAVE_LINUX_GETAUXVAL && defined(__riscv_vector)
2021
#include <asm/hwcap.h>
2122
#include <sys/auxv.h>
22-
23-
#ifndef HWCAP2_I8MM
24-
#define HWCAP2_I8MM (1 << 13)
25-
#endif
26-
2723
#endif
2824

2925
namespace xsimd
@@ -92,29 +88,24 @@ namespace xsimd
9288
vsx = 1;
9389
#endif
9490

95-
#if defined(__aarch64__) || defined(_M_ARM64)
96-
neon = 1;
97-
neon64 = 1;
98-
#if defined(__linux__) && (!defined(__ANDROID_API__) || __ANDROID_API__ >= 18)
99-
i8mm_neon64 = bool(getauxval(AT_HWCAP2) & HWCAP2_I8MM);
100-
sve = bool(getauxval(AT_HWCAP) & HWCAP_SVE);
101-
#endif
102-
103-
#elif defined(__ARM_NEON) || defined(_M_ARM)
91+
#if XSIMD_HAVE_LINUX_GETAUXVAL
92+
#if defined(__riscv_vector) && defined(__riscv_v_fixed_vlen) && __riscv_v_fixed_vlen > 0
10493

105-
#if defined(__linux__) && (!defined(__ANDROID_API__) || __ANDROID_API__ >= 18)
106-
neon = bool(getauxval(AT_HWCAP) & HWCAP_NEON);
107-
#endif
108-
109-
#elif defined(__riscv_vector) && defined(__riscv_v_fixed_vlen) && __riscv_v_fixed_vlen > 0
110-
111-
#if defined(__linux__) && (!defined(__ANDROID_API__) || __ANDROID_API__ >= 18)
11294
#ifndef HWCAP_V
11395
#define HWCAP_V (1 << ('V' - 'A'))
11496
#endif
11597
rvv = bool(getauxval(AT_HWCAP) & HWCAP_V);
11698
#endif
11799
#endif
100+
101+
// Safe on all platforms, it will be all false if non arm.
102+
const auto arm_cpu = xsimd::arm_cpu_features();
103+
104+
neon = arm_cpu.neon();
105+
neon64 = arm_cpu.neon64();
106+
i8mm_neon64 = arm_cpu.neon64() && arm_cpu.i8mm();
107+
sve = arm_cpu.sve();
108+
118109
// Safe on all platforms, it will be all false if non x86.
119110
const auto x86_cpu = xsimd::x86_cpu_features();
120111

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
/***************************************************************************
2+
* Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and *
3+
* Martin Renou *
4+
* Copyright (c) QuantStack *
5+
* Copyright (c) Serge Guelton *
6+
* *
7+
* Distributed under the terms of the BSD 3-Clause License. *
8+
* *
9+
* The full license is in the file LICENSE, distributed with this software. *
10+
***************************************************************************/
11+
12+
#ifndef XSIMD_GETAUXVAL_HPP
13+
#define XSIMD_GETAUXVAL_HPP
14+
15+
#include "./xsimd_config.hpp"
16+
17+
#if XSIMD_HAVE_LINUX_GETAUXVAL
18+
#include <sys/auxv.h> // getauxval
19+
#endif
20+
21+
namespace xsimd
22+
{
23+
namespace detail
24+
{
25+
using linux_getauxval_t = unsigned long;
26+
27+
inline linux_getauxval_t linux_getauxval(linux_getauxval_t type) noexcept;
28+
}
29+
30+
/*
31+
* Holds the value of a Linux auxiliary vector entry (e.g. AT_HWCAP).
32+
*
33+
* On Linux systems, the kernel exposes some CPU features through the
34+
* auxiliary vector, which can be queried via `getauxval(AT_HWCAP)`.
35+
* Well defined on all platforms, and will return always falsw on
36+
* non-linux platforms.
37+
*
38+
* Usage:
39+
* auto hwcap = linux_auxval::read(AT_HWCAP);
40+
* bool neon = hwcap.has_feature(HWCAP_NEON);
41+
*
42+
* @see https://www.kernel.org/doc/Documentation/arm64/elf_hwcaps.txt
43+
*/
44+
class linux_auxval
45+
{
46+
private:
47+
using getauxval_t = detail::linux_getauxval_t;
48+
49+
public:
50+
constexpr linux_auxval() noexcept = default;
51+
52+
inline static linux_auxval read(getauxval_t type) noexcept
53+
{
54+
return linux_auxval(detail::linux_getauxval(type));
55+
}
56+
57+
constexpr bool has_feature(getauxval_t feat) const noexcept
58+
{
59+
return (m_auxval & feat) == feat;
60+
}
61+
62+
private:
63+
getauxval_t m_auxval = {};
64+
65+
constexpr explicit linux_auxval(getauxval_t v) noexcept
66+
: m_auxval(v)
67+
{
68+
}
69+
};
70+
71+
/********************
72+
* Implementation *
73+
********************/
74+
75+
namespace detail
76+
{
77+
#if XSIMD_HAVE_LINUX_GETAUXVAL
78+
inline linux_getauxval_t linux_getauxval(linux_getauxval_t type) noexcept
79+
{
80+
return getauxval(type);
81+
}
82+
#else
83+
inline linux_getauxval_t linux_getauxval(linux_getauxval_t) noexcept
84+
{
85+
return {}; // All bits set to 0
86+
}
87+
#endif
88+
}
89+
}
90+
91+
#endif

0 commit comments

Comments
 (0)