Skip to content

Commit eae2990

Browse files
Tentative support for avx512vl extensions to 256 bit registers
In addition to adding missing instructions (e.g. abs on int64_t), this mostly changes the mask representation from a vector register to a scalar, hence the big diff.
1 parent 60ca510 commit eae2990

7 files changed

Lines changed: 755 additions & 2 deletions

File tree

.github/workflows/linux.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ jobs:
3232
- { compiler: 'clang', version: '18', flags: 'avx512' }
3333
- { compiler: 'clang', version: '18', flags: 'avx_128' }
3434
- { compiler: 'clang', version: '18', flags: 'avx2_128' }
35+
- { compiler: 'clang', version: '18', flags: 'avx512vl_256' }
3536
steps:
3637
- name: Setup compiler
3738
if: ${{ matrix.sys.compiler == 'gcc' }}
@@ -94,6 +95,9 @@ jobs:
9495
if [[ '${{ matrix.sys.flags }}' == 'avx512' ]]; then
9596
CMAKE_EXTRA_ARGS="$CMAKE_EXTRA_ARGS -DTARGET_ARCH=skylake-avx512"
9697
fi
98+
if [[ '${{ matrix.sys.flags }}' == 'avx512vl_256' ]]; then
99+
CMAKE_EXTRA_ARGS="$CMAKE_EXTRA_ARGS -DTARGET_ARCH=skylake-avx512 -DXSIMD_DEFAULT_ARCH=avx512vl_256"
100+
fi
97101
if [[ '${{ matrix.sys.flags }}' == 'avx512pf' ]]; then
98102
CMAKE_EXTRA_ARGS="$CMAKE_EXTRA_ARGS -DTARGET_ARCH=knl"
99103
fi

include/xsimd/arch/xsimd_avx512vl_256.hpp

Lines changed: 728 additions & 0 deletions
Large diffs are not rendered by default.

include/xsimd/arch/xsimd_isa.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@
7575

7676
#if XSIMD_WITH_AVX512VL
7777
#include "./xsimd_avx512vl.hpp"
78+
#include "./xsimd_avx512vl_256.hpp"
7879
#endif
7980

8081
#if XSIMD_WITH_AVX512DQ

include/xsimd/config/xsimd_arch.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -163,7 +163,7 @@ namespace xsimd
163163

164164
using all_x86_architectures = arch_list<
165165
avx512vnni<avx512vbmi2>, avx512vbmi2, avx512vbmi, avx512ifma, avx512pf, avx512vnni<avx512bw>, avx512bw, avx512er, avx512dq, avx512vl, avx512cd, avx512f,
166-
avxvnni, fma3<avx2>, avx2, fma3<avx>, avx, avx2_128, avx_128, fma4, fma3<sse4_2>,
166+
avxvnni, avx512vl_256, fma3<avx2>, avx2, fma3<avx>, avx, avx2_128, avx_128, fma4, fma3<sse4_2>,
167167
sse4_2, sse4_1, /*sse4a,*/ ssse3, sse3, sse2>;
168168

169169
using all_sve_architectures = arch_list<detail::sve<512>, detail::sve<256>, detail::sve<128>>;

include/xsimd/config/xsimd_cpu_features_x86.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -895,6 +895,8 @@ namespace xsimd
895895

896896
inline bool avx512vl() const noexcept { return avx512_enabled() && leaf7().all_bits_set<x86_cpuid_leaf7::ebx::avx512vl>(); }
897897

898+
inline bool avx512vl_256() const noexcept { return avx512_enabled() && osxsave() && leaf7().all_bits_set<x86_cpuid_leaf7::ebx::avx512vl>(); }
899+
898900
inline bool avx512vbmi() const noexcept { return avx512_enabled() && leaf7().all_bits_set<x86_cpuid_leaf7::ecx::avx512vbmi>(); }
899901

900902
inline bool avx512vbmi2() const noexcept { return avx512_enabled() && leaf7().all_bits_set<x86_cpuid_leaf7::ecx::avx512vbmi2>(); }

include/xsimd/types/xsimd_avx512f_register.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#define XSIMD_AVX512F_REGISTER_HPP
1414

1515
#include "./xsimd_common_arch.hpp"
16+
#include "./xsimd_fma3_avx2_register.hpp"
1617

1718
namespace xsimd
1819
{
@@ -69,7 +70,6 @@ namespace xsimd
6970
XSIMD_DECLARE_SIMD_REGISTER(long long int, avx512f, __m512i);
7071
XSIMD_DECLARE_SIMD_REGISTER(float, avx512f, __m512);
7172
XSIMD_DECLARE_SIMD_REGISTER(double, avx512f, __m512d);
72-
7373
}
7474
#endif
7575
}

include/xsimd/types/xsimd_avx512vl_register.hpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,18 @@ namespace xsimd
2929
static constexpr char const* name() noexcept { return "avx512vl"; }
3030
};
3131

32+
/**
33+
* @ingroup architectures
34+
*
35+
* AVX512VL instructions extension for 256-bit registers
36+
*/
37+
struct avx512vl_256 : fma3<avx2>
38+
{
39+
static constexpr bool supported() noexcept { return XSIMD_WITH_AVX512VL; }
40+
static constexpr bool available() noexcept { return true; }
41+
static constexpr char const* name() noexcept { return "avx512vl/256"; }
42+
};
43+
3244
#if XSIMD_WITH_AVX512VL
3345

3446
#if !XSIMD_WITH_AVX512CD
@@ -45,6 +57,12 @@ namespace xsimd
4557

4658
XSIMD_DECLARE_SIMD_REGISTER_ALIAS(avx512vl, avx512cd);
4759

60+
template <class T>
61+
struct get_bool_simd_register<T, avx512vl_256>
62+
{
63+
using type = simd_avx512_bool_register<T>;
64+
};
65+
XSIMD_DECLARE_SIMD_REGISTER_ALIAS(avx512vl_256, avx2);
4866
}
4967
#endif
5068
}

0 commit comments

Comments
 (0)