Skip to content

Commit 7c36cbc

Browse files
committed
chore: misc small fixes touched while debugging masked memory
- sse2 store_masked: align call signature to the (Mode, common{}) shape
- avx512vl_register: fix doc comment typo (AVX512DQ -> AVX512VL)
- isa.hpp: include the _128 headers before the wider arch headers so the AVX half-fold's recursive call resolves at parse time (load-bearing for the upcoming perf change); wrap the includes in clang-format off/on
- avx_128 swizzle: split the dynamic-mask overload into per-type overloads for true C++14 builds, where XSIMD_IF_CONSTEXPR is plain 'if' and both branches must type-check
1 parent 934bddc commit 7c36cbc

4 files changed

Lines changed: 23 additions & 18 deletions

File tree

include/xsimd/arch/xsimd_avx_128.hpp

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -129,20 +129,20 @@ namespace xsimd
129129
}
130130

131131
// swizzle (dynamic mask)
132-
template <class A, class T, class ITy, class = std::enable_if_t<std::is_floating_point<T>::value && sizeof(T) == sizeof(ITy)>>
133-
XSIMD_INLINE batch<T, A> swizzle(batch<T, A> const& self, batch<ITy, A> mask, requires_arch<avx_128>) noexcept
134-
{
135-
XSIMD_IF_CONSTEXPR(std::is_same<T, float>::value)
136-
{
137-
return _mm_permutevar_ps(self, mask);
138-
}
139-
else
140-
{
141-
// VPERMILPD's variable control reads bit 1 of each 64-bit selector
142-
// (bit 0 is ignored), so a {0,1} index needs to become {0,2}.
143-
// Negation is a cheap alternative to a left shift by 1.
144-
return _mm_permutevar_pd(self, -mask);
145-
}
132+
template <class A, class ITy>
133+
XSIMD_INLINE batch<float, A> swizzle(batch<float, A> const& self, batch<ITy, A> mask, requires_arch<avx_128>) noexcept
134+
{
135+
static_assert(sizeof(float) == sizeof(ITy), "index type must match value width");
136+
return _mm_permutevar_ps(self, mask);
137+
}
138+
template <class A, class ITy>
139+
XSIMD_INLINE batch<double, A> swizzle(batch<double, A> const& self, batch<ITy, A> mask, requires_arch<avx_128>) noexcept
140+
{
141+
static_assert(sizeof(double) == sizeof(ITy), "index type must match value width");
142+
// VPERMILPD's variable control reads bit 1 of each 64-bit selector
143+
// (bit 0 is ignored), so a {0,1} index needs to become {0,2}.
144+
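// Negation is a cheap alternative to a left shift by 1: -0 == 0 and -1 is all-ones, so bit 1 ends up set exactly for index 1 (the result is {0,-1}, not {0,2}, but only bit 1 is read).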
145+
return _mm_permutevar_pd(self, -mask);
146146
}
147147

148148
// swizzle (constant mask)

include/xsimd/arch/xsimd_isa.hpp

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,8 +48,11 @@
4848
#endif
4949

5050
#if XSIMD_WITH_AVX
51-
#include "./xsimd_avx.hpp"
51+
// clang-format off
52+
// _128 first: avx half-fold recursive call needs avx_128 visible at parse time.
5253
#include "./xsimd_avx_128.hpp"
54+
#include "./xsimd_avx.hpp"
55+
// clang-format on
5356
#endif
5457

5558
#if XSIMD_WITH_FMA3_AVX
@@ -61,8 +64,10 @@
6164
#endif
6265

6366
#if XSIMD_WITH_AVX2
64-
#include "./xsimd_avx2.hpp"
67+
// clang-format off
6568
#include "./xsimd_avx2_128.hpp"
69+
#include "./xsimd_avx2.hpp"
70+
// clang-format on
6671
#endif
6772

6873
#if XSIMD_WITH_FMA3_AVX2

include/xsimd/arch/xsimd_sse2.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2331,7 +2331,7 @@ namespace xsimd
23312331
}
23322332
else
23332333
{
2334-
store_masked<A>(mem, src, mask, requires_arch<common> {});
2334+
store_masked<A>(mem, src, mask, aligned_mode {}, common {});
23352335
}
23362336
}
23372337

include/xsimd/types/xsimd_avx512vl_register.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ namespace xsimd
2020
/**
2121
* @ingroup architectures
2222
*
23-
* AVX512DQ instructions
23+
* AVX512VL instructions
2424
*/
2525
struct avx512vl : avx512cd
2626
{

0 commit comments

Comments
 (0)