Skip to content

Commit 83853c1

Browse files
cyb70289 authored and serge-sans-paille committed
Some avx512 bugfixes
Fixes #850
1 parent f47622a commit 83853c1

File tree

3 files changed

+13
-15
lines changed

3 files changed

+13
-15
lines changed

include/xsimd/arch/generic/xsimd_generic_memory.hpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -34,7 +34,7 @@ namespace xsimd
3434
inline batch<T, A> extract_pair(batch<T, A> const& self, batch<T, A> const& other, std::size_t i, requires_arch<generic>) noexcept
3535
{
3636
constexpr std::size_t size = batch<T, A>::size;
37-
assert(0 <= i && i < size && "index in bounds");
37+
assert(i < size && "index in bounds");
3838

3939
alignas(A::alignment()) T self_buffer[size];
4040
self.store_aligned(self_buffer);

include/xsimd/arch/xsimd_avx512bw.hpp

Lines changed: 9 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -362,11 +362,11 @@ namespace xsimd
362362
{
363363
XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
364364
{
365-
return _mm512_mask_blend_epi8(cond, false_br, true_br);
365+
return _mm512_mask_blend_epi8(cond, false_br.data, true_br.data);
366366
}
367367
else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
368368
{
369-
return _mm512_mask_blend_epi16(cond, false_br, true_br);
369+
return _mm512_mask_blend_epi16(cond, false_br.data, true_br.data);
370370
}
371371
else
372372
{
@@ -410,14 +410,14 @@ namespace xsimd
410410
batch<T, A> xx;
411411
if (N & 1)
412412
{
413-
alignas(32) uint64_t buffer[8];
413+
alignas(A::alignment()) uint64_t buffer[8];
414414
_mm512_store_epi64(&buffer[0], x);
415415
for (int i = 7; i > 0; --i)
416416
buffer[i] = (buffer[i] << 8) | (buffer[i - 1] >> 56);
417417
buffer[0] = buffer[0] << 8;
418418
xx = _mm512_load_epi64(&buffer[0]);
419419

420-
alignas(32) auto slide_perm = detail::make_slide_perm_hi(::xsimd::detail::make_index_sequence<512 / 64>());
420+
alignas(A::alignment()) auto slide_perm = detail::make_slide_perm_hi(::xsimd::detail::make_index_sequence<512 / 64>());
421421
__m512i xl = _mm512_slli_epi64(x, 8);
422422
__m512i xr = _mm512_srli_epi64(x, 56);
423423
xr = _mm512_permutex2var_epi64(xr, _mm512_load_epi64(slide_perm.data()), _mm512_setzero_si512());
@@ -429,8 +429,8 @@ namespace xsimd
429429
{
430430
xx = x;
431431
}
432-
alignas(32) auto slide_pattern = detail::make_slide_left_pattern<N / 2>(::xsimd::detail::make_index_sequence<512 / 16>());
433-
alignas(32) auto slide_mask = detail::make_slide_left_mask<N / 2>(::xsimd::detail::make_index_sequence<512 / 16>());
432+
alignas(A::alignment()) auto slide_pattern = detail::make_slide_left_pattern<N / 2>(::xsimd::detail::make_index_sequence<512 / 16>());
433+
alignas(A::alignment()) auto slide_mask = detail::make_slide_left_mask<N / 2>(::xsimd::detail::make_index_sequence<512 / 16>());
434434
return _mm512_and_si512(_mm512_permutexvar_epi16(_mm512_load_epi32(slide_pattern.data()), xx), _mm512_load_epi32(slide_mask.data()));
435435
}
436436

@@ -469,7 +469,7 @@ namespace xsimd
469469
batch<T, A> xx;
470470
if (N & 1)
471471
{
472-
alignas(32) auto slide_perm = detail::make_slide_perm_low(::xsimd::detail::make_index_sequence<512 / 64>());
472+
alignas(A::alignment()) auto slide_perm = detail::make_slide_perm_low(::xsimd::detail::make_index_sequence<512 / 64>());
473473
__m512i xr = _mm512_srli_epi64(x, 8);
474474
__m512i xl = _mm512_slli_epi64(x, 56);
475475
xl = _mm512_permutex2var_epi64(xl, _mm512_load_epi64(slide_perm.data()), _mm512_setzero_si512());
@@ -481,8 +481,8 @@ namespace xsimd
481481
{
482482
xx = x;
483483
}
484-
alignas(32) auto slide_pattern = detail::make_slide_right_pattern<N / 2>(::xsimd::detail::make_index_sequence<512 / 16>());
485-
alignas(32) auto slide_mask = detail::make_slide_right_mask<N / 2>(::xsimd::detail::make_index_sequence<512 / 16>());
484+
alignas(A::alignment()) auto slide_pattern = detail::make_slide_right_pattern<N / 2>(::xsimd::detail::make_index_sequence<512 / 16>());
485+
alignas(A::alignment()) auto slide_mask = detail::make_slide_right_mask<N / 2>(::xsimd::detail::make_index_sequence<512 / 16>());
486486
return _mm512_and_si512(_mm512_permutexvar_epi16(_mm512_load_epi32(slide_pattern.data()), xx), _mm512_load_epi32(slide_mask.data()));
487487
}
488488

include/xsimd/arch/xsimd_avx512f.hpp

Lines changed: 3 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1380,18 +1380,16 @@ namespace xsimd
13801380

13811381
// scatter
13821382
template <class A, class T,
1383-
typename std::enable_if<std::is_same<uint32_t, T>::value || std::is_same<int32_t, T>::value,
1384-
void>::type>
1383+
class = typename std::enable_if<std::is_same<uint32_t, T>::value || std::is_same<int32_t, T>::value, void>::type>
13851384
inline void scatter(batch<T, A> const& src, T* dst,
13861385
batch<int32_t, A> const& index,
13871386
kernel::requires_arch<avx512f>) noexcept
13881387
{
13891388
_mm512_i32scatter_epi32(dst, index, src, sizeof(T));
13901389
}
13911390

1392-
template <class T, class A,
1393-
typename std::enable_if<std::is_same<uint64_t, T>::value || std::is_same<int64_t, T>::value,
1394-
void>::type>
1391+
template <class A, class T,
1392+
class = typename std::enable_if<std::is_same<uint64_t, T>::value || std::is_same<int64_t, T>::value, void>::type>
13951393
inline void scatter(batch<T, A> const& src, T* dst,
13961394
batch<int64_t, A> const& index,
13971395
kernel::requires_arch<avx512f>) noexcept

0 commit comments

Comments
 (0)