Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion include/xsimd/arch/generic/xsimd_generic_memory.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,7 @@ namespace xsimd

// load
template <class A, class T>
XSIMD_INLINE batch_bool<T, A> load(bool const* mem, batch_bool<T, A>, requires_arch<generic>) noexcept
XSIMD_INLINE batch_bool<T, A> load_unaligned(bool const* mem, batch_bool<T, A>, requires_arch<generic>) noexcept
{
using batch_type = batch<T, A>;
batch_type ref(0);
Expand All @@ -273,6 +273,12 @@ namespace xsimd
return ref != batch_type::load_aligned(&buffer[0]);
}

// Generic-architecture overload of load_aligned for batch_bool read from an
// array of bool. It performs no alignment-specific work of its own: the
// pointer and the batch_bool tag are forwarded unchanged to load_unaligned.
//
// mem: pointer to the bool values to load.
// b:   batch_bool tag, passed through to load_unaligned.
// Returns whatever the selected load_unaligned overload returns.
template <class A, class T>
XSIMD_INLINE batch_bool<T, A> load_aligned(bool const* mem, batch_bool<T, A> b, requires_arch<generic>) noexcept
{
// NOTE(review): presumably passing `A {}` (rather than `generic {}`) lets
// overload resolution pick the most specialized load_unaligned kernel
// available for A -- confirm against the kernel dispatch convention.
return load_unaligned(mem, b, A {});
}

// load_aligned
namespace detail
{
Expand Down
72 changes: 36 additions & 36 deletions include/xsimd/arch/xsimd_avx.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -633,44 +633,26 @@ namespace xsimd
template <class T, class A, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
XSIMD_INLINE batch_bool<T, A> from_mask(batch_bool<T, A> const&, uint64_t mask, requires_arch<avx>) noexcept
{
alignas(A::alignment()) static const uint32_t lut32[] = {
0x00000000,
0x000000FF,
0x0000FF00,
0x0000FFFF,
0x00FF0000,
0x00FF00FF,
0x00FFFF00,
0x00FFFFFF,
0xFF000000,
0xFF0000FF,
0xFF00FF00,
0xFF00FFFF,
0xFFFF0000,
0xFFFF00FF,
0xFFFFFF00,
0xFFFFFFFF,
};
alignas(A::alignment()) static const uint64_t lut64[] = {
0x0000000000000000ul,
0x000000000000FFFFul,
0x00000000FFFF0000ul,
0x00000000FFFFFFFFul,
0x0000FFFF00000000ul,
0x0000FFFF0000FFFFul,
0x0000FFFFFFFF0000ul,
0x0000FFFFFFFFFFFFul,
0xFFFF000000000000ul,
0xFFFF00000000FFFFul,
0xFFFF0000FFFF0000ul,
0xFFFF0000FFFFFFFFul,
0xFFFFFFFF00000000ul,
0xFFFFFFFF0000FFFFul,
0xFFFFFFFFFFFF0000ul,
0xFFFFFFFFFFFFFFFFul,
};
XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
{
alignas(A::alignment()) static const uint32_t lut32[] = {
0x00000000,
0x000000FF,
0x0000FF00,
0x0000FFFF,
0x00FF0000,
0x00FF00FF,
0x00FFFF00,
0x00FFFFFF,
0xFF000000,
0xFF0000FF,
0xFF00FF00,
0xFF00FFFF,
0xFFFF0000,
0xFFFF00FF,
0xFFFFFF00,
0xFFFFFFFF,
};
assert(!(mask & ~0xFFFFFFFFul) && "inbound mask");
return _mm256_setr_epi32(lut32[mask & 0xF], lut32[(mask >> 4) & 0xF],
lut32[(mask >> 8) & 0xF], lut32[(mask >> 12) & 0xF],
Expand All @@ -679,6 +661,24 @@ namespace xsimd
}
else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
{
alignas(A::alignment()) static const uint64_t lut64[] = {
0x0000000000000000ul,
0x000000000000FFFFul,
0x00000000FFFF0000ul,
0x00000000FFFFFFFFul,
0x0000FFFF00000000ul,
0x0000FFFF0000FFFFul,
0x0000FFFFFFFF0000ul,
0x0000FFFFFFFFFFFFul,
0xFFFF000000000000ul,
0xFFFF00000000FFFFul,
0xFFFF0000FFFF0000ul,
0xFFFF0000FFFFFFFFul,
0xFFFFFFFF00000000ul,
0xFFFFFFFF0000FFFFul,
0xFFFFFFFFFFFF0000ul,
0xFFFFFFFFFFFFFFFFul,
};
assert(!(mask & ~0xFFFFul) && "inbound mask");
return _mm256_setr_epi64x(lut64[mask & 0xF], lut64[(mask >> 4) & 0xF], lut64[(mask >> 8) & 0xF], lut64[(mask >> 12) & 0xF]);
}
Expand Down
3 changes: 2 additions & 1 deletion include/xsimd/arch/xsimd_avx2.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -731,16 +731,17 @@ namespace xsimd
template <class A, class T, bool... Values, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
XSIMD_INLINE batch<T, A> select(batch_bool_constant<T, A, Values...> const&, batch<T, A> const& true_br, batch<T, A> const& false_br, requires_arch<avx2>) noexcept
{
constexpr int mask = batch_bool_constant<T, A, Values...>::mask();
// FIXME: for some reason mask here is not considered as an immediate,
// but it's okay for _mm256_blend_epi32
// case 2: return _mm256_blend_epi16(false_br, true_br, mask);
XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
{
constexpr int mask = batch_bool_constant<T, A, Values...>::mask();
return _mm256_blend_epi32(false_br, true_br, mask);
}
else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
{
constexpr int mask = batch_bool_constant<T, A, Values...>::mask();
constexpr int imask = detail::interleave(mask);
return _mm256_blend_epi32(false_br, true_br, imask);
}
Expand Down
23 changes: 11 additions & 12 deletions include/xsimd/arch/xsimd_avx512bw.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -316,19 +316,18 @@ namespace xsimd
}

// load
template <class A, class T>
XSIMD_INLINE batch_bool<T, A> load(bool const* mem, batch_bool<T, A>, requires_arch<avx512bw>) noexcept
template <class A, class T, class = typename std::enable_if<batch_bool<T, A>::size == 64, void>::type>
XSIMD_INLINE batch_bool<T, A> load_unaligned(bool const* mem, batch_bool<T, A>, requires_arch<avx512bw>) noexcept
{
using register_type = typename batch_bool<T, A>::register_type;
XSIMD_IF_CONSTEXPR(batch_bool<T, A>::size == 64)
{
__m512i bool_val = _mm512_loadu_si512((__m512i const*)mem);
return (register_type)_mm512_cmpgt_epu8_mask(bool_val, _mm512_setzero_si512());
}
else
{
return load(mem, batch_bool<T, A>(), avx512dq {});
}
__m512i bool_val = _mm512_loadu_si512((__m512i const*)mem);
return _mm512_cmpgt_epu8_mask(bool_val, _mm512_setzero_si512());
}

// Aligned load of bool values into a batch_bool for the avx512bw kernel.
// The enable_if restricts this overload to batch_bool types with 64 lanes,
// i.e. one bool byte per lane of a 512-bit register.
//
// mem: pointer to the bool values; it is read with the aligned 512-bit load
//      intrinsic, which per Intel's documentation expects a 64-byte aligned
//      address.
// Returns the mask produced by comparing every loaded byte against zero.
template <class A, class T, class = typename std::enable_if<batch_bool<T, A>::size == 64, void>::type>
XSIMD_INLINE batch_bool<T, A> load_aligned(bool const* mem, batch_bool<T, A>, requires_arch<avx512bw>) noexcept
{
    // Fetch all 64 bool bytes with a single aligned 512-bit load.
    const __m512i raw_bytes = _mm512_load_si512(reinterpret_cast<__m512i const*>(mem));
    // A lane is set when its byte, compared as unsigned, is greater than zero.
    const __m512i all_zero = _mm512_setzero_si512();
    return _mm512_cmpgt_epu8_mask(raw_bytes, all_zero);
}

// max
Expand Down
6 changes: 3 additions & 3 deletions include/xsimd/arch/xsimd_avx512f.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1209,14 +1209,14 @@ namespace xsimd
}
}

// load
// load mask
template <class A, class T>
XSIMD_INLINE batch_bool<T, A> load(bool const* mem, batch_bool<T, A>, requires_arch<avx512f>) noexcept
XSIMD_INLINE batch_bool<T, A> load_unaligned(bool const* mem, batch_bool<T, A>, requires_arch<avx512f>) noexcept
{
using register_type = typename batch_bool<T, A>::register_type;
constexpr auto size = batch_bool<T, A>::size;
constexpr auto iter = size / 8;
static_assert(size % 8 == 0, "incorrect size of bool batch");
static_assert((size % 8) == 0, "incorrect size of bool batch");
register_type mask = 0;
for (std::size_t i = 0; i < iter; ++i)
{
Expand Down
4 changes: 2 additions & 2 deletions include/xsimd/types/xsimd_batch.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -968,13 +968,13 @@ namespace xsimd
template <class T, class A>
XSIMD_INLINE batch_bool<T, A> batch_bool<T, A>::load_aligned(bool const* mem) noexcept
{
return kernel::load<A>(mem, batch_bool<T, A>(), A {});
return kernel::load_aligned<A>(mem, batch_bool<T, A>(), A {});
}

template <class T, class A>
XSIMD_INLINE batch_bool<T, A> batch_bool<T, A>::load_unaligned(bool const* mem) noexcept
{
return load_aligned(mem);
return kernel::load_unaligned<A>(mem, batch_bool<T, A>(), A {});
}

/**
Expand Down
8 changes: 4 additions & 4 deletions test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -54,11 +54,11 @@ if (CMAKE_CXX_COMPILER_ID MATCHES "Clang" OR CMAKE_CXX_COMPILER_ID MATCHES "GNU"
# Users may override the c++ standard:
if(NOT DEFINED CMAKE_CXX_STANDARD OR "${CMAKE_CXX_STANDARD}" STREQUAL "")
if (ENABLE_XTL_COMPLEX)
CHECK_CXX_COMPILER_FLAG("-std=c++14" HAS_CPP14_FLAG)
if (NOT HAS_CPP14_FLAG)
message(FATAL_ERROR "Unsupported compiler -- xsimd requires C++14 support when xtl complex support is enabled")
CHECK_CXX_COMPILER_FLAG("-std=c++17" HAS_CPP17_FLAG)
if (NOT HAS_CPP17_FLAG)
message(FATAL_ERROR "Unsupported compiler -- xsimd requires C++17 support when xtl complex support is enabled")
endif()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17")
else()
CHECK_CXX_COMPILER_FLAG("-std=c++11" HAS_CPP11_FLAG)
if (NOT HAS_CPP11_FLAG)
Expand Down