Skip to content

Commit 81879b7

Browse files
committed
Fix more
1 parent ec071a8 commit 81879b7

2 files changed

Lines changed: 14 additions & 12 deletions

File tree

include/xsimd/arch/xsimd_avx.hpp

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1035,20 +1035,21 @@ namespace xsimd
10351035
XSIMD_INLINE void store_masked(T* mem, batch<T, A> const& src, batch_bool_constant<T, A, Values...> mask, Mode, requires_arch<avx>) noexcept
10361036
{
10371037
constexpr size_t half_size = batch<T, A>::size / 2;
1038+
using half_arch = avx_128;
10381039

10391040
// confined to lower 128-bit half → forward to 128 bit
10401041
XSIMD_IF_CONSTEXPR(mask.countl_zero() >= half_size)
10411042
{
1042-
constexpr auto mlo = ::xsimd::detail::lower_half<sse4_2>(mask);
1043-
const auto lo = xsimd::batch<T, sse4_2>(detail::lower_half(src));
1044-
store_masked<avx_128>(mem, lo, mlo, Mode {}, sse4_2 {});
1043+
constexpr auto mlo = ::xsimd::detail::lower_half<half_arch>(mask);
1044+
const auto lo = xsimd::batch<T, half_arch>(detail::lower_half(src));
1045+
store_masked<half_arch>(mem, lo, mlo, Mode {}, half_arch {});
10451046
}
10461047
// confined to upper 128-bit half → forward to 128 bit
10471048
else XSIMD_IF_CONSTEXPR(mask.countr_zero() >= half_size)
10481049
{
1049-
constexpr auto mhi = ::xsimd::detail::upper_half<sse4_2>(mask);
1050-
const auto hi = xsimd::batch<T, sse4_2>(detail::upper_half(src));
1051-
store_masked<avx_128>(mem + half_size, hi, mhi, Mode {}, sse4_2 {});
1050+
constexpr auto mhi = ::xsimd::detail::upper_half<half_arch>(mask);
1051+
const auto hi = xsimd::batch<T, half_arch>(detail::upper_half(src));
1052+
store_masked<half_arch>(mem + half_size, hi, mhi, Mode {}, half_arch {});
10521053
}
10531054
else
10541055
{

include/xsimd/arch/xsimd_avx2.hpp

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -195,20 +195,21 @@ namespace xsimd
195195
XSIMD_INLINE void store_masked(T* mem, batch<T, A> const& src, batch_bool_constant<T, A, Values...> mask, Mode, requires_arch<avx2>) noexcept
196196
{
197197
constexpr size_t lanes_per_half = batch<T, A>::size / 2;
198+
using half_arch = avx2_128;
198199

199200
// confined to lower 128-bit half → forward to 128 bit
200201
XSIMD_IF_CONSTEXPR(mask.countl_zero() >= lanes_per_half)
201202
{
202-
constexpr auto mlo = ::xsimd::detail::lower_half<sse4_2>(mask);
203-
const auto lo = detail::lower_half(src);
204-
store_masked<sse4_2>(mem, lo, mlo, Mode {}, sse4_2 {});
203+
constexpr auto mlo = ::xsimd::detail::lower_half<half_arch>(mask);
204+
const auto lo = xsimd::batch<T, half_arch>(detail::lower_half(src));
205+
store_masked<half_arch>(mem, lo, mlo, Mode {}, half_arch {});
205206
}
206207
// confined to upper 128-bit half → forward to 128 bit
207208
else XSIMD_IF_CONSTEXPR(mask.countr_zero() >= lanes_per_half)
208209
{
209-
constexpr auto mhi = ::xsimd::detail::upper_half<sse4_2>(mask);
210-
const auto hi = detail::upper_half(src);
211-
store_masked<sse4_2>(mem + lanes_per_half, hi, mhi, Mode {}, sse4_2 {});
210+
constexpr auto mhi = ::xsimd::detail::upper_half<half_arch>(mask);
211+
const auto hi = xsimd::batch<T, half_arch>(detail::upper_half(src));
212+
store_masked<half_arch>(mem + lanes_per_half, hi, mhi, Mode {}, half_arch {});
212213
}
213214
else
214215
{

0 commit comments

Comments
 (0)