1313#define XSIMD_AVX2_HPP
1414
1515#include " ../types/xsimd_avx2_register.hpp"
16+ #include " ../types/xsimd_avx512vl_register.hpp"
1617#include " ../types/xsimd_batch_constant.hpp"
1718#include " ./utils/shifts.hpp"
1819
@@ -138,7 +139,8 @@ namespace xsimd
138139 }
139140
140141 // single templated implementation for integer masked loads (32/64-bit)
141- template <class A , class T , bool ... Values, class Mode >
142+ template <class A , class T , bool ... Values, class Mode ,
143+ class = std::enable_if_t <std::is_base_of<avx2, A>::value && !std::is_base_of<avx512vl_256, A>::value>>
142144 XSIMD_INLINE std::enable_if_t <std::is_integral<T>::value && (sizeof (T) >= 4 ), batch<T, A>>
143145 load_masked (T const * mem, batch_bool_constant<T, A, Values...> mask, convert<T>, Mode, requires_arch<avx2>) noexcept
144146 {
@@ -148,26 +150,30 @@ namespace xsimd
148150 return detail::maskload (reinterpret_cast <const int_t *>(mem), mask.as_batch ());
149151 }
150152
// load_masked overload for int32_t elements.
// Forwards to the templated load_masked<A, int32_t> with the same pointer and
// mask, passing default-constructed convert<int32_t>, Mode and avx2 tag objects.
// NOTE(review): this text is a rendered diff -- the '-' line is the old template
// header and the '+' lines are the new one. The new header adds a defaulted
// template parameter built from std::enable_if_t, so the overload is only
// viable when A derives from avx2 and does not derive from avx512vl_256.
151- template <class A , bool ... Values, class Mode >
153+ template <class A , bool ... Values, class Mode ,
154+ class = std::enable_if_t <std::is_base_of<avx2, A>::value && !std::is_base_of<avx512vl_256, A>::value>>
152155 XSIMD_INLINE batch<int32_t , A> load_masked (int32_t const * mem, batch_bool_constant<int32_t , A, Values...> mask, convert<int32_t >, Mode, requires_arch<avx2>) noexcept
153156 {
154157 return load_masked<A, int32_t >(mem, mask, convert<int32_t > {}, Mode {}, avx2 {});
155158 }
156159
// load_masked overload for uint32_t elements, implemented via the int32_t path.
// The mask parameter is unnamed: only its Values... pack is used, to rebuild an
// equivalent batch_bool_constant<int32_t, A, Values...>.
// NOTE(review): this text is a rendered diff -- the '-' line is the old template
// header and the '+' lines are the new one. The new header adds a defaulted
// template parameter built from std::enable_if_t, so the overload is only
// viable when A derives from avx2 and does not derive from avx512vl_256.
157- template <class A , bool ... Values, class Mode >
160+ template <class A , bool ... Values, class Mode ,
161+ class = std::enable_if_t <std::is_base_of<avx2, A>::value && !std::is_base_of<avx512vl_256, A>::value>>
158162 XSIMD_INLINE batch<uint32_t , A> load_masked (uint32_t const * mem, batch_bool_constant<uint32_t , A, Values...>, convert<uint32_t >, Mode, requires_arch<avx2>) noexcept
159163 {
// Load through an int32_t view of the same memory with an int32_t-typed mask.
160164 const auto r = load_masked<A, int32_t >(reinterpret_cast <int32_t const *>(mem), batch_bool_constant<int32_t , A, Values...> {}, convert<int32_t > {}, Mode {}, avx2 {});
// Reinterpret the loaded batch as uint32_t for the caller.
161165 return bitwise_cast<uint32_t >(r);
162166 }
163167
// load_masked overload for int64_t elements.
// Forwards to the templated load_masked<A, int64_t> with the same pointer and
// mask, passing default-constructed convert<int64_t>, Mode and avx2 tag objects.
// NOTE(review): this text is a rendered diff -- the '-' line is the old template
// header and the '+' lines are the new one. The new header adds a defaulted
// template parameter built from std::enable_if_t, so the overload is only
// viable when A derives from avx2 and does not derive from avx512vl_256.
164- template <class A , bool ... Values, class Mode >
168+ template <class A , bool ... Values, class Mode ,
169+ class = std::enable_if_t <std::is_base_of<avx2, A>::value && !std::is_base_of<avx512vl_256, A>::value>>
165170 XSIMD_INLINE batch<int64_t , A> load_masked (int64_t const * mem, batch_bool_constant<int64_t , A, Values...> mask, convert<int64_t >, Mode, requires_arch<avx2>) noexcept
166171 {
167172 return load_masked<A, int64_t >(mem, mask, convert<int64_t > {}, Mode {}, avx2 {});
168173 }
169174
170- template <class A , bool ... Values, class Mode >
175+ template <class A , bool ... Values, class Mode ,
176+ class = std::enable_if_t <std::is_base_of<avx2, A>::value && !std::is_base_of<avx512vl_256, A>::value>>
171177 XSIMD_INLINE batch<uint64_t , A> load_masked (uint64_t const * mem, batch_bool_constant<uint64_t , A, Values...>, convert<uint64_t >, Mode, requires_arch<avx2>) noexcept
172178 {
173179 const auto r = load_masked<A, int64_t >(reinterpret_cast <int64_t const *>(mem), batch_bool_constant<int64_t , A, Values...> {}, convert<int64_t > {}, Mode {}, avx2 {});
@@ -190,39 +196,44 @@ namespace xsimd
190196 }
191197 }
192198
// Templated masked store for a compile-time mask (batch_bool_constant).
// Three compile-time branches (XSIMD_IF_CONSTEXPR), chosen from the mask:
//   1. mask.countl_zero() >= lanes_per_half: store only the lower half of src at mem.
//   2. mask.countr_zero() >= lanes_per_half: store only the upper half of src at
//      mem + lanes_per_half.
//   3. otherwise: full-width detail::maskstore with mask.as_batch().
// lanes_per_half is batch<T, A>::size / 2.
// NOTE(review): this text is a rendered diff -- '-' lines are the pre-change code
// and '+' lines the post-change code. The change does two things:
//   - adds a defaulted std::enable_if_t template parameter requiring an integral T
//     with sizeof(T) >= 4, and an A that derives from avx2 but not from avx512vl_256;
//   - replaces the hard-coded sse4_2 arch in the half-width branches with half_arch,
//     the arch_type of make_sized_batch_t<T, lanes_per_half>.
// NOTE(review): presumably countl_zero/countr_zero count inactive lanes at one end
// of the mask, so branches 1 and 2 mean "all active lanes are in one half" --
// confirm against batch_bool_constant.
193- template <class A , class T , bool ... Values, class Mode >
199+ template <class A , class T , bool ... Values, class Mode ,
200+ typename = std::enable_if_t <std::is_integral<T>::value && (sizeof (T) >= 4 ) && std::is_base_of<avx2, A>::value && !std::is_base_of<avx512vl_256, A>::value>>
194201 XSIMD_INLINE void store_masked (T* mem, batch<T, A> const & src, batch_bool_constant<T, A, Values...> mask, Mode, requires_arch<avx2>) noexcept
195202 {
196203 constexpr size_t lanes_per_half = batch<T, A>::size / 2 ;
204+ using half_batch = ::xsimd::make_sized_batch_t <T, lanes_per_half>;
205+ using half_arch = typename half_batch::arch_type;
197206
198- // confined to lower 128-bit half → forward to SSE
207+ // lower 128-bit half
199208 XSIMD_IF_CONSTEXPR (mask.countl_zero () >= lanes_per_half)
200209 {
201- constexpr auto mlo = ::xsimd::detail::lower_half<sse4_2 >(mask);
202- const auto lo = detail::lower_half (src);
203- store_masked<sse4_2 >(mem, lo, mlo, Mode {}, sse4_2 {});
210+ constexpr auto mlo = ::xsimd::detail::lower_half<half_arch >(mask);
211+ const half_batch lo = detail::lower_half (src);
212+ store_masked<half_arch >(mem, lo, mlo, Mode {}, half_arch {});
204213 }
205- // confined to upper 128-bit half → forward to SSE
214+ // upper 128-bit half
206215 else XSIMD_IF_CONSTEXPR (mask.countr_zero () >= lanes_per_half)
207216 {
208- constexpr auto mhi = ::xsimd::detail::upper_half<sse4_2 >(mask);
209- const auto hi = detail::upper_half (src);
210- store_masked<sse4_2 >(mem + lanes_per_half, hi, mhi, Mode {}, sse4_2 {});
217+ constexpr auto mhi = ::xsimd::detail::upper_half<half_arch >(mask);
218+ const half_batch hi = detail::upper_half (src);
219+ store_masked<half_arch >(mem + lanes_per_half, hi, mhi, Mode {}, half_arch {});
211220 }
212221 else
213222 {
// Neither half-only condition holds: store through the full-width mask.
214223 detail::maskstore<T, A>(mem, mask.as_batch (), src);
215224 }
216225 }
217226
// store_masked overload for uint32_t elements, implemented via the int32_t path.
// src is bitwise_cast to an int32_t batch and mem is reinterpreted as int32_t*.
// NOTE(review): this text is a rendered diff -- '-' lines are the pre-change code
// and '+' lines the post-change code. The change does two things:
//   - adds a defaulted std::enable_if_t template parameter requiring an A that
//     derives from avx2 but not from avx512vl_256;
//   - stops forwarding the uint32_t-typed `mask` object and passes a freshly built
//     batch_bool_constant<int32_t, A, Values...> instead, so the mask's element
//     type matches the int32_t pointer and batch. The mask parameter becomes
//     unnamed because only its Values... pack is used.
218- template <class A , bool ... Values, class Mode >
219- XSIMD_INLINE void store_masked (uint32_t * mem, batch<uint32_t , A> const & src, batch_bool_constant<uint32_t , A, Values...> mask, Mode, requires_arch<avx2>) noexcept
227+ template <class A , bool ... Values, class Mode ,
228+ class = std::enable_if_t <std::is_base_of<avx2, A>::value && !std::is_base_of<avx512vl_256, A>::value>>
229+ XSIMD_INLINE void store_masked (uint32_t * mem, batch<uint32_t , A> const & src, batch_bool_constant<uint32_t , A, Values...>, Mode, requires_arch<avx2>) noexcept
220230 {
221231 const auto s32 = bitwise_cast<int32_t >(src);
222- store_masked<A>(reinterpret_cast <int32_t *>(mem), s32, mask , Mode {}, avx2 {});
232+ store_masked<A>(reinterpret_cast <int32_t *>(mem), s32, batch_bool_constant< int32_t , A, Values...> {} , Mode {}, avx2 {});
223233 }
224234
225- template <class A , bool ... Values, class Mode >
235+ template <class A , bool ... Values, class Mode ,
236+ class = std::enable_if_t <std::is_base_of<avx2, A>::value && !std::is_base_of<avx512vl_256, A>::value>>
226237 XSIMD_INLINE void store_masked (uint64_t * mem, batch<uint64_t , A> const & src, batch_bool_constant<uint64_t , A, Values...>, Mode, requires_arch<avx2>) noexcept
227238 {
228239 const auto s64 = bitwise_cast<int64_t >(src);
0 commit comments