@@ -132,14 +132,10 @@ auto swizzle_bytes(const xsimd::batch<uint8_t, Arch>& batch,
132132 constexpr auto kSelfSwizzle = array_to_batch_constant<kPlan .self_lane , Arch>();
133133 constexpr auto kCrossSwizzle = array_to_batch_constant<kPlan .cross_lane , Arch>();
134134
135- struct LaneMask {
136- static constexpr uint8_t get (uint8_t i, uint8_t n) {
137- constexpr auto kMask = std::array{kIdx ...};
138- return kMask [i] % (kMask .size () / 2 );
139- }
140- };
135+ constexpr auto kLaneMaskArr =
136+ std::array{static_cast <uint8_t >(kIdx % (mask.size / 2 ))...};
137+ constexpr auto kLaneMask = array_to_batch_constant<kLaneMaskArr , Arch>();
141138
142- constexpr auto kLaneMask = xsimd::make_batch_constant<uint8_t , Arch, LaneMask>();
143139 if constexpr (isOnlyFromLow (mask)) {
144140 auto broadcast = _mm256_permute2x128_si256 (batch, batch, 0x00 ); // [low | low]
145141 return _mm256_shuffle_epi8 (broadcast, kLaneMask .as_batch ());
@@ -163,17 +159,21 @@ constexpr auto make_mult(xsimd::batch_constant<Int, Arch, kShifts...>) {
163159 return xsimd::batch_constant<Int, Arch, static_cast <Int>(1u << kShifts )...>();
164160}
165161
166- template <typename Int, int kOffset , int kLength , Int... kVals >
167- struct SelectStride {
168- static constexpr auto kShiftsArr = std::array{kVals ...};
169-
170- static constexpr Int get (int i, int n) { return kShiftsArr [kLength * i + kOffset ]; }
171- };
// Compile-time helper that extracts a strided subsequence of `shifts`.
//
// Returns a std::array<Int, shifts.size() / kLength> whose element i is
// shifts[kLength * i + kOffset], i.e. one value out of every `kLength`, starting at
// index `kOffset`. Note that `Int` is the element type of the *result*; it need not be
// the element type of `shifts` (values are converted on assignment).
162+ template <typename Int, int kOffset , int kLength , typename Arr>
163+ constexpr auto select_stride_impl (Arr shifts) {
// `shifts` is taken by value so that `shifts.size ()` is usable as an array bound here.
164+ std::array<Int, shifts.size () / kLength > out{};
165+ for (std::size_t i = 0 ; i < out.size (); ++i) {
166+ out[i] = shifts[kLength * i + kOffset ];
167+ }
168+ return out;
169+ }
172170
// Builds a constant of `ToInt` values out of the `kShifts` carried by the input batch
// constant, keeping one shift out of every sizeof(ToInt) / sizeof(Int) and starting at
// position `kOffset`. The function argument is only used to deduce `Int`, `Arch` and
// `kShifts`; its runtime value is never read.
//
// This span shows both sides of the change: the `-` lines are the previous
// generator-struct implementation, the `+` lines are its array-based replacement.
173171template <typename ToInt, int kOffset , typename Int, typename Arch, Int... kShifts >
174172constexpr auto select_stride (xsimd::batch_constant<Int, Arch, kShifts ...>) {
175- return xsimd::make_batch_constant<
176- ToInt, Arch, SelectStride<Int, kOffset , sizeof (ToInt) / sizeof (Int), kShifts ...>>();
// The selected values are computed as a constexpr std::array<ToInt, ...> first.
173+ constexpr auto kStridesArr =
174+ select_stride_impl<ToInt, kOffset , sizeof (ToInt) / sizeof (Int)>(
175+ std::array{kShifts ...});
// NOTE(review): array_to_batch_constant is defined outside this view; presumably it
// turns the constexpr array into an xsimd::batch_constant for `Arch` -- confirm.
176+ return array_to_batch_constant<kStridesArr , Arch>();
177177}
178178
179179template <typename Arch>
@@ -272,17 +272,15 @@ auto right_shift_by_excess(const xsimd::batch<Int, Arch>& batch,
272272 // These conditions are the ones matched in `left_shift`, i.e. the ones where variable
273273 // shift right will not be available but a left shift (fallback) exists.
274274 if constexpr (kHasSse2 && (IntSize != sizeof (uint64_t ))) {
275- static constexpr auto kShiftsArr = std::array{kShifts ...};
276- static constexpr Int kMaxRightShift = max_value (kShiftsArr );
277-
278- struct MakeShifts {
279- static constexpr Int get (int i, int n) { return kMaxRightShift - kShiftsArr .at (i); }
280- };
275+ constexpr auto kShiftsArr = std::array{kShifts ...};
276+ constexpr Int kMaxRightShift = max_value (kShiftsArr );
277+ constexpr auto kLShiftsArr =
278+ std::array{static_cast <Int>(kMaxRightShift - kShifts )...};
281279
282280 // TODO(xsimd 14.0) this can be simplified to
283281 // constexpr auto kRShifts = xsimd::make_batch_constant<Int, kMaxRightShift, Arch>() -
284282 // shifts;
285- constexpr auto kLShifts = xsimd::make_batch_constant<Int , Arch, MakeShifts >();
283+ constexpr auto kLShifts = array_to_batch_constant< kLShiftsArr , Arch>();
286284
287285 const auto lshifted = left_shift (batch, kLShifts );
288286 // TODO(xsimd 14.0) this can be simplified to
@@ -365,9 +363,9 @@ struct KernelShape {
365363template <typename UnpackedUint, int kPackedBitSize , int kSimdBitSize >
366364struct KernelTraits {
367365 static constexpr KernelShape kShape = {
368- /* .simd_bit_size_= */ kSimdBitSize ,
369- /* .unpacked_bit_size= */ 8 * sizeof (UnpackedUint),
370- /* .packed_bit_size_= */ kPackedBitSize ,
366+ .simd_bit_size_ = kSimdBitSize ,
367+ . unpacked_bit_size_ = 8 * sizeof (UnpackedUint),
368+ .packed_bit_size_ = kPackedBitSize ,
371369 };
372370
373371 using unpacked_type = UnpackedUint;
@@ -459,9 +457,9 @@ constexpr MediumKernelPlanSize BuildMediumPlanSize(const KernelShape& shape) {
459457 } while (packed_start_bit % 8 != 0 );
460458
461459 return {
462- /* .reads_per_kernel_= */ reads_per_kernel,
463- /* .swizzles_per_read_= */ swizzles_per_read,
464- /* .shifts_per_swizzle_= */ shifts_per_swizzle,
460+ .reads_per_kernel_ = reads_per_kernel,
461+ .swizzles_per_read_ = swizzles_per_read,
462+ .shifts_per_swizzle_ = shifts_per_swizzle,
465463 };
466464}
467465
@@ -617,7 +615,7 @@ struct MediumKernel {
617615 const auto shifted = right_shift_by_excess (words, kRightShifts );
618616 const auto vals = shifted & kMask ;
619617 if constexpr (std::is_same_v<unpacked_type, bool >) {
620- const xsimd::batch_bool<uint_type, arch_type> bools ( vals) ;
618+ const xsimd::batch_bool<uint_type, arch_type> bools = vals != 0 ;
621619 bools.store_unaligned (out + kOutOffset );
622620 } else {
623621 vals.store_unaligned (out + kOutOffset );
0 commit comments