@@ -1517,13 +1517,33 @@ namespace xsimd
15171517 {
15181518 XSIMD_IF_CONSTEXPR ((8 * sizeof (T)) >= batch_bool<T, A>::size)
15191519 {
1520+ // (A) Easy case: the number of slots fits in T.
15201521 const auto zero = detail::broadcast<as_unsigned_integer_t <T>, types::detail::rvv_width_m1>(T (0 ));
15211522 auto ones = detail::broadcast<as_unsigned_integer_t <T>, A::width>(1 );
15221523 auto iota = detail::vindex<A, as_unsigned_integer_t <T>>();
15231524 auto upowers = detail::rvvsll (ones, iota);
15241525 auto r = __riscv_vredor (self.data .as_mask (), upowers, (typename decltype (zero)::register_type)zero, batch_bool<T, A>::size);
15251526 return detail::reduce_scalar<A, as_unsigned_integer_t <T>>(r);
15261527 }
1528+ else XSIMD_IF_CONSTEXPR ((2 * 8 * sizeof (T)) == batch_bool<T, A>::size)
1529+ {
1530+ // (B) We need two rounds, one for the low part, one for the high part.
1531+
1532+ // The low part is similar to the approach in (A).
1533+ const auto zero = detail::broadcast<as_unsigned_integer_t <T>, types::detail::rvv_width_m1>(T (0 ));
1534+ auto ones = detail::broadcast<as_unsigned_integer_t <T>, A::width>(1 );
1535+ auto iota_low = detail::vindex<A, as_unsigned_integer_t <T>>();
1536+ auto upowers_low = detail::rvvsll (ones, iota_low);
1537+ auto r_low = __riscv_vredor (self.data .as_mask (), upowers_low, (typename decltype (zero)::register_type)zero, batch_bool<T, A>::size);
1538+
1539+ // The high part requires a sub before the shift. The lower part
1540+ // gets a negative number interpreted as a very high positive
1541+ // number because we work on unsigned number.
1542+ auto iota_high = __riscv_vsub (iota_low, 8 * sizeof (T), batch_bool<T, A>::size);
1543+ auto upowers_high = detail::rvvsll (ones, iota_high);
1544+ auto r_high = __riscv_vredor (self.data .as_mask (), upowers_high, (typename decltype (zero)::register_type)zero, batch_bool<T, A>::size);
1545+ return detail::reduce_scalar<A, as_unsigned_integer_t <T>>(r_low) | (detail::reduce_scalar<A, as_unsigned_integer_t <T>>(r_high) << 8 * sizeof (T));
1546+ }
15271547 else
15281548 {
15291549 return mask (self, common {});
0 commit comments