Skip to content

Commit fa8f577

Browse files
committed
Reverse work distribution over bulk tasks
tid=0 is kept last in the distribution so that it is the one most likely to get less general work, since it might already have other work to do (e.g. axpy).
1 parent 2fe10a8 commit fa8f577

1 file changed

Lines changed: 18 additions & 5 deletions

File tree

  • include/dlaf/eigensolver/reduction_to_band

include/dlaf/eigensolver/reduction_to_band/impl.h

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include <cmath>
1616
#include <cstddef>
1717
#include <sstream>
18+
#include <tuple>
1819
#include <utility>
1920
#include <vector>
2021

@@ -639,12 +640,24 @@ void computePanelReflectors(TriggerSender&& trigger, comm::IndexT_MPI rank_v0,
639640

640641
const SizeType nrefls = taus.size().rows();
641642

642-
const std::size_t batch_size = util::ceilDiv(tiles.size(), nworkers);
643-
const std::size_t begin = tid * batch_size;
644-
const std::size_t end = std::min((tid + 1) * batch_size, tiles.size());
645-
646643
const bool rankHasHead = rank_v0 == pcomm.get().rank();
647-
const bool tid_has_head = rankHasHead && tid == 0;
644+
645+
const auto [begin, end, tid_has_head] = [=, ntiles = tiles.size()]() {
646+
const std::size_t batch_size = util::ceilDiv(ntiles, nworkers);
647+
648+
const std::size_t mirror_tid = nworkers - 1 - tid;
649+
std::size_t begin = mirror_tid * batch_size;
650+
std::size_t end = std::min((mirror_tid + 1) * batch_size, ntiles);
651+
652+
std::swap(begin, end);
653+
654+
begin = ntiles - begin;
655+
end = end > ntiles ? ntiles : ntiles - end;
656+
657+
const bool tid_has_head = rankHasHead && begin == 0;
658+
659+
return std::tuple<std::size_t, std::size_t, bool>{begin, end, tid_has_head};
660+
}();
648661

649662
if (tid == 0) {
650663
// Note: (x0, x_squares, w[pt_cols], pt_row0[pt_cols])

0 commit comments

Comments
 (0)