Skip to content

Commit 289184e

Browse files
committed
ITS: recover single threaded performance in findCellsNeighbours
Signed-off-by: Felix Schlepper <felix.schlepper@cern.ch>
1 parent 1177fbe commit 289184e

1 file changed

Lines changed: 95 additions & 91 deletions

File tree

Detectors/ITSMFT/ITS/tracking/src/TrackerTraits.cxx

Lines changed: 95 additions & 91 deletions
Original file line numberDiff line numberDiff line change
@@ -447,128 +447,132 @@ void TrackerTraits<nLayers>::findCellsNeighbours(const int iteration)
447447
#ifdef OPTIMISATION_OUTPUT
448448
std::ofstream off(std::format("cellneighs{}.txt", iteration));
449449
#endif
450-
for (int iLayer{0}; iLayer < mTrkParams[iteration].CellsPerRoad() - 1; ++iLayer) {
451-
const int nextLayerCellsNum{static_cast<int>(mTimeFrame->getCells()[iLayer + 1].size())};
452-
deepVectorClear(mTimeFrame->getCellsNeighbours()[iLayer]);
453-
deepVectorClear(mTimeFrame->getCellsNeighboursLUT()[iLayer]);
454-
if (mTimeFrame->getCells()[iLayer + 1].empty() ||
455-
mTimeFrame->getCellsLookupTable()[iLayer].empty()) {
456-
continue;
457-
}
458450

459-
mTaskArena->execute([&] {
460-
int layerCellsNum{static_cast<int>(mTimeFrame->getCells()[iLayer].size())};
451+
struct Neighbor {
452+
int cell{-1}, nextCell{-1}, level{-1};
453+
};
461454

462-
bounded_vector<int> perCellCount(layerCellsNum + 1, 0, mMemoryPool.get());
463-
tbb::parallel_for(
464-
tbb::blocked_range<int>(0, layerCellsNum),
465-
[&](const tbb::blocked_range<int>& Cells) {
466-
for (int iCell = Cells.begin(); iCell < Cells.end(); ++iCell) {
467-
const auto& currentCellSeed{mTimeFrame->getCells()[iLayer][iCell]};
468-
const int nextLayerTrackletIndex{currentCellSeed.getSecondTrackletIndex()};
469-
const int nextLayerFirstCellIndex{mTimeFrame->getCellsLookupTable()[iLayer][nextLayerTrackletIndex]};
470-
const int nextLayerLastCellIndex{mTimeFrame->getCellsLookupTable()[iLayer][nextLayerTrackletIndex + 1]};
471-
472-
int foundNextCells{0};
473-
for (int iNextCell{nextLayerFirstCellIndex}; iNextCell < nextLayerLastCellIndex; ++iNextCell) {
474-
auto nextCellSeed{mTimeFrame->getCells()[iLayer + 1][iNextCell]}; /// copy
475-
if (nextCellSeed.getFirstTrackletIndex() != nextLayerTrackletIndex) {
476-
break;
477-
}
455+
mTaskArena->execute([&] {
456+
for (int iLayer{0}; iLayer < mTrkParams[iteration].CellsPerRoad() - 1; ++iLayer) {
457+
deepVectorClear(mTimeFrame->getCellsNeighbours()[iLayer]);
458+
deepVectorClear(mTimeFrame->getCellsNeighboursLUT()[iLayer]);
459+
if (mTimeFrame->getCells()[iLayer + 1].empty() ||
460+
mTimeFrame->getCellsLookupTable()[iLayer].empty()) {
461+
continue;
462+
}
478463

479-
if (!nextCellSeed.rotate(currentCellSeed.getAlpha()) ||
480-
!nextCellSeed.propagateTo(currentCellSeed.getX(), getBz())) {
481-
continue;
482-
}
483-
float chi2 = currentCellSeed.getPredictedChi2(nextCellSeed); /// TODO: switch to the chi2 wrt cluster to avoid correlation
464+
int nCells{static_cast<int>(mTimeFrame->getCells()[iLayer].size())};
465+
bounded_vector<Neighbor> cellsNeighbours(mMemoryPool.get());
466+
467+
auto forCellNeighbour = [&](auto Tag, int iCell, int offset = 0) -> int {
468+
const auto& currentCellSeed{mTimeFrame->getCells()[iLayer][iCell]};
469+
const int nextLayerTrackletIndex{currentCellSeed.getSecondTrackletIndex()};
470+
const int nextLayerFirstCellIndex{mTimeFrame->getCellsLookupTable()[iLayer][nextLayerTrackletIndex]};
471+
const int nextLayerLastCellIndex{mTimeFrame->getCellsLookupTable()[iLayer][nextLayerTrackletIndex + 1]};
472+
int foundNextCells{0};
473+
for (int iNextCell{nextLayerFirstCellIndex}; iNextCell < nextLayerLastCellIndex; ++iNextCell) {
474+
auto nextCellSeed{mTimeFrame->getCells()[iLayer + 1][iNextCell]}; /// copy
475+
if (nextCellSeed.getFirstTrackletIndex() != nextLayerTrackletIndex) {
476+
break;
477+
}
478+
479+
if (!nextCellSeed.rotate(currentCellSeed.getAlpha()) ||
480+
!nextCellSeed.propagateTo(currentCellSeed.getX(), getBz())) {
481+
continue;
482+
}
483+
float chi2 = currentCellSeed.getPredictedChi2(nextCellSeed); /// TODO: switch to the chi2 wrt cluster to avoid correlation
484484

485485
#ifdef OPTIMISATION_OUTPUT
486-
bool good{mTimeFrame->getCellsLabel(iLayer)[iCell] == mTimeFrame->getCellsLabel(iLayer + 1)[iNextCell]};
487-
off << std::format("{}\t{:d}\t{}", iLayer, good, chi2) << std::endl;
486+
bool good{mTimeFrame->getCellsLabel(iLayer)[iCell] == mTimeFrame->getCellsLabel(iLayer + 1)[iNextCell]};
487+
off << std::format("{}\t{:d}\t{}", iLayer, good, chi2) << std::endl;
488488
#endif
489489

490-
if (chi2 > mTrkParams[0].MaxChi2ClusterAttachment) {
491-
continue;
492-
}
493-
++foundNextCells;
494-
}
495-
perCellCount[iCell] = foundNextCells;
490+
if (chi2 > mTrkParams[0].MaxChi2ClusterAttachment) {
491+
continue;
496492
}
497-
});
498-
499-
std::exclusive_scan(perCellCount.begin(), perCellCount.end(), perCellCount.begin(), 0);
500-
int totalCellNeighbours = perCellCount.back();
501-
if (totalCellNeighbours == 0) {
502-
deepVectorClear(mTimeFrame->getCellsNeighbours()[iLayer]);
503-
return;
504-
}
505493

506-
struct Neighbor {
507-
int cell{-1}, nextCell{-1}, level{-1};
494+
if constexpr (decltype(Tag)::value == PassMode::OnePass::value) {
495+
cellsNeighbours.emplace_back(iCell, iNextCell, currentCellSeed.getLevel() + 1);
496+
} else if constexpr (decltype(Tag)::value == PassMode::TwoPassCount::value) {
497+
++foundNextCells;
498+
} else if constexpr (decltype(Tag)::value == PassMode::TwoPassInsert::value) {
499+
cellsNeighbours[offset++] = {iCell, iNextCell, currentCellSeed.getLevel() + 1};
500+
} else {
501+
static_assert(false, "Unknown mode!");
502+
}
503+
}
504+
return foundNextCells;
508505
};
509-
bounded_vector<Neighbor> cellsNeighbours(mMemoryPool.get());
510-
cellsNeighbours.resize(totalCellNeighbours);
511506

512-
tbb::parallel_for(
513-
tbb::blocked_range<int>(0, layerCellsNum),
514-
[&](const tbb::blocked_range<int>& Cells) {
515-
for (int iCell = Cells.begin(); iCell < Cells.end(); ++iCell) {
516-
if (perCellCount[iCell] == perCellCount[iCell + 1]) {
517-
continue;
507+
if (mTaskArena->max_concurrency() <= 1) {
508+
for (int iCell{0}; iCell < nCells; ++iCell) {
509+
forCellNeighbour(PassMode::OnePass{}, iCell);
510+
}
511+
} else {
512+
bounded_vector<int> perCellCount(nCells + 1, 0, mMemoryPool.get());
513+
tbb::parallel_for(
514+
tbb::blocked_range<int>(0, nCells),
515+
[&](const tbb::blocked_range<int>& Cells) {
516+
for (int iCell = Cells.begin(); iCell < Cells.end(); ++iCell) {
517+
perCellCount[iCell] = forCellNeighbour(PassMode::TwoPassCount{}, iCell);
518518
}
519-
const auto& currentCellSeed{mTimeFrame->getCells()[iLayer][iCell]};
520-
const int nextLayerTrackletIndex{currentCellSeed.getSecondTrackletIndex()};
521-
const int nextLayerFirstCellIndex{mTimeFrame->getCellsLookupTable()[iLayer][nextLayerTrackletIndex]};
522-
const int nextLayerLastCellIndex{mTimeFrame->getCellsLookupTable()[iLayer][nextLayerTrackletIndex + 1]};
523-
524-
int position = perCellCount[iCell];
525-
for (int iNextCell{nextLayerFirstCellIndex}; iNextCell < nextLayerLastCellIndex; ++iNextCell) {
526-
auto nextCellSeed{mTimeFrame->getCells()[iLayer + 1][iNextCell]}; /// copy
527-
if (nextCellSeed.getFirstTrackletIndex() != nextLayerTrackletIndex) {
528-
break;
529-
}
519+
});
530520

531-
if (!nextCellSeed.rotate(currentCellSeed.getAlpha()) ||
532-
!nextCellSeed.propagateTo(currentCellSeed.getX(), getBz())) {
533-
continue;
534-
}
535-
536-
float chi2 = currentCellSeed.getPredictedChi2(nextCellSeed); /// TODO: switch to the chi2 wrt cluster to avoid correlation
537-
if (chi2 > mTrkParams[0].MaxChi2ClusterAttachment) {
521+
std::exclusive_scan(perCellCount.begin(), perCellCount.end(), perCellCount.begin(), 0);
522+
int totalCellNeighbours = perCellCount.back();
523+
if (totalCellNeighbours == 0) {
524+
deepVectorClear(mTimeFrame->getCellsNeighbours()[iLayer]);
525+
continue;
526+
}
527+
cellsNeighbours.resize(totalCellNeighbours);
528+
529+
tbb::parallel_for(
530+
tbb::blocked_range<int>(0, nCells),
531+
[&](const tbb::blocked_range<int>& Cells) {
532+
for (int iCell = Cells.begin(); iCell < Cells.end(); ++iCell) {
533+
int offset = perCellCount[iCell];
534+
if (offset == perCellCount[iCell + 1]) {
538535
continue;
539536
}
540-
541-
cellsNeighbours[position++] = {iCell, iNextCell, currentCellSeed.getLevel() + 1};
537+
forCellNeighbour(PassMode::TwoPassInsert{}, iCell, offset);
542538
}
543-
}
544-
});
539+
});
540+
}
541+
542+
if (cellsNeighbours.empty()) {
543+
continue;
544+
}
545545

546546
tbb::parallel_sort(cellsNeighbours.begin(), cellsNeighbours.end(), [](const auto& a, const auto& b) {
547547
return a.nextCell < b.nextCell;
548548
});
549549

550550
auto& cellsNeighbourLUT = mTimeFrame->getCellsNeighboursLUT()[iLayer];
551-
cellsNeighbourLUT.assign(nextLayerCellsNum, 0);
551+
cellsNeighbourLUT.assign(mTimeFrame->getCells()[iLayer + 1].size(), 0);
552552
for (const auto& neigh : cellsNeighbours) {
553553
++cellsNeighbourLUT[neigh.nextCell];
554554
}
555555
std::inclusive_scan(cellsNeighbourLUT.begin(), cellsNeighbourLUT.end(), cellsNeighbourLUT.begin());
556556

557-
mTimeFrame->getCellsNeighbours()[iLayer].reserve(totalCellNeighbours);
557+
mTimeFrame->getCellsNeighbours()[iLayer].reserve(cellsNeighbours.size());
558558
std::ranges::transform(cellsNeighbours, std::back_inserter(mTimeFrame->getCellsNeighbours()[iLayer]), [](const auto& neigh) { return neigh.cell; });
559559

560560
auto it = cellsNeighbours.begin();
561-
while (it != cellsNeighbours.end()) {
562-
const int current_nextCell = it->nextCell;
563-
auto group_end = std::find_if_not(it, cellsNeighbours.end(),
564-
[current_nextCell](const auto& nb) { return nb.nextCell == current_nextCell; });
565-
const auto max_level_it = std::max_element(it, group_end,
566-
[](const auto& a, const auto& b) { return a.level < b.level; });
567-
mTimeFrame->getCells()[iLayer + 1][current_nextCell].setLevel(max_level_it->level);
568-
it = group_end;
561+
int current = it->nextCell;
562+
int maxLvl = it->level;
563+
++it;
564+
for (; it != cellsNeighbours.end(); ++it) {
565+
if (it->nextCell == current) {
566+
maxLvl = std::max(maxLvl, it->level);
567+
} else {
568+
mTimeFrame->getCells()[iLayer + 1][current].setLevel(maxLvl);
569+
current = it->nextCell;
570+
maxLvl = it->level;
571+
}
569572
}
570-
});
571-
}
573+
mTimeFrame->getCells()[iLayer + 1][current].setLevel(maxLvl);
574+
}
575+
});
572576
}
573577

574578
template <int nLayers>

0 commit comments

Comments
 (0)