Skip to content

Commit d114ea1

Browse files
committed
ITS: recover single threaded performance in findRoads
1 parent: 97c94ac; commit: d114ea1

1 file changed

Lines changed: 62 additions & 54 deletions

File tree

Detectors/ITSMFT/ITS/tracking/src/TrackerTraits.cxx

Lines changed: 62 additions & 54 deletions
Original file line number | Diff line number | Diff line change
@@ -760,65 +760,73 @@ void TrackerTraits<nLayers>::findRoads(const int iteration)
760760

761761
bounded_vector<TrackITSExt> tracks(mMemoryPool.get());
762762
mTaskArena->execute([&] {
763-
bounded_vector<int> perSeedCount(trackSeeds.size() + 1, 0, mMemoryPool.get());
764-
tbb::parallel_for(
765-
tbb::blocked_range<int>(0, (int)trackSeeds.size()),
766-
[&](const tbb::blocked_range<int>& Seeds) {
767-
for (int iSeed = Seeds.begin(); iSeed < Seeds.end(); ++iSeed) {
768-
const CellSeed& seed{trackSeeds[iSeed]};
769-
TrackITSExt temporaryTrack{seed};
770-
temporaryTrack.resetCovariance();
771-
temporaryTrack.setChi2(0);
772-
for (int iL{0}; iL < 7; ++iL) {
773-
temporaryTrack.setExternalClusterIndex(iL, seed.getCluster(iL), seed.getCluster(iL) != constants::UnusedIndex);
774-
}
763+
auto forSeed = [&](auto Tag, int iSeed, int offset = 0) {
764+
const CellSeed& seed{trackSeeds[iSeed]};
765+
TrackITSExt temporaryTrack{seed};
766+
temporaryTrack.resetCovariance();
767+
temporaryTrack.setChi2(0);
768+
for (int iL{0}; iL < 7; ++iL) {
769+
temporaryTrack.setExternalClusterIndex(iL, seed.getCluster(iL), seed.getCluster(iL) != constants::UnusedIndex);
770+
}
775771

776-
bool fitSuccess = fitTrack(temporaryTrack, 0, mTrkParams[0].NLayers, 1, mTrkParams[0].MaxChi2ClusterAttachment, mTrkParams[0].MaxChi2NDF);
777-
if (!fitSuccess) {
778-
continue;
779-
}
780-
temporaryTrack.getParamOut() = temporaryTrack.getParamIn();
781-
temporaryTrack.resetCovariance();
782-
temporaryTrack.setChi2(0);
783-
fitSuccess = fitTrack(temporaryTrack, mTrkParams[0].NLayers - 1, -1, -1, mTrkParams[0].MaxChi2ClusterAttachment, mTrkParams[0].MaxChi2NDF, 50.f);
784-
if (!fitSuccess || temporaryTrack.getPt() < mTrkParams[iteration].MinPt[mTrkParams[iteration].NLayers - temporaryTrack.getNClusters()]) {
785-
continue;
786-
}
787-
++perSeedCount[iSeed];
788-
}
789-
});
790-
std::exclusive_scan(perSeedCount.begin(), perSeedCount.end(), perSeedCount.begin(), 0);
791-
auto totalTracks{perSeedCount.back()};
792-
if (totalTracks == 0) {
793-
return;
794-
}
795-
tracks.resize(totalTracks);
772+
bool fitSuccess = fitTrack(temporaryTrack, 0, mTrkParams[0].NLayers, 1, mTrkParams[0].MaxChi2ClusterAttachment, mTrkParams[0].MaxChi2NDF);
773+
if (!fitSuccess) {
774+
return 0;
775+
}
796776

797-
tbb::parallel_for(
798-
tbb::blocked_range<int>(0, (int)trackSeeds.size()),
799-
[&](const tbb::blocked_range<int>& Seeds) {
800-
for (int iSeed = Seeds.begin(); iSeed < Seeds.end(); ++iSeed) {
801-
if (perSeedCount[iSeed] == perSeedCount[iSeed + 1]) {
802-
continue;
803-
}
804-
const CellSeed& seed{trackSeeds[iSeed]};
805-
auto& trk = tracks[perSeedCount[iSeed]] = TrackITSExt(seed);
806-
trk.resetCovariance();
807-
trk.setChi2(0);
808-
for (int iL{0}; iL < 7; ++iL) {
809-
trk.setExternalClusterIndex(iL, seed.getCluster(iL), seed.getCluster(iL) != constants::UnusedIndex);
777+
temporaryTrack.getParamOut() = temporaryTrack.getParamIn();
778+
temporaryTrack.resetCovariance();
779+
temporaryTrack.setChi2(0);
780+
fitSuccess = fitTrack(temporaryTrack, mTrkParams[0].NLayers - 1, -1, -1, mTrkParams[0].MaxChi2ClusterAttachment, mTrkParams[0].MaxChi2NDF, 50.f);
781+
if (!fitSuccess || temporaryTrack.getPt() < mTrkParams[iteration].MinPt[mTrkParams[iteration].NLayers - temporaryTrack.getNClusters()]) {
782+
return 0;
783+
}
784+
785+
if constexpr (decltype(Tag)::value == PassMode::OnePass::value) {
786+
tracks.push_back(temporaryTrack);
787+
} else if constexpr (decltype(Tag)::value == PassMode::TwoPassCount::value) {
788+
// nothing to do
789+
} else if constexpr (decltype(Tag)::value == PassMode::TwoPassInsert::value) {
790+
tracks[offset] = temporaryTrack;
791+
} else {
792+
static_assert(false, "Unknown mode!");
793+
}
794+
return 1;
795+
};
796+
797+
const int nSeeds = static_cast<int>(trackSeeds.size());
798+
if (mTaskArena->max_concurrency() <= 1) {
799+
for (int iSeed{0}; iSeed < nSeeds; ++iSeed) {
800+
forSeed(PassMode::OnePass{}, iSeed);
801+
}
802+
} else {
803+
bounded_vector<int> perSeedCount(nSeeds + 1, 0, mMemoryPool.get());
804+
tbb::parallel_for(
805+
tbb::blocked_range<int>(0, nSeeds),
806+
[&](const tbb::blocked_range<int>& Seeds) {
807+
for (int iSeed = Seeds.begin(); iSeed < Seeds.end(); ++iSeed) {
808+
perSeedCount[iSeed] = forSeed(PassMode::TwoPassCount{}, iSeed);
810809
}
810+
});
811811

812-
bool fitSuccess = fitTrack(trk, 0, mTrkParams[0].NLayers, 1, mTrkParams[0].MaxChi2ClusterAttachment, mTrkParams[0].MaxChi2NDF);
813-
if (!fitSuccess) {
814-
continue;
812+
std::exclusive_scan(perSeedCount.begin(), perSeedCount.end(), perSeedCount.begin(), 0);
813+
auto totalTracks{perSeedCount.back()};
814+
if (totalTracks == 0) {
815+
return;
816+
}
817+
tracks.resize(totalTracks);
818+
819+
tbb::parallel_for(
820+
tbb::blocked_range<int>(0, nSeeds),
821+
[&](const tbb::blocked_range<int>& Seeds) {
822+
for (int iSeed = Seeds.begin(); iSeed < Seeds.end(); ++iSeed) {
823+
if (perSeedCount[iSeed] == perSeedCount[iSeed + 1]) {
824+
continue;
825+
}
826+
forSeed(PassMode::TwoPassInsert{}, iSeed, perSeedCount[iSeed]);
815827
}
816-
trk.getParamOut() = trk.getParamIn();
817-
trk.resetCovariance();
818-
trk.setChi2(0);
819-
fitTrack(trk, mTrkParams[0].NLayers - 1, -1, -1, mTrkParams[0].MaxChi2ClusterAttachment, mTrkParams[0].MaxChi2NDF, 50.f);
820-
}
821-
});
828+
});
829+
}
822830

823831
deepVectorClear(trackSeeds);
824832
tbb::parallel_sort(tracks.begin(), tracks.end(), [](const auto& a, const auto& b) {

0 commit comments

Comments
 (0)