Skip to content

Commit e45e4cf

Browse files
committed
ITS: use generic global scratch space for beams
1 parent 1444985 commit e45e4cf

6 files changed

Lines changed: 58 additions & 20 deletions

File tree

Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TimeFrameGPU.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,7 @@ class TimeFrameGPU : public TimeFrame<NLayers>
8787
void createTrackITSExtDevice(const size_t);
8888
void loadTrackExtensionStartStatesDevice();
8989
void createTrackExtensionCandidatesDevice(const size_t);
90+
void createTrackExtensionScratchDevice(const size_t, const int);
9091
void downloadTrackITSExtDevice();
9192
void downloadTrackExtensionCandidatesDevice();
9293
void downloadCellsNeighboursDevice(std::vector<bounded_vector<std::pair<int, int>>>&, const int);
@@ -125,6 +126,8 @@ class TimeFrameGPU : public TimeFrame<NLayers>
125126
TrackITSExt* getDeviceTrackITSExt() { return mTrackITSExtDevice; }
126127
TrackExtensionStartState<NLayers>* getDeviceTrackExtensionStartStates() { return mTrackExtensionStartStatesDevice; }
127128
TrackExtensionCandidate<NLayers>* getDeviceTrackExtensionCandidates() { return mTrackExtensionCandidatesDevice; }
129+
TrackExtensionHypothesis<NLayers>* getDeviceActiveTrackExtensionHypotheses() { return mActiveTrackExtensionHypothesesDevice; }
130+
TrackExtensionHypothesis<NLayers>* getDeviceNextTrackExtensionHypotheses() { return mNextTrackExtensionHypothesesDevice; }
128131
int* getDeviceNeighboursLUT(const int layer) { return mNeighboursLUTDevice[layer]; }
129132
gsl::span<int*> getDeviceNeighboursLUTs() { return mNeighboursLUTDevice; }
130133
gpuPair<int, int>* getDeviceNeighbourPairs(const int layer) { return mNeighbourPairsDevice[layer]; }
@@ -224,6 +227,8 @@ class TimeFrameGPU : public TimeFrame<NLayers>
224227
TrackITSExt* mTrackITSExtDevice;
225228
TrackExtensionStartState<NLayers>* mTrackExtensionStartStatesDevice{nullptr};
226229
TrackExtensionCandidate<NLayers>* mTrackExtensionCandidatesDevice{nullptr};
230+
TrackExtensionHypothesis<NLayers>* mActiveTrackExtensionHypothesesDevice{nullptr};
231+
TrackExtensionHypothesis<NLayers>* mNextTrackExtensionHypothesesDevice{nullptr};
227232
std::array<gpuPair<int, int>*, NLayers - 2> mNeighbourPairsDevice;
228233
std::array<int*, NLayers - 2> mNeighboursDevice;
229234
std::array<TrackingFrameInfo*, NLayers> mTrackingFrameInfoDevice;

Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackingKernels.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,8 @@ void computeTrackExtensionCandidatesHandler(const TrackExtensionStartState<NLaye
4747
const int** ROFClusters,
4848
const TrackingFrameInfo** trackingFrameInfo,
4949
TrackExtensionCandidate<NLayers>* candidates,
50+
TrackExtensionHypothesis<NLayers>* activeHypotheses,
51+
TrackExtensionHypothesis<NLayers>* nextHypotheses,
5052
const std::vector<float>& layerRadiiHost,
5153
const std::vector<float>& layerxX0Host,
5254
const int nTracks,

Detectors/ITSMFT/ITS/tracking/GPU/cuda/TimeFrameGPU.cu

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212

1313
#include <cuda_runtime.h>
1414

15+
#include <algorithm>
1516
#include <unistd.h>
1617
#include <vector>
1718

@@ -574,6 +575,21 @@ void TimeFrameGPU<NLayers>::createTrackExtensionCandidatesDevice(const size_t nT
574575
allocMem(reinterpret_cast<void**>(&mTrackExtensionCandidatesDevice), nCandidates * sizeof(o2::its::TrackExtensionCandidate<NLayers>), this->hasFrameworkAllocator(), (o2::gpu::GPUMemoryResource::MEMORY_GPU | o2::gpu::GPUMemoryResource::MEMORY_STACK));
575576
}
576577

578+
template <int NLayers>
579+
void TimeFrameGPU<NLayers>::createTrackExtensionScratchDevice(const size_t nTracks, const int beamWidth)
580+
{
581+
GPUTimer timer("reserving track extension scratch");
582+
const size_t nHypotheses = nTracks * std::max(1, beamWidth);
583+
GPULog("gpu-allocation: reserving {} track extension hypotheses per scratch buffer, for {:.2f} MB each.", nHypotheses, nHypotheses * sizeof(o2::its::TrackExtensionHypothesis<NLayers>) / constants::MB);
584+
mActiveTrackExtensionHypothesesDevice = nullptr;
585+
mNextTrackExtensionHypothesesDevice = nullptr;
586+
if (nHypotheses == 0) {
587+
return;
588+
}
589+
allocMem(reinterpret_cast<void**>(&mActiveTrackExtensionHypothesesDevice), nHypotheses * sizeof(o2::its::TrackExtensionHypothesis<NLayers>), this->hasFrameworkAllocator(), (o2::gpu::GPUMemoryResource::MEMORY_GPU | o2::gpu::GPUMemoryResource::MEMORY_STACK));
590+
allocMem(reinterpret_cast<void**>(&mNextTrackExtensionHypothesesDevice), nHypotheses * sizeof(o2::its::TrackExtensionHypothesis<NLayers>), this->hasFrameworkAllocator(), (o2::gpu::GPUMemoryResource::MEMORY_GPU | o2::gpu::GPUMemoryResource::MEMORY_STACK));
591+
}
592+
577593
template <int NLayers>
578594
void TimeFrameGPU<NLayers>::downloadCellsDevice()
579595
{

Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -394,9 +394,11 @@ template <int NLayers>
394394
void TrackerTraitsGPU<NLayers>::buildTrackExtensionCandidates(const int iteration, typename TrackerTraits<NLayers>::TrackExtensionCandidates& candidatesPerTrack)
395395
{
396396
const auto nTracks = this->mTimeFrame->getTracks().size();
397+
const int beamWidth = std::max(1, this->mTrkParams[iteration].TrackFollowerBeamWidth);
397398
mTimeFrameGPU->syncStreams();
398399
mTimeFrameGPU->loadTrackExtensionStartStatesDevice();
399400
mTimeFrameGPU->createTrackExtensionCandidatesDevice(nTracks);
401+
mTimeFrameGPU->createTrackExtensionScratchDevice(nTracks, beamWidth);
400402
computeTrackExtensionCandidatesHandler<NLayers>(mTimeFrameGPU->getDeviceTrackExtensionStartStates(),
401403
mTimeFrameGPU->getDeviceIndexTableUtils(),
402404
mTimeFrameGPU->getDeviceROFMaskTableView(),
@@ -407,12 +409,14 @@ void TrackerTraitsGPU<NLayers>::buildTrackExtensionCandidates(const int iteratio
407409
mTimeFrameGPU->getDeviceROFrameClusters(),
408410
mTimeFrameGPU->getDeviceArrayTrackingFrameInfo(),
409411
mTimeFrameGPU->getDeviceTrackExtensionCandidates(),
412+
mTimeFrameGPU->getDeviceActiveTrackExtensionHypotheses(),
413+
mTimeFrameGPU->getDeviceNextTrackExtensionHypotheses(),
410414
this->mTrkParams[iteration].LayerRadii,
411415
this->mTrkParams[iteration].LayerxX0,
412416
static_cast<int>(nTracks),
413417
this->mTrkParams[iteration].NLayers,
414418
this->mTrkParams[iteration].PhiBins,
415-
this->mTrkParams[iteration].TrackFollowerBeamWidth,
419+
beamWidth,
416420
this->mTrkParams[iteration].PassFlags[IterationStep::TrackFollowerTop],
417421
this->mTrkParams[iteration].PassFlags[IterationStep::TrackFollowerBot],
418422
this->mBz,

Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu

Lines changed: 20 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -103,16 +103,6 @@ struct compare_track_chi2 {
103103
}
104104
};
105105

106-
template <int NLayers>
107-
struct TrackExtensionHypothesis {
108-
o2::track::TrackParCov param;
109-
std::array<int, NLayers> clusters{};
110-
TimeStamp time;
111-
float chi2{0.f};
112-
int nClusters{0};
113-
int edgeLayer{constants::UnusedIndex};
114-
};
115-
116106
template <int NLayers>
117107
GPUdi() bool isBetterTrackExtensionHypothesis(const TrackExtensionHypothesis<NLayers>& a, const TrackExtensionHypothesis<NLayers>& b)
118108
{
@@ -236,13 +226,13 @@ GPUdi() bool followTrackExtensionDirection(const TrackExtensionStartState<NLayer
236226
const bool outward,
237227
const o2::base::Propagator* propagator,
238228
const o2::base::PropagatorF::MatCorrType matCorrType,
229+
TrackExtensionHypothesis<NLayers>* activeHypotheses,
230+
TrackExtensionHypothesis<NLayers>* nextHypotheses,
239231
TrackExtensionStartState<NLayers>& updatedTrack)
240232
{
241233
const int step = outward ? 1 : -1;
242234
const int end = outward ? nLayers - 1 : 0;
243-
const int beamWidth = o2::gpu::CAMath::Min(o2::gpu::CAMath::Max(beamWidthConfig, 1), MaxTrackExtensionGPUBeamWidth);
244-
TrackExtensionHypothesis<NLayers> activeHypotheses[MaxTrackExtensionGPUBeamWidth];
245-
TrackExtensionHypothesis<NLayers> nextHypotheses[MaxTrackExtensionGPUBeamWidth];
235+
const int beamWidth = o2::gpu::CAMath::Max(beamWidthConfig, 1);
246236
int nActive{1};
247237
int nNext{0};
248238
initialiseTrackExtensionHypothesis(track, outward, activeHypotheses[0]);
@@ -431,6 +421,8 @@ GPUg() void __launch_bounds__(256, 1) computeTrackExtensionCandidatesKernel(cons
431421
const int** ROFClusters,
432422
const TrackingFrameInfo** trackingFrameInfo,
433423
TrackExtensionCandidate<NLayers>* candidates,
424+
TrackExtensionHypothesis<NLayers>* activeHypothesesScratch,
425+
TrackExtensionHypothesis<NLayers>* nextHypothesesScratch,
434426
const float* layerRadii,
435427
const float* layerxX0,
436428
const int nTracks,
@@ -452,26 +444,28 @@ GPUg() void __launch_bounds__(256, 1) computeTrackExtensionCandidatesKernel(cons
452444
candidates[getFlatTrackExtensionCandidateIndex(iTrack, iCandidate)].reset();
453445
}
454446
const auto& track = tracks[iTrack];
447+
auto* activeHypotheses = activeHypothesesScratch + (iTrack * beamWidth);
448+
auto* nextHypotheses = nextHypothesesScratch + (iTrack * beamWidth);
455449
int slot{0};
456450
if (extendTop && track.lastClusterLayer != nLayers - 1) {
457451
TrackExtensionStartState<NLayers> topCandidate;
458-
if (followTrackExtensionDirection(track, *utils, rofMask, rofOverlaps, clusters, usedClusters, clustersIndexTables, ROFClusters, trackingFrameInfo, layerRadii, layerxX0, nLayers, phiBins, beamWidth, bz, maxChi2ClusterAttachment, maxChi2NDF, nSigmaCutPhi, nSigmaCutZ, true, propagator, matCorrType, topCandidate)) {
452+
if (followTrackExtensionDirection(track, *utils, rofMask, rofOverlaps, clusters, usedClusters, clustersIndexTables, ROFClusters, trackingFrameInfo, layerRadii, layerxX0, nLayers, phiBins, beamWidth, bz, maxChi2ClusterAttachment, maxChi2NDF, nSigmaCutPhi, nSigmaCutZ, true, propagator, matCorrType, activeHypotheses, nextHypotheses, topCandidate)) {
459453
writeTrackExtensionCandidate(iTrack, track, topCandidate, candidates, slot);
460454
if (extendBot && topCandidate.firstClusterLayer != 0) {
461455
TrackExtensionStartState<NLayers> topBottomCandidate;
462-
if (followTrackExtensionDirection(topCandidate, *utils, rofMask, rofOverlaps, clusters, usedClusters, clustersIndexTables, ROFClusters, trackingFrameInfo, layerRadii, layerxX0, nLayers, phiBins, beamWidth, bz, maxChi2ClusterAttachment, maxChi2NDF, nSigmaCutPhi, nSigmaCutZ, false, propagator, matCorrType, topBottomCandidate)) {
456+
if (followTrackExtensionDirection(topCandidate, *utils, rofMask, rofOverlaps, clusters, usedClusters, clustersIndexTables, ROFClusters, trackingFrameInfo, layerRadii, layerxX0, nLayers, phiBins, beamWidth, bz, maxChi2ClusterAttachment, maxChi2NDF, nSigmaCutPhi, nSigmaCutZ, false, propagator, matCorrType, activeHypotheses, nextHypotheses, topBottomCandidate)) {
463457
writeTrackExtensionCandidate(iTrack, track, topBottomCandidate, candidates, slot);
464458
}
465459
}
466460
}
467461
}
468462
if (extendBot && track.firstClusterLayer != 0) {
469463
TrackExtensionStartState<NLayers> bottomCandidate;
470-
if (followTrackExtensionDirection(track, *utils, rofMask, rofOverlaps, clusters, usedClusters, clustersIndexTables, ROFClusters, trackingFrameInfo, layerRadii, layerxX0, nLayers, phiBins, beamWidth, bz, maxChi2ClusterAttachment, maxChi2NDF, nSigmaCutPhi, nSigmaCutZ, false, propagator, matCorrType, bottomCandidate)) {
464+
if (followTrackExtensionDirection(track, *utils, rofMask, rofOverlaps, clusters, usedClusters, clustersIndexTables, ROFClusters, trackingFrameInfo, layerRadii, layerxX0, nLayers, phiBins, beamWidth, bz, maxChi2ClusterAttachment, maxChi2NDF, nSigmaCutPhi, nSigmaCutZ, false, propagator, matCorrType, activeHypotheses, nextHypotheses, bottomCandidate)) {
471465
writeTrackExtensionCandidate(iTrack, track, bottomCandidate, candidates, slot);
472466
if (extendTop && bottomCandidate.lastClusterLayer != nLayers - 1) {
473467
TrackExtensionStartState<NLayers> bottomTopCandidate;
474-
if (followTrackExtensionDirection(bottomCandidate, *utils, rofMask, rofOverlaps, clusters, usedClusters, clustersIndexTables, ROFClusters, trackingFrameInfo, layerRadii, layerxX0, nLayers, phiBins, beamWidth, bz, maxChi2ClusterAttachment, maxChi2NDF, nSigmaCutPhi, nSigmaCutZ, true, propagator, matCorrType, bottomTopCandidate)) {
468+
if (followTrackExtensionDirection(bottomCandidate, *utils, rofMask, rofOverlaps, clusters, usedClusters, clustersIndexTables, ROFClusters, trackingFrameInfo, layerRadii, layerxX0, nLayers, phiBins, beamWidth, bz, maxChi2ClusterAttachment, maxChi2NDF, nSigmaCutPhi, nSigmaCutZ, true, propagator, matCorrType, activeHypotheses, nextHypotheses, bottomTopCandidate)) {
475469
writeTrackExtensionCandidate(iTrack, track, bottomTopCandidate, candidates, slot);
476470
}
477471
}
@@ -948,6 +942,8 @@ void computeTrackExtensionCandidatesHandler(const TrackExtensionStartState<NLaye
948942
const int** ROFClusters,
949943
const TrackingFrameInfo** trackingFrameInfo,
950944
TrackExtensionCandidate<NLayers>* candidates,
945+
TrackExtensionHypothesis<NLayers>* activeHypotheses,
946+
TrackExtensionHypothesis<NLayers>* nextHypotheses,
951947
const std::vector<float>& layerRadiiHost,
952948
const std::vector<float>& layerxX0Host,
953949
const int nTracks,
@@ -965,7 +961,7 @@ void computeTrackExtensionCandidatesHandler(const TrackExtensionStartState<NLaye
965961
const o2::base::PropagatorF::MatCorrType matCorrType,
966962
gpu::Stream& stream)
967963
{
968-
if (nTracks <= 0 || candidates == nullptr) {
964+
if (nTracks <= 0 || candidates == nullptr || activeHypotheses == nullptr || nextHypotheses == nullptr) {
969965
return;
970966
}
971967
thrust::device_vector<float> layerRadii(layerRadiiHost);
@@ -981,6 +977,8 @@ void computeTrackExtensionCandidatesHandler(const TrackExtensionStartState<NLaye
981977
ROFClusters,
982978
trackingFrameInfo,
983979
candidates,
980+
activeHypotheses,
981+
nextHypotheses,
984982
thrust::raw_pointer_cast(&layerRadii[0]),
985983
thrust::raw_pointer_cast(&layerxX0[0]),
986984
nTracks,
@@ -1529,6 +1527,8 @@ template void computeTrackExtensionCandidatesHandler<7>(const TrackExtensionStar
15291527
const int** ROFClusters,
15301528
const TrackingFrameInfo** trackingFrameInfo,
15311529
TrackExtensionCandidate<7>* candidates,
1530+
TrackExtensionHypothesis<7>* activeHypotheses,
1531+
TrackExtensionHypothesis<7>* nextHypotheses,
15321532
const std::vector<float>& layerRadiiHost,
15331533
const std::vector<float>& layerxX0Host,
15341534
const int nTracks,
@@ -1737,6 +1737,8 @@ template void computeTrackExtensionCandidatesHandler<11>(const TrackExtensionSta
17371737
const int** ROFClusters,
17381738
const TrackingFrameInfo** trackingFrameInfo,
17391739
TrackExtensionCandidate<11>* candidates,
1740+
TrackExtensionHypothesis<11>* activeHypotheses,
1741+
TrackExtensionHypothesis<11>* nextHypotheses,
17401742
const std::vector<float>& layerRadiiHost,
17411743
const std::vector<float>& layerxX0Host,
17421744
const int nTracks,

Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackExtensionCandidate.h

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,16 @@
2323
namespace o2::its
2424
{
2525

26+
template <int NLayers>
27+
struct TrackExtensionHypothesis {
28+
o2::track::TrackParCov param;
29+
std::array<int, NLayers> clusters{};
30+
TimeStamp time;
31+
float chi2{0.f};
32+
int nClusters{0};
33+
int edgeLayer{constants::UnusedIndex};
34+
};
35+
2636
template <int NLayers>
2737
struct TrackExtensionStartState {
2838
o2::track::TrackParCov paramIn;
@@ -65,7 +75,6 @@ struct TrackExtensionCandidate {
6575
};
6676

6777
inline constexpr int MaxTrackExtensionCandidatesPerTrack = 4;
68-
inline constexpr int MaxTrackExtensionGPUBeamWidth = 4;
6978

7079
inline constexpr size_t getFlatTrackExtensionCandidateIndex(size_t trackIndex, size_t candidateIndex)
7180
{

0 commit comments

Comments
 (0)