Skip to content

Commit a04bc42

Browse files
committed
ITS: re-enable the possibility of extending tracks
1 parent aafbebf commit a04bc42

16 files changed

Lines changed: 1639 additions & 6 deletions

Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TimeFrameGPU.h

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include "ITStracking/BoundedAllocator.h"
2020
#include "ITStracking/TimeFrame.h"
2121
#include "ITStracking/Configuration.h"
22+
#include "ITStracking/TrackExtensionCandidate.h"
2223
#include "ITStrackingGPU/Utils.h"
2324

2425
namespace o2::its::gpu
@@ -84,7 +85,13 @@ class TimeFrameGPU : public TimeFrame<NLayers>
8485
void createNeighboursDevice(const unsigned int layer);
8586
void createNeighboursLUTDevice(const int, const unsigned int);
8687
void createTrackITSExtDevice(const size_t);
88+
void loadTrackExtensionStartStatesDevice();
89+
void createTrackExtensionCandidatesDevice(const size_t);
90+
void createTrackExtensionScratchDevice(const int nThreads, const int beamWidth);
91+
void createTrackExtensionResultsDevice(const size_t);
8792
void downloadTrackITSExtDevice();
93+
void downloadTrackExtensionCandidatesDevice();
94+
void downloadTrackExtensionResultsDevice();
8895
void downloadCellsNeighboursDevice(std::vector<bounded_vector<std::pair<int, int>>>&, const int);
8996
void downloadNeighboursLUTDevice(bounded_vector<int>&, const int);
9097
void downloadCellsDevice();
@@ -111,13 +118,21 @@ class TimeFrameGPU : public TimeFrame<NLayers>
111118
const auto getDeviceROFMaskTableView() { return mDeviceROFMaskTableView; }
112119
int* getDeviceROFramesClusters(const int layer) { return mROFramesClustersDevice[layer]; }
113120
auto& getTrackITSExt() { return mTrackITSExt; }
121+
auto& getTrackExtensionCandidates() { return mTrackExtensionCandidates; }
122+
auto& getTrackExtensionResults() { return mTrackExtensionResults; }
114123
Vertex* getDeviceVertices() { return mPrimaryVerticesDevice; }
115124
int* getDeviceROFramesPV() { return mROFramesPVDevice; }
116125
unsigned char* getDeviceUsedClusters(const int);
117126
const o2::base::Propagator* getChainPropagator();
118127

119128
// Hybrid
120129
TrackITSExt* getDeviceTrackITSExt() { return mTrackITSExtDevice; }
130+
TrackExtensionStartState<NLayers>* getDeviceTrackExtensionStartStates() { return mTrackExtensionStartStatesDevice; }
131+
TrackExtensionCandidate<NLayers>* getDeviceTrackExtensionCandidates() { return mTrackExtensionCandidatesDevice; }
132+
int* getDeviceTrackExtensionCandidateOffsets() { return mTrackExtensionCandidateOffsetsDevice; }
133+
TrackExtensionHypothesis<NLayers>* getDeviceActiveTrackExtensionHypotheses() { return mActiveTrackExtensionHypothesesDevice; }
134+
TrackExtensionHypothesis<NLayers>* getDeviceNextTrackExtensionHypotheses() { return mNextTrackExtensionHypothesesDevice; }
135+
TrackExtensionResult<NLayers>* getDeviceTrackExtensionResults() { return mTrackExtensionResultsDevice; }
121136
int* getDeviceNeighboursLUT(const int layer) { return mNeighboursLUTDevice[layer]; }
122137
gsl::span<int*> getDeviceNeighboursLUTs() { return mNeighboursLUTDevice; }
123138
gpuPair<int, int>* getDeviceNeighbourPairs(const int layer) { return mNeighbourPairsDevice[layer]; }
@@ -215,6 +230,13 @@ class TimeFrameGPU : public TimeFrame<NLayers>
215230
float** mCellSeedsChi2DeviceArray;
216231

217232
TrackITSExt* mTrackITSExtDevice;
233+
TrackExtensionStartState<NLayers>* mTrackExtensionStartStatesDevice{nullptr};
234+
TrackExtensionCandidate<NLayers>* mTrackExtensionCandidatesDevice{nullptr};
235+
int* mTrackExtensionCandidateOffsetsDevice{nullptr};
236+
TrackExtensionHypothesis<NLayers>* mActiveTrackExtensionHypothesesDevice{nullptr};
237+
TrackExtensionHypothesis<NLayers>* mNextTrackExtensionHypothesesDevice{nullptr};
238+
TrackExtensionResult<NLayers>* mTrackExtensionResultsDevice{nullptr};
239+
unsigned int mNTrackExtensionResults{0};
218240
std::array<gpuPair<int, int>*, NLayers - 2> mNeighbourPairsDevice;
219241
std::array<int*, NLayers - 2> mNeighboursDevice;
220242
std::array<TrackingFrameInfo*, NLayers> mTrackingFrameInfoDevice;
@@ -231,6 +253,12 @@ class TimeFrameGPU : public TimeFrame<NLayers>
231253

232254
// Temporary buffer for storing output tracks from GPU tracking
233255
bounded_vector<TrackITSExt> mTrackITSExt;
256+
// Temporary buffer for compact track states used by GPU track extension
257+
bounded_vector<TrackExtensionStartState<NLayers>> mTrackExtensionStartStates;
258+
// Temporary buffer for compact track extension proposals from GPU tracking
259+
bounded_vector<TrackExtensionCandidate<NLayers>> mTrackExtensionCandidates;
260+
// Temporary buffer for fitted track extension proposals from GPU tracking
261+
bounded_vector<TrackExtensionResult<NLayers>> mTrackExtensionResults;
234262
};
235263

236264
template <int NLayers>

Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackerTraitsGPU.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ class TrackerTraitsGPU final : public TrackerTraits<NLayers>
3535
void computeLayerCells(const int iteration) final;
3636
void findCellsNeighbours(const int iteration) final;
3737
void findRoads(const int iteration) final;
38+
void extendTracks(const int iteration) final;
3839

3940
void setBz(float) final;
4041

@@ -47,6 +48,11 @@ class TrackerTraitsGPU final : public TrackerTraits<NLayers>
4748
int getTFNumberOfCells() const override;
4849

4950
private:
51+
bool hasTrackFollower(const int iteration) const;
52+
53+
void buildTrackExtensionCandidates(const int iteration, typename TrackerTraits<NLayers>::TrackExtensionCandidates& candidatesPerTrack) final;
54+
bool materializeTrackExtensionCandidate(TrackITSExt& track, const typename TrackerTraits<NLayers>::TrackExtensionCandidateN& candidate, const int iteration) final;
55+
5056
IndexTableUtilsN* mDeviceIndexTableUtils;
5157
gpu::TimeFrameGPU<NLayers>* mTimeFrameGPU;
5258
};

Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackingKernels.h

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,13 @@
1313
#ifndef ITSTRACKINGGPU_TRACKINGKERNELS_H_
1414
#define ITSTRACKINGGPU_TRACKINGKERNELS_H_
1515

16+
#include <array>
1617
#include <gsl/gsl>
1718

1819
#include "ITStracking/BoundedAllocator.h"
1920
#include "ITStracking/ROFLookupTables.h"
2021
#include "ITStracking/Definitions.h"
22+
#include "ITStracking/TrackExtensionCandidate.h"
2123
#include "ITStrackingGPU/Utils.h"
2224
#include "DetectorsBase/Propagator.h"
2325
#include "GPUCommonDef.h"
@@ -35,6 +37,58 @@ class Cluster;
3537
class TrackITSExt;
3638
class ExternalAllocator;
3739

40+
inline constexpr int kTrackExtensionLaunchBlocks = 60;
41+
inline constexpr int kTrackExtensionLaunchThreadsPerBlock = 256;
42+
inline constexpr int kTrackExtensionLaunchThreads = kTrackExtensionLaunchBlocks * kTrackExtensionLaunchThreadsPerBlock;
43+
44+
template <int NLayers>
45+
void computeTrackExtensionCandidatesHandler(const TrackExtensionStartState<NLayers>* tracks,
46+
const IndexTableUtils<NLayers>* utils,
47+
const typename ROFMaskTable<NLayers>::View& rofMask,
48+
const typename ROFOverlapTable<NLayers>::View& rofOverlaps,
49+
const Cluster** clusters,
50+
const unsigned char** usedClusters,
51+
const int** clustersIndexTables,
52+
const int** ROFClusters,
53+
const TrackingFrameInfo** trackingFrameInfo,
54+
TrackExtensionCandidate<NLayers>* candidates,
55+
int* candidateOffsets,
56+
TrackExtensionHypothesis<NLayers>* activeHypotheses,
57+
TrackExtensionHypothesis<NLayers>* nextHypotheses,
58+
const std::array<float, NLayers> layerRadii,
59+
const std::array<float, NLayers> layerxX0,
60+
const int nTracks,
61+
const int nLayers,
62+
const int phiBins,
63+
const int beamWidth,
64+
const bool extendTop,
65+
const bool extendBot,
66+
const float bz,
67+
const float maxChi2ClusterAttachment,
68+
const float maxChi2NDF,
69+
const float nSigmaCutPhi,
70+
const float nSigmaCutZ,
71+
const o2::base::Propagator* propagator,
72+
const o2::base::PropagatorF::MatCorrType matCorrType,
73+
gpu::Stream& stream);
74+
75+
template <int NLayers>
76+
void computeTrackExtensionResultsHandler(const TrackExtensionStartState<NLayers>* tracks,
77+
const TrackExtensionCandidate<NLayers>* candidates,
78+
const int* candidateOffsets,
79+
TrackExtensionResult<NLayers>* results,
80+
const TrackingFrameInfo** trackingFrameInfo,
81+
const std::array<float, NLayers> layerxX0,
82+
const int nTracks,
83+
const int nLayers,
84+
const float bz,
85+
const float maxChi2ClusterAttachment,
86+
const float maxChi2NDF,
87+
const o2::base::Propagator* propagator,
88+
const o2::base::PropagatorF::MatCorrType matCorrType,
89+
const bool shiftRefToCluster,
90+
gpu::Stream& stream);
91+
3892
template <int NLayers>
3993
void countTrackletsInROFsHandler(const IndexTableUtils<NLayers>* utils,
4094
const typename ROFMaskTable<NLayers>::View& rofMask,

Detectors/ITSMFT/ITS/tracking/GPU/cuda/TimeFrameGPU.cu

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212

1313
#include <cuda_runtime.h>
1414

15+
#include <algorithm>
1516
#include <unistd.h>
1617
#include <vector>
1718

@@ -532,6 +533,87 @@ void TimeFrameGPU<NLayers>::createTrackITSExtDevice(const size_t nSeeds)
532533
GPUChkErrS(cudaMemset(mTrackITSExtDevice, 0, mNTracks * sizeof(o2::its::TrackITSExt)));
533534
}
534535

536+
template <int NLayers>
537+
void TimeFrameGPU<NLayers>::loadTrackExtensionStartStatesDevice()
538+
{
539+
GPUTimer timer("loading track extension start states");
540+
GPULog("gpu-transfer: loading {} track extension start states, for {:.2f} MB.", this->mTracks.size(), this->mTracks.size() * sizeof(o2::its::TrackExtensionStartState<NLayers>) / constants::MB);
541+
mTrackExtensionStartStatesDevice = nullptr;
542+
mTrackExtensionStartStates = bounded_vector<TrackExtensionStartState<NLayers>>(this->mTracks.size(), {}, this->getMemoryPool().get());
543+
if (this->mTracks.empty()) {
544+
return;
545+
}
546+
for (size_t iTrack{0}; iTrack < this->mTracks.size(); ++iTrack) {
547+
const auto& track = this->mTracks[iTrack];
548+
auto& state = mTrackExtensionStartStates[iTrack];
549+
state.paramIn = track.getParamIn();
550+
state.paramOut = track.getParamOut();
551+
state.time = track.getTimeStamp();
552+
state.chi2 = track.getChi2();
553+
state.nClusters = track.getNClusters();
554+
state.firstClusterLayer = static_cast<int>(track.getFirstClusterLayer());
555+
state.lastClusterLayer = static_cast<int>(track.getLastClusterLayer());
556+
for (int iLayer{0}; iLayer < NLayers; ++iLayer) {
557+
state.clusters[iLayer] = track.getClusterIndex(iLayer);
558+
}
559+
}
560+
allocMem(reinterpret_cast<void**>(&mTrackExtensionStartStatesDevice), mTrackExtensionStartStates.size() * sizeof(o2::its::TrackExtensionStartState<NLayers>), this->hasFrameworkAllocator(), (o2::gpu::GPUMemoryResource::MEMORY_GPU | o2::gpu::GPUMemoryResource::MEMORY_STACK));
561+
GPUChkErrS(cudaMemcpy(mTrackExtensionStartStatesDevice, mTrackExtensionStartStates.data(), mTrackExtensionStartStates.size() * sizeof(o2::its::TrackExtensionStartState<NLayers>), cudaMemcpyHostToDevice));
562+
}
563+
564+
template <int NLayers>
565+
void TimeFrameGPU<NLayers>::createTrackExtensionCandidatesDevice(const size_t nTracks)
566+
{
567+
GPUTimer timer("reserving track extension candidates");
568+
const size_t nCandidates = nTracks * MaxTrackExtensionCandidatesPerTrack;
569+
GPULog("gpu-allocation: reserving {} track extension candidates, for {:.2f} MB.", nCandidates, nCandidates * sizeof(o2::its::TrackExtensionCandidate<NLayers>) / constants::MB);
570+
mTrackExtensionCandidates = bounded_vector<TrackExtensionCandidate<NLayers>>(nCandidates, {}, this->getMemoryPool().get());
571+
mTrackExtensionCandidatesDevice = nullptr;
572+
mTrackExtensionCandidateOffsetsDevice = nullptr;
573+
if (mTrackExtensionCandidates.empty()) {
574+
return;
575+
}
576+
allocMem(reinterpret_cast<void**>(&mTrackExtensionCandidatesDevice), nCandidates * sizeof(o2::its::TrackExtensionCandidate<NLayers>), this->hasFrameworkAllocator(), (o2::gpu::GPUMemoryResource::MEMORY_GPU | o2::gpu::GPUMemoryResource::MEMORY_STACK));
577+
allocMem(reinterpret_cast<void**>(&mTrackExtensionCandidateOffsetsDevice), (nTracks + 1) * sizeof(int), this->hasFrameworkAllocator(), (o2::gpu::GPUMemoryResource::MEMORY_GPU | o2::gpu::GPUMemoryResource::MEMORY_STACK));
578+
}
579+
580+
template <int NLayers>
581+
void TimeFrameGPU<NLayers>::createTrackExtensionScratchDevice(const int nThreads, const int beamWidth)
582+
{
583+
GPUTimer timer("reserving track extension scratch");
584+
const size_t nHypotheses = static_cast<size_t>(std::max(1, nThreads)) * std::max(1, beamWidth);
585+
GPULog("gpu-allocation: reserving {} track extension hypotheses per scratch buffer, for {:.2f} MB each.", nHypotheses, nHypotheses * sizeof(o2::its::TrackExtensionHypothesis<NLayers>) / constants::MB);
586+
mActiveTrackExtensionHypothesesDevice = nullptr;
587+
mNextTrackExtensionHypothesesDevice = nullptr;
588+
if (nHypotheses == 0) {
589+
return;
590+
}
591+
allocMem(reinterpret_cast<void**>(&mActiveTrackExtensionHypothesesDevice), nHypotheses * sizeof(o2::its::TrackExtensionHypothesis<NLayers>), this->hasFrameworkAllocator(), (o2::gpu::GPUMemoryResource::MEMORY_GPU | o2::gpu::GPUMemoryResource::MEMORY_STACK));
592+
allocMem(reinterpret_cast<void**>(&mNextTrackExtensionHypothesesDevice), nHypotheses * sizeof(o2::its::TrackExtensionHypothesis<NLayers>), this->hasFrameworkAllocator(), (o2::gpu::GPUMemoryResource::MEMORY_GPU | o2::gpu::GPUMemoryResource::MEMORY_STACK));
593+
}
594+
595+
template <int NLayers>
596+
void TimeFrameGPU<NLayers>::createTrackExtensionResultsDevice(const size_t nTracks)
597+
{
598+
GPUTimer timer("reserving fitted track extension results");
599+
mNTrackExtensionResults = 0;
600+
if (nTracks == 0 || mTrackExtensionCandidateOffsetsDevice == nullptr) {
601+
mTrackExtensionResults = bounded_vector<TrackExtensionResult<NLayers>>(0, {}, this->getMemoryPool().get());
602+
mTrackExtensionResultsDevice = nullptr;
603+
return;
604+
}
605+
int nResults{0};
606+
GPUChkErrS(cudaMemcpy(&nResults, mTrackExtensionCandidateOffsetsDevice + nTracks, sizeof(int), cudaMemcpyDeviceToHost));
607+
mNTrackExtensionResults = nResults;
608+
GPULog("gpu-allocation: reserving {} fitted track extension results, for {:.2f} MB.", mNTrackExtensionResults, mNTrackExtensionResults * sizeof(o2::its::TrackExtensionResult<NLayers>) / constants::MB);
609+
mTrackExtensionResults = bounded_vector<TrackExtensionResult<NLayers>>(mNTrackExtensionResults, {}, this->getMemoryPool().get());
610+
mTrackExtensionResultsDevice = nullptr;
611+
if (mTrackExtensionResults.empty()) {
612+
return;
613+
}
614+
allocMem(reinterpret_cast<void**>(&mTrackExtensionResultsDevice), mNTrackExtensionResults * sizeof(o2::its::TrackExtensionResult<NLayers>), this->hasFrameworkAllocator(), (o2::gpu::GPUMemoryResource::MEMORY_GPU | o2::gpu::GPUMemoryResource::MEMORY_STACK));
615+
}
616+
535617
template <int NLayers>
536618
void TimeFrameGPU<NLayers>::downloadCellsDevice()
537619
{
@@ -578,6 +660,28 @@ void TimeFrameGPU<NLayers>::downloadTrackITSExtDevice()
578660
GPUChkErrS(cudaMemcpy(mTrackITSExt.data(), mTrackITSExtDevice, mTrackITSExt.size() * sizeof(o2::its::TrackITSExt), cudaMemcpyDeviceToHost));
579661
}
580662

663+
template <int NLayers>
664+
void TimeFrameGPU<NLayers>::downloadTrackExtensionCandidatesDevice()
665+
{
666+
GPUTimer timer("downloading track extension candidates");
667+
GPULog("gpu-transfer: downloading {} track extension candidates, for {:.2f} MB.", mTrackExtensionCandidates.size(), mTrackExtensionCandidates.size() * sizeof(o2::its::TrackExtensionCandidate<NLayers>) / constants::MB);
668+
if (mTrackExtensionCandidates.empty()) {
669+
return;
670+
}
671+
GPUChkErrS(cudaMemcpy(mTrackExtensionCandidates.data(), mTrackExtensionCandidatesDevice, mTrackExtensionCandidates.size() * sizeof(o2::its::TrackExtensionCandidate<NLayers>), cudaMemcpyDeviceToHost));
672+
}
673+
674+
template <int NLayers>
675+
void TimeFrameGPU<NLayers>::downloadTrackExtensionResultsDevice()
676+
{
677+
GPUTimer timer("downloading fitted track extension results");
678+
GPULog("gpu-transfer: downloading {} fitted track extension results, for {:.2f} MB.", mTrackExtensionResults.size(), mTrackExtensionResults.size() * sizeof(o2::its::TrackExtensionResult<NLayers>) / constants::MB);
679+
if (mTrackExtensionResults.empty()) {
680+
return;
681+
}
682+
GPUChkErrS(cudaMemcpy(mTrackExtensionResults.data(), mTrackExtensionResultsDevice, mTrackExtensionResults.size() * sizeof(o2::its::TrackExtensionResult<NLayers>), cudaMemcpyDeviceToHost));
683+
}
684+
581685
template <int NLayers>
582686
void TimeFrameGPU<NLayers>::unregisterHostMemory(const int maxLayers)
583687
{

0 commit comments

Comments
 (0)