Skip to content

Commit 5436649

Browse files
committed
ITS: re-enable the possibility of extending tracks
1 parent 231abca commit 5436649

18 files changed

Lines changed: 1613 additions & 6 deletions

Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TimeFrameGPU.h

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include "ITStracking/BoundedAllocator.h"
2020
#include "ITStracking/TimeFrame.h"
2121
#include "ITStracking/Configuration.h"
22+
#include "ITStracking/TrackExtensionCandidate.h"
2223
#include "ITStrackingGPU/Utils.h"
2324

2425
namespace o2::its::gpu
@@ -90,8 +91,14 @@ class TimeFrameGPU : public TimeFrame<NLayers>
9091
void createNeighboursDevice(const unsigned int layer);
9192
void createNeighboursLUTDevice(const int, const unsigned int);
9293
void createTrackITSExtDevice(const size_t);
94+
void loadTrackExtensionStartTracksDevice();
95+
void createTrackExtensionCandidatesDevice(const size_t);
96+
void createTrackExtensionScratchDevice(const int nThreads, const int beamWidth);
97+
void createTrackExtensionResultsDevice(const size_t);
9398
void downloadTrackITSExtDevice();
9499
void downloadCellsNeighboursDevice(std::vector<bounded_vector<CellNeighbour>>&, const int);
100+
void downloadTrackExtensionCandidatesDevice();
101+
void downloadTrackExtensionResultsDevice();
95102
void downloadNeighboursLUTDevice(bounded_vector<int>&, const int);
96103
void downloadCellsDevice();
97104
void downloadCellsLUTDevice();
@@ -118,13 +125,21 @@ class TimeFrameGPU : public TimeFrame<NLayers>
118125
const auto getDeviceTrackingTopologyView() const { return mDeviceTrackingTopologyView; }
119126
int* getDeviceROFramesClusters(const int layer) { return mROFramesClustersDevice[layer]; }
120127
auto& getTrackITSExt() { return mTrackITSExt; }
128+
auto& getTrackExtensionCandidates() { return mTrackExtensionCandidates; }
129+
auto& getTrackExtensionResults() { return mTrackExtensionResults; }
121130
Vertex* getDeviceVertices() { return mPrimaryVerticesDevice; }
122131
int* getDeviceROFramesPV() { return mROFramesPVDevice; }
123132
unsigned char* getDeviceUsedClusters(const int);
124133
const o2::base::Propagator* getChainPropagator();
125134

126135
// Hybrid
127136
TrackITSExt* getDeviceTrackITSExt() { return mTrackITSExtDevice; }
137+
TrackITSExt* getDeviceTrackExtensionStartTracks() { return mTrackExtensionStartTracksDevice; }
138+
TrackExtensionCandidate<NLayers>* getDeviceTrackExtensionCandidates() { return mTrackExtensionCandidatesDevice; }
139+
int* getDeviceTrackExtensionCandidateOffsets() { return mTrackExtensionCandidateOffsetsDevice; }
140+
TrackExtensionHypothesis<NLayers>* getDeviceActiveTrackExtensionHypotheses() { return mActiveTrackExtensionHypothesesDevice; }
141+
TrackExtensionHypothesis<NLayers>* getDeviceNextTrackExtensionHypotheses() { return mNextTrackExtensionHypothesesDevice; }
142+
TrackExtensionResult<NLayers>* getDeviceTrackExtensionResults() { return mTrackExtensionResultsDevice; }
128143
int* getDeviceNeighboursLUT(const int layer) { return mNeighboursLUTDevice[layer]; }
129144
gsl::span<int*> getDeviceNeighboursLUTs() { return mNeighboursLUTDevice; }
130145
CellNeighbour** getDeviceArrayNeighbours() { return mNeighboursDeviceArray; }
@@ -222,6 +237,13 @@ class TimeFrameGPU : public TimeFrame<NLayers>
222237
float** mCellSeedsChi2DeviceArray;
223238

224239
TrackITSExt* mTrackITSExtDevice;
240+
TrackITSExt* mTrackExtensionStartTracksDevice{nullptr};
241+
TrackExtensionCandidate<NLayers>* mTrackExtensionCandidatesDevice{nullptr};
242+
int* mTrackExtensionCandidateOffsetsDevice{nullptr};
243+
TrackExtensionHypothesis<NLayers>* mActiveTrackExtensionHypothesesDevice{nullptr};
244+
TrackExtensionHypothesis<NLayers>* mNextTrackExtensionHypothesesDevice{nullptr};
245+
TrackExtensionResult<NLayers>* mTrackExtensionResultsDevice{nullptr};
246+
unsigned int mNTrackExtensionResults{0};
225247
std::array<CellNeighbour*, MaxCells> mNeighboursDevice{};
226248
CellNeighbour** mNeighboursDeviceArray{nullptr};
227249
std::array<TrackingFrameInfo*, NLayers> mTrackingFrameInfoDevice;
@@ -238,6 +260,11 @@ class TimeFrameGPU : public TimeFrame<NLayers>
238260

239261
// Temporary buffer for storing output tracks from GPU tracking
240262
bounded_vector<TrackITSExt> mTrackITSExt;
263+
bounded_vector<TrackITSExt> mTrackExtensionStartTracks;
264+
// Temporary buffer for compact track extension proposals from GPU tracking
265+
bounded_vector<TrackExtensionCandidate<NLayers>> mTrackExtensionCandidates;
266+
// Temporary buffer for fitted track extension proposals from GPU tracking
267+
bounded_vector<TrackExtensionResult<NLayers>> mTrackExtensionResults;
241268
};
242269

243270
template <int NLayers>

Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackerTraitsGPU.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ class TrackerTraitsGPU final : public TrackerTraits<NLayers>
3535
void computeLayerCells(const int iteration) final;
3636
void findCellsNeighbours(const int iteration) final;
3737
void findRoads(const int iteration) final;
38+
void extendTracks(const int iteration) final;
3839

3940
void setBz(float) final;
4041

@@ -47,6 +48,11 @@ class TrackerTraitsGPU final : public TrackerTraits<NLayers>
4748
int getTFNumberOfCells() const override;
4849

4950
private:
51+
bool hasTrackFollower(const int iteration) const;
52+
53+
void buildTrackExtensionCandidates(const int iteration, typename TrackerTraits<NLayers>::TrackExtensionCandidates& candidatesPerTrack) final;
54+
bool materializeTrackExtensionCandidate(TrackITSExt& track, const typename TrackerTraits<NLayers>::TrackExtensionCandidateN& candidate, const int iteration) final;
55+
5056
IndexTableUtilsN* mDeviceIndexTableUtils;
5157
gpu::TimeFrameGPU<NLayers>* mTimeFrameGPU;
5258
};

Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackingKernels.h

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,13 @@
1313
#ifndef ITSTRACKINGGPU_TRACKINGKERNELS_H_
1414
#define ITSTRACKINGGPU_TRACKINGKERNELS_H_
1515

16+
#include <array>
1617
#include <gsl/gsl>
1718

1819
#include "ITStracking/BoundedAllocator.h"
1920
#include "ITStracking/ROFLookupTables.h"
2021
#include "ITStracking/TrackingTopology.h"
22+
#include "ITStracking/TrackExtensionCandidate.h"
2123
#include "ITStrackingGPU/Utils.h"
2224
#include "DetectorsBase/Propagator.h"
2325

@@ -35,6 +37,58 @@ class Cluster;
3537
class TrackITSExt;
3638
class ExternalAllocator;
3739

40+
inline constexpr int kTrackExtensionLaunchBlocks = 60;
41+
inline constexpr int kTrackExtensionLaunchThreadsPerBlock = 256;
42+
inline constexpr int kTrackExtensionLaunchThreads = kTrackExtensionLaunchBlocks * kTrackExtensionLaunchThreadsPerBlock;
43+
44+
template <int NLayers>
45+
void computeTrackExtensionCandidatesHandler(const TrackITSExt* tracks,
46+
const IndexTableUtils<NLayers>* utils,
47+
const typename ROFMaskTable<NLayers>::View& rofMask,
48+
const typename ROFOverlapTable<NLayers>::View& rofOverlaps,
49+
const Cluster** clusters,
50+
const unsigned char** usedClusters,
51+
const int** clustersIndexTables,
52+
const int** ROFClusters,
53+
const TrackingFrameInfo** trackingFrameInfo,
54+
TrackExtensionCandidate<NLayers>* candidates,
55+
int* candidateOffsets,
56+
TrackExtensionHypothesis<NLayers>* activeHypotheses,
57+
TrackExtensionHypothesis<NLayers>* nextHypotheses,
58+
const std::array<float, NLayers> layerRadii,
59+
const std::array<float, NLayers> layerxX0,
60+
const int nTracks,
61+
const int nLayers,
62+
const int phiBins,
63+
const int beamWidth,
64+
const bool extendTop,
65+
const bool extendBot,
66+
const float bz,
67+
const float maxChi2ClusterAttachment,
68+
const float maxChi2NDF,
69+
const float nSigmaCutPhi,
70+
const float nSigmaCutZ,
71+
const o2::base::Propagator* propagator,
72+
const o2::base::PropagatorF::MatCorrType matCorrType,
73+
gpu::Stream& stream);
74+
75+
template <int NLayers>
76+
void computeTrackExtensionResultsHandler(const TrackITSExt* tracks,
77+
const TrackExtensionCandidate<NLayers>* candidates,
78+
const int* candidateOffsets,
79+
TrackExtensionResult<NLayers>* results,
80+
const TrackingFrameInfo** trackingFrameInfo,
81+
const std::array<float, NLayers> layerxX0,
82+
const int nTracks,
83+
const int nLayers,
84+
const float bz,
85+
const float maxChi2ClusterAttachment,
86+
const float maxChi2NDF,
87+
const o2::base::Propagator* propagator,
88+
const o2::base::PropagatorF::MatCorrType matCorrType,
89+
const bool shiftRefToCluster,
90+
gpu::Stream& stream);
91+
3892
template <int NLayers>
3993
void countTrackletsInROFsHandler(const IndexTableUtils<NLayers>* utils,
4094
const typename ROFMaskTable<NLayers>::View& rofMask,

Detectors/ITSMFT/ITS/tracking/GPU/cuda/TimeFrameGPU.cu

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212

1313
#include <cuda_runtime.h>
1414

15+
#include <algorithm>
1516
#include <unistd.h>
1617
#include <vector>
1718

@@ -581,6 +582,73 @@ void TimeFrameGPU<NLayers>::createTrackITSExtDevice(const size_t nSeeds)
581582
GPUChkErrS(cudaMemset(mTrackITSExtDevice, 0, mNTracks * sizeof(o2::its::TrackITSExt)));
582583
}
583584

585+
template <int NLayers>
586+
void TimeFrameGPU<NLayers>::loadTrackExtensionStartTracksDevice()
587+
{
588+
GPUTimer timer("loading track extension start tracks");
589+
GPULog("gpu-transfer: loading {} track extension start tracks, for {:.2f} MB.", this->mTracks.size(), this->mTracks.size() * sizeof(o2::its::TrackITSExt) / constants::MB);
590+
mTrackExtensionStartTracksDevice = nullptr;
591+
mTrackExtensionStartTracks = bounded_vector<TrackITSExt>(this->mTracks.begin(), this->mTracks.end(), this->getMemoryPool().get());
592+
if (this->mTracks.empty()) {
593+
return;
594+
}
595+
allocMem(reinterpret_cast<void**>(&mTrackExtensionStartTracksDevice), mTrackExtensionStartTracks.size() * sizeof(o2::its::TrackITSExt), this->hasFrameworkAllocator(), (o2::gpu::GPUMemoryResource::MEMORY_GPU | o2::gpu::GPUMemoryResource::MEMORY_STACK));
596+
GPUChkErrS(cudaMemcpy(mTrackExtensionStartTracksDevice, mTrackExtensionStartTracks.data(), mTrackExtensionStartTracks.size() * sizeof(o2::its::TrackITSExt), cudaMemcpyHostToDevice));
597+
}
598+
599+
template <int NLayers>
600+
void TimeFrameGPU<NLayers>::createTrackExtensionCandidatesDevice(const size_t nTracks)
601+
{
602+
GPUTimer timer("reserving track extension candidates");
603+
const size_t nCandidates = nTracks * MaxTrackExtensionCandidatesPerTrack;
604+
GPULog("gpu-allocation: reserving {} track extension candidates, for {:.2f} MB.", nCandidates, nCandidates * sizeof(o2::its::TrackExtensionCandidate<NLayers>) / constants::MB);
605+
mTrackExtensionCandidates = bounded_vector<TrackExtensionCandidate<NLayers>>(nCandidates, {}, this->getMemoryPool().get());
606+
mTrackExtensionCandidatesDevice = nullptr;
607+
mTrackExtensionCandidateOffsetsDevice = nullptr;
608+
if (mTrackExtensionCandidates.empty()) {
609+
return;
610+
}
611+
allocMem(reinterpret_cast<void**>(&mTrackExtensionCandidatesDevice), nCandidates * sizeof(o2::its::TrackExtensionCandidate<NLayers>), this->hasFrameworkAllocator(), (o2::gpu::GPUMemoryResource::MEMORY_GPU | o2::gpu::GPUMemoryResource::MEMORY_STACK));
612+
allocMem(reinterpret_cast<void**>(&mTrackExtensionCandidateOffsetsDevice), (nTracks + 1) * sizeof(int), this->hasFrameworkAllocator(), (o2::gpu::GPUMemoryResource::MEMORY_GPU | o2::gpu::GPUMemoryResource::MEMORY_STACK));
613+
}
614+
615+
template <int NLayers>
616+
void TimeFrameGPU<NLayers>::createTrackExtensionScratchDevice(const int nThreads, const int beamWidth)
617+
{
618+
GPUTimer timer("reserving track extension scratch");
619+
const size_t nHypotheses = static_cast<size_t>(std::max(1, nThreads)) * std::max(1, beamWidth);
620+
GPULog("gpu-allocation: reserving {} track extension hypotheses per scratch buffer, for {:.2f} MB each.", nHypotheses, nHypotheses * sizeof(o2::its::TrackExtensionHypothesis<NLayers>) / constants::MB);
621+
mActiveTrackExtensionHypothesesDevice = nullptr;
622+
mNextTrackExtensionHypothesesDevice = nullptr;
623+
if (nHypotheses == 0) {
624+
return;
625+
}
626+
allocMem(reinterpret_cast<void**>(&mActiveTrackExtensionHypothesesDevice), nHypotheses * sizeof(o2::its::TrackExtensionHypothesis<NLayers>), this->hasFrameworkAllocator(), (o2::gpu::GPUMemoryResource::MEMORY_GPU | o2::gpu::GPUMemoryResource::MEMORY_STACK));
627+
allocMem(reinterpret_cast<void**>(&mNextTrackExtensionHypothesesDevice), nHypotheses * sizeof(o2::its::TrackExtensionHypothesis<NLayers>), this->hasFrameworkAllocator(), (o2::gpu::GPUMemoryResource::MEMORY_GPU | o2::gpu::GPUMemoryResource::MEMORY_STACK));
628+
}
629+
630+
template <int NLayers>
631+
void TimeFrameGPU<NLayers>::createTrackExtensionResultsDevice(const size_t nTracks)
632+
{
633+
GPUTimer timer("reserving fitted track extension results");
634+
mNTrackExtensionResults = 0;
635+
if (nTracks == 0 || mTrackExtensionCandidateOffsetsDevice == nullptr) {
636+
mTrackExtensionResults = bounded_vector<TrackExtensionResult<NLayers>>(0, {}, this->getMemoryPool().get());
637+
mTrackExtensionResultsDevice = nullptr;
638+
return;
639+
}
640+
int nResults{0};
641+
GPUChkErrS(cudaMemcpy(&nResults, mTrackExtensionCandidateOffsetsDevice + nTracks, sizeof(int), cudaMemcpyDeviceToHost));
642+
mNTrackExtensionResults = nResults;
643+
GPULog("gpu-allocation: reserving {} fitted track extension results, for {:.2f} MB.", mNTrackExtensionResults, mNTrackExtensionResults * sizeof(o2::its::TrackExtensionResult<NLayers>) / constants::MB);
644+
mTrackExtensionResults = bounded_vector<TrackExtensionResult<NLayers>>(mNTrackExtensionResults, {}, this->getMemoryPool().get());
645+
mTrackExtensionResultsDevice = nullptr;
646+
if (mTrackExtensionResults.empty()) {
647+
return;
648+
}
649+
allocMem(reinterpret_cast<void**>(&mTrackExtensionResultsDevice), mNTrackExtensionResults * sizeof(o2::its::TrackExtensionResult<NLayers>), this->hasFrameworkAllocator(), (o2::gpu::GPUMemoryResource::MEMORY_GPU | o2::gpu::GPUMemoryResource::MEMORY_STACK));
650+
}
651+
584652
template <int NLayers>
585653
void TimeFrameGPU<NLayers>::downloadCellsDevice()
586654
{
@@ -627,6 +695,28 @@ void TimeFrameGPU<NLayers>::downloadTrackITSExtDevice()
627695
GPUChkErrS(cudaMemcpy(mTrackITSExt.data(), mTrackITSExtDevice, mTrackITSExt.size() * sizeof(o2::its::TrackITSExt), cudaMemcpyDeviceToHost));
628696
}
629697

698+
template <int NLayers>
699+
void TimeFrameGPU<NLayers>::downloadTrackExtensionCandidatesDevice()
700+
{
701+
GPUTimer timer("downloading track extension candidates");
702+
GPULog("gpu-transfer: downloading {} track extension candidates, for {:.2f} MB.", mTrackExtensionCandidates.size(), mTrackExtensionCandidates.size() * sizeof(o2::its::TrackExtensionCandidate<NLayers>) / constants::MB);
703+
if (mTrackExtensionCandidates.empty()) {
704+
return;
705+
}
706+
GPUChkErrS(cudaMemcpy(mTrackExtensionCandidates.data(), mTrackExtensionCandidatesDevice, mTrackExtensionCandidates.size() * sizeof(o2::its::TrackExtensionCandidate<NLayers>), cudaMemcpyDeviceToHost));
707+
}
708+
709+
template <int NLayers>
710+
void TimeFrameGPU<NLayers>::downloadTrackExtensionResultsDevice()
711+
{
712+
GPUTimer timer("downloading fitted track extension results");
713+
GPULog("gpu-transfer: downloading {} fitted track extension results, for {:.2f} MB.", mTrackExtensionResults.size(), mTrackExtensionResults.size() * sizeof(o2::its::TrackExtensionResult<NLayers>) / constants::MB);
714+
if (mTrackExtensionResults.empty()) {
715+
return;
716+
}
717+
GPUChkErrS(cudaMemcpy(mTrackExtensionResults.data(), mTrackExtensionResultsDevice, mTrackExtensionResults.size() * sizeof(o2::its::TrackExtensionResult<NLayers>), cudaMemcpyDeviceToHost));
718+
}
719+
630720
template <int NLayers>
631721
void TimeFrameGPU<NLayers>::unregisterHostMemory(const int maxLayers)
632722
{

0 commit comments

Comments
 (0)