Skip to content

Commit a94fe97

Browse files
committed
ITS: GPU Vertexer
1 parent af2faab commit a94fe97

11 files changed

Lines changed: 279 additions & 634 deletions

File tree

Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TimeFrameGPU.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,8 @@ class TimeFrameGPU : public TimeFrame<nLayers>
4848
void loadVertices(const int);
4949

5050
///
51-
void createTrackletsLUTDevice(const int);
51+
void createTrackletsLUTDevice(const int, const int = nLayers - 1);
52+
void createTrackletsPerROFDevice(const int, const int);
5253
void loadTrackletsDevice();
5354
void loadTrackletsLUTDevice();
5455
void loadCellsDevice();
@@ -110,6 +111,7 @@ class TimeFrameGPU : public TimeFrame<nLayers>
110111
const int** getDeviceROframeClusters() const { return mROFrameClustersDeviceArray; }
111112
Tracklet** getDeviceArrayTracklets() { return mTrackletsDeviceArray; }
112113
int** getDeviceArrayTrackletsLUT() const { return mTrackletsLUTDeviceArray; }
114+
int** getDeviceArrayTrackletsPerROF() const { return mTrackletsPerROFDeviceArray; }
113115
int** getDeviceArrayCellsLUT() const { return mCellsLUTDeviceArray; }
114116
int** getDeviceArrayNeighboursCellLUT() const { return mNeighboursCellLUTDeviceArray; }
115117
CellSeed** getDeviceArrayCells() const { return mCellsDeviceArray; }
@@ -168,6 +170,7 @@ class TimeFrameGPU : public TimeFrame<nLayers>
168170
const int** mROFrameClustersDeviceArray;
169171
std::array<Tracklet*, nLayers - 1> mTrackletsDevice;
170172
Tracklet** mTrackletsDeviceArray;
173+
std::array<int*, 2> mTrackletsPerROFDevice;
171174
std::array<int*, nLayers - 1> mTrackletsLUTDevice;
172175
std::array<int*, nLayers - 2> mCellsLUTDevice;
173176
std::array<int*, nLayers - 3> mNeighboursLUTDevice;
@@ -176,6 +179,7 @@ class TimeFrameGPU : public TimeFrame<nLayers>
176179
int** mNeighboursCellDeviceArray;
177180
int** mNeighboursCellLUTDeviceArray;
178181
int** mTrackletsLUTDeviceArray;
182+
int** mTrackletsPerROFDeviceArray;
179183
std::array<CellSeed*, nLayers - 2> mCellsDevice;
180184
std::array<int*, nLayers - 2> mNeighboursIndexTablesDevice;
181185
CellSeed* mTrackSeedsDevice;

Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/Utils.h

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,13 @@
1818

1919
#include <vector>
2020

21+
#include "ITStracking/IndexTableUtils.h"
22+
#include "ITStracking/MathUtils.h"
23+
#include "ITStracking/Cluster.h"
24+
2125
#include "GPUCommonDef.h"
2226
#include "GPUCommonHelpers.h"
27+
#include "GPUCommonMath.h"
2328

2429
#ifndef __HIPCC__
2530
#define THRUST_NAMESPACE thrust::cuda
@@ -155,6 +160,39 @@ class Streams
155160
std::vector<Stream> mStreams;
156161
};
157162

163+
#ifdef GPUCA_GPUCODE
164+
GPUdii() const int4 getBinsRect(const Cluster& currentCluster, const int layerIndex,
165+
const o2::its::IndexTableUtils* utils,
166+
const float z1, const float z2, float maxdeltaz, float maxdeltaphi)
167+
{
168+
const float zRangeMin = o2::gpu::CAMath::Min(z1, z2) - maxdeltaz;
169+
const float phiRangeMin = (maxdeltaphi > o2::constants::math::PI) ? 0.f : currentCluster.phi - maxdeltaphi;
170+
const float zRangeMax = o2::gpu::CAMath::Max(z1, z2) + maxdeltaz;
171+
const float phiRangeMax = (maxdeltaphi > o2::constants::math::PI) ? o2::constants::math::TwoPI : currentCluster.phi + maxdeltaphi;
172+
173+
if (zRangeMax < -utils->getLayerZ(layerIndex) ||
174+
zRangeMin > utils->getLayerZ(layerIndex) || zRangeMin > zRangeMax) {
175+
return {};
176+
}
177+
178+
return int4{o2::gpu::CAMath::Max(0, utils->getZBinIndex(layerIndex, zRangeMin)),
179+
utils->getPhiBinIndex(math_utils::getNormalizedPhi(phiRangeMin)),
180+
o2::gpu::CAMath::Min(utils->getNzBins() - 1, utils->getZBinIndex(layerIndex, zRangeMax)),
181+
utils->getPhiBinIndex(math_utils::getNormalizedPhi(phiRangeMax))};
182+
}
183+
184+
GPUdii() gpuSpan<const Cluster> getClustersOnLayer(const int rof, const int totROFs, const int layer, const int** roframesClus, const Cluster** clusters)
185+
{
186+
if (rof < 0 || rof >= totROFs) {
187+
return gpuSpan<const Cluster>();
188+
}
189+
const int start_clus_id{roframesClus[layer][rof]};
190+
const int stop_rof = rof >= totROFs - 1 ? totROFs : rof + 1;
191+
const unsigned int delta = roframesClus[layer][stop_rof] - start_clus_id;
192+
return gpuSpan<const Cluster>(&(clusters[layer][start_clus_id]), delta);
193+
}
194+
#endif
195+
158196
} // namespace gpu
159197
} // namespace o2::its
160198

Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/VertexerTraitsGPU.h

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -50,12 +50,6 @@ class VertexerTraitsGPU final : public VertexerTraits
5050
TimeFrameGPUParameters mTfGPUParams;
5151
};
5252

53-
inline void VertexerTraitsGPU::adoptTimeFrame(TimeFrame<7>* tf) noexcept
54-
{
55-
mTimeFrameGPU = static_cast<gpu::TimeFrameGPU<7>*>(tf);
56-
mTimeFrame = static_cast<TimeFrame<7>*>(tf);
57-
}
58-
5953
} // namespace o2::its
6054

6155
#endif

Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/VertexingKernels.h

Lines changed: 47 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -12,46 +12,57 @@
1212

1313
#ifndef ITSTRACKINGGPU_VERTEXINGKERNELS_H_
1414
#define ITSTRACKINGGPU_VERTEXINGKERNELS_H_
15-
#include "ITStracking/MathUtils.h"
16-
#include "ITStracking/Configuration.h"
17-
#include "ITStracking/ClusterLines.h"
18-
#include "ITStracking/Tracklet.h"
1915

2016
#include "ITStrackingGPU/Utils.h"
21-
#include "ITStrackingGPU/ClusterLinesGPU.h"
22-
#include "ITStrackingGPU/VertexerTraitsGPU.h"
23-
#include "ITStrackingGPU/TracerGPU.h"
17+
#include "ITStracking/BoundedAllocator.h"
18+
#include "ITStracking/Cluster.h"
19+
#include "ITStracking/IndexTableUtils.h"
20+
#include <gsl/span>
2421

25-
namespace o2::its::gpu
22+
namespace o2::its
23+
{
24+
namespace gpu
2625
{
2726
#ifdef GPUCA_GPUCODE // GPUg() global kernels must only when compiled by GPU compiler
28-
template <TrackletMode Mode>
29-
GPUg() void trackleterKernelMultipleRof(
30-
const Cluster* clustersNextLayer, // 0 2
31-
const Cluster* clustersCurrentLayer, // 1 1
32-
const int* sizeNextLClusters,
33-
const int* sizeCurrentLClusters,
34-
const int* nextIndexTables,
35-
Tracklet* Tracklets,
36-
int* foundTracklets,
37-
const IndexTableUtils* utils,
38-
const unsigned int startRofId,
39-
const unsigned int rofSize,
40-
const float phiCut,
41-
const size_t maxTrackletsPerCluster);
27+
GPUg() void computeLayerTrackletMutliROFKernel(const Cluster** clusters,
28+
const unsigned int* nClusters,
29+
const int** rofClusters,
30+
const uint8_t** usedClusters,
31+
const int** clusterIndexTables,
32+
const float phiCut,
33+
Tracklet* tracklets,
34+
int* foundTracklets,
35+
const IndexTableUtils* utils,
36+
const int nRofs,
37+
const int deltaRof,
38+
const int* rofPV,
39+
int iteration,
40+
int verPerRofThreshold,
41+
int* rofFoundTrackletsOffsets,
42+
const int maxTrackletsPerCluster);
4243
#endif
43-
template <TrackletMode Mode>
44-
void trackletFinderHandler(const Cluster* clustersNextLayer, // 0 2
45-
const Cluster* clustersCurrentLayer, // 1 1
46-
const int* sizeNextLClusters,
47-
const int* sizeCurrentLClusters,
48-
const int* nextIndexTables,
49-
Tracklet* Tracklets,
50-
int* foundTracklets,
51-
const IndexTableUtils* utils,
52-
const unsigned int startRofId,
53-
const unsigned int rofSize,
54-
const float phiCut,
55-
const size_t maxTrackletsPerCluster = 1e2);
56-
} // namespace o2::its::gpu
44+
} // namespace gpu
45+
46+
void countTrackletsInROFsHandler(const IndexTableUtils* utils,
47+
const uint8_t* multMask,
48+
const int nRofs,
49+
const int deltaROF,
50+
const int* rofPV,
51+
int verPerRofThreshold,
52+
const Cluster** clusters,
53+
std::vector<unsigned int> nClusters,
54+
const int** ROFClusters,
55+
const uint8_t** usedClusters,
56+
const int** clustersIndexTables,
57+
int** trackletsLUTs,
58+
int** trackletsPerROF,
59+
gsl::span<int*> trackletsLUTsHost,
60+
const int iteration,
61+
const float phiCut,
62+
const int maxTrackletsPerCluster,
63+
const int nBlocks,
64+
const int nThreads,
65+
gpu::Streams& streams);
66+
67+
} // namespace o2::its
5768
#endif

Detectors/ITSMFT/ITS/tracking/GPU/cuda/TimeFrameGPU.cu

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -274,10 +274,11 @@ void TimeFrameGPU<nLayers>::loadVertices(const int iteration)
274274
}
275275

276276
template <int nLayers>
277-
void TimeFrameGPU<nLayers>::createTrackletsLUTDevice(const int iteration)
277+
void TimeFrameGPU<nLayers>::createTrackletsLUTDevice(const int iteration, const int maxLayers)
278278
{
279279
GPUTimer timer(mGpuStreams[0], "creating tracklets LUTs");
280-
for (auto iLayer{0}; iLayer < nLayers - 1; ++iLayer) {
280+
const int n = o2::gpu::CAMath::Min(nLayers - 1, maxLayers);
281+
for (int iLayer{0}; iLayer < n; ++iLayer) {
281282
const int ncls = this->mClusters[iLayer].size() + 1;
282283
if (!iteration) {
283284
GPULog("gpu-transfer: creating tracklets LUT for {} elements on layer {}, for {:.2f} MB.", ncls, iLayer, ncls * sizeof(int) / constants::MB);
@@ -291,6 +292,23 @@ void TimeFrameGPU<nLayers>::createTrackletsLUTDevice(const int iteration)
291292
}
292293
}
293294

295+
template <int nLayers>
296+
void TimeFrameGPU<nLayers>::createTrackletsPerROFDevice(const int iteration, const int nrof)
297+
{
298+
GPUTimer timer(mGpuStreams[0], "creating tracklets per ROF");
299+
for (int iLayer{0}; iLayer < 2; ++iLayer) {
300+
if (!iteration) {
301+
GPULog("gpu-transfer: creating tracklets per ROF for {} elements on layer {}, for {:.2f} MB.", nrof, iLayer, nrof * sizeof(int) / constants::MB);
302+
allocMemAsync(reinterpret_cast<void**>(&mTrackletsPerROFDevice[iLayer]), nrof * sizeof(int), mGpuStreams[iLayer], this->getExtAllocator());
303+
}
304+
GPUChkErrS(cudaMemsetAsync(mTrackletsPerROFDevice[iLayer], 0, nrof * sizeof(int), mGpuStreams[iLayer].get()));
305+
}
306+
if (!iteration) {
307+
allocMemAsync(reinterpret_cast<void**>(&mTrackletsPerROFDeviceArray), 2 * sizeof(int*), mGpuStreams[0], this->getExtAllocator());
308+
GPUChkErrS(cudaMemcpyAsync(mTrackletsPerROFDeviceArray, mTrackletsPerROFDevice.data(), mTrackletsPerROFDevice.size() * sizeof(int*), cudaMemcpyHostToDevice, mGpuStreams[0].get()));
309+
}
310+
}
311+
294312
template <int nLayers>
295313
void TimeFrameGPU<nLayers>::createTrackletsBuffers()
296314
{

Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,13 +27,13 @@ template <int nLayers>
2727
void TrackerTraitsGPU<nLayers>::initialiseTimeFrame(const int iteration)
2828
{
2929
mTimeFrameGPU->initialise(iteration, this->mTrkParams[iteration], nLayers);
30-
mTimeFrameGPU->loadClustersDevice(iteration);
31-
mTimeFrameGPU->loadUnsortedClustersDevice(iteration);
32-
mTimeFrameGPU->loadClustersIndexTables(iteration);
30+
// mTimeFrameGPU->loadClustersDevice(iteration);
31+
// mTimeFrameGPU->loadUnsortedClustersDevice(iteration);
32+
// mTimeFrameGPU->loadClustersIndexTables(iteration);
3333
mTimeFrameGPU->loadTrackingFrameInfoDevice(iteration);
3434
mTimeFrameGPU->loadMultiplicityCutMask(iteration);
3535
mTimeFrameGPU->loadVertices(iteration);
36-
mTimeFrameGPU->loadROframeClustersDevice(iteration);
36+
// mTimeFrameGPU->loadROframeClustersDevice(iteration);
3737
mTimeFrameGPU->createUsedClustersDevice(iteration);
3838
mTimeFrameGPU->loadIndexTableUtils(iteration);
3939
}

Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu

Lines changed: 1 addition & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -92,26 +92,6 @@ struct TypedAllocator {
9292
ExternalAllocator* mInternalAllocator;
9393
};
9494

95-
GPUd() const int4 getBinsRect(const Cluster& currentCluster, const int layerIndex,
96-
const o2::its::IndexTableUtils& utils,
97-
const float z1, const float z2, float maxdeltaz, float maxdeltaphi)
98-
{
99-
const float zRangeMin = o2::gpu::CAMath::Min(z1, z2) - maxdeltaz;
100-
const float phiRangeMin = (maxdeltaphi > o2::constants::math::PI) ? 0.f : currentCluster.phi - maxdeltaphi;
101-
const float zRangeMax = o2::gpu::CAMath::Max(z1, z2) + maxdeltaz;
102-
const float phiRangeMax = (maxdeltaphi > o2::constants::math::PI) ? o2::constants::math::TwoPI : currentCluster.phi + maxdeltaphi;
103-
104-
if (zRangeMax < -utils.getLayerZ(layerIndex) ||
105-
zRangeMin > utils.getLayerZ(layerIndex) || zRangeMin > zRangeMax) {
106-
return getEmptyBinsRect();
107-
}
108-
109-
return int4{o2::gpu::CAMath::Max(0, utils.getZBinIndex(layerIndex, zRangeMin)),
110-
utils.getPhiBinIndex(math_utils::getNormalizedPhi(phiRangeMin)),
111-
o2::gpu::CAMath::Min(utils.getNzBins() - 1, utils.getZBinIndex(layerIndex, zRangeMax)),
112-
utils.getPhiBinIndex(math_utils::getNormalizedPhi(phiRangeMax))};
113-
}
114-
11595
GPUd() bool fitTrack(TrackITSExt& track,
11696
int start,
11797
int end,
@@ -275,21 +255,6 @@ GPUd() gpuSpan<const Vertex> getPrimaryVertices(const int rof,
275255
return gpuSpan<const Vertex>(&vertices[start_pv_id], delta);
276256
};
277257

278-
GPUd() gpuSpan<const Cluster> getClustersOnLayer(const int rof,
279-
const int totROFs,
280-
const int layer,
281-
const int** roframesClus,
282-
const Cluster** clusters)
283-
{
284-
if (rof < 0 || rof >= totROFs) {
285-
return gpuSpan<const Cluster>();
286-
}
287-
const int start_clus_id{roframesClus[layer][rof]};
288-
const int stop_rof = rof >= totROFs - 1 ? totROFs : rof + 1;
289-
const unsigned int delta = roframesClus[layer][stop_rof] - start_clus_id;
290-
return gpuSpan<const Cluster>(&(clusters[layer][start_clus_id]), delta);
291-
}
292-
293258
template <int nLayers>
294259
GPUg() void fitTrackSeedsKernel(
295260
CellSeed* trackSeeds,
@@ -554,7 +519,7 @@ GPUg() void computeLayerTrackletsMultiROFKernel(
554519
const float zAtRmax{tanLambda * (maxR - currentCluster.radius) + currentCluster.zCoordinate};
555520
const float sqInverseDeltaZ0{1.f / (math_utils::Sq(currentCluster.zCoordinate - primaryVertex.getZ()) + constants::Tolerance)}; /// protecting from overflows adding the detector resolution
556521
const float sigmaZ{o2::gpu::CAMath::Sqrt(math_utils::Sq(resolution) * math_utils::Sq(tanLambda) * ((math_utils::Sq(inverseR0) + sqInverseDeltaZ0) * math_utils::Sq(meanDeltaR) + 1.f) + math_utils::Sq(meanDeltaR * MSAngle))};
557-
const int4 selectedBinsRect{getBinsRect(currentCluster, layerIndex + 1, *utils, zAtRmin, zAtRmax, sigmaZ * NSigmaCut, phiCut)};
522+
const int4 selectedBinsRect{getBinsRect(currentCluster, layerIndex + 1, utils, zAtRmin, zAtRmax, sigmaZ * NSigmaCut, phiCut)};
558523
if (selectedBinsRect.x == 0 && selectedBinsRect.y == 0 && selectedBinsRect.z == 0 && selectedBinsRect.w == 0) {
559524
continue;
560525
}

Detectors/ITSMFT/ITS/tracking/GPU/cuda/VertexerTraitsGPU.cxx

Lines changed: 37 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -13,17 +13,26 @@
1313

1414
#include "ITStrackingGPU/VertexingKernels.h"
1515
#include "ITStrackingGPU/VertexerTraitsGPU.h"
16+
#include "ITStracking/TrackingConfigParam.h"
1617

1718
namespace o2::its
1819
{
1920

2021
void VertexerTraitsGPU::initialise(const TrackingParameters& trackingParams, const int iteration)
2122
{
2223
mTimeFrameGPU->initialise(0, trackingParams, 3, &mIndexTableUtils, &mTfGPUParams);
23-
// mTimeFrameGPU->loadClustersDevice(iteration);
24-
// mTimeFrameGPU->loadUnsortedClustersDevice(iteration);
25-
// mTimeFrameGPU->loadClustersIndexTables(iteration);
26-
// mTimeFrameGPU->loadROframeClustersDevice(iteration);
24+
mTimeFrameGPU->loadClustersDevice(iteration);
25+
mTimeFrameGPU->loadUnsortedClustersDevice(iteration);
26+
mTimeFrameGPU->loadClustersIndexTables(iteration);
27+
mTimeFrameGPU->loadROframeClustersDevice(iteration);
28+
mTimeFrameGPU->createUsedClustersDevice(iteration);
29+
mTimeFrameGPU->loadIndexTableUtils(iteration);
30+
}
31+
32+
void VertexerTraitsGPU::adoptTimeFrame(TimeFrame<7>* tf) noexcept
33+
{
34+
mTimeFrameGPU = static_cast<gpu::TimeFrameGPU<7>*>(tf);
35+
mTimeFrame = static_cast<TimeFrame<7>*>(tf);
2736
}
2837

2938
void VertexerTraitsGPU::updateVertexingParameters(const std::vector<VertexingParameters>& vrtPar, const TimeFrameGPUParameters& tfPar)
@@ -39,15 +48,30 @@ void VertexerTraitsGPU::updateVertexingParameters(const std::vector<VertexingPar
3948

4049
void VertexerTraitsGPU::computeTracklets(const int iteration)
4150
{
42-
if (mTimeFrameGPU->getClusters().empty()) {
43-
return;
44-
}
45-
for (short pivotRofId{0}; pivotRofId < mTimeFrameGPU->getNrof(); ++pivotRofId) {
46-
bool skipROF = iteration && (int)mTimeFrame->getPrimaryVertices(pivotRofId).size() > mVrtParams[iteration].vertPerRofThreshold;
47-
short startROF{std::max((short)0, static_cast<short>(pivotRofId - mVrtParams[iteration].deltaRof))};
48-
short endROF{std::min(static_cast<short>(mTimeFrame->getNrof()), static_cast<short>(pivotRofId + mVrtParams[iteration].deltaRof + 1))};
49-
}
50-
// gpu::computeTrackletsInROFsHandler
51+
const auto& conf = ITSGpuTrackingParamConfig::Instance();
52+
53+
mTimeFrameGPU->createTrackletsLUTDevice(iteration, 2);
54+
mTimeFrameGPU->createTrackletsPerROFDevice(iteration, mTimeFrameGPU->getNrof());
55+
countTrackletsInROFsHandler(mTimeFrameGPU->getDeviceIndexTableUtils(),
56+
mTimeFrameGPU->getDeviceMultCutMask(),
57+
mTimeFrameGPU->getNrof(),
58+
this->mVrtParams[iteration].deltaRof,
59+
mTimeFrameGPU->getDeviceROFramesPV(),
60+
mVrtParams[iteration].vertPerRofThreshold,
61+
mTimeFrameGPU->getDeviceArrayClusters(),
62+
mTimeFrameGPU->getClusterSizes(),
63+
mTimeFrameGPU->getDeviceROframeClusters(),
64+
mTimeFrameGPU->getDeviceArrayUsedClusters(),
65+
mTimeFrameGPU->getDeviceArrayClustersIndexTables(),
66+
mTimeFrameGPU->getDeviceArrayTrackletsLUT(),
67+
mTimeFrameGPU->getDeviceArrayTrackletsPerROF(),
68+
mTimeFrameGPU->getDeviceTrackletsLUTs(), // Required for the exclusive sums
69+
iteration,
70+
mVrtParams[iteration].phiCut,
71+
200,
72+
conf.nBlocks,
73+
conf.nThreads,
74+
mTimeFrameGPU->getStreams());
5175
}
5276

5377
void VertexerTraitsGPU::computeTrackletMatching(const int iteration)

0 commit comments

Comments
 (0)