Skip to content

Commit aedd1d5

Browse files
committed
ITS: remove thread-lock for det mode
Signed-off-by: Felix Schlepper <felix.schlepper@cern.ch>
1 parent 60408cd commit aedd1d5

3 files changed

Lines changed: 12 additions & 35 deletions

File tree

Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/Utils.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,6 @@
2727
#define THRUST_NAMESPACE thrust::hip
2828
#endif
2929

30-
#define GPU_BLOCKS GPUCA_DETERMINISTIC_CODE(1, 99999)
31-
#define GPU_THREADS GPUCA_DETERMINISTIC_CODE(1, 99999)
32-
3330
namespace o2::its
3431
{
3532

Detectors/ITSMFT/ITS/tracking/GPU/cuda/TimeFrameGPU.cu

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,6 @@
2121
#include "ITStrackingGPU/TracerGPU.h"
2222

2323
#include <unistd.h>
24-
#include <thread>
25-
#include <tuple>
2624
#include <vector>
2725
#include <fmt/format.h>
2826

Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu

Lines changed: 12 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -898,10 +898,7 @@ void countTrackletsInROFsHandler(const IndexTableUtils* utils,
898898
gpu::Streams& streams)
899899
{
900900
for (int iLayer = 0; iLayer < nLayers - 1; ++iLayer) {
901-
gpu::computeLayerTrackletsMultiROFKernel<true><<<o2::gpu::CAMath::Min(nBlocks, GPU_BLOCKS),
902-
o2::gpu::CAMath::Min(nThreads, GPU_THREADS),
903-
0,
904-
streams[iLayer].get()>>>(
901+
gpu::computeLayerTrackletsMultiROFKernel<true><<<nBlocks, nThreads, 0, streams[iLayer].get()>>>(
905902
utils,
906903
multMask,
907904
iLayer,
@@ -967,10 +964,7 @@ void computeTrackletsInROFsHandler(const IndexTableUtils* utils,
967964
gpu::Streams& streams)
968965
{
969966
for (int iLayer = 0; iLayer < nLayers - 1; ++iLayer) {
970-
gpu::computeLayerTrackletsMultiROFKernel<false><<<o2::gpu::CAMath::Min(nBlocks, GPU_BLOCKS),
971-
o2::gpu::CAMath::Min(nThreads, GPU_THREADS),
972-
0,
973-
streams[iLayer].get()>>>(
967+
gpu::computeLayerTrackletsMultiROFKernel<false><<<nBlocks, nThreads, 0, streams[iLayer].get()>>>(
974968
utils,
975969
multMask,
976970
iLayer,
@@ -1004,10 +998,7 @@ void computeTrackletsInROFsHandler(const IndexTableUtils* utils,
1004998
nTracklets[iLayer] = unique_end - tracklets_ptr;
1005999
if (iLayer > 0) {
10061000
GPUChkErrS(cudaMemsetAsync(trackletsLUTsHost[iLayer], 0, nClusters[iLayer] * sizeof(int), streams[iLayer].get()));
1007-
gpu::compileTrackletsLookupTableKernel<<<o2::gpu::CAMath::Min(nBlocks, GPU_BLOCKS),
1008-
o2::gpu::CAMath::Min(nThreads, GPU_THREADS),
1009-
0,
1010-
streams[iLayer].get()>>>(
1001+
gpu::compileTrackletsLookupTableKernel<<<nBlocks, nThreads, 0, streams[iLayer].get()>>>(
10111002
spanTracklets[iLayer],
10121003
trackletsLUTsHost[iLayer],
10131004
nTracklets[iLayer]);
@@ -1034,8 +1025,7 @@ void countCellsHandler(
10341025
const int nBlocks,
10351026
const int nThreads)
10361027
{
1037-
gpu::computeLayerCellsKernel<true><<<o2::gpu::CAMath::Min(nBlocks, GPU_BLOCKS),
1038-
o2::gpu::CAMath::Min(nThreads, GPU_THREADS)>>>(
1028+
gpu::computeLayerCellsKernel<true><<<nBlocks, nThreads>>>(
10391029
sortedClusters, // const Cluster**
10401030
unsortedClusters, // const Cluster**
10411031
tfInfo, // const TrackingFrameInfo**
@@ -1070,8 +1060,7 @@ void computeCellsHandler(
10701060
const int nBlocks,
10711061
const int nThreads)
10721062
{
1073-
gpu::computeLayerCellsKernel<false><<<o2::gpu::CAMath::Min(nBlocks, GPU_BLOCKS),
1074-
o2::gpu::CAMath::Min(nThreads, GPU_THREADS)>>>(
1063+
gpu::computeLayerCellsKernel<false><<<nBlocks, nThreads>>>(
10751064
sortedClusters, // const Cluster**
10761065
unsortedClusters, // const Cluster**
10771066
tfInfo, // const TrackingFrameInfo**
@@ -1101,8 +1090,7 @@ unsigned int countCellNeighboursHandler(CellSeed** cellsLayersDevice,
11011090
const int nBlocks,
11021091
const int nThreads)
11031092
{
1104-
gpu::computeLayerCellNeighboursKernel<true><<<o2::gpu::CAMath::Min(nBlocks, GPU_BLOCKS),
1105-
o2::gpu::CAMath::Min(nThreads, GPU_THREADS)>>>(
1093+
gpu::computeLayerCellNeighboursKernel<true><<<nBlocks, nThreads>>>(
11061094
cellsLayersDevice,
11071095
neighboursLUT,
11081096
neighboursIndexTable,
@@ -1136,8 +1124,7 @@ void computeCellNeighboursHandler(CellSeed** cellsLayersDevice,
11361124
const int nThreads)
11371125
{
11381126

1139-
gpu::computeLayerCellNeighboursKernel<false><<<o2::gpu::CAMath::Min(nBlocks, GPU_BLOCKS),
1140-
o2::gpu::CAMath::Min(nThreads, GPU_THREADS)>>>(
1127+
gpu::computeLayerCellNeighboursKernel<false><<<nBlocks, nThreads>>>(
11411128
cellsLayersDevice,
11421129
neighboursLUT,
11431130
neighboursIndexTable,
@@ -1192,8 +1179,7 @@ void processNeighboursHandler(const int startLayer,
11921179
thrust::device_vector<int, gpu::TypedAllocator<int>> foundSeedsTable(nCells[startLayer] + 1, 0, allocInt); // Shortcut: device_vector skips central memory management, we are relying on the contingency.
11931180
// TODO: fix this.
11941181

1195-
gpu::processNeighboursKernel<true><<<o2::gpu::CAMath::Min(nBlocks, GPU_BLOCKS),
1196-
o2::gpu::CAMath::Min(nThreads, GPU_THREADS)>>>(
1182+
gpu::processNeighboursKernel<true><<<nBlocks, nThreads>>>(
11971183
startLayer,
11981184
startLevel,
11991185
allCellSeeds,
@@ -1215,8 +1201,7 @@ void processNeighboursHandler(const int startLayer,
12151201

12161202
thrust::device_vector<int, gpu::TypedAllocator<int>> updatedCellId(foundSeedsTable.back(), 0, allocInt);
12171203
thrust::device_vector<CellSeed, gpu::TypedAllocator<CellSeed>> updatedCellSeed(foundSeedsTable.back(), allocCellSeed);
1218-
gpu::processNeighboursKernel<false><<<o2::gpu::CAMath::Min(nBlocks, GPU_BLOCKS),
1219-
o2::gpu::CAMath::Min(nThreads, GPU_THREADS)>>>(
1204+
gpu::processNeighboursKernel<false><<<nBlocks, nThreads>>>(
12201205
startLayer,
12211206
startLevel,
12221207
allCellSeeds,
@@ -1249,8 +1234,7 @@ void processNeighboursHandler(const int startLayer,
12491234
foundSeedsTable.resize(lastCellSeedSize + 1);
12501235
thrust::fill(foundSeedsTable.begin(), foundSeedsTable.end(), 0);
12511236

1252-
gpu::processNeighboursKernel<true><<<o2::gpu::CAMath::Min(nBlocks, GPU_BLOCKS),
1253-
o2::gpu::CAMath::Min(nThreads, GPU_THREADS)>>>(
1237+
gpu::processNeighboursKernel<true><<<nBlocks, nThreads>>>(
12541238
iLayer,
12551239
--level,
12561240
allCellSeeds,
@@ -1276,8 +1260,7 @@ void processNeighboursHandler(const int startLayer,
12761260
updatedCellSeed.resize(foundSeeds);
12771261
thrust::fill(updatedCellSeed.begin(), updatedCellSeed.end(), CellSeed());
12781262

1279-
gpu::processNeighboursKernel<false><<<o2::gpu::CAMath::Min(nBlocks, GPU_BLOCKS),
1280-
o2::gpu::CAMath::Min(nThreads, GPU_THREADS)>>>(
1263+
gpu::processNeighboursKernel<false><<<nBlocks, nThreads>>>(
12811264
iLayer,
12821265
level,
12831266
allCellSeeds,
@@ -1320,8 +1303,7 @@ void trackSeedHandler(CellSeed* trackSeeds,
13201303
const int nThreads)
13211304
{
13221305
thrust::device_vector<float> minPts(minPtsHost);
1323-
gpu::fitTrackSeedsKernel<<<o2::gpu::CAMath::Min(nBlocks, GPU_BLOCKS),
1324-
o2::gpu::CAMath::Min(nThreads, GPU_THREADS)>>>(
1306+
gpu::fitTrackSeedsKernel<<<nBlocks, nThreads>>>(
13251307
trackSeeds, // CellSeed*
13261308
foundTrackingFrameInfo, // TrackingFrameInfo**
13271309
tracks, // TrackITSExt*

0 commit comments

Comments
 (0)