ITS: remove thread-lock for deterministic mode

f3sch · f3sch · commit aedd1d5e7a99 · 2025-07-21T17:42:40.000+02:00
Signed-off-by: Felix Schlepper <felix.schlepper@cern.ch>
diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/Utils.h b/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/Utils.h
@@ -27,9 +27,6 @@
 #define THRUST_NAMESPACE thrust::hip
 #endif
 
-#define GPU_BLOCKS GPUCA_DETERMINISTIC_CODE(1, 99999)
-#define GPU_THREADS GPUCA_DETERMINISTIC_CODE(1, 99999)
-
 namespace o2::its
 {
 
diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TimeFrameGPU.cu b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TimeFrameGPU.cu
@@ -21,8 +21,6 @@
 #include "ITStrackingGPU/TracerGPU.h"
 
 #include <unistd.h>
-#include <thread>
-#include <tuple>
 #include <vector>
 #include <fmt/format.h>
 
diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu
@@ -898,10 +898,7 @@ void countTrackletsInROFsHandler(const IndexTableUtils* utils,
                                  gpu::Streams& streams)
 {
   for (int iLayer = 0; iLayer < nLayers - 1; ++iLayer) {
-    gpu::computeLayerTrackletsMultiROFKernel<true><<<o2::gpu::CAMath::Min(nBlocks, GPU_BLOCKS),
-                                                     o2::gpu::CAMath::Min(nThreads, GPU_THREADS),
-                                                     0,
-                                                     streams[iLayer].get()>>>(
+    gpu::computeLayerTrackletsMultiROFKernel<true><<<nBlocks, nThreads, 0, streams[iLayer].get()>>>(
       utils,
       multMask,
       iLayer,
@@ -967,10 +964,7 @@ void computeTrackletsInROFsHandler(const IndexTableUtils* utils,
                                    gpu::Streams& streams)
 {
   for (int iLayer = 0; iLayer < nLayers - 1; ++iLayer) {
-    gpu::computeLayerTrackletsMultiROFKernel<false><<<o2::gpu::CAMath::Min(nBlocks, GPU_BLOCKS),
-                                                      o2::gpu::CAMath::Min(nThreads, GPU_THREADS),
-                                                      0,
-                                                      streams[iLayer].get()>>>(
+    gpu::computeLayerTrackletsMultiROFKernel<false><<<nBlocks, nThreads, 0, streams[iLayer].get()>>>(
       utils,
       multMask,
       iLayer,
@@ -1004,10 +998,7 @@ void computeTrackletsInROFsHandler(const IndexTableUtils* utils,
     nTracklets[iLayer] = unique_end - tracklets_ptr;
     if (iLayer > 0) {
       GPUChkErrS(cudaMemsetAsync(trackletsLUTsHost[iLayer], 0, nClusters[iLayer] * sizeof(int), streams[iLayer].get()));
-      gpu::compileTrackletsLookupTableKernel<<<o2::gpu::CAMath::Min(nBlocks, GPU_BLOCKS),
-                                               o2::gpu::CAMath::Min(nThreads, GPU_THREADS),
-                                               0,
-                                               streams[iLayer].get()>>>(
+      gpu::compileTrackletsLookupTableKernel<<<nBlocks, nThreads, 0, streams[iLayer].get()>>>(
         spanTracklets[iLayer],
         trackletsLUTsHost[iLayer],
         nTracklets[iLayer]);
@@ -1034,8 +1025,7 @@ void countCellsHandler(
   const int nBlocks,
   const int nThreads)
 {
-  gpu::computeLayerCellsKernel<true><<<o2::gpu::CAMath::Min(nBlocks, GPU_BLOCKS),
-                                       o2::gpu::CAMath::Min(nThreads, GPU_THREADS)>>>(
+  gpu::computeLayerCellsKernel<true><<<nBlocks, nThreads>>>(
     sortedClusters,           // const Cluster**
     unsortedClusters,         // const Cluster**
     tfInfo,                   // const TrackingFrameInfo**
@@ -1070,8 +1060,7 @@ void computeCellsHandler(
   const int nBlocks,
   const int nThreads)
 {
-  gpu::computeLayerCellsKernel<false><<<o2::gpu::CAMath::Min(nBlocks, GPU_BLOCKS),
-                                        o2::gpu::CAMath::Min(nThreads, GPU_THREADS)>>>(
+  gpu::computeLayerCellsKernel<false><<<nBlocks, nThreads>>>(
     sortedClusters,           // const Cluster**
     unsortedClusters,         // const Cluster**
     tfInfo,                   // const TrackingFrameInfo**
@@ -1101,8 +1090,7 @@ unsigned int countCellNeighboursHandler(CellSeed** cellsLayersDevice,
                                         const int nBlocks,
                                         const int nThreads)
 {
-  gpu::computeLayerCellNeighboursKernel<true><<<o2::gpu::CAMath::Min(nBlocks, GPU_BLOCKS),
-                                                o2::gpu::CAMath::Min(nThreads, GPU_THREADS)>>>(
+  gpu::computeLayerCellNeighboursKernel<true><<<nBlocks, nThreads>>>(
     cellsLayersDevice,
     neighboursLUT,
     neighboursIndexTable,
@@ -1136,8 +1124,7 @@ void computeCellNeighboursHandler(CellSeed** cellsLayersDevice,
                                   const int nThreads)
 {
 
-  gpu::computeLayerCellNeighboursKernel<false><<<o2::gpu::CAMath::Min(nBlocks, GPU_BLOCKS),
-                                                 o2::gpu::CAMath::Min(nThreads, GPU_THREADS)>>>(
+  gpu::computeLayerCellNeighboursKernel<false><<<nBlocks, nThreads>>>(
     cellsLayersDevice,
     neighboursLUT,
     neighboursIndexTable,
@@ -1192,8 +1179,7 @@ void processNeighboursHandler(const int startLayer,
   thrust::device_vector<int, gpu::TypedAllocator<int>> foundSeedsTable(nCells[startLayer] + 1, 0, allocInt); // Shortcut: device_vector skips central memory management, we are relying on the contingency.
                                                                                                              // TODO: fix this.
 
-  gpu::processNeighboursKernel<true><<<o2::gpu::CAMath::Min(nBlocks, GPU_BLOCKS),
-                                       o2::gpu::CAMath::Min(nThreads, GPU_THREADS)>>>(
+  gpu::processNeighboursKernel<true><<<nBlocks, nThreads>>>(
     startLayer,
     startLevel,
     allCellSeeds,
@@ -1215,8 +1201,7 @@ void processNeighboursHandler(const int startLayer,
 
   thrust::device_vector<int, gpu::TypedAllocator<int>> updatedCellId(foundSeedsTable.back(), 0, allocInt);
   thrust::device_vector<CellSeed, gpu::TypedAllocator<CellSeed>> updatedCellSeed(foundSeedsTable.back(), allocCellSeed);
-  gpu::processNeighboursKernel<false><<<o2::gpu::CAMath::Min(nBlocks, GPU_BLOCKS),
-                                        o2::gpu::CAMath::Min(nThreads, GPU_THREADS)>>>(
+  gpu::processNeighboursKernel<false><<<nBlocks, nThreads>>>(
     startLayer,
     startLevel,
     allCellSeeds,
@@ -1249,8 +1234,7 @@ void processNeighboursHandler(const int startLayer,
     foundSeedsTable.resize(lastCellSeedSize + 1);
     thrust::fill(foundSeedsTable.begin(), foundSeedsTable.end(), 0);
 
-    gpu::processNeighboursKernel<true><<<o2::gpu::CAMath::Min(nBlocks, GPU_BLOCKS),
-                                         o2::gpu::CAMath::Min(nThreads, GPU_THREADS)>>>(
+    gpu::processNeighboursKernel<true><<<nBlocks, nThreads>>>(
       iLayer,
       --level,
       allCellSeeds,
@@ -1276,8 +1260,7 @@ void processNeighboursHandler(const int startLayer,
     updatedCellSeed.resize(foundSeeds);
     thrust::fill(updatedCellSeed.begin(), updatedCellSeed.end(), CellSeed());
 
-    gpu::processNeighboursKernel<false><<<o2::gpu::CAMath::Min(nBlocks, GPU_BLOCKS),
-                                          o2::gpu::CAMath::Min(nThreads, GPU_THREADS)>>>(
+    gpu::processNeighboursKernel<false><<<nBlocks, nThreads>>>(
       iLayer,
       level,
       allCellSeeds,
@@ -1320,8 +1303,7 @@ void trackSeedHandler(CellSeed* trackSeeds,
                       const int nThreads)
 {
   thrust::device_vector<float> minPts(minPtsHost);
-  gpu::fitTrackSeedsKernel<<<o2::gpu::CAMath::Min(nBlocks, GPU_BLOCKS),
-                             o2::gpu::CAMath::Min(nThreads, GPU_THREADS)>>>(
+  gpu::fitTrackSeedsKernel<<<nBlocks, nThreads>>>(
     trackSeeds,                           // CellSeed*
     foundTrackingFrameInfo,               // TrackingFrameInfo**
     tracks,                               // TrackITSExt*

Original file line number	Diff line number	Diff line change
`@@ -27,9 +27,6 @@`
`27`	`27`	`#define THRUST_NAMESPACE thrust::hip`
`28`	`28`	`#endif`
`29`	`29`
`30`		`-#define GPU_BLOCKS GPUCA_DETERMINISTIC_CODE(1, 99999)`
`31`		`-#define GPU_THREADS GPUCA_DETERMINISTIC_CODE(1, 99999)`
`32`		`-`
`33`	`30`	`namespace o2::its`
`34`	`31`	`{`
`35`	`32`