@@ -96,21 +96,22 @@ void trackleterKernelHost(
9696 }
9797}
9898
99- void trackletSelectionKernelHost (
99+ static void trackletSelectionKernelHost (
100100 const gsl::span<const Cluster> clusters0, // 0
101101 const gsl::span<const Cluster> clusters1, // 1
102102 gsl::span<unsigned char > usedClusters0, // Layer 0
103103 gsl::span<unsigned char > usedClusters2, // Layer 2
104104 const gsl::span<const Tracklet>& tracklets01,
105105 const gsl::span<const Tracklet>& tracklets12,
106- bounded_vector<uint8_t >& usedTracklets,
106+ bounded_vector<bool >& usedTracklets,
107107 const gsl::span<int > foundTracklets01,
108108 const gsl::span<int > foundTracklets12,
109109 bounded_vector<Line>& lines,
110110 const gsl::span<const o2::MCCompLabel>& trackletLabels,
111111 bounded_vector<o2::MCCompLabel>& linesLabels,
112- const short pivotRofId,
113- const short targetRofId,
112+ const short targetRofId0,
113+ const short targetRofId2,
114+ bool safeWrites = false ,
114115 const float tanLambdaCut = 0 .025f ,
115116 const float phiCut = 0 .005f ,
116117 const int maxTracklets = static_cast <int >(1e2 ))
@@ -120,16 +121,27 @@ void trackletSelectionKernelHost(
120121 int validTracklets{0 };
121122 for (int iTracklet12{offset12}; iTracklet12 < offset12 + foundTracklets12[iCurrentLayerClusterIndex]; ++iTracklet12) {
122123 for (int iTracklet01{offset01}; iTracklet01 < offset01 + foundTracklets01[iCurrentLayerClusterIndex]; ++iTracklet01) {
124+ if (usedTracklets[iTracklet01]) {
125+ continue ;
126+ }
127+
123128 const auto & tracklet01{tracklets01[iTracklet01]};
124129 const auto & tracklet12{tracklets12[iTracklet12]};
125- if (tracklet01.rof [0 ] != targetRofId || tracklet12.rof [1 ] != targetRofId) {
130+
131+ if (tracklet01.rof [0 ] != targetRofId0 || tracklet12.rof [1 ] != targetRofId2) {
126132 continue ;
127133 }
134+
128135 const float deltaTanLambda{o2::gpu::GPUCommonMath::Abs (tracklet01.tanLambda - tracklet12.tanLambda )};
129136 const float deltaPhi{o2::gpu::GPUCommonMath::Abs (math_utils::smallestAngleDifference (tracklet01.phi , tracklet12.phi ))};
130- if (!usedTracklets[iTracklet01] && deltaTanLambda < tanLambdaCut && deltaPhi < phiCut && validTracklets != maxTracklets) {
131- usedClusters0[tracklet01.firstClusterIndex ] = true ;
132- usedClusters2[tracklet12.secondClusterIndex ] = true ;
137+ if (deltaTanLambda < tanLambdaCut && deltaPhi < phiCut && validTracklets != maxTracklets) {
138+ if (safeWrites) {
139+ __atomic_store_n (&usedClusters0[tracklet01.firstClusterIndex ], 1 , __ATOMIC_RELAXED);
140+ __atomic_store_n (&usedClusters2[tracklet12.secondClusterIndex ], 1 , __ATOMIC_RELAXED);
141+ } else {
142+ usedClusters0[tracklet01.firstClusterIndex ] = 1 ;
143+ usedClusters2[tracklet12.secondClusterIndex ] = 1 ;
144+ }
133145 usedTracklets[iTracklet01] = true ;
134146 lines.emplace_back (tracklet01, clusters0.data (), clusters1.data ());
135147 if (!trackletLabels.empty ()) {
@@ -325,31 +337,37 @@ void VertexerTraits::computeTrackletMatching(const int iteration)
325337 if (iteration && (int )mTimeFrame ->getPrimaryVertices (pivotRofId).size () > mVrtParams [iteration].vertPerRofThreshold ) {
326338 continue ;
327339 }
328- if (mTimeFrame ->getFoundTracklets (pivotRofId, 0 ). empty ( )) {
340+ if (! mTimeFrame ->getNTrackletsROF (pivotRofId, 0 )) {
329341 continue ;
330342 }
331343 mTimeFrame ->getLines (pivotRofId).reserve (mTimeFrame ->getNTrackletsCluster (pivotRofId, 0 ).size ());
332- bounded_vector<uint8_t > usedTracklets (mTimeFrame ->getFoundTracklets (pivotRofId, 0 ).size (), false , mMemoryPool .get ());
344+ bounded_vector<bool > usedTracklets (mTimeFrame ->getFoundTracklets (pivotRofId, 0 ).size (), false , mMemoryPool .get ());
333345 short startROF{std::max ((short )0 , static_cast <short >(pivotRofId - mVrtParams [iteration].deltaRof ))};
334346 short endROF{std::min (static_cast <short >(mTimeFrame ->getNrof ()), static_cast <short >(pivotRofId + mVrtParams [iteration].deltaRof + 1 ))};
335- for (short targetRofId = startROF; targetRofId < endROF; ++targetRofId) {
336- trackletSelectionKernelHost (
337- mTimeFrame ->getClustersOnLayer (targetRofId, 0 ),
338- mTimeFrame ->getClustersOnLayer (pivotRofId, 1 ),
339- mTimeFrame ->getUsedClustersROF (targetRofId, 0 ),
340- mTimeFrame ->getUsedClustersROF (targetRofId, 2 ),
341- mTimeFrame ->getFoundTracklets (pivotRofId, 0 ),
342- mTimeFrame ->getFoundTracklets (pivotRofId, 1 ),
343- usedTracklets,
344- mTimeFrame ->getNTrackletsCluster (pivotRofId, 0 ),
345- mTimeFrame ->getNTrackletsCluster (pivotRofId, 1 ),
346- mTimeFrame ->getLines (pivotRofId),
347- mTimeFrame ->getLabelsFoundTracklets (pivotRofId, 0 ),
348- mTimeFrame ->getLinesLabel (pivotRofId),
349- pivotRofId,
350- targetRofId,
351- mVrtParams [iteration].tanLambdaCut ,
352- mVrtParams [iteration].phiCut );
347+ for (short targetRofId0 = startROF; targetRofId0 < endROF; ++targetRofId0) {
348+ for (short targetRofId2 = startROF; targetRofId2 < endROF; ++targetRofId2) {
349+ if (std::abs (targetRofId0 - targetRofId2) > mVrtParams [iteration].deltaRof ) { // do not allow over 3 ROFs
350+ continue ;
351+ }
352+ trackletSelectionKernelHost (
353+ mTimeFrame ->getClustersOnLayer (targetRofId0, 0 ),
354+ mTimeFrame ->getClustersOnLayer (pivotRofId, 1 ),
355+ mTimeFrame ->getUsedClustersROF (targetRofId0, 0 ),
356+ mTimeFrame ->getUsedClustersROF (targetRofId2, 2 ),
357+ mTimeFrame ->getFoundTracklets (pivotRofId, 0 ),
358+ mTimeFrame ->getFoundTracklets (pivotRofId, 1 ),
359+ usedTracklets,
360+ mTimeFrame ->getNTrackletsCluster (pivotRofId, 0 ),
361+ mTimeFrame ->getNTrackletsCluster (pivotRofId, 1 ),
362+ mTimeFrame ->getLines (pivotRofId),
363+ mTimeFrame ->getLabelsFoundTracklets (pivotRofId, 0 ),
364+ mTimeFrame ->getLinesLabel (pivotRofId),
365+ targetRofId0,
366+ targetRofId2,
367+ mTaskArena ->max_concurrency () > 1 && mVrtParams [iteration].deltaRof != 0 ,
368+ mVrtParams [iteration].tanLambdaCut ,
369+ mVrtParams [iteration].phiCut );
370+ }
353371 }
354372 }
355373 });
0 commit comments