Skip to content

Commit 4ca1149

Browse files
committed
Various fixes for the HIP clusterizer. TODO: review whether these changes are really needed.
1 parent 432ec89 commit 4ca1149

3 files changed

Lines changed: 27 additions & 16 deletions

File tree

GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx

Lines changed: 22 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1209,17 +1209,9 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
12091209
runKernel<GPUMemClean16>({GetGridAutoStep(lane, RecoStep::TPCClusterFinding), krnlRunRangeNone, {nullptr, waitEvent}}, clustererShadow.mPclusterInRow, GPUCA_ROW_COUNT * sizeof(*clustererShadow.mPclusterInRow));
12101210
}
12111211

1212-
// TODO: Move this right after CheckPadBaseline once tail zeroing is moved into this kernel.
1213-
// The mPnHIPTails counter zeroing will then also need to be adjusted accordingly.
1214-
if (rec()->GetParam().rec.tpc.hipTailFilter) {
1215-
runKernel<GPUTPCCFHIPClusterizer>({GetGridBlk(1, lane), {iSector}});
1216-
}
1217-
1218-
if (clusterer.mPmemory->counters.nClusters == 0) {
1219-
return;
1220-
}
1221-
1222-
if (GetProcessingSettings().nn.applyNNclusterizer) {
1212+
const auto nRegularClusters = clusterer.mPmemory->counters.nClusters;
1213+
if (nRegularClusters != 0) {
1214+
if (GetProcessingSettings().nn.applyNNclusterizer) {
12231215
#ifdef GPUCA_HAS_ONNX
12241216
GPUTPCNNClusterizer& clustererNN = processors()->tpcNNClusterer[lane];
12251217
GPUTPCNNClusterizer& clustererNNShadow = doGPU ? processorsShadow()->tpcNNClusterer[lane] : clustererNN;
@@ -1359,12 +1351,27 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
13591351
runKernel<GPUTPCCFClusterizer>({GetGrid(clusterer.mPmemory->counters.nClusters, lane), {iSector}}, 0);
13601352
}
13611353

1362-
if (doGPU && propagateMCLabels) {
1363-
TransferMemoryResourceLinkToHost(RecoStep::TPCClusterFinding, clusterer.mScratchId, lane);
1364-
if (doGPU) {
1354+
if (doGPU && propagateMCLabels) {
1355+
TransferMemoryResourceLinkToHost(RecoStep::TPCClusterFinding, clusterer.mScratchId, lane);
1356+
if (doGPU) {
1357+
SynchronizeStream(lane);
1358+
}
1359+
runKernel<GPUTPCCFClusterizer>({GetGrid(clusterer.mPmemory->counters.nClusters, lane, GPUReconstruction::krnlDeviceType::CPU), {iSector}}, 1); // Computes MC labels
1360+
}
1361+
}
1362+
1363+
// TODO: Move this right after CheckPadBaseline once tail zeroing is moved into this kernel.
1364+
// The mPnHIPTails counter zeroing will then also need to be adjusted accordingly.
1365+
if (rec()->GetParam().rec.tpc.hipTailFilter) {
1366+
runKernel<GPUTPCCFHIPClusterizer>({GetGridBlk(1, lane), {iSector}});
1367+
if (doGPU && (nRegularClusters == 0 || GetProcessingSettings().debugLevel >= 3)) {
1368+
TransferMemoryResourceLinkToHost(RecoStep::TPCClusterFinding, clusterer.mMemoryId, lane);
13651369
SynchronizeStream(lane);
13661370
}
1367-
runKernel<GPUTPCCFClusterizer>({GetGrid(clusterer.mPmemory->counters.nClusters, lane, GPUReconstruction::krnlDeviceType::CPU), {iSector}}, 1); // Computes MC labels
1371+
}
1372+
1373+
if (clusterer.mPmemory->counters.nClusters == 0) {
1374+
return;
13681375
}
13691376

13701377
if (GetProcessingSettings().debugLevel >= 3) {

GPU/GPUTracking/TPCClusterFinder/GPUTPCCFCheckPadBaseline.cxx

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -444,6 +444,7 @@ GPUd() void GPUTPCCFHIPClusterizer::Thread<0>(int32_t nBlocks, int32_t nThreads,
444444
merged[i] = false;
445445
}
446446

447+
tpccf::SizeT nCreatedClusters = 0;
447448
for (uint32_t i = 0; i < n; i++) {
448449
if (merged[i]) {
449450
continue;
@@ -511,6 +512,9 @@ GPUd() void GPUTPCCFHIPClusterizer::Thread<0>(int32_t nBlocks, int32_t nThreads,
511512
uint32_t index = CAMath::AtomicAdd(&clusterer.mPclusterInRow[row], 1u);
512513
if (index < clusterer.mNMaxClusterPerRow) {
513514
clusterer.mPclusterByRow[clusterer.mNMaxClusterPerRow * row + index] = cn;
515+
nCreatedClusters++;
514516
}
515517
}
518+
519+
clusterer.mPmemory->counters.nClusters += nCreatedClusters;
516520
}

GPU/GPUTracking/TPCClusterFinder/GPUTPCCFCheckPadBaseline.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,7 @@ class GPUTPCCFHIPClusterizer : public GPUKernelTemplate
151151
{
152152
public:
153153
enum {
154-
MaxHIPTails = 256,
154+
MaxHIPTails = 1 << 15,
155155
};
156156

157157
struct GPUSharedMemory {

0 commit comments

Comments
 (0)