AliceO2/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFMCLabelFlattener.cxx at 49b9f8da43870d41d9a50ff9f9cd07e109ac94b3 · alibuild/AliceO2 · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
// All rights not expressly granted are reserved.
//
// This software is distributed under the terms of the GNU General Public
// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
//
// In applying this license CERN does not waive the privileges and immunities
// granted to it by virtue of its status as an Intergovernmental Organization
// or submit itself to any jurisdiction.

/// \file GPUTPCCFMCLabelFlattener.cxx
/// \author Felix Weiglhofer

#include "GPUTPCCFMCLabelFlattener.h"

#if !defined(GPUCA_GPUCODE)
#include "GPUHostDataTypes.h"
#endif

using namespace o2::gpu;
using namespace o2::gpu::tpccf;

#if !defined(GPUCA_GPUCODE)
/// Records where this cluster finder's labels start inside the shared linear
/// label container and grows that container to make room for them.
///
/// \param cls    Cluster finder whose per-row cluster / label counts are read and
///               whose global header / data offsets are written.
/// \param labels Linear label container; its header and data arrays are extended
///               by the summed per-row cluster and label counts respectively.
void GPUTPCCFMCLabelFlattener::setGlobalOffsetsAndAllocate(
  GPUTPCClusterFinder& cls,
  GPUTPCLinearLabels& labels)
{
  // The current end of the output arrays is where this cluster finder starts writing.
  const uint32_t headerStart = labels.header.size();
  const uint32_t dataStart = labels.data.size();

  cls.mPlabelsHeaderGlobalOffset = headerStart;
  cls.mPlabelsDataGlobalOffset = dataStart;

  // Accumulate the space needed over all rows: one header entry per cluster,
  // one data entry per label.
  uint32_t headerEnd = headerStart;
  uint32_t dataEnd = dataStart;
  for (Row r = 0; r < GPUTPCGeometry::NROWS; ++r) {
    headerEnd += cls.mPclusterInRow[r];
    dataEnd += cls.mPlabelsInRow[r];
  }

  labels.header.resize(headerEnd);
  labels.data.resize(dataEnd);
}
#endif

/// Kernel step `setRowOffsets`: counts the MC labels attached to the clusters of
/// one row (the row is taken from the global work-item id) and stores the total
/// in `clusterer.mPlabelsInRow[row]`. Compiled to a no-op when GPUCA_GPUCODE is defined.
template <>
GPUd() void GPUTPCCFMCLabelFlattener::Thread<GPUTPCCFMCLabelFlattener::setRowOffsets>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory&, processorType& clusterer)
{
#if !defined(GPUCA_GPUCODE)
  const Row row = get_global_id(0);
  const size_t nClusters = clusterer.mPclusterInRow[row];

  // The flattener expects exactly one label container per cluster. HIP clusters
  // do not support MC labels yet, so their containers are missing; pad the row
  // with empty containers until every cluster has one.
  // Note: this assumes that HIP clusters are stored behind the regular clusters!
  auto& rowLabels = clusterer.mPlabelsByRow[row].data;
  if (rowLabels.size() < nClusters) {
    rowLabels.resize(nClusters); // only ever grow, never drop existing containers
  }

  // Sum the label counts of the first nClusters containers.
  uint32_t total = 0;
  for (size_t idx = 0; idx != nClusters; ++idx) {
    total += rowLabels[idx].labels.size();
  }

  clusterer.mPlabelsInRow[row] = total;
#endif
}

/// Kernel step `flatten`: copies the interim per-cluster labels of one row (the
/// row is taken from the global work-item id) into the linear output container.
/// For every cluster, the header entry receives the index of its first label in
/// `out->data`, and the labels themselves are copied there contiguously.
/// Compiled to a no-op when GPUCA_GPUCODE is defined.
///
/// \param clusterer Cluster finder providing per-row counts, global offsets and
///                  the interim label containers (which are emptied here).
/// \param out       Linear label container to write into; it is not resized here,
///                  so it must already be large enough.
template <>
GPUd() void GPUTPCCFMCLabelFlattener::Thread<GPUTPCCFMCLabelFlattener::flatten>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory&, processorType& clusterer, GPUTPCLinearLabels* out)
{
#if !defined(GPUCA_GPUCODE)
  uint32_t row = get_global_id(0);

  // Start position of this row = global start + everything the preceding rows occupy.
  uint32_t headerPos = clusterer.mPlabelsHeaderGlobalOffset;
  uint32_t dataPos = clusterer.mPlabelsDataGlobalOffset;
  for (uint32_t prev = 0; prev != row; ++prev) {
    headerPos += clusterer.mPclusterInRow[prev];
    dataPos += clusterer.mPlabelsInRow[prev];
  }

  const auto clustersInRow = clusterer.mPclusterInRow[row];
  auto* rowLabels = clusterer.mPlabelsByRow[row].data.data();
  for (uint32_t c = 0; c < clustersInRow; c++) {
    GPUTPCClusterMCInterim& interim = rowLabels[c];
    const auto nLabels = interim.labels.size();

    assert(dataPos + nLabels <= out->data.size());

    // Header entry points at the first label of this cluster in the flat data array.
    out->header[headerPos] = dataPos;
    std::copy(interim.labels.cbegin(), interim.labels.cend(), out->data.begin() + dataPos);

    ++headerPos;
    dataPos += nLabels;

    // Reset the interim container so its labels are destroyed (prevents a memleak).
    interim = {};
  }
#endif
}