// Source: AliceO2Group/AliceO2, GPU/GPUTracking/TPCClusterFinder/GPUTPCCFCheckPadBaseline.h at commit bb9f0cc0ba65352f43091e57907fda9eed010ea8
// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
// All rights not expressly granted are reserved.
//
// This software is distributed under the terms of the GNU General Public
// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
//
// In applying this license CERN does not waive the privileges and immunities
// granted to it by virtue of its status as an Intergovernmental Organization
// or submit itself to any jurisdiction.

/// \file GPUTPCCFCheckPadBaseline.h
/// \author Felix Weiglhofer
///
/// Kernel identifies noisy TPC pads by analyzing charge patterns over time.
/// A pad is marked noisy if it exceeds thresholds for total or consecutive
/// time bins with charge, unless the charge exceeds a saturation threshold.
///
/// Optionally detects Highly Ionising Particle (HIP) tails: when a saturated
/// ADC value (1023) is found, the tail region on the triggering pad and its
/// neighbors is zeroed in the charge map until an exponential charge filter
/// drops below a configurable threshold.

#ifndef O2_GPU_GPU_TPC_CF_CHECK_PAD_BASELINE_H
#define O2_GPU_GPU_TPC_CF_CHECK_PAD_BASELINE_H

#include "GPUGeneralKernels.h"
#include "GPUConstantMem.h"
#include "GPUTPCGeometry.h"

#include "clusterFinderDefs.h"
#include "CfArray2D.h"

namespace o2::gpu
{

/// Plain record describing one HIP (Highly Ionising Particle) tail.
/// NOTE(review): the code that fills and consumes this struct is not part of this header;
/// the per-field notes below are inferred from the member names only and must be confirmed
/// against the implementation.
struct HIPTailDescriptor {
  uint32_t iPrev;     // presumably index of the preceding linked descriptor — TODO confirm
  uint32_t iNext;     // presumably index of the following linked descriptor — TODO confirm
  uint16_t pad;       // presumably the pad this tail was found on — TODO confirm
  uint16_t tailStart; // presumably first time bin of the tail — TODO confirm unit and inclusiveness
  uint16_t tailEnd;   // presumably last time bin of the tail — TODO confirm unit and inclusiveness
  float qTot;         // presumably summed charge of the tail — TODO confirm
  float qMax;         // presumably maximum charge seen in the tail — TODO confirm
};

/// Kernel class of the TPC cluster finder's pad-baseline check.
///
/// This header only declares the kernel: sizing constants, the shared-memory layout and the
/// helper types used while scanning a pad. The bodies of Thread(), CheckBaselineGPU(),
/// CheckBaselineCPU() and updatePadBaseline() are defined elsewhere.
class GPUTPCCFCheckPadBaseline : public GPUKernelTemplate
{
 public:
  /// Compile-time sizes used by the kernel and by GPUSharedMemory.
  enum {
    // Width of the uint16_t charge-map memory layout, used as number of pads per cacheline.
    PadsPerCacheline = TPCMapMemoryLayout<uint16_t>::Width,
    // Height of the uint16_t charge-map memory layout, used as number of time bins per cacheline.
    TimebinsPerCacheline = TPCMapMemoryLayout<uint16_t>::Height,
    // Number of charge-map entries in one cacheline.
    EntriesPerCacheline = PadsPerCacheline * TimebinsPerCacheline,
    // Warp size divided by the time bins per cacheline.
    NumOfCachedPads = GPUCA_WARP_SIZE / TimebinsPerCacheline,
    // First dimension of the shared charge cache: eight cachelines worth of time bins.
    NumOfCachedTBs = TimebinsPerCacheline * 8,
    // Shared memory is addressed by threads as [iThread / MaxNPadsPerRow][iThread % MaxNPadsPerRow].
    // The geometry's pad count is rounded up to a multiple of PadsPerCacheline so that
    // iThread / MaxNPadsPerRow stays below NumOfCachedTBs for every thread, which prevents
    // out-of-bounds accesses.
    MaxNPadsPerRow = CAMath::nextMultipleOf<PadsPerCacheline>(GPUTPCGeometry::MaxNPadsPerRow()),

    // Saturated ADC value.
    MaxADC = 1023,

    // Thread count configured for this kernel via its launch-bounds entry.
    NThreads = GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCFCheckPadBaseline),
    // NOTE(review): meaning not visible in this header; presumably a cluster width in pads — confirm.
    SSClusterPadWidth = 5,
  };

  /// Start/end pair describing a HIP tail. A value of -1 marks "not set":
  /// start == -1 means there is no tail, start >= 0 with end < 0 means the tail is still open.
  /// NOTE(review): start/end look like time-bin indices — confirm against the implementation.
  union HipTailRange {
    struct {
      int16_t start;
      int16_t end;
    };

    // Caution when relying on default-initialized instances:
    // the defaulted constructor is required so the type can live in shared memory.
    // Such an instance might end up zero initialized, whereas an invalid tail is encoded as
    // start = end = -1.
    GPUdDefault() HipTailRange() = default;
    GPUdi() HipTailRange(int16_t st, int16_t e) : start(st), end(e) {}

    /// True if a start has been recorded.
    GPUdi() bool HasValue() const { return start >= 0; }
    /// True if a start has been recorded but no end yet.
    GPUdi() bool IsOpen() const { return HasValue() && end < 0; }

    /// Begins a new tail at st and marks its end as not yet known.
    GPUdi() void SetOpen(int16_t st)
    {
      end = -1;
      start = st;
    }

    /// Returns end - start; for an open or unset range the result involves the -1 marker.
    GPUdi() int16_t Length() const { return static_cast<int16_t>(end - start); }

    /// Puts the range back into the "no tail" state.
    GPUdi() void Reset()
    {
      start = -1;
      end = -1;
    }
  };

  /// Shared-memory layout of the kernel. It extends the scan scratch space of
  /// GPUSharedMemoryScan64<int16_t, NThreads>.
  /// NOTE(review): the roles of the tail-related arrays are inferred from their names — confirm.
  struct GPUSharedMemory : public GPUKernelTemplate::GPUSharedMemoryScan64<int16_t, NThreads> {
    // Cached charges, addressed as [time bin slot][pad].
    tpccf::Charge charges[NumOfCachedTBs][MaxNPadsPerRow];
    // One tail range per pad slot.
    HipTailRange tails[MaxNPadsPerRow];
    // Per-slot pad value for closed tails.
    uint8_t tailsClosedPad[MaxNPadsPerRow];
    // Per-slot range of closed tails.
    HipTailRange tailsClosed[MaxNPadsPerRow];
    // Per-slot store index for closed tails.
    uint32_t tailsClosedStoreIdx[MaxNPadsPerRow];
    // Per-thread scratch values for the tail's total charge.
    tpccf::Charge tailQTotScratch[NThreads];
    // Per-thread scratch values for the tail's maximum charge.
    tpccf::Charge tailQMaxScratch[NThreads];
    // Base offset used when storing tails.
    uint32_t tailStoreBase;
  };

  /// Values accumulated while scanning the cached charges of one pad.
  struct PadChargeAccu {
    int32_t totalCharges = 0;
    int32_t consecCharges = 0;
    int32_t maxConsecCharges = 0;
    tpccf::Charge maxCharge = 0;
    int16_t HIPtb = -1;
    // First TB of the current above-hipTailThreshold streak; used to extend the tail back over
    // the rising edge before saturation.
    int16_t aboveThresholdStart = -1;
    // Starts out as "no tail" (start = end = -1).
    HipTailRange activeHIPTail{-1, -1};
    tpccf::Charge tailFilterCharge = 0;
  };

  using processorType = GPUTPCClusterFinder;

  /// Returns the tpcClusterer member of the constant memory as this kernel's processor.
  GPUhdi() static processorType* Processor(GPUConstantMem& processors)
  {
    return processors.tpcClusterer;
  }

  /// This kernel belongs to the TPCClusterFinding reconstruction step.
  GPUhdi() constexpr static gpudatatypes::RecoStep GetRecoStep()
  {
    return gpudatatypes::RecoStep::TPCClusterFinding;
  }

  /// Number of blocks to launch: NROWS on GPU, otherwise one block per cacheline of pads.
  static int32_t GetNBlocks(bool isGPU)
  {
    // The rightmost padding must be left out of the pad filter: it holds nothing to filter,
    // and since padding counts as the start of a row it would overflow the row count.
    const int32_t cpuBlockCount = (TPC_CLUSTERER_STRIDED_PAD_COUNT - GPUCF_PADDING_PAD) / PadsPerCacheline;
    if (isGPU) {
      return GPUTPCGeometry::NROWS;
    }
    return cpuBlockCount;
  }

  /// Kernel entry point; defined outside this header.
  template <int32_t iKernel = defaultKernel>
  GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer);

 private:
  // GPU and CPU variants of the baseline check; both take the same arguments as Thread().
  GPUd() static void CheckBaselineGPU(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer);
  GPUd() static void CheckBaselineCPU(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer);

  // Applies the accumulated per-pad values (total charges, consecutive charges, maximum charge)
  // to the given pad; defined outside this header.
  GPUd() static void updatePadBaseline(int32_t pad, const GPUTPCClusterFinder&, int32_t totalCharges, int32_t consecCharges, tpccf::Charge maxCharge);
};

/// Kernel class declaration for the HIP tail connector.
/// Only the interface is declared here; Thread() is defined outside this header.
/// NOTE(review): judging by the name, this kernel links HIP tail descriptors — confirm.
class GPUTPCCFHIPTailConnector : public GPUKernelTemplate
{
 public:
  /// Capacity limits for HIP tails.
  enum {
    // 2^15 = 32768 tails.
    MaxHIPTails = 1 << 15,
    // The per-row limit equals the overall limit.
    MaxHIPTailsPerRow = MaxHIPTails,
  };

  /// This kernel declares no shared-memory members.
  struct GPUSharedMemory {
  };

  using processorType = GPUTPCClusterFinder;

  /// Returns the tpcClusterer member of the constant memory as this kernel's processor.
  GPUhdi() static processorType* Processor(GPUConstantMem& processors)
  {
    return processors.tpcClusterer;
  }

  /// This kernel belongs to the TPCClusterFinding reconstruction step.
  GPUhdi() constexpr static gpudatatypes::RecoStep GetRecoStep()
  {
    return gpudatatypes::RecoStep::TPCClusterFinding;
  }

  /// Kernel entry point; defined outside this header.
  template <int32_t iKernel = defaultKernel>
  GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer);
};

/// Kernel class declaration for the HIP clusterizer.
/// Only the interface is declared here; Thread() is defined outside this header.
/// NOTE(review): judging by the name, this kernel builds clusters from HIP tails — confirm.
class GPUTPCCFHIPClusterizer : public GPUKernelTemplate
{
 public:
  /// Capacity limits, taken over unchanged from GPUTPCCFHIPTailConnector.
  enum {
    MaxHIPTails = GPUTPCCFHIPTailConnector::MaxHIPTails,
    MaxHIPTailsPerRow = GPUTPCCFHIPTailConnector::MaxHIPTailsPerRow,
  };

  /// This kernel declares no shared-memory members.
  struct GPUSharedMemory {
  };

  using processorType = GPUTPCClusterFinder;

  /// Returns the tpcClusterer member of the constant memory as this kernel's processor.
  GPUhdi() static processorType* Processor(GPUConstantMem& processors)
  {
    return processors.tpcClusterer;
  }

  /// This kernel belongs to the TPCClusterFinding reconstruction step.
  GPUhdi() constexpr static gpudatatypes::RecoStep GetRecoStep()
  {
    return gpudatatypes::RecoStep::TPCClusterFinding;
  }

  /// Kernel entry point; defined outside this header.
  /// NOTE(review): onlyMC presumably restricts the kernel to Monte-Carlo label handling — confirm.
  template <int32_t iKernel = defaultKernel>
  GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer, uint8_t onlyMC);
};

} // namespace o2::gpu

#endif