1515#ifndef ALICEO2_DATAFORMATSTPC_CLUSTERNATIVE_H
1616#define ALICEO2_DATAFORMATSTPC_CLUSTERNATIVE_H
1717#ifndef GPUCA_GPUCODE_DEVICE
18+ #include < climits>
1819#include < cstdint>
1920#include < cstddef> // for size_t
2021#include < utility>
@@ -62,6 +63,9 @@ struct ClusterNative {
6263 static constexpr int scalePadPacked = 64 ; // < ~60 is needed for 0.1mm precision, but power of two avoids rounding
6364 static constexpr int scaleSigmaTimePacked = 32 ; // 1/32nd of pad/timebin precision for cluster size
6465 static constexpr int scaleSigmaPadPacked = 32 ;
66+ static constexpr int scaleSaturatedQtot = 8 ;
67+ static constexpr int maxRegularQtot = 25 * 1024 ;
68+ static constexpr int maxSaturatedQtot = (USHRT_MAX - maxRegularQtot) * scaleSaturatedQtot;
6569
6670 uint32_t timeFlagsPacked; // < Contains the time in the lower 24 bits in a packed format, contains the flags in the
6771 // upper 8 bits
@@ -83,7 +87,14 @@ struct ClusterNative {
8387 }
8488
8589 GPUd () uint16_t getQmax () const { return qMax; }
86- GPUd () uint16_t getQtot () const { return qTot; }
90+ GPUd () uint16_t getQtot () const
91+ {
92+ if (isSaturated ()) [[unlikely]] {
93+ auto sQtot = getSaturatedQtot ();
94+ return sQtot < USHRT_MAX ? sQtot : USHRT_MAX ;
95+ }
96+ return qTot;
97+ }
8798 GPUd () uint8_t getFlags () const { return timeFlagsPacked >> 24 ; }
8899 GPUd () uint32_t getTimePacked () const { return timeFlagsPacked & 0xFFFFFF ; }
89100 GPUd () void setTimePackedFlags (uint32_t timePacked, uint8_t flags)
@@ -119,7 +130,13 @@ struct ClusterNative {
119130 // / Y = (12.4 - 0.5 * (66 - 1)) * 4.16mm = -83.616mm
120131 GPUd () float getPad () const { return unpackPad (padPacked); }
121132 GPUd () void setPad (float pad) { padPacked = packPad (pad); }
122- GPUd () float getSigmaTime () const { return float (sigmaTimePacked) * (1 .f / scaleSigmaTimePacked); }
133+ GPUd () float getSigmaTime () const
134+ {
135+ if (isSaturated ()) [[unlikely]] {
136+ return 0 ;
137+ }
138+ return float (sigmaTimePacked) * (1 .f / scaleSigmaTimePacked);
139+ }
123140 GPUd () void setSigmaTime (float sigmaTime)
124141 {
125142 uint32_t tmp = sigmaTime * scaleSigmaTimePacked + 0.5 ;
@@ -138,6 +155,31 @@ struct ClusterNative {
138155 sigmaPadPacked = tmp;
139156 }
140157
158+ GPUd () bool isSaturated () const { return qTot > maxRegularQtot; }
159+
160+ GPUd () void setSaturatedQtot (uint32_t qtot)
161+ {
162+ this ->qTot = USHRT_MAX ;
163+ if (qtot < maxSaturatedQtot) {
164+ this ->qTot = ((qtot + scaleSaturatedQtot / 2 ) / scaleSaturatedQtot) + maxRegularQtot;
165+ }
166+ }
167+
168+ GPUd () uint32_t getSaturatedQtot () const
169+ {
170+ return uint32_t (qTot - maxRegularQtot) * scaleSaturatedQtot;
171+ }
172+
173+ GPUd () void setSaturatedTailLength (uint32_t tail)
174+ {
175+ sigmaTimePacked = encodeTailLength (tail);
176+ }
177+
178+ GPUd () uint32_t getSaturatedTailLength () const
179+ {
180+ return decodeTailLength (sigmaTimePacked);
181+ }
182+
141183 GPUd () bool operator <(const ClusterNative& rhs) const
142184 {
143185 if (this ->getTimePacked () != rhs.getTimePacked ()) {
@@ -167,6 +209,93 @@ struct ClusterNative {
167209 this ->qTot == rhs.qTot &&
168210 this ->getFlags () == rhs.getFlags ();
169211 }
212+
213+ private:
214+ static constexpr GPUd () uint32_t decodeTailLength(uint8_t code)
215+ {
216+ // Quantize tail length into 8bits.
217+ // Max expected length is 1500 tbs.
218+ // But allow outliers up to 8000 tbs.
219+ //
220+ // Full code layout is:
221+ //
222+ // | Code range | Decoded values | Step | Codes |
223+ // | ---------: | -------------: | ----: | ----: |
224+ // | `0..63` | `0..63` | `1` | `64` |
225+ // | `64..95` | `64..126` | `2` | `32` |
226+ // | `96..127` | `128..252` | `4` | `32` |
227+ // | `128..159` | `256..504` | `8` | `32` |
228+ // | `160..223` | `512..1520` | `16` | `64` |
229+ // | `224..239` | `1552..2032` | `32` | `16` |
230+ // | `240..255` | `2048..8048` | `400` | `16` |
231+ //
232+
233+ if (code < 64 ) {
234+ return code;
235+ }
236+
237+ if (code < 160 ) {
238+ uint32_t q = (uint32_t )code - 64u ;
239+ uint32_t exponent = (q >> 5 ) + 1u ; // 1, 2, 3
240+ uint32_t mantissa = q & 31u ; // 0..31
241+
242+ return (32u + mantissa) << exponent;
243+ }
244+
245+ if (code < 224 ) {
246+ return 512u + 16u * ((uint32_t )code - 160u );
247+ }
248+
249+ if (code < 240 ) {
250+ return 1552u + 32u * ((uint32_t )code - 224u );
251+ }
252+
253+ return 2048u + 400u * ((uint32_t )code - 240u );
254+ }
255+
256+ static constexpr GPUd () uint8_t encodeTailLength(uint32_t value)
257+ {
258+ // Saturate above representable range.
259+ if (value >= decodeTailLength (255 )) [[unlikely]] {
260+ return 255 ;
261+ }
262+
263+ // Binary search for the first code whose decoded value >= value.
264+ uint8_t lo = 0 ;
265+ uint8_t hi = 255 ;
266+
267+ while (lo < hi) {
268+ uint8_t mid = lo + ((hi - lo) >> 1 );
269+ uint32_t decoded = decodeTailLength (mid);
270+
271+ if (decoded < value) {
272+ lo = mid + 1 ;
273+ } else {
274+ hi = mid;
275+ }
276+ }
277+
278+ // lo is now the first code with decoded >= value.
279+ if (lo == 0 ) [[unlikely]] {
280+ return 0 ;
281+ }
282+
283+ uint8_t above_code = lo;
284+ uint8_t below_code = lo - 1 ;
285+
286+ uint32_t above_value = decodeTailLength (above_code);
287+ uint32_t below_value = decodeTailLength (below_code);
288+
289+ uint32_t above_error = above_value - value;
290+ uint32_t below_error = value - below_value;
291+
292+ // Tie-break downward.
293+ if (below_error <= above_error) {
294+ return below_code;
295+ } else {
296+ return above_code;
297+ }
298+ }
170299};
171300
172301// This is an index struct to access TPC clusters inside sectors and rows. It shall not own the data, but just point to
0 commit comments