Skip to content

Commit be72b12

Browse files
committed
ITS: track and hard limit memory allocations + tbb
1 parent d4684e0 commit be72b12

19 files changed

Lines changed: 903 additions & 616 deletions

Detectors/ITSMFT/ITS/tracking/CMakeLists.txt

Lines changed: 3 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
# granted to it by virtue of its status as an Intergovernmental Organization
1010
# or submit itself to any jurisdiction.
1111

12+
#add_compile_options(-O0 -g -fPIC -fno-omit-frame-pointer)
1213
o2_add_library(ITStracking
1314
TARGETVARNAME targetName
1415
SOURCES src/ClusterLines.cxx
@@ -35,12 +36,8 @@ o2_add_library(ITStracking
3536
O2::ITSBase
3637
O2::ITSReconstruction
3738
O2::ITSMFTReconstruction
38-
O2::DataFormatsITS)
39-
40-
if (OpenMP_CXX_FOUND)
41-
target_compile_definitions(${targetName} PRIVATE WITH_OPENMP)
42-
target_link_libraries(${targetName} PRIVATE OpenMP::OpenMP_CXX)
43-
endif()
39+
O2::DataFormatsITS
40+
PRIVATE_LINK_LIBRARIES TBB::tbb)
4441

4542
o2_add_library(ITSTrackingInterface
4643
TARGETVARNAME targetName
@@ -50,11 +47,6 @@ o2_add_library(ITSTrackingInterface
5047
O2::Framework
5148
O2::GPUTracking)
5249

53-
if (OpenMP_CXX_FOUND)
54-
target_compile_definitions(${targetName} PRIVATE WITH_OPENMP)
55-
target_link_libraries(${targetName} PRIVATE OpenMP::OpenMP_CXX)
56-
endif()
57-
5850
o2_target_root_dictionary(ITStracking
5951
HEADERS include/ITStracking/ClusterLines.h
6052
include/ITStracking/Tracklet.h

Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackerTraitsGPU.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ class TrackerTraitsGPU final : public TrackerTraits
4343

4444
const char* getName() const noexcept final { return "GPU"; }
4545
bool isGPU() const noexcept final { return true; }
46+
bool usesMemoryPool() const noexcept final { return false; }
4647

4748
// TimeFrameGPU information forwarding
4849
int getTFNumberOfClusters() const override;
@@ -63,4 +64,4 @@ inline void TrackerTraitsGPU<nLayers>::adoptTimeFrame(TimeFrame* tf)
6364
} // namespace its
6465
} // namespace o2
6566

66-
#endif
67+
#endif

Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/VertexerTraitsGPU.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ class VertexerTraitsGPU final : public VertexerTraits
5050

5151
virtual bool isGPU() final { return true; }
5252
virtual const char* getName() final { return "GPU"; }
53+
virtual bool usesMemoryPool() const noexcept { return false; }
5354

5455
protected:
5556
IndexTableUtils* mDeviceIndexTableUtils;
Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2+
// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3+
// All rights not expressly granted are reserved.
4+
//
5+
// This software is distributed under the terms of the GNU General Public
6+
// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7+
//
8+
// In applying this license CERN does not waive the privileges and immunities
9+
// granted to it by virtue of its status as an Intergovernmental Organization
10+
// or submit itself to any jurisdiction.
11+
///
12+
/// \file BoundedAllocator.h
13+
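/// \brief Polymorphic memory resource that enforces an upper bound on the bytes it hands out, plus the BoundedVector alias built on it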
14+
///
15+
16+
#ifndef TRACKINGITSU_INCLUDE_BOUNDEDALLOCATOR_H_
17+
#define TRACKINGITSU_INCLUDE_BOUNDEDALLOCATOR_H_
18+
19+
#include <limits>
20+
#include <memory_resource>
21+
#include <atomic>
22+
#include <new>
23+
#include <vector>
24+
#include <string>
25+
#include <exception>
26+
27+
#include "Framework/Logger.h"
28+
29+
namespace o2::its
30+
{
31+
32+
// 1. Thread-safe memory resource with global limit
33+
class BoundedMemoryResource final : public std::pmr::memory_resource
34+
{
35+
public:
36+
class MaxMemoryReached final : public std::bad_alloc
37+
{
38+
public:
39+
const char* what() const noexcept final
40+
{
41+
return "Reached set memory limit";
42+
}
43+
};
44+
45+
BoundedMemoryResource(const std::string& name, size_t maxBytes = 0, std::pmr::memory_resource* upstream = std::pmr::get_default_resource())
46+
: mMaxMemory(maxBytes), mUpstream(upstream), mName(name) {}
47+
48+
void* do_allocate(size_t bytes, size_t alignment) final
49+
{
50+
size_t new_used{0}, current_used{mUsedMemory.load(std::memory_order_relaxed)};
51+
do {
52+
new_used = current_used + bytes;
53+
if (new_used > mMaxMemory) {
54+
++mCountMaxThrow;
55+
throw MaxMemoryReached();
56+
}
57+
} while (!mUsedMemory.compare_exchange_weak(current_used, new_used,
58+
std::memory_order_acq_rel,
59+
std::memory_order_relaxed));
60+
++mCountAllocate;
61+
return mUpstream->allocate(bytes, alignment);
62+
}
63+
64+
void do_deallocate(void* p, size_t bytes, size_t alignment) final
65+
{
66+
++mCountDeallocate;
67+
mUpstream->deallocate(p, bytes, alignment);
68+
mUsedMemory.fetch_sub(bytes, std::memory_order_relaxed);
69+
}
70+
71+
bool do_is_equal(const std::pmr::memory_resource& other) const noexcept final
72+
{
73+
return this == &other;
74+
}
75+
76+
size_t getUsedMemory() const noexcept { return mUsedMemory.load(); }
77+
size_t getMaxMemory() const noexcept { return mMaxMemory; }
78+
void setMaxMemory(size_t max)
79+
{
80+
if (mUsedMemory > max) {
81+
++mCountMaxThrow;
82+
throw MaxMemoryReached();
83+
}
84+
mMaxMemory = max;
85+
}
86+
87+
void print() const
88+
{
89+
LOGP(info, "BoundedAllocator:{} alloc={} dealloc={} maxthrow={} maxmem={} used={}", mName,
90+
mCountAllocate.load(std::memory_order_relaxed),
91+
mCountDeallocate.load(std::memory_order_relaxed),
92+
mCountMaxThrow.load(std::memory_order_relaxed),
93+
mMaxMemory,
94+
mUsedMemory.load(std::memory_order_relaxed));
95+
}
96+
97+
private:
98+
size_t mMaxMemory{std::numeric_limits<size_t>::max()};
99+
std::atomic<size_t> mCountMaxThrow{0};
100+
std::atomic<size_t> mCountAllocate{0};
101+
std::atomic<size_t> mCountDeallocate{0};
102+
std::atomic<size_t> mUsedMemory{0};
103+
std::pmr::memory_resource* mUpstream;
104+
std::string mName;
105+
};
106+
107+
/// Vector whose element storage is requested through a std::pmr polymorphic
/// allocator, i.e. from whatever std::pmr::memory_resource it is constructed with.
template <typename T>
108+
using BoundedVector = std::pmr::vector<T>;
109+
110+
} // namespace o2::its
111+
112+
#endif

Detectors/ITSMFT/ITS/tracking/include/ITStracking/Configuration.h

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,6 @@ struct TrackingParameters {
9090
float CellsPerClusterLimit = 2.f;
9191
/// Fitter parameters
9292
o2::base::PropagatorImpl<float>::MatCorrType CorrType = o2::base::PropagatorImpl<float>::MatCorrType::USEMatCorrNONE;
93-
size_t MaxMemory = 12000000000UL;
9493
float MaxChi2ClusterAttachment = 60.f;
9594
float MaxChi2NDF = 30.f;
9695
std::vector<float> MinPt = {0.f, 0.f, 0.f, 0.f};
@@ -100,14 +99,17 @@ struct TrackingParameters {
10099
bool SaveTimeBenchmarks = false;
101100
bool DoUPCIteration = false;
102101
bool FataliseUponFailure = true;
103-
bool DropTFUponFailure = false;
104102
/// Cluster attachment
105103
bool UseTrackFollower = false;
106104
bool UseTrackFollowerTop = false;
107105
bool UseTrackFollowerBot = false;
108106
bool UseTrackFollowerMix = false;
109107
float TrackFollowerNSigmaCutZ = 1.f;
110108
float TrackFollowerNSigmaCutPhi = 1.f;
109+
110+
bool PrintMemory = false; // print allocator usage in epilog report
111+
size_t MaxMemory = 12000000000UL;
112+
bool DropTFUponFailure = false;
111113
};
112114

113115
struct VertexingParameters {
@@ -138,7 +140,9 @@ struct VertexingParameters {
138140
int zSpan = -1;
139141

140142
int nThreads = 1;
143+
bool PrintMemory = false; // print allocator usage in epilog report
141144
size_t MaxMemory = 12000000000UL;
145+
bool DropTFUponFailure = false;
142146
};
143147

144148
struct TimeFrameGPUParameters {

Detectors/ITSMFT/ITS/tracking/include/ITStracking/Constants.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -84,8 +84,8 @@ GPUhdi() constexpr std::array<float, LayersNumber> InverseZBinSize()
8484
{
8585
constexpr auto zSize = LayersZCoordinate();
8686
return std::array<float, LayersNumber>{0.5f * ZBins / (zSize[0]), 0.5f * ZBins / (zSize[1]), 0.5f * ZBins / (zSize[2]),
87-
0.5f * ZBins / (zSize[3]), 0.5f * ZBins / (zSize[4]), 0.5f * ZBins / (zSize[5]),
88-
0.5f * ZBins / (zSize[6])};
87+
0.5f * ZBins / (zSize[3]), 0.5f * ZBins / (zSize[4]), 0.5f * ZBins / (zSize[5]),
88+
0.5f * ZBins / (zSize[6])};
8989
}
9090

9191
GPUhdi() constexpr float getInverseZCoordinate(const int layerIndex)

Detectors/ITSMFT/ITS/tracking/include/ITStracking/TimeFrame.h

Lines changed: 69 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
#include "ITStracking/Tracklet.h"
3636
#include "ITStracking/IndexTableUtils.h"
3737
#include "ITStracking/ExternalAllocator.h"
38+
#include "ITStracking/BoundedAllocator.h"
3839

3940
#include "SimulationDataFormat/MCCompLabel.h"
4041
#include "SimulationDataFormat/MCTruthContainer.h"
@@ -59,6 +60,37 @@ class ROFRecord;
5960

6061
namespace its
6162
{
63+
64+
/// Empties \p vec and releases its heap buffer by handing the contents over to
/// a short-lived vector that is destroyed when this function returns.
template <typename T>
void deepVectorClear(std::vector<T>& vec)
{
  std::vector<T> discarded;
  discarded.swap(vec);
}
69+
70+
template <typename T>
71+
void deepVectorClear(BoundedVector<T>& vec)
72+
{
73+
vec = BoundedVector<T>(vec.get_allocator().resource());
74+
}
75+
76+
template <typename T>
77+
void deepVectorClear(std::vector<BoundedVector<T>>& vec)
78+
{
79+
for (auto& v : vec) {
80+
v = BoundedVector<T>(v.get_allocator().resource());
81+
}
82+
}
83+
84+
template <typename T>
85+
void clearResizeBoundedVector(std::vector<BoundedVector<T>>& vec, size_t size, BoundedMemoryResource* bmr)
86+
{
87+
vec.clear();
88+
vec.reserve(size);
89+
for (size_t i{0}; i < size; ++i) {
90+
vec.emplace_back(bmr);
91+
}
92+
}
93+
6294
using Vertex = o2::dataformats::Vertex<o2::dataformats::TimeStamp<int>>;
6395

6496
template <int nLayers = 7>
@@ -164,8 +196,8 @@ struct TimeFrame {
164196
auto& getCellsLookupTable() { return mCellsLookupTable; }
165197
auto& getCellsNeighbours() { return mCellsNeighbours; }
166198
auto& getCellsNeighboursLUT() { return mCellsNeighboursLUT; }
167-
std::vector<Road<nLayers - 2>>& getRoads() { return mRoads; }
168-
std::vector<TrackITSExt>& getTracks(int rofId) { return mTracks[rofId]; }
199+
auto& getRoads() { return mRoads; }
200+
auto& getTracks(int rofId) { return mTracks[rofId]; }
169201
std::vector<MCCompLabel>& getTracksLabel(const int rofId) { return mTracksLabel[rofId]; }
170202
std::vector<MCCompLabel>& getLinesLabel(const int rofId) { return mLinesLabels[rofId]; }
171203
std::vector<std::pair<MCCompLabel, float>>& getVerticesMCRecInfo() { return mVerticesMCRecInfo; }
@@ -179,8 +211,12 @@ struct TimeFrame {
179211
auto getNumberOfExtendedTracks() const { return mNExtendedTracks; }
180212
auto getNumberOfUsedExtendedClusters() const { return mNExtendedUsedClusters; }
181213

214+
void setMemoryPool(std::shared_ptr<BoundedMemoryResource>& pool) { mMemoryPool = pool; }
215+
auto& getMemoryPool() const noexcept { return mMemoryPool; }
216+
bool usesMemoryPool() const noexcept { return true; }
182217
bool checkMemory(unsigned long max) { return getArtefactsMemory() < max; }
183-
unsigned long getArtefactsMemory();
218+
unsigned long getArtefactsMemory() const;
219+
void printArtefactsMemory() const;
184220
int getROFCutClusterMult() const { return mCutClusterMult; };
185221
int getROFCutVertexMult() const { return mCutVertexMult; };
186222
int getROFCutAllMult() const { return mCutClusterMult + mCutVertexMult; }
@@ -189,7 +225,7 @@ struct TimeFrame {
189225
void computeTrackletsPerROFScans();
190226
void computeTracletsPerClusterScans();
191227
int& getNTrackletsROF(int rofId, int combId) { return mNTrackletsPerROF[combId][rofId]; }
192-
std::vector<Line>& getLines(int rofId) { return mLines[rofId]; }
228+
auto& getLines(int rofId) { return mLines[rofId]; }
193229
int getNLinesTotal() const
194230
{
195231
return std::accumulate(mLines.begin(), mLines.end(), 0, [](int sum, const auto& l) { return sum + l.size(); });
@@ -234,6 +270,7 @@ struct TimeFrame {
234270
void addClusterExternalIndexToLayer(int layer, const int idx) { mClusterExternalIndices[layer].push_back(idx); }
235271

236272
void resetVectors();
273+
void resetTracklets();
237274

238275
/// Debug and printing
239276
void checkTrackletLUTs();
@@ -279,29 +316,32 @@ struct TimeFrame {
279316
}
280317
}
281318

282-
std::array<std::vector<Cluster>, nLayers> mUnsortedClusters;
283-
std::vector<std::vector<Tracklet>> mTracklets;
284-
std::vector<std::vector<CellSeed>> mCells;
285-
std::vector<std::vector<o2::track::TrackParCovF>> mCellSeeds;
286-
std::vector<std::vector<float>> mCellSeedsChi2;
287-
std::vector<Road<nLayers - 2>> mRoads;
288-
std::vector<std::vector<TrackITSExt>> mTracks;
289-
std::vector<std::vector<int>> mCellsNeighbours;
290-
std::vector<std::vector<int>> mCellsLookupTable;
319+
std::array<BoundedVector<Cluster>, nLayers> mUnsortedClusters;
320+
std::vector<BoundedVector<Tracklet>> mTracklets;
321+
std::vector<BoundedVector<CellSeed>> mCells;
322+
std::vector<BoundedVector<o2::track::TrackParCovF>> mCellSeeds;
323+
std::vector<BoundedVector<float>> mCellSeedsChi2;
324+
BoundedVector<Road<nLayers - 2>> mRoads;
325+
std::vector<BoundedVector<TrackITSExt>> mTracks;
326+
std::vector<BoundedVector<int>> mCellsNeighbours;
327+
std::vector<BoundedVector<int>> mCellsLookupTable;
291328
std::vector<uint8_t> mMultiplicityCutMask;
292329

293330
const o2::base::PropagatorImpl<float>* mPropagatorDevice = nullptr; // Needed only for GPU
294-
void dropTracks()
331+
332+
void wipe()
295333
{
296-
for (auto& v : mTracks) {
334+
for (auto& v : mUnsortedClusters) {
297335
deepVectorClear(v);
298336
}
299-
}
300-
301-
template <typename T>
302-
void deepVectorClear(std::vector<T>& vec)
303-
{
304-
std::vector<T>().swap(vec);
337+
deepVectorClear(mTracks);
338+
deepVectorClear(mTracklets);
339+
deepVectorClear(mCells);
340+
deepVectorClear(mCellSeeds);
341+
deepVectorClear(mCellSeedsChi2);
342+
deepVectorClear(mRoads);
343+
deepVectorClear(mCellsNeighbours);
344+
deepVectorClear(mCellsLookupTable);
305345
}
306346

307347
virtual void prepareClusters(const TrackingParameters& trkParam, const int maxLayers);
@@ -341,11 +381,16 @@ struct TimeFrame {
341381
unsigned int mNoVertexROF = 0;
342382
std::vector<int> mTotVertPerIteration;
343383
// \Vertexer
384+
385+
std::shared_ptr<BoundedMemoryResource> mMemoryPool;
344386
};
345387

346388
template <int nLayers>
347389
inline gsl::span<const Vertex> TimeFrame<nLayers>::getPrimaryVertices(int rofId) const
348390
{
391+
if (mPrimaryVertices.empty()) {
392+
return {};
393+
}
349394
const int start = mROFramesPV[rofId];
350395
const int stop_idx = rofId >= mNrof - 1 ? mNrof : rofId + 1;
351396
int delta = mMultiplicityCutMask[rofId] ? mROFramesPV[stop_idx] - start : 0; // return empty span if Rof is excluded
@@ -364,6 +409,9 @@ inline gsl::span<const std::pair<MCCompLabel, float>> TimeFrame<nLayers>::getPri
364409
template <int nLayers>
365410
inline gsl::span<const Vertex> TimeFrame<nLayers>::getPrimaryVertices(int romin, int romax) const
366411
{
412+
if (mPrimaryVertices.empty()) {
413+
return {};
414+
}
367415
return {&mPrimaryVertices[mROFramesPV[romin]], static_cast<gsl::span<const Vertex>::size_type>(mROFramesPV[romax + 1] - mROFramesPV[romin])};
368416
}
369417

0 commit comments

Comments
 (0)