Skip to content

Commit 2f86036

Browse files
Barthelemyknopers8
andauthored
[QC-1037] IncreasingEntries fault tolerance (#2020)
* [QC-1037] IncreasingEntries: tolerate a number of bad cycles * [QC-1037] IncreasingEntries: tolerate a number of bad cycles * Update Modules/Common/src/IncreasingEntries.cxx Co-authored-by: Piotr Konopka <piotr.jan.konopka@cern.ch> * change name of parameter * doc * fix format * format --------- Co-authored-by: Piotr Konopka <piotr.jan.konopka@cern.ch>
1 parent c518497 commit 2f86036

3 files changed

Lines changed: 54 additions & 26 deletions

File tree

Modules/Common/include/Common/IncreasingEntries.h

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ namespace o2::quality_control_modules::common
2424
{
2525

2626
/// \brief Check if the number of Entries has increased or not
27-
/// If it does not increase, the quality is bad.
27+
/// If it does not increase over the past N cycles (N=1 by default), the quality is bad.
2828
/// The behaviour can be modified with the customParameter "mustIncrease". If set to "false",
2929
/// it will actually have a bad quality if the number of entries increases.
3030
class IncreasingEntries : public o2::quality_control::checker::CheckInterface
@@ -42,13 +42,23 @@ class IncreasingEntries : public o2::quality_control::checker::CheckInterface
4242
std::string getAcceptedType() override;
4343

4444
private:
45-
std::map<std::string, double> mLastEntries;
45+
std::map<std::string, double> mLastEntries; // moName -> number of entries
46+
47+
// count the number of faults we have seen in a row for each object
48+
std::map<std::string, size_t> mMoFaultCount; // moName -> number of faults in a row
49+
50+
// the pave text with the error message
4651
std::shared_ptr<TPaveText> mPaveText;
47-
bool mMustIncrease = true;
52+
4853
// store the faults to beautify them later
4954
std::vector<std::string> mFaultyObjectsNames;
5055

51-
ClassDefOverride(IncreasingEntries, 2);
56+
// decides whether the number of entries must increase or it must remain the same
57+
bool mMustIncrease = true;
58+
// The number of cycles during which the number of entries did not move until we set the quality bad.
59+
int mBadCyclesLimit = 1;
60+
61+
ClassDefOverride(IncreasingEntries, 3);
5262
};
5363

5464
} // namespace o2::quality_control_modules::common

Modules/Common/src/IncreasingEntries.cxx

Lines changed: 27 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -34,25 +34,23 @@ namespace o2::quality_control_modules::common
3434

3535
void IncreasingEntries::configure()
3636
{
37-
try {
38-
mMustIncrease = parseBoolParam(mCustomParameters, "default", "default", "mustIncrease");
39-
} catch (AliceO2::Common::ObjectNotFoundError& exc) {
40-
mMustIncrease = true; // if not there, default behaviour
41-
}
37+
auto option = mCustomParameters.atOptional("mustIncrease");
38+
mMustIncrease = option.has_value() ? decodeBool(option.value()) : true;
4239
ILOG(Debug, Support) << "mustIncrease: " << mMustIncrease << ENDM;
4340

41+
option = mCustomParameters.atOptional("nBadCyclesLimit");
42+
mBadCyclesLimit = option.has_value() ? stoi(option.value()) : 1;
43+
ILOG(Debug, Support) << "nBadCyclesLimit: " << mBadCyclesLimit << ENDM;
44+
45+
mPaveText = make_shared<TPaveText>(1, 0.125, 0.6, 0, "NDC");
46+
mPaveText->SetFillColor(kRed);
47+
mPaveText->SetMargin(0);
4448
if (mMustIncrease) {
45-
mPaveText = make_shared<TPaveText>(1, 0.125, 0.6, 0, "NDC");
4649
mPaveText->AddText("Number of Entries has *not* changed");
47-
mPaveText->AddText("in the past cycle");
48-
mPaveText->SetFillColor(kRed);
49-
mPaveText->SetMargin(0);
50+
mPaveText->AddText(string("in the past ") + mBadCyclesLimit + " cycle(s)");
5051
} else {
51-
mPaveText = make_shared<TPaveText>(1, 0.125, 0.6, 0, "NDC");
5252
mPaveText->AddText("Number of Entries has *changed*");
53-
mPaveText->AddText("in the past cycle");
54-
mPaveText->SetFillColor(kRed);
55-
mPaveText->SetMargin(0);
53+
mPaveText->AddText(string("in the past ") + mBadCyclesLimit + " cycle(s)");
5654
}
5755
}
5856

@@ -68,19 +66,27 @@ Quality IncreasingEntries::check(std::map<std::string, std::shared_ptr<MonitorOb
6866
continue;
6967
}
7068

71-
double previousNumberEntries = mLastEntries.count(moName) > 0 ? mLastEntries.at(moName) : 0;
72-
double currentNumberEntries = histo->GetEntries();
69+
const double previousNumberEntries = mLastEntries.count(moName) > 0 ? mLastEntries.at(moName) : 0;
70+
const double currentNumberEntries = histo->GetEntries();
71+
size_t faultCount = mMoFaultCount.count(moName) > 0 ? mMoFaultCount.at(moName) : 0;
7372

74-
if (mMustIncrease && previousNumberEntries == currentNumberEntries) {
75-
result = Quality::Bad;
76-
result.addReason(FlagReasonFactory::NoDetectorData(), "Number of entries stopped increasing.");
77-
mFaultyObjectsNames.push_back(mo->getName());
78-
} else if (!mMustIncrease && previousNumberEntries != currentNumberEntries) {
73+
if (mMustIncrease == (previousNumberEntries == currentNumberEntries)) {
74+
faultCount++;
75+
} else {
76+
faultCount = 0;
77+
}
78+
79+
if (faultCount >= mBadCyclesLimit) {
7980
result = Quality::Bad;
80-
result.addReason(FlagReasonFactory::Unknown(), "Number of entries has increased.");
8181
mFaultyObjectsNames.push_back(mo->getName());
82+
if (mMustIncrease) {
83+
result.addReason(FlagReasonFactory::NoDetectorData(), "Number of entries stopped increasing.");
84+
} else {
85+
result.addReason(FlagReasonFactory::Unknown(), "Number of entries has increased.");
86+
}
8287
}
8388

89+
mMoFaultCount[moName] = faultCount;
8490
mLastEntries[moName] = currentNumberEntries;
8591
}
8692
return result;

doc/Advanced.md

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1478,7 +1478,7 @@ One can also enable publishing metrics related to CPU/memory usage. To do so, us
14781478

14791479
## Common check `IncreasingEntries`
14801480

1481-
This check make sures that the number of entries has increased in the past cycle. If not it will display a pavetext
1481+
This check makes sure that the number of entries has increased in the past cycle(s). If not, it will display a pavetext
14821482
on the plot and set the quality to bad.
14831483

14841484
If you use `SetBinContent`, the number of entries does not increase, creating a false positive. Please call `ResetStats()`
@@ -1491,6 +1491,18 @@ The behaviour of the check can be inverted by setting the customparameter "mustI
14911491
}
14921492
```
14931493

1494+
The number of consecutive cycles in which the number of entries is allowed to not increase (or, when `mustIncrease` is "false", to increase) before the quality is set to bad can be set with the custom parameter `nBadCyclesLimit`:
1495+
```
1496+
"extendedCheckParameters": {
1497+
"default": {
1498+
"default": {
1499+
"nBadCyclesLimit": "3",
1500+
}
1501+
}
1502+
}
1503+
```
1504+
In the example above, the quality goes to bad when there are 3 cycles in a row with no increase in the number of entries.
1505+
14941506
## Update the shmem segment size of a detector
14951507

14961508
In consul go to `o2/runtime/aliecs/defaults` and modify the file corresponding to the detector: [det]_qc_shm_segment_size

0 commit comments

Comments
 (0)