Skip to content

Commit d8f8aa6

Browse files
committed
wip: selectively applicable conditions and metrics
Save a list of references to entries that a Condition object won't be evaluated against. Using Condition.Match with these data will return inconclusive results. Save a reference to the listdata entry that a Metric object was generated from. If such a reference is present, and the Condition object has it in its skip list, it won't be used when building the perf threshold string. Add a helper function that looks through a condition list, takes out Condition objects where specialized keywords are present, then filters to Condition objects that have a generalized keyword present. These generalized versions can have entries added to their evaluation skip list. Improve .String() on Condition objects, add more log points, and add a helper function for slice subtraction. Use check_drivesize as a testing ground for these changes. If a '<drive> used_pct' keyword is present, it will be taken as a specialized keyword, and Conditions containing the generalized 'used_pct' keyword will have the drive entry added to their skip list (blacklist).
1 parent 46fe963 commit d8f8aa6

5 files changed

Lines changed: 139 additions & 37 deletions

File tree

pkg/snclient/check_drivesize.go

Lines changed: 36 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -298,7 +298,26 @@ func (l *CheckDrivesize) isExcluded(drive map[string]string, excludes []string)
298298
return false
299299
}
300300

301-
func (l *CheckDrivesize) addMetrics(drive string, check *CheckData, usage *disk.UsageStat, magic float64) {
301+
func (l *CheckDrivesize) handleDriveUsagePctThresholds(driveName string, check *CheckData, driveEntry *map[string]string) {
302+
// convert '<drive> used_pct' keywords in conditions to '<drive> used %' as that matches the metric name
303+
convertDriveUsagePctMetric1 := fmt.Sprintf("%s used_pct", driveName)
304+
305+
// metrics are normally added if the operand is simply 'used' , 'used_pct' , 'used_bytes' etc. and do not have a drive prefix
306+
// detect conditions where the operand is named '<drive> used %', this is the default way snclient names percent usage metrics.
307+
// if there is a condition using that as an operand, add usage metrics for that drive as well. during the metrics condition checking, they will take effect.
308+
// this helps to check usage metrics specific to drives.
309+
driveUsagePctMetric := fmt.Sprintf("%s used %%", driveName)
310+
311+
check.warnThreshold = check.TransformMultipleKeywords([]string{convertDriveUsagePctMetric1}, driveUsagePctMetric, check.warnThreshold)
312+
check.critThreshold = check.TransformMultipleKeywords([]string{convertDriveUsagePctMetric1}, driveUsagePctMetric, check.critThreshold)
313+
check.okThreshold = check.TransformMultipleKeywords([]string{convertDriveUsagePctMetric1}, driveUsagePctMetric, check.okThreshold)
314+
315+
check.warnThreshold.disableGenerallizedConditionsForEntry(check, driveEntry, []string{driveUsagePctMetric}, []string{"used", "used_pct"})
316+
check.critThreshold.disableGenerallizedConditionsForEntry(check, driveEntry, []string{driveUsagePctMetric}, []string{"used", "used_pct"})
317+
check.okThreshold.disableGenerallizedConditionsForEntry(check, driveEntry, []string{driveUsagePctMetric}, []string{"used", "used_pct"})
318+
}
319+
320+
func (l *CheckDrivesize) addMetrics(drive *map[string]string, check *CheckData, usage *disk.UsageStat, magic float64) {
302321
total := usage.Total
303322
if !l.freespaceIgnoreReserved {
304323
total = usage.Used + usage.Free // use this total instead of usage.Total to account in the root reserved space
@@ -307,34 +326,34 @@ func (l *CheckDrivesize) addMetrics(drive string, check *CheckData, usage *disk.
307326
if check.HasThreshold("free") || check.HasThreshold("free_pct") || check.HasThreshold("free_bytes") {
308327
check.warnThreshold = check.TransformMultipleKeywords([]string{"free_pct", "free_bytes"}, "free", check.warnThreshold)
309328
check.critThreshold = check.TransformMultipleKeywords([]string{"free_pct", "free_bytes"}, "free", check.critThreshold)
310-
check.AddBytePercentMetrics("free", drive+" free", magic*float64(usage.Free), magic*float64(total))
329+
perfLabel := fmt.Sprintf("%s free", (*drive)["drive"])
330+
check.AddBytePercentMetrics("free", perfLabel, magic*float64(usage.Free), magic*float64(total))
311331
}
312332

313-
// convert '<drive> used_pct' keywords in conditions to '<drive> used %' as that matches the metric name
314-
convertDriveUsagePctMetric1 := fmt.Sprintf("%s used_pct", drive)
315-
// metrics are normally added if the operand is simply 'used' , 'used_pct' , 'used_bytes' etc. and do not have a drive prefix
316-
// detect conditions where the operand is named '<drive> used %', this is the default way snclient names percent usage metrics.
317-
// if there is a condition using that as an operand, add usage metrics for that drive as well. during the metrics condition checking, they will take effect.
318-
// this helps to check usage metrics specific to drives.
319333
driveUsagePctMetric := fmt.Sprintf("%s used %%", drive)
320334

321-
check.warnThreshold = check.TransformMultipleKeywords([]string{convertDriveUsagePctMetric1}, driveUsagePctMetric, check.warnThreshold)
322-
check.critThreshold = check.TransformMultipleKeywords([]string{convertDriveUsagePctMetric1}, driveUsagePctMetric, check.critThreshold)
323-
324335
if check.HasThreshold(driveUsagePctMetric) || check.HasThreshold("used") || check.HasThreshold("used_pct") || check.HasThreshold("used_bytes") {
325336
check.warnThreshold = check.TransformMultipleKeywords([]string{"used_pct", "used_bytes"}, "used", check.warnThreshold)
326337
check.critThreshold = check.TransformMultipleKeywords([]string{"used_pct", "used_bytes"}, "used", check.critThreshold)
327-
check.AddBytePercentMetrics("used", drive+" used", magic*float64(usage.Used), magic*float64(total))
338+
perfLabel := fmt.Sprintf("%s used", (*drive)["drive"])
339+
check.AddBytePercentMetrics("used", perfLabel, magic*float64(usage.Used), magic*float64(total))
340+
for _, m := range check.result.Metrics {
341+
if strings.HasPrefix(m.Name, perfLabel) {
342+
m.Entry = drive
343+
}
344+
}
328345
}
329346
if check.HasThreshold("inodes") || check.HasThreshold("inodes_used") || check.HasThreshold("inodes_used_pct") {
330347
check.warnThreshold = check.TransformMultipleKeywords([]string{"inodes_used_pct", "inodes_used"}, "inodes", check.warnThreshold)
331348
check.critThreshold = check.TransformMultipleKeywords([]string{"inodes_used_pct", "inodes_used"}, "inodes", check.critThreshold)
332-
check.AddPercentMetrics("inodes", drive+" inodes", float64(usage.InodesUsed), float64(usage.InodesTotal))
349+
perfLabel := fmt.Sprintf("%s inodes", (*drive)["drive"])
350+
check.AddPercentMetrics("inodes", perfLabel, float64(usage.InodesUsed), float64(usage.InodesTotal))
333351
}
334352
if check.HasThreshold("inodes_free") || check.HasThreshold("inodes_free_pct") {
335353
check.warnThreshold = check.TransformMultipleKeywords([]string{"inodes_free_pct"}, "inodes_free", check.warnThreshold)
336354
check.critThreshold = check.TransformMultipleKeywords([]string{"inodes_free_pct"}, "inodes_free", check.critThreshold)
337-
check.AddPercentMetrics("inodes_free", drive+" inodes free", float64(usage.InodesFree), float64(usage.InodesTotal))
355+
perfLabel := fmt.Sprintf("%s inodes free", (*drive)["drive"])
356+
check.AddPercentMetrics("inodes_free", perfLabel, float64(usage.InodesFree), float64(usage.InodesTotal))
338357
}
339358
}
340359

@@ -452,7 +471,9 @@ func (l *CheckDrivesize) addDriveSizeDetails(check *CheckData, drive map[string]
452471
return
453472
}
454473

455-
l.addMetrics(drive["drive"], check, usage, magic)
474+
l.handleDriveUsagePctThresholds(drive["drive"], check, &drive)
475+
476+
l.addMetrics(&drive, check, usage, magic)
456477
}
457478

458479
func (l *CheckDrivesize) getFlagNames(drive map[string]string) []string {

pkg/snclient/checkdata.go

Lines changed: 24 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,9 @@ func (cd *CheckData) Finalize() (*CheckResult, error) {
158158
log.Debugf("condition critical: %s", cd.critThreshold.String())
159159
log.Debugf("condition ok: %s", cd.okThreshold.String())
160160
// Run thresholds once on cd.details. This is done separately than metrics or entries
161+
// details are of type map[string]string, like entries in cd.listData, but there is only one per check
161162
// This can possibly set a value to cd.details[_state]
163+
log.Tracef("checking warning, critical, and ok thresholds on check details")
162164
cd.Check(cd.details, cd.warnThreshold, cd.critThreshold, cd.okThreshold)
163165
log.Tracef("details:")
164166
logTraceASCIIMap(cd.details)
@@ -210,6 +212,7 @@ func (cd *CheckData) finalizeOutput() (*CheckResult, error) {
210212

211213
// each entry in the list data is individually checked
212214
// This may set "_state" of each entry
215+
log.Tracef("checking warning, critical, and ok thresholds on a check entry")
213216
cd.Check(entry, cd.warnThreshold, cd.critThreshold, cd.okThreshold)
214217
}
215218

@@ -230,11 +233,12 @@ func (cd *CheckData) finalizeOutput() (*CheckResult, error) {
230233
}
231234

232235
cd.result.ApplyPerfSyntax(cd.perfSyntax, cd.timezone)
233-
234236
// Run a separate check on the macros
237+
log.Tracef("checking warning, critical, and ok thresholds on check macros")
235238
cd.Check(finalMacros, cd.warnThreshold, cd.critThreshold, cd.okThreshold)
236239
cd.setStateFromMaps(finalMacros)
237240
// Metrics are checked last, which also sets the final state
241+
log.Tracef("checking warning, critical, and ok thresholds on check metrics")
238242
cd.CheckMetrics(cd.warnThreshold, cd.critThreshold, cd.okThreshold)
239243

240244
switch {
@@ -458,21 +462,21 @@ func (cd *CheckData) Check(data map[string]string, warnCond, critCond, okCond Co
458462

459463
for i := range warnCond {
460464
if res, ok := warnCond[i].Match(data); res && ok {
461-
log.Debugf("This data '%s' matched the WARNING Condition", warnCond[i].original)
465+
log.Debugf("The given data matched the WARNING condition: '%s' ", warnCond[i].String())
462466
data["_state"] = fmt.Sprintf("%d", CheckExitWarning)
463467
}
464468
}
465469

466470
for i := range critCond {
467471
if res, ok := critCond[i].Match(data); res && ok {
468-
log.Debugf("This data '%s' matched the CRITICAL Condition", critCond[i].original)
472+
log.Debugf("This given data matched the CRITICAL condition: '%s' ", critCond[i].String())
469473
data["_state"] = fmt.Sprintf("%d", CheckExitCritical)
470474
}
471475
}
472476

473477
for i := range okCond {
474478
if res, ok := okCond[i].Match(data); res && ok {
475-
log.Debugf("This data '%s' matched the OK Condition", okCond[i].original)
479+
log.Debugf("This given data matched the OK condition: '%s' ", okCond[i].String())
476480
data["_state"] = fmt.Sprintf("%d", CheckExitOK)
477481
}
478482
}
@@ -1157,6 +1161,22 @@ func (cd *CheckData) hasThresholdCond(condList ConditionList, name string) bool
11571161
return false
11581162
}
11591163

1164+
func (cd *CheckData) filterThresholdConditionsUsingKeywords(condList ConditionList, keywords []string) []*Condition {
1165+
ret := []*Condition{}
1166+
for _, cond := range condList {
1167+
if len(cond.group) > 0 {
1168+
groupRet := cd.filterThresholdConditionsUsingKeywords(cond.group, keywords)
1169+
ret = append(ret, groupRet...)
1170+
}
1171+
1172+
if slices.Contains(keywords, cond.keyword) {
1173+
ret = append(ret, cond)
1174+
}
1175+
}
1176+
1177+
return ret
1178+
}
1179+
11601180
// hasThresholdCond returns true if the given list of conditions uses the given name at least once.
11611181
func (cd *CheckData) getAllThresholdKeywords(condList ConditionList) []string {
11621182
keywords := []string{}

pkg/snclient/checkmetric.go

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package snclient
33
import (
44
"bytes"
55
"fmt"
6+
"slices"
67
"strconv"
78
"strings"
89

@@ -23,7 +24,8 @@ type CheckMetric struct {
2324
CriticalStr *string // set critical from string
2425
Min *float64
2526
Max *float64
26-
PerfConfig *PerfConfig // apply perf tweaks
27+
PerfConfig *PerfConfig // apply perf tweaks
28+
Entry *map[string]string // entry that this metric is generated from
2729
}
2830

2931
func (m *CheckMetric) String() string {
@@ -156,16 +158,34 @@ func (m *CheckMetric) tweakedNum(rawNum any) (num, unit string) {
156158
return convert.Num2String(rawNum), m.Unit
157159
}
158160

161+
// Generate a string to be used in perfdata about this threshold
159162
func (m *CheckMetric) ThresholdString(conditions ConditionList) string {
160163
conv := func(rawNum any) string {
161164
num, _ := m.tweakedNum(rawNum)
162165

163166
return num
164167
}
165168

169+
conditionsToUseWhenBuildingPerfString := ConditionList{}
170+
171+
for _, cond := range conditions {
172+
if m.Entry == nil {
173+
conditionsToUseWhenBuildingPerfString = append(conditionsToUseWhenBuildingPerfString, cond)
174+
}
175+
176+
if slices.Contains(cond.skipEntries, m.Entry) {
177+
log.Tracef("condition: %q , skipping to add to list before generating threshold perf string", cond)
178+
continue
179+
}
180+
181+
conditionsToUseWhenBuildingPerfString = append(conditionsToUseWhenBuildingPerfString, cond)
182+
}
183+
184+
namesToUseWhenBuildingPerfString := []string{m.Name}
185+
166186
if m.ThresholdName != "" {
167-
return ThresholdString([]string{m.Name, m.ThresholdName}, conditions, conv)
187+
namesToUseWhenBuildingPerfString = append(namesToUseWhenBuildingPerfString, m.ThresholdName)
168188
}
169189

170-
return ThresholdString([]string{m.Name}, conditions, conv)
190+
return ThresholdString(namesToUseWhenBuildingPerfString, conditionsToUseWhenBuildingPerfString, conv)
171191
}

pkg/snclient/condition.go

Lines changed: 38 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"errors"
55
"fmt"
66
"math"
7+
"reflect"
78
"regexp"
89
"slices"
910
"strconv"
@@ -42,6 +43,9 @@ type Condition struct {
4243

4344
// back reference to check attributes (used to expand by unit)
4445
attr *[]CheckAttribute
46+
47+
// back reference to check entries to skip
48+
skipEntries [](*map[string]string)
4549
}
4650

4751
// Operator defines a filter operator.
@@ -228,15 +232,6 @@ func NewCondition(input string, attr *[]CheckAttribute) (*Condition, error) {
228232
}
229233

230234
func (c *Condition) String() string {
231-
if c.original != "" {
232-
// keyword might have been changed by a transform function, print it out separately if that is the case
233-
if strings.Contains(c.original, c.keyword) {
234-
return c.original
235-
}
236-
237-
return fmt.Sprintf("(original: %s | keyword: %s)", c.original, c.keyword)
238-
}
239-
240235
if len(c.group) > 0 {
241236
groups := []string{}
242237
for _, g := range c.group {
@@ -246,7 +241,7 @@ func (c *Condition) String() string {
246241
return "(" + strings.Join(groups, " "+c.groupOperator.String()+" ") + ")"
247242
}
248243

249-
return fmt.Sprintf("%s %s %v%s", c.keyword, c.operator.String(), c.value, c.unit)
244+
return fmt.Sprintf("Condition{kw: %q , op: %s , val: %v , un: %s , org: %s}", c.keyword, c.operator.String(), c.value, c.unit, c.original)
250245
}
251246

252247
// Match checks if given map matches current condition
@@ -255,6 +250,16 @@ func (c *Condition) Match(data map[string]string) (res, ok bool) {
255250
if c.isNone {
256251
return false, true
257252
}
253+
254+
for _, skipEntry := range c.skipEntries {
255+
// need to use reflect Pointer() to compare
256+
// 'data' argument is passed by value
257+
if reflect.ValueOf(data).Pointer() == reflect.ValueOf(*skipEntry).Pointer() {
258+
log.Tracef("Condition: %q , skipping entry due to it being in skip list", c.String())
259+
return false, false
260+
}
261+
}
262+
258263
if len(c.group) > 0 {
259264
finalOK := true
260265
for i := range c.group {
@@ -510,6 +515,7 @@ func (c *Condition) Clone() *Condition {
510515
group: make(ConditionList, 0),
511516
attr: c.attr,
512517
original: c.original,
518+
skipEntries: slices.Clone(c.skipEntries),
513519
}
514520

515521
for i := range c.group {
@@ -1039,27 +1045,28 @@ func conditionFixTokenOperator(token []string) []string {
10391045
}
10401046

10411047
// ThresholdString returns string used in warn/crit threshold performance data.
1042-
func ThresholdString(name []string, conditions ConditionList, numberFormat func(any) string) string {
1048+
// The name should be contained within the condition
1049+
func ThresholdString(names []string, conditions ConditionList, numberFormat func(any) string) string {
10431050
// fetch warning conditions for name of metric
10441051
filtered := make(ConditionList, 0)
10451052
var group GroupOperator
10461053
for num := range conditions {
10471054
cond := conditions[num]
1048-
if slices.Contains(name, cond.keyword) {
1055+
if slices.Contains(names, cond.keyword) {
10491056
filtered = append(filtered, cond)
10501057
}
10511058
if cond.groupOperator == GroupOr {
10521059
group = cond.groupOperator
10531060
for i := range cond.group {
1054-
if slices.Contains(name, cond.group[i].keyword) {
1061+
if slices.Contains(names, cond.group[i].keyword) {
10551062
filtered = append(filtered, cond.group[i])
10561063
}
10571064
}
10581065
}
10591066
if cond.groupOperator == GroupAnd {
10601067
group = cond.groupOperator
10611068
for i := range cond.group {
1062-
if slices.Contains(name, cond.group[i].keyword) {
1069+
if slices.Contains(names, cond.group[i].keyword) {
10631070
filtered = append(filtered, cond.group[i])
10641071
}
10651072
}
@@ -1105,7 +1112,7 @@ func ThresholdString(name []string, conditions ConditionList, numberFormat func(
11051112
return fmt.Sprintf("%s:%s", numberFormat(low), numberFormat(high))
11061113
}
11071114

1108-
// implicite And
1115+
// implicit And
11091116
return fmt.Sprintf("@%s:%s", numberFormat(low), numberFormat(high))
11101117
}
11111118

@@ -1170,3 +1177,19 @@ func replaceStrOp(input string) string {
11701177

11711178
return strings.Join(output, "")
11721179
}
1180+
1181+
// A condition list can contain some conditions that use a specialized keyword, and generallized keywords.
1182+
// This function only does modifications if there are conditions using the specialized keyword
1183+
// For all others that do not use the specizalied keyword, check if they are using a generallized keyword.
1184+
// After these two rounds of filtering conditions, disable a entry from this condition.
1185+
func (cl *ConditionList) disableGenerallizedConditionsForEntry(cd *CheckData, entry *map[string]string, specializedKeywords, generallizedKeywords []string) {
1186+
conditionsWithSpecializedKeyword := cd.filterThresholdConditionsUsingKeywords(*cl, specializedKeywords)
1187+
if len(conditionsWithSpecializedKeyword) > 0 {
1188+
conditionsWithoutSpecializedKeyword := utils.SubtractSlice(*cl, conditionsWithSpecializedKeyword)
1189+
conditionsWithoutSpecializedKeywordAndGenerallizedKeyword := cd.filterThresholdConditionsUsingKeywords(conditionsWithoutSpecializedKeyword, generallizedKeywords)
1190+
for _, cond := range conditionsWithoutSpecializedKeywordAndGenerallizedKeyword {
1191+
cond.skipEntries = append(cond.skipEntries, entry)
1192+
log.Tracef("Condition: %q is marked to skip an entry", cond.String())
1193+
}
1194+
}
1195+
}

pkg/utils/utils.go

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -697,3 +697,21 @@ func ReplaceNumbersWithZeroPadded(s string, padding int) string {
697697
return fmt.Sprintf(format, num)
698698
})
699699
}
700+
701+
// SubtractSlice returns a new slice containing the elements of op1 which are
// not present in op2. The order of the remaining elements is kept and every
// occurrence of a removed element is dropped.
//
// Neither op1 nor op2 is modified. The result is never nil, even if op1 is
// nil or all elements were removed.
func SubtractSlice[T comparable](op1 []T, op2 []T) []T {
	toRemove := make(map[T]struct{}, len(op2))
	for _, elem := range op2 {
		toRemove[elem] = struct{}{}
	}

	// build the result in a fresh slice so op1 stays untouched
	kept := make([]T, 0, len(op1))

	for _, elem := range op1 {
		if _, exists := toRemove[elem]; !exists {
			kept = append(kept, elem)
		}
	}

	return kept
}

0 commit comments

Comments
 (0)