Skip to content

Commit 3ae478e

Browse files
authored
New antispam modes (#949)
* Change do_if Check func argument to interface + extract antispam rules from config * add rules to antispam * antispam section in settings * add doc + antispammerMaintenance
1 parent b02f4ed commit 3ae478e

23 files changed

Lines changed: 587 additions & 148 deletions

fd/util.go

Lines changed: 88 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,6 @@ import (
1616

1717
func extractPipelineParams(settings *simplejson.Json) *pipeline.Settings {
1818
capacity := pipeline.DefaultCapacity
19-
antispamThreshold := pipeline.DefaultAntispamThreshold
20-
var antispamExceptions antispam.Exceptions
2119
sourceNameMetaField := pipeline.DefaultSourceNameMetaField
2220
avgInputEventSize := pipeline.DefaultAvgInputEventSize
2321
maxInputEventSize := pipeline.DefaultMaxInputEventSize
@@ -32,6 +30,11 @@ func extractPipelineParams(settings *simplejson.Json) *pipeline.Settings {
3230
metaCacheSize := pipeline.DefaultMetaCacheSize
3331
pool := ""
3432

33+
antispamThreshold := pipeline.DefaultAntispamThreshold
34+
antispamMaintenanceInterval := pipeline.DefaultMaintenanceInterval
35+
var antispamExceptions antispam.Exceptions
36+
var antispamRules antispam.Rules
37+
3538
metricHoldDuration := pipeline.DefaultMetricHoldDuration
3639
metricMaxLabelValueLength := pipeline.DefaultMetricMaxLabelValueLength
3740

@@ -89,20 +92,40 @@ func extractPipelineParams(settings *simplejson.Json) *pipeline.Settings {
8992
eventTimeout = i
9093
}
9194

92-
antispamThreshold = settings.Get("antispam_threshold").MustInt()
93-
antispamThreshold *= int(maintenanceInterval / time.Second)
94-
if antispamThreshold < 0 {
95-
logger.Warn("negative antispam_threshold value, antispam disabled")
96-
antispamThreshold = 0
97-
}
98-
9995
var err error
10096
antispamExceptions, err = extractAntispamExceptions(settings)
10197
if err != nil {
10298
logger.Fatalf("extract exceptions: %s", err)
10399
}
104100
antispamExceptions.Prepare()
105101

102+
antispamSettings := settings.Get("antispam")
103+
str = antispamSettings.Get("maintenance_interval").MustString()
104+
if str != "" {
105+
i, err := time.ParseDuration(str)
106+
if err != nil {
107+
logger.Fatalf("can't parse antispam maintenance interval: %s", err.Error())
108+
}
109+
antispamMaintenanceInterval = i
110+
}
111+
112+
antispamThreshold = antispamSettings.Get("threshold").MustInt(pipeline.DefaultAntispamThreshold)
113+
if mp, _ := antispamSettings.Map(); mp == nil {
114+
antispamThreshold = settings.Get("antispam_threshold").MustInt(pipeline.DefaultAntispamThreshold)
115+
}
116+
if antispamThreshold < pipeline.DefaultAntispamThreshold {
117+
logger.Warn("invalid antispam_threshold value, antispam disabled")
118+
antispamThreshold = pipeline.DefaultAntispamThreshold
119+
}
120+
if antispamThreshold != pipeline.DefaultAntispamThreshold {
121+
antispamThreshold *= int(antispamMaintenanceInterval / time.Second)
122+
}
123+
124+
antispamRules, err = extractAntispamRules(antispamSettings, antispamMaintenanceInterval)
125+
if err != nil {
126+
logger.Fatalf("extract antispam rules: %s", err)
127+
}
128+
106129
sourceNameMetaField = settings.Get("source_name_meta_field").MustString()
107130
isStrict = settings.Get("is_strict").MustBool()
108131

@@ -139,14 +162,18 @@ func extractPipelineParams(settings *simplejson.Json) *pipeline.Settings {
139162
MaxEventSize: maxInputEventSize,
140163
CutOffEventByLimit: cutOffEventByLimit,
141164
CutOffEventByLimitField: cutOffEventByLimitField,
142-
AntispamThreshold: antispamThreshold,
143-
AntispamExceptions: antispamExceptions,
144-
SourceNameMetaField: sourceNameMetaField,
145-
MaintenanceInterval: maintenanceInterval,
146-
EventTimeout: eventTimeout,
147-
StreamField: streamField,
148-
IsStrict: isStrict,
149-
Pool: pipeline.PoolType(pool),
165+
Antispam: pipeline.AntispamSettings{
166+
Threshold: antispamThreshold,
167+
Rules: antispamRules,
168+
Exceptions: antispamExceptions,
169+
MaintenanceInterval: antispamMaintenanceInterval,
170+
},
171+
SourceNameMetaField: sourceNameMetaField,
172+
MaintenanceInterval: maintenanceInterval,
173+
EventTimeout: eventTimeout,
174+
StreamField: streamField,
175+
IsStrict: isStrict,
176+
Pool: pipeline.PoolType(pool),
150177
Metric: &pipeline.MetricSettings{
151178
HoldDuration: metricHoldDuration,
152179
MaxLabelValueLength: metricMaxLabelValueLength,
@@ -171,6 +198,50 @@ func extractAntispamExceptions(settings *simplejson.Json) (antispam.Exceptions,
171198
return exceptions, nil
172199
}
173200

201+
func extractAntispamRules(settings *simplejson.Json, antispamMaintenanceInterval time.Duration) (antispam.Rules, error) {
202+
rulesJSON := settings.Get("rules")
203+
rulesRaw := rulesJSON.MustArray()
204+
if len(rulesRaw) == 0 {
205+
return nil, nil
206+
}
207+
208+
rules := make(antispam.Rules, 0, len(rulesRaw))
209+
for i := range rulesRaw {
210+
ruleJSON := rulesJSON.GetIndex(i)
211+
212+
name := ruleJSON.Get("name").MustString()
213+
if name == "" {
214+
return nil, fmt.Errorf("name must be set")
215+
}
216+
217+
threshold := ruleJSON.Get("threshold").MustInt()
218+
if threshold < pipeline.DefaultAntispamThreshold {
219+
logger.Warnf("invalid threshold value, antispam disabled for rule #%d", i)
220+
threshold = pipeline.DefaultAntispamThreshold
221+
}
222+
if threshold != pipeline.DefaultAntispamThreshold {
223+
threshold *= int(antispamMaintenanceInterval / time.Second)
224+
}
225+
226+
doIfChecker, err := extractDoIfChecker(ruleJSON.Get("do_if"))
227+
if err != nil {
228+
return nil, err
229+
}
230+
231+
if doIfChecker == nil {
232+
return nil, fmt.Errorf("missing do_if section, rule #%d", i)
233+
}
234+
235+
rules = append(rules, antispam.Rule{
236+
Name: name,
237+
Threshold: threshold,
238+
DoIfChecker: doIfChecker,
239+
})
240+
}
241+
242+
return rules, nil
243+
}
244+
174245
func extractMatchMode(actionJSON *simplejson.Json) pipeline.MatchMode {
175246
mm := actionJSON.Get("match_mode").MustString()
176247
return pipeline.MatchModeFromString(mm)

pipeline/README.idoc.md

Lines changed: 62 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ Which field in the log indicates `stream`. Mostly used for distinguishing `stdou
5454

5555
**`maintenance_interval`** *`string`* *`default=5s`*
5656

57-
How often to perform maintenance. Maintenance includes antispammer maintenance and metric cleanup, metric holder maintenance, increasing basic pipeline metrics with accumulated deltas, logging pipeline stats. The value must be passed in format of duration (`<number>(ms|s|m|h)`).
57+
How often to perform maintenance. Maintenance includes metric cleanup, metric holder maintenance, increasing basic pipeline metrics with accumulated deltas, logging pipeline stats. The value must be passed in format of duration (`<number>(ms|s|m|h)`).
5858

5959
<br>
6060

@@ -64,16 +64,20 @@ How long the event can process in action plugins and block stream in streamer un
6464

6565
<br>
6666

67-
**`antispam_threshold`** *`int`* *`default=0`*
67+
**`antispam_threshold`** *`int`* *`default=-1`*
6868

69-
Threshold value for the [antispammer](/pipeline/antispam/README.md#antispammer) to ban sources. If set to 0 antispammer is disabled. If set to the value greater than 0 antispammer is enabled and bans sources which write `antispam_threshold` or more logs in `maintenance_interval` time.
69+
Threshold value for the [antispammer](/pipeline/antispam/README.md#antispammer) to ban sources. If set to -1, the antispammer is disabled. If set to a value greater than -1, the antispammer is enabled and bans sources that write `antispam_threshold` or more logs in `maintenance_interval` time.
70+
71+
> ⚠ DEPRECATED. Use `threshold` in `antispam` instead.
7072
7173
<br>
7274

7375
**`antispam_exceptions`** *`[]`[antispam.Exception](/pipeline/antispam/README.md#exception-parameters)*
7476

7577
The list of antispammer exceptions. If the log matches at least one of the exceptions it is not accounted in antispammer.
7678

79+
> ⚠ DEPRECATED. Use `rules` in `antispam` instead.
80+
7781
<br>
7882

7983
**`meta_cache_size`** *`int`* *`default=1024`*
@@ -146,6 +150,8 @@ pipelines:
146150
max_label_value_length: 100
147151
```
148152

153+
<br>
154+
149155
**`hold_duration`** *`string`* *`default=30m`*
150156

151157
The amount of time the metric can be idle until it is deleted. Used for deleting rarely updated metrics to save metrics storage resources. The value must be passed in format of duration (`<number>(ms|s|m|h)`).
@@ -158,6 +164,59 @@ Maximum length of custom metric labels in action plugins. If zero, no limit is s
158164

159165
<br>
160166

167+
## Antispam
168+
169+
Section for antispam in settings. Example:
170+
171+
```yaml
172+
pipelines:
173+
test:
174+
settings:
175+
antispam:
176+
threshold: 3000
177+
maintenance_interval: 5s
178+
rules:
179+
- name: ban_all
180+
threshold: 0
181+
do_if:
182+
op: equal
183+
field: source_name
184+
values: ["test.log"]
185+
- name: custom_threshold
186+
threshold: 5000
187+
do_if:
188+
op: and
189+
operands:
190+
- op: contains
191+
data: meta.service
192+
values:
193+
- test_service
194+
- op: prefix
195+
data: event
196+
values:
197+
- '{"level":"debug"'
198+
```
199+
200+
<br>
201+
202+
**`maintenance_interval`** *`string`* *`default=5s`*
203+
204+
How often to perform antispammer maintenance. The value must be passed in format of duration (`<number>(ms|s|m|h)`).
205+
206+
<br>
207+
208+
**`threshold`** *`int`* *`default=-1`*
209+
210+
Threshold value for the [antispammer](/pipeline/antispam/README.md#antispammer) to ban sources. If set to -1, the antispammer is disabled. If set to a value greater than -1, the antispammer is enabled and bans sources that write `threshold` or more logs in `maintenance_interval` time. If the `antispam` section is not specified, `antispam_threshold` is used instead.
211+
212+
<br>
213+
214+
**`rules`** *`[]`[antispam.Rule](/pipeline/antispam/README.md#rules)*
215+
216+
The list of antispammer rules. Each rule is described by a `name`, its own `threshold`, and a `do_if` condition, as shown in the example above.
217+
218+
<br>
219+
161220
## Datetime parse formats
162221

163222
Most of the plugins which work with parsing datetime call `pipeline.ParseTime` function. It accepts datetime layouts the same way as Go [time.Parse](https://pkg.go.dev/time#Parse) (in format of datetime like `2006-01-02T15:04:05.999999999Z07:00`) except unix timestamp formats, they can only be specified via aliases.

pipeline/README.md

Lines changed: 62 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ Which field in the log indicates `stream`. Mostly used for distinguishing `stdou
5454

5555
**`maintenance_interval`** *`string`* *`default=5s`*
5656

57-
How often to perform maintenance. Maintenance includes antispammer maintenance and metric cleanup, metric holder maintenance, increasing basic pipeline metrics with accumulated deltas, logging pipeline stats. The value must be passed in format of duration (`<number>(ms|s|m|h)`).
57+
How often to perform maintenance. Maintenance includes metric cleanup, metric holder maintenance, increasing basic pipeline metrics with accumulated deltas, logging pipeline stats. The value must be passed in format of duration (`<number>(ms|s|m|h)`).
5858

5959
<br>
6060

@@ -64,16 +64,20 @@ How long the event can process in action plugins and block stream in streamer un
6464

6565
<br>
6666

67-
**`antispam_threshold`** *`int`* *`default=0`*
67+
**`antispam_threshold`** *`int`* *`default=-1`*
6868

69-
Threshold value for the [antispammer](/pipeline/antispam/README.md#antispammer) to ban sources. If set to 0 antispammer is disabled. If set to the value greater than 0 antispammer is enabled and bans sources which write `antispam_threshold` or more logs in `maintenance_interval` time.
69+
Threshold value for the [antispammer](/pipeline/antispam/README.md#antispammer) to ban sources. If set to -1, the antispammer is disabled. If set to a value greater than -1, the antispammer is enabled and bans sources that write `antispam_threshold` or more logs in `maintenance_interval` time.
70+
71+
> ⚠ DEPRECATED. Use `threshold` in `antispam` instead.
7072
7173
<br>
7274

7375
**`antispam_exceptions`** *`[]`[antispam.Exception](/pipeline/antispam/README.md#exception-parameters)*
7476

7577
The list of antispammer exceptions. If the log matches at least one of the exceptions it is not accounted in antispammer.
7678

79+
> ⚠ DEPRECATED. Use `rules` in `antispam` instead.
80+
7781
<br>
7882

7983
**`meta_cache_size`** *`int`* *`default=1024`*
@@ -146,6 +150,8 @@ pipelines:
146150
max_label_value_length: 100
147151
```
148152

153+
<br>
154+
149155
**`hold_duration`** *`string`* *`default=30m`*
150156

151157
The amount of time the metric can be idle until it is deleted. Used for deleting rarely updated metrics to save metrics storage resources. The value must be passed in format of duration (`<number>(ms|s|m|h)`).
@@ -158,6 +164,59 @@ Maximum length of custom metric labels in action plugins. If zero, no limit is s
158164

159165
<br>
160166

167+
## Antispam
168+
169+
Section for antispam in settings. Example:
170+
171+
```yaml
172+
pipelines:
173+
test:
174+
settings:
175+
antispam:
176+
threshold: 3000
177+
maintenance_interval: 5s
178+
rules:
179+
- name: ban_all
180+
threshold: 0
181+
do_if:
182+
op: equal
183+
field: source_name
184+
values: ["test.log"]
185+
- name: custom_threshold
186+
threshold: 5000
187+
do_if:
188+
op: and
189+
operands:
190+
- op: contains
191+
data: meta.service
192+
values:
193+
- test_service
194+
- op: prefix
195+
data: event
196+
values:
197+
- '{"level":"debug"'
198+
```
199+
200+
<br>
201+
202+
**`maintenance_interval`** *`string`* *`default=5s`*
203+
204+
How often to perform antispammer maintenance. The value must be passed in format of duration (`<number>(ms|s|m|h)`).
205+
206+
<br>
207+
208+
**`threshold`** *`int`* *`default=-1`*
209+
210+
Threshold value for the [antispammer](/pipeline/antispam/README.md#antispammer) to ban sources. If set to -1, the antispammer is disabled. If set to a value greater than -1, the antispammer is enabled and bans sources that write `threshold` or more logs in `maintenance_interval` time. If the `antispam` section is not specified, `antispam_threshold` is used instead.
211+
212+
<br>
213+
214+
**`rules`** *`[]`[antispam.Rule](/pipeline/antispam/README.md#rules)*
215+
216+
The list of antispammer rules. Each rule is described by a `name`, its own `threshold`, and a `do_if` condition, as shown in the example above.
217+
218+
<br>
219+
161220
## Datetime parse formats
162221

163222
Most of the plugins which work with parsing datetime call `pipeline.ParseTime` function. It accepts datetime layouts the same way as Go [time.Parse](https://pkg.go.dev/time#Parse) (in format of datetime like `2006-01-02T15:04:05.999999999Z07:00`) except unix timestamp formats, they can only be specified via aliases.

0 commit comments

Comments
 (0)