Skip to content

Commit b51a25f

Browse files
committed
[AWSINTS-3462] feat(go-forwarder): add scrubbing
1 parent 5a38306 commit b51a25f

2 files changed

Lines changed: 190 additions & 0 deletions

File tree

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
// Unless explicitly stated otherwise all files in this repository are licensed
2+
// under the Apache License Version 2.0.
3+
// This product includes software developed at Datadog (https://www.datadoghq.com/).
4+
// Copyright 2026-Present Datadog, Inc.
5+
6+
package transform
7+
8+
import (
9+
"log/slog"
10+
"regexp"
11+
12+
"github.com/DataDog/datadog-serverless-functions/aws/logs_monitoring_go/internal/config"
13+
)
14+
15+
const (
	// Regular expressions used by the built-in scrubbing rules.
	// ipPattern matches four dot-separated groups of one to three digits; it
	// does not check that each group is a valid octet (0-255).
	ipPattern    = `\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}`
	emailPattern = `[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+`

	// Fixed placeholders substituted for every match of the patterns above.
	ipReplacement    = "xxx.xxx.xxx.xxx"
	emailReplacement = "xxxxx@xxxxx.com"
)
22+
23+
// scrubbingRule pairs a compiled regular expression with the text that is
// substituted for each of its matches.
type scrubbingRule struct {
	regex       *regexp.Regexp
	replacement string
}
27+
28+
type Scrubber struct {
29+
rules []scrubbingRule
30+
}
31+
32+
func NewScrubber(cfg config.ScrubbingConfig) Scrubber {
33+
var rules []scrubbingRule
34+
35+
if cfg.ScrubIP {
36+
rules = append(rules, scrubbingRule{
37+
regex: regexp.MustCompile(ipPattern),
38+
replacement: ipReplacement,
39+
})
40+
}
41+
42+
if cfg.ScrubEmail {
43+
rules = append(rules, scrubbingRule{
44+
regex: regexp.MustCompile(emailPattern),
45+
replacement: emailReplacement,
46+
})
47+
}
48+
49+
if cfg.CustomRule != "" {
50+
re, err := regexp.Compile(cfg.CustomRule)
51+
if err != nil {
52+
slog.Error("invalid custom scrubbing rule, make sure your regex is RE2 compatible",
53+
slog.String("pattern", cfg.CustomRule),
54+
slog.Any("error", err),
55+
)
56+
} else {
57+
rules = append(rules, scrubbingRule{
58+
regex: re,
59+
replacement: cfg.CustomReplacement,
60+
})
61+
}
62+
}
63+
64+
return Scrubber{rules: rules}
65+
}
66+
67+
func (s Scrubber) ScrubMessage(msg string) string {
68+
for _, rule := range s.rules {
69+
msg = rule.regex.ReplaceAllString(msg, rule.replacement)
70+
}
71+
return msg
72+
}
Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
// Unless explicitly stated otherwise all files in this repository are licensed
2+
// under the Apache License Version 2.0.
3+
// This product includes software developed at Datadog (https://www.datadoghq.com/).
4+
// Copyright 2026-Present Datadog, Inc.
5+
6+
package transform
7+
8+
import (
9+
"testing"
10+
11+
"github.com/DataDog/datadog-serverless-functions/aws/logs_monitoring_go/internal/config"
12+
)
13+
14+
func TestNewScrubber(t *testing.T) {
15+
tests := map[string]struct {
16+
cfg config.ScrubbingConfig
17+
nRules int
18+
}{
19+
"no_rules": {
20+
cfg: config.ScrubbingConfig{},
21+
nRules: 0,
22+
},
23+
"ip_only": {
24+
cfg: config.ScrubbingConfig{ScrubIP: true},
25+
nRules: 1,
26+
},
27+
"email_only": {
28+
cfg: config.ScrubbingConfig{ScrubEmail: true},
29+
nRules: 1,
30+
},
31+
"ip_and_email": {
32+
cfg: config.ScrubbingConfig{ScrubIP: true, ScrubEmail: true},
33+
nRules: 2,
34+
},
35+
"custom_rule": {
36+
cfg: config.ScrubbingConfig{CustomRule: `\d+`, CustomReplacement: "NUM"},
37+
nRules: 1,
38+
},
39+
"all_rules": {
40+
cfg: config.ScrubbingConfig{ScrubIP: true, ScrubEmail: true, CustomRule: `secret`, CustomReplacement: "[REDACTED]"},
41+
nRules: 3,
42+
},
43+
"invalid_custom_regex": {
44+
cfg: config.ScrubbingConfig{CustomRule: `([invalid`},
45+
nRules: 0,
46+
},
47+
}
48+
49+
for name, tc := range tests {
50+
t.Run(name, func(t *testing.T) {
51+
t.Parallel()
52+
s := NewScrubber(tc.cfg)
53+
if got := len(s.rules); got != tc.nRules {
54+
t.Errorf("got %d rules, want %d", got, tc.nRules)
55+
}
56+
})
57+
}
58+
}
59+
60+
func TestScrubMessage(t *testing.T) {
61+
tests := map[string]struct {
62+
cfg config.ScrubbingConfig
63+
input string
64+
want string
65+
}{
66+
"ip_redaction": {
67+
cfg: config.ScrubbingConfig{ScrubIP: true},
68+
input: "connected from 192.168.1.1 to 10.0.0.1",
69+
want: "connected from xxx.xxx.xxx.xxx to xxx.xxx.xxx.xxx",
70+
},
71+
"email_redaction": {
72+
cfg: config.ScrubbingConfig{ScrubEmail: true},
73+
input: "user john.doe@example.com logged in",
74+
want: "user xxxxx@xxxxx.com logged in",
75+
},
76+
"custom_pattern": {
77+
cfg: config.ScrubbingConfig{CustomRule: `secret-\w+`, CustomReplacement: "[REDACTED]"},
78+
input: "token=secret-abc123 visible",
79+
want: "token=[REDACTED] visible",
80+
},
81+
"custom_empty_replacement": {
82+
cfg: config.ScrubbingConfig{CustomRule: `remove-this `},
83+
input: "remove-this here",
84+
want: "here",
85+
},
86+
"ip_and_email_sequential": {
87+
cfg: config.ScrubbingConfig{ScrubIP: true, ScrubEmail: true},
88+
input: "192.168.1.1 user@host.com",
89+
want: "xxx.xxx.xxx.xxx xxxxx@xxxxx.com",
90+
},
91+
"no_match": {
92+
cfg: config.ScrubbingConfig{ScrubIP: true, ScrubEmail: true},
93+
input: "clean message with no sensitive data",
94+
want: "clean message with no sensitive data",
95+
},
96+
"multiple_ips": {
97+
cfg: config.ScrubbingConfig{ScrubIP: true},
98+
input: "src=1.2.3.4 dst=5.6.7.8 via=10.0.0.1",
99+
want: "src=xxx.xxx.xxx.xxx dst=xxx.xxx.xxx.xxx via=xxx.xxx.xxx.xxx",
100+
},
101+
"non_ascii_custom": {
102+
cfg: config.ScrubbingConfig{CustomRule: `[^\x01-\x7f]+`, CustomReplacement: "xxxxx"},
103+
input: "abcdef\u65e5\u672c\u8a9eefg\u304b\u304d\u304f\u3051\u3053hij",
104+
want: "abcdefxxxxxefgxxxxxhij",
105+
},
106+
}
107+
108+
for name, tc := range tests {
109+
t.Run(name, func(t *testing.T) {
110+
t.Parallel()
111+
s := NewScrubber(tc.cfg)
112+
got := s.ScrubMessage(tc.input)
113+
if got != tc.want {
114+
t.Errorf("got %q, want %q", got, tc.want)
115+
}
116+
})
117+
}
118+
}

0 commit comments

Comments
 (0)