@@ -31,102 +31,58 @@ public class FilterConfig {
3131 private boolean filterTinyText = true ;
3232 private boolean filterHiddenOCG = true ;
3333 private boolean filterSensitiveData = false ;
34- private final List <SanitizationRule > filterRules = new ArrayList <>() ;
34+ private final List <SanitizationRule > filterRules ;
3535
3636 /** Default rules */
37- private static final List <SanitizationRule > DEFAULT_RULES = new ArrayList <>();
38- static {
39- DEFAULT_RULES .add (new SanitizationRule (
37+ private void initializeDefaultRules () {
38+ filterRules .add (new SanitizationRule (
4039 Pattern .compile ("[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\ .[A-Za-z]{2,}" ),
4140 "email@example.com"
4241 ));
43- DEFAULT_RULES .add (new SanitizationRule (
42+ filterRules .add (new SanitizationRule (
4443 Pattern .compile ("[+]\\ d+(?:-\\ d+)+" ),
4544 "+00-0000-0000"
4645 ));
47- DEFAULT_RULES .add (new SanitizationRule (
46+ filterRules .add (new SanitizationRule (
4847 Pattern .compile ("[A-Z]{1,2}\\ d{6,9}" ),
4948 "AA0000000"
5049 ));
51- DEFAULT_RULES .add (new SanitizationRule (
50+ filterRules .add (new SanitizationRule (
5251 Pattern .compile ("\\ b\\ d{4}-?\\ d{4}-?\\ d{4}-?\\ d{4}\\ b" ),
5352 "0000-0000-0000-0000"
5453 ));
55- DEFAULT_RULES .add (new SanitizationRule (
54+ filterRules .add (new SanitizationRule (
5655 Pattern .compile ("\\ b\\ d{10,18}\\ b" ),
5756 "0000000000000000"
5857 ));
59- DEFAULT_RULES .add (new SanitizationRule (
58+ filterRules .add (new SanitizationRule (
6059 Pattern .compile ("\\ b(?:\\ d{1,3}\\ .){3}\\ d{1,3}\\ b" ),
6160 "0.0.0.0"
6261 ));
63- DEFAULT_RULES .add (new SanitizationRule (
62+ filterRules .add (new SanitizationRule (
6463 Pattern .compile ("\\ b([0-9a-fA-F]{0,4}:){2,7}[0-9a-fA-F]{0,4}\\ b" ),
6564 "0.0.0.0::1"
6665 ));
67- DEFAULT_RULES .add (new SanitizationRule (
66+ filterRules .add (new SanitizationRule (
6867 Pattern .compile ("\\ b(?:[0-9A-Fa-f]{2}:){5}[0-9A-Fa-f]{2}\\ b" ),
6968 "00:00:00:00:00:00"
7069 ));
71- DEFAULT_RULES .add (new SanitizationRule (
70+ filterRules .add (new SanitizationRule (
7271 Pattern .compile ("\\ b\\ d{15}\\ b" ),
7372 "000000000000000"
7473 ));
75- DEFAULT_RULES .add (new SanitizationRule (
74+ filterRules .add (new SanitizationRule (
7675 Pattern .compile ("https?://[A-Za-z0-9.-]+(:\\ d+)?(/\\ S*)?" ),
7776 "https://example.com"
7877 ));
79- //TODO Confirm info about regex for Korean phone, card, resident numbers and etc.
80- // Korean Resident Registration Number
81- DEFAULT_RULES .add (new SanitizationRule (
82- Pattern .compile ("\\ b\\ d{6}-\\ d{7}\\ b" ),
83- "000000-0000000"
84- ));
85- // Korean phone numbers
86- DEFAULT_RULES .add (new SanitizationRule (
87- Pattern .compile ("\\ b0\\ d{1,2}-\\ d{3,4}-\\ d{4}\\ b" ),
88- "010-0000-0000"
89- ));
90- // Korean business registration number
91- DEFAULT_RULES .add (new SanitizationRule (
92- Pattern .compile ("\\ b\\ d{3}-\\ d{2}-\\ d{5}\\ b" ),
93- "000-00-00000"
94- ));
95- // Korean bank account numbers
96- // DEFAULT_RULES.add(new SanitizationRule(
97- // Pattern.compile("\\b\\d{2,4}-\\d{2,3}-\\d{4,6}\\b"),
98- // "000-000-000000"
99- // ));
100- //TODO Confirm info about regex for AWS (maybe create 2 separate rules for AKIA|ASIA)
101- // AWS Access Key
102- DEFAULT_RULES .add (new SanitizationRule (
103- Pattern .compile ("\\ b(AKIA|ASIA)[0-9A-Z]{12,124}\\ b" ),
104- "AKIA0000000000000000"
105- ));
106- //TODO Confirm info about regex for GitHub (maybe create separate rules for ghp|ghu|gho|ghs|ghr)
107- // GitHub Personal Access Token
108- DEFAULT_RULES .add (new SanitizationRule (
109- Pattern .compile ("\\ bgh[puors]_[A-Za-z0-9]{10,251}\\ b" ),
110- "ghp_000000000000000000000000000000000000"
111- ));
112- // GitHub Fine-grained Personal Access Token
113- DEFAULT_RULES .add (new SanitizationRule (
114- Pattern .compile ("\\ bgithub_pat_[A-Za-z0-9_]{10,243}\\ b" ),
115- "github_pat_0000000000000000000000_00000000000000000000000000000000000000000000000000000000000"
116- ));
117- // AWS Secret Key (Finds 40-character, base-64 strings that don't have any base 64 characters immediately before or after).
118- // Has to be last rule
119- DEFAULT_RULES .add (new SanitizationRule (
120- Pattern .compile ("(?<![A-Za-z0-9/+])[A-Za-z0-9/+]{40}(?![A-Za-z0-9/+])" ),
121- "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
122- ));
12378 }
12479
12580 /**
12681 * Constructor initializing the configuration of filter.
12782 */
12883 public FilterConfig () {
129- filterRules .addAll (DEFAULT_RULES );
84+ this .filterRules = new ArrayList <>();
85+ initializeDefaultRules ();
13086 }
13187
13288 /**
0 commit comments