-
Notifications
You must be signed in to change notification settings - Fork 332
Expand file tree
/
Copy pathRedactUtils.java
More file actions
316 lines (281 loc) · 12.5 KB
/
RedactUtils.java
File metadata and controls
316 lines (281 loc) · 12.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
package datadog.crashtracking.parsers;
import de.thetaphi.forbiddenapis.SuppressForbidden;
import java.util.function.Function;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Utilities for redacting potentially sensitive data from JVM crash log register-to-memory mapping
 * entries.
 *
 * <p>Each line of a mapping value is run through a fixed sequence of regex-based passes that
 * rewrite class names, library paths, String contents and raw memory bytes. Class names that belong
 * to a known package (JDK, Jakarta, Oracle/Sun, Datadog) are kept verbatim; every other packaged
 * class name collapses to {@code redacted/Redacted} (or {@code redacted.Redacted}).
 *
 * <p>Known limitation: HotSpot prints String contents unescaped, so a String value containing a
 * line break spills onto following lines; only the part on the line carrying the opening quote is
 * recognised and redacted.
 */
public final class RedactUtils {
  static final String REDACTED = "redacted";
  static final String REDACTED_CLASS = "Redacted";
  private static final String REDACTED_STRING = "REDACTED";

  // First-line marker of an oop dump whose type is exactly java.lang.Class
  private static final String CLASS_OOP_MARKER = "is an oop: java.lang.Class";
  // Every inline String reference contains this sequence; used as a cheap pre-check
  private static final String STRING_OOP_REF_MARKER = "\"{0x";

  private static final String[] KNOWN_PACKAGES_PREFIXES = {
    // Java SE / JDK internals
    "java/",
    "jdk/",
    "sun/",
    "javax/",
    // Jakarta EE (successor to javax)
    "jakarta/",
    // Oracle/Sun vendor packages
    "com/sun/",
    "com/oracle/",
    // Datadog top-level and internal shorthand
    "datadog/",
    "com/dd/",
  };

  // " - string: "value"" in String oop dumps. The value is printed unescaped, so it may itself
  // contain double quotes or lack a closing quote on this line: everything from the opening quote
  // to the end of the line is treated as content.
  private static final Pattern STRING_CONTENT = Pattern.compile("(\\s*- string: )\".*");

  // Type descriptors like Lcom/company/Type;
  private static final Pattern TYPE_DESCRIPTOR = Pattern.compile("L([A-Za-z$_][A-Za-z0-9$_/]*);");

  // klass references: - klass: 'com/company/Class'
  private static final Pattern KLASS_REF = Pattern.compile("(klass: ')([^']+)'");

  // 'in 'class'' clause in {method} descriptor entries
  private static final Pattern METHOD_IN_CLASS = Pattern.compile("( in ')([^']+)'");

  // Object-reference field values in oop dumps: a 'com/company/Class'{0x...}
  private static final Pattern OBJ_FIELD_REF = Pattern.compile("(a ')([A-Za-z$_][A-Za-z0-9$_/]*)'");

  // Class name in nmethod compiled-method output (JDK 11+):
  //   "Compiled method (c2) ... com.company.Foo::methodName (N bytes)"  (PRODUCT — dots)
  //   "Compiled method (c2) ... com/company/Foo::methodName (N bytes)"  (debug — slashes)
  private static final Pattern NMETHOD_CLASS =
      Pattern.compile("([A-Za-z$_][A-Za-z0-9$_]*(?:[./][A-Za-z$_][A-Za-z0-9$_]*)+)::");

  // Library path in two formats produced by os::print_location():
  //   <offset 0x...> in /path/to/lib.so at 0x...   (no dladdr symbol)
  //   symbol+offset in /path/to/lib.so at 0x...    (dladdr resolved a symbol name)
  // The path runs up to the trailing " at 0x..." (or the end of the line), so directories that
  // contain spaces are captured whole instead of being cut at the first space.
  private static final Pattern LIBRARY_PATH =
      Pattern.compile(
          "((?:<[^>]+>|\\S+\\+\\S+)\\s+in\\s+)(/.*?)(?=\\s+at\\s+0x[0-9a-fA-F]+|$)");

  // Inline String value followed by an OOP reference: "any value"{0x...}
  // The reluctant quantifier stops at the first quote that is directly followed by an address, so
  // quotes embedded in the value stay inside the match.
  private static final Pattern QUOTED_OOP_REF =
      Pattern.compile("\"(.*?)\"(\\{0x[0-9a-fA-F]+\\})");

  // Shape of a dotted class name (com.company.Type); used to decide whether a String value inside
  // a java.lang.Class dump can be handled as a class name.
  private static final Pattern DOTTED_CLASS_NAME =
      Pattern.compile("[A-Za-z$_][A-Za-z0-9$_]*(?:\\.[A-Za-z$_][A-Za-z0-9$_]*)*");

  // is an oop: com.company.Class
  private static final Pattern IS_AN_OOP =
      Pattern.compile("(is an oop: )([A-Za-z$_][A-Za-z0-9$_]*(?:\\.[A-Za-z$_][A-Za-z0-9$_]*)*)");

  // Hex-dump bytes in "points into unknown readable memory:" lines.
  // Two formats produced by os::print_location():
  //   "memory: 0x<addr> | ff ff ff ff ..."  (Linux/macOS amd64 — address + pipe + bytes)
  //   "memory: ff ff ff ff ..."             (Linux aarch64 — bytes only)
  // The address (when present) is kept; only the raw bytes are redacted.
  private static final Pattern READABLE_MEMORY_HEX_DUMP =
      Pattern.compile(
          "(points into unknown readable memory: (?:0x[0-9a-fA-F]+ \\| )?)([0-9a-fA-F]{2}(?: [0-9a-fA-F]{2})*)");

  private RedactUtils() {}

  /**
   * Main entry point: redact sensitive data from a register-to-memory mapping value (possibly
   * multiline).
   *
   * <p>The value is processed line by line; line breaks (including trailing ones) are preserved.
   * The first line decides whether the dump describes a {@code java.lang.Class} oop, which changes
   * how inline String values are handled on every line.
   *
   * @param value the raw mapping value, may be {@code null} or empty
   * @return the redacted value; {@code null} or empty input is returned unchanged
   */
  public static String redactRegisterToMemoryMapping(String value) {
    if (value == null || value.isEmpty()) {
      return value;
    }
    // java.lang.Class oop dumps: String fields hold class names, not arbitrary data.
    // All other oop types: String fields are application data and must be fully redacted.
    int firstBreak = value.indexOf('\n');
    String firstLine = firstBreak < 0 ? value : value.substring(0, firstBreak);
    boolean isClassOop = isJavaLangClassOop(firstLine);

    // Walk the lines with indexOf instead of String.split: no intermediate array is needed and
    // empty lines (leading, inner or trailing) are kept exactly as they appear in the input.
    StringBuilder out = new StringBuilder(value.length());
    int start = 0;
    while (true) {
      int end = value.indexOf('\n', start);
      if (end < 0) {
        out.append(redactLine(value.substring(start), isClassOop));
        return out.toString();
      }
      out.append(redactLine(value.substring(start, end), isClassOop)).append('\n');
      start = end + 1;
    }
  }

  /**
   * Returns true if the first line of a register-to-memory mapping value indicates a {@code
   * java.lang.Class} oop (not a subclass or other type).
   */
  private static boolean isJavaLangClassOop(String firstLine) {
    int idx = firstLine.indexOf(CLASS_OOP_MARKER);
    if (idx < 0) {
      return false;
    }
    // Ensure the class name ends here — not a prefix of e.g. java.lang.ClassLoader
    int end = idx + CLASS_OOP_MARKER.length();
    return end >= firstLine.length() || firstLine.charAt(end) == ' ';
  }

  /** Applies every redaction pass to a single line, in a fixed order. */
  private static String redactLine(String line, boolean isClassOop) {
    line = redactStringTypeValue(line);
    line = redactTypeDescriptors(line);
    line = redactKlassReference(line);
    line = redactMethodClass(line);
    line = redactObjFieldRef(line);
    line = redactNmethodClass(line);
    line = redactLibraryPath(line);
    line = redactStringOopRef(line, isClassOop);
    line = redactOopClassName(line);
    line = redactReadableMemoryHexDump(line);
    return line;
  }

  /**
   * Redacts {@code "value"{0x...}} OOP references in oop dump field lines.
   *
   * <p>When {@code isClassOop} is true (inside a {@code java.lang.Class} oop dump) a value shaped
   * like a dotted class name is treated as one: it is kept if it belongs to a known package and
   * rewritten to {@code "redacted.Redacted"} otherwise. Every other value — any value outside a
   * {@code java.lang.Class} dump, and any value that is not a plain dotted name — is replaced by
   * {@code "REDACTED"} since it may be arbitrary application data. The address is always kept.
   */
  private static String redactStringOopRef(String line, boolean isClassOop) {
    if (line.indexOf(STRING_OOP_REF_MARKER) < 0) {
      return line; // no closing quote directly followed by an address: nothing can match
    }
    return replaceAll(
        QUOTED_OOP_REF,
        line,
        m -> {
          String content = m.group(1);
          boolean classNameShaped = isClassOop && DOTTED_CLASS_NAME.matcher(content).matches();
          String safe = classNameShaped ? redactDottedClassName(content) : REDACTED_STRING;
          return "\"" + safe + "\"" + m.group(2);
        });
  }

  /**
   * Redacts string content in String oop dump lines: <code> - string: "Some string"</code> to
   * <code> - string: "REDACTED"</code>. Everything from the opening quote to the end of the line is
   * replaced, so quotes embedded in the value cannot expose the rest of it.
   */
  static String redactStringTypeValue(String line) {
    return STRING_CONTENT.matcher(line).replaceAll("$1\"" + REDACTED_STRING + "\"");
  }

  /**
   * Redacts the package of type descriptors in a line: <code>Lcom/company/Type;</code> to <code>
   * Lredacted/Redacted;</code>
   */
  static String redactTypeDescriptors(String line) {
    return replaceAll(TYPE_DESCRIPTOR, line, m -> "L" + redactJvmClassName(m.group(1)) + ";");
  }

  /**
   * Redacts klass references in a line: <code>klass: 'com/company/Class'</code> to <code>
   * klass: 'redacted/Redacted'</code>
   */
  static String redactKlassReference(String line) {
    return replaceAll(KLASS_REF, line, m -> m.group(1) + redactJvmClassName(m.group(2)) + "'");
  }

  /**
   * Redacts the class in a method descriptor's {@code in 'class'} clause: <code>
   * in 'com/company/Class'</code> to <code>in 'redacted/Redacted'</code>
   */
  static String redactMethodClass(String line) {
    return replaceAll(
        METHOD_IN_CLASS, line, m -> m.group(1) + redactJvmClassName(m.group(2)) + "'");
  }

  /**
   * Redacts all but the parent directory and filename from a library path. Handles both {@code
   * <offset 0x...> in /path/to/dir/lib.so} and {@code symbol+0 in /path/to/dir/lib.so}, producing
   * {@code ... in /redacted/dir/lib.so}. The path extends up to the trailing {@code at 0x...}
   * clause, or to the end of the line when that clause is absent, so paths containing spaces are
   * redacted as a whole.
   */
  static String redactLibraryPath(String line) {
    return replaceAll(LIBRARY_PATH, line, m -> m.group(1) + redactPath(m.group(2)));
  }

  /**
   * Redacts the class name in oop dump object-reference field values: <code>a
   * 'com/company/Class'</code> to <code>a 'redacted/Redacted'</code>.
   */
  static String redactObjFieldRef(String line) {
    return replaceAll(OBJ_FIELD_REF, line, m -> m.group(1) + redactJvmClassName(m.group(2)) + "'");
  }

  /**
   * Redacts the class name in nmethod {@code Compiled method} output (JDK 11+): <code>
   * com.company.Foo::methodName</code> to <code>redacted.Redacted::methodName</code>. Handles both
   * dot-separated (PRODUCT build) and slash-separated (debug build) class names.
   */
  static String redactNmethodClass(String line) {
    return replaceAll(
        NMETHOD_CLASS,
        line,
        m -> {
          String cls = m.group(1);
          String redacted =
              cls.indexOf('/') >= 0 ? redactJvmClassName(cls) : redactDottedClassName(cls);
          return redacted + "::";
        });
  }

  /**
   * Redacts any {@code "value"{0x...}} OOP reference to {@code "REDACTED"{0x...}}. This is the safe
   * default for lines that are not part of a {@code java.lang.Class} oop dump, where the String
   * value may be arbitrary application data. For class-name-aware redaction (inside a {@code
   * java.lang.Class} oop) use {@link #redactRegisterToMemoryMapping} which detects the oop type
   * automatically.
   */
  // @VisibleForTesting — no production callers; used directly in unit tests
  static String redactDottedClassOopRef(String line) {
    return redactStringOopRef(line, false);
  }

  /**
   * Redacts the class name in {@code is an oop: ClassName}: <code>is an oop: com.company.Class
   * </code> to <code>is an oop: redacted.Redacted</code>
   */
  static String redactOopClassName(String line) {
    return replaceAll(IS_AN_OOP, line, m -> m.group(1) + redactDottedClassName(m.group(2)));
  }

  /**
   * Redacts hex-dump bytes in <code>points into unknown readable memory:</code> lines, keeping the
   * optional leading address. Handles two formats:
   *
   * <ul>
   *   <li>{@code memory: 0x<addr> | ff ff ff ff} to {@code memory: 0x<addr> | REDACTED}
   *   <li>{@code memory: ff ff ff ff} to {@code memory: REDACTED}
   * </ul>
   */
  static String redactReadableMemoryHexDump(String line) {
    return replaceAll(READABLE_MEMORY_HEX_DUMP, line, m -> m.group(1) + REDACTED_STRING);
  }

  /**
   * Redacts a slash-separated JVM class name, unless it belongs to a known package. Unknown classes
   * are fully redacted: <code>com/company/SomeType</code> to <code>redacted/Redacted</code>; <code>
   * java/lang/String</code> unchanged.
   */
  static String redactJvmClassName(String className) {
    if (isKnownJvmPackage(className)) {
      return className;
    }
    return redactClassName('/', className);
  }

  /**
   * Redacts a dot-separated class name, unless it belongs to a known package. Unknown classes are
   * fully redacted: <code>com.company.SomeType</code> to <code>redacted.Redacted</code>; <code>
   * java.lang.String</code> unchanged.
   */
  static String redactDottedClassName(String className) {
    if (isKnownJvmPackage(className.replace('.', '/'))) {
      return className;
    }
    return redactClassName('.', className);
  }

  /**
   * Replaces a packaged class name by the fixed placeholder {@code redacted<sep>Redacted}. A name
   * without any separator has no package and is returned unchanged.
   */
  private static String redactClassName(char sep, String className) {
    if (className.indexOf(sep) < 0) {
      return className; // no package — nothing to redact
    }
    return REDACTED + sep + REDACTED_CLASS;
  }

  /**
   * Redacts all but the parent directory and filename from a library path, collapsing all
   * intermediate segments to a single {@code redacted}. <code>/path/to/dir/lib.so</code> to <code>
   * /redacted/dir/lib.so</code>
   */
  static String redactPath(String path) {
    int last = path.lastIndexOf('/');
    if (last <= 0) {
      return path; // /file or empty — nothing to redact
    }
    int secondLast = path.lastIndexOf('/', last - 1);
    if (secondLast <= 0) {
      return path; // /dir/file — nothing to redact
    }
    // Collapse everything before the second-last slash to a single /redacted
    return "/" + REDACTED + path.substring(secondLast);
  }

  /**
   * Tells whether a slash-separated class name starts with one of the known package prefixes, or
   * has a second path segment starting with {@code datadog}.
   */
  private static boolean isKnownJvmPackage(String slashClassName) {
    for (String prefix : KNOWN_PACKAGES_PREFIXES) {
      if (slashClassName.startsWith(prefix)) {
        return true;
      }
    }
    // Match *.datadog* — packages whose second segment starts with "datadog"
    // e.g. com/datadog/..., org/datadog/..., com/datadoghq/...
    int slash = slashClassName.indexOf('/');
    return slash > 0 && slashClassName.startsWith("datadog", slash + 1);
  }

  /**
   * Replaces every match of {@code pattern} in {@code input} by the text computed by {@code
   * replacement}. The computed text is inserted literally ({@code $} and {@code \} have no special
   * meaning). Returns {@code input} itself when nothing matches.
   */
  private static String replaceAll(
      Pattern pattern, String input, Function<Matcher, String> replacement) {
    Matcher m = pattern.matcher(input);
    if (!m.find()) {
      return input;
    }
    StringBuilder sb = new StringBuilder(input.length());
    int lastEnd = 0;
    do {
      sb.append(input, lastEnd, m.start());
      sb.append(replacement.apply(m));
      lastEnd = m.end();
    } while (m.find());
    return sb.append(input, lastEnd, input.length()).toString();
  }
}