Skip to content

Commit aed9baf

Browse files
bric3devflow.devflow-routing-intake
andauthored
Add processor context for hotspot crash tracking (#10867)
chore: Add processor context for hotspot crash tracking fix: supports registers emitted for different platforms. JVM signal handlers produce different output / formats depending on the platform. For example, Linux-x64 will of course have different register names, but they will be formatted four per line, while linux-aarch64 will use one register per line. Links to different `os::print_context` implementations * https://github.com/openjdk/jdk/blob/master/src/hotspot/os_cpu/linux_x86/os_linux_x86.cpp#L419 * https://github.com/openjdk/jdk/blob/master/src/hotspot/os_cpu/linux_aarch64/os_linux_aarch64.cpp#L341 * https://github.com/openjdk/jdk/blob/master/src/hotspot/os_cpu/bsd_x86/os_bsd_x86.cpp#L601 * https://github.com/openjdk/jdk/blob/master/src/hotspot/os_cpu/bsd_aarch64/os_bsd_aarch64.cpp#L464 Merge remote-tracking branch 'origin/master' into bdu/processor-context chore: cleanup paths chore: empty commit Co-authored-by: devflow.devflow-routing-intake <devflow.devflow-routing-intake@kubernetes.us1.ddbuild.io>
1 parent bbaa2d2 commit aed9baf

File tree

11 files changed

+2815
-12
lines changed

11 files changed

+2815
-12
lines changed

dd-java-agent/agent-crashtracking/src/main/java/datadog/crashtracking/CrashUploader.java

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -571,6 +571,18 @@ private RequestBody makeErrorTrackingRequestBody(@Nonnull CrashLog payload, bool
571571
"os.version")); // this has been restructured under OsInfo so taking raw here
572572
writer.endObject();
573573
}
574+
// experimental
575+
if (payload.experimental != null && payload.experimental.ucontext != null) {
576+
writer.name("experimental");
577+
writer.beginObject();
578+
writer.name("ucontext");
579+
writer.beginObject();
580+
for (Map.Entry<String, String> entry : payload.experimental.ucontext.entrySet()) {
581+
writer.name(entry.getKey()).value(entry.getValue());
582+
}
583+
writer.endObject();
584+
writer.endObject();
585+
}
574586
writer.endObject();
575587
}
576588
return RequestBody.create(APPLICATION_JSON, buf.readByteString());

dd-java-agent/agent-crashtracking/src/main/java/datadog/crashtracking/dto/CrashLog.java

Lines changed: 33 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,8 @@ public final class CrashLog {
3939
@Json(name = "sig_info")
4040
public final SigInfo sigInfo;
4141

42+
public final Experimental experimental;
43+
4244
public CrashLog(
4345
String uuid,
4446
boolean incomplete,
@@ -49,6 +51,30 @@ public CrashLog(
4951
ProcInfo procInfo,
5052
SigInfo sigInfo,
5153
String dataSchemaVersion) {
54+
this(
55+
uuid,
56+
incomplete,
57+
timestamp,
58+
error,
59+
metadata,
60+
osInfo,
61+
procInfo,
62+
sigInfo,
63+
dataSchemaVersion,
64+
null);
65+
}
66+
67+
public CrashLog(
68+
String uuid,
69+
boolean incomplete,
70+
String timestamp,
71+
ErrorData error,
72+
Metadata metadata,
73+
OSInfo osInfo,
74+
ProcInfo procInfo,
75+
SigInfo sigInfo,
76+
String dataSchemaVersion,
77+
Experimental experimental) {
5278
this.uuid = uuid != null ? uuid : RandomUtils.randomUUID().toString();
5379
this.incomplete = incomplete;
5480
this.timestamp = timestamp;
@@ -58,6 +84,7 @@ public CrashLog(
5884
this.procInfo = procInfo;
5985
this.sigInfo = sigInfo;
6086
this.dataSchemaVersion = dataSchemaVersion;
87+
this.experimental = experimental;
6188
}
6289

6390
public String toJson() {
@@ -85,7 +112,8 @@ public boolean equals(Object o) {
85112
&& Objects.equals(osInfo, crashLog.osInfo)
86113
&& Objects.equals(procInfo, crashLog.procInfo)
87114
&& Objects.equals(sigInfo, crashLog.sigInfo)
88-
&& Objects.equals(dataSchemaVersion, crashLog.dataSchemaVersion);
115+
&& Objects.equals(dataSchemaVersion, crashLog.dataSchemaVersion)
116+
&& Objects.equals(experimental, crashLog.experimental);
89117
}
90118

91119
@Override
@@ -100,7 +128,8 @@ public int hashCode() {
100128
procInfo,
101129
sigInfo,
102130
version,
103-
dataSchemaVersion);
131+
dataSchemaVersion,
132+
experimental);
104133
}
105134

106135
public boolean equalsForTest(Object o) {
@@ -119,6 +148,7 @@ public boolean equalsForTest(Object o) {
119148
&& Objects.equals(error, crashLog.error)
120149
&& Objects.equals(procInfo, crashLog.procInfo)
121150
&& Objects.equals(sigInfo, crashLog.sigInfo)
122-
&& Objects.equals(dataSchemaVersion, crashLog.dataSchemaVersion);
151+
&& Objects.equals(dataSchemaVersion, crashLog.dataSchemaVersion)
152+
&& Objects.equals(experimental, crashLog.experimental);
123153
}
124154
}
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
package datadog.crashtracking.dto;
2+
3+
import java.util.Map;
4+
import java.util.Objects;
5+
6+
/**
 * Value object grouping the experimental parts of a crash report.
 *
 * <p>It currently carries a single attribute, {@code ucontext}: a map of string names to string
 * values. Instances are compared by the content of that map.
 */
public final class Experimental {
  /**
   * Name/value entries of the captured context, in the iteration order of the map supplied at
   * construction time. The map is unmodifiable; it is {@code null} when no map was supplied.
   */
  public final Map<String, String> ucontext;

  /**
   * Creates a new instance holding a snapshot of the given entries.
   *
   * @param ucontext the entries to capture, or {@code null} when there are none. The map is copied,
   *     so changes made to it after this call are not reflected in this object.
   */
  public Experimental(Map<String, String> ucontext) {
    // Defensive copy: the field is public and takes part in equals/hashCode, so it must not alias
    // a map the caller can keep mutating. LinkedHashMap keeps the caller's iteration order.
    this.ucontext =
        ucontext == null
            ? null
            : java.util.Collections.unmodifiableMap(
                new java.util.LinkedHashMap<String, String>(ucontext));
  }

  /**
   * Compares two instances by the content of their {@code ucontext} maps.
   *
   * @param o the object to compare with
   * @return {@code true} if {@code o} is an {@code Experimental} with an equal {@code ucontext}
   */
  @Override
  public boolean equals(Object o) {
    if (this == o) {
      return true;
    }
    if (!(o instanceof Experimental)) {
      return false;
    }
    Experimental that = (Experimental) o;
    return Objects.equals(ucontext, that.ucontext);
  }

  /**
   * Returns a hash code derived from {@code ucontext}, consistent with {@link #equals(Object)}.
   */
  @Override
  public int hashCode() {
    return Objects.hash(ucontext);
  }
}

dd-java-agent/agent-crashtracking/src/main/java/datadog/crashtracking/parsers/HotspotCrashLogParser.java

Lines changed: 61 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import datadog.crashtracking.buildid.BuildInfo;
88
import datadog.crashtracking.dto.CrashLog;
99
import datadog.crashtracking.dto.ErrorData;
10+
import datadog.crashtracking.dto.Experimental;
1011
import datadog.crashtracking.dto.Metadata;
1112
import datadog.crashtracking.dto.OSInfo;
1213
import datadog.crashtracking.dto.ProcInfo;
@@ -21,11 +22,25 @@
2122
import java.time.format.DateTimeFormatter;
2223
import java.time.format.DateTimeParseException;
2324
import java.util.ArrayList;
25+
import java.util.LinkedHashMap;
2426
import java.util.List;
2527
import java.util.Locale;
28+
import java.util.Map;
2629
import java.util.regex.Matcher;
2730
import java.util.regex.Pattern;
2831

32+
/**
33+
* Parser for HotSpot JVM fatal error logs ({@code hs_err_pidNNN.log}).
34+
*
35+
* <p>The log is parsed using a linear state machine that mirrors the deterministic section order
36+
* emitted by {@code VMError::report()} in HotSpot. The section order is fixed for a given platform
37+
* but differs across OS/CPU combinations.
38+
*
39+
* <p>If an early sentinel line is absent (e.g. {@code "Native frames:"} is missing because the JVM
40+
* crashed before producing a stack), the state machine will not advance past {@code THREAD} state
41+
* and subsequent sections such as {@code siginfo} and registers will be silently skipped. The
42+
* resulting {@link datadog.crashtracking.dto.CrashLog} will be marked {@code incomplete}.
43+
*/
2944
public final class HotspotCrashLogParser {
3045
private static final DateTimeFormatter ZONED_DATE_TIME_FORMATTER =
3146
DateTimeFormatter.ofPattern("EEE MMM ppd HH:mm:ss yyyy zzz", Locale.getDefault());
@@ -45,6 +60,7 @@ enum State {
4560
SUMMARY,
4661
THREAD,
4762
STACKTRACE,
63+
REGISTERS,
4864
SEEK_DYNAMIC_LIBRARIES,
4965
DYNAMIC_LIBRARIES,
5066
DONE
@@ -68,6 +84,20 @@ public HotspotCrashLogParser() {
6884
+ "(?:si_addr:\\s+(0x[0-9a-fA-F]+)|si_pid:\\s+(\\d+),\\s+si_uid:\\s+(\\d+))");
6985
private static final Pattern DYNAMIC_LIBS_PATH_PARSER =
7086
Pattern.compile("^(?:0x)?[0-9a-fA-F]+(?:-[0-9a-fA-F]+)?\\s+(?:[^\\s/\\[]+\\s+)*(.*)$");
87+
// Matches register entries like:
88+
// * RAX=0x..., R8 =0x..., TRAPNO=0x... (x86-64)
89+
// * R0=0x..., R30=0x... (Linux aarch64)
90+
// * x0=0x..., fp=0x..., lr=0x..., sp=0x..., pc=0x... (macOS aarch64)
91+
// Note that register formatting varies by platform, the JVM crash handler can emit one or four
92+
// per line.
93+
private static final Pattern REGISTER_ENTRY_PARSER =
94+
Pattern.compile("([A-Za-z][A-Za-z0-9]*)\\s*=\\s*(0x[0-9a-fA-F]+)");
95+
// Used for the REGISTERS-state exit condition only: the register name must start the line
96+
// (after optional whitespace). This prevents lines like "Top of Stack: (sp=0x...)" and
97+
// "Instructions: (pc=0x...)" from being mistaken for register entries by REGISTER_ENTRY_PARSER's
98+
// find(), which would otherwise match the lowercase "sp"/"pc" tokens embedded in those lines.
99+
private static final Pattern REGISTER_LINE_START =
100+
Pattern.compile("^\\s*[A-Za-z][A-Za-z0-9]*\\s*=\\s*0x");
71101

72102
private StackFrame parseLine(String line) {
73103
if (line == null || line.isEmpty()) {
@@ -87,10 +117,10 @@ private StackFrame parseLine(String line) {
87117
switch (firstChar) {
88118
case 'J':
89119
{
90-
// J 36572 c2 datadog.trace.util.AgentTaskScheduler$PeriodicTask.run()V (25 bytes) @
91-
// 0x00007f2fd0198488 [0x00007f2fd0198420+0x0000000000000068]
92-
// J 3896 c2 java.nio.ByteBuffer.allocate(I)Ljava/nio/ByteBuffer; java.base@21.0.1 (20
93-
// bytes) @ 0x0000000112ad51e8 [0x0000000112ad4fc0+0x0000000000000228]
120+
// spotless:off
121+
// J 36572 c2 datadog.trace.util.AgentTaskScheduler$PeriodicTask.run()V (25 bytes) @ 0x00007f2fd0198488 [0x00007f2fd0198420+0x0000000000000068]
122+
// J 3896 c2 java.nio.ByteBuffer.allocate(I)Ljava/nio/ByteBuffer; java.base@21.0.1 (20 bytes) @ 0x0000000112ad51e8 [0x0000000112ad4fc0+0x0000000000000228]
123+
// spotless:on
94124
String[] parts = SPACE_SPLITTER.split(line);
95125
if (parts.length > 3) {
96126
functionName = parts[3];
@@ -224,6 +254,7 @@ public CrashLog parse(String uuid, String crashLog) {
224254
String datetime = null;
225255
boolean incomplete = false;
226256
String oomMessage = null;
257+
Map<String, String> registers = null;
227258

228259
String[] lines = NEWLINE_SPLITTER.split(crashLog);
229260
outer:
@@ -291,6 +322,9 @@ public CrashLog parse(String uuid, String crashLog) {
291322
Integer siUid = safelyParseInt(siginfoMatcher.group(7));
292323
sigInfo = new SigInfo(number, name, siCode, sigAction, address, siPid, siUid);
293324
}
325+
} else if (line.startsWith("Registers:")) {
326+
registers = new LinkedHashMap<>();
327+
state = State.REGISTERS;
294328
} else if (line.contains("P R O C E S S")) {
295329
state = State.SEEK_DYNAMIC_LIBRARIES;
296330
} else {
@@ -301,6 +335,17 @@ public CrashLog parse(String uuid, String crashLog) {
301335
}
302336
}
303337
break;
338+
case REGISTERS:
339+
if (!line.isEmpty() && !REGISTER_LINE_START.matcher(line).find()) {
340+
// non-empty line that does not start with a register entry signals end of section
341+
state = State.STACKTRACE;
342+
} else {
343+
final Matcher m = REGISTER_ENTRY_PARSER.matcher(line);
344+
while (m.find()) {
345+
registers.put(m.group(1), m.group(2));
346+
}
347+
}
348+
break;
304349
case SEEK_DYNAMIC_LIBRARIES:
305350
if (line.startsWith("Dynamic libraries:")) {
306351
state = State.DYNAMIC_LIBRARIES;
@@ -387,8 +432,19 @@ public CrashLog parse(String uuid, String crashLog) {
387432
Metadata metadata = new Metadata("dd-trace-java", VersionInfo.VERSION, "java", null);
388433
Integer parsedPid = safelyParseInt(pid);
389434
ProcInfo procInfo = parsedPid != null ? new ProcInfo(parsedPid) : null;
435+
Experimental experimental =
436+
(registers != null && !registers.isEmpty()) ? new Experimental(registers) : null;
390437
return new CrashLog(
391-
uuid, incomplete, datetime, error, metadata, OSInfo.current(), procInfo, sigInfo, "1.0");
438+
uuid,
439+
incomplete,
440+
datetime,
441+
error,
442+
metadata,
443+
OSInfo.current(),
444+
procInfo,
445+
sigInfo,
446+
"1.0",
447+
experimental);
392448
}
393449

394450
static String dateTimeToISO(String datetime) {

dd-java-agent/agent-crashtracking/src/test/java/datadog/crashtracking/parsers/HotspotCrashLogParserTest.java

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,42 @@ public void testIncompleteParsing() throws Exception {
5252
assertEquals(0, crashLog.error.stack.frames.length);
5353
}
5454

55+
/** macOS aarch64 uses lowercase register names: x0-x28, fp, lr, sp, pc, cpsr */
56+
@Test
57+
public void testRegisterParsingMacosAarch64() throws Exception {
58+
CrashLog crashLog =
59+
new HotspotCrashLogParser()
60+
.parse(
61+
UUID.randomUUID().toString(), readFileAsString("sample-crash-macos-aarch64.txt"));
62+
63+
assertNotNull(crashLog.experimental, "experimental field should be populated");
64+
assertNotNull(crashLog.experimental.ucontext, "ucontext should be populated");
65+
assertEquals("0x0000000000000c55", crashLog.experimental.ucontext.get("x0"));
66+
assertEquals("0x0000000000000000", crashLog.experimental.ucontext.get("x2"));
67+
assertEquals("0x000000016feee210", crashLog.experimental.ucontext.get("fp"));
68+
assertEquals("0x0000000116d0c970", crashLog.experimental.ucontext.get("lr"));
69+
assertEquals("0x000000016feee0f0", crashLog.experimental.ucontext.get("sp"));
70+
assertEquals("0x000000010f8ac794", crashLog.experimental.ucontext.get("pc"));
71+
assertEquals("0x0000000060001000", crashLog.experimental.ucontext.get("cpsr"));
72+
}
73+
74+
/** Linux aarch64 uses uppercase register names: R0-R30 */
75+
@Test
76+
public void testRegisterParsingLinuxAarch64() throws Exception {
77+
CrashLog crashLog =
78+
new HotspotCrashLogParser()
79+
.parse(
80+
UUID.randomUUID().toString(), readFileAsString("sample-crash-linux-aarch64.txt"));
81+
82+
assertNotNull(crashLog.experimental, "experimental field should be populated");
83+
assertNotNull(crashLog.experimental.ucontext, "ucontext should be populated");
84+
assertEquals("0x0000000000000000", crashLog.experimental.ucontext.get("R0"));
85+
assertEquals("0x0000000000000001", crashLog.experimental.ucontext.get("R1"));
86+
assertEquals("0x0000ffff9efa168c", crashLog.experimental.ucontext.get("R30"));
87+
// "Register to memory mapping:" section must NOT be included
88+
assertEquals(31, crashLog.experimental.ucontext.size(), "R0-R30 = 31 registers");
89+
}
90+
5591
private String readFileAsString(String resource) throws IOException {
5692
try (InputStream stream = getClass().getClassLoader().getResourceAsStream(resource)) {
5793
return new BufferedReader(
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
{"timestamp":"2024-09-20T13:19:06Z","ddsource":"crashtracker","error":{"is_crash":true,"type":"SIGSEGV","message":"Process terminated by signal SIGSEGV","source_type":"Crashtracking","stack":{"format":"CrashTrackerV1","frames":[{"function":"__pthread_clockjoin_ex+0x255","path":"libpthread.so.0","relative_address":"0x9cd5"}]}},"sig_info":{"si_signo_human_readable":"SIGSEGV","si_signo":11,"si_code":0,"si_code_human_readable":"SI_USER","si_pid":554848,"si_uid":1000}}
1+
{"timestamp":"2024-09-20T13:19:06Z","ddsource":"crashtracker","error":{"is_crash":true,"type":"SIGSEGV","message":"Process terminated by signal SIGSEGV","source_type":"Crashtracking","stack":{"format":"CrashTrackerV1","frames":[{"function":"__pthread_clockjoin_ex+0x255","path":"libpthread.so.0","relative_address":"0x9cd5"}]}},"sig_info":{"si_signo_human_readable":"SIGSEGV","si_signo":11,"si_code":0,"si_code_human_readable":"SI_USER","si_pid":554848,"si_uid":1000},"experimental":{"ucontext":{"RAX":"0x00000000000000ca","RBX":"0x00000000000000ca","RCX":"0x00007f011ab1ccd7","RDX":"0x000000000008ca23","RSP":"0x00007ffeabf89710","RBP":"0x00007ffeabf897b8","RSI":"0x0000000000000000","RDI":"0x00007f01192129d0","R8":"0x0000000000000000","R9":"0x00007f0119212700","R10":"0x0000000000000000","R11":"0x0000000000000246","R12":"0x000000000008ca23","R13":"0x00007f01192129d0","R14":"0x00007ffeabf89840","R15":"0x00007f0119212700","RIP":"0x00007f011ab1ccd5","EFLAGS":"0x0000000000000246","CSGSFS":"0x002b000000000033","ERR":"0x0000000000000000","TRAPNO":"0x0000000000000000"}}}

0 commit comments

Comments
 (0)