Skip to content

Commit 92b8b82

Browse files
bric3 and devflow.devflow-routing-intake authored
fix(crashtracking): Properly handle J9 crash report (#10976)
fix(crashtracking): Properly handle J9 crash report

The J9 crashtracking initializer only worked when the JVM arguments spelled out everything explicitly. It missed two OpenJ9 behaviors:

- `-Xdump:java:file=...` is optional, and without it the javacore is written to the JVM working directory
- the `-Xdump:tool:exec=` value may be passed as `... %pid`, not only the escaped `...\ %pid` form

As a result, the crash uploader could be initialized with the wrong javacore location or fail to recover the configured uploader script path.

Now it defaults the javacore location to `user.dir` when OpenJ9 uses its implicit output path, and accepts both `exec=<path> %pid` and `exec=<path>\ %pid` when extracting the crash uploader script path.

Co-authored-by: devflow.devflow-routing-intake <devflow.devflow-routing-intake@kubernetes.us1.ddbuild.io>
1 parent c9ecd86 commit 92b8b82

File tree

6 files changed

+329
-122
lines changed

6 files changed

+329
-122
lines changed

dd-java-agent/agent-crashtracking/src/main/java/datadog/crashtracking/Initializer.java

Lines changed: 46 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import com.sun.management.HotSpotDiagnosticMXBean;
99
import datadog.environment.JavaVirtualMachine;
1010
import datadog.environment.OperatingSystem;
11+
import datadog.environment.SystemProperties;
1112
import datadog.libs.ddprof.DdprofLibraryLoader;
1213
import datadog.trace.api.Platform;
1314
import datadog.trace.util.TempLocationManager;
@@ -127,22 +128,31 @@ public static boolean initialize(boolean forceJmx) {
127128
*/
128129
private static boolean initializeJ9() {
129130
try {
130-
String scriptPath = getJ9CrashUploaderScriptPath();
131-
132131
// Check if -Xdump:tool is already configured via JVM arguments
133132
boolean xdumpConfigured = isXdumpToolConfigured();
134133
// Get custom javacore path if configured
135134
String javacorePath = getJ9JavacorePath();
135+
if (javacorePath == null || javacorePath.isEmpty()) {
136+
// OpenJ9 defaults javacore output to the JVM working directory. Persist that location in
137+
// the uploader config so the crash script does not need to guess from its own cwd.
138+
javacorePath = SystemProperties.get("user.dir");
139+
}
136140

137141
if (xdumpConfigured) {
138142
LOG.debug("J9 crash tracking: -Xdump:tool already configured, crash uploads enabled");
143+
// Use the path from the -Xdump:tool arg when available (allows callers to specify a known
144+
// path via -Xdump:tool:events=gpf+abort,exec=<path>\ %pid), falling back to the default
145+
// TempLocationManager path when the path cannot be extracted.
146+
String extractedPath = extractJ9ScriptPathFromXdumpArg();
147+
String scriptPath = extractedPath != null ? extractedPath : getJ9CrashUploaderScriptPath();
139148
// Initialize the crash uploader script and config manager
140149
CrashUploaderScriptInitializer.initialize(scriptPath, null, javacorePath);
141150
// Also set up OOME notifier script
142151
String oomeScript = getScript("dd_oome_notifier");
143152
OOMENotifierScriptInitializer.initialize(oomeScript);
144153
return true;
145154
} else {
155+
String scriptPath = getJ9CrashUploaderScriptPath();
146156
// Log instructions for manual configuration
147157
LOG.info("J9 JVM detected. To enable crash tracking, add this JVM argument at startup:");
148158
LOG.info(" -Xdump:tool:events=gpf+abort,exec={}\\ %pid", scriptPath);
@@ -158,6 +168,40 @@ private static boolean initializeJ9() {
158168
return false;
159169
}
160170

171+
/**
172+
* Extract the crash uploader script path from the {@code -Xdump:tool} JVM argument.
173+
*
174+
* <p>Looks for a JVM argument of the form {@code
175+
* -Xdump:tool:events=...,exec=/path/to/dd_crash_uploader.sh\ %pid} and returns the script path
176+
* portion (before the {@code \ %pid} argument separator).
177+
*
178+
* @return the script path, or {@code null} if not found or not extractable
179+
*/
180+
private static String extractJ9ScriptPathFromXdumpArg() {
181+
List<String> vmArgs = JavaVirtualMachine.getVmOptions();
182+
for (String arg : vmArgs) {
183+
if (arg.startsWith("-Xdump:tool") && arg.contains("dd_crash_uploader")) {
184+
int execIdx = arg.indexOf("exec=");
185+
if (execIdx >= 0) {
186+
String execVal = arg.substring(execIdx + 5);
187+
// Separator between command and args: plain space, or "\ " (backslash + space) as
188+
// suggested by the Initializer's log hint. Check plain space first since that is the
189+
// form that actually works when the shell splits the exec string into tokens.
190+
int spaceIdx = execVal.indexOf(' ');
191+
if (spaceIdx >= 0) {
192+
String candidate = execVal.substring(0, spaceIdx);
193+
// Strip a trailing backslash left over from the "\ %pid" notation
194+
return candidate.endsWith("\\")
195+
? candidate.substring(0, candidate.length() - 1)
196+
: candidate;
197+
}
198+
return execVal;
199+
}
200+
}
201+
}
202+
return null;
203+
}
204+
161205
/**
162206
* Get the custom javacore file path from -Xdump:java:file=... JVM argument.
163207
*

dd-smoke-tests/crashtracking/build.gradle

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,4 +42,3 @@ tasks.withType(Test).configureEach {
4242
showStandardStreams = true
4343
}
4444
}
45-
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
package datadog.smoketest.crashtracking;
2+
3+
import java.lang.reflect.Field;
4+
import java.lang.reflect.Method;
5+
import java.nio.file.Files;
6+
import java.nio.file.Paths;
7+
import java.util.concurrent.TimeUnit;
8+
9+
/**
 * Test application for OpenJ9 crash tracking smoke tests.
 *
 * <p>Waits for the agent to write the crash-uploader script, then crashes the JVM via a null
 * pointer write using {@code sun.misc.Unsafe.putAddress(0L, 0L)} (accessed via reflection so the
 * class compiles against any Java version). This triggers a GPF (general protection fault) on
 * OpenJ9, which the {@code -Xdump:tool:events=gpf+abort,...} handler detects.
 *
 * <p>Note: {@code sun.misc.Unsafe.getLong(0L)} is converted to a Java-level {@link
 * NullPointerException} on Semeru/OpenJ9 25, so it does not exercise crash tracking. {@code
 * sun.misc.Unsafe.putAddress(0L, 0L)} goes directly to {@code unsafePut64} in the JVM native
 * library and produces a native SIGSEGV at address 0.
 *
 * <p>System properties consumed:
 *
 * <ul>
 *   <li>{@code dd.test.crash_script} — path of the crash-uploader script; the application waits
 *       up to 30 seconds for the agent to write it before crashing, ensuring the agent is fully
 *       initialized. When the property is not set, the application crashes immediately.
 * </ul>
 */
public class OpenJ9CrashtrackingTestApplication {
  public static void main(String[] args) throws Exception {
    // Wait for the agent to write the crash-uploader script (proves initialization is done)
    String scriptPath = System.getProperty("dd.test.crash_script");
    if (scriptPath != null) {
      long deadline = System.nanoTime() + TimeUnit.SECONDS.toNanos(30);
      // Compare nanoTime values by subtraction, never directly: the counter's origin is
      // arbitrary and may overflow, so only differences are meaningful.
      while (!Files.exists(Paths.get(scriptPath)) && deadline - System.nanoTime() > 0) {
        Thread.sleep(200);
      }
      if (!Files.exists(Paths.get(scriptPath))) {
        System.err.println("Timeout: crash script not created at " + scriptPath);
        System.exit(-1);
      }
    }

    System.out.println("===> Crash script ready, crashing JVM via Unsafe.putAddress(0L, 0L)...");
    // Flush explicitly: the JVM is about to die natively and will not run normal shutdown.
    System.out.flush();

    // Write to address 0 via sun.misc.Unsafe to trigger a SIGSEGV (GPF event).
    // Unsafe.getLong(0L) was not enough on OpenJ9 here; it threw a NullPointerException instead.
    Class<?> unsafeClass = Class.forName("sun.misc.Unsafe");
    Field f = unsafeClass.getDeclaredField("theUnsafe");
    f.setAccessible(true);
    Object theUnsafe = f.get(null);
    Method putAddress = unsafeClass.getDeclaredMethod("putAddress", long.class, long.class);
    putAddress.invoke(theUnsafe, 0L, 0L);
  }
}
Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
package datadog.smoketest;
2+
3+
import static org.junit.jupiter.api.Assertions.assertEquals;
4+
import static org.junit.jupiter.api.Assertions.assertNotNull;
5+
import static org.junit.jupiter.api.Assertions.assertTrue;
6+
7+
import com.squareup.moshi.Moshi;
8+
import java.io.File;
9+
import java.io.IOException;
10+
import java.nio.charset.StandardCharsets;
11+
import java.nio.file.FileSystems;
12+
import java.nio.file.Files;
13+
import java.nio.file.Path;
14+
import java.nio.file.Paths;
15+
import java.util.Comparator;
16+
import java.util.Map;
17+
import java.util.concurrent.BlockingQueue;
18+
import java.util.concurrent.LinkedBlockingQueue;
19+
import java.util.concurrent.TimeUnit;
20+
import java.util.stream.Stream;
21+
import okhttp3.mockwebserver.Dispatcher;
22+
import okhttp3.mockwebserver.MockResponse;
23+
import okhttp3.mockwebserver.MockWebServer;
24+
import okhttp3.mockwebserver.RecordedRequest;
25+
import org.junit.jupiter.api.AfterAll;
26+
import org.junit.jupiter.api.AfterEach;
27+
import org.junit.jupiter.api.BeforeEach;
28+
29+
abstract class AbstractCrashtrackingSmokeTest {
30+
static final OutputThreads OUTPUT = new OutputThreads();
31+
static final Path LOG_FILE_DIR =
32+
Paths.get(System.getProperty("datadog.smoketest.builddir"), "reports");
33+
34+
MockWebServer tracingServer;
35+
final BlockingQueue<CrashTelemetryData> crashEvents = new LinkedBlockingQueue<>();
36+
final Moshi moshi = new Moshi.Builder().build();
37+
Path tempDir;
38+
39+
@BeforeEach
40+
void setUpTracingServer() throws Exception {
41+
tempDir = Files.createTempDirectory("dd-smoketest-");
42+
crashEvents.clear();
43+
tracingServer = new MockWebServer();
44+
tracingServer.setDispatcher(
45+
new Dispatcher() {
46+
@Override
47+
public MockResponse dispatch(RecordedRequest request) {
48+
String data = request.getBody().readString(StandardCharsets.UTF_8);
49+
System.out.println("URL ====== " + request.getPath());
50+
if ("/telemetry/proxy/api/v2/apmtelemetry".equals(request.getPath())) {
51+
try {
52+
MinimalTelemetryData minimal =
53+
moshi.adapter(MinimalTelemetryData.class).fromJson(data);
54+
if ("logs".equals(minimal.request_type)) {
55+
crashEvents.add(moshi.adapter(CrashTelemetryData.class).fromJson(data));
56+
}
57+
} catch (IOException e) {
58+
System.out.println("Unable to parse: " + e);
59+
}
60+
}
61+
System.out.println(data);
62+
return new MockResponse().setResponseCode(200);
63+
}
64+
});
65+
OUTPUT.clearMessages();
66+
}
67+
68+
@AfterEach
69+
void tearDownTracingServer() throws Exception {
70+
tracingServer.shutdown();
71+
try (Stream<Path> files = Files.walk(tempDir)) {
72+
files.sorted(Comparator.reverseOrder()).map(Path::toFile).forEach(File::delete);
73+
}
74+
Files.deleteIfExists(tempDir);
75+
}
76+
77+
@AfterAll
78+
static void shutdownOutputThreads() {
79+
OUTPUT.close();
80+
}
81+
82+
protected long crashDataTimeoutMs() {
83+
return 10 * 1000;
84+
}
85+
86+
protected String assertCrashPing() throws InterruptedException, IOException {
87+
CrashTelemetryData crashData = crashEvents.poll(crashDataTimeoutMs(), TimeUnit.MILLISECONDS);
88+
assertNotNull(crashData, "Crash ping not sent");
89+
assertTrue(crashData.payload.get(0).tags.contains("is_crash_ping:true"), "Not a crash ping");
90+
final Object uuid =
91+
moshi.adapter(Map.class).fromJson(crashData.payload.get(0).message).get("crash_uuid");
92+
assertNotNull(uuid, "crash uuid not found");
93+
return uuid.toString();
94+
}
95+
96+
protected CrashTelemetryData assertCrashData(String uuid)
97+
throws InterruptedException, IOException {
98+
CrashTelemetryData crashData = crashEvents.poll(crashDataTimeoutMs(), TimeUnit.MILLISECONDS);
99+
assertNotNull(crashData, "Crash data not uploaded");
100+
assertTrue(
101+
crashData.payload.get(0).tags.contains("severity:crash"), "Expected severity:crash tag");
102+
final Object receivedUuid =
103+
moshi.adapter(Map.class).fromJson(crashData.payload.get(0).message).get("uuid");
104+
assertEquals(uuid, receivedUuid, "crash uuid should match the one sent with the ping");
105+
return crashData;
106+
}
107+
108+
static String javaPath() {
109+
String sep = FileSystems.getDefault().getSeparator();
110+
return System.getProperty("java.home") + sep + "bin" + sep + "java";
111+
}
112+
113+
static String appShadowJar() {
114+
return System.getProperty("datadog.smoketest.app.shadowJar.path");
115+
}
116+
117+
static String agentShadowJar() {
118+
return System.getProperty("datadog.smoketest.agent.shadowJar.path");
119+
}
120+
}

0 commit comments

Comments
 (0)