Skip to content

Commit 3ff081c

Browse files
authored
Unset inherited JVM env vars in crashtracking scripts (#10819)
Unset inherited JVM env vars in crashtracking scripts

The OOME notifier and crash uploader scripts now unset JDK_JAVA_OPTIONS, JAVA_TOOL_OPTIONS, and _JAVA_OPTIONS before spawning a child JVM. This prevents port conflicts, memory contention, and lost diagnostics when the parent environment contains flags such as JMX remote ports.

Fixes #10766

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

Merge branch 'master' into jb/oome_script
Merge branch 'master' into jb/oome_script
Merge branch 'master' into jb/oome_script

Co-authored-by: jaroslav.bachorik <jaroslav.bachorik@datadoghq.com>
1 parent 64e46eb commit 3ff081c

File tree

5 files changed

+161
-0
lines changed

5 files changed

+161
-0
lines changed

dd-java-agent/agent-crashtracking/src/main/resources/datadog/crashtracking/notify_oome.bat

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,12 @@ echo Tags: %tags%
3434
echo JAVA_HOME: %java_home%
3535
echo PID: %PID%
3636

37+
:: Clear environment variables that the parent JVM may have set so the child JVM
38+
:: starts with a minimal configuration (avoids port conflicts, memory contention, etc.)
39+
set JDK_JAVA_OPTIONS=
40+
set JAVA_TOOL_OPTIONS=
41+
set _JAVA_OPTIONS=
42+
3743
:: Execute the Java command with the loaded values
3844
"%java_home%\bin\java" -Ddd.dogstatsd.start-delay=0 -jar "%agent%" sendOomeEvent "%tags%"
3945
set RC=%ERRORLEVEL%

dd-java-agent/agent-crashtracking/src/main/resources/datadog/crashtracking/notify_oome.sh

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,12 @@ echo "Tags: $config_tags"
4949
echo "JAVA_HOME: $config_java_home"
5050
echo "PID: $PID"
5151

52+
# Clear environment variables that the parent JVM may have set so the child JVM
53+
# starts with a minimal configuration (avoids port conflicts, memory contention, etc.)
54+
unset JDK_JAVA_OPTIONS
55+
unset JAVA_TOOL_OPTIONS
56+
unset _JAVA_OPTIONS
57+
5258
# Execute the Java command with the loaded values
5359
"$config_java_home/bin/java" -Ddd.dogstatsd.start-delay=0 -jar "$config_agent" sendOomeEvent "$config_tags"
5460
RC=$?

dd-java-agent/agent-crashtracking/src/main/resources/datadog/crashtracking/upload_crash.bat

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,11 @@ setlocal enabledelayedexpansion
44
:: Check if PID is provided
55
if "%1"=="" (
66
echo "Error: No PID provided. Running in legacy mode."
7+
REM Clear environment variables that the parent JVM may have set so the child JVM
REM below does not inherit them. REM is used instead of "::" because this code runs
REM inside a parenthesised IF block, where "::" is parsed as a label and can break the block.
set JDK_JAVA_OPTIONS=
set JAVA_TOOL_OPTIONS=
set _JAVA_OPTIONS=
11+
712
"!JAVA_HOME!\bin\java" -jar "!AGENT_JAR!" uploadCrash "!JAVA_ERROR_FILE!"
813
if %ERRORLEVEL% EQU 0 (
914
echo "Uploaded error file \"!JAVA_ERROR_FILE!\""
@@ -128,6 +133,12 @@ echo Error Log: %config_hs_err%
128133
echo JAVA_HOME: %config_java_home%
129134
echo PID: %PID%
130135

136+
:: Clear environment variables that the parent JVM may have set so the child JVM
137+
:: starts with a minimal configuration (avoids port conflicts, memory contention, etc.)
138+
set JDK_JAVA_OPTIONS=
139+
set JAVA_TOOL_OPTIONS=
140+
set _JAVA_OPTIONS=
141+
131142
:: Execute the Java command with the loaded values
132143
"%config_java_home%\bin\java" -jar "%config_agent%" uploadCrash -c "%configFile%" "%config_hs_err%"
133144
set RC=%ERRORLEVEL%

dd-java-agent/agent-crashtracking/src/main/resources/datadog/crashtracking/upload_crash.sh

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,11 @@ set +e
77
if [ -z "$1" ]; then
88
echo "Warn: No PID provided. Running in legacy mode."
99

10+
# Clear environment variables that the parent JVM may have set
11+
unset JDK_JAVA_OPTIONS
12+
unset JAVA_TOOL_OPTIONS
13+
unset _JAVA_OPTIONS
14+
1015
"!JAVA_HOME!/bin/java" -jar "!AGENT_JAR!" uploadCrash "!JAVA_ERROR_FILE!"
1116
if [ $? -eq 0 ]; then
1217
echo "Error file !JAVA_ERROR_FILE! was uploaded successfully"
@@ -119,6 +124,12 @@ echo "Error Log: $config_hs_err"
119124
echo "JAVA_HOME: $config_java_home"
120125
echo "PID: $PID"
121126

127+
# Clear environment variables that the parent JVM may have set so the child JVM
128+
# starts with a minimal configuration (avoids port conflicts, memory contention, etc.)
129+
unset JDK_JAVA_OPTIONS
130+
unset JAVA_TOOL_OPTIONS
131+
unset _JAVA_OPTIONS
132+
122133
# Execute the Java command with the loaded values
123134
"$config_java_home/bin/java" -jar "$config_agent" uploadCrash -c "$configFile" "$config_hs_err"
124135
RC=$?

dd-smoke-tests/crashtracking/src/test/java/datadog/smoketest/CrashtrackingSmokeTest.java

Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
import datadog.environment.OperatingSystem;
1414
import java.io.File;
1515
import java.io.IOException;
16+
import java.net.ServerSocket;
1617
import java.nio.charset.StandardCharsets;
1718
import java.nio.file.FileSystems;
1819
import java.nio.file.Files;
@@ -319,6 +320,132 @@ void testCombineTracking() throws Exception {
319320
assertOOMEvent();
320321
}
321322

323+
/**
324+
* Verifies that the OOME notifier script correctly unsets inherited JVM environment variables.
325+
* Without the fix, the child JVM spawned by the script would inherit JDK_JAVA_OPTIONS containing
326+
* JMX port-binding flags, causing a BindException and losing the OOME event.
327+
*
328+
* @see <a href="https://github.com/DataDog/dd-trace-java/issues/10766">#10766</a>
329+
*/
330+
@Test
331+
void testOomeTrackingWithInheritedEnvVars() throws Exception {
332+
int jmxPort = findFreePort();
333+
334+
Path script = tempDir.resolve("dd_oome_notifier." + getExtension());
335+
String onErrorValue = script + " %p";
336+
String errorFile = tempDir.resolve("hs_err_pid%p.log").toString();
337+
338+
String onOOMEArg =
339+
!Platform.isLinux()
340+
? "-XX:OnOutOfMemoryError=" + onErrorValue
341+
: "-Ddd.crashtracking.debug.autoconfig.enable=true";
342+
343+
List<String> processArgs = new ArrayList<>();
344+
processArgs.add(javaPath());
345+
processArgs.add("-javaagent:" + agentShadowJar());
346+
processArgs.add("-Xmx96m");
347+
processArgs.add("-Xms96m");
348+
if (!onOOMEArg.isEmpty()) {
349+
processArgs.add(onOOMEArg);
350+
}
351+
processArgs.add("-XX:ErrorFile=" + errorFile);
352+
processArgs.add("-XX:+CrashOnOutOfMemoryError");
353+
processArgs.add("-Ddd.dogstatsd.start-delay=0");
354+
processArgs.add("-Ddd.trace.enabled=false");
355+
processArgs.add("-jar");
356+
processArgs.add(appShadowJar());
357+
358+
ProcessBuilder pb = new ProcessBuilder(processArgs);
359+
pb.environment().put("DD_DOGSTATSD_PORT", String.valueOf(udpServer.getPort()));
360+
// Simulate admission controller injecting JMX flags via JDK_JAVA_OPTIONS
361+
pb.environment()
362+
.put(
363+
"JDK_JAVA_OPTIONS",
364+
"-Dcom.sun.management.jmxremote"
365+
+ " -Dcom.sun.management.jmxremote.port="
366+
+ jmxPort
367+
+ " -Dcom.sun.management.jmxremote.rmi.port="
368+
+ jmxPort
369+
+ " -Dcom.sun.management.jmxremote.authenticate=false"
370+
+ " -Dcom.sun.management.jmxremote.ssl=false");
371+
372+
System.out.println("==> Process args: " + pb.command());
373+
System.out.println("==> JMX port: " + jmxPort);
374+
375+
Process p = pb.start();
376+
OUTPUT.captureOutput(
377+
p, LOG_FILE_DIR.resolve("testProcess.testOomeTrackingWithInheritedEnvVars.log").toFile());
378+
379+
assertExpectedCrash(p);
380+
assertOOMEvent();
381+
}
382+
383+
/**
384+
* Verifies that the crash uploader script correctly unsets inherited JVM environment variables.
385+
* Without the fix, the child JVM spawned by the script would inherit JDK_JAVA_OPTIONS containing
386+
* JMX port-binding flags, causing a BindException and losing the crash data.
387+
*
388+
* @see <a href="https://github.com/DataDog/dd-trace-java/issues/10766">#10766</a>
389+
*/
390+
@Test
391+
void testCrashTrackingWithInheritedEnvVars() throws Exception {
392+
int jmxPort = findFreePort();
393+
394+
Path script = tempDir.resolve("dd_crash_uploader." + getExtension());
395+
String onErrorValue = script + " %p";
396+
String errorFile = tempDir.resolve("hs_err.log").toString();
397+
398+
String onErrorArg =
399+
!Platform.isLinux()
400+
? "-XX:OnError=" + onErrorValue
401+
: "-Ddd.crashtracking.debug.autoconfig.enable=true";
402+
403+
List<String> processArgs = new ArrayList<>();
404+
processArgs.add(javaPath());
405+
processArgs.add("-javaagent:" + agentShadowJar());
406+
processArgs.add("-Xmx96m");
407+
processArgs.add("-Xms96m");
408+
if (!onErrorArg.isEmpty()) {
409+
processArgs.add(onErrorArg);
410+
}
411+
processArgs.add("-XX:ErrorFile=" + errorFile);
412+
processArgs.add("-XX:+CrashOnOutOfMemoryError");
413+
processArgs.add("-Ddd.dogstatsd.start-delay=0");
414+
processArgs.add("-Ddd.trace.enabled=false");
415+
processArgs.add("-jar");
416+
processArgs.add(appShadowJar());
417+
418+
ProcessBuilder pb = new ProcessBuilder(processArgs);
419+
pb.environment().put("DD_TRACE_AGENT_PORT", String.valueOf(tracingServer.getPort()));
420+
// Simulate admission controller injecting JMX flags via JDK_JAVA_OPTIONS
421+
pb.environment()
422+
.put(
423+
"JDK_JAVA_OPTIONS",
424+
"-Dcom.sun.management.jmxremote"
425+
+ " -Dcom.sun.management.jmxremote.port="
426+
+ jmxPort
427+
+ " -Dcom.sun.management.jmxremote.rmi.port="
428+
+ jmxPort
429+
+ " -Dcom.sun.management.jmxremote.authenticate=false"
430+
+ " -Dcom.sun.management.jmxremote.ssl=false");
431+
432+
System.out.println("==> Process args: " + pb.command());
433+
System.out.println("==> JMX port: " + jmxPort);
434+
435+
Process p = pb.start();
436+
OUTPUT.captureOutput(
437+
p, LOG_FILE_DIR.resolve("testProcess.testCrashTrackingWithInheritedEnvVars.log").toFile());
438+
439+
assertExpectedCrash(p);
440+
assertCrashData(assertCrashPing());
441+
}
442+
443+
private static int findFreePort() throws IOException {
444+
try (ServerSocket socket = new ServerSocket(0)) {
445+
return socket.getLocalPort();
446+
}
447+
}
448+
322449
private static void assertExpectedCrash(Process p) throws InterruptedException {
323450
// exit code -1 means the test application exited prematurely
324451
// exit code > 0 means the test application crashed, as expected

0 commit comments

Comments
 (0)