Skip to content

Commit 5ad2ab6

Browse files
authored
Merge pull request #882 from cqse/ts/43260_profiler_crash_after_missconfig
TS-43260 Profiler crash fix in case of invalid config options
2 parents 9910871 + f9f88d4 commit 5ad2ab6

5 files changed

Lines changed: 209 additions & 11 deletions

File tree

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ We use [semantic versioning](http://semver.org/):
55
- PATCH version when you make backwards compatible bug fixes.
66

77
# Next version
8+
- [fix] _agent_: The profiled application no longer crashes when the profiler configuration is invalid (e.g., missing `teamscale-user`). Instead, the application starts normally without coverage collection.
89

910
# 36.5.0
1011
- [feature] _agent_: Renamed the docker image to `cqse/teamscale-java-profiler` and added support for the `linux/arm64` platform

agent/src/main/java/com/teamscale/jacoco/agent/PreMain.java

Lines changed: 68 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,17 @@
3535

3636
import static com.teamscale.jacoco.agent.logging.LoggingUtils.getLoggerContext;
3737

38-
/** Container class for the premain entry point for the agent. */
38+
/**
39+
* Container class for the premain entry point for the agent.
40+
* <p>
41+
* <b>Design decision: the agent must never crash the profiled application.</b>
42+
* Even when the agent is misconfigured, startup must not propagate any exception out of
43+
* {@link #premain(String, Instrumentation)}. An exception escaping {@code premain} would be re-thrown by the JVM as
44+
* {@code IllegalAgentException}, which aborts startup of the profiled application. The profiler is an observational
45+
* tool, so a failure to start up the profiler must degrade to "no coverage is collected" rather than taking down the
46+
* application. All failure paths in {@code premain} are therefore logged and swallowed; see the top-level
47+
* {@code try}/{@code catch} in {@link #premain(String, Instrumentation)}.
48+
*/
3949
public class PreMain {
4050

4151
private static LoggingUtils.LoggingResources loggingResources = null;
@@ -60,8 +70,26 @@ public class PreMain {
6070

6171
/**
6272
* Entry point for the agent, called by the JVM.
73+
* <p>
74+
* This method never throws. Any exception raised during startup is logged (to the file log and, if configured,
75+
* to Teamscale) and swallowed, and the JVM continues without coverage collection. See the class-level Javadoc for
76+
* the rationale.
6377
*/
64-
public static void premain(String options, Instrumentation instrumentation) throws Exception {
78+
public static void premain(String options, Instrumentation instrumentation) {
79+
try {
80+
startProfiler(options, instrumentation);
81+
} catch (Throwable t) {
82+
// Top-level safety net: an exception escaping premain would be re-thrown by the JVM as
83+
// IllegalAgentException and abort the profiled application. See class-level Javadoc.
84+
logStartupFailure(t);
85+
}
86+
}
87+
88+
/**
89+
* Starts the profiler. Contains all logic that may legitimately throw during startup; every caller must route
90+
* through {@link #premain(String, Instrumentation)} which catches these exceptions.
91+
*/
92+
private static void startProfiler(String options, Instrumentation instrumentation) throws Exception {
6593
if (System.getProperty(LOCKING_SYSTEM_PROPERTY) != null) {
6694
return;
6795
}
@@ -81,14 +109,12 @@ public static void premain(String options, Instrumentation instrumentation) thro
81109
environmentConfigFile);
82110
agentOptions = parseResult.getFirst();
83111

84-
// After parsing everything and configuring logging, we now
85-
// can throw the caught exceptions.
112+
// After parsing everything and configuring logging, we now can throw the caught exceptions. They are
113+
// handled locally below and never propagate out of premain (see class-level Javadoc).
86114
for (Exception exception : parseResult.getSecond()) {
87115
throw exception;
88116
}
89117
} catch (AgentOptionParseException e) {
90-
getLoggerContext().getLogger(PreMain.class).error(e.getMessage(), e);
91-
92118
// Flush logs to Teamscale, if configured.
93119
closeLoggingResources();
94120

@@ -97,7 +123,9 @@ public static void premain(String options, Instrumentation instrumentation) thro
97123
agentOptions.configurationViaTeamscale.unregisterProfiler();
98124
}
99125

100-
throw e;
126+
// Don't crash the profiled application due to a configuration error
127+
// (see TS-43260). The error has already been logged in getAndApplyAgentOptions.
128+
return;
101129
} catch (AgentOptionReceiveException e) {
102130
// When Teamscale is not available, we don't want to fail hard to still allow for testing even if no
103131
// coverage is collected (see TS-33237)
@@ -118,6 +146,29 @@ public static void premain(String options, Instrumentation instrumentation) thro
118146
agent.registerShutdownHook();
119147
}
120148

149+
/**
150+
* Logs a startup failure on a best-effort basis. Must not throw: even logging failures are swallowed so that
151+
* {@link #premain(String, Instrumentation)} keeps its no-throw contract.
152+
*/
153+
private static void logStartupFailure(Throwable t) {
154+
try {
155+
LoggingUtils.getLogger(PreMain.class).error(
156+
"Teamscale Java Profiler failed to start up. The profiled application will continue to run "
157+
+ "without coverage collection.", t);
158+
} catch (Throwable loggingFailure) {
159+
// The logger may not be initialized yet or may itself be broken. Fall back to stderr so the failure is
160+
// at least visible somewhere, then give up silently.
161+
try {
162+
System.err.println(
163+
"[Teamscale Java Profiler] Failed to start up and will not collect coverage: " + t);
164+
loggingFailure.addSuppressed(t);
165+
loggingFailure.printStackTrace();
166+
} catch (Throwable ignored) {
167+
// Nothing we can do; the alternative is to crash the profiled application, which we must not do.
168+
}
169+
}
170+
}
171+
121172
private static Pair<AgentOptions, List<Exception>> getAndApplyAgentOptions(String options,
122173
String environmentConfigId,
123174
String environmentConfigFile) throws AgentOptionParseException, IOException, AgentOptionReceiveException {
@@ -158,7 +209,9 @@ private static Pair<AgentOptions, List<Exception>> getAndApplyAgentOptions(Strin
158209
agentOptions = parseResult.getFirst();
159210
} catch (AgentOptionParseException e) {
160211
try (LoggingUtils.LoggingResources ignored = initializeFallbackLogging(options, delayedLogger)) {
161-
delayedLogger.errorAndStdErr("Failed to parse agent options: " + e.getMessage(), e);
212+
delayedLogger.errorAndStdErr(
213+
"Failed to parse agent options: " + e.getMessage() + " The application should start up normally, but NO coverage will be collected!",
214+
e);
162215
attemptLogAndThrow(delayedLogger);
163216
throw e;
164217
}
@@ -204,9 +257,14 @@ private static void initializeLogging(AgentOptions agentOptions, DelayedLogger l
204257
}
205258
}
206259

207-
/** Closes the opened logging contexts. */
260+
/**
261+
* Closes the opened logging contexts. Safe to call before logging has been initialized, in which case this is a
262+
* no-op.
263+
*/
208264
static void closeLoggingResources() {
209-
loggingResources.close();
265+
if (loggingResources != null) {
266+
loggingResources.close();
267+
}
210268
}
211269

212270
/**

agent/src/main/java/com/teamscale/jacoco/agent/options/AgentOptionsParser.java

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -410,8 +410,18 @@ private void readConfigFromTeamscale(
410410
return;
411411
}
412412
if (!options.teamscaleServer.isConfiguredForServerConnection()) {
413+
List<String> missingOptions = new ArrayList<>();
414+
if (options.teamscaleServer.url == null) {
415+
missingOptions.add("teamscale-server-url");
416+
}
417+
if (options.teamscaleServer.userName == null) {
418+
missingOptions.add("teamscale-user");
419+
}
420+
if (options.teamscaleServer.userAccessToken == null) {
421+
missingOptions.add("teamscale-access-token");
422+
}
413423
throw new AgentOptionParseException(
414-
"Config-id '" + options.teamscaleServer.configId + "' specified without teamscale url/user/accessKey! These options must be provided locally via config-file or command line argument.");
424+
"Config-id '" + options.teamscaleServer.configId + "' specified but the following required option(s) are missing: " + String.join(", ", missingOptions) + ". These options must be provided locally via config-file or command line argument.");
415425
}
416426
// Set ssl validation option in case it needs to be off before trying to reach Teamscale.
417427
HttpUtils.setShouldValidateSsl(options.shouldValidateSsl());
Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
package com.teamscale.jacoco.agent;
2+
3+
import com.teamscale.jacoco.agent.logging.LoggingUtils;
4+
import org.junit.jupiter.api.AfterEach;
5+
import org.junit.jupiter.api.BeforeEach;
6+
import org.junit.jupiter.api.Test;
7+
import org.junit.jupiter.api.io.TempDir;
8+
9+
import java.io.IOException;
10+
import java.lang.instrument.Instrumentation;
11+
import java.nio.charset.StandardCharsets;
12+
import java.nio.file.Files;
13+
import java.nio.file.Path;
14+
15+
import static org.assertj.core.api.Assertions.assertThat;
16+
import static org.assertj.core.api.Assertions.assertThatCode;
17+
18+
/**
19+
* Verifies the cross-cutting design invariant documented on {@link PreMain}: a failure to start up the profiler
20+
* (misconfiguration, missing files, environment issues, ...) must never propagate an exception out of
21+
* {@link PreMain#premain(String, Instrumentation)}, because the JVM would turn that into an
22+
* {@code IllegalAgentException} and abort the profiled application.
23+
*/
24+
class PreMainTest {
25+
26+
/**
27+
* {@link PreMain#premain(String, Instrumentation)} sets a system property on its first invocation and early-returns
28+
* on every subsequent one. Clearing it before each test ensures every test actually exercises the startup path
29+
* instead of silently short-circuiting.
30+
*/
31+
@BeforeEach
32+
void resetPremainLock() {
33+
System.clearProperty("TEAMSCALE_JAVA_PROFILER_ATTACHED");
34+
}
35+
36+
/**
37+
* Stops the logger context so file appenders configured during {@code premain} release their file handles. Without
38+
* this, Windows refuses to delete the {@code @TempDir} that holds {@code agent.log} and the test fails during
39+
* cleanup.
40+
*/
41+
@AfterEach
42+
void releaseLogFileHandles() {
43+
LoggingUtils.getLoggerContext().stop();
44+
}
45+
46+
/**
47+
* Invalid agent options produce an {@code AgentOptionParseException} internally. The agent must swallow it so the
48+
* profiled application keeps running.
49+
*/
50+
@Test
51+
void premainDoesNotThrowOnInvalidOptions() {
52+
assertThatCode(() -> PreMain.premain("this=is=not=a=valid=option,bogus-key=value", null))
53+
.doesNotThrowAnyException();
54+
}
55+
56+
/**
57+
* Regression test for TS-43260: a {@code config-id} without the required Teamscale credentials used to propagate an
58+
* {@code AgentOptionParseException} out of {@code premain} and abort the profiled application. It must now be
59+
* swallowed.
60+
*/
61+
@Test
62+
void premainDoesNotThrowOnConfigIdWithoutCredentials() {
63+
assertThatCode(() -> PreMain.premain("config-id=some-config", null))
64+
.doesNotThrowAnyException();
65+
}
66+
67+
/**
68+
* Exercises the top-level {@code try}/{@code catch} in {@link PreMain#premain(String, Instrumentation)} that routes
69+
* into {@code logStartupFailure} — the safety net for failures that surface <em>after</em> options have been parsed
70+
* (and are therefore not covered by the {@code AgentOption*Exception} catches inside {@code startProfiler}).
71+
* <p>
72+
* The trick: valid options pass parsing, then a {@code null} {@link Instrumentation} forces JaCoCo's runtime setup
73+
* to dereference {@code null} (first at {@code AgentModule.openPackage(inst, …)} inside
74+
* {@code JaCoCoPreMain.createRuntime}). The resulting {@link NullPointerException} is a stand-in for real-world
75+
* post-parse failures — e.g. a Jetty {@code BindException} when {@code http-server-port} is taken, or an
76+
* {@code UploaderException} during uploader construction — which would bubble up the same way. In production the
77+
* JVM always supplies a non-null {@code Instrumentation}; passing {@code null} here is only a cheap way to stage
78+
* the failure without a running JVM agent attach.
79+
* <p>
80+
* A custom {@code logging-config} points logback at a file inside {@code tempDir} so we can read back the emitted
81+
* error event. A plain {@link ch.qos.logback.core.read.ListAppender} attached in the test would be detached again
82+
* by the {@code LoggerContext.reset()} that {@code LoggingUtils.reconfigureLoggerContext} performs during option
83+
* parsing.
84+
*/
85+
@Test
86+
void premainLogsFailureWhenJaCoCoSetupThrows(@TempDir Path tempDir) throws IOException {
87+
Path logFile = tempDir.resolve("agent.log");
88+
Path logbackConfig = tempDir.resolve("logback.xml");
89+
Files.write(logbackConfig, ("<configuration>\n"
90+
+ " <appender name=\"FILE\" class=\"ch.qos.logback.core.FileAppender\">\n"
91+
+ " <file>" + logFile.toAbsolutePath() + "</file>\n"
92+
+ " <encoder><pattern>%-5level %logger - %msg%n%ex</pattern></encoder>\n"
93+
+ " </appender>\n"
94+
+ " <root level=\"INFO\"><appender-ref ref=\"FILE\"/></root>\n"
95+
+ "</configuration>\n").getBytes(StandardCharsets.UTF_8));
96+
97+
assertThatCode(() -> PreMain.premain(
98+
"logging-config=" + logbackConfig.toAbsolutePath() + ",includes=com.example.*", null))
99+
.doesNotThrowAnyException();
100+
101+
assertThat(logFile).exists();
102+
String log = new String(Files.readAllBytes(logFile), StandardCharsets.UTF_8);
103+
assertThat(log)
104+
.contains("ERROR")
105+
.contains("failed to start up")
106+
.contains("NullPointerException");
107+
}
108+
}

agent/src/test/java/com/teamscale/jacoco/agent/options/AgentOptionsTest.java

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -489,6 +489,27 @@ private static AgentOptionsParser getAgentOptionsParserWithDummyLoggerAndCredent
489489
return new AgentOptionsParser(new CommandLineLogger(), null, null, credentials, null);
490490
}
491491

492+
/** Tests that config-id without teamscale-user reports the missing option by name. */
493+
@Test
494+
public void configIdWithMissingUserReportsMissingOption() {
495+
assertThatThrownBy(
496+
() -> parseAndMaybeThrow(
497+
"config-id=test,teamscale-server-url=http://localhost:8080,teamscale-access-token=keyfoo"))
498+
.isInstanceOf(AgentOptionParseException.class)
499+
.hasMessageContaining("teamscale-user");
500+
}
501+
502+
/** Tests that config-id with all required options missing reports all of them. */
503+
@Test
504+
public void configIdWithAllMissingOptionsReportsAll() {
505+
assertThatThrownBy(
506+
() -> parseAndMaybeThrow("config-id=test"))
507+
.isInstanceOf(AgentOptionParseException.class)
508+
.hasMessageContaining("teamscale-server-url")
509+
.hasMessageContaining("teamscale-user")
510+
.hasMessageContaining("teamscale-access-token");
511+
}
512+
492513
/**
493514
* Delete created coverage folders
494515
*/

0 commit comments

Comments
 (0)