Skip to content

Commit 340d080

Browse files
author
Yuriy Bezsonov
committed
feat(perf-collector): add JVM-native JFR ring buffer for on-demand profiling
1 parent 95bdfe6 commit 340d080

1 file changed

Lines changed: 42 additions & 22 deletions

File tree

  • apps/perf-collector/src/main/java/com/example/perf/collector

apps/perf-collector/src/main/java/com/example/perf/collector/Profiler.java

Lines changed: 42 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -63,9 +63,10 @@
6363
* (non-newest) rotated files, POST each one to Pyroscope as a JFR
6464
* binary, then delete.
6565
*
66-
* On-demand: jfrDump(pid, jobId) asks async-profiler for a snapshot of the
67-
* *same running session* (using {@code asprof dump -o jfr -f ...}), which
68-
* produces a proper JFR file without interrupting continuous profiling.
66+
* On-demand: jfrDump(pid, jobId) flushes the JVM-native JFR ring (started
67+
* at attach-time with {@code JFR.start name=perf maxage=10m}) to a file via
68+
* {@code jattach jcmd JFR.dump}. Carries the last 10 minutes of typed JVM
69+
* events (GC, JIT, monitor contention, safepoints, deopts).
6970
* threadPrint(pid) is unchanged — still uses {@code jattach jcmd Thread.print}.
7071
*/
7172
@Component
@@ -131,14 +132,34 @@ public void attachIfNeeded(TargetJvm jvm) {
131132
try {
132133
copyLibIntoTargetTmp(jvm.pid());
133134
startAsprof(jvm.pid());
135+
startJvmJfr(jvm.pid());
134136
trackedJvms.put(jvm.pid(), jvm);
135-
logger.info("Attached async-profiler (cpu+wall, JFR/15s) to pid {} service={} version={} target={}",
137+
logger.info("Attached async-profiler (cpu+wall, JFR/15s) + JVM-native JFR (maxage=10m) to pid {} service={} version={} target={}",
136138
jvm.pid(), jvm.serviceName(), jvm.version(), jvm.idLabel());
137139
} catch (Exception e) {
138140
logger.warn("Failed attaching to pid {}: {}", jvm.pid(), e.getMessage());
139141
}
140142
}
141143

144+
/**
145+
* Start a JVM-native JFR recording with a 10-minute in-memory ring.
146+
* On-demand {@link #jfrDump(long, String)} flushes the ring on request.
147+
* Independent of async-profiler — the JVM's own JFR subsystem captures
148+
* the typed events (GC, JIT, monitor contention, safepoints, deopts)
149+
* the analyzer's JfrParser consumes.
150+
*
151+
* Idempotent in practice: if the {@code name=perf} recording is already
152+
* running (collector restart against a long-lived JVM), the JVM returns
153+
* an error and we log-and-continue.
154+
*/
155+
private void startJvmJfr(long pid) {
156+
try {
157+
jattachJcmd(pid, "JFR.start name=perf maxage=10m");
158+
} catch (Exception e) {
159+
logger.info("JFR.start (already running?) pid={}: {}", pid, e.getMessage());
160+
}
161+
}
162+
142163
/** Copy libasyncProfiler.so from the node's hostPath into the target container's /tmp. */
143164
private void copyLibIntoTargetTmp(long pid) throws IOException {
144165
var targetTmp = Path.of("/proc", Long.toString(pid), "root", "tmp");
@@ -161,10 +182,6 @@ private void copyLibIntoTargetTmp(long pid) throws IOException {
161182
* proper metadata chunk — critical for Pyroscope's JFR parser.
162183
* - A stray previous-collector session gets cleared via {@code asprof stop}
163184
* first so {@code start} doesn't fail with "Profiler already started".
164-
*
165-
* The same async-profiler session also serves on-demand JFR dumps via
166-
* {@link #jfrDump(long, String)} — no separate {@code jcmd JFR.start} is
167-
* needed.
168185
*/
169186
private void startAsprof(long pid) throws IOException, InterruptedException {
170187
try {
@@ -299,39 +316,42 @@ private void detach(long pid) {
299316
}
300317

301318
/**
302-
* On-demand: ask async-profiler to dump the live session to a new JFR file.
303-
* The running session is not disturbed — dump only flushes current state.
304-
* Pyroscope's parser requires proper metadata, which async-profiler's dump
305-
* produces inside its own rotating file, so we dump to a dedicated path
306-
* and immediately return it.
319+
* On-demand: flush the JVM's continuous JFR ring (started by
320+
* {@link #startJvmJfr(long)} at attach-time) to a file in the target
321+
* container's /tmp. Carries the last 10 minutes of typed JVM events.
322+
* Independent of the async-profiler session that streams to Pyroscope.
307323
*/
308324
public Path jfrDump(long pid, String jobId) throws IOException, InterruptedException {
309325
var fileInTarget = "/tmp/perf-ondemand-" + jobId + ".jfr";
310-
run(props.asprofBinary(),
311-
"dump",
312-
"-o", "jfr",
313-
"-f", fileInTarget,
314-
"--libpath", "/tmp/libasyncProfiler.so",
315-
Long.toString(pid));
326+
jattachJcmd(pid, "JFR.dump name=perf filename=" + fileInTarget);
316327
return Path.of("/proc", Long.toString(pid), "root", "tmp",
317328
"perf-ondemand-" + jobId + ".jfr");
318329
}
319330

320331
/** jcmd Thread.print -e; jattach writes stack output to stdout. Unchanged. */
321332
public String threadPrint(long pid) throws IOException, InterruptedException {
333+
return jattachJcmd(pid, "Thread.print -e");
334+
}
335+
336+
/**
337+
* Run a {@code jcmd} command on the target JVM through {@code jattach}
338+
* and return its stdout. Errors propagate as IOException with the
339+
* jcmd output included.
340+
*/
341+
private String jattachJcmd(long pid, String jcmdCommand) throws IOException, InterruptedException {
322342
var proc = new ProcessBuilder(
323-
props.jattachBinary(), Long.toString(pid), "jcmd", "Thread.print -e")
343+
props.jattachBinary(), Long.toString(pid), "jcmd", jcmdCommand)
324344
.redirectErrorStream(true)
325345
.start();
326346
var out = new String(proc.getInputStream().readAllBytes());
327347
boolean ok = proc.waitFor(30, TimeUnit.SECONDS);
328348
if (!ok) {
329349
proc.destroyForcibly();
330-
throw new IOException("jattach Thread.print timed out for pid " + pid);
350+
throw new IOException("jattach jcmd '" + jcmdCommand + "' timed out for pid " + pid);
331351
}
332352
if (proc.exitValue() != 0) {
333353
throw new IOException(
334-
"jattach Thread.print exit=%d output=%s".formatted(proc.exitValue(), out));
354+
"jattach jcmd '%s' exit=%d output=%s".formatted(jcmdCommand, proc.exitValue(), out));
335355
}
336356
return out;
337357
}

0 commit comments

Comments
 (0)