6363 * (non-newest) rotated files, POST each one to Pyroscope as a JFR
6464 * binary, then delete.
6565 *
66- * On-demand: jfrDump(pid, jobId) asks async-profiler for a snapshot of the
67- * *same running session* (using {@code asprof dump -o jfr -f ...}), which
68- * produces a proper JFR file without interrupting continuous profiling.
66+ * On-demand: jfrDump(pid, jobId) flushes the JVM-native JFR ring (started
67+ * at attach-time with {@code JFR.start name=perf maxage=10m}) to a file via
68+ * {@code jattach jcmd JFR.dump}. Carries the last 10 minutes of typed JVM
69+ * events (GC, JIT, monitor contention, safepoints, deopts).
6970 * threadPrint(pid) is unchanged — still uses {@code jattach jcmd Thread.print}.
7071 */
7172@ Component
@@ -131,14 +132,34 @@ public void attachIfNeeded(TargetJvm jvm) {
131132 try {
132133 copyLibIntoTargetTmp (jvm .pid ());
133134 startAsprof (jvm .pid ());
135+ startJvmJfr (jvm .pid ());
134136 trackedJvms .put (jvm .pid (), jvm );
135- logger .info ("Attached async-profiler (cpu+wall, JFR/15s) to pid {} service={} version={} target={}" ,
137+ logger .info ("Attached async-profiler (cpu+wall, JFR/15s) + JVM-native JFR (maxage=10m) to pid {} service={} version={} target={}" ,
136138 jvm .pid (), jvm .serviceName (), jvm .version (), jvm .idLabel ());
137139 } catch (Exception e ) {
138140 logger .warn ("Failed attaching to pid {}: {}" , jvm .pid (), e .getMessage ());
139141 }
140142 }
141143
144+ /**
145+ * Start a JVM-native JFR recording with a 10-minute in-memory ring.
146+ * On-demand {@link #jfrDump(long, String)} flushes the ring on request.
147+ * Independent of async-profiler — the JVM's own JFR subsystem captures
148+ * the typed events (GC, JIT, monitor contention, safepoints, deopts)
149+ * the analyzer's JfrParser consumes.
150+ *
151+ * Idempotent in practice: if the {@code name=perf} recording is already
152+ * running (collector restart against a long-lived JVM), the JVM returns
153+ * an error and we log-and-continue.
154+ */
155+ private void startJvmJfr (long pid ) {
156+ try {
157+ jattachJcmd (pid , "JFR.start name=perf maxage=10m" );
158+ } catch (Exception e ) {
159+ logger .info ("JFR.start (already running?) pid={}: {}" , pid , e .getMessage ());
160+ }
161+ }
162+
142163 /** Copy libasyncProfiler.so from the node's hostPath into the target container's /tmp. */
143164 private void copyLibIntoTargetTmp (long pid ) throws IOException {
144165 var targetTmp = Path .of ("/proc" , Long .toString (pid ), "root" , "tmp" );
@@ -161,10 +182,6 @@ private void copyLibIntoTargetTmp(long pid) throws IOException {
161182 * proper metadata chunk — critical for Pyroscope's JFR parser.
162183 * - A stray previous-collector session gets cleared via {@code asprof stop}
163184 * first so {@code start} doesn't fail with "Profiler already started".
164- *
165- * The same async-profiler session also serves on-demand JFR dumps via
166- * {@link #jfrDump(long, String)} — no separate {@code jcmd JFR.start} is
167- * needed.
168185 */
169186 private void startAsprof (long pid ) throws IOException , InterruptedException {
170187 try {
@@ -299,39 +316,42 @@ private void detach(long pid) {
299316 }
300317
301318 /**
302- * On-demand: ask async-profiler to dump the live session to a new JFR file.
303- * The running session is not disturbed — dump only flushes current state.
304- * Pyroscope's parser requires proper metadata, which async-profiler's dump
305- * produces inside its own rotating file, so we dump to a dedicated path
306- * and immediately return it.
319+ * On-demand: flush the JVM's continuous JFR ring (started by
320+ * {@link #startJvmJfr(long)} at attach-time) to a file in the target
321+ * container's /tmp. Carries the last 10 minutes of typed JVM events.
322+ * Independent of the async-profiler session that streams to Pyroscope.
307323 */
308324 public Path jfrDump (long pid , String jobId ) throws IOException , InterruptedException {
309325 var fileInTarget = "/tmp/perf-ondemand-" + jobId + ".jfr" ;
310- run (props .asprofBinary (),
311- "dump" ,
312- "-o" , "jfr" ,
313- "-f" , fileInTarget ,
314- "--libpath" , "/tmp/libasyncProfiler.so" ,
315- Long .toString (pid ));
326+ jattachJcmd (pid , "JFR.dump name=perf filename=" + fileInTarget );
316327 return Path .of ("/proc" , Long .toString (pid ), "root" , "tmp" ,
317328 "perf-ondemand-" + jobId + ".jfr" );
318329 }
319330
320331 /** jcmd Thread.print -e; jattach writes stack output to stdout. Unchanged. */
321332 public String threadPrint (long pid ) throws IOException , InterruptedException {
333+ return jattachJcmd (pid , "Thread.print -e" );
334+ }
335+
336+ /**
337+ * Run a {@code jcmd} command on the target JVM through {@code jattach}
338+ * and return its stdout. Errors propagate as IOException with the
339+ * jcmd output included.
340+ */
341+ private String jattachJcmd (long pid , String jcmdCommand ) throws IOException , InterruptedException {
322342 var proc = new ProcessBuilder (
323- props .jattachBinary (), Long .toString (pid ), "jcmd" , "Thread.print -e" )
343+ props .jattachBinary (), Long .toString (pid ), "jcmd" , jcmdCommand )
324344 .redirectErrorStream (true )
325345 .start ();
326346 var out = new String (proc .getInputStream ().readAllBytes ());
327347 boolean ok = proc .waitFor (30 , TimeUnit .SECONDS );
328348 if (!ok ) {
329349 proc .destroyForcibly ();
330- throw new IOException ("jattach Thread.print timed out for pid " + pid );
350+ throw new IOException ("jattach jcmd '" + jcmdCommand + "' timed out for pid " + pid );
331351 }
332352 if (proc .exitValue () != 0 ) {
333353 throw new IOException (
334- "jattach Thread.print exit=%d output=%s" .formatted (proc .exitValue (), out ));
354+ "jattach jcmd '%s' exit=%d output=%s" .formatted (jcmdCommand , proc .exitValue (), out ));
335355 }
336356 return out ;
337357 }
0 commit comments