feat(vitest,tinybench): emit benchmark markers inside the sample window

not-matthias · not-matthias · commit 5f5f1b100400 · 2026-06-11T15:31:10.000+02:00
Emit benchmark start/end markers for the tinybench plugin and the vitest
walltime runner, wrapping the measured function in a root frame.

The runner consumes the instrument-hooks FIFO stream in order and expects
SampleStart > BenchmarkStart > BenchmarkEnd > SampleEnd nesting per
benchmark, so the markers must land inside the sample window:

- vitest: emit the marker pair before stopBenchmark(), and move
  stopBenchmark() plus the markers into a finally block so a throwing
  benchmark cannot leave the profiler started-but-never-stopped
- tinybench: emit markers per task between start/stop instead of a single
  run-level pair; wrap the body in try/finally to keep start/stop balanced
  when a benchmark throws
- benchmark.js: bind wrapWithRootFrame/wrapWithRootFrameSync to the real
  implementations in the integ test's core mock

Add a regression test asserting both markers land between startBenchmark
and stopBenchmark in walltime mode.
diff --git a/packages/benchmark.js-plugin/src/index.ts b/packages/benchmark.js-plugin/src/index.ts
@@ -9,6 +9,8 @@ import {
   SetupInstrumentsResponse,
   teardownCore,
   tryIntrospect,
+  wrapWithRootFrame,
+  wrapWithRootFrameSync,
 } from "@codspeed/core";
 import Benchmark from "benchmark";
 import buildSuiteAdd from "./buildSuiteAdd";
@@ -195,7 +197,7 @@ async function runBenchmarks({
       await optimizeFunction(benchPayload);
       await mongoMeasurement.start(uri);
       global.gc?.();
-      await (async function __codspeed_root_frame__() {
+      await wrapWithRootFrame(async () => {
         InstrumentHooks.startBenchmark();
         await benchPayload();
         InstrumentHooks.stopBenchmark();
@@ -205,7 +207,7 @@ async function runBenchmarks({
     } else {
       optimizeFunctionSync(benchPayload);
       await mongoMeasurement.start(uri);
-      (function __codspeed_root_frame__() {
+      wrapWithRootFrameSync(() => {
         InstrumentHooks.startBenchmark();
         benchPayload();
         InstrumentHooks.stopBenchmark();
diff --git a/packages/benchmark.js-plugin/tests/index.integ.test.ts b/packages/benchmark.js-plugin/tests/index.integ.test.ts
@@ -11,6 +11,8 @@ jest.mock("@codspeed/core", () => {
   const actual = jest.requireActual("@codspeed/core");
   mockCore.getGitDir = actual.getGitDir;
   mockCore.getCallingFile = actual.getCallingFile;
+  mockCore.wrapWithRootFrame = actual.wrapWithRootFrame;
+  mockCore.wrapWithRootFrameSync = actual.wrapWithRootFrameSync;
   return mockCore;
 });
 
diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts
@@ -77,6 +77,7 @@ export type {
 } from "./generated/openapi";
 export { getV8Flags, tryIntrospect } from "./introspection";
 export { optimizeFunction, optimizeFunctionSync } from "./optimization";
+export { wrapWithRootFrame, wrapWithRootFrameSync } from "./rootFrame";
 export * from "./utils";
 export * from "./walltime";
 export type { InstrumentMode };
diff --git a/packages/core/src/rootFrame.ts b/packages/core/src/rootFrame.ts
@@ -0,0 +1,19 @@
+/**
+ * Returns (without invoking) an async wrapper that awaits `fn` under a frame
+ * named `__codspeed_root_frame__`. CodSpeed locates the benchmark root in
+ * collected call stacks by this frame; samples lacking it are unattributable.
+ */
+export function wrapWithRootFrame<T>(
+  fn: () => T | Promise<T>,
+): () => Promise<T> {
+  return async function __codspeed_root_frame__() {
+    return await fn();
+  };
+}
+
+/** Synchronous counterpart: the returned wrapper calls `fn` directly. */
+export function wrapWithRootFrameSync<T>(fn: () => T): () => T {
+  return function __codspeed_root_frame__() {
+    return fn();
+  };
+}
diff --git a/packages/tinybench-plugin/src/analysis.ts b/packages/tinybench-plugin/src/analysis.ts
@@ -3,6 +3,8 @@ import {
   InstrumentHooks,
   mongoMeasurement,
   optimizeFunction,
+  wrapWithRootFrame,
+  wrapWithRootFrameSync,
 } from "@codspeed/core";
 import { Bench, Fn, FnOptions, Task } from "tinybench";
 import { BaseBenchRunner } from "./shared";
@@ -25,18 +27,6 @@ class AnalysisBenchRunner extends BaseBenchRunner {
     return InstrumentHooks.isInstrumented() ? "Measured" : "Checked";
   }
 
-  private wrapFunctionWithFrame(fn: Fn, isAsync: boolean): Fn {
-    if (isAsync) {
-      return async function __codspeed_root_frame__() {
-        await fn();
-      };
-    } else {
-      return function __codspeed_root_frame__() {
-        fn();
-      };
-    }
-  }
-
   protected async runTaskAsync(task: Task, uri: string): Promise<void> {
     const { fnOpts, fn } = task as unknown as { fnOpts?: FnOptions; fn: Fn };
 
@@ -50,10 +40,7 @@ class AnalysisBenchRunner extends BaseBenchRunner {
     await mongoMeasurement.start(uri);
 
     global.gc?.();
-    await this.wrapWithInstrumentHooksAsync(
-      this.wrapFunctionWithFrame(fn, true),
-      uri,
-    );
+    await this.wrapWithInstrumentHooksAsync(wrapWithRootFrame(fn), uri);
 
     await mongoMeasurement.stop(uri);
     await fnOpts?.afterEach?.call(task, "run");
@@ -68,7 +55,7 @@ class AnalysisBenchRunner extends BaseBenchRunner {
     fnOpts?.beforeAll?.call(task, "run");
     fnOpts?.beforeEach?.call(task, "run");
 
-    this.wrapWithInstrumentHooks(this.wrapFunctionWithFrame(fn, false), uri);
+    this.wrapWithInstrumentHooks(wrapWithRootFrameSync(fn), uri);
 
     fnOpts?.afterEach?.call(task, "run");
     fnOpts?.afterAll?.call(task, "run");
diff --git a/packages/tinybench-plugin/src/shared.ts b/packages/tinybench-plugin/src/shared.ts
@@ -1,4 +1,11 @@
-import { InstrumentHooks, setupCore, teardownCore } from "@codspeed/core";
+import {
+  getInstrumentMode,
+  InstrumentHooks,
+  MARKER_TYPE_BENCHMARK_END,
+  MARKER_TYPE_BENCHMARK_START,
+  setupCore,
+  teardownCore,
+} from "@codspeed/core";
 import { Bench, Fn, Task } from "tinybench";
 import { getTaskUri } from "./uri";
 
@@ -40,21 +47,31 @@ export abstract class BaseBenchRunner {
 
   protected wrapWithInstrumentHooks<T>(fn: () => T, uri: string): T {
     InstrumentHooks.startBenchmark();
-    const result = fn();
-    InstrumentHooks.stopBenchmark();
-    InstrumentHooks.setExecutedBenchmark(process.pid, uri);
-    return result;
+    const runStart = InstrumentHooks.currentTimestamp();
+    try {
+      return fn();
+    } finally {
+      const runEnd = InstrumentHooks.currentTimestamp();
+      this.sendBenchmarkMarkers(runStart, runEnd);
+      InstrumentHooks.stopBenchmark();
+      InstrumentHooks.setExecutedBenchmark(process.pid, uri);
+    }
   }
 
   protected async wrapWithInstrumentHooksAsync(
     fn: Fn,
     uri: string,
   ): Promise<unknown> {
     InstrumentHooks.startBenchmark();
-    const result = await fn();
-    InstrumentHooks.stopBenchmark();
-    InstrumentHooks.setExecutedBenchmark(process.pid, uri);
-    return result;
+    const runStart = InstrumentHooks.currentTimestamp();
+    try {
+      return await fn();
+    } finally {
+      const runEnd = InstrumentHooks.currentTimestamp();
+      this.sendBenchmarkMarkers(runStart, runEnd);
+      InstrumentHooks.stopBenchmark();
+      InstrumentHooks.setExecutedBenchmark(process.pid, uri);
+    }
   }
 
   protected abstract getModeName(): string;
@@ -63,6 +80,23 @@ export abstract class BaseBenchRunner {
   protected abstract finalizeAsyncRun(): Task[];
   protected abstract finalizeSyncRun(): Task[];
 
+  // Emits the BenchmarkStart/BenchmarkEnd pair for one task from timestamps
+  // captured around its run; a no-op unless the instrument mode is "walltime".
+  // Callers must invoke it before stopBenchmark(): the runner consumes the FIFO
+  // stream in order, so a marker sent after StopBenchmark falls outside the
+  // sample and breaks SampleStart > BenchmarkStart > BenchmarkEnd > SampleEnd.
+  private sendBenchmarkMarkers(runStart: bigint, runEnd: bigint): void {
+    if (getInstrumentMode() !== "walltime") {
+      return;
+    }
+    InstrumentHooks.addMarker(
+      process.pid,
+      MARKER_TYPE_BENCHMARK_START,
+      runStart,
+    );
+    InstrumentHooks.addMarker(process.pid, MARKER_TYPE_BENCHMARK_END, runEnd);
+  }
+
   public setupBenchMethods(): void {
     this.bench.run = async () => {
       this.setupBenchRun();
diff --git a/packages/tinybench-plugin/src/walltime.ts b/packages/tinybench-plugin/src/walltime.ts
@@ -3,6 +3,8 @@ import {
   mongoMeasurement,
   msToNs,
   msToS,
+  wrapWithRootFrame,
+  wrapWithRootFrameSync,
   writeWalltimeResults,
   type BenchmarkStats,
   type Benchmark as CodspeedBenchmark,
@@ -64,21 +66,10 @@ class WalltimeBenchRunner extends BaseBenchRunner {
 
   private wrapTaskFunction(task: Task, isAsync: boolean): void {
     const { fn } = task as unknown as { fn: Fn };
-    if (isAsync) {
-      // eslint-disable-next-line no-inner-declarations
-      async function __codspeed_root_frame__() {
-        await fn();
-      }
-      // eslint-disable-next-line @typescript-eslint/no-explicit-any
-      (task as any).fn = __codspeed_root_frame__;
-    } else {
-      // eslint-disable-next-line no-inner-declarations
-      function __codspeed_root_frame__() {
-        fn();
-      }
-      // eslint-disable-next-line @typescript-eslint/no-explicit-any
-      (task as any).fn = __codspeed_root_frame__;
-    }
+    // eslint-disable-next-line @typescript-eslint/no-explicit-any
+    (task as any).fn = isAsync
+      ? wrapWithRootFrame(fn)
+      : wrapWithRootFrameSync(fn);
   }
 
   private registerCodspeedBenchmarkFromTask(task: Task): void {
diff --git a/packages/tinybench-plugin/tests/index.integ.test.ts b/packages/tinybench-plugin/tests/index.integ.test.ts
@@ -16,6 +16,8 @@ const mockCore = vi.hoisted(() => {
       startBenchmark: vi.fn(),
       stopBenchmark: vi.fn(),
       setExecutedBenchmark: vi.fn(),
+      currentTimestamp: vi.fn().mockReturnValue(0n),
+      addMarker: vi.fn(),
     },
     optimizeFunction: vi
       .fn()
@@ -24,6 +26,7 @@ const mockCore = vi.hoisted(() => {
       }),
     setupCore: vi.fn(),
     teardownCore: vi.fn(),
+    writeWalltimeResults: vi.fn(),
   };
 });
 
@@ -205,6 +208,51 @@ describe("Benchmark.Suite", () => {
     expect(afterAll).toHaveBeenCalledTimes(2);
   });
 
+  it("emits benchmark markers inside the sample window in walltime mode", async () => {
+    // Force walltime mode for this test only. The previous value is restored
+    // in `finally` (even when an assertion fails) so the override cannot leak
+    // into later tests in this file.
+    const previousRunnerMode = process.env.CODSPEED_RUNNER_MODE;
+    process.env.CODSPEED_RUNNER_MODE = "walltime";
+    try {
+      mockCore.InstrumentHooks.isInstrumented.mockReturnValue(true);
+
+      let ts = 0n;
+      mockCore.InstrumentHooks.currentTimestamp.mockImplementation(() => ts++);
+
+      await withCodSpeed(
+        new Bench({ time: 0, iterations: 1, warmup: false }),
+      )
+        .add("RegExp", () => {
+          /o/.test("Hello World!");
+        })
+        .run();
+
+      const { startBenchmark, stopBenchmark, addMarker } =
+        mockCore.InstrumentHooks;
+
+      const startOrder = startBenchmark.mock.invocationCallOrder[0];
+      const stopOrder = stopBenchmark.mock.invocationCallOrder[0];
+      const markerOrders = addMarker.mock.invocationCallOrder;
+
+      // A BenchmarkStart/BenchmarkEnd pair must be emitted per benchmark...
+      expect(markerOrders).toHaveLength(2);
+      // ...and both must land between startBenchmark (SampleStart) and
+      // stopBenchmark (SampleEnd), otherwise the runner cannot bracket the
+      // perf samples and flame graph generation fails.
+      for (const order of markerOrders) {
+        expect(order).toBeGreaterThan(startOrder);
+        expect(order).toBeLessThan(stopOrder);
+      }
+    } finally {
+      // Assigning `undefined` to an env var stores the string "undefined",
+      // so an originally-unset variable has to be deleted instead.
+      if (previousRunnerMode === undefined) {
+        delete process.env.CODSPEED_RUNNER_MODE;
+      } else {
+        process.env.CODSPEED_RUNNER_MODE = previousRunnerMode;
+      }
+    }
+  });
+
   it("should call setupCore and teardownCore only once", async () => {
     mockCore.InstrumentHooks.isInstrumented.mockReturnValue(true);
     const bench = withCodSpeed(new Bench())
diff --git a/packages/vitest-plugin/src/analysis.ts b/packages/vitest-plugin/src/analysis.ts
@@ -5,6 +5,7 @@ import {
   optimizeFunction,
   setupCore,
   teardownCore,
+  wrapWithRootFrame,
 } from "@codspeed/core";
 import { Benchmark, type RunnerTestSuite } from "vitest";
 import { NodeBenchmarkRunner } from "vitest/runners";
@@ -47,7 +48,7 @@ async function runAnalysisBench(
   await callSuiteHook(suite, benchmark, "beforeEach");
   await mongoMeasurement.start(uri);
   global.gc?.();
-  await (async function __codspeed_root_frame__() {
+  await wrapWithRootFrame(async () => {
     InstrumentHooks.startBenchmark();
     // @ts-expect-error we do not need to bind the function to an instance of tinybench's Bench
     await fn();
diff --git a/packages/vitest-plugin/src/walltime/index.ts b/packages/vitest-plugin/src/walltime/index.ts
@@ -1,9 +1,11 @@
 import {
   InstrumentHooks,
+  MARKER_TYPE_BENCHMARK_END,
+  MARKER_TYPE_BENCHMARK_START,
   setupCore,
+  wrapWithRootFrame,
   writeWalltimeResults,
 } from "@codspeed/core";
-import { Fn } from "tinybench";
 import {
   RunnerTaskEventPack,
   RunnerTaskResultPack,
@@ -66,6 +68,7 @@ export class WalltimeRunner extends NodeBenchmarkRunner {
     this.isTinybenchHookedWithCodspeed = true;
 
     const originalRun = tinybench.Task.prototype.run;
+    const pid = process.pid;
 
     const getSuiteUri = (): string => {
       if (this.currentSuiteId === null) {
@@ -75,21 +78,35 @@ export class WalltimeRunner extends NodeBenchmarkRunner {
     };
 
     tinybench.Task.prototype.run = async function () {
-      const { fn } = this as { fn: Fn };
       const suiteUri = getSuiteUri();
 
-      function __codspeed_root_frame__() {
-        return fn();
-      }
-      (this as { fn: Fn }).fn = __codspeed_root_frame__;
+      // eslint-disable-next-line @typescript-eslint/no-explicit-any
+      const task = this as any;
+      const originalFn = task.fn;
+      task.fn = wrapWithRootFrame(() => originalFn.call(task));
 
       InstrumentHooks.startBenchmark();
-      await originalRun.call(this);
-      InstrumentHooks.stopBenchmark();
-
-      // Look up the URI by task name
-      const uri = `${suiteUri}::${this.name}`;
-      InstrumentHooks.setExecutedBenchmark(process.pid, uri);
+      const runStart = InstrumentHooks.currentTimestamp();
+      try {
+        await originalRun.call(this);
+      } finally {
+        const runEnd = InstrumentHooks.currentTimestamp();
+        task.fn = originalFn;
+
+        // Both markers are sent retroactively, carrying the timestamps captured
+        // right before and after originalRun. They must still be emitted before
+        // stopBenchmark(): the runner consumes the FIFO stream in order, so a
+        // marker sent after StopBenchmark falls outside the sample and breaks
+        // the expected SampleStart > BenchmarkStart > BenchmarkEnd > SampleEnd nesting.
+        InstrumentHooks.addMarker(pid, MARKER_TYPE_BENCHMARK_START, runStart);
+        InstrumentHooks.addMarker(pid, MARKER_TYPE_BENCHMARK_END, runEnd);
+
+        InstrumentHooks.stopBenchmark();
+
+        // Look up the URI by task name
+        const uri = `${suiteUri}::${this.name}`;
+        InstrumentHooks.setExecutedBenchmark(pid, uri);
+      }
 
       return this;
     };