fix(walltime): emit benchmark markers inside the sample window

not-matthias · not-matthias · commit 79e868631bb8 · 2026-06-11T12:02:26.000+02:00
The runner consumes the instrument-hooks FIFO stream in order and expects
SampleStart > BenchmarkStart > BenchmarkEnd > SampleEnd nesting per
benchmark. Both walltime paths were emitting the BenchmarkStart/End
markers after stopBenchmark() had already written SampleEnd, leaving the
markers outside the sample window so the parser could not bracket the
perf samples (flame graph generation failed).

- vitest: emit the marker pair before stopBenchmark() in the finally block
- tinybench: emit markers per task between start/stop instead of a single
  run-level pair after every sample window had closed; wrap the body in
  try/finally so start/stop stay balanced when a benchmark throws

Add a regression test asserting both markers land between startBenchmark
and stopBenchmark in walltime mode.
diff --git a/packages/tinybench-plugin/src/shared.ts b/packages/tinybench-plugin/src/shared.ts
@@ -47,21 +47,31 @@ export abstract class BaseBenchRunner {
 
   protected wrapWithInstrumentHooks<T>(fn: () => T, uri: string): T {
     InstrumentHooks.startBenchmark();
-    const result = fn();
-    InstrumentHooks.stopBenchmark();
-    InstrumentHooks.setExecutedBenchmark(process.pid, uri);
-    return result;
+    const runStart = InstrumentHooks.currentTimestamp();
+    try {
+      return fn();
+    } finally {
+      const runEnd = InstrumentHooks.currentTimestamp();
+      this.sendBenchmarkMarkers(runStart, runEnd);
+      InstrumentHooks.stopBenchmark();
+      InstrumentHooks.setExecutedBenchmark(process.pid, uri);
+    }
   }
 
   protected async wrapWithInstrumentHooksAsync(
     fn: Fn,
     uri: string,
   ): Promise<unknown> {
     InstrumentHooks.startBenchmark();
-    const result = await fn();
-    InstrumentHooks.stopBenchmark();
-    InstrumentHooks.setExecutedBenchmark(process.pid, uri);
-    return result;
+    const runStart = InstrumentHooks.currentTimestamp();
+    try {
+      return await fn();
+    } finally {
+      const runEnd = InstrumentHooks.currentTimestamp();
+      this.sendBenchmarkMarkers(runStart, runEnd);
+      InstrumentHooks.stopBenchmark();
+      InstrumentHooks.setExecutedBenchmark(process.pid, uri);
+    }
   }
 
   protected abstract getModeName(): string;
@@ -70,7 +80,12 @@ export abstract class BaseBenchRunner {
   protected abstract finalizeAsyncRun(): Task[];
   protected abstract finalizeSyncRun(): Task[];
 
-  private sendRunMarkers(runStart: bigint, runEnd: bigint): void {
+  // Benchmark markers bracket a single benchmark and must sit inside the sample
+  // window opened by startBenchmark(), so they are emitted per task before
+  // stopBenchmark() closes it. The runner consumes the FIFO stream in order:
+  // a marker sent after StopBenchmark falls outside the sample and breaks the
+  // expected SampleStart > BenchmarkStart > BenchmarkEnd > SampleEnd nesting.
+  private sendBenchmarkMarkers(runStart: bigint, runEnd: bigint): void {
     if (getInstrumentMode() !== "walltime") {
       return;
     }
@@ -86,27 +101,21 @@ export abstract class BaseBenchRunner {
     this.bench.run = async () => {
       this.setupBenchRun();
 
-      const runStart = InstrumentHooks.currentTimestamp();
       for (const task of this.bench.tasks) {
         const uri = this.getTaskUri(task);
         await this.runTaskAsync(task, uri);
       }
-      const runEnd = InstrumentHooks.currentTimestamp();
-      this.sendRunMarkers(runStart, runEnd);
 
       return this.finalizeAsyncRun();
     };
 
     this.bench.runSync = () => {
       this.setupBenchRun();
 
-      const runStart = InstrumentHooks.currentTimestamp();
       for (const task of this.bench.tasks) {
         const uri = this.getTaskUri(task);
         this.runTaskSync(task, uri);
       }
-      const runEnd = InstrumentHooks.currentTimestamp();
-      this.sendRunMarkers(runStart, runEnd);
 
       return this.finalizeSyncRun();
     };
diff --git a/packages/tinybench-plugin/tests/index.integ.test.ts b/packages/tinybench-plugin/tests/index.integ.test.ts
@@ -26,6 +26,7 @@ const mockCore = vi.hoisted(() => {
       }),
     setupCore: vi.fn(),
     teardownCore: vi.fn(),
+    writeWalltimeResults: vi.fn(),
   };
 });
 
@@ -207,6 +208,39 @@ describe("Benchmark.Suite", () => {
     expect(afterAll).toHaveBeenCalledTimes(2);
   });
 
+  it("emits benchmark markers inside the sample window in walltime mode", async () => {
+    process.env.CODSPEED_RUNNER_MODE = "walltime";
+    mockCore.InstrumentHooks.isInstrumented.mockReturnValue(true);
+
+    let ts = 0n;
+    mockCore.InstrumentHooks.currentTimestamp.mockImplementation(() => ts++);
+
+    await withCodSpeed(
+      new Bench({ time: 0, iterations: 1, warmup: false }),
+    )
+      .add("RegExp", () => {
+        /o/.test("Hello World!");
+      })
+      .run();
+
+    const { startBenchmark, stopBenchmark, addMarker } =
+      mockCore.InstrumentHooks;
+
+    const startOrder = startBenchmark.mock.invocationCallOrder[0];
+    const stopOrder = stopBenchmark.mock.invocationCallOrder[0];
+    const markerOrders = addMarker.mock.invocationCallOrder;
+
+    // A BenchmarkStart/BenchmarkEnd pair must be emitted per benchmark...
+    expect(markerOrders).toHaveLength(2);
+    // ...and both must land between startBenchmark (SampleStart) and
+    // stopBenchmark (SampleEnd), otherwise the runner cannot bracket the
+    // perf samples and flame graph generation fails.
+    for (const order of markerOrders) {
+      expect(order).toBeGreaterThan(startOrder);
+      expect(order).toBeLessThan(stopOrder);
+    }
+  });
+
   it("should call setupCore and teardownCore only once", async () => {
     mockCore.InstrumentHooks.isInstrumented.mockReturnValue(true);
     const bench = withCodSpeed(new Bench())
diff --git a/packages/vitest-plugin/src/walltime/index.ts b/packages/vitest-plugin/src/walltime/index.ts
@@ -91,13 +91,18 @@ export class WalltimeRunner extends NodeBenchmarkRunner {
         await originalRun.call(this);
       } finally {
         const runEnd = InstrumentHooks.currentTimestamp();
-        InstrumentHooks.stopBenchmark();
         task.fn = originalFn;
 
-        // Emit a single marker pair covering the whole measurement run
+        // Benchmark markers must land inside the sample window opened by
+        // startBenchmark(), so they have to be emitted before stopBenchmark()
+        // closes it. The runner consumes the FIFO stream in order, so a marker
+        // sent after StopBenchmark falls outside the sample and breaks the
+        // expected SampleStart > BenchmarkStart > BenchmarkEnd > SampleEnd nesting.
         InstrumentHooks.addMarker(pid, MARKER_TYPE_BENCHMARK_START, runStart);
         InstrumentHooks.addMarker(pid, MARKER_TYPE_BENCHMARK_END, runEnd);
 
+        InstrumentHooks.stopBenchmark();
+
         // Look up the URI by task name
         const uri = `${suiteUri}::${this.name}`;
         InstrumentHooks.setExecutedBenchmark(pid, uri);