From 85859606277183d322f1330a4450f7213d61fcaf Mon Sep 17 00:00:00 2001
From: Chirag Chandrashekhar <cchirag85@gmail.com>
Date: Tue, 5 May 2026 19:39:28 +0530
Subject: [PATCH 1/7] feat: add benchmark framework for collection mount
 performance

- Playwright benchmark tests measuring collection mount time across bru/yml formats and sizes (50-5000 requests)
- IPC listener approach for precise mount-complete signal
- Generic benchmark utils: stats, results I/O, baseline comparison, PR commenting
- Collection generator using @usebruno/filestore serializers
- CI workflow running on ubuntu, macos, and windows with PR comment reporting
- Regression detection against committed baselines with configurable threshold
---
 .../tests/run-benchmark-tests/action.yml      |  25 ++++
 .github/workflows/benchmarks.yml              |  73 ++++++++++
 .gitignore                                    |   4 +
 package.json                                  |   1 +
 playwright.benchmark.config.ts                |  38 ++++++
 playwright.config.ts                          |   3 +-
 tests/benchmarks/mounting/baseline.json       |  46 +++++++
 .../mounting/collection-mount.bench.ts        | 115 ++++++++++++++++
 .../benchmarks/utils/collection-generator.ts  |  67 +++++++++
 tests/benchmarks/utils/compare.js             | 129 ++++++++++++++++++
 tests/benchmarks/utils/pr-comment.js          |  82 +++++++++++
 tests/benchmarks/utils/results.ts             |  81 +++++++++++
 tests/benchmarks/utils/stats.ts               | 111 +++++++++++++++
 13 files changed, 774 insertions(+), 1 deletion(-)
 create mode 100644 .github/actions/tests/run-benchmark-tests/action.yml
 create mode 100644 .github/workflows/benchmarks.yml
 create mode 100644 playwright.benchmark.config.ts
 create mode 100644 tests/benchmarks/mounting/baseline.json
 create mode 100644 tests/benchmarks/mounting/collection-mount.bench.ts
 create mode 100644 tests/benchmarks/utils/collection-generator.ts
 create mode 100644 tests/benchmarks/utils/compare.js
 create mode 100644 tests/benchmarks/utils/pr-comment.js
 create mode 100644 tests/benchmarks/utils/results.ts
 create mode 100644 tests/benchmarks/utils/stats.ts

diff --git a/.github/actions/tests/run-benchmark-tests/action.yml b/.github/actions/tests/run-benchmark-tests/action.yml
new file mode 100644
index 00000000000..f089fbd9ae0
--- /dev/null
+++ b/.github/actions/tests/run-benchmark-tests/action.yml
@@ -0,0 +1,25 @@
+name: 'Run Benchmark Tests'
+description: 'Run Playwright benchmark tests and compare against baseline'
+inputs:
+  os:
+    description: 'Operating system (ubuntu, macos, windows)'
+    default: 'ubuntu'
+runs:
+  using: 'composite'
+  steps:
+    - name: Run Benchmark Tests (Ubuntu)
+      if: inputs.os == 'ubuntu'
+      shell: bash
+      run: xvfb-run npm run test:benchmark
+
+    - name: Run Benchmark Tests
+      if: inputs.os != 'ubuntu'
+      shell: bash
+      run: npm run test:benchmark
+
+    - name: Compare Against Baseline
+      shell: bash
+      run: >-
+        node tests/benchmarks/utils/compare.js
+        --results tests/benchmarks/results/mounting.json
+        --baseline tests/benchmarks/mounting/baseline.json
diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml
new file mode 100644
index 00000000000..521484f109d
--- /dev/null
+++ b/.github/workflows/benchmarks.yml
@@ -0,0 +1,73 @@
+name: Benchmarks
+on:
+  workflow_dispatch:
+  pull_request:
+    branches: [main, 'release/v*']
+
+jobs:
+  benchmark:
+    name: Performance Benchmarks (${{ matrix.os }})
+    timeout-minutes: 60
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        os: [ubuntu-24.04, macos-latest, windows-latest]
+        include:
+          - os: ubuntu-24.04
+            os-name: ubuntu
+          - os: macos-latest
+            os-name: macos
+          - os: windows-latest
+            os-name: windows
+    permissions:
+      contents: read
+      pull-requests: write
+    steps:
+      - uses: actions/checkout@v6
+
+      - name: Install System Dependencies (Ubuntu)
+        if: matrix.os-name == 'ubuntu'
+        run: |
+          sudo apt-get update
+          sudo apt-get --no-install-recommends install -y \
+            libglib2.0-0 libnss3 libdbus-1-3 libatk1.0-0 libatk-bridge2.0-0 libcups2 libgtk-3-0 libasound2t64 \
+            xvfb
+
+      - name: Setup Node Dependencies
+        uses: ./.github/actions/common/setup-node-deps
+
+      - name: Configure Chrome Sandbox
+        if: matrix.os-name == 'ubuntu'
+        run: |
+          sudo chown root node_modules/electron/dist/chrome-sandbox
+          sudo chmod 4755 node_modules/electron/dist/chrome-sandbox
+
+      - name: Run Benchmark Tests
+        uses: ./.github/actions/tests/run-benchmark-tests
+        with:
+          os: ${{ matrix.os-name }}
+
+      - name: Upload Benchmark Results
+        uses: actions/upload-artifact@v6
+        if: ${{ !cancelled() }}
+        with:
+          name: benchmark-results-${{ matrix.os-name }}
+          path: |
+            tests/benchmarks/results/
+            benchmark-report/
+          retention-days: 30
+
+      - name: Comment Benchmark Results on PR
+        if: github.event_name == 'pull_request' && !cancelled() && matrix.os-name == 'ubuntu'
+        uses: actions/github-script@v7
+        with:
+          script: |
+            const run = require('./tests/benchmarks/utils/pr-comment.js');
+            await run({
+              github,
+              context,
+              resultsPath: 'tests/benchmarks/results/mounting.json',
+              baselinePath: 'tests/benchmarks/mounting/baseline.json',
+              title: 'Benchmark Results — Collection Mount'
+            });
diff --git a/.gitignore b/.gitignore
index 8dfcfb9e86e..acfe558c205 100644
--- a/.gitignore
+++ b/.gitignore
@@ -58,6 +58,10 @@ skills-lock.json
 # Playwright
 /blob-report/
 
+# Benchmark results (generated at runtime)
+tests/benchmarks/results/
+/benchmark-report/
+
 # Development plan files
 CLAUDE.md
 AGENTS.md
diff --git a/package.json b/package.json
index e4c3c737319..fd9089b9765 100644
--- a/package.json
+++ b/package.json
@@ -83,6 +83,7 @@
     "test:e2e": "playwright test --project=default",
     "test:e2e:ssl": "playwright test --project=ssl",
     "test:e2e:auth": "playwright test --project=auth",
+    "test:benchmark": "playwright test --config=playwright.benchmark.config.ts",
     "lint": "cross-env NODE_OPTIONS=\"--max_old_space_size=4096\" npx eslint",
     "lint:fix": "cross-env NODE_OPTIONS=\"--max_old_space_size=4096\" npx eslint --fix",
     "prepare": "husky"
diff --git a/playwright.benchmark.config.ts b/playwright.benchmark.config.ts
new file mode 100644
index 00000000000..69ecd66f44c
--- /dev/null
+++ b/playwright.benchmark.config.ts
@@ -0,0 +1,38 @@
+import { defineConfig } from '@playwright/test';
+
+export default defineConfig({
+  fullyParallel: false,
+  forbidOnly: !!process.env.CI,
+  retries: 0,
+  workers: 1,
+  reporter: [
+    ['list'],
+    ['json', { outputFile: 'benchmark-report/results.json' }]
+  ],
+
+  use: {
+    trace: 'off'
+  },
+
+  projects: [
+    {
+      name: 'benchmarks',
+      testDir: './tests/benchmarks',
+      testMatch: '**/*.bench.ts'
+    }
+  ],
+
+  webServer: [
+    {
+      command: 'npm run dev:web',
+      url: 'http://localhost:3000',
+      reuseExistingServer: !process.env.CI,
+      timeout: 10 * 60 * 1000
+    }
+  ],
+
+  timeout: 10 * 60 * 1000,
+  expect: {
+    timeout: 120_000
+  }
+});
diff --git a/playwright.config.ts b/playwright.config.ts
index f1ec9b2e2a7..eb758f08d5f 100644
--- a/playwright.config.ts
+++ b/playwright.config.ts
@@ -23,7 +23,8 @@ export default defineConfig({
       testDir: './tests',
       testIgnore: [
         'ssl/**', // custom CA certificate tests require separate server setup and certificate generation
-        'auth/**' // auth tests have their own project
+        'auth/**', // auth tests have their own project
+        'benchmarks/**'  
       ]
     },
     {
diff --git a/tests/benchmarks/mounting/baseline.json b/tests/benchmarks/mounting/baseline.json
new file mode 100644
index 00000000000..092998a23ce
--- /dev/null
+++ b/tests/benchmarks/mounting/baseline.json
@@ -0,0 +1,46 @@
+{
+  "description": "Benchmark baselines for collection mount times. Update by running: node tests/benchmarks/utils/compare.js --results tests/benchmarks/results/mounting.json --baseline tests/benchmarks/mounting/baseline.json --update-baseline",
+  "thresholdPercent": 20,
+  "entries": {
+    "bru-50": {
+      "mean": 2000,
+      "p50": 1800
+    },
+    "bru-200": {
+      "mean": 5000,
+      "p50": 4500
+    },
+    "bru-500": {
+      "mean": 12000,
+      "p50": 11000
+    },
+    "bru-1000": {
+      "mean": 25000,
+      "p50": 24000
+    },
+    "bru-5000": {
+      "mean": 120000,
+      "p50": 115000
+    },
+    "yml-50": {
+      "mean": 2000,
+      "p50": 1800
+    },
+    "yml-200": {
+      "mean": 5000,
+      "p50": 4500
+    },
+    "yml-500": {
+      "mean": 12000,
+      "p50": 11000
+    },
+    "yml-1000": {
+      "mean": 25000,
+      "p50": 24000
+    },
+    "yml-5000": {
+      "mean": 120000,
+      "p50": 115000
+    }
+  }
+}
diff --git a/tests/benchmarks/mounting/collection-mount.bench.ts b/tests/benchmarks/mounting/collection-mount.bench.ts
new file mode 100644
index 00000000000..6767b0a222d
--- /dev/null
+++ b/tests/benchmarks/mounting/collection-mount.bench.ts
@@ -0,0 +1,115 @@
+import { test } from '../../../playwright';
+import { type ElectronApplication, type Page } from '@playwright/test';
+import { openCollection, closeAllCollections } from '../../utils/page';
+import { summarize } from '../utils/stats';
+import { writeResults, buildResultEntry, type ResultEntry } from '../utils/results';
+import { generateCollection, type CollectionFormat } from '../utils/collection-generator';
+import * as path from 'path';
+import * as fs from 'fs';
+
+const COLLECTION_SIZES = [50, 200, 500, 1000, 5000];
+const COLLECTION_FORMATS: CollectionFormat[] = ['bru', 'yml'];
+const ITERATIONS_PER_SIZE = 3;
+
+async function measureCollectionMount(
+  page: Page,
+  electronApp: ElectronApplication,
+  collectionDir: string,
+  collectionName: string
+): Promise<number> {
+  await electronApp.evaluate(
+    ({ dialog }, { dir }) => {
+      (dialog as any).__originalShowOpenDialog ??= dialog.showOpenDialog;
+      dialog.showOpenDialog = async () => ({ canceled: false, filePaths: [dir] });
+    },
+    { dir: collectionDir }
+  );
+
+  await page.evaluate(() => {
+    (window as any).__benchMountDone = new Promise<void>((resolve) => {
+      const off = (window as any).ipcRenderer.on('main:collection-loading-state-updated', (val: any) => {
+        if (!val.isLoading) {
+          off(); resolve();
+        }
+      });
+    });
+  });
+
+  const start = performance.now();
+
+  await page.getByTestId('collections-header-add-menu').click();
+  await page.locator('.tippy-box .dropdown-item').filter({ hasText: 'Open collection' }).click();
+  await page.locator('#sidebar-collection-name').filter({ hasText: collectionName }).waitFor({ state: 'visible' });
+
+  await openCollection(page, collectionName);
+  await page.evaluate(() => (window as any).__benchMountDone);
+
+  const elapsed = performance.now() - start;
+
+  await electronApp.evaluate(({ dialog }) => {
+    if ((dialog as any).__originalShowOpenDialog) {
+      dialog.showOpenDialog = (dialog as any).__originalShowOpenDialog;
+    }
+  });
+
+  await closeAllCollections(page);
+
+  return elapsed;
+}
+
+function resultKey(format: CollectionFormat, size: number): string {
+  return `${format}-${size}`;
+}
+
+test.describe('Benchmark: Collection Mount', () => {
+  const results: Record<string, number[]> = {};
+
+  for (const format of COLLECTION_FORMATS) {
+    test.describe(`format: ${format}`, () => {
+      for (const size of COLLECTION_SIZES) {
+        test(`mount ${format} collection with ${size} requests`, async ({ page, electronApp, createTmpDir }) => {
+          test.setTimeout((2 + Math.ceil(size / 100) * 2) * 60_000);
+          const timings: number[] = [];
+
+          for (let i = 0; i < ITERATIONS_PER_SIZE; i++) {
+            const collectionName = `bench-${format}-${size}-iter-${i}`;
+            const collectionDir = await createTmpDir(`bench-${format}-${size}-${i}`);
+            generateCollection({ dir: collectionDir, name: collectionName, requestCount: size, format });
+
+            const elapsed = await measureCollectionMount(page, electronApp, collectionDir, collectionName);
+            timings.push(Math.round(elapsed));
+            await page.waitForTimeout(500);
+          }
+
+          const key = resultKey(format, size);
+          results[key] = timings;
+
+          const stats = summarize(timings);
+          const r = (v: number) => Math.round(v);
+          console.log(`[BENCHMARK] ${format} ${size} requests — mean: ${r(stats.mean)}ms, median: ${r(stats.median)}ms, p90: ${r(stats.p90)}ms, stdDev: ${r(stats.stdDev)}ms, raw: [${timings.join(', ')}]`);
+
+          test.info().annotations.push({
+            type: 'benchmark',
+            description: JSON.stringify({ format, size, ...stats, timings })
+          });
+        });
+      }
+    });
+  }
+
+  test.afterAll(async () => {
+    const resultsDir = path.join(process.cwd(), 'tests', 'benchmarks', 'results');
+    fs.mkdirSync(resultsDir, { recursive: true });
+    const outputPath = path.join(resultsDir, 'mounting.json');
+    const entries: Record<string, ResultEntry> = {};
+
+    for (const [key, timings] of Object.entries(results)) {
+      if (timings.length === 0) continue;
+      const [format, sizeStr] = key.split('-');
+      entries[key] = buildResultEntry(timings, { format, size: Number(sizeStr) });
+    }
+
+    writeResults(outputPath, entries);
+    console.log(`[BENCHMARK] Results written to ${outputPath}`);
+  });
+});
diff --git a/tests/benchmarks/utils/collection-generator.ts b/tests/benchmarks/utils/collection-generator.ts
new file mode 100644
index 00000000000..582076eb4b1
--- /dev/null
+++ b/tests/benchmarks/utils/collection-generator.ts
@@ -0,0 +1,67 @@
+import { stringifyRequest, stringifyCollection, stringifyFolder } from '@usebruno/filestore';
+import type { BrunoItem } from '@usebruno/schema-types';
+import * as path from 'path';
+import * as fs from 'fs';
+
+export type CollectionFormat = 'bru' | 'yml';
+
+export function buildRequestItem(seq: number): BrunoItem {
+  return {
+    uid: `req-${seq}`,
+    type: 'http-request',
+    name: `request-${seq}`,
+    seq,
+    request: {
+      method: 'GET',
+      url: `https://example.com/api/v1/resource/${seq}`,
+      headers: [
+        { uid: `h1-${seq}`, name: 'Content-Type', value: 'application/json', enabled: true },
+        { uid: `h2-${seq}`, name: 'Accept', value: 'application/json', enabled: true }
+      ],
+      body: { mode: 'none' },
+      auth: { mode: 'none' }
+    }
+  } as BrunoItem;
+}
+
+export interface GenerateCollectionOptions {
+  dir: string;
+  name: string;
+  requestCount: number;
+  format: CollectionFormat;
+  requestsPerFolder?: number;
+}
+
+export function generateCollection({
+  dir,
+  name,
+  requestCount,
+  format,
+  requestsPerFolder = 10
+}: GenerateCollectionOptions) {
+  if (format === 'bru') {
+    fs.writeFileSync(path.join(dir, 'bruno.json'), JSON.stringify({ version: '1', name, type: 'collection' }, null, 2));
+    fs.writeFileSync(path.join(dir, 'collection.bru'), stringifyCollection({ name } as any, {}, { format: 'bru' }) || `meta {\n  name: ${name}\n}\n`);
+  } else {
+    const ymlContent = stringifyCollection({ name } as any, { name, type: 'collection', opencollection: '1.0.0' }, { format: 'yml' });
+    fs.writeFileSync(path.join(dir, 'opencollection.yml'), ymlContent);
+  }
+
+  const ext = format === 'bru' ? 'bru' : 'yml';
+  const folderFile = format === 'bru' ? 'folder.bru' : 'folder.yml';
+  const folderCount = Math.ceil(requestCount / requestsPerFolder);
+
+  Array.from({ length: folderCount }).forEach((_, f) => {
+    const folderPath = path.join(dir, `folder-${f}`);
+    fs.mkdirSync(folderPath, { recursive: true });
+
+    const folderContent = stringifyFolder({ name: `folder-${f}` }, { format });
+    fs.writeFileSync(path.join(folderPath, folderFile), folderContent || `meta {\n  name: folder-${f}\n}\n`);
+
+    const count = Math.min(requestsPerFolder, requestCount - f * requestsPerFolder);
+    Array.from({ length: count }).forEach((_, r) => {
+      const seq = f * requestsPerFolder + r + 1;
+      fs.writeFileSync(path.join(folderPath, `request-${seq}.${ext}`), stringifyRequest(buildRequestItem(seq), { format }));
+    });
+  });
+}
diff --git a/tests/benchmarks/utils/compare.js b/tests/benchmarks/utils/compare.js
new file mode 100644
index 00000000000..9119e3b04d6
--- /dev/null
+++ b/tests/benchmarks/utils/compare.js
@@ -0,0 +1,129 @@
+#!/usr/bin/env node
+
+/**
+ * Generic benchmark comparison: compares results against a baseline and exits
+ * with code 1 if any metric exceeds the allowed regression threshold.
+ *
+ * Usage:
+ *   node tests/benchmarks/utils/compare.js --results <path> --baseline <path> [--update-baseline]
+ *
+ * Examples:
+ *   node tests/benchmarks/utils/compare.js \
+ *     --results tests/benchmarks/results/mounting.json \
+ *     --baseline tests/benchmarks/mounting/baseline.json
+ *
+ *   node tests/benchmarks/utils/compare.js \
+ *     --results tests/benchmarks/results/mounting.json \
+ *     --baseline tests/benchmarks/mounting/baseline.json \
+ *     --update-baseline
+ */
+
+import { existsSync, readFileSync, writeFileSync } from 'fs';
+
+function parseArgs(argv) {
+  const args = {};
+  for (let i = 2; i < argv.length; i++) {
+    if (argv[i] === '--results') args.results = argv[++i];
+    else if (argv[i] === '--baseline') args.baseline = argv[++i];
+    else if (argv[i] === '--update-baseline') args.updateBaseline = true;
+  }
+  return args;
+}
+
+function loadJSON(filepath) {
+  if (!existsSync(filepath)) {
+    console.error(`File not found: ${filepath}`);
+    process.exit(1);
+  }
+  return JSON.parse(readFileSync(filepath, 'utf-8'));
+}
+
+function pctChange(baseline, current) {
+  if (baseline === 0) return current === 0 ? 0 : Infinity;
+  return ((current - baseline) / baseline) * 100;
+}
+
+function formatPct(pct) {
+  const sign = pct > 0 ? '+' : '';
+  return `${sign}${pct.toFixed(1)}%`;
+}
+
+const args = parseArgs(process.argv);
+
+if (!args.results || !args.baseline) {
+  console.error('Usage: compare.js --results <path> --baseline <path> [--update-baseline]');
+  process.exit(1);
+}
+
+const results = loadJSON(args.results);
+const baseline = loadJSON(args.baseline);
+const threshold = baseline.thresholdPercent || 20;
+const resultEntries = results.entries || results;
+const baselineEntries = baseline.entries || baseline.collections || {};
+
+if (args.updateBaseline) {
+  const newBaseline = {
+    thresholdPercent: threshold,
+    entries: {}
+  };
+  for (const [key, data] of Object.entries(resultEntries)) {
+    newBaseline.entries[key] = {
+      mean: data.mean,
+      p50: data.p50
+    };
+  }
+  writeFileSync(args.baseline, JSON.stringify(newBaseline, null, 2) + '\n');
+  console.log(`Baseline updated at ${args.baseline}`);
+  process.exit(0);
+}
+
+let hasRegression = false;
+const rows = [];
+
+console.log('');
+console.log('='.repeat(72));
+console.log(' BENCHMARK COMPARISON');
+console.log('='.repeat(72));
+console.log(`  Regression threshold: ${threshold}%`);
+console.log('');
+
+for (const [key, data] of Object.entries(resultEntries)) {
+  const base = baselineEntries[key];
+  if (!base) {
+    console.log(`  [SKIP] No baseline for ${key}`);
+    continue;
+  }
+
+  const meanPct = pctChange(base.mean, data.mean);
+  const p50Pct = pctChange(base.p50, data.p50);
+
+  const meanStatus = meanPct > threshold ? 'FAIL' : meanPct < -threshold ? 'IMPROVED' : 'OK';
+  const p50Status = p50Pct > threshold ? 'FAIL' : p50Pct < -threshold ? 'IMPROVED' : 'OK';
+
+  if (meanStatus === 'FAIL' || p50Status === 'FAIL') {
+    hasRegression = true;
+  }
+
+  rows.push({
+    key,
+    'mean (ms)': `${Math.round(data.mean)} (baseline: ${base.mean})`,
+    'mean change': formatPct(meanPct),
+    'mean status': meanStatus,
+    'p50 (ms)': `${Math.round(data.p50)} (baseline: ${base.p50})`,
+    'p50 change': formatPct(p50Pct),
+    'p50 status': p50Status
+  });
+}
+
+console.table(rows);
+console.log('');
+
+if (hasRegression) {
+  console.error(`FAILED: One or more benchmarks regressed beyond the ${threshold}% threshold.`);
+  console.error('If this regression is expected, update the baseline:');
+  console.error(`  node tests/benchmarks/utils/compare.js --results ${args.results} --baseline ${args.baseline} --update-baseline`);
+  process.exit(1);
+} else {
+  console.log('PASSED: All benchmarks are within the acceptable threshold.');
+  process.exit(0);
+}
diff --git a/tests/benchmarks/utils/pr-comment.js b/tests/benchmarks/utils/pr-comment.js
new file mode 100644
index 00000000000..a0c18dff189
--- /dev/null
+++ b/tests/benchmarks/utils/pr-comment.js
@@ -0,0 +1,82 @@
+#!/usr/bin/env node
+
+/**
+ * Generic benchmark PR comment: posts/updates a comparison table on a PR.
+ *
+ * Called by CI via actions/github-script:
+ *   const run = require('./tests/benchmarks/utils/pr-comment.js');
+ *   await run({ github, context, resultsPath, baselinePath, title });
+ */
+
+const fs = require('fs');
+
+function buildCommentBody(results, baseline, title) {
+  const threshold = baseline.thresholdPercent || 20;
+  const resultEntries = results.entries || results;
+  const baselineEntries = baseline.entries || baseline.collections || {};
+  const marker = `## ${title}`;
+
+  let body = `${marker}\n\n`;
+  body += `| Key | Mean (ms) | Baseline Mean | Change | Status |\n`;
+  body += `|---|---|---|---|---|\n`;
+
+  let hasRegression = false;
+
+  for (const [key, data] of Object.entries(resultEntries)) {
+    const base = baselineEntries[key];
+    if (!base) continue;
+
+    const pct = ((data.mean - base.mean) / base.mean * 100).toFixed(1);
+    const status = pct > threshold ? '🔴 REGRESSION' : pct < -threshold ? '🟢 IMPROVED' : '✅ OK';
+    if (pct > threshold) hasRegression = true;
+
+    body += `| ${key} | ${Math.round(data.mean)} | ${base.mean} | ${pct > 0 ? '+' : ''}${pct}% | ${status} |\n`;
+  }
+
+  body += `\n> Threshold: ${threshold}% regression allowed\n`;
+
+  if (hasRegression) {
+    body += '\n⚠️ **Performance regression detected.** If expected, update the baseline.\n';
+  }
+
+  return { body, marker };
+}
+
+async function postOrUpdateComment(github, context, body, marker) {
+  const { data: comments } = await github.rest.issues.listComments({
+    owner: context.repo.owner,
+    repo: context.repo.repo,
+    issue_number: context.issue.number
+  });
+
+  const existing = comments.find((c) => c.body.startsWith(marker));
+
+  if (existing) {
+    await github.rest.issues.updateComment({
+      owner: context.repo.owner,
+      repo: context.repo.repo,
+      comment_id: existing.id,
+      body
+    });
+  } else {
+    await github.rest.issues.createComment({
+      owner: context.repo.owner,
+      repo: context.repo.repo,
+      issue_number: context.issue.number,
+      body
+    });
+  }
+}
+
+module.exports = async function run({ github, context, resultsPath, baselinePath, title }) {
+  if (!fs.existsSync(resultsPath)) {
+    console.log(`No benchmark results found at ${resultsPath}, skipping comment.`);
+    return;
+  }
+
+  const results = JSON.parse(fs.readFileSync(resultsPath, 'utf-8'));
+  const baseline = JSON.parse(fs.readFileSync(baselinePath, 'utf-8'));
+  const { body, marker } = buildCommentBody(results, baseline, title);
+
+  await postOrUpdateComment(github, context, body, marker);
+};
diff --git a/tests/benchmarks/utils/results.ts b/tests/benchmarks/utils/results.ts
new file mode 100644
index 00000000000..6ba6fad0eb3
--- /dev/null
+++ b/tests/benchmarks/utils/results.ts
@@ -0,0 +1,81 @@
+/**
+ * Standard read/write helpers for benchmark results and baselines.
+ *
+ * Results shape (written by benchmark tests):
+ * {
+ *   "entries": {
+ *     "<key>": { mean, median, p50, p90, p99, stdDev, min, max, count, timings, ...meta }
+ *   }
+ * }
+ *
+ * Baseline shape (committed per suite):
+ * {
+ *   "thresholdPercent": 20,
+ *   "entries": {
+ *     "<key>": { mean, p50 }
+ *   }
+ * }
+ */
+
+import { existsSync, readFileSync, writeFileSync } from 'fs';
+import { summarize } from './stats';
+
+export interface ResultEntry {
+  mean: number;
+  median: number;
+  p50: number;
+  p90: number;
+  p99: number;
+  stdDev: number;
+  min: number;
+  max: number;
+  count: number;
+  timings: number[];
+  [key: string]: any;
+}
+
+export interface ResultsFile {
+  entries: Record<string, ResultEntry>;
+}
+
+export interface BaselineEntry {
+  mean: number;
+  p50: number;
+}
+
+export interface BaselineFile {
+  thresholdPercent: number;
+  entries: Record<string, BaselineEntry>;
+}
+
+export function readResults(filePath: string): ResultsFile {
+  if (!existsSync(filePath)) {
+    throw new Error(`Results file not found: ${filePath}`);
+  }
+  return JSON.parse(readFileSync(filePath, 'utf-8'));
+}
+
+export function writeResults(filePath: string, entries: Record<string, ResultEntry>) {
+  const data: ResultsFile = { entries };
+  writeFileSync(filePath, JSON.stringify(data, null, 2));
+}
+
+export function buildResultEntry(timings: number[], meta: Record<string, any> = {}): ResultEntry {
+  return { ...summarize(timings), timings, ...meta };
+}
+
+export function readBaseline(filePath: string): BaselineFile {
+  if (!existsSync(filePath)) {
+    throw new Error(`Baseline file not found: ${filePath}`);
+  }
+  return JSON.parse(readFileSync(filePath, 'utf-8'));
+}
+
+export function writeBaseline(filePath: string, results: ResultsFile, thresholdPercent: number) {
+  const entries: Record<string, BaselineEntry> = {};
+  for (const [key, data] of Object.entries(results.entries)) {
+    entries[key] = { mean: data.mean, p50: data.p50 };
+  }
+  const data: BaselineFile = { thresholdPercent, entries };
+  writeFileSync(filePath, JSON.stringify(data, null, 2) + '\n');
+}
diff --git a/tests/benchmarks/utils/stats.ts b/tests/benchmarks/utils/stats.ts
new file mode 100644
index 00000000000..bd17703d926
--- /dev/null
+++ b/tests/benchmarks/utils/stats.ts
@@ -0,0 +1,111 @@
+/**
+ * Statistical utility functions for benchmark analysis.
+ */
+
+function assertValid(values: number[]) {
+  if (values.length === 0) {
+    throw new Error('Values array must not be empty');
+  }
+  if (!values.every(Number.isFinite)) {
+    throw new TypeError('All values must be finite numbers');
+  }
+}
+
+function sorted(values: number[]): number[] {
+  return [...values].sort((a, b) => a - b);
+}
+
+export function mean(values: number[]): number {
+  assertValid(values);
+  return values.reduce((sum, v) => sum + v, 0) / values.length;
+}
+
+export function median(values: number[]): number {
+  assertValid(values);
+  const s = sorted(values);
+  const mid = Math.floor(s.length / 2);
+
+  return s.length % 2 === 0
+    ? (s[mid - 1] + s[mid]) / 2
+    : s[mid];
+}
+
+export function percentile(values: number[], p: number): number {
+  assertValid(values);
+
+  if (p < 0 || p > 100) {
+    throw new RangeError(`Percentile must be between 0 and 100, got ${p}`);
+  }
+
+  const s = sorted(values);
+  const index = (p / 100) * (s.length - 1);
+
+  const lower = Math.floor(index);
+  const upper = Math.ceil(index);
+
+  if (lower === upper) return s[lower];
+
+  const weight = index - lower;
+  return s[lower] + weight * (s[upper] - s[lower]);
+}
+
+/**
+ * Population standard deviation (divide by N)
+ */
+export function populationStdDev(values: number[]): number {
+  assertValid(values);
+  const avg = mean(values);
+
+  const variance
+    = values.reduce((sum, v) => sum + (v - avg) ** 2, 0) / values.length;
+
+  return Math.sqrt(variance);
+}
+
+/**
+ * Sample standard deviation (divide by N - 1)
+ */
+export function sampleStdDev(values: number[]): number {
+  assertValid(values);
+
+  if (values.length < 2) {
+    throw new Error('Sample standard deviation requires at least 2 values');
+  }
+
+  const avg = mean(values);
+
+  const variance
+    = values.reduce((sum, v) => sum + (v - avg) ** 2, 0)
+      / (values.length - 1);
+
+  return Math.sqrt(variance);
+}
+
+export function min(values: number[]): number {
+  assertValid(values);
+  return values.reduce((a, b) => (a < b ? a : b), Infinity);
+}
+
+export function max(values: number[]): number {
+  assertValid(values);
+  return values.reduce((a, b) => (a > b ? a : b), -Infinity);
+}
+
+/**
+ * Summary for benchmarking (no rounding, keep precision)
+ */
+export function summarize(values: number[]) {
+  assertValid(values);
+
+  return {
+    mean: mean(values),
+    median: median(values),
+    p50: percentile(values, 50),
+    p90: percentile(values, 90),
+    p99: percentile(values, 99),
+    min: min(values),
+    max: max(values),
+    stdDev: populationStdDev(values),
+    count: values.length
+  };
+}

From dd4840e2e2b766f77b53658c84f7967686159d83 Mon Sep 17 00:00:00 2001
From: Chirag Chandrashekhar <cchirag85@gmail.com>
Date: Tue, 5 May 2026 23:51:44 +0530
Subject: [PATCH 2/7] feat: reuse collection across iterations, add
 update-baseline input, fix review issues

- Same collection mounted/unmounted across iterations for cold vs cached comparison
- workflow_dispatch has update-baseline boolean input for manual baseline updates
- Fix string comparison bug in pr-comment.js (pct was string from toFixed)
- Remove dead baseline.collections fallback in compare.js and pr-comment.js
- Remove unnecessary waitForTimeout between iterations
- Rename pct/pctChange to changePercent/percentChange for readability
---
 .../tests/run-benchmark-tests/action.yml      | 13 +++++++++++
 .github/workflows/benchmarks.yml              |  6 +++++
 .../mounting/collection-mount.bench.ts        |  9 ++++----
 tests/benchmarks/utils/compare.js             | 22 +++++++++----------
 tests/benchmarks/utils/pr-comment.js          | 11 +++++-----
 5 files changed, 40 insertions(+), 21 deletions(-)

diff --git a/.github/actions/tests/run-benchmark-tests/action.yml b/.github/actions/tests/run-benchmark-tests/action.yml
index f089fbd9ae0..480245c503f 100644
--- a/.github/actions/tests/run-benchmark-tests/action.yml
+++ b/.github/actions/tests/run-benchmark-tests/action.yml
@@ -4,6 +4,9 @@ inputs:
   os:
     description: 'Operating system (ubuntu, macos, windows)'
     default: 'ubuntu'
+  update-baseline:
+    description: 'Update baseline instead of comparing'
+    default: 'false'
 runs:
   using: 'composite'
   steps:
@@ -17,7 +20,17 @@ runs:
       shell: bash
       run: npm run test:benchmark
 
+    - name: Update Baseline
+      if: inputs.update-baseline == 'true'
+      shell: bash
+      run: >-
+        node tests/benchmarks/utils/compare.js
+        --results tests/benchmarks/results/mounting.json
+        --baseline tests/benchmarks/mounting/baseline.json
+        --update-baseline
+
     - name: Compare Against Baseline
+      if: inputs.update-baseline != 'true'
       shell: bash
       run: >-
         node tests/benchmarks/utils/compare.js
diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml
index 521484f109d..a3254f17960 100644
--- a/.github/workflows/benchmarks.yml
+++ b/.github/workflows/benchmarks.yml
@@ -1,6 +1,11 @@
 name: Benchmarks
 on:
   workflow_dispatch:
+    inputs:
+      update-baseline:
+        description: 'Update baseline with current results instead of comparing'
+        type: boolean
+        default: false
   pull_request:
     branches: [main, 'release/v*']
 
@@ -47,6 +52,7 @@ jobs:
         uses: ./.github/actions/tests/run-benchmark-tests
         with:
           os: ${{ matrix.os-name }}
+          update-baseline: ${{ github.event.inputs.update-baseline || 'false' }}
 
       - name: Upload Benchmark Results
         uses: actions/upload-artifact@v6
diff --git a/tests/benchmarks/mounting/collection-mount.bench.ts b/tests/benchmarks/mounting/collection-mount.bench.ts
index 6767b0a222d..dcac927ee8c 100644
--- a/tests/benchmarks/mounting/collection-mount.bench.ts
+++ b/tests/benchmarks/mounting/collection-mount.bench.ts
@@ -71,14 +71,13 @@ test.describe('Benchmark: Collection Mount', () => {
           test.setTimeout((2 + Math.ceil(size / 100) * 2) * 60_000);
           const timings: number[] = [];
 
-          for (let i = 0; i < ITERATIONS_PER_SIZE; i++) {
-            const collectionName = `bench-${format}-${size}-iter-${i}`;
-            const collectionDir = await createTmpDir(`bench-${format}-${size}-${i}`);
-            generateCollection({ dir: collectionDir, name: collectionName, requestCount: size, format });
+          const collectionName = `bench-${format}-${size}`;
+          const collectionDir = await createTmpDir(`bench-${format}-${size}`);
+          generateCollection({ dir: collectionDir, name: collectionName, requestCount: size, format });
 
+          for (let i = 0; i < ITERATIONS_PER_SIZE; i++) {
             const elapsed = await measureCollectionMount(page, electronApp, collectionDir, collectionName);
             timings.push(Math.round(elapsed));
-            await page.waitForTimeout(500);
           }
 
           const key = resultKey(format, size);
diff --git a/tests/benchmarks/utils/compare.js b/tests/benchmarks/utils/compare.js
index 9119e3b04d6..e1cf4555780 100644
--- a/tests/benchmarks/utils/compare.js
+++ b/tests/benchmarks/utils/compare.js
@@ -38,14 +38,14 @@ function loadJSON(filepath) {
   return JSON.parse(readFileSync(filepath, 'utf-8'));
 }
 
-function pctChange(baseline, current) {
+function percentChange(baseline, current) {
   if (baseline === 0) return current === 0 ? 0 : Infinity;
   return ((current - baseline) / baseline) * 100;
 }
 
-function formatPct(pct) {
-  const sign = pct > 0 ? '+' : '';
-  return `${sign}${pct.toFixed(1)}%`;
+function formatChange(change) {
+  const sign = change > 0 ? '+' : '';
+  return `${sign}${change.toFixed(1)}%`;
 }
 
 const args = parseArgs(process.argv);
@@ -59,7 +59,7 @@ const results = loadJSON(args.results);
 const baseline = loadJSON(args.baseline);
 const threshold = baseline.thresholdPercent || 20;
 const resultEntries = results.entries || results;
-const baselineEntries = baseline.entries || baseline.collections || {};
+const baselineEntries = baseline.entries || {};
 
 if (args.updateBaseline) {
   const newBaseline = {
@@ -94,11 +94,11 @@ for (const [key, data] of Object.entries(resultEntries)) {
     continue;
   }
 
-  const meanPct = pctChange(base.mean, data.mean);
-  const p50Pct = pctChange(base.p50, data.p50);
+  const meanChange = percentChange(base.mean, data.mean);
+  const p50Change = percentChange(base.p50, data.p50);
 
-  const meanStatus = meanPct > threshold ? 'FAIL' : meanPct < -threshold ? 'IMPROVED' : 'OK';
-  const p50Status = p50Pct > threshold ? 'FAIL' : p50Pct < -threshold ? 'IMPROVED' : 'OK';
+  const meanStatus = meanChange > threshold ? 'FAIL' : meanChange < -threshold ? 'IMPROVED' : 'OK';
+  const p50Status = p50Change > threshold ? 'FAIL' : p50Change < -threshold ? 'IMPROVED' : 'OK';
 
   if (meanStatus === 'FAIL' || p50Status === 'FAIL') {
     hasRegression = true;
@@ -107,10 +107,10 @@ for (const [key, data] of Object.entries(resultEntries)) {
   rows.push({
     key,
     'mean (ms)': `${Math.round(data.mean)} (baseline: ${base.mean})`,
-    'mean change': formatPct(meanPct),
+    'mean change': formatChange(meanChange),
     'mean status': meanStatus,
     'p50 (ms)': `${Math.round(data.p50)} (baseline: ${base.p50})`,
-    'p50 change': formatPct(p50Pct),
+    'p50 change': formatChange(p50Change),
     'p50 status': p50Status
   });
 }
diff --git a/tests/benchmarks/utils/pr-comment.js b/tests/benchmarks/utils/pr-comment.js
index a0c18dff189..63765ec5baf 100644
--- a/tests/benchmarks/utils/pr-comment.js
+++ b/tests/benchmarks/utils/pr-comment.js
@@ -13,7 +13,7 @@ const fs = require('fs');
 function buildCommentBody(results, baseline, title) {
   const threshold = baseline.thresholdPercent || 20;
   const resultEntries = results.entries || results;
-  const baselineEntries = baseline.entries || baseline.collections || {};
+  const baselineEntries = baseline.entries || {};
   const marker = `## ${title}`;
 
   let body = `${marker}\n\n`;
@@ -26,11 +26,12 @@ function buildCommentBody(results, baseline, title) {
     const base = baselineEntries[key];
     if (!base) continue;
 
-    const pct = ((data.mean - base.mean) / base.mean * 100).toFixed(1);
-    const status = pct > threshold ? '🔴 REGRESSION' : pct < -threshold ? '🟢 IMPROVED' : '✅ OK';
-    if (pct > threshold) hasRegression = true;
+    const changePercent = (data.mean - base.mean) / base.mean * 100;
+    const changeStr = changePercent.toFixed(1);
+    const status = changePercent > threshold ? '🔴 REGRESSION' : changePercent < -threshold ? '🟢 IMPROVED' : '✅ OK';
+    if (changePercent > threshold) hasRegression = true;
 
-    body += `| ${key} | ${Math.round(data.mean)} | ${base.mean} | ${pct > 0 ? '+' : ''}${pct}% | ${status} |\n`;
+    body += `| ${key} | ${Math.round(data.mean)} | ${base.mean} | ${changePercent > 0 ? '+' : ''}${changeStr}% | ${status} |\n`;
   }
 
   body += `\n> Threshold: ${threshold}% regression allowed\n`;

From 0e3661cae0748d548bad26d4bede6fd8609efaa0 Mon Sep 17 00:00:00 2001
From: Chirag Chandrashekhar <cchirag85@gmail.com>
Date: Wed, 6 May 2026 01:21:43 +0530
Subject: [PATCH 3/7] feat: reduce max size to 3000, update baselines from CI
 data, auto-commit on update-baseline

- Reduce max collection size from 5000 to 3000 to keep CI runtime reasonable
- Update baseline values from actual CI run data (worst case across ubuntu/macos/windows)
- Auto-commit updated baseline.json when update-baseline is triggered via workflow_dispatch
- Reuse same collection across iterations for cold vs cached comparison
- Fix string comparison bug and remove dead code from review feedback
- Rename pct variables to changePercent for readability
- Remove unnecessary waitForTimeout between iterations
---
 .github/workflows/benchmarks.yml              | 10 ++++-
 tests/benchmarks/mounting/baseline.json       | 43 +++++++++----------
 .../mounting/collection-mount.bench.ts        |  2 +-
 3 files changed, 31 insertions(+), 24 deletions(-)

diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml
index a3254f17960..fe896d309e7 100644
--- a/.github/workflows/benchmarks.yml
+++ b/.github/workflows/benchmarks.yml
@@ -26,7 +26,7 @@ jobs:
           - os: windows-latest
             os-name: windows
     permissions:
-      contents: read
+      contents: write
       pull-requests: write
     steps:
       - uses: actions/checkout@v6
@@ -64,6 +64,14 @@ jobs:
             benchmark-report/
           retention-days: 30
 
+      - name: Commit Updated Baseline
+        if: github.event.inputs.update-baseline == 'true' && matrix.os-name == 'ubuntu'
+        run: |
+          git config user.name "github-actions[bot]"
+          git config user.email "github-actions[bot]@users.noreply.github.com"
+          git add tests/benchmarks/mounting/baseline.json
+          git diff --staged --quiet || git commit -m "chore: update benchmark baseline" && git push
+
       - name: Comment Benchmark Results on PR
         if: github.event_name == 'pull_request' && !cancelled() && matrix.os-name == 'ubuntu'
         uses: actions/github-script@v7
diff --git a/tests/benchmarks/mounting/baseline.json b/tests/benchmarks/mounting/baseline.json
index 092998a23ce..834e8ed7144 100644
--- a/tests/benchmarks/mounting/baseline.json
+++ b/tests/benchmarks/mounting/baseline.json
@@ -1,46 +1,45 @@
 {
-  "description": "Benchmark baselines for collection mount times. Update by running: node tests/benchmarks/mounting/compare.js --update-baseline",
   "thresholdPercent": 20,
   "entries": {
     "bru-50": {
       "mean": 2000,
-      "p50": 1800
+      "p50": 900
     },
     "bru-200": {
-      "mean": 5000,
-      "p50": 4500
+      "mean": 1500,
+      "p50": 1500
     },
     "bru-500": {
-      "mean": 12000,
-      "p50": 11000
+      "mean": 4000,
+      "p50": 3800
     },
     "bru-1000": {
-      "mean": 25000,
-      "p50": 24000
+      "mean": 10500,
+      "p50": 10000
     },
-    "bru-5000": {
-      "mean": 120000,
-      "p50": 115000
+    "bru-3000": {
+      "mean": 420000,
+      "p50": 400000
     },
     "yml-50": {
-      "mean": 2000,
-      "p50": 1800
+      "mean": 700,
+      "p50": 650
     },
     "yml-200": {
-      "mean": 5000,
-      "p50": 4500
+      "mean": 1500,
+      "p50": 1500
     },
     "yml-500": {
-      "mean": 12000,
-      "p50": 11000
+      "mean": 4500,
+      "p50": 4300
     },
     "yml-1000": {
-      "mean": 25000,
-      "p50": 24000
+      "mean": 13000,
+      "p50": 12600
     },
-    "yml-5000": {
-      "mean": 120000,
-      "p50": 115000
+    "yml-3000": {
+      "mean": 180000,
+      "p50": 180000
     }
   }
 }
diff --git a/tests/benchmarks/mounting/collection-mount.bench.ts b/tests/benchmarks/mounting/collection-mount.bench.ts
index dcac927ee8c..b19008d514c 100644
--- a/tests/benchmarks/mounting/collection-mount.bench.ts
+++ b/tests/benchmarks/mounting/collection-mount.bench.ts
@@ -7,7 +7,7 @@ import { generateCollection, type CollectionFormat } from '../utils/collection-g
 import * as path from 'path';
 import * as fs from 'fs';
 
-const COLLECTION_SIZES = [50, 200, 500, 1000, 5000];
+const COLLECTION_SIZES = [50, 200, 500, 1000, 3000];
 const COLLECTION_FORMATS: CollectionFormat[] = ['bru', 'yml'];
 const ITERATIONS_PER_SIZE = 3;
 

From 4325f21266cac9496d2915fc7236eb41897056db Mon Sep 17 00:00:00 2001
From: Chirag Chandrashekhar <cchirag85@gmail.com>
Date: Wed, 6 May 2026 01:41:38 +0530
Subject: [PATCH 4/7] fix: handle PR comment permission error on fork PRs

- Add continue-on-error to PR comment step since GITHUB_TOKEN lacks write access on cross-fork PRs
---
 .github/workflows/benchmarks.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml
index fe896d309e7..bbe2c940380 100644
--- a/.github/workflows/benchmarks.yml
+++ b/.github/workflows/benchmarks.yml
@@ -74,6 +74,7 @@ jobs:
 
       - name: Comment Benchmark Results on PR
         if: github.event_name == 'pull_request' && !cancelled() && matrix.os-name == 'ubuntu'
+        continue-on-error: true
         uses: actions/github-script@v7
         with:
           script: |

From f80f21d5da4b32b129bc980b621b94d48faaf224 Mon Sep 17 00:00:00 2001
From: Chirag Chandrashekhar <cchirag85@gmail.com>
Date: Wed, 6 May 2026 02:16:23 +0530
Subject: [PATCH 5/7] feat: per-OS baselines from CI run data, auto-commit on
 update-baseline

- Split baseline.json into baseline.ubuntu/macos/windows.json with real CI data
- Action and workflow dynamically reference baseline per OS
- PR comment posted per OS with OS-specific comparison
- Auto-commit updated baseline on workflow_dispatch with update-baseline flag
---
 .../tests/run-benchmark-tests/action.yml      |  4 +-
 .github/workflows/benchmarks.yml              | 12 ++---
 tests/benchmarks/mounting/baseline.json       | 45 -------------------
 tests/benchmarks/mounting/baseline.macos.json | 45 +++++++++++++++++++
 .../benchmarks/mounting/baseline.ubuntu.json  | 45 +++++++++++++++++++
 .../benchmarks/mounting/baseline.windows.json | 45 +++++++++++++++++++
 6 files changed, 143 insertions(+), 53 deletions(-)
 delete mode 100644 tests/benchmarks/mounting/baseline.json
 create mode 100644 tests/benchmarks/mounting/baseline.macos.json
 create mode 100644 tests/benchmarks/mounting/baseline.ubuntu.json
 create mode 100644 tests/benchmarks/mounting/baseline.windows.json

diff --git a/.github/actions/tests/run-benchmark-tests/action.yml b/.github/actions/tests/run-benchmark-tests/action.yml
index 480245c503f..ece9bd3fe3f 100644
--- a/.github/actions/tests/run-benchmark-tests/action.yml
+++ b/.github/actions/tests/run-benchmark-tests/action.yml
@@ -26,7 +26,7 @@ runs:
       run: >-
         node tests/benchmarks/utils/compare.js
         --results tests/benchmarks/results/mounting.json
-        --baseline tests/benchmarks/mounting/baseline.json
+        --baseline tests/benchmarks/mounting/baseline.${{ inputs.os }}.json
         --update-baseline
 
     - name: Compare Against Baseline
@@ -35,4 +35,4 @@ runs:
       run: >-
         node tests/benchmarks/utils/compare.js
         --results tests/benchmarks/results/mounting.json
-        --baseline tests/benchmarks/mounting/baseline.json
+        --baseline tests/benchmarks/mounting/baseline.${{ inputs.os }}.json
diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml
index bbe2c940380..304af458483 100644
--- a/.github/workflows/benchmarks.yml
+++ b/.github/workflows/benchmarks.yml
@@ -65,15 +65,15 @@ jobs:
           retention-days: 30
 
       - name: Commit Updated Baseline
-        if: github.event.inputs.update-baseline == 'true' && matrix.os-name == 'ubuntu'
+        if: github.event.inputs.update-baseline == 'true'
         run: |
           git config user.name "github-actions[bot]"
           git config user.email "github-actions[bot]@users.noreply.github.com"
-          git add tests/benchmarks/mounting/baseline.json
-          git diff --staged --quiet || git commit -m "chore: update benchmark baseline" && git push
+          git add tests/benchmarks/mounting/baseline.${{ matrix.os-name }}.json
+          git diff --staged --quiet || git commit -m "chore: update ${{ matrix.os-name }} benchmark baseline" && git pull --rebase --autostash && git push
 
       - name: Comment Benchmark Results on PR
-        if: github.event_name == 'pull_request' && !cancelled() && matrix.os-name == 'ubuntu'
+        if: github.event_name == 'pull_request' && !cancelled()
         continue-on-error: true
         uses: actions/github-script@v7
         with:
@@ -83,6 +83,6 @@ jobs:
               github,
               context,
               resultsPath: 'tests/benchmarks/results/mounting.json',
-              baselinePath: 'tests/benchmarks/mounting/baseline.json',
-              title: 'Benchmark Results — Collection Mount'
+              baselinePath: 'tests/benchmarks/mounting/baseline.${{ matrix.os-name }}.json',
+              title: 'Benchmark Results — Collection Mount (${{ matrix.os-name }})'
             });
diff --git a/tests/benchmarks/mounting/baseline.json b/tests/benchmarks/mounting/baseline.json
deleted file mode 100644
index 834e8ed7144..00000000000
--- a/tests/benchmarks/mounting/baseline.json
+++ /dev/null
@@ -1,45 +0,0 @@
-{
-  "thresholdPercent": 20,
-  "entries": {
-    "bru-50": {
-      "mean": 2000,
-      "p50": 900
-    },
-    "bru-200": {
-      "mean": 1500,
-      "p50": 1500
-    },
-    "bru-500": {
-      "mean": 4000,
-      "p50": 3800
-    },
-    "bru-1000": {
-      "mean": 10500,
-      "p50": 10000
-    },
-    "bru-3000": {
-      "mean": 420000,
-      "p50": 400000
-    },
-    "yml-50": {
-      "mean": 700,
-      "p50": 650
-    },
-    "yml-200": {
-      "mean": 1500,
-      "p50": 1500
-    },
-    "yml-500": {
-      "mean": 4500,
-      "p50": 4300
-    },
-    "yml-1000": {
-      "mean": 13000,
-      "p50": 12600
-    },
-    "yml-3000": {
-      "mean": 180000,
-      "p50": 180000
-    }
-  }
-}
diff --git a/tests/benchmarks/mounting/baseline.macos.json b/tests/benchmarks/mounting/baseline.macos.json
new file mode 100644
index 00000000000..d7de548c4bc
--- /dev/null
+++ b/tests/benchmarks/mounting/baseline.macos.json
@@ -0,0 +1,45 @@
+{
+  "thresholdPercent": 20,
+  "entries": {
+    "bru-50": {
+      "mean": 2200,
+      "p50": 1000
+    },
+    "bru-200": {
+      "mean": 1300,
+      "p50": 1100
+    },
+    "bru-500": {
+      "mean": 3600,
+      "p50": 3500
+    },
+    "bru-1000": {
+      "mean": 9100,
+      "p50": 9000
+    },
+    "bru-3000": {
+      "mean": 185000,
+      "p50": 183000
+    },
+    "yml-50": {
+      "mean": 700,
+      "p50": 650
+    },
+    "yml-200": {
+      "mean": 1400,
+      "p50": 1250
+    },
+    "yml-500": {
+      "mean": 3900,
+      "p50": 3700
+    },
+    "yml-1000": {
+      "mean": 11700,
+      "p50": 11900
+    },
+    "yml-3000": {
+      "mean": 85000,
+      "p50": 80000
+    }
+  }
+}
diff --git a/tests/benchmarks/mounting/baseline.ubuntu.json b/tests/benchmarks/mounting/baseline.ubuntu.json
new file mode 100644
index 00000000000..0d4ff8c6806
--- /dev/null
+++ b/tests/benchmarks/mounting/baseline.ubuntu.json
@@ -0,0 +1,45 @@
+{
+  "thresholdPercent": 20,
+  "entries": {
+    "bru-50": {
+      "mean": 1500,
+      "p50": 700
+    },
+    "bru-200": {
+      "mean": 1200,
+      "p50": 1150
+    },
+    "bru-500": {
+      "mean": 2900,
+      "p50": 2900
+    },
+    "bru-1000": {
+      "mean": 8000,
+      "p50": 8000
+    },
+    "bru-3000": {
+      "mean": 175000,
+      "p50": 170000
+    },
+    "yml-50": {
+      "mean": 600,
+      "p50": 560
+    },
+    "yml-200": {
+      "mean": 1200,
+      "p50": 1200
+    },
+    "yml-500": {
+      "mean": 3500,
+      "p50": 3400
+    },
+    "yml-1000": {
+      "mean": 10700,
+      "p50": 10650
+    },
+    "yml-3000": {
+      "mean": 85000,
+      "p50": 80000
+    }
+  }
+}
diff --git a/tests/benchmarks/mounting/baseline.windows.json b/tests/benchmarks/mounting/baseline.windows.json
new file mode 100644
index 00000000000..f3be08bb31d
--- /dev/null
+++ b/tests/benchmarks/mounting/baseline.windows.json
@@ -0,0 +1,45 @@
+{
+  "thresholdPercent": 20,
+  "entries": {
+    "bru-50": {
+      "mean": 2700,
+      "p50": 800
+    },
+    "bru-200": {
+      "mean": 1500,
+      "p50": 1400
+    },
+    "bru-500": {
+      "mean": 3500,
+      "p50": 3500
+    },
+    "bru-1000": {
+      "mean": 9500,
+      "p50": 9400
+    },
+    "bru-3000": {
+      "mean": 195000,
+      "p50": 190000
+    },
+    "yml-50": {
+      "mean": 600,
+      "p50": 570
+    },
+    "yml-200": {
+      "mean": 1350,
+      "p50": 1300
+    },
+    "yml-500": {
+      "mean": 3800,
+      "p50": 3700
+    },
+    "yml-1000": {
+      "mean": 11000,
+      "p50": 11000
+    },
+    "yml-3000": {
+      "mean": 90000,
+      "p50": 88000
+    }
+  }
+}

From 791d5288f9c5a186bcae27ee99fb4ad3feeb245a Mon Sep 17 00:00:00 2001
From: Chirag Chandrashekhar <cchirag85@gmail.com>
Date: Wed, 6 May 2026 15:33:13 +0530
Subject: [PATCH 6/7] feat: include suite metadata (name, unit, direction) in
 benchmark results

- writeResults now accepts SuiteMeta with name, unit, and direction
- Results JSON includes suite field for the visualization dashboard to ingest
- Mounting benchmark outputs unit: ms, direction: smaller
---
 .../benchmarks/mounting/collection-mount.bench.ts |  2 +-
 tests/benchmarks/utils/results.ts                 | 15 +++++++++++++--
 2 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/tests/benchmarks/mounting/collection-mount.bench.ts b/tests/benchmarks/mounting/collection-mount.bench.ts
index b19008d514c..cc17c01b921 100644
--- a/tests/benchmarks/mounting/collection-mount.bench.ts
+++ b/tests/benchmarks/mounting/collection-mount.bench.ts
@@ -108,7 +108,7 @@ test.describe('Benchmark: Collection Mount', () => {
       entries[key] = buildResultEntry(timings, { format, size: Number(sizeStr) });
     }
 
-    writeResults(outputPath, entries);
+    writeResults(outputPath, { name: 'Collection Mount', unit: 'ms', direction: 'smaller' }, entries);
     console.log(`[BENCHMARK] Results written to ${outputPath}`);
   });
 });
diff --git a/tests/benchmarks/utils/results.ts b/tests/benchmarks/utils/results.ts
index 6ba6fad0eb3..8bf018a0187 100644
--- a/tests/benchmarks/utils/results.ts
+++ b/tests/benchmarks/utils/results.ts
@@ -3,6 +3,7 @@
  *
  * Results shape (written by benchmark tests):
  * {
+ *   "suite": { "name": "...", "unit": "ms", "direction": "smaller" },
  *   "entries": {
  *     "<key>": { mean, median, p50, p90, p99, stdDev, min, max, count, timings, ...meta }
  *   }
@@ -20,6 +21,15 @@
 import { existsSync, readFileSync, writeFileSync } from 'fs';
 import { summarize } from './stats';
 
+export type Direction = 'smaller' | 'bigger';
+export type Unit = 'ms' | 's' | 'ops/s' | 'bytes' | '%' | 'count';
+
+export interface SuiteMeta {
+  name: string;
+  unit: Unit;
+  direction: Direction;
+}
+
 export interface ResultEntry {
   mean: number;
   median: number;
@@ -35,6 +45,7 @@ export interface ResultEntry {
 }
 
 export interface ResultsFile {
+  suite: SuiteMeta;
   entries: Record<string, ResultEntry>;
 }
 
@@ -55,8 +66,8 @@ export function readResults(filePath: string): ResultsFile {
   return JSON.parse(readFileSync(filePath, 'utf-8'));
 }
 
-export function writeResults(filePath: string, entries: Record<string, ResultEntry>) {
-  const data: ResultsFile = { entries };
+export function writeResults(filePath: string, suite: SuiteMeta, entries: Record<string, ResultEntry>) {
+  const data: ResultsFile = { suite, entries };
   writeFileSync(filePath, JSON.stringify(data, null, 2));
 }
 

From 29668dab4d702984a080b6aaec088af68c9a458e Mon Sep 17 00:00:00 2001
From: Chirag Chandrashekhar <cchirag85@gmail.com>
Date: Mon, 11 May 2026 15:45:37 +0530
Subject: [PATCH 7/7] feat: extract timing helpers, capture raw float ms in
 mount benchmark

---
 .../mounting/collection-mount.bench.ts        |  7 +++---
 tests/benchmarks/utils/timing.ts              | 25 +++++++++++++++++++
 2 files changed, 29 insertions(+), 3 deletions(-)
 create mode 100644 tests/benchmarks/utils/timing.ts

diff --git a/tests/benchmarks/mounting/collection-mount.bench.ts b/tests/benchmarks/mounting/collection-mount.bench.ts
index cc17c01b921..82e3046ea18 100644
--- a/tests/benchmarks/mounting/collection-mount.bench.ts
+++ b/tests/benchmarks/mounting/collection-mount.bench.ts
@@ -3,6 +3,7 @@ import { type ElectronApplication, type Page } from '@playwright/test';
 import { openCollection, closeAllCollections } from '../../utils/page';
 import { summarize } from '../utils/stats';
 import { writeResults, buildResultEntry, type ResultEntry } from '../utils/results';
+import { startTimer } from '../utils/timing';
 import { generateCollection, type CollectionFormat } from '../utils/collection-generator';
 import * as path from 'path';
 import * as fs from 'fs';
@@ -35,7 +36,7 @@ async function measureCollectionMount(
     });
   });
 
-  const start = performance.now();
+  const timer = startTimer();
 
   await page.getByTestId('collections-header-add-menu').click();
   await page.locator('.tippy-box .dropdown-item').filter({ hasText: 'Open collection' }).click();
@@ -44,7 +45,7 @@ async function measureCollectionMount(
   await openCollection(page, collectionName);
   await page.evaluate(() => (window as any).__benchMountDone);
 
-  const elapsed = performance.now() - start;
+  const elapsed = timer.elapsed();
 
   await electronApp.evaluate(({ dialog }) => {
     if ((dialog as any).__originalShowOpenDialog) {
@@ -77,7 +78,7 @@ test.describe('Benchmark: Collection Mount', () => {
 
           for (let i = 0; i < ITERATIONS_PER_SIZE; i++) {
             const elapsed = await measureCollectionMount(page, electronApp, collectionDir, collectionName);
-            timings.push(Math.round(elapsed));
+            timings.push(elapsed);
           }
 
           const key = resultKey(format, size);
diff --git a/tests/benchmarks/utils/timing.ts b/tests/benchmarks/utils/timing.ts
new file mode 100644
index 00000000000..984d1dc3d8b
--- /dev/null
+++ b/tests/benchmarks/utils/timing.ts
@@ -0,0 +1,25 @@
+/**
+ * Timing utilities for benchmarks.
+ *
+ * Capture:  const t = startTimer(); ...do work...; const ms = t.elapsed();
+ * Convert:  convertDuration(1500, 'ms', 's') === 1.5
+ */
+
+export type DurationUnit = 'ns' | 'us' | 'ms' | 's';
+
+// Integer nanosecond factors keep whole-unit conversions exact (1e-3 / 1e-6 is not exactly 1000).
+const DURATION_IN_NS: Record<DurationUnit, number> = {
+  ns: 1, us: 1e3, ms: 1e6, s: 1e9
+};
+
+/** Starts a timer; `elapsed()` returns the milliseconds since this call, per performance.now(). */
+export function startTimer() {
+  const start = performance.now();
+  return { elapsed: () => performance.now() - start };
+}
+
+/** Converts `value` from unit `from` to unit `to`; returns `value` unchanged when the units match. */
+export function convertDuration(value: number, from: DurationUnit, to: DurationUnit): number {
+  if (from === to) return value;
+  return (value * DURATION_IN_NS[from]) / DURATION_IN_NS[to];
+}