From 85859606277183d322f1330a4450f7213d61fcaf Mon Sep 17 00:00:00 2001 From: Chirag Chandrashekhar Date: Tue, 5 May 2026 19:39:28 +0530 Subject: [PATCH 1/7] feat: add benchmark framework for collection mount performance - Playwright benchmark tests measuring collection mount time across bru/yml formats and sizes (50-5000 requests) - IPC listener approach for precise mount-complete signal - Generic benchmark utils: stats, results I/O, baseline comparison, PR commenting - Collection generator using @usebruno/filestore serializers - CI workflow running on ubuntu, macos, and windows with PR comment reporting - Regression detection against committed baselines with configurable threshold --- .../tests/run-benchmark-tests/action.yml | 25 ++++ .github/workflows/benchmarks.yml | 73 ++++++++++ .gitignore | 4 + package.json | 1 + playwright.benchmark.config.ts | 38 ++++++ playwright.config.ts | 3 +- tests/benchmarks/mounting/baseline.json | 46 +++++++ .../mounting/collection-mount.bench.ts | 115 ++++++++++++++++ .../benchmarks/utils/collection-generator.ts | 67 +++++++++ tests/benchmarks/utils/compare.js | 129 ++++++++++++++++++ tests/benchmarks/utils/pr-comment.js | 82 +++++++++++ tests/benchmarks/utils/results.ts | 81 +++++++++++ tests/benchmarks/utils/stats.ts | 111 +++++++++++++++ 13 files changed, 774 insertions(+), 1 deletion(-) create mode 100644 .github/actions/tests/run-benchmark-tests/action.yml create mode 100644 .github/workflows/benchmarks.yml create mode 100644 playwright.benchmark.config.ts create mode 100644 tests/benchmarks/mounting/baseline.json create mode 100644 tests/benchmarks/mounting/collection-mount.bench.ts create mode 100644 tests/benchmarks/utils/collection-generator.ts create mode 100644 tests/benchmarks/utils/compare.js create mode 100644 tests/benchmarks/utils/pr-comment.js create mode 100644 tests/benchmarks/utils/results.ts create mode 100644 tests/benchmarks/utils/stats.ts diff --git a/.github/actions/tests/run-benchmark-tests/action.yml 
b/.github/actions/tests/run-benchmark-tests/action.yml new file mode 100644 index 00000000000..f089fbd9ae0 --- /dev/null +++ b/.github/actions/tests/run-benchmark-tests/action.yml @@ -0,0 +1,25 @@ +name: 'Run Benchmark Tests' +description: 'Run Playwright benchmark tests and compare against baseline' +inputs: + os: + description: 'Operating system (ubuntu, macos, windows)' + default: 'ubuntu' +runs: + using: 'composite' + steps: + - name: Run Benchmark Tests (Ubuntu) + if: inputs.os == 'ubuntu' + shell: bash + run: xvfb-run npm run test:benchmark + + - name: Run Benchmark Tests + if: inputs.os != 'ubuntu' + shell: bash + run: npm run test:benchmark + + - name: Compare Against Baseline + shell: bash + run: >- + node tests/benchmarks/utils/compare.js + --results tests/benchmarks/results/mounting.json + --baseline tests/benchmarks/mounting/baseline.json diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml new file mode 100644 index 00000000000..521484f109d --- /dev/null +++ b/.github/workflows/benchmarks.yml @@ -0,0 +1,73 @@ +name: Benchmarks +on: + workflow_dispatch: + pull_request: + branches: [main, 'release/v*'] + +jobs: + benchmark: + name: Performance Benchmarks (${{ matrix.os }}) + timeout-minutes: 60 + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-24.04, macos-latest, windows-latest] + include: + - os: ubuntu-24.04 + os-name: ubuntu + - os: macos-latest + os-name: macos + - os: windows-latest + os-name: windows + permissions: + contents: read + pull-requests: write + steps: + - uses: actions/checkout@v6 + + - name: Install System Dependencies (Ubuntu) + if: matrix.os-name == 'ubuntu' + run: | + sudo apt-get update + sudo apt-get --no-install-recommends install -y \ + libglib2.0-0 libnss3 libdbus-1-3 libatk1.0-0 libatk-bridge2.0-0 libcups2 libgtk-3-0 libasound2t64 \ + xvfb + + - name: Setup Node Dependencies + uses: ./.github/actions/common/setup-node-deps + + - name: Configure Chrome Sandbox + if: 
matrix.os-name == 'ubuntu' + run: | + sudo chown root node_modules/electron/dist/chrome-sandbox + sudo chmod 4755 node_modules/electron/dist/chrome-sandbox + + - name: Run Benchmark Tests + uses: ./.github/actions/tests/run-benchmark-tests + with: + os: ${{ matrix.os-name }} + + - name: Upload Benchmark Results + uses: actions/upload-artifact@v6 + if: ${{ !cancelled() }} + with: + name: benchmark-results-${{ matrix.os-name }} + path: | + tests/benchmarks/results/ + benchmark-report/ + retention-days: 30 + + - name: Comment Benchmark Results on PR + if: github.event_name == 'pull_request' && !cancelled() && matrix.os-name == 'ubuntu' + uses: actions/github-script@v7 + with: + script: | + const run = require('./tests/benchmarks/utils/pr-comment.js'); + await run({ + github, + context, + resultsPath: 'tests/benchmarks/results/mounting.json', + baselinePath: 'tests/benchmarks/mounting/baseline.json', + title: 'Benchmark Results — Collection Mount' + }); diff --git a/.gitignore b/.gitignore index 8dfcfb9e86e..acfe558c205 100644 --- a/.gitignore +++ b/.gitignore @@ -58,6 +58,10 @@ skills-lock.json # Playwright /blob-report/ +# Benchmark results (generated at runtime) +tests/benchmarks/results/ +/benchmark-report/ + # Development plan files CLAUDE.md AGENTS.md diff --git a/package.json b/package.json index e4c3c737319..fd9089b9765 100644 --- a/package.json +++ b/package.json @@ -83,6 +83,7 @@ "test:e2e": "playwright test --project=default", "test:e2e:ssl": "playwright test --project=ssl", "test:e2e:auth": "playwright test --project=auth", + "test:benchmark": "playwright test --config=playwright.benchmark.config.ts", "lint": "cross-env NODE_OPTIONS=\"--max_old_space_size=4096\" npx eslint", "lint:fix": "cross-env NODE_OPTIONS=\"--max_old_space_size=4096\" npx eslint --fix", "prepare": "husky" diff --git a/playwright.benchmark.config.ts b/playwright.benchmark.config.ts new file mode 100644 index 00000000000..69ecd66f44c --- /dev/null +++ b/playwright.benchmark.config.ts @@ 
-0,0 +1,38 @@ +import { defineConfig } from '@playwright/test'; + +export default defineConfig({ + fullyParallel: false, + forbidOnly: !!process.env.CI, + retries: 0, + workers: 1, + reporter: [ + ['list'], + ['json', { outputFile: 'benchmark-report/results.json' }] + ], + + use: { + trace: 'off' + }, + + projects: [ + { + name: 'benchmarks', + testDir: './tests/benchmarks', + testMatch: '**/*.bench.ts' + } + ], + + webServer: [ + { + command: 'npm run dev:web', + url: 'http://localhost:3000', + reuseExistingServer: !process.env.CI, + timeout: 10 * 60 * 1000 + } + ], + + timeout: 10 * 60 * 1000, + expect: { + timeout: 120_000 + } +}); diff --git a/playwright.config.ts b/playwright.config.ts index f1ec9b2e2a7..eb758f08d5f 100644 --- a/playwright.config.ts +++ b/playwright.config.ts @@ -23,7 +23,8 @@ export default defineConfig({ testDir: './tests', testIgnore: [ 'ssl/**', // custom CA certificate tests require separate server setup and certificate generation - 'auth/**' // auth tests have their own project + 'auth/**', // auth tests have their own project + 'benchmarks/**' ] }, { diff --git a/tests/benchmarks/mounting/baseline.json b/tests/benchmarks/mounting/baseline.json new file mode 100644 index 00000000000..092998a23ce --- /dev/null +++ b/tests/benchmarks/mounting/baseline.json @@ -0,0 +1,46 @@ +{ + "description": "Benchmark baselines for collection mount times. 
Update by running: node tests/benchmarks/mounting/compare.js --update-baseline", + "thresholdPercent": 20, + "entries": { + "bru-50": { + "mean": 2000, + "p50": 1800 + }, + "bru-200": { + "mean": 5000, + "p50": 4500 + }, + "bru-500": { + "mean": 12000, + "p50": 11000 + }, + "bru-1000": { + "mean": 25000, + "p50": 24000 + }, + "bru-5000": { + "mean": 120000, + "p50": 115000 + }, + "yml-50": { + "mean": 2000, + "p50": 1800 + }, + "yml-200": { + "mean": 5000, + "p50": 4500 + }, + "yml-500": { + "mean": 12000, + "p50": 11000 + }, + "yml-1000": { + "mean": 25000, + "p50": 24000 + }, + "yml-5000": { + "mean": 120000, + "p50": 115000 + } + } +} diff --git a/tests/benchmarks/mounting/collection-mount.bench.ts b/tests/benchmarks/mounting/collection-mount.bench.ts new file mode 100644 index 00000000000..6767b0a222d --- /dev/null +++ b/tests/benchmarks/mounting/collection-mount.bench.ts @@ -0,0 +1,115 @@ +import { test } from '../../../playwright'; +import { type ElectronApplication, type Page } from '@playwright/test'; +import { openCollection, closeAllCollections } from '../../utils/page'; +import { summarize } from '../utils/stats'; +import { writeResults, buildResultEntry, type ResultEntry } from '../utils/results'; +import { generateCollection, type CollectionFormat } from '../utils/collection-generator'; +import * as path from 'path'; +import * as fs from 'fs'; + +const COLLECTION_SIZES = [50, 200, 500, 1000, 5000]; +const COLLECTION_FORMATS: CollectionFormat[] = ['bru', 'yml']; +const ITERATIONS_PER_SIZE = 3; + +async function measureCollectionMount( + page: Page, + electronApp: ElectronApplication, + collectionDir: string, + collectionName: string +): Promise { + await electronApp.evaluate( + ({ dialog }, { dir }) => { + (dialog as any).__originalShowOpenDialog ??= dialog.showOpenDialog; + dialog.showOpenDialog = async () => ({ canceled: false, filePaths: [dir] }); + }, + { dir: collectionDir } + ); + + await page.evaluate(() => { + (window as any).__benchMountDone = 
new Promise((resolve) => { + const off = (window as any).ipcRenderer.on('main:collection-loading-state-updated', (val: any) => { + if (!val.isLoading) { + off(); resolve(); + } + }); + }); + }); + + const start = performance.now(); + + await page.getByTestId('collections-header-add-menu').click(); + await page.locator('.tippy-box .dropdown-item').filter({ hasText: 'Open collection' }).click(); + await page.locator('#sidebar-collection-name').filter({ hasText: collectionName }).waitFor({ state: 'visible' }); + + await openCollection(page, collectionName); + await page.evaluate(() => (window as any).__benchMountDone); + + const elapsed = performance.now() - start; + + await electronApp.evaluate(({ dialog }) => { + if ((dialog as any).__originalShowOpenDialog) { + dialog.showOpenDialog = (dialog as any).__originalShowOpenDialog; + } + }); + + await closeAllCollections(page); + + return elapsed; +} + +function resultKey(format: CollectionFormat, size: number): string { + return `${format}-${size}`; +} + +test.describe('Benchmark: Collection Mount', () => { + const results: Record = {}; + + for (const format of COLLECTION_FORMATS) { + test.describe(`format: ${format}`, () => { + for (const size of COLLECTION_SIZES) { + test(`mount ${format} collection with ${size} requests`, async ({ page, electronApp, createTmpDir }) => { + test.setTimeout((2 + Math.ceil(size / 100) * 2) * 60_000); + const timings: number[] = []; + + for (let i = 0; i < ITERATIONS_PER_SIZE; i++) { + const collectionName = `bench-${format}-${size}-iter-${i}`; + const collectionDir = await createTmpDir(`bench-${format}-${size}-${i}`); + generateCollection({ dir: collectionDir, name: collectionName, requestCount: size, format }); + + const elapsed = await measureCollectionMount(page, electronApp, collectionDir, collectionName); + timings.push(Math.round(elapsed)); + await page.waitForTimeout(500); + } + + const key = resultKey(format, size); + results[key] = timings; + + const stats = summarize(timings); 
+ const r = (v: number) => Math.round(v); + console.log(`[BENCHMARK] ${format} ${size} requests — mean: ${r(stats.mean)}ms, median: ${r(stats.median)}ms, p90: ${r(stats.p90)}ms, stdDev: ${r(stats.stdDev)}ms, raw: [${timings.join(', ')}]`); + + test.info().annotations.push({ + type: 'benchmark', + description: JSON.stringify({ format, size, ...stats, timings }) + }); + }); + } + }); + } + + test.afterAll(async () => { + const resultsDir = path.join(process.cwd(), 'tests', 'benchmarks', 'results'); + fs.mkdirSync(resultsDir, { recursive: true }); + const outputPath = path.join(resultsDir, 'mounting.json'); + const entries: Record = {}; + + for (const [key, timings] of Object.entries(results)) { + if (timings.length === 0) continue; + const [format, sizeStr] = key.split('-'); + entries[key] = buildResultEntry(timings, { format, size: Number(sizeStr) }); + } + + writeResults(outputPath, entries); + console.log(`[BENCHMARK] Results written to ${outputPath}`); + }); +}); diff --git a/tests/benchmarks/utils/collection-generator.ts b/tests/benchmarks/utils/collection-generator.ts new file mode 100644 index 00000000000..582076eb4b1 --- /dev/null +++ b/tests/benchmarks/utils/collection-generator.ts @@ -0,0 +1,67 @@ +import { stringifyRequest, stringifyCollection, stringifyFolder } from '@usebruno/filestore'; +import type { BrunoItem } from '@usebruno/schema-types'; +import * as path from 'path'; +import * as fs from 'fs'; + +export type CollectionFormat = 'bru' | 'yml'; + +export function buildRequestItem(seq: number): BrunoItem { + return { + uid: `req-${seq}`, + type: 'http-request', + name: `request-${seq}`, + seq, + request: { + method: 'GET', + url: `https://example.com/api/v1/resource/${seq}`, + headers: [ + { uid: `h1-${seq}`, name: 'Content-Type', value: 'application/json', enabled: true }, + { uid: `h2-${seq}`, name: 'Accept', value: 'application/json', enabled: true } + ], + body: { mode: 'none' }, + auth: { mode: 'none' } + } + } as BrunoItem; +} + +export 
interface GenerateCollectionOptions { + dir: string; + name: string; + requestCount: number; + format: CollectionFormat; + requestsPerFolder?: number; +} + +export function generateCollection({ + dir, + name, + requestCount, + format, + requestsPerFolder = 10 +}: GenerateCollectionOptions) { + if (format === 'bru') { + fs.writeFileSync(path.join(dir, 'bruno.json'), JSON.stringify({ version: '1', name, type: 'collection' }, null, 2)); + fs.writeFileSync(path.join(dir, 'collection.bru'), stringifyCollection({ name } as any, {}, { format: 'bru' }) || `meta {\n name: ${name}\n}\n`); + } else { + const ymlContent = stringifyCollection({ name } as any, { name, type: 'collection', opencollection: '1.0.0' }, { format: 'yml' }); + fs.writeFileSync(path.join(dir, 'opencollection.yml'), ymlContent); + } + + const ext = format === 'bru' ? 'bru' : 'yml'; + const folderFile = format === 'bru' ? 'folder.bru' : 'folder.yml'; + const folderCount = Math.ceil(requestCount / requestsPerFolder); + + Array.from({ length: folderCount }).forEach((_, f) => { + const folderPath = path.join(dir, `folder-${f}`); + fs.mkdirSync(folderPath, { recursive: true }); + + const folderContent = stringifyFolder({ name: `folder-${f}` }, { format }); + fs.writeFileSync(path.join(folderPath, folderFile), folderContent || `meta {\n name: folder-${f}\n}\n`); + + const count = Math.min(requestsPerFolder, requestCount - f * requestsPerFolder); + Array.from({ length: count }).forEach((_, r) => { + const seq = f * requestsPerFolder + r + 1; + fs.writeFileSync(path.join(folderPath, `request-${seq}.${ext}`), stringifyRequest(buildRequestItem(seq), { format })); + }); + }); +} diff --git a/tests/benchmarks/utils/compare.js b/tests/benchmarks/utils/compare.js new file mode 100644 index 00000000000..9119e3b04d6 --- /dev/null +++ b/tests/benchmarks/utils/compare.js @@ -0,0 +1,129 @@ +#!/usr/bin/env node + +/** + * Generic benchmark comparison: compares results against a baseline and exits + * with code 1 if any 
metric exceeds the allowed regression threshold. + * + * Usage: + * node tests/benchmarks/utils/compare.js --results --baseline [--update-baseline] + * + * Examples: + * node tests/benchmarks/utils/compare.js \ + * --results benchmark-results.json \ + * --baseline tests/benchmarks/mounting/baseline.json + * + * node tests/benchmarks/utils/compare.js \ + * --results benchmark-results.json \ + * --baseline tests/benchmarks/mounting/baseline.json \ + * --update-baseline + */ + +import { existsSync, readFileSync, writeFileSync } from 'fs'; + +function parseArgs(argv) { + const args = {}; + for (let i = 2; i < argv.length; i++) { + if (argv[i] === '--results') args.results = argv[++i]; + else if (argv[i] === '--baseline') args.baseline = argv[++i]; + else if (argv[i] === '--update-baseline') args.updateBaseline = true; + } + return args; +} + +function loadJSON(filepath) { + if (!existsSync(filepath)) { + console.error(`File not found: ${filepath}`); + process.exit(1); + } + return JSON.parse(readFileSync(filepath, 'utf-8')); +} + +function pctChange(baseline, current) { + if (baseline === 0) return current === 0 ? 0 : Infinity; + return ((current - baseline) / baseline) * 100; +} + +function formatPct(pct) { + const sign = pct > 0 ? 
'+' : ''; + return `${sign}${pct.toFixed(1)}%`; +} + +const args = parseArgs(process.argv); + +if (!args.results || !args.baseline) { + console.error('Usage: compare.js --results --baseline [--update-baseline]'); + process.exit(1); +} + +const results = loadJSON(args.results); +const baseline = loadJSON(args.baseline); +const threshold = baseline.thresholdPercent || 20; +const resultEntries = results.entries || results; +const baselineEntries = baseline.entries || baseline.collections || {}; + +if (args.updateBaseline) { + const newBaseline = { + thresholdPercent: threshold, + entries: {} + }; + for (const [key, data] of Object.entries(resultEntries)) { + newBaseline.entries[key] = { + mean: data.mean, + p50: data.p50 + }; + } + writeFileSync(args.baseline, JSON.stringify(newBaseline, null, 2) + '\n'); + console.log(`Baseline updated at ${args.baseline}`); + process.exit(0); +} + +let hasRegression = false; +const rows = []; + +console.log(''); +console.log('='.repeat(72)); +console.log(' BENCHMARK COMPARISON'); +console.log('='.repeat(72)); +console.log(` Regression threshold: ${threshold}%`); +console.log(''); + +for (const [key, data] of Object.entries(resultEntries)) { + const base = baselineEntries[key]; + if (!base) { + console.log(` [SKIP] No baseline for ${key}`); + continue; + } + + const meanPct = pctChange(base.mean, data.mean); + const p50Pct = pctChange(base.p50, data.p50); + + const meanStatus = meanPct > threshold ? 'FAIL' : meanPct < -threshold ? 'IMPROVED' : 'OK'; + const p50Status = p50Pct > threshold ? 'FAIL' : p50Pct < -threshold ? 
'IMPROVED' : 'OK'; + + if (meanStatus === 'FAIL' || p50Status === 'FAIL') { + hasRegression = true; + } + + rows.push({ + key, + 'mean (ms)': `${Math.round(data.mean)} (baseline: ${base.mean})`, + 'mean change': formatPct(meanPct), + 'mean status': meanStatus, + 'p50 (ms)': `${Math.round(data.p50)} (baseline: ${base.p50})`, + 'p50 change': formatPct(p50Pct), + 'p50 status': p50Status + }); +} + +console.table(rows); +console.log(''); + +if (hasRegression) { + console.error(`FAILED: One or more benchmarks regressed beyond the ${threshold}% threshold.`); + console.error('If this regression is expected, update the baseline:'); + console.error(` node tests/benchmarks/utils/compare.js --results ${args.results} --baseline ${args.baseline} --update-baseline`); + process.exit(1); +} else { + console.log('PASSED: All benchmarks are within the acceptable threshold.'); + process.exit(0); +} diff --git a/tests/benchmarks/utils/pr-comment.js b/tests/benchmarks/utils/pr-comment.js new file mode 100644 index 00000000000..a0c18dff189 --- /dev/null +++ b/tests/benchmarks/utils/pr-comment.js @@ -0,0 +1,82 @@ +#!/usr/bin/env node + +/** + * Generic benchmark PR comment: posts/updates a comparison table on a PR. 
+ * + * Called by CI via actions/github-script: + * const run = require('./tests/benchmarks/utils/pr-comment.js'); + * await run({ github, context, resultsPath, baselinePath, title }); + */ + +const fs = require('fs'); + +function buildCommentBody(results, baseline, title) { + const threshold = baseline.thresholdPercent || 20; + const resultEntries = results.entries || results; + const baselineEntries = baseline.entries || baseline.collections || {}; + const marker = `## ${title}`; + + let body = `${marker}\n\n`; + body += `| Key | Mean (ms) | Baseline Mean | Change | Status |\n`; + body += `|---|---|---|---|---|\n`; + + let hasRegression = false; + + for (const [key, data] of Object.entries(resultEntries)) { + const base = baselineEntries[key]; + if (!base) continue; + + const pct = ((data.mean - base.mean) / base.mean * 100).toFixed(1); + const status = pct > threshold ? 'šŸ”“ REGRESSION' : pct < -threshold ? '🟢 IMPROVED' : 'āœ… OK'; + if (pct > threshold) hasRegression = true; + + body += `| ${key} | ${Math.round(data.mean)} | ${base.mean} | ${pct > 0 ? 
'+' : ''}${pct}% | ${status} |\n`; + } + + body += `\n> Threshold: ${threshold}% regression allowed\n`; + + if (hasRegression) { + body += '\nāš ļø **Performance regression detected.** If expected, update the baseline.\n'; + } + + return { body, marker }; +} + +async function postOrUpdateComment(github, context, body, marker) { + const { data: comments } = await github.rest.issues.listComments({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: context.issue.number + }); + + const existing = comments.find((c) => c.body.startsWith(marker)); + + if (existing) { + await github.rest.issues.updateComment({ + owner: context.repo.owner, + repo: context.repo.repo, + comment_id: existing.id, + body + }); + } else { + await github.rest.issues.createComment({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: context.issue.number, + body + }); + } +} + +module.exports = async function run({ github, context, resultsPath, baselinePath, title }) { + if (!fs.existsSync(resultsPath)) { + console.log(`No benchmark results found at ${resultsPath}, skipping comment.`); + return; + } + + const results = JSON.parse(fs.readFileSync(resultsPath, 'utf-8')); + const baseline = JSON.parse(fs.readFileSync(baselinePath, 'utf-8')); + const { body, marker } = buildCommentBody(results, baseline, title); + + await postOrUpdateComment(github, context, body, marker); +}; diff --git a/tests/benchmarks/utils/results.ts b/tests/benchmarks/utils/results.ts new file mode 100644 index 00000000000..6ba6fad0eb3 --- /dev/null +++ b/tests/benchmarks/utils/results.ts @@ -0,0 +1,81 @@ +/** + * Standard read/write helpers for benchmark results and baselines. 
+ * + * Results shape (written by benchmark tests): + * { + * "entries": { + * "": { mean, median, p50, p90, p99, stdDev, min, max, count, timings, ...meta } + * } + * } + * + * Baseline shape (committed per suite): + * { + * "thresholdPercent": 20, + * "entries": { + * "": { mean, p50 } + * } + * } + */ + +import { existsSync, readFileSync, writeFileSync } from 'fs'; +import { summarize } from './stats'; + +export interface ResultEntry { + mean: number; + median: number; + p50: number; + p90: number; + p99: number; + stdDev: number; + min: number; + max: number; + count: number; + timings: number[]; + [key: string]: any; +} + +export interface ResultsFile { + entries: Record; +} + +export interface BaselineEntry { + mean: number; + p50: number; +} + +export interface BaselineFile { + thresholdPercent: number; + entries: Record; +} + +export function readResults(filePath: string): ResultsFile { + if (!existsSync(filePath)) { + throw new Error(`Results file not found: ${filePath}`); + } + return JSON.parse(readFileSync(filePath, 'utf-8')); +} + +export function writeResults(filePath: string, entries: Record) { + const data: ResultsFile = { entries }; + writeFileSync(filePath, JSON.stringify(data, null, 2)); +} + +export function buildResultEntry(timings: number[], meta: Record = {}): ResultEntry { + return { ...summarize(timings), timings, ...meta }; +} + +export function readBaseline(filePath: string): BaselineFile { + if (!existsSync(filePath)) { + throw new Error(`Baseline file not found: ${filePath}`); + } + return JSON.parse(readFileSync(filePath, 'utf-8')); +} + +export function writeBaseline(filePath: string, results: ResultsFile, thresholdPercent: number) { + const entries: Record = {}; + for (const [key, data] of Object.entries(results.entries)) { + entries[key] = { mean: data.mean, p50: data.p50 }; + } + const data: BaselineFile = { thresholdPercent, entries }; + writeFileSync(filePath, JSON.stringify(data, null, 2) + '\n'); +} diff --git 
a/tests/benchmarks/utils/stats.ts b/tests/benchmarks/utils/stats.ts new file mode 100644 index 00000000000..bd17703d926 --- /dev/null +++ b/tests/benchmarks/utils/stats.ts @@ -0,0 +1,111 @@ +/** + * Statistical utility functions for benchmark analysis. + */ + +function assertValid(values: number[]) { + if (values.length === 0) { + throw new Error('Values array must not be empty'); + } + if (!values.every(Number.isFinite)) { + throw new TypeError('All values must be finite numbers'); + } +} + +function sorted(values: number[]): number[] { + return [...values].sort((a, b) => a - b); +} + +export function mean(values: number[]): number { + assertValid(values); + return values.reduce((sum, v) => sum + v, 0) / values.length; +} + +export function median(values: number[]): number { + assertValid(values); + const s = sorted(values); + const mid = Math.floor(s.length / 2); + + return s.length % 2 === 0 + ? (s[mid - 1] + s[mid]) / 2 + : s[mid]; +} + +export function percentile(values: number[], p: number): number { + assertValid(values); + + if (p < 0 || p > 100) { + throw new RangeError(`Percentile must be between 0 and 100, got ${p}`); + } + + const s = sorted(values); + const index = (p / 100) * (s.length - 1); + + const lower = Math.floor(index); + const upper = Math.ceil(index); + + if (lower === upper) return s[lower]; + + const weight = index - lower; + return s[lower] + weight * (s[upper] - s[lower]); +} + +/** + * Population standard deviation (divide by N) + */ +export function populationStdDev(values: number[]): number { + assertValid(values); + const avg = mean(values); + + const variance + = values.reduce((sum, v) => sum + (v - avg) ** 2, 0) / values.length; + + return Math.sqrt(variance); +} + +/** + * Sample standard deviation (divide by N - 1) + */ +export function sampleStdDev(values: number[]): number { + assertValid(values); + + if (values.length < 2) { + throw new Error('Sample standard deviation requires at least 2 values'); + } + + const avg = 
mean(values); + + const variance + = values.reduce((sum, v) => sum + (v - avg) ** 2, 0) + / (values.length - 1); + + return Math.sqrt(variance); +} + +export function min(values: number[]): number { + assertValid(values); + return values.reduce((a, b) => (a < b ? a : b), Infinity); +} + +export function max(values: number[]): number { + assertValid(values); + return values.reduce((a, b) => (a > b ? a : b), -Infinity); +} + +/** + * Summary for benchmarking (no rounding, keep precision) + */ +export function summarize(values: number[]) { + assertValid(values); + + return { + mean: mean(values), + median: median(values), + p50: percentile(values, 50), + p90: percentile(values, 90), + p99: percentile(values, 99), + min: min(values), + max: max(values), + stdDev: populationStdDev(values), + count: values.length + }; +} From dd4840e2e2b766f77b53658c84f7967686159d83 Mon Sep 17 00:00:00 2001 From: Chirag Chandrashekhar Date: Tue, 5 May 2026 23:51:44 +0530 Subject: [PATCH 2/7] feat: reuse collection across iterations, add update-baseline input, fix review issues - Same collection mounted/unmounted across iterations for cold vs cached comparison - workflow_dispatch has update-baseline boolean input for manual baseline updates - Fix string comparison bug in pr-comment.js (pct was string from toFixed) - Remove dead baseline.collections fallback in compare.js and pr-comment.js - Remove unnecessary waitForTimeout between iterations - Rename pct/pctChange to changePercent/percentChange for readability --- .../tests/run-benchmark-tests/action.yml | 13 +++++++++++ .github/workflows/benchmarks.yml | 6 +++++ .../mounting/collection-mount.bench.ts | 9 ++++---- tests/benchmarks/utils/compare.js | 22 +++++++++---------- tests/benchmarks/utils/pr-comment.js | 11 +++++----- 5 files changed, 40 insertions(+), 21 deletions(-) diff --git a/.github/actions/tests/run-benchmark-tests/action.yml b/.github/actions/tests/run-benchmark-tests/action.yml index f089fbd9ae0..480245c503f 100644 --- 
a/.github/actions/tests/run-benchmark-tests/action.yml +++ b/.github/actions/tests/run-benchmark-tests/action.yml @@ -4,6 +4,9 @@ inputs: os: description: 'Operating system (ubuntu, macos, windows)' default: 'ubuntu' + update-baseline: + description: 'Update baseline instead of comparing' + default: 'false' runs: using: 'composite' steps: @@ -17,7 +20,17 @@ runs: shell: bash run: npm run test:benchmark + - name: Update Baseline + if: inputs.update-baseline == 'true' + shell: bash + run: >- + node tests/benchmarks/utils/compare.js + --results tests/benchmarks/results/mounting.json + --baseline tests/benchmarks/mounting/baseline.json + --update-baseline + - name: Compare Against Baseline + if: inputs.update-baseline != 'true' shell: bash run: >- node tests/benchmarks/utils/compare.js diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml index 521484f109d..a3254f17960 100644 --- a/.github/workflows/benchmarks.yml +++ b/.github/workflows/benchmarks.yml @@ -1,6 +1,11 @@ name: Benchmarks on: workflow_dispatch: + inputs: + update-baseline: + description: 'Update baseline with current results instead of comparing' + type: boolean + default: false pull_request: branches: [main, 'release/v*'] @@ -47,6 +52,7 @@ jobs: uses: ./.github/actions/tests/run-benchmark-tests with: os: ${{ matrix.os-name }} + update-baseline: ${{ github.event.inputs.update-baseline || 'false' }} - name: Upload Benchmark Results uses: actions/upload-artifact@v6 diff --git a/tests/benchmarks/mounting/collection-mount.bench.ts b/tests/benchmarks/mounting/collection-mount.bench.ts index 6767b0a222d..dcac927ee8c 100644 --- a/tests/benchmarks/mounting/collection-mount.bench.ts +++ b/tests/benchmarks/mounting/collection-mount.bench.ts @@ -71,14 +71,13 @@ test.describe('Benchmark: Collection Mount', () => { test.setTimeout((2 + Math.ceil(size / 100) * 2) * 60_000); const timings: number[] = []; - for (let i = 0; i < ITERATIONS_PER_SIZE; i++) { - const collectionName = 
`bench-${format}-${size}-iter-${i}`; - const collectionDir = await createTmpDir(`bench-${format}-${size}-${i}`); - generateCollection({ dir: collectionDir, name: collectionName, requestCount: size, format }); + const collectionName = `bench-${format}-${size}`; + const collectionDir = await createTmpDir(`bench-${format}-${size}`); + generateCollection({ dir: collectionDir, name: collectionName, requestCount: size, format }); + for (let i = 0; i < ITERATIONS_PER_SIZE; i++) { const elapsed = await measureCollectionMount(page, electronApp, collectionDir, collectionName); timings.push(Math.round(elapsed)); - await page.waitForTimeout(500); } const key = resultKey(format, size); diff --git a/tests/benchmarks/utils/compare.js b/tests/benchmarks/utils/compare.js index 9119e3b04d6..e1cf4555780 100644 --- a/tests/benchmarks/utils/compare.js +++ b/tests/benchmarks/utils/compare.js @@ -38,14 +38,14 @@ function loadJSON(filepath) { return JSON.parse(readFileSync(filepath, 'utf-8')); } -function pctChange(baseline, current) { +function percentChange(baseline, current) { if (baseline === 0) return current === 0 ? 0 : Infinity; return ((current - baseline) / baseline) * 100; } -function formatPct(pct) { - const sign = pct > 0 ? '+' : ''; - return `${sign}${pct.toFixed(1)}%`; +function formatChange(change) { + const sign = change > 0 ? 
'+' : ''; + return `${sign}${change.toFixed(1)}%`; } const args = parseArgs(process.argv); @@ -59,7 +59,7 @@ const results = loadJSON(args.results); const baseline = loadJSON(args.baseline); const threshold = baseline.thresholdPercent || 20; const resultEntries = results.entries || results; -const baselineEntries = baseline.entries || baseline.collections || {}; +const baselineEntries = baseline.entries || {}; if (args.updateBaseline) { const newBaseline = { @@ -94,11 +94,11 @@ for (const [key, data] of Object.entries(resultEntries)) { continue; } - const meanPct = pctChange(base.mean, data.mean); - const p50Pct = pctChange(base.p50, data.p50); + const meanChange = percentChange(base.mean, data.mean); + const p50Change = percentChange(base.p50, data.p50); - const meanStatus = meanPct > threshold ? 'FAIL' : meanPct < -threshold ? 'IMPROVED' : 'OK'; - const p50Status = p50Pct > threshold ? 'FAIL' : p50Pct < -threshold ? 'IMPROVED' : 'OK'; + const meanStatus = meanChange > threshold ? 'FAIL' : meanChange < -threshold ? 'IMPROVED' : 'OK'; + const p50Status = p50Change > threshold ? 'FAIL' : p50Change < -threshold ? 
'IMPROVED' : 'OK'; if (meanStatus === 'FAIL' || p50Status === 'FAIL') { hasRegression = true; @@ -107,10 +107,10 @@ for (const [key, data] of Object.entries(resultEntries)) { rows.push({ key, 'mean (ms)': `${Math.round(data.mean)} (baseline: ${base.mean})`, - 'mean change': formatPct(meanPct), + 'mean change': formatChange(meanChange), 'mean status': meanStatus, 'p50 (ms)': `${Math.round(data.p50)} (baseline: ${base.p50})`, - 'p50 change': formatPct(p50Pct), + 'p50 change': formatChange(p50Change), 'p50 status': p50Status }); } diff --git a/tests/benchmarks/utils/pr-comment.js b/tests/benchmarks/utils/pr-comment.js index a0c18dff189..63765ec5baf 100644 --- a/tests/benchmarks/utils/pr-comment.js +++ b/tests/benchmarks/utils/pr-comment.js @@ -13,7 +13,7 @@ const fs = require('fs'); function buildCommentBody(results, baseline, title) { const threshold = baseline.thresholdPercent || 20; const resultEntries = results.entries || results; - const baselineEntries = baseline.entries || baseline.collections || {}; + const baselineEntries = baseline.entries || {}; const marker = `## ${title}`; let body = `${marker}\n\n`; @@ -26,11 +26,12 @@ function buildCommentBody(results, baseline, title) { const base = baselineEntries[key]; if (!base) continue; - const pct = ((data.mean - base.mean) / base.mean * 100).toFixed(1); - const status = pct > threshold ? '🔴 REGRESSION' : pct < -threshold ? '🟢 IMPROVED' : '✅ OK'; - if (pct > threshold) hasRegression = true; + const changePercent = (data.mean - base.mean) / base.mean * 100; + const changeStr = changePercent.toFixed(1); + const status = changePercent > threshold ? '🔴 REGRESSION' : changePercent < -threshold ? '🟢 IMPROVED' : '✅ OK'; + if (changePercent > threshold) hasRegression = true; - body += `| ${key} | ${Math.round(data.mean)} | ${base.mean} | ${pct > 0 ? '+' : ''}${pct}% | ${status} |\n`; + body += `| ${key} | ${Math.round(data.mean)} | ${base.mean} | ${changePercent > 0 ? 
'+' : ''}${changeStr}% | ${status} |\n`; } body += `\n> Threshold: ${threshold}% regression allowed\n`; From 0e3661cae0748d548bad26d4bede6fd8609efaa0 Mon Sep 17 00:00:00 2001 From: Chirag Chandrashekhar Date: Wed, 6 May 2026 01:21:43 +0530 Subject: [PATCH 3/7] feat: reduce max size to 3000, update baselines from CI data, auto-commit on update-baseline - Reduce max collection size from 5000 to 3000 to keep CI runtime reasonable - Update baseline values from actual CI run data (worst case across ubuntu/macos/windows) - Auto-commit updated baseline.json when update-baseline is triggered via workflow_dispatch - Reuse same collection across iterations for cold vs cached comparison - Fix string comparison bug and remove dead code from review feedback - Rename pct variables to changePercent for readability - Remove unnecessary waitForTimeout between iterations --- .github/workflows/benchmarks.yml | 10 ++++- tests/benchmarks/mounting/baseline.json | 43 +++++++++---------- .../mounting/collection-mount.bench.ts | 2 +- 3 files changed, 31 insertions(+), 24 deletions(-) diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml index a3254f17960..fe896d309e7 100644 --- a/.github/workflows/benchmarks.yml +++ b/.github/workflows/benchmarks.yml @@ -26,7 +26,7 @@ jobs: - os: windows-latest os-name: windows permissions: - contents: read + contents: write pull-requests: write steps: - uses: actions/checkout@v6 @@ -64,6 +64,14 @@ jobs: benchmark-report/ retention-days: 30 + - name: Commit Updated Baseline + if: github.event.inputs.update-baseline == 'true' && matrix.os-name == 'ubuntu' + run: | + git config user.name "github-actions[bot]" + git config user.email "github-actions[bot]@users.noreply.github.com" + git add tests/benchmarks/mounting/baseline.json + git diff --staged --quiet || git commit -m "chore: update benchmark baseline" && git push + - name: Comment Benchmark Results on PR if: github.event_name == 'pull_request' && !cancelled() && matrix.os-name 
== 'ubuntu' uses: actions/github-script@v7 diff --git a/tests/benchmarks/mounting/baseline.json b/tests/benchmarks/mounting/baseline.json index 092998a23ce..834e8ed7144 100644 --- a/tests/benchmarks/mounting/baseline.json +++ b/tests/benchmarks/mounting/baseline.json @@ -1,46 +1,45 @@ { - "description": "Benchmark baselines for collection mount times. Update by running: node tests/benchmarks/mounting/compare.js --update-baseline", "thresholdPercent": 20, "entries": { "bru-50": { "mean": 2000, - "p50": 1800 + "p50": 900 }, "bru-200": { - "mean": 5000, - "p50": 4500 + "mean": 1500, + "p50": 1500 }, "bru-500": { - "mean": 12000, - "p50": 11000 + "mean": 4000, + "p50": 3800 }, "bru-1000": { - "mean": 25000, - "p50": 24000 + "mean": 10500, + "p50": 10000 }, - "bru-5000": { - "mean": 120000, - "p50": 115000 + "bru-3000": { + "mean": 420000, + "p50": 400000 }, "yml-50": { - "mean": 2000, - "p50": 1800 + "mean": 700, + "p50": 650 }, "yml-200": { - "mean": 5000, - "p50": 4500 + "mean": 1500, + "p50": 1500 }, "yml-500": { - "mean": 12000, - "p50": 11000 + "mean": 4500, + "p50": 4300 }, "yml-1000": { - "mean": 25000, - "p50": 24000 + "mean": 13000, + "p50": 12600 }, - "yml-5000": { - "mean": 120000, - "p50": 115000 + "yml-3000": { + "mean": 180000, + "p50": 180000 } } } diff --git a/tests/benchmarks/mounting/collection-mount.bench.ts b/tests/benchmarks/mounting/collection-mount.bench.ts index dcac927ee8c..b19008d514c 100644 --- a/tests/benchmarks/mounting/collection-mount.bench.ts +++ b/tests/benchmarks/mounting/collection-mount.bench.ts @@ -7,7 +7,7 @@ import { generateCollection, type CollectionFormat } from '../utils/collection-g import * as path from 'path'; import * as fs from 'fs'; -const COLLECTION_SIZES = [50, 200, 500, 1000, 5000]; +const COLLECTION_SIZES = [50, 200, 500, 1000, 3000]; const COLLECTION_FORMATS: CollectionFormat[] = ['bru', 'yml']; const ITERATIONS_PER_SIZE = 3; From 4325f21266cac9496d2915fc7236eb41897056db Mon Sep 17 00:00:00 2001 From: Chirag 
Chandrashekhar Date: Wed, 6 May 2026 01:41:38 +0530 Subject: [PATCH 4/7] fix: handle PR comment permission error on fork PRs - Add continue-on-error to PR comment step since GITHUB_TOKEN lacks write access on cross-fork PRs --- .github/workflows/benchmarks.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml index fe896d309e7..bbe2c940380 100644 --- a/.github/workflows/benchmarks.yml +++ b/.github/workflows/benchmarks.yml @@ -74,6 +74,7 @@ jobs: - name: Comment Benchmark Results on PR if: github.event_name == 'pull_request' && !cancelled() && matrix.os-name == 'ubuntu' + continue-on-error: true uses: actions/github-script@v7 with: script: | From f80f21d5da4b32b129bc980b621b94d48faaf224 Mon Sep 17 00:00:00 2001 From: Chirag Chandrashekhar Date: Wed, 6 May 2026 02:16:23 +0530 Subject: [PATCH 5/7] feat: per-OS baselines from CI run data, auto-commit on update-baseline - Split baseline.json into baseline.ubuntu/macos/windows.json with real CI data - Action and workflow dynamically reference baseline per OS - PR comment posted per OS with OS-specific comparison - Auto-commit updated baseline on workflow_dispatch with update-baseline flag --- .../tests/run-benchmark-tests/action.yml | 4 +- .github/workflows/benchmarks.yml | 12 ++--- tests/benchmarks/mounting/baseline.json | 45 ------------------- tests/benchmarks/mounting/baseline.macos.json | 45 +++++++++++++++++++ .../benchmarks/mounting/baseline.ubuntu.json | 45 +++++++++++++++++++ .../benchmarks/mounting/baseline.windows.json | 45 +++++++++++++++++++ 6 files changed, 143 insertions(+), 53 deletions(-) delete mode 100644 tests/benchmarks/mounting/baseline.json create mode 100644 tests/benchmarks/mounting/baseline.macos.json create mode 100644 tests/benchmarks/mounting/baseline.ubuntu.json create mode 100644 tests/benchmarks/mounting/baseline.windows.json diff --git a/.github/actions/tests/run-benchmark-tests/action.yml 
b/.github/actions/tests/run-benchmark-tests/action.yml index 480245c503f..ece9bd3fe3f 100644 --- a/.github/actions/tests/run-benchmark-tests/action.yml +++ b/.github/actions/tests/run-benchmark-tests/action.yml @@ -26,7 +26,7 @@ runs: run: >- node tests/benchmarks/utils/compare.js --results tests/benchmarks/results/mounting.json - --baseline tests/benchmarks/mounting/baseline.json + --baseline tests/benchmarks/mounting/baseline.${{ inputs.os }}.json --update-baseline - name: Compare Against Baseline @@ -35,4 +35,4 @@ runs: run: >- node tests/benchmarks/utils/compare.js --results tests/benchmarks/results/mounting.json - --baseline tests/benchmarks/mounting/baseline.json + --baseline tests/benchmarks/mounting/baseline.${{ inputs.os }}.json diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml index bbe2c940380..304af458483 100644 --- a/.github/workflows/benchmarks.yml +++ b/.github/workflows/benchmarks.yml @@ -65,15 +65,15 @@ jobs: retention-days: 30 - name: Commit Updated Baseline - if: github.event.inputs.update-baseline == 'true' && matrix.os-name == 'ubuntu' + if: github.event.inputs.update-baseline == 'true' run: | git config user.name "github-actions[bot]" git config user.email "github-actions[bot]@users.noreply.github.com" - git add tests/benchmarks/mounting/baseline.json - git diff --staged --quiet || git commit -m "chore: update benchmark baseline" && git push + git add tests/benchmarks/mounting/baseline.${{ matrix.os-name }}.json + git diff --staged --quiet || git commit -m "chore: update ${{ matrix.os-name }} benchmark baseline" && git push - name: Comment Benchmark Results on PR - if: github.event_name == 'pull_request' && !cancelled() && matrix.os-name == 'ubuntu' + if: github.event_name == 'pull_request' && !cancelled() continue-on-error: true uses: actions/github-script@v7 with: @@ -83,6 +83,6 @@ jobs: github, context, resultsPath: 'tests/benchmarks/results/mounting.json', - baselinePath: 
'tests/benchmarks/mounting/baseline.json', - title: 'Benchmark Results — Collection Mount' + baselinePath: 'tests/benchmarks/mounting/baseline.${{ matrix.os-name }}.json', + title: 'Benchmark Results — Collection Mount (${{ matrix.os-name }})' }); diff --git a/tests/benchmarks/mounting/baseline.json b/tests/benchmarks/mounting/baseline.json deleted file mode 100644 index 834e8ed7144..00000000000 --- a/tests/benchmarks/mounting/baseline.json +++ /dev/null @@ -1,45 +0,0 @@ -{ - "thresholdPercent": 20, - "entries": { - "bru-50": { - "mean": 2000, - "p50": 900 - }, - "bru-200": { - "mean": 1500, - "p50": 1500 - }, - "bru-500": { - "mean": 4000, - "p50": 3800 - }, - "bru-1000": { - "mean": 10500, - "p50": 10000 - }, - "bru-3000": { - "mean": 420000, - "p50": 400000 - }, - "yml-50": { - "mean": 700, - "p50": 650 - }, - "yml-200": { - "mean": 1500, - "p50": 1500 - }, - "yml-500": { - "mean": 4500, - "p50": 4300 - }, - "yml-1000": { - "mean": 13000, - "p50": 12600 - }, - "yml-3000": { - "mean": 180000, - "p50": 180000 - } - } -} diff --git a/tests/benchmarks/mounting/baseline.macos.json b/tests/benchmarks/mounting/baseline.macos.json new file mode 100644 index 00000000000..d7de548c4bc --- /dev/null +++ b/tests/benchmarks/mounting/baseline.macos.json @@ -0,0 +1,45 @@ +{ + "thresholdPercent": 20, + "entries": { + "bru-50": { + "mean": 2200, + "p50": 1000 + }, + "bru-200": { + "mean": 1300, + "p50": 1100 + }, + "bru-500": { + "mean": 3600, + "p50": 3500 + }, + "bru-1000": { + "mean": 9100, + "p50": 9000 + }, + "bru-3000": { + "mean": 185000, + "p50": 183000 + }, + "yml-50": { + "mean": 700, + "p50": 650 + }, + "yml-200": { + "mean": 1400, + "p50": 1250 + }, + "yml-500": { + "mean": 3900, + "p50": 3700 + }, + "yml-1000": { + "mean": 11700, + "p50": 11900 + }, + "yml-3000": { + "mean": 85000, + "p50": 80000 + } + } +} diff --git a/tests/benchmarks/mounting/baseline.ubuntu.json b/tests/benchmarks/mounting/baseline.ubuntu.json new file mode 100644 index 00000000000..0d4ff8c6806 
--- /dev/null +++ b/tests/benchmarks/mounting/baseline.ubuntu.json @@ -0,0 +1,45 @@ +{ + "thresholdPercent": 20, + "entries": { + "bru-50": { + "mean": 1500, + "p50": 700 + }, + "bru-200": { + "mean": 1200, + "p50": 1150 + }, + "bru-500": { + "mean": 2900, + "p50": 2900 + }, + "bru-1000": { + "mean": 8000, + "p50": 8000 + }, + "bru-3000": { + "mean": 175000, + "p50": 170000 + }, + "yml-50": { + "mean": 600, + "p50": 560 + }, + "yml-200": { + "mean": 1200, + "p50": 1200 + }, + "yml-500": { + "mean": 3500, + "p50": 3400 + }, + "yml-1000": { + "mean": 10700, + "p50": 10650 + }, + "yml-3000": { + "mean": 85000, + "p50": 80000 + } + } +} diff --git a/tests/benchmarks/mounting/baseline.windows.json b/tests/benchmarks/mounting/baseline.windows.json new file mode 100644 index 00000000000..f3be08bb31d --- /dev/null +++ b/tests/benchmarks/mounting/baseline.windows.json @@ -0,0 +1,45 @@ +{ + "thresholdPercent": 20, + "entries": { + "bru-50": { + "mean": 2700, + "p50": 800 + }, + "bru-200": { + "mean": 1500, + "p50": 1400 + }, + "bru-500": { + "mean": 3500, + "p50": 3500 + }, + "bru-1000": { + "mean": 9500, + "p50": 9400 + }, + "bru-3000": { + "mean": 195000, + "p50": 190000 + }, + "yml-50": { + "mean": 600, + "p50": 570 + }, + "yml-200": { + "mean": 1350, + "p50": 1300 + }, + "yml-500": { + "mean": 3800, + "p50": 3700 + }, + "yml-1000": { + "mean": 11000, + "p50": 11000 + }, + "yml-3000": { + "mean": 90000, + "p50": 88000 + } + } +} From 791d5288f9c5a186bcae27ee99fb4ad3feeb245a Mon Sep 17 00:00:00 2001 From: Chirag Chandrashekhar Date: Wed, 6 May 2026 15:33:13 +0530 Subject: [PATCH 6/7] feat: include suite metadata (name, unit, direction) in benchmark results - writeResults now accepts SuiteMeta with name, unit, and direction - Results JSON includes suite field for the visualization dashboard to ingest - Mounting benchmark outputs unit: ms, direction: smaller --- .../benchmarks/mounting/collection-mount.bench.ts | 2 +- tests/benchmarks/utils/results.ts | 15 +++++++++++++-- 2 
files changed, 14 insertions(+), 3 deletions(-) diff --git a/tests/benchmarks/mounting/collection-mount.bench.ts b/tests/benchmarks/mounting/collection-mount.bench.ts index b19008d514c..cc17c01b921 100644 --- a/tests/benchmarks/mounting/collection-mount.bench.ts +++ b/tests/benchmarks/mounting/collection-mount.bench.ts @@ -108,7 +108,7 @@ test.describe('Benchmark: Collection Mount', () => { entries[key] = buildResultEntry(timings, { format, size: Number(sizeStr) }); } - writeResults(outputPath, entries); + writeResults(outputPath, { name: 'Collection Mount', unit: 'ms', direction: 'smaller' }, entries); console.log(`[BENCHMARK] Results written to ${outputPath}`); }); }); diff --git a/tests/benchmarks/utils/results.ts b/tests/benchmarks/utils/results.ts index 6ba6fad0eb3..8bf018a0187 100644 --- a/tests/benchmarks/utils/results.ts +++ b/tests/benchmarks/utils/results.ts @@ -3,6 +3,7 @@ * * Results shape (written by benchmark tests): * { + * "suite": { "name": "...", "unit": "ms", "direction": "smaller" }, * "entries": { * "<key>": { mean, median, p50, p90, p99, stdDev, min, max, count, timings, ...meta } * } @@ -20,6 +21,15 @@ import { existsSync, readFileSync, writeFileSync } from 'fs'; import { summarize } from './stats'; +export type Direction = 'smaller' | 'bigger'; +export type Unit = 'ms' | 's' | 'ops/s' | 'bytes' | '%' | 'count'; + +export interface SuiteMeta { + name: string; + unit: Unit; + direction: Direction; +} + export interface ResultEntry { mean: number; median: number; @@ -35,6 +45,7 @@ export interface ResultEntry { } export interface ResultsFile { + suite: SuiteMeta; entries: Record<string, ResultEntry>; } @@ -55,8 +66,8 @@ export function readResults(filePath: string): ResultsFile { return JSON.parse(readFileSync(filePath, 'utf-8')); } -export function writeResults(filePath: string, entries: Record<string, ResultEntry>) { - const data: ResultsFile = { entries }; +export function writeResults(filePath: string, suite: SuiteMeta, entries: Record<string, ResultEntry>) { + const data: ResultsFile = { suite, entries }; 
writeFileSync(filePath, JSON.stringify(data, null, 2)); } From 29668dab4d702984a080b6aaec088af68c9a458e Mon Sep 17 00:00:00 2001 From: Chirag Chandrashekhar Date: Mon, 11 May 2026 15:45:37 +0530 Subject: [PATCH 7/7] feat: extract timing helpers, capture raw float ms in mount benchmark --- .../mounting/collection-mount.bench.ts | 7 +++--- tests/benchmarks/utils/timing.ts | 25 +++++++++++++++++++ 2 files changed, 29 insertions(+), 3 deletions(-) create mode 100644 tests/benchmarks/utils/timing.ts diff --git a/tests/benchmarks/mounting/collection-mount.bench.ts b/tests/benchmarks/mounting/collection-mount.bench.ts index cc17c01b921..82e3046ea18 100644 --- a/tests/benchmarks/mounting/collection-mount.bench.ts +++ b/tests/benchmarks/mounting/collection-mount.bench.ts @@ -3,6 +3,7 @@ import { type ElectronApplication, type Page } from '@playwright/test'; import { openCollection, closeAllCollections } from '../../utils/page'; import { summarize } from '../utils/stats'; import { writeResults, buildResultEntry, type ResultEntry } from '../utils/results'; +import { startTimer } from '../utils/timing'; import { generateCollection, type CollectionFormat } from '../utils/collection-generator'; import * as path from 'path'; import * as fs from 'fs'; @@ -35,7 +36,7 @@ async function measureCollectionMount( }); }); - const start = performance.now(); + const timer = startTimer(); await page.getByTestId('collections-header-add-menu').click(); await page.locator('.tippy-box .dropdown-item').filter({ hasText: 'Open collection' }).click(); @@ -44,7 +45,7 @@ async function measureCollectionMount( await openCollection(page, collectionName); await page.evaluate(() => (window as any).__benchMountDone); - const elapsed = performance.now() - start; + const elapsed = timer.elapsed(); await electronApp.evaluate(({ dialog }) => { if ((dialog as any).__originalShowOpenDialog) { @@ -77,7 +78,7 @@ test.describe('Benchmark: Collection Mount', () => { for (let i = 0; i < ITERATIONS_PER_SIZE; i++) { 
const elapsed = await measureCollectionMount(page, electronApp, collectionDir, collectionName); - timings.push(Math.round(elapsed)); + timings.push(elapsed); } const key = resultKey(format, size); diff --git a/tests/benchmarks/utils/timing.ts b/tests/benchmarks/utils/timing.ts new file mode 100644 index 00000000000..984d1dc3d8b --- /dev/null +++ b/tests/benchmarks/utils/timing.ts @@ -0,0 +1,25 @@ +/** + * Timing utilities for benchmarks. + * + * Capture: const t = startTimer(); ...do work...; const ms = t.elapsed(); + * Convert: convertDuration(1500, 'ms', 's') === 1.5 + */ + +export type DurationUnit = 'ns' | 'us' | 'ms' | 's'; + +const DURATION_TO_MS: Record<DurationUnit, number> = { + ns: 1e-6, + us: 1e-3, + ms: 1, + s: 1000 +}; + +export function startTimer() { + const start = performance.now(); + return { elapsed: () => performance.now() - start }; +} + +export function convertDuration(value: number, from: DurationUnit, to: DurationUnit): number { + if (from === to) return value; + return (value * DURATION_TO_MS[from]) / DURATION_TO_MS[to]; +}