feat: add benchmark framework for collection mount performance (#7915)

2026-06-11 09:51:30 +00:00 · 2026-05-18 12:19:23 +05:30
parent b79349b052
commit 736c050dae
16 changed files with 928 additions and 1 deletions
--- a/.github/actions/tests/run-benchmark-tests/action.yml
+++ b/.github/actions/tests/run-benchmark-tests/action.yml
@@ -0,0 +1,38 @@
+name: 'Run Benchmark Tests'
+description: 'Run Playwright benchmark tests and compare against baseline'
+inputs:
+  os: # selects the xvfb-wrapped run step and the baseline.<os>.json file below
+    description: 'Operating system (ubuntu, macos, windows)'
+    default: 'ubuntu'
+  update-baseline: # the string 'true' rewrites the baseline instead of comparing
+    description: 'Update baseline instead of comparing'
+    default: 'false'
+runs:
+  using: 'composite'
+  steps:
+    - name: Run Benchmark Tests (Ubuntu)
+      if: inputs.os == 'ubuntu' # only the Ubuntu variant is wrapped in xvfb-run
+      shell: bash
+      run: xvfb-run npm run test:benchmark
+
+    - name: Run Benchmark Tests
+      if: inputs.os != 'ubuntu'
+      shell: bash
+      run: npm run test:benchmark
+
+    - name: Update Baseline
+      if: inputs.update-baseline == 'true' # exactly one of this step and the next runs
+      shell: bash
+      run: >-
+        node tests/benchmarks/utils/compare.js
+        --results tests/benchmarks/results/mounting.json
+        --baseline tests/benchmarks/mounting/baseline.${{ inputs.os }}.json
+        --update-baseline
+
+    - name: Compare Against Baseline
+      if: inputs.update-baseline != 'true' # compare.js exits 1 on regression, failing the step
+      shell: bash
+      run: >-
+        node tests/benchmarks/utils/compare.js
+        --results tests/benchmarks/results/mounting.json
+        --baseline tests/benchmarks/mounting/baseline.${{ inputs.os }}.json
--- a/.github/workflows/benchmarks.yml
+++ b/.github/workflows/benchmarks.yml
@@ -0,0 +1,88 @@
+name: Benchmarks
+on:
+  workflow_dispatch: # manual run; the only trigger that can set update-baseline
+    inputs:
+      update-baseline:
+        description: 'Update baseline with current results instead of comparing'
+        type: boolean
+        default: false
+  pull_request:
+    branches: [main, 'release/v*']
+
+jobs:
+  benchmark:
+    name: Performance Benchmarks (${{ matrix.os }})
+    timeout-minutes: 60
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false # keep the other OS jobs running when one of them fails
+      matrix:
+        os: [ubuntu-24.04, macos-latest, windows-latest]
+        include: # attaches the short os-name used for baseline files and artifact names
+          - os: ubuntu-24.04
+            os-name: ubuntu
+          - os: macos-latest
+            os-name: macos
+          - os: windows-latest
+            os-name: windows
+    permissions:
+      contents: write # used by the "Commit Updated Baseline" step's git push
+      pull-requests: write # used by the PR comment step
+    steps:
+      - uses: actions/checkout@v6
+
+      - name: Install System Dependencies (Ubuntu)
+        if: matrix.os-name == 'ubuntu'
+        run: |
+          sudo apt-get update
+          sudo apt-get --no-install-recommends install -y \
+            libglib2.0-0 libnss3 libdbus-1-3 libatk1.0-0 libatk-bridge2.0-0 libcups2 libgtk-3-0 libasound2t64 \
+            xvfb
+
+      - name: Setup Node Dependencies
+        uses: ./.github/actions/common/setup-node-deps
+
+      - name: Configure Chrome Sandbox # makes Electron's chrome-sandbox root-owned and setuid (mode 4755)
+        if: matrix.os-name == 'ubuntu'
+        run: |
+          sudo chown root node_modules/electron/dist/chrome-sandbox
+          sudo chmod 4755 node_modules/electron/dist/chrome-sandbox
+
+      - name: Run Benchmark Tests
+        uses: ./.github/actions/tests/run-benchmark-tests
+        with:
+          os: ${{ matrix.os-name }}
+          update-baseline: ${{ github.event.inputs.update-baseline || 'false' }} # inputs are empty on pull_request events
+
+      - name: Upload Benchmark Results
+        uses: actions/upload-artifact@v6
+        if: ${{ !cancelled() }} # upload even when the benchmark step failed
+        with:
+          name: benchmark-results-${{ matrix.os-name }}
+          path: |
+            tests/benchmarks/results/
+            benchmark-report/
+          retention-days: 30
+
+      - name: Commit Updated Baseline
+        if: github.event.inputs.update-baseline == 'true'
+        # Use bash on every runner; the default shell on Windows runners is PowerShell.
+        shell: bash
+        run: |
+          git config user.name "github-actions[bot]"
+          git config user.email "github-actions[bot]@users.noreply.github.com"
+          git add tests/benchmarks/mounting/baseline.${{ matrix.os-name }}.json
+          # Nothing staged means the baseline did not change: skip commit and push.
+          if git diff --staged --quiet; then
+            echo "Baseline for ${{ matrix.os-name }} unchanged; nothing to commit."
+            exit 0
+          fi
+          git commit -m "chore: update ${{ matrix.os-name }} benchmark baseline"
+          # Each matrix job pushes its own baseline file to the same branch, so a
+          # plain push is rejected once another job got there first. Rebase onto
+          # the remote tip and retry a few times.
+          for attempt in 1 2 3; do
+            if git pull --rebase && git push; then
+              exit 0
+            fi
+            echo "Push attempt ${attempt} failed; retrying."
+            sleep 5
+          done
+          echo "Could not push the updated baseline." >&2
+          exit 1
+
+      - name: Comment Benchmark Results on PR
+        if: github.event_name == 'pull_request' && !cancelled() # also runs after a failed comparison
+        continue-on-error: true # a failure to comment does not fail the job
+        uses: actions/github-script@v7
+        with:
+          script: |
+            const run = require('./tests/benchmarks/utils/pr-comment.js');
+            await run({
+              github,
+              context,
+              resultsPath: 'tests/benchmarks/results/mounting.json',
+              baselinePath: 'tests/benchmarks/mounting/baseline.${{ matrix.os-name }}.json',
+              title: 'Benchmark Results — Collection Mount (${{ matrix.os-name }})'
+            });
--- a/.gitignore
+++ b/.gitignore
@@ -58,6 +58,10 @@ skills-lock.json
 # Playwright
 /blob-report/

+# Benchmark results (generated at runtime)
+tests/benchmarks/results/
+/benchmark-report/
+
 # Development plan files
 CLAUDE.md
 AGENTS.md
--- a/package.json
+++ b/package.json
@@ -83,6 +83,7 @@
    "test:e2e": "playwright test --project=default",
    "test:e2e:ssl": "playwright test --project=ssl",
    "test:e2e:auth": "playwright test --project=auth",
+    "test:benchmark": "playwright test --config=playwright.benchmark.config.ts",
    "lint": "cross-env NODE_OPTIONS=\"--max_old_space_size=4096\" npx eslint",
    "lint:fix": "cross-env NODE_OPTIONS=\"--max_old_space_size=4096\" npx eslint --fix",
    "prepare": "husky"
--- a/playwright.benchmark.config.ts
+++ b/playwright.benchmark.config.ts
@@ -0,0 +1,38 @@
+import { defineConfig } from '@playwright/test';
+
+export default defineConfig({
+  fullyParallel: false, // tests within a file run in declaration order
+  forbidOnly: !!process.env.CI, // a stray test.only fails the run on CI
+  retries: 0, // a failed benchmark is reported as-is, never re-run
+  workers: 1, // a single worker process, so benchmarks never run concurrently
+  reporter: [
+    ['list'],
+    ['json', { outputFile: 'benchmark-report/results.json' }] // directory is git-ignored and uploaded by CI
+  ],
+
+  use: {
+    trace: 'off'
+  },
+
+  projects: [
+    {
+      name: 'benchmarks',
+      testDir: './tests/benchmarks',
+      testMatch: '**/*.bench.ts' // only *.bench.ts files are collected
+    }
+  ],
+
+  webServer: [
+    {
+      command: 'npm run dev:web',
+      url: 'http://localhost:3000',
+      reuseExistingServer: !process.env.CI, // outside CI an already-running dev server is reused
+      timeout: 10 * 60 * 1000 // up to 10 minutes for the dev server to come up
+    }
+  ],
+
+  timeout: 10 * 60 * 1000, // default per-test timeout (10 minutes); the mount benchmark overrides it via test.setTimeout
+  expect: {
+    timeout: 120_000 // 2 minutes for expect() polling
+  }
+});
--- a/playwright.config.ts
+++ b/playwright.config.ts
@@ -23,7 +23,8 @@ export default defineConfig({
      testDir: './tests',
      testIgnore: [
        'ssl/**', // custom CA certificate tests require separate server setup and certificate generation
-        'auth/**' // auth tests have their own project
+        'auth/**', // auth tests have their own project
+        'benchmarks/**'  
      ]
    },
    {
--- a/tests/benchmarks/mounting/baseline.macos.json
+++ b/tests/benchmarks/mounting/baseline.macos.json
@@ -0,0 +1,45 @@
+{
+  "thresholdPercent": 20,
+  "entries": {
+    "bru-50": {
+      "mean": 2200,
+      "p50": 1000
+    },
+    "bru-200": {
+      "mean": 1300,
+      "p50": 1100
+    },
+    "bru-500": {
+      "mean": 3600,
+      "p50": 3500
+    },
+    "bru-1000": {
+      "mean": 9100,
+      "p50": 9000
+    },
+    "bru-3000": {
+      "mean": 185000,
+      "p50": 183000
+    },
+    "yml-50": {
+      "mean": 700,
+      "p50": 650
+    },
+    "yml-200": {
+      "mean": 1400,
+      "p50": 1250
+    },
+    "yml-500": {
+      "mean": 3900,
+      "p50": 3700
+    },
+    "yml-1000": {
+      "mean": 11700,
+      "p50": 11900
+    },
+    "yml-3000": {
+      "mean": 85000,
+      "p50": 80000
+    }
+  }
+}
--- a/tests/benchmarks/mounting/baseline.ubuntu.json
+++ b/tests/benchmarks/mounting/baseline.ubuntu.json
@@ -0,0 +1,45 @@
+{
+  "thresholdPercent": 20,
+  "entries": {
+    "bru-50": {
+      "mean": 1500,
+      "p50": 700
+    },
+    "bru-200": {
+      "mean": 1200,
+      "p50": 1150
+    },
+    "bru-500": {
+      "mean": 2900,
+      "p50": 2900
+    },
+    "bru-1000": {
+      "mean": 8000,
+      "p50": 8000
+    },
+    "bru-3000": {
+      "mean": 175000,
+      "p50": 170000
+    },
+    "yml-50": {
+      "mean": 600,
+      "p50": 560
+    },
+    "yml-200": {
+      "mean": 1200,
+      "p50": 1200
+    },
+    "yml-500": {
+      "mean": 3500,
+      "p50": 3400
+    },
+    "yml-1000": {
+      "mean": 10700,
+      "p50": 10650
+    },
+    "yml-3000": {
+      "mean": 85000,
+      "p50": 80000
+    }
+  }
+}
--- a/tests/benchmarks/mounting/baseline.windows.json
+++ b/tests/benchmarks/mounting/baseline.windows.json
@@ -0,0 +1,45 @@
+{
+  "thresholdPercent": 20,
+  "entries": {
+    "bru-50": {
+      "mean": 2700,
+      "p50": 800
+    },
+    "bru-200": {
+      "mean": 1500,
+      "p50": 1400
+    },
+    "bru-500": {
+      "mean": 3500,
+      "p50": 3500
+    },
+    "bru-1000": {
+      "mean": 9500,
+      "p50": 9400
+    },
+    "bru-3000": {
+      "mean": 195000,
+      "p50": 190000
+    },
+    "yml-50": {
+      "mean": 600,
+      "p50": 570
+    },
+    "yml-200": {
+      "mean": 1350,
+      "p50": 1300
+    },
+    "yml-500": {
+      "mean": 3800,
+      "p50": 3700
+    },
+    "yml-1000": {
+      "mean": 11000,
+      "p50": 11000
+    },
+    "yml-3000": {
+      "mean": 90000,
+      "p50": 88000
+    }
+  }
+}
--- a/tests/benchmarks/mounting/collection-mount.bench.ts
+++ b/tests/benchmarks/mounting/collection-mount.bench.ts
@@ -0,0 +1,115 @@
+import { test } from '../../../playwright';
+import { type ElectronApplication, type Page } from '@playwright/test';
+import { openCollection, closeAllCollections } from '../../utils/page';
+import { summarize } from '../utils/stats';
+import { writeResults, buildResultEntry, type ResultEntry } from '../utils/results';
+import { startTimer } from '../utils/timing';
+import { generateCollection, type CollectionFormat } from '../utils/collection-generator';
+import * as path from 'path';
+import * as fs from 'fs';
+
+const COLLECTION_SIZES = [50, 200, 500, 1000, 3000]; // request counts; one test is declared per size
+const COLLECTION_FORMATS: CollectionFormat[] = ['bru', 'yml']; // every size is benchmarked in both formats
+const ITERATIONS_PER_SIZE = 3; // timed mounts per test; all samples feed the statistics
+
+/**
+ * Opens the collection stored at `collectionDir` through the "Open collection"
+ * menu and measures how long the mount takes.
+ *
+ * The timer starts right before the add-menu click and stops once the renderer
+ * receives a `main:collection-loading-state-updated` event whose payload has a
+ * falsy `isLoading`. The listener does not filter by collection, so the first
+ * such event ends the measurement.
+ *
+ * @param page renderer page driven by the test
+ * @param electronApp Electron application handle, used to stub the native dialog
+ * @param collectionDir directory returned by the stubbed folder picker
+ * @param collectionName name expected to appear in the sidebar
+ * @returns elapsed wall-clock time in milliseconds
+ */
+async function measureCollectionMount(
+  page: Page,
+  electronApp: ElectronApplication,
+  collectionDir: string,
+  collectionName: string
+): Promise<number> {
+  // Replace the native folder picker so "Open collection" resolves to collectionDir.
+  // The original implementation is stashed once (`??=`) so repeated calls keep it.
+  await electronApp.evaluate(
+    ({ dialog }, { dir }) => {
+      (dialog as any).__originalShowOpenDialog ??= dialog.showOpenDialog;
+      dialog.showOpenDialog = async () => ({ canceled: false, filePaths: [dir] });
+    },
+    { dir: collectionDir }
+  );
+
+  let elapsed: number;
+  try {
+    // Register the completion listener before triggering the mount so the
+    // event cannot be missed.
+    await page.evaluate(() => {
+      (window as any).__benchMountDone = new Promise<void>((resolve) => {
+        const off = (window as any).ipcRenderer.on('main:collection-loading-state-updated', (val: any) => {
+          if (!val.isLoading) {
+            off(); resolve();
+          }
+        });
+      });
+    });
+
+    const timer = startTimer();
+
+    await page.getByTestId('collections-header-add-menu').click();
+    await page.locator('.tippy-box .dropdown-item').filter({ hasText: 'Open collection' }).click();
+    await page.locator('#sidebar-collection-name').filter({ hasText: collectionName }).waitFor({ state: 'visible' });
+
+    await openCollection(page, collectionName);
+    await page.evaluate(() => (window as any).__benchMountDone);
+
+    elapsed = timer.elapsed();
+  } finally {
+    // Always put the real dialog back, even when a step above throws, so a
+    // failed measurement does not leave the stub installed in the app.
+    await electronApp.evaluate(({ dialog }) => {
+      if ((dialog as any).__originalShowOpenDialog) {
+        dialog.showOpenDialog = (dialog as any).__originalShowOpenDialog;
+      }
+    });
+  }
+
+  await closeAllCollections(page);
+
+  return elapsed;
+}
+
+/** Builds the results/baseline entry key for one benchmark case, e.g. `bru-50`. */
+function resultKey(format: CollectionFormat, size: number): string {
+  return [format, size].join('-');
+}
+
+// Declares one test per (format, size) pair. Each test generates a collection,
+// mounts it ITERATIONS_PER_SIZE times and records the timings; afterAll writes
+// every recorded entry to tests/benchmarks/results/mounting.json.
+test.describe('Benchmark: Collection Mount', () => {
+  // Raw timings per result key, filled in by the tests and flushed in afterAll.
+  const results: Record<string, number[]> = {};
+  const roundMs = (value: number) => Math.round(value);
+
+  COLLECTION_FORMATS.forEach((format) => {
+    test.describe(`format: ${format}`, () => {
+      COLLECTION_SIZES.forEach((size) => {
+        test(`mount ${format} collection with ${size} requests`, async ({ page, electronApp, createTmpDir }) => {
+          // 2 minutes base plus 2 minutes per started block of 100 requests.
+          const timeoutMinutes = 2 + Math.ceil(size / 100) * 2;
+          test.setTimeout(timeoutMinutes * 60_000);
+
+          const collectionName = `bench-${format}-${size}`;
+          const collectionDir = await createTmpDir(collectionName);
+          generateCollection({ dir: collectionDir, name: collectionName, requestCount: size, format });
+
+          // Mounts are measured strictly one after another.
+          const timings: number[] = [];
+          let iteration = 0;
+          while (iteration < ITERATIONS_PER_SIZE) {
+            const elapsed = await measureCollectionMount(page, electronApp, collectionDir, collectionName);
+            timings.push(elapsed);
+            iteration += 1;
+          }
+
+          results[resultKey(format, size)] = timings;
+
+          const stats = summarize(timings);
+          console.log(`[BENCHMARK] ${format} ${size} requests — mean: ${roundMs(stats.mean)}ms, median: ${roundMs(stats.median)}ms, p90: ${roundMs(stats.p90)}ms, stdDev: ${roundMs(stats.stdDev)}ms, raw: [${timings.join(', ')}]`);
+
+          const description = JSON.stringify({ format, size, ...stats, timings });
+          test.info().annotations.push({ type: 'benchmark', description });
+        });
+      });
+    });
+  });
+
+  // NOTE(review): afterAll runs once per worker process. Playwright starts a new
+  // worker after a test failure, so a later run of this hook would overwrite the
+  // file with only that worker's entries — confirm whether that is acceptable.
+  test.afterAll(async () => {
+    const resultsDir = path.join(process.cwd(), 'tests', 'benchmarks', 'results');
+    const outputPath = path.join(resultsDir, 'mounting.json');
+    fs.mkdirSync(resultsDir, { recursive: true });
+
+    const entries: Record<string, ResultEntry> = {};
+    Object.keys(results).forEach((key) => {
+      const timings = results[key];
+      if (timings.length === 0) return;
+      // Keys have the form "<format>-<size>" (see resultKey).
+      const [format, sizeStr] = key.split('-');
+      entries[key] = buildResultEntry(timings, { format, size: Number(sizeStr) });
+    });
+
+    writeResults(outputPath, { name: 'Collection Mount', unit: 'ms', direction: 'smaller' }, entries);
+    console.log(`[BENCHMARK] Results written to ${outputPath}`);
+  });
+});
--- a/tests/benchmarks/utils/collection-generator.ts
+++ b/tests/benchmarks/utils/collection-generator.ts
@@ -0,0 +1,67 @@
+import { stringifyRequest, stringifyCollection, stringifyFolder } from '@usebruno/filestore';
+import type { BrunoItem } from '@usebruno/schema-types';
+import * as path from 'path';
+import * as fs from 'fs';
+
+export type CollectionFormat = 'bru' | 'yml'; // on-disk formats generateCollection can emit
+
+/**
+ * Builds the synthetic GET request used to fill generated collections.
+ *
+ * Every identifier (uid, name, URL suffix, header uids) is derived from `seq`,
+ * so items built with different sequence numbers never share an id.
+ *
+ * @param seq sequence number of the request
+ * @returns a request item with two enabled JSON headers and no body or auth
+ */
+export function buildRequestItem(seq: number): BrunoItem {
+  const jsonHeader = (uidPrefix: string, name: string) => ({
+    uid: `${uidPrefix}-${seq}`,
+    name,
+    value: 'application/json',
+    enabled: true
+  });
+
+  return {
+    uid: `req-${seq}`,
+    type: 'http-request',
+    name: `request-${seq}`,
+    seq,
+    request: {
+      method: 'GET',
+      url: `https://example.com/api/v1/resource/${seq}`,
+      headers: [jsonHeader('h1', 'Content-Type'), jsonHeader('h2', 'Accept')],
+      body: { mode: 'none' },
+      auth: { mode: 'none' }
+    }
+  } as BrunoItem;
+}
+
+export interface GenerateCollectionOptions {
+  dir: string; // collection root; files and folder-N subdirectories are written here
+  name: string; // collection name written into the root collection file(s)
+  requestCount: number; // total number of request files to generate
+  format: CollectionFormat; // selects file extensions and the serializer format
+  requestsPerFolder?: number; // requests per folder-N directory; defaults to 10
+}
+
+/**
+ * Writes a synthetic collection to `dir`.
+ *
+ * Layout: the root collection file(s) for the chosen format (`bruno.json` plus
+ * `collection.bru`, or `opencollection.yml`), then `folder-<n>` directories
+ * that each hold a folder file and up to `requestsPerFolder` request files
+ * named `request-<seq>.<ext>`, with `seq` starting at 1.
+ *
+ * @param options.dir collection root; created if it does not exist
+ * @param options.name collection name
+ * @param options.requestCount total number of requests (non-negative integer)
+ * @param options.format 'bru' or 'yml'
+ * @param options.requestsPerFolder requests per folder (positive integer, default 10)
+ * @throws RangeError when requestCount or requestsPerFolder is out of range
+ */
+export function generateCollection({
+  dir,
+  name,
+  requestCount,
+  format,
+  requestsPerFolder = 10
+}: GenerateCollectionOptions) {
+  // A zero folder size would divide by zero and surface as an opaque
+  // "Invalid array length" error; fractional or negative counts would silently
+  // generate the wrong number of files. Reject them up front.
+  if (!Number.isInteger(requestsPerFolder) || requestsPerFolder < 1) {
+    throw new RangeError(`requestsPerFolder must be a positive integer, got ${requestsPerFolder}`);
+  }
+  if (!Number.isInteger(requestCount) || requestCount < 0) {
+    throw new RangeError(`requestCount must be a non-negative integer, got ${requestCount}`);
+  }
+
+  // Allow callers to pass a directory that has not been created yet.
+  fs.mkdirSync(dir, { recursive: true });
+
+  if (format === 'bru') {
+    fs.writeFileSync(path.join(dir, 'bruno.json'), JSON.stringify({ version: '1', name, type: 'collection' }, null, 2));
+    // Fall back to a minimal meta block when the serializer returns nothing.
+    fs.writeFileSync(path.join(dir, 'collection.bru'), stringifyCollection({ name } as any, {}, { format: 'bru' }) || `meta {\n  name: ${name}\n}\n`);
+  } else {
+    const ymlContent = stringifyCollection({ name } as any, { name, type: 'collection', opencollection: '1.0.0' }, { format: 'yml' });
+    fs.writeFileSync(path.join(dir, 'opencollection.yml'), ymlContent);
+  }
+
+  const ext = format === 'bru' ? 'bru' : 'yml';
+  const folderFile = format === 'bru' ? 'folder.bru' : 'folder.yml';
+  const folderCount = Math.ceil(requestCount / requestsPerFolder);
+
+  for (let f = 0; f < folderCount; f++) {
+    const folderPath = path.join(dir, `folder-${f}`);
+    fs.mkdirSync(folderPath, { recursive: true });
+
+    const folderContent = stringifyFolder({ name: `folder-${f}` }, { format });
+    fs.writeFileSync(path.join(folderPath, folderFile), folderContent || `meta {\n  name: folder-${f}\n}\n`);
+
+    // The last folder holds the remainder when requestCount is not a multiple
+    // of requestsPerFolder.
+    const count = Math.min(requestsPerFolder, requestCount - f * requestsPerFolder);
+    for (let r = 0; r < count; r++) {
+      const seq = f * requestsPerFolder + r + 1;
+      fs.writeFileSync(path.join(folderPath, `request-${seq}.${ext}`), stringifyRequest(buildRequestItem(seq), { format }));
+    }
+  }
+}
--- a/tests/benchmarks/utils/compare.js
+++ b/tests/benchmarks/utils/compare.js
@@ -0,0 +1,129 @@
+#!/usr/bin/env node
+
+/**
+ * Generic benchmark comparison: compares results against a baseline and exits
+ * with code 1 if any metric exceeds the allowed regression threshold.
+ *
+ * Usage:
+ *   node tests/benchmarks/utils/compare.js --results <path> --baseline <path> [--update-baseline]
+ *
+ * Examples:
+ *   node tests/benchmarks/utils/compare.js \
+ *     --results tests/benchmarks/results/mounting.json \
+ *     --baseline tests/benchmarks/mounting/baseline.ubuntu.json
+ *
+ *   node tests/benchmarks/utils/compare.js \
+ *     --results tests/benchmarks/results/mounting.json \
+ *     --baseline tests/benchmarks/mounting/baseline.ubuntu.json \
+ *     --update-baseline
+ */
+
+import { existsSync, readFileSync, writeFileSync } from 'fs';
+
+function parseArgs(argv) {
+  const args = {};
+  for (let i = 2; i < argv.length; i++) {
+    if (argv[i] === '--results') args.results = argv[++i];
+    else if (argv[i] === '--baseline') args.baseline = argv[++i];
+    else if (argv[i] === '--update-baseline') args.updateBaseline = true;
+  }
+  return args;
+}
+
+function loadJSON(filepath) {
+  if (!existsSync(filepath)) {
+    console.error(`File not found: ${filepath}`);
+    process.exit(1);
+  }
+  return JSON.parse(readFileSync(filepath, 'utf-8'));
+}
+
+function percentChange(baseline, current) {
+  if (baseline === 0) return current === 0 ? 0 : Infinity;
+  return ((current - baseline) / baseline) * 100;
+}
+
+function formatChange(change) {
+  const sign = change > 0 ? '+' : '';
+  return `${sign}${change.toFixed(1)}%`;
+}
+
+const args = parseArgs(process.argv);
+
+if (!args.results || !args.baseline) {
+  console.error('Usage: compare.js --results <path> --baseline <path> [--update-baseline]');
+  process.exit(1);
+}
+
+const results = loadJSON(args.results);
+const baseline = loadJSON(args.baseline);
+const threshold = baseline.thresholdPercent || 20;
+const resultEntries = results.entries || results;
+const baselineEntries = baseline.entries || {};
+
+if (args.updateBaseline) {
+  const newBaseline = {
+    thresholdPercent: threshold,
+    entries: {}
+  };
+  for (const [key, data] of Object.entries(resultEntries)) {
+    newBaseline.entries[key] = {
+      mean: data.mean,
+      p50: data.p50
+    };
+  }
+  writeFileSync(args.baseline, JSON.stringify(newBaseline, null, 2) + '\n');
+  console.log(`Baseline updated at ${args.baseline}`);
+  process.exit(0);
+}
+
+let hasRegression = false;
+const rows = [];
+
+console.log('');
+console.log('='.repeat(72));
+console.log(' BENCHMARK COMPARISON');
+console.log('='.repeat(72));
+console.log(`  Regression threshold: ${threshold}%`);
+console.log('');
+
+for (const [key, data] of Object.entries(resultEntries)) {
+  const base = baselineEntries[key];
+  if (!base) {
+    console.log(`  [SKIP] No baseline for ${key}`);
+    continue;
+  }
+
+  const meanChange = percentChange(base.mean, data.mean);
+  const p50Change = percentChange(base.p50, data.p50);
+
+  const meanStatus = meanChange > threshold ? 'FAIL' : meanChange < -threshold ? 'IMPROVED' : 'OK';
+  const p50Status = p50Change > threshold ? 'FAIL' : p50Change < -threshold ? 'IMPROVED' : 'OK';
+
+  if (meanStatus === 'FAIL' || p50Status === 'FAIL') {
+    hasRegression = true;
+  }
+
+  rows.push({
+    key,
+    'mean (ms)': `${Math.round(data.mean)} (baseline: ${base.mean})`,
+    'mean change': formatChange(meanChange),
+    'mean status': meanStatus,
+    'p50 (ms)': `${Math.round(data.p50)} (baseline: ${base.p50})`,
+    'p50 change': formatChange(p50Change),
+    'p50 status': p50Status
+  });
+}
+
+console.table(rows);
+console.log('');
+
+if (hasRegression) {
+  console.error(`FAILED: One or more benchmarks regressed beyond the ${threshold}% threshold.`);
+  console.error('If this regression is expected, update the baseline:');
+  console.error(`  node tests/benchmarks/utils/compare.js --results ${args.results} --baseline ${args.baseline} --update-baseline`);
+  process.exit(1);
+} else {
+  console.log('PASSED: All benchmarks are within the acceptable threshold.');
+  process.exit(0);
+}
--- a/tests/benchmarks/utils/pr-comment.js
+++ b/tests/benchmarks/utils/pr-comment.js
@@ -0,0 +1,83 @@
+#!/usr/bin/env node
+
+/**
+ * Generic benchmark PR comment: posts/updates a comparison table on a PR.
+ *
+ * Called by CI via actions/github-script:
+ *   const run = require('./tests/benchmarks/utils/pr-comment.js');
+ *   await run({ github, context, resultsPath, baselinePath, title });
+ */
+
+const fs = require('fs');
+
+function buildCommentBody(results, baseline, title) {
+  const threshold = baseline.thresholdPercent || 20;
+  const resultEntries = results.entries || results;
+  const baselineEntries = baseline.entries || {};
+  const marker = `## ${title}`;
+
+  let body = `${marker}\n\n`;
+  body += `| Key | Mean (ms) | Baseline Mean | Change | Status |\n`;
+  body += `|---|---|---|---|---|\n`;
+
+  let hasRegression = false;
+
+  for (const [key, data] of Object.entries(resultEntries)) {
+    const base = baselineEntries[key];
+    if (!base) continue;
+
+    const changePercent = (data.mean - base.mean) / base.mean * 100;
+    const changeStr = changePercent.toFixed(1);
+    const status = changePercent > threshold ? '🔴 REGRESSION' : changePercent < -threshold ? '🟢 IMPROVED' : '✅ OK';
+    if (changePercent > threshold) hasRegression = true;
+
+    body += `| ${key} | ${Math.round(data.mean)} | ${base.mean} | ${changePercent > 0 ? '+' : ''}${changeStr}% | ${status} |\n`;
+  }
+
+  body += `\n> Threshold: ${threshold}% regression allowed\n`;
+
+  if (hasRegression) {
+    body += '\n⚠️ **Performance regression detected.** If expected, update the baseline.\n';
+  }
+
+  return { body, marker };
+}
+
+async function postOrUpdateComment(github, context, body, marker) {
+  const { data: comments } = await github.rest.issues.listComments({
+    owner: context.repo.owner,
+    repo: context.repo.repo,
+    issue_number: context.issue.number
+  });
+
+  const existing = comments.find((c) => c.body.startsWith(marker));
+
+  if (existing) {
+    await github.rest.issues.updateComment({
+      owner: context.repo.owner,
+      repo: context.repo.repo,
+      comment_id: existing.id,
+      body
+    });
+  } else {
+    await github.rest.issues.createComment({
+      owner: context.repo.owner,
+      repo: context.repo.repo,
+      issue_number: context.issue.number,
+      body
+    });
+  }
+}
+
+module.exports = async function run({ github, context, resultsPath, baselinePath, title }) {
+  if (!fs.existsSync(resultsPath)) {
+    console.log(`No benchmark results found at ${resultsPath}, skipping comment.`);
+    return;
+  }
+
+  const results = JSON.parse(fs.readFileSync(resultsPath, 'utf-8'));
+  const baseline = JSON.parse(fs.readFileSync(baselinePath, 'utf-8'));
+  const { body, marker } = buildCommentBody(results, baseline, title);
+
+  await postOrUpdateComment(github, context, body, marker);
+};
--- a/tests/benchmarks/utils/results.ts
+++ b/tests/benchmarks/utils/results.ts
@@ -0,0 +1,92 @@
+/**
+ * Standard read/write helpers for benchmark results and baselines.
+ *
+ * Results shape (written by benchmark tests):
+ * {
+ *   "suite": { "name": "...", "unit": "ms", "direction": "smaller" },
+ *   "entries": {
+ *     "<key>": { mean, median, p50, p90, p99, stdDev, min, max, count, timings, ...meta }
+ *   }
+ * }
+ *
+ * Baseline shape (committed per suite):
+ * {
+ *   "thresholdPercent": 20,
+ *   "entries": {
+ *     "<key>": { mean, p50 }
+ *   }
+ * }
+ */
+
+import { existsSync, readFileSync, writeFileSync } from 'fs';
+import { summarize } from './stats';
+
+export type Direction = 'smaller' | 'bigger'; // NOTE(review): presumably whether lower or higher values are better — not read in this file
+export type Unit = 'ms' | 's' | 'ops/s' | 'bytes' | '%' | 'count'; // unit label stored in SuiteMeta
+
+export interface SuiteMeta { // written under "suite" in the results file
+  name: string;
+  unit: Unit;
+  direction: Direction;
+}
+
+export interface ResultEntry { // one benchmark case: summary statistics plus raw samples
+  mean: number;
+  median: number;
+  p50: number; // 50th percentile; together with mean, the value copied into baselines
+  p90: number;
+  p99: number;
+  stdDev: number;
+  min: number;
+  max: number;
+  count: number; // number of samples
+  timings: number[]; // raw samples the statistics were computed from
+  [key: string]: any; // extra metadata merged in by buildResultEntry
+}
+
+export interface ResultsFile { // shape written by writeResults
+  suite: SuiteMeta;
+  entries: Record<string, ResultEntry>;
+}
+
+export interface BaselineEntry { // the two metrics kept per key in a baseline
+  mean: number;
+  p50: number;
+}
+
+export interface BaselineFile { // shape written by writeBaseline
+  thresholdPercent: number;
+  entries: Record<string, BaselineEntry>;
+}
+
+/**
+ * Loads a results file from disk.
+ *
+ * @param filePath path of the results JSON
+ * @returns the parsed file contents (not validated against ResultsFile)
+ * @throws Error when the file does not exist
+ */
+export function readResults(filePath: string): ResultsFile {
+  if (existsSync(filePath)) {
+    return JSON.parse(readFileSync(filePath, 'utf-8'));
+  }
+  throw new Error(`Results file not found: ${filePath}`);
+}
+
+/**
+ * Serializes `{ suite, entries }` as 2-space-indented JSON and writes it to
+ * `filePath`. The parent directory must already exist.
+ */
+export function writeResults(filePath: string, suite: SuiteMeta, entries: Record<string, ResultEntry>) {
+  const payload: ResultsFile = { suite, entries };
+  const serialized = JSON.stringify(payload, null, 2);
+  writeFileSync(filePath, serialized);
+}
+
+/**
+ * Combines the summary statistics of `timings`, the raw timings and any extra
+ * metadata into one result entry. Keys in `meta` are applied last, so they
+ * override statistics of the same name.
+ */
+export function buildResultEntry(timings: number[], meta: Record<string, any> = {}): ResultEntry {
+  const measured: ResultEntry = { ...summarize(timings), timings };
+  return { ...measured, ...meta };
+}
+
+/**
+ * Loads a baseline file from disk.
+ *
+ * @param filePath path of the baseline JSON
+ * @returns the parsed file contents (not validated against BaselineFile)
+ * @throws Error when the file does not exist
+ */
+export function readBaseline(filePath: string): BaselineFile {
+  if (existsSync(filePath)) {
+    return JSON.parse(readFileSync(filePath, 'utf-8'));
+  }
+  throw new Error(`Baseline file not found: ${filePath}`);
+}
+
+/**
+ * Writes a baseline derived from `results`: each entry is reduced to its
+ * `mean` and `p50`, stored next to `thresholdPercent`. The output is
+ * 2-space-indented JSON with a trailing newline.
+ */
+export function writeBaseline(filePath: string, results: ResultsFile, thresholdPercent: number) {
+  const entries = Object.keys(results.entries).reduce<Record<string, BaselineEntry>>((acc, key) => {
+    const { mean, p50 } = results.entries[key];
+    acc[key] = { mean, p50 };
+    return acc;
+  }, {});
+  const baseline: BaselineFile = { thresholdPercent, entries };
+  writeFileSync(filePath, `${JSON.stringify(baseline, null, 2)}\n`);
+}
--- a/tests/benchmarks/utils/stats.ts
+++ b/tests/benchmarks/utils/stats.ts
@@ -0,0 +1,111 @@
+/**
+ * Statistical utility functions for benchmark analysis.
+ */
+
+/**
+ * Input guard shared by every statistic in this module.
+ *
+ * @throws Error when `values` is empty
+ * @throws TypeError when any value is not a finite number
+ */
+function assertValid(values: number[]) {
+  if (!values.length) {
+    throw new Error('Values array must not be empty');
+  }
+  const hasNonFinite = values.some((value) => !Number.isFinite(value));
+  if (hasNonFinite) {
+    throw new TypeError('All values must be finite numbers');
+  }
+}
+
+/** Returns an ascending-sorted copy of `values`; the input is left untouched. */
+function sorted(values: number[]): number[] {
+  const copy = Array.from(values);
+  copy.sort((left, right) => left - right);
+  return copy;
+}
+
+/** Arithmetic mean of a non-empty array of finite numbers. */
+export function mean(values: number[]): number {
+  assertValid(values);
+  let total = 0;
+  values.forEach((value) => {
+    total += value;
+  });
+  return total / values.length;
+}
+
+/**
+ * Median of a non-empty array of finite numbers: the middle element for an
+ * odd count, the average of the two middle elements for an even count.
+ */
+export function median(values: number[]): number {
+  assertValid(values);
+  const ordered = sorted(values);
+  const size = ordered.length;
+  const half = Math.floor(size / 2);
+
+  if (size % 2 !== 0) {
+    return ordered[half];
+  }
+  return (ordered[half - 1] + ordered[half]) / 2;
+}
+
+/**
+ * Returns the p-th percentile of `values`, linearly interpolating between the
+ * two closest ranks of the ascending-sorted samples (rank = p/100 * (n - 1)).
+ *
+ * @param values non-empty array of finite numbers
+ * @param p percentile in the inclusive range 0–100
+ * @returns the interpolated percentile value
+ * @throws Error | TypeError when `values` is empty or holds non-finite numbers
+ * @throws RangeError when `p` is NaN or outside 0–100
+ */
+export function percentile(values: number[], p: number): number {
+  assertValid(values);
+
+  // Written as a negated range check so NaN is rejected too: `p < 0 || p > 100`
+  // is false for NaN, which would make the function return NaN.
+  if (!(p >= 0 && p <= 100)) {
+    throw new RangeError(`Percentile must be between 0 and 100, got ${p}`);
+  }
+
+  const s = sorted(values);
+  const index = (p / 100) * (s.length - 1);
+
+  const lower = Math.floor(index);
+  const upper = Math.ceil(index);
+
+  // The rank landed exactly on a sample: no interpolation needed.
+  if (lower === upper) return s[lower];
+
+  const weight = index - lower;
+  return s[lower] + weight * (s[upper] - s[lower]);
+}
+
+/**
+ * Population standard deviation: square root of the mean squared deviation
+ * from the average (sum divided by N).
+ */
+export function populationStdDev(values: number[]): number {
+  assertValid(values);
+  const center = mean(values);
+
+  let squaredDeviations = 0;
+  values.forEach((value) => {
+    squaredDeviations += (value - center) ** 2;
+  });
+
+  return Math.sqrt(squaredDeviations / values.length);
+}
+
+/**
+ * Sample standard deviation: like the population variant, but the squared
+ * deviations are divided by N - 1.
+ *
+ * @throws Error when fewer than 2 values are given
+ */
+export function sampleStdDev(values: number[]): number {
+  assertValid(values);
+
+  const size = values.length;
+  if (size < 2) {
+    throw new Error('Sample standard deviation requires at least 2 values');
+  }
+
+  const center = mean(values);
+
+  let squaredDeviations = 0;
+  values.forEach((value) => {
+    squaredDeviations += (value - center) ** 2;
+  });
+
+  return Math.sqrt(squaredDeviations / (size - 1));
+}
+
+/** Smallest value of a non-empty array of finite numbers. */
+export function min(values: number[]): number {
+  assertValid(values);
+  let smallest = Infinity;
+  values.forEach((value) => {
+    if (value <= smallest) smallest = value;
+  });
+  return smallest;
+}
+
+/** Largest value of a non-empty array of finite numbers. */
+export function max(values: number[]): number {
+  assertValid(values);
+  let largest = -Infinity;
+  values.forEach((value) => {
+    if (value >= largest) largest = value;
+  });
+  return largest;
+}
+
+/**
+ * Computes the full set of summary statistics for one benchmark case.
+ * Values are returned unrounded; `stdDev` is the population standard deviation.
+ *
+ * @throws Error | TypeError when `values` is empty or holds non-finite numbers
+ */
+export function summarize(values: number[]) {
+  assertValid(values);
+
+  const pct = (p: number) => percentile(values, p);
+  const average = mean(values);
+  const middle = median(values);
+  const smallest = min(values);
+  const largest = max(values);
+  const spread = populationStdDev(values);
+
+  return {
+    mean: average,
+    median: middle,
+    p50: pct(50),
+    p90: pct(90),
+    p99: pct(99),
+    min: smallest,
+    max: largest,
+    stdDev: spread,
+    count: values.length
+  };
+}
--- a/tests/benchmarks/utils/timing.ts
+++ b/tests/benchmarks/utils/timing.ts
@@ -0,0 +1,25 @@
+/**
+ * Timing utilities for benchmarks.
+ *
+ * Capture:  const t = startTimer(); ...do work...; const ms = t.elapsed();
+ * Convert:  convertDuration(1500, 'ms', 's') === 1.5
+ */
+
+export type DurationUnit = 'ns' | 'us' | 'ms' | 's'; // units accepted by convertDuration
+
+const DURATION_TO_MS: Record<DurationUnit, number> = { // milliseconds per one unit
+  ns: 1e-6,
+  us: 1e-3,
+  ms: 1,
+  s: 1000
+};
+
+/**
+ * Starts a timer based on `performance.now()`.
+ *
+ * @returns an object whose `elapsed()` gives the milliseconds since the call
+ */
+export function startTimer() {
+  const startedAt = performance.now();
+  const elapsed = () => performance.now() - startedAt;
+  return { elapsed };
+}
+
+/**
+ * Converts a duration between units by going through milliseconds.
+ * When both units are equal the value is returned unchanged.
+ */
+export function convertDuration(value: number, from: DurationUnit, to: DurationUnit): number {
+  if (from !== to) {
+    const milliseconds = value * DURATION_TO_MS[from];
+    return milliseconds / DURATION_TO_MS[to];
+  }
+  return value;
+}