mirror of
https://github.com/usebruno/bruno.git
synced 2026-06-11 09:51:30 +00:00
feat: add benchmark framework for collection mount performance (#7915)
This commit is contained in:
committed by
GitHub
parent
b79349b052
commit
736c050dae
38
.github/actions/tests/run-benchmark-tests/action.yml
vendored
Normal file
38
.github/actions/tests/run-benchmark-tests/action.yml
vendored
Normal file
@@ -0,0 +1,38 @@
|
||||
name: 'Run Benchmark Tests'
|
||||
description: 'Run Playwright benchmark tests and compare against baseline'
|
||||
inputs:
|
||||
os:
|
||||
description: 'Operating system (ubuntu, macos, windows)'
|
||||
default: 'ubuntu'
|
||||
update-baseline:
|
||||
description: 'Update baseline instead of comparing'
|
||||
default: 'false'
|
||||
runs:
|
||||
using: 'composite'
|
||||
steps:
|
||||
- name: Run Benchmark Tests (Ubuntu)
|
||||
if: inputs.os == 'ubuntu'
|
||||
shell: bash
|
||||
run: xvfb-run npm run test:benchmark
|
||||
|
||||
- name: Run Benchmark Tests
|
||||
if: inputs.os != 'ubuntu'
|
||||
shell: bash
|
||||
run: npm run test:benchmark
|
||||
|
||||
- name: Update Baseline
|
||||
if: inputs.update-baseline == 'true'
|
||||
shell: bash
|
||||
run: >-
|
||||
node tests/benchmarks/utils/compare.js
|
||||
--results tests/benchmarks/results/mounting.json
|
||||
--baseline tests/benchmarks/mounting/baseline.${{ inputs.os }}.json
|
||||
--update-baseline
|
||||
|
||||
- name: Compare Against Baseline
|
||||
if: inputs.update-baseline != 'true'
|
||||
shell: bash
|
||||
run: >-
|
||||
node tests/benchmarks/utils/compare.js
|
||||
--results tests/benchmarks/results/mounting.json
|
||||
--baseline tests/benchmarks/mounting/baseline.${{ inputs.os }}.json
|
||||
88
.github/workflows/benchmarks.yml
vendored
Normal file
88
.github/workflows/benchmarks.yml
vendored
Normal file
@@ -0,0 +1,88 @@
|
||||
name: Benchmarks
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
update-baseline:
|
||||
description: 'Update baseline with current results instead of comparing'
|
||||
type: boolean
|
||||
default: false
|
||||
pull_request:
|
||||
branches: [main, 'release/v*']
|
||||
|
||||
jobs:
|
||||
benchmark:
|
||||
name: Performance Benchmarks (${{ matrix.os }})
|
||||
timeout-minutes: 60
|
||||
runs-on: ${{ matrix.os }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
os: [ubuntu-24.04, macos-latest, windows-latest]
|
||||
include:
|
||||
- os: ubuntu-24.04
|
||||
os-name: ubuntu
|
||||
- os: macos-latest
|
||||
os-name: macos
|
||||
- os: windows-latest
|
||||
os-name: windows
|
||||
permissions:
|
||||
contents: write
|
||||
pull-requests: write
|
||||
steps:
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Install System Dependencies (Ubuntu)
|
||||
if: matrix.os-name == 'ubuntu'
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get --no-install-recommends install -y \
|
||||
libglib2.0-0 libnss3 libdbus-1-3 libatk1.0-0 libatk-bridge2.0-0 libcups2 libgtk-3-0 libasound2t64 \
|
||||
xvfb
|
||||
|
||||
- name: Setup Node Dependencies
|
||||
uses: ./.github/actions/common/setup-node-deps
|
||||
|
||||
- name: Configure Chrome Sandbox
|
||||
if: matrix.os-name == 'ubuntu'
|
||||
run: |
|
||||
sudo chown root node_modules/electron/dist/chrome-sandbox
|
||||
sudo chmod 4755 node_modules/electron/dist/chrome-sandbox
|
||||
|
||||
- name: Run Benchmark Tests
|
||||
uses: ./.github/actions/tests/run-benchmark-tests
|
||||
with:
|
||||
os: ${{ matrix.os-name }}
|
||||
update-baseline: ${{ github.event.inputs.update-baseline || 'false' }}
|
||||
|
||||
- name: Upload Benchmark Results
|
||||
uses: actions/upload-artifact@v6
|
||||
if: ${{ !cancelled() }}
|
||||
with:
|
||||
name: benchmark-results-${{ matrix.os-name }}
|
||||
path: |
|
||||
tests/benchmarks/results/
|
||||
benchmark-report/
|
||||
retention-days: 30
|
||||
|
||||
- name: Commit Updated Baseline
  if: github.event.inputs.update-baseline == 'true'
  # Pin bash so the script below behaves the same on every runner in the matrix.
  shell: bash
  run: |
    git config user.name "github-actions[bot]"
    git config user.email "github-actions[bot]@users.noreply.github.com"
    git add tests/benchmarks/mounting/baseline.${{ matrix.os-name }}.json
    # Stop early when the baseline did not change: nothing to commit, nothing to push.
    if git diff --staged --quiet; then
      echo "Baseline for ${{ matrix.os-name }} is unchanged; nothing to commit."
      exit 0
    fi
    git commit -m "chore: update ${{ matrix.os-name }} benchmark baseline"
    # Every matrix job pushes to the same branch, so a sibling job may have pushed
    # first. Rebase onto the remote tip and retry instead of failing on a
    # non-fast-forward rejection.
    for attempt in 1 2 3; do
      if git pull --rebase --autostash origin "${GITHUB_REF_NAME}" && git push origin "HEAD:${GITHUB_REF_NAME}"; then
        exit 0
      fi
      echo "Push attempt ${attempt} failed; retrying..."
      sleep 5
    done
    echo "Could not push the updated baseline after 3 attempts." >&2
    exit 1
|
||||
|
||||
- name: Comment Benchmark Results on PR
|
||||
if: github.event_name == 'pull_request' && !cancelled()
|
||||
continue-on-error: true
|
||||
uses: actions/github-script@v7
|
||||
with:
|
||||
script: |
|
||||
const run = require('./tests/benchmarks/utils/pr-comment.js');
|
||||
await run({
|
||||
github,
|
||||
context,
|
||||
resultsPath: 'tests/benchmarks/results/mounting.json',
|
||||
baselinePath: 'tests/benchmarks/mounting/baseline.${{ matrix.os-name }}.json',
|
||||
title: 'Benchmark Results — Collection Mount (${{ matrix.os-name }})'
|
||||
});
|
||||
4
.gitignore
vendored
4
.gitignore
vendored
@@ -58,6 +58,10 @@ skills-lock.json
|
||||
# Playwright
|
||||
/blob-report/
|
||||
|
||||
# Benchmark results (generated at runtime)
|
||||
tests/benchmarks/results/
|
||||
/benchmark-report/
|
||||
|
||||
# Development plan files
|
||||
CLAUDE.md
|
||||
AGENTS.md
|
||||
|
||||
@@ -83,6 +83,7 @@
|
||||
"test:e2e": "playwright test --project=default",
|
||||
"test:e2e:ssl": "playwright test --project=ssl",
|
||||
"test:e2e:auth": "playwright test --project=auth",
|
||||
"test:benchmark": "playwright test --config=playwright.benchmark.config.ts",
|
||||
"lint": "cross-env NODE_OPTIONS=\"--max_old_space_size=4096\" npx eslint",
|
||||
"lint:fix": "cross-env NODE_OPTIONS=\"--max_old_space_size=4096\" npx eslint --fix",
|
||||
"prepare": "husky"
|
||||
|
||||
38
playwright.benchmark.config.ts
Normal file
38
playwright.benchmark.config.ts
Normal file
@@ -0,0 +1,38 @@
|
||||
import { defineConfig } from '@playwright/test';
|
||||
|
||||
// Ten minutes, used both as the per-test timeout and as the web server start-up timeout.
const TEN_MINUTES_MS = 10 * 60 * 1000;

// True when the CI environment variable is set to a non-empty value.
const runningInCI = Boolean(process.env.CI);

/**
 * Playwright configuration for the benchmark suite.
 *
 * Benchmarks run serially on a single worker with no retries, and tracing is
 * switched off. Only `*.bench.ts` files under `tests/benchmarks` are collected.
 */
export default defineConfig({
  fullyParallel: false,
  forbidOnly: runningInCI,
  retries: 0,
  workers: 1,

  reporter: [
    ['list'],
    ['json', { outputFile: 'benchmark-report/results.json' }]
  ],

  use: { trace: 'off' },

  projects: [
    {
      name: 'benchmarks',
      testDir: './tests/benchmarks',
      testMatch: '**/*.bench.ts'
    }
  ],

  webServer: [
    {
      command: 'npm run dev:web',
      url: 'http://localhost:3000',
      // Outside CI an already-running dev server is reused instead of starting a new one.
      reuseExistingServer: !runningInCI,
      timeout: TEN_MINUTES_MS
    }
  ],

  timeout: TEN_MINUTES_MS,
  expect: { timeout: 120_000 }
});
|
||||
@@ -23,7 +23,8 @@ export default defineConfig({
|
||||
testDir: './tests',
|
||||
testIgnore: [
|
||||
'ssl/**', // custom CA certificate tests require separate server setup and certificate generation
|
||||
'auth/**' // auth tests have their own project
|
||||
'auth/**', // auth tests have their own project
|
||||
'benchmarks/**'
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
45
tests/benchmarks/mounting/baseline.macos.json
Normal file
45
tests/benchmarks/mounting/baseline.macos.json
Normal file
@@ -0,0 +1,45 @@
|
||||
{
|
||||
"thresholdPercent": 20,
|
||||
"entries": {
|
||||
"bru-50": {
|
||||
"mean": 2200,
|
||||
"p50": 1000
|
||||
},
|
||||
"bru-200": {
|
||||
"mean": 1300,
|
||||
"p50": 1100
|
||||
},
|
||||
"bru-500": {
|
||||
"mean": 3600,
|
||||
"p50": 3500
|
||||
},
|
||||
"bru-1000": {
|
||||
"mean": 9100,
|
||||
"p50": 9000
|
||||
},
|
||||
"bru-3000": {
|
||||
"mean": 185000,
|
||||
"p50": 183000
|
||||
},
|
||||
"yml-50": {
|
||||
"mean": 700,
|
||||
"p50": 650
|
||||
},
|
||||
"yml-200": {
|
||||
"mean": 1400,
|
||||
"p50": 1250
|
||||
},
|
||||
"yml-500": {
|
||||
"mean": 3900,
|
||||
"p50": 3700
|
||||
},
|
||||
"yml-1000": {
|
||||
"mean": 11700,
|
||||
"p50": 11900
|
||||
},
|
||||
"yml-3000": {
|
||||
"mean": 85000,
|
||||
"p50": 80000
|
||||
}
|
||||
}
|
||||
}
|
||||
45
tests/benchmarks/mounting/baseline.ubuntu.json
Normal file
45
tests/benchmarks/mounting/baseline.ubuntu.json
Normal file
@@ -0,0 +1,45 @@
|
||||
{
|
||||
"thresholdPercent": 20,
|
||||
"entries": {
|
||||
"bru-50": {
|
||||
"mean": 1500,
|
||||
"p50": 700
|
||||
},
|
||||
"bru-200": {
|
||||
"mean": 1200,
|
||||
"p50": 1150
|
||||
},
|
||||
"bru-500": {
|
||||
"mean": 2900,
|
||||
"p50": 2900
|
||||
},
|
||||
"bru-1000": {
|
||||
"mean": 8000,
|
||||
"p50": 8000
|
||||
},
|
||||
"bru-3000": {
|
||||
"mean": 175000,
|
||||
"p50": 170000
|
||||
},
|
||||
"yml-50": {
|
||||
"mean": 600,
|
||||
"p50": 560
|
||||
},
|
||||
"yml-200": {
|
||||
"mean": 1200,
|
||||
"p50": 1200
|
||||
},
|
||||
"yml-500": {
|
||||
"mean": 3500,
|
||||
"p50": 3400
|
||||
},
|
||||
"yml-1000": {
|
||||
"mean": 10700,
|
||||
"p50": 10650
|
||||
},
|
||||
"yml-3000": {
|
||||
"mean": 85000,
|
||||
"p50": 80000
|
||||
}
|
||||
}
|
||||
}
|
||||
45
tests/benchmarks/mounting/baseline.windows.json
Normal file
45
tests/benchmarks/mounting/baseline.windows.json
Normal file
@@ -0,0 +1,45 @@
|
||||
{
|
||||
"thresholdPercent": 20,
|
||||
"entries": {
|
||||
"bru-50": {
|
||||
"mean": 2700,
|
||||
"p50": 800
|
||||
},
|
||||
"bru-200": {
|
||||
"mean": 1500,
|
||||
"p50": 1400
|
||||
},
|
||||
"bru-500": {
|
||||
"mean": 3500,
|
||||
"p50": 3500
|
||||
},
|
||||
"bru-1000": {
|
||||
"mean": 9500,
|
||||
"p50": 9400
|
||||
},
|
||||
"bru-3000": {
|
||||
"mean": 195000,
|
||||
"p50": 190000
|
||||
},
|
||||
"yml-50": {
|
||||
"mean": 600,
|
||||
"p50": 570
|
||||
},
|
||||
"yml-200": {
|
||||
"mean": 1350,
|
||||
"p50": 1300
|
||||
},
|
||||
"yml-500": {
|
||||
"mean": 3800,
|
||||
"p50": 3700
|
||||
},
|
||||
"yml-1000": {
|
||||
"mean": 11000,
|
||||
"p50": 11000
|
||||
},
|
||||
"yml-3000": {
|
||||
"mean": 90000,
|
||||
"p50": 88000
|
||||
}
|
||||
}
|
||||
}
|
||||
115
tests/benchmarks/mounting/collection-mount.bench.ts
Normal file
115
tests/benchmarks/mounting/collection-mount.bench.ts
Normal file
@@ -0,0 +1,115 @@
|
||||
import { test } from '../../../playwright';
|
||||
import { type ElectronApplication, type Page } from '@playwright/test';
|
||||
import { openCollection, closeAllCollections } from '../../utils/page';
|
||||
import { summarize } from '../utils/stats';
|
||||
import { writeResults, buildResultEntry, type ResultEntry } from '../utils/results';
|
||||
import { startTimer } from '../utils/timing';
|
||||
import { generateCollection, type CollectionFormat } from '../utils/collection-generator';
|
||||
import * as path from 'path';
|
||||
import * as fs from 'fs';
|
||||
|
||||
const COLLECTION_SIZES = [50, 200, 500, 1000, 3000];
|
||||
const COLLECTION_FORMATS: CollectionFormat[] = ['bru', 'yml'];
|
||||
const ITERATIONS_PER_SIZE = 3;
|
||||
|
||||
/**
 * Opens the collection at `collectionDir` through the "Open collection" menu
 * and returns how long the mount took, in milliseconds.
 *
 * The timer starts just before the add-menu click and stops once the renderer
 * has received a `main:collection-loading-state-updated` event whose payload
 * has a falsy `isLoading`. All collections are closed again before returning
 * so the next iteration starts from the same state.
 *
 * @param page - Renderer page used to drive the UI.
 * @param electronApp - Electron application whose `dialog.showOpenDialog` is temporarily stubbed.
 * @param collectionDir - Directory the stubbed dialog reports as the chosen folder.
 * @param collectionName - Name expected to appear in the sidebar for the collection.
 * @returns Elapsed time in milliseconds as reported by `startTimer()`.
 */
async function measureCollectionMount(
  page: Page,
  electronApp: ElectronApplication,
  collectionDir: string,
  collectionName: string
): Promise<number> {
  // Replace the native folder picker so it immediately "selects" the benchmark
  // directory. The real implementation is stashed once (`??=`) for later restore.
  await electronApp.evaluate(
    ({ dialog }, { dir }) => {
      (dialog as any).__originalShowOpenDialog ??= dialog.showOpenDialog;
      dialog.showOpenDialog = async () => ({ canceled: false, filePaths: [dir] });
    },
    { dir: collectionDir }
  );

  let elapsed: number;
  try {
    // Register the completion listener BEFORE triggering the mount so the
    // loading-finished event cannot be missed.
    await page.evaluate(() => {
      (window as any).__benchMountDone = new Promise<void>((resolve) => {
        const off = (window as any).ipcRenderer.on('main:collection-loading-state-updated', (val: any) => {
          if (!val.isLoading) {
            off();
            resolve();
          }
        });
      });
    });

    const timer = startTimer();

    await page.getByTestId('collections-header-add-menu').click();
    await page.locator('.tippy-box .dropdown-item').filter({ hasText: 'Open collection' }).click();
    await page.locator('#sidebar-collection-name').filter({ hasText: collectionName }).waitFor({ state: 'visible' });

    await openCollection(page, collectionName);
    await page.evaluate(() => (window as any).__benchMountDone);

    elapsed = timer.elapsed();
  } finally {
    // Always put the real dialog back, even when the measured section throws,
    // so a failed iteration does not leave the stub installed in the app.
    await electronApp.evaluate(({ dialog }) => {
      if ((dialog as any).__originalShowOpenDialog) {
        dialog.showOpenDialog = (dialog as any).__originalShowOpenDialog;
      }
    });
  }

  await closeAllCollections(page);

  return elapsed;
}
|
||||
|
||||
/**
 * Builds the `<format>-<size>` key under which a benchmark's timings are stored.
 */
function resultKey(format: CollectionFormat, size: number): string {
  return [format, size].join('-');
}
|
||||
|
||||
test.describe('Benchmark: Collection Mount', () => {
  // Raw timings per `<format>-<size>` key; filled by each test and flushed in afterAll.
  const results: Record<string, number[]> = {};

  COLLECTION_FORMATS.forEach((format) => {
    test.describe(`format: ${format}`, () => {
      COLLECTION_SIZES.forEach((size) => {
        test(`mount ${format} collection with ${size} requests`, async ({ page, electronApp, createTmpDir }) => {
          // Budget: a 2-minute base plus 2 minutes per started block of 100 requests.
          const timeoutMinutes = 2 + Math.ceil(size / 100) * 2;
          test.setTimeout(timeoutMinutes * 60_000);

          // Generate a fresh collection on disk for this format/size pair.
          const collectionName = `bench-${format}-${size}`;
          const collectionDir = await createTmpDir(`bench-${format}-${size}`);
          generateCollection({ dir: collectionDir, name: collectionName, requestCount: size, format });

          // Mount the same collection several times and keep every sample.
          const timings: number[] = [];
          let remaining = ITERATIONS_PER_SIZE;
          while (remaining > 0) {
            timings.push(await measureCollectionMount(page, electronApp, collectionDir, collectionName));
            remaining -= 1;
          }
          results[resultKey(format, size)] = timings;

          const stats = summarize(timings);
          const r = (v: number) => Math.round(v);
          console.log(`[BENCHMARK] ${format} ${size} requests — mean: ${r(stats.mean)}ms, median: ${r(stats.median)}ms, p90: ${r(stats.p90)}ms, stdDev: ${r(stats.stdDev)}ms, raw: [${timings.join(', ')}]`);

          // Attach the numbers to the test so they show up in the JSON report.
          const description = JSON.stringify({ format, size, ...stats, timings });
          test.info().annotations.push({ type: 'benchmark', description });
        });
      });
    });
  });

  // Persist everything collected so far as tests/benchmarks/results/mounting.json.
  test.afterAll(async () => {
    const resultsDir = path.join(process.cwd(), 'tests', 'benchmarks', 'results');
    const outputPath = path.join(resultsDir, 'mounting.json');
    fs.mkdirSync(resultsDir, { recursive: true });

    const entries: Record<string, ResultEntry> = {};
    Object.keys(results)
      .filter((key) => results[key].length !== 0)
      .forEach((key) => {
        // Keys have the form `<format>-<size>`; recover both parts as entry metadata.
        const [format, sizeStr] = key.split('-');
        entries[key] = buildResultEntry(results[key], { format, size: Number(sizeStr) });
      });

    writeResults(outputPath, { name: 'Collection Mount', unit: 'ms', direction: 'smaller' }, entries);
    console.log(`[BENCHMARK] Results written to ${outputPath}`);
  });
});
|
||||
67
tests/benchmarks/utils/collection-generator.ts
Normal file
67
tests/benchmarks/utils/collection-generator.ts
Normal file
@@ -0,0 +1,67 @@
|
||||
import { stringifyRequest, stringifyCollection, stringifyFolder } from '@usebruno/filestore';
|
||||
import type { BrunoItem } from '@usebruno/schema-types';
|
||||
import * as path from 'path';
|
||||
import * as fs from 'fs';
|
||||
|
||||
export type CollectionFormat = 'bru' | 'yml';
|
||||
|
||||
/**
 * Creates a synthetic GET request item for benchmark collections.
 *
 * Every identifier (uid, name, URL suffix, header uids) is derived from `seq`,
 * so items with different sequence numbers never collide.
 *
 * @param seq - Sequence number of the request within the collection.
 */
export function buildRequestItem(seq: number): BrunoItem {
  // Both headers carry the same JSON media type; only uid prefix and name differ.
  const jsonHeader = (uidPrefix: string, name: string) => ({
    uid: `${uidPrefix}-${seq}`,
    name,
    value: 'application/json',
    enabled: true
  });

  return {
    uid: `req-${seq}`,
    type: 'http-request',
    name: `request-${seq}`,
    seq,
    request: {
      method: 'GET',
      url: `https://example.com/api/v1/resource/${seq}`,
      headers: [jsonHeader('h1', 'Content-Type'), jsonHeader('h2', 'Accept')],
      body: { mode: 'none' },
      auth: { mode: 'none' }
    }
  } as BrunoItem;
}
|
||||
|
||||
/** Options accepted by `generateCollection`. */
export interface GenerateCollectionOptions {
  /** Directory the collection files are written into. */
  dir: string;
  /** Collection name written into the collection's root file(s). */
  name: string;
  /** Total number of request files to generate. */
  requestCount: number;
  /** On-disk format of the generated collection. */
  format: CollectionFormat;
  /** Maximum number of requests per generated folder; `generateCollection` uses 10 when omitted. */
  requestsPerFolder?: number;
}
|
||||
|
||||
/**
 * Writes a synthetic collection to `dir`.
 *
 * The root gets `bruno.json` + `collection.bru` (bru format) or
 * `opencollection.yml` (yml format). Requests are spread over `folder-<n>`
 * directories holding at most `requestsPerFolder` requests each, and every
 * folder also receives its own folder file.
 */
export function generateCollection({
  dir,
  name,
  requestCount,
  format,
  requestsPerFolder = 10
}: GenerateCollectionOptions) {
  const isBru = format === 'bru';

  // Root-level collection files.
  if (isBru) {
    const brunoJson = JSON.stringify({ version: '1', name, type: 'collection' }, null, 2);
    fs.writeFileSync(path.join(dir, 'bruno.json'), brunoJson);

    // Fall back to a hand-written meta block when the serializer returns nothing.
    const collectionBru = stringifyCollection({ name } as any, {}, { format: 'bru' }) || `meta {\n name: ${name}\n}\n`;
    fs.writeFileSync(path.join(dir, 'collection.bru'), collectionBru);
  } else {
    const ymlContent = stringifyCollection({ name } as any, { name, type: 'collection', opencollection: '1.0.0' }, { format: 'yml' });
    fs.writeFileSync(path.join(dir, 'opencollection.yml'), ymlContent);
  }

  const ext = isBru ? 'bru' : 'yml';
  const folderFile = isBru ? 'folder.bru' : 'folder.yml';
  const folderCount = Math.ceil(requestCount / requestsPerFolder);

  const folderIndexes = Array.from({ length: folderCount }, (_, index) => index);
  for (const folderIndex of folderIndexes) {
    const folderPath = path.join(dir, `folder-${folderIndex}`);
    fs.mkdirSync(folderPath, { recursive: true });

    const folderContent = stringifyFolder({ name: `folder-${folderIndex}` }, { format });
    fs.writeFileSync(path.join(folderPath, folderFile), folderContent || `meta {\n name: folder-${folderIndex}\n}\n`);

    // The last folder may hold fewer requests than the others.
    const firstSeq = folderIndex * requestsPerFolder + 1;
    const count = Math.min(requestsPerFolder, requestCount - folderIndex * requestsPerFolder);
    const offsets = Array.from({ length: count }, (_, offset) => offset);
    for (const offset of offsets) {
      const seq = firstSeq + offset;
      const requestFile = path.join(folderPath, `request-${seq}.${ext}`);
      fs.writeFileSync(requestFile, stringifyRequest(buildRequestItem(seq), { format }));
    }
  }
}
|
||||
129
tests/benchmarks/utils/compare.js
Normal file
129
tests/benchmarks/utils/compare.js
Normal file
@@ -0,0 +1,129 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
/**
 * Generic benchmark comparison: compares results against a baseline and exits
 * with code 1 if any metric exceeds the allowed regression threshold.
 *
 * Usage:
 *   node tests/benchmarks/utils/compare.js --results <path> --baseline <path> [--update-baseline]
 *
 * Examples:
 *   # Compare the latest mounting results against the Ubuntu baseline
 *   node tests/benchmarks/utils/compare.js \
 *     --results tests/benchmarks/results/mounting.json \
 *     --baseline tests/benchmarks/mounting/baseline.ubuntu.json
 *
 *   # Overwrite the Ubuntu baseline with the latest mounting results
 *   node tests/benchmarks/utils/compare.js \
 *     --results tests/benchmarks/results/mounting.json \
 *     --baseline tests/benchmarks/mounting/baseline.ubuntu.json \
 *     --update-baseline
 */
|
||||
|
||||
import { existsSync, readFileSync, writeFileSync } from 'fs';
|
||||
|
||||
/**
 * Parses the command line into `{ results, baseline, updateBaseline }`.
 *
 * The first two argv entries are skipped. `--results` and `--baseline` consume
 * the following argument as their value; unrecognized arguments are ignored.
 */
function parseArgs(argv) {
  const parsed = {};
  let index = 2;
  while (index < argv.length) {
    switch (argv[index]) {
      case '--results':
        index += 1;
        parsed.results = argv[index];
        break;
      case '--baseline':
        index += 1;
        parsed.baseline = argv[index];
        break;
      case '--update-baseline':
        parsed.updateBaseline = true;
        break;
      default:
        break;
    }
    index += 1;
  }
  return parsed;
}
|
||||
|
||||
/**
 * Reads and parses a JSON file.
 *
 * When the file does not exist, an error is printed and the process exits
 * with code 1.
 */
function loadJSON(filepath) {
  if (existsSync(filepath)) {
    const raw = readFileSync(filepath, 'utf-8');
    return JSON.parse(raw);
  }
  console.error(`File not found: ${filepath}`);
  process.exit(1);
}
|
||||
|
||||
/**
 * Relative change from `baseline` to `current`, in percent.
 *
 * A zero baseline yields 0 when `current` is also zero, and Infinity otherwise.
 */
function percentChange(baseline, current) {
  if (baseline !== 0) {
    const delta = current - baseline;
    return (delta / baseline) * 100;
  }
  return current === 0 ? 0 : Infinity;
}
|
||||
|
||||
/**
 * Formats a percentage change with one decimal place and an explicit `+` for
 * positive values.
 */
function formatChange(change) {
  const formatted = `${change.toFixed(1)}%`;
  if (change > 0) {
    return `+${formatted}`;
  }
  return formatted;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
// Script entry point: parse arguments, then either rewrite the baseline from
// the results or compare the results against the baseline.
// ---------------------------------------------------------------------------
const args = parseArgs(process.argv);

if (!args.results || !args.baseline) {
  console.error('Usage: compare.js --results <path> --baseline <path> [--update-baseline]');
  process.exit(1);
}

const results = loadJSON(args.results);

// --update-baseline must be able to create a baseline that does not exist yet
// (new suite or new OS), so a missing file is only fatal in compare mode.
const baseline = args.updateBaseline && !existsSync(args.baseline) ? {} : loadJSON(args.baseline);

// `??` rather than `||` so an explicit threshold of 0 is respected.
const threshold = baseline.thresholdPercent ?? 20;
const resultEntries = results.entries || results;
const baselineEntries = baseline.entries || {};

if (args.updateBaseline) {
  // Keep the configured threshold and replace every entry with the current mean/p50.
  const newBaseline = {
    thresholdPercent: threshold,
    entries: {}
  };
  for (const [key, data] of Object.entries(resultEntries)) {
    newBaseline.entries[key] = {
      mean: data.mean,
      p50: data.p50
    };
  }
  writeFileSync(args.baseline, JSON.stringify(newBaseline, null, 2) + '\n');
  console.log(`Baseline updated at ${args.baseline}`);
  process.exit(0);
}

let hasRegression = false;
const rows = [];

console.log('');
console.log('='.repeat(72));
console.log(' BENCHMARK COMPARISON');
console.log('='.repeat(72));
console.log(` Regression threshold: ${threshold}%`);
console.log('');

for (const [key, data] of Object.entries(resultEntries)) {
  const base = baselineEntries[key];
  if (!base) {
    console.log(` [SKIP] No baseline for ${key}`);
    continue;
  }

  const meanChange = percentChange(base.mean, data.mean);
  const p50Change = percentChange(base.p50, data.p50);

  const meanStatus = meanChange > threshold ? 'FAIL' : meanChange < -threshold ? 'IMPROVED' : 'OK';
  const p50Status = p50Change > threshold ? 'FAIL' : p50Change < -threshold ? 'IMPROVED' : 'OK';

  // Either metric regressing beyond the threshold fails the whole run.
  if (meanStatus === 'FAIL' || p50Status === 'FAIL') {
    hasRegression = true;
  }

  rows.push({
    key,
    'mean (ms)': `${Math.round(data.mean)} (baseline: ${base.mean})`,
    'mean change': formatChange(meanChange),
    'mean status': meanStatus,
    'p50 (ms)': `${Math.round(data.p50)} (baseline: ${base.p50})`,
    'p50 change': formatChange(p50Change),
    'p50 status': p50Status
  });
}

// Make it visible when a baselined benchmark produced no result at all;
// such an entry cannot be checked for regressions.
for (const key of Object.keys(baselineEntries)) {
  if (!(key in resultEntries)) {
    console.log(` [WARN] No result for baseline entry ${key}`);
  }
}

console.table(rows);
console.log('');

if (hasRegression) {
  console.error(`FAILED: One or more benchmarks regressed beyond the ${threshold}% threshold.`);
  console.error('If this regression is expected, update the baseline:');
  console.error(` node tests/benchmarks/utils/compare.js --results ${args.results} --baseline ${args.baseline} --update-baseline`);
  process.exit(1);
} else {
  console.log('PASSED: All benchmarks are within the acceptable threshold.');
  process.exit(0);
}
|
||||
83
tests/benchmarks/utils/pr-comment.js
Normal file
83
tests/benchmarks/utils/pr-comment.js
Normal file
@@ -0,0 +1,83 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
/**
|
||||
* Generic benchmark PR comment: posts/updates a comparison table on a PR.
|
||||
*
|
||||
* Called by CI via actions/github-script:
|
||||
* const run = require('./tests/benchmarks/utils/pr-comment.js');
|
||||
* await run({ github, context, resultsPath, baselinePath, title });
|
||||
*/
|
||||
|
||||
const fs = require('fs');
|
||||
|
||||
/**
 * Builds the markdown body of the benchmark PR comment.
 *
 * One table row is emitted per result entry that also has a baseline entry;
 * entries without a baseline are left out. A row is marked as a regression
 * when its mean, or (when both sides provide one) its p50, is more than
 * `thresholdPercent` above the baseline.
 *
 * @param results - Parsed results file (`{ entries }`) or a bare entries map.
 * @param baseline - Parsed baseline file (`{ thresholdPercent, entries }`).
 * @param title - Comment heading; also used as the marker to find an existing comment.
 * @returns `{ body, marker }` where `marker` is the heading line `## <title>`.
 */
function buildCommentBody(results, baseline, title) {
  // `??` rather than `||` so an explicit threshold of 0 is respected.
  const threshold = baseline.thresholdPercent ?? 20;
  const resultEntries = results.entries || results;
  const baselineEntries = baseline.entries || {};
  const marker = `## ${title}`;

  // A zero baseline would otherwise divide by zero and render as "NaN%".
  const percentChange = (base, current) => {
    if (base === 0) return current === 0 ? 0 : Infinity;
    return ((current - base) / base) * 100;
  };
  const signed = (change) => `${change > 0 ? '+' : ''}${change.toFixed(1)}%`;

  let body = `${marker}\n\n`;
  body += `| Key | Mean (ms) | Baseline Mean | Change | Status |\n`;
  body += `|---|---|---|---|---|\n`;

  let hasRegression = false;

  for (const [key, data] of Object.entries(resultEntries)) {
    const base = baselineEntries[key];
    if (!base) continue;

    const meanChange = percentChange(base.mean, data.mean);

    // p50 is only taken into account when both sides actually provide it.
    const hasP50 = Number.isFinite(base.p50) && Number.isFinite(data.p50);
    const p50Change = hasP50 ? percentChange(base.p50, data.p50) : 0;

    let status;
    if (meanChange > threshold) {
      status = '🔴 REGRESSION';
    } else if (p50Change > threshold) {
      // The Change column shows the mean, so spell out the metric that regressed.
      status = `🔴 REGRESSION (p50 ${signed(p50Change)})`;
    } else if (meanChange < -threshold) {
      status = '🟢 IMPROVED';
    } else {
      status = '✅ OK';
    }

    if (meanChange > threshold || p50Change > threshold) hasRegression = true;

    body += `| ${key} | ${Math.round(data.mean)} | ${base.mean} | ${signed(meanChange)} | ${status} |\n`;
  }

  body += `\n> Threshold: ${threshold}% regression allowed\n`;

  if (hasRegression) {
    body += '\n⚠️ **Performance regression detected.** If expected, update the baseline.\n';
  }

  return { body, marker };
}
|
||||
|
||||
/**
 * Creates the benchmark comment on the current PR, or updates it in place when
 * a comment starting with `marker` already exists.
 *
 * @param github - Octokit client as provided by actions/github-script.
 * @param context - Workflow context; supplies repo owner/name and the issue number.
 * @param body - Full markdown body of the comment.
 * @param marker - Prefix identifying a previously posted benchmark comment.
 */
async function postOrUpdateComment(github, context, body, marker) {
  const query = {
    owner: context.repo.owner,
    repo: context.repo.repo,
    issue_number: context.issue.number,
    per_page: 100
  };

  // A single listComments call returns one page only. Walk every page so an
  // existing benchmark comment is still found on PRs with many comments,
  // instead of posting a duplicate. Fall back to a single request when the
  // client has no paginate helper.
  const comments = typeof github.paginate === 'function'
    ? await github.paginate(github.rest.issues.listComments, query)
    : (await github.rest.issues.listComments(query)).data;

  // Guard against comments that carry no body before calling startsWith.
  const existing = comments.find((c) => typeof c.body === 'string' && c.body.startsWith(marker));

  if (existing) {
    await github.rest.issues.updateComment({
      owner: context.repo.owner,
      repo: context.repo.repo,
      comment_id: existing.id,
      body
    });
  } else {
    await github.rest.issues.createComment({
      owner: context.repo.owner,
      repo: context.repo.repo,
      issue_number: context.issue.number,
      body
    });
  }
}
|
||||
|
||||
module.exports = async function run({ github, context, resultsPath, baselinePath, title }) {
|
||||
if (!fs.existsSync(resultsPath)) {
|
||||
console.log(`No benchmark results found at ${resultsPath}, skipping comment.`);
|
||||
return;
|
||||
}
|
||||
|
||||
const results = JSON.parse(fs.readFileSync(resultsPath, 'utf-8'));
|
||||
const baseline = JSON.parse(fs.readFileSync(baselinePath, 'utf-8'));
|
||||
const { body, marker } = buildCommentBody(results, baseline, title);
|
||||
|
||||
await postOrUpdateComment(github, context, body, marker);
|
||||
};
|
||||
92
tests/benchmarks/utils/results.ts
Normal file
92
tests/benchmarks/utils/results.ts
Normal file
@@ -0,0 +1,92 @@
|
||||
/**
|
||||
* Standard read/write helpers for benchmark results and baselines.
|
||||
*
|
||||
* Results shape (written by benchmark tests):
|
||||
* {
|
||||
* "suite": { "name": "...", "unit": "ms", "direction": "smaller" },
|
||||
* "entries": {
|
||||
* "<key>": { mean, median, p50, p90, p99, stdDev, min, max, count, timings, ...meta }
|
||||
* }
|
||||
* }
|
||||
*
|
||||
* Baseline shape (committed per suite):
|
||||
* {
|
||||
* "thresholdPercent": 20,
|
||||
* "entries": {
|
||||
* "<key>": { mean, p50 }
|
||||
* }
|
||||
* }
|
||||
*/
|
||||
|
||||
import { existsSync, readFileSync, writeFileSync } from 'fs';
|
||||
import { summarize } from './stats';
|
||||
|
||||
/** Whether a smaller or a bigger measured value is the better one. */
export type Direction = 'smaller' | 'bigger';

/** Unit the measurements of a suite are expressed in. */
export type Unit = 'ms' | 's' | 'ops/s' | 'bytes' | '%' | 'count';

/** Descriptive header stored at the top of a results file. */
export interface SuiteMeta {
  name: string;
  unit: Unit;
  direction: Direction;
}

/** Statistics and raw samples for one benchmark key, plus arbitrary extra metadata. */
export interface ResultEntry {
  mean: number;
  median: number;
  p50: number;
  p90: number;
  p99: number;
  stdDev: number;
  min: number;
  max: number;
  count: number;
  /** Raw samples the statistics above were computed from. */
  timings: number[];
  /** Extra per-entry metadata supplied by the benchmark. */
  [key: string]: any;
}

/** Shape of a results file as written by `writeResults`. */
export interface ResultsFile {
  suite: SuiteMeta;
  entries: Record<string, ResultEntry>;
}

/** Reference values stored per benchmark key in a baseline file. */
export interface BaselineEntry {
  mean: number;
  p50: number;
}

/** Shape of a baseline file as written by `writeBaseline`. */
export interface BaselineFile {
  thresholdPercent: number;
  entries: Record<string, BaselineEntry>;
}
|
||||
|
||||
/**
 * Loads a results file from disk.
 *
 * @throws Error when `filePath` does not exist.
 */
export function readResults(filePath: string): ResultsFile {
  const isMissing = !existsSync(filePath);
  if (isMissing) {
    throw new Error(`Results file not found: ${filePath}`);
  }
  const raw = readFileSync(filePath, 'utf-8');
  return JSON.parse(raw);
}
|
||||
|
||||
/**
 * Writes a results file (`{ suite, entries }`) as JSON with 2-space indentation.
 */
export function writeResults(filePath: string, suite: SuiteMeta, entries: Record<string, ResultEntry>) {
  const payload: ResultsFile = { suite, entries };
  const serialized = JSON.stringify(payload, null, 2);
  writeFileSync(filePath, serialized);
}
|
||||
|
||||
/**
 * Combines the summary statistics of `timings`, the raw timings themselves and
 * any extra metadata into one result entry. On a key clash, `meta` wins because
 * it is spread last.
 */
export function buildResultEntry(timings: number[], meta: Record<string, any> = {}): ResultEntry {
  const stats = summarize(timings);
  const entry: ResultEntry = { ...stats, timings, ...meta };
  return entry;
}
|
||||
|
||||
/**
 * Loads a baseline file from disk.
 *
 * @throws Error when `filePath` does not exist.
 */
export function readBaseline(filePath: string): BaselineFile {
  const isMissing = !existsSync(filePath);
  if (isMissing) {
    throw new Error(`Baseline file not found: ${filePath}`);
  }
  const raw = readFileSync(filePath, 'utf-8');
  return JSON.parse(raw);
}
|
||||
|
||||
/**
 * Derives a baseline from a results file and writes it to `filePath`.
 *
 * Only `mean` and `p50` of each entry are kept. The file is written as JSON
 * with 2-space indentation and a trailing newline.
 */
export function writeBaseline(filePath: string, results: ResultsFile, thresholdPercent: number) {
  const entries: Record<string, BaselineEntry> = {};
  Object.keys(results.entries).forEach((key) => {
    const { mean, p50 } = results.entries[key];
    entries[key] = { mean, p50 };
  });

  const baseline: BaselineFile = { thresholdPercent, entries };
  writeFileSync(filePath, `${JSON.stringify(baseline, null, 2)}\n`);
}
|
||||
111
tests/benchmarks/utils/stats.ts
Normal file
111
tests/benchmarks/utils/stats.ts
Normal file
@@ -0,0 +1,111 @@
|
||||
/**
|
||||
* Statistical utility functions for benchmark analysis.
|
||||
*/
|
||||
|
||||
/**
 * Guards the statistics helpers against unusable input.
 *
 * @throws Error when `values` is empty.
 * @throws TypeError when any value is not a finite number.
 */
function assertValid(values: number[]) {
  if (!values.length) {
    throw new Error('Values array must not be empty');
  }
  const hasNonFinite = values.some((value) => !Number.isFinite(value));
  if (hasNonFinite) {
    throw new TypeError('All values must be finite numbers');
  }
}
|
||||
|
||||
/** Returns an ascending, numerically sorted copy; the input array is left untouched. */
function sorted(values: number[]): number[] {
  const copy = values.slice();
  copy.sort((left, right) => left - right);
  return copy;
}
|
||||
|
||||
/** Arithmetic mean of `values`; throws on empty or non-finite input. */
export function mean(values: number[]): number {
  assertValid(values);
  let total = 0;
  values.forEach((value) => {
    total += value;
  });
  return total / values.length;
}
|
||||
|
||||
/**
 * Median of `values`: the middle element of the sorted values, or the average
 * of the two middle elements when the count is even.
 */
export function median(values: number[]): number {
  assertValid(values);
  const ordered = sorted(values);
  const half = Math.floor(ordered.length / 2);

  const hasSingleMiddle = ordered.length % 2 !== 0;
  if (hasSingleMiddle) {
    return ordered[half];
  }
  return (ordered[half - 1] + ordered[half]) / 2;
}
|
||||
|
||||
/**
 * `p`-th percentile of `values`, linearly interpolating between the two
 * nearest ranks of the sorted values.
 *
 * @param p - Percentile in the range 0..100.
 * @throws RangeError when `p` is below 0 or above 100.
 */
export function percentile(values: number[], p: number): number {
  assertValid(values);

  if (p < 0 || p > 100) {
    throw new RangeError(`Percentile must be between 0 and 100, got ${p}`);
  }

  const ordered = sorted(values);
  // Fractional position of the requested percentile within the sorted values.
  const rank = (p / 100) * (ordered.length - 1);
  const below = Math.floor(rank);
  const above = Math.ceil(rank);

  // The rank landed exactly on an element, so no interpolation is needed.
  if (below === above) {
    return ordered[below];
  }

  const fraction = rank - below;
  return ordered[below] + fraction * (ordered[above] - ordered[below]);
}
|
||||
|
||||
/**
 * Standard deviation of `values` treated as a full population
 * (the squared deviations are divided by N).
 */
export function populationStdDev(values: number[]): number {
  assertValid(values);
  const average = mean(values);

  let squaredDeviationSum = 0;
  values.forEach((value) => {
    squaredDeviationSum += (value - average) ** 2;
  });

  return Math.sqrt(squaredDeviationSum / values.length);
}
|
||||
|
||||
/**
 * Standard deviation of `values` treated as a sample
 * (the squared deviations are divided by N - 1).
 *
 * @throws Error when fewer than two values are given.
 */
export function sampleStdDev(values: number[]): number {
  assertValid(values);

  if (values.length < 2) {
    throw new Error('Sample standard deviation requires at least 2 values');
  }

  const average = mean(values);

  let squaredDeviationSum = 0;
  values.forEach((value) => {
    squaredDeviationSum += (value - average) ** 2;
  });

  const degreesOfFreedom = values.length - 1;
  return Math.sqrt(squaredDeviationSum / degreesOfFreedom);
}
|
||||
|
||||
/** Smallest value in `values`; throws on empty or non-finite input. */
export function min(values: number[]): number {
  assertValid(values);
  let smallest = Infinity;
  values.forEach((value) => {
    smallest = smallest < value ? smallest : value;
  });
  return smallest;
}
|
||||
|
||||
/** Largest value in `values`; throws on empty or non-finite input. */
export function max(values: number[]): number {
  assertValid(values);
  let largest = -Infinity;
  values.forEach((value) => {
    largest = largest > value ? largest : value;
  });
  return largest;
}
|
||||
|
||||
/**
 * Computes the full set of summary statistics used by the benchmarks.
 * Values are returned unrounded; `stdDev` is the population standard deviation.
 */
export function summarize(values: number[]) {
  assertValid(values);

  const average = mean(values);
  const middle = median(values);
  const p50 = percentile(values, 50);
  const p90 = percentile(values, 90);
  const p99 = percentile(values, 99);
  const smallest = min(values);
  const largest = max(values);
  const deviation = populationStdDev(values);

  return {
    mean: average,
    median: middle,
    p50,
    p90,
    p99,
    min: smallest,
    max: largest,
    stdDev: deviation,
    count: values.length
  };
}
|
||||
25
tests/benchmarks/utils/timing.ts
Normal file
25
tests/benchmarks/utils/timing.ts
Normal file
@@ -0,0 +1,25 @@
|
||||
/**
|
||||
* Timing utilities for benchmarks.
|
||||
*
|
||||
* Capture: const t = startTimer(); ...do work...; const ms = t.elapsed();
|
||||
* Convert: convertDuration(1500, 'ms', 's') === 1.5
|
||||
*/
|
||||
|
||||
/** Duration units understood by `convertDuration`. */
export type DurationUnit = 'ns' | 'us' | 'ms' | 's';

// How many milliseconds one of each unit is worth.
const DURATION_TO_MS: Record<DurationUnit, number> = {
  ns: 0.000001,
  us: 0.001,
  ms: 1,
  s: 1_000
};
|
||||
|
||||
/**
 * Starts a timer based on `performance.now()`.
 *
 * @returns An object whose `elapsed()` reports the milliseconds passed since the timer was started.
 */
export function startTimer() {
  const startedAt = performance.now();
  const elapsed = () => performance.now() - startedAt;
  return { elapsed };
}
|
||||
|
||||
/**
 * Converts `value` from one duration unit to another by going through
 * milliseconds. The value is returned untouched when both units are the same.
 */
export function convertDuration(value: number, from: DurationUnit, to: DurationUnit): number {
  if (from !== to) {
    const inMilliseconds = value * DURATION_TO_MS[from];
    return inMilliseconds / DURATION_TO_MS[to];
  }
  return value;
}
|
||||
Reference in New Issue
Block a user