Skip to content

Commit a01a8c1

Browse files
committed
fix(benchmarks): Run all Claude UI directory suites
Continue running Claude UI benchmark suites after an earlier suite fails, while preserving a non-zero final exit code. Extract the directory runner into a testable module and cover the failure behavior.
1 parent 2266589 commit a01a8c1

3 files changed

Lines changed: 87 additions & 48 deletions

File tree

benchmarks/claude-ui/run-directory.ts

Lines changed: 2 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -1,53 +1,7 @@
11
#!/usr/bin/env tsx
2-
import { access, readdir } from 'node:fs/promises';
3-
import path from 'node:path';
4-
import { main } from '../../src/benchmarks/claude-ui/harness.ts';
2+
import { runDirectory } from '../../src/benchmarks/claude-ui/run-directory.ts';
53

6-
async function directoryExists(directory: string): Promise<boolean> {
7-
try {
8-
await access(directory);
9-
return true;
10-
} catch {
11-
return false;
12-
}
13-
}
14-
15-
async function suitePaths(directory: string): Promise<string[]> {
16-
if (!(await directoryExists(directory))) return [];
17-
const entries = await readdir(directory, { withFileTypes: true });
18-
return entries
19-
.filter(
20-
(entry) => entry.isFile() && (entry.name.endsWith('.yml') || entry.name.endsWith('.yaml')),
21-
)
22-
.map((entry) => path.join(directory, entry.name))
23-
.sort();
24-
}
25-
26-
async function run(): Promise<number> {
27-
const directory = process.argv[2];
28-
const maybeLabel = process.argv[3];
29-
const label = maybeLabel && !maybeLabel.startsWith('-') ? maybeLabel : directory;
30-
const forwardedArgs =
31-
maybeLabel && !maybeLabel.startsWith('-') ? process.argv.slice(4) : process.argv.slice(3);
32-
if (!directory) {
33-
console.error('Usage: run-directory.ts <suite-directory> [label] [benchmark args...]');
34-
return 1;
35-
}
36-
37-
const suites = await suitePaths(directory);
38-
if (suites.length === 0) {
39-
console.error(`No ${label} Claude UI benchmark suites found in ${directory}`);
40-
return 1;
41-
}
42-
43-
for (const suite of suites) {
44-
const exitCode = await main(['--suite', suite, ...forwardedArgs]);
45-
if (exitCode !== 0) return exitCode;
46-
}
47-
return 0;
48-
}
49-
50-
run()
4+
runDirectory(process.argv.slice(2))
515
.then((exitCode) => {
526
process.exitCode = exitCode;
537
})
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
import { mkdtemp, rm, writeFile } from 'node:fs/promises';
2+
import os from 'node:os';
3+
import path from 'node:path';
4+
import { describe, expect, it } from 'vitest';
5+
import { runDirectory } from '../run-directory.ts';
6+
7+
describe('runDirectory', () => {
  it('continues running later suites after an earlier suite returns non-zero', async () => {
    const suiteDirectory = await mkdtemp(path.join(os.tmpdir(), 'xcodebuildmcp-bench-suites-'));
    const suiteNames = ['a', 'b', 'c'];
    const suiteFile = (name: string): string => path.join(suiteDirectory, `${name}.yml`);

    try {
      // Fixture: three minimal suite files, created in alphabetical order.
      for (const name of suiteNames) {
        await writeFile(suiteFile(name), `name: ${name}\n`, 'utf8');
      }

      // Stub runner: records every invocation and reports a failure only for b.yml,
      // so the a/c invocations show whether the loop kept going around the failure.
      const recordedCalls: string[][] = [];
      const stubRunner = async (suiteArgs: string[]): Promise<number> => {
        recordedCalls.push(suiteArgs);
        return suiteArgs[1]?.endsWith('b.yml') ? 1 : 0;
      };

      const exitCode = await runDirectory(
        [suiteDirectory, 'private', '--model', 'opus'],
        stubRunner,
      );

      // The failure from b.yml is reported even though c.yml ran afterwards and passed.
      expect(exitCode).toBe(1);
      // 'private' is consumed as the label; only '--model opus' reaches each suite.
      expect(recordedCalls).toEqual(
        suiteNames.map((name) => ['--suite', suiteFile(name), '--model', 'opus']),
      );
    } finally {
      await rm(suiteDirectory, { recursive: true, force: true });
    }
  });
});
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
import { access, readdir } from 'node:fs/promises';
2+
import path from 'node:path';
3+
import { main } from './harness.ts';
4+
5+
/**
 * Reports whether `directory` is reachable by the current process.
 *
 * `access` is called with its default mode, so this resolves `true` for any
 * existing path (regular files included), not only for directories. Every
 * failure from `access` is reported as `false`.
 */
async function directoryExists(directory: string): Promise<boolean> {
  try {
    await access(directory);
    return true;
  } catch {
    return false;
  }
}

/** Returns true for the errors `readdir` reports when the path is missing or is not a directory. */
function isMissingDirectoryError(error: unknown): boolean {
  if (!(error instanceof Error) || !('code' in error)) return false;
  return error.code === 'ENOENT' || error.code === 'ENOTDIR';
}

/**
 * Lists the suite files directly inside `directory`.
 *
 * Only regular files whose names end in `.yml` or `.yaml` are returned; nested
 * directories are not searched. Each result is `directory` joined with the file
 * name, and the list is sorted with the default string ordering so suites run
 * in a stable order.
 *
 * @param directory Path of the directory to scan.
 * @returns The sorted suite paths, or an empty array when `directory` is
 *   missing, inaccessible, or not a directory.
 * @throws Any other `readdir` failure is rethrown unchanged.
 */
export async function suitePaths(directory: string): Promise<string[]> {
  if (!(await directoryExists(directory))) return [];

  try {
    const entries = await readdir(directory, { withFileTypes: true });
    return entries
      .filter(
        (entry) => entry.isFile() && (entry.name.endsWith('.yml') || entry.name.endsWith('.yaml')),
      )
      .map((entry) => path.join(directory, entry.name))
      .sort();
  } catch (error: unknown) {
    // The existence check passes for regular files, and the directory can vanish
    // between that check and this read; both cases simply mean "no suites here".
    if (isMissingDirectoryError(error)) return [];
    throw error;
  }
}
24+
25+
/** Runs a single benchmark suite from CLI-style arguments and resolves to its exit code. */
type RunSuite = (args: string[]) => Promise<number>;

/**
 * Runs every suite file found in a directory, one after another, and keeps
 * going when a suite fails.
 *
 * @param args `[directory, label?, ...benchmarkArgs]`. The second element is
 *   used as a label (only in the "no suites found" message) when it is
 *   non-empty and does not start with `-`; otherwise it is treated as the first
 *   benchmark argument. Benchmark arguments are forwarded to every suite after
 *   `--suite <path>`.
 * @param runSuite Runner invoked once per suite; defaults to the harness `main`.
 * @returns `0` when every suite succeeds, otherwise the exit code of the first
 *   suite that failed. Resolves to `1` when no directory is given or no suites
 *   are found.
 */
export async function runDirectory(args: string[], runSuite: RunSuite = main): Promise<number> {
  const [directory, maybeLabel] = args;
  if (!directory) {
    console.error('Usage: run-directory.ts <suite-directory> [label] [benchmark args...]');
    return 1;
  }

  // A leading '-' means the caller skipped the label and went straight to flags.
  const label = maybeLabel && !maybeLabel.startsWith('-') ? maybeLabel : undefined;
  const forwardedArgs = args.slice(label === undefined ? 1 : 2);

  const suites = await suitePaths(directory);
  if (suites.length === 0) {
    console.error(`No ${label ?? directory} Claude UI benchmark suites found in ${directory}`);
    return 1;
  }

  let finalExitCode = 0;
  for (const suite of suites) {
    let exitCode: number;
    try {
      exitCode = await runSuite(['--suite', suite, ...forwardedArgs]);
    } catch (error: unknown) {
      // A runner that throws is a failed suite, not a reason to skip the rest:
      // report it, count it as a generic failure, and move on.
      console.error(`Claude UI benchmark suite ${suite} failed with an unexpected error:`, error);
      exitCode = 1;
    }
    // Keep the first failure so a later passing suite cannot mask it.
    if (exitCode !== 0 && finalExitCode === 0) {
      finalExitCode = exitCode;
    }
  }
  return finalExitCode;
}

0 commit comments

Comments
 (0)