Skip to content

Commit 390362a

Browse files
committed
feat(cli): rename studio to dashboard (ag-uh9)
1 parent 4777748 commit 390362a

40 files changed

Lines changed: 339 additions & 205 deletions

apps/cli/src/cli.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ import { killAllTrackedChildren } from '@agentv/core';
44
import { runCli } from './index.js';
55

66
// Forward SIGINT/SIGTERM to spawned provider subprocesses before exiting.
7-
// Without this, Studio's `child.kill('SIGTERM')` against the CLI orphans
7+
// Without this, Dashboard's `child.kill('SIGTERM')` against the CLI orphans
88
// any in-flight `claude`/`codex`/`pi`/`copilot` subprocess. The partial
99
// `index.jsonl` is already row-by-row durable, so finished tests survive.
1010
//

apps/cli/src/commands/eval/run-eval.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1460,7 +1460,7 @@ export async function runEvalCommand(
14601460

14611461
// Write a stub benchmark.json before dispatching tests, carrying the planned
14621462
// execution count so an interrupted run can still surface as resumable in
1463-
// Studio (results.length < planned_test_count) even when every recorded row
1463+
// Dashboard (results.length < planned_test_count) even when every recorded row
14641464
// has execution_status: ok. The end-of-run write preserves this value via
14651465
// readPlannedTestCount inside aggregateRunDir / writeArtifactsFromResults.
14661466
// Skip on resume — we want to preserve the *original* planned count.

apps/cli/src/commands/results/eval-runner.ts

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
/**
2-
* Studio eval runner — discovery, launch, and status tracking for eval runs
3-
* initiated from the Studio UI.
2+
* Dashboard eval runner — discovery, launch, and status tracking for eval runs
3+
* initiated from the Dashboard UI.
44
*
55
* Provides Hono route handlers for:
66
* - GET /api/eval/discover — discover eval files in the project
77
* - GET /api/eval/targets — list available target names
88
* - POST /api/eval/run — launch an eval run as a child process
99
* - GET /api/eval/status/:id — poll running eval status
10-
* - GET /api/eval/runs — list active and recent Studio-launched runs
10+
* - GET /api/eval/runs — list active and recent Dashboard-launched runs
1111
*
1212
* All handlers accept a `cwd` (project root) to resolve paths against.
1313
* The module spawns `bun apps/cli/src/cli.ts eval run ...` and tracks
@@ -34,7 +34,7 @@ import { findRepoRoot } from '../eval/shared.js';
3434

3535
// ── In-memory run tracker ────────────────────────────────────────────────
3636

37-
interface StudioRun {
37+
interface DashboardRun {
3838
id: string;
3939
status: 'starting' | 'running' | 'finished' | 'failed';
4040
command: string;
@@ -50,14 +50,14 @@ interface StudioRun {
5050
process?: ChildProcess;
5151
}
5252

53-
const activeRuns = new Map<string, StudioRun>();
53+
const activeRuns = new Map<string, DashboardRun>();
5454

5555
function generateRunId(): string {
5656
const now = new Date();
5757
const pad = (n: number, w = 2) => String(n).padStart(w, '0');
5858
const ts = `${now.getFullYear()}${pad(now.getMonth() + 1)}${pad(now.getDate())}-${pad(now.getHours())}${pad(now.getMinutes())}${pad(now.getSeconds())}`;
5959
const rand = Math.random().toString(36).slice(2, 6);
60-
return `studio-${ts}-${rand}`;
60+
return `dashboard-${ts}-${rand}`;
6161
}
6262

6363
// Keep only last 20 finished runs to prevent unbounded memory growth
@@ -73,7 +73,7 @@ function pruneFinishedRuns() {
7373
}
7474

7575
/**
76-
* Look up the target for a Studio-launched run by its index.jsonl path.
76+
* Look up the target for a Dashboard-launched run by its index.jsonl path.
7777
* Called by handleRuns in serve.ts when the JSONL has 0 records (run just started).
7878
*/
7979
export function getActiveRunTarget(indexJsonlPath: string): string | undefined {
@@ -86,12 +86,12 @@ export function getActiveRunTarget(indexJsonlPath: string): string | undefined {
8686
}
8787

8888
/**
89-
* Look up the in-memory status for a Studio-launched run by its index.jsonl path.
89+
* Look up the in-memory status for a Dashboard-launched run by its index.jsonl path.
9090
* Returns 'starting' | 'running' | 'finished' | 'failed' if the run is tracked,
9191
* else undefined. Used by handleRuns to render a spinner for active runs in the
9292
* RunList instead of a misleading red ✗ derived from a 0 pass-rate.
9393
*/
94-
export function getActiveRunStatus(indexJsonlPath: string): StudioRun['status'] | undefined {
94+
export function getActiveRunStatus(indexJsonlPath: string): DashboardRun['status'] | undefined {
9595
for (const run of activeRuns.values()) {
9696
if (run.outputDir && path.join(run.outputDir, 'index.jsonl') === indexJsonlPath) {
9797
return run.status;
@@ -287,7 +287,7 @@ function isCommandAvailable(cmd: string): boolean {
287287
*
288288
* The log file is the source of truth shown by the RunDetail "Run Log"
289289
* section after the run completes. The in-memory `stdout`/`stderr` buffers on
290-
* `StudioRun` remain capped for live status polling.
290+
* `DashboardRun` remain capped for live status polling.
291291
*
292292
* Stream `error` events (e.g. the output dir was removed underneath us by a
293293
* test teardown) are swallowed so they don't surface as unhandled errors and
@@ -348,7 +348,7 @@ export function registerEvalRoutes(
348348
// ── Launch eval run ────────────────────────────────────────────────────
349349
app.post('/api/eval/run', async (c) => {
350350
if (readOnly) {
351-
return c.json({ error: 'Studio is running in read-only mode' }, 403);
351+
return c.json({ error: 'Dashboard is running in read-only mode' }, 403);
352352
}
353353
const cwd = getCwd(c);
354354

@@ -378,7 +378,7 @@ export function registerEvalRoutes(
378378
// Determine the output directory for this run. When the caller provides
379379
// an explicit --output (resume/rerun), use that path. Otherwise generate
380380
// the default path now so we can pass it via --output and later correlate
381-
// the filesystem run with this in-memory StudioRun (needed to show the
381+
// the filesystem run with this in-memory DashboardRun (needed to show the
382382
// target in the sidebar before any results have been written).
383383
const outputDir = body.output?.trim()
384384
? path.resolve(cwd, body.output.trim())
@@ -389,7 +389,7 @@ export function registerEvalRoutes(
389389
const command = buildCliPreview(args);
390390
const runId = generateRunId();
391391

392-
const run: StudioRun = {
392+
const run: DashboardRun = {
393393
id: runId,
394394
status: 'starting',
395395
command,
@@ -479,7 +479,7 @@ export function registerEvalRoutes(
479479
// before exiting.
480480
app.post('/api/eval/run/:id/stop', (c) => {
481481
if (readOnly) {
482-
return c.json({ error: 'Studio is running in read-only mode' }, 403);
482+
return c.json({ error: 'Dashboard is running in read-only mode' }, 403);
483483
}
484484
const id = c.req.param('id');
485485
const run = activeRuns.get(id ?? '');
@@ -570,7 +570,7 @@ export function registerEvalRoutes(
570570

571571
app.post('/api/projects/:projectId/eval/run', async (c) => {
572572
if (readOnly) {
573-
return c.json({ error: 'Studio is running in read-only mode' }, 403);
573+
return c.json({ error: 'Dashboard is running in read-only mode' }, 403);
574574
}
575575
const cwd = getCwd(c);
576576

@@ -605,7 +605,7 @@ export function registerEvalRoutes(
605605
const command = buildCliPreview(args);
606606
const runId = generateRunId();
607607

608-
const run: StudioRun = {
608+
const run: DashboardRun = {
609609
id: runId,
610610
status: 'starting',
611611
command,
@@ -668,7 +668,7 @@ export function registerEvalRoutes(
668668

669669
app.post('/api/projects/:projectId/eval/run/:id/stop', (c) => {
670670
if (readOnly) {
671-
return c.json({ error: 'Studio is running in read-only mode' }, 403);
671+
return c.json({ error: 'Dashboard is running in read-only mode' }, 403);
672672
}
673673
const id = c.req.param('id');
674674
const run = activeRuns.get(id ?? '');

apps/cli/src/commands/results/report-template.ts

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

apps/cli/src/commands/results/run-tags.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
* { "tags": ["baseline", "v2-prompt"], "updated_at": "2026-04-10T00:00:00.000Z" }
1111
* ```
1212
*
13-
* Used by the Studio compare API so users can retroactively tag runs
13+
* Used by the Dashboard compare API so users can retroactively tag runs
1414
* without changing the eval YAML or the run manifest itself. This mirrors
1515
* the Langfuse / W&B / GitHub `tags` pattern — a mutable multi-valued
1616
* list of free-form labels that lives alongside the immutable run_id.

apps/cli/src/commands/results/serve.ts

Lines changed: 24 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
/**
2-
* `agentv studio` / `agentv serve` — starts the AgentV Studio server, a React SPA for
2+
* `agentv dashboard` / `agentv serve` — starts the AgentV Dashboard server, which serves a React SPA for
33
* reviewing evaluation results.
44
*
55
* The server uses Hono for routing and @hono/node-server to listen.
6-
* The Studio SPA is served from a pre-built dist directory.
6+
* The Dashboard SPA is served from a pre-built dist directory.
77
*
88
* API endpoints:
9-
* - GET / — Studio SPA (React app)
9+
* - GET / — Dashboard SPA (React app)
1010
* - GET /api/runs — list available run workspaces with metadata
1111
* - GET /api/runs/:filename — load results from a specific run workspace
1212
* - GET /api/runs/:filename/log — stream the captured console.log for a run
@@ -386,13 +386,13 @@ async function handleRuns(c: C, { searchDir, agentvDir }: DataContext) {
386386
avgScore = records.reduce((sum, r) => sum + r.score, 0) / records.length;
387387
} else {
388388
// Run is in-progress with 0 results written yet — fall back to the
389-
// in-memory target stored when the Studio launched this run.
389+
// in-memory target stored when the Dashboard launched this run.
390390
target = getActiveRunTarget(m.path);
391391
}
392392
} catch {
393393
// ignore enrichment errors
394394
}
395-
// Surface live status for Studio-launched runs that are still starting
395+
// Surface live status for Dashboard-launched runs that are still starting
396396
// or running so the RunList can render a spinner instead of the
397397
// pass/fail dot derived from a 0% pass rate.
398398
const liveStatus = getActiveRunStatus(m.path);
@@ -447,7 +447,7 @@ async function handleRunDetail(c: C, { searchDir }: DataContext) {
447447
try {
448448
const loaded = await loadManifestResultsForMeta(searchDir, meta);
449449
// Surface run_dir + suite_filter for local runs so the UI can launch a
450-
// Studio-side resume against this exact run. Remote runs live in the
450+
// Dashboard-side resume against this exact run. Remote runs live in the
451451
// results-repo cache and cannot be resumed in place, so omit both fields.
452452
const resumeMeta = meta.source === 'local' ? deriveResumeMeta(searchDir, meta.path) : {};
453453
const liveStatus = meta.source === 'local' ? getActiveRunStatus(meta.path) : undefined;
@@ -467,7 +467,7 @@ async function handleRunDetail(c: C, { searchDir }: DataContext) {
467467
* Compute `run_dir` (relative to cwd, snake_case) and `suite_filter` (the
468468
* eval file path stored in benchmark.json metadata) for a local run manifest.
469469
* Returns whatever fields could be resolved — both are best-effort and only
470-
* needed by the Studio "Resume run" / "Rerun failed" actions.
470+
* needed by the Dashboard "Resume run" / "Rerun failed" actions.
471471
*/
472472
function deriveResumeMeta(
473473
cwd: string,
@@ -1104,11 +1104,11 @@ export function createApp(
11041104
});
11051105
}
11061106

1107-
// ── Studio configuration ──────────────────────────────────────────────
1107+
// ── Dashboard configuration ──────────────────────────────────────────────
11081108

11091109
app.post('/api/config', async (c) => {
11101110
if (readOnly) {
1111-
return c.json({ error: 'Studio is running in read-only mode' }, 403);
1111+
return c.json({ error: 'Dashboard is running in read-only mode' }, 403);
11121112
}
11131113
try {
11141114
const body = await c.req.json<Partial<StudioConfig>>();
@@ -1174,7 +1174,7 @@ export function createApp(
11741174

11751175
app.post('/api/projects', async (c) => {
11761176
if (readOnly) {
1177-
return c.json({ error: 'Studio is running in read-only mode' }, 403);
1177+
return c.json({ error: 'Dashboard is running in read-only mode' }, 403);
11781178
}
11791179
try {
11801180
const body = await c.req.json<{ path: string }>();
@@ -1268,7 +1268,7 @@ export function createApp(
12681268

12691269
app.delete('/api/projects/:projectId', (c) => {
12701270
if (readOnly) {
1271-
return c.json({ error: 'Studio is running in read-only mode' }, 403);
1271+
return c.json({ error: 'Dashboard is running in read-only mode' }, 403);
12721272
}
12731273
const removed = removeProject(c.req.param('projectId') ?? '');
12741274
if (!removed) return c.json({ error: 'Project not found' }, 404);
@@ -1289,13 +1289,13 @@ export function createApp(
12891289
app.get('/api/runs', (c) => handleRuns(c, defaultCtx));
12901290
app.put('/api/runs/:filename/tags', (c) => {
12911291
if (readOnly) {
1292-
return c.json({ error: 'Studio is running in read-only mode' }, 403);
1292+
return c.json({ error: 'Dashboard is running in read-only mode' }, 403);
12931293
}
12941294
return handleRunTagsPut(c, defaultCtx);
12951295
});
12961296
app.delete('/api/runs/:filename/tags', (c) => {
12971297
if (readOnly) {
1298-
return c.json({ error: 'Studio is running in read-only mode' }, 403);
1298+
return c.json({ error: 'Dashboard is running in read-only mode' }, 403);
12991299
}
13001300
return handleRunTagsDelete(c, defaultCtx);
13011301
});
@@ -1321,7 +1321,7 @@ export function createApp(
13211321

13221322
app.post('/api/feedback', async (c) => {
13231323
if (readOnly) {
1324-
return c.json({ error: 'Studio is running in read-only mode' }, 403);
1324+
return c.json({ error: 'Dashboard is running in read-only mode' }, 403);
13251325
}
13261326
let body: unknown;
13271327
try {
@@ -1416,13 +1416,13 @@ export function createApp(
14161416
app.get('/api/projects/:projectId/runs', (c) => withProject(c, handleRuns));
14171417
app.put('/api/projects/:projectId/runs/:filename/tags', (c) => {
14181418
if (readOnly) {
1419-
return c.json({ error: 'Studio is running in read-only mode' }, 403);
1419+
return c.json({ error: 'Dashboard is running in read-only mode' }, 403);
14201420
}
14211421
return withProject(c, handleRunTagsPut);
14221422
});
14231423
app.delete('/api/projects/:projectId/runs/:filename/tags', (c) => {
14241424
if (readOnly) {
1425-
return c.json({ error: 'Studio is running in read-only mode' }, 403);
1425+
return c.json({ error: 'Dashboard is running in read-only mode' }, 403);
14261426
}
14271427
return withProject(c, handleRunTagsDelete);
14281428
});
@@ -1465,11 +1465,13 @@ export function createApp(
14651465
{ readOnly },
14661466
);
14671467

1468-
// ── Static file serving for Studio SPA ────────────────────────────────
1468+
// ── Static file serving for Dashboard SPA ────────────────────────────────
14691469

14701470
const studioDistPath = options?.studioDir ?? resolveStudioDistDir();
14711471
if (!studioDistPath || !existsSync(path.join(studioDistPath, 'index.html'))) {
1472-
throw new Error('Studio dist not found. Run "bun run build" in apps/studio/ to build the SPA.');
1472+
throw new Error(
1473+
'Dashboard dist not found. Run "bun run build" in apps/studio/ to build the SPA.',
1474+
);
14731475
}
14741476

14751477
app.get('/', (c) => {
@@ -1546,8 +1548,8 @@ function resolveStudioDistDir(): string | undefined {
15461548
// ── CLI command ──────────────────────────────────────────────────────────
15471549

15481550
export const resultsServeCommand = command({
1549-
name: 'studio',
1550-
description: 'Start AgentV Studio — a local dashboard for reviewing evaluation results',
1551+
name: 'dashboard',
1552+
description: 'Start AgentV Dashboard — a local dashboard for reviewing evaluation results',
15511553
args: {
15521554
source: positional({
15531555
type: optional(string),
@@ -1583,7 +1585,7 @@ export const resultsServeCommand = command({
15831585
}),
15841586
readOnly: flag({
15851587
long: 'read-only',
1586-
description: 'Disable write operations and launch Studio in read-only leaderboard mode',
1588+
description: 'Disable write operations and launch Dashboard in read-only leaderboard mode',
15871589
}),
15881590
},
15891591
handler: async ({ source, port, dir, single, add, remove, readOnly }) => {
@@ -1614,7 +1616,7 @@ export const resultsServeCommand = command({
16141616
}
16151617

16161618
// ── Version check ────────────────────────────────────────────────
1617-
// Enforce `required_version` from .agentv/config.yaml so Studio/serve
1619+
// Enforce `required_version` from .agentv/config.yaml so Dashboard/serve
16181620
// match `agentv eval` behavior. Same prompt in TTY, warn+continue
16191621
// otherwise. Single-project scope only — when one agentv instance
16201622
// serves multiple repos with differing version requirements, a

0 commit comments

Comments
 (0)