Skip to content

Commit b1ad27f

Browse files
authored
[codex] Hide resume actions for active eval runs (#1264)
* hide resume actions for active eval runs
* sort resume helper imports
* show stop action on active run detail
* share studio run status UI
1 parent 790ad01 commit b1ad27f

11 files changed

Lines changed: 118 additions & 57 deletions

File tree

apps/cli/src/commands/results/serve.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -435,10 +435,12 @@ async function handleRunDetail(c: C, { searchDir }: DataContext) {
435435
// Studio-side resume against this exact run. Remote runs live in the
436436
// results-repo cache and cannot be resumed in place, so omit both fields.
437437
const resumeMeta = meta.source === 'local' ? deriveResumeMeta(searchDir, meta.path) : {};
438+
const liveStatus = meta.source === 'local' ? getActiveRunStatus(meta.path) : undefined;
438439
return c.json({
439440
results: stripHeavyFields(loaded),
440441
source: meta.source,
441442
source_label: meta.displayName,
443+
...(liveStatus && { status: liveStatus }),
442444
...resumeMeta,
443445
});
444446
} catch {

apps/studio/src/components/ResumeRunActions.tsx

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ import {
2727
buildResumeRequestBody,
2828
shouldShowResumeActions,
2929
} from './resume-run-helpers';
30+
import type { RunStatus } from './stop-run-helpers';
3031

3132
export interface ResumeRunActionsProps {
3233
results: EvalResult[];
@@ -36,6 +37,7 @@ export interface ResumeRunActionsProps {
3637
projectId?: string;
3738
isReadOnly: boolean;
3839
plannedTestCount?: number;
40+
runStatus?: RunStatus;
3941
}
4042

4143
export function ResumeRunActions({
@@ -46,12 +48,13 @@ export function ResumeRunActions({
4648
projectId,
4749
isReadOnly,
4850
plannedTestCount,
51+
runStatus,
4952
}: ResumeRunActionsProps) {
5053
const navigate = useNavigate();
5154
const [busy, setBusy] = useState<ResumeMode | null>(null);
5255
const [error, setError] = useState<string | null>(null);
5356

54-
if (!shouldShowResumeActions(results, isReadOnly, plannedTestCount)) return null;
57+
if (!shouldShowResumeActions(results, isReadOnly, plannedTestCount, runStatus)) return null;
5558

5659
// Both actions need the run dir + the original eval file. Without those
5760
// we can't target the existing run workspace, so we render the buttons
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
/**
2+
* RunStatusIndicator — shared live/terminal status badge for Studio-launched
3+
* eval runs. Used anywhere the UI needs the same colored status label and
4+
* active spinner so run/job views stay visually consistent.
5+
*/
6+
7+
import type { RunStatus } from './stop-run-helpers';
8+
9+
export interface RunStatusIndicatorProps {
10+
status: RunStatus;
11+
}
12+
13+
export function RunStatusIndicator({ status }: RunStatusIndicatorProps) {
14+
const isTerminal = status === 'finished' || status === 'failed';
15+
const statusColors: Record<string, string> = {
16+
starting: 'text-yellow-400',
17+
running: 'text-cyan-400',
18+
finished: 'text-emerald-400',
19+
failed: 'text-red-400',
20+
};
21+
const statusColor = statusColors[status] ?? 'text-gray-400';
22+
23+
return (
24+
<>
25+
<span className={`text-sm font-medium ${statusColor}`}>
26+
{status.charAt(0).toUpperCase() + status.slice(1)}
27+
</span>
28+
{!isTerminal && (
29+
<span className="inline-block h-3 w-3 animate-spin rounded-full border-2 border-cyan-400 border-t-transparent" />
30+
)}
31+
</>
32+
);
33+
}

apps/studio/src/components/StopRunButton.tsx

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,17 @@
11
/**
2-
* StopRunButton — pause-style affordance on /jobs/:runId that interrupts
3-
* a Studio-launched eval. Stop is part of the stop → resume → complete
4-
* workflow, not a destructive cancel: the partial index.jsonl is
5-
* preserved and can be resumed in one click from the run-detail page.
2+
* StopRunButton — stop affordance on /jobs/:runId and active run detail
3+
* views that interrupts a Studio-launched eval. Stop is part of the
4+
* stop → resume → complete workflow, not a destructive cancel: the
5+
* partial index.jsonl is preserved and can be resumed in one click from
6+
* the run-detail page.
67
*
78
* Calls POST /api/eval/run/:id/stop (or the project-scoped variant).
89
* Optimistically flips the local label to "Stopping…" until the next
910
* poll of /api/eval/status/:id observes a terminal state — at which
1011
* point the button hides via `shouldShowStopButton`.
1112
*
1213
* Styling is intentionally neutral (gray, not red) to signal that this
13-
* is a pause, not a kill.
14+
* stops execution without deleting the partial run workspace.
1415
*/
1516

1617
import { useState } from 'react';
@@ -51,10 +52,17 @@ export function StopRunButton({ runId, status, isReadOnly, projectId }: StopRunB
5152
type="button"
5253
onClick={onClick}
5354
disabled={stopping}
54-
className="rounded-md border border-gray-700 bg-transparent px-3 py-1.5 text-sm font-medium text-gray-300 hover:bg-gray-800 disabled:cursor-not-allowed disabled:opacity-50"
55+
className="inline-flex items-center gap-2 rounded-md border border-gray-700 bg-transparent px-3 py-1.5 text-sm font-medium text-gray-300 hover:bg-gray-800 disabled:cursor-not-allowed disabled:opacity-50"
5556
data-testid="stop-run-button"
5657
>
57-
{stopping ? 'Stopping…' : '⏸ Stop'}
58+
{stopping ? (
59+
'Stopping…'
60+
) : (
61+
<>
62+
<span aria-hidden="true" className="inline-block h-2.5 w-2.5 rounded-[1px] bg-current" />
63+
Stop
64+
</>
65+
)}
5866
</button>
5967
{error && <p className="text-xs text-red-400">{error}</p>}
6068
</div>

apps/studio/src/components/resume-run-helpers.test.ts

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,16 @@ describe('shouldShowResumeActions', () => {
2525
expect(shouldShowResumeActions([ok('a'), errored('b')], false)).toBe(true);
2626
});
2727

28+
it('hides while the run is still active even if it looks incomplete', () => {
29+
expect(shouldShowResumeActions([ok('a')], false, 5, 'running')).toBe(false);
30+
expect(shouldShowResumeActions([errored('a')], false, undefined, 'starting')).toBe(false);
31+
});
32+
33+
it('shows once the run is terminal and resumable', () => {
34+
expect(shouldShowResumeActions([ok('a')], false, 5, 'failed')).toBe(true);
35+
expect(shouldShowResumeActions([errored('a')], false, undefined, 'finished')).toBe(true);
36+
});
37+
2838
it('hides in read-only mode even when execution errors are present', () => {
2939
expect(shouldShowResumeActions([errored('a')], true)).toBe(false);
3040
});

apps/studio/src/components/resume-run-helpers.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111

1212
import type { EvalResult, RunEvalRequest } from '~/lib/types';
1313

14+
import { type RunStatus, isTerminalRunStatus } from './stop-run-helpers';
15+
1416
export type ResumeMode = 'resume' | 'rerun';
1517

1618
export interface BuildResumeRequestParams {
@@ -39,8 +41,10 @@ export function shouldShowResumeActions(
3941
results: EvalResult[],
4042
isReadOnly: boolean,
4143
plannedTestCount?: number,
44+
runStatus?: RunStatus,
4245
): boolean {
4346
if (isReadOnly) return false;
47+
if (runStatus && !isTerminalRunStatus(runStatus)) return false;
4448
if (results.some((r) => r.executionStatus === 'execution_error')) return true;
4549
if (plannedTestCount !== undefined && results.length < plannedTestCount) return true;
4650
return false;

apps/studio/src/lib/types.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,8 @@ export interface RunDetailResponse {
8484
results: EvalResult[];
8585
source: 'local' | 'remote';
8686
source_label?: string;
87+
/** Live execution status when this run is still tracked in-memory by Studio. */
88+
status?: 'starting' | 'running' | 'finished' | 'failed';
8789
/** Path to the run workspace directory (relative to cwd when inside, otherwise absolute). Local runs only. */
8890
run_dir?: string;
8991
/** Eval file path the run was launched against, if recorded in benchmark.json. Local runs only. */

apps/studio/src/routes/jobs/$runId.tsx

Lines changed: 2 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
import { Link, createFileRoute } from '@tanstack/react-router';
1111

12+
import { RunStatusIndicator } from '~/components/RunStatusIndicator';
1213
import { StopRunButton } from '~/components/StopRunButton';
1314
import { useEvalRunStatus, useStudioConfig } from '~/lib/api';
1415

@@ -45,15 +46,6 @@ function JobDetailPage() {
4546

4647
const isTerminal = status.status === 'finished' || status.status === 'failed';
4748

48-
const statusColors: Record<string, string> = {
49-
starting: 'text-yellow-400',
50-
running: 'text-cyan-400',
51-
finished: 'text-emerald-400',
52-
failed: 'text-red-400',
53-
};
54-
55-
const statusColor = statusColors[status.status] ?? 'text-gray-400';
56-
5749
return (
5850
<div className="space-y-4">
5951
<BackLink />
@@ -79,12 +71,7 @@ function JobDetailPage() {
7971
</div>
8072
<div className="flex flex-shrink-0 items-center gap-3">
8173
<StopRunButton runId={runId} status={status.status} isReadOnly={isReadOnly} />
82-
<span className={`text-sm font-medium ${statusColor}`}>
83-
{status.status.charAt(0).toUpperCase() + status.status.slice(1)}
84-
</span>
85-
{!isTerminal && (
86-
<span className="inline-block h-3 w-3 animate-spin rounded-full border-2 border-cyan-400 border-t-transparent" />
87-
)}
74+
<RunStatusIndicator status={status.status} />
8875
</div>
8976
</div>
9077

apps/studio/src/routes/projects/$projectId_/jobs/$runId.tsx

Lines changed: 2 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
import { Link, createFileRoute } from '@tanstack/react-router';
66

7+
import { RunStatusIndicator } from '~/components/RunStatusIndicator';
78
import { StopRunButton } from '~/components/StopRunButton';
89
import { useEvalRunStatus, useStudioConfig } from '~/lib/api';
910

@@ -40,15 +41,6 @@ function ProjectJobDetailPage() {
4041

4142
const isTerminal = status.status === 'finished' || status.status === 'failed';
4243

43-
const statusColors: Record<string, string> = {
44-
starting: 'text-yellow-400',
45-
running: 'text-cyan-400',
46-
finished: 'text-emerald-400',
47-
failed: 'text-red-400',
48-
};
49-
50-
const statusColor = statusColors[status.status] ?? 'text-gray-400';
51-
5244
return (
5345
<div className="space-y-4">
5446
<BackLink projectId={projectId} />
@@ -78,12 +70,7 @@ function ProjectJobDetailPage() {
7870
isReadOnly={isReadOnly}
7971
projectId={projectId}
8072
/>
81-
<span className={`text-sm font-medium ${statusColor}`}>
82-
{status.status.charAt(0).toUpperCase() + status.status.slice(1)}
83-
</span>
84-
{!isTerminal && (
85-
<span className="inline-block h-3 w-3 animate-spin rounded-full border-2 border-cyan-400 border-t-transparent" />
86-
)}
73+
<RunStatusIndicator status={status.status} />
8774
</div>
8875
</div>
8976

apps/studio/src/routes/projects/$projectId_/runs/$runId.tsx

Lines changed: 25 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@ import { useState } from 'react';
88
import { ResumeRunActions } from '~/components/ResumeRunActions';
99
import { RunDetail } from '~/components/RunDetail';
1010
import { RunEvalModal } from '~/components/RunEvalModal';
11+
import { RunStatusIndicator } from '~/components/RunStatusIndicator';
12+
import { StopRunButton } from '~/components/StopRunButton';
1113
import { useProjectRunDetail, useStudioConfig } from '~/lib/api';
1214

1315
export const Route = createFileRoute('/projects/$projectId_/runs/$runId')({
@@ -47,6 +49,8 @@ function ProjectRunDetailPage() {
4749
const experiment = firstResult?.experiment;
4850
const timestamp = firstResult?.timestamp;
4951
const prefill = target ? { target } : undefined;
52+
const runStatus = data?.status;
53+
const isActiveRun = runStatus === 'starting' || runStatus === 'running';
5054

5155
const heading = (() => {
5256
const parts = [experiment, target].filter((p) => p && p !== 'default');
@@ -70,16 +74,27 @@ function ProjectRunDetailPage() {
7074
<p className="mt-1 text-sm text-gray-500">{meta}</p>
7175
</div>
7276
<div className="flex items-center gap-3">
73-
<ResumeRunActions
74-
results={data?.results ?? []}
75-
runDir={data?.run_dir}
76-
suiteFilter={data?.suite_filter}
77-
target={target ?? undefined}
78-
projectId={projectId}
79-
isReadOnly={isReadOnly}
80-
plannedTestCount={data?.planned_test_count}
81-
/>
82-
{!isReadOnly && (
77+
{!isReadOnly && isActiveRun ? (
78+
<StopRunButton
79+
runId={runId}
80+
status={runStatus}
81+
isReadOnly={isReadOnly}
82+
projectId={projectId}
83+
/>
84+
) : (
85+
<ResumeRunActions
86+
results={data?.results ?? []}
87+
runDir={data?.run_dir}
88+
suiteFilter={data?.suite_filter}
89+
target={target ?? undefined}
90+
projectId={projectId}
91+
isReadOnly={isReadOnly}
92+
plannedTestCount={data?.planned_test_count}
93+
runStatus={runStatus}
94+
/>
95+
)}
96+
{runStatus && <RunStatusIndicator status={runStatus} />}
97+
{!isReadOnly && !isActiveRun && (
8398
<button
8499
type="button"
85100
onClick={() => setShowRunEval(true)}

0 commit comments

Comments
 (0)