Skip to content

Commit d8c16d2

Browse files
that-github-user, unknown, claude
authored
Add --retry flag to re-run only failed or timed-out agents (#115)
Loads the latest result, identifies the failed or timed-out agents, creates new worktrees only for those agents, re-runs them with the same prompt, and merges the results back into the original result set. This saves money by not re-running successful agents. Generated by thinktank Opus (5 agents, 1 completed just under timeout). Closes #77.
Co-authored-by: unknown <that-github-user@github.com>
Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent f1c1423 commit d8c16d2

4 files changed

Lines changed: 371 additions & 12 deletions

File tree

src/cli.ts

Lines changed: 29 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@ import { compare } from "./commands/compare.js";
77
import { type ConfigAction, config } from "./commands/config.js";
88
import { evaluate } from "./commands/evaluate.js";
99
import { list } from "./commands/list.js";
10-
import { run } from "./commands/run.js";
10+
import { retry, run } from "./commands/run.js";
1111
import { stats } from "./commands/stats.js";
1212
import { undo } from "./commands/undo.js";
1313
import { loadConfig } from "./utils/config.js";
@@ -48,15 +48,8 @@ program
4848
.option("--no-color", "Disable colored output")
4949
.option("--output-format <format>", "Output format: text (default) or json", "text")
5050
.option("--verbose", "Show detailed output from each agent")
51+
.option("--retry", "Re-run only failed/timed-out agents from the last run")
5152
.action(async (promptArg: string | undefined, opts) => {
52-
const prompt = resolvePrompt(promptArg, opts.file);
53-
54-
const attempts = parseInt(opts.attempts, 10);
55-
if (Number.isNaN(attempts) || attempts < 1 || attempts > 20) {
56-
console.error("Error: --attempts must be a number between 1 and 20");
57-
process.exit(1);
58-
}
59-
6053
const testTimeout = parseInt(opts.testTimeout, 10);
6154
if (Number.isNaN(testTimeout) || testTimeout < 10 || testTimeout > 600) {
6255
console.error("Error: --test-timeout must be a number between 10 and 600 seconds");
@@ -92,6 +85,33 @@ program
9285
process.exit(1);
9386
}
9487

88+
// --retry: re-run only failed agents from last run, ignore --attempts and prompt
89+
if (opts.retry) {
90+
await retry({
91+
prompt: "", // ignored — loaded from previous result
92+
attempts: 0, // ignored — determined by failed agent count
93+
testCmd: opts.testCmd,
94+
testTimeout,
95+
timeout,
96+
model: opts.model,
97+
threshold,
98+
runner: opts.runner,
99+
scoring: opts.scoring,
100+
verbose: opts.verbose ?? false,
101+
outputFormat: opts.outputFormat,
102+
retry: true,
103+
});
104+
return;
105+
}
106+
107+
const prompt = resolvePrompt(promptArg, opts.file);
108+
109+
const attempts = parseInt(opts.attempts, 10);
110+
if (Number.isNaN(attempts) || attempts < 1 || attempts > 20) {
111+
console.error("Error: --attempts must be a number between 1 and 20");
112+
process.exit(1);
113+
}
114+
95115
const knownModels = ["sonnet", "opus", "haiku"];
96116
if (!knownModels.includes(opts.model) && !opts.model.startsWith("claude-")) {
97117
console.warn(

src/commands/run.test.ts

Lines changed: 139 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,12 @@
11
import assert from "node:assert/strict";
22
import { afterEach, describe, it } from "node:test";
3-
import type { RunOptions } from "../types.js";
4-
import { makeResultFilename, preflightValidation } from "./run.js";
3+
import type { AgentResult, EnsembleResult, RunOptions } from "../types.js";
4+
import {
5+
findFailedAgents,
6+
makeResultFilename,
7+
mergeRetryResults,
8+
preflightValidation,
9+
} from "./run.js";
510

611
function makeOpts(overrides: Partial<RunOptions> = {}): RunOptions {
712
return {
@@ -117,3 +122,135 @@ describe("NO_COLOR environment variable", () => {
117122
assert.equal(colors.bold("test"), "test");
118123
});
119124
});
125+
126+
function makeAgent(overrides: Partial<AgentResult> = {}): AgentResult {
127+
return {
128+
id: 1,
129+
worktree: "/tmp/thinktank-agent-1",
130+
status: "success",
131+
exitCode: 0,
132+
duration: 5000,
133+
output: "",
134+
diff: "diff --git a/file.ts b/file.ts\n+added line",
135+
filesChanged: ["file.ts"],
136+
linesAdded: 1,
137+
linesRemoved: 0,
138+
...overrides,
139+
};
140+
}
141+
142+
function makeResult(overrides: Partial<EnsembleResult> = {}): EnsembleResult {
143+
return {
144+
prompt: "fix the bug",
145+
model: "sonnet",
146+
timestamp: "2026-03-28T10:00:00.000Z",
147+
scoring: "copeland",
148+
agents: [
149+
makeAgent({ id: 1, status: "success" }),
150+
makeAgent({ id: 2, status: "error", exitCode: 1, diff: "", filesChanged: [] }),
151+
makeAgent({ id: 3, status: "timeout", exitCode: 1, diff: "", filesChanged: [] }),
152+
],
153+
tests: [],
154+
convergence: [],
155+
recommended: 1,
156+
scores: [],
157+
...overrides,
158+
};
159+
}
160+
161+
describe("findFailedAgents", () => {
162+
it("returns agents with error status", () => {
163+
const result = makeResult();
164+
const failed = findFailedAgents(result);
165+
const ids = failed.map((a) => a.id);
166+
assert.ok(ids.includes(2));
167+
});
168+
169+
it("returns agents with timeout status", () => {
170+
const result = makeResult();
171+
const failed = findFailedAgents(result);
172+
const ids = failed.map((a) => a.id);
173+
assert.ok(ids.includes(3));
174+
});
175+
176+
it("does not return successful agents", () => {
177+
const result = makeResult();
178+
const failed = findFailedAgents(result);
179+
const ids = failed.map((a) => a.id);
180+
assert.ok(!ids.includes(1));
181+
});
182+
183+
it("returns empty array when all agents succeeded", () => {
184+
const result = makeResult({
185+
agents: [makeAgent({ id: 1, status: "success" }), makeAgent({ id: 2, status: "success" })],
186+
});
187+
const failed = findFailedAgents(result);
188+
assert.equal(failed.length, 0);
189+
});
190+
191+
it("returns all agents when all failed", () => {
192+
const result = makeResult({
193+
agents: [
194+
makeAgent({ id: 1, status: "error" }),
195+
makeAgent({ id: 2, status: "timeout" }),
196+
makeAgent({ id: 3, status: "error" }),
197+
],
198+
});
199+
const failed = findFailedAgents(result);
200+
assert.equal(failed.length, 3);
201+
});
202+
});
203+
204+
describe("mergeRetryResults", () => {
205+
it("replaces failed agents with retried results", () => {
206+
const original = makeResult();
207+
const retried = [
208+
makeAgent({ id: 2, status: "success", diff: "new diff for 2", filesChanged: ["a.ts"] }),
209+
makeAgent({ id: 3, status: "success", diff: "new diff for 3", filesChanged: ["b.ts"] }),
210+
];
211+
212+
const merged = mergeRetryResults(original, retried);
213+
214+
assert.equal(merged.length, 3);
215+
assert.equal(merged[0].id, 1);
216+
assert.equal(merged[0].status, "success");
217+
assert.equal(merged[1].id, 2);
218+
assert.equal(merged[1].status, "success");
219+
assert.equal(merged[1].diff, "new diff for 2");
220+
assert.equal(merged[2].id, 3);
221+
assert.equal(merged[2].status, "success");
222+
assert.equal(merged[2].diff, "new diff for 3");
223+
});
224+
225+
it("preserves successful agents unchanged", () => {
226+
const original = makeResult();
227+
const retried = [makeAgent({ id: 2, status: "success" })];
228+
229+
const merged = mergeRetryResults(original, retried);
230+
231+
assert.equal(merged[0].id, 1);
232+
assert.equal(merged[0].status, "success");
233+
assert.equal(merged[0].diff, original.agents[0].diff);
234+
});
235+
236+
it("handles retry where agent still fails", () => {
237+
const original = makeResult();
238+
const retried = [makeAgent({ id: 2, status: "error", diff: "" })];
239+
240+
const merged = mergeRetryResults(original, retried);
241+
242+
assert.equal(merged[1].id, 2);
243+
assert.equal(merged[1].status, "error");
244+
});
245+
246+
it("returns same count as original agents", () => {
247+
const original = makeResult();
248+
const retried = [
249+
makeAgent({ id: 2, status: "success" }),
250+
makeAgent({ id: 3, status: "success" }),
251+
];
252+
253+
const merged = mergeRetryResults(original, retried);
254+
assert.equal(merged.length, original.agents.length);
255+
});
256+
});

0 commit comments

Comments
 (0)