Skip to content

Commit 6078585

Browse files
that-github-user, unknown, and claude
authored
Add --filter flags to stats command (model, date range, test status) (#124)
The --model, --since, --until, and --passed-only flags filter runs before aggregating. Adds 9 new tests for the filtering logic. Generated by thinktank Opus (5 agents, ALL pass, 71% convergence). Closes #78.

Co-authored-by: unknown <that-github-user@github.com>
Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 6fe013c commit 6078585

3 files changed

Lines changed: 172 additions & 5 deletions

File tree

src/cli.ts

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -194,8 +194,17 @@ program
194194
program
195195
.command("stats")
196196
.description("Show aggregate statistics across all thinktank runs")
197-
.action(async () => {
198-
await stats();
197+
.option("--model <name>", "Filter to runs using the specified model")
198+
.option("--since <date>", "Show only runs from this date onward (ISO 8601)")
199+
.option("--until <date>", "Show only runs up to this date (ISO 8601)")
200+
.option("--passed-only", "Show only runs where at least one agent passed tests")
201+
.action(async (opts) => {
202+
await stats({
203+
model: opts.model,
204+
since: opts.since,
205+
until: opts.until,
206+
passedOnly: opts.passedOnly,
207+
});
199208
});
200209

201210
program

src/commands/stats.test.ts

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
import assert from "node:assert/strict";
2+
import { describe, it } from "node:test";
3+
import type { EnsembleResult } from "../types.js";
4+
import { filterResults } from "./stats.js";
5+
6+
function makeResult(overrides: Partial<EnsembleResult> = {}): EnsembleResult {
7+
return {
8+
prompt: "test prompt",
9+
model: "sonnet",
10+
timestamp: "2026-03-15T10:00:00Z",
11+
scoring: "copeland",
12+
agents: [
13+
{
14+
id: 1,
15+
worktree: "/tmp/w1",
16+
status: "success",
17+
exitCode: 0,
18+
duration: 5000,
19+
output: "",
20+
diff: "diff",
21+
filesChanged: ["a.ts"],
22+
linesAdded: 10,
23+
linesRemoved: 2,
24+
},
25+
],
26+
tests: [{ agentId: 1, passed: true, output: "", exitCode: 0 }],
27+
convergence: [{ agents: [1], similarity: 0.9, filesChanged: ["a.ts"], description: "group" }],
28+
recommended: 1,
29+
scores: [],
30+
...overrides,
31+
};
32+
}
33+
34+
describe("filterResults", () => {
35+
const results: EnsembleResult[] = [
36+
makeResult({ model: "sonnet", timestamp: "2026-03-10T10:00:00Z" }),
37+
makeResult({
38+
model: "opus",
39+
timestamp: "2026-03-15T10:00:00Z",
40+
tests: [{ agentId: 1, passed: false, output: "", exitCode: 1 }],
41+
}),
42+
makeResult({ model: "sonnet", timestamp: "2026-03-20T10:00:00Z" }),
43+
];
44+
45+
it("returns all results with no filters", () => {
46+
const filtered = filterResults(results, {});
47+
assert.equal(filtered.length, 3);
48+
});
49+
50+
it("filters by model name (case-insensitive substring)", () => {
51+
const filtered = filterResults(results, { model: "Opus" });
52+
assert.equal(filtered.length, 1);
53+
assert.equal(filtered[0].model, "opus");
54+
});
55+
56+
it("filters by --since date", () => {
57+
const filtered = filterResults(results, { since: "2026-03-12" });
58+
assert.equal(filtered.length, 2);
59+
});
60+
61+
it("filters by --until date", () => {
62+
const filtered = filterResults(results, { until: "2026-03-15T10:00:00Z" });
63+
assert.equal(filtered.length, 2);
64+
});
65+
66+
it("filters by --since and --until together", () => {
67+
const filtered = filterResults(results, {
68+
since: "2026-03-12",
69+
until: "2026-03-18",
70+
});
71+
assert.equal(filtered.length, 1);
72+
assert.equal(filtered[0].timestamp, "2026-03-15T10:00:00Z");
73+
});
74+
75+
it("filters by --passed-only", () => {
76+
const filtered = filterResults(results, { passedOnly: true });
77+
assert.equal(filtered.length, 2);
78+
for (const r of filtered) {
79+
assert.ok(r.tests.some((t) => t.passed));
80+
}
81+
});
82+
83+
it("combines model and passed-only filters", () => {
84+
const filtered = filterResults(results, { model: "opus", passedOnly: true });
85+
assert.equal(filtered.length, 0);
86+
});
87+
88+
it("combines model and date filters", () => {
89+
const filtered = filterResults(results, {
90+
model: "sonnet",
91+
since: "2026-03-15",
92+
});
93+
assert.equal(filtered.length, 1);
94+
assert.equal(filtered[0].timestamp, "2026-03-20T10:00:00Z");
95+
});
96+
97+
it("returns empty array when no results match", () => {
98+
const filtered = filterResults(results, { model: "haiku" });
99+
assert.equal(filtered.length, 0);
100+
});
101+
});

src/commands/stats.ts

Lines changed: 60 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,47 @@ import { join } from "node:path";
33
import pc from "picocolors";
44
import type { EnsembleResult } from "../types.js";
55

6-
export async function stats(): Promise<void> {
6+
/**
 * Optional filters for the `stats` command. A field that is omitted (or
 * empty/false) applies no filtering on that dimension.
 */
export interface StatsOptions {
  /** Case-insensitive substring to match against each run's model name. */
  model?: string;
  /** Lower bound: keep runs whose timestamp is on or after this date string. */
  since?: string;
  /** Upper bound: keep runs whose timestamp is on or before this date string. */
  until?: string;
  /** When true, keep only runs in which at least one test result passed. */
  passedOnly?: boolean;
}
12+
13+
/**
 * Keep only the runs that satisfy every filter present in `opts`.
 *
 * - `model`: case-insensitive substring match on the run's model name.
 * - `since` / `until`: inclusive bounds on the run's timestamp, each parsed
 *   with `new Date(...)`. A date-only `until` (`YYYY-MM-DD`) parses as
 *   midnight UTC, which would drop every run later that same day; it is
 *   therefore extended to the last millisecond of that UTC day so that
 *   `--until 2026-03-18` includes runs made on the 18th.
 * - `passedOnly`: at least one entry in the run's `tests` has `passed` set.
 *
 * The input array is never mutated; when no filter is active it is returned
 * as-is. A run whose own timestamp cannot be parsed never satisfies a date
 * bound.
 *
 * If `since` or `until` is not a parseable date, an error line is printed and
 * the process exits with code 1.
 *
 * @param results Runs to filter.
 * @param opts Filters to apply; absent fields impose no constraint.
 * @returns The runs matching all active filters, in their original order.
 */
export function filterResults(results: EnsembleResult[], opts: StatsOptions): EnsembleResult[] {
  const DATE_ONLY = /^\d{4}-\d{2}-\d{2}$/;
  const LAST_MS_OF_DAY = 24 * 60 * 60 * 1000 - 1;

  // Parse a --since/--until value to epoch milliseconds, exiting on bad input.
  const parseBound = (flag: "since" | "until", raw: string): number => {
    const ms = new Date(raw).getTime();
    if (Number.isNaN(ms)) {
      console.log(pc.red(` Invalid --${flag} date: ${raw}`));
      process.exit(1);
    }
    // Date-only strings are midnight UTC; make the upper bound cover the whole day.
    return flag === "until" && DATE_ONLY.test(raw) ? ms + LAST_MS_OF_DAY : ms;
  };

  // Validate and normalise every filter once, before touching any run.
  const model = opts.model ? opts.model.toLowerCase() : undefined;
  const since = opts.since ? parseBound("since", opts.since) : undefined;
  const until = opts.until ? parseBound("until", opts.until) : undefined;
  const passedOnly = Boolean(opts.passedOnly);

  if (model === undefined && since === undefined && until === undefined && !passedOnly) {
    return results;
  }

  return results.filter((r) => {
    if (model !== undefined && !r.model.toLowerCase().includes(model)) return false;

    if (since !== undefined || until !== undefined) {
      // Negated comparisons so an unparseable (NaN) timestamp is rejected.
      const ts = new Date(r.timestamp).getTime();
      if (since !== undefined && !(ts >= since)) return false;
      if (until !== undefined && !(ts <= until)) return false;
    }

    if (passedOnly && !r.tests.some((t) => t.passed)) return false;
    return true;
  });
}
45+
46+
export async function stats(opts: StatsOptions = {}): Promise<void> {
747
let files: string[];
848
try {
949
const entries = await readdir(".thinktank");
@@ -18,16 +58,23 @@ export async function stats(): Promise<void> {
1858
return;
1959
}
2060

21-
const results: EnsembleResult[] = [];
61+
const allResults: EnsembleResult[] = [];
2262
for (const file of files) {
2363
try {
2464
const raw = await readFile(join(".thinktank", file), "utf-8");
25-
results.push(JSON.parse(raw) as EnsembleResult);
65+
allResults.push(JSON.parse(raw) as EnsembleResult);
2666
} catch {
2767
// skip malformed files
2868
}
2969
}
3070

71+
const results = filterResults(allResults, opts);
72+
73+
if (results.length === 0) {
74+
console.log(pc.yellow(" No runs match the specified filters."));
75+
return;
76+
}
77+
3178
const totalRuns = results.length;
3279
const avgAgents = results.reduce((sum, r) => sum + r.agents.length, 0) / totalRuns;
3380

@@ -43,9 +90,19 @@ export async function stats(): Promise<void> {
4390
? testPassRates.reduce((sum, r) => sum + r, 0) / testPassRates.length
4491
: null;
4592

93+
const hasFilters = opts.model || opts.since || opts.until || opts.passedOnly;
94+
4695
console.log();
4796
console.log(pc.bold(" thinktank stats"));
4897
console.log(pc.dim(" ─────────────────────────────"));
98+
if (hasFilters) {
99+
const parts: string[] = [];
100+
if (opts.model) parts.push(`model=${pc.cyan(opts.model)}`);
101+
if (opts.since) parts.push(`since=${pc.cyan(opts.since)}`);
102+
if (opts.until) parts.push(`until=${pc.cyan(opts.until)}`);
103+
if (opts.passedOnly) parts.push(pc.cyan("passed-only"));
104+
console.log(` Filters: ${parts.join(pc.dim(", "))}`);
105+
}
49106
console.log(` Total runs: ${pc.cyan(String(totalRuns))}`);
50107
console.log(` Avg agents/run: ${pc.cyan(avgAgents.toFixed(1))}`);
51108
console.log(` Avg convergence: ${pc.cyan((avgConvergence * 100).toFixed(1) + "%")}`);

0 commit comments

Comments
 (0)