Skip to content

Commit 406db90

Browse files
miguelg719, chromiebot, and claude
authored
Evals TUI tree traversal (#2100)
# why

# what changed

# test plan

<!-- This is an auto-generated description by cubic. -->

---

## Summary by cubic

Unifies the Evals TUI/CLI behind a shared command tree with path-style traversal and contextual help. Supports Linear STG-1880 and fixes root fallback parsing by stripping a leading `evals` sigil to improve onboarding and command discovery.

- **New Features**
  - Single `buildCommandTree` powers both REPL and argv dispatch.
  - `>` works as a path separator in REPL and argv; REPL preserves quoted `>`, argv re-splits any arg containing `>`.
  - Context navigation and prompt: `..` goes up; bare `evals` jumps to root; invoking a node with children auto-enters that context in REPL; prompt shows the current path; banner mentions `..`.
  - Help routing: `--help` after a leaf calls that node’s help; `help` shows help for the current context.
  - Root fallback: unknown tokens at root call `run` and now strip a leading `evals` before forwarding (e.g., `evals act`); unknown tokens at depth raise an error for clarity.
  - REPL quality of life: Esc pops context when idle; Esc still aborts in-flight runs (cooperative on the first press; aggressive on a double press).
- **Refactors**
  - `cli.ts` and `repl.ts` now resolve and execute via `dispatch` from the shared command tree; added `tokenizeArgv` for argv `>` splitting and `renderPrompt` for context-aware prompts.
  - Moved line tokenization to `tui/tokenize.ts`; added `tui/commandTree.ts` with resolution, prompt rendering, and argv tokenization.
  - Kept command handlers lazy-loaded; added focused tests for the tree, `>` chaining, argv sigil handling, and CLI edge cases.

<sup>Written for commit 2f5c929. Summary will update on new commits.</sup>

<!-- End of auto-generated description by cubic. -->

---------

Co-authored-by: Chromie <miguel@browserbase.com>
Co-authored-by: Claude <noreply@anthropic.com>
1 parent ffb9b20 commit 406db90

7 files changed

Lines changed: 1244 additions & 248 deletions

File tree

packages/evals/cli.ts

Lines changed: 22 additions & 100 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ await (async () => {
5050

5151
import { red } from "./tui/format.js";
5252
import { getCurrentDirPath, getRuntimeTasksRoot } from "./runtimePaths.js";
53+
import type { TaskRegistry } from "./framework/types.js";
5354

5455
/**
5556
* Directory of the running entry module. Differs between source and
@@ -60,13 +61,6 @@ const ENTRY_DIR = getCurrentDirPath();
6061
const args = process.argv.slice(2);
6162

6263
(async () => {
63-
// Keep heavy command modules behind their command branches. The run stack
64-
// imports Braintrust transitively, and importing it for `help`/`config path`
65-
// makes quiet commands print optional OpenTelemetry warnings.
66-
const { printHelp, printRunHelp, printListHelp, printNewHelp } = await import(
67-
"./tui/commands/help.js"
68-
);
69-
7064
// Best-effort shutdown: flush Braintrust telemetry and exit with the
7165
// conventional signal code. Does not guarantee in-flight task
7266
// cancellation upstream; the goal is clean process shutdown with no
@@ -123,110 +117,38 @@ const args = process.argv.slice(2);
123117
};
124118
}
125119

126-
async function executeRun(tokens: string[]): Promise<void> {
127-
const { readConfig } = await import("./tui/commands/config.js");
128-
const { runCommand } = await import("./tui/commands/run.js");
129-
const { parseRunArgs, resolveRunOptions } = await import(
130-
"./tui/commands/parse.js"
131-
);
132-
const flags = parseRunArgs(tokens);
133-
const configFile = readConfig(ENTRY_DIR);
134-
const resolved = resolveRunOptions(
135-
flags,
136-
configFile.defaults,
137-
process.env,
138-
configFile.core,
139-
);
140-
141-
if (flags.legacy) {
142-
const { runLegacy } = await import("./tui/commands/legacy.js");
143-
const { discoverTasks } = await import("./framework/discovery.js");
144-
const registry = await discoverTasks(getRuntimeTasksRoot(), false);
145-
await runLegacy(resolved, flags, registry);
146-
return; // unreachable — runLegacy calls process.exit
147-
}
148-
149-
await runCommand(resolved);
150-
}
151-
152120
try {
153121
if (args.length === 0) {
154122
const { startRepl } = await import("./tui/repl.js");
155123
await startRepl(ENTRY_DIR);
156124
return;
157125
}
158126

159-
const command = args[0].toLowerCase();
160-
const subArgs = args.slice(1);
161-
// Help is only triggered when `--help`/`-h`/`help` sits immediately
162-
// after the command. Later positions are arguments or flag values and
163-
// must not be swallowed (e.g. `evals run act --help` would otherwise
164-
// print run help instead of erroring on the unknown `--help` flag).
165-
const wantsHelp =
166-
subArgs[0] === "--help" || subArgs[0] === "-h" || subArgs[0] === "help";
167-
168-
switch (command) {
169-
case "run": {
170-
if (wantsHelp) {
171-
printRunHelp();
172-
return;
173-
}
174-
await executeRun(subArgs);
175-
return;
176-
}
127+
const { buildCommandTree, dispatch, tokenizeArgv } = await import(
128+
"./tui/commandTree.js"
129+
);
177130

178-
case "list": {
179-
if (wantsHelp) {
180-
printListHelp();
181-
return;
182-
}
183-
const detailed =
184-
subArgs.includes("--detailed") || subArgs.includes("-d");
185-
const tierFilter = subArgs.find((a) => !a.startsWith("-"));
186-
const tasksRoot = getRuntimeTasksRoot();
131+
let registry: TaskRegistry | null = null;
132+
const getRegistry = async (): Promise<TaskRegistry> => {
133+
if (!registry) {
187134
const { discoverTasks } = await import("./framework/discovery.js");
188-
const { printList } = await import("./tui/commands/list.js");
189-
const registry = await discoverTasks(tasksRoot, false);
190-
printList(registry, tierFilter, detailed);
191-
return;
135+
registry = await discoverTasks(getRuntimeTasksRoot(), false);
192136
}
137+
return registry;
138+
};
193139

194-
case "config": {
195-
const { handleConfig } = await import("./tui/commands/config.js");
196-
await handleConfig(subArgs, ENTRY_DIR);
197-
return;
198-
}
199-
200-
case "experiments": {
201-
const { handleExperiments } = await import(
202-
"./tui/commands/experiments.js"
203-
);
204-
await handleExperiments(subArgs);
205-
return;
206-
}
207-
208-
case "new": {
209-
if (wantsHelp) {
210-
printNewHelp();
211-
return;
212-
}
213-
const { scaffoldTask } = await import("./tui/commands/new.js");
214-
scaffoldTask(subArgs);
215-
return;
216-
}
217-
218-
case "help":
219-
case "--help":
220-
case "-h":
221-
printHelp();
222-
return;
223-
224-
default: {
225-
// Unknown first arg → treat as run target: `evals act` == `evals run act`
226-
await executeRun(args);
227-
return;
228-
}
229-
}
140+
const tree = buildCommandTree();
141+
142+
const tokens = tokenizeArgv(args);
143+
await dispatch(tree, tokens, {
144+
entryDir: ENTRY_DIR,
145+
getRegistry,
146+
setRegistry: (r) => {
147+
registry = r;
148+
},
149+
abortRef: null,
150+
contextPath: null,
151+
});
230152
} catch (err) {
231153
console.error(red(`Error: ${(err as Error).message}`));
232154
process.exitCode = 1;

packages/evals/tests/cli.test.ts

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -239,6 +239,29 @@ describe("CLI entrypoint", () => {
239239
path.join(repoRoot, "packages", "evals", "evals.config.json"),
240240
);
241241
});
242+
243+
it("treats `>` as equivalent to a space separator (argv form)", async () => {
244+
const direct = await runCli(["config", "path"]);
245+
const piped = await runCli(["config", ">", "path"]);
246+
expect(piped.code).toBe(0);
247+
expect(piped.stdout).toBe(direct.stdout);
248+
});
249+
250+
it("supports `>` chaining across multiple levels", async () => {
251+
const direct = await runCli(["config", "core", "path"]);
252+
const piped = await runCli(["config", ">", "core", ">", "path"]);
253+
expect(piped.code).toBe(0);
254+
expect(piped.stdout).toBe(direct.stdout);
255+
});
256+
257+
it("strips a leading `evals` sigil token (no-op at root)", async () => {
258+
// From a shell, `evals evals run act --dry-run` should resolve like
259+
// `evals run act --dry-run` — the leading `evals` arg is the sigil.
260+
const { stdout, code } = await runCli(["evals", "run", "act", "--dry-run"]);
261+
expect(code).toBe(0);
262+
const payload = JSON.parse(stdout);
263+
expect(payload.normalizedTarget).toBe("act");
264+
});
242265
});
243266

244267
describe.sequential("core config", () => {

0 commit comments

Comments
 (0)