-
Notifications
You must be signed in to change notification settings - Fork 31
Expand file tree
/
Copy pathcommand.tsx
More file actions
129 lines (118 loc) · 4.93 KB
/
command.tsx
File metadata and controls
129 lines (118 loc) · 4.93 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
import { getErrorMessage } from '../../errors';
import { handleRunEval } from '../../operations/eval';
import type { RunEvalOptions } from '../../operations/eval';
import { COMMAND_DESCRIPTIONS } from '../../tui/copy';
import { requireProject } from '../../tui/guards';
import type { Command } from '@commander-js/extra-typings';
import { Text, render } from 'ink';
import React from 'react';
/**
 * Prints a human-readable summary of an eval run to stdout.
 *
 * Output consists of a header line (agent, localized timestamp, session count,
 * lookback window), one line per evaluator with its aggregate score to two
 * decimals plus a count of sessions that carry an error message, and finally
 * the path of the saved results file when one is present.
 *
 * Prints nothing when the result has no `run` attached.
 *
 * @param result - The resolved value of `handleRunEval`.
 */
function formatRunOutput(result: Awaited<ReturnType<typeof handleRunEval>>): void {
  const run = result.run;
  if (!run) {
    return;
  }

  // An empty locale list lets the runtime pick its default locale.
  const when = new Date(run.timestamp).toLocaleString([], {
    year: 'numeric',
    month: 'short',
    day: 'numeric',
    hour: '2-digit',
    minute: '2-digit',
  });
  const header = [
    `Agent: ${run.agent}`,
    when,
    `Sessions: ${run.sessionCount}`,
    `Lookback: ${run.lookbackDays}d`,
  ].join(' | ');
  console.log(`\n${header}\n`);

  for (const entry of run.results) {
    const formattedScore = entry.aggregateScore.toFixed(2);

    // Count the sessions whose score entry carries an error message.
    let failedSessions = 0;
    for (const sessionScore of entry.sessionScores) {
      if (sessionScore.errorMessage) {
        failedSessions += 1;
      }
    }

    let line = ` ${entry.evaluator}: ${formattedScore}`;
    if (failedSessions > 0) {
      line += ` (${failedSessions} errors)`;
    }
    console.log(line);
  }

  if (result.filePath) {
    console.log(`\nResults saved to: ${result.filePath}`);
  }
}
/** Raw option values commander hands to the `run evals` action handler. */
type RunEvalsCliOptions = {
  agent?: string;
  agentArn?: string;
  evaluator?: string[];
  evaluatorArn?: string[];
  region?: string;
  sessionId?: string;
  traceId?: string;
  days: string;
  customServiceName?: string;
  customLogGroupName?: string;
  inputPath?: string;
  output?: string;
  json?: boolean;
};

/**
 * Reports a failure and terminates the process with exit code 1.
 *
 * In JSON mode a `{ success: false, error }` object is written to stdout;
 * otherwise the message is rendered in red through Ink.
 *
 * @param message - The error text placed in the JSON payload.
 * @param json - Whether `--json` output was requested.
 * @param display - Text shown in non-JSON mode; defaults to `message`.
 */
function failRun(message: string, json: boolean | undefined, display: string = message): never {
  if (json) {
    console.log(JSON.stringify({ success: false, error: message }));
  } else {
    render(<Text color="red">{display}</Text>);
  }
  process.exit(1);
}

/**
 * Registers the `run` command group and its `evals` subcommand on the program.
 *
 * The `evals` action validates the CLI options, maps them onto
 * `RunEvalOptions`, calls `handleRunEval`, prints the outcome (JSON or
 * formatted text) and exits with 0 on success or 1 on failure.
 *
 * @param program - The root commander program to attach the command to.
 */
export const registerRun = (program: Command) => {
  const runCmd = program.command('run').description(COMMAND_DESCRIPTIONS.run);

  runCmd
    .command('evals')
    .description('Run on-demand evaluation of agent traces. Use --agent-arn to evaluate agents outside the project.')
    .option('-a, --agent <name>', 'Agent name from project config')
    .option('--agent-arn <arn>', 'Agent runtime ARN — run outside a project directory')
    .option('-e, --evaluator <names...>', 'Evaluator name(s) from project or Builtin.* IDs')
    .option('--evaluator-arn <arns...>', 'Evaluator ARN(s) — use with --agent-arn for standalone mode')
    .option('--region <region>', 'AWS region (required with --agent-arn, auto-detected otherwise)')
    .option('-s, --session-id <id>', 'Evaluate a specific session only')
    .option('-t, --trace-id <id>', 'Evaluate a specific trace only')
    .option('--days <days>', 'Lookback window in days', '7')
    .option(
      '--custom-service-name <name>',
      'Custom service name for external agents — filters by service.name instead of cloud.resource_id'
    )
    .option('--custom-log-group-name <name>', 'Custom CloudWatch log group name for external agents')
    .option('--input-path <path>', 'Path to a local trace file or directory — skips CloudWatch discovery')
    .option('--output <path>', 'Custom output file path for results')
    .option('--json', 'Output as JSON')
    .action(async (cliOptions: RunEvalsCliOptions) => {
      // A project is only required when none of the standalone modes
      // (ARN pair, custom service name, local input path) is in use.
      const isArnMode = !!(cliOptions.agentArn && cliOptions.evaluatorArn);
      const isCustomMode = !!cliOptions.customServiceName;
      const isInputMode = !!cliOptions.inputPath;
      if (!isArnMode && !isCustomMode && !isInputMode) {
        requireProject();
      }

      if (!cliOptions.evaluator && !cliOptions.evaluatorArn) {
        failRun('At least one --evaluator or --evaluator-arn is required', cliOptions.json);
      }

      // parseInt yields NaN for non-numeric input; reject that (and negative
      // windows) here instead of forwarding a meaningless lookback value.
      const days = Number.parseInt(cliOptions.days, 10);
      if (Number.isNaN(days) || days < 0) {
        failRun(`Invalid --days value "${cliOptions.days}": expected a non-negative integer`, cliOptions.json);
      }

      const options: RunEvalOptions = {
        agent: cliOptions.agent,
        agentArn: cliOptions.agentArn,
        evaluator: cliOptions.evaluator ?? [],
        evaluatorArn: cliOptions.evaluatorArn,
        region: cliOptions.region,
        sessionId: cliOptions.sessionId,
        traceId: cliOptions.traceId,
        customServiceName: cliOptions.customServiceName,
        customLogGroupName: cliOptions.customLogGroupName,
        inputPath: cliOptions.inputPath,
        days,
        output: cliOptions.output,
        json: cliOptions.json,
      };

      try {
        const result = await handleRunEval(options);
        if (cliOptions.json) {
          console.log(JSON.stringify(result));
        } else {
          // Any run details are printed even on failure, followed by the error.
          formatRunOutput(result);
          if (!result.success) {
            render(<Text color="red">{result.error}</Text>);
          }
        }
        process.exit(result.success ? 0 : 1);
      } catch (error) {
        const message = getErrorMessage(error);
        failRun(message, cliOptions.json, `Error: ${message}`);
      }
    });
};