Skip to content

Commit 408801a

Browse files
authored
feat: rename run evals command to run eval (#636)
* feat: rename `run evals` command to `run eval`
* chore: fix formatting in README.md
1 parent 8a1af21 commit 408801a

8 files changed

Lines changed: 20 additions & 20 deletions

File tree

AGENTS.md

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -30,7 +30,7 @@ Note: CDK L3 constructs are in a separate package `@aws/agentcore-cdk`.
3030
- `status` - Check deployment status
3131
- `dev` - Local development server (CodeZip: uvicorn with hot-reload; Container: Docker build + run with volume mount)
3232
- `invoke` - Invoke agents (local or deployed)
33-
- `run evals` - Run on-demand evaluation against agent sessions
33+
- `run eval` - Run on-demand evaluation against agent sessions
3434
- `evals history` - View past eval run results
3535
- `pause online-eval` - Pause (disable) a deployed online eval config
3636
- `resume online-eval` - Resume (enable) a paused online eval config

README.md

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -98,7 +98,7 @@ agentcore invoke
9898
| -------------------- | --------------------------------------------- |
9999
| `add evaluator` | Add a custom LLM-as-a-Judge evaluator |
100100
| `add online-eval` | Add continuous evaluation for live traffic |
101-
| `run evals` | Run on-demand evaluation against agent traces |
101+
| `run eval` | Run on-demand evaluation against agent traces |
102102
| `evals history` | View past eval run results |
103103
| `pause online-eval` | Pause a deployed online eval config |
104104
| `resume online-eval` | Resume a paused online eval config |

docs/commands.md

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -586,16 +586,16 @@ agentcore traces get abc123 --agent MyAgent --output ./trace.json
586586

587587
See [Evaluations](evals.md) for the full guide on evaluators, scoring, and online monitoring.
588588

589-
### run evals
589+
### run eval
590590

591591
Run on-demand evaluation against historical agent traces.
592592

593593
```bash
594594
# Project mode
595-
agentcore run evals --agent MyAgent --evaluator ResponseQuality --days 7
595+
agentcore run eval --agent MyAgent --evaluator ResponseQuality --days 7
596596

597597
# Standalone mode (no project required)
598-
agentcore run evals \
598+
agentcore run eval \
599599
--agent-arn arn:aws:...:runtime/abc123 \
600600
--evaluator-arn arn:aws:...:evaluator/eval123 \
601601
--region us-east-1

docs/evals.md

Lines changed: 9 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -149,25 +149,25 @@ Run evaluators against historical agent traces.
149149

150150
```bash
151151
# Project mode — evaluate a project agent
152-
agentcore run evals \
152+
agentcore run eval \
153153
--agent MyAgent \
154154
--evaluator ResponseQuality \
155155
--days 7
156156

157157
# Standalone mode — evaluate any agent by ARN
158-
agentcore run evals \
158+
agentcore run eval \
159159
--agent-arn arn:aws:bedrock-agentcore:us-east-1:123456789012:runtime/abc123 \
160160
--evaluator-arn arn:aws:bedrock-agentcore:us-east-1:123456789012:evaluator/eval123 \
161161
--region us-east-1
162162

163163
# Multiple evaluators
164-
agentcore run evals \
164+
agentcore run eval \
165165
--agent MyAgent \
166166
--evaluator ResponseQuality Builtin.Faithfulness \
167167
--days 14
168168

169169
# Target specific session or trace
170-
agentcore run evals \
170+
agentcore run eval \
171171
--agent MyAgent \
172172
--evaluator ResponseQuality \
173173
--session-id abc123 \
@@ -359,7 +359,7 @@ AgentCore provides pre-built evaluators that can be used without creating custom
359359
by their `Builtin.*` ID in `--evaluator` flags or in online eval config `evaluators` arrays.
360360

361361
```bash
362-
agentcore run evals --agent MyAgent --evaluator Builtin.Faithfulness
362+
agentcore run eval --agent MyAgent --evaluator Builtin.Faithfulness
363363
```
364364

365365
---
@@ -369,8 +369,8 @@ agentcore run evals --agent MyAgent --evaluator Builtin.Faithfulness
369369
### CI/CD Quality Gate
370370

371371
```bash
372-
# Run evals and fail pipeline if score < threshold
373-
result=$(agentcore run evals --agent MyAgent --evaluator ResponseQuality --days 1 --json)
372+
# Run eval and fail pipeline if score < threshold
373+
result=$(agentcore run eval --agent MyAgent --evaluator ResponseQuality --days 1 --json)
374374
score=$(echo "$result" | jq '.run.results[0].aggregateScore')
375375
if (( $(echo "$score < 0.7" | bc -l) )); then
376376
echo "Quality gate failed: score $score < 0.7"
@@ -389,7 +389,7 @@ agentcore add evaluator \
389389
--instructions "Evaluate the agent response quality. Context: {context}"
390390

391391
# 2. Run on-demand eval to verify
392-
agentcore run evals --agent MyAgent --evaluator ResponseQuality --days 7
392+
agentcore run eval --agent MyAgent --evaluator ResponseQuality --days 7
393393

394394
# 3. Set up continuous monitoring
395395
agentcore add online-eval \
@@ -407,7 +407,7 @@ agentcore deploy
407407
Evaluate agents and use evaluators outside of a project directory using ARNs:
408408

409409
```bash
410-
agentcore run evals \
410+
agentcore run eval \
411411
--agent-arn arn:aws:bedrock-agentcore:us-east-1:123456789012:runtime/my-agent \
412412
--evaluator-arn arn:aws:bedrock-agentcore:us-east-1:123456789012:evaluator/my-eval \
413413
--region us-east-1 \

e2e-tests/evals-lifecycle.test.ts

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -137,7 +137,7 @@ describe.sequential('e2e: evaluations lifecycle', () => {
137137
async () => {
138138
const result = await run([
139139
'run',
140-
'evals',
140+
'eval',
141141
'--agent',
142142
agentName,
143143
'--evaluator',
@@ -146,7 +146,7 @@ describe.sequential('e2e: evaluations lifecycle', () => {
146146
'1',
147147
'--json',
148148
]);
149-
expect(result.exitCode, `Run evals failed (stdout: ${result.stdout}, stderr: ${result.stderr})`).toBe(0);
149+
expect(result.exitCode, `Run eval failed (stdout: ${result.stdout}, stderr: ${result.stderr})`).toBe(0);
150150
const json = parseJsonOutput(result.stdout) as Record<string, unknown>;
151151
expect(json).toHaveProperty('success', true);
152152
expect(json).toHaveProperty('run');

src/cli/commands/eval/command.tsx

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -39,7 +39,7 @@ export const registerEval = (program: Command) => {
3939

4040
const runs = result.runs ?? [];
4141
if (runs.length === 0) {
42-
console.log('No eval runs found. Run `agentcore run evals` to create one.');
42+
console.log('No eval runs found. Run `agentcore run eval` to create one.');
4343
return;
4444
}
4545

src/cli/commands/run/command.tsx

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -36,7 +36,7 @@ export const registerRun = (program: Command) => {
3636
const runCmd = program.command('run').description(COMMAND_DESCRIPTIONS.run);
3737

3838
runCmd
39-
.command('evals')
39+
.command('eval')
4040
.description('Run on-demand evaluation of agent traces. Use --agent-arn to evaluate agents outside the project.')
4141
.option('-a, --agent <name>', 'Agent name from project config')
4242
.option('--agent-arn <arn>', 'Agent runtime ARN — run outside a project directory')

src/cli/tui/screens/eval/EvalScreen.tsx

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -403,9 +403,9 @@ export function EvalScreen({ onExit }: EvalScreenProps) {
403403
{noRuns && (
404404
<Box flexDirection="column">
405405
<Text dimColor>No eval runs found.</Text>
406-
<Text dimColor>Run `agentcore run evals` to evaluate a project agent,</Text>
406+
<Text dimColor>Run `agentcore run eval` to evaluate a project agent,</Text>
407407
<Text dimColor>
408-
or `agentcore run evals --agent-arn <Text bold>ARN</Text> --evaluator-arn <Text bold>ARN</Text>` for agents
408+
or `agentcore run eval --agent-arn <Text bold>ARN</Text> --evaluator-arn <Text bold>ARN</Text>` for agents
409409
outside the project.
410410
</Text>
411411
{resultsDir && <Text dimColor>Results saved to: {resultsDir}</Text>}

0 commit comments

Comments
 (0)