Skip to content

Commit ec38020

Browse files
authored
fix: support non-default runtime endpoint in on-demand evals (#634)
* fix: support non-default runtime endpoint in on-demand evals
* fix: format command.tsx with prettier
1 parent 5e55ea5 commit ec38020

4 files changed

Lines changed: 152 additions & 2 deletions

File tree

src/cli/commands/run/command.tsx

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,10 @@ export const registerRun = (program: Command) => {
4545
.option('--region <region>', 'AWS region (required with --agent-arn, auto-detected otherwise)')
4646
.option('-s, --session-id <id>', 'Evaluate a specific session only')
4747
.option('-t, --trace-id <id>', 'Evaluate a specific trace only')
48+
.option(
49+
'--endpoint <name>',
50+
'Runtime endpoint name (e.g. PROMPT_V1). Defaults to AGENTCORE_RUNTIME_ENDPOINT env var, then DEFAULT'
51+
)
4852
.option('--days <days>', 'Lookback window in days', '7')
4953
.option('--output <path>', 'Custom output file path for results')
5054
.option('--json', 'Output as JSON')
@@ -57,6 +61,7 @@ export const registerRun = (program: Command) => {
5761
region?: string;
5862
sessionId?: string;
5963
traceId?: string;
64+
endpoint?: string;
6065
days: string;
6166
output?: string;
6267
json?: boolean;
@@ -84,6 +89,7 @@ export const registerRun = (program: Command) => {
8489
region: cliOptions.region,
8590
sessionId: cliOptions.sessionId,
8691
traceId: cliOptions.traceId,
92+
endpoint: cliOptions.endpoint,
8793
days: parseInt(cliOptions.days, 10),
8894
output: cliOptions.output,
8995
json: cliOptions.json,

src/cli/operations/eval/__tests__/run-eval.test.ts

Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -598,6 +598,146 @@ describe('handleRunEval', () => {
598598
expect(result.error).toContain('No evaluators specified');
599599
});
600600

601+
// ─── Endpoint selection ──────────────────────────────────────────────────
602+
603+
it('uses --endpoint option to construct runtime log group', async () => {
604+
const ctx = makeDeployedContext();
605+
mockLoadDeployedProjectConfig.mockResolvedValue(ctx);
606+
mockResolveAgent.mockReturnValue({
607+
success: true,
608+
agent: {
609+
agentName: 'my-agent',
610+
targetName: 'dev',
611+
region: 'us-east-1',
612+
accountId: '111222333444',
613+
runtimeId: 'rt-123',
614+
},
615+
});
616+
617+
const spanRows = [makeOtelSpanRow('session-1', 'trace-1')];
618+
setupCloudWatchToReturn(spanRows);
619+
620+
mockEvaluate.mockResolvedValue({
621+
evaluationResults: [{ value: 4.0, context: { spanContext: { sessionId: 'session-1' } } }],
622+
});
623+
624+
await handleRunEval({ evaluator: ['Builtin.GoalSuccessRate'], days: 7, endpoint: 'PROMPT_V1' });
625+
626+
// The second CloudWatch query (runtime logs) should target the PROMPT_V1 log group
627+
const runtimeLogCall = mockSend.mock.calls.find((c: unknown[]) => {
628+
const input = (c[0] as { input?: { logGroupName?: string } }).input;
629+
return input?.logGroupName?.includes('PROMPT_V1');
630+
});
631+
expect(runtimeLogCall).toBeDefined();
632+
});
633+
634+
it('uses AGENTCORE_RUNTIME_ENDPOINT env var when --endpoint is not provided', async () => {
635+
const originalEnv = process.env.AGENTCORE_RUNTIME_ENDPOINT;
636+
process.env.AGENTCORE_RUNTIME_ENDPOINT = 'CUSTOM_V2';
637+
638+
try {
639+
const ctx = makeDeployedContext();
640+
mockLoadDeployedProjectConfig.mockResolvedValue(ctx);
641+
mockResolveAgent.mockReturnValue({
642+
success: true,
643+
agent: {
644+
agentName: 'my-agent',
645+
targetName: 'dev',
646+
region: 'us-east-1',
647+
accountId: '111222333444',
648+
runtimeId: 'rt-123',
649+
},
650+
});
651+
652+
const spanRows = [makeOtelSpanRow('session-1', 'trace-1')];
653+
setupCloudWatchToReturn(spanRows);
654+
655+
mockEvaluate.mockResolvedValue({
656+
evaluationResults: [{ value: 4.0, context: { spanContext: { sessionId: 'session-1' } } }],
657+
});
658+
659+
await handleRunEval({ evaluator: ['Builtin.GoalSuccessRate'], days: 7 });
660+
661+
const runtimeLogCall = mockSend.mock.calls.find((c: unknown[]) => {
662+
const input = (c[0] as { input?: { logGroupName?: string } }).input;
663+
return input?.logGroupName?.includes('CUSTOM_V2');
664+
});
665+
expect(runtimeLogCall).toBeDefined();
666+
} finally {
667+
if (originalEnv === undefined) {
668+
delete process.env.AGENTCORE_RUNTIME_ENDPOINT;
669+
} else {
670+
process.env.AGENTCORE_RUNTIME_ENDPOINT = originalEnv;
671+
}
672+
}
673+
});
674+
675+
it('--endpoint takes precedence over AGENTCORE_RUNTIME_ENDPOINT env var', async () => {
676+
const originalEnv = process.env.AGENTCORE_RUNTIME_ENDPOINT;
677+
process.env.AGENTCORE_RUNTIME_ENDPOINT = 'ENV_ENDPOINT';
678+
679+
try {
680+
const ctx = makeDeployedContext();
681+
mockLoadDeployedProjectConfig.mockResolvedValue(ctx);
682+
mockResolveAgent.mockReturnValue({
683+
success: true,
684+
agent: {
685+
agentName: 'my-agent',
686+
targetName: 'dev',
687+
region: 'us-east-1',
688+
accountId: '111222333444',
689+
runtimeId: 'rt-123',
690+
},
691+
});
692+
693+
const spanRows = [makeOtelSpanRow('session-1', 'trace-1')];
694+
setupCloudWatchToReturn(spanRows);
695+
696+
mockEvaluate.mockResolvedValue({
697+
evaluationResults: [{ value: 4.0, context: { spanContext: { sessionId: 'session-1' } } }],
698+
});
699+
700+
await handleRunEval({ evaluator: ['Builtin.GoalSuccessRate'], days: 7, endpoint: 'FLAG_ENDPOINT' });
701+
702+
const flagCall = mockSend.mock.calls.find((c: unknown[]) => {
703+
const input = (c[0] as { input?: { logGroupName?: string } }).input;
704+
return input?.logGroupName?.includes('FLAG_ENDPOINT');
705+
});
706+
const envCall = mockSend.mock.calls.find((c: unknown[]) => {
707+
const input = (c[0] as { input?: { logGroupName?: string } }).input;
708+
return input?.logGroupName?.includes('ENV_ENDPOINT');
709+
});
710+
expect(flagCall).toBeDefined();
711+
expect(envCall).toBeUndefined();
712+
} finally {
713+
if (originalEnv === undefined) {
714+
delete process.env.AGENTCORE_RUNTIME_ENDPOINT;
715+
} else {
716+
process.env.AGENTCORE_RUNTIME_ENDPOINT = originalEnv;
717+
}
718+
}
719+
});
720+
721+
it('uses --endpoint in ARN mode', async () => {
722+
setupCloudWatchToReturn([makeOtelSpanRow('s1', 't1')]);
723+
mockEvaluate.mockResolvedValue({
724+
evaluationResults: [{ value: 4.0, context: { spanContext: { sessionId: 's1' } } }],
725+
});
726+
727+
await handleRunEval({
728+
agentArn: 'arn:aws:bedrock-agentcore:us-west-2:123456789012:runtime/rt-arn-ep',
729+
evaluator: ['Builtin.Helpfulness'],
730+
days: 3,
731+
endpoint: 'PROMPT_V1',
732+
});
733+
734+
const runtimeLogCall = mockSend.mock.calls.find((c: unknown[]) => {
735+
const input = (c[0] as { input?: { logGroupName?: string } }).input;
736+
return input?.logGroupName?.includes('PROMPT_V1');
737+
});
738+
expect(runtimeLogCall).toBeDefined();
739+
});
740+
601741
// ─── Evaluator-level grouping ────────────────────────────────────────────
602742

603743
it('sends targetTraceIds for TRACE-level builtin evaluators', async () => {

src/cli/operations/eval/run-eval.ts

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,8 @@ function resolveFromArn(options: RunEvalOptions): ResolveResult {
9393
return { success: false, error: 'No evaluators specified. Use -e/--evaluator with Builtin.* or --evaluator-arn.' };
9494
}
9595

96-
const runtimeLogGroup = `/aws/bedrock-agentcore/runtimes/${runtimeId}-${DEFAULT_ENDPOINT_NAME}`;
96+
const endpointName = options.endpoint ?? process.env.AGENTCORE_RUNTIME_ENDPOINT ?? DEFAULT_ENDPOINT_NAME;
97+
const runtimeLogGroup = `/aws/bedrock-agentcore/runtimes/${runtimeId}-${endpointName}`;
9798

9899
return {
99100
success: true,
@@ -118,7 +119,8 @@ function resolveFromProject(context: DeployedProjectConfig, options: RunEvalOpti
118119
}
119120

120121
const { agent } = agentResult;
121-
const runtimeLogGroup = `/aws/bedrock-agentcore/runtimes/${agent.runtimeId}-${DEFAULT_ENDPOINT_NAME}`;
122+
const endpointName = options.endpoint ?? process.env.AGENTCORE_RUNTIME_ENDPOINT ?? DEFAULT_ENDPOINT_NAME;
123+
const runtimeLogGroup = `/aws/bedrock-agentcore/runtimes/${agent.runtimeId}-${endpointName}`;
122124

123125
// Resolve evaluator names to IDs
124126
const evaluatorIds: string[] = [];

src/cli/operations/eval/types.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,8 @@ export interface RunEvalOptions {
5656
sessionIds?: string[];
5757
/** Filter to a specific trace */
5858
traceId?: string;
59+
/** Runtime endpoint name (e.g. PROMPT_V1). Defaults to AGENTCORE_RUNTIME_ENDPOINT env var, then DEFAULT. */
60+
endpoint?: string;
5961
days: number;
6062
output?: string;
6163
json?: boolean;

0 commit comments

Comments
 (0)