Skip to content

Commit a155796

Browse files
jsonbailey and claude committed
feat: add ManagedAgent with evaluations support (AIC-1657)
Introduces ManagedAgent class as the agent-mode analogue of ManagedModel. ManagedAgent.run() invokes the runner, tracks metrics, and wires judge evaluations (via aiAgentConfig.evaluator) into ManagedResult.evaluations. Adds LDAIClient.createAgent() to LDAIClientImpl and the LDAIClient interface. Exports ManagedAgent from the public API. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 074b07f commit a155796

6 files changed

Lines changed: 337 additions & 0 deletions

File tree

Lines changed: 170 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,170 @@
1+
import { ManagedAgent } from '../src/api/agent/ManagedAgent';
2+
import { LDAIConfigTracker } from '../src/api/config/LDAIConfigTracker';
3+
import { LDAIAgentConfig } from '../src/api/config/types';
4+
import { Evaluator } from '../src/api/judge/Evaluator';
5+
import { LDJudgeResult } from '../src/api/judge/types';
6+
import { RunnerResult } from '../src/api/model/types';
7+
import { Runner } from '../src/api/providers/Runner';
8+
9+
describe('ManagedAgent', () => {
  // Canned runner output shared by every test case.
  const runnerResult: RunnerResult = {
    content: 'Agent response',
    metrics: { success: true },
  };

  let mockRunner: jest.Mocked<Runner>;
  let mockTracker: jest.Mocked<LDAIConfigTracker>;
  let agentConfig: LDAIAgentConfig;

  /**
   * Builds an Evaluator test double with a single configured judge whose
   * `evaluate` behavior is supplied by the caller.
   */
  const stubEvaluator = (evaluate: jest.Mock): Evaluator =>
    ({
      judgeConfiguration: { judges: [{ key: 'judge-1', samplingRate: 1.0 }] },
      evaluate,
      judges: new Map(),
    }) as unknown as Evaluator;

  /**
   * Creates a ManagedAgent from the base config, optionally overriding the evaluator.
   */
  const buildAgent = (evaluator?: Evaluator): ManagedAgent => {
    if (evaluator === undefined) {
      return new ManagedAgent(agentConfig, mockRunner);
    }
    const configWithEvaluator: LDAIAgentConfig = { ...agentConfig, evaluator };
    return new ManagedAgent(configWithEvaluator, mockRunner);
  };

  beforeEach(() => {
    mockRunner = {
      run: jest.fn().mockResolvedValue(runnerResult),
    };

    mockTracker = {
      // Pass-through: invoke the wrapped function and hand back its result.
      trackMetricsOf: jest.fn().mockImplementation(async (_extractor: any, func: any) => func()),
      trackJudgeResult: jest.fn(),
      resumptionToken: 'agent-resumption-token',
      getTrackData: jest.fn().mockReturnValue({}),
      trackDuration: jest.fn(),
      trackTokens: jest.fn(),
      trackSuccess: jest.fn(),
      trackError: jest.fn(),
      trackFeedback: jest.fn(),
      trackTimeToFirstToken: jest.fn(),
      trackDurationOf: jest.fn(),
      trackOpenAIMetrics: jest.fn(),
      trackBedrockConverseMetrics: jest.fn(),
      trackVercelAIMetrics: jest.fn(),
      getSummary: jest.fn(),
    } as any;

    agentConfig = {
      key: 'test-agent',
      enabled: true,
      instructions: 'You are a helpful agent.',
      model: { name: 'gpt-4' },
      provider: { name: 'openai' },
      createTracker: () => mockTracker,
    };
  });

  it('returns a ManagedResult with content and metrics', async () => {
    const result = await buildAgent().run('Hello agent');

    expect(result.content).toBe('Agent response');
    expect(result.metrics.success).toBe(true);
    expect(result.metrics.resumptionToken).toBe('agent-resumption-token');
  });

  it('passes the prompt directly to the runner', async () => {
    await buildAgent().run('My question');

    expect(mockRunner.run).toHaveBeenCalledWith('My question');
  });

  it('resolves to empty evaluations when no evaluator configured', async () => {
    const result = await buildAgent().run('Hello');

    expect(await result.evaluations).toEqual([]);
  });

  it('resolves to empty evaluations with noop evaluator', async () => {
    const result = await buildAgent(Evaluator.noop()).run('Hello');

    expect(await result.evaluations).toEqual([]);
  });

  it('awaiting evaluations calls tracker.trackJudgeResult', async () => {
    const judgeResult: LDJudgeResult = {
      success: true,
      sampled: true,
      score: 0.85,
      metricKey: 'quality',
    };
    const evaluator = stubEvaluator(jest.fn().mockResolvedValue([judgeResult]));

    const result = await buildAgent(evaluator).run('Hello');
    await result.evaluations;

    expect(mockTracker.trackJudgeResult).toHaveBeenCalledWith(judgeResult);
  });

  it('passes the prompt to evaluator.evaluate as input', async () => {
    const evaluate = jest.fn().mockResolvedValue([]);

    const result = await buildAgent(stubEvaluator(evaluate)).run('user prompt');
    await result.evaluations;

    expect(evaluate).toHaveBeenCalledWith('user prompt', 'Agent response');
  });

  it('returns before evaluations resolve', async () => {
    // The evaluator's promise stays pending until the test releases it.
    let releaseEvaluation!: (v: LDJudgeResult[]) => void;
    const pendingEvaluation = new Promise<LDJudgeResult[]>((resolve) => {
      releaseEvaluation = resolve;
    });
    const agent = buildAgent(stubEvaluator(jest.fn().mockReturnValue(pendingEvaluation)));

    let evaluationsResolved = false;
    const result = await agent.run('Hello');

    expect(result.content).toBe('Agent response');

    result.evaluations.then(() => {
      evaluationsResolved = true;
    });

    // Yield one microtask tick: the evaluation must still be pending.
    await Promise.resolve();
    expect(evaluationsResolved).toBe(false);

    releaseEvaluation([{ success: true, sampled: true, score: 0.9 }]);
    await result.evaluations;
    expect(evaluationsResolved).toBe(true);
  });

  it('exposes the agent config via getConfig', () => {
    expect(buildAgent().getConfig()).toBe(agentConfig);
  });
});

packages/sdk/server-ai/src/LDAIClientImpl.ts

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ import { randomUUID } from 'node:crypto';
33

44
import { LDContext, LDLogger } from '@launchdarkly/js-server-sdk-common';
55

6+
import { ManagedAgent } from './api/agent/ManagedAgent';
67
import { ManagedModel } from './api/chat';
78
import {
89
LDAIAgentConfig,
@@ -38,6 +39,7 @@ import { aiSdkLanguage, aiSdkName, aiSdkVersion } from './sdkInfo';
3839
const TRACK_SDK_INFO = '$ld:ai:sdk:info';
3940
const TRACK_USAGE_COMPLETION_CONFIG = '$ld:ai:usage:completion-config';
4041
const TRACK_USAGE_CREATE_CHAT = '$ld:ai:usage:create-chat';
42+
const TRACK_USAGE_CREATE_AGENT = '$ld:ai:usage:create-agent';
4143
const TRACK_USAGE_JUDGE_CONFIG = '$ld:ai:usage:judge-config';
4244
const TRACK_USAGE_CREATE_JUDGE = '$ld:ai:usage:create-judge';
4345
const TRACK_USAGE_AGENT_CONFIG = '$ld:ai:usage:agent-config';
@@ -72,6 +74,30 @@ function runnerFromAIProvider(provider: AIProvider, config: LDAICompletionConfig
7274
};
7375
}
7476

77+
/**
 * Wraps a (deprecated) AIProvider in the Runner protocol for agent configs.
 *
 * Each `run(input)` call builds a fresh message list: the agent's
 * instructions as a leading system message (only when instructions are set),
 * followed by the input as a user message. The list is passed to
 * `provider.invokeModel`, so AIProvider-based agent flows keep their
 * instruction behavior under the stateless Runner contract.
 *
 * @param provider The AIProvider whose `invokeModel` performs the call.
 * @param config The agent config supplying the optional instructions.
 * @returns A Runner whose result carries the response message content, the
 * provider metrics, and the full provider response as `raw`.
 */
function runnerFromAIProviderForAgent(provider: AIProvider, config: LDAIAgentConfig): Runner {
  const run = async (input: string): Promise<RunnerResult> => {
    const userMessage: LDMessage = { role: 'user', content: input };
    const messages: LDMessage[] = config.instructions
      ? [{ role: 'system', content: config.instructions }, userMessage]
      : [userMessage];

    const response = await provider.invokeModel(messages);
    const { message, metrics } = response;
    return { content: message.content, metrics, raw: response };
  };

  return { run };
}
100+
75101
export class LDAIClientImpl implements LDAIClient {
76102
private _logger?: LDLogger;
77103

@@ -439,6 +465,45 @@ export class LDAIClientImpl implements LDAIClient {
439465
return new ManagedModel(configWithEvaluator, runner, this._logger);
440466
}
441467

468+
/**
 * Creates a ManagedAgent for the agent AI config identified by `key`.
 *
 * Records a `TRACK_USAGE_CREATE_AGENT` usage event, resolves the agent config
 * (falling back to a disabled config when no default is supplied), creates the
 * AI provider, builds the evaluator from the config's judges, and wraps the
 * provider in a Runner.
 *
 * @returns The ManagedAgent, or undefined when the config is disabled or no
 * provider could be created.
 */
async createAgent(
  key: string,
  context: LDContext,
  defaultValue?: LDAIAgentConfigDefault,
  variables?: Record<string, unknown>,
  defaultAiProvider?: SupportedAIProvider,
): Promise<ManagedAgent | undefined> {
  this._ldClient.track(TRACK_USAGE_CREATE_AGENT, context, key, 1);

  const fallback = defaultValue ?? disabledAIConfig;
  const config = await this._agentConfig(key, context, fallback, variables);
  if (!config.enabled) {
    this._logger?.info(`Agent configuration is disabled: ${key}`);
    return undefined;
  }

  const provider = await AIProviderFactory.create(config, this._logger, defaultAiProvider);
  if (!provider) {
    return undefined;
  }

  // The evaluator is attached to the config so ManagedAgent can run judges.
  const judges = config.judgeConfiguration?.judges ?? [];
  const evaluator = await this._buildEvaluator(judges, context, variables, defaultAiProvider);
  const configWithEvaluator: LDAIAgentConfig = { ...config, evaluator };

  return new ManagedAgent(
    configWithEvaluator,
    runnerFromAIProviderForAgent(provider, configWithEvaluator),
    this._logger,
  );
}
506+
442507
/**
443508
* @deprecated Use `createModel` instead. This method will be removed in a future version.
444509
*/

packages/sdk/server-ai/src/api/LDAIClient.ts

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import { LDContext } from '@launchdarkly/js-server-sdk-common';
22

3+
import { ManagedAgent } from './agent/ManagedAgent';
34
import { ManagedModel } from './chat';
45
import {
56
LDAIAgentConfig,
@@ -275,6 +276,25 @@ export interface LDAIClient {
275276
defaultAiProvider?: SupportedAIProvider,
276277
): Promise<ManagedModel | undefined>;
277278

279+
/**
280+
* Creates and returns a new ManagedAgent instance for agent interactions.
281+
* Evaluations are wired automatically and exposed on ManagedResult.evaluations.
282+
*
283+
* @param key The key identifying the agent AI config to use.
284+
* @param context The standard LDContext used when evaluating flags.
285+
* @param defaultValue Optional fallback when the configuration is not available from LaunchDarkly.
286+
* @param variables Dictionary of values for instruction interpolation.
287+
* @param defaultAiProvider Optional default AI provider to use.
288+
* @returns A promise that resolves to the ManagedAgent instance, or undefined if the configuration is disabled or no AI provider could be created.
289+
*/
290+
createAgent(
291+
key: string,
292+
context: LDContext,
293+
defaultValue?: LDAIAgentConfigDefault,
294+
variables?: Record<string, unknown>,
295+
defaultAiProvider?: SupportedAIProvider,
296+
): Promise<ManagedAgent | undefined>;
297+
278298
/**
279299
* @deprecated Use `createModel` instead. This method will be removed in a future version.
280300
*/
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
import { LDLogger } from '@launchdarkly/js-server-sdk-common';
2+
3+
import { LDAIAgentConfig } from '../config/types';
4+
import { LDJudgeResult } from '../judge/types';
5+
import { LDAIMetricSummary, ManagedResult, RunnerResult } from '../model/types';
6+
import { Runner } from '../providers/Runner';
7+
8+
/**
 * ManagedAgent provides agent invocation with automatic tracking and automatic
 * judge evaluation.
 *
 * The class is stateless: each `run()` call sends the prompt directly to the
 * underlying `Runner` and returns a `ManagedResult`. Conversation history,
 * if any, must be managed by the caller (or by the Runner implementation).
 *
 * Obtain an instance via `LDAIClient.createAgent()`.
 */
export class ManagedAgent {
  /**
   * @param aiAgentConfig The agent config; supplies the tracker factory and the optional evaluator.
   * @param runner The Runner that performs the actual agent invocation.
   * @param _logger Optional logger used to report judge evaluation failures.
   */
  constructor(
    protected readonly aiAgentConfig: LDAIAgentConfig,
    protected readonly runner: Runner,
    private readonly _logger?: LDLogger,
  ) {}

  /**
   * Invoke the agent with a prompt string and return a ManagedResult.
   *
   * `run()` resolves before `ManagedResult.evaluations` resolves. Awaiting
   * `evaluations` guarantees both judge evaluation and tracker.trackJudgeResult()
   * are complete.
   *
   * If judge evaluation (or tracking of its results) fails, the failure is
   * logged as a warning. `evaluations` still rejects for callers that await
   * it, but callers that never look at `evaluations` do not trigger an
   * unhandled promise rejection.
   *
   * @param prompt The user input to send to the agent.
   * @returns Promise resolving to ManagedResult (before evaluations settle).
   * @throws TypeError if the agent config does not provide `createTracker`.
   */
  async run(prompt: string): Promise<ManagedResult> {
    // Guard instead of a non-null assertion so a misconfigured agent fails
    // with an actionable message.
    if (typeof this.aiAgentConfig.createTracker !== 'function') {
      throw new TypeError(
        'ManagedAgent requires an agent config that provides createTracker().',
      );
    }
    const tracker = this.aiAgentConfig.createTracker();

    const result = await tracker.trackMetricsOf(
      (r: RunnerResult) => r.metrics,
      () => this.runner.run(prompt),
    );

    const metrics: LDAIMetricSummary = {
      success: result.metrics.success,
      usage: result.metrics.usage,
      toolCalls: result.metrics.toolCalls,
      durationMs: result.metrics.durationMs,
      resumptionToken: tracker.resumptionToken,
    };

    const output = result.content;
    const evaluator = this.aiAgentConfig.evaluator;
    let evaluations: Promise<LDJudgeResult[]>;
    if (evaluator) {
      evaluations = evaluator.evaluate(prompt, output).then((results) => {
        results.forEach((judgeResult) => {
          tracker.trackJudgeResult(judgeResult);
        });
        return results;
      });

      // Evaluations are fire-and-forget from run()'s perspective. Observe
      // failures here so an un-awaited `evaluations` promise cannot surface
      // as an unhandled rejection; the returned promise itself still rejects.
      evaluations.catch((error: unknown) => {
        const reason = error instanceof Error ? error.message : String(error);
        this._logger?.warn(
          `Judge evaluation failed for agent config ${this.aiAgentConfig.key}: ${reason}`,
        );
      });
    } else {
      evaluations = Promise.resolve([]);
    }

    return {
      content: output,
      metrics,
      raw: result.raw,
      parsed: result.parsed,
      evaluations,
    };
  }

  /**
   * Get the underlying AI agent configuration used to initialize this ManagedAgent.
   */
  getConfig(): LDAIAgentConfig {
    return this.aiAgentConfig;
  }
}
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
export { ManagedAgent } from './ManagedAgent';

packages/sdk/server-ai/src/api/index.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
export * from './config';
2+
export * from './agent';
23
export * from './chat';
34
export * from './graph';
45
export * from './judge';

0 commit comments

Comments
 (0)