Skip to content

Commit 3031d7d

Browse files
jsonbailey and claude committed
feat: add ManagedAgent with evaluations support (AIC-1657)
Introduces ManagedAgent class as the agent-mode analogue of TrackedChat. ManagedAgent.run() invokes the provider, tracks metrics, and wires judge evaluations (via aiAgentConfig.evaluator) into ManagedResult.evaluations. Adds LDAIClient.createAgent() to LDAIClientImpl and the LDAIClient interface. Exports ManagedAgent from the public API. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 4494a66 commit 3031d7d

6 files changed

Lines changed: 299 additions & 0 deletions

File tree

Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,144 @@
1+
import { ManagedAgent } from '../src/api/agent/ManagedAgent';
2+
import { LDAIAgentConfig } from '../src/api/config/types';
3+
import { LDAIConfigTracker } from '../src/api/config/LDAIConfigTracker';
4+
import { Evaluator } from '../src/api/judge/Evaluator';
5+
import { LDJudgeResult } from '../src/api/judge/types';
6+
import { AIProvider } from '../src/api/providers/AIProvider';
7+
import { ChatResponse } from '../src/api/chat/types';
8+
9+
describe('ManagedAgent', () => {
  // Canned provider reply shared by every test case.
  const mockResponse: ChatResponse = {
    message: { role: 'assistant', content: 'Agent response' },
    metrics: { success: true },
  };

  // Tracker methods that only need to exist as plain spies.
  const plainTrackerSpies = [
    'trackJudgeResult',
    'trackDuration',
    'trackTokens',
    'trackSuccess',
    'trackError',
    'trackFeedback',
    'trackTimeToFirstToken',
    'trackDurationOf',
    'trackOpenAIMetrics',
    'trackBedrockConverseMetrics',
    'trackVercelAISDKGenerateTextMetrics',
    'trackStreamMetricsOf',
    'trackToolCall',
    'trackToolCalls',
    'getSummary',
  ];

  let mockProvider: jest.Mocked<AIProvider>;
  let mockTracker: jest.Mocked<LDAIConfigTracker>;
  let agentConfig: LDAIAgentConfig;

  /** Builds a tracker double whose trackMetricsOf simply runs the wrapped call. */
  const createTrackerMock = (): jest.Mocked<LDAIConfigTracker> => {
    const spies: Record<string, unknown> = {};
    plainTrackerSpies.forEach((method) => {
      spies[method] = jest.fn();
    });
    spies.trackMetricsOf = jest
      .fn()
      .mockImplementation(async (_extractor: any, func: any) => func());
    spies.getTrackData = jest.fn().mockReturnValue({});
    spies.resumptionToken = 'agent-resumption-token';
    return spies as unknown as jest.Mocked<LDAIConfigTracker>;
  };

  /** Builds an evaluator double with one configured judge that resolves to `results`. */
  const createEvaluatorMock = (results: LDJudgeResult[]): Evaluator =>
    ({
      judgeConfiguration: { judges: [{ key: 'judge-1', samplingRate: 1.0 }] },
      evaluate: jest.fn().mockResolvedValue(results),
      judges: new Map(),
    }) as unknown as Evaluator;

  /** Creates the agent under test; defaults to the shared config of the current test. */
  const createAgent = (config: LDAIAgentConfig = agentConfig): ManagedAgent =>
    new ManagedAgent(config, mockProvider);

  beforeEach(() => {
    mockProvider = {
      invokeModel: jest.fn().mockResolvedValue(mockResponse),
    } as unknown as jest.Mocked<AIProvider>;

    mockTracker = createTrackerMock();

    agentConfig = {
      key: 'test-agent',
      enabled: true,
      instructions: 'You are a helpful agent.',
      model: { name: 'gpt-4' },
      provider: { name: 'openai' },
      createTracker: () => mockTracker,
    };
  });

  it('run() returns a ManagedResult with content and metrics', async () => {
    const { content, metrics } = await createAgent().run('Hello agent');

    expect(content).toBe('Agent response');
    expect(metrics.success).toBe(true);
    expect(metrics.resumptionToken).toBe('agent-resumption-token');
  });

  it('run() invokes the provider with the prompt as user message', async () => {
    await createAgent().run('My question');

    const expectedMessages = [{ role: 'user', content: 'My question' }];
    expect(mockProvider.invokeModel).toHaveBeenCalledWith(expectedMessages);
  });

  it('run() resolves to empty evaluations when no evaluator configured', async () => {
    const result = await createAgent().run('Hello');

    expect(await result.evaluations).toEqual([]);
  });

  it('run() resolves to empty evaluations with noop evaluator', async () => {
    const agent = createAgent({ ...agentConfig, evaluator: Evaluator.noop() });
    const result = await agent.run('Hello');

    expect(await result.evaluations).toEqual([]);
  });

  it('awaiting evaluations calls tracker.trackJudgeResult', async () => {
    const judgeResult: LDJudgeResult = {
      success: true,
      sampled: true,
      score: 0.85,
      metricKey: 'quality',
    };
    const agent = createAgent({
      ...agentConfig,
      evaluator: createEvaluatorMock([judgeResult]),
    });

    const result = await agent.run('Hello');
    await result.evaluations;

    expect(mockTracker.trackJudgeResult).toHaveBeenCalledWith(judgeResult);
  });

  it('evaluate() is called with prompt as input and response content as output', async () => {
    const evaluator = createEvaluatorMock([]);
    const agent = createAgent({ ...agentConfig, evaluator });

    const result = await agent.run('user prompt');
    await result.evaluations;

    expect(evaluator.evaluate).toHaveBeenCalledWith('user prompt', 'Agent response');
  });

  it('getConfig() returns the agent config', () => {
    expect(createAgent().getConfig()).toBe(agentConfig);
  });

  it('getProvider() returns the provider', () => {
    expect(createAgent().getProvider()).toBe(mockProvider);
  });
});

packages/sdk/server-ai/src/LDAIClientImpl.ts

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ import { randomUUID } from 'node:crypto';
33

44
import { LDContext, LDLogger } from '@launchdarkly/js-server-sdk-common';
55

6+
import { ManagedAgent } from './api/agent/ManagedAgent';
67
import { TrackedChat } from './api/chat';
78
import {
89
LDAIAgentConfig,
@@ -37,6 +38,7 @@ import { aiSdkLanguage, aiSdkName, aiSdkVersion } from './sdkInfo';
3738
const TRACK_SDK_INFO = '$ld:ai:sdk:info';
3839
const TRACK_USAGE_COMPLETION_CONFIG = '$ld:ai:usage:completion-config';
3940
const TRACK_USAGE_CREATE_CHAT = '$ld:ai:usage:create-chat';
41+
const TRACK_USAGE_CREATE_AGENT = '$ld:ai:usage:create-agent';
4042
const TRACK_USAGE_JUDGE_CONFIG = '$ld:ai:usage:judge-config';
4143
const TRACK_USAGE_CREATE_JUDGE = '$ld:ai:usage:create-judge';
4244
const TRACK_USAGE_AGENT_CONFIG = '$ld:ai:usage:agent-config';
@@ -420,6 +422,44 @@ export class LDAIClientImpl implements LDAIClient {
420422
return this.createChat(key, context, defaultValue, variables, defaultAiProvider);
421423
}
422424

425+
/**
 * Creates a ManagedAgent for the agent AI config identified by `key`.
 *
 * Records a create-agent usage event, resolves the agent configuration
 * (falling back to `defaultValue`, or to the disabled config when none is
 * given), creates the AI provider, and attaches an evaluator built from the
 * configuration's judges.
 *
 * @param key The key identifying the agent AI config to use.
 * @param context The LDContext used for evaluation and usage tracking.
 * @param defaultValue Optional fallback agent configuration.
 * @param variables Optional variables forwarded to the config and evaluator builders.
 * @param defaultAiProvider Optional default AI provider forwarded to the provider factory.
 * @returns The ManagedAgent, or undefined when the configuration is disabled
 * or no provider could be created.
 */
async createAgent(
  key: string,
  context: LDContext,
  defaultValue?: LDAIAgentConfigDefault,
  variables?: Record<string, unknown>,
  defaultAiProvider?: SupportedAIProvider,
): Promise<ManagedAgent | undefined> {
  this._ldClient.track(TRACK_USAGE_CREATE_AGENT, context, key, 1);

  const fallback = defaultValue ?? disabledAIConfig;
  const agentConfig = await this._agentConfig(key, context, fallback, variables);
  if (!agentConfig.enabled) {
    this._logger?.info(`Agent configuration is disabled: ${key}`);
    return undefined;
  }

  const aiProvider = await AIProviderFactory.create(agentConfig, this._logger, defaultAiProvider);
  if (!aiProvider) {
    return undefined;
  }

  // The evaluator is attached to a copy so the resolved config object is left untouched.
  const judges = agentConfig.judgeConfiguration?.judges ?? [];
  const evaluator = await this._buildEvaluator(judges, context, variables, defaultAiProvider);
  const managedConfig: LDAIAgentConfig = { ...agentConfig, evaluator };

  return new ManagedAgent(managedConfig, aiProvider, this._logger);
}
462+
423463
/**
424464
* @deprecated Use `createChat` instead. This method will be removed in a future version.
425465
*/

packages/sdk/server-ai/src/api/LDAIClient.ts

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import { LDContext } from '@launchdarkly/js-server-sdk-common';
22

3+
import { ManagedAgent } from './agent/ManagedAgent';
34
import { TrackedChat } from './chat';
45
import {
56
LDAIAgentConfig,
@@ -296,6 +297,25 @@ export interface LDAIClient {
296297
defaultAiProvider?: SupportedAIProvider,
297298
): Promise<TrackedChat | undefined>;
298299

300+
/**
301+
* Creates and returns a new ManagedAgent instance for agent interactions.
302+
* Evaluations are wired automatically and exposed on ManagedResult.evaluations.
303+
*
304+
* @param key The key identifying the agent AI config to use.
305+
* @param context The standard LDContext used when evaluating flags.
306+
* @param defaultValue Optional fallback when the configuration is not available from LaunchDarkly.
307+
* @param variables Dictionary of values for instruction interpolation.
308+
* @param defaultAiProvider Optional default AI provider to use.
309+
* @returns A promise that resolves to the ManagedAgent instance, or undefined if the configuration is disabled or no AI provider could be created.
310+
*/
311+
createAgent(
312+
key: string,
313+
context: LDContext,
314+
defaultValue?: LDAIAgentConfigDefault,
315+
variables?: Record<string, unknown>,
316+
defaultAiProvider?: SupportedAIProvider,
317+
): Promise<ManagedAgent | undefined>;
318+
299319
/**
300320
* @deprecated Use `createChat` instead. This method will be removed in a future version.
301321
*/
Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
import { LDLogger } from '@launchdarkly/js-server-sdk-common';
2+
3+
import { LDAIAgentConfig } from '../config/types';
4+
import { LDJudgeResult } from '../judge/types';
5+
import { LDAIMetricSummary, ManagedResult } from '../model/types';
6+
import { AIProvider } from '../providers/AIProvider';
7+
import { ChatResponse } from '../chat/types';
8+
9+
/**
 * ManagedAgent provides agent invocation with automatic judge evaluation.
 *
 * This is the agent-mode analogue of TrackedChat (ManagedModel). It invokes
 * the provider, tracks metrics, and wires judge evaluations into a single
 * Promise exposed on ManagedResult.evaluations.
 *
 * Obtain an instance via `LDAIClient.createAgent()`.
 */
export class ManagedAgent {
  /**
   * @param aiAgentConfig The agent configuration; supplies the tracker factory
   * and the optional evaluator.
   * @param provider The provider used to invoke the model.
   * @param _logger Optional logger used to report evaluation failures.
   */
  constructor(
    protected readonly aiAgentConfig: LDAIAgentConfig,
    protected readonly provider: AIProvider,
    private readonly _logger?: LDLogger,
  ) {}

  /**
   * Invoke the agent with a prompt string and return a ManagedResult.
   *
   * run() returns before ManagedResult.evaluations resolves. Awaiting evaluations
   * guarantees both evaluation and tracker.trackJudgeResult() are complete.
   *
   * If evaluation or judge-result tracking fails, ManagedResult.evaluations
   * rejects with that error. The failure is also logged as a warning and
   * marked as observed, so callers that never await evaluations do not
   * trigger an unhandled promise rejection.
   *
   * @param prompt The user input to send to the agent.
   * @returns Promise resolving to ManagedResult (before evaluations settle).
   * @throws TypeError when the agent configuration has no `createTracker`.
   */
  async run(prompt: string): Promise<ManagedResult> {
    const { aiAgentConfig } = this;
    if (!aiAgentConfig.createTracker) {
      throw new TypeError(
        `Agent configuration "${aiAgentConfig.key}" does not provide createTracker; cannot track the run.`,
      );
    }
    // Called as a member so `this` inside createTracker stays bound to the config.
    const tracker = aiAgentConfig.createTracker();

    const userMessage = { role: 'user' as const, content: prompt };
    const allMessages = [userMessage];

    // Delegate to provider-specific implementation with tracking
    const response = await tracker.trackMetricsOf(
      (result: ChatResponse) => result.metrics,
      () => this.provider.invokeModel(allMessages),
    );

    // Build the metric summary from response metrics + resumption token
    const metrics: LDAIMetricSummary = {
      success: response.metrics.success,
      usage: response.metrics.usage,
      toolCalls: response.metrics.toolCalls,
      durationMs: response.metrics.durationMs,
      resumptionToken: tracker.resumptionToken,
    };

    const output = response.message.content;

    // Wire evaluation + tracking into a single Promise.
    // run() returns before this resolves — awaiting evaluations guarantees
    // both evaluation and tracking are complete. Running inside an async
    // closure turns a synchronous throw from the evaluator into a rejection of
    // `evaluations` instead of discarding the model response obtained above.
    const { evaluator } = aiAgentConfig;
    const runEvaluations = async (): Promise<LDJudgeResult[]> => {
      if (!evaluator || evaluator.judgeConfiguration.judges.length === 0) {
        return [];
      }
      const results = await evaluator.evaluate(prompt, output);
      results.forEach((judgeResult) => {
        tracker.trackJudgeResult(judgeResult);
      });
      return results;
    };
    const evaluations = runEvaluations();

    // Observe failures so an un-awaited `evaluations` never becomes an
    // unhandled rejection. The returned promise itself still rejects for
    // callers that do await it.
    void evaluations.catch((error: unknown) => {
      const reason = error instanceof Error ? error.message : String(error);
      this._logger?.warn(`Judge evaluation failed for agent "${aiAgentConfig.key}": ${reason}`);
    });

    return {
      content: output,
      metrics,
      evaluations,
    };
  }

  /**
   * Get the underlying AI agent configuration.
   */
  getConfig(): LDAIAgentConfig {
    return this.aiAgentConfig;
  }

  /**
   * Get the underlying AI provider instance.
   */
  getProvider(): AIProvider {
    return this.provider;
  }
}
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
export { ManagedAgent } from './ManagedAgent';

packages/sdk/server-ai/src/api/index.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
export * from './config';
2+
export * from './agent';
23
export * from './chat';
34
export * from './graph';
45
export * from './judge';

0 commit comments

Comments
 (0)