From 94b313d52c9adffc89a73507e780a1992e514c1b Mon Sep 17 00:00:00 2001 From: Jason Bailey Date: Fri, 1 May 2026 11:19:59 -0500 Subject: [PATCH 01/19] feat: add Evaluator class for judge orchestration (#1331) --- .../sdk/server-ai/__tests__/Evaluator.test.ts | 108 ++++++++++++++++++ .../sdk/server-ai/__tests__/Judge.test.ts | 83 ++++++++++++-- .../__tests__/LDAIClientImpl.test.ts | 2 +- packages/sdk/server-ai/src/LDAIClientImpl.ts | 71 ++++++++---- packages/sdk/server-ai/src/api/LDAIClient.ts | 3 + .../sdk/server-ai/src/api/config/types.ts | 15 +++ .../sdk/server-ai/src/api/judge/Evaluator.ts | 24 ++++ packages/sdk/server-ai/src/api/judge/Judge.ts | 25 +++- 8 files changed, 287 insertions(+), 44 deletions(-) create mode 100644 packages/sdk/server-ai/__tests__/Evaluator.test.ts create mode 100644 packages/sdk/server-ai/src/api/judge/Evaluator.ts diff --git a/packages/sdk/server-ai/__tests__/Evaluator.test.ts b/packages/sdk/server-ai/__tests__/Evaluator.test.ts new file mode 100644 index 0000000000..730b77eb0c --- /dev/null +++ b/packages/sdk/server-ai/__tests__/Evaluator.test.ts @@ -0,0 +1,108 @@ +import { LDAIJudgeConfig } from '../src/api/config/types'; +import { Evaluator } from '../src/api/judge/Evaluator'; +import { Judge } from '../src/api/judge/Judge'; +import { LDJudgeResult } from '../src/api/judge/types'; +import { AIProvider } from '../src/api/providers/AIProvider'; + +function makeJudgeConfig(key: string): LDAIJudgeConfig { + return { + key, + enabled: true, + evaluationMetricKey: '$ld:ai:judge:quality', + messages: [{ role: 'system', content: 'You are a judge.' 
}], + createTracker: () => ({}) as any, + }; +} + +function makeProvider(): jest.Mocked { + return { + invokeModel: jest.fn(), + invokeStructuredModel: jest.fn(), + } as any; +} + +describe('Evaluator', () => { + describe('noop()', () => { + it('returns an empty result array', async () => { + const evaluator = Evaluator.noop(); + const results = await evaluator.evaluate('input', 'output'); + expect(results).toEqual([]); + }); + }); + + describe('evaluate()', () => { + it('calls each configured judge and returns results', async () => { + const mockProvider = makeProvider(); + const judgeConfig = makeJudgeConfig('judge-1'); + + const mockResult: LDJudgeResult = { + success: true, + sampled: true, + score: 0.9, + reasoning: 'Good response', + metricKey: '$ld:ai:judge:quality', + judgeConfigKey: 'judge-1', + }; + + const judge = new Judge(judgeConfig, mockProvider, 1.0); + jest.spyOn(judge, 'evaluate').mockResolvedValue(mockResult); + + const evaluator = new Evaluator([judge]); + + const results = await evaluator.evaluate('user input', 'ai output'); + + expect(results).toHaveLength(1); + expect(results[0]).toEqual(mockResult); + // Evaluator does not pass a per-call samplingRate — judge uses its own. 
+ expect(judge.evaluate).toHaveBeenCalledWith('user input', 'ai output'); + }); + + it('does NOT call tracker.trackJudgeResult', async () => { + const mockProvider = makeProvider(); + const judgeConfig = makeJudgeConfig('judge-1'); + + const mockResult: LDJudgeResult = { + success: true, + sampled: true, + score: 0.8, + reasoning: 'ok', + metricKey: '$ld:ai:judge:quality', + }; + + const judge = new Judge(judgeConfig, mockProvider, 1.0); + jest.spyOn(judge, 'evaluate').mockResolvedValue(mockResult); + + const evaluator = new Evaluator([judge]); + + // No tracker — if Evaluator tried to call trackJudgeResult this would throw or fail + await evaluator.evaluate('input', 'output'); + + // Test passes if no error is thrown (no tracker involved) + expect(true).toBe(true); + }); + + it('runs multiple judges in parallel and returns all results', async () => { + const makeJudge = (key: string, score: number): Judge => { + const mockProvider = makeProvider(); + const jc = makeJudgeConfig(key); + const j = new Judge(jc, mockProvider, 1.0); + jest.spyOn(j, 'evaluate').mockResolvedValue({ + success: true, + sampled: true, + score, + reasoning: 'ok', + metricKey: '$ld:ai:judge:quality', + }); + return j; + }; + + const evaluator = new Evaluator([makeJudge('judge-a', 0.5), makeJudge('judge-b', 0.9)]); + + const results = await evaluator.evaluate('input', 'output'); + + expect(results).toHaveLength(2); + const scores = results.map((r) => r.score).sort(); + expect(scores).toEqual([0.5, 0.9]); + }); + }); +}); diff --git a/packages/sdk/server-ai/__tests__/Judge.test.ts b/packages/sdk/server-ai/__tests__/Judge.test.ts index 43ea75e0ab..044ecd1f6d 100644 --- a/packages/sdk/server-ai/__tests__/Judge.test.ts +++ b/packages/sdk/server-ai/__tests__/Judge.test.ts @@ -54,17 +54,76 @@ describe('Judge', () => { describe('constructor', () => { it('initializes with proper configuration', () => { - const judge = new Judge(judgeConfig, mockProvider, mockLogger); + const judge = new 
Judge(judgeConfig, mockProvider, 1.0, mockLogger); expect(judge).toBeDefined(); }); + + it('defaults sampleRate to 1.0 when omitted', () => { + const judge = new Judge(judgeConfig, mockProvider); + expect(judge.sampleRate).toBe(1.0); + }); + + it('exposes the sampleRate provided to the constructor', () => { + const judge = new Judge(judgeConfig, mockProvider, 0.25, mockLogger); + expect(judge.sampleRate).toBe(0.25); + }); + + it('honors a sampleRate of 0', () => { + const judge = new Judge(judgeConfig, mockProvider, 0, mockLogger); + expect(judge.sampleRate).toBe(0); + }); + }); + + describe('sampling fallback in evaluate()', () => { + it('uses the constructor sampleRate when no per-call rate is supplied', async () => { + // Force sampling to skip: math.random() returns 0.6, sampleRate 0.5 → 0.6 > 0.5 → skip. + const randomSpy = jest.spyOn(Math, 'random').mockReturnValue(0.6); + + const judge = new Judge(judgeConfig, mockProvider, 0.5, mockLogger); + const result = await judge.evaluate('input', 'output'); + + // Skipped due to sampling: sampled stays false (default), no provider call. + expect(result.sampled).toBe(false); + expect(mockProvider.invokeStructuredModel).not.toHaveBeenCalled(); + + randomSpy.mockRestore(); + }); + + it('honors an explicit per-call samplingRate of 0 over the constructor default', async () => { + // Even with Math.random() at 0, samplingRate=0 means 0 > 0 is false — skip path is + // `Math.random() > rate`, so rate=0 + random=0 does NOT skip. Use random=0.5. + const randomSpy = jest.spyOn(Math, 'random').mockReturnValue(0.5); + + // Constructor rate is 1.0 (would normally always sample); per-call 0 overrides to skip. 
+ const judge = new Judge(judgeConfig, mockProvider, 1.0, mockLogger); + const result = await judge.evaluate('input', 'output', 0); + + expect(result.sampled).toBe(false); + expect(mockProvider.invokeStructuredModel).not.toHaveBeenCalled(); + + randomSpy.mockRestore(); + }); + + it('per-call samplingRate of undefined falls through to the constructor default', async () => { + // Constructor 0 (always skip), per-call undefined → effective rate 0. + const randomSpy = jest.spyOn(Math, 'random').mockReturnValue(0.5); + + const judge = new Judge(judgeConfig, mockProvider, 0, mockLogger); + const result = await judge.evaluate('input', 'output', undefined); + + expect(result.sampled).toBe(false); + expect(mockProvider.invokeStructuredModel).not.toHaveBeenCalled(); + + randomSpy.mockRestore(); + }); }); describe('evaluate', () => { let judge: Judge; beforeEach(() => { - judge = new Judge(judgeConfig, mockProvider, mockLogger); + judge = new Judge(judgeConfig, mockProvider, 1.0, mockLogger); }); it('evaluates AI response successfully', async () => { @@ -205,7 +264,7 @@ describe('Judge', () => { evaluationMetricKey: undefined, evaluationMetricKeys: [], }; - const judgeWithoutMetrics = new Judge(configWithoutMetrics, mockProvider, mockLogger); + const judgeWithoutMetrics = new Judge(configWithoutMetrics, mockProvider, 1.0, mockLogger); const result = await judgeWithoutMetrics.evaluate('test input', 'test output'); @@ -227,7 +286,7 @@ describe('Judge', () => { evaluationMetricKey: 'relevance', evaluationMetricKeys: undefined, }; - const judgeWithSingleKey = new Judge(configWithSingleKey, mockProvider, mockLogger); + const judgeWithSingleKey = new Judge(configWithSingleKey, mockProvider, 1.0, mockLogger); const mockStructuredResponse: StructuredResponse = { data: { @@ -265,7 +324,7 @@ describe('Judge', () => { evaluationMetricKey: undefined, evaluationMetricKeys: ['relevance', 'accuracy'], }; - const judgeWithLegacyKeys = new Judge(configWithLegacyKeys, mockProvider, 
mockLogger); + const judgeWithLegacyKeys = new Judge(configWithLegacyKeys, mockProvider, 1.0, mockLogger); const mockStructuredResponse: StructuredResponse = { data: { @@ -303,7 +362,7 @@ describe('Judge', () => { evaluationMetricKey: undefined, evaluationMetricKeys: ['', ' ', 'relevance', 'accuracy'], }; - const judgeWithInvalidKeys = new Judge(configWithInvalidKeys, mockProvider, mockLogger); + const judgeWithInvalidKeys = new Judge(configWithInvalidKeys, mockProvider, 1.0, mockLogger); const mockStructuredResponse: StructuredResponse = { data: { @@ -342,7 +401,7 @@ describe('Judge', () => { evaluationMetricKey: 'helpfulness', evaluationMetricKeys: ['relevance', 'accuracy'], }; - const judgeWithBoth = new Judge(configWithBoth, mockProvider, mockLogger); + const judgeWithBoth = new Judge(configWithBoth, mockProvider, 1.0, mockLogger); const mockStructuredResponse: StructuredResponse = { data: { @@ -379,7 +438,7 @@ describe('Judge', () => { ...judgeConfig, messages: undefined, }; - const judgeWithoutMessages = new Judge(configWithoutMessages, mockProvider, mockLogger); + const judgeWithoutMessages = new Judge(configWithoutMessages, mockProvider, 1.0, mockLogger); const result = await judgeWithoutMessages.evaluate('test input', 'test output'); @@ -488,7 +547,7 @@ describe('Judge', () => { let judge: Judge; beforeEach(() => { - judge = new Judge(judgeConfig, mockProvider, mockLogger); + judge = new Judge(judgeConfig, mockProvider, 1.0, mockLogger); }); it('evaluates messages and response successfully', async () => { @@ -573,7 +632,7 @@ describe('Judge', () => { let judge: Judge; beforeEach(() => { - judge = new Judge(judgeConfig, mockProvider, mockLogger); + judge = new Judge(judgeConfig, mockProvider, 1.0, mockLogger); }); it('constructs evaluation messages correctly', () => { @@ -598,7 +657,7 @@ describe('Judge', () => { let judge: Judge; beforeEach(() => { - judge = new Judge(judgeConfig, mockProvider, mockLogger); + judge = new Judge(judgeConfig, mockProvider, 
1.0, mockLogger); }); it('parses valid evaluation response correctly', () => { @@ -669,7 +728,7 @@ describe('Judge', () => { evaluationMetricKey: undefined, evaluationMetricKeys: [], }; - const judgeWithEmptyKeys = new Judge(configWithEmptyKeys, mockProvider, mockLogger); + const judgeWithEmptyKeys = new Judge(configWithEmptyKeys, mockProvider, 1.0, mockLogger); const result = await judgeWithEmptyKeys.evaluate('test input', 'test output'); diff --git a/packages/sdk/server-ai/__tests__/LDAIClientImpl.test.ts b/packages/sdk/server-ai/__tests__/LDAIClientImpl.test.ts index 8892fd9a30..ca8aaaf6fa 100644 --- a/packages/sdk/server-ai/__tests__/LDAIClientImpl.test.ts +++ b/packages/sdk/server-ai/__tests__/LDAIClientImpl.test.ts @@ -673,7 +673,7 @@ describe('createJudge method', () => { response_to_evaluate: '{{response_to_evaluate}}', }); expect(AIProviderFactory.create).toHaveBeenCalledWith(mockJudgeConfig, undefined, undefined); - expect(Judge).toHaveBeenCalledWith(mockJudgeConfig, mockProvider, undefined); + expect(Judge).toHaveBeenCalledWith(mockJudgeConfig, mockProvider, 1.0, undefined); expect(result).toBe(mockJudge); judgeConfigSpy.mockRestore(); }); diff --git a/packages/sdk/server-ai/src/LDAIClientImpl.ts b/packages/sdk/server-ai/src/LDAIClientImpl.ts index 316a84e56d..16248fe564 100644 --- a/packages/sdk/server-ai/src/LDAIClientImpl.ts +++ b/packages/sdk/server-ai/src/LDAIClientImpl.ts @@ -22,6 +22,7 @@ import { } from './api/config'; import { LDAIConfigFlagValue, LDAIConfigUtils } from './api/config/LDAIConfigUtils'; import { AgentGraphDefinition, LDAgentGraphFlagValue, LDGraphTracker } from './api/graph'; +import { Evaluator } from './api/judge/Evaluator'; import { Judge } from './api/judge/Judge'; import { LDAIClient } from './api/LDAIClient'; import { AIProviderFactory, SupportedAIProvider } from './api/providers'; @@ -141,33 +142,32 @@ export class LDAIClientImpl implements LDAIClient { return config; } - private async _initializeJudges( + private async 
_buildEvaluator( judgeConfigs: LDJudge[], context: LDContext, variables?: Record, defaultAiProvider?: SupportedAIProvider, - ): Promise> { - const judges: Record = {}; - - const judgePromises = judgeConfigs.map(async (judgeConfig) => { - const judge = await this.createJudge( - judgeConfig.key, - context, - undefined, - variables, - defaultAiProvider, - ); - return judge ? { key: judgeConfig.key, judge } : null; - }); - - const results = await Promise.all(judgePromises); - results.forEach((result) => { - if (result) { - judges[result.key] = result.judge; - } - }); + ): Promise { + if (judgeConfigs.length === 0) { + return Evaluator.noop(); + } - return judges; + const judgeInstances = ( + await Promise.all( + judgeConfigs.map((jc) => + this._createJudgeInstance( + jc.key, + context, + undefined, + variables, + defaultAiProvider, + jc.samplingRate, + ), + ), + ) + ).filter((j): j is Judge => j !== undefined); + + return new Evaluator(judgeInstances); } private async _completionConfig( @@ -318,14 +318,17 @@ export class LDAIClientImpl implements LDAIClient { return undefined; } - const judges = await this._initializeJudges( + const evaluator = await this._buildEvaluator( config.judgeConfiguration?.judges ?? 
[], context, variables, defaultAiProvider, ); - return new TrackedChat(config, provider, judges, this._logger); + // Attach the evaluator to the config for use by the managed layer + const configWithEvaluator: LDAICompletionConfig = { ...config, evaluator }; + + return new TrackedChat(configWithEvaluator, provider, {}, this._logger); } async createJudge( @@ -334,9 +337,27 @@ export class LDAIClientImpl implements LDAIClient { defaultValue?: LDAIJudgeConfigDefault, variables?: Record, defaultAiProvider?: SupportedAIProvider, + sampleRate: number = 1.0, ): Promise { this._ldClient.track(TRACK_USAGE_CREATE_JUDGE, context, key, 1); + return this._createJudgeInstance( + key, + context, + defaultValue, + variables, + defaultAiProvider, + sampleRate, + ); + } + private async _createJudgeInstance( + key: string, + context: LDContext, + defaultValue?: LDAIJudgeConfigDefault, + variables?: Record, + defaultAiProvider?: SupportedAIProvider, + sampleRate: number = 1.0, + ): Promise { try { if (variables?.message_history !== undefined) { this._logger?.warn( @@ -373,7 +394,7 @@ export class LDAIClientImpl implements LDAIClient { return undefined; } - return new Judge(judgeConfig, provider, this._logger); + return new Judge(judgeConfig, provider, sampleRate, this._logger); } catch (error) { this._logger?.error(`Failed to initialize judge ${key}:`, error); return undefined; diff --git a/packages/sdk/server-ai/src/api/LDAIClient.ts b/packages/sdk/server-ai/src/api/LDAIClient.ts index 5dfec98072..fa8170e0eb 100644 --- a/packages/sdk/server-ai/src/api/LDAIClient.ts +++ b/packages/sdk/server-ai/src/api/LDAIClient.ts @@ -297,6 +297,8 @@ export interface LDAIClient { * @param variables Dictionary of values for instruction interpolation. * The variables `message_history` and `response_to_evaluate` are reserved for the judge and will be ignored. * @param defaultAiProvider Optional default AI provider to use. + * @param sampleRate Optional default sampling rate (0-1) baked into the Judge. 
+ * Used by `Judge.evaluate()` when no per-call rate is supplied. Defaults to 1.0. * @returns Promise that resolves to a Judge instance or undefined if disabled/unsupported * * @example @@ -326,6 +328,7 @@ export interface LDAIClient { defaultValue?: LDAIJudgeConfigDefault, variables?: Record, defaultAiProvider?: SupportedAIProvider, + sampleRate?: number, ): Promise; /** diff --git a/packages/sdk/server-ai/src/api/config/types.ts b/packages/sdk/server-ai/src/api/config/types.ts index d1c2a161e2..159cad8f83 100644 --- a/packages/sdk/server-ai/src/api/config/types.ts +++ b/packages/sdk/server-ai/src/api/config/types.ts @@ -1,3 +1,4 @@ +import type { Evaluator } from '../judge/Evaluator'; import { LDAIConfigTracker } from './LDAIConfigTracker'; // ============================================================================ @@ -220,6 +221,13 @@ export interface LDAIAgentConfig extends LDAIConfig { * Root-level tools map keyed by tool name. Distinct from model.parameters.tools[]. */ tools?: { [toolName: string]: LDTool }; + /** + * Evaluator for this agent config. Populated by createAgent. + * Not part of the flag value shape. + * + * @internal + */ + evaluator?: Evaluator; } /** @@ -239,6 +247,13 @@ export interface LDAICompletionConfig extends LDAIConfig { * Root-level tools map keyed by tool name. Distinct from model.parameters.tools[]. */ tools?: { [toolName: string]: LDTool }; + /** + * Evaluator for this completion config. Populated by createChat/createModel. + * Not part of the flag value shape. 
+ * + * @internal + */ + evaluator?: Evaluator; } /** diff --git a/packages/sdk/server-ai/src/api/judge/Evaluator.ts b/packages/sdk/server-ai/src/api/judge/Evaluator.ts new file mode 100644 index 0000000000..8d596364ad --- /dev/null +++ b/packages/sdk/server-ai/src/api/judge/Evaluator.ts @@ -0,0 +1,24 @@ +import { Judge } from './Judge'; +import { LDJudgeResult } from './types'; + +/** + * Wraps a collection of judges, providing a single `evaluate` method that + * runs all judges against a given input/output pair. + * + * @internal + */ +export class Evaluator { + constructor(private readonly _judges: Judge[]) {} + + static noop(): Evaluator { + return new Evaluator([]); + } + + async evaluate(input: string, output: string): Promise { + if (this._judges.length === 0) { + return []; + } + + return Promise.all(this._judges.map((judge) => judge.evaluate(input, output))); + } +} diff --git a/packages/sdk/server-ai/src/api/judge/Judge.ts b/packages/sdk/server-ai/src/api/judge/Judge.ts index ef49e3b723..820014ffaa 100644 --- a/packages/sdk/server-ai/src/api/judge/Judge.ts +++ b/packages/sdk/server-ai/src/api/judge/Judge.ts @@ -37,11 +37,20 @@ export class Judge { constructor( private readonly _aiConfig: LDAIJudgeConfig, private readonly _aiProvider: AIProvider, + private readonly _sampleRate: number = 1.0, logger?: LDLogger, ) { this._logger = logger; } + /** + * The default sampling rate baked in at construction. Used by `evaluate` / + * `evaluateMessages` when no per-call rate is supplied. + */ + get sampleRate(): number { + return this._sampleRate; + } + /** * Gets the evaluation metric key, prioritizing evaluationMetricKey over evaluationMetricKeys. * Falls back to the first valid (non-empty, non-whitespace) value in evaluationMetricKeys if evaluationMetricKey is not provided. 
@@ -69,10 +78,13 @@ export class Judge { * * @param input The input prompt or question that was provided to the AI * @param output The AI-generated response to be evaluated - * @param samplingRate Sampling rate (0-1) to determine if evaluation should be processed (defaults to 1) + * @param samplingRate Sampling rate (0-1) to determine if evaluation should be processed. + * When omitted, the Judge's constructor-default rate is used. An explicit `0` overrides + * the default — only `undefined` falls through. * @returns Promise that resolves to evaluation results */ - async evaluate(input: string, output: string, samplingRate: number = 1): Promise { + async evaluate(input: string, output: string, samplingRate?: number): Promise { + const effectiveRate = samplingRate ?? this._sampleRate; const result: LDJudgeResult = { success: false, sampled: false, @@ -99,8 +111,8 @@ export class Judge { return result; } - if (Math.random() > samplingRate) { - this._logger?.debug(`Judge evaluation skipped due to sampling rate: ${samplingRate}`); + if (Math.random() > effectiveRate) { + this._logger?.debug(`Judge evaluation skipped due to sampling rate: ${effectiveRate}`); return result; } @@ -143,13 +155,14 @@ export class Judge { * * @param messages Array of messages representing the conversation history * @param response The AI response to be evaluated - * @param samplingRatio Sampling ratio (0-1) to determine if evaluation should be processed (defaults to 1) + * @param samplingRatio Sampling ratio (0-1). When omitted, the Judge's + * constructor-default rate is used. * @returns Promise that resolves to evaluation results */ async evaluateMessages( messages: LDMessage[], response: ChatResponse, - samplingRatio: number = 1, + samplingRatio?: number, ): Promise { const input = messages.length === 0 ? 
'' : messages.map((msg) => msg.content).join('\r\n'); const output = response.message.content; From dcc81305724a95bcd00c38bb9db3bd095f89ee96 Mon Sep 17 00:00:00 2001 From: Jason Bailey Date: Mon, 4 May 2026 16:04:36 -0500 Subject: [PATCH 02/19] feat: introduce ManagedResult, RunnerResult, and LDAIMetricSummary (#1332) --- .../sdk/server-ai/__tests__/Evaluator.test.ts | 19 +- .../sdk/server-ai/__tests__/Judge.test.ts | 206 +++++++------ .../__tests__/LDAIClientImpl.test.ts | 35 ++- .../__tests__/LDAIConfigTrackerImpl.test.ts | 75 ++++- .../__tests__/LDGraphTrackerImpl.test.ts | 5 +- .../server-ai/__tests__/ManagedModel.test.ts | 136 +++++++++ .../server-ai/__tests__/RunnerFactory.test.ts | 273 ++++++++++++++++++ .../server-ai/__tests__/TrackedChat.test.ts | 231 --------------- .../examples/chat-judge/src/index.ts | 13 +- .../examples/chat-observability/src/index.ts | 10 +- .../examples/tracked-chat/src/index.ts | 10 +- packages/sdk/server-ai/src/LDAIClientImpl.ts | 157 ++++++---- .../server-ai/src/LDAIConfigTrackerImpl.ts | 20 +- .../sdk/server-ai/src/LDGraphTrackerImpl.ts | 26 +- packages/sdk/server-ai/src/api/LDAIClient.ts | 36 ++- .../sdk/server-ai/src/api/ManagedModel.ts | 63 ++++ .../sdk/server-ai/src/api/chat/TrackedChat.ts | 167 ----------- packages/sdk/server-ai/src/api/chat/index.ts | 1 - .../src/api/config/LDAIConfigTracker.ts | 31 +- .../src/api/config/LDAIConfigUtils.ts | 31 +- .../sdk/server-ai/src/api/config/types.ts | 10 +- .../server-ai/src/api/graph/LDGraphTracker.ts | 11 +- packages/sdk/server-ai/src/api/graph/types.ts | 57 +++- packages/sdk/server-ai/src/api/index.ts | 2 + packages/sdk/server-ai/src/api/judge/Judge.ts | 23 +- .../server-ai/src/api/metrics/LDAIMetrics.ts | 12 + packages/sdk/server-ai/src/api/model/index.ts | 1 + packages/sdk/server-ai/src/api/model/types.ts | 105 +++++++ .../server-ai/src/api/providers/AIProvider.ts | 78 ++++- .../src/api/providers/AIProviderFactory.ts | 132 --------- .../sdk/server-ai/src/api/providers/Runner.ts | 
39 +++ .../src/api/providers/RunnerFactory.ts | 264 +++++++++++++++++ .../sdk/server-ai/src/api/providers/index.ts | 3 +- 33 files changed, 1452 insertions(+), 830 deletions(-) create mode 100644 packages/sdk/server-ai/__tests__/ManagedModel.test.ts create mode 100644 packages/sdk/server-ai/__tests__/RunnerFactory.test.ts delete mode 100644 packages/sdk/server-ai/__tests__/TrackedChat.test.ts create mode 100644 packages/sdk/server-ai/src/api/ManagedModel.ts delete mode 100644 packages/sdk/server-ai/src/api/chat/TrackedChat.ts create mode 100644 packages/sdk/server-ai/src/api/model/index.ts create mode 100644 packages/sdk/server-ai/src/api/model/types.ts delete mode 100644 packages/sdk/server-ai/src/api/providers/AIProviderFactory.ts create mode 100644 packages/sdk/server-ai/src/api/providers/Runner.ts create mode 100644 packages/sdk/server-ai/src/api/providers/RunnerFactory.ts diff --git a/packages/sdk/server-ai/__tests__/Evaluator.test.ts b/packages/sdk/server-ai/__tests__/Evaluator.test.ts index 730b77eb0c..7cdafb3104 100644 --- a/packages/sdk/server-ai/__tests__/Evaluator.test.ts +++ b/packages/sdk/server-ai/__tests__/Evaluator.test.ts @@ -2,7 +2,7 @@ import { LDAIJudgeConfig } from '../src/api/config/types'; import { Evaluator } from '../src/api/judge/Evaluator'; import { Judge } from '../src/api/judge/Judge'; import { LDJudgeResult } from '../src/api/judge/types'; -import { AIProvider } from '../src/api/providers/AIProvider'; +import { Runner } from '../src/api/providers/Runner'; function makeJudgeConfig(key: string): LDAIJudgeConfig { return { @@ -14,10 +14,9 @@ function makeJudgeConfig(key: string): LDAIJudgeConfig { }; } -function makeProvider(): jest.Mocked { +function makeRunner(): jest.Mocked { return { - invokeModel: jest.fn(), - invokeStructuredModel: jest.fn(), + run: jest.fn(), } as any; } @@ -32,7 +31,7 @@ describe('Evaluator', () => { describe('evaluate()', () => { it('calls each configured judge and returns results', async () => { - const 
mockProvider = makeProvider(); + const mockRunner = makeRunner(); const judgeConfig = makeJudgeConfig('judge-1'); const mockResult: LDJudgeResult = { @@ -44,7 +43,7 @@ describe('Evaluator', () => { judgeConfigKey: 'judge-1', }; - const judge = new Judge(judgeConfig, mockProvider, 1.0); + const judge = new Judge(judgeConfig, mockRunner, 1.0); jest.spyOn(judge, 'evaluate').mockResolvedValue(mockResult); const evaluator = new Evaluator([judge]); @@ -58,7 +57,7 @@ describe('Evaluator', () => { }); it('does NOT call tracker.trackJudgeResult', async () => { - const mockProvider = makeProvider(); + const mockRunner = makeRunner(); const judgeConfig = makeJudgeConfig('judge-1'); const mockResult: LDJudgeResult = { @@ -69,7 +68,7 @@ describe('Evaluator', () => { metricKey: '$ld:ai:judge:quality', }; - const judge = new Judge(judgeConfig, mockProvider, 1.0); + const judge = new Judge(judgeConfig, mockRunner, 1.0); jest.spyOn(judge, 'evaluate').mockResolvedValue(mockResult); const evaluator = new Evaluator([judge]); @@ -83,9 +82,9 @@ describe('Evaluator', () => { it('runs multiple judges in parallel and returns all results', async () => { const makeJudge = (key: string, score: number): Judge => { - const mockProvider = makeProvider(); + const mockRunner = makeRunner(); const jc = makeJudgeConfig(key); - const j = new Judge(jc, mockProvider, 1.0); + const j = new Judge(jc, mockRunner, 1.0); jest.spyOn(j, 'evaluate').mockResolvedValue({ success: true, sampled: true, diff --git a/packages/sdk/server-ai/__tests__/Judge.test.ts b/packages/sdk/server-ai/__tests__/Judge.test.ts index 044ecd1f6d..bd49305f4d 100644 --- a/packages/sdk/server-ai/__tests__/Judge.test.ts +++ b/packages/sdk/server-ai/__tests__/Judge.test.ts @@ -3,11 +3,11 @@ import { LDLogger } from '@launchdarkly/js-server-sdk-common'; import { LDAIConfigTracker } from '../src/api/config/LDAIConfigTracker'; import { LDAIJudgeConfig, LDMessage } from '../src/api/config/types'; import { Judge } from 
'../src/api/judge/Judge'; -import { StructuredResponse } from '../src/api/judge/types'; -import { AIProvider } from '../src/api/providers/AIProvider'; +import { RunnerResult } from '../src/api/model/types'; +import { Runner } from '../src/api/providers/Runner'; describe('Judge', () => { - let mockProvider: jest.Mocked; + let mockRunner: jest.Mocked; let mockTracker: jest.Mocked; let mockLogger: jest.Mocked; let judgeConfig: LDAIJudgeConfig; @@ -19,8 +19,8 @@ describe('Judge', () => { }; beforeEach(() => { - mockProvider = { - invokeStructuredModel: jest.fn(), + mockRunner = { + run: jest.fn(), } as any; mockTracker = { @@ -54,23 +54,23 @@ describe('Judge', () => { describe('constructor', () => { it('initializes with proper configuration', () => { - const judge = new Judge(judgeConfig, mockProvider, 1.0, mockLogger); + const judge = new Judge(judgeConfig, mockRunner, 1.0, mockLogger); expect(judge).toBeDefined(); }); it('defaults sampleRate to 1.0 when omitted', () => { - const judge = new Judge(judgeConfig, mockProvider); + const judge = new Judge(judgeConfig, mockRunner); expect(judge.sampleRate).toBe(1.0); }); it('exposes the sampleRate provided to the constructor', () => { - const judge = new Judge(judgeConfig, mockProvider, 0.25, mockLogger); + const judge = new Judge(judgeConfig, mockRunner, 0.25, mockLogger); expect(judge.sampleRate).toBe(0.25); }); it('honors a sampleRate of 0', () => { - const judge = new Judge(judgeConfig, mockProvider, 0, mockLogger); + const judge = new Judge(judgeConfig, mockRunner, 0, mockLogger); expect(judge.sampleRate).toBe(0); }); }); @@ -80,12 +80,12 @@ describe('Judge', () => { // Force sampling to skip: math.random() returns 0.6, sampleRate 0.5 → 0.6 > 0.5 → skip. 
const randomSpy = jest.spyOn(Math, 'random').mockReturnValue(0.6); - const judge = new Judge(judgeConfig, mockProvider, 0.5, mockLogger); + const judge = new Judge(judgeConfig, mockRunner, 0.5, mockLogger); const result = await judge.evaluate('input', 'output'); - // Skipped due to sampling: sampled stays false (default), no provider call. + // Skipped due to sampling: sampled stays false (default), no runner call. expect(result.sampled).toBe(false); - expect(mockProvider.invokeStructuredModel).not.toHaveBeenCalled(); + expect(mockRunner.run).not.toHaveBeenCalled(); randomSpy.mockRestore(); }); @@ -96,11 +96,11 @@ describe('Judge', () => { const randomSpy = jest.spyOn(Math, 'random').mockReturnValue(0.5); // Constructor rate is 1.0 (would normally always sample); per-call 0 overrides to skip. - const judge = new Judge(judgeConfig, mockProvider, 1.0, mockLogger); + const judge = new Judge(judgeConfig, mockRunner, 1.0, mockLogger); const result = await judge.evaluate('input', 'output', 0); expect(result.sampled).toBe(false); - expect(mockProvider.invokeStructuredModel).not.toHaveBeenCalled(); + expect(mockRunner.run).not.toHaveBeenCalled(); randomSpy.mockRestore(); }); @@ -109,11 +109,11 @@ describe('Judge', () => { // Constructor 0 (always skip), per-call undefined → effective rate 0. 
const randomSpy = jest.spyOn(Math, 'random').mockReturnValue(0.5); - const judge = new Judge(judgeConfig, mockProvider, 0, mockLogger); + const judge = new Judge(judgeConfig, mockRunner, 0, mockLogger); const result = await judge.evaluate('input', 'output', undefined); expect(result.sampled).toBe(false); - expect(mockProvider.invokeStructuredModel).not.toHaveBeenCalled(); + expect(mockRunner.run).not.toHaveBeenCalled(); randomSpy.mockRestore(); }); @@ -123,19 +123,16 @@ describe('Judge', () => { let judge: Judge; beforeEach(() => { - judge = new Judge(judgeConfig, mockProvider, 1.0, mockLogger); + judge = new Judge(judgeConfig, mockRunner, 1.0, mockLogger); }); it('evaluates AI response successfully', async () => { - const mockStructuredResponse: StructuredResponse = { - data: { + const mockRunnerResult: RunnerResult = { + content: '', + parsed: { score: 0.8, reasoning: 'The response is relevant to the question', }, - rawResponse: JSON.stringify({ - score: 0.8, - reasoning: 'The response is relevant to the question', - }), metrics: { success: true, usage: { @@ -147,7 +144,7 @@ describe('Judge', () => { }; mockTracker.trackMetricsOf.mockImplementation(async (extractor, func) => func()); - mockProvider.invokeStructuredModel.mockResolvedValue(mockStructuredResponse); + mockRunner.run.mockResolvedValue(mockRunnerResult); const result = await judge.evaluate( 'What is the capital of France?', @@ -163,7 +160,7 @@ describe('Judge', () => { judgeConfigKey: 'test-judge', }); - expect(mockProvider.invokeStructuredModel).toHaveBeenCalledWith( + expect(mockRunner.run).toHaveBeenCalledWith( expect.arrayContaining([ expect.objectContaining({ role: 'system', @@ -175,20 +172,17 @@ describe('Judge', () => { 'Evaluate and report scores for important metrics: Input: What is the capital of France?, Output: Paris is the capital of France.', }), ]), - expect.any(Object), // evaluation response structure + expect.any(Object), // evaluation schema ); }); it('returns evaluation result with 
correct evaluationMetricKey for tracker integration', async () => { - const mockStructuredResponse: StructuredResponse = { - data: { + const mockRunnerResult: RunnerResult = { + content: '', + parsed: { score: 0.85, reasoning: 'Highly relevant response', }, - rawResponse: JSON.stringify({ - score: 0.85, - reasoning: 'Highly relevant response', - }), metrics: { success: true, usage: { total: 100, input: 50, output: 50 }, @@ -196,7 +190,7 @@ describe('Judge', () => { }; mockTracker.trackMetricsOf.mockImplementation(async (extractor, func) => func()); - mockProvider.invokeStructuredModel.mockResolvedValue(mockStructuredResponse); + mockRunner.run.mockResolvedValue(mockRunnerResult); const result = await judge.evaluate('test input', 'test output'); @@ -212,15 +206,12 @@ describe('Judge', () => { const originalRandom = Math.random; Math.random = jest.fn().mockReturnValue(0.3); - const mockStructuredResponse: StructuredResponse = { - data: { + const mockRunnerResult: RunnerResult = { + content: '', + parsed: { score: 0.8, reasoning: 'Good', }, - rawResponse: JSON.stringify({ - score: 0.8, - reasoning: 'Good', - }), metrics: { success: true, usage: { total: 100, input: 50, output: 50 }, @@ -228,13 +219,13 @@ describe('Judge', () => { }; mockTracker.trackMetricsOf.mockImplementation(async (extractor, func) => func()); - mockProvider.invokeStructuredModel.mockResolvedValue(mockStructuredResponse); + mockRunner.run.mockResolvedValue(mockRunnerResult); const result = await judge.evaluate('test input', 'test output', 0.5); expect(result).toBeDefined(); expect(result.sampled).toBe(true); - expect(mockProvider.invokeStructuredModel).toHaveBeenCalled(); + expect(mockRunner.run).toHaveBeenCalled(); Math.random = originalRandom; }); @@ -250,7 +241,7 @@ describe('Judge', () => { sampled: false, judgeConfigKey: 'test-judge', }); - expect(mockProvider.invokeStructuredModel).not.toHaveBeenCalled(); + expect(mockRunner.run).not.toHaveBeenCalled(); 
expect(mockLogger.debug).toHaveBeenCalledWith( 'Judge evaluation skipped due to sampling rate: 0.5', ); @@ -264,7 +255,7 @@ describe('Judge', () => { evaluationMetricKey: undefined, evaluationMetricKeys: [], }; - const judgeWithoutMetrics = new Judge(configWithoutMetrics, mockProvider, 1.0, mockLogger); + const judgeWithoutMetrics = new Judge(configWithoutMetrics, mockRunner, 1.0, mockLogger); const result = await judgeWithoutMetrics.evaluate('test input', 'test output'); @@ -286,17 +277,14 @@ describe('Judge', () => { evaluationMetricKey: 'relevance', evaluationMetricKeys: undefined, }; - const judgeWithSingleKey = new Judge(configWithSingleKey, mockProvider, 1.0, mockLogger); + const judgeWithSingleKey = new Judge(configWithSingleKey, mockRunner, 1.0, mockLogger); - const mockStructuredResponse: StructuredResponse = { - data: { + const mockRunnerResult: RunnerResult = { + content: '', + parsed: { score: 0.8, reasoning: 'The response is relevant', }, - rawResponse: JSON.stringify({ - score: 0.8, - reasoning: 'The response is relevant', - }), metrics: { success: true, usage: { total: 100, input: 50, output: 50 }, @@ -304,7 +292,7 @@ describe('Judge', () => { }; mockTracker.trackMetricsOf.mockImplementation(async (extractor, func) => func()); - mockProvider.invokeStructuredModel.mockResolvedValue(mockStructuredResponse); + mockRunner.run.mockResolvedValue(mockRunnerResult); const result = await judgeWithSingleKey.evaluate('test input', 'test output'); @@ -324,17 +312,14 @@ describe('Judge', () => { evaluationMetricKey: undefined, evaluationMetricKeys: ['relevance', 'accuracy'], }; - const judgeWithLegacyKeys = new Judge(configWithLegacyKeys, mockProvider, 1.0, mockLogger); + const judgeWithLegacyKeys = new Judge(configWithLegacyKeys, mockRunner, 1.0, mockLogger); - const mockStructuredResponse: StructuredResponse = { - data: { + const mockRunnerResult: RunnerResult = { + content: '', + parsed: { score: 0.8, reasoning: 'The response is relevant', }, - rawResponse: 
JSON.stringify({ - score: 0.8, - reasoning: 'The response is relevant', - }), metrics: { success: true, usage: { total: 100, input: 50, output: 50 }, @@ -342,7 +327,7 @@ describe('Judge', () => { }; mockTracker.trackMetricsOf.mockImplementation(async (extractor, func) => func()); - mockProvider.invokeStructuredModel.mockResolvedValue(mockStructuredResponse); + mockRunner.run.mockResolvedValue(mockRunnerResult); const result = await judgeWithLegacyKeys.evaluate('test input', 'test output'); @@ -362,17 +347,14 @@ describe('Judge', () => { evaluationMetricKey: undefined, evaluationMetricKeys: ['', ' ', 'relevance', 'accuracy'], }; - const judgeWithInvalidKeys = new Judge(configWithInvalidKeys, mockProvider, 1.0, mockLogger); + const judgeWithInvalidKeys = new Judge(configWithInvalidKeys, mockRunner, 1.0, mockLogger); - const mockStructuredResponse: StructuredResponse = { - data: { + const mockRunnerResult: RunnerResult = { + content: '', + parsed: { score: 0.8, reasoning: 'The response is relevant', }, - rawResponse: JSON.stringify({ - score: 0.8, - reasoning: 'The response is relevant', - }), metrics: { success: true, usage: { total: 100, input: 50, output: 50 }, @@ -380,7 +362,7 @@ describe('Judge', () => { }; mockTracker.trackMetricsOf.mockImplementation(async (extractor, func) => func()); - mockProvider.invokeStructuredModel.mockResolvedValue(mockStructuredResponse); + mockRunner.run.mockResolvedValue(mockRunnerResult); const result = await judgeWithInvalidKeys.evaluate('test input', 'test output'); @@ -401,17 +383,14 @@ describe('Judge', () => { evaluationMetricKey: 'helpfulness', evaluationMetricKeys: ['relevance', 'accuracy'], }; - const judgeWithBoth = new Judge(configWithBoth, mockProvider, 1.0, mockLogger); + const judgeWithBoth = new Judge(configWithBoth, mockRunner, 1.0, mockLogger); - const mockStructuredResponse: StructuredResponse = { - data: { + const mockRunnerResult: RunnerResult = { + content: '', + parsed: { score: 0.7, reasoning: 'The response is 
helpful', }, - rawResponse: JSON.stringify({ - score: 0.7, - reasoning: 'The response is helpful', - }), metrics: { success: true, usage: { total: 100, input: 50, output: 50 }, @@ -419,7 +398,7 @@ describe('Judge', () => { }; mockTracker.trackMetricsOf.mockImplementation(async (extractor, func) => func()); - mockProvider.invokeStructuredModel.mockResolvedValue(mockStructuredResponse); + mockRunner.run.mockResolvedValue(mockRunnerResult); const result = await judgeWithBoth.evaluate('test input', 'test output'); @@ -438,7 +417,7 @@ describe('Judge', () => { ...judgeConfig, messages: undefined, }; - const judgeWithoutMessages = new Judge(configWithoutMessages, mockProvider, 1.0, mockLogger); + const judgeWithoutMessages = new Judge(configWithoutMessages, mockRunner, 1.0, mockLogger); const result = await judgeWithoutMessages.evaluate('test input', 'test output'); @@ -454,10 +433,10 @@ describe('Judge', () => { ); }); - it('returns result with success false when response has no score or reasoning', async () => { - const mockStructuredResponse: StructuredResponse = { - data: {}, - rawResponse: '{}', + it('returns result with success false when parsed is undefined or has no score/reasoning', async () => { + const mockRunnerResult: RunnerResult = { + content: '', + parsed: undefined, metrics: { success: true, usage: { total: 100, input: 50, output: 50 }, @@ -465,7 +444,33 @@ describe('Judge', () => { }; mockTracker.trackMetricsOf.mockImplementation(async (extractor, func) => func()); - mockProvider.invokeStructuredModel.mockResolvedValue(mockStructuredResponse); + mockRunner.run.mockResolvedValue(mockRunnerResult); + + const result = await judge.evaluate('test input', 'test output'); + + expect(result).toEqual({ + success: false, + sampled: true, + judgeConfigKey: 'test-judge', + }); + expect(mockLogger.warn).toHaveBeenCalledWith( + 'Could not parse evaluation response: undefined', + mockTrackData, + ); + }); + + it('returns result with success false when parsed is an 
empty object', async () => { + const mockRunnerResult: RunnerResult = { + content: '', + parsed: {}, + metrics: { + success: true, + usage: { total: 100, input: 50, output: 50 }, + }, + }; + + mockTracker.trackMetricsOf.mockImplementation(async (extractor, func) => func()); + mockRunner.run.mockResolvedValue(mockRunnerResult); const result = await judge.evaluate('test input', 'test output'); @@ -481,17 +486,13 @@ describe('Judge', () => { }); it('returns result with success false when response structure is malformed', async () => { - const mockStructuredResponse: StructuredResponse = { - data: { + const mockRunnerResult: RunnerResult = { + content: '', + parsed: { evaluations: { relevance: { score: 0.8, reasoning: 'Good' }, }, }, - rawResponse: JSON.stringify({ - evaluations: { - relevance: { score: 0.8, reasoning: 'Good' }, - }, - }), metrics: { success: true, usage: { total: 100, input: 50, output: 50 }, @@ -499,7 +500,7 @@ describe('Judge', () => { }; mockTracker.trackMetricsOf.mockImplementation(async (extractor, func) => func()); - mockProvider.invokeStructuredModel.mockResolvedValue(mockStructuredResponse); + mockRunner.run.mockResolvedValue(mockRunnerResult); const result = await judge.evaluate('test input', 'test output'); @@ -514,7 +515,7 @@ describe('Judge', () => { ); }); - it('handles provider errors gracefully', async () => { + it('handles runner errors gracefully', async () => { const error = new Error('Provider error'); mockTracker.trackMetricsOf.mockRejectedValue(error); @@ -547,7 +548,7 @@ describe('Judge', () => { let judge: Judge; beforeEach(() => { - judge = new Judge(judgeConfig, mockProvider, 1.0, mockLogger); + judge = new Judge(judgeConfig, mockRunner, 1.0, mockLogger); }); it('evaluates messages and response successfully', async () => { @@ -560,15 +561,12 @@ describe('Judge', () => { metrics: { success: true }, }; - const mockStructuredResponse: StructuredResponse = { - data: { + const mockRunnerResult: RunnerResult = { + content: '', + 
parsed: { score: 0.8, reasoning: 'The response is relevant to the question', }, - rawResponse: JSON.stringify({ - score: 0.8, - reasoning: 'The response is relevant to the question', - }), metrics: { success: true, usage: { total: 100, input: 50, output: 50 }, @@ -576,7 +574,7 @@ describe('Judge', () => { }; mockTracker.trackMetricsOf.mockImplementation(async (extractor, func) => func()); - mockProvider.invokeStructuredModel.mockResolvedValue(mockStructuredResponse); + mockRunner.run.mockResolvedValue(mockRunnerResult); const result = await judge.evaluateMessages(messages, response); @@ -589,7 +587,7 @@ describe('Judge', () => { judgeConfigKey: 'test-judge', }); - expect(mockProvider.invokeStructuredModel).toHaveBeenCalledWith( + expect(mockRunner.run).toHaveBeenCalledWith( expect.arrayContaining([ expect.objectContaining({ role: 'system', @@ -601,7 +599,7 @@ describe('Judge', () => { 'Evaluate and report scores for important metrics: Input: What is the capital of France?\r\nParis is the capital of France., Output: Paris is the capital of France.', }), ]), - expect.any(Object), // evaluation response structure + expect.any(Object), // evaluation schema ); }); @@ -622,7 +620,7 @@ describe('Judge', () => { sampled: false, judgeConfigKey: 'test-judge', }); - expect(mockProvider.invokeStructuredModel).not.toHaveBeenCalled(); + expect(mockRunner.run).not.toHaveBeenCalled(); Math.random = originalRandom; }); @@ -632,7 +630,7 @@ describe('Judge', () => { let judge: Judge; beforeEach(() => { - judge = new Judge(judgeConfig, mockProvider, 1.0, mockLogger); + judge = new Judge(judgeConfig, mockRunner, 1.0, mockLogger); }); it('constructs evaluation messages correctly', () => { @@ -657,7 +655,7 @@ describe('Judge', () => { let judge: Judge; beforeEach(() => { - judge = new Judge(judgeConfig, mockProvider, 1.0, mockLogger); + judge = new Judge(judgeConfig, mockRunner, 1.0, mockLogger); }); it('parses valid evaluation response correctly', () => { @@ -728,7 +726,7 @@ 
describe('Judge', () => { evaluationMetricKey: undefined, evaluationMetricKeys: [], }; - const judgeWithEmptyKeys = new Judge(configWithEmptyKeys, mockProvider, 1.0, mockLogger); + const judgeWithEmptyKeys = new Judge(configWithEmptyKeys, mockRunner, 1.0, mockLogger); const result = await judgeWithEmptyKeys.evaluate('test input', 'test output'); diff --git a/packages/sdk/server-ai/__tests__/LDAIClientImpl.test.ts b/packages/sdk/server-ai/__tests__/LDAIClientImpl.test.ts index ca8aaaf6fa..0e5ca6de0e 100644 --- a/packages/sdk/server-ai/__tests__/LDAIClientImpl.test.ts +++ b/packages/sdk/server-ai/__tests__/LDAIClientImpl.test.ts @@ -5,15 +5,16 @@ import { LDAICompletionConfigDefault, LDAIJudgeConfigDefault, } from '../src/api/config/types'; +import { Evaluator } from '../src/api/judge/Evaluator'; import { Judge } from '../src/api/judge/Judge'; -import { AIProviderFactory } from '../src/api/providers/AIProviderFactory'; +import { RunnerFactory } from '../src/api/providers/RunnerFactory'; import { LDAIClientImpl } from '../src/LDAIClientImpl'; import { LDClientMin } from '../src/LDClientMin'; import { aiSdkLanguage, aiSdkName, aiSdkVersion } from '../src/sdkInfo'; -// Mock Judge and AIProviderFactory +// Mock Judge and RunnerFactory jest.mock('../src/api/judge/Judge'); -jest.mock('../src/api/providers/AIProviderFactory'); +jest.mock('../src/api/providers/RunnerFactory'); const mockLdClient: jest.Mocked = { variation: jest.fn(), @@ -80,6 +81,8 @@ describe('config evaluation', () => { defaultValue, 'completion', variables, + undefined, + undefined, ); expect(result.messages).toEqual([ { role: 'system', content: 'Hello John' }, @@ -146,6 +149,7 @@ describe('config evaluation', () => { 'agent', variables, undefined, + undefined, ); expect(result.instructions).toBe( 'You are a helpful assistant. 
Your name is John and your score is 42', @@ -338,7 +342,7 @@ describe('config evaluation', () => { const result = await client.completionConfig(key, testContext, defaultValue); expect(result.enabled).toBe(false); - expect(result.createTracker).toBeUndefined(); + expect(result.createTracker).toBeInstanceOf(Function); }); it('handles missing metadata mode by defaulting to completion mode', async () => { @@ -436,6 +440,8 @@ describe('completionConfig method', () => { defaultValue, 'completion', variables, + undefined, + undefined, ); expect(result).toBeDefined(); evaluateSpy.mockRestore(); @@ -458,6 +464,7 @@ describe('agentConfig method', () => { instructions: 'You are a helpful assistant.', createTracker: () => ({}) as any, enabled: true, + evaluator: Evaluator.noop(), }; const evaluateSpy = jest.spyOn(client as any, '_evaluate'); @@ -478,8 +485,10 @@ describe('agentConfig method', () => { 'agent', variables, undefined, + undefined, ); - expect(result).toBe(mockConfig); + expect(result).toMatchObject(mockConfig); + expect(result.evaluator).toBeInstanceOf(Evaluator); evaluateSpy.mockRestore(); }); }); @@ -542,6 +551,7 @@ describe('agents method', () => { provider: { name: 'openai' }, instructions: 'You are a research assistant specializing in climate change.', createTracker: expect.any(Function), + evaluator: expect.any(Evaluator), enabled: true, }, 'writing-agent': { @@ -553,6 +563,7 @@ describe('agents method', () => { provider: { name: 'anthropic' }, instructions: 'You are a writing assistant with academic style.', createTracker: expect.any(Function), + evaluator: expect.any(Evaluator), enabled: true, }, }); @@ -631,8 +642,8 @@ describe('createJudge method', () => { evaluateMessages: jest.fn(), } as any; - // Mock AIProviderFactory.create - (AIProviderFactory.create as jest.Mock).mockResolvedValue(mockProvider); + // Mock RunnerFactory.createModel + (RunnerFactory.createModel as jest.Mock).mockResolvedValue(mockProvider); // Mock Judge constructor (Judge as 
jest.MockedClass).mockImplementation(() => mockJudge); @@ -672,7 +683,7 @@ describe('createJudge method', () => { message_history: '{{message_history}}', response_to_evaluate: '{{response_to_evaluate}}', }); - expect(AIProviderFactory.create).toHaveBeenCalledWith(mockJudgeConfig, undefined, undefined); + expect(RunnerFactory.createModel).toHaveBeenCalledWith(mockJudgeConfig, undefined, undefined); expect(Judge).toHaveBeenCalledWith(mockJudgeConfig, mockProvider, 1.0, undefined); expect(result).toBe(mockJudge); judgeConfigSpy.mockRestore(); @@ -697,12 +708,12 @@ describe('createJudge method', () => { const result = await client.createJudge(key, testContext, defaultValue); expect(result).toBeUndefined(); - expect(AIProviderFactory.create).not.toHaveBeenCalled(); + expect(RunnerFactory.createModel).not.toHaveBeenCalled(); expect(Judge).not.toHaveBeenCalled(); judgeConfigSpy.mockRestore(); }); - it('returns undefined when AIProviderFactory.create fails', async () => { + it('returns undefined when RunnerFactory.createModel returns undefined', async () => { const client = new LDAIClientImpl(mockLdClient); const key = 'test-judge'; const defaultValue: LDAIJudgeConfigDefault = { @@ -723,12 +734,12 @@ describe('createJudge method', () => { const judgeConfigSpy = jest.spyOn(client as any, '_judgeConfig'); judgeConfigSpy.mockResolvedValue(mockJudgeConfig); - (AIProviderFactory.create as jest.Mock).mockResolvedValue(undefined); + (RunnerFactory.createModel as jest.Mock).mockResolvedValue(undefined); const result = await client.createJudge(key, testContext, defaultValue); expect(result).toBeUndefined(); - expect(AIProviderFactory.create).toHaveBeenCalledWith(mockJudgeConfig, undefined, undefined); + expect(RunnerFactory.createModel).toHaveBeenCalledWith(mockJudgeConfig, undefined, undefined); expect(Judge).not.toHaveBeenCalled(); judgeConfigSpy.mockRestore(); }); diff --git a/packages/sdk/server-ai/__tests__/LDAIConfigTrackerImpl.test.ts 
b/packages/sdk/server-ai/__tests__/LDAIConfigTrackerImpl.test.ts index 4263bc3048..2c723de035 100644 --- a/packages/sdk/server-ai/__tests__/LDAIConfigTrackerImpl.test.ts +++ b/packages/sdk/server-ai/__tests__/LDAIConfigTrackerImpl.test.ts @@ -579,7 +579,7 @@ it('only tracks non-zero token counts', () => { ); }); -it('returns empty summary when no metrics tracked', () => { +it('returns summary with resumptionToken immediately after construction', () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, testRunId, @@ -593,7 +593,11 @@ it('returns empty summary when no metrics tracked', () => { const summary = tracker.getSummary(); - expect(summary).toEqual({}); + expect(summary.resumptionToken).toBe(tracker.resumptionToken); + expect(typeof summary.resumptionToken).toBe('string'); + expect(summary.success).toBeUndefined(); + expect(summary.tokens).toBeUndefined(); + expect(summary.durationMs).toBeUndefined(); }); it('summarizes tracked metrics', () => { @@ -620,6 +624,7 @@ it('summarizes tracked metrics', () => { const summary = tracker.getSummary(); expect(summary).toEqual({ + resumptionToken: tracker.resumptionToken, durationMs: 1000, tokens: { total: 100, @@ -633,6 +638,26 @@ it('summarizes tracked metrics', () => { }); }); +it('accumulates toolCalls in getSummary after trackToolCall', () => { + const tracker = new LDAIConfigTrackerImpl( + mockLdClient, + testRunId, + configKey, + variationKey, + version, + modelName, + providerName, + testContext, + ); + + tracker.trackToolCall('tool-a'); + tracker.trackToolCall('tool-b'); + + const summary = tracker.getSummary(); + + expect(summary.toolCalls).toEqual(['tool-a', 'tool-b']); +}); + it('tracks duration when async function throws', async () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, @@ -1002,6 +1027,52 @@ describe('trackToolCalls', () => { }); }); +describe('trackStreamMetricsOf', () => { + it('tracks tool calls from streaming metrics extractor', async () => { + const tracker = new 
LDAIConfigTrackerImpl( + mockLdClient, + testRunId, + configKey, + variationKey, + version, + modelName, + providerName, + testContext, + ); + + const mockStream = {}; + const metricsExtractor = jest.fn().mockResolvedValue({ + success: true, + toolCalls: ['tool-a', 'tool-b'], + }); + + tracker.trackStreamMetricsOf( + () => mockStream, + metricsExtractor, + ); + + // Flush promises so the background tracking completes + await Promise.resolve(); + await Promise.resolve(); + + const summary = tracker.getSummary(); + expect(summary.toolCalls).toEqual(['tool-a', 'tool-b']); + + expect(mockTrack).toHaveBeenCalledWith( + '$ld:ai:tool_call', + testContext, + { ...getExpectedTrackData(), toolKey: 'tool-a' }, + 1, + ); + expect(mockTrack).toHaveBeenCalledWith( + '$ld:ai:tool_call', + testContext, + { ...getExpectedTrackData(), toolKey: 'tool-b' }, + 1, + ); + }); +}); + describe('graphKey constructor support', () => { it('includes graphKey in trackDuration event when set on constructor', () => { const tracker = new LDAIConfigTrackerImpl( diff --git a/packages/sdk/server-ai/__tests__/LDGraphTrackerImpl.test.ts b/packages/sdk/server-ai/__tests__/LDGraphTrackerImpl.test.ts index 507d0d3b09..915ed4741a 100644 --- a/packages/sdk/server-ai/__tests__/LDGraphTrackerImpl.test.ts +++ b/packages/sdk/server-ai/__tests__/LDGraphTrackerImpl.test.ts @@ -102,9 +102,10 @@ it('fromResumptionToken reconstructs the tracker with original runId', () => { // getSummary // --------------------------------------------------------------------------- -it('returns an empty summary initially', () => { +it('returns a summary with only the resumption token initially', () => { const tracker = makeTracker('r'); - expect(tracker.getSummary()).toEqual({}); + const summary = tracker.getSummary(); + expect(summary).toEqual({ resumptionToken: tracker.resumptionToken }); }); it('returns a copy of the summary (not a reference)', () => { diff --git a/packages/sdk/server-ai/__tests__/ManagedModel.test.ts 
b/packages/sdk/server-ai/__tests__/ManagedModel.test.ts new file mode 100644 index 0000000000..09158bd7ce --- /dev/null +++ b/packages/sdk/server-ai/__tests__/ManagedModel.test.ts @@ -0,0 +1,136 @@ +import { ManagedModel } from '../src/api/ManagedModel'; +import { LDAIConfigTracker } from '../src/api/config/LDAIConfigTracker'; +import { LDAICompletionConfig } from '../src/api/config/types'; +import { Evaluator } from '../src/api/judge/Evaluator'; +import { RunnerResult } from '../src/api/model/types'; +import { Runner } from '../src/api/providers/Runner'; + +describe('ManagedModel', () => { + let mockRunner: jest.Mocked; + let mockTracker: jest.Mocked; + let aiConfig: LDAICompletionConfig; + + beforeEach(() => { + mockRunner = { + run: jest.fn(), + }; + + mockTracker = { + trackMetricsOf: jest.fn(), + trackDuration: jest.fn(), + trackTokens: jest.fn(), + trackSuccess: jest.fn(), + trackError: jest.fn(), + trackFeedback: jest.fn(), + trackTimeToFirstToken: jest.fn(), + trackDurationOf: jest.fn(), + trackOpenAIMetrics: jest.fn(), + trackBedrockConverseMetrics: jest.fn(), + trackVercelAIMetrics: jest.fn(), + getSummary: jest.fn().mockReturnValue({}), + trackJudgeResult: jest.fn(), + resumptionToken: 'resumption-token-123', + } as any; + + aiConfig = { + key: 'test-config', + enabled: true, + messages: [{ role: 'system', content: 'You are a helpful assistant.' 
}], + model: { name: 'gpt-4' }, + provider: { name: 'openai' }, + createTracker: () => mockTracker, + evaluator: Evaluator.noop(), + }; + }); + + it('passes the prompt directly to the runner without prepending config messages', async () => { + const runnerResult: RunnerResult = { + content: 'Response from model', + metrics: { success: true, usage: { total: 10, input: 4, output: 6 } }, + }; + + mockTracker.trackMetricsOf.mockImplementation(async (_extractor, func) => func()); + mockRunner.run.mockResolvedValue(runnerResult); + + const model = new ManagedModel(aiConfig, mockRunner); + await model.run('Hello'); + + expect(mockRunner.run).toHaveBeenCalledTimes(1); + expect(mockRunner.run).toHaveBeenCalledWith('Hello'); + }); + + it('returns a ManagedResult with content, metrics, and an evaluations promise', async () => { + const runnerResult: RunnerResult = { + content: 'Hi there', + metrics: { + success: true, + usage: { total: 12, input: 5, output: 7 }, + toolCalls: ['tool-1'], + durationMs: 42, + }, + raw: { providerSpecific: true }, + }; + + const expectedSummary = { + success: true, + tokens: { total: 12, input: 5, output: 7 }, + toolCalls: ['tool-1'], + durationMs: 42, + resumptionToken: 'resumption-token-123', + }; + + mockTracker.trackMetricsOf.mockImplementation(async (_extractor, func) => func()); + mockTracker.getSummary.mockReturnValue(expectedSummary); + mockRunner.run.mockResolvedValue(runnerResult); + + const model = new ManagedModel(aiConfig, mockRunner); + const result = await model.run('say hi'); + + expect(result.content).toBe('Hi there'); + expect(result.metrics).toEqual(expectedSummary); + expect(result.raw).toEqual({ providerSpecific: true }); + await expect(result.evaluations).resolves.toEqual([]); + }); + + it('forwards the runner result through tracker.trackMetricsOf', async () => { + const runnerResult: RunnerResult = { + content: 'tracked', + metrics: { success: true, usage: { total: 1, input: 1, output: 0 } }, + }; + + 
mockTracker.trackMetricsOf.mockImplementation(async (_extractor, func) => func()); + mockRunner.run.mockResolvedValue(runnerResult); + + const model = new ManagedModel(aiConfig, mockRunner); + await model.run('prompt'); + + expect(mockTracker.trackMetricsOf).toHaveBeenCalledTimes(1); + const [extractor] = mockTracker.trackMetricsOf.mock.calls[0]; + // The extractor should pull metrics off the RunnerResult + expect(extractor(runnerResult)).toBe(runnerResult.metrics); + }); + + it('does not retain conversation state across runs', async () => { + const runnerResult: RunnerResult = { + content: 'ok', + metrics: { success: true, usage: { total: 1, input: 1, output: 0 } }, + }; + + mockTracker.trackMetricsOf.mockImplementation(async (_extractor, func) => func()); + mockRunner.run.mockResolvedValue(runnerResult); + + const model = new ManagedModel(aiConfig, mockRunner); + + await model.run('first'); + await model.run('second'); + + // Each call passes only the latest prompt — no accumulated history. 
+ expect(mockRunner.run).toHaveBeenNthCalledWith(1, 'first'); + expect(mockRunner.run).toHaveBeenNthCalledWith(2, 'second'); + }); + + it('exposes the AI config via getConfig', () => { + const model = new ManagedModel(aiConfig, mockRunner); + expect(model.getConfig()).toBe(aiConfig); + }); +}); diff --git a/packages/sdk/server-ai/__tests__/RunnerFactory.test.ts b/packages/sdk/server-ai/__tests__/RunnerFactory.test.ts new file mode 100644 index 0000000000..d9d2e485ca --- /dev/null +++ b/packages/sdk/server-ai/__tests__/RunnerFactory.test.ts @@ -0,0 +1,273 @@ +import { LDAIConfigKind } from '../src/api/config/types'; +import { AIProvider, ToolRegistry } from '../src/api/providers/AIProvider'; +import { AgentGraphRunner, Runner } from '../src/api/providers/Runner'; +import { RunnerFactory, SupportedAIProvider } from '../src/api/providers/RunnerFactory'; + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +const makeConfig = (providerName: string): LDAIConfigKind => + ({ + key: 'test-config', + enabled: true, + provider: { name: providerName }, + createTracker: () => ({}) as any, + evaluator: {} as any, + }) as unknown as LDAIConfigKind; + +const makeRunner = (): Runner => ({ run: jest.fn() }); +const makeGraphRunner = (): AgentGraphRunner => ({ run: jest.fn() }); + +// --------------------------------------------------------------------------- +// _getProvidersToTry (tested indirectly via createModel provider selection) +// --------------------------------------------------------------------------- + +describe('RunnerFactory.createModel', () => { + afterEach(() => { + jest.resetModules(); + jest.restoreAllMocks(); + }); + + it('returns undefined and logs a warning when no provider package is installed', async () => { + const warnSpy = jest.fn(); + const logger = { warn: warnSpy, debug: jest.fn(), info: jest.fn(), error: jest.fn() }; + + // 
Override dynamic import so every package throws MODULE_NOT_FOUND + jest.spyOn(RunnerFactory as any, '_getProviderFactory').mockResolvedValue(undefined); + + const result = await RunnerFactory.createModel(makeConfig('openai'), logger as any); + + expect(result).toBeUndefined(); + expect(warnSpy).toHaveBeenCalledWith(expect.stringContaining('not supported')); + }); + + it('returns a Runner from the first provider that succeeds', async () => { + const runner = makeRunner(); + const mockFactory: AIProvider = { + createModel: jest.fn().mockResolvedValue(runner), + createAgent: jest.fn().mockResolvedValue(undefined), + createAgentGraph: jest.fn().mockResolvedValue(undefined), + } as unknown as AIProvider; + + jest.spyOn(RunnerFactory as any, '_getProviderFactory').mockResolvedValue(mockFactory); + + const result = await RunnerFactory.createModel(makeConfig('openai')); + + expect(result).toBe(runner); + expect(mockFactory.createModel).toHaveBeenCalledWith(expect.objectContaining({ enabled: true })); + }); + + it('uses only the defaultAiProvider when one is specified', async () => { + const runner = makeRunner(); + const mockFactory: AIProvider = { + createModel: jest.fn().mockResolvedValue(runner), + } as unknown as AIProvider; + + const getProviderSpy = jest + .spyOn(RunnerFactory as any, '_getProviderFactory') + .mockResolvedValue(mockFactory); + + await RunnerFactory.createModel(makeConfig('langchain'), undefined, 'openai' as SupportedAIProvider); + + // _getProviderFactory should only have been called once, with 'openai' + expect(getProviderSpy).toHaveBeenCalledTimes(1); + expect(getProviderSpy).toHaveBeenCalledWith('openai', undefined); + }); + + it('falls through to multi-provider packages when specific provider returns undefined', async () => { + const runner = makeRunner(); + + let callCount = 0; + const getProviderSpy = jest + .spyOn(RunnerFactory as any, '_getProviderFactory') + .mockImplementation(async (providerType: string) => { + callCount += 1; + if 
(providerType === 'openai') { + // openai package not installed + return undefined; + } + // langchain succeeds + return { + createModel: jest.fn().mockResolvedValue(runner), + } as unknown as AIProvider; + }); + + const result = await RunnerFactory.createModel(makeConfig('openai')); + + expect(result).toBe(runner); + // Should have tried openai first, then langchain + expect(getProviderSpy.mock.calls[0][0]).toBe('openai'); + expect(getProviderSpy.mock.calls[1][0]).toBe('langchain'); + }); +}); + +// --------------------------------------------------------------------------- +// _withFallback behaviour +// --------------------------------------------------------------------------- + +describe('RunnerFactory._withFallback', () => { + it('returns the first truthy result and does not call remaining factories', async () => { + const runner = makeRunner(); + const factoryA: AIProvider = { + createModel: jest.fn().mockResolvedValue(runner), + } as unknown as AIProvider; + const factoryB: AIProvider = { + createModel: jest.fn().mockResolvedValue(makeRunner()), + } as unknown as AIProvider; + + jest + .spyOn(RunnerFactory as any, '_getProviderFactory') + .mockResolvedValueOnce(factoryA) + .mockResolvedValueOnce(factoryB); + + const result = await RunnerFactory.createModel(makeConfig('openai')); + + expect(result).toBe(runner); + // factoryB.createModel should never have been called + expect(factoryB.createModel).not.toHaveBeenCalled(); + }); + + it('returns undefined when all factories return undefined', async () => { + const factoryA: AIProvider = { + createModel: jest.fn().mockResolvedValue(undefined), + } as unknown as AIProvider; + + jest.spyOn(RunnerFactory as any, '_getProviderFactory').mockResolvedValue(factoryA); + + const result = await RunnerFactory.createModel(makeConfig('openai')); + + expect(result).toBeUndefined(); + }); +}); + +// --------------------------------------------------------------------------- +// createAgent +// 
--------------------------------------------------------------------------- + +describe('RunnerFactory.createAgent', () => { + afterEach(() => { + jest.restoreAllMocks(); + }); + + it('delegates to factory.createAgent with config and tools', async () => { + const runner = makeRunner(); + const tools: ToolRegistry = { myTool: jest.fn() }; + const mockFactory: AIProvider = { + createAgent: jest.fn().mockResolvedValue(runner), + } as unknown as AIProvider; + + jest.spyOn(RunnerFactory as any, '_getProviderFactory').mockResolvedValue(mockFactory); + + const result = await RunnerFactory.createAgent(makeConfig('openai'), tools); + + expect(result).toBe(runner); + expect(mockFactory.createAgent).toHaveBeenCalledWith( + expect.objectContaining({ enabled: true }), + tools, + ); + }); + + it('returns undefined and warns when no provider supports createAgent', async () => { + const warnSpy = jest.fn(); + const logger = { warn: warnSpy, debug: jest.fn(), info: jest.fn(), error: jest.fn() }; + + jest.spyOn(RunnerFactory as any, '_getProviderFactory').mockResolvedValue(undefined); + + const result = await RunnerFactory.createAgent(makeConfig('openai'), undefined, logger as any); + + expect(result).toBeUndefined(); + expect(warnSpy).toHaveBeenCalledWith(expect.stringContaining('not supported')); + }); +}); + +// --------------------------------------------------------------------------- +// createAgentGraph +// --------------------------------------------------------------------------- + +describe('RunnerFactory.createAgentGraph', () => { + afterEach(() => { + jest.restoreAllMocks(); + }); + + it('delegates to factory.createAgentGraph with graphDef and tools', async () => { + const graphRunner = makeGraphRunner(); + const tools: ToolRegistry = { search: jest.fn() }; + const graphDef = {} as any; // AgentGraphDefinition shape not needed for this test + + const mockFactory: AIProvider = { + createAgentGraph: jest.fn().mockResolvedValue(graphRunner), + } as unknown as AIProvider; + 
+ jest.spyOn(RunnerFactory as any, '_getProviderFactory').mockResolvedValue(mockFactory); + + const result = await RunnerFactory.createAgentGraph(graphDef, tools); + + expect(result).toBe(graphRunner); + expect(mockFactory.createAgentGraph).toHaveBeenCalledWith(graphDef, tools); + }); + + it('returns undefined and warns when no provider supports createAgentGraph', async () => { + const warnSpy = jest.fn(); + const logger = { warn: warnSpy, debug: jest.fn(), info: jest.fn(), error: jest.fn() }; + const graphDef = {} as any; + + jest.spyOn(RunnerFactory as any, '_getProviderFactory').mockResolvedValue(undefined); + + const result = await RunnerFactory.createAgentGraph( + graphDef, + undefined, + logger as any, + ); + + expect(result).toBeUndefined(); + expect(warnSpy).toHaveBeenCalledWith(expect.stringContaining('AgentGraphRunner')); + }); +}); + +// --------------------------------------------------------------------------- +// AIProvider default factory method implementations +// --------------------------------------------------------------------------- + +describe('AIProvider default factory methods', () => { + class ConcreteProvider extends AIProvider {} + + it('createModel returns undefined by default', async () => { + const provider = new ConcreteProvider(); + const result = await provider.createModel(makeConfig('openai')); + expect(result).toBeUndefined(); + }); + + it('createAgent returns undefined by default', async () => { + const provider = new ConcreteProvider(); + const result = await provider.createAgent(makeConfig('openai')); + expect(result).toBeUndefined(); + }); + + it('createAgentGraph returns undefined by default', async () => { + const provider = new ConcreteProvider(); + const result = await provider.createAgentGraph({} as any); + expect(result).toBeUndefined(); + }); + + it('createModel warns when not overridden', async () => { + const warnSpy = jest.fn(); + const provider = new ConcreteProvider({ warn: warnSpy } as any); + await 
provider.createModel(makeConfig('openai')); + expect(warnSpy).toHaveBeenCalledWith(expect.stringContaining('createModel not implemented')); + }); + + it('createAgent warns when not overridden', async () => { + const warnSpy = jest.fn(); + const provider = new ConcreteProvider({ warn: warnSpy } as any); + await provider.createAgent(makeConfig('openai')); + expect(warnSpy).toHaveBeenCalledWith(expect.stringContaining('createAgent not implemented')); + }); + + it('createAgentGraph warns when not overridden', async () => { + const warnSpy = jest.fn(); + const provider = new ConcreteProvider({ warn: warnSpy } as any); + await provider.createAgentGraph({} as any); + expect(warnSpy).toHaveBeenCalledWith(expect.stringContaining('createAgentGraph not implemented')); + }); +}); diff --git a/packages/sdk/server-ai/__tests__/TrackedChat.test.ts b/packages/sdk/server-ai/__tests__/TrackedChat.test.ts deleted file mode 100644 index 75681b0f83..0000000000 --- a/packages/sdk/server-ai/__tests__/TrackedChat.test.ts +++ /dev/null @@ -1,231 +0,0 @@ -import { TrackedChat } from '../src/api/chat/TrackedChat'; -import { ChatResponse } from '../src/api/chat/types'; -import { LDAIConfigTracker } from '../src/api/config/LDAIConfigTracker'; -import { LDAICompletionConfig, LDMessage } from '../src/api/config/types'; -import { AIProvider } from '../src/api/providers/AIProvider'; - -describe('TrackedChat', () => { - let mockProvider: jest.Mocked; - let mockTracker: jest.Mocked; - let aiConfig: LDAICompletionConfig; - - beforeEach(() => { - // Mock the AIProvider - mockProvider = { - invokeModel: jest.fn(), - } as any; - - // Mock the LDAIConfigTracker - mockTracker = { - trackMetricsOf: jest.fn(), - trackDuration: jest.fn(), - trackTokens: jest.fn(), - trackSuccess: jest.fn(), - trackError: jest.fn(), - trackFeedback: jest.fn(), - trackTimeToFirstToken: jest.fn(), - trackDurationOf: jest.fn(), - trackOpenAIMetrics: jest.fn(), - trackBedrockConverseMetrics: jest.fn(), - trackVercelAIMetrics: 
jest.fn(), - getSummary: jest.fn(), - } as any; - - // Create a basic AI config - aiConfig = { - key: 'test-config', - enabled: true, - messages: [{ role: 'system', content: 'You are a helpful assistant.' }], - model: { name: 'gpt-4' }, - provider: { name: 'openai' }, - createTracker: () => mockTracker, - }; - }); - - describe('appendMessages', () => { - it('appends messages to the conversation history', () => { - const chat = new TrackedChat(aiConfig, mockProvider); - - const messagesToAppend: LDMessage[] = [ - { role: 'user', content: 'Hello' }, - { role: 'assistant', content: 'Hi there!' }, - ]; - - chat.appendMessages(messagesToAppend); - - const messages = chat.getMessages(false); - expect(messages).toHaveLength(2); - expect(messages[0]).toEqual({ role: 'user', content: 'Hello' }); - expect(messages[1]).toEqual({ role: 'assistant', content: 'Hi there!' }); - }); - - it('appends multiple message batches sequentially', () => { - const chat = new TrackedChat(aiConfig, mockProvider); - - chat.appendMessages([{ role: 'user', content: 'First message' }]); - chat.appendMessages([{ role: 'assistant', content: 'Second message' }]); - chat.appendMessages([{ role: 'user', content: 'Third message' }]); - - const messages = chat.getMessages(false); - expect(messages).toHaveLength(3); - expect(messages[0].content).toBe('First message'); - expect(messages[1].content).toBe('Second message'); - expect(messages[2].content).toBe('Third message'); - }); - - it('handles empty message array', () => { - const chat = new TrackedChat(aiConfig, mockProvider); - - chat.appendMessages([]); - - const messages = chat.getMessages(false); - expect(messages).toHaveLength(0); - }); - }); - - describe('getMessages', () => { - it('returns only conversation history when includeConfigMessages is false', () => { - const chat = new TrackedChat(aiConfig, mockProvider); - - chat.appendMessages([ - { role: 'user', content: 'User message' }, - { role: 'assistant', content: 'Assistant message' }, - ]); - 
- const messages = chat.getMessages(false); - - expect(messages).toHaveLength(2); - expect(messages[0]).toEqual({ role: 'user', content: 'User message' }); - expect(messages[1]).toEqual({ role: 'assistant', content: 'Assistant message' }); - }); - - it('returns only conversation history when includeConfigMessages is omitted (defaults to false)', () => { - const chat = new TrackedChat(aiConfig, mockProvider); - - chat.appendMessages([{ role: 'user', content: 'User message' }]); - - const messages = chat.getMessages(); - - expect(messages).toHaveLength(1); - expect(messages[0]).toEqual({ role: 'user', content: 'User message' }); - }); - - it('returns config messages prepended when includeConfigMessages is true', () => { - const chat = new TrackedChat(aiConfig, mockProvider); - - chat.appendMessages([ - { role: 'user', content: 'User message' }, - { role: 'assistant', content: 'Assistant message' }, - ]); - - const messages = chat.getMessages(true); - - expect(messages).toHaveLength(3); - expect(messages[0]).toEqual({ role: 'system', content: 'You are a helpful assistant.' }); - expect(messages[1]).toEqual({ role: 'user', content: 'User message' }); - expect(messages[2]).toEqual({ role: 'assistant', content: 'Assistant message' }); - }); - - it('returns only config messages when no conversation history exists and includeConfigMessages is true', () => { - const chat = new TrackedChat(aiConfig, mockProvider); - - const messages = chat.getMessages(true); - - expect(messages).toHaveLength(1); - expect(messages[0]).toEqual({ role: 'system', content: 'You are a helpful assistant.' 
}); - }); - - it('returns empty array when no messages exist and includeConfigMessages is false', () => { - const configWithoutMessages: LDAICompletionConfig = { - ...aiConfig, - messages: [], - }; - const chat = new TrackedChat(configWithoutMessages, mockProvider); - - const messages = chat.getMessages(false); - - expect(messages).toHaveLength(0); - }); - - it('returns a copy of the messages array (not a reference)', () => { - const chat = new TrackedChat(aiConfig, mockProvider); - - chat.appendMessages([{ role: 'user', content: 'Original message' }]); - - const messages1 = chat.getMessages(); - const messages2 = chat.getMessages(); - - expect(messages1).not.toBe(messages2); - expect(messages1).toEqual(messages2); - - // Modifying returned array should not affect internal state - messages1.push({ role: 'assistant', content: 'Modified' }); - - const messages3 = chat.getMessages(); - expect(messages3).toHaveLength(1); - expect(messages3[0].content).toBe('Original message'); - }); - - it('handles undefined config messages gracefully', () => { - const configWithoutMessages: LDAICompletionConfig = { - ...aiConfig, - messages: undefined, - }; - const chat = new TrackedChat(configWithoutMessages, mockProvider); - - chat.appendMessages([{ role: 'user', content: 'User message' }]); - - const messagesWithConfig = chat.getMessages(true); - expect(messagesWithConfig).toHaveLength(1); - expect(messagesWithConfig[0].content).toBe('User message'); - - const messagesWithoutConfig = chat.getMessages(false); - expect(messagesWithoutConfig).toHaveLength(1); - expect(messagesWithoutConfig[0].content).toBe('User message'); - }); - }); - - describe('integration with invoke', () => { - it('adds messages from invoke to history accessible via getMessages', async () => { - const mockResponse: ChatResponse = { - message: { role: 'assistant', content: 'Response from model' }, - metrics: { success: true }, - }; - - mockTracker.trackMetricsOf.mockImplementation(async (extractor, func) => 
func()); - - mockProvider.invokeModel.mockResolvedValue(mockResponse); - - const chat = new TrackedChat(aiConfig, mockProvider); - - await chat.invoke('Hello'); - - const messages = chat.getMessages(false); - expect(messages).toHaveLength(2); - expect(messages[0]).toEqual({ role: 'user', content: 'Hello' }); - expect(messages[1]).toEqual({ role: 'assistant', content: 'Response from model' }); - }); - - it('preserves appended messages when invoking', async () => { - const mockResponse: ChatResponse = { - message: { role: 'assistant', content: 'Response' }, - metrics: { success: true }, - }; - - mockTracker.trackMetricsOf.mockImplementation(async (extractor, func) => func()); - - mockProvider.invokeModel.mockResolvedValue(mockResponse); - - const chat = new TrackedChat(aiConfig, mockProvider); - - chat.appendMessages([{ role: 'user', content: 'Pre-appended message' }]); - await chat.invoke('New user input'); - - const messages = chat.getMessages(false); - expect(messages).toHaveLength(3); - expect(messages[0].content).toBe('Pre-appended message'); - expect(messages[1].content).toBe('New user input'); - expect(messages[2].content).toBe('Response'); - }); - }); -}); diff --git a/packages/sdk/server-ai/examples/chat-judge/src/index.ts b/packages/sdk/server-ai/examples/chat-judge/src/index.ts index 9145081cf6..e74877aa48 100644 --- a/packages/sdk/server-ai/examples/chat-judge/src/index.ts +++ b/packages/sdk/server-ai/examples/chat-judge/src/index.ts @@ -43,11 +43,11 @@ async function main() { enabled: false, }; - const chat = await aiClient.createChat(aiConfigKey, context, defaultValue, { + const model = await aiClient.createModel(aiConfigKey, context, defaultValue, { companyName: 'LaunchDarkly', }); - if (!chat) { + if (!model) { console.log('*** AI chat configuration is not enabled'); process.exit(0); } @@ -56,15 +56,14 @@ async function main() { const userInput = 'How can LaunchDarkly help me?'; console.log('User Input:', userInput); - // The invoke method will 
automatically evaluate the chat response with any judges defined - // in the AI config. - const chatResponse = await chat.invoke(userInput); - console.log('Chat Response:', chatResponse.message.content); + // The run() method invokes the model and returns a ManagedResult. + const result = await model.run(userInput); + console.log('Chat Response:', result.content); // Judge evaluations run asynchronously and do not block your application. // Results are automatically sent to LaunchDarkly for AI config metrics. // You only need to await if you want to access the evaluation results in your code. - const evalResults = await chatResponse.evaluations; + const evalResults = await result.evaluations; console.log('Judge results:', JSON.stringify(evalResults, null, 2)); console.log('Success.'); diff --git a/packages/sdk/server-ai/examples/chat-observability/src/index.ts b/packages/sdk/server-ai/examples/chat-observability/src/index.ts index d7f5818597..b4f25e703b 100644 --- a/packages/sdk/server-ai/examples/chat-observability/src/index.ts +++ b/packages/sdk/server-ai/examples/chat-observability/src/index.ts @@ -47,12 +47,12 @@ async function main() { // provider: { name: 'openai' }, // messages: [...] 
// }; - // const chat = await aiClient.createChat(aiConfigKey, context, defaultValue, { example_type: 'observability_demo' }); - const chat = await aiClient.createChat(aiConfigKey, context, undefined, { + // const model = await aiClient.createModel(aiConfigKey, context, defaultValue, { example_type: 'observability_demo' }); + const model = await aiClient.createModel(aiConfigKey, context, undefined, { example_type: 'observability_demo', }); - if (!chat) { + if (!model) { console.log('*** AI chat configuration is not enabled'); ldClient.close(); process.exit(0); @@ -62,8 +62,8 @@ async function main() { const userInput = 'What is feature flagging in 2 sentences?'; console.log('User Input:', userInput); - const response = await chat.invoke(userInput); - console.log('Chat Response:', response.message.content); + const result = await model.run(userInput); + console.log('Chat Response:', result.content); console.log('\nSuccess.'); } catch (err) { diff --git a/packages/sdk/server-ai/examples/tracked-chat/src/index.ts b/packages/sdk/server-ai/examples/tracked-chat/src/index.ts index fd350e068c..f3b200ea2d 100644 --- a/packages/sdk/server-ai/examples/tracked-chat/src/index.ts +++ b/packages/sdk/server-ai/examples/tracked-chat/src/index.ts @@ -46,12 +46,12 @@ async function main() { // provider: { name: 'openai' }, // messages: [...] // }; - // const chat = await aiClient.createChat(aiConfigKey, context, defaultValue, { companyName: 'LaunchDarkly' }); - const chat = await aiClient.createChat(aiConfigKey, context, undefined, { + // const model = await aiClient.createModel(aiConfigKey, context, defaultValue, { companyName: 'LaunchDarkly' }); + const model = await aiClient.createModel(aiConfigKey, context, undefined, { companyName: 'LaunchDarkly', }); - if (!chat) { + if (!model) { console.log('*** AI chat configuration is not enabled'); process.exit(0); } @@ -62,9 +62,9 @@ async function main() { const userInput = 'Hello! 
Can you help me understand how your company can help me?'; console.log('User Input:', userInput); - const response = await chat.invoke(userInput); + const result = await model.run(userInput); - console.log('AI Response:', response.message.content); + console.log('AI Response:', result.content); console.log('Success.'); } catch (err) { diff --git a/packages/sdk/server-ai/src/LDAIClientImpl.ts b/packages/sdk/server-ai/src/LDAIClientImpl.ts index 16248fe564..d4ad021b4d 100644 --- a/packages/sdk/server-ai/src/LDAIClientImpl.ts +++ b/packages/sdk/server-ai/src/LDAIClientImpl.ts @@ -3,7 +3,7 @@ import { randomUUID } from 'node:crypto'; import { LDContext, LDLogger } from '@launchdarkly/js-server-sdk-common'; -import { TrackedChat } from './api/chat'; +import { ManagedModel } from './api/ManagedModel'; import { LDAIAgentConfig, LDAIAgentConfigDefault, @@ -25,7 +25,7 @@ import { AgentGraphDefinition, LDAgentGraphFlagValue, LDGraphTracker } from './a import { Evaluator } from './api/judge/Evaluator'; import { Judge } from './api/judge/Judge'; import { LDAIClient } from './api/LDAIClient'; -import { AIProviderFactory, SupportedAIProvider } from './api/providers'; +import { RunnerFactory, SupportedAIProvider } from './api/providers'; import { LDAIConfigTrackerImpl } from './LDAIConfigTrackerImpl'; import { LDClientMin } from './LDClientMin'; import { LDGraphTrackerImpl } from './LDGraphTrackerImpl'; @@ -79,21 +79,12 @@ export class LDAIClientImpl implements LDAIClient { mode: LDAIConfigMode, variables?: Record, graphKey?: string, + defaultAiProvider?: SupportedAIProvider, ): Promise { const ldFlagValue = LDAIConfigUtils.toFlagValue(defaultValue, mode); const value: LDAIConfigFlagValue = await this._ldClient.variation(key, context, ldFlagValue); - // Validate mode match - // eslint-disable-next-line no-underscore-dangle - const flagMode = value._ldMeta?.mode ?? 
'completion'; - if (flagMode !== mode) { - this._logger?.warn( - `AI Config mode mismatch for ${key}: expected ${mode}, got ${flagMode}. Returning disabled config.`, - ); - return LDAIConfigUtils.createDisabledConfig(key, mode); - } - const trackerFactory = () => new LDAIConfigTrackerImpl( this._ldClient, @@ -109,7 +100,28 @@ export class LDAIClientImpl implements LDAIClient { graphKey, ); - const config = LDAIConfigUtils.fromFlagValue(key, value, trackerFactory); + // Validate mode match + // eslint-disable-next-line no-underscore-dangle + const flagMode = value._ldMeta?.mode ?? 'completion'; + let evaluator = Evaluator.noop(); + + if (flagMode !== mode) { + this._logger?.warn( + `AI Config mode mismatch for ${key}: expected ${mode}, got ${flagMode}. Returning disabled config.`, + ); + return LDAIConfigUtils.createDisabledConfig(key, mode, trackerFactory, evaluator); + } + + if (flagMode !== 'judge') { + evaluator = await this._buildEvaluator( + value.judgeConfiguration?.judges ?? [], + context, + variables, + defaultAiProvider, + ); + } + + const config = LDAIConfigUtils.fromFlagValue(key, value, trackerFactory, evaluator); // Apply variable interpolation (always needed for ldctx) return this._applyInterpolation(config, context, variables); @@ -175,9 +187,17 @@ export class LDAIClientImpl implements LDAIClient { context: LDContext, defaultValue: LDAICompletionConfigDefault, variables?: Record, + defaultAiProvider?: SupportedAIProvider, ): Promise { - const config = await this._evaluate(key, context, defaultValue, 'completion', variables); - return config as LDAICompletionConfig; + return (await this._evaluate( + key, + context, + defaultValue, + 'completion', + variables, + undefined, + defaultAiProvider, + )) as LDAICompletionConfig; } async completionConfig( @@ -185,9 +205,16 @@ export class LDAIClientImpl implements LDAIClient { context: LDContext, defaultValue?: LDAICompletionConfigDefault, variables?: Record, + defaultAiProvider?: SupportedAIProvider, ): 
Promise { this._ldClient.track(TRACK_USAGE_COMPLETION_CONFIG, context, key, 1); - return this._completionConfig(key, context, defaultValue ?? disabledAIConfig, variables); + return this._completionConfig( + key, + context, + defaultValue ?? disabledAIConfig, + variables, + defaultAiProvider, + ); } /** @@ -228,9 +255,17 @@ export class LDAIClientImpl implements LDAIClient { defaultValue: LDAIAgentConfigDefault, variables?: Record, graphKey?: string, + defaultAiProvider?: SupportedAIProvider, ): Promise { - const config = await this._evaluate(key, context, defaultValue, 'agent', variables, graphKey); - return config as LDAIAgentConfig; + return (await this._evaluate( + key, + context, + defaultValue, + 'agent', + variables, + graphKey, + defaultAiProvider, + )) as LDAIAgentConfig; } async agentConfig( @@ -238,9 +273,17 @@ export class LDAIClientImpl implements LDAIClient { context: LDContext, defaultValue?: LDAIAgentConfigDefault, variables?: Record, + defaultAiProvider?: SupportedAIProvider, ): Promise { this._ldClient.track(TRACK_USAGE_AGENT_CONFIG, context, key, 1); - return this._agentConfig(key, context, defaultValue ?? disabledAIConfig, variables); + return this._agentConfig( + key, + context, + defaultValue ?? disabledAIConfig, + variables, + undefined, + defaultAiProvider, + ); } /** @@ -293,42 +336,17 @@ export class LDAIClientImpl implements LDAIClient { return this.agentConfigs(agentConfigs, context); } + /** + * @deprecated Use `createModel` instead. This method will be removed in a future version. + */ async createChat( key: string, context: LDContext, defaultValue?: LDAICompletionConfigDefault, variables?: Record, defaultAiProvider?: SupportedAIProvider, - ): Promise { - this._ldClient.track(TRACK_USAGE_CREATE_CHAT, context, key, 1); - const config = await this._completionConfig( - key, - context, - defaultValue ?? 
disabledAIConfig, - variables, - ); - - if (!config.enabled) { - this._logger?.info(`Chat configuration is disabled: ${key}`); - return undefined; - } - - const provider = await AIProviderFactory.create(config, this._logger, defaultAiProvider); - if (!provider) { - return undefined; - } - - const evaluator = await this._buildEvaluator( - config.judgeConfiguration?.judges ?? [], - context, - variables, - defaultAiProvider, - ); - - // Attach the evaluator to the config for use by the managed layer - const configWithEvaluator: LDAICompletionConfig = { ...config, evaluator }; - - return new TrackedChat(configWithEvaluator, provider, {}, this._logger); + ): Promise { + return this.createModel(key, context, defaultValue, variables, defaultAiProvider); } async createJudge( @@ -389,20 +407,49 @@ export class LDAIClientImpl implements LDAIClient { return undefined; } - const provider = await AIProviderFactory.create(judgeConfig, this._logger, defaultAiProvider); - if (!provider) { + const runner = await RunnerFactory.createModel(judgeConfig, this._logger, defaultAiProvider); + if (!runner) { return undefined; } - return new Judge(judgeConfig, provider, sampleRate, this._logger); + return new Judge(judgeConfig, runner, sampleRate, this._logger); } catch (error) { this._logger?.error(`Failed to initialize judge ${key}:`, error); return undefined; } } + async createModel( + key: string, + context: LDContext, + defaultValue?: LDAICompletionConfigDefault, + variables?: Record, + defaultAiProvider?: SupportedAIProvider, + ): Promise { + this._ldClient.track(TRACK_USAGE_CREATE_CHAT, context, key, 1); + const config = await this._completionConfig( + key, + context, + defaultValue ?? 
disabledAIConfig, + variables, + defaultAiProvider, + ); + + if (!config.enabled) { + this._logger?.info(`Completion configuration is disabled: ${key}`); + return undefined; + } + + const runner = await RunnerFactory.createModel(config, this._logger, defaultAiProvider); + if (!runner) { + return undefined; + } + + return new ManagedModel(config, runner, this._logger); + } + /** - * @deprecated Use `createChat` instead. This method will be removed in a future version. + * @deprecated Use `createModel` instead. This method will be removed in a future version. */ async initChat( key: string, @@ -410,8 +457,8 @@ export class LDAIClientImpl implements LDAIClient { defaultValue?: LDAICompletionConfigDefault, variables?: Record, defaultAiProvider?: SupportedAIProvider, - ): Promise { - return this.createChat(key, context, defaultValue, variables, defaultAiProvider); + ): Promise { + return this.createModel(key, context, defaultValue, variables, defaultAiProvider); } createTracker(token: string, context: LDContext): LDAIConfigTracker { diff --git a/packages/sdk/server-ai/src/LDAIConfigTrackerImpl.ts b/packages/sdk/server-ai/src/LDAIConfigTrackerImpl.ts index d2f4265371..97957e6cf2 100644 --- a/packages/sdk/server-ai/src/LDAIConfigTrackerImpl.ts +++ b/packages/sdk/server-ai/src/LDAIConfigTrackerImpl.ts @@ -1,7 +1,7 @@ import { LDContext } from '@launchdarkly/js-server-sdk-common'; import { LDAIConfigTracker } from './api/config'; -import { LDAIMetricSummary } from './api/config/LDAIConfigTracker'; +import { LDAIMetricSummary } from './api/model/types'; import { LDJudgeResult } from './api/judge/types'; import { createBedrockTokenUsage, @@ -26,7 +26,9 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker { private _providerName: string, private _context: LDContext, private _graphKey?: string, - ) {} + ) { + this._trackedMetrics.resumptionToken = this.resumptionToken; + } getTrackData(): { runId: string; @@ -132,6 +134,10 @@ export class LDAIConfigTrackerImpl 
implements LDAIConfigTracker { } trackToolCall(toolKey: string): void { + if (!this._trackedMetrics.toolCalls) { + this._trackedMetrics.toolCalls = []; + } + this._trackedMetrics.toolCalls.push(toolKey); this._ldClient.track('$ld:ai:tool_call', this._context, { ...this.getTrackData(), toolKey }, 1); } @@ -206,6 +212,11 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker { this.trackTokens(metrics.usage); } + // Track tool calls if available + if (metrics.toolCalls?.length) { + this.trackToolCalls(metrics.toolCalls); + } + return result; } @@ -252,6 +263,11 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker { if (metrics.usage) { this.trackTokens(metrics.usage); } + + // Track tool calls if available + if (metrics.toolCalls?.length) { + this.trackToolCalls(metrics.toolCalls); + } } catch (error) { // If metrics extraction fails, track error this.trackError(); diff --git a/packages/sdk/server-ai/src/LDGraphTrackerImpl.ts b/packages/sdk/server-ai/src/LDGraphTrackerImpl.ts index f2e6c41670..dc33b4d839 100644 --- a/packages/sdk/server-ai/src/LDGraphTrackerImpl.ts +++ b/packages/sdk/server-ai/src/LDGraphTrackerImpl.ts @@ -1,7 +1,7 @@ import type { LDContext } from '@launchdarkly/js-server-sdk-common'; import type { LDGraphTracker } from './api/graph/LDGraphTracker'; -import type { LDGraphMetricSummary, LDGraphTrackData } from './api/graph/types'; +import type { LDAIGraphMetricSummary } from './api/graph/types'; import type { LDTokenUsage } from './api/metrics'; import type { LDClientMin } from './LDClientMin'; @@ -12,7 +12,7 @@ import type { LDClientMin } from './LDClientMin'; * {@link LDGraphTrackerImpl.fromResumptionToken}. 
*/ export class LDGraphTrackerImpl implements LDGraphTracker { - private _summary: LDGraphMetricSummary = {}; + private _summary: LDAIGraphMetricSummary = {}; constructor( private readonly _ldClient: LDClientMin, @@ -21,7 +21,9 @@ export class LDGraphTrackerImpl implements LDGraphTracker { private readonly _variationKey: string | undefined, private readonly _version: number, private readonly _context: LDContext, - ) {} + ) { + this._summary.resumptionToken = this.resumptionToken; + } /** * Reconstructs an {@link LDGraphTrackerImpl} from a resumption token, preserving @@ -40,7 +42,7 @@ export class LDGraphTrackerImpl implements LDGraphTracker { context: LDContext, ): LDGraphTrackerImpl { const json = Buffer.from(token, 'base64url').toString('utf8'); - const data = JSON.parse(json) as LDGraphTrackData; + const data = JSON.parse(json); return new LDGraphTrackerImpl( ldClient, data.runId, @@ -51,19 +53,21 @@ export class LDGraphTrackerImpl implements LDGraphTracker { ); } - getTrackData(): LDGraphTrackData { - const data: LDGraphTrackData = { + getTrackData(): { + runId: string; + graphKey: string; + variationKey?: string; + version: number; + } { + return { runId: this._runId, graphKey: this._graphKey, version: this._version, + ...(this._variationKey !== undefined ? 
{ variationKey: this._variationKey } : {}), }; - if (this._variationKey !== undefined) { - data.variationKey = this._variationKey; - } - return data; } - getSummary(): LDGraphMetricSummary { + getSummary(): LDAIGraphMetricSummary { return { ...this._summary }; } diff --git a/packages/sdk/server-ai/src/api/LDAIClient.ts b/packages/sdk/server-ai/src/api/LDAIClient.ts index fa8170e0eb..8053fc3c77 100644 --- a/packages/sdk/server-ai/src/api/LDAIClient.ts +++ b/packages/sdk/server-ai/src/api/LDAIClient.ts @@ -1,6 +1,6 @@ import { LDContext } from '@launchdarkly/js-server-sdk-common'; -import { TrackedChat } from './chat'; +import { ManagedModel } from './ManagedModel'; import { LDAIAgentConfig, LDAIAgentConfigDefault, @@ -77,6 +77,7 @@ export interface LDAIClient { context: LDContext, defaultValue?: LDAICompletionConfigDefault, variables?: Record, + defaultAiProvider?: SupportedAIProvider, ): Promise; /** @@ -128,6 +129,7 @@ export interface LDAIClient { context: LDContext, defaultValue?: LDAIAgentConfigDefault, variables?: Record, + defaultAiProvider?: SupportedAIProvider, ): Promise; /** @@ -234,10 +236,9 @@ export interface LDAIClient { ): Promise>; /** - * Returns a TrackedChat instance for chat interactions. - * This method serves as the primary entry point for creating TrackedChat instances from configuration. + * Creates and returns a new ManagedModel instance for LLM model interactions. * - * @param key The key identifying the AI chat configuration to use. + * @param key The key identifying the AI completion configuration to use. * @param context The standard LDContext used when evaluating flags. * @param defaultValue Optional fallback when the configuration is not available from LaunchDarkly. * When omitted or null, a disabled default is used. @@ -245,7 +246,7 @@ export interface LDAIClient { * The variables will also be used for judge evaluation. For the judge only, the variables * `message_history` and `response_to_evaluate` are reserved and will be ignored. 
* @param defaultAiProvider Optional default AI provider to use. - * @returns A promise that resolves to the TrackedChat instance, or null if the configuration is disabled. + * @returns A promise that resolves to the ManagedModel instance, or undefined if the configuration is disabled. * * @example * ``` @@ -261,23 +262,34 @@ export interface LDAIClient { * }; * const variables = { customerName: 'John' }; * - * const chat = await client.createChat(key, context, defaultValue, variables); - * if (chat) { - * const response = await chat.invoke("I need help with my order"); - * console.log(response.message.content); + * const model = await client.createModel(key, context, defaultValue, variables); + * if (model) { + * const result = await model.run("I need help with my order"); + * console.log(result.content); * } * ``` */ + createModel( + key: string, + context: LDContext, + defaultValue?: LDAICompletionConfigDefault, + variables?: Record, + defaultAiProvider?: SupportedAIProvider, + ): Promise; + + /** + * @deprecated Use `createModel` instead. This method will be removed in a future version. + */ createChat( key: string, context: LDContext, defaultValue?: LDAICompletionConfigDefault, variables?: Record, defaultAiProvider?: SupportedAIProvider, - ): Promise; + ): Promise; /** - * @deprecated Use `createChat` instead. This method will be removed in a future version. + * @deprecated Use `createModel` instead. This method will be removed in a future version. */ initChat( key: string, @@ -285,7 +297,7 @@ export interface LDAIClient { defaultValue?: LDAICompletionConfigDefault, variables?: Record, defaultAiProvider?: SupportedAIProvider, - ): Promise; + ): Promise; /** * Creates and returns a new Judge instance for AI evaluation. 
diff --git a/packages/sdk/server-ai/src/api/ManagedModel.ts b/packages/sdk/server-ai/src/api/ManagedModel.ts new file mode 100644 index 0000000000..7ac068afe7 --- /dev/null +++ b/packages/sdk/server-ai/src/api/ManagedModel.ts @@ -0,0 +1,63 @@ +import { LDLogger } from '@launchdarkly/js-server-sdk-common'; + +import { LDAICompletionConfig } from './config/types'; +import { LDJudgeResult } from './judge/types'; +import { ManagedResult, RunnerResult } from './model/types'; +import { Runner } from './providers/Runner'; + +/** + * ManagedModel provides chat-completion invocation with automatic tracking and + * (in a future PR) automatic judge evaluation. + * + * The class is stateless: each `run()` call sends the prompt directly to the + * underlying `Runner` and returns a `ManagedResult`. Conversation history, + * if any, must be managed by the caller (or by the Runner implementation). + * + * Obtain an instance via `LDAIClient.createModel()`. + */ +export class ManagedModel { + constructor( + protected readonly aiConfig: LDAICompletionConfig, + protected readonly runner: Runner, + private readonly _logger?: LDLogger, + ) {} + + /** + * Invoke the model with a prompt string and return a ManagedResult. + * + * `run()` resolves before `ManagedResult.evaluations` resolves. Awaiting + * `evaluations` guarantees both judge evaluation and tracker.trackJudgeResult() + * are complete. + * + * @param prompt The user input to send to the model. + * @returns Promise resolving to ManagedResult (before evaluations settle). + */ + async run(prompt: string): Promise { + const tracker = this.aiConfig.createTracker(); + + const result = await tracker.trackMetricsOf( + (r: RunnerResult) => r.metrics, + () => this.runner.run(prompt), + ); + + const metrics = tracker.getSummary(); + + // Evaluations are wired in a follow-up PR. For now, resolve empty. 
+ const evaluations: Promise = Promise.resolve([]); + + return { + content: result.content, + metrics, + raw: result.raw, + parsed: result.parsed, + evaluations, + }; + } + + /** + * Get the underlying AI configuration used to initialize this ManagedModel. + */ + getConfig(): LDAICompletionConfig { + return this.aiConfig; + } +} diff --git a/packages/sdk/server-ai/src/api/chat/TrackedChat.ts b/packages/sdk/server-ai/src/api/chat/TrackedChat.ts deleted file mode 100644 index 2d5b21a85f..0000000000 --- a/packages/sdk/server-ai/src/api/chat/TrackedChat.ts +++ /dev/null @@ -1,167 +0,0 @@ -import { LDLogger } from '@launchdarkly/js-server-sdk-common'; - -import { LDAICompletionConfig, LDMessage } from '../config/types'; -import { Judge } from '../judge/Judge'; -import { LDJudgeResult } from '../judge/types'; -import { AIProvider } from '../providers/AIProvider'; -import { ChatResponse } from './types'; - -/** - * Concrete implementation of TrackedChat that provides chat functionality - * by delegating to an AIProvider implementation. - * This class handles conversation management and tracking, while delegating - * the actual model invocation to the provider. - */ -export class TrackedChat { - protected messages: LDMessage[]; - - constructor( - protected readonly aiConfig: LDAICompletionConfig, - protected readonly provider: AIProvider, - protected readonly judges: Record = {}, - private readonly _logger?: LDLogger, - ) { - this.messages = []; - } - - /** - * Invoke the chat model with a prompt string. - * This method handles conversation management and tracking, delegating to the provider's invokeModel method. 
- */ - async invoke(prompt: string): Promise { - const tracker = this.aiConfig.createTracker!(); - - // Convert prompt string to LDMessage with role 'user' and add to conversation history - const userMessage: LDMessage = { - role: 'user', - content: prompt, - }; - this.messages.push(userMessage); - - // Prepend config messages to conversation history for model invocation - const configMessages = this.aiConfig.messages || []; - const allMessages = [...configMessages, ...this.messages]; - - // Delegate to provider-specific implementation with tracking - const response = await tracker.trackMetricsOf( - (result: ChatResponse) => result.metrics, - () => this.provider.invokeModel(allMessages), - ); - - if ( - this.aiConfig.judgeConfiguration?.judges && - this.aiConfig.judgeConfiguration.judges.length > 0 - ) { - response.evaluations = this._evaluateWithJudges(this.messages, response).then( - (evaluations) => { - evaluations.forEach((judgeResult) => { - tracker.trackJudgeResult(judgeResult); - }); - return evaluations; - }, - ); - } - - this.messages.push(response.message); - return response; - } - - /** - * Evaluates the response with all configured judges. - * Returns a promise that resolves to an array of evaluation results. 
- * - * @param messages Array of messages representing the conversation history - * @param response The AI response to be evaluated - * @returns Promise resolving to array of judge evaluation results - */ - private async _evaluateWithJudges( - messages: LDMessage[], - response: ChatResponse, - ): Promise { - const judgeConfigs = this.aiConfig.judgeConfiguration!.judges; - - // Start all judge evaluations in parallel - const evaluationPromises = judgeConfigs.map(async (judgeConfig) => { - const judge = this.judges[judgeConfig.key]; - if (!judge) { - this._logger?.warn( - `Judge configuration is not enabled for ${judgeConfig.key} in ${this.aiConfig.key}`, - ); - const result: LDJudgeResult = { - success: false, - sampled: true, - errorMessage: `Judge configuration is not enabled for ${judgeConfig.key}`, - }; - return result; - } - - return judge.evaluateMessages(messages, response, judgeConfig.samplingRate); - }); - - // ensure all evaluations complete even if some fail - const results = await Promise.allSettled(evaluationPromises); - - return results.map((settled) => { - if (settled.status === 'fulfilled') { - return settled.value; - } - const result: LDJudgeResult = { - success: false, - sampled: true, - errorMessage: 'Judge evaluation failed', - }; - return result; - }); - } - - /** - * Get the underlying AI configuration used to initialize this TrackedChat. - */ - getConfig(): LDAICompletionConfig { - return this.aiConfig; - } - - /** - * Get the underlying AI provider instance. - * This provides direct access to the provider for advanced use cases. - */ - getProvider(): AIProvider { - return this.provider; - } - - /** - * Get the judges associated with this TrackedChat. - * Returns a record of judge instances keyed by their configuration keys. - */ - getJudges(): Record { - return this.judges; - } - - /** - * Append messages to the conversation history. 
- * Adds messages to the conversation history without invoking the model, - * which is useful for managing multi-turn conversations or injecting context. - * - * @param messages Array of messages to append to the conversation history - */ - appendMessages(messages: LDMessage[]): void { - this.messages.push(...messages); - } - - /** - * Get all messages in the conversation history. - * - * @param includeConfigMessages Whether to include the config messages from the AIConfig. - * Defaults to false. - * @returns Array of messages. When includeConfigMessages is true, returns both config - * messages and conversation history with config messages prepended. When false, - * returns only the conversation history messages. - */ - getMessages(includeConfigMessages: boolean = false): LDMessage[] { - if (includeConfigMessages) { - const configMessages = this.aiConfig.messages || []; - return [...configMessages, ...this.messages]; - } - return [...this.messages]; - } -} diff --git a/packages/sdk/server-ai/src/api/chat/index.ts b/packages/sdk/server-ai/src/api/chat/index.ts index f7876298ea..fcb073fefc 100644 --- a/packages/sdk/server-ai/src/api/chat/index.ts +++ b/packages/sdk/server-ai/src/api/chat/index.ts @@ -1,2 +1 @@ export * from './types'; -export * from './TrackedChat'; diff --git a/packages/sdk/server-ai/src/api/config/LDAIConfigTracker.ts b/packages/sdk/server-ai/src/api/config/LDAIConfigTracker.ts index e0aff2c6b5..51c47e0292 100644 --- a/packages/sdk/server-ai/src/api/config/LDAIConfigTracker.ts +++ b/packages/sdk/server-ai/src/api/config/LDAIConfigTracker.ts @@ -1,35 +1,6 @@ import { LDJudgeResult } from '../judge/types'; import { LDAIMetrics, LDFeedbackKind, LDTokenUsage } from '../metrics'; - -/** - * Metrics which have been tracked. - */ -export interface LDAIMetricSummary { - /** - * The duration of generation. - */ - durationMs?: number; - - /** - * Information about token usage. - */ - tokens?: LDTokenUsage; - - /** - * Was generation successful. 
- */ - success?: boolean; - - /** - * Any sentiment about the generation. - */ - feedback?: { kind: LDFeedbackKind }; - - /** - * Time to first token for this generation. - */ - timeToFirstTokenMs?: number; -} +import { LDAIMetricSummary } from '../model/types'; /** * The LDAIConfigTracker is used to track various details about AI operations. diff --git a/packages/sdk/server-ai/src/api/config/LDAIConfigUtils.ts b/packages/sdk/server-ai/src/api/config/LDAIConfigUtils.ts index 093598bb2f..6f77f54de4 100644 --- a/packages/sdk/server-ai/src/api/config/LDAIConfigUtils.ts +++ b/packages/sdk/server-ai/src/api/config/LDAIConfigUtils.ts @@ -1,3 +1,4 @@ +import { Evaluator } from '../judge/Evaluator'; import { LDAIConfigTracker } from './LDAIConfigTracker'; import { LDAIAgentConfig, @@ -90,12 +91,14 @@ export class LDAIConfigUtils { * @param key The configuration key * @param flagValue The flag value from LaunchDarkly * @param trackerFactory A factory function that creates a new tracker for each execution + * @param evaluator The evaluator to attach to completion and agent configs * @returns The appropriate AI configuration type */ static fromFlagValue( key: string, flagValue: LDAIConfigFlagValue, trackerFactory: () => LDAIConfigTracker, + evaluator: Evaluator, ): LDAIConfigKind { // Determine the actual mode from flag value // eslint-disable-next-line no-underscore-dangle @@ -103,34 +106,43 @@ export class LDAIConfigUtils { switch (flagValueMode) { case 'agent': - return this.toAgentConfig(key, flagValue, trackerFactory); + return this.toAgentConfig(key, flagValue, trackerFactory, evaluator); case 'judge': return this.toJudgeConfig(key, flagValue, trackerFactory); case 'completion': default: - return this.toCompletionConfig(key, flagValue, trackerFactory); + return this.toCompletionConfig(key, flagValue, trackerFactory, evaluator); } } /** * Creates a disabled configuration of the specified mode. 
* + * @param key The configuration key * @param mode The mode for the disabled config + * @param createTracker A factory function that creates a new tracker for each execution + * @param evaluator The evaluator to attach to completion and agent configs * @returns A disabled config of the appropriate type */ - static createDisabledConfig(key: string, mode: LDAIConfigMode): LDAIConfigKind { + static createDisabledConfig( + key: string, + mode: LDAIConfigMode, + createTracker: () => LDAIConfigTracker, + evaluator: Evaluator, + ): LDAIConfigKind { switch (mode) { case 'agent': return { key, enabled: false, - createTracker: undefined, + createTracker, + evaluator, } as LDAIAgentConfig; case 'judge': return { key, enabled: false, - createTracker: undefined, + createTracker, } as LDAIJudgeConfig; case 'completion': default: @@ -138,7 +150,8 @@ export class LDAIConfigUtils { return { key, enabled: false, - createTracker: undefined, + createTracker, + evaluator, } as LDAICompletionConfig; } } @@ -187,16 +200,19 @@ export class LDAIConfigUtils { * @param key The configuration key * @param flagValue The flag value from LaunchDarkly * @param trackerFactory A factory function that creates a new tracker for each execution + * @param evaluator The evaluator for this completion config * @returns A completion configuration */ static toCompletionConfig( key: string, flagValue: LDAIConfigFlagValue, trackerFactory: () => LDAIConfigTracker, + evaluator: Evaluator, ): LDAICompletionConfig { return { ...this._toBaseConfig(key, flagValue), createTracker: trackerFactory, + evaluator, messages: flagValue.messages, judgeConfiguration: flagValue.judgeConfiguration, tools: this._resolveTools(flagValue), @@ -209,16 +225,19 @@ export class LDAIConfigUtils { * @param key The configuration key * @param flagValue The flag value from LaunchDarkly * @param trackerFactory A factory function that creates a new tracker for each execution + * @param evaluator The evaluator for this agent config * 
@returns An agent configuration */ static toAgentConfig( key: string, flagValue: LDAIConfigFlagValue, trackerFactory: () => LDAIConfigTracker, + evaluator: Evaluator, ): LDAIAgentConfig { return { ...this._toBaseConfig(key, flagValue), createTracker: trackerFactory, + evaluator, instructions: flagValue.instructions, judgeConfiguration: flagValue.judgeConfiguration, tools: this._resolveTools(flagValue), diff --git a/packages/sdk/server-ai/src/api/config/types.ts b/packages/sdk/server-ai/src/api/config/types.ts index 159cad8f83..d22e265593 100644 --- a/packages/sdk/server-ai/src/api/config/types.ts +++ b/packages/sdk/server-ai/src/api/config/types.ts @@ -126,7 +126,7 @@ export interface LDAIConfig extends Omit { * new tracker with a fresh runId. Use createTracker() at the start of each * execution to obtain a tracker, then use it to record metrics for that run. */ - createTracker?: () => LDAIConfigTracker; + createTracker: () => LDAIConfigTracker; } // ============================================================================ @@ -222,12 +222,12 @@ export interface LDAIAgentConfig extends LDAIConfig { */ tools?: { [toolName: string]: LDTool }; /** - * Evaluator for this agent config. Populated by createAgent. + * Evaluator for this agent config. Populated at evaluation time. * Not part of the flag value shape. * * @internal */ - evaluator?: Evaluator; + evaluator: Evaluator; } /** @@ -248,12 +248,12 @@ export interface LDAICompletionConfig extends LDAIConfig { */ tools?: { [toolName: string]: LDTool }; /** - * Evaluator for this completion config. Populated by createChat/createModel. + * Evaluator for this completion config. Populated at evaluation time. * Not part of the flag value shape. 
* * @internal */ - evaluator?: Evaluator; + evaluator: Evaluator; } /** diff --git a/packages/sdk/server-ai/src/api/graph/LDGraphTracker.ts b/packages/sdk/server-ai/src/api/graph/LDGraphTracker.ts index 54f7defba6..b04c29f5df 100644 --- a/packages/sdk/server-ai/src/api/graph/LDGraphTracker.ts +++ b/packages/sdk/server-ai/src/api/graph/LDGraphTracker.ts @@ -1,5 +1,5 @@ import type { LDTokenUsage } from '../metrics'; -import type { LDGraphMetricSummary, LDGraphTrackData } from './types'; +import type { LDAIGraphMetricSummary } from './types'; /** * Tracks graph-level and edge-level metrics for an agent graph invocation. @@ -25,12 +25,17 @@ export interface LDGraphTracker { /** * Returns tracking metadata to be included in every LDClient.track call. */ - getTrackData(): LDGraphTrackData; + getTrackData(): { + runId: string; + graphKey: string; + variationKey?: string; + version: number; + }; /** * Returns a snapshot of all graph-level metrics tracked so far. */ - getSummary(): LDGraphMetricSummary; + getSummary(): LDAIGraphMetricSummary; /** * A URL-safe Base64-encoded (RFC 4648, no padding) token encoding the tracker's diff --git a/packages/sdk/server-ai/src/api/graph/types.ts b/packages/sdk/server-ai/src/api/graph/types.ts index 1b578fecba..1d19f88c9c 100644 --- a/packages/sdk/server-ai/src/api/graph/types.ts +++ b/packages/sdk/server-ai/src/api/graph/types.ts @@ -1,4 +1,4 @@ -import { LDTokenUsage } from '../metrics'; +import { LDAIMetrics, LDTokenUsage } from '../metrics'; /** * Represents a directed edge in an agent graph, connecting a source node to a target node. @@ -40,7 +40,7 @@ export interface LDAgentGraphFlagValue { /** * Accumulated graph-level metrics collected by an LDGraphTracker. */ -export interface LDGraphMetricSummary { +export interface LDAIGraphMetricSummary { /** * Whether the graph invocation succeeded. Absent if not yet tracked. 
*/ @@ -60,29 +60,62 @@ export interface LDGraphMetricSummary { * Execution path through the graph as an array of config keys. Absent if not yet tracked. */ path?: string[]; + + /** + * Resumption token for deferred feedback association. + */ + resumptionToken?: string; } /** - * Tracking metadata returned by {@link LDGraphTracker.getTrackData}. + * Graph-level metrics for a completed graph run, as returned by a graph runner. + * Does NOT include handoffs or evaluations — those are managed-layer concerns. */ -export interface LDGraphTrackData { +export interface LDAIGraphMetrics { + /** + * Whether the graph invocation succeeded. + */ + success: boolean; + + /** + * Execution path through the graph as an ordered array of config keys. + */ + path: string[]; + + /** + * Total graph execution duration in milliseconds, if tracked. + */ + durationMs?: number; + + /** + * Aggregate token usage across the entire graph invocation, if available. + */ + usage?: LDTokenUsage; + /** - * UUID v4 uniquely identifying this tracker and all events it emits. + * Per-node metrics keyed by agent config key. */ - runId: string; + nodeMetrics: Record; +} +/** + * The result returned by a graph runner invocation (provider-level). + * Does NOT include evaluations or handoffs. + */ +export interface AgentGraphRunnerResult { /** - * The graph configuration key. + * The text content of the graph's final response. */ - graphKey: string; + content: string; /** - * The variation key. Absent when a default config was used rather than a real flag evaluation. + * Graph-level metrics for this invocation. */ - variationKey?: string; + metrics: LDAIGraphMetrics; /** - * The version of the flag variation. + * The raw response object from the provider, if available. 
*/ - version: number; + raw?: unknown; } + diff --git a/packages/sdk/server-ai/src/api/index.ts b/packages/sdk/server-ai/src/api/index.ts index 7470ef740c..e63b7b6e24 100644 --- a/packages/sdk/server-ai/src/api/index.ts +++ b/packages/sdk/server-ai/src/api/index.ts @@ -2,6 +2,8 @@ export * from './config'; export * from './chat'; export * from './graph'; export * from './judge'; +export * from './ManagedModel'; export * from './metrics'; +export * from './model'; export * from './LDAIClient'; export * from './providers'; diff --git a/packages/sdk/server-ai/src/api/judge/Judge.ts b/packages/sdk/server-ai/src/api/judge/Judge.ts index 820014ffaa..35624cad30 100644 --- a/packages/sdk/server-ai/src/api/judge/Judge.ts +++ b/packages/sdk/server-ai/src/api/judge/Judge.ts @@ -4,8 +4,9 @@ import { LDLogger } from '@launchdarkly/js-server-sdk-common'; import { ChatResponse } from '../chat/types'; import { LDAIJudgeConfig, LDMessage } from '../config/types'; -import { AIProvider } from '../providers/AIProvider'; -import { LDJudgeResult, StructuredResponse } from './types'; +import { RunnerResult } from '../model/types'; +import { Runner } from '../providers/Runner'; +import { LDJudgeResult } from './types'; const EVALUATION_SCHEMA = { type: 'object', @@ -36,7 +37,7 @@ export class Judge { constructor( private readonly _aiConfig: LDAIJudgeConfig, - private readonly _aiProvider: AIProvider, + private readonly _runner: Runner, private readonly _sampleRate: number = 1.0, logger?: LDLogger, ) { @@ -121,15 +122,15 @@ export class Judge { const messages = this._constructEvaluationMessages(input, output); const response = await tracker.trackMetricsOf( - (r: StructuredResponse) => r.metrics, - () => this._aiProvider.invokeStructuredModel(messages, EVALUATION_SCHEMA), + (r: RunnerResult) => r.metrics, + () => this._runner.run(messages, EVALUATION_SCHEMA), ); - const evalResult = this._parseEvaluationResponse(response.data); + const evalResult = 
this._parseEvaluationResponse(response.parsed); if (!evalResult) { this._logger?.warn( - `Could not parse evaluation response: ${JSON.stringify(response.data)}`, + `Could not parse evaluation response: ${JSON.stringify(response.parsed)}`, tracker.getTrackData(), ); return result; @@ -178,10 +179,10 @@ export class Judge { } /** - * Returns the AI provider used by this judge. + * Returns the runner used by this judge. */ - getProvider(): AIProvider { - return this._aiProvider; + getRunner(): Runner { + return this._runner; } /** @@ -211,7 +212,7 @@ export class Judge { * Returns score and reasoning, or undefined if parsing fails. */ private _parseEvaluationResponse( - data: Record, + data: Record | undefined, ): { score: number; reasoning: string } | undefined { if (!data || typeof data !== 'object' || Array.isArray(data)) { return undefined; diff --git a/packages/sdk/server-ai/src/api/metrics/LDAIMetrics.ts b/packages/sdk/server-ai/src/api/metrics/LDAIMetrics.ts index 3b0fb99ec7..2d9f3a47d7 100644 --- a/packages/sdk/server-ai/src/api/metrics/LDAIMetrics.ts +++ b/packages/sdk/server-ai/src/api/metrics/LDAIMetrics.ts @@ -15,4 +15,16 @@ export interface LDAIMetrics { * This will be undefined if no token usage data is available. */ usage?: LDTokenUsage; + + /** + * List of tool call identifiers made during the operation. + * This will be undefined if no tool calls were made. + */ + toolCalls?: string[]; + + /** + * Duration of the operation in milliseconds. + * This will be undefined if duration was not tracked. 
+ */ + durationMs?: number; } diff --git a/packages/sdk/server-ai/src/api/model/index.ts b/packages/sdk/server-ai/src/api/model/index.ts new file mode 100644 index 0000000000..3869bd6bcd --- /dev/null +++ b/packages/sdk/server-ai/src/api/model/index.ts @@ -0,0 +1 @@ +export type { LDAIMetricSummary, ManagedResult, RunnerResult } from './types'; diff --git a/packages/sdk/server-ai/src/api/model/types.ts b/packages/sdk/server-ai/src/api/model/types.ts new file mode 100644 index 0000000000..cb2b44531c --- /dev/null +++ b/packages/sdk/server-ai/src/api/model/types.ts @@ -0,0 +1,105 @@ +import { LDJudgeResult } from '../judge/types'; +import { LDAIMetrics, LDFeedbackKind } from '../metrics'; +import { LDTokenUsage } from '../metrics/LDTokenUsage'; + +/** + * Summary metrics returned in a ManagedResult or from LDAIConfigTracker.getSummary(). + * Provides a flat view of the key metrics for the completed operation. + */ +export interface LDAIMetricSummary { + /** + * Whether the AI operation was successful. + */ + success?: boolean; + + /** + * Token usage information, if available. + */ + tokens?: LDTokenUsage; + + /** + * List of tool call identifiers made during the operation, if any. + */ + toolCalls?: string[]; + + /** + * Duration of the operation in milliseconds, if tracked. + */ + durationMs?: number; + + /** + * Time to first token in milliseconds, if tracked. + */ + timeToFirstTokenMs?: number; + + /** + * User feedback sentiment for this generation, if provided. + */ + feedback?: { kind: LDFeedbackKind }; + + /** + * Resumption token for deferred feedback association. + */ + resumptionToken?: string; +} + +/** + * The result returned by a Runner (provider-level) invocation. + * Providers implement Runner and return RunnerResult from run(). + * This type does NOT include evaluations — those are wired in the managed layer. + */ +export interface RunnerResult { + /** + * The text content of the model's response. 
+ */ + content: string; + + /** + * Metrics information for the operation. + */ + metrics: LDAIMetrics; + + /** + * The raw response object from the provider, if available. + */ + raw?: unknown; + + /** + * Parsed structured output, if the provider returned structured data. + */ + parsed?: Record; +} + +/** + * The result returned by a managed model invocation (ManagedModel.run()). + * Includes a promise for asynchronous judge evaluations. + */ +export interface ManagedResult { + /** + * The text content of the model's response. + */ + content: string; + + /** + * Summarized metrics for this invocation. + */ + metrics: LDAIMetricSummary; + + /** + * The raw response object from the provider, if available. + */ + raw?: unknown; + + /** + * Parsed structured output, if available. + */ + parsed?: Record; + + /** + * Promise that resolves to the judge evaluation results. + * This promise encapsulates both evaluation and tracking + * (tracker.trackJudgeResult is called when it resolves). + * Awaiting this promise guarantees both evaluation and tracking are complete. + */ + evaluations: Promise; +} diff --git a/packages/sdk/server-ai/src/api/providers/AIProvider.ts b/packages/sdk/server-ai/src/api/providers/AIProvider.ts index e83ea2a834..62a1fd55e5 100644 --- a/packages/sdk/server-ai/src/api/providers/AIProvider.ts +++ b/packages/sdk/server-ai/src/api/providers/AIProvider.ts @@ -2,7 +2,16 @@ import { LDLogger } from '@launchdarkly/js-server-sdk-common'; import { ChatResponse } from '../chat/types'; import { LDAIConfigKind, LDMessage } from '../config/types'; +import { AgentGraphDefinition } from '../graph/AgentGraphDefinition'; import { StructuredResponse } from '../judge/types'; +import { AgentGraphRunner, Runner } from './Runner'; + +/** + * A registry of callable tools keyed by tool name. + * Mirrors Python's `Dict[str, Callable]` — values are typically functions + * that the provider invokes when the model requests a tool call. 
+ */ +export type ToolRegistry = Record unknown>; /** * Abstract base class for AI providers that implement chat model functionality. @@ -20,12 +29,11 @@ export abstract class AIProvider { } /** * Invoke the chat model with an array of messages. - * This method should convert messages to provider format, invoke the model, - * and return a ChatResponse with the result and metrics. * * Default implementation takes no action and returns a placeholder response. * Provider implementations should override this method. * + * @deprecated Use the `Runner` interface and its `run` method instead. * @param messages Array of LDMessage objects representing the conversation * @returns Promise that resolves to a ChatResponse containing the model's response */ @@ -49,12 +57,11 @@ export abstract class AIProvider { /** * Invoke the chat model with structured output support. - * This method should convert messages to provider format, invoke the model with - * structured output configuration, and return a structured response. * * Default implementation takes no action and returns a placeholder response. * Provider implementations should override this method. * + * @deprecated Use the `Runner` interface and its `run` method with `outputType` instead. * @param messages Array of LDMessage objects representing the conversation * @param responseStructure Dictionary of output configurations keyed by output name * @returns Promise that resolves to a structured response @@ -78,11 +85,74 @@ export abstract class AIProvider { }; } + // ============================================================================ + // Factory instance methods (Python AIProvider pattern) + // + // Provider packages override these to return a configured Runner for the + // relevant mode. The default implementations log a warning and return + // undefined, mirroring Python's base-class behaviour. 
+ // ============================================================================ + + /** + * Create a Runner for a completion or judge AI Config. + * + * Override in provider subclasses to return a configured {@link Runner}. + * Default implementation logs a warning and returns `undefined`. + * + * @param config The completion or judge AI configuration. + * @returns Promise resolving to a {@link Runner}, or `undefined` if this + * provider does not support model creation. + */ + async createModel(_config: LDAIConfigKind): Promise { + this.logger?.warn('createModel not implemented by this provider'); + return undefined; + } + + /** + * Create a Runner for an agent AI Config. + * + * Override in provider subclasses to return a configured {@link Runner}. + * Default implementation logs a warning and returns `undefined`. + * + * @param config The agent AI configuration. + * @param tools Optional registry of callable tools. + * @returns Promise resolving to a {@link Runner}, or `undefined` if this + * provider does not support agent creation. + */ + async createAgent(_config: LDAIConfigKind, _tools?: ToolRegistry): Promise { + this.logger?.warn('createAgent not implemented by this provider'); + return undefined; + } + + /** + * Create an AgentGraphRunner for an agent graph definition. + * + * Override in provider subclasses to return a configured {@link AgentGraphRunner}. + * Default implementation logs a warning and returns `undefined`. + * + * @param graphDef The agent graph definition. + * @param tools Optional registry of callable tools. + * @returns Promise resolving to an {@link AgentGraphRunner}, or `undefined` if + * this provider does not support graph execution. 
+ */ + async createAgentGraph( + _graphDef: AgentGraphDefinition, + _tools?: ToolRegistry, + ): Promise { + this.logger?.warn('createAgentGraph not implemented by this provider'); + return undefined; + } + + // ============================================================================ + // Legacy static factory (retained for backward compatibility) + // ============================================================================ + /** * Static method that constructs an instance of the provider. * Each provider implementation must provide their own static create method * that accepts an AIConfig and returns a configured instance. * + * @deprecated Use the `createModel` factory method instead. * @param aiConfig The LaunchDarkly AI configuration * @param logger Optional logger for the provider * @returns Promise that resolves to a configured provider instance diff --git a/packages/sdk/server-ai/src/api/providers/AIProviderFactory.ts b/packages/sdk/server-ai/src/api/providers/AIProviderFactory.ts deleted file mode 100644 index fd4764e0de..0000000000 --- a/packages/sdk/server-ai/src/api/providers/AIProviderFactory.ts +++ /dev/null @@ -1,132 +0,0 @@ -import { LDLogger } from '@launchdarkly/js-server-sdk-common'; - -import { LDAIConfigKind } from '../config/types'; -import { AIProvider } from './AIProvider'; - -/** - * List of supported AI providers. - */ -export const SUPPORTED_AI_PROVIDERS = [ - 'openai', - // Multi-provider packages should be last in the list - 'langchain', - 'vercel', -] as const; - -/** - * Type representing the supported AI providers. - */ -export type SupportedAIProvider = (typeof SUPPORTED_AI_PROVIDERS)[number]; - -/** - * Factory for creating AIProvider instances based on the provider configuration. - */ -export class AIProviderFactory { - /** - * Create an AIProvider instance based on the AI configuration. - * This method attempts to load provider-specific implementations dynamically. - * Returns undefined if the provider is not supported. 
- * - * @param aiConfig The AI configuration - * @param logger Optional logger for logging provider initialization - * @param defaultAiProvider Optional default AI provider to use - */ - static async create( - aiConfig: LDAIConfigKind, - logger?: LDLogger, - defaultAiProvider?: SupportedAIProvider, - ): Promise { - const providerName = aiConfig.provider?.name?.toLowerCase(); - // Determine which providers to try based on defaultAiProvider - const providersToTry = this._getProvidersToTry(defaultAiProvider, providerName); - - // Try each provider in order - for (const providerType of providersToTry) { - logger?.debug( - `Attempting to create AIProvider for: ${aiConfig.provider?.name} with provider type: ${providerType}`, - ); - // eslint-disable-next-line no-await-in-loop - const provider = await this._tryCreateProvider(providerType, aiConfig, logger); - if (provider) { - logger?.debug(`Successfully created AIProvider for: ${aiConfig.provider?.name}`); - return provider; - } - } - - // If no provider was successfully created, log a warning - logger?.warn( - `Provider is not supported or failed to initialize: ${aiConfig.provider?.name ?? 'unknown'}`, - ); - return undefined; - } - - /** - * Determine which providers to try based on defaultAiProvider and providerName. 
- */ - private static _getProvidersToTry( - defaultAiProvider?: SupportedAIProvider, - providerName?: string, - ): SupportedAIProvider[] { - // If defaultAiProvider is set, only try that specific provider - if (defaultAiProvider) { - return [defaultAiProvider]; - } - - // If no defaultAiProvider is set, try all providers in order - const providerSet = new Set(); - - // First try the specific provider if it's supported - if (providerName && SUPPORTED_AI_PROVIDERS.includes(providerName as SupportedAIProvider)) { - providerSet.add(providerName as SupportedAIProvider); - } - - // Then try multi-provider packages, but avoid duplicates - const multiProviderPackages: SupportedAIProvider[] = ['langchain', 'vercel']; - multiProviderPackages.forEach((provider) => { - providerSet.add(provider); - }); - - return Array.from(providerSet); - } - - /** - * Try to create a provider of the specified type. - */ - private static async _tryCreateProvider( - providerType: SupportedAIProvider, - aiConfig: LDAIConfigKind, - logger?: LDLogger, - ): Promise { - try { - let module; - - switch (providerType) { - case 'openai': { - // eslint-disable-next-line import/no-extraneous-dependencies - module = await import('@launchdarkly/server-sdk-ai-openai' as any); - const provider = (await module.OpenAIProvider.create(aiConfig, logger)) as AIProvider; - return provider; - } - case 'langchain': { - // eslint-disable-next-line import/no-extraneous-dependencies - module = await import('@launchdarkly/server-sdk-ai-langchain' as any); - const provider = (await module.LangChainProvider.create(aiConfig, logger)) as AIProvider; - return provider; - } - case 'vercel': { - // eslint-disable-next-line import/no-extraneous-dependencies - module = await import('@launchdarkly/server-sdk-ai-vercel' as any); - const provider = (await module.VercelProvider.create(aiConfig, logger)) as AIProvider; - return provider; - } - default: - return undefined; - } - } catch (error: any) { - logger?.warn( - `Unable to create 
AIProvider. Check that you have installed the correct package. ${error.message}`, - ); - return undefined; - } - } -} diff --git a/packages/sdk/server-ai/src/api/providers/Runner.ts b/packages/sdk/server-ai/src/api/providers/Runner.ts new file mode 100644 index 0000000000..42b43cd8ce --- /dev/null +++ b/packages/sdk/server-ai/src/api/providers/Runner.ts @@ -0,0 +1,39 @@ +import { LDMessage } from '../config/types'; +import { AgentGraphRunnerResult } from '../graph/types'; +import { RunnerResult } from '../model/types'; + +/** + * Runner protocol for AI model providers. + * + * A single Runner interface covers completion, agent, and judge use cases. + * For structured output (e.g., judge evaluation), pass an `outputType` schema + * and access the parsed result via `RunnerResult.parsed`. + */ +export interface Runner { + /** + * Invoke the model with the given input. + * + * @param input The input to the model. For agents this is a string prompt; + * for model completions and judges this is an array of messages. + * @param outputType Optional JSON schema for structured output. When provided, + * the model should return structured data accessible via `RunnerResult.parsed`. + * @returns Promise resolving to a RunnerResult. + */ + run(input: string | LDMessage[], outputType?: Record): Promise; +} + +/** + * Runner protocol for agent graph providers. + * + * Providers implementing AgentGraphRunner can execute an entire agent graph + * and return a structured AgentGraphRunnerResult. + */ +export interface AgentGraphRunner { + /** + * Execute the agent graph with the given input. + * + * @param input The user input to process through the graph. + * @returns Promise resolving to an AgentGraphRunnerResult. 
+ */ + run(input: string): Promise; +} diff --git a/packages/sdk/server-ai/src/api/providers/RunnerFactory.ts b/packages/sdk/server-ai/src/api/providers/RunnerFactory.ts new file mode 100644 index 0000000000..d02c79d445 --- /dev/null +++ b/packages/sdk/server-ai/src/api/providers/RunnerFactory.ts @@ -0,0 +1,264 @@ +import { LDLogger } from '@launchdarkly/js-server-sdk-common'; + +import { LDAIConfigKind } from '../config/types'; +import { AgentGraphDefinition } from '../graph/AgentGraphDefinition'; +import { AIProvider, ToolRegistry } from './AIProvider'; +import { AgentGraphRunner, Runner } from './Runner'; + +/** + * List of supported AI providers. + */ +export const SUPPORTED_AI_PROVIDERS = [ + 'openai', + // Multi-provider packages should be last in the list + 'langchain', + 'vercel', +] as const; + +/** + * Type representing the supported AI providers. + */ +export type SupportedAIProvider = (typeof SUPPORTED_AI_PROVIDERS)[number]; + +/** + * Sole entry point for runner creation. + * + * RunnerFactory is the single factory for creating {@link Runner} and + * {@link AgentGraphRunner} instances. It mirrors the Python RunnerFactory + * pattern: it knows about supported provider packages, loads them dynamically + * via {@link _getProviderFactory}, and delegates creation to the factory + * instance methods on {@link AIProvider}. + * + * Provider packages implement {@link AIProvider} factory methods + * (`createModel`, `createAgent`, `createAgentGraph`). The legacy + * {@link AIProvider} abstract class is retained for backward compatibility, + * and the {@link _LegacyProviderAdapter} shim wraps packages that have not + * yet migrated to the new pattern. + */ +export class RunnerFactory { + /** + * Load and return the AIProvider factory for the given provider type. + * + * This is the single place in the codebase that knows provider package names. + * If the provider package exports the new `*RunnerFactory` class, it is + * instantiated directly. 
If importing the package or constructing that class throws (for + * example the package is not installed or does not export the class), the + * error is logged as a warning and `undefined` is returned; no legacy + * `static create()` fallback is attempted here. + * + * @param providerType One of the {@link SUPPORTED_AI_PROVIDERS} values. + * @param logger Optional logger forwarded to the provider factory. + * @returns A configured {@link AIProvider} instance, or `undefined` if the + * package cannot be loaded. + */ + private static async _getProviderFactory( + providerType: SupportedAIProvider, + logger?: LDLogger, + ): Promise<AIProvider | undefined> { + try { + let module: any; + + switch (providerType) { + case 'openai': { + // eslint-disable-next-line import/no-extraneous-dependencies + module = await import('@launchdarkly/server-sdk-ai-openai' as any); + return new module.OpenAIRunnerFactory(logger) as AIProvider; + } + case 'langchain': { + // eslint-disable-next-line import/no-extraneous-dependencies + module = await import('@launchdarkly/server-sdk-ai-langchain' as any); + return new module.LangChainRunnerFactory(logger) as AIProvider; + } + case 'vercel': { + // eslint-disable-next-line import/no-extraneous-dependencies + module = await import('@launchdarkly/server-sdk-ai-vercel' as any); + return new module.VercelRunnerFactory(logger) as AIProvider; + } + default: + return undefined; + } + } catch (error: any) { + logger?.warn( + `Unable to load provider package. Check that you have installed the correct package. ${error.message}`, + ); + return undefined; + } + } + + /** + * Determine which providers to try based on defaultAiProvider and providerName. + * + * Mirrors Python's `_get_providers_to_try` helper.
+ */ + private static _getProvidersToTry( + defaultAiProvider?: SupportedAIProvider, + providerName?: string, + ): SupportedAIProvider[] { + // If defaultAiProvider is set, only try that specific provider + if (defaultAiProvider) { + return [defaultAiProvider]; + } + + // A Set keeps insertion order and drops duplicates, so a config that names + // 'langchain' or 'vercel' directly is still only tried once. + const providerSet = new Set<SupportedAIProvider>(); + + // First try the specific provider if it's supported + if (providerName && SUPPORTED_AI_PROVIDERS.includes(providerName as SupportedAIProvider)) { + providerSet.add(providerName as SupportedAIProvider); + } + + // Then try multi-provider packages as fallback, avoiding duplicates + const multiProviderPackages: SupportedAIProvider[] = ['langchain', 'vercel']; + multiProviderPackages.forEach((provider) => { + providerSet.add(provider); + }); + + return Array.from(providerSet); + } + + /** + * Try each provider in order and return the first non-undefined result. + * + * Mirrors Python's `_with_fallback` helper. Loads each provider factory via + * {@link _getProviderFactory} and calls `fn` with it. Returns the first + * truthy result, or `undefined` if no provider succeeds. + * + * @param providers Ordered list of provider types to try. + * @param fn Callback that calls the appropriate factory method on the provider. + * @param logger Optional logger forwarded to each provider factory.
+ */ + private static async _withFallback<T>( + providers: SupportedAIProvider[], + fn: (factory: AIProvider) => Promise<T | undefined>, + logger?: LDLogger, + ): Promise<T | undefined> { + for (const providerType of providers) { + logger?.debug(`Attempting to create runner with provider: ${providerType}`); + // eslint-disable-next-line no-await-in-loop, no-underscore-dangle + const factory = await RunnerFactory._getProviderFactory(providerType, logger); + if (factory) { + try { + // eslint-disable-next-line no-await-in-loop + const result = await fn(factory); + if (result) { + logger?.debug(`Successfully created runner with provider: ${providerType}`); + return result; + } + } catch (error: unknown) { + // A provider that throws while building its runner must not abort the + // fallback chain: log the failure and continue with the next candidate. + const message = error instanceof Error ? error.message : String(error); + logger?.warn(`Provider ${providerType} failed to create a runner: ${message}`); + } + } + } + return undefined; + } + + /** + * Create a Runner for the given AI configuration. + * + * Suitable for completion, judge, and agent config modes. Dynamically + * loads the matching provider package via {@link _getProviderFactory} and + * delegates to its {@link AIProvider.createModel} method. + * + * @param config The AI configuration (completion, agent, or judge). + * @param logger Optional logger forwarded to the underlying provider. + * @param defaultAiProvider Optional provider override + * ('openai', 'langchain', 'vercel', …). When set, only that provider is + * tried. When omitted, providers are tried in priority order based on the + * provider name in the config. + * @returns A configured {@link Runner} ready to invoke the model, or + * `undefined` if no suitable provider could be loaded.
+ */ + static async createModel( + config: LDAIConfigKind, + logger?: LDLogger, + defaultAiProvider?: SupportedAIProvider, + ): Promise<Runner | undefined> { + const providerName = config.provider?.name?.toLowerCase(); + // eslint-disable-next-line no-underscore-dangle + const providers = RunnerFactory._getProvidersToTry(defaultAiProvider, providerName); + + // eslint-disable-next-line no-underscore-dangle + const runner = await RunnerFactory._withFallback( + providers, + (factory) => factory.createModel(config), + logger, + ); + + if (!runner) { + logger?.warn( + `Provider is not supported or failed to initialize: ${config.provider?.name ?? 'unknown'}`, + ); + } + + return runner; + } + + /** + * Create a Runner for an agent AI Config. + * + * Delegates to the provider factory's {@link AIProvider.createAgent} method. + * + * @param config The agent AI configuration. + * @param tools Optional registry of callable tools. + * @param logger Optional logger forwarded to the underlying provider. + * @param defaultAiProvider Optional provider override. + * @returns A configured {@link Runner}, or `undefined` if no suitable + * provider could be loaded. + */ + static async createAgent( + config: LDAIConfigKind, + tools?: ToolRegistry, + logger?: LDLogger, + defaultAiProvider?: SupportedAIProvider, + ): Promise<Runner | undefined> { + const providerName = config.provider?.name?.toLowerCase(); + // eslint-disable-next-line no-underscore-dangle + const providers = RunnerFactory._getProvidersToTry(defaultAiProvider, providerName); + + // eslint-disable-next-line no-underscore-dangle + const runner = await RunnerFactory._withFallback( + providers, + (factory) => factory.createAgent(config, tools), + logger, + ); + + if (!runner) { + logger?.warn( + `Provider is not supported or failed to initialize: ${config.provider?.name ?? 'unknown'}`, + ); + } + + return runner; + } + + /** + * Create an AgentGraphRunner for the given agent graph definition.
+ * + * Delegates to the provider factory's {@link AIProvider.createAgentGraph} method. + * + * @param graphDef The agent graph definition. + * @param tools Optional registry of callable tools. + * @param logger Optional logger forwarded to the underlying provider. + * @param defaultAiProvider Optional provider override. + * @returns A configured {@link AgentGraphRunner}, or `undefined` if no + * suitable provider could be loaded. + */ + static async createAgentGraph( + graphDef: AgentGraphDefinition, + tools?: ToolRegistry, + logger?: LDLogger, + defaultAiProvider?: SupportedAIProvider, + ): Promise { + // AgentGraph does not have a per-node provider name at this level; + // fall back to all multi-provider packages unless overridden. + // eslint-disable-next-line no-underscore-dangle + const providers = RunnerFactory._getProvidersToTry(defaultAiProvider); + + // eslint-disable-next-line no-underscore-dangle + const runner = await RunnerFactory._withFallback( + providers, + (factory) => factory.createAgentGraph(graphDef, tools), + logger, + ); + + if (!runner) { + logger?.warn(`No provider could create an AgentGraphRunner for the given graph definition.`); + } + + return runner; + } +} diff --git a/packages/sdk/server-ai/src/api/providers/index.ts b/packages/sdk/server-ai/src/api/providers/index.ts index 5439dfa830..83a288c61c 100644 --- a/packages/sdk/server-ai/src/api/providers/index.ts +++ b/packages/sdk/server-ai/src/api/providers/index.ts @@ -1,2 +1,3 @@ export * from './AIProvider'; -export * from './AIProviderFactory'; +export * from './Runner'; +export * from './RunnerFactory'; From a2185281d8264f51e015d59d48045eb8b63b1439 Mon Sep 17 00:00:00 2001 From: Jason Bailey Date: Mon, 4 May 2026 16:48:34 -0500 Subject: [PATCH 03/19] chore: wire evaluations tracking chain in ManagedModel.run() (#1333) --- .../server-ai/__tests__/ManagedModel.test.ts | 8 +- .../__tests__/ManagedModelRun.test.ts | 160 ++++++++++++++++++ .../server-ai/__tests__/RunnerFactory.test.ts | 
9 +- .../sdk/server-ai/src/api/ManagedModel.ts | 20 ++- 4 files changed, 187 insertions(+), 10 deletions(-) create mode 100644 packages/sdk/server-ai/__tests__/ManagedModelRun.test.ts diff --git a/packages/sdk/server-ai/__tests__/ManagedModel.test.ts b/packages/sdk/server-ai/__tests__/ManagedModel.test.ts index 09158bd7ce..f7dc3ecd75 100644 --- a/packages/sdk/server-ai/__tests__/ManagedModel.test.ts +++ b/packages/sdk/server-ai/__tests__/ManagedModel.test.ts @@ -1,13 +1,13 @@ import { ManagedModel } from '../src/api/ManagedModel'; import { LDAIConfigTracker } from '../src/api/config/LDAIConfigTracker'; import { LDAICompletionConfig } from '../src/api/config/types'; -import { Evaluator } from '../src/api/judge/Evaluator'; import { RunnerResult } from '../src/api/model/types'; import { Runner } from '../src/api/providers/Runner'; describe('ManagedModel', () => { let mockRunner: jest.Mocked; let mockTracker: jest.Mocked; + let mockEvaluator: { evaluate: jest.Mock }; let aiConfig: LDAICompletionConfig; beforeEach(() => { @@ -32,6 +32,10 @@ describe('ManagedModel', () => { resumptionToken: 'resumption-token-123', } as any; + mockEvaluator = { + evaluate: jest.fn().mockResolvedValue([]), + }; + aiConfig = { key: 'test-config', enabled: true, @@ -39,7 +43,7 @@ describe('ManagedModel', () => { model: { name: 'gpt-4' }, provider: { name: 'openai' }, createTracker: () => mockTracker, - evaluator: Evaluator.noop(), + evaluator: mockEvaluator as any, }; }); diff --git a/packages/sdk/server-ai/__tests__/ManagedModelRun.test.ts b/packages/sdk/server-ai/__tests__/ManagedModelRun.test.ts new file mode 100644 index 0000000000..532f133ff5 --- /dev/null +++ b/packages/sdk/server-ai/__tests__/ManagedModelRun.test.ts @@ -0,0 +1,160 @@ +import { ManagedModel } from '../src/api/ManagedModel'; +import { LDAIConfigTracker } from '../src/api/config/LDAIConfigTracker'; +import { LDAICompletionConfig } from '../src/api/config/types'; +import { Evaluator } from '../src/api/judge/Evaluator'; 
+import { LDJudgeResult } from '../src/api/judge/types'; +import { RunnerResult } from '../src/api/model/types'; +import { Runner } from '../src/api/providers/Runner'; + +describe('ManagedModel.run() evaluations', () => { + let mockRunner: jest.Mocked; + let mockTracker: jest.Mocked; + let aiConfig: LDAICompletionConfig; + + const runnerResult: RunnerResult = { + content: 'AI response content', + metrics: { success: true }, + }; + + beforeEach(() => { + mockRunner = { + run: jest.fn().mockResolvedValue(runnerResult), + }; + + mockTracker = { + trackMetricsOf: jest.fn().mockImplementation(async (_extractor: any, func: any) => func()), + trackJudgeResult: jest.fn(), + resumptionToken: 'test-resumption-token', + getTrackData: jest.fn().mockReturnValue({}), + trackDuration: jest.fn(), + trackTokens: jest.fn(), + trackSuccess: jest.fn(), + trackError: jest.fn(), + trackFeedback: jest.fn(), + trackTimeToFirstToken: jest.fn(), + trackDurationOf: jest.fn(), + trackOpenAIMetrics: jest.fn(), + trackBedrockConverseMetrics: jest.fn(), + trackVercelAIMetrics: jest.fn(), + getSummary: jest + .fn() + .mockReturnValue({ success: true, resumptionToken: 'test-resumption-token' }), + } as any; + + aiConfig = { + key: 'test-config', + enabled: true, + messages: [{ role: 'system', content: 'You are helpful.' 
}], + model: { name: 'gpt-4' }, + provider: { name: 'openai' }, + createTracker: () => mockTracker, + evaluator: Evaluator.noop(), + }; + }); + + it('returns before evaluations resolve', async () => { + let resolveEval!: (v: LDJudgeResult[]) => void; + const slowEvaluator = { + judgeConfiguration: { judges: [{ key: 'judge-1', samplingRate: 1.0 }] }, + evaluate: jest.fn().mockReturnValue( + new Promise((resolve) => { + resolveEval = resolve; + }), + ), + judges: new Map(), + } as unknown as Evaluator; + + const configWithEvaluator: LDAICompletionConfig = { + ...aiConfig, + evaluator: slowEvaluator, + }; + + const model = new ManagedModel(configWithEvaluator, mockRunner); + + let evaluationsResolved = false; + const result = await model.run('Hello'); + + expect(result.content).toBe('AI response content'); + + result.evaluations.then(() => { + evaluationsResolved = true; + }); + + await Promise.resolve(); + expect(evaluationsResolved).toBe(false); + + resolveEval([{ success: true, sampled: true, score: 0.9 }]); + await result.evaluations; + expect(evaluationsResolved).toBe(true); + }); + + it('awaiting evaluations guarantees tracking is complete', async () => { + const judgeResult: LDJudgeResult = { + success: true, + sampled: true, + score: 0.8, + metricKey: 'quality', + }; + const mockEvaluator = { + judgeConfiguration: { judges: [{ key: 'judge-1', samplingRate: 1.0 }] }, + evaluate: jest.fn().mockResolvedValue([judgeResult]), + judges: new Map(), + } as unknown as Evaluator; + + const configWithEvaluator: LDAICompletionConfig = { + ...aiConfig, + evaluator: mockEvaluator, + }; + + const model = new ManagedModel(configWithEvaluator, mockRunner); + const result = await model.run('Hello'); + + await result.evaluations; + expect(mockTracker.trackJudgeResult).toHaveBeenCalledWith(judgeResult); + }); + + it('builds ManagedResult with correct content and metrics', async () => { + const model = new ManagedModel(aiConfig, mockRunner); + const result = await model.run('test 
prompt'); + + expect(result.content).toBe('AI response content'); + expect(result.metrics.success).toBe(true); + expect(result.metrics.resumptionToken).toBe('test-resumption-token'); + expect(result.evaluations).toBeInstanceOf(Promise); + }); + + it('resolves to empty evaluations when evaluator is noop', async () => { + const configWithNoop: LDAICompletionConfig = { + ...aiConfig, + evaluator: Evaluator.noop(), + }; + const model = new ManagedModel(configWithNoop, mockRunner); + const result = await model.run('Hello'); + const evaluations = await result.evaluations; + expect(evaluations).toEqual([]); + }); + + it('passes the prompt to evaluator.evaluate as input', async () => { + const judgeResult: LDJudgeResult = { + success: true, + sampled: true, + score: 1.0, + }; + const mockEvaluator = { + judgeConfiguration: { judges: [{ key: 'judge-1', samplingRate: 1.0 }] }, + evaluate: jest.fn().mockResolvedValue([judgeResult]), + judges: new Map(), + } as unknown as Evaluator; + + const configWithEvaluator: LDAICompletionConfig = { + ...aiConfig, + evaluator: mockEvaluator, + }; + + const model = new ManagedModel(configWithEvaluator, mockRunner); + const result = await model.run('user prompt here'); + await result.evaluations; + + expect(mockEvaluator.evaluate).toHaveBeenCalledWith('user prompt here', 'AI response content'); + }); +}); diff --git a/packages/sdk/server-ai/__tests__/RunnerFactory.test.ts b/packages/sdk/server-ai/__tests__/RunnerFactory.test.ts index d9d2e485ca..f76ca556e9 100644 --- a/packages/sdk/server-ai/__tests__/RunnerFactory.test.ts +++ b/packages/sdk/server-ai/__tests__/RunnerFactory.test.ts @@ -78,11 +78,10 @@ describe('RunnerFactory.createModel', () => { it('falls through to multi-provider packages when specific provider returns undefined', async () => { const runner = makeRunner(); - let callCount = 0; const getProviderSpy = jest .spyOn(RunnerFactory as any, '_getProviderFactory') - .mockImplementation(async (providerType: string) => { - 
callCount += 1; + // eslint-disable-next-line @typescript-eslint/no-explicit-any + .mockImplementation(async (providerType: any) => { if (providerType === 'openai') { // openai package not installed return undefined; @@ -107,6 +106,10 @@ describe('RunnerFactory.createModel', () => { // --------------------------------------------------------------------------- describe('RunnerFactory._withFallback', () => { + afterEach(() => { + jest.restoreAllMocks(); + }); + it('returns the first truthy result and does not call remaining factories', async () => { const runner = makeRunner(); const factoryA: AIProvider = { diff --git a/packages/sdk/server-ai/src/api/ManagedModel.ts b/packages/sdk/server-ai/src/api/ManagedModel.ts index 7ac068afe7..5d6860c443 100644 --- a/packages/sdk/server-ai/src/api/ManagedModel.ts +++ b/packages/sdk/server-ai/src/api/ManagedModel.ts @@ -1,13 +1,12 @@ import { LDLogger } from '@launchdarkly/js-server-sdk-common'; import { LDAICompletionConfig } from './config/types'; -import { LDJudgeResult } from './judge/types'; import { ManagedResult, RunnerResult } from './model/types'; import { Runner } from './providers/Runner'; /** * ManagedModel provides chat-completion invocation with automatic tracking and - * (in a future PR) automatic judge evaluation. + * automatic judge evaluation. * * The class is stateless: each `run()` call sends the prompt directly to the * underlying `Runner` and returns a `ManagedResult`. Conversation history, @@ -42,11 +41,22 @@ export class ManagedModel { const metrics = tracker.getSummary(); - // Evaluations are wired in a follow-up PR. For now, resolve empty. 
- const evaluations: Promise = Promise.resolve([]); + const output = result.content; + const evaluations = this.aiConfig.evaluator + .evaluate(prompt, output) + .then((results) => { + results.forEach((judgeResult) => { + tracker.trackJudgeResult(judgeResult); + }); + return results; + }) + .catch((err) => { + this._logger?.warn('Judge evaluation failed unexpectedly:', err); + return []; + }); return { - content: result.content, + content: output, metrics, raw: result.raw, parsed: result.parsed, From fcadf29d50134c0f6e90f10f1ff36ea6b5389e75 Mon Sep 17 00:00:00 2001 From: Jason Bailey Date: Tue, 5 May 2026 09:11:22 -0500 Subject: [PATCH 04/19] feat: add ManagedAgent with evaluations support (#1334) --- .../server-ai/__tests__/ManagedAgent.test.ts | 160 ++++++++++++++++++ packages/sdk/server-ai/src/LDAIClientImpl.ts | 35 ++++ packages/sdk/server-ai/src/api/LDAIClient.ts | 20 +++ .../sdk/server-ai/src/api/ManagedAgent.ts | 73 ++++++++ packages/sdk/server-ai/src/api/index.ts | 1 + 5 files changed, 289 insertions(+) create mode 100644 packages/sdk/server-ai/__tests__/ManagedAgent.test.ts create mode 100644 packages/sdk/server-ai/src/api/ManagedAgent.ts diff --git a/packages/sdk/server-ai/__tests__/ManagedAgent.test.ts b/packages/sdk/server-ai/__tests__/ManagedAgent.test.ts new file mode 100644 index 0000000000..81b801861f --- /dev/null +++ b/packages/sdk/server-ai/__tests__/ManagedAgent.test.ts @@ -0,0 +1,160 @@ +import { ManagedAgent } from '../src/api/ManagedAgent'; +import { LDAIConfigTracker } from '../src/api/config/LDAIConfigTracker'; +import { LDAIAgentConfig } from '../src/api/config/types'; +import { Evaluator } from '../src/api/judge/Evaluator'; +import { LDJudgeResult } from '../src/api/judge/types'; +import { RunnerResult } from '../src/api/model/types'; +import { Runner } from '../src/api/providers/Runner'; + +describe('ManagedAgent', () => { + let mockRunner: jest.Mocked; + let mockTracker: jest.Mocked; + let agentConfig: LDAIAgentConfig; + + const 
runnerResult: RunnerResult = { + content: 'Agent response', + metrics: { success: true }, + }; + + beforeEach(() => { + mockRunner = { + run: jest.fn().mockResolvedValue(runnerResult), + }; + + mockTracker = { + trackMetricsOf: jest.fn().mockImplementation(async (_extractor: any, func: any) => func()), + trackJudgeResult: jest.fn(), + resumptionToken: 'agent-resumption-token', + getTrackData: jest.fn().mockReturnValue({}), + trackDuration: jest.fn(), + trackTokens: jest.fn(), + trackSuccess: jest.fn(), + trackError: jest.fn(), + trackFeedback: jest.fn(), + trackTimeToFirstToken: jest.fn(), + trackDurationOf: jest.fn(), + trackOpenAIMetrics: jest.fn(), + trackBedrockConverseMetrics: jest.fn(), + trackVercelAIMetrics: jest.fn(), + getSummary: jest.fn().mockReturnValue({ success: true, resumptionToken: 'agent-resumption-token' }), + } as any; + + agentConfig = { + key: 'test-agent', + enabled: true, + instructions: 'You are a helpful agent.', + model: { name: 'gpt-4' }, + provider: { name: 'openai' }, + createTracker: () => mockTracker, + evaluator: Evaluator.noop(), + }; + }); + + it('returns a ManagedResult with content and metrics', async () => { + const agent = new ManagedAgent(agentConfig, mockRunner); + const result = await agent.run('Hello agent'); + + expect(result.content).toBe('Agent response'); + expect(result.metrics.success).toBe(true); + expect(result.metrics.resumptionToken).toBe('agent-resumption-token'); + }); + + it('passes the prompt directly to the runner', async () => { + const agent = new ManagedAgent(agentConfig, mockRunner); + await agent.run('My question'); + + expect(mockRunner.run).toHaveBeenCalledWith('My question'); + }); + + it('resolves to empty evaluations with noop evaluator', async () => { + const agent = new ManagedAgent(agentConfig, mockRunner); + const result = await agent.run('Hello'); + const evaluations = await result.evaluations; + expect(evaluations).toEqual([]); + }); + + it('awaiting evaluations calls 
tracker.trackJudgeResult', async () => { + const judgeResult: LDJudgeResult = { + success: true, + sampled: true, + score: 0.85, + metricKey: 'quality', + }; + const mockEvaluator = { + judgeConfiguration: { judges: [{ key: 'judge-1', samplingRate: 1.0 }] }, + evaluate: jest.fn().mockResolvedValue([judgeResult]), + judges: new Map(), + } as unknown as Evaluator; + + const configWithEvaluator: LDAIAgentConfig = { + ...agentConfig, + evaluator: mockEvaluator, + }; + + const agent = new ManagedAgent(configWithEvaluator, mockRunner); + const result = await agent.run('Hello'); + + await result.evaluations; + expect(mockTracker.trackJudgeResult).toHaveBeenCalledWith(judgeResult); + }); + + it('passes the prompt to evaluator.evaluate as input', async () => { + const mockEvaluator = { + judgeConfiguration: { judges: [{ key: 'judge-1', samplingRate: 1.0 }] }, + evaluate: jest.fn().mockResolvedValue([]), + judges: new Map(), + } as unknown as Evaluator; + + const configWithEvaluator: LDAIAgentConfig = { + ...agentConfig, + evaluator: mockEvaluator, + }; + + const agent = new ManagedAgent(configWithEvaluator, mockRunner); + const result = await agent.run('user prompt'); + await result.evaluations; + + expect(mockEvaluator.evaluate).toHaveBeenCalledWith('user prompt', 'Agent response'); + }); + + it('returns before evaluations resolve', async () => { + let resolveEval!: (v: LDJudgeResult[]) => void; + const slowEvaluator = { + judgeConfiguration: { judges: [{ key: 'judge-1', samplingRate: 1.0 }] }, + evaluate: jest.fn().mockReturnValue( + new Promise((resolve) => { + resolveEval = resolve; + }), + ), + judges: new Map(), + } as unknown as Evaluator; + + const configWithEvaluator: LDAIAgentConfig = { + ...agentConfig, + evaluator: slowEvaluator, + }; + + const agent = new ManagedAgent(configWithEvaluator, mockRunner); + + let evaluationsResolved = false; + const result = await agent.run('Hello'); + + expect(result.content).toBe('Agent response'); + + result.evaluations.then(() 
=> { + evaluationsResolved = true; + }); + + await Promise.resolve(); + expect(evaluationsResolved).toBe(false); + + resolveEval([{ success: true, sampled: true, score: 0.9 }]); + await result.evaluations; + expect(evaluationsResolved).toBe(true); + }); + + it('exposes the agent config via getConfig', () => { + const agent = new ManagedAgent(agentConfig, mockRunner); + expect(agent.getConfig()).toBe(agentConfig); + }); +}); diff --git a/packages/sdk/server-ai/src/LDAIClientImpl.ts b/packages/sdk/server-ai/src/LDAIClientImpl.ts index d4ad021b4d..c655db2551 100644 --- a/packages/sdk/server-ai/src/LDAIClientImpl.ts +++ b/packages/sdk/server-ai/src/LDAIClientImpl.ts @@ -3,6 +3,7 @@ import { randomUUID } from 'node:crypto'; import { LDContext, LDLogger } from '@launchdarkly/js-server-sdk-common'; +import { ManagedAgent } from './api/ManagedAgent'; import { ManagedModel } from './api/ManagedModel'; import { LDAIAgentConfig, @@ -37,6 +38,7 @@ import { aiSdkLanguage, aiSdkName, aiSdkVersion } from './sdkInfo'; const TRACK_SDK_INFO = '$ld:ai:sdk:info'; const TRACK_USAGE_COMPLETION_CONFIG = '$ld:ai:usage:completion-config'; const TRACK_USAGE_CREATE_CHAT = '$ld:ai:usage:create-chat'; +const TRACK_USAGE_CREATE_AGENT = '$ld:ai:usage:create-agent'; const TRACK_USAGE_JUDGE_CONFIG = '$ld:ai:usage:judge-config'; const TRACK_USAGE_CREATE_JUDGE = '$ld:ai:usage:create-judge'; const TRACK_USAGE_AGENT_CONFIG = '$ld:ai:usage:agent-config'; @@ -51,6 +53,8 @@ const INIT_TRACK_CONTEXT: LDContext = { const disabledAIConfig: LDAIConfigDefault = { enabled: false }; + + export class LDAIClientImpl implements LDAIClient { private _logger?: LDLogger; @@ -448,6 +452,37 @@ export class LDAIClientImpl implements LDAIClient { return new ManagedModel(config, runner, this._logger); } + async createAgent( + key: string, + context: LDContext, + defaultValue?: LDAIAgentConfigDefault, + variables?: Record, + defaultAiProvider?: SupportedAIProvider, + ): Promise { + 
this._ldClient.track(TRACK_USAGE_CREATE_AGENT, context, key, 1); + + const config = await this._agentConfig( + key, + context, + defaultValue ?? disabledAIConfig, + variables, + undefined, + defaultAiProvider, + ); + + if (!config.enabled) { + this._logger?.info(`Agent configuration is disabled: ${key}`); + return undefined; + } + + const runner = await RunnerFactory.createAgent(config, undefined, this._logger, defaultAiProvider); + if (!runner) { + return undefined; + } + + return new ManagedAgent(config, runner, this._logger); + } + /** * @deprecated Use `createModel` instead. This method will be removed in a future version. */ diff --git a/packages/sdk/server-ai/src/api/LDAIClient.ts b/packages/sdk/server-ai/src/api/LDAIClient.ts index 8053fc3c77..749a739320 100644 --- a/packages/sdk/server-ai/src/api/LDAIClient.ts +++ b/packages/sdk/server-ai/src/api/LDAIClient.ts @@ -1,5 +1,6 @@ import { LDContext } from '@launchdarkly/js-server-sdk-common'; +import { ManagedAgent } from './ManagedAgent'; import { ManagedModel } from './ManagedModel'; import { LDAIAgentConfig, @@ -277,6 +278,25 @@ export interface LDAIClient { defaultAiProvider?: SupportedAIProvider, ): Promise; + /** + * Creates and returns a new ManagedAgent instance for agent interactions. + * Evaluations are wired automatically and exposed on ManagedResult.evaluations. + * + * @param key The key identifying the agent AI config to use. + * @param context The standard LDContext used when evaluating flags. + * @param defaultValue Optional fallback when the configuration is not available from LaunchDarkly. + * @param variables Dictionary of values for instruction interpolation. + * @param defaultAiProvider Optional default AI provider to use. + * @returns A promise that resolves to the ManagedAgent instance, or undefined if disabled. 
+ */ + createAgent( + key: string, + context: LDContext, + defaultValue?: LDAIAgentConfigDefault, + variables?: Record, + defaultAiProvider?: SupportedAIProvider, + ): Promise; + /** * @deprecated Use `createModel` instead. This method will be removed in a future version. */ diff --git a/packages/sdk/server-ai/src/api/ManagedAgent.ts b/packages/sdk/server-ai/src/api/ManagedAgent.ts new file mode 100644 index 0000000000..0ce5eb5803 --- /dev/null +++ b/packages/sdk/server-ai/src/api/ManagedAgent.ts @@ -0,0 +1,73 @@ +import { LDLogger } from '@launchdarkly/js-server-sdk-common'; + +import { LDAIAgentConfig } from './config/types'; +import { ManagedResult, RunnerResult } from './model/types'; +import { Runner } from './providers/Runner'; + +/** + * ManagedAgent provides agent invocation with automatic tracking and automatic + * judge evaluation. + * + * The class is stateless: each `run()` call sends the prompt directly to the + * underlying `Runner` and returns a `ManagedResult`. Conversation history, + * if any, must be managed by the caller (or by the Runner implementation). + * + * Obtain an instance via `LDAIClient.createAgent()`. + */ +export class ManagedAgent { + constructor( + protected readonly aiAgentConfig: LDAIAgentConfig, + protected readonly runner: Runner, + private readonly _logger?: LDLogger, + ) {} + + /** + * Invoke the agent with a prompt string and return a ManagedResult. + * + * `run()` resolves before `ManagedResult.evaluations` resolves. Awaiting + * `evaluations` guarantees both judge evaluation and tracker.trackJudgeResult() + * are complete. + * + * @param prompt The user input to send to the agent. + * @returns Promise resolving to ManagedResult (before evaluations settle). 
+ */ + async run(prompt: string): Promise { + const tracker = this.aiAgentConfig.createTracker!(); + + const result = await tracker.trackMetricsOf( + (r: RunnerResult) => r.metrics, + () => this.runner.run(prompt), + ); + + const metrics = tracker.getSummary(); + + const output = result.content; + const evaluations = this.aiAgentConfig.evaluator + .evaluate(prompt, output) + .then((results) => { + results.forEach((judgeResult) => { + tracker.trackJudgeResult(judgeResult); + }); + return results; + }) + .catch((err) => { + this._logger?.warn('Judge evaluation failed unexpectedly:', err); + return []; + }); + + return { + content: output, + metrics, + raw: result.raw, + parsed: result.parsed, + evaluations, + }; + } + + /** + * Get the underlying AI agent configuration used to initialize this ManagedAgent. + */ + getConfig(): LDAIAgentConfig { + return this.aiAgentConfig; + } +} diff --git a/packages/sdk/server-ai/src/api/index.ts b/packages/sdk/server-ai/src/api/index.ts index e63b7b6e24..852f54a2aa 100644 --- a/packages/sdk/server-ai/src/api/index.ts +++ b/packages/sdk/server-ai/src/api/index.ts @@ -2,6 +2,7 @@ export * from './config'; export * from './chat'; export * from './graph'; export * from './judge'; +export * from './ManagedAgent'; export * from './ManagedModel'; export * from './metrics'; export * from './model'; From 6bbce5f256a09167e5b4bdfe5a3a70a11e5473da Mon Sep 17 00:00:00 2001 From: Jason Bailey Date: Tue, 5 May 2026 10:30:11 -0500 Subject: [PATCH 05/19] feat: add ManagedGraphResult, GraphMetricSummary, and ManagedAgentGraph (#1335) --- .../__tests__/ManagedAgentGraph.test.ts | 187 ++++++++++++++++++ .../src/api/graph/ManagedAgentGraph.ts | 109 ++++++++++ packages/sdk/server-ai/src/api/graph/index.ts | 1 + packages/sdk/server-ai/src/api/graph/types.ts | 93 +++++++++ 4 files changed, 390 insertions(+) create mode 100644 packages/sdk/server-ai/__tests__/ManagedAgentGraph.test.ts create mode 100644 
packages/sdk/server-ai/src/api/graph/ManagedAgentGraph.ts diff --git a/packages/sdk/server-ai/__tests__/ManagedAgentGraph.test.ts b/packages/sdk/server-ai/__tests__/ManagedAgentGraph.test.ts new file mode 100644 index 0000000000..bbe7883c09 --- /dev/null +++ b/packages/sdk/server-ai/__tests__/ManagedAgentGraph.test.ts @@ -0,0 +1,187 @@ +import { AgentGraphDefinition } from '../src/api/graph/AgentGraphDefinition'; +import { AgentGraphNode } from '../src/api/graph/AgentGraphNode'; +import { LDGraphTracker } from '../src/api/graph/LDGraphTracker'; +import { ManagedAgentGraph } from '../src/api/graph/ManagedAgentGraph'; +import { AgentGraphRunnerResult } from '../src/api/graph/types'; +import { LDAIConfigTracker } from '../src/api/config/LDAIConfigTracker'; + +const makeNodeTracker = (summary: Record = {}): jest.Mocked => + ({ + trackTokens: jest.fn(), + trackDuration: jest.fn(), + trackToolCalls: jest.fn(), + trackSuccess: jest.fn(), + trackError: jest.fn(), + getSummary: jest.fn().mockReturnValue(summary), + }) as any; + +const makeNode = (tracker: jest.Mocked): AgentGraphNode => + ({ + getConfig: jest.fn().mockReturnValue({ createTracker: jest.fn().mockReturnValue(tracker) }), + }) as any; + +describe('ManagedAgentGraph', () => { + const mockGraphTracker: jest.Mocked = { + getTrackData: jest.fn().mockReturnValue({ runId: 'r1', graphKey: 'g1', version: 1 }), + getSummary: jest.fn().mockReturnValue({}), + resumptionToken: 'graph-resumption-token', + trackInvocationSuccess: jest.fn(), + trackInvocationFailure: jest.fn(), + trackDuration: jest.fn(), + trackTotalTokens: jest.fn(), + trackPath: jest.fn(), + trackRedirect: jest.fn(), + trackHandoffSuccess: jest.fn(), + trackHandoffFailure: jest.fn(), + }; + + let mockGraphDefinition: jest.Mocked; + + beforeEach(() => { + jest.clearAllMocks(); + mockGraphDefinition = { + enabled: true, + createTracker: jest.fn().mockReturnValue(mockGraphTracker), + getConfig: jest.fn(), + getNode: jest.fn().mockReturnValue(undefined), + 
getChildNodes: jest.fn(), + getParentNodes: jest.fn(), + terminalNodes: jest.fn(), + rootNode: jest.fn(), + traverse: jest.fn(), + reverseTraverse: jest.fn(), + } as any; + }); + + it('builds ManagedGraphResult from runner result', async () => { + const nodeATracker = makeNodeTracker({ success: true, resumptionToken: 'node-a-token' }); + const nodeBTracker = makeNodeTracker({ success: true, resumptionToken: 'node-b-token' }); + mockGraphDefinition.getNode = jest + .fn() + .mockImplementation((key: string) => + key === 'node-a' ? makeNode(nodeATracker) : makeNode(nodeBTracker), + ); + + const runnerResult: AgentGraphRunnerResult = { + content: 'Graph output', + metrics: { + success: true, + path: ['node-a', 'node-b'], + durationMs: 1500, + usage: { total: 100, input: 50, output: 50 }, + nodeMetrics: { + 'node-a': { success: true, usage: { total: 40, input: 20, output: 20 } }, + 'node-b': { success: true, usage: { total: 60, input: 30, output: 30 } }, + }, + }, + }; + + const managedGraph = new ManagedAgentGraph(mockGraphDefinition); + const result = await managedGraph.run(async (_def, _tracker) => runnerResult); + + expect(result.content).toBe('Graph output'); + expect(result.metrics.success).toBe(true); + expect(result.metrics.path).toEqual(['node-a', 'node-b']); + expect(result.metrics.durationMs).toBe(1500); + expect(result.metrics.tokens).toEqual({ total: 100, input: 50, output: 50 }); + expect(result.metrics.resumptionToken).toBe('graph-resumption-token'); + expect(result.metrics.nodeMetrics).toEqual({ + 'node-a': { success: true, resumptionToken: 'node-a-token' }, + 'node-b': { success: true, resumptionToken: 'node-b-token' }, + }); + }); + + it('fires tracking events into per-node trackers', async () => { + const nodeTracker = makeNodeTracker({}); + mockGraphDefinition.getNode = jest.fn().mockReturnValue(makeNode(nodeTracker)); + + const runnerResult: AgentGraphRunnerResult = { + content: 'out', + metrics: { + success: true, + path: ['n1'], + nodeMetrics: { + 
n1: { + success: true, + usage: { total: 10, input: 5, output: 5 }, + durationMs: 200, + toolCalls: ['tool-a'], + }, + }, + }, + }; + + const managedGraph = new ManagedAgentGraph(mockGraphDefinition); + await managedGraph.run(async () => runnerResult); + + expect(nodeTracker.trackTokens).toHaveBeenCalledWith({ total: 10, input: 5, output: 5 }); + expect(nodeTracker.trackDuration).toHaveBeenCalledWith(200); + expect(nodeTracker.trackToolCalls).toHaveBeenCalledWith(['tool-a']); + expect(nodeTracker.trackSuccess).toHaveBeenCalled(); + expect(nodeTracker.getSummary).toHaveBeenCalled(); + }); + + it('calls trackError for failed nodes', async () => { + const nodeTracker = makeNodeTracker({}); + mockGraphDefinition.getNode = jest.fn().mockReturnValue(makeNode(nodeTracker)); + + await new ManagedAgentGraph(mockGraphDefinition).run(async () => ({ + content: '', + metrics: { success: false, path: [], nodeMetrics: { n1: { success: false } } }, + })); + + expect(nodeTracker.trackError).toHaveBeenCalled(); + expect(nodeTracker.trackSuccess).not.toHaveBeenCalled(); + }); + + it('skips node metrics when getNode returns undefined', async () => { + mockGraphDefinition.getNode = jest.fn().mockReturnValue(undefined); + + const managedGraph = new ManagedAgentGraph(mockGraphDefinition); + const result = await managedGraph.run(async () => ({ + content: '', + metrics: { + success: true, + path: [], + nodeMetrics: { missing: { success: true } }, + }, + })); + + expect(result.metrics.nodeMetrics).toEqual({}); + }); + + it('passes graphDefinition and graphTracker to runner', async () => { + const runnerFn = jest.fn().mockResolvedValue({ + content: 'output', + metrics: { success: true, path: [], nodeMetrics: {} }, + }); + + await new ManagedAgentGraph(mockGraphDefinition).run(runnerFn); + + expect(runnerFn).toHaveBeenCalledWith(mockGraphDefinition, mockGraphTracker); + }); + + it('creates a tracker via graphDefinition.createTracker()', async () => { + await new 
ManagedAgentGraph(mockGraphDefinition).run(async () => ({ + content: '', + metrics: { success: true, path: [], nodeMetrics: {} }, + })); + + expect(mockGraphDefinition.createTracker).toHaveBeenCalled(); + }); + + it('resolves to empty evaluations by default', async () => { + const result = await new ManagedAgentGraph(mockGraphDefinition).run(async () => ({ + content: '', + metrics: { success: true, path: [], nodeMetrics: {} }, + })); + + expect(await result.evaluations).toEqual([]); + }); + + it('returns the graph definition via getGraphDefinition', () => { + expect(new ManagedAgentGraph(mockGraphDefinition).getGraphDefinition()).toBe( + mockGraphDefinition, + ); + }); +}); diff --git a/packages/sdk/server-ai/src/api/graph/ManagedAgentGraph.ts b/packages/sdk/server-ai/src/api/graph/ManagedAgentGraph.ts new file mode 100644 index 0000000000..ab2c4d755b --- /dev/null +++ b/packages/sdk/server-ai/src/api/graph/ManagedAgentGraph.ts @@ -0,0 +1,109 @@ +import { LDLogger } from '@launchdarkly/js-server-sdk-common'; + +import { LDAIMetrics } from '../metrics'; +import { LDAIMetricSummary } from '../model/types'; +import { LDJudgeResult } from '../judge/types'; +import { AgentGraphDefinition } from './AgentGraphDefinition'; +import { LDGraphTracker } from './LDGraphTracker'; +import { AgentGraphRunnerResult, GraphMetricSummary, ManagedGraphResult } from './types'; + +/** + * ManagedAgentGraph wraps an AgentGraphDefinition and provides a managed run() + * method that returns ManagedGraphResult with async judge evaluations. + * + * The runner function is responsible for executing the graph and returning + * an AgentGraphRunnerResult. ManagedAgentGraph builds the managed result from + * the runner result, including GraphMetricSummary with the graphTracker's + * resumptionToken. 
+ */ +export class ManagedAgentGraph { + constructor( + private readonly _graphDefinition: AgentGraphDefinition, + private readonly _logger?: LDLogger, + ) {} + + /** + * Runs the agent graph using the provided runner function and returns a ManagedGraphResult. + * + * The runner function receives the graph tracker and AgentGraphDefinition, + * executes the graph, and returns an AgentGraphRunnerResult. + * + * run() returns before ManagedGraphResult.evaluations resolves. + * + * @param runner Async function that executes the graph and returns AgentGraphRunnerResult. + * @returns ManagedGraphResult with GraphMetricSummary and evaluations promise. + */ + async run( + runner: ( + graphDefinition: AgentGraphDefinition, + graphTracker: LDGraphTracker, + ) => Promise, + ): Promise { + const graphTracker = this._graphDefinition.createTracker(); + + const runnerResult = await runner(this._graphDefinition, graphTracker); + + const metrics: GraphMetricSummary = { + success: runnerResult.metrics.success, + path: runnerResult.metrics.path, + durationMs: runnerResult.metrics.durationMs, + tokens: runnerResult.metrics.usage, + nodeMetrics: this._buildNodeMetrics(runnerResult.metrics.nodeMetrics), + resumptionToken: graphTracker.resumptionToken, + }; + + const evaluations: Promise = Promise.resolve([]); + + return { + content: runnerResult.content, + metrics, + raw: runnerResult.raw, + evaluations, + }; + } + + /** + * Converts per-node LDAIMetrics from the runner into LDAIMetricSummary by + * creating a per-node tracker, firing tracking events, and calling getSummary(). 
+ */ + private _buildNodeMetrics( + nodeMetrics: Record, + ): Record { + const summaries: Record = {}; + + for (const [nodeKey, metrics] of Object.entries(nodeMetrics)) { + const node = this._graphDefinition.getNode(nodeKey); + if (!node) { + this._logger?.warn(`ManagedAgentGraph: no node found for key "${nodeKey}", skipping metrics`); + continue; + } + + const tracker = node.getConfig().createTracker!(); + if (metrics.usage) { + tracker.trackTokens(metrics.usage); + } + if (metrics.durationMs !== undefined) { + tracker.trackDuration(metrics.durationMs); + } + if (metrics.toolCalls?.length) { + tracker.trackToolCalls(metrics.toolCalls); + } + if (metrics.success) { + tracker.trackSuccess(); + } else { + tracker.trackError(); + } + + summaries[nodeKey] = tracker.getSummary(); + } + + return summaries; + } + + /** + * Returns the underlying AgentGraphDefinition. + */ + getGraphDefinition(): AgentGraphDefinition { + return this._graphDefinition; + } +} diff --git a/packages/sdk/server-ai/src/api/graph/index.ts b/packages/sdk/server-ai/src/api/graph/index.ts index 9d899029d5..6b26c398b0 100644 --- a/packages/sdk/server-ai/src/api/graph/index.ts +++ b/packages/sdk/server-ai/src/api/graph/index.ts @@ -2,3 +2,4 @@ export * from './types'; export * from './LDGraphTracker'; export * from './AgentGraphNode'; export * from './AgentGraphDefinition'; +export * from './ManagedAgentGraph'; diff --git a/packages/sdk/server-ai/src/api/graph/types.ts b/packages/sdk/server-ai/src/api/graph/types.ts index 1d19f88c9c..902458d8d2 100644 --- a/packages/sdk/server-ai/src/api/graph/types.ts +++ b/packages/sdk/server-ai/src/api/graph/types.ts @@ -1,4 +1,6 @@ +import { LDJudgeResult } from '../judge/types'; import { LDAIMetrics, LDTokenUsage } from '../metrics'; +import { LDAIMetricSummary } from '../model/types'; /** * Represents a directed edge in an agent graph, connecting a source node to a target node. 
@@ -119,3 +121,94 @@ export interface AgentGraphRunnerResult { raw?: unknown; } +// ============================================================================ +// Managed-Layer Graph Types +// ============================================================================ + +/** + * Graph metric summary returned in ManagedGraphResult. + * Includes per-node metrics and a resumption token. + */ +export interface GraphMetricSummary { + /** + * Whether the graph invocation succeeded. + */ + success: boolean; + + /** + * Execution path through the graph as an ordered array of config keys. + */ + path: string[]; + + /** + * Total graph execution duration in milliseconds, if tracked. + */ + durationMs?: number; + + /** + * Aggregate token usage across the entire graph invocation, if available. + */ + tokens?: LDTokenUsage; + + /** + * Per-node metric summaries keyed by agent config key. + */ + nodeMetrics: Record; + + /** + * Resumption token for deferred feedback association. + */ + resumptionToken?: string; +} + +/** + * The result returned by a managed graph invocation (ManagedAgentGraph.run()). + */ +export interface ManagedGraphResult { + /** + * The text content of the graph's final response. + */ + content: string; + + /** + * Summarized metrics for this graph invocation. + */ + metrics: GraphMetricSummary; + + /** + * The raw response object from the provider, if available. + */ + raw?: unknown; + + /** + * Promise that resolves to the judge evaluation results. + * Awaiting this promise guarantees both evaluation and tracking are complete. + */ + evaluations: Promise; +} + +/** + * Tracking metadata returned by {@link LDGraphTracker.getTrackData}. + */ +export interface LDGraphTrackData { + /** + * UUID v4 uniquely identifying this tracker and all events it emits. + */ + runId: string; + + /** + * The graph configuration key. + */ + graphKey: string; + + /** + * The variation key. Absent when a default config was used rather than a real flag evaluation. 
+ */ + variationKey?: string; + + /** + * The version of the flag variation. + */ + version: number; +} + From 0ecde68074b37db5f58c8643801c31f53e44478f Mon Sep 17 00:00:00 2001 From: Jason Bailey Date: Tue, 5 May 2026 12:06:55 -0500 Subject: [PATCH 06/19] chore: add Runner and AgentGraphRunner protocol tests (#1336) --- .../__tests__/RunnerProtocol.test.ts | 72 +++++++++++++++++++ 1 file changed, 72 insertions(+) create mode 100644 packages/sdk/server-ai/__tests__/RunnerProtocol.test.ts diff --git a/packages/sdk/server-ai/__tests__/RunnerProtocol.test.ts b/packages/sdk/server-ai/__tests__/RunnerProtocol.test.ts new file mode 100644 index 0000000000..70c1e11aa7 --- /dev/null +++ b/packages/sdk/server-ai/__tests__/RunnerProtocol.test.ts @@ -0,0 +1,72 @@ +import type { AgentGraphRunnerResult } from '../src/api/graph/types'; +import type { RunnerResult } from '../src/api/model/types'; +import type { AgentGraphRunner, Runner } from '../src/api/providers/Runner'; + +/** + * Verify that the Runner and AgentGraphRunner protocols can be implemented + * by a plain object (no abstract class required). 
+ */ +describe('Runner protocol', () => { + it('can be implemented as a plain object (no class extension required)', async () => { + const runnerResult: RunnerResult = { + content: 'Hello from runner', + metrics: { success: true }, + }; + + const myRunner: Runner = { + run: jest.fn().mockResolvedValue(runnerResult), + }; + + const result = await myRunner.run('Hello'); + + expect(result.content).toBe('Hello from runner'); + expect(result.metrics.success).toBe(true); + }); + + it('Runner.run() accepts optional outputType for structured output', async () => { + const runnerResult: RunnerResult = { + content: '', + metrics: { success: true }, + parsed: { score: 0.9, reasoning: 'good' }, + }; + + const myRunner: Runner = { + run: jest.fn().mockResolvedValue(runnerResult), + }; + + const schema = { type: 'object', properties: { score: { type: 'number' } } }; + const result = await myRunner.run('Evaluate', schema); + + expect(result.parsed).toEqual({ score: 0.9, reasoning: 'good' }); + expect(myRunner.run).toHaveBeenCalledWith('Evaluate', schema); + }); + + it('AgentGraphRunner can be implemented as a plain object', async () => { + const graphResult: AgentGraphRunnerResult = { + content: 'Graph output', + metrics: { + success: true, + path: ['node-a'], + nodeMetrics: { 'node-a': { success: true } }, + }, + }; + + const myGraphRunner: AgentGraphRunner = { + run: jest.fn().mockResolvedValue(graphResult), + }; + + const result = await myGraphRunner.run('user input'); + + expect(result.content).toBe('Graph output'); + expect(result.metrics.path).toEqual(['node-a']); + }); + + it('RunnerResult does not include evaluations field', () => { + const result: RunnerResult = { + content: 'test', + metrics: { success: true }, + }; + + expect('evaluations' in result).toBe(false); + }); +}); From 663c7cf4a7823227530cfb417816245645b6c928 Mon Sep 17 00:00:00 2001 From: Jason Bailey Date: Tue, 5 May 2026 12:20:42 -0500 Subject: [PATCH 07/19] chore: rename _buildNodeMetrics to 
_trackNodeMetrics (AIC-2388) (#1354) --- packages/sdk/server-ai/src/api/graph/ManagedAgentGraph.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/sdk/server-ai/src/api/graph/ManagedAgentGraph.ts b/packages/sdk/server-ai/src/api/graph/ManagedAgentGraph.ts index ab2c4d755b..d55bb26d50 100644 --- a/packages/sdk/server-ai/src/api/graph/ManagedAgentGraph.ts +++ b/packages/sdk/server-ai/src/api/graph/ManagedAgentGraph.ts @@ -48,7 +48,7 @@ export class ManagedAgentGraph { path: runnerResult.metrics.path, durationMs: runnerResult.metrics.durationMs, tokens: runnerResult.metrics.usage, - nodeMetrics: this._buildNodeMetrics(runnerResult.metrics.nodeMetrics), + nodeMetrics: this._trackNodeMetrics(runnerResult.metrics.nodeMetrics), resumptionToken: graphTracker.resumptionToken, }; @@ -66,7 +66,7 @@ export class ManagedAgentGraph { * Converts per-node LDAIMetrics from the runner into LDAIMetricSummary by * creating a per-node tracker, firing tracking events, and calling getSummary(). 
*/ - private _buildNodeMetrics( + private _trackNodeMetrics( nodeMetrics: Record, ): Record { const summaries: Record = {}; From c09537a1048e632aca3b6798f662aa226de4230c Mon Sep 17 00:00:00 2001 From: Jason Bailey Date: Tue, 5 May 2026 15:51:45 -0500 Subject: [PATCH 08/19] feat: replace VercelProvider with Runner protocol implementation (AIC-2388) (#1339) --- .../__tests__/VercelHelper.test.ts | 96 +++ .../__tests__/VercelModelRunner.test.ts | 172 ++++ .../__tests__/VercelProvider.test.ts | 794 ------------------ .../__tests__/VercelRunnerFactory.test.ts | 70 ++ .../server-ai-vercel/src/VercelHelper.ts | 89 ++ .../server-ai-vercel/src/VercelModelRunner.ts | 116 +++ .../server-ai-vercel/src/VercelProvider.ts | 423 ---------- .../src/VercelRunnerFactory.ts | 131 +++ .../server-ai-vercel/src/index.ts | 10 +- 9 files changed, 683 insertions(+), 1218 deletions(-) create mode 100644 packages/ai-providers/server-ai-vercel/__tests__/VercelHelper.test.ts create mode 100644 packages/ai-providers/server-ai-vercel/__tests__/VercelModelRunner.test.ts delete mode 100644 packages/ai-providers/server-ai-vercel/__tests__/VercelProvider.test.ts create mode 100644 packages/ai-providers/server-ai-vercel/__tests__/VercelRunnerFactory.test.ts create mode 100644 packages/ai-providers/server-ai-vercel/src/VercelHelper.ts create mode 100644 packages/ai-providers/server-ai-vercel/src/VercelModelRunner.ts delete mode 100644 packages/ai-providers/server-ai-vercel/src/VercelProvider.ts create mode 100644 packages/ai-providers/server-ai-vercel/src/VercelRunnerFactory.ts diff --git a/packages/ai-providers/server-ai-vercel/__tests__/VercelHelper.test.ts b/packages/ai-providers/server-ai-vercel/__tests__/VercelHelper.test.ts new file mode 100644 index 0000000000..833aeca4c3 --- /dev/null +++ b/packages/ai-providers/server-ai-vercel/__tests__/VercelHelper.test.ts @@ -0,0 +1,96 @@ +import { + convertMessagesToVercel, + getAIMetricsFromResponse, + getAIMetricsFromStream, + mapProviderName, + 
mapUsageDataToLDTokenUsage, +} from '../src/VercelHelper'; + +describe('convertMessagesToVercel', () => { + it('passes role and content through unchanged', () => { + expect( + convertMessagesToVercel([ + { role: 'system', content: 'sys' }, + { role: 'user', content: 'u' }, + { role: 'assistant', content: 'a' }, + ]), + ).toEqual([ + { role: 'system', content: 'sys' }, + { role: 'user', content: 'u' }, + { role: 'assistant', content: 'a' }, + ]); + }); +}); + +describe('mapProviderName', () => { + it('maps gemini to google (case-insensitive)', () => { + expect(mapProviderName('gemini')).toBe('google'); + expect(mapProviderName('Gemini')).toBe('google'); + }); + + it('returns the provider unchanged when no mapping exists', () => { + expect(mapProviderName('openai')).toBe('openai'); + expect(mapProviderName('anthropic')).toBe('anthropic'); + }); +}); + +describe('mapUsageDataToLDTokenUsage', () => { + it('prefers v5 field names (inputTokens / outputTokens) over v4', () => { + const usage = mapUsageDataToLDTokenUsage({ + totalTokens: 100, + inputTokens: 40, + outputTokens: 60, + promptTokens: 1, + completionTokens: 2, + }); + expect(usage).toEqual({ total: 100, input: 40, output: 60 }); + }); + + it('falls back to v4 field names when v5 is absent', () => { + const usage = mapUsageDataToLDTokenUsage({ + totalTokens: 50, + promptTokens: 20, + completionTokens: 30, + }); + expect(usage).toEqual({ total: 50, input: 20, output: 30 }); + }); +}); + +describe('getAIMetricsFromResponse', () => { + it('treats missing finishReason as success', () => { + expect( + getAIMetricsFromResponse({ + usage: { totalTokens: 5, promptTokens: 2, completionTokens: 3 }, + }), + ).toEqual({ success: true, usage: { total: 5, input: 2, output: 3 } }); + }); + + it('marks success=false when finishReason is "error"', () => { + expect( + getAIMetricsFromResponse({ + finishReason: 'error', + usage: { totalTokens: 10, promptTokens: 4, completionTokens: 6 }, + }).success, + ).toBe(false); + }); +}); + 
+describe('getAIMetricsFromStream', () => { + it('extracts usage from a successful stream', async () => { + const result = await getAIMetricsFromStream({ + finishReason: Promise.resolve('stop'), + usage: Promise.resolve({ totalTokens: 100, promptTokens: 49, completionTokens: 51 }), + }); + expect(result).toEqual({ + success: true, + usage: { total: 100, input: 49, output: 51 }, + }); + }); + + it('marks success=false on error finishReason', async () => { + const result = await getAIMetricsFromStream({ + finishReason: Promise.resolve('error'), + }); + expect(result.success).toBe(false); + }); +}); diff --git a/packages/ai-providers/server-ai-vercel/__tests__/VercelModelRunner.test.ts b/packages/ai-providers/server-ai-vercel/__tests__/VercelModelRunner.test.ts new file mode 100644 index 0000000000..4b4bed8846 --- /dev/null +++ b/packages/ai-providers/server-ai-vercel/__tests__/VercelModelRunner.test.ts @@ -0,0 +1,172 @@ +import { generateObject, generateText, jsonSchema } from 'ai'; + +import type { LDAICompletionConfig } from '@launchdarkly/server-sdk-ai'; + +import { VercelModelRunner } from '../src/VercelModelRunner'; + +jest.mock('ai', () => ({ + generateText: jest.fn(), + generateObject: jest.fn(), + jsonSchema: jest.fn((schema) => schema), +})); + +const mockLogger = { + warn: jest.fn(), + info: jest.fn(), + error: jest.fn(), + debug: jest.fn(), +}; + +const baseConfig: LDAICompletionConfig = { + key: 'completion', + enabled: true, + model: { name: 'mock' }, +}; + +describe('VercelModelRunner', () => { + const fakeModel = { name: 'mock' }; + let runner: VercelModelRunner; + + beforeEach(() => { + runner = new VercelModelRunner(fakeModel as any, baseConfig, {}, mockLogger); + jest.clearAllMocks(); + }); + + describe('run (chat completion)', () => { + it('returns a successful RunnerResult with content, metrics, and raw response', async () => { + const result = { + text: 'Hi!', + usage: { totalTokens: 12, promptTokens: 7, completionTokens: 5 }, + }; + 
(generateText as jest.Mock).mockResolvedValue(result); + + const out = await runner.run('hello'); + + expect(generateText).toHaveBeenCalledWith({ + model: fakeModel, + messages: [{ role: 'user', content: 'hello' }], + experimental_telemetry: { isEnabled: true }, + }); + expect(out.content).toBe('Hi!'); + expect(out.metrics).toEqual({ + success: true, + usage: { total: 12, input: 7, output: 5 }, + }); + expect(out.raw).toBe(result); + }); + + it('prepends config messages before the user prompt', async () => { + (generateText as jest.Mock).mockResolvedValue({ + text: 'reply', + usage: { totalTokens: 1, promptTokens: 1, completionTokens: 0 }, + }); + + const configWithMessages: LDAICompletionConfig = { + ...baseConfig, + messages: [{ role: 'system', content: 'You are X' }], + }; + const r = new VercelModelRunner(fakeModel as any, configWithMessages, {}, mockLogger); + await r.run('hi'); + + expect(generateText).toHaveBeenCalledWith({ + model: fakeModel, + messages: [ + { role: 'system', content: 'You are X' }, + { role: 'user', content: 'hi' }, + ], + experimental_telemetry: { isEnabled: true }, + }); + }); + + it('preserves v5 token field handling via getAIMetricsFromResponse', async () => { + (generateText as jest.Mock).mockResolvedValue({ + text: 'ok', + usage: { totalTokens: 100, inputTokens: 40, outputTokens: 60 }, + }); + + const out = await runner.run('hello'); + + expect(out.metrics.usage).toEqual({ total: 100, input: 40, output: 60 }); + }); + + it('uses a LDMessage[] directly without prepending config messages', async () => { + (generateText as jest.Mock).mockResolvedValue({ + text: 'direct', + usage: { totalTokens: 5, promptTokens: 2, completionTokens: 3 }, + }); + + const configWithMessages: LDAICompletionConfig = { + ...baseConfig, + messages: [{ role: 'system', content: 'Should not appear' }], + }; + const r = new VercelModelRunner(fakeModel as any, configWithMessages, {}, mockLogger); + const prebuilt = [ + { role: 'system' as const, content: 'Custom 
system' }, + { role: 'user' as const, content: 'Direct input' }, + ]; + await r.run(prebuilt); + + expect(generateText).toHaveBeenCalledWith({ + model: fakeModel, + messages: prebuilt, + experimental_telemetry: { isEnabled: true }, + }); + }); + + it('returns success=false when generateText throws', async () => { + const err = new Error('boom'); + (generateText as jest.Mock).mockRejectedValue(err); + + const out = await runner.run('hello'); + + expect(out.content).toBe(''); + expect(out.metrics.success).toBe(false); + expect(mockLogger.warn).toHaveBeenCalledWith('Vercel AI model invocation failed:', err); + }); + }); + + describe('run (structured output)', () => { + it('exposes parsed structured output via parsed', async () => { + const obj = { name: 'Ada', age: 36 }; + (generateObject as jest.Mock).mockResolvedValue({ + object: obj, + usage: { totalTokens: 30, promptTokens: 10, completionTokens: 20 }, + }); + + const schema = { type: 'object' }; + const out = await runner.run('tell', schema); + + expect(jsonSchema).toHaveBeenCalledWith(schema); + expect(generateObject).toHaveBeenCalledWith({ + model: fakeModel, + messages: [{ role: 'user', content: 'tell' }], + schema, + experimental_telemetry: { isEnabled: true }, + }); + expect(out.parsed).toEqual(obj); + expect(out.content).toBe(JSON.stringify(obj)); + expect(out.metrics.success).toBe(true); + }); + + it('returns success=false when generateObject throws', async () => { + const err = new Error('struct boom'); + (generateObject as jest.Mock).mockRejectedValue(err); + + const out = await runner.run('tell', { type: 'object' }); + + expect(out.content).toBe(''); + expect(out.parsed).toBeUndefined(); + expect(out.metrics.success).toBe(false); + expect(mockLogger.warn).toHaveBeenCalledWith( + 'Vercel AI structured model invocation failed:', + err, + ); + }); + }); + + describe('getModel', () => { + it('returns the underlying Vercel AI model', () => { + expect(runner.getModel()).toBe(fakeModel); + }); + }); +}); diff 
--git a/packages/ai-providers/server-ai-vercel/__tests__/VercelProvider.test.ts b/packages/ai-providers/server-ai-vercel/__tests__/VercelProvider.test.ts deleted file mode 100644 index 0f7c3ef008..0000000000 --- a/packages/ai-providers/server-ai-vercel/__tests__/VercelProvider.test.ts +++ /dev/null @@ -1,794 +0,0 @@ -import { generateObject, generateText, jsonSchema } from 'ai'; - -import { VercelProvider } from '../src/VercelProvider'; - -// Mock Vercel AI SDK -jest.mock('ai', () => ({ - generateText: jest.fn(), - generateObject: jest.fn(), - jsonSchema: jest.fn((schema) => schema), -})); - -describe('VercelProvider', () => { - let mockModel: any; - let provider: VercelProvider; - - beforeEach(() => { - mockModel = { name: 'test-model' }; - provider = new VercelProvider(mockModel, {}); - jest.clearAllMocks(); - }); - - describe('getAIMetricsFromResponse', () => { - it('creates metrics with success=true and token usage', () => { - const mockResponse = { - usage: { - promptTokens: 50, - completionTokens: 50, - totalTokens: 100, - }, - }; - - const result = VercelProvider.getAIMetricsFromResponse(mockResponse); - - expect(result).toEqual({ - success: true, - usage: { - total: 100, - input: 50, - output: 50, - }, - }); - }); - - it('creates metrics with success=true and no usage when usage is missing', () => { - const mockResponse = {}; - - const result = VercelProvider.getAIMetricsFromResponse(mockResponse); - - expect(result).toEqual({ - success: true, - usage: undefined, - }); - }); - - it('handles partial usage data', () => { - const mockResponse = { - usage: { - promptTokens: 30, - // completionTokens and totalTokens missing - }, - }; - - const result = VercelProvider.getAIMetricsFromResponse(mockResponse); - - expect(result).toEqual({ - success: true, - usage: { - total: 0, - input: 30, - output: 0, - }, - }); - }); - - it('supports v5 field names (inputTokens, outputTokens)', () => { - const mockResponse = { - usage: { - inputTokens: 40, - outputTokens: 60, - 
totalTokens: 100, - }, - }; - - const result = VercelProvider.getAIMetricsFromResponse(mockResponse); - - expect(result).toEqual({ - success: true, - usage: { - total: 100, - input: 40, - output: 60, - }, - }); - }); - - it('prefers v5 field names over v4 when both are present', () => { - const mockResponse = { - usage: { - // v4 field names - promptTokens: 10, - completionTokens: 20, - // v5 field names (should be preferred) - inputTokens: 40, - outputTokens: 60, - totalTokens: 100, - }, - }; - - const result = VercelProvider.getAIMetricsFromResponse(mockResponse); - - expect(result).toEqual({ - success: true, - usage: { - total: 100, - input: 40, // inputTokens preferred over promptTokens - output: 60, // outputTokens preferred over completionTokens - }, - }); - }); - }); - - describe('invokeModel', () => { - it('invokes Vercel AI generateText and returns response', async () => { - const mockResponse = { - text: 'Hello! How can I help you today?', - usage: { - promptTokens: 10, - completionTokens: 15, - totalTokens: 25, - }, - }; - - (generateText as jest.Mock).mockResolvedValue(mockResponse); - - const messages = [{ role: 'user' as const, content: 'Hello!' }]; - - const result = await provider.invokeModel(messages); - - expect(generateText).toHaveBeenCalledWith({ - model: mockModel, - messages: [{ role: 'user', content: 'Hello!' }], - experimental_telemetry: { isEnabled: true }, - }); - - expect(result).toEqual({ - message: { - role: 'assistant', - content: 'Hello! How can I help you today?', - }, - metrics: { - success: true, - usage: { - total: 25, - input: 10, - output: 15, - }, - }, - }); - }); - - it('handles response without usage data', async () => { - const mockResponse = { - text: 'Hello! How can I help you today?', - }; - - (generateText as jest.Mock).mockResolvedValue(mockResponse); - - const messages = [{ role: 'user' as const, content: 'Hello!' 
}]; - - const result = await provider.invokeModel(messages); - - expect(result).toEqual({ - message: { - role: 'assistant', - content: 'Hello! How can I help you today?', - }, - metrics: { - success: true, - usage: undefined, - }, - }); - }); - - it('handles errors and returns failure metrics', async () => { - const mockError = new Error('API call failed'); - (generateText as jest.Mock).mockRejectedValue(mockError); - - const mockLogger = { - warn: jest.fn(), - }; - provider = new VercelProvider(mockModel, {}, mockLogger as any); - - const messages = [{ role: 'user' as const, content: 'Hello!' }]; - const result = await provider.invokeModel(messages); - - expect(mockLogger.warn).toHaveBeenCalledWith('Vercel AI model invocation failed:', mockError); - expect(result).toEqual({ - message: { - role: 'assistant', - content: '', - }, - metrics: { - success: false, - }, - }); - }); - }); - - describe('invokeStructuredModel', () => { - it('invokes Vercel AI generateObject and returns structured response', async () => { - const mockResponse = { - object: { - name: 'John Doe', - age: 30, - isActive: true, - }, - usage: { - promptTokens: 10, - completionTokens: 15, - totalTokens: 25, - }, - }; - - (generateObject as jest.Mock).mockResolvedValue(mockResponse); - - const messages = [{ role: 'user' as const, content: 'Generate user data' }]; - const responseStructure = { - name: 'string', - age: 0, - isActive: true, - }; - - const result = await provider.invokeStructuredModel(messages, responseStructure); - - expect(generateObject).toHaveBeenCalledWith({ - model: mockModel, - messages: [{ role: 'user', content: 'Generate user data' }], - schema: responseStructure, - experimental_telemetry: { isEnabled: true }, - }); - expect(jsonSchema).toHaveBeenCalledWith(responseStructure); - - expect(result).toEqual({ - data: { - name: 'John Doe', - age: 30, - isActive: true, - }, - rawResponse: JSON.stringify({ - name: 'John Doe', - age: 30, - isActive: true, - }), - metrics: { - success: 
true, - usage: { - total: 25, - input: 10, - output: 15, - }, - }, - }); - }); - - it('handles structured response without usage data', async () => { - const mockResponse = { - object: { - result: 'success', - }, - }; - - (generateObject as jest.Mock).mockResolvedValue(mockResponse); - - const messages = [{ role: 'user' as const, content: 'Generate result' }]; - const responseStructure = { - result: 'string', - }; - - const result = await provider.invokeStructuredModel(messages, responseStructure); - - expect(result).toEqual({ - data: { - result: 'success', - }, - rawResponse: JSON.stringify({ - result: 'success', - }), - metrics: { - success: true, - usage: undefined, - }, - }); - }); - - it('handles errors and returns failure metrics', async () => { - const mockError = new Error('API call failed'); - (generateObject as jest.Mock).mockRejectedValue(mockError); - - const mockLogger = { - warn: jest.fn(), - }; - provider = new VercelProvider(mockModel, {}, mockLogger as any); - - const messages = [{ role: 'user' as const, content: 'Generate result' }]; - const responseStructure = { - result: 'string', - }; - - const result = await provider.invokeStructuredModel(messages, responseStructure); - - expect(mockLogger.warn).toHaveBeenCalledWith( - 'Vercel AI structured model invocation failed:', - mockError, - ); - expect(result).toEqual({ - data: {}, - rawResponse: '', - metrics: { - success: false, - }, - }); - }); - }); - - describe('getModel', () => { - it('returns the underlying Vercel AI model', () => { - const model = provider.getModel(); - expect(model).toBe(mockModel); - }); - }); - - describe('createVercelModel', () => { - it('creates OpenAI model for openai provider', async () => { - const mockAiConfig = { - key: 'test-config', - model: { name: 'gpt-4', parameters: {} }, - provider: { name: 'openai' }, - enabled: true, - tracker: {} as any, - toVercelAISDK: jest.fn(), - }; - - // Mock the dynamic import - jest.doMock('@ai-sdk/openai', () => ({ - openai: 
jest.fn().mockReturnValue(mockModel), - })); - - const result = await VercelProvider.createVercelModel(mockAiConfig); - expect(result).toBe(mockModel); - }); - - it('throws error for unsupported provider', async () => { - const mockAiConfig = { - key: 'test-config', - model: { name: 'test-model', parameters: {} }, - provider: { name: 'unsupported' }, - enabled: true, - tracker: {} as any, - toVercelAISDK: jest.fn(), - }; - - await expect(VercelProvider.createVercelModel(mockAiConfig)).rejects.toThrow( - 'Unsupported Vercel AI provider: unsupported', - ); - }); - }); - - describe('create', () => { - it('creates VercelProvider with correct model and parameters', async () => { - const mockAiConfig = { - key: 'test-config', - model: { - name: 'gpt-4', - parameters: { - temperature: 0.7, - maxTokens: 1000, - }, - }, - provider: { name: 'openai' }, - enabled: true, - tracker: {} as any, - toVercelAISDK: jest.fn(), - }; - - // Mock the dynamic import - jest.doMock('@ai-sdk/openai', () => ({ - openai: jest.fn().mockReturnValue(mockModel), - })); - - const result = await VercelProvider.create(mockAiConfig); - - expect(result).toBeInstanceOf(VercelProvider); - expect(result.getModel()).toBeDefined(); - }); - }); - - describe('toVercelAISDK', () => { - const mockToVercelModel = { name: 'mockModel' }; - const mockMessages = [ - { role: 'user' as const, content: 'test prompt' }, - { role: 'system' as const, content: 'test instruction' }, - ]; - const mockOptions = { - nonInterpolatedMessages: [ - { role: 'assistant' as const, content: 'test assistant instruction' }, - ], - }; - const mockProvider = jest.fn().mockReturnValue(mockToVercelModel); - - beforeEach(() => { - jest.clearAllMocks(); - }); - - it('handles undefined model and messages', () => { - const aiConfig = { - key: 'test-config', - enabled: true, - }; - - const result = VercelProvider.toVercelAISDK(aiConfig, mockProvider); - - expect(mockProvider).toHaveBeenCalledWith(''); - expect(result).toEqual( - 
expect.objectContaining({ - model: mockToVercelModel, - messages: undefined, - }), - ); - }); - - it('uses additional messages', () => { - const aiConfig = { - key: 'test-config', - model: { name: 'test-ai-model' }, - enabled: true, - }; - - const result = VercelProvider.toVercelAISDK(aiConfig, mockProvider, mockOptions); - - expect(mockProvider).toHaveBeenCalledWith('test-ai-model'); - expect(result).toEqual( - expect.objectContaining({ - model: mockToVercelModel, - messages: mockOptions.nonInterpolatedMessages, - }), - ); - }); - - it('combines config messages and additional messages', () => { - const aiConfig = { - key: 'test-config', - model: { name: 'test-ai-model' }, - messages: mockMessages, - enabled: true, - }; - - const result = VercelProvider.toVercelAISDK(aiConfig, mockProvider, mockOptions); - - expect(mockProvider).toHaveBeenCalledWith('test-ai-model'); - expect(result).toEqual( - expect.objectContaining({ - model: mockToVercelModel, - messages: [...mockMessages, ...(mockOptions.nonInterpolatedMessages ?? 
[])], - }), - ); - }); - - it('maps parameters correctly', () => { - const aiConfig = { - key: 'test-config', - model: { - name: 'test-ai-model', - parameters: { - max_tokens: 100, - temperature: 0.7, - top_p: 0.9, - top_k: 50, - presence_penalty: 0.1, - frequency_penalty: 0.2, - stop: ['stop1', 'stop2'], - seed: 42, - }, - }, - messages: mockMessages, - enabled: true, - }; - - const result = VercelProvider.toVercelAISDK(aiConfig, mockProvider); - - expect(mockProvider).toHaveBeenCalledWith('test-ai-model'); - expect(result).toEqual({ - model: mockToVercelModel, - messages: mockMessages, - maxTokens: 100, - temperature: 0.7, - topP: 0.9, - topK: 50, - presencePenalty: 0.1, - frequencyPenalty: 0.2, - stopSequences: ['stop1', 'stop2'], - seed: 42, - }); - }); - - it('handles provider map with provider name', () => { - const providerMap = { - openai: jest.fn().mockReturnValue(mockToVercelModel), - anthropic: jest.fn().mockReturnValue({ name: 'other-model' }), - }; - - const aiConfig = { - key: 'test-config', - model: { name: 'test-ai-model' }, - provider: { name: 'openai' }, - enabled: true, - }; - - const result = VercelProvider.toVercelAISDK(aiConfig, providerMap); - - expect(providerMap.openai).toHaveBeenCalledWith('test-ai-model'); - expect(providerMap.anthropic).not.toHaveBeenCalled(); - expect(result.model).toBe(mockToVercelModel); - }); - - it('throws error when model cannot be determined', () => { - const aiConfig = { - key: 'test-config', - model: { name: 'test-ai-model' }, - provider: { name: 'unknown' }, - enabled: true, - }; - - const providerMap = { - openai: jest.fn().mockReturnValue(mockToVercelModel), - }; - - expect(() => VercelProvider.toVercelAISDK(aiConfig, providerMap)).toThrow( - 'Vercel AI SDK model cannot be determined from the supplied provider parameter.', - ); - }); - - it('throws error when function provider returns undefined', () => { - const aiConfig = { - key: 'test-config', - model: { name: 'test-ai-model' }, - enabled: true, - }; - - 
const undefinedProvider = jest.fn().mockReturnValue(undefined); - - expect(() => VercelProvider.toVercelAISDK(aiConfig, undefinedProvider)).toThrow( - 'Vercel AI SDK model cannot be determined from the supplied provider parameter.', - ); - }); - }); - - describe('getAIMetricsFromStream', () => { - it('extracts metrics from successful stream with usage', async () => { - const mockStream = { - finishReason: Promise.resolve('stop'), - usage: Promise.resolve({ - totalTokens: 100, - promptTokens: 49, - completionTokens: 51, - }), - }; - - const result = await VercelProvider.getAIMetricsFromStream(mockStream); - - expect(result).toEqual({ - success: true, - usage: { - total: 100, - input: 49, - output: 51, - }, - }); - }); - - it('extracts metrics using totalUsage when available', async () => { - const mockStream = { - finishReason: Promise.resolve('stop'), - usage: Promise.resolve({ - totalTokens: 50, - promptTokens: 20, - completionTokens: 30, - }), - totalUsage: Promise.resolve({ - totalTokens: 100, - promptTokens: 49, - completionTokens: 51, - }), - }; - - const result = await VercelProvider.getAIMetricsFromStream(mockStream); - - expect(result).toEqual({ - success: true, - usage: { - total: 100, - input: 49, - output: 51, - }, - }); - }); - - it('handles stream without usage data', async () => { - const mockStream = { - finishReason: Promise.resolve('stop'), - }; - - const result = await VercelProvider.getAIMetricsFromStream(mockStream); - - expect(result).toEqual({ - success: true, - usage: undefined, - }); - }); - - it('handles error finishReason', async () => { - const mockStream = { - finishReason: Promise.resolve('error'), - usage: Promise.resolve({ - totalTokens: 100, - promptTokens: 49, - completionTokens: 51, - }), - }; - - const result = await VercelProvider.getAIMetricsFromStream(mockStream); - - expect(result).toEqual({ - success: false, - usage: { - total: 100, - input: 49, - output: 51, - }, - }); - }); - - it('handles rejected finishReason promise', 
async () => { - const mockStream = { - finishReason: Promise.reject(new Error('API error')), - usage: Promise.resolve({ - totalTokens: 100, - promptTokens: 49, - completionTokens: 51, - }), - }; - - const result = await VercelProvider.getAIMetricsFromStream(mockStream); - - expect(result).toEqual({ - success: false, - usage: { - total: 100, - input: 49, - output: 51, - }, - }); - }); - - it('handles missing finishReason', async () => { - const mockStream = { - usage: Promise.resolve({ - totalTokens: 100, - promptTokens: 49, - completionTokens: 51, - }), - }; - - const result = await VercelProvider.getAIMetricsFromStream(mockStream); - - // When finishReason is missing, it defaults to 'unknown' which is !== 'error', so success is true - expect(result).toEqual({ - success: true, - usage: { - total: 100, - input: 49, - output: 51, - }, - }); - }); - - it('handles missing finishReason and usage', async () => { - const mockStream = {}; - - const result = await VercelProvider.getAIMetricsFromStream(mockStream); - - // When finishReason is missing, it defaults to 'unknown' which is !== 'error', so success is true - expect(result).toEqual({ - success: true, - usage: undefined, - }); - }); - - it('handles rejected usage promise gracefully', async () => { - const mockStream = { - finishReason: Promise.resolve('stop'), - usage: Promise.reject(new Error('Usage API error')), - }; - - const result = await VercelProvider.getAIMetricsFromStream(mockStream); - - expect(result).toEqual({ - success: true, - usage: undefined, - }); - }); - - it('handles rejected totalUsage promise and falls back to usage', async () => { - const mockStream = { - finishReason: Promise.resolve('stop'), - totalUsage: Promise.reject(new Error('TotalUsage API error')), - usage: Promise.resolve({ - totalTokens: 100, - promptTokens: 49, - completionTokens: 51, - }), - }; - - const result = await VercelProvider.getAIMetricsFromStream(mockStream); - - expect(result).toEqual({ - success: true, - usage: { - 
total: 100, - input: 49, - output: 51, - }, - }); - }); - - it('handles rejected totalUsage and usage promises gracefully', async () => { - const mockStream = { - finishReason: Promise.resolve('stop'), - totalUsage: Promise.reject(new Error('TotalUsage API error')), - usage: Promise.reject(new Error('Usage API error')), - }; - - const result = await VercelProvider.getAIMetricsFromStream(mockStream); - - expect(result).toEqual({ - success: true, - usage: undefined, - }); - }); - - it('supports v4 field names (promptTokens, completionTokens)', async () => { - const mockStream = { - finishReason: Promise.resolve('stop'), - usage: Promise.resolve({ - totalTokens: 100, - promptTokens: 40, - completionTokens: 60, - }), - }; - - const result = await VercelProvider.getAIMetricsFromStream(mockStream); - - expect(result).toEqual({ - success: true, - usage: { - total: 100, - input: 40, - output: 60, - }, - }); - }); - - it('supports v5 field names (inputTokens, outputTokens)', async () => { - const mockStream = { - finishReason: Promise.resolve('stop'), - usage: Promise.resolve({ - totalTokens: 100, - inputTokens: 40, - outputTokens: 60, - }), - }; - - const result = await VercelProvider.getAIMetricsFromStream(mockStream); - - expect(result).toEqual({ - success: true, - usage: { - total: 100, - input: 40, - output: 60, - }, - }); - }); - }); -}); diff --git a/packages/ai-providers/server-ai-vercel/__tests__/VercelRunnerFactory.test.ts b/packages/ai-providers/server-ai-vercel/__tests__/VercelRunnerFactory.test.ts new file mode 100644 index 0000000000..061d4cd455 --- /dev/null +++ b/packages/ai-providers/server-ai-vercel/__tests__/VercelRunnerFactory.test.ts @@ -0,0 +1,70 @@ +import { VercelModelRunner } from '../src/VercelModelRunner'; +import { VercelRunnerFactory } from '../src/VercelRunnerFactory'; + +describe('VercelRunnerFactory', () => { + describe('createModel', () => { + it('builds a VercelModelRunner with mapped parameters', async () => { + const fakeModel = { name: 
'gpt-4o' }; + jest.doMock('@ai-sdk/openai', () => ({ + openai: jest.fn().mockReturnValue(fakeModel), + })); + + const factory = new VercelRunnerFactory(); + const runner = await factory.createModel({ + key: 'completion', + enabled: true, + provider: { name: 'openai' }, + model: { name: 'gpt-4o', parameters: { max_tokens: 100, temperature: 0.7 } }, + }); + + expect(runner).toBeInstanceOf(VercelModelRunner); + expect(runner.getModel()).toBe(fakeModel); + }); + }); + + describe('mapParameters', () => { + it('maps known LD parameters to Vercel AI SDK names', () => { + const params = VercelRunnerFactory.mapParameters({ + max_tokens: 100, + max_completion_tokens: 200, + temperature: 0.7, + top_p: 0.9, + top_k: 50, + presence_penalty: 0.1, + frequency_penalty: 0.2, + stop: ['x', 'y'], + seed: 42, + }); + + expect(params).toEqual({ + maxTokens: 100, + maxOutputTokens: 200, + temperature: 0.7, + topP: 0.9, + topK: 50, + presencePenalty: 0.1, + frequencyPenalty: 0.2, + stopSequences: ['x', 'y'], + seed: 42, + }); + }); + + it('returns an empty object when parameters is undefined', () => { + expect(VercelRunnerFactory.mapParameters(undefined)).toEqual({}); + }); + }); + + describe('createVercelModel', () => { + it('throws on an unsupported provider', async () => { + await expect( + VercelRunnerFactory.createVercelModel({ + key: 'k', + enabled: true, + provider: { name: 'unsupported' }, + model: { name: 'm' }, + }), + ).rejects.toThrow('Unsupported Vercel AI provider: unsupported'); + }); + }); + +}); diff --git a/packages/ai-providers/server-ai-vercel/src/VercelHelper.ts b/packages/ai-providers/server-ai-vercel/src/VercelHelper.ts new file mode 100644 index 0000000000..add9b5d9f0 --- /dev/null +++ b/packages/ai-providers/server-ai-vercel/src/VercelHelper.ts @@ -0,0 +1,89 @@ +import type { LDAIMetrics, LDMessage, LDTokenUsage } from '@launchdarkly/server-sdk-ai'; + +import type { ModelUsageTokens, StreamResponse, TextResponse } from './types'; + +/** + * Convert LaunchDarkly 
messages to the Vercel AI SDK message format. + * + * The Vercel AI SDK accepts the same `{ role, content }` shape that LDMessage + * uses, so this helper currently performs a structural pass-through. + */ +export function convertMessagesToVercel(messages: LDMessage[]): LDMessage[] { + return messages.map((msg) => ({ role: msg.role, content: msg.content })); +} + +/** + * Map LaunchDarkly provider names to Vercel AI SDK provider identifiers. + */ +export function mapProviderName(ldProviderName: string): string { + const lowercasedName = ldProviderName.toLowerCase(); + const mapping: Record = { + gemini: 'google', + }; + return mapping[lowercasedName] || lowercasedName; +} + +/** + * Map Vercel AI SDK usage data to LaunchDarkly token usage. + * Supports both v4 (promptTokens/completionTokens) and v5 + * (inputTokens/outputTokens) field names. + */ +export function mapUsageDataToLDTokenUsage(usageData: ModelUsageTokens): LDTokenUsage { + const { totalTokens, inputTokens, outputTokens, promptTokens, completionTokens } = usageData; + return { + total: totalTokens ?? 0, + input: inputTokens ?? promptTokens ?? 0, + output: outputTokens ?? completionTokens ?? 0, + }; +} + +/** + * Get AI metrics from a Vercel AI SDK text response (e.g., generateText). + * Supports both v4 and v5 token field names. + */ +export function getAIMetricsFromResponse(response: TextResponse): LDAIMetrics { + const finishReason = response?.finishReason ?? 'unknown'; + + let usage: LDTokenUsage | undefined; + if (response?.totalUsage) { + usage = mapUsageDataToLDTokenUsage(response.totalUsage); + } else if (response?.usage) { + usage = mapUsageDataToLDTokenUsage(response.usage); + } + + return { + success: finishReason !== 'error', + usage, + }; +} + +/** + * Get AI metrics from a Vercel AI SDK streaming result. + * + * Awaits the stream's terminal promises and prefers `totalUsage` over + * `usage` for cumulative usage across all steps. 
+ */ +export async function getAIMetricsFromStream(stream: StreamResponse): Promise { + const finishReason = (await stream.finishReason?.catch(() => 'error')) ?? 'unknown'; + + let usage: LDTokenUsage | undefined; + + if (stream.totalUsage) { + const usageData = await stream.totalUsage.catch(() => undefined); + if (usageData) { + usage = mapUsageDataToLDTokenUsage(usageData); + } + } + + if (!usage && stream.usage) { + const usageData = await stream.usage.catch(() => undefined); + if (usageData) { + usage = mapUsageDataToLDTokenUsage(usageData); + } + } + + return { + success: finishReason !== 'error', + usage, + }; +} diff --git a/packages/ai-providers/server-ai-vercel/src/VercelModelRunner.ts b/packages/ai-providers/server-ai-vercel/src/VercelModelRunner.ts new file mode 100644 index 0000000000..61293cf0c9 --- /dev/null +++ b/packages/ai-providers/server-ai-vercel/src/VercelModelRunner.ts @@ -0,0 +1,116 @@ +import { generateObject, generateText, jsonSchema, LanguageModel } from 'ai'; + +import type { + LDAICompletionConfig, + LDLogger, + LDMessage, + Runner, + RunnerResult, +} from '@launchdarkly/server-sdk-ai'; + +import type { VercelAIModelParameters } from './types'; +import { convertMessagesToVercel, getAIMetricsFromResponse } from './VercelHelper'; + +/** + * Runner implementation for Vercel AI SDK chat models. + * + * Implements the unified `Runner` protocol via {@link run}. Returned by + * {@link VercelRunnerFactory.createModel}. + */ +export class VercelModelRunner implements Runner { + private _model: LanguageModel; + private _config: LDAICompletionConfig; + private _parameters: VercelAIModelParameters; + private _logger?: LDLogger; + + constructor( + model: LanguageModel, + config: LDAICompletionConfig, + parameters: VercelAIModelParameters, + logger?: LDLogger, + ) { + this._model = model; + this._config = config; + this._parameters = parameters; + this._logger = logger; + } + + /** + * Run the Vercel AI model with the given prompt. 
+ * + * @param input The user prompt string, or a pre-built message array. When a + * string is supplied the config's system messages are prepended automatically. + * When a `LDMessage[]` is supplied it is used as-is (config messages are NOT + * prepended — the caller is responsible for the full message list). + * @param outputType Optional JSON schema for structured output. When provided, + * the parsed object is exposed via {@link RunnerResult.parsed}. + */ + async run(input: string | LDMessage[], outputType?: Record): Promise { + const messages: LDMessage[] = Array.isArray(input) + ? input + : [...(this._config.messages ?? []), { role: 'user', content: input }]; + + if (outputType !== undefined) { + return this._runStructured(messages, outputType); + } + return this._runCompletion(messages); + } + + /** + * Get the underlying Vercel AI model instance. + */ + getModel(): LanguageModel { + return this._model; + } + + private async _runCompletion(messages: LDMessage[]): Promise { + try { + const result = await generateText({ + ...this._parameters, + model: this._model, + messages: convertMessagesToVercel(messages), + experimental_telemetry: { isEnabled: true }, + }); + + const metrics = getAIMetricsFromResponse(result); + return { content: result.text, metrics, raw: result }; + } catch (error) { + this._logger?.warn('Vercel AI model invocation failed:', error); + return { + content: '', + metrics: { success: false }, + }; + } + } + + private async _runStructured( + messages: LDMessage[], + outputType: Record, + ): Promise { + try { + const result = await generateObject({ + ...this._parameters, + model: this._model, + messages: convertMessagesToVercel(messages), + schema: jsonSchema(outputType), + experimental_telemetry: { isEnabled: true }, + }); + + const metrics = getAIMetricsFromResponse(result); + const parsed = result.object as Record; + + return { + content: JSON.stringify(parsed), + metrics, + raw: result, + parsed, + }; + } catch (error) { + 
this._logger?.warn('Vercel AI structured model invocation failed:', error); + return { + content: '', + metrics: { success: false }, + }; + } + } +} diff --git a/packages/ai-providers/server-ai-vercel/src/VercelProvider.ts b/packages/ai-providers/server-ai-vercel/src/VercelProvider.ts deleted file mode 100644 index 0f98e8ece2..0000000000 --- a/packages/ai-providers/server-ai-vercel/src/VercelProvider.ts +++ /dev/null @@ -1,423 +0,0 @@ -import { generateObject, generateText, jsonSchema, LanguageModel } from 'ai'; - -import { AIProvider } from '@launchdarkly/server-sdk-ai'; -import type { - ChatResponse, - LDAIConfig, - LDAIMetrics, - LDLogger, - LDMessage, - LDTokenUsage, - StructuredResponse, -} from '@launchdarkly/server-sdk-ai'; - -import type { - ModelUsageTokens, - StreamResponse, - TextResponse, - VercelAIModelParameters, - VercelAISDKConfig, - VercelAISDKMapOptions, - VercelAISDKProvider, -} from './types'; - -/** - * Vercel AI implementation of AIProvider. - * This provider integrates Vercel AI SDK with LaunchDarkly's tracking capabilities. - */ -export class VercelProvider extends AIProvider { - private _model: LanguageModel; - private _parameters: VercelAIModelParameters; - - /** - * Constructor for the VercelProvider. - * @param model - The Vercel AI model to use. - * @param parameters - The Vercel AI model parameters. - * @param logger - The logger to use for the Vercel AI provider. - */ - constructor(model: LanguageModel, parameters: VercelAIModelParameters, logger?: LDLogger) { - super(logger); - this._model = model; - this._parameters = parameters; - } - - /** - * Static factory method to create a Vercel AIProvider from an AI configuration. - * This method auto-detects the provider and creates the model. - * Note: Messages from the AI config are not included in the provider - messages - * should be passed at invocation time via invokeModel(). 
- * - * @param aiConfig The LaunchDarkly AI configuration - * @param logger Optional logger - * @returns A Promise that resolves to a configured VercelProvider - */ - static async create(aiConfig: LDAIConfig, logger?: LDLogger): Promise { - const model = await VercelProvider.createVercelModel(aiConfig); - const parameters = VercelProvider.mapParameters(aiConfig.model?.parameters); - return new VercelProvider(model, parameters, logger); - } - - /** - * Invoke the Vercel AI model with an array of messages. - */ - async invokeModel(messages: LDMessage[]): Promise { - try { - const result = await generateText({ - ...this._parameters, - model: this._model, - messages, - experimental_telemetry: { isEnabled: true }, - }); - - // Create the assistant message - const assistantMessage: LDMessage = { - role: 'assistant', - content: result.text, - }; - - // Extract metrics including token usage and success status - const metrics = VercelProvider.getAIMetricsFromResponse(result); - - return { - message: assistantMessage, - metrics, - }; - } catch (error) { - this.logger?.warn('Vercel AI model invocation failed:', error); - - return { - message: { - role: 'assistant', - content: '', - }, - metrics: { - success: false, - }, - }; - } - } - - /** - * Invoke the Vercel AI model with structured output support. 
- */ - async invokeStructuredModel( - messages: LDMessage[], - responseStructure: Record, - ): Promise { - try { - const result = await generateObject({ - ...this._parameters, - model: this._model, - messages, - schema: jsonSchema(responseStructure), - experimental_telemetry: { isEnabled: true }, - }); - - const metrics = VercelProvider.createAIMetrics(result); - - return { - data: result.object as Record, - rawResponse: JSON.stringify(result.object), - metrics, - }; - } catch (error) { - this.logger?.warn('Vercel AI structured model invocation failed:', error); - - return { - data: {}, - rawResponse: '', - metrics: { - success: false, - }, - }; - } - } - - /** - * Get the underlying Vercel AI model instance. - */ - getModel(): LanguageModel { - return this._model; - } - - /** - * Map LaunchDarkly provider names to LangChain provider names. - * This method enables seamless integration between LaunchDarkly's standardized - * provider naming and LangChain's naming conventions. - */ - static mapProvider(ldProviderName: string): string { - const lowercasedName = ldProviderName.toLowerCase(); - - const mapping: Record = { - gemini: 'google', - }; - - return mapping[lowercasedName] || lowercasedName; - } - - /** - * Map Vercel AI SDK usage data to LaunchDarkly token usage. - * - * @param usageData Usage data from Vercel AI SDK - * @returns LDTokenUsage - */ - static mapUsageDataToLDTokenUsage(usageData: ModelUsageTokens): LDTokenUsage { - // Support v4 field names (promptTokens, completionTokens) for backward compatibility - const { totalTokens, inputTokens, outputTokens, promptTokens, completionTokens } = usageData; - return { - total: totalTokens ?? 0, - input: inputTokens ?? promptTokens ?? 0, - output: outputTokens ?? completionTokens ?? 0, - }; - } - - /** - * Get AI metrics from a Vercel AI SDK text response - * This method extracts token usage information and success status from Vercel AI responses - * and returns a LaunchDarkly AIMetrics object. 
- * Supports both v4 and v5 field names for backward compatibility. - * - * @param response The response from generateText() or similar non-streaming operations - * @returns LDAIMetrics with success status and token usage - * - * @example - * const tracker = aiConfig.createTracker(); - * const response = await tracker.trackMetricsOf( - * VercelProvider.getAIMetricsFromResponse, - * () => generateText(vercelConfig) - * ); - */ - static getAIMetricsFromResponse(response: TextResponse): LDAIMetrics { - const finishReason = response?.finishReason ?? 'unknown'; - - // favor totalUsage over usage for cumulative usage across all steps - let usage: LDTokenUsage | undefined; - if (response?.totalUsage) { - usage = VercelProvider.mapUsageDataToLDTokenUsage(response.totalUsage); - } else if (response?.usage) { - usage = VercelProvider.mapUsageDataToLDTokenUsage(response.usage); - } - - const success = finishReason !== 'error'; - - return { - success, - usage, - }; - } - - /** - * Create AI metrics information from a Vercel AI response. - * This method extracts token usage information and success status from Vercel AI responses - * and returns a LaunchDarkly AIMetrics object. - * Supports both v4 and v5 field names for backward compatibility. - * - * @deprecated Use `getAIMetricsFromResponse()` instead. - * @param vercelResponse The response from generateText() or similar non-streaming operations - * @returns LDAIMetrics with success status and token usage - */ - static createAIMetrics(vercelResponse: TextResponse): LDAIMetrics { - return VercelProvider.getAIMetricsFromResponse(vercelResponse); - } - - /** - * Get AI metrics from a Vercel AI SDK streaming result. - * - * This method waits for the stream to complete, then extracts metrics using totalUsage - * (preferred for cumulative usage across all steps) or usage if totalUsage is unavailable. 
- * - * @param stream The stream result from streamText() - * @returns A Promise that resolves to LDAIMetrics - * - * @example - * const tracker = aiConfig.createTracker(); - * const stream = tracker.trackStreamMetricsOf( - * () => streamText(vercelConfig), - * VercelProvider.getAIMetricsFromStream - * ); - */ - static async getAIMetricsFromStream(stream: StreamResponse): Promise { - const finishReason = (await stream.finishReason?.catch(() => 'error')) ?? 'unknown'; - - // favor totalUsage over usage for cumulative usage across all steps - let usage: LDTokenUsage | undefined; - - if (stream.totalUsage) { - const usageData = await stream.totalUsage.catch(() => undefined); - if (usageData) { - usage = VercelProvider.mapUsageDataToLDTokenUsage(usageData); - } - } - - if (!usage && stream.usage) { - const usageData = await stream.usage.catch(() => undefined); - if (usageData) { - usage = VercelProvider.mapUsageDataToLDTokenUsage(usageData); - } - } - - const success = finishReason !== 'error'; - - return { - success, - usage, - }; - } - - /** - * Map LaunchDarkly model parameters to Vercel AI SDK parameters. 
- * - * Parameter mappings: - * - max_tokens → maxTokens - * - max_completion_tokens → maxOutputTokens - * - temperature → temperature - * - top_p → topP - * - top_k → topK - * - presence_penalty → presencePenalty - * - frequency_penalty → frequencyPenalty - * - stop → stopSequences - * - seed → seed - * - * @param parameters The LaunchDarkly model parameters to map - * @returns An object containing mapped Vercel AI SDK parameters - */ - static mapParameters(parameters?: { [index: string]: unknown }): VercelAIModelParameters { - if (!parameters) { - return {}; - } - - const params: VercelAIModelParameters = {}; - - if (parameters.max_tokens !== undefined) { - params.maxTokens = parameters.max_tokens as number; - } - if (parameters.max_completion_tokens !== undefined) { - params.maxOutputTokens = parameters.max_completion_tokens as number; - } - if (parameters.temperature !== undefined) { - params.temperature = parameters.temperature as number; - } - if (parameters.top_p !== undefined) { - params.topP = parameters.top_p as number; - } - if (parameters.top_k !== undefined) { - params.topK = parameters.top_k as number; - } - if (parameters.presence_penalty !== undefined) { - params.presencePenalty = parameters.presence_penalty as number; - } - if (parameters.frequency_penalty !== undefined) { - params.frequencyPenalty = parameters.frequency_penalty as number; - } - if (parameters.stop !== undefined) { - params.stopSequences = parameters.stop as string[]; - } - if (parameters.seed !== undefined) { - params.seed = parameters.seed as number; - } - - return params; - } - - /** - * Convert an AI configuration to Vercel AI SDK parameters. - * This static method allows converting an LDAIConfig to VercelAISDKConfig without - * requiring an instance of VercelProvider. 
- * - * @param aiConfig The LaunchDarkly AI configuration - * @param provider A Vercel AI SDK Provider or a map of provider names to Vercel AI SDK Providers - * @param options Optional mapping options - * @returns A configuration directly usable in Vercel AI SDK generateText() and streamText() - * @throws {Error} if a Vercel AI SDK model cannot be determined from the given provider parameter - */ - static toVercelAISDK( - aiConfig: LDAIConfig, - provider: VercelAISDKProvider | Record>, - options?: VercelAISDKMapOptions | undefined, - ): VercelAISDKConfig { - // Determine the model from the provider - let model: TMod | undefined; - if (typeof provider === 'function') { - model = provider(aiConfig.model?.name ?? ''); - } else { - model = provider[aiConfig.provider?.name ?? '']?.(aiConfig.model?.name ?? ''); - } - if (!model) { - throw new Error( - 'Vercel AI SDK model cannot be determined from the supplied provider parameter.', - ); - } - - // Merge messages from config and options - let messages: LDMessage[] | undefined; - const configMessages = ('messages' in aiConfig ? aiConfig.messages : undefined) as - | LDMessage[] - | undefined; - if (configMessages || options?.nonInterpolatedMessages) { - messages = [...(configMessages ?? []), ...(options?.nonInterpolatedMessages ?? [])]; - } - - // Map parameters using the shared mapping method - const params = VercelProvider.mapParameters(aiConfig.model?.parameters); - - // Build and return the Vercel AI SDK configuration - return { - model, - messages, - ...params, - }; - } - - /** - * Create a Vercel AI model from an AI configuration. - * This method auto-detects the provider and creates the model instance. 
- * - * @param aiConfig The LaunchDarkly AI configuration - * @returns A Promise that resolves to a configured Vercel AI model - */ - static async createVercelModel(aiConfig: LDAIConfig): Promise { - const providerName = VercelProvider.mapProvider(aiConfig.provider?.name || ''); - const modelName = aiConfig.model?.name || ''; - - // Map provider names to their corresponding Vercel AI SDK imports - switch (providerName) { - case 'openai': - try { - const { openai } = await import('@ai-sdk/openai'); - return openai(modelName); - } catch (error) { - throw new Error(`Failed to load @ai-sdk/openai: ${error}`); - } - case 'anthropic': - try { - const { anthropic } = await import('@ai-sdk/anthropic'); - return anthropic(modelName); - } catch (error) { - throw new Error(`Failed to load @ai-sdk/anthropic: ${error}`); - } - case 'google': - try { - const { google } = await import('@ai-sdk/google'); - return google(modelName); - } catch (error) { - throw new Error(`Failed to load @ai-sdk/google: ${error}`); - } - case 'cohere': - try { - const { cohere } = await import('@ai-sdk/cohere'); - return cohere(modelName); - } catch (error) { - throw new Error(`Failed to load @ai-sdk/cohere: ${error}`); - } - case 'mistral': - try { - const { mistral } = await import('@ai-sdk/mistral'); - return mistral(modelName); - } catch (error) { - throw new Error(`Failed to load @ai-sdk/mistral: ${error}`); - } - default: - throw new Error(`Unsupported Vercel AI provider: ${providerName}`); - } - } -} diff --git a/packages/ai-providers/server-ai-vercel/src/VercelRunnerFactory.ts b/packages/ai-providers/server-ai-vercel/src/VercelRunnerFactory.ts new file mode 100644 index 0000000000..0510900e3d --- /dev/null +++ b/packages/ai-providers/server-ai-vercel/src/VercelRunnerFactory.ts @@ -0,0 +1,131 @@ +import { LanguageModel } from 'ai'; + +import { AIProvider } from '@launchdarkly/server-sdk-ai'; +import type { LDAICompletionConfig, LDAIConfig, LDLogger } from '@launchdarkly/server-sdk-ai'; + 
+import type { VercelAIModelParameters } from './types'; +import { mapProviderName } from './VercelHelper'; +import { VercelModelRunner } from './VercelModelRunner'; + +/** + * Factory for creating Vercel AI runners. + * + * Vercel ships only a model runner; agent and graph runners are not provided + * because the Vercel AI SDK is a thin model layer rather than an agent + * framework. + */ +export class VercelRunnerFactory extends AIProvider { + constructor(logger?: LDLogger) { + super(logger); + } + + /** + * Create a model runner from a completion AI configuration. + */ + async createModel(config: LDAICompletionConfig): Promise { + const model = await VercelRunnerFactory.createVercelModel(config); + const parameters = VercelRunnerFactory.mapParameters(config.model?.parameters); + return new VercelModelRunner(model, config, parameters, this.logger); + } + + /** + * Create a Vercel AI model from an AI configuration. + * This method auto-detects the provider and creates the model instance. 
+ */ + static async createVercelModel(aiConfig: LDAIConfig): Promise { + const providerName = mapProviderName(aiConfig.provider?.name || ''); + const modelName = aiConfig.model?.name || ''; + + switch (providerName) { + case 'openai': + try { + const { openai } = await import('@ai-sdk/openai'); + return openai(modelName); + } catch (error) { + throw new Error(`Failed to load @ai-sdk/openai: ${error}`); + } + case 'anthropic': + try { + const { anthropic } = await import('@ai-sdk/anthropic'); + return anthropic(modelName); + } catch (error) { + throw new Error(`Failed to load @ai-sdk/anthropic: ${error}`); + } + case 'google': + try { + const { google } = await import('@ai-sdk/google'); + return google(modelName); + } catch (error) { + throw new Error(`Failed to load @ai-sdk/google: ${error}`); + } + case 'cohere': + try { + const { cohere } = await import('@ai-sdk/cohere'); + return cohere(modelName); + } catch (error) { + throw new Error(`Failed to load @ai-sdk/cohere: ${error}`); + } + case 'mistral': + try { + const { mistral } = await import('@ai-sdk/mistral'); + return mistral(modelName); + } catch (error) { + throw new Error(`Failed to load @ai-sdk/mistral: ${error}`); + } + default: + throw new Error(`Unsupported Vercel AI provider: ${providerName}`); + } + } + + /** + * Map LaunchDarkly model parameters to Vercel AI SDK parameters. 
+ * + * Parameter mappings: + * - max_tokens -> maxTokens + * - max_completion_tokens -> maxOutputTokens + * - temperature -> temperature + * - top_p -> topP + * - top_k -> topK + * - presence_penalty -> presencePenalty + * - frequency_penalty -> frequencyPenalty + * - stop -> stopSequences + * - seed -> seed + */ + static mapParameters(parameters?: { [index: string]: unknown }): VercelAIModelParameters { + if (!parameters) { + return {}; + } + + const params: VercelAIModelParameters = {}; + + if (parameters.max_tokens !== undefined) { + params.maxTokens = parameters.max_tokens as number; + } + if (parameters.max_completion_tokens !== undefined) { + params.maxOutputTokens = parameters.max_completion_tokens as number; + } + if (parameters.temperature !== undefined) { + params.temperature = parameters.temperature as number; + } + if (parameters.top_p !== undefined) { + params.topP = parameters.top_p as number; + } + if (parameters.top_k !== undefined) { + params.topK = parameters.top_k as number; + } + if (parameters.presence_penalty !== undefined) { + params.presencePenalty = parameters.presence_penalty as number; + } + if (parameters.frequency_penalty !== undefined) { + params.frequencyPenalty = parameters.frequency_penalty as number; + } + if (parameters.stop !== undefined) { + params.stopSequences = parameters.stop as string[]; + } + if (parameters.seed !== undefined) { + params.seed = parameters.seed as number; + } + + return params; + } +} diff --git a/packages/ai-providers/server-ai-vercel/src/index.ts b/packages/ai-providers/server-ai-vercel/src/index.ts index 6e7eb55023..6da7e3730b 100644 --- a/packages/ai-providers/server-ai-vercel/src/index.ts +++ b/packages/ai-providers/server-ai-vercel/src/index.ts @@ -1,4 +1,12 @@ -export { VercelProvider } from './VercelProvider'; +export { VercelModelRunner } from './VercelModelRunner'; +export { VercelRunnerFactory } from './VercelRunnerFactory'; +export { + convertMessagesToVercel, + getAIMetricsFromResponse, + 
getAIMetricsFromStream, + mapProviderName, + mapUsageDataToLDTokenUsage, +} from './VercelHelper'; export type { VercelAIModelParameters, VercelAISDKConfig, From 6dbae78081867caeae4c94e3d9c9224e6c91c50f Mon Sep 17 00:00:00 2001 From: Jason Bailey Date: Tue, 5 May 2026 18:42:21 -0500 Subject: [PATCH 09/19] chore: skip tracking judge results that were not sampled (#1355) --- packages/sdk/server-ai/src/api/ManagedAgent.ts | 3 +++ packages/sdk/server-ai/src/api/ManagedModel.ts | 3 +++ 2 files changed, 6 insertions(+) diff --git a/packages/sdk/server-ai/src/api/ManagedAgent.ts b/packages/sdk/server-ai/src/api/ManagedAgent.ts index 0ce5eb5803..b4278b889f 100644 --- a/packages/sdk/server-ai/src/api/ManagedAgent.ts +++ b/packages/sdk/server-ai/src/api/ManagedAgent.ts @@ -46,6 +46,9 @@ export class ManagedAgent { .evaluate(prompt, output) .then((results) => { results.forEach((judgeResult) => { + if (!judgeResult.sampled) { + return; + } tracker.trackJudgeResult(judgeResult); }); return results; diff --git a/packages/sdk/server-ai/src/api/ManagedModel.ts b/packages/sdk/server-ai/src/api/ManagedModel.ts index 5d6860c443..a8653025db 100644 --- a/packages/sdk/server-ai/src/api/ManagedModel.ts +++ b/packages/sdk/server-ai/src/api/ManagedModel.ts @@ -46,6 +46,9 @@ export class ManagedModel { .evaluate(prompt, output) .then((results) => { results.forEach((judgeResult) => { + if (!judgeResult.sampled) { + return; + } tracker.trackJudgeResult(judgeResult); }); return results; From a8e7bbb50d983d87865665ff94b8847a465759a1 Mon Sep 17 00:00:00 2001 From: Jason Bailey Date: Wed, 6 May 2026 13:43:25 -0500 Subject: [PATCH 10/19] chore: Add managed-agent example to server-sdk-ai (#1358) --- package.json | 1 + .../examples/managed-agent/README.md | 49 ++++++++++++ .../examples/managed-agent/package.json | 29 +++++++ .../examples/managed-agent/src/index.ts | 75 +++++++++++++++++++ .../examples/managed-agent/tsconfig.json | 18 +++++ .../server-ai/examples/vercel-ai/src/index.ts | 34 ++++----- 
release-please-config.json | 25 +++++++ 7 files changed, 213 insertions(+), 18 deletions(-) create mode 100644 packages/sdk/server-ai/examples/managed-agent/README.md create mode 100644 packages/sdk/server-ai/examples/managed-agent/package.json create mode 100644 packages/sdk/server-ai/examples/managed-agent/src/index.ts create mode 100644 packages/sdk/server-ai/examples/managed-agent/tsconfig.json diff --git a/package.json b/package.json index 0cfc4ff14e..634ff65bb8 100644 --- a/package.json +++ b/package.json @@ -48,6 +48,7 @@ "packages/sdk/server-ai/examples/chat-judge", "packages/sdk/server-ai/examples/direct-judge", "packages/sdk/server-ai/examples/openai", + "packages/sdk/server-ai/examples/managed-agent", "packages/sdk/server-ai/examples/tracked-chat", "packages/sdk/server-ai/examples/chat-observability", "packages/sdk/server-ai/examples/openai-observability", diff --git a/packages/sdk/server-ai/examples/managed-agent/README.md b/packages/sdk/server-ai/examples/managed-agent/README.md new file mode 100644 index 0000000000..35f71d461a --- /dev/null +++ b/packages/sdk/server-ai/examples/managed-agent/README.md @@ -0,0 +1,49 @@ +# Managed Agent Example + +This example demonstrates how to use the LaunchDarkly AI SDK agent functionality with multiple providers for managed agent interactions. + +## Prerequisites + +1. A LaunchDarkly account and SDK key +1. An OpenAI API key (for the AI provider) +1. Node.js 16 or later + +## Setup + +1. Install dependencies: + ```bash + yarn install + ``` + +1. Set up environment variables: + ```bash + cp .env.example .env + ``` + + Edit `.env` and add your keys: + ``` + LAUNCHDARKLY_SDK_KEY=your-sdk-key-here + OPENAI_API_KEY=your-openai-api-key-here + LAUNCHDARKLY_AI_CONFIG_KEY=sample-agent-config + ``` + +1. 
Create an AI Config in LaunchDarkly: + - Navigate to the AI Configs section in your LaunchDarkly dashboard + - Create a new AI Config with the key `sample-agent-config` and the **Agent** mode + - Add a variation with the following settings: + - **Model Selection**: Select "OpenAI" as the provider and "gpt-3.5-turbo" as the model + - **Instructions**: "You are a helpful assistant for {{companyName}}. You should be friendly and informative." + - Save the variation + - Update the default target rule to use the newly created variation + +## Running the Example + +```bash +yarn start +``` + +This will: +1. Initialize the LaunchDarkly client +1. Create an agent using the AI Config +1. Send a prompt to the agent and display the response +1. Automatically track interaction metrics (duration, tokens, success/error) diff --git a/packages/sdk/server-ai/examples/managed-agent/package.json b/packages/sdk/server-ai/examples/managed-agent/package.json new file mode 100644 index 0000000000..f94c314e91 --- /dev/null +++ b/packages/sdk/server-ai/examples/managed-agent/package.json @@ -0,0 +1,29 @@ +{ + "name": "@launchdarkly/managed-agent-example", + "private": true, + "version": "1.0.0", + "description": "Example demonstrating LaunchDarkly AI SDK agent functionality with multiple providers", + "type": "module", + "scripts": { + "build": "tsc", + "start": "yarn build && node ./dist/index.js" + }, + "dependencies": { + "@ai-sdk/google": "^2.0.20", + "@langchain/core": "^1.1.42", + "@langchain/google-genai": "^1.0.3", + "@langchain/openai": "^0.5.0", + "@launchdarkly/node-server-sdk": "9.10.13", + "@launchdarkly/server-sdk-ai": "0.18.1", + "@launchdarkly/server-sdk-ai-langchain": "0.6.1", + "@launchdarkly/server-sdk-ai-openai": "0.5.8", + "@launchdarkly/server-sdk-ai-vercel": "0.5.8", + "dotenv": "^16.0.0", + "langchain": "^1.3.5" + }, + "devDependencies": { + "@types/node": "^20.0.0", + "tsx": "^4.0.0", + "typescript": "^5.0.0" + } +} diff --git 
a/packages/sdk/server-ai/examples/managed-agent/src/index.ts b/packages/sdk/server-ai/examples/managed-agent/src/index.ts new file mode 100644 index 0000000000..18fcd7484a --- /dev/null +++ b/packages/sdk/server-ai/examples/managed-agent/src/index.ts @@ -0,0 +1,75 @@ +/* eslint-disable no-console */ +import 'dotenv/config'; + +import { init, type LDContext } from '@launchdarkly/node-server-sdk'; +import { initAi } from '@launchdarkly/server-sdk-ai'; + +// Environment variables +const sdkKey = process.env.LAUNCHDARKLY_SDK_KEY; +const aiConfigKey = process.env.LAUNCHDARKLY_AI_CONFIG_KEY || 'sample-agent-config'; + +// Validate required environment variables +if (!sdkKey) { + console.error('*** Please set the LAUNCHDARKLY_SDK_KEY env first'); + process.exit(1); +} + +// Initialize LaunchDarkly client +const ldClient = init(sdkKey); + +// Set up the context properties. This context should appear on your LaunchDarkly contexts dashboard +// soon after you run the demo. +const context: LDContext = { + kind: 'user', + key: 'example-user-key', + name: 'Sandy', +}; + +async function main() { + try { + await ldClient.waitForInitialization({ timeout: 10 }); + console.log('*** SDK successfully initialized'); + } catch (error) { + console.log(`*** SDK failed to initialize: ${error}`); + process.exit(1); + } + + const aiClient = initAi(ldClient); + + // Get AI agent configuration from LaunchDarkly. + // + // Pass a defaultValue for improved resiliency when the flag is unavailable or LaunchDarkly is unreachable; omit for a disabled default. + // Example: + // const defaultValue = { + // enabled: true, + // model: { name: 'gpt-4' }, + // provider: { name: 'openai' }, + // instructions: 'You are a helpful research assistant for {{companyName}}.' 
+ // }; + // const agent = await aiClient.createAgent(aiConfigKey, context, defaultValue, { companyName: 'LaunchDarkly' }); + const agent = await aiClient.createAgent(aiConfigKey, context, undefined, { + companyName: 'LaunchDarkly', + }); + + if (!agent) { + console.log('*** AI agent configuration is not enabled'); + process.exit(0); + } + + // Example of using the agent functionality + console.log('\n*** Starting agent invocation:'); + try { + const userInput = 'Hello! Can you help me understand how your company can help me?'; + console.log('User Input:', userInput); + + const result = await agent.run(userInput); + + console.log('AI Response:', result.content); + + console.log('Success.'); + } catch (err) { + console.error('Error:', err); + } +} + +main(); diff --git a/packages/sdk/server-ai/examples/managed-agent/tsconfig.json b/packages/sdk/server-ai/examples/managed-agent/tsconfig.json new file mode 100644 index 0000000000..6916599c7d --- /dev/null +++ b/packages/sdk/server-ai/examples/managed-agent/tsconfig.json @@ -0,0 +1,18 @@ +{ + "compilerOptions": { + "target": "ES2022", + "module": "ESNext", + "moduleResolution": "node", + "esModuleInterop": true, + "allowSyntheticDefaultImports": true, + "strict": true, + "skipLibCheck": true, + "forceConsistentCasingInFileNames": true, + "outDir": "./dist", + "rootDir": "./src", + "declaration": true, + "sourceMap": true + }, + "include": ["src/**/*"], + "exclude": ["node_modules", "dist"] +} diff --git a/packages/sdk/server-ai/examples/vercel-ai/src/index.ts b/packages/sdk/server-ai/examples/vercel-ai/src/index.ts index ec14fe9157..88f8909911 100644 --- a/packages/sdk/server-ai/examples/vercel-ai/src/index.ts +++ b/packages/sdk/server-ai/examples/vercel-ai/src/index.ts @@ -4,7 +4,12 @@ import { generateText, streamText } from 'ai'; import { init, type LDClient, type LDContext } from '@launchdarkly/node-server-sdk'; import { initAi } from '@launchdarkly/server-sdk-ai'; -import { VercelProvider } from 
'@launchdarkly/server-sdk-ai-vercel'; +import { + convertMessagesToVercel, + getAIMetricsFromResponse, + getAIMetricsFromStream, + VercelRunnerFactory, +} from '@launchdarkly/server-sdk-ai-vercel'; // Environment variables const sdkKey = process.env.LAUNCHDARKLY_SDK_KEY ?? ''; @@ -59,24 +64,22 @@ async function main() { console.log('Using model:', aiConfig.model?.name); + const model = openai(aiConfig.model?.name || 'gpt-4'); + const parameters = VercelRunnerFactory.mapParameters(aiConfig.model?.parameters); + try { const userMessage = { role: 'user' as const, content: 'What can you help me with?', }; - // Example of using generateText (non-streaming) console.log('\n*** Generating text:'); - // Convert config to Vercel AI SDK format - const vercelConfig = VercelProvider.toVercelAISDK(aiConfig, openai, { - nonInterpolatedMessages: [userMessage], - }); + const messages = convertMessagesToVercel([...(aiConfig.messages || []), userMessage]); - // Call the model and track metrics for the ai config const tracker = aiConfig.createTracker!(); - const result = await tracker.trackMetricsOf(VercelProvider.getAIMetricsFromResponse, () => - generateText({ ...vercelConfig, messages: vercelConfig.messages ?? 
[] }), + const result = await tracker.trackMetricsOf(getAIMetricsFromResponse, () => + generateText({ ...parameters, model, messages }), ); console.log('Response:', result.text); @@ -91,21 +94,16 @@ async function main() { content: 'Count from 1 to 5.', }; - // Example of using generateText (non-streaming) console.log('\n*** Streaming text:'); - // Convert config to Vercel AI SDK format - const vercelConfig = VercelProvider.toVercelAISDK(aiConfig, openai, { - nonInterpolatedMessages: [userMessage], - }); - // Stream is returned immediately (synchronously), metrics tracked in background + const messages = convertMessagesToVercel([...(aiConfig.messages || []), userMessage]); + const streamTracker = aiConfig.createTracker!(); const streamResult = streamTracker.trackStreamMetricsOf( - () => streamText({ ...vercelConfig, messages: vercelConfig.messages ?? [] }), - VercelProvider.getAIMetricsFromStream, + () => streamText({ ...parameters, model, messages }), + getAIMetricsFromStream, ); - // Consume the stream immediately - no await needed before this! 
for await (const textPart of streamResult.textStream) { process.stdout.write(textPart); } diff --git a/release-please-config.json b/release-please-config.json index f7ed3327a0..8d1ce10673 100644 --- a/release-please-config.json +++ b/release-please-config.json @@ -9,6 +9,11 @@ "path": "/packages/sdk/server-ai/examples/tracked-chat/package.json", "jsonpath": "$.dependencies['@launchdarkly/server-sdk-ai-langchain']" }, + { + "type": "json", + "path": "/packages/sdk/server-ai/examples/managed-agent/package.json", + "jsonpath": "$.dependencies['@launchdarkly/server-sdk-ai-langchain']" + }, { "type": "json", "path": "/packages/sdk/server-ai/examples/chat-judge/package.json", @@ -35,6 +40,11 @@ "path": "/packages/sdk/server-ai/examples/tracked-chat/package.json", "jsonpath": "$.dependencies['@launchdarkly/server-sdk-ai-vercel']" }, + { + "type": "json", + "path": "/packages/sdk/server-ai/examples/managed-agent/package.json", + "jsonpath": "$.dependencies['@launchdarkly/server-sdk-ai-vercel']" + }, { "type": "json", "path": "/packages/sdk/server-ai/examples/chat-judge/package.json", @@ -61,6 +71,11 @@ "path": "/packages/sdk/server-ai/examples/tracked-chat/package.json", "jsonpath": "$.dependencies['@launchdarkly/server-sdk-ai-openai']" }, + { + "type": "json", + "path": "/packages/sdk/server-ai/examples/managed-agent/package.json", + "jsonpath": "$.dependencies['@launchdarkly/server-sdk-ai-openai']" + }, { "type": "json", "path": "/packages/sdk/server-ai/examples/chat-judge/package.json", @@ -158,6 +173,11 @@ "path": "/packages/sdk/server-ai/examples/tracked-chat/package.json", "jsonpath": "$.dependencies['@launchdarkly/node-server-sdk']" }, + { + "type": "json", + "path": "/packages/sdk/server-ai/examples/managed-agent/package.json", + "jsonpath": "$.dependencies['@launchdarkly/node-server-sdk']" + }, { "type": "json", "path": "/packages/sdk/server-ai/examples/chat-judge/package.json", @@ -252,6 +272,11 @@ "path": "examples/tracked-chat/package.json", "jsonpath": 
"$.dependencies['@launchdarkly/server-sdk-ai']" }, + { + "type": "json", + "path": "examples/managed-agent/package.json", + "jsonpath": "$.dependencies['@launchdarkly/server-sdk-ai']" + }, { "type": "json", "path": "examples/chat-judge/package.json", From 0ab910c593d9fae234c109882a83728cc43d8f2a Mon Sep 17 00:00:00 2001 From: Jason Bailey Date: Wed, 6 May 2026 16:57:18 -0500 Subject: [PATCH 11/19] feat: Replace OpenAIProvider with Runner protocol implementation (AIC-2388) (#1337) --- .../__tests__/OpenAIAgentRunner.test.ts | 101 +++++ .../__tests__/OpenAIHelper.test.ts | 163 ++++++++ .../__tests__/OpenAIModelRunner.test.ts | 172 ++++++++ .../__tests__/OpenAIProvider.test.ts | 383 ------------------ .../__tests__/OpenAIRunnerFactory.test.ts | 160 ++++++++ .../server-ai-openai/package.json | 7 +- .../server-ai-openai/src/OpenAIAgentRunner.ts | 84 ++++ .../server-ai-openai/src/OpenAIHelper.ts | 208 ++++++++++ .../server-ai-openai/src/OpenAIModelRunner.ts | 134 ++++++ .../server-ai-openai/src/OpenAIProvider.ts | 250 ------------ .../src/OpenAIRunnerFactory.ts | 113 ++++++ .../server-ai-openai/src/index.ts | 15 +- .../examples/managed-agent/package.json | 5 +- .../openai-observability/src/index.ts | 4 +- .../server-ai/examples/openai/src/index.ts | 4 +- 15 files changed, 1163 insertions(+), 640 deletions(-) create mode 100644 packages/ai-providers/server-ai-openai/__tests__/OpenAIAgentRunner.test.ts create mode 100644 packages/ai-providers/server-ai-openai/__tests__/OpenAIHelper.test.ts create mode 100644 packages/ai-providers/server-ai-openai/__tests__/OpenAIModelRunner.test.ts delete mode 100644 packages/ai-providers/server-ai-openai/__tests__/OpenAIProvider.test.ts create mode 100644 packages/ai-providers/server-ai-openai/__tests__/OpenAIRunnerFactory.test.ts create mode 100644 packages/ai-providers/server-ai-openai/src/OpenAIAgentRunner.ts create mode 100644 packages/ai-providers/server-ai-openai/src/OpenAIHelper.ts create mode 100644 
packages/ai-providers/server-ai-openai/src/OpenAIModelRunner.ts delete mode 100644 packages/ai-providers/server-ai-openai/src/OpenAIProvider.ts create mode 100644 packages/ai-providers/server-ai-openai/src/OpenAIRunnerFactory.ts diff --git a/packages/ai-providers/server-ai-openai/__tests__/OpenAIAgentRunner.test.ts b/packages/ai-providers/server-ai-openai/__tests__/OpenAIAgentRunner.test.ts new file mode 100644 index 0000000000..4381b55b89 --- /dev/null +++ b/packages/ai-providers/server-ai-openai/__tests__/OpenAIAgentRunner.test.ts @@ -0,0 +1,101 @@ +import type { LDAIAgentConfig } from '@launchdarkly/server-sdk-ai'; + +import { OpenAIAgentRunner } from '../src/OpenAIAgentRunner'; + +const mockRun = jest.fn(); + +function makeRunResult(overrides: Record = {}) { + return { + finalOutput: overrides.finalOutput ?? '', + newItems: overrides.newItems ?? [], + runContext: { + usage: overrides.usage ?? { totalTokens: 0, inputTokens: 0, outputTokens: 0 }, + }, + ...overrides, + }; +} + +describe('OpenAIAgentRunner', () => { + beforeEach(() => { + jest.clearAllMocks(); + }); + + it('returns content with no toolCalls when the model does not invoke tools', async () => { + mockRun.mockResolvedValue( + makeRunResult({ + finalOutput: 'Done', + usage: { totalTokens: 12, inputTokens: 8, outputTokens: 4 }, + }), + ); + + const runner = new OpenAIAgentRunner({}, mockRun, {}); + const result = await runner.run('Say done'); + + expect(result.content).toBe('Done'); + expect(result.metrics.success).toBe(true); + expect(result.metrics.toolCalls).toBeUndefined(); + expect(result.metrics.usage).toEqual({ total: 12, input: 8, output: 4 }); + }); + + it('reports tool calls from newItems with LD config name mapping', async () => { + mockRun.mockResolvedValue( + makeRunResult({ + finalOutput: 'The answer is 42.', + newItems: [ + { + type: 'tool_call_item', + rawItem: { type: 'function_call', name: 'lookup' }, + agent: { name: 'ldai-agent' }, + }, + ], + usage: { totalTokens: 28, inputTokens: 
16, outputTokens: 12 }, + }), + ); + + const runner = new OpenAIAgentRunner({}, mockRun, { lookup: 'lookup' }); + const result = await runner.run('Look up 42'); + + expect(result.content).toBe('The answer is 42.'); + expect(result.metrics.toolCalls).toEqual(['lookup']); + expect(result.metrics.usage).toEqual({ total: 28, input: 16, output: 12 }); + }); + + it('returns an unsuccessful RunnerResult when the agent run throws', async () => { + mockRun.mockRejectedValue(new Error('boom')); + + const runner = new OpenAIAgentRunner({}, mockRun, {}); + const result = await runner.run('Hi'); + + expect(result.content).toBe(''); + expect(result.metrics.success).toBe(false); + }); + + it('calls run with maxTurns of 25', async () => { + mockRun.mockResolvedValue(makeRunResult({ finalOutput: 'ok' })); + + const agent = { name: 'test-agent' }; + const runner = new OpenAIAgentRunner(agent, mockRun, {}); + await runner.run('test'); + + expect(mockRun).toHaveBeenCalledWith( + agent, + 'test', + expect.objectContaining({ maxTurns: 25 }), + ); + }); + + it('reuses the same Agent across multiple run() calls', async () => { + mockRun.mockResolvedValue(makeRunResult({ finalOutput: 'ok' })); + + const agent = { name: 'test-agent' }; + const runner = new OpenAIAgentRunner(agent, mockRun, {}); + await runner.run('first'); + await runner.run('second'); + await runner.run('third'); + + expect(mockRun).toHaveBeenCalledTimes(3); + expect(mockRun.mock.calls[0][0]).toBe(agent); + expect(mockRun.mock.calls[1][0]).toBe(agent); + expect(mockRun.mock.calls[2][0]).toBe(agent); + }); +}); diff --git a/packages/ai-providers/server-ai-openai/__tests__/OpenAIHelper.test.ts b/packages/ai-providers/server-ai-openai/__tests__/OpenAIHelper.test.ts new file mode 100644 index 0000000000..521b5561fc --- /dev/null +++ b/packages/ai-providers/server-ai-openai/__tests__/OpenAIHelper.test.ts @@ -0,0 +1,163 @@ +import { + convertMessagesToOpenAI, + getAIMetricsFromResponse, + getAIUsageFromAgentResult, + 
getAIUsageFromResponse, + getToolCallsFromRunItems, + isAgentToolInstance, + registryValueToAgentTool, +} from '../src/OpenAIHelper'; + +it('converts LDMessages to OpenAI message dicts preserving role and content', () => { + const messages = convertMessagesToOpenAI([ + { role: 'system', content: 'You are X' }, + { role: 'user', content: 'Hi' }, + { role: 'assistant', content: 'Hello' }, + ]); + + expect(messages).toEqual([ + { role: 'system', content: 'You are X' }, + { role: 'user', content: 'Hi' }, + { role: 'assistant', content: 'Hello' }, + ]); +}); + +it('returns undefined when usage is missing from response', () => { + expect(getAIUsageFromResponse({})).toBeUndefined(); +}); + +it('maps OpenAI prompt/completion/total token fields to LDTokenUsage', () => { + const usage = getAIUsageFromResponse({ + usage: { prompt_tokens: 5, completion_tokens: 10, total_tokens: 15 }, + }); + + expect(usage).toEqual({ total: 15, input: 5, output: 10 }); +}); + +it('returns success=true with usage extracted from the response', () => { + const metrics = getAIMetricsFromResponse({ + usage: { prompt_tokens: 1, completion_tokens: 2, total_tokens: 3 }, + }); + + expect(metrics).toEqual({ + success: true, + usage: { total: 3, input: 1, output: 2 }, + }); +}); + +it('returns undefined when runContext.usage is missing', () => { + expect(getAIUsageFromAgentResult({ runContext: {} })).toBeUndefined(); +}); + +it('returns undefined when all token counts are zero', () => { + const result = { + runContext: { usage: { totalTokens: 0, inputTokens: 0, outputTokens: 0 } }, + }; + expect(getAIUsageFromAgentResult(result)).toBeUndefined(); +}); + +it('extracts token usage from runContext.usage', () => { + const result = { + runContext: { usage: { totalTokens: 30, inputTokens: 20, outputTokens: 10 } }, + }; + expect(getAIUsageFromAgentResult(result)).toEqual({ total: 30, input: 20, output: 10 }); +}); + +it('returns undefined on malformed agent result input without throwing', () => { + 
expect(getAIUsageFromAgentResult(null)).toBeUndefined(); + expect(getAIUsageFromAgentResult({})).toBeUndefined(); +}); + +it('extracts function_call names from tool_call_items', () => { + const items = [ + { type: 'tool_call_item', rawItem: { type: 'function_call', name: 'lookup' } }, + { type: 'tool_call_item', rawItem: { type: 'function_call', name: 'save' } }, + ]; + expect(getToolCallsFromRunItems(items)).toEqual(['lookup', 'save']); +}); + +it('extracts hosted_tool_call names from run items', () => { + const items = [ + { type: 'tool_call_item', rawItem: { type: 'hosted_tool_call', name: 'web_search' } }, + ]; + expect(getToolCallsFromRunItems(items)).toEqual(['web_search']); +}); + +it('normalizes _call suffix to known hosted tool names', () => { + const items = [ + { type: 'tool_call_item', rawItem: { type: 'web_search_call' } }, + { type: 'tool_call_item', rawItem: { type: 'file_search_call' } }, + ]; + expect(getToolCallsFromRunItems(items)).toEqual(['web_search', 'file_search']); +}); + +it('preserves unknown _call suffix types as-is', () => { + const items = [ + { type: 'tool_call_item', rawItem: { type: 'custom_thing_call' } }, + ]; + expect(getToolCallsFromRunItems(items)).toEqual(['custom_thing_call']); +}); + +it('skips non tool_call_item entries', () => { + const items = [ + { type: 'message_item', rawItem: { type: 'message', content: 'hi' } }, + { type: 'tool_call_item', rawItem: { type: 'function_call', name: 'fn' } }, + ]; + expect(getToolCallsFromRunItems(items)).toEqual(['fn']); +}); + +it('returns false for functions passed to isAgentToolInstance', () => { + expect(isAgentToolInstance(() => {})).toBe(false); +}); + +it('returns true for non-callable objects passed to isAgentToolInstance', () => { + expect(isAgentToolInstance({ name: 'web_search' })).toBe(true); + expect(isAgentToolInstance('string')).toBe(true); +}); + +describe('given a shared fakeTool mock', () => { + const fakeTool = jest.fn((opts: any) => ({ ...opts, _wrapped: true })); + 
+ it('passes through non-callable values without wrapping', () => { + const hostedTool = { name: 'web_search', type: 'hosted' }; + expect(registryValueToAgentTool(hostedTool, fakeTool)).toBe(hostedTool); + expect(fakeTool).not.toHaveBeenCalled(); + }); + + it('wraps callable values using the tool helper with schema from definition', async () => { + const fn = jest.fn().mockResolvedValue('result'); + const definition = { + name: 'myTool', + description: 'Does stuff', + parameters: { type: 'object', properties: { x: { type: 'number' } } }, + }; + + const wrapped = registryValueToAgentTool(fn, fakeTool, definition); + + expect(fakeTool).toHaveBeenCalledWith( + expect.objectContaining({ + name: 'myTool', + description: 'Does stuff', + strict: false, + }), + ); + expect(wrapped._wrapped).toBe(true); + }); + + it('serializes non-string tool results to JSON', async () => { + const fn = jest.fn().mockResolvedValue({ key: 'value' }); + const definition = { name: 'test' }; + + let capturedExecute: any; + fakeTool.mockImplementation((opts: any) => { + capturedExecute = opts.execute; + return opts; + }); + + registryValueToAgentTool(fn, fakeTool, definition); + const result = await capturedExecute({ arg: 1 }); + + expect(fn).toHaveBeenCalledWith({ arg: 1 }); + expect(result).toBe('{"key":"value"}'); + }); +}); diff --git a/packages/ai-providers/server-ai-openai/__tests__/OpenAIModelRunner.test.ts b/packages/ai-providers/server-ai-openai/__tests__/OpenAIModelRunner.test.ts new file mode 100644 index 0000000000..5e6cfaba30 --- /dev/null +++ b/packages/ai-providers/server-ai-openai/__tests__/OpenAIModelRunner.test.ts @@ -0,0 +1,172 @@ +import { OpenAI } from 'openai'; + +import type { LDAICompletionConfig } from '@launchdarkly/server-sdk-ai'; + +import { OpenAIModelRunner } from '../src/OpenAIModelRunner'; + +jest.mock('openai', () => ({ + OpenAI: jest.fn().mockImplementation(() => ({ + chat: { + completions: { + create: jest.fn(), + }, + }, + })), +})); + 
+describe('OpenAIModelRunner', () => { + let mockOpenAI: jest.Mocked; + let runner: OpenAIModelRunner; + + const baseConfig = { + key: 'completion', + enabled: true, + model: { name: 'gpt-3.5-turbo' }, + } as LDAICompletionConfig; + + beforeEach(() => { + mockOpenAI = new OpenAI() as jest.Mocked; + runner = new OpenAIModelRunner(mockOpenAI, baseConfig); + }); + + describe('run (chat completion)', () => { + it('returns a RunnerResult with content, metrics, and raw response', async () => { + const mockResponse = { + choices: [{ message: { content: 'Hello there!' } }], + usage: { prompt_tokens: 10, completion_tokens: 5, total_tokens: 15 }, + }; + (mockOpenAI.chat.completions.create as jest.Mock).mockResolvedValue(mockResponse as any); + + const result = await runner.run('Hi'); + + expect(mockOpenAI.chat.completions.create).toHaveBeenCalledWith({ + model: 'gpt-3.5-turbo', + messages: [{ role: 'user', content: 'Hi' }], + }); + expect(result.content).toBe('Hello there!'); + expect(result.metrics).toEqual({ + success: true, + usage: { total: 15, input: 10, output: 5 }, + }); + expect(result.raw).toBe(mockResponse); + expect(result.parsed).toBeUndefined(); + }); + + it('prepends config messages before the user prompt', async () => { + const mockResponse = { + choices: [{ message: { content: 'reply' } }], + usage: { prompt_tokens: 1, completion_tokens: 1, total_tokens: 2 }, + }; + (mockOpenAI.chat.completions.create as jest.Mock).mockResolvedValue(mockResponse as any); + + const configWithMessages: LDAICompletionConfig = { + ...baseConfig, + messages: [{ role: 'system', content: 'You are X' }], + }; + const r = new OpenAIModelRunner(mockOpenAI, configWithMessages); + await r.run('Hi'); + + expect(mockOpenAI.chat.completions.create).toHaveBeenCalledWith({ + model: 'gpt-3.5-turbo', + messages: [ + { role: 'system', content: 'You are X' }, + { role: 'user', content: 'Hi' }, + ], + }); + }); + + it('passes a LDMessage[] input directly without prepending config messages', async 
() => { + const mockResponse = { + choices: [{ message: { content: 'Evaluation result' } }], + usage: { prompt_tokens: 10, completion_tokens: 5, total_tokens: 15 }, + }; + (mockOpenAI.chat.completions.create as jest.Mock).mockResolvedValue(mockResponse as any); + + const messages = [ + { role: 'system' as const, content: 'You are a judge' }, + { role: 'user' as const, content: 'Rate this: hello' }, + ]; + const result = await runner.run(messages); + + expect(mockOpenAI.chat.completions.create).toHaveBeenCalledWith({ + model: 'gpt-3.5-turbo', + messages, + }); + expect(result.content).toBe('Evaluation result'); + expect(result.metrics.success).toBe(true); + }); + + it('marks the result unsuccessful when response has no content', async () => { + const mockResponse = { choices: [{ message: {} }] }; + (mockOpenAI.chat.completions.create as jest.Mock).mockResolvedValue(mockResponse as any); + + const result = await runner.run('Hi'); + + expect(result.content).toBe(''); + expect(result.metrics.success).toBe(false); + }); + + it('returns an unsuccessful RunnerResult when the API call throws', async () => { + (mockOpenAI.chat.completions.create as jest.Mock).mockRejectedValue(new Error('boom')); + + const result = await runner.run('Hi'); + + expect(result.content).toBe(''); + expect(result.metrics.success).toBe(false); + expect(result.raw).toBeUndefined(); + }); + }); + + describe('run (structured output)', () => { + it('parses structured output and exposes it via parsed', async () => { + const mockResponse = { + choices: [{ message: { content: '{"name":"Ada","age":36}' } }], + usage: { prompt_tokens: 20, completion_tokens: 10, total_tokens: 30 }, + }; + (mockOpenAI.chat.completions.create as jest.Mock).mockResolvedValue(mockResponse as any); + + const schema = { + type: 'object', + properties: { name: { type: 'string' }, age: { type: 'number' } }, + required: ['name', 'age'], + }; + const result = await runner.run('Tell me about a person', schema); + + 
expect(mockOpenAI.chat.completions.create).toHaveBeenCalledWith({ + model: 'gpt-3.5-turbo', + messages: [{ role: 'user', content: 'Tell me about a person' }], + response_format: { + type: 'json_schema', + json_schema: { + name: 'structured_output', + schema, + strict: true, + }, + }, + }); + expect(result.content).toBe('{"name":"Ada","age":36}'); + expect(result.parsed).toEqual({ name: 'Ada', age: 36 }); + expect(result.metrics.success).toBe(true); + }); + + it('marks the result unsuccessful when structured output is not valid JSON', async () => { + const mockResponse = { + choices: [{ message: { content: 'not json' } }], + usage: { prompt_tokens: 5, completion_tokens: 2, total_tokens: 7 }, + }; + (mockOpenAI.chat.completions.create as jest.Mock).mockResolvedValue(mockResponse as any); + + const result = await runner.run('Hi', { type: 'object' }); + + expect(result.content).toBe('not json'); + expect(result.parsed).toBeUndefined(); + expect(result.metrics.success).toBe(false); + }); + }); + + describe('getClient', () => { + it('returns the underlying OpenAI client', () => { + expect(runner.getClient()).toBe(mockOpenAI); + }); + }); +}); diff --git a/packages/ai-providers/server-ai-openai/__tests__/OpenAIProvider.test.ts b/packages/ai-providers/server-ai-openai/__tests__/OpenAIProvider.test.ts deleted file mode 100644 index 06f4eb98d1..0000000000 --- a/packages/ai-providers/server-ai-openai/__tests__/OpenAIProvider.test.ts +++ /dev/null @@ -1,383 +0,0 @@ -import { OpenAI } from 'openai'; - -import { OpenAIProvider } from '../src/OpenAIProvider'; - -// Mock OpenAI -jest.mock('openai', () => ({ - OpenAI: jest.fn().mockImplementation(() => ({ - chat: { - completions: { - create: jest.fn().mockResolvedValue({ - choices: [{ message: { content: 'Test response' } }], - usage: { prompt_tokens: 10, completion_tokens: 5, total_tokens: 15 }, - }), - }, - }, - })), -})); - -describe('OpenAIProvider', () => { - let mockOpenAI: jest.Mocked; - let provider: OpenAIProvider; - - 
beforeEach(() => { - mockOpenAI = new OpenAI() as jest.Mocked; - provider = new OpenAIProvider(mockOpenAI, 'gpt-3.5-turbo', {}); - }); - - describe('getAIMetricsFromResponse', () => { - it('creates metrics with success=true and token usage', () => { - const mockResponse = { - usage: { - prompt_tokens: 50, - completion_tokens: 50, - total_tokens: 100, - }, - }; - - const result = OpenAIProvider.getAIMetricsFromResponse(mockResponse); - - expect(result).toEqual({ - success: true, - usage: { - total: 100, - input: 50, - output: 50, - }, - }); - }); - - it('creates metrics with success=true and no usage when usage is missing', () => { - const mockResponse = {}; - - const result = OpenAIProvider.getAIMetricsFromResponse(mockResponse); - - expect(result).toEqual({ - success: true, - usage: undefined, - }); - }); - - it('handles partial usage data', () => { - const mockResponse = { - usage: { - prompt_tokens: 30, - // completion_tokens and total_tokens missing - }, - }; - - const result = OpenAIProvider.getAIMetricsFromResponse(mockResponse); - - expect(result).toEqual({ - success: true, - usage: { - total: 0, - input: 30, - output: 0, - }, - }); - }); - }); - - describe('invokeModel', () => { - it('invokes OpenAI chat completions and returns response', async () => { - const mockResponse = { - choices: [ - { - message: { - content: 'Hello! How can I help you today?', - }, - }, - ], - usage: { - prompt_tokens: 10, - completion_tokens: 15, - total_tokens: 25, - }, - }; - - (mockOpenAI.chat.completions.create as jest.Mock).mockResolvedValue(mockResponse as any); - - const messages = [{ role: 'user' as const, content: 'Hello!' }]; - - const result = await provider.invokeModel(messages); - - expect(mockOpenAI.chat.completions.create).toHaveBeenCalledWith({ - model: 'gpt-3.5-turbo', - messages: [{ role: 'user', content: 'Hello!' }], - }); - - expect(result).toEqual({ - message: { - role: 'assistant', - content: 'Hello! 
How can I help you today?', - }, - metrics: { - success: true, - usage: { - total: 25, - input: 10, - output: 15, - }, - }, - }); - }); - - it('returns unsuccessful response when no content in response', async () => { - const mockResponse = { - choices: [ - { - message: { - // content is missing - }, - }, - ], - }; - - (mockOpenAI.chat.completions.create as jest.Mock).mockResolvedValue(mockResponse as any); - - const messages = [{ role: 'user' as const, content: 'Hello!' }]; - - const result = await provider.invokeModel(messages); - - expect(result).toEqual({ - message: { - role: 'assistant', - content: '', - }, - metrics: { - success: false, - usage: undefined, - }, - }); - }); - - it('returns unsuccessful response when choices array is empty', async () => { - const mockResponse = { - choices: [], - }; - - (mockOpenAI.chat.completions.create as jest.Mock).mockResolvedValue(mockResponse as any); - - const messages = [{ role: 'user' as const, content: 'Hello!' }]; - - const result = await provider.invokeModel(messages); - - expect(result).toEqual({ - message: { - role: 'assistant', - content: '', - }, - metrics: { - success: false, - usage: undefined, - }, - }); - }); - - it('returns unsuccessful response when choices is undefined', async () => { - const mockResponse = { - // choices is missing entirely - }; - - (mockOpenAI.chat.completions.create as jest.Mock).mockResolvedValue(mockResponse as any); - - const messages = [{ role: 'user' as const, content: 'Hello!' 
}]; - - const result = await provider.invokeModel(messages); - - expect(result).toEqual({ - message: { - role: 'assistant', - content: '', - }, - metrics: { - success: false, - usage: undefined, - }, - }); - }); - }); - - describe('invokeStructuredModel', () => { - it('invokes OpenAI with structured output and returns parsed response', async () => { - const mockResponse = { - choices: [ - { - message: { - content: '{"name": "John", "age": 30, "city": "New York"}', - }, - }, - ], - usage: { - prompt_tokens: 20, - completion_tokens: 10, - total_tokens: 30, - }, - }; - - (mockOpenAI.chat.completions.create as jest.Mock).mockResolvedValue(mockResponse as any); - - const messages = [{ role: 'user' as const, content: 'Tell me about a person' }]; - const responseStructure = { - type: 'object', - properties: { - name: { type: 'string' }, - age: { type: 'number' }, - city: { type: 'string' }, - }, - required: ['name', 'age', 'city'], - }; - - const result = await provider.invokeStructuredModel(messages, responseStructure); - - expect(mockOpenAI.chat.completions.create).toHaveBeenCalledWith({ - model: 'gpt-3.5-turbo', - messages: [{ role: 'user', content: 'Tell me about a person' }], - response_format: { - type: 'json_schema', - json_schema: { - name: 'structured_output', - schema: responseStructure, - strict: true, - }, - }, - }); - - expect(result).toEqual({ - data: { - name: 'John', - age: 30, - city: 'New York', - }, - rawResponse: '{"name": "John", "age": 30, "city": "New York"}', - metrics: { - success: true, - usage: { - total: 30, - input: 20, - output: 10, - }, - }, - }); - }); - - it('returns unsuccessful response when no content in structured response', async () => { - const mockResponse = { - choices: [ - { - message: { - // content is missing - }, - }, - ], - }; - - (mockOpenAI.chat.completions.create as jest.Mock).mockResolvedValue(mockResponse as any); - - const messages = [{ role: 'user' as const, content: 'Tell me about a person' }]; - const 
responseStructure = { type: 'object' }; - - const result = await provider.invokeStructuredModel(messages, responseStructure); - - expect(result).toEqual({ - data: {}, - rawResponse: '', - metrics: { - success: false, - usage: undefined, - }, - }); - }); - - it('handles JSON parsing errors gracefully', async () => { - const mockResponse = { - choices: [ - { - message: { - content: 'invalid json content', - }, - }, - ], - usage: { - prompt_tokens: 10, - completion_tokens: 5, - total_tokens: 15, - }, - }; - - (mockOpenAI.chat.completions.create as jest.Mock).mockResolvedValue(mockResponse as any); - - const messages = [{ role: 'user' as const, content: 'Tell me about a person' }]; - const responseStructure = { type: 'object' }; - - const result = await provider.invokeStructuredModel(messages, responseStructure); - - expect(result).toEqual({ - data: {}, - rawResponse: 'invalid json content', - metrics: { - success: false, - usage: { - total: 15, - input: 10, - output: 5, - }, - }, - }); - }); - - it('handles empty choices array in structured response', async () => { - const mockResponse = { - choices: [], - }; - - (mockOpenAI.chat.completions.create as jest.Mock).mockResolvedValue(mockResponse as any); - - const messages = [{ role: 'user' as const, content: 'Tell me about a person' }]; - const responseStructure = { type: 'object' }; - - const result = await provider.invokeStructuredModel(messages, responseStructure); - - expect(result).toEqual({ - data: {}, - rawResponse: '', - metrics: { - success: false, - usage: undefined, - }, - }); - }); - }); - - describe('getClient', () => { - it('returns the underlying OpenAI client', () => { - const client = provider.getClient(); - expect(client).toBe(mockOpenAI); - }); - }); - - describe('create', () => { - it('creates OpenAIProvider with correct model and parameters', async () => { - const mockAiConfig = { - key: 'test-config', - model: { - name: 'gpt-4', - parameters: { - temperature: 0.7, - max_tokens: 1000, - }, - }, - 
provider: { name: 'openai' }, - enabled: true, - tracker: {} as any, - toVercelAISDK: jest.fn(), - }; - - const result = await OpenAIProvider.create(mockAiConfig); - - expect(result).toBeInstanceOf(OpenAIProvider); - expect(result.getClient()).toBeDefined(); - }); - }); -}); diff --git a/packages/ai-providers/server-ai-openai/__tests__/OpenAIRunnerFactory.test.ts b/packages/ai-providers/server-ai-openai/__tests__/OpenAIRunnerFactory.test.ts new file mode 100644 index 0000000000..9fd516567c --- /dev/null +++ b/packages/ai-providers/server-ai-openai/__tests__/OpenAIRunnerFactory.test.ts @@ -0,0 +1,160 @@ +import type { LDAIAgentConfig, LDAICompletionConfig } from '@launchdarkly/server-sdk-ai'; + +import { OpenAIAgentRunner } from '../src/OpenAIAgentRunner'; +import { OpenAIModelRunner } from '../src/OpenAIModelRunner'; +import { OpenAIRunnerFactory } from '../src/OpenAIRunnerFactory'; + +jest.mock('openai', () => ({ + OpenAI: jest.fn().mockImplementation(() => ({ + chat: { completions: { create: jest.fn() } }, + })), +})); + +const MockAgent = jest.fn().mockImplementation((opts: any) => opts); +const mockRun = jest.fn(); +const mockTool = jest.fn((opts: any) => opts); + +jest.mock('@openai/agents', () => ({ + Agent: MockAgent, + run: (...args: any[]) => mockRun(...args), + tool: (opts: any) => mockTool(opts), +})); + +describe('OpenAIRunnerFactory', () => { + let factory: OpenAIRunnerFactory; + + beforeEach(() => { + jest.clearAllMocks(); + factory = new OpenAIRunnerFactory(); + }); + + describe('createModel', () => { + it('builds an OpenAIModelRunner that shares the factory client', async () => { + const config = { + key: 'completion', + enabled: true, + model: { name: 'gpt-4o', parameters: { temperature: 0.5 } }, + } as unknown as LDAICompletionConfig; + + const runner = await factory.createModel(config); + + expect(runner).toBeInstanceOf(OpenAIModelRunner); + expect(runner.getClient()).toBe(factory.getClient()); + }); + + it('builds a model runner from a minimal 
config', async () => { + const runner = await factory.createModel({ key: 'completion', enabled: true } as unknown as LDAICompletionConfig); + expect(runner).toBeInstanceOf(OpenAIModelRunner); + }); + }); + + describe('createAgent', () => { + it('builds an OpenAIAgentRunner without tools when none are configured', async () => { + const config = { + key: 'agent', + enabled: true, + model: { name: 'gpt-4o' }, + instructions: 'be helpful', + } as unknown as LDAIAgentConfig; + + const runner = await factory.createAgent(config); + + expect(runner).toBeInstanceOf(OpenAIAgentRunner); + }); + + it('passes instructions and model to the Agent constructor', async () => { + const config = { + key: 'agent', + enabled: true, + model: { name: 'gpt-4o' }, + instructions: 'You are an expert.', + } as unknown as LDAIAgentConfig; + + await factory.createAgent(config); + + expect(MockAgent).toHaveBeenCalledWith( + expect.objectContaining({ + name: 'ldai-agent', + model: 'gpt-4o', + instructions: 'You are an expert.', + }), + ); + }); + + it('maps model parameters to ModelSettings on the Agent', async () => { + const config = { + key: 'agent', + enabled: true, + model: { + name: 'gpt-4o', + parameters: { temperature: 0.7, top_p: 0.9, max_tokens: 1000 }, + }, + instructions: '', + } as unknown as LDAIAgentConfig; + + await factory.createAgent(config); + + expect(MockAgent).toHaveBeenCalledWith( + expect.objectContaining({ + modelSettings: { temperature: 0.7, topP: 0.9, maxTokens: 1000 }, + }), + ); + }); + + it('extracts tool definitions from config.tools', async () => { + const config = { + key: 'agent', + enabled: true, + model: { name: 'gpt-4o', parameters: { temperature: 0.7 } }, + tools: { lookup: { name: 'lookup', description: 'look things up' } }, + instructions: 'be helpful', + } as unknown as LDAIAgentConfig; + + const runner = await factory.createAgent(config, { lookup: () => 'ok' }); + + expect(runner).toBeInstanceOf(OpenAIAgentRunner); + expect(mockTool).toHaveBeenCalled(); + 
}); + + it('skips tools not in the registry and logs a warning', async () => { + const warnMessages: string[] = []; + const logger = { warn: (msg: string) => warnMessages.push(msg) } as any; + const factoryWithLogger = new OpenAIRunnerFactory(logger); + + const config = { + key: 'agent', + enabled: true, + model: { name: 'gpt-4o' }, + tools: { missing: { name: 'missing', description: 'not provided', parameters: { type: 'object' } } }, + instructions: '', + } as unknown as LDAIAgentConfig; + + await factoryWithLogger.createAgent(config, {}); + + expect(warnMessages.some((m) => m.includes("'missing'"))).toBe(true); + }); + + it('passes through pre-built agent tool instances without wrapping', async () => { + const hostedTool = { name: 'web_search', type: 'web_search_tool' }; + const config = { + key: 'agent', + enabled: true, + model: { name: 'gpt-4o' }, + tools: { web_search: { name: 'web_search', description: 'search the web' } }, + instructions: '', + } as unknown as LDAIAgentConfig; + + await factory.createAgent(config, { web_search: hostedTool }); + + const agentOpts = MockAgent.mock.calls[MockAgent.mock.calls.length - 1][0]; + expect(agentOpts.tools).toContain(hostedTool); + expect(mockTool).not.toHaveBeenCalled(); + }); + }); + + describe('getClient', () => { + it('returns the underlying OpenAI client', () => { + expect(factory.getClient()).toBeDefined(); + }); + }); +}); diff --git a/packages/ai-providers/server-ai-openai/package.json b/packages/ai-providers/server-ai-openai/package.json index 28bf351492..b114282b30 100644 --- a/packages/ai-providers/server-ai-openai/package.json +++ b/packages/ai-providers/server-ai-openai/package.json @@ -44,6 +44,7 @@ "devDependencies": { "@launchdarkly/js-server-sdk-common": "2.18.7", "@launchdarkly/server-sdk-ai": "^0.19.1", + "@openai/agents": "^0.9.0", "@opentelemetry/api": "^1.9.0", "@traceloop/instrumentation-openai": "^0.22.0", "@types/jest": "^29.5.3", @@ -60,11 +61,15 @@ }, "peerDependencies": { 
"@launchdarkly/server-sdk-ai": "^0.19.1", + "@openai/agents": "^0.9.0", "@opentelemetry/api": "^1.0.0", "@traceloop/instrumentation-openai": "^0.22.0", - "openai": ">= 4 < 6" + "openai": ">= 4 < 7" }, "peerDependenciesMeta": { + "@openai/agents": { + "optional": true + }, "@opentelemetry/api": { "optional": true }, diff --git a/packages/ai-providers/server-ai-openai/src/OpenAIAgentRunner.ts b/packages/ai-providers/server-ai-openai/src/OpenAIAgentRunner.ts new file mode 100644 index 0000000000..dbe13ac29b --- /dev/null +++ b/packages/ai-providers/server-ai-openai/src/OpenAIAgentRunner.ts @@ -0,0 +1,84 @@ +import type { + LDAIMetrics, + LDLogger, + LDTokenUsage, + Runner, + RunnerResult, +} from '@launchdarkly/server-sdk-ai'; + +import { + getAIUsageFromAgentResult, + getToolCallsFromRunItems, +} from './OpenAIHelper'; + +/** + * Tool registry mapping tool names to their callable implementations or + * pre-built openai-agents tool instances (e.g. `webSearchTool()`). + */ +export type ToolRegistry = Record unknown | Promise) | unknown>; + +const MAX_TURNS = 25; + +/** + * Runner implementation for a single OpenAI agent. + * + * Executes a pre-built agent using the OpenAI Agents SDK (`@openai/agents`). + * Tool calling and the agentic loop are handled internally by the SDK's + * `run()` function. Created by {@link OpenAIRunnerFactory.createAgent}. + * + * Requires `@openai/agents` to be installed. 
+ */ +export class OpenAIAgentRunner implements Runner { + private _agent: any; + private _agentRun: (agent: any, input: string, opts: any) => Promise; + private _logger?: LDLogger; + private _toolNameMap: Record; + + constructor( + agent: any, + agentRun: (agent: any, input: string, opts: any) => Promise, + toolNameMap: Record, + logger?: LDLogger, + ) { + this._agent = agent; + this._agentRun = agentRun; + this._toolNameMap = toolNameMap; + this._logger = logger; + } + + async run(input: string, _outputType?: Record): Promise { + try { + const result = await this._agentRun(this._agent, String(input), { maxTurns: MAX_TURNS }); + + const toolCalls = getToolCallsFromRunItems(result.newItems ?? []).reduce( + (acc: string[], fnName: string) => { + const ldName = this._toolNameMap[fnName]; + if (ldName) { + acc.push(ldName); + } + return acc; + }, + [], + ); + + const usage: LDTokenUsage | undefined = getAIUsageFromAgentResult(result); + const metrics: LDAIMetrics = { + success: true, + usage, + toolCalls: toolCalls.length > 0 ? toolCalls : undefined, + }; + + return { + content: String(result.finalOutput ?? ''), + metrics, + raw: result, + }; + } catch (error) { + this._logger?.warn('OpenAI agent run failed:', error); + return { + content: '', + metrics: { success: false }, + }; + } + } +} diff --git a/packages/ai-providers/server-ai-openai/src/OpenAIHelper.ts b/packages/ai-providers/server-ai-openai/src/OpenAIHelper.ts new file mode 100644 index 0000000000..e9ee4365bc --- /dev/null +++ b/packages/ai-providers/server-ai-openai/src/OpenAIHelper.ts @@ -0,0 +1,208 @@ +import type { LDAIMetrics, LDLogger, LDMessage, LDTool, LDTokenUsage } from '@launchdarkly/server-sdk-ai'; + +import type { ToolRegistry } from './OpenAIAgentRunner'; + +/** + * OpenAI chat completion message format. + * Mirrors the relevant subset of OpenAI's `ChatCompletionMessageParam`. 
+ */ +export interface OpenAIChatMessage { + role: 'user' | 'assistant' | 'system'; + content: string; +} + +/** + * Convert LaunchDarkly messages to OpenAI chat completion message format. + * + * @param messages Array of LDMessage objects + * @returns Array of OpenAI ChatCompletionMessageParam-compatible objects + */ +export function convertMessagesToOpenAI(messages: LDMessage[]): OpenAIChatMessage[] { + return messages.map((msg) => ({ role: msg.role, content: msg.content })); +} + +/** + * Extract token usage from an OpenAI response. + */ +export function getAIUsageFromResponse(response: any): LDTokenUsage | undefined { + if (!response?.usage) { + return undefined; + } + const { prompt_tokens, completion_tokens, total_tokens } = response.usage; + return { + total: total_tokens || 0, + input: prompt_tokens || 0, + output: completion_tokens || 0, + }; +} + +/** + * Get AI metrics from an OpenAI response. + */ +export function getAIMetricsFromResponse(response: any): LDAIMetrics { + return { + success: true, + usage: getAIUsageFromResponse(response), + }; +} + +/** + * Convert a snake_case string to camelCase. + */ +function _snakeToCamel(key: string): string { + return key.replace(/_([a-z])/g, (_, c) => c.toUpperCase()); +} + +/** + * Convert all snake_case keys in a record to camelCase. + */ +export function _mapParameterKeys(parameters: Record): Record { + const result: Record = {}; + for (const [key, value] of Object.entries(parameters)) { + result[_snakeToCamel(key)] = value; + } + return result; +} + +// ============================================================================ +// OpenAI Agents SDK helpers +// ============================================================================ + +const OPENAI_HOSTED_TOOL_NAMES = new Set([ + 'web_search', + 'file_search', + 'code_interpreter', + 'tool_search', +]); + +/** + * Extract aggregated token usage from an openai-agents RunResult. 
+ * + * Reads `result.runContext.usage` which the Agents SDK populates + * automatically across all model calls within a single run. + */ +export function getAIUsageFromAgentResult(result: any): LDTokenUsage | undefined { + try { + const { usage } = result.runContext; + if (!usage) { + return undefined; + } + const total = usage.totalTokens || 0; + const input = usage.inputTokens || 0; + const output = usage.outputTokens || 0; + if (total || input || output) { + return { total, input, output }; + } + } catch { + // fall through + } + return undefined; +} + +/** + * Extract tool call names from RunResult.newItems. + * + * Returns an array of tool names observed during the run. For function_call + * items the raw function name is returned; for hosted tool calls the + * canonical name (without the `_call` suffix) is used when it matches a + * known OpenAI hosted tool. + */ +export function getToolCallsFromRunItems(newItems: any[]): string[] { + const result: string[] = []; + for (const item of newItems) { + if (item?.type !== 'tool_call_item') { + continue; + } + const raw = item.rawItem; + if (!raw) { + continue; + } + if (raw.type === 'function_call') { + if (raw.name) { + result.push(raw.name); + } + } else if (typeof raw.type === 'string') { + if (raw.type === 'hosted_tool_call' && raw.name) { + result.push(raw.name); + } else if (raw.type.endsWith('_call')) { + const base = raw.type.slice(0, -'_call'.length); + result.push(OPENAI_HOSTED_TOOL_NAMES.has(base) ? base : raw.type); + } + } + } + return result; +} + +/** + * True if `value` is already an openai-agents tool object (not a plain callable). + */ +export function isAgentToolInstance(value: unknown): boolean { + return typeof value !== 'function'; +} + +/** + * Turn a ToolRegistry value into an object the OpenAI Agents SDK accepts + * in `Agent({ tools: [...] })`. + * + * Plain callables are wrapped with the Agents SDK `tool()` helper using the + * JSON schema from the LD tool definition. 
Values that are already tool + * instances (e.g., `webSearchTool()`, `fileSearchTool(...)`) are returned + * unchanged. + */ +export function registryValueToAgentTool( + value: unknown, + toolHelper: (opts: any) => any, + definition?: LDTool, +): any { + if (isAgentToolInstance(value)) { + return value; + } + const fn = value as (...args: any[]) => any; + return toolHelper({ + name: definition?.name ?? fn.name ?? 'unknown', + description: definition?.description ?? '', + parameters: definition?.parameters ?? { type: 'object', properties: {}, additionalProperties: false }, + strict: false, + execute: async (args: any) => { + const result = await fn(args); + return typeof result === 'string' ? result : JSON.stringify(result); + }, + }); +} + +/** + * Build agent tools from the LaunchDarkly config tools map and a user-provided registry. + * + * Iterates over `configTools` (from `config.tools`), matches each against the + * `registry`, and wraps them into openai-agents compatible tool objects. + * Returns the tools array and a name mapping for tracking. + */ +export function buildAgentTools( + toolHelper: any, + configTools: { [toolName: string]: LDTool }, + registry: ToolRegistry, + logger?: LDLogger, +): { agentTools: any[]; toolNameMap: Record } { + const agentTools: any[] = []; + const toolNameMap: Record = {}; + + for (const [name, definition] of Object.entries(configTools)) { + const toolFn = registry[name]; + if (toolFn !== undefined) { + if (isAgentToolInstance(toolFn)) { + const instanceName = (toolFn as any).name ?? 
name; + toolNameMap[instanceName] = name; + } else { + toolNameMap[name] = name; + } + agentTools.push(registryValueToAgentTool(toolFn, toolHelper, definition)); + continue; + } + + logger?.warn( + `Tool '${name}' is defined in the AI config but was not found in ` + + `the tool registry; skipping.`, + ); + } + return { agentTools, toolNameMap }; +} diff --git a/packages/ai-providers/server-ai-openai/src/OpenAIModelRunner.ts b/packages/ai-providers/server-ai-openai/src/OpenAIModelRunner.ts new file mode 100644 index 0000000000..196ce257be --- /dev/null +++ b/packages/ai-providers/server-ai-openai/src/OpenAIModelRunner.ts @@ -0,0 +1,134 @@ +import { OpenAI } from 'openai'; + +import type { + LDAICompletionConfig, + LDLogger, + LDMessage, + Runner, + RunnerResult, +} from '@launchdarkly/server-sdk-ai'; + +import { convertMessagesToOpenAI, getAIMetricsFromResponse } from './OpenAIHelper'; + +/** + * Runner implementation for OpenAI chat completions. + * + * Implements the unified `Runner` protocol via {@link run}. Returned by + * {@link OpenAIRunnerFactory.createModel}. + */ +export class OpenAIModelRunner implements Runner { + private _client: OpenAI; + private _config: LDAICompletionConfig; + private _modelName: string; + private _parameters: Record; + private _logger?: LDLogger; + + constructor(client: OpenAI, config: LDAICompletionConfig, logger?: LDLogger) { + this._client = client; + this._config = config; + this._modelName = config.model?.name ?? ''; + this._parameters = { ...(config.model?.parameters ?? {}) }; + this._logger = logger; + } + + /** + * Run the OpenAI model with the given prompt or message array. + * + * When `input` is a string it is wrapped as a user turn and appended to any + * messages defined in the config. When `input` is already a `LDMessage[]` + * (e.g. when called from the Judge evaluation path) it is used as-is. + * + * @param input The user prompt string, or a pre-built message array. 
+ * @param outputType Optional JSON schema for structured output. When provided, + * the response is parsed and exposed via {@link RunnerResult.parsed}. + */ + async run(input: string | LDMessage[], outputType?: Record): Promise { + const messages: LDMessage[] = Array.isArray(input) + ? input + : [...(this._config.messages ?? []), { role: 'user', content: input }]; + + if (outputType !== undefined) { + return this._runStructured(messages, outputType); + } + return this._runCompletion(messages); + } + + /** + * Get the underlying OpenAI client instance. + */ + getClient(): OpenAI { + return this._client; + } + + private async _runCompletion(messages: LDMessage[]): Promise { + try { + const response = await this._client.chat.completions.create({ + ...this._parameters, + model: this._modelName, + messages: convertMessagesToOpenAI(messages), + }); + + const metrics = getAIMetricsFromResponse(response); + const content = response?.choices?.[0]?.message?.content || ''; + + if (!content) { + this._logger?.warn('OpenAI response has no content available'); + metrics.success = false; + } + + return { content, metrics, raw: response }; + } catch (error) { + this._logger?.warn('OpenAI model invocation failed:', error); + return { + content: '', + metrics: { success: false }, + }; + } + } + + private async _runStructured( + messages: LDMessage[], + outputType: Record, + ): Promise { + let response; + try { + response = await this._client.chat.completions.create({ + ...this._parameters, + model: this._modelName, + messages: convertMessagesToOpenAI(messages), + response_format: { + type: 'json_schema', + json_schema: { + name: 'structured_output', + schema: outputType, + strict: true, + }, + }, + }); + } catch (error) { + this._logger?.warn('OpenAI structured model invocation failed:', error); + return { + content: '', + metrics: { success: false }, + }; + } + + const metrics = getAIMetricsFromResponse(response); + const content = response?.choices?.[0]?.message?.content || ''; + 
+ if (!content) { + this._logger?.warn('OpenAI structured response has no content available'); + metrics.success = false; + return { content: '', metrics, raw: response }; + } + + try { + const parsed = JSON.parse(content) as Record; + return { content, metrics, raw: response, parsed }; + } catch (parseError) { + this._logger?.warn('OpenAI structured response contains invalid JSON:', parseError); + metrics.success = false; + return { content, metrics, raw: response }; + } + } +} diff --git a/packages/ai-providers/server-ai-openai/src/OpenAIProvider.ts b/packages/ai-providers/server-ai-openai/src/OpenAIProvider.ts deleted file mode 100644 index 283c8952df..0000000000 --- a/packages/ai-providers/server-ai-openai/src/OpenAIProvider.ts +++ /dev/null @@ -1,250 +0,0 @@ -import { OpenAI } from 'openai'; - -import { AIProvider } from '@launchdarkly/server-sdk-ai'; -import type { - ChatResponse, - LDAIConfig, - LDAIMetrics, - LDLogger, - LDMessage, - LDTokenUsage, - StructuredResponse, -} from '@launchdarkly/server-sdk-ai'; - -let instrumentPromise: Promise | undefined; - -/** - * OpenAI implementation of AIProvider. - * This provider integrates OpenAI's chat completions API with LaunchDarkly's tracking capabilities. - */ -export class OpenAIProvider extends AIProvider { - private _client: OpenAI; - private _modelName: string; - private _parameters: Record; - - constructor( - client: OpenAI, - modelName: string, - parameters: Record, - logger?: LDLogger, - ) { - super(logger); - this._client = client; - this._modelName = modelName; - this._parameters = parameters; - } - - /** - * Static factory method to create an OpenAI AIProvider from an AI configuration. 
- */ - static async create(aiConfig: LDAIConfig, logger?: LDLogger): Promise { - // eslint-disable-next-line no-underscore-dangle - await OpenAIProvider._ensureInstrumented(logger); - - const client = new OpenAI({ - apiKey: process.env.OPENAI_API_KEY, - }); - const modelName = aiConfig.model?.name || ''; - const parameters = aiConfig.model?.parameters || {}; - return new OpenAIProvider(client, modelName, parameters, logger); - } - - /** - * Automatically patches the ESM openai module for OpenTelemetry tracing when - * a TracerProvider is active and @traceloop/instrumentation-openai is installed. - * - * OpenTelemetry instrumentations auto-patch CJS require() calls, but this - * provider loads openai via ESM import, which bypasses those hooks. This - * method bridges that gap by calling manuallyInstrument() on the ESM module. - */ - private static async _ensureInstrumented(logger?: LDLogger): Promise { - if (instrumentPromise !== undefined) { - return instrumentPromise; - } - - instrumentPromise = (async () => { - try { - const { OpenAIInstrumentation } = await import('@traceloop/instrumentation-openai'); - const instrumentation = new OpenAIInstrumentation(); - instrumentation.manuallyInstrument(OpenAI); - logger?.info('OpenAI ESM module instrumented for OpenTelemetry tracing.'); - } catch { - logger?.debug( - 'OpenTelemetry instrumentation not available for OpenAI provider. ' + - 'Install @traceloop/instrumentation-openai to enable automatic tracing.', - ); - } - })(); - - return instrumentPromise; - } - - /** - * Invoke the OpenAI model with an array of messages. 
- */ - async invokeModel(messages: LDMessage[]): Promise { - try { - const response = await this._client.chat.completions.create({ - ...this._parameters, - model: this._modelName, - messages, - }); - - // Generate metrics early (assumes success by default) - const metrics = OpenAIProvider.getAIMetricsFromResponse(response); - - // Safely extract the first choice content using optional chaining - const content = response?.choices?.[0]?.message?.content || ''; - - if (!content) { - this.logger?.warn('OpenAI response has no content available'); - metrics.success = false; - } - - const assistantMessage: LDMessage = { - role: 'assistant', - content, - }; - - return { - message: assistantMessage, - metrics, - }; - } catch (error) { - this.logger?.warn('OpenAI model invocation failed:', error); - - return { - message: { - role: 'assistant', - content: '', - }, - metrics: { - success: false, - }, - }; - } - } - - /** - * Invoke the OpenAI model with structured output support. - */ - async invokeStructuredModel( - messages: LDMessage[], - responseStructure: Record, - ): Promise { - let response; - try { - response = await this._client.chat.completions.create({ - ...this._parameters, - model: this._modelName, - messages, - response_format: { - type: 'json_schema', - json_schema: { - name: 'structured_output', - schema: responseStructure, - strict: true, - }, - }, - }); - } catch (error) { - this.logger?.warn('OpenAI structured model invocation failed:', error); - - return { - data: {}, - rawResponse: '', - metrics: { - success: false, - }, - }; - } - - // Generate metrics early (assumes success by default) - const metrics = OpenAIProvider.getAIMetricsFromResponse(response); - - // Safely extract the first choice content using optional chaining - const content = response?.choices?.[0]?.message?.content || ''; - - if (!content) { - this.logger?.warn('OpenAI structured response has no content available'); - metrics.success = false; - return { - data: {}, - rawResponse: '', - 
metrics, - }; - } - - try { - const data = JSON.parse(content) as Record; - - return { - data, - rawResponse: content, - metrics, - }; - } catch (parseError) { - this.logger?.warn('OpenAI structured response contains invalid JSON:', parseError); - metrics.success = false; - return { - data: {}, - rawResponse: content, - metrics, - }; - } - } - - /** - * Get the underlying OpenAI client instance. - */ - getClient(): OpenAI { - return this._client; - } - - /** - * Get AI metrics from an OpenAI response. - * This method extracts token usage information and success status from OpenAI responses - * and returns a LaunchDarkly AIMetrics object. - * - * @param response The response from OpenAI chat completions API - * @returns LDAIMetrics with success status and token usage - * - * @example - * const tracker = aiConfig.createTracker(); - * const response = await tracker.trackMetricsOf( - * OpenAIProvider.getAIMetricsFromResponse, - * () => client.chat.completions.create(config) - * ); - */ - static getAIMetricsFromResponse(response: any): LDAIMetrics { - // Extract token usage if available - let usage: LDTokenUsage | undefined; - if (response?.usage) { - const { prompt_tokens, completion_tokens, total_tokens } = response.usage; - usage = { - total: total_tokens || 0, - input: prompt_tokens || 0, - output: completion_tokens || 0, - }; - } - - // OpenAI responses that complete successfully are considered successful by default - return { - success: true, - usage, - }; - } - - /** - * Create AI metrics information from an OpenAI response. - * This method extracts token usage information and success status from OpenAI responses - * and returns a LaunchDarkly AIMetrics object. - * - * @deprecated Use `getAIMetricsFromResponse()` instead. 
- * @param openaiResponse The response from OpenAI chat completions API - * @returns LDAIMetrics with success status and token usage - */ - static createAIMetrics(openaiResponse: any): LDAIMetrics { - return OpenAIProvider.getAIMetricsFromResponse(openaiResponse); - } -} diff --git a/packages/ai-providers/server-ai-openai/src/OpenAIRunnerFactory.ts b/packages/ai-providers/server-ai-openai/src/OpenAIRunnerFactory.ts new file mode 100644 index 0000000000..1f3f8f86c7 --- /dev/null +++ b/packages/ai-providers/server-ai-openai/src/OpenAIRunnerFactory.ts @@ -0,0 +1,113 @@ +import { OpenAI } from 'openai'; + +import { AIProvider } from '@launchdarkly/server-sdk-ai'; +import type { LDAIAgentConfig, LDAICompletionConfig, LDLogger } from '@launchdarkly/server-sdk-ai'; + +import { OpenAIAgentRunner, ToolRegistry } from './OpenAIAgentRunner'; +import { _mapParameterKeys, buildAgentTools } from './OpenAIHelper'; +import { OpenAIModelRunner } from './OpenAIModelRunner'; + +let instrumentPromise: Promise | undefined; + +/** + * Factory for creating OpenAI runners (chat completion and agent). + * + * A single factory shares one `OpenAI` client across all runners it produces + * so connection pooling and instrumentation are preserved. + */ +export class OpenAIRunnerFactory extends AIProvider { + private _client: OpenAI; + + constructor(logger?: LDLogger) { + super(logger); + this._client = new OpenAI({ apiKey: process.env.OPENAI_API_KEY }); + // Fire-and-forget: OTel instrumentation is optional and must not block construction. + OpenAIRunnerFactory._ensureInstrumented(logger).catch(() => {}); + } + + /** + * Create a model runner from a completion AI configuration. + */ + async createModel(config: LDAICompletionConfig): Promise { + return new OpenAIModelRunner(this._client, config, this.logger); + } + + /** + * Create an agent runner from an agent AI configuration. 
+ * + * The returned runner uses the OpenAI Agents SDK (`@openai/agents`) which + * manages its own OpenAI client internally. + * + * @param config The LaunchDarkly AI agent configuration. Tool definitions + * are sourced from `config.tools`. + * @param tools Registry mapping tool names to their callable implementations + * or pre-built openai-agents tool instances. Tool names referenced by the + * config that are not present here will be logged and skipped. + */ + async createAgent(config: LDAIAgentConfig, tools?: ToolRegistry): Promise { + let Agent: any; + let agentRun: any; + let toolHelper: any; + try { + const agents = await import('@openai/agents'); + Agent = agents.Agent; + agentRun = agents.run; + toolHelper = agents.tool; + } catch (e) { + throw new Error( + `@openai/agents is required for OpenAIAgentRunner.\n` + + `Install it with: npm install @openai/agents openai zod\n` + + `Cause: ${e instanceof Error ? e.message : e}`, + ); + } + + const registry = tools ?? {}; + const configTools = config.tools ?? {}; + const parameters = _mapParameterKeys({ ...(config.model?.parameters ?? {}) }); + delete parameters.tools; + + const { agentTools, toolNameMap } = buildAgentTools(toolHelper, configTools, registry, this.logger); + const agent = new Agent({ + name: 'ldai-agent', + instructions: config.instructions || undefined, + model: config.model?.name ?? '', + tools: agentTools, + modelSettings: parameters, + }); + + return new OpenAIAgentRunner(agent, agentRun, toolNameMap, this.logger); + } + + /** + * Get the underlying OpenAI client instance. + */ + getClient(): OpenAI { + return this._client; + } + + /** + * Automatically patches the ESM openai module for OpenTelemetry tracing when + * a TracerProvider is active and @traceloop/instrumentation-openai is installed. 
+ */ + private static async _ensureInstrumented(logger?: LDLogger): Promise { + if (instrumentPromise !== undefined) { + return instrumentPromise; + } + + instrumentPromise = (async () => { + try { + const { OpenAIInstrumentation } = await import('@traceloop/instrumentation-openai'); + const instrumentation = new OpenAIInstrumentation(); + instrumentation.manuallyInstrument(OpenAI); + logger?.info('OpenAI ESM module instrumented for OpenTelemetry tracing.'); + } catch { + logger?.debug( + 'OpenTelemetry instrumentation not available for OpenAI provider. ' + + 'Install @traceloop/instrumentation-openai to enable automatic tracing.', + ); + } + })(); + + return instrumentPromise; + } +} diff --git a/packages/ai-providers/server-ai-openai/src/index.ts b/packages/ai-providers/server-ai-openai/src/index.ts index bfdeac9b4b..4ca49b1e91 100644 --- a/packages/ai-providers/server-ai-openai/src/index.ts +++ b/packages/ai-providers/server-ai-openai/src/index.ts @@ -1 +1,14 @@ -export { OpenAIProvider } from './OpenAIProvider'; +export { OpenAIModelRunner } from './OpenAIModelRunner'; +export { OpenAIAgentRunner, ToolRegistry } from './OpenAIAgentRunner'; +export { OpenAIRunnerFactory } from './OpenAIRunnerFactory'; +export { + buildAgentTools, + convertMessagesToOpenAI, + getAIMetricsFromResponse, + getAIUsageFromResponse, + getAIUsageFromAgentResult, + getToolCallsFromRunItems, + isAgentToolInstance, + registryValueToAgentTool, +} from './OpenAIHelper'; +export type { OpenAIChatMessage } from './OpenAIHelper'; diff --git a/packages/sdk/server-ai/examples/managed-agent/package.json b/packages/sdk/server-ai/examples/managed-agent/package.json index f94c314e91..d09febcd1b 100644 --- a/packages/sdk/server-ai/examples/managed-agent/package.json +++ b/packages/sdk/server-ai/examples/managed-agent/package.json @@ -18,8 +18,11 @@ "@launchdarkly/server-sdk-ai-langchain": "0.6.1", "@launchdarkly/server-sdk-ai-openai": "0.5.8", "@launchdarkly/server-sdk-ai-vercel": "0.5.8", + 
"@openai/agents": "^0.9.1", "dotenv": "^16.0.0", - "langchain": "^1.3.5" + "langchain": "^1.3.5", + "openai": "^5.0.0", + "zod": "^4.0.0" }, "devDependencies": { "@types/node": "^20.0.0", diff --git a/packages/sdk/server-ai/examples/openai-observability/src/index.ts b/packages/sdk/server-ai/examples/openai-observability/src/index.ts index d13c01349f..82b45d186c 100644 --- a/packages/sdk/server-ai/examples/openai-observability/src/index.ts +++ b/packages/sdk/server-ai/examples/openai-observability/src/index.ts @@ -49,7 +49,7 @@ async function main() { const aiClient = initAi(ldClient); // ── 2. Import provider and OpenAI after instrumentation so OpenLLMetry can patch the client ── - const { OpenAIProvider } = await import('@launchdarkly/server-sdk-ai-openai'); + const { getAIMetricsFromResponse } = await import('@launchdarkly/server-sdk-ai-openai'); const { OpenAI } = await import('openai'); const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY, @@ -77,7 +77,7 @@ async function main() { try { // ── 4. 
Call OpenAI and track metrics with the provider's extractor ── const tracker = aiConfig.createTracker!(); - const completion = await tracker.trackMetricsOf(OpenAIProvider.getAIMetricsFromResponse, () => + const completion = await tracker.trackMetricsOf(getAIMetricsFromResponse, () => openai.chat.completions.create({ messages: aiConfig.messages || [], model: aiConfig.model?.name || 'gpt-4', diff --git a/packages/sdk/server-ai/examples/openai/src/index.ts b/packages/sdk/server-ai/examples/openai/src/index.ts index c5d348ad4b..f4e30379e3 100644 --- a/packages/sdk/server-ai/examples/openai/src/index.ts +++ b/packages/sdk/server-ai/examples/openai/src/index.ts @@ -3,7 +3,7 @@ import { OpenAI } from 'openai'; import { init, LDContext } from '@launchdarkly/node-server-sdk'; import { initAi } from '@launchdarkly/server-sdk-ai'; -import { OpenAIProvider } from '@launchdarkly/server-sdk-ai-openai'; +import { getAIMetricsFromResponse } from '@launchdarkly/server-sdk-ai-openai'; // Environment variables const sdkKey = process.env.LAUNCHDARKLY_SDK_KEY; @@ -67,7 +67,7 @@ async function main() { const tracker = aiConfig.createTracker!(); const completion = await tracker.trackMetricsOf( - OpenAIProvider.getAIMetricsFromResponse, + getAIMetricsFromResponse, async () => client.chat.completions.create({ messages: aiConfig.messages || [], From 288ac6514f96fa0f03d562505b3e4ac321500a5a Mon Sep 17 00:00:00 2001 From: Jason Bailey Date: Wed, 6 May 2026 16:57:44 -0500 Subject: [PATCH 12/19] feat: Replace LangChainProvider with Runner protocol implementation (AIC-2388) (#1338) --- .../__tests__/LangChainAgentRunner.test.ts | 87 ++++++ .../__tests__/LangChainHelper.test.ts | 217 +++++++++++++ .../__tests__/LangChainModelRunner.test.ts | 136 +++++++++ .../__tests__/LangChainProvider.test.ts | 231 -------------- .../__tests__/LangChainRunnerFactory.test.ts | 110 +++++++ .../src/LangChainAgentRunner.ts | 84 ++++++ .../src/LangChainHelper.ts | 216 +++++++++++++ .../src/LangChainModelRunner.ts | 
118 ++++++++ .../src/LangChainProvider.ts | 285 ------------------ .../src/LangChainRunnerFactory.ts | 92 ++++++ .../server-ai-langchain/src/index.ts | 16 +- 11 files changed, 1075 insertions(+), 517 deletions(-) create mode 100644 packages/ai-providers/server-ai-langchain/__tests__/LangChainAgentRunner.test.ts create mode 100644 packages/ai-providers/server-ai-langchain/__tests__/LangChainHelper.test.ts create mode 100644 packages/ai-providers/server-ai-langchain/__tests__/LangChainModelRunner.test.ts delete mode 100644 packages/ai-providers/server-ai-langchain/__tests__/LangChainProvider.test.ts create mode 100644 packages/ai-providers/server-ai-langchain/__tests__/LangChainRunnerFactory.test.ts create mode 100644 packages/ai-providers/server-ai-langchain/src/LangChainAgentRunner.ts create mode 100644 packages/ai-providers/server-ai-langchain/src/LangChainHelper.ts create mode 100644 packages/ai-providers/server-ai-langchain/src/LangChainModelRunner.ts delete mode 100644 packages/ai-providers/server-ai-langchain/src/LangChainProvider.ts create mode 100644 packages/ai-providers/server-ai-langchain/src/LangChainRunnerFactory.ts diff --git a/packages/ai-providers/server-ai-langchain/__tests__/LangChainAgentRunner.test.ts b/packages/ai-providers/server-ai-langchain/__tests__/LangChainAgentRunner.test.ts new file mode 100644 index 0000000000..60edc50c57 --- /dev/null +++ b/packages/ai-providers/server-ai-langchain/__tests__/LangChainAgentRunner.test.ts @@ -0,0 +1,87 @@ +import { AIMessage, HumanMessage, ToolMessage } from '@langchain/core/messages'; + +import { CompiledAgent, LangChainAgentRunner } from '../src/LangChainAgentRunner'; + +const mockLogger = { + warn: jest.fn(), + info: jest.fn(), + error: jest.fn(), + debug: jest.fn(), +}; + +function makeAgent(invoke: jest.Mock): CompiledAgent { + return { invoke }; +} + +it('returns content with no toolCalls when the agent returns a simple response', async () => { + const finalMsg = new AIMessage('done'); + 
finalMsg.usage_metadata = { total_tokens: 6, input_tokens: 4, output_tokens: 2 }; + + const agent = makeAgent(jest.fn().mockResolvedValue({ messages: [finalMsg] })); + const runner = new LangChainAgentRunner(agent, mockLogger); + const result = await runner.run('hi'); + + expect(agent.invoke).toHaveBeenCalledWith({ + messages: [{ role: 'user', content: 'hi' }], + }); + expect(result.content).toBe('done'); + expect(result.metrics.success).toBe(true); + expect(result.metrics.toolCalls).toBeUndefined(); + expect(result.metrics.usage).toEqual({ total: 6, input: 4, output: 2 }); +}); + +it('extracts tool calls and aggregates usage from multi-step agent messages', async () => { + const toolCallMsg = new AIMessage(''); + toolCallMsg.tool_calls = [{ id: 'call_1', name: 'lookup', args: { id: 42 } }]; + toolCallMsg.usage_metadata = { total_tokens: 14, input_tokens: 10, output_tokens: 4 }; + + const toolResultMsg = new ToolMessage({ tool_call_id: 'call_1', content: '{"value":42}' }); + + const finalMsg = new AIMessage('Answer is 42.'); + finalMsg.usage_metadata = { total_tokens: 14, input_tokens: 6, output_tokens: 8 }; + + const agent = makeAgent( + jest.fn().mockResolvedValue({ + messages: [ + new HumanMessage('Look up 42'), + toolCallMsg, + toolResultMsg, + finalMsg, + ], + }), + ); + + const runner = new LangChainAgentRunner(agent, mockLogger); + const result = await runner.run('Look up 42'); + + expect(result.content).toBe('Answer is 42.'); + expect(result.metrics.toolCalls).toEqual(['lookup']); + expect(result.metrics.usage).toEqual({ total: 28, input: 16, output: 12 }); +}); + +it('returns success=false when the agent throws', async () => { + const agent = makeAgent(jest.fn().mockRejectedValue(new Error('boom'))); + const runner = new LangChainAgentRunner(agent, mockLogger); + const result = await runner.run('hi'); + + expect(result.content).toBe(''); + expect(result.metrics.success).toBe(false); + expect(mockLogger.warn).toHaveBeenCalled(); +}); + +it('returns the 
underlying agent via getAgent()', () => { + const agent = makeAgent(jest.fn()); + const runner = new LangChainAgentRunner(agent, mockLogger); + expect(runner.getAgent()).toBe(agent); +}); + +it('handles empty messages array gracefully', async () => { + const agent = makeAgent(jest.fn().mockResolvedValue({ messages: [] })); + const runner = new LangChainAgentRunner(agent, mockLogger); + const result = await runner.run('hi'); + + expect(result.content).toBe(''); + expect(result.metrics.success).toBe(true); + expect(result.metrics.toolCalls).toBeUndefined(); + expect(result.metrics.usage).toBeUndefined(); +}); diff --git a/packages/ai-providers/server-ai-langchain/__tests__/LangChainHelper.test.ts b/packages/ai-providers/server-ai-langchain/__tests__/LangChainHelper.test.ts new file mode 100644 index 0000000000..add5307a03 --- /dev/null +++ b/packages/ai-providers/server-ai-langchain/__tests__/LangChainHelper.test.ts @@ -0,0 +1,217 @@ +import { AIMessage, HumanMessage, SystemMessage, ToolMessage } from '@langchain/core/messages'; +import { initChatModel } from 'langchain/chat_models/universal'; + +import { + buildStructuredTools, + convertMessagesToLangChain, + createLangChainModel, + extractLastMessageContent, + extractToolCalls, + getAIMetricsFromResponse, + getAIUsageFromResponse, + mapProviderName, + sumTokenUsageFromMessages, +} from '../src/LangChainHelper'; + +jest.mock('langchain/chat_models/universal', () => ({ + initChatModel: jest.fn(), +})); + +const mockInitChatModel = initChatModel as jest.MockedFunction; + +describe('createLangChainModel', () => { + const fakeLLM = { invoke: jest.fn() }; + + beforeEach(() => { + mockInitChatModel.mockReset(); + mockInitChatModel.mockResolvedValue(fakeLLM as any); + }); + + it('calls initChatModel with model name and mapped provider', async () => { + await createLangChainModel({ + key: 'cfg', + enabled: true, + provider: { name: 'openai' }, + model: { name: 'gpt-4o', parameters: { temperature: 0.5 } }, + }); + + 
expect(mockInitChatModel).toHaveBeenCalledWith('gpt-4o', { + temperature: 0.5, + modelProvider: 'openai', + }); + }); + + it('maps gemini to google-genai', async () => { + await createLangChainModel({ + key: 'cfg', + enabled: true, + provider: { name: 'gemini' }, + model: { name: 'gemini-2.0' }, + }); + + expect(mockInitChatModel).toHaveBeenCalledWith('gemini-2.0', { + modelProvider: 'google-genai', + }); + }); +}); + +it('converts system, user, and assistant messages to LangChain instances', () => { + const result = convertMessagesToLangChain([ + { role: 'system', content: 'sys' }, + { role: 'user', content: 'u' }, + { role: 'assistant', content: 'a' }, + ]); + + expect(result).toHaveLength(3); + expect(result[0]).toBeInstanceOf(SystemMessage); + expect(result[1]).toBeInstanceOf(HumanMessage); + expect(result[2]).toBeInstanceOf(AIMessage); +}); + +it('throws on an unsupported role', () => { + expect(() => convertMessagesToLangChain([{ role: 'tool' as any, content: 'x' }])).toThrow( + 'Unsupported message role: tool', + ); +}); + +it('maps gemini to google-genai (case-insensitive)', () => { + expect(mapProviderName('gemini')).toBe('google-genai'); + expect(mapProviderName('Gemini')).toBe('google-genai'); + expect(mapProviderName('GEMINI')).toBe('google-genai'); +}); + +it('returns the provider unchanged when no mapping exists', () => { + expect(mapProviderName('openai')).toBe('openai'); + expect(mapProviderName('anthropic')).toBe('anthropic'); +}); + +it('returns undefined when usage_metadata is absent', () => { + expect(getAIUsageFromResponse(new AIMessage('x'))).toBeUndefined(); +}); + +it('maps usage_metadata to LDTokenUsage', () => { + const message = new AIMessage('x'); + message.usage_metadata = { total_tokens: 30, input_tokens: 10, output_tokens: 20 }; + expect(getAIUsageFromResponse(message)).toEqual({ total: 30, input: 10, output: 20 }); +}); + +it('returns success=true with usage from the response', () => { + const message = new AIMessage('x'); + 
message.usage_metadata = { total_tokens: 3, input_tokens: 1, output_tokens: 2 }; + expect(getAIMetricsFromResponse(message)).toEqual({ + success: true, + usage: { total: 3, input: 1, output: 2 }, + }); +}); + +describe('buildStructuredTools', () => { + const mockLogger = { warn: jest.fn(), info: jest.fn(), error: jest.fn(), debug: jest.fn() }; + + beforeEach(() => jest.clearAllMocks()); + + it('builds a StructuredTool from a valid tool definition', () => { + const toolDefs = [{ name: 'lookup', description: 'looks up a value' }]; + const registry = { lookup: jest.fn().mockReturnValue('result') }; + + const result = buildStructuredTools(toolDefs, registry, mockLogger); + + expect(result).toHaveLength(1); + expect(result[0].name).toBe('lookup'); + expect(result[0].description).toBe('looks up a value'); + }); + + it('skips tools missing from the registry and logs a warning', () => { + const toolDefs = [{ name: 'missing', description: 'not in registry' }]; + + const result = buildStructuredTools(toolDefs, {}, mockLogger); + + expect(result).toHaveLength(0); + expect(mockLogger.warn).toHaveBeenCalledWith( + expect.stringContaining("Tool 'missing'"), + ); + }); + + it('skips non-function built-in tools and logs a warning', () => { + const toolDefs = [{ type: 'code_interpreter', name: 'ci' }]; + + const result = buildStructuredTools(toolDefs, { ci: jest.fn() }, mockLogger); + + expect(result).toHaveLength(0); + expect(mockLogger.warn).toHaveBeenCalledWith( + expect.stringContaining("Built-in tool 'code_interpreter'"), + ); + }); + + it('handles function-style tool definitions with nested function.name', () => { + const toolDefs = [ + { type: 'function', function: { name: 'search', description: 'searches' } }, + ]; + const registry = { search: jest.fn() }; + + const result = buildStructuredTools(toolDefs, registry, mockLogger); + + expect(result).toHaveLength(1); + expect(result[0].name).toBe('search'); + }); + + it('uses a default description when none is provided', () => 
{ + const toolDefs = [{ name: 'mytool' }]; + const registry = { mytool: jest.fn() }; + + const result = buildStructuredTools(toolDefs, registry); + + expect(result[0].description).toBe('Tool mytool'); + }); +}); + +it('extracts tool call names from AIMessages with tool_calls', () => { + const msg1 = new AIMessage(''); + msg1.tool_calls = [ + { id: 'c1', name: 'lookup', args: {} }, + { id: 'c2', name: 'search', args: {} }, + ]; + const msg2 = new AIMessage('done'); + + expect(extractToolCalls([msg1, msg2])).toEqual(['lookup', 'search']); +}); + +it('returns an empty array when no tool calls are present', () => { + expect(extractToolCalls([new AIMessage('done')])).toEqual([]); +}); + +it('handles empty messages for extractToolCalls', () => { + expect(extractToolCalls([])).toEqual([]); +}); + +it('extracts string content from the last message', () => { + expect( + extractLastMessageContent([new HumanMessage('hi'), new AIMessage('hello')]), + ).toBe('hello'); +}); + +it('returns empty string for empty array', () => { + expect(extractLastMessageContent([])).toBe(''); +}); + +it('returns empty string when last message content is not a string', () => { + const msg = new AIMessage({ content: [{ type: 'text', text: 'hi' }] }); + expect(extractLastMessageContent([msg])).toBe(''); +}); + +it('sums usage across multiple messages', () => { + const m1 = new AIMessage(''); + m1.usage_metadata = { total_tokens: 10, input_tokens: 6, output_tokens: 4 }; + const m2 = new AIMessage('done'); + m2.usage_metadata = { total_tokens: 8, input_tokens: 3, output_tokens: 5 }; + const toolMsg = new ToolMessage({ tool_call_id: 'x', content: 'res' }); + + expect(sumTokenUsageFromMessages([m1, toolMsg, m2])).toEqual({ + total: 18, + input: 9, + output: 9, + }); +}); + +it('returns undefined when no messages have usage', () => { + expect(sumTokenUsageFromMessages([new AIMessage('hi')])).toBeUndefined(); +}); diff --git 
a/packages/ai-providers/server-ai-langchain/__tests__/LangChainModelRunner.test.ts b/packages/ai-providers/server-ai-langchain/__tests__/LangChainModelRunner.test.ts new file mode 100644 index 0000000000..ea1429380b --- /dev/null +++ b/packages/ai-providers/server-ai-langchain/__tests__/LangChainModelRunner.test.ts @@ -0,0 +1,136 @@ +import { AIMessage } from '@langchain/core/messages'; + +import type { LDAICompletionConfig, LDMessage } from '@launchdarkly/server-sdk-ai'; + +import { LangChainModelRunner } from '../src/LangChainModelRunner'; + +const mockLogger = { + warn: jest.fn(), + info: jest.fn(), + error: jest.fn(), + debug: jest.fn(), +}; + +const baseConfig: LDAICompletionConfig = { + key: 'completion', + enabled: true, + model: { name: 'fake' }, +}; + +describe('LangChainModelRunner', () => { + let mockLLM: any; + let runner: LangChainModelRunner; + + beforeEach(() => { + mockLLM = { + invoke: jest.fn(), + withStructuredOutput: jest.fn(), + }; + runner = new LangChainModelRunner(mockLLM, baseConfig, mockLogger); + jest.clearAllMocks(); + }); + + it('returns a successful RunnerResult with content, metrics, and raw response', async () => { + const response = new AIMessage('hello'); + response.usage_metadata = { total_tokens: 12, input_tokens: 7, output_tokens: 5 }; + mockLLM.invoke.mockResolvedValue(response); + + const result = await runner.run('hi'); + + expect(result.content).toBe('hello'); + expect(result.metrics).toEqual({ + success: true, + usage: { total: 12, input: 7, output: 5 }, + }); + expect(result.raw).toBe(response); + }); + + it('prepends config messages before the user prompt', async () => { + const response = new AIMessage('reply'); + mockLLM.invoke.mockResolvedValue(response); + + const configWithMessages: LDAICompletionConfig = { + ...baseConfig, + messages: [{ role: 'system', content: 'You are X' }], + }; + const r = new LangChainModelRunner(mockLLM, configWithMessages, mockLogger); + await r.run('hi'); + + const passed = 
mockLLM.invoke.mock.calls[0][0]; + expect(passed).toHaveLength(2); + expect(passed[0].content).toBe('You are X'); + expect(passed[1].content).toBe('hi'); + }); + + it('uses a LDMessage[] as-is without prepending config messages', async () => { + const response = new AIMessage('direct reply'); + mockLLM.invoke.mockResolvedValue(response); + + const configWithMessages: LDAICompletionConfig = { + ...baseConfig, + messages: [{ role: 'system', content: 'You are X' }], + }; + const r = new LangChainModelRunner(mockLLM, configWithMessages, mockLogger); + const inputMessages: LDMessage[] = [ + { role: 'user', content: 'direct question' }, + ]; + await r.run(inputMessages); + + const passed = mockLLM.invoke.mock.calls[0][0]; + expect(passed).toHaveLength(1); + expect(passed[0].content).toBe('direct question'); + }); + + it('marks success=false and warns when content is non-string (multimodal)', async () => { + mockLLM.invoke.mockResolvedValue(new AIMessage([{ type: 'image' }] as any)); + + const result = await runner.run('hi'); + + expect(result.content).toBe(''); + expect(result.metrics.success).toBe(false); + expect(mockLogger.warn).toHaveBeenCalled(); + }); + + it('returns success=false when invoke throws', async () => { + const err = new Error('boom'); + mockLLM.invoke.mockRejectedValue(err); + + const result = await runner.run('hi'); + + expect(result.content).toBe(''); + expect(result.metrics.success).toBe(false); + expect(mockLogger.warn).toHaveBeenCalledWith('LangChain model invocation failed:', err); + }); + + it('exposes parsed structured output via parsed', async () => { + const data = { name: 'Ada', age: 36 }; + const invoke = jest.fn().mockResolvedValue(data); + mockLLM.withStructuredOutput.mockReturnValue({ invoke }); + + const result = await runner.run('hi', { type: 'object' }); + + expect(result.parsed).toEqual(data); + expect(result.content).toBe(JSON.stringify(data)); + expect(result.metrics.success).toBe(true); + }); + + it('returns success=false when 
structured invoke throws', async () => { + const err = new Error('struct boom'); + const invoke = jest.fn().mockRejectedValue(err); + mockLLM.withStructuredOutput.mockReturnValue({ invoke }); + + const result = await runner.run('hi', { type: 'object' }); + + expect(result.content).toBe(''); + expect(result.parsed).toBeUndefined(); + expect(result.metrics.success).toBe(false); + expect(mockLogger.warn).toHaveBeenCalledWith( + 'LangChain structured model invocation failed:', + err, + ); + }); + + it('returns the underlying chat model', () => { + expect(runner.getChatModel()).toBe(mockLLM); + }); +}); diff --git a/packages/ai-providers/server-ai-langchain/__tests__/LangChainProvider.test.ts b/packages/ai-providers/server-ai-langchain/__tests__/LangChainProvider.test.ts deleted file mode 100644 index c2f284a6a5..0000000000 --- a/packages/ai-providers/server-ai-langchain/__tests__/LangChainProvider.test.ts +++ /dev/null @@ -1,231 +0,0 @@ -import { AIMessage, HumanMessage, SystemMessage } from '@langchain/core/messages'; - -import { LangChainProvider } from '../src/LangChainProvider'; - -// Mock LangChain dependencies -jest.mock('langchain/chat_models/universal', () => ({ - initChatModel: jest.fn(), -})); - -// Mock logger -const mockLogger = { - warn: jest.fn(), - info: jest.fn(), - error: jest.fn(), - debug: jest.fn(), -}; - -describe('LangChainProvider', () => { - describe('convertMessagesToLangChain', () => { - it('converts system messages to SystemMessage', () => { - const messages = [{ role: 'system' as const, content: 'You are a helpful assistant.' }]; - const result = LangChainProvider.convertMessagesToLangChain(messages); - - expect(result).toHaveLength(1); - expect(result[0]).toBeInstanceOf(SystemMessage); - expect(result[0].content).toBe('You are a helpful assistant.'); - }); - - it('converts user messages to HumanMessage', () => { - const messages = [{ role: 'user' as const, content: 'Hello, how are you?' 
}]; - const result = LangChainProvider.convertMessagesToLangChain(messages); - - expect(result).toHaveLength(1); - expect(result[0]).toBeInstanceOf(HumanMessage); - expect(result[0].content).toBe('Hello, how are you?'); - }); - - it('converts assistant messages to AIMessage', () => { - const messages = [{ role: 'assistant' as const, content: 'I am doing well, thank you!' }]; - const result = LangChainProvider.convertMessagesToLangChain(messages); - - expect(result).toHaveLength(1); - expect(result[0]).toBeInstanceOf(AIMessage); - expect(result[0].content).toBe('I am doing well, thank you!'); - }); - - it('converts multiple messages in order', () => { - const messages = [ - { role: 'system' as const, content: 'You are a helpful assistant.' }, - { role: 'user' as const, content: 'What is the weather like?' }, - { role: 'assistant' as const, content: 'I cannot check the weather.' }, - ]; - const result = LangChainProvider.convertMessagesToLangChain(messages); - - expect(result).toHaveLength(3); - expect(result[0]).toBeInstanceOf(SystemMessage); - expect(result[1]).toBeInstanceOf(HumanMessage); - expect(result[2]).toBeInstanceOf(AIMessage); - }); - - it('throws error for unsupported message role', () => { - const messages = [{ role: 'unknown' as any, content: 'Test message' }]; - - expect(() => LangChainProvider.convertMessagesToLangChain(messages)).toThrow( - 'Unsupported message role: unknown', - ); - }); - - it('handles empty message array', () => { - const result = LangChainProvider.convertMessagesToLangChain([]); - - expect(result).toHaveLength(0); - }); - }); - - describe('getAIMetricsFromResponse', () => { - it('creates metrics with success=true and token usage', () => { - const mockResponse = new AIMessage('Test response'); - mockResponse.usage_metadata = { - total_tokens: 100, - input_tokens: 50, - output_tokens: 50, - }; - - const result = LangChainProvider.getAIMetricsFromResponse(mockResponse); - - expect(result).toEqual({ - success: true, - usage: { - 
total: 100, - input: 50, - output: 50, - }, - }); - }); - - it('creates metrics with success=true and no usage when metadata is missing', () => { - const mockResponse = new AIMessage('Test response'); - - const result = LangChainProvider.getAIMetricsFromResponse(mockResponse); - - expect(result).toEqual({ - success: true, - usage: undefined, - }); - }); - }); - - describe('invokeModel', () => { - let mockLLM: any; - let provider: LangChainProvider; - - beforeEach(() => { - mockLLM = { - invoke: jest.fn(), - }; - provider = new LangChainProvider(mockLLM, mockLogger); - jest.clearAllMocks(); - }); - - it('returns success=true for string content', async () => { - const mockResponse = new AIMessage('Test response'); - mockLLM.invoke.mockResolvedValue(mockResponse); - - const messages = [{ role: 'user' as const, content: 'Hello' }]; - const result = await provider.invokeModel(messages); - - expect(result.metrics.success).toBe(true); - expect(result.message.content).toBe('Test response'); - expect(mockLogger.warn).not.toHaveBeenCalled(); - }); - - it('returns success=false for non-string content and logs warning', async () => { - const mockResponse = new AIMessage({ type: 'image', data: 'base64data' } as any); - mockLLM.invoke.mockResolvedValue(mockResponse); - - const messages = [{ role: 'user' as const, content: 'Hello' }]; - const result = await provider.invokeModel(messages); - - expect(result.metrics.success).toBe(false); - expect(result.message.content).toBe(''); - expect(mockLogger.warn).toHaveBeenCalledTimes(1); - }); - - it('returns success=false for array content and logs warning', async () => { - const mockResponse = new AIMessage(['text', { type: 'image', data: 'base64data' }] as any); - mockLLM.invoke.mockResolvedValue(mockResponse); - - const messages = [{ role: 'user' as const, content: 'Hello' }]; - const result = await provider.invokeModel(messages); - - expect(result.metrics.success).toBe(false); - expect(result.message.content).toBe(''); - 
expect(mockLogger.warn).toHaveBeenCalledTimes(1); - }); - - it('returns success=false when model invocation throws an error', async () => { - const error = new Error('Model invocation failed'); - mockLLM.invoke.mockRejectedValue(error); - - const messages = [{ role: 'user' as const, content: 'Hello' }]; - const result = await provider.invokeModel(messages); - - expect(result.metrics.success).toBe(false); - expect(result.message.content).toBe(''); - expect(result.message.role).toBe('assistant'); - expect(mockLogger.warn).toHaveBeenCalledWith('LangChain model invocation failed:', error); - }); - }); - - describe('invokeStructuredModel', () => { - let mockLLM: any; - let provider: LangChainProvider; - - beforeEach(() => { - mockLLM = { - withStructuredOutput: jest.fn(), - }; - provider = new LangChainProvider(mockLLM, mockLogger); - jest.clearAllMocks(); - }); - - it('returns success=true for successful invocation', async () => { - const mockResponse = { result: 'structured data' }; - const mockInvoke = jest.fn().mockResolvedValue(mockResponse); - mockLLM.withStructuredOutput.mockReturnValue({ invoke: mockInvoke }); - - const messages = [{ role: 'user' as const, content: 'Hello' }]; - const responseStructure = { type: 'object', properties: {} }; - const result = await provider.invokeStructuredModel(messages, responseStructure); - - expect(result.metrics.success).toBe(true); - expect(result.data).toEqual(mockResponse); - expect(result.rawResponse).toBe(JSON.stringify(mockResponse)); - expect(mockLogger.warn).not.toHaveBeenCalled(); - }); - - it('returns success=false when structured model invocation throws an error', async () => { - const error = new Error('Structured invocation failed'); - const mockInvoke = jest.fn().mockRejectedValue(error); - mockLLM.withStructuredOutput.mockReturnValue({ invoke: mockInvoke }); - - const messages = [{ role: 'user' as const, content: 'Hello' }]; - const responseStructure = { type: 'object', properties: {} }; - const result = await 
provider.invokeStructuredModel(messages, responseStructure); - - expect(result.metrics.success).toBe(false); - expect(result.data).toEqual({}); - expect(result.rawResponse).toBe(''); - expect(result.metrics.usage).toEqual({ total: 0, input: 0, output: 0 }); - expect(mockLogger.warn).toHaveBeenCalledWith( - 'LangChain structured model invocation failed:', - error, - ); - }); - }); - - describe('mapProvider', () => { - it('maps gemini to google-genai', () => { - expect(LangChainProvider.mapProvider('gemini')).toBe('google-genai'); - expect(LangChainProvider.mapProvider('Gemini')).toBe('google-genai'); - expect(LangChainProvider.mapProvider('GEMINI')).toBe('google-genai'); - }); - - it('returns provider name unchanged for unmapped providers', () => { - expect(LangChainProvider.mapProvider('openai')).toBe('openai'); - expect(LangChainProvider.mapProvider('anthropic')).toBe('anthropic'); - expect(LangChainProvider.mapProvider('unknown')).toBe('unknown'); - }); - }); -}); diff --git a/packages/ai-providers/server-ai-langchain/__tests__/LangChainRunnerFactory.test.ts b/packages/ai-providers/server-ai-langchain/__tests__/LangChainRunnerFactory.test.ts new file mode 100644 index 0000000000..8670b9a291 --- /dev/null +++ b/packages/ai-providers/server-ai-langchain/__tests__/LangChainRunnerFactory.test.ts @@ -0,0 +1,110 @@ +import { createAgent } from 'langchain'; +import { initChatModel } from 'langchain/chat_models/universal'; + +import type { LDAIAgentConfig, LDAICompletionConfig } from '@launchdarkly/server-sdk-ai'; + +import { LangChainAgentRunner } from '../src/LangChainAgentRunner'; +import { LangChainModelRunner } from '../src/LangChainModelRunner'; +import { LangChainRunnerFactory } from '../src/LangChainRunnerFactory'; + +jest.mock('langchain/chat_models/universal', () => ({ + initChatModel: jest.fn(), +})); + +jest.mock('langchain', () => ({ + createAgent: jest.fn(), +})); + +const mockInitChatModel = initChatModel as jest.MockedFunction; +const mockCreateAgent = 
createAgent as jest.MockedFunction; + +describe('LangChainRunnerFactory', () => { + let factory: LangChainRunnerFactory; + const fakeLLM = { invoke: jest.fn(), bindTools: jest.fn() }; + const fakeCompiledAgent = { invoke: jest.fn() }; + + beforeEach(() => { + factory = new LangChainRunnerFactory(); + mockInitChatModel.mockReset(); + mockCreateAgent.mockReset(); + mockInitChatModel.mockResolvedValue(fakeLLM as any); + mockCreateAgent.mockReturnValue(fakeCompiledAgent as any); + }); + + it('builds a LangChainModelRunner with model and parameters from the config', async () => { + const config: LDAICompletionConfig = { + key: 'completion', + enabled: true, + provider: { name: 'openai' }, + model: { name: 'gpt-4o', parameters: { temperature: 0.5 } }, + }; + + const runner = await factory.createModel(config); + + expect(mockInitChatModel).toHaveBeenCalledWith('gpt-4o', { + temperature: 0.5, + modelProvider: 'openai', + }); + expect(runner).toBeInstanceOf(LangChainModelRunner); + }); + + it('maps gemini provider to google-genai', async () => { + await factory.createModel({ + key: 'completion', + enabled: true, + provider: { name: 'gemini' }, + model: { name: 'gemini-2.0' }, + }); + + expect(mockInitChatModel).toHaveBeenCalledWith('gemini-2.0', { + modelProvider: 'google-genai', + }); + }); + + it('strips tools from parameters and passes them to createAgent', async () => { + const tools = [{ name: 'lookup', description: 'looks up a value' }]; + const config: LDAIAgentConfig = { + key: 'agent', + enabled: true, + provider: { name: 'openai' }, + model: { name: 'gpt-4o', parameters: { temperature: 0.7, tools } }, + instructions: 'be helpful', + }; + + const runner = await factory.createAgent(config, { lookup: () => 'ok' }); + + expect(mockInitChatModel).toHaveBeenCalledWith('gpt-4o', { + temperature: 0.7, + modelProvider: 'openai', + }); + expect(mockCreateAgent).toHaveBeenCalledWith( + expect.objectContaining({ + model: fakeLLM, + systemPrompt: 'be helpful', + }), + ); + 
expect(mockCreateAgent.mock.calls[0][0].tools).toHaveLength(1); + expect(runner).toBeInstanceOf(LangChainAgentRunner); + }); + + it('passes undefined tools to createAgent when no tool definitions exist', async () => { + const config: LDAIAgentConfig = { + key: 'agent', + enabled: true, + provider: { name: 'openai' }, + model: { name: 'gpt-4o' }, + instructions: '', + }; + + await factory.createAgent(config); + + expect(mockCreateAgent).toHaveBeenCalledWith( + expect.objectContaining({ + model: fakeLLM, + tools: undefined, + systemPrompt: undefined, + }), + ); + }); + +}); diff --git a/packages/ai-providers/server-ai-langchain/src/LangChainAgentRunner.ts b/packages/ai-providers/server-ai-langchain/src/LangChainAgentRunner.ts new file mode 100644 index 0000000000..fddcf8dc33 --- /dev/null +++ b/packages/ai-providers/server-ai-langchain/src/LangChainAgentRunner.ts @@ -0,0 +1,84 @@ +import type { BaseMessage } from '@langchain/core/messages'; + +import type { LDAIMetrics, LDLogger, RunnerResult, Runner } from '@launchdarkly/server-sdk-ai'; + +import { + extractLastMessageContent, + extractToolCalls, + sumTokenUsageFromMessages, +} from './LangChainHelper'; + +/** + * Tool registry mapping tool names to their callable implementations. + */ +export type ToolRegistry = Record unknown>; + +/** + * Minimal interface for a compiled LangChain agent (from `createAgent`). + * The agent handles the tool-calling loop internally. + */ +export interface CompiledAgent { + invoke(input: Record): Promise<{ messages: BaseMessage[] }>; +} + +/** + * Runner implementation for a single LangChain agent. + * + * Wraps a compiled LangChain agent graph (from `langchain`'s `createAgent`) + * and delegates execution to it. Tool calling and loop management are handled + * internally by the graph, matching the Python SDK's approach. + * + * Returned by {@link LangChainRunnerFactory.createAgent}. 
+ */ +export class LangChainAgentRunner implements Runner { + private _agent: CompiledAgent; + private _logger?: LDLogger; + + constructor(agent: CompiledAgent, logger?: LDLogger) { + this._agent = agent; + this._logger = logger; + } + + /** + * Run the agent with the given prompt. + * + * Delegates to the compiled LangChain agent, which handles the + * tool-calling loop internally. + * + * @param input The user prompt to send to the agent. + * @param _outputType Reserved for future structured output support; currently + * ignored by the agent runner. + */ + async run(input: string, _outputType?: Record): Promise { + try { + const result = await this._agent.invoke({ + messages: [{ role: 'user', content: input }], + }); + + const messages = result.messages ?? []; + const content = extractLastMessageContent(messages); + const toolCalls = extractToolCalls(messages); + + const metrics: LDAIMetrics = { + success: true, + usage: sumTokenUsageFromMessages(messages), + toolCalls: toolCalls.length > 0 ? toolCalls : undefined, + }; + + return { content, metrics, raw: result }; + } catch (error) { + this._logger?.warn('LangChain agent run failed:', error); + return { + content: '', + metrics: { success: false }, + }; + } + } + + /** + * Return the underlying compiled LangChain agent. 
+ */ + getAgent(): CompiledAgent { + return this._agent; + } +} diff --git a/packages/ai-providers/server-ai-langchain/src/LangChainHelper.ts b/packages/ai-providers/server-ai-langchain/src/LangChainHelper.ts new file mode 100644 index 0000000000..6072bc8686 --- /dev/null +++ b/packages/ai-providers/server-ai-langchain/src/LangChainHelper.ts @@ -0,0 +1,216 @@ +import { BaseChatModel } from '@langchain/core/language_models/chat_models'; +import { AIMessage, BaseMessage, HumanMessage, SystemMessage } from '@langchain/core/messages'; +import { tool } from '@langchain/core/tools'; +import type { StructuredToolInterface } from '@langchain/core/tools'; +import { initChatModel } from 'langchain/chat_models/universal'; + +import type { + LDAIConfig, + LDAIMetrics, + LDLogger, + LDMessage, + LDTokenUsage, +} from '@launchdarkly/server-sdk-ai'; + +import type { ToolRegistry } from './LangChainAgentRunner'; + +/** + * Convert LaunchDarkly messages to LangChain message instances. + */ +export function convertMessagesToLangChain( + messages: LDMessage[], +): (HumanMessage | SystemMessage | AIMessage)[] { + return messages.map((msg) => { + switch (msg.role) { + case 'system': + return new SystemMessage(msg.content); + case 'user': + return new HumanMessage(msg.content); + case 'assistant': + return new AIMessage(msg.content); + default: + throw new Error(`Unsupported message role: ${msg.role}`); + } + }); +} + +/** + * Create a LangChain chat model from a LaunchDarkly AI configuration. + */ +export async function createLangChainModel(aiConfig: LDAIConfig): Promise { + const modelName = aiConfig.model?.name || ''; + const provider = aiConfig.provider?.name || ''; + const parameters = { ...(aiConfig.model?.parameters || {}) }; + delete parameters.tools; + + return initChatModel(modelName, { + ...parameters, + modelProvider: mapProviderName(provider), + }); +} + +/** + * Map LaunchDarkly provider names to LangChain `modelProvider` strings. 
+ */ +export function mapProviderName(ldProviderName: string): string { + const lowercasedName = ldProviderName.toLowerCase(); + const mapping: Record = { + gemini: 'google-genai', + }; + return mapping[lowercasedName] || lowercasedName; +} + +/** + * Extract token usage from a LangChain AIMessage response. + */ +export function getAIUsageFromResponse(response: AIMessage): LDTokenUsage | undefined { + if (!response?.usage_metadata) { + return undefined; + } + return { + total: response.usage_metadata.total_tokens, + input: response.usage_metadata.input_tokens, + output: response.usage_metadata.output_tokens, + }; +} + +/** + * Get AI metrics from a LangChain provider response. + */ +export function getAIMetricsFromResponse(response: AIMessage): LDAIMetrics { + return { + success: true, + usage: getAIUsageFromResponse(response), + }; +} + +/** + * Extract JSON Schema from an LD tool definition's parameters. + * Falls back to an open object schema when no parameters are defined. + */ +function _getInputSchema(toolDef: Record): Record { + const params = toolDef.function?.parameters ?? toolDef.parameters; + if (params && typeof params === 'object' && params.properties) { + return params; + } + return { type: 'object', properties: {}, additionalProperties: true }; +} + +/** + * Build LangChain StructuredTool instances from LD tool definitions + * and a ToolRegistry. Tools missing from the registry are skipped with a + * warning. Non-function built-in tools are also skipped. + */ +export function buildStructuredTools( + toolDefinitions: any[], + tools: ToolRegistry, + logger?: LDLogger, +): StructuredToolInterface[] { + const result: StructuredToolInterface[] = []; + + for (const td of toolDefinitions) { + if (typeof td !== 'object' || td === null) { + continue; + } + + const toolType: string | undefined = td.type; + if (toolType && toolType !== 'function') { + logger?.warn( + `Built-in tool '${toolType}' is not reliably supported via LangChain and will be skipped. 
` + + 'Use a provider-specific runner to use built-in provider tools.', + ); + continue; + } + + const name: string | undefined = td.name ?? td.function?.name; + if (!name) { + continue; + } + + const fn = tools[name]; + if (!fn) { + logger?.warn( + `Tool '${name}' is defined in the AI config but was not found in ` + + `the tool registry; skipping.`, + ); + continue; + } + + const rawDesc: string = + (typeof td.description === 'string' ? td.description : '') || + (typeof td.function?.description === 'string' ? td.function.description : ''); + const description = rawDesc.trim() || `Tool ${name}`; + + result.push( + tool( + async (args: any) => { + const res = await fn(args ?? {}); + return typeof res === 'string' ? res : JSON.stringify(res); + }, + { + name, + description, + schema: _getInputSchema(td) as any, + }, + ), + ); + } + + return result; +} + +/** + * Extract tool-call names from a LangChain agent message list. + */ +export function extractToolCalls(messages: BaseMessage[]): string[] { + const toolCalls: string[] = []; + for (const msg of messages ?? []) { + const msgToolCalls = (msg as AIMessage).tool_calls; + if (!msgToolCalls) { + continue; + } + for (const tc of msgToolCalls) { + if (tc.name) { + toolCalls.push(tc.name); + } + } + } + return toolCalls; +} + +/** + * Extract the string content of the last message in a list. + */ +export function extractLastMessageContent(messages: BaseMessage[]): string { + if (messages && messages.length > 0) { + const last = messages[messages.length - 1]; + if (typeof last.content === 'string') { + return last.content; + } + } + return ''; +} + +/** + * Sum token usage across all messages in a LangChain agent result. 
+ */ +export function sumTokenUsageFromMessages(messages: BaseMessage[]): LDTokenUsage | undefined { + let inputSum = 0; + let outputSum = 0; + let totalSum = 0; + + for (const m of messages) { + const usage = getAIUsageFromResponse(m as AIMessage); + if (!usage) { + continue; + } + inputSum += usage.input; + outputSum += usage.output; + totalSum += usage.total; + } + + if (inputSum === 0 && outputSum === 0 && totalSum === 0) { + return undefined; + } + return { total: totalSum, input: inputSum, output: outputSum }; +} diff --git a/packages/ai-providers/server-ai-langchain/src/LangChainModelRunner.ts b/packages/ai-providers/server-ai-langchain/src/LangChainModelRunner.ts new file mode 100644 index 0000000000..ec4237269f --- /dev/null +++ b/packages/ai-providers/server-ai-langchain/src/LangChainModelRunner.ts @@ -0,0 +1,118 @@ +import { BaseChatModel } from '@langchain/core/language_models/chat_models'; +import { AIMessage } from '@langchain/core/messages'; + +import type { + LDAICompletionConfig, + LDLogger, + LDMessage, + Runner, + RunnerResult, +} from '@launchdarkly/server-sdk-ai'; + +import { convertMessagesToLangChain, getAIMetricsFromResponse } from './LangChainHelper'; + +/** + * Runner implementation for LangChain chat models. + * + * Implements the unified `Runner` protocol via {@link run}. Returned by + * {@link LangChainRunnerFactory.createModel}. + */ +export class LangChainModelRunner implements Runner { + private _llm: BaseChatModel; + private _config: LDAICompletionConfig; + private _logger?: LDLogger; + + constructor(llm: BaseChatModel, config: LDAICompletionConfig, logger?: LDLogger) { + this._llm = llm; + this._config = config; + this._logger = logger; + } + + /** + * Run the LangChain model with the given prompt. + * + * @param input The user prompt string or a pre-built message array to send to the model. + * When a string is provided, config messages are prepended before the user prompt. 
+ * When an {@link LDMessage} array is provided, it is used as-is (config messages are + * not prepended). + * @param outputType Optional JSON schema for structured output. When provided, + * the parsed result is exposed via {@link RunnerResult.parsed}. + */ + async run(input: string | LDMessage[], outputType?: Record): Promise { + const messages: LDMessage[] = Array.isArray(input) + ? input + : [...(this._config.messages ?? []), { role: 'user', content: input }]; + + if (outputType !== undefined) { + return this._runStructured(messages, outputType); + } + return this._runCompletion(messages); + } + + /** + * Get the underlying LangChain model instance. + */ + getChatModel(): BaseChatModel { + return this._llm; + } + + private async _runCompletion(messages: LDMessage[]): Promise { + try { + const langchainMessages = convertMessagesToLangChain(messages); + const response: AIMessage = await this._llm.invoke(langchainMessages); + const metrics = getAIMetricsFromResponse(response); + + let content: string = ''; + if (typeof response.content === 'string') { + content = response.content; + } else { + this._logger?.warn( + `Multimodal response not supported, expecting a string. 
Content type: ${typeof response.content}, Content:`, + JSON.stringify(response.content, null, 2), + ); + metrics.success = false; + } + + return { content, metrics, raw: response }; + } catch (error) { + this._logger?.warn('LangChain model invocation failed:', error); + return { + content: '', + metrics: { success: false }, + }; + } + } + + private async _runStructured( + messages: LDMessage[], + outputType: Record, + ): Promise { + try { + const langchainMessages = convertMessagesToLangChain(messages); + const response = (await this._llm + .withStructuredOutput(outputType) + .invoke(langchainMessages)) as Record; + + const metrics = { + success: true, + usage: { total: 0, input: 0, output: 0 }, + }; + + return { + content: JSON.stringify(response), + metrics, + raw: response, + parsed: response, + }; + } catch (error) { + this._logger?.warn('LangChain structured model invocation failed:', error); + return { + content: '', + metrics: { + success: false, + usage: { total: 0, input: 0, output: 0 }, + }, + }; + } + } +} diff --git a/packages/ai-providers/server-ai-langchain/src/LangChainProvider.ts b/packages/ai-providers/server-ai-langchain/src/LangChainProvider.ts deleted file mode 100644 index c8ff337ff4..0000000000 --- a/packages/ai-providers/server-ai-langchain/src/LangChainProvider.ts +++ /dev/null @@ -1,285 +0,0 @@ -import { BaseChatModel } from '@langchain/core/language_models/chat_models'; -import { AIMessage, HumanMessage, SystemMessage } from '@langchain/core/messages'; -import { initChatModel } from 'langchain/chat_models/universal'; - -import { AIProvider } from '@launchdarkly/server-sdk-ai'; -import type { - ChatResponse, - LDAIConfig, - LDAIMetrics, - LDLogger, - LDMessage, - LDTokenUsage, - StructuredResponse, -} from '@launchdarkly/server-sdk-ai'; - -let instrumentPromise: Promise | undefined; - -/** - * LangChain implementation of AIProvider. - * This provider integrates LangChain models with LaunchDarkly's tracking capabilities. 
- */ -export class LangChainProvider extends AIProvider { - private _llm: BaseChatModel; - - constructor(llm: BaseChatModel, logger?: LDLogger) { - super(logger); - this._llm = llm; - } - - /** - * Static factory method to create a LangChain AIProvider from an AI configuration. - */ - static async create(aiConfig: LDAIConfig, logger?: LDLogger): Promise { - // eslint-disable-next-line no-underscore-dangle - await LangChainProvider._ensureInstrumented(logger); - - const llm = await LangChainProvider.createLangChainModel(aiConfig); - return new LangChainProvider(llm, logger); - } - - /** - * Automatically patches the ESM LangChain module for OpenTelemetry tracing when - * a TracerProvider is active and @traceloop/instrumentation-langchain is installed. - * - * OpenTelemetry instrumentations auto-patch CJS require() calls, but this - * provider loads LangChain via ESM import, which bypasses those hooks. This - * method bridges that gap by calling manuallyInstrument() on the ESM module. - */ - private static async _ensureInstrumented(logger?: LDLogger): Promise { - if (instrumentPromise !== undefined) { - return instrumentPromise; - } - - instrumentPromise = (async () => { - try { - const { LangChainInstrumentation } = await import('@traceloop/instrumentation-langchain'); - const callbackManagerModule = await import('@langchain/core/callbacks/manager'); - const instrumentation = new LangChainInstrumentation(); - instrumentation.manuallyInstrument({ callbackManagerModule }); - logger?.info('LangChain ESM module instrumented for OpenTelemetry tracing.'); - } catch { - logger?.debug( - 'OpenTelemetry instrumentation not available for LangChain provider. ' + - 'Install @traceloop/instrumentation-langchain to enable automatic tracing.', - ); - } - })(); - - return instrumentPromise; - } - - /** - * Invoke the LangChain model with an array of messages. 
- */ - async invokeModel(messages: LDMessage[]): Promise { - try { - // Convert LDMessage[] to LangChain messages - const langchainMessages = LangChainProvider.convertMessagesToLangChain(messages); - - // Get the LangChain response - const response: AIMessage = await this._llm.invoke(langchainMessages); - - // Generate metrics early (assumes success by default) - const metrics = LangChainProvider.getAIMetricsFromResponse(response); - - // Extract text content from the response - let content: string = ''; - if (typeof response.content === 'string') { - content = response.content; - } else { - // Log warning for non-string content (likely multimodal) - this.logger?.warn( - `Multimodal response not supported, expecting a string. Content type: ${typeof response.content}, Content:`, - JSON.stringify(response.content, null, 2), - ); - // Update metrics to reflect content loss - metrics.success = false; - } - - // Create the assistant message - const assistantMessage: LDMessage = { - role: 'assistant', - content, - }; - - return { - message: assistantMessage, - metrics, - }; - } catch (error) { - this.logger?.warn('LangChain model invocation failed:', error); - - return { - message: { - role: 'assistant', - content: '', - }, - metrics: { - success: false, - }, - }; - } - } - - /** - * Invoke the LangChain model with structured output support. 
- */ - async invokeStructuredModel( - messages: LDMessage[], - responseStructure: Record, - ): Promise { - try { - // Convert LDMessage[] to LangChain messages - const langchainMessages = LangChainProvider.convertMessagesToLangChain(messages); - - // Get the LangChain response - const response = await this._llm - .withStructuredOutput(responseStructure) - .invoke(langchainMessages); - - // Using structured output doesn't support metrics - const metrics = { - success: true, - usage: { - total: 0, - input: 0, - output: 0, - }, - }; - - return { - data: response, - rawResponse: JSON.stringify(response), - metrics, - }; - } catch (error) { - this.logger?.warn('LangChain structured model invocation failed:', error); - - return { - data: {}, - rawResponse: '', - metrics: { - success: false, - usage: { - total: 0, - input: 0, - output: 0, - }, - }, - }; - } - } - - /** - * Get the underlying LangChain model instance. - */ - getChatModel(): BaseChatModel { - return this._llm; - } - - /** - * Map LaunchDarkly provider names to LangChain provider names. - * This method enables seamless integration between LaunchDarkly's standardized - * provider naming and LangChain's naming conventions. - */ - static mapProvider(ldProviderName: string): string { - const lowercasedName = ldProviderName.toLowerCase(); - - const mapping: Record = { - gemini: 'google-genai', - }; - - return mapping[lowercasedName] || lowercasedName; - } - - /** - * Get AI metrics from a LangChain provider response. - * This method extracts token usage information and success status from LangChain responses - * and returns a LaunchDarkly AIMetrics object. 
- * - * @param response The response from the LangChain model - * @returns LDAIMetrics with success status and token usage - * - * @example - * ```typescript - * // Use with tracker.trackMetricsOf for automatic tracking - * const response = await tracker.trackMetricsOf( - * LangChainProvider.getAIMetricsFromResponse, - * () => llm.invoke(messages) - * ); - * ``` - */ - static getAIMetricsFromResponse(response: AIMessage): LDAIMetrics { - // Extract token usage if available - let usage: LDTokenUsage | undefined; - if (response?.usage_metadata) { - usage = { - total: response.usage_metadata.total_tokens, - input: response.usage_metadata.input_tokens, - output: response.usage_metadata.output_tokens, - }; - } - - // LangChain responses that complete successfully are considered successful by default - return { - success: true, - usage, - }; - } - - /** - * Create AI metrics information from a LangChain provider response. - * This method extracts token usage information and success status from LangChain responses - * and returns a LaunchDarkly AIMetrics object. - * - * @deprecated Use `getAIMetricsFromResponse()` instead. - * @param langChainResponse The response from the LangChain model - * @returns LDAIMetrics with success status and token usage - */ - static createAIMetrics(langChainResponse: AIMessage): LDAIMetrics { - return LangChainProvider.getAIMetricsFromResponse(langChainResponse); - } - - /** - * Convert LaunchDarkly messages to LangChain messages. - * This helper method enables developers to work directly with LangChain message types - * while maintaining compatibility with LaunchDarkly's standardized message format. 
- */ - static convertMessagesToLangChain( - messages: LDMessage[], - ): (HumanMessage | SystemMessage | AIMessage)[] { - return messages.map((msg) => { - switch (msg.role) { - case 'system': - return new SystemMessage(msg.content); - case 'user': - return new HumanMessage(msg.content); - case 'assistant': - return new AIMessage(msg.content); - default: - throw new Error(`Unsupported message role: ${msg.role}`); - } - }); - } - - /** - * Create a LangChain model from an AI configuration. - * This public helper method enables developers to initialize their own LangChain models - * using LaunchDarkly AI configurations. - * - * @param aiConfig The LaunchDarkly AI configuration - * @returns A Promise that resolves to a configured LangChain BaseChatModel - */ - static async createLangChainModel(aiConfig: LDAIConfig): Promise { - const modelName = aiConfig.model?.name || ''; - const provider = aiConfig.provider?.name || ''; - const parameters = aiConfig.model?.parameters || {}; - - // Use LangChain's universal initChatModel to support multiple providers - return initChatModel(modelName, { - ...parameters, - modelProvider: LangChainProvider.mapProvider(provider), - }); - } -} diff --git a/packages/ai-providers/server-ai-langchain/src/LangChainRunnerFactory.ts b/packages/ai-providers/server-ai-langchain/src/LangChainRunnerFactory.ts new file mode 100644 index 0000000000..ef84252f09 --- /dev/null +++ b/packages/ai-providers/server-ai-langchain/src/LangChainRunnerFactory.ts @@ -0,0 +1,92 @@ +import { createAgent } from 'langchain'; + +import { AIProvider } from '@launchdarkly/server-sdk-ai'; +import type { + LDAIAgentConfig, + LDAICompletionConfig, + LDAIConfig, + LDLogger, +} from '@launchdarkly/server-sdk-ai'; + +import { LangChainAgentRunner, ToolRegistry } from './LangChainAgentRunner'; +import { buildStructuredTools, createLangChainModel } from './LangChainHelper'; +import { LangChainModelRunner } from './LangChainModelRunner'; + +let instrumentPromise: Promise | 
undefined; + +/** + * Factory for creating LangChain runners (chat completion and agent). + */ +export class LangChainRunnerFactory extends AIProvider { + constructor(logger?: LDLogger) { + super(logger); + LangChainRunnerFactory._ensureInstrumented(logger).catch(() => {}); + } + + /** + * Create a model runner from a completion AI configuration. + */ + async createModel(config: LDAICompletionConfig): Promise { + const llm = await createLangChainModel(config); + return new LangChainModelRunner(llm, config, this.logger); + } + + /** + * Create an agent runner from an agent AI configuration. + * + * Uses LangChain's `createAgent` to build a compiled agent graph that + * handles the tool-calling loop internally. Tool definitions are sourced + * from `config.model.parameters.tools` and matched against the supplied + * `tools` registry. + */ + async createAgent(config: LDAIAgentConfig, tools?: ToolRegistry): Promise { + const parameters = { ...(config.model?.parameters || {}) }; + const toolDefinitions = (parameters.tools as any[] | undefined) ?? []; + delete parameters.tools; + + const configForModel: LDAIConfig = { + ...config, + model: { ...(config.model ?? { name: '' }), parameters }, + }; + const llm = await createLangChainModel(configForModel); + + const lcTools = buildStructuredTools(toolDefinitions, tools ?? {}, this.logger); + const instructions = config.instructions ?? ''; + + const agent = createAgent({ + model: llm, + tools: lcTools.length > 0 ? lcTools : undefined, + systemPrompt: instructions || undefined, + }); + + return new LangChainAgentRunner(agent as any, this.logger); + } + + /** + * Automatically patches the ESM LangChain module for OpenTelemetry tracing + * when a TracerProvider is active and @traceloop/instrumentation-langchain + * is installed. 
+ */ + private static async _ensureInstrumented(logger?: LDLogger): Promise { + if (instrumentPromise !== undefined) { + return instrumentPromise; + } + + instrumentPromise = (async () => { + try { + const { LangChainInstrumentation } = await import('@traceloop/instrumentation-langchain'); + const callbackManagerModule = await import('@langchain/core/callbacks/manager'); + const instrumentation = new LangChainInstrumentation(); + instrumentation.manuallyInstrument({ callbackManagerModule }); + logger?.info('LangChain ESM module instrumented for OpenTelemetry tracing.'); + } catch { + logger?.debug( + 'OpenTelemetry instrumentation not available for LangChain provider. ' + + 'Install @traceloop/instrumentation-langchain to enable automatic tracing.', + ); + } + })(); + + return instrumentPromise; + } +} diff --git a/packages/ai-providers/server-ai-langchain/src/index.ts b/packages/ai-providers/server-ai-langchain/src/index.ts index 63c20c4154..84e382fe2b 100644 --- a/packages/ai-providers/server-ai-langchain/src/index.ts +++ b/packages/ai-providers/server-ai-langchain/src/index.ts @@ -7,4 +7,18 @@ * @packageDocumentation */ -export * from './LangChainProvider'; +export { LangChainModelRunner } from './LangChainModelRunner'; +export { LangChainAgentRunner, ToolRegistry } from './LangChainAgentRunner'; +export type { CompiledAgent } from './LangChainAgentRunner'; +export { LangChainRunnerFactory } from './LangChainRunnerFactory'; +export { + buildStructuredTools, + convertMessagesToLangChain, + createLangChainModel, + extractLastMessageContent, + extractToolCalls, + getAIMetricsFromResponse, + getAIUsageFromResponse, + mapProviderName, + sumTokenUsageFromMessages, +} from './LangChainHelper'; From ef1d648fe1ed3d010fb90ba032eb2882b0e46dff Mon Sep 17 00:00:00 2001 From: jsonbailey Date: Wed, 6 May 2026 18:00:56 -0500 Subject: [PATCH 13/19] chore: fix lint errors in OpenAI and LangChain provider packages Co-Authored-By: Claude Sonnet 4.6 --- 
.../ai-providers/server-ai-langchain/src/LangChainHelper.ts | 4 ++-- .../server-ai-langchain/src/LangChainRunnerFactory.ts | 1 + .../server-ai-openai/__tests__/OpenAIAgentRunner.test.ts | 2 -- .../server-ai-openai/__tests__/OpenAIHelper.test.ts | 4 ++-- packages/ai-providers/server-ai-openai/src/OpenAIHelper.ts | 6 +++--- .../server-ai-openai/src/OpenAIRunnerFactory.ts | 5 +++-- 6 files changed, 11 insertions(+), 11 deletions(-) diff --git a/packages/ai-providers/server-ai-langchain/src/LangChainHelper.ts b/packages/ai-providers/server-ai-langchain/src/LangChainHelper.ts index 6072bc8686..29ee11f365 100644 --- a/packages/ai-providers/server-ai-langchain/src/LangChainHelper.ts +++ b/packages/ai-providers/server-ai-langchain/src/LangChainHelper.ts @@ -88,7 +88,7 @@ export function getAIMetricsFromResponse(response: AIMessage): LDAIMetrics { * Extract JSON Schema from an LD tool definition's parameters. * Falls back to an open object schema when no parameters are defined. */ -function _getInputSchema(toolDef: Record): Record { +function getInputSchema(toolDef: Record): Record { const params = toolDef.function?.parameters ?? 
toolDef.parameters; if (params && typeof params === 'object' && params.properties) { return params; @@ -150,7 +150,7 @@ export function buildStructuredTools( { name, description, - schema: _getInputSchema(td) as any, + schema: getInputSchema(td) as any, }, ), ); diff --git a/packages/ai-providers/server-ai-langchain/src/LangChainRunnerFactory.ts b/packages/ai-providers/server-ai-langchain/src/LangChainRunnerFactory.ts index ef84252f09..482447c900 100644 --- a/packages/ai-providers/server-ai-langchain/src/LangChainRunnerFactory.ts +++ b/packages/ai-providers/server-ai-langchain/src/LangChainRunnerFactory.ts @@ -20,6 +20,7 @@ let instrumentPromise: Promise | undefined; export class LangChainRunnerFactory extends AIProvider { constructor(logger?: LDLogger) { super(logger); + // eslint-disable-next-line no-underscore-dangle LangChainRunnerFactory._ensureInstrumented(logger).catch(() => {}); } diff --git a/packages/ai-providers/server-ai-openai/__tests__/OpenAIAgentRunner.test.ts b/packages/ai-providers/server-ai-openai/__tests__/OpenAIAgentRunner.test.ts index 4381b55b89..640dc99527 100644 --- a/packages/ai-providers/server-ai-openai/__tests__/OpenAIAgentRunner.test.ts +++ b/packages/ai-providers/server-ai-openai/__tests__/OpenAIAgentRunner.test.ts @@ -1,5 +1,3 @@ -import type { LDAIAgentConfig } from '@launchdarkly/server-sdk-ai'; - import { OpenAIAgentRunner } from '../src/OpenAIAgentRunner'; const mockRun = jest.fn(); diff --git a/packages/ai-providers/server-ai-openai/__tests__/OpenAIHelper.test.ts b/packages/ai-providers/server-ai-openai/__tests__/OpenAIHelper.test.ts index 521b5561fc..abe5ec4195 100644 --- a/packages/ai-providers/server-ai-openai/__tests__/OpenAIHelper.test.ts +++ b/packages/ai-providers/server-ai-openai/__tests__/OpenAIHelper.test.ts @@ -116,7 +116,7 @@ it('returns true for non-callable objects passed to isAgentToolInstance', () => }); describe('given a shared fakeTool mock', () => { - const fakeTool = jest.fn((opts: any) => ({ ...opts, 
_wrapped: true })); + const fakeTool = jest.fn((opts: any) => ({ ...opts, wrapped: true })); it('passes through non-callable values without wrapping', () => { const hostedTool = { name: 'web_search', type: 'hosted' }; @@ -141,7 +141,7 @@ describe('given a shared fakeTool mock', () => { strict: false, }), ); - expect(wrapped._wrapped).toBe(true); + expect(wrapped.wrapped).toBe(true); }); it('serializes non-string tool results to JSON', async () => { diff --git a/packages/ai-providers/server-ai-openai/src/OpenAIHelper.ts b/packages/ai-providers/server-ai-openai/src/OpenAIHelper.ts index e9ee4365bc..179c8fc091 100644 --- a/packages/ai-providers/server-ai-openai/src/OpenAIHelper.ts +++ b/packages/ai-providers/server-ai-openai/src/OpenAIHelper.ts @@ -49,17 +49,17 @@ export function getAIMetricsFromResponse(response: any): LDAIMetrics { /** * Convert a snake_case string to camelCase. */ -function _snakeToCamel(key: string): string { +function snakeToCamel(key: string): string { return key.replace(/_([a-z])/g, (_, c) => c.toUpperCase()); } /** * Convert all snake_case keys in a record to camelCase. 
*/ -export function _mapParameterKeys(parameters: Record): Record { +export function mapParameterKeys(parameters: Record): Record { const result: Record = {}; for (const [key, value] of Object.entries(parameters)) { - result[_snakeToCamel(key)] = value; + result[snakeToCamel(key)] = value; } return result; } diff --git a/packages/ai-providers/server-ai-openai/src/OpenAIRunnerFactory.ts b/packages/ai-providers/server-ai-openai/src/OpenAIRunnerFactory.ts index 1f3f8f86c7..97eebf64bb 100644 --- a/packages/ai-providers/server-ai-openai/src/OpenAIRunnerFactory.ts +++ b/packages/ai-providers/server-ai-openai/src/OpenAIRunnerFactory.ts @@ -4,7 +4,7 @@ import { AIProvider } from '@launchdarkly/server-sdk-ai'; import type { LDAIAgentConfig, LDAICompletionConfig, LDLogger } from '@launchdarkly/server-sdk-ai'; import { OpenAIAgentRunner, ToolRegistry } from './OpenAIAgentRunner'; -import { _mapParameterKeys, buildAgentTools } from './OpenAIHelper'; +import { mapParameterKeys, buildAgentTools } from './OpenAIHelper'; import { OpenAIModelRunner } from './OpenAIModelRunner'; let instrumentPromise: Promise | undefined; @@ -22,6 +22,7 @@ export class OpenAIRunnerFactory extends AIProvider { super(logger); this._client = new OpenAI({ apiKey: process.env.OPENAI_API_KEY }); // Fire-and-forget: OTel instrumentation is optional and must not block construction. + // eslint-disable-next-line no-underscore-dangle OpenAIRunnerFactory._ensureInstrumented(logger).catch(() => {}); } @@ -63,7 +64,7 @@ export class OpenAIRunnerFactory extends AIProvider { const registry = tools ?? {}; const configTools = config.tools ?? {}; - const parameters = _mapParameterKeys({ ...(config.model?.parameters ?? {}) }); + const parameters = mapParameterKeys({ ...(config.model?.parameters ?? 
{}) }); delete parameters.tools; const { agentTools, toolNameMap } = buildAgentTools(toolHelper, configTools, registry, this.logger); From bd995e745108316c7d955b0d7c89d404304587d2 Mon Sep 17 00:00:00 2001 From: jsonbailey Date: Wed, 6 May 2026 18:09:41 -0500 Subject: [PATCH 14/19] chore: fix test type errors after createTracker became required on config types Co-Authored-By: Claude Sonnet 4.6 --- .../server-ai-langchain/__tests__/LangChainHelper.test.ts | 2 ++ .../__tests__/LangChainModelRunner.test.ts | 1 + .../__tests__/LangChainRunnerFactory.test.ts | 4 ++++ .../server-ai-vercel/__tests__/VercelModelRunner.test.ts | 1 + .../server-ai-vercel/__tests__/VercelRunnerFactory.test.ts | 2 ++ packages/sdk/server-ai/examples/chat-judge/package.json | 4 ++-- packages/sdk/server-ai/examples/direct-judge/package.json | 4 ++-- 7 files changed, 14 insertions(+), 4 deletions(-) diff --git a/packages/ai-providers/server-ai-langchain/__tests__/LangChainHelper.test.ts b/packages/ai-providers/server-ai-langchain/__tests__/LangChainHelper.test.ts index add5307a03..9b9276702e 100644 --- a/packages/ai-providers/server-ai-langchain/__tests__/LangChainHelper.test.ts +++ b/packages/ai-providers/server-ai-langchain/__tests__/LangChainHelper.test.ts @@ -33,6 +33,7 @@ describe('createLangChainModel', () => { enabled: true, provider: { name: 'openai' }, model: { name: 'gpt-4o', parameters: { temperature: 0.5 } }, + createTracker: jest.fn(), }); expect(mockInitChatModel).toHaveBeenCalledWith('gpt-4o', { @@ -47,6 +48,7 @@ describe('createLangChainModel', () => { enabled: true, provider: { name: 'gemini' }, model: { name: 'gemini-2.0' }, + createTracker: jest.fn(), }); expect(mockInitChatModel).toHaveBeenCalledWith('gemini-2.0', { diff --git a/packages/ai-providers/server-ai-langchain/__tests__/LangChainModelRunner.test.ts b/packages/ai-providers/server-ai-langchain/__tests__/LangChainModelRunner.test.ts index ea1429380b..63888b084a 100644 --- 
a/packages/ai-providers/server-ai-langchain/__tests__/LangChainModelRunner.test.ts +++ b/packages/ai-providers/server-ai-langchain/__tests__/LangChainModelRunner.test.ts @@ -15,6 +15,7 @@ const baseConfig: LDAICompletionConfig = { key: 'completion', enabled: true, model: { name: 'fake' }, + createTracker: jest.fn(), }; describe('LangChainModelRunner', () => { diff --git a/packages/ai-providers/server-ai-langchain/__tests__/LangChainRunnerFactory.test.ts b/packages/ai-providers/server-ai-langchain/__tests__/LangChainRunnerFactory.test.ts index 8670b9a291..6184dc7fa0 100644 --- a/packages/ai-providers/server-ai-langchain/__tests__/LangChainRunnerFactory.test.ts +++ b/packages/ai-providers/server-ai-langchain/__tests__/LangChainRunnerFactory.test.ts @@ -37,6 +37,7 @@ describe('LangChainRunnerFactory', () => { enabled: true, provider: { name: 'openai' }, model: { name: 'gpt-4o', parameters: { temperature: 0.5 } }, + createTracker: jest.fn(), }; const runner = await factory.createModel(config); @@ -54,6 +55,7 @@ describe('LangChainRunnerFactory', () => { enabled: true, provider: { name: 'gemini' }, model: { name: 'gemini-2.0' }, + createTracker: jest.fn(), }); expect(mockInitChatModel).toHaveBeenCalledWith('gemini-2.0', { @@ -69,6 +71,7 @@ describe('LangChainRunnerFactory', () => { provider: { name: 'openai' }, model: { name: 'gpt-4o', parameters: { temperature: 0.7, tools } }, instructions: 'be helpful', + createTracker: jest.fn(), }; const runner = await factory.createAgent(config, { lookup: () => 'ok' }); @@ -94,6 +97,7 @@ describe('LangChainRunnerFactory', () => { provider: { name: 'openai' }, model: { name: 'gpt-4o' }, instructions: '', + createTracker: jest.fn(), }; await factory.createAgent(config); diff --git a/packages/ai-providers/server-ai-vercel/__tests__/VercelModelRunner.test.ts b/packages/ai-providers/server-ai-vercel/__tests__/VercelModelRunner.test.ts index 4b4bed8846..a340dc5407 100644 --- 
a/packages/ai-providers/server-ai-vercel/__tests__/VercelModelRunner.test.ts +++ b/packages/ai-providers/server-ai-vercel/__tests__/VercelModelRunner.test.ts @@ -21,6 +21,7 @@ const baseConfig: LDAICompletionConfig = { key: 'completion', enabled: true, model: { name: 'mock' }, + createTracker: jest.fn(), }; describe('VercelModelRunner', () => { diff --git a/packages/ai-providers/server-ai-vercel/__tests__/VercelRunnerFactory.test.ts b/packages/ai-providers/server-ai-vercel/__tests__/VercelRunnerFactory.test.ts index 061d4cd455..b34dd81922 100644 --- a/packages/ai-providers/server-ai-vercel/__tests__/VercelRunnerFactory.test.ts +++ b/packages/ai-providers/server-ai-vercel/__tests__/VercelRunnerFactory.test.ts @@ -15,6 +15,7 @@ describe('VercelRunnerFactory', () => { enabled: true, provider: { name: 'openai' }, model: { name: 'gpt-4o', parameters: { max_tokens: 100, temperature: 0.7 } }, + createTracker: jest.fn(), }); expect(runner).toBeInstanceOf(VercelModelRunner); @@ -62,6 +63,7 @@ describe('VercelRunnerFactory', () => { enabled: true, provider: { name: 'unsupported' }, model: { name: 'm' }, + createTracker: jest.fn(), }), ).rejects.toThrow('Unsupported Vercel AI provider: unsupported'); }); diff --git a/packages/sdk/server-ai/examples/chat-judge/package.json b/packages/sdk/server-ai/examples/chat-judge/package.json index 6700587200..636c2f8dd2 100644 --- a/packages/sdk/server-ai/examples/chat-judge/package.json +++ b/packages/sdk/server-ai/examples/chat-judge/package.json @@ -14,10 +14,10 @@ "@launchdarkly/node-server-sdk": "9.10.14", "@launchdarkly/server-sdk-ai": "0.19.1", "@launchdarkly/server-sdk-ai-langchain": "0.6.3", - "langchain": "^1.0.0", "@launchdarkly/server-sdk-ai-openai": "0.5.10", "@launchdarkly/server-sdk-ai-vercel": "0.5.10", - "dotenv": "^16.0.0" + "dotenv": "^16.0.0", + "langchain": "^1.0.0" }, "devDependencies": { "@tsconfig/node20": "20.1.4", diff --git a/packages/sdk/server-ai/examples/direct-judge/package.json 
b/packages/sdk/server-ai/examples/direct-judge/package.json index f0f9bb7eeb..bbb3e2436a 100644 --- a/packages/sdk/server-ai/examples/direct-judge/package.json +++ b/packages/sdk/server-ai/examples/direct-judge/package.json @@ -14,10 +14,10 @@ "@launchdarkly/node-server-sdk": "9.10.14", "@launchdarkly/server-sdk-ai": "0.19.1", "@launchdarkly/server-sdk-ai-langchain": "0.6.3", - "langchain": "^1.0.0", "@launchdarkly/server-sdk-ai-openai": "0.5.10", "@launchdarkly/server-sdk-ai-vercel": "0.5.10", - "dotenv": "^16.0.0" + "dotenv": "^16.0.0", + "langchain": "^1.0.0" }, "devDependencies": { "@tsconfig/node20": "20.1.4", From 70e4eb9376848116cb97599bda1457ae1f99e1bf Mon Sep 17 00:00:00 2001 From: jsonbailey Date: Thu, 7 May 2026 09:01:23 -0500 Subject: [PATCH 15/19] fix: add zod devDependency to Vercel provider (peer dep of ai v5) Co-Authored-By: Claude Sonnet 4.6 --- packages/ai-providers/server-ai-vercel/package.json | 1 + 1 file changed, 1 insertion(+) diff --git a/packages/ai-providers/server-ai-vercel/package.json b/packages/ai-providers/server-ai-vercel/package.json index 0477a1eff6..172889668b 100644 --- a/packages/ai-providers/server-ai-vercel/package.json +++ b/packages/ai-providers/server-ai-vercel/package.json @@ -53,6 +53,7 @@ "@typescript-eslint/parser": "^6.20.0", "ai": "^5.0.0", "eslint": "^8.45.0", + "zod": "^3.25.76", "eslint-plugin-import": "^2.27.5", "eslint-plugin-jest": "^27.6.3", "jest": "^29.6.1", From c216575b9b9b7b4f48ee49f20e26bae3b7839d2d Mon Sep 17 00:00:00 2001 From: Jason Bailey Date: Thu, 7 May 2026 10:05:06 -0500 Subject: [PATCH 16/19] fix!: Use LDAIGraphMetricSummary for graph metric summary (#1362) --- .../sdk/server-ai/src/LDGraphTrackerImpl.ts | 4 +- .../server-ai/src/api/graph/LDGraphTracker.ts | 8 ++- .../src/api/graph/ManagedAgentGraph.ts | 8 +-- packages/sdk/server-ai/src/api/graph/types.ts | 66 ++++++------------- 4 files changed, 32 insertions(+), 54 deletions(-) diff --git a/packages/sdk/server-ai/src/LDGraphTrackerImpl.ts 
b/packages/sdk/server-ai/src/LDGraphTrackerImpl.ts index dc33b4d839..9ceb56392f 100644 --- a/packages/sdk/server-ai/src/LDGraphTrackerImpl.ts +++ b/packages/sdk/server-ai/src/LDGraphTrackerImpl.ts @@ -12,7 +12,7 @@ import type { LDClientMin } from './LDClientMin'; * {@link LDGraphTrackerImpl.fromResumptionToken}. */ export class LDGraphTrackerImpl implements LDGraphTracker { - private _summary: LDAIGraphMetricSummary = {}; + private _summary: Partial = {}; constructor( private readonly _ldClient: LDClientMin, @@ -67,7 +67,7 @@ export class LDGraphTrackerImpl implements LDGraphTracker { }; } - getSummary(): LDAIGraphMetricSummary { + getSummary(): Partial { return { ...this._summary }; } diff --git a/packages/sdk/server-ai/src/api/graph/LDGraphTracker.ts b/packages/sdk/server-ai/src/api/graph/LDGraphTracker.ts index b04c29f5df..7040dce585 100644 --- a/packages/sdk/server-ai/src/api/graph/LDGraphTracker.ts +++ b/packages/sdk/server-ai/src/api/graph/LDGraphTracker.ts @@ -33,9 +33,13 @@ export interface LDGraphTracker { }; /** - * Returns a snapshot of all graph-level metrics tracked so far. + * Returns a snapshot of all graph-level metrics tracked so far. Fields + * populate incrementally as `track*` methods are called, so the result is + * a `Partial`. Once the graph invocation has + * completed via `ManagedAgentGraph.run()`, prefer `ManagedGraphResult.metrics` + * which is fully populated. 
*/ - getSummary(): LDAIGraphMetricSummary; + getSummary(): Partial; /** * A URL-safe Base64-encoded (RFC 4648, no padding) token encoding the tracker's diff --git a/packages/sdk/server-ai/src/api/graph/ManagedAgentGraph.ts b/packages/sdk/server-ai/src/api/graph/ManagedAgentGraph.ts index d55bb26d50..d7da031d69 100644 --- a/packages/sdk/server-ai/src/api/graph/ManagedAgentGraph.ts +++ b/packages/sdk/server-ai/src/api/graph/ManagedAgentGraph.ts @@ -5,7 +5,7 @@ import { LDAIMetricSummary } from '../model/types'; import { LDJudgeResult } from '../judge/types'; import { AgentGraphDefinition } from './AgentGraphDefinition'; import { LDGraphTracker } from './LDGraphTracker'; -import { AgentGraphRunnerResult, GraphMetricSummary, ManagedGraphResult } from './types'; +import { AgentGraphRunnerResult, LDAIGraphMetricSummary, ManagedGraphResult } from './types'; /** * ManagedAgentGraph wraps an AgentGraphDefinition and provides a managed run() @@ -13,7 +13,7 @@ import { AgentGraphRunnerResult, GraphMetricSummary, ManagedGraphResult } from ' * * The runner function is responsible for executing the graph and returning * an AgentGraphRunnerResult. ManagedAgentGraph builds the managed result from - * the runner result, including GraphMetricSummary with the graphTracker's + * the runner result, including LDAIGraphMetricSummary with the graphTracker's * resumptionToken. */ export class ManagedAgentGraph { @@ -31,7 +31,7 @@ export class ManagedAgentGraph { * run() returns before ManagedGraphResult.evaluations resolves. * * @param runner Async function that executes the graph and returns AgentGraphRunnerResult. - * @returns ManagedGraphResult with GraphMetricSummary and evaluations promise. + * @returns ManagedGraphResult with LDAIGraphMetricSummary and evaluations promise. 
*/ async run( runner: ( @@ -43,7 +43,7 @@ export class ManagedAgentGraph { const runnerResult = await runner(this._graphDefinition, graphTracker); - const metrics: GraphMetricSummary = { + const metrics: LDAIGraphMetricSummary = { success: runnerResult.metrics.success, path: runnerResult.metrics.path, durationMs: runnerResult.metrics.durationMs, diff --git a/packages/sdk/server-ai/src/api/graph/types.ts b/packages/sdk/server-ai/src/api/graph/types.ts index 902458d8d2..dd023ed24d 100644 --- a/packages/sdk/server-ai/src/api/graph/types.ts +++ b/packages/sdk/server-ai/src/api/graph/types.ts @@ -40,28 +40,38 @@ export interface LDAgentGraphFlagValue { } /** - * Accumulated graph-level metrics collected by an LDGraphTracker. + * Summarized graph-level metrics for a completed graph invocation, as + * returned by {@link ManagedAgentGraph.run} via {@link ManagedGraphResult.metrics}. + * + * For the tracker-layer incremental view (where fields populate as tracking + * calls arrive), see {@link LDGraphTracker.getSummary}, which returns a + * `Partial`. */ export interface LDAIGraphMetricSummary { /** - * Whether the graph invocation succeeded. Absent if not yet tracked. + * Whether the graph invocation succeeded. */ - success?: boolean; + success: boolean; /** - * Total graph execution duration in milliseconds. Absent if not yet tracked. + * Execution path through the graph as an ordered array of config keys. */ - durationMs?: number; + path: string[]; + + /** + * Per-node metric summaries keyed by agent config key. + */ + nodeMetrics: Record; /** - * Aggregate token usage across the entire graph invocation. Absent if not yet tracked. + * Total graph execution duration in milliseconds, if tracked. */ - tokens?: LDTokenUsage; + durationMs?: number; /** - * Execution path through the graph as an array of config keys. Absent if not yet tracked. + * Aggregate token usage across the entire graph invocation, if available. 
*/ - path?: string[]; + tokens?: LDTokenUsage; /** * Resumption token for deferred feedback association. @@ -125,42 +135,6 @@ export interface AgentGraphRunnerResult { // Managed-Layer Graph Types // ============================================================================ -/** - * Graph metric summary returned in ManagedGraphResult. - * Includes per-node metrics and a resumption token. - */ -export interface GraphMetricSummary { - /** - * Whether the graph invocation succeeded. - */ - success: boolean; - - /** - * Execution path through the graph as an ordered array of config keys. - */ - path: string[]; - - /** - * Total graph execution duration in milliseconds, if tracked. - */ - durationMs?: number; - - /** - * Aggregate token usage across the entire graph invocation, if available. - */ - tokens?: LDTokenUsage; - - /** - * Per-node metric summaries keyed by agent config key. - */ - nodeMetrics: Record; - - /** - * Resumption token for deferred feedback association. - */ - resumptionToken?: string; -} - /** * The result returned by a managed graph invocation (ManagedAgentGraph.run()). */ @@ -173,7 +147,7 @@ export interface ManagedGraphResult { /** * Summarized metrics for this graph invocation. */ - metrics: GraphMetricSummary; + metrics: LDAIGraphMetricSummary; /** * The raw response object from the provider, if available. 
From 12a90c96d8f7335f88b77ed5d5fcda0e2623ff25 Mon Sep 17 00:00:00 2001 From: Jason Bailey Date: Thu, 7 May 2026 11:05:47 -0500 Subject: [PATCH 17/19] fix!: Build judge input as string and strip legacy judge config messages (#1364) --- .../__tests__/LangChainModelRunner.test.ts | 21 +-- .../src/LangChainModelRunner.ts | 19 +- .../__tests__/OpenAIModelRunner.test.ts | 21 --- .../server-ai-openai/src/OpenAIAgentRunner.ts | 2 +- .../server-ai-openai/src/OpenAIModelRunner.ts | 18 +- .../__tests__/VercelModelRunner.test.ts | 24 --- .../server-ai-vercel/src/VercelModelRunner.ts | 19 +- .../sdk/server-ai/__tests__/Judge.test.ts | 166 ++++++++++++------ .../__tests__/LDAIClientImpl.test.ts | 81 +++++++-- packages/sdk/server-ai/src/LDAIClientImpl.ts | 61 ++++--- packages/sdk/server-ai/src/api/judge/Judge.ts | 58 +++--- .../sdk/server-ai/src/api/providers/Runner.ts | 8 +- 12 files changed, 280 insertions(+), 218 deletions(-) diff --git a/packages/ai-providers/server-ai-langchain/__tests__/LangChainModelRunner.test.ts b/packages/ai-providers/server-ai-langchain/__tests__/LangChainModelRunner.test.ts index 63888b084a..e68af0d718 100644 --- a/packages/ai-providers/server-ai-langchain/__tests__/LangChainModelRunner.test.ts +++ b/packages/ai-providers/server-ai-langchain/__tests__/LangChainModelRunner.test.ts @@ -1,6 +1,6 @@ import { AIMessage } from '@langchain/core/messages'; -import type { LDAICompletionConfig, LDMessage } from '@launchdarkly/server-sdk-ai'; +import type { LDAICompletionConfig } from '@launchdarkly/server-sdk-ai'; import { LangChainModelRunner } from '../src/LangChainModelRunner'; @@ -63,25 +63,6 @@ describe('LangChainModelRunner', () => { expect(passed[1].content).toBe('hi'); }); - it('uses a LDMessage[] as-is without prepending config messages', async () => { - const response = new AIMessage('direct reply'); - mockLLM.invoke.mockResolvedValue(response); - - const configWithMessages: LDAICompletionConfig = { - ...baseConfig, - messages: [{ role: 'system', 
content: 'You are X' }], - }; - const r = new LangChainModelRunner(mockLLM, configWithMessages, mockLogger); - const inputMessages: LDMessage[] = [ - { role: 'user', content: 'direct question' }, - ]; - await r.run(inputMessages); - - const passed = mockLLM.invoke.mock.calls[0][0]; - expect(passed).toHaveLength(1); - expect(passed[0].content).toBe('direct question'); - }); - it('marks success=false and warns when content is non-string (multimodal)', async () => { mockLLM.invoke.mockResolvedValue(new AIMessage([{ type: 'image' }] as any)); diff --git a/packages/ai-providers/server-ai-langchain/src/LangChainModelRunner.ts b/packages/ai-providers/server-ai-langchain/src/LangChainModelRunner.ts index ec4237269f..37cb657387 100644 --- a/packages/ai-providers/server-ai-langchain/src/LangChainModelRunner.ts +++ b/packages/ai-providers/server-ai-langchain/src/LangChainModelRunner.ts @@ -29,19 +29,20 @@ export class LangChainModelRunner implements Runner { } /** - * Run the LangChain model with the given prompt. + * Run the LangChain model with the given user prompt. * - * @param input The user prompt string or a pre-built message array to send to the model. - * When a string is provided, config messages are prepended before the user prompt. - * When an {@link LDMessage} array is provided, it is used as-is (config messages are - * not prepended). + * Prepends any messages defined in the AI config (system prompt, etc.) before + * the user prompt. + * + * @param input The user prompt string. * @param outputType Optional JSON schema for structured output. When provided, * the parsed result is exposed via {@link RunnerResult.parsed}. */ - async run(input: string | LDMessage[], outputType?: Record): Promise { - const messages: LDMessage[] = Array.isArray(input) - ? input - : [...(this._config.messages ?? []), { role: 'user', content: input }]; + async run(input: string, outputType?: Record): Promise { + const messages: LDMessage[] = [ + ...(this._config.messages ?? 
[]), + { role: 'user', content: input }, + ]; if (outputType !== undefined) { return this._runStructured(messages, outputType); diff --git a/packages/ai-providers/server-ai-openai/__tests__/OpenAIModelRunner.test.ts b/packages/ai-providers/server-ai-openai/__tests__/OpenAIModelRunner.test.ts index 5e6cfaba30..613578e8f5 100644 --- a/packages/ai-providers/server-ai-openai/__tests__/OpenAIModelRunner.test.ts +++ b/packages/ai-providers/server-ai-openai/__tests__/OpenAIModelRunner.test.ts @@ -75,27 +75,6 @@ describe('OpenAIModelRunner', () => { }); }); - it('passes a LDMessage[] input directly without prepending config messages', async () => { - const mockResponse = { - choices: [{ message: { content: 'Evaluation result' } }], - usage: { prompt_tokens: 10, completion_tokens: 5, total_tokens: 15 }, - }; - (mockOpenAI.chat.completions.create as jest.Mock).mockResolvedValue(mockResponse as any); - - const messages = [ - { role: 'system' as const, content: 'You are a judge' }, - { role: 'user' as const, content: 'Rate this: hello' }, - ]; - const result = await runner.run(messages); - - expect(mockOpenAI.chat.completions.create).toHaveBeenCalledWith({ - model: 'gpt-3.5-turbo', - messages, - }); - expect(result.content).toBe('Evaluation result'); - expect(result.metrics.success).toBe(true); - }); - it('marks the result unsuccessful when response has no content', async () => { const mockResponse = { choices: [{ message: {} }] }; (mockOpenAI.chat.completions.create as jest.Mock).mockResolvedValue(mockResponse as any); diff --git a/packages/ai-providers/server-ai-openai/src/OpenAIAgentRunner.ts b/packages/ai-providers/server-ai-openai/src/OpenAIAgentRunner.ts index dbe13ac29b..2511149ec4 100644 --- a/packages/ai-providers/server-ai-openai/src/OpenAIAgentRunner.ts +++ b/packages/ai-providers/server-ai-openai/src/OpenAIAgentRunner.ts @@ -48,7 +48,7 @@ export class OpenAIAgentRunner implements Runner { async run(input: string, _outputType?: Record): Promise { try { - const 
result = await this._agentRun(this._agent, String(input), { maxTurns: MAX_TURNS }); + const result = await this._agentRun(this._agent, input, { maxTurns: MAX_TURNS }); const toolCalls = getToolCallsFromRunItems(result.newItems ?? []).reduce( (acc: string[], fnName: string) => { diff --git a/packages/ai-providers/server-ai-openai/src/OpenAIModelRunner.ts b/packages/ai-providers/server-ai-openai/src/OpenAIModelRunner.ts index 196ce257be..7f8b874e3b 100644 --- a/packages/ai-providers/server-ai-openai/src/OpenAIModelRunner.ts +++ b/packages/ai-providers/server-ai-openai/src/OpenAIModelRunner.ts @@ -32,20 +32,20 @@ export class OpenAIModelRunner implements Runner { } /** - * Run the OpenAI model with the given prompt or message array. + * Run the OpenAI model with the given user prompt. * - * When `input` is a string it is wrapped as a user turn and appended to any - * messages defined in the config. When `input` is already a `LDMessage[]` - * (e.g. when called from the Judge evaluation path) it is used as-is. + * Prepends any messages defined in the AI config (system prompt, + * instructions, etc.) before the user prompt. * - * @param input The user prompt string, or a pre-built message array. + * @param input The user prompt string. * @param outputType Optional JSON schema for structured output. When provided, * the response is parsed and exposed via {@link RunnerResult.parsed}. */ - async run(input: string | LDMessage[], outputType?: Record): Promise { - const messages: LDMessage[] = Array.isArray(input) - ? input - : [...(this._config.messages ?? []), { role: 'user', content: input }]; + async run(input: string, outputType?: Record): Promise { + const messages: LDMessage[] = [ + ...(this._config.messages ?? 
[]), + { role: 'user', content: input }, + ]; if (outputType !== undefined) { return this._runStructured(messages, outputType); diff --git a/packages/ai-providers/server-ai-vercel/__tests__/VercelModelRunner.test.ts b/packages/ai-providers/server-ai-vercel/__tests__/VercelModelRunner.test.ts index a340dc5407..49313865f1 100644 --- a/packages/ai-providers/server-ai-vercel/__tests__/VercelModelRunner.test.ts +++ b/packages/ai-providers/server-ai-vercel/__tests__/VercelModelRunner.test.ts @@ -90,30 +90,6 @@ describe('VercelModelRunner', () => { expect(out.metrics.usage).toEqual({ total: 100, input: 40, output: 60 }); }); - it('uses a LDMessage[] directly without prepending config messages', async () => { - (generateText as jest.Mock).mockResolvedValue({ - text: 'direct', - usage: { totalTokens: 5, promptTokens: 2, completionTokens: 3 }, - }); - - const configWithMessages: LDAICompletionConfig = { - ...baseConfig, - messages: [{ role: 'system', content: 'Should not appear' }], - }; - const r = new VercelModelRunner(fakeModel as any, configWithMessages, {}, mockLogger); - const prebuilt = [ - { role: 'system' as const, content: 'Custom system' }, - { role: 'user' as const, content: 'Direct input' }, - ]; - await r.run(prebuilt); - - expect(generateText).toHaveBeenCalledWith({ - model: fakeModel, - messages: prebuilt, - experimental_telemetry: { isEnabled: true }, - }); - }); - it('returns success=false when generateText throws', async () => { const err = new Error('boom'); (generateText as jest.Mock).mockRejectedValue(err); diff --git a/packages/ai-providers/server-ai-vercel/src/VercelModelRunner.ts b/packages/ai-providers/server-ai-vercel/src/VercelModelRunner.ts index 61293cf0c9..54e7db3472 100644 --- a/packages/ai-providers/server-ai-vercel/src/VercelModelRunner.ts +++ b/packages/ai-providers/server-ai-vercel/src/VercelModelRunner.ts @@ -36,19 +36,20 @@ export class VercelModelRunner implements Runner { } /** - * Run the Vercel AI model with the given prompt. 
+ * Run the Vercel AI model with the given user prompt. * - * @param input The user prompt string, or a pre-built message array. When a - * string is supplied the config's system messages are prepended automatically. - * When a `LDMessage[]` is supplied it is used as-is (config messages are NOT - * prepended — the caller is responsible for the full message list). + * Prepends any messages defined in the AI config (system prompt, etc.) before + * the user prompt. + * + * @param input The user prompt string. * @param outputType Optional JSON schema for structured output. When provided, * the parsed object is exposed via {@link RunnerResult.parsed}. */ - async run(input: string | LDMessage[], outputType?: Record): Promise { - const messages: LDMessage[] = Array.isArray(input) - ? input - : [...(this._config.messages ?? []), { role: 'user', content: input }]; + async run(input: string, outputType?: Record): Promise { + const messages: LDMessage[] = [ + ...(this._config.messages ?? []), + { role: 'user', content: input }, + ]; if (outputType !== undefined) { return this._runStructured(messages, outputType); diff --git a/packages/sdk/server-ai/__tests__/Judge.test.ts b/packages/sdk/server-ai/__tests__/Judge.test.ts index bd49305f4d..a242d371ed 100644 --- a/packages/sdk/server-ai/__tests__/Judge.test.ts +++ b/packages/sdk/server-ai/__tests__/Judge.test.ts @@ -2,10 +2,74 @@ import { LDLogger } from '@launchdarkly/js-server-sdk-common'; import { LDAIConfigTracker } from '../src/api/config/LDAIConfigTracker'; import { LDAIJudgeConfig, LDMessage } from '../src/api/config/types'; -import { Judge } from '../src/api/judge/Judge'; +import { Judge, stripLegacyJudgeMessages } from '../src/api/judge/Judge'; import { RunnerResult } from '../src/api/model/types'; import { Runner } from '../src/api/providers/Runner'; +describe('stripLegacyJudgeMessages', () => { + it('strips assistant messages containing {{message_history}}', () => { + const messages: LDMessage[] = [ + { role: 
'system', content: 'You are a judge.' }, + { role: 'assistant', content: 'Here is the history: {{message_history}}' }, + ]; + const result = stripLegacyJudgeMessages(messages); + expect(result).toHaveLength(1); + expect(result[0].role).toBe('system'); + }); + + it('strips user messages containing {{response_to_evaluate}}', () => { + const messages: LDMessage[] = [ + { role: 'system', content: 'You are a judge.' }, + { role: 'user', content: 'Evaluate: {{response_to_evaluate}}' }, + ]; + const result = stripLegacyJudgeMessages(messages); + expect(result).toHaveLength(1); + expect(result[0].role).toBe('system'); + }); + + it('strips all legacy template messages from a typical legacy config', () => { + const messages: LDMessage[] = [ + { role: 'system', content: 'You are a judge.' }, + { role: 'assistant', content: '{{message_history}}' }, + { role: 'user', content: '{{response_to_evaluate}}' }, + ]; + const result = stripLegacyJudgeMessages(messages); + expect(result).toHaveLength(1); + expect(result[0].role).toBe('system'); + }); + + it('does not strip system messages even when they contain template variables', () => { + const messages: LDMessage[] = [ + { + role: 'system', + content: 'Judge using {{message_history}} and {{response_to_evaluate}}.', + }, + ]; + const result = stripLegacyJudgeMessages(messages); + expect(result).toHaveLength(1); + expect(result[0].role).toBe('system'); + }); + + it('leaves non-system messages without template variables untouched', () => { + const messages: LDMessage[] = [ + { role: 'system', content: 'You are a judge.' }, + { role: 'user', content: 'This is a regular message.' 
}, + ]; + const result = stripLegacyJudgeMessages(messages); + expect(result).toHaveLength(2); + }); + + it('returns an empty array for an empty input', () => { + expect(stripLegacyJudgeMessages([])).toEqual([]); + }); + + it('passes a new-style system-only config through unchanged', () => { + const messages: LDMessage[] = [{ role: 'system', content: 'You are a judge.' }]; + const result = stripLegacyJudgeMessages(messages); + expect(result).toEqual(messages); + }); +}); + describe('Judge', () => { let mockRunner: jest.Mocked; let mockTracker: jest.Mocked; @@ -37,14 +101,7 @@ describe('Judge', () => { judgeConfig = { key: 'test-judge', enabled: true, - messages: [ - { role: 'system', content: 'You are a helpful judge that evaluates AI responses.' }, - { - role: 'user', - content: - 'Evaluate and report scores for important metrics: Input: {{message_history}}, Output: {{response_to_evaluate}}', - }, - ], + messages: [{ role: 'system', content: 'You are a helpful judge that evaluates AI responses.' 
}], model: { name: 'gpt-4' }, provider: { name: 'openai' }, createTracker: () => mockTracker, @@ -161,21 +218,33 @@ describe('Judge', () => { }); expect(mockRunner.run).toHaveBeenCalledWith( - expect.arrayContaining([ - expect.objectContaining({ - role: 'system', - content: 'You are a helpful judge that evaluates AI responses.', - }), - expect.objectContaining({ - role: 'user', - content: - 'Evaluate and report scores for important metrics: Input: What is the capital of France?, Output: Paris is the capital of France.', - }), - ]), + 'MESSAGE HISTORY:\nWhat is the capital of France?\n\nRESPONSE TO EVALUATE:\nParis is the capital of France.', expect.any(Object), // evaluation schema ); }); + it('passes a string input to the runner (not a message list)', async () => { + const mockRunnerResult: RunnerResult = { + content: '', + parsed: { + score: 0.85, + reasoning: 'Good answer.', + }, + metrics: { success: true }, + }; + + mockTracker.trackMetricsOf.mockImplementation(async (extractor, func) => func()); + mockRunner.run.mockResolvedValue(mockRunnerResult); + + await judge.evaluate('What is AI?', 'AI is artificial intelligence.'); + + expect(mockRunner.run).toHaveBeenCalledTimes(1); + const inputArg = mockRunner.run.mock.calls[0][0]; + expect(typeof inputArg).toBe('string'); + expect(inputArg).toContain('MESSAGE HISTORY:\nWhat is AI?'); + expect(inputArg).toContain('RESPONSE TO EVALUATE:\nAI is artificial intelligence.'); + }); + it('returns evaluation result with correct evaluationMetricKey for tracker integration', async () => { const mockRunnerResult: RunnerResult = { content: '', @@ -412,25 +481,29 @@ describe('Judge', () => { }); }); - it('returns error result when messages are missing', async () => { + it('proceeds (does not error early) when messages is undefined', async () => { const configWithoutMessages: LDAIJudgeConfig = { ...judgeConfig, messages: undefined, }; const judgeWithoutMessages = new Judge(configWithoutMessages, mockRunner, 1.0, mockLogger); + 
const mockRunnerResult: RunnerResult = { + content: '', + parsed: { + score: 0.7, + reasoning: 'Acceptable response.', + }, + metrics: { success: true }, + }; + mockTracker.trackMetricsOf.mockImplementation(async (extractor, func) => func()); + mockRunner.run.mockResolvedValue(mockRunnerResult); + const result = await judgeWithoutMessages.evaluate('test input', 'test output'); - expect(result).toEqual({ - success: false, - sampled: true, - errorMessage: 'Judge configuration must include messages', - judgeConfigKey: 'test-judge', - }); - expect(mockLogger.warn).toHaveBeenCalledWith( - 'Judge configuration must include messages', - mockTrackData, - ); + expect(result.sampled).toBe(true); + expect(result.success).toBe(true); + expect(mockRunner.run).toHaveBeenCalledTimes(1); }); it('returns result with success false when parsed is undefined or has no score/reasoning', async () => { @@ -588,17 +661,7 @@ describe('Judge', () => { }); expect(mockRunner.run).toHaveBeenCalledWith( - expect.arrayContaining([ - expect.objectContaining({ - role: 'system', - content: 'You are a helpful judge that evaluates AI responses.', - }), - expect.objectContaining({ - role: 'user', - content: - 'Evaluate and report scores for important metrics: Input: What is the capital of France?\r\nParis is the capital of France., Output: Paris is the capital of France.', - }), - ]), + 'MESSAGE HISTORY:\nWhat is the capital of France?\r\nParis is the capital of France.\n\nRESPONSE TO EVALUATE:\nParis is the capital of France.', expect.any(Object), // evaluation schema ); }); @@ -626,28 +689,19 @@ describe('Judge', () => { }); }); - describe('_constructEvaluationMessages', () => { + describe('_buildEvaluationInput', () => { let judge: Judge; beforeEach(() => { judge = new Judge(judgeConfig, mockRunner, 1.0, mockLogger); }); - it('constructs evaluation messages correctly', () => { + it('builds the evaluation string in the expected format', () => { // eslint-disable-next-line no-underscore-dangle - const 
constructMessages = (judge as any)._constructEvaluationMessages.bind(judge); - const messages = constructMessages('test input', 'test output'); + const buildInput = (judge as any)._buildEvaluationInput.bind(judge); + const input = buildInput('hello', 'world'); - expect(messages).toHaveLength(2); - expect(messages[0]).toEqual({ - role: 'system', - content: 'You are a helpful judge that evaluates AI responses.', - }); - expect(messages[1]).toEqual({ - role: 'user', - content: - 'Evaluate and report scores for important metrics: Input: test input, Output: test output', - }); + expect(input).toBe('MESSAGE HISTORY:\nhello\n\nRESPONSE TO EVALUATE:\nworld'); }); }); diff --git a/packages/sdk/server-ai/__tests__/LDAIClientImpl.test.ts b/packages/sdk/server-ai/__tests__/LDAIClientImpl.test.ts index 0e5ca6de0e..a113514e82 100644 --- a/packages/sdk/server-ai/__tests__/LDAIClientImpl.test.ts +++ b/packages/sdk/server-ai/__tests__/LDAIClientImpl.test.ts @@ -12,8 +12,15 @@ import { LDAIClientImpl } from '../src/LDAIClientImpl'; import { LDClientMin } from '../src/LDClientMin'; import { aiSdkLanguage, aiSdkName, aiSdkVersion } from '../src/sdkInfo'; -// Mock Judge and RunnerFactory -jest.mock('../src/api/judge/Judge'); +// Mock Judge and RunnerFactory. Preserve the real `stripLegacyJudgeMessages` +// helper so the real `_judgeConfig` strip path can be exercised by tests. 
+jest.mock('../src/api/judge/Judge', () => { + const actual = jest.requireActual('../src/api/judge/Judge'); + return { + ...actual, + Judge: jest.fn(), + }; +}); jest.mock('../src/api/providers/RunnerFactory'); const mockLdClient: jest.Mocked = { @@ -184,7 +191,10 @@ describe('config evaluation', () => { const evaluateSpy = jest.spyOn(client as any, '_evaluate'); const result = await client.judgeConfig(key, testContext, defaultValue); - expect(evaluateSpy).toHaveBeenCalledWith(key, testContext, defaultValue, 'judge', undefined); + expect(evaluateSpy).toHaveBeenCalledWith(key, testContext, defaultValue, 'judge', { + message_history: '{{message_history}}', + response_to_evaluate: '{{response_to_evaluate}}', + }); // Should use first value from evaluationMetricKeys expect(result.evaluationMetricKey).toBe('relevance'); expect(result.createTracker).toBeDefined(); @@ -217,7 +227,10 @@ describe('config evaluation', () => { const evaluateSpy = jest.spyOn(client as any, '_evaluate'); const result = await client.judgeConfig(key, testContext, defaultValue); - expect(evaluateSpy).toHaveBeenCalledWith(key, testContext, defaultValue, 'judge', undefined); + expect(evaluateSpy).toHaveBeenCalledWith(key, testContext, defaultValue, 'judge', { + message_history: '{{message_history}}', + response_to_evaluate: '{{response_to_evaluate}}', + }); expect(result.evaluationMetricKey).toBe('relevance'); expect(result.createTracker).toBeDefined(); expect(result.enabled).toBe(true); @@ -250,7 +263,10 @@ describe('config evaluation', () => { const evaluateSpy = jest.spyOn(client as any, '_evaluate'); const result = await client.judgeConfig(key, testContext, defaultValue); - expect(evaluateSpy).toHaveBeenCalledWith(key, testContext, defaultValue, 'judge', undefined); + expect(evaluateSpy).toHaveBeenCalledWith(key, testContext, defaultValue, 'judge', { + message_history: '{{message_history}}', + response_to_evaluate: '{{response_to_evaluate}}', + }); 
expect(result.evaluationMetricKey).toBe('helpfulness'); expect(result.createTracker).toBeDefined(); expect(result.enabled).toBe(true); @@ -283,7 +299,10 @@ describe('config evaluation', () => { const evaluateSpy = jest.spyOn(client as any, '_evaluate'); const result = await client.judgeConfig(key, testContext, defaultValue); - expect(evaluateSpy).toHaveBeenCalledWith(key, testContext, defaultValue, 'judge', undefined); + expect(evaluateSpy).toHaveBeenCalledWith(key, testContext, defaultValue, 'judge', { + message_history: '{{message_history}}', + response_to_evaluate: '{{response_to_evaluate}}', + }); // Empty string should be treated as invalid, so should fall back to first value in evaluationMetricKeys expect(result.evaluationMetricKey).toBe('relevance'); expect(result.createTracker).toBeDefined(); @@ -316,7 +335,10 @@ describe('config evaluation', () => { const evaluateSpy = jest.spyOn(client as any, '_evaluate'); const result = await client.judgeConfig(key, testContext, defaultValue); - expect(evaluateSpy).toHaveBeenCalledWith(key, testContext, defaultValue, 'judge', undefined); + expect(evaluateSpy).toHaveBeenCalledWith(key, testContext, defaultValue, 'judge', { + message_history: '{{message_history}}', + response_to_evaluate: '{{response_to_evaluate}}', + }); // Should skip empty and whitespace strings, use first valid value expect(result.evaluationMetricKey).toBe('relevance'); expect(result.createTracker).toBeDefined(); @@ -622,8 +644,44 @@ describe('judgeConfig method', () => { key, 1, ); - expect(evaluateSpy).toHaveBeenCalledWith(key, testContext, defaultValue, 'judge', variables); - expect(result).toBe(mockJudgeConfig); + expect(evaluateSpy).toHaveBeenCalledWith(key, testContext, defaultValue, 'judge', { + ...variables, + message_history: '{{message_history}}', + response_to_evaluate: '{{response_to_evaluate}}', + }); + // System messages without legacy template variables pass through unchanged. 
+ expect(result).toMatchObject(mockJudgeConfig); + expect(result.messages).toEqual(mockJudgeConfig.messages); + evaluateSpy.mockRestore(); + }); + + it('strips legacy judge template messages from the returned config', async () => { + const client = new LDAIClientImpl(mockLdClient); + const key = 'test-judge'; + const defaultValue: LDAIJudgeConfigDefault = { + enabled: false, + }; + + const mockJudgeConfig = { + enabled: true, + model: { name: 'gpt-4' }, + provider: { name: 'openai' }, + evaluationMetricKey: 'relevance', + messages: [ + { role: 'system' as const, content: 'You are a judge.' }, + { role: 'assistant' as const, content: '{{message_history}}' }, + { role: 'user' as const, content: 'Evaluate: {{response_to_evaluate}}' }, + ], + createTracker: () => ({}) as any, + toVercelAISDK: jest.fn(), + }; + + const evaluateSpy = jest.spyOn(client as any, '_evaluate'); + evaluateSpy.mockResolvedValue(mockJudgeConfig); + + const result = await client.judgeConfig(key, testContext, defaultValue); + + expect(result.messages).toEqual([{ role: 'system', content: 'You are a judge.' 
}]); evaluateSpy.mockRestore(); }); }); @@ -679,10 +737,7 @@ describe('createJudge method', () => { key, 1, ); - expect(judgeConfigSpy).toHaveBeenCalledWith(key, testContext, defaultValue, { - message_history: '{{message_history}}', - response_to_evaluate: '{{response_to_evaluate}}', - }); + expect(judgeConfigSpy).toHaveBeenCalledWith(key, testContext, defaultValue, undefined); expect(RunnerFactory.createModel).toHaveBeenCalledWith(mockJudgeConfig, undefined, undefined); expect(Judge).toHaveBeenCalledWith(mockJudgeConfig, mockProvider, 1.0, undefined); expect(result).toBe(mockJudge); diff --git a/packages/sdk/server-ai/src/LDAIClientImpl.ts b/packages/sdk/server-ai/src/LDAIClientImpl.ts index c655db2551..f94b43bc3d 100644 --- a/packages/sdk/server-ai/src/LDAIClientImpl.ts +++ b/packages/sdk/server-ai/src/LDAIClientImpl.ts @@ -24,7 +24,7 @@ import { import { LDAIConfigFlagValue, LDAIConfigUtils } from './api/config/LDAIConfigUtils'; import { AgentGraphDefinition, LDAgentGraphFlagValue, LDGraphTracker } from './api/graph'; import { Evaluator } from './api/judge/Evaluator'; -import { Judge } from './api/judge/Judge'; +import { Judge, stripLegacyJudgeMessages } from './api/judge/Judge'; import { LDAIClient } from './api/LDAIClient'; import { RunnerFactory, SupportedAIProvider } from './api/providers'; import { LDAIConfigTrackerImpl } from './LDAIConfigTrackerImpl'; @@ -239,8 +239,43 @@ export class LDAIClientImpl implements LDAIClient { defaultValue: LDAIJudgeConfigDefault, variables?: Record, ): Promise { - const config = await this._evaluate(key, context, defaultValue, 'judge', variables); - return config as LDAIJudgeConfig; + if (variables?.message_history !== undefined) { + this._logger?.warn( + "The variable 'message_history' is reserved by the judge and will be ignored.", + ); + } + if (variables?.response_to_evaluate !== undefined) { + this._logger?.warn( + "The variable 'response_to_evaluate' is reserved by the judge and will be ignored.", + ); + } + + // 
Re-inject the reserved variables as their literal placeholders so they + // survive Mustache interpolation in `_evaluate`. Without this, legacy + // templates like `{{message_history}}` get rendered to empty strings and + // `stripLegacyJudgeMessages` below cannot detect them. + const extendedVariables = { + ...variables, + message_history: '{{message_history}}', + response_to_evaluate: '{{response_to_evaluate}}', + }; + + const config = (await this._evaluate( + key, + context, + defaultValue, + 'judge', + extendedVariables, + )) as LDAIJudgeConfig; + + // Strip legacy judge template messages (containing {{message_history}} or + // {{response_to_evaluate}}) before returning the config. New-style configs + // omit these and rely on Judge._buildEvaluationInput. + if (config.messages) { + return { ...config, messages: stripLegacyJudgeMessages(config.messages) }; + } + + return config; } async judgeConfig( @@ -381,29 +416,11 @@ export class LDAIClientImpl implements LDAIClient { sampleRate: number = 1.0, ): Promise { try { - if (variables?.message_history !== undefined) { - this._logger?.warn( - "The variable 'message_history' is reserved by the judge and will be ignored.", - ); - } - if (variables?.response_to_evaluate !== undefined) { - this._logger?.warn( - "The variable 'response_to_evaluate' is reserved by the judge and will be ignored.", - ); - } - - // Overwrite reserved variables to ensure they remain as placeholders for judge evaluation - const extendedVariables = { - ...variables, - message_history: '{{message_history}}', - response_to_evaluate: '{{response_to_evaluate}}', - }; - const judgeConfig = await this._judgeConfig( key, context, defaultValue ?? 
disabledAIConfig, - extendedVariables, + variables, ); if (!judgeConfig.enabled) { diff --git a/packages/sdk/server-ai/src/api/judge/Judge.ts b/packages/sdk/server-ai/src/api/judge/Judge.ts index 35624cad30..86e34c8088 100644 --- a/packages/sdk/server-ai/src/api/judge/Judge.ts +++ b/packages/sdk/server-ai/src/api/judge/Judge.ts @@ -1,5 +1,3 @@ -import Mustache from 'mustache'; - import { LDLogger } from '@launchdarkly/js-server-sdk-common'; import { ChatResponse } from '../chat/types'; @@ -26,6 +24,27 @@ const EVALUATION_SCHEMA = { additionalProperties: false, } as const; +/** + * Remove legacy judge template messages from a message list. + * + * Strips any non-system message whose content contains `{{message_history}}` + * or `{{response_to_evaluate}}`. These were used by older judge configs to + * indicate where the SDK should interpolate the evaluated conversation; new + * configs omit them entirely and rely on the string input built by + * `Judge._buildEvaluationInput`. + * + * @param messages The raw message list from the judge AI config. + * @returns A new list with legacy template messages removed. + */ +export function stripLegacyJudgeMessages(messages: LDMessage[]): LDMessage[] { + return messages.filter( + (msg) => + msg.role === 'system' || + (!msg.content.includes('{{message_history}}') && + !msg.content.includes('{{response_to_evaluate}}')), + ); +} + /** * Judge implementation that handles evaluation functionality and conversation management. 
* @@ -105,13 +124,6 @@ export class Judge { return result; } - if (!this._aiConfig.messages) { - this._logger?.warn('Judge configuration must include messages', tracker.getTrackData()); - result.sampled = true; - result.errorMessage = 'Judge configuration must include messages'; - return result; - } - if (Math.random() > effectiveRate) { this._logger?.debug(`Judge evaluation skipped due to sampling rate: ${effectiveRate}`); return result; @@ -119,11 +131,11 @@ export class Judge { result.sampled = true; - const messages = this._constructEvaluationMessages(input, output); + const evaluationInput = this._buildEvaluationInput(input, output); const response = await tracker.trackMetricsOf( (r: RunnerResult) => r.metrics, - () => this._runner.run(messages, EVALUATION_SCHEMA), + () => this._runner.run(evaluationInput, EVALUATION_SCHEMA), ); const evalResult = this._parseEvaluationResponse(response.parsed); @@ -186,25 +198,13 @@ export class Judge { } /** - * Constructs evaluation messages by combining judge's config messages with input/output. - */ - private _constructEvaluationMessages(input: string, output: string): LDMessage[] { - const messages: LDMessage[] = this._aiConfig.messages!.map((msg) => ({ - ...msg, - content: this._interpolateMessage(msg.content, { - message_history: input, - response_to_evaluate: output, - }), - })); - - return messages; - } - - /** - * Interpolates message content with variables using Mustache templating. + * Builds the evaluation input string passed to the runner. + * + * Combines the original prompt and the response into a single, well-known + * format the judge model is expected to evaluate. 
*/ - private _interpolateMessage(content: string, variables: Record): string { - return Mustache.render(content, variables, undefined, { escape: (item: any) => item }); + private _buildEvaluationInput(input: string, output: string): string { + return `MESSAGE HISTORY:\n${input}\n\nRESPONSE TO EVALUATE:\n${output}`; } /** diff --git a/packages/sdk/server-ai/src/api/providers/Runner.ts b/packages/sdk/server-ai/src/api/providers/Runner.ts index 42b43cd8ce..7a642275e8 100644 --- a/packages/sdk/server-ai/src/api/providers/Runner.ts +++ b/packages/sdk/server-ai/src/api/providers/Runner.ts @@ -1,4 +1,3 @@ -import { LDMessage } from '../config/types'; import { AgentGraphRunnerResult } from '../graph/types'; import { RunnerResult } from '../model/types'; @@ -11,15 +10,14 @@ import { RunnerResult } from '../model/types'; */ export interface Runner { /** - * Invoke the model with the given input. + * Invoke the model with the given input string. * - * @param input The input to the model. For agents this is a string prompt; - * for model completions and judges this is an array of messages. + * @param input The string input to the model. * @param outputType Optional JSON schema for structured output. When provided, * the model should return structured data accessible via `RunnerResult.parsed`. * @returns Promise resolving to a RunnerResult. 
*/ - run(input: string | LDMessage[], outputType?: Record): Promise; + run(input: string, outputType?: Record): Promise; } /** From 3e4bc9632c5ae233d689f0a3d6ec0887ea73ed2e Mon Sep 17 00:00:00 2001 From: Jason Bailey Date: Thu, 7 May 2026 11:31:28 -0500 Subject: [PATCH 18/19] feat!: Remove AIProvider deprecated methods and create*/init* aliases (AIC-2388) (#1363) --- .../server-ai-langchain/README.md | 18 +-- .../src/LangChainRunnerFactory.ts | 6 +- .../ai-providers/server-ai-openai/README.md | 18 +-- .../src/OpenAIRunnerFactory.ts | 11 +- .../ai-providers/server-ai-vercel/README.md | 18 +-- .../src/VercelRunnerFactory.ts | 2 +- packages/sdk/server-ai/README.md | 29 ++--- .../__tests__/LDAIClientImpl.test.ts | 2 +- .../server-ai/__tests__/RunnerFactory.test.ts | 49 ++++--- .../examples/openai-observability/README.md | 2 +- packages/sdk/server-ai/src/LDAIClientImpl.ts | 26 ++-- packages/sdk/server-ai/src/api/judge/index.ts | 2 +- packages/sdk/server-ai/src/api/judge/types.ts | 18 --- .../server-ai/src/api/providers/AIProvider.ts | 122 +++--------------- .../src/api/providers/RunnerFactory.ts | 23 ++-- 15 files changed, 120 insertions(+), 226 deletions(-) diff --git a/packages/ai-providers/server-ai-langchain/README.md b/packages/ai-providers/server-ai-langchain/README.md index bfc35f7d27..f899682093 100644 --- a/packages/ai-providers/server-ai-langchain/README.md +++ b/packages/ai-providers/server-ai-langchain/README.md @@ -20,7 +20,7 @@ ## Quick Setup -This package provides LangChain integration for the LaunchDarkly AI SDK. The simplest way to use it is with the LaunchDarkly AI SDK's `initChat` method: +This package provides LangChain integration for the LaunchDarkly AI SDK. The simplest way to use it is with the LaunchDarkly AI SDK's `createModel` method: 1. 
Install the required packages: @@ -30,7 +30,7 @@ npm install @launchdarkly/server-sdk-ai @launchdarkly/server-sdk-ai-langchain -- yarn add @launchdarkly/server-sdk-ai @launchdarkly/server-sdk-ai-langchain ``` -2. Create a chat session and use it: +2. Create a managed model and run it: ```typescript import { init } from '@launchdarkly/node-server-sdk'; @@ -40,17 +40,17 @@ import { initAi } from '@launchdarkly/server-sdk-ai'; const ldClient = init(sdkKey); const aiClient = initAi(ldClient); -// Create a chat session -const defaultConfig = { - enabled: true, +// Create a managed model +const defaultConfig = { + enabled: true, model: { name: 'gpt-4' }, provider: { name: 'openai' } }; -const chat = await aiClient.initChat('my-chat-config', context, defaultConfig); +const model = await aiClient.createModel('my-chat-config', context, defaultConfig); -if (chat) { - const response = await chat.invoke('What is the capital of France?'); - console.log(response.message.content); +if (model) { + const result = await model.run('What is the capital of France?'); + console.log(result.content); } ``` diff --git a/packages/ai-providers/server-ai-langchain/src/LangChainRunnerFactory.ts b/packages/ai-providers/server-ai-langchain/src/LangChainRunnerFactory.ts index 482447c900..f00cfb0e14 100644 --- a/packages/ai-providers/server-ai-langchain/src/LangChainRunnerFactory.ts +++ b/packages/ai-providers/server-ai-langchain/src/LangChainRunnerFactory.ts @@ -29,7 +29,7 @@ export class LangChainRunnerFactory extends AIProvider { */ async createModel(config: LDAICompletionConfig): Promise { const llm = await createLangChainModel(config); - return new LangChainModelRunner(llm, config, this.logger); + return new LangChainModelRunner(llm, config, this._logger); } /** @@ -51,7 +51,7 @@ export class LangChainRunnerFactory extends AIProvider { }; const llm = await createLangChainModel(configForModel); - const lcTools = buildStructuredTools(toolDefinitions, tools ?? 
{}, this.logger); + const lcTools = buildStructuredTools(toolDefinitions, tools ?? {}, this._logger); const instructions = config.instructions ?? ''; const agent = createAgent({ @@ -60,7 +60,7 @@ export class LangChainRunnerFactory extends AIProvider { systemPrompt: instructions || undefined, }); - return new LangChainAgentRunner(agent as any, this.logger); + return new LangChainAgentRunner(agent as any, this._logger); } /** diff --git a/packages/ai-providers/server-ai-openai/README.md b/packages/ai-providers/server-ai-openai/README.md index 6cc18807df..dbc01cd941 100644 --- a/packages/ai-providers/server-ai-openai/README.md +++ b/packages/ai-providers/server-ai-openai/README.md @@ -23,7 +23,7 @@ ## Quick Setup -This package provides OpenAI integration for the LaunchDarkly AI SDK. The simplest way to use it is with the LaunchDarkly AI SDK's `initChat` method: +This package provides OpenAI integration for the LaunchDarkly AI SDK. The simplest way to use it is with the LaunchDarkly AI SDK's `createModel` method: 1. Install the required packages: @@ -31,7 +31,7 @@ This package provides OpenAI integration for the LaunchDarkly AI SDK. The simple npm install @launchdarkly/server-sdk-ai @launchdarkly/server-sdk-ai-openai --save ``` -2. Create a chat session and use it: +2. 
Create a managed model and run it: ```typescript import { init } from '@launchdarkly/node-server-sdk'; @@ -41,17 +41,17 @@ import { initAi } from '@launchdarkly/server-sdk-ai'; const ldClient = init(sdkKey); const aiClient = initAi(ldClient); -// Create a chat session -const defaultConfig = { - enabled: true, +// Create a managed model +const defaultConfig = { + enabled: true, model: { name: 'gpt-4' }, provider: { name: 'openai' } }; -const chat = await aiClient.initChat('my-chat-config', context, defaultConfig); +const model = await aiClient.createModel('my-chat-config', context, defaultConfig); -if (chat) { - const response = await chat.invoke("What is the capital of France?"); - console.log(response.message.content); +if (model) { + const result = await model.run('What is the capital of France?'); + console.log(result.content); } ``` diff --git a/packages/ai-providers/server-ai-openai/src/OpenAIRunnerFactory.ts b/packages/ai-providers/server-ai-openai/src/OpenAIRunnerFactory.ts index 97eebf64bb..9f99f7d798 100644 --- a/packages/ai-providers/server-ai-openai/src/OpenAIRunnerFactory.ts +++ b/packages/ai-providers/server-ai-openai/src/OpenAIRunnerFactory.ts @@ -30,7 +30,7 @@ export class OpenAIRunnerFactory extends AIProvider { * Create a model runner from a completion AI configuration. */ async createModel(config: LDAICompletionConfig): Promise { - return new OpenAIModelRunner(this._client, config, this.logger); + return new OpenAIModelRunner(this._client, config, this._logger); } /** @@ -67,7 +67,12 @@ export class OpenAIRunnerFactory extends AIProvider { const parameters = mapParameterKeys({ ...(config.model?.parameters ?? 
{}) }); delete parameters.tools; - const { agentTools, toolNameMap } = buildAgentTools(toolHelper, configTools, registry, this.logger); + const { agentTools, toolNameMap } = buildAgentTools( + toolHelper, + configTools, + registry, + this._logger, + ); const agent = new Agent({ name: 'ldai-agent', instructions: config.instructions || undefined, @@ -76,7 +81,7 @@ export class OpenAIRunnerFactory extends AIProvider { modelSettings: parameters, }); - return new OpenAIAgentRunner(agent, agentRun, toolNameMap, this.logger); + return new OpenAIAgentRunner(agent, agentRun, toolNameMap, this._logger); } /** diff --git a/packages/ai-providers/server-ai-vercel/README.md b/packages/ai-providers/server-ai-vercel/README.md index fb65d1832f..f320194983 100644 --- a/packages/ai-providers/server-ai-vercel/README.md +++ b/packages/ai-providers/server-ai-vercel/README.md @@ -20,7 +20,7 @@ ## Quick Setup -This package provides Vercel AI SDK integration for the LaunchDarkly AI SDK. The simplest way to use it is with the LaunchDarkly AI SDK's `initChat` method: +This package provides Vercel AI SDK integration for the LaunchDarkly AI SDK. The simplest way to use it is with the LaunchDarkly AI SDK's `createModel` method: 1. Install the required packages: @@ -30,7 +30,7 @@ npm install @launchdarkly/server-sdk-ai @launchdarkly/server-sdk-ai-vercel --sav yarn add @launchdarkly/server-sdk-ai @launchdarkly/server-sdk-ai-vercel ``` -2. Create a chat session and use it: +2. 
Create a managed model and run it: ```typescript import { init } from '@launchdarkly/node-server-sdk'; @@ -40,17 +40,17 @@ import { initAi } from '@launchdarkly/server-sdk-ai'; const ldClient = init(sdkKey); const aiClient = initAi(ldClient); -// Create a chat session -const defaultConfig = { - enabled: true, +// Create a managed model +const defaultConfig = { + enabled: true, model: { name: 'gpt-4' }, provider: { name: 'openai' } }; -const chat = await aiClient.initChat('my-chat-config', context, defaultConfig); +const model = await aiClient.createModel('my-chat-config', context, defaultConfig); -if (chat) { - const response = await chat.invoke('What is the capital of France?'); - console.log(response.message.content); +if (model) { + const result = await model.run('What is the capital of France?'); + console.log(result.content); } ``` diff --git a/packages/ai-providers/server-ai-vercel/src/VercelRunnerFactory.ts b/packages/ai-providers/server-ai-vercel/src/VercelRunnerFactory.ts index 0510900e3d..9869301314 100644 --- a/packages/ai-providers/server-ai-vercel/src/VercelRunnerFactory.ts +++ b/packages/ai-providers/server-ai-vercel/src/VercelRunnerFactory.ts @@ -25,7 +25,7 @@ export class VercelRunnerFactory extends AIProvider { async createModel(config: LDAICompletionConfig): Promise { const model = await VercelRunnerFactory.createVercelModel(config); const parameters = VercelRunnerFactory.mapParameters(config.model?.parameters); - return new VercelModelRunner(model, config, parameters, this.logger); + return new VercelModelRunner(model, config, parameters, this._logger); } /** diff --git a/packages/sdk/server-ai/README.md b/packages/sdk/server-ai/README.md index 96e2faf4b0..081bfbc831 100644 --- a/packages/sdk/server-ai/README.md +++ b/packages/sdk/server-ai/README.md @@ -79,37 +79,34 @@ if (aiConfig.enabled) { } ``` -## TrackedChat for Conversational AI +## ManagedModel for Tracked Model Invocations -`TrackedChat` provides a high-level interface for 
conversational AI with automatic conversation management and metrics tracking: +`ManagedModel` provides a high-level interface for invoking AI models with automatic metrics tracking and judge evaluation: - Automatically configures models based on AI configuration -- Maintains conversation history across multiple interactions - Automatically tracks token usage, latency, and success rates +- Runs configured judges asynchronously and reports their results - Works with any supported AI provider (see [AI Providers](https://github.com/launchdarkly/js-core#ai-providers) for available packages) -### Using TrackedChat +### Using ManagedModel ```typescript // Use the same defaultConfig from the retrieval section above -const chat = await aiClient.createChat( +const model = await aiClient.createModel( 'customer-support-chat', context, defaultConfig, { customerName: 'John' } ); -if (chat) { - // Simple conversation flow - metrics are automatically tracked by invoke() - const response1 = await chat.invoke('I need help with my order'); - console.log(response1.message.content); - - const response2 = await chat.invoke("What's the status?"); - console.log(response2.message.content); - - // Access conversation history - const messages = chat.getMessages(); - console.log(`Conversation has ${messages.length} messages`); +if (model) { + // Metrics are automatically tracked by run() + const result = await model.run('I need help with my order'); + console.log(result.content); + + // Judge evaluations run asynchronously; await if you need their results + const evals = await result.evaluations; + console.log('Judge results:', evals); } ``` diff --git a/packages/sdk/server-ai/__tests__/LDAIClientImpl.test.ts b/packages/sdk/server-ai/__tests__/LDAIClientImpl.test.ts index a113514e82..1214e2d463 100644 --- a/packages/sdk/server-ai/__tests__/LDAIClientImpl.test.ts +++ b/packages/sdk/server-ai/__tests__/LDAIClientImpl.test.ts @@ -692,7 +692,7 @@ describe('createJudge method', () => { 
beforeEach(() => { mockProvider = { - invokeStructuredModel: jest.fn(), + run: jest.fn(), }; mockJudge = { diff --git a/packages/sdk/server-ai/__tests__/RunnerFactory.test.ts b/packages/sdk/server-ai/__tests__/RunnerFactory.test.ts index f76ca556e9..b7aa331fa7 100644 --- a/packages/sdk/server-ai/__tests__/RunnerFactory.test.ts +++ b/packages/sdk/server-ai/__tests__/RunnerFactory.test.ts @@ -1,4 +1,7 @@ -import { LDAIConfigKind } from '../src/api/config/types'; +import { + LDAIAgentConfig, + LDAICompletionConfig, +} from '../src/api/config/types'; import { AIProvider, ToolRegistry } from '../src/api/providers/AIProvider'; import { AgentGraphRunner, Runner } from '../src/api/providers/Runner'; import { RunnerFactory, SupportedAIProvider } from '../src/api/providers/RunnerFactory'; @@ -7,14 +10,23 @@ import { RunnerFactory, SupportedAIProvider } from '../src/api/providers/RunnerF // Helpers // --------------------------------------------------------------------------- -const makeConfig = (providerName: string): LDAIConfigKind => +const makeConfig = (providerName: string): LDAICompletionConfig => ({ key: 'test-config', enabled: true, provider: { name: providerName }, createTracker: () => ({}) as any, evaluator: {} as any, - }) as unknown as LDAIConfigKind; + }) as unknown as LDAICompletionConfig; + +const makeAgentConfig = (providerName: string): LDAIAgentConfig => + ({ + key: 'test-agent-config', + enabled: true, + provider: { name: providerName }, + createTracker: () => ({}) as any, + evaluator: {} as any, + }) as unknown as LDAIAgentConfig; const makeRunner = (): Runner => ({ run: jest.fn() }); const makeGraphRunner = (): AgentGraphRunner => ({ run: jest.fn() }); @@ -162,7 +174,7 @@ describe('RunnerFactory.createAgent', () => { jest.spyOn(RunnerFactory as any, '_getProviderFactory').mockResolvedValue(mockFactory); - const result = await RunnerFactory.createAgent(makeConfig('openai'), tools); + const result = await RunnerFactory.createAgent(makeAgentConfig('openai'), 
tools); expect(result).toBe(runner); expect(mockFactory.createAgent).toHaveBeenCalledWith( @@ -177,7 +189,11 @@ describe('RunnerFactory.createAgent', () => { jest.spyOn(RunnerFactory as any, '_getProviderFactory').mockResolvedValue(undefined); - const result = await RunnerFactory.createAgent(makeConfig('openai'), undefined, logger as any); + const result = await RunnerFactory.createAgent( + makeAgentConfig('openai'), + undefined, + logger as any, + ); expect(result).toBeUndefined(); expect(warnSpy).toHaveBeenCalledWith(expect.stringContaining('not supported')); @@ -243,7 +259,7 @@ describe('AIProvider default factory methods', () => { it('createAgent returns undefined by default', async () => { const provider = new ConcreteProvider(); - const result = await provider.createAgent(makeConfig('openai')); + const result = await provider.createAgent(makeAgentConfig('openai')); expect(result).toBeUndefined(); }); @@ -252,25 +268,4 @@ describe('AIProvider default factory methods', () => { const result = await provider.createAgentGraph({} as any); expect(result).toBeUndefined(); }); - - it('createModel warns when not overridden', async () => { - const warnSpy = jest.fn(); - const provider = new ConcreteProvider({ warn: warnSpy } as any); - await provider.createModel(makeConfig('openai')); - expect(warnSpy).toHaveBeenCalledWith(expect.stringContaining('createModel not implemented')); - }); - - it('createAgent warns when not overridden', async () => { - const warnSpy = jest.fn(); - const provider = new ConcreteProvider({ warn: warnSpy } as any); - await provider.createAgent(makeConfig('openai')); - expect(warnSpy).toHaveBeenCalledWith(expect.stringContaining('createAgent not implemented')); - }); - - it('createAgentGraph warns when not overridden', async () => { - const warnSpy = jest.fn(); - const provider = new ConcreteProvider({ warn: warnSpy } as any); - await provider.createAgentGraph({} as any); - 
expect(warnSpy).toHaveBeenCalledWith(expect.stringContaining('createAgentGraph not implemented')); - }); }); diff --git a/packages/sdk/server-ai/examples/openai-observability/README.md b/packages/sdk/server-ai/examples/openai-observability/README.md index 2e43d77f58..a958d45575 100644 --- a/packages/sdk/server-ai/examples/openai-observability/README.md +++ b/packages/sdk/server-ai/examples/openai-observability/README.md @@ -1,6 +1,6 @@ # Provider-Specific Observability Example (OpenAI) -This example shows how to use the LaunchDarkly observability plugin when calling an AI provider directly — without the higher-level `createChat` abstraction. It uses OpenAI as the provider, but the same pattern applies to any provider (Bedrock, Anthropic, Vercel AI SDK, etc.). +This example shows how to use the LaunchDarkly observability plugin when calling an AI provider directly — without the higher-level `createModel` abstraction. It uses OpenAI as the provider, but the same pattern applies to any provider (Bedrock, Anthropic, Vercel AI SDK, etc.). ## How it works diff --git a/packages/sdk/server-ai/src/LDAIClientImpl.ts b/packages/sdk/server-ai/src/LDAIClientImpl.ts index f94b43bc3d..6d1d11498d 100644 --- a/packages/sdk/server-ai/src/LDAIClientImpl.ts +++ b/packages/sdk/server-ai/src/LDAIClientImpl.ts @@ -375,19 +375,6 @@ export class LDAIClientImpl implements LDAIClient { return this.agentConfigs(agentConfigs, context); } - /** - * @deprecated Use `createModel` instead. This method will be removed in a future version. 
- */ - async createChat( - key: string, - context: LDContext, - defaultValue?: LDAICompletionConfigDefault, - variables?: Record, - defaultAiProvider?: SupportedAIProvider, - ): Promise { - return this.createModel(key, context, defaultValue, variables, defaultAiProvider); - } - async createJudge( key: string, context: LDContext, @@ -500,6 +487,19 @@ export class LDAIClientImpl implements LDAIClient { return new ManagedAgent(config, runner, this._logger); } + /** + * @deprecated Use `createModel` instead. This method will be removed in a future version. + */ + async createChat( + key: string, + context: LDContext, + defaultValue?: LDAICompletionConfigDefault, + variables?: Record, + defaultAiProvider?: SupportedAIProvider, + ): Promise { + return this.createModel(key, context, defaultValue, variables, defaultAiProvider); + } + /** * @deprecated Use `createModel` instead. This method will be removed in a future version. */ diff --git a/packages/sdk/server-ai/src/api/judge/index.ts b/packages/sdk/server-ai/src/api/judge/index.ts index ca86630278..4fa3bbb60a 100644 --- a/packages/sdk/server-ai/src/api/judge/index.ts +++ b/packages/sdk/server-ai/src/api/judge/index.ts @@ -1,2 +1,2 @@ export { Judge } from './Judge'; -export type { LDJudgeResult, StructuredResponse } from './types'; +export type { LDJudgeResult } from './types'; diff --git a/packages/sdk/server-ai/src/api/judge/types.ts b/packages/sdk/server-ai/src/api/judge/types.ts index b9d8a05a46..381eb97d90 100644 --- a/packages/sdk/server-ai/src/api/judge/types.ts +++ b/packages/sdk/server-ai/src/api/judge/types.ts @@ -1,21 +1,3 @@ -import { LDAIMetrics } from '../metrics/LDAIMetrics'; - -/** - * Structured response from AI models. - */ -export interface StructuredResponse { - /** The structured data returned by the model */ - data: Record; - - /** The raw response from the model */ - rawResponse: string; - - /** - * Metrics information including success status and token usage. 
- */ - metrics: LDAIMetrics; -} - /** * Result from a judge evaluation containing score, reasoning, and metadata. */ diff --git a/packages/sdk/server-ai/src/api/providers/AIProvider.ts b/packages/sdk/server-ai/src/api/providers/AIProvider.ts index 62a1fd55e5..c795f36d35 100644 --- a/packages/sdk/server-ai/src/api/providers/AIProvider.ts +++ b/packages/sdk/server-ai/src/api/providers/AIProvider.ts @@ -1,9 +1,7 @@ import { LDLogger } from '@launchdarkly/js-server-sdk-common'; -import { ChatResponse } from '../chat/types'; -import { LDAIConfigKind, LDMessage } from '../config/types'; +import { LDAIAgentConfig, LDAICompletionConfig, LDAIJudgeConfig } from '../config/types'; import { AgentGraphDefinition } from '../graph/AgentGraphDefinition'; -import { StructuredResponse } from '../judge/types'; import { AgentGraphRunner, Runner } from './Runner'; /** @@ -14,97 +12,36 @@ import { AgentGraphRunner, Runner } from './Runner'; export type ToolRegistry = Record unknown>; /** - * Abstract base class for AI providers that implement chat model functionality. - * This class provides the contract that all provider implementations must follow - * to integrate with LaunchDarkly's tracking and configuration capabilities. + * Abstract base class for AI providers. * - * Following the AICHAT spec recommendation to use base classes with non-abstract methods - * for better extensibility and backwards compatibility. + * An `AIProvider` is a per-provider factory: it is instantiated once per + * provider package and is responsible for constructing focused runtime + * capability objects via {@link createModel}, {@link createAgent}, and + * {@link createAgentGraph}. + * + * Provider packages subclass `AIProvider` and override the methods they + * support. The default implementations return `undefined`, mirroring Python's + * base-class behaviour, so providers only need to implement the modes they + * actually support. 
*/ export abstract class AIProvider { - protected readonly logger?: LDLogger; + // eslint-disable-next-line @typescript-eslint/naming-convention + protected _logger?: LDLogger; constructor(logger?: LDLogger) { - this.logger = logger; - } - /** - * Invoke the chat model with an array of messages. - * - * Default implementation takes no action and returns a placeholder response. - * Provider implementations should override this method. - * - * @deprecated Use the `Runner` interface and its `run` method instead. - * @param messages Array of LDMessage objects representing the conversation - * @returns Promise that resolves to a ChatResponse containing the model's response - */ - async invokeModel(_messages: LDMessage[]): Promise { - this.logger?.warn('invokeModel not implemented by this provider'); - return { - message: { - role: 'assistant', - content: '', - }, - metrics: { - success: false, - usage: { - total: 0, - input: 0, - output: 0, - }, - }, - }; - } - - /** - * Invoke the chat model with structured output support. - * - * Default implementation takes no action and returns a placeholder response. - * Provider implementations should override this method. - * - * @deprecated Use the `Runner` interface and its `run` method with `outputType` instead. 
- * @param messages Array of LDMessage objects representing the conversation - * @param responseStructure Dictionary of output configurations keyed by output name - * @returns Promise that resolves to a structured response - */ - async invokeStructuredModel( - _messages: LDMessage[], - _responseStructure: Record, - ): Promise { - this.logger?.warn('invokeStructuredModel not implemented by this provider'); - return { - data: {}, - rawResponse: '', - metrics: { - success: false, - usage: { - total: 0, - input: 0, - output: 0, - }, - }, - }; + this._logger = logger; } - - // ============================================================================ - // Factory instance methods (Python AIProvider pattern) - // - // Provider packages override these to return a configured Runner for the - // relevant mode. The default implementations log a warning and return - // undefined, mirroring Python's base-class behaviour. - // ============================================================================ - /** * Create a Runner for a completion or judge AI Config. * * Override in provider subclasses to return a configured {@link Runner}. - * Default implementation logs a warning and returns `undefined`. + * Default implementation returns `undefined`. * * @param config The completion or judge AI configuration. * @returns Promise resolving to a {@link Runner}, or `undefined` if this * provider does not support model creation. */ - async createModel(_config: LDAIConfigKind): Promise { - this.logger?.warn('createModel not implemented by this provider'); + async createModel(_config: LDAICompletionConfig | LDAIJudgeConfig): Promise { return undefined; } @@ -112,15 +49,14 @@ export abstract class AIProvider { * Create a Runner for an agent AI Config. * * Override in provider subclasses to return a configured {@link Runner}. - * Default implementation logs a warning and returns `undefined`. + * Default implementation returns `undefined`. * * @param config The agent AI configuration. 
* @param tools Optional registry of callable tools. * @returns Promise resolving to a {@link Runner}, or `undefined` if this * provider does not support agent creation. */ - async createAgent(_config: LDAIConfigKind, _tools?: ToolRegistry): Promise { - this.logger?.warn('createAgent not implemented by this provider'); + async createAgent(_config: LDAIAgentConfig, _tools?: ToolRegistry): Promise { return undefined; } @@ -128,7 +64,7 @@ export abstract class AIProvider { * Create an AgentGraphRunner for an agent graph definition. * * Override in provider subclasses to return a configured {@link AgentGraphRunner}. - * Default implementation logs a warning and returns `undefined`. + * Default implementation returns `undefined`. * * @param graphDef The agent graph definition. * @param tools Optional registry of callable tools. @@ -139,26 +75,6 @@ export abstract class AIProvider { _graphDef: AgentGraphDefinition, _tools?: ToolRegistry, ): Promise { - this.logger?.warn('createAgentGraph not implemented by this provider'); return undefined; } - - // ============================================================================ - // Legacy static factory (retained for backward compatibility) - // ============================================================================ - - /** - * Static method that constructs an instance of the provider. - * Each provider implementation must provide their own static create method - * that accepts an AIConfig and returns a configured instance. - * - * @deprecated Use the `createModel` factory method instead. 
- * @param aiConfig The LaunchDarkly AI configuration - * @param logger Optional logger for the provider - * @returns Promise that resolves to a configured provider instance - */ - // eslint-disable-next-line @typescript-eslint/no-unused-vars - static async create(aiConfig: LDAIConfigKind, logger?: LDLogger): Promise { - throw new Error('Provider implementations must override the static create method'); - } } diff --git a/packages/sdk/server-ai/src/api/providers/RunnerFactory.ts b/packages/sdk/server-ai/src/api/providers/RunnerFactory.ts index d02c79d445..0451118495 100644 --- a/packages/sdk/server-ai/src/api/providers/RunnerFactory.ts +++ b/packages/sdk/server-ai/src/api/providers/RunnerFactory.ts @@ -1,6 +1,10 @@ import { LDLogger } from '@launchdarkly/js-server-sdk-common'; -import { LDAIConfigKind } from '../config/types'; +import { + LDAIAgentConfig, + LDAICompletionConfig, + LDAIJudgeConfig, +} from '../config/types'; import { AgentGraphDefinition } from '../graph/AgentGraphDefinition'; import { AIProvider, ToolRegistry } from './AIProvider'; import { AgentGraphRunner, Runner } from './Runner'; @@ -29,21 +33,16 @@ export type SupportedAIProvider = (typeof SUPPORTED_AI_PROVIDERS)[number]; * via {@link _getProviderFactory}, and delegates creation to the factory * instance methods on {@link AIProvider}. * - * Provider packages implement {@link AIProvider} factory methods - * (`createModel`, `createAgent`, `createAgentGraph`). The legacy - * {@link AIProvider} abstract class is retained for backward compatibility, - * and the {@link _LegacyProviderAdapter} shim wraps packages that have not - * yet migrated to the new pattern. + * Provider packages subclass {@link AIProvider} and override its factory + * methods (`createModel`, `createAgent`, `createAgentGraph`). */ export class RunnerFactory { /** * Load and return the AIProvider factory for the given provider type. * * This is the single place in the codebase that knows provider package names. 
- * If the provider package exports the new `*RunnerFactory` class, it is - * instantiated directly. Otherwise a {@link _LegacyProviderAdapter} wrapping - * the old `static create()` class is returned to keep CI green during the - * transition. + * Each supported provider package exports a `*RunnerFactory` class that + * extends {@link AIProvider}; this method instantiates it directly. * * @param providerType One of the {@link SUPPORTED_AI_PROVIDERS} values. * @param logger Optional logger forwarded to the provider factory. @@ -163,7 +162,7 @@ export class RunnerFactory { * `undefined` if no suitable provider could be loaded. */ static async createModel( - config: LDAIConfigKind, + config: LDAICompletionConfig | LDAIJudgeConfig, logger?: LDLogger, defaultAiProvider?: SupportedAIProvider, ): Promise { @@ -200,7 +199,7 @@ export class RunnerFactory { * provider could be loaded. */ static async createAgent( - config: LDAIConfigKind, + config: LDAIAgentConfig, tools?: ToolRegistry, logger?: LDLogger, defaultAiProvider?: SupportedAIProvider, From ca9ebb21e2bbc8f85808e62890f62e887a2980bc Mon Sep 17 00:00:00 2001 From: Jason Bailey Date: Thu, 7 May 2026 11:58:32 -0500 Subject: [PATCH 19/19] feat!: Rename LDAIMetrics.usage and LDAIGraphMetrics.usage to .tokens (#1366) --- .../__tests__/LangChainAgentRunner.test.ts | 6 ++--- .../__tests__/LangChainHelper.test.ts | 2 +- .../__tests__/LangChainModelRunner.test.ts | 2 +- .../src/LangChainAgentRunner.ts | 2 +- .../src/LangChainHelper.ts | 2 +- .../src/LangChainModelRunner.ts | 4 ++-- .../__tests__/OpenAIAgentRunner.test.ts | 4 ++-- .../__tests__/OpenAIHelper.test.ts | 2 +- .../__tests__/OpenAIModelRunner.test.ts | 2 +- .../server-ai-openai/src/OpenAIAgentRunner.ts | 4 ++-- .../server-ai-openai/src/OpenAIHelper.ts | 2 +- .../__tests__/VercelHelper.test.ts | 4 ++-- .../__tests__/VercelModelRunner.test.ts | 4 ++-- .../server-ai-vercel/src/VercelHelper.ts | 18 +++++++-------- .../sdk/server-ai/__tests__/Judge.test.ts | 22 
+++++++++---------- .../__tests__/LDAIConfigTrackerImpl.test.ts | 2 +- .../__tests__/ManagedAgentGraph.test.ts | 8 +++---- .../server-ai/__tests__/ManagedModel.test.ts | 8 +++---- .../server-ai/src/LDAIConfigTrackerImpl.ts | 8 +++---- .../src/api/graph/ManagedAgentGraph.ts | 6 ++--- packages/sdk/server-ai/src/api/graph/types.ts | 2 +- .../server-ai/src/api/metrics/LDAIMetrics.ts | 2 +- 22 files changed, 58 insertions(+), 58 deletions(-) diff --git a/packages/ai-providers/server-ai-langchain/__tests__/LangChainAgentRunner.test.ts b/packages/ai-providers/server-ai-langchain/__tests__/LangChainAgentRunner.test.ts index 60edc50c57..24a9ec193e 100644 --- a/packages/ai-providers/server-ai-langchain/__tests__/LangChainAgentRunner.test.ts +++ b/packages/ai-providers/server-ai-langchain/__tests__/LangChainAgentRunner.test.ts @@ -27,7 +27,7 @@ it('returns content with no toolCalls when the agent returns a simple response', expect(result.content).toBe('done'); expect(result.metrics.success).toBe(true); expect(result.metrics.toolCalls).toBeUndefined(); - expect(result.metrics.usage).toEqual({ total: 6, input: 4, output: 2 }); + expect(result.metrics.tokens).toEqual({ total: 6, input: 4, output: 2 }); }); it('extracts tool calls and aggregates usage from multi-step agent messages', async () => { @@ -56,7 +56,7 @@ it('extracts tool calls and aggregates usage from multi-step agent messages', as expect(result.content).toBe('Answer is 42.'); expect(result.metrics.toolCalls).toEqual(['lookup']); - expect(result.metrics.usage).toEqual({ total: 28, input: 16, output: 12 }); + expect(result.metrics.tokens).toEqual({ total: 28, input: 16, output: 12 }); }); it('returns success=false when the agent throws', async () => { @@ -83,5 +83,5 @@ it('handles empty messages array gracefully', async () => { expect(result.content).toBe(''); expect(result.metrics.success).toBe(true); expect(result.metrics.toolCalls).toBeUndefined(); - expect(result.metrics.usage).toBeUndefined(); + 
expect(result.metrics.tokens).toBeUndefined(); }); diff --git a/packages/ai-providers/server-ai-langchain/__tests__/LangChainHelper.test.ts b/packages/ai-providers/server-ai-langchain/__tests__/LangChainHelper.test.ts index 9b9276702e..77a56d6b3c 100644 --- a/packages/ai-providers/server-ai-langchain/__tests__/LangChainHelper.test.ts +++ b/packages/ai-providers/server-ai-langchain/__tests__/LangChainHelper.test.ts @@ -102,7 +102,7 @@ it('returns success=true with usage from the response', () => { message.usage_metadata = { total_tokens: 3, input_tokens: 1, output_tokens: 2 }; expect(getAIMetricsFromResponse(message)).toEqual({ success: true, - usage: { total: 3, input: 1, output: 2 }, + tokens: { total: 3, input: 1, output: 2 }, }); }); diff --git a/packages/ai-providers/server-ai-langchain/__tests__/LangChainModelRunner.test.ts b/packages/ai-providers/server-ai-langchain/__tests__/LangChainModelRunner.test.ts index e68af0d718..73dbd940c6 100644 --- a/packages/ai-providers/server-ai-langchain/__tests__/LangChainModelRunner.test.ts +++ b/packages/ai-providers/server-ai-langchain/__tests__/LangChainModelRunner.test.ts @@ -41,7 +41,7 @@ describe('LangChainModelRunner', () => { expect(result.content).toBe('hello'); expect(result.metrics).toEqual({ success: true, - usage: { total: 12, input: 7, output: 5 }, + tokens: { total: 12, input: 7, output: 5 }, }); expect(result.raw).toBe(response); }); diff --git a/packages/ai-providers/server-ai-langchain/src/LangChainAgentRunner.ts b/packages/ai-providers/server-ai-langchain/src/LangChainAgentRunner.ts index fddcf8dc33..db344e6357 100644 --- a/packages/ai-providers/server-ai-langchain/src/LangChainAgentRunner.ts +++ b/packages/ai-providers/server-ai-langchain/src/LangChainAgentRunner.ts @@ -61,7 +61,7 @@ export class LangChainAgentRunner implements Runner { const metrics: LDAIMetrics = { success: true, - usage: sumTokenUsageFromMessages(messages), + tokens: sumTokenUsageFromMessages(messages), toolCalls: toolCalls.length > 0 
? toolCalls : undefined, }; diff --git a/packages/ai-providers/server-ai-langchain/src/LangChainHelper.ts b/packages/ai-providers/server-ai-langchain/src/LangChainHelper.ts index 29ee11f365..7463d82fcd 100644 --- a/packages/ai-providers/server-ai-langchain/src/LangChainHelper.ts +++ b/packages/ai-providers/server-ai-langchain/src/LangChainHelper.ts @@ -80,7 +80,7 @@ export function getAIUsageFromResponse(response: AIMessage): LDTokenUsage | unde export function getAIMetricsFromResponse(response: AIMessage): LDAIMetrics { return { success: true, - usage: getAIUsageFromResponse(response), + tokens: getAIUsageFromResponse(response), }; } diff --git a/packages/ai-providers/server-ai-langchain/src/LangChainModelRunner.ts b/packages/ai-providers/server-ai-langchain/src/LangChainModelRunner.ts index 37cb657387..a53c4255d1 100644 --- a/packages/ai-providers/server-ai-langchain/src/LangChainModelRunner.ts +++ b/packages/ai-providers/server-ai-langchain/src/LangChainModelRunner.ts @@ -96,7 +96,7 @@ export class LangChainModelRunner implements Runner { const metrics = { success: true, - usage: { total: 0, input: 0, output: 0 }, + tokens: { total: 0, input: 0, output: 0 }, }; return { @@ -111,7 +111,7 @@ export class LangChainModelRunner implements Runner { content: '', metrics: { success: false, - usage: { total: 0, input: 0, output: 0 }, + tokens: { total: 0, input: 0, output: 0 }, }, }; } diff --git a/packages/ai-providers/server-ai-openai/__tests__/OpenAIAgentRunner.test.ts b/packages/ai-providers/server-ai-openai/__tests__/OpenAIAgentRunner.test.ts index 640dc99527..0dabc64b50 100644 --- a/packages/ai-providers/server-ai-openai/__tests__/OpenAIAgentRunner.test.ts +++ b/packages/ai-providers/server-ai-openai/__tests__/OpenAIAgentRunner.test.ts @@ -32,7 +32,7 @@ describe('OpenAIAgentRunner', () => { expect(result.content).toBe('Done'); expect(result.metrics.success).toBe(true); expect(result.metrics.toolCalls).toBeUndefined(); - expect(result.metrics.usage).toEqual({ total: 
12, input: 8, output: 4 }); + expect(result.metrics.tokens).toEqual({ total: 12, input: 8, output: 4 }); }); it('reports tool calls from newItems with LD config name mapping', async () => { @@ -55,7 +55,7 @@ describe('OpenAIAgentRunner', () => { expect(result.content).toBe('The answer is 42.'); expect(result.metrics.toolCalls).toEqual(['lookup']); - expect(result.metrics.usage).toEqual({ total: 28, input: 16, output: 12 }); + expect(result.metrics.tokens).toEqual({ total: 28, input: 16, output: 12 }); }); it('returns an unsuccessful RunnerResult when the agent run throws', async () => { diff --git a/packages/ai-providers/server-ai-openai/__tests__/OpenAIHelper.test.ts b/packages/ai-providers/server-ai-openai/__tests__/OpenAIHelper.test.ts index abe5ec4195..7446514887 100644 --- a/packages/ai-providers/server-ai-openai/__tests__/OpenAIHelper.test.ts +++ b/packages/ai-providers/server-ai-openai/__tests__/OpenAIHelper.test.ts @@ -41,7 +41,7 @@ it('returns success=true with usage extracted from the response', () => { expect(metrics).toEqual({ success: true, - usage: { total: 3, input: 1, output: 2 }, + tokens: { total: 3, input: 1, output: 2 }, }); }); diff --git a/packages/ai-providers/server-ai-openai/__tests__/OpenAIModelRunner.test.ts b/packages/ai-providers/server-ai-openai/__tests__/OpenAIModelRunner.test.ts index 613578e8f5..93e4aebff9 100644 --- a/packages/ai-providers/server-ai-openai/__tests__/OpenAIModelRunner.test.ts +++ b/packages/ai-providers/server-ai-openai/__tests__/OpenAIModelRunner.test.ts @@ -46,7 +46,7 @@ describe('OpenAIModelRunner', () => { expect(result.content).toBe('Hello there!'); expect(result.metrics).toEqual({ success: true, - usage: { total: 15, input: 10, output: 5 }, + tokens: { total: 15, input: 10, output: 5 }, }); expect(result.raw).toBe(mockResponse); expect(result.parsed).toBeUndefined(); diff --git a/packages/ai-providers/server-ai-openai/src/OpenAIAgentRunner.ts b/packages/ai-providers/server-ai-openai/src/OpenAIAgentRunner.ts 
index 2511149ec4..cc47b30a91 100644 --- a/packages/ai-providers/server-ai-openai/src/OpenAIAgentRunner.ts +++ b/packages/ai-providers/server-ai-openai/src/OpenAIAgentRunner.ts @@ -61,10 +61,10 @@ export class OpenAIAgentRunner implements Runner { [], ); - const usage: LDTokenUsage | undefined = getAIUsageFromAgentResult(result); + const tokens: LDTokenUsage | undefined = getAIUsageFromAgentResult(result); const metrics: LDAIMetrics = { success: true, - usage, + tokens, toolCalls: toolCalls.length > 0 ? toolCalls : undefined, }; diff --git a/packages/ai-providers/server-ai-openai/src/OpenAIHelper.ts b/packages/ai-providers/server-ai-openai/src/OpenAIHelper.ts index 179c8fc091..6d53737a63 100644 --- a/packages/ai-providers/server-ai-openai/src/OpenAIHelper.ts +++ b/packages/ai-providers/server-ai-openai/src/OpenAIHelper.ts @@ -42,7 +42,7 @@ export function getAIUsageFromResponse(response: any): LDTokenUsage | undefined export function getAIMetricsFromResponse(response: any): LDAIMetrics { return { success: true, - usage: getAIUsageFromResponse(response), + tokens: getAIUsageFromResponse(response), }; } diff --git a/packages/ai-providers/server-ai-vercel/__tests__/VercelHelper.test.ts b/packages/ai-providers/server-ai-vercel/__tests__/VercelHelper.test.ts index 833aeca4c3..d007bb2acd 100644 --- a/packages/ai-providers/server-ai-vercel/__tests__/VercelHelper.test.ts +++ b/packages/ai-providers/server-ai-vercel/__tests__/VercelHelper.test.ts @@ -62,7 +62,7 @@ describe('getAIMetricsFromResponse', () => { getAIMetricsFromResponse({ usage: { totalTokens: 5, promptTokens: 2, completionTokens: 3 }, }), - ).toEqual({ success: true, usage: { total: 5, input: 2, output: 3 } }); + ).toEqual({ success: true, tokens: { total: 5, input: 2, output: 3 } }); }); it('marks success=false when finishReason is "error"', () => { @@ -83,7 +83,7 @@ describe('getAIMetricsFromStream', () => { }); expect(result).toEqual({ success: true, - usage: { total: 100, input: 49, output: 51 }, + tokens: 
{ total: 100, input: 49, output: 51 }, }); }); diff --git a/packages/ai-providers/server-ai-vercel/__tests__/VercelModelRunner.test.ts b/packages/ai-providers/server-ai-vercel/__tests__/VercelModelRunner.test.ts index 49313865f1..f7beb6f4d4 100644 --- a/packages/ai-providers/server-ai-vercel/__tests__/VercelModelRunner.test.ts +++ b/packages/ai-providers/server-ai-vercel/__tests__/VercelModelRunner.test.ts @@ -51,7 +51,7 @@ describe('VercelModelRunner', () => { expect(out.content).toBe('Hi!'); expect(out.metrics).toEqual({ success: true, - usage: { total: 12, input: 7, output: 5 }, + tokens: { total: 12, input: 7, output: 5 }, }); expect(out.raw).toBe(result); }); @@ -87,7 +87,7 @@ describe('VercelModelRunner', () => { const out = await runner.run('hello'); - expect(out.metrics.usage).toEqual({ total: 100, input: 40, output: 60 }); + expect(out.metrics.tokens).toEqual({ total: 100, input: 40, output: 60 }); }); it('returns success=false when generateText throws', async () => { diff --git a/packages/ai-providers/server-ai-vercel/src/VercelHelper.ts b/packages/ai-providers/server-ai-vercel/src/VercelHelper.ts index add9b5d9f0..bda56a401b 100644 --- a/packages/ai-providers/server-ai-vercel/src/VercelHelper.ts +++ b/packages/ai-providers/server-ai-vercel/src/VercelHelper.ts @@ -44,16 +44,16 @@ export function mapUsageDataToLDTokenUsage(usageData: ModelUsageTokens): LDToken export function getAIMetricsFromResponse(response: TextResponse): LDAIMetrics { const finishReason = response?.finishReason ?? 
'unknown'; - let usage: LDTokenUsage | undefined; + let tokens: LDTokenUsage | undefined; if (response?.totalUsage) { - usage = mapUsageDataToLDTokenUsage(response.totalUsage); + tokens = mapUsageDataToLDTokenUsage(response.totalUsage); } else if (response?.usage) { - usage = mapUsageDataToLDTokenUsage(response.usage); + tokens = mapUsageDataToLDTokenUsage(response.usage); } return { success: finishReason !== 'error', - usage, + tokens, }; } @@ -66,24 +66,24 @@ export function getAIMetricsFromResponse(response: TextResponse): LDAIMetrics { export async function getAIMetricsFromStream(stream: StreamResponse): Promise { const finishReason = (await stream.finishReason?.catch(() => 'error')) ?? 'unknown'; - let usage: LDTokenUsage | undefined; + let tokens: LDTokenUsage | undefined; if (stream.totalUsage) { const usageData = await stream.totalUsage.catch(() => undefined); if (usageData) { - usage = mapUsageDataToLDTokenUsage(usageData); + tokens = mapUsageDataToLDTokenUsage(usageData); } } - if (!usage && stream.usage) { + if (!tokens && stream.usage) { const usageData = await stream.usage.catch(() => undefined); if (usageData) { - usage = mapUsageDataToLDTokenUsage(usageData); + tokens = mapUsageDataToLDTokenUsage(usageData); } } return { success: finishReason !== 'error', - usage, + tokens, }; } diff --git a/packages/sdk/server-ai/__tests__/Judge.test.ts b/packages/sdk/server-ai/__tests__/Judge.test.ts index a242d371ed..7223863f0e 100644 --- a/packages/sdk/server-ai/__tests__/Judge.test.ts +++ b/packages/sdk/server-ai/__tests__/Judge.test.ts @@ -192,7 +192,7 @@ describe('Judge', () => { }, metrics: { success: true, - usage: { + tokens: { total: 100, input: 50, output: 50, @@ -254,7 +254,7 @@ describe('Judge', () => { }, metrics: { success: true, - usage: { total: 100, input: 50, output: 50 }, + tokens: { total: 100, input: 50, output: 50 }, }, }; @@ -283,7 +283,7 @@ describe('Judge', () => { }, metrics: { success: true, - usage: { total: 100, input: 50, output: 50 }, 
+ tokens: { total: 100, input: 50, output: 50 }, }, }; @@ -356,7 +356,7 @@ describe('Judge', () => { }, metrics: { success: true, - usage: { total: 100, input: 50, output: 50 }, + tokens: { total: 100, input: 50, output: 50 }, }, }; @@ -391,7 +391,7 @@ describe('Judge', () => { }, metrics: { success: true, - usage: { total: 100, input: 50, output: 50 }, + tokens: { total: 100, input: 50, output: 50 }, }, }; @@ -426,7 +426,7 @@ describe('Judge', () => { }, metrics: { success: true, - usage: { total: 100, input: 50, output: 50 }, + tokens: { total: 100, input: 50, output: 50 }, }, }; @@ -462,7 +462,7 @@ describe('Judge', () => { }, metrics: { success: true, - usage: { total: 100, input: 50, output: 50 }, + tokens: { total: 100, input: 50, output: 50 }, }, }; @@ -512,7 +512,7 @@ describe('Judge', () => { parsed: undefined, metrics: { success: true, - usage: { total: 100, input: 50, output: 50 }, + tokens: { total: 100, input: 50, output: 50 }, }, }; @@ -538,7 +538,7 @@ describe('Judge', () => { parsed: {}, metrics: { success: true, - usage: { total: 100, input: 50, output: 50 }, + tokens: { total: 100, input: 50, output: 50 }, }, }; @@ -568,7 +568,7 @@ describe('Judge', () => { }, metrics: { success: true, - usage: { total: 100, input: 50, output: 50 }, + tokens: { total: 100, input: 50, output: 50 }, }, }; @@ -642,7 +642,7 @@ describe('Judge', () => { }, metrics: { success: true, - usage: { total: 100, input: 50, output: 50 }, + tokens: { total: 100, input: 50, output: 50 }, }, }; diff --git a/packages/sdk/server-ai/__tests__/LDAIConfigTrackerImpl.test.ts b/packages/sdk/server-ai/__tests__/LDAIConfigTrackerImpl.test.ts index 2c723de035..3d711b1aaa 100644 --- a/packages/sdk/server-ai/__tests__/LDAIConfigTrackerImpl.test.ts +++ b/packages/sdk/server-ai/__tests__/LDAIConfigTrackerImpl.test.ts @@ -723,7 +723,7 @@ describe('trackMetricsOf', () => { const mockResult = { response: 'test' }; const mockMetrics = { success: true, - usage: { total: 100, input: 50, output: 50 }, 
+ tokens: { total: 100, input: 50, output: 50 }, }; const metricsExtractor = jest.fn().mockReturnValue(mockMetrics); diff --git a/packages/sdk/server-ai/__tests__/ManagedAgentGraph.test.ts b/packages/sdk/server-ai/__tests__/ManagedAgentGraph.test.ts index bbe7883c09..cc5f160796 100644 --- a/packages/sdk/server-ai/__tests__/ManagedAgentGraph.test.ts +++ b/packages/sdk/server-ai/__tests__/ManagedAgentGraph.test.ts @@ -68,10 +68,10 @@ describe('ManagedAgentGraph', () => { success: true, path: ['node-a', 'node-b'], durationMs: 1500, - usage: { total: 100, input: 50, output: 50 }, + tokens: { total: 100, input: 50, output: 50 }, nodeMetrics: { - 'node-a': { success: true, usage: { total: 40, input: 20, output: 20 } }, - 'node-b': { success: true, usage: { total: 60, input: 30, output: 30 } }, + 'node-a': { success: true, tokens: { total: 40, input: 20, output: 20 } }, + 'node-b': { success: true, tokens: { total: 60, input: 30, output: 30 } }, }, }, }; @@ -103,7 +103,7 @@ describe('ManagedAgentGraph', () => { nodeMetrics: { n1: { success: true, - usage: { total: 10, input: 5, output: 5 }, + tokens: { total: 10, input: 5, output: 5 }, durationMs: 200, toolCalls: ['tool-a'], }, diff --git a/packages/sdk/server-ai/__tests__/ManagedModel.test.ts b/packages/sdk/server-ai/__tests__/ManagedModel.test.ts index f7dc3ecd75..57b5b58a3f 100644 --- a/packages/sdk/server-ai/__tests__/ManagedModel.test.ts +++ b/packages/sdk/server-ai/__tests__/ManagedModel.test.ts @@ -50,7 +50,7 @@ describe('ManagedModel', () => { it('passes the prompt directly to the runner without prepending config messages', async () => { const runnerResult: RunnerResult = { content: 'Response from model', - metrics: { success: true, usage: { total: 10, input: 4, output: 6 } }, + metrics: { success: true, tokens: { total: 10, input: 4, output: 6 } }, }; mockTracker.trackMetricsOf.mockImplementation(async (_extractor, func) => func()); @@ -68,7 +68,7 @@ describe('ManagedModel', () => { content: 'Hi there', metrics: 
{ success: true, - usage: { total: 12, input: 5, output: 7 }, + tokens: { total: 12, input: 5, output: 7 }, toolCalls: ['tool-1'], durationMs: 42, }, @@ -99,7 +99,7 @@ describe('ManagedModel', () => { it('forwards the runner result through tracker.trackMetricsOf', async () => { const runnerResult: RunnerResult = { content: 'tracked', - metrics: { success: true, usage: { total: 1, input: 1, output: 0 } }, + metrics: { success: true, tokens: { total: 1, input: 1, output: 0 } }, }; mockTracker.trackMetricsOf.mockImplementation(async (_extractor, func) => func()); @@ -117,7 +117,7 @@ describe('ManagedModel', () => { it('does not retain conversation state across runs', async () => { const runnerResult: RunnerResult = { content: 'ok', - metrics: { success: true, usage: { total: 1, input: 1, output: 0 } }, + metrics: { success: true, tokens: { total: 1, input: 1, output: 0 } }, }; mockTracker.trackMetricsOf.mockImplementation(async (_extractor, func) => func()); diff --git a/packages/sdk/server-ai/src/LDAIConfigTrackerImpl.ts b/packages/sdk/server-ai/src/LDAIConfigTrackerImpl.ts index 97957e6cf2..74525d7e1c 100644 --- a/packages/sdk/server-ai/src/LDAIConfigTrackerImpl.ts +++ b/packages/sdk/server-ai/src/LDAIConfigTrackerImpl.ts @@ -208,8 +208,8 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker { } // Track token usage if available - if (metrics.usage) { - this.trackTokens(metrics.usage); + if (metrics.tokens) { + this.trackTokens(metrics.tokens); } // Track tool calls if available @@ -260,8 +260,8 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker { } // Track token usage if available - if (metrics.usage) { - this.trackTokens(metrics.usage); + if (metrics.tokens) { + this.trackTokens(metrics.tokens); } // Track tool calls if available diff --git a/packages/sdk/server-ai/src/api/graph/ManagedAgentGraph.ts b/packages/sdk/server-ai/src/api/graph/ManagedAgentGraph.ts index d7da031d69..6b9934134d 100644 --- 
a/packages/sdk/server-ai/src/api/graph/ManagedAgentGraph.ts +++ b/packages/sdk/server-ai/src/api/graph/ManagedAgentGraph.ts @@ -47,7 +47,7 @@ export class ManagedAgentGraph { success: runnerResult.metrics.success, path: runnerResult.metrics.path, durationMs: runnerResult.metrics.durationMs, - tokens: runnerResult.metrics.usage, + tokens: runnerResult.metrics.tokens, nodeMetrics: this._trackNodeMetrics(runnerResult.metrics.nodeMetrics), resumptionToken: graphTracker.resumptionToken, }; @@ -79,8 +79,8 @@ export class ManagedAgentGraph { } const tracker = node.getConfig().createTracker!(); - if (metrics.usage) { - tracker.trackTokens(metrics.usage); + if (metrics.tokens) { + tracker.trackTokens(metrics.tokens); } if (metrics.durationMs !== undefined) { tracker.trackDuration(metrics.durationMs); diff --git a/packages/sdk/server-ai/src/api/graph/types.ts b/packages/sdk/server-ai/src/api/graph/types.ts index dd023ed24d..b7b9b04a5d 100644 --- a/packages/sdk/server-ai/src/api/graph/types.ts +++ b/packages/sdk/server-ai/src/api/graph/types.ts @@ -102,7 +102,7 @@ export interface LDAIGraphMetrics { /** * Aggregate token usage across the entire graph invocation, if available. */ - usage?: LDTokenUsage; + tokens?: LDTokenUsage; /** * Per-node metrics keyed by agent config key. diff --git a/packages/sdk/server-ai/src/api/metrics/LDAIMetrics.ts b/packages/sdk/server-ai/src/api/metrics/LDAIMetrics.ts index 2d9f3a47d7..0abe1a58de 100644 --- a/packages/sdk/server-ai/src/api/metrics/LDAIMetrics.ts +++ b/packages/sdk/server-ai/src/api/metrics/LDAIMetrics.ts @@ -14,7 +14,7 @@ export interface LDAIMetrics { * Token usage information for the operation. * This will be undefined if no token usage data is available. */ - usage?: LDTokenUsage; + tokens?: LDTokenUsage; /** * List of tool call identifiers made during the operation.