diff --git a/packages/sdk/server-ai/__tests__/Judge.test.ts b/packages/sdk/server-ai/__tests__/Judge.test.ts index 6c8985b914..c0def31740 100644 --- a/packages/sdk/server-ai/__tests__/Judge.test.ts +++ b/packages/sdk/server-ai/__tests__/Judge.test.ts @@ -47,14 +47,14 @@ describe('Judge', () => { ], model: { name: 'gpt-4' }, provider: { name: 'openai' }, - tracker: mockTracker, + createTracker: () => mockTracker, evaluationMetricKey: 'relevance', }; }); describe('constructor', () => { it('initializes with proper configuration', () => { - const judge = new Judge(judgeConfig, mockTracker, mockProvider, mockLogger); + const judge = new Judge(judgeConfig, mockProvider, mockLogger); expect(judge).toBeDefined(); }); @@ -64,7 +64,7 @@ describe('Judge', () => { let judge: Judge; beforeEach(() => { - judge = new Judge(judgeConfig, mockTracker, mockProvider, mockLogger); + judge = new Judge(judgeConfig, mockProvider, mockLogger); }); it('evaluates AI response successfully', async () => { @@ -209,12 +209,7 @@ describe('Judge', () => { evaluationMetricKey: undefined, evaluationMetricKeys: [], }; - const judgeWithoutMetrics = new Judge( - configWithoutMetrics, - mockTracker, - mockProvider, - mockLogger, - ); + const judgeWithoutMetrics = new Judge(configWithoutMetrics, mockProvider, mockLogger); const result = await judgeWithoutMetrics.evaluate('test input', 'test output'); @@ -231,12 +226,7 @@ describe('Judge', () => { evaluationMetricKey: 'relevance', evaluationMetricKeys: undefined, }; - const judgeWithSingleKey = new Judge( - configWithSingleKey, - mockTracker, - mockProvider, - mockLogger, - ); + const judgeWithSingleKey = new Judge(configWithSingleKey, mockProvider, mockLogger); const mockStructuredResponse: StructuredResponse = { data: { @@ -275,12 +265,7 @@ describe('Judge', () => { evaluationMetricKey: undefined, evaluationMetricKeys: ['relevance', 'accuracy'], }; - const judgeWithLegacyKeys = new Judge( - configWithLegacyKeys, - mockTracker, - mockProvider, - mockLogger, - ); + const judgeWithLegacyKeys = new Judge(configWithLegacyKeys, mockProvider, mockLogger); const mockStructuredResponse: StructuredResponse = { data: { @@ -319,12 +304,7 @@ describe('Judge', () => { evaluationMetricKey: undefined, evaluationMetricKeys: ['', ' ', 'relevance', 'accuracy'], }; - const judgeWithInvalidKeys = new Judge( - configWithInvalidKeys, - mockTracker, - mockProvider, - mockLogger, - ); + const judgeWithInvalidKeys = new Judge(configWithInvalidKeys, mockProvider, mockLogger); const mockStructuredResponse: StructuredResponse = { data: { @@ -364,7 +344,7 @@ describe('Judge', () => { evaluationMetricKey: 'helpfulness', evaluationMetricKeys: ['relevance', 'accuracy'], }; - const judgeWithBoth = new Judge(configWithBoth, mockTracker, mockProvider, mockLogger); + const judgeWithBoth = new Judge(configWithBoth, mockProvider, mockLogger); const mockStructuredResponse: StructuredResponse = { data: { @@ -402,12 +382,7 @@ describe('Judge', () => { ...judgeConfig, messages: undefined, }; - const judgeWithoutMessages = new Judge( - configWithoutMessages, - mockTracker, - mockProvider, - mockLogger, - ); + const judgeWithoutMessages = new Judge(configWithoutMessages, mockProvider, mockLogger); const result = await judgeWithoutMessages.evaluate('test input', 'test output'); @@ -511,7 +486,7 @@ describe('Judge', () => { let judge: Judge; beforeEach(() => { - judge = new Judge(judgeConfig, mockTracker, mockProvider, mockLogger); + judge = new Judge(judgeConfig, mockProvider, mockLogger); }); it('evaluates messages and response successfully', async () => { @@ -596,7 +571,7 @@ describe('Judge', () => { let judge: Judge; beforeEach(() => { - judge = new Judge(judgeConfig, mockTracker, mockProvider, mockLogger); + judge = new Judge(judgeConfig, mockProvider, mockLogger); }); it('constructs evaluation messages correctly', () => { @@ -621,7 +596,7 @@ describe('Judge', () => { let judge: Judge; beforeEach(() => { - judge = new Judge(judgeConfig, mockTracker, mockProvider, mockLogger); + judge = new Judge(judgeConfig, mockProvider, mockLogger); }); it('parses valid evaluation response correctly', () => { @@ -633,7 +608,7 @@ describe('Judge', () => { }, }; - const result = parseResponse(responseData, 'relevance'); + const result = parseResponse(responseData, 'relevance', mockTracker); expect(result).toEqual({ relevance: { score: 0.8, reasoning: 'Good' }, @@ -647,7 +622,7 @@ describe('Judge', () => { relevance: { score: 0.8, reasoning: 'Good' }, }; - const result = parseResponse(responseData, 'relevance'); + const result = parseResponse(responseData, 'relevance', mockTracker); expect(result).toEqual({}); }); @@ -661,7 +636,7 @@ describe('Judge', () => { }, }; - const result = parseResponse(responseData, 'relevance'); + const result = parseResponse(responseData, 'relevance', mockTracker); expect(result).toEqual({}); }); @@ -675,7 +650,7 @@ describe('Judge', () => { }, }; - const result = parseResponse(responseData, 'relevance'); + const result = parseResponse(responseData, 'relevance', mockTracker); expect(result).toEqual({}); expect(mockLogger.warn).toHaveBeenCalledWith( @@ -693,7 +668,7 @@ describe('Judge', () => { }, }; - const result = parseResponse(responseData, 'relevance'); + const result = parseResponse(responseData, 'relevance', mockTracker); expect(result).toEqual({}); expect(mockLogger.warn).toHaveBeenCalledWith( @@ -711,7 +686,7 @@ describe('Judge', () => { }, }; - const result = parseResponse(responseData, 'relevance'); + const result = parseResponse(responseData, 'relevance', mockTracker); expect(result).toEqual({}); expect(mockLogger.warn).toHaveBeenCalledWith( @@ -729,7 +704,7 @@ describe('Judge', () => { }, }; - const result = parseResponse(responseData, 'relevance'); + const result = parseResponse(responseData, 'relevance', mockTracker); expect(result).toEqual({}); expect(mockLogger.warn).toHaveBeenCalledWith( @@ -744,12 +719,7 @@ describe('Judge', () => { evaluationMetricKey: undefined, evaluationMetricKeys: [], }; - const judgeWithEmptyKeys = new Judge( - configWithEmptyKeys, - mockTracker, - mockProvider, - mockLogger, - ); + const judgeWithEmptyKeys = new Judge(configWithEmptyKeys, mockProvider, mockLogger); const result = await judgeWithEmptyKeys.evaluate('test input', 'test output'); @@ -769,7 +739,7 @@ describe('Judge', () => { }, }; - const result = parseResponse(responseData, 'relevance'); + const result = parseResponse(responseData, 'relevance', mockTracker); expect(result).toEqual({}); expect(mockLogger.warn).toHaveBeenCalledWith( @@ -787,7 +757,7 @@ describe('Judge', () => { }, }; - const result = parseResponse(responseData, 'relevance'); + const result = parseResponse(responseData, 'relevance', mockTracker); expect(result).toEqual({}); expect(mockLogger.warn).toHaveBeenCalledWith( diff --git a/packages/sdk/server-ai/__tests__/LDAIClientImpl.test.ts b/packages/sdk/server-ai/__tests__/LDAIClientImpl.test.ts index 35e8b671a2..9695c1f815 100644 --- a/packages/sdk/server-ai/__tests__/LDAIClientImpl.test.ts +++ b/packages/sdk/server-ai/__tests__/LDAIClientImpl.test.ts @@ -85,7 +85,7 @@ describe('config evaluation', () => { { role: 'system', content: 'Hello John' }, { role: 'user', content: 'Score: 42' }, ]); - expect(result.tracker).toBeDefined(); + expect(result.createTracker).toBeDefined(); expect(result.enabled).toBe(true); evaluateSpy.mockRestore(); }); @@ -143,7 +143,7 @@ describe('config evaluation', () => { expect(result.instructions).toBe( 'You are a helpful assistant. Your name is John and your score is 42', ); - expect(result.tracker).toBeDefined(); + expect(result.createTracker).toBeDefined(); expect(result.enabled).toBe(true); evaluateSpy.mockRestore(); }); @@ -176,7 +176,7 @@ describe('config evaluation', () => { expect(evaluateSpy).toHaveBeenCalledWith(key, testContext, defaultValue, 'judge', undefined); // Should use first value from evaluationMetricKeys expect(result.evaluationMetricKey).toBe('relevance'); - expect(result.tracker).toBeDefined(); + expect(result.createTracker).toBeDefined(); expect(result.enabled).toBe(true); evaluateSpy.mockRestore(); }); @@ -208,7 +208,7 @@ describe('config evaluation', () => { expect(evaluateSpy).toHaveBeenCalledWith(key, testContext, defaultValue, 'judge', undefined); expect(result.evaluationMetricKey).toBe('relevance'); - expect(result.tracker).toBeDefined(); + expect(result.createTracker).toBeDefined(); expect(result.enabled).toBe(true); evaluateSpy.mockRestore(); }); @@ -241,7 +241,7 @@ describe('config evaluation', () => { expect(evaluateSpy).toHaveBeenCalledWith(key, testContext, defaultValue, 'judge', undefined); expect(result.evaluationMetricKey).toBe('helpfulness'); - expect(result.tracker).toBeDefined(); + expect(result.createTracker).toBeDefined(); expect(result.enabled).toBe(true); evaluateSpy.mockRestore(); }); @@ -275,7 +275,7 @@ describe('config evaluation', () => { expect(evaluateSpy).toHaveBeenCalledWith(key, testContext, defaultValue, 'judge', undefined); // Empty string should be treated as invalid, so should fall back to first value in evaluationMetricKeys expect(result.evaluationMetricKey).toBe('relevance'); - expect(result.tracker).toBeDefined(); + expect(result.createTracker).toBeDefined(); expect(result.enabled).toBe(true); evaluateSpy.mockRestore(); }); @@ -308,7 +308,7 @@ describe('config evaluation', () => { expect(evaluateSpy).toHaveBeenCalledWith(key, testContext, defaultValue, 'judge', undefined); // Should skip empty and whitespace strings, use first valid value expect(result.evaluationMetricKey).toBe('relevance'); - expect(result.tracker).toBeDefined(); + expect(result.createTracker).toBeDefined(); expect(result.enabled).toBe(true); evaluateSpy.mockRestore(); }); @@ -331,7 +331,7 @@ describe('config evaluation', () => { const result = await client.completionConfig(key, testContext, defaultValue); expect(result.enabled).toBe(false); - expect(result.tracker).toBeUndefined(); + expect(result.createTracker).toBeUndefined(); }); it('handles missing metadata mode by defaulting to completion mode', async () => { @@ -352,7 +352,7 @@ describe('config evaluation', () => { const result = await client.completionConfig(key, testContext, defaultValue); expect(result.enabled).toBe(false); - expect(result.tracker).toBeDefined(); + expect(result.createTracker).toBeDefined(); expect(result.messages).toEqual([{ role: 'system', content: 'Hello' }]); expect(result.model).toEqual({ name: 'example-provider', parameters: { name: 'imagination' } }); }); @@ -381,7 +381,7 @@ describe('config evaluation', () => { expect(result.model).toEqual(defaultValue.model); expect(result.messages).toEqual(defaultValue.messages); expect(result.provider).toEqual(defaultValue.provider); - expect(result.tracker).toBeDefined(); + expect(result.createTracker).toBeDefined(); expect(result.enabled).toBe(defaultValue.enabled); expect(mockLdClient.variation).toHaveBeenCalledWith( key, @@ -408,7 +408,7 @@ describe('completionConfig method', () => { const mockConfig = { model: { name: 'test-model' }, messages: [], - tracker: {} as any, + createTracker: () => ({}) as any, enabled: true, }; @@ -449,7 +449,7 @@ describe('agentConfig method', () => { const mockConfig = { model: { name: 'test-model' }, instructions: 'You are a helpful assistant.', - tracker: {} as any, + createTracker: () => ({}) as any, enabled: true, }; @@ -527,7 +527,7 @@ describe('agents method', () => { }, provider: { name: 'openai' }, instructions: 'You are a research assistant specializing in climate change.', - tracker: expect.any(Object), + createTracker: expect.any(Function), enabled: true, }, 'writing-agent': { @@ -538,7 +538,7 @@ describe('agents method', () => { }, provider: { name: 'anthropic' }, instructions: 'You are a writing assistant with academic style.', - tracker: expect.any(Object), + createTracker: expect.any(Function), enabled: true, }, }); @@ -582,7 +582,7 @@ describe('judgeConfig method', () => { provider: { name: 'openai' }, evaluationMetricKeys: ['relevance'], messages: [{ role: 'system' as const, content: 'You are a judge for {{metric}}.' }], - tracker: {} as any, + createTracker: () => ({}) as any, toVercelAISDK: jest.fn(), }; @@ -631,6 +631,7 @@ describe('createJudge method', () => { enabled: false, }; + const mockTrackerInstance = {} as any; const mockJudgeConfig = { key: 'test-judge', enabled: true, @@ -638,7 +639,7 @@ describe('createJudge method', () => { provider: { name: 'openai' }, evaluationMetricKeys: ['relevance', 'accuracy'], messages: [{ role: 'system' as const, content: 'You are a judge.' }], - tracker: {} as any, + createTracker: () => mockTrackerInstance, toVercelAISDK: jest.fn(), }; @@ -658,12 +659,7 @@ describe('createJudge method', () => { response_to_evaluate: '{{response_to_evaluate}}', }); expect(AIProviderFactory.create).toHaveBeenCalledWith(mockJudgeConfig, undefined, undefined); - expect(Judge).toHaveBeenCalledWith( - mockJudgeConfig, - mockJudgeConfig.tracker, - mockProvider, - undefined, - ); + expect(Judge).toHaveBeenCalledWith(mockJudgeConfig, mockProvider, undefined); expect(result).toBe(mockJudge); judgeConfigSpy.mockRestore(); }); @@ -706,7 +702,7 @@ describe('createJudge method', () => { provider: { name: 'openai' }, evaluationMetricKeys: ['relevance'], messages: [{ role: 'system' as const, content: 'You are a judge.' }], - tracker: {} as any, + createTracker: () => ({}) as any, toVercelAISDK: jest.fn(), }; @@ -741,6 +737,30 @@ describe('createJudge method', () => { }); }); +describe('createTracker method', () => { + it('reconstructs a tracker from a resumption token', () => { + const client = new LDAIClientImpl(mockLdClient); + + // Build a token manually: { runId, configKey, variationKey, version } + const payload = JSON.stringify({ + runId: 'a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11', + configKey: 'my-config', + variationKey: 'v1', + version: 3, + }); + const token = Buffer.from(payload).toString('base64url'); + + const tracker = client.createTracker(token, testContext); + + expect(tracker.getTrackData()).toMatchObject({ + runId: 'a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11', + configKey: 'my-config', + variationKey: 'v1', + version: 3, + }); + }); +}); + describe('optional default values', () => { it('uses a disabled completion config when no default is provided', async () => { const client = new LDAIClientImpl(mockLdClient); diff --git a/packages/sdk/server-ai/__tests__/LDAIConfigTrackerImpl.test.ts b/packages/sdk/server-ai/__tests__/LDAIConfigTrackerImpl.test.ts index 5ea65c4c93..e644eff377 100644 --- a/packages/sdk/server-ai/__tests__/LDAIConfigTrackerImpl.test.ts +++ b/packages/sdk/server-ai/__tests__/LDAIConfigTrackerImpl.test.ts @@ -4,11 +4,18 @@ import { LDFeedbackKind } from '../src/api/metrics'; import { LDAIConfigTrackerImpl } from '../src/LDAIConfigTrackerImpl'; import { LDClientMin } from '../src/LDClientMin'; +const testRunId = 'a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11'; +jest.mock('node:crypto', () => ({ + randomUUID: jest.fn(() => testRunId), +})); + const mockTrack = jest.fn(); const mockVariation = jest.fn(); +const mockWarn = jest.fn(); const mockLdClient: LDClientMin = { track: mockTrack, variation: mockVariation, + logger: { warn: mockWarn, error: jest.fn(), info: jest.fn(), debug: jest.fn() } as any, }; const testContext: LDContext = { kind: 'user', key: 'test-user' }; @@ -24,6 +31,7 @@ const getExpectedTrackData = () => ({ version, modelName, providerName, + runId: testRunId, }); beforeEach(() => { @@ -33,6 +41,7 @@ beforeEach(() => { it('tracks duration', () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, + testRunId, configKey, variationKey, version, @@ -53,6 +62,7 @@ it('tracks duration', () => { it('tracks duration of async function', async () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, + testRunId, configKey, variationKey, version, @@ -76,6 +86,7 @@ it('tracks duration of async function', async () => { it('tracks time to first token', () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, + testRunId, configKey, variationKey, version, @@ -96,6 +107,7 @@ it('tracks time to first token', () => { it('tracks positive feedback', () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, + testRunId, configKey, variationKey, version, @@ -116,6 +128,7 @@ it('tracks positive feedback', () => { it('tracks negative feedback', () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, + testRunId, configKey, variationKey, version, @@ -136,6 +149,7 @@ it('tracks negative feedback', () => { it('tracks success', () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, + testRunId, configKey, variationKey, version, @@ -156,6 +170,7 @@ it('tracks success', () => { it('tracks OpenAI usage', async () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, + testRunId, configKey, variationKey, version, @@ -223,6 +238,7 @@ it('tracks OpenAI usage', async () => { it('tracks error when OpenAI metrics function throws', async () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, + testRunId, configKey, variationKey, version, @@ -264,6 +280,7 @@ it('tracks error when OpenAI metrics function throws', async () => { it('tracks Bedrock conversation with successful response', () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, + testRunId, configKey, variationKey, version, @@ -334,6 +351,7 @@ it('tracks Bedrock conversation with successful response', () => { it('tracks Bedrock conversation with error response', () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, + testRunId, configKey, variationKey, version, @@ -369,6 +387,7 @@ describe('Vercel AI SDK generateText', () => { it('tracks Vercel AI SDK usage', async () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, + testRunId, configKey, variationKey, version, @@ -436,6 +455,7 @@ describe('Vercel AI SDK generateText', () => { it('tracks error when Vercel AI SDK metrics function throws', async () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, + testRunId, configKey, variationKey, version, @@ -478,6 +498,7 @@ describe('Vercel AI SDK generateText', () => { it('tracks tokens', () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, + testRunId, configKey, variationKey, version, @@ -521,6 +542,7 @@ it('tracks tokens', () => { it('only tracks non-zero token counts', () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, + testRunId, configKey, variationKey, version, @@ -560,6 +582,7 @@ it('only tracks non-zero token counts', () => { it('returns empty summary when no metrics tracked', () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, + testRunId, configKey, variationKey, version, @@ -576,6 +599,7 @@ it('returns empty summary when no metrics tracked', () => { it('summarizes tracked metrics', () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, + testRunId, configKey, variationKey, version, @@ -612,6 +636,7 @@ it('summarizes tracked metrics', () => { it('tracks duration when async function throws', async () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, + testRunId, configKey, variationKey, version, @@ -639,6 +664,7 @@ it('tracks duration when async function throws', async () => { it('tracks error', () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, + testRunId, configKey, variationKey, version, @@ -660,6 +686,7 @@ describe('trackMetricsOf', () => { it('tracks success and token usage from metrics', async () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, + testRunId, configKey, variationKey, version, @@ -715,6 +742,7 @@ describe('trackMetricsOf', () => { it('tracks failure when metrics indicate failure', async () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, + testRunId, configKey, variationKey, version, @@ -745,6 +773,7 @@ describe('trackMetricsOf', () => { it('tracks failure when operation throws', async () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, + testRunId, configKey, variationKey, version, @@ -774,6 +803,7 @@ describe('trackMetricsOf', () => { it('tracks metrics without token usage', async () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, + testRunId, configKey, variationKey, version, @@ -815,6 +845,7 @@ describe('trackJudgeResponse', () => { it('tracks evaluation metric key with score', () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, + testRunId, configKey, variationKey, version, @@ -844,6 +875,7 @@ describe('trackJudgeResponse', () => { it('tracks multiple evaluation metrics when present', () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, + testRunId, configKey, variationKey, version, @@ -882,6 +914,7 @@ describe('trackToolCall', () => { it('tracks a single tool call', () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, + testRunId, configKey, variationKey, version, @@ -903,6 +936,7 @@ describe('trackToolCall', () => { it('includes graphKey when provided', () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, + testRunId, configKey, variationKey, version, @@ -926,6 +960,7 @@ describe('trackToolCalls', () => { it('tracks multiple tool calls', () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, + testRunId, configKey, variationKey, version, @@ -962,6 +997,7 @@ describe('graphKey parameter support', () => { it('includes graphKey in trackDuration event', () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, + testRunId, configKey, variationKey, version, @@ -983,6 +1019,7 @@ describe('graphKey parameter support', () => { it('includes graphKey in trackSuccess event', () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, + testRunId, configKey, variationKey, version, @@ -1004,6 +1041,7 @@ describe('graphKey parameter support', () => { it('does not include graphKey when not provided', () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, + testRunId, configKey, variationKey, version, @@ -1022,3 +1060,255 @@ describe('graphKey parameter support', () => { ); }); }); + +describe('at-most-once semantics', () => { + it('drops duplicate trackDuration call with warning', () => { + const tracker = new LDAIConfigTrackerImpl( + mockLdClient, + testRunId, + configKey, + variationKey, + version, + modelName, + providerName, + testContext, + ); + tracker.trackDuration(1000); + tracker.trackDuration(2000); + + expect(mockTrack).toHaveBeenCalledTimes(1); + expect(mockWarn).toHaveBeenCalledTimes(1); + expect(mockWarn).toHaveBeenCalledWith(expect.stringContaining('Duration')); + }); + + it('drops duplicate trackSuccess call with warning', () => { + const tracker = new LDAIConfigTrackerImpl( + mockLdClient, + testRunId, + configKey, + variationKey, + version, + modelName, + providerName, + testContext, + ); + tracker.trackSuccess(); + tracker.trackSuccess(); + + expect(mockTrack).toHaveBeenCalledTimes(1); + expect(mockWarn).toHaveBeenCalledTimes(1); + }); + + it('drops trackError call after trackSuccess with warning', () => { + const tracker = new LDAIConfigTrackerImpl( + mockLdClient, + testRunId, + configKey, + variationKey, + version, + modelName, + providerName, + testContext, + ); + tracker.trackSuccess(); + tracker.trackError(); + + expect(mockTrack).toHaveBeenCalledTimes(1); + expect(mockWarn).toHaveBeenCalledTimes(1); + }); + + it('drops duplicate trackFeedback call with warning', () => { + const tracker = new LDAIConfigTrackerImpl( + mockLdClient, + testRunId, + configKey, + variationKey, + version, + modelName, + providerName, + testContext, + ); + tracker.trackFeedback({ kind: LDFeedbackKind.Positive }); + tracker.trackFeedback({ kind: LDFeedbackKind.Negative }); + + expect(mockTrack).toHaveBeenCalledTimes(1); + expect(mockWarn).toHaveBeenCalledTimes(1); + }); + + it('drops duplicate trackTokens call with warning', () => { + const tracker = new LDAIConfigTrackerImpl( + mockLdClient, + testRunId, + configKey, + variationKey, + version, + modelName, + providerName, + testContext, + ); + tracker.trackTokens({ total: 100, input: 50, output: 50 }); + tracker.trackTokens({ total: 200, input: 100, output: 100 }); + + // First call tracks 3 events (total, input, output), second is dropped + expect(mockTrack).toHaveBeenCalledTimes(3); + expect(mockWarn).toHaveBeenCalledTimes(1); + }); + + it('drops duplicate trackTimeToFirstToken call with warning', () => { + const tracker = new LDAIConfigTrackerImpl( + mockLdClient, + testRunId, + configKey, + variationKey, + version, + modelName, + providerName, + testContext, + ); + tracker.trackTimeToFirstToken(100); + tracker.trackTimeToFirstToken(200); + + expect(mockTrack).toHaveBeenCalledTimes(1); + expect(mockWarn).toHaveBeenCalledTimes(1); + }); +}); + +describe('resumptionToken', () => { + it('encodes runId, configKey, variationKey, and version as URL-safe Base64 JSON', () => { + const tracker = new LDAIConfigTrackerImpl( + mockLdClient, + testRunId, + configKey, + variationKey, + version, + modelName, + providerName, + testContext, + ); + + const token = tracker.resumptionToken; + const decoded = JSON.parse(Buffer.from(token, 'base64url').toString('utf8')); + + expect(decoded).toEqual({ + runId: testRunId, + configKey, + variationKey, + version, + }); + }); + + it('includes empty variationKey explicitly when not set', () => { + const tracker = new LDAIConfigTrackerImpl( + mockLdClient, + testRunId, + configKey, + '', + version, + modelName, + providerName, + testContext, + ); + + const token = tracker.resumptionToken; + const decoded = JSON.parse(Buffer.from(token, 'base64url').toString('utf8')); + + expect(decoded).toEqual({ + runId: testRunId, + configKey, + variationKey: '', + version, + }); + expect('variationKey' in decoded).toBe(true); + }); + + it('uses URL-safe Base64 encoding (no + / or = characters)', () => { + const tracker = new LDAIConfigTrackerImpl( + mockLdClient, + testRunId, + configKey, + variationKey, + version, + modelName, + providerName, + testContext, + ); + + const token = tracker.resumptionToken; + expect(token).not.toMatch(/[+/=]/); + }); +}); + +describe('fromResumptionToken', () => { + it('reconstructs tracker with original runId', () => { + const original = new LDAIConfigTrackerImpl( + mockLdClient, + testRunId, + configKey, + variationKey, + version, + modelName, + providerName, + testContext, + ); + + const reconstructed = LDAIConfigTrackerImpl.fromResumptionToken( + original.resumptionToken, + mockLdClient, + testContext, + ); + + expect(reconstructed.getTrackData().runId).toBe(testRunId); + expect(reconstructed.getTrackData().configKey).toBe(configKey); + expect(reconstructed.getTrackData().variationKey).toBe(variationKey); + expect(reconstructed.getTrackData().version).toBe(version); + }); + + it('reconstructs tracker with empty variationKey when none was set', () => { + const original = new LDAIConfigTrackerImpl( + mockLdClient, + testRunId, + configKey, + '', + version, + modelName, + providerName, + testContext, + ); + + const reconstructed = LDAIConfigTrackerImpl.fromResumptionToken( + original.resumptionToken, + mockLdClient, + testContext, + ); + + expect(reconstructed.getTrackData().variationKey).toBe(''); + }); + + it('reconstructed tracker emits track events with original runId', () => { + const original = new LDAIConfigTrackerImpl( + mockLdClient, + testRunId, + configKey, + variationKey, + version, + modelName, + providerName, + testContext, + ); + + const reconstructed = LDAIConfigTrackerImpl.fromResumptionToken( + original.resumptionToken, + mockLdClient, + testContext, + ); + + reconstructed.trackSuccess(); + + expect(mockTrack).toHaveBeenCalledWith( + '$ld:ai:generation:success', + testContext, + expect.objectContaining({ runId: testRunId }), + 1, + ); + }); +}); diff --git a/packages/sdk/server-ai/__tests__/TrackedChat.test.ts b/packages/sdk/server-ai/__tests__/TrackedChat.test.ts index d750a47e65..75681b0f83 100644 --- a/packages/sdk/server-ai/__tests__/TrackedChat.test.ts +++ b/packages/sdk/server-ai/__tests__/TrackedChat.test.ts @@ -38,13 +38,13 @@ describe('TrackedChat', () => { messages: [{ role: 'system', content: 'You are a helpful assistant.' }], model: { name: 'gpt-4' }, provider: { name: 'openai' }, - tracker: mockTracker, + createTracker: () => mockTracker, }; }); describe('appendMessages', () => { it('appends messages to the conversation history', () => { - const chat = new TrackedChat(aiConfig, mockTracker, mockProvider); + const chat = new TrackedChat(aiConfig, mockProvider); const messagesToAppend: LDMessage[] = [ { role: 'user', content: 'Hello' }, @@ -60,7 +60,7 @@ describe('TrackedChat', () => { }); it('appends multiple message batches sequentially', () => { - const chat = new TrackedChat(aiConfig, mockTracker, mockProvider); + const chat = new TrackedChat(aiConfig, mockProvider); chat.appendMessages([{ role: 'user', content: 'First message' }]); chat.appendMessages([{ role: 'assistant', content: 'Second message' }]); @@ -74,7 +74,7 @@ describe('TrackedChat', () => { }); it('handles empty message array', () => { - const chat = new TrackedChat(aiConfig, mockTracker, mockProvider); + const chat = new TrackedChat(aiConfig, mockProvider); chat.appendMessages([]); @@ -85,7 +85,7 @@ describe('TrackedChat', () => { describe('getMessages', () => { it('returns only conversation history when includeConfigMessages is false', () => { - const chat = new TrackedChat(aiConfig, mockTracker, mockProvider); + const chat = new TrackedChat(aiConfig, mockProvider); chat.appendMessages([ { role: 'user', content: 'User message' }, @@ -100,7 +100,7 @@ describe('TrackedChat', () => { }); it('returns only conversation history when includeConfigMessages is omitted (defaults to false)', () => { - const chat = new TrackedChat(aiConfig, mockTracker, mockProvider); + const chat = new TrackedChat(aiConfig, mockProvider); chat.appendMessages([{ role: 'user', content: 'User message' }]); @@ -111,7 +111,7 @@ describe('TrackedChat', () => { }); it('returns config messages prepended when includeConfigMessages is true', () => { - const chat = new TrackedChat(aiConfig, mockTracker, mockProvider); + const chat = new TrackedChat(aiConfig, mockProvider); chat.appendMessages([ { role: 'user', content: 'User message' }, @@ -127,7 +127,7 @@ describe('TrackedChat', () => { }); it('returns only config messages when no conversation history exists and includeConfigMessages is true', () => { - const chat = new TrackedChat(aiConfig, mockTracker, mockProvider); + const chat = new TrackedChat(aiConfig, mockProvider); const messages = chat.getMessages(true); @@ -140,7 +140,7 @@ describe('TrackedChat', () => { ...aiConfig, messages: [], }; - const chat = new TrackedChat(configWithoutMessages, mockTracker, mockProvider); + const chat = new TrackedChat(configWithoutMessages, mockProvider); const messages = chat.getMessages(false); @@ -148,7 +148,7 @@ describe('TrackedChat', () => { }); it('returns a copy of the messages array (not a reference)', () => { - const chat = new TrackedChat(aiConfig, mockTracker, mockProvider); + const chat = new TrackedChat(aiConfig, mockProvider); chat.appendMessages([{ role: 'user', content: 'Original message' }]); @@ -171,7 +171,7 @@ describe('TrackedChat', () => { ...aiConfig, messages: undefined, }; - const chat = new TrackedChat(configWithoutMessages, mockTracker, mockProvider); + const chat = new TrackedChat(configWithoutMessages, mockProvider); chat.appendMessages([{ role: 'user', content: 'User message' }]); @@ -196,7 +196,7 @@ describe('TrackedChat', () => { mockProvider.invokeModel.mockResolvedValue(mockResponse); - const chat = new TrackedChat(aiConfig, mockTracker, mockProvider); + const chat = new TrackedChat(aiConfig, mockProvider); await chat.invoke('Hello'); @@ -216,7 +216,7 @@ describe('TrackedChat', () => { mockProvider.invokeModel.mockResolvedValue(mockResponse); - const chat = new TrackedChat(aiConfig, mockTracker, mockProvider); + const chat = new TrackedChat(aiConfig, mockProvider); chat.appendMessages([{ role: 'user', content: 'Pre-appended message' }]); await chat.invoke('New user input'); diff --git a/packages/sdk/server-ai/examples/bedrock/src/index.ts b/packages/sdk/server-ai/examples/bedrock/src/index.ts index ac331d23e8..e1cbf93e06 100644 --- a/packages/sdk/server-ai/examples/bedrock/src/index.ts +++ b/packages/sdk/server-ai/examples/bedrock/src/index.ts @@ -62,12 +62,13 @@ async function main() { }, ); - if (!aiConfig.enabled || !aiConfig.tracker) { + if (!aiConfig.enabled) { console.log('*** AI configuration is not enabled'); process.exit(0); } - const completion = aiConfig.tracker.trackBedrockConverseMetrics( + const tracker = aiConfig.createTracker!(); + const completion = tracker.trackBedrockConverseMetrics( await awsClient.send( new ConverseCommand({ modelId: aiConfig.model?.name ?? 'no-model', diff --git a/packages/sdk/server-ai/examples/openai/src/index.ts b/packages/sdk/server-ai/examples/openai/src/index.ts index 8b501e9cb7..e16643d6c5 100644 --- a/packages/sdk/server-ai/examples/openai/src/index.ts +++ b/packages/sdk/server-ai/examples/openai/src/index.ts @@ -60,20 +60,19 @@ async function main() { myVariable: 'My User Defined Variable', }); - if (!aiConfig.enabled || !aiConfig.tracker) { + if (!aiConfig.enabled) { console.log('*** AI configuration is not enabled'); process.exit(0); } - const completion = await aiConfig.tracker.trackMetricsOf( - OpenAIProvider.createAIMetrics, - async () => - client.chat.completions.create({ - messages: aiConfig.messages || [], - model: aiConfig.model?.name || 'gpt-4', - temperature: (aiConfig.model?.parameters?.temperature as number) ?? 0.5, - max_tokens: (aiConfig.model?.parameters?.maxTokens as number) ?? 4096, - }), + const tracker = aiConfig.createTracker!(); + const completion = await tracker.trackMetricsOf(OpenAIProvider.createAIMetrics, async () => + client.chat.completions.create({ + messages: aiConfig.messages || [], + model: aiConfig.model?.name || 'gpt-4', + temperature: (aiConfig.model?.parameters?.temperature as number) ?? 0.5, + max_tokens: (aiConfig.model?.parameters?.maxTokens as number) ?? 4096, + }), ); console.log('AI Response:', completion.choices[0]?.message.content); diff --git a/packages/sdk/server-ai/examples/vercel-ai/src/index.ts b/packages/sdk/server-ai/examples/vercel-ai/src/index.ts index 5f138a5dd2..af1db3e918 100644 --- a/packages/sdk/server-ai/examples/vercel-ai/src/index.ts +++ b/packages/sdk/server-ai/examples/vercel-ai/src/index.ts @@ -52,7 +52,7 @@ async function main() { // const aiConfig = await aiClient.completionConfig(aiConfigKey, context, defaultValue); const aiConfig = await aiClient.completionConfig(aiConfigKey, context); - if (!aiConfig.enabled || !aiConfig.tracker) { + if (!aiConfig.enabled) { console.log('*** AI configuration is not enabled'); process.exit(0); } @@ -74,9 +74,9 @@ async function main() { }); // Call the model and track metrics for the ai config - const result = await aiConfig.tracker.trackMetricsOf( - VercelProvider.getAIMetricsFromResponse, - () => generateText({ ...vercelConfig, messages: vercelConfig.messages ?? [] }), + const tracker = aiConfig.createTracker!(); + const result = await tracker.trackMetricsOf(VercelProvider.getAIMetricsFromResponse, () => + generateText({ ...vercelConfig, messages: vercelConfig.messages ?? [] }), ); console.log('Response:', result.text); @@ -99,7 +99,8 @@ async function main() { }); // Stream is returned immediately (synchronously), metrics tracked in background - const streamResult = aiConfig.tracker.trackStreamMetricsOf( + const streamTracker = aiConfig.createTracker!(); + const streamResult = streamTracker.trackStreamMetricsOf( () => streamText({ ...vercelConfig, messages: vercelConfig.messages ?? [] }), VercelProvider.getAIMetricsFromStream, ); diff --git a/packages/sdk/server-ai/src/LDAIClientImpl.ts b/packages/sdk/server-ai/src/LDAIClientImpl.ts index 37ac4e8f10..209c0ce860 100644 --- a/packages/sdk/server-ai/src/LDAIClientImpl.ts +++ b/packages/sdk/server-ai/src/LDAIClientImpl.ts @@ -1,4 +1,5 @@ import Mustache from 'mustache'; +import { randomUUID } from 'node:crypto'; import { LDContext, LDLogger } from '@launchdarkly/js-server-sdk-common'; @@ -13,6 +14,7 @@ import { LDAIConfigDefaultKind, LDAIConfigKind, LDAIConfigMode, + LDAIConfigTracker, LDAIJudgeConfig, LDAIJudgeConfigDefault, LDJudge, @@ -87,19 +89,21 @@ export class LDAIClientImpl implements LDAIClient { return LDAIConfigUtils.createDisabledConfig(key, mode); } - const tracker = new LDAIConfigTrackerImpl( - this._ldClient, - key, - // eslint-disable-next-line no-underscore-dangle - value._ldMeta?.variationKey ?? '', - // eslint-disable-next-line no-underscore-dangle - value._ldMeta?.version ?? 1, - value.model?.name ?? '', - value.provider?.name ?? '', - context, - ); + const trackerFactory = () => + new LDAIConfigTrackerImpl( + this._ldClient, + randomUUID(), + key, + // eslint-disable-next-line no-underscore-dangle + value._ldMeta?.variationKey ?? '', + // eslint-disable-next-line no-underscore-dangle + value._ldMeta?.version ?? 1, + value.model?.name ?? '', + value.provider?.name ?? '', + context, + ); - const config = LDAIConfigUtils.fromFlagValue(key, value, tracker); + const config = LDAIConfigUtils.fromFlagValue(key, value, trackerFactory); // Apply variable interpolation (always needed for ldctx) return this._applyInterpolation(config, context, variables); @@ -296,7 +300,7 @@ export class LDAIClientImpl implements LDAIClient { variables, ); - if (!config.enabled || !config.tracker) { + if (!config.enabled) { this._logger?.info(`Chat configuration is disabled: ${key}`); return undefined; } @@ -313,7 +317,7 @@ export class LDAIClientImpl implements LDAIClient { defaultAiProvider, ); - return new TrackedChat(config, config.tracker, provider, judges, this._logger); + return new TrackedChat(config, provider, judges, this._logger); } async createJudge( @@ -351,7 +355,7 @@ export class LDAIClientImpl implements LDAIClient { extendedVariables, ); - if (!judgeConfig.enabled || !judgeConfig.tracker) { + if (!judgeConfig.enabled) { this._logger?.info(`Judge configuration is disabled: ${key}`); return undefined; } @@ -361,7 +365,7 @@ export class LDAIClientImpl implements LDAIClient { return undefined; } - return new Judge(judgeConfig, judgeConfig.tracker, provider, this._logger); + return new Judge(judgeConfig, provider, this._logger); } catch (error) { this._logger?.error(`Failed to initialize judge ${key}:`, error); return undefined; @@ -380,4 +384,8 @@ export class LDAIClientImpl implements LDAIClient { ): Promise { return this.createChat(key, context, defaultValue, variables, defaultAiProvider); } + + createTracker(token: string, context: LDContext): LDAIConfigTracker { + return LDAIConfigTrackerImpl.fromResumptionToken(token, this._ldClient, context); + } } diff --git a/packages/sdk/server-ai/src/LDAIConfigTrackerImpl.ts b/packages/sdk/server-ai/src/LDAIConfigTrackerImpl.ts index adda7c97c8..151a3c1d97 100644 --- a/packages/sdk/server-ai/src/LDAIConfigTrackerImpl.ts +++ b/packages/sdk/server-ai/src/LDAIConfigTrackerImpl.ts @@ -18,6 +18,7 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker { constructor( private _ldClient: LDClientMin, + private _runId: string, private _configKey: string, private _variationKey: string, private _version: number, @@ -27,16 +28,18 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker { ) {} getTrackData(graphKey?: string): { - variationKey: string; + runId: string; configKey: string; + variationKey: string; version: number; modelName: string; providerName: string; graphKey?: string; } { return { - variationKey: this._variationKey, + runId: this._runId, configKey: this._configKey, + variationKey: this._variationKey, version: this._version, modelName: this._modelName, providerName: this._providerName, @@ -44,7 +47,42 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker { }; } + get resumptionToken(): string { + const json = JSON.stringify({ + runId: this._runId, + configKey: this._configKey, + variationKey: this._variationKey, + version: this._version, + }); + return Buffer.from(json).toString('base64url'); + } + + static fromResumptionToken( + token: string, + ldClient: LDClientMin, + context: LDContext, + ): LDAIConfigTrackerImpl { + const json = Buffer.from(token, 'base64url').toString('utf8'); + const payload = JSON.parse(json); + return new LDAIConfigTrackerImpl( + ldClient, + payload.runId, + payload.configKey, + payload.variationKey ?? '', + payload.version, + '', + '', + context, + ); + } + trackDuration(duration: number, graphKey?: string): void { + if (this._trackedMetrics.durationMs !== undefined) { + this._ldClient.logger?.warn( + 'Duration has already been tracked for this execution. Use createTracker() for a new execution.', + ); + return; + } this._trackedMetrics.durationMs = duration; this._ldClient.track( '$ld:ai:duration:total', @@ -68,6 +106,12 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker { } trackTimeToFirstToken(timeToFirstTokenMs: number, graphKey?: string) { + if (this._trackedMetrics.timeToFirstTokenMs !== undefined) { + this._ldClient.logger?.warn( + 'Time to first token has already been tracked for this execution. Use createTracker() for a new execution.', + ); + return; + } this._trackedMetrics.timeToFirstTokenMs = timeToFirstTokenMs; this._ldClient.track( '$ld:ai:tokens:ttf', @@ -110,6 +154,12 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker { } trackFeedback(feedback: { kind: LDFeedbackKind }, graphKey?: string): void { + if (this._trackedMetrics.feedback !== undefined) { + this._ldClient.logger?.warn( + 'Feedback has already been tracked for this execution. Use createTracker() for a new execution.', + ); + return; + } this._trackedMetrics.feedback = feedback; if (feedback.kind === LDFeedbackKind.Positive) { this._ldClient.track( @@ -129,6 +179,12 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker { } trackSuccess(graphKey?: string): void { + if (this._trackedMetrics.success !== undefined) { + this._ldClient.logger?.warn( + 'Generation result has already been tracked for this execution. Use createTracker() for a new execution.', + ); + return; + } this._trackedMetrics.success = true; this._ldClient.track( '$ld:ai:generation:success', @@ -139,6 +195,12 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker { } trackError(graphKey?: string): void { + if (this._trackedMetrics.success !== undefined) { + this._ldClient.logger?.warn( + 'Generation result has already been tracked for this execution. Use createTracker() for a new execution.', + ); + return; + } this._trackedMetrics.success = false; this._ldClient.track('$ld:ai:generation:error', this._context, this.getTrackData(graphKey), 1); } @@ -301,6 +363,12 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker { } trackTokens(tokens: LDTokenUsage, graphKey?: string): void { + if (this._trackedMetrics.tokens !== undefined) { + this._ldClient.logger?.warn( + 'Token usage has already been tracked for this execution. Use createTracker() for a new execution.', + ); + return; + } this._trackedMetrics.tokens = tokens; const trackData = this.getTrackData(graphKey); if (tokens.total > 0) { diff --git a/packages/sdk/server-ai/src/api/LDAIClient.ts b/packages/sdk/server-ai/src/api/LDAIClient.ts index 3e4ceca864..fd93ca92a5 100644 --- a/packages/sdk/server-ai/src/api/LDAIClient.ts +++ b/packages/sdk/server-ai/src/api/LDAIClient.ts @@ -7,6 +7,7 @@ import { LDAIAgentRequestConfig, LDAICompletionConfig, LDAICompletionConfigDefault, + LDAIConfigTracker, LDAIJudgeConfig, LDAIJudgeConfigDefault, } from './config'; @@ -325,4 +326,15 @@ export interface LDAIClient { variables?: Record, defaultAiProvider?: SupportedAIProvider, ): Promise; + + /** + * Reconstructs an AIConfigTracker from a resumption token string previously + * obtained from a tracker's `resumptionToken` property. Use this to associate + * deferred events (such as user feedback) with the original invocation's runId. + * + * @param token A URL-safe Base64-encoded resumption token string. + * @param context The evaluation context to use for subsequent track calls. + * @returns A reconstructed AIConfigTracker with the original runId preserved. + */ + createTracker(token: string, context: LDContext): LDAIConfigTracker; } diff --git a/packages/sdk/server-ai/src/api/chat/TrackedChat.ts b/packages/sdk/server-ai/src/api/chat/TrackedChat.ts index 542547bffc..054969dc3d 100644 --- a/packages/sdk/server-ai/src/api/chat/TrackedChat.ts +++ b/packages/sdk/server-ai/src/api/chat/TrackedChat.ts @@ -1,6 +1,5 @@ import { LDLogger } from '@launchdarkly/js-server-sdk-common'; -import { LDAIConfigTracker } from '../config/LDAIConfigTracker'; import { LDAICompletionConfig, LDMessage } from '../config/types'; import { Judge } from '../judge/Judge'; import { JudgeResponse } from '../judge/types'; @@ -18,7 +17,6 @@ export class TrackedChat { constructor( protected readonly aiConfig: LDAICompletionConfig, - protected readonly tracker: LDAIConfigTracker, protected readonly provider: AIProvider, protected readonly judges: Record = {}, private readonly _logger?: LDLogger, @@ -31,6 +29,8 @@ export class TrackedChat { * This method handles conversation management and tracking, delegating to the provider's invokeModel method. */ async invoke(prompt: string): Promise { + const tracker = this.aiConfig.createTracker!(); + // Convert prompt string to LDMessage with role 'user' and add to conversation history const userMessage: LDMessage = { role: 'user', @@ -43,7 +43,7 @@ export class TrackedChat { const allMessages = [...configMessages, ...this.messages]; // Delegate to provider-specific implementation with tracking - const response = await this.tracker.trackMetricsOf( + const response = await tracker.trackMetricsOf( (result: ChatResponse) => result.metrics, () => this.provider.invokeModel(allMessages), ); @@ -52,7 +52,16 @@ export class TrackedChat { this.aiConfig.judgeConfiguration?.judges && this.aiConfig.judgeConfiguration.judges.length > 0 ) { - response.evaluations = this._evaluateWithJudges(this.messages, response); + response.evaluations = this._evaluateWithJudges(this.messages, response).then( + (evaluations) => { + evaluations.forEach((judgeResponse) => { + if (judgeResponse?.success) { + tracker.trackJudgeResponse(judgeResponse); + } + }); + return evaluations; + }, + ); } this.messages.push(response.message); @@ -78,23 +87,12 @@ export class TrackedChat { const judge = this.judges[judgeConfig.key]; if (!judge) { this._logger?.warn( - `Judge configuration is not enabled: ${judgeConfig.key}`, - this.tracker.getTrackData(), + `Judge configuration is not enabled for ${judgeConfig.key} in ${this.aiConfig.key}`, ); return undefined; } - const judgeResponse = await judge.evaluateMessages( - messages, - response, - judgeConfig.samplingRate, - ); - - if (judgeResponse && judgeResponse.success) { - this.tracker.trackJudgeResponse(judgeResponse); - } - - return judgeResponse; + return judge.evaluateMessages(messages, response, judgeConfig.samplingRate); }); // ensure all evaluations complete even if some fail @@ -110,13 +108,6 @@ export class TrackedChat { return this.aiConfig; } - /** - * Get the underlying AI configuration tracker used to initialize this TrackedChat. - */ - getTracker(): LDAIConfigTracker { - return this.tracker; - } - /** * Get the underlying AI provider instance. * This provides direct access to the provider for advanced use cases. diff --git a/packages/sdk/server-ai/src/api/config/LDAIConfigTracker.ts b/packages/sdk/server-ai/src/api/config/LDAIConfigTracker.ts index 3a40fd3c6d..18b243d94b 100644 --- a/packages/sdk/server-ai/src/api/config/LDAIConfigTracker.ts +++ b/packages/sdk/server-ai/src/api/config/LDAIConfigTracker.ts @@ -41,16 +41,30 @@ export interface LDAIConfigTracker { * @param graphKey When provided, associates this metric with the specified agent graph key. */ getTrackData(graphKey?: string): { - variationKey: string; + runId: string; configKey: string; + variationKey: string; version: number; modelName: string; providerName: string; graphKey?: string; }; + + /** + * A URL-safe Base64-encoded token that encodes the tracker's runId, configKey, + * variationKey, and version. Pass this to AIClient.createTracker() to reconstruct + * the tracker across process boundaries (e.g. for associating deferred feedback + * with the original invocation). + */ + readonly resumptionToken: string; + /** * Track the duration of generation. * + * At-most-once per execution: subsequent calls on the same tracker are dropped + * with a warning. Use createTracker() on the config result to obtain a fresh + * tracker for a new execution. + * * Ideally this would not include overhead time such as network communication. * * @param durationMs The duration in milliseconds. @@ -61,6 +75,9 @@ export interface LDAIConfigTracker { /** * Track information about token usage. * + * At-most-once per execution: subsequent calls on the same tracker are dropped + * with a warning. + * * @param tokens Token usage information. * @param graphKey When provided, associates this metric with the specified agent graph key. */ @@ -69,6 +86,9 @@ export interface LDAIConfigTracker { /** * Generation was successful. * + * At-most-once per execution: subsequent calls (including trackError) on the + * same tracker are dropped with a warning. + * * @param graphKey When provided, associates this metric with the specified agent graph key. */ trackSuccess(graphKey?: string): void; @@ -76,6 +96,9 @@ export interface LDAIConfigTracker { /** * An error was encountered during generation. * + * At-most-once per execution: subsequent calls (including trackSuccess) on the + * same tracker are dropped with a warning. + * * @param graphKey When provided, associates this metric with the specified agent graph key. */ trackError(graphKey?: string): void; @@ -83,6 +106,9 @@ export interface LDAIConfigTracker { /** * Track sentiment about the generation. * + * At-most-once per execution: subsequent calls on the same tracker are dropped + * with a warning. + * * @param feedback Feedback about the generation. * @param graphKey When provided, associates this metric with the specified agent graph key. */ @@ -91,6 +117,9 @@ export interface LDAIConfigTracker { /** * Track the time to first token for this generation. * + * At-most-once per execution: subsequent calls on the same tracker are dropped + * with a warning. + * * @param timeToFirstTokenMs The duration in milliseconds. * @param graphKey When provided, associates this metric with the specified agent graph key. */ diff --git a/packages/sdk/server-ai/src/api/config/LDAIConfigUtils.ts b/packages/sdk/server-ai/src/api/config/LDAIConfigUtils.ts index 2a926f1c87..74ab8ee30a 100644 --- a/packages/sdk/server-ai/src/api/config/LDAIConfigUtils.ts +++ b/packages/sdk/server-ai/src/api/config/LDAIConfigUtils.ts @@ -82,14 +82,15 @@ export class LDAIConfigUtils { /** * Converts a LaunchDarkly flag value to the appropriate AI configuration type. * + * @param key The configuration key * @param flagValue The flag value from LaunchDarkly - * @param tracker The tracker to add to the config + * @param trackerFactory A factory function that creates a new tracker for each execution * @returns The appropriate AI configuration type */ static fromFlagValue( key: string, flagValue: LDAIConfigFlagValue, - tracker: LDAIConfigTracker, + trackerFactory: () => LDAIConfigTracker, ): LDAIConfigKind { // Determine the actual mode from flag value // eslint-disable-next-line no-underscore-dangle @@ -97,12 +98,12 @@ export class LDAIConfigUtils { switch (flagValueMode) { case 'agent': - return this.toAgentConfig(key, flagValue, tracker); + return this.toAgentConfig(key, flagValue, trackerFactory); case 'judge': - return this.toJudgeConfig(key, flagValue, tracker); + return this.toJudgeConfig(key, flagValue, trackerFactory); case 'completion': default: - return this.toCompletionConfig(key, flagValue, tracker); + return this.toCompletionConfig(key, flagValue, trackerFactory); } } @@ -118,13 +119,13 @@ export class LDAIConfigUtils { return { key, enabled: false, - tracker: undefined, + createTracker: undefined, } as LDAIAgentConfig; case 'judge': return { key, enabled: false, - tracker: undefined, + createTracker: undefined, } as LDAIJudgeConfig; case 'completion': default: @@ -132,7 +133,7 @@ export class LDAIConfigUtils { return { key, enabled: false, - tracker: undefined, + createTracker: undefined, } as LDAICompletionConfig; } } @@ -156,18 +157,19 @@ export class LDAIConfigUtils { /** * Creates a completion config from flag value data. * + * @param key The configuration key * @param flagValue The flag value from LaunchDarkly - * @param tracker The tracker to add to the config + * @param trackerFactory A factory function that creates a new tracker for each execution * @returns A completion configuration */ static toCompletionConfig( key: string, flagValue: LDAIConfigFlagValue, - tracker: LDAIConfigTracker, + trackerFactory: () => LDAIConfigTracker, ): LDAICompletionConfig { return { ...this._toBaseConfig(key, flagValue), - tracker, + createTracker: trackerFactory, messages: flagValue.messages, judgeConfiguration: flagValue.judgeConfiguration, }; @@ -176,18 +178,19 @@ export class LDAIConfigUtils { /** * Creates an agent config from flag value data. * + * @param key The configuration key * @param flagValue The flag value from LaunchDarkly - * @param tracker The tracker to add to the config + * @param trackerFactory A factory function that creates a new tracker for each execution * @returns An agent configuration */ static toAgentConfig( key: string, flagValue: LDAIConfigFlagValue, - tracker: LDAIConfigTracker, + trackerFactory: () => LDAIConfigTracker, ): LDAIAgentConfig { return { ...this._toBaseConfig(key, flagValue), - tracker, + createTracker: trackerFactory, instructions: flagValue.instructions, judgeConfiguration: flagValue.judgeConfiguration, }; @@ -196,14 +199,15 @@ export class LDAIConfigUtils { /** * Creates a judge config from flag value data. * + * @param key The configuration key * @param flagValue The flag value from LaunchDarkly - * @param tracker The tracker to add to the config + * @param trackerFactory A factory function that creates a new tracker for each execution * @returns A judge configuration */ static toJudgeConfig( key: string, flagValue: LDAIConfigFlagValue, - tracker: LDAIConfigTracker, + trackerFactory: () => LDAIConfigTracker, ): LDAIJudgeConfig { // Prioritize evaluationMetricKey, fallback to first valid (non-empty, non-whitespace) value in evaluationMetricKeys let evaluationMetricKey: string | undefined; @@ -218,7 +222,7 @@ export class LDAIConfigUtils { return { ...this._toBaseConfig(key, flagValue), - tracker, + createTracker: trackerFactory, messages: flagValue.messages, evaluationMetricKey, }; diff --git a/packages/sdk/server-ai/src/api/config/types.ts b/packages/sdk/server-ai/src/api/config/types.ts index 44b89160c8..56a54d1ca2 100644 --- a/packages/sdk/server-ai/src/api/config/types.ts +++ b/packages/sdk/server-ai/src/api/config/types.ts @@ -105,10 +105,11 @@ export interface LDAIConfig extends Omit { enabled: boolean; /** - * A tracker which can be used to generate analytics. - * Undefined for disabled configs. + * Creates a new tracker for this AI Config invocation. Each call returns a + * new tracker with a fresh runId. Use createTracker() at the start of each + * execution to obtain a tracker, then use it to record metrics for that run. */ - tracker?: LDAIConfigTracker; + createTracker?: () => LDAIConfigTracker; } // ============================================================================ diff --git a/packages/sdk/server-ai/src/api/judge/Judge.ts b/packages/sdk/server-ai/src/api/judge/Judge.ts index 382addc632..1bab8d1a12 100644 --- a/packages/sdk/server-ai/src/api/judge/Judge.ts +++ b/packages/sdk/server-ai/src/api/judge/Judge.ts @@ -21,7 +21,6 @@ export class Judge { constructor( private readonly _aiConfig: LDAIJudgeConfig, - private readonly _aiConfigTracker: LDAIConfigTracker, private readonly _aiProvider: AIProvider, logger?: LDLogger, ) { @@ -65,21 +64,19 @@ export class Judge { output: string, samplingRate: number = 1, ): Promise { + const tracker = this._aiConfig.createTracker!(); try { const evaluationMetricKey = this._getEvaluationMetricKey(); if (!evaluationMetricKey) { this._logger?.warn( 'Judge configuration is missing required evaluation metric key', - this._aiConfigTracker.getTrackData(), + tracker.getTrackData(), ); return undefined; } if (!this._aiConfig.messages) { - this._logger?.warn( - 'Judge configuration must include messages', - this._aiConfigTracker.getTrackData(), - ); + this._logger?.warn('Judge configuration must include messages', tracker.getTrackData()); return undefined; } @@ -90,19 +87,19 @@ export class Judge { const messages = this._constructEvaluationMessages(input, output); - const response = await this._aiConfigTracker.trackMetricsOf( + const response = await tracker.trackMetricsOf( (result: StructuredResponse) => result.metrics, () => this._aiProvider.invokeStructuredModel(messages, this._evaluationResponseStructure), ); let { success } = response.metrics; - const evals = this._parseEvaluationResponse(response.data, evaluationMetricKey); + const evals = this._parseEvaluationResponse(response.data, evaluationMetricKey, tracker); if (!evals[evaluationMetricKey]) { this._logger?.warn( 'Judge evaluation did not return the expected evaluation', - this._aiConfigTracker.getTrackData(), + tracker.getTrackData(), ); success = false; } @@ -149,13 +146,6 @@ export class Judge { return this._aiConfig; } - /** - * Returns the tracker associated with this judge. - */ - getTracker(): LDAIConfigTracker { - return this._aiConfigTracker; - } - /** * Returns the AI provider used by this judge. */ @@ -191,6 +181,7 @@ export class Judge { private _parseEvaluationResponse( data: Record, evaluationMetricKey: string, + tracker: LDAIConfigTracker, ): Record { const evaluations = data.evaluations as Record; const results: Record = {}; @@ -205,7 +196,7 @@ export class Judge { if (!evaluation || typeof evaluation !== 'object') { this._logger?.warn( `Missing evaluation for metric key: ${evaluationMetricKey}`, - this._aiConfigTracker.getTrackData(), + tracker.getTrackData(), ); return results; } @@ -215,7 +206,7 @@ export class Judge { if (typeof evalData.score !== 'number' || evalData.score < 0 || evalData.score > 1) { this._logger?.warn( `Invalid score evaluated for ${evaluationMetricKey}: ${evalData.score}. Score must be a number between 0 and 1 inclusive`, - this._aiConfigTracker.getTrackData(), + tracker.getTrackData(), ); return results; } @@ -223,7 +214,7 @@ export class Judge { if (typeof evalData.reasoning !== 'string') { this._logger?.warn( `Invalid reasoning evaluated for ${evaluationMetricKey}: ${evalData.reasoning}. Reasoning must be a string`, - this._aiConfigTracker.getTrackData(), + tracker.getTrackData(), ); return results; }