From 7745852fd540ccd415f95ccb0dcf571b3f2c0310 Mon Sep 17 00:00:00 2001 From: Jason Bailey Date: Tue, 14 Apr 2026 16:46:02 -0500 Subject: [PATCH] fix: Add support for graph metric tracking (#1269) --- .../__tests__/LDAIConfigTrackerImpl.test.ts | 145 ++++++ .../__tests__/LDGraphTrackerImpl.test.ts | 418 ++++++++++++++++++ .../server-ai/src/LDAIConfigTrackerImpl.ts | 106 +++-- .../sdk/server-ai/src/LDGraphTrackerImpl.ts | 119 +++++ .../src/api/config/LDAIConfigTracker.ts | 54 ++- .../server-ai/src/api/graph/LDGraphTracker.ts | 126 ++++++ packages/sdk/server-ai/src/api/graph/index.ts | 1 + packages/sdk/server-ai/src/api/index.ts | 1 + packages/sdk/server-ai/src/index.ts | 1 + 9 files changed, 928 insertions(+), 43 deletions(-) create mode 100644 packages/sdk/server-ai/__tests__/LDGraphTrackerImpl.test.ts create mode 100644 packages/sdk/server-ai/src/LDGraphTrackerImpl.ts create mode 100644 packages/sdk/server-ai/src/api/graph/LDGraphTracker.ts create mode 100644 packages/sdk/server-ai/src/api/graph/index.ts diff --git a/packages/sdk/server-ai/__tests__/LDAIConfigTrackerImpl.test.ts b/packages/sdk/server-ai/__tests__/LDAIConfigTrackerImpl.test.ts index 1fbe25538c..5ea65c4c93 100644 --- a/packages/sdk/server-ai/__tests__/LDAIConfigTrackerImpl.test.ts +++ b/packages/sdk/server-ai/__tests__/LDAIConfigTrackerImpl.test.ts @@ -877,3 +877,148 @@ describe('trackJudgeResponse', () => { ); }); }); + +describe('trackToolCall', () => { + it('tracks a single tool call', () => { + const tracker = new LDAIConfigTrackerImpl( + mockLdClient, + configKey, + variationKey, + version, + modelName, + providerName, + testContext, + ); + + tracker.trackToolCall('my-tool'); + + expect(mockTrack).toHaveBeenCalledWith( + '$ld:ai:tool_call', + testContext, + { ...getExpectedTrackData(), toolKey: 'my-tool' }, + 1, + ); + }); + + it('includes graphKey when provided', () => { + const tracker = new LDAIConfigTrackerImpl( + mockLdClient, + configKey, + variationKey, + version, + modelName, + 
providerName, + testContext, + ); + + tracker.trackToolCall('my-tool', 'my-graph'); + + expect(mockTrack).toHaveBeenCalledWith( + '$ld:ai:tool_call', + testContext, + { ...getExpectedTrackData(), graphKey: 'my-graph', toolKey: 'my-tool' }, + 1, + ); + }); +}); + +describe('trackToolCalls', () => { + it('tracks multiple tool calls', () => { + const tracker = new LDAIConfigTrackerImpl( + mockLdClient, + configKey, + variationKey, + version, + modelName, + providerName, + testContext, + ); + + tracker.trackToolCalls(['tool-a', 'tool-b', 'tool-c']); + + expect(mockTrack).toHaveBeenCalledTimes(3); + expect(mockTrack).toHaveBeenCalledWith( + '$ld:ai:tool_call', + testContext, + { ...getExpectedTrackData(), toolKey: 'tool-a' }, + 1, + ); + expect(mockTrack).toHaveBeenCalledWith( + '$ld:ai:tool_call', + testContext, + { ...getExpectedTrackData(), toolKey: 'tool-b' }, + 1, + ); + expect(mockTrack).toHaveBeenCalledWith( + '$ld:ai:tool_call', + testContext, + { ...getExpectedTrackData(), toolKey: 'tool-c' }, + 1, + ); + }); +}); + +describe('graphKey parameter support', () => { + it('includes graphKey in trackDuration event', () => { + const tracker = new LDAIConfigTrackerImpl( + mockLdClient, + configKey, + variationKey, + version, + modelName, + providerName, + testContext, + ); + + tracker.trackDuration(1000, 'my-graph'); + + expect(mockTrack).toHaveBeenCalledWith( + '$ld:ai:duration:total', + testContext, + { ...getExpectedTrackData(), graphKey: 'my-graph' }, + 1000, + ); + }); + + it('includes graphKey in trackSuccess event', () => { + const tracker = new LDAIConfigTrackerImpl( + mockLdClient, + configKey, + variationKey, + version, + modelName, + providerName, + testContext, + ); + + tracker.trackSuccess('my-graph'); + + expect(mockTrack).toHaveBeenCalledWith( + '$ld:ai:generation:success', + testContext, + { ...getExpectedTrackData(), graphKey: 'my-graph' }, + 1, + ); + }); + + it('does not include graphKey when not provided', () => { + const tracker = new 
LDAIConfigTrackerImpl( + mockLdClient, + configKey, + variationKey, + version, + modelName, + providerName, + testContext, + ); + + tracker.trackSuccess(); + + expect(mockTrack).toHaveBeenCalledWith( + '$ld:ai:generation:success', + testContext, + getExpectedTrackData(), + 1, + ); + }); +}); diff --git a/packages/sdk/server-ai/__tests__/LDGraphTrackerImpl.test.ts b/packages/sdk/server-ai/__tests__/LDGraphTrackerImpl.test.ts new file mode 100644 index 0000000000..fe42bf4e4d --- /dev/null +++ b/packages/sdk/server-ai/__tests__/LDGraphTrackerImpl.test.ts @@ -0,0 +1,418 @@ +import { LDContext } from '@launchdarkly/js-server-sdk-common'; + +import { LDClientMin } from '../src/LDClientMin'; +import { LDGraphTrackerImpl } from '../src/LDGraphTrackerImpl'; + +const mockTrack = jest.fn(); +const mockLdClient: LDClientMin = { + track: mockTrack, + variation: jest.fn(), +}; + +const testContext: LDContext = { kind: 'user', key: 'test-user' }; +const graphKey = 'test-graph'; +const variationKey = 'v1'; +const version = 2; + +const getExpectedTrackData = () => ({ + graphKey, + variationKey, + version, +}); + +beforeEach(() => { + jest.clearAllMocks(); +}); + +it('returns track data', () => { + const tracker = new LDGraphTrackerImpl( + mockLdClient, + graphKey, + variationKey, + version, + testContext, + ); + + expect(tracker.getTrackData()).toEqual(getExpectedTrackData()); +}); + +it('tracks invocation success', () => { + const tracker = new LDGraphTrackerImpl( + mockLdClient, + graphKey, + variationKey, + version, + testContext, + ); + tracker.trackInvocationSuccess(); + + expect(mockTrack).toHaveBeenCalledWith( + '$ld:ai:graph:invocation_success', + testContext, + getExpectedTrackData(), + 1, + ); +}); + +it('tracks invocation failure', () => { + const tracker = new LDGraphTrackerImpl( + mockLdClient, + graphKey, + variationKey, + version, + testContext, + ); + tracker.trackInvocationFailure(); + + expect(mockTrack).toHaveBeenCalledWith( + '$ld:ai:graph:invocation_failure', + 
testContext, + getExpectedTrackData(), + 1, + ); +}); + +it('tracks latency', () => { + const tracker = new LDGraphTrackerImpl( + mockLdClient, + graphKey, + variationKey, + version, + testContext, + ); + tracker.trackLatency(1500); + + expect(mockTrack).toHaveBeenCalledWith( + '$ld:ai:graph:latency', + testContext, + getExpectedTrackData(), + 1500, + ); +}); + +it('tracks total tokens', () => { + const tracker = new LDGraphTrackerImpl( + mockLdClient, + graphKey, + variationKey, + version, + testContext, + ); + tracker.trackTotalTokens({ total: 200, input: 80, output: 120 }); + + expect(mockTrack).toHaveBeenCalledWith( + '$ld:ai:graph:total_tokens', + testContext, + getExpectedTrackData(), + 200, + ); +}); + +it('does not track total tokens when total is zero', () => { + const tracker = new LDGraphTrackerImpl( + mockLdClient, + graphKey, + variationKey, + version, + testContext, + ); + tracker.trackTotalTokens({ total: 0, input: 0, output: 0 }); + + expect(mockTrack).not.toHaveBeenCalled(); +}); + +it('tracks path', () => { + const tracker = new LDGraphTrackerImpl( + mockLdClient, + graphKey, + variationKey, + version, + testContext, + ); + const path = ['node-a', 'node-b', 'node-c']; + tracker.trackPath(path); + + expect(mockTrack).toHaveBeenCalledWith( + '$ld:ai:graph:path', + testContext, + { ...getExpectedTrackData(), path }, + 1, + ); +}); + +it('tracks judge response', () => { + const tracker = new LDGraphTrackerImpl( + mockLdClient, + graphKey, + variationKey, + version, + testContext, + ); + const response = { + judgeConfigKey: 'my-judge', + evals: { + relevance: { score: 0.9, reasoning: 'Relevant' }, + accuracy: { score: 0.85, reasoning: 'Accurate' }, + }, + success: true, + }; + tracker.trackJudgeResponse(response); + + expect(mockTrack).toHaveBeenCalledWith( + 'relevance', + testContext, + { ...getExpectedTrackData(), judgeConfigKey: 'my-judge' }, + 0.9, + ); + expect(mockTrack).toHaveBeenCalledWith( + 'accuracy', + testContext, + { 
...getExpectedTrackData(), judgeConfigKey: 'my-judge' }, + 0.85, + ); +}); + +it('tracks judge response without judgeConfigKey', () => { + const tracker = new LDGraphTrackerImpl( + mockLdClient, + graphKey, + variationKey, + version, + testContext, + ); + const response = { + evals: { relevance: { score: 0.7, reasoning: 'Somewhat relevant' } }, + success: true, + }; + tracker.trackJudgeResponse(response); + + expect(mockTrack).toHaveBeenCalledWith('relevance', testContext, getExpectedTrackData(), 0.7); +}); + +it('tracks redirect', () => { + const tracker = new LDGraphTrackerImpl( + mockLdClient, + graphKey, + variationKey, + version, + testContext, + ); + tracker.trackRedirect('agent-a', 'agent-b'); + + expect(mockTrack).toHaveBeenCalledWith( + '$ld:ai:graph:redirect', + testContext, + { ...getExpectedTrackData(), sourceKey: 'agent-a', redirectedTarget: 'agent-b' }, + 1, + ); +}); + +it('tracks handoff success', () => { + const tracker = new LDGraphTrackerImpl( + mockLdClient, + graphKey, + variationKey, + version, + testContext, + ); + tracker.trackHandoffSuccess('agent-a', 'agent-b'); + + expect(mockTrack).toHaveBeenCalledWith( + '$ld:ai:graph:handoff_success', + testContext, + { ...getExpectedTrackData(), sourceKey: 'agent-a', targetKey: 'agent-b' }, + 1, + ); +}); + +it('tracks handoff failure', () => { + const tracker = new LDGraphTrackerImpl( + mockLdClient, + graphKey, + variationKey, + version, + testContext, + ); + tracker.trackHandoffFailure('agent-a', 'agent-b'); + + expect(mockTrack).toHaveBeenCalledWith( + '$ld:ai:graph:handoff_failure', + testContext, + { ...getExpectedTrackData(), sourceKey: 'agent-a', targetKey: 'agent-b' }, + 1, + ); +}); + +it('returns empty summary when no metrics tracked', () => { + const tracker = new LDGraphTrackerImpl( + mockLdClient, + graphKey, + variationKey, + version, + testContext, + ); + + expect(tracker.getSummary()).toEqual({}); +}); + +it('summarizes tracked graph metrics', () => { + const tracker = new 
LDGraphTrackerImpl( + mockLdClient, + graphKey, + variationKey, + version, + testContext, + ); + + tracker.trackInvocationSuccess(); + tracker.trackLatency(2000); + tracker.trackTotalTokens({ total: 300, input: 100, output: 200 }); + tracker.trackPath(['node-a', 'node-b']); + + expect(tracker.getSummary()).toEqual({ + success: true, + durationMs: 2000, + tokens: { total: 300, input: 100, output: 200 }, + path: ['node-a', 'node-b'], + }); +}); + +describe('at-most-once semantics for graph-level metrics', () => { + it('drops duplicate trackInvocationSuccess calls', () => { + const tracker = new LDGraphTrackerImpl( + mockLdClient, + graphKey, + variationKey, + version, + testContext, + ); + tracker.trackInvocationSuccess(); + tracker.trackInvocationSuccess(); + + expect(mockTrack).toHaveBeenCalledTimes(1); + }); + + it('drops trackInvocationFailure after trackInvocationSuccess', () => { + const tracker = new LDGraphTrackerImpl( + mockLdClient, + graphKey, + variationKey, + version, + testContext, + ); + tracker.trackInvocationSuccess(); + tracker.trackInvocationFailure(); + + expect(mockTrack).toHaveBeenCalledTimes(1); + expect(mockTrack).toHaveBeenCalledWith( + '$ld:ai:graph:invocation_success', + expect.anything(), + expect.anything(), + expect.anything(), + ); + }); + + it('drops duplicate trackLatency calls', () => { + const tracker = new LDGraphTrackerImpl( + mockLdClient, + graphKey, + variationKey, + version, + testContext, + ); + tracker.trackLatency(1000); + tracker.trackLatency(2000); + + expect(mockTrack).toHaveBeenCalledTimes(1); + expect(mockTrack).toHaveBeenCalledWith( + '$ld:ai:graph:latency', + testContext, + getExpectedTrackData(), + 1000, + ); + }); + + it('drops duplicate trackTotalTokens calls', () => { + const tracker = new LDGraphTrackerImpl( + mockLdClient, + graphKey, + variationKey, + version, + testContext, + ); + tracker.trackTotalTokens({ total: 100, input: 40, output: 60 }); + tracker.trackTotalTokens({ total: 200, input: 80, output: 120 
}); + + expect(mockTrack).toHaveBeenCalledTimes(1); + expect(mockTrack).toHaveBeenCalledWith( + '$ld:ai:graph:total_tokens', + testContext, + getExpectedTrackData(), + 100, + ); + }); + + it('drops duplicate trackPath calls', () => { + const tracker = new LDGraphTrackerImpl( + mockLdClient, + graphKey, + variationKey, + version, + testContext, + ); + tracker.trackPath(['node-a']); + tracker.trackPath(['node-b', 'node-c']); + + expect(mockTrack).toHaveBeenCalledTimes(1); + expect(mockTrack).toHaveBeenCalledWith( + '$ld:ai:graph:path', + testContext, + { ...getExpectedTrackData(), path: ['node-a'] }, + 1, + ); + }); +}); + +describe('edge-level methods can be called multiple times', () => { + it('allows multiple trackRedirect calls', () => { + const tracker = new LDGraphTrackerImpl( + mockLdClient, + graphKey, + variationKey, + version, + testContext, + ); + tracker.trackRedirect('a', 'b'); + tracker.trackRedirect('b', 'c'); + + expect(mockTrack).toHaveBeenCalledTimes(2); + }); + + it('allows multiple trackHandoffSuccess calls', () => { + const tracker = new LDGraphTrackerImpl( + mockLdClient, + graphKey, + variationKey, + version, + testContext, + ); + tracker.trackHandoffSuccess('a', 'b'); + tracker.trackHandoffSuccess('b', 'c'); + + expect(mockTrack).toHaveBeenCalledTimes(2); + }); + + it('allows multiple trackHandoffFailure calls', () => { + const tracker = new LDGraphTrackerImpl( + mockLdClient, + graphKey, + variationKey, + version, + testContext, + ); + tracker.trackHandoffFailure('a', 'b'); + tracker.trackHandoffFailure('b', 'c'); + + expect(mockTrack).toHaveBeenCalledTimes(2); + }); +}); diff --git a/packages/sdk/server-ai/src/LDAIConfigTrackerImpl.ts b/packages/sdk/server-ai/src/LDAIConfigTrackerImpl.ts index a81f8e0721..adda7c97c8 100644 --- a/packages/sdk/server-ai/src/LDAIConfigTrackerImpl.ts +++ b/packages/sdk/server-ai/src/LDAIConfigTrackerImpl.ts @@ -26,12 +26,13 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker { private _context: 
LDContext, ) {} - getTrackData(): { + getTrackData(graphKey?: string): { variationKey: string; configKey: string; version: number; modelName: string; providerName: string; + graphKey?: string; } { return { variationKey: this._variationKey, @@ -39,15 +40,21 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker { version: this._version, modelName: this._modelName, providerName: this._providerName, + ...(graphKey !== undefined ? { graphKey } : {}), }; } - trackDuration(duration: number): void { + trackDuration(duration: number, graphKey?: string): void { this._trackedMetrics.durationMs = duration; - this._ldClient.track('$ld:ai:duration:total', this._context, this.getTrackData(), duration); + this._ldClient.track( + '$ld:ai:duration:total', + this._context, + this.getTrackData(graphKey), + duration, + ); } - async trackDurationOf<TRes>(func: () => Promise<TRes>): Promise<TRes> { + async trackDurationOf<TRes>(func: () => Promise<TRes>, graphKey?: string): Promise<TRes> { const startTime = Date.now(); try { // Be sure to await here so that we can track the duration of the function and also handle errors.
@@ -56,66 +63,97 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker { } finally { const endTime = Date.now(); const duration = endTime - startTime; // duration in milliseconds - this.trackDuration(duration); + this.trackDuration(duration, graphKey); } } - trackTimeToFirstToken(timeToFirstTokenMs: number) { + trackTimeToFirstToken(timeToFirstTokenMs: number, graphKey?: string) { this._trackedMetrics.timeToFirstTokenMs = timeToFirstTokenMs; this._ldClient.track( '$ld:ai:tokens:ttf', this._context, - this.getTrackData(), + this.getTrackData(graphKey), timeToFirstTokenMs, ); } - trackEvalScores(scores: Record<string, EvalScore>) { + trackEvalScores(scores: Record<string, EvalScore>, graphKey?: string) { Object.entries(scores).forEach(([metricKey, evalScore]) => { - this._ldClient.track(metricKey, this._context, this.getTrackData(), evalScore.score); + this._ldClient.track(metricKey, this._context, this.getTrackData(graphKey), evalScore.score); }); } - trackJudgeResponse(response: JudgeResponse) { + trackJudgeResponse(response: JudgeResponse, graphKey?: string) { Object.entries(response.evals).forEach(([metricKey, evalScore]) => { this._ldClient.track( metricKey, this._context, - { ...this.getTrackData(), judgeConfigKey: response.judgeConfigKey }, + { ...this.getTrackData(graphKey), judgeConfigKey: response.judgeConfigKey }, evalScore.score, ); }); } - trackFeedback(feedback: { kind: LDFeedbackKind }): void { + trackToolCall(toolKey: string, graphKey?: string): void { + this._ldClient.track( + '$ld:ai:tool_call', + this._context, + { ...this.getTrackData(graphKey), toolKey }, + 1, + ); + } + + trackToolCalls(toolKeys: string[], graphKey?: string): void { + toolKeys.forEach((toolKey) => { + this.trackToolCall(toolKey, graphKey); + }); + } + + trackFeedback(feedback: { kind: LDFeedbackKind }, graphKey?: string): void { this._trackedMetrics.feedback = feedback; if (feedback.kind === LDFeedbackKind.Positive) { - this._ldClient.track('$ld:ai:feedback:user:positive', this._context,
this.getTrackData(), 1); + this._ldClient.track( + '$ld:ai:feedback:user:positive', + this._context, + this.getTrackData(graphKey), + 1, + ); } else if (feedback.kind === LDFeedbackKind.Negative) { - this._ldClient.track('$ld:ai:feedback:user:negative', this._context, this.getTrackData(), 1); + this._ldClient.track( + '$ld:ai:feedback:user:negative', + this._context, + this.getTrackData(graphKey), + 1, + ); } } - trackSuccess(): void { + trackSuccess(graphKey?: string): void { this._trackedMetrics.success = true; - this._ldClient.track('$ld:ai:generation:success', this._context, this.getTrackData(), 1); + this._ldClient.track( + '$ld:ai:generation:success', + this._context, + this.getTrackData(graphKey), + 1, + ); } - trackError(): void { + trackError(graphKey?: string): void { this._trackedMetrics.success = false; - this._ldClient.track('$ld:ai:generation:error', this._context, this.getTrackData(), 1); + this._ldClient.track('$ld:ai:generation:error', this._context, this.getTrackData(graphKey), 1); } async trackMetricsOf<TRes>( metricsExtractor: (result: TRes) => LDAIMetrics, func: () => Promise<TRes>, + graphKey?: string, ): Promise<TRes> { let result: TRes; try { - result = await this.trackDurationOf(func); + result = await this.trackDurationOf(func, graphKey); } catch (err) { - this.trackError(); + this.trackError(graphKey); throw err; } @@ -124,14 +162,14 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker { // Track success/error based on metrics if (metrics.success) { - this.trackSuccess(); + this.trackSuccess(graphKey); } else { - this.trackError(); + this.trackError(graphKey); } // Track token usage if available if (metrics.usage) { - this.trackTokens(metrics.usage); + this.trackTokens(metrics.usage, graphKey); } return result; @@ -140,6 +178,7 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker { trackStreamMetricsOf<TStream>( streamCreator: () => TStream, metricsExtractor: (stream: TStream) => Promise<LDAIMetrics>, + graphKey?: string, ): TStream { const startTime
= Date.now(); @@ -148,14 +187,14 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker { const stream = streamCreator(); // Start background metrics tracking (fire and forget) - this._trackStreamMetricsInBackground(stream, metricsExtractor, startTime); + this._trackStreamMetricsInBackground(stream, metricsExtractor, startTime, graphKey); // Return stream immediately for consumption return stream; } catch (error) { // Track error if stream creation fails - this.trackDuration(Date.now() - startTime); - this.trackError(); + this.trackDuration(Date.now() - startTime, graphKey); + this.trackError(graphKey); throw error; } } @@ -164,6 +203,7 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker { stream: TStream, metricsExtractor: (stream: TStream) => Promise<LDAIMetrics>, startTime: number, + graphKey?: string, ): Promise<void> { try { // Wait for metrics to be available @@ -171,21 +211,21 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker { // Track success/error based on metrics if (metrics.success) { - this.trackSuccess(); + this.trackSuccess(graphKey); } else { - this.trackError(); + this.trackError(graphKey); } // Track token usage if available if (metrics.usage) { - this.trackTokens(metrics.usage); + this.trackTokens(metrics.usage, graphKey); } } catch (error) { // If metrics extraction fails, track error - this.trackError(); + this.trackError(graphKey); } finally { // Track duration regardless of success/error - this.trackDuration(Date.now() - startTime); + this.trackDuration(Date.now() - startTime, graphKey); } } @@ -260,9 +300,9 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker { } } - trackTokens(tokens: LDTokenUsage): void { + trackTokens(tokens: LDTokenUsage, graphKey?: string): void { this._trackedMetrics.tokens = tokens; - const trackData = this.getTrackData(); + const trackData = this.getTrackData(graphKey); if (tokens.total > 0) { this._ldClient.track('$ld:ai:tokens:total', this._context, trackData, tokens.total);
} diff --git a/packages/sdk/server-ai/src/LDGraphTrackerImpl.ts b/packages/sdk/server-ai/src/LDGraphTrackerImpl.ts new file mode 100644 index 0000000000..4c08e26a58 --- /dev/null +++ b/packages/sdk/server-ai/src/LDGraphTrackerImpl.ts @@ -0,0 +1,119 @@ +import { LDContext } from '@launchdarkly/js-server-sdk-common'; + +import { LDGraphMetricSummary, LDGraphTracker } from './api/graph/LDGraphTracker'; +import { JudgeResponse } from './api/judge/types'; +import { LDTokenUsage } from './api/metrics'; +import { LDClientMin } from './LDClientMin'; + +export class LDGraphTrackerImpl implements LDGraphTracker { + private _trackedMetrics: LDGraphMetricSummary = {}; + + constructor( + private _ldClient: LDClientMin, + private _graphKey: string, + private _variationKey: string, + private _version: number, + private _context: LDContext, + ) {} + + getTrackData(): { + variationKey: string; + graphKey: string; + version: number; + } { + return { + variationKey: this._variationKey, + graphKey: this._graphKey, + version: this._version, + }; + } + + trackInvocationSuccess(): void { + if (this._trackedMetrics.success !== undefined) { + return; + } + this._trackedMetrics.success = true; + this._ldClient.track('$ld:ai:graph:invocation_success', this._context, this.getTrackData(), 1); + } + + trackInvocationFailure(): void { + if (this._trackedMetrics.success !== undefined) { + return; + } + this._trackedMetrics.success = false; + this._ldClient.track('$ld:ai:graph:invocation_failure', this._context, this.getTrackData(), 1); + } + + trackLatency(durationMs: number): void { + if (this._trackedMetrics.durationMs !== undefined) { + return; + } + this._trackedMetrics.durationMs = durationMs; + this._ldClient.track('$ld:ai:graph:latency', this._context, this.getTrackData(), durationMs); + } + + trackTotalTokens(tokens: LDTokenUsage): void { + if (this._trackedMetrics.tokens !== undefined) { + return; + } + if (tokens.total <= 0) { + return; + } + this._trackedMetrics.tokens = tokens; + 
this._ldClient.track( + '$ld:ai:graph:total_tokens', + this._context, + this.getTrackData(), + tokens.total, + ); + } + + trackPath(path: string[]): void { + if (this._trackedMetrics.path !== undefined) { + return; + } + this._trackedMetrics.path = path; + this._ldClient.track('$ld:ai:graph:path', this._context, { ...this.getTrackData(), path }, 1); + } + + trackJudgeResponse(response: JudgeResponse): void { + const trackData = response.judgeConfigKey + ? { ...this.getTrackData(), judgeConfigKey: response.judgeConfigKey } + : this.getTrackData(); + + Object.entries(response.evals).forEach(([metricKey, evalScore]) => { + this._ldClient.track(metricKey, this._context, trackData, evalScore.score); + }); + } + + trackRedirect(sourceKey: string, redirectedTarget: string): void { + this._ldClient.track( + '$ld:ai:graph:redirect', + this._context, + { ...this.getTrackData(), sourceKey, redirectedTarget }, + 1, + ); + } + + trackHandoffSuccess(sourceKey: string, targetKey: string): void { + this._ldClient.track( + '$ld:ai:graph:handoff_success', + this._context, + { ...this.getTrackData(), sourceKey, targetKey }, + 1, + ); + } + + trackHandoffFailure(sourceKey: string, targetKey: string): void { + this._ldClient.track( + '$ld:ai:graph:handoff_failure', + this._context, + { ...this.getTrackData(), sourceKey, targetKey }, + 1, + ); + } + + getSummary(): LDGraphMetricSummary { + return { ...this._trackedMetrics }; + } +} diff --git a/packages/sdk/server-ai/src/api/config/LDAIConfigTracker.ts b/packages/sdk/server-ai/src/api/config/LDAIConfigTracker.ts index 41ff0e20a1..3a40fd3c6d 100644 --- a/packages/sdk/server-ai/src/api/config/LDAIConfigTracker.ts +++ b/packages/sdk/server-ai/src/api/config/LDAIConfigTracker.ts @@ -37,13 +37,16 @@ export interface LDAIMetricSummary { export interface LDAIConfigTracker { /** * Get the data for tracking. + * + * @param graphKey When provided, associates this metric with the specified agent graph key. 
*/ - getTrackData(): { + getTrackData(graphKey?: string): { variationKey: string; configKey: string; version: number; modelName: string; providerName: string; + graphKey?: string; }; /** * Track the duration of generation. @@ -51,53 +54,79 @@ export interface LDAIConfigTracker { * Ideally this would not include overhead time such as network communication. * * @param durationMs The duration in milliseconds. + * @param graphKey When provided, associates this metric with the specified agent graph key. */ - trackDuration(durationMs: number): void; + trackDuration(durationMs: number, graphKey?: string): void; /** * Track information about token usage. * * @param tokens Token usage information. + * @param graphKey When provided, associates this metric with the specified agent graph key. */ - trackTokens(tokens: LDTokenUsage): void; + trackTokens(tokens: LDTokenUsage, graphKey?: string): void; /** * Generation was successful. + * + * @param graphKey When provided, associates this metric with the specified agent graph key. */ - trackSuccess(): void; + trackSuccess(graphKey?: string): void; /** * An error was encountered during generation. + * + * @param graphKey When provided, associates this metric with the specified agent graph key. */ - trackError(): void; + trackError(graphKey?: string): void; /** * Track sentiment about the generation. * * @param feedback Feedback about the generation. + * @param graphKey When provided, associates this metric with the specified agent graph key. */ - trackFeedback(feedback: { kind: LDFeedbackKind }): void; + trackFeedback(feedback: { kind: LDFeedbackKind }, graphKey?: string): void; /** * Track the time to first token for this generation. * * @param timeToFirstTokenMs The duration in milliseconds. + * @param graphKey When provided, associates this metric with the specified agent graph key. 
*/ - trackTimeToFirstToken(timeToFirstTokenMs: number): void; + trackTimeToFirstToken(timeToFirstTokenMs: number, graphKey?: string): void; /** * Track evaluation scores for multiple metrics. * * @param scores Record mapping metric keys to their evaluation scores + * @param graphKey When provided, associates this metric with the specified agent graph key. */ - trackEvalScores(scores: Record<string, EvalScore>): void; + trackEvalScores(scores: Record<string, EvalScore>, graphKey?: string): void; /** * Track a judge response containing evaluation scores and judge configuration key. * * @param response Judge response containing evaluation scores and judge configuration key + * @param graphKey When provided, associates this metric with the specified agent graph key. + */ + trackJudgeResponse(response: JudgeResponse, graphKey?: string): void; + + /** + * Track a single tool invocation. + * + * @param toolKey The identifier of the tool that was invoked. + * @param graphKey When provided, associates this metric with the specified agent graph key. + */ + trackToolCall(toolKey: string, graphKey?: string): void; + + /** + * Track multiple tool invocations. + * + * @param toolKeys The identifiers of the tools that were invoked. + * @param graphKey When provided, associates this metric with the specified agent graph key. */ - trackJudgeResponse(response: JudgeResponse): void; + trackToolCalls(toolKeys: string[], graphKey?: string): void; /** * Track the duration of execution of the provided function. @@ -108,9 +137,10 @@ export interface LDAIConfigTracker { * This function does not automatically record an error when the function throws. * * @param func The function to track the duration of. + * @param graphKey When provided, associates this metric with the specified agent graph key. * @returns The result of the function. */ - trackDurationOf<TRes>(func: () => Promise<TRes>): Promise<TRes>; + trackDurationOf<TRes>(func: () => Promise<TRes>, graphKey?: string): Promise<TRes>; /** * Track metrics for a generic AI operation.
@@ -124,11 +154,13 @@ export interface LDAIConfigTracker { * * @param metricsExtractor Function that extracts LDAIMetrics from the operation result * @param func Function which executes the operation + * @param graphKey When provided, associates this metric with the specified agent graph key. * @returns The result of the operation */ trackMetricsOf<TRes>( metricsExtractor: (result: TRes) => LDAIMetrics, func: () => Promise<TRes>, + graphKey?: string, ): Promise<TRes>; /** @@ -150,11 +182,13 @@ export interface LDAIConfigTracker { * * @param streamCreator Function that creates and returns the stream (synchronous) * @param metricsExtractor Function that asynchronously extracts metrics from the stream + * @param graphKey When provided, associates this metric with the specified agent graph key. * @returns The stream result (returned immediately, not a Promise) */ trackStreamMetricsOf<TStream>( streamCreator: () => TStream, metricsExtractor: (stream: TStream) => Promise<LDAIMetrics>, + graphKey?: string, ): TStream; /** diff --git a/packages/sdk/server-ai/src/api/graph/LDGraphTracker.ts b/packages/sdk/server-ai/src/api/graph/LDGraphTracker.ts new file mode 100644 index 0000000000..94cf30658f --- /dev/null +++ b/packages/sdk/server-ai/src/api/graph/LDGraphTracker.ts @@ -0,0 +1,126 @@ +import { JudgeResponse } from '../judge/types'; +import { LDTokenUsage } from '../metrics'; + +/** + * Metrics tracked at the graph level. + */ +export interface LDGraphMetricSummary { + /** + * True if the graph invocation succeeded, false if it failed, absent if not tracked. + */ + success?: boolean; + + /** + * Total graph execution duration in milliseconds, if tracked. + */ + durationMs?: number; + + /** + * Aggregated token usage across the entire graph invocation, if tracked. + */ + tokens?: LDTokenUsage; + + /** + * Execution path through the graph as an array of config keys, if tracked. + */ + path?: string[]; +} + +/** + * Tracker for graph-level and edge-level metrics in AI agent graph operations.
+ * + * Node-level metrics are tracked via each node's {@link LDAIConfigTracker}. + */ +export interface LDGraphTracker { + /** + * Get the data for tracking. + */ + getTrackData(): { + variationKey: string; + graphKey: string; + version: number; + }; + + /** + * Track a successful graph invocation. + * + * At-most-once per tracker instance. Subsequent calls are dropped. + */ + trackInvocationSuccess(): void; + + /** + * Track an unsuccessful graph invocation. + * + * At-most-once per tracker instance. Subsequent calls are dropped. + */ + trackInvocationFailure(): void; + + /** + * Track the total latency of graph execution. + * + * At-most-once per tracker instance. Subsequent calls are dropped. + * + * @param durationMs Duration in milliseconds. + */ + trackLatency(durationMs: number): void; + + /** + * Track aggregated token usage across the entire graph invocation. + * + * At-most-once per tracker instance. Subsequent calls are dropped. + * + * @param tokens Token usage information. + */ + trackTotalTokens(tokens: LDTokenUsage): void; + + /** + * Track the execution path through the graph. + * + * At-most-once per tracker instance. Subsequent calls are dropped. + * + * @param path Array of config keys representing the sequence of nodes executed. + */ + trackPath(path: string[]): void; + + /** + * Track judge responses for the final graph output. + * + * @param response Judge response containing evaluation scores. + */ + trackJudgeResponse(response: JudgeResponse): void; + + /** + * Track when a node redirects to a different target than originally specified. + * + * May be called multiple times. + * + * @param sourceKey Config key of the source node. + * @param redirectedTarget Config key of the target node that was redirected to. + */ + trackRedirect(sourceKey: string, redirectedTarget: string): void; + + /** + * Track a successful handoff between nodes. + * + * May be called multiple times. + * + * @param sourceKey Config key of the source node. 
+ * @param targetKey Config key of the target node. + */ + trackHandoffSuccess(sourceKey: string, targetKey: string): void; + + /** + * Track a failed handoff between nodes. + * + * May be called multiple times. + * + * @param sourceKey Config key of the source node. + * @param targetKey Config key of the target node. + */ + trackHandoffFailure(sourceKey: string, targetKey: string): void; + + /** + * Get a summary of the tracked graph-level metrics. + */ + getSummary(): LDGraphMetricSummary; +} diff --git a/packages/sdk/server-ai/src/api/graph/index.ts b/packages/sdk/server-ai/src/api/graph/index.ts new file mode 100644 index 0000000000..536e630115 --- /dev/null +++ b/packages/sdk/server-ai/src/api/graph/index.ts @@ -0,0 +1 @@ +export * from './LDGraphTracker'; diff --git a/packages/sdk/server-ai/src/api/index.ts b/packages/sdk/server-ai/src/api/index.ts index 2f289b8356..7470ef740c 100644 --- a/packages/sdk/server-ai/src/api/index.ts +++ b/packages/sdk/server-ai/src/api/index.ts @@ -1,5 +1,6 @@ export * from './config'; export * from './chat'; +export * from './graph'; export * from './judge'; export * from './metrics'; export * from './LDAIClient'; diff --git a/packages/sdk/server-ai/src/index.ts b/packages/sdk/server-ai/src/index.ts index 7c1bb54b3d..8bb6c11808 100644 --- a/packages/sdk/server-ai/src/index.ts +++ b/packages/sdk/server-ai/src/index.ts @@ -26,3 +26,4 @@ export function initAi(ldClient: LDClientMin): LDAIClient { export type LDLogger = common.LDLogger; export * from './api'; +export { LDGraphTrackerImpl } from './LDGraphTrackerImpl';