Skip to content

Commit 192315f

Browse files
jsonbailey and claude committed
feat: introduce ManagedResult, RunnerResult, and LDAIMetricSummary (AIC-2388)
Adds RunnerResult (provider-level result type without evaluations), ManagedResult (managed-layer result with async evaluations promise), and LDAIMetricSummary (flat metric summary including resumptionToken). Adds toolCalls and durationMs to LDAIMetrics. TrackedChat.run() replaces invoke() returning ManagedResult with LDAIMetricSummary built from tracker. Adds createModel() to LDAIClient/LDAIClientImpl as the preferred replacement for createChat(). Updates chat-judge example. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent c751ce6 commit 192315f

8 files changed

Lines changed: 192 additions & 7 deletions

File tree

packages/sdk/server-ai/examples/chat-judge/src/index.ts

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -43,11 +43,11 @@ async function main() {
4343
enabled: false,
4444
};
4545

46-
const chat = await aiClient.createChat(aiConfigKey, context, defaultValue, {
46+
const model = await aiClient.createModel(aiConfigKey, context, defaultValue, {
4747
companyName: 'LaunchDarkly',
4848
});
4949

50-
if (!chat) {
50+
if (!model) {
5151
console.log('*** AI chat configuration is not enabled');
5252
process.exit(0);
5353
}
@@ -56,15 +56,14 @@ async function main() {
5656
const userInput = 'How can LaunchDarkly help me?';
5757
console.log('User Input:', userInput);
5858

59-
// The invoke method will automatically evaluate the chat response with any judges defined
60-
// in the AI config.
61-
const chatResponse = await chat.invoke(userInput);
62-
console.log('Chat Response:', chatResponse.message.content);
59+
// The run() method invokes the model and returns a ManagedResult.
60+
const result = await model.run(userInput);
61+
console.log('Chat Response:', result.content);
6362

6463
// Judge evaluations run asynchronously and do not block your application.
6564
// Results are automatically sent to LaunchDarkly for AI config metrics.
6665
// You only need to await if you want to access the evaluation results in your code.
67-
const evalResults = await chatResponse.evaluations;
66+
const evalResults = await result.evaluations;
6867
console.log('Judge results:', JSON.stringify(evalResults, null, 2));
6968

7069
console.log('Success.');

packages/sdk/server-ai/src/LDAIClientImpl.ts

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -410,6 +410,16 @@ export class LDAIClientImpl implements LDAIClient {
410410
}
411411
}
412412

413+
async createModel(
414+
key: string,
415+
context: LDContext,
416+
defaultValue?: LDAICompletionConfigDefault,
417+
variables?: Record<string, unknown>,
418+
defaultAiProvider?: SupportedAIProvider,
419+
): Promise<TrackedChat | undefined> {
420+
return this.createChat(key, context, defaultValue, variables, defaultAiProvider);
421+
}
422+
413423
/**
414424
* @deprecated Use `createChat` instead. This method will be removed in a future version.
415425
*/

packages/sdk/server-ai/src/api/LDAIClient.ts

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -276,6 +276,25 @@ export interface LDAIClient {
276276
defaultAiProvider?: SupportedAIProvider,
277277
): Promise<TrackedChat | undefined>;
278278

279+
/**
280+
* Creates and returns a new ManagedModel (TrackedChat) instance for chat interactions.
281+
* This is the preferred replacement for `createChat()`.
282+
*
283+
* @param key The key identifying the AI chat configuration to use.
284+
* @param context The standard LDContext used when evaluating flags.
285+
* @param defaultValue Optional fallback when the configuration is not available from LaunchDarkly.
286+
* @param variables Dictionary of values for instruction interpolation.
287+
* @param defaultAiProvider Optional default AI provider to use.
288+
* @returns A promise that resolves to the TrackedChat instance, or undefined if disabled.
289+
*/
290+
createModel(
291+
key: string,
292+
context: LDContext,
293+
defaultValue?: LDAICompletionConfigDefault,
294+
variables?: Record<string, unknown>,
295+
defaultAiProvider?: SupportedAIProvider,
296+
): Promise<TrackedChat | undefined>;
297+
279298
/**
280299
* @deprecated Use `createChat` instead. This method will be removed in a future version.
281300
*/

packages/sdk/server-ai/src/api/chat/TrackedChat.ts

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ import { LDLogger } from '@launchdarkly/js-server-sdk-common';
33
import { LDAICompletionConfig, LDMessage } from '../config/types';
44
import { Judge } from '../judge/Judge';
55
import { LDJudgeResult } from '../judge/types';
6+
import { LDAIMetricSummary, ManagedResult } from '../model/types';
67
import { AIProvider } from '../providers/AIProvider';
78
import { ChatResponse } from './types';
89

@@ -24,9 +25,56 @@ export class TrackedChat {
2425
this.messages = [];
2526
}
2627

28+
/**
29+
* Invoke the chat model with a prompt string and return a ManagedResult.
30+
* This is the primary entry point for model invocation. Judge evaluations are
31+
* wired asynchronously and exposed via ManagedResult.evaluations.
32+
*/
33+
async run(prompt: string): Promise<ManagedResult> {
34+
const tracker = this.aiConfig.createTracker!();
35+
36+
// Convert prompt string to LDMessage with role 'user' and add to conversation history
37+
const userMessage: LDMessage = {
38+
role: 'user',
39+
content: prompt,
40+
};
41+
this.messages.push(userMessage);
42+
43+
// Prepend config messages to conversation history for model invocation
44+
const configMessages = this.aiConfig.messages || [];
45+
const allMessages = [...configMessages, ...this.messages];
46+
47+
// Delegate to provider-specific implementation with tracking
48+
const response = await tracker.trackMetricsOf(
49+
(result: ChatResponse) => result.metrics,
50+
() => this.provider.invokeModel(allMessages),
51+
);
52+
53+
this.messages.push(response.message);
54+
55+
// Build the metric summary from response metrics + resumption token
56+
const metrics: LDAIMetricSummary = {
57+
success: response.metrics.success,
58+
usage: response.metrics.usage,
59+
toolCalls: response.metrics.toolCalls,
60+
durationMs: response.metrics.durationMs,
61+
resumptionToken: tracker.resumptionToken,
62+
};
63+
64+
// Evaluations are wired in the managed layer (PR 3). For now, resolve empty.
65+
const evaluations: Promise<LDJudgeResult[]> = Promise.resolve([]);
66+
67+
return {
68+
content: response.message.content,
69+
metrics,
70+
evaluations,
71+
};
72+
}
73+
2774
/**
2875
* Invoke the chat model with a prompt string.
2976
* This method handles conversation management and tracking, delegating to the provider's invokeModel method.
77+
* @deprecated Use `run()` instead.
3078
*/
3179
async invoke(prompt: string): Promise<ChatResponse> {
3280
const tracker = this.aiConfig.createTracker!();

packages/sdk/server-ai/src/api/index.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,5 +3,6 @@ export * from './chat';
33
export * from './graph';
44
export * from './judge';
55
export * from './metrics';
6+
export * from './model';
67
export * from './LDAIClient';
78
export * from './providers';

packages/sdk/server-ai/src/api/metrics/LDAIMetrics.ts

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,4 +15,16 @@ export interface LDAIMetrics {
1515
* This will be undefined if no token usage data is available.
1616
*/
1717
usage?: LDTokenUsage;
18+
19+
/**
20+
* List of tool call identifiers made during the operation.
21+
* This will be undefined if no tool calls were made.
22+
*/
23+
toolCalls?: string[];
24+
25+
/**
26+
* Duration of the operation in milliseconds.
27+
* This will be undefined if duration was not tracked.
28+
*/
29+
durationMs?: number;
1830
}
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
export type { LDAIMetricSummary, ManagedResult, RunnerResult } from './types';
Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
import { LDJudgeResult } from '../judge/types';
2+
import { LDAIMetrics } from '../metrics/LDAIMetrics';
3+
import { LDTokenUsage } from '../metrics/LDTokenUsage';
4+
5+
/**
6+
* Summary metrics returned in a ManagedResult or ManagedGraphResult.
7+
* Provides a flat view of the key metrics for the completed operation.
8+
*/
9+
export interface LDAIMetricSummary {
10+
/**
11+
* Whether the AI operation was successful.
12+
*/
13+
success: boolean;
14+
15+
/**
16+
* Token usage information, if available.
17+
*/
18+
usage?: LDTokenUsage;
19+
20+
/**
21+
* List of tool call identifiers made during the operation, if any.
22+
*/
23+
toolCalls?: string[];
24+
25+
/**
26+
* Duration of the operation in milliseconds, if tracked.
27+
*/
28+
durationMs?: number;
29+
30+
/**
31+
* Resumption token for deferred feedback association.
32+
*/
33+
resumptionToken?: string;
34+
}
35+
36+
/**
37+
* The result returned by a Runner (provider-level) invocation.
38+
* Providers implement Runner and return RunnerResult from run().
39+
* This type does NOT include evaluations — those are wired in the managed layer.
40+
*/
41+
export interface RunnerResult {
42+
/**
43+
* The text content of the model's response.
44+
*/
45+
content: string;
46+
47+
/**
48+
* Metrics information for the operation.
49+
*/
50+
metrics: LDAIMetrics;
51+
52+
/**
53+
* The raw response object from the provider, if available.
54+
*/
55+
raw?: unknown;
56+
57+
/**
58+
* Parsed structured output, if the provider returned structured data.
59+
*/
60+
parsed?: Record<string, unknown>;
61+
}
62+
63+
/**
64+
* The result returned by a managed model invocation (ManagedModel.run()).
65+
* Includes a promise for asynchronous judge evaluations.
66+
*/
67+
export interface ManagedResult {
68+
/**
69+
* The text content of the model's response.
70+
*/
71+
content: string;
72+
73+
/**
74+
* Summarized metrics for this invocation.
75+
*/
76+
metrics: LDAIMetricSummary;
77+
78+
/**
79+
* The raw response object from the provider, if available.
80+
*/
81+
raw?: unknown;
82+
83+
/**
84+
* Parsed structured output, if available.
85+
*/
86+
parsed?: Record<string, unknown>;
87+
88+
/**
89+
* Promise that resolves to the judge evaluation results.
90+
* This promise encapsulates both evaluation and tracking
91+
* (tracker.trackJudgeResult is called when it resolves).
92+
* Awaiting this promise guarantees both evaluation and tracking are complete.
93+
*/
94+
evaluations: Promise<LDJudgeResult[]>;
95+
}

0 commit comments

Comments (0)