feat: Support conversation history directly in AI Provider model runners (#1371)

jsonbailey · web-flow · commit b246631bfcaf · 2026-05-14T15:00:52.000-05:00
diff --git a/packages/ai-providers/server-ai-langchain/__tests__/LangChainModelRunner.test.ts b/packages/ai-providers/server-ai-langchain/__tests__/LangChainModelRunner.test.ts
@@ -115,4 +115,69 @@ describe('LangChainModelRunner', () => {
   it('returns the underlying chat model', () => {
     expect(runner.getChatModel()).toBe(mockLLM);
   });
+
+  describe('conversation history', () => {
+    it('accumulates history across successful calls', async () => {
+      mockLLM.invoke
+        .mockResolvedValueOnce(new AIMessage('First response'))
+        .mockResolvedValueOnce(new AIMessage('Second response'));
+
+      await runner.run('First question');
+      await runner.run('Second question');
+      // calls[1][0] is the message array passed to the second invoke() call.
+      const secondCallMessages = mockLLM.invoke.mock.calls[1][0];
+      const roles = secondCallMessages.map((m: any) => m.constructor.name);
+      expect(roles).toEqual(['HumanMessage', 'AIMessage', 'HumanMessage']);
+      expect(secondCallMessages[0].content).toBe('First question');
+      expect(secondCallMessages[1].content).toBe('First response');
+      expect(secondCallMessages[2].content).toBe('Second question');
+    });
+
+    it('does not accumulate history when the call throws', async () => {
+      mockLLM.invoke.mockRejectedValueOnce(new Error('Model error'));
+      await runner.run('Hello');
+      // The rejected call must not be persisted; the retry should send only its own prompt.
+      mockLLM.invoke.mockResolvedValueOnce(new AIMessage('Recovery'));
+      await runner.run('Try again');
+
+      const secondCallMessages = mockLLM.invoke.mock.calls[1][0];
+      expect(secondCallMessages).toHaveLength(1);
+      expect(secondCallMessages[0].content).toBe('Try again');
+    });
+
+    it('does not accumulate history when content is empty (multimodal)', async () => {
+      mockLLM.invoke.mockResolvedValueOnce(new AIMessage([{ type: 'image' }] as any));
+      await runner.run('Hello');
+      // The image-only reply above is the "empty content" case and must not be persisted either.
+      mockLLM.invoke.mockResolvedValueOnce(new AIMessage('Recovery'));
+      await runner.run('Try again');
+
+      const secondCallMessages = mockLLM.invoke.mock.calls[1][0];
+      expect(secondCallMessages).toHaveLength(1);
+      expect(secondCallMessages[0].content).toBe('Try again');
+    });
+
+    it('keeps config messages prepended ahead of accumulated history on every call', async () => {
+      const configWithMessages: LDAICompletionConfig = {
+        ...baseConfig,
+        messages: [{ role: 'system', content: 'You are helpful.' }],
+      };
+      const r = new LangChainModelRunner(mockLLM, configWithMessages, mockLogger);
+
+      mockLLM.invoke
+        .mockResolvedValueOnce(new AIMessage('Answer 1'))
+        .mockResolvedValueOnce(new AIMessage('Answer 2'));
+
+      await r.run('Q1');
+      await r.run('Q2');
+      // Expected order on the second call: config system message, first turn (Q1/Answer 1), then Q2.
+      const secondCallMessages = mockLLM.invoke.mock.calls[1][0];
+      expect(secondCallMessages).toHaveLength(4);
+      expect(secondCallMessages[0].constructor.name).toBe('SystemMessage');
+      expect(secondCallMessages[0].content).toBe('You are helpful.');
+      expect(secondCallMessages[1].content).toBe('Q1');
+      expect(secondCallMessages[2].content).toBe('Answer 1');
+      expect(secondCallMessages[3].content).toBe('Q2');
+    });
+  });
 });
diff --git a/packages/ai-providers/server-ai-langchain/src/LangChainModelRunner.ts b/packages/ai-providers/server-ai-langchain/src/LangChainModelRunner.ts
@@ -1,10 +1,10 @@
+import { InMemoryChatMessageHistory } from '@langchain/core/chat_history';
 import { BaseChatModel } from '@langchain/core/language_models/chat_models';
-import { AIMessage } from '@langchain/core/messages';
+import { AIMessage, BaseMessage, HumanMessage } from '@langchain/core/messages';
 
 import type {
   LDAICompletionConfig,
   LDLogger,
-  LDMessage,
   Runner,
   RunnerResult,
 } from '@launchdarkly/server-sdk-ai';
@@ -19,35 +19,49 @@ import { convertMessagesToLangChain, getAIMetricsFromResponse } from './LangChai
  */
 export class LangChainModelRunner implements Runner {
   private _llm: BaseChatModel;
-  private _config: LDAICompletionConfig;
+  private _chatHistory: InMemoryChatMessageHistory;
   private _logger?: LDLogger;
 
   constructor(llm: BaseChatModel, config: LDAICompletionConfig, logger?: LDLogger) {
     this._llm = llm;
-    this._config = config;
+    this._chatHistory = new InMemoryChatMessageHistory(
+      convertMessagesToLangChain(config.messages ?? []), // seed the history with the config's messages, if any
+    );
     this._logger = logger;
   }
 
   /**
    * Run the LangChain model with the given user prompt.
    *
-   * Prepends any messages defined in the AI config (system prompt, etc.) before
-   * the user prompt.
+   * The runner keeps a LangChain `InMemoryChatMessageHistory` that is seeded
+   * from the messages on the AI config (system prompt, etc.), if any. Each call
+   * sends the stored history followed by the new user prompt; the prompt is
+   * added to a per-call array, not to the stored history. Only when the result
+   * reports success and has non-empty content are the prompt and the reply
+   * persisted. Failed or empty-content calls leave the history unchanged, so
+   * the next call starts from the same state.
    *
    * @param input The user prompt string.
    * @param outputType Optional JSON schema for structured output. When provided,
    *   the parsed result is exposed via {@link RunnerResult.parsed}.
    */
   async run(input: string, outputType?: Record<string, unknown>): Promise<RunnerResult> {
-    const messages: LDMessage[] = [
-      ...(this._config.messages ?? []),
-      { role: 'user', content: input },
+    const langchainMessages: BaseMessage[] = [
+      ...(await this._chatHistory.getMessages()),
+      new HumanMessage(input),
     ];
 
-    if (outputType !== undefined) {
-      return this._runStructured(messages, outputType);
+    const result =
+      outputType !== undefined
+        ? await this._runStructured(langchainMessages, outputType)
+        : await this._runCompletion(langchainMessages);
+    // Persist the turn only for a successful, non-empty result; otherwise the stored history is left as-is.
+    if (result.metrics.success && result.content) {
+      await this._chatHistory.addUserMessage(input);
+      await this._chatHistory.addAIMessage(result.content);
     }
-    return this._runCompletion(messages);
+
+    return result;
   }
 
   /**
@@ -57,10 +71,9 @@ export class LangChainModelRunner implements Runner {
     return this._llm;
   }
 
-  private async _runCompletion(messages: LDMessage[]): Promise<RunnerResult> {
+  private async _runCompletion(messages: BaseMessage[]): Promise<RunnerResult> {
     try {
-      const langchainMessages = convertMessagesToLangChain(messages);
-      const response: AIMessage = await this._llm.invoke(langchainMessages);
+      const response: AIMessage = await this._llm.invoke(messages);
       const metrics = getAIMetricsFromResponse(response);
 
       let content: string = '';
@@ -85,14 +98,13 @@ export class LangChainModelRunner implements Runner {
   }
 
   private async _runStructured(
-    messages: LDMessage[],
+    messages: BaseMessage[],
     outputType: Record<string, unknown>,
   ): Promise<RunnerResult> {
     try {
-      const langchainMessages = convertMessagesToLangChain(messages);
       const response = (await this._llm
         .withStructuredOutput(outputType)
-        .invoke(langchainMessages)) as Record<string, unknown>;
+        .invoke(messages)) as Record<string, unknown>;
 
       const metrics = {
         success: true,
diff --git a/packages/ai-providers/server-ai-openai/__tests__/OpenAIModelRunner.test.ts b/packages/ai-providers/server-ai-openai/__tests__/OpenAIModelRunner.test.ts
@@ -148,4 +148,87 @@ describe('OpenAIModelRunner', () => {
       expect(runner.getClient()).toBe(mockOpenAI);
     });
   });
+
+  describe('conversation history', () => {
+    it('accumulates history across successful calls', async () => {
+      (mockOpenAI.chat.completions.create as jest.Mock)
+        .mockResolvedValueOnce({
+          choices: [{ message: { content: 'First response' } }],
+          usage: { prompt_tokens: 1, completion_tokens: 1, total_tokens: 2 },
+        } as any)
+        .mockResolvedValueOnce({
+          choices: [{ message: { content: 'Second response' } }],
+          usage: { prompt_tokens: 1, completion_tokens: 1, total_tokens: 2 },
+        } as any);
+
+      await runner.run('First question');
+      await runner.run('Second question');
+      // calls[1][0] is the request object passed to the second create() call.
+      const secondCallArgs = (mockOpenAI.chat.completions.create as jest.Mock).mock.calls[1][0];
+      expect(secondCallArgs.messages).toEqual([
+        { role: 'user', content: 'First question' },
+        { role: 'assistant', content: 'First response' },
+        { role: 'user', content: 'Second question' },
+      ]);
+    });
+
+    it('does not accumulate history when the call throws', async () => {
+      (mockOpenAI.chat.completions.create as jest.Mock).mockRejectedValueOnce(new Error('boom'));
+      await runner.run('Hello!');
+      // The rejected call must not be persisted; the retry should send only its own prompt.
+      (mockOpenAI.chat.completions.create as jest.Mock).mockResolvedValueOnce({
+        choices: [{ message: { content: 'Recovery' } }],
+        usage: { prompt_tokens: 1, completion_tokens: 1, total_tokens: 2 },
+      } as any);
+      await runner.run('Try again');
+
+      const secondCallArgs = (mockOpenAI.chat.completions.create as jest.Mock).mock.calls[1][0];
+      expect(secondCallArgs.messages).toEqual([{ role: 'user', content: 'Try again' }]);
+    });
+
+    it('does not accumulate history when content is empty', async () => {
+      (mockOpenAI.chat.completions.create as jest.Mock).mockResolvedValueOnce({
+        choices: [{ message: {} }],
+      } as any);
+      await runner.run('Hello!');
+      // The response above has no message content, so that turn must not be persisted either.
+      (mockOpenAI.chat.completions.create as jest.Mock).mockResolvedValueOnce({
+        choices: [{ message: { content: 'Recovery' } }],
+        usage: { prompt_tokens: 1, completion_tokens: 1, total_tokens: 2 },
+      } as any);
+      await runner.run('Try again');
+
+      const secondCallArgs = (mockOpenAI.chat.completions.create as jest.Mock).mock.calls[1][0];
+      expect(secondCallArgs.messages).toEqual([{ role: 'user', content: 'Try again' }]);
+    });
+
+    it('keeps config messages prepended ahead of accumulated history on every call', async () => {
+      const configWithMessages: LDAICompletionConfig = {
+        ...baseConfig,
+        messages: [{ role: 'system', content: 'You are helpful.' }],
+      };
+      const r = new OpenAIModelRunner(mockOpenAI, configWithMessages);
+
+      (mockOpenAI.chat.completions.create as jest.Mock)
+        .mockResolvedValueOnce({
+          choices: [{ message: { content: 'Answer 1' } }],
+          usage: { prompt_tokens: 1, completion_tokens: 1, total_tokens: 2 },
+        } as any)
+        .mockResolvedValueOnce({
+          choices: [{ message: { content: 'Answer 2' } }],
+          usage: { prompt_tokens: 1, completion_tokens: 1, total_tokens: 2 },
+        } as any);
+
+      await r.run('Q1');
+      await r.run('Q2');
+      // Expected order on the second call: config system message, first turn (Q1/Answer 1), then Q2.
+      const secondCallArgs = (mockOpenAI.chat.completions.create as jest.Mock).mock.calls[1][0];
+      expect(secondCallArgs.messages).toEqual([
+        { role: 'system', content: 'You are helpful.' },
+        { role: 'user', content: 'Q1' },
+        { role: 'assistant', content: 'Answer 1' },
+        { role: 'user', content: 'Q2' },
+      ]);
+    });
+  });
 });
diff --git a/packages/ai-providers/server-ai-openai/src/OpenAIModelRunner.ts b/packages/ai-providers/server-ai-openai/src/OpenAIModelRunner.ts
@@ -18,39 +18,49 @@ import { convertMessagesToOpenAI, getAIMetricsFromResponse } from './OpenAIHelpe
  */
 export class OpenAIModelRunner implements Runner {
   private _client: OpenAI;
-  private _config: LDAICompletionConfig;
   private _modelName: string;
   private _parameters: Record<string, unknown>;
+  private _history: LDMessage[];
   private _logger?: LDLogger;
 
   constructor(client: OpenAI, config: LDAICompletionConfig, logger?: LDLogger) {
     this._client = client;
-    this._config = config;
     this._modelName = config.model?.name ?? '';
     this._parameters = { ...(config.model?.parameters ?? {}) };
+    this._history = [...(config.messages ?? [])]; // copy, so pushes in run() never mutate config.messages
     this._logger = logger;
   }
 
   /**
    * Run the OpenAI model with the given user prompt.
    *
-   * Prepends any messages defined in the AI config (system prompt,
-   * instructions, etc.) before the user prompt.
+   * The runner keeps a conversation history that is seeded from the messages
+   * on the AI config (system prompt, instructions, etc.), if any. Each call
+   * sends the stored history followed by the new user prompt; the prompt is
+   * added to a per-call array, not to the stored history. Only when the result
+   * reports success and has non-empty content are the prompt and the reply
+   * persisted. Failed or empty-content calls leave the history unchanged, so
+   * the next call starts from the same state.
    *
    * @param input The user prompt string.
    * @param outputType Optional JSON schema for structured output. When provided,
    *   the response is parsed and exposed via {@link RunnerResult.parsed}.
    */
   async run(input: string, outputType?: Record<string, unknown>): Promise<RunnerResult> {
-    const messages: LDMessage[] = [
-      ...(this._config.messages ?? []),
-      { role: 'user', content: input },
-    ];
+    const userMessage: LDMessage = { role: 'user', content: input };
+    const messages: LDMessage[] = [...this._history, userMessage];
 
-    if (outputType !== undefined) {
-      return this._runStructured(messages, outputType);
+    const result =
+      outputType !== undefined
+        ? await this._runStructured(messages, outputType)
+        : await this._runCompletion(messages);
+    // Persist the turn only for a successful, non-empty result; otherwise the stored history is left as-is.
+    if (result.metrics.success && result.content) {
+      this._history.push(userMessage);
+      this._history.push({ role: 'assistant', content: result.content });
     }
-    return this._runCompletion(messages);
+
+    return result;
   }
 
   /**
diff --git a/packages/ai-providers/server-ai-vercel/__tests__/VercelModelRunner.test.ts b/packages/ai-providers/server-ai-vercel/__tests__/VercelModelRunner.test.ts
diff --git a/packages/ai-providers/server-ai-vercel/src/VercelModelRunner.ts b/packages/ai-providers/server-ai-vercel/src/VercelModelRunner.ts