Skip to content

Commit 07f1af2

Browse files
jsonbailey and claude committed
feat: add OpenAI Runner protocol implementation (AIC-2388)
Adds OpenAIModelRunner, OpenAIAgentRunner, and OpenAIRunnerFactory that implement the Runner protocol introduced in JS PR 6. The runners return RunnerResult instead of the legacy ChatResponse / StructuredResponse. OpenAIAgentRunner runs a tool-calling loop using the OpenAI Chat Completions API, populating LDAIMetrics.toolCalls with the names of tools the model invoked. Tool implementations are supplied via a ToolRegistry passed to OpenAIRunnerFactory.createAgent. The deprecated OpenAIProvider class is preserved so AIProviderFactory continues to work during the migration. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
1 parent 635cf16 commit 07f1af2

9 files changed

Lines changed: 843 additions & 0 deletions

File tree

Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
import { OpenAI } from 'openai';
2+
3+
import { OpenAIAgentRunner } from '../src/OpenAIAgentRunner';
4+
5+
jest.mock('openai', () => ({
6+
OpenAI: jest.fn().mockImplementation(() => ({
7+
chat: {
8+
completions: {
9+
create: jest.fn(),
10+
},
11+
},
12+
})),
13+
}));
14+
15+
describe('OpenAIAgentRunner', () => {
16+
let mockOpenAI: jest.Mocked<OpenAI>;
17+
18+
beforeEach(() => {
19+
mockOpenAI = new OpenAI() as jest.Mocked<OpenAI>;
20+
});
21+
22+
it('returns content with no toolCalls when the model does not invoke tools', async () => {
23+
(mockOpenAI.chat.completions.create as jest.Mock).mockResolvedValue({
24+
choices: [{ message: { content: 'Done', tool_calls: [] } }],
25+
usage: { prompt_tokens: 8, completion_tokens: 4, total_tokens: 12 },
26+
} as any);
27+
28+
const runner = new OpenAIAgentRunner(mockOpenAI, 'gpt-4o', {}, '', [], {});
29+
const result = await runner.run([{ role: 'user', content: 'Say done' }]);
30+
31+
expect(result.content).toBe('Done');
32+
expect(result.metrics.success).toBe(true);
33+
expect(result.metrics.toolCalls).toBeUndefined();
34+
expect(result.metrics.usage).toEqual({ total: 12, input: 8, output: 4 });
35+
});
36+
37+
it('executes tools, populates toolCalls, and aggregates token usage across iterations', async () => {
38+
const create = mockOpenAI.chat.completions.create as jest.Mock;
39+
create
40+
.mockResolvedValueOnce({
41+
choices: [
42+
{
43+
message: {
44+
content: null,
45+
tool_calls: [
46+
{
47+
id: 'call_1',
48+
function: { name: 'lookup', arguments: '{"id":42}' },
49+
},
50+
],
51+
},
52+
},
53+
],
54+
usage: { prompt_tokens: 10, completion_tokens: 4, total_tokens: 14 },
55+
} as any)
56+
.mockResolvedValueOnce({
57+
choices: [{ message: { content: 'The answer is 42.', tool_calls: [] } }],
58+
usage: { prompt_tokens: 6, completion_tokens: 8, total_tokens: 14 },
59+
} as any);
60+
61+
const lookup = jest.fn().mockResolvedValue({ value: 42 });
62+
const toolDefinitions = [
63+
{
64+
type: 'function',
65+
function: { name: 'lookup', parameters: { type: 'object' } },
66+
},
67+
];
68+
const runner = new OpenAIAgentRunner(
69+
mockOpenAI,
70+
'gpt-4o',
71+
{},
72+
'You are an expert.',
73+
toolDefinitions,
74+
{ lookup },
75+
);
76+
77+
const result = await runner.run([{ role: 'user', content: 'Look up 42' }]);
78+
79+
expect(lookup).toHaveBeenCalledWith({ id: 42 });
80+
expect(create).toHaveBeenCalledTimes(2);
81+
expect(create.mock.calls[0][0].tools).toBe(toolDefinitions);
82+
expect(create.mock.calls[0][0].messages[0]).toEqual({
83+
role: 'system',
84+
content: 'You are an expert.',
85+
});
86+
expect(result.content).toBe('The answer is 42.');
87+
expect(result.metrics.toolCalls).toEqual(['lookup']);
88+
expect(result.metrics.usage).toEqual({ total: 28, input: 16, output: 12 });
89+
});
90+
91+
it('records the tool call and continues when a tool is missing from the registry', async () => {
92+
const create = mockOpenAI.chat.completions.create as jest.Mock;
93+
create
94+
.mockResolvedValueOnce({
95+
choices: [
96+
{
97+
message: {
98+
content: null,
99+
tool_calls: [{ id: 'call_x', function: { name: 'missing', arguments: '{}' } }],
100+
},
101+
},
102+
],
103+
usage: { prompt_tokens: 1, completion_tokens: 1, total_tokens: 2 },
104+
} as any)
105+
.mockResolvedValueOnce({
106+
choices: [{ message: { content: 'fallback', tool_calls: [] } }],
107+
usage: { prompt_tokens: 1, completion_tokens: 1, total_tokens: 2 },
108+
} as any);
109+
110+
const runner = new OpenAIAgentRunner(mockOpenAI, 'gpt-4o', {}, '', [], {});
111+
const result = await runner.run([{ role: 'user', content: 'go' }]);
112+
113+
expect(result.content).toBe('fallback');
114+
expect(result.metrics.toolCalls).toEqual(['missing']);
115+
});
116+
117+
it('returns an unsuccessful RunnerResult when the API call throws', async () => {
118+
(mockOpenAI.chat.completions.create as jest.Mock).mockRejectedValue(new Error('boom'));
119+
120+
const runner = new OpenAIAgentRunner(mockOpenAI, 'gpt-4o', {}, '', [], {});
121+
const result = await runner.run([{ role: 'user', content: 'Hi' }]);
122+
123+
expect(result.content).toBe('');
124+
expect(result.metrics.success).toBe(false);
125+
});
126+
});
Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
import { OpenAI } from 'openai';
2+
3+
import { OpenAIModelRunner } from '../src/OpenAIModelRunner';
4+
5+
jest.mock('openai', () => ({
6+
OpenAI: jest.fn().mockImplementation(() => ({
7+
chat: {
8+
completions: {
9+
create: jest.fn(),
10+
},
11+
},
12+
})),
13+
}));
14+
15+
describe('OpenAIModelRunner', () => {
16+
let mockOpenAI: jest.Mocked<OpenAI>;
17+
let runner: OpenAIModelRunner;
18+
19+
beforeEach(() => {
20+
mockOpenAI = new OpenAI() as jest.Mocked<OpenAI>;
21+
runner = new OpenAIModelRunner(mockOpenAI, 'gpt-3.5-turbo', {});
22+
});
23+
24+
describe('run (chat completion)', () => {
25+
it('returns a RunnerResult with content, metrics, and raw response', async () => {
26+
const mockResponse = {
27+
choices: [{ message: { content: 'Hello there!' } }],
28+
usage: { prompt_tokens: 10, completion_tokens: 5, total_tokens: 15 },
29+
};
30+
(mockOpenAI.chat.completions.create as jest.Mock).mockResolvedValue(mockResponse as any);
31+
32+
const result = await runner.run([{ role: 'user', content: 'Hi' }]);
33+
34+
expect(mockOpenAI.chat.completions.create).toHaveBeenCalledWith({
35+
model: 'gpt-3.5-turbo',
36+
messages: [{ role: 'user', content: 'Hi' }],
37+
});
38+
expect(result.content).toBe('Hello there!');
39+
expect(result.metrics).toEqual({
40+
success: true,
41+
usage: { total: 15, input: 10, output: 5 },
42+
});
43+
expect(result.raw).toBe(mockResponse);
44+
expect(result.parsed).toBeUndefined();
45+
});
46+
47+
it('marks the result unsuccessful when response has no content', async () => {
48+
const mockResponse = { choices: [{ message: {} }] };
49+
(mockOpenAI.chat.completions.create as jest.Mock).mockResolvedValue(mockResponse as any);
50+
51+
const result = await runner.run([{ role: 'user', content: 'Hi' }]);
52+
53+
expect(result.content).toBe('');
54+
expect(result.metrics.success).toBe(false);
55+
});
56+
57+
it('returns an unsuccessful RunnerResult when the API call throws', async () => {
58+
(mockOpenAI.chat.completions.create as jest.Mock).mockRejectedValue(new Error('boom'));
59+
60+
const result = await runner.run([{ role: 'user', content: 'Hi' }]);
61+
62+
expect(result.content).toBe('');
63+
expect(result.metrics.success).toBe(false);
64+
expect(result.raw).toBeUndefined();
65+
});
66+
});
67+
68+
describe('run (structured output)', () => {
69+
it('parses structured output and exposes it via parsed', async () => {
70+
const mockResponse = {
71+
choices: [{ message: { content: '{"name":"Ada","age":36}' } }],
72+
usage: { prompt_tokens: 20, completion_tokens: 10, total_tokens: 30 },
73+
};
74+
(mockOpenAI.chat.completions.create as jest.Mock).mockResolvedValue(mockResponse as any);
75+
76+
const schema = {
77+
type: 'object',
78+
properties: { name: { type: 'string' }, age: { type: 'number' } },
79+
required: ['name', 'age'],
80+
};
81+
const result = await runner.run(
82+
[{ role: 'user', content: 'Tell me about a person' }],
83+
schema,
84+
);
85+
86+
expect(mockOpenAI.chat.completions.create).toHaveBeenCalledWith({
87+
model: 'gpt-3.5-turbo',
88+
messages: [{ role: 'user', content: 'Tell me about a person' }],
89+
response_format: {
90+
type: 'json_schema',
91+
json_schema: {
92+
name: 'structured_output',
93+
schema,
94+
strict: true,
95+
},
96+
},
97+
});
98+
expect(result.content).toBe('{"name":"Ada","age":36}');
99+
expect(result.parsed).toEqual({ name: 'Ada', age: 36 });
100+
expect(result.metrics.success).toBe(true);
101+
});
102+
103+
it('marks the result unsuccessful when structured output is not valid JSON', async () => {
104+
const mockResponse = {
105+
choices: [{ message: { content: 'not json' } }],
106+
usage: { prompt_tokens: 5, completion_tokens: 2, total_tokens: 7 },
107+
};
108+
(mockOpenAI.chat.completions.create as jest.Mock).mockResolvedValue(mockResponse as any);
109+
110+
const result = await runner.run([{ role: 'user', content: 'Hi' }], { type: 'object' });
111+
112+
expect(result.content).toBe('not json');
113+
expect(result.parsed).toBeUndefined();
114+
expect(result.metrics.success).toBe(false);
115+
});
116+
});
117+
118+
describe('getClient', () => {
119+
it('returns the underlying OpenAI client', () => {
120+
expect(runner.getClient()).toBe(mockOpenAI);
121+
});
122+
});
123+
});
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
import { OpenAI } from 'openai';
2+
3+
import type { LDAIAgentConfig, LDAICompletionConfig } from '@launchdarkly/server-sdk-ai';
4+
5+
import { OpenAIAgentRunner } from '../src/OpenAIAgentRunner';
6+
import { OpenAIModelRunner } from '../src/OpenAIModelRunner';
7+
import { OpenAIRunnerFactory } from '../src/OpenAIRunnerFactory';
8+
9+
jest.mock('openai', () => ({
10+
OpenAI: jest.fn().mockImplementation(() => ({
11+
chat: { completions: { create: jest.fn() } },
12+
})),
13+
}));
14+
15+
describe('OpenAIRunnerFactory', () => {
16+
let mockOpenAI: jest.Mocked<OpenAI>;
17+
let factory: OpenAIRunnerFactory;
18+
19+
beforeEach(() => {
20+
mockOpenAI = new OpenAI() as jest.Mocked<OpenAI>;
21+
factory = new OpenAIRunnerFactory(mockOpenAI);
22+
});
23+
24+
describe('createModel', () => {
25+
it('builds an OpenAIModelRunner that shares the factory client', () => {
26+
const config: LDAICompletionConfig = {
27+
key: 'completion',
28+
enabled: true,
29+
model: { name: 'gpt-4o', parameters: { temperature: 0.5 } },
30+
};
31+
32+
const runner = factory.createModel(config);
33+
34+
expect(runner).toBeInstanceOf(OpenAIModelRunner);
35+
expect(runner.getClient()).toBe(mockOpenAI);
36+
});
37+
38+
it('builds a model runner from a minimal config', () => {
39+
const runner = factory.createModel({ key: 'completion', enabled: true });
40+
expect(runner).toBeInstanceOf(OpenAIModelRunner);
41+
});
42+
});
43+
44+
describe('createAgent', () => {
45+
it('builds an OpenAIAgentRunner without tools when none are configured', () => {
46+
const config: LDAIAgentConfig = {
47+
key: 'agent',
48+
enabled: true,
49+
model: { name: 'gpt-4o' },
50+
instructions: 'be helpful',
51+
};
52+
53+
const runner = factory.createAgent(config);
54+
55+
expect(runner).toBeInstanceOf(OpenAIAgentRunner);
56+
});
57+
58+
it('extracts tool definitions from model.parameters.tools', () => {
59+
const tools = [{ type: 'function', function: { name: 'lookup' } }];
60+
const config: LDAIAgentConfig = {
61+
key: 'agent',
62+
enabled: true,
63+
model: { name: 'gpt-4o', parameters: { tools, temperature: 0.7 } },
64+
instructions: 'be helpful',
65+
};
66+
67+
const runner = factory.createAgent(config, { lookup: () => 'ok' });
68+
69+
expect(runner).toBeInstanceOf(OpenAIAgentRunner);
70+
});
71+
});
72+
73+
describe('getClient', () => {
74+
it('returns the underlying OpenAI client', () => {
75+
expect(factory.getClient()).toBe(mockOpenAI);
76+
});
77+
});
78+
79+
describe('create', () => {
80+
it('creates an OpenAIRunnerFactory instance', async () => {
81+
const f = await OpenAIRunnerFactory.create();
82+
expect(f).toBeInstanceOf(OpenAIRunnerFactory);
83+
expect(f.getClient()).toBeDefined();
84+
});
85+
});
86+
});
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
import {
2+
convertMessagesToOpenAI,
3+
getAIMetricsFromResponse,
4+
getAIUsageFromResponse,
5+
} from '../src/openaiHelper';
6+
7+
describe('convertMessagesToOpenAI', () => {
8+
it('converts LDMessages to OpenAI message dicts preserving role and content', () => {
9+
const messages = convertMessagesToOpenAI([
10+
{ role: 'system', content: 'You are X' },
11+
{ role: 'user', content: 'Hi' },
12+
{ role: 'assistant', content: 'Hello' },
13+
]);
14+
15+
expect(messages).toEqual([
16+
{ role: 'system', content: 'You are X' },
17+
{ role: 'user', content: 'Hi' },
18+
{ role: 'assistant', content: 'Hello' },
19+
]);
20+
});
21+
});
22+
23+
describe('getAIUsageFromResponse', () => {
24+
it('returns undefined when usage is missing', () => {
25+
expect(getAIUsageFromResponse({})).toBeUndefined();
26+
});
27+
28+
it('maps OpenAI prompt/completion/total token fields to LDTokenUsage', () => {
29+
const usage = getAIUsageFromResponse({
30+
usage: { prompt_tokens: 5, completion_tokens: 10, total_tokens: 15 },
31+
});
32+
33+
expect(usage).toEqual({ total: 15, input: 5, output: 10 });
34+
});
35+
});
36+
37+
describe('getAIMetricsFromResponse', () => {
38+
it('returns success=true with usage extracted from the response', () => {
39+
const metrics = getAIMetricsFromResponse({
40+
usage: { prompt_tokens: 1, completion_tokens: 2, total_tokens: 3 },
41+
});
42+
43+
expect(metrics).toEqual({
44+
success: true,
45+
usage: { total: 3, input: 1, output: 2 },
46+
});
47+
});
48+
});

0 commit comments

Comments (0)