Skip to content

Commit e6942a6

Browse files
authored
fix: build judge input as string; strip legacy judge config messages (#165)
1 parent 48761c9 commit e6942a6

13 files changed

Lines changed: 238 additions & 127 deletions

File tree

packages/ai-providers/server-ai-langchain/src/ldai_langchain/langchain_agent_runner.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -33,7 +33,7 @@ def __init__(self, agent: Any):
3333

3434
async def run(
3535
self,
36-
input: Any,
36+
input: str,
3737
output_type: Optional[Dict[str, Any]] = None,
3838
) -> RunnerResult:
3939
"""
@@ -42,7 +42,7 @@ async def run(
4242
Delegates to the compiled LangChain agent, which handles
4343
the tool-calling loop internally.
4444
45-
:param input: The user prompt or input to the agent
45+
:param input: The user prompt string to the agent
4646
:param output_type: Reserved for future structured output support;
4747
currently ignored.
4848
:return: :class:`RunnerResult` with ``content``, ``raw`` response, and

packages/ai-providers/server-ai-langchain/src/ldai_langchain/langchain_model_runner.py

Lines changed: 8 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -24,8 +24,9 @@ class LangChainModelRunner(Runner):
2424
:meth:`run`.
2525
"""
2626

27-
def __init__(self, llm: BaseChatModel):
27+
def __init__(self, llm: BaseChatModel, config_messages: Optional[List[LDMessage]] = None):
2828
self._llm = llm
29+
self._config_messages: List[LDMessage] = list(config_messages or [])
2930

3031
def get_llm(self) -> BaseChatModel:
3132
"""
@@ -37,37 +38,28 @@ def get_llm(self) -> BaseChatModel:
3738

3839
async def run(
3940
self,
40-
input: Any,
41+
input: str,
4142
output_type: Optional[Dict[str, Any]] = None,
4243
) -> RunnerResult:
4344
"""
4445
Run the LangChain model with the given input.
4546
46-
:param input: A string prompt or a list of :class:`LDMessage` objects
47+
Prepends any config messages (system prompt, instructions, etc.) stored
48+
at construction time before the user message.
49+
50+
:param input: A string prompt
4751
:param output_type: Optional JSON schema dict requesting structured output.
4852
When provided, ``parsed`` on the returned :class:`RunnerResult` is
4953
populated with the parsed JSON document.
5054
:return: :class:`RunnerResult` containing ``content``, ``metrics``,
5155
``raw`` and (when ``output_type`` is set) ``parsed``.
5256
"""
53-
messages = self._coerce_input(input)
57+
messages = self._config_messages + [LDMessage(role='user', content=input)]
5458

5559
if output_type is not None:
5660
return await self._run_structured(messages, output_type)
5761
return await self._run_completion(messages)
5862

59-
# convert_messages_to_langchain only accepts List[LDMessage]; _coerce_input
60-
# normalizes a bare string to [LDMessage(role='user', ...)] before that step.
61-
@staticmethod
62-
def _coerce_input(input: Any) -> List[LDMessage]:
63-
if isinstance(input, str):
64-
return [LDMessage(role='user', content=input)]
65-
if isinstance(input, list):
66-
return input
67-
raise TypeError(
68-
f"Unsupported input type for LangChainModelRunner.run: {type(input).__name__}"
69-
)
70-
7163
async def _run_completion(self, messages: List[LDMessage]) -> RunnerResult:
7264
try:
7365
langchain_messages = convert_messages_to_langchain(messages)

packages/ai-providers/server-ai-langchain/src/ldai_langchain/langchain_runner_factory.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -69,4 +69,5 @@ def create_model(self, config: AIConfigKind) -> LangChainModelRunner:
6969
:return: LangChainModelRunner ready to invoke the model
7070
"""
7171
llm = create_langchain_model(config)
72-
return LangChainModelRunner(llm)
72+
config_messages = list(getattr(config, 'messages', None) or [])
73+
return LangChainModelRunner(llm, config_messages)

packages/ai-providers/server-ai-langchain/tests/test_langchain_provider.py

Lines changed: 5 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -233,8 +233,7 @@ async def test_returns_success_true_for_string_content(self, mock_llm):
233233
mock_llm.ainvoke = AsyncMock(return_value=mock_response)
234234
provider = LangChainModelRunner(mock_llm)
235235

236-
messages = [LDMessage(role='user', content='Hello')]
237-
result = await provider.run(messages)
236+
result = await provider.run('Hello')
238237

239238
assert result.metrics.success is True
240239
assert result.content == 'Test response'
@@ -246,8 +245,7 @@ async def test_returns_success_false_for_non_string_content_and_logs_warning(sel
246245
mock_llm.ainvoke = AsyncMock(return_value=mock_response)
247246
provider = LangChainModelRunner(mock_llm)
248247

249-
messages = [LDMessage(role='user', content='Hello')]
250-
result = await provider.run(messages)
248+
result = await provider.run('Hello')
251249

252250
assert result.metrics.success is False
253251
assert result.content == ''
@@ -259,8 +257,7 @@ async def test_returns_success_false_when_model_invocation_throws_error(self, mo
259257
mock_llm.ainvoke = AsyncMock(side_effect=error)
260258
provider = LangChainModelRunner(mock_llm)
261259

262-
messages = [LDMessage(role='user', content='Hello')]
263-
result = await provider.run(messages)
260+
result = await provider.run('Hello')
264261

265262
assert result.metrics.success is False
266263
assert result.content == ''
@@ -284,9 +281,8 @@ async def test_returns_success_true_for_successful_invocation(self, mock_llm):
284281
mock_llm.with_structured_output = MagicMock(return_value=mock_structured_llm)
285282
provider = LangChainModelRunner(mock_llm)
286283

287-
messages = [LDMessage(role='user', content='Hello')]
288284
response_structure = {'type': 'object', 'properties': {}}
289-
result = await provider.run(messages, output_type=response_structure)
285+
result = await provider.run('Hello', output_type=response_structure)
290286

291287
assert result.metrics.success is True
292288
assert result.parsed == parsed_data
@@ -300,9 +296,8 @@ async def test_returns_success_false_when_structured_model_invocation_throws_err
300296
mock_llm.with_structured_output = MagicMock(return_value=mock_structured_llm)
301297
provider = LangChainModelRunner(mock_llm)
302298

303-
messages = [LDMessage(role='user', content='Hello')]
304299
response_structure = {'type': 'object', 'properties': {}}
305-
result = await provider.run(messages, output_type=response_structure)
300+
result = await provider.run('Hello', output_type=response_structure)
306301

307302
assert result.metrics.success is False
308303
assert result.parsed is None

packages/ai-providers/server-ai-openai/src/ldai_openai/openai_agent_runner.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -47,7 +47,7 @@ def __init__(
4747

4848
async def run(
4949
self,
50-
input: Any,
50+
input: str,
5151
output_type: Optional[Dict[str, Any]] = None,
5252
) -> RunnerResult:
5353
"""
@@ -56,7 +56,7 @@ async def run(
5656
Delegates to the OpenAI Agents SDK ``Runner.run``, which handles the
5757
tool-calling loop internally.
5858
59-
:param input: The user prompt or input to the agent
59+
:param input: The user prompt string to the agent
6060
:param output_type: Reserved for future structured output support;
6161
currently ignored.
6262
:return: :class:`RunnerResult` with ``content``, ``raw`` response, and

packages/ai-providers/server-ai-openai/src/ldai_openai/openai_model_runner.py

Lines changed: 8 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -28,46 +28,37 @@ def __init__(
2828
client: AsyncOpenAI,
2929
model_name: str,
3030
parameters: Dict[str, Any],
31+
config_messages: Optional[List[LDMessage]] = None,
3132
):
3233
self._client = client
3334
self._model_name = model_name
3435
self._parameters = parameters
36+
self._config_messages: List[LDMessage] = list(config_messages or [])
3537

3638
async def run(
3739
self,
38-
input: Any,
40+
input: str,
3941
output_type: Optional[Dict[str, Any]] = None,
4042
) -> RunnerResult:
4143
"""
4244
Run the OpenAI model with the given input.
4345
44-
:param input: A string prompt or a list of :class:`LDMessage` objects
46+
Prepends any config messages (system prompt, instructions, etc.) stored
47+
at construction time before the user message.
48+
49+
:param input: A string prompt
4550
:param output_type: Optional JSON schema dict requesting structured output.
4651
When provided, ``parsed`` on the returned :class:`RunnerResult` is
4752
populated with the parsed JSON document.
4853
:return: :class:`RunnerResult` containing ``content``, ``metrics``,
4954
``raw`` and (when ``output_type`` is set) ``parsed``.
5055
"""
51-
try:
52-
messages = self._coerce_input(input)
53-
except TypeError as error:
54-
log.warning(f'OpenAI model runner received unsupported input type: {error}')
55-
return RunnerResult(content='', metrics=LDAIMetrics(success=False, usage=None))
56+
messages = self._config_messages + [LDMessage(role='user', content=input)]
5657

5758
if output_type is not None:
5859
return await self._run_structured(messages, output_type)
5960
return await self._run_completion(messages)
6061

61-
@staticmethod
62-
def _coerce_input(input: Any) -> List[LDMessage]:
63-
if isinstance(input, str):
64-
return [LDMessage(role='user', content=input)]
65-
if isinstance(input, list):
66-
return input
67-
raise TypeError(
68-
f"Unsupported input type for OpenAIModelRunner.run: {type(input).__name__}"
69-
)
70-
7162
async def _run_completion(self, messages: List[LDMessage]) -> RunnerResult:
7263
try:
7364
response = await self._client.chat.completions.create(

packages/ai-providers/server-ai-openai/src/ldai_openai/openai_runner_factory.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -100,7 +100,8 @@ def create_model(self, config: AIConfigKind) -> OpenAIModelRunner:
100100
tool_defs = parameters.pop('tools', None) or []
101101
if tool_defs:
102102
parameters['tools'] = normalize_tool_types(tool_defs)
103-
return OpenAIModelRunner(self._client, model_name, parameters)
103+
config_messages = list(getattr(config, 'messages', None) or [])
104+
return OpenAIModelRunner(self._client, model_name, parameters, config_messages)
104105

105106
def get_client(self) -> AsyncOpenAI:
106107
"""

packages/ai-providers/server-ai-openai/tests/test_openai_provider.py

Lines changed: 8 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -4,8 +4,6 @@
44
from typing import Any
55
from unittest.mock import AsyncMock, MagicMock, patch
66

7-
from ldai import LDMessage
8-
97
from ldai_openai import OpenAIModelRunner, OpenAIRunnerFactory, get_ai_metrics_from_response, get_ai_usage_from_response
108

119

@@ -143,8 +141,7 @@ async def test_invokes_openai_chat_completions_and_returns_response(self, mock_c
143141
mock_client.chat.completions.create = AsyncMock(return_value=mock_response)
144142

145143
provider = OpenAIModelRunner(mock_client, 'gpt-3.5-turbo', {})
146-
messages = [LDMessage(role='user', content='Hello!')]
147-
result = await provider.run(messages)
144+
result = await provider.run('Hello!')
148145

149146
mock_client.chat.completions.create.assert_called_once_with(
150147
model='gpt-3.5-turbo',
@@ -172,8 +169,7 @@ async def test_returns_unsuccessful_response_when_no_content(self, mock_client):
172169
mock_client.chat.completions.create = AsyncMock(return_value=mock_response)
173170

174171
provider = OpenAIModelRunner(mock_client, 'gpt-3.5-turbo', {})
175-
messages = [LDMessage(role='user', content='Hello!')]
176-
result = await provider.run(messages)
172+
result = await provider.run('Hello!')
177173

178174
assert result.content == ''
179175
assert result.metrics.success is False
@@ -190,8 +186,7 @@ async def test_returns_unsuccessful_response_when_choices_empty(self, mock_clien
190186
mock_client.chat.completions.create = AsyncMock(return_value=mock_response)
191187

192188
provider = OpenAIModelRunner(mock_client, 'gpt-3.5-turbo', {})
193-
messages = [LDMessage(role='user', content='Hello!')]
194-
result = await provider.run(messages)
189+
result = await provider.run('Hello!')
195190

196191
assert result.content == ''
197192
assert result.metrics.success is False
@@ -204,8 +199,7 @@ async def test_returns_unsuccessful_response_when_exception_thrown(self, mock_cl
204199
mock_client.chat.completions.create = AsyncMock(side_effect=Exception('API Error'))
205200

206201
provider = OpenAIModelRunner(mock_client, 'gpt-3.5-turbo', {})
207-
messages = [LDMessage(role='user', content='Hello!')]
208-
result = await provider.run(messages)
202+
result = await provider.run('Hello!')
209203

210204
assert result.content == ''
211205
assert result.metrics.success is False
@@ -234,7 +228,6 @@ async def test_invokes_openai_with_structured_output(self, mock_client):
234228
mock_client.chat.completions.create = AsyncMock(return_value=mock_response)
235229

236230
provider = OpenAIModelRunner(mock_client, 'gpt-3.5-turbo', {})
237-
messages = [LDMessage(role='user', content='Tell me about a person')]
238231
response_structure = {
239232
'type': 'object',
240233
'properties': {
@@ -245,7 +238,7 @@ async def test_invokes_openai_with_structured_output(self, mock_client):
245238
'required': ['name', 'age', 'city'],
246239
}
247240

248-
result = await provider.run(messages, output_type=response_structure)
241+
result = await provider.run('Tell me about a person', output_type=response_structure)
249242

250243
assert result.parsed == {'name': 'John', 'age': 30, 'city': 'New York'}
251244
assert result.content == '{"name": "John", "age": 30, "city": "New York"}'
@@ -269,10 +262,9 @@ async def test_returns_unsuccessful_when_no_content_in_structured_response(self,
269262
mock_client.chat.completions.create = AsyncMock(return_value=mock_response)
270263

271264
provider = OpenAIModelRunner(mock_client, 'gpt-3.5-turbo', {})
272-
messages = [LDMessage(role='user', content='Tell me about a person')]
273265
response_structure = {'type': 'object'}
274266

275-
result = await provider.run(messages, output_type=response_structure)
267+
result = await provider.run('Tell me about a person', output_type=response_structure)
276268

277269
assert result.parsed is None
278270
assert result.content == ''
@@ -293,10 +285,9 @@ async def test_handles_json_parsing_errors(self, mock_client):
293285
mock_client.chat.completions.create = AsyncMock(return_value=mock_response)
294286

295287
provider = OpenAIModelRunner(mock_client, 'gpt-3.5-turbo', {})
296-
messages = [LDMessage(role='user', content='Tell me about a person')]
297288
response_structure = {'type': 'object'}
298289

299-
result = await provider.run(messages, output_type=response_structure)
290+
result = await provider.run('Tell me about a person', output_type=response_structure)
300291

301292
assert result.parsed is None
302293
assert result.content == 'invalid json content'
@@ -312,10 +303,9 @@ async def test_returns_unsuccessful_response_when_exception_thrown(self, mock_cl
312303
mock_client.chat.completions.create = AsyncMock(side_effect=Exception('API Error'))
313304

314305
provider = OpenAIModelRunner(mock_client, 'gpt-3.5-turbo', {})
315-
messages = [LDMessage(role='user', content='Tell me about a person')]
316306
response_structure = {'type': 'object'}
317307

318-
result = await provider.run(messages, output_type=response_structure)
308+
result = await provider.run('Tell me about a person', output_type=response_structure)
319309

320310
assert result.parsed is None
321311
assert result.content == ''

packages/sdk/server-ai/src/ldai/client.py

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@
99
from ldai import log
1010
from ldai.agent_graph import AgentGraphDefinition
1111
from ldai.evaluator import Evaluator
12-
from ldai.judge import Judge
12+
from ldai.judge import Judge, _strip_legacy_judge_messages
1313
from ldai.managed_agent import ManagedAgent
1414
from ldai.managed_agent_graph import ManagedAgentGraph
1515
from ldai.managed_model import ManagedModel
@@ -237,6 +237,10 @@ def _extract_evaluation_metric_key(variation: Dict[str, Any]) -> Optional[str]:
237237

238238
evaluation_metric_key = _extract_evaluation_metric_key(variation)
239239

240+
# strip legacy judge template messages before creating config
241+
if messages:
242+
messages = _strip_legacy_judge_messages(messages)
243+
240244
config = AIJudgeConfig(
241245
key=key,
242246
enabled=bool(enabled),

0 commit comments

Comments
 (0)