Skip to content

Commit 0bd44d0

Browse files
jsonbailey and claude committed
chore: merge main into branch, resolve client.py conflicts
This branch added evaluator/judge logic, while main independently added root-level tools map support. The conflicts in _completion_config and __evaluate_agent were resolved by keeping both changes. The parameter order swap for track_metrics_of_async was auto-resolved.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2 parents 42fe82c + 53db736 commit 0bd44d0

9 files changed

Lines changed: 209 additions & 11 deletions

File tree

packages/sdk/server-ai/src/ldai/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
LDAIAgentConfig,
2828
LDAIAgentDefaults,
2929
LDMessage,
30+
LDTool,
3031
ModelConfig,
3132
ProviderConfig,
3233
)
@@ -66,6 +67,7 @@
6667
'Judge',
6768
'JudgeConfiguration',
6869
'JudgeResult',
70+
'LDTool',
6971
'LDMessage',
7072
'ModelConfig',
7173
'ProviderConfig',

packages/sdk/server-ai/src/ldai/client.py

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
Edge,
2626
JudgeConfiguration,
2727
LDMessage,
28+
LDTool,
2829
ModelConfig,
2930
ProviderConfig,
3031
)
@@ -51,6 +52,25 @@
5152
_DISABLED_JUDGE_DEFAULT = AIJudgeConfigDefault.disabled()
5253

5354

def _parse_tools(tools_data: Optional[Dict[str, Any]]) -> Optional[Dict[str, LDTool]]:
    """
    Parse the root-level tools map from a flag variation dict.

    Entries whose value is not a dict are skipped with a warning. A tool's
    name is read from its ``name`` field; when that field is missing, or is
    not a non-empty string, the map key is used instead.

    :param tools_data: The value stored under the variation's ``tools`` key, if any
    :return: Mapping of tools-map key to :class:`LDTool`, or ``None`` when
        ``tools_data`` is not a dict or yields no valid tool entries
    """
    if not isinstance(tools_data, dict):
        return None

    result: Dict[str, LDTool] = {}
    for tool_name, tool_dict in tools_data.items():
        if not isinstance(tool_dict, dict):
            log.warning('Skipping tool "%s": expected a dict, got %s', tool_name, type(tool_dict).__name__)
            continue

        # dict.get's default only applies when the key is absent, so an
        # explicit null (or otherwise non-string) name must be handled here to
        # keep LDTool.name a usable string.
        name = tool_dict.get('name')
        if not isinstance(name, str) or not name:
            name = tool_name

        result[tool_name] = LDTool(
            name=name,
            description=tool_dict.get('description'),
            type=tool_dict.get('type'),
            parameters=tool_dict.get('parameters'),
            custom_parameters=tool_dict.get('customParameters'),  # camelCase in wire format
        )

    # An empty map is reported the same way as a missing one.
    return result or None
72+
73+
5474
class LDAIClient:
5575
"""The LaunchDarkly AI SDK client object."""
5676

@@ -91,11 +111,12 @@ def _completion_config(
91111
default_ai_provider: Optional[str] = None,
92112
) -> AICompletionConfig:
93113
(model, provider, messages, instructions,
94-
tracker_factory, enabled, judge_configuration, _) = self.__evaluate(
114+
tracker_factory, enabled, judge_configuration, variation) = self.__evaluate(
95115
key, context, default.to_dict(), variables
96116
)
97117

98118
evaluator = self._build_evaluator(judge_configuration, context, default_ai_provider, variables)
119+
tools = _parse_tools(variation.get('tools'))
99120

100121
config = AICompletionConfig(
101122
key=key,
@@ -106,6 +127,7 @@ def _completion_config(
106127
create_tracker=tracker_factory,
107128
evaluator=evaluator,
108129
judge_configuration=judge_configuration,
130+
tools=tools,
109131
)
110132

111133
return config
@@ -912,7 +934,7 @@ def __evaluate_agent(
912934
:return: Configured AIAgentConfig instance.
913935
"""
914936
(model, provider, messages, instructions,
915-
tracker_factory, enabled, judge_configuration, _) = self.__evaluate(
937+
tracker_factory, enabled, judge_configuration, variation) = self.__evaluate(
916938
key, context, default.to_dict(), variables, graph_key=graph_key
917939
)
918940

@@ -922,6 +944,7 @@ def __evaluate_agent(
922944
effective_judge_configuration = judge_configuration or JudgeConfiguration(judges=[])
923945

924946
evaluator = self._build_evaluator(effective_judge_configuration, context, default_ai_provider, variables)
947+
tools = _parse_tools(variation.get('tools'))
925948

926949
return AIAgentConfig(
927950
key=key,
@@ -932,6 +955,7 @@ def __evaluate_agent(
932955
create_tracker=tracker_factory,
933956
evaluator=evaluator,
934957
judge_configuration=effective_judge_configuration,
958+
tools=tools,
935959
)
936960

937961
def __interpolate_template(self, template: str, variables: Dict[str, Any]) -> str:

packages/sdk/server-ai/src/ldai/judge/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,8 +75,8 @@ async def evaluate(
7575
assert self._evaluation_response_structure is not None
7676

7777
response = await tracker.track_metrics_of_async(
78-
lambda: self._model_runner.invoke_structured_model(messages, self._evaluation_response_structure),
7978
lambda result: result.metrics,
79+
lambda: self._model_runner.invoke_structured_model(messages, self._evaluation_response_structure),
8080
)
8181

8282
parsed = self._parse_evaluation_response(response.data)

packages/sdk/server-ai/src/ldai/managed_agent.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,8 @@ async def run(self, input: str) -> AgentResult:
2929
"""
3030
tracker = self._ai_config.create_tracker()
3131
return await tracker.track_metrics_of_async(
32-
lambda: self._agent_runner.run(input),
3332
lambda result: result.metrics,
33+
lambda: self._agent_runner.run(input),
3434
)
3535

3636
def get_agent_runner(self) -> AgentRunner:

packages/sdk/server-ai/src/ldai/managed_model.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,8 +45,8 @@ async def invoke(self, prompt: str) -> ModelResponse:
4545
all_messages = config_messages + self._messages
4646

4747
response = await tracker.track_metrics_of_async(
48-
lambda: self._model_runner.invoke_model(all_messages),
4948
lambda result: result.metrics,
49+
lambda: self._model_runner.invoke_model(all_messages),
5050
)
5151

5252
input_text = '\r\n'.join(m.content for m in self._messages) if self._messages else ''

packages/sdk/server-ai/src/ldai/models.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,31 @@
88
from typing_extensions import Self
99

1010

@dataclass(frozen=True)
class LDTool:
    """
    One entry of the root-level ``tools`` map in an AI Config flag variation.

    This is separate from ``model.parameters.tools[]``, which is the raw array
    passed to LLM providers.
    """
    name: str
    description: Optional[str] = None
    type: Optional[str] = None
    parameters: Optional[Dict[str, Any]] = None
    custom_parameters: Optional[Dict[str, Any]] = None

    def to_dict(self) -> dict:
        """
        Render the tool as a plain dictionary using wire-format keys.

        ``name`` is always present; every other field is emitted only when it
        is not ``None``.

        :return: Dictionary representation of the tool
        """
        optional_fields = (
            ('description', self.description),
            ('type', self.type),
            ('parameters', self.parameters),
            # The wire format is camelCase, unlike the attribute name.
            ('customParameters', self.custom_parameters),
        )
        payload: Dict[str, Any] = {'name': self.name}
        payload.update((key, value) for key, value in optional_fields if value is not None)
        return payload
34+
35+
1136
@dataclass
1237
class LDMessage:
1338
role: Literal['system', 'user', 'assistant']
@@ -211,6 +236,7 @@ class AICompletionConfigDefault(AIConfigDefault):
211236
"""
212237
messages: Optional[List[LDMessage]] = None
213238
judge_configuration: Optional[JudgeConfiguration] = None
239+
tools: Optional[Dict[str, 'LDTool']] = None
214240

215241
def to_dict(self) -> dict:
216242
"""
@@ -220,6 +246,8 @@ def to_dict(self) -> dict:
220246
result['messages'] = [message.to_dict() for message in self.messages] if self.messages else None
221247
if self.judge_configuration is not None:
222248
result['judgeConfiguration'] = self.judge_configuration.to_dict()
249+
if self.tools is not None:
250+
result['tools'] = {k: v.to_dict() for k, v in self.tools.items()}
223251
return result
224252

225253

@@ -231,6 +259,7 @@ class AICompletionConfig(AIConfig):
231259
evaluator: 'Evaluator' = field(kw_only=True, repr=False, compare=False, hash=False)
232260
messages: Optional[List[LDMessage]] = None
233261
judge_configuration: Optional[JudgeConfiguration] = None
262+
tools: Optional[Dict[str, 'LDTool']] = None
234263

235264
def to_dict(self) -> dict:
236265
"""
@@ -240,6 +269,8 @@ def to_dict(self) -> dict:
240269
result['messages'] = [message.to_dict() for message in self.messages] if self.messages else None
241270
if self.judge_configuration is not None:
242271
result['judgeConfiguration'] = self.judge_configuration.to_dict()
272+
if self.tools is not None:
273+
result['tools'] = {k: v.to_dict() for k, v in self.tools.items()}
243274
return result
244275

245276

@@ -254,6 +285,7 @@ class AIAgentConfigDefault(AIConfigDefault):
254285
"""
255286
instructions: Optional[str] = None
256287
judge_configuration: Optional[JudgeConfiguration] = None
288+
tools: Optional[Dict[str, 'LDTool']] = None
257289

258290
def to_dict(self) -> Dict[str, Any]:
259291
"""
@@ -264,6 +296,8 @@ def to_dict(self) -> Dict[str, Any]:
264296
result['instructions'] = self.instructions
265297
if self.judge_configuration is not None:
266298
result['judgeConfiguration'] = self.judge_configuration.to_dict()
299+
if self.tools is not None:
300+
result['tools'] = {k: v.to_dict() for k, v in self.tools.items()}
267301
return result
268302

269303

@@ -275,6 +309,7 @@ class AIAgentConfig(AIConfig):
275309
evaluator: 'Evaluator' = field(kw_only=True, repr=False, compare=False, hash=False)
276310
instructions: Optional[str] = None
277311
judge_configuration: Optional[JudgeConfiguration] = None
312+
tools: Optional[Dict[str, 'LDTool']] = None
278313

279314
def to_dict(self) -> Dict[str, Any]:
280315
"""
@@ -285,6 +320,8 @@ def to_dict(self) -> Dict[str, Any]:
285320
result['instructions'] = self.instructions
286321
if self.judge_configuration is not None:
287322
result['judgeConfiguration'] = self.judge_configuration.to_dict()
323+
if self.tools is not None:
324+
result['tools'] = {k: v.to_dict() for k, v in self.tools.items()}
288325
return result
289326

290327

packages/sdk/server-ai/src/ldai/tracker.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -262,8 +262,8 @@ def _track_from_metrics_extractor(
262262

263263
def track_metrics_of(
264264
self,
265-
func: Callable[[], Any],
266265
metrics_extractor: Callable[[Any], Any],
266+
func: Callable[[], Any],
267267
) -> Any:
268268
"""
269269
Track metrics for a synchronous AI operation.
@@ -277,8 +277,8 @@ def track_metrics_of(
277277
278278
For async operations, use :meth:`track_metrics_of_async`.
279279
280-
:param func: Synchronous callable that runs the operation
281280
:param metrics_extractor: Function that extracts LDAIMetrics from the operation result
281+
:param func: Synchronous callable that runs the operation
282282
:return: The result of the operation
283283
"""
284284
start_ns = time.perf_counter_ns()
@@ -294,14 +294,14 @@ def track_metrics_of(
294294
self.track_duration(duration)
295295
return self._track_from_metrics_extractor(result, metrics_extractor)
296296

297-
async def track_metrics_of_async(self, func, metrics_extractor):
297+
async def track_metrics_of_async(self, metrics_extractor, func):
298298
"""
299299
Track metrics for an async AI operation (``func`` is awaited).
300300
301301
Same event semantics as :meth:`track_metrics_of`.
302302
303-
:param func: Async callable or zero-arg callable that returns an awaitable when called
304303
:param metrics_extractor: Function that extracts LDAIMetrics from the operation result
304+
:param func: Async callable or zero-arg callable that returns an awaitable when called
305305
:return: The result of the operation
306306
"""
307307
start_ns = time.perf_counter_ns()
Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
import pytest
2+
from ldclient import Config, Context, LDClient
3+
from ldclient.integrations.test_data import TestData
4+
5+
from ldai import LDTool, LDAIClient
6+
from ldai.models import AIAgentConfigDefault, AICompletionConfigDefault
7+
8+
@pytest.fixture
def td() -> TestData:
    """
    Provide a TestData source pre-loaded with the flags these tests evaluate.

    Three single-variation flags are registered, each serving variation 0 to
    everyone: ``completion-with-tools`` and ``agent-with-tools`` carry a
    root-level ``tools`` map, while ``completion-no-tools`` has none.
    """
    variations_by_flag = {
        'completion-with-tools': {
            'model': {'name': 'gpt-5', 'parameters': {'temperature': 0.7}},
            'messages': [{'role': 'user', 'content': 'Hello'}],
            'tools': {
                'web-search-tool': {
                    'name': 'web-search-tool',
                    'type': 'function',
                    'parameters': {'type': 'object', 'properties': {}, 'required': []},
                    'customParameters': {'some-custom-parameter': 'some-custom-value'},
                }
            },
            '_ldMeta': {'enabled': True, 'variationKey': 'v1', 'version': 1},
        },
        'completion-no-tools': {
            'model': {'name': 'gpt-5'},
            'messages': [{'role': 'user', 'content': 'Hello'}],
            '_ldMeta': {'enabled': True, 'variationKey': 'v1', 'version': 1},
        },
        'agent-with-tools': {
            'model': {'name': 'gpt-5'},
            'instructions': 'You are a helpful agent.',
            'tools': {
                'search-tool': {
                    'name': 'search-tool',
                    'type': 'function',
                    'customParameters': {'maxResults': 10},
                }
            },
            '_ldMeta': {'enabled': True, 'variationKey': 'v1', 'version': 1, 'mode': 'agent'},
        },
    }

    source = TestData.data_source()
    # Dicts keep insertion order, so flags are registered in the order listed.
    for flag_key, variation in variations_by_flag.items():
        flag = source.flag(flag_key).variations(variation).variation_for_all(0)
        source.update(flag)
    return source
64+
65+
@pytest.fixture
def client(td) -> LDAIClient:
    """
    Provide an LDAIClient backed by an LDClient that reads flags from ``td``.

    The underlying client is configured with a fake SDK key and with event
    sending switched off.
    """
    sdk_config = Config('fake-sdk-key', update_processor_class=td, send_events=False)
    return LDAIClient(LDClient(config=sdk_config))
71+
72+
@pytest.fixture
def context() -> Context:
    """Provide the evaluation context shared by the tests: key ``test-user``, name ``Test User``."""
    builder = Context.builder('test-user').name('Test User')
    return builder.build()
76+
77+
def test_completion_config_includes_tools_from_variation(client, context):
    """A completion config exposes the tools declared in the served variation."""
    config = client.completion_config('completion-with-tools', context, AICompletionConfigDefault())
    tools = config.tools

    assert tools is not None
    assert 'web-search-tool' in tools

    web_search = tools['web-search-tool']
    assert web_search.name == 'web-search-tool'
    assert web_search.type == 'function'
    # The camelCase wire key ``customParameters`` surfaces as ``custom_parameters``.
    assert web_search.custom_parameters == {'some-custom-parameter': 'some-custom-value'}
87+
88+
def test_completion_config_tools_none_when_not_in_variation(client, context):
    """A variation without a ``tools`` map yields ``tools is None`` on the completion config."""
    config = client.completion_config('completion-no-tools', context, AICompletionConfigDefault())

    assert config.tools is None
93+
94+
def test_completion_config_tools_none_when_variation_has_no_tools(client, context):
    """Tools set on the default are not used when the served variation has no ``tools`` map."""
    fallback = AICompletionConfigDefault(
        tools={
            'default-tool': LDTool(
                name='default-tool',
                type='function',
                custom_parameters={'priority': 'high'},
            ),
        },
    )

    config = client.completion_config('completion-no-tools', context, fallback)

    assert config.tools is None
102+
103+
def test_agent_config_includes_tools_from_variation(client, context):
    """An agent config exposes the tools declared in the served variation."""
    agent = client.agent_config('agent-with-tools', context, AIAgentConfigDefault())
    tools = agent.tools

    assert tools is not None
    assert 'search-tool' in tools

    search = tools['search-tool']
    assert search.name == 'search-tool'
    assert search.custom_parameters == {'maxResults': 10}
112+
113+
def test_aitool_to_dict_serializes_custom_parameters_as_camel_case():
    """``LDTool.to_dict`` emits ``customParameters`` (camelCase), never ``custom_parameters``."""
    serialized = LDTool(
        name='my-tool',
        type='function',
        parameters={'type': 'object'},
        custom_parameters={'someKey': 'someValue'},
    ).to_dict()

    assert serialized['name'] == 'my-tool'
    assert serialized['type'] == 'function'
    assert serialized['parameters'] == {'type': 'object'}
    assert 'customParameters' in serialized
    assert serialized['customParameters'] == {'someKey': 'someValue'}
    assert 'custom_parameters' not in serialized
129+
130+
def test_aitool_to_dict_omits_none_fields():
    """``LDTool.to_dict`` leaves out every field that is ``None``, keeping only ``name``."""
    serialized = LDTool(name='bare-tool').to_dict()

    assert serialized == {'name': 'bare-tool'}

0 commit comments

Comments
 (0)