Skip to content

Commit 92f51fa

Browse files
fix: better handling of params and custom params for optimization (#163)
**Requirements**

- [x] I have added test coverage for new or changed functionality
- [x] I have followed the repository's [pull request submission guidelines](../blob/main/CONTRIBUTING.md#submitting-pull-requests)
- [x] I have validated my changes against all supported platform versions

**Describe the solution you've provided**

Implements better handling for params on the initial variation (folds model changes in as overwrites rather than completely replacing) and ensures custom params are persisted unchanged. Additionally makes sure that tools cannot be changed by the optimization process.

**Describe alternatives you've considered**

This is the result of a bug report so there weren't really alternatives considered.

**Additional context**

Initially it was assumed that the optimization process would properly pull params forward (via the LLM) but this doesn't seem to always be the case. In the case of custom params, they aren't fed into the LLM calls since they're user-specified data (not specific to the actual optimization result). We now just pull these through as-is. In the case of tools, the model will be able to optimize the prompt to call a specific tool if multiple are provided, but we don't want to strip any tool information from the final result as it may be necessary for the calls to function.

<!-- CURSOR_SUMMARY -->

---

> [!NOTE]
> **Medium Risk**
> Moderate risk because it changes how model parameters and `tools` are carried forward across optimization iterations and what gets auto-committed, which can affect runtime agent behavior if merging/restoration logic is wrong.
>
> **Overview**
> Improves variation-application logic so LLM-generated `current_parameters` are **merged** into existing parameters instead of replacing them, preserving user-specified/custom settings (e.g. `max_tokens`, `response_format`) when the LLM omits them.
>
> Prevents tool drift by always restoring the original `tools` list (and logging when the LLM returns a different one) to avoid silently dropping user tools or leaking internal framework tools.
>
> Captures `model.custom` from the initial LaunchDarkly variation and includes it when auto-committing a winning variation; adds focused test coverage for parameter persistence, tool restoration/warnings, and `model.custom` propagation.
>
> <sup>Reviewed by [Cursor Bugbot](https://cursor.com/bugbot) for commit 572a2aa. Bugbot is set up for automated code reviews on this repo. Configure [here](https://www.cursor.com/dashboard/bugbot).</sup>

<!-- /CURSOR_SUMMARY -->
2 parents 23baeb4 + 572a2aa commit 92f51fa

2 files changed

Lines changed: 262 additions & 1 deletion

File tree

packages/optimization/src/ldai_optimizer/client.py

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,7 @@ def __init__(self, ldClient: LDAIClient) -> None:
159159
self._last_succeeded_context: Optional[OptimizationContext] = None
160160
self._last_optimization_result_id: Optional[str] = None
161161
self._initial_tool_keys: List[str] = []
162+
self._initial_model_custom: Optional[Dict[str, Any]] = None
162163
self._total_token_usage: int = 0
163164

164165
if os.environ.get("LAUNCHDARKLY_API_KEY"):
@@ -864,6 +865,11 @@ async def _get_agent_config(
864865
if isinstance(t, dict) and "key" in t
865866
]
866867

868+
raw_model = raw_variation.get("model")
869+
self._initial_model_custom = (
870+
raw_model.get("custom") if isinstance(raw_model, dict) else None
871+
)
872+
867873
agent_config = dataclasses.replace(
868874
agent_config, instructions=raw_instructions
869875
)
@@ -1234,7 +1240,32 @@ def _apply_new_variation_response(
12341240
for msg in placeholder_warnings:
12351241
logger.warning("[Iteration %d] -> %s", iteration, msg)
12361242

1237-
self._current_parameters = response_data["current_parameters"]
1243+
# Merge the LLM's returned parameters into the existing ones so that custom
1244+
# parameters (e.g. response_format, max_tokens, structured-output config)
1245+
# are preserved even when the LLM omits them from its response.
1246+
original_params = self._current_parameters.copy()
1247+
new_params = response_data["current_parameters"]
1248+
merged_params = {**original_params, **new_params}
1249+
1250+
# Tools must be returned "unchanged" per the variation prompt. Always restore
1251+
# the original tools so that (a) user-defined tools are never silently dropped
1252+
# and (b) internal framework tools (e.g. structured-output tool injected by
1253+
# the agent SDK) cannot leak in from the LLM's response.
1254+
original_tools = original_params.get("tools")
1255+
if original_tools is not None:
1256+
returned_tools = new_params.get("tools")
1257+
if returned_tools is not None and returned_tools != original_tools:
1258+
logger.warning(
1259+
"[Iteration %d] -> LLM returned a modified tools list; restoring "
1260+
"original tools to prevent tool drift or internal-tool leakage. "
1261+
"Original: %s Returned: %s",
1262+
iteration,
1263+
[t.get("name") if isinstance(t, dict) else getattr(t, "name", t) for t in original_tools],
1264+
[t.get("name") if isinstance(t, dict) else getattr(t, "name", t) for t in returned_tools],
1265+
)
1266+
merged_params["tools"] = original_tools
1267+
1268+
self._current_parameters = merged_params
12381269

12391270
# Update model — it should always be provided since it's required in the schema
12401271
model_value = (
@@ -2024,6 +2055,8 @@ def _commit_variation(
20242055
}
20252056
if self._initial_tool_keys:
20262057
payload["toolKeys"] = list(self._initial_tool_keys)
2058+
if self._initial_model_custom:
2059+
payload["model"] = {"custom": self._initial_model_custom}
20272060

20282061
last_exc: Optional[Exception] = None
20292062
for attempt in range(1, 4):

packages/optimization/tests/test_client.py

Lines changed: 228 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -950,6 +950,162 @@ async def test_raises_after_max_retries_exhausted(self):
950950
assert self.handle_agent_call.call_count == 3
951951

952952

953+
# ---------------------------------------------------------------------------
954+
# Parameter persistence across variation generation
955+
# ---------------------------------------------------------------------------
956+
957+
958+
class TestParameterPersistence:
959+
"""Ensure custom parameters are preserved when the LLM generates a new variation."""
960+
961+
def setup_method(self):
962+
self.client = _make_client()
963+
agent_config = _make_agent_config()
964+
self.client._agent_key = "test-agent"
965+
self.client._agent_config = agent_config
966+
self.client._initial_instructions = AGENT_INSTRUCTIONS
967+
self.client._initialize_class_members_from_config(agent_config)
968+
969+
def _set_params(self, params: Dict[str, Any]) -> None:
970+
self.client._current_parameters = params
971+
972+
def _run_variation(self, returned_params: Dict[str, Any]) -> None:
973+
"""Helper: simulate _apply_new_variation_response with a given returned params dict."""
974+
variation_ctx = OptimizationContext(
975+
scores={},
976+
completion_response="",
977+
current_instructions=AGENT_INSTRUCTIONS,
978+
current_parameters={"temperature": 0.1},
979+
current_variables={},
980+
current_model="gpt-4o",
981+
user_input=None,
982+
iteration=1,
983+
)
984+
response_data = {
985+
"current_instructions": "Improved instructions.",
986+
"current_parameters": returned_params,
987+
"model": "gpt-4o",
988+
}
989+
self.client._options = _make_options()
990+
self.client._apply_new_variation_response(response_data, variation_ctx, json.dumps(response_data), 1)
991+
992+
async def test_custom_param_preserved_when_llm_omits_it(self):
993+
"""Parameters not in LLM response should be preserved from the original config."""
994+
self.client._options = _make_options()
995+
self.client._current_parameters = {"temperature": 0.7, "max_tokens": 512, "seed": 42}
996+
self._run_variation({"temperature": 0.5})
997+
assert self.client._current_parameters["max_tokens"] == 512
998+
assert self.client._current_parameters["seed"] == 42
999+
assert self.client._current_parameters["temperature"] == 0.5
1000+
1001+
async def test_response_format_preserved_when_llm_omits_it(self):
1002+
"""response_format (structured output config) is preserved even if LLM returns only temperature."""
1003+
self.client._options = _make_options()
1004+
self.client._current_parameters = {
1005+
"temperature": 0.7,
1006+
"response_format": {"type": "json_schema", "json_schema": {"name": "output"}},
1007+
}
1008+
self._run_variation({"temperature": 0.5})
1009+
assert self.client._current_parameters["response_format"] == {
1010+
"type": "json_schema",
1011+
"json_schema": {"name": "output"},
1012+
}
1013+
1014+
async def test_empty_returned_params_preserves_all_original_params(self):
1015+
"""If LLM returns {}, all original parameters survive."""
1016+
self.client._options = _make_options()
1017+
self.client._current_parameters = {"temperature": 0.7, "max_tokens": 256}
1018+
self._run_variation({})
1019+
assert self.client._current_parameters["temperature"] == 0.7
1020+
assert self.client._current_parameters["max_tokens"] == 256
1021+
1022+
async def test_llm_explicit_param_override_is_applied(self):
1023+
"""If the LLM explicitly returns a parameter, the new value is used."""
1024+
self.client._options = _make_options()
1025+
self.client._current_parameters = {"temperature": 0.7, "max_tokens": 256}
1026+
self._run_variation({"temperature": 0.3, "max_tokens": 128})
1027+
assert self.client._current_parameters["temperature"] == 0.3
1028+
assert self.client._current_parameters["max_tokens"] == 128
1029+
1030+
async def test_original_tools_always_restored(self):
1031+
"""Tools from the original config are always restored regardless of LLM response."""
1032+
original_tool = {"name": "my-tool", "type": "function", "description": "desc", "parameters": {}}
1033+
self.client._options = _make_options()
1034+
self.client._current_parameters = {"temperature": 0.7, "tools": [original_tool]}
1035+
self._run_variation({"temperature": 0.5, "tools": []})
1036+
assert self.client._current_parameters["tools"] == [original_tool]
1037+
1038+
async def test_internal_tool_leakage_is_blocked(self):
1039+
"""If LLM returns tools including an internal framework tool, original tools are restored."""
1040+
original_tool = {"name": "user-lookup", "type": "function", "description": "Looks up users", "parameters": {}}
1041+
internal_tool = {"name": "FinalAnswer", "type": "function", "description": "internal", "parameters": {}}
1042+
self.client._options = _make_options()
1043+
self.client._current_parameters = {"temperature": 0.7, "tools": [original_tool]}
1044+
self._run_variation({"temperature": 0.5, "tools": [original_tool, internal_tool]})
1045+
result_tools = self.client._current_parameters["tools"]
1046+
assert result_tools == [original_tool]
1047+
assert not any(t.get("name") == "FinalAnswer" for t in result_tools)
1048+
1049+
async def test_internal_tool_leakage_logs_warning(self):
1050+
"""Tool mismatch should emit a warning."""
1051+
original_tool = {"name": "my-tool", "type": "function", "description": "d", "parameters": {}}
1052+
internal_tool = {"name": "structured_output_tool", "type": "function", "description": "internal", "parameters": {}}
1053+
self.client._options = _make_options()
1054+
self.client._current_parameters = {"temperature": 0.7, "tools": [original_tool]}
1055+
with patch("ldai_optimizer.client.logger") as mock_logger:
1056+
self._run_variation({"temperature": 0.5, "tools": [internal_tool]})
1057+
warning_calls = [c for c in mock_logger.warning.call_args_list if "tool" in str(c).lower()]
1058+
assert len(warning_calls) >= 1
1059+
1060+
async def test_no_original_tools_allows_llm_returned_tools(self):
1061+
"""When the original config had no tools, the LLM is free to return tools."""
1062+
new_tool = {"name": "new-tool", "type": "function", "description": "desc", "parameters": {}}
1063+
self.client._options = _make_options()
1064+
self.client._current_parameters = {"temperature": 0.7}
1065+
self._run_variation({"temperature": 0.5, "tools": [new_tool]})
1066+
assert self.client._current_parameters.get("tools") == [new_tool]
1067+
1068+
async def test_params_preserved_across_full_optimization_loop(self):
1069+
"""End-to-end: custom params survive through a full failed-then-succeeded optimization."""
1070+
custom_params_response = json.dumps({
1071+
"current_instructions": "Improved.",
1072+
"current_parameters": {"temperature": 0.3}, # omits max_tokens and response_format
1073+
"model": "gpt-4o",
1074+
})
1075+
agent_config_with_params = _make_agent_config(
1076+
parameters={"temperature": 0.7, "max_tokens": 512, "response_format": {"type": "json_object"}},
1077+
)
1078+
mock_ldai = _make_ldai_client(agent_config=agent_config_with_params)
1079+
mock_ldai._client.variation.return_value = {
1080+
"instructions": AGENT_INSTRUCTIONS,
1081+
}
1082+
agent_responses = [
1083+
OptimizationResponse(output="Bad answer."), # iteration 1: agent
1084+
OptimizationResponse(output=custom_params_response), # iteration 1: variation
1085+
OptimizationResponse(output="Good answer."), # iteration 2: agent
1086+
OptimizationResponse(output="Good answer."), # iteration 2: validation
1087+
]
1088+
handle_agent_call = AsyncMock(side_effect=agent_responses)
1089+
judge_responses = [
1090+
OptimizationResponse(output=JUDGE_FAIL_RESPONSE),
1091+
OptimizationResponse(output=JUDGE_PASS_RESPONSE),
1092+
OptimizationResponse(output=JUDGE_PASS_RESPONSE),
1093+
]
1094+
handle_judge_call = AsyncMock(side_effect=judge_responses)
1095+
client = _make_client(mock_ldai)
1096+
options = _make_options(
1097+
handle_agent_call=handle_agent_call,
1098+
handle_judge_call=handle_judge_call,
1099+
max_attempts=3,
1100+
)
1101+
result = await client.optimize_from_options("test-agent", options)
1102+
assert result.scores["accuracy"].score == 1.0
1103+
# After variation, max_tokens and response_format should still be present
1104+
assert client._current_parameters.get("max_tokens") == 512
1105+
assert client._current_parameters.get("response_format") == {"type": "json_object"}
1106+
assert client._current_parameters.get("temperature") == 0.3 # LLM's update applied
1107+
1108+
9531109
# ---------------------------------------------------------------------------
9541110
# Full optimization loop
9551111
# ---------------------------------------------------------------------------
@@ -4052,6 +4208,48 @@ def test_toolkeys_not_in_payload_when_no_tools(self):
40524208
payload = api_client.create_ai_config_variation.call_args[0][2]
40534209
assert "toolKeys" not in payload
40544210

4211+
# --- model.custom propagation ---
4212+
4213+
def test_model_custom_included_in_payload_when_set(self):
4214+
client = self._make_client()
4215+
client._initial_model_custom = {"myApp": {"debug": True, "region": "us-east-1"}}
4216+
api_client = _make_api_client_for_commit()
4217+
4218+
client._commit_variation(
4219+
_make_winning_context(), project_key="my-project",
4220+
ai_config_key="my-agent", output_key="k", api_client=api_client,
4221+
)
4222+
4223+
payload = api_client.create_ai_config_variation.call_args[0][2]
4224+
assert payload["model"] == {"custom": {"myApp": {"debug": True, "region": "us-east-1"}}}
4225+
4226+
def test_model_not_in_payload_when_model_custom_is_none(self):
4227+
client = self._make_client()
4228+
client._initial_model_custom = None
4229+
api_client = _make_api_client_for_commit()
4230+
4231+
client._commit_variation(
4232+
_make_winning_context(), project_key="my-project",
4233+
ai_config_key="my-agent", output_key="k", api_client=api_client,
4234+
)
4235+
4236+
payload = api_client.create_ai_config_variation.call_args[0][2]
4237+
assert "model" not in payload
4238+
4239+
def test_model_not_in_payload_when_model_custom_is_empty_dict(self):
4240+
"""An empty custom dict is falsy — treated the same as absent."""
4241+
client = self._make_client()
4242+
client._initial_model_custom = {}
4243+
api_client = _make_api_client_for_commit()
4244+
4245+
client._commit_variation(
4246+
_make_winning_context(), project_key="my-project",
4247+
ai_config_key="my-agent", output_key="k", api_client=api_client,
4248+
)
4249+
4250+
payload = api_client.create_ai_config_variation.call_args[0][2]
4251+
assert "model" not in payload
4252+
40554253

40564254
# ---------------------------------------------------------------------------
40574255
# Tool key extraction from raw variation (_get_agent_config)
@@ -4101,6 +4299,36 @@ async def test_skips_tool_entries_without_key(self):
41014299
await client._get_agent_config("test-agent", LD_CONTEXT)
41024300
assert client._initial_tool_keys == ["good-tool"]
41034301

4302+
async def test_extracts_model_custom_from_raw_variation(self):
4303+
raw = {
4304+
"instructions": AGENT_INSTRUCTIONS,
4305+
"model": {"modelName": "gpt-4o", "custom": {"myApp": {"debug": True}}},
4306+
}
4307+
client = self._make_client_with_variation(raw)
4308+
await client._get_agent_config("test-agent", LD_CONTEXT)
4309+
assert client._initial_model_custom == {"myApp": {"debug": True}}
4310+
4311+
async def test_model_custom_is_none_when_variation_has_no_model(self):
4312+
raw = {"instructions": AGENT_INSTRUCTIONS}
4313+
client = self._make_client_with_variation(raw)
4314+
await client._get_agent_config("test-agent", LD_CONTEXT)
4315+
assert client._initial_model_custom is None
4316+
4317+
async def test_model_custom_is_none_when_model_has_no_custom_key(self):
4318+
raw = {
4319+
"instructions": AGENT_INSTRUCTIONS,
4320+
"model": {"modelName": "gpt-4o", "parameters": {"temperature": 0.7}},
4321+
}
4322+
client = self._make_client_with_variation(raw)
4323+
await client._get_agent_config("test-agent", LD_CONTEXT)
4324+
assert client._initial_model_custom is None
4325+
4326+
async def test_model_custom_is_none_when_model_is_not_a_dict(self):
4327+
raw = {"instructions": AGENT_INSTRUCTIONS, "model": "gpt-4o"}
4328+
client = self._make_client_with_variation(raw)
4329+
await client._get_agent_config("test-agent", LD_CONTEXT)
4330+
assert client._initial_model_custom is None
4331+
41044332

41054333
# ---------------------------------------------------------------------------
41064334
# auto_commit in optimize_from_options

0 commit comments

Comments
 (0)