Skip to content

Commit 312161f

Browse files
committed
chore: add retry logic and structural validation for variations
1 parent a387f83 commit 312161f

4 files changed

Lines changed: 196 additions & 104 deletions

File tree

packages/optimization/src/ldai_optimizer/client.py

Lines changed: 7 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@
6161
extract_json_from_response,
6262
interpolate_variables,
6363
restore_variable_placeholders,
64+
validate_variation_response,
6465
)
6566

6667
logger = logging.getLogger(__name__)
@@ -1179,24 +1180,18 @@ def _apply_new_variation_response(
11791180
:param iteration: Current iteration number for logging
11801181
:return: A new OptimizationContext populated with the updated configuration
11811182
"""
1182-
missing_fields = []
1183-
if "current_instructions" not in response_data:
1184-
missing_fields.append("current_instructions")
1185-
if "current_parameters" not in response_data:
1186-
missing_fields.append("current_parameters")
1187-
if "model" not in response_data:
1188-
missing_fields.append("model")
1189-
1190-
if missing_fields:
1183+
validation_errors = validate_variation_response(response_data)
1184+
if validation_errors:
11911185
logger.debug(
1192-
"[Iteration %d] -> Response missing required fields: %s. Received fields: %s. Full response_data: %s",
1186+
"[Iteration %d] -> Variation response failed validation: %s. "
1187+
"Received fields: %s. Full response_data: %s",
11931188
iteration,
1194-
", ".join(missing_fields),
1189+
"; ".join(validation_errors),
11951190
list(response_data.keys()),
11961191
json.dumps(response_data, indent=2),
11971192
)
11981193
raise ValueError(
1199-
f"Response missing required fields: {', '.join(missing_fields)}. "
1194+
f"Variation response failed validation: {'; '.join(validation_errors)}. "
12001195
f"Received fields: {list(response_data.keys())}"
12011196
)
12021197

packages/optimization/src/ldai_optimizer/ld_api_client.py

Lines changed: 77 additions & 88 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import json
44
import logging
5+
import time
56
import urllib.error
67
import urllib.request
78
from typing import Any, Dict, List, Optional, TypedDict
@@ -13,6 +14,14 @@
1314

1415
_BASE_URL = "https://app.launchdarkly.com"
1516

17+
_MAX_RETRIES = 3
18+
_INITIAL_BACKOFF = 1.0 # seconds; doubles on each attempt (1s, 2s, 4s)
19+
20+
# Status codes that warrant a retry. Everything else (including 400, 401, 403,
21+
# 404) is a permanent or auth failure — retrying would not help and could lead
22+
# to corrupted optimization results if some requests succeed and others fail.
23+
_RETRYABLE_STATUS_CODES = frozenset({429, 500, 502, 503, 504})
24+
1625

1726
class LDApiError(Exception):
1827
"""Raised when the LaunchDarkly REST API returns an error or is unreachable.
@@ -186,35 +195,74 @@ def __repr__(self) -> str:
186195
def _auth_headers(self) -> Dict[str, str]:
187196
return {"Authorization": self._api_key}
188197

189-
def _request(self, method: str, path: str, body: Any = None) -> Any:
198+
def _request(
199+
self,
200+
method: str,
201+
path: str,
202+
body: Any = None,
203+
extra_headers: Optional[Dict[str, str]] = None,
204+
) -> Any:
205+
"""Execute an HTTP request with automatic retry and exponential backoff.
206+
207+
Retries up to ``_MAX_RETRIES`` times for transient errors (429, 5xx,
208+
network failures) with exponential backoff starting at ``_INITIAL_BACKOFF``
209+
seconds. Non-retryable status codes (400, 401, 403, 404, …) are raised
210+
immediately without retrying.
211+
212+
:param method: HTTP method (GET, POST, PATCH, …).
213+
:param path: API path, appended to ``self._base_url``.
214+
:param body: Optional request body; serialised to JSON.
215+
:param extra_headers: Additional headers merged with the auth header.
216+
:raises LDApiError: After all retry attempts are exhausted, or immediately
217+
for non-retryable status codes.
218+
"""
190219
url = f"{self._base_url}{path}"
220+
headers = {**self._auth_headers(), **(extra_headers or {})}
191221
data = json.dumps(body).encode() if body is not None else None
192-
headers = self._auth_headers()
193222
if data is not None:
194223
headers["Content-Type"] = "application/json"
195-
req = urllib.request.Request(url, data=data, headers=headers, method=method)
196-
try:
197-
with urllib.request.urlopen(req) as resp:
198-
raw = resp.read()
199-
return json.loads(raw) if raw else None
200-
except urllib.error.HTTPError as exc:
201-
body_excerpt = exc.read(500).decode(errors="replace")
202-
hint = _HTTP_ERROR_HINTS.get(exc.code, "")
203-
detail = f"{hint} (API response: {body_excerpt})" if hint else f"API response: {body_excerpt}"
204-
raise LDApiError(
205-
f"LaunchDarkly API error {exc.code} {exc.msg} for {method} {path}. {detail}",
206-
status_code=exc.code,
207-
path=path,
208-
) from exc
209-
except urllib.error.URLError as exc:
210-
raise LDApiError(
211-
f"Could not reach LaunchDarkly API at {url}: {exc.reason}. "
212-
"Check your network connection and the base_url setting.",
213-
path=path,
214-
) from exc
215-
216-
def _ai_config_headers(self) -> Dict[str, str]:
217-
return {**self._auth_headers(), "LD-API-Version": "beta"}
224+
225+
last_exc: Optional[LDApiError] = None
226+
for attempt in range(_MAX_RETRIES + 1):
227+
req = urllib.request.Request(url, data=data, headers=headers, method=method)
228+
try:
229+
with urllib.request.urlopen(req) as resp:
230+
raw = resp.read()
231+
return json.loads(raw) if raw else None
232+
except urllib.error.HTTPError as exc:
233+
body_excerpt = exc.read(500).decode(errors="replace")
234+
hint = _HTTP_ERROR_HINTS.get(exc.code, "")
235+
detail = f"{hint} (API response: {body_excerpt})" if hint else f"API response: {body_excerpt}"
236+
api_error = LDApiError(
237+
f"LaunchDarkly API error {exc.code} {exc.msg} for {method} {path}. {detail}",
238+
status_code=exc.code,
239+
path=path,
240+
)
241+
if exc.code not in _RETRYABLE_STATUS_CODES:
242+
raise api_error from exc
243+
last_exc = api_error
244+
except urllib.error.URLError as exc:
245+
last_exc = LDApiError(
246+
f"Could not reach LaunchDarkly API at {url}: {exc.reason}. "
247+
"Check your network connection and the base_url setting.",
248+
path=path,
249+
)
250+
251+
if attempt < _MAX_RETRIES:
252+
delay = _INITIAL_BACKOFF * (2 ** attempt)
253+
logger.warning(
254+
"LaunchDarkly API request failed (attempt %d/%d, path=%s), "
255+
"retrying in %.1fs: %s",
256+
attempt + 1,
257+
_MAX_RETRIES + 1,
258+
path,
259+
delay,
260+
last_exc,
261+
)
262+
time.sleep(delay)
263+
264+
assert last_exc is not None
265+
raise last_exc
218266

219267
def get_model_configs(self, project_key: str) -> List[Dict[str, Any]]:
220268
"""Fetch all AI model configs for a project.
@@ -224,26 +272,8 @@ def get_model_configs(self, project_key: str) -> List[Dict[str, Any]]:
224272
:raises LDApiError: On non-200 HTTP responses or network errors.
225273
"""
226274
path = f"/api/v2/projects/{project_key}/ai-configs/model-configs"
227-
url = f"{self._base_url}{path}"
228-
req = urllib.request.Request(url, headers=self._ai_config_headers(), method="GET")
229-
try:
230-
with urllib.request.urlopen(req) as resp:
231-
raw = resp.read()
232-
return json.loads(raw) if raw else []
233-
except urllib.error.HTTPError as exc:
234-
body_excerpt = exc.read(500).decode(errors="replace")
235-
hint = _HTTP_ERROR_HINTS.get(exc.code, "")
236-
detail = f"{hint} (API response: {body_excerpt})" if hint else f"API response: {body_excerpt}"
237-
raise LDApiError(
238-
f"LaunchDarkly API error {exc.code} {exc.msg} for GET {path}. {detail}",
239-
status_code=exc.code,
240-
path=path,
241-
) from exc
242-
except urllib.error.URLError as exc:
243-
raise LDApiError(
244-
f"Could not reach LaunchDarkly API at {url}: {exc.reason}.",
245-
path=path,
246-
) from exc
275+
result = self._request("GET", path, extra_headers={"LD-API-Version": "beta"})
276+
return result if isinstance(result, list) else []
247277

248278
def get_ai_config(self, project_key: str, config_key: str) -> Any:
249279
"""Fetch a single AI Config by key, including its variations.
@@ -254,27 +284,7 @@ def get_ai_config(self, project_key: str, config_key: str) -> Any:
254284
:raises LDApiError: On non-200 HTTP responses or network errors.
255285
"""
256286
path = f"/api/v2/projects/{project_key}/ai-configs/{config_key}"
257-
headers = self._ai_config_headers()
258-
url = f"{self._base_url}{path}"
259-
req = urllib.request.Request(url, headers=headers, method="GET")
260-
try:
261-
with urllib.request.urlopen(req) as resp:
262-
raw = resp.read()
263-
return json.loads(raw) if raw else None
264-
except urllib.error.HTTPError as exc:
265-
body_excerpt = exc.read(500).decode(errors="replace")
266-
hint = _HTTP_ERROR_HINTS.get(exc.code, "")
267-
detail = f"{hint} (API response: {body_excerpt})" if hint else f"API response: {body_excerpt}"
268-
raise LDApiError(
269-
f"LaunchDarkly API error {exc.code} {exc.msg} for GET {path}. {detail}",
270-
status_code=exc.code,
271-
path=path,
272-
) from exc
273-
except urllib.error.URLError as exc:
274-
raise LDApiError(
275-
f"Could not reach LaunchDarkly API at {url}: {exc.reason}.",
276-
path=path,
277-
) from exc
287+
return self._request("GET", path, extra_headers={"LD-API-Version": "beta"})
278288

279289
def create_ai_config_variation(
280290
self, project_key: str, config_key: str, payload: Dict[str, Any]
@@ -288,28 +298,7 @@ def create_ai_config_variation(
288298
:raises LDApiError: On non-200 HTTP responses or network errors.
289299
"""
290300
path = f"/api/v2/projects/{project_key}/ai-configs/{config_key}/variations"
291-
url = f"{self._base_url}{path}"
292-
data = json.dumps(payload).encode()
293-
headers = {**self._ai_config_headers(), "Content-Type": "application/json"}
294-
req = urllib.request.Request(url, data=data, headers=headers, method="POST")
295-
try:
296-
with urllib.request.urlopen(req) as resp:
297-
raw = resp.read()
298-
return json.loads(raw) if raw else None
299-
except urllib.error.HTTPError as exc:
300-
body_excerpt = exc.read(500).decode(errors="replace")
301-
hint = _HTTP_ERROR_HINTS.get(exc.code, "")
302-
detail = f"{hint} (API response: {body_excerpt})" if hint else f"API response: {body_excerpt}"
303-
raise LDApiError(
304-
f"LaunchDarkly API error {exc.code} {exc.msg} for POST {path}. {detail}",
305-
status_code=exc.code,
306-
path=path,
307-
) from exc
308-
except urllib.error.URLError as exc:
309-
raise LDApiError(
310-
f"Could not reach LaunchDarkly API at {url}: {exc.reason}.",
311-
path=path,
312-
) from exc
301+
return self._request("POST", path, body=payload, extra_headers={"LD-API-Version": "beta"})
313302

314303
def get_agent_optimization(
315304
self, project_key: str, optimization_key: str

packages/optimization/src/ldai_optimizer/util.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -290,6 +290,47 @@ def create_variation_tool(model_choices: List[str]) -> ToolDefinition:
290290
)
291291

292292

293+
def validate_variation_response(response_data: Dict[str, Any]) -> List[str]:
294+
"""Validate the shape of a parsed LLM variation response.
295+
296+
Checks that the three required fields are present and have the expected
297+
types. An empty ``current_parameters`` dict is acceptable; an empty
298+
``current_instructions`` or ``model`` string is flagged as an error
299+
because downstream code cannot meaningfully use a blank value.
300+
301+
:param response_data: Parsed dict from the LLM (output of extract_json_from_response).
302+
:return: List of human-readable error strings. Empty list means the response is valid.
303+
"""
304+
errors: List[str] = []
305+
306+
if "current_instructions" not in response_data:
307+
errors.append("missing required field 'current_instructions'")
308+
elif not isinstance(response_data["current_instructions"], str):
309+
errors.append(
310+
f"'current_instructions' must be a string, "
311+
f"got {type(response_data['current_instructions']).__name__}"
312+
)
313+
elif not response_data["current_instructions"].strip():
314+
errors.append("'current_instructions' must not be empty")
315+
316+
if "current_parameters" not in response_data:
317+
errors.append("missing required field 'current_parameters'")
318+
elif not isinstance(response_data["current_parameters"], dict):
319+
errors.append(
320+
f"'current_parameters' must be a dict, "
321+
f"got {type(response_data['current_parameters']).__name__}"
322+
)
323+
324+
if "model" not in response_data:
325+
errors.append("missing required field 'model'")
326+
elif not isinstance(response_data["model"], str):
327+
errors.append(
328+
f"'model' must be a string, got {type(response_data['model']).__name__}"
329+
)
330+
331+
return errors
332+
333+
293334
def extract_json_from_response(response_str: str) -> Dict[str, Any]:
294335
"""
295336
Parse a JSON object from an LLM response string.

packages/optimization/tests/test_ld_api_client.py

Lines changed: 71 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
import urllib.request
66
from io import BytesIO
77
from typing import Any, Dict
8-
from unittest.mock import MagicMock, patch
8+
from unittest.mock import MagicMock, call, patch
99

1010
import pytest
1111

@@ -14,6 +14,7 @@
1414
AgentOptimizationResultPost as OptimizationResultPayload,
1515
LDApiClient,
1616
LDApiError,
17+
_MAX_RETRIES,
1718
_parse_agent_optimization,
1819
)
1920

@@ -295,10 +296,76 @@ def test_swallows_http_errors_without_raising(self):
295296
url="http://x", code=500, msg="Server Error", hdrs=MagicMock(), fp=BytesIO(b"err")
296297
)
297298
with patch("urllib.request.urlopen", side_effect=http_error):
298-
# must not raise
299-
client.post_agent_optimization_result("proj", "opt-id", self._make_payload())
299+
with patch("time.sleep"):
300+
# must not raise even after all retries are exhausted
301+
client.post_agent_optimization_result("proj", "opt-id", self._make_payload())
300302

301303
def test_swallows_url_errors_without_raising(self):
302304
client = LDApiClient("test-key")
303305
with patch("urllib.request.urlopen", side_effect=urllib.error.URLError("timeout")):
304-
client.post_agent_optimization_result("proj", "opt-id", self._make_payload())
306+
with patch("time.sleep"):
307+
client.post_agent_optimization_result("proj", "opt-id", self._make_payload())
308+
309+
310+
# ---------------------------------------------------------------------------
311+
# LDApiClient retry behaviour
312+
# ---------------------------------------------------------------------------
313+
314+
315+
class TestLDApiClientRetry:
316+
def _http_error(self, code: int) -> urllib.error.HTTPError:
317+
return urllib.error.HTTPError(
318+
url="http://x", code=code, msg="Error", hdrs=MagicMock(), fp=BytesIO(b"body")
319+
)
320+
321+
def test_retryable_error_retries_max_times(self):
322+
"""A 429 or 5xx should be retried up to _MAX_RETRIES times then raise."""
323+
client = LDApiClient("test-key")
324+
with patch("urllib.request.urlopen", side_effect=self._http_error(429)) as mock_open:
325+
with patch("time.sleep"):
326+
with pytest.raises(LDApiError) as exc_info:
327+
client._request("GET", "/path")
328+
assert mock_open.call_count == _MAX_RETRIES + 1
329+
assert exc_info.value.status_code == 429
330+
331+
def test_non_retryable_error_raises_immediately(self):
332+
"""A 401, 403, or 404 should raise after a single attempt with no retries."""
333+
for code in (400, 401, 403, 404):
334+
client = LDApiClient("test-key")
335+
with patch("urllib.request.urlopen", side_effect=self._http_error(code)) as mock_open:
336+
with patch("time.sleep") as mock_sleep:
337+
with pytest.raises(LDApiError) as exc_info:
338+
client._request("GET", "/path")
339+
assert mock_open.call_count == 1, f"Expected 1 attempt for {code}, got {mock_open.call_count}"
340+
mock_sleep.assert_not_called()
341+
assert exc_info.value.status_code == code
342+
343+
def test_url_error_retries_max_times(self):
344+
"""Network-level errors should also be retried."""
345+
client = LDApiClient("test-key")
346+
with patch("urllib.request.urlopen", side_effect=urllib.error.URLError("timeout")) as mock_open:
347+
with patch("time.sleep"):
348+
with pytest.raises(LDApiError):
349+
client._request("GET", "/path")
350+
assert mock_open.call_count == _MAX_RETRIES + 1
351+
352+
def test_backoff_delays_are_exponential(self):
353+
"""Sleep durations should double on each retry: 1s, 2s, 4s."""
354+
client = LDApiClient("test-key")
355+
with patch("urllib.request.urlopen", side_effect=self._http_error(500)):
356+
with patch("time.sleep") as mock_sleep:
357+
with pytest.raises(LDApiError):
358+
client._request("GET", "/path")
359+
sleep_calls = [c.args[0] for c in mock_sleep.call_args_list]
360+
assert sleep_calls == [1.0, 2.0, 4.0]
361+
362+
def test_succeeds_on_retry_after_transient_error(self):
363+
"""If a retryable error clears, the successful response should be returned."""
364+
client = LDApiClient("test-key")
365+
ok_response = _mock_urlopen({"result": "ok"})
366+
side_effects = [self._http_error(500), ok_response]
367+
with patch("urllib.request.urlopen", side_effect=side_effects) as mock_open:
368+
with patch("time.sleep"):
369+
result = client._request("GET", "/path")
370+
assert result == {"result": "ok"}
371+
assert mock_open.call_count == 2

0 commit comments

Comments
 (0)