Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,11 @@

All notable changes to `uipath_llm_client` (core package) will be documented in this file.

## [1.0.11] - 2026-02-05

### Feature
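- Retries on HTTP 429 now honor the `Retry-After` / `x-retry-after` response header (capped at `max_delay`); exponential backoff with jitter is used when the header is absent.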

## [1.0.10] - 2026-02-04

### Type fix
Expand Down Expand Up @@ -38,7 +43,7 @@ All notable changes to `uipath_llm_client` (core package) will be documented in
## [1.0.3] - 2026-02-02

### Refactor
- moved the logic of get_httpx_client_kwargs from the uipath package to this package;
- moved the logic of get_httpx_ssl_client_kwargs from the uipath package to this package;

## [1.0.2] - 2026-02-02

Expand Down
2 changes: 1 addition & 1 deletion src/uipath_llm_client/__version__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
# Package metadata for the UiPath LLM client.
# NOTE(review): "__titile__" looks like a misspelling of "__title__"; renaming it would
# change a module-level attribute name, so confirm nothing imports it before fixing.
__titile__ = "UiPath LLM Client"
__description__ = "A Python client for interacting with UiPath's LLM services."
# NOTE(review): __version__ is assigned twice in a row; the later assignment ("1.0.11")
# is the value that takes effect.
__version__ = "1.0.10"
__version__ = "1.0.11"
8 changes: 4 additions & 4 deletions src/uipath_llm_client/httpx_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@
RetryableHTTPTransport,
RetryConfig,
)
from uipath_llm_client.utils.ssl_config import get_httpx_client_kwargs
from uipath_llm_client.utils.ssl_config import get_httpx_ssl_client_kwargs


def build_routing_headers(
Expand Down Expand Up @@ -99,7 +99,7 @@ class UiPathHttpxClient(Client):

_streaming_header: str = "X-UiPath-Streaming-Enabled"
_default_headers: Mapping[str, str] = {
"X-UiPath-LLMGateway-TimeoutSeconds": "30", # server side timeout, default is 10, maximum is 300
"X-UiPath-LLMGateway-TimeoutSeconds": "300", # server side timeout, default is 10, maximum is 300
"X-UiPath-LLMGateway-AllowFull4xxResponse": "true", # allow full 4xx responses (default is false)
}

Expand Down Expand Up @@ -173,7 +173,7 @@ def __init__(
event_hooks["response"].append(logging_config.log_error)

# setup ssl context
kwargs.update(get_httpx_client_kwargs())
kwargs.update(get_httpx_ssl_client_kwargs())

super().__init__(
headers=merged_headers, transport=transport, event_hooks=event_hooks, **kwargs
Expand Down Expand Up @@ -293,7 +293,7 @@ def __init__(
event_hooks["response"].append(logging_config.alog_error)

# setup ssl context
kwargs.update(get_httpx_client_kwargs())
kwargs.update(get_httpx_ssl_client_kwargs())

super().__init__(
headers=merged_headers, transport=transport, event_hooks=event_hooks, **kwargs
Expand Down
65 changes: 64 additions & 1 deletion src/uipath_llm_client/utils/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,10 +135,73 @@ class UiPathUnprocessableEntityError(UiPathAPIError):


class UiPathRateLimitError(UiPathAPIError):
    """HTTP 429 Too Many Requests error.

    Attributes:
        retry_after: Seconds to wait before retrying (from the Retry-After or
            x-retry-after header), or None when the header is missing or
            does not hold a usable value.
    """

    status_code: int = 429

    def __init__(
        self,
        message: str,
        *,
        request: Request,
        response: Response,
        body: str | dict | None = None,
    ):
        """Build the error and capture the server-suggested retry delay.

        Args:
            message: Human-readable error message.
            request: The request that triggered the 429 response.
            response: The 429 response; its headers are inspected for a retry delay.
            body: Optional decoded response body.
        """
        super().__init__(message, request=request, response=response, body=body)
        self._retry_after = self._parse_retry_after(response)

    @property
    def retry_after(self) -> float | None:
        """Get the retry-after value in seconds, if available."""
        return self._retry_after

    @staticmethod
    def _parse_retry_after(response: Response) -> float | None:
        """Parse the Retry-After or x-retry-after header from the response.

        The header value can be either:
            - A number of seconds (e.g., "120")
            - An HTTP-date (e.g., "Wed, 21 Oct 2015 07:28:00 GMT")

        Args:
            response: The httpx Response object.

        Returns:
            A finite, non-negative number of seconds to wait, or None if the
            header is absent or cannot be interpreted. Dates in the past
            yield 0.0.
        """
        import math
        import time
        from datetime import timezone
        from email.utils import parsedate_to_datetime

        # Check both header variants (case-insensitive in httpx)
        headers = response.headers
        raw_value = headers.get("retry-after") or headers.get("x-retry-after")
        if raw_value is None:
            return None

        # Numeric form: a delay in seconds.
        try:
            seconds = float(raw_value)
        except ValueError:
            pass
        else:
            # float() also accepts "nan", "inf" and negative numbers. None of
            # those is a usable delay (time.sleep rejects negative/NaN), so
            # report them as "no usable value" and let callers fall back.
            if math.isfinite(seconds) and seconds >= 0:
                return seconds
            return None

        # HTTP-date form. parsedate_to_datetime does not depend on the process
        # locale, unlike strptime's %a/%b directives.
        try:
            retry_at = parsedate_to_datetime(raw_value)
        except (TypeError, ValueError):
            return None

        if retry_at.tzinfo is None:
            # No usable zone information in the header: interpret as UTC.
            retry_at = retry_at.replace(tzinfo=timezone.utc)

        # Don't return negative delays for dates that are already in the past.
        return max(0.0, retry_at.timestamp() - time.time())


class UiPathInternalServerError(UiPathAPIError):
"""HTTP 500 Internal Server Error."""
Expand Down
65 changes: 64 additions & 1 deletion src/uipath_llm_client/utils/retry.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,11 @@
exponential backoff and jitter. It uses tenacity for retry handling
and integrates with httpx transports.

The retry logic automatically respects the `Retry-After` or `x-retry-after`
HTTP headers when present in error responses. If the header specifies a wait
time, that value is used (capped at max_delay). Otherwise, exponential backoff
with jitter is applied.

Example:
>>> from uipath_llm_client.utils.retry import RetryableHTTPTransport, RetryConfig
>>>
Expand Down Expand Up @@ -32,12 +37,14 @@
from httpx import AsyncHTTPTransport, HTTPTransport, Request, Response
from tenacity import (
AsyncRetrying,
RetryCallState,
Retrying,
before_sleep_log,
retry_if_exception_type,
stop_after_attempt,
wait_exponential_jitter,
)
from tenacity.wait import wait_base
from typing_extensions import TypedDict

from uipath_llm_client.utils.exceptions import UiPathAPIError, UiPathRateLimitError
Expand All @@ -57,6 +64,62 @@
_DEFAULT_JITTER: float = 1.0


class wait_retry_after_with_fallback(wait_base):
    """Custom wait strategy that uses Retry-After header when available.

    This wait strategy checks if the exception has a retry_after attribute
    (from the Retry-After or x-retry-after HTTP headers) and uses that value.
    If it is not available, or is not a usable delay, it falls back to
    exponential backoff with jitter.

    Attributes:
        fallback_wait: The fallback wait strategy (exponential backoff with jitter).
        max_delay: Maximum delay in seconds (caps retry-after values).
    """

    def __init__(
        self,
        *,
        initial: float,
        max: float,
        exp_base: float,
        jitter: float,
    ) -> None:
        """Initialize the wait strategy.

        Args:
            initial: Initial delay for exponential backoff.
            max: Maximum delay in seconds (also caps retry-after values).
            exp_base: Exponential backoff base multiplier.
            jitter: Random jitter to add to delays.
        """
        self.fallback_wait = wait_exponential_jitter(
            initial=initial,
            max=max,
            exp_base=exp_base,
            jitter=jitter,
        )
        self.max_delay = max

    def __call__(self, retry_state: RetryCallState) -> float:
        """Calculate the wait time for the next retry.

        Args:
            retry_state: The current retry state from tenacity.

        Returns:
            The number of seconds to wait before the next retry. Never
            negative or NaN when derived from a retry-after value.
        """
        outcome = retry_state.outcome
        if outcome is not None and outcome.failed:
            exception = outcome.exception()
            if isinstance(exception, UiPathRateLimitError):
                retry_after = exception.retry_after
                # `>= 0` is False for negative values and for NaN alike; both
                # would make the sleep call fail, so they use the fallback.
                if retry_after is not None and retry_after >= 0:
                    # Use retry-after value, but cap at max_delay
                    return min(retry_after, self.max_delay)

        # Fall back to exponential backoff with jitter
        return self.fallback_wait(retry_state)


class RetryConfig(TypedDict):
"""Configuration for retry behavior on failed requests.

Expand Down Expand Up @@ -126,7 +189,7 @@ def _build_retryer(
retryer_class = AsyncRetrying if async_mode else Retrying
return retryer_class(
stop=stop_after_attempt(max_retries),
wait=wait_exponential_jitter(
wait=wait_retry_after_with_fallback(
initial=initial_delay,
max=max_delay,
exp_base=exp_base,
Expand Down
2 changes: 1 addition & 1 deletion src/uipath_llm_client/utils/ssl_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def create_ssl_context():
)


def get_httpx_client_kwargs() -> dict[str, Any]:
def get_httpx_ssl_client_kwargs() -> dict[str, Any]:
"""Get standardized httpx client configuration."""
client_kwargs: dict[str, Any] = {"follow_redirects": True, "timeout": 30.0}

Expand Down
40 changes: 40 additions & 0 deletions tests/core/test_base_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -842,11 +842,51 @@ def test_exception_from_response(self):
mock_response.reason_phrase = "Too Many Requests"
mock_response.json.return_value = {"error": "rate limited"}
mock_response.request = MagicMock(spec=Request)
mock_response.headers = {} # Required for UiPathRateLimitError._parse_retry_after

exc = UiPathAPIError.from_response(mock_response)
assert isinstance(exc, UiPathRateLimitError)
assert exc.status_code == 429

def test_exception_from_response_with_retry_after(self):
    """A 429 response carrying ``retry-after: 30`` yields retry_after == 30.0."""
    response = MagicMock(spec=Response)
    response.request = MagicMock(spec=Request)
    response.headers = {"retry-after": "30"}
    response.json.return_value = {"error": "rate limited"}
    response.reason_phrase = "Too Many Requests"
    response.status_code = 429

    error = UiPathAPIError.from_response(response)

    assert isinstance(error, UiPathRateLimitError)
    assert error.retry_after == 30.0

def test_exception_from_response_with_x_retry_after(self):
    """A 429 response carrying ``x-retry-after: 45`` yields retry_after == 45.0."""
    response = MagicMock(spec=Response)
    response.request = MagicMock(spec=Request)
    response.headers = {"x-retry-after": "45"}
    response.json.return_value = {"error": "rate limited"}
    response.reason_phrase = "Too Many Requests"
    response.status_code = 429

    error = UiPathAPIError.from_response(response)

    assert isinstance(error, UiPathRateLimitError)
    assert error.retry_after == 45.0

def test_exception_retry_after_none_when_not_present(self):
    """A 429 response without any retry header leaves retry_after as None."""
    response = MagicMock(spec=Response)
    response.request = MagicMock(spec=Request)
    response.headers = {}
    response.json.return_value = {"error": "rate limited"}
    response.reason_phrase = "Too Many Requests"
    response.status_code = 429

    error = UiPathAPIError.from_response(response)

    assert isinstance(error, UiPathRateLimitError)
    assert error.retry_after is None


# ============================================================================
# Test Singleton Utility
Expand Down