From 5f9783a40f5d8f8a6d8610090c834f62a96399b2 Mon Sep 17 00:00:00 2001 From: Cosmin Maria Date: Thu, 16 Apr 2026 23:37:17 +0300 Subject: [PATCH 1/2] Feat: cache discovery endpoint results and extract shared get_model_info (#62) Add TTL-based caching (5 min) for get_available_models() to avoid redundant discovery calls when creating multiple models in a session. Extract shared model lookup logic into get_model_info() utility in the core package, reused by both the langchain factory and litellm client. Co-Authored-By: Claude Opus 4.6 (1M context) --- CHANGELOG.md | 6 ++ packages/uipath_langchain_client/CHANGELOG.md | 5 ++ .../uipath_langchain_client/pyproject.toml | 2 +- .../uipath_langchain_client/__version__.py | 2 +- .../src/uipath_langchain_client/factory.py | 56 ++------------- src/uipath/llm_client/__init__.py | 3 + src/uipath/llm_client/__version__.py | 2 +- .../llm_client/clients/litellm/client.py | 22 ++---- src/uipath/llm_client/settings/base.py | 39 ++++++++-- .../settings/llmgateway/settings.py | 2 +- .../llm_client/settings/platform/settings.py | 3 +- src/uipath/llm_client/utils/discovery.py | 67 +++++++++++++++++ .../core/features/settings/test_llmgateway.py | 56 +++++++++++++++ tests/core/features/settings/test_platform.py | 45 ++++++++++++ tests/core/features/test_discovery.py | 71 +++++++++++++++++++ 15 files changed, 303 insertions(+), 78 deletions(-) create mode 100644 src/uipath/llm_client/utils/discovery.py create mode 100644 tests/core/features/test_discovery.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 1fc2405..e7b0b42 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,12 @@ All notable changes to `uipath_llm_client` (core package) will be documented in this file. +## [1.8.4] - 2026-04-16 + +### Added +- TTL-based caching for `get_available_models()` — discovery endpoint results are cached for 5 minutes per settings instance, avoiding redundant network calls when creating multiple models in a session +- `get_model_info()` shared utility for looking up a model by name from the discovery endpoint results, with optional vendor and BYOM connection ID filters + ## [1.8.3] - 2026-04-16 ### Added diff --git a/packages/uipath_langchain_client/CHANGELOG.md b/packages/uipath_langchain_client/CHANGELOG.md index 9a34ed2..113a255 100644 --- a/packages/uipath_langchain_client/CHANGELOG.md +++ b/packages/uipath_langchain_client/CHANGELOG.md @@ -2,6 +2,11 @@ All notable changes to `uipath_langchain_client` will be documented in this file. +## [1.8.4] - 2026-04-16 + +### Changed +- Factory functions (`get_chat_model`, `get_embedding_model`) now use the shared `get_model_info()` utility from the core package instead of an inline implementation + ## [1.8.3] - 2026-04-16 ### Added diff --git a/packages/uipath_langchain_client/pyproject.toml b/packages/uipath_langchain_client/pyproject.toml index 942c8e5..cba1207 100644 --- a/packages/uipath_langchain_client/pyproject.toml +++ b/packages/uipath_langchain_client/pyproject.toml @@ -6,7 +6,7 @@ readme = "README.md" requires-python = ">=3.11" dependencies = [ "langchain>=1.2.15", - "uipath-llm-client>=1.8.3", + "uipath-llm-client>=1.8.4", ] [project.optional-dependencies] diff --git a/packages/uipath_langchain_client/src/uipath_langchain_client/__version__.py b/packages/uipath_langchain_client/src/uipath_langchain_client/__version__.py index 514ff65..c53ebdc 100644 --- a/packages/uipath_langchain_client/src/uipath_langchain_client/__version__.py +++ b/packages/uipath_langchain_client/src/uipath_langchain_client/__version__.py @@ -1,3 +1,3 @@ __title__ = "UiPath LangChain Client" __description__ = "A Python client for interacting with UiPath's LLM services via LangChain." -__version__ = "1.8.3" +__version__ = "1.8.4" diff --git a/packages/uipath_langchain_client/src/uipath_langchain_client/factory.py b/packages/uipath_langchain_client/src/uipath_langchain_client/factory.py index f4b8c05..331dd9d 100644 --- a/packages/uipath_langchain_client/src/uipath_langchain_client/factory.py +++ b/packages/uipath_langchain_client/src/uipath_langchain_client/factory.py @@ -22,6 +22,7 @@ from typing import Any +from uipath.llm_client.utils.discovery import get_model_info from uipath_langchain_client.base_client import ( UiPathBaseChatModel, UiPathBaseEmbeddings, @@ -37,49 +38,6 @@ ) -def _get_model_info( - model_name: str, - *, - client_settings: UiPathBaseSettings, - byo_connection_id: str | None = None, - vendor_type: VendorType | str | None = None, -) -> dict[str, Any]: - available_models = client_settings.get_available_models() - - matching_models = [m for m in available_models if m["modelName"].lower() == model_name.lower()] - - if vendor_type is not None: - matching_models = [ - m for m in matching_models if m.get("vendor", "").lower() == str(vendor_type).lower() - ] - - if byo_connection_id: - matching_models = [ - m - for m in matching_models - if (byom_details := m.get("byomDetails")) - and byom_details.get("integrationServiceConnectionId", "").lower() - == byo_connection_id.lower() - ] - - if not byo_connection_id and len(matching_models) > 1: - matching_models = [ - m - for m in matching_models - if ( - (m.get("modelSubscriptionType", "") == "UiPathOwned") - or (m.get("byomDetails") is None) - ) - ] - - if not matching_models: - raise ValueError( - f"Model {model_name} not found. Available models are: {[m['modelName'] for m in available_models]}" - ) - - return matching_models[0] - - def get_chat_model( model_name: str, *, @@ -120,11 +78,11 @@ def get_chat_model( ValueError: If the model is not found in available models or vendor is not supported. """ client_settings = client_settings or get_default_client_settings() - model_info = _get_model_info( + model_info = get_model_info( + client_settings.get_available_models(), model_name, - client_settings=client_settings, + vendor_type=str(vendor_type) if vendor_type is not None else None, byo_connection_id=byo_connection_id, - vendor_type=vendor_type, ) model_family = model_info.get("modelFamily", None) if model_family is not None: @@ -300,11 +258,11 @@ def get_embedding_model( >>> vectors = embeddings.embed_documents(["Hello world"]) """ client_settings = client_settings or get_default_client_settings() - model_info = _get_model_info( + model_info = get_model_info( + client_settings.get_available_models(), model_name, - client_settings=client_settings, + vendor_type=str(vendor_type) if vendor_type is not None else None, byo_connection_id=byo_connection_id, - vendor_type=vendor_type, ) is_uipath_owned = model_info.get("modelSubscriptionType") == "UiPathOwned" if not is_uipath_owned: diff --git a/src/uipath/llm_client/__init__.py b/src/uipath/llm_client/__init__.py index c4e6261..fd5e42a 100644 --- a/src/uipath/llm_client/__init__.py +++ b/src/uipath/llm_client/__init__.py @@ -36,6 +36,7 @@ PlatformSettings, get_default_client_settings, ) +from uipath.llm_client.utils.discovery import get_model_info from uipath.llm_client.utils.exceptions import ( UiPathAPIError, UiPathAuthenticationError, @@ -64,6 +65,8 @@ # HTTPX clients "UiPathHttpxClient", "UiPathHttpxAsyncClient", + # Discovery + "get_model_info", # Retry "RetryConfig", # Exceptions diff --git a/src/uipath/llm_client/__version__.py b/src/uipath/llm_client/__version__.py index ea22869..212fee4 100644 --- a/src/uipath/llm_client/__version__.py +++ b/src/uipath/llm_client/__version__.py @@ -1,3 +1,3 @@ __title__ = "UiPath LLM Client" __description__ = "A Python client for interacting with UiPath's LLM services." -__version__ = "1.8.3" +__version__ = "1.8.4" diff --git a/src/uipath/llm_client/clients/litellm/client.py b/src/uipath/llm_client/clients/litellm/client.py index 93d55b5..b010dec 100644 --- a/src/uipath/llm_client/clients/litellm/client.py +++ b/src/uipath/llm_client/clients/litellm/client.py @@ -34,6 +34,7 @@ RoutingMode, VendorType, ) +from uipath.llm_client.utils.discovery import get_model_info from uipath.llm_client.utils.retry import RetryConfig # Route OpenAI chat completions through base_llm_http_handler (accepts HTTPHandler) @@ -189,22 +190,11 @@ def _discover_and_build_api_config( User-supplied ``vendor_type`` filters models during discovery. User-supplied ``api_flavor`` overrides the discovered value. """ - available_models = self._client_settings.get_available_models() - matching = [ - m for m in available_models if m["modelName"].lower() == self._model_name.lower() - ] - - if vendor_type is not None: - matching = [ - m for m in matching if m.get("vendor", "").lower() == str(vendor_type).lower() - ] - - if not matching: - raise ValueError( - f"Model '{self._model_name}' not found. " - f"Available: {[m['modelName'] for m in available_models]}" - ) - model_info = matching[0] + model_info = get_model_info( + self._client_settings.get_available_models(), + self._model_name, + vendor_type=str(vendor_type) if vendor_type is not None else None, + ) model_family: str | None = None raw_family = model_info.get("modelFamily", None) diff --git a/src/uipath/llm_client/settings/base.py b/src/uipath/llm_client/settings/base.py index c2c974c..26a56a9 100644 --- a/src/uipath/llm_client/settings/base.py +++ b/src/uipath/llm_client/settings/base.py @@ -5,12 +5,13 @@ Concrete implementations are provided in the `platform` and `llmgateway` submodules. """ +import time from abc import ABC, abstractmethod from collections.abc import Mapping -from typing import Any, Self +from typing import Any, ClassVar, Self from httpx import Auth -from pydantic import BaseModel, model_validator +from pydantic import BaseModel, PrivateAttr, model_validator from pydantic_settings import BaseSettings, SettingsConfigDict from uipath.llm_client.settings.constants import ApiFlavor, ApiType, RoutingMode, VendorType @@ -74,12 +75,17 @@ class UiPathBaseSettings(BaseSettings, ABC): with validation aliases allowing flexible naming conventions. """ + DISCOVERY_CACHE_TTL_SECONDS: ClassVar[int] = 300 + model_config = SettingsConfigDict( validate_by_alias=True, populate_by_name=True, extra="allow", ) + _models_cache: list[dict[str, Any]] | None = PrivateAttr(default=None) + _models_cache_timestamp: float = PrivateAttr(default=0.0) + @abstractmethod def build_base_url( self, @@ -131,14 +137,33 @@ def build_auth_pipeline( """ ... + def get_available_models(self) -> list[dict[str, Any]]: + """Get the list of available models from the backend, with TTL caching. + + Results are cached for ``DISCOVERY_CACHE_TTL_SECONDS`` (default 300s). + Subsequent calls within the TTL window return the cached list. + + Returns: + A list of dictionaries containing model information. + """ + now = time.monotonic() + if ( + self._models_cache is not None + and (now - self._models_cache_timestamp) < self.DISCOVERY_CACHE_TTL_SECONDS + ): + return self._models_cache + models = self._fetch_available_models() + self._models_cache = models + self._models_cache_timestamp = now + return models + @abstractmethod - def get_available_models( - self, - ) -> list[dict[str, Any]]: - """Get the list of available models from the backend. + def _fetch_available_models(self) -> list[dict[str, Any]]: + """Fetch the list of available models from the backend. Subclasses must implement this method to query the backend's - model discovery endpoint. + model discovery endpoint. Called by :meth:`get_available_models` + when the cache is stale or empty. Returns: A list of dictionaries containing model information. diff --git a/src/uipath/llm_client/settings/llmgateway/settings.py b/src/uipath/llm_client/settings/llmgateway/settings.py index 8c0b491..f4c6920 100644 --- a/src/uipath/llm_client/settings/llmgateway/settings.py +++ b/src/uipath/llm_client/settings/llmgateway/settings.py @@ -114,7 +114,7 @@ def build_auth_headers( return headers @override - def get_available_models(self) -> list[dict[str, Any]]: + def _fetch_available_models(self) -> list[dict[str, Any]]: discovery_url = f"{self.base_url}/{self.org_id}/{self.tenant_id}/{LLMGatewayEndpoints.DISCOVERY_ENDPOINT.value}" with Client( auth=self.build_auth_pipeline(), diff --git a/src/uipath/llm_client/settings/platform/settings.py b/src/uipath/llm_client/settings/platform/settings.py index a20abdc..0b317b1 100644 --- a/src/uipath/llm_client/settings/platform/settings.py +++ b/src/uipath/llm_client/settings/platform/settings.py @@ -163,8 +163,7 @@ def build_auth_headers( return headers @override - def get_available_models(self) -> list[dict[str, Any]]: - + def _fetch_available_models(self) -> list[dict[str, Any]]: models = UiPath().agenthub.get_available_llm_models( headers=dict(self.build_auth_headers()), ) diff --git a/src/uipath/llm_client/utils/discovery.py b/src/uipath/llm_client/utils/discovery.py new file mode 100644 index 0000000..4525aeb --- /dev/null +++ b/src/uipath/llm_client/utils/discovery.py @@ -0,0 +1,67 @@ +"""Shared model discovery helpers.""" + +from typing import Any + + +def get_model_info( + available_models: list[dict[str, Any]], + model_name: str, + *, + vendor_type: str | None = None, + byo_connection_id: str | None = None, +) -> dict[str, Any]: + """Find and return a single model entry from the discovery endpoint results. + + Applies the following filters in order: + + 1. Match by ``modelName`` (case-insensitive). + 2. If ``vendor_type`` is given, keep only models whose ``vendor`` matches. + 3. If ``byo_connection_id`` is given, keep only models whose + ``byomDetails.integrationServiceConnectionId`` matches. + 4. When no ``byo_connection_id`` is provided and multiple candidates remain, + prefer UiPath-owned (non-BYOM) models. + + Args: + available_models: Full list of model dictionaries from the discovery + endpoint (as returned by :meth:`UiPathBaseSettings.get_available_models`). + model_name: Name of the model to look up. + vendor_type: Optional vendor filter (e.g. ``"openai"``). + byo_connection_id: Optional BYOM connection ID filter. + + Returns: + The first matching model dictionary. + + Raises: + ValueError: If no model matches the given criteria. + """ + matching = [m for m in available_models if m["modelName"].lower() == model_name.lower()] + + if vendor_type is not None: + matching = [m for m in matching if m.get("vendor", "").lower() == str(vendor_type).lower()] + + if byo_connection_id: + matching = [ + m + for m in matching + if (byom_details := m.get("byomDetails")) + and byom_details.get("integrationServiceConnectionId", "").lower() + == byo_connection_id.lower() + ] + + if not byo_connection_id and len(matching) > 1: + matching = [ + m + for m in matching + if ( + (m.get("modelSubscriptionType", "") == "UiPathOwned") + or (m.get("byomDetails") is None) + ) + ] + + if not matching: + raise ValueError( + f"Model '{model_name}' not found. " + f"Available models: {[m['modelName'] for m in available_models]}" + ) + + return matching[0] diff --git a/tests/core/features/settings/test_llmgateway.py b/tests/core/features/settings/test_llmgateway.py index 0cb12ab..895b67e 100644 --- a/tests/core/features/settings/test_llmgateway.py +++ b/tests/core/features/settings/test_llmgateway.py @@ -1,6 +1,7 @@ """Tests for LLMGatewaySettings.""" import os +import time from unittest.mock import MagicMock, patch import pytest @@ -153,6 +154,61 @@ def test_get_available_models_raises_on_unauthorized(self, llmgw_env_vars): assert exc_info.value.status_code == 401 +class TestLLMGatewayDiscoveryCache: + """Tests for get_available_models TTL caching.""" + + def test_second_call_returns_cached_result(self, llmgw_env_vars): + """Second call within TTL should not hit the endpoint again.""" + with patch.dict(os.environ, llmgw_env_vars, clear=True): + settings = LLMGatewaySettings() + + mock_response = MagicMock() + mock_response.is_error = False + mock_response.json.return_value = [{"modelName": "gpt-4o", "vendor": "openai"}] + + with patch.object(Client, "get", return_value=mock_response) as mock_get: + first = settings.get_available_models() + second = settings.get_available_models() + assert first == second + mock_get.assert_called_once() + + def test_cache_expires_after_ttl(self, llmgw_env_vars): + """After TTL expires, the endpoint should be called again.""" + with patch.dict(os.environ, llmgw_env_vars, clear=True): + settings = LLMGatewaySettings() + + mock_response = MagicMock() + mock_response.is_error = False + mock_response.json.return_value = [{"modelName": "gpt-4o", "vendor": "openai"}] + + with patch.object(Client, "get", return_value=mock_response) as mock_get: + settings.get_available_models() + assert mock_get.call_count == 1 + + # Simulate TTL expiry by rewinding the cache timestamp + settings._models_cache_timestamp = ( + time.monotonic() - settings.DISCOVERY_CACHE_TTL_SECONDS - 1 + ) + + settings.get_available_models() + assert mock_get.call_count == 2 + + def test_cache_is_per_instance(self, llmgw_env_vars): + """Each settings instance should have its own independent cache.""" + with patch.dict(os.environ, llmgw_env_vars, clear=True): + settings1 = LLMGatewaySettings() + settings2 = LLMGatewaySettings() + + mock_response = MagicMock() + mock_response.is_error = False + mock_response.json.return_value = [{"modelName": "gpt-4o", "vendor": "openai"}] + + with patch.object(Client, "get", return_value=mock_response) as mock_get: + settings1.get_available_models() + settings2.get_available_models() + assert mock_get.call_count == 2 + + class TestLLMGatewayAuthRefresh: """Tests for LLMGatewayS2SAuth token refresh logic.""" diff --git a/tests/core/features/settings/test_platform.py b/tests/core/features/settings/test_platform.py index 4f7f8dd..f53a6e5 100644 --- a/tests/core/features/settings/test_platform.py +++ b/tests/core/features/settings/test_platform.py @@ -1,6 +1,7 @@ """Tests for PlatformSettings.""" import os +import time from unittest.mock import MagicMock, patch import pytest @@ -240,6 +241,50 @@ def test_validate_byo_model_is_noop(self, platform_env_vars, mock_platform_auth) assert result is None +class TestPlatformDiscoveryCache: + """Tests for get_available_models TTL caching on PlatformSettings.""" + + def test_second_call_returns_cached_result(self, platform_env_vars, mock_platform_auth): + """Second call within TTL should not query the backend again.""" + with patch.dict(os.environ, platform_env_vars, clear=True): + settings = PlatformSettings() + + mock_model = MagicMock() + mock_model.model_dump.return_value = {"modelName": "gpt-4o", "vendor": "openai"} + + with patch("uipath.llm_client.settings.platform.settings.UiPath") as mock_uipath: + mock_uipath.return_value.agenthub.get_available_llm_models.return_value = [ + mock_model + ] + first = settings.get_available_models() + second = settings.get_available_models() + assert first == second + mock_uipath.return_value.agenthub.get_available_llm_models.assert_called_once() + + def test_cache_expires_after_ttl(self, platform_env_vars, mock_platform_auth): + """After TTL expires, the backend should be queried again.""" + with patch.dict(os.environ, platform_env_vars, clear=True): + settings = PlatformSettings() + + mock_model = MagicMock() + mock_model.model_dump.return_value = {"modelName": "gpt-4o", "vendor": "openai"} + + with patch("uipath.llm_client.settings.platform.settings.UiPath") as mock_uipath: + mock_uipath.return_value.agenthub.get_available_llm_models.return_value = [ + mock_model + ] + settings.get_available_models() + assert mock_uipath.return_value.agenthub.get_available_llm_models.call_count == 1 + + # Simulate TTL expiry + settings._models_cache_timestamp = ( + time.monotonic() - settings.DISCOVERY_CACHE_TTL_SECONDS - 1 + ) + + settings.get_available_models() + assert mock_uipath.return_value.agenthub.get_available_llm_models.call_count == 2 + + class TestPlatformAuthRefresh: """Tests for PlatformAuth token refresh logic.""" diff --git a/tests/core/features/test_discovery.py b/tests/core/features/test_discovery.py new file mode 100644 index 0000000..9605799 --- /dev/null +++ b/tests/core/features/test_discovery.py @@ -0,0 +1,71 @@ +"""Tests for the shared get_model_info utility.""" + +import pytest + +from uipath.llm_client.utils.discovery import get_model_info + +_MODELS = [ + {"modelName": "gpt-4o", "vendor": "openai", "modelSubscriptionType": "UiPathOwned"}, + { + "modelName": "gpt-4o", + "vendor": "openai", + "byomDetails": {"integrationServiceConnectionId": "conn-1"}, + }, + {"modelName": "claude-3-opus", "vendor": "anthropic", "modelSubscriptionType": "UiPathOwned"}, + {"modelName": "gemini-2.0-flash", "vendor": "vertexai", "modelSubscriptionType": "UiPathOwned"}, +] + + +class TestGetModelInfo: + """Tests for get_model_info.""" + + def test_finds_model_by_name(self): + result = get_model_info(_MODELS, "claude-3-opus") + assert result["modelName"] == "claude-3-opus" + + def test_case_insensitive_match(self): + result = get_model_info(_MODELS, "Claude-3-Opus") + assert result["modelName"] == "claude-3-opus" + + def test_filters_by_vendor_type(self): + result = get_model_info(_MODELS, "gpt-4o", vendor_type="openai") + assert result["vendor"] == "openai" + + def test_vendor_type_case_insensitive(self): + result = get_model_info(_MODELS, "gpt-4o", vendor_type="OpenAi") + assert result["vendor"] == "openai" + + def test_filters_by_byo_connection_id(self): + result = get_model_info(_MODELS, "gpt-4o", byo_connection_id="conn-1") + assert result["byomDetails"]["integrationServiceConnectionId"] == "conn-1" + + def test_byo_connection_id_case_insensitive(self): + result = get_model_info(_MODELS, "gpt-4o", byo_connection_id="CONN-1") + assert result["byomDetails"]["integrationServiceConnectionId"] == "conn-1" + + def test_prefers_uipath_owned_when_no_byo_connection_id(self): + result = get_model_info(_MODELS, "gpt-4o") + assert result.get("modelSubscriptionType") == "UiPathOwned" + assert result.get("byomDetails") is None + + def test_raises_when_model_not_found(self): + with pytest.raises(ValueError, match="not found"): + get_model_info(_MODELS, "nonexistent-model") + + def test_raises_when_vendor_filter_eliminates_all(self): + with pytest.raises(ValueError, match="not found"): + get_model_info(_MODELS, "gpt-4o", vendor_type="anthropic") + + def test_raises_when_byo_connection_id_not_found(self): + with pytest.raises(ValueError, match="not found"): + get_model_info(_MODELS, "gpt-4o", byo_connection_id="nonexistent-conn") + + def test_returns_first_match_for_single_result(self): + models = [{"modelName": "my-model", "vendor": "openai"}] + result = get_model_info(models, "my-model") + assert result["modelName"] == "my-model" + + def test_error_message_includes_available_model_names(self): + with pytest.raises(ValueError, match="gpt-4o") as exc_info: + get_model_info(_MODELS, "missing") + assert "claude-3-opus" in str(exc_info.value) From bcb23c28b905c6825e2886410810f47007fcdb00 Mon Sep 17 00:00:00 2001 From: Cosmin Maria Date: Thu, 16 Apr 2026 23:54:23 +0300 Subject: [PATCH 2/2] Simplify caching to @lru_cache instead of manual TTL Co-Authored-By: Claude Opus 4.6 (1M context) --- CHANGELOG.md | 2 +- src/uipath/llm_client/settings/base.py | 40 +++++-------------- .../settings/llmgateway/settings.py | 4 +- .../llm_client/settings/platform/settings.py | 4 +- .../core/features/settings/test_llmgateway.py | 26 +----------- tests/core/features/settings/test_platform.py | 28 +------------ 6 files changed, 20 insertions(+), 84 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e7b0b42..671cfaa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,7 +5,7 @@ All notable changes to `uipath_llm_client` (core package) will be documented in ## [1.8.4] - 2026-04-16 ### Added -- TTL-based caching for `get_available_models()` — discovery endpoint results are cached for 5 minutes per settings instance, avoiding redundant network calls when creating multiple models in a session +- `lru_cache` on `get_available_models()` — discovery endpoint results are cached per settings instance, avoiding redundant network calls when creating multiple models in a session - `get_model_info()` shared utility for looking up a model by name from the discovery endpoint results, with optional vendor and BYOM connection ID filters ## [1.8.3] - 2026-04-16 diff --git a/src/uipath/llm_client/settings/base.py b/src/uipath/llm_client/settings/base.py index 26a56a9..9c3e0b8 100644 --- a/src/uipath/llm_client/settings/base.py +++ b/src/uipath/llm_client/settings/base.py @@ -5,13 +5,12 @@ Concrete implementations are provided in the `platform` and `llmgateway` submodules. """ -import time from abc import ABC, abstractmethod from collections.abc import Mapping -from typing import Any, ClassVar, Self +from typing import Any, Self from httpx import Auth -from pydantic import BaseModel, PrivateAttr, model_validator +from pydantic import BaseModel, model_validator from pydantic_settings import BaseSettings, SettingsConfigDict from uipath.llm_client.settings.constants import ApiFlavor, ApiType, RoutingMode, VendorType @@ -75,16 +74,15 @@ class UiPathBaseSettings(BaseSettings, ABC): with validation aliases allowing flexible naming conventions. """ - DISCOVERY_CACHE_TTL_SECONDS: ClassVar[int] = 300 - model_config = SettingsConfigDict( validate_by_alias=True, populate_by_name=True, extra="allow", ) - _models_cache: list[dict[str, Any]] | None = PrivateAttr(default=None) - _models_cache_timestamp: float = PrivateAttr(default=0.0) + # Pydantic models are not hashable by default; restore object identity + # hashing so that @lru_cache can be used on instance methods. + __hash__ = object.__hash__ # type: ignore[assignment] @abstractmethod def build_base_url( @@ -137,33 +135,13 @@ def build_auth_pipeline( """ ... - def get_available_models(self) -> list[dict[str, Any]]: - """Get the list of available models from the backend, with TTL caching. - - Results are cached for ``DISCOVERY_CACHE_TTL_SECONDS`` (default 300s). - Subsequent calls within the TTL window return the cached list. - - Returns: - A list of dictionaries containing model information. - """ - now = time.monotonic() - if ( - self._models_cache is not None - and (now - self._models_cache_timestamp) < self.DISCOVERY_CACHE_TTL_SECONDS - ): - return self._models_cache - models = self._fetch_available_models() - self._models_cache = models - self._models_cache_timestamp = now - return models - @abstractmethod - def _fetch_available_models(self) -> list[dict[str, Any]]: - """Fetch the list of available models from the backend. + def get_available_models(self) -> list[dict[str, Any]]: + """Get the list of available models from the backend. Subclasses must implement this method to query the backend's - model discovery endpoint. Called by :meth:`get_available_models` - when the cache is stale or empty. + model discovery endpoint. Implementations should use + ``@lru_cache`` to avoid redundant network calls. Returns: A list of dictionaries containing model information. diff --git a/src/uipath/llm_client/settings/llmgateway/settings.py b/src/uipath/llm_client/settings/llmgateway/settings.py index f4c6920..a79d826 100644 --- a/src/uipath/llm_client/settings/llmgateway/settings.py +++ b/src/uipath/llm_client/settings/llmgateway/settings.py @@ -1,5 +1,6 @@ import logging from collections.abc import Mapping +from functools import lru_cache from typing import Any, Self from httpx import Client @@ -114,7 +115,8 @@ def build_auth_headers( return headers @override - def _fetch_available_models(self) -> list[dict[str, Any]]: + @lru_cache # noqa: B019 + def get_available_models(self) -> list[dict[str, Any]]: # type: ignore[override] discovery_url = f"{self.base_url}/{self.org_id}/{self.tenant_id}/{LLMGatewayEndpoints.DISCOVERY_ENDPOINT.value}" with Client( auth=self.build_auth_pipeline(), diff --git a/src/uipath/llm_client/settings/platform/settings.py b/src/uipath/llm_client/settings/platform/settings.py index 0b317b1..2d6d99c 100644 --- a/src/uipath/llm_client/settings/platform/settings.py +++ b/src/uipath/llm_client/settings/platform/settings.py @@ -1,6 +1,7 @@ """Base settings for UiPath Platform (AgentHub/Orchestrator) client.""" from collections.abc import Mapping +from functools import lru_cache from typing import Any, Self from pydantic import Field, SecretStr, model_validator @@ -163,7 +164,8 @@ def build_auth_headers( return headers @override - def _fetch_available_models(self) -> list[dict[str, Any]]: + @lru_cache # noqa: B019 + def get_available_models(self) -> list[dict[str, Any]]: # type: ignore[override] models = UiPath().agenthub.get_available_llm_models( headers=dict(self.build_auth_headers()), ) diff --git a/tests/core/features/settings/test_llmgateway.py b/tests/core/features/settings/test_llmgateway.py index 895b67e..e508144 100644 --- a/tests/core/features/settings/test_llmgateway.py +++ b/tests/core/features/settings/test_llmgateway.py @@ -1,7 +1,6 @@ """Tests for LLMGatewaySettings.""" import os -import time from unittest.mock import MagicMock, patch import pytest @@ -155,10 +154,10 @@ def test_get_available_models_raises_on_unauthorized(self, llmgw_env_vars): class TestLLMGatewayDiscoveryCache: - """Tests for get_available_models TTL caching.""" + """Tests for get_available_models lru_cache.""" def test_second_call_returns_cached_result(self, llmgw_env_vars): - """Second call within TTL should not hit the endpoint again.""" + """Second call should return the cached result without hitting the endpoint.""" with patch.dict(os.environ, llmgw_env_vars, clear=True): settings = LLMGatewaySettings() @@ -172,27 +171,6 @@ def test_second_call_returns_cached_result(self, llmgw_env_vars): assert first == second mock_get.assert_called_once() - def test_cache_expires_after_ttl(self, llmgw_env_vars): - """After TTL expires, the endpoint should be called again.""" - with patch.dict(os.environ, llmgw_env_vars, clear=True): - settings = LLMGatewaySettings() - - mock_response = MagicMock() - mock_response.is_error = False - mock_response.json.return_value = [{"modelName": "gpt-4o", "vendor": "openai"}] - - with patch.object(Client, "get", return_value=mock_response) as mock_get: - settings.get_available_models() - assert mock_get.call_count == 1 - - # Simulate TTL expiry by rewinding the cache timestamp - settings._models_cache_timestamp = ( - time.monotonic() - settings.DISCOVERY_CACHE_TTL_SECONDS - 1 - ) - - settings.get_available_models() - assert mock_get.call_count == 2 - def test_cache_is_per_instance(self, llmgw_env_vars): """Each settings instance should have its own independent cache.""" with patch.dict(os.environ, llmgw_env_vars, clear=True): diff --git a/tests/core/features/settings/test_platform.py b/tests/core/features/settings/test_platform.py index f53a6e5..aa126f6 100644 --- a/tests/core/features/settings/test_platform.py +++ b/tests/core/features/settings/test_platform.py @@ -1,7 +1,6 @@ """Tests for PlatformSettings.""" import os -import time from unittest.mock import MagicMock, patch import pytest @@ -242,10 +241,10 @@ def test_validate_byo_model_is_noop(self, platform_env_vars, mock_platform_auth) class TestPlatformDiscoveryCache: - """Tests for get_available_models TTL caching on PlatformSettings.""" + """Tests for get_available_models lru_cache on PlatformSettings.""" def test_second_call_returns_cached_result(self, platform_env_vars, mock_platform_auth): - """Second call within TTL should not query the backend again.""" + """Second call should return the cached result without querying the backend.""" with patch.dict(os.environ, platform_env_vars, clear=True): settings = PlatformSettings() @@ -261,29 +260,6 @@ def test_second_call_returns_cached_result(self, platform_env_vars, mock_platfor assert first == second mock_uipath.return_value.agenthub.get_available_llm_models.assert_called_once() - def test_cache_expires_after_ttl(self, platform_env_vars, mock_platform_auth): - """After TTL expires, the backend should be queried again.""" - with patch.dict(os.environ, platform_env_vars, clear=True): - settings = PlatformSettings() - - mock_model = MagicMock() - mock_model.model_dump.return_value = {"modelName": "gpt-4o", "vendor": "openai"} - - with patch("uipath.llm_client.settings.platform.settings.UiPath") as mock_uipath: - mock_uipath.return_value.agenthub.get_available_llm_models.return_value = [ - mock_model - ] - settings.get_available_models() - assert mock_uipath.return_value.agenthub.get_available_llm_models.call_count == 1 - - # Simulate TTL expiry - settings._models_cache_timestamp = ( - time.monotonic() - settings.DISCOVERY_CACHE_TTL_SECONDS - 1 - ) - - settings.get_available_models() - assert mock_uipath.return_value.agenthub.get_available_llm_models.call_count == 2 - class TestPlatformAuthRefresh: """Tests for PlatformAuth token refresh logic."""