From 5f9783a40f5d8f8a6d8610090c834f62a96399b2 Mon Sep 17 00:00:00 2001
From: Cosmin Maria <acosmin.maria@gmail.com>
Date: Thu, 16 Apr 2026 23:37:17 +0300
Subject: [PATCH 1/2] Feat: cache discovery endpoint results and extract shared
 get_model_info (#62)

Add TTL-based caching (5 min) for get_available_models() to avoid
redundant discovery calls when creating multiple models in a session.
Extract shared model lookup logic into get_model_info() utility in the
core package, reused by both the langchain factory and litellm client.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 CHANGELOG.md                                  |  6 ++
 packages/uipath_langchain_client/CHANGELOG.md |  5 ++
 .../uipath_langchain_client/pyproject.toml    |  2 +-
 .../uipath_langchain_client/__version__.py    |  2 +-
 .../src/uipath_langchain_client/factory.py    | 56 ++-------------
 src/uipath/llm_client/__init__.py             |  3 +
 src/uipath/llm_client/__version__.py          |  2 +-
 .../llm_client/clients/litellm/client.py      | 22 ++----
 src/uipath/llm_client/settings/base.py        | 39 ++++++++--
 .../settings/llmgateway/settings.py           |  2 +-
 .../llm_client/settings/platform/settings.py  |  3 +-
 src/uipath/llm_client/utils/discovery.py      | 67 +++++++++++++++++
 .../core/features/settings/test_llmgateway.py | 56 +++++++++++++++
 tests/core/features/settings/test_platform.py | 45 ++++++++++++
 tests/core/features/test_discovery.py         | 71 +++++++++++++++++++
 15 files changed, 303 insertions(+), 78 deletions(-)
 create mode 100644 src/uipath/llm_client/utils/discovery.py
 create mode 100644 tests/core/features/test_discovery.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1fc2405..e7b0b42 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,12 @@
 
 All notable changes to `uipath_llm_client` (core package) will be documented in this file.
 
+## [1.8.4] - 2026-04-16
+
+### Added
+- TTL-based caching for `get_available_models()` — discovery endpoint results are cached for 5 minutes per settings instance, avoiding redundant network calls when creating multiple models in a session
+- `get_model_info()` shared utility for looking up a model by name from the discovery endpoint results, with optional vendor and BYOM connection ID filters
+
 ## [1.8.3] - 2026-04-16
 
 ### Added
diff --git a/packages/uipath_langchain_client/CHANGELOG.md b/packages/uipath_langchain_client/CHANGELOG.md
index 9a34ed2..113a255 100644
--- a/packages/uipath_langchain_client/CHANGELOG.md
+++ b/packages/uipath_langchain_client/CHANGELOG.md
@@ -2,6 +2,11 @@
 
 All notable changes to `uipath_langchain_client` will be documented in this file.
 
+## [1.8.4] - 2026-04-16
+
+### Changed
+- Factory functions (`get_chat_model`, `get_embedding_model`) now use the shared `get_model_info()` utility from the core package instead of an inline implementation
+
 ## [1.8.3] - 2026-04-16
 
 ### Added
diff --git a/packages/uipath_langchain_client/pyproject.toml b/packages/uipath_langchain_client/pyproject.toml
index 942c8e5..cba1207 100644
--- a/packages/uipath_langchain_client/pyproject.toml
+++ b/packages/uipath_langchain_client/pyproject.toml
@@ -6,7 +6,7 @@ readme = "README.md"
 requires-python = ">=3.11"
 dependencies = [
     "langchain>=1.2.15",
-    "uipath-llm-client>=1.8.3",
+    "uipath-llm-client>=1.8.4",
 ]
 
 [project.optional-dependencies]
diff --git a/packages/uipath_langchain_client/src/uipath_langchain_client/__version__.py b/packages/uipath_langchain_client/src/uipath_langchain_client/__version__.py
index 514ff65..c53ebdc 100644
--- a/packages/uipath_langchain_client/src/uipath_langchain_client/__version__.py
+++ b/packages/uipath_langchain_client/src/uipath_langchain_client/__version__.py
@@ -1,3 +1,3 @@
 __title__ = "UiPath LangChain Client"
 __description__ = "A Python client for interacting with UiPath's LLM services via LangChain."
-__version__ = "1.8.3"
+__version__ = "1.8.4"
diff --git a/packages/uipath_langchain_client/src/uipath_langchain_client/factory.py b/packages/uipath_langchain_client/src/uipath_langchain_client/factory.py
index f4b8c05..331dd9d 100644
--- a/packages/uipath_langchain_client/src/uipath_langchain_client/factory.py
+++ b/packages/uipath_langchain_client/src/uipath_langchain_client/factory.py
@@ -22,6 +22,7 @@
 
 from typing import Any
 
+from uipath.llm_client.utils.discovery import get_model_info
 from uipath_langchain_client.base_client import (
     UiPathBaseChatModel,
     UiPathBaseEmbeddings,
@@ -37,49 +38,6 @@
 )
 
 
-def _get_model_info(
-    model_name: str,
-    *,
-    client_settings: UiPathBaseSettings,
-    byo_connection_id: str | None = None,
-    vendor_type: VendorType | str | None = None,
-) -> dict[str, Any]:
-    available_models = client_settings.get_available_models()
-
-    matching_models = [m for m in available_models if m["modelName"].lower() == model_name.lower()]
-
-    if vendor_type is not None:
-        matching_models = [
-            m for m in matching_models if m.get("vendor", "").lower() == str(vendor_type).lower()
-        ]
-
-    if byo_connection_id:
-        matching_models = [
-            m
-            for m in matching_models
-            if (byom_details := m.get("byomDetails"))
-            and byom_details.get("integrationServiceConnectionId", "").lower()
-            == byo_connection_id.lower()
-        ]
-
-    if not byo_connection_id and len(matching_models) > 1:
-        matching_models = [
-            m
-            for m in matching_models
-            if (
-                (m.get("modelSubscriptionType", "") == "UiPathOwned")
-                or (m.get("byomDetails") is None)
-            )
-        ]
-
-    if not matching_models:
-        raise ValueError(
-            f"Model {model_name} not found. Available models are: {[m['modelName'] for m in available_models]}"
-        )
-
-    return matching_models[0]
-
-
 def get_chat_model(
     model_name: str,
     *,
@@ -120,11 +78,11 @@ def get_chat_model(
         ValueError: If the model is not found in available models or vendor is not supported.
     """
     client_settings = client_settings or get_default_client_settings()
-    model_info = _get_model_info(
+    model_info = get_model_info(
+        client_settings.get_available_models(),
         model_name,
-        client_settings=client_settings,
+        vendor_type=str(vendor_type) if vendor_type is not None else None,
         byo_connection_id=byo_connection_id,
-        vendor_type=vendor_type,
     )
     model_family = model_info.get("modelFamily", None)
     if model_family is not None:
@@ -300,11 +258,11 @@ def get_embedding_model(
         >>> vectors = embeddings.embed_documents(["Hello world"])
     """
     client_settings = client_settings or get_default_client_settings()
-    model_info = _get_model_info(
+    model_info = get_model_info(
+        client_settings.get_available_models(),
         model_name,
-        client_settings=client_settings,
+        vendor_type=str(vendor_type) if vendor_type is not None else None,
         byo_connection_id=byo_connection_id,
-        vendor_type=vendor_type,
     )
     is_uipath_owned = model_info.get("modelSubscriptionType") == "UiPathOwned"
     if not is_uipath_owned:
diff --git a/src/uipath/llm_client/__init__.py b/src/uipath/llm_client/__init__.py
index c4e6261..fd5e42a 100644
--- a/src/uipath/llm_client/__init__.py
+++ b/src/uipath/llm_client/__init__.py
@@ -36,6 +36,7 @@
     PlatformSettings,
     get_default_client_settings,
 )
+from uipath.llm_client.utils.discovery import get_model_info
 from uipath.llm_client.utils.exceptions import (
     UiPathAPIError,
     UiPathAuthenticationError,
@@ -64,6 +65,8 @@
     # HTTPX clients
     "UiPathHttpxClient",
     "UiPathHttpxAsyncClient",
+    # Discovery
+    "get_model_info",
     # Retry
     "RetryConfig",
     # Exceptions
diff --git a/src/uipath/llm_client/__version__.py b/src/uipath/llm_client/__version__.py
index ea22869..212fee4 100644
--- a/src/uipath/llm_client/__version__.py
+++ b/src/uipath/llm_client/__version__.py
@@ -1,3 +1,3 @@
 __title__ = "UiPath LLM Client"
 __description__ = "A Python client for interacting with UiPath's LLM services."
-__version__ = "1.8.3"
+__version__ = "1.8.4"
diff --git a/src/uipath/llm_client/clients/litellm/client.py b/src/uipath/llm_client/clients/litellm/client.py
index 93d55b5..b010dec 100644
--- a/src/uipath/llm_client/clients/litellm/client.py
+++ b/src/uipath/llm_client/clients/litellm/client.py
@@ -34,6 +34,7 @@
     RoutingMode,
     VendorType,
 )
+from uipath.llm_client.utils.discovery import get_model_info
 from uipath.llm_client.utils.retry import RetryConfig
 
 # Route OpenAI chat completions through base_llm_http_handler (accepts HTTPHandler)
@@ -189,22 +190,11 @@ def _discover_and_build_api_config(
         User-supplied ``vendor_type`` filters models during discovery.
         User-supplied ``api_flavor`` overrides the discovered value.
         """
-        available_models = self._client_settings.get_available_models()
-        matching = [
-            m for m in available_models if m["modelName"].lower() == self._model_name.lower()
-        ]
-
-        if vendor_type is not None:
-            matching = [
-                m for m in matching if m.get("vendor", "").lower() == str(vendor_type).lower()
-            ]
-
-        if not matching:
-            raise ValueError(
-                f"Model '{self._model_name}' not found. "
-                f"Available: {[m['modelName'] for m in available_models]}"
-            )
-        model_info = matching[0]
+        model_info = get_model_info(
+            self._client_settings.get_available_models(),
+            self._model_name,
+            vendor_type=str(vendor_type) if vendor_type is not None else None,
+        )
 
         model_family: str | None = None
         raw_family = model_info.get("modelFamily", None)
diff --git a/src/uipath/llm_client/settings/base.py b/src/uipath/llm_client/settings/base.py
index c2c974c..26a56a9 100644
--- a/src/uipath/llm_client/settings/base.py
+++ b/src/uipath/llm_client/settings/base.py
@@ -5,12 +5,13 @@
 Concrete implementations are provided in the `platform` and `llmgateway` submodules.
 """
 
+import time
 from abc import ABC, abstractmethod
 from collections.abc import Mapping
-from typing import Any, Self
+from typing import Any, ClassVar, Self
 
 from httpx import Auth
-from pydantic import BaseModel, model_validator
+from pydantic import BaseModel, PrivateAttr, model_validator
 from pydantic_settings import BaseSettings, SettingsConfigDict
 
 from uipath.llm_client.settings.constants import ApiFlavor, ApiType, RoutingMode, VendorType
@@ -74,12 +75,17 @@ class UiPathBaseSettings(BaseSettings, ABC):
     with validation aliases allowing flexible naming conventions.
     """
 
+    DISCOVERY_CACHE_TTL_SECONDS: ClassVar[int] = 300
+
     model_config = SettingsConfigDict(
         validate_by_alias=True,
         populate_by_name=True,
         extra="allow",
     )
 
+    _models_cache: list[dict[str, Any]] | None = PrivateAttr(default=None)
+    _models_cache_timestamp: float = PrivateAttr(default=0.0)
+
     @abstractmethod
     def build_base_url(
         self,
@@ -131,14 +137,33 @@ def build_auth_pipeline(
         """
         ...
 
+    def get_available_models(self) -> list[dict[str, Any]]:
+        """Get the list of available models from the backend, with TTL caching.
+
+        Results are cached for ``DISCOVERY_CACHE_TTL_SECONDS`` (default 300s).
+        Subsequent calls within the TTL window return the cached list.
+
+        Returns:
+            A list of dictionaries containing model information.
+        """
+        now = time.monotonic()
+        if (
+            self._models_cache is not None
+            and (now - self._models_cache_timestamp) < self.DISCOVERY_CACHE_TTL_SECONDS
+        ):
+            return self._models_cache
+        models = self._fetch_available_models()
+        self._models_cache = models
+        self._models_cache_timestamp = now
+        return models
+
     @abstractmethod
-    def get_available_models(
-        self,
-    ) -> list[dict[str, Any]]:
-        """Get the list of available models from the backend.
+    def _fetch_available_models(self) -> list[dict[str, Any]]:
+        """Fetch the list of available models from the backend.
 
         Subclasses must implement this method to query the backend's
-        model discovery endpoint.
+        model discovery endpoint. Called by :meth:`get_available_models`
+        when the cache is stale or empty.
 
         Returns:
             A list of dictionaries containing model information.
diff --git a/src/uipath/llm_client/settings/llmgateway/settings.py b/src/uipath/llm_client/settings/llmgateway/settings.py
index 8c0b491..f4c6920 100644
--- a/src/uipath/llm_client/settings/llmgateway/settings.py
+++ b/src/uipath/llm_client/settings/llmgateway/settings.py
@@ -114,7 +114,7 @@ def build_auth_headers(
         return headers
 
     @override
-    def get_available_models(self) -> list[dict[str, Any]]:
+    def _fetch_available_models(self) -> list[dict[str, Any]]:
         discovery_url = f"{self.base_url}/{self.org_id}/{self.tenant_id}/{LLMGatewayEndpoints.DISCOVERY_ENDPOINT.value}"
         with Client(
             auth=self.build_auth_pipeline(),
diff --git a/src/uipath/llm_client/settings/platform/settings.py b/src/uipath/llm_client/settings/platform/settings.py
index a20abdc..0b317b1 100644
--- a/src/uipath/llm_client/settings/platform/settings.py
+++ b/src/uipath/llm_client/settings/platform/settings.py
@@ -163,8 +163,7 @@ def build_auth_headers(
         return headers
 
     @override
-    def get_available_models(self) -> list[dict[str, Any]]:
-
+    def _fetch_available_models(self) -> list[dict[str, Any]]:
         models = UiPath().agenthub.get_available_llm_models(
             headers=dict(self.build_auth_headers()),
         )
diff --git a/src/uipath/llm_client/utils/discovery.py b/src/uipath/llm_client/utils/discovery.py
new file mode 100644
index 0000000..4525aeb
--- /dev/null
+++ b/src/uipath/llm_client/utils/discovery.py
@@ -0,0 +1,67 @@
+"""Shared model discovery helpers."""
+
+from typing import Any
+
+
+def get_model_info(
+    available_models: list[dict[str, Any]],
+    model_name: str,
+    *,
+    vendor_type: str | None = None,
+    byo_connection_id: str | None = None,
+) -> dict[str, Any]:
+    """Find and return a single model entry from the discovery endpoint results.
+
+    Applies the following filters in order:
+
+    1. Match by ``modelName`` (case-insensitive).
+    2. If ``vendor_type`` is given, keep only models whose ``vendor`` matches.
+    3. If ``byo_connection_id`` is given, keep only models whose
+       ``byomDetails.integrationServiceConnectionId`` matches.
+    4. When no ``byo_connection_id`` is provided and multiple candidates remain,
+       prefer UiPath-owned (non-BYOM) models.
+
+    Args:
+        available_models: Full list of model dictionaries from the discovery
+            endpoint (as returned by :meth:`UiPathBaseSettings.get_available_models`).
+        model_name: Name of the model to look up.
+        vendor_type: Optional vendor filter (e.g. ``"openai"``).
+        byo_connection_id: Optional BYOM connection ID filter.
+
+    Returns:
+        The first matching model dictionary.
+
+    Raises:
+        ValueError: If no model matches the given criteria.
+    """
+    matching = [m for m in available_models if m["modelName"].lower() == model_name.lower()]
+
+    if vendor_type is not None:
+        matching = [m for m in matching if m.get("vendor", "").lower() == str(vendor_type).lower()]
+
+    if byo_connection_id:
+        matching = [
+            m
+            for m in matching
+            if (byom_details := m.get("byomDetails"))
+            and byom_details.get("integrationServiceConnectionId", "").lower()
+            == byo_connection_id.lower()
+        ]
+
+    if not byo_connection_id and len(matching) > 1:
+        matching = [
+            m
+            for m in matching
+            if (
+                (m.get("modelSubscriptionType", "") == "UiPathOwned")
+                or (m.get("byomDetails") is None)
+            )
+        ]
+
+    if not matching:
+        raise ValueError(
+            f"Model '{model_name}' not found. "
+            f"Available models: {[m['modelName'] for m in available_models]}"
+        )
+
+    return matching[0]
diff --git a/tests/core/features/settings/test_llmgateway.py b/tests/core/features/settings/test_llmgateway.py
index 0cb12ab..895b67e 100644
--- a/tests/core/features/settings/test_llmgateway.py
+++ b/tests/core/features/settings/test_llmgateway.py
@@ -1,6 +1,7 @@
 """Tests for LLMGatewaySettings."""
 
 import os
+import time
 from unittest.mock import MagicMock, patch
 
 import pytest
@@ -153,6 +154,61 @@ def test_get_available_models_raises_on_unauthorized(self, llmgw_env_vars):
                 assert exc_info.value.status_code == 401
 
 
+class TestLLMGatewayDiscoveryCache:
+    """Tests for get_available_models TTL caching."""
+
+    def test_second_call_returns_cached_result(self, llmgw_env_vars):
+        """Second call within TTL should not hit the endpoint again."""
+        with patch.dict(os.environ, llmgw_env_vars, clear=True):
+            settings = LLMGatewaySettings()
+
+            mock_response = MagicMock()
+            mock_response.is_error = False
+            mock_response.json.return_value = [{"modelName": "gpt-4o", "vendor": "openai"}]
+
+            with patch.object(Client, "get", return_value=mock_response) as mock_get:
+                first = settings.get_available_models()
+                second = settings.get_available_models()
+                assert first == second
+                mock_get.assert_called_once()
+
+    def test_cache_expires_after_ttl(self, llmgw_env_vars):
+        """After TTL expires, the endpoint should be called again."""
+        with patch.dict(os.environ, llmgw_env_vars, clear=True):
+            settings = LLMGatewaySettings()
+
+            mock_response = MagicMock()
+            mock_response.is_error = False
+            mock_response.json.return_value = [{"modelName": "gpt-4o", "vendor": "openai"}]
+
+            with patch.object(Client, "get", return_value=mock_response) as mock_get:
+                settings.get_available_models()
+                assert mock_get.call_count == 1
+
+                # Simulate TTL expiry by rewinding the cache timestamp
+                settings._models_cache_timestamp = (
+                    time.monotonic() - settings.DISCOVERY_CACHE_TTL_SECONDS - 1
+                )
+
+                settings.get_available_models()
+                assert mock_get.call_count == 2
+
+    def test_cache_is_per_instance(self, llmgw_env_vars):
+        """Each settings instance should have its own independent cache."""
+        with patch.dict(os.environ, llmgw_env_vars, clear=True):
+            settings1 = LLMGatewaySettings()
+            settings2 = LLMGatewaySettings()
+
+            mock_response = MagicMock()
+            mock_response.is_error = False
+            mock_response.json.return_value = [{"modelName": "gpt-4o", "vendor": "openai"}]
+
+            with patch.object(Client, "get", return_value=mock_response) as mock_get:
+                settings1.get_available_models()
+                settings2.get_available_models()
+                assert mock_get.call_count == 2
+
+
 class TestLLMGatewayAuthRefresh:
     """Tests for LLMGatewayS2SAuth token refresh logic."""
 
diff --git a/tests/core/features/settings/test_platform.py b/tests/core/features/settings/test_platform.py
index 4f7f8dd..f53a6e5 100644
--- a/tests/core/features/settings/test_platform.py
+++ b/tests/core/features/settings/test_platform.py
@@ -1,6 +1,7 @@
 """Tests for PlatformSettings."""
 
 import os
+import time
 from unittest.mock import MagicMock, patch
 
 import pytest
@@ -240,6 +241,50 @@ def test_validate_byo_model_is_noop(self, platform_env_vars, mock_platform_auth)
             assert result is None
 
 
+class TestPlatformDiscoveryCache:
+    """Tests for get_available_models TTL caching on PlatformSettings."""
+
+    def test_second_call_returns_cached_result(self, platform_env_vars, mock_platform_auth):
+        """Second call within TTL should not query the backend again."""
+        with patch.dict(os.environ, platform_env_vars, clear=True):
+            settings = PlatformSettings()
+
+            mock_model = MagicMock()
+            mock_model.model_dump.return_value = {"modelName": "gpt-4o", "vendor": "openai"}
+
+            with patch("uipath.llm_client.settings.platform.settings.UiPath") as mock_uipath:
+                mock_uipath.return_value.agenthub.get_available_llm_models.return_value = [
+                    mock_model
+                ]
+                first = settings.get_available_models()
+                second = settings.get_available_models()
+                assert first == second
+                mock_uipath.return_value.agenthub.get_available_llm_models.assert_called_once()
+
+    def test_cache_expires_after_ttl(self, platform_env_vars, mock_platform_auth):
+        """After TTL expires, the backend should be queried again."""
+        with patch.dict(os.environ, platform_env_vars, clear=True):
+            settings = PlatformSettings()
+
+            mock_model = MagicMock()
+            mock_model.model_dump.return_value = {"modelName": "gpt-4o", "vendor": "openai"}
+
+            with patch("uipath.llm_client.settings.platform.settings.UiPath") as mock_uipath:
+                mock_uipath.return_value.agenthub.get_available_llm_models.return_value = [
+                    mock_model
+                ]
+                settings.get_available_models()
+                assert mock_uipath.return_value.agenthub.get_available_llm_models.call_count == 1
+
+                # Simulate TTL expiry
+                settings._models_cache_timestamp = (
+                    time.monotonic() - settings.DISCOVERY_CACHE_TTL_SECONDS - 1
+                )
+
+                settings.get_available_models()
+                assert mock_uipath.return_value.agenthub.get_available_llm_models.call_count == 2
+
+
 class TestPlatformAuthRefresh:
     """Tests for PlatformAuth token refresh logic."""
 
diff --git a/tests/core/features/test_discovery.py b/tests/core/features/test_discovery.py
new file mode 100644
index 0000000..9605799
--- /dev/null
+++ b/tests/core/features/test_discovery.py
@@ -0,0 +1,71 @@
+"""Tests for the shared get_model_info utility."""
+
+import pytest
+
+from uipath.llm_client.utils.discovery import get_model_info
+
+_MODELS = [
+    {"modelName": "gpt-4o", "vendor": "openai", "modelSubscriptionType": "UiPathOwned"},
+    {
+        "modelName": "gpt-4o",
+        "vendor": "openai",
+        "byomDetails": {"integrationServiceConnectionId": "conn-1"},
+    },
+    {"modelName": "claude-3-opus", "vendor": "anthropic", "modelSubscriptionType": "UiPathOwned"},
+    {"modelName": "gemini-2.0-flash", "vendor": "vertexai", "modelSubscriptionType": "UiPathOwned"},
+]
+
+
+class TestGetModelInfo:
+    """Tests for get_model_info."""
+
+    def test_finds_model_by_name(self):
+        result = get_model_info(_MODELS, "claude-3-opus")
+        assert result["modelName"] == "claude-3-opus"
+
+    def test_case_insensitive_match(self):
+        result = get_model_info(_MODELS, "Claude-3-Opus")
+        assert result["modelName"] == "claude-3-opus"
+
+    def test_filters_by_vendor_type(self):
+        result = get_model_info(_MODELS, "gpt-4o", vendor_type="openai")
+        assert result["vendor"] == "openai"
+
+    def test_vendor_type_case_insensitive(self):
+        result = get_model_info(_MODELS, "gpt-4o", vendor_type="OpenAi")
+        assert result["vendor"] == "openai"
+
+    def test_filters_by_byo_connection_id(self):
+        result = get_model_info(_MODELS, "gpt-4o", byo_connection_id="conn-1")
+        assert result["byomDetails"]["integrationServiceConnectionId"] == "conn-1"
+
+    def test_byo_connection_id_case_insensitive(self):
+        result = get_model_info(_MODELS, "gpt-4o", byo_connection_id="CONN-1")
+        assert result["byomDetails"]["integrationServiceConnectionId"] == "conn-1"
+
+    def test_prefers_uipath_owned_when_no_byo_connection_id(self):
+        result = get_model_info(_MODELS, "gpt-4o")
+        assert result.get("modelSubscriptionType") == "UiPathOwned"
+        assert result.get("byomDetails") is None
+
+    def test_raises_when_model_not_found(self):
+        with pytest.raises(ValueError, match="not found"):
+            get_model_info(_MODELS, "nonexistent-model")
+
+    def test_raises_when_vendor_filter_eliminates_all(self):
+        with pytest.raises(ValueError, match="not found"):
+            get_model_info(_MODELS, "gpt-4o", vendor_type="anthropic")
+
+    def test_raises_when_byo_connection_id_not_found(self):
+        with pytest.raises(ValueError, match="not found"):
+            get_model_info(_MODELS, "gpt-4o", byo_connection_id="nonexistent-conn")
+
+    def test_returns_first_match_for_single_result(self):
+        models = [{"modelName": "my-model", "vendor": "openai"}]
+        result = get_model_info(models, "my-model")
+        assert result["modelName"] == "my-model"
+
+    def test_error_message_includes_available_model_names(self):
+        with pytest.raises(ValueError, match="gpt-4o") as exc_info:
+            get_model_info(_MODELS, "missing")
+        assert "claude-3-opus" in str(exc_info.value)

From bcb23c28b905c6825e2886410810f47007fcdb00 Mon Sep 17 00:00:00 2001
From: Cosmin Maria <acosmin.maria@gmail.com>
Date: Thu, 16 Apr 2026 23:54:23 +0300
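Subject: [PATCH 2/2] Simplify caching: use @lru_cache instead of manual TTL

Replace the hand-rolled TTL cache in UiPathBaseSettings with
functools.lru_cache applied to get_available_models() in each settings
subclass. Note that cached discovery results no longer expire after
5 minutes: an entry is kept until it is evicted from the LRU cache or
the process exits.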

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 CHANGELOG.md                                  |  2 +-
 src/uipath/llm_client/settings/base.py        | 40 +++++--------------
 .../settings/llmgateway/settings.py           |  4 +-
 .../llm_client/settings/platform/settings.py  |  4 +-
 .../core/features/settings/test_llmgateway.py | 26 +-----------
 tests/core/features/settings/test_platform.py | 28 +------------
 6 files changed, 20 insertions(+), 84 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index e7b0b42..671cfaa 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,7 +5,7 @@ All notable changes to `uipath_llm_client` (core package) will be documented in
 ## [1.8.4] - 2026-04-16
 
 ### Added
-- TTL-based caching for `get_available_models()` — discovery endpoint results are cached for 5 minutes per settings instance, avoiding redundant network calls when creating multiple models in a session
+- `lru_cache` on `get_available_models()` — discovery endpoint results are cached per settings instance (with no time-based expiry), avoiding redundant network calls when creating multiple models in a session
 - `get_model_info()` shared utility for looking up a model by name from the discovery endpoint results, with optional vendor and BYOM connection ID filters
 
 ## [1.8.3] - 2026-04-16
diff --git a/src/uipath/llm_client/settings/base.py b/src/uipath/llm_client/settings/base.py
index 26a56a9..9c3e0b8 100644
--- a/src/uipath/llm_client/settings/base.py
+++ b/src/uipath/llm_client/settings/base.py
@@ -5,13 +5,12 @@
 Concrete implementations are provided in the `platform` and `llmgateway` submodules.
 """
 
-import time
 from abc import ABC, abstractmethod
 from collections.abc import Mapping
-from typing import Any, ClassVar, Self
+from typing import Any, Self
 
 from httpx import Auth
-from pydantic import BaseModel, PrivateAttr, model_validator
+from pydantic import BaseModel, model_validator
 from pydantic_settings import BaseSettings, SettingsConfigDict
 
 from uipath.llm_client.settings.constants import ApiFlavor, ApiType, RoutingMode, VendorType
@@ -75,16 +74,15 @@ class UiPathBaseSettings(BaseSettings, ABC):
     with validation aliases allowing flexible naming conventions.
     """
 
-    DISCOVERY_CACHE_TTL_SECONDS: ClassVar[int] = 300
-
     model_config = SettingsConfigDict(
         validate_by_alias=True,
         populate_by_name=True,
         extra="allow",
     )
 
-    _models_cache: list[dict[str, Any]] | None = PrivateAttr(default=None)
-    _models_cache_timestamp: float = PrivateAttr(default=0.0)
+    # Pydantic models are not hashable by default; restore object identity
+    # hashing so that @lru_cache can be used on instance methods.
+    __hash__ = object.__hash__  # type: ignore[assignment]
 
     @abstractmethod
     def build_base_url(
@@ -137,33 +135,13 @@ def build_auth_pipeline(
         """
         ...
 
-    def get_available_models(self) -> list[dict[str, Any]]:
-        """Get the list of available models from the backend, with TTL caching.
-
-        Results are cached for ``DISCOVERY_CACHE_TTL_SECONDS`` (default 300s).
-        Subsequent calls within the TTL window return the cached list.
-
-        Returns:
-            A list of dictionaries containing model information.
-        """
-        now = time.monotonic()
-        if (
-            self._models_cache is not None
-            and (now - self._models_cache_timestamp) < self.DISCOVERY_CACHE_TTL_SECONDS
-        ):
-            return self._models_cache
-        models = self._fetch_available_models()
-        self._models_cache = models
-        self._models_cache_timestamp = now
-        return models
-
     @abstractmethod
-    def _fetch_available_models(self) -> list[dict[str, Any]]:
-        """Fetch the list of available models from the backend.
+    def get_available_models(self) -> list[dict[str, Any]]:
+        """Get the list of available models from the backend.
 
         Subclasses must implement this method to query the backend's
-        model discovery endpoint. Called by :meth:`get_available_models`
-        when the cache is stale or empty.
+        model discovery endpoint. Implementations should use
+        ``@lru_cache`` to avoid redundant network calls.
 
         Returns:
             A list of dictionaries containing model information.
diff --git a/src/uipath/llm_client/settings/llmgateway/settings.py b/src/uipath/llm_client/settings/llmgateway/settings.py
index f4c6920..a79d826 100644
--- a/src/uipath/llm_client/settings/llmgateway/settings.py
+++ b/src/uipath/llm_client/settings/llmgateway/settings.py
@@ -1,5 +1,6 @@
 import logging
 from collections.abc import Mapping
+from functools import lru_cache
 from typing import Any, Self
 
 from httpx import Client
@@ -114,7 +115,8 @@ def build_auth_headers(
         return headers
 
     @override
-    def _fetch_available_models(self) -> list[dict[str, Any]]:
+    @lru_cache  # noqa: B019
+    def get_available_models(self) -> list[dict[str, Any]]:  # type: ignore[override]
         discovery_url = f"{self.base_url}/{self.org_id}/{self.tenant_id}/{LLMGatewayEndpoints.DISCOVERY_ENDPOINT.value}"
         with Client(
             auth=self.build_auth_pipeline(),
diff --git a/src/uipath/llm_client/settings/platform/settings.py b/src/uipath/llm_client/settings/platform/settings.py
index 0b317b1..2d6d99c 100644
--- a/src/uipath/llm_client/settings/platform/settings.py
+++ b/src/uipath/llm_client/settings/platform/settings.py
@@ -1,6 +1,7 @@
 """Base settings for UiPath Platform (AgentHub/Orchestrator) client."""
 
 from collections.abc import Mapping
+from functools import lru_cache
 from typing import Any, Self
 
 from pydantic import Field, SecretStr, model_validator
@@ -163,7 +164,8 @@ def build_auth_headers(
         return headers
 
     @override
-    def _fetch_available_models(self) -> list[dict[str, Any]]:
+    @lru_cache  # noqa: B019
+    def get_available_models(self) -> list[dict[str, Any]]:  # type: ignore[override]
         models = UiPath().agenthub.get_available_llm_models(
             headers=dict(self.build_auth_headers()),
         )
diff --git a/tests/core/features/settings/test_llmgateway.py b/tests/core/features/settings/test_llmgateway.py
index 895b67e..e508144 100644
--- a/tests/core/features/settings/test_llmgateway.py
+++ b/tests/core/features/settings/test_llmgateway.py
@@ -1,7 +1,6 @@
 """Tests for LLMGatewaySettings."""
 
 import os
-import time
 from unittest.mock import MagicMock, patch
 
 import pytest
@@ -155,10 +154,10 @@ def test_get_available_models_raises_on_unauthorized(self, llmgw_env_vars):
 
 
 class TestLLMGatewayDiscoveryCache:
-    """Tests for get_available_models TTL caching."""
+    """Tests for the lru_cache-backed caching of get_available_models()."""
 
     def test_second_call_returns_cached_result(self, llmgw_env_vars):
-        """Second call within TTL should not hit the endpoint again."""
+        """Second call should return the cached result without hitting the endpoint."""
         with patch.dict(os.environ, llmgw_env_vars, clear=True):
             settings = LLMGatewaySettings()
 
@@ -172,27 +171,6 @@ def test_second_call_returns_cached_result(self, llmgw_env_vars):
                 assert first == second
                 mock_get.assert_called_once()
 
-    def test_cache_expires_after_ttl(self, llmgw_env_vars):
-        """After TTL expires, the endpoint should be called again."""
-        with patch.dict(os.environ, llmgw_env_vars, clear=True):
-            settings = LLMGatewaySettings()
-
-            mock_response = MagicMock()
-            mock_response.is_error = False
-            mock_response.json.return_value = [{"modelName": "gpt-4o", "vendor": "openai"}]
-
-            with patch.object(Client, "get", return_value=mock_response) as mock_get:
-                settings.get_available_models()
-                assert mock_get.call_count == 1
-
-                # Simulate TTL expiry by rewinding the cache timestamp
-                settings._models_cache_timestamp = (
-                    time.monotonic() - settings.DISCOVERY_CACHE_TTL_SECONDS - 1
-                )
-
-                settings.get_available_models()
-                assert mock_get.call_count == 2
-
     def test_cache_is_per_instance(self, llmgw_env_vars):
         """Each settings instance should have its own independent cache."""
         with patch.dict(os.environ, llmgw_env_vars, clear=True):
diff --git a/tests/core/features/settings/test_platform.py b/tests/core/features/settings/test_platform.py
index f53a6e5..aa126f6 100644
--- a/tests/core/features/settings/test_platform.py
+++ b/tests/core/features/settings/test_platform.py
@@ -1,7 +1,6 @@
 """Tests for PlatformSettings."""
 
 import os
-import time
 from unittest.mock import MagicMock, patch
 
 import pytest
@@ -242,10 +241,10 @@ def test_validate_byo_model_is_noop(self, platform_env_vars, mock_platform_auth)
 
 
 class TestPlatformDiscoveryCache:
-    """Tests for get_available_models TTL caching on PlatformSettings."""
+    """Tests for the lru_cache-backed caching of PlatformSettings.get_available_models()."""
 
     def test_second_call_returns_cached_result(self, platform_env_vars, mock_platform_auth):
-        """Second call within TTL should not query the backend again."""
+        """Second call should return the cached result without querying the backend."""
         with patch.dict(os.environ, platform_env_vars, clear=True):
             settings = PlatformSettings()
 
@@ -261,29 +260,6 @@ def test_second_call_returns_cached_result(self, platform_env_vars, mock_platfor
                 assert first == second
                 mock_uipath.return_value.agenthub.get_available_llm_models.assert_called_once()
 
-    def test_cache_expires_after_ttl(self, platform_env_vars, mock_platform_auth):
-        """After TTL expires, the backend should be queried again."""
-        with patch.dict(os.environ, platform_env_vars, clear=True):
-            settings = PlatformSettings()
-
-            mock_model = MagicMock()
-            mock_model.model_dump.return_value = {"modelName": "gpt-4o", "vendor": "openai"}
-
-            with patch("uipath.llm_client.settings.platform.settings.UiPath") as mock_uipath:
-                mock_uipath.return_value.agenthub.get_available_llm_models.return_value = [
-                    mock_model
-                ]
-                settings.get_available_models()
-                assert mock_uipath.return_value.agenthub.get_available_llm_models.call_count == 1
-
-                # Simulate TTL expiry
-                settings._models_cache_timestamp = (
-                    time.monotonic() - settings.DISCOVERY_CACHE_TTL_SECONDS - 1
-                )
-
-                settings.get_available_models()
-                assert mock_uipath.return_value.agenthub.get_available_llm_models.call_count == 2
-
 
 class TestPlatformAuthRefresh:
     """Tests for PlatformAuth token refresh logic."""