Force LiteLLM OpenAI default to Responses; drop discovery tie-break

cosminacho · claude · cosminacho · commit 6c98bb59088d · 2026-04-20T13:43:07.000+03:00
Revises the PR in three directions:

- `UiPathLiteLLM` now matches the langchain factory: when no `api_flavor`
  is discovered or supplied for an OpenAI model, the client defaults to
  `ApiFlavor.RESPONSES`. Existing `openai_*_client` test fixtures
  that rely on previously-recorded chat-completions cassettes now pin
  `api_flavor=CHAT_COMPLETIONS` explicitly; the
  `openai_responses_client` fixture and `OPENAI_RESPONSES_CONFIGS`
  continue to exercise Responses against their own cassettes.
- To keep OpenAI embeddings working under the new default, the LiteLLM
  client's `embedding()` / `aembedding()` now pass the raw
  `self._model_name` (no `responses/` / `invoke/` / `converse/` route
  prefix) — those prefixes are completion-only.
- Drops the `get_model_info()` responses tie-break that was part of
  the first commit. The LiteLLM default + langchain factory default
  together cover the user-visible behavior; the tie-break was
  redundant and narrowed the data model for a case already handled
  by the defaults.

Related test updates:
- `test_openai_defaults_to_responses` replaces
  `test_openai_defaults_to_chat_completions`; the litellm-model-name
  assertion now expects the `responses/` prefix on the default path.
- Removes the three `get_model_info` tie-break tests added earlier.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -5,7 +5,8 @@ All notable changes to `uipath_llm_client` (core package) will be documented in
 ## [1.9.3] - 2026-04-20
 
 ### Changed
-- `UiPathBaseSettings.get_model_info()` now prefers the Responses API when discovery returns multiple OpenAI entries for the same model (both `chat-completions` and `responses` flavors present). The LiteLLM client keeps its `chat-completions` fallback for the single-entry / `apiFlavor=null` case because the same client serves embedding requests.
+- `UiPathLiteLLM` now defaults to the OpenAI Responses API (`ApiFlavor.RESPONSES`) when discovery does not specify a flavor. Explicit `api_flavor=` and BYOM-discovered flavors still take precedence.
+- `UiPathLiteLLM.embedding()` / `aembedding()` use the raw model name instead of `_litellm_model`. The `responses/` / `invoke/` / `converse/` route prefixes are completion-only, so this keeps OpenAI embeddings working when the client defaults to Responses on the completions side.
 
 ## [1.9.2] - 2026-04-17
 
diff --git a/packages/uipath_langchain_client/CHANGELOG.md b/packages/uipath_langchain_client/CHANGELOG.md
@@ -6,7 +6,7 @@ All notable changes to `uipath_langchain_client` will be documented in this file
 
 ### Changed
 - `get_chat_model()` now defaults to the OpenAI Responses API (`ApiFlavor.RESPONSES`) when discovery does not specify a flavor for an OpenAI chat model. Explicit `api_flavor=` on the call and BYOM-discovered flavors still take precedence.
-- Minimum `uipath-llm-client` bumped to 1.9.3 for the `get_model_info()` Responses-preference tie-break.
+- Minimum `uipath-llm-client` bumped to 1.9.3 to align with the matching LiteLLM default.
 
 ## [1.9.2] - 2026-04-17
 
diff --git a/src/uipath/llm_client/clients/litellm/client.py b/src/uipath/llm_client/clients/litellm/client.py
@@ -213,13 +213,9 @@ def _discover_and_build_api_config(
         else:
             resolved_flavor = discovered_flavor
 
-        # OpenAI defaults to chat-completions when no flavor is discovered.
-        # RESPONSES is not a safe default here: this client serves both
-        # completions and embeddings, and the ``responses/`` model prefix in
-        # ``_resolve_litellm_model`` would break embedding calls on OpenAI
-        # embedding models that discover with ``apiFlavor=null``.
+        # OpenAI defaults to responses when no flavor is discovered
         if resolved_flavor is None and resolved_vendor in ("openai", "azure"):
-            resolved_flavor = ApiFlavor.CHAT_COMPLETIONS
+            resolved_flavor = ApiFlavor.RESPONSES
 
         # Claude detection: modelFamily from discovery, or name heuristic for BYOM
         # (BYOM discovery does not expose modelFamily).
@@ -270,7 +266,12 @@ def _resolve_llm_provider(self) -> str:
         return _VENDOR_TO_LITELLM.get(vendor, vendor)
 
     def _resolve_litellm_model(self) -> str:
-        """Build the model name litellm expects, with route prefixes where needed."""
+        """Build the completions model name litellm expects, with route prefixes where needed.
+
+        Only applied on the completions path — ``embedding()`` uses the raw model
+        name because the ``responses/`` / ``invoke/`` / ``converse/`` prefixes are
+        completion-only route hints.
+        """
         model = self._model_name
         flavor = str(self._api_config.api_flavor) if self._api_config.api_flavor else None
 
@@ -534,7 +535,7 @@ def embedding(
         **kwargs: Any,
     ) -> EmbeddingResponse:
         return litellm.embedding(  # type: ignore[return-value]
-            model=self._litellm_model,
+            model=self._model_name,
             input=input,
             custom_llm_provider=self._embedding_llm_provider,
             api_key="PLACEHOLDER",
@@ -560,7 +561,7 @@ async def aembedding(
         **kwargs: Any,
     ) -> EmbeddingResponse:
         return await litellm.aembedding(
-            model=self._litellm_model,
+            model=self._model_name,
             input=input,
             custom_llm_provider=self._embedding_llm_provider,
             api_key="PLACEHOLDER",
diff --git a/src/uipath/llm_client/settings/base.py b/src/uipath/llm_client/settings/base.py
@@ -13,13 +13,7 @@
 from pydantic import BaseModel, model_validator
 from pydantic_settings import BaseSettings, SettingsConfigDict
 
-from uipath.llm_client.settings.constants import (
-    ApiFlavor,
-    ApiType,
-    ByomApiFlavor,
-    RoutingMode,
-    VendorType,
-)
+from uipath.llm_client.settings.constants import ApiFlavor, ApiType, RoutingMode, VendorType
 
 
 class UiPathAPIConfig(BaseModel):
@@ -244,19 +238,6 @@ def get_model_info(
                 )
             ]
 
-        # When multiple OpenAI entries remain (both chat-completions and responses
-        # flavors discovered), prefer the Responses API.
-        if len(matching_models) > 1:
-            vendor = str(matching_models[0].get("vendor", "")).lower()
-            if vendor == VendorType.OPENAI:
-                responses_matches = [
-                    m
-                    for m in matching_models
-                    if m.get("apiFlavor") in (ApiFlavor.RESPONSES, ByomApiFlavor.OPENAI_RESPONSES)
-                ]
-                if responses_matches:
-                    matching_models = responses_matches
-
         if not matching_models:
             raise ValueError(
                 f"Model {model_name} not found. "
diff --git a/tests/core/clients/litellm/test_integration.py b/tests/core/clients/litellm/test_integration.py
@@ -21,17 +21,29 @@
 
 @pytest.fixture
 def openai_gpt4o_client(client_settings: UiPathBaseSettings) -> UiPathLiteLLM:
-    return UiPathLiteLLM(model_name="gpt-4o-2024-11-20", client_settings=client_settings)
+    return UiPathLiteLLM(
+        model_name="gpt-4o-2024-11-20",
+        client_settings=client_settings,
+        api_flavor=ApiFlavor.CHAT_COMPLETIONS,
+    )
 
 
 @pytest.fixture
 def openai_client(client_settings: UiPathBaseSettings) -> UiPathLiteLLM:
-    return UiPathLiteLLM(model_name="gpt-5.2-2025-12-11", client_settings=client_settings)
+    return UiPathLiteLLM(
+        model_name="gpt-5.2-2025-12-11",
+        client_settings=client_settings,
+        api_flavor=ApiFlavor.CHAT_COMPLETIONS,
+    )
 
 
 @pytest.fixture
 def openai_gpt54_client(client_settings: UiPathBaseSettings) -> UiPathLiteLLM:
-    return UiPathLiteLLM(model_name="gpt-5.4-2026-03-05", client_settings=client_settings)
+    return UiPathLiteLLM(
+        model_name="gpt-5.4-2026-03-05",
+        client_settings=client_settings,
+        api_flavor=ApiFlavor.CHAT_COMPLETIONS,
+    )
 
 
 @pytest.fixture
diff --git a/tests/core/clients/litellm/test_unit.py b/tests/core/clients/litellm/test_unit.py
@@ -148,9 +148,10 @@ def _make_client(self, model_data: dict, **kwargs) -> UiPathLiteLLM:
             **kwargs,
         )
 
-    def test_openai_model_name_unchanged(self):
+    def test_openai_model_name_has_responses_prefix(self):
+        """OpenAI defaults to responses, so the litellm model carries the responses/ prefix."""
         client = self._make_client(_OPENAI_MODEL)
-        assert client._litellm_model == "gpt-5.2-2025-12-11"
+        assert client._litellm_model == "responses/gpt-5.2-2025-12-11"
 
     def test_gemini_model_name_unchanged(self):
         client = self._make_client(_GEMINI_MODEL)
@@ -187,9 +188,9 @@ def _make_client(self, model_data: dict, **kwargs) -> UiPathLiteLLM:
             **kwargs,
         )
 
-    def test_openai_defaults_to_chat_completions(self):
+    def test_openai_defaults_to_responses(self):
         client = self._make_client(_OPENAI_MODEL)
-        assert client._api_config.api_flavor == ApiFlavor.CHAT_COMPLETIONS
+        assert client._api_config.api_flavor == ApiFlavor.RESPONSES
         assert client._api_config.vendor_type == "openai"
         assert client._api_config.routing_mode == RoutingMode.PASSTHROUGH
 
diff --git a/tests/core/features/settings/test_llmgateway.py b/tests/core/features/settings/test_llmgateway.py
@@ -619,73 +619,3 @@ def test_skips_validate_byo_model_for_uipath_owned(self, llmgw_env_vars):
         with patch.object(settings, "validate_byo_model") as mock_validate:
             settings.get_model_info("claude-3-opus")
             mock_validate.assert_not_called()
-
-    def test_prefers_responses_when_both_openai_flavors_available(self, llmgw_env_vars):
-        """When OpenAI discovery returns both chat-completions and responses entries,
-        get_model_info returns the responses one."""
-        models = [
-            {
-                "modelName": "custom-gpt",
-                "vendor": "OpenAi",
-                "apiFlavor": "OpenAiChatCompletions",
-                "modelSubscriptionType": "BYO",
-                "byomDetails": {
-                    "integrationServiceConnectionId": "conn-1",
-                    "availableOperationCodes": ["op1"],
-                },
-            },
-            {
-                "modelName": "custom-gpt",
-                "vendor": "OpenAi",
-                "apiFlavor": "OpenAiResponses",
-                "modelSubscriptionType": "BYO",
-                "byomDetails": {
-                    "integrationServiceConnectionId": "conn-1",
-                    "availableOperationCodes": ["op1"],
-                },
-            },
-        ]
-        settings = self._make_settings(llmgw_env_vars, models=models)
-        info = settings.get_model_info("custom-gpt", byo_connection_id="conn-1")
-        assert info["apiFlavor"] == "OpenAiResponses"
-
-    def test_prefers_responses_with_plain_apiflavor_strings(self, llmgw_env_vars):
-        """Tie-break also recognises the routing-form apiFlavor values."""
-        models = [
-            {
-                "modelName": "gpt-x",
-                "vendor": "OpenAi",
-                "apiFlavor": "chat-completions",
-                "modelSubscriptionType": "UiPathOwned",
-            },
-            {
-                "modelName": "gpt-x",
-                "vendor": "OpenAi",
-                "apiFlavor": "responses",
-                "modelSubscriptionType": "UiPathOwned",
-            },
-        ]
-        settings = self._make_settings(llmgw_env_vars, models=models)
-        info = settings.get_model_info("gpt-x")
-        assert info["apiFlavor"] == "responses"
-
-    def test_no_responses_preference_for_non_openai(self, llmgw_env_vars):
-        """The responses preference should not fire for non-OpenAI vendors."""
-        models = [
-            {
-                "modelName": "claude-x",
-                "vendor": "Anthropic",
-                "apiFlavor": "anthropic-claude",
-                "modelSubscriptionType": "UiPathOwned",
-            },
-            {
-                "modelName": "claude-x",
-                "vendor": "Anthropic",
-                "apiFlavor": "converse",
-                "modelSubscriptionType": "UiPathOwned",
-            },
-        ]
-        settings = self._make_settings(llmgw_env_vars, models=models)
-        info = settings.get_model_info("claude-x")
-        # First entry wins (no preference logic for Anthropic)
-        assert info["apiFlavor"] == "anthropic-claude"
diff --git a/tests/langchain/clients/litellm/conftest.py b/tests/langchain/clients/litellm/conftest.py
@@ -17,7 +17,7 @@
 from uipath.llm_client.settings.constants import ApiFlavor
 
 OPENAI_CONFIGS = [
-    {"model_class": UiPathChatLiteLLM},
+    {"model_class": UiPathChatLiteLLM, "model_kwargs": {"api_flavor": ApiFlavor.CHAT_COMPLETIONS}},
 ]
 
 OPENAI_RESPONSES_CONFIGS = [

Original file line number	Diff line number	Diff line change
`@@ -17,7 +17,7 @@`
`17`	`17`	`from uipath.llm_client.settings.constants import ApiFlavor`
`18`	`18`
`19`	`19`	`OPENAI_CONFIGS = [`
`20`		`- {"model_class": UiPathChatLiteLLM},`
	`20`	`+ {"model_class": UiPathChatLiteLLM, "model_kwargs": {"api_flavor": ApiFlavor.CHAT_COMPLETIONS}},`
`21`	`21`	`]`
`22`	`22`
`23`	`23`	`OPENAI_RESPONSES_CONFIGS = [`