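fixes: move gateway-header capture into UiPathBaseChatModel via _uipath_* hooks; run dynamic-headers callback inline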

cosminacho · cosminacho · commit b2fdda7ccecc · 2026-03-21T23:03:17.000+02:00
diff --git a/packages/uipath_langchain_client/src/uipath_langchain_client/base_client.py b/packages/uipath_langchain_client/src/uipath_langchain_client/base_client.py
@@ -30,6 +30,10 @@
 from typing import Any, Literal
 
 from httpx import URL, Response
+from langchain_core.callbacks import (
+    AsyncCallbackManagerForLLMRun,
+    CallbackManagerForLLMRun,
+)
 from langchain_core.embeddings import Embeddings
 from langchain_core.language_models.chat_models import BaseChatModel
 from langchain_core.messages import BaseMessage
@@ -322,72 +326,128 @@ class UiPathBaseChatModel(UiPathBaseLLMClient, BaseChatModel):
     from the ContextVar (populated by the httpx client's send()) and inject them into
     the AIMessage's response_metadata under the 'uipath_llmgateway_headers' key.
 
+    Dynamic request headers are injected via UiPathDynamicHeadersCallback, which
+    defaults to ``run_inline = True`` so that LangChain calls
+    ``on_chat_model_start`` in the same coroutine as ``_agenerate``, ensuring the
+    ContextVar is visible when ``httpx.send()`` fires.
+
     Passthrough clients that delegate to vendor SDKs should inherit from this class
     so that headers are captured transparently.
     """
 
     def _generate(
         self,
         messages: list[BaseMessage],
-        *args: Any,
+        stop: list[str] | None = None,
+        run_manager: CallbackManagerForLLMRun | None = None,
         **kwargs: Any,
     ) -> ChatResult:
         set_captured_response_headers({})
         try:
-            result = super()._generate(messages, *args, **kwargs)
+            result = self._uipath_generate(messages, stop=stop, run_manager=run_manager, **kwargs)
             self._inject_gateway_headers(result.generations)
             return result
         finally:
             set_captured_response_headers({})
 
+    def _uipath_generate(
+        self,
+        messages: list[BaseMessage],
+        stop: list[str] | None = None,
+        run_manager: CallbackManagerForLLMRun | None = None,
+        **kwargs: Any,
+    ) -> ChatResult:
+        """Override in subclasses to provide the core (non-wrapped) generate logic."""
+        return super()._generate(messages, stop=stop, run_manager=run_manager, **kwargs)
+
     async def _agenerate(
         self,
         messages: list[BaseMessage],
-        *args: Any,
+        stop: list[str] | None = None,
+        run_manager: AsyncCallbackManagerForLLMRun | None = None,
         **kwargs: Any,
     ) -> ChatResult:
         set_captured_response_headers({})
         try:
-            result = await super()._agenerate(messages, *args, **kwargs)
+            result = await self._uipath_agenerate(
+                messages, stop=stop, run_manager=run_manager, **kwargs
+            )
             self._inject_gateway_headers(result.generations)
             return result
         finally:
             set_captured_response_headers({})
 
+    async def _uipath_agenerate(
+        self,
+        messages: list[BaseMessage],
+        stop: list[str] | None = None,
+        run_manager: AsyncCallbackManagerForLLMRun | None = None,
+        **kwargs: Any,
+    ) -> ChatResult:
+        """Override in subclasses to provide the core (non-wrapped) async generate logic."""
+        return await super()._agenerate(messages, stop=stop, run_manager=run_manager, **kwargs)
+
     def _stream(
         self,
         messages: list[BaseMessage],
-        *args: Any,
+        stop: list[str] | None = None,
+        run_manager: CallbackManagerForLLMRun | None = None,
         **kwargs: Any,
     ) -> Iterator[ChatGenerationChunk]:
         set_captured_response_headers({})
         try:
             first = True
-            for chunk in super()._stream(messages, *args, **kwargs):
+            for chunk in self._uipath_stream(
+                messages, stop=stop, run_manager=run_manager, **kwargs
+            ):
                 if first:
                     self._inject_gateway_headers([chunk])
                     first = False
                 yield chunk
         finally:
             set_captured_response_headers({})
 
+    def _uipath_stream(
+        self,
+        messages: list[BaseMessage],
+        stop: list[str] | None = None,
+        run_manager: CallbackManagerForLLMRun | None = None,
+        **kwargs: Any,
+    ) -> Iterator[ChatGenerationChunk]:
+        """Override in subclasses to provide the core (non-wrapped) stream logic."""
+        yield from super()._stream(messages, stop=stop, run_manager=run_manager, **kwargs)
+
     async def _astream(
         self,
         messages: list[BaseMessage],
-        *args: Any,
+        stop: list[str] | None = None,
+        run_manager: AsyncCallbackManagerForLLMRun | None = None,
         **kwargs: Any,
     ) -> AsyncIterator[ChatGenerationChunk]:
         set_captured_response_headers({})
         try:
             first = True
-            async for chunk in super()._astream(messages, *args, **kwargs):
+            async for chunk in self._uipath_astream(
+                messages, stop=stop, run_manager=run_manager, **kwargs
+            ):
                 if first:
                     self._inject_gateway_headers([chunk])
                     first = False
                 yield chunk
         finally:
             set_captured_response_headers({})
 
+    async def _uipath_astream(
+        self,
+        messages: list[BaseMessage],
+        stop: list[str] | None = None,
+        run_manager: AsyncCallbackManagerForLLMRun | None = None,
+        **kwargs: Any,
+    ) -> AsyncIterator[ChatGenerationChunk]:
+        """Override in subclasses to provide the core (non-wrapped) async stream logic."""
+        async for chunk in super()._astream(messages, stop=stop, run_manager=run_manager, **kwargs):
+            yield chunk
+
     def _inject_gateway_headers(self, generations: Sequence[ChatGeneration]) -> None:
         """Inject captured gateway headers into each generation's response_metadata."""
         if not self.captured_headers:
diff --git a/packages/uipath_langchain_client/src/uipath_langchain_client/callbacks.py b/packages/uipath_langchain_client/src/uipath_langchain_client/callbacks.py
@@ -2,23 +2,19 @@
 
 from abc import abstractmethod
 from typing import Any
-from uuid import UUID
 
 from langchain_core.callbacks import BaseCallbackHandler
-from langchain_core.messages import BaseMessage
 
-from uipath.llm_client.utils.headers import (
-    set_dynamic_request_headers,
-)
+from uipath.llm_client.utils.headers import set_dynamic_request_headers
 
 
 class UiPathDynamicHeadersCallback(BaseCallbackHandler):
     """Base callback for injecting dynamic headers into each LLM gateway request.
 
     Extend this class and implement ``get_headers()`` to return the headers to
-    inject. The headers are stored in a ContextVar before each LLM call and read
-    by the httpx client's ``send()`` method, so they flow transparently through
-    the call stack regardless of which vendor SDK is in use.
+    inject. The headers are stored in a ContextVar; ``run_inline = True`` ensures
+    ``on_chat_model_start`` runs directly in the caller's coroutine (not via
+    ``asyncio.gather``), so the mutation is visible when ``httpx.send()`` fires.
 
     Example (OTEL trace propagation)::
 
@@ -34,6 +30,8 @@ def get_headers(self) -> dict[str, str]:
         response = chat.invoke("Hello!", config={"callbacks": [OtelHeadersCallback()]})
     """
 
+    run_inline: bool = True  # dispatch in the caller's coroutine, not via asyncio.gather
+
     @abstractmethod
     def get_headers(self) -> dict[str, str]:
         """Return headers to inject into the next LLM gateway request."""
@@ -42,9 +40,7 @@ def get_headers(self) -> dict[str, str]:
     def on_chat_model_start(
         self,
         serialized: dict[str, Any],
-        messages: list[list[BaseMessage]],
-        *,
-        run_id: UUID,
+        messages: list[list[Any]],
         **kwargs: Any,
     ) -> None:
         set_dynamic_request_headers(self.get_headers())
@@ -53,14 +49,12 @@ def on_llm_start(
         self,
         serialized: dict[str, Any],
         prompts: list[str],
-        *,
-        run_id: UUID,
         **kwargs: Any,
     ) -> None:
         set_dynamic_request_headers(self.get_headers())
 
-    def on_llm_end(self, response: Any, *, run_id: UUID, **kwargs: Any) -> None:
+    def on_llm_end(self, response: Any, **kwargs: Any) -> None:
         set_dynamic_request_headers({})
 
-    def on_llm_error(self, error: BaseException, *, run_id: UUID, **kwargs: Any) -> None:
+    def on_llm_error(self, error: BaseException, **kwargs: Any) -> None:
         set_dynamic_request_headers({})
diff --git a/packages/uipath_langchain_client/src/uipath_langchain_client/clients/normalized/chat_models.py b/packages/uipath_langchain_client/src/uipath_langchain_client/clients/normalized/chat_models.py
@@ -56,10 +56,6 @@
 )
 from pydantic import Field
 
-from uipath.llm_client.utils.headers import (
-    extract_matching_headers,
-    set_captured_response_headers,
-)
 from uipath_langchain_client.base_client import UiPathBaseChatModel
 from uipath_langchain_client.settings import ApiType, RoutingMode, UiPathAPIConfig
 
@@ -311,39 +307,27 @@ def _postprocess_response(self, response: dict[str, Any]) -> ChatResult:
             llm_output=llm_output,
         )
 
-    def _generate(
+    def _uipath_generate(
         self,
         messages: list[BaseMessage],
-        *args: Any,
+        stop: list[str] | None = None,
         run_manager: CallbackManagerForLLMRun | None = None,
         **kwargs: Any,
     ) -> ChatResult:
-        request_body = self._preprocess_request(messages, **kwargs)
+        request_body = self._preprocess_request(messages, stop=stop, **kwargs)
         response = self.uipath_request(request_body=request_body, raise_status_error=True)
-        result = self._postprocess_response(response.json())
-        if self.captured_headers:
-            captured = extract_matching_headers(response.headers, self.captured_headers)
-            if captured:
-                for gen in result.generations:
-                    gen.message.response_metadata["uipath_llmgateway_headers"] = captured
-        return result
-
-    async def _agenerate(
+        return self._postprocess_response(response.json())
+
+    async def _uipath_agenerate(
         self,
         messages: list[BaseMessage],
-        *args: Any,
+        stop: list[str] | None = None,
         run_manager: AsyncCallbackManagerForLLMRun | None = None,
         **kwargs: Any,
     ) -> ChatResult:
-        request_body = self._preprocess_request(messages, **kwargs)
+        request_body = self._preprocess_request(messages, stop=stop, **kwargs)
         response = await self.uipath_arequest(request_body=request_body, raise_status_error=True)
-        result = self._postprocess_response(response.json())
-        if self.captured_headers:
-            captured = extract_matching_headers(response.headers, self.captured_headers)
-            if captured:
-                for gen in result.generations:
-                    gen.message.response_metadata["uipath_llmgateway_headers"] = captured
-        return result
+        return self._postprocess_response(response.json())
 
     def _generate_chunk(
         self, original_message: str, json_data: dict[str, Any]
@@ -402,64 +386,46 @@ def _generate_chunk(
             ),
         )
 
-    def _stream(
+    def _uipath_stream(
         self,
         messages: list[BaseMessage],
-        *args: Any,
+        stop: list[str] | None = None,
         run_manager: CallbackManagerForLLMRun | None = None,
         **kwargs: Any,
     ) -> Iterator[ChatGenerationChunk]:
-        request_body = self._preprocess_request(messages, **kwargs)
-        set_captured_response_headers({})
-        try:
-            first = True
-            for chunk in self.uipath_stream(
-                request_body=request_body, stream_type="lines", raise_status_error=True
-            ):
-                chunk = str(chunk)
-                if chunk.startswith("data:"):
-                    chunk = chunk.split("data:")[1].strip()
-                try:
-                    json_data = json.loads(chunk)
-                except json.JSONDecodeError:
-                    continue
-                if "id" in json_data and not json_data["id"]:
-                    continue
-                gen_chunk = self._generate_chunk(chunk, json_data)
-                if first:
-                    self._inject_gateway_headers([gen_chunk])
-                    first = False
-                yield gen_chunk
-        finally:
-            set_captured_response_headers({})
-
-    async def _astream(
+        request_body = self._preprocess_request(messages, stop=stop, **kwargs)
+        for chunk in self.uipath_stream(
+            request_body=request_body, stream_type="lines", raise_status_error=True
+        ):
+            chunk = str(chunk)
+            if chunk.startswith("data:"):
+                chunk = chunk.split("data:")[1].strip()
+            try:
+                json_data = json.loads(chunk)
+            except json.JSONDecodeError:
+                continue
+            if "id" in json_data and not json_data["id"]:
+                continue
+            yield self._generate_chunk(chunk, json_data)
+
+    async def _uipath_astream(
         self,
         messages: list[BaseMessage],
-        *args: Any,
+        stop: list[str] | None = None,
         run_manager: AsyncCallbackManagerForLLMRun | None = None,
         **kwargs: Any,
     ) -> AsyncIterator[ChatGenerationChunk]:
-        request_body = self._preprocess_request(messages, **kwargs)
-        set_captured_response_headers({})
-        try:
-            first = True
-            async for chunk in self.uipath_astream(
-                request_body=request_body, stream_type="lines", raise_status_error=True
-            ):
-                chunk = str(chunk)
-                if chunk.startswith("data:"):
-                    chunk = chunk.split("data:")[1].strip()
-                try:
-                    json_data = json.loads(chunk)
-                except json.JSONDecodeError:
-                    continue
-                if "id" in json_data and not json_data["id"]:
-                    continue
-                gen_chunk = self._generate_chunk(chunk, json_data)
-                if first:
-                    self._inject_gateway_headers([gen_chunk])
-                    first = False
-                yield gen_chunk
-        finally:
-            set_captured_response_headers({})
+        request_body = self._preprocess_request(messages, stop=stop, **kwargs)
+        async for chunk in self.uipath_astream(
+            request_body=request_body, stream_type="lines", raise_status_error=True
+        ):
+            chunk = str(chunk)
+            if chunk.startswith("data:"):
+                chunk = chunk.split("data:")[1].strip()
+            try:
+                json_data = json.loads(chunk)
+            except json.JSONDecodeError:
+                continue
+            if "id" in json_data and not json_data["id"]:
+                continue
+            yield self._generate_chunk(chunk, json_data)
diff --git a/tests/langchain/test_dynamic_headers.py b/tests/langchain/test_dynamic_headers.py