Skip to content

Commit 4953c85

Browse files
fix: resolve regression on thought process display for ReAct Agent LLM
Thought process events were not displaying because LangchainProfilerHandler was not being invoked during LLM calls. Passing the handler via ainvoke() config was bypassed by _runnable_config, which was built once at graph construction time with no callbacks. Replace the stored _runnable_config attribute with _make_runnable_config(), which instantiates callback classes fresh on each LLM/tool call. Pass LangchainProfilerHandler as a class reference to the graph constructor so each invocation gets an isolated handler, also fixing a memory leak and concurrency issues from the previously shared instance. Signed-off-by: Patrick Chin <8509935+thepatrickchin@users.noreply.github.com>
1 parent 27797af commit 4953c85

3 files changed

Lines changed: 17 additions & 15 deletions

File tree

packages/nvidia_nat_langchain/src/nat/plugins/langchain/agent/base.py

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
import logging
1919
from abc import ABC
2020
from abc import abstractmethod
21+
from collections.abc import Callable
2122
from enum import Enum
2223
from typing import Any
2324

@@ -71,7 +72,7 @@ class BaseAgent(ABC):
7172
def __init__(self,
7273
llm: BaseChatModel,
7374
tools: list[BaseTool],
74-
callbacks: list[AsyncCallbackHandler] | None = None,
75+
callbacks: list[Callable[[], AsyncCallbackHandler]] | None = None,
7576
detailed_logs: bool = False,
7677
log_response_max_chars: int = 1000) -> None:
7778
logger.debug("Initializing Agent Graph")
@@ -81,8 +82,14 @@ def __init__(self,
8182
self.detailed_logs = detailed_logs
8283
self.log_response_max_chars = log_response_max_chars
8384
self.graph = None
84-
self._runnable_config = RunnableConfig(callbacks=self.callbacks,
85-
configurable={"__pregel_runtime": DEFAULT_RUNTIME})
85+
86+
@property
87+
def _runnable_config(self) -> RunnableConfig:
88+
return self._make_runnable_config()
89+
90+
def _make_runnable_config(self) -> RunnableConfig:
91+
return RunnableConfig(callbacks=[c() for c in self.callbacks],
92+
configurable={"__pregel_runtime": DEFAULT_RUNTIME})
8693

8794
async def _stream_llm(self, runnable: Any, inputs: dict[str, Any]) -> AIMessage:
8895
"""
@@ -102,7 +109,7 @@ async def _stream_llm(self, runnable: Any, inputs: dict[str, Any]) -> AIMessage:
102109
"""
103110
content_parts = []
104111
reasoning_parts = []
105-
async for event in runnable.astream(inputs, config=self._runnable_config):
112+
async for event in runnable.astream(inputs, config=self._make_runnable_config()):
106113
content_parts.append(event.content)
107114
extra = getattr(event, 'additional_kwargs', None)
108115
if isinstance(extra, dict):
@@ -132,7 +139,7 @@ async def _call_llm(self, llm: Runnable, inputs: dict[str, Any]) -> AIMessage:
132139
AIMessage
133140
The LLM response
134141
"""
135-
response = await llm.ainvoke(inputs, config=self._runnable_config)
142+
response = await llm.ainvoke(inputs, config=self._make_runnable_config())
136143
return AIMessage(content=str(response.content))
137144

138145
async def _call_tool(self, tool: BaseTool, tool_input: dict[str, Any] | str, max_retries: int = 3) -> ToolMessage:
@@ -157,7 +164,7 @@ async def _call_tool(self, tool: BaseTool, tool_input: dict[str, Any] | str, max
157164

158165
for attempt in range(1, max_retries + 1):
159166
try:
160-
response = await tool.ainvoke(tool_input, config=self._runnable_config)
167+
response = await tool.ainvoke(tool_input, config=self._make_runnable_config())
161168

162169
# Handle empty responses
163170
if response is None or (isinstance(response, str) and response == ""):

packages/nvidia_nat_langchain/src/nat/plugins/langchain/agent/react_agent/register.py

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ async def react_agent_workflow(config: ReActAgentWorkflowConfig, builder: Builde
117117
llm=llm,
118118
prompt=prompt,
119119
tools=tools,
120-
callbacks=[],
120+
callbacks=[LangchainProfilerHandler],
121121
use_tool_schema=config.include_tool_input_schema_in_tool_description,
122122
detailed_logs=config.verbose,
123123
log_response_max_chars=config.log_response_max_chars,
@@ -154,12 +154,8 @@ async def _response_fn(chat_request_or_message: ChatRequestOrMessage) -> ChatRes
154154

155155
state = ReActGraphState(messages=messages)
156156

157-
# run the ReAct Agent Graph with a new callback handler instance per request
158-
state = await graph.ainvoke(state,
159-
config={
160-
'recursion_limit': (config.max_tool_calls + 1) * 2,
161-
'callbacks': [LangchainProfilerHandler()]
162-
})
157+
# run the ReAct Agent Graph
158+
state = await graph.ainvoke(state, config={'recursion_limit': (config.max_tool_calls + 1) * 2})
163159
# setting recursion_limit: 4 allows 1 tool call
164160
# - allows the ReAct Agent to perform 1 cycle / call 1 single tool,
165161
# - but stops the agent when it tries to call a tool a second time

packages/nvidia_nat_langchain/tests/agent/test_base.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@
2222
from langchain_core.messages import AIMessage
2323
from langchain_core.messages import HumanMessage
2424
from langchain_core.messages import ToolMessage
25-
from langchain_core.runnables import RunnableConfig
2625
from langgraph.graph.state import CompiledStateGraph
2726

2827
from nat.plugins.langchain.agent.base import BaseAgent
@@ -40,7 +39,7 @@ def __init__(self, detailed_logs=True, log_response_max_chars=1000):
4039
self.callbacks = []
4140
self.detailed_logs = detailed_logs
4241
self.log_response_max_chars = log_response_max_chars
43-
self._runnable_config = RunnableConfig()
42+
self.graph = None
4443

4544
async def _build_graph(self, state_schema: type) -> CompiledStateGraph:
4645
"""Mock implementation."""

0 commit comments

Comments (0)