@@ -1937,7 +1937,188 @@ def run(self):
 
     def start(self, prompt: str, **kwargs):
         """Start the agent with a prompt. This is a convenience method that wraps chat()."""
-        return self.chat(prompt, **kwargs)
+        # Check if streaming is enabled and the caller wants streaming chunks
+        if self.stream and kwargs.get('stream', True):
+            return self._start_stream(prompt, **kwargs)
+        else:
+            return self.chat(prompt, **kwargs)
+
+    def _start_stream(self, prompt: str, **kwargs):
+        """Generator method that yields streaming chunks from the agent."""
+        # Reset the final display flag for each new conversation
+        self._final_display_shown = False
+
+        # Search any configured knowledge source for context relevant to the prompt
+        if self.knowledge:
+            search_results = self.knowledge.search(prompt, agent_id=self.agent_id)
+            if search_results:
+                # Results may arrive as a dict with a 'results' key or as a plain list of strings
+                if isinstance(search_results, dict) and 'results' in search_results:
+                    # Extract the memory content from each result entry
+                    knowledge_content = "\n".join([result['memory'] for result in search_results['results']])
+                else:
+                    # search_results is a list of strings; join them directly
+                    knowledge_content = "\n".join(search_results)
+
+                # Append the retrieved knowledge to the prompt
+                prompt = f"{prompt}\n\nKnowledge: {knowledge_content}"
+
+        # Get the streaming response via the internal streaming method
+        for chunk in self._chat_stream(prompt, **kwargs):
+            yield chunk
+
+    def _chat_stream(self, prompt, temperature=0.2, tools=None, output_json=None, output_pydantic=None, reasoning_steps=False, **kwargs):
+        """Internal streaming method that yields chunks from the LLM response."""
+
+        # Use the same logic as chat() but yield chunks instead of returning a final response
+        if self._using_custom_llm:
+            # For a custom LLM, yield chunks from the LLM instance
+            for chunk in self._custom_llm_stream(prompt, temperature, tools, output_json, output_pydantic, reasoning_steps, **kwargs):
+                yield chunk
+        else:
+            # For the standard OpenAI client, yield chunks from the streaming response
+            for chunk in self._openai_stream(prompt, temperature, tools, output_json, output_pydantic, reasoning_steps, **kwargs):
+                yield chunk
+
+    def _custom_llm_stream(self, prompt, temperature=0.2, tools=None, output_json=None, output_pydantic=None, reasoning_steps=False, **kwargs):
+        """Handle streaming for custom LLM instances."""
+        # Store the chat history length for potential rollback
+        chat_history_length = len(self.chat_history)
+
+        try:
+            # Special handling for MCP tools when using provider/model format
+            if tools is None or (isinstance(tools, list) and len(tools) == 0):
+                tool_param = self.tools
+            else:
+                tool_param = tools
+
+            # Convert MCP tool objects to OpenAI format if needed
+            if tool_param is not None:
+                from ..mcp.mcp import MCP
+                if isinstance(tool_param, MCP) and hasattr(tool_param, 'to_openai_tool'):
+                    openai_tool = tool_param.to_openai_tool()
+                    if openai_tool:
+                        if isinstance(openai_tool, list):
+                            tool_param = openai_tool
+                        else:
+                            tool_param = [openai_tool]
+
+            # Normalize prompt content for consistent chat history storage
+            normalized_content = prompt
+            if isinstance(prompt, list):
+                normalized_content = next((item["text"] for item in prompt if item.get("type") == "text"), "")
+
+            # Prevent duplicate messages
+            if not (self.chat_history and
+                    self.chat_history[-1].get("role") == "user" and
+                    self.chat_history[-1].get("content") == normalized_content):
+                self.chat_history.append({"role": "user", "content": normalized_content})
+
+            # Get a streaming response from the LLM instance
+            if hasattr(self.llm_instance, 'get_response_stream'):
+                # Use the streaming method if available
+                stream_response = self.llm_instance.get_response_stream(
+                    prompt=prompt,
+                    system_prompt=self._build_system_prompt(tools),
+                    chat_history=self.chat_history,
+                    temperature=temperature,
+                    tools=tool_param,
+                    output_json=output_json,
+                    output_pydantic=output_pydantic,
+                    verbose=self.verbose,
+                    markdown=self.markdown,
+                    console=self.console,
+                    agent_name=self.name,
+                    agent_role=self.role,
+                    agent_tools=[t.__name__ if hasattr(t, '__name__') else str(t) for t in (tools if tools is not None else self.tools)],
+                    reasoning_steps=reasoning_steps,
+                    execute_tool_fn=self.execute_tool
+                )
+
+                accumulated_response = ""
+                for chunk in stream_response:
+                    accumulated_response += chunk
+                    yield chunk
+
+                # Add the final response to chat history
+                self.chat_history.append({"role": "assistant", "content": accumulated_response})
+
+            else:
+                # Fall back to a regular response if streaming is not available
+                response_text = self.llm_instance.get_response(
+                    prompt=prompt,
+                    system_prompt=self._build_system_prompt(tools),
+                    chat_history=self.chat_history,
+                    temperature=temperature,
+                    tools=tool_param,
+                    output_json=output_json,
+                    output_pydantic=output_pydantic,
+                    verbose=self.verbose,
+                    markdown=self.markdown,
+                    console=self.console,
+                    agent_name=self.name,
+                    agent_role=self.role,
+                    agent_tools=[t.__name__ if hasattr(t, '__name__') else str(t) for t in (tools if tools is not None else self.tools)],
+                    reasoning_steps=reasoning_steps,
+                    execute_tool_fn=self.execute_tool,
+                    stream=True
+                )
+
+                self.chat_history.append({"role": "assistant", "content": response_text})
+                # Yield the complete response as a single chunk
+                yield response_text
+
+        except Exception as e:
+            # Roll back chat history on error
+            self.chat_history = self.chat_history[:chat_history_length]
+            yield f"Error: {str(e)}"
+
+    def _openai_stream(self, prompt, temperature=0.2, tools=None, output_json=None, output_pydantic=None, reasoning_steps=False, **kwargs):
+        """Handle streaming for the standard OpenAI client."""
+        # Store the chat history length for potential rollback
+        chat_history_length = len(self.chat_history)
+
+        try:
+            # Use the _build_messages helper method
+            messages, original_prompt = self._build_messages(prompt, temperature, output_json, output_pydantic)
+
+            # Normalize original_prompt for consistent chat history storage
+            normalized_content = original_prompt
+            if isinstance(original_prompt, list):
+                normalized_content = next((item["text"] for item in original_prompt if item.get("type") == "text"), "")
+
+            # Prevent duplicate messages
+            if not (self.chat_history and
+                    self.chat_history[-1].get("role") == "user" and
+                    self.chat_history[-1].get("content") == normalized_content):
+                self.chat_history.append({"role": "user", "content": normalized_content})
+
+            # Ensure the OpenAI client is available before streaming
+            if self._openai_client is None:
+                raise ValueError("OpenAI client is not initialized. Please provide OPENAI_API_KEY or use a custom LLM provider.")
+
+            # Stream the response using the OpenAI client
+            accumulated_response = ""
+            for chunk in self._openai_client.chat_completion_with_tools_stream(
+                messages=messages,
+                model=self.llm,
+                temperature=temperature,
+                tools=self._format_tools_for_completion(tools),
+                execute_tool_fn=self.execute_tool,
+                reasoning_steps=reasoning_steps,
+                verbose=self.verbose,
+                max_iterations=10
+            ):
+                accumulated_response += chunk
+                yield chunk
+
+            # Add the accumulated response to chat history
+            self.chat_history.append({"role": "assistant", "content": accumulated_response})
+
+        except Exception as e:
+            # Roll back chat history on error
+            self.chat_history = self.chat_history[:chat_history_length]
+            yield f"Error: {str(e)}"
 
     def execute(self, task, context=None):
         """Execute a task synchronously - backward compatibility method"""
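
A minimal usage sketch of the new streaming path. This is illustrative only: the `praisonaiagents` import path and the `name`/`stream` constructor arguments are assumptions, while the `stream` keyword handling follows `start()` in the diff above.

    from praisonaiagents import Agent  # assumed import path

    agent = Agent(name="Assistant", stream=True)  # 'stream=True' assumed to set self.stream

    # With streaming enabled, start() returns a generator of text chunks
    for chunk in agent.start("Summarize the plot of Hamlet"):
        print(chunk, end="", flush=True)

    # stream=False falls through to chat() and returns the complete response
    response = agent.start("Summarize the plot of Hamlet", stream=False)
    print(response)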