@@ -282,42 +282,50 @@ async def generate() -> AsyncIterator[str]:
282282 {"role" : "user" , "content" : request .message },
283283 ]
284284
285+ # Agentic loop: keep calling LLM with tools until it returns plain text
286+ # (no more tool_calls). Bounded by MAX_ITERATIONS to prevent runaway loops.
287+ MAX_ITERATIONS = 6
288+ global_call_idx = 0
289+
285290 try :
286- response = await llm_router .acompletion (
287- model = model_pool ,
288- messages = messages ,
289- tools = TOOL_DEFINITIONS ,
290- stream = True ,
291- temperature = settings .llm_temperature ,
292- )
293-
294- collected_content = ""
295- tool_calls_buffer = []
296-
297- async for chunk in response :
298- delta = chunk .choices [0 ].delta if chunk .choices else None
299- if delta is None :
300- continue
301-
302- # Stream text content
303- if delta .content :
304- collected_content += delta .content
305- yield f"data: { json .dumps ({'type' : 'token' , 'content' : delta .content })} \n \n "
306-
307- # Collect tool calls (arguments arrive in chunks during streaming)
308- if delta .tool_calls :
309- for tc in delta .tool_calls :
310- if tc .index is not None :
311- while len (tool_calls_buffer ) <= tc .index :
312- tool_calls_buffer .append ({"name" : "" , "arguments" : "" })
313- if tc .function and tc .function .name :
314- tool_calls_buffer [tc .index ]["name" ] = tc .function .name
315- if tc .function and tc .function .arguments :
316- tool_calls_buffer [tc .index ]["arguments" ] += tc .function .arguments
317-
318- # 7. Execute ALL tool calls, then ONE follow-up
319- if tool_calls_buffer :
320- # Parse all tool call arguments
291+ for iteration in range (MAX_ITERATIONS ):
292+ response = await llm_router .acompletion (
293+ model = model_pool ,
294+ messages = messages ,
295+ tools = TOOL_DEFINITIONS ,
296+ stream = True ,
297+ temperature = settings .llm_temperature ,
298+ )
299+
300+ collected_content = ""
301+ tool_calls_buffer = []
302+
303+ async for chunk in response :
304+ delta = chunk .choices [0 ].delta if chunk .choices else None
305+ if delta is None :
306+ continue
307+
308+ # Stream text content
309+ if delta .content :
310+ collected_content += delta .content
311+ yield f"data: { json .dumps ({'type' : 'token' , 'content' : delta .content })} \n \n "
312+
313+ # Collect tool calls (arguments arrive in chunks during streaming)
314+ if delta .tool_calls :
315+ for tc in delta .tool_calls :
316+ if tc .index is not None :
317+ while len (tool_calls_buffer ) <= tc .index :
318+ tool_calls_buffer .append ({"name" : "" , "arguments" : "" })
319+ if tc .function and tc .function .name :
320+ tool_calls_buffer [tc .index ]["name" ] = tc .function .name
321+ if tc .function and tc .function .arguments :
322+ tool_calls_buffer [tc .index ]["arguments" ] += tc .function .arguments
323+
324+ # Loop exit: LLM returned plain text without tool calls
325+ if not tool_calls_buffer :
326+ break
327+
328+ # Parse tool call arguments
321329 parsed_calls = []
322330 for tc in tool_calls_buffer :
323331 try :
@@ -326,33 +334,37 @@ async def generate() -> AsyncIterator[str]:
326334 args = {}
327335 parsed_calls .append ((tc , args ))
328336
329- # Build single assistant message with all tool calls
330- assistant_tool_calls = [
331- {"id" : f"call_{ tc ['name' ]} _{ i } " , "type" : "function" ,
332- "function" : {"name" : tc ["name" ], "arguments" : json .dumps (args )}}
333- for i , (tc , args ) in enumerate (parsed_calls )
334- ]
335- messages .append ({"role" : "assistant" , "content" : collected_content or None , "tool_calls" : assistant_tool_calls })
337+ # Build assistant message with this iteration's tool calls
338+ assistant_tool_calls = []
339+ for tc , args in parsed_calls :
340+ call_id = f"call_{ tc ['name' ]} _{ global_call_idx } "
341+ global_call_idx += 1
342+ assistant_tool_calls .append ({
343+ "id" : call_id ,
344+ "type" : "function" ,
345+ "function" : {"name" : tc ["name" ], "arguments" : json .dumps (args )},
346+ })
347+ messages .append ({
348+ "role" : "assistant" ,
349+ "content" : collected_content or None ,
350+ "tool_calls" : assistant_tool_calls ,
351+ })
336352
337353 # Execute all tools and add results
338- for i , (tc , args ) in enumerate ( parsed_calls ):
354+ for atc , (tc , args ) in zip ( assistant_tool_calls , parsed_calls ):
339355 yield f"data: { json .dumps ({'type' : 'tool_call' , 'tool' : tc ['name' ], 'args' : args })} \n \n "
340356 tool_result = tool_executor .execute (tc ["name" ], args )
341357 yield f"data: { json .dumps ({'type' : 'tool_result' , 'tool' : tc ['name' ], 'success' : tool_result .success , 'result' : tool_result .result })} \n \n "
342- messages .append ({"role" : "tool" , "tool_call_id" : f"call_{ tc ['name' ]} _{ i } " , "content" : json .dumps (tool_result .result )})
343-
344- # ONE follow-up call with all tool results
345- # No tools param — force text response, prevent LLM from attempting more tool calls as text
346- followup = await llm_router .acompletion (
347- model = model_pool ,
348- messages = messages ,
349- stream = True ,
350- temperature = settings .llm_temperature ,
351- )
352- async for chunk in followup :
353- delta = chunk .choices [0 ].delta if chunk .choices else None
354- if delta and delta .content :
355- yield f"data: { json .dumps ({'type' : 'token' , 'content' : delta .content })} \n \n "
358+ messages .append ({
359+ "role" : "tool" ,
360+ "tool_call_id" : atc ["id" ],
361+ "content" : json .dumps (tool_result .result ),
362+ })
363+ # Continue loop — next iteration LLM will see all tool results
364+
365+ else :
366+ # Loop completed all MAX_ITERATIONS without LLM returning plain text
367+ yield f"data: { json .dumps ({'type' : 'token' , 'content' : f'\\ n\\ n[reached max tool-call iterations={ MAX_ITERATIONS } ]' })} \n \n "
356368
357369 except Exception as e :
358370 yield f"data: { json .dumps ({'type' : 'error' , 'message' : str (e )})} \n \n "
0 commit comments