Skip to content

Commit e9805d3

Browse files
CreatmanCEO and claude committed
fix(chat): agentic tool-calling loop for multi-step analyses
Previously, the SSE flow ran ONE batch of tool calls followed by ONE follow-up completion issued without tools=, which prevented the LLM from calling additional tools after seeing the first result. Symptom: the chat was truncated mid-stream at text such as "Now let me retrieve..." for analyses that need more than one sequential tool call (e.g. query_wells, then compute_drawdown_grid).

Fix: an agentic loop bounded by MAX_ITERATIONS=6. Each iteration calls the LLM WITH tools=TOOL_DEFINITIONS, parses the streamed tool_calls, executes them, appends the results to messages, and repeats. The loop exits when the LLM returns plain text (no tool_calls); if all MAX_ITERATIONS iterations end in tool calls, a "[reached max tool-call iterations=6]" notice is streamed instead.

Verified: the depression cone analysis triggers 6 tool calls (query_wells + analyze_interference + 4× compute_drawdown_grid) and streams the full 4k-char analytical response.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 9159d5b commit e9805d3

1 file changed

Lines changed: 69 additions & 57 deletions

File tree

backend/main.py

Lines changed: 69 additions & 57 deletions
Original file line number | Diff line number | Diff line change
@@ -282,42 +282,50 @@ async def generate() -> AsyncIterator[str]:
282282
{"role": "user", "content": request.message},
283283
]
284284

285+
# Agentic loop: keep calling LLM with tools until it returns plain text
286+
# (no more tool_calls). Bounded by MAX_ITERATIONS to prevent runaway loops.
287+
MAX_ITERATIONS = 6
288+
global_call_idx = 0
289+
285290
try:
286-
response = await llm_router.acompletion(
287-
model=model_pool,
288-
messages=messages,
289-
tools=TOOL_DEFINITIONS,
290-
stream=True,
291-
temperature=settings.llm_temperature,
292-
)
293-
294-
collected_content = ""
295-
tool_calls_buffer = []
296-
297-
async for chunk in response:
298-
delta = chunk.choices[0].delta if chunk.choices else None
299-
if delta is None:
300-
continue
301-
302-
# Stream text content
303-
if delta.content:
304-
collected_content += delta.content
305-
yield f"data: {json.dumps({'type': 'token', 'content': delta.content})}\n\n"
306-
307-
# Collect tool calls (arguments arrive in chunks during streaming)
308-
if delta.tool_calls:
309-
for tc in delta.tool_calls:
310-
if tc.index is not None:
311-
while len(tool_calls_buffer) <= tc.index:
312-
tool_calls_buffer.append({"name": "", "arguments": ""})
313-
if tc.function and tc.function.name:
314-
tool_calls_buffer[tc.index]["name"] = tc.function.name
315-
if tc.function and tc.function.arguments:
316-
tool_calls_buffer[tc.index]["arguments"] += tc.function.arguments
317-
318-
# 7. Execute ALL tool calls, then ONE follow-up
319-
if tool_calls_buffer:
320-
# Parse all tool call arguments
291+
for iteration in range(MAX_ITERATIONS):
292+
response = await llm_router.acompletion(
293+
model=model_pool,
294+
messages=messages,
295+
tools=TOOL_DEFINITIONS,
296+
stream=True,
297+
temperature=settings.llm_temperature,
298+
)
299+
300+
collected_content = ""
301+
tool_calls_buffer = []
302+
303+
async for chunk in response:
304+
delta = chunk.choices[0].delta if chunk.choices else None
305+
if delta is None:
306+
continue
307+
308+
# Stream text content
309+
if delta.content:
310+
collected_content += delta.content
311+
yield f"data: {json.dumps({'type': 'token', 'content': delta.content})}\n\n"
312+
313+
# Collect tool calls (arguments arrive in chunks during streaming)
314+
if delta.tool_calls:
315+
for tc in delta.tool_calls:
316+
if tc.index is not None:
317+
while len(tool_calls_buffer) <= tc.index:
318+
tool_calls_buffer.append({"name": "", "arguments": ""})
319+
if tc.function and tc.function.name:
320+
tool_calls_buffer[tc.index]["name"] = tc.function.name
321+
if tc.function and tc.function.arguments:
322+
tool_calls_buffer[tc.index]["arguments"] += tc.function.arguments
323+
324+
# Loop exit: LLM returned plain text without tool calls
325+
if not tool_calls_buffer:
326+
break
327+
328+
# Parse tool call arguments
321329
parsed_calls = []
322330
for tc in tool_calls_buffer:
323331
try:
@@ -326,33 +334,37 @@ async def generate() -> AsyncIterator[str]:
326334
args = {}
327335
parsed_calls.append((tc, args))
328336

329-
# Build single assistant message with all tool calls
330-
assistant_tool_calls = [
331-
{"id": f"call_{tc['name']}_{i}", "type": "function",
332-
"function": {"name": tc["name"], "arguments": json.dumps(args)}}
333-
for i, (tc, args) in enumerate(parsed_calls)
334-
]
335-
messages.append({"role": "assistant", "content": collected_content or None, "tool_calls": assistant_tool_calls})
337+
# Build assistant message with this iteration's tool calls
338+
assistant_tool_calls = []
339+
for tc, args in parsed_calls:
340+
call_id = f"call_{tc['name']}_{global_call_idx}"
341+
global_call_idx += 1
342+
assistant_tool_calls.append({
343+
"id": call_id,
344+
"type": "function",
345+
"function": {"name": tc["name"], "arguments": json.dumps(args)},
346+
})
347+
messages.append({
348+
"role": "assistant",
349+
"content": collected_content or None,
350+
"tool_calls": assistant_tool_calls,
351+
})
336352

337353
# Execute all tools and add results
338-
for i, (tc, args) in enumerate(parsed_calls):
354+
for atc, (tc, args) in zip(assistant_tool_calls, parsed_calls):
339355
yield f"data: {json.dumps({'type': 'tool_call', 'tool': tc['name'], 'args': args})}\n\n"
340356
tool_result = tool_executor.execute(tc["name"], args)
341357
yield f"data: {json.dumps({'type': 'tool_result', 'tool': tc['name'], 'success': tool_result.success, 'result': tool_result.result})}\n\n"
342-
messages.append({"role": "tool", "tool_call_id": f"call_{tc['name']}_{i}", "content": json.dumps(tool_result.result)})
343-
344-
# ONE follow-up call with all tool results
345-
# No tools param — force text response, prevent LLM from attempting more tool calls as text
346-
followup = await llm_router.acompletion(
347-
model=model_pool,
348-
messages=messages,
349-
stream=True,
350-
temperature=settings.llm_temperature,
351-
)
352-
async for chunk in followup:
353-
delta = chunk.choices[0].delta if chunk.choices else None
354-
if delta and delta.content:
355-
yield f"data: {json.dumps({'type': 'token', 'content': delta.content})}\n\n"
358+
messages.append({
359+
"role": "tool",
360+
"tool_call_id": atc["id"],
361+
"content": json.dumps(tool_result.result),
362+
})
363+
# Continue loop — next iteration LLM will see all tool results
364+
365+
else:
366+
# Loop completed all MAX_ITERATIONS without LLM returning plain text
367+
yield f"data: {json.dumps({'type': 'token', 'content': f'\\n\\n[reached max tool-call iterations={MAX_ITERATIONS}]'})}\n\n"
356368

357369
except Exception as e:
358370
yield f"data: {json.dumps({'type': 'error', 'message': str(e)})}\n\n"

0 commit comments

Comments
 (0)