Skip to content

Commit af73962

Browse files
fix(ollama): preserve tool call IDs to fix repeated same-tool calls
1 parent: d83a037 · commit: af73962

2 files changed

Lines changed: 101 additions & 24 deletions

File tree

integrations/ollama/src/haystack_integrations/components/generators/ollama/chat/chat_generator.py

Lines changed: 11 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -165,9 +165,10 @@ def _convert_ollama_response_to_chatmessage(ollama_response: ChatResponse) -> Ch
165165
tool_calls: list[ToolCall] = []
166166

167167
if ollama_tool_calls := ollama_message.get("tool_calls"):
168-
for ollama_tc in ollama_tool_calls:
168+
for idx, ollama_tc in enumerate(ollama_tool_calls):
169169
tool_calls.append(
170170
ToolCall(
171+
id=ollama_tc.get("id") or f"call_{idx}",
171172
tool_name=ollama_tc["function"]["name"],
172173
arguments=ollama_tc["function"]["arguments"],
173174
)
@@ -208,6 +209,7 @@ def _build_chunk(
208209
tool_calls_list.append(
209210
ToolCallDelta(
210211
index=tool_call_index,
212+
id=tool_call.get("id") or f"call_{tool_call_index}",
211213
tool_name=tool_call["function"]["name"],
212214
arguments=json.dumps(tool_call["function"]["arguments"])
213215
if tool_call["function"]["arguments"]
@@ -400,28 +402,14 @@ def _handle_streaming_response(
400402

401403
if chunk.tool_calls:
402404
for tool_call in chunk.tool_calls:
403-
# the Ollama server doesn't guarantee an id field in every tool_calls entry.
404-
# OpenAI-compatible endpoint (/v1/chat/completions) - recent releases do add an auto-generated id
405-
# when the model produces multiple tool calls, so that clients can map results back.
406-
# Native Ollama endpoint (/api/chat) and older builds
407-
# - the JSON often contains only function.name + arguments;
408-
# many users have reported that id is missing even with several calls,
409-
# making client-side resolution harder:
410-
# https://github.com/ollama/ollama/issues/6708
411-
# https://github.com/ollama/ollama/issues/7510
412-
# - If id is provided → we can distinguish multiple calls to the same tool.
413-
414-
# - If id is missing → fallback to function.name works only when there's one call.
415-
# - That's why the deduplication logic is cautious and assumes one logical
416-
# call per name when id is absent.
405+
# id is always set by _build_chunk (either from the server or synthetic "call_N").
406+
# Fall back to tool_name only as a last resort for callers that bypass _build_chunk.
417407
tool_call_id = tool_call.id or tool_call.tool_name or ""
418408
args = tool_call.arguments or ""
419409

420-
# Remember first-seen order and tool name
421410
if tool_call_id not in id_order:
422411
id_order.append(tool_call_id)
423412
name_by_id[tool_call_id] = tool_call.tool_name or ""
424-
# Update the argument accumulator for this tool_call_id.
425413
arg_by_id[tool_call_id] = args
426414

427415
if callback:
@@ -437,7 +425,9 @@ def _handle_streaming_response(
437425
tool_calls = []
438426
for tool_call_id in id_order:
439427
arguments: str = arg_by_id.get(tool_call_id, "")
440-
tool_calls.append(ToolCall(tool_name=name_by_id[tool_call_id], arguments=json.loads(arguments)))
428+
tool_calls.append(
429+
ToolCall(id=tool_call_id, tool_name=name_by_id[tool_call_id], arguments=json.loads(arguments))
430+
)
441431

442432
# We can't use _convert_streaming_chunks_to_chat_message because
443433
# we need to map tool_call name and args by order.
@@ -497,11 +487,9 @@ async def _handle_streaming_response_async(
497487
tool_call_id = tool_call.id or tool_call.tool_name or ""
498488
args = tool_call.arguments or ""
499489

500-
# Remember first-seen order and tool name
501490
if tool_call_id not in id_order:
502491
id_order.append(tool_call_id)
503492
name_by_id[tool_call_id] = tool_call.tool_name or ""
504-
# Update the argument accumulator for this tool_call_id
505493
arg_by_id[tool_call_id] = args
506494

507495
if callback is not None:
@@ -519,7 +507,9 @@ async def _handle_streaming_response_async(
519507
tool_calls = []
520508
for tool_call_id in id_order:
521509
arguments: str = arg_by_id.get(tool_call_id, "")
522-
tool_calls.append(ToolCall(tool_name=name_by_id[tool_call_id], arguments=json.loads(arguments)))
510+
tool_calls.append(
511+
ToolCall(id=tool_call_id, tool_name=name_by_id[tool_call_id], arguments=json.loads(arguments))
512+
)
523513

524514
# We can't use _convert_streaming_chunks_to_chat_message because
525515
# we need to map tool_call name and args by order.

integrations/ollama/tests/test_chat_generator.py

Lines changed: 90 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -190,10 +190,38 @@ def test_convert_ollama_response_to_chatmessage_with_tools(self):
190190
assert observed.role == "assistant"
191191
assert observed.text is None
192192
assert observed.tool_call == ToolCall(
193+
id="call_0",
193194
tool_name="get_current_weather",
194195
arguments={"format": "celsius", "location": "Paris, FR"},
195196
)
196197

198+
def test_convert_ollama_response_to_chatmessage_with_repeated_tool(self):
199+
ollama_response = ChatResponse(
200+
model="some_model",
201+
created_at="2023-12-12T14:13:43.416799Z",
202+
message={
203+
"role": "assistant",
204+
"content": "",
205+
"tool_calls": [
206+
{"function": {"name": "weather", "arguments": {"city": "Paris"}}},
207+
{"function": {"name": "weather", "arguments": {"city": "London"}}},
208+
],
209+
},
210+
done=True,
211+
total_duration=5191566416,
212+
load_duration=2154458,
213+
prompt_eval_count=26,
214+
prompt_eval_duration=383809000,
215+
eval_count=298,
216+
eval_duration=4799921000,
217+
)
218+
219+
observed = _convert_ollama_response_to_chatmessage(ollama_response)
220+
221+
assert len(observed.tool_calls) == 2
222+
assert observed.tool_calls[0] == ToolCall(id="call_0", tool_name="weather", arguments={"city": "Paris"})
223+
assert observed.tool_calls[1] == ToolCall(id="call_1", tool_name="weather", arguments={"city": "London"})
224+
197225
def test_build_chunk(self):
198226
generator = OllamaChatGenerator()
199227

@@ -386,8 +414,10 @@ def test_callback(chunk: StreamingChunk):
386414
assert result["replies"][0].text is None
387415
assert result["replies"][0].tool_calls[0].tool_name == "calculator"
388416
assert result["replies"][0].tool_calls[0].arguments == {"expression": "7 * (4 + 2)"}
417+
assert result["replies"][0].tool_calls[0].id == "call_1"
389418
assert result["replies"][0].tool_calls[1].tool_name == "factorial"
390419
assert result["replies"][0].tool_calls[1].arguments == {"n": 5}
420+
assert result["replies"][0].tool_calls[1].id == "call_2"
391421
assert result["replies"][0].meta["finish_reason"] == "stop"
392422
assert result["replies"][0].meta["model"] == "qwen3:0.6b"
393423

@@ -400,7 +430,7 @@ def test_callback(chunk: StreamingChunk):
400430
expected = {
401431
"index": 1,
402432
"arguments": '{"expression": "7 * (4 + 2)"}',
403-
"id": None,
433+
"id": "call_1",
404434
"tool_name": "calculator",
405435
}
406436
# We add extra to the expected dict if it exists in the result for comparison
@@ -413,7 +443,7 @@ def test_callback(chunk: StreamingChunk):
413443
"index": 2,
414444
"tool_name": "factorial",
415445
"arguments": '{"n": 5}',
416-
"id": None,
446+
"id": "call_2",
417447
}
418448
# We add extra to the expected dict if it exists in the result for comparison
419449
# This was added in PR https://github.com/deepset-ai/haystack/pull/10018 and released in Haystack 2.20.0
@@ -422,6 +452,62 @@ def test_callback(chunk: StreamingChunk):
422452
assert streaming_chunks[1].tool_calls[0].to_dict() == expected
423453
assert len(streaming_chunks[2].tool_calls) == 0
424454

455+
def test_handle_streaming_response_repeated_tool_calls(self):
456+
ollama_chunks = [
457+
ChatResponse(
458+
model="qwen3:0.6b",
459+
created_at="2025-07-31T14:48:03.471292Z",
460+
done=False,
461+
message=Message(
462+
role="assistant",
463+
content="",
464+
tool_calls=[
465+
Message.ToolCall(
466+
function=Message.ToolCall.Function(name="weather", arguments={"city": "Paris"})
467+
)
468+
],
469+
),
470+
),
471+
ChatResponse(
472+
model="qwen3:0.6b",
473+
created_at="2025-07-31T14:48:03.660179Z",
474+
done=False,
475+
message=Message(
476+
role="assistant",
477+
content="",
478+
tool_calls=[
479+
Message.ToolCall(
480+
function=Message.ToolCall.Function(name="weather", arguments={"city": "London"})
481+
)
482+
],
483+
),
484+
),
485+
ChatResponse(
486+
model="qwen3:0.6b",
487+
created_at="2025-07-31T14:48:03.678729Z",
488+
done=True,
489+
done_reason="stop",
490+
total_duration=774786292,
491+
load_duration=43608375,
492+
prompt_eval_count=217,
493+
prompt_eval_duration=312974541,
494+
eval_count=46,
495+
eval_duration=417069750,
496+
message=Message(role="assistant", content=""),
497+
),
498+
]
499+
500+
generator = OllamaChatGenerator()
501+
result = generator._handle_streaming_response(ollama_chunks, None)
502+
503+
assert len(result["replies"][0].tool_calls) == 2
504+
assert result["replies"][0].tool_calls[0].tool_name == "weather"
505+
assert result["replies"][0].tool_calls[0].arguments == {"city": "Paris"}
506+
assert result["replies"][0].tool_calls[0].id == "call_1"
507+
assert result["replies"][0].tool_calls[1].tool_name == "weather"
508+
assert result["replies"][0].tool_calls[1].arguments == {"city": "London"}
509+
assert result["replies"][0].tool_calls[1].id == "call_2"
510+
425511
def test_handle_streaming_response_tool_calls_with_thinking(self):
426512
ollama_chunks = [
427513
ChatResponse(
@@ -536,6 +622,7 @@ def test_callback(chunk: StreamingChunk):
536622
assert result["replies"][0].text is None
537623
assert result["replies"][0].tool_calls[0].tool_name == "add_two_numbers"
538624
assert result["replies"][0].tool_calls[0].arguments == {"a": 2, "b": 2}
625+
assert result["replies"][0].tool_calls[0].id == "call_1"
539626
assert result["replies"][0].reasoning.reasoning_text == "Okay, the user is asking 2 plus 2."
540627
assert result["replies"][0].meta["finish_reason"] == "stop"
541628
assert result["replies"][0].meta["model"] == "qwen3:0.6b"
@@ -564,7 +651,7 @@ def test_callback(chunk: StreamingChunk):
564651
expected = {
565652
"index": 1,
566653
"arguments": '{"a": 2, "b": 2}',
567-
"id": None,
654+
"id": "call_1",
568655
"tool_name": "add_two_numbers",
569656
}
570657
serialized_dict = streaming_chunks[12].tool_calls[0].to_dict()

0 commit comments

Comments
 (0)