Skip to content

Commit 38d337c

Browse files
authored
fix(DATAGO-130049): workflow node agents hallucinate tool name instead of using inline artifact block (#1551)
1 parent f81b086 commit 38d337c

4 files changed

Lines changed: 192 additions & 3 deletions

File tree

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
<p align="center">
2-
<img src="./docs/static/img/logo.png" alt="Solace Agent Mesh Logo" width="100"/>
2+
<img src="https://raw.githubusercontent.com/SolaceLabs/solace-agent-mesh/refs/heads/main/docs/static/img/logo.png" alt="Solace Agent Mesh Logo" width="100"/>
33
</p>
44
<h2 align="center">
55
Solace Agent Mesh
@@ -220,5 +220,5 @@ pytest
220220
---
221221

222222
<h3 align="center">
223-
<img src="./docs/static/img/solace-logo-text.svg" alt="Solace Agent Mesh Logo" width="100"/>
223+
<img src="https://raw.githubusercontent.com/SolaceLabs/solace-agent-mesh/refs/heads/main/docs/static/img/logo.png" alt="Solace Agent Mesh Logo" width="100"/>
224224
</h3>

src/solace_agent_mesh/agent/sac/structured_invocation/handler.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -775,7 +775,7 @@ def _generate_workflow_instructions(
775775
If validation fails, you will be asked to retry with error feedback.
776776
777777
IMPORTANT NOTES:
778-
- Use the save_artifact tool OR inline fenced blocks to create the output artifact
778+
- Create the output artifact using the inline fenced block syntax («««save_artifact: filename="..." mime_type="..."»»»), NOT a tool call
779779
- The result embed («result:artifact=...») is MANDATORY - the invocation will fail without it
780780
- The artifact format (JSON, YAML, etc.) must be parseable
781781
- Additional fields beyond the schema are allowed, but all required fields must be present
Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
# Regression test for issue #1261:
2+
#
3+
# _generate_workflow_instructions (handler.py) must not reference "save_artifact"
4+
# as a tool name. save_artifact is not a registered tool — it is the keyword
5+
# used inside the inline fenced block syntax («««save_artifact:...»»»).
6+
#
7+
# When a workflow node receives a StructuredInvocationRequest with an
8+
# output_schema, the handler injects instructions into the LLM prompt. The
9+
# injected text previously said "Use the save_artifact tool", causing LLMs to
10+
# hallucinate tool names ($RESULT_TOOL, $ARTIFACT_TOOL etc.) and trigger an
11+
# unnecessary retry round-trip.
12+
#
13+
# The correct artifact creation mechanism is the inline fenced block syntax, NOT a tool call.
14+
#
15+
# This test verifies:
16+
# 1. append_to_artifact IS present in the tool list (it is a real registered tool)
17+
# 2. save_artifact is NOT present as a tool (it is not a registered tool)
18+
# 3. Each node completes on its first invocation (artifact turn + result-embed turn), with no validation-retry round-trip
19+
20+
test_case_id: "workflow_correct_artifact_tool_in_prompt_001"
21+
description: "Regression guard for issue #1261: workflow node prompt must not reference save_artifact as a tool; inline fenced block syntax is the correct artifact creation mechanism."
22+
tags: ["all", "workflows", "validation"]
23+
skip_intermediate_events: true
24+
expected_completion_timeout_seconds: 20
25+
26+
test_runner_config_overrides:
27+
agent_config:
28+
artifact_scope: "namespace"
29+
30+
gateway_input:
31+
target_agent_name: "StructuredTestWorkflow"
32+
user_identity: "issue1261_test_user@example.com"
33+
external_context:
34+
a2a_session_id: "issue1261_test_session"
35+
parts:
36+
- type: "data"
37+
data:
38+
customer_name: "Alice"
39+
order_id: "ORD-9999"
40+
amount: 42
41+
42+
llm_interactions:
43+
# Turn 1 — TestPeerAgentA receives structured invocation with output_schema_override.
44+
# This is the turn where the injected prompt previously said "Use the save_artifact tool".
45+
  # Assert the tool list sent to the LLM: append_to_artifact and load_artifact present, save_artifact absent.
46+
- static_response:
47+
choices:
48+
- message:
49+
role: "assistant"
50+
content: |
51+
«««save_artifact: filename="validate_output.json" mime_type="application/json" description="Validated order"
52+
{"customer_name": "Alice", "order_id": "ORD-9999", "amount": 42, "status": "validated"}
53+
»»»
54+
expected_request:
55+
tools_present:
56+
- "append_to_artifact"
57+
- "load_artifact"
58+
tools_absent:
59+
- "save_artifact"
60+
61+
# Turn 2 — artifact saved, emit mandatory result embed (no retry needed).
62+
- static_response:
63+
choices:
64+
- message:
65+
role: "assistant"
66+
content: "Order validated. «result:artifact=validate_output.json:v0 status=success»"
67+
expected_request:
68+
expected_tool_responses_in_llm_messages:
69+
- tool_name: "_notify_artifact_save"
70+
response_json_matches:
71+
filename: "validate_output.json"
72+
status: "success"
73+
74+
# Turn 3 — TestPeerAgentB receives structured invocation with output_schema_override.
75+
# Same assertion: append_to_artifact present, save_artifact absent.
76+
- static_response:
77+
choices:
78+
- message:
79+
role: "assistant"
80+
content: |
81+
«««save_artifact: filename="process_output.json" mime_type="application/json" description="Processed order"
82+
{"customer_name": "Alice", "order_id": "ORD-9999", "amount": 42, "status": "validated", "processed": true}
83+
»»»
84+
expected_request:
85+
tools_present:
86+
- "append_to_artifact"
87+
- "load_artifact"
88+
tools_absent:
89+
- "save_artifact"
90+
91+
# Turn 4 — artifact saved, emit result embed.
92+
- static_response:
93+
choices:
94+
- message:
95+
role: "assistant"
96+
content: "Order processed. «result:artifact=process_output.json:v0 status=success»"
97+
expected_request:
98+
expected_tool_responses_in_llm_messages:
99+
- tool_name: "_notify_artifact_save"
100+
response_json_matches:
101+
filename: "process_output.json"
102+
status: "success"
103+
104+
expected_gateway_output:
105+
- type: "final_response"
106+
task_state: "completed"
Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
"""
2+
Regression tests for _generate_workflow_instructions — ensures the prompt injected
3+
into workflow node agents does not reference non-existent or wrong tool names,
4+
which would cause LLM hallucinations (issue #1261).
5+
"""
6+
7+
from unittest.mock import MagicMock
8+
9+
from src.solace_agent_mesh.agent.sac.structured_invocation.handler import (
10+
StructuredInvocationHandler,
11+
)
12+
from src.solace_agent_mesh.common.data_parts import StructuredInvocationRequest
13+
14+
15+
# Minimal object schema with a single required numeric field; the tests in
# this module pass it as the output-schema argument when generating
# instructions.
OUTPUT_SCHEMA = dict(
    type="object",
    properties=dict(doubled_value=dict(type="number")),
    required=["doubled_value"],
)
20+
21+
22+
def _make_handler() -> StructuredInvocationHandler:
    """Return a handler wired to a mocked host component.

    The mock's ``get_config`` is configured to return ``None`` for every
    lookup.
    """
    host_component = MagicMock()
    host_component.configure_mock(**{"get_config.return_value": None})
    return StructuredInvocationHandler(host_component)
26+
27+
28+
def _make_request(**kwargs) -> StructuredInvocationRequest:
    """Build a request with default workflow/node identifiers.

    Any keyword argument overrides or extends the defaults
    (``workflow_name="TestWorkflow"``, ``node_id="test_node"``).
    """
    request_fields = dict(workflow_name="TestWorkflow", node_id="test_node")
    request_fields.update(kwargs)
    return StructuredInvocationRequest(**request_fields)
31+
32+
33+
class TestWorkflowInstructionsToolNames:
    """Issue #1261 guards: generated instructions must not name tools that do not exist."""

    # Wording that was the hallucination trigger. "save_artifact" on its own
    # is legitimate inside the fenced block syntax, so only the
    # "<name> tool" phrasing is rejected.
    _FORBIDDEN_PHRASE = "save_artifact tool"

    @staticmethod
    def _build_instructions(output_schema, **request_fields):
        """Return the instruction text produced by a fresh handler/request pair."""
        handler = _make_handler()
        request = _make_request(**request_fields)
        return handler._generate_workflow_instructions(request, output_schema)

    def test_output_schema_branch_does_not_say_save_artifact_tool(self):
        text = self._build_instructions(OUTPUT_SCHEMA)
        assert self._FORBIDDEN_PHRASE not in text

    def test_no_output_schema_branch_does_not_say_save_artifact_tool(self):
        text = self._build_instructions(None)
        assert self._FORBIDDEN_PHRASE not in text

    def test_output_schema_branch_mentions_inline_fenced_block_syntax(self):
        # Artifacts are created with the «««save_artifact:...»»» inline
        # fenced block rather than through a tool call, so the keyword
        # followed by a colon has to show up in the instructions.
        text = self._build_instructions(OUTPUT_SCHEMA)
        assert "save_artifact:" in text

    def test_suggested_filename_branch_does_not_say_save_artifact_tool(self):
        text = self._build_instructions(
            OUTPUT_SCHEMA, suggested_output_filename="output.json"
        )
        assert self._FORBIDDEN_PHRASE not in text

    def test_output_schema_is_serialized_into_instructions(self):
        text = self._build_instructions(OUTPUT_SCHEMA)
        assert "doubled_value" in text

    def test_result_embed_syntax_is_present(self):
        text = self._build_instructions(OUTPUT_SCHEMA)
        assert "result:artifact=" in text

    def test_not_a_tool_call_is_explicit(self):
        # The instructions have to state outright that the fenced block is
        # NOT a tool call, so the LLM does not invent a function name.
        text = self._build_instructions(OUTPUT_SCHEMA)
        assert "NOT a tool call" in text

0 commit comments

Comments
 (0)