fix(DATAGO-130049): workflow node agents hallucinate tool name instead of using inline artifact block (#1551)

cyrus2281 · web-flow · commit 38d337c275c8 · 2026-05-27T16:17:16.000Z
diff --git a/README.md b/README.md
@@ -1,5 +1,5 @@
 <p align="center">
-  <img src="./docs/static/img/logo.png" alt="Solace Agent Mesh Logo" width="100"/>
+  <img src="https://raw.githubusercontent.com/SolaceLabs/solace-agent-mesh/refs/heads/main/docs/static/img/logo.png" alt="Solace Agent Mesh Logo" width="100"/>
 </p>
 <h2 align="center">
   Solace Agent Mesh
@@ -220,5 +220,5 @@ pytest
 ---
 
 <h3 align="center">
-  <img src="./docs/static/img/solace-logo-text.svg" alt="Solace Agent Mesh Logo" width="100"/>
+  <img src="https://raw.githubusercontent.com/SolaceLabs/solace-agent-mesh/refs/heads/main/docs/static/img/logo.png" alt="Solace Agent Mesh Logo" width="100"/>
 </h3>
diff --git a/src/solace_agent_mesh/agent/sac/structured_invocation/handler.py b/src/solace_agent_mesh/agent/sac/structured_invocation/handler.py
@@ -775,7 +775,7 @@ def _generate_workflow_instructions(
    If validation fails, you will be asked to retry with error feedback.
 
 IMPORTANT NOTES:
-- Use the save_artifact tool OR inline fenced blocks to create the output artifact
+- Create the output artifact using the inline fenced block syntax («««save_artifact: filename="..." mime_type="..."»»»), NOT a tool call
 - The result embed («result:artifact=...») is MANDATORY - the invocation will fail without it
 - The artifact format (JSON, YAML, etc.) must be parseable
 - Additional fields beyond the schema are allowed, but all required fields must be present
diff --git a/tests/integration/scenarios_declarative/test_data/workflows/test_workflow_correct_artifact_tool_in_prompt.yaml b/tests/integration/scenarios_declarative/test_data/workflows/test_workflow_correct_artifact_tool_in_prompt.yaml
@@ -0,0 +1,106 @@
+# Regression test for issue #1261:
+#
+# _generate_workflow_instructions (handler.py) must not reference "save_artifact"
+# as a tool name. save_artifact is not a registered tool — it is the keyword
+# used inside the inline fenced block syntax («««save_artifact:...»»»).
+#
+# When a workflow node receives a StructuredInvocationRequest with an
+# output_schema, the handler injects instructions into the LLM prompt. The
+# injected text previously said "Use the save_artifact tool", causing LLMs to
+# hallucinate tool names ($RESULT_TOOL, $ARTIFACT_TOOL etc.) and trigger an
+# unnecessary retry round-trip.
+#
+# The correct artifact creation mechanism is the inline fenced block syntax, NOT a tool call.
+#
+# This test verifies:
+#   1. append_to_artifact and load_artifact ARE present in the tool list (both are asserted below)
+#   2. save_artifact is NOT present as a tool (it is not a registered tool)
+#   3. Each node completes in two LLM turns (artifact save, then result embed) with no retry turn
+
+test_case_id: "workflow_correct_artifact_tool_in_prompt_001"
+description: "Regression guard for issue #1261: workflow node prompt must not reference save_artifact as a tool; inline fenced block syntax is the correct artifact creation mechanism."
+tags: ["all", "workflows", "validation"]
+skip_intermediate_events: true
+expected_completion_timeout_seconds: 20
+
+test_runner_config_overrides:
+  agent_config:
+    artifact_scope: "namespace"
+
+gateway_input:
+  target_agent_name: "StructuredTestWorkflow"
+  user_identity: "issue1261_test_user@example.com"
+  external_context:
+    a2a_session_id: "issue1261_test_session"
+  parts:
+    - type: "data"
+      data:
+        customer_name: "Alice"
+        order_id: "ORD-9999"
+        amount: 42
+
+llm_interactions:
+  # Turn 1 — TestPeerAgentA receives structured invocation with output_schema_override.
+  # This is the turn where the injected prompt previously said "Use the save_artifact tool".
+  # Assert the corrected tool list: append_to_artifact present, save_artifact absent.
+  - static_response:
+      choices:
+        - message:
+            role: "assistant"
+            content: |
+              «««save_artifact: filename="validate_output.json" mime_type="application/json" description="Validated order"
+              {"customer_name": "Alice", "order_id": "ORD-9999", "amount": 42, "status": "validated"}
+              »»»
+    expected_request:
+      tools_present:
+        - "append_to_artifact"
+        - "load_artifact"
+      tools_absent:
+        - "save_artifact"
+
+  # Turn 2 — artifact saved, emit mandatory result embed (no retry needed).
+  - static_response:
+      choices:
+        - message:
+            role: "assistant"
+            content: "Order validated. «result:artifact=validate_output.json:v0 status=success»"
+    expected_request:
+      expected_tool_responses_in_llm_messages:
+        - tool_name: "_notify_artifact_save"
+          response_json_matches:
+            filename: "validate_output.json"
+            status: "success"
+
+  # Turn 3 — TestPeerAgentB receives structured invocation with output_schema_override.
+  # Same assertion: append_to_artifact present, save_artifact absent.
+  - static_response:
+      choices:
+        - message:
+            role: "assistant"
+            content: |
+              «««save_artifact: filename="process_output.json" mime_type="application/json" description="Processed order"
+              {"customer_name": "Alice", "order_id": "ORD-9999", "amount": 42, "status": "validated", "processed": true}
+              »»»
+    expected_request:
+      tools_present:
+        - "append_to_artifact"
+        - "load_artifact"
+      tools_absent:
+        - "save_artifact"
+
+  # Turn 4 — artifact saved, emit result embed.
+  - static_response:
+      choices:
+        - message:
+            role: "assistant"
+            content: "Order processed. «result:artifact=process_output.json:v0 status=success»"
+    expected_request:
+      expected_tool_responses_in_llm_messages:
+        - tool_name: "_notify_artifact_save"
+          response_json_matches:
+            filename: "process_output.json"
+            status: "success"
+
+expected_gateway_output:
+  - type: "final_response"
+    task_state: "completed"
diff --git a/tests/unit/agent/sac/test_workflow_instructions.py b/tests/unit/agent/sac/test_workflow_instructions.py
@@ -0,0 +1,83 @@
+"""
+Regression tests for _generate_workflow_instructions — ensures the prompt injected
+into workflow node agents does not reference non-existent or wrong tool names,
+which would cause LLM hallucinations (issue #1261).
+"""
+
+from unittest.mock import MagicMock
+
+from src.solace_agent_mesh.agent.sac.structured_invocation.handler import (
+    StructuredInvocationHandler,
+)
+from src.solace_agent_mesh.common.data_parts import StructuredInvocationRequest
+
+
+OUTPUT_SCHEMA = {  # schema passed as the output-schema argument in the tests below
+    "type": "object",
+    "properties": {"doubled_value": {"type": "number"}},
+    "required": ["doubled_value"],
+}
+
+
+def _make_handler() -> StructuredInvocationHandler:
+    """Return a handler built on a mock component whose get_config() yields None."""
+    component = MagicMock(**{"get_config.return_value": None})
+    return StructuredInvocationHandler(component)
+
+
+def _make_request(**kwargs) -> StructuredInvocationRequest:
+    fields = {"workflow_name": "TestWorkflow", "node_id": "test_node", **kwargs}
+    return StructuredInvocationRequest(**fields)
+
+
+class TestWorkflowInstructionsToolNames:
+    """Issue #1261 regression checks: the prompt must not name non-existent tools."""
+
+    def test_output_schema_branch_does_not_say_save_artifact_tool(self):
+        # The phrase "save_artifact tool" was the hallucination trigger.
+        # "save_artifact" on its own is fine (it is the fenced block keyword);
+        # only the tool-name phrasing must be absent.
+        handler = _make_handler()
+        request = _make_request()
+        prompt = handler._generate_workflow_instructions(request, OUTPUT_SCHEMA)
+        assert "save_artifact tool" not in prompt
+
+    def test_no_output_schema_branch_does_not_say_save_artifact_tool(self):
+        handler = _make_handler()
+        request = _make_request()
+        prompt = handler._generate_workflow_instructions(request, None)
+        assert "save_artifact tool" not in prompt
+
+    def test_output_schema_branch_mentions_inline_fenced_block_syntax(self):
+        # Artifacts are created with the «««save_artifact:...»»» inline fenced
+        # block rather than a tool call, so that keyword must be in the prompt.
+        handler = _make_handler()
+        request = _make_request()
+        prompt = handler._generate_workflow_instructions(request, OUTPUT_SCHEMA)
+        assert "save_artifact:" in prompt
+
+    def test_suggested_filename_branch_does_not_say_save_artifact_tool(self):
+        handler = _make_handler()
+        request = _make_request(suggested_output_filename="output.json")
+        prompt = handler._generate_workflow_instructions(request, OUTPUT_SCHEMA)
+        assert "save_artifact tool" not in prompt
+
+    def test_output_schema_is_serialized_into_instructions(self):
+        handler = _make_handler()
+        request = _make_request()
+        prompt = handler._generate_workflow_instructions(request, OUTPUT_SCHEMA)
+        assert "doubled_value" in prompt
+
+    def test_result_embed_syntax_is_present(self):
+        handler = _make_handler()
+        request = _make_request()
+        prompt = handler._generate_workflow_instructions(request, OUTPUT_SCHEMA)
+        assert "result:artifact=" in prompt
+
+    def test_not_a_tool_call_is_explicit(self):
+        # The prompt has to state outright that the fenced block is NOT a tool
+        # call, so the LLM does not invent a function name for it.
+        handler = _make_handler()
+        request = _make_request()
+        prompt = handler._generate_workflow_instructions(request, OUTPUT_SCHEMA)
+        assert "NOT a tool call" in prompt