|
"""
Regression tests for _generate_workflow_instructions — ensures the prompt injected
into workflow node agents does not reference non-existent or wrong tool names,
which would cause LLM hallucinations (issue #1261).
"""
| 6 | + |
| 7 | +from unittest.mock import MagicMock |
| 8 | + |
| 9 | +from src.solace_agent_mesh.agent.sac.structured_invocation.handler import ( |
| 10 | + StructuredInvocationHandler, |
| 11 | +) |
| 12 | +from src.solace_agent_mesh.common.data_parts import StructuredInvocationRequest |
| 13 | + |
| 14 | + |
# JSON Schema passed as the output schema argument in the tests below.
# The "doubled_value" property name is what the serialization test looks for
# in the generated instructions.
OUTPUT_SCHEMA = {
    "type": "object",
    "properties": {"doubled_value": {"type": "number"}},
    "required": ["doubled_value"],
}
| 20 | + |
| 21 | + |
def _make_handler() -> StructuredInvocationHandler:
    """Return a StructuredInvocationHandler built around a mocked component.

    The mock's ``get_config`` is pinned to return ``None``; without that a
    ``MagicMock`` would hand back another mock object for every config lookup.
    NOTE(review): presumably the handler reads config during construction and
    treats ``None`` as "not configured" — confirm against the handler.
    """
    mock_component = MagicMock()
    mock_component.get_config.return_value = None
    return StructuredInvocationHandler(mock_component)
| 26 | + |
| 27 | + |
def _make_request(**kwargs) -> StructuredInvocationRequest:
    """Return a StructuredInvocationRequest with test defaults applied.

    Args:
        **kwargs: Request fields to set; any key given here overrides the
            default ``workflow_name`` / ``node_id`` values.
    """
    defaults = {"workflow_name": "TestWorkflow", "node_id": "test_node"}
    # Later keys win in the merged dict, so caller-supplied values take
    # precedence over the defaults.
    return StructuredInvocationRequest(**{**defaults, **kwargs})
| 31 | + |
| 32 | + |
class TestWorkflowInstructionsToolNames:
    """Issue #1261: instructions must not reference non-existent tool names."""

    @staticmethod
    def _instructions(output_schema, **request_kwargs):
        """Generate workflow instructions for a fresh handler and request.

        Args:
            output_schema: Schema dict (or ``None``) passed straight through
                to ``_generate_workflow_instructions``.
            **request_kwargs: Extra request fields forwarded to
                ``_make_request``.

        Returns:
            Whatever ``_generate_workflow_instructions`` returns; the tests
            below use it as the instruction text.
        """
        return _make_handler()._generate_workflow_instructions(
            _make_request(**request_kwargs), output_schema
        )

    def test_output_schema_branch_does_not_say_save_artifact_tool(self):
        # "save_artifact tool" was the hallucination trigger — the string
        # is fine as part of the fenced block syntax, but must not appear
        # as a tool name reference.
        instructions = self._instructions(OUTPUT_SCHEMA)
        assert "save_artifact tool" not in instructions

    def test_no_output_schema_branch_does_not_say_save_artifact_tool(self):
        instructions = self._instructions(None)
        assert "save_artifact tool" not in instructions

    def test_output_schema_branch_mentions_inline_fenced_block_syntax(self):
        # The correct mechanism for creating artifacts is the «««save_artifact:...»»»
        # inline fenced block, not a tool call.
        instructions = self._instructions(OUTPUT_SCHEMA)
        assert "save_artifact:" in instructions

    def test_suggested_filename_branch_does_not_say_save_artifact_tool(self):
        instructions = self._instructions(
            OUTPUT_SCHEMA, suggested_output_filename="output.json"
        )
        assert "save_artifact tool" not in instructions

    def test_output_schema_is_serialized_into_instructions(self):
        instructions = self._instructions(OUTPUT_SCHEMA)
        assert "doubled_value" in instructions

    def test_result_embed_syntax_is_present(self):
        instructions = self._instructions(OUTPUT_SCHEMA)
        assert "result:artifact=" in instructions

    def test_not_a_tool_call_is_explicit(self):
        # The instruction must clarify the fenced block is NOT a tool call,
        # preventing the LLM from inventing a function name.
        instructions = self._instructions(OUTPUT_SCHEMA)
        assert "NOT a tool call" in instructions
0 commit comments