dify-plugin-sdks/python/tests/interfaces/model/test_wrap_think.py at e7d9e0adce17c854e13694e025c977bd1afaaf3e · langgenius/dify-plugin-sdks · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
import unittest

from dify_plugin.entities.model import AIModelEntity, ModelPropertyKey, ModelType
from dify_plugin.entities.model.llm import LLMMode, LLMResult
from dify_plugin.interfaces.model.large_language_model import LargeLanguageModel


class MockLLM(LargeLanguageModel):
    """
    Minimal concrete stand-in for LargeLanguageModel.

    Each member below is a no-op stub; the class exists only so that tests can
    build an instance and call the non-abstract methods inherited from
    LargeLanguageModel.
    """

    @property
    def _invoke_error_mapping(self) -> dict:
        # The mock defines no error mappings.
        return {}

    def validate_credentials(self, model: str, credentials: dict) -> None:
        # Stub: performs no validation at all.
        return None

    def get_num_tokens(self, model: str, credentials: dict, prompt_messages: list, tools: list) -> int:
        # Stub: always reports zero tokens.
        return 0

    def _invoke(
        self,
        model: str,
        credentials: dict,
        prompt_messages: list,
        model_parameters: dict,
        tools: list,
        stop: list,
        stream: bool,
        user: str,
    ) -> LLMResult:
        # Stub: no model is invoked, so the result is None despite the annotation.
        return None


class TestWrapThinking(unittest.TestCase):
    """Tests for `_wrap_thinking_by_reasoning_content`, exercised through a MockLLM instance."""

    def setUp(self):
        # Create a dummy model schema to satisfy AIModel.__init__
        dummy_schema = AIModelEntity(
            model="mock_model",
            label={"en_US": "Mock Model"},
            model_type=ModelType.LLM,
            features=[],
            model_properties={ModelPropertyKey.MODE: LLMMode.CHAT.value, ModelPropertyKey.CONTEXT_SIZE: 4096},
            parameter_rules=[],
            pricing=None,
            deprecated=False,
        )
        self.llm = MockLLM(model_schemas=[dummy_schema])

    def _collect_output(self, chunks: list) -> str:
        """
        Feed stream delta chunks through the SDK implementation and join the output.

        The `is_reasoning` flag returned by each call is passed into the next
        call, starting from False, so the chunks are processed as one stream.

        :param chunks: delta dicts in stream order
        :return: concatenation of the text returned for every chunk
        """
        is_reasoning = False
        pieces = []
        for chunk in chunks:
            # Call the real SDK implementation so the test verifies actual code logic.
            output, is_reasoning = self.llm._wrap_thinking_by_reasoning_content(chunk, is_reasoning)
            pieces.append(output)
        return "".join(pieces)

    def test_wrap_thinking_logic_closure(self):
        """
        Test that when reasoning_content ends, even if content is empty (e.g. followed immediately by tool_calls),
        the <think> tag should be closed correctly.
        """

        # Simulated streaming data:
        # 1. Has reasoning_content
        # 2. reasoning_content ends, followed immediately by tool_calls (content is empty)
        chunks = [
            # Chunk 1: Thinking started
            {"reasoning_content": "Thinking started.", "content": ""},
            # Chunk 2: Still thinking #1
            {"reasoning_content": " Still thinking #1.", "content": ""},
            # Chunk 3: Still thinking (reasoning_content is empty)
            {"reasoning_content": "", "content": ""},
            # Chunk 4: Still thinking #2
            {"reasoning_content": " Still thinking #2.", "content": ""},
            # Chunk 5: Thinking ended, transitioned to Tool Call (reasoning_content=None, content empty)
            # This is a critical point, old logic would fail here because content is empty
            {"reasoning_content": None, "content": "", "tool_calls": [{"id": "call_1", "function": {}}]},
            # Chunk 6: Subsequent tool parameter stream
            {"reasoning_content": None, "content": "", "tool_calls": [{"function": {"arguments": "{"}}]},
        ]

        full_output = self._collect_output(chunks)

        expected_output = "<think>\nThinking started. Still thinking #1. Still thinking #2.\n</think>"
        self.assertEqual(full_output, expected_output)

    def test_standard_reasoning_flow(self):
        """Test standard reasoning -> text flow"""
        chunks = [
            {"reasoning_content": "Thinking.", "content": ""},
            {"reasoning_content": None, "content": "Hello world."},
        ]

        full_output = self._collect_output(chunks)

        # assertEqual (not a bare assert) keeps the check active under `python -O`
        # and matches the assertion style of the other test in this class.
        self.assertEqual(full_output, "<think>\nThinking.\n</think>Hello world.")