Skip to content

Commit 093c83a

Browse files
Keyi Li and claude committed
[Bugfix] KimiK2ReasoningParser: guard against buffered end-token text in streaming
When stop sequences set output_text_buffer_length > 0, token IDs arrive in delta_token_ids before their text is flushed into delta_text. Without a guard, find() returns -1 and the reasoning/content split is silently corrupted.

Add text-presence checks before both find() calls in extract_reasoning_streaming:
- </think> end token path (line 215)
- <|tool_calls_section_begin|> section start path (line 223)

Return None (wait for flush) when the token ID is present but the text is not, matching the fix pattern from PR vllm-project#39044 (BaseThinkingReasoningParser / DeepSeekR1) and PR vllm-project#40352 (Step3ReasoningParser).

Fixes vllm-project#41067

Co-authored-by: Claude <noreply@anthropic.com>
Signed-off-by: Keyi Li <likey6688@gmail.com>
1 parent 0ff1bf9 commit 093c83a

2 files changed

Lines changed: 70 additions & 0 deletions

File tree

tests/reasoning/test_kimi_k2_reasoning_parser.py

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
# SPDX-License-Identifier: Apache-2.0
22
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
33

4+
from unittest.mock import MagicMock
5+
46
import pytest
57

68
from vllm.entrypoints.openai.chat_completion.protocol import ChatCompletionRequest
@@ -12,6 +14,20 @@
1214
REASONING_MODEL_NAME = "moonshotai/Kimi-K2.5"
1315

1416

17+
@pytest.fixture
def mock_kimi_k2_tokenizer():
    """Provide a mock tokenizer exposing only the Kimi-K2 special-token vocab.

    ``get_vocab()`` returns fixed IDs for the think markers and the
    tool-call markers.
    """
    special_token_ids = {
        "<think>": 100,
        "</think>": 101,
        "<|tool_calls_section_begin|>": 200,
        "<|tool_calls_section_end|>": 201,
        "<|tool_call_begin|>": 202,
        "<|tool_call_end|>": 203,
    }
    fake_tokenizer = MagicMock()
    fake_tokenizer.get_vocab.return_value = special_token_ids
    return fake_tokenizer
29+
30+
1531
@pytest.fixture(scope="module")
def kimi_k2_tokenizer():
    """Return the tokenizer for ``REASONING_MODEL_NAME`` (module-scoped fixture)."""
    return get_tokenizer(
        tokenizer_name=REASONING_MODEL_NAME,
        trust_remote_code=True,
    )
@@ -153,3 +169,50 @@ def test_streaming_tool_section_ends_reasoning(kimi_k2_tokenizer):
153169
)
154170
assert isinstance(result, DeltaMessage)
155171
assert result.content == "<|tool_calls_section_begin|>"
172+
173+
174+
def test_streaming_end_token_id_buffered(mock_kimi_k2_tokenizer):
    """The ``</think>`` token ID may arrive before its text is flushed.

    With stop-sequence buffering the ID shows up in ``delta_token_ids``
    while ``delta_text`` does not yet contain the string. The parser is
    expected to return ``None`` and wait for a later delta, rather than
    splitting the text at the ``-1`` that ``find()`` returns.
    """
    parser = KimiK2ReasoningParser(mock_kimi_k2_tokenizer)
    start_id = parser._start_token_id
    end_id = parser._end_token_id
    filler_id = 999

    # The delta carries two token IDs so the single-special-token guard
    # does not handle it, and its text does not contain "</think>" yet.
    streamed = {
        "previous_text": "some reasoning",
        "current_text": "some reasoning extra",
        "delta_text": "extra",
        "previous_token_ids": [start_id],
        "current_token_ids": [start_id, end_id, filler_id],
        "delta_token_ids": [end_id, filler_id],
    }

    assert parser.extract_reasoning_streaming(**streamed) is None
197+
198+
199+
def test_streaming_tool_section_id_buffered(mock_kimi_k2_tokenizer):
    """The tool-section start ID may arrive before its text is flushed.

    Mirrors the buffered ``</think>`` case for
    ``<|tool_calls_section_begin|>``: the ID is in ``delta_token_ids`` but
    the string is not yet in ``delta_text``. The parser is expected to
    return ``None``; slicing at the ``-1`` from ``find()`` would drop the
    last character of the reasoning.
    """
    parser = KimiK2ReasoningParser(mock_kimi_k2_tokenizer)
    start_id = parser._start_token_id
    tool_start_id = parser._tool_section_start_token_id
    filler_id = 999

    # delta_text does not contain the tool-section marker yet.
    streamed = {
        "previous_text": "some reasoning",
        "current_text": "some reasoning extra",
        "delta_text": "extra",
        "previous_token_ids": [start_id],
        "current_token_ids": [start_id, tool_start_id, filler_id],
        "delta_token_ids": [tool_start_id, filler_id],
    }

    assert parser.extract_reasoning_streaming(**streamed) is None

vllm/reasoning/kimi_k2_reasoning_parser.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -221,6 +221,10 @@ def extract_reasoning_streaming(
221221
return None
222222

223223
if self._end_token_id in delta_token_ids:
224+
if self._end_token not in delta_text:
225+
# Token ID arrived before text was flushed (stop-sequence buffering).
226+
# Wait for the next delta when the text becomes visible.
227+
return None
224228
end_index = delta_text.find(self._end_token)
225229
reasoning = delta_text[:end_index]
226230
content = delta_text[end_index + len(self._end_token) :]
@@ -229,6 +233,9 @@ def extract_reasoning_streaming(
229233
)
230234

231235
if self._tool_section_start_token_id in delta_token_ids:
236+
if self._tool_section_start_token not in delta_text:
237+
# Token ID arrived before text was flushed (stop-sequence buffering).
238+
return None
232239
tool_index = delta_text.find(self._tool_section_start_token)
233240
reasoning = delta_text[:tool_index]
234241
content = delta_text[tool_index:]

0 commit comments

Comments
 (0)