Skip to content

Commit 7fab628

Browse files
committed
FIX: don't skip special tokens when enable_thinking is True
When thinking mode is enabled (enable_thinking=True), special tokens are required by the thinking/reasoning output format, so skip_special_tokens is set to False in that case to preserve them.
1 parent a41ad2b commit 7fab628

1 file changed

Lines changed: 24 additions & 0 deletions

File tree

xinference/model/llm/vllm/core.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1184,6 +1184,30 @@ async def async_generate(
11841184

11851185
raise ImportError(f"{error_message}\n\n{''.join(installation_guide)}")
11861186

1187+
# When enable_thinking is True, don't skip special tokens
1188+
# Check chat_template_kwargs or reasoning_parser for enable_thinking
1189+
enable_thinking = False
1190+
if generate_config:
1191+
chat_template_kwargs = generate_config.get("chat_template_kwargs")
1192+
if chat_template_kwargs:
1193+
if isinstance(chat_template_kwargs, dict):
1194+
enable_thinking = chat_template_kwargs.get("enable_thinking", False)
1195+
elif isinstance(chat_template_kwargs, str):
1196+
try:
1197+
kwargs_dict = json.loads(chat_template_kwargs)
1198+
enable_thinking = kwargs_dict.get("enable_thinking", False)
1199+
except json.JSONDecodeError:
1200+
pass
1201+
elif not enable_thinking and self.reasoning_parser:
1202+
enable_thinking = self.reasoning_parser.enable_thinking
1203+
1204+
if (
1205+
enable_thinking
1206+
and generate_config
1207+
and generate_config.get("skip_special_tokens") is None
1208+
):
1209+
generate_config["skip_special_tokens"] = False
1210+
11871211
sanitized_generate_config = self._sanitize_generate_config(generate_config)
11881212
logger.debug(
11891213
"Enter generate, prompt: %s, generate config: %s", prompt, generate_config

0 commit comments

Comments (0)