Skip to content

Commit 5fd9e51

Browse files
committed
FIX: type errors in vllm and sglang multimodal chat engines
1 parent b15bb51 commit 5fd9e51

2 files changed

Lines changed: 8 additions & 3 deletions

File tree

xinference/model/llm/sglang/core.py

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,7 @@
1919
import threading
2020
import time
2121
import uuid
22-
from typing import AsyncGenerator, Dict, List, Optional, Tuple, TypedDict, Union
22+
from typing import Any, AsyncGenerator, Dict, List, Optional, Tuple, TypedDict, Union
2323

2424
from xoscar.utils import get_next_port
2525

@@ -334,6 +334,11 @@ def _sanitize_generate_config(
334334

335335
return generate_config
336336

337+
def _get_tokenizer(self, lora_request: Any = None) -> Any:
338+
if self._engine is None:
339+
return None
340+
return self._engine.get_tokenizer()
341+
337342
@classmethod
338343
def check_lib(cls) -> Union[bool, Tuple[bool, str]]:
339344
dep_check = check_dependency_available("sglang", "sglang")
@@ -829,7 +834,7 @@ async def async_chat(
829834
chat_template = self.model_family.chat_template
830835
tokenizer = None
831836
if not chat_template:
832-
tokenizer = self._tokenizer
837+
tokenizer = self._get_tokenizer(None)
833838
if tokenizer is not None:
834839
chat_template = getattr(tokenizer, "chat_template", None)
835840
if not chat_template:

xinference/model/llm/vllm/core.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2002,7 +2002,7 @@ async def async_chat(
20022002
assert self.model_family.chat_template is not None
20032003

20042004
# Handle empty chat_template by falling back to tokenizer's chat_template
2005-
chat_template = self.model_family.chat_template
2005+
chat_template: Optional[str] = self.model_family.chat_template
20062006
tokenizer = None
20072007
if not chat_template:
20082008
tokenizer = await self._get_tokenizer(None)

0 commit comments

Comments
 (0)