FIX: type errors in vllm and sglang multimodal chat engines

qinxuye · qinxuye · commit 5fd9e515f120 · 2026-04-12T12:32:52.000+08:00
diff --git a/xinference/model/llm/sglang/core.py b/xinference/model/llm/sglang/core.py
@@ -19,7 +19,7 @@
 import threading
 import time
 import uuid
-from typing import AsyncGenerator, Dict, List, Optional, Tuple, TypedDict, Union
+from typing import Any, AsyncGenerator, Dict, List, Optional, Tuple, TypedDict, Union
 
 from xoscar.utils import get_next_port
 
@@ -334,6 +334,11 @@ def _sanitize_generate_config(
 
         return generate_config
 
+    def _get_tokenizer(self, lora_request: Any = None) -> Any:
+        if self._engine is None:  # no engine, hence no tokenizer
+            return None
+        return self._engine.get_tokenizer()  # lora_request is not consulted
+
     @classmethod
     def check_lib(cls) -> Union[bool, Tuple[bool, str]]:
         dep_check = check_dependency_available("sglang", "sglang")
@@ -829,7 +834,7 @@ async def async_chat(
         chat_template = self.model_family.chat_template
         tokenizer = None
         if not chat_template:
-            tokenizer = self._tokenizer
+            tokenizer = self._get_tokenizer(None)
             if tokenizer is not None:
                 chat_template = getattr(tokenizer, "chat_template", None)
         if not chat_template:
diff --git a/xinference/model/llm/vllm/core.py b/xinference/model/llm/vllm/core.py
@@ -2002,7 +2002,7 @@ async def async_chat(
             assert self.model_family.chat_template is not None
 
             # Handle empty chat_template by falling back to tokenizer's chat_template
-            chat_template = self.model_family.chat_template
+            chat_template: Optional[str] = self.model_family.chat_template
             tokenizer = None
             if not chat_template:
                 tokenizer = await self._get_tokenizer(None)