Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 7 additions & 6 deletions lmdeploy/serve/processors/multimodal.py
Original file line number Diff line number Diff line change
Expand Up @@ -399,13 +399,14 @@ async def _get_multimodal_prompt_input(self,
mm_processor_kwargs=mm_processor_kwargs)
else:
results = await self.vl_encoder.preprocess(messages, mm_processor_kwargs=mm_processor_kwargs)
results = await self.vl_encoder.async_infer(results)
if not self.vl_encoder.model._turbomind_native_vision:
results = await self.vl_encoder.async_infer(results)
results = await self.vl_encoder.wrap_for_turbomind(messages=results,
chat_template=chat_template,
tokenizer=self.tokenizer,
sequence_start=sequence_start,
tools=tools,
chat_template_kwargs=chat_template_kwargs)
chat_template=chat_template,
tokenizer=self.tokenizer,
sequence_start=sequence_start,
tools=tools,
chat_template_kwargs=chat_template_kwargs)
elif self.backend == 'pytorch':
if self.vl_encoder._uses_new_preprocess:
input_prompt = self.vl_encoder.model.get_input_prompt(messages=messages,
Expand Down
1 change: 1 addition & 0 deletions lmdeploy/turbomind/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,6 @@
from .llama import LlamaModel # noqa: F401
from .mixtral import MixtralModel # noqa: F401
from .qwen2 import Qwen2Model # noqa: F401
from .qwen2_vl import Qwen2VLModel # noqa: F401
from .qwen3 import Qwen3TextModel # noqa: F401
from .qwen3_5 import Qwen3_5Model, Qwen3_5TextModel, Qwen3_5VisionModel # noqa: F401
Loading
Loading