11# Copyright (c) OpenMMLab. All rights reserved.
2- import os
2+ from types import SimpleNamespace
33
44import pytest
5+ import torch
56
6- from lmdeploy .messages import PytorchEngineConfig
7- from lmdeploy .serve . core . vl_async_engine import adjust_vl_backend_config_for_prefix_caching
8- from lmdeploy .vl .model .qwen3 import resolve_qwen_vl_family_automodel
7+ from lmdeploy .archs import get_task
8+ from lmdeploy .messages import TurbomindEngineConfig
9+ from lmdeploy .vl .model .qwen3 import Qwen3VLModel , resolve_qwen_vl_family_automodel
910
1011
1112@pytest .mark .parametrize ('arch,expected_block' , [
@@ -25,44 +26,81 @@ def test_resolve_unknown_arch_raises():
2526 resolve_qwen_vl_family_automodel ('NotAModel' )
2627
2728
def test_adjust_prefix_caching_leaves_qwen3vl_untouched(monkeypatch):
    """Qwen3-VL supports prefix caching, so the config must pass through unmodified."""
    fake_arch = lambda _path: ('Qwen3VLForConditionalGeneration', None)  # noqa: E731
    monkeypatch.setattr('lmdeploy.archs.get_model_arch', fake_arch)

    config = PytorchEngineConfig(enable_prefix_caching=True)
    result = adjust_vl_backend_config_for_prefix_caching('/fake', config)

    # Same object back, flag untouched.
    assert result is config
    assert config.enable_prefix_caching is True
37-
38-
def test_adjust_prefix_caching_disables_for_other_vl_without_mutation(monkeypatch):
    """Other VL archs get prefix caching disabled on a copy; the caller's config stays intact."""
    monkeypatch.setattr('lmdeploy.archs.get_model_arch',
                        lambda _path: ('InternVLChatModel', None))

    original = PytorchEngineConfig(enable_prefix_caching=True)
    adjusted = adjust_vl_backend_config_for_prefix_caching('/fake', original)

    # A distinct config is returned with caching off ...
    assert adjusted is not original
    assert adjusted.enable_prefix_caching is False
    # ... while the input config is not mutated.
    assert original.enable_prefix_caching is True
49-
50-
# Published Qwen3-VL checkpoints whose hub configs should resolve through the
# shared family loader; only fetched by the opt-in smoke test below.
_HUB_SMOKE_MODEL_IDS = (
    'Qwen/Qwen3-VL-4B-Instruct',
    'Qwen/Qwen3-VL-8B-Instruct',
)
55-
56-
def test_hub_config_resolves_qwen_vl_family_smoke():
    """Optional: verify published configs use an architecture handled by the shared loader."""
    # Opt-in gate: this test hits the HF hub, so it is skipped by default.
    enabled = os.environ.get('LMDEPLOY_RUN_VL_LOAD_TESTS')
    if enabled != '1':
        pytest.skip('HF hub config fetch; enable with LMDEPLOY_RUN_VL_LOAD_TESTS=1')
    pytest.importorskip('transformers')
    from transformers import AutoConfig

    for model_id in _HUB_SMOKE_MODEL_IDS:
        hub_cfg = AutoConfig.from_pretrained(model_id, trust_remote_code=True)
        arch_name = hub_cfg.architectures[0]
        automodel_cls, no_split_modules = resolve_qwen_vl_family_automodel(arch_name)
        assert automodel_cls is not None
        assert len(no_split_modules) >= 1
def test_get_task_routes_qwen3_vl_to_vl_engine(monkeypatch):
    """A Qwen3-VL architecture must be dispatched to the VL pipeline by get_task."""
    arch_name = 'Qwen3VLForConditionalGeneration'
    stub_config = SimpleNamespace(to_dict=lambda: {'architectures': [arch_name]})
    monkeypatch.setattr('lmdeploy.archs.get_model_arch',
                        lambda _path: (arch_name, stub_config))

    task, pipeline_class = get_task('/fake-model', TurbomindEngineConfig())

    assert task == 'vlm'
    assert pipeline_class.__name__ == 'VLAsyncEngine'
36+
37+
38+ class _DummyChatTemplate :
39+
40+ def __init__ (self , prompt ):
41+ self .prompt = prompt
42+
43+ def messages2prompt (self , messages , sequence_start , ** kwargs ):
44+ return self .prompt
45+
46+
47+ class _DummyTokenizer :
48+
49+ def encode (self , text , add_bos = False ):
50+ tokens = [] if not text else [len (text )]
51+ if add_bos :
52+ return [0 ] + tokens
53+ return tokens
54+
55+
def _build_qwen3_vl_stub():
    """Build a Qwen3VLModel carrying only the attributes to_turbomind reads.

    ``__new__`` bypasses ``__init__`` so no weights or processors are loaded.
    """
    stub = Qwen3VLModel.__new__(Qwen3VLModel)
    stub.image_token = '<|image_pad|>'
    stub.image_token_id = 151655
    stub.contains_video_input = False
    return stub
62+
63+
def test_qwen3_vl_to_turbomind_uses_image_token_placeholder():
    """to_turbomind must splice the image embedding at the <|image_pad|> position."""
    model = _build_qwen3_vl_stub()
    tokenizer = _DummyTokenizer()
    prompt = 'prefix<|vision_start|><|image_pad|><|vision_end|>suffix'
    chat_template = _DummyChatTemplate(prompt)
    grid_thw = torch.tensor([[1, 2, 2]])
    embedding = torch.randn(1, 4)
    messages = [
        {'role': 'user', 'content': [{'type': 'image', 'data': object()}]},
        {'role': 'preprocess', 'content': [{'image_grid_thw': grid_thw}]},
        {'role': 'forward', 'content': [embedding]},
    ]

    info = model.to_turbomind(messages, chat_template, tokenizer, sequence_start=True)

    # The embedding range starts right after the tokens preceding the pad token.
    expected_begin = len(tokenizer.encode('prefix<|vision_start|>', add_bos=True))
    expected_end = expected_begin + embedding.shape[0]
    assert info['input_embedding_ranges'] == [[expected_begin, expected_end]]
    assert len(info['input_embeddings']) == 1
    # mrope position ids must cover every input token.
    assert info['input_meta']['mrope_position_ids'].shape[1] == len(info['input_ids'])
93+
94+
def test_qwen3_vl_to_turbomind_rejects_video():
    """Video inputs are unsupported on the turbomind path and must raise."""
    model = _build_qwen3_vl_stub()
    model.contains_video_input = True
    preprocess_msg = {
        'role': 'preprocess',
        'content': [{'video_grid_thw': torch.tensor([[1, 2, 2]])}],
    }

    with pytest.raises(NotImplementedError, match='supports images only'):
        model.to_turbomind([preprocess_msg], _DummyChatTemplate(''), _DummyTokenizer(), sequence_start=True)
0 commit comments