Skip to content

Commit 20ae545

Browse files
authored
fix models for transformers>=5 (#4381)
* fix models for transformers>=5
* remove qwen2_vl config
1 parent 456aca0 commit 20ae545

8 files changed

Lines changed: 56 additions & 17 deletions

File tree

lmdeploy/pytorch/configurations/chatglm.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ def condition(cls, hf_config):
1515
def build(cls, hf_config, model_path: str = None, **kwargs):
1616
"""build."""
1717
head_dim = hf_config.hidden_size // hf_config.num_attention_heads
18-
bos_token_id = hf_config.bos_token_id
18+
bos_token_id = getattr(hf_config, 'bos_token_id', None)
1919
if bos_token_id is None:
2020
bos_token_id = hf_config.pad_token_id
2121

lmdeploy/pytorch/configurations/deepseek_v2.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,12 +41,13 @@ def build(cls, hf_config, model_path: str = None, is_draft_model: bool = False,
4141
if is_draft_model or spec_method is not None:
4242
model_paradigm = 'ar_spec'
4343

44+
bos_token_id = getattr(hf_config, 'bos_token_id', None)
4445
config = ModelConfig(
4546
hidden_size=hf_config.hidden_size,
4647
num_layers=num_layers,
4748
num_attention_heads=num_attention_heads,
4849
num_key_value_heads=num_key_value_heads,
49-
bos_token_id=hf_config.bos_token_id,
50+
bos_token_id=bos_token_id,
5051
eos_token_id=hf_config.eos_token_id,
5152
head_dim=head_dim,
5253
k_head_dim=k_head_dim,

lmdeploy/pytorch/configurations/qwen3_vl.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,15 @@ class Qwen3VLModelConfigBuilder(AutoModelConfigBuilder):
88
@classmethod
99
def condition(cls, hf_config):
1010
"""config."""
11-
return hf_config.model_type in ['qwen3_vl', 'qwen3_vl_moe']
11+
return hf_config.model_type in ['qwen2_vl', 'qwen2_5_vl', 'qwen3_vl', 'qwen3_vl_moe']
1212

1313
@classmethod
1414
def build(cls, hf_config, model_path: str = None, **kwargs):
1515
"""build."""
16+
if not hasattr(hf_config, 'text_config'):
17+
# for transformers <= 5
18+
return DefaultModelConfigBuilder.build(hf_config, model_path, **kwargs)
19+
1620
if hasattr(hf_config, 'quantization_config') and not hasattr(hf_config.text_config, 'quantization_config'):
1721
setattr(hf_config.text_config, 'quantization_config', hf_config.quantization_config)
1822
cfg = DefaultModelConfigBuilder.build(hf_config.text_config, model_path, **kwargs)

lmdeploy/pytorch/models/gemma.py

Lines changed: 37 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -219,7 +219,7 @@ def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None, device:
219219
is_tp=True,
220220
)
221221

222-
hidden_activation = config.hidden_activation
222+
hidden_activation = getattr(config, 'hidden_activation', None)
223223
if hidden_activation is None:
224224
hidden_activation = 'gelu_pytorch_tanh'
225225
assert hidden_activation == 'gelu_pytorch_tanh'
@@ -381,16 +381,47 @@ def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None, device:
381381
self.norm = RMSNorm(config.hidden_size, config.rms_norm_eps, dtype=dtype, device=device)
382382

383383
# build rotary embedding
384-
self.rotary_emb = build_rotary_embedding_from_config(config)
384+
self.build_rope_emb(config)
385385

386-
if self.model_type == 'gemma3_text':
387-
rope_dim = config.head_dim
388-
rope_max_pos_emb = config.max_position_embeddings
386+
def build_rope_emb(self, config: PretrainedConfig):
387+
rope_dim = config.head_dim
388+
rope_max_pos_emb = config.max_position_embeddings
389+
390+
if self.model_type != 'gemma3_text':
391+
self.rotary_emb = build_rotary_embedding_from_config(config)
392+
return
393+
394+
# for gemma3
395+
if hasattr(config, 'rope_local_base_freq'):
389396
rope_base = config.rope_local_base_freq
397+
self.rotary_emb = build_rotary_embedding_from_config(config)
398+
399+
if self.model_type == 'gemma3_text':
400+
self.rotary_emb_local = build_rotary_embedding(
401+
rope_dim,
402+
rope_max_pos_emb,
403+
rope_base,
404+
emb_type=RopeType.Default,
405+
)
406+
else:
407+
# for transformers>=5
408+
rope_dim = config.head_dim
409+
from lmdeploy.pytorch.nn.rotary_embedding import get_rope_parameters
410+
rope_parameters = get_rope_parameters(config)
411+
full_attention = rope_parameters['full_attention']
412+
sliding_attention = rope_parameters['sliding_attention']
413+
# note that emb type has been fixed.
414+
self.rotary_emb = build_rotary_embedding(
415+
rope_dim,
416+
rope_max_pos_emb,
417+
base=full_attention['rope_theta'],
418+
scaling_factor=full_attention['factor'],
419+
emb_type=RopeType.LinearScaling,
420+
)
390421
self.rotary_emb_local = build_rotary_embedding(
391422
rope_dim,
392423
rope_max_pos_emb,
393-
rope_base,
424+
base=sliding_attention['rope_theta'],
394425
emb_type=RopeType.Default,
395426
)
396427

lmdeploy/pytorch/models/llama4.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -811,7 +811,6 @@ def __init__(self,
811811
self._update_quant_config(config)
812812
self.language_model = Llama4ForCausalLM(config.text_config, ctx_mgr, dtype=dtype, device=device)
813813
self.vocab_size = config.text_config.vocab_size
814-
self.pad_token_id = self.config.pad_token_id if self.config.pad_token_id is not None else -1
815814

816815
self.input_processor = Llama4InputProcessor(config, dtype)
817816

lmdeploy/pytorch/models/qwen2_5_vl.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -406,11 +406,13 @@ def __init__(self,
406406
dtype=dtype,
407407
device=device,
408408
)
409+
# get text_config
410+
text_config = getattr(config, 'text_config', config)
409411
# build model
410-
self.model = Qwen2Model(config, dtype=dtype, device=device)
412+
self.model = Qwen2Model(text_config, dtype=dtype, device=device)
411413
# build lm_head
412-
self.lm_head = build_rowwise_linear(config.hidden_size,
413-
config.vocab_size,
414+
self.lm_head = build_rowwise_linear(text_config.hidden_size,
415+
text_config.vocab_size,
414416
bias=False,
415417
dtype=dtype,
416418
device=device)

lmdeploy/pytorch/models/qwen2_vl.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -625,11 +625,13 @@ def __init__(self,
625625
dtype=dtype,
626626
device=device,
627627
)
628+
# get text_config
629+
text_config = getattr(config, 'text_config', config)
628630
# build model
629-
self.model = Qwen2Model(config, dtype=dtype, device=device)
631+
self.model = Qwen2Model(text_config, dtype=dtype, device=device)
630632
# build lm_head
631-
self.lm_head = build_rowwise_linear(config.hidden_size,
632-
config.vocab_size,
633+
self.lm_head = build_rowwise_linear(text_config.hidden_size,
634+
text_config.vocab_size,
633635
bias=False,
634636
dtype=dtype,
635637
device=device)

lmdeploy/vl/model/cogvlm.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ def proc_messages(messages, chat_template, sequence_start):
7070
prompt_messages.append(dict(role='user', content=content[0], num_images=n_images))
7171

7272
from lmdeploy.model import Vicuna
73-
llm_chat_template = Vicuna(eoa=chat_template.eoa, stop_words=chat_template.stop_words)
73+
llm_chat_template = Vicuna(eoa='</s>', stop_words=chat_template.stop_words)
7474
prompt = ''
7575
IMAGE_TOKEN = '<IMAGE_TOKEN>'
7676
for i, msg in enumerate(prompt_messages):

0 commit comments

Comments (0)