Skip to content

Commit fc7cb41

Browse files
committed
Merge pull request #1 from eigen-ai-labs/eigenai/add-qwen3vl
add qwen3vl
2 parents 8d67cb0 + 70cdfb4 commit fc7cb41

5 files changed

Lines changed: 434 additions & 0 deletions

File tree

CHANGELOG.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ NVIDIA Model Optimizer Changelog (Linux)
2121
- Add LTX-2 and Wan2.2 (T2V) support in the diffusers quantization workflow.
2222
- Add PTQ support for GLM-4.7, including loading MTP layer weights from a separate ``mtp.safetensors`` file and export as-is.
2323
- Add support for image-text data calibration in PTQ for Nemotron VL models.
24+
- Add Megatron Core export/import mapping for Qwen3-VL (``Qwen3VLForConditionalGeneration``) vision-language models. The mapping handles the ``model.language_model.`` weight prefix used by Qwen3-VL and supports both dense and MoE variants.
2425

2526
0.41 (2026-01-19)
2627
^^^^^^^^^^^^^^^^^

docs/source/deployment/3_unified_hf.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ Models:
6161
* Llama 4, 3.x (FP8, NVFP4)
6262
* Qwen 3, 2.5 (FP8, NVFP4)
6363
* Qwen 3 MoE (FP8, NVFP4)
64+
* Qwen 3-VL (FP8, NVFP4)
6465
* Deepseek R1/V3 (NVFP4)
6566
* Mixtral 8x7B (FP8, NVFP4)
6667
* Medusa (FP8)

modelopt/torch/export/plugins/mcore_common.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,10 @@
3939
qwen25_causal_lm_export,
4040
qwen25_causal_lm_import,
4141
)
42+
from .mcore_qwen3vl import (
43+
qwen3vl_causal_lm_export,
44+
qwen3vl_causal_lm_import,
45+
)
4246

4347
all_mcore_hf_export_mapping: dict[str, Any] = {
4448
"DeepseekV2ForCausalLM": deepseek_causal_lm_export,
@@ -54,6 +58,7 @@
5458
"Qwen3MoeForCausalLM": qwen3_causal_lm_export,
5559
"Qwen2ForCausalLM": qwen25_causal_lm_export,
5660
"GptOssForCausalLM": gptoss_causal_lm_export,
61+
"Qwen3VLForConditionalGeneration": qwen3vl_causal_lm_export,
5762
}
5863

5964
all_mcore_hf_import_mapping: dict[str, Any] = {
@@ -66,4 +71,5 @@
6671
"Qwen3MoeForCausalLM": qwen3_causal_lm_import,
6772
"Qwen2ForCausalLM": qwen25_causal_lm_import,
6873
"GptOssForCausalLM": gptoss_causal_lm_import,
74+
"Qwen3VLForConditionalGeneration": qwen3vl_causal_lm_import,
6975
}
Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
# SPDX-FileCopyrightText: Copyright (c) 2023-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
# SPDX-License-Identifier: Apache-2.0
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
16+
"""Custom mapping from Qwen3-VL Hugging Face models to Megatron Core models.
17+
18+
Qwen3-VL model structure differs from Qwen3:
19+
- Language model weights are under `model.language_model.` prefix
20+
- Visual encoder weights are under `model.visual.` prefix
21+
22+
This module handles the language model conversion for PTQ/QAT workflows.
23+
Visual components are typically kept in full precision.
24+
25+
HuggingFace Qwen3-VL-8B structure:
26+
- model.language_model.embed_tokens.weight
27+
- model.language_model.layers.{L}.input_layernorm.weight
28+
- model.language_model.layers.{L}.self_attn.q_proj.weight
29+
- model.language_model.layers.{L}.self_attn.k_proj.weight
30+
- model.language_model.layers.{L}.self_attn.v_proj.weight
31+
- model.language_model.layers.{L}.self_attn.q_norm.weight
32+
- model.language_model.layers.{L}.self_attn.k_norm.weight
33+
- model.language_model.layers.{L}.self_attn.o_proj.weight
34+
- model.language_model.layers.{L}.post_attention_layernorm.weight
35+
- model.language_model.layers.{L}.mlp.gate_proj.weight
36+
- model.language_model.layers.{L}.mlp.up_proj.weight
37+
- model.language_model.layers.{L}.mlp.down_proj.weight
38+
- model.language_model.norm.weight
39+
- lm_head.weight
40+
"""
41+
42+
from .mcore_custom import (
43+
COL_ETP,
44+
COL_TP,
45+
REPLICATE,
46+
ROW_ETP,
47+
ROW_TP,
48+
CustomModuleMapping,
49+
GatedMLPMerging,
50+
GatedMLPSlicing,
51+
NameRemapping,
52+
QKVMerging,
53+
QKVSlicing,
54+
)
55+
56+
# Import rules: HuggingFace -> Megatron Core
57+
# Hugging Face key prefixes used by the import table below. The ``{}``
# placeholders are passed through unformatted, exactly as the mapping classes
# receive them (layer index first, then expert index for the MoE entries).
_QWEN3VL_IMPORT_LM = "model.language_model."
_QWEN3VL_IMPORT_LAYER = _QWEN3VL_IMPORT_LM + "layers.{}."
_QWEN3VL_IMPORT_ATTN = _QWEN3VL_IMPORT_LAYER + "self_attn."
_QWEN3VL_IMPORT_MLP = _QWEN3VL_IMPORT_LAYER + "mlp."
_QWEN3VL_IMPORT_EXPERT = _QWEN3VL_IMPORT_MLP + "experts.{}."

# Hugging Face -> Megatron Core. Keys are the Megatron Core module names;
# each value pairs an HF key prefix with one of the layout constants imported
# from ``mcore_custom`` (presumably the parallel sharding mode -- confirm there).
qwen3vl_causal_lm_import: dict[str, CustomModuleMapping] = {
    # --- Model-level tensors -------------------------------------------------
    # Token embeddings sit under the language_model prefix.
    "word_embeddings": NameRemapping(_QWEN3VL_IMPORT_LM + "embed_tokens.", COL_TP),
    # Final norm of the text decoder.
    "final_layernorm": NameRemapping(_QWEN3VL_IMPORT_LM + "norm.", REPLICATE),
    # lm_head is a root-level key, so it does not use the language_model prefix.
    "output_layer": NameRemapping("lm_head.", COL_TP),
    # --- Self-attention ------------------------------------------------------
    "input_layernorm": NameRemapping(_QWEN3VL_IMPORT_LAYER + "input_layernorm.", REPLICATE),
    # Separate q/k/v projections are merged into a single linear_qkv.
    "linear_qkv": QKVMerging(_QWEN3VL_IMPORT_ATTN, COL_TP),
    "linear_proj": NameRemapping(_QWEN3VL_IMPORT_ATTN + "o_proj.", ROW_TP),
    # Q/K norm weights (q_norm / k_norm on the HF side).
    "q_layernorm": NameRemapping(_QWEN3VL_IMPORT_ATTN + "q_norm.", REPLICATE),
    "k_layernorm": NameRemapping(_QWEN3VL_IMPORT_ATTN + "k_norm.", REPLICATE),
    # --- Dense MLP -----------------------------------------------------------
    # HF names the pre-MLP norm "post_attention_layernorm".
    "pre_mlp_layernorm": NameRemapping(
        _QWEN3VL_IMPORT_LAYER + "post_attention_layernorm.", REPLICATE
    ),
    # gate_proj and up_proj are merged into linear_fc1; down_proj is linear_fc2.
    "linear_fc1": GatedMLPMerging(_QWEN3VL_IMPORT_MLP, COL_TP),
    "linear_fc2": NameRemapping(_QWEN3VL_IMPORT_MLP + "down_proj.", ROW_TP),
    # --- MoE (Qwen3-VL MoE variants such as 30B-A3B) -------------------------
    # NOTE(review): assumes the HF checkpoint stores one sub-module per expert
    # under ``mlp.experts.{idx}.`` -- confirm against the MoE checkpoint layout.
    "router": NameRemapping(_QWEN3VL_IMPORT_MLP + "gate.", REPLICATE),
    "local_experts.linear_fc1": GatedMLPMerging(_QWEN3VL_IMPORT_EXPERT, COL_ETP),
    "local_experts.linear_fc2": NameRemapping(_QWEN3VL_IMPORT_EXPERT + "down_proj.", ROW_ETP),
}
90+
91+
# Export rules: Megatron Core -> HuggingFace
92+
# Hugging Face key prefixes used by the export table below. The ``{}``
# placeholders are passed through unformatted, exactly as the mapping classes
# receive them (layer index first, then expert index for the MoE entries).
_QWEN3VL_EXPORT_LM = "model.language_model."
_QWEN3VL_EXPORT_LAYER = _QWEN3VL_EXPORT_LM + "layers.{}."
_QWEN3VL_EXPORT_ATTN = _QWEN3VL_EXPORT_LAYER + "self_attn."
_QWEN3VL_EXPORT_MLP = _QWEN3VL_EXPORT_LAYER + "mlp."
_QWEN3VL_EXPORT_EXPERT = _QWEN3VL_EXPORT_MLP + "experts.{}."

# Megatron Core -> Hugging Face. Keys are the Megatron Core module names;
# each value names the HF key prefix the module is written back to. No layout
# constant is passed on the export side.
qwen3vl_causal_lm_export: dict[str, CustomModuleMapping] = {
    # --- Model-level tensors -------------------------------------------------
    "word_embeddings": NameRemapping(_QWEN3VL_EXPORT_LM + "embed_tokens."),
    "final_layernorm": NameRemapping(_QWEN3VL_EXPORT_LM + "norm."),
    # lm_head is a root-level key, so it does not use the language_model prefix.
    "output_layer": NameRemapping("lm_head."),
    # --- Self-attention ------------------------------------------------------
    "input_layernorm": NameRemapping(_QWEN3VL_EXPORT_LAYER + "input_layernorm."),
    # linear_qkv is sliced back into separate q/k/v projections.
    "linear_qkv": QKVSlicing(_QWEN3VL_EXPORT_ATTN),
    "linear_proj": NameRemapping(_QWEN3VL_EXPORT_ATTN + "o_proj."),
    # Q/K norm weights (q_norm / k_norm on the HF side).
    "q_layernorm": NameRemapping(_QWEN3VL_EXPORT_ATTN + "q_norm."),
    "k_layernorm": NameRemapping(_QWEN3VL_EXPORT_ATTN + "k_norm."),
    # --- Dense MLP -----------------------------------------------------------
    # HF names the pre-MLP norm "post_attention_layernorm".
    "pre_mlp_layernorm": NameRemapping(_QWEN3VL_EXPORT_LAYER + "post_attention_layernorm."),
    # linear_fc1 is sliced back into gate_proj + up_proj; linear_fc2 is down_proj.
    "linear_fc1": GatedMLPSlicing(_QWEN3VL_EXPORT_MLP),
    "linear_fc2": NameRemapping(_QWEN3VL_EXPORT_MLP + "down_proj."),
    # --- MoE -----------------------------------------------------------------
    # NOTE(review): assumes the HF checkpoint expects one sub-module per expert
    # under ``mlp.experts.{idx}.`` -- confirm against the MoE checkpoint layout.
    "router": NameRemapping(_QWEN3VL_EXPORT_MLP + "gate."),
    "local_experts.linear_fc1": GatedMLPSlicing(_QWEN3VL_EXPORT_EXPERT),
    "local_experts.linear_fc2": NameRemapping(_QWEN3VL_EXPORT_EXPERT + "down_proj."),
}

0 commit comments

Comments
 (0)