-
Notifications
You must be signed in to change notification settings - Fork 2.5k
[None][feat] Add the Qwen3.5 multimodal support. #12611
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,3 +1,7 @@ | ||
| from tensorrt_llm._torch.configs.deepseek_v3 import DeepseekV3Config | ||
| from tensorrt_llm._torch.configs.qwen3_5 import Qwen3_5MoeConfig | ||
|
|
||
| __all__ = ["DeepseekV3Config"] | ||
| __all__ = [ | ||
| "DeepseekV3Config", | ||
| "Qwen3_5MoeConfig", | ||
| ] |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,7 +1,26 @@ | ||
| import re | ||
|
|
||
| from typing import Dict, List | ||
|
|
||
| import torch | ||
| from transformers import PretrainedConfig | ||
|
|
||
| from ...inputs import ( | ||
| MultimodalPlaceholderMetadata, | ||
| MultimodalPlaceholderPlacement, | ||
| register_input_processor, | ||
| support_multimodal_disaggregated, | ||
| ) | ||
| from .checkpoints.base_weight_mapper import BaseWeightMapper | ||
| from .checkpoints.hf.qwen3_5_weight_mapper import Qwen3_5MoeHfWeightMapper | ||
| from .modeling_multimodal_utils import _is_disagg | ||
| from .modeling_qwen3_next import Qwen3NextForCausalLM | ||
| from .modeling_utils import register_auto_model | ||
| from .modeling_qwen3vl import ( | ||
| Qwen3VisionModel, | ||
| Qwen3VisionModelBase, | ||
| Qwen3VLInputProcessorBase, | ||
| Qwen3VLModelBase, | ||
| ) | ||
| from .modeling_utils import ModelConfig, register_auto_model, register_vision_encoder | ||
|
|
||
| _LANG_PREFIX = "model.language_model." | ||
|
|
||
|
|
@@ -34,6 +53,32 @@ def _normalize_qwen35_exclude_modules(model_config): | |
| qc.exclude_modules = sorted(normalized) | ||
|
|
||
|
|
||
def _ensure_qwen35_mrope_compat(text_config: "PretrainedConfig") -> None:
    """Normalize Qwen3.5 mRoPE fields for the shared Qwen3-VL wrapper.

    Qwen3.5 stores RoPE metadata in ``rope_parameters``. Some config classes
    may also materialize default top-level ``rope_theta`` or
    ``partial_rotary_factor`` values, so prefer the checkpoint-provided nested
    values unconditionally here.

    Every other nested entry except ``rope_type`` is merged into
    ``rope_scaling``. Entries already present in ``rope_scaling`` are kept
    unless the nested ``rope_parameters`` provide the same key, in which case
    the nested value wins.

    Args:
        text_config: Text config object, mutated in place. It is left
            untouched when ``rope_parameters`` is missing or empty.
    """
    rope_parameters = getattr(text_config, "rope_parameters", None)
    if not rope_parameters:
        return

    # Work on a copy so ``text_config.rope_parameters`` itself is not modified.
    rope_params = dict(rope_parameters)

    # Promote the scalar fields to top-level attributes when the checkpoint
    # provides them; a missing or ``None`` entry leaves the attribute as-is.
    for field in ("rope_theta", "partial_rotary_factor"):
        value = rope_params.pop(field, None)
        if value is not None:
            setattr(text_config, field, value)

    # ``rope_type`` is not carried over into ``rope_scaling``.
    rope_params.pop("rope_type", None)

    # Merge instead of "set only when absent": a pre-populated ``rope_scaling``
    # must not cause the remaining nested fields to be dropped.
    rope_scaling = dict(getattr(text_config, "rope_scaling", None) or {})
    rope_scaling.update(rope_params)
    text_config.rope_scaling = rope_scaling
|
Comment on lines
+56
to
+79
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Merge nested mRoPE fields even when `rope_scaling` is already set.
Suggested change:
-    if not getattr(text_config, "rope_scaling", None):
-        rope_params.pop("rope_type", None)
-        text_config.rope_scaling = rope_params
+    rope_params.pop("rope_type", None)
+    if rope_params:
+        rope_scaling = dict(getattr(text_config, "rope_scaling", None) or {})
+        rope_scaling.update(rope_params)
+        text_config.rope_scaling = rope_scaling
🤖 Prompt for AI Agents |
||
|
|
||
|
|
||
| @register_auto_model("Qwen3_5MoeForCausalLM") | ||
| class Qwen3_5MoeForCausalLM(Qwen3NextForCausalLM): | ||
| """Thin wrapper that registers the Qwen3.5 MoE text architecture. | ||
|
|
@@ -74,3 +119,49 @@ class Qwen3_5ForCausalLM(Qwen3NextForCausalLM): | |
def __init__(self, model_config):
    """Construct the model from ``model_config``.

    The config is first passed through
    ``_normalize_qwen35_exclude_modules`` and then handed to the parent
    constructor.
    """
    # Normalize first so the parent constructor sees the normalized config.
    _normalize_qwen35_exclude_modules(model_config)
    super().__init__(model_config)
|
|
||
|
|
||
| @support_multimodal_disaggregated | ||
| @register_vision_encoder(Qwen3VisionModelBase, vlm_base_model=Qwen3VisionModel) | ||
| @register_auto_model("Qwen3_5MoeForConditionalGeneration") | ||
| @register_input_processor( | ||
| Qwen3VLInputProcessorBase, | ||
| model_type="qwen3_5_moe", | ||
| placeholder_metadata=MultimodalPlaceholderMetadata( | ||
| placeholder_map={ | ||
| "image": "<|vision_start|><|image_pad|><|vision_end|>", | ||
| "video": "<|vision_start|><|video_pad|><|vision_end|>", | ||
| }, | ||
| placeholder_placement=MultimodalPlaceholderPlacement.BEFORE_TEXT, | ||
| placeholders_separator="", | ||
| ), | ||
| ) | ||
| class Qwen3_5MoeVLModel(Qwen3VLModelBase): | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Are we only adding Qwen3.5-MoE models? It seems there is a dense model, too. |
||
| """VLM wrapper composing Qwen3 vision encoder with Qwen3.5 MoE text decoder.""" | ||
|
|
||
def __init__(self, model_config: ModelConfig[PretrainedConfig], *args, **kwargs):
    """Set up the wrapper around the Qwen3 vision encoder and the text decoder.

    The nested text config is normalized with
    ``_ensure_qwen35_mrope_compat`` before the parent constructor runs.
    ``vision_model_class`` is always forced to ``Qwen3VisionModel``, while
    ``disable_fuse_rope`` defaults to ``False`` unless the caller passed it.
    """
    text_config = model_config.pretrained_config.text_config
    _ensure_qwen35_mrope_compat(text_config)

    # Respect a caller-supplied value; only fill in the default when absent.
    kwargs.setdefault("disable_fuse_rope", False)
    # Any caller-supplied vision model class is overridden here.
    kwargs["vision_model_class"] = Qwen3VisionModel
    super().__init__(model_config, *args, **kwargs)
|
|
||
@property
def multimodal_data_device_paths(self) -> List[str]:
    """Return the dotted multimodal-data paths declared by this model.

    A new list is built on every access.
    """
    device_paths = (
        "image.pixel_values",
        "video.pixel_values_videos",
        "multimodal_embedding",
    )
    return list(device_paths)
|
|
||
def load_weights(self, weights: Dict[str, torch.Tensor], weight_mapper: BaseWeightMapper):
    """Load vision-encoder and language-model weights from one checkpoint dict.

    Args:
        weights: Checkpoint tensors keyed by parameter name.
        weight_mapper: Accepted for interface compatibility; it is not used,
            because a fresh ``Qwen3_5MoeHfWeightMapper`` is always built for
            the language model.
    """
    # The vision encoder is only loaded when not running disaggregated.
    if not _is_disagg():
        self.mm_encoder.load_weights(weights)

    text_mapper = Qwen3_5MoeHfWeightMapper()
    text_mapper.init_model_and_config(self.llm, self.model_config)

    # Everything except the ``model.visual.`` tensors goes to the language model.
    language_weights = {}
    for name, tensor in weights.items():
        if name.startswith("model.visual."):
            continue
        language_weights[name] = tensor

    # Rewrite the ``model.language_model.`` prefix to plain ``model.``.
    prefix_rewrite = {
        r"^model\.language_model\.(.*)$": r"model.\1",
    }
    self.llm.load_weights(language_weights, text_mapper, params_map=prefix_rewrite)
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -248,6 +248,7 @@ def __getitem__(self, key): | |
| deepseek_v32="DeepseekV3Config", | ||
| kimi_k2="DeepseekV3Config", | ||
| glm_moe_dsa="DeepseekV3Config", | ||
| qwen3_5_moe="Qwen3_5MoeConfig", | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Q) Can I know the reason for having a separate `Qwen3_5MoeConfig`? |
||
| ) # NOTE: HF config.json uses deepseek_v32 as model_type but with same DSV3 config class | ||
|
|
||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -397,6 +397,29 @@ def test_nvfp4_4gpus( | |
| task.evaluate(llm, sampling_params=self.sampling_params) | ||
|
|
||
|
|
||
| class TestQwen3_5_35B_A3B_VL(LlmapiAccuracyTestHarness): | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Could you add the new feature test case to the QA test list? Thanks! |
||
| MODEL_NAME = "Qwen/Qwen3.5-35B-A3B" | ||
| MODEL_PATH = f"{llm_models_root()}/Qwen3.5-35B-A3B" | ||
| MAX_NUM_TOKENS = 16384 | ||
|
|
||
| sampling_params = SamplingParams( | ||
| max_tokens=MAX_NUM_TOKENS, | ||
| truncate_prompt_tokens=MMMU.MAX_INPUT_LEN, | ||
| stop="<|endoftext|>", | ||
| ) | ||
|
|
||
| kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.6) | ||
|
|
||
def test_auto_dtype(self):
    """Run the MMMU evaluation with the class-level model and sampling settings."""
    llm_kwargs = dict(
        max_num_tokens=self.MAX_NUM_TOKENS,
        kv_cache_config=self.kv_cache_config,
    )
    # The LLM is a context manager, so it is cleaned up once evaluation ends.
    with LLM(self.MODEL_PATH, **llm_kwargs) as llm:
        MMMU(self.MODEL_NAME).evaluate(llm, sampling_params=self.sampling_params)
|
Comment on lines
+400
to
+420
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Add a device-memory guard for this 35B MMMU case. The 30B A3B test right above already skips below a memory threshold, but this larger model has no equivalent guard. That will turn low-memory jobs into predictable OOM failures instead of clean skips.
Suggested change:
+@pytest.mark.skip_less_device_memory(140000)
 class TestQwen3_5_35B_A3B_VL(LlmapiAccuracyTestHarness):
At minimum, mirror the 30B case above and bump the threshold if this model needs more headroom.
🤖 Prompt for AI Agents |
||
|
|
||
|
|
||
| class TestQwen3VL(LlmapiAccuracyTestHarness): | ||
| MODEL_NAME = "Qwen/Qwen3-VL-8B-Instruct" | ||
| MODEL_PATH = f"{llm_models_root()}/Qwen3/Qwen3-VL-8B-Instruct" | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
QQ) Was there any case that Qwen3.5 has top-level rope_parameters?