Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions src/transformers/conversion_mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -622,12 +622,32 @@ def get_model_conversion_mapping(
seen_model_types.add(model.config.model_type)

# Recurse over submodules and collect all conversions
inner_model = getattr(model, "model", None)
nested_language_model = getattr(inner_model, "language_model", None) if inner_model is not None else None
for submodule in model.modules():
if (
submodule is not model
and isinstance(submodule, PreTrainedModel)
and submodule.config.model_type not in seen_model_types
):
# `qwen3_5_text` maps hub keys `model.language_model.*` to the causal LM trunk `model.*` for
# standalone `Qwen3_5ForCausalLM`. Reversing that mapping on save replaces a leading `model` segment,
# which wrongly matches keys that already start with `model.language_model.` on composite VL models and
# duplicates the prefix (https://github.com/huggingface/transformers/issues/45216). Skip the text remap
# when this submodule is the nested `model.language_model` trunk; still apply `qwen2_moe` for
# `qwen3_5_moe_text` inside MoE VL models.
if nested_language_model is not None and submodule is nested_language_model:
model_type = submodule.config.model_type
if model_type == "qwen3_5_text":
seen_model_types.add(model_type)
continue
if model_type == "qwen3_5_moe_text":
moe_conversions = get_checkpoint_conversion_mapping("qwen2_moe")
if moe_conversions is not None:
weight_conversions.extend(moe_conversions)
seen_model_types.add(model_type)
continue

conversions = extract_weight_conversions_for_model(submodule)
if conversions is not None:
weight_conversions.extend(conversions)
Expand Down
19 changes: 19 additions & 0 deletions tests/models/qwen3_5/test_modeling_qwen3_5.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
"""Testing suite for the PyTorch Qwen3.5 model."""

import copy
import os
import tempfile
import unittest

from transformers import AutoProcessor, AutoTokenizer, is_torch_available
Expand Down Expand Up @@ -304,6 +306,23 @@ def setUp(self):
def test_config(self):
self.config_tester.run_common_tests()

def test_save_pretrained_no_triple_nested_language_model_prefix(self):
    """Regression test for huggingface/transformers#45216: saving the composite VL model
    must not re-apply the causal-LM key remap and duplicate the `language_model` segment
    in any checkpoint key.
    """
    from safetensors.torch import load_file

    config, _ = self.model_tester.prepare_config_and_inputs_for_common()
    model = Qwen3_5ForConditionalGeneration._from_config(config)
    model.to(torch_device)

    # A tripled segment is the signature of the remap being applied to keys that
    # already carry the `language_model.` prefix.
    forbidden_segment = "language_model.language_model.language_model"
    with tempfile.TemporaryDirectory() as save_dir:
        model.save_pretrained(save_dir)
        state_dict = load_file(os.path.join(save_dir, "model.safetensors"))
        for saved_key in state_dict:
            self.assertNotIn(
                forbidden_segment,
                saved_key,
                f"Unexpected repeated language_model segments in saved key {saved_key!r}",
            )

@unittest.skip(
"Conversion only for the `CausalLM` loading from saved `ConditionalLM`, doesn't apply to simple VLM"
)
Expand Down
Loading