Skip to content

Commit 0c0286b

Browse files
Align LazyTurtle HF key resolution with reversed WeightRenaming semantics (#2793)
* Align LazyTurtle HF key resolution with reversed WeightRenaming semantics
  Signed-off-by: ZX-ModelCloud <zx@modelcloud.ai>
* cleanup
  Signed-off-by: ZX-ModelCloud <zx@modelcloud.ai>
* removed "checkpoint_path_aliases"
  Signed-off-by: ZX-ModelCloud <zx@modelcloud.ai>
---------
Signed-off-by: ZX-ModelCloud <zx@modelcloud.ai>
1 parent: fdc13fe · commit: 0c0286b

8 files changed

Lines changed: 385 additions & 152 deletions

File tree

gptqmodel/models/base.py

Lines changed: 4 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -247,12 +247,9 @@ class BaseQModel(nn.Module):
247247
server = None
248248

249249
support_offload_to_disk = True
250-
# Optional runtime->checkpoint prefix overrides used by LazyTurtle when the
251-
# execution shell inserts wrapper modules that are absent from the checkpoint.
252-
checkpoint_path_aliases: Optional[tuple[tuple[str, str], ...]] = None
253-
# Optional runtime->checkpoint prefix overrides for LazyTurtle. When unset,
254-
# the loader derives them from Hugging Face conversion mappings.
255-
HF_CONVERSION_MAP_REVERSED: Optional[Dict[str, str]] = None
250+
# Optional runtime->checkpoint overrides for LazyTurtle. Prefer reversed
251+
# `WeightRenaming` entries; legacy runtime->checkpoint dicts are still accepted.
252+
HF_CONVERSION_MAP_REVERSED: Optional[Any] = None
256253

257254
moe_expert_module_name_prefixes = [".expert"]
258255

@@ -423,7 +420,7 @@ def has_moe_flag(cls, module_spec: str) -> bool:
423420
return MOE_FLAG.lstrip(":") in flags
424421

425422
@classmethod
426-
def resolve_hf_conversion_map_reversed(cls, target_model: Optional[nn.Module] = None) -> Optional[Dict[str, str]]:
423+
def resolve_hf_conversion_map_reversed(cls, target_model: Optional[nn.Module] = None) -> Optional[Any]:
427424
configured_map = getattr(cls, "HF_CONVERSION_MAP_REVERSED", None)
428425
if configured_map is not None:
429426
return copy.deepcopy(configured_map)

gptqmodel/models/definitions/base_qwen2_vl.py

Lines changed: 0 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -21,14 +21,6 @@ class BaseQwen2VLGPTQ(BaseQModel):
2121
loader = AutoModelForImageTextToText
2222

2323
pre_lm_head_norm_module = ["model.language_model.norm", "language_model.norm"]
24-
HF_CONVERSION_MAP_REVERSED = {
25-
"model.language_model": "model",
26-
"language_model": "model",
27-
}
28-
checkpoint_path_aliases = (
29-
("model.language_model", "model"),
30-
("language_model", "model"),
31-
)
3224

3325
module_tree = [
3426
"model",

gptqmodel/models/loader.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -657,7 +657,6 @@ def skip(*args, **kwargs):
657657
config=model.config,
658658
model_init_kwargs=shell_model_init_kwargs,
659659
module_tree=copy.deepcopy(getattr(cls, "module_tree", None)),
660-
checkpoint_path_aliases=copy.deepcopy(getattr(cls, "checkpoint_path_aliases", None)),
661660
hf_conversion_map_reversed=copy.deepcopy(
662661
cls.resolve_hf_conversion_map_reversed(target_model=model)
663662
),

gptqmodel/utils/structure.py

Lines changed: 171 additions & 113 deletions
Large diffs are not rendered by default.

tests/test_lazy_turtle_conversion_mapping.py

Lines changed: 173 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,10 @@
99
from safetensors.torch import save_file
1010
from torch import nn
1111

12+
from gptqmodel.models.definitions.gemma3 import Gemma3ForConditionalGenerationGPTQ
1213
from gptqmodel.models.definitions.mixtral import MixtralQModel
14+
from gptqmodel.models.definitions.qwen2_5_vl import Qwen2_5_VLQModel
15+
from gptqmodel.models.definitions.qwen2_vl import Qwen2VLQModel
1316
from gptqmodel.utils import structure as structure_module
1417
from gptqmodel.utils.structure import LazyTurtle
1518

@@ -66,6 +69,18 @@ def __init__(self):
6669
self.config = SimpleNamespace(model_type="gemma3")
6770

6871

72+
class _Qwen2VLDummyModel(nn.Module):
73+
def __init__(self):
74+
super().__init__()
75+
self.config = SimpleNamespace(model_type="qwen2_vl")
76+
77+
78+
class _Qwen2_5_VLDummyModel(nn.Module):
79+
def __init__(self):
80+
super().__init__()
81+
self.config = SimpleNamespace(model_type="qwen2_5_vl")
82+
83+
6984
class _WeightRenamingStub:
7085
def __init__(self, source_pattern: str, target_pattern: str):
7186
self.source_patterns = [source_pattern]
@@ -82,6 +97,21 @@ def _gemma3_weight_renamings():
8297
]
8398

8499

100+
def _qwen2_vl_weight_renamings():
101+
return [
102+
_WeightRenamingStub(
103+
r"(?<!_)model(?!\.(language_model|visual))",
104+
"model.language_model",
105+
),
106+
_WeightRenamingStub(r"^visual", "model.visual"),
107+
]
108+
109+
110+
def _renaming_pairs(renamings) -> list[tuple[str, str]]:
111+
assert renamings is not None
112+
return [(entry.source_patterns[0], entry.target_patterns[0]) for entry in renamings]
113+
114+
85115
def _assert_gemma3_alias_resolution(turtle: LazyTurtle) -> None:
86116
assert turtle._resolve_checkpoint_module_path("model.language_model") == "language_model.model"
87117
assert turtle._resolve_checkpoint_module_path("model.vision_tower") == "vision_tower"
@@ -119,27 +149,58 @@ def _assert_gemma3_alias_resolution(turtle: LazyTurtle) -> None:
119149
assert turtle._resolve_checkpoint_tensor_name("lm_head", "weight") == "language_model.lm_head.weight"
120150

121151

152+
def _assert_qwen2_vl_alias_resolution(turtle: LazyTurtle) -> None:
153+
assert turtle._resolve_checkpoint_module_path("model.language_model") == "model"
154+
assert turtle._resolve_checkpoint_module_path("model.visual") == "visual"
155+
156+
assert (
157+
turtle._resolve_checkpoint_tensor_name(
158+
"model.language_model.layers.0.mlp",
159+
"gate_proj.weight",
160+
)
161+
== "model.layers.0.mlp.gate_proj.weight"
162+
)
163+
assert (
164+
turtle._resolve_checkpoint_tensor_name(
165+
"wrapper.model.language_model.layers.0.mlp",
166+
"gate_proj.weight",
167+
)
168+
== "model.layers.0.mlp.gate_proj.weight"
169+
)
170+
assert (
171+
turtle._resolve_checkpoint_tensor_name(
172+
"model.visual.blocks.0.attn",
173+
"weight",
174+
)
175+
== "visual.blocks.0.attn.weight"
176+
)
177+
178+
122179
def test_lazy_turtle_reverses_transformers_weight_renaming_list():
123180
reversed_map = LazyTurtle.reverse_hf_conversion_map(_gemma3_weight_renamings())
124181

125-
assert reversed_map == {
126-
"model.language_model": "language_model.model",
127-
"lm_head": "language_model.lm_head",
128-
"model.vision_tower": "vision_tower",
129-
"model.multi_modal_projector": "multi_modal_projector",
130-
}
182+
assert _renaming_pairs(reversed_map) == [
183+
("model.language_model", r"^language_model.model"),
184+
("lm_head", r"^language_model.lm_head"),
185+
("model.vision_tower", r"^vision_tower"),
186+
("model.multi_modal_projector", r"^multi_modal_projector"),
187+
]
131188

132189

133190
def test_lazy_turtle_runtime_to_checkpoint_alias_candidates_do_not_expand_infinitely(tmp_path):
191+
reversed_map = LazyTurtle.reverse_hf_conversion_map(
192+
{
193+
"language_model.model": "language_model",
194+
"language_model.lm_head": "lm_head",
195+
}
196+
)
197+
134198
turtle = _build_lazy_turtle(
135199
tmp_path,
136200
{
137201
"language_model.model.layers.0.self_attn.q_proj.weight": torch.zeros(2, 2),
138202
},
139-
hf_conversion_map_reversed={
140-
"language_model": "language_model.model",
141-
"lm_head": "language_model.lm_head",
142-
},
203+
hf_conversion_map_reversed=reversed_map,
143204
)
144205

145206
assert turtle._runtime_to_checkpoint_alias_candidates("language_model.layers.0") == [
@@ -148,6 +209,22 @@ def test_lazy_turtle_runtime_to_checkpoint_alias_candidates_do_not_expand_infini
148209
]
149210

150211

212+
def test_lazy_turtle_applies_reversed_weight_renamings_with_capturing_groups(tmp_path):
213+
reversed_map = LazyTurtle.reverse_hf_conversion_map(
214+
[_WeightRenamingStub(r"(.+)", r"timm_model.\1")]
215+
)
216+
217+
turtle = _build_lazy_turtle(
218+
tmp_path,
219+
{
220+
"backbone.conv.weight": torch.zeros(2, 2),
221+
},
222+
hf_conversion_map_reversed=reversed_map,
223+
)
224+
225+
assert turtle._resolve_checkpoint_tensor_name("timm_model.backbone.conv", "weight") == "backbone.conv.weight"
226+
227+
151228
def test_lazy_turtle_uses_transformers_checkpoint_conversion_mapping_for_gemma3(tmp_path, monkeypatch):
152229
conversion_mapping_module = SimpleNamespace(
153230
get_checkpoint_conversion_mapping=lambda model_type: _gemma3_weight_renamings()
@@ -170,6 +247,53 @@ def test_lazy_turtle_uses_transformers_checkpoint_conversion_mapping_for_gemma3(
170247
_assert_gemma3_alias_resolution(turtle)
171248

172249

250+
def test_lazy_turtle_uses_transformers_checkpoint_conversion_mapping_for_qwen2_vl(tmp_path, monkeypatch):
251+
conversion_mapping_module = SimpleNamespace(
252+
get_checkpoint_conversion_mapping=lambda model_type: _qwen2_vl_weight_renamings()
253+
if model_type == "qwen2_vl"
254+
else None
255+
)
256+
monkeypatch.setattr(structure_module, "import_module", lambda name: conversion_mapping_module)
257+
258+
turtle = _build_lazy_turtle(
259+
tmp_path,
260+
{
261+
"model.layers.0.mlp.gate_proj.weight": torch.zeros(2, 2),
262+
"visual.blocks.0.attn.weight": torch.zeros(2, 2),
263+
},
264+
module_tree=Qwen2VLQModel.module_tree,
265+
target_model=_Qwen2VLDummyModel(),
266+
)
267+
268+
_assert_qwen2_vl_alias_resolution(turtle)
269+
270+
271+
def test_lazy_turtle_uses_transformers_checkpoint_conversion_mapping_for_qwen2_5_vl(tmp_path, monkeypatch):
272+
observed_model_types: list[str] = []
273+
274+
def _get_checkpoint_conversion_mapping(model_type: str):
275+
observed_model_types.append(model_type)
276+
if model_type == "qwen2_5_vl":
277+
return _qwen2_vl_weight_renamings()
278+
return None
279+
280+
conversion_mapping_module = SimpleNamespace(get_checkpoint_conversion_mapping=_get_checkpoint_conversion_mapping)
281+
monkeypatch.setattr(structure_module, "import_module", lambda name: conversion_mapping_module)
282+
283+
turtle = _build_lazy_turtle(
284+
tmp_path,
285+
{
286+
"model.layers.0.mlp.gate_proj.weight": torch.zeros(2, 2),
287+
"visual.blocks.0.attn.weight": torch.zeros(2, 2),
288+
},
289+
module_tree=Qwen2_5_VLQModel.module_tree,
290+
target_model=_Qwen2_5_VLDummyModel(),
291+
)
292+
293+
assert observed_model_types == ["qwen2_5_vl"]
294+
_assert_qwen2_vl_alias_resolution(turtle)
295+
296+
173297
def test_lazy_turtle_falls_back_to_legacy_checkpoint_conversion_mapping(tmp_path, monkeypatch):
174298
def _raise_import_error(_name: str):
175299
raise ImportError("transformers.conversion_mapping is unavailable")
@@ -190,6 +314,45 @@ def _raise_import_error(_name: str):
190314
_assert_gemma3_alias_resolution(turtle)
191315

192316

317+
def test_base_qmodel_prefers_manual_hf_conversion_map_reversed(tmp_path, monkeypatch):
318+
manual_renamings = LazyTurtle.reverse_hf_conversion_map(_gemma3_weight_renamings())
319+
assert manual_renamings is not None
320+
monkeypatch.setattr(
321+
Gemma3ForConditionalGenerationGPTQ,
322+
"HF_CONVERSION_MAP_REVERSED",
323+
manual_renamings,
324+
raising=False,
325+
)
326+
327+
def _unexpected_import(_name: str):
328+
raise AssertionError("manual HF_CONVERSION_MAP_REVERSED should bypass inferred transformers mappings")
329+
330+
monkeypatch.setattr(structure_module, "import_module", _unexpected_import)
331+
332+
resolved = Gemma3ForConditionalGenerationGPTQ.resolve_hf_conversion_map_reversed(target_model=_Gemma3DummyModel())
333+
assert _renaming_pairs(resolved) == _renaming_pairs(manual_renamings)
334+
335+
resolved[0].source_patterns[0] = "mutated.runtime"
336+
resolved_again = Gemma3ForConditionalGenerationGPTQ.resolve_hf_conversion_map_reversed(
337+
target_model=_Gemma3DummyModel()
338+
)
339+
assert _renaming_pairs(resolved_again) == _renaming_pairs(manual_renamings)
340+
341+
turtle = _build_lazy_turtle(
342+
tmp_path,
343+
{
344+
"language_model.model.layers.0.mlp.gate_proj.weight": torch.zeros(2, 2),
345+
"vision_tower.vision_model.head.weight": torch.zeros(2, 2),
346+
"multi_modal_projector.mm_input_projection_weight": torch.zeros(2, 2),
347+
"language_model.lm_head.weight": torch.zeros(2, 2),
348+
},
349+
module_tree=Gemma3ForConditionalGenerationGPTQ.module_tree,
350+
hf_conversion_map_reversed=resolved_again,
351+
)
352+
353+
_assert_gemma3_alias_resolution(turtle)
354+
355+
193356
def test_lazy_turtle_keeps_module_tree_alias_resolution_for_mixtral(tmp_path):
194357
turtle = _build_lazy_turtle(
195358
tmp_path,

tests/test_local_model_paths.py

Lines changed: 14 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,4 @@
1+
import copy
12
import json
23
from types import SimpleNamespace
34

@@ -496,16 +497,18 @@ class DummyQModel:
496497
require_pkgs = []
497498
supports_desc_act = [True, False]
498499
support_offload_to_disk = True
499-
checkpoint_path_aliases = (("shell_model", "model"),)
500+
HF_CONVERSION_MAP_REVERSED = (
501+
SimpleNamespace(source_patterns=["shell_model"], target_patterns=["model"]),
502+
)
500503
config_class = None
501504

502505
@staticmethod
503506
def before_model_load(*_args, **_kwargs):
504507
return None
505508

506-
@staticmethod
507-
def resolve_hf_conversion_map_reversed(*_args, **_kwargs):
508-
return None
509+
@classmethod
510+
def resolve_hf_conversion_map_reversed(cls, *_args, **_kwargs):
511+
return copy.deepcopy(cls.HF_CONVERSION_MAP_REVERSED)
509512

510513
def __init__(self, model, **kwargs):
511514
self.model = model
@@ -531,7 +534,13 @@ def __init__(self, model, **kwargs):
531534
assert shell_configs
532535
assert load_calls == []
533536
assert isinstance(instance.turtle_model, LazyTurtle)
534-
assert instance.turtle_model._runtime_to_checkpoint_aliases == DummyQModel.checkpoint_path_aliases
537+
assert [
538+
(entry.source_patterns[0], entry.target_patterns[0])
539+
for entry in instance.turtle_model._runtime_to_checkpoint_renamings
540+
] == [
541+
(entry.source_patterns[0], entry.target_patterns[0])
542+
for entry in DummyQModel.HF_CONVERSION_MAP_REVERSED
543+
]
535544
assert instance.turtle_model.config._experts_implementation == "linear_loop"
536545
assert instance.turtle_model.config is not instance.model.config
537546

tests/test_offload_files.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -623,9 +623,9 @@ def test_lazy_turtle_hf_conversion_map_reversed_resolves_nested_language_model_p
623623
model_local_path=str(model_dir),
624624
config=SimpleNamespace(_experts_implementation=None),
625625
model_init_kwargs={"device_map": {"": "cpu"}},
626-
checkpoint_path_aliases=(
627-
("model.language_model", "model"),
628-
("language_model", "model"),
626+
hf_conversion_map_reversed=(
627+
SimpleNamespace(source_patterns=["model.language_model"], target_patterns=["model"]),
628+
SimpleNamespace(source_patterns=["language_model"], target_patterns=["model"]),
629629
),
630630
)
631631
assert source is not None

tests/test_qwen2_family_compat.py

Lines changed: 20 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,7 @@
1616
from transformers import PreTrainedTokenizerFast
1717

1818
from gptqmodel.models.definitions import base_qwen2_5_omni, base_qwen2_vl
19+
from gptqmodel.utils import structure as structure_module
1920
from gptqmodel.utils.hf import load_tokenizer
2021

2122

@@ -37,12 +38,26 @@ def test_qwen2_vl_image_only_process_vision_info_returns_image_list():
3738
assert image_inputs == [image]
3839

3940

40-
def test_qwen2_vl_declares_checkpoint_path_aliases_for_language_model_shell():
41-
assert base_qwen2_vl.BaseQwen2VLGPTQ.support_offload_to_disk is True
42-
assert base_qwen2_vl.BaseQwen2VLGPTQ.checkpoint_path_aliases == (
43-
("model.language_model", "model"),
44-
("language_model", "model"),
41+
def test_qwen2_vl_resolves_hf_conversion_map_reversed_for_language_model_shell(monkeypatch):
42+
conversion_mapping_module = types.SimpleNamespace(
43+
get_checkpoint_conversion_mapping=lambda model_type: [
44+
types.SimpleNamespace(
45+
source_patterns=[r"(?<!_)model(?!\.(language_model|visual))"],
46+
target_patterns=["model.language_model"],
47+
),
48+
]
49+
if model_type == "qwen2_vl"
50+
else None
4551
)
52+
monkeypatch.setattr(structure_module, "import_module", lambda name: conversion_mapping_module)
53+
54+
target_model = types.SimpleNamespace(config=types.SimpleNamespace(model_type="qwen2_vl"))
55+
resolved = base_qwen2_vl.BaseQwen2VLGPTQ.resolve_hf_conversion_map_reversed(target_model=target_model)
56+
57+
assert base_qwen2_vl.BaseQwen2VLGPTQ.support_offload_to_disk is True
58+
assert [(entry.source_patterns[0], entry.target_patterns[0]) for entry in resolved] == [
59+
("model.language_model", r"(?<!_)model(?!\.(language_model|visual))"),
60+
]
4661

4762

4863
def test_qwen2_vl_pre_quantize_hooks_use_inner_model_layout():

0 commit comments

Comments
 (0)