99from safetensors .torch import save_file
1010from torch import nn
1111
12+ from gptqmodel .models .definitions .gemma3 import Gemma3ForConditionalGenerationGPTQ
1213from gptqmodel .models .definitions .mixtral import MixtralQModel
14+ from gptqmodel .models .definitions .qwen2_5_vl import Qwen2_5_VLQModel
15+ from gptqmodel .models .definitions .qwen2_vl import Qwen2VLQModel
1316from gptqmodel .utils import structure as structure_module
1417from gptqmodel .utils .structure import LazyTurtle
1518
@@ -66,6 +69,18 @@ def __init__(self):
6669 self .config = SimpleNamespace (model_type = "gemma3" )
6770
6871
class _Qwen2VLDummyModel(nn.Module):
    """Minimal stand-in model whose config reports ``model_type == "qwen2_vl"``."""

    def __init__(self):
        super().__init__()
        # Only ``config.model_type`` is populated; the module has no
        # submodules or parameters of its own.
        self.config = SimpleNamespace(model_type="qwen2_vl")
76+
77+
class _Qwen2_5_VLDummyModel(nn.Module):
    """Minimal stand-in model whose config reports ``model_type == "qwen2_5_vl"``."""

    def __init__(self):
        super().__init__()
        # Only ``config.model_type`` is populated; the module has no
        # submodules or parameters of its own.
        self.config = SimpleNamespace(model_type="qwen2_5_vl")
82+
83+
6984class _WeightRenamingStub :
7085 def __init__ (self , source_pattern : str , target_pattern : str ):
7186 self .source_patterns = [source_pattern ]
@@ -82,6 +97,21 @@ def _gemma3_weight_renamings():
8297 ]
8398
8499
def _qwen2_vl_weight_renamings():
    """Return the two weight-renaming stubs used by the Qwen2-VL tests.

    Each call builds fresh ``_WeightRenamingStub`` objects, so callers may
    mutate the returned list without affecting later calls.
    """
    return [
        # Matches ``model`` unless it is preceded by ``_`` or already followed
        # by ``.language_model`` / ``.visual``.
        _WeightRenamingStub(
            r"(?<!_)model(?!\.(language_model|visual))",
            "model.language_model",
        ),
        # A leading ``visual`` is renamed to ``model.visual``.
        _WeightRenamingStub(r"^visual", "model.visual"),
    ]
108+
109+
def _renaming_pairs(renamings) -> list[tuple[str, str]]:
    """Flatten renaming entries into ``(source, target)`` string pairs.

    Only the first element of each entry's ``source_patterns`` and
    ``target_patterns`` is used.

    Args:
        renamings: Iterable of objects exposing ``source_patterns`` and
            ``target_patterns`` sequences. Must not be ``None``.

    Returns:
        One ``(source_patterns[0], target_patterns[0])`` tuple per entry, in
        iteration order.
    """
    # Test-helper guard: fail loudly here instead of with a TypeError when
    # iterating ``None`` below.
    assert renamings is not None
    return [(entry.source_patterns[0], entry.target_patterns[0]) for entry in renamings]
113+
114+
85115def _assert_gemma3_alias_resolution (turtle : LazyTurtle ) -> None :
86116 assert turtle ._resolve_checkpoint_module_path ("model.language_model" ) == "language_model.model"
87117 assert turtle ._resolve_checkpoint_module_path ("model.vision_tower" ) == "vision_tower"
@@ -119,27 +149,58 @@ def _assert_gemma3_alias_resolution(turtle: LazyTurtle) -> None:
119149 assert turtle ._resolve_checkpoint_tensor_name ("lm_head" , "weight" ) == "language_model.lm_head.weight"
120150
121151
def _assert_qwen2_vl_alias_resolution(turtle: LazyTurtle) -> None:
    """Assert that ``turtle`` maps Qwen2-VL runtime names to checkpoint names.

    Covers module-path resolution for the language model and visual tower,
    and tensor-name resolution for a language-model layer (with and without
    an extra ``wrapper.`` prefix) and a visual block.
    """
    # Module paths: ``model.language_model`` -> ``model``, ``model.visual`` -> ``visual``.
    assert turtle._resolve_checkpoint_module_path("model.language_model") == "model"
    assert turtle._resolve_checkpoint_module_path("model.visual") == "visual"

    assert (
        turtle._resolve_checkpoint_tensor_name(
            "model.language_model.layers.0.mlp",
            "gate_proj.weight",
        )
        == "model.layers.0.mlp.gate_proj.weight"
    )
    # The same tensor must be found when the runtime path has a leading
    # ``wrapper.`` component.
    assert (
        turtle._resolve_checkpoint_tensor_name(
            "wrapper.model.language_model.layers.0.mlp",
            "gate_proj.weight",
        )
        == "model.layers.0.mlp.gate_proj.weight"
    )
    assert (
        turtle._resolve_checkpoint_tensor_name(
            "model.visual.blocks.0.attn",
            "weight",
        )
        == "visual.blocks.0.attn.weight"
    )
177+
178+
def test_lazy_turtle_reverses_transformers_weight_renaming_list():
    """Reversing the Gemma3 renaming list yields the expected (source, target) pairs."""
    reversed_map = LazyTurtle.reverse_hf_conversion_map(_gemma3_weight_renamings())

    # Compare via ``_renaming_pairs`` so the check is on pattern strings only,
    # not on the type of the returned entries.
    assert _renaming_pairs(reversed_map) == [
        ("model.language_model", r"^language_model.model"),
        ("lm_head", r"^language_model.lm_head"),
        ("model.vision_tower", r"^vision_tower"),
        ("model.multi_modal_projector", r"^multi_modal_projector"),
    ]
131188
132189
133190def test_lazy_turtle_runtime_to_checkpoint_alias_candidates_do_not_expand_infinitely (tmp_path ):
191+ reversed_map = LazyTurtle .reverse_hf_conversion_map (
192+ {
193+ "language_model.model" : "language_model" ,
194+ "language_model.lm_head" : "lm_head" ,
195+ }
196+ )
197+
134198 turtle = _build_lazy_turtle (
135199 tmp_path ,
136200 {
137201 "language_model.model.layers.0.self_attn.q_proj.weight" : torch .zeros (2 , 2 ),
138202 },
139- hf_conversion_map_reversed = {
140- "language_model" : "language_model.model" ,
141- "lm_head" : "language_model.lm_head" ,
142- },
203+ hf_conversion_map_reversed = reversed_map ,
143204 )
144205
145206 assert turtle ._runtime_to_checkpoint_alias_candidates ("language_model.layers.0" ) == [
@@ -148,6 +209,22 @@ def test_lazy_turtle_runtime_to_checkpoint_alias_candidates_do_not_expand_infini
148209 ]
149210
150211
def test_lazy_turtle_applies_reversed_weight_renamings_with_capturing_groups(tmp_path):
    """A reversed renaming built from a capturing-group pattern resolves tensor names."""
    # Forward rule: ``(.+)`` -> ``timm_model.\1``.
    reversed_map = LazyTurtle.reverse_hf_conversion_map(
        [_WeightRenamingStub(r"(.+)", r"timm_model.\1")]
    )

    turtle = _build_lazy_turtle(
        tmp_path,
        {
            "backbone.conv.weight": torch.zeros(2, 2),
        },
        hf_conversion_map_reversed=reversed_map,
    )

    # The runtime name carries the ``timm_model.`` prefix; the checkpoint name does not.
    assert (
        turtle._resolve_checkpoint_tensor_name("timm_model.backbone.conv", "weight")
        == "backbone.conv.weight"
    )
226+
227+
151228def test_lazy_turtle_uses_transformers_checkpoint_conversion_mapping_for_gemma3 (tmp_path , monkeypatch ):
152229 conversion_mapping_module = SimpleNamespace (
153230 get_checkpoint_conversion_mapping = lambda model_type : _gemma3_weight_renamings ()
@@ -170,6 +247,53 @@ def test_lazy_turtle_uses_transformers_checkpoint_conversion_mapping_for_gemma3(
170247 _assert_gemma3_alias_resolution (turtle )
171248
172249
def test_lazy_turtle_uses_transformers_checkpoint_conversion_mapping_for_qwen2_vl(tmp_path, monkeypatch):
    """Qwen2-VL aliases resolve when the mapping comes from the patched conversion module."""
    # Fake conversion-mapping module: only ``"qwen2_vl"`` gets renamings.
    conversion_mapping_module = SimpleNamespace(
        get_checkpoint_conversion_mapping=lambda model_type: (
            _qwen2_vl_weight_renamings() if model_type == "qwen2_vl" else None
        )
    )
    # Every ``import_module`` call made through ``structure_module`` now
    # returns the fake module, regardless of the requested name.
    monkeypatch.setattr(structure_module, "import_module", lambda name: conversion_mapping_module)

    turtle = _build_lazy_turtle(
        tmp_path,
        {
            "model.layers.0.mlp.gate_proj.weight": torch.zeros(2, 2),
            "visual.blocks.0.attn.weight": torch.zeros(2, 2),
        },
        module_tree=Qwen2VLQModel.module_tree,
        target_model=_Qwen2VLDummyModel(),
    )

    _assert_qwen2_vl_alias_resolution(turtle)
269+
270+
def test_lazy_turtle_uses_transformers_checkpoint_conversion_mapping_for_qwen2_5_vl(tmp_path, monkeypatch):
    """Qwen2.5-VL looks up the mapping under ``"qwen2_5_vl"`` exactly once and resolves aliases."""
    observed_model_types: list[str] = []

    def _get_checkpoint_conversion_mapping(model_type: str):
        # Record each lookup so the test can check which model type was queried.
        observed_model_types.append(model_type)
        if model_type == "qwen2_5_vl":
            return _qwen2_vl_weight_renamings()
        return None

    conversion_mapping_module = SimpleNamespace(
        get_checkpoint_conversion_mapping=_get_checkpoint_conversion_mapping
    )
    # Every ``import_module`` call made through ``structure_module`` now
    # returns the fake module, regardless of the requested name.
    monkeypatch.setattr(structure_module, "import_module", lambda name: conversion_mapping_module)

    turtle = _build_lazy_turtle(
        tmp_path,
        {
            "model.layers.0.mlp.gate_proj.weight": torch.zeros(2, 2),
            "visual.blocks.0.attn.weight": torch.zeros(2, 2),
        },
        module_tree=Qwen2_5_VLQModel.module_tree,
        target_model=_Qwen2_5_VLDummyModel(),
    )

    assert observed_model_types == ["qwen2_5_vl"]
    _assert_qwen2_vl_alias_resolution(turtle)
295+
296+
173297def test_lazy_turtle_falls_back_to_legacy_checkpoint_conversion_mapping (tmp_path , monkeypatch ):
174298 def _raise_import_error (_name : str ):
175299 raise ImportError ("transformers.conversion_mapping is unavailable" )
@@ -190,6 +314,45 @@ def _raise_import_error(_name: str):
190314 _assert_gemma3_alias_resolution (turtle )
191315
192316
def test_base_qmodel_prefers_manual_hf_conversion_map_reversed(tmp_path, monkeypatch):
    """A class-level ``HF_CONVERSION_MAP_REVERSED`` wins over inferred mappings.

    Also checks that mutating a resolved mapping does not leak into later
    resolutions, and that the resolved mapping drives Gemma3 alias resolution.
    """
    manual_renamings = LazyTurtle.reverse_hf_conversion_map(_gemma3_weight_renamings())
    assert manual_renamings is not None
    # Snapshot the expected pairs now. If the resolver handed back the same
    # objects, the mutation below would change ``manual_renamings`` too, and a
    # comparison against the live list would pass vacuously.
    expected_pairs = _renaming_pairs(manual_renamings)

    monkeypatch.setattr(
        Gemma3ForConditionalGenerationGPTQ,
        "HF_CONVERSION_MAP_REVERSED",
        manual_renamings,
        raising=False,
    )

    def _unexpected_import(_name: str):
        raise AssertionError("manual HF_CONVERSION_MAP_REVERSED should bypass inferred transformers mappings")

    # Any attempt to import the transformers conversion mapping fails the test.
    monkeypatch.setattr(structure_module, "import_module", _unexpected_import)

    resolved = Gemma3ForConditionalGenerationGPTQ.resolve_hf_conversion_map_reversed(
        target_model=_Gemma3DummyModel()
    )
    assert _renaming_pairs(resolved) == expected_pairs

    # Mutate the returned mapping; a second resolution must be unaffected.
    resolved[0].source_patterns[0] = "mutated.runtime"
    resolved_again = Gemma3ForConditionalGenerationGPTQ.resolve_hf_conversion_map_reversed(
        target_model=_Gemma3DummyModel()
    )
    assert _renaming_pairs(resolved_again) == expected_pairs

    turtle = _build_lazy_turtle(
        tmp_path,
        {
            "language_model.model.layers.0.mlp.gate_proj.weight": torch.zeros(2, 2),
            "vision_tower.vision_model.head.weight": torch.zeros(2, 2),
            "multi_modal_projector.mm_input_projection_weight": torch.zeros(2, 2),
            "language_model.lm_head.weight": torch.zeros(2, 2),
        },
        module_tree=Gemma3ForConditionalGenerationGPTQ.module_tree,
        hf_conversion_map_reversed=resolved_again,
    )

    _assert_gemma3_alias_resolution(turtle)
354+
355+
193356def test_lazy_turtle_keeps_module_tree_alias_resolution_for_mixtral (tmp_path ):
194357 turtle = _build_lazy_turtle (
195358 tmp_path ,
0 commit comments