|
12 | 12 |
|
13 | 13 | from gptqmodel.models.definitions.gemma3 import Gemma3ForConditionalGenerationGPTQ |
14 | 14 | from gptqmodel.models.definitions.mixtral import MixtralQModel |
| 15 | +from gptqmodel.models.definitions.ovis import OvisQModel |
15 | 16 | from gptqmodel.models.definitions.qwen2_5_vl import Qwen2_5_VLQModel |
16 | 17 | from gptqmodel.models.definitions.qwen2_vl import Qwen2VLQModel |
17 | 18 | from gptqmodel.models.definitions.qwen3_5_moe_text import Qwen3_5_MoeTextQModel |
@@ -278,6 +279,34 @@ def _assert_gemma3_shell_vision_alias_resolution(turtle: LazyTurtle) -> None: |
278 | 279 | ) |
279 | 280 |
|
280 | 281 |
|
def _assert_ovis_shell_vision_alias_resolution(turtle: LazyTurtle) -> None:
    """Assert that Ovis vision paths without ``vision_model`` resolve to checkpoint names that include it.

    The checks cover one module-path lookup and three tensor-name lookups on
    ``turtle``; each is expected to come back with ``vision_model`` inserted
    directly after ``visual_tokenizer.backbone``.

    Args:
        turtle: The object under test; only its ``_resolve_checkpoint_module_path``
            and ``_resolve_checkpoint_tensor_name`` methods are exercised here.

    Raises:
        AssertionError: On the first lookup whose result differs from the
            expected checkpoint name.
    """
    # Module-level alias: the bare ``head`` path must gain the ``vision_model`` segment.
    resolved_module = turtle._resolve_checkpoint_module_path("visual_tokenizer.backbone.head")
    assert resolved_module == "visual_tokenizer.backbone.vision_model.head"

    # (module path, tensor name relative to that module, expected checkpoint tensor name).
    # The first two rows split the same tensor at different depths on purpose.
    tensor_cases = (
        (
            "visual_tokenizer",
            "backbone.head.mlp.fc1.weight",
            "visual_tokenizer.backbone.vision_model.head.mlp.fc1.weight",
        ),
        (
            "visual_tokenizer.backbone",
            "head.mlp.fc1.weight",
            "visual_tokenizer.backbone.vision_model.head.mlp.fc1.weight",
        ),
        (
            "visual_tokenizer",
            "backbone.embeddings.patch_embedding.weight",
            "visual_tokenizer.backbone.vision_model.embeddings.patch_embedding.weight",
        ),
    )
    for module_path, tensor_name, expected in tensor_cases:
        assert turtle._resolve_checkpoint_tensor_name(module_path, tensor_name) == expected
| 308 | + |
| 309 | + |
281 | 310 | def _assert_qwen2_vl_alias_resolution(turtle: LazyTurtle) -> None: |
282 | 311 | assert turtle._resolve_checkpoint_module_path("model.language_model") == "model" |
283 | 312 | assert turtle._resolve_checkpoint_module_path("model.visual") == "visual" |
@@ -692,6 +721,28 @@ def test_gemma3_definition_hf_conversion_map_reversed_fixes_shell_vision_paths(t |
692 | 721 | assert resolved_hf_again[0].source_patterns[0] == r"^model\.vision_tower\.(?!vision_model\.)(.+)$" |
693 | 722 |
|
694 | 723 |
|
def test_ovis_definition_hf_conversion_map_reversed_fixes_shell_vision_paths(tmp_path):
    """Verify the Ovis reversed HF conversion map drives vision alias resolution.

    Builds a lazy turtle over two ``vision_model``-prefixed tensors using the
    map returned by ``OvisQModel.resolve_hf_conversion_map_reversed()``, runs
    the shared Ovis alias assertions against it, and then checks that mutating
    the returned map does not affect what a later call returns.
    """
    conversion_map = OvisQModel.resolve_hf_conversion_map_reversed()
    assert conversion_map is not None

    # Only tensors stored under the ``vision_model`` prefix are provided.
    checkpoint_tensors = {
        "visual_tokenizer.backbone.vision_model.head.mlp.fc1.weight": torch.zeros(2, 2),
        "visual_tokenizer.backbone.vision_model.embeddings.patch_embedding.weight": torch.zeros(2, 2),
    }
    turtle = _build_lazy_turtle(
        tmp_path,
        checkpoint_tensors,
        hf_conversion_map_reversed=conversion_map,
    )

    _assert_ovis_shell_vision_alias_resolution(turtle)

    # Overwrite the first pattern of the map we were handed; a second call
    # must still report the original pattern.
    conversion_map[0].source_patterns[0] = r"^mutated\.runtime\.(.+)$"
    fresh_map = OvisQModel.resolve_hf_conversion_map_reversed()
    assert fresh_map is not None
    first_pattern = fresh_map[0].source_patterns[0]
    assert first_pattern == r"^visual_tokenizer\.backbone\.(?!vision_model\.)(.+)$"
| 745 | + |
695 | 746 | def test_lazy_turtle_keeps_module_tree_alias_resolution_for_mixtral(tmp_path): |
696 | 747 | turtle = _build_lazy_turtle( |
697 | 748 | tmp_path, |
|
0 commit comments