diff --git a/gptqmodel/models/definitions/ovis.py b/gptqmodel/models/definitions/ovis.py
index 0d064fe2e..0bb2b2bdc 100644
--- a/gptqmodel/models/definitions/ovis.py
+++ b/gptqmodel/models/definitions/ovis.py
@@ -5,6 +5,7 @@
 
 import copy
 import logging
+from types import SimpleNamespace
 from typing import Dict
 
 import torch
@@ -18,6 +19,17 @@
 
 
 class OvisQModel(BaseQModel):
+    HF_CONVERSION_MAP_REVERSED = (
+        # Ovis 1.6 builds the SigLIP visual backbone via `AutoModel`, whose
+        # runtime shell exposes `visual_tokenizer.backbone.*` directly, while
+        # checkpoint tensors still live under `visual_tokenizer.backbone.vision_model.*`.
+        SimpleNamespace(
+            source_patterns=[r"^visual_tokenizer\.backbone\.(?!vision_model\.)(.+)$"],
+            target_patterns=[r"^visual_tokenizer.backbone.vision_model.\1"],
+            operations=[],
+        ),
+    )
+
     pre_lm_head_norm_module = "llm.model.norm"
 
     module_tree = [
diff --git a/gptqmodel/utils/hf.py b/gptqmodel/utils/hf.py
index 574d64216..15e0a3ef0 100644
--- a/gptqmodel/utils/hf.py
+++ b/gptqmodel/utils/hf.py
@@ -15,6 +15,7 @@
 from functools import lru_cache
 from transformers import (
     AutoConfig,
+    AutoModel,
     AutoModelForCausalLM,
     AutoTokenizer,
     GenerationConfig,
@@ -1201,6 +1202,32 @@ def prepare_remote_code_compat(config: Any) -> None:
     normalize_hf_config_compat(config, trust_remote_code=True)
 
 
+def register_runtime_automodel_config(config, remote_module, config_attr: str, remote_model_name: str) -> None:
+    # Obtain the correct config class path to register the config and model.
+    # Fix ValueError: Unrecognized configuration class
+    # <class '...'> (the runtime config class)
+    # for this kind of AutoModel: AutoModel.
+    runtime_config = getattr(config, config_attr, None)
+    runtime_model_cls = getattr(remote_module, remote_model_name, None) if remote_module is not None else None
+    if runtime_config is None or runtime_model_cls is None:
+        return
+
+    runtime_config_cls = type(runtime_config)
+
+    try:
+        if getattr(runtime_model_cls, "config_class", None) is not runtime_config_cls:
+            runtime_model_cls.config_class = runtime_config_cls
+        AutoModel.register(runtime_config_cls, runtime_model_cls, exist_ok=True)
+    except Exception as exc:
+        log.debug(
+            "HF: failed to bridge AutoModel registration for `%s` using `%s.%s`: %s",
+            config_attr,
+            getattr(remote_module, "__name__", "unknown"),
+            remote_model_name,
+            exc,
+        )
+
+
 def prepare_remote_model_init_compat(model_id_or_path: Optional[str], config: Any) -> None:
     if not model_id_or_path:
         return
@@ -1278,6 +1305,18 @@ def encoder_init_compat(self, encoder_config):
         if vision_model_cls:
             try_patch_legacy_flash_attn_flag(vision_model_cls)
 
+    if getattr(config, "model_type", None) == "ovis":
+        from transformers import LlamaForCausalLM
+        try_patch_legacy_flash_attn_flag(LlamaForCausalLM)
+
+        vision_model_cls = getattr(
+            remote_module,
+            "SiglipVisualTokenizer",
+            None,
+        )
+        if vision_model_cls:
+            try_patch_legacy_flash_attn_flag(vision_model_cls)
+
     if (
         outer_model_cls is not None
         and hasattr(outer_model_cls, "tie_weights")
@@ -1307,6 +1346,8 @@ def tie_weights_compat(self, *args, **kwargs):
         outer_model_cls._gptqmodel_tie_weights_kwargs_patch = True
 
     if getattr(config, "model_type", None) == "ovis" and ovis_config_module is not None:
+        register_runtime_automodel_config(config, remote_module, "visual_tokenizer_config", "SiglipVisualTokenizer")
+
         formatter_cls = getattr(ovis_config_module, "Llama3ConversationFormatter", None)
         if formatter_cls is not None and not getattr(formatter_cls, "_gptqmodel_tokenizer_backend_patch", False):
             support_tokenizer_types = list(getattr(formatter_cls, "support_tokenizer_types", None) or [])
@@ -1318,6 +1359,9 @@ def tie_weights_compat(self, *args, **kwargs):
             formatter_cls.support_tokenizer_types = support_tokenizer_types
             formatter_cls._gptqmodel_tokenizer_backend_patch = True
 
+    if getattr(config, "model_type", None) == "ovis2_5":
+        register_runtime_automodel_config(config, remote_module, "vit_config", "Siglip2NavitModel")
+
     if getattr(config, "model_type", None) == "hymba" and remote_module is not None:
         rotary_cls = getattr(remote_module, "LlamaRotaryEmbedding", None)
         attention_cls = getattr(remote_module, "HymbaAttention", None)
@@ -1475,6 +1519,12 @@ def try_patch_legacy_flash_attn_flag(model_cls):
     if model_cls is None or not isinstance(model_cls, type):
         return
 
+    # The remote modeling code for some models (for example, Ovis) still relies on `_supports_flash_attn_2`.
+    if hasattr(model_cls, "_supports_flash_attn"):
+        if not hasattr(model_cls, "_supports_flash_attn_2"):
+            model_cls._supports_flash_attn_2 = bool(model_cls._supports_flash_attn)
+        return
+
     # Find the most specific class that explicitly declares the newer
     # `_supports_flash_attn_2` flag used by newer transformers releases.
     base_with_flag = None
diff --git a/tests/test_hf_config_compat.py b/tests/test_hf_config_compat.py
index 0e7f63a98..0e58c703d 100644
--- a/tests/test_hf_config_compat.py
+++ b/tests/test_hf_config_compat.py
@@ -6,7 +6,7 @@
 import torch
 import transformers
 import transformers.generation.utils as generation_utils
-from transformers import GenerationConfig, GPTNeoXConfig, LlamaConfig, cache_utils
+from transformers import AutoModel, GenerationConfig, GPTNeoXConfig, LlamaConfig, cache_utils
 from transformers.generation.configuration_utils import GenerationMode
 
 from gptqmodel.utils import hf as hf_utils
@@ -451,6 +451,118 @@ class Llama3ConversationFormatter:
     assert getattr(Llama3ConversationFormatter, "_gptqmodel_tokenizer_backend_patch", False) is True
 
 
+def test_prepare_remote_model_init_compat_bridges_ovis_visual_tokenizer_registration(monkeypatch):
+    captured = {}
+
+    class RuntimeVisualConfig:
+        pass
+
+    class RemoteVisualConfig:
+        pass
+
+    class DummyVisualModel:
+        config_class = RemoteVisualConfig
+
+    remote_module = ModuleType("transformers_modules.fake_ovis_bridge.modeling_ovis")
+    remote_module.SiglipVisualTokenizer = DummyVisualModel
+    monkeypatch.setitem(sys.modules, remote_module.__name__, remote_module)
+
+    config_module = ModuleType("transformers_modules.fake_ovis_bridge.configuration_ovis")
+
+    class Llama3ConversationFormatter:
+        support_tokenizer_types = ["PreTrainedTokenizerFast"]
+
+    config_module.Llama3ConversationFormatter = Llama3ConversationFormatter
+    monkeypatch.setitem(sys.modules, config_module.__name__, config_module)
+
+    class DummyRemoteModel:
+        __module__ = remote_module.__name__
+
+    monkeypatch.setattr(
+        "transformers.dynamic_module_utils.get_class_from_dynamic_module",
+        lambda class_ref, model_id_or_path, **kwargs: DummyRemoteModel,
+    )
+
+    monkeypatch.setattr(
+        AutoModel,
+        "register",
+        classmethod(
+            lambda cls, config_class, model_class, exist_ok=False: captured.update(
+                {
+                    "config_class": config_class,
+                    "model_class": model_class,
+                    "exist_ok": exist_ok,
+                }
+            )
+        ),
+    )
+
+    config = SimpleNamespace(
+        model_type="ovis",
+        auto_map={"AutoModelForCausalLM": "modeling_ovis.Ovis"},
+        visual_tokenizer_config=RuntimeVisualConfig(),
+    )
+
+    prepare_remote_model_init_compat("/tmp/ovis", config)
+
+    assert captured["config_class"] is RuntimeVisualConfig
+    assert captured["model_class"] is DummyVisualModel
+    assert captured["exist_ok"] is True
+    assert DummyVisualModel.config_class is RuntimeVisualConfig
+
+
+def test_prepare_remote_model_init_compat_bridges_ovis2_5_vit_registration(monkeypatch):
+    captured = {}
+
+    class RuntimeVitConfig:
+        pass
+
+    class RemoteVitConfig:
+        pass
+
+    class DummyVitModel:
+        config_class = RemoteVitConfig
+
+    remote_module = ModuleType("transformers_modules.fake_ovis2_5_bridge.modeling_ovis2_5")
+    remote_module.Siglip2NavitModel = DummyVitModel
+    monkeypatch.setitem(sys.modules, remote_module.__name__, remote_module)
+
+    class DummyRemoteModel:
+        __module__ = remote_module.__name__
+
+    monkeypatch.setattr(
+        "transformers.dynamic_module_utils.get_class_from_dynamic_module",
+        lambda class_ref, model_id_or_path, **kwargs: DummyRemoteModel,
+    )
+
+    monkeypatch.setattr(
+        AutoModel,
+        "register",
+        classmethod(
+            lambda cls, config_class, model_class, exist_ok=False: captured.update(
+                {
+                    "config_class": config_class,
+                    "model_class": model_class,
+                    "exist_ok": exist_ok,
+                }
+            )
+        ),
+    )
+
+    config = SimpleNamespace(
+        model_type="ovis2_5",
+        auto_map={"AutoModelForCausalLM": "modeling_ovis2_5.Ovis2_5"},
+        vit_config=RuntimeVitConfig(),
+    )
+
+    prepare_remote_model_init_compat("/tmp/ovis2_5", config)
+
+    assert captured["config_class"] is RuntimeVitConfig
+    assert captured["model_class"] is DummyVitModel
+    assert captured["exist_ok"] is True
+    assert DummyVitModel.config_class is RuntimeVitConfig
+
+
 def test_prepare_remote_model_init_compat_promotes_phi4_positional_seed_to_meta(monkeypatch):
     seen_devices = []
 