Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions gptqmodel/models/definitions/ovis.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

import copy
import logging
from types import SimpleNamespace
from typing import Dict

import torch
Expand All @@ -18,6 +19,17 @@


class OvisQModel(BaseQModel):
HF_CONVERSION_MAP_REVERSED = (
# Ovis 1.6 builds the SigLIP visual backbone via `AutoModel`, whose
# runtime shell exposes `visual_tokenizer.backbone.*` directly, while
# checkpoint tensors still live under `visual_tokenizer.backbone.vision_model.*`.
SimpleNamespace(
source_patterns=[r"^visual_tokenizer\.backbone\.(?!vision_model\.)(.+)$"],
target_patterns=[r"^visual_tokenizer.backbone.vision_model.\1"],
operations=[],
),
)

pre_lm_head_norm_module = "llm.model.norm"

module_tree = [
Expand Down
50 changes: 50 additions & 0 deletions gptqmodel/utils/hf.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from functools import lru_cache
from transformers import (
AutoConfig,
AutoModel,
AutoModelForCausalLM,
AutoTokenizer,
GenerationConfig,
Expand Down Expand Up @@ -1201,6 +1202,32 @@ def prepare_remote_code_compat(config: Any) -> None:
normalize_hf_config_compat(config, trust_remote_code=True)


def register_runtime_automodel_config(config, remote_module, config_attr: str, remote_model_name: str) -> None:
    """Register a remote-code model class with ``AutoModel`` under its runtime config class.

    Looks up the sub-config stored on ``config`` under ``config_attr`` and the
    model class named ``remote_model_name`` on ``remote_module``, then registers
    the pair with ``AutoModel`` using the *actual* class of the sub-config
    instance. This fixes errors such as::

        ValueError: Unrecognized configuration class
        <class 'transformers_modules.Ovis1_dot_6_hyphen_Llama3_dot_2_hyphen_3B.e514127b17008465.configuration_ovis.
        SiglipVisualTokenizerConfig'> for this kind of AutoModel: AutoModel.

    Args:
        config: Object carrying the sub-config as an attribute.
        remote_module: Module expected to expose the model class; may be ``None``.
        config_attr: Attribute name of the sub-config on ``config``.
        remote_model_name: Attribute name of the model class on ``remote_module``.

    Returns:
        None. The function is a silent no-op when either the sub-config or the
        model class cannot be found, and any exception raised while patching or
        registering is logged at debug level instead of being propagated.
    """
    runtime_config = getattr(config, config_attr, None)

    if remote_module is None:
        runtime_model_cls = None
    else:
        runtime_model_cls = getattr(remote_module, remote_model_name, None)

    # Nothing to bridge unless both halves of the (config, model) pair exist.
    if runtime_config is None or runtime_model_cls is None:
        return

    # Use the class of the live config instance as the registration key.
    runtime_config_cls = type(runtime_config)

    try:
        # Point the model class at the runtime config class before registering it.
        declared_config_cls = getattr(runtime_model_cls, "config_class", None)
        if declared_config_cls is not runtime_config_cls:
            runtime_model_cls.config_class = runtime_config_cls
        AutoModel.register(runtime_config_cls, runtime_model_cls, exist_ok=True)
    except Exception as exc:
        # Best-effort compatibility shim: record the failure and carry on.
        module_name = getattr(remote_module, "__name__", "unknown")
        log.debug(
            "HF: failed to bridge AutoModel registration for `%s` using `%s.%s`: %s",
            config_attr,
            module_name,
            remote_model_name,
            exc,
        )


def prepare_remote_model_init_compat(model_id_or_path: Optional[str], config: Any) -> None:
if not model_id_or_path:
return
Expand Down Expand Up @@ -1278,6 +1305,18 @@ def encoder_init_compat(self, encoder_config):
if vision_model_cls:
try_patch_legacy_flash_attn_flag(vision_model_cls)

if config.model_type == "ovis":
from transformers import LlamaForCausalLM
try_patch_legacy_flash_attn_flag(LlamaForCausalLM)

vision_model_cls = getattr(
remote_module,
"SiglipVisualTokenizer",
None,
)
if vision_model_cls:
try_patch_legacy_flash_attn_flag(vision_model_cls)

if (
outer_model_cls is not None
and hasattr(outer_model_cls, "tie_weights")
Expand Down Expand Up @@ -1307,6 +1346,8 @@ def tie_weights_compat(self, *args, **kwargs):
outer_model_cls._gptqmodel_tie_weights_kwargs_patch = True

if getattr(config, "model_type", None) == "ovis" and ovis_config_module is not None:
register_runtime_automodel_config(config, remote_module, "visual_tokenizer_config", "SiglipVisualTokenizer")

formatter_cls = getattr(ovis_config_module, "Llama3ConversationFormatter", None)
if formatter_cls is not None and not getattr(formatter_cls, "_gptqmodel_tokenizer_backend_patch", False):
support_tokenizer_types = list(getattr(formatter_cls, "support_tokenizer_types", None) or [])
Expand All @@ -1318,6 +1359,9 @@ def tie_weights_compat(self, *args, **kwargs):
formatter_cls.support_tokenizer_types = support_tokenizer_types
formatter_cls._gptqmodel_tokenizer_backend_patch = True

if getattr(config, "model_type", None) == "ovis2_5":
register_runtime_automodel_config(config, remote_module, "vit_config", "Siglip2NavitModel")

if getattr(config, "model_type", None) == "hymba" and remote_module is not None:
rotary_cls = getattr(remote_module, "LlamaRotaryEmbedding", None)
attention_cls = getattr(remote_module, "HymbaAttention", None)
Expand Down Expand Up @@ -1475,6 +1519,12 @@ def try_patch_legacy_flash_attn_flag(model_cls):
if model_cls is None or not isinstance(model_cls, type):
return

# The remote modeling code for some models (for example, Ovis) still relies on `_supports_flash_attn_2`.
if hasattr(model_cls, "_supports_flash_attn"):
if not hasattr(model_cls, "_supports_flash_attn_2"):
setattr(model_cls, "_supports_flash_attn_2", bool(model_cls._supports_flash_attn))
return

# Find the most specific class that explicitly declares the newer
# `_supports_flash_attn_2` flag used by newer transformers releases.
base_with_flag = None
Expand Down
1 change: 1 addition & 0 deletions gptqmodel/utils/structure.py
Original file line number Diff line number Diff line change
Expand Up @@ -1936,6 +1936,7 @@ def _copy_checkpoint_tensors_into_submodule(
grouped_names: Dict[str, list[tuple[str, str, str, Optional[int], Optional[int], Optional[int]]]] = {}
for rel_name in t_params:
full_name, expert_index, split_index, split_dim = self._resolve_checkpoint_tensor_source(module_path, rel_name)
print("full_name", full_name, rel_name)
if full_name is None:
continue
shard = self._weight_map.get(full_name)
Expand Down
114 changes: 113 additions & 1 deletion tests/test_hf_config_compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import torch
import transformers
import transformers.generation.utils as generation_utils
from transformers import GenerationConfig, GPTNeoXConfig, LlamaConfig, cache_utils
from transformers import AutoModel, GenerationConfig, GPTNeoXConfig, LlamaConfig, cache_utils
from transformers.generation.configuration_utils import GenerationMode

from gptqmodel.utils import hf as hf_utils
Expand Down Expand Up @@ -451,6 +451,118 @@ class Llama3ConversationFormatter:
assert getattr(Llama3ConversationFormatter, "_gptqmodel_tokenizer_backend_patch", False) is True


def test_prepare_remote_model_init_compat_bridges_ovis_visual_tokenizer_registration(monkeypatch):
    """Ovis: ``SiglipVisualTokenizer`` is registered with ``AutoModel`` under the runtime config class.

    ``AutoModel.register`` is replaced by a recorder, so the test only checks
    which arguments ``prepare_remote_model_init_compat`` passes to it and that
    the model class' ``config_class`` has been switched to the runtime class.
    """
    recorded = {}

    class RuntimeVisualConfig:
        pass

    class RemoteVisualConfig:
        pass

    class DummyVisualModel:
        # Starts out bound to a *different* config class than the runtime one.
        config_class = RemoteVisualConfig

    class Llama3ConversationFormatter:
        support_tokenizer_types = ["PreTrainedTokenizerFast"]

    # Fake remote modeling module exposing the visual tokenizer class.
    remote_module = ModuleType("transformers_modules.fake_ovis_bridge.modeling_ovis")
    remote_module.SiglipVisualTokenizer = DummyVisualModel

    # Fake sibling configuration module exposing the conversation formatter.
    config_module = ModuleType("transformers_modules.fake_ovis_bridge.configuration_ovis")
    config_module.Llama3ConversationFormatter = Llama3ConversationFormatter

    for fake_module in (remote_module, config_module):
        monkeypatch.setitem(sys.modules, fake_module.__name__, fake_module)

    class DummyRemoteModel:
        __module__ = remote_module.__name__

    def fake_get_class_from_dynamic_module(class_ref, model_id_or_path, **kwargs):
        return DummyRemoteModel

    def recording_register(cls, config_class, model_class, exist_ok=False):
        recorded.update(
            {
                "config_class": config_class,
                "model_class": model_class,
                "exist_ok": exist_ok,
            }
        )

    monkeypatch.setattr(
        "transformers.dynamic_module_utils.get_class_from_dynamic_module",
        fake_get_class_from_dynamic_module,
    )
    monkeypatch.setattr(AutoModel, "register", classmethod(recording_register))

    config = SimpleNamespace(
        model_type="ovis",
        auto_map={"AutoModelForCausalLM": "modeling_ovis.Ovis"},
        visual_tokenizer_config=RuntimeVisualConfig(),
    )

    prepare_remote_model_init_compat("/tmp/ovis", config)

    assert recorded["config_class"] is RuntimeVisualConfig
    assert recorded["model_class"] is DummyVisualModel
    assert recorded["exist_ok"] is True
    assert DummyVisualModel.config_class is RuntimeVisualConfig


def test_prepare_remote_model_init_compat_bridges_ovis2_5_vit_registration(monkeypatch):
    """Ovis2.5: ``Siglip2NavitModel`` is registered with ``AutoModel`` under the runtime ViT config class.

    ``AutoModel.register`` is replaced by a recorder, so the test only checks
    which arguments ``prepare_remote_model_init_compat`` passes to it and that
    the model class' ``config_class`` has been switched to the runtime class.
    """
    recorded = {}

    class RuntimeVitConfig:
        pass

    class RemoteVitConfig:
        pass

    class DummyVitModel:
        # Starts out bound to a *different* config class than the runtime one.
        config_class = RemoteVitConfig

    # Fake remote modeling module exposing the ViT class.
    remote_module = ModuleType("transformers_modules.fake_ovis2_5_bridge.modeling_ovis2_5")
    remote_module.Siglip2NavitModel = DummyVitModel
    monkeypatch.setitem(sys.modules, remote_module.__name__, remote_module)

    class DummyRemoteModel:
        __module__ = remote_module.__name__

    def fake_get_class_from_dynamic_module(class_ref, model_id_or_path, **kwargs):
        return DummyRemoteModel

    def recording_register(cls, config_class, model_class, exist_ok=False):
        recorded.update(
            {
                "config_class": config_class,
                "model_class": model_class,
                "exist_ok": exist_ok,
            }
        )

    monkeypatch.setattr(
        "transformers.dynamic_module_utils.get_class_from_dynamic_module",
        fake_get_class_from_dynamic_module,
    )
    monkeypatch.setattr(AutoModel, "register", classmethod(recording_register))

    config = SimpleNamespace(
        model_type="ovis2_5",
        auto_map={"AutoModelForCausalLM": "modeling_ovis2_5.Ovis2_5"},
        vit_config=RuntimeVitConfig(),
    )

    prepare_remote_model_init_compat("/tmp/ovis2_5", config)

    assert recorded["config_class"] is RuntimeVitConfig
    assert recorded["model_class"] is DummyVitModel
    assert recorded["exist_ok"] is True
    assert DummyVitModel.config_class is RuntimeVitConfig


def test_prepare_remote_model_init_compat_promotes_phi4_positional_seed_to_meta(monkeypatch):
seen_devices = []

Expand Down