Skip to content

Commit a41b67c

Browse files
fix AttributeError: 'NoneType' object has no attribute 'from_pretrained' (#2840)
1 parent 2782900 commit a41b67c

2 files changed

Lines changed: 124 additions & 5 deletions

File tree

gptqmodel/models/loader.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,15 +19,14 @@
1919
from ..utils.modelscope import ensure_modelscope_available
2020
from ..utils.structure import LazyTurtle, print_module_tree
2121

22-
2322
if ensure_modelscope_available():
2423
from modelscope import snapshot_download
2524
else:
2625
from huggingface_hub import snapshot_download
2726

2827
import defuser
2928
from packaging.version import InvalidVersion, Version
30-
from transformers import AutoConfig, AutoTokenizer, PretrainedConfig
29+
from transformers import AutoConfig, PretrainedConfig
3130
from transformers.utils import is_flash_attn_2_available
3231

3332
from ..adapter.adapter import Adapter
@@ -45,6 +44,7 @@
4544
get_hf_config_dtype,
4645
get_hf_gguf_load_kwargs,
4746
has_native_transformers_causallm_support,
47+
load_hf_tokenizer,
4848
normalize_hf_config_compat,
4949
normalize_model_id_or_path_for_hf_gguf,
5050
normalize_torch_dtype_kwarg,
@@ -473,8 +473,9 @@ def from_pretrained(
473473
# Align config metadata with the dtype we will materialize weights in.
474474
set_hf_config_dtype(config, dtype)
475475

476-
tokenizer = AutoTokenizer.from_pretrained(
476+
tokenizer = load_hf_tokenizer(
477477
model_local_path,
478+
model_config=config,
478479
trust_remote_code=tokenizer_trust_remote_code,
479480
**_get_tokenizer_load_kwargs(model_init_kwargs),
480481
)
@@ -913,8 +914,9 @@ def from_quantized(
913914

914915
qcfg.calculate_bits_per_weight()
915916

916-
tokenizer = AutoTokenizer.from_pretrained(
917+
tokenizer = load_hf_tokenizer(
917918
model_local_path,
919+
model_config=config,
918920
trust_remote_code=tokenizer_trust_remote_code,
919921
**hf_gguf_load_kwargs,
920922
)

gptqmodel/utils/hf.py

Lines changed: 118 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
from transformers import (
2020
AutoConfig,
2121
AutoModelForCausalLM,
22+
AutoTokenizer,
2223
GenerationConfig,
2324
PreTrainedConfig,
2425
PreTrainedModel,
@@ -34,7 +35,6 @@
3435
)
3536
from ..utils import _MONKEY_PATCH_LOCK, internal_gguf
3637

37-
3838
# Compatibility wrapper for no_init_weights across different transformers versions
3939
# transformers >= 5.0.0: from transformers.initialization import no_init_weights
4040
# transformers < 5.0.0: from transformers.modeling_utils import no_init_weights
@@ -59,6 +59,7 @@
5959
"normalize_model_id_or_path_for_hf_gguf",
6060
"resolve_trust_remote_code",
6161
"set_hf_config_dtype",
62+
"load_hf_tokenizer",
6263
"load_tokenizer",
6364
]
6465

@@ -875,6 +876,53 @@ def get_expanded_tied_weights_keys(self, all_submodels: bool = False) -> dict:
875876
# during init, but newer transformers dropped the default attribute.
876877
PreTrainedModel.is_parallelizable = False
877878

879+
if not hasattr(PreTrainedModel, "get_head_mask"):
880+
def get_head_mask(self, head_mask, num_hidden_layers, is_attention_chunked: bool = False):
    """Compat shim for the ``get_head_mask`` helper removed in transformers 5.x.

    Older trust_remote_code decoder implementations still call
    ``self.get_head_mask(...)`` from ``forward()``, so the legacy contract
    is reproduced here:

    - ``None`` means "no masking" and yields ``[None] * num_hidden_layers``.
    - A 1D mask ``[num_heads]`` is broadcast to every layer.
    - A 2D mask ``[num_hidden_layers, num_heads]`` is expanded per layer.

    In both tensor cases the result is 5D:
    ``[num_hidden_layers, batch, num_heads, seq, seq]`` with singleton
    batch/seq axes. When ``is_attention_chunked`` is truthy one extra
    trailing axis is appended for chunk broadcasting.

    Raises:
        ValueError: if ``head_mask`` is a tensor whose rank is not 1 or 2.
    """
    # No mask requested: one `None` placeholder per layer.
    if head_mask is None:
        return [None] * num_hidden_layers

    rank = head_mask.dim()
    if rank not in (1, 2):
        raise ValueError(
            f"head_mask must have dim 1 or 2, but got shape {tuple(head_mask.shape)}."
        )

    if rank == 1:
        # [num_heads] -> [1, 1, num_heads, 1, 1], then repeated across layers.
        mask_5d = head_mask[None, None, :, None, None]
        mask_5d = mask_5d.expand(num_hidden_layers, -1, -1, -1, -1)
    else:
        # [num_hidden_layers, num_heads] -> [num_hidden_layers, 1, num_heads, 1, 1]
        mask_5d = head_mask[:, None, :, None, None]

    # Cast to the model's dtype when it exposes one, so legacy attention
    # code does not hit dtype promotion or precision mismatches.
    model_dtype = getattr(self, "dtype", None)
    if isinstance(model_dtype, torch.dtype):
        mask_5d = mask_5d.to(dtype=model_dtype)

    # Older chunked-attention implementations expect one extra trailing axis.
    return mask_5d.unsqueeze(-1) if is_attention_chunked else mask_5d
923+
924+
PreTrainedModel.get_head_mask = get_head_mask
925+
878926
if not getattr(PreTrainedModel, "_gptqmodel_missing_all_tied_weights_patch", False):
879927
original_getattr = PreTrainedModel.__getattr__
880928

@@ -1338,6 +1386,75 @@ def load_tokenizer(tokenizer_or_path, *, model_config: Any = None, **kwargs):
13381386
return Tokenicer.load(tokenizer_or_path, model_config=model_config, **kwargs)
13391387

13401388

1389+
def load_hf_tokenizer(
1390+
tokenizer_or_path,
1391+
*,
1392+
model_config: Any = None,
1393+
trust_remote_code: bool = False,
1394+
**kwargs,
1395+
):
1396+
auto_tokenizer_exc = None
1397+
try:
1398+
# Preferred path: let transformers perform its normal tokenizer
1399+
# resolution. This keeps behavior identical for native tokenizers and
1400+
# for remote-code tokenizers that are still compatible with the
1401+
# installed transformers release.
1402+
return AutoTokenizer.from_pretrained(
1403+
tokenizer_or_path,
1404+
trust_remote_code=trust_remote_code,
1405+
**kwargs,
1406+
)
1407+
except AttributeError as exc:
1408+
# Narrow fallback for legacy trust_remote_code repositories. On
1409+
# transformers 5.x, some old repos no longer resolve to a tokenizer
1410+
# class inside AutoTokenizer and instead fail with
1411+
# `None.from_pretrained(...)`. Only intercept that specific compat
1412+
# break; all other exceptions should propagate unchanged.
1413+
if not trust_remote_code or "from_pretrained" not in str(exc):
1414+
raise
1415+
auto_tokenizer_exc = exc
1416+
1417+
auto_map = getattr(model_config, "auto_map", None) or {}
1418+
# Old repositories often still expose an authoritative dynamic tokenizer
1419+
# reference in `config.auto_map`, even when the higher-level
1420+
# AutoTokenizer registry no longer reaches it.
1421+
class_ref = auto_map.get("AutoTokenizer")
1422+
if isinstance(class_ref, (list, tuple)):
1423+
# HF stores tokenizer refs as [slow, fast]. Prefer the fast tokenizer
1424+
# when present, otherwise use the slow one.
1425+
class_ref = class_ref[1] if len(class_ref) > 1 and class_ref[1] is not None else class_ref[0]
1426+
1427+
if not isinstance(class_ref, str):
1428+
raise auto_tokenizer_exc
1429+
1430+
from transformers.dynamic_module_utils import get_class_from_dynamic_module
1431+
1432+
tokenizer_cls = get_class_from_dynamic_module(class_ref, str(tokenizer_or_path), **kwargs)
1433+
original_init = getattr(tokenizer_cls, "__init__", None)
1434+
if callable(original_init) and not getattr(tokenizer_cls, "_gptqmodel_legacy_init_compat", False):
1435+
def patched_init(self, *init_args, **init_kwargs):
1436+
# Some legacy tokenizers assign `bos/eos/pad/..._token_id` before
1437+
# they call `PreTrainedTokenizer.__init__()`. In transformers 5.x
1438+
# those assignments now go through base-class attribute handling,
1439+
# which expects `_special_tokens_map` to already exist. Creating
1440+
# the storage eagerly preserves the old constructor order without
1441+
# modifying the upstream repository code.
1442+
if not hasattr(self, "_special_tokens_map"):
1443+
object.__setattr__(self, "_special_tokens_map", {})
1444+
return original_init(self, *init_args, **init_kwargs)
1445+
1446+
tokenizer_cls.__init__ = patched_init
1447+
# Avoid wrapping the same dynamically imported class multiple times in
1448+
# a long-running process.
1449+
tokenizer_cls._gptqmodel_legacy_init_compat = True
1450+
tokenizer_cls.register_for_auto_class()
1451+
return tokenizer_cls.from_pretrained(
1452+
tokenizer_or_path,
1453+
trust_remote_code=trust_remote_code,
1454+
**kwargs,
1455+
)
1456+
1457+
13411458

13421459
_patch_transformers_remote_code_compat()
13431460

0 commit comments

Comments
 (0)