Skip to content

Commit 63f8bd7

Browse files
committed
Merge branch 'mergeability-pr-45541' into all-defects
2 parents d77a859 + e902b1b commit 63f8bd7

1 file changed

Lines changed: 18 additions & 9 deletions

File tree

src/transformers/tokenization_utils_base.py

Lines changed: 18 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -1705,6 +1705,13 @@ def from_pretrained(
17051705
else:
17061706
vocab_files["vocab_file"] = match.group()
17071707

1708+
error_message = (
1709+
f"Can't load tokenizer for '{pretrained_model_name_or_path}'. If you were trying to load it from "
1710+
"'https://huggingface.co/models', make sure you don't have a local directory with the same name. "
1711+
f"Otherwise, make sure '{pretrained_model_name_or_path}' is the correct path to a directory "
1712+
f"containing all relevant files for a {cls.__name__} tokenizer."
1713+
)
1714+
17081715
resolved_vocab_files = {}
17091716
for file_id, file_path in vocab_files.items():
17101717
if file_path is None:
@@ -1733,17 +1740,19 @@ def from_pretrained(
17331740
raise
17341741
except Exception:
17351742
# For any other exception, we throw a generic error.
1736-
raise OSError(
1737-
f"Can't load tokenizer for '{pretrained_model_name_or_path}'. If you were trying to load it from "
1738-
"'https://huggingface.co/models', make sure you don't have a local directory with the same name. "
1739-
f"Otherwise, make sure '{pretrained_model_name_or_path}' is the correct path to a directory "
1740-
f"containing all relevant files for a {cls.__name__} tokenizer."
1741-
)
1743+
raise OSError(error_message)
17421744
commit_hash = extract_commit_hash(resolved_vocab_files[file_id], commit_hash)
17431745

1744-
for file_id, file_path in vocab_files.items():
1745-
if file_id not in resolved_vocab_files:
1746-
continue
1746+
loadable_file_ids = set(cls.vocab_files_names)
1747+
if loadable_file_ids and "tokenizer_file" in resolved_vocab_files:
1748+
loadable_file_ids.add("tokenizer_file")
1749+
loadable_file_ids.intersection_update(resolved_vocab_files)
1750+
if (
1751+
(local_files_only or is_local)
1752+
and loadable_file_ids
1753+
and all(resolved_vocab_files[file_id] is None for file_id in loadable_file_ids)
1754+
):
1755+
raise OSError(error_message)
17471756

17481757
return cls._from_pretrained(
17491758
resolved_vocab_files,

0 commit comments

Comments
 (0)