@@ -45,31 +45,32 @@ def get_special_tokens_dict(
4545 special_tokens_dict = {}
4646 if not tokenizer_name_or_path :
4747 llama_classes = tuple (
48- cls for cls in [
48+ cls
49+ for cls in [
4950 getattr (transformers , "LlamaTokenizer" , None ),
5051 getattr (transformers , "LlamaTokenizerFast" , None ),
51- ] if cls is not None
52+ ]
53+ if cls is not None
5254 )
5355 is_llama_tokenizer = (
54- (bool (llama_classes ) and isinstance (tokenizer , llama_classes ))
55- or "llama" in (getattr (tokenizer , "name_or_path" , "" ) or "" ).lower ()
56- )
56+ bool (llama_classes ) and isinstance (tokenizer , llama_classes )
57+ ) or "llama" in (getattr (tokenizer , "name_or_path" , "" ) or "" ).lower ()
5758
5859 gpt_neox_classes = tuple (
59- cls for cls in [
60+ cls
61+ for cls in [
6062 getattr (transformers , "GPTNeoXTokenizerFast" , None ),
6163 getattr (transformers , "GPTNeoXTokenizer" , None ),
62- ] if cls is not None
64+ ]
65+ if cls is not None
6366 )
6467
6568 if is_llama_tokenizer :
6669 special_tokens_dict ["bos_token" ] = "<s>"
6770 special_tokens_dict ["eos_token" ] = "</s>"
6871 special_tokens_dict ["unk_token" ] = "<unk>"
6972 special_tokens_dict ["pad_token" ] = "<pad>"
70- elif isinstance (
71- tokenizer , (transformers .GPT2Tokenizer , * gpt_neox_classes )
72- ):
73+ elif isinstance (tokenizer , (transformers .GPT2Tokenizer , * gpt_neox_classes )):
7374 special_tokens_dict ["pad_token" ] = "<pad>"
7475
7576 # Add special tokens only when a custom tokenizer is not passed
@@ -117,7 +118,7 @@ def tokenizer_and_embedding_resize(
117118 dict: Metadata on number of added tokens.
118119 """
119120 num_new_tokens = tokenizer .add_special_tokens (
120- special_tokens_dict = special_tokens_dict ,
121+ special_tokens_dict = special_tokens_dict ,
121122 # replace_additional_special_tokens=False
122123 )
123124 embedding_size = int (multiple_of * math .ceil (len (tokenizer ) / multiple_of ))
0 commit comments