2020
2121# Third Party
2222from transformers import (
23- AutoModelForImageTextToText , # AutoModelForVision2Seq was renamed to this in transformers v5
23+ AutoModelForCausalLM ,
24+ AutoModelForVision2Seq ,
25+ AutoProcessor ,
26+ AutoTokenizer ,
2427)
25- from transformers import AutoModelForCausalLM , AutoProcessor , AutoTokenizer
2628import torch
2729
2830# First Party
@@ -126,17 +128,16 @@ def test_special_tokens_before_and_after():
126128 model = AutoModelForCausalLM .from_pretrained (MODEL_NAME )
127129
128130 input_tokenizer_len = len (tokenizer .get_vocab ())
129- addn_spl_tokens_before = list (tokenizer .extra_special_tokens )
131+ addn_spl_tokens_before = tokenizer .special_tokens_map .get (
132+ "additional_special_tokens"
133+ )
130134 assert (
131135 len (addn_spl_tokens_before ) > 0
132136 ), "this test needs tokenizer special tokens to not be empty before testing"
133137
134138 special_tokens_dict = {"sep_token" : "<SEP>" , "pad_token" : "<PAD>" }
135139 addn_spl_tokens_added = ["<NotSeenTokenA>" , "<NotSeenTokenB>" , "<NotSeenTokenC>" ]
136- # for transformers v5: merge existing extra_special_tokens with new ones to prevent replacement
137- special_tokens_dict ["additional_special_tokens" ] = (
138- list (tokenizer .extra_special_tokens ) + addn_spl_tokens_added
139- )
140+ special_tokens_dict ["additional_special_tokens" ] = addn_spl_tokens_added
140141
141142 resize_result = tokenizer_and_embedding_resize (
142143 special_tokens_dict = special_tokens_dict ,
@@ -149,7 +150,9 @@ def test_special_tokens_before_and_after():
149150 addn_spl_tokens_before .extend (addn_spl_tokens_added )
150151 expected_addn_special_tokens = addn_spl_tokens_before
151152 expected_embedding_size = input_tokenizer_len + len (addn_spl_tokens_added ) + 2
152- addn_spl_tokens_after = list (tokenizer .extra_special_tokens )
153+ addn_spl_tokens_after = tokenizer .special_tokens_map .get (
154+ "additional_special_tokens"
155+ )
153156
154157 assert "<SEP>" in tokenizer .get_vocab ()
155158 assert "<PAD>" in tokenizer .get_vocab ()
@@ -209,9 +212,7 @@ def test_resize_with_multiple_of():
209212
210213
211214def test_resize_llama_vision_model ():
212- model = AutoModelForImageTextToText .from_pretrained (
213- TINY_LLAMA_VISION_MODEL_NAME
214- ) # AutoModelForVision2Seq was renamed to AutoModelForImageTextToText in transformers v5
215+ model = AutoModelForVision2Seq .from_pretrained (TINY_LLAMA_VISION_MODEL_NAME )
215216 processor = AutoProcessor .from_pretrained (TINY_LLAMA_VISION_MODEL_NAME )
216217 tokenizer = processor .tokenizer
217218
0 commit comments