Accommodate the renaming of AutoModelForVision2Seq to AutoModelForImageTextToText

yash4242 · yash4242 · commit 3be4589d6dfc · 2026-02-24T10:38:59.000Z
diff --git a/tests/utils/test_embedding_resize.py b/tests/utils/test_embedding_resize.py
@@ -21,7 +21,7 @@
 # Third Party
 from transformers import (
     AutoModelForCausalLM,
-    AutoModelForVision2Seq,
+    AutoModelForImageTextToText,  # AutoModelForVision2Seq was renamed to this in transformers v5
     AutoProcessor,
     AutoTokenizer,
 )
@@ -212,7 +212,8 @@ def test_resize_with_multiple_of():
 
 
 def test_resize_llama_vision_model():
-    model = AutoModelForVision2Seq.from_pretrained(TINY_LLAMA_VISION_MODEL_NAME)
+    # model = AutoModelForVision2Seq.from_pretrained(TINY_LLAMA_VISION_MODEL_NAME)
+    model = AutoModelForImageTextToText.from_pretrained(TINY_LLAMA_VISION_MODEL_NAME)
     processor = AutoProcessor.from_pretrained(TINY_LLAMA_VISION_MODEL_NAME)
     tokenizer = processor.tokenizer
 
diff --git a/tuning/sft_trainer.py b/tuning/sft_trainer.py
@@ -30,7 +30,7 @@
 from torch.cuda import OutOfMemoryError
 from transformers import (
     AutoModelForCausalLM,
-    AutoModelForVision2Seq,
+    AutoModelForImageTextToText,  # AutoModelForVision2Seq was renamed to this in transformers v5
     AutoProcessor,
     AutoTokenizer,
     TrainerCallback,
@@ -292,7 +292,10 @@ def train(
                 )
             )
             # try to load model as a vision model
-            model = AutoModelForVision2Seq.from_pretrained(
+            # model = AutoModelForVision2Seq.from_pretrained(
+            #     model_args.model_name_or_path, **model_kwargs
+            # )
+            model = AutoModelForImageTextToText.from_pretrained(
                 model_args.model_name_or_path, **model_kwargs
             )
             try: