diff --git a/optimum/exporters/tasks.py b/optimum/exporters/tasks.py index 4e43269aac..b841d52a6b 100644 --- a/optimum/exporters/tasks.py +++ b/optimum/exporters/tasks.py @@ -29,7 +29,7 @@ from transformers import AutoConfig, PretrainedConfig from transformers.utils import SAFE_WEIGHTS_NAME, WEIGHTS_NAME, http_user_agent -from ..utils.import_utils import is_diffusers_available, is_torch_available +from ..utils.import_utils import is_diffusers_available, is_torch_available, is_transformers_version from ..utils.logging import get_logger @@ -152,8 +152,10 @@ class TasksManager: "AutoModelForUniversalSegmentation", ), "image-to-image": "AutoModelForImageToImage", - # TODO: AutoModelForVision2Seq is deprecated and will be removed in Transformers v5 - "image-to-text": ("AutoModelForVision2Seq", "AutoModel"), + "image-to-text": ( + "AutoModelForVision2Seq" if is_transformers_version("<", "4.54") else "AutoModelForImageTextToText", + "AutoModel", + ), "image-text-to-text": "AutoModelForImageTextToText", "mask-generation": "AutoModel", "masked-im": "AutoModelForMaskedImageModeling", diff --git a/tests/pipelines/test_pipelines.py b/tests/pipelines/test_pipelines.py index 9151462af3..b006096a82 100644 --- a/tests/pipelines/test_pipelines.py +++ b/tests/pipelines/test_pipelines.py @@ -17,12 +17,14 @@ from typing import Any, Dict import numpy as np +import pytest from huggingface_hub.constants import HF_HUB_CACHE from PIL import Image from transformers import AutoTokenizer from transformers.pipelines import Pipeline from optimum.pipelines import pipeline as optimum_pipeline +from optimum.utils import is_transformers_version from optimum.utils.testing_utils import remove_directory @@ -192,6 +194,10 @@ def test_image_segmentation_pipeline(self): self.assertIn("score", result[0]) self.assertIn("mask", result[0]) + @pytest.mark.skipif( + is_transformers_version(">=", "5"), + reason="requires transformers < v5 since image-to-text pipelines is deprecated", + ) def test_image_to_text_pipeline(self): """Test image to text ORT pipeline""" pipe = optimum_pipeline(task="image-to-text", accelerator="ort")