Thread `trust_remote_code` through the OpenVINO multimodal export path

Summary of the hunks below:

* optimum/exporters/openvino/__main__.py
    infer_task() logs a warning when library_name is "transformers" and
    trust_remote_code is falsy, right before AutoConfig.from_pretrained().
* optimum/exporters/openvino/convert.py
    A call inside export_from_model() now forwards trust_remote_code (the
    callee name is outside the hunk). _get_submodels_and_export_configs()
    and _get_multi_modal_submodels_and_export_configs() both gain a
    `trust_remote_code: bool = False` parameter, and the former passes it
    to the latter by keyword. In the multimodal helper, `model_type` is
    split into `base_model_type` (config.model_type) and
    `export_model_type` (config.export_model_type when set and truthy,
    otherwise base_model_type). The flag is added to the export-config
    constructor kwargs only when export_model_type is "llava-qwen2" or
    "phi3_v".
* optimum/exporters/openvino/model_configs.py
    Two export-config __init__ methods accept `trust_remote_code=False`
    and `**kwargs`, store the flag as self._trust_remote_code, and pass it
    to AutoConfig.from_pretrained() in place of the hard-coded True. The
    first one additionally requires `config` to have `mm_vision_tower`
    before loading it. One constructor call in each with_behavior()
    propagates the stored flag.
* tests/openvino/test_export.py
    New parameterized test: main_export(..., trust_remote_code=False) must
    raise ValueError whose message contains "trust_remote_code"; the same
    export with trust_remote_code=True is then expected to succeed.

Review notes:

* NOTE(review): the new warning in infer_task() is guarded only by
  `not trust_remote_code`, so it is emitted for every transformers model
  exported without the flag, including models that need no remote code.
* NOTE(review): the hunks at convert.py:78 and test_export.py:62 only
  delete a blank line before `logger = ...`; they look unrelated to the
  feature. Confirm against the project formatter before merging.
* NOTE(review): the new `**kwargs` parameters are not used in the lines
  shown here; confirm they are intentional.
* NOTE(review): the line structure of this patch was restored from a
  whitespace-collapsed copy. Indentation and blank context lines were
  inferred from Python syntax and the hunk line counts, and the
  "<IMG_CONTEXT>" token in one context line was restored after being
  stripped. Run `git apply --check` before applying.

diff --git a/optimum/exporters/openvino/__main__.py b/optimum/exporters/openvino/__main__.py
index eb763b45d4..f299d9b418 100644
--- a/optimum/exporters/openvino/__main__.py
+++ b/optimum/exporters/openvino/__main__.py
@@ -112,6 +112,13 @@ def infer_task(
                 )
 
     if library_name == "transformers":
+        if not trust_remote_code:
+            logger.warning(
+                "This model may require executing custom code from its repository. "
+                "For security reasons, this is disabled by default. "
+                "Please review the source and rerun with `--trust-remote-code` if needed."
+            )
+
         config = AutoConfig.from_pretrained(
             model_name_or_path,
             subfolder=subfolder,
diff --git a/optimum/exporters/openvino/convert.py b/optimum/exporters/openvino/convert.py
index 60d90f53e0..cd440e9bd0 100644
--- a/optimum/exporters/openvino/convert.py
+++ b/optimum/exporters/openvino/convert.py
@@ -78,7 +78,6 @@
     set_simplified_chat_template,
 )
 
-
 logger = logging.getLogger(__name__)
 
 if is_torch_available():
@@ -684,6 +683,7 @@ def export_from_model(
             _variant="default",
             exporter="openvino",
             stateful=stateful,
+            trust_remote_code=trust_remote_code,
         )
         logging.disable(logging.NOTSET)
 
@@ -916,27 +916,29 @@ def _get_multi_modal_submodels_and_export_configs(
     preprocessors: Optional[List[Any]] = None,
     model_kwargs: Optional[Dict] = None,
     stateful: bool = True,
+    trust_remote_code: bool = False,
 ):
     models_for_export = {}
     stateful_parts = []
-    model_type = model.config.model_type
+    base_model_type = model.config.model_type
+    export_model_type = getattr(model.config, "export_model_type", None) or base_model_type
 
-    if model_type == "internvl_chat" and preprocessors is not None:
+    if base_model_type == "internvl_chat" and preprocessors is not None:
         model.config.img_context_token_id = preprocessors[0].convert_tokens_to_ids("<IMG_CONTEXT>")
 
-    if model_type == "phi3_v":
+    if base_model_type == "phi3_v":
         model.config.glb_GN = model.model.vision_embed_tokens.glb_GN.tolist()
         model.config.sub_GN = model.model.vision_embed_tokens.sub_GN.tolist()
 
-    if model_type == "phi4mm":
+    if base_model_type == "phi4mm":
         model.config.glb_GN = model.model.embed_tokens_extend.image_embed.glb_GN.tolist()
         model.config.sub_GN = model.model.embed_tokens_extend.image_embed.sub_GN.tolist()
         model.config.num_img_tokens = model.model.embed_tokens_extend.image_embed.num_img_tokens
         model.config.hd_transform_order = model.model.embed_tokens_extend.image_embed.hd_transform_order
         if model.config.img_processor is None:
             model.config.img_processor = model.model.embed_tokens_extend.image_embed.img_processor.config.to_dict()
 
-    if model_type == "phi4_multimodal":
+    if base_model_type == "phi4_multimodal":
         model.config.glb_GN = model.model.embed_tokens_extend.image_embed.global_img_feature_extensor.tolist()
         model.config.sub_GN = model.model.embed_tokens_extend.image_embed.sub_img_feature_extensor.tolist()
         model.config.num_img_tokens = model.model.embed_tokens_extend.image_embed.num_img_tokens
@@ -949,8 +951,19 @@ def _get_multi_modal_submodels_and_export_configs(
     main_config_cls = TasksManager.get_exporter_config_constructor(
         model=model, task=task, exporter="openvino", library_name=library_name
     )
+
+    config_kwargs = {
+        "int_dtype": int_dtype,
+        "float_dtype": float_dtype,
+        "preprocessors": preprocessors,
+    }
+
+    if export_model_type in {"llava-qwen2", "phi3_v"}:
+        config_kwargs["trust_remote_code"] = trust_remote_code
+
     main_config = main_config_cls(
-        model.config, int_dtype=int_dtype, float_dtype=float_dtype, preprocessors=preprocessors
+        model.config,
+        **config_kwargs,
     )
     for behavior in main_config.SUPPORTED_BEHAVIORS:
         model_id = f"{behavior}_model"
@@ -976,6 +989,7 @@ def _get_submodels_and_export_configs(
     model_kwargs: Optional[Dict] = None,
     exporter: str = "openvino",
     stateful: bool = False,
+    trust_remote_code: bool = False,
 ):
     if (
         not custom_architecture
@@ -983,7 +997,15 @@ def _get_submodels_and_export_configs(
         and model.config.model_type in MULTI_MODAL_TEXT_GENERATION_MODELS
     ):
         return _get_multi_modal_submodels_and_export_configs(
-            model, task, library_name, int_dtype, float_dtype, preprocessors, model_kwargs, stateful
+            model,
+            task,
+            library_name,
+            int_dtype,
+            float_dtype,
+            preprocessors,
+            model_kwargs,
+            stateful,
+            trust_remote_code=trust_remote_code,
         )
     elif not custom_architecture and library_name == "transformers" and model.config.model_type == "speecht5":
         return _get_speecht5_tss_model_for_export(
diff --git a/optimum/exporters/openvino/model_configs.py b/optimum/exporters/openvino/model_configs.py
index 3a5d3d08fa..3334e6e556 100644
--- a/optimum/exporters/openvino/model_configs.py
+++ b/optimum/exporters/openvino/model_configs.py
@@ -2183,11 +2183,14 @@ def __init__(
         behavior: VLMConfigBehavior = VLMConfigBehavior.VISION_EMBEDDINGS,
         preprocessors: Optional[List[Any]] = None,
         use_past: bool = False,
+        trust_remote_code: bool = False,
+        **kwargs,
     ):
         self._behavior = behavior
         self._orig_config = config
-        if self._behavior == VLMConfigBehavior.VISION_EMBEDDINGS:
-            config = AutoConfig.from_pretrained(config.mm_vision_tower, trust_remote_code=True)
+        self._trust_remote_code = trust_remote_code
+        if self._behavior == VLMConfigBehavior.VISION_EMBEDDINGS and hasattr(config, "mm_vision_tower"):
+            config = AutoConfig.from_pretrained(config.mm_vision_tower, trust_remote_code=self._trust_remote_code)
             if hasattr(config, "vision_config"):
                 config = config.vision_config
         super().__init__(
@@ -2256,6 +2259,7 @@ def with_behavior(
                 float_dtype=self.float_dtype,
                 behavior=behavior,
                 preprocessors=self._preprocessors,
+                trust_remote_code=self._trust_remote_code,
             )
 
     def patch_model_for_export(self, model: PreTrainedModel, model_kwargs: Optional[Dict[str, Any]] = None):
@@ -3066,6 +3070,8 @@ def __init__(
         float_dtype: str = "fp32",
         behavior: Phi3VisionConfigBehavior = Phi3VisionConfigBehavior.VISION_EMBEDDINGS,
         preprocessors: Optional[List[Any]] = None,
+        trust_remote_code: bool = False,
+        **kwargs,
     ):
         super().__init__(
             config=config,
@@ -3076,9 +3082,10 @@ def __init__(
         )
         self._behavior = behavior
         self._orig_config = config
+        self._trust_remote_code = trust_remote_code
         if self._behavior == Phi3VisionConfigBehavior.VISION_EMBEDDINGS and hasattr(config, "img_processor"):
             self._config = AutoConfig.from_pretrained(
-                config.img_processor["model_name"], trust_remote_code=True
+                config.img_processor["model_name"], trust_remote_code=self._trust_remote_code
             ).vision_config
             self._normalized_config = self.NORMALIZED_CONFIG_CLASS(self._config)
             self.DUMMY_INPUT_GENERATOR_CLASSES = (DummyVisionInputGenerator,)
@@ -3127,6 +3134,7 @@ def with_behavior(
                 float_dtype=self.float_dtype,
                 behavior=behavior,
                 preprocessors=self._preprocessors,
+                trust_remote_code=self._trust_remote_code,
             )
         if behavior == Phi3VisionConfigBehavior.VISION_PROJECTION:
             return self.__class__(
diff --git a/tests/openvino/test_export.py b/tests/openvino/test_export.py
index 9519cea1ec..60e44c8459 100644
--- a/tests/openvino/test_export.py
+++ b/tests/openvino/test_export.py
@@ -62,7 +62,6 @@
 from optimum.utils import logging
 from optimum.utils.save_utils import maybe_load_preprocessors
 
-
 logger = logging.get_logger()
 
 
@@ -125,6 +124,7 @@ class ExportModelTest(unittest.TestCase):
         SUPPORTED_ARCHITECTURES.update({"qwen3": OVModelForFeatureExtraction})
 
     GENERATIVE_MODELS = ("pix2struct", "t5", "bart", "gpt2", "whisper", "llava", "speecht5")
+    OV_MULTIMODAL_REMOTE_CODE_MODELS = ("llava-qwen2", "phi3_v")
 
     def _openvino_export(
         self,
@@ -334,6 +334,29 @@ def test_compare_openvino_onnx_supported_architectures(self):
         if len(only_onnx) > 0:
             logger.warning(f"The following architectures export {only_onnx} is supported by ONNX but not OpenVINO")
 
+    @parameterized.expand(OV_MULTIMODAL_REMOTE_CODE_MODELS)
+    def test_export_requires_trust_remote_code_for_multimodal_models(self, model_type):
+
+        model = MODEL_NAMES[model_type]
+        task = "image-text-to-text"
+        with TemporaryDirectory() as tmpdirname:
+            with self.assertRaises(ValueError) as ctx:
+                main_export(
+                    model_name_or_path=model,
+                    task=task,
+                    output=Path(tmpdirname),
+                    trust_remote_code=False,
+                )
+
+            self.assertIn("trust_remote_code", str(ctx.exception))
+
+            main_export(
+                model_name_or_path=model,
+                task=task,
+                output=Path(tmpdirname),
+                trust_remote_code=True,
+            )
+
 
 class CustomExportModelTest(unittest.TestCase):
     def test_custom_export_config_model(self):