Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/source/openvino/models.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ Here is the list of the supported architectures :
- Qwen2.5-VL
- Qwen3
- Qwen3MoE
- Qwen3-VL
- Qwen3-VL (including Qwen3-VL-Embedding)
- Qwen3.5
- Qwen3.5-MoE
- Qwen3.6
Expand Down
1 change: 0 additions & 1 deletion optimum/exporters/openvino/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -368,7 +368,6 @@ def export_pytorch(

if input_shapes is None:
input_shapes = {} # will use the defaults from DEFAULT_DUMMY_SHAPES

# Check that inputs match, and order them properly
dummy_inputs = config.generate_dummy_inputs(framework="pt", **input_shapes)
device = torch.device(device)
Expand Down
4 changes: 3 additions & 1 deletion optimum/exporters/openvino/model_configs.py
Original file line number Diff line number Diff line change
Expand Up @@ -5746,7 +5746,9 @@ class SiglipOpenVINOConfig(SiglipOnnxConfig):
"transformer", *["feature-extraction", "sentence-similarity"], library_name="sentence_transformers"
)
class SentenceTransformersTransformerOpenVINOConfig(SentenceTransformersTransformerOnnxConfig):
pass
def __init__(self, config, **kwargs):
    """
    Build the export config, then expose the text sub-config's vocabulary size on ``config``.

    Args:
        config: Model configuration forwarded to the parent constructor. It is modified in
            place: its ``vocab_size`` attribute is overwritten.
        **kwargs: Forwarded unchanged to the parent constructor.
    """
    super().__init__(config, **kwargs)
    # Composite configs keep ``vocab_size`` on their text sub-config; copy it to the top level.
    text_config = config.get_text_config()
    config.vocab_size = text_config.vocab_size


@register_in_tasks_manager("rembert", *COMMON_TEXT_TASKS)
Expand Down
190 changes: 187 additions & 3 deletions optimum/intel/openvino/modeling_sentence_transformers.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,20 @@
import json
import os
from pathlib import Path
from types import MethodType
from typing import Dict, List, Optional, Tuple, Union
from typing import Any, Dict, List, Optional, Tuple, Union

import numpy as np
import torch
from huggingface_hub import hf_hub_download
from huggingface_hub.constants import HUGGINGFACE_HUB_CACHE
from huggingface_hub.utils import EntryNotFoundError
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, PretrainedConfig
from transformers import AutoProcessor, AutoTokenizer, PretrainedConfig
from transformers.file_utils import add_start_docstrings

from optimum.intel.utils.import_utils import is_sentence_transformers_version

from .configuration import OVQuantizationConfigBase
from .modeling import MODEL_START_DOCSTRING, OVModel

Expand All @@ -27,7 +33,17 @@ def __init__(self, model=None, config=None, tokenizer=None, **kwargs):
super().__init__(model, config, **kwargs)

self.encode = MethodType(SentenceTransformer.encode, self)
self._text_length = MethodType(SentenceTransformer._text_length, self)
if is_sentence_transformers_version(">=", "5.4.0"):
self.supports = MethodType(SentenceTransformer.supports, self)
self._input_length = SentenceTransformer._input_length
self._resolve_prompt = MethodType(SentenceTransformer._resolve_prompt, self)
self.is_singular_input = MethodType(SentenceTransformer.is_singular_input, self)
self.modalities = ["text", "image", "video", "message"]
self.default_prompt_name = kwargs.get("default_prompt_name", None)
self.prompts = kwargs.get("prompts", {}) or {}
self.processor = kwargs.get("processor", None)
else:
self._text_length = MethodType(SentenceTransformer._text_length, self)
self.default_prompt_name = None
self.truncate_dim = None
self.tokenizer = tokenizer
Expand All @@ -36,6 +52,9 @@ def _save_pretrained(self, save_directory: Union[str, Path]):
super()._save_pretrained(save_directory)
self.tokenizer.save_pretrained(save_directory)

def _can_flatten_inputs(self):
return False

def forward(self, inputs: Dict[str, torch.Tensor]):
self.compile()
input_ids = inputs.get("input_ids")
Expand Down Expand Up @@ -94,6 +113,50 @@ def _from_pretrained(

tokenizer = AutoTokenizer.from_pretrained(model_id, **tokenizer_args)

if is_sentence_transformers_version(">=", "5.4.0"):
processor = None
try:
processor = AutoProcessor.from_pretrained(model_id, **tokenizer_args)
except (OSError, ValueError, KeyError, EnvironmentError):
processor = None

# Load sentence-transformers prompts/default_prompt_name from config_sentence_transformers.json,
# so that SentenceTransformer.encode's prompt resolution behaves the same as the reference model.
st_prompts: Dict[str, str] = {}
st_default_prompt_name: Optional[str] = None
st_config_path: Optional[str] = None
try:
if os.path.isdir(model_id):
candidate = (
os.path.join(model_id, subfolder, "config_sentence_transformers.json")
if subfolder
else os.path.join(model_id, "config_sentence_transformers.json")
)
if os.path.isfile(candidate):
st_config_path = candidate
else:
st_config_path = hf_hub_download(
repo_id=str(model_id),
filename="config_sentence_transformers.json",
subfolder=subfolder or None,
revision=revision,
cache_dir=cache_dir,
token=token,
local_files_only=local_files_only,
force_download=force_download,
)
except (EntryNotFoundError, OSError, ValueError):
st_config_path = None

if st_config_path is not None:
try:
with open(st_config_path, "r", encoding="utf-8") as f:
st_cfg = json.load(f)
st_prompts = st_cfg.get("prompts", {}) or {}
st_default_prompt_name = st_cfg.get("default_prompt_name", None)
except (OSError, json.JSONDecodeError):
pass

model = super()._from_pretrained(
model_id=model_id,
config=config,
Expand All @@ -111,6 +174,11 @@ def _from_pretrained(
**kwargs,
)

if is_sentence_transformers_version(">=", "5.4.0"):
model.prompts = st_prompts
model.default_prompt_name = st_default_prompt_name
model.processor = processor

return model

def tokenize(
Expand Down Expand Up @@ -160,3 +228,119 @@ def _preprocess_quantization_config(
quantization_config = quantization_config.clone()
quantization_config.tokenizer = model_name_or_path
return quantization_config

def preprocess(
    self,
    inputs,
    prompt,
    **kwargs,
):
    """
    Preprocesses the inputs for the model.

    Mirrors :meth:`sentence_transformers.base.modules.transformer.Transformer.preprocess`
    for the text/message modalities so that tokenization matches the reference
    SentenceTransformer model when a chat template is used (e.g. Qwen3-VL-Embedding).

    Args:
        inputs: Batch of inputs to encode. An empty or ``None`` batch short-circuits to ``{}``.
        prompt: Optional prompt. On the chat-template path it is passed to
            ``_build_messages_batch``; on the fallback path it is prepended to text inputs.
        **kwargs: Forwarded to ``self.tokenize`` on the fallback path only.

    Returns:
        A dict of model features with an extra ``"modality"`` entry: ``"message"`` when a
        chat template was applied, otherwise the inferred modality (``None`` if inference failed).

    Raises:
        ValueError: If the inferred modality is not supported by this model.
    """
    # Handle the trivial case first so an empty batch does not depend on the optional
    # sentence-transformers modality helpers being importable.
    if not inputs:
        return {}

    from sentence_transformers.base.modality import format_modality, infer_batch_modality

    # Infer modality (used both for validation and to decide preprocessing path).
    modality = None
    try:
        modality = infer_batch_modality(inputs, supported_modalities=self.modalities)
    except (ValueError, TypeError):
        # Best effort: if the modality cannot be inferred, skip validation and continue
        # with ``modality=None``.
        pass

    if modality is not None and not self.supports(modality):
        supported = ", ".join(format_modality(m) for m in self.modalities)
        message = (
            f"Modality '{format_modality(modality)}' is not supported by this {type(self).__name__} model. "
            f"Supported modalities: {supported}"
        )
        if isinstance(modality, tuple) and all(part in self.modalities for part in modality):
            message += (
                f"\nThis model supports {' and '.join(modality)} individually, "
                "but not in the same input. Please process each modality separately."
            )
        raise ValueError(message)

    # If the model has a chat template, route inputs through apply_chat_template so the output
    # matches the reference SentenceTransformer (which uses the processor when available).
    tokenizer = self.tokenizer
    processor = getattr(self, "processor", None)
    chat_template_owner = None
    if processor is not None and getattr(processor, "chat_template", None) is not None:
        chat_template_owner = processor
    elif tokenizer is not None and getattr(tokenizer, "chat_template", None) is not None:
        chat_template_owner = tokenizer

    if chat_template_owner is not None and "message" in self.modalities:
        messages_batch = self._build_messages_batch(inputs, modality, prompt)
        preprocessed = chat_template_owner.apply_chat_template(
            messages_batch,
            tokenize=True,
            return_dict=True,
            add_generation_prompt=True,
            padding=True,
            truncation="longest_first",
            return_tensors="pt",
        )
        preprocessed = dict(preprocessed)
        preprocessed["modality"] = "message"
    else:
        # Fallback: plain tokenization (e.g. for text-only models without a chat template).
        if prompt and modality == "text":
            inputs = [
                (prompt + inp[0],) + tuple(inp[1:]) if isinstance(inp, tuple) else prompt + inp for inp in inputs
            ]
        preprocessed = self.tokenize(inputs, **kwargs)
        preprocessed["modality"] = modality

    # The leftover debug ``print`` of the token ids was removed here: library code must not
    # write to stdout on every encode call.
    return preprocessed

@staticmethod
def _build_messages_batch(
inputs: List[Any],
modality: Any,
prompt: Optional[str],
) -> List[List[Dict[str, Any]]]:
"""Convert SentenceTransformer-style inputs into a list of chat-template message lists.

Each text input becomes a ``user`` message with structured content; if ``prompt`` is
provided it is prepended as a ``system`` message (matching
``InputFormatter.prepend_prompt_to_messages``).
"""

def _content_for_item(item: Any) -> List[Dict[str, Any]]:
if isinstance(item, str):
return [{"type": "text", "text": item}]
if isinstance(item, dict):
content: List[Dict[str, Any]] = []
for key, value in item.items():
if key == "text":
content.append({"type": "text", "text": value})
elif key in ("image", "image_url"):
content.append({"type": "image", "image": value})
elif key == "video":
content.append({"type": "video", "video": value})
else:
content.append({"type": key, key: value})
return content
# Tuples/lists (e.g. text pairs) - flatten into separate text parts.
if isinstance(item, (tuple, list)):
return [{"type": "text", "text": str(v)} for v in item]
return [{"type": "text", "text": str(item)}]

messages_batch: List[List[Dict[str, Any]]] = []
for inp in inputs:
user_message = {"role": "user", "content": _content_for_item(inp)}
sample_messages: List[Dict[str, Any]] = []
if prompt:
sample_messages.append({"role": "system", "content": [{"type": "text", "text": prompt}]})
sample_messages.append(user_message)
messages_batch.append(sample_messages)
return messages_batch
Loading