@@ -31,7 +31,7 @@ class VLLMDocumentEmbedder:
3131 Before using this component, start a vLLM server with an embedding model:
3232
3333 ```bash
34- vllm serve intfloat/e5-mistral-7b-instruct
34+ vllm serve google/embeddinggemma-300m
3535 ```
3636
3737 For details on server options, see the [vLLM CLI docs](https://docs.vllm.ai/en/stable/cli/serve/).
@@ -44,7 +44,7 @@ class VLLMDocumentEmbedder:
4444
4545 doc = Document(content="I love pizza!")
4646
47- document_embedder = VLLMDocumentEmbedder(model="intfloat/e5-mistral-7b-instruct ")
47+ document_embedder = VLLMDocumentEmbedder(model="google/embeddinggemma-300m")
4848
4949 result = document_embedder.run([doc])
5050 print(result["documents"][0].embedding)
@@ -57,8 +57,8 @@ class VLLMDocumentEmbedder:
5757
5858 ```python
5959 document_embedder = VLLMDocumentEmbedder(
60- model="jinaai/jina-embeddings-v3 ",
61- extra_parameters={"dimensions ": 32 , "truncate_prompt_tokens ": 256 },
60+ model="google/embeddinggemma-300m",
61+ extra_parameters={"truncate_prompt_tokens": 256, "truncation_side": "right"},
6262 )
6363 ```
6464 """
@@ -71,6 +71,7 @@ def __init__(
7171 api_base_url : str = "http://localhost:8000/v1" ,
7272 prefix : str = "" ,
7373 suffix : str = "" ,
74+ dimensions : int | None = None ,
7475 batch_size : int = 32 ,
7576 progress_bar : bool = True ,
7677 meta_fields_to_embed : list [str ] | None = None ,
@@ -84,16 +85,21 @@ def __init__(
8485 """
8586 Creates an instance of VLLMDocumentEmbedder.
8687
87- :param model: The name of the model served by vLLM (e.g., "intfloat/e5-mistral-7b-instruct").
88+ :param model: The name of the model served by vLLM. Check
89+ [vLLM's documentation](https://docs.vllm.ai/en/stable/models/pooling_models) for more information.
8890 :param api_key: The vLLM API key. Defaults to the `VLLM_API_KEY` environment variable.
8991 Only required if the vLLM server was started with `--api-key`.
9092 :param api_base_url: The base URL of the vLLM server.
9193 :param prefix: A string to add at the beginning of each text.
9294 :param suffix: A string to add at the end of each text.
93- :param batch_size: Number of Documents to encode at once.
94- :param progress_bar: Whether to show a progress bar. Disable in production to keep logs clean.
95- :param meta_fields_to_embed: List of meta fields to embed along with the Document text.
96- :param embedding_separator: Separator used to concatenate the meta fields to the Document text.
95+ :param dimensions: The number of dimensions of the resulting embedding. Only models trained with
96+ Matryoshka Representation Learning support this parameter. See
97+ [vLLM's documentation](https://docs.vllm.ai/en/stable/models/pooling_models/embed/#matryoshka-embeddings)
98+ for more information.
99+ :param batch_size: Number of documents to encode at once.
100+ :param progress_bar: Whether to show a progress bar.
101+ :param meta_fields_to_embed: List of meta fields to embed along with the document text.
102+ :param embedding_separator: Separator used to concatenate the meta fields to the document text.
97103 :param timeout: Timeout in seconds for vLLM client calls. If not set, the OpenAI client default applies.
98104 :param max_retries: Maximum number of retries for failed requests. If not set, the OpenAI client
99105 default applies.
@@ -104,15 +110,15 @@ def __init__(
104110 the component logs the error and continues processing the remaining documents.
105111 :param extra_parameters: Additional parameters forwarded as `extra_body` to the vLLM embeddings
106112 endpoint. Use this to pass parameters not part of the standard OpenAI Embeddings API, such as
107- `dimensions` (for Matryoshka models), `truncate_prompt_tokens`, `truncation_side`,
108- `additional_data`, `use_activation`, etc. See the
109- [vLLM Embeddings API docs](https://docs.vllm.ai/en/stable/models/pooling_models.html#openai-compatible-embeddings-api).
113+ `truncate_prompt_tokens`, `truncation_side`, etc. See the
114+ [vLLM Embeddings API docs](https://docs.vllm.ai/en/stable/models/pooling_models/embed/#openai-compatible-embeddings-api).
110115 """
111116 self .model = model
112117 self .api_key = api_key
113118 self .api_base_url = api_base_url
114119 self .prefix = prefix
115120 self .suffix = suffix
121+ self .dimensions = dimensions
116122 self .batch_size = batch_size
117123 self .progress_bar = progress_bar
118124 self .meta_fields_to_embed = meta_fields_to_embed or []
@@ -149,10 +155,11 @@ def to_dict(self) -> dict[str, Any]:
149155 return default_to_dict (
150156 self ,
151157 model = self .model ,
152- api_key = self .api_key . to_dict () if self . api_key else None ,
158+ api_key = self .api_key ,
153159 api_base_url = self .api_base_url ,
154160 prefix = self .prefix ,
155161 suffix = self .suffix ,
162+ dimensions = self .dimensions ,
156163 batch_size = self .batch_size ,
157164 progress_bar = self .progress_bar ,
158165 meta_fields_to_embed = self .meta_fields_to_embed ,
@@ -183,6 +190,8 @@ def _prepare_texts_to_embed(self, documents: list[Document]) -> dict[str, str]:
183190
184191 def _prepare_input (self , inputs : list [str ]) -> dict [str , Any ]:
185192 kwargs : dict [str , Any ] = {"model" : self .model , "input" : inputs , "encoding_format" : "float" }
193+ if self .dimensions is not None :
194+ kwargs ["dimensions" ] = self .dimensions
186195 if self .extra_parameters :
187196 kwargs ["extra_body" ] = self .extra_parameters
188197 return kwargs
0 commit comments