|
1 | 1 | # SPDX-FileCopyrightText: 2023-present deepset GmbH <info@deepset.ai> |
2 | 2 | # |
3 | 3 | # SPDX-License-Identifier: Apache-2.0 |
4 | | -from typing import Any, Optional |
| 4 | +from typing import Any |
5 | 5 |
|
6 | 6 | import requests |
7 | 7 | from haystack import Document, component, default_from_dict, default_to_dict |
@@ -43,11 +43,11 @@ def __init__( |
43 | 43 | suffix: str = "", |
44 | 44 | batch_size: int = 32, |
45 | 45 | progress_bar: bool = True, |
46 | | - meta_fields_to_embed: Optional[list[str]] = None, |
| 46 | + meta_fields_to_embed: list[str] | None = None, |
47 | 47 | embedding_separator: str = "\n", |
48 | | - task: Optional[str] = None, |
49 | | - dimensions: Optional[int] = None, |
50 | | - late_chunking: Optional[bool] = None, |
| 48 | + task: str | None = None, |
| 49 | + dimensions: int | None = None, |
| 50 | + late_chunking: bool | None = None, |
51 | 51 | ): |
52 | 52 | """ |
53 | 53 | Create a JinaDocumentEmbedder component. |
@@ -156,7 +156,7 @@ def _prepare_texts_to_embed(self, documents: list[Document]) -> list[str]: |
156 | 156 | return texts_to_embed |
157 | 157 |
|
158 | 158 | def _embed_batch( |
159 | | - self, texts_to_embed: list[str], batch_size: int, parameters: Optional[dict] = None |
| 159 | + self, texts_to_embed: list[str], batch_size: int, parameters: dict | None = None |
160 | 160 | ) -> tuple[list[list[float]], dict[str, Any]]: |
161 | 161 | """ |
162 | 162 | Embed a list of texts in batches. |
@@ -219,7 +219,7 @@ def run(self, documents: list[Document]) -> dict[str, Any]: |
219 | 219 | texts_to_embed=texts_to_embed, batch_size=self.batch_size, parameters=parameters |
220 | 220 | ) |
221 | 221 |
|
222 | | - for doc, emb in zip(documents, embeddings): |
| 222 | + for doc, emb in zip(documents, embeddings, strict=True): |
223 | 223 | doc.embedding = emb |
224 | 224 |
|
225 | 225 | return {"documents": documents, "meta": metadata} |
0 commit comments