|
8 | 8 | from pathlib import Path |
9 | 9 | from typing import Any, Literal |
10 | 10 |
|
11 | | -from google.genai.types import EmbedContentConfig, Part |
| 11 | +from google.genai.types import Content, EmbedContentConfig, Part |
12 | 12 | from haystack import Document, component, logging |
13 | 13 | from haystack.components.converters.image.image_utils import ( |
14 | 14 | _batch_convert_pdf_pages_to_images, |
|
17 | 17 | ) |
18 | 18 | from haystack.dataclasses import ByteStream |
19 | 19 | from haystack.utils.auth import Secret |
20 | | -from more_itertools import batched |
21 | 20 | from tqdm import tqdm |
22 | 21 | from tqdm.asyncio import tqdm as async_tqdm |
23 | 22 | from typing_extensions import NotRequired, TypedDict |
@@ -323,10 +322,11 @@ def _embed_batch( |
323 | 322 |
|
324 | 323 | all_embeddings: list[list[float] | None] = [] |
325 | 324 | meta: dict[str, Any] = {} |
326 | | - for batch in tqdm( |
327 | | - batched(parts_to_embed, batch_size), disable=not self._progress_bar, desc="Calculating embeddings" |
| 325 | + for i in tqdm( |
| 326 | + range(0, len(parts_to_embed), batch_size), disable=not self._progress_bar, desc="Calculating embeddings" |
328 | 327 | ): |
329 | | - args: dict[str, Any] = {"model": self._model, "contents": batch} |
| 328 | + batch = parts_to_embed[i : i + batch_size] |
| 329 | + args: dict[str, Any] = {"model": self._model, "contents": [Content(parts=[p]) for p in batch]} |
330 | 330 | if resolved_config: |
331 | 331 | args["config"] = resolved_config |
332 | 332 |
|
@@ -365,10 +365,11 @@ async def _embed_batch_async( |
365 | 365 |
|
366 | 366 | all_embeddings: list[list[float] | None] = [] |
367 | 367 | meta: dict[str, Any] = {} |
368 | | - async for batch in async_tqdm( |
369 | | - batched(parts_to_embed, batch_size), disable=not self._progress_bar, desc="Calculating embeddings" |
| 368 | + async for i in async_tqdm( |
| 369 | + range(0, len(parts_to_embed), batch_size), disable=not self._progress_bar, desc="Calculating embeddings" |
370 | 370 | ): |
371 | | - args: dict[str, Any] = {"model": self._model, "contents": batch} |
| 371 | + batch = parts_to_embed[i : i + batch_size] |
| 372 | + args: dict[str, Any] = {"model": self._model, "contents": [Content(parts=[p]) for p in batch]} |
372 | 373 | if resolved_config: |
373 | 374 | args["config"] = resolved_config |
374 | 375 |
|
|
0 commit comments