Skip to content

Commit 0862847

Browse files
authored
fix: fix GoogleGenAIMultimodalDocumentEmbedder input format (#3136)
1 parent 2d259b9 commit 0862847

2 files changed

Lines changed: 17 additions & 15 deletions

File tree

integrations/google_genai/src/haystack_integrations/components/embedders/google_genai/document_embedder.py

Lines changed: 8 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,6 @@
88
from google.genai import types
99
from haystack import Document, component, default_from_dict, default_to_dict, logging
1010
from haystack.utils import Secret, deserialize_secrets_inplace
11-
from more_itertools import batched
1211
from tqdm import tqdm
1312

1413
from haystack_integrations.components.common.google_genai.utils import _get_client
@@ -204,10 +203,11 @@ def _embed_batch(
204203

205204
all_embeddings = []
206205
meta: dict[str, Any] = {}
207-
for batch in tqdm(
208-
batched(texts_to_embed, batch_size), disable=not self._progress_bar, desc="Calculating embeddings"
206+
for i in tqdm(
207+
range(0, len(texts_to_embed), batch_size), disable=not self._progress_bar, desc="Calculating embeddings"
209208
):
210-
args: dict[str, Any] = {"model": self._model, "contents": list(batch)}
209+
batch = texts_to_embed[i : i + batch_size]
210+
args: dict[str, Any] = {"model": self._model, "contents": batch}
211211
if resolved_config:
212212
args["config"] = resolved_config
213213

@@ -235,10 +235,11 @@ async def _embed_batch_async(
235235

236236
all_embeddings = []
237237
meta: dict[str, Any] = {}
238-
for batch in tqdm(
239-
batched(texts_to_embed, batch_size), disable=not self._progress_bar, desc="Calculating embeddings"
238+
for i in tqdm(
239+
range(0, len(texts_to_embed), batch_size), disable=not self._progress_bar, desc="Calculating embeddings"
240240
):
241-
args: dict[str, Any] = {"model": self._model, "contents": list(batch)}
241+
batch = texts_to_embed[i : i + batch_size]
242+
args: dict[str, Any] = {"model": self._model, "contents": batch}
242243
if self._config:
243244
args["config"] = types.EmbedContentConfig(**self._config) if self._config else None
244245

integrations/google_genai/src/haystack_integrations/components/embedders/google_genai/multimodal_document_embedder.py

Lines changed: 9 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@
88
from pathlib import Path
99
from typing import Any, Literal
1010

11-
from google.genai.types import EmbedContentConfig, Part
11+
from google.genai.types import Content, EmbedContentConfig, Part
1212
from haystack import Document, component, logging
1313
from haystack.components.converters.image.image_utils import (
1414
_batch_convert_pdf_pages_to_images,
@@ -17,7 +17,6 @@
1717
)
1818
from haystack.dataclasses import ByteStream
1919
from haystack.utils.auth import Secret
20-
from more_itertools import batched
2120
from tqdm import tqdm
2221
from tqdm.asyncio import tqdm as async_tqdm
2322
from typing_extensions import NotRequired, TypedDict
@@ -323,10 +322,11 @@ def _embed_batch(
323322

324323
all_embeddings: list[list[float] | None] = []
325324
meta: dict[str, Any] = {}
326-
for batch in tqdm(
327-
batched(parts_to_embed, batch_size), disable=not self._progress_bar, desc="Calculating embeddings"
325+
for i in tqdm(
326+
range(0, len(parts_to_embed), batch_size), disable=not self._progress_bar, desc="Calculating embeddings"
328327
):
329-
args: dict[str, Any] = {"model": self._model, "contents": batch}
328+
batch = parts_to_embed[i : i + batch_size]
329+
args: dict[str, Any] = {"model": self._model, "contents": [Content(parts=[p]) for p in batch]}
330330
if resolved_config:
331331
args["config"] = resolved_config
332332

@@ -365,10 +365,11 @@ async def _embed_batch_async(
365365

366366
all_embeddings: list[list[float] | None] = []
367367
meta: dict[str, Any] = {}
368-
async for batch in async_tqdm(
369-
batched(parts_to_embed, batch_size), disable=not self._progress_bar, desc="Calculating embeddings"
368+
async for i in async_tqdm(
369+
range(0, len(parts_to_embed), batch_size), disable=not self._progress_bar, desc="Calculating embeddings"
370370
):
371-
args: dict[str, Any] = {"model": self._model, "contents": batch}
371+
batch = parts_to_embed[i : i + batch_size]
372+
args: dict[str, Any] = {"model": self._model, "contents": [Content(parts=[p]) for p in batch]}
372373
if resolved_config:
373374
args["config"] = resolved_config
374375

0 commit comments

Comments
 (0)