Skip to content

Commit f8e5f8a

Browse files
committed
refactor: improve code formatting for consistency and readability in document embedder and tests
1 parent 38525c0 commit f8e5f8a

3 files changed

Lines changed: 19 additions & 31 deletions

File tree

integrations/google_genai/src/haystack_integrations/components/embedders/google_genai/document_embedder.py

Lines changed: 5 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -85,8 +85,7 @@ def __init__(
8585
self._meta_fields_to_embed = meta_fields_to_embed or []
8686
self._embedding_separator = embedding_separator
8787
self._client = genai.Client(api_key=api_key.resolve_value())
88-
self._config = config if config is not None else {
89-
"task_type": "SEMANTIC_SIMILARITY"}
88+
self._config = config if config is not None else {"task_type": "SEMANTIC_SIMILARITY"}
9089

9190
def to_dict(self) -> Dict[str, Any]:
9291
"""
@@ -134,8 +133,7 @@ def _prepare_texts_to_embed(self, documents: List[Document]) -> Dict[str, str]:
134133
]
135134

136135
text_to_embed = (
137-
self._prefix + self._embedding_separator.join(
138-
[*meta_values_to_embed, doc.content or ""]) + self._suffix
136+
self._prefix + self._embedding_separator.join([*meta_values_to_embed, doc.content or ""]) + self._suffix
139137
)
140138
texts_to_embed.append(text_to_embed)
141139

@@ -151,11 +149,9 @@ def _embed_batch(self, texts_to_embed: List[str], batch_size: int) -> Tuple[List
151149
for batch in tqdm(
152150
batched(texts_to_embed, batch_size), disable=not self._progress_bar, desc="Calculating embeddings"
153151
):
154-
args: Dict[str, Any] = {"model": self._model,
155-
"contents": [b[1] for b in batch]}
152+
args: Dict[str, Any] = {"model": self._model, "contents": [b[1] for b in batch]}
156153
if self._config:
157-
args["config"] = types.EmbedContentConfig(
158-
**self._config) if self._config else None
154+
args["config"] = types.EmbedContentConfig(**self._config) if self._config else None
159155

160156
response = self._client.models.embed_content(**args)
161157

@@ -189,8 +185,7 @@ def run(self, documents: List[Document]) -> Dict[str, Union[List[Document], Dict
189185

190186
texts_to_embed = self._prepare_texts_to_embed(documents=documents)
191187

192-
embeddings, meta = self._embed_batch(
193-
texts_to_embed=texts_to_embed, batch_size=self._batch_size)
188+
embeddings, meta = self._embed_batch(texts_to_embed=texts_to_embed, batch_size=self._batch_size)
194189

195190
for doc, emb in zip(documents, embeddings):
196191
doc.embedding = emb

integrations/google_genai/tests/test_document_embedder.py

Lines changed: 11 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -92,8 +92,7 @@ def test_to_dict(self, monkeypatch):
9292
data = component.to_dict()
9393
assert data == {
9494
"type": (
95-
"haystack_integrations.components.embedders"
96-
".google_genai.document_embedder.GoogleGenAIDocumentEmbedder"
95+
"haystack_integrations.components.embedders.google_genai.document_embedder.GoogleGenAIDocumentEmbedder"
9796
),
9897
"init_parameters": {
9998
"model": "text-embedding-004",
@@ -124,8 +123,7 @@ def test_to_dict_with_custom_init_parameters(self, monkeypatch):
124123
data = component.to_dict()
125124
assert data == {
126125
"type": (
127-
"haystack_integrations.components.embedders"
128-
".google_genai.document_embedder.GoogleGenAIDocumentEmbedder"
126+
"haystack_integrations.components.embedders.google_genai.document_embedder.GoogleGenAIDocumentEmbedder"
129127
),
130128
"init_parameters": {
131129
"model": "model",
@@ -142,8 +140,7 @@ def test_to_dict_with_custom_init_parameters(self, monkeypatch):
142140

143141
def test_prepare_texts_to_embed_w_metadata(self):
144142
documents = [
145-
Document(id=f"{i}", content=f"document number {i}:\ncontent", meta={
146-
"meta_field": f"meta_value {i}"})
143+
Document(id=f"{i}", content=f"document number {i}:\ncontent", meta={"meta_field": f"meta_value {i}"})
147144
for i in range(5)
148145
]
149146

@@ -157,12 +154,11 @@ def test_prepare_texts_to_embed_w_metadata(self):
157154
"meta_value 1 | document number 1:\ncontent",
158155
"meta_value 2 | document number 2:\ncontent",
159156
"meta_value 3 | document number 3:\ncontent",
160-
"meta_value 4 | document number 4:\ncontent"
157+
"meta_value 4 | document number 4:\ncontent",
161158
]
162159

163160
def test_run_wrong_input_format(self):
164-
embedder = GoogleGenAIDocumentEmbedder(
165-
api_key=Secret.from_token("fake-api-key"))
161+
embedder = GoogleGenAIDocumentEmbedder(api_key=Secret.from_token("fake-api-key"))
166162

167163
# wrong formats
168164
string_input = "text"
@@ -175,8 +171,7 @@ def test_run_wrong_input_format(self):
175171
embedder.run(documents=list_integers_input)
176172

177173
def test_run_on_empty_list(self):
178-
embedder = GoogleGenAIDocumentEmbedder(
179-
api_key=Secret.from_token("fake-api-key"))
174+
embedder = GoogleGenAIDocumentEmbedder(api_key=Secret.from_token("fake-api-key"))
180175

181176
empty_list_input = []
182177
result = embedder.run(documents=empty_list_input)
@@ -192,14 +187,12 @@ def test_run_on_empty_list(self):
192187
def test_run(self):
193188
docs = [
194189
Document(content="I love cheese", meta={"topic": "Cuisine"}),
195-
Document(content="A transformer is a deep learning architecture", meta={
196-
"topic": "ML"}),
190+
Document(content="A transformer is a deep learning architecture", meta={"topic": "ML"}),
197191
]
198192

199193
model = "text-embedding-004"
200194

201-
embedder = GoogleGenAIDocumentEmbedder(model=model, meta_fields_to_embed=[
202-
"topic"], embedding_separator=" | ")
195+
embedder = GoogleGenAIDocumentEmbedder(model=model, meta_fields_to_embed=["topic"], embedding_separator=" | ")
203196

204197
result = embedder.run(documents=docs)
205198
documents_with_embeddings = result["documents"]
@@ -211,6 +204,6 @@ def test_run(self):
211204
assert len(doc.embedding) == 768
212205
assert all(isinstance(x, float) for x in doc.embedding)
213206

214-
assert (
215-
"text" in result["meta"]["model"] and "004" in result["meta"]["model"]
216-
), "The model name does not contain 'text' and '004'"
207+
assert "text" in result["meta"]["model"] and "004" in result["meta"]["model"], (
208+
"The model name does not contain 'text' and '004'"
209+
)

integrations/google_genai/tests/test_text_embedder.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -160,6 +160,6 @@ def test_run(self):
160160
assert len(result["embedding"]) == 768
161161
assert all(isinstance(x, float) for x in result["embedding"])
162162

163-
assert (
164-
"text" in result["meta"]["model"] and "004" in result["meta"]["model"]
165-
), "The model name does not contain 'text' and '004'"
163+
assert "text" in result["meta"]["model"] and "004" in result["meta"]["model"], (
164+
"The model name does not contain 'text' and '004'"
165+
)

0 commit comments

Comments
 (0)