Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion integrations/optimum/LICENSE.txt
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ APPENDIX: How to apply the Apache License to your work.

To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives.

Copyright [yyyy] [name of copyright owner]
Copyright 2024 deepset GmbH

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# SPDX-FileCopyrightText: 2024-present deepset GmbH <info@deepset.ai>
#
# SPDX-License-Identifier: Apache-2.0

import copy
import json
from dataclasses import dataclass
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# SPDX-FileCopyrightText: 2024-present deepset GmbH <info@deepset.ai>
#
# SPDX-License-Identifier: Apache-2.0

from dataclasses import dataclass
from enum import Enum
from typing import Any
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# SPDX-FileCopyrightText: 2024-present deepset GmbH <info@deepset.ai>
#
# SPDX-License-Identifier: Apache-2.0

from dataclasses import replace
from typing import Any, Optional, Union

from haystack import Document, component, default_from_dict, default_to_dict
Expand Down Expand Up @@ -52,7 +57,7 @@ def __init__(
progress_bar: bool = True,
meta_fields_to_embed: Optional[list[str]] = None,
embedding_separator: str = "\n",
):
) -> None:
"""
Create an OptimumDocumentEmbedder component.

Expand Down Expand Up @@ -136,7 +141,7 @@ def __init__(
self._backend = _EmbedderBackend(params)
self._initialized = False

def warm_up(self):
def warm_up(self) -> None:
"""
Initializes the component.
"""
Expand Down Expand Up @@ -200,14 +205,12 @@ def run(self, documents: list[Document]) -> dict[str, list[Document]]:
A list of Documents to embed.
:returns:
The updated Documents with their embeddings.
:raises RuntimeError:
If the component was not initialized.
:raises TypeError:
If the input is not a list of Documents.
"""
if not self._initialized:
msg = "The embedding model has not been loaded. Please call warm_up() before running."
raise RuntimeError(msg)
self.warm_up()

if not isinstance(documents, list) or (documents and not isinstance(documents[0], Document)):
msg = (
"OptimumDocumentEmbedder expects a list of Documents as input."
Expand All @@ -221,7 +224,9 @@ def run(self, documents: list[Document]) -> dict[str, list[Document]]:

texts_to_embed = self._prepare_texts_to_embed(documents=documents)
embeddings = self._backend.embed_texts(texts_to_embed)

new_documents = []
for doc, emb in zip(documents, embeddings):
doc.embedding = emb
new_documents.append(replace(doc, embedding=emb))

return {"documents": documents}
return {"documents": new_documents}
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# SPDX-FileCopyrightText: 2024-present deepset GmbH <info@deepset.ai>
#
# SPDX-License-Identifier: Apache-2.0

from typing import Any, Optional, Union

from haystack import component, default_from_dict, default_to_dict
Expand Down Expand Up @@ -162,14 +166,11 @@ def run(self, text: str) -> dict[str, list[float]]:
The text to embed.
:returns:
The embeddings of the text.
:raises RuntimeError:
If the component was not initialized.
:raises TypeError:
If the input is not a string.
"""
if not self._initialized:
msg = "The embedding model has not been loaded. Please call warm_up() before running."
raise RuntimeError(msg)
self.warm_up()

if not isinstance(text, str):
msg = (
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# SPDX-FileCopyrightText: 2024-present deepset GmbH <info@deepset.ai>
#
# SPDX-License-Identifier: Apache-2.0

from enum import Enum


Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# SPDX-FileCopyrightText: 2024-present deepset GmbH <info@deepset.ai>
#
# SPDX-License-Identifier: Apache-2.0

from dataclasses import dataclass
from enum import Enum
from typing import Any
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# SPDX-FileCopyrightText: 2024-present deepset GmbH <info@deepset.ai>
#
# SPDX-License-Identifier: Apache-2.0

import copy
import tempfile
from unittest.mock import MagicMock, patch
Expand Down Expand Up @@ -371,7 +375,6 @@ def test_run(self, opt_config, quant_config):
optimizer_settings=opt_config,
quantizer_settings=quant_config,
)
embedder.warm_up()

result = embedder.run(documents=docs)
_ = [embedder.run([d]) for d in docs_copy]
Expand Down
5 changes: 4 additions & 1 deletion integrations/optimum/tests/test_optimum_text_embedder.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# SPDX-FileCopyrightText: 2024-present deepset GmbH <info@deepset.ai>
#
# SPDX-License-Identifier: Apache-2.0

from unittest.mock import MagicMock, patch

import pytest
Expand Down Expand Up @@ -252,7 +256,6 @@ def test_run(self):
suffix=" suffix",
pooling_mode=pooling_mode,
)
embedder.warm_up()

result = embedder.run(text="The food was delicious")

Expand Down