diff --git a/integrations/weaviate/src/haystack_integrations/document_stores/weaviate/document_store.py b/integrations/weaviate/src/haystack_integrations/document_stores/weaviate/document_store.py index 6406b2e4d8..4655c75d10 100644 --- a/integrations/weaviate/src/haystack_integrations/document_stores/weaviate/document_store.py +++ b/integrations/weaviate/src/haystack_integrations/document_stores/weaviate/document_store.py @@ -326,7 +326,8 @@ def count_documents(self) -> int: total = self.collection.aggregate.over_all(total_count=True).total_count return total if total else 0 - def _to_data_object(self, document: Document) -> dict[str, Any]: + @staticmethod + def _to_data_object(document: Document) -> dict[str, Any]: """ Converts a Document to a Weaviate data object ready to be saved. """ @@ -365,7 +366,8 @@ def _to_data_object(self, document: Document) -> dict[str, Any]: return data - def _to_document(self, data: DataObject[dict[str, Any], None]) -> Document: + @staticmethod + def _to_document(data: DataObject[dict[str, Any], None]) -> Document: """ Converts a data object read from Weaviate into a Document. """ @@ -460,7 +462,7 @@ def filter_documents(self, filters: Optional[dict[str, Any]] = None) -> list[Doc result = self._query_with_filters(filters) else: result = self._query() - return [self._to_document(doc) for doc in result] + return [WeaviateDocumentStore._to_document(doc) for doc in result] def _batch_write(self, documents: list[Document]) -> int: """ @@ -476,7 +478,7 @@ def _batch_write(self, documents: list[Document]) -> int: raise ValueError(msg) batch.add_object( - properties=self._to_data_object(doc), + properties=WeaviateDocumentStore._to_data_object(doc), collection=self.collection.name, uuid=generate_uuid5(doc.id), vector=doc.embedding, @@ -524,7 +526,7 @@ def _write(self, documents: list[Document], policy: DuplicatePolicy) -> int: try: self.collection.data.insert( uuid=generate_uuid5(doc.id), - properties=self._to_data_object(doc), + properties=WeaviateDocumentStore._to_data_object(doc), vector=doc.embedding, ) @@ -848,7 +850,7 @@ def _bm25_retrieval( return_metadata=["score"], ) - return [self._to_document(doc) for doc in result.objects] + return [WeaviateDocumentStore._to_document(doc) for doc in result.objects] async def _bm25_retrieval_async( self, query: str, filters: Optional[dict[str, Any]] = None, top_k: Optional[int] = None @@ -866,7 +868,7 @@ async def _bm25_retrieval_async( return_metadata=["score"], ) - return [self._to_document(doc) for doc in result.objects] + return [WeaviateDocumentStore._to_document(doc) for doc in result.objects] def _embedding_retrieval( self, @@ -892,7 +894,7 @@ def _embedding_retrieval( return_metadata=["certainty"], ) - return [self._to_document(doc) for doc in result.objects] + return [WeaviateDocumentStore._to_document(doc) for doc in result.objects] async def _embedding_retrieval_async( self, @@ -920,7 +922,7 @@ async def _embedding_retrieval_async( return_metadata=["certainty"], ) - return [self._to_document(doc) for doc in result.objects] + return [WeaviateDocumentStore._to_document(doc) for doc in result.objects] def _hybrid_retrieval( self, @@ -945,7 +947,7 @@ def _hybrid_retrieval( return_metadata=["score"], ) - return [self._to_document(doc) for doc in result.objects] + return [WeaviateDocumentStore._to_document(doc) for doc in result.objects] async def _hybrid_retrieval_async( self, @@ -972,4 +974,4 @@ async def _hybrid_retrieval_async( return_metadata=["score"], ) - return [self._to_document(doc) for doc in result.objects] + return [WeaviateDocumentStore._to_document(doc) for doc in result.objects] diff --git a/integrations/weaviate/tests/test_document_store.py b/integrations/weaviate/tests/test_document_store.py index 7bcde9c6b1..4e9f772078 100644 --- a/integrations/weaviate/tests/test_document_store.py +++ b/integrations/weaviate/tests/test_document_store.py @@ -289,9 +289,9 @@ def test_from_dict(self, _mock_weaviate, monkeypatch): assert document_store._additional_config.connection.session_pool_maxsize == 20 assert document_store._additional_config.connection.session_pool_timeout == 5 - def test_to_data_object(self, document_store, test_files_path): + def test_to_data_object(self, test_files_path): doc = Document(content="test doc") - data = document_store._to_data_object(doc) + data = WeaviateDocumentStore._to_data_object(doc) assert data == { "_original_id": doc.id, "content": doc.content, @@ -305,7 +305,7 @@ def test_to_data_object(self, document_store, test_files_path): embedding=[1, 2, 3], meta={"key": "value"}, ) - data = document_store._to_data_object(doc) + data = WeaviateDocumentStore._to_data_object(doc) assert data == { "_original_id": doc.id, "content": doc.content, @@ -315,7 +315,7 @@ def test_to_data_object(self, document_store, test_files_path): "key": "value", } - def test_to_document(self, document_store, test_files_path): + def test_to_document(self, test_files_path): image = ByteStream.from_file_path(test_files_path / "robot1.jpg", mime_type="image/jpeg") data = DataObject( properties={ @@ -329,7 +329,7 @@ def test_to_document(self, document_store, test_files_path): vector={"default": [1, 2, 3]}, ) - doc = document_store._to_document(data) + doc = WeaviateDocumentStore._to_document(data) assert doc.id == "123" assert doc.content == "some content" assert doc.blob == image