Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -326,7 +326,8 @@ def count_documents(self) -> int:
total = self.collection.aggregate.over_all(total_count=True).total_count
return total if total else 0

def _to_data_object(self, document: Document) -> dict[str, Any]:
@staticmethod
def _to_data_object(document: Document) -> dict[str, Any]:
"""
Converts a Document to a Weaviate data object ready to be saved.
"""
Expand Down Expand Up @@ -365,7 +366,8 @@ def _to_data_object(self, document: Document) -> dict[str, Any]:

return data

def _to_document(self, data: DataObject[dict[str, Any], None]) -> Document:
@staticmethod
def _to_document(data: DataObject[dict[str, Any], None]) -> Document:
"""
Converts a data object read from Weaviate into a Document.
"""
Expand Down Expand Up @@ -460,7 +462,7 @@ def filter_documents(self, filters: Optional[dict[str, Any]] = None) -> list[Doc
result = self._query_with_filters(filters)
else:
result = self._query()
return [self._to_document(doc) for doc in result]
return [WeaviateDocumentStore._to_document(doc) for doc in result]

def _batch_write(self, documents: list[Document]) -> int:
"""
Expand All @@ -476,7 +478,7 @@ def _batch_write(self, documents: list[Document]) -> int:
raise ValueError(msg)

batch.add_object(
properties=self._to_data_object(doc),
properties=WeaviateDocumentStore._to_data_object(doc),
collection=self.collection.name,
uuid=generate_uuid5(doc.id),
vector=doc.embedding,
Expand Down Expand Up @@ -524,7 +526,7 @@ def _write(self, documents: list[Document], policy: DuplicatePolicy) -> int:
try:
self.collection.data.insert(
uuid=generate_uuid5(doc.id),
properties=self._to_data_object(doc),
properties=WeaviateDocumentStore._to_data_object(doc),
vector=doc.embedding,
)

Expand Down Expand Up @@ -848,7 +850,7 @@ def _bm25_retrieval(
return_metadata=["score"],
)

return [self._to_document(doc) for doc in result.objects]
return [WeaviateDocumentStore._to_document(doc) for doc in result.objects]

async def _bm25_retrieval_async(
self, query: str, filters: Optional[dict[str, Any]] = None, top_k: Optional[int] = None
Expand All @@ -866,7 +868,7 @@ async def _bm25_retrieval_async(
return_metadata=["score"],
)

return [self._to_document(doc) for doc in result.objects]
return [WeaviateDocumentStore._to_document(doc) for doc in result.objects]

def _embedding_retrieval(
self,
Expand All @@ -892,7 +894,7 @@ def _embedding_retrieval(
return_metadata=["certainty"],
)

return [self._to_document(doc) for doc in result.objects]
return [WeaviateDocumentStore._to_document(doc) for doc in result.objects]

async def _embedding_retrieval_async(
self,
Expand Down Expand Up @@ -920,7 +922,7 @@ async def _embedding_retrieval_async(
return_metadata=["certainty"],
)

return [self._to_document(doc) for doc in result.objects]
return [WeaviateDocumentStore._to_document(doc) for doc in result.objects]

def _hybrid_retrieval(
self,
Expand All @@ -945,7 +947,7 @@ def _hybrid_retrieval(
return_metadata=["score"],
)

return [self._to_document(doc) for doc in result.objects]
return [WeaviateDocumentStore._to_document(doc) for doc in result.objects]

async def _hybrid_retrieval_async(
self,
Expand All @@ -972,4 +974,4 @@ async def _hybrid_retrieval_async(
return_metadata=["score"],
)

return [self._to_document(doc) for doc in result.objects]
return [WeaviateDocumentStore._to_document(doc) for doc in result.objects]
10 changes: 5 additions & 5 deletions integrations/weaviate/tests/test_document_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -289,9 +289,9 @@ def test_from_dict(self, _mock_weaviate, monkeypatch):
assert document_store._additional_config.connection.session_pool_maxsize == 20
assert document_store._additional_config.connection.session_pool_timeout == 5

def test_to_data_object(self, document_store, test_files_path):
def test_to_data_object(self, test_files_path):
doc = Document(content="test doc")
data = document_store._to_data_object(doc)
data = WeaviateDocumentStore._to_data_object(doc)
assert data == {
"_original_id": doc.id,
"content": doc.content,
Expand All @@ -305,7 +305,7 @@ def test_to_data_object(self, document_store, test_files_path):
embedding=[1, 2, 3],
meta={"key": "value"},
)
data = document_store._to_data_object(doc)
data = WeaviateDocumentStore._to_data_object(doc)
assert data == {
"_original_id": doc.id,
"content": doc.content,
Expand All @@ -315,7 +315,7 @@ def test_to_data_object(self, document_store, test_files_path):
"key": "value",
}

def test_to_document(self, document_store, test_files_path):
def test_to_document(self, test_files_path):
image = ByteStream.from_file_path(test_files_path / "robot1.jpg", mime_type="image/jpeg")
data = DataObject(
properties={
Expand All @@ -329,7 +329,7 @@ def test_to_document(self, document_store, test_files_path):
vector={"default": [1, 2, 3]},
)

doc = document_store._to_document(data)
doc = WeaviateDocumentStore._to_document(data)
assert doc.id == "123"
assert doc.content == "some content"
assert doc.blob == image
Expand Down