Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from dataclasses import replace
from typing import Any

from haystack import logging
Expand All @@ -22,6 +23,9 @@ def _from_haystack_to_pg_documents(documents: list[Document]) -> list[dict[str,
db_document["blob_meta"] = Jsonb(blob.meta) if blob and blob.meta else None
db_document["blob_mime_type"] = blob.mime_type if blob and blob.mime_type else None
db_document["meta"] = Jsonb(db_document["meta"])
# PostgreSQL text fields cannot contain NUL (0x00) bytes, so strip them from the content before insertion
if content := db_document["content"]:
db_document["content"] = content.replace("\x00", "")

if "sparse_embedding" in db_document:
sparse_embedding = db_document.pop("sparse_embedding", None)
Expand Down Expand Up @@ -65,7 +69,7 @@ def _from_pg_to_haystack_documents(documents: list[dict[str, Any]]) -> list[Docu

if blob_data:
blob = ByteStream(data=blob_data, meta=blob_meta, mime_type=blob_mime_type)
haystack_document.blob = blob
haystack_document = replace(haystack_document, blob=blob)

haystack_documents.append(haystack_document)

Expand Down
17 changes: 17 additions & 0 deletions integrations/pgvector/tests/test_converters.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,13 @@ def test_from_haystack_to_pg_documents():
embedding=[0.7, 0.8, 0.9],
score=0.7,
),
Document(
id="4",
content="This is another text\x00",
meta={"meta_key": "meta_value"},
embedding=[0.7, 0.8, 0.9],
score=0.8,
),
]

pg_docs = _from_haystack_to_pg_documents(haystack_docs)
Expand Down Expand Up @@ -64,6 +71,16 @@ def test_from_haystack_to_pg_documents():
assert pg_docs[2]["embedding"] == [0.7, 0.8, 0.9]
assert "score" not in pg_docs[2]

assert pg_docs[3]["id"] == "4"
assert pg_docs[3]["content"] == "This is another text"
assert pg_docs[3]["blob_data"] is None
assert pg_docs[3]["blob_meta"] is None
assert pg_docs[3]["blob_mime_type"] is None
assert "dataframe" not in pg_docs[3]
assert pg_docs[3]["meta"].obj == {"meta_key": "meta_value"}
assert pg_docs[3]["embedding"] == [0.7, 0.8, 0.9]
assert "score" not in pg_docs[3]


def test_from_pg_to_haystack_documents():
pg_docs = [
Expand Down