Skip to content

Commit 0c5aaf4

Browse files
kudos07 and anakin87 authored
Add Jina integration tests for text/document embedders and ranker (#2841)
* Add Jina integration tests for text/document embedders and ranker
* small improvements
* fmt

---------

Co-authored-by: anakin87 <stefanofiorucci@gmail.com>
1 parent e724307 commit 0c5aaf4

5 files changed

Lines changed: 71 additions & 3 deletions

File tree

.github/workflows/jina.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@ on:
1010
- "integrations/jina/**"
1111
- "!integrations/jina/*.md"
1212
- ".github/workflows/jina.yml"
13-
1413
defaults:
1514
run:
1615
working-directory: integrations/jina
@@ -20,6 +19,7 @@ concurrency:
2019
cancel-in-progress: true
2120

2221
env:
22+
JINA_API_KEY: ${{ secrets.JINA_API_KEY }}
2323
PYTHONUNBUFFERED: "1"
2424
FORCE_COLOR: "1"
2525

integrations/jina/tests/test_document_embedder.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
#
33
# SPDX-License-Identifier: Apache-2.0
44
import json
5+
import os
56
from unittest.mock import patch
67

78
import pytest
@@ -311,3 +312,27 @@ def test_run_with_v3(self):
311312
assert len(doc.embedding) == 3
312313
assert all(isinstance(x, float) for x in doc.embedding)
313314
assert metadata == {"model": model, "usage": {"prompt_tokens": 2 * 4, "total_tokens": 2 * 4}}
315+
316+
@pytest.mark.skipif(not os.environ.get("JINA_API_KEY", None), reason="JINA_API_KEY env var not set")
317+
@pytest.mark.integration
318+
def test_run_integration(self):
319+
embedder = JinaDocumentEmbedder(model="jina-embeddings-v3", task="retrieval.passage")
320+
docs = [
321+
Document(content="Paris is the capital of France."),
322+
Document(content="Berlin is the capital of Germany."),
323+
]
324+
325+
result = embedder.run(documents=docs)
326+
327+
assert "documents" in result
328+
assert len(result["documents"]) == 2
329+
for doc in result["documents"]:
330+
assert isinstance(doc, Document)
331+
assert isinstance(doc.embedding, list)
332+
assert len(doc.embedding) > 0
333+
assert all(isinstance(x, (int, float)) for x in doc.embedding)
334+
335+
assert "meta" in result
336+
assert isinstance(result["meta"], dict)
337+
assert "model" in result["meta"]
338+
assert "usage" in result["meta"]

integrations/jina/tests/test_ranker.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
#
33
# SPDX-License-Identifier: Apache-2.0
44
import json
5+
import os
56
from unittest.mock import patch
67

78
import pytest
@@ -145,3 +146,28 @@ def test_run_on_empty_docs(self):
145146

146147
assert result["documents"] is not None
147148
assert not result["documents"] # empty list
149+
150+
@pytest.mark.skipif(not os.environ.get("JINA_API_KEY", None), reason="JINA_API_KEY env var not set")
151+
@pytest.mark.integration
152+
def test_run_integration(self):
153+
ranker = JinaRanker(model="jina-reranker-v1-base-en")
154+
docs = [
155+
Document(content="Paris is the capital of France."),
156+
Document(content="Bananas are yellow fruits."),
157+
Document(content="Berlin is the capital of Germany."),
158+
]
159+
160+
result = ranker.run(query="What is the capital of France?", documents=docs, top_k=2)
161+
162+
assert "documents" in result
163+
ranked_docs = result["documents"]
164+
assert len(ranked_docs) == 2
165+
assert all(isinstance(doc, Document) for doc in ranked_docs)
166+
assert all(doc.score is not None for doc in ranked_docs)
167+
assert ranked_docs[0].score >= ranked_docs[1].score
168+
assert "Paris" in ranked_docs[0].content
169+
170+
assert "meta" in result
171+
assert isinstance(result["meta"], dict)
172+
assert "model" in result["meta"]
173+
assert "usage" in result["meta"]

integrations/jina/tests/test_reader_connector.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,7 @@ def test_run_reader_mode(self):
122122
assert len(result) == 1
123123
document = result["documents"][0]
124124
assert isinstance(document, Document)
125-
assert "This domain is for use in illustrative examples" in document.content
125+
assert "This domain is for use in documentation examples" in document.content
126126
assert document.meta["title"] == "Example Domain"
127127
assert document.meta["url"] == "https://example.com/"
128128

@@ -135,7 +135,7 @@ def test_run_search_mode(self):
135135
assert len(result) >= 1
136136
for doc in result["documents"]:
137137
assert isinstance(doc, Document)
138-
assert doc.content
138+
assert doc.content is not None
139139
assert "title" in doc.meta
140140
assert "url" in doc.meta
141141
assert "description" in doc.meta

integrations/jina/tests/test_text_embedder.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
#
33
# SPDX-License-Identifier: Apache-2.0
44
import json
5+
import os
56
from unittest.mock import patch
67

78
import pytest
@@ -145,3 +146,19 @@ def test_with_v3(self):
145146
"model": "jina-embeddings-v3",
146147
"usage": {"prompt_tokens": 6, "total_tokens": 6},
147148
}
149+
150+
@pytest.mark.skipif(not os.environ.get("JINA_API_KEY", None), reason="JINA_API_KEY env var not set")
151+
@pytest.mark.integration
152+
def test_run_integration(self):
153+
embedder = JinaTextEmbedder(task="retrieval.query")
154+
result = embedder.run(text="What is the capital of France?")
155+
156+
assert "embedding" in result
157+
assert isinstance(result["embedding"], list)
158+
assert len(result["embedding"]) > 0
159+
assert all(isinstance(x, (int, float)) for x in result["embedding"])
160+
161+
assert "meta" in result
162+
assert isinstance(result["meta"], dict)
163+
assert "model" in result["meta"]
164+
assert "usage" in result["meta"]

0 commit comments

Comments
 (0)