Skip to content

Commit 707c877

Browse files
committed
Merge branch 'main' into vllm-embedders
2 parents edc9fe1 + 04b5dda commit 707c877

55 files changed

Lines changed: 3318 additions & 303 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

.github/labeler.yml

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -228,6 +228,11 @@ integration:snowflake:
228228
- any-glob-to-any-file: "integrations/snowflake/**/*"
229229
- any-glob-to-any-file: ".github/workflows/snowflake.yml"
230230

231+
integration:sqlalchemy:
232+
- changed-files:
233+
- any-glob-to-any-file: "integrations/sqlalchemy/**/*"
234+
- any-glob-to-any-file: ".github/workflows/sqlalchemy.yml"
235+
231236
integration:stackit:
232237
- changed-files:
233238
- any-glob-to-any-file: "integrations/stackit/**/*"

.github/workflows/CI_coverage_comment.yml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -47,6 +47,7 @@ on:
4747
- "Test / qdrant"
4848
- "Test / ragas"
4949
- "Test / snowflake"
50+
- "Test / sqlalchemy"
5051
- "Test / stackit"
5152
- "Test / tavily"
5253
- "Test / togetherai"

.github/workflows/sqlalchemy.yml

Lines changed: 126 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,126 @@
1+
# This workflow comes from https://github.com/ofek/hatch-mypyc
# https://github.com/ofek/hatch-mypyc/blob/5a198c0ba8660494d02716cfc9d79ce4adfb1442/.github/workflows/test.yml
name: Test / sqlalchemy

# Triggers: nightly schedule, pull requests, and pushes to main. PR and push
# runs are limited to changes under the integration (Markdown files directly
# in its root excluded) or to this workflow file itself.
on:
  schedule:
    - cron: "0 0 * * *"
  pull_request:
    paths:
      - "integrations/sqlalchemy/**"
      - "!integrations/sqlalchemy/*.md"
      - ".github/workflows/sqlalchemy.yml"
  push:
    branches:
      - main
    paths:
      - "integrations/sqlalchemy/**"
      - "!integrations/sqlalchemy/*.md"
      - ".github/workflows/sqlalchemy.yml"

# Every `run:` step executes inside the integration folder unless it overrides this.
defaults:
  run:
    working-directory: integrations/sqlalchemy

# One active run per PR branch; events without a head ref fall back to the commit SHA.
concurrency:
  group: sqlalchemy-${{ github.head_ref || github.sha }}
  cancel-in-progress: true

env:
  PYTHONUNBUFFERED: "1"
  FORCE_COLOR: "1"
  # JSON-encoded lists, decoded with fromJSON() when the test matrix is built.
  TEST_MATRIX_OS: '["ubuntu-latest", "windows-latest", "macos-latest"]'
  TEST_MATRIX_PYTHON: '["3.10", "3.14"]'

jobs:
  # Publishes the OS / Python lists consumed by the `run` job's matrix.
  # Push events get a reduced matrix (ubuntu-latest, Python 3.10);
  # every other event uses the full lists from `env`.
  compute-test-matrix:
    runs-on: ubuntu-slim
    defaults:
      run:
        # This job has no checkout step, so the integration folder is not
        # available; run from the workspace root instead.
        working-directory: .
    outputs:
      os: ${{ steps.set.outputs.os }}
      python-version: ${{ steps.set.outputs.python-version }}
    steps:
      - id: set
        run: |
          echo 'os=${{ github.event_name == 'push' && '["ubuntu-latest"]' || env.TEST_MATRIX_OS }}' >> "$GITHUB_OUTPUT"
          echo 'python-version=${{ github.event_name == 'push' && '["3.10"]' || env.TEST_MATRIX_PYTHON }}' >> "$GITHUB_OUTPUT"

  run:
    name: Python ${{ matrix.python-version }} on ${{ startsWith(matrix.os, 'macos-') && 'macOS' || startsWith(matrix.os, 'windows-') && 'Windows' || 'Linux' }}
    needs: compute-test-matrix
    permissions:
      contents: write
      pull-requests: write
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: ${{ fromJSON(needs.compute-test-matrix.outputs.os) }}
        python-version: ${{ fromJSON(needs.compute-test-matrix.outputs.python-version) }}

    steps:
      - name: Support longpaths
        # Keyed on runner.os (like the Linux-only steps below) rather than the
        # exact 'windows-latest' label, so the step still runs if the matrix
        # is ever pinned to a specific Windows image.
        if: runner.os == 'Windows'
        # Runs before checkout, so the default working directory does not exist yet.
        working-directory: .
        run: git config --system core.longpaths true

      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install Hatch
        run: pip install hatch

      # Formatting and type checks run once per workflow: Python 3.10 on Linux only.
      - name: Lint
        if: matrix.python-version == '3.10' && runner.os == 'Linux'
        run: hatch run fmt-check && hatch run test:types

      - name: Run unit tests
        run: hatch run test:unit-cov-retry

      # On PR: generates coverage comment artifact. On push to main: stores coverage baseline on data branch.
      - name: Store unit tests coverage
        id: coverage_comment
        if: matrix.python-version == '3.10' && runner.os == 'Linux' && github.event_name != 'schedule'
        uses: py-cov-action/python-coverage-comment-action@7188638f871f721a365d644f505d1ff3df20d683 # v3.40
        with:
          GITHUB_TOKEN: ${{ github.token }}
          COVERAGE_PATH: integrations/sqlalchemy
          SUBPROJECT_ID: sqlalchemy
          MINIMUM_GREEN: 90
          MINIMUM_ORANGE: 60

      # Only uploaded on PRs, and only when the previous step reported writing a comment file.
      - name: Upload coverage comment to be posted
        if: matrix.python-version == '3.10' && runner.os == 'Linux' && github.event_name == 'pull_request' && steps.coverage_comment.outputs.COMMENT_FILE_WRITTEN == 'true'
        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
        with:
          name: coverage-comment-sqlalchemy
          path: python-coverage-comment-action-sqlalchemy.txt

      # Skipped on push; re-runs the unit tests against the lowest allowed direct dependency versions.
      - name: Run unit tests with lowest direct dependencies
        if: github.event_name != 'push'
        run: |
          hatch run uv pip compile pyproject.toml --resolution lowest-direct --output-file requirements_lowest_direct.txt
          hatch -e test env run -- uv pip install -r requirements_lowest_direct.txt
          hatch run test:unit

      - name: Nightly - run unit tests with Haystack main branch
        if: github.event_name == 'schedule'
        run: |
          hatch env prune
          hatch -e test env run -- uv pip install git+https://github.com/deepset-ai/haystack.git@main
          hatch run test:unit

  # Sends a Slack notification only when a scheduled (nightly) run has failed.
  notify-slack-on-failure:
    needs: run
    if: failure() && github.event_name == 'schedule'
    runs-on: ubuntu-slim
    steps:
      - uses: deepset-ai/notify-slack-action@3cda73b77a148f16f703274198e7771340cf862b # v1
        with:
          slack-webhook-url: ${{ secrets.SLACK_WEBHOOK_URL_NOTIFICATIONS }}

README.md

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -72,6 +72,7 @@ Please check out our [Contribution Guidelines](CONTRIBUTING.md) for all the deta
7272
| [qdrant-haystack](integrations/qdrant/) | Document Store | [![PyPI - Version](https://img.shields.io/pypi/v/qdrant-haystack.svg?color=orange)](https://pypi.org/project/qdrant-haystack) | [![Test / qdrant](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/qdrant.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/qdrant.yml) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-qdrant/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-qdrant/htmlcov/index.html) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-qdrant-combined/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-qdrant-combined/htmlcov/index.html) |
7373
| [ragas-haystack](integrations/ragas/) | Evaluator | [![PyPI - Version](https://img.shields.io/pypi/v/ragas-haystack.svg)](https://pypi.org/project/ragas-haystack) | [![Test / ragas](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/ragas.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/ragas.yml) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-ragas/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-ragas/htmlcov/index.html) | |
7474
| [snowflake-haystack](integrations/snowflake/) | Retriever | [![PyPI - Version](https://img.shields.io/pypi/v/snowflake-haystack.svg)](https://pypi.org/project/snowflake-haystack) | [![Test / snowflake](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/snowflake.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/snowflake.yml) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-snowflake/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-snowflake/htmlcov/index.html) | |
75+
| [sqlalchemy-haystack](integrations/sqlalchemy/) | Retriever | [![PyPI - Version](https://img.shields.io/pypi/v/sqlalchemy-haystack.svg)](https://pypi.org/project/sqlalchemy-haystack) | [![Test / sqlalchemy](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/sqlalchemy.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/sqlalchemy.yml) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-sqlalchemy/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-sqlalchemy/htmlcov/index.html) | |
7576
| [stackit-haystack](integrations/stackit/) | Embedder, Generator | [![PyPI - Version](https://img.shields.io/pypi/v/stackit-haystack.svg)](https://pypi.org/project/stackit-haystack) | [![Test / stackit](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/stackit.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/stackit.yml) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-stackit/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-stackit/htmlcov/index.html) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-stackit-combined/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-stackit-combined/htmlcov/index.html) |
7677
| [tavily-haystack](integrations/tavily/) | Websearch | [![PyPI - Version](https://img.shields.io/pypi/v/tavily-haystack.svg)](https://pypi.org/project/tavily-haystack) | [![Test / tavily](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/tavily.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/tavily.yml) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-tavily/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-tavily/htmlcov/index.html) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-tavily-combined/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-tavily-combined/htmlcov/index.html) |
7778
| [togetherai-haystack](integrations/togetherai/) | Generator | [![PyPI - Version](https://img.shields.io/pypi/v/togetherai-haystack.svg)](https://pypi.org/project/togetherai-haystack) | [![Test / togetherai](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/togetherai.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/togetherai.yml) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-togetherai/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-togetherai/htmlcov/index.html) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-togetherai-combined/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-togetherai-combined/htmlcov/index.html) |

integrations/amazon_bedrock/tests/test_document_image_embedder.py

Lines changed: 73 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -267,6 +267,79 @@ def test_embed_cohere_multiple_embedding_types(self, mock_boto3_session):
267267
with pytest.raises(ValueError):
268268
AmazonBedrockDocumentImageEmbedder(model="cohere.embed-english-v3", embedding_types=["float", "int8"])
269269

270+
def test_run_cohere_end_to_end(self, mock_boto3_session, image_paths):
    """Embed one image document through run() with a mocked Cohere-style response and check the result."""
    image_embedder = AmazonBedrockDocumentImageEmbedder(model="cohere.embed-english-v3")

    with patch.object(image_embedder, "_client") as bedrock_client:
        # A callable side effect hands back a fresh, unread body on every invoke_model call.
        bedrock_client.invoke_model.side_effect = lambda *args, **kwargs: {
            "body": io.StringIO('{"embeddings": [[0.1, 0.2, 0.3]]}')
        }
        input_docs = [Document(content="cat", meta={"file_path": str(image_paths[0])})]
        output = image_embedder.run(documents=input_docs)

    embedded_docs = output["documents"]
    assert len(embedded_docs) == 1

    first_doc = embedded_docs[0]
    assert first_doc.embedding == [0.1, 0.2, 0.3]
    # The embedder records where the embedding came from in the document metadata.
    expected_source = {"type": "image", "file_path_meta_field": "file_path"}
    assert first_doc.meta["embedding_source"] == expected_source
287+
288+
def test_run_titan_end_to_end(self, mock_boto3_session, image_paths):
    """Embed one image document through run() with a mocked Titan-style response and check the embedding."""
    image_embedder = AmazonBedrockDocumentImageEmbedder(model="amazon.titan-embed-image-v1")

    with patch.object(image_embedder, "_client") as bedrock_client:
        # A callable side effect hands back a fresh, unread body on every invoke_model call.
        bedrock_client.invoke_model.side_effect = lambda *args, **kwargs: {
            "body": io.StringIO('{"embedding": [0.4, 0.5]}')
        }
        input_docs = [Document(content="apple", meta={"file_path": str(image_paths[0])})]
        output = image_embedder.run(documents=input_docs)

    first_doc = output["documents"][0]
    assert first_doc.embedding == [0.4, 0.5]
300+
301+
def test_run_with_pdf_cohere(self, mock_boto3_session, image_paths):
    """Run the Cohere embedder on a document with a page number and check the request carries a PDF data URI."""
    image_embedder = AmazonBedrockDocumentImageEmbedder(model="cohere.embed-english-v3")

    with patch.object(image_embedder, "_client") as bedrock_client:
        # A callable side effect hands back a fresh, unread body on every invoke_model call.
        bedrock_client.invoke_model.side_effect = lambda *args, **kwargs: {
            "body": io.StringIO('{"embeddings": [[0.9]]}')
        }
        # NOTE(review): presumably image_paths[2] is the PDF fixture - confirm against the fixture definition.
        document = Document(content="pdf", meta={"file_path": str(image_paths[2]), "page_number": 1})
        output = image_embedder.run(documents=[document])

    assert output["documents"][0].embedding == [0.9]

    # Inspect the body keyword argument of the most recent invoke_model call.
    request_body = bedrock_client.invoke_model.call_args.kwargs["body"]
    assert "data:application/pdf;base64," in request_body
315+
316+
def test_embed_titan_with_embedding_config(self, mock_boto3_session, image_paths):
    """Check that an embeddingConfig passed at init is serialized into the Titan request body."""
    titan_kwargs = {"embeddingConfig": {"outputEmbeddingLength": 256}}
    image_embedder = AmazonBedrockDocumentImageEmbedder(model="amazon.titan-embed-image-v1", **titan_kwargs)

    with patch.object(image_embedder, "_client") as bedrock_client:
        # A callable side effect hands back a fresh, unread body on every invoke_model call.
        bedrock_client.invoke_model.side_effect = lambda *args, **kwargs: {
            "body": io.StringIO('{"embedding": [0.1]}')
        }
        image_embedder._embed_titan(images=["fake_base64"])

    # Inspect the body keyword argument of the most recent invoke_model call.
    request_body = bedrock_client.invoke_model.call_args.kwargs["body"]
    assert '"embeddingConfig": {"outputEmbeddingLength": 256}' in request_body
331+
332+
def test_embed_titan_invocation_error(self, mock_boto3_session):
    """Check that a botocore ClientError from invoke_model surfaces as AmazonBedrockInferenceError."""
    image_embedder = AmazonBedrockDocumentImageEmbedder(model="amazon.titan-embed-image-v1")
    boto_error = ClientError(
        error_response={"Error": {"Code": "x", "Message": "y"}},
        operation_name="invoke_model",
    )

    with patch.object(image_embedder, "_client") as bedrock_client:
        # An exception instance as side effect makes every invoke_model call raise it.
        bedrock_client.invoke_model.side_effect = boto_error
        with pytest.raises(AmazonBedrockInferenceError):
            image_embedder._embed_titan(images=["fake_base64"])
342+
270343
@pytest.mark.integration
271344
@pytest.mark.skipif(
272345
not os.getenv("AWS_ACCESS_KEY_ID")

integrations/amazon_bedrock/tests/test_generator.py

Lines changed: 58 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2,10 +2,12 @@
22
from unittest.mock import MagicMock, call
33

44
import pytest
5+
from botocore.exceptions import ClientError
56
from haystack.dataclasses import StreamingChunk
67

78
from haystack_integrations.common.amazon_bedrock.errors import (
89
AmazonBedrockConfigurationError,
10+
AmazonBedrockInferenceError,
911
)
1012
from haystack_integrations.components.generators.amazon_bedrock import (
1113
AmazonBedrockGenerator,
@@ -290,6 +292,62 @@ def test_get_model_adapter_model_family_over_auto_detection():
290292
assert model_adapter == AnthropicClaudeAdapter
291293

292294

295+
def test_truncate_parameter_warns(mock_boto3_session, recwarn):
    """Check that constructing the generator with truncate=True records a warning that mentions "truncate"."""
    AmazonBedrockGenerator(model="anthropic.claude-v2", truncate=True)

    # recwarn collects every warning raised while the test runs.
    warning_texts = [str(recorded.message) for recorded in recwarn.list]
    assert any("truncate" in text for text in warning_texts)
298+
299+
300+
def test_init_connection_error(mock_boto3_session):
    """Check that a failure while creating the boto3 session is reported as AmazonBedrockConfigurationError."""
    # Make the mocked session factory raise as soon as it is called.
    session_failure = Exception("boom")
    mock_boto3_session.side_effect = session_failure

    with pytest.raises(AmazonBedrockConfigurationError):
        AmazonBedrockGenerator(model="anthropic.claude-v2")
304+
305+
306+
def test_run_with_streaming_callback(mock_boto3_session):
    """Run with a streaming callback and check the streaming endpoint, response metadata and callback are used."""
    generator = AmazonBedrockGenerator(model="anthropic.claude-v2")
    bedrock_client = mock_boto3_session.return_value.client.return_value

    # Two raw stream events: a content-block start followed by a text delta.
    chunk_events = [
        {"chunk": {"bytes": b'{"type": "content_block_start", "content_block": {"type": "text"}, "index": 0}'}},
        {"chunk": {"bytes": b'{"delta": {"text": "hello"}}'}},
    ]
    event_stream = MagicMock()
    event_stream.__iter__.return_value = chunk_events

    streaming_response = {
        "body": event_stream,
        "ResponseMetadata": {"RequestId": "req-1"},
    }
    bedrock_client.invoke_model_with_response_stream.return_value = streaming_response

    on_chunk = MagicMock()
    output = generator.run("Hello", streaming_callback=on_chunk)

    bedrock_client.invoke_model_with_response_stream.assert_called_once()
    assert output["meta"]["RequestId"] == "req-1"
    on_chunk.assert_called()
326+
327+
328+
def test_run_client_error(mock_boto3_session):
    """Check that a botocore ClientError from invoke_model surfaces as AmazonBedrockInferenceError."""
    generator = AmazonBedrockGenerator(model="anthropic.claude-v2")

    boto_error = ClientError(
        error_response={"Error": {"Code": "x", "Message": "y"}}, operation_name="invoke_model"
    )
    bedrock_client = mock_boto3_session.return_value.client.return_value
    # An exception instance as side effect makes every invoke_model call raise it.
    bedrock_client.invoke_model.side_effect = boto_error

    with pytest.raises(AmazonBedrockInferenceError):
        generator.run("Hello")
337+
338+
339+
def test_from_dict_with_streaming_callback(mock_boto3_session):
    """Check that from_dict yields a generator whose streaming_callback is set when one is serialized by path."""
    init_parameters = {
        "model": "anthropic.claude-v2",
        # The callback is stored as a dotted import path in the serialized form.
        "streaming_callback": "haystack.components.generators.utils.print_streaming_chunk",
    }
    serialized = {
        "type": "haystack_integrations.components.generators.amazon_bedrock.generator.AmazonBedrockGenerator",
        "init_parameters": init_parameters,
    }

    restored = AmazonBedrockGenerator.from_dict(serialized)

    assert restored.streaming_callback is not None
349+
350+
293351
class TestAnthropicClaudeAdapter:
294352
def test_default_init(self) -> None:
295353
adapter = AnthropicClaudeAdapter(model_kwargs={}, max_length=100)

0 commit comments

Comments
 (0)