Skip to content

Commit c45cb90

Browse files
authored
Merge branch 'main' into bedrock_region_support
2 parents 9504485 + f05d31a commit c45cb90

91 files changed

Lines changed: 800 additions & 554 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

integrations/amazon_bedrock/CHANGELOG.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,12 @@
11
# Changelog
22

3+
## [integrations/amazon_bedrock-v6.1.0] - 2026-01-13
4+
5+
### 🐛 Bug Fixes
6+
7+
- AmazonBedrockDocumentEmbedder to not modify Documents in place (#2174) (#2702)
8+
9+
310
## [integrations/amazon_bedrock-v6.0.0] - 2026-01-09
411

512
### 🧹 Chores

integrations/amazon_bedrock/src/haystack_integrations/components/embedders/amazon_bedrock/document_embedder.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import json
2+
from dataclasses import replace
23
from typing import Any
34

45
from botocore.config import Config
@@ -186,10 +187,11 @@ def _embed_cohere(self, documents: list[Document]) -> list[Document]:
186187
)
187188
all_embeddings.extend(embeddings_list)
188189

190+
new_documents = []
189191
for doc, emb in zip(documents, all_embeddings, strict=True):
190-
doc.embedding = emb
192+
new_documents.append(replace(doc, embedding=emb))
191193

192-
return documents
194+
return new_documents
193195

194196
def _embed_titan(self, documents: list[Document]) -> list[Document]:
195197
"""
@@ -214,10 +216,11 @@ def _embed_titan(self, documents: list[Document]) -> list[Document]:
214216
embedding = response_body["embedding"]
215217
all_embeddings.append(embedding)
216218

219+
new_documents = []
217220
for doc, emb in zip(documents, all_embeddings, strict=True):
218-
doc.embedding = emb
221+
new_documents.append(replace(doc, embedding=emb))
219222

220-
return documents
223+
return new_documents
221224

222225
@component.output_types(documents=list[Document])
223226
def run(self, documents: list[Document]) -> dict[str, list[Document]]:

integrations/amazon_bedrock/src/haystack_integrations/components/rankers/amazon_bedrock/ranker.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
from dataclasses import replace
12
from typing import Any
23

34
from botocore.exceptions import ClientError
@@ -251,8 +252,7 @@ def resolve_secret(secret: Secret | None) -> str | None:
251252
idx = result["index"]
252253
score = result["relevanceScore"]
253254
doc = documents[idx]
254-
doc.score = score
255-
sorted_docs.append(doc)
255+
sorted_docs.append(replace(doc, score=score))
256256

257257
return {"documents": sorted_docs}
258258
except ClientError as client_error:

integrations/amazon_bedrock/tests/test_document_embedder.py

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -257,6 +257,74 @@ def mock_invoke_model(*args, **kwargs):
257257
assert doc.content == docs[i].content
258258
assert doc.embedding == [0.1, 0.2, 0.3]
259259

260+
def test_run_cohere_does_not_modify_original_documents(self, mock_boto3_session):
261+
embedder = AmazonBedrockDocumentEmbedder(model="cohere.embed-english-v3")
262+
263+
original_docs = [
264+
Document(content="test 1", id="doc1"),
265+
Document(content="test 2", id="doc2"),
266+
]
267+
268+
# Record each original object's identity so we can verify the returned documents are new instances
269+
original_doc_ids = [id(doc) for doc in original_docs]
270+
original_embeddings = [doc.embedding for doc in original_docs]
271+
272+
with patch.object(embedder, "_client") as mock_client:
273+
mock_client.invoke_model.return_value = {
274+
"body": io.StringIO('{"embeddings": [[0.1, 0.2], [0.3, 0.4]]}'),
275+
}
276+
277+
result = embedder.run(documents=original_docs)
278+
279+
# Verify originals are unchanged
280+
assert all(doc.embedding is None for doc in original_docs)
281+
assert original_embeddings == [None, None]
282+
283+
# Verify returned documents are NEW instances
284+
returned_doc_ids = [id(doc) for doc in result["documents"]]
285+
assert original_doc_ids != returned_doc_ids
286+
287+
# Verify returned documents have embeddings
288+
assert result["documents"][0].embedding == [0.1, 0.2]
289+
assert result["documents"][1].embedding == [0.3, 0.4]
290+
assert result["documents"][0].content == "test 1"
291+
assert result["documents"][1].content == "test 2"
292+
293+
def test_run_titan_does_not_modify_original_documents(self, mock_boto3_session):
294+
embedder = AmazonBedrockDocumentEmbedder(model="amazon.titan-embed-text-v1")
295+
296+
original_docs = [
297+
Document(content="test 1", id="doc1"),
298+
Document(content="test 2", id="doc2"),
299+
]
300+
301+
# Record each original object's identity so we can verify the returned documents are new instances
302+
original_doc_ids = [id(doc) for doc in original_docs]
303+
original_embeddings = [doc.embedding for doc in original_docs]
304+
305+
with patch.object(embedder, "_client") as mock_client:
306+
# Titan returns one embedding at a time
307+
mock_client.invoke_model.side_effect = [
308+
{"body": io.StringIO('{"embedding": [0.1, 0.2]}')},
309+
{"body": io.StringIO('{"embedding": [0.3, 0.4]}')},
310+
]
311+
312+
result = embedder.run(documents=original_docs)
313+
314+
# Verify originals are unchanged
315+
assert all(doc.embedding is None for doc in original_docs)
316+
assert original_embeddings == [None, None]
317+
318+
# Verify returned documents are NEW instances
319+
returned_doc_ids = [id(doc) for doc in result["documents"]]
320+
assert original_doc_ids != returned_doc_ids
321+
322+
# Verify returned documents have embeddings
323+
assert result["documents"][0].embedding == [0.1, 0.2]
324+
assert result["documents"][1].embedding == [0.3, 0.4]
325+
assert result["documents"][0].content == "test 1"
326+
assert result["documents"][1].content == "test 2"
327+
260328
@pytest.mark.integration
261329
@pytest.mark.skipif(
262330
not os.getenv("AWS_ACCESS_KEY_ID")

integrations/github/CHANGELOG.md

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,28 @@
11
# Changelog
22

3+
## [integrations/github-v2.0.0] - 2026-01-12
4+
5+
### 📚 Documentation
6+
7+
- Add pydoc configurations for Docusaurus (#2411)
8+
9+
### ⚙️ CI
10+
11+
- Change pytest command (#2475)
12+
13+
### 🧹 Chores
14+
15+
- Remove Readme API CI workflow and configs (#2573)
16+
- Fix nightly test (#2580)
17+
- Make fmt command more forgiving (#2671)
18+
- [**breaking**] Github - drop Python 3.9 and use X|Y typing (#2705)
19+
20+
### 🌀 Miscellaneous
21+
22+
- Adopt PEP 585 type hinting (part 3) (#2510)
23+
- Chore: Fix tool validation test failures in github integration (#2659)
24+
- Fix: Fix github integration unit tests (#2703)
25+
326
## [integrations/github-v1.2.1] - 2025-10-20
427

528
### 🚀 Features

integrations/github/pyproject.toml

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,23 +7,22 @@ name = "github-haystack"
77
dynamic = ["version"]
88
description = 'Haystack components for interacting with GitHub repositories'
99
readme = "README.md"
10-
requires-python = ">=3.9"
10+
requires-python = ">=3.10"
1111
license = "Apache-2.0"
1212
keywords = []
1313
authors = [{ name = "deepset GmbH", email = "info@deepset.ai" }]
1414
classifiers = [
1515
"License :: OSI Approved :: Apache Software License",
1616
"Development Status :: 4 - Beta",
1717
"Programming Language :: Python",
18-
"Programming Language :: Python :: 3.9",
1918
"Programming Language :: Python :: 3.10",
2019
"Programming Language :: Python :: 3.11",
2120
"Programming Language :: Python :: 3.12",
2221
"Programming Language :: Python :: 3.13",
2322
"Programming Language :: Python :: Implementation :: CPython",
2423
"Programming Language :: Python :: Implementation :: PyPy",
2524
]
26-
dependencies = ["haystack-ai>=2.12.0"]
25+
dependencies = ["haystack-ai>=2.22.0"]
2726

2827
[project.urls]
2928
Source = "https://github.com/deepset-ai/haystack-core-integrations/github"
@@ -77,7 +76,6 @@ check_untyped_defs = true
7776
disallow_incomplete_defs = true
7877

7978
[tool.ruff]
80-
target-version = "py39"
8179
line-length = 120
8280

8381
[tool.ruff.lint]
@@ -124,10 +122,6 @@ ignore = [
124122
"B008",
125123
"S101",
126124
]
127-
unfixable = [
128-
# Don't touch unused imports
129-
"F401",
130-
]
131125

132126
[tool.ruff.lint.isort]
133127
known-first-party = ["haystack_integrations"]

integrations/github/src/haystack_integrations/components/connectors/github/file_editor.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
# SPDX-License-Identifier: Apache-2.0
44
from base64 import b64decode, b64encode
55
from enum import Enum
6-
from typing import Any, Optional, Union
6+
from typing import Any
77

88
import requests
99
from haystack import component, default_from_dict, default_to_dict, logging
@@ -79,7 +79,7 @@ def __init__(
7979
self,
8080
*,
8181
github_token: Secret = Secret.from_env_var("GITHUB_TOKEN"),
82-
repo: Optional[str] = None,
82+
repo: str | None = None,
8383
branch: str = "main",
8484
raise_on_failure: bool = True,
8585
):
@@ -145,7 +145,7 @@ def _update_file(self, owner: str, repo: str, path: str, content: str, message:
145145
def _check_last_commit(self, owner: str, repo: str, branch: str) -> bool:
146146
"""Check if last commit was made by the current token user."""
147147
url = f"https://api.github.com/repos/{owner}/{repo}/commits"
148-
params: dict[str, Union[str, int]] = {"per_page": 1, "sha": branch}
148+
params: dict[str, str | int] = {"per_page": 1, "sha": branch}
149149
response = requests.get(url, headers=self._get_request_headers(), params=params, timeout=10)
150150
response.raise_for_status()
151151
last_commit = response.json()[0]
@@ -191,7 +191,7 @@ def _undo_changes(self, owner: str, repo: str, payload: dict[str, Any], branch:
191191
commits_url = f"https://api.github.com/repos/{owner}/{repo}/commits"
192192

193193
# Get the previous commit SHA
194-
params: dict[str, Union[str, int]] = {"per_page": 2, "sha": branch}
194+
params: dict[str, str | int] = {"per_page": 2, "sha": branch}
195195
commits = requests.get(commits_url, headers=self._get_request_headers(), params=params, timeout=10).json()
196196
previous_sha = commits[1]["sha"]
197197

@@ -244,10 +244,10 @@ def _delete_file(self, owner: str, repo: str, payload: dict[str, str], branch: s
244244
@component.output_types(result=str)
245245
def run(
246246
self,
247-
command: Union[Command, str],
247+
command: Command | str,
248248
payload: dict[str, Any],
249-
repo: Optional[str] = None,
250-
branch: Optional[str] = None,
249+
repo: str | None = None,
250+
branch: str | None = None,
251251
) -> dict[str, str]:
252252
"""
253253
Process GitHub file operations.

integrations/github/src/haystack_integrations/components/connectors/github/issue_viewer.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
#
33
# SPDX-License-Identifier: Apache-2.0
44
import re
5-
from typing import Any, Optional
5+
from typing import Any
66

77
import requests
88
from haystack import Document, component, default_from_dict, default_to_dict, logging
@@ -37,7 +37,7 @@ class GitHubIssueViewer:
3737
def __init__(
3838
self,
3939
*,
40-
github_token: Optional[Secret] = None,
40+
github_token: Secret | None = None,
4141
raise_on_failure: bool = True,
4242
retry_attempts: int = 2,
4343
):

integrations/github/src/haystack_integrations/components/connectors/github/pr_creator.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
#
33
# SPDX-License-Identifier: Apache-2.0
44
import re
5-
from typing import Any, Optional
5+
from typing import Any
66

77
import requests
88
from haystack import component, default_from_dict, default_to_dict, logging
@@ -102,7 +102,7 @@ def _check_fork_exists(self, repo: str, fork_owner: str) -> bool:
102102
except requests.RequestException:
103103
return False
104104

105-
def _create_fork(self, owner: str, repo: str) -> Optional[str]:
105+
def _create_fork(self, owner: str, repo: str) -> str | None:
106106
"""Create a fork of the repository."""
107107
url = f"https://api.github.com/repos/{owner}/{repo}/forks"
108108
try:

integrations/github/src/haystack_integrations/components/connectors/github/repo_forker.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
# SPDX-License-Identifier: Apache-2.0
44
import re
55
import time
6-
from typing import Any, Optional
6+
from typing import Any
77

88
import requests
99
from haystack import component, default_from_dict, default_to_dict, logging
@@ -137,7 +137,7 @@ def _get_authenticated_user(self) -> str:
137137
response.raise_for_status()
138138
return response.json()["login"]
139139

140-
def _get_existing_repository(self, repo_name: str) -> Optional[str]:
140+
def _get_existing_repository(self, repo_name: str) -> str | None:
141141
"""
142142
Check if a repository with the given name already exists in the authenticated user's account.
143143

0 commit comments

Comments
 (0)