Skip to content

Commit e669c49

Browse files
Merge branch 'main' into fix/2939-sparse-vector-storage
2 parents 1187f89 + 9220d70 commit e669c49

57 files changed

Lines changed: 408 additions & 117 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

integrations/mcp/CHANGELOG.md

Lines changed: 38 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,43 @@
11
# Changelog
22

3+
## [integrations/mcp-v1.3.0] - 2026-03-24
4+
5+
### 🚀 Features
6+
7+
- Make connection to Stdio MCP servers work on notebooks by redirecting errlog (#3037)
8+
9+
### 📚 Documentation
10+
11+
- Fix some invalid raises directives (#2847)
12+
- Simplify pydoc configs (#2855)
13+
14+
### 🧪 Testing
15+
16+
- Test compatible integrations with python 3.14; update pyproject (#3001)
17+
18+
### 🧹 Chores
19+
20+
- Remove unused allow-direct-references (#2866)
21+
- Add ANN ruff ruleset to llama_cpp, llama_stack, mcp, meta_llama, mistral, mongodb_atlas, nvidia, ollama, openrouter, opensearch (#2991)
22+
- Enforce ruff docstring rules (D102/D103/D205/D209/D213/D417/D419) in integrations 21-30 (#3010)
23+
24+
## [integrations/mcp-v1.2.0] - 2026-01-16
25+
26+
### 🚀 Features
27+
28+
- Add state-based configuration support to MCPToolset (#2689)
29+
30+
31+
### 🧹 Chores
32+
33+
- Remove Readme API CI workflow and configs (#2573)
34+
- Make fmt command more forgiving (#2671)
35+
36+
### 🌀 Miscellaneous
37+
38+
- Chore: Simplify and update mcp examples (#2581)
39+
- Fix: Proper tool validation in mcp (#2654)
40+
341
## [integrations/mcp-v1.1.0] - 2025-11-12
442

543
### 🚀 Features
@@ -33,28 +71,6 @@
3371
### 🌀 Miscellaneous
3472

3573
- Add MCPTool/MCPToolset warm_up (#2384)
36-
- Added `eager_connect` parameter to `MCPTool` and `MCPToolset` (default: `False`).
37-
38-
**When `eager_connect=False` (default)**:
39-
- Connection deferred until first use or `warm_up()` call
40-
- Fast initialization, enables pipeline validation without server availability
41-
- Recommended for most use cases
42-
43-
**When `eager_connect=True`**:
44-
- Connects immediately during initialization
45-
- Fail-fast validation, tool schema available immediately
46-
- Use when you need upfront validation
47-
- Existing agents/pipelines should set `eager_connect=True` to keep the behaviour from previous releases
48-
49-
```python
50-
# Lazy connection (default)
51-
tool = MCPTool(name="weather", server_info=info)
52-
53-
# Eager connection
54-
tool = MCPTool(name="weather", server_info=info, eager_connect=True)
55-
```
56-
- See related https://github.com/deepset-ai/haystack/pull/9856 for more details
57-
5874

5975
## [integrations/mcp-v0.8.0] - 2025-10-14
6076

integrations/mcp/src/haystack_integrations/tools/mcp/mcp_tool.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,10 @@
44

55
import asyncio
66
import concurrent.futures
7+
import io
78
import json
9+
import sys
10+
import tempfile
811
import threading
912
import warnings
1013
from abc import ABC, abstractmethod
@@ -435,7 +438,17 @@ async def connect(self) -> list[types.Tool]:
435438
logger.debug(f"PROCESS: Connecting to stdio server with command: {self.command}")
436439

437440
server_params = StdioServerParameters(command=self.command, args=self.args, env=self.env)
438-
stdio_transport = await self.exit_stack.enter_async_context(stdio_client(server_params))
441+
442+
# In notebook environments, sys.stderr is a custom object without a real file descriptor, which causes MCP stdio
443+
# connection to fail. We detect this and set the MCP server's errlog to a temp file instead.
444+
errlog = sys.stderr
445+
try:
446+
sys.stderr.fileno()
447+
except (io.UnsupportedOperation, AttributeError, OSError):
448+
errlog = tempfile.NamedTemporaryFile(mode="w", suffix="-mcp-stderr.log", delete=False)
449+
logger.warning("MCP server stderr redirected to {path}", path=errlog.name)
450+
451+
stdio_transport = await self.exit_stack.enter_async_context(stdio_client(server_params, errlog=errlog))
439452
return await self._initialize_session_with_transport(stdio_transport, f"stdio server (command: {self.command})")
440453

441454

integrations/mcp/tests/test_mcp_tool.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
1+
import io
12
import json
23
import os
4+
from unittest.mock import AsyncMock, MagicMock, patch
35

46
import pytest
57
from haystack.components.agents import Agent
@@ -13,6 +15,7 @@
1315
MCPTool,
1416
StdioServerInfo,
1517
)
18+
from haystack_integrations.tools.mcp.mcp_tool import StdioClient
1619

1720
from .mcp_memory_transport import InMemoryServerInfo
1821
from .mcp_servers_fixtures import calculator_mcp, echo_mcp
@@ -197,6 +200,42 @@ def test_mcp_tool_serde_with_state_mapping(self, mcp_tool_cleanup):
197200
assert new_tool._inputs_from_state == {"state_a": "a"}
198201
assert new_tool._outputs_to_state == {"result": {"source": "output"}}
199202

203+
@pytest.mark.asyncio
204+
@pytest.mark.parametrize(
205+
"fileno_side_effect,fileno_return_value,notebook_environment",
206+
[
207+
(io.UnsupportedOperation("fileno"), None, True),
208+
(None, 2, False),
209+
],
210+
)
211+
async def test_stdio_client_stderr_handling(self, fileno_side_effect, fileno_return_value, notebook_environment):
212+
"""Test that StdioClient uses sys.stderr in terminals and falls back to a file in notebooks."""
213+
client = StdioClient(command="echo", args=["hello"])
214+
215+
mock_stderr = MagicMock()
216+
mock_stderr.fileno.side_effect = fileno_side_effect
217+
mock_stderr.fileno.return_value = fileno_return_value
218+
219+
with (
220+
patch.object(client, "exit_stack") as mock_stack,
221+
patch("haystack_integrations.tools.mcp.mcp_tool.stdio_client") as mock_stdio_client,
222+
patch("haystack_integrations.tools.mcp.mcp_tool.sys") as mock_sys,
223+
patch.object(client, "_initialize_session_with_transport", new_callable=AsyncMock) as mock_init,
224+
):
225+
mock_sys.stderr = mock_stderr
226+
mock_stack.enter_async_context = AsyncMock(return_value=(MagicMock(), MagicMock()))
227+
mock_init.return_value = []
228+
229+
await client.connect()
230+
231+
_, kwargs = mock_stdio_client.call_args
232+
errlog = kwargs["errlog"]
233+
if notebook_environment:
234+
assert errlog is not mock_stderr
235+
assert hasattr(errlog, "write")
236+
else:
237+
assert errlog is mock_stderr
238+
200239
@pytest.mark.skipif("OPENAI_API_KEY" not in os.environ, reason="OPENAI_API_KEY not set")
201240
@pytest.mark.integration
202241
def test_pipeline_warmup_with_mcp_tool(self):

integrations/openrouter/pyproject.toml

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,13 @@ select = [
8585
"ARG",
8686
"B",
8787
"C",
88+
"D102", # Missing docstring in public method
89+
"D103", # Missing docstring in public function
90+
"D205", # 1 blank line required between summary line and description
91+
"D209", # Closing triple quotes go to new line
92+
"D213", # summary lines must be positioned on the second physical line of the docstring
93+
"D417", # Missing argument descriptions in the docstring
94+
"D419", # Docstring is empty
8895
"DTZ",
8996
"E",
9097
"EM",
@@ -134,9 +141,9 @@ ban-relative-imports = "parents"
134141

135142
[tool.ruff.lint.per-file-ignores]
136143
# Tests can use magic values, assertions, and relative imports
137-
"tests/**/*" = ["PLR2004", "S101", "TID252", "ANN"]
144+
"tests/**/*" = ["D", "PLR2004", "S101", "TID252", "ANN"]
138145
# Examples can print their output and don't need type annotations or docstrings
139-
"examples/**/*" = ["T201", "ANN"]
146+
"examples/**/*" = ["D", "T201", "ANN"]
140147

141148
[tool.coverage.run]
142149
source = ["haystack_integrations"]

integrations/openrouter/src/haystack_integrations/components/generators/openrouter/chat/chat_generator.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
class OpenRouterChatGenerator(OpenAIChatGenerator):
1919
"""
2020
Enables text generation using OpenRouter generative models.
21+
2122
For supported models, see [OpenRouter docs](https://openrouter.ai/models).
2223
2324
Users can pass any text generation parameters valid for the OpenRouter chat completion API
@@ -71,8 +72,7 @@ def __init__(
7172
http_client_kwargs: dict[str, Any] | None = None,
7273
) -> None:
7374
"""
74-
Creates an instance of OpenRouterChatGenerator. Unless specified otherwise,
75-
the default model is `openai/gpt-5-mini`.
75+
Creates an instance of OpenRouterChatGenerator.
7676
7777
:param api_key:
7878
The OpenRouter API key.

integrations/opensearch/pyproject.toml

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,13 @@ select = [
9494
"ARG",
9595
"B",
9696
"C",
97+
"D102", # Missing docstring in public method
98+
"D103", # Missing docstring in public function
99+
"D205", # 1 blank line required between summary line and description
100+
"D209", # Closing triple quotes go to new line
101+
"D213", # summary lines must be positioned on the second physical line of the docstring
102+
"D417", # Missing argument descriptions in the docstring
103+
"D419", # Docstring is empty
97104
"DTZ",
98105
"E",
99106
"EM",
@@ -145,7 +152,7 @@ ban-relative-imports = "parents"
145152

146153
[tool.ruff.lint.per-file-ignores]
147154
# Tests can use magic values, assertions, and relative imports
148-
"tests/**/*" = ["PLR2004", "S101", "TID252", "ANN"]
155+
"tests/**/*" = ["D", "PLR2004", "S101", "TID252", "ANN"]
149156

150157
[tool.coverage.run]
151158
source = ["haystack_integrations"]

integrations/opensearch/src/haystack_integrations/components/retrievers/opensearch/open_search_hybrid_retriever.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -114,8 +114,9 @@ def __init__(
114114
**kwargs: Any,
115115
) -> None:
116116
"""
117-
Initialize the OpenSearchHybridRetriever, a super component to retrieve documents from OpenSearch using
118-
both embedding-based and keyword-based retrieval methods.
117+
Initialize the OpenSearchHybridRetriever using both embedding-based and keyword-based retrieval methods.
118+
119+
This is a super component to retrieve documents from OpenSearch using both retrieval methods.
119120
120121
We don't explicitly define all the init parameters of the components in the constructor, for each
121122
of the components, since that would be around 20+ parameters. Instead, we define the most important ones
@@ -242,7 +243,9 @@ def __init__(
242243

243244
if TYPE_CHECKING:
244245

245-
def warm_up(self) -> None: ...
246+
def warm_up(self) -> None:
247+
"""Warm up the underlying pipeline components."""
248+
...
246249

247250
def run(
248251
self,
@@ -251,7 +254,9 @@ def run(
251254
filters_embedding: dict[str, Any] | None = None,
252255
top_k_bm25: int | None = None,
253256
top_k_embedding: int | None = None,
254-
) -> dict[str, list[Document]]: ...
257+
) -> dict[str, list[Document]]:
258+
"""Run the hybrid retrieval pipeline and return retrieved documents."""
259+
...
255260

256261
def _create_pipeline(self, data: dict[str, Any]) -> Pipeline:
257262
"""
@@ -328,6 +333,7 @@ def to_dict(self) -> dict[str, Any]:
328333

329334
@classmethod
330335
def from_dict(cls, data: dict[str, Any]) -> "OpenSearchHybridRetriever":
336+
"""Deserialize an OpenSearchHybridRetriever from a dictionary."""
331337
# deserialize the document store
332338
doc_store = OpenSearchDocumentStore.from_dict(data["init_parameters"]["document_store"])
333339
data["init_parameters"]["document_store"] = doc_store

integrations/opensearch/src/haystack_integrations/document_stores/opensearch/auth.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ def _get_aws_session(
4141
) -> "boto3.Session":
4242
"""
4343
Creates an AWS Session with the given parameters.
44+
4445
Checks if the provided AWS credentials are valid and can be used to connect to AWS.
4546
4647
:param aws_access_key_id: AWS access key ID.

integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -559,6 +559,7 @@ async def write_documents_async(
559559
def _deserialize_document(hit: dict[str, Any]) -> Document:
560560
"""
561561
Creates a Document from the search hit provided.
562+
562563
This is mostly useful in self.filter_documents().
563564
"""
564565
data = hit["_source"]
@@ -1482,6 +1483,7 @@ def _embedding_retrieval(
14821483
) -> list[Document]:
14831484
"""
14841485
Retrieves documents that are most similar to the query embedding using a vector similarity metric.
1486+
14851487
It uses the OpenSearch's Approximate k-Nearest Neighbors search algorithm.
14861488
14871489
This method is not meant to be part of the public interface of
@@ -1513,8 +1515,9 @@ async def _embedding_retrieval_async(
15131515
search_kwargs: dict[str, Any] | None = None,
15141516
) -> list[Document]:
15151517
"""
1516-
Asynchronously retrieves documents that are most similar to the query embedding using a vector similarity
1517-
metric. It uses the OpenSearch's Approximate k-Nearest Neighbors search algorithm.
1518+
Asynchronously retrieves documents most similar to the query embedding using a vector similarity metric.
1519+
1520+
It uses the OpenSearch's Approximate k-Nearest Neighbors search algorithm.
15181521
15191522
This method is not meant to be part of the public interface of
15201523
`OpenSearchDocumentStore` nor called directly.
@@ -1641,8 +1644,7 @@ def _extract_distinct_counts_from_aggregations(
16411644

16421645
def count_unique_metadata_by_filter(self, filters: dict[str, Any], metadata_fields: list[str]) -> dict[str, int]:
16431646
"""
1644-
Returns the number of unique values for each specified metadata field of the documents
1645-
that match the provided filters.
1647+
Returns the number of unique values for each specified metadata field of the documents that match the filters.
16461648
16471649
:param filters: The filters to apply to count documents.
16481650
For filter syntax, see [Haystack metadata filtering](https://docs.haystack.deepset.ai/docs/metadata-filtering)
@@ -1685,8 +1687,7 @@ async def count_unique_metadata_by_filter_async(
16851687
self, filters: dict[str, Any], metadata_fields: list[str]
16861688
) -> dict[str, int]:
16871689
"""
1688-
Asynchronously returns the number of unique values for each specified metadata field of the documents
1689-
that match the provided filters.
1690+
Asynchronously returns the number of unique values for each specified metadata field matching the filters.
16901691
16911692
:param filters: The filters to apply to count documents.
16921693
For filter syntax, see [Haystack metadata filtering](https://docs.haystack.deepset.ai/docs/metadata-filtering)
@@ -1862,6 +1863,7 @@ def get_metadata_field_unique_values(
18621863
) -> tuple[list[str], dict[str, Any] | None]:
18631864
"""
18641865
Returns unique values for a metadata field, optionally filtered by a search term in the content.
1866+
18651867
Uses composite aggregations for proper pagination beyond 10k results.
18661868
18671869
:param metadata_field: The metadata field to get unique values for.
@@ -1927,6 +1929,7 @@ async def get_metadata_field_unique_values_async(
19271929
) -> tuple[list[str], dict[str, Any] | None]:
19281930
"""
19291931
Asynchronously returns unique values for a metadata field, optionally filtered by a search term in the content.
1932+
19301933
Uses composite aggregations for proper pagination beyond 10k results.
19311934
19321935
:param metadata_field: The metadata field to get unique values for.

integrations/optimum/pyproject.toml

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,13 @@ select = [
108108
"ARG",
109109
"B",
110110
"C",
111+
"D102", # Missing docstring in public method
112+
"D103", # Missing docstring in public function
113+
"D205", # 1 blank line required between summary line and description
114+
"D209", # Closing triple quotes go to new line
115+
"D213", # summary lines must be positioned on the second physical line of the docstring
116+
"D417", # Missing argument descriptions in the docstring
117+
"D419", # Docstring is empty
111118
"DTZ",
112119
"E",
113120
"EM",
@@ -153,9 +160,9 @@ ban-relative-imports = "parents"
153160

154161
[tool.ruff.lint.per-file-ignores]
155162
# Tests can use magic values, assertions, and relative imports
156-
"tests/**/*" = ["PLR2004", "S101", "TID252", "ANN"]
163+
"tests/**/*" = ["D", "PLR2004", "S101", "TID252", "ANN"]
157164
# Examples can print their output and don't need docstrings
158-
"examples/**" = ["T201"]
165+
"examples/**" = ["D", "T201"]
159166
"tests/**" = ["T201"]
160167

161168
[tool.coverage.run]

0 commit comments

Comments
 (0)