Skip to content

Commit 496c92a

Browse files
sjrl and julian-risch authored
fix: Fix usage of SearchableToolset with Agent when selecting a subset of tools to be active (#11564)
Co-authored-by: Julian Risch <julian.risch@deepset.ai>
1 parent 4c021fa commit 496c92a

8 files changed

Lines changed: 587 additions & 71 deletions

File tree

haystack/components/agents/agent.py

Lines changed: 71 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,71 @@ def _get_run_method_params(instance: "Agent") -> set[str]:
127127
return {name for name, p in sig.parameters.items() if p.kind != inspect.Parameter.VAR_KEYWORD}
128128

129129

130+
def _select_tools_by_name(configured_tools: ToolsType, names: list[str]) -> list[Tool | Toolset]:
    """
    Resolve requested tool names against the Agent's configured tools for one run.

    A standalone Tool is returned as-is when its name was requested. A Toolset that offers at least one requested
    name (as reported by its `get_selectable_tools()`) is not returned directly: a fresh `spawn()` of it is returned
    instead, with `_selected_tool_names` set to the requested names that Toolset offers. The selection is therefore
    only ever written to the per-run copy, never to the configured Toolset object.

    :param configured_tools: The tools configured on the Agent (a single Toolset or an iterable of Tools/Toolsets).
    :param names: The requested tool names.
    :returns: The matching standalone Tools and/or spawned Toolsets carrying the name selection, in configured order.
    :raises ValueError: If no tools were configured, or if any requested name is not offered by a configured item.
    """
    if not configured_tools:
        raise ValueError("No tools were configured for the Agent at initialization.")

    wanted = set(names)
    if isinstance(configured_tools, Toolset):
        candidates: list[Tool | Toolset] = [configured_tools]
    else:
        candidates = list(configured_tools)

    # Collect, per configured item, the names it can be selected by. Toolsets are asked through
    # get_selectable_tools() rather than iterated, because iteration may expose only part of what a Toolset
    # offers (e.g. a SearchableToolset iterates its search tool plus already-discovered tools).
    offered_by_candidate: list[tuple[Tool | Toolset, set[str]]] = []
    valid_tool_names: set[str] = set()
    for candidate in candidates:
        if isinstance(candidate, Toolset):
            offered = {tool.name for tool in candidate.get_selectable_tools()}
        else:
            offered = {candidate.name}
        offered_by_candidate.append((candidate, offered))
        valid_tool_names.update(offered)

    # Reject the whole request if any name is unknown, before spawning anything.
    invalid_tool_names = wanted - valid_tool_names
    if invalid_tool_names:
        raise ValueError(
            f"The following tool names are not valid: {invalid_tool_names}. Valid tool names are: {valid_tool_names}."
        )

    chosen: list[Tool | Toolset] = []
    for candidate, offered in offered_by_candidate:
        hits = wanted & offered
        if not hits:
            continue
        if not isinstance(candidate, Toolset):
            chosen.append(candidate)
            continue
        # The selection goes on a per-run copy so the configured Toolset itself is left untouched.
        run_copy = candidate.spawn()
        run_copy._selected_tool_names = hits
        chosen.append(run_copy)
    return chosen
180+
181+
182+
def _spawn_tools(tools: ToolsType) -> ToolsType:
    """
    Build the per-run view of `tools` by swapping every Toolset for the result of its `spawn()`.

    Tools are passed through unchanged. A single Toolset yields a single spawned Toolset; any other iterable
    yields a new list in the same order.

    The intent is to keep run-scoped Toolset state (e.g. a SearchableToolset's discovered tools and any active
    name selection) separate between runs that share the same configured Toolset, such as parallel sub-agent
    tool calls or concurrent requests against one Agent.

    :param tools: A Toolset, or an iterable of Tools and/or Toolsets.
    :returns: The spawned Toolset, or a list with each Toolset replaced by its spawned copy.
    """
    if isinstance(tools, Toolset):
        return tools.spawn()

    per_run: list[Tool | Toolset] = []
    for entry in tools:
        per_run.append(entry.spawn() if isinstance(entry, Toolset) else entry)
    return per_run
193+
194+
130195
def _validate_prompt_message_blocks(user_prompt: str | None, system_prompt: str | None) -> None:
131196
"""
132197
Validate explicit Jinja2 message blocks in Agent prompts.
@@ -699,35 +764,26 @@ def _select_tools(self, tools: ToolsType | list[str] | None = None) -> ToolsType
699764
or if any provided tool name is not valid.
700765
:raises TypeError: If tools is not a list of Tool objects, a Toolset, or a list of tool names (strings).
701766
"""
767+
# Toolsets are spawned into per-run copies (see _spawn_tools / _select_tools_by_name) so concurrent runs
768+
# sharing the same configured Toolset don't corrupt each other's run-scoped state.
702769
if tools is None:
703-
return self.tools
770+
return _spawn_tools(self.tools)
704771

705772
if isinstance(tools, list) and all(isinstance(t, str) for t in tools):
706-
if not self.tools:
707-
raise ValueError("No tools were configured for the Agent at initialization.")
708-
available_tools = flatten_tools_or_toolsets(self.tools)
709-
selected_tool_names = cast(list[str], tools) # mypy thinks this could still be list[Tool] or Toolset
710-
valid_tool_names = {tool.name for tool in available_tools}
711-
invalid_tool_names = {name for name in selected_tool_names if name not in valid_tool_names}
712-
if invalid_tool_names:
713-
raise ValueError(
714-
f"The following tool names are not valid: {invalid_tool_names}. "
715-
f"Valid tool names are: {valid_tool_names}."
716-
)
717-
return [tool for tool in available_tools if tool.name in selected_tool_names]
773+
return _select_tools_by_name(self.tools, cast(list[str], tools))
718774

719775
if isinstance(tools, Toolset):
720776
# Per-run tools are not covered by the Agent's own warm_up(), so warm them up here.
721777
# warm_up() is expected to be idempotent, so re-warming on every run is cheap.
722778
warm_up_tools(tools)
723-
return tools
779+
return _spawn_tools(tools)
724780

725781
if isinstance(tools, list):
726782
selected = cast(list[Tool | Toolset], tools) # mypy can't narrow the Union type from isinstance check
727783
# Per-run tools are not covered by the Agent's own warm_up(), so warm them up here.
728784
# warm_up() is expected to be idempotent, so re-warming on every run is cheap.
729785
warm_up_tools(selected)
730-
return selected
786+
return _spawn_tools(selected)
731787

732788
raise TypeError(
733789
"tools must be a list of Tool and/or Toolset objects, a Toolset, or a list of tool names (strings)."

haystack/tools/searchable_toolset.py

Lines changed: 53 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
#
33
# SPDX-License-Identifier: Apache-2.0
44

5+
import copy
56
from collections.abc import Iterator
67
from typing import TYPE_CHECKING, Annotated, Any
78

@@ -177,6 +178,19 @@ def warm_up(self) -> None:
177178

178179
self._is_warmed_up = True
179180

181+
def get_selectable_tools(self) -> list[Tool]:
    """
    Return every catalog tool as a candidate for name-based selection.

    Iterating this toolset yields only the search tool plus tools discovered so far, whereas selection by name
    may target any catalog entry. The toolset is warmed up first when it has not been yet.

    :returns: A new list containing the tools in the catalog.
    """
    needs_warm_up = not self._is_warmed_up
    if needs_warm_up:
        self.warm_up()
    return [*self._catalog]
193+
180194
def clear(self) -> None:
181195
"""
182196
Clear all discovered tools.
@@ -186,6 +200,27 @@ def clear(self) -> None:
186200
"""
187201
self._discovered_tools.clear()
188202

203+
def spawn(self) -> "SearchableToolset":
    """
    Create a run-scoped shallow copy of this SearchableToolset.

    The copy is made with `copy.copy`, so every attribute not reset below is shared by reference with the
    original. The copy starts with an empty `_discovered_tools` dict and no name selection. Outside passthrough
    mode its bootstrap search tool is rebuilt from the copy, so the tool operates on the copy's state instead of
    the original's. The toolset is warmed up first when it has not been yet.

    :returns: A run-scoped copy of this SearchableToolset.
    """
    if not self._is_warmed_up:
        self.warm_up()

    run_copy = copy.copy(self)
    run_copy._discovered_tools = {}
    run_copy._selected_tool_names = None

    if self._passthrough:
        # Passthrough mode: the bootstrap tool is left exactly as copied.
        return run_copy

    # Rebuild the bootstrap tool from the copy so it reads the copy's discovered tools / selection.
    run_copy._bootstrap_tool = run_copy._create_search_tool()
    return run_copy
223+
189224
def _create_search_tool(self) -> Tool:
190225
"""Create the search_tools bootstrap tool."""
191226

@@ -216,8 +251,15 @@ def search_tools(
216251
"names/descriptions (e.g. 'route weather search')."
217252
)
218253

254+
# Scope the search to the selected subset if active so that top_k applies within the selected tools
255+
filters = None
256+
if self._selected_tool_names is not None:
257+
filters = {"field": "meta.tool_name", "operator": "in", "value": list(self._selected_tool_names)}
258+
219259
# at this point, the toolset has been warmed up, so self._document_store is not None
220-
results = self._document_store.bm25_retrieval(query=tool_keywords, top_k=num_results) # type: ignore[union-attr]
260+
results = self._document_store.bm25_retrieval( # type: ignore[union-attr]
261+
query=tool_keywords, top_k=num_results, filters=filters
262+
)
221263

222264
if not results:
223265
return "No tools found matching these keywords. Try different keywords."
@@ -252,13 +294,18 @@ def search_tools(
252294

253295
return bootstrap_tool
254296

297+
def _is_selected(self, name: str) -> bool:
298+
"""Whether a catalog tool name is allowed by the active `_selected_tool_names` filter (None means all)."""
299+
return self._selected_tool_names is None or name in self._selected_tool_names
300+
255301
def __iter__(self) -> Iterator[Tool]:
256302
"""
257303
Iterate over available tools.
258304
259-
In passthrough mode, yields all catalog tools.
260-
Otherwise, yields bootstrap tool + discovered tools.
261-
Automatically calls warm_up() if needed to ensure bootstrap tool is available.
305+
In passthrough mode, yields all catalog tools. Otherwise, yields the bootstrap search tool plus the
306+
already-discovered tools. If `_selected_tool_names` is set, catalog/discovered tools are restricted to that
307+
set, but the bootstrap search tool is always exposed so search keeps working over the selected subset.
308+
Automatically calls warm_up() if needed to ensure the bootstrap tool is available.
262309
"""
263310
# Unlike base Toolset/MCPToolset, which expose a placeholder tool before warm_up, this toolset materializes
264311
# everything (flattened catalog, bootstrap tool, passthrough decision) in warm_up.
@@ -267,11 +314,11 @@ def __iter__(self) -> Iterator[Tool]:
267314
if not self._is_warmed_up:
268315
self.warm_up()
269316
if self._passthrough:
270-
yield from self._catalog
317+
yield from (tool for tool in self._catalog if self._is_selected(tool.name))
271318
else:
272319
if self._bootstrap_tool is not None:
273320
yield self._bootstrap_tool
274-
yield from self._discovered_tools.values()
321+
yield from (tool for tool in self._discovered_tools.values() if self._is_selected(tool.name))
275322

276323
def __len__(self) -> int:
277324
"""Return the number of currently available tools."""

haystack/tools/toolset.py

Lines changed: 70 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
#
33
# SPDX-License-Identifier: Apache-2.0
44

5+
import copy
56
from collections.abc import Iterator
67
from dataclasses import dataclass, field
78
from typing import Any
@@ -47,8 +48,8 @@ def subtract(a: Annotated[int, "first number"], b: Annotated[int, "second number
4748
```
4849
4950
2. Base class for dynamic tool loading:
50-
By subclassing Toolset, you can create implementations that dynamically load tools
51-
from external sources like OpenAPI URLs, MCP servers, or other resources.
51+
By subclassing Toolset, you can create implementations that dynamically load tools from external sources like
52+
OpenAPI URLs, MCP servers, or other resources.
5253
5354
Example:
5455
```python
@@ -94,15 +95,14 @@ def from_dict(cls, data):
9495
agent = Agent(chat_generator=OpenAIChatGenerator(), tools=calculator_toolset)
9596
```
9697
97-
Toolset implements the collection interface (__iter__, __contains__, __len__, __getitem__),
98-
making it behave like a list of Tools. This makes it compatible with components that expect
99-
iterable tools, such as Agent or Haystack chat generators.
98+
Toolset implements the collection interface (__iter__, __contains__, __len__, __getitem__), making it behave like
99+
a list of Tools. This makes it compatible with components that expect iterable tools, such as Agent or Haystack
100+
chat generators.
100101
101102
When implementing a custom Toolset subclass for dynamic tool loading:
102103
- Perform the dynamic loading in the __init__ method
103104
- Override to_dict() and from_dict() methods if your tools are defined dynamically
104-
- Serialize endpoint descriptors rather than tool instances if your tools
105-
are loaded from external sources
105+
- Serialize endpoint descriptors rather than tool instances if your tools are loaded from external sources
106106
"""
107107

108108
# Use field() with default_factory to initialize the list
@@ -124,15 +124,56 @@ def __post_init__(self) -> None:
124124
# Tracks whether warm_up() has already run so subsequent calls become a no-op.
125125
self._is_warmed_up = False
126126

127+
# Optional per-run name filter. When set, iteration only yields tools whose name is in this set.
128+
# None means no filtering. Set on a per-run spawn(), so it never leaks across runs.
129+
self._selected_tool_names: set[str] | None = None
130+
127131
def __iter__(self) -> Iterator[Tool]:
    """
    Iterate over the Tools in this Toolset.

    This lets a Toolset stand in wherever a list of Tools is expected. While a name filter is active
    (`_selected_tool_names` is not None), tools whose name is not in the filter are skipped.

    :returns: An iterator yielding Tool instances
    """
    for candidate in self.tools:
        active_filter = self._selected_tool_names
        if active_filter is not None and candidate.name not in active_filter:
            continue
        yield candidate
143+
144+
def get_selectable_tools(self) -> list[Tool]:
    """
    Return all tools that name-based selection may target, regardless of any active name filter.

    Unlike iteration, which yields only what is currently exposed and honors the name filter, this returns the
    contents of `self.tools` unfiltered. Subclasses whose iteration does not surface every selectable tool should
    override it so that selection by name can still reach the full set.

    The Toolset is warmed up first when it has not been yet, so tools that are only loaded in `warm_up()` are
    included.

    :returns: The list of tools available for name-based selection.
    """
    already_warm = self._is_warmed_up
    if not already_warm:
        self.warm_up()
    return [*self.tools]
160+
161+
def spawn(self) -> "Toolset":
    """
    Create a run-scoped shallow copy of this Toolset.

    The copy is made with `copy.copy`, so it shares this Toolset's attributes (its tools and whatever warm-up
    produced) by reference, but starts without a name selection. This keeps one run's name selection from
    leaking into another run that uses the same configured Toolset. The Toolset is warmed up first when it has
    not been yet, so the copy is taken from the warmed state. Subclasses holding further run-scoped state
    should override this.

    :returns: A run-scoped copy of this Toolset.
    """
    if not self._is_warmed_up:
        self.warm_up()

    run_copy = copy.copy(self)
    # Each spawned copy begins unfiltered; a selection, if any, is applied to the copy afterwards.
    run_copy._selected_tool_names = None
    return run_copy
136177

137178
def __contains__(self, item: str | Tool) -> bool:
138179
"""
@@ -146,9 +187,9 @@ def __contains__(self, item: str | Tool) -> bool:
146187
:returns: True if contained, False otherwise
147188
"""
148189
if isinstance(item, str):
149-
return any(tool.name == item for tool in self.tools)
190+
return any(tool.name == item for tool in self)
150191
if isinstance(item, Tool):
151-
return item in self.tools
192+
return any(tool is item or tool == item for tool in self)
152193
return False
153194

154195
def warm_up(self) -> None:
@@ -281,20 +322,20 @@ def __add__(self, other: "Tool | Toolset | list[Tool]") -> "Toolset":
281322

282323
def __len__(self) -> int:
283324
"""
284-
Return the number of Tools in this Toolset.
325+
Return the number of Tools in this Toolset (respecting any active name filter).
285326
286327
:returns: Number of Tools
287328
"""
288-
return len(self.tools)
329+
return sum(1 for _ in self)
289330

290331
def __getitem__(self, index: int) -> Tool:
    """
    Look up a Tool by its position among the tools yielded on iteration (so an active name filter is respected).

    :param index: Index of the Tool to get
    :returns: The Tool at the specified index
    """
    visible = list(self)
    return visible[index]
298339

299340

300341
class _ToolsetWrapper(Toolset):
@@ -312,9 +353,19 @@ def __init__(self, toolsets: list[Toolset]) -> None:
312353
self._is_warmed_up = False
313354

314355
def __iter__(self) -> Iterator[Tool]:
    """Yield the tools of every wrapped toolset in order, skipping names excluded by an active name filter."""
    # NOTE(review): the filter is applied to every tool the wrapped toolsets yield, which would include helper
    # tools a wrapped toolset always exposes (e.g. a search bootstrap tool) — confirm this is intended.
    for member in self.toolsets:
        for candidate in member:
            active_filter = self._selected_tool_names
            if active_filter is not None and candidate.name not in active_filter:
                continue
            yield candidate
361+
362+
def get_selectable_tools(self) -> list[Tool]:
    """Concatenate the selectable tools of all wrapped toolsets, without applying this wrapper's name filter."""
    collected: list[Tool] = []
    for member in self.toolsets:
        collected.extend(member.get_selectable_tools())
    return collected
365+
366+
def spawn(self) -> "_ToolsetWrapper":
    """Build a new wrapper around a `spawn()` of each wrapped toolset, in the same order."""
    spawned_members = []
    for member in self.toolsets:
        spawned_members.append(member.spawn())
    return _ToolsetWrapper(spawned_members)
318369

319370
def __contains__(self, item: Any) -> bool:
320371
"""Check if a tool is in any of the toolsets."""
@@ -371,8 +422,8 @@ def from_dict(cls, data: dict[str, Any]) -> "_ToolsetWrapper":
371422
return cls(toolsets=toolsets)
372423

373424
def __len__(self) -> int:
374-
"""Return total number of tools across all toolsets."""
375-
return sum(len(toolset) for toolset in self.toolsets)
425+
"""Return total number of tools across all toolsets (respecting any active name filter)."""
426+
return sum(1 for _ in self)
376427

377428
def __getitem__(self, index: int) -> Tool:
378429
"""Get a tool by index across all toolsets."""

0 commit comments

Comments
 (0)