Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 71 additions & 15 deletions haystack/components/agents/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,71 @@ def _get_run_method_params(instance: "Agent") -> set[str]:
return {name for name, p in sig.parameters.items() if p.kind != inspect.Parameter.VAR_KEYWORD}


def _select_tools_by_name(configured_tools: ToolsType, names: list[str]) -> list[Tool | Toolset]:
    """
    Narrow the Agent's configured tools down to the requested names for one run.

    A standalone Tool is returned as-is when its name was requested. A Toolset that offers at least one requested
    name (according to `get_selectable_tools()`) is not returned directly: a `spawn()` copy is returned instead,
    with `_selected_tool_names` set to the requested names that Toolset offers. The configured Toolset itself is
    never assigned a selection here.

    :param configured_tools: The tools configured on the Agent (a single Toolset or an iterable of Tools/Toolsets).
    :param names: The requested tool names. Duplicates are ignored.
    :returns: The matching standalone Tools and/or spawned, selection-scoped Toolsets, in configuration order.
    :raises ValueError: If no tools were configured, or if any requested name is not offered by any configured item.
    """
    if not configured_tools:
        raise ValueError("No tools were configured for the Agent at initialization.")

    wanted = set(names)
    if isinstance(configured_tools, Toolset):
        candidates: list[Tool | Toolset] = [configured_tools]
    else:
        candidates = list(configured_tools)

    # Pair every configured item with the names it can be selected by. Toolsets are asked through
    # get_selectable_tools() rather than iterated, so names that iteration does not surface still count.
    offered_by_candidate: list[tuple[Tool | Toolset, set[str]]] = []
    known_names: set[str] = set()
    for candidate in candidates:
        if isinstance(candidate, Toolset):
            offered = {tool.name for tool in candidate.get_selectable_tools()}
        else:
            offered = {candidate.name}
        offered_by_candidate.append((candidate, offered))
        known_names |= offered

    unknown_names = wanted - known_names
    if unknown_names:
        raise ValueError(
            f"The following tool names are not valid: {unknown_names}. "
            f"Valid tool names are: {known_names}."
        )

    chosen: list[Tool | Toolset] = []
    for candidate, offered in offered_by_candidate:
        hits = wanted & offered
        if not hits:
            continue
        if not isinstance(candidate, Toolset):
            chosen.append(candidate)
            continue
        # The selection goes on a per-run copy, so the shared, configured Toolset is left untouched.
        scoped = candidate.spawn()
        scoped._selected_tool_names = hits
        chosen.append(scoped)
    return chosen


def _spawn_tools(tools: ToolsType) -> ToolsType:
    """
    Build the per-run view of `tools`.

    A single Toolset is replaced by its `spawn()` copy. For any other iterable, a new list is returned in which
    every Toolset element is replaced by its `spawn()` copy and every other element is passed through unchanged.
    Spawning gives each run its own Toolset objects, so state kept on the copy (such as an active name selection)
    is not shared with other runs that use the same configured Toolset.

    :param tools: A Toolset, or an iterable of Tools and/or Toolsets.
    :returns: A spawned Toolset, or a new list with Toolsets spawned and Tools passed through.
    """
    if isinstance(tools, Toolset):
        return tools.spawn()

    per_run: list[Tool | Toolset] = []
    for entry in tools:
        if isinstance(entry, Toolset):
            per_run.append(entry.spawn())
        else:
            per_run.append(entry)
    return per_run


def _validate_prompt_message_blocks(user_prompt: str | None, system_prompt: str | None) -> None:
"""
Validate explicit Jinja2 message blocks in Agent prompts.
Expand Down Expand Up @@ -699,35 +764,26 @@ def _select_tools(self, tools: ToolsType | list[str] | None = None) -> ToolsType
or if any provided tool name is not valid.
:raises TypeError: If tools is not a list of Tool objects, a Toolset, or a list of tool names (strings).
"""
# Toolsets are spawned into per-run copies (see _spawn_tools / _select_tools_by_name) so concurrent runs
# sharing the same configured Toolset don't corrupt each other's run-scoped state.
if tools is None:
return self.tools
return _spawn_tools(self.tools)

if isinstance(tools, list) and all(isinstance(t, str) for t in tools):
if not self.tools:
raise ValueError("No tools were configured for the Agent at initialization.")
available_tools = flatten_tools_or_toolsets(self.tools)
selected_tool_names = cast(list[str], tools) # mypy thinks this could still be list[Tool] or Toolset
valid_tool_names = {tool.name for tool in available_tools}
invalid_tool_names = {name for name in selected_tool_names if name not in valid_tool_names}
if invalid_tool_names:
raise ValueError(
f"The following tool names are not valid: {invalid_tool_names}. "
f"Valid tool names are: {valid_tool_names}."
)
return [tool for tool in available_tools if tool.name in selected_tool_names]
return _select_tools_by_name(self.tools, cast(list[str], tools))

if isinstance(tools, Toolset):
# Per-run tools are not covered by the Agent's own warm_up(), so warm them up here.
# warm_up() is expected to be idempotent, so re-warming on every run is cheap.
warm_up_tools(tools)
return tools
return _spawn_tools(tools)

if isinstance(tools, list):
selected = cast(list[Tool | Toolset], tools) # mypy can't narrow the Union type from isinstance check
# Per-run tools are not covered by the Agent's own warm_up(), so warm them up here.
# warm_up() is expected to be idempotent, so re-warming on every run is cheap.
warm_up_tools(selected)
return selected
return _spawn_tools(selected)

raise TypeError(
"tools must be a list of Tool and/or Toolset objects, a Toolset, or a list of tool names (strings)."
Expand Down
59 changes: 53 additions & 6 deletions haystack/tools/searchable_toolset.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#
# SPDX-License-Identifier: Apache-2.0

import copy
from collections.abc import Iterator
from typing import TYPE_CHECKING, Annotated, Any

Expand Down Expand Up @@ -174,6 +175,19 @@ def warm_up(self) -> None:

self._is_warmed_up = True

def get_selectable_tools(self) -> list[Tool]:
    """
    Return every catalog tool as a candidate for name-based selection.

    Name-based selection may target any tool in the catalog, so the whole catalog is returned as a new list.
    The toolset is warmed up first when that has not happened yet.

    :returns: A new list containing the tools in the catalog.
    """
    needs_warm_up = not self._is_warmed_up
    if needs_warm_up:
        self.warm_up()
    return [*self._catalog]

def clear(self) -> None:
"""
Clear all discovered tools.
Expand All @@ -183,6 +197,27 @@ def clear(self) -> None:
"""
self._discovered_tools.clear()

def spawn(self) -> "SearchableToolset":
    """
    Create a run-scoped shallow copy of this toolset.

    The copy is made with `copy.copy`, so attributes that are not reassigned below remain shared with the
    original. The copy starts with an empty `_discovered_tools` dict and no name selection. Outside passthrough
    mode it also gets its own bootstrap search tool, created by the copy itself, so the tool operates on the
    copy's state rather than the original's. The original is warmed up first if needed.

    :returns: A run-scoped copy of this SearchableToolset.
    """
    if not self._is_warmed_up:
        self.warm_up()

    run_copy = copy.copy(self)
    run_copy._discovered_tools = {}
    run_copy._selected_tool_names = None

    if self._passthrough:
        # Passthrough mode: the bootstrap tool is not rebuilt.
        return run_copy

    # Rebuild the bootstrap tool on the copy so it is bound to the copy, not to the original.
    run_copy._bootstrap_tool = run_copy._create_search_tool()
    return run_copy

def _create_search_tool(self) -> Tool:
"""Create the search_tools bootstrap tool."""

Expand Down Expand Up @@ -213,8 +248,15 @@ def search_tools(
"names/descriptions (e.g. 'route weather search')."
)

# Scope the search to the selected subset if active so that top_k applies within the selected tools
filters = None
if self._selected_tool_names is not None:
filters = {"field": "meta.tool_name", "operator": "in", "value": list(self._selected_tool_names)}

# at this point, the toolset has been warmed up, so self._document_store is not None
results = self._document_store.bm25_retrieval(query=tool_keywords, top_k=num_results) # type: ignore[union-attr]
results = self._document_store.bm25_retrieval( # type: ignore[union-attr]
query=tool_keywords, top_k=num_results, filters=filters
)

if not results:
return "No tools found matching these keywords. Try different keywords."
Expand Down Expand Up @@ -249,13 +291,18 @@ def search_tools(

return bootstrap_tool

def _is_selected(self, name: str) -> bool:
"""Whether a catalog tool name is allowed by the active `_selected_tool_names` filter (None means all)."""
return self._selected_tool_names is None or name in self._selected_tool_names

def __iter__(self) -> Iterator[Tool]:
"""
Iterate over available tools.

In passthrough mode, yields all catalog tools.
Otherwise, yields bootstrap tool + discovered tools.
Automatically calls warm_up() if needed to ensure bootstrap tool is available.
In passthrough mode, yields all catalog tools. Otherwise, yields the bootstrap search tool plus the
already-discovered tools. If `_selected_tool_names` is set, catalog/discovered tools are restricted to that
set, but the bootstrap search tool is always exposed so search keeps working over the selected subset.
Automatically calls warm_up() if needed to ensure the bootstrap tool is available.
"""
# Unlike base Toolset/MCPToolset, which expose a placeholder tool before warm_up, this toolset materializes
# everything (flattened catalog, bootstrap tool, passthrough decision) in warm_up.
Expand All @@ -264,11 +311,11 @@ def __iter__(self) -> Iterator[Tool]:
if not self._is_warmed_up:
self.warm_up()
if self._passthrough:
yield from self._catalog
yield from (tool for tool in self._catalog if self._is_selected(tool.name))
else:
if self._bootstrap_tool is not None:
yield self._bootstrap_tool
yield from self._discovered_tools.values()
yield from (tool for tool in self._discovered_tools.values() if self._is_selected(tool.name))

def __len__(self) -> int:
"""Return the number of currently available tools."""
Expand Down
89 changes: 70 additions & 19 deletions haystack/tools/toolset.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#
# SPDX-License-Identifier: Apache-2.0

import copy
from collections.abc import Iterator
from dataclasses import dataclass, field
from typing import Any
Expand Down Expand Up @@ -47,8 +48,8 @@ def subtract(a: Annotated[int, "first number"], b: Annotated[int, "second number
```

2. Base class for dynamic tool loading:
By subclassing Toolset, you can create implementations that dynamically load tools
from external sources like OpenAPI URLs, MCP servers, or other resources.
By subclassing Toolset, you can create implementations that dynamically load tools from external sources like
OpenAPI URLs, MCP servers, or other resources.

Example:
```python
Expand Down Expand Up @@ -94,15 +95,14 @@ def from_dict(cls, data):
agent = Agent(chat_generator=OpenAIChatGenerator(), tools=calculator_toolset)
```

Toolset implements the collection interface (__iter__, __contains__, __len__, __getitem__),
making it behave like a list of Tools. This makes it compatible with components that expect
iterable tools, such as Agent or Haystack chat generators.
Toolset implements the collection interface (__iter__, __contains__, __len__, __getitem__), making it behave like
a list of Tools. This makes it compatible with components that expect iterable tools, such as Agent or Haystack
chat generators.

When implementing a custom Toolset subclass for dynamic tool loading:
- Perform the dynamic loading in the __init__ method
- Override to_dict() and from_dict() methods if your tools are defined dynamically
- Serialize endpoint descriptors rather than tool instances if your tools
are loaded from external sources
- Serialize endpoint descriptors rather than tool instances if your tools are loaded from external sources
"""

# Use field() with default_factory to initialize the list
Expand All @@ -124,15 +124,56 @@ def __post_init__(self) -> None:
# Tracks whether warm_up() has already run so subsequent calls become a no-op.
self._is_warmed_up = False

# Optional per-run name filter. When set, iteration only yields tools whose name is in this set.
# None means no filtering. Set on a per-run spawn(), so it never leaks across runs.
self._selected_tool_names: set[str] | None = None

def __iter__(self) -> Iterator[Tool]:
    """
    Return an iterator over the Tools in this Toolset.

    This allows the Toolset to be used wherever a list of Tools is expected. If a name filter is active,
    only the tools whose names are in it are yielded.

    :returns: An iterator yielding Tool instances
    """
    # No early `return iter(self.tools)` here: in a generator function that would end iteration
    # immediately and the name filter below would never run.
    for tool in self.tools:
        if self._selected_tool_names is None or tool.name in self._selected_tool_names:
            yield tool

def get_selectable_tools(self) -> list[Tool]:
    """
    Return all tools that name-based selection may target, regardless of any active name filter.

    Iteration yields only the tools that pass the name filter; this method reads `self.tools` directly and
    therefore ignores the filter. Subclasses whose iteration does not surface every selectable tool should
    override it so that name-based selection can still reach the full set.

    The Toolset is warmed up first if that has not happened yet, so tools that a subclass loads in
    `warm_up()` are included.

    :returns: A new list with the tools available for name-based selection.
    """
    warmed = self._is_warmed_up
    if not warmed:
        self.warm_up()
    return [*self.tools]

def spawn(self) -> "Toolset":
    """
    Create a run-scoped shallow copy of this Toolset.

    The copy is produced with `copy.copy`, so it shares the original's attributes (including its `tools`),
    except for the name filter, which is reset to `None` on the copy. A selection made on the copy therefore
    does not leak into the original or into other copies. The original is warmed up first if needed, so the
    copy inherits the warmed state. Subclasses that keep further run-scoped state should override this method.

    :returns: A run-scoped copy of this Toolset.
    """
    already_warm = self._is_warmed_up
    if not already_warm:
        self.warm_up()

    run_copy = copy.copy(self)
    run_copy._selected_tool_names = None
    return run_copy

def __contains__(self, item: str | Tool) -> bool:
"""
Expand All @@ -146,9 +187,9 @@ def __contains__(self, item: str | Tool) -> bool:
:returns: True if contained, False otherwise
"""
if isinstance(item, str):
return any(tool.name == item for tool in self.tools)
return any(tool.name == item for tool in self)
if isinstance(item, Tool):
return item in self.tools
return any(tool is item or tool == item for tool in self)
return False

def warm_up(self) -> None:
Expand Down Expand Up @@ -281,20 +322,20 @@ def __add__(self, other: "Tool | Toolset | list[Tool]") -> "Toolset":

def __len__(self) -> int:
"""
Return the number of Tools in this Toolset.
Return the number of Tools in this Toolset (respecting any active name filter).

:returns: Number of Tools
"""
return len(self.tools)
return sum(1 for _ in self)

def __getitem__(self, index: int) -> Tool:
    """
    Get a Tool by index (respecting any active name filter).

    :param index: Index of the Tool to get
    :returns: The Tool at the specified index
    """
    # Index into the view produced by iteration, not into the raw `self.tools` list, so positions
    # agree with what `__iter__` and `__len__` report.
    return list(self)[index]


class _ToolsetWrapper(Toolset):
Expand All @@ -312,9 +353,19 @@ def __init__(self, toolsets: list[Toolset]) -> None:
self._is_warmed_up = False

def __iter__(self) -> Iterator[Tool]:
    """
    Iterate over all tools from all toolsets, honoring any active name filter.

    :returns: An iterator yielding each tool of each wrapped toolset, in order, restricted to the names in
        `_selected_tool_names` when that filter is set.
    """
    # NOTE(review): this class defines its own __init__; fall back to "no filter" in case the attribute
    # was never initialised on this instance — confirm against the constructor.
    selected = getattr(self, "_selected_tool_names", None)
    for toolset in self.toolsets:
        # Each tool is yielded exactly once, and only through the filter.
        for tool in toolset:
            if selected is None or tool.name in selected:
                yield tool

def get_selectable_tools(self) -> list[Tool]:
    """
    Return every selectable tool across all wrapped toolsets, ignoring any active filter.

    :returns: The concatenation, in wrapper order, of each wrapped toolset's `get_selectable_tools()` result.
    """
    catalog: list[Tool] = []
    for wrapped in self.toolsets:
        catalog.extend(wrapped.get_selectable_tools())
    return catalog

def spawn(self) -> "_ToolsetWrapper":
    """
    Return an isolated copy with each wrapped toolset spawned.

    :returns: A new wrapper around the `spawn()` copies of the wrapped toolsets, with no name filter active.
    """
    new = _ToolsetWrapper([toolset.spawn() for toolset in self.toolsets])
    # Reset the run-scoped name filter explicitly rather than relying on the constructor to
    # initialise it, so the copy always starts unfiltered.
    new._selected_tool_names = None
    return new

def __contains__(self, item: Any) -> bool:
"""Check if a tool is in any of the toolsets."""
Expand Down Expand Up @@ -371,8 +422,8 @@ def from_dict(cls, data: dict[str, Any]) -> "_ToolsetWrapper":
return cls(toolsets=toolsets)

def __len__(self) -> int:
"""Return total number of tools across all toolsets."""
return sum(len(toolset) for toolset in self.toolsets)
"""Return total number of tools across all toolsets (respecting any active name filter)."""
return sum(1 for _ in self)

def __getitem__(self, index: int) -> Tool:
"""Get a tool by index across all toolsets."""
Expand Down
Loading
Loading