Skip to content

Commit 2eb724f

Browse files
authored
🐛 Bugfix: knowledge_base_search_tool called with TypeError: argument of type 'FieldInfo' is not iterable (#3259)
1 parent 35fce96 commit 2eb724f

4 files changed

Lines changed: 207 additions & 7 deletions

File tree

sdk/nexent/core/agents/nexent_agent.py

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -198,11 +198,16 @@ def create_local_tool(self, tool_config: ToolConfig):
198198
raise ValueError(f"{class_name} not found in local")
199199
else:
200200
if class_name == "KnowledgeBaseSearchTool":
201-
# Filter out conflicting parameters from params to avoid conflicts
202-
# These parameters have exclude=True and cannot be passed to __init__
203-
# due to smolagents.tools.Tool wrapper restrictions
201+
# Filter out conflicting parameters from params to avoid conflicts.
202+
# Parameters declared with exclude=True cannot be passed to __init__
203+
# due to smolagents.tools.Tool wrapper restrictions; they are set as
204+
# attributes on the instance after construction, sourced from metadata.
205+
# `document_paths` is intentionally hidden from the LLM and only
206+
# populated via tool_params from the northbound interface.
204207
filtered_params = {k: v for k, v in params.items()
205-
if k not in ["vdb_core", "embedding_model", "observer", "rerank_model", "display_name_to_index_map"]}
208+
if k not in ["vdb_core", "embedding_model", "observer",
209+
"rerank_model", "display_name_to_index_map",
210+
"document_paths"]}
206211
# Create instance with only non-excluded parameters
207212
tools_obj = tool_class(**filtered_params)
208213
# Set excluded parameters directly as attributes after instantiation
@@ -216,6 +221,13 @@ def create_local_tool(self, tool_config: ToolConfig):
216221
"rerank_model", None) if tool_config.metadata else None
217222
tools_obj.display_name_to_index_map = tool_config.metadata.get(
218223
"display_name_to_index_map", {}) if tool_config.metadata else {}
224+
# Internal access control: restrict results to documents whose
225+
# path_or_url is in the allow list. Only the northbound interface
226+
# may populate this; never the LLM.
227+
tools_obj.set_document_paths(
228+
tool_config.metadata.get(
229+
"document_paths") if tool_config.metadata else None
230+
)
219231
elif class_name in ["DifySearchTool", "DataMateSearchTool"]:
220232
# These parameters have exclude=True and cannot be passed to __init__
221233
filtered_params = {k: v for k, v in params.items()

sdk/nexent/core/tools/knowledge_base_search_tool.py

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,21 @@
2121
logger = logging.getLogger("knowledge_base_search_tool")
2222

2323

24+
def _unwrap_field_info(value):
    """Resolve a value that may be wrapped in a Pydantic FieldInfo.

    Parameters declared with `Field(...)` and `exclude=True` are not expanded by
    smolagents' Tool wrapper, so they arrive at `__init__` as raw FieldInfo
    instances instead of their declared defaults. This helper extracts the
    concrete value so callers can safely treat the result as plain data.

    Args:
        value: Either a plain value or a FieldInfo wrapping a default.

    Returns:
        `value` unchanged when it is not a FieldInfo. Otherwise the result of
        calling `default_factory` when one is set, else `default`. A FieldInfo
        that carries no default at all resolves to None.
    """
    if not isinstance(value, FieldInfo):
        return value
    if value.default_factory is not None:
        return value.default_factory()

    default = value.default
    # A FieldInfo without a default holds a "missing" sentinel rather than
    # None: `...` for required fields, or Pydantic's Undefined singleton
    # (PydanticUndefinedType in v2, UndefinedType in v1). Those sentinels are
    # truthy and not iterable, so leaking them would defeat the purpose of
    # unwrapping. The singleton is matched by type name so that no extra import
    # is needed and both major Pydantic versions are covered.
    if default is Ellipsis or type(default).__name__ in (
        "PydanticUndefinedType",
        "UndefinedType",
    ):
        return None
    return default
37+
38+
2439
class KnowledgeBaseSearchTool(Tool):
2540
"""Knowledge base search tool"""
2641

@@ -129,7 +144,10 @@ def __init__(
129144
self.rerank_model = rerank_model
130145
self.data_process_service = os.getenv("DATA_PROCESS_SERVICE")
131146
self.display_name_to_index_map = display_name_to_index_map
132-
self._internal_document_paths = document_paths
147+
# `document_paths` is declared with `exclude=True` so smolagents passes the
148+
# raw FieldInfo default when no value is supplied. Unwrap it here so the
149+
# internal filter is always a concrete list (or None), never a FieldInfo.
150+
self._internal_document_paths = _unwrap_field_info(document_paths)
133151

134152
self.record_ops = 1
135153
self.running_prompt_zh = "知识库检索中..."
@@ -144,7 +162,7 @@ def set_document_paths(self, document_paths: Optional[List[str]]) -> None:
144162
Args:
145163
document_paths: List of allowed document path_or_urls. If None, no filtering is applied.
146164
"""
147-
self._internal_document_paths = document_paths
165+
self._internal_document_paths = _unwrap_field_info(document_paths)
148166

149167
def _convert_to_index_names(self, names: List[str]) -> List[str]:
150168
"""Convert display names (knowledge_name) to index names if necessary.
@@ -188,7 +206,7 @@ def _filter_by_document_paths(self, results: List[dict]) -> List[dict]:
188206
Returns:
189207
Filtered list containing only results with allowed document paths
190208
"""
191-
allowed_paths = self._internal_document_paths
209+
allowed_paths = _unwrap_field_info(self._internal_document_paths)
192210
if not allowed_paths:
193211
return results
194212

test/sdk/core/agents/test_nexent_agent.py

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -939,6 +939,88 @@ def test_create_local_tool_knowledge_base_with_display_name_map(nexent_agent_ins
939939
assert result.rerank_model == "mock_rerank_model"
940940

941941

942+
def test_create_local_tool_knowledge_base_with_document_paths_from_metadata(nexent_agent_instance):
    """document_paths in metadata must reach the tool through set_document_paths.

    `document_paths` is declared with `exclude=True`, so create_local_tool must
    keep it out of the constructor kwargs and instead forward the metadata
    value to `set_document_paths` on the created instance. This guards against
    the FieldInfo-iteration regression reported when document_paths is unset.
    """
    tool_key = "KnowledgeBaseSearchTool"
    allowed_docs = ["s3://bucket/doc1.txt", "s3://bucket/doc2.txt"]

    kb_instance = MagicMock()
    kb_class = MagicMock(return_value=kb_instance)

    metadata = {
        "vdb_core": "mock_vdb_core",
        "embedding_model": "mock_embedding_model",
        "document_paths": allowed_docs,
    }
    config = ToolConfig(
        class_name="KnowledgeBaseSearchTool",
        name="knowledge_base_search",
        description="desc",
        inputs="{}",
        output_type="string",
        params={"top_k": 5, "index_names": ["kb1"]},
        source="local",
        metadata=metadata,
    )

    # Swap the real tool class for the mock in the module namespace, and put
    # the namespace back exactly as it was once the call returns or raises.
    module_ns = nexent_agent.__dict__
    saved = module_ns.get(tool_key)
    module_ns[tool_key] = kb_class
    try:
        nexent_agent_instance.create_local_tool(config)
    finally:
        if saved is None:
            module_ns.pop(tool_key, None)
        else:
            module_ns[tool_key] = saved

    # The excluded parameter must not appear among the constructor kwargs...
    assert "document_paths" not in kb_class.call_args.kwargs
    # ...and must be applied exactly once through the setter.
    kb_instance.set_document_paths.assert_called_once_with(allowed_docs)
987+
988+
989+
def test_create_local_tool_knowledge_base_without_metadata_calls_set_document_paths_none(nexent_agent_instance):
    """With no metadata, create_local_tool must still call set_document_paths(None).

    The explicit call resets the tool's internal filter to None instead of
    leaving behind a stale FieldInfo default from the smolagents wrapper.
    """
    tool_key = "KnowledgeBaseSearchTool"

    kb_instance = MagicMock()
    kb_class = MagicMock(return_value=kb_instance)

    config = ToolConfig(
        class_name="KnowledgeBaseSearchTool",
        name="knowledge_base_search",
        description="desc",
        inputs="{}",
        output_type="string",
        params={"top_k": 5, "index_names": ["kb1"]},
        source="local",
        metadata=None,
    )

    # Swap the real tool class for the mock in the module namespace, and put
    # the namespace back exactly as it was once the call returns or raises.
    module_ns = nexent_agent.__dict__
    saved = module_ns.get(tool_key)
    module_ns[tool_key] = kb_class
    try:
        nexent_agent_instance.create_local_tool(config)
    finally:
        if saved is None:
            module_ns.pop(tool_key, None)
        else:
            module_ns[tool_key] = saved

    kb_instance.set_document_paths.assert_called_once_with(None)
1022+
1023+
9421024
def test_create_local_tool_knowledge_base_with_empty_display_name_map(nexent_agent_instance):
9431025
"""Test KnowledgeBaseSearchTool creation handles empty display_name_to_index_map."""
9441026
mock_kb_tool_class = MagicMock()

test/sdk/core/tools/test_knowledge_base_search_tool.py

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1776,3 +1776,91 @@ def test_forward_with_document_paths_filter_no_results_after_filter(self, mock_v
17761776

17771777
assert "No results found" in str(excinfo.value)
17781778

1779+
def test_filter_by_document_paths_unwraps_fieldinfo_default(self, mock_vdb_core, mock_embedding_model):
    """_filter_by_document_paths must cope with a FieldInfo holding the allow list.

    Regression: smolagents' Tool wrapper does not expand FieldInfo defaults for
    parameters declared with `exclude=True`, so `_internal_document_paths` may
    hold a FieldInfo. The filter has to unwrap it rather than fail with
    `TypeError: argument of type 'FieldInfo' is not iterable`.
    """
    # FieldInfo lives in different places depending on the Pydantic version.
    try:
        from pydantic import FieldInfo
    except ImportError:
        from pydantic.fields import FieldInfo

    kb_tool = KnowledgeBaseSearchTool(
        index_names=["kb1"],
        search_mode="hybrid",
        vdb_core=mock_vdb_core,
        embedding_model=mock_embedding_model,
        document_paths=None,
    )
    # Simulate a FieldInfo being assigned directly (e.g. from smolagents wrapper).
    kb_tool._internal_document_paths = FieldInfo(default=["s3://bucket/doc1.txt"])

    candidates = self._create_mock_formatted_results_with_paths(
        ["s3://bucket/doc1.txt", "s3://bucket/doc2.txt"]
    )
    kept = kb_tool._filter_by_document_paths(candidates)

    # Only the document named in the wrapped default may survive.
    assert len(kept) == 1
    assert kept[0]["path_or_url"] == "s3://bucket/doc1.txt"
1811+
1812+
def test_filter_by_document_paths_unwraps_fieldinfo_default_factory(self, mock_vdb_core, mock_embedding_model):
    """_filter_by_document_paths must cope with a FieldInfo built from default_factory."""
    # FieldInfo lives in different places depending on the Pydantic version.
    try:
        from pydantic import FieldInfo
    except ImportError:
        from pydantic.fields import FieldInfo

    kb_tool = KnowledgeBaseSearchTool(
        index_names=["kb1"],
        search_mode="hybrid",
        vdb_core=mock_vdb_core,
        embedding_model=mock_embedding_model,
        document_paths=None,
    )
    # The allow list is only produced when the factory is invoked.
    kb_tool._internal_document_paths = FieldInfo(
        default_factory=lambda: ["s3://bucket/doc2.txt"]
    )

    candidates = self._create_mock_formatted_results_with_paths(
        ["s3://bucket/doc1.txt", "s3://bucket/doc2.txt"]
    )
    kept = kb_tool._filter_by_document_paths(candidates)

    # Only the document returned by the factory may survive.
    assert len(kept) == 1
    assert kept[0]["path_or_url"] == "s3://bucket/doc2.txt"
1839+
1840+
def test_set_document_paths_unwraps_fieldinfo(self, mock_vdb_core, mock_embedding_model):
    """set_document_paths must accept a FieldInfo and still yield a working filter."""
    # FieldInfo lives in different places depending on the Pydantic version.
    try:
        from pydantic import FieldInfo
    except ImportError:
        from pydantic.fields import FieldInfo

    kb_tool = KnowledgeBaseSearchTool(
        index_names=["kb1"],
        search_mode="hybrid",
        vdb_core=mock_vdb_core,
        embedding_model=mock_embedding_model,
        document_paths=None,
    )

    # Go through the public setter rather than assigning the attribute directly.
    kb_tool.set_document_paths(FieldInfo(default=["s3://bucket/doc1.txt"]))

    candidates = self._create_mock_formatted_results_with_paths(
        ["s3://bucket/doc1.txt", "s3://bucket/doc2.txt"]
    )
    kept = kb_tool._filter_by_document_paths(candidates)

    # Only the document named in the wrapped default may survive.
    assert len(kept) == 1
    assert kept[0]["path_or_url"] == "s3://bucket/doc1.txt"
1865+
1866+

0 commit comments

Comments
 (0)