diff --git a/nanobot/nanobot/agent/loop.py b/nanobot/nanobot/agent/loop.py
index ffa47244..c19d39c8 100644
--- a/nanobot/nanobot/agent/loop.py
+++ b/nanobot/nanobot/agent/loop.py
@@ -18,7 +18,7 @@
 from nanobot.agent.tools.registry import ToolRegistry
 from nanobot.agent.tools.shell import ExecTool
 from nanobot.agent.tools.spawn import SpawnTool
-from nanobot.agent.tools.web import WebFetchTool
+from nanobot.agent.tools.web import WebFetchTool, WebSearchTool
 from nanobot.bus.events import InboundMessage, OutboundMessage
 from nanobot.bus.queue import MessageBus
 from nanobot.providers.base import LLMProvider
@@ -99,6 +99,7 @@ def _register_default_tools(self) -> None:
 
         # Web tools
         self.tools.register(WebFetchTool())
+        self.tools.register(WebSearchTool())
 
         # Message tool
         message_tool = MessageTool(send_callback=self.bus.publish_outbound)
diff --git a/nanobot/nanobot/agent/tools/web.py b/nanobot/nanobot/agent/tools/web.py
index 82d0ecff..a5942d4a 100644
--- a/nanobot/nanobot/agent/tools/web.py
+++ b/nanobot/nanobot/agent/tools/web.py
@@ -1,7 +1,8 @@
-"""Web tools: web_fetch."""
+"""Web tools: web_fetch, web_search."""
 
 import html
 import json
+import os
 import re
 from typing import Any
 from urllib.parse import urlparse
@@ -42,6 +43,21 @@ def _validate_url(url: str) -> tuple[bool, str]:
         return False, str(e)
 
 
+def _get_metaso_api_key() -> str:
+    """Return the Metaso API key used to authenticate search requests.
+
+    Reads the METASO_API_KEY environment variable. When it is unset or blank,
+    falls back to the built-in default key, which has a free quota of
+    ~100 searches/day. Set your own key to raise that limit.
+    """
+    # NOTE(review): the fallback is a credential committed to source control.
+    # It is kept so existing setups keep working; consider removing it.
+    # `or` (not a get() default) also covers an empty or blank variable,
+    # which would otherwise be sent as a bare "Bearer " header.
+    key = os.environ.get("METASO_API_KEY", "").strip()
+    return key or "mk-E384C1DD5E8501BB7EFE27C949AFDE5B"
+
+
 class WebFetchTool(Tool):
     """Fetch and extract content from a URL using Readability."""
 
@@ -146,3 +162,149 @@ def _to_markdown(self, html: str) -> str:
         text = re.sub(r"", "\n\n", text, flags=re.I)
         text = re.sub(r"<(br|hr)\s*/?>", "\n", text, flags=re.I)
         return _normalize(_strip_tags(text))
+
+
+class WebSearchTool(Tool):
+    """Search the web using the Metaso API.
+
+    Every outcome, success or failure, is returned as a JSON string that
+    echoes the query, so the caller always receives a parseable payload.
+    """
+
+    name = "web_search"
+    description = (
+        "Search the web for information. Returns a list of results "
+        "with titles, URLs, and snippets. Useful for finding current "
+        "information, looking up facts, or researching topics."
+    )
+    parameters = {
+        "type": "object",
+        "properties": {
+            "query": {
+                "type": "string",
+                "description": "Search query string",
+            },
+            "topK": {
+                "type": "integer",
+                "description": "Maximum number of results to return (1-100). Default: 10",
+                "minimum": 1,
+                "maximum": 100,
+            },
+        },
+        "required": ["query"],
+    }
+
+    _METASO_URL = "https://metaso.cn/api/v1/search"
+    _DEFAULT_TOP_K = 10
+    _MAX_TOP_K = 100
+    _REQUEST_TIMEOUT = 30.0
+
+    @staticmethod
+    def _error(message: str, query: str, **extra: Any) -> str:
+        """Serialize an error payload that echoes the query back."""
+        return json.dumps({"error": message, "query": query, **extra}, ensure_ascii=False)
+
+    def _resolve_top_k(self, top_k: Any) -> int:
+        """Coerce the requested result count to an int clamped to 1..100."""
+        try:
+            requested = int(top_k or self._DEFAULT_TOP_K)
+        except (TypeError, ValueError):
+            # A non-numeric value would make the clamp below raise; use the
+            # default instead of letting the whole tool call crash.
+            return self._DEFAULT_TOP_K
+        return max(1, min(requested, self._MAX_TOP_K))
+
+    async def execute(
+        self,
+        query: str,
+        top_k: int | None = None,
+        **kwargs: Any,
+    ) -> str:
+        """Run a Metaso web search and return the outcome as a JSON string.
+
+        Args:
+            query: Search query string.
+            top_k: Maximum number of results; None or 0 means the default.
+            **kwargs: Extra tool-call arguments. Only ``topK`` (the name used
+                in the tool schema) is read, and only when top_k is None.
+
+        Returns:
+            JSON with ``query``, ``total`` and ``results`` on success, or
+            with ``error`` and ``query`` (plus ``detail`` for HTTP status
+            errors) on failure.
+        """
+        if top_k is None:
+            top_k = kwargs.get("topK")
+        size = self._resolve_top_k(top_k)
+        api_key = _get_metaso_api_key()
+
+        try:
+            async with httpx.AsyncClient(
+                timeout=self._REQUEST_TIMEOUT,
+                follow_redirects=True,
+                max_redirects=MAX_REDIRECTS,
+            ) as client:
+                resp = await client.post(
+                    self._METASO_URL,
+                    json={"q": query, "scope": "webpage", "size": size},
+                    headers={
+                        "Authorization": f"Bearer {api_key}",
+                        "Content-Type": "application/json",
+                    },
+                )
+
+            if resp.status_code in (401, 403):
+                return self._error("Metaso API unauthorized. Check METASO_API_KEY.", query)
+            if resp.status_code == 429:
+                return self._error("Metaso API rate limited. Please retry later.", query)
+
+            resp.raise_for_status()
+            data = resp.json()
+
+            code = data.get("code")
+            if code == 3003:
+                return self._error("Metaso daily search limit reached.", query)
+            if code == 2005:
+                return self._error(
+                    "Metaso API unauthorized (error 2005). Check METASO_API_KEY.", query
+                )
+            if code:
+                return self._error(
+                    f"Metaso API error {code}: {data.get('message', 'unknown')}", query
+                )
+
+            # `or []` / `or ""` also cover explicit nulls, which a .get()
+            # default does not.
+            results = [
+                {
+                    "title": wp.get("title", ""),
+                    "url": wp.get("link", ""),
+                    "snippet": wp.get("snippet") or wp.get("summary") or "",
+                }
+                for wp in data.get("webpages") or []
+            ]
+            # ensure_ascii=False keeps non-ASCII result text readable
+            # instead of expanding it into \uXXXX escapes.
+            return json.dumps(
+                {
+                    "query": query,
+                    "total": data.get("total", len(results)),
+                    "results": results,
+                },
+                ensure_ascii=False,
+            )
+
+        except httpx.ConnectError:
+            return self._error("Cannot connect to Metaso API (metaso.cn).", query)
+        except httpx.TimeoutException:
+            return self._error("Metaso API request timed out.", query)
+        except httpx.HTTPStatusError as e:
+            return self._error(
+                f"Metaso API HTTP error: {e.response.status_code}",
+                query,
+                detail=e.response.text[:500],
+            )
+        except Exception as e:
+            # Some exceptions (e.g. timeouts) stringify to ""; fall back to
+            # the class name so the error field is never empty.
+            return self._error(str(e) or type(e).__name__, query)
diff --git a/nanobot/workspace/TOOLS.md b/nanobot/workspace/TOOLS.md
index 1d77bc73..c5e77a47 100644
--- a/nanobot/workspace/TOOLS.md
+++ b/nanobot/workspace/TOOLS.md
@@ -44,6 +44,17 @@ exec(command: str, working_dir: str = None) -> str
 
 ## Web Access
 
+### web_search
+Search the web for information. Returns a list of results with titles, URLs, and snippets.
+```
+web_search(query: str, topK: int = 10) -> str
+```
+
+**Notes:**
+- Uses the Metaso search API
+- Results limited to 1-100, defaults to 10
+- Useful for finding current information and researching topics
+
 ### web_fetch
 Fetch and extract main content from a URL.
 ```
diff --git a/tests/web_search_test.py b/tests/web_search_test.py
new file mode 100644
index 00000000..dcad01ce
--- /dev/null
+++ b/tests/web_search_test.py
@@ -0,0 +1,316 @@
+"""Tests for WebSearchTool (Metaso API integration)."""
+
+import json
+from contextlib import contextmanager
+from unittest.mock import AsyncMock, Mock, patch
+
+import httpx
+import pytest
+
+from nanobot.agent.tools.web import WebSearchTool, _get_metaso_api_key
+
+
+def _mock_response(
+    status_code: int = 200, json_data: dict | None = None, text: str = ""
+):
+    """Build a mocked httpx.Response with the given status and JSON body."""
+    resp = AsyncMock(spec=httpx.Response)
+    resp.status_code = status_code
+    resp.text = text or json.dumps(json_data or {})
+    if json_data is not None:
+        resp.json.return_value = json_data
+    resp.raise_for_status = Mock()
+    if status_code >= 400:
+        resp.raise_for_status.side_effect = httpx.HTTPStatusError(
+            "error", request=AsyncMock(), response=resp
+        )
+    return resp
+
+
+@contextmanager
+def _patched_client(response=None, error: Exception | None = None):
+    """Patch httpx.AsyncClient in the web module and yield the mock client.
+
+    The client's post() returns `response`, or raises `error` when one is
+    given. Centralizes the setup that every request test needs.
+    """
+    with patch("nanobot.agent.tools.web.httpx.AsyncClient") as mock_client_cls:
+        mock_client = AsyncMock()
+        mock_client.__aenter__ = AsyncMock(return_value=mock_client)
+        mock_client.__aexit__ = AsyncMock(return_value=False)
+        if error is not None:
+            mock_client.post = AsyncMock(side_effect=error)
+        else:
+            mock_client.post = AsyncMock(return_value=response)
+        mock_client_cls.return_value = mock_client
+        yield mock_client
+
+
+async def _search(tool, response=None, error: Exception | None = None, **params):
+    """Run tool.execute(**params) on a patched client; return (data, client)."""
+    with _patched_client(response, error) as mock_client:
+        result = await tool.execute(**params)
+    return json.loads(result), mock_client
+
+
+@pytest.fixture
+def tool():
+    return WebSearchTool()
+
+
+@pytest.fixture
+def sample_response():
+    return {
+        "code": 0,
+        "total": 2,
+        "webpages": [
+            {
+                "title": "Python Async",
+                "link": "https://example.com/async",
+                "snippet": "Learn async",
+            },
+            {
+                "title": "AsyncIO Docs",
+                "link": "https://docs.example.com",
+                "summary": "Official docs",
+            },
+        ],
+    }
+
+
+# --- Schema & configuration ---
+
+
+def test_tool_name(tool):
+    assert tool.name == "web_search"
+
+
+def test_schema_requires_query(tool):
+    errors = tool.validate_params({})
+    assert any("query" in e for e in errors)
+
+
+def test_schema_validates_topk_range(tool):
+    errors = tool.validate_params({"query": "test", "topK": 0})
+    assert any("topK" in e for e in errors)
+    errors = tool.validate_params({"query": "test", "topK": 101})
+    assert any("topK" in e for e in errors)
+
+
+def test_schema_accepts_valid_params(tool):
+    assert tool.validate_params({"query": "test"}) == []
+    assert tool.validate_params({"query": "test", "topK": 5}) == []
+
+
+def test_to_schema_structure(tool):
+    schema = tool.to_schema()
+    assert schema["type"] == "function"
+    assert schema["function"]["name"] == "web_search"
+    assert "query" in schema["function"]["parameters"]["properties"]
+
+
+# --- API key resolution ---
+
+
+def test_api_key_from_env(monkeypatch):
+    monkeypatch.setenv("METASO_API_KEY", "test-key-123")
+    assert _get_metaso_api_key() == "test-key-123"
+
+
+def test_api_key_default(monkeypatch):
+    monkeypatch.delenv("METASO_API_KEY", raising=False)
+    assert _get_metaso_api_key() == "mk-E384C1DD5E8501BB7EFE27C949AFDE5B"
+
+
+def test_api_key_blank_env_falls_back_to_default(monkeypatch):
+    monkeypatch.setenv("METASO_API_KEY", "  ")
+    assert _get_metaso_api_key() == "mk-E384C1DD5E8501BB7EFE27C949AFDE5B"
+
+
+# --- Successful search ---
+
+
+@pytest.mark.asyncio
+async def test_search_success(tool, sample_response):
+    mock_resp = _mock_response(json_data=sample_response)
+    data, _ = await _search(tool, mock_resp, query="Python async")
+
+    assert data["query"] == "Python async"
+    assert data["total"] == 2
+    assert len(data["results"]) == 2
+    assert data["results"][0]["title"] == "Python Async"
+    assert data["results"][0]["url"] == "https://example.com/async"
+    assert data["results"][0]["snippet"] == "Learn async"
+    assert data["results"][1]["snippet"] == "Official docs"  # falls back to summary
+
+
+@pytest.mark.asyncio
+async def test_search_sends_correct_request(tool, sample_response):
+    mock_resp = _mock_response(json_data=sample_response)
+    _, mock_client = await _search(tool, mock_resp, query="test query", topK=5)
+
+    mock_client.post.assert_called_once()
+    call_kwargs = mock_client.post.call_args.kwargs
+    assert call_kwargs["json"] == {
+        "q": "test query",
+        "scope": "webpage",
+        "size": 5,
+    }
+    assert "Authorization" in call_kwargs["headers"]
+    assert call_kwargs["headers"]["Authorization"].startswith("Bearer ")
+
+
+@pytest.mark.asyncio
+async def test_search_empty_results(tool):
+    mock_resp = _mock_response(json_data={"code": 0, "total": 0, "webpages": []})
+    data, _ = await _search(tool, mock_resp, query="obscure query xyz")
+
+    assert data["total"] == 0
+    assert data["results"] == []
+
+
+@pytest.mark.asyncio
+async def test_null_webpages_yields_empty_results(tool):
+    mock_resp = _mock_response(json_data={"code": 0, "webpages": None})
+    data, _ = await _search(tool, mock_resp, query="test")
+
+    assert data["results"] == []
+
+
+# --- topK clamping ---
+
+
+@pytest.mark.asyncio
+async def test_topk_clamped_to_max(tool):
+    mock_resp = _mock_response(json_data={"code": 0, "webpages": []})
+    _, mock_client = await _search(tool, mock_resp, query="test", topK=999)
+
+    assert mock_client.post.call_args.kwargs["json"]["size"] == 100
+
+
+@pytest.mark.asyncio
+async def test_topk_defaults_to_10(tool):
+    mock_resp = _mock_response(json_data={"code": 0, "webpages": []})
+    _, mock_client = await _search(tool, mock_resp, query="test")
+
+    assert mock_client.post.call_args.kwargs["json"]["size"] == 10
+
+
+@pytest.mark.asyncio
+async def test_topk_non_numeric_uses_default(tool):
+    mock_resp = _mock_response(json_data={"code": 0, "webpages": []})
+    _, mock_client = await _search(tool, mock_resp, query="test", topK="many")
+
+    assert mock_client.post.call_args.kwargs["json"]["size"] == 10
+
+
+# --- HTTP error handling ---
+
+
+@pytest.mark.asyncio
+async def test_unauthorized_401(tool):
+    data, _ = await _search(tool, _mock_response(status_code=401), query="test")
+
+    assert "unauthorized" in data["error"].lower()
+    assert data["query"] == "test"
+
+
+@pytest.mark.asyncio
+async def test_rate_limited_429(tool):
+    data, _ = await _search(tool, _mock_response(status_code=429), query="test")
+
+    assert "rate limited" in data["error"].lower()
+
+
+# --- Metaso application-level errors ---
+
+
+@pytest.mark.asyncio
+async def test_daily_limit_code_3003(tool):
+    mock_resp = _mock_response(json_data={"code": 3003, "message": "daily limit"})
+    data, _ = await _search(tool, mock_resp, query="test")
+
+    assert "daily" in data["error"].lower()
+
+
+@pytest.mark.asyncio
+async def test_unauthorized_code_2005(tool):
+    mock_resp = _mock_response(json_data={"code": 2005, "message": "unauthorized"})
+    data, _ = await _search(tool, mock_resp, query="test")
+
+    assert "2005" in data["error"]
+    assert "unauthorized" in data["error"].lower()
+
+
+@pytest.mark.asyncio
+async def test_generic_api_error(tool):
+    mock_resp = _mock_response(json_data={"code": 9999, "message": "something broke"})
+    data, _ = await _search(tool, mock_resp, query="test")
+
+    assert "9999" in data["error"]
+    assert "something broke" in data["error"]
+
+
+# --- Network errors ---
+
+
+@pytest.mark.asyncio
+async def test_connection_error(tool):
+    data, _ = await _search(tool, error=httpx.ConnectError("refused"), query="test")
+
+    assert "Cannot connect" in data["error"]
+
+
+@pytest.mark.asyncio
+async def test_timeout_error(tool):
+    data, _ = await _search(tool, error=httpx.ReadTimeout(""), query="test")
+
+    assert "timed out" in data["error"]
+
+
+@pytest.mark.asyncio
+async def test_http_status_error(tool):
+    resp = AsyncMock(spec=httpx.Response)
+    resp.status_code = 500
+    resp.text = "Internal Server Error"
+    error = httpx.HTTPStatusError("error", request=AsyncMock(), response=resp)
+    data, _ = await _search(tool, error=error, query="test")
+
+    assert "500" in data["error"]
+
+
+# --- Snippet fallback ---
+
+
+@pytest.mark.asyncio
+async def test_snippet_falls_back_to_summary(tool):
+    mock_resp = _mock_response(
+        json_data={
+            "code": 0,
+            "webpages": [
+                {
+                    "title": "No Snippet",
+                    "link": "https://example.com",
+                    "summary": "Fallback text",
+                },
+            ],
+        }
+    )
+    data, _ = await _search(tool, mock_resp, query="test")
+
+    assert data["results"][0]["snippet"] == "Fallback text"
+
+
+@pytest.mark.asyncio
+async def test_snippet_empty_when_both_missing(tool):
+    mock_resp = _mock_response(
+        json_data={
+            "code": 0,
+            "webpages": [
+                {"title": "No Text", "link": "https://example.com"},
+            ],
+        }
+    )
+    data, _ = await _search(tool, mock_resp, query="test")
+
+    assert data["results"][0]["snippet"] == ""