From 7b3ff33a1ed147e6a3da7eef512892f50f21c03c Mon Sep 17 00:00:00 2001 From: Teo Gonzalez Collazo Date: Thu, 23 Apr 2026 09:06:26 -0700 Subject: [PATCH] feat: add Exa AI-powered search tool Adds a general-purpose web_search tool backed by Exa so the agent can ground answers in current web content when HF docs, papers, and GitHub search aren't enough. - New agent/tools/web_search_tool.py with a typed WebSearchResult model and a snippet cascade (summary > highlights > text). - Registered through create_builtin_tools only when EXA_API_KEY is set, so the tool stays dark for users who don't configure it. - Added to the research sub-agent's read-only tool allow-list. - Surfaces Exa search types, category, domain filters, date filters, and content modes (text / highlights / summary). - Tests cover response parsing, snippet fallbacks, argument validation, router gating, and the integration-attribution header. --- README.md | 5 +- agent/core/tools.py | 16 ++ agent/tools/__init__.py | 8 + agent/tools/research_tool.py | 1 + agent/tools/web_search_tool.py | 357 +++++++++++++++++++++++++++++ pyproject.toml | 3 + tests/unit/test_web_search_tool.py | 338 +++++++++++++++++++++++++++ uv.lock | 41 ++++ 8 files changed, 767 insertions(+), 2 deletions(-) create mode 100644 agent/tools/web_search_tool.py create mode 100644 tests/unit/test_web_search_tool.py diff --git a/README.md b/README.md index 29fe439b..235ffb78 100644 --- a/README.md +++ b/README.md @@ -28,9 +28,10 @@ Create a `.env` file in the project root (or export these in your shell): ```bash ANTHROPIC_API_KEY= # if using anthropic models HF_TOKEN= -GITHUB_TOKEN= +GITHUB_TOKEN= +EXA_API_KEY= # optional, enables the web_search tool ``` -If no `HF_TOKEN` is set, the CLI will prompt you to paste one on first launch. To get a GITHUB_TOKEN follow the tutorial [here](https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/managing-your-personal-access-tokens#creating-a-fine-grained-personal-access-token). 
+If no `HF_TOKEN` is set, the CLI will prompt you to paste one on first launch. To get a GITHUB_TOKEN follow the tutorial [here](https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/managing-your-personal-access-tokens#creating-a-fine-grained-personal-access-token). Setting `EXA_API_KEY` ([get a key](https://exa.ai/)) unlocks the `web_search` tool for general web lookups outside the HF ecosystem. ### Usage diff --git a/agent/core/tools.py b/agent/core/tools.py index 9bbf91d7..c2f70d65 100644 --- a/agent/core/tools.py +++ b/agent/core/tools.py @@ -50,6 +50,11 @@ from agent.tools.plan_tool import PLAN_TOOL_SPEC, plan_tool_handler from agent.tools.research_tool import RESEARCH_TOOL_SPEC, research_handler from agent.tools.sandbox_tool import get_sandbox_tools +from agent.tools.web_search_tool import ( + WEB_SEARCH_TOOL_SPEC, + web_search_enabled, + web_search_handler, +) # NOTE: Private HF repo tool disabled - replaced by hf_repo_files and hf_repo_git # from agent.tools.private_hf_repo_tools import ( @@ -363,6 +368,17 @@ def create_builtin_tools(local_mode: bool = False) -> list[ToolSpec]: ), ] + # Optional: Exa-backed general web search (enabled only when EXA_API_KEY is set) + if web_search_enabled(): + tools.append( + ToolSpec( + name=WEB_SEARCH_TOOL_SPEC["name"], + description=WEB_SEARCH_TOOL_SPEC["description"], + parameters=WEB_SEARCH_TOOL_SPEC["parameters"], + handler=web_search_handler, + ) + ) + # Sandbox or local tools (highest priority) if local_mode: from agent.tools.local_tools import get_local_tools diff --git a/agent/tools/__init__.py b/agent/tools/__init__.py index 14ef4566..09df2a78 100644 --- a/agent/tools/__init__.py +++ b/agent/tools/__init__.py @@ -20,6 +20,11 @@ ) from agent.tools.jobs_tool import HF_JOBS_TOOL_SPEC, HfJobsTool, hf_jobs_handler from agent.tools.types import ToolResult +from agent.tools.web_search_tool import ( + WEB_SEARCH_TOOL_SPEC, + web_search_enabled, + web_search_handler, +) __all__ = [ "ToolResult", 
@@ -36,4 +41,7 @@ "github_search_code_handler", "HF_INSPECT_DATASET_TOOL_SPEC", "hf_inspect_dataset_handler", + "WEB_SEARCH_TOOL_SPEC", + "web_search_enabled", + "web_search_handler", ] diff --git a/agent/tools/research_tool.py b/agent/tools/research_tool.py index fea12ba2..52475d60 100644 --- a/agent/tools/research_tool.py +++ b/agent/tools/research_tool.py @@ -39,6 +39,7 @@ "github_read_file", "hf_inspect_dataset", "hf_repo_files", + "web_search", } RESEARCH_SYSTEM_PROMPT = """\ diff --git a/agent/tools/web_search_tool.py b/agent/tools/web_search_tool.py new file mode 100644 index 00000000..dcad9d04 --- /dev/null +++ b/agent/tools/web_search_tool.py @@ -0,0 +1,357 @@ +""" +Web search tool backed by the Exa API. + +Exposes general-purpose web search so the agent can ground answers in +current web content — useful when a topic falls outside the HF ecosystem, +when training recipes require recent blog posts or announcements, or when +arxiv alone does not surface the best reference. + +Disabled unless ``EXA_API_KEY`` is set in the environment; without the key +``web_search_enabled()`` returns ``False`` and the router never registers it. 
+""" + +from __future__ import annotations + +import asyncio +import logging +import os +from dataclasses import dataclass, field +from typing import Any + +logger = logging.getLogger(__name__) + +ENV_VAR = "EXA_API_KEY" +INTEGRATION_NAME = "ml-intern" + +DEFAULT_NUM_RESULTS = 5 +MAX_NUM_RESULTS = 25 +DEFAULT_SUMMARY_CHARACTERS = 1200 + +SEARCH_TYPES = ["auto", "neural", "fast"] +CATEGORIES = [ + "company", + "research paper", + "news", + "personal site", + "financial report", + "linkedin profile", + "pdf", + "github", + "tweet", +] + + +@dataclass +class WebSearchResult: + """Normalized search result built from the Exa response.""" + + title: str + url: str + published_date: str | None = None + author: str | None = None + score: float | None = None + summary: str | None = None + highlights: list[str] = field(default_factory=list) + text: str | None = None + + def snippet(self, max_characters: int = 500) -> str: + """Return the best-available snippet, preferring summary > highlights > text.""" + if self.summary: + return _truncate(self.summary, max_characters) + if self.highlights: + joined = " … ".join(h.strip() for h in self.highlights if h and h.strip()) + if joined: + return _truncate(joined, max_characters) + if self.text: + return _truncate(self.text, max_characters) + return "" + + +def _truncate(text: str, max_characters: int) -> str: + text = text.strip() + if len(text) <= max_characters: + return text + return text[: max_characters - 1].rstrip() + "…" + + +def _coerce_result(raw: Any) -> WebSearchResult: + """Map an Exa SDK result object (or plain dict) to WebSearchResult.""" + def _get(key: str, default: Any = None) -> Any: + if isinstance(raw, dict): + return raw.get(key, default) + return getattr(raw, key, default) + + highlights = _get("highlights") or [] + if not isinstance(highlights, list): + highlights = [str(highlights)] + + return WebSearchResult( + title=(_get("title") or "").strip() or "(untitled)", + url=(_get("url") or "").strip(), + 
published_date=_get("published_date") or _get("publishedDate"), + author=_get("author"), + score=_get("score"), + summary=_get("summary"), + highlights=[str(h) for h in highlights if h], + text=_get("text"), + ) + + +def _format_results( + query: str, + results: list[WebSearchResult], + search_type: str, + auto_selected: str | None, +) -> str: + if not results: + return f"No web results found for query: {query!r}." + + header = f"Web search results for: {query!r}" + if auto_selected and auto_selected != search_type: + header += f" (type={search_type} → resolved={auto_selected})" + else: + header += f" (type={search_type})" + lines: list[str] = [header, ""] + + for i, r in enumerate(results, 1): + lines.append(f"{i}. **{r.title}**") + lines.append(f" URL: {r.url}") + meta: list[str] = [] + if r.published_date: + meta.append(f"published: {r.published_date}") + if r.author: + meta.append(f"author: {r.author}") + if r.score is not None: + meta.append(f"score: {r.score:.2f}") + if meta: + lines.append(f" {' | '.join(meta)}") + + snippet = r.snippet() + if snippet: + lines.append(f" {snippet}") + lines.append("") + + return "\n".join(lines).rstrip() + "\n" + + +def _build_contents_kwargs( + text: bool, summary: bool, highlights: bool +) -> dict[str, Any]: + kwargs: dict[str, Any] = {} + if text: + kwargs["text"] = True + if highlights: + kwargs["highlights"] = True + if summary: + kwargs["summary"] = True + return kwargs + + +def _run_search( + api_key: str, + query: str, + num_results: int, + search_type: str, + category: str | None, + include_domains: list[str] | None, + exclude_domains: list[str] | None, + start_published_date: str | None, + end_published_date: str | None, + include_text: bool, + include_summary: bool, + include_highlights: bool, +) -> tuple[list[WebSearchResult], str | None]: + """Synchronous Exa call, run inside asyncio.to_thread.""" + from exa_py import Exa # imported lazily so the module loads without the dep + + client = Exa(api_key) + # 
Integration tracking — lets the Exa team attribute usage to this repo. + client.headers["x-exa-integration"] = INTEGRATION_NAME + + params: dict[str, Any] = { + "query": query, + "num_results": num_results, + "type": search_type, + } + if category: + params["category"] = category + if include_domains: + params["include_domains"] = include_domains + if exclude_domains: + params["exclude_domains"] = exclude_domains + if start_published_date: + params["start_published_date"] = start_published_date + if end_published_date: + params["end_published_date"] = end_published_date + + content_kwargs = _build_contents_kwargs( + text=include_text, + summary=include_summary, + highlights=include_highlights, + ) + + if content_kwargs: + response = client.search_and_contents(**params, **content_kwargs) + else: + response = client.search(**params) + + raw_results = getattr(response, "results", None) or [] + auto_type = getattr(response, "resolved_search_type", None) or getattr( + response, "search_type", None + ) + return [_coerce_result(r) for r in raw_results], auto_type + + +async def web_search_handler( + arguments: dict[str, Any], session=None +) -> tuple[str, bool]: + """Agent handler: run an Exa web search and format the results.""" + api_key = os.environ.get(ENV_VAR) + if not api_key: + return ( + f"Error: {ENV_VAR} is not set — web_search is unavailable.", + False, + ) + + query = (arguments.get("query") or "").strip() + if not query: + return "Error: 'query' is required.", False + + try: + num_results = int(arguments.get("num_results", DEFAULT_NUM_RESULTS)) + except (TypeError, ValueError): + return "Error: num_results must be an integer.", False + num_results = max(1, min(num_results, MAX_NUM_RESULTS)) + + search_type = (arguments.get("type") or "auto").strip() or "auto" + if search_type not in SEARCH_TYPES: + return ( + f"Error: type must be one of {SEARCH_TYPES}, got {search_type!r}.", + False, + ) + + category = (arguments.get("category") or "").strip() or None + if 
category and category not in CATEGORIES: + return ( + f"Error: category must be one of {CATEGORIES}, got {category!r}.", + False, + ) + + include_domains = arguments.get("include_domains") or None + exclude_domains = arguments.get("exclude_domains") or None + if include_domains is not None and not isinstance(include_domains, list): + return "Error: include_domains must be a list of strings.", False + if exclude_domains is not None and not isinstance(exclude_domains, list): + return "Error: exclude_domains must be a list of strings.", False + + start_published_date = (arguments.get("start_published_date") or "").strip() or None + end_published_date = (arguments.get("end_published_date") or "").strip() or None + + include_text = bool(arguments.get("include_text", False)) + include_summary = bool(arguments.get("include_summary", True)) + include_highlights = bool(arguments.get("include_highlights", True)) + + try: + results, auto_type = await asyncio.to_thread( + _run_search, + api_key, + query, + num_results, + search_type, + category, + include_domains, + exclude_domains, + start_published_date, + end_published_date, + include_text, + include_summary, + include_highlights, + ) + except ImportError: + return ( + "Error: exa-py is not installed. Run `uv sync` or " + "`pip install exa-py` to enable web_search.", + False, + ) + except Exception as e: # noqa: BLE001 — Exa SDK raises several error types + logger.exception("Exa web_search failed") + return f"Web search error: {e}", False + + return _format_results(query, results, search_type, auto_type), True + + +WEB_SEARCH_TOOL_SPEC = { + "name": "web_search", + "description": ( + "Search the open web with Exa for current information outside the HF ecosystem. " + "Use when HF docs / papers / GitHub search aren't enough — e.g. 
recent blog posts, " + "announcements, product pages, non-arxiv references, or cross-domain context.\n\n" + "Tips:\n" + " • Set category='research paper' to bias toward academic sources.\n" + " • Use include_domains / exclude_domains to scope to known-good sources.\n" + " • Results include a summary by default; set include_text=true for full page text.\n" + " • Narrow recency with start_published_date / end_published_date (ISO 8601).\n\n" + "Requires the EXA_API_KEY environment variable." + ), + "parameters": { + "type": "object", + "properties": { + "query": { + "type": "string", + "description": "The search query (natural-language phrase works best for neural search).", + }, + "num_results": { + "type": "integer", + "description": f"Number of results to return (default {DEFAULT_NUM_RESULTS}, max {MAX_NUM_RESULTS}).", + "minimum": 1, + "maximum": MAX_NUM_RESULTS, + }, + "type": { + "type": "string", + "enum": SEARCH_TYPES, + "description": "Search mode: 'auto' (default) balances neural + fast, 'neural' for semantic, 'fast' for low-latency.", + }, + "category": { + "type": "string", + "enum": CATEGORIES, + "description": "Optional content category filter (e.g. 'research paper', 'news', 'github', 'pdf').", + }, + "include_domains": { + "type": "array", + "items": {"type": "string"}, + "description": "Only return results from these domains (e.g. 
['huggingface.co', 'arxiv.org']).", + }, + "exclude_domains": { + "type": "array", + "items": {"type": "string"}, + "description": "Exclude results from these domains.", + }, + "start_published_date": { + "type": "string", + "description": "Only return results published on/after this ISO 8601 date (YYYY-MM-DD).", + }, + "end_published_date": { + "type": "string", + "description": "Only return results published on/before this ISO 8601 date (YYYY-MM-DD).", + }, + "include_text": { + "type": "boolean", + "description": "Include the full page text for each result (verbose; default false).", + }, + "include_summary": { + "type": "boolean", + "description": "Include an LLM-generated summary per result (default true).", + }, + "include_highlights": { + "type": "boolean", + "description": "Include LLM-selected highlight snippets per result (default true).", + }, + }, + "required": ["query"], + }, +} + + +def web_search_enabled() -> bool: + """Whether the tool should be registered — depends on the env var only.""" + return bool(os.environ.get(ENV_VAR)) diff --git a/pyproject.toml b/pyproject.toml index c0f7abfa..1842fc55 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,6 +25,8 @@ dependencies = [ "uvicorn[standard]>=0.32.0", "httpx>=0.27.0", "websockets>=13.0", + # Optional integration: Exa web search (activated when EXA_API_KEY is set) + "exa-py>=2.0.0", ] [project.optional-dependencies] @@ -40,6 +42,7 @@ eval = [ # Development and testing dependencies dev = [ "pytest>=9.0.2", + "pytest-asyncio>=1.3.0", ] # All dependencies (eval + dev) diff --git a/tests/unit/test_web_search_tool.py b/tests/unit/test_web_search_tool.py new file mode 100644 index 00000000..85161b88 --- /dev/null +++ b/tests/unit/test_web_search_tool.py @@ -0,0 +1,338 @@ +"""Tests for agent/tools/web_search_tool.py — Exa-backed web search.""" + +from __future__ import annotations + +import os +import sys +import types +from pathlib import Path +from unittest.mock import MagicMock, patch + +import 
pytest + +# Ensure the project root is importable (tests/unit is two levels below root). +_ROOT = Path(__file__).resolve().parent.parent.parent +if str(_ROOT) not in sys.path: + sys.path.insert(0, str(_ROOT)) + +# Stub exa_py so tests do not require the package to be installed. +if "exa_py" not in sys.modules: + stub = types.ModuleType("exa_py") + stub.Exa = MagicMock() # type: ignore[attr-defined] + sys.modules["exa_py"] = stub + +from agent.tools.web_search_tool import ( # noqa: E402 + ENV_VAR, + INTEGRATION_NAME, + WEB_SEARCH_TOOL_SPEC, + WebSearchResult, + _coerce_result, + _format_results, + web_search_enabled, + web_search_handler, +) + + +# --------------------------------------------------------------------------- +# Env gating +# --------------------------------------------------------------------------- + + +def test_enabled_only_when_env_var_set(monkeypatch): + monkeypatch.delenv(ENV_VAR, raising=False) + assert web_search_enabled() is False + + monkeypatch.setenv(ENV_VAR, "sk-test") + assert web_search_enabled() is True + + +@pytest.mark.asyncio +async def test_handler_refuses_without_api_key(monkeypatch): + monkeypatch.delenv(ENV_VAR, raising=False) + output, ok = await web_search_handler({"query": "anything"}) + assert ok is False + assert ENV_VAR in output + + +# --------------------------------------------------------------------------- +# Argument validation +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_handler_requires_query(monkeypatch): + monkeypatch.setenv(ENV_VAR, "sk-test") + output, ok = await web_search_handler({"query": " "}) + assert ok is False + assert "query" in output.lower() + + +@pytest.mark.asyncio +async def test_handler_rejects_invalid_type(monkeypatch): + monkeypatch.setenv(ENV_VAR, "sk-test") + output, ok = await web_search_handler({"query": "x", "type": "keyword"}) + assert ok is False + assert "type" in output.lower() + + +@pytest.mark.asyncio +async 
def test_handler_rejects_invalid_category(monkeypatch): + monkeypatch.setenv(ENV_VAR, "sk-test") + output, ok = await web_search_handler({"query": "x", "category": "bogus"}) + assert ok is False + assert "category" in output.lower() + + +@pytest.mark.asyncio +async def test_handler_rejects_non_list_domains(monkeypatch): + monkeypatch.setenv(ENV_VAR, "sk-test") + output, ok = await web_search_handler( + {"query": "x", "include_domains": "arxiv.org"} + ) + assert ok is False + assert "include_domains" in output + + +# --------------------------------------------------------------------------- +# Result parsing and snippet cascade +# --------------------------------------------------------------------------- + + +def test_coerce_result_from_object(): + raw = types.SimpleNamespace( + title="A paper", + url="https://example.com/paper", + published_date="2024-06-01", + author="Author", + score=0.83, + summary="A short summary.", + highlights=["h1", "h2"], + text="Full text body", + ) + r = _coerce_result(raw) + assert r.title == "A paper" + assert r.url == "https://example.com/paper" + assert r.published_date == "2024-06-01" + assert r.summary == "A short summary." 
+ assert r.highlights == ["h1", "h2"] + + +def test_coerce_result_from_dict_with_camel_case_date(): + raw = { + "title": "T", + "url": "https://x", + "publishedDate": "2025-01-01", + } + r = _coerce_result(raw) + assert r.published_date == "2025-01-01" + + +def test_snippet_prefers_summary(): + r = WebSearchResult( + title="t", + url="u", + summary="summary wins", + highlights=["h1", "h2"], + text="text loses", + ) + assert r.snippet() == "summary wins" + + +def test_snippet_falls_back_to_highlights_then_text(): + r1 = WebSearchResult(title="t", url="u", highlights=["h1", "h2"], text="text") + assert "h1" in r1.snippet() and "h2" in r1.snippet() + + r2 = WebSearchResult(title="t", url="u", text="just text") + assert r2.snippet() == "just text" + + r3 = WebSearchResult(title="t", url="u") + assert r3.snippet() == "" + + +def test_snippet_truncates_long_content(): + long_summary = "x" * 1000 + r = WebSearchResult(title="t", url="u", summary=long_summary) + snippet = r.snippet(max_characters=50) + assert len(snippet) <= 50 + assert snippet.endswith("…") + + +def test_format_results_empty(): + out = _format_results("no hits", [], "auto", None) + assert "No web results" in out + + +def test_format_results_shows_metadata_and_snippet(): + results = [ + WebSearchResult( + title="Cool blog post", + url="https://example.com/post", + published_date="2025-02-01", + author="Jane Doe", + score=0.91, + summary="TL;DR of the post.", + ) + ] + out = _format_results("cool post", results, "auto", "neural") + assert "Cool blog post" in out + assert "https://example.com/post" in out + assert "2025-02-01" in out + assert "Jane Doe" in out + assert "0.91" in out + assert "TL;DR" in out + assert "auto → resolved=neural" in out + + +# --------------------------------------------------------------------------- +# Integration header + end-to-end handler path (with mocked Exa client) +# --------------------------------------------------------------------------- + + +class _FakeExa: + 
"""Minimal Exa stand-in that records its call and returns canned results.""" + + last_instance: "_FakeExa | None" = None + + def __init__(self, api_key: str): + self.api_key = api_key + self.headers: dict[str, str] = {} + self.search_calls: list[dict] = [] + self.search_and_contents_calls: list[dict] = [] + _FakeExa.last_instance = self + + def _response(self): + return types.SimpleNamespace( + results=[ + types.SimpleNamespace( + title="Result 1", + url="https://example.com/1", + published_date="2025-03-01", + author=None, + score=0.7, + summary="Summary 1", + highlights=["hl1"], + text=None, + ) + ], + resolved_search_type="neural", + ) + + def search(self, **kwargs): + self.search_calls.append(kwargs) + return self._response() + + def search_and_contents(self, **kwargs): + self.search_and_contents_calls.append(kwargs) + return self._response() + + +@pytest.mark.asyncio +async def test_handler_happy_path_sets_integration_header(monkeypatch): + monkeypatch.setenv(ENV_VAR, "sk-test") + + # Patch on the source module — the tool imports Exa locally via + # `from exa_py import Exa`, so monkeypatching the consuming namespace + # (agent.tools.web_search_tool.Exa) would miss it. + with patch("exa_py.Exa", _FakeExa): + output, ok = await web_search_handler( + { + "query": "flash attention", + "num_results": 3, + "type": "auto", + "category": "research paper", + "include_domains": ["arxiv.org"], + "start_published_date": "2024-01-01", + } + ) + + assert ok is True + assert "Result 1" in output + assert "https://example.com/1" in output + + inst = _FakeExa.last_instance + assert inst is not None + # Integration attribution header must be set on every client. + assert inst.headers.get("x-exa-integration") == INTEGRATION_NAME + # Defaults enable summary + highlights, so search_and_contents is used. 
+ assert len(inst.search_and_contents_calls) == 1 + call = inst.search_and_contents_calls[0] + assert call["query"] == "flash attention" + assert call["num_results"] == 3 + assert call["type"] == "auto" + assert call["category"] == "research paper" + assert call["include_domains"] == ["arxiv.org"] + assert call["start_published_date"] == "2024-01-01" + assert call.get("summary") is True + assert call.get("highlights") is True + + +@pytest.mark.asyncio +async def test_handler_plain_search_when_no_contents_requested(monkeypatch): + monkeypatch.setenv(ENV_VAR, "sk-test") + + with patch("exa_py.Exa", _FakeExa): + output, ok = await web_search_handler( + { + "query": "no extras", + "include_summary": False, + "include_highlights": False, + "include_text": False, + } + ) + + assert ok is True + inst = _FakeExa.last_instance + assert inst is not None + assert len(inst.search_calls) == 1 + assert inst.search_and_contents_calls == [] + + +@pytest.mark.asyncio +async def test_handler_caps_num_results(monkeypatch): + monkeypatch.setenv(ENV_VAR, "sk-test") + + with patch("exa_py.Exa", _FakeExa): + _, ok = await web_search_handler({"query": "x", "num_results": 9999}) + + assert ok is True + inst = _FakeExa.last_instance + assert inst is not None + call = inst.search_and_contents_calls[0] + assert 1 <= call["num_results"] <= 25 + + +# --------------------------------------------------------------------------- +# Router gating +# --------------------------------------------------------------------------- + + +def test_tool_not_registered_when_api_key_unset(monkeypatch): + monkeypatch.delenv(ENV_VAR, raising=False) + + # Import lazily to avoid pulling in heavy agent modules during collection. 
+ from agent.core.tools import create_builtin_tools + + names = {t.name for t in create_builtin_tools(local_mode=True)} + assert "web_search" not in names + + +def test_tool_registered_when_api_key_set(monkeypatch): + monkeypatch.setenv(ENV_VAR, "sk-test") + + from agent.core.tools import create_builtin_tools + + names = {t.name for t in create_builtin_tools(local_mode=True)} + assert "web_search" in names + + +# --------------------------------------------------------------------------- +# Tool spec shape +# --------------------------------------------------------------------------- + + +def test_tool_spec_shape(): + assert WEB_SEARCH_TOOL_SPEC["name"] == "web_search" + params = WEB_SEARCH_TOOL_SPEC["parameters"] + assert params["type"] == "object" + assert "query" in params["properties"] + assert params["required"] == ["query"] diff --git a/uv.lock b/uv.lock index 3ed178e5..0ca3911c 100644 --- a/uv.lock +++ b/uv.lock @@ -726,6 +726,24 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/de/15/545e2b6cf2e3be84bc1ed85613edd75b8aea69807a71c26f4ca6a9258e82/email_validator-2.3.0-py3-none-any.whl", hash = "sha256:80f13f623413e6b197ae73bb10bf4eb0908faf509ad8362c5edeb0be7fd450b4", size = 35604, upload-time = "2025-08-26T13:09:05.858Z" }, ] +[[package]] +name = "exa-py" +version = "2.12.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "httpcore" }, + { name = "httpx" }, + { name = "openai" }, + { name = "pydantic" }, + { name = "python-dotenv" }, + { name = "requests" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/71/e4/11bbbc076ae420b9e00537945d48a03cb42cc6da63edc65bf50d23e4778e/exa_py-2.12.1.tar.gz", hash = "sha256:9ff1924fbfbcae822b20c0ddef0650fabc04ac75906b9153623eadc18135b7ce", size = 55792, upload-time = "2026-04-22T20:00:38.528Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/49/19/0a504b6ce7c468595cd0551f65e5c464832a1d3af8dc8acd681e21696a5f/exa_py-2.12.1-py3-none-any.whl", hash = "sha256:9e735802161482a7d5b231376257883cb4e34dbd6f75ded04ab1a5a171b69d9f", size = 74512, upload-time = "2026-04-22T20:00:34.326Z" }, +] + [[package]] name = "exceptiongroup" version = "1.3.0" @@ -998,6 +1016,7 @@ version = "0.1.0" source = { editable = "." } dependencies = [ { name = "datasets" }, + { name = "exa-py" }, { name = "fastapi" }, { name = "fastmcp" }, { name = "httpx" }, @@ -1034,10 +1053,16 @@ eval = [ { name = "tenacity" }, ] +[package.dev-dependencies] +dev = [ + { name = "pytest-asyncio" }, +] + [package.metadata] requires-dist = [ { name = "datasets", specifier = ">=4.4.1" }, { name = "datasets", marker = "extra == 'eval'", specifier = ">=4.3.0" }, + { name = "exa-py", specifier = ">=2.0.0" }, { name = "fastapi", specifier = ">=0.115.0" }, { name = "fastmcp", specifier = ">=3.2.0" }, { name = "hf-agent", extras = ["eval", "dev"], marker = "extra == 'all'" }, @@ -1062,6 +1087,9 @@ requires-dist = [ ] provides-extras = ["eval", "dev", "all"] +[package.metadata.requires-dev] +dev = [{ name = "pytest-asyncio", specifier = ">=1.3.0" }] + [[package]] name = "hf-xet" version = "1.2.0" @@ -2773,6 +2801,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/3b/ab/b3226f0bd7cdcf710fbede2b3548584366da3b19b5021e74f5bde2a8fa3f/pytest-9.0.2-py3-none-any.whl", hash = "sha256:711ffd45bf766d5264d487b917733b453d917afd2b0ad65223959f59089f875b", size = 374801, upload-time = "2025-12-06T21:30:49.154Z" }, ] +[[package]] +name = "pytest-asyncio" +version = "1.3.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pytest" }, + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/90/2c/8af215c0f776415f3590cac4f9086ccefd6fd463befeae41cd4d3f193e5a/pytest_asyncio-1.3.0.tar.gz", hash = 
"sha256:d7f52f36d231b80ee124cd216ffb19369aa168fc10095013c6b014a34d3ee9e5", size = 50087, upload-time = "2025-11-10T16:07:47.256Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e5/35/f8b19922b6a25bc0880171a2f1a003eaeb93657475193ab516fd87cac9da/pytest_asyncio-1.3.0-py3-none-any.whl", hash = "sha256:611e26147c7f77640e6d0a92a38ed17c3e9848063698d5c93d5aa7aa11cebff5", size = 15075, upload-time = "2025-11-10T16:07:45.537Z" }, +] + [[package]] name = "python-dateutil" version = "2.9.0.post0"