From 7b3ff33a1ed147e6a3da7eef512892f50f21c03c Mon Sep 17 00:00:00 2001
From: Teo Gonzalez Collazo <teodorolgonzalez5@gmail.com>
Date: Thu, 23 Apr 2026 09:06:26 -0700
Subject: [PATCH] feat: add Exa AI-powered search tool

Adds a general-purpose web_search tool backed by Exa so the agent can
ground answers in current web content when HF docs, papers, and GitHub
search aren't enough.

- New agent/tools/web_search_tool.py with a typed WebSearchResult model
  and a snippet cascade (summary > highlights > text).
- Registered through create_builtin_tools only when EXA_API_KEY is set,
  so the tool stays dark for users who don't configure it.
- Added to the research sub-agent's read-only tool allow-list.
- Surfaces Exa search types, category, domain filters, date filters,
  and content modes (text / highlights / summary).
- Tests cover response parsing, snippet fallbacks, argument validation,
  router gating, and the integration-attribution header.
---
 README.md                          |   5 +-
 agent/core/tools.py                |  16 ++
 agent/tools/__init__.py            |   8 +
 agent/tools/research_tool.py       |   1 +
 agent/tools/web_search_tool.py     | 357 +++++++++++++++++++++++++++++
 pyproject.toml                     |   3 +
 tests/unit/test_web_search_tool.py | 338 +++++++++++++++++++++++++++
 uv.lock                            |  41 ++++
 8 files changed, 767 insertions(+), 2 deletions(-)
 create mode 100644 agent/tools/web_search_tool.py
 create mode 100644 tests/unit/test_web_search_tool.py
diff --git a/README.md b/README.md
index 29fe439b..235ffb78 100644
--- a/README.md
+++ b/README.md
@@ -28,9 +28,10 @@ Create a `.env` file in the project root (or export these in your shell):
 ```bash
 ANTHROPIC_API_KEY=<your-anthropic-api-key> # if using anthropic models
 HF_TOKEN=<your-hugging-face-token>
-GITHUB_TOKEN=<github-personal-access-token> 
+GITHUB_TOKEN=<github-personal-access-token>
+EXA_API_KEY=<your-exa-api-key> # optional, enables the web_search tool
 ```
-If no `HF_TOKEN` is set, the CLI will prompt you to paste one on first launch. To get a GITHUB_TOKEN follow the tutorial [here](https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/managing-your-personal-access-tokens#creating-a-fine-grained-personal-access-token).
+If no `HF_TOKEN` is set, the CLI will prompt you to paste one on first launch. To get a GITHUB_TOKEN follow the tutorial [here](https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/managing-your-personal-access-tokens#creating-a-fine-grained-personal-access-token). Setting `EXA_API_KEY` ([get a key](https://exa.ai/)) unlocks the `web_search` tool for general web lookups outside the HF ecosystem.
 
 ### Usage
 
diff --git a/agent/core/tools.py b/agent/core/tools.py
index 9bbf91d7..c2f70d65 100644
--- a/agent/core/tools.py
+++ b/agent/core/tools.py
@@ -50,6 +50,11 @@
 from agent.tools.plan_tool import PLAN_TOOL_SPEC, plan_tool_handler
 from agent.tools.research_tool import RESEARCH_TOOL_SPEC, research_handler
 from agent.tools.sandbox_tool import get_sandbox_tools
+from agent.tools.web_search_tool import (
+    WEB_SEARCH_TOOL_SPEC,
+    web_search_enabled,
+    web_search_handler,
+)
 
 # NOTE: Private HF repo tool disabled - replaced by hf_repo_files and hf_repo_git
 # from agent.tools.private_hf_repo_tools import (
@@ -363,6 +368,17 @@ def create_builtin_tools(local_mode: bool = False) -> list[ToolSpec]:
         ),
     ]
 
+    # Optional: Exa-backed general web search (enabled only when EXA_API_KEY is set)
+    if web_search_enabled():
+        tools.append(
+            ToolSpec(
+                name=WEB_SEARCH_TOOL_SPEC["name"],
+                description=WEB_SEARCH_TOOL_SPEC["description"],
+                parameters=WEB_SEARCH_TOOL_SPEC["parameters"],
+                handler=web_search_handler,
+            )
+        )
+
     # Sandbox or local tools (highest priority)
     if local_mode:
         from agent.tools.local_tools import get_local_tools
diff --git a/agent/tools/__init__.py b/agent/tools/__init__.py
index 14ef4566..09df2a78 100644
--- a/agent/tools/__init__.py
+++ b/agent/tools/__init__.py
@@ -20,6 +20,11 @@
 )
 from agent.tools.jobs_tool import HF_JOBS_TOOL_SPEC, HfJobsTool, hf_jobs_handler
 from agent.tools.types import ToolResult
+from agent.tools.web_search_tool import (
+    WEB_SEARCH_TOOL_SPEC,
+    web_search_enabled,
+    web_search_handler,
+)
 
 __all__ = [
     "ToolResult",
@@ -36,4 +41,7 @@
     "github_search_code_handler",
     "HF_INSPECT_DATASET_TOOL_SPEC",
     "hf_inspect_dataset_handler",
+    "WEB_SEARCH_TOOL_SPEC",
+    "web_search_enabled",
+    "web_search_handler",
 ]
diff --git a/agent/tools/research_tool.py b/agent/tools/research_tool.py
index fea12ba2..52475d60 100644
--- a/agent/tools/research_tool.py
+++ b/agent/tools/research_tool.py
@@ -39,6 +39,7 @@
     "github_read_file",
     "hf_inspect_dataset",
     "hf_repo_files",
+    "web_search",
 }
 
 RESEARCH_SYSTEM_PROMPT = """\
diff --git a/agent/tools/web_search_tool.py b/agent/tools/web_search_tool.py
new file mode 100644
index 00000000..dcad9d04
--- /dev/null
+++ b/agent/tools/web_search_tool.py
@@ -0,0 +1,357 @@
+"""
+Web search tool backed by the Exa API.
+
+Exposes general-purpose web search so the agent can ground answers in
+current web content — useful when a topic falls outside the HF ecosystem,
+when training recipes require recent blog posts or announcements, or when
+arxiv alone does not surface the best reference.
+
+Disabled unless ``EXA_API_KEY`` is set in the environment: registration is
+gated on ``web_search_enabled()``, which is false while the variable is unset.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import logging
+import os
+from dataclasses import dataclass, field
+from typing import Any
+
+logger = logging.getLogger(__name__)
+
+ENV_VAR = "EXA_API_KEY"
+INTEGRATION_NAME = "ml-intern"
+
+DEFAULT_NUM_RESULTS = 5
+MAX_NUM_RESULTS = 25
+DEFAULT_SUMMARY_CHARACTERS = 1200
+
+SEARCH_TYPES = ["auto", "neural", "fast"]
+CATEGORIES = [
+    "company",
+    "research paper",
+    "news",
+    "personal site",
+    "financial report",
+    "linkedin profile",
+    "pdf",
+    "github",
+    "tweet",
+]
+
+
+@dataclass
+class WebSearchResult:
+    """Normalized search result built from the Exa response."""
+
+    title: str
+    url: str
+    published_date: str | None = None
+    author: str | None = None
+    score: float | None = None
+    summary: str | None = None
+    highlights: list[str] = field(default_factory=list)
+    text: str | None = None
+
+    def snippet(self, max_characters: int = 500) -> str:
+        """Return the best-available snippet, preferring summary > highlights > text."""
+        if self.summary:
+            return _truncate(self.summary, max_characters)
+        if self.highlights:
+            joined = " … ".join(h.strip() for h in self.highlights if h and h.strip())
+            if joined:
+                return _truncate(joined, max_characters)
+        if self.text:
+            return _truncate(self.text, max_characters)
+        return ""
+
+
+def _truncate(text: str, max_characters: int) -> str:
+    text = text.strip()
+    if len(text) <= max_characters:
+        return text
+    return text[: max_characters - 1].rstrip() + "…"
+
+
+def _coerce_result(raw: Any) -> WebSearchResult:
+    """Map an Exa SDK result object (or plain dict) to WebSearchResult."""
+    def _get(key: str, default: Any = None) -> Any:
+        if isinstance(raw, dict):
+            return raw.get(key, default)
+        return getattr(raw, key, default)
+
+    highlights = _get("highlights") or []
+    if not isinstance(highlights, list):
+        highlights = [str(highlights)]
+
+    return WebSearchResult(
+        title=(_get("title") or "").strip() or "(untitled)",
+        url=(_get("url") or "").strip(),
+        published_date=_get("published_date") or _get("publishedDate"),
+        author=_get("author"),
+        score=_get("score"),
+        summary=_get("summary"),
+        highlights=[str(h) for h in highlights if h],
+        text=_get("text"),
+    )
+
+
+def _format_results(
+    query: str,
+    results: list[WebSearchResult],
+    search_type: str,
+    auto_selected: str | None,
+) -> str:
+    if not results:
+        return f"No web results found for query: {query!r}."
+
+    header = f"Web search results for: {query!r}"
+    if auto_selected and auto_selected != search_type:
+        header += f" (type={search_type} → resolved={auto_selected})"
+    else:
+        header += f" (type={search_type})"
+    lines: list[str] = [header, ""]
+
+    for i, r in enumerate(results, 1):
+        lines.append(f"{i}. **{r.title}**")
+        lines.append(f"   URL: {r.url}")
+        meta: list[str] = []
+        if r.published_date:
+            meta.append(f"published: {r.published_date}")
+        if r.author:
+            meta.append(f"author: {r.author}")
+        if r.score is not None:
+            meta.append(f"score: {r.score:.2f}")
+        if meta:
+            lines.append(f"   {' | '.join(meta)}")
+
+        snippet = r.snippet()
+        if snippet:
+            lines.append(f"   {snippet}")
+        lines.append("")
+
+    return "\n".join(lines).rstrip() + "\n"
+
+
+def _build_contents_kwargs(
+    text: bool, summary: bool, highlights: bool
+) -> dict[str, Any]:
+    kwargs: dict[str, Any] = {}
+    if text:
+        kwargs["text"] = True
+    if highlights:
+        kwargs["highlights"] = True
+    if summary:
+        kwargs["summary"] = True
+    return kwargs
+
+
+def _run_search(
+    api_key: str,
+    query: str,
+    num_results: int,
+    search_type: str,
+    category: str | None,
+    include_domains: list[str] | None,
+    exclude_domains: list[str] | None,
+    start_published_date: str | None,
+    end_published_date: str | None,
+    include_text: bool,
+    include_summary: bool,
+    include_highlights: bool,
+) -> tuple[list[WebSearchResult], str | None]:
+    """Synchronous Exa call, run inside asyncio.to_thread."""
+    from exa_py import Exa  # imported lazily so the module loads without the dep
+
+    client = Exa(api_key)
+    # Integration tracking — lets the Exa team attribute usage to this repo.
+    client.headers["x-exa-integration"] = INTEGRATION_NAME
+
+    params: dict[str, Any] = {
+        "query": query,
+        "num_results": num_results,
+        "type": search_type,
+    }
+    if category:
+        params["category"] = category
+    if include_domains:
+        params["include_domains"] = include_domains
+    if exclude_domains:
+        params["exclude_domains"] = exclude_domains
+    if start_published_date:
+        params["start_published_date"] = start_published_date
+    if end_published_date:
+        params["end_published_date"] = end_published_date
+
+    content_kwargs = _build_contents_kwargs(
+        text=include_text,
+        summary=include_summary,
+        highlights=include_highlights,
+    )
+
+    if content_kwargs:
+        response = client.search_and_contents(**params, **content_kwargs)
+    else:
+        response = client.search(**params)
+
+    raw_results = getattr(response, "results", None) or []
+    auto_type = getattr(response, "resolved_search_type", None) or getattr(
+        response, "search_type", None
+    )
+    return [_coerce_result(r) for r in raw_results], auto_type
+
+
+async def web_search_handler(
+    arguments: dict[str, Any], session=None
+) -> tuple[str, bool]:
+    """Agent handler: run an Exa web search and format the results."""
+    api_key = os.environ.get(ENV_VAR)
+    if not api_key:
+        return (
+            f"Error: {ENV_VAR} is not set — web_search is unavailable.",
+            False,
+        )
+
+    query = (arguments.get("query") or "").strip()
+    if not query:
+        return "Error: 'query' is required.", False
+
+    try:
+        num_results = int(arguments.get("num_results", DEFAULT_NUM_RESULTS))
+    except (TypeError, ValueError):
+        return "Error: num_results must be an integer.", False
+    num_results = max(1, min(num_results, MAX_NUM_RESULTS))
+
+    search_type = (arguments.get("type") or "auto").strip() or "auto"
+    if search_type not in SEARCH_TYPES:
+        return (
+            f"Error: type must be one of {SEARCH_TYPES}, got {search_type!r}.",
+            False,
+        )
+
+    category = (arguments.get("category") or "").strip() or None
+    if category and category not in CATEGORIES:
+        return (
+            f"Error: category must be one of {CATEGORIES}, got {category!r}.",
+            False,
+        )
+
+    include_domains = arguments.get("include_domains") or None
+    exclude_domains = arguments.get("exclude_domains") or None
+    if include_domains is not None and not isinstance(include_domains, list):
+        return "Error: include_domains must be a list of strings.", False
+    if exclude_domains is not None and not isinstance(exclude_domains, list):
+        return "Error: exclude_domains must be a list of strings.", False
+
+    start_published_date = (arguments.get("start_published_date") or "").strip() or None
+    end_published_date = (arguments.get("end_published_date") or "").strip() or None
+
+    include_text = bool(arguments.get("include_text", False))
+    include_summary = bool(arguments.get("include_summary", True))
+    include_highlights = bool(arguments.get("include_highlights", True))
+
+    try:
+        results, auto_type = await asyncio.to_thread(
+            _run_search,
+            api_key,
+            query,
+            num_results,
+            search_type,
+            category,
+            include_domains,
+            exclude_domains,
+            start_published_date,
+            end_published_date,
+            include_text,
+            include_summary,
+            include_highlights,
+        )
+    except ImportError:
+        return (
+            "Error: exa-py is not installed. Run `uv sync` or "
+            "`pip install exa-py` to enable web_search.",
+            False,
+        )
+    except Exception as e:  # noqa: BLE001 — Exa SDK raises several error types
+        logger.exception("Exa web_search failed")
+        return f"Web search error: {e}", False
+
+    return _format_results(query, results, search_type, auto_type), True
+
+
+WEB_SEARCH_TOOL_SPEC = {
+    "name": "web_search",
+    "description": (
+        "Search the open web with Exa for current information outside the HF ecosystem. "
+        "Use when HF docs / papers / GitHub search aren't enough — e.g. recent blog posts, "
+        "announcements, product pages, non-arxiv references, or cross-domain context.\n\n"
+        "Tips:\n"
+        "  • Set category='research paper' to bias toward academic sources.\n"
+        "  • Use include_domains / exclude_domains to scope to known-good sources.\n"
+        "  • Results include a summary by default; set include_text=true for full page text.\n"
+        "  • Narrow recency with start_published_date / end_published_date (ISO 8601).\n\n"
+        "Requires the EXA_API_KEY environment variable."
+    ),
+    "parameters": {
+        "type": "object",
+        "properties": {
+            "query": {
+                "type": "string",
+                "description": "The search query (natural-language phrase works best for neural search).",
+            },
+            "num_results": {
+                "type": "integer",
+                "description": f"Number of results to return (default {DEFAULT_NUM_RESULTS}, max {MAX_NUM_RESULTS}).",
+                "minimum": 1,
+                "maximum": MAX_NUM_RESULTS,
+            },
+            "type": {
+                "type": "string",
+                "enum": SEARCH_TYPES,
+                "description": "Search mode: 'auto' (default) balances neural + fast, 'neural' for semantic, 'fast' for low-latency.",
+            },
+            "category": {
+                "type": "string",
+                "enum": CATEGORIES,
+                "description": "Optional content category filter (e.g. 'research paper', 'news', 'github', 'pdf').",
+            },
+            "include_domains": {
+                "type": "array",
+                "items": {"type": "string"},
+                "description": "Only return results from these domains (e.g. ['huggingface.co', 'arxiv.org']).",
+            },
+            "exclude_domains": {
+                "type": "array",
+                "items": {"type": "string"},
+                "description": "Exclude results from these domains.",
+            },
+            "start_published_date": {
+                "type": "string",
+                "description": "Only return results published on/after this ISO 8601 date (YYYY-MM-DD).",
+            },
+            "end_published_date": {
+                "type": "string",
+                "description": "Only return results published on/before this ISO 8601 date (YYYY-MM-DD).",
+            },
+            "include_text": {
+                "type": "boolean",
+                "description": "Include the full page text for each result (verbose; default false).",
+            },
+            "include_summary": {
+                "type": "boolean",
+                "description": "Include an LLM-generated summary per result (default true).",
+            },
+            "include_highlights": {
+                "type": "boolean",
+                "description": "Include LLM-selected highlight snippets per result (default true).",
+            },
+        },
+        "required": ["query"],
+    },
+}
+
+
+def web_search_enabled() -> bool:
+    """Whether the tool should be registered — depends on the env var only."""
+    return bool(os.environ.get(ENV_VAR))
diff --git a/pyproject.toml b/pyproject.toml
index c0f7abfa..1842fc55 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -25,6 +25,8 @@ dependencies = [
     "uvicorn[standard]>=0.32.0",
     "httpx>=0.27.0",
     "websockets>=13.0",
+    # Exa web search SDK: always installed, but the tool only activates when EXA_API_KEY is set
+    "exa-py>=2.0.0",
 ]
 
 [project.optional-dependencies]
@@ -40,6 +42,7 @@ eval = [
 # Development and testing dependencies
 dev = [
     "pytest>=9.0.2",
+    "pytest-asyncio>=1.3.0",
 ]
 
 # All dependencies (eval + dev)
diff --git a/tests/unit/test_web_search_tool.py b/tests/unit/test_web_search_tool.py
new file mode 100644
index 00000000..85161b88
--- /dev/null
+++ b/tests/unit/test_web_search_tool.py
@@ -0,0 +1,338 @@
+"""Tests for agent/tools/web_search_tool.py — Exa-backed web search."""
+
+from __future__ import annotations
+
+import os
+import sys
+import types
+from pathlib import Path
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+# Ensure the project root is importable (tests/unit is two levels below root).
+_ROOT = Path(__file__).resolve().parent.parent.parent
+if str(_ROOT) not in sys.path:
+    sys.path.insert(0, str(_ROOT))
+
+# Stub exa_py so tests do not require the package to be installed.
+if "exa_py" not in sys.modules:
+    stub = types.ModuleType("exa_py")
+    stub.Exa = MagicMock()  # type: ignore[attr-defined]
+    sys.modules["exa_py"] = stub
+
+from agent.tools.web_search_tool import (  # noqa: E402
+    ENV_VAR,
+    INTEGRATION_NAME,
+    WEB_SEARCH_TOOL_SPEC,
+    WebSearchResult,
+    _coerce_result,
+    _format_results,
+    web_search_enabled,
+    web_search_handler,
+)
+
+
+# ---------------------------------------------------------------------------
+# Env gating
+# ---------------------------------------------------------------------------
+
+
+def test_enabled_only_when_env_var_set(monkeypatch):
+    monkeypatch.delenv(ENV_VAR, raising=False)
+    assert web_search_enabled() is False
+
+    monkeypatch.setenv(ENV_VAR, "sk-test")
+    assert web_search_enabled() is True
+
+
+@pytest.mark.asyncio
+async def test_handler_refuses_without_api_key(monkeypatch):
+    monkeypatch.delenv(ENV_VAR, raising=False)
+    output, ok = await web_search_handler({"query": "anything"})
+    assert ok is False
+    assert ENV_VAR in output
+
+
+# ---------------------------------------------------------------------------
+# Argument validation
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_handler_requires_query(monkeypatch):
+    monkeypatch.setenv(ENV_VAR, "sk-test")
+    output, ok = await web_search_handler({"query": "   "})
+    assert ok is False
+    assert "query" in output.lower()
+
+
+@pytest.mark.asyncio
+async def test_handler_rejects_invalid_type(monkeypatch):
+    monkeypatch.setenv(ENV_VAR, "sk-test")
+    output, ok = await web_search_handler({"query": "x", "type": "keyword"})
+    assert ok is False
+    assert "type" in output.lower()
+
+
+@pytest.mark.asyncio
+async def test_handler_rejects_invalid_category(monkeypatch):
+    monkeypatch.setenv(ENV_VAR, "sk-test")
+    output, ok = await web_search_handler({"query": "x", "category": "bogus"})
+    assert ok is False
+    assert "category" in output.lower()
+
+
+@pytest.mark.asyncio
+async def test_handler_rejects_non_list_domains(monkeypatch):
+    monkeypatch.setenv(ENV_VAR, "sk-test")
+    output, ok = await web_search_handler(
+        {"query": "x", "include_domains": "arxiv.org"}
+    )
+    assert ok is False
+    assert "include_domains" in output
+
+
+# ---------------------------------------------------------------------------
+# Result parsing and snippet cascade
+# ---------------------------------------------------------------------------
+
+
+def test_coerce_result_from_object():
+    raw = types.SimpleNamespace(
+        title="A paper",
+        url="https://example.com/paper",
+        published_date="2024-06-01",
+        author="Author",
+        score=0.83,
+        summary="A short summary.",
+        highlights=["h1", "h2"],
+        text="Full text body",
+    )
+    r = _coerce_result(raw)
+    assert r.title == "A paper"
+    assert r.url == "https://example.com/paper"
+    assert r.published_date == "2024-06-01"
+    assert r.summary == "A short summary."
+    assert r.highlights == ["h1", "h2"]
+
+
+def test_coerce_result_from_dict_with_camel_case_date():
+    raw = {
+        "title": "T",
+        "url": "https://x",
+        "publishedDate": "2025-01-01",
+    }
+    r = _coerce_result(raw)
+    assert r.published_date == "2025-01-01"
+
+
+def test_snippet_prefers_summary():
+    r = WebSearchResult(
+        title="t",
+        url="u",
+        summary="summary wins",
+        highlights=["h1", "h2"],
+        text="text loses",
+    )
+    assert r.snippet() == "summary wins"
+
+
+def test_snippet_falls_back_to_highlights_then_text():
+    r1 = WebSearchResult(title="t", url="u", highlights=["h1", "h2"], text="text")
+    assert "h1" in r1.snippet() and "h2" in r1.snippet()
+
+    r2 = WebSearchResult(title="t", url="u", text="just text")
+    assert r2.snippet() == "just text"
+
+    r3 = WebSearchResult(title="t", url="u")
+    assert r3.snippet() == ""
+
+
+def test_snippet_truncates_long_content():
+    long_summary = "x" * 1000
+    r = WebSearchResult(title="t", url="u", summary=long_summary)
+    snippet = r.snippet(max_characters=50)
+    assert len(snippet) <= 50
+    assert snippet.endswith("…")
+
+
+def test_format_results_empty():
+    out = _format_results("no hits", [], "auto", None)
+    assert "No web results" in out
+
+
+def test_format_results_shows_metadata_and_snippet():
+    results = [
+        WebSearchResult(
+            title="Cool blog post",
+            url="https://example.com/post",
+            published_date="2025-02-01",
+            author="Jane Doe",
+            score=0.91,
+            summary="TL;DR of the post.",
+        )
+    ]
+    out = _format_results("cool post", results, "auto", "neural")
+    assert "Cool blog post" in out
+    assert "https://example.com/post" in out
+    assert "2025-02-01" in out
+    assert "Jane Doe" in out
+    assert "0.91" in out
+    assert "TL;DR" in out
+    assert "auto → resolved=neural" in out
+
+
+# ---------------------------------------------------------------------------
+# Integration header + end-to-end handler path (with mocked Exa client)
+# ---------------------------------------------------------------------------
+
+
+class _FakeExa:
+    """Minimal Exa stand-in that records its call and returns canned results."""
+
+    last_instance: "_FakeExa | None" = None
+
+    def __init__(self, api_key: str):
+        self.api_key = api_key
+        self.headers: dict[str, str] = {}
+        self.search_calls: list[dict] = []
+        self.search_and_contents_calls: list[dict] = []
+        _FakeExa.last_instance = self
+
+    def _response(self):
+        return types.SimpleNamespace(
+            results=[
+                types.SimpleNamespace(
+                    title="Result 1",
+                    url="https://example.com/1",
+                    published_date="2025-03-01",
+                    author=None,
+                    score=0.7,
+                    summary="Summary 1",
+                    highlights=["hl1"],
+                    text=None,
+                )
+            ],
+            resolved_search_type="neural",
+        )
+
+    def search(self, **kwargs):
+        self.search_calls.append(kwargs)
+        return self._response()
+
+    def search_and_contents(self, **kwargs):
+        self.search_and_contents_calls.append(kwargs)
+        return self._response()
+
+
+@pytest.mark.asyncio
+async def test_handler_happy_path_sets_integration_header(monkeypatch):
+    monkeypatch.setenv(ENV_VAR, "sk-test")
+
+    # Patch on the source module — the tool imports Exa locally via
+    # `from exa_py import Exa`, so monkeypatching the consuming namespace
+    # (agent.tools.web_search_tool.Exa) would miss it.
+    with patch("exa_py.Exa", _FakeExa):
+        output, ok = await web_search_handler(
+            {
+                "query": "flash attention",
+                "num_results": 3,
+                "type": "auto",
+                "category": "research paper",
+                "include_domains": ["arxiv.org"],
+                "start_published_date": "2024-01-01",
+            }
+        )
+
+    assert ok is True
+    assert "Result 1" in output
+    assert "https://example.com/1" in output
+
+    inst = _FakeExa.last_instance
+    assert inst is not None
+    # Integration attribution header must be set on every client.
+    assert inst.headers.get("x-exa-integration") == INTEGRATION_NAME
+    # Defaults enable summary + highlights, so search_and_contents is used.
+    assert len(inst.search_and_contents_calls) == 1
+    call = inst.search_and_contents_calls[0]
+    assert call["query"] == "flash attention"
+    assert call["num_results"] == 3
+    assert call["type"] == "auto"
+    assert call["category"] == "research paper"
+    assert call["include_domains"] == ["arxiv.org"]
+    assert call["start_published_date"] == "2024-01-01"
+    assert call.get("summary") is True
+    assert call.get("highlights") is True
+
+
+@pytest.mark.asyncio
+async def test_handler_plain_search_when_no_contents_requested(monkeypatch):
+    monkeypatch.setenv(ENV_VAR, "sk-test")
+
+    with patch("exa_py.Exa", _FakeExa):
+        output, ok = await web_search_handler(
+            {
+                "query": "no extras",
+                "include_summary": False,
+                "include_highlights": False,
+                "include_text": False,
+            }
+        )
+
+    assert ok is True
+    inst = _FakeExa.last_instance
+    assert inst is not None
+    assert len(inst.search_calls) == 1
+    assert inst.search_and_contents_calls == []
+
+
+@pytest.mark.asyncio
+async def test_handler_caps_num_results(monkeypatch):
+    monkeypatch.setenv(ENV_VAR, "sk-test")
+
+    with patch("exa_py.Exa", _FakeExa):
+        _, ok = await web_search_handler({"query": "x", "num_results": 9999})
+
+    assert ok is True
+    inst = _FakeExa.last_instance
+    assert inst is not None
+    call = inst.search_and_contents_calls[0]
+    assert 1 <= call["num_results"] <= 25
+
+
+# ---------------------------------------------------------------------------
+# Router gating
+# ---------------------------------------------------------------------------
+
+
+def test_tool_not_registered_when_api_key_unset(monkeypatch):
+    monkeypatch.delenv(ENV_VAR, raising=False)
+
+    # Import lazily to avoid pulling in heavy agent modules during collection.
+    from agent.core.tools import create_builtin_tools
+
+    names = {t.name for t in create_builtin_tools(local_mode=True)}
+    assert "web_search" not in names
+
+
+def test_tool_registered_when_api_key_set(monkeypatch):
+    monkeypatch.setenv(ENV_VAR, "sk-test")
+
+    from agent.core.tools import create_builtin_tools
+
+    names = {t.name for t in create_builtin_tools(local_mode=True)}
+    assert "web_search" in names
+
+
+# ---------------------------------------------------------------------------
+# Tool spec shape
+# ---------------------------------------------------------------------------
+
+
+def test_tool_spec_shape():
+    assert WEB_SEARCH_TOOL_SPEC["name"] == "web_search"
+    params = WEB_SEARCH_TOOL_SPEC["parameters"]
+    assert params["type"] == "object"
+    assert "query" in params["properties"]
+    assert params["required"] == ["query"]
diff --git a/uv.lock b/uv.lock
index 3ed178e5..0ca3911c 100644
--- a/uv.lock
+++ b/uv.lock
@@ -726,6 +726,24 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/de/15/545e2b6cf2e3be84bc1ed85613edd75b8aea69807a71c26f4ca6a9258e82/email_validator-2.3.0-py3-none-any.whl", hash = "sha256:80f13f623413e6b197ae73bb10bf4eb0908faf509ad8362c5edeb0be7fd450b4", size = 35604, upload-time = "2025-08-26T13:09:05.858Z" },
 ]
 
+[[package]]
+name = "exa-py"
+version = "2.12.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "httpcore" },
+    { name = "httpx" },
+    { name = "openai" },
+    { name = "pydantic" },
+    { name = "python-dotenv" },
+    { name = "requests" },
+    { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/71/e4/11bbbc076ae420b9e00537945d48a03cb42cc6da63edc65bf50d23e4778e/exa_py-2.12.1.tar.gz", hash = "sha256:9ff1924fbfbcae822b20c0ddef0650fabc04ac75906b9153623eadc18135b7ce", size = 55792, upload-time = "2026-04-22T20:00:38.528Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/49/19/0a504b6ce7c468595cd0551f65e5c464832a1d3af8dc8acd681e21696a5f/exa_py-2.12.1-py3-none-any.whl", hash = "sha256:9e735802161482a7d5b231376257883cb4e34dbd6f75ded04ab1a5a171b69d9f", size = 74512, upload-time = "2026-04-22T20:00:34.326Z" },
+]
+
 [[package]]
 name = "exceptiongroup"
 version = "1.3.0"
@@ -998,6 +1016,7 @@ version = "0.1.0"
 source = { editable = "." }
 dependencies = [
     { name = "datasets" },
+    { name = "exa-py" },
     { name = "fastapi" },
     { name = "fastmcp" },
     { name = "httpx" },
@@ -1034,10 +1053,16 @@ eval = [
     { name = "tenacity" },
 ]
 
+[package.dev-dependencies]
+dev = [
+    { name = "pytest-asyncio" },
+]
+
 [package.metadata]
 requires-dist = [
     { name = "datasets", specifier = ">=4.4.1" },
     { name = "datasets", marker = "extra == 'eval'", specifier = ">=4.3.0" },
+    { name = "exa-py", specifier = ">=2.0.0" },
     { name = "fastapi", specifier = ">=0.115.0" },
     { name = "fastmcp", specifier = ">=3.2.0" },
     { name = "hf-agent", extras = ["eval", "dev"], marker = "extra == 'all'" },
@@ -1062,6 +1087,9 @@ requires-dist = [
 ]
 provides-extras = ["eval", "dev", "all"]
 
+[package.metadata.requires-dev]
+dev = [{ name = "pytest-asyncio", specifier = ">=1.3.0" }]
+
 [[package]]
 name = "hf-xet"
 version = "1.2.0"
@@ -2773,6 +2801,19 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/3b/ab/b3226f0bd7cdcf710fbede2b3548584366da3b19b5021e74f5bde2a8fa3f/pytest-9.0.2-py3-none-any.whl", hash = "sha256:711ffd45bf766d5264d487b917733b453d917afd2b0ad65223959f59089f875b", size = 374801, upload-time = "2025-12-06T21:30:49.154Z" },
 ]
 
+[[package]]
+name = "pytest-asyncio"
+version = "1.3.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "pytest" },
+    { name = "typing-extensions", marker = "python_full_version < '3.13'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/90/2c/8af215c0f776415f3590cac4f9086ccefd6fd463befeae41cd4d3f193e5a/pytest_asyncio-1.3.0.tar.gz", hash = "sha256:d7f52f36d231b80ee124cd216ffb19369aa168fc10095013c6b014a34d3ee9e5", size = 50087, upload-time = "2025-11-10T16:07:47.256Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/e5/35/f8b19922b6a25bc0880171a2f1a003eaeb93657475193ab516fd87cac9da/pytest_asyncio-1.3.0-py3-none-any.whl", hash = "sha256:611e26147c7f77640e6d0a92a38ed17c3e9848063698d5c93d5aa7aa11cebff5", size = 15075, upload-time = "2025-11-10T16:07:45.537Z" },
+]
+
 [[package]]
 name = "python-dateutil"
 version = "2.9.0.post0"