Skip to content

Commit 69796ce

Browse files
feat(brave): add BraveWebSearch integration
Implements the BraveWebSearch component for Haystack using the Brave Search API. Follows the same structure as TavilyWebSearch.

- sync run() and async run_async() via httpx
- top_k, country, search_lang, extra_params configuration
- per-run top_k override
- returns documents (content = description) and links
- 11 unit tests, 2 integration tests (skipped without API key)

Closes #3236
1 parent 9d7f6de commit 69796ce

8 files changed

Lines changed: 580 additions & 0 deletions

File tree

integrations/brave/CHANGELOG.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# Changelog
2+
3+
## [Unreleased]
4+
5+
### Added
6+
- Initial release of `brave-search-haystack` integration
7+
- `BraveWebSearch` component with sync `run()` and async `run_async()` support

integrations/brave/README.md

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# brave-search-haystack
2+
3+
[![PyPI - Version](https://img.shields.io/pypi/v/brave-search-haystack.svg)](https://pypi.org/project/brave-search-haystack)
4+
[![PyPI - Python Version](https://img.shields.io/pypi/pyversions/brave-search-haystack.svg)](https://pypi.org/project/brave-search-haystack)
5+
6+
Haystack integration for [Brave Search API](https://brave.com/search/api/).
7+
8+
## Installation
9+
10+
```bash
11+
pip install brave-search-haystack
12+
```
13+
14+
## Usage
15+
16+
```python
17+
from haystack_integrations.components.websearch.brave import BraveWebSearch
18+
from haystack.utils import Secret
19+
20+
websearch = BraveWebSearch(
21+
api_key=Secret.from_env_var("BRAVE_API_KEY"),
22+
top_k=5,
23+
)
24+
result = websearch.run(query="What is Haystack by deepset?")
25+
documents = result["documents"]
26+
links = result["links"]
27+
```
28+
29+
## License
30+
31+
`brave-search-haystack` is distributed under the terms of the [Apache-2.0](https://spdx.org/licenses/Apache-2.0.html) license.

integrations/brave/pyproject.toml

Lines changed: 160 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,160 @@
1+
[build-system]
2+
requires = ["hatchling", "hatch-vcs"]
3+
build-backend = "hatchling.build"
4+
5+
[project]
6+
name = "brave-search-haystack"
7+
dynamic = ["version"]
8+
description = "Haystack integration for Brave Search API"
9+
readme = "README.md"
10+
requires-python = ">=3.10"
11+
license = "Apache-2.0"
12+
keywords = ["Haystack", "Brave", "Web Search", "AI Search"]
13+
authors = [{ name = "deepset GmbH", email = "info@deepset.ai" }]
14+
classifiers = [
15+
"License :: OSI Approved :: Apache Software License",
16+
"Development Status :: 4 - Beta",
17+
"Programming Language :: Python",
18+
"Programming Language :: Python :: 3.10",
19+
"Programming Language :: Python :: 3.11",
20+
"Programming Language :: Python :: 3.12",
21+
"Programming Language :: Python :: 3.13",
22+
"Programming Language :: Python :: 3.14",
23+
"Programming Language :: Python :: Implementation :: CPython",
24+
"Programming Language :: Python :: Implementation :: PyPy",
25+
]
26+
dependencies = ["haystack-ai>=2.24.1", "httpx>=0.27.0"]
27+
28+
[project.urls]
29+
Documentation = "https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/brave#readme"
30+
Issues = "https://github.com/deepset-ai/haystack-core-integrations/issues"
31+
Source = "https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/brave"
32+
33+
[tool.hatch.build.targets.wheel]
34+
packages = ["src/haystack_integrations"]
35+
36+
[tool.hatch.version]
37+
source = "vcs"
38+
tag-pattern = 'integrations\/brave-v(?P<version>.*)'
39+
40+
[tool.hatch.version.raw-options]
41+
root = "../.."
42+
git_describe_command = 'git describe --tags --match="integrations/brave-v[0-9]*"'
43+
44+
[tool.hatch.envs.default]
45+
installer = "uv"
46+
dependencies = ["haystack-pydoc-tools", "ruff"]
47+
48+
[tool.hatch.envs.default.scripts]
49+
docs = ["haystack-pydoc pydoc/config_docusaurus.yml"]
50+
fmt = "ruff check --fix {args}; ruff format {args}"
51+
fmt-check = "ruff check {args} && ruff format --check {args}"
52+
53+
[tool.hatch.envs.test]
54+
dependencies = [
55+
"pytest",
56+
"pytest-asyncio",
57+
"pytest-cov",
58+
"pytest-rerunfailures",
59+
"mypy",
60+
"pip",
61+
]
62+
63+
[tool.hatch.envs.test.scripts]
64+
unit = 'pytest -m "not integration" {args:tests}'
65+
integration = 'pytest -m "integration" {args:tests}'
66+
all = 'pytest {args:tests}'
67+
unit-cov-retry = 'pytest --cov=haystack_integrations --reruns 3 --reruns-delay 30 -x -m "not integration" {args:tests}'
68+
integration-cov-append-retry = 'pytest --cov=haystack_integrations --cov-append --reruns 3 --reruns-delay 30 -x -m "integration" {args:tests}'
69+
types = "mypy -p haystack_integrations.components.websearch.brave {args}"
70+
71+
[tool.mypy]
72+
install_types = true
73+
non_interactive = true
74+
check_untyped_defs = true
75+
disallow_incomplete_defs = true
76+
77+
[[tool.mypy.overrides]]
78+
module = ["httpx.*"]
79+
ignore_missing_imports = true
80+
81+
[tool.ruff]
82+
line-length = 120
83+
84+
[tool.ruff.lint]
85+
select = [
86+
"A",
87+
"ANN",
88+
"ARG",
89+
"B",
90+
"C",
91+
"D102",
92+
"D103",
93+
"D205",
94+
"D209",
95+
"D213",
96+
"D417",
97+
"D419",
98+
"DTZ",
99+
"E",
100+
"EM",
101+
"F",
102+
"I",
103+
"ICN",
104+
"ISC",
105+
"N",
106+
"PLC",
107+
"PLE",
108+
"PLR",
109+
"PLW",
110+
"Q",
111+
"RUF",
112+
"S",
113+
"T",
114+
"TID",
115+
"UP",
116+
"W",
117+
"YTT",
118+
]
119+
ignore = [
120+
"B027",
121+
"B008",
122+
"S105",
123+
"S106",
124+
"S107",
125+
"C901",
126+
"PLR0911",
127+
"PLR0912",
128+
"PLR0913",
129+
"PLR0915",
130+
"ANN401",
131+
]
132+
133+
[tool.ruff.lint.isort]
134+
known-first-party = ["haystack_integrations"]
135+
136+
[tool.ruff.lint.flake8-tidy-imports]
137+
ban-relative-imports = "parents"
138+
139+
[tool.ruff.lint.per-file-ignores]
140+
"tests/**/*" = ["PLR2004", "S101", "TID252", "D", "ANN"]
141+
142+
[tool.coverage.run]
143+
source = ["haystack_integrations"]
144+
branch = true
145+
relative_files = true
146+
parallel = false
147+
148+
[tool.coverage.report]
149+
omit = ["*/tests/*", "*/__init__.py"]
150+
show_missing = true
151+
exclude_lines = ["no cov", "if __name__ == .__main__.:", "if TYPE_CHECKING:"]
152+
153+
[tool.pytest.ini_options]
154+
addopts = "--strict-markers"
155+
markers = [
156+
"integration: integration tests",
157+
]
158+
log_cli = true
159+
asyncio_mode = "auto"
160+
asyncio_default_fixture_loop_scope = "function"
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
2+
#
3+
# SPDX-License-Identifier: Apache-2.0
4+
5+
from haystack_integrations.components.websearch.brave.brave_websearch import BraveWebSearch
6+
7+
__all__ = ["BraveWebSearch"]
Lines changed: 163 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,163 @@
1+
# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
2+
#
3+
# SPDX-License-Identifier: Apache-2.0
4+
5+
from typing import Any
6+
7+
import httpx
8+
from haystack import Document, component, logging
9+
from haystack.utils import Secret
10+
11+
logger = logging.getLogger(__name__)
12+
13+
BRAVE_SEARCH_API_URL = "https://api.search.brave.com/res/v1/web/search"
14+
15+
16+
@component
class BraveWebSearch:
    """
    A component that uses the Brave Search API to search the web and return results as Haystack Documents.

    You need a Brave Search API key from [brave.com/search/api](https://brave.com/search/api/).

    ### Usage example

    ```python
    from haystack_integrations.components.websearch.brave import BraveWebSearch
    from haystack.utils import Secret

    websearch = BraveWebSearch(
        api_key=Secret.from_env_var("BRAVE_API_KEY"),
        top_k=5,
    )
    result = websearch.run(query="What is Haystack by deepset?")
    documents = result["documents"]
    links = result["links"]
    ```
    """

    def __init__(
        self,
        api_key: Secret = Secret.from_env_var("BRAVE_API_KEY"),
        top_k: int | None = 10,
        country: str | None = None,
        search_lang: str | None = None,
        extra_params: dict[str, Any] | None = None,
    ) -> None:
        """
        Initialize the BraveWebSearch component.

        :param api_key:
            Brave Search API key. Defaults to the `BRAVE_API_KEY` environment variable.
        :param top_k:
            Maximum number of results to return. Maps to the `count` parameter in the Brave API.
            If `None`, no `count` parameter is sent.
        :param country:
            2-letter country code to bias search results (e.g. `"US"`, `"DE"`).
            See [Brave API docs](https://api.search.brave.com/app/documentation/web-search/codes#country-codes)
            for supported values.
        :param search_lang:
            Language code for search results (e.g. `"en"`, `"de"`).
        :param extra_params:
            Additional query parameters passed directly to the Brave Search API.
            They are applied last, so a key given here replaces the value this component
            would otherwise send for the same parameter.
            See the [Brave API reference](https://api.search.brave.com/app/documentation/web-search/query)
            for all available options.
        """
        self.api_key = api_key
        self.top_k = top_k
        self.country = country
        self.search_lang = search_lang
        self.extra_params = extra_params

    @component.output_types(documents=list[Document], links=list[str])
    def run(
        self,
        query: str,
        top_k: int | None = None,
    ) -> dict[str, Any]:
        """
        Search the web using Brave Search and return results as Documents.

        :param query: Search query string.
        :param top_k:
            Optional per-run override of the maximum number of results.
            If not provided, the init-time `top_k` is used.
        :returns: A dictionary with:
            - `documents`: List of Documents containing search result content.
            - `links`: List of URLs from the search results.
        :raises httpx.HTTPStatusError: If the Brave Search API answers with an error status code.
        """
        params = self._build_params(query=query, top_k=top_k)
        headers = self._build_headers()

        with httpx.Client() as client:
            response = client.get(BRAVE_SEARCH_API_URL, params=params, headers=headers)
            response.raise_for_status()

        return self._parse_response(response.json())

    @component.output_types(documents=list[Document], links=list[str])
    async def run_async(
        self,
        query: str,
        top_k: int | None = None,
    ) -> dict[str, Any]:
        """
        Asynchronously search the web using Brave Search and return results as Documents.

        :param query: Search query string.
        :param top_k:
            Optional per-run override of the maximum number of results.
            If not provided, the init-time `top_k` is used.
        :returns: A dictionary with:
            - `documents`: List of Documents containing search result content.
            - `links`: List of URLs from the search results.
        :raises httpx.HTTPStatusError: If the Brave Search API answers with an error status code.
        """
        params = self._build_params(query=query, top_k=top_k)
        headers = self._build_headers()

        async with httpx.AsyncClient() as client:
            response = await client.get(BRAVE_SEARCH_API_URL, params=params, headers=headers)
            response.raise_for_status()

        return self._parse_response(response.json())

    def _build_headers(self) -> dict[str, str]:
        """
        Build the HTTP headers for a Brave Search request.

        :returns: Header dictionary carrying the resolved API key in `X-Subscription-Token`.
            An unresolved key is sent as an empty string.
        """
        return {
            "Accept": "application/json",
            "Accept-Encoding": "gzip",
            "X-Subscription-Token": self.api_key.resolve_value() or "",
        }

    def _build_params(self, query: str, top_k: int | None) -> dict[str, Any]:
        """
        Build the query parameters for a Brave Search request.

        :param query: Search query string, sent as `q`.
        :param top_k: Per-run result limit; falls back to the init-time `top_k` when `None`.
        :returns: Query parameter dictionary. Optional settings left at `None` are omitted.
        """
        effective_top_k = top_k if top_k is not None else self.top_k
        params: dict[str, Any] = {"q": query}
        if effective_top_k is not None:
            params["count"] = effective_top_k
        if self.country is not None:
            params["country"] = self.country
        if self.search_lang is not None:
            params["search_lang"] = self.search_lang
        if self.extra_params:
            # Applied last on purpose: user-supplied parameters win over the ones set above.
            params.update(self.extra_params)
        return params

    @staticmethod
    def _parse_response(response: dict[str, Any]) -> dict[str, Any]:
        """
        Convert a Brave Search API response to Haystack Documents and links.

        Missing and explicit `null` values are treated alike: an absent or `null` `web`
        section or `results` list yields empty outputs, and an absent or `null`
        `url`, `title` or `description` becomes an empty string.

        :param response: Brave Search API response dictionary.
        :returns: Dictionary with `documents` and `links` keys.
        """
        documents: list[Document] = []
        links: list[str] = []

        # `dict.get(key, default)` only falls back when the key is absent, so use `or`
        # to also cover keys that are present but set to null in the JSON payload.
        web_section = response.get("web") or {}
        for result in web_section.get("results") or []:
            url = result.get("url") or ""
            title = result.get("title") or ""
            description = result.get("description") or ""

            documents.append(Document(content=description, meta={"title": title, "url": url}))
            # Every result becomes a Document, but only results with a URL contribute a link.
            if url:
                links.append(url)

        return {"documents": documents, "links": links}

integrations/brave/src/haystack_integrations/components/websearch/py.typed

Whitespace-only changes.
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
2+
#
3+
# SPDX-License-Identifier: Apache-2.0

0 commit comments

Comments
 (0)