Skip to content

Commit 0b0af1a

Browse files
authored
feat: add doc search tool (#101)
1 parent db05232 commit 0b0af1a

3 files changed

Lines changed: 456 additions & 0 deletions

File tree

src/deepset_mcp/main.py

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import argparse
2+
import logging
23
import os
34
from pathlib import Path
45

@@ -11,6 +12,10 @@
1112
get_latest_custom_component_installation_logs as get_latest_custom_component_installation_logs_tool,
1213
list_custom_component_installations as list_custom_component_installations_tool,
1314
)
15+
from deepset_mcp.tools.doc_search import (
16+
get_docs_config,
17+
search_docs as search_docs_tool,
18+
)
1419
from deepset_mcp.tools.haystack_service import (
1520
get_component_definition as get_component_definition_tool,
1621
get_custom_components as get_custom_components_tool,
@@ -434,6 +439,41 @@ async def search_pipeline(pipeline_name: str, query: str) -> str:
434439
return response
435440

436441

442+
# Check if docs search should be enabled
# get_docs_config() yields a (workspace, pipeline_name, api_key) tuple, or None when the
# docs search configuration is incomplete; the tool is only registered in the former case.
# NOTE(review): this check appears to run when the module is imported, whereas main() copies
# the --docs-* command line flags into os.environ later on -- confirm that configuring the
# docs search through flags alone actually enables the tool.
docs_config = get_docs_config()
if docs_config:
    docs_workspace, docs_pipeline_name, docs_api_key = docs_config

    async def search_docs(query: str) -> str:
        """Search the deepset platform documentation.

        This tool allows you to search through deepset's official documentation to find
        information about features, API usage, best practices, and troubleshooting guides.
        Use this when you need to look up specific deepset functionality or help users
        understand how to use deepset features.

        :param query: The search query to execute against the documentation.
        :returns: The formatted search results from the documentation.
        """
        # The client is created with the dedicated docs API key taken from the docs configuration.
        async with AsyncDeepsetClient(api_key=docs_api_key) as client:
            response = await search_docs_tool(
                client=client,
                workspace=docs_workspace,
                pipeline_name=docs_pipeline_name,
                query=query,
            )
        return response

    # Add the tool to the server
    mcp.add_tool(search_docs)

else:
    # Without a complete docs configuration the tool is left out and a warning is logged instead.
    logging.warning(
        "Documentation search tool not enabled. To enable, set the following environment variables: "
        "DEEPSET_DOCS_WORKSPACE, DEEPSET_DOCS_PIPELINE_NAME, DEEPSET_DOCS_API_KEY"
    )
475+
476+
437477
def main() -> None:
438478
"""Entrypoint for the deepset MCP server."""
439479
parser = argparse.ArgumentParser(description="Run the Deepset MCP server.")
@@ -447,11 +487,27 @@ def main() -> None:
447487
"-k",
448488
help="Deepset API key (env DEEPSET_API_KEY)",
449489
)
490+
parser.add_argument(
491+
"--docs-workspace",
492+
help="Deepset docs search workspace (env DEEPSET_DOCS_WORKSPACE)",
493+
)
494+
parser.add_argument(
495+
"--docs-pipeline-name",
496+
help="Deepset docs pipeline name (env DEEPSET_DOCS_PIPELINE_NAME)",
497+
)
498+
parser.add_argument(
499+
"--docs-api-key",
500+
help="Deepset docs pipeline API key (env DEEPSET_DOCS_API_KEY)",
501+
)
450502
args = parser.parse_args()
451503

452504
# prefer flags, fallback to env
453505
workspace = args.workspace or os.getenv("DEEPSET_WORKSPACE")
454506
api_key = args.api_key or os.getenv("DEEPSET_API_KEY")
507+
docs_workspace = args.docs_workspace or os.getenv("DEEPSET_DOCS_WORKSPACE")
508+
docs_pipeline_name = args.docs_pipeline_name or os.getenv("DEEPSET_DOCS_PIPELINE_NAME")
509+
docs_api_key = args.docs_api_key or os.getenv("DEEPSET_DOCS_API_KEY")
510+
455511
if not workspace:
456512
parser.error("Missing workspace: set --workspace or DEEPSET_WORKSPACE")
457513
if not api_key:
@@ -461,6 +517,14 @@ def main() -> None:
461517
os.environ["DEEPSET_WORKSPACE"] = workspace
462518
os.environ["DEEPSET_API_KEY"] = api_key
463519

520+
# Set docs environment variables if provided
521+
if docs_workspace:
522+
os.environ["DEEPSET_DOCS_WORKSPACE"] = docs_workspace
523+
if docs_pipeline_name:
524+
os.environ["DEEPSET_DOCS_PIPELINE_NAME"] = docs_pipeline_name
525+
if docs_api_key:
526+
os.environ["DEEPSET_DOCS_API_KEY"] = docs_api_key
527+
464528
# run with stdio transport (standard input/output)
465529
mcp.run(transport="stdio")
466530

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
import os
2+
3+
from deepset_mcp.api.exceptions import BadRequestError, ResourceNotFoundError, UnexpectedAPIError
4+
from deepset_mcp.api.pipeline.models import DeepsetSearchResponse
5+
from deepset_mcp.api.protocols import AsyncClientProtocol
6+
7+
8+
def doc_search_results_to_llm_readable_string(results: DeepsetSearchResponse) -> str:
    """Formats results of the doc search pipeline so that they can be read by an LLM.

    Consecutive documents that share the same ``source_id`` metadata value are merged into one
    section. Each section consists of the ``original_file_path`` of its first document (empty
    string if absent), a newline, and the contents of its documents joined by single spaces.
    Sections are separated by a ``----`` line.

    Documents without a ``source_id`` no longer raise ``KeyError``; they are grouped under
    ``None``, so consecutive documents lacking the key end up in the same section.

    :param results: DeepsetSearchResponse object
    :return: Formatted results; an empty string when there are no documents.
    """
    file_segmented_docs: list[list[dict[str, str]]] = []

    previous_source_id = None
    for doc in results.documents:
        source_id = doc.meta.get("source_id")
        # The first document always opens a section, even when its source_id is missing (None);
        # comparing against the initial None alone would try to extend a section that does not exist.
        if not file_segmented_docs or source_id != previous_source_id:
            file_segmented_docs.append([])
            previous_source_id = source_id
        file_segmented_docs[-1].append(
            {"content": doc.content, "file_path": doc.meta.get("original_file_path", "")}
        )

    files = []
    for file_docs in file_segmented_docs:
        # The section header is the file path of the section's first document.
        start = file_docs[0]["file_path"]
        full_doc = " ".join(doc["content"] for doc in file_docs)
        files.append(start + "\n" + full_doc)

    return "\n----\n".join(files)
33+
34+
35+
async def search_docs(
    client: AsyncClientProtocol,
    workspace: str,
    pipeline_name: str,
    query: str,
) -> str:
    """Run a query against the documentation pipeline and return the results as text.

    The pipeline is fetched first. Unless its status equals ``DEPLOYED``, a message stating the
    current status is returned and no search is executed. Otherwise the query is run and the
    response is formatted with ``doc_search_results_to_llm_readable_string``. Errors are not
    raised to the caller; they are turned into an explanatory string.

    :param client: The async client for API communication.
    :param workspace: The workspace name for the docs pipeline.
    :param pipeline_name: Name of the pipeline to use for doc search.
    :param query: The search query to execute.
    :returns: A string containing the formatted search results or error message.
    """
    try:
        docs_pipeline = await client.pipelines(workspace=workspace).get(pipeline_name=pipeline_name)

        # Guard: only a deployed pipeline can be searched.
        if docs_pipeline.status != "DEPLOYED":
            current_status = docs_pipeline.status
            return f"Documentation pipeline '{pipeline_name}' is not deployed (current status: {current_status})."

        hits = await client.pipelines(workspace=workspace).search(pipeline_name=pipeline_name, query=query)
        return doc_search_results_to_llm_readable_string(hits)
    except ResourceNotFoundError:
        return f"There is no documentation pipeline named '{pipeline_name}' in workspace '{workspace}'."
    except (BadRequestError, UnexpectedAPIError) as api_error:
        # Both API error types are reported with the same message.
        return f"Failed to search documentation using pipeline '{pipeline_name}': {api_error}"
    except Exception as error:
        # Last-resort handler so the caller always receives a string.
        return f"An unexpected error occurred while searching documentation with pipeline '{pipeline_name}': {error}"
74+
75+
76+
def get_docs_config() -> tuple[str, str, str] | None:
77+
"""Get docs search configuration from environment variables.
78+
79+
:returns: Tuple of (workspace, pipeline_name, api_key) if all are available, None otherwise.
80+
"""
81+
workspace = os.environ.get("DEEPSET_DOCS_WORKSPACE")
82+
pipeline_name = os.environ.get("DEEPSET_DOCS_PIPELINE_NAME")
83+
api_key = os.environ.get("DEEPSET_DOCS_API_KEY")
84+
85+
if workspace and pipeline_name and api_key:
86+
return workspace, pipeline_name, api_key
87+
88+
return None

0 commit comments

Comments
 (0)