|
24 | 24 |
|
25 | 25 | # Import view models for type safety |
26 | 26 | from aperag.domains.retrieval.schemas import SearchResult |
27 | | -from aperag.domains.web_access.schemas import WebReadResponse, WebSearchResponse |
| 27 | +from aperag.domains.web_access.schemas import WebReadResponse |
28 | 28 |
|
29 | 29 | logger = logging.getLogger(__name__) |
30 | 30 |
|
@@ -104,7 +104,15 @@ async def search_collection( |
104 | 104 | topk: int = 5, |
105 | 105 | query_keywords: list[str] = None, |
106 | 106 | ) -> Dict[str, Any]: |
107 | | - """Search a persistent knowledge base for evidence relevant to the current request. |
| 107 | + """[DEPRECATED] Search a persistent knowledge base for evidence relevant to the current request. |
| 108 | +
|
| 109 | + [DEPRECATED] Phase 9 D10.d (#96, ``docs/modularization/d10-design-pack.md`` |
| 110 | + §B.5 / §H.1): use the discrete split tools instead — |
| 111 | + ``vector_search`` / ``graph_search`` / ``fulltext_search``. This |
| 112 | + omnibus tool is preserved as a deprecated alias for backward |
| 113 | + compatibility during the D10 migration window and will be removed in |
| 114 | + D11 once telemetry confirms no remaining external callers (D10.h |
| 115 | + cutover lane). Implementation is intentionally untouched. |
108 | 116 |
|
109 | 117 | Use this when: |
110 | 118 | - You already know which collection should be searched. |
@@ -265,7 +273,14 @@ async def search_chat_files( |
265 | 273 | rerank: bool = True, |
266 | 274 | topk: int = 5, |
267 | 275 | ) -> Dict[str, Any]: |
268 | | - """Search files uploaded in the current chat for evidence relevant to this turn. |
| 276 | + """[DEPRECATED] Search files uploaded in the current chat for evidence relevant to this turn. |
| 277 | +
|
| 278 | + [DEPRECATED] Phase 9 D10.d (#96, ``docs/modularization/d10-design-pack.md`` |
| 279 | + §H.2): chat-scoped omnibus search shares the deprecation timeline of |
| 280 | + ``search_collection``. The split tool surface (``vector_search`` / |
| 281 | + ``graph_search`` / ``fulltext_search``) is collection-scoped today; |
| 282 | + a chat-scoped equivalent will be sequenced in the D10.h cutover |
| 283 | + lane. Implementation is intentionally untouched. |
269 | 284 |
|
270 | 285 | Use this when: |
271 | 286 | - The user refers to files shared in this chat session. |
@@ -367,116 +382,12 @@ async def search_chat_files( |
367 | 382 | return {"error": str(e)} |
368 | 383 |
|
369 | 384 |
|
370 | | -@mcp_server.tool |
371 | | -async def web_search( |
372 | | - query: str = "", |
373 | | - max_results: int = 5, |
374 | | - timeout: int = 30, |
375 | | - locale: str = "en-US", |
376 | | - source: str = "", |
377 | | -) -> Dict[str, Any]: |
378 | | - """Search the web for current or missing information. |
379 | | -
|
380 | | - Use this when: |
381 | | - - The current turn allows web access. |
382 | | - - You need current information, external verification, or gap-filling beyond ApeRAG collections. |
383 | | -
|
384 | | - Do not use this when: |
385 | | - - The current turn disables web access. |
386 | | - - Collection or chat-file evidence is already sufficient for the requested step. |
387 | | -
|
388 | | - What success means: |
389 | | - - You received candidate web results with titles, snippets, and URLs. |
390 | | -
|
391 | | - What an empty result means: |
392 | | - - No strong web results were found for this query and scope. |
393 | | - - Use `meta.search_status` to distinguish a genuine empty result from `unavailable` or `disabled`. |
394 | | -
|
395 | | - What failure may mean: |
396 | | - - network / timeout: external search could not complete. |
397 | | - - upstream search provider issue: the search backend could not return usable results. |
398 | | -
|
399 | | - How to explain this step to the user: |
400 | | - - While running: "Searching the web for current or missing information." |
401 | | - - After completion: "Checked web sources for supporting information." |
402 | | -
|
403 | | - Args: |
404 | | - query: Search query for web search. Optional when using source-only site browsing. |
405 | | - max_results: Maximum number of results to return (default: 5) |
406 | | - timeout: Request timeout in seconds (default: 30) |
407 | | - locale: Browser locale (default: en-US) |
408 | | - source: Optional domain or URL for site-specific filtering. When provided with query, |
409 | | - limits search results to this domain (e.g., 'site:vercel.com query'). |
410 | | -
|
411 | | - Returns: |
412 | | - Web search results with URLs, titles, snippets, and metadata |
413 | | -
|
414 | | - Note: |
415 | | - Uses JINA first when configured, otherwise falls back to DuckDuckGo. |
416 | | - Search failures are soft-failed into empty result sets with lightweight `meta` diagnostics so |
417 | | - downstream workflows stay stable while still distinguishing `ok`, `empty`, `unavailable`, and `disabled`. |
418 | | - """ |
419 | | - try: |
420 | | - api_key = get_api_key() |
421 | | - logger.info( |
422 | | - "MCP web_search request query=%s source=%s max_results=%s timeout=%s locale=%s", |
423 | | - query.strip() if query else "", |
424 | | - source.strip() if source else "", |
425 | | - max_results, |
426 | | - timeout, |
427 | | - locale, |
428 | | - ) |
429 | | - |
430 | | - # Build search request |
431 | | - search_data = { |
432 | | - "max_results": max_results, |
433 | | - "timeout": timeout, |
434 | | - "locale": locale, |
435 | | - } |
436 | | - |
437 | | - # Only include non-empty optional parameters |
438 | | - if query and query.strip(): |
439 | | - search_data["query"] = query.strip() |
440 | | - |
441 | | - if source and source.strip(): |
442 | | - search_data["source"] = source.strip() |
443 | | - |
444 | | - # Use longer timeout for web search operations |
445 | | - async with httpx.AsyncClient(timeout=90.0) as client: |
446 | | - response = await client.post( |
447 | | - f"{API_BASE_URL}/api/v2/web/search", |
448 | | - headers={"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}, |
449 | | - json=search_data, |
450 | | - ) |
451 | | - if response.status_code == 200: |
452 | | - try: |
453 | | - # Parse response using view model for type safety |
454 | | - search_response = WebSearchResponse.model_validate(response.json()) |
455 | | - logger.info( |
456 | | - "MCP web_search completed query=%s source=%s status=%s results=%s providers=%s backends=%s fallback=%s", |
457 | | - query.strip() if query else "", |
458 | | - source.strip() if source else "", |
459 | | - search_response.meta.search_status if search_response.meta else "unknown", |
460 | | - len(search_response.results), |
461 | | - search_response.meta.provider_used if search_response.meta else [], |
462 | | - search_response.meta.backend_used if search_response.meta else [], |
463 | | - search_response.meta.fallback_used if search_response.meta else False, |
464 | | - ) |
465 | | - return search_response.model_dump() |
466 | | - except Exception as e: |
467 | | - logger.error(f"Failed to parse web search response: {e}") |
468 | | - return {"error": "Failed to parse web search response", "details": str(e)} |
469 | | - else: |
470 | | - logger.warning( |
471 | | - "MCP web_search failed status=%s query=%s source=%s body=%s", |
472 | | - response.status_code, |
473 | | - query.strip() if query else "", |
474 | | - source.strip() if source else "", |
475 | | - response.text, |
476 | | - ) |
477 | | - return {"error": f"Web search failed: {response.status_code}", "details": response.text} |
478 | | - except ValueError as e: |
479 | | - return {"error": str(e)} |
| 385 | +# NOTE(D10.d #96 §B.4): the ``web_search`` tool implementation moved to |
| 386 | +# ``aperag.mcp.tools.search_web`` so all D10 search tools live in the |
| 387 | +# ``aperag/mcp/tools/`` subpackage. Wire signature is preserved (no |
| 388 | +# breaking change for external MCP callers); §B.4 spec parameter |
| 389 | +# canonicalization (``top_k`` / kw-only / ``source: str | None``) is |
| 390 | +# deferred to the D10.h cutover lane. |
480 | 391 |
|
481 | 392 |
|
482 | 393 | @mcp_server.tool |
@@ -844,5 +755,23 @@ def get_api_key() -> str: |
844 | 755 | ) |
845 | 756 |
|
846 | 757 |
|
| 758 | +# Phase 9 D10.d (#96, ``docs/modularization/d10-design-pack.md`` §B): |
| 759 | +# import the split search tool functions so their ``@mcp_server.tool`` |
| 760 | +# decorators register the new surface (``vector_search`` / |
| 761 | +# ``graph_search`` / ``fulltext_search`` / ``web_search``). The imports |
| 762 | +# happen at the bottom of this module — after ``mcp_server``, |
| 763 | +# ``API_BASE_URL``, and ``get_api_key`` are defined — to break the |
| 764 | +# circular import cycle (``aperag.mcp.tools.search_*`` import from |
| 765 | +# ``aperag.mcp.server``). |
| 766 | +# |
| 767 | +# Re-exporting the function symbols at module level preserves the |
| 768 | +# existing ``aperag.mcp.server.web_search`` access path for backward |
| 769 | +# compatibility with callers (e.g. ``tests/unit_test/test_mcp_server.py``) |
| 770 | +# that read attributes off the server module directly. |
| 771 | +from aperag.mcp.tools.search_fulltext import fulltext_search # noqa: E402, F401 |
| 772 | +from aperag.mcp.tools.search_graph import graph_search # noqa: E402, F401 |
| 773 | +from aperag.mcp.tools.search_vector import vector_search # noqa: E402, F401 |
| 774 | +from aperag.mcp.tools.search_web import web_search # noqa: E402, F401 |
| 775 | + |
847 | 776 | # Export the server instance |
848 | 777 | __all__ = ["mcp_server"] |
0 commit comments