|
22 | 22 |
|
23 | 23 | # Import view models for type safety |
24 | 24 | from aperag.domains.retrieval.schemas import SearchResult |
25 | | -from aperag.domains.web_access.schemas import WebReadResponse, WebSearchResponse |
| 25 | +from aperag.domains.web_access.schemas import WebReadResponse |
26 | 26 | from aperag.mcp.tools import ( |
27 | 27 | ByteRange, |
28 | 28 | ) |
@@ -236,7 +236,15 @@ async def search_collection( |
236 | 236 | topk: int = 5, |
237 | 237 | query_keywords: list[str] = None, |
238 | 238 | ) -> Dict[str, Any]: |
239 | | - """Search a persistent knowledge base for evidence relevant to the current request. |
| 239 | + """[DEPRECATED] Search a persistent knowledge base for evidence relevant to the current request. |
| 240 | +
|
| 241 | + [DEPRECATED] Phase 9 D10.d (#96, ``docs/modularization/d10-design-pack.md`` |
| 242 | + §B.5 / §H.1): use the discrete split tools instead — |
| 243 | + ``vector_search`` / ``graph_search`` / ``fulltext_search``. This |
| 244 | + omnibus tool is preserved as a deprecated alias for backward |
| 245 | + compatibility during the D10 migration window and will be removed in |
| 246 | + D11 once telemetry confirms no remaining external callers (D10.h |
| 247 | + cutover lane). Implementation is intentionally untouched. |
240 | 248 |
|
241 | 249 | Use this when: |
242 | 250 | - You already know which collection should be searched. |
@@ -397,7 +405,14 @@ async def search_chat_files( |
397 | 405 | rerank: bool = True, |
398 | 406 | topk: int = 5, |
399 | 407 | ) -> Dict[str, Any]: |
400 | | - """Search files uploaded in the current chat for evidence relevant to this turn. |
| 408 | + """[DEPRECATED] Search files uploaded in the current chat for evidence relevant to this turn. |
| 409 | +
|
| 410 | + [DEPRECATED] Phase 9 D10.d (#96, ``docs/modularization/d10-design-pack.md`` |
| 411 | + §H.2): chat-scoped omnibus search shares the deprecation timeline of |
| 412 | + ``search_collection``. The split tool surface (``vector_search`` / |
| 413 | + ``graph_search`` / ``fulltext_search``) is collection-scoped today; |
| 414 | + a chat-scoped equivalent will be sequenced in the D10.h cutover |
| 415 | + lane. Implementation is intentionally untouched. |
401 | 416 |
|
402 | 417 | Use this when: |
403 | 418 | - The user refers to files shared in this chat session. |
@@ -499,116 +514,12 @@ async def search_chat_files( |
499 | 514 | return {"error": str(e)} |
500 | 515 |
|
501 | 516 |
|
502 | | -@mcp_server.tool |
503 | | -async def web_search( |
504 | | - query: str = "", |
505 | | - max_results: int = 5, |
506 | | - timeout: int = 30, |
507 | | - locale: str = "en-US", |
508 | | - source: str = "", |
509 | | -) -> Dict[str, Any]: |
510 | | - """Search the web for current or missing information. |
511 | | -
|
512 | | - Use this when: |
513 | | - - The current turn allows web access. |
514 | | - - You need current information, external verification, or gap-filling beyond ApeRAG collections. |
515 | | -
|
516 | | - Do not use this when: |
517 | | - - The current turn disables web access. |
518 | | - - Collection or chat-file evidence is already sufficient for the requested step. |
519 | | -
|
520 | | - What success means: |
521 | | - - You received candidate web results with titles, snippets, and URLs. |
522 | | -
|
523 | | - What an empty result means: |
524 | | - - No strong web results were found for this query and scope. |
525 | | - - Use `meta.search_status` to distinguish a genuine empty result from `unavailable` or `disabled`. |
526 | | -
|
527 | | - What failure may mean: |
528 | | - - network / timeout: external search could not complete. |
529 | | - - upstream search provider issue: the search backend could not return usable results. |
530 | | -
|
531 | | - How to explain this step to the user: |
532 | | - - While running: "Searching the web for current or missing information." |
533 | | - - After completion: "Checked web sources for supporting information." |
534 | | -
|
535 | | - Args: |
536 | | - query: Search query for web search. Optional when using source-only site browsing. |
537 | | - max_results: Maximum number of results to return (default: 5) |
538 | | - timeout: Request timeout in seconds (default: 30) |
539 | | - locale: Browser locale (default: en-US) |
540 | | - source: Optional domain or URL for site-specific filtering. When provided with query, |
541 | | - limits search results to this domain (e.g., 'site:vercel.com query'). |
542 | | -
|
543 | | - Returns: |
544 | | - Web search results with URLs, titles, snippets, and metadata |
545 | | -
|
546 | | - Note: |
547 | | - Uses JINA first when configured, otherwise falls back to DuckDuckGo. |
548 | | - Search failures are soft-failed into empty result sets with lightweight `meta` diagnostics so |
549 | | - downstream workflows stay stable while still distinguishing `ok`, `empty`, `unavailable`, and `disabled`. |
550 | | - """ |
551 | | - try: |
552 | | - api_key = get_api_key() |
553 | | - logger.info( |
554 | | - "MCP web_search request query=%s source=%s max_results=%s timeout=%s locale=%s", |
555 | | - query.strip() if query else "", |
556 | | - source.strip() if source else "", |
557 | | - max_results, |
558 | | - timeout, |
559 | | - locale, |
560 | | - ) |
561 | | - |
562 | | - # Build search request |
563 | | - search_data = { |
564 | | - "max_results": max_results, |
565 | | - "timeout": timeout, |
566 | | - "locale": locale, |
567 | | - } |
568 | | - |
569 | | - # Only include non-empty optional parameters |
570 | | - if query and query.strip(): |
571 | | - search_data["query"] = query.strip() |
572 | | - |
573 | | - if source and source.strip(): |
574 | | - search_data["source"] = source.strip() |
575 | | - |
576 | | - # Use longer timeout for web search operations |
577 | | - async with httpx.AsyncClient(timeout=90.0) as client: |
578 | | - response = await client.post( |
579 | | - f"{API_BASE_URL}/api/v2/web/search", |
580 | | - headers={"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}, |
581 | | - json=search_data, |
582 | | - ) |
583 | | - if response.status_code == 200: |
584 | | - try: |
585 | | - # Parse response using view model for type safety |
586 | | - search_response = WebSearchResponse.model_validate(response.json()) |
587 | | - logger.info( |
588 | | - "MCP web_search completed query=%s source=%s status=%s results=%s providers=%s backends=%s fallback=%s", |
589 | | - query.strip() if query else "", |
590 | | - source.strip() if source else "", |
591 | | - search_response.meta.search_status if search_response.meta else "unknown", |
592 | | - len(search_response.results), |
593 | | - search_response.meta.provider_used if search_response.meta else [], |
594 | | - search_response.meta.backend_used if search_response.meta else [], |
595 | | - search_response.meta.fallback_used if search_response.meta else False, |
596 | | - ) |
597 | | - return search_response.model_dump() |
598 | | - except Exception as e: |
599 | | - logger.error(f"Failed to parse web search response: {e}") |
600 | | - return {"error": "Failed to parse web search response", "details": str(e)} |
601 | | - else: |
602 | | - logger.warning( |
603 | | - "MCP web_search failed status=%s query=%s source=%s body=%s", |
604 | | - response.status_code, |
605 | | - query.strip() if query else "", |
606 | | - source.strip() if source else "", |
607 | | - response.text, |
608 | | - ) |
609 | | - return {"error": f"Web search failed: {response.status_code}", "details": response.text} |
610 | | - except ValueError as e: |
611 | | - return {"error": str(e)} |
| 517 | +# NOTE(D10.d #96 §B.4): the ``web_search`` tool implementation moved to |
| 518 | +# ``aperag.mcp.tools.search_web`` so all D10 search tools live in the |
| 519 | +# ``aperag/mcp/tools/`` subpackage. Wire signature is preserved (no |
| 520 | +# breaking change for external MCP callers); §B.4 spec parameter |
| 521 | +# canonicalization (``top_k`` / kw-only / ``source: str | None``) is |
| 522 | +# deferred to the D10.h cutover lane. |
612 | 523 |
|
613 | 524 |
|
614 | 525 | @mcp_server.tool |
@@ -976,5 +887,23 @@ def get_api_key() -> str: |
976 | 887 | ) |
977 | 888 |
|
978 | 889 |
|
| 890 | +# Phase 9 D10.d (#96, ``docs/modularization/d10-design-pack.md`` §B): |
| 891 | +# import the split search tool functions so their ``@mcp_server.tool`` |
| 892 | +# decorators register the new surface (``vector_search`` / |
| 893 | +# ``graph_search`` / ``fulltext_search`` / ``web_search``). The imports |
| 894 | +# happen at the bottom of this module — after ``mcp_server``, |
| 895 | +# ``API_BASE_URL``, and ``get_api_key`` are defined — to break the |
| 896 | +# circular import cycle (``aperag.mcp.tools.search_*`` import from |
| 897 | +# ``aperag.mcp.server``). |
| 898 | +# |
| 899 | +# Re-exporting the function symbols at module level preserves the |
| 900 | +# existing ``aperag.mcp.server.web_search`` access path for backward |
| 901 | +# compatibility with callers (e.g. ``tests/unit_test/test_mcp_server.py``) |
| 902 | +# that read attributes off the server module directly. |
| 903 | +from aperag.mcp.tools.search_fulltext import fulltext_search # noqa: E402, F401 |
| 904 | +from aperag.mcp.tools.search_graph import graph_search # noqa: E402, F401 |
| 905 | +from aperag.mcp.tools.search_vector import vector_search # noqa: E402, F401 |
| 906 | +from aperag.mcp.tools.search_web import web_search # noqa: E402, F401 |
| 907 | + |
979 | 908 | # Export the server instance |
980 | 909 | __all__ = ["mcp_server"] |
0 commit comments