From 405909a2d49b4823b05983bf71c56d6b653db94c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=9D=AD=E6=BD=87?= <1576730710@qq.com>
Date: Tue, 14 Oct 2025 10:08:50 +0800
Subject: [PATCH 1/2] =?UTF-8?q?[search=20online]=20=E4=BC=98=E5=8C=96?=
 =?UTF-8?q?=E8=81=94=E7=BD=91=E6=90=9C=E7=B4=A2=E6=8F=92=E4=BB=B6=E6=80=A7?=
 =?UTF-8?q?=E8=83=BD?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../src/internet_search.py                    | 259 ++++++++++++------
 1 file changed, 172 insertions(+), 87 deletions(-)

diff --git a/app-builder/plugins/fit_py_internet_search/src/internet_search.py b/app-builder/plugins/fit_py_internet_search/src/internet_search.py
index 02038f66d0..9ef39f92c7 100644
--- a/app-builder/plugins/fit_py_internet_search/src/internet_search.py
+++ b/app-builder/plugins/fit_py_internet_search/src/internet_search.py
@@ -4,6 +4,7 @@
 # Licensed under the MIT License. See License.txt in the project root for license information.
 # ======================================================================================================================
 import json
+from concurrent.futures import ThreadPoolExecutor, as_completed
 from dataclasses import dataclass
 from typing import Dict, List, Optional, Sequence
 from linkup import LinkupClient
@@ -75,6 +76,148 @@ def _truncate(text: str, max_chars: int) -> str:
     return text[: max_chars - 1].rstrip() + "…"
 
 
+def _extract_summary(text: str, max_sentences: int = 4) -> str:
+    """
+    Build a short summary from the leading sentences of a text.
+
+    Args:
+        text: Source text; Chinese and English sentence terminators are recognised.
+        max_sentences: Maximum number of sentences to keep (default 4).
+
+    Returns:
+        The summary, or "" for empty input; over-long results end with "...".
+    """
+    if not text:
+        return ""
+
+    # 定义句子分隔符（支持中英文）
+    import re
+    # 使用正则表达式匹配句子结束符号
+    sentences = re.split(r'([。！？\.!?]+["\'»\)]?\s*)', text)
+
+    # Re-attach each separator to the sentence that precedes it.
+    combined_sentences = []
+    for i in range(0, len(sentences) - 1, 2):  # split with a capture group yields text, separator, text, ...
+        sentence = sentences[i]
+        separator = sentences[i + 1] if i + 1 < len(sentences) else ""
+        combined = (sentence + separator).strip()
+        if combined:
+            combined_sentences.append(combined)
+
+    # A trailing fragment without a terminator is kept as its own sentence.
+    if len(sentences) % 2 == 1 and sentences[-1].strip():
+        combined_sentences.append(sentences[-1].strip())
+
+    # Keep at most max_sentences sentences, joined by single spaces.
+    if len(combined_sentences) <= max_sentences:
+        summary = " ".join(combined_sentences)
+    else:
+        summary = " ".join(combined_sentences[:max_sentences])
+
+    # 确保摘要不会过长（最多150字符）
+    if len(summary) > 150:
+        summary = summary[:147].rstrip() + "..."
+
+    return summary
+
+def _search_exa(query: str, api_key: str, max_results: int, max_snippet_chars: int) -> List[SearchItem]:
+    """在 Exa 中搜索"""
+    items: List[SearchItem] = []
+    try:  # query Exa; any exception below is logged and the items gathered so far are returned
+        exa_client = Exa(api_key=api_key)
+        res = exa_client.search_and_contents(
+            query,
+            text={"max_characters": 2000},
+            livecrawl="always",
+            num_results=max_results,
+        )
+        for i, r in enumerate(getattr(res, "results", [])[:max_results]):  # never keep more than max_results hits
+            text = _truncate(getattr(r, "text", "") or getattr(r, "content", "") or "", max_snippet_chars)
+            summary = _extract_summary(text)  # reduce the snippet to its first few sentences
+            items.append(
+                SearchItem(
+                    id=getattr(r, "id", "") or f"exa_{i}",  # positional fallback when the hit has no id
+                    text=summary,  # the same summary is stored as text and as metadata["summary"]
+                    score=12.0,  # constant score, identical for every hit
+                    metadata={
+                        "fileName": getattr(r, "title", "") or "",
+                        "url": getattr(r, "url", "") or "",
+                        "source": "exa",
+                        "published_date": getattr(r, "published_date", None),
+                        "summary": summary,
+                    }
+                )
+            )
+    except Exception as e:
+        sys_plugin_logger.warning(f'Failed to search in Exa tool: {str(e)}')
+    return items
+
+
+def _search_tavily(query: str, api_key: str, max_results: int, max_snippet_chars: int) -> List[SearchItem]:
+    """Search Tavily and return at most max_results summarised hits (possibly none if the call fails)."""
+    items: List[SearchItem] = []
+    try:  # any exception below is logged and the items gathered so far are returned
+        tavily_client = TavilyClient(api_key=api_key)
+        res = tavily_client.search(
+            query=query,
+            max_results=max_results,
+            include_images=False,
+        )
+        for i, r in enumerate(res.get("results", [])[:max_results]):  # never keep more than max_results hits
+            text = _truncate(r.get("content", "") or "", max_snippet_chars)
+            summary = _extract_summary(text)  # reduce the snippet to its first few sentences
+            items.append(
+                SearchItem(
+                    id=r.get("id", "") or f"tavily_{i}",  # positional fallback when the hit has no id
+                    text=summary,  # the same summary is stored as text and as metadata["summary"]
+                    score=12.0,  # constant score, identical for every hit
+                    metadata={
+                        "fileName": r.get("title", "") or "",
+                        "url": r.get("url", "") or "",
+                        "source": "tavily",
+                        "published_date": r.get("published_date"),
+                        "summary": summary,
+                    }
+                )
+            )
+    except Exception as e:
+        sys_plugin_logger.warning(f'Failed to search in Tavily tool: {str(e)}')
+    return items
+
+
+def _search_linkup(query: str, api_key: str, max_results: int, max_snippet_chars: int) -> List[SearchItem]:
+    """Search Linkup and return at most max_results summarised hits (possibly none if the call fails)."""
+    items: List[SearchItem] = []
+    try:  # any exception below is logged and the items gathered so far are returned
+        linkup_client = LinkupClient(api_key=api_key)
+        resp = linkup_client.search(
+            query=query,
+            depth="standard",
+            output_type="searchResults",
+            include_images=False,
+        )
+        for i, r in enumerate(getattr(resp, "results", [])[:max_results]):  # max_results is applied only here
+            text = _truncate(getattr(r, "content", "") or getattr(r, "text", "") or "", max_snippet_chars)
+            summary = _extract_summary(text)  # reduce the snippet to its first few sentences
+            items.append(
+                SearchItem(
+                    id=getattr(r, "id", "") or f"linkup_{i}",  # positional fallback when the hit has no id
+                    text=summary,  # the same summary is stored as text and as metadata["summary"]
+                    score=12.0,  # constant score, identical for every hit
+                    metadata={
+                        "fileName": getattr(r, "name", None) or getattr(r, "title", "") or "",
+                        "url": getattr(r, "url", "") or "",
+                        "source": "linkup",
+                        "published_date": None,  # never populated for Linkup hits
+                        "summary": summary,
+                    }
+                )
+            )
+    except Exception as e:
+        sys_plugin_logger.warning(f'Failed to search in Linkup tool: {str(e)}')
+    return items
+
+
 def _internet_search(
         query: str,
         api_keys: Dict[str, str],
@@ -88,97 +231,39 @@ def _internet_search(
         for name in ("exa", "tavily", "linkup"):
             if api_keys.get(name):
                 selected.append(name)
-    items: List[SearchItem] = []
-    errors = []  # 记录失败的搜索工具
 
-    # Exa
+    # 准备并行搜索任务
+    search_tasks = []
     if "exa" in selected and api_keys.get("exa"):
-        try:
-            exa_client = Exa(api_key=api_keys["exa"])
-            res = exa_client.search_and_contents(
-                query,
-                text={"max_characters": 2000},
-                livecrawl="always",
-                num_results=max_results_per_provider,
-            )
-            for i, r in enumerate(getattr(res, "results", [])[:max_results_per_provider]):
-                text = _truncate(getattr(r, "text", "") or getattr(r, "content", "") or "", max_snippet_chars)
-                items.append(
-                    SearchItem(
-                        id=getattr(r, "id", "") or f"exa_{i}",
-                        text=text,
-                        score=12.0,  # 使用float确保序列化
-                        metadata={
-                            "fileName": getattr(r, "title", "") or "",
-                            "url": getattr(r, "url", "") or "",
-                            "source": "exa",
-                            "published_date": getattr(r, "published_date", None),
-                            "summary": text,
-                        }
-                    )
-                )
-        except Exception as e:
-            sys_plugin_logger.warning(f'Failed to search in Exa tool: {str(e)}')
-            errors.append("exa")
-
-    # Tavily
+        search_tasks.append(("exa", _search_exa, api_keys["exa"]))
     if "tavily" in selected and api_keys.get("tavily"):
-        try:
-            tavily_client = TavilyClient(api_key=api_keys["tavily"])
-            res = tavily_client.search(
-                query=query,
-                max_results=max_results_per_provider,
-                include_images=False,
-            )
-            for i, r in enumerate(res.get("results", [])[:max_results_per_provider]):
-                text = _truncate(r.get("content", "") or "", max_snippet_chars)
-                items.append(
-                    SearchItem(
-                        id=r.get("id", "") or f"tavily_{i}",
-                        text=text,
-                        score=12.0,
-                        metadata={
-                            "fileName": r.get("title", "") or "",
-                            "url": r.get("url", "") or "",
-                            "source": "tavily",
-                            "published_date": r.get("published_date"),
-                            "summary": text,
-                        }
-                    )
-                )
-        except Exception as e:
-            sys_plugin_logger.warning(f'Failed to search in Tavily tool: {str(e)}')
-            errors.append("tavily")
-
-    # Linkup
+        search_tasks.append(("tavily", _search_tavily, api_keys["tavily"]))
     if "linkup" in selected and api_keys.get("linkup"):
-        try:
-            linkup_client = LinkupClient(api_key=api_keys["linkup"])
-            resp = linkup_client.search(
-                query=query,
-                depth="standard",
-                output_type="searchResults",
-                include_images=False,
-            )
-            for i, r in enumerate(getattr(resp, "results", [])[:max_results_per_provider]):
-                text = _truncate(getattr(r, "content", "") or getattr(r, "text", "") or "", max_snippet_chars)
-                items.append(
-                    SearchItem(
-                        id=getattr(r, "id", "") or f"linkup_{i}",
-                        text=text,
-                        score=12.0,
-                        metadata={
-                            "fileName": getattr(r, "name", None) or getattr(r, "title", "") or "",
-                            "url": getattr(r, "url", "") or "",
-                            "source": "linkup",
-                            "published_date": None,
-                            "summary": text,
-                        }
-                    )
-                )
-        except Exception as e:
-            sys_plugin_logger.warning(f'Failed to search in Linkup tool: {str(e)}')
-            errors.append("linkup")
+        search_tasks.append(("linkup", _search_linkup, api_keys["linkup"]))
+
+    # Run the selected providers in parallel on a thread pool.
+    items: List[SearchItem] = []
+    errors = []
+
+    # max_workers must be >= 1: ThreadPoolExecutor raises ValueError for 0 (no provider selected).
+    with ThreadPoolExecutor(max_workers=max(1, len(search_tasks))) as executor:
+        # Submit every search; the dict keeps submission order (exa, tavily, linkup).
+        future_to_provider = {
+            executor.submit(task_func, query, api_key, max_results_per_provider, max_snippet_chars): provider_name
+            for provider_name, task_func, api_key in search_tasks
+        }
+        # Collect in submission order so the merged list is deterministic across runs.
+        for future, provider_name in future_to_provider.items():
+            try:
+                results = future.result()
+                if results:
+                    items.extend(results)
+                else:
+                    # NOTE(review): helpers return [] on failure, so "no hits" is also counted as failed.
+                    errors.append(provider_name)
+            except Exception as e:
+                sys_plugin_logger.error(f'Unexpected error in {provider_name} search: {str(e)}')
+                errors.append(provider_name)
     
     # 如果所有搜索都失败了，才抛出异常
     if not items and errors:

From d5d4e2ffc79fa1dcfaa9c6646832e35f0133a30b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=9D=AD=E6=BD=87?= <1576730710@qq.com>
Date: Tue, 14 Oct 2025 16:10:13 +0800
Subject: [PATCH 2/2] =?UTF-8?q?[search=20online]=20=E4=BF=AE=E6=94=B9?=
 =?UTF-8?q?=E6=A3=80=E8=A7=86=E6=84=8F=E8=A7=81?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../fit_py_internet_search/conf/application.yml |  1 +
 .../src/internet_search.py                      | 17 +++++++++++------
 2 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/app-builder/plugins/fit_py_internet_search/conf/application.yml b/app-builder/plugins/fit_py_internet_search/conf/application.yml
index 8f8f2116c8..a1c8ed00e7 100644
--- a/app-builder/plugins/fit_py_internet_search/conf/application.yml
+++ b/app-builder/plugins/fit_py_internet_search/conf/application.yml
@@ -1,5 +1,6 @@
 internet-search:
   max_results_per_provider: 5
+  summary-length: 150
   api-key:
     exa: "https://dashboard.exa.ai/home -- 登录获取api key"
     tavily: "https://app.tavily.com/home -- 登录获取api key"
diff --git a/app-builder/plugins/fit_py_internet_search/src/internet_search.py b/app-builder/plugins/fit_py_internet_search/src/internet_search.py
index 9ef39f92c7..cc57b21770 100644
--- a/app-builder/plugins/fit_py_internet_search/src/internet_search.py
+++ b/app-builder/plugins/fit_py_internet_search/src/internet_search.py
@@ -4,6 +4,7 @@
 # Licensed under the MIT License. See License.txt in the project root for license information.
 # ======================================================================================================================
 import json
+import re
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from dataclasses import dataclass
 from typing import Dict, List, Optional, Sequence
@@ -70,6 +71,11 @@ def _get_max_results_per_provider() -> int:
     pass
 
 
+@value('internet-search.summary-length')
+def _get_max_summary_length() -> int:
+    pass  # no body needed: presumably @value supplies the configured setting — confirm against the decorator
+
+
 def _truncate(text: str, max_chars: int) -> str:
     if len(text) <= max_chars:
         return text
@@ -90,8 +96,6 @@ def _extract_summary(text: str, max_sentences: int = 4) -> str:
     if not text:
         return ""
 
-    # 定义句子分隔符（支持中英文）
-    import re
-    # 使用正则表达式匹配句子结束符号
-    sentences = re.split(r'([。！？\.!?]+["\'»\)]?\s*)', text)
+    # Split after 。！？ always, but after ASCII . ! ? only before whitespace/end, so "3.5" and URLs stay whole
+    sentences = re.split(r'((?:[。！？]+|[.!?]+(?=["\'»)]?(?:\s|$)))["\'»)]?\s*)', text)
 
@@ -115,11 +119,12 @@ def _extract_summary(text: str, max_sentences: int = 4) -> str:
         summary = " ".join(combined_sentences[:max_sentences])
 
-    # 确保摘要不会过长（最多150字符）
-    if len(summary) > 150:
-        summary = summary[:147].rstrip() + "..."
+    limit = _get_max_summary_length() or 150  # configured cap; 150 (the old hard-coded value) if it yields nothing
+    if len(summary) > limit:
+        summary = summary[:max(limit - 3, 0)].rstrip() + "..."  # reserve room for the ellipsis, never negative
 
     return summary
 
+
 def _search_exa(query: str, api_key: str, max_results: int, max_snippet_chars: int) -> List[SearchItem]:
     """在 Exa 中搜索"""
     items: List[SearchItem] = []
@@ -264,11 +269,11 @@ def _internet_search(
             except Exception as e:
                 sys_plugin_logger.error(f'Unexpected error in {provider_name} search: {str(e)}')
                 errors.append(provider_name)
-    
+
     # 如果所有搜索都失败了，才抛出异常
     if not items and errors:
         raise FitException(
-            InternalErrorCode.CLIENT_ERROR, 
+            InternalErrorCode.CLIENT_ERROR,
             f'All search tools failed: {", ".join(errors)}'
         )