Skip to content

Commit 405909a

Browse files
[search online] 优化联网搜索插件性能
1 parent af977c4 commit 405909a

File tree

1 file changed

+172
-87
lines changed

1 file changed

+172
-87
lines changed

app-builder/plugins/fit_py_internet_search/src/internet_search.py

Lines changed: 172 additions & 87 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
# Licensed under the MIT License. See License.txt in the project root for license information.
55
# ======================================================================================================================
66
import json
7+
from concurrent.futures import ThreadPoolExecutor, as_completed
78
from dataclasses import dataclass
89
from typing import Dict, List, Optional, Sequence
910
from linkup import LinkupClient
@@ -75,6 +76,148 @@ def _truncate(text: str, max_chars: int) -> str:
7576
return text[: max_chars - 1].rstrip() + "…"
7677

7778

79+
def _extract_summary(text: str, max_sentences: int = 4) -> str:
80+
"""
81+
从文本中提取前几句话作为摘要
82+
83+
Args:
84+
text: 原始文本
85+
max_sentences: 最多保留的句子数,默认为4句
86+
87+
Returns:
88+
摘要文本
89+
"""
90+
if not text:
91+
return ""
92+
93+
# 定义句子分隔符(支持中英文)
94+
import re
95+
# 使用正则表达式匹配句子结束符号
96+
sentences = re.split(r'([。!?\.!?]+["\'»\)]?\s*)', text)
97+
98+
# 重新组合句子(将分隔符和句子内容合并)
99+
combined_sentences = []
100+
for i in range(0, len(sentences) - 1, 2):
101+
sentence = sentences[i]
102+
separator = sentences[i + 1] if i + 1 < len(sentences) else ""
103+
combined = (sentence + separator).strip()
104+
if combined:
105+
combined_sentences.append(combined)
106+
107+
# 如果最后一个元素没有分隔符
108+
if len(sentences) % 2 == 1 and sentences[-1].strip():
109+
combined_sentences.append(sentences[-1].strip())
110+
111+
# 取前 max_sentences 句
112+
if len(combined_sentences) <= max_sentences:
113+
summary = " ".join(combined_sentences)
114+
else:
115+
summary = " ".join(combined_sentences[:max_sentences])
116+
117+
# 确保摘要不会过长(最多150字符)
118+
if len(summary) > 150:
119+
summary = summary[:147].rstrip() + "..."
120+
121+
return summary
122+
123+
def _search_exa(query: str, api_key: str, max_results: int, max_snippet_chars: int) -> List[SearchItem]:
    """
    Run ``query`` against Exa and convert the hits into ``SearchItem`` objects.

    Each hit's text (falling back to its ``content`` attribute) is truncated to
    ``max_snippet_chars`` and then condensed with ``_extract_summary``; the
    summary is stored both as the item text and under ``metadata["summary"]``.

    Any exception is logged as a warning and the items collected so far
    (possibly none) are returned instead of raising.
    """
    collected: List[SearchItem] = []
    try:
        response = Exa(api_key=api_key).search_and_contents(
            query,
            text={"max_characters": 2000},
            livecrawl="always",
            num_results=max_results,
        )
        hits = getattr(response, "results", [])[:max_results]
        for index, hit in enumerate(hits):
            raw_text = getattr(hit, "text", "") or getattr(hit, "content", "") or ""
            summary = _extract_summary(_truncate(raw_text, max_snippet_chars))
            item_id = getattr(hit, "id", "") or f"exa_{index}"
            metadata = {
                "fileName": getattr(hit, "title", "") or "",
                "url": getattr(hit, "url", "") or "",
                "source": "exa",
                "published_date": getattr(hit, "published_date", None),
                "summary": summary,
            }
            collected.append(SearchItem(id=item_id, text=summary, score=12.0, metadata=metadata))
    except Exception as e:
        sys_plugin_logger.warning(f'Failed to search in Exa tool: {str(e)}')
    return collected
154+
155+
156+
def _search_tavily(query: str, api_key: str, max_results: int, max_snippet_chars: int) -> List[SearchItem]:
    """
    Run ``query`` against Tavily and convert the hits into ``SearchItem`` objects.

    Each hit's ``content`` is truncated to ``max_snippet_chars`` and then
    condensed with ``_extract_summary``; the summary is stored both as the item
    text and under ``metadata["summary"]``.

    Any exception is logged as a warning and the items collected so far
    (possibly none) are returned instead of raising.
    """
    collected: List[SearchItem] = []
    try:
        response = TavilyClient(api_key=api_key).search(
            query=query,
            max_results=max_results,
            include_images=False,
        )
        hits = response.get("results", [])[:max_results]
        for index, hit in enumerate(hits):
            raw_text = hit.get("content", "") or ""
            summary = _extract_summary(_truncate(raw_text, max_snippet_chars))
            item_id = hit.get("id", "") or f"tavily_{index}"
            metadata = {
                "fileName": hit.get("title", "") or "",
                "url": hit.get("url", "") or "",
                "source": "tavily",
                "published_date": hit.get("published_date"),
                "summary": summary,
            }
            collected.append(SearchItem(id=item_id, text=summary, score=12.0, metadata=metadata))
    except Exception as e:
        sys_plugin_logger.warning(f'Failed to search in Tavily tool: {str(e)}')
    return collected
186+
187+
188+
def _search_linkup(query: str, api_key: str, max_results: int, max_snippet_chars: int) -> List[SearchItem]:
    """
    Run ``query`` against Linkup and convert the hits into ``SearchItem`` objects.

    Each hit's ``content`` (falling back to its ``text`` attribute) is truncated
    to ``max_snippet_chars`` and then condensed with ``_extract_summary``; the
    summary is stored both as the item text and under ``metadata["summary"]``.
    ``max_results`` is applied by slicing the response, since it is not passed
    to the search call itself.

    Any exception is logged as a warning and the items collected so far
    (possibly none) are returned instead of raising.
    """
    collected: List[SearchItem] = []
    try:
        response = LinkupClient(api_key=api_key).search(
            query=query,
            depth="standard",
            output_type="searchResults",
            include_images=False,
        )
        hits = getattr(response, "results", [])[:max_results]
        for index, hit in enumerate(hits):
            raw_text = getattr(hit, "content", "") or getattr(hit, "text", "") or ""
            summary = _extract_summary(_truncate(raw_text, max_snippet_chars))
            item_id = getattr(hit, "id", "") or f"linkup_{index}"
            metadata = {
                "fileName": getattr(hit, "name", None) or getattr(hit, "title", "") or "",
                "url": getattr(hit, "url", "") or "",
                "source": "linkup",
                "published_date": None,
                "summary": summary,
            }
            collected.append(SearchItem(id=item_id, text=summary, score=12.0, metadata=metadata))
    except Exception as e:
        sys_plugin_logger.warning(f'Failed to search in Linkup tool: {str(e)}')
    return collected
219+
220+
78221
def _internet_search(
79222
query: str,
80223
api_keys: Dict[str, str],
@@ -88,97 +231,39 @@ def _internet_search(
88231
for name in ("exa", "tavily", "linkup"):
89232
if api_keys.get(name):
90233
selected.append(name)
91-
items: List[SearchItem] = []
92-
errors = [] # 记录失败的搜索工具
93234

94-
# Exa
235+
# 准备并行搜索任务
236+
search_tasks = []
95237
if "exa" in selected and api_keys.get("exa"):
96-
try:
97-
exa_client = Exa(api_key=api_keys["exa"])
98-
res = exa_client.search_and_contents(
99-
query,
100-
text={"max_characters": 2000},
101-
livecrawl="always",
102-
num_results=max_results_per_provider,
103-
)
104-
for i, r in enumerate(getattr(res, "results", [])[:max_results_per_provider]):
105-
text = _truncate(getattr(r, "text", "") or getattr(r, "content", "") or "", max_snippet_chars)
106-
items.append(
107-
SearchItem(
108-
id=getattr(r, "id", "") or f"exa_{i}",
109-
text=text,
110-
score=12.0, # 使用float确保序列化
111-
metadata={
112-
"fileName": getattr(r, "title", "") or "",
113-
"url": getattr(r, "url", "") or "",
114-
"source": "exa",
115-
"published_date": getattr(r, "published_date", None),
116-
"summary": text,
117-
}
118-
)
119-
)
120-
except Exception as e:
121-
sys_plugin_logger.warning(f'Failed to search in Exa tool: {str(e)}')
122-
errors.append("exa")
123-
124-
# Tavily
238+
search_tasks.append(("exa", _search_exa, api_keys["exa"]))
125239
if "tavily" in selected and api_keys.get("tavily"):
126-
try:
127-
tavily_client = TavilyClient(api_key=api_keys["tavily"])
128-
res = tavily_client.search(
129-
query=query,
130-
max_results=max_results_per_provider,
131-
include_images=False,
132-
)
133-
for i, r in enumerate(res.get("results", [])[:max_results_per_provider]):
134-
text = _truncate(r.get("content", "") or "", max_snippet_chars)
135-
items.append(
136-
SearchItem(
137-
id=r.get("id", "") or f"tavily_{i}",
138-
text=text,
139-
score=12.0,
140-
metadata={
141-
"fileName": r.get("title", "") or "",
142-
"url": r.get("url", "") or "",
143-
"source": "tavily",
144-
"published_date": r.get("published_date"),
145-
"summary": text,
146-
}
147-
)
148-
)
149-
except Exception as e:
150-
sys_plugin_logger.warning(f'Failed to search in Tavily tool: {str(e)}')
151-
errors.append("tavily")
152-
153-
# Linkup
240+
search_tasks.append(("tavily", _search_tavily, api_keys["tavily"]))
154241
if "linkup" in selected and api_keys.get("linkup"):
155-
try:
156-
linkup_client = LinkupClient(api_key=api_keys["linkup"])
157-
resp = linkup_client.search(
158-
query=query,
159-
depth="standard",
160-
output_type="searchResults",
161-
include_images=False,
162-
)
163-
for i, r in enumerate(getattr(resp, "results", [])[:max_results_per_provider]):
164-
text = _truncate(getattr(r, "content", "") or getattr(r, "text", "") or "", max_snippet_chars)
165-
items.append(
166-
SearchItem(
167-
id=getattr(r, "id", "") or f"linkup_{i}",
168-
text=text,
169-
score=12.0,
170-
metadata={
171-
"fileName": getattr(r, "name", None) or getattr(r, "title", "") or "",
172-
"url": getattr(r, "url", "") or "",
173-
"source": "linkup",
174-
"published_date": None,
175-
"summary": text,
176-
}
177-
)
178-
)
179-
except Exception as e:
180-
sys_plugin_logger.warning(f'Failed to search in Linkup tool: {str(e)}')
181-
errors.append("linkup")
242+
search_tasks.append(("linkup", _search_linkup, api_keys["linkup"]))
243+
244+
# 使用线程池并行执行搜索
245+
items: List[SearchItem] = []
246+
errors = []
247+
248+
with ThreadPoolExecutor(max_workers=len(search_tasks)) as executor:
249+
# 提交所有搜索任务
250+
future_to_provider = {
251+
executor.submit(task_func, query, api_key, max_results_per_provider, max_snippet_chars): provider_name
252+
for provider_name, task_func, api_key in search_tasks
253+
}
254+
255+
# 收集结果
256+
for future in as_completed(future_to_provider):
257+
provider_name = future_to_provider[future]
258+
try:
259+
results = future.result()
260+
if results:
261+
items.extend(results)
262+
else:
263+
errors.append(provider_name)
264+
except Exception as e:
265+
sys_plugin_logger.error(f'Unexpected error in {provider_name} search: {str(e)}')
266+
errors.append(provider_name)
182267

183268
# 如果所有搜索都失败了,才抛出异常
184269
if not items and errors:

0 commit comments

Comments
 (0)