44# Licensed under the MIT License. See License.txt in the project root for license information.
55# ======================================================================================================================
66import json
7+ from concurrent .futures import ThreadPoolExecutor , as_completed
78from dataclasses import dataclass
89from typing import Dict , List , Optional , Sequence
910from linkup import LinkupClient
@@ -75,6 +76,148 @@ def _truncate(text: str, max_chars: int) -> str:
7576 return text [: max_chars - 1 ].rstrip () + "…"
7677
7778
79+ def _extract_summary (text : str , max_sentences : int = 4 ) -> str :
80+ """
81+ 从文本中提取前几句话作为摘要
82+
83+ Args:
84+ text: 原始文本
85+ max_sentences: 最多保留的句子数,默认为4句
86+
87+ Returns:
88+ 摘要文本
89+ """
90+ if not text :
91+ return ""
92+
93+ # 定义句子分隔符(支持中英文)
94+ import re
95+ # 使用正则表达式匹配句子结束符号
96+ sentences = re .split (r'([。!?\.!?]+["\'»\)]?\s*)' , text )
97+
98+ # 重新组合句子(将分隔符和句子内容合并)
99+ combined_sentences = []
100+ for i in range (0 , len (sentences ) - 1 , 2 ):
101+ sentence = sentences [i ]
102+ separator = sentences [i + 1 ] if i + 1 < len (sentences ) else ""
103+ combined = (sentence + separator ).strip ()
104+ if combined :
105+ combined_sentences .append (combined )
106+
107+ # 如果最后一个元素没有分隔符
108+ if len (sentences ) % 2 == 1 and sentences [- 1 ].strip ():
109+ combined_sentences .append (sentences [- 1 ].strip ())
110+
111+ # 取前 max_sentences 句
112+ if len (combined_sentences ) <= max_sentences :
113+ summary = " " .join (combined_sentences )
114+ else :
115+ summary = " " .join (combined_sentences [:max_sentences ])
116+
117+ # 确保摘要不会过长(最多150字符)
118+ if len (summary ) > 150 :
119+ summary = summary [:147 ].rstrip () + "..."
120+
121+ return summary
122+
def _search_exa(query: str, api_key: str, max_results: int, max_snippet_chars: int) -> List[SearchItem]:
    """Run *query* against the Exa search API and normalize hits to SearchItem."""
    results: List[SearchItem] = []
    try:
        client = Exa(api_key=api_key)
        response = client.search_and_contents(
            query,
            text={"max_characters": 2000},
            livecrawl="always",
            num_results=max_results,
        )
        for idx, hit in enumerate(getattr(response, "results", [])[:max_results]):
            body = getattr(hit, "text", "") or getattr(hit, "content", "") or ""
            snippet = _truncate(body, max_snippet_chars)
            # Condense the snippet to its leading sentences.
            summary = _extract_summary(snippet)
            meta = {
                "fileName": getattr(hit, "title", "") or "",
                "url": getattr(hit, "url", "") or "",
                "source": "exa",
                "published_date": getattr(hit, "published_date", None),
                "summary": summary,
            }
            results.append(
                SearchItem(
                    id=getattr(hit, "id", "") or f"exa_{idx}",
                    text=summary,
                    score=12.0,
                    metadata=meta,
                )
            )
    except Exception as e:
        # Best-effort provider: log and return whatever was collected.
        sys_plugin_logger.warning(f'Failed to search in Exa tool: {str(e)}')
    return results
154+
155+
def _search_tavily(query: str, api_key: str, max_results: int, max_snippet_chars: int) -> List[SearchItem]:
    """Run *query* against the Tavily search API and normalize hits to SearchItem."""
    results: List[SearchItem] = []
    try:
        client = TavilyClient(api_key=api_key)
        response = client.search(
            query=query,
            max_results=max_results,
            include_images=False,
        )
        for idx, hit in enumerate(response.get("results", [])[:max_results]):
            snippet = _truncate(hit.get("content", "") or "", max_snippet_chars)
            # Condense the snippet to its leading sentences.
            summary = _extract_summary(snippet)
            meta = {
                "fileName": hit.get("title", "") or "",
                "url": hit.get("url", "") or "",
                "source": "tavily",
                "published_date": hit.get("published_date"),
                "summary": summary,
            }
            results.append(
                SearchItem(
                    id=hit.get("id", "") or f"tavily_{idx}",
                    text=summary,
                    score=12.0,
                    metadata=meta,
                )
            )
    except Exception as e:
        # Best-effort provider: log and return whatever was collected.
        sys_plugin_logger.warning(f'Failed to search in Tavily tool: {str(e)}')
    return results
186+
187+
def _search_linkup(query: str, api_key: str, max_results: int, max_snippet_chars: int) -> List[SearchItem]:
    """Run *query* against the Linkup search API and normalize hits to SearchItem."""
    results: List[SearchItem] = []
    try:
        client = LinkupClient(api_key=api_key)
        response = client.search(
            query=query,
            depth="standard",
            output_type="searchResults",
            include_images=False,
        )
        for idx, hit in enumerate(getattr(response, "results", [])[:max_results]):
            body = getattr(hit, "content", "") or getattr(hit, "text", "") or ""
            snippet = _truncate(body, max_snippet_chars)
            # Condense the snippet to its leading sentences.
            summary = _extract_summary(snippet)
            meta = {
                "fileName": getattr(hit, "name", None) or getattr(hit, "title", "") or "",
                "url": getattr(hit, "url", "") or "",
                "source": "linkup",
                # Linkup results carry no publication date in this response shape.
                "published_date": None,
                "summary": summary,
            }
            results.append(
                SearchItem(
                    id=getattr(hit, "id", "") or f"linkup_{idx}",
                    text=summary,
                    score=12.0,
                    metadata=meta,
                )
            )
    except Exception as e:
        # Best-effort provider: log and return whatever was collected.
        sys_plugin_logger.warning(f'Failed to search in Linkup tool: {str(e)}')
    return results
219+
220+
78221def _internet_search (
79222 query : str ,
80223 api_keys : Dict [str , str ],
@@ -88,97 +231,39 @@ def _internet_search(
88231 for name in ("exa" , "tavily" , "linkup" ):
89232 if api_keys .get (name ):
90233 selected .append (name )
91- items : List [SearchItem ] = []
92- errors = [] # 记录失败的搜索工具
93234
94- # Exa
235+ # 准备并行搜索任务
236+ search_tasks = []
95237 if "exa" in selected and api_keys .get ("exa" ):
96- try :
97- exa_client = Exa (api_key = api_keys ["exa" ])
98- res = exa_client .search_and_contents (
99- query ,
100- text = {"max_characters" : 2000 },
101- livecrawl = "always" ,
102- num_results = max_results_per_provider ,
103- )
104- for i , r in enumerate (getattr (res , "results" , [])[:max_results_per_provider ]):
105- text = _truncate (getattr (r , "text" , "" ) or getattr (r , "content" , "" ) or "" , max_snippet_chars )
106- items .append (
107- SearchItem (
108- id = getattr (r , "id" , "" ) or f"exa_{ i } " ,
109- text = text ,
110- score = 12.0 , # 使用float确保序列化
111- metadata = {
112- "fileName" : getattr (r , "title" , "" ) or "" ,
113- "url" : getattr (r , "url" , "" ) or "" ,
114- "source" : "exa" ,
115- "published_date" : getattr (r , "published_date" , None ),
116- "summary" : text ,
117- }
118- )
119- )
120- except Exception as e :
121- sys_plugin_logger .warning (f'Failed to search in Exa tool: { str (e )} ' )
122- errors .append ("exa" )
123-
124- # Tavily
238+ search_tasks .append (("exa" , _search_exa , api_keys ["exa" ]))
125239 if "tavily" in selected and api_keys .get ("tavily" ):
126- try :
127- tavily_client = TavilyClient (api_key = api_keys ["tavily" ])
128- res = tavily_client .search (
129- query = query ,
130- max_results = max_results_per_provider ,
131- include_images = False ,
132- )
133- for i , r in enumerate (res .get ("results" , [])[:max_results_per_provider ]):
134- text = _truncate (r .get ("content" , "" ) or "" , max_snippet_chars )
135- items .append (
136- SearchItem (
137- id = r .get ("id" , "" ) or f"tavily_{ i } " ,
138- text = text ,
139- score = 12.0 ,
140- metadata = {
141- "fileName" : r .get ("title" , "" ) or "" ,
142- "url" : r .get ("url" , "" ) or "" ,
143- "source" : "tavily" ,
144- "published_date" : r .get ("published_date" ),
145- "summary" : text ,
146- }
147- )
148- )
149- except Exception as e :
150- sys_plugin_logger .warning (f'Failed to search in Tavily tool: { str (e )} ' )
151- errors .append ("tavily" )
152-
153- # Linkup
240+ search_tasks .append (("tavily" , _search_tavily , api_keys ["tavily" ]))
154241 if "linkup" in selected and api_keys .get ("linkup" ):
155- try :
156- linkup_client = LinkupClient (api_key = api_keys ["linkup" ])
157- resp = linkup_client .search (
158- query = query ,
159- depth = "standard" ,
160- output_type = "searchResults" ,
161- include_images = False ,
162- )
163- for i , r in enumerate (getattr (resp , "results" , [])[:max_results_per_provider ]):
164- text = _truncate (getattr (r , "content" , "" ) or getattr (r , "text" , "" ) or "" , max_snippet_chars )
165- items .append (
166- SearchItem (
167- id = getattr (r , "id" , "" ) or f"linkup_{ i } " ,
168- text = text ,
169- score = 12.0 ,
170- metadata = {
171- "fileName" : getattr (r , "name" , None ) or getattr (r , "title" , "" ) or "" ,
172- "url" : getattr (r , "url" , "" ) or "" ,
173- "source" : "linkup" ,
174- "published_date" : None ,
175- "summary" : text ,
176- }
177- )
178- )
179- except Exception as e :
180- sys_plugin_logger .warning (f'Failed to search in Linkup tool: { str (e )} ' )
181- errors .append ("linkup" )
242+ search_tasks .append (("linkup" , _search_linkup , api_keys ["linkup" ]))
243+
244+ # 使用线程池并行执行搜索
245+ items : List [SearchItem ] = []
246+ errors = []
247+
248+ with ThreadPoolExecutor (max_workers = len (search_tasks )) as executor :
249+ # 提交所有搜索任务
250+ future_to_provider = {
251+ executor .submit (task_func , query , api_key , max_results_per_provider , max_snippet_chars ): provider_name
252+ for provider_name , task_func , api_key in search_tasks
253+ }
254+
255+ # 收集结果
256+ for future in as_completed (future_to_provider ):
257+ provider_name = future_to_provider [future ]
258+ try :
259+ results = future .result ()
260+ if results :
261+ items .extend (results )
262+ else :
263+ errors .append (provider_name )
264+ except Exception as e :
265+ sys_plugin_logger .error (f'Unexpected error in { provider_name } search: { str (e )} ' )
266+ errors .append (provider_name )
182267
183268 # 如果所有搜索都失败了,才抛出异常
184269 if not items and errors :
0 commit comments