Skip to content

Commit 3d910c6

Browse files
committed
Fix bing link extraction
1 parent a3f4a70 commit 3d910c6

File tree

1 file changed

+38
-1
lines changed

1 file changed

+38
-1
lines changed

Tool/Sources/WebSearchService/SearchServices/HeadlessBrowserSearchService.swift

Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ enum BaiduSearchResultParser {
8585
/// Reports whether `document` contains an element matching `#content_left`.
///
/// - Parameter document: The parsed HTML document to inspect.
/// - Returns: `true` when the selector yields a first element; `false` when
///   the lookup throws or matches nothing.
static func validate(document: SwiftSoup.Document) -> Bool {
    let resultsContainer = try? document.select("#content_left").first()
    return resultsContainer != nil
}
88-
88+
8989
static func getRealLink(from baiduLink: String) async -> String {
9090
guard let url = URL(string: baiduLink) else {
9191
return baiduLink
@@ -136,6 +136,7 @@ enum BaiduSearchResultParser {
136136
completionHandler(request)
137137
}
138138
}
139+
139140
static func parse(html: String) async -> WebSearchResult {
140141
let document = try? SwiftSoup.parse(html)
141142
let elements = try? document?.select("#content_left").first()?.children()
@@ -212,6 +213,41 @@ enum BingSearchResultParser {
212213
return (try? document.select("#b_results").first()) != nil
213214
}
214215

216+
/// Resolves a Bing redirect link to the destination URL it wraps.
///
/// The destination is read from the link's `u` query item, which carries the
/// target as Base64 text, optionally preceded by a two-character `a1` marker.
/// Both the standard and the URL-safe (`-` / `_`) Base64 alphabets are
/// accepted, and missing `=` padding is restored before decoding.
///
/// - Parameter bingLink: The link taken from a Bing search result.
/// - Returns: The decoded destination, or `bingLink` unchanged when it is not
///   a valid URL, has no `u` query item, or the value does not decode to
///   non-empty UTF-8 text.
static func getRealLink(from bingLink: String) -> String {
    guard let url = URL(string: bingLink),
          let components = URLComponents(url: url, resolvingAgainstBaseURL: true),
          let encodedTarget = components.queryItems?.first(where: { $0.name == "u" })?.value
    else { return bingLink }

    // "aHR0" is the Base64 encoding of "htt", so an "a1aHR" prefix means a
    // two-character marker sits in front of an encoded "http…" payload.
    var payload = encodedTarget.hasPrefix("a1aHR")
        ? String(encodedTarget.dropFirst(2))
        : encodedTarget

    // `Data(base64Encoded:)` only understands the standard alphabet; map the
    // URL-safe characters back so such payloads decode instead of failing.
    payload = payload
        .replacingOccurrences(of: "-", with: "+")
        .replacingOccurrences(of: "_", with: "/")

    // Restore the padding that is stripped from the query value. A remainder
    // of 1 can never be valid Base64 and is rejected by the decoder below.
    let remainder = payload.utf8.count % 4
    if remainder != 0 {
        payload += String(repeating: "=", count: 4 - remainder)
    }

    guard let decodedData = Data(base64Encoded: payload),
          let decodedLink = String(data: decodedData, encoding: .utf8),
          !decodedLink.isEmpty // an empty `u=` value must not yield an empty link
    else { return bingLink }

    return decodedLink
}
250+
215251
static func parse(html: String) -> WebSearchResult {
216252
let document = try? SwiftSoup.parse(html)
217253
let searchResults = try? document?.select("#b_results").first()
@@ -224,6 +260,7 @@ enum BingSearchResultParser {
224260
let link = try? linkElement.attr("href"),
225261
link.hasPrefix("http")
226262
{
263+
let link = getRealLink(from: link)
227264
let title = (try? titleElement.text()) ?? ""
228265
let snippet = {
229266
if let it = try? element.select(".b_caption p").first()?.text(),

0 commit comments

Comments
 (0)