@@ -38,7 +38,7 @@ struct HeadlessBrowserSearchService: SearchService {
3838 case . google:
3939 return try GoogleSearchResultParser . parse ( html: html)
4040 case . baidu:
41- return BaiduSearchResultParser . parse ( html: html)
41+ return await BaiduSearchResultParser . parse ( html: html)
4242 case . duckDuckGo:
4343 return DuckDuckGoSearchResultParser . parse ( html: html)
4444 case . bing:
@@ -85,8 +85,58 @@ enum BaiduSearchResultParser {
/// Reports whether `document` contains the Baidu results container.
///
/// - Parameter document: The parsed SwiftSoup document to inspect.
/// - Returns: `true` when the selector query succeeds and yields at least one
///   element; `false` when the query throws or matches nothing.
// NOTE(review): the padding inside the selector literal below looks like an
// extraction artifact of this diff view — confirm against the real file.
8585 static func validate( document: SwiftSoup . Document ) -> Bool {
8686 return ( try ? document. select ( " #content_left " ) . first ( ) ) != nil
8787 }
88+
/// Resolves a Baidu result link to the URL it ultimately redirects to.
///
/// Performs a GET request for `baiduLink` with a desktop Safari User-Agent and
/// records every redirect target through a `RedirectCapturer` delegate.
///
/// - Parameter baiduLink: The link taken from a Baidu result entry.
/// - Returns: The absolute string of the last redirect target observed, or
///   `baiduLink` unchanged when it is not a valid URL or no redirect occurred.
static func getRealLink(from baiduLink: String) async -> String {
    guard let url = URL(string: baiduLink) else {
        return baiduLink
    }

    let config = URLSessionConfiguration.default
    config.httpShouldSetCookies = true
    config.httpCookieAcceptPolicy = .always

    var request = URLRequest(url: url)
    request.httpMethod = "GET"
    request.setValue(
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.0 Safari/605.1.15",
        forHTTPHeaderField: "User-Agent"
    )

    let redirectCapturer = RedirectCapturer()
    let session = URLSession(
        configuration: config,
        delegate: redirectCapturer,
        delegateQueue: nil
    )
    // A URLSession keeps a strong reference to its delegate until it is
    // invalidated; without this, every call would leak the session and capturer.
    defer { session.finishTasksAndInvalidate() }

    // Best effort: the response body is irrelevant, and a failure while loading
    // the final page does not invalidate a redirect target already captured.
    _ = try? await session.data(for: request)

    return redirectCapturer.finalURL?.absoluteString ?? baiduLink
}
124+
/// URL session task delegate that remembers the most recent redirect target
/// while letting every redirect proceed.
final class RedirectCapturer: NSObject, URLSessionTaskDelegate {
    /// URL of the latest redirect request seen; `nil` until a redirect happens.
    // NOTE(review): written from the session's delegate callback and read by the
    // caller after the request finishes — confirm no concurrent access is possible.
    var finalURL: URL?

    /// Records the redirect target and allows the redirect to be followed.
    ///
    /// - Parameters:
    ///   - session: The session performing the task.
    ///   - task: The task being redirected.
    ///   - response: The redirect response received from the server.
    ///   - request: The request for the redirect target.
    ///   - completionHandler: Called with `request` unchanged so the redirect
    ///     is followed.
    func urlSession(
        _ session: URLSession,
        task: URLSessionTask,
        willPerformHTTPRedirection response: HTTPURLResponse,
        newRequest request: URLRequest,
        completionHandler: @escaping (URLRequest?) -> Void
    ) {
        finalURL = request.url
        completionHandler(request)
    }
}
139+ static func parse( html: String ) async -> WebSearchResult {
90140 let document = try ? SwiftSoup . parse ( html)
91141 let elements = try ? document? . select ( " #content_left " ) . first ( ) ? . children ( )
92142
@@ -97,6 +147,7 @@ enum BaiduSearchResultParser {
97147 let link = try ? element. select ( " a " ) . attr ( " href " ) ,
98148 link. hasPrefix ( " http " )
99149 {
150+ let realLink = await getRealLink ( from: link)
100151 let title = ( try ? titleElement. text ( ) ) ?? " "
101152 let snippet = {
102153 let abstract = try ? element. select ( " div[data-module= \" abstract \" ] " ) . text ( )
@@ -106,7 +157,7 @@ enum BaiduSearchResultParser {
106157 return ( try ? titleElement. nextElementSibling ( ) ? . text ( ) ) ?? " "
107158 } ( )
108159 results. append ( WebSearchResult . WebPage (
109- urlString: link ,
160+ urlString: realLink ,
110161 title: title,
111162 snippet: snippet
112163 ) )
0 commit comments