11using System . Configuration ;
22using System . Diagnostics ;
33using System . Globalization ;
4- using System . Net ;
54using CsvHelper ;
65using HLWebScraper . Net . Helpers ;
76using HLWebScraper . Net . Model ;
@@ -36,12 +35,22 @@ public partial class FrmMainApp : Form
3635 private KeyValueConfigurationCollection _section = new ( ) ;
3736 private CancellationTokenSource cancellationTokenSource ;
3837
/// <summary>
/// Single <see cref="HttpClient"/> shared by every web request the form makes.
/// It is created exactly once: the field is static, so rebuilding it inside the instance
/// constructor would abandon (without disposing) the previous client and its connection pool
/// each time a form is constructed.
/// </summary>
private static readonly HttpClient _httpClient = new(handler: new SocketsHttpHandler
{
    // Recycle pooled connections periodically so that DNS changes are picked up.
    PooledConnectionLifetime = TimeSpan.FromMinutes(value: 2),
    // Upper bound on simultaneous connections to a single host.
    MaxConnectionsPerServer = 100
});

/// <summary>
/// Creates the main form: prepares the cancellation source, builds the designer-generated
/// controls and loads the ETF types from CSV.
/// </summary>
public FrmMainApp()
{
    cancellationTokenSource = new CancellationTokenSource();

    InitializeComponent();
    GetETFTypesFromCSV();
}
4655
4756 private void FrmMainApp_Load ( object sender , EventArgs e )
@@ -109,7 +118,6 @@ private void FillCbx_Securities(string filter = "")
109118 private static async Task < bool > ReadJsonFXFromWebAsync ( FrmMainApp formInstance ,
110119 CancellationToken cancellationToken )
111120 {
112- WebClient wc = new ( ) ;
113121 try
114122 {
115123 // Check if cancellation has been requested
@@ -123,7 +131,13 @@ private static async Task<bool> ReadJsonFXFromWebAsync(FrmMainApp formInstance,
123131 appendText : "Contacting FX Data website (http://www.floatrates.com/currency/gbp/)" ,
124132 logMessageType : LogMessageTypes . Info ) ;
125133
126- string jsonString = await wc . DownloadStringTaskAsync ( address : FxUrl ) ;
134+
135+ HttpResponseMessage response =
136+ await _httpClient . GetAsync ( requestUri : FxUrl , cancellationToken : cancellationToken ) ;
137+ response . EnsureSuccessStatusCode ( ) ;
138+
139+ string jsonString = await response . Content . ReadAsStringAsync ( cancellationToken : cancellationToken ) ;
140+
127141 Dictionary < string , FXCurrency > ? currencies =
128142 JsonConvert . DeserializeObject < Dictionary < string , FXCurrency > > ( value : jsonString ) ;
129143
@@ -188,9 +202,12 @@ private static async Task CollateHLStocksByLetterAsync(FrmMainApp formInstance,
188202 // Check if cancellation has been requested
189203 cancellationToken . ThrowIfCancellationRequested ( ) ;
190204
191- WebClient client = new ( ) ;
192205 string url = "https://www.hl.co.uk/shares/shares-search-results/" + alphabetChar ;
193- respString = await client . DownloadStringTaskAsync ( address : url ) ;
206+ HttpResponseMessage response =
207+ await _httpClient . GetAsync ( requestUri : url , cancellationToken : cancellationToken ) ;
208+ response . EnsureSuccessStatusCode ( ) ;
209+
210+ respString = await response . Content . ReadAsStringAsync ( cancellationToken : cancellationToken ) ;
194211 timeOutBool = true ;
195212 }
196213 catch ( Exception ex )
@@ -254,54 +271,31 @@ private static async Task FireAndForgetAsync(HashSet<string> urls, FrmMainApp fo
254271 AppendLogWindowText ( tbx : formInstance . tbx_Log , appendText : "Scraping all items." ,
255272 logMessageType : LogMessageTypes . Start ) ;
256273
257- // Chunk size for processing tasks
258- const int chunkSize = 50 ;
259-
260- // Create a list to hold the tasks
261274 List < Task > tasks = new ( ) ;
262275
263- // Iterate over the URLs in chunks
264- for ( int i = 0 ; i < urls . Count ; i += chunkSize )
276+ // Loop through each URL in the chunk and start scraping
277+ foreach ( string url in urls )
265278 {
266- // Get the chunk of URLs
267- IEnumerable < string > chunk = urls . Skip ( count : i ) . Take ( count : chunkSize ) ;
268-
269- // Create a list to hold the tasks for this chunk
270- List < Task > chunkTasks = new ( ) ;
271-
272- // Create a new HttpClient for this chunk
273- using HttpClient httpClient = new ( ) ;
274-
275- // Loop through each URL in the chunk and start scraping
276- foreach ( string url in chunk )
277- {
278- // Check if cancellation has been requested
279- cancellationToken . ThrowIfCancellationRequested ( ) ;
280-
281- // Construct the URL with "/company-information" appended
282- string companyInfoUrl = url + "/company-information" ;
283-
284- // Add tasks to scrape the main URL and company info URL
285- chunkTasks . Add ( item : GetHtmlAsyncWithClient ( httpClient : httpClient , url : url ,
286- formInstance : formInstance , cancellationToken : cancellationToken ) ) ;
287- chunkTasks . Add ( item : GetHtmlAsyncWithClient ( httpClient : httpClient , url : companyInfoUrl ,
288- formInstance : formInstance , cancellationToken : cancellationToken ) ) ;
289- }
290-
291- // Add chunk tasks to the main tasks list
292- tasks . AddRange ( collection : chunkTasks ) ;
279+ // Check if cancellation has been requested
280+ cancellationToken . ThrowIfCancellationRequested ( ) ;
293281
294- // Wait for the chunk tasks to complete or cancellation requested
295- await Task . WhenAll ( tasks : chunkTasks ) ;
296-
297- // Dispose of the HttpClient to close connections and release resources
298- httpClient . Dispose ( ) ;
282+ // Construct the URL with "/company-information" appended
283+ string companyInfoUrl = url + "/company-information" ;
299284
285+ // Add tasks to scrape the main URL and company info URL
286+ tasks . Add ( item : GetHtmlAsync ( url : url ,
287+ formInstance : formInstance , cancellationToken : cancellationToken ) ) ;
288+ tasks . Add ( item : GetHtmlAsync ( url : companyInfoUrl ,
289+ formInstance : formInstance , cancellationToken : cancellationToken ) ) ;
300290 // Break loop if cancellation requested
301291 if ( cancellationToken . IsCancellationRequested )
302292 break ;
303293 }
304294
295+ // Wait for the chunk tasks to complete or cancellation requested
296+ await Task . WhenAll ( tasks : tasks ) ;
297+
298+
305299 // Scraping completed
306300 AppendLogWindowText ( tbx : formInstance . tbx_Log , appendText : "Scraping all items." ,
307301 logMessageType : LogMessageTypes . Done ) ;
@@ -315,31 +309,61 @@ private static async Task FireAndForgetAsync(HashSet<string> urls, FrmMainApp fo
315309 }
316310 }
317311
318- private static async Task GetHtmlAsyncWithClient ( HttpClient httpClient , string url , FrmMainApp formInstance ,
319- CancellationToken cancellationToken )
/// <summary>
/// Downloads <paramref name="url"/> with the shared HTTP client, stores the cleaned-up page text
/// in the matching cache (company-info pages and regular pages are kept separately) and reports
/// progress. Requests that time out are retried, up to five attempts in total.
/// </summary>
/// <param name="url">Address of the page to fetch.</param>
/// <param name="formInstance">Form whose log window receives progress and error messages.</param>
/// <param name="cancellationToken">Token used to abort the download.</param>
/// <exception cref="OperationCanceledException">
/// Cancellation was requested through <paramref name="cancellationToken"/>.
/// </exception>
private static async Task GetHtmlAsync(string url, FrmMainApp formInstance, CancellationToken cancellationToken)
{
    const int maxRetries = 5;

    for (int retryCount = 1; retryCount <= maxRetries; retryCount++)
    {
        // Check if cancellation has been requested before making (or repeating) the request
        cancellationToken.ThrowIfCancellationRequested();

        try
        {
            // No Application.DoEvents() here: awaiting already yields to the message loop, and
            // pumping messages manually from async code invites re-entrancy.
            using HttpResponseMessage response =
                await _httpClient.GetAsync(requestUri: url, cancellationToken: cancellationToken);
            response.EnsureSuccessStatusCode();
            string htmlContent = await response.Content.ReadAsStringAsync(cancellationToken: cancellationToken);

            string trimmedHtml = HelperStringUtils.TrimInternalSpaces(s: htmlContent);

            if (!url.Contains(value: "company-info"))
            {
                // Regular page
                urlAndHtmlContentHashtable.AddOrUpdate(key: url,
                    value: HelperStringUtils.TrimAndReplaceNewLinesAndTabs(
                        text: ReturnPageText(HTMLTextInHtmlContentHashtable: trimmedHtml)));
            }
            else
            {
                // Company-information page
                urlAndCompanyInfoHashtable.AddOrUpdate(key: url,
                    value: HelperStringUtils.TrimAndReplaceNewLinesAndTabs(
                        text: ReturnCompanyPageText(HTMLTextInCompanyInfoHashtable: trimmedHtml)));
            }

            IncrementCounterAndLogProgress(url: url, formInstance: formInstance, isSuccess: true);

            // The request succeeded, so leave the retry loop
            return;
        }
        catch (OperationCanceledException ex) when (ex.InnerException is TimeoutException)
        {
            // HttpClient signals its own timeout as a cancellation wrapping a TimeoutException;
            // that is the only failure worth retrying.
            AppendLogWindowText(tbx: formInstance.tbx_Log,
                appendText:
                $"Timeout occurred while fetching URL '{url}'. Retry attempt {retryCount} of {maxRetries}.");
        }
        catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
        {
            // A genuine cancellation must reach the caller instead of looking like a completed download.
            throw;
        }
        catch (Exception ex)
        {
            // Any other failure is reported (so the page is accounted for) but not retried.
            IncrementCounterAndLogProgress(url: url, formInstance: formInstance, isSuccess: false,
                errorMsg: ex.Message);
            return;
        }

        // Back off briefly before the next attempt rather than retrying immediately.
        if (retryCount < maxRetries)
            await Task.Delay(delay: TimeSpan.FromSeconds(value: retryCount), cancellationToken: cancellationToken);
    }

    // Maximum retry attempts reached, log error and record the page as failed
    string exhaustedMessage = $"Maximum retry attempts reached for URL '{url}'.";
    AppendLogWindowText(tbx: formInstance.tbx_Log, appendText: exhaustedMessage);
    IncrementCounterAndLogProgress(url: url, formInstance: formInstance, isSuccess: false,
        errorMsg: exhaustedMessage);
}
344368
345369
@@ -363,8 +387,8 @@ private static async Task GetHtmlAsyncWithClient(HttpClient httpClient, string u
363387 {
364388 string logMessageVal = $ "Parsing { url } ";
365389 FrmMainApp frmMainAppInstance = ( FrmMainApp ) Application . OpenForms [ name : "FrmMainApp" ] ;
366- AppendLogWindowText ( tbx : frmMainAppInstance . tbx_Log , appendText : logMessageVal ,
367- logMessageType : LogMessageTypes . Start ) ;
390+ // AppendLogWindowText(tbx: frmMainAppInstance.tbx_Log, appendText: logMessageVal,
391+ // logMessageType: LogMessageTypes.Start);
368392
369393 string pageText = HTMLTextInHtmlContentHashtable ;
370394 string companyPageText = HTMLTextInCompanyInfoHashtable ;
0 commit comments