@@ -30,8 +30,8 @@ public partial class FrmMainApp : Form
3030 internal static IEnumerable < ETFType > ? ETF_Types ;
3131
3232 private CancellationTokenSource cancellationTokenSource ;
33-
34- private static HttpClient _httpClient = new ( ) ;
33+ private static int _maxConnectionsPerServerSetting ;
34+ private static readonly HttpClient HttpClient = new ( ) ;
3535
3636 public FrmMainApp ( )
3737 {
@@ -43,27 +43,12 @@ public FrmMainApp()
4343 HelperDataDatabaseAndStartup . DataWriteSQLiteSettingsDefaultSettings ( ) ; // fill db w/ defaults where needed
4444
4545 // get defaults
46- double pooledConnectionLifetimeSetting = Convert . ToDouble (
47- value : HelperDataApplicationSettings . DataReadSQLiteSettings ( tableName : "settings" ,
48- settingId : "PooledConnectionLifetime" ) ) ;
4946
50- double pooledConnectionIdleTimeoutSetting = Convert . ToDouble (
51- value : HelperDataApplicationSettings . DataReadSQLiteSettings ( tableName : "settings" ,
52- settingId : "PooledConnectionIdleTimeout" ) ) ;
53-
54- int maxConnectionsPerServerSetting = Convert . ToInt16 (
47+ // re _maxConnectionsPerServerSetting: each URL is requested twice, so a setting of 50 sends out 100 requests per chunk
48+ // ... because we send 1 request for the "normal" page and 1 for the "company-information" page.
49+ _maxConnectionsPerServerSetting = Convert . ToInt16 (
5550 value : HelperDataApplicationSettings . DataReadSQLiteSettings ( tableName : "settings" ,
5651 settingId : "MaxConnectionsPerServer" ) ) ;
57-
58-
59- SocketsHttpHandler socketsHandler = new ( )
60- {
61- PooledConnectionLifetime = TimeSpan . FromMinutes ( value : pooledConnectionLifetimeSetting ) ,
62- PooledConnectionIdleTimeout = TimeSpan . FromMinutes ( value : pooledConnectionIdleTimeoutSetting ) ,
63- MaxConnectionsPerServer = maxConnectionsPerServerSetting
64- } ;
65-
66- _httpClient = new HttpClient ( handler : socketsHandler ) ;
6752 }
6853
6954 private void FrmMainApp_Load ( object sender , EventArgs e )
@@ -145,7 +130,7 @@ private static async Task<bool> ReadJsonFXFromWebAsync(FrmMainApp formInstance,
145130
146131
147132 HttpResponseMessage response =
148- await _httpClient . GetAsync ( requestUri : FxUrl , cancellationToken : cancellationToken ) ;
133+ await HttpClient . GetAsync ( requestUri : FxUrl , cancellationToken : cancellationToken ) ;
149134 response . EnsureSuccessStatusCode ( ) ;
150135
151136 string jsonString = await response . Content . ReadAsStringAsync ( cancellationToken : cancellationToken ) ;
@@ -216,7 +201,7 @@ private static async Task CollateHLStocksByLetterAsync(FrmMainApp formInstance,
216201
217202 string url = "https://www.hl.co.uk/shares/shares-search-results/" + alphabetChar ;
218203 HttpResponseMessage response =
219- await _httpClient . GetAsync ( requestUri : url , cancellationToken : cancellationToken ) ;
204+ await HttpClient . GetAsync ( requestUri : url , cancellationToken : cancellationToken ) ;
220205 response . EnsureSuccessStatusCode ( ) ;
221206
222207 respString = await response . Content . ReadAsStringAsync ( cancellationToken : cancellationToken ) ;
@@ -285,31 +270,54 @@ private static async Task FireAndForgetAsync(HashSet<string> urls, FrmMainApp fo
285270 AppendLogWindowText ( tbx : formInstance . tbx_Log , appendText : "Scraping all items." ,
286271 logMessageType : LogMessageTypes . Start ) ;
287272
273+ // Chunk size for processing tasks
274+ int chunkSize = _maxConnectionsPerServerSetting ;
275+
276+ // Create a list to hold the tasks
288277 List < Task > tasks = new ( ) ;
289278
290- // Loop through each URL in the chunk and start scraping
291- foreach ( string url in urls )
279+ // Iterate over the URLs in chunks
280+ for ( int i = 0 ; i < urls . Count ; i += chunkSize )
292281 {
293- // Check if cancellation has been requested
294- cancellationToken . ThrowIfCancellationRequested ( ) ;
282+ // Get the chunk of URLs
283+ IEnumerable < string > chunk = urls . Skip ( count : i ) . Take ( count : chunkSize ) ;
284+
285+ // Create a list to hold the tasks for this chunk
286+ List < Task > chunkTasks = new ( ) ;
287+
288+ // Create a new HttpClient for this chunk
289+ using HttpClient httpClient = new ( ) ;
290+
291+ // Loop through each URL in the chunk and start scraping
292+ foreach ( string url in chunk )
293+ {
294+ // Check if cancellation has been requested
295+ cancellationToken . ThrowIfCancellationRequested ( ) ;
296+
297+ // Construct the URL with "/company-information" appended
298+ string companyInfoUrl = url + "/company-information" ;
299+
300+ // Add tasks to scrape the main URL and company info URL
301+ chunkTasks . Add ( item : GetHtmlAsyncWithClient ( httpClient : httpClient , url : url ,
302+ formInstance : formInstance , cancellationToken : cancellationToken ) ) ;
303+ chunkTasks . Add ( item : GetHtmlAsyncWithClient ( httpClient : httpClient , url : companyInfoUrl ,
304+ formInstance : formInstance , cancellationToken : cancellationToken ) ) ;
305+ }
306+
307+ // Add chunk tasks to the main tasks list
308+ tasks . AddRange ( collection : chunkTasks ) ;
295309
296- // Construct the URL with "/company-information" appended
297- string companyInfoUrl = url + "/company-information" ;
310+ // Wait for the chunk tasks to complete or cancellation requested
311+ await Task . WhenAll ( tasks : chunkTasks ) ;
312+
313+ // Dispose of the HttpClient to close connections and release resources
314+ httpClient . Dispose ( ) ;
298315
299- // Add tasks to scrape the main URL and company info URL
300- tasks . Add ( item : GetHtmlAsync ( url : url ,
301- formInstance : formInstance , cancellationToken : cancellationToken ) ) ;
302- tasks . Add ( item : GetHtmlAsync ( url : companyInfoUrl ,
303- formInstance : formInstance , cancellationToken : cancellationToken ) ) ;
304316 // Break loop if cancellation requested
305317 if ( cancellationToken . IsCancellationRequested )
306318 break ;
307319 }
308320
309- // Wait for the chunk tasks to complete or cancellation requested
310- await Task . WhenAll ( tasks : tasks ) ;
311-
312-
313321 // Scraping completed
314322 AppendLogWindowText ( tbx : formInstance . tbx_Log , appendText : "Scraping all items." ,
315323 logMessageType : LogMessageTypes . Done ) ;
@@ -323,64 +331,33 @@ private static async Task FireAndForgetAsync(HashSet<string> urls, FrmMainApp fo
323331 }
324332 }
325333
/// <summary>
///     Downloads a single page with the supplied client and stores its cleaned-up text in the matching cache:
///     URLs containing "company-info" are written to urlAndCompanyInfoHashtable, all other URLs to
///     urlAndHtmlContentHashtable. The outcome (success or failure) is reported through
///     IncrementCounterAndLogProgress.
/// </summary>
/// <param name="httpClient">Client used to issue the GET request. It is not disposed by this method.</param>
/// <param name="url">URL of the page to fetch; also used as the cache key.</param>
/// <param name="formInstance">Form instance passed on to IncrementCounterAndLogProgress.</param>
/// <param name="cancellationToken">Token checked before the request and passed to the HTTP calls.</param>
/// <exception cref="OperationCanceledException">
///     Cancellation was requested through <paramref name="cancellationToken" />.
/// </exception>
private static async Task GetHtmlAsyncWithClient(HttpClient httpClient, string url, FrmMainApp formInstance,
    CancellationToken cancellationToken)
{
    // Check if cancellation has been requested before making the request
    cancellationToken.ThrowIfCancellationRequested();

    string htmlContent;
    try
    {
        // Dispose the response as soon as the body has been read so the connection is released promptly.
        using HttpResponseMessage response =
            await httpClient.GetAsync(requestUri: url, cancellationToken: cancellationToken);

        // Reject non-2xx answers; otherwise an error page would be parsed and counted as a success.
        response.EnsureSuccessStatusCode();

        htmlContent = await response.Content.ReadAsStringAsync(cancellationToken: cancellationToken);
    }
    catch (HttpRequestException ex)
    {
        // A failure on one URL must not fault the caller's whole batch: record it and move on.
        IncrementCounterAndLogProgress(url: url, formInstance: formInstance, isSuccess: false,
            errorMsg: ex.Message);
        return;
    }
    catch (OperationCanceledException ex) when (ex.InnerException is TimeoutException)
    {
        // Request timeout (not a user cancellation): record it as a failed URL.
        // Genuine cancellations do not match this filter and keep propagating.
        IncrementCounterAndLogProgress(url: url, formInstance: formInstance, isSuccess: false,
            errorMsg: ex.Message);
        return;
    }

    // Both page kinds share the same first clean-up step.
    string compactHtml = HelperStringUtils.TrimInternalSpaces(s: htmlContent);

    if (!url.Contains(value: "company-info"))
    {
        // Page
        urlAndHtmlContentHashtable.AddOrUpdate(key: url,
            value: HelperStringUtils.TrimAndReplaceNewLinesAndTabs(
                text: ReturnPageText(HTMLTextInHtmlContentHashtable: compactHtml)));
    }
    else
    {
        // Company
        urlAndCompanyInfoHashtable.AddOrUpdate(key: url,
            value: HelperStringUtils.TrimAndReplaceNewLinesAndTabs(
                text: ReturnCompanyPageText(HTMLTextInCompanyInfoHashtable: compactHtml)));
    }

    IncrementCounterAndLogProgress(url: url, formInstance: formInstance, isSuccess: true);
}
382360
383-
384361 /// <summary>
385362 /// Creates the SEDOLs. Technically the primary key is the URL at the stage of creation.
386363 /// </summary>
@@ -782,6 +759,7 @@ await CollateHLStocksByLetterAsync(formInstance: this,
782759 string logMessageVal = "Building parsing data - ready for output." ;
783760 AppendLogWindowText ( tbx : frmMainAppInstance . tbx_Log , appendText : logMessageVal ,
784761 logMessageType : LogMessageTypes . Done ) ;
762+ nic_ProcessFinished . ShowBalloonTip ( timeout : 150 ) ;
785763 }
786764 else
787765 {
0 commit comments