Skip to content

Commit d1b93c6

Browse files
committed
Reverted the no-loops-logic introduced in 8841
The no-loops logic seemingly resulted in timeouts due to rate limiting on the server side, whereas the chunked loops do not. Using loops comes at a minor performance penalty, but at least it doesn't get the user banned.
1 parent 28e6151 commit d1b93c6

7 files changed

Lines changed: 4069 additions & 98 deletions

File tree

FrmMainApp.Designer.cs

Lines changed: 9 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

FrmMainApp.cs

Lines changed: 67 additions & 89 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,8 @@ public partial class FrmMainApp : Form
3030
internal static IEnumerable<ETFType>? ETF_Types;
3131

3232
private CancellationTokenSource cancellationTokenSource;
33-
34-
private static HttpClient _httpClient = new();
33+
private static int _maxConnectionsPerServerSetting;
34+
private static readonly HttpClient HttpClient = new();
3535

3636
public FrmMainApp()
3737
{
@@ -43,27 +43,12 @@ public FrmMainApp()
4343
HelperDataDatabaseAndStartup.DataWriteSQLiteSettingsDefaultSettings(); // fill db w/ defaults where needed
4444

4545
// get defaults
46-
double pooledConnectionLifetimeSetting = Convert.ToDouble(
47-
value: HelperDataApplicationSettings.DataReadSQLiteSettings(tableName: "settings",
48-
settingId: "PooledConnectionLifetime"));
4946

50-
double pooledConnectionIdleTimeoutSetting = Convert.ToDouble(
51-
value: HelperDataApplicationSettings.DataReadSQLiteSettings(tableName: "settings",
52-
settingId: "PooledConnectionIdleTimeout"));
53-
54-
int maxConnectionsPerServerSetting = Convert.ToInt16(
47+
// re _maxConnectionsPerServerSetting: this is counted 2x in the code so a setting of 50 will send out 100 requests because
48+
// ... we're basically sending 1 for the "normal" page and 1 for the "company information" page.
49+
_maxConnectionsPerServerSetting = Convert.ToInt16(
5550
value: HelperDataApplicationSettings.DataReadSQLiteSettings(tableName: "settings",
5651
settingId: "MaxConnectionsPerServer"));
57-
58-
59-
SocketsHttpHandler socketsHandler = new()
60-
{
61-
PooledConnectionLifetime = TimeSpan.FromMinutes(value: pooledConnectionLifetimeSetting),
62-
PooledConnectionIdleTimeout = TimeSpan.FromMinutes(value: pooledConnectionIdleTimeoutSetting),
63-
MaxConnectionsPerServer = maxConnectionsPerServerSetting
64-
};
65-
66-
_httpClient = new HttpClient(handler: socketsHandler);
6752
}
6853

6954
private void FrmMainApp_Load(object sender, EventArgs e)
@@ -145,7 +130,7 @@ private static async Task<bool> ReadJsonFXFromWebAsync(FrmMainApp formInstance,
145130

146131

147132
HttpResponseMessage response =
148-
await _httpClient.GetAsync(requestUri: FxUrl, cancellationToken: cancellationToken);
133+
await HttpClient.GetAsync(requestUri: FxUrl, cancellationToken: cancellationToken);
149134
response.EnsureSuccessStatusCode();
150135

151136
string jsonString = await response.Content.ReadAsStringAsync(cancellationToken: cancellationToken);
@@ -216,7 +201,7 @@ private static async Task CollateHLStocksByLetterAsync(FrmMainApp formInstance,
216201

217202
string url = "https://www.hl.co.uk/shares/shares-search-results/" + alphabetChar;
218203
HttpResponseMessage response =
219-
await _httpClient.GetAsync(requestUri: url, cancellationToken: cancellationToken);
204+
await HttpClient.GetAsync(requestUri: url, cancellationToken: cancellationToken);
220205
response.EnsureSuccessStatusCode();
221206

222207
respString = await response.Content.ReadAsStringAsync(cancellationToken: cancellationToken);
@@ -285,31 +270,54 @@ private static async Task FireAndForgetAsync(HashSet<string> urls, FrmMainApp fo
285270
AppendLogWindowText(tbx: formInstance.tbx_Log, appendText: "Scraping all items.",
286271
logMessageType: LogMessageTypes.Start);
287272

273+
// Chunk size for processing tasks
274+
int chunkSize = _maxConnectionsPerServerSetting;
275+
276+
// Create a list to hold the tasks
288277
List<Task> tasks = new();
289278

290-
// Loop through each URL in the chunk and start scraping
291-
foreach (string url in urls)
279+
// Iterate over the URLs in chunks
280+
for (int i = 0; i < urls.Count; i += chunkSize)
292281
{
293-
// Check if cancellation has been requested
294-
cancellationToken.ThrowIfCancellationRequested();
282+
// Get the chunk of URLs
283+
IEnumerable<string> chunk = urls.Skip(count: i).Take(count: chunkSize);
284+
285+
// Create a list to hold the tasks for this chunk
286+
List<Task> chunkTasks = new();
287+
288+
// Create a new HttpClient for this chunk
289+
using HttpClient httpClient = new();
290+
291+
// Loop through each URL in the chunk and start scraping
292+
foreach (string url in chunk)
293+
{
294+
// Check if cancellation has been requested
295+
cancellationToken.ThrowIfCancellationRequested();
296+
297+
// Construct the URL with "/company-information" appended
298+
string companyInfoUrl = url + "/company-information";
299+
300+
// Add tasks to scrape the main URL and company info URL
301+
chunkTasks.Add(item: GetHtmlAsyncWithClient(httpClient: httpClient, url: url,
302+
formInstance: formInstance, cancellationToken: cancellationToken));
303+
chunkTasks.Add(item: GetHtmlAsyncWithClient(httpClient: httpClient, url: companyInfoUrl,
304+
formInstance: formInstance, cancellationToken: cancellationToken));
305+
}
306+
307+
// Add chunk tasks to the main tasks list
308+
tasks.AddRange(collection: chunkTasks);
295309

296-
// Construct the URL with "/company-information" appended
297-
string companyInfoUrl = url + "/company-information";
310+
// Wait for the chunk tasks to complete or cancellation requested
311+
await Task.WhenAll(tasks: chunkTasks);
312+
313+
// Dispose of the HttpClient to close connections and release resources
314+
httpClient.Dispose();
298315

299-
// Add tasks to scrape the main URL and company info URL
300-
tasks.Add(item: GetHtmlAsync(url: url,
301-
formInstance: formInstance, cancellationToken: cancellationToken));
302-
tasks.Add(item: GetHtmlAsync(url: companyInfoUrl,
303-
formInstance: formInstance, cancellationToken: cancellationToken));
304316
// Break loop if cancellation requested
305317
if (cancellationToken.IsCancellationRequested)
306318
break;
307319
}
308320

309-
// Wait for the chunk tasks to complete or cancellation requested
310-
await Task.WhenAll(tasks: tasks);
311-
312-
313321
// Scraping completed
314322
AppendLogWindowText(tbx: formInstance.tbx_Log, appendText: "Scraping all items.",
315323
logMessageType: LogMessageTypes.Done);
@@ -323,64 +331,33 @@ private static async Task FireAndForgetAsync(HashSet<string> urls, FrmMainApp fo
323331
}
324332
}
325333

326-
private static async Task GetHtmlAsync(string url, FrmMainApp formInstance, CancellationToken cancellationToken)
334+
private static async Task GetHtmlAsyncWithClient(HttpClient httpClient, string url, FrmMainApp formInstance,
335+
CancellationToken cancellationToken)
327336
{
328-
// Check if cancellation has been requested before making the request
337+
// Check if cancellation has been requested
329338
cancellationToken.ThrowIfCancellationRequested();
330339

331-
int maxRetries = 5;
332-
int retryCount = 0;
333-
334-
while (retryCount < maxRetries)
335-
try
336-
{
337-
Application.DoEvents();
338-
HttpResponseMessage response =
339-
await _httpClient.GetAsync(requestUri: url, cancellationToken: cancellationToken);
340-
response.EnsureSuccessStatusCode();
341-
string htmlContent = await response.Content.ReadAsStringAsync(cancellationToken: cancellationToken);
342-
343-
// Process the HTML content as before
344-
if (!url.Contains(value: "company-info"))
345-
urlAndHtmlContentHashtable.AddOrUpdate(key: url,
346-
value: HelperStringUtils.TrimAndReplaceNewLinesAndTabs(
347-
text: ReturnPageText(
348-
HTMLTextInHtmlContentHashtable: HelperStringUtils.TrimInternalSpaces(s: htmlContent))));
349-
else
350-
urlAndCompanyInfoHashtable.AddOrUpdate(key: url,
351-
value: HelperStringUtils.TrimAndReplaceNewLinesAndTabs(
352-
text: ReturnCompanyPageText(
353-
HTMLTextInCompanyInfoHashtable: HelperStringUtils.TrimInternalSpaces(s: htmlContent))));
354-
355-
IncrementCounterAndLogProgress(url: url, formInstance: formInstance, isSuccess: true);
356-
357-
// If the request succeeds, exit the retry loop
358-
return;
359-
}
360-
catch (OperationCanceledException ex) when (ex.InnerException is TimeoutException)
361-
{
362-
Application.DoEvents();
363-
// Timeout occurred, increment retry count
364-
retryCount++;
365-
// Log the timeout error
366-
AppendLogWindowText(tbx: formInstance.tbx_Log,
367-
appendText:
368-
$"Timeout occurred while fetching URL '{url}'. Retry attempt {retryCount} of {maxRetries}.");
369-
}
370-
catch (Exception ex)
371-
{
372-
// Log other errors but do not retry
373-
if (ex is HttpRequestException)
374-
IncrementCounterAndLogProgress(url: url, formInstance: formInstance, isSuccess: false,
375-
errorMsg: ex.Message);
376-
return;
377-
}
340+
// Make the HTTP request
341+
HttpResponseMessage response = await httpClient.GetAsync(requestUri: url, cancellationToken: cancellationToken);
342+
string htmlContent = await response.Content.ReadAsStringAsync(cancellationToken: cancellationToken);
343+
344+
// Process the HTML content
345+
// Page
346+
if (!url.Contains(value: "company-info"))
347+
urlAndHtmlContentHashtable.AddOrUpdate(key: url,
348+
value: HelperStringUtils.TrimAndReplaceNewLinesAndTabs(
349+
text: ReturnPageText(
350+
HTMLTextInHtmlContentHashtable: HelperStringUtils.TrimInternalSpaces(s: htmlContent))));
351+
// Company
352+
else
353+
urlAndCompanyInfoHashtable.AddOrUpdate(key: url,
354+
value: HelperStringUtils.TrimAndReplaceNewLinesAndTabs(
355+
text: ReturnCompanyPageText(
356+
HTMLTextInCompanyInfoHashtable: HelperStringUtils.TrimInternalSpaces(s: htmlContent))));
378357

379-
// Maximum retry attempts reached, log error and exit
380-
AppendLogWindowText(tbx: formInstance.tbx_Log, appendText: $"Maximum retry attempts reached for URL '{url}'.");
358+
IncrementCounterAndLogProgress(url: url, formInstance: formInstance, isSuccess: true);
381359
}
382360

383-
384361
/// <summary>
385362
/// Creates the SEDOLs. Technically the primary key is the URL at the stage of creation.
386363
/// </summary>
@@ -782,6 +759,7 @@ await CollateHLStocksByLetterAsync(formInstance: this,
782759
string logMessageVal = "Building parsing data - ready for output.";
783760
AppendLogWindowText(tbx: frmMainAppInstance.tbx_Log, appendText: logMessageVal,
784761
logMessageType: LogMessageTypes.Done);
762+
nic_ProcessFinished.ShowBalloonTip(timeout: 150);
785763
}
786764
else
787765
{

0 commit comments

Comments
 (0)