Skip to content

Commit 28e6151

Browse files
committed
Replace the "n/a" returned values with empty string
1 parent 8766c18 commit 28e6151

7 files changed

Lines changed: 249 additions & 223 deletions

FrmMainApp.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -476,7 +476,7 @@ private static async Task GetHtmlAsync(string url, FrmMainApp formInstance, Canc
476476
GBP_Market_capitalisation = marketCap * gbpEqivalent,
477477
Sector = sector,
478478
ETF_Type = etfType,
479-
Top10_Components = TagsToModelValueTransformations.T2M_Top10_Components(pageText: pageText),
479+
Top10_Exposures = TagsToModelValueTransformations.T2M_Top10_Exposures(pageText: pageText),
480480
Exchange = TagsToModelValueTransformations.T2M_Exchange(companyPageText: companyPageText),
481481
Country = TagsToModelValueTransformations.T2M_Country(companyPageText: companyPageText),
482482
Indices = TagsToModelValueTransformations.T2M_Indices(companyPageText: companyPageText)

HLWebScraper.Net.csproj

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
<ForceDesignerDPIUnaware>true</ForceDesignerDPIUnaware>
1010
<ApplicationIcon>Resources\AppIcon.ico</ApplicationIcon>
1111
<SupportedOSPlatformVersion>10.0.17763.0</SupportedOSPlatformVersion>
12+
<RuntimeIdentifier>win-x64</RuntimeIdentifier>
1213
<GenerateAssemblyInfo>false</GenerateAssemblyInfo>
1314
<ApplicationManifest>app.manifest</ApplicationManifest>
1415

@@ -34,7 +35,6 @@
3435
<ItemGroup>
3536
<PackageReference Include="CsvHelper" Version="31.0.2" />
3637
<PackageReference Include="HtmlAgilityPack" Version="1.11.59" />
37-
<PackageReference Include="Microsoft.Data.Sqlite" Version="9.0.0-preview.2.24128.4" />
3838
<PackageReference Include="Newtonsoft.Json" Version="13.0.3" />
3939
<PackageReference Include="System.Data.SQLite" Version="1.0.118" />
4040
</ItemGroup>
@@ -67,13 +67,16 @@
6767
</ItemGroup>
6868

6969
<ItemGroup>
70-
<None Update="Output\example_HLWebScraper_Output_20240315_192630.csv">
70+
<None Update="changelog.md">
7171
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
7272
</None>
73-
<None Update="Properties\Settings.settings">
74-
<Generator>SettingsSingleFileGenerator</Generator>
75-
<LastGenOutput>Settings.Designer.cs</LastGenOutput>
73+
<None Update="Output\example_HLWebScraper_Output_20240317_112225.csv">
74+
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
75+
</None>
76+
<None Update="readme.md">
77+
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
7678
</None>
79+
7780
</ItemGroup>
7881

7982
</Project>

Model/SEDOL.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ internal class SEDOL
99
public string SEDOL_ID { get; set; }
1010
public string Sector { get; set; }
1111
public string ETF_Type { get; set; }
12-
public string Top10_Components { get; set; }
12+
public string Top10_Exposures { get; set; }
1313
public string Exchange { get; set; }
1414
public string Country { get; set; }
1515
public string Indices { get; set; }

Model/TagsToModelValueTransformations.cs

Lines changed: 35 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -180,28 +180,50 @@ public static string T2M_ETF_Type(string name)
180180
return "Not classified";
181181
}
182182

183+
184+
/// <summary>
185+
/// Reads the Exchange list (where available)
186+
/// </summary>
187+
/// <param name="companyPageText"></param>
188+
/// <returns></returns>
183189
public static string T2M_Exchange(string companyPageText)
184190
{
185-
return HelperStringUtils.ClearUTFChars(input: HelperStringUtils.FindTextBetween(
191+
string likelyExchange = HelperStringUtils.ClearUTFChars(input: HelperStringUtils.FindTextBetween(
186192
pageText: companyPageText,
187193
textStart: "Exchange:<dd>",
188194
textEnd: "</dd>"));
195+
196+
return likelyExchange == "-" || likelyExchange == "n/a" ? string.Empty : likelyExchange;
189197
}
190198

199+
/// <summary>
200+
/// Reads the Country (where available)
201+
/// </summary>
202+
/// <param name="companyPageText"></param>
203+
/// <returns></returns>
191204
public static string T2M_Country(string companyPageText)
192205
{
193-
return HelperStringUtils.ClearUTFChars(input: HelperStringUtils.FindTextBetween(
206+
string likelyCountry = HelperStringUtils.ClearUTFChars(input: HelperStringUtils.FindTextBetween(
194207
pageText: companyPageText,
195208
textStart: "Country:<dd>",
196209
textEnd: "</dd>"));
210+
211+
return likelyCountry == "-" || likelyCountry == "n/a" ? string.Empty : likelyCountry;
197212
}
198213

214+
/// <summary>
215+
/// Reads the Indices [really, index] (where available)
216+
/// </summary>
217+
/// <param name="companyPageText"></param>
218+
/// <returns></returns>
199219
public static string T2M_Indices(string companyPageText)
200220
{
201-
return HelperStringUtils.ClearUTFChars(input: HelperStringUtils.FindTextBetween(
221+
string likelyIndex = HelperStringUtils.ClearUTFChars(input: HelperStringUtils.FindTextBetween(
202222
pageText: companyPageText,
203223
textStart: "Indices:<dd>",
204224
textEnd: "</dd>"));
225+
226+
return likelyIndex == "-" || likelyIndex == "n/a" ? string.Empty : likelyIndex;
205227
}
206228

207229
/// <summary>
@@ -497,24 +519,24 @@ public static double T2M_Volume(string pageText)
497519
/// </summary>
498520
/// <param name="pageText"></param>
499521
/// <returns></returns>
500-
public static string T2M_Top10_Components(string pageText)
522+
public static string T2M_Top10_Exposures(string pageText)
501523
{
502-
string top10components = string.Empty;
503-
string likelyComponents = HelperStringUtils.FindTextBetween(
524+
string top10Exposures = string.Empty;
525+
string likelyExposures = HelperStringUtils.FindTextBetween(
504526
pageText: pageText,
505527
textStart: "<div id=\"top_10_exposures_data\">",
506528
textEnd: "</div>");
507-
if (likelyComponents.Contains(value: "No top ten information is available at this stage"))
508-
return top10components;
529+
if (likelyExposures.Contains(value: "No top ten information is available at this stage"))
530+
return top10Exposures;
509531

510532
try
511533
{
512-
likelyComponents = HelperStringUtils.FindTextBetween(
513-
pageText: likelyComponents,
534+
likelyExposures = HelperStringUtils.FindTextBetween(
535+
pageText: likelyExposures,
514536
textStart: "<tbody>",
515537
textEnd: "</tbody>");
516538

517-
string[] lines = likelyComponents.Split(separator: new[] { '\r', '\n' },
539+
string[] lines = likelyExposures.Split(separator: new[] { '\r', '\n' },
518540
options: StringSplitOptions.RemoveEmptyEntries);
519541

520542

@@ -525,15 +547,15 @@ public static string T2M_Top10_Components(string pageText)
525547
// Extract text from the row (remove HTML tags)
526548
string rowText = RemoveHtmlTags(line: line);
527549

528-
top10components += rowText;
550+
top10Exposures += rowText;
529551
}
530552
}
531553
catch
532554
{
533555
// nothing
534556
}
535557

536-
return top10components;
558+
return top10Exposures;
537559

538560
string RemoveHtmlTags(string line)
539561
{

0 commit comments

Comments
 (0)