Skip to content

Commit e3d9913

Browse files
authored
Merge pull request #1 from Agash/feat/expanding-currency-parsing
feat: Extended Currency Rule
2 parents 1f13859 + 0f90890 commit e3d9913

15 files changed

+446
-130
lines changed

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -360,4 +360,5 @@ MigrationBackup/
360360
.ionide/
361361

362362
# Fody - auto-generated XML schema
363-
FodyWeavers.xsd
363+
FodyWeavers.xsd
364+
repomix-output.xml

TTSTextNormalization.Tests/Core/TextNormalizationPipelineTests.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ public void Normalize_RuleOrder_NumberBeforeWhitespace()
107107
[TestMethod]
108108
[DataRow(
109109
" ‘Test’ 1st.. soooo cool ✨!! LOL Cost: $12.50 USD??? ",
110-
"'Test' first. soo cool sparkles! laughing out loud Cost: twelve dollars fifty cents USD?",
110+
"'Test' first. soo cool sparkles! laughing out loud Cost: twelve US dollars fifty cents?",
111111
DisplayName = "All Rules Integration Test 1 - Corrected"
112112
)]
113113
[DataRow(
@@ -122,7 +122,7 @@ public void Normalize_RuleOrder_NumberBeforeWhitespace()
122122
)]
123123
[DataRow(
124124
" OMG!!! The price is £50.00??? LOL... IDK. 1st prize! ",
125-
"oh my god! The price is fifty pounds? laughing out loud. I don't know. first prize!",
125+
"oh my god! The price is fifty British pounds? laughing out loud. I don't know. first prize!",
126126
DisplayName = "All Rules Integration Test 4 - Mixed Punctuation & Abbr - Corrected"
127127
)]
128128
[DataRow(

TTSTextNormalization.Tests/Rules/AbbreviationNormalizationRuleTests.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -67,8 +67,8 @@ public void Apply_MultipleAbbreviations_ReplacesAll(string input, string expecte
6767
[DataRow("lollipop", "lollipop", DisplayName = "Substring 'lol'")]
6868
[DataRow("scrolling", "scrolling", DisplayName = "Substring 'lol' (reverse)")]
6969
[DataRow("theory", "theory", DisplayName = "Substring 'ty'")]
70-
[DataRow("imo-test", "imo-test", DisplayName = "Abbreviation as prefix - Corrected Expectation")] // Lookaround fixed
71-
[DataRow("test-imo", "test-imo", DisplayName = "Abbreviation as suffix - Corrected Expectation")] // Lookaround fixed
70+
[DataRow("imo-test", "imo-test", DisplayName = "Abbreviation as prefix")]
71+
[DataRow("test-imo", "test-imo", DisplayName = "Abbreviation as suffix")]
7272
public void Apply_AbbreviationAsSubstringOrAttached_DoesNotReplace(string input, string expected)
7373
{
7474
// Act

TTSTextNormalization.Tests/Rules/CurrencyNormalizationRuleTests.cs

Lines changed: 30 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -23,27 +23,39 @@ public void Apply_NoCurrency_ReturnsInput(string input, string expected)
2323
// NOTE: Expectations updated for default Humanizer output (includes "and")
2424
[TestMethod]
2525
// Symbol First
26-
[DataRow("$1", " one dollar ", DisplayName = "USD Simple ($)")]
27-
[DataRow("$1.00", " one dollar ", DisplayName = "USD Simple zero cents ($)")]
28-
[DataRow("$1.50", " one dollar fifty cents ", DisplayName = "USD with Cents ($)")] // No "and" for cents usually
29-
[DataRow("$1,234.56", " one thousand two hundred and thirty-four dollars fifty-six cents ", DisplayName = "USD Large with Cents ($)")]
30-
[DataRow("£10", " ten pounds ", DisplayName = "GBP Simple (£)")]
31-
[DataRow("£0.50", " zero pounds fifty pence ", DisplayName = "GBP Only Pence (£)")]
26+
[DataRow("$1", " one US dollar ", DisplayName = "USD Simple ($)")]
27+
[DataRow("$1.00", " one US dollar ", DisplayName = "USD Simple zero cents ($)")]
28+
[DataRow("$1.50", " one US dollar fifty cents ", DisplayName = "USD with Cents ($)")] // No "and" for cents usually
29+
[DataRow("$1,234.56", " one thousand two hundred and thirty-four US dollars fifty-six cents ", DisplayName = "USD Large with Cents ($)")]
30+
[DataRow("£10", " ten British pounds ", DisplayName = "GBP Simple (£)")]
31+
[DataRow("£0.50", " zero British pounds fifty pence ", DisplayName = "GBP Only Pence (£)")]
3232
[DataRow("€100", " one hundred euros ", DisplayName = "EUR Simple (€)")]
3333
[DataRow("€1.25", " one euro twenty-five cents ", DisplayName = "EUR With Cents (€)")]
34-
[DataRow("¥500", " five hundred yen ", DisplayName = "JPY Simple (¥)")]
34+
[DataRow("¥500", " five hundred Japanese yen ", DisplayName = "JPY Simple (¥)")]
3535
// Code Last
36-
[DataRow("1 USD", " one dollar ", DisplayName = "USD Code Simple")]
37-
[DataRow("1.00 USD", " one dollar ", DisplayName = "USD Code zero cents")]
38-
[DataRow("1.50 USD", " one dollar fifty cents ", DisplayName = "USD Code with Cents")]
39-
[DataRow("1,234.56 USD", " one thousand two hundred and thirty-four dollars fifty-six cents ", DisplayName = "USD Code Large")]
40-
[DataRow("10 GBP", " ten pounds ", DisplayName = "GBP Code Simple")] // Uses "pound" from map
41-
[DataRow("0.50 GBP", " zero pounds fifty pence ", DisplayName = "GBP Code Only Pence")]
36+
[DataRow("1 USD", " one US dollar ", DisplayName = "USD Code Simple")]
37+
[DataRow("1.00 USD", " one US dollar ", DisplayName = "USD Code zero cents")]
38+
[DataRow("1.50 USD", " one US dollar fifty cents ", DisplayName = "USD Code with Cents")]
39+
[DataRow("1,234.56 USD", " one thousand two hundred and thirty-four US dollars fifty-six cents ", DisplayName = "USD Code Large")]
40+
[DataRow("10 GBP", " ten British pounds ", DisplayName = "GBP Code Simple")] // Expects the "British pound" name (pluralized), presumably from the map
41+
[DataRow("0.50 GBP", " zero British pounds fifty pence ", DisplayName = "GBP Code Only Pence")]
4242
[DataRow("100 EUR", " one hundred euros ", DisplayName = "EUR Code Simple")]
4343
[DataRow("1.25 EUR", " one euro twenty-five cents ", DisplayName = "EUR Code With Cents")]
44-
[DataRow("500 JPY", " five hundred yen ", DisplayName = "JPY Code Simple")] // Uses "yen" from map
44+
[DataRow("500 JPY", " five hundred Japanese yen ", DisplayName = "JPY Code Simple")] // Expects the "Japanese yen" name, presumably from the map
4545
[DataRow("100 CAD", " one hundred Canadian dollars ", DisplayName = "CAD Code Example")]
46-
[DataRow("10 BRL", " ten reais ", DisplayName = "BRL Code Example")]
46+
[DataRow("10 BRL", " ten Brazilian reais ", DisplayName = "BRL Code Example")]
47+
// Combined
48+
[DataRow("$10 USD", " ten US dollars ", DisplayName = "USD Combined ($)")]
49+
[DataRow("$10USD", " ten US dollars ", DisplayName = "USD Combined (without spaces)")]
50+
[DataRow("$10MXN", " ten Mexican pesos ", DisplayName = "MXN Combined (without spaces)")]
51+
[DataRow("$10 CAD", " ten Canadian dollars ", DisplayName = "CAD Combined ($)")]
52+
[DataRow("£10 GBP", " ten British pounds ", DisplayName = "GBP Combined (£)")]
53+
[DataRow("€100 EUR", " one hundred euros ", DisplayName = "EUR Combined (€)")]
54+
[DataRow("¥500 JPY", " five hundred Japanese yen ", DisplayName = "JPY Combined (¥)")]
55+
[DataRow("10 USD $", " ten US dollars $", DisplayName = "USD Combined with Trailing Symbol")]
56+
[DataRow("10 GBP £", " ten British pounds £", DisplayName = "GBP Combined with Trailing Symbol")]
57+
[DataRow("100 EUR €", " one hundred euros €", DisplayName = "EUR Combined with Trailing Symbol")]
58+
[DataRow("500 JPY ¥", " five hundred Japanese yen ¥", DisplayName = "JPY Combined with Trailing Symbol")]
4759
public void Apply_KnownCurrencies_ReplacesWithSpokenForm(string input, string expected)
4860
{
4961
// Act
@@ -54,9 +66,10 @@ public void Apply_KnownCurrencies_ReplacesWithSpokenForm(string input, string ex
5466
}
5567

5668
[TestMethod]
57-
[DataRow("Send $10 now", "Send ten dollars now", DisplayName = "Currency within sentence")]
69+
[DataRow("Send $10 now", "Send ten US dollars now", DisplayName = "Currency within sentence")]
5870
[DataRow("It costs 50 EUR.", "It costs fifty euros .", DisplayName = "Currency at end of sentence")]
59-
[DataRow("$5 and £10", " five dollars and ten pounds ", DisplayName = "Multiple different currencies")]
71+
[DataRow("It costs 50 EUR now.", "It costs fifty euros now.", DisplayName = "Currency code within sentence")]
72+
[DataRow("$5 and £10", " five US dollars and ten British pounds ", DisplayName = "Multiple different currencies")]
6073
public void Apply_CurrencyInContext_ReplacesCorrectly(string input, string expected)
6174
{
6275
// Act
@@ -69,7 +82,6 @@ public void Apply_CurrencyInContext_ReplacesCorrectly(string input, string expec
6982
[TestMethod]
7083
[DataRow("10XYZ", "10XYZ", DisplayName = "Unknown Code XYZ")]
7184
[DataRow("¤10", "¤10", DisplayName = "Generic Currency Symbol")]
72-
[DataRow("$10MXN", "$10MXN", DisplayName = "Symbol and Code")]
7385
public void Apply_UnknownOrAmbiguousCurrency_NoChange(string input, string expected)
7486
{
7587
// Act

TTSTextNormalization.sln

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,6 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "TTSTextNormalization.EmojiD
99
EndProject
1010
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "TTSTextNormalization", "TTSTextNormalization\TTSTextNormalization.csproj", "{1C2CA7DF-374E-FA47-469B-9751E035B2C8}"
1111
EndProject
12-
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = ".github", ".github", "{02EA681E-C7D8-13C7-8484-4AC65E1B71E8}"
13-
EndProject
14-
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "workflows", "workflows", "{3DCF185E-C897-4519-AB56-F4B91991DB25}"
15-
ProjectSection(SolutionItems) = preProject
16-
dotnet-publish.yml = dotnet-publish.yml
17-
EndProjectSection
18-
EndProject
1912
Global
2013
GlobalSection(SolutionConfigurationPlatforms) = preSolution
2114
Debug|Any CPU = Debug|Any CPU
@@ -38,9 +31,6 @@ Global
3831
GlobalSection(SolutionProperties) = preSolution
3932
HideSolutionNode = FALSE
4033
EndGlobalSection
41-
GlobalSection(NestedProjects) = preSolution
42-
{3DCF185E-C897-4519-AB56-F4B91991DB25} = {02EA681E-C7D8-13C7-8484-4AC65E1B71E8}
43-
EndGlobalSection
4434
GlobalSection(ExtensibilityGlobals) = postSolution
4535
SolutionGuid = {53950FEC-997F-4537-B0E2-40090BAA342B}
4636
EndGlobalSection

TTSTextNormalization/DependencyInjection/TextNormalizationServiceCollectionExtensions.cs

Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
using Microsoft.Extensions.DependencyInjection;
22
using Microsoft.Extensions.DependencyInjection.Extensions;
3-
using TTSTextNormalization.Rules;
43
using TTSTextNormalization.Abstractions;
54
using TTSTextNormalization.Core;
5+
using TTSTextNormalization.Rules;
66

77
namespace TTSTextNormalization.DependencyInjection;
88

@@ -29,30 +29,61 @@ public static IServiceCollection AddTextNormalization(
2929
}
3030

3131
// --- Built-in Rule Extensions for the Builder ---
32+
33+
/// <summary>
34+
/// Adds the <see cref="BasicSanitizationRule"/> to the text normalization pipeline.
35+
/// Performs essential cleanup like normalizing line breaks and replacing fancy characters. Recommended Order: 10.
36+
/// </summary>
37+
/// <param name="builder">The text normalization builder.</param>
38+
/// <returns>The builder instance for fluent chaining.</returns>
3239
public static ITextNormalizationBuilder AddBasicSanitizationRule(this ITextNormalizationBuilder builder)
3340
{
3441
ArgumentNullException.ThrowIfNull(builder);
3542
return builder.AddRule<BasicSanitizationRule>(ServiceLifetime.Singleton);
3643
}
3744

45+
/// <summary>
46+
/// Adds the <see cref="EmojiNormalizationRule"/> to the text normalization pipeline.
47+
/// Replaces standard Unicode emojis with their textual descriptions. Recommended Order: 100.
48+
/// </summary>
49+
/// <param name="builder">The text normalization builder.</param>
50+
/// <returns>The builder instance for fluent chaining.</returns>
3851
public static ITextNormalizationBuilder AddEmojiRule(this ITextNormalizationBuilder builder)
3952
{
4053
ArgumentNullException.ThrowIfNull(builder);
4154
return builder.AddRule<EmojiNormalizationRule>(ServiceLifetime.Singleton);
4255
}
4356

57+
/// <summary>
58+
/// Adds the <see cref="CurrencyNormalizationRule"/> to the text normalization pipeline.
59+
/// Normalizes currency symbols and codes (e.g., "$10.50", "100 EUR") into spoken text. Recommended Order: 200.
60+
/// </summary>
61+
/// <param name="builder">The text normalization builder.</param>
62+
/// <returns>The builder instance for fluent chaining.</returns>
4463
public static ITextNormalizationBuilder AddCurrencyRule(this ITextNormalizationBuilder builder)
4564
{
4665
ArgumentNullException.ThrowIfNull(builder);
4766
return builder.AddRule<CurrencyNormalizationRule>(ServiceLifetime.Singleton);
4867
}
4968

69+
/// <summary>
70+
/// Adds the <see cref="AbbreviationNormalizationRule"/> to the text normalization pipeline.
71+
/// Expands common chat/gaming abbreviations (e.g., "lol", "gg"). Recommended Order: 300.
72+
/// </summary>
73+
/// <param name="builder">The text normalization builder.</param>
74+
/// <returns>The builder instance for fluent chaining.</returns>
5075
public static ITextNormalizationBuilder AddAbbreviationNormalizationRule(this ITextNormalizationBuilder builder)
5176
{
5277
ArgumentNullException.ThrowIfNull(builder);
5378
return builder.AddRule<AbbreviationNormalizationRule>(ServiceLifetime.Singleton);
5479
}
5580

81+
/// <summary>
82+
/// Adds the <see cref="NumberNormalizationRule"/> to the text normalization pipeline.
83+
/// Converts cardinals, ordinals, decimals, and version-like numbers into words. Recommended Order: 400.
84+
/// </summary>
85+
/// <param name="builder">The text normalization builder.</param>
86+
/// <returns>The builder instance for fluent chaining.</returns>
5687
public static ITextNormalizationBuilder AddNumberNormalizationRule(this ITextNormalizationBuilder builder)
5788
{
5889
ArgumentNullException.ThrowIfNull(builder);
@@ -79,6 +110,12 @@ public static ITextNormalizationBuilder AddLetterRepetitionRule(this ITextNormal
79110
return builder.AddRule<LetterRepetitionRule>(ServiceLifetime.Singleton);
80111
}
81112

113+
/// <summary>
114+
/// Adds the <see cref="WhitespaceNormalizationRule"/> to the text normalization pipeline.
115+
/// Trims ends, collapses internal spaces, and adjusts spacing around punctuation. Recommended Order: 9000.
116+
/// </summary>
117+
/// <param name="builder">The text normalization builder.</param>
118+
/// <returns>The builder instance for fluent chaining.</returns>
82119
public static ITextNormalizationBuilder AddWhitespaceNormalizationRule(this ITextNormalizationBuilder builder)
83120
{
84121
ArgumentNullException.ThrowIfNull(builder);

TTSTextNormalization/Rules/AbbreviationNormalizationRule.cs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ namespace TTSTextNormalization.Rules;
99
/// </summary>
1010
public sealed partial class AbbreviationNormalizationRule : ITextNormalizationRule
1111
{
12+
/// <inheritdoc/>
1213
public int Order => 300;
1314
private const int RegexTimeoutMilliseconds = 150; // Slightly increased for larger pattern
1415

@@ -73,8 +74,10 @@ public sealed partial class AbbreviationNormalizationRule : ITextNormalizationRu
7374
{ "gpu", "g p u" }, // Spell out
7475
}.ToFrozenDictionary(StringComparer.OrdinalIgnoreCase);
7576

77+
/// <inheritdoc/>
7678
public AbbreviationNormalizationRule() { }
7779

80+
/// <inheritdoc/>
7881
public string Apply(string inputText)
7982
{
8083
ArgumentNullException.ThrowIfNull(inputText);

TTSTextNormalization/Rules/BasicSanitizationRule.cs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,9 @@ public sealed partial class BasicSanitizationRule : ITextNormalizationRule
3131
{ "–", "-" }, // En dash
3232
}.ToFrozenDictionary(StringComparer.Ordinal);
3333

34+
/// <summary>
35+
/// Initializes a new instance of the <see cref="BasicSanitizationRule"/> class.
36+
/// </summary>
3437
public BasicSanitizationRule() { }
3538

3639
/// <summary>

0 commit comments

Comments
 (0)