-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathTextNormalizationPipelineTests.cs
More file actions
153 lines (138 loc) · 5.94 KB
/
TextNormalizationPipelineTests.cs
File metadata and controls
153 lines (138 loc) · 5.94 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
using Microsoft.Extensions.DependencyInjection;
using TTSTextNormalization.Abstractions;
using TTSTextNormalization.DependencyInjection;
namespace TTSTextNormalization.Tests.Core;
/// <summary>
/// Tests for the <see cref="ITextNormalizer"/> pipeline resolved through
/// <c>AddTextNormalization</c>, covering empty-ish input, single rules,
/// rule ordering, and an all-rules integration matrix.
/// </summary>
[TestClass]
public class TextNormalizationPipelineTests
{
    // Service providers created by BuildNormalizer during the current test.
    // They are kept so DisposeProviders can release them once the test finishes;
    // previously every provider was built and then abandoned without disposal.
    private readonly System.Collections.Generic.List<ServiceProvider> _providers = new();

    /// <summary>
    /// Disposes every service provider created by <see cref="BuildNormalizer"/>
    /// for the test that just ran.
    /// </summary>
    [TestCleanup]
    public void DisposeProviders()
    {
        foreach (ServiceProvider provider in _providers)
        {
            provider.Dispose();
        }

        _providers.Clear();
    }

    /// <summary>
    /// Builds a normalizer whose pipeline contains only the rules registered by
    /// <paramref name="configure"/>.
    /// </summary>
    /// <param name="configure">Callback that adds rules to the normalization builder.</param>
    /// <returns>The <see cref="ITextNormalizer"/> resolved from a freshly built service provider.</returns>
    private ITextNormalizer BuildNormalizer(Action<ITextNormalizationBuilder> configure)
    {
        ServiceCollection services = new();
        services.AddTextNormalization(configure);

        ServiceProvider provider = services.BuildServiceProvider();

        // Register the provider for disposal before resolving, so it is cleaned up
        // even if resolution throws.
        _providers.Add(provider);

        return provider.GetRequiredService<ITextNormalizer>();
    }

    [TestMethod]
    public void Normalize_NullInput_ReturnsEmptyString()
    {
        // Arrange: pipeline without any rules
        ITextNormalizer normalizer = BuildNormalizer(builder => { });

        // Act
        string result = normalizer.Normalize(null);

        // Assert
        Assert.AreEqual(string.Empty, result);
    }

    [TestMethod]
    public void Normalize_EmptyInput_ReturnsEmptyString()
    {
        // Arrange: pipeline without any rules
        ITextNormalizer normalizer = BuildNormalizer(builder => { });

        // Act
        string result = normalizer.Normalize("");

        // Assert
        Assert.AreEqual(string.Empty, result);
    }

    [TestMethod]
    public void Normalize_WhitespaceInput_ReturnsEmptyString()
    {
        // Arrange: pipeline without any rules
        ITextNormalizer normalizer = BuildNormalizer(builder => { });

        // Act
        string result = normalizer.Normalize(" \t\n ");

        // Assert
        Assert.AreEqual(string.Empty, result);
    }

    [TestMethod]
    public void Normalize_NoRules_ReturnsOriginalText()
    {
        // Arrange: pipeline without any rules
        ITextNormalizer normalizer = BuildNormalizer(builder => { });
        string input = "Some Text 123 !?";

        // Act
        string result = normalizer.Normalize(input);

        // Assert: with no rules the text must pass through unchanged
        Assert.AreEqual(input, result);
    }

    [TestMethod]
    public void Normalize_SingleRule_Emoji_AppliesRule()
    {
        // Arrange
        ITextNormalizer normalizer = BuildNormalizer(builder => builder.AddEmojiRule());
        string input = "Hello ✨ world";
        // Rule adds spaces, no whitespace cleanup rule added here
        string expected = "Hello sparkles world";

        // Act
        string result = normalizer.Normalize(input);

        // Assert
        Assert.AreEqual(expected, result);
    }

    [TestMethod]
    public void Normalize_SingleRule_Whitespace_AppliesRule()
    {
        // Arrange
        ITextNormalizer normalizer = BuildNormalizer(builder => builder.AddWhitespaceNormalizationRule());
        string input = " Extra Spaces ";
        string expected = "Extra Spaces";

        // Act
        string result = normalizer.Normalize(input);

        // Assert
        Assert.AreEqual(expected, result);
    }

    [TestMethod]
    public void Normalize_RuleOrder_SanitizeBeforeEmoji_WhitespaceAfter()
    {
        // Arrange
        ITextNormalizer normalizer = BuildNormalizer(builder =>
        {
            // Explicitly add in order
            builder.AddBasicSanitizationRule(); // Order 10
            builder.AddEmojiRule(); // Order 100
            builder.AddWhitespaceNormalizationRule(); // Order 9000
        });

        // Input has fancy quotes and an emoji: sanitization is meant to run first,
        // then emoji replacement, then whitespace cleanup.
        string input = " ‘Hey’ ✨!!! ";

        // NOTE(review): an earlier step-by-step trace in this test ended with
        // "'Hey' sparkles !!!", which does not match the asserted value below
        // (the exclamation marks are separated by spaces). Confirm which rule
        // produces "! ! !" and whether the expectation is intentional.
        string expected = "'Hey' sparkles! ! !";

        // Act
        string result = normalizer.Normalize(input);

        // Assert
        Assert.AreEqual(expected, result);
    }

    [TestMethod]
    public void Normalize_RuleOrder_NumberBeforeWhitespace()
    {
        // Arrange
        ITextNormalizer normalizer = BuildNormalizer(builder =>
        {
            builder.AddNumberNormalizationRule();
            builder.AddWhitespaceNormalizationRule();
        });

        // Number rule adds spaces, WS rule cleans up
        string input = "Value is 123";
        // Expected:
        // 1. Number: "Value is one hundred and twenty-three "
        // 2. Whitespace: "Value is one hundred and twenty-three"
        string expected = "Value is one hundred and twenty-three";

        // Act
        string result = normalizer.Normalize(input);

        // Assert
        Assert.AreEqual(expected, result);
    }

    // --- Comprehensive Test with All Rules ---

    [TestMethod]
    [DataRow(
        " ‘Test’ 1st.. soooo cool ✨!! LOL Cost: $12.50 USD??? ",
        "'Test' first. soo cool sparkles! laughing out loud Cost: twelve US dollars fifty cents?",
        DisplayName = "All Rules Integration Test 1 - Corrected"
    )]
    [DataRow(
        "BRB... 2nd place!! Got 2 pay €1,000.00!! 🤑🤑",
        "be right back. second place! Got two pay one thousand euros! money-mouth face money-mouth face", // Removed final space
        DisplayName = "All Rules Integration Test 2 - Corrected"
    )]
    [DataRow(
        "IDK man.... soooo many ??? GLHF! Check 123.45!",
        "I don't know man. soo many? good luck have fun! Check one hundred and twenty-three point four five!",
        DisplayName = "All Rules Integration Test 3 - Corrected"
    )]
    [DataRow(
        " OMG!!! The price is £50.00??? LOL... IDK. 1st prize! ",
        "oh my god! The price is fifty British pounds? laughing out loud. I don't know. first prize!",
        DisplayName = "All Rules Integration Test 4 - Mixed Punctuation & Abbr - Corrected"
    )]
    [DataRow(
        " SOOOOooooo MUCHHH Textt!!! 123rd ", // Note: LetterRepetition is case-insensitive
        "SOO MUCHH Textt! hundred and twenty-third", // Expect 'Textt' to remain as only 't' repeats twice
        DisplayName = "All Rules Integration Test 5 - Letter Repetition Focus - Corrected"
    )]
    public void Normalize_AllRulesEnabled_HandlesComplexInput(string input, string expected)
    {
        // Arrange: register every rule; trailing numbers are the rule orders
        // noted by the original author.
        ITextNormalizer normalizer = BuildNormalizer(builder =>
        {
            builder.AddBasicSanitizationRule(); // 10
            builder.AddEmojiRule(); // 100
            builder.AddCurrencyRule(); // 200
            builder.AddAbbreviationNormalizationRule(); // 300
            builder.AddNumberNormalizationRule(); // 400
            builder.AddExcessivePunctuationRule(); // 500
            builder.AddLetterRepetitionRule(); // 510
            builder.AddWhitespaceNormalizationRule(); // 9000
        });

        // Act
        string result = normalizer.Normalize(input);

        // Assert
        Assert.AreEqual(expected, result);
    }
}