Skip to content

Commit ab623c1

Browse files
committed
Change enum FilterType to SelectorFilterType
Implement proposals from review: * SelectorFilterType.Alphanumeric: alphanumeric characters (upper and lower case), plus '_' and '-' * SelectorFilterType.VisualUnicodeChars: All Unicode characters are allowed in a selector, except 68 non-visual characters: Control Characters (U+0000–U+001F, U+007F), Format Characters (Category: Cf), Directional Formatting (Category: Cf), Invisible Separator, Common Combining Marks (Category: Mn), Whitespace Characters (non-glyph spacing).
1 parent b787b49 commit ab623c1

5 files changed

Lines changed: 138 additions & 57 deletions

File tree

src/SmartFormat.Tests/Core/ParserTests.cs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -211,7 +211,7 @@ public void Parser_Error_Action_OutputErrorInResult()
211211
{
212212
Parser = new ParserSettings
213213
{
214-
SelectorCharFilter = FilterType.Allowlist, // default
214+
SelectorCharFilter = SelectorFilterType.Alphanumeric, // default
215215
ErrorAction = ParseErrorAction.OutputErrorInResult
216216
}
217217
});
@@ -471,7 +471,7 @@ public void Escaping_TheEscapingCharacter_ShouldWork()
471471
[Test]
472472
public void Parsing_Selector_With_CharFromBlocklist_ShouldThrow()
473473
{
474-
var settings = new SmartSettings { Parser = new ParserSettings { SelectorCharFilter = FilterType.Blocklist } };
474+
var settings = new SmartSettings { Parser = new ParserSettings { SelectorCharFilter = SelectorFilterType.VisualUnicodeChars } };
475475
var parser = GetRegularParser(settings);
476476

477477
// The newline character is in the default blocklist of disallowed characters
@@ -609,7 +609,7 @@ public void Selector_WorksWithAllUnicodeChars(string selector)
609609
// See https://github.com/axuno/SmartFormat/issues/454
610610

611611
// settings must be set before parser instantiation
612-
var settings = new SmartSettings { Parser = { SelectorCharFilter = FilterType.Blocklist } };
612+
var settings = new SmartSettings { Parser = { SelectorCharFilter = SelectorFilterType.VisualUnicodeChars } };
613613
const string expected = "The Value";
614614
// The default formatter with default settings should be able to handle any
615615
// Unicode characters in selectors except the "magic" disallowed ones
@@ -751,7 +751,7 @@ public void ParseHtmlInput_Without_ParserSetting_IsHtml(string input, string sel
751751
StringFormatCompatibility = false,
752752
Parser = new ParserSettings
753753
{
754-
SelectorCharFilter = FilterType.Blocklist,
754+
SelectorCharFilter = SelectorFilterType.VisualUnicodeChars,
755755
ErrorAction = ParseErrorAction.ThrowError,
756756
ParseInputAsHtml = false
757757
}
@@ -780,7 +780,7 @@ public void ParseHtmlInput_Without_ParserSetting_IsHtml(string input, bool shoul
780780
StringFormatCompatibility = false,
781781
Parser = new ParserSettings
782782
{
783-
SelectorCharFilter = FilterType.Allowlist,
783+
SelectorCharFilter = SelectorFilterType.Alphanumeric,
784784
ErrorAction = ParseErrorAction.ThrowError,
785785
ParseInputAsHtml = false
786786
}

src/SmartFormat.Tests/Core/SettingsTests.cs

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -27,21 +27,21 @@ public void ExistingSelectorCharacter_Should_Not_Be_Added()
2727
});
2828
}
2929

30-
[TestCase(FilterType.Allowlist)]
31-
[TestCase(FilterType.Blocklist)]
32-
public void ControlCharacters_Should_Be_Added_As_SelectorChars(FilterType filterType)
30+
[TestCase(SelectorFilterType.Alphanumeric)]
31+
[TestCase(SelectorFilterType.VisualUnicodeChars)]
32+
public void NonVisualCharacters_Should_Be_AddedBack_As_SelectorChars(SelectorFilterType filterType)
3333
{
3434
var settings = new SmartSettings { Parser = { SelectorCharFilter = filterType } };
35-
var controlChars = ParserSettings.ControlChars().ToList();
36-
settings.Parser.AddCustomSelectorChars(controlChars);
35+
var nonVisualChars = ParserSettings.NonVisualUnicodeCharacters;
36+
settings.Parser.AddCustomSelectorChars(nonVisualChars);
3737

3838
Assert.Multiple(() =>
3939
{
40-
Assert.That(settings.Parser.CustomSelectorChars, Has.Count.EqualTo(controlChars.Count));
40+
Assert.That(settings.Parser.CustomSelectorChars, Has.Count.EqualTo(nonVisualChars.Length));
4141
foreach (var c in settings.Parser.CustomSelectorChars)
4242
{
43-
Assert.That(settings.Parser.GetSelectorChars(), filterType == FilterType.Allowlist ? Does.Contain(c) : Does.Not.Contain(c),
44-
$"Control char U+{(int) c:X4} should be allowed as selector char.");
43+
Assert.That(settings.Parser.GetSelectorChars(), filterType == SelectorFilterType.Alphanumeric ? Does.Contain(c) : Does.Not.Contain(c),
44+
$"Character U+{(int) c:X4} should be allowed as selector char.");
4545
}
4646
});
4747
}

src/SmartFormat/Core/Settings/FilterType.cs

Lines changed: 0 additions & 23 deletions
This file was deleted.

src/SmartFormat/Core/Settings/ParserSettings.cs

Lines changed: 98 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ public class ParserSettings
1818
{
1919
private readonly List<char> _customSelectorChars = [];
2020
private readonly List<char> _customOperatorChars = [];
21-
private FilterType _selectorCharFilter = FilterType.Allowlist;
21+
private SelectorFilterType _selectorCharFilter = SelectorFilterType.Alphanumeric;
2222

2323
private const string StandardAllowlist = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_-";
2424

@@ -40,17 +40,21 @@ public class ParserSettings
4040
internal List<char> CustomOperatorChars => _customOperatorChars;
4141

4242
/// <summary>
43-
/// When <see cref="FilterType.Allowlist"/> (default) is set, an allowlist of selector characters is used.
43+
/// When <see cref="SelectorFilterType.Alphanumeric"/> (default) is set, an allowlist of selector characters is used.
4444
/// The allowlist contains alphanumeric characters (upper and lower case), plus '_' and '-'.
4545
/// On top, any custom selector characters added with <see cref="AddCustomSelectorChars"/> are included.
4646
/// <para/>
47-
/// When <see cref="FilterType.Blocklist"/>, all Unicode characters are allowed in a selector,
48-
/// except control characters (ASCII 0-31 and 127). Excluded control characters can be added back
49-
/// using <see cref="AddCustomSelectorChars"/>.
47+
/// When <see cref="SelectorFilterType.VisualUnicodeChars"/> is set, all Unicode characters are allowed in a selector,
48+
/// except 68 non-visual characters: Control Characters (U+0000–U+001F, U+007F), Format Characters (Category: Cf),
49+
/// Directional Formatting (Category: Cf), Invisible Separator, Common Combining Marks (Category: Mn),
50+
/// Whitespace Characters (non-glyph spacing).<br/>
51+
/// Excluded characters can be added back using <see cref="AddCustomSelectorChars"/>.
52+
/// <para/>
53+
/// {}[]()\.? are characters with special functions that are never allowed.
5054
/// <para/>
5155
/// Changing this setting clears any custom operator characters added with <see cref="AddCustomOperatorChars"/>.
5256
/// </summary>
53-
public FilterType SelectorCharFilter
57+
public SelectorFilterType SelectorCharFilter
5458
{
5559
get
5660
{
@@ -68,7 +72,7 @@ public FilterType SelectorCharFilter
6872
/// This can be an allowlist, which contains explicitly allowed characters,
6973
/// or a blocklist, when all Unicode characters are allowed, except those from the blocklist.
7074
/// </summary>
71-
internal CharSet GetSelectorChars() => SelectorCharFilter == FilterType.Allowlist ? CreateAllowlist() : CreateBlocklist();
75+
internal CharSet GetSelectorChars() => SelectorCharFilter == SelectorFilterType.Alphanumeric ? CreateAllowlist() : CreateBlocklist();
7276

7377
private CharSet CreateBlocklist()
7478
{
@@ -79,8 +83,7 @@ private CharSet CreateBlocklist()
7983
chars.AddRange(SelectorDelimitingChars.AsSpan());
8084
chars.AddRange(OperatorChars.AsSpan()); // no overlaps
8185
chars.AddRange(_customOperatorChars); // no overlaps
82-
// Hard to visualize and debug, disallow by default - can be added back as custom selector chars
83-
chars.AddRange(ControlChars());
86+
chars.AddRange(NonVisualUnicodeCharacters.AsSpan());
8487

8588
// Remove characters used as custom selector chars from the blocklist
8689
foreach (var c in _customSelectorChars) chars.Remove(c);
@@ -209,13 +212,90 @@ private CharSet CreateAllowlist()
209212
];
210213

211214
/// <summary>
212-
/// Gets the set of control characters (ASCII 0-31 and 127).
215+
/// The curated set of 68 non-visual Unicode characters that are excluded from selectors when <see cref="SelectorFilterType.VisualUnicodeChars"/> is used.
213216
/// </summary>
214-
internal static IEnumerable<char> ControlChars()
215-
{
216-
for (var i = 0; i <= 31; i++) yield return (char) i;
217-
yield return (char) 127; // delete character
218-
}
217+
internal static char[] NonVisualUnicodeCharacters =
218+
[
219+
// Control Characters (U+0000–U+001F, U+007F)
220+
'\u0000', // NULL – string terminator
221+
'\u0001', // START OF HEADING – protocol control
222+
'\u0002', // START OF TEXT – protocol control
223+
'\u0003', // END OF TEXT – protocol control
224+
'\u0004', // END OF TRANSMISSION – protocol control
225+
'\u0005', // ENQUIRY – request for response
226+
'\u0006', // ACKNOWLEDGE – positive response
227+
'\u0007', // BELL – triggers alert
228+
'\u0008', // BACKSPACE – moves cursor back
229+
'\u0009', // CHARACTER TABULATION – horizontal tab
230+
'\u000A', // LINE FEED – line break
231+
'\u000B', // LINE TABULATION – vertical tab
232+
'\u000C', // FORM FEED – page break
233+
'\u000D', // CARRIAGE RETURN – return to line start
234+
'\u000E', // SHIFT OUT – alternate character set
235+
'\u000F', // SHIFT IN – return to standard set
236+
'\u0010', // DATA LINK ESCAPE – protocol framing
237+
'\u0011', // DEVICE CONTROL 1 – device-specific
238+
'\u0012', // DEVICE CONTROL 2 – device-specific
239+
'\u0013', // DEVICE CONTROL 3 – device-specific
240+
'\u0014', // DEVICE CONTROL 4 – device-specific
241+
'\u0015', // NEGATIVE ACKNOWLEDGE – error signal
242+
'\u0016', // SYNCHRONOUS IDLE – timing control
243+
'\u0017', // END OF TRANSMISSION BLOCK – block end
244+
'\u0018', // CANCEL – cancel transmission
245+
'\u0019', // END OF MEDIUM – physical medium end
246+
'\u001A', // SUBSTITUTE – invalid character
247+
'\u001B', // ESCAPE – escape sequence initiator
248+
'\u001C', // FILE SEPARATOR – data structuring
249+
'\u001D', // GROUP SEPARATOR – data structuring
250+
'\u001E', // RECORD SEPARATOR – data structuring
251+
'\u001F', // UNIT SEPARATOR – data structuring
252+
'\u007F', // DELETE – erase character
253+
254+
// Format Characters (Category: Cf)
255+
'\u200B', // ZERO WIDTH SPACE – invisible space
256+
'\u200C', // ZERO WIDTH NON-JOINER – prevents ligature
257+
'\u200D', // ZERO WIDTH JOINER – forces ligature
258+
'\u2060', // WORD JOINER – prevents line break
259+
'\uFEFF', // ZERO WIDTH NO-BREAK SPACE – byte order mark (BOM) or ZWNBSP
260+
261+
// Directional Formatting (Category: Cf)
262+
'\u202A', // LEFT-TO-RIGHT EMBEDDING – sets LTR context
263+
'\u202B', // RIGHT-TO-LEFT EMBEDDING – sets RTL context
264+
'\u202C', // POP DIRECTIONAL FORMATTING – ends override
265+
'\u202D', // LEFT-TO-RIGHT OVERRIDE – forces LTR rendering
266+
'\u202E', // RIGHT-TO-LEFT OVERRIDE – forces RTL rendering
267+
'\u2066', // LEFT-TO-RIGHT ISOLATE – isolates LTR segment
268+
'\u2067', // RIGHT-TO-LEFT ISOLATE – isolates RTL segment
269+
'\u2068', // FIRST STRONG ISOLATE – isolates with inferred direction
270+
'\u2069', // POP DIRECTIONAL ISOLATE – ends isolate
271+
272+
// Invisible Separator
273+
'\u2063', // INVISIBLE SEPARATOR – semantic boundary marker
274+
275+
// Common Combining Marks (Category: Mn)
276+
'\u0300', // COMBINING GRAVE ACCENT – diacritic (invisible alone)
277+
'\u0301', // COMBINING ACUTE ACCENT – diacritic (invisible alone)
278+
'\u0302', // COMBINING CIRCUMFLEX ACCENT – diacritic (invisible alone)
279+
'\u0308', // COMBINING DIAERESIS – diacritic (invisible alone)
280+
281+
// Whitespace Characters (non-glyph spacing)
282+
'\u00A0', // NO-BREAK SPACE – non-breaking space
283+
'\u1680', // OGHAM SPACE MARK – special spacing
284+
'\u2000', // EN QUAD – fixed-width space
285+
'\u2001', // EM QUAD – fixed-width space
286+
'\u2002', // EN SPACE – fixed-width space
287+
'\u2003', // EM SPACE – fixed-width space
288+
'\u2004', // THREE-PER-EM SPACE – narrow space
289+
'\u2005', // FOUR-PER-EM SPACE – narrow space
290+
'\u2006', // SIX-PER-EM SPACE – narrow space
291+
'\u2007', // FIGURE SPACE – aligns digits
292+
'\u2008', // PUNCTUATION SPACE – aligns punctuation
293+
'\u2009', // THIN SPACE – narrow space
294+
'\u200A', // HAIR SPACE – ultra-thin space
295+
'\u202F', // NARROW NO-BREAK SPACE – narrow NBSP
296+
'\u205F', // MEDIUM MATHEMATICAL SPACE – math spacing
297+
'\u3000' // IDEOGRAPHIC SPACE – full-width CJK space
298+
];
219299

220300
/// <summary>
221301
/// Add a list of allowable selector characters on top of the default selector characters.
@@ -225,26 +305,23 @@ internal static IEnumerable<char> ControlChars()
225305
/// On top, any custom selector characters added with <see cref="AddCustomSelectorChars"/> are included.
226306
/// <para/>
227307
/// When <see cref="SelectorCharFilter"/> is <see cref="SelectorFilterType.VisualUnicodeChars"/>, all Unicode characters are allowed in a selector,
228-
/// except control characters (ASCII 0-31 and 127). Excluded control characters can be added back
229-
/// using <see cref="AddCustomSelectorChars"/>.
308+
/// except 68 non-visual characters. Excluded characters can be added back using <see cref="AddCustomSelectorChars"/>.
230309
/// <para/>
231310
/// Operator chars and selector chars must be different.
232311
/// </summary>
233312
public void AddCustomSelectorChars(IList<char> characters)
234313
{
235-
var controlChars = ControlChars().ToList();
236-
237314
foreach (var c in characters)
238315
{
239316
// Explicitly disallow certain characters
240317
if (SelectorDelimitingChars.Contains(c) || c == CharLiteralEscapeChar
241318
|| OperatorChars.Contains(c) || CustomOperatorChars.Contains(c))
242319
throw new ArgumentException($"Cannot add '{c}' as a custom selector character. It is disallowed or in use as an operator character.");
243320

244-
if (controlChars.Contains(c))
321+
if (NonVisualUnicodeCharacters.Contains(c))
245322
_customSelectorChars.Add(c);
246323

247-
if (SelectorCharFilter == FilterType.Allowlist && !(StandardAllowlist.Contains(c) || _customSelectorChars.Contains(c))) _customSelectorChars.Add(c);
324+
if (SelectorCharFilter == SelectorFilterType.Alphanumeric && !(StandardAllowlist.Contains(c) || _customSelectorChars.Contains(c))) _customSelectorChars.Add(c);
248325
}
249326
}
250327

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
//
2+
// Copyright SmartFormat Project maintainers and contributors.
3+
// Licensed under the MIT license.
4+
5+
namespace SmartFormat.Core.Settings;
6+
7+
/// <summary>
8+
/// Determines the filter type for allowed or disallowed characters.
9+
/// </summary>
10+
public enum SelectorFilterType
11+
{
12+
/// <summary>
13+
/// Use a list of characters that are allowed. The default characters are<br/>
14+
/// alphanumeric characters (upper and lower case), plus '_' and '-'.<br/>
15+
/// </summary>
16+
Alphanumeric,
17+
18+
/// <summary>
19+
/// All Unicode characters are allowed in a selector, except 68 non-visual characters:
20+
/// Control Characters (U+0000–U+001F, U+007F), Format Characters (Category: Cf),
21+
/// Directional Formatting (Category: Cf), Invisible Separator, Common Combining Marks (Category: Mn),
22+
/// Whitespace Characters (non-glyph spacing).
23+
/// <para/>
24+
/// {}[]()\.? are characters with special functions that are never allowed.
25+
/// </summary>
26+
VisualUnicodeChars
27+
}

0 commit comments

Comments
 (0)