Skip to content

Commit 5db0f04

Browse files
committed
perf: reduce memory allocation in Tokenizer
Avoid unnecessary string allocations during tokenization by switching from string slicing to index-based tracking. Benchmark (100 runs): Mean: 68.48ms -> 66.88ms (-2.3%); Allocated: 67.50MB -> 61.99MB (-8.2%); Gen 0 GC: 10,666.67 -> 10,000.00 (-6.3%).
1 parent 1fd98de commit 5db0f04

2 files changed

Lines changed: 127 additions & 181 deletions

File tree

SQL.Formatter/Core/Token.cs

Lines changed: 42 additions & 49 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,4 @@
1-
using System;
2-
using System.Text.RegularExpressions;
1+
using System.Text.RegularExpressions;
32

43
namespace SQL.Formatter.Core
54
{
@@ -8,44 +7,52 @@ public class Token
87
public readonly TokenTypes Type;
98
public readonly string Value;
109
public readonly string Regex;
11-
public readonly string WhitespaceBefore;
1210
public readonly string Key;
1311

14-
public Token(TokenTypes type, string value, string regex, string whitespaceBefore, string key)
12+
private readonly string _input;
13+
public readonly int WhitespaceStart;
14+
public readonly int WhitespaceLength;
15+
16+
public string WhitespaceBefore =>
17+
(_input != null && WhitespaceLength > 0)
18+
? _input.Substring(WhitespaceStart, WhitespaceLength)
19+
: string.Empty;
20+
21+
public Token(
22+
TokenTypes type,
23+
string value,
24+
string regex = null,
25+
string key = null,
26+
string input = null,
27+
int wsStart = 0,
28+
int wsLen = 0)
1529
{
1630
Type = type;
1731
Value = value;
1832
Regex = regex;
19-
WhitespaceBefore = whitespaceBefore;
2033
Key = key;
34+
_input = input;
35+
WhitespaceStart = wsStart;
36+
WhitespaceLength = wsLen;
2137
}
2238

23-
public Token(TokenTypes type, string value, string regex, string whitespaceBefore)
24-
: this(type, value, regex, whitespaceBefore, null) { }
25-
26-
public Token(TokenTypes type, string value, string regex)
27-
: this(type, value, regex, null) { }
28-
29-
public Token(TokenTypes type, string value)
30-
: this(type, value, null) { }
31-
32-
public Token WithWhitespaceBefore(string whitespaceBefore)
39+
public Token WithWhitespace(string input, int start, int length)
3340
{
34-
return new Token(Type, Value, Regex, whitespaceBefore, Key);
41+
return new Token(Type, Value, Regex, Key, input, start, length);
3542
}
3643

3744
public Token WithKey(string key)
3845
{
39-
return new Token(Type, Value, Regex, WhitespaceBefore, key);
46+
return new Token(Type, Value, Regex, key, _input, WhitespaceStart, WhitespaceLength);
4047
}
4148

4249
public override string ToString()
4350
{
44-
return "type: " + Type + ", value: [" + Value + "], regex: /" + Regex + "/, key:" + Key;
51+
return $"type: {Type}, value: [{Value}], regex: /{Regex}/, key: {Key}";
4552
}
4653

4754
private static readonly Regex s_and =
48-
new Regex("^AND$", RegexOptions.IgnoreCase);
55+
new Regex("^AND$", RegexOptions.IgnoreCase);
4956
private static readonly Regex s_between =
5057
new Regex("^BETWEEN$", RegexOptions.IgnoreCase);
5158
private static readonly Regex s_limit =
@@ -59,44 +66,30 @@ public override string ToString()
5966
private static readonly Regex s_end =
6067
new Regex("^END$", RegexOptions.IgnoreCase);
6168

62-
private static Func<Token, bool> IsToken(TokenTypes type, Regex regex)
69+
private static bool IsToken(Token token, TokenTypes type, Regex regex)
6370
{
64-
return token => token?.Type == type && regex.IsMatch(token.Value);
71+
return token != null && token.Type == type && regex.IsMatch(token.Value);
6572
}
6673

67-
public static bool IsAnd(Token token)
68-
{
69-
return IsToken(TokenTypes.RESERVED_NEWLINE, s_and).Invoke(token);
70-
}
74+
public static bool IsAnd(Token token) =>
75+
IsToken(token, TokenTypes.RESERVED_NEWLINE, s_and);
7176

72-
public static bool IsBetween(Token token)
73-
{
74-
return IsToken(TokenTypes.RESERVED, s_between).Invoke(token);
75-
}
77+
public static bool IsBetween(Token token) =>
78+
IsToken(token, TokenTypes.RESERVED, s_between);
7679

77-
public static bool IsLimit(Token token)
78-
{
79-
return IsToken(TokenTypes.RESERVED_TOP_LEVEL, s_limit).Invoke(token);
80-
}
80+
public static bool IsLimit(Token token) =>
81+
IsToken(token, TokenTypes.RESERVED_TOP_LEVEL, s_limit);
8182

82-
public static bool IsSet(Token token)
83-
{
84-
return IsToken(TokenTypes.RESERVED_TOP_LEVEL, s_set).Invoke(token);
85-
}
83+
public static bool IsSet(Token token) =>
84+
IsToken(token, TokenTypes.RESERVED_TOP_LEVEL, s_set);
8685

87-
public static bool IsBy(Token token)
88-
{
89-
return IsToken(TokenTypes.RESERVED, s_by).Invoke(token);
90-
}
86+
public static bool IsBy(Token token) =>
87+
IsToken(token, TokenTypes.RESERVED, s_by);
9188

92-
public static bool IsWindow(Token token)
93-
{
94-
return IsToken(TokenTypes.RESERVED_TOP_LEVEL, s_window).Invoke(token);
95-
}
89+
public static bool IsWindow(Token token) =>
90+
IsToken(token, TokenTypes.RESERVED_TOP_LEVEL, s_window);
9691

97-
public static bool IsEnd(Token token)
98-
{
99-
return IsToken(TokenTypes.CLOSE_PAREN, s_end).Invoke(token);
100-
}
92+
public static bool IsEnd(Token token) =>
93+
IsToken(token, TokenTypes.CLOSE_PAREN, s_end);
10194
}
10295
}

0 commit comments

Comments
 (0)