Skip to content

Commit c8bed6b

Browse files
committed
perf: optimize tokenizer memory by removing FirstNotnull and closures
Improve memory efficiency by replacing the functional FirstNotnull utility with direct method calls. This eliminates implicit heap allocations from params arrays, delegate objects, and closure captures during the tokenization process.

Benchmark (RepeatCount: 100):
- Mean: 67.62ms -> 62.09ms (-8.2%)
- Allocated: 62.02MB -> 48.86MB (-21.2%)
- Gen 0 GC: 10,000.00 -> 7,666.67 (-23.3%)
1 parent 722be56 commit c8bed6b

3 files changed

Lines changed: 111 additions & 50 deletions

File tree

SQL.Formatter/Core/AbstractFormatter.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -188,7 +188,7 @@ protected static string EqualizeWhitespace(string str)
188188
protected virtual void FormatOpeningParentheses(Token token, StringBuilder query)
189189
{
190190
if (string.IsNullOrEmpty(token.WhitespaceBefore)
191-
&& (TokenLookBehind() == default || !s_preserveWhitespaceFor.Contains(TokenLookBehind().Type)))
191+
&& (TokenLookBehind() == null || !s_preserveWhitespaceFor.Contains(TokenLookBehind().Type)))
192192
{
193193
TrimEnd(query);
194194
}
@@ -269,7 +269,7 @@ protected virtual void FormatQuerySeparator(Token token, StringBuilder query)
269269
TrimEnd(query);
270270
query.Append(Show(token));
271271

272-
var lines = _cfg.LinesBetweenQueries == default ? 1 : _cfg.LinesBetweenQueries;
272+
var lines = _cfg.LinesBetweenQueries == 0 ? 1 : _cfg.LinesBetweenQueries;
273273
for (var i = 0; i < lines; i++)
274274
{
275275
query.Append('\n');

SQL.Formatter/Core/Tokenizer.cs

Lines changed: 108 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
1-
using System;
2-
using System.Collections.Generic;
1+
using System.Collections.Generic;
32
using System.Text.RegularExpressions;
43
using SQL.Formatter.Core.Util;
54

@@ -89,7 +88,7 @@ public Tokenizer(DialectConfig cfg)
8988
public JSLikeList<Token> Tokenize(string input)
9089
{
9190
var tokens = new List<Token>();
92-
Token previousToken = default;
91+
Token previousToken = null;
9392
var currentIndex = 0;
9493

9594
while (currentIndex < input.Length)
@@ -121,23 +120,94 @@ public JSLikeList<Token> Tokenize(string input)
121120

122121
private Token GetNextToken(string input, int offset, Token previousToken)
123122
{
124-
return Utils.FirstNotnull(
125-
() => GetCommentToken(input, offset),
126-
() => GetStringToken(input, offset),
127-
() => GetOpenParenToken(input, offset),
128-
() => GetCloseParenToken(input, offset),
129-
() => GetPlaceholderToken(input, offset),
130-
() => GetNumberToken(input, offset),
131-
() => GetReservedWordToken(input, offset, previousToken),
132-
() => GetWordToken(input, offset),
133-
() => GetOperatorToken(input, offset));
123+
var token = GetCommentToken(input, offset);
124+
if (token != null)
125+
{
126+
return token;
127+
}
128+
129+
token = GetStringToken(input, offset);
130+
if (token != null)
131+
{
132+
return token;
133+
}
134+
135+
token = GetOpenParenToken(input, offset);
136+
if (token != null)
137+
{
138+
return token;
139+
}
140+
141+
token = GetCloseParenToken(input, offset);
142+
if (token != null)
143+
{
144+
return token;
145+
}
146+
147+
token = GetPlaceholderToken(input, offset);
148+
if (token != null)
149+
{
150+
return token;
151+
}
152+
153+
token = GetNumberToken(input, offset);
154+
if (token != null)
155+
{
156+
return token;
157+
}
158+
159+
token = GetReservedWordToken(input, offset, previousToken);
160+
if (token != null)
161+
{
162+
return token;
163+
}
164+
165+
token = GetWordToken(input, offset);
166+
if (token != null)
167+
{
168+
return token;
169+
}
170+
171+
return GetOperatorToken(input, offset);
134172
}
135173

136174
private Token GetCommentToken(string input, int offset)
137175
{
138-
return Utils.FirstNotnull(
139-
() => GetTokenOnFirstMatch(input, offset, TokenTypes.LINE_COMMENT, _lineCommentPattern),
140-
() => GetTokenOnFirstMatch(input, offset, TokenTypes.BLOCK_COMMENT, _blockCommentPattern));
176+
var token = GetTokenOnFirstMatch(input, offset, TokenTypes.LINE_COMMENT, _lineCommentPattern);
177+
if (token != null)
178+
{
179+
return token;
180+
}
181+
182+
return GetTokenOnFirstMatch(input, offset, TokenTypes.BLOCK_COMMENT, _blockCommentPattern);
183+
}
184+
185+
private Token GetReservedWordToken(string input, int offset, Token previousToken)
186+
{
187+
if (previousToken?.Value == ".")
188+
{
189+
return null;
190+
}
191+
192+
var token = GetTokenOnFirstMatch(input, offset, TokenTypes.RESERVED_TOP_LEVEL, _reservedTopLevelPattern);
193+
if (token != null)
194+
{
195+
return token;
196+
}
197+
198+
token = GetTokenOnFirstMatch(input, offset, TokenTypes.RESERVED_NEWLINE, _reservedNewLinePattern);
199+
if (token != null)
200+
{
201+
return token;
202+
}
203+
204+
token = GetTokenOnFirstMatch(input, offset, TokenTypes.RESERVED_TOP_LEVEL_NO_INDENT, _reservedTopLevelNoIndentPattern);
205+
if (token != null)
206+
{
207+
return token;
208+
}
209+
210+
return GetTokenOnFirstMatch(input, offset, TokenTypes.RESERVED, _reservedPlainPattern);
141211
}
142212

143213
private Token GetStringToken(string input, int offset) =>
@@ -151,19 +221,27 @@ private Token GetCloseParenToken(string input, int offset) =>
151221

152222
private Token GetPlaceholderToken(string input, int offset)
153223
{
154-
return Utils.FirstNotnull(
155-
() => GetPlaceholderTokenWithKey(input, offset, _indentNamedPlaceholderPattern, v => v.Substring(1)),
156-
() => GetPlaceholderTokenWithKey(input, offset, _stringNamedPlaceholderPattern, v =>
157-
{
158-
return GetEscapedPlaceholderKey(v.Substring(2, v.Length - 3), v.Substring(v.Length - 1));
159-
}),
160-
() => GetPlaceholderTokenWithKey(input, offset, _indexedPlaceholderPattern, v => v.Substring(1)));
161-
}
224+
var token = GetTokenOnFirstMatch(input, offset, TokenTypes.PLACEHOLDER, _indentNamedPlaceholderPattern);
225+
if (token != null)
226+
{
227+
return token.WithKey(token.Value.Substring(1));
228+
}
162229

163-
private static Token GetPlaceholderTokenWithKey(string input, int offset, Regex regex, Func<string, string> parseKey)
164-
{
165-
var token = GetTokenOnFirstMatch(input, offset, TokenTypes.PLACEHOLDER, regex);
166-
return token?.WithKey(parseKey.Invoke(token.Value));
230+
token = GetTokenOnFirstMatch(input, offset, TokenTypes.PLACEHOLDER, _stringNamedPlaceholderPattern);
231+
if (token != null)
232+
{
233+
var v = token.Value;
234+
var key = GetEscapedPlaceholderKey(v.Substring(2, v.Length - 3), v.Substring(v.Length - 1));
235+
return token.WithKey(key);
236+
}
237+
238+
token = GetTokenOnFirstMatch(input, offset, TokenTypes.PLACEHOLDER, _indexedPlaceholderPattern);
239+
if (token != null)
240+
{
241+
return token.WithKey(token.Value.Substring(1));
242+
}
243+
244+
return null;
167245
}
168246

169247
private static string GetEscapedPlaceholderKey(string key, string quoteChar) =>
@@ -175,25 +253,14 @@ private Token GetNumberToken(string input, int offset) =>
175253
private Token GetOperatorToken(string input, int offset) =>
176254
GetTokenOnFirstMatch(input, offset, TokenTypes.OPERATOR, _operatorPattern);
177255

178-
private Token GetReservedWordToken(string input, int offset, Token previousToken)
179-
{
180-
return previousToken?.Value == "."
181-
? default
182-
: Utils.FirstNotnull(
183-
() => GetTokenOnFirstMatch(input, offset, TokenTypes.RESERVED_TOP_LEVEL, _reservedTopLevelPattern),
184-
() => GetTokenOnFirstMatch(input, offset, TokenTypes.RESERVED_NEWLINE, _reservedNewLinePattern),
185-
() => GetTokenOnFirstMatch(input, offset, TokenTypes.RESERVED_TOP_LEVEL_NO_INDENT, _reservedTopLevelNoIndentPattern),
186-
() => GetTokenOnFirstMatch(input, offset, TokenTypes.RESERVED, _reservedPlainPattern));
187-
}
188-
189256
private Token GetWordToken(string input, int offset) =>
190257
GetTokenOnFirstMatch(input, offset, TokenTypes.WORD, _wordPattern);
191258

192259
private static Token GetTokenOnFirstMatch(string input, int offset, TokenTypes type, Regex regex)
193260
{
194261
if (regex == null)
195262
{
196-
return default;
263+
return null;
197264
}
198265

199266
var match = regex.Match(input, offset, input.Length - offset);
@@ -203,7 +270,7 @@ private static Token GetTokenOnFirstMatch(string input, int offset, TokenTypes t
203270
return new Token(type, match.Value);
204271
}
205272

206-
return default;
273+
return null;
207274
}
208275
}
209276
}

SQL.Formatter/Core/Util/Utils.cs

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
1-
using System;
2-
using System.Collections.Generic;
1+
using System.Collections.Generic;
32
using System.Linq;
43

54
namespace SQL.Formatter.Core.Util
@@ -11,11 +10,6 @@ public static List<T> NullToEmpty<T>(List<T> list)
1110
return list ?? new List<T>();
1211
}
1312

14-
public static R FirstNotnull<R>(params Func<R>[] suppliers) where R : class
15-
{
16-
return suppliers.FirstOrDefault(supplier => supplier() != null)?.Invoke();
17-
}
18-
1913
public static string Repeat(string s, int n)
2014
{
2115
return string.Concat(Enumerable.Repeat(s, n));

0 commit comments

Comments
 (0)