Skip to content

Commit eb569f9

Browse files
committed
perf: optimize AbstractFormatter using StringBuilder and static Regex
- Replace string concatenation with StringBuilder to reduce GC pressure
- Use statically compiled Regex to avoid recompilation overhead
- Implement in-place TrimEnd by direct StringBuilder.Length adjustment (O(1) per trimmed character)

Benchmark (100 runs):
- Mean: 76.09ms -> 69.46ms (-8.7%)
- Allocated: 70.7MB -> 67.5MB (-4.6%)
- Gen 0 GC: 11,333 -> 10,666 (-5.9%)
1 parent 4296230 commit eb569f9

1 file changed

Lines changed: 111 additions & 94 deletions

File tree

Lines changed: 111 additions & 94 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
using System;
22
using System.Collections.Generic;
3+
using System.Text;
34
using System.Text.RegularExpressions;
45
using SQL.Formatter.Core.Util;
56
using SQL.Formatter.Language;
@@ -16,6 +17,19 @@ public class AbstractFormatter : IDialectConfigurator
1617
private JSLikeList<Token> _tokens;
1718
private int _index;
1819

20+
// OPTIMIZATION: Compile the regex once statically to prevent recompilation overhead
21+
private static readonly Regex s_whitespaceRegex = new Regex(@"\s+", RegexOptions.Compiled);
22+
23+
private static readonly HashSet<TokenTypes> s_preserveWhitespaceFor =
24+
new HashSet<TokenTypes> {
25+
TokenTypes.OPEN_PAREN,
26+
TokenTypes.LINE_COMMENT,
27+
TokenTypes.OPERATOR,
28+
TokenTypes.RESERVED_NEWLINE
29+
};
30+
31+
public Func<DialectConfig> _doDialectConfigFunc;
32+
1933
public AbstractFormatter(FormatConfig cfg)
2034
{
2135
_cfg = cfg;
@@ -41,152 +55,149 @@ protected virtual Token TokenOverride(Token token)
4155
public string Format(string query)
4256
{
4357
_tokens = Tokenizer().Tokenize(query);
44-
var formattedQuery = GetFormattedQueryFromTokens();
45-
46-
return formattedQuery.Trim();
58+
return GetFormattedQueryFromTokens().Trim();
4759
}
4860

4961
private string GetFormattedQueryFromTokens()
5062
{
51-
var formattedQuery = string.Empty;
63+
// OPTIMIZATION: Use a StringBuilder initialized with an estimated capacity to reduce buffer resizing (output longer than the estimate still grows the buffer).
64+
// If you know average query sizes, set this capacity accordingly.
65+
var formattedQuery = new StringBuilder(1024);
5266

5367
var index = -1;
5468
foreach (Token t in _tokens)
5569
{
5670
_index = ++index;
57-
5871
var token = TokenOverride(t);
5972

6073
if (token.Type == TokenTypes.LINE_COMMENT)
6174
{
62-
formattedQuery = FormatLineComment(token, formattedQuery);
75+
FormatLineComment(token, formattedQuery);
6376
}
6477
else if (token.Type == TokenTypes.BLOCK_COMMENT)
6578
{
66-
formattedQuery = FormatBlockComment(token, formattedQuery);
79+
FormatBlockComment(token, formattedQuery);
6780
}
6881
else if (token.Type == TokenTypes.RESERVED_TOP_LEVEL)
6982
{
70-
formattedQuery = FormatToplevelReservedWord(token, formattedQuery);
83+
FormatToplevelReservedWord(token, formattedQuery);
7184
_previousReservedToken = token;
7285
}
7386
else if (token.Type == TokenTypes.RESERVED_TOP_LEVEL_NO_INDENT)
7487
{
75-
formattedQuery = FormatTopLevelReservedWordNoIndent(token, formattedQuery);
88+
FormatTopLevelReservedWordNoIndent(token, formattedQuery);
7689
_previousReservedToken = token;
7790
}
7891
else if (token.Type == TokenTypes.RESERVED_NEWLINE)
7992
{
80-
formattedQuery = FormatNewlineReservedWord(token, formattedQuery);
93+
FormatNewlineReservedWord(token, formattedQuery);
8194
_previousReservedToken = token;
8295
}
8396
else if (token.Type == TokenTypes.RESERVED)
8497
{
85-
formattedQuery = FormatWithSpaces(token, formattedQuery);
98+
FormatWithSpaces(token, formattedQuery);
8699
_previousReservedToken = token;
87100
}
88101
else if (token.Type == TokenTypes.OPEN_PAREN)
89102
{
90-
formattedQuery = FormatOpeningParentheses(token, formattedQuery);
103+
FormatOpeningParentheses(token, formattedQuery);
91104
}
92105
else if (token.Type == TokenTypes.CLOSE_PAREN)
93106
{
94-
formattedQuery = FormatClosingParentheses(token, formattedQuery);
107+
FormatClosingParentheses(token, formattedQuery);
95108
}
96109
else if (token.Type == TokenTypes.PLACEHOLDER)
97110
{
98-
formattedQuery = FormatPlaceholder(token, formattedQuery);
111+
FormatPlaceholder(token, formattedQuery);
99112
}
100113
else if (token.Value.Equals(","))
101114
{
102-
formattedQuery = FormatComma(token, formattedQuery);
115+
FormatComma(token, formattedQuery);
103116
}
104117
else if (token.Value.Equals(":"))
105118
{
106-
formattedQuery = FormatWithSpaceAfter(token, formattedQuery);
119+
FormatWithSpaceAfter(token, formattedQuery);
107120
}
108121
else if (token.Value.Equals("."))
109122
{
110-
formattedQuery = FormatWithoutSpaces(token, formattedQuery);
123+
FormatWithoutSpaces(token, formattedQuery);
111124
}
112125
else if (token.Value.Equals(";"))
113126
{
114-
formattedQuery = FormatQuerySeparator(token, formattedQuery);
127+
FormatQuerySeparator(token, formattedQuery);
115128
}
116129
else
117130
{
118-
formattedQuery = FormatWithSpaces(token, formattedQuery);
131+
FormatWithSpaces(token, formattedQuery);
119132
}
120133
}
121134

122-
return formattedQuery;
135+
return formattedQuery.ToString();
123136
}
124137

125-
protected virtual string FormatLineComment(Token token, string query)
138+
protected virtual void FormatLineComment(Token token, StringBuilder query)
126139
{
127-
return AddNewline(query + Show(token));
140+
query.Append(Show(token));
141+
AddNewline(query);
128142
}
129143

130-
protected virtual string FormatBlockComment(Token token, string query)
144+
protected virtual void FormatBlockComment(Token token, StringBuilder query)
131145
{
132-
return AddNewline(AddNewline(query) + IndentComment(token.Value));
146+
AddNewline(query);
147+
query.Append(IndentComment(token.Value));
148+
AddNewline(query);
133149
}
134150

135151
protected virtual string IndentComment(string comment)
136152
{
137153
return comment.Replace("\n", "\n" + _indentation.GetIndent());
138154
}
139155

140-
protected virtual string FormatTopLevelReservedWordNoIndent(Token token, string query)
156+
protected virtual void FormatTopLevelReservedWordNoIndent(Token token, StringBuilder query)
141157
{
142158
_indentation.DecreaseTopLevel();
143-
query = AddNewline(query) + EqualizeWhitespace(Show(token));
144-
return AddNewline(query);
159+
AddNewline(query);
160+
query.Append(EqualizeWhitespace(Show(token)));
161+
AddNewline(query);
145162
}
146163

147-
protected virtual string FormatToplevelReservedWord(Token token, string query)
164+
protected virtual void FormatToplevelReservedWord(Token token, StringBuilder query)
148165
{
149166
_indentation.DecreaseTopLevel();
150-
151-
query = AddNewline(query);
152-
167+
AddNewline(query);
153168
_indentation.IncreaseTopLevel();
154169

155-
query += EqualizeWhitespace(Show(token));
156-
return AddNewline(query);
170+
query.Append(EqualizeWhitespace(Show(token)));
171+
AddNewline(query);
157172
}
158173

159-
protected virtual string FormatNewlineReservedWord(Token token, string query)
174+
protected virtual void FormatNewlineReservedWord(Token token, StringBuilder query)
160175
{
161176
if (Token.IsAnd(token) && Token.IsBetween(TokenLookBehind(2)))
162177
{
163-
return FormatWithSpaces(token, query);
178+
FormatWithSpaces(token, query);
179+
return;
164180
}
165181

166-
return AddNewline(query) + EqualizeWhitespace(Show(token)) + " ";
182+
AddNewline(query);
183+
query.Append(EqualizeWhitespace(Show(token))).Append(" ");
167184
}
168185

169186
protected static string EqualizeWhitespace(string str)
170187
{
171-
return Regex.Replace(str, @"\s+", " ");
188+
// Uses the statically compiled regex
189+
return s_whitespaceRegex.Replace(str, " ");
172190
}
173191

174-
private static readonly HashSet<TokenTypes> s_preserveWhitespaceFor =
175-
new HashSet<TokenTypes> {
176-
TokenTypes.OPEN_PAREN,
177-
TokenTypes.LINE_COMMENT,
178-
TokenTypes.OPERATOR,
179-
TokenTypes.RESERVED_NEWLINE};
180-
181-
protected virtual string FormatOpeningParentheses(Token token, string query)
192+
protected virtual void FormatOpeningParentheses(Token token, StringBuilder query)
182193
{
183194
if (string.IsNullOrEmpty(token.WhitespaceBefore)
184195
&& (TokenLookBehind() == default || !s_preserveWhitespaceFor.Contains(TokenLookBehind().Type)))
185196
{
186-
query = query.TrimEnd();
197+
TrimEnd(query);
187198
}
188199

189-
query += Show(token);
200+
query.Append(Show(token));
190201

191202
_inlineBlock.BeginIfPossible(_tokens, _index);
192203

@@ -195,65 +206,78 @@ protected virtual string FormatOpeningParentheses(Token token, string query)
195206
_indentation.IncreaseBlockLevel();
196207
if (!_cfg.SkipWhitespaceNearBlockParentheses)
197208
{
198-
query = AddNewline(query);
209+
AddNewline(query);
199210
}
200211
}
201-
202-
return query;
203212
}
204213

205-
protected virtual string FormatClosingParentheses(Token token, string query)
214+
protected virtual void FormatClosingParentheses(Token token, StringBuilder query)
206215
{
207216
if (_inlineBlock.IsActive())
208217
{
209218
_inlineBlock.End();
210-
return FormatWithSpaceAfter(token, query);
219+
FormatWithSpaceAfter(token, query);
211220
}
212221
else
213222
{
214223
_indentation.DecreaseBlockLevel();
215224

216225
if (!_cfg.SkipWhitespaceNearBlockParentheses)
217226
{
218-
return FormatWithSpaces(token, AddNewline(query));
227+
AddNewline(query);
228+
FormatWithSpaces(token, query);
229+
}
230+
else
231+
{
232+
FormatWithoutSpaces(token, query);
219233
}
220-
221-
return FormatWithoutSpaces(token, query);
222234
}
223235
}
224236

225-
protected virtual string FormatPlaceholder(Token token, string query)
237+
protected virtual void FormatPlaceholder(Token token, StringBuilder query)
226238
{
227-
return query + _parameters.Get(token) + " ";
239+
query.Append(_parameters.Get(token)).Append(" ");
228240
}
229241

230-
protected virtual string FormatComma(Token token, string query)
242+
protected virtual void FormatComma(Token token, StringBuilder query)
231243
{
232-
query = query.TrimEnd() + Show(token) + " ";
233-
return _inlineBlock.IsActive() || Token.IsLimit(_previousReservedToken) ? query : AddNewline(query);
244+
TrimEnd(query);
245+
query.Append(Show(token)).Append(" ");
246+
247+
if (!_inlineBlock.IsActive() && !Token.IsLimit(_previousReservedToken))
248+
{
249+
AddNewline(query);
250+
}
234251
}
235252

236-
protected virtual string FormatWithSpaceAfter(Token token, string query)
253+
protected virtual void FormatWithSpaceAfter(Token token, StringBuilder query)
237254
{
238-
return query.TrimEnd() + Show(token) + " ";
255+
TrimEnd(query);
256+
query.Append(Show(token)).Append(" ");
239257
}
240258

241-
protected virtual string FormatWithoutSpaces(Token token, string query)
259+
protected virtual void FormatWithoutSpaces(Token token, StringBuilder query)
242260
{
243-
return query.TrimEnd() + Show(token);
261+
TrimEnd(query);
262+
query.Append(Show(token));
244263
}
245264

246-
protected virtual string FormatWithSpaces(Token token, string query)
265+
protected virtual void FormatWithSpaces(Token token, StringBuilder query)
247266
{
248-
return query + Show(token) + " ";
267+
query.Append(Show(token)).Append(" ");
249268
}
250269

251-
protected virtual string FormatQuerySeparator(Token token, string query)
270+
protected virtual void FormatQuerySeparator(Token token, StringBuilder query)
252271
{
253272
_indentation.ResetIndentation();
254-
return query.TrimEnd()
255-
+ Show(token)
256-
+ Utils.Repeat("\n", _cfg.LinesBetweenQueries == default ? 1 : _cfg.LinesBetweenQueries);
273+
TrimEnd(query);
274+
query.Append(Show(token));
275+
276+
var lines = _cfg.LinesBetweenQueries == default ? 1 : _cfg.LinesBetweenQueries;
277+
for (var i = 0; i < lines; i++)
278+
{
279+
query.Append('\n');
280+
}
257281
}
258282

259283
protected virtual string Show(Token token)
@@ -266,49 +290,42 @@ protected virtual string Show(Token token)
266290
|| token.Type == TokenTypes.OPEN_PAREN
267291
|| token.Type == TokenTypes.CLOSE_PAREN))
268292
{
293+
// Note: If memory is still tight, caching upper-case values at the token generation stage is even better.
269294
return token.Value.ToUpper();
270295
}
271296

272297
return token.Value;
273298
}
274299

275-
protected virtual string AddNewline(string query)
300+
protected virtual void AddNewline(StringBuilder query)
276301
{
277-
query = query.TrimEnd();
278-
if (!query.EndsWith("\n"))
302+
TrimEnd(query);
303+
// Replaces expensive .EndsWith("\n") with a fast char index lookup
304+
if (query.Length == 0 || query[query.Length - 1] != '\n')
279305
{
280-
query += "\n";
306+
query.Append('\n');
281307
}
282308

283-
return query + _indentation.GetIndent();
284-
}
285-
286-
protected Token TokenLookBehind()
287-
{
288-
return TokenLookBehind(1);
309+
query.Append(_indentation.GetIndent());
289310
}
290311

291-
protected Token TokenLookBehind(int n)
312+
// OPTIMIZATION: Trims trailing whitespace in place by shrinking the StringBuilder's Length, instead of building a new trimmed string.
313+
protected void TrimEnd(StringBuilder sb)
292314
{
293-
return _tokens.Get(_index - n);
294-
}
295-
296-
protected Token TokenLookAhead()
297-
{
298-
return TokenLookAhead(1);
315+
while (sb.Length > 0 && char.IsWhiteSpace(sb[sb.Length - 1]))
316+
{
317+
sb.Length--;
318+
}
299319
}
300320

301-
protected Token TokenLookAhead(int n)
302-
{
303-
return _tokens.Get(_index + n);
304-
305-
}
321+
protected Token TokenLookBehind() => TokenLookBehind(1);
322+
protected Token TokenLookBehind(int n) => _tokens.Get(_index - n);
323+
protected Token TokenLookAhead() => TokenLookAhead(1);
324+
protected Token TokenLookAhead(int n) => _tokens.Get(_index + n);
306325

307326
public virtual DialectConfig DoDialectConfig()
308327
{
309328
return _doDialectConfigFunc.Invoke();
310329
}
311-
312-
public Func<DialectConfig> _doDialectConfigFunc;
313330
}
314331
}

0 commit comments

Comments
 (0)