11using System ;
22using System . Collections . Generic ;
3+ using System . Text ;
34using System . Text . RegularExpressions ;
45using SQL . Formatter . Core . Util ;
56using SQL . Formatter . Language ;
@@ -16,6 +17,19 @@ public class AbstractFormatter : IDialectConfigurator
1617 private JSLikeList < Token > _tokens ;
1718 private int _index ;
1819
20+ // OPTIMIZATION: Compile the regex once statically to prevent recompilation overhead
21+ private static readonly Regex s_whitespaceRegex = new Regex ( @"\s+" , RegexOptions . Compiled ) ;
22+
23+ private static readonly HashSet < TokenTypes > s_preserveWhitespaceFor =
24+ new HashSet < TokenTypes > {
25+ TokenTypes . OPEN_PAREN ,
26+ TokenTypes . LINE_COMMENT ,
27+ TokenTypes . OPERATOR ,
28+ TokenTypes . RESERVED_NEWLINE
29+ } ;
30+
31+ public Func < DialectConfig > _doDialectConfigFunc ;
32+
1933 public AbstractFormatter ( FormatConfig cfg )
2034 {
2135 _cfg = cfg ;
@@ -41,152 +55,149 @@ protected virtual Token TokenOverride(Token token)
4155 public string Format ( string query )
4256 {
4357 _tokens = Tokenizer ( ) . Tokenize ( query ) ;
44- var formattedQuery = GetFormattedQueryFromTokens ( ) ;
45-
46- return formattedQuery . Trim ( ) ;
58+ return GetFormattedQueryFromTokens ( ) . Trim ( ) ;
4759 }
4860
4961 private string GetFormattedQueryFromTokens ( )
5062 {
51- var formattedQuery = string . Empty ;
63+ // OPTIMIZATION: Use a StringBuilder initialized with an estimated capacity to prevent resizing.
64+ // If you know average query sizes, set this capacity accordingly.
65+ var formattedQuery = new StringBuilder ( 1024 ) ;
5266
5367 var index = - 1 ;
5468 foreach ( Token t in _tokens )
5569 {
5670 _index = ++ index ;
57-
5871 var token = TokenOverride ( t ) ;
5972
6073 if ( token . Type == TokenTypes . LINE_COMMENT )
6174 {
62- formattedQuery = FormatLineComment ( token , formattedQuery ) ;
75+ FormatLineComment ( token , formattedQuery ) ;
6376 }
6477 else if ( token . Type == TokenTypes . BLOCK_COMMENT )
6578 {
66- formattedQuery = FormatBlockComment ( token , formattedQuery ) ;
79+ FormatBlockComment ( token , formattedQuery ) ;
6780 }
6881 else if ( token . Type == TokenTypes . RESERVED_TOP_LEVEL )
6982 {
70- formattedQuery = FormatToplevelReservedWord ( token , formattedQuery ) ;
83+ FormatToplevelReservedWord ( token , formattedQuery ) ;
7184 _previousReservedToken = token ;
7285 }
7386 else if ( token . Type == TokenTypes . RESERVED_TOP_LEVEL_NO_INDENT )
7487 {
75- formattedQuery = FormatTopLevelReservedWordNoIndent ( token , formattedQuery ) ;
88+ FormatTopLevelReservedWordNoIndent ( token , formattedQuery ) ;
7689 _previousReservedToken = token ;
7790 }
7891 else if ( token . Type == TokenTypes . RESERVED_NEWLINE )
7992 {
80- formattedQuery = FormatNewlineReservedWord ( token , formattedQuery ) ;
93+ FormatNewlineReservedWord ( token , formattedQuery ) ;
8194 _previousReservedToken = token ;
8295 }
8396 else if ( token . Type == TokenTypes . RESERVED )
8497 {
85- formattedQuery = FormatWithSpaces ( token , formattedQuery ) ;
98+ FormatWithSpaces ( token , formattedQuery ) ;
8699 _previousReservedToken = token ;
87100 }
88101 else if ( token . Type == TokenTypes . OPEN_PAREN )
89102 {
90- formattedQuery = FormatOpeningParentheses ( token , formattedQuery ) ;
103+ FormatOpeningParentheses ( token , formattedQuery ) ;
91104 }
92105 else if ( token . Type == TokenTypes . CLOSE_PAREN )
93106 {
94- formattedQuery = FormatClosingParentheses ( token , formattedQuery ) ;
107+ FormatClosingParentheses ( token , formattedQuery ) ;
95108 }
96109 else if ( token . Type == TokenTypes . PLACEHOLDER )
97110 {
98- formattedQuery = FormatPlaceholder ( token , formattedQuery ) ;
111+ FormatPlaceholder ( token , formattedQuery ) ;
99112 }
100113 else if ( token . Value . Equals ( "," ) )
101114 {
102- formattedQuery = FormatComma ( token , formattedQuery ) ;
115+ FormatComma ( token , formattedQuery ) ;
103116 }
104117 else if ( token . Value . Equals ( ":" ) )
105118 {
106- formattedQuery = FormatWithSpaceAfter ( token , formattedQuery ) ;
119+ FormatWithSpaceAfter ( token , formattedQuery ) ;
107120 }
108121 else if ( token . Value . Equals ( "." ) )
109122 {
110- formattedQuery = FormatWithoutSpaces ( token , formattedQuery ) ;
123+ FormatWithoutSpaces ( token , formattedQuery ) ;
111124 }
112125 else if ( token . Value . Equals ( ";" ) )
113126 {
114- formattedQuery = FormatQuerySeparator ( token , formattedQuery ) ;
127+ FormatQuerySeparator ( token , formattedQuery ) ;
115128 }
116129 else
117130 {
118- formattedQuery = FormatWithSpaces ( token , formattedQuery ) ;
131+ FormatWithSpaces ( token , formattedQuery ) ;
119132 }
120133 }
121134
122- return formattedQuery ;
135+ return formattedQuery . ToString ( ) ;
123136 }
124137
125- protected virtual string FormatLineComment ( Token token , string query )
138+ protected virtual void FormatLineComment ( Token token , StringBuilder query )
126139 {
127- return AddNewline ( query + Show ( token ) ) ;
140+ query . Append ( Show ( token ) ) ;
141+ AddNewline ( query ) ;
128142 }
129143
130- protected virtual string FormatBlockComment ( Token token , string query )
144+ protected virtual void FormatBlockComment ( Token token , StringBuilder query )
131145 {
132- return AddNewline ( AddNewline ( query ) + IndentComment ( token . Value ) ) ;
146+ AddNewline ( query ) ;
147+ query . Append ( IndentComment ( token . Value ) ) ;
148+ AddNewline ( query ) ;
133149 }
134150
135151 protected virtual string IndentComment ( string comment )
136152 {
137153 return comment . Replace ( "\n " , "\n " + _indentation . GetIndent ( ) ) ;
138154 }
139155
140- protected virtual string FormatTopLevelReservedWordNoIndent ( Token token , string query )
156+ protected virtual void FormatTopLevelReservedWordNoIndent ( Token token , StringBuilder query )
141157 {
142158 _indentation . DecreaseTopLevel ( ) ;
143- query = AddNewline ( query ) + EqualizeWhitespace ( Show ( token ) ) ;
144- return AddNewline ( query ) ;
159+ AddNewline ( query ) ;
160+ query . Append ( EqualizeWhitespace ( Show ( token ) ) ) ;
161+ AddNewline ( query ) ;
145162 }
146163
147- protected virtual string FormatToplevelReservedWord ( Token token , string query )
164+ protected virtual void FormatToplevelReservedWord ( Token token , StringBuilder query )
148165 {
149166 _indentation . DecreaseTopLevel ( ) ;
150-
151- query = AddNewline ( query ) ;
152-
167+ AddNewline ( query ) ;
153168 _indentation . IncreaseTopLevel ( ) ;
154169
155- query += EqualizeWhitespace ( Show ( token ) ) ;
156- return AddNewline ( query ) ;
170+ query . Append ( EqualizeWhitespace ( Show ( token ) ) ) ;
171+ AddNewline ( query ) ;
157172 }
158173
159- protected virtual string FormatNewlineReservedWord ( Token token , string query )
174+ protected virtual void FormatNewlineReservedWord ( Token token , StringBuilder query )
160175 {
161176 if ( Token . IsAnd ( token ) && Token . IsBetween ( TokenLookBehind ( 2 ) ) )
162177 {
163- return FormatWithSpaces ( token , query ) ;
178+ FormatWithSpaces ( token , query ) ;
179+ return ;
164180 }
165181
166- return AddNewline ( query ) + EqualizeWhitespace ( Show ( token ) ) + " " ;
182+ AddNewline ( query ) ;
183+ query . Append ( EqualizeWhitespace ( Show ( token ) ) ) . Append ( " " ) ;
167184 }
168185
169186 protected static string EqualizeWhitespace ( string str )
170187 {
171- return Regex . Replace ( str , @"\s+" , " " ) ;
188+ // Uses the statically compiled regex
189+ return s_whitespaceRegex . Replace ( str , " " ) ;
172190 }
173191
174- private static readonly HashSet < TokenTypes > s_preserveWhitespaceFor =
175- new HashSet < TokenTypes > {
176- TokenTypes . OPEN_PAREN ,
177- TokenTypes . LINE_COMMENT ,
178- TokenTypes . OPERATOR ,
179- TokenTypes . RESERVED_NEWLINE } ;
180-
181- protected virtual string FormatOpeningParentheses ( Token token , string query )
192+ protected virtual void FormatOpeningParentheses ( Token token , StringBuilder query )
182193 {
183194 if ( string . IsNullOrEmpty ( token . WhitespaceBefore )
184195 && ( TokenLookBehind ( ) == default || ! s_preserveWhitespaceFor . Contains ( TokenLookBehind ( ) . Type ) ) )
185196 {
186- query = query . TrimEnd ( ) ;
197+ TrimEnd ( query ) ;
187198 }
188199
189- query += Show ( token ) ;
200+ query . Append ( Show ( token ) ) ;
190201
191202 _inlineBlock . BeginIfPossible ( _tokens , _index ) ;
192203
@@ -195,65 +206,78 @@ protected virtual string FormatOpeningParentheses(Token token, string query)
195206 _indentation . IncreaseBlockLevel ( ) ;
196207 if ( ! _cfg . SkipWhitespaceNearBlockParentheses )
197208 {
198- query = AddNewline ( query ) ;
209+ AddNewline ( query ) ;
199210 }
200211 }
201-
202- return query ;
203212 }
204213
205- protected virtual string FormatClosingParentheses ( Token token , string query )
214+ protected virtual void FormatClosingParentheses ( Token token , StringBuilder query )
206215 {
207216 if ( _inlineBlock . IsActive ( ) )
208217 {
209218 _inlineBlock . End ( ) ;
210- return FormatWithSpaceAfter ( token , query ) ;
219+ FormatWithSpaceAfter ( token , query ) ;
211220 }
212221 else
213222 {
214223 _indentation . DecreaseBlockLevel ( ) ;
215224
216225 if ( ! _cfg . SkipWhitespaceNearBlockParentheses )
217226 {
218- return FormatWithSpaces ( token , AddNewline ( query ) ) ;
227+ AddNewline ( query ) ;
228+ FormatWithSpaces ( token , query ) ;
229+ }
230+ else
231+ {
232+ FormatWithoutSpaces ( token , query ) ;
219233 }
220-
221- return FormatWithoutSpaces ( token , query ) ;
222234 }
223235 }
224236
225- protected virtual string FormatPlaceholder ( Token token , string query )
237+ protected virtual void FormatPlaceholder ( Token token , StringBuilder query )
226238 {
227- return query + _parameters . Get ( token ) + " " ;
239+ query . Append ( _parameters . Get ( token ) ) . Append ( " " ) ;
228240 }
229241
230- protected virtual string FormatComma ( Token token , string query )
242+ protected virtual void FormatComma ( Token token , StringBuilder query )
231243 {
232- query = query . TrimEnd ( ) + Show ( token ) + " " ;
233- return _inlineBlock . IsActive ( ) || Token . IsLimit ( _previousReservedToken ) ? query : AddNewline ( query ) ;
244+ TrimEnd ( query ) ;
245+ query . Append ( Show ( token ) ) . Append ( " " ) ;
246+
247+ if ( ! _inlineBlock . IsActive ( ) && ! Token . IsLimit ( _previousReservedToken ) )
248+ {
249+ AddNewline ( query ) ;
250+ }
234251 }
235252
236- protected virtual string FormatWithSpaceAfter ( Token token , string query )
253+ protected virtual void FormatWithSpaceAfter ( Token token , StringBuilder query )
237254 {
238- return query . TrimEnd ( ) + Show ( token ) + " " ;
255+ TrimEnd ( query ) ;
256+ query . Append ( Show ( token ) ) . Append ( " " ) ;
239257 }
240258
241- protected virtual string FormatWithoutSpaces ( Token token , string query )
259+ protected virtual void FormatWithoutSpaces ( Token token , StringBuilder query )
242260 {
243- return query . TrimEnd ( ) + Show ( token ) ;
261+ TrimEnd ( query ) ;
262+ query . Append ( Show ( token ) ) ;
244263 }
245264
246- protected virtual string FormatWithSpaces ( Token token , string query )
265+ protected virtual void FormatWithSpaces ( Token token , StringBuilder query )
247266 {
248- return query + Show ( token ) + " " ;
267+ query . Append ( Show ( token ) ) . Append ( " " ) ;
249268 }
250269
251- protected virtual string FormatQuerySeparator ( Token token , string query )
270+ protected virtual void FormatQuerySeparator ( Token token , StringBuilder query )
252271 {
253272 _indentation . ResetIndentation ( ) ;
254- return query . TrimEnd ( )
255- + Show ( token )
256- + Utils . Repeat ( "\n " , _cfg . LinesBetweenQueries == default ? 1 : _cfg . LinesBetweenQueries ) ;
273+ TrimEnd ( query ) ;
274+ query . Append ( Show ( token ) ) ;
275+
276+ var lines = _cfg . LinesBetweenQueries == default ? 1 : _cfg . LinesBetweenQueries ;
277+ for ( var i = 0 ; i < lines ; i ++ )
278+ {
279+ query . Append ( '\n ' ) ;
280+ }
257281 }
258282
259283 protected virtual string Show ( Token token )
@@ -266,49 +290,42 @@ protected virtual string Show(Token token)
266290 || token . Type == TokenTypes . OPEN_PAREN
267291 || token . Type == TokenTypes . CLOSE_PAREN ) )
268292 {
293+ // Note: If memory is still tight, caching upper-case values at the token generation stage is even better.
269294 return token . Value . ToUpper ( ) ;
270295 }
271296
272297 return token . Value ;
273298 }
274299
275- protected virtual string AddNewline ( string query )
300+ protected virtual void AddNewline ( StringBuilder query )
276301 {
277- query = query . TrimEnd ( ) ;
278- if ( ! query . EndsWith ( "\n " ) )
302+ TrimEnd ( query ) ;
303+ // Replaces expensive .EndsWith("\n") with a fast char index lookup
304+ if ( query . Length == 0 || query [ query . Length - 1 ] != '\n ' )
279305 {
280- query += " \n " ;
306+ query . Append ( ' \n ' ) ;
281307 }
282308
283- return query + _indentation . GetIndent ( ) ;
284- }
285-
286- protected Token TokenLookBehind ( )
287- {
288- return TokenLookBehind ( 1 ) ;
309+ query . Append ( _indentation . GetIndent ( ) ) ;
289310 }
290311
291- protected Token TokenLookBehind ( int n )
312+ // OPTIMIZATION: Extremely fast inline trailing whitespace removal
313+ protected void TrimEnd ( StringBuilder sb )
292314 {
293- return _tokens . Get ( _index - n ) ;
294- }
295-
296- protected Token TokenLookAhead ( )
297- {
298- return TokenLookAhead ( 1 ) ;
315+ while ( sb . Length > 0 && char . IsWhiteSpace ( sb [ sb . Length - 1 ] ) )
316+ {
317+ sb . Length -- ;
318+ }
299319 }
300320
301- protected Token TokenLookAhead ( int n )
302- {
303- return _tokens . Get ( _index + n ) ;
304-
305- }
321+ protected Token TokenLookBehind ( ) => TokenLookBehind ( 1 ) ;
322+ protected Token TokenLookBehind ( int n ) => _tokens . Get ( _index - n ) ;
323+ protected Token TokenLookAhead ( ) => TokenLookAhead ( 1 ) ;
324+ protected Token TokenLookAhead ( int n ) => _tokens . Get ( _index + n ) ;
306325
307326 public virtual DialectConfig DoDialectConfig ( )
308327 {
309328 return _doDialectConfigFunc . Invoke ( ) ;
310329 }
311-
312- public Func < DialectConfig > _doDialectConfigFunc ;
313330 }
314331}
0 commit comments