Skip to content

Commit 335eeb4

Browse files
authored
Improve Regex performance (in particular RegexOptions.Compiled) (dotnet#271)
* Avoid generating timeout checks for infinite timeouts
* Use Ldc_I4_X for Ldc when applicable
* Use ToLowerInvariant for invariant mode
* Remove unnecessary virtual-ness of some internal methods
* Clean up CompiledRegexRunner
* Lazily allocate RegexCharClass._categories
* Avoid negative numbers in generated names
* Optimize Is{ECMA}WordChar
* Optimize common CharInClass calls
* Clean up RegexCompiler.cs
* Add more RegexOptions.Compiled tests
* Replace RegexParser.s_category byte[] with a span
* Avoid delegate allocations each time CreateInstance is called
* Replace CharUnicodeInfo.GetUnicodeCategory with char's version. It has a fast path for ASCII.
* Clean up stackallocs
* Remove an unnecessary box
* Avoid string allocation for negated category
* Simplify tests
* Make low-hanging-fruit allocation reductions, e.g. avoiding allocating a range list if a class contains only categories, avoiding some intermediary strings, avoiding some delegate allocations for sorting, etc.
* More code cleanup
* Use String.IndexOf in RegexParser.Unescape
1 parent 3952a34 commit 335eeb4

26 files changed

Lines changed: 2178 additions & 2454 deletions

src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/CaptureCollection.cs

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,10 @@
1111

1212
namespace System.Text.RegularExpressions
1313
{
14-
/*
15-
* This collection returns the Captures for a group
16-
* in the order in which they were matched (left to right
17-
* or right to left). It is created by Group.Captures
18-
*/
14+
// This collection returns the Captures for a group
15+
// in the order in which they were matched (left to right
16+
// or right to left). It is created by Group.Captures.
17+
1918
/// <summary>
2019
/// Represents a sequence of capture substrings. The object is used
2120
/// to return the set of captures done by a single capturing group.

src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/CompiledRegexRunner.cs

Lines changed: 7 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -6,32 +6,21 @@ namespace System.Text.RegularExpressions
66
{
77
internal sealed class CompiledRegexRunner : RegexRunner
88
{
9-
private Action<RegexRunner>? _goMethod;
10-
private Func<RegexRunner, bool>? _findFirstCharMethod;
11-
private Action<RegexRunner>? _initTrackCountMethod;
9+
private readonly Action<RegexRunner> _goMethod;
10+
private readonly Func<RegexRunner, bool> _findFirstCharMethod;
11+
private readonly Action<RegexRunner> _initTrackCountMethod;
1212

13-
public CompiledRegexRunner() { }
14-
15-
public void SetDelegates(Action<RegexRunner> go, Func<RegexRunner, bool> firstChar, Action<RegexRunner> trackCount)
13+
public CompiledRegexRunner(Action<RegexRunner> go, Func<RegexRunner, bool> firstChar, Action<RegexRunner> trackCount)
1614
{
1715
_goMethod = go;
1816
_findFirstCharMethod = firstChar;
1917
_initTrackCountMethod = trackCount;
2018
}
2119

22-
protected override void Go()
23-
{
24-
_goMethod!(this);
25-
}
20+
protected override void Go() => _goMethod(this);
2621

27-
protected override bool FindFirstChar()
28-
{
29-
return _findFirstCharMethod!(this);
30-
}
22+
protected override bool FindFirstChar() => _findFirstCharMethod(this);
3123

32-
protected override void InitTrackCount()
33-
{
34-
_initTrackCountMethod!(this);
35-
}
24+
protected override void InitTrackCount() => _initTrackCountMethod(this);
3625
}
3726
}

src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/CompiledRegexRunnerFactory.cs

Lines changed: 8 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -5,31 +5,21 @@
55
// This is the only concrete implementation of RegexRunnerFactory,
66
// but we cannot combine them due to RegexRunnerFactory having shipped public.
77

8-
using System.Reflection.Emit;
9-
108
namespace System.Text.RegularExpressions
119
{
1210
internal sealed class CompiledRegexRunnerFactory : RegexRunnerFactory
1311
{
14-
private readonly DynamicMethod _goMethod;
15-
private readonly DynamicMethod _findFirstCharMethod;
16-
private readonly DynamicMethod _initTrackCountMethod;
12+
private readonly Action<RegexRunner> _go;
13+
private readonly Func<RegexRunner, bool> _findFirstChar;
14+
private readonly Action<RegexRunner> _initTrackCount;
1715

18-
public CompiledRegexRunnerFactory(DynamicMethod go, DynamicMethod firstChar, DynamicMethod trackCount)
16+
public CompiledRegexRunnerFactory(Action<RegexRunner> go, Func<RegexRunner, bool> findFirstChar, Action<RegexRunner> initTrackCount)
1917
{
20-
_goMethod = go;
21-
_findFirstCharMethod = firstChar;
22-
_initTrackCountMethod = trackCount;
18+
_go = go;
19+
_findFirstChar = findFirstChar;
20+
_initTrackCount = initTrackCount;
2321
}
2422

25-
protected internal override RegexRunner CreateInstance()
26-
{
27-
CompiledRegexRunner runner = new CompiledRegexRunner();
28-
runner.SetDelegates((Action<RegexRunner>)_goMethod.CreateDelegate(typeof(Action<RegexRunner>)),
29-
(Func<RegexRunner, bool>)_findFirstCharMethod.CreateDelegate(typeof(Func<RegexRunner, bool>)),
30-
(Action<RegexRunner>)_initTrackCountMethod.CreateDelegate(typeof(Action<RegexRunner>)));
31-
32-
return runner;
33-
}
23+
protected internal override RegexRunner CreateInstance() => new CompiledRegexRunner(_go, _findFirstChar, _initTrackCount);
3424
}
3525
}

src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Group.cs

Lines changed: 1 addition & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -42,16 +42,7 @@ internal Group(string text, int[] caps, int capcount, string name)
4242
/// group, in innermost-leftmost-first order (or innermost-rightmost-first order if
4343
/// compiled with the "r" option). The collection may have zero or more items.
4444
/// </summary>
45-
public CaptureCollection Captures
46-
{
47-
get
48-
{
49-
if (_capcoll == null)
50-
_capcoll = new CaptureCollection(this);
51-
52-
return _capcoll;
53-
}
54-
}
45+
public CaptureCollection Captures => _capcoll ??= new CaptureCollection(this);
5546

5647
/// <summary>
5748
/// Returns a Group object equivalent to the one supplied that is safe to share between

src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/GroupCollection.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -221,6 +221,7 @@ public bool TryGetValue(string key, [NotNullWhen(true)] out Group? value)
221221
value = null;
222222
return false;
223223
}
224+
224225
value = group;
225226
return true;
226227
}

src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Match.cs

Lines changed: 16 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ internal Match(Regex? regex, int capcount, string text, int begpos, int len, int
7373
/// </summary>
7474
public static Match Empty { get; } = new Match(null, 1, string.Empty, 0, 0, 0);
7575

76-
internal virtual void Reset(Regex regex, string text, int textbeg, int textend, int textstart)
76+
internal void Reset(Regex regex, string text, int textbeg, int textend, int textstart)
7777
{
7878
_regex = regex;
7979
Text = text;
@@ -89,16 +89,7 @@ internal virtual void Reset(Regex regex, string text, int textbeg, int textend,
8989
_balancing = false;
9090
}
9191

92-
public virtual GroupCollection Groups
93-
{
94-
get
95-
{
96-
if (_groupcoll == null)
97-
_groupcoll = new GroupCollection(this, null);
98-
99-
return _groupcoll;
100-
}
101-
}
92+
public virtual GroupCollection Groups => _groupcoll ??= new GroupCollection(this, null);
10293

10394
/// <summary>
10495
/// Returns a new Match with the results for the next match, starting
@@ -127,17 +118,13 @@ public virtual string Result(string replacement)
127118
throw new NotSupportedException(SR.NoResultOnFailed);
128119

129120
// Gets the weakly cached replacement helper or creates one if there isn't one already.
130-
RegexReplacement repl = RegexReplacement.GetOrCreate(_regex._replref!, replacement, _regex.caps!, _regex.capsize,
131-
_regex.capnames!, _regex.roptions);
132-
Span<char> charInitSpan = stackalloc char[ReplaceBufferSize];
133-
var vsb = new ValueStringBuilder(charInitSpan);
134-
121+
RegexReplacement repl = RegexReplacement.GetOrCreate(_regex._replref!, replacement, _regex.caps!, _regex.capsize, _regex.capnames!, _regex.roptions);
122+
var vsb = new ValueStringBuilder(stackalloc char[ReplaceBufferSize]);
135123
repl.ReplacementImpl(ref vsb, this);
136-
137124
return vsb.ToString();
138125
}
139126

140-
internal virtual ReadOnlySpan<char> GroupToStringImpl(int groupnum)
127+
internal ReadOnlySpan<char> GroupToStringImpl(int groupnum)
141128
{
142129
int c = _matchcount[groupnum];
143130
if (c == 0)
@@ -180,14 +167,11 @@ public static Match Synchronized(Match inner)
180167
/// <summary>
181168
/// Adds a capture to the group specified by "cap"
182169
/// </summary>
183-
internal virtual void AddMatch(int cap, int start, int len)
170+
internal void AddMatch(int cap, int start, int len)
184171
{
185-
int capcount;
186-
187-
if (_matches[cap] == null)
188-
_matches[cap] = new int[2];
172+
_matches[cap] ??= new int[2];
189173

190-
capcount = _matchcount[cap];
174+
int capcount = _matchcount[cap];
191175

192176
if (capcount * 2 + 2 > _matches[cap].Length)
193177
{
@@ -210,7 +194,7 @@ balanced match construct. (?<foo-foo2>...)
210194
If there were no such thing as backtracking, this would be as simple as calling RemoveMatch(cap).
211195
However, since we have backtracking, we need to keep track of everything.
212196
*/
213-
internal virtual void BalanceMatch(int cap)
197+
internal void BalanceMatch(int cap)
214198
{
215199
_balancing = true;
216200

@@ -236,23 +220,23 @@ internal virtual void BalanceMatch(int cap)
236220
/// <summary>
237221
/// Removes a group match by capnum
238222
/// </summary>
239-
internal virtual void RemoveMatch(int cap)
223+
internal void RemoveMatch(int cap)
240224
{
241225
_matchcount[cap]--;
242226
}
243227

244228
/// <summary>
245229
/// Tells if a group was matched by capnum
246230
/// </summary>
247-
internal virtual bool IsMatched(int cap)
231+
internal bool IsMatched(int cap)
248232
{
249233
return cap < _matchcount.Length && _matchcount[cap] > 0 && _matches[cap][_matchcount[cap] * 2 - 1] != (-3 + 1);
250234
}
251235

252236
/// <summary>
253237
/// Returns the index of the last specified matched group by capnum
254238
/// </summary>
255-
internal virtual int MatchIndex(int cap)
239+
internal int MatchIndex(int cap)
256240
{
257241
int i = _matches[cap][_matchcount[cap] * 2 - 2];
258242
if (i >= 0)
@@ -264,7 +248,7 @@ internal virtual int MatchIndex(int cap)
264248
/// <summary>
265249
/// Returns the length of the last specified matched group by capnum
266250
/// </summary>
267-
internal virtual int MatchLength(int cap)
251+
internal int MatchLength(int cap)
268252
{
269253
int i = _matches[cap][_matchcount[cap] * 2 - 1];
270254
if (i >= 0)
@@ -276,7 +260,7 @@ internal virtual int MatchLength(int cap)
276260
/// <summary>
277261
/// Tidy the match so that it can be used as an immutable result
278262
/// </summary>
279-
internal virtual void Tidy(int textpos)
263+
internal void Tidy(int textpos)
280264
{
281265
int[] interval = _matches[0];
282266
Index = interval[0];
@@ -334,16 +318,7 @@ internal virtual void Tidy(int textpos)
334318
}
335319

336320
#if DEBUG
337-
internal bool Debug
338-
{
339-
get
340-
{
341-
if (_regex == null)
342-
return false;
343-
344-
return _regex.Debug;
345-
}
346-
}
321+
internal bool Debug => _regex != null && _regex.Debug;
347322

348323
internal virtual void Dump()
349324
{
@@ -381,16 +356,7 @@ internal MatchSparse(Regex regex, Hashtable caps, int capcount, string text, int
381356
_caps = caps;
382357
}
383358

384-
public override GroupCollection Groups
385-
{
386-
get
387-
{
388-
if (_groupcoll == null)
389-
_groupcoll = new GroupCollection(this, _caps);
390-
391-
return _groupcoll;
392-
}
393-
}
359+
public override GroupCollection Groups => _groupcoll ??= new GroupCollection(this, _caps);
394360

395361
#if DEBUG
396362
internal override void Dump()

0 commit comments

Comments
 (0)