Skip to content

Commit ed9ab1b

Browse files
authored
Several tweaks to RegexGenerator (the tool, not the emitted code) (dotnet#60667)
- Fixed a bunch of nullable reference type warnings (they're suppressed as a netstandard2.0 project, but I temporarily and locally compiled it for netcoreapp current to get it clean)
- Fixed the comparer used to aid in avoiding recomputing regexes when typing
- Added a hook to start exploring code gen for NonBacktracking
1 parent 294a284 commit ed9ab1b

File tree

4 files changed

+99
-100
lines changed

4 files changed

+99
-100
lines changed

src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs

Lines changed: 31 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -53,9 +53,9 @@ private static string EmitRegexType(RegexType regexClass)
5353
}
5454

5555
// Emit containing types
56-
RegexType parent = regexClass.ParentClass;
56+
RegexType? parent = regexClass.ParentClass;
5757
var parentClasses = new Stack<string>();
58-
while (parent != null)
58+
while (parent is not null)
5959
{
6060
parentClasses.Push($"partial {parent.Keyword} {parent.Name} {parent.Constraints}");
6161
parent = parent.ParentClass;
@@ -75,6 +75,7 @@ private static string EmitRegexType(RegexType regexClass)
7575
// Generate a name to describe the regex instance. This includes the method name
7676
// the user provided and a non-randomized (for determinism) hash of it to try to make
7777
// the name that much harder to predict.
78+
Debug.Assert(regexClass.Method is not null);
7879
string generatedName = $"GeneratedRegex_{regexClass.Method.MethodName}_";
7980
generatedName += ComputeStringHash(generatedName).ToString("X");
8081

@@ -104,31 +105,18 @@ static uint ComputeStringHash(string s)
104105
}
105106

106107
/// <summary>Gets whether a given regular expression method is supported by the code generator.</summary>
107-
private static bool SupportsCustomCodeGeneration(RegexMethod rm)
108-
{
109-
const RegexOptions SupportedOptions =
110-
RegexOptions.IgnoreCase |
111-
RegexOptions.Multiline |
112-
RegexOptions.ExplicitCapture |
113-
RegexOptions.Compiled |
114-
RegexOptions.Singleline |
115-
RegexOptions.IgnorePatternWhitespace |
116-
RegexOptions.RightToLeft |
117-
RegexOptions.ECMAScript |
118-
RegexOptions.CultureInvariant;
119-
120-
// If we see an option we're not aware of (but that was allowed through), don't emit custom regex code.
121-
return (rm.Options & ~(int)SupportedOptions) == 0;
122-
}
108+
private static bool SupportsCustomCodeGeneration(RegexMethod rm) =>
109+
// The generator doesn't currently know how to emit code for NonBacktracking.
110+
(rm.Options & RegexOptions.NonBacktracking) == 0;
123111

124112
/// <summary>Generates the code for a regular expression method.</summary>
125113
private static void EmitRegexMethod(IndentedTextWriter writer, RegexMethod rm, string id)
126114
{
127115
string patternExpression = Literal(rm.Pattern);
128-
string optionsExpression = $"(global::System.Text.RegularExpressions.RegexOptions)({rm.Options})";
116+
string optionsExpression = $"(global::System.Text.RegularExpressions.RegexOptions)({(int)rm.Options})";
129117
string timeoutExpression = rm.MatchTimeout == Timeout.Infinite ?
130118
"global::System.Threading.Timeout.InfiniteTimeSpan" :
131-
$"global::System.TimeSpan.FromMilliseconds({rm.MatchTimeout.Value.ToString(CultureInfo.InvariantCulture)})";
119+
$"global::System.TimeSpan.FromMilliseconds({rm.MatchTimeout.ToString(CultureInfo.InvariantCulture)})";
132120

133121
writer.WriteLine(s_generatedCodeAttribute);
134122
writer.WriteLine($"{rm.Modifiers} global::System.Text.RegularExpressions.Regex {rm.MethodName}() => {id}.Instance;");
@@ -242,8 +230,8 @@ static void AppendHashtableContents(IndentedTextWriter writer, Hashtable ht)
242230
private static void EmitFindFirstChar(IndentedTextWriter writer, RegexMethod rm, string id)
243231
{
244232
RegexOptions options = (RegexOptions)rm.Options;
245-
var code = rm.Code;
246-
var lcc = code.LeadingCharClasses;
233+
RegexCode code = rm.Code;
234+
(string CharClass, bool CaseInsensitive)[]? lcc = code.LeadingCharClasses;
247235
bool rtl = code.RightToLeft;
248236
bool hasTextInfo = false;
249237
bool textInfoEmitted = false;
@@ -523,7 +511,7 @@ void EmitAnchorAndLeadingChecks()
523511
writer.WriteLine("return true;");
524512
}
525513
}
526-
else if (code.LeadingCharClasses is null)
514+
else if (lcc is null)
527515
{
528516
writer.WriteLine("return true;");
529517
}
@@ -680,7 +668,11 @@ void EmitAnchorAndLeadingChecks()
680668
private static void EmitGo(IndentedTextWriter writer, RegexMethod rm, string id)
681669
{
682670
Debug.Assert(rm.Code.Tree.Root.Type == RegexNode.Capture);
683-
if (RegexNode.NodeSupportsSimplifiedCodeGenerationImplementation(rm.Code.Tree.Root.Child(0), RegexNode.DefaultMaxRecursionDepth) &&
671+
if ((rm.Options & RegexOptions.NonBacktracking) != 0)
672+
{
673+
EmitNonBacktrackingGo(writer, rm, id);
674+
}
675+
else if (RegexNode.NodeSupportsSimplifiedCodeGenerationImplementation(rm.Code.Tree.Root.Child(0), RegexNode.DefaultMaxRecursionDepth) &&
684676
(((RegexOptions)rm.Code.Tree.Root.Options) & RegexOptions.RightToLeft) == 0)
685677
{
686678
EmitSimplifiedGo(writer, rm, id);
@@ -691,6 +683,12 @@ private static void EmitGo(IndentedTextWriter writer, RegexMethod rm, string id)
691683
}
692684
}
693685

686+
/// <summary>Emits the body of a Go method supporting RegexOptions.NonBacktracking.</summary>
687+
private static void EmitNonBacktrackingGo(IndentedTextWriter writer, RegexMethod rm, string id)
688+
{
689+
// TODO: Implement this and remove SupportsCustomCodeGeneration.
690+
}
691+
694692
/// <summary>Emits the body of a simplified Go implementation that's possible when there's minimal backtracking required by the expression.</summary>
695693
private static void EmitSimplifiedGo(IndentedTextWriter writer, RegexMethod rm, string id)
696694
{
@@ -888,7 +886,7 @@ void EmitSwitchedBranches()
888886
Debug.Assert(child.Type is RegexNode.One or RegexNode.Multi or RegexNode.Concatenate, child.Description());
889887
Debug.Assert(child.Type is not RegexNode.Concatenate || (child.ChildCount() >= 2 && child.Child(0).Type is RegexNode.One or RegexNode.Multi));
890888

891-
RegexNode childStart = child.FindBranchOneOrMultiStart();
889+
RegexNode? childStart = child.FindBranchOneOrMultiStart();
892890
Debug.Assert(childStart is not null, child.Description());
893891

894892
writer.WriteLine($"case {Literal(childStart.FirstCharOfOneOrMulti())}:");
@@ -1248,7 +1246,7 @@ void EmitUpdateBumpalong()
12481246
}
12491247

12501248
// Emits the code to handle a single-character match.
1251-
void EmitSingleChar(RegexNode node, bool emitLengthCheck = true, string offset = null)
1249+
void EmitSingleChar(RegexNode node, bool emitLengthCheck = true, string? offset = null)
12521250
{
12531251
string expr = $"{textSpanLocal}[{Sum(textSpanPos, offset)}]";
12541252
switch (node.Type)
@@ -1843,7 +1841,7 @@ private static void EmitCompleteGo(IndentedTextWriter writer, RegexMethod rm, st
18431841
const string Backtrack = "Backtrack"; // label for backtracking
18441842

18451843
int[] codes = rm.Code.Codes;
1846-
RegexOptions options = (RegexOptions)rm.Options.Value;
1844+
RegexOptions options = rm.Options;
18471845

18481846
int labelCounter = 0;
18491847
string DefineLabel(string prefix = "L") => $"{prefix}{labelCounter++}";
@@ -1919,6 +1917,7 @@ private static void EmitCompleteGo(IndentedTextWriter writer, RegexMethod rm, st
19191917
{
19201918
using (EmitBlock(writer, $"case {i}:"))
19211919
{
1920+
Debug.Assert(notes is not null);
19221921
BacktrackNote n = notes[i];
19231922
if (n.flags != 0)
19241923
{
@@ -2879,7 +2878,7 @@ void Goto(int i)
28792878
/// </summary>
28802879
void Trackagain() => PushTrack(currentBacktrackNote);
28812880

2882-
void PushTrack<T>(T expr) => writer.WriteLine($"{ReadyPushTrack()} = {(expr is IFormattable ? ((IFormattable)expr).ToString(null, CultureInfo.InvariantCulture) : expr.ToString())};");
2881+
void PushTrack<T>(T expr) where T : notnull => writer.WriteLine($"{ReadyPushTrack()} = {(expr is IFormattable ? ((IFormattable)expr).ToString(null, CultureInfo.InvariantCulture) : expr.ToString())};");
28832882

28842883
/// <summary>Retrieves the top entry on the tracking stack without popping.</summary>
28852884
string TopTrack() => "runtrack[runtrackpos]";
@@ -2896,7 +2895,7 @@ void Goto(int i)
28962895
int Code() => currentOpcode & RegexCode.Mask;
28972896

28982897
/// <summary>Saves the value of a local variable on the grouping stack.</summary>
2899-
void PushStack<T>(T expr) => writer.WriteLine($"{ReadyPushStack()} = {(expr is IFormattable ? ((IFormattable)expr).ToString(null, CultureInfo.InvariantCulture) : expr.ToString())};");
2898+
void PushStack<T>(T expr) where T : notnull => writer.WriteLine($"{ReadyPushStack()} = {(expr is IFormattable ? ((IFormattable)expr).ToString(null, CultureInfo.InvariantCulture) : expr.ToString())};");
29002899

29012900
string ReadyPushStack() => "runstack[--runstackpos]";
29022901

@@ -2924,7 +2923,7 @@ int AddUniqueTrack(int i, int flags = RegexCode.Back)
29242923
int NextCodepos() => currentCodePos + RegexCode.OpcodeSize(codes[currentCodePos]);
29252924

29262925
/// <summary>The label for the next (forward) operation.</summary>
2927-
string AdvanceLabel() => labels![NextCodepos()];
2926+
string AdvanceLabel() => labels[NextCodepos()]!;
29282927

29292928
/// <summary>Goto the next (forward) operation.</summary>
29302929
void Advance() => writer.WriteLine($"goto {AdvanceLabel()};");
@@ -2971,7 +2970,7 @@ int AddGoto(int destpos)
29712970
{
29722971
if (forwardJumpsThroughSwitch[destpos] == -1)
29732972
{
2974-
forwardJumpsThroughSwitch[destpos] = AddBacktrackNote(0, labels![destpos], destpos);
2973+
forwardJumpsThroughSwitch[destpos] = AddBacktrackNote(0, labels[destpos]!, destpos);
29752974
}
29762975

29772976
return forwardJumpsThroughSwitch[destpos];
@@ -2998,7 +2997,7 @@ private record BacktrackNote(int flags, string label, int codepos);
29982997

29992998
private static bool EmitLoopTimeoutCounterIfNeeded(IndentedTextWriter writer, RegexMethod rm)
30002999
{
3001-
if (rm.MatchTimeout.HasValue && rm.MatchTimeout.Value != Timeout.Infinite)
3000+
if (rm.MatchTimeout != Timeout.Infinite)
30023001
{
30033002
writer.WriteLine("int loopTimeoutCounter = 0;");
30043003
return true;

0 commit comments

Comments
 (0)