Skip to content

Commit 4bd0bf4

Browse files
authored
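Fix regex compiler/source generator resumeAt handling of conditionals inside loops (#126561)
Update EmitExpressionConditional in both the regex compiler and the source generator so that resumeAt is always assigned after each branch when the conditional is inside a loop. This prevents a stale resumeAt value from being used during backtracking and producing incorrect matches. Fixes #126556.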
1 parent 0ad5249 commit 4bd0bf4

File tree

3 files changed: 157 additions and 7 deletions

src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2527,7 +2527,7 @@ void EmitExpressionConditional(RegexNode node)
25272527
writer.WriteLine();
25282528
TransferSliceStaticPosToPos(); // make sure sliceStaticPos is 0 after each branch
25292529
string postYesDoneLabel = doneLabel;
2530-
if (!isAtomic && postYesDoneLabel != originalDoneLabel)
2530+
if ((!isAtomic && postYesDoneLabel != originalDoneLabel) || isInLoop)
25312531
{
25322532
writer.WriteLine($"{resumeAt} = 0;");
25332533
}
@@ -2556,7 +2556,7 @@ void EmitExpressionConditional(RegexNode node)
25562556
writer.WriteLine();
25572557
TransferSliceStaticPosToPos(); // make sure sliceStaticPos is 0 after each branch
25582558
postNoDoneLabel = doneLabel;
2559-
if (!isAtomic && postNoDoneLabel != originalDoneLabel)
2559+
if ((!isAtomic && postNoDoneLabel != originalDoneLabel) || isInLoop)
25602560
{
25612561
writer.WriteLine($"{resumeAt} = 1;");
25622562
}
@@ -2566,7 +2566,7 @@ void EmitExpressionConditional(RegexNode node)
25662566
// There's only a yes branch. If it's going to cause us to output a backtracking
25672567
// label but code may not end up taking the yes branch path, we need to emit a resumeAt
25682568
// that will cause the backtracking to immediately pass through this node.
2569-
if (!isAtomic && postYesDoneLabel != originalDoneLabel)
2569+
if ((!isAtomic && postYesDoneLabel != originalDoneLabel) || isInLoop)
25702570
{
25712571
writer.WriteLine($"{resumeAt} = 2;");
25722572
}

src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2529,7 +2529,7 @@ void EmitExpressionConditional(RegexNode node)
25292529
EmitNode(yesBranch);
25302530
TransferSliceStaticPosToPos(); // make sure sliceStaticPos is 0 after each branch
25312531
Label postYesDoneLabel = doneLabel;
2532-
if (!isAtomic && postYesDoneLabel != originalDoneLabel)
2532+
if ((!isAtomic && postYesDoneLabel != originalDoneLabel) || isInLoop)
25332533
{
25342534
// resumeAt = 0;
25352535
Ldc(0);
@@ -2560,7 +2560,7 @@ void EmitExpressionConditional(RegexNode node)
25602560
EmitNode(noBranch);
25612561
TransferSliceStaticPosToPos(); // make sure sliceStaticPos is 0 after each branch
25622562
postNoDoneLabel = doneLabel;
2563-
if (!isAtomic && postNoDoneLabel != originalDoneLabel)
2563+
if ((!isAtomic && postNoDoneLabel != originalDoneLabel) || isInLoop)
25642564
{
25652565
// resumeAt = 1;
25662566
Ldc(1);
@@ -2572,7 +2572,7 @@ void EmitExpressionConditional(RegexNode node)
25722572
// There's only a yes branch. If it's going to cause us to output a backtracking
25732573
// label but code may not end up taking the yes branch path, we need to emit a resumeAt
25742574
// that will cause the backtracking to immediately pass through this node.
2575-
if (!isAtomic && postYesDoneLabel != originalDoneLabel)
2575+
if ((!isAtomic && postYesDoneLabel != originalDoneLabel) || isInLoop)
25762576
{
25772577
// resumeAt = 2;
25782578
Ldc(2);

src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.MultipleMatches.Tests.cs

Lines changed: 151 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -434,7 +434,157 @@ public static IEnumerable<object[]> Matches_TestData()
434434
}
435435
}
436436

437-
#if !NETFRAMEWORK // these tests currently fail on .NET Framework, and we need to check IsDynamicCodeCompiled but that doesn't exist on .NET Framework
437+
if (!RegexHelpers.IsNonBacktracking(engine)) // balancing groups aren't supported
438+
{
439+
// ExpressionConditional with balancing groups inside a loop, auto-numbered capture groups
440+
441+
// Balancing group conditional with auto-numbered capture group and dot
442+
yield return new object[]
443+
{
444+
engine, @"(?((?'-1'))|(.)+)+(?!(?'-1'))", "abc", RegexOptions.None, new[]
445+
{
446+
new CaptureData("a", 0, 1),
447+
new CaptureData("b", 1, 1),
448+
new CaptureData("c", 2, 1),
449+
}
450+
};
451+
452+
// Balancing group conditional with auto-numbered capture group and literal
453+
yield return new object[]
454+
{
455+
engine, @"(?((?'-1'))|(a)+)+(?!(?'-1'))", "aaa", RegexOptions.None, new[]
456+
{
457+
new CaptureData("a", 0, 1),
458+
new CaptureData("a", 1, 1),
459+
new CaptureData("a", 2, 1),
460+
}
461+
};
462+
463+
// Alternation in no-branch with empty second branch, no match expected
464+
yield return new object[]
465+
{
466+
engine, @"(?((?'-1'))|((?'1'.)+|()))+(?!(?'-1'))", "a", RegexOptions.None,
467+
Array.Empty<CaptureData>()
468+
};
469+
470+
// Balancing group conditional with quantified pop {2}
471+
yield return new object[]
472+
{
473+
engine, @"(?((?'-1'){2})|((?'1'a)+))+(?!(?'-1'))", "aa", RegexOptions.None, new[]
474+
{
475+
new CaptureData("a", 0, 1),
476+
new CaptureData("a", 1, 1),
477+
}
478+
};
479+
480+
// ExpressionConditional with balancing groups inside a loop
481+
482+
// Balancing group conditional with alternation in no-branch, no match
483+
yield return new object[]
484+
{
485+
engine, @"(?((?'-1'))|((?'1'\S)+|(?'1'\s)))+(?!(?'-1'))", "abc", RegexOptions.None,
486+
Array.Empty<CaptureData>()
487+
};
488+
489+
// Balancing group conditional with nested captures in alternation
490+
yield return new object[]
491+
{
492+
engine, @"(?((?'-1'){6})|((?'1'(?'2'\S))+|(?'1'(?'2'\s))))+(?!(?'-1'))", "it not", RegexOptions.None, new[]
493+
{
494+
new CaptureData("it ", 0, 3),
495+
new CaptureData("not", 3, 3),
496+
}
497+
};
498+
499+
// Alternation in capturing group in no-branch, no match expected
500+
yield return new object[]
501+
{
502+
engine, @"(?((?'-1'))|((?'1'a)+|(?'1'b)))+(?!(?'-1'))", "abc", RegexOptions.None,
503+
Array.Empty<CaptureData>()
504+
};
505+
yield return new object[]
506+
{
507+
engine, @"(?((?'-1'))|((?'1'a)+|(?'1'b)))+(?!(?'-1'))", "aaa", RegexOptions.None,
508+
Array.Empty<CaptureData>()
509+
};
510+
511+
// No-branch with quantifier but no wrapping capture group
512+
yield return new object[]
513+
{
514+
engine, @"(?((?'-1'))|(?'1'\S)+)+(?!(?'-1'))", "abc", RegexOptions.None, new[]
515+
{
516+
new CaptureData("a", 0, 1),
517+
new CaptureData("b", 1, 1),
518+
new CaptureData("c", 2, 1),
519+
}
520+
};
521+
yield return new object[]
522+
{
523+
engine, @"(?((?'-1'))|(?'1'a)+)+(?!(?'-1'))", "aaa", RegexOptions.None, new[]
524+
{
525+
new CaptureData("a", 0, 1),
526+
new CaptureData("a", 1, 1),
527+
new CaptureData("a", 2, 1),
528+
}
529+
};
530+
531+
// No-branch with quantifier inside wrapping capture group
532+
yield return new object[]
533+
{
534+
engine, @"(?((?'-1'))|((?'1'\S)+))+(?!(?'-1'))", "abc", RegexOptions.None,
535+
Array.Empty<CaptureData>()
536+
};
537+
538+
// Non-capturing group wrapping alternation in no-branch
539+
yield return new object[]
540+
{
541+
engine, @"(?((?'-1'))|(?:(?'1'a)+|(?'1'b)))+(?!(?'-1'))", "aaa", RegexOptions.None, new[]
542+
{
543+
new CaptureData("a", 0, 1),
544+
new CaptureData("a", 1, 1),
545+
new CaptureData("a", 2, 1),
546+
}
547+
};
548+
yield return new object[]
549+
{
550+
engine, @"(?((?'-1'))|(?:(?'1'a)+|(?'1'b)))+(?!(?'-1'))", "abc", RegexOptions.None, new[]
551+
{
552+
new CaptureData("a", 0, 1),
553+
new CaptureData("b", 1, 1),
554+
}
555+
};
556+
557+
// Balancing group conditional with single char in no-branch
558+
yield return new object[]
559+
{
560+
engine, @"(?((?'-1'))|(?'1'a))+(?!(?'-1'))", "aaa", RegexOptions.None, new[]
561+
{
562+
new CaptureData("a", 0, 1),
563+
new CaptureData("a", 1, 1),
564+
new CaptureData("a", 2, 1),
565+
}
566+
};
567+
568+
// Balancing group conditional with multi-word input
569+
yield return new object[]
570+
{
571+
engine, @"(?((?'-1'))|(?'1'\S)+)+(?!(?'-1'))", "hello world", RegexOptions.None, new[]
572+
{
573+
new CaptureData("h", 0, 1),
574+
new CaptureData("e", 1, 1),
575+
new CaptureData("l", 2, 1),
576+
new CaptureData("l", 3, 1),
577+
new CaptureData("o", 4, 1),
578+
new CaptureData("w", 6, 1),
579+
new CaptureData("o", 7, 1),
580+
new CaptureData("r", 8, 1),
581+
new CaptureData("l", 9, 1),
582+
new CaptureData("d", 10, 1),
583+
}
584+
};
585+
}
586+
587+
#if !NETFRAMEWORK // these tests currently fail on .NET Framework
438588
yield return new object[]
439589
{
440590
engine, "@(a*)+?", "@", RegexOptions.None, new[]

0 commit comments

Comments
 (0)