Skip to content

Commit 0beee48

Browse files
phpstan-botstaabmclaude
authored
Isolate alternation branches in RegexGroupParser::walkGroupAst() to prevent non-empty/non-falsy state bleeding (#5602)
Co-authored-by: Markus Staab <maggus.staab@googlemail.com> Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
1 parent c28d352 commit 0beee48

4 files changed

Lines changed: 122 additions & 22 deletions

File tree

src/Type/Regex/RegexGroupParser.php

Lines changed: 26 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,6 @@ public function parseGroups(string $regex): ?RegexAstWalkResult
120120
$subjectAsGroupResult = $this->walkGroupAst(
121121
$ast,
122122
false,
123-
false,
124123
$modifiers,
125124
RegexGroupWalkResult::createEmpty(),
126125
);
@@ -408,7 +407,6 @@ private function createGroupType(TreeNode $group, bool $maybeConstant, string $p
408407
$walkResult = $this->walkGroupAst(
409408
$group,
410409
false,
411-
false,
412410
$patternModifiers,
413411
RegexGroupWalkResult::createEmpty(),
414412
);
@@ -469,7 +467,6 @@ private function getRootAlternation(TreeNode $group): ?TreeNode
469467

470468
private function walkGroupAst(
471469
TreeNode $ast,
472-
bool $inAlternation,
473470
bool $inClass,
474471
string $patternModifiers,
475472
RegexGroupWalkResult $walkResult,
@@ -491,7 +488,7 @@ private function walkGroupAst(
491488

492489
$meaningfulTokens++;
493490

494-
if (!$nonFalsy || $inAlternation) {
491+
if (!$nonFalsy) {
495492
continue;
496493
}
497494

@@ -504,7 +501,7 @@ private function walkGroupAst(
504501
$walkResult = $walkResult->nonEmpty(TrinaryLogic::createYes());
505502

506503
// two non-empty tokens concatenated results in a non-falsy string
507-
if ($meaningfulTokens > 1 && !$inAlternation) {
504+
if ($meaningfulTokens > 1) {
508505
$walkResult = $walkResult->nonFalsy(TrinaryLogic::createYes());
509506
}
510507
}
@@ -519,7 +516,7 @@ private function walkGroupAst(
519516
if ($min >= 1) {
520517
$walkResult = $walkResult->nonEmpty(TrinaryLogic::createYes());
521518
}
522-
if ($min >= 2 && !$inAlternation) {
519+
if ($min >= 2) {
523520
$walkResult = $walkResult->nonFalsy(TrinaryLogic::createYes());
524521
}
525522
}
@@ -559,30 +556,47 @@ private function walkGroupAst(
559556
}
560557

561558
if ($ast->getId() === '#alternation') {
559+
if (count($children) === 0) {
560+
return $walkResult;
561+
}
562+
562563
$newLiterals = [];
564+
$nonEmpty = TrinaryLogic::createYes();
565+
$nonFalsy = TrinaryLogic::createYes();
566+
$numeric = TrinaryLogic::createYes();
563567
foreach ($children as $child) {
564-
$walkResult = $this->walkGroupAst(
568+
$childResult = $this->walkGroupAst(
565569
$child,
566-
true,
567570
$inClass,
568571
$patternModifiers,
569-
$walkResult->onlyLiterals([]),
572+
$walkResult->onlyLiterals([])
573+
->nonEmpty(TrinaryLogic::createMaybe())
574+
->nonFalsy(TrinaryLogic::createMaybe())
575+
->numeric(TrinaryLogic::createMaybe()),
570576
);
571577

578+
$nonEmpty = $nonEmpty->and($childResult->isNonEmpty());
579+
$nonFalsy = $nonFalsy->and($childResult->isNonFalsy());
580+
$numeric = $numeric->and($childResult->isNumeric());
581+
572582
if ($newLiterals === null) {
573583
continue;
574584
}
575585

576-
if (count($walkResult->getOnlyLiterals() ?? []) > 0) {
577-
foreach ($walkResult->getOnlyLiterals() as $alternationLiterals) {
586+
if (count($childResult->getOnlyLiterals() ?? []) > 0) {
587+
foreach ($childResult->getOnlyLiterals() as $alternationLiterals) {
578588
$newLiterals[] = $alternationLiterals;
579589
}
580590
} else {
581591
$newLiterals = null;
582592
}
583593
}
584594

585-
return $walkResult->onlyLiterals($newLiterals);
595+
return $walkResult
596+
->onlyLiterals($newLiterals)
597+
->nonEmpty($walkResult->isNonEmpty()->or($nonEmpty))
598+
->nonFalsy($walkResult->isNonFalsy()->or($nonFalsy))
599+
->numeric($walkResult->isNumeric()->and($numeric));
586600
}
587601

588602
// [^0-9] should not parse as numeric-string, and [^list-everything-but-numbers] is technically
@@ -595,7 +609,6 @@ private function walkGroupAst(
595609
foreach ($children as $child) {
596610
$walkResult = $this->walkGroupAst(
597611
$child,
598-
$inAlternation,
599612
$inClass,
600613
$patternModifiers,
601614
$walkResult,

tests/PHPStan/Analyser/nsrt/bug-12210.php

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,20 +8,20 @@
88

99
function bug12210a(string $text): void {
1010
assert(preg_match('(((sum|min|max)))', $text, $match) === 1);
11-
assertType("array{non-empty-string, 'max'|'min'|'sum', 'max'|'min'|'sum'}", $match);
11+
assertType("array{non-falsy-string, 'max'|'min'|'sum', 'max'|'min'|'sum'}", $match);
1212
}
1313

1414
function bug12210b(string $text): void {
1515
assert(preg_match('(((sum|min|ma.)))', $text, $match) === 1);
16-
assertType("array{non-empty-string, non-empty-string, non-falsy-string}", $match);
16+
assertType("array{non-falsy-string, non-falsy-string, non-falsy-string}", $match);
1717
}
1818

1919
function bug12210c(string $text): void {
2020
assert(preg_match('(((su.|min|max)))', $text, $match) === 1);
21-
assertType("array{non-empty-string, non-empty-string, non-falsy-string}", $match);
21+
assertType("array{non-falsy-string, non-falsy-string, non-falsy-string}", $match);
2222
}
2323

2424
function bug12210d(string $text): void {
2525
assert(preg_match('(((sum|mi.|max)))', $text, $match) === 1);
26-
assertType("array{non-empty-string, non-empty-string, non-falsy-string}", $match);
26+
assertType("array{non-falsy-string, non-falsy-string, non-falsy-string}", $match);
2727
}
Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
<?php declare(strict_types = 1);
2+
3+
namespace Bug14575;
4+
5+
use function PHPStan\Testing\assertType;
6+
7+
function doFoo(string $string): void {
8+
// Anchors in alternation should not make the match non-empty
9+
if (preg_match('(foo|$)', $string, $match)) {
10+
assertType('array{string}', $match);
11+
}
12+
13+
if (preg_match('(^|foo)', $string, $match)) {
14+
assertType('array{string}', $match);
15+
}
16+
17+
if (preg_match('(\b|foo)', $string, $match)) {
18+
assertType('array{string}', $match);
19+
}
20+
21+
if (preg_match('/foo|$/', $string, $match)) {
22+
assertType('array{string}', $match);
23+
}
24+
25+
if (preg_match('/^|bar/', $string, $match)) {
26+
assertType('array{string}', $match);
27+
}
28+
29+
// Anchor in alternation within capturing group
30+
if (preg_match('/(foo|$)/', $string, $match)) {
31+
assertType('array{string, string}', $match);
32+
}
33+
34+
if (preg_match('/(^|bar)/', $string, $match)) {
35+
assertType('array{string, string}', $match);
36+
}
37+
38+
// Anchor in alternation does not affect parent concatenation
39+
if (preg_match('/^abc(def|$)/', $string, $match)) {
40+
assertType("array{non-falsy-string, string}", $match);
41+
}
42+
43+
// All non-empty alternatives should still produce non-empty/non-falsy
44+
if (preg_match('/foo|bar/', $string, $match)) {
45+
assertType('array{non-falsy-string}', $match);
46+
}
47+
48+
if (preg_match('/foo/', $string, $match)) {
49+
assertType('array{non-falsy-string}', $match);
50+
}
51+
52+
// Anchor alone
53+
if (preg_match('/^$/', $string, $match)) {
54+
assertType('array{string}', $match);
55+
}
56+
57+
// Three-way alternation with anchor
58+
if (preg_match('/foo|bar|$/', $string, $match)) {
59+
assertType('array{string}', $match);
60+
}
61+
62+
// All alternatives are single chars (non-falsy not determinable from single tokens)
63+
if (preg_match('/a|b/', $string, $match)) {
64+
assertType('array{non-empty-string}', $match);
65+
}
66+
67+
// Empty alternation branches
68+
if (preg_match('/(|)/', $string, $match)) {
69+
assertType("array{string, ''}", $match);
70+
}
71+
72+
if (preg_match('/(foo|)/', $string, $match)) {
73+
assertType("array{string, ''|'foo'}", $match);
74+
}
75+
76+
if (preg_match('/(|bar)/', $string, $match)) {
77+
assertType("array{string, ''|'bar'}", $match);
78+
}
79+
80+
if (preg_match('/|/', $string, $match)) {
81+
assertType('array{string}', $match);
82+
}
83+
84+
if (preg_match('/foo|/', $string, $match)) {
85+
assertType('array{string}', $match);
86+
}
87+
}

tests/PHPStan/Analyser/nsrt/preg_match_shapes.php

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -321,7 +321,7 @@ function (string $size): void {
321321
if (preg_match('~\{(?:(include)\\s+(?:[$]?\\w+(?<!file))\\s)|(?:(include\\s+file)\\s+(?:[$]?\\w+)\\s)|(?:(include(?:Template|(?:\\s+file)))\\s+(?:\'?.*?\.latte\'?)\\s)~', $size, $matches) !== 1) {
322322
throw new InvalidArgumentException(sprintf('Invalid size "%s"', $size));
323323
}
324-
assertType("array{non-empty-string, '', '', non-falsy-string}|array{non-empty-string, '', non-falsy-string}|array{non-empty-string, 'include'}", $matches);
324+
assertType("array{non-falsy-string, '', '', non-falsy-string}|array{non-falsy-string, '', non-falsy-string}|array{non-falsy-string, 'include'}", $matches);
325325
};
326326

327327

@@ -519,7 +519,7 @@ function bug11323(string $s): void {
519519
assertType('array{non-falsy-string, non-empty-string, "\n", "\n"}', $matches);
520520
}
521521
if (preg_match('/foo(*:first)|bar(*:second)([x])/', $s, $matches)) {
522-
assertType("array{0: non-empty-string, 1?: 'x', MARK?: 'first'|'second'}", $matches);
522+
assertType("array{0: non-falsy-string, 1?: 'x', MARK?: 'first'|'second'}", $matches);
523523
}
524524
}
525525

@@ -762,13 +762,13 @@ function bug11604 (string $string): void {
762762
return;
763763
}
764764

765-
assertType("list{0: non-empty-string, 1?: ''|'XX', 2?: 'YY'}", $matches);
765+
assertType("list{0: non-falsy-string, 1?: ''|'XX', 2?: 'YY'}", $matches);
766766
// could be array{string, '', 'YY'}|array{string, 'XX'}|array{string}
767767
}
768768

769769
function bug11604b (string $string): void {
770770
if (preg_match('/(XX)|(YY)?(ZZ)/', $string, $matches)) {
771-
assertType("list{0: non-empty-string, 1?: ''|'XX', 2?: ''|'YY', 3?: 'ZZ'}", $matches);
771+
assertType("list{0: non-falsy-string, 1?: ''|'XX', 2?: ''|'YY', 3?: 'ZZ'}", $matches);
772772
}
773773
}
774774

@@ -890,7 +890,7 @@ function testEscapedDelimiter (string $string): void {
890890
if (preg_match(<<<'EOD'
891891
{(x\\\{)|(y\\\\\})}
892892
EOD, $string, $matches)) {
893-
assertType("array{non-empty-string, '', 'y\\\\\\\}'}|array{non-empty-string, 'x\\\{'}", $matches);
893+
assertType("array{non-falsy-string, '', 'y\\\\\\\}'}|array{non-falsy-string, 'x\\\{'}", $matches);
894894
}
895895
}
896896

0 commit comments

Comments
 (0)