Skip to content

Commit a5e4572

Browse files
committed
[RegExp] NFA matching: implement copy-free prefix-tracking tryMatch and simplify and fix allMatches.
1 parent 521495f commit a5e4572

3 files changed

Lines changed: 69 additions & 22 deletions

File tree

lib/src/regexp/nfa.dart

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,15 @@ class Nfa extends RegexpPattern {
1111
final NfaState end;
1212

1313
@override
14-
bool tryMatch(String input) {
14+
int tryMatch(String input, int start, int end) {
15+
var result = -1;
1516
var currentStates = <NfaState>{};
16-
_addStates(start, currentStates);
17-
for (final value in input.runes) {
17+
_addStates(this.start, currentStates);
18+
if (currentStates.any((state) => state.isEnd)) {
19+
result = start;
20+
}
21+
for (var i = start; i < end; i++) {
22+
final value = input.codeUnitAt(i);
1823
final nextStates = <NfaState>{};
1924
for (final state in currentStates) {
2025
final nextState = state.transitions[value];
@@ -25,10 +30,15 @@ class Nfa extends RegexpPattern {
2530
_addStates(nextState, nextStates);
2631
}
2732
}
28-
if (nextStates.isEmpty) return false;
33+
if (nextStates.isEmpty) {
34+
break;
35+
}
2936
currentStates = nextStates;
37+
if (currentStates.any((state) => state.isEnd)) {
38+
result = i + 1;
39+
}
3040
}
31-
return currentStates.any((state) => state.isEnd);
41+
return result;
3242
}
3343

3444
void _addStates(NfaState state, Set<NfaState> states) {
@@ -43,10 +53,7 @@ class NfaState {
4353
NfaState({required this.isEnd});
4454

4555
bool isEnd;
46-
4756
final Map<int, NfaState> transitions = {};
48-
4957
final List<NfaState> epsilons = [];
50-
5158
final List<NfaState> dots = [];
5259
}

lib/src/regexp/pattern.dart

Lines changed: 20 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,33 @@
11
import 'package:meta/meta.dart';
22

33
abstract class RegexpPattern implements Pattern {
4-
// TODO: make it correctly match sub-strings
54
@override
6-
Match? matchAsPrefix(String input, [int start = 0]) =>
7-
tryMatch(input.substring(start))
8-
? RegexpMatch(this, input, start, input.length)
9-
: null;
5+
Iterable<Match> allMatches(String input, [int start = 0]) sync* {
6+
while (start <= input.length) {
7+
final match = matchAsPrefix(input, start);
8+
if (match == null) {
9+
start++;
10+
} else {
11+
yield match;
12+
start = match.start < match.end ? match.end : match.start + 1;
13+
}
14+
}
15+
}
1016

1117
@override
12-
Iterable<Match> allMatches(String input, [int start = 0]) sync* {
13-
for (var i = start; i < input.length; i++) {
14-
final match = matchAsPrefix(input, i);
15-
if (match != null) yield match;
18+
Match? matchAsPrefix(String input, [int start = 0]) {
19+
RangeError.checkValueInInterval(start, 0, input.length, 'start');
20+
final end = tryMatch(input, start, input.length);
21+
if (end >= start) {
22+
return RegexpMatch(this, input, start, end);
1623
}
24+
return null;
1725
}
1826

27+
/// Returns the end index (exclusive) of the longest match of this pattern in
28+
/// [input] that begins at [start] and ends by [end], or `-1` if none.
1929
@internal
20-
bool tryMatch(String input);
30+
int tryMatch(String input, int start, int end);
2131
}
2232

2333
class RegexpMatch implements Match {

test/regexp_test.dart

Lines changed: 34 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,8 @@ void main() {
124124
final pattern = Nfa.fromString(testData.pattern);
125125
for (final expectData in testData.expects) {
126126
expect(
127-
pattern.tryMatch(expectData.input),
127+
pattern.tryMatch(expectData.input, 0, expectData.input.length) ==
128+
expectData.input.length,
128129
expectData.match,
129130
reason:
130131
'"${testData.pattern}" '
@@ -153,11 +154,40 @@ void main() {
153154
expect(match[0], 'aaa');
154155
expect(match.groups([0, 1]), ['aaa', null]);
155156
});
157+
test('matchAsPrefix with non-matching input', () {
158+
final noMatch = pattern.matchAsPrefix('baaa');
159+
expect(noMatch, isNull);
160+
});
161+
test('matchAsPrefix with start index', () {
162+
final match = pattern.matchAsPrefix('baaa', 1)!;
163+
expect(match.pattern, pattern);
164+
expect(match.input, 'baaa');
165+
expect(match.start, 1);
166+
expect(match.end, 4);
167+
expect(match.group(0), 'aaa');
168+
});
156169
test('allMatches', () {
157-
expect(pattern.allMatches('aaa').map((each) => each[0]), [
158-
'aaa',
170+
expect(pattern.allMatches('').map((each) => each[0]), []);
171+
expect(pattern.allMatches('a').map((each) => each[0]), ['a']);
172+
expect(pattern.allMatches('aa').map((each) => each[0]), ['aa']);
173+
expect(pattern.allMatches('aaa').map((each) => each[0]), ['aaa']);
174+
expect(pattern.allMatches('baab').map((each) => each[0]), ['aa']);
175+
expect(pattern.allMatches('babaab').map((each) => each[0]), ['a', 'aa']);
176+
});
177+
test('allMatches with start index', () {
178+
expect(pattern.allMatches('babaab', 2).map((each) => each[0]), ['aa']);
179+
expect(pattern.allMatches('babaab', 3).map((each) => each[0]), ['aa']);
180+
expect(pattern.allMatches('babaab', 4).map((each) => each[0]), ['a']);
181+
});
182+
test('allMatches with zero-length matches', () {
183+
final starPattern = Node.fromString(r'a*').toNfa();
184+
expect(starPattern.allMatches('').map((each) => each[0]), ['']);
185+
expect(starPattern.allMatches('b').map((each) => each[0]), ['', '']);
186+
expect(starPattern.allMatches('baab').map((each) => each[0]), [
187+
'',
159188
'aa',
160-
'a',
189+
'',
190+
'',
161191
]);
162192
});
163193
});

0 commit comments

Comments
 (0)