Skip to content

Commit aa64008

Browse files
committed
[RegExp] Support ^ and $ anchors in RegExp NFA
1 parent 6899779 commit aa64008

4 files changed

Lines changed: 119 additions & 35 deletions

File tree

lib/src/regexp/nfa.dart

Lines changed: 17 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@ class Nfa extends RegexpPattern {
1515
var result = -1;
1616
var currentStates = <NfaState>{};
1717
var nextStates = <NfaState>{};
18-
_addStates(this.start, currentStates);
18+
_addStates(this.start, currentStates, start, end);
1919
if (currentStates.any((state) => state.isEnd)) {
2020
result = start;
2121
}
@@ -25,10 +25,10 @@ class Nfa extends RegexpPattern {
2525
for (final state in currentStates) {
2626
final nextState = state.transitions[value];
2727
if (nextState != null) {
28-
_addStates(nextState, nextStates);
28+
_addStates(nextState, nextStates, i + 1, end);
2929
}
3030
for (final nextState in state.dots) {
31-
_addStates(nextState, nextStates);
31+
_addStates(nextState, nextStates, i + 1, end);
3232
}
3333
}
3434
if (nextStates.isEmpty) {
@@ -42,10 +42,20 @@ class Nfa extends RegexpPattern {
4242
return result;
4343
}
4444

45-
void _addStates(NfaState state, Set<NfaState> states) {
45+
void _addStates(NfaState state, Set<NfaState> states, int index, int end) {
4646
if (!states.add(state)) return;
4747
for (final other in state.epsilons) {
48-
_addStates(other, states);
48+
_addStates(other, states, index, end);
49+
}
50+
if (index == 0) {
51+
for (final other in state.startAnchors) {
52+
_addStates(other, states, index, end);
53+
}
54+
}
55+
if (index == end) {
56+
for (final other in state.endAnchors) {
57+
_addStates(other, states, index, end);
58+
}
4959
}
5060
}
5161
}
@@ -57,4 +67,6 @@ class NfaState {
5767
final Map<int, NfaState> transitions = {};
5868
final List<NfaState> epsilons = [];
5969
final List<NfaState> dots = [];
70+
final List<NfaState> startAnchors = [];
71+
final List<NfaState> endAnchors = [];
6072
}

lib/src/regexp/node.dart

Lines changed: 38 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -237,3 +237,41 @@ class ComplementNode extends Node {
237237
@override
238238
int get hashCode => Object.hash(runtimeType, child);
239239
}
240+
241+
class StartAnchorNode extends Node {
242+
@override
243+
Nfa toNfa() {
244+
final start = NfaState(isEnd: false);
245+
final end = NfaState(isEnd: true);
246+
start.startAnchors.add(end);
247+
return Nfa(start: start, end: end);
248+
}
249+
250+
@override
251+
String toString() => 'StartAnchorNode()';
252+
253+
@override
254+
bool operator ==(Object other) => other is StartAnchorNode;
255+
256+
@override
257+
int get hashCode => runtimeType.hashCode;
258+
}
259+
260+
class EndAnchorNode extends Node {
261+
@override
262+
Nfa toNfa() {
263+
final start = NfaState(isEnd: false);
264+
final end = NfaState(isEnd: true);
265+
start.endAnchors.add(end);
266+
return Nfa(start: start, end: end);
267+
}
268+
269+
@override
270+
String toString() => 'EndAnchorNode()';
271+
272+
@override
273+
bool operator ==(Object other) => other is EndAnchorNode;
274+
275+
@override
276+
int get hashCode => runtimeType.hashCode;
277+
}

lib/src/regexp/parser.dart

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -7,11 +7,13 @@ import 'node.dart';
77
final nodeParser = () {
88
final builder = ExpressionBuilder<Node>();
99

10-
const meta = r'\.()!*+?|&';
10+
const meta = r'\.()!*+?|&^$';
1111
builder
1212
..primitive(noneOf(meta).map(LiteralNode.new))
1313
..primitive(anyOf(meta).skip(before: char(r'\')).map(LiteralNode.new))
14-
..primitive(char('.').map((_) => DotNode()));
14+
..primitive(char('.').map((_) => DotNode()))
15+
..primitive(char('^').map((_) => StartAnchorNode()))
16+
..primitive(char(r'$').map((_) => EndAnchorNode()));
1517

1618
builder.group().wrapper(char('('), char(')'), (_, value, _) => value);
1719

test/regexp_test.dart

Lines changed: 60 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -30,6 +30,12 @@ void main() {
3030
expectedEqual(Node.fromString(r'(a)'), la);
3131
expectedEqual(Node.fromString(r'((a))'), la);
3232
});
33+
test('anchors', () {
34+
expectedEqual(Node.fromString(r'^'), StartAnchorNode());
35+
expectedEqual(Node.fromString(r'(^)'), StartAnchorNode());
36+
expectedEqual(Node.fromString(r'$'), EndAnchorNode());
37+
expectedEqual(Node.fromString(r'($)'), EndAnchorNode());
38+
});
3339
test('escape', () {
3440
expectedEqual(Node.fromString(r'\\'), LiteralNode('\\'));
3541
expectedEqual(Node.fromString(r'\.'), LiteralNode('.'));
@@ -41,6 +47,8 @@ void main() {
4147
expectedEqual(Node.fromString(r'\*'), LiteralNode('*'));
4248
expectedEqual(Node.fromString(r'\|'), LiteralNode('|'));
4349
expectedEqual(Node.fromString(r'\&'), LiteralNode('&'));
50+
expectedEqual(Node.fromString(r'\^'), LiteralNode('^'));
51+
expectedEqual(Node.fromString(r'\$'), LiteralNode('\$'));
4452
});
4553
test('concatenation', () {
4654
expectedEqual(Node.fromString(r'ab'), ConcatenationNode(la, lb));
@@ -189,6 +197,19 @@ void main() {
189197
'',
190198
]);
191199
});
200+
test('anchors matchAsPrefix', () {
201+
final startAnchor = Node.fromString(r'^a').toNfa();
202+
expect(startAnchor.allMatches('a').map((each) => each[0]), ['a']);
203+
expect(startAnchor.allMatches('ba').map((each) => each[0]), []);
204+
expect(startAnchor.allMatches('ab').map((each) => each[0]), ['a']);
205+
});
206+
test('anchors allMatches', () {
207+
final endAnchor = Node.fromString(r'a$').toNfa();
208+
expect(endAnchor.allMatches('a').map((each) => each[0]), ['a']);
209+
expect(endAnchor.allMatches('ba').map((each) => each[0]), ['a']);
210+
expect(endAnchor.allMatches('ab').map((each) => each[0]), []);
211+
expect(endAnchor.allMatches('babaa').map((each) => each[0]), ['a']);
212+
});
192213
});
193214
test('linter', () {
194215
expect(linter(nodeParser), isEmpty);
@@ -210,6 +231,7 @@ class Expect {
210231
}
211232

212233
const tests = [
234+
Test(r'^$', [Expect('', true), Expect('a', false)]),
213235
// Basics
214236
Test(r'', [Expect('', true), Expect('a', false), Expect('ab', false)]),
215237
Test(r'.', [
@@ -252,6 +274,44 @@ const tests = [
252274
Expect('aba', false),
253275
Expect('b', false),
254276
]),
277+
// Arbitrary ranges
278+
Test(r'a{3}', [
279+
Expect('', false),
280+
Expect('a', false),
281+
Expect('aa', false),
282+
Expect('aaa', true),
283+
Expect('aaaa', false),
284+
]),
285+
Test(r'a{2,}', [
286+
Expect('', false),
287+
Expect('a', false),
288+
Expect('aa', true),
289+
Expect('aaa', true),
290+
Expect('aaaa', true),
291+
]),
292+
Test(r'a{1,3}', [
293+
Expect('', false),
294+
Expect('a', true),
295+
Expect('aa', true),
296+
Expect('aaa', true),
297+
Expect('aaaa', false),
298+
]),
299+
Test(r'a{,2}', [
300+
Expect('', true),
301+
Expect('a', true),
302+
Expect('aa', true),
303+
Expect('aaa', false),
304+
]),
305+
// Anchors
306+
Test(r'^a', [Expect('a', true), Expect('ab', false), Expect('ba', false)]),
307+
Test(r'a$', [Expect('a', true), Expect('ba', false), Expect('ab', false)]),
308+
Test(r'^a$', [
309+
Expect('a', true),
310+
Expect('ab', false),
311+
Expect('ba', false),
312+
Expect('bab', false),
313+
Expect('aa', false),
314+
]),
255315
// https://regex-generate.github.io/regenerate/
256316
Test(r'(b(ab*a)*b|a)*', [
257317
Expect('a', true),
@@ -376,34 +436,6 @@ const tests = [
376436
Expect('aababab', false),
377437
Expect('aabbbbb', false),
378438
]),
379-
// Arbitrary ranges
380-
Test(r'a{3}', [
381-
Expect('', false),
382-
Expect('a', false),
383-
Expect('aa', false),
384-
Expect('aaa', true),
385-
Expect('aaaa', false),
386-
]),
387-
Test(r'a{2,}', [
388-
Expect('', false),
389-
Expect('a', false),
390-
Expect('aa', true),
391-
Expect('aaa', true),
392-
Expect('aaaa', true),
393-
]),
394-
Test(r'a{1,3}', [
395-
Expect('', false),
396-
Expect('a', true),
397-
Expect('aa', true),
398-
Expect('aaa', true),
399-
Expect('aaaa', false),
400-
]),
401-
Test(r'a{,2}', [
402-
Expect('', true),
403-
Expect('a', true),
404-
Expect('aa', true),
405-
Expect('aaa', false),
406-
]),
407439
// https://github.com/xysun/regex/blob/master/testing.py
408440
Test(r'(ab|a)(bc|c)', [Expect('abc', true), Expect('acb', false)]),
409441
Test(r'(ab)c|abc', [Expect('abc', true), Expect('ab', false)]),

0 commit comments

Comments (0)