Skip to content

Commit 7130cc6

Browse files
fix: πŸ› Fix parse issue with function types in C++ (#78)
1 parent b08123c commit 7130cc6

File tree

19 files changed

+477
-306
lines changed

19 files changed

+477
-306
lines changed

.github/workflows/main.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ jobs:
1313
- name: Checkout sources
1414
uses: actions/checkout@v4
1515

16-
- name: Install uv
16+
- name: Set up uv
1717
uses: astral-sh/setup-uv@v5
1818

1919
- name: Install dependencies

codelimit/common/TokenRange.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,6 @@ def __str__(self):
1414
def __repr__(self):
1515
return self.__str__()
1616

17-
def token_string(self, tokens: list[Token]):
18-
return " ".join([t.value for t in tokens[self.start:self.end]])
19-
2017
def lt(self, other: TokenRange):
2118
return self.start < other.start
2219

codelimit/common/gsm/Pattern.py

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,15 @@
11
from copy import deepcopy
22

3+
from codelimit.common.TokenRange import TokenRange
34
from codelimit.common.gsm.automata.DFA import DFA
45
from codelimit.common.gsm.automata.State import State
56
from codelimit.common.gsm.predicate.Predicate import Predicate
7+
from codelimit.common.token_matching.predicate.Balanced import Balanced
68

79

8-
class Pattern:
9-
def __init__(self, start: int, automata: DFA):
10-
self.start = start
11-
self.end = start
10+
class Pattern(TokenRange):
11+
def __init__(self, automata: DFA, start: int = 0):
12+
super().__init__(start, start)
1213
self.automata = automata
1314
self.state = automata.start
1415
self.tokens: list = []
@@ -30,7 +31,13 @@ def consume(self, item) -> State | None:
3031
return self.state if found_transition else None
3132

3233
def is_accepting(self):
34+
for p in self.predicate_map.values():
35+
if isinstance(p, Balanced) and not p.depth == 0:
36+
return False
3337
return self.automata.is_accepting(self.state)
3438

3539
def token_string(self):
3640
return " ".join([t.value for t in self.tokens])
41+
42+
def __str__(self):
43+
return f'Pattern(start={self.start}, end={self.end}, tokens=[{self.token_string()}])'

codelimit/common/gsm/matcher.py

Lines changed: 14 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -9,15 +9,15 @@
99
)
1010
from codelimit.common.gsm.Pattern import Pattern
1111
from codelimit.common.gsm.operator.Operator import Operator
12-
from codelimit.common.gsm.utils import render_automata
12+
from codelimit.common.gsm.utils import render_automata, prune_nested
1313

1414
T = TypeVar("T")
1515

1616

1717
def match(expression: Expression, sequence: list) -> Pattern | None:
1818
nfa = expression_to_nfa(expression)
1919
dfa = nfa_to_dfa(nfa)
20-
pattern = Pattern(0, dfa)
20+
pattern = Pattern(dfa)
2121
for item in sequence:
2222
next_state = pattern.consume(item)
2323
if not next_state:
@@ -32,7 +32,7 @@ def match(expression: Expression, sequence: list) -> Pattern | None:
3232
def starts_with(expression: Expression, sequence: list) -> Pattern | None:
3333
nfa = expression_to_nfa(expression)
3434
dfa = nfa_to_dfa(nfa)
35-
pattern = Pattern(0, dfa)
35+
pattern = Pattern(dfa)
3636
for item in sequence:
3737
next_state = pattern.consume(item)
3838
if not next_state:
@@ -50,31 +50,29 @@ class FindState:
5050
next_state_patterns: list[Pattern]
5151

5252

53-
def find_all(expression: Expression, sequence: list) -> list[Pattern]:
53+
def find_all(expression: Expression, sequence: list, nested: bool = False) -> list[Pattern]:
5454
dfa = nfa_to_dfa(expression_to_nfa(expression))
5555
fs = FindState([], [], [])
5656
for idx, item in enumerate(sequence):
57-
fs.active_patterns.append(Pattern(idx, dfa))
57+
fs.active_patterns.append(Pattern(dfa, idx))
5858
fs.next_state_patterns = []
5959
for pattern in fs.active_patterns:
60-
if fs.matches and pattern.start < fs.matches[-1].end:
61-
continue
62-
if len(pattern.state.transition) == 0 and pattern.is_accepting():
63-
pattern.end = idx
64-
fs.matches.append(pattern)
65-
continue
6660
if pattern.consume(item):
6761
fs.next_state_patterns.append(pattern)
68-
else:
69-
if pattern.is_accepting():
70-
pattern.end = idx
62+
elif pattern.is_accepting():
63+
pattern.end = idx
64+
if not fs.matches or fs.matches[-1].end < pattern.end:
7165
fs.matches.append(pattern)
7266
fs.active_patterns = fs.next_state_patterns
7367
for pattern in fs.active_patterns:
7468
if pattern.is_accepting():
7569
pattern.end = len(sequence)
76-
fs.matches.append(pattern)
77-
return fs.matches
70+
if not fs.matches or fs.matches[-1].end < pattern.end:
71+
fs.matches.append(pattern)
72+
if nested:
73+
return fs.matches
74+
else:
75+
return prune_nested(fs.matches)
7876

7977

8078
def nfa_match(expression: Expression, sequence: list):

codelimit/common/gsm/utils.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
import subprocess
22
import tempfile
3+
from typing import TypeVar
34

5+
from codelimit.common.TokenRange import TokenRange
46
from codelimit.common.gsm.automata.Automata import Automata
57
from codelimit.common.gsm.automata.State import State
68

@@ -58,3 +60,20 @@ def to_dot(automata: Automata):
5860
result += state_transitions_to_dot(automata, automata.start)
5961
result += "}"
6062
return result
63+
64+
65+
T = TypeVar("T", bound=TokenRange)
66+
67+
68+
def prune_nested(ranges: list[T]) -> list[T]:
69+
sorted_ranges = sorted(ranges, key=lambda x: (x.start, -(x.end - x.start)))
70+
result: list[T] = []
71+
for r in sorted_ranges:
72+
if not result:
73+
result.append(r)
74+
else:
75+
last = result[-1]
76+
if last.start <= r.start and last.end >= r.end:
77+
continue
78+
result.append(r)
79+
return result

codelimit/common/scope/scope_utils.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -120,10 +120,10 @@ def has_curly_suffix(tokens: list[Token], index):
120120

121121

122122
def get_headers(
123-
tokens: list[Token], expression: Expression, followed_by: Expression = None
123+
tokens: list[Token], expression: Expression, followed_by: Expression = None, nested: bool = False
124124
) -> list[Header]:
125125
# expression = replace_string_literal_with_predicate(expression)
126-
patterns = find_all(expression, tokens)
126+
patterns = find_all(expression, tokens, nested=nested)
127127
if followed_by:
128128
patterns = [p for p in patterns if starts_with(followed_by, tokens[p.end:])]
129129
result = []

codelimit/common/token_utils.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,3 +39,7 @@ def sort_tokens(tokens: list[Token]) -> list[Token]:
3939
result = sorted(tokens, key=lambda t: t.location.column)
4040
result = sorted(result, key=lambda t: t.location.line)
4141
return result
42+
43+
44+
def token_string(tokens: list[Token], token_range: TokenRange) -> str:
45+
return " ".join([t.value for t in tokens[token_range.start:token_range.end]])

codelimit/common/utils.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -214,7 +214,6 @@ def _get_git_branch(path: Path) -> str | None:
214214
return ref
215215
try:
216216
out = sh.git('-c', f'safe.directory={path.resolve()}', 'rev-parse', '--abbrev-ref', 'HEAD', _cwd=path)
217-
print(out)
218217
return out.strip()
219218
except (sh.ErrorReturnCode, sh.CommandNotFound):
220219
return None

codelimit/languages/Java.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ def extract_headers(self, tokens: list) -> list:
3030
[Keyword('throws'), ZeroOrMore(And(Not(';'), Not('{'))), Symbol("{")]
3131
)
3232
]
33-
)
33+
, nested=True)
3434
return filter_headers(headers, tokens)
3535

3636
def extract_blocks(self, tokens: list, headers: list) -> list:

codelimit/languages/JavaScript.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ def extract_headers(self, tokens: list[Token]) -> list[Header]:
2323
functions = get_headers(
2424
tokens,
2525
[Optional(Keyword("function")), Name(), OneOrMore(Balanced("(", ")"))],
26-
Symbol("{"),
26+
Symbol("{"), nested=True
2727
)
2828
arrow_functions = get_headers(
2929
tokens,
@@ -35,11 +35,11 @@ def extract_headers(self, tokens: list[Token]) -> list[Header]:
3535
OneOrMore(Balanced("(", ")")),
3636
Symbol("=>"),
3737
],
38-
Symbol("{"),
38+
Symbol("{"), nested=True,
3939
)
4040
return functions + arrow_functions
4141

4242
def extract_blocks(
43-
self, tokens: list[Token], headers: list[Header]
43+
self, tokens: list[Token], headers: list[Header]
4444
) -> list[TokenRange]:
4545
return get_blocks(tokens, "{", "}")

0 commit comments

Comments (0)