Syntax-Engine-CFG-PARSER-ANALYZER/my_parser.py at main · DioBey7/Syntax-Engine-CFG-PARSER-ANALYZER · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import my_reader

class Parser:
    """Backtracking recursive-descent parser driven by a BNF grammar.

    ``self.grammar`` is used as a mapping from a non-terminal name to a list
    of productions, each production being a sequence of symbol names. A
    symbol that is not a key of the grammar is treated as a terminal, and the
    literal symbol ``"ε"`` stands for the empty string.

    Parsing state lives on the instance:

    * ``tokens`` / ``current_index`` -- the token list being parsed and the
      position of the next token to consume.
    * ``max_idx`` / ``expected_at_max`` -- the furthest position at which a
      terminal failed to match and the terminals that were tried there; used
      by :meth:`error_check` to build the diagnostic.

    The class never resets this state itself; the caller is expected to set
    ``tokens``, ``current_index``, ``max_idx`` and ``expected_at_max`` before
    each parse.
    """

    def __init__(self, grammar_file, sentences_file):
        """Load the grammar and sentences through ``my_reader.Reader``.

        Args:
            grammar_file: Passed straight to ``my_reader.Reader``.
            sentences_file: Passed straight to ``my_reader.Reader``.
        """
        self.reader = my_reader.Reader(grammar_file, sentences_file)
        self.grammar = self.reader.read_bnf_grammar()
        self.sentences = self.reader.read_sentences()
        self.current_index = 0
        self.tokens = []
        self.max_idx = 0
        self.expected_at_max = []

    def _nullable_nonterminals(self):
        """Return the set of non-terminals that can derive the empty string."""
        nullable = set()
        changed = True
        # Fixed-point iteration: a non-terminal is nullable as soon as one of
        # its productions consists solely of ε / already-nullable symbols
        # (an empty production qualifies trivially).
        while changed:
            changed = False
            for lhs, productions in self.grammar.items():
                if lhs in nullable:
                    continue
                if any(
                    all(sym == "ε" or sym in nullable for sym in prod)
                    for prod in productions
                ):
                    nullable.add(lhs)
                    changed = True
        return nullable

    def detect_left_recursion(self):
        """Find left-recursive cycles among the grammar's non-terminals.

        A non-terminal ``A`` depends on ``B`` when ``B`` can be the first
        symbol expanded while parsing ``A`` without consuming any token:
        either ``B`` opens a production of ``A`` or everything in front of it
        is ε / nullable. The latter catches hidden left recursion such as
        ``S -> A S b`` with ``A -> ε``, which would otherwise send
        :meth:`parsing_rules` into unbounded recursion.

        Returns:
            A list of cycles. Each cycle is a list of non-terminal names that
            starts and ends with the same name, e.g. ``["A", "B", "A"]``.
            The list is empty when no left recursion is found. Order follows
            the grammar's own iteration order, so results are reproducible.
        """
        nullable = self._nullable_nonterminals()

        # dicts are used as insertion-ordered sets so the traversal order
        # (and therefore the reported cycles) does not depend on hashing.
        dependencies = {}
        for lhs, productions in self.grammar.items():
            leading = {}
            for prod in productions:
                for sym in prod:
                    if sym == "ε":
                        continue  # consumes nothing; keep scanning
                    if sym not in self.grammar:
                        break  # a terminal must consume a token
                    leading[sym] = None
                    if sym not in nullable:
                        break
            dependencies[lhs] = leading

        cycles = []
        finished = set()
        path = []
        on_path = set()  # mirrors ``path`` for O(1) membership tests

        def dfs(node):
            if node in on_path:
                # Back edge: the slice from the first occurrence is the cycle.
                cycles.append(path[path.index(node):] + [node])
                return
            if node in finished:
                return
            path.append(node)
            on_path.add(node)
            for neighbor in dependencies.get(node, ()):
                dfs(neighbor)
            path.pop()
            on_path.discard(node)
            finished.add(node)

        for nt in self.grammar:
            dfs(nt)
        return cycles

    def parsing_rules(self, symbol):
        """Try to match ``symbol`` at ``self.current_index``.

        Args:
            symbol: ``"ε"``, a terminal, or a non-terminal of the grammar.

        Returns:
            * ``"ε"`` for the empty symbol (no token consumed);
            * the matched token for a terminal;
            * for a non-terminal, a dict mapping each symbol of the first
              production that matches to that symbol's own result;
            * ``None`` when nothing matches. In that case ``current_index``
              is left where it was on entry.

        Productions are tried longest first (ties keep grammar order) and the
        first one that matches wins; earlier choices are not revisited.

        NOTE(review): the result dict is keyed by symbol name, so a
        production that repeats a symbol keeps only the last match for it.
        """
        if symbol == "ε":
            return "ε"

        if symbol not in self.grammar:
            if (
                self.current_index < len(self.tokens)
                and self.tokens[self.current_index] == symbol
            ):
                val = self.tokens[self.current_index]
                self.current_index += 1
                return val

            # Remember which terminals were tried at the furthest failure
            # point; failures at earlier positions are ignored.
            if self.current_index >= self.max_idx:
                if self.current_index > self.max_idx:
                    self.max_idx = self.current_index
                    self.expected_at_max = []
                if symbol not in self.expected_at_max:
                    self.expected_at_max.append(symbol)
            return None

        start_pos = self.current_index
        for production in sorted(self.grammar[symbol], key=len, reverse=True):
            self.current_index = start_pos  # backtrack before each attempt
            children = {}
            for part in production:
                res = self.parsing_rules(part)
                if res is None:
                    break
                children[part] = res
            else:
                # Every part matched (also true for an empty production).
                return children

        # Nothing matched: do not leave the cursor at the point where the
        # last failed attempt happened to stop.
        self.current_index = start_pos
        return None

    def error_check(self, sentence, start_symbol):
        """Print a diagnostic for a sentence that failed to parse.

        Uses ``max_idx`` / ``expected_at_max`` recorded by
        :meth:`parsing_rules`. If no expectation was recorded, or the parse
        consumed a prefix that reaches beyond ``max_idx``, the problem is
        leftover input: the first unconsumed token is reported and ``"EOF"``
        is shown as the expected symbol.

        Args:
            sentence: The token sequence that was parsed.
            start_symbol: Unused; kept for interface compatibility.
        """
        position = self.max_idx
        expected = self.expected_at_max
        if not expected or self.current_index > position:
            position = max(position, self.current_index)
            expected = ["EOF"]
        # Guard against stale state pointing past the end of this sentence.
        position = min(position, len(sentence))

        token_label = sentence[position] if position < len(sentence) else "EOF"
        expected_str = ' or '.join([f'"{e}"' for e in expected])
        if position == 0:
            why_msg = f"the sentence begins with '{token_label}', but grammar requires {expected_str} to start"
        else:
            prev_token = sentence[position - 1]
            why_msg = f"after '{prev_token}', the grammar requires {expected_str} to continue the sequence, but found '{token_label}'"

        print("Invalid\n")
        print("Error:")
        print(f"•Where the error occurs: at token {position + 1} (\"{token_label}\")")
        print(f"•What was expected: {expected_str}")
        print(f"•Why the sentence is invalid: {why_msg}")