|
28 | 28 | NODE = list[int] |
29 | 29 | TRIE = list[list[int]] |
30 | 30 |
|
31 | | - |
| 31 | +# The integers in a trie node are encoded as follows: |
| 32 | +# 0..len(trie) - 1: index of the next node in the trie |
| 33 | +# TK_KIND_OFFSET..TK_KIND_OFFSET_FOR_LOOK_AHEAD - 1: TK_KIND_OFFSET + token kind; the current char is the last char of the kw |
| 34 | +# TK_KIND_OFFSET_FOR_LOOK_AHEAD..: TK_KIND_OFFSET_FOR_LOOK_AHEAD + token kind; the current char is a look-ahead char and no longer part of the kw |
32 | 35 | TK_KIND_OFFSET = 1000 |
33 | 36 | TK_KIND_OFFSET_FOR_LOOK_AHEAD = 2000 |
34 | 37 |
|
@@ -121,19 +124,21 @@ def rewrite(t: NODE): |
121 | 124 | return out |
122 | 125 |
|
123 | 126 |
|
124 | | -def FindInTrie(trie: TRIE, s: str) -> tuple[int, int]: |
| 127 | +def FindInTrie(trie: TRIE, s: str) -> tuple[int, TK_KIND]: |
| 128 | + """ Returns (size, TK_KIND) if s[:size] is a token in the trie and (0, TK_KIND.INVALID) otherwise""" |
125 | 129 | node = trie[0] |
126 | 130 | for n, cc in enumerate(s): |
127 | 131 | x = node[ord(cc)] |
128 | 132 | if x == NODE_NULL: |
129 | | - return 0, 0 |
| 133 | + return 0, TK_KIND.INVALID |
130 | 134 | if x >= len(trie): |
131 | 135 | if x >= TK_KIND_OFFSET_FOR_LOOK_AHEAD: |
| 136 | + # we found a char that is not part of the kw |
132 | 137 | return n, TK_KIND(x - TK_KIND_OFFSET_FOR_LOOK_AHEAD) |
133 | 138 | else: |
134 | 139 | return n + 1, TK_KIND(x - TK_KIND_OFFSET) |
135 | 140 | node = trie[x] |
136 | | - return 0, 0 |
| 141 | + return 0, TK_KIND.INVALID |
137 | 142 |
|
138 | 143 |
|
139 | 144 | def VerifyTrie(trie: TRIE, KWs): |
@@ -259,7 +264,7 @@ def add_kw_simple(kw, tag): |
259 | 264 | def add_kw(kw, tag, non_succ): |
260 | 265 | # keyword is only valid if not followed by char in non_succ |
261 | 266 | # E.g. |
262 | | - # if is a keyword but ifoo is not |
| 267 | + # `if` is a keyword but `ifoo` is not |
263 | 268 | # similarly |
264 | 269 | # >> is an operator(-keyword) for most subsequent chars |
265 | 270 | # except >>> and >>= |
@@ -307,6 +312,8 @@ def add_kw(kw, tag, non_succ): |
307 | 312 |
|
308 | 313 | def MakeTrieNoisy(): |
309 | 314 | KWs = GetAllKWAndOps() |
| 315 | + for k, v in sorted(KWs): |
| 316 | + print(k, v) |
310 | 317 | trie = MakeInitialTrie(KWs) |
311 | 318 | # |
312 | 319 | print("Stats") |
@@ -650,6 +657,13 @@ def MakePerfectHashForBinOp(): |
650 | 657 |
|
651 | 658 | if __name__ == "__main__": |
652 | 659 | if len(sys.argv) == 1: |
| 660 | + inp = Lexer(LexerRaw("stdin", sys.stdin)) |
| 661 | + while True: |
| 662 | + tk = inp.next() |
| 663 | + if tk.kind == TK_KIND.SPECIAL_EOF: |
| 664 | + break |
| 665 | + print(tk) |
| 666 | + elif sys.argv[1] == "trie_stats": |
653 | 667 | MakeTrieNoisy() |
654 | 668 | elif sys.argv[1] == "gen_cc": |
655 | 669 | cgen.ReplaceContent(GenerateCodeCC, sys.stdin, sys.stdout) |
|
0 commit comments