-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathlexer.py
More file actions
51 lines (48 loc) · 1.18 KB
/
lexer.py
File metadata and controls
51 lines (48 loc) · 1.18 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import re
# Ordered token table for the lexer: each entry is (token type, regex source).
#
# Patterns are tried top to bottom at the current input position and the
# FIRST one that matches wins, so the order of this list is significant:
#   * Keywords come before NAME and end with \b, so an identifier that merely
#     starts with a keyword ("ending", "castle", "checker") is lexed as one
#     NAME instead of keyword + NAME.
#   * Two-character operators (==, !=, >=, <=) come before their
#     one-character prefixes (=, >, <); otherwise ">=" would be split into
#     GT followed by EQUALS and GE/LE could never be produced.
TOKEN_REGEX = [
    # Literals
    ('NUMBER', r'\d+(\.\d+)?'),
    ('STRING', r'\".*?\"'),
    # Keywords (trailing \b keeps them from matching identifier prefixes)
    ('SAY', r'(?:say|yell)\b'),
    ('PLANT', r'(?:plant|keep)\b'),
    ('IF', r'check\b'),
    ('ELSE', r'flip\b'),
    ('HOP', r'hop\b'),
    ('MAGIC', r'magic\b'),
    ('CAST', r'cast\b'),
    ('RETURN', r'return\b'),
    ('END', r'end\b'),
    # Arithmetic operators
    ('PLUS', r'\+'),
    ('MINUS', r'-'),
    ('MULT', r'\*'),
    ('DIV', r'/'),
    ('MOD', r'%'),
    # Comparison / assignment: longest operators first
    ('EQ', r'=='),
    ('NE', r'!='),
    ('GE', r'>='),
    ('LE', r'<='),
    ('GT', r'>'),
    ('LT', r'<'),
    ('EQUALS', r'='),
    # Punctuation
    ('LPAREN', r'\('),
    ('RPAREN', r'\)'),
    ('COLON', r':'),
    # Identifiers, line breaks, and ignorable horizontal whitespace
    ('NAME', r'[a-zA-Z_][a-zA-Z0-9_]*'),
    ('NEWLINE', r'\n'),
    ('SKIP', r'[ \t]+'),
]
def tokenize(code):
    """Split ``code`` into a list of ``(token_type, text)`` tuples.

    At each position the patterns in ``TOKEN_REGEX`` are tried in table order
    and the first one that matches wins. Matches whose type is ``'SKIP'`` are
    consumed but not emitted.

    Args:
        code: Source text to tokenize.

    Returns:
        A list of ``(token_type, matched_text)`` tuples in source order; an
        empty list when ``code`` is empty.

    Raises:
        SyntaxError: If no pattern matches at some position. The message
            contains the remaining, unconsumed input.
    """
    tokens = []
    if not code:
        return tokens

    # Compile each pattern once per call instead of once per pattern per token.
    compiled = [(tok_type, re.compile(regex)) for tok_type, regex in TOKEN_REGEX]

    # Advance an offset through the input rather than re-slicing the string
    # after every token, which would copy the remaining text each time.
    # NOTE: matching at an offset assumes the table's patterns do not rely on
    # start-of-string anchors or lookbehind.
    pos = 0
    length = len(code)
    while pos < length:
        for tok_type, pattern in compiled:
            found = pattern.match(code, pos)
            # A zero-length match consumes nothing and would spin forever,
            # so it is treated the same as no match.
            if found and found.end() > pos:
                if tok_type != 'SKIP':
                    tokens.append((tok_type, found.group(0)))
                pos = found.end()
                break
        else:
            # No pattern in the table recognised the text at ``pos``.
            raise SyntaxError(f'Unknown token: {code[pos:]}')
    return tokens