|
1 | | -use super::Value; |
| 1 | +use crate::utils::{Token, Value}; |
2 | 2 | use std::collections::HashMap; |
3 | 3 |
|
4 | | -// ========================= Tokenizer ========================= \\ |
5 | | - |
6 | | -#[derive(Clone, PartialEq)] |
7 | | -pub enum Token { |
8 | | - Newline, |
9 | | - EOF, |
10 | | - Identifier(String), |
11 | | - Value(Value), |
12 | | - Operator(String), |
13 | | - Keyword(String), |
14 | | - Symbol(String), |
15 | | -} |
16 | | - |
17 | | -impl Token { |
18 | | - pub fn to_string(&self) -> String { |
19 | | - match self { |
20 | | - Token::Newline => "\n".to_string(), |
21 | | - Token::EOF => "EOF".to_string(), |
22 | | - Token::Identifier(s) => format!("ID[{}]", s), |
23 | | - Token::Value(v) => format!("VAL[{}]", v.to_string()), |
24 | | - Token::Operator(s) => format!("OP[{}]", s), |
25 | | - Token::Keyword(s) => format!("KEY[{}]", s), |
26 | | - Token::Symbol(s) => format!("SYM[{}]", s), |
27 | | - } |
28 | | - } |
29 | | -} |
30 | | - |
31 | | -impl std::fmt::Debug for Token { |
32 | | - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { |
33 | | - write!(f, "{}", self.to_string()) |
34 | | - } |
35 | | -} |
36 | | - |
37 | | -pub struct Tokenizer { |
38 | | - code: String, |
39 | | - pointer: usize, |
40 | | -} |
41 | | - |
42 | | -impl Tokenizer { |
43 | | - pub fn new(code: String) -> Self { |
44 | | - Self { code, pointer: 0 } |
45 | | - } |
46 | | - |
47 | | - fn skip_whitespace(&mut self) { |
48 | | - while self.pointer < self.code.len() |
49 | | - && self.code[self.pointer..].starts_with(char::is_whitespace) |
50 | | - { |
51 | | - self.pointer += 1; |
52 | | - } |
53 | | - } |
54 | | - |
55 | | - fn get_number(&mut self) -> String { |
56 | | - let start = self.pointer; |
57 | | - while self.pointer < self.code.len() |
58 | | - && self.code[self.pointer..].starts_with(char::is_numeric) |
59 | | - | self.code[self.pointer..].starts_with('.') |
60 | | - { |
61 | | - self.pointer += 1; |
62 | | - } |
63 | | - self.code[start..self.pointer].to_string() |
64 | | - } |
65 | | - |
66 | | - fn get_identifier(&mut self) -> String { |
67 | | - let start = self.pointer; |
68 | | - while self.pointer < self.code.len() |
69 | | - && (self.code[self.pointer..].starts_with(char::is_alphanumeric) |
70 | | - || self.code[self.pointer..].starts_with('_')) |
71 | | - { |
72 | | - self.pointer += 1; |
73 | | - } |
74 | | - self.code[start..self.pointer].to_string() |
75 | | - } |
76 | | - |
77 | | - fn tokenize(&mut self) -> Option<Token> { |
78 | | - let keyword_list = vec![ |
79 | | - "nop", |
80 | | - "match", |
81 | | - "if", |
82 | | - "else", |
83 | | - "while", |
84 | | - "for", |
85 | | - "in", |
86 | | - "global", |
87 | | - "assert", |
88 | | - "setup", |
89 | | - "update", |
90 | | - "clone_setup", |
91 | | - "clone_update", |
92 | | - "when", |
93 | | - "fn", |
94 | | - "import", |
95 | | - ]; |
96 | | - |
97 | | - if self.pointer >= self.code.len() { |
98 | | - return None; |
99 | | - } |
100 | | - |
101 | | - // Newline |
102 | | - if self.code[self.pointer..].starts_with('\n') { |
103 | | - self.pointer += 1; |
104 | | - return Some(Token::Newline); |
105 | | - } |
106 | | - |
107 | | - self.skip_whitespace(); |
108 | | - |
109 | | - if self.pointer >= self.code.len() { |
110 | | - return None; |
111 | | - } |
112 | | - |
113 | | - let c = &self.code[self.pointer..]; |
114 | | - |
115 | | - // Comments |
116 | | - if c.starts_with("//") || c.starts_with("#") { |
117 | | - while self.pointer < self.code.len() && !self.code[self.pointer..].starts_with('\n') { |
118 | | - self.pointer += 1; |
119 | | - } |
120 | | - return Some(Token::Newline); |
121 | | - } |
122 | | - |
123 | | - if c.starts_with("/*") { |
124 | | - self.pointer += 2; |
125 | | - while self.pointer < self.code.len() && !self.code[self.pointer..].starts_with("*/") { |
126 | | - self.pointer += 1; |
127 | | - } |
128 | | - if self.pointer < self.code.len() { |
129 | | - self.pointer += 2; // Skip */ |
130 | | - } |
131 | | - return Some(Token::Newline); |
132 | | - } |
133 | | - |
134 | | - // Null |
135 | | - if c.starts_with("null") { |
136 | | - self.pointer += 4; |
137 | | - return Some(Token::Value(Value::Null)); |
138 | | - } |
139 | | - |
140 | | - // Booleans |
141 | | - if c.starts_with("true") { |
142 | | - self.pointer += 4; |
143 | | - return Some(Token::Value(Value::Boolean(true))); |
144 | | - } |
145 | | - |
146 | | - if c.starts_with("false") { |
147 | | - self.pointer += 5; |
148 | | - return Some(Token::Value(Value::Boolean(false))); |
149 | | - } |
150 | | - |
151 | | - // Strings |
152 | | - if c.starts_with('"') { |
153 | | - self.pointer += 1; |
154 | | - let start = self.pointer; |
155 | | - while self.pointer < self.code.len() && !self.code[self.pointer..].starts_with('"') { |
156 | | - self.pointer += 1; |
157 | | - } |
158 | | - let string = &self.code[start..self.pointer] |
159 | | - .replace("\\\"", "\"") // Unescape quotes |
160 | | - .replace("\\n", "\n") // Unescape newlines |
161 | | - .replace("\\t", "\t"); // Unescape tabs |
162 | | - self.pointer += 1; // Skip closing " |
163 | | - return Some(Token::Value(Value::String(string.to_string()))); |
164 | | - } |
165 | | - |
166 | | - // Multi-char operators first |
167 | | - let two = &self.code[self.pointer..self.pointer + 2.min(self.code.len() - self.pointer)]; |
168 | | - if [ |
169 | | - "+=", "-=", "*=", "/=", "==", "!=", "<=", ">=", "&&", "||", "..", "**", "<<", ">>", |
170 | | - "++", "--", |
171 | | - ] |
172 | | - .contains(&two) |
173 | | - { |
174 | | - self.pointer += 2; |
175 | | - return Some(Token::Operator(two.to_string())); |
176 | | - } |
177 | | - |
178 | | - // Single-char operators |
179 | | - let one = &self.code[self.pointer..self.pointer + 1]; |
180 | | - if ["=", "+", "-", "*", "/", "%", "^", "&", "|", "<", ">", "!"].contains(&one) { |
181 | | - self.pointer += 1; |
182 | | - return Some(Token::Operator(one.to_string())); |
183 | | - } |
184 | | - |
185 | | - // Symbols |
186 | | - if ["(", ")", "[", "]", "{", "}", ",", ":", "."].contains(&one) { |
187 | | - self.pointer += 1; |
188 | | - return Some(Token::Symbol(one.to_string())); |
189 | | - } |
190 | | - |
191 | | - // Numbers |
192 | | - if c.starts_with("0x") || c.starts_with("0b") || c.starts_with("0o") { |
193 | | - // Hex, binary, or octal numbers |
194 | | - let base = if c.starts_with("0x") { |
195 | | - 16 |
196 | | - } else if c.starts_with("0b") { |
197 | | - 2 |
198 | | - } else { |
199 | | - 8 |
200 | | - }; |
201 | | - self.pointer += 2; // Skip 0x, 0b, or 0o |
202 | | - let start = self.pointer; |
203 | | - while self.pointer < self.code.len() |
204 | | - && self.code[self.pointer..].starts_with(char::is_alphanumeric) |
205 | | - { |
206 | | - self.pointer += 1; |
207 | | - } |
208 | | - let number = &self.code[start..self.pointer]; |
209 | | - return Some(Token::Value(Value::Number( |
210 | | - i64::from_str_radix(number, base).unwrap() as f32, |
211 | | - ))); |
212 | | - } |
213 | | - |
214 | | - if c.chars().next().unwrap().is_ascii_digit() { |
215 | | - let number = self.get_number(); |
216 | | - return Some(Token::Value(Value::Number(number.parse().unwrap()))); |
217 | | - } |
218 | | - |
219 | | - if c.chars().next().unwrap() == '.' { |
220 | | - self.pointer += 1; |
221 | | - let number = self.get_number(); |
222 | | - return Some(Token::Value(Value::Number( |
223 | | - format!(".{}", number).parse().unwrap(), |
224 | | - ))); |
225 | | - } |
226 | | - |
227 | | - // Identifiers or keywords |
228 | | - if c.chars().next().unwrap().is_alphabetic() || c.starts_with('_') { |
229 | | - let ident = self.get_identifier(); |
230 | | - if keyword_list.contains(&ident.as_str()) { |
231 | | - return Some(Token::Keyword(ident)); |
232 | | - } else { |
233 | | - return Some(Token::Identifier(ident)); |
234 | | - } |
235 | | - } |
236 | | - |
237 | | - // Skip unknown character |
238 | | - self.pointer += 1; |
239 | | - self.tokenize() |
240 | | - } |
241 | | - |
242 | | - pub fn tokenize_full(&mut self) -> Vec<Token> { |
243 | | - let mut tokens = vec![]; |
244 | | - while let Some(token) = self.tokenize() { |
245 | | - tokens.push(token); |
246 | | - } |
247 | | - tokens.push(Token::EOF); |
248 | | - tokens |
249 | | - } |
250 | | -} |
251 | | - |
252 | | -// ========================= Parser ========================= \\ |
253 | | - |
254 | | -#[derive(Clone, PartialEq)] |
| 4 | +#[derive(Debug, Clone, PartialEq)] |
255 | 5 | pub enum Expression { |
256 | 6 | Value(Value), |
257 | 7 | List(Vec<Expression>), |
@@ -285,66 +35,56 @@ pub enum Expression { |
285 | 35 | }, |
286 | 36 | } |
287 | 37 |
|
288 | | -impl Expression { |
289 | | - pub fn to_string(&self) -> String { |
| 38 | +impl std::fmt::Display for Expression { |
| 39 | + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { |
290 | 40 | match self { |
291 | | - Expression::Value(v) => v.to_string(), |
| 41 | + Expression::Value(v) => write!(f, "VAL[{}]", v.to_string()), |
292 | 42 | Expression::List(l) => { |
293 | 43 | let list_str = l |
294 | 44 | .iter() |
295 | 45 | .map(|e| e.to_string()) |
296 | 46 | .collect::<Vec<_>>() |
297 | 47 | .join(", "); |
298 | | - format!("[{}]", list_str) |
| 48 | + write!(f, "[{}]", list_str) |
299 | 49 | } |
300 | 50 | Expression::Object(o) => { |
301 | 51 | let obj_str = o |
302 | 52 | .iter() |
303 | 53 | .map(|(k, v)| format!("{}: {}", k, v.to_string())) |
304 | 54 | .collect::<Vec<_>>() |
305 | 55 | .join(", "); |
306 | | - format!("{{ {} }}", obj_str) |
| 56 | + write!(f, "{{ {} }}", obj_str) |
307 | 57 | } |
308 | 58 | Expression::Closure { args, returns, .. } => { |
309 | 59 | let args_str = args.join(", "); |
310 | | - format!("({}) {} {{ ... }}", args_str, returns.to_string(),) |
| 60 | + write!(f, "({}) {} {{ ... }}", args_str, returns.to_string()) |
311 | 61 | } |
312 | 62 | Expression::ListMemberAccess { list, index } => { |
313 | | - format!("{}[{}]", list.to_string(), index.to_string()) |
| 63 | + write!(f, "{}[{}]", list.to_string(), index.to_string()) |
314 | 64 | } |
315 | | - Expression::Identifier(id) => id.clone(), |
316 | | - Expression::PostIncrement(id) => format!("{}++", id), |
317 | | - Expression::PostDecrement(id) => format!("{}--", id), |
318 | | - Expression::PreIncrement(id) => format!("++{}", id), |
319 | | - Expression::PreDecrement(id) => format!("--{}", id), |
320 | | - Expression::Binary { |
321 | | - left, |
322 | | - operator, |
323 | | - right, |
324 | | - } => { |
325 | | - format!("({} {} {})", left.to_string(), operator, right.to_string()) |
| 65 | + Expression::Identifier(id) => write!(f, "ID[{}]", id), |
| 66 | + Expression::PostIncrement(id) => write!(f, "{}++", id), |
| 67 | + Expression::PostDecrement(id) => write!(f, "{}--", id), |
| 68 | + Expression::PreIncrement(id) => write!(f, "++{}", id), |
| 69 | + Expression::PreDecrement(id) => write!(f, "--{}", id), |
| 70 | + Expression::Binary { left, operator, right } => { |
| 71 | + write!(f, "({} {} {})", left.to_string(), operator, right.to_string()) |
326 | 72 | } |
327 | 73 | Expression::Unary { operator, operand } => { |
328 | | - format!("({}{})", operator, operand.to_string()) |
| 74 | + write!(f, "({}{})", operator, operand.to_string()) |
329 | 75 | } |
330 | 76 | Expression::Call { function, args } => { |
331 | 77 | let args_str = args |
332 | 78 | .iter() |
333 | 79 | .map(|arg| arg.to_string()) |
334 | 80 | .collect::<Vec<_>>() |
335 | 81 | .join(", "); |
336 | | - format!("{}({})", function, args_str) |
| 82 | + write!(f, "{}({})", function, args_str) |
337 | 83 | } |
338 | 84 | } |
339 | 85 | } |
340 | 86 | } |
341 | 87 |
|
342 | | -impl std::fmt::Debug for Expression { |
343 | | - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { |
344 | | - write!(f, "{}", self.to_string()) |
345 | | - } |
346 | | -} |
347 | | - |
348 | 88 | #[derive(Clone, PartialEq)] |
349 | 89 | pub enum Statement { |
350 | 90 | Assignment { |
|
0 commit comments