Skip to content

Commit 1bfc71a

Browse files
Split language.rs
1 parent 74f67c9 commit 1bfc71a

3 files changed

Lines changed: 271 additions & 278 deletions

File tree

src/utils/language/mod.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pub mod parser;
2+
pub use parser::*;
3+
4+
pub mod tokenizer;
5+
pub use tokenizer::*;
Lines changed: 18 additions & 278 deletions
Original file line numberDiff line numberDiff line change
@@ -1,257 +1,7 @@
1-
use super::Value;
1+
use crate::utils::{Token, Value};
22
use std::collections::HashMap;
33

4-
// ========================= Tokenizer ========================= \\
5-
6-
#[derive(Clone, PartialEq)]
7-
pub enum Token {
8-
Newline,
9-
EOF,
10-
Identifier(String),
11-
Value(Value),
12-
Operator(String),
13-
Keyword(String),
14-
Symbol(String),
15-
}
16-
17-
impl Token {
18-
pub fn to_string(&self) -> String {
19-
match self {
20-
Token::Newline => "\n".to_string(),
21-
Token::EOF => "EOF".to_string(),
22-
Token::Identifier(s) => format!("ID[{}]", s),
23-
Token::Value(v) => format!("VAL[{}]", v.to_string()),
24-
Token::Operator(s) => format!("OP[{}]", s),
25-
Token::Keyword(s) => format!("KEY[{}]", s),
26-
Token::Symbol(s) => format!("SYM[{}]", s),
27-
}
28-
}
29-
}
30-
31-
impl std::fmt::Debug for Token {
32-
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
33-
write!(f, "{}", self.to_string())
34-
}
35-
}
36-
37-
pub struct Tokenizer {
38-
code: String,
39-
pointer: usize,
40-
}
41-
42-
impl Tokenizer {
43-
pub fn new(code: String) -> Self {
44-
Self { code, pointer: 0 }
45-
}
46-
47-
fn skip_whitespace(&mut self) {
48-
while self.pointer < self.code.len()
49-
&& self.code[self.pointer..].starts_with(char::is_whitespace)
50-
{
51-
self.pointer += 1;
52-
}
53-
}
54-
55-
fn get_number(&mut self) -> String {
56-
let start = self.pointer;
57-
while self.pointer < self.code.len()
58-
&& self.code[self.pointer..].starts_with(char::is_numeric)
59-
| self.code[self.pointer..].starts_with('.')
60-
{
61-
self.pointer += 1;
62-
}
63-
self.code[start..self.pointer].to_string()
64-
}
65-
66-
fn get_identifier(&mut self) -> String {
67-
let start = self.pointer;
68-
while self.pointer < self.code.len()
69-
&& (self.code[self.pointer..].starts_with(char::is_alphanumeric)
70-
|| self.code[self.pointer..].starts_with('_'))
71-
{
72-
self.pointer += 1;
73-
}
74-
self.code[start..self.pointer].to_string()
75-
}
76-
77-
fn tokenize(&mut self) -> Option<Token> {
78-
let keyword_list = vec![
79-
"nop",
80-
"match",
81-
"if",
82-
"else",
83-
"while",
84-
"for",
85-
"in",
86-
"global",
87-
"assert",
88-
"setup",
89-
"update",
90-
"clone_setup",
91-
"clone_update",
92-
"when",
93-
"fn",
94-
"import",
95-
];
96-
97-
if self.pointer >= self.code.len() {
98-
return None;
99-
}
100-
101-
// Newline
102-
if self.code[self.pointer..].starts_with('\n') {
103-
self.pointer += 1;
104-
return Some(Token::Newline);
105-
}
106-
107-
self.skip_whitespace();
108-
109-
if self.pointer >= self.code.len() {
110-
return None;
111-
}
112-
113-
let c = &self.code[self.pointer..];
114-
115-
// Comments
116-
if c.starts_with("//") || c.starts_with("#") {
117-
while self.pointer < self.code.len() && !self.code[self.pointer..].starts_with('\n') {
118-
self.pointer += 1;
119-
}
120-
return Some(Token::Newline);
121-
}
122-
123-
if c.starts_with("/*") {
124-
self.pointer += 2;
125-
while self.pointer < self.code.len() && !self.code[self.pointer..].starts_with("*/") {
126-
self.pointer += 1;
127-
}
128-
if self.pointer < self.code.len() {
129-
self.pointer += 2; // Skip */
130-
}
131-
return Some(Token::Newline);
132-
}
133-
134-
// Null
135-
if c.starts_with("null") {
136-
self.pointer += 4;
137-
return Some(Token::Value(Value::Null));
138-
}
139-
140-
// Booleans
141-
if c.starts_with("true") {
142-
self.pointer += 4;
143-
return Some(Token::Value(Value::Boolean(true)));
144-
}
145-
146-
if c.starts_with("false") {
147-
self.pointer += 5;
148-
return Some(Token::Value(Value::Boolean(false)));
149-
}
150-
151-
// Strings
152-
if c.starts_with('"') {
153-
self.pointer += 1;
154-
let start = self.pointer;
155-
while self.pointer < self.code.len() && !self.code[self.pointer..].starts_with('"') {
156-
self.pointer += 1;
157-
}
158-
let string = &self.code[start..self.pointer]
159-
.replace("\\\"", "\"") // Unescape quotes
160-
.replace("\\n", "\n") // Unescape newlines
161-
.replace("\\t", "\t"); // Unescape tabs
162-
self.pointer += 1; // Skip closing "
163-
return Some(Token::Value(Value::String(string.to_string())));
164-
}
165-
166-
// Multi-char operators first
167-
let two = &self.code[self.pointer..self.pointer + 2.min(self.code.len() - self.pointer)];
168-
if [
169-
"+=", "-=", "*=", "/=", "==", "!=", "<=", ">=", "&&", "||", "..", "**", "<<", ">>",
170-
"++", "--",
171-
]
172-
.contains(&two)
173-
{
174-
self.pointer += 2;
175-
return Some(Token::Operator(two.to_string()));
176-
}
177-
178-
// Single-char operators
179-
let one = &self.code[self.pointer..self.pointer + 1];
180-
if ["=", "+", "-", "*", "/", "%", "^", "&", "|", "<", ">", "!"].contains(&one) {
181-
self.pointer += 1;
182-
return Some(Token::Operator(one.to_string()));
183-
}
184-
185-
// Symbols
186-
if ["(", ")", "[", "]", "{", "}", ",", ":", "."].contains(&one) {
187-
self.pointer += 1;
188-
return Some(Token::Symbol(one.to_string()));
189-
}
190-
191-
// Numbers
192-
if c.starts_with("0x") || c.starts_with("0b") || c.starts_with("0o") {
193-
// Hex, binary, or octal numbers
194-
let base = if c.starts_with("0x") {
195-
16
196-
} else if c.starts_with("0b") {
197-
2
198-
} else {
199-
8
200-
};
201-
self.pointer += 2; // Skip 0x, 0b, or 0o
202-
let start = self.pointer;
203-
while self.pointer < self.code.len()
204-
&& self.code[self.pointer..].starts_with(char::is_alphanumeric)
205-
{
206-
self.pointer += 1;
207-
}
208-
let number = &self.code[start..self.pointer];
209-
return Some(Token::Value(Value::Number(
210-
i64::from_str_radix(number, base).unwrap() as f32,
211-
)));
212-
}
213-
214-
if c.chars().next().unwrap().is_ascii_digit() {
215-
let number = self.get_number();
216-
return Some(Token::Value(Value::Number(number.parse().unwrap())));
217-
}
218-
219-
if c.chars().next().unwrap() == '.' {
220-
self.pointer += 1;
221-
let number = self.get_number();
222-
return Some(Token::Value(Value::Number(
223-
format!(".{}", number).parse().unwrap(),
224-
)));
225-
}
226-
227-
// Identifiers or keywords
228-
if c.chars().next().unwrap().is_alphabetic() || c.starts_with('_') {
229-
let ident = self.get_identifier();
230-
if keyword_list.contains(&ident.as_str()) {
231-
return Some(Token::Keyword(ident));
232-
} else {
233-
return Some(Token::Identifier(ident));
234-
}
235-
}
236-
237-
// Skip unknown character
238-
self.pointer += 1;
239-
self.tokenize()
240-
}
241-
242-
pub fn tokenize_full(&mut self) -> Vec<Token> {
243-
let mut tokens = vec![];
244-
while let Some(token) = self.tokenize() {
245-
tokens.push(token);
246-
}
247-
tokens.push(Token::EOF);
248-
tokens
249-
}
250-
}
251-
252-
// ========================= Parser ========================= \\
253-
254-
#[derive(Clone, PartialEq)]
4+
#[derive(Debug, Clone, PartialEq)]
2555
pub enum Expression {
2566
Value(Value),
2577
List(Vec<Expression>),
@@ -285,66 +35,56 @@ pub enum Expression {
28535
},
28636
}
28737

288-
impl Expression {
289-
pub fn to_string(&self) -> String {
38+
impl std::fmt::Display for Expression {
39+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
29040
match self {
291-
Expression::Value(v) => v.to_string(),
41+
Expression::Value(v) => write!(f, "VAL[{}]", v.to_string()),
29242
Expression::List(l) => {
29343
let list_str = l
29444
.iter()
29545
.map(|e| e.to_string())
29646
.collect::<Vec<_>>()
29747
.join(", ");
298-
format!("[{}]", list_str)
48+
write!(f, "[{}]", list_str)
29949
}
30050
Expression::Object(o) => {
30151
let obj_str = o
30252
.iter()
30353
.map(|(k, v)| format!("{}: {}", k, v.to_string()))
30454
.collect::<Vec<_>>()
30555
.join(", ");
306-
format!("{{ {} }}", obj_str)
56+
write!(f, "{{ {} }}", obj_str)
30757
}
30858
Expression::Closure { args, returns, .. } => {
30959
let args_str = args.join(", ");
310-
format!("({}) {} {{ ... }}", args_str, returns.to_string(),)
60+
write!(f, "({}) {} {{ ... }}", args_str, returns.to_string())
31161
}
31262
Expression::ListMemberAccess { list, index } => {
313-
format!("{}[{}]", list.to_string(), index.to_string())
63+
write!(f, "{}[{}]", list.to_string(), index.to_string())
31464
}
315-
Expression::Identifier(id) => id.clone(),
316-
Expression::PostIncrement(id) => format!("{}++", id),
317-
Expression::PostDecrement(id) => format!("{}--", id),
318-
Expression::PreIncrement(id) => format!("++{}", id),
319-
Expression::PreDecrement(id) => format!("--{}", id),
320-
Expression::Binary {
321-
left,
322-
operator,
323-
right,
324-
} => {
325-
format!("({} {} {})", left.to_string(), operator, right.to_string())
65+
Expression::Identifier(id) => write!(f, "ID[{}]", id),
66+
Expression::PostIncrement(id) => write!(f, "{}++", id),
67+
Expression::PostDecrement(id) => write!(f, "{}--", id),
68+
Expression::PreIncrement(id) => write!(f, "++{}", id),
69+
Expression::PreDecrement(id) => write!(f, "--{}", id),
70+
Expression::Binary { left, operator, right } => {
71+
write!(f, "({} {} {})", left.to_string(), operator, right.to_string())
32672
}
32773
Expression::Unary { operator, operand } => {
328-
format!("({}{})", operator, operand.to_string())
74+
write!(f, "({}{})", operator, operand.to_string())
32975
}
33076
Expression::Call { function, args } => {
33177
let args_str = args
33278
.iter()
33379
.map(|arg| arg.to_string())
33480
.collect::<Vec<_>>()
33581
.join(", ");
336-
format!("{}({})", function, args_str)
82+
write!(f, "{}({})", function, args_str)
33783
}
33884
}
33985
}
34086
}
34187

342-
impl std::fmt::Debug for Expression {
343-
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
344-
write!(f, "{}", self.to_string())
345-
}
346-
}
347-
34888
#[derive(Clone, PartialEq)]
34989
pub enum Statement {
35090
Assignment {

0 commit comments

Comments
 (0)