@@ -4,17 +4,19 @@ use chumsky::input::{Input, ValueInput};
44use chumsky:: prelude:: * ;
55use chumsky:: span:: SimpleSpan ;
66use chumsky:: text:: { ident, int} ;
7+ use core:: f64;
78use num_complex:: Complex64 ;
9+ use std:: iter:: Peekable ;
810use std:: ops:: Range ;
11+ use std:: str:: Chars ;
912
1013pub type Span = SimpleSpan ;
1114
1215#[ derive( Clone , Debug , PartialEq ) ]
1316pub enum Token < ' src > {
1417 // literals ----------------------------------------------------------------
1518 Const ( Literal ) , // numeric or complex constants recognised at lex‑time
16- Var ( & ' src str ) , // #identifier (variables)
17- Call ( & ' src str ) ,
19+ Ident ( & ' src str ) ,
1820 // punctuation -------------------------------------------------------------
1921 LParen ,
2022 RParen ,
@@ -34,124 +36,216 @@ pub enum Token<'src> {
3436 If ,
3537}
3638
37- pub fn lexer < ' src > ( ) -> impl Parser < ' src , & ' src str , Vec < ( Token < ' src > , Span ) > , extra:: Err < Rich < ' src , char > > > {
38- // ── numbers ────────────────────────────────────────────────────────────
39- let num = int ( 10 )
40- . then ( just ( '.' ) . then ( int ( 10 ) ) . or_not ( ) )
41- . then ( just ( 'e' ) . or ( just ( 'E' ) ) . then ( one_of ( "+-" ) . or_not ( ) ) . then ( int ( 10 ) ) . or_not ( ) )
42- . map ( |( ( int_part, frac) , exp) : ( ( & str , _ ) , _ ) | {
43- let mut s = int_part. to_string ( ) ;
44- if let Some ( ( _, frac) ) = frac {
45- s. push ( '.' ) ;
46- s. push_str ( frac) ;
39+ fn const_lit ( name : & str ) -> Option < Literal > {
40+ use std:: f64:: consts:: * ;
41+
42+ Some ( match name {
43+ "pi" | "π" => Literal :: Float ( PI ) ,
44+ "tau" | "τ" => Literal :: Float ( TAU ) ,
45+ "e" => Literal :: Float ( E ) ,
46+ "phi" | "φ" => Literal :: Float ( 1.618_033_988_75 ) ,
47+ "inf" | "∞" => Literal :: Float ( f64:: INFINITY ) ,
48+ "i" => Literal :: Complex ( Complex64 :: new ( 0.0 , 1.0 ) ) ,
49+ "G" => Literal :: Float ( 9.80665 ) ,
50+ _ => return None ,
51+ } )
52+ }
53+
/// Hand-written, on-demand lexer over a borrowed source string.
///
/// Tokens are produced one at a time by `next_token`; identifier tokens borrow directly from
/// `input`, so no allocation is needed while lexing.
#[derive(Clone, Debug)]
pub struct Lexer<'a> {
    /// The complete source text being tokenised.
    input: &'a str,
    /// Byte offset into `input` of the next unread character.
    pos: usize,
}
58+
59+ impl < ' a > Lexer < ' a > {
60+ pub fn new ( input : & ' a str ) -> Self {
61+ Self { input, pos : 0 }
62+ }
63+
64+ fn peek ( & self ) -> Option < char > {
65+ self . input [ self . pos ..] . chars ( ) . next ( )
66+ }
67+
68+ fn bump ( & mut self ) -> Option < char > {
69+ let c = self . peek ( ) ?;
70+ self . pos += c. len_utf8 ( ) ;
71+ Some ( c)
72+ }
73+
74+ fn consume_while < F > ( & mut self , cond : F ) -> & ' a str
75+ where
76+ F : Fn ( char ) -> bool ,
77+ {
78+ let start = self . pos ;
79+ while self . peek ( ) . is_some_and ( & cond) {
80+ self . bump ( ) ;
81+ }
82+ & self . input [ start..self . pos ]
83+ }
84+
85+ fn lex_ident ( & mut self ) -> & ' a str {
86+ self . consume_while ( |c| c. is_alphanumeric ( ) || c == '_' )
87+ }
88+
89+ fn lex_uint ( & mut self ) -> Option < ( u64 , usize ) > {
90+ let mut v = 0u64 ;
91+ let mut digits = 0 ;
92+ while let Some ( d) = self . peek ( ) . and_then ( |c| c. to_digit ( 10 ) ) {
93+ v = v * 10 + d as u64 ;
94+ digits += 1 ;
95+ self . bump ( ) ;
96+ }
97+ ( digits > 0 ) . then_some ( ( v, digits) )
98+ }
99+
100+ fn lex_number ( & mut self ) -> Option < f64 > {
101+ let start_pos = self . pos ;
102+ let ( int_val, int_digits) = self . lex_uint ( ) . unwrap_or ( ( 0 , 0 ) ) ;
103+ let mut got_digit = int_digits > 0 ;
104+ let mut num = int_val as f64 ;
105+
106+ if self . peek ( ) == Some ( '.' ) {
107+ self . bump ( ) ;
108+ if let Some ( ( frac_val, frac_digits) ) = self . lex_uint ( ) {
109+ num += ( frac_val as f64 ) / 10f64 . powi ( frac_digits as i32 ) ;
110+ got_digit = true ;
47111 }
48- if let Some ( ( ( e, sign) , exp) ) = exp {
49- s. push ( e) ;
50- if let Some ( sign) = sign {
51- s. push ( sign) ;
112+ }
113+
114+ if matches ! ( self . peek( ) , Some ( 'e' | 'E' ) ) {
115+ self . bump ( ) ;
116+ let sign = match self . peek ( ) {
117+ Some ( '+' ) => {
118+ self . bump ( ) ;
119+ 1
52120 }
53- s. push_str ( exp) ;
121+ Some ( '-' ) => {
122+ self . bump ( ) ;
123+ -1
124+ }
125+ _ => 1 ,
126+ } ;
127+ if let Some ( ( exp_val, _) ) = self . lex_uint ( ) {
128+ num *= 10f64 . powi ( sign * exp_val as i32 ) ;
129+ } else {
130+ self . pos = start_pos;
131+ return None ;
54132 }
55- Token :: Const ( Literal :: Float ( s. parse :: < f64 > ( ) . unwrap ( ) ) )
56- } ) ;
57-
58- // ── single‑char symbols ────────────────────────────────────────────────
59- let sym = choice ( (
60- just ( '(' ) . to ( Token :: LParen ) ,
61- just ( ')' ) . to ( Token :: RParen ) ,
62- just ( ',' ) . to ( Token :: Comma ) ,
63- just ( '+' ) . to ( Token :: Plus ) ,
64- just ( '-' ) . to ( Token :: Minus ) ,
65- just ( '*' ) . to ( Token :: Star ) ,
66- just ( '/' ) . to ( Token :: Slash ) ,
67- just ( '^' ) . to ( Token :: Caret ) ,
68- ) ) ;
69-
70- // ── comparison operators ───────────────────────────────────────────────
71- let cmp = choice ( (
72- just ( "<=" ) . to ( Token :: Le ) ,
73- just ( ">=" ) . to ( Token :: Ge ) ,
74- just ( "==" ) . to ( Token :: EqEq ) ,
75- just ( '<' ) . to ( Token :: Lt ) ,
76- just ( '>' ) . to ( Token :: Gt ) ,
77- ) ) ;
78-
79- let kw_token = |w, t| just ( w) . padded ( ) . to ( t) ;
80-
81- let kw_lit = |w, lit : Literal | just ( w) . padded ( ) . to ( lit) ;
82-
83- let const_token = choice ( (
84- kw_lit ( "pi" , Literal :: Float ( std:: f64:: consts:: PI ) ) ,
85- kw_lit ( "π" , Literal :: Float ( std:: f64:: consts:: PI ) ) ,
86- kw_lit ( "tau" , Literal :: Float ( std:: f64:: consts:: TAU ) ) ,
87- kw_lit ( "τ" , Literal :: Float ( std:: f64:: consts:: TAU ) ) ,
88- kw_lit ( "e" , Literal :: Float ( std:: f64:: consts:: E ) ) ,
89- kw_lit ( "phi" , Literal :: Float ( 1.618_033_988_75 ) ) ,
90- kw_lit ( "φ" , Literal :: Float ( 1.618_033_988_75 ) ) ,
91- kw_lit ( "inf" , Literal :: Float ( f64:: INFINITY ) ) ,
92- kw_lit ( "∞" , Literal :: Float ( f64:: INFINITY ) ) ,
93- kw_lit ( "i" , Literal :: Complex ( Complex64 :: new ( 0.0 , 1.0 ) ) ) ,
94- kw_lit ( "G" , Literal :: Float ( 9.80665 ) ) ,
95- ) )
96- . map ( Token :: Const ) ;
97-
98- let var_token = just ( '#' ) . ignore_then ( ident ( ) ) . map ( Token :: Var ) ;
99- let call_token = just ( '@' ) . ignore_then ( ident ( ) ) . map ( Token :: Call ) ;
100-
101- choice ( ( num, kw_token ( "if" , Token :: If ) , const_token, cmp, sym, var_token, call_token) )
102- . map_with ( |t, e| ( t, e. span ( ) ) )
103- . padded ( )
104- . repeated ( )
105- . collect ( )
106- }
133+ }
107134
108- #[ derive( Debug ) ]
109- pub struct TokenStream < ' src > {
110- tokens : Vec < ( Token < ' src > , Span ) > ,
111- }
135+ got_digit. then_some ( num)
136+ }
137+
138+ fn skip_ws ( & mut self ) {
139+ self . consume_while ( char:: is_whitespace) ;
140+ }
141+
142+ pub fn next_token ( & mut self ) -> Option < Token < ' a > > {
143+ self . skip_ws ( ) ;
144+ let start = self . pos ;
145+ let ch = self . bump ( ) ?;
146+
147+ use Token :: * ;
148+ let tok = match ch {
149+ '(' => LParen ,
150+ ')' => RParen ,
151+ ',' => Comma ,
152+ '+' => Plus ,
153+ '-' => Minus ,
154+ '*' => Star ,
155+ '/' => Slash ,
156+ '^' => Caret ,
157+
158+ '<' => {
159+ if self . peek ( ) == Some ( '=' ) {
160+ self . bump ( ) ;
161+ Le
162+ } else {
163+ Lt
164+ }
165+ }
166+ '>' => {
167+ if self . peek ( ) == Some ( '=' ) {
168+ self . bump ( ) ;
169+ Ge
170+ } else {
171+ Gt
172+ }
173+ }
174+ '=' => {
175+ if self . peek ( ) == Some ( '=' ) {
176+ self . bump ( ) ;
177+ EqEq
178+ } else {
179+ return None ;
180+ }
181+ }
182+
183+ c if c. is_ascii_digit ( ) || ( c == '.' && self . peek ( ) . is_some_and ( |c| c. is_ascii_digit ( ) ) ) => {
184+ self . pos = start;
185+ Const ( Literal :: Float ( self . lex_number ( ) ?) )
186+ }
187+
188+ _ => {
189+ self . consume_while ( |c| c. is_alphanumeric ( ) || c == '_' ) ;
190+ let ident = & self . input [ start..self . pos ] ;
191+
192+ if ident == "if" {
193+ If
194+ } else if let Some ( lit) = const_lit ( ident) {
195+ Const ( lit)
196+ } else if ch. is_alphanumeric ( ) {
197+ Ident ( ident)
198+ } else {
199+ return None ;
200+ }
201+ }
202+ } ;
112203
113- impl < ' src > TokenStream < ' src > {
114- pub fn new ( tokens : Vec < ( Token < ' src > , Span ) > ) -> Self {
115- TokenStream { tokens }
204+ Some ( tok)
116205 }
117206}
118207
119- impl < ' src > Input < ' src > for TokenStream < ' src > {
120- type Token = ( Token < ' src > , Span ) ;
208+ impl < ' src > Input < ' src > for Lexer < ' src > {
209+ type Token = Token < ' src > ;
121210 type Span = Span ;
122- type Cursor = usize ;
123- type MaybeToken = ( Token < ' src > , Span ) ;
211+ type Cursor = usize ; // byte offset inside `input`
212+ type MaybeToken = Token < ' src > ;
124213 type Cache = Self ;
125214
215+ #[ inline]
126216 fn begin ( self ) -> ( Self :: Cursor , Self :: Cache ) {
127217 ( 0 , self )
128218 }
129219
220+ #[ inline]
130221 fn cursor_location ( cursor : & Self :: Cursor ) -> usize {
131222 * cursor
132223 }
133224
134- #[ inline( always ) ]
225+ #[ inline]
135226 unsafe fn next_maybe ( this : & mut Self :: Cache , cursor : & mut Self :: Cursor ) -> Option < Self :: MaybeToken > {
136- if let Some ( tok) = this. tokens . get ( * cursor) {
137- * cursor += 1 ;
138- Some ( tok. clone ( ) )
227+ this. pos = * cursor;
228+ if let Some ( tok) = this. next_token ( ) {
229+ * cursor = this. pos ;
230+ Some ( tok)
139231 } else {
140232 None
141233 }
142234 }
143235
144- #[ inline( always ) ]
236+ #[ inline]
145237 unsafe fn span ( _this : & mut Self :: Cache , range : Range < & Self :: Cursor > ) -> Self :: Span {
146238 ( * range. start ..* range. end ) . into ( )
147239 }
148240}
149241
150- impl < ' src > ValueInput < ' src > for TokenStream < ' src > {
242+ impl < ' src > ValueInput < ' src > for Lexer < ' src > {
243+ #[ inline]
151244 unsafe fn next ( this : & mut Self :: Cache , cursor : & mut Self :: Cursor ) -> Option < Self :: Token > {
152- if let Some ( tok) = this. tokens . get ( * cursor) {
153- * cursor += 1 ;
154- Some ( tok. clone ( ) )
245+ this. pos = * cursor;
246+ if let Some ( tok) = this. next_token ( ) {
247+ * cursor = this. pos ;
248+ Some ( tok)
155249 } else {
156250 None
157251 }
0 commit comments