@@ -16,6 +16,8 @@ pub enum Token {
1616 Param ( String ) ,
1717
1818 Range {
19+ /// Whether the left side of the range is bound by the previous token
20+ /// (but it's not contained in this token)
1921 bind_left : bool ,
2022 bind_right : bool ,
2123 } ,
@@ -39,12 +41,16 @@ pub enum Token {
3941 Annotate , // @
4042}
4143
44+ /// Lex chars to tokens until the end of the input
4245pub fn lexer ( ) -> impl Parser < char , Vec < TokenSpan > , Error = Cheap < char > > {
43- let whitespace = filter ( | x : & char | x . is_inline_whitespace ( ) )
46+ lex_token ( )
4447 . repeated ( )
45- . at_least ( 1 )
46- . ignored ( ) ;
48+ . then_ignore ( ignored ( ) )
49+ . then_ignore ( end ( ) )
50+ }
4751
52+ /// Lex chars to a single token
53+ pub fn lex_token ( ) -> impl Parser < char , TokenSpan , Error = Cheap < char > > {
4854 let control_multi = choice ( (
4955 just ( "->" ) . to ( Token :: ArrowThin ) ,
5056 just ( "=>" ) . to ( Token :: ArrowFat ) ,
@@ -109,41 +115,55 @@ pub fn lexer() -> impl Parser<char, Vec<TokenSpan>, Error = Cheap<char>> {
109115 ) )
110116 . recover_with ( skip_then_retry_until ( [ ] ) . skip_start ( ) ) ;
111117
112- let comment = just ( '#' )
113- . then ( newline. not ( ) . repeated ( ) )
114- . separated_by ( newline. then ( whitespace. or_not ( ) ) )
115- . at_least ( 1 )
116- . ignored ( ) ;
117-
118- let range = ( whitespace. or_not ( ) )
118+ let range = ( whitespace ( ) . or_not ( ) )
119119 . then_ignore ( just ( ".." ) )
120- . then ( whitespace. or_not ( ) )
120+ . then ( whitespace ( ) . or_not ( ) )
121121 . map ( |( left, right) | Token :: Range {
122+ // If there was no whitespace before (after), then we mark the range
123+ // as bound on the left (right).
122124 bind_left : left. is_none ( ) ,
123125 bind_right : right. is_none ( ) ,
124126 } )
125127 . map_with_span ( TokenSpan ) ;
126128
127- let line_wrap = newline
129+ choice ( ( range, ignored ( ) . ignore_then ( token. map_with_span ( TokenSpan ) ) ) )
130+ }
131+
132+ fn ignored ( ) -> impl Parser < char , ( ) , Error = Cheap < char > > {
133+ choice ( ( comment ( ) , whitespace ( ) , line_wrap ( ) ) )
134+ . repeated ( )
135+ . ignored ( )
136+ }
137+
138+ fn whitespace ( ) -> impl Parser < char , ( ) , Error = Cheap < char > > {
139+ filter ( |x : & char | x. is_inline_whitespace ( ) )
140+ . repeated ( )
141+ . at_least ( 1 )
142+ . ignored ( )
143+ }
144+
145+ fn line_wrap ( ) -> impl Parser < char , ( ) , Error = Cheap < char > > {
146+ newline ( )
128147 . then (
129148 // We can optionally have an empty line, or a line with a comment,
130149 // between the initial line and the continued line
131- whitespace
150+ whitespace ( )
132151 . or_not ( )
133- . then ( comment. or_not ( ) )
134- . then ( newline)
152+ . then ( comment ( ) . or_not ( ) )
153+ . then ( newline ( ) )
135154 . repeated ( ) ,
136155 )
137- . then ( whitespace. repeated ( ) )
156+ . then ( whitespace ( ) . repeated ( ) )
138157 . then ( just ( '\\' ) )
139- . ignored ( ) ;
140-
141- let ignored = choice ( ( comment, whitespace, line_wrap) ) . repeated ( ) ;
158+ . ignored ( )
159+ }
142160
143- choice ( ( range, ignored. ignore_then ( token. map_with_span ( TokenSpan ) ) ) )
144- . repeated ( )
145- . then_ignore ( ignored)
146- . then_ignore ( end ( ) )
161+ fn comment ( ) -> impl Parser < char , ( ) , Error = Cheap < char > > {
162+ just ( '#' )
163+ . then ( newline ( ) . not ( ) . repeated ( ) )
164+ . separated_by ( newline ( ) . then ( whitespace ( ) . or_not ( ) ) )
165+ . at_least ( 1 )
166+ . ignored ( )
147167}
148168
149169pub fn ident_part ( ) -> impl Parser < char , String , Error = Cheap < char > > + Clone {
@@ -625,4 +645,37 @@ mod test {
625645 // Unicode escape
626646 assert_snapshot ! ( quoted_string( true ) . parse( r"'\u{01f422}'" ) . unwrap( ) , @"🐢" ) ;
627647 }
648+
#[test]
fn range() {
    // No whitespace on either side of `..`.
    let both_sides = TokenVec(lexer().parse("1..2").unwrap());
    assert_debug_snapshot!(both_sides, @r###"
    TokenVec (
      0..1: Literal(Integer(1)),
      1..3: Range { bind_left: true, bind_right: true },
      3..4: Literal(Integer(2)),
    )
    "###);

    // Range at the very start of the input.
    let open_start = TokenVec(lexer().parse("..2").unwrap());
    assert_debug_snapshot!(open_start, @r###"
    TokenVec (
      0..2: Range { bind_left: true, bind_right: true },
      2..3: Literal(Integer(2)),
    )
    "###);

    // Range at the very end of the input.
    let open_end = TokenVec(lexer().parse("1..").unwrap());
    assert_debug_snapshot!(open_end, @r###"
    TokenVec (
      0..1: Literal(Integer(1)),
      1..3: Range { bind_left: true, bind_right: true },
    )
    "###);

    // Whitespace before `..` is part of the range token's span and clears
    // `bind_left`.
    let after_space = TokenVec(lexer().parse("in ..5").unwrap());
    assert_debug_snapshot!(after_space, @r###"
    TokenVec (
      0..2: Ident("in"),
      2..5: Range { bind_left: false, bind_right: true },
      5..6: Literal(Integer(5)),
    )
    "###);
}
628681}
0 commit comments