@@ -8,13 +8,14 @@ mod tests;
88
99use core:: str;
1010use std:: {
11- borrow:: Cow ,
1211 fmt:: { Debug , Display } ,
1312 slice:: SliceIndex ,
1413} ;
1514
15+ use bumpalo:: { Bump , collections:: Vec } ;
16+ use logos:: Logos ;
1617use logos:: Skip ;
17- pub use logos:: { Logos , Span , SpannedIter } ;
18+ pub use logos:: { Span , SpannedIter } ;
1819use memchr:: { memchr, memchr3} ;
1920use thiserror:: Error ;
2021
@@ -27,7 +28,7 @@ pub type Result<T, E = LexingError> = std::result::Result<T, E>;
2728#[ logos( skip( "[ \t ]+" ) ) ]
2829#[ logos( skip( r"(\\\n)+" ) ) ]
2930#[ logos( skip( "#" , skip_line) ) ]
30- #[ logos( extras = Context ) ]
31+ #[ logos( extras = Extra ) ]
3132#[ logos( subpattern identifier = r"[a-zA-Z_][a-zA-Z0-9_]*" ) ]
3233#[ logos( error( LexingError , callback = |lex| LexingError :: from( lex) ) ) ]
3334pub enum Token < ' a > {
@@ -215,6 +216,9 @@ pub enum Token<'a> {
215216 Semicolon ,
216217}
217218
219+ #[ derive( Debug , Default , PartialEq , Eq ) ]
220+ pub struct Extra ( Context , * const Bump ) ;
221+
218222#[ derive( Debug , Default , PartialEq , Eq ) ]
219223pub enum Context {
220224 #[ default]
@@ -243,6 +247,12 @@ pub struct Identifier<'a> {
243247 pub literal : & ' a str ,
244248}
245249
250+ impl < ' a > Token < ' a > {
251+ pub fn lex ( source : & ' a [ u8 ] , arena : & ' a Bump ) -> logos:: Lexer < ' a , Self > {
252+ Lexer :: with_extras ( source, Extra ( Context :: AcceptExpression , arena) )
253+ }
254+ }
255+
246256impl From < & mut Lexer < ' _ > > for LexingError {
247257 fn from ( lex : & mut Lexer < ' _ > ) -> Self {
248258 Self :: Unexpected ( lex. span ( ) , String :: from_utf8_lossy ( lex. slice ( ) ) . to_string ( ) )
@@ -259,7 +269,7 @@ fn parse_string<'a>(lex: &mut logos::Lexer<'a, Token<'a>>) -> Result<Slice<'a>>
259269}
260270
261271fn parse_regex_or_slash < ' a > ( lex : & mut logos:: Lexer < ' a , Token < ' a > > ) -> Result < Token < ' a > > {
262- match lex. extras {
272+ match lex. extras . 0 {
263273 Context :: AcceptExpression => {
264274 accept_operator ( lex) ;
265275 parse_content :: < false , true , '/' > ( lex) . map ( Token :: Regex )
@@ -280,7 +290,7 @@ fn parse_content<'a, const MINIMAL: bool, const REGEX: bool, const DELIMITER: ch
280290) -> Result < Slice < ' a > > {
281291 let rest = lex. remainder ( ) ;
282292 let mut start = 0 ;
283- let mut out: Cow < ' a , [ u8 ] > = Cow :: Borrowed ( & [ ] ) ;
293+ let mut out = Slice :: Borrowed ( & [ ] ) ;
284294
285295 while let Some ( rel_i) = memchr3 ( b'\n' , b'\\' , DELIMITER as u8 , & rest[ start..] ) {
286296 let i = start + rel_i;
@@ -290,15 +300,18 @@ fn parse_content<'a, const MINIMAL: bool, const REGEX: bool, const DELIMITER: ch
290300 // push remaining segment
291301 lex. bump ( i + 1 ) ;
292302 if start == 0 {
293- out = Cow :: Borrowed ( & rest[ ..i] ) ;
303+ out = Slice :: Borrowed ( & rest[ ..i] ) ;
294304 } else {
295- out. to_mut ( ) . extend_from_slice ( & rest[ start..i] ) ;
305+ out. to_mut ( lex. extras . arena ( ) )
306+ . extend_from_slice ( & rest[ start..i] ) ;
296307 }
297- return Ok ( Slice ( out) ) ;
308+ return Ok ( out) ;
298309 }
299310 b'\\' => {
300- out. to_mut ( ) . extend_from_slice ( & rest[ start..i] ) ;
301- let consumed = parse_escape :: < MINIMAL , REGEX > ( & rest[ i..] , out. to_mut ( ) ) ?;
311+ out. to_mut ( lex. extras . arena ( ) )
312+ . extend_from_slice ( & rest[ start..i] ) ;
313+ let consumed =
314+ parse_escape :: < MINIMAL , REGEX > ( & rest[ i..] , out. to_mut ( lex. extras . arena ( ) ) ) ?;
302315 start = i + consumed;
303316 }
304317 _ => break ,
@@ -394,20 +407,22 @@ impl<'a> Identifier<'a> {
394407}
395408
396409fn accept_expression ( lex : & mut Lexer < ' _ > ) {
397- lex. extras = Context :: AcceptExpression ;
410+ lex. extras . 0 = Context :: AcceptExpression ;
398411}
399412
400413fn accept_operator ( lex : & mut Lexer < ' _ > ) {
401- lex. extras = Context :: AcceptOperator ;
414+ lex. extras . 0 = Context :: AcceptOperator ;
402415}
403416
404- #[ repr( transparent) ]
405417#[ derive( PartialEq , Eq , PartialOrd , Ord , Clone ) ]
406- pub struct Slice < ' a > ( Cow < ' a , [ u8 ] > ) ;
418+ pub enum Slice < ' a > {
419+ Borrowed ( & ' a [ u8 ] ) ,
420+ Owned ( Vec < ' a , u8 > ) ,
421+ }
407422
408423impl Display for Slice < ' _ > {
409424 fn fmt ( & self , f : & mut std:: fmt:: Formatter < ' _ > ) -> std:: fmt:: Result {
410- write ! ( f, "{}" , String :: from_utf8_lossy( & self . 0 ) . as_ref( ) )
425+ write ! ( f, "{}" , String :: from_utf8_lossy( self . as_ref ( ) ) . as_ref( ) )
411426 }
412427}
413428
@@ -419,6 +434,41 @@ impl Debug for Slice<'_> {
419434
420435impl AsRef < [ u8 ] > for Slice < ' _ > {
421436 fn as_ref ( & self ) -> & [ u8 ] {
422- & self . 0
437+ match self {
438+ Self :: Borrowed ( x) => x,
439+ Self :: Owned ( x) => x,
440+ }
441+ }
442+ }
443+
444+ impl < ' a > Slice < ' a > {
445+ pub fn to_mut ( & mut self , arena : & ' a Bump ) -> & mut Vec < ' a , u8 > {
446+ if let Self :: Borrowed ( x) = self {
447+ let mut vec = Vec :: new_in ( arena) ;
448+ vec. extend_from_slice_copy ( x) ;
449+ * self = Self :: Owned ( vec) ;
450+ }
451+ let Self :: Owned ( x) = self else { unreachable ! ( ) } ;
452+ x
453+ }
454+ }
455+
456+ impl < ' a > From < & ' a [ u8 ] > for Slice < ' a > {
457+ fn from ( value : & ' a [ u8 ] ) -> Self {
458+ Self :: Borrowed ( value)
459+ }
460+ }
461+
462+ impl < ' a > From < Vec < ' a , u8 > > for Slice < ' a > {
463+ fn from ( value : Vec < ' a , u8 > ) -> Self {
464+ Self :: Owned ( value)
465+ }
466+ }
467+
468+ impl Extra {
469+ fn arena < ' a > ( & self ) -> & ' a Bump {
470+ // SAFETY: lives for as long as self because it's the same lifetime as
471+ // the source being lexed; Logos just can't take lifetimes on extras.
472+ unsafe { & * self . 1 }
423473 }
424474}
0 commit comments