@@ -140,8 +140,7 @@ pub enum TokenKind {
140140
141141 /// A lifetime, e.g. `'a`.
142142 Lifetime {
143- starts_with_number : bool ,
144- has_emoji : bool ,
143+ invalid : bool ,
145144 } ,
146145
147146 /// `;`
@@ -585,7 +584,7 @@ impl<'a> Cursor<'a> {
585584 let kind = RawStr { n_hashes : res. ok ( ) } ;
586585 Literal { kind, suffix_start }
587586 }
588- _ => self . ident_or_unknown_prefix ( ) ,
587+ _ => self . ident_or_unknown_prefix ( false ) ,
589588 } ,
590589
591590 // Byte literal, byte string literal, raw byte string literal or identifier.
@@ -604,7 +603,7 @@ impl<'a> Cursor<'a> {
604603
605604 // Identifier (this should be checked after the other variants that can
606605 // start as an identifier).
607- c if is_id_start ( c) => self . ident_or_unknown_prefix ( ) ,
606+ c if is_id_start ( c) => self . ident_or_unknown_prefix ( false ) ,
608607
609608 // Numeric literal.
610609 c @ '0' ..='9' => {
@@ -662,7 +661,7 @@ impl<'a> Cursor<'a> {
662661 Literal { kind, suffix_start }
663662 }
664663 // Identifier starting with an emoji. Only lexed for graceful error recovery.
665- c if !c . is_ascii ( ) && c . is_emoji_char ( ) => self . invalid_ident ( ) ,
664+ c if is_emoji ( c ) => self . invalid_ident ( ) ,
666665 _ => Unknown ,
667666 } ;
668667 if matches ! ( self . frontmatter_allowed, FrontmatterAllowed :: Yes )
@@ -833,25 +832,22 @@ impl<'a> Cursor<'a> {
833832 RawIdent
834833 }
835834
836- fn ident_or_unknown_prefix ( & mut self ) -> TokenKind {
837- debug_assert ! ( is_id_start( self . prev( ) ) ) ;
835+ fn ident_or_unknown_prefix ( & mut self , already_invalid : bool ) -> TokenKind {
836+ debug_assert ! ( is_id_start( self . prev( ) ) || already_invalid ) ;
838837 // Start is already eaten, eat the rest of identifier.
839838 self . eat_while ( is_id_continue) ;
840839 // Known prefixes must have been handled earlier. So if
841840 // we see a prefix here, it is definitely an unknown prefix.
842841 match self . first ( ) {
843842 '#' | '"' | '\'' => UnknownPrefix ,
844- c if !c . is_ascii ( ) && c . is_emoji_char ( ) => self . invalid_ident ( ) ,
843+ c if is_emoji ( c ) => self . invalid_ident ( ) ,
845844 _ => Ident ,
846845 }
847846 }
848847
849848 fn invalid_ident ( & mut self ) -> TokenKind {
850849 // Start is already eaten, eat the rest of identifier.
851- self . eat_while ( |c| {
852- const ZERO_WIDTH_JOINER : char = '\u{200d}' ;
853- is_id_continue ( c) || ( !c. is_ascii ( ) && c. is_emoji_char ( ) ) || c == ZERO_WIDTH_JOINER
854- } ) ;
850+ self . eat_while ( |c| is_id_continue ( c) || is_emoji ( c) ) ;
855851 // An invalid identifier followed by '#' or '"' or '\'' could be
856852 // interpreted as an invalid literal prefix. We don't bother doing that
857853 // because the treatment of invalid identifiers and invalid prefixes
@@ -896,7 +892,7 @@ impl<'a> Cursor<'a> {
896892 let kind = mk_kind_raw ( res. ok ( ) ) ;
897893 Literal { kind, suffix_start }
898894 }
899- _ => self . ident_or_unknown_prefix ( ) ,
895+ _ => self . ident_or_unknown_prefix ( false ) ,
900896 }
901897 }
902898
@@ -976,7 +972,7 @@ impl<'a> Cursor<'a> {
976972 fn lifetime_or_char ( & mut self ) -> TokenKind {
977973 debug_assert ! ( self . prev( ) == '\'' ) ;
978974
979- let mut has_emoji = false ;
975+ let mut invalid = false ;
980976 let can_be_a_lifetime = if self . second ( ) == '\'' {
981977 // It's surely not a lifetime.
982978 false
@@ -985,11 +981,9 @@ impl<'a> Cursor<'a> {
985981 // Also check if it's a number for better error reporting (so '0 will
986982 // be reported as an invalid lifetime and not as an unterminated char literal).
987983 let c = self . first ( ) ;
988- let is_emoji = !c. is_ascii ( ) && c. is_emoji_char ( ) ;
989- if is_emoji {
990- has_emoji = true ;
991- }
992- is_id_start ( c) || c. is_ascii_digit ( ) || is_emoji
984+ invalid |= c. is_ascii_digit ( ) ;
985+ invalid |= is_emoji ( c) ;
986+ is_id_start ( c) || invalid
993987 } ;
994988
995989 if !can_be_a_lifetime {
@@ -1019,13 +1013,7 @@ impl<'a> Cursor<'a> {
10191013 // First symbol can be a number (which isn't a valid identifier start),
10201014 // so skip it without any checks.
10211015 self . bump ( ) ;
1022- self . eat_while ( |c| {
1023- let is_emoji = !c. is_ascii ( ) && c. is_emoji_char ( ) ;
1024- if is_emoji {
1025- has_emoji = true ;
1026- }
1027- is_id_continue ( c) || is_emoji
1028- } ) ;
1016+ invalid |= matches ! ( self . ident_or_unknown_prefix( invalid) , InvalidIdent ) ;
10291017
10301018 match self . first ( ) {
10311019 // Check if after skipping literal contents we've met a closing
@@ -1037,7 +1025,7 @@ impl<'a> Cursor<'a> {
10371025 Literal { kind, suffix_start : self . pos_within_token ( ) }
10381026 }
10391027 '#' if !starts_with_number => UnknownPrefixLifetime ,
1040- _ => Lifetime { starts_with_number , has_emoji } ,
1028+ _ => Lifetime { invalid } ,
10411029 }
10421030 }
10431031
@@ -1290,3 +1278,7 @@ impl<'a> Cursor<'a> {
12901278 self . eat_while ( is_id_continue) ;
12911279 }
12921280}
1281+
1282+ fn is_emoji ( c : char ) -> bool {
1283+ !c. is_ascii ( ) && c. is_emoji_char ( )
1284+ }
0 commit comments