@@ -8,7 +8,7 @@ use std::fmt;
88use std:: ops:: RangeInclusive ;
99
1010use crate :: ast:: AstNode ;
11- use crate :: { SyntaxNode , ast, match_ast, syntax_error:: SyntaxError } ;
11+ use crate :: { SyntaxNode , SyntaxToken , ast, match_ast, syntax_error:: SyntaxError } ;
1212use rowan:: { TextRange , TextSize } ;
1313use squawk_parser:: SyntaxKind :: * ;
1414pub ( crate ) fn validate ( root : & SyntaxNode , errors : & mut Vec < SyntaxError > ) {
@@ -30,6 +30,14 @@ pub(crate) fn validate(root: &SyntaxNode, errors: &mut Vec<SyntaxError>) {
3030 }
3131 }
3232 }
33+ for element in root. descendants_with_tokens ( ) {
34+ if let Some ( token) = element. into_token ( )
35+ && token. kind ( ) == IDENT
36+ && let Some ( err) = validate_unicode_esc_ident ( & token)
37+ {
38+ errors. push ( err) ;
39+ }
40+ }
3341}
3442
3543fn validate_select ( it : ast:: Select , acc : & mut Vec < SyntaxError > ) {
@@ -184,15 +192,7 @@ fn validate_unicode_esc_string(lit: &ast::Literal) -> Option<SyntaxError> {
184192 UNICODE_ESC_STRING => unicode_esc = Some ( token) ,
185193 UESCAPE_KW => seen_uescape = true ,
186194 STRING if seen_uescape => {
187- let text = token. text ( ) ;
188- let inner = text
189- . strip_prefix ( '\'' )
190- . and_then ( |s| s. strip_suffix ( '\'' ) )
191- . unwrap_or ( "" ) ;
192- let mut chars = inner. chars ( ) ;
193- if let ( Some ( c) , None ) = ( chars. next ( ) , chars. next ( ) ) {
194- escape_char = c;
195- }
195+ escape_char = uescape_char ( & token) . unwrap_or ( escape_char) ;
196196 break ;
197197 }
198198 _ => ( ) ,
@@ -208,6 +208,45 @@ fn validate_unicode_esc_string(lit: &ast::Literal) -> Option<SyntaxError> {
208208 Some ( SyntaxError :: new ( err. to_string ( ) , token. text_range ( ) ) )
209209}
210210
211+ fn validate_unicode_esc_ident ( token : & SyntaxToken ) -> Option < SyntaxError > {
212+ let text = token. text ( ) ;
213+ let inside = text
214+ . strip_prefix ( "U&\" " )
215+ . or_else ( || text. strip_prefix ( "u&\" " ) )
216+ . and_then ( |s| s. strip_suffix ( '"' ) ) ?;
217+
218+ let mut escape_char = '\\' ;
219+ let mut seen_uescape = false ;
220+ let mut next = token. next_sibling_or_token ( ) ;
221+ while let Some ( element) = next {
222+ match element. kind ( ) {
223+ WHITESPACE | COMMENT => ( ) ,
224+ UESCAPE_KW => seen_uescape = true ,
225+ STRING if seen_uescape => {
226+ if let Some ( string_token) = element. as_token ( ) {
227+ escape_char = uescape_char ( string_token) . unwrap_or ( escape_char) ;
228+ }
229+ break ;
230+ }
231+ _ => break ,
232+ }
233+ next = element. next_sibling_or_token ( ) ;
234+ }
235+
236+ let err = check_unicode_esc_str ( inside, escape_char) ?;
237+ Some ( SyntaxError :: new ( err. to_string ( ) , token. text_range ( ) ) )
238+ }
239+
240+ fn uescape_char ( string_token : & SyntaxToken ) -> Option < char > {
241+ let text = string_token. text ( ) ;
242+ let inner = text. strip_prefix ( '\'' ) ?. strip_suffix ( '\'' ) ?;
243+ let mut chars = inner. chars ( ) ;
244+ match ( chars. next ( ) , chars. next ( ) ) {
245+ ( Some ( c) , None ) => Some ( c) ,
246+ _ => None ,
247+ }
248+ }
249+
211250enum UnicodeEscapeKind {
212251 Short ,
213252 Extended ,
0 commit comments