@@ -895,7 +895,7 @@ impl<'a> Tokenizer<'a> {
895895 } ;
896896
897897 let mut location = state. location ( ) ;
898- while let Some ( token) = self . next_token ( & mut state) ? {
898+ while let Some ( token) = self . next_token ( & mut state, buf . last ( ) . map ( |t| & t . token ) ) ? {
899899 let span = location. span_to ( state. location ( ) ) ;
900900
901901 buf. push ( TokenWithSpan { token, span } ) ;
@@ -932,7 +932,7 @@ impl<'a> Tokenizer<'a> {
932932 }
933933
934934 /// Get the next token or return None
935- fn next_token ( & self , chars : & mut State ) -> Result < Option < Token > , TokenizerError > {
935+ fn next_token ( & self , chars : & mut State , prev_token : Option < & Token > ) -> Result < Option < Token > , TokenizerError > {
936936 match chars. peek ( ) {
937937 Some ( & ch) => match ch {
938938 ' ' => self . consume_and_return ( chars, Token :: Whitespace ( Whitespace :: Space ) ) ,
@@ -1211,17 +1211,29 @@ impl<'a> Tokenizer<'a> {
12111211 chars. next ( ) ;
12121212 }
12131213
1214+ // If the dialect supports identifiers that start with a numeric prefix
1215+ // and we have now consumed a dot, check if the previous token was a Word.
1216+ // If so, what follows is definitely not part of a decimal number and
1217+ // we should yield the dot as a dedicated token so compound identifiers
1218+ // starting with digits can be parsed correctly.
1219+ if s == "." && self . dialect . supports_numeric_prefix ( ) {
1220+ if let Some ( Token :: Word ( _) ) = prev_token {
1221+ return Ok ( Some ( Token :: Period ) ) ;
1222+ }
1223+ }
1224+
1225+ // Consume fractional digits.
12141226 s += & peeking_next_take_while ( chars, |ch, next_ch| {
12151227 ch. is_ascii_digit ( ) || is_number_separator ( ch, next_ch)
12161228 } ) ;
12171229
1218- // No number -> Token::Period
1230+ // No fraction -> Token::Period
12191231 if s == "." {
12201232 return Ok ( Some ( Token :: Period ) ) ;
12211233 }
12221234
1223- let mut exponent_part = String :: new ( ) ;
12241235 // Parse exponent as number
1236+ let mut exponent_part = String :: new ( ) ;
12251237 if chars. peek ( ) == Some ( & 'e' ) || chars. peek ( ) == Some ( & 'E' ) {
12261238 let mut char_clone = chars. peekable . clone ( ) ;
12271239 exponent_part. push ( char_clone. next ( ) . unwrap ( ) ) ;
@@ -1250,14 +1262,23 @@ impl<'a> Tokenizer<'a> {
12501262 }
12511263 }
12521264
1253- // mysql dialect supports identifiers that start with a numeric prefix,
1254- // as long as they aren't an exponent number.
1255- if self . dialect . supports_numeric_prefix ( ) && exponent_part. is_empty ( ) {
1256- let word =
1257- peeking_take_while ( chars, |ch| self . dialect . is_identifier_part ( ch) ) ;
1258-
1259- if !word. is_empty ( ) {
1260- s += word. as_str ( ) ;
1265+ // If the dialect supports identifiers that start with a numeric prefix,
1266+ // we need to check if the value is in fact an identifier and must thus
1267+ // be tokenized as a word.
1268+ if self . dialect . supports_numeric_prefix ( ) {
1269+ if exponent_part. is_empty ( ) {
1270+ // If it is not a number with an exponent, it may be
1271+ // an unqualified identifier starting with digits.
1272+ let word =
1273+ peeking_take_while ( chars, |ch| self . dialect . is_identifier_part ( ch) ) ;
1274+
1275+ if !word. is_empty ( ) {
1276+ s += word. as_str ( ) ;
1277+ return Ok ( Some ( Token :: make_word ( s. as_str ( ) , None ) ) ) ;
1278+ }
1279+ } else if prev_token. map_or ( false , |t| t == & Token :: Period ) {
1280+ // If the previous token was a period, thus not belonging to a number,
1281+ // the value we have is part of an identifier.
12611282 return Ok ( Some ( Token :: make_word ( s. as_str ( ) , None ) ) ) ;
12621283 }
12631284 }
@@ -3960,4 +3981,39 @@ mod tests {
39603981 ] ,
39613982 ) ;
39623983 }
3984+
/// Verifies tokenization of digit-leading input for every dialect that
/// reports `supports_numeric_prefix()`.
///
/// Each case pairs a SQL snippet with the exact token stream expected:
/// - digits followed by letters form a single word,
/// - a standalone exponent literal stays a number,
/// - after `word.`, a digit-leading segment (exponent-shaped or not) is a word.
#[test]
fn test_tokenize_identifiers_numeric_prefix() {
    // Shorthand for an unquoted word token.
    let word = |text: &str| Token::make_word(text, None);

    let cases: Vec<(&str, Vec<Token>)> = vec![
        // Unqualified identifier starting with digits.
        ("123abc", vec![word("123abc")]),
        // Not preceded by a period: still a number with an exponent.
        ("12e34", vec![Token::Number("12e34".to_string(), false)]),
        // Qualified: the part after the period is an identifier, not a number.
        ("t.12e34", vec![word("t"), Token::Period, word("12e34")]),
        // Qualified identifier mixing digits and letters.
        ("t.1two3", vec![word("t"), Token::Period, word("1two3")]),
    ];

    for (sql, expected) in cases {
        all_dialects_where(|dialect| dialect.supports_numeric_prefix())
            .tokenizes_to(sql, expected);
    }
}
39634019}
0 commit comments