@@ -945,10 +945,65 @@ impl<'a> Tokenizer<'a> {
945945 while let Some ( token) = self . next_token ( & mut state, buf. last ( ) . map ( |t| & t. token ) ) ? {
946946 let span = location. span_to ( state. location ( ) ) ;
947947
948- buf. push ( TokenWithSpan { token, span } ) ;
948+ // Check if this is a multiline comment hint that should be expanded
949+ match & token {
950+ Token :: Whitespace ( Whitespace :: MultiLineComment ( comment) )
951+ if self . dialect . supports_multiline_comment_hints ( )
952+ && comment. starts_with ( '!' ) =>
953+ {
954+ // Re-tokenize the hints and add them to the buffer
955+ self . tokenize_comment_hints ( comment, span, buf) ?;
956+ }
957+ _ => {
958+ buf. push ( TokenWithSpan { token, span } ) ;
959+ }
960+ }
961+
962+ location = state. location ( ) ;
963+ }
964+ Ok ( ( ) )
965+ }
966+
967+ /// Re-tokenize optimizer hints from a multiline comment and add them to the buffer.
968+ /// For example, `/*!50110 KEY_BLOCK_SIZE = 1024*/` becomes tokens for `KEY_BLOCK_SIZE = 1024`
969+ fn tokenize_comment_hints (
970+ & self ,
971+ comment : & str ,
972+ span : Span ,
973+ buf : & mut Vec < TokenWithSpan > ,
974+ ) -> Result < ( ) , TokenizerError > {
975+ // Strip the leading '!' and any version digits (e.g., "50110")
976+ let hint_content = comment
977+ . strip_prefix ( '!' )
978+ . unwrap_or ( comment)
979+ . trim_start_matches ( |c : char | c. is_ascii_digit ( ) ) ;
980+
981+ // If there's no content after stripping, nothing to tokenize
982+ if hint_content. is_empty ( ) {
983+ return Ok ( ( ) ) ;
984+ }
985+
986+ // Create a new tokenizer for the hint content
987+ let inner = Tokenizer :: new ( self . dialect , hint_content) . with_unescape ( self . unescape ) ;
988+
989+ // Create a state for tracking position within the hint
990+ let mut state = State {
991+ peekable : hint_content. chars ( ) . peekable ( ) ,
992+ line : span. start . line ,
993+ col : span. start . column ,
994+ } ;
949995
996+ // Tokenize the hint content and add tokens to the buffer
997+ let mut location = state. location ( ) ;
998+ while let Some ( token) = inner. next_token ( & mut state, buf. last ( ) . map ( |t| & t. token ) ) ? {
999+ let token_span = location. span_to ( state. location ( ) ) ;
1000+ buf. push ( TokenWithSpan {
1001+ token,
1002+ span : token_span,
1003+ } ) ;
9501004 location = state. location ( ) ;
9511005 }
1006+
9521007 Ok ( ( ) )
9531008 }
9541009
@@ -2233,7 +2288,6 @@ impl<'a> Tokenizer<'a> {
22332288 let mut s = String :: new ( ) ;
22342289 let mut nested = 1 ;
22352290 let supports_nested_comments = self . dialect . supports_nested_comments ( ) ;
2236-
22372291 loop {
22382292 match chars. next ( ) {
22392293 Some ( '/' ) if matches ! ( chars. peek( ) , Some ( '*' ) ) && supports_nested_comments => {
@@ -4218,6 +4272,88 @@ mod tests {
42184272 Token :: Whitespace ( Whitespace :: Space ) ,
42194273 Token :: make_word( "y" , None ) ,
42204274 ] ,
4221- )
4275+ ) ;
4276+ }
4277+
/// A version-less hint comment (`/*! ... */`) under the MySQL dialect is
/// replaced by the tokens of its content, including surrounding whitespace.
#[test]
fn tokenize_multiline_comment_with_comment_hint() {
    let space = || Token::Whitespace(Whitespace::Space);

    let mysql = MySqlDialect {};
    let actual = Tokenizer::new(&mysql, "0/*! word */1")
        .tokenize()
        .unwrap();

    // The comment delimiters and the `!` vanish; ` word ` is tokenized in place.
    let hinted_word = Token::Word(Word {
        value: String::from("word"),
        quote_style: None,
        keyword: Keyword::NoKeyword,
    });
    let expected = vec![
        Token::Number(String::from("0"), false),
        space(),
        hinted_word,
        space(),
        Token::Number(String::from("1"), false),
    ];

    compare(expected, actual);
}
4297+
/// Versioned hint comments (`/*!50110 ... */`) under the MySQL dialect have
/// their `!` and version digits dropped and the remaining content tokenized
/// in place; hints with no content contribute only their inner whitespace.
#[test]
fn tokenize_multiline_comment_with_comment_hint_and_version() {
    let number = |digits: &str| Token::Number(digits.to_string(), false);
    let space = || Token::Whitespace(Whitespace::Space);
    let dialect = MySqlDialect {};

    // Each case pairs a query with the exact token stream it must produce.
    let cases = vec![
        // Versioned hint with real content.
        (
            "0 /*!50110 KEY_BLOCK_SIZE = 1024*/ 1",
            vec![
                number("0"),
                space(),
                space(),
                Token::Word(Word {
                    value: "KEY_BLOCK_SIZE".to_string(),
                    quote_style: None,
                    keyword: Keyword::KEY_BLOCK_SIZE,
                }),
                space(),
                Token::Eq,
                space(),
                number("1024"),
                space(),
                number("1"),
            ],
        ),
        // Version followed by a single space: only that space survives.
        (
            "0 /*!50110 */ 1",
            vec![number("0"), space(), space(), space(), number("1")],
        ),
        // Completely empty hint: nothing is emitted for the comment.
        ("0 /*!*/ 1", vec![number("0"), space(), space(), number("1")]),
        // Whitespace-only hint: three inner spaces plus the two outer ones
        // give the five `Space` tokens expected below.
        (
            "0 /*!   */ 1",
            vec![
                number("0"),
                space(),
                space(),
                space(),
                space(),
                space(),
                number("1"),
            ],
        ),
    ];

    for (sql, expected) in cases {
        let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap();
        compare(expected, tokens);
    }
}
42234359}
0 commit comments