@@ -29,10 +29,10 @@ use alloc::{
2929 vec,
3030 vec:: Vec ,
3131} ;
32- use core:: iter:: Peekable ;
3332use core:: num:: NonZeroU8 ;
3433use core:: str:: Chars ;
3534use core:: { cmp, fmt} ;
35+ use core:: { iter:: Peekable , str} ;
3636
3737#[ cfg( feature = "serde" ) ]
3838use serde:: { Deserialize , Serialize } ;
@@ -46,7 +46,10 @@ use crate::dialect::{
4646 SnowflakeDialect ,
4747} ;
4848use crate :: keywords:: { Keyword , ALL_KEYWORDS , ALL_KEYWORDS_INDEX } ;
49- use crate :: { ast:: DollarQuotedString , dialect:: HiveDialect } ;
49+ use crate :: {
50+ ast:: { DollarQuotedString , QuoteDelimitedString } ,
51+ dialect:: HiveDialect ,
52+ } ;
5053
5154/// SQL Token enumeration
5255#[ derive( Debug , Clone , PartialEq , PartialOrd , Eq , Ord , Hash ) ]
@@ -98,6 +101,12 @@ pub enum Token {
98101 TripleDoubleQuotedRawStringLiteral ( String ) ,
99102 /// "National" string literal: i.e: N'string'
100103 NationalStringLiteral ( String ) ,
104+ /// Quote delimited literal. Examples `Q'{ab'c}'`, `Q'|ab'c|'`, `Q'|ab|c|'`
105+ /// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Literals.html#GUID-1824CBAA-6E16-4921-B2A6-112FB02248DA)
106+ QuoteDelimitedStringLiteral ( QuoteDelimitedString ) ,
 107+ /// "National" quote delimited literal. Examples `NQ'{ab'c}'`, `NQ'|ab'c|'`, `NQ'|ab|c|'`
108+ /// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Literals.html#GUID-1824CBAA-6E16-4921-B2A6-112FB02248DA)
109+ NationalQuoteDelimitedStringLiteral ( QuoteDelimitedString ) ,
101110 /// "escaped" string literal, which are an extension to the SQL standard: i.e: e'first \n second' or E 'first \n second'
102111 EscapedStringLiteral ( String ) ,
103112 /// Unicode string literal: i.e: U&'first \000A second'
@@ -294,6 +303,8 @@ impl fmt::Display for Token {
294303 Token :: TripleDoubleQuotedString ( ref s) => write ! ( f, "\" \" \" {s}\" \" \" " ) ,
295304 Token :: DollarQuotedString ( ref s) => write ! ( f, "{s}" ) ,
296305 Token :: NationalStringLiteral ( ref s) => write ! ( f, "N'{s}'" ) ,
306+ Token :: QuoteDelimitedStringLiteral ( ref s) => s. fmt ( f) ,
307+ Token :: NationalQuoteDelimitedStringLiteral ( ref s) => write ! ( f, "N{s}" ) ,
297308 Token :: EscapedStringLiteral ( ref s) => write ! ( f, "E'{s}'" ) ,
298309 Token :: UnicodeStringLiteral ( ref s) => write ! ( f, "U&'{s}'" ) ,
299310 Token :: HexStringLiteral ( ref s) => write ! ( f, "X'{s}'" ) ,
@@ -1072,13 +1083,35 @@ impl<'a> Tokenizer<'a> {
10721083 self . tokenize_single_quoted_string ( chars, '\'' , backslash_escape) ?;
10731084 Ok ( Some ( Token :: NationalStringLiteral ( s) ) )
10741085 }
1086+ Some ( & q @ 'q' ) | Some ( & q @ 'Q' )
1087+ if self . dialect . supports_quote_delimited_string ( ) =>
1088+ {
1089+ chars. next ( ) ; // consume and check the next char
1090+ if let Some ( '\'' ) = chars. peek ( ) {
1091+ self . tokenize_quote_delimited_string ( chars, & [ n, q] )
1092+ . map ( |s| Some ( Token :: NationalQuoteDelimitedStringLiteral ( s) ) )
1093+ } else {
1094+ let s = self . tokenize_word ( String :: from_iter ( [ n, q] ) , chars) ;
1095+ Ok ( Some ( Token :: make_word ( & s, None ) ) )
1096+ }
1097+ }
10751098 _ => {
10761099 // regular identifier starting with an "N"
10771100 let s = self . tokenize_word ( n, chars) ;
10781101 Ok ( Some ( Token :: make_word ( & s, None ) ) )
10791102 }
10801103 }
10811104 }
1105+ q @ 'Q' | q @ 'q' if self . dialect . supports_quote_delimited_string ( ) => {
1106+ chars. next ( ) ; // consume and check the next char
1107+ if let Some ( '\'' ) = chars. peek ( ) {
1108+ self . tokenize_quote_delimited_string ( chars, & [ q] )
1109+ . map ( |s| Some ( Token :: QuoteDelimitedStringLiteral ( s) ) )
1110+ } else {
1111+ let s = self . tokenize_word ( q, chars) ;
1112+ Ok ( Some ( Token :: make_word ( & s, None ) ) )
1113+ }
1114+ }
10821115 // PostgreSQL accepts "escape" string constants, which are an extension to the SQL standard.
10831116 x @ 'e' | x @ 'E' if self . dialect . supports_string_escape_constant ( ) => {
10841117 let starting_loc = chars. location ( ) ;
@@ -2072,6 +2105,61 @@ impl<'a> Tokenizer<'a> {
20722105 )
20732106 }
20742107
2108+ /// Reads a quote delimited string expecting `chars.next()` to deliver a quote.
2109+ ///
2110+ /// See <https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Literals.html#GUID-1824CBAA-6E16-4921-B2A6-112FB02248DA>
2111+ fn tokenize_quote_delimited_string (
2112+ & self ,
2113+ chars : & mut State ,
2114+ // the prefix that introduced the possible literal or word,
2115+ // e.g. "Q" or "nq"
2116+ literal_prefix : & [ char ] ,
2117+ ) -> Result < QuoteDelimitedString , TokenizerError > {
2118+ let literal_start_loc = chars. location ( ) ;
2119+ chars. next ( ) ;
2120+
2121+ let start_quote_loc = chars. location ( ) ;
2122+ let ( start_quote, end_quote) = match chars. next ( ) {
2123+ None | Some ( ' ' ) | Some ( '\t' ) | Some ( '\r' ) | Some ( '\n' ) => {
2124+ return self . tokenizer_error (
2125+ start_quote_loc,
2126+ format ! (
2127+ "Invalid space, tab, newline, or EOF after '{}''" ,
2128+ String :: from_iter( literal_prefix)
2129+ ) ,
2130+ ) ;
2131+ }
2132+ Some ( c) => (
2133+ c,
2134+ match c {
2135+ '[' => ']' ,
2136+ '{' => '}' ,
2137+ '<' => '>' ,
2138+ '(' => ')' ,
2139+ c => c,
2140+ } ,
2141+ ) ,
2142+ } ;
2143+
2144+ // read the string literal until the "quote character" following a by literal quote
2145+ let mut value = String :: new ( ) ;
2146+ while let Some ( ch) = chars. next ( ) {
2147+ if ch == end_quote {
2148+ if let Some ( '\'' ) = chars. peek ( ) {
2149+ chars. next ( ) ; // ~ consume the quote
2150+ return Ok ( QuoteDelimitedString {
2151+ start_quote,
2152+ value,
2153+ end_quote,
2154+ } ) ;
2155+ }
2156+ }
2157+ value. push ( ch) ;
2158+ }
2159+
2160+ self . tokenizer_error ( literal_start_loc, "Unterminated string literal" )
2161+ }
2162+
20752163 /// Read a quoted string.
20762164 fn tokenize_quoted_string (
20772165 & self ,
0 commit comments