Skip to content

Commit 4eee454

Browse files
xitepayman-sigma
authored and committed
Oracle: Support for quote delimited strings (apache#2130)
1 parent f52544b commit 4eee454

File tree

9 files changed

+381
-6
lines changed

9 files changed

+381
-6
lines changed

src/ast/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ pub use self::trigger::{
110110

111111
pub use self::value::{
112112
escape_double_quote_string, escape_quoted_string, DateTimeField, DollarQuotedString,
113-
NormalizationForm, TrimWhereField, Value, ValueWithSpan,
113+
NormalizationForm, QuoteDelimitedString, TrimWhereField, Value, ValueWithSpan,
114114
};
115115

116116
use crate::ast::helpers::key_value_options::KeyValueOptions;

src/ast/value.rs

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,12 @@ pub enum Value {
167167
TripleDoubleQuotedRawStringLiteral(String),
168168
/// N'string value'
169169
NationalStringLiteral(String),
170+
/// Quote delimited literal. Examples `Q'{ab'c}'`, `Q'|ab'c|'`, `Q'|ab|c|'`
171+
/// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Literals.html#GUID-1824CBAA-6E16-4921-B2A6-112FB02248DA)
172+
QuoteDelimitedStringLiteral(QuoteDelimitedString),
173+
/// "National" quote delimited literal. Examples `NQ'{ab'c}'`, `NQ'|ab'c|'`, `NQ'|ab|c|'`
174+
/// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Literals.html#GUID-1824CBAA-6E16-4921-B2A6-112FB02248DA)
175+
NationalQuoteDelimitedStringLiteral(QuoteDelimitedString),
170176
/// X'hex value'
171177
HexStringLiteral(String),
172178

@@ -207,6 +213,8 @@ impl Value {
207213
| Value::NationalStringLiteral(s)
208214
| Value::HexStringLiteral(s) => Some(s),
209215
Value::DollarQuotedString(s) => Some(s.value),
216+
Value::QuoteDelimitedStringLiteral(s) => Some(s.value),
217+
Value::NationalQuoteDelimitedStringLiteral(s) => Some(s.value),
210218
_ => None,
211219
}
212220
}
@@ -242,6 +250,8 @@ impl fmt::Display for Value {
242250
Value::EscapedStringLiteral(v) => write!(f, "E'{}'", escape_escaped_string(v)),
243251
Value::UnicodeStringLiteral(v) => write!(f, "U&'{}'", escape_unicode_string(v)),
244252
Value::NationalStringLiteral(v) => write!(f, "N'{v}'"),
253+
Value::QuoteDelimitedStringLiteral(v) => v.fmt(f),
254+
Value::NationalQuoteDelimitedStringLiteral(v) => write!(f, "N{v}"),
245255
Value::HexStringLiteral(v) => write!(f, "X'{v}'"),
246256
Value::Boolean(v) => write!(f, "{v}"),
247257
Value::SingleQuotedByteStringLiteral(v) => write!(f, "B'{v}'"),
@@ -279,6 +289,28 @@ impl fmt::Display for DollarQuotedString {
279289
}
280290
}
281291

292+
/// A quote delimited string literal, e.g. `Q'_abc_'`.
293+
///
294+
/// See [Value::QuoteDelimitedStringLiteral] and/or
295+
/// [Value::NationalQuoteDelimitedStringLiteral].
296+
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
297+
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
298+
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
299+
pub struct QuoteDelimitedString {
300+
/// the quote start character; i.e. the character _after_ the opening `Q'`
301+
pub start_quote: char,
302+
/// the string literal value itself
303+
pub value: String,
304+
/// the quote end character; i.e. the character _before_ the closing `'`
305+
pub end_quote: char,
306+
}
307+
308+
impl fmt::Display for QuoteDelimitedString {
309+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
310+
write!(f, "Q'{}{}{}'", self.start_quote, self.value, self.end_quote)
311+
}
312+
}
313+
282314
#[derive(Debug, Clone, PartialEq, Eq, Ord, PartialOrd, Hash)]
283315
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
284316
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]

src/dialect/generic.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -195,4 +195,8 @@ impl Dialect for GenericDialect {
195195
fn supports_interval_options(&self) -> bool {
196196
true
197197
}
198+
199+
fn supports_quote_delimited_string(&self) -> bool {
200+
true
201+
}
198202
}

src/dialect/mod.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1214,6 +1214,13 @@ pub trait Dialect: Debug + Any {
12141214
fn supports_semantic_view_table_factor(&self) -> bool {
12151215
false
12161216
}
1217+
1218+
/// Support quote delimited string literals, e.g. `Q'{...}'`
1219+
///
1220+
/// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/Literals.html#GUID-1824CBAA-6E16-4921-B2A6-112FB02248DA)
1221+
fn supports_quote_delimited_string(&self) -> bool {
1222+
false
1223+
}
12171224
}
12181225

12191226
/// This represents the operators for which precedence must be defined

src/dialect/oracle.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,4 +95,8 @@ impl Dialect for OracleDialect {
9595
fn supports_group_by_expr(&self) -> bool {
9696
true
9797
}
98+
99+
fn supports_quote_delimited_string(&self) -> bool {
100+
true
101+
}
98102
}

src/parser/merge.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
//! SQL Parser for a `MERGE` statement
1414
1515
#[cfg(not(feature = "std"))]
16-
use alloc::{boxed::Box, format, string::ToString, vec, vec::Vec};
16+
use alloc::{boxed::Box, format, vec, vec::Vec};
1717

1818
use crate::{
1919
ast::{

src/parser/mod.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1754,6 +1754,8 @@ impl<'a> Parser<'a> {
17541754
| Token::TripleSingleQuotedRawStringLiteral(_)
17551755
| Token::TripleDoubleQuotedRawStringLiteral(_)
17561756
| Token::NationalStringLiteral(_)
1757+
| Token::QuoteDelimitedStringLiteral(_)
1758+
| Token::NationalQuoteDelimitedStringLiteral(_)
17571759
| Token::HexStringLiteral(_) => {
17581760
self.prev_token();
17591761
Ok(Expr::Value(self.parse_value()?))
@@ -2770,6 +2772,8 @@ impl<'a> Parser<'a> {
27702772
| Token::EscapedStringLiteral(_)
27712773
| Token::UnicodeStringLiteral(_)
27722774
| Token::NationalStringLiteral(_)
2775+
| Token::QuoteDelimitedStringLiteral(_)
2776+
| Token::NationalQuoteDelimitedStringLiteral(_)
27732777
| Token::HexStringLiteral(_) => Some(Box::new(self.parse_expr()?)),
27742778
_ => self.expected(
27752779
"either filler, WITH, or WITHOUT in LISTAGG",
@@ -10728,6 +10732,12 @@ impl<'a> Parser<'a> {
1072810732
Token::NationalStringLiteral(ref s) => {
1072910733
ok_value(Value::NationalStringLiteral(s.to_string()))
1073010734
}
10735+
Token::QuoteDelimitedStringLiteral(v) => {
10736+
ok_value(Value::QuoteDelimitedStringLiteral(v))
10737+
}
10738+
Token::NationalQuoteDelimitedStringLiteral(v) => {
10739+
ok_value(Value::NationalQuoteDelimitedStringLiteral(v))
10740+
}
1073110741
Token::EscapedStringLiteral(ref s) => {
1073210742
ok_value(Value::EscapedStringLiteral(s.to_string()))
1073310743
}

src/tokenizer.rs

Lines changed: 90 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,10 +29,10 @@ use alloc::{
2929
vec,
3030
vec::Vec,
3131
};
32-
use core::iter::Peekable;
3332
use core::num::NonZeroU8;
3433
use core::str::Chars;
3534
use core::{cmp, fmt};
35+
use core::{iter::Peekable, str};
3636

3737
#[cfg(feature = "serde")]
3838
use serde::{Deserialize, Serialize};
@@ -46,7 +46,10 @@ use crate::dialect::{
4646
SnowflakeDialect,
4747
};
4848
use crate::keywords::{Keyword, ALL_KEYWORDS, ALL_KEYWORDS_INDEX};
49-
use crate::{ast::DollarQuotedString, dialect::HiveDialect};
49+
use crate::{
50+
ast::{DollarQuotedString, QuoteDelimitedString},
51+
dialect::HiveDialect,
52+
};
5053

5154
/// SQL Token enumeration
5255
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
@@ -98,6 +101,12 @@ pub enum Token {
98101
TripleDoubleQuotedRawStringLiteral(String),
99102
/// "National" string literal: i.e: N'string'
100103
NationalStringLiteral(String),
104+
/// Quote delimited literal. Examples `Q'{ab'c}'`, `Q'|ab'c|'`, `Q'|ab|c|'`
105+
/// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Literals.html#GUID-1824CBAA-6E16-4921-B2A6-112FB02248DA)
106+
QuoteDelimitedStringLiteral(QuoteDelimitedString),
107+
/// "National" quote delimited literal. Examples `NQ'{ab'c}'`, `NQ'|ab'c|'`, `NQ'|ab|c|'`
108+
/// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Literals.html#GUID-1824CBAA-6E16-4921-B2A6-112FB02248DA)
109+
NationalQuoteDelimitedStringLiteral(QuoteDelimitedString),
101110
/// "escaped" string literal, which are an extension to the SQL standard: i.e: e'first \n second' or E 'first \n second'
102111
EscapedStringLiteral(String),
103112
/// Unicode string literal: i.e: U&'first \000A second'
@@ -294,6 +303,8 @@ impl fmt::Display for Token {
294303
Token::TripleDoubleQuotedString(ref s) => write!(f, "\"\"\"{s}\"\"\""),
295304
Token::DollarQuotedString(ref s) => write!(f, "{s}"),
296305
Token::NationalStringLiteral(ref s) => write!(f, "N'{s}'"),
306+
Token::QuoteDelimitedStringLiteral(ref s) => s.fmt(f),
307+
Token::NationalQuoteDelimitedStringLiteral(ref s) => write!(f, "N{s}"),
297308
Token::EscapedStringLiteral(ref s) => write!(f, "E'{s}'"),
298309
Token::UnicodeStringLiteral(ref s) => write!(f, "U&'{s}'"),
299310
Token::HexStringLiteral(ref s) => write!(f, "X'{s}'"),
@@ -1072,13 +1083,35 @@ impl<'a> Tokenizer<'a> {
10721083
self.tokenize_single_quoted_string(chars, '\'', backslash_escape)?;
10731084
Ok(Some(Token::NationalStringLiteral(s)))
10741085
}
1086+
Some(&q @ 'q') | Some(&q @ 'Q')
1087+
if self.dialect.supports_quote_delimited_string() =>
1088+
{
1089+
chars.next(); // consume and check the next char
1090+
if let Some('\'') = chars.peek() {
1091+
self.tokenize_quote_delimited_string(chars, &[n, q])
1092+
.map(|s| Some(Token::NationalQuoteDelimitedStringLiteral(s)))
1093+
} else {
1094+
let s = self.tokenize_word(String::from_iter([n, q]), chars);
1095+
Ok(Some(Token::make_word(&s, None)))
1096+
}
1097+
}
10751098
_ => {
10761099
// regular identifier starting with an "N"
10771100
let s = self.tokenize_word(n, chars);
10781101
Ok(Some(Token::make_word(&s, None)))
10791102
}
10801103
}
10811104
}
1105+
q @ 'Q' | q @ 'q' if self.dialect.supports_quote_delimited_string() => {
1106+
chars.next(); // consume and check the next char
1107+
if let Some('\'') = chars.peek() {
1108+
self.tokenize_quote_delimited_string(chars, &[q])
1109+
.map(|s| Some(Token::QuoteDelimitedStringLiteral(s)))
1110+
} else {
1111+
let s = self.tokenize_word(q, chars);
1112+
Ok(Some(Token::make_word(&s, None)))
1113+
}
1114+
}
10821115
// PostgreSQL accepts "escape" string constants, which are an extension to the SQL standard.
10831116
x @ 'e' | x @ 'E' if self.dialect.supports_string_escape_constant() => {
10841117
let starting_loc = chars.location();
@@ -2072,6 +2105,61 @@ impl<'a> Tokenizer<'a> {
20722105
)
20732106
}
20742107

2108+
/// Reads a quote delimited string expecting `chars.next()` to deliver a quote.
2109+
///
2110+
/// See <https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Literals.html#GUID-1824CBAA-6E16-4921-B2A6-112FB02248DA>
2111+
fn tokenize_quote_delimited_string(
2112+
&self,
2113+
chars: &mut State,
2114+
// the prefix that introduced the possible literal or word,
2115+
// e.g. "Q" or "nq"
2116+
literal_prefix: &[char],
2117+
) -> Result<QuoteDelimitedString, TokenizerError> {
2118+
let literal_start_loc = chars.location();
2119+
chars.next();
2120+
2121+
let start_quote_loc = chars.location();
2122+
let (start_quote, end_quote) = match chars.next() {
2123+
None | Some(' ') | Some('\t') | Some('\r') | Some('\n') => {
2124+
return self.tokenizer_error(
2125+
start_quote_loc,
2126+
format!(
2127+
"Invalid space, tab, newline, or EOF after '{}''",
2128+
String::from_iter(literal_prefix)
2129+
),
2130+
);
2131+
}
2132+
Some(c) => (
2133+
c,
2134+
match c {
2135+
'[' => ']',
2136+
'{' => '}',
2137+
'<' => '>',
2138+
'(' => ')',
2139+
c => c,
2140+
},
2141+
),
2142+
};
2143+
2144+
// read the string literal until the "quote character" followed by a literal quote
2145+
let mut value = String::new();
2146+
while let Some(ch) = chars.next() {
2147+
if ch == end_quote {
2148+
if let Some('\'') = chars.peek() {
2149+
chars.next(); // ~ consume the quote
2150+
return Ok(QuoteDelimitedString {
2151+
start_quote,
2152+
value,
2153+
end_quote,
2154+
});
2155+
}
2156+
}
2157+
value.push(ch);
2158+
}
2159+
2160+
self.tokenizer_error(literal_start_loc, "Unterminated string literal")
2161+
}
2162+
20752163
/// Read a quoted string.
20762164
fn tokenize_quoted_string(
20772165
&self,

0 commit comments

Comments
 (0)