Skip to content

Commit e2643fb

Browse files
committed
Squashed merge of expand-parse-without-semicolons
1 parent 64f4b1f commit e2643fb

File tree

8 files changed

+855
-137
lines changed

8 files changed

+855
-137
lines changed

src/dialect/mod.rs

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1291,8 +1291,14 @@ pub trait Dialect: Debug + Any {
12911291
/// Returns true if the specified keyword should be parsed as a table factor alias.
12921292
/// When explicit is true, the keyword is preceded by an `AS` word. Parser is provided
12931293
/// to enable looking ahead if needed.
1294-
fn is_table_factor_alias(&self, explicit: bool, kw: &Keyword, parser: &mut Parser) -> bool {
1295-
explicit || self.is_table_alias(kw, parser)
1294+
///
1295+
/// When the dialect supports statements without a semicolon delimiter, keywords are never parsed as aliases (even when `explicit` is true); only non-keyword words are.
1296+
fn is_table_factor_alias(&self, explicit: bool, kw: &Keyword, _parser: &mut Parser) -> bool {
1297+
if self.supports_statements_without_semicolon_delimiter() {
1298+
kw == &Keyword::NoKeyword
1299+
} else {
1300+
explicit || self.is_table_alias(kw, _parser)
1301+
}
12961302
}
12971303

12981304
/// Returns true if this dialect supports querying historical table data
@@ -1657,6 +1663,18 @@ pub trait Dialect: Debug + Any {
16571663
fn supports_comma_separated_trim(&self) -> bool {
16581664
false
16591665
}
1666+
1667+
/// Returns true if the dialect supports parsing statements without a semicolon delimiter.
1668+
///
1669+
/// If this returns `true`, the following SQL will parse. If it returns `false`, the SQL will not parse.
1670+
///
1671+
/// ```sql
1672+
/// SELECT 1
1673+
/// SELECT 2
1674+
/// ```
1675+
fn supports_statements_without_semicolon_delimiter(&self) -> bool {
1676+
false
1677+
}
16601678
}
16611679

16621680
/// Operators for which precedence must be defined.

src/dialect/mssql.rs

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ impl Dialect for MsSqlDialect {
6666
}
6767

6868
fn supports_connect_by(&self) -> bool {
69-
true
69+
false
7070
}
7171

7272
fn supports_eq_alias_assignment(&self) -> bool {
@@ -122,6 +122,10 @@ impl Dialect for MsSqlDialect {
122122
true
123123
}
124124

125+
fn supports_statements_without_semicolon_delimiter(&self) -> bool {
126+
true
127+
}
128+
125129
/// See <https://learn.microsoft.com/en-us/sql/relational-databases/security/authentication-access/server-level-roles>
126130
fn get_reserved_grantees_types(&self) -> &[GranteesType] {
127131
&[GranteesType::Public]
@@ -378,6 +382,9 @@ impl MsSqlDialect {
378382
) -> Result<Vec<Statement>, ParserError> {
379383
let mut stmts = Vec::new();
380384
loop {
385+
while let Token::SemiColon = parser.peek_token_ref().token {
386+
parser.advance_token();
387+
}
381388
if let Token::EOF = parser.peek_token_ref().token {
382389
break;
383390
}

src/keywords.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1215,6 +1215,7 @@ pub const RESERVED_FOR_TABLE_ALIAS: &[Keyword] = &[
12151215
Keyword::SEMI,
12161216
Keyword::RETURNING,
12171217
Keyword::OUTPUT,
1218+
Keyword::RETURN,
12181219
Keyword::ASOF,
12191220
Keyword::MATCH_CONDITION,
12201221
// for MSSQL-specific OUTER APPLY (seems reserved in most dialects)
@@ -1240,6 +1241,11 @@ pub const RESERVED_FOR_TABLE_ALIAS: &[Keyword] = &[
12401241
Keyword::TABLESAMPLE,
12411242
Keyword::FROM,
12421243
Keyword::OPEN,
1244+
Keyword::INSERT,
1245+
Keyword::UPDATE,
1246+
Keyword::DELETE,
1247+
Keyword::EXEC,
1248+
Keyword::EXECUTE,
12431249
];
12441250

12451251
/// Can't be used as a column alias, so that `SELECT <expr> alias`
@@ -1270,6 +1276,7 @@ pub const RESERVED_FOR_COLUMN_ALIAS: &[Keyword] = &[
12701276
Keyword::DISTRIBUTE,
12711277
Keyword::RETURNING,
12721278
Keyword::VALUES,
1279+
Keyword::RETURN,
12731280
// Reserved only as a column alias in the `SELECT` clause
12741281
Keyword::FROM,
12751282
Keyword::INTO,
@@ -1284,6 +1291,7 @@ pub const RESERVED_FOR_TABLE_FACTOR: &[Keyword] = &[
12841291
Keyword::LIMIT,
12851292
Keyword::HAVING,
12861293
Keyword::WHERE,
1294+
Keyword::RETURN,
12871295
];
12881296

12891297
/// Global list of reserved keywords that cannot be parsed as identifiers
@@ -1294,4 +1302,5 @@ pub const RESERVED_FOR_IDENTIFIER: &[Keyword] = &[
12941302
Keyword::INTERVAL,
12951303
Keyword::STRUCT,
12961304
Keyword::TRIM,
1305+
Keyword::RETURN,
12971306
];

src/parser/mod.rs

Lines changed: 68 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -288,6 +288,22 @@ impl ParserOptions {
288288
self.unescape = unescape;
289289
self
290290
}
291+
292+
/// Sets whether semicolon statement delimiters are required.
293+
///
294+
/// If this option is `true`, the following SQL will not parse. If the option is `false`, the SQL will parse.
295+
///
296+
/// ```sql
297+
/// SELECT 1
298+
/// SELECT 2
299+
/// ```
300+
pub fn with_require_semicolon_stmt_delimiter(
301+
mut self,
302+
require_semicolon_stmt_delimiter: bool,
303+
) -> Self {
304+
self.require_semicolon_stmt_delimiter = require_semicolon_stmt_delimiter;
305+
self
306+
}
291307
}
292308

293309
#[derive(Copy, Clone)]
@@ -384,7 +400,11 @@ impl<'a> Parser<'a> {
384400
state: ParserState::Normal,
385401
dialect,
386402
recursion_counter: RecursionCounter::new(DEFAULT_REMAINING_DEPTH),
387-
options: ParserOptions::new().with_trailing_commas(dialect.supports_trailing_commas()),
403+
options: ParserOptions::new()
404+
.with_trailing_commas(dialect.supports_trailing_commas())
405+
.with_require_semicolon_stmt_delimiter(
406+
!dialect.supports_statements_without_semicolon_delimiter(),
407+
),
388408
}
389409
}
390410

@@ -506,13 +526,18 @@ impl<'a> Parser<'a> {
506526

507527
match &self.peek_token_ref().token {
508528
Token::EOF => break,
509-
510529
// end of statement
511530
Token::Word(word) => {
512531
if expecting_statement_delimiter && word.keyword == Keyword::END {
513532
break;
514533
}
515534
}
535+
// don't expect a semicolon statement delimiter after a newline when not otherwise required
536+
Token::Whitespace(Whitespace::Newline) => {
537+
if !self.options.require_semicolon_stmt_delimiter {
538+
expecting_statement_delimiter = false;
539+
}
540+
}
516541
_ => {}
517542
}
518543

@@ -522,7 +547,7 @@ impl<'a> Parser<'a> {
522547

523548
let statement = self.parse_statement()?;
524549
stmts.push(statement);
525-
expecting_statement_delimiter = true;
550+
expecting_statement_delimiter = self.options.require_semicolon_stmt_delimiter;
526551
}
527552
Ok(stmts)
528553
}
@@ -4970,6 +4995,14 @@ impl<'a> Parser<'a> {
49704995
return Ok(vec![]);
49714996
}
49724997

4998+
if end_token == Token::SemiColon && !self.options.require_semicolon_stmt_delimiter {
4999+
if let Token::Word(ref kw) = self.peek_token().token {
5000+
if kw.keyword != Keyword::NoKeyword {
5001+
return Ok(vec![]);
5002+
}
5003+
}
5004+
}
5005+
49735006
if self.options.trailing_commas && self.peek_tokens() == [Token::Comma, end_token] {
49745007
let _ = self.consume_token(&Token::Comma);
49755008
return Ok(vec![]);
@@ -4987,6 +5020,9 @@ impl<'a> Parser<'a> {
49875020
) -> Result<Vec<Statement>, ParserError> {
49885021
let mut values = vec![];
49895022
loop {
5023+
// ignore empty statements (between successive statement delimiters)
5024+
while self.consume_token(&Token::SemiColon) {}
5025+
49905026
match &self.peek_nth_token_ref(0).token {
49915027
Token::EOF => break,
49925028
Token::Word(w) => {
@@ -4998,7 +5034,13 @@ impl<'a> Parser<'a> {
49985034
}
49995035

50005036
values.push(self.parse_statement()?);
5001-
self.expect_token(&Token::SemiColon)?;
5037+
5038+
if self.options.require_semicolon_stmt_delimiter {
5039+
self.expect_token(&Token::SemiColon)?;
5040+
}
5041+
5042+
// ignore empty statements (between successive statement delimiters)
5043+
while self.consume_token(&Token::SemiColon) {}
50025044
}
50035045
Ok(values)
50045046
}
@@ -19571,7 +19613,28 @@ impl<'a> Parser<'a> {
1957119613

1957219614
/// Parse [Statement::Return]
1957319615
fn parse_return(&mut self) -> Result<Statement, ParserError> {
19574-
match self.maybe_parse(|p| p.parse_expr())? {
19616+
let rs = self.maybe_parse(|p| {
19617+
let expr = p.parse_expr()?;
19618+
19619+
match &expr {
19620+
Expr::Value(_)
19621+
| Expr::Function(_)
19622+
| Expr::UnaryOp { .. }
19623+
| Expr::BinaryOp { .. }
19624+
| Expr::Case { .. }
19625+
| Expr::Cast { .. }
19626+
| Expr::Convert { .. }
19627+
| Expr::Subquery(_) => Ok(expr),
19628+
// todo: how to restrict to variables?
19629+
Expr::Identifier(id) if id.value.starts_with('@') => Ok(expr),
19630+
_ => parser_err!(
19631+
"Non-returnable expression found following RETURN",
19632+
p.peek_token().span.start
19633+
),
19634+
}
19635+
})?;
19636+
19637+
match rs {
1957519638
Some(expr) => Ok(Statement::Return(ReturnStatement {
1957619639
value: Some(ReturnStatementValue::Expr(expr)),
1957719640
})),

src/test_utils.rs

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
#[cfg(not(feature = "std"))]
2626
use alloc::{
2727
boxed::Box,
28+
format,
2829
string::{String, ToString},
2930
vec,
3031
vec::Vec,
@@ -186,6 +187,32 @@ impl TestedDialects {
186187
statements
187188
}
188189

190+
/// The same as [`statements_parse_to`] but every semicolon in the SQL text is replaced with a space before parsing.
191+
pub fn statements_without_semicolons_parse_to(
192+
&self,
193+
sql: &str,
194+
canonical: &str,
195+
) -> Vec<Statement> {
196+
let sql_without_semicolons = sql.replace(";", " ");
197+
let statements = self
198+
.parse_sql_statements(&sql_without_semicolons)
199+
.expect(&sql_without_semicolons);
200+
if !canonical.is_empty() && sql != canonical {
201+
assert_eq!(self.parse_sql_statements(canonical).unwrap(), statements);
202+
} else {
203+
assert_eq!(
204+
sql,
205+
statements
206+
.iter()
207+
// note: account for format_statement_list manually inserted semicolons
208+
.map(|s| s.to_string().trim_end_matches(";").to_string())
209+
.collect::<Vec<_>>()
210+
.join("; ")
211+
);
212+
}
213+
statements
214+
}
215+
189216
/// Ensures that `sql` parses as an [`Expr`], and that
190217
/// re-serializing the parse result produces `canonical`.
191218
pub fn expr_parses_to(&self, sql: &str, canonical: &str) -> Expr {
@@ -319,6 +346,43 @@ where
319346
all_dialects_where(|d| !except(d))
320347
}
321348

349+
/// Returns all dialects that don't support statements without semicolon delimiters.
350+
/// (i.e. dialects that require semicolon delimiters.)
351+
pub fn all_dialects_requiring_semicolon_statement_delimiter() -> TestedDialects {
352+
let tested_dialects =
353+
all_dialects_except(|d| d.supports_statements_without_semicolon_delimiter());
354+
assert_ne!(tested_dialects.dialects.len(), 0);
355+
tested_dialects
356+
}
357+
358+
/// Returns all dialects that do support statements without semicolon delimiters.
359+
/// (i.e. dialects not requiring semicolon delimiters.)
360+
pub fn all_dialects_not_requiring_semicolon_statement_delimiter() -> TestedDialects {
361+
let tested_dialects =
362+
all_dialects_where(|d| d.supports_statements_without_semicolon_delimiter());
363+
assert_ne!(tested_dialects.dialects.len(), 0);
364+
tested_dialects
365+
}
366+
367+
/// Asserts an error for `parse_sql_statements`:
368+
/// - "end of statement" for dialects that require semicolon delimiters
369+
/// - "an SQL statement" for dialects that don't require semicolon delimiters.
370+
pub fn assert_err_parse_statements(sql: &str, found: &str) {
371+
assert_eq!(
372+
ParserError::ParserError(format!("Expected: end of statement, found: {found}")),
373+
all_dialects_requiring_semicolon_statement_delimiter()
374+
.parse_sql_statements(sql)
375+
.unwrap_err()
376+
);
377+
378+
assert_eq!(
379+
ParserError::ParserError(format!("Expected: an SQL statement, found: {found}")),
380+
all_dialects_not_requiring_semicolon_statement_delimiter()
381+
.parse_sql_statements(sql)
382+
.unwrap_err()
383+
);
384+
}
385+
322386
pub fn assert_eq_vec<T: ToString>(expected: &[&str], actual: &[T]) {
323387
assert_eq!(
324388
expected,

0 commit comments

Comments
 (0)