Skip to content

Commit e558757

Browse files
Merge branch 'main' into forbid-unsafe-code
2 parents 0c68b87 + 20b9849 commit e558757

15 files changed

Lines changed: 560 additions & 68 deletions

sqlparser_bench/benches/sqlparser_bench.rs

Lines changed: 73 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
// under the License.
1717

1818
use criterion::{criterion_group, criterion_main, Criterion};
19-
use sqlparser::dialect::GenericDialect;
19+
use sqlparser::dialect::{GenericDialect, PostgreSqlDialect, SQLiteDialect};
2020
use sqlparser::keywords::Keyword;
2121
use sqlparser::parser::Parser;
2222
use sqlparser::tokenizer::{Span, Word};
@@ -177,11 +177,82 @@ fn parse_compound_chain(c: &mut Criterion) {
177177
group.finish();
178178
}
179179

180+
/// Benchmark parsing pathological compound chains with a reserved keyword in
181+
/// field position, like `SELECT x.not-b.not-b...`. The `.not-b` shape used to
182+
/// cause 2^N work in `parse_compound_expr` because `parse_prefix` descended
183+
/// into `parse_not` -> `parse_subexpr`, re-walking the remaining chain at
184+
/// every segment.
185+
fn parse_compound_keyword_chain(c: &mut Criterion) {
186+
let mut group = c.benchmark_group("parse_compound_keyword_chain");
187+
let dialect = GenericDialect {};
188+
189+
for &n in &[5usize, 10, 15] {
190+
let body = std::iter::repeat_n(".not-b", n).collect::<String>();
191+
let sql = format!("SELECT x{body}");
192+
193+
group.bench_function(format!("chain_{n}"), |b| {
194+
b.iter(|| {
195+
let _ = Parser::parse_sql(&dialect, std::hint::black_box(&sql));
196+
});
197+
});
198+
}
199+
200+
group.finish();
201+
}
202+
203+
/// Benchmark parsing pathological `IF(<keyword-fn>(<keyword-fn>(...x` chains
204+
/// that previously caused 2^N work in `parse_prefix`. Each nested
205+
/// `current_time(` segment used to be explored twice at every level (once via
206+
/// the speculative reserved-word arm, once via the unreserved-word fallback),
207+
/// doubling work per level. Post-fix the cost is linear in chain length.
208+
fn parse_prefix_keyword_call_chain(c: &mut Criterion) {
209+
let mut group = c.benchmark_group("parse_prefix_keyword_call_chain");
210+
let dialect = PostgreSqlDialect {};
211+
212+
for &n in &[10usize, 20, 30] {
213+
let sql = String::from("if(") + &"current_time(".repeat(n) + "x";
214+
215+
group.bench_function(format!("chain_{n}"), |b| {
216+
b.iter(|| {
217+
let _ = Parser::parse_sql(&dialect, std::hint::black_box(&sql));
218+
});
219+
});
220+
}
221+
222+
group.finish();
223+
}
224+
225+
/// Benchmark parsing pathological `case-case-case-...c` chains that
226+
/// previously caused 2^N work in `parse_prefix`. Each `case` token used to
227+
/// trigger a speculative `parse_case_expr` that recursively descends the
228+
/// chain, but the unreserved-word fallback returns `Identifier(case)` so the
229+
/// overall `parse_prefix` succeeds and the failure cache never fires.
230+
/// Post-fix the per-arm cache short-circuits the speculative descent.
231+
fn parse_prefix_case_chain(c: &mut Criterion) {
232+
let mut group = c.benchmark_group("parse_prefix_case_chain");
233+
let dialect = SQLiteDialect {};
234+
235+
for &n in &[10usize, 20, 30] {
236+
let sql = "case\t-".repeat(n) + "c";
237+
238+
group.bench_function(format!("chain_{n}"), |b| {
239+
b.iter(|| {
240+
let _ = Parser::parse_sql(&dialect, std::hint::black_box(&sql));
241+
});
242+
});
243+
}
244+
245+
group.finish();
246+
}
247+
180248
criterion_group!(
181249
benches,
182250
basic_queries,
183251
word_to_ident,
184252
parse_many_identifiers,
185-
parse_compound_chain
253+
parse_compound_chain,
254+
parse_compound_keyword_chain,
255+
parse_prefix_keyword_call_chain,
256+
parse_prefix_case_chain
186257
);
187258
criterion_main!(benches);

src/ast/comments.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ impl Comments {
3333
/// last accepted comment. In other words, this method will skip the
3434
/// comment if it's coming out of order (as encountered in the parsed
3535
/// source code).
36-
pub(crate) fn offer(&mut self, comment: CommentWithSpan) {
36+
pub fn offer(&mut self, comment: CommentWithSpan) {
3737
if self
3838
.0
3939
.last()
@@ -71,7 +71,7 @@ impl Comments {
7171
/// // all comments appearing before line seven, i.e. before the first statement itself
7272
/// assert_eq!(
7373
/// &comments.find(..Location::new(7, 1)).map(|c| c.as_str()).collect::<Vec<_>>(),
74-
/// &["\n header comment ...\n ... spanning multiple lines\n", " first statement\n"]);
74+
/// &["\n header comment ...\n ... spanning multiple lines\n", " first statement"]);
7575
///
7676
/// // all comments appearing within the first statement
7777
/// assert_eq!(
@@ -81,7 +81,7 @@ impl Comments {
8181
/// // all comments appearing within or after the first statement
8282
/// assert_eq!(
8383
/// &comments.find(Location::new(7, 1)..).map(|c| c.as_str()).collect::<Vec<_>>(),
84-
/// &[" world ", " second statement\n", " trailing comment\n"]);
84+
/// &[" world ", " second statement", " trailing comment"]);
8585
/// ```
8686
///
8787
/// The [Spanned](crate::ast::Spanned) trait allows you to access location

src/ast/mod.rs

Lines changed: 49 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4487,6 +4487,28 @@ pub enum Statement {
44874487
comment: Option<String>,
44884488
},
44894489
/// ```sql
4490+
/// CREATE [ OR REPLACE ] [ { TEMP | TEMPORARY | VOLATILE } ] FILE FORMAT [ IF NOT EXISTS ] <name>
4491+
/// [ TYPE = { CSV | JSON | AVRO | ORC | PARQUET | XML } [ formatTypeOptions ] ]
4492+
/// [ COMMENT = '<string_literal>' ]
4493+
/// ```
4494+
/// See <https://docs.snowflake.com/en/sql-reference/sql/create-file-format>
4495+
CreateFileFormat {
4496+
/// `OR REPLACE` flag.
4497+
or_replace: bool,
4498+
/// Whether file format is temporary.
4499+
temporary: bool,
4500+
/// Whether file format is volatile.
4501+
volatile: bool,
4502+
/// `IF NOT EXISTS` flag.
4503+
if_not_exists: bool,
4504+
/// File format name.
4505+
name: ObjectName,
4506+
/// Format type options (e.g. `TYPE`, `FIELD_DELIMITER`, `COMPRESSION`, ...).
4507+
options: KeyValueOptions,
4508+
/// Optional comment.
4509+
comment: Option<String>,
4510+
},
4511+
/// ```sql
44904512
/// ASSERT <condition> [AS <message>]
44914513
/// ```
44924514
Assert {
@@ -6185,6 +6207,31 @@ impl fmt::Display for Statement {
61856207
}
61866208
Ok(())
61876209
}
6210+
Statement::CreateFileFormat {
6211+
or_replace,
6212+
temporary,
6213+
volatile,
6214+
if_not_exists,
6215+
name,
6216+
options,
6217+
comment,
6218+
} => {
6219+
write!(
6220+
f,
6221+
"CREATE {or_replace}{temp}{volatile}FILE FORMAT {if_not_exists}{name}",
6222+
or_replace = if *or_replace { "OR REPLACE " } else { "" },
6223+
temp = if *temporary { "TEMPORARY " } else { "" },
6224+
volatile = if *volatile { "VOLATILE " } else { "" },
6225+
if_not_exists = if *if_not_exists { "IF NOT EXISTS " } else { "" },
6226+
)?;
6227+
if !options.options.is_empty() {
6228+
write!(f, " {options}")?;
6229+
}
6230+
if let Some(comment) = comment {
6231+
write!(f, " COMMENT='{}'", comment)?;
6232+
}
6233+
Ok(())
6234+
}
61886235
Statement::CopyIntoSnowflake {
61896236
kind,
61906237
into,
@@ -12038,7 +12085,8 @@ impl fmt::Display for OptimizerHint {
1203812085
f.write_str(prefix)?;
1203912086
f.write_str(&self.prefix)?;
1204012087
f.write_str("+")?;
12041-
f.write_str(&self.text)
12088+
f.write_str(&self.text)?;
12089+
f.write_str("\n")
1204212090
}
1204312091
OptimizerHintStyle::MultiLine => {
1204412092
f.write_str("/*")?;

src/ast/query.rs

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3715,8 +3715,11 @@ pub struct SelectInto {
37153715
pub unlogged: bool,
37163716
/// `TABLE` keyword present.
37173717
pub table: bool,
3718-
/// Name of the target table.
3719-
pub name: ObjectName,
3718+
/// Target(s) of the `INTO` clause; see [Postgres] and [MySQL].
3719+
///
3720+
/// [Postgres]: https://www.postgresql.org/docs/current/sql-selectinto.html
3721+
/// [MySQL]: https://dev.mysql.com/doc/refman/9.7/en/select-into.html
3722+
pub targets: Vec<Expr>,
37203723
}
37213724

37223725
impl fmt::Display for SelectInto {
@@ -3725,7 +3728,14 @@ impl fmt::Display for SelectInto {
37253728
let unlogged = if self.unlogged { " UNLOGGED" } else { "" };
37263729
let table = if self.table { " TABLE" } else { "" };
37273730

3728-
write!(f, "INTO{}{}{} {}", temporary, unlogged, table, self.name)
3731+
write!(
3732+
f,
3733+
"INTO{}{}{} {}",
3734+
temporary,
3735+
unlogged,
3736+
table,
3737+
display_comma_separated(&self.targets)
3738+
)
37293739
}
37303740
}
37313741

src/ast/spans.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -297,6 +297,7 @@ impl Spanned for Values {
297297
/// - [Statement::CreateProcedure]
298298
/// - [Statement::CreateMacro]
299299
/// - [Statement::CreateStage]
300+
/// - [Statement::CreateFileFormat]
300301
/// - [Statement::Assert]
301302
/// - [Statement::Grant]
302303
/// - [Statement::Revoke]
@@ -457,6 +458,7 @@ impl Spanned for Statement {
457458
Statement::CreateProcedure { .. } => Span::empty(),
458459
Statement::CreateMacro { .. } => Span::empty(),
459460
Statement::CreateStage { .. } => Span::empty(),
461+
Statement::CreateFileFormat { .. } => Span::empty(),
460462
Statement::Assert { .. } => Span::empty(),
461463
Statement::Grant { .. } => Span::empty(),
462464
Statement::Deny { .. } => Span::empty(),
@@ -2390,10 +2392,10 @@ impl Spanned for SelectInto {
23902392
temporary: _, // bool
23912393
unlogged: _, // bool
23922394
table: _, // bool
2393-
name,
2395+
targets,
23942396
} = self;
23952397

2396-
name.span()
2398+
union_spans(targets.iter().map(|t| t.span()))
23972399
}
23982400
}
23992401

src/ast/value.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -273,7 +273,7 @@ impl fmt::Display for Value {
273273
Value::DollarQuotedString(v) => write!(f, "{v}"),
274274
Value::EscapedStringLiteral(v) => write!(f, "E'{}'", escape_escaped_string(v)),
275275
Value::UnicodeStringLiteral(v) => write!(f, "U&'{}'", escape_unicode_string(v)),
276-
Value::NationalStringLiteral(v) => write!(f, "N'{v}'"),
276+
Value::NationalStringLiteral(v) => write!(f, "N'{}'", escape_single_quote_string(v)),
277277
Value::QuoteDelimitedStringLiteral(v) => v.fmt(f),
278278
Value::NationalQuoteDelimitedStringLiteral(v) => write!(f, "N{v}"),
279279
Value::HexStringLiteral(v) => write!(f, "X'{v}'"),

src/dialect/snowflake.rs

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -326,6 +326,10 @@ impl Dialect for SnowflakeDialect {
326326
);
327327
} else if parser.parse_keyword(Keyword::DATABASE) {
328328
return Some(parse_create_database(or_replace, transient, parser));
329+
} else if parser.parse_keywords(&[Keyword::FILE, Keyword::FORMAT]) {
330+
return Some(parse_create_file_format(
331+
or_replace, temporary, volatile, parser,
332+
));
329333
} else {
330334
// need to go back with the cursor
331335
let mut back = 1;
@@ -1272,6 +1276,35 @@ pub fn parse_create_stage(
12721276
})
12731277
}
12741278

1279+
/// Parse a Snowflake `CREATE FILE FORMAT` statement.
1280+
/// See <https://docs.snowflake.com/en/sql-reference/sql/create-file-format>
1281+
pub fn parse_create_file_format(
1282+
or_replace: bool,
1283+
temporary: bool,
1284+
volatile: bool,
1285+
parser: &mut Parser,
1286+
) -> Result<Statement, ParserError> {
1287+
let if_not_exists = parser.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]);
1288+
let name = parser.parse_object_name(true)?;
1289+
let options = parser.parse_key_value_options(false, &[Keyword::COMMENT])?;
1290+
let comment = if parser.parse_keyword(Keyword::COMMENT) {
1291+
parser.expect_token(&Token::Eq)?;
1292+
Some(parser.parse_comment_value()?)
1293+
} else {
1294+
None
1295+
};
1296+
1297+
Ok(Statement::CreateFileFormat {
1298+
or_replace,
1299+
temporary,
1300+
volatile,
1301+
if_not_exists,
1302+
name,
1303+
options,
1304+
comment,
1305+
})
1306+
}
1307+
12751308
pub fn parse_stage_name_identifier(parser: &mut Parser) -> Result<Ident, ParserError> {
12761309
let mut ident = String::new();
12771310
while let Some(next_token) = parser.next_token_no_skip() {

src/dialect/spark.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,13 @@ impl Dialect for SparkSqlDialect {
116116
true
117117
}
118118

119+
/// Enables pipe operator syntax for this dialect. See:
120+
/// - <https://spark.apache.org/docs/latest/sql-pipe-syntax.html>
121+
/// - <https://issues.apache.org/jira/browse/SPARK-49528>
122+
fn supports_pipe_operator(&self) -> bool {
123+
true
124+
}
125+
119126
/// Parse the `DIV` keyword as integer division.
120127
///
121128
/// Example: `SELECT 10 DIV 3` returns `3`.

0 commit comments

Comments
 (0)