Skip to content

Commit 3ff5a45

Browse files
Merge branch 'main' into postgres-regression-11
2 parents 641151d + 20b9849 commit 3ff5a45

27 files changed

Lines changed: 861 additions & 85 deletions

.github/workflows/rust.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ jobs:
8282
with:
8383
targets: 'thumbv6m-none-eabi'
8484
- run: cargo check --no-default-features --target thumbv6m-none-eabi
85+
- run: cargo check --no-default-features --features visitor --target thumbv6m-none-eabi
8586

8687
test:
8788
strategy:

derive/src/visit.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,11 +62,11 @@ pub(crate) fn derive_visit(
6262
fn visit<V: sqlparser::ast::#visitor_trait>(
6363
&#modifier self,
6464
visitor: &mut V
65-
) -> ::std::ops::ControlFlow<V::Break> {
65+
) -> ::core::ops::ControlFlow<V::Break> {
6666
#pre_visit
6767
#children
6868
#post_visit
69-
::std::ops::ControlFlow::Continue(())
69+
::core::ops::ControlFlow::Continue(())
7070
}
7171
}
7272
};

sqlparser_bench/benches/sqlparser_bench.rs

Lines changed: 104 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
// under the License.
1717

1818
use criterion::{criterion_group, criterion_main, Criterion};
19-
use sqlparser::dialect::GenericDialect;
19+
use sqlparser::dialect::{GenericDialect, PostgreSqlDialect, SQLiteDialect};
2020
use sqlparser::keywords::Keyword;
2121
use sqlparser::parser::Parser;
2222
use sqlparser::tokenizer::{Span, Word};
@@ -152,5 +152,107 @@ fn parse_many_identifiers(c: &mut Criterion) {
152152
group.finish();
153153
}
154154

155-
criterion_group!(benches, basic_queries, word_to_ident, parse_many_identifiers);
155+
/// Benchmark parsing pathological compound chains that previously caused 2^N
156+
/// work in `parse_compound_expr`. The input `IF a0.a1...aN.#` rejects at the
157+
/// trailing `#`, which used to force quadratic-or-worse backtracking through
158+
/// the chain.
159+
fn parse_compound_chain(c: &mut Criterion) {
160+
let mut group = c.benchmark_group("parse_compound_chain");
161+
let dialect = GenericDialect {};
162+
163+
for &n in &[10usize, 20, 30] {
164+
let chain = (0..n)
165+
.map(|i| format!("a{i}"))
166+
.collect::<Vec<_>>()
167+
.join(".");
168+
let sql = format!("IF {chain}.#");
169+
170+
group.bench_function(format!("chain_{n}"), |b| {
171+
b.iter(|| {
172+
let _ = Parser::parse_sql(&dialect, std::hint::black_box(&sql));
173+
});
174+
});
175+
}
176+
177+
group.finish();
178+
}
179+
180+
/// Benchmark parsing pathological compound chains with a reserved keyword in
181+
/// field position, like `SELECT x.not-b.not-b...`. The `.not-b` shape used to
182+
/// cause 2^N work in `parse_compound_expr` because `parse_prefix` descended
183+
/// into `parse_not` -> `parse_subexpr`, re-walking the remaining chain at
184+
/// every segment.
185+
fn parse_compound_keyword_chain(c: &mut Criterion) {
186+
let mut group = c.benchmark_group("parse_compound_keyword_chain");
187+
let dialect = GenericDialect {};
188+
189+
for &n in &[5usize, 10, 15] {
190+
let body = std::iter::repeat_n(".not-b", n).collect::<String>();
191+
let sql = format!("SELECT x{body}");
192+
193+
group.bench_function(format!("chain_{n}"), |b| {
194+
b.iter(|| {
195+
let _ = Parser::parse_sql(&dialect, std::hint::black_box(&sql));
196+
});
197+
});
198+
}
199+
200+
group.finish();
201+
}
202+
203+
/// Benchmark parsing pathological `IF(<keyword-fn>(<keyword-fn>(...x` chains
204+
/// that previously caused 2^N work in `parse_prefix`. Each nested
205+
/// `current_time(` segment used to be explored twice at every level (once via
206+
/// the speculative reserved-word arm, once via the unreserved-word fallback),
207+
/// doubling work per level. Post-fix the cost is linear in chain length.
208+
fn parse_prefix_keyword_call_chain(c: &mut Criterion) {
209+
let mut group = c.benchmark_group("parse_prefix_keyword_call_chain");
210+
let dialect = PostgreSqlDialect {};
211+
212+
for &n in &[10usize, 20, 30] {
213+
let sql = String::from("if(") + &"current_time(".repeat(n) + "x";
214+
215+
group.bench_function(format!("chain_{n}"), |b| {
216+
b.iter(|| {
217+
let _ = Parser::parse_sql(&dialect, std::hint::black_box(&sql));
218+
});
219+
});
220+
}
221+
222+
group.finish();
223+
}
224+
225+
/// Benchmark parsing pathological `case-case-case-...c` chains that
226+
/// previously caused 2^N work in `parse_prefix`. Each `case` token used to
227+
/// trigger a speculative `parse_case_expr` that recursively descends the
228+
/// chain, but the unreserved-word fallback returns `Identifier(case)` so the
229+
/// overall `parse_prefix` succeeds and the failure cache never fires.
230+
/// Post-fix the per-arm cache short-circuits the speculative descent.
231+
fn parse_prefix_case_chain(c: &mut Criterion) {
232+
let mut group = c.benchmark_group("parse_prefix_case_chain");
233+
let dialect = SQLiteDialect {};
234+
235+
for &n in &[10usize, 20, 30] {
236+
let sql = "case\t-".repeat(n) + "c";
237+
238+
group.bench_function(format!("chain_{n}"), |b| {
239+
b.iter(|| {
240+
let _ = Parser::parse_sql(&dialect, std::hint::black_box(&sql));
241+
});
242+
});
243+
}
244+
245+
group.finish();
246+
}
247+
248+
criterion_group!(
249+
benches,
250+
basic_queries,
251+
word_to_ident,
252+
parse_many_identifiers,
253+
parse_compound_chain,
254+
parse_compound_keyword_chain,
255+
parse_prefix_keyword_call_chain,
256+
parse_prefix_case_chain
257+
);
156258
criterion_main!(benches);

src/ast/comments.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ impl Comments {
3333
/// last accepted comment. In other words, this method will skip the
3434
/// comment if it's coming out of order (as encountered in the parsed
3535
/// source code.)
36-
pub(crate) fn offer(&mut self, comment: CommentWithSpan) {
36+
pub fn offer(&mut self, comment: CommentWithSpan) {
3737
if self
3838
.0
3939
.last()
@@ -71,7 +71,7 @@ impl Comments {
7171
/// // all comments appearing before line seven, i.e. before the first statement itself
7272
/// assert_eq!(
7373
/// &comments.find(..Location::new(7, 1)).map(|c| c.as_str()).collect::<Vec<_>>(),
74-
/// &["\n header comment ...\n ... spanning multiple lines\n", " first statement\n"]);
74+
/// &["\n header comment ...\n ... spanning multiple lines\n", " first statement"]);
7575
///
7676
/// // all comments appearing within the first statement
7777
/// assert_eq!(
@@ -81,7 +81,7 @@ impl Comments {
8181
/// // all comments appearing within or after the first statement
8282
/// assert_eq!(
8383
/// &comments.find(Location::new(7, 1)..).map(|c| c.as_str()).collect::<Vec<_>>(),
84-
/// &[" world ", " second statement\n", " trailing comment\n"]);
84+
/// &[" world ", " second statement", " trailing comment"]);
8585
/// ```
8686
///
8787
/// The [Spanned](crate::ast::Spanned) trait allows you to access location

src/ast/data_type.rs

Lines changed: 22 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -439,10 +439,11 @@ pub enum DataType {
439439
Custom(ObjectName, Vec<String>),
440440
/// Arrays.
441441
Array(ArrayElemTypeDef),
442-
/// Map, see [ClickHouse].
442+
/// Map, see [ClickHouse], [Hive].
443443
///
444444
/// [ClickHouse]: https://clickhouse.com/docs/en/sql-reference/data-types/map
445-
Map(Box<DataType>, Box<DataType>),
445+
/// [Hive]: https://hive.apache.org/docs/latest/language/languagemanual-types/
446+
Map(Box<DataType>, Box<DataType>, MapBracketKind),
446447
/// Tuple, see [ClickHouse].
447448
///
448449
/// [ClickHouse]: https://clickhouse.com/docs/en/sql-reference/data-types/tuple
@@ -785,9 +786,14 @@ impl fmt::Display for DataType {
785786
DataType::LowCardinality(data_type) => {
786787
write!(f, "LowCardinality({data_type})")
787788
}
788-
DataType::Map(key_data_type, value_data_type) => {
789-
write!(f, "Map({key_data_type}, {value_data_type})")
790-
}
789+
DataType::Map(key_data_type, value_data_type, bracket) => match bracket {
790+
MapBracketKind::Parentheses => {
791+
write!(f, "Map({key_data_type}, {value_data_type})")
792+
}
793+
MapBracketKind::AngleBrackets => {
794+
write!(f, "MAP<{key_data_type}, {value_data_type}>")
795+
}
796+
},
791797
DataType::Tuple(fields) => {
792798
write!(f, "Tuple({})", display_comma_separated(fields))
793799
}
@@ -904,6 +910,17 @@ pub enum StructBracketKind {
904910
AngleBrackets,
905911
}
906912

913+
/// Type of brackets used for `MAP` types.
914+
#[derive(Debug, Clone, Copy, PartialEq, PartialOrd, Eq, Ord, Hash)]
915+
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
916+
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
917+
pub enum MapBracketKind {
918+
/// Example: `Map(String, UInt16)`
919+
Parentheses,
920+
/// Example: `MAP<STRING, INT>`
921+
AngleBrackets,
922+
}
923+
907924
/// Timestamp and Time data types information about TimeZone formatting.
908925
///
909926
/// This is more related to a display information than real differences between each variant. To

src/ast/ddl.rs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2817,6 +2817,10 @@ pub struct CreateIndex {
28172817
pub unique: bool,
28182818
/// whether the index is created concurrently
28192819
pub concurrently: bool,
2820+
/// whether the index is created asynchronously ([DSQL]).
2821+
///
2822+
/// [DSQL]: https://docs.aws.amazon.com/aurora-dsql/latest/userguide/working-with-create-index-async.html
2823+
pub r#async: bool,
28202824
/// IF NOT EXISTS clause
28212825
pub if_not_exists: bool,
28222826
/// INCLUDE clause: <https://www.postgresql.org/docs/current/sql-createindex.html>
@@ -2842,13 +2846,14 @@ impl fmt::Display for CreateIndex {
28422846
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
28432847
write!(
28442848
f,
2845-
"CREATE {unique}INDEX {concurrently}{if_not_exists}",
2849+
"CREATE {unique}INDEX {concurrently}{async_}{if_not_exists}",
28462850
unique = if self.unique { "UNIQUE " } else { "" },
28472851
concurrently = if self.concurrently {
28482852
"CONCURRENTLY "
28492853
} else {
28502854
""
28512855
},
2856+
async_ = if self.r#async { "ASYNC " } else { "" },
28522857
if_not_exists = if self.if_not_exists {
28532858
"IF NOT EXISTS "
28542859
} else {

src/ast/mod.rs

Lines changed: 75 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ use crate::{
5353

5454
pub use self::data_type::{
5555
ArrayElemTypeDef, BinaryLength, CharLengthUnits, CharacterLength, DataType, EnumMember,
56-
ExactNumberInfo, IntervalFields, StructBracketKind, TimezoneInfo,
56+
ExactNumberInfo, IntervalFields, MapBracketKind, StructBracketKind, TimezoneInfo,
5757
};
5858
pub use self::dcl::{
5959
AlterRoleOperation, CreateRole, Grant, ResetConfig, Revoke, RoleOption, SecondaryRoles,
@@ -4500,6 +4500,28 @@ pub enum Statement {
45004500
comment: Option<String>,
45014501
},
45024502
/// ```sql
4503+
/// CREATE [ OR REPLACE ] [ { TEMP | TEMPORARY | VOLATILE } ] FILE FORMAT [ IF NOT EXISTS ] <name>
4504+
/// [ TYPE = { CSV | JSON | AVRO | ORC | PARQUET | XML } [ formatTypeOptions ] ]
4505+
/// [ COMMENT = '<string_literal>' ]
4506+
/// ```
4507+
/// See <https://docs.snowflake.com/en/sql-reference/sql/create-file-format>
4508+
CreateFileFormat {
4509+
/// `OR REPLACE` flag.
4510+
or_replace: bool,
4511+
/// Whether file format is temporary.
4512+
temporary: bool,
4513+
/// Whether file format is volatile.
4514+
volatile: bool,
4515+
/// `IF NOT EXISTS` flag.
4516+
if_not_exists: bool,
4517+
/// File format name.
4518+
name: ObjectName,
4519+
/// Format type options (e.g. `TYPE`, `FIELD_DELIMITER`, `COMPRESSION`, ...).
4520+
options: KeyValueOptions,
4521+
/// Optional comment.
4522+
comment: Option<String>,
4523+
},
4524+
/// ```sql
45034525
/// ASSERT <condition> [AS <message>]
45044526
/// ```
45054527
Assert {
@@ -4865,6 +4887,20 @@ pub enum Statement {
48654887
/// Snowflake `LIST`
48664888
/// See: <https://docs.snowflake.com/en/sql-reference/sql/list>
48674889
List(FileStagingCommand),
4890+
/// Snowflake `PUT`
4891+
/// ```sql
4892+
/// PUT 'file://<path>' <internalStage> [ <option> = <value> ... ]
4893+
/// ```
4894+
/// Options include `PARALLEL`, `AUTO_COMPRESS`, `SOURCE_COMPRESSION`, `OVERWRITE`.
4895+
/// See: <https://docs.snowflake.com/en/sql-reference/sql/put>
4896+
Put {
4897+
/// Local source URI as written in the statement, e.g. `file:///tmp/data.csv`.
4898+
source: String,
4899+
/// Target internal stage (e.g. `@mystage`, `@~`, `@%table`).
4900+
stage: ObjectName,
4901+
/// Trailing options (`PARALLEL=4`, `AUTO_COMPRESS=TRUE`, ...).
4902+
options: KeyValueOptions,
4903+
},
48684904
/// Snowflake `REMOVE`
48694905
/// See: <https://docs.snowflake.com/en/sql-reference/sql/remove>
48704906
Remove(FileStagingCommand),
@@ -6186,6 +6222,31 @@ impl fmt::Display for Statement {
61866222
}
61876223
Ok(())
61886224
}
6225+
Statement::CreateFileFormat {
6226+
or_replace,
6227+
temporary,
6228+
volatile,
6229+
if_not_exists,
6230+
name,
6231+
options,
6232+
comment,
6233+
} => {
6234+
write!(
6235+
f,
6236+
"CREATE {or_replace}{temp}{volatile}FILE FORMAT {if_not_exists}{name}",
6237+
or_replace = if *or_replace { "OR REPLACE " } else { "" },
6238+
temp = if *temporary { "TEMPORARY " } else { "" },
6239+
volatile = if *volatile { "VOLATILE " } else { "" },
6240+
if_not_exists = if *if_not_exists { "IF NOT EXISTS " } else { "" },
6241+
)?;
6242+
if !options.options.is_empty() {
6243+
write!(f, " {options}")?;
6244+
}
6245+
if let Some(comment) = comment {
6246+
write!(f, " COMMENT='{}'", comment)?;
6247+
}
6248+
Ok(())
6249+
}
61896250
Statement::CopyIntoSnowflake {
61906251
kind,
61916252
into,
@@ -6387,6 +6448,17 @@ impl fmt::Display for Statement {
63876448
Statement::WaitFor(s) => write!(f, "{s}"),
63886449
Statement::Return(r) => write!(f, "{r}"),
63896450
Statement::List(command) => write!(f, "LIST {command}"),
6451+
Statement::Put {
6452+
source,
6453+
stage,
6454+
options,
6455+
} => {
6456+
write!(f, "PUT '{source}' {stage}")?;
6457+
if !options.options.is_empty() {
6458+
write!(f, " {options}")?;
6459+
}
6460+
Ok(())
6461+
}
63906462
Statement::Remove(command) => write!(f, "REMOVE {command}"),
63916463
Statement::ExportData(e) => write!(f, "{e}"),
63926464
Statement::CreateUser(s) => write!(f, "{s}"),
@@ -12183,7 +12255,8 @@ impl fmt::Display for OptimizerHint {
1218312255
f.write_str(prefix)?;
1218412256
f.write_str(&self.prefix)?;
1218512257
f.write_str("+")?;
12186-
f.write_str(&self.text)
12258+
f.write_str(&self.text)?;
12259+
f.write_str("\n")
1218712260
}
1218812261
OptimizerHintStyle::MultiLine => {
1218912262
f.write_str("/*")?;

src/ast/query.rs

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3715,8 +3715,11 @@ pub struct SelectInto {
37153715
pub unlogged: bool,
37163716
/// `TABLE` keyword present.
37173717
pub table: bool,
3718-
/// Name of the target table.
3719-
pub name: ObjectName,
3718+
/// Target(s) of the `INTO` clause, see [Postgres] and [MySQL].
3719+
///
3720+
/// [Postgres]: https://www.postgresql.org/docs/current/sql-selectinto.html
3721+
/// [MySQL]: https://dev.mysql.com/doc/refman/9.7/en/select-into.html
3722+
pub targets: Vec<Expr>,
37203723
}
37213724

37223725
impl fmt::Display for SelectInto {
@@ -3725,7 +3728,14 @@ impl fmt::Display for SelectInto {
37253728
let unlogged = if self.unlogged { " UNLOGGED" } else { "" };
37263729
let table = if self.table { " TABLE" } else { "" };
37273730

3728-
write!(f, "INTO{}{}{} {}", temporary, unlogged, table, self.name)
3731+
write!(
3732+
f,
3733+
"INTO{}{}{} {}",
3734+
temporary,
3735+
unlogged,
3736+
table,
3737+
display_comma_separated(&self.targets)
3738+
)
37293739
}
37303740
}
37313741

0 commit comments

Comments
 (0)